import threading

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

# Load model and tokenizer once at startup.
# Note: Qwen3Guard-Gen-8B is Qwen's safety-moderation model; a general-purpose
# chat model may suit this assistant persona better.
model_name = "Qwen/Qwen3Guard-Gen-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)


# Streaming response generator
def chat_with_dudea(message, history):
    # Rebuild the chat-template message list from the Gradio history.
    # The last history entry is the (message, "") placeholder appended by
    # respond(), so skip it to avoid sending the user message twice.
    messages = [{
        "role": "system",
        "content": "You are DUDEAIBeta1.1, a futuristic, smooth-talking assistant.",
    }]
    for user_msg, bot_msg in history[:-1]:
        messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Tokenize with the model's chat template
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    # Create a streamer that yields decoded text as tokens are generated
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(inputs, max_new_tokens=300, streamer=streamer)

    # Run model.generate in a background thread so we can consume the stream here
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Build the response incrementally
    partial_response = ""
    for new_text in streamer:
        partial_response += new_text
        history[-1] = (message, partial_response)  # update the placeholder entry
        yield history  # 🔥 this yields live typing updates
    thread.join()


# Custom UI
with gr.Blocks(css="""
body {background: linear-gradient(135deg, #1e1e2f, #2c2c54); color: white; font-family: 'Inter', sans-serif;}
#chatbox {height: 600px; overflow-y: auto; background: rgba(255,255,255,0.05); border-radius: 20px; padding: 20px;}
.message {display: flex; align-items: flex-start; margin: 12px;}
.user {flex-direction: row-reverse;}
.avatar {width: 45px; height: 45px; border-radius: 50%; margin: 8px;}
.bubble {padding: 14px 18px; border-radius: 18px; max-width: 70%; animation: fadeIn 0.3s ease;}
.user .bubble {background: linear-gradient(135deg, #4f46e5, #6366f1); color: white; border-bottom-right-radius: 6px;}
.bot .bubble {background: rgba(255,255,255,0.1); color: #f3f4f6; border-bottom-left-radius: 6px; backdrop-filter: blur(8px);}
@keyframes fadeIn {from {opacity:0; transform: translateY(10px);} to {opacity:1; transform: translateY(0);}}
""") as demo:
    gr.Markdown(
        "<h1 style='text-align: center;'>🤖 DUDEAIBeta1.1</h1>"
        "<p style='text-align: center;'>A smooth, next-gen AI chat with live typing ✨</p>"
    )

    chatbot = gr.Chatbot(
        elem_id="chatbox",
        avatar_images=("user.png", "ai.png"),  # put these files in your repo
    )
    with gr.Row():
        msg = gr.Textbox(placeholder="Type your message...", container=False, scale=10)
        clear = gr.Button("🧹", scale=1)

    # Live streaming: append a placeholder entry, then stream into it.
    # Must be a generator itself (yield from) so Gradio streams the updates.
    def respond(message, history):
        history.append((message, ""))  # add user message with an empty reply
        yield from chat_with_dudea(message, history)

    msg.submit(respond, [msg, chatbot], [chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()