import threading

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

# Load model and tokenizer once at startup.
# Note: Qwen3Guard-Gen-8B is Qwen's safety-moderation model; a general-purpose
# chat model may suit this assistant persona better.
model_name = "Qwen/Qwen3Guard-Gen-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)


# Streaming response generator
def chat_with_dudea(message, history):
    # Rebuild the chat-template message list from the Gradio history.
    # The last history entry is the (message, "") placeholder appended by
    # respond(), so skip it to avoid sending the user message twice.
    messages = [{
        "role": "system",
        "content": "You are DUDEAIBeta1.1, a futuristic, smooth-talking assistant.",
    }]
    for user_msg, bot_msg in history[:-1]:
        messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Tokenize with the model's chat template
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    # Create a streamer that yields decoded text as tokens are generated
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(inputs, max_new_tokens=300, streamer=streamer)

    # Run model.generate in a background thread so we can consume the stream here
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Build the response incrementally
    partial_response = ""
    for new_text in streamer:
        partial_response += new_text
        history[-1] = (message, partial_response)  # update the placeholder entry
        yield history  # 🔥 this yields live typing updates
    thread.join()


# Custom UI
with gr.Blocks(css="""
body {background: linear-gradient(135deg, #1e1e2f, #2c2c54); color: white; font-family: 'Inter', sans-serif;}
#chatbox {height: 600px; overflow-y: auto; background: rgba(255,255,255,0.05); border-radius: 20px; padding: 20px;}
.message {display: flex; align-items: flex-start; margin: 12px;}
.user {flex-direction: row-reverse;}
.avatar {width: 45px; height: 45px; border-radius: 50%; margin: 8px;}
.bubble {padding: 14px 18px; border-radius: 18px; max-width: 70%; animation: fadeIn 0.3s ease;}
.user .bubble {background: linear-gradient(135deg, #4f46e5, #6366f1); color: white; border-bottom-right-radius: 6px;}
.bot .bubble {background: rgba(255,255,255,0.1); color: #f3f4f6; border-bottom-left-radius: 6px; backdrop-filter: blur(8px);}
@keyframes fadeIn {from {opacity:0; transform: translateY(10px);} to {opacity:1; transform: translateY(0);}}
""") as demo:
    gr.Markdown(
        "<h1 style='text-align: center;'>🤖 DUDEAIBeta1.1</h1>"
        "<p style='text-align: center;'>A smooth, next-gen AI chat with live typing ✨</p>"
    )

    chatbot = gr.Chatbot(
        elem_id="chatbox",
        avatar_images=("user.png", "ai.png"),  # put these files in your repo
    )
    with gr.Row():
        msg = gr.Textbox(placeholder="Type your message...", container=False, scale=10)
        clear = gr.Button("🧹", scale=1)

    # Live streaming: append a placeholder entry, then stream into it.
    # Must be a generator itself (yield from) so Gradio streams the updates.
    def respond(message, history):
        history.append((message, ""))  # add user message with an empty reply
        yield from chat_with_dudea(message, history)

    msg.submit(respond, [msg, chatbot], [chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()