Spaces:

akhaliq
/

Qwen3-VL-2B-Instruct

Running on Zero

App Files Files Community

akhaliq HF Staff committed 15 days ago

Commit

a14c972

verified ·

1 Parent(s): 3c2d41e

Deploy Gradio app with multiple files

Browse files

Files changed (2) hide show

app.py +172 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,172 @@

+import gradio as gr
+from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
+import torch
+from PIL import Image
+import io
+import base64
+import spaces
+# Load model and processor
+model = Qwen3VLForConditionalGeneration.from_pretrained(
+    "Qwen/Qwen3-VL-2B-Instruct",
+    torch_dtype=torch.bfloat16,
+    device_map="auto"
+)
+processor = AutoProcessor.from_pretrained("Qwen/Qwen3-VL-2B-Instruct")
+def process_image(image):
+    """Convert image to base64 string for processing"""
+    if isinstance(image, str):
+        return image
+    if isinstance(image, Image.Image):
+        buffered = io.BytesIO()
+        image.save(buffered, format="PNG")
+        img_str = base64.b64encode(buffered.getvalue()).decode()
+        return f"data:image/png;base64,{img_str}"
+    return image
+@spaces.GPU(duration=120)
+def qwen_chat(message, image, chat_history):
+    """
+    Process chat message with optional image input
+    Args:
+        message (str): User's text message
+        image: Optional image input
+        chat_history (list): Previous conversation history
+    Returns:
+        tuple: Updated chat history and empty message input
+    """
+    if not message and image is None:
+        return chat_history, ""
+    # Build messages list
+    messages = []
+    # Add previous chat history
+    for user_msg, assistant_msg in chat_history:
+        messages.append({"role": "user", "content": [{"type": "text", "text": user_msg}]})
+        messages.append({"role": "assistant", "content": [{"type": "text", "text": assistant_msg}]})
+    # Add current message with optional image
+    current_content = []
+    if image is not None:
+        current_content.append({
+            "type": "image",
+            "image": image
+        })
+    if message:
+        current_content.append({
+            "type": "text",
+            "text": message
+        })
+    messages.append({
+        "role": "user",
+        "content": current_content
+    })
+    # Prepare inputs
+    inputs = processor.apply_chat_template(
+        messages,
+        tokenize=True,
+        add_generation_prompt=True,
+        return_dict=True,
+        return_tensors="pt"
+    )
+    inputs = inputs.to(model.device)
+    # Generate response
+    with torch.no_grad():
+        generated_ids = model.generate(**inputs, max_new_tokens=256)
+    # Decode output
+    generated_ids_trimmed = [
+        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+    ]
+    output_text = processor.batch_decode(
+        generated_ids_trimmed,
+        skip_special_tokens=True,
+        clean_up_tokenization_spaces=False
+    )[0]
+    # Update chat history
+    chat_history.append((message if message else "[Image provided]", output_text))
+    return chat_history, ""
+# Create Gradio interface: a chat transcript next to an image upload, a message
+# box with a send button, a clear button, and a short tips section.
+with gr.Blocks(title="Qwen3-VL Chat") as demo:
+    # Page header.
+    gr.Markdown(
+        """
+        # 🎨 Qwen3-VL Chat
+        Chat with Qwen3-VL-2B-Instruct - A multimodal AI that can understand both text and images!
+        [Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)
+        """
+    )
+    with gr.Row():
+        with gr.Column(scale=3):
+            # NOTE(review): type="messages" means the history is exchanged as
+            # role/content dicts; the chat handler must read and return that
+            # format - confirm against qwen_chat.
+            chatbot = gr.Chatbot(
+                label="Chat History",
+                type="messages",
+                height=600,
+                show_copy_button=True
+            )
+        with gr.Column(scale=1):
+            # Optional image for the current turn, requested as a PIL image.
+            image_input = gr.Image(
+                label="Upload Image (Optional)",
+                type="pil",
+                sources=["upload", "clipboard"],
+                interactive=True
+            )
+    with gr.Row():
+        message_input = gr.Textbox(
+            label="Message",
+            placeholder="Type your message here...",
+            lines=2,
+            scale=4
+        )
+        send_btn = gr.Button("Send", scale=1, variant="primary")
+    with gr.Row():
+        clear_btn = gr.Button("Clear Chat", variant="secondary")
+    # Usage hints shown below the controls.
+    gr.Markdown(
+        """
+        ### Tips:
+        - Upload an image to ask questions about it
+        - Describe what you see or ask for analysis
+        - The model can answer questions about images and text
+        """
+    )
+    # Event handlers
+    def send_message(msg, img, history):
+        """Forward the textbox, image and chat history values to qwen_chat."""
+        return qwen_chat(msg, img, history)
+    # The Send button and submitting the textbox both run the same handler,
+    # which returns the new history and the value for the message box.
+    send_btn.click(
+        send_message,
+        inputs=[message_input, image_input, chatbot],
+        outputs=[chatbot, message_input]
+    )
+    message_input.submit(
+        send_message,
+        inputs=[message_input, image_input, chatbot],
+        outputs=[chatbot, message_input]
+    )
+    # Clear resets the transcript, the uploaded image and the message box.
+    clear_btn.click(
+        lambda: ([], None, ""),
+        outputs=[chatbot, image_input, message_input]
+    )
+# Start the app only when this file is run as a script.
+if __name__ == "__main__":
+    demo.launch(share=False)

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+gradio
+transformers
+torch
+torchvision
+pillow
+accelerate
+spaces