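"""
Lemonade Reasoning Chatbot
--------------------------
Gradio app for chatting with local LLMs served by AMD Lemonade Server through its
OpenAI-compatible API, with the model's reasoning shown in a collapsible section.

Assumed prerequisites (not stated in the original file): `pip install gradio openai`
and a Lemonade Server instance listening on http://localhost:8000.
"""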
import gradio as gr
from gradio import ChatMessage
from openai import OpenAI
import time

# Configure Lemonade Server connection
base_url = "http://localhost:8000/api/v1"
client = OpenAI(
    base_url=base_url,
    api_key="lemonade",  # required, but unused in Lemonade
)
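
# Optional sanity check (not part of the original app): with the server running, the
# models it exposes can be listed through the standard OpenAI client, e.g.
#     print([m.id for m in client.models.list().data])
# This assumes Lemonade Server implements the OpenAI-compatible GET /models endpoint.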


def stream_chat_response(message: str, history: list, model_name: str, system_prompt: str):
    """
    Stream responses from Lemonade Server and display the thinking process separately.
    """
    # Add user message to history
    history.append(ChatMessage(role="user", content=message))
    yield history

    # Convert history to OpenAI format - only include actual conversation messages
    messages = []

    # Add system prompt if provided
    if system_prompt and system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})

    # Convert history, skipping metadata-only messages
    for msg in history:
        if isinstance(msg, ChatMessage):
            # Skip thinking/metadata messages when sending to the API
            if msg.metadata and msg.metadata.get("title"):
                continue
            messages.append({
                "role": msg.role,
                "content": msg.content
            })
        elif isinstance(msg, dict):
            # Skip metadata messages
            if msg.get("metadata"):
                continue
            messages.append({
                "role": msg.get("role", "user"),
                "content": msg.get("content", "")
            })
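
    # At this point `messages` follows the standard OpenAI chat format, for example:
    #   [{"role": "system", "content": "You are a helpful assistant."},
    #    {"role": "user", "content": "What is 15 + 24?"}]
    # (illustrative values only; the actual contents depend on the conversation so far)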
    try:
        # Initialize response tracking
        thinking_content = ""
        response_content = ""
        thinking_added = False
        response_added = False
        thinking_start_time = None

        # Stream response from Lemonade Server
        stream = client.chat.completions.create(
            model=model_name,
            messages=messages,
            stream=True,
            max_tokens=2048,
            temperature=0.7,
        )
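
        # Each streamed chunk carries an incremental `delta`. Reasoning models served
        # through Lemonade may populate `delta.reasoning_content` (chain of thought)
        # separately from `delta.content` (the final answer); non-reasoning models
        # only ever set `content`, so the thinking section is simply never shown.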
        for chunk in stream:
            # Safety check for chunk structure
            if not chunk.choices or len(chunk.choices) == 0:
                continue
            if not hasattr(chunk.choices[0], 'delta'):
                continue

            delta = chunk.choices[0].delta

            # Check for reasoning_content (thinking process)
            reasoning_content = getattr(delta, 'reasoning_content', None)
            # Check for regular content (final answer)
            content = getattr(delta, 'content', None)

            # Handle reasoning/thinking content
            if reasoning_content:
                if not thinking_added:
                    # Add thinking section
                    thinking_start_time = time.time()
                    history.append(ChatMessage(
                        role="assistant",
                        content="",
                        metadata={
                            "title": "🧠 Thought Process",
                            "status": "pending"
                        }
                    ))
                    thinking_added = True

                # Accumulate thinking content
                thinking_content += reasoning_content
                history[-1] = ChatMessage(
                    role="assistant",
                    content=thinking_content,
                    metadata={
                        "title": "🧠 Thought Process",
                        "status": "pending"
                    }
                )
                yield history

            # Handle regular content (final answer)
            elif content:
                # Finalize thinking section if it exists
                if thinking_added and thinking_start_time:
                    elapsed = time.time() - thinking_start_time
                    # Update the thinking message to "done" status
                    for i in range(len(history) - 1, -1, -1):
                        if isinstance(history[i], ChatMessage) and history[i].metadata and history[i].metadata.get("title") == "🧠 Thought Process":
                            history[i] = ChatMessage(
                                role="assistant",
                                content=thinking_content,
                                metadata={
                                    "title": "🧠 Thought Process",
                                    "status": "done",
                                    "duration": elapsed
                                }
                            )
                            break
                    thinking_start_time = None

                # Add or update response content
                if not response_added:
                    history.append(ChatMessage(
                        role="assistant",
                        content=""
                    ))
                    response_added = True

                response_content += content
                history[-1] = ChatMessage(
                    role="assistant",
                    content=response_content
                )
                yield history

        # Final check: if thinking section exists but wasn't finalized
        if thinking_added and thinking_start_time:
            elapsed = time.time() - thinking_start_time
            for i in range(len(history) - 1, -1, -1):
                if isinstance(history[i], ChatMessage) and history[i].metadata and history[i].metadata.get("title") == "🧠 Thought Process":
                    history[i] = ChatMessage(
                        role="assistant",
                        content=thinking_content,
                        metadata={
                            "title": "🧠 Thought Process",
                            "status": "done",
                            "duration": elapsed
                        }
                    )
                    break

        yield history

    except Exception as e:
        import traceback

        error_msg = str(e)
        error_trace = traceback.format_exc()

        # Try to extract more details from the error
        if "422" in error_msg:
            error_details = f"""
⚠️ **Request Validation Error**

The server rejected the request. Possible issues:
- Model name might be incorrect (currently: `{model_name}`)
- Check that the model is loaded on the server
- Try simplifying the system prompt

**Error:** {error_msg}
"""
        elif "list index out of range" in error_msg or "IndexError" in error_trace:
            error_details = f"""
⚠️ **Streaming Response Error**

There was an issue processing the streaming response.

**Debug Info:**
- Model: `{model_name}`
- Base URL: `{base_url}`
- Error: {error_msg}

Try refreshing and sending another message.
"""
        else:
            error_details = f"""
⚠️ **Connection Error**

Error: {error_msg}

Make sure:
1. Lemonade Server is running at `{base_url}`
2. Model `{model_name}` is loaded
3. The server is accessible

**Debug trace:**
```
{error_trace[-500:]}
```
"""

        history.append(ChatMessage(
            role="assistant",
            content=error_details,
            metadata={
                "title": "⚠️ Error Details"
            }
        ))
        yield history


def clear_chat():
    """Clear the chat history."""
    return []
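
# Note: clear_chat is defined but not wired to any UI element. One way to use it
# (hypothetical, not part of the original interface) would be a button inside the
# Blocks context below:
#     clear_btn = gr.Button("Clear Chat")
#     clear_btn.click(clear_chat, inputs=None, outputs=chatbot)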


# Build the Gradio interface
with gr.Blocks(theme=gr.themes.Ocean()) as demo:
    # Define the input textbox first so it can be referenced in Examples
    msg = gr.Textbox(
        placeholder="Type your message here and press Enter...",
        show_label=False,
        container=False,
        render=False  # Don't render yet; it is rendered in the main area below
    )

    # Sidebar for settings and information
    with gr.Sidebar(position="left", open=True):
        gr.Markdown("""
# 🍋 Lemonade Reasoning Chatbot

Chat with local LLMs running on AMD Lemonade Server. This interface beautifully displays the model's thinking process!
""")

        gr.Markdown("### ⚙️ Settings")

        model_dropdown = gr.Dropdown(
            choices=[
                "Qwen3-0.6B-GGUF",
                "Llama-3.1-8B-Instruct-Hybrid",
                "Qwen2.5-7B-Instruct",
                "Phi-3.5-mini-instruct",
                "Meta-Llama-3-8B-Instruct"
            ],
            value="Qwen3-0.6B-GGUF",
            label="Model",
            info="Select the LLM model to use",
            allow_custom_value=True
        )

        system_prompt = gr.Textbox(
            label="System Prompt (Optional)",
            value="You are a helpful assistant.",
            lines=3,
            info="Customize the model's behavior",
            placeholder="Leave empty to use model defaults"
        )

        # "How Thinking Works" accordion
        with gr.Accordion("💡 How Thinking Works", open=False):
            gr.Markdown("""
- Reasoning models output `reasoning_content` (thinking) and `content` (final answer) separately
- Thinking appears in a collapsible "🧠 Thought Process" section
- The thinking duration is displayed automatically
- Works with reasoning models such as DeepSeek-R1 and QwQ
""")

        # "Current Model" accordion
        with gr.Accordion("📌 Current Model", open=False):
            gr.Markdown("""
Make sure your model supports reasoning output for thinking to be displayed.
""")

        # "Example Prompts" accordion
        with gr.Accordion("📝 Example Prompts", open=False):
            gr.Markdown("""
- "Solve: If a train travels 120 km in 2 hours, what's its speed?"
- "Compare pros and cons of electric vs gas cars"
- "Explain step-by-step how to make coffee"
- "What's the difference between AI and ML?"
""")

        # Add example interactions in the sidebar
        gr.Examples(
            examples=[
                "What is 15 + 24?",
                "Write a short poem about AI",
                "What is the capital of Japan?",
                "Explain what machine learning is in simple terms"
            ],
            inputs=msg,
            label="Quick Examples"
        )

    # Main chat area - full screen
    chatbot = gr.Chatbot(
        type="messages",
        label="Chat",
        height="calc(100vh - 200px)",
        avatar_images=(
            "https://em-content.zobj.net/source/twitter/376/bust-in-silhouette_1f464.png",
            "https://em-content.zobj.net/source/twitter/376/robot_1f916.png"
        ),
        show_label=False,
        # placeholder.png is served via Gradio's file route; see allowed_paths in demo.launch
        placeholder="""<div>
            <img src="/gradio_api/file=placeholder.png">
        </div>"""
    )

    # Render the input textbox in the main area
    msg.render()

    # Event handlers - only the submit event
    def submit_message(message, history, model, sys_prompt):
        """Wrapper to handle message submission."""
        if not message or message.strip() == "":
            # Nothing to send: re-emit the current history unchanged
            yield history
            return
        yield from stream_chat_response(message, history, model, sys_prompt)

    msg.submit(
        submit_message,
        inputs=[msg, chatbot, model_dropdown, system_prompt],
        outputs=chatbot
    ).then(
        lambda: "",  # clear the input box once streaming has finished
        None,
        msg
    )

# Launch the app
if __name__ == "__main__":
    demo.launch(allowed_paths=["."], ssr_mode=True)
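
# Usage note (assumption, not stated in the original file): start Lemonade Server so that
# http://localhost:8000/api/v1 is reachable, then run `python app.py`. The
# allowed_paths=["."] argument lets Gradio serve placeholder.png from the working
# directory through the /gradio_api/file= route referenced in the chatbot placeholder.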