ysharma HF Staff committed on
Commit 78752d9 · verified · 1 Parent(s): c0025b4

Create app.py

Files changed (1)
  1. app.py +340 -0
app.py ADDED
@@ -0,0 +1,340 @@
import gradio as gr
from gradio import ChatMessage
from openai import OpenAI
import time

# Configure the Lemonade Server connection
base_url = "http://localhost:8000/api/v1"
client = OpenAI(
    base_url=base_url,
    api_key="lemonade",  # required by the client, but unused in Lemonade
)
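
# Optional sanity check (a hedged sketch - it assumes Lemonade implements the
# standard OpenAI /models endpoint); uncomment to list the models the server
# currently exposes:
#
#     for m in client.models.list().data:
#         print(m.id)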


def stream_chat_response(message: str, history: list, model_name: str, system_prompt: str):
    """
    Stream responses from Lemonade Server and display the thinking process
    separately from the final answer.
    """
    # Add the user message to history
    history.append(ChatMessage(role="user", content=message))
    yield history

    # Convert history to OpenAI format - only include actual conversation messages
    messages = []

    # Add the system prompt if provided
    if system_prompt and system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})

    # Convert history, skipping metadata-only messages
    for msg in history:
        if isinstance(msg, ChatMessage):
            # Skip thinking/metadata messages when sending to the API
            if msg.metadata and msg.metadata.get("title"):
                continue
            messages.append({
                "role": msg.role,
                "content": msg.content
            })
        elif isinstance(msg, dict):
            # Skip metadata messages
            if msg.get("metadata"):
                continue
            messages.append({
                "role": msg.get("role", "user"),
                "content": msg.get("content", "")
            })
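
    # At this point `messages` has the plain OpenAI chat shape, for example
    # (illustrative values):
    #   [{"role": "system", "content": "You are a helpful assistant."},
    #    {"role": "user", "content": "What is 15 + 24?"}]
    # Thinking bubbles are excluded above so the model never receives its own
    # reasoning transcript as conversation context.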

    try:
        # Initialize response tracking
        thinking_content = ""
        response_content = ""
        thinking_added = False
        response_added = False
        thinking_start_time = None

        # Stream the response from Lemonade Server
        stream = client.chat.completions.create(
            model=model_name,
            messages=messages,
            stream=True,
            max_tokens=2048,
            temperature=0.7,
        )

        for chunk in stream:
            # Safety check for chunk structure
            if not chunk.choices or len(chunk.choices) == 0:
                continue

            if not hasattr(chunk.choices[0], 'delta'):
                continue

            delta = chunk.choices[0].delta

            # Check for reasoning_content (thinking process)
            reasoning_content = getattr(delta, 'reasoning_content', None)
            # Check for regular content (final answer)
            content = getattr(delta, 'content', None)
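
            # Assumption: the server follows the DeepSeek-style streaming
            # convention, with reasoning tokens in `delta.reasoning_content`
            # and answer tokens in `delta.content`; getattr() keeps this safe
            # for models that emit no reasoning at all.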

            # Handle reasoning/thinking content
            if reasoning_content:
                if not thinking_added:
                    # Add the thinking section
                    thinking_start_time = time.time()
                    history.append(ChatMessage(
                        role="assistant",
                        content="",
                        metadata={
                            "title": "🧠 Thought Process",
                            "status": "pending"
                        }
                    ))
                    thinking_added = True

                # Accumulate thinking content
                thinking_content += reasoning_content
                history[-1] = ChatMessage(
                    role="assistant",
                    content=thinking_content,
                    metadata={
                        "title": "🧠 Thought Process",
                        "status": "pending"
                    }
                )
                yield history

            # Handle regular content (final answer)
            elif content:
                # Finalize the thinking section if it exists
                if thinking_added and thinking_start_time:
                    elapsed = time.time() - thinking_start_time
                    # Update the thinking message to "done" status
                    for i in range(len(history) - 1, -1, -1):
                        if isinstance(history[i], ChatMessage) and history[i].metadata and history[i].metadata.get("title") == "🧠 Thought Process":
                            history[i] = ChatMessage(
                                role="assistant",
                                content=thinking_content,
                                metadata={
                                    "title": "🧠 Thought Process",
                                    "status": "done",
                                    "duration": elapsed
                                }
                            )
                            break
                    thinking_start_time = None

                # Add or update response content
                if not response_added:
                    history.append(ChatMessage(
                        role="assistant",
                        content=""
                    ))
                    response_added = True

                response_content += content
                history[-1] = ChatMessage(
                    role="assistant",
                    content=response_content
                )
                yield history

        # Final check: if a thinking section exists but was never finalized
        if thinking_added and thinking_start_time:
            elapsed = time.time() - thinking_start_time
            for i in range(len(history) - 1, -1, -1):
                if isinstance(history[i], ChatMessage) and history[i].metadata and history[i].metadata.get("title") == "🧠 Thought Process":
                    history[i] = ChatMessage(
                        role="assistant",
                        content=thinking_content,
                        metadata={
                            "title": "🧠 Thought Process",
                            "status": "done",
                            "duration": elapsed
                        }
                    )
                    break
        yield history

    except Exception as e:
        import traceback
        error_msg = str(e)
        error_trace = traceback.format_exc()

        # Try to extract more details from the error
        if "422" in error_msg:
            error_details = f"""
⚠️ **Request Validation Error**

The server rejected the request. Possible issues:
- Model name might be incorrect (currently: `{model_name}`)
- Check that the model is loaded on the server
- Try simplifying the system prompt

**Error:** {error_msg}
"""
        elif "list index out of range" in error_msg or "IndexError" in error_trace:
            error_details = f"""
⚠️ **Streaming Response Error**

There was an issue processing the streaming response.

**Debug Info:**
- Model: `{model_name}`
- Base URL: `{base_url}`
- Error: {error_msg}

Try refreshing and sending another message.
"""
        else:
            error_details = f"""
⚠️ **Connection Error**

Error: {error_msg}

Make sure:
1. Lemonade Server is running at `{base_url}`
2. Model `{model_name}` is loaded
3. The server is accessible

**Debug trace:**
```
{error_trace[-500:]}
```
"""

        history.append(ChatMessage(
            role="assistant",
            content=error_details,
            metadata={
                "title": "⚠️ Error Details"
            }
        ))
        yield history


def clear_chat():
    """Clear the chat history."""
    return []
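
# Note: clear_chat is defined but not wired to any control below. A minimal
# hookup, assuming a hypothetical "Clear" button created inside the Blocks
# context, would look like:
#
#     clear_btn = gr.Button("🗑️ Clear Chat")
#     clear_btn.click(clear_chat, inputs=None, outputs=chatbot)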


# Build the Gradio interface
with gr.Blocks(theme=gr.themes.Ocean()) as demo:
    # Define the input textbox first so it can be referenced in Examples
    msg = gr.Textbox(
        placeholder="Type your message here and press Enter...",
        show_label=False,
        container=False,
        render=False  # Don't render yet; it is rendered later in the main area
    )

    # Sidebar for settings and information
    with gr.Sidebar(position="left", open=True):
        gr.Markdown("""
        # 🍋 Lemonade Reasoning Chatbot
        Chat with local LLMs running on AMD Lemonade Server. This interface displays the model's thinking process in a dedicated, collapsible section.
        """)

        gr.Markdown("### ⚙️ Settings")

        model_dropdown = gr.Dropdown(
            choices=[
                "Qwen3-0.6B-GGUF",
                "Llama-3.1-8B-Instruct-Hybrid",
                "Qwen2.5-7B-Instruct",
                "Phi-3.5-mini-instruct",
                "Meta-Llama-3-8B-Instruct"
            ],
            value="Qwen3-0.6B-GGUF",
            label="Model",
            info="Select the LLM model to use",
            allow_custom_value=True
        )

        system_prompt = gr.Textbox(
            label="System Prompt (Optional)",
            value="You are a helpful assistant.",
            lines=3,
            info="Customize the model's behavior",
            placeholder="Leave empty to use model defaults"
        )

        # How Thinking Works accordion
        with gr.Accordion("💡 How Thinking Works", open=False):
            gr.Markdown("""
            - Reasoning models output `reasoning_content` (thinking) and `content` (final answer) separately
            - Thinking appears in a collapsible "🧠 Thought Process" section
            - The duration of the thinking phase is displayed automatically
            - Works with reasoning models such as DeepSeek-R1 and QwQ
            """)
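
        # Illustrative shape of the two streamed fields described above
        # (assumed DeepSeek-style deltas; exact payloads vary by model):
        #   {"delta": {"reasoning_content": "Let me add 15 and 24 ..."}}
        #   {"delta": {"content": "15 + 24 = 39."}}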

        # Current Model accordion
        with gr.Accordion("📋 Current Model", open=False):
            gr.Markdown("""
            Make sure your model supports reasoning output for thinking to be displayed.
            """)

        # Example Prompts accordion
        with gr.Accordion("📝 Example Prompts", open=False):
            gr.Markdown("""
            - "Solve: If a train travels 120 km in 2 hours, what's its speed?"
            - "Compare pros and cons of electric vs gas cars"
            - "Explain step-by-step how to make coffee"
            - "What's the difference between AI and ML?"
            """)

        # Example interactions in the sidebar
        gr.Examples(
            examples=[
                "What is 15 + 24?",
                "Write a short poem about AI",
                "What is the capital of Japan?",
                "Explain what machine learning is in simple terms"
            ],
            inputs=msg,
            label="Quick Examples"
        )

    # Main chat area - full screen
    chatbot = gr.Chatbot(
        type="messages",
        label="Chat",
        height="calc(100vh - 200px)",
        avatar_images=(
            "https://em-content.zobj.net/source/twitter/376/bust-in-silhouette_1f464.png",
            "https://em-content.zobj.net/source/twitter/376/robot_1f916.png"
        ),
        show_label=False,
        placeholder="""<div>
            <img src="/gradio_api/file=placeholder.png">
        </div>"""
    )

    # Render the input textbox in the main area
    msg.render()

    # Event handlers - only the submit event
    def submit_message(message, history, model, sys_prompt):
        """Wrapper to handle message submission."""
        if not message or message.strip() == "":
            # Empty input: re-emit the unchanged history so the single
            # `chatbot` output still receives a value
            yield history
            return
        yield from stream_chat_response(message, history, model, sys_prompt)

    msg.submit(
        submit_message,
        inputs=[msg, chatbot, model_dropdown, system_prompt],
        outputs=chatbot
    ).then(
        lambda: "",
        None,
        msg
    )
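
    # The chained .then() clears the textbox; it fires only after the
    # streaming handler above has finished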

# Launch the app
if __name__ == "__main__":
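    # allowed_paths=["."] whitelists the working directory so Gradio can serve
    # local files (e.g. placeholder.png) via the /gradio_api/file= route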
    demo.launch(allowed_paths=["."], ssr_mode=True)