Qwen2.5-1.5B-Instruct / gradio_demo.py
wli1995's picture
add run_api.sh
79d1704 verified
import time
import gradio as gr
import requests
import json
# Base URL (scheme, host and port) of the backend API server; adjust it to
# match your deployment. Every endpoint path used in this file
# (/api/reset, /api/generate, /api/generate_provider, /api/stop) is appended
# to this value.
API_URL = "http://10.168.232.93:8000"
def reset_chat(system_prompt):
    """
    Start a new conversation on the backend via POST /api/reset.

    Args:
        system_prompt: Optional system prompt text. When truthy it is sent
            as ``system_prompt`` in the JSON body; otherwise an empty JSON
            object is posted.

    Returns:
        A ``(history, input_text)`` pair for the chatbot and the message
        box: ``([], "")`` on success, or a one-row history carrying the
        error text (with an empty input) when the request fails.
    """
    payload = {"system_prompt": system_prompt} if system_prompt else {}
    try:
        # Bound both connect and read time so an unreachable or stuck backend
        # cannot block this UI handler forever.
        response = requests.post(
            f"{API_URL}/api/reset", json=payload, timeout=(3.05, 60)
        )
        response.raise_for_status()
    except requests.RequestException as e:
        # Surface the failure inside the chat window instead of raising.
        return [("Error resetting chat:", str(e))], ""
    # Successful reset: clear both the chat history and the input box.
    return [], ""
def stream_generate(history, message, temperature, repetition_penalty, top_p, top_k):
    """
    Send the user message to /api/generate and stream the reply into the chat.

    This is a generator: every yielded ``(history, input_text)`` pair carries
    the updated chat history and an empty string that clears the input box.

    Args:
        history: Current chat history as a list of (user, bot) pairs; ``None``
            is treated as an empty history.
        message: Text typed by the user. Empty or whitespace-only messages
            are not sent to the backend.
        temperature: Sampling temperature forwarded to the backend.
        repetition_penalty: Repetition penalty forwarded to the backend.
        top_p: Forwarded to the backend under the ``"top-p"`` key.
        top_k: Forwarded to the backend under the ``"top-k"`` key.

    Yields:
        ``(history, "")`` after the user message is appended, after each
        received chunk, and once with an error text if a request fails.
    """
    # Work on a copy so the caller's list is never mutated.
    history = list(history or [])

    if not message or not message.strip():
        # Nothing to send: keep the conversation as it is, just clear the box.
        yield history, ""
        return

    history.append((message, ""))
    yield history, ""

    payload = {
        "prompt": message,
        "temperature": temperature,
        "repetition_penalty": repetition_penalty,
        # Hyphenated key names are kept exactly as the original client sent them.
        "top-p": top_p,
        "top-k": top_k,
    }
    try:
        response = requests.post(
            f"{API_URL}/api/generate", json=payload, timeout=(3.05, None)
        )
        response.raise_for_status()
    except requests.RequestException as e:
        history[-1] = (message, f"Error: {e}")
        yield history, ""
        return

    time.sleep(0.1)
    reply = ""
    while True:
        # Short pause between polls to avoid hammering the backend.
        time.sleep(0.01)
        try:
            response = requests.get(
                f"{API_URL}/api/generate_provider", timeout=(3.05, 30)
            )
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # Keep whatever text already arrived and report the failure in
            # the chat, mirroring how a failed /api/generate call is shown.
            error_text = f"Error: {e}"
            history[-1] = (message, f"{reply}\n{error_text}" if reply else error_text)
            yield history, ""
            return

        if data.get("done", False):
            break

        chunk = data.get("response", "")
        if not chunk:
            # Only truly empty polls are skipped; whitespace-only chunks
            # (spaces, newlines) are part of the reply and must be kept.
            continue

        reply += chunk
        history[-1] = (message, reply)
        yield history, ""
    print("end")
def stop_generate():
    """
    Ask the backend to stop the current generation via GET /api/stop.

    Best-effort: any request failure (connection error, timeout, or HTTP
    error status) is printed and never raised. Returns ``None``.
    """
    try:
        # A bounded timeout keeps the Stop button from hanging when the
        # backend is unreachable.
        response = requests.get(f"{API_URL}/api/stop", timeout=(3.05, 10))
        # Turn HTTP error statuses into exceptions so they get reported too.
        response.raise_for_status()
    except requests.RequestException as e:
        print(e)
# Optional custom CSS for a wider, more spacious desktop layout (currently disabled; not passed to gr.Blocks)
# custom_css = """
# .gradio-container {
# max-width: 1400px;
# margin: auto;
# padding: 20px;
# }
# .gradio-container > * {
# margin-bottom: 20px;
# }
# #chatbox .overflow-y-auto {
# height: 600px !important;
# }
# """
# Build the Gradio interface with a two-column layout: chat on the left,
# sampling parameters on the right.
with gr.Blocks(theme=gr.themes.Soft(font="Consolas"), fill_width=True) as demo:
    gr.Markdown("<h2 style='text-align:center;'>🚀 Chatbot Demo with Axera API Backend</h2>")

    # One Row wraps the two main areas so they sit side by side.
    with gr.Row():
        # Left: main chat area (scale 3 of 4, i.e. about 3/4 of the width).
        with gr.Column(scale=3):
            system_prompt = gr.Textbox(
                label="System Prompt",
                placeholder="Optional system prompt",
                lines=2,
                value="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
            )
            reset_button = gr.Button("🔄 Reset Chat")
            chatbot = gr.Chatbot(elem_id="chatbox", label="Axera Chat", height=500)
            user_input = gr.Textbox(
                label="Your Message",
                placeholder="Type your message here...",
                lines=2,
            )
            with gr.Row():
                send_button = gr.Button("➡️ Send", variant="primary")
                stop_button = gr.Button("🛑 Stop", variant="stop")

        # Right: sampling parameter controls (scale 1 of 4, about 1/4 of the width).
        with gr.Column(scale=1):
            temperature = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.7, label="Temperature")
            repetition_penalty = gr.Slider(minimum=1.0, maximum=2.0, step=0.01, value=1.0, label="Repetition Penalty")
            top_p = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.9, label="Top-p Sampling")
            top_k = gr.Slider(minimum=0, maximum=100, step=1, value=40, label="Top-k Sampling")

    # Reset clears both the chat history and the input box.
    reset_button.click(fn=reset_chat, inputs=system_prompt, outputs=[chatbot, user_input])

    # The Send button and the Enter key trigger the same streaming handler,
    # so they share one inputs/outputs definition and cannot drift apart.
    generate_inputs = [chatbot, user_input, temperature, repetition_penalty, top_p, top_k]
    generate_outputs = [chatbot, user_input]

    # Send streams the reply into the chat and clears the input box.
    send_button.click(fn=stream_generate, inputs=generate_inputs, outputs=generate_outputs)

    # Stop calls the backend stop endpoint; it takes no inputs and updates no components.
    stop_button.click(fn=stop_generate)

    # Pressing Enter in the message box behaves like clicking Send.
    user_input.submit(fn=stream_generate, inputs=generate_inputs, outputs=generate_outputs)
if __name__ == "__main__":
    # Host and port the demo server listens on; adjust as needed.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)