Spaces:
Build error
Build error
import gradio as gr
import spaces  # HF Spaces SDK import; presumably required by the Space runtime — TODO confirm
import llama_cpp
import llama_cpp.llama_tokenizer

# Load the quantized Qwen chat model once at import time. The HF tokenizer is
# supplied explicitly so chat-template handling matches the original model.
llama = llama_cpp.Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    filename="*q8_0.gguf",
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
        "Qwen/Qwen1.5-0.5B"
    ),
    verbose=False,
)

# Name forwarded to the OpenAI-compatible completion API; llama-cpp-python
# serves the local GGUF model regardless of this label.
model = "gpt-3.5-turbo"
def predict(message, history, system_prompt="", max_new_tokens=None, temperature=None):
    """Stream an assistant reply for *message* given the chat *history*.

    Args:
        message: Latest user message.
        history: List of ``(user, assistant)`` pairs as produced by
            ``gr.ChatInterface``.
        system_prompt: Optional system message prepended to the conversation.
            Empty string (default) adds none, preserving the original behavior.
        max_new_tokens: Optional generation cap; omitted from the API call
            when ``None`` so library defaults apply.
        temperature: Optional sampling temperature; omitted when ``None``.

    Yields:
        str: The accumulated response text after each streamed chunk.
    """
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    for user_message, assistant_message in history:
        messages.append({"role": "user", "content": user_message})
        messages.append({"role": "assistant", "content": assistant_message})
    messages.append({"role": "user", "content": message})

    # Forward only the sampling options the caller supplied, so a plain
    # (message, history) call behaves exactly as before.
    sampling = {}
    if max_new_tokens is not None:
        sampling["max_tokens"] = int(max_new_tokens)
    if temperature is not None:
        sampling["temperature"] = float(temperature)

    response = llama.create_chat_completion_openai_v1(
        model=model,
        messages=messages,
        stream=True,
        **sampling,
    )

    text = ""
    for chunk in response:
        content = chunk.choices[0].delta.content
        # Role/terminal chunks carry no content; skip them.
        if content:
            text += content
            yield text
# BUG FIX: the original passed ``fn=chat_function``, a name never defined in
# this file — a NameError at import time (the Space's build error). The
# handler defined above is ``predict``.
chat_interface = gr.ChatInterface(
    fn=predict,
    chatbot=gr.Chatbot(height=400),
    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale=7),
    title="Chat with AI Model",
    description="""
    Custom description based on the new GGUF model capabilities and features.
    """,
    theme="soft",
    # NOTE(review): Gradio appends these as extra positional args to ``fn`` —
    # verify ``predict`` accepts (system_prompt, max_new_tokens, temperature).
    additional_inputs=[
        gr.Textbox(value="Hello!", label="System Prompt", placeholder="Enter a system prompt"),
        gr.Slider(minimum=50, maximum=1000, step=50, value=150, label="Max New Tokens"),
        gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.7, label="Temperature"),
    ],
    allow_flagging="never",
)

chat_interface.launch()