from ctransformers import AutoModelForCausalLM
import gradio as gr
def generate_prompt(history):
    # Build the conversation prompt: keep only the previous exchange
    # as context (history[-2:-1]), then append the latest user message
    # with an open "<bot>:" tag for the model to complete.
    prompt = " "
    for chain in history[-2:-1]:
        prompt += f"<human>: {chain[0]}\n<bot>: {chain[1]}{end_token}\n"
    prompt += f"<human>: {history[-1][0]}\n<bot>:"
    return prompt
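# Illustrative example (hypothetical values, not from the original source):
#     history = [["Hi", "Hello!"], ["How are you?", ""]]
# generate_prompt(history) would return:
#     " <human>: Hi\n<bot>: Hello!</s>\n<human>: How are you?\n<bot>:"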
def generate(history):
    # Stream tokens from the model; temperature=0 gives near-greedy decoding,
    # with a mild repetition penalty to reduce loops.
    prompt = generate_prompt(history)
    streamer = llm(prompt, stream=True, temperature=0, repetition_penalty=1.2)
    return streamer
# Load the quantized LLaMA model from the Hugging Face Hub.
llm = AutoModelForCausalLM.from_pretrained(
    "theodotus/llama-uk", model_file="model.bin", model_type="llama"
)
end_token = "</s>"  # terminator appended after each completed bot turn
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        # Clear the textbox and append the new user turn with an empty bot reply.
        return "", history + [[user_message, ""]]

    def bot(history):
        # Stream the model's reply token by token into the last history entry,
        # yielding after each token so the UI updates incrementally.
        streamer = generate(history)
        for token in streamer:
            history[-1][1] += token
            yield history
    # On submit: record the user message first, then stream the bot reply.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)
demo.queue()  # queueing is required for generator (streaming) callbacks
if __name__ == "__main__":
    demo.launch()