# inference.py
# Run a single greedy-decoded chat turn against the fine-tuned GPT-2 assistant.
import torch
from transformers import pipeline

from utils import build_conversation_prompt

MODEL_ID = "hmnshudhmn24/gpt2-personal-assistant"


def chat_once(model_id=MODEL_ID):
    # Build a text-generation pipeline, placed on GPU 0 when CUDA is available.
    generator = pipeline(
        "text-generation",
        model=model_id,
        tokenizer=model_id,
        device=0 if torch.cuda.is_available() else -1,
    )

    # Seed the conversation with a short prior exchange.
    history = [
        "User: Hello!",
        "Assistant: Hi there! How can I help you today?",
    ]
    user_input = "Can you summarize the benefits of exercise?"

    prompt = build_conversation_prompt(
        history,
        user_input,
        system_prompt="You are a helpful assistant.",
    )

    outputs = generator(
        prompt,
        max_length=300,          # total length budget (prompt + generated tokens)
        num_return_sequences=1,
        do_sample=False,         # greedy decoding for a deterministic reply
        pad_token_id=50256,      # GPT-2 has no pad token; reuse its EOS id
    )
    print(outputs[0]["generated_text"])


if __name__ == "__main__":
    chat_once()