# inference.py
# Run a single greedy-decoded chat turn against the fine-tuned GPT-2 assistant.
import torch
from transformers import pipeline

from utils import build_conversation_prompt

MODEL_ID = "hmnshudhmn24/gpt2-personal-assistant"


def chat_once(model_id=MODEL_ID):
    # Build a text-generation pipeline, placed on GPU 0 when CUDA is available.
    generator = pipeline(
        "text-generation",
        model=model_id,
        tokenizer=model_id,
        device=0 if torch.cuda.is_available() else -1,
    )

    # Seed the conversation with a short prior exchange.
    history = [
        "User: Hello!",
        "Assistant: Hi there! How can I help you today?",
    ]
    user_input = "Can you summarize the benefits of exercise?"

    prompt = build_conversation_prompt(
        history,
        user_input,
        system_prompt="You are a helpful assistant.",
    )

    outputs = generator(
        prompt,
        max_length=300,          # total length budget (prompt + generated tokens)
        num_return_sequences=1,
        do_sample=False,         # greedy decoding for a deterministic reply
        pad_token_id=50256,      # GPT-2 has no pad token; reuse its EOS id
    )
    print(outputs[0]["generated_text"])


if __name__ == "__main__":
    chat_once()