import os
import uuid

from flask import Flask, render_template, request, jsonify
from gradio_client import Client

app = Flask(__name__)

client = Client("tonyassi/llm-api", token=os.environ.get("HF_TOKEN"))
API_NAME = "/chat"

HISTORIES = {}


def llm_response(session_id: str, user_input: str) -> str:
    user_input = (user_input or "").strip()
    if not user_input:
        return "Type something first"

    history = HISTORIES.get(session_id, [])
    history.append({"role": "user", "content": user_input})

    try:
        assistant_text = client.predict(
            messages=history,
            api_name=API_NAME
        )
        if isinstance(assistant_text, str) and "-***-" in assistant_text:
            assistant_text = assistant_text.split("-***-")[0].strip()

        history.append({"role": "assistant", "content": assistant_text})
        HISTORIES[session_id] = history
        return assistant_text
    except Exception as e:
        # rollback last user message on failure
        if history and history[-1].get("role") == "user":
            history.pop()
        HISTORIES[session_id] = history
        print("LLM API error:", e)
        return "Something isn't working..."


@app.route("/")
def index():
    # New session id on every page load => refresh starts over
    session_id = uuid.uuid4().hex
    HISTORIES.pop(session_id, None)
    return render_template("index.html", session_id=session_id)


@app.route("/send_message", methods=["POST"])
def send_message():
    user_input = request.form.get("user_input", "")
    session_id = request.form.get("session_id", "")
    if not session_id:
        return jsonify({"response": "Missing session_id", "user_input": user_input}), 400
    response = llm_response(session_id, user_input)
    return jsonify({"response": response, "user_input": user_input})


@app.route("/reset", methods=["POST"])
def reset():
    session_id = request.form.get("session_id", "")
    if session_id:
        HISTORIES.pop(session_id, None)
    return jsonify({"ok": True})


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
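
# Example usage -- a quick sketch for manual testing only. It assumes the app is
# running locally on port 7860 (as configured above) and that HF_TOKEN is set in
# the environment; the session_id value is whatever was rendered into index.html.
#
#   # send a message for an existing session:
#   curl -X POST http://localhost:7860/send_message \
#        -d "session_id=<session_id from index.html>" \
#        -d "user_input=Hello there"
#
#   # clear that session's history:
#   curl -X POST http://localhost:7860/reset \
#        -d "session_id=<session_id from index.html>"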