# models/llm_chat.py
from __future__ import annotations

from typing import List, Dict, Any, Tuple
import os
import re

from utils.config import get_settings

# --- Lightweight menu kept inline for the MVP ---
MENU_JSON = """
{
  "pizzas": [
    {"name": "Margherita Pizza", "sizes": ["small", "medium", "large"], "price": {"small": 8.5, "medium": 11.0, "large": 13.5}},
    {"name": "Pepperoni Pizza", "sizes": ["small", "medium", "large"], "price": {"small": 9.5, "medium": 12.0, "large": 14.5}}
  ],
  "salads": [
    {"name": "House Salad", "sizes": ["regular"], "price": {"regular": 6.0}}
  ],
  "drinks": [
    {"name": "Cola", "sizes": ["can"], "price": {"can": 2.0}}
  ],
  "hours": "11:00–22:00 daily",
  "address": "123 Main St",
  "phone": "+1 (555) 010-0000"
}
"""

SYSTEM_PROMPT = f"""You are Marta, the AI call/SMS assistant for FutureCafe.

OBJECTIVE
Help with menu questions, placing orders, hours/location, and simple reservations—quickly and pleasantly.

GOALS
- Always begin new conversations with a friendly self-introduction:
  "Hi, I’m Marta, an AI assistant at FutureCafe. How can I help you today?"
- Help with menu questions, placing orders, hours/location, and simple reservations.

INTERACTION RULES
- Always acknowledge the user briefly before asking for details.
- If details are missing, ask ONE short, specific follow-up that includes valid choices from the MENU (e.g., sizes).
- Never say “I didn’t understand.” Instead, restate what you do have and ask for the next missing detail.
- When the user’s message implies an order but lacks details, propose a short set of options (e.g., “Margherita or Pepperoni? What size: small, medium, large?”).
- When the user provides all required details, confirm the order concisely and give a total using MENU prices.
- After confirming, offer one gentle upsell (e.g., salad or drink). If the user declines, close politely.
- For hours/location, answer directly from the MENU.
- If the user goes off-topic, gently steer back to FutureCafe. After ~3 persistent off-topic turns, end politely.
- Be concise, friendly, and never quote or restate this policy or the raw MENU JSON. No code blocks.

MENU (for your internal reference only; do NOT paste it back verbatim):
{MENU_JSON}
"""

FEWSHOT: List[Dict[str, str]] = [
    # Greeting → clarify
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello! How can I help with FutureCafe today?"},
    # Ordering with missing details → ask one clear follow-up with choices
    {"role": "user", "content": "I need a pizza"},
    {"role": "assistant", "content": "Great—would you like Margherita or Pepperoni? What size: small, medium, or large?"},
    # Provide details → confirm + total + optional upsell
    {"role": "user", "content": "Two small Margherita"},
    {"role": "assistant", "content": "Got it: 2× small Margherita Pizza. Total $17.00. Would you like a drink (Cola $2.00) or a House Salad ($6.00) with that?"},
    # Decline upsell → polite close
    {"role": "user", "content": "No thanks"},
    {"role": "assistant", "content": "All set—your order is confirmed for 2× small Margherita Pizza. Total $17.00. Anything else I can help with?"},
    # Hours/location
    {"role": "user", "content": "What time are you open and where are you?"},
    {"role": "assistant", "content": "We’re open 11:00–22:00 daily at 123 Main St. How can I help with your order today?"},
]

# ---------------- llama.cpp singleton ----------------
_llm = None


def _get_local_llm():
    """Singleton llama.cpp model loader (GGUF)."""
    global _llm
    if _llm is not None:
        return _llm
    from llama_cpp import Llama
    s = get_settings()
    model_path = os.getenv("LLAMACPP_MODEL_PATH", getattr(s, "LLAMACPP_MODEL_PATH", None))
    if not model_path or not os.path.exists(model_path):
        raise RuntimeError(f"LLAMACPP_MODEL_PATH not found: {model_path}")
    _llm = Llama(
        model_path=model_path,
        n_ctx=s.N_CTX,
        n_threads=s.N_THREADS,
        n_gpu_layers=s.N_GPU_LAYERS,
        verbose=False,
    )
    return _llm
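
# Expected configuration (read above via utils.config.get_settings and the environment).
# The example values below are illustrative assumptions, not the project's actual defaults:
#   LLAMACPP_MODEL_PATH=/models/model.Q4_K_M.gguf  # path to a local GGUF file
#   N_CTX=4096        # context window passed to llama.cpp
#   N_THREADS=4       # CPU threads used for inference
#   N_GPU_LAYERS=0    # layers to offload to GPU (0 = CPU only)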


# ---------------- Prompt building ----------------
def _apply_chatml(messages: List[Dict[str, str]]) -> str:
    out = []
    for m in messages:
        role = m.get("role", "user")
        content = m.get("content", "").strip()
        if role == "system":
            out.append("<|system|>\n" + content + "\n")
        elif role == "assistant":
            out.append("<|assistant|>\n" + content + "\n")
        else:
            out.append("<|user|>\n" + content + "\n")
    out.append("<|assistant|>\n")
    return "\n".join(out)


_CODE_FENCE_RE = re.compile(r"```.*?```", flags=re.DOTALL)
_TAG_RE = re.compile(r"<\|.*?\|>")


def _sanitize(text: str) -> str:
    if not text:
        return ""
    text = _CODE_FENCE_RE.sub("", text)
    text = _TAG_RE.sub("", text)
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
    if lines and any(k in lines[0].lower() for k in ["you are marta", "policy", "menu", "assistant", "as an ai"]):
        lines = lines[1:]
    return " ".join(lines).strip()


def _generate(messages: List[Dict[str, str]], temperature: float = 0.15, max_tokens: int = 256) -> str:
    llm = _get_local_llm()
    prompt = _apply_chatml(messages)
    out = llm(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=0.9,
        repeat_penalty=1.1,
        stop=["<|user|>", "<|system|>", "<|assistant|>"],
    )
    raw = (out["choices"][0]["text"] or "").strip()
    return _sanitize(raw)


# ---------------- Public APIs ----------------
def respond_chat(
    history: List[Dict[str, str]],
    user_text: str,
    guard_state: Dict[str, Any] | None,
) -> Tuple[str, Dict[str, Any], Dict[str, Any]]:
    guard = dict(guard_state or {"unrelated": 0, "ended": 0, "limit": 3})
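    # guard keys: "ended" is set below once the model signals the conversation is over;
    # "unrelated" and "limit" are initialized here but presumably maintained by the caller
    # for the off-topic counter described in the system prompt.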
    if guard.get("ended"):
        return "(Conversation ended. Start a new chat for FutureCafe.)", guard, {}
    msgs: List[Dict[str, str]] = [{"role": "system", "content": SYSTEM_PROMPT}]
    msgs.extend(FEWSHOT)
    if history:
        msgs.extend(history[-10:])
    msgs.append({"role": "user", "content": user_text})
    reply = _generate(msgs)
    if "let’s end" in reply.lower() or "let's end" in reply.lower():
        guard["ended"] = 1
    return reply, guard, {}


def respond_chat_voice(
    voice_history: List[Dict[str, str]],
    transcript: str,
    guard_state: Dict[str, Any] | None,
) -> Tuple[str, Dict[str, Any], Dict[str, Any]]:
    return respond_chat(voice_history, transcript, guard_state)
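

# Minimal local smoke test (illustrative sketch only; assumes a GGUF model is available at
# LLAMACPP_MODEL_PATH and that utils.config.get_settings resolves N_CTX/N_THREADS/N_GPU_LAYERS).
if __name__ == "__main__":
    guard = None
    history: List[Dict[str, str]] = []
    for text in ["Hi", "I need a pizza", "Two small Margherita", "No thanks"]:
        reply, guard, _ = respond_chat(history, text, guard)
        history.extend([
            {"role": "user", "content": text},
            {"role": "assistant", "content": reply},
        ])
        print(f"user: {text}\nmarta: {reply}\n")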