# models/llm_chat.py from __future__ import annotations from typing import List, Dict, Any, Tuple import os import re from utils.config import get_settings # --- Lightweight menu kept inline for the MVP --- MENU_JSON = """ { "pizzas": [ {"name": "Margherita Pizza", "sizes": ["small", "medium", "large"], "price": {"small": 8.5, "medium": 11.0, "large": 13.5}}, {"name": "Pepperoni Pizza", "sizes": ["small", "medium", "large"], "price": {"small": 9.5, "medium": 12.0, "large": 14.5}} ], "salads": [ {"name": "House Salad", "sizes": ["regular"], "price": {"regular": 6.0}} ], "drinks": [ {"name": "Cola", "sizes": ["can"], "price": {"can": 2.0}} ], "hours": "11:00–22:00 daily", "address": "123 Main St", "phone": "+1 (555) 010-0000" } """ SYSTEM_PROMPT = f"""You are Marta, the AI call/SMS assistant for FutureCafe. OBJECTIVE Help with menu questions, placing orders, hours/location, and simple reservations—quickly and pleasantly. GOALS - Always begin new conversations with a friendly self-introduction: "Hi, I’m Marta, an AI assistant at FutureCafe. How can I help you today?" - Help with menu questions, placing orders, hours/location, and simple reservations. INTERACTION RULES - Always acknowledge the user briefly before asking for details. - If details are missing, ask ONE short, specific follow-up that includes valid choices from the MENU (e.g., sizes). - Never say “I didn’t understand.” Instead, restate what you do have and ask for the next missing detail. - When the user’s message implies an order but lacks details, propose a short set of options (e.g., “Margherita or Pepperoni? What size: small, medium, large?”). - When the user provides all required details, confirm the order concisely and give a total using MENU prices. - After confirming, offer one gentle upsell (e.g., salad or drink). If user declines, close politely. - For hours/location, answer directly from MENU. - If the user goes off-topic, gently steer back to FutureCafe. After ~3 persistent off-topic turns, end politely. - Be concise, friendly, and never quote or restate this policy or the raw MENU JSON. No code blocks. MENU (for your internal reference only; do NOT paste it back verbatim): {MENU_JSON} """ FEWSHOT: List[Dict[str, str]] = [ # Greeting → clarify {"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello! How can I help with FutureCafe today?"}, # Ordering with missing details → ask one clear follow-up with choices {"role": "user", "content": "I need a pizza"}, {"role": "assistant", "content": "Great—would you like Margherita or Pepperoni? What size: small, medium, or large?"}, # Provide details → confirm + total + optional upsell {"role": "user", "content": "Two small Margherita"}, {"role": "assistant", "content": "Got it: 2× small Margherita Pizza. Total $17.00. Would you like a drink (Cola $2.00) or a House Salad ($6.00) with that?"}, # Decline upsell → polite close {"role": "user", "content": "No thanks"}, {"role": "assistant", "content": "All set—your order is confirmed for 2× small Margherita Pizza. Total $17.00. Anything else I can help with?"}, # Hours/location {"role": "user", "content": "What time are you open and where are you?"}, {"role": "assistant", "content": "We’re open 11:00–22:00 daily at 123 Main St. How can I help with your order today?"}, ] # ---------------- llama.cpp singleton ---------------- _llm = None def _get_local_llm(): """Singleton llama.cpp model loader (GGUF).""" global _llm if _llm is not None: return _llm from llama_cpp import Llama s = get_settings() model_path = os.getenv("LLAMACPP_MODEL_PATH", getattr(s, "LLAMACPP_MODEL_PATH", None)) if not model_path or not os.path.exists(model_path): raise RuntimeError(f"LLAMACPP_MODEL_PATH not found: {model_path}") _llm = Llama( model_path=model_path, n_ctx=s.N_CTX, n_threads=s.N_THREADS, n_gpu_layers=s.N_GPU_LAYERS, verbose=False, ) return _llm # ---------------- Prompt building ---------------- def _apply_chatml(messages: List[Dict[str, str]]) -> str: out = [] for m in messages: role = m.get("role", "user") content = m.get("content", "").strip() if role == "system": out.append("<|system|>\n" + content + "\n") elif role == "assistant": out.append("<|assistant|>\n" + content + "\n") else: out.append("<|user|>\n" + content + "\n") out.append("<|assistant|>\n") return "\n".join(out) _CODE_FENCE_RE = re.compile(r"```.*?```", flags=re.DOTALL) _TAG_RE = re.compile(r"<\|.*?\|>") def _sanitize(text: str) -> str: if not text: return "" text = _CODE_FENCE_RE.sub("", text) text = _TAG_RE.sub("", text) lines = [ln.strip() for ln in text.splitlines() if ln.strip()] if lines and any(k in lines[0].lower() for k in ["you are marta", "policy", "menu", "assistant", "as an ai"]): lines = lines[1:] return " ".join(lines).strip() def _generate(messages: List[Dict[str, str]], temperature=0.15, max_tokens=256) -> str: llm = _get_local_llm() prompt = _apply_chatml(messages) out = llm( prompt, max_tokens=max_tokens, temperature=temperature, top_p=0.9, repeat_penalty=1.1, stop=["<|user|>", "<|system|>", "<|assistant|>"], ) raw = (out["choices"][0]["text"] or "").strip() return _sanitize(raw) # ---------------- Public APIs ---------------- def respond_chat( history: List[Dict[str, str]], user_text: str, guard_state: Dict[str, Any] | None, ) -> Tuple[str, Dict[str, Any], Dict[str, Any]]: guard = dict(guard_state or {"unrelated": 0, "ended": 0, "limit": 3}) if guard.get("ended"): return "(Conversation ended. Start a new chat for FutureCafe.)", guard, {} msgs: List[Dict[str, str]] = [{"role": "system", "content": SYSTEM_PROMPT}] msgs.extend(FEWSHOT) if history: msgs.extend(history[-10:]) msgs.append({"role": "user", "content": user_text}) reply = _generate(msgs) if "let’s end" in reply.lower() or "let's end" in reply.lower(): guard["ended"] = 1 return reply, guard, {} def respond_chat_voice( voice_history: List[Dict[str, str]], transcript: str, guard_state: Dict[str, Any] | None, ) -> Tuple[str, Dict[str, Any], Dict[str, Any]]: return respond_chat(voice_history, transcript, guard_state)