# models/llm_chat.py
from __future__ import annotations

from typing import List, Dict, Any, Tuple
import os
import re

from utils.config import get_settings

# --- Lightweight menu kept inline for the MVP ---
MENU_JSON = """
{
  "pizzas": [
    {"name": "Margherita Pizza", "sizes": ["small", "medium", "large"], "price": {"small": 8.5, "medium": 11.0, "large": 13.5}},
    {"name": "Pepperoni Pizza", "sizes": ["small", "medium", "large"], "price": {"small": 9.5, "medium": 12.0, "large": 14.5}}
  ],
  "salads": [
    {"name": "House Salad", "sizes": ["regular"], "price": {"regular": 6.0}}
  ],
  "drinks": [
    {"name": "Cola", "sizes": ["can"], "price": {"can": 2.0}}
  ],
  "hours": "11:00–22:00 daily",
  "address": "123 Main St",
  "phone": "+1 (555) 010-0000"
}
"""

SYSTEM_PROMPT = f"""You are Marta, the AI call/SMS assistant for FutureCafe.

OBJECTIVE
Help with menu questions, placing orders, hours/location, and simple reservations—quickly and pleasantly.

GOALS
- Always begin new conversations with a friendly self-introduction:
  "Hi, I’m Marta, an AI assistant at FutureCafe. How can I help you today?"
- Help with menu questions, placing orders, hours/location, and simple reservations.

INTERACTION RULES
- Always acknowledge the user briefly before asking for details.
- If details are missing, ask ONE short, specific follow-up that includes valid choices from the MENU (e.g., sizes).
- Never say “I didn’t understand.” Instead, restate what you do have and ask for the next missing detail.
- When the user’s message implies an order but lacks details, propose a short set of options (e.g., “Margherita or Pepperoni? What size: small, medium, large?”).
- When the user provides all required details, confirm the order concisely and give a total using MENU prices.
- After confirming, offer one gentle upsell (e.g., salad or drink). If the user declines, close politely.
- For hours/location, answer directly from the MENU.
- If the user goes off-topic, gently steer back to FutureCafe. After ~3 persistent off-topic turns, end politely.
- Be concise, friendly, and never quote or restate this policy or the raw MENU JSON. No code blocks.

MENU (for your internal reference only; do NOT paste it back verbatim):
{MENU_JSON}
"""

FEWSHOT: List[Dict[str, str]] = [
    # Greeting → clarify
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello! How can I help with FutureCafe today?"},
    # Ordering with missing details → ask one clear follow-up with choices
    {"role": "user", "content": "I need a pizza"},
    {"role": "assistant", "content": "Great—would you like Margherita or Pepperoni? What size: small, medium, or large?"},
    # Provide details → confirm + total + optional upsell
    {"role": "user", "content": "Two small Margherita"},
    {"role": "assistant", "content": "Got it: 2× small Margherita Pizza. Total $17.00. Would you like a drink (Cola $2.00) or a House Salad ($6.00) with that?"},
    # Decline upsell → polite close
    {"role": "user", "content": "No thanks"},
    {"role": "assistant", "content": "All set—your order is confirmed for 2× small Margherita Pizza. Total $17.00. Anything else I can help with?"},
    # Hours/location
    {"role": "user", "content": "What time are you open and where are you?"},
    {"role": "assistant", "content": "We’re open 11:00–22:00 daily at 123 Main St. How can I help with your order today?"},
]

# ---------------- llama.cpp singleton ----------------
_llm = None


def _get_local_llm():
    """Singleton llama.cpp model loader (GGUF)."""
    global _llm
    if _llm is not None:
        return _llm
    from llama_cpp import Llama
    s = get_settings()
    model_path = os.getenv("LLAMACPP_MODEL_PATH", getattr(s, "LLAMACPP_MODEL_PATH", None))
    if not model_path or not os.path.exists(model_path):
        raise RuntimeError(f"LLAMACPP_MODEL_PATH not found: {model_path}")
    _llm = Llama(
        model_path=model_path,
        n_ctx=s.N_CTX,
        n_threads=s.N_THREADS,
        n_gpu_layers=s.N_GPU_LAYERS,
        verbose=False,
    )
    return _llm
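
# Expected configuration (read above via utils.config.get_settings and the environment).
# The example values below are illustrative assumptions, not the project's actual defaults:
#   LLAMACPP_MODEL_PATH=/models/model.Q4_K_M.gguf  # path to a local GGUF file
#   N_CTX=4096        # context window passed to llama.cpp
#   N_THREADS=4       # CPU threads used for inference
#   N_GPU_LAYERS=0    # layers to offload to GPU (0 = CPU only)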


# ---------------- Prompt building ----------------
def _apply_chatml(messages: List[Dict[str, str]]) -> str:
    out = []
    for m in messages:
        role = m.get("role", "user")
        content = m.get("content", "").strip()
        if role == "system":
            out.append("<|system|>\n" + content + "\n")
        elif role == "assistant":
            out.append("<|assistant|>\n" + content + "\n")
        else:
            out.append("<|user|>\n" + content + "\n")
    out.append("<|assistant|>\n")
    return "\n".join(out)


_CODE_FENCE_RE = re.compile(r"```.*?```", flags=re.DOTALL)
_TAG_RE = re.compile(r"<\|.*?\|>")


def _sanitize(text: str) -> str:
    if not text:
        return ""
    text = _CODE_FENCE_RE.sub("", text)
    text = _TAG_RE.sub("", text)
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
    if lines and any(k in lines[0].lower() for k in ["you are marta", "policy", "menu", "assistant", "as an ai"]):
        lines = lines[1:]
    return " ".join(lines).strip()


def _generate(messages: List[Dict[str, str]], temperature: float = 0.15, max_tokens: int = 256) -> str:
    llm = _get_local_llm()
    prompt = _apply_chatml(messages)
    out = llm(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=0.9,
        repeat_penalty=1.1,
        stop=["<|user|>", "<|system|>", "<|assistant|>"],
    )
    raw = (out["choices"][0]["text"] or "").strip()
    return _sanitize(raw)


# ---------------- Public APIs ----------------
def respond_chat(
    history: List[Dict[str, str]],
    user_text: str,
    guard_state: Dict[str, Any] | None,
) -> Tuple[str, Dict[str, Any], Dict[str, Any]]:
    guard = dict(guard_state or {"unrelated": 0, "ended": 0, "limit": 3})
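    # guard keys: "ended" is set below once the model signals the conversation is over;
    # "unrelated" and "limit" are initialized here but presumably maintained by the caller
    # for the off-topic counter described in the system prompt.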
    if guard.get("ended"):
        return "(Conversation ended. Start a new chat for FutureCafe.)", guard, {}
    msgs: List[Dict[str, str]] = [{"role": "system", "content": SYSTEM_PROMPT}]
    msgs.extend(FEWSHOT)
    if history:
        msgs.extend(history[-10:])
    msgs.append({"role": "user", "content": user_text})
    reply = _generate(msgs)
    if "let’s end" in reply.lower() or "let's end" in reply.lower():
        guard["ended"] = 1
    return reply, guard, {}


def respond_chat_voice(
    voice_history: List[Dict[str, str]],
    transcript: str,
    guard_state: Dict[str, Any] | None,
) -> Tuple[str, Dict[str, Any], Dict[str, Any]]:
    return respond_chat(voice_history, transcript, guard_state)
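

# Minimal local smoke test (illustrative sketch only; assumes a GGUF model is available at
# LLAMACPP_MODEL_PATH and that utils.config.get_settings resolves N_CTX/N_THREADS/N_GPU_LAYERS).
if __name__ == "__main__":
    guard = None
    history: List[Dict[str, str]] = []
    for text in ["Hi", "I need a pizza", "Two small Margherita", "No thanks"]:
        reply, guard, _ = respond_chat(history, text, guard)
        history.extend([
            {"role": "user", "content": text},
            {"role": "assistant", "content": reply},
        ])
        print(f"user: {text}\nmarta: {reply}\n")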