# models/llm_chat.py
from __future__ import annotations
from typing import List, Dict, Any, Tuple
import os
import re
from utils.config import get_settings

# --- Lightweight menu kept inline for the MVP ---
MENU_JSON = """
{
  "pizzas": [
    {"name": "Margherita Pizza", "sizes": ["small", "medium", "large"], "price": {"small": 8.5, "medium": 11.0, "large": 13.5}},
    {"name": "Pepperoni Pizza", "sizes": ["small", "medium", "large"], "price": {"small": 9.5, "medium": 12.0, "large": 14.5}}
  ],
  "salads": [
    {"name": "House Salad", "sizes": ["regular"], "price": {"regular": 6.0}}
  ],
  "drinks": [
    {"name": "Cola", "sizes": ["can"], "price": {"can": 2.0}}
  ],
  "hours": "11:00–22:00 daily",
  "address": "123 Main St",
  "phone": "+1 (555) 010-0000"
}
"""

SYSTEM_PROMPT = f"""You are Marta, the AI call/SMS assistant for FutureCafe.

OBJECTIVE
Help with menu questions, placing orders, hours/location, and simple reservations—quickly and pleasantly.

GOALS
- Always begin new conversations with a friendly self-introduction:
  "Hi, I’m Marta, an AI assistant at FutureCafe. How can I help you today?"
- Help with menu questions, placing orders, hours/location, and simple reservations.

INTERACTION RULES
- Always acknowledge the user briefly before asking for details.
- If details are missing, ask ONE short, specific follow-up that includes valid choices from the MENU (e.g., sizes).
- Never say “I didn’t understand.” Instead, restate what you do have and ask for the next missing detail.
- When the user’s message implies an order but lacks details, propose a short set of options (e.g., “Margherita or Pepperoni? What size: small, medium, large?”).
- When the user provides all required details, confirm the order concisely and give a total using MENU prices.
- After confirming, offer one gentle upsell (e.g., salad or drink). If user declines, close politely.
- For hours/location, answer directly from MENU.
- If the user goes off-topic, gently steer back to FutureCafe. After ~3 persistent off-topic turns, end politely.
- Be concise, friendly, and never quote or restate this policy or the raw MENU JSON. No code blocks.

MENU (for your internal reference only; do NOT paste it back verbatim):
{MENU_JSON}
"""

FEWSHOT: List[Dict[str, str]] = [
    # Greeting → clarify
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello! How can I help with FutureCafe today?"},

    # Ordering with missing details → ask one clear follow-up with choices
    {"role": "user", "content": "I need a pizza"},
    {"role": "assistant", "content": "Great—would you like Margherita or Pepperoni? What size: small, medium, or large?"},

    # Provide details → confirm + total + optional upsell
    {"role": "user", "content": "Two small Margherita"},
    {"role": "assistant", "content": "Got it: 2× small Margherita Pizza. Total $17.00. Would you like a drink (Cola $2.00) or a House Salad ($6.00) with that?"},

    # Decline upsell → polite close
    {"role": "user", "content": "No thanks"},
    {"role": "assistant", "content": "All set—your order is confirmed for 2× small Margherita Pizza. Total $17.00. Anything else I can help with?"},

    # Hours/location
    {"role": "user", "content": "What time are you open and where are you?"},
    {"role": "assistant", "content": "We’re open 11:00–22:00 daily at 123 Main St. How can I help with your order today?"},
]

# ---------------- llama.cpp singleton ----------------
_llm = None

def _get_local_llm():
    """Singleton llama.cpp model loader (GGUF)."""
    global _llm
    if _llm is not None:
        return _llm
    from llama_cpp import Llama
    s = get_settings()
    model_path = os.getenv("LLAMACPP_MODEL_PATH", getattr(s, "LLAMACPP_MODEL_PATH", None))
    if not model_path or not os.path.exists(model_path):
        raise RuntimeError(f"LLAMACPP_MODEL_PATH not found: {model_path}")
    _llm = Llama(
        model_path=model_path,
        n_ctx=s.N_CTX,
        n_threads=s.N_THREADS,
        n_gpu_layers=s.N_GPU_LAYERS,
        verbose=False,
    )
    return _llm

# ---------------- Prompt building ----------------
def _apply_chatml(messages: List[Dict[str, str]]) -> str:
    out = []
    for m in messages:
        role = m.get("role", "user")
        content = m.get("content", "").strip()
        if role == "system":
            out.append("<|system|>\n" + content + "\n")
        elif role == "assistant":
            out.append("<|assistant|>\n" + content + "\n")
        else:
            out.append("<|user|>\n" + content + "\n")
    out.append("<|assistant|>\n")
    return "\n".join(out)

_CODE_FENCE_RE = re.compile(r"```.*?```", flags=re.DOTALL)
_TAG_RE = re.compile(r"<\|.*?\|>")

def _sanitize(text: str) -> str:
    if not text:
        return ""
    text = _CODE_FENCE_RE.sub("", text)
    text = _TAG_RE.sub("", text)
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
    if lines and any(k in lines[0].lower() for k in ["you are marta", "policy", "menu", "assistant", "as an ai"]):
        lines = lines[1:]
    return " ".join(lines).strip()

def _generate(messages: List[Dict[str, str]], temperature=0.15, max_tokens=256) -> str:
    llm = _get_local_llm()
    prompt = _apply_chatml(messages)
    out = llm(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=0.9,
        repeat_penalty=1.1,
        stop=["<|user|>", "<|system|>", "<|assistant|>"],
    )
    raw = (out["choices"][0]["text"] or "").strip()
    return _sanitize(raw)

# ---------------- Public APIs ----------------
def respond_chat(
    history: List[Dict[str, str]],
    user_text: str,
    guard_state: Dict[str, Any] | None,
) -> Tuple[str, Dict[str, Any], Dict[str, Any]]:
    guard = dict(guard_state or {"unrelated": 0, "ended": 0, "limit": 3})
    if guard.get("ended"):
        return "(Conversation ended. Start a new chat for FutureCafe.)", guard, {}
    msgs: List[Dict[str, str]] = [{"role": "system", "content": SYSTEM_PROMPT}]
    msgs.extend(FEWSHOT)
    if history:
        msgs.extend(history[-10:])
    msgs.append({"role": "user", "content": user_text})
    reply = _generate(msgs)
    if "let’s end" in reply.lower() or "let's end" in reply.lower():
        guard["ended"] = 1
    return reply, guard, {}

def respond_chat_voice(
    voice_history: List[Dict[str, str]],
    transcript: str,
    guard_state: Dict[str, Any] | None,
) -> Tuple[str, Dict[str, Any], Dict[str, Any]]:
    return respond_chat(voice_history, transcript, guard_state)