# gradio_edu_app_fixed.py
"""
Educational Text Tutor – Gradio App (Patched)
Fixes:
- Properly updates CheckboxGroup choices using gr.update(...)
- Dataframes use type="array" to ensure list-of-lists I/O
- Robust _apply_edits() to handle empty/short rows and parse errors
- Safer student answer table parsing
Enhancements:
- Personalized Study Summary per student on Analysis & Homework tab
- Profile-aware student simulation with targeted accuracy by subtopic category
Run:
  pip install gradio openai
  python gradio_edu_app_fixed.py
"""

import json
import uuid
import re
import random
from typing import List, Dict, Any, Tuple
import gradio as gr

# --- Utility: OpenAI call helper ------------------------------------------------

def _call_openai_chat(
    api_key: str,
    model: str,
    messages: List[Dict[str, str]],
    temperature: float = 0.2,
    max_tokens: int = 2000,
) -> str:
    try:
        from openai import OpenAI
        client = OpenAI(api_key=api_key)
        try:
            resp = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
            )
            return resp.choices[0].message.content
        except Exception:
            # Fallback to Responses API
            joined = "\n".join([f"{m['role'].upper()}: {m['content']}" for m in messages])
            resp = client.responses.create(
                model=model,
                input=joined,
                temperature=temperature,
                max_output_tokens=max_tokens,
            )
            if hasattr(resp, "output_text"):
                return resp.output_text
            try:
                return resp.choices[0].message.content  # type: ignore[attr-defined]
            except Exception:
                return str(resp)
    except ImportError:
        import openai  # type: ignore
        openai.api_key = api_key
        resp = openai.ChatCompletion.create(  # type: ignore
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        return resp["choices"][0]["message"]["content"]


# --- Prompt templates (ALL literal braces escaped) ------------------------------

SUBTOPIC_PROMPT = """You are a curriculum designer.
Extract at least {min_subtopics} clear, non-overlapping subtopics from the EDUCATIONAL TEXT below.
Each subtopic should be concise (3–8 words) and collectively cover the main ideas.

Return ONLY valid JSON of the form:
{{
  "subtopics": ["...", "...", "..."]
}}

EDUCATIONAL TEXT:
---
{source_text}
---
"""

QUESTION_PROMPT = """You are an assessment designer.
Create {n_per_subtopic} {qtype_desc} questions for EACH subtopic provided.
Vary difficulty around {difficulty} difficulty. Keep questions unambiguous and self-contained.

If question_type == "MCQ": provide *exactly four* options ("A","B","C","D") and the correct_key as one of "A"/"B"/"C"/"D".
If question_type == "Short Answer": provide a model_answer that is 1–3 sentences.

Return ONLY valid JSON in the following schema:
{{
  "items": [
    {{
      "subtopic": "String",
      "question_type": "{qtype}",
      "question": "String",
      "options": {{"A": "String", "B": "String", "C": "String", "D": "String"}} OR null,
      "correct_key": "A|B|C|D" OR null,
      "model_answer": "String" OR null
    }},
    ...
  ]
}}

SUBTOPICS (the generator must cover these and label each item with the matching subtopic):
{selected_subtopics}
"""

# policy-aware simulation prompt (subtopic-aware)
SIMULATE_STUDENT_PROMPT = """You will roleplay as a student with this profile:
---
{student_profile}
---

**Policy (you MUST follow):**
{policy_json}

Guidelines:
- Use the **subtopic** of each question to decide where to excel vs. struggle.
- Hit the target accuracy ranges by category (strong/weak/neutral). If needed, deliberately pick a plausible but wrong choice. Never admit you’re doing this.
- MCQ: answer ONLY the option key (A/B/C/D). Short Answer: 1–3 sentences; on weak areas, it’s ok to be vague, omit a key detail, or make a misconception.

Return ONLY valid JSON:
{{
  "answers": [
    {{"id": "QUESTION_ID", "answer": "String"}},
    ...
  ]
}}

QUESTIONS (with IDs & subtopics):
{questions_json}
"""

GRADING_PROMPT = """You are a strict teacher using a clear rubric.
Grade each student answer against the provided key/model answer.
For MCQ: mark correct if the chosen key matches the correct_key.
For Short Answer: mark correct if the essential facts match (allow paraphrase), else incorrect.
Give a one-sentence rationale.

Return ONLY valid JSON with this schema:
{{
  "results": [
    {{
      "id": "QUESTION_ID",
      "subtopic": "String",
      "is_correct": true/false,
      "score": 1 or 0,
      "rationale": "String"
    }},
    ...
  ],
  "by_subtopic": [
    {{
      "subtopic": "String",
      "total": N,
      "correct": M,
      "accuracy": 0.0_to_1.0
    }},
    ...
  ]
}}

QUESTIONS (with answers):
{questions_and_keys_json}

STUDENT ANSWERS:
{student_answers_json}
"""

PRESCRIPTION_PROMPT = """You are an expert tutor.
Based on the per-subtopic performance for two students, write:
1) A concise progress recap for each student (3–5 sentences).
2) A prioritized list of weak subtopics for each student (up to 5).
3) For each weak subtopic and each student, suggest a mini-homework plan: 3 concrete practice tasks (in increasing difficulty).

Return ONLY valid JSON:
{{
  "student_1": {{
    "recap": "String",
    "weak_subtopics": ["..."],
    "homework": [{{"subtopic":"String","tasks":["...","...","..."]}}]
  }},
  "student_2": {{
    "recap": "String",
    "weak_subtopics": ["..."],
    "homework": [{{"subtopic":"String","tasks":["...","...","..."]}}]
  }}
}}

PERFORMANCE SUMMARY (Student 1):
{perf_1_json}

PERFORMANCE SUMMARY (Student 2):
{perf_2_json}
"""

# Personalized study summary prompt
STUDY_SUMMARY_PROMPT = """You are a learning coach. Using the performance summary and the proposed homework for ONE student, write a short **personalized home-study summary** they can follow on their own.

Include, in order:
- **Strengths:** 2–3 quick bullets.
- **Weak spots:** 2–3 bullets naming subtopics (lowest accuracy first).
- **3 study goals** (clear, measurable).
- **7-day micro-plan:** Day 1 → Day 7 bullets (one action each).
- **Motivation tip** (1 sentence).

Constraints:
- Keep it concise: 120–180 words total.
- Use simple language and Markdown bullets.
- Do not mention accuracy numbers; just reflect them implicitly.

PERFORMANCE:
{perf_json}

HOMEWORK (may be empty):
{hw_json}
"""

# --- Core logic -----------------------------------------------------------------

def extract_subtopics(api_key: str, model: str, text: str, min_subtopics: int) -> List[str]:
    if not api_key or not model:
        raise gr.Error("Please enter your API key and select a model on the Setup tab.")
    if not text.strip():
        raise gr.Error("Please paste the educational text.")
    msg = [
        {"role": "system", "content": "You produce strictly valid JSON."},
        {"role": "user", "content": SUBTOPIC_PROMPT.format(min_subtopics=min_subtopics, source_text=text.strip())},
    ]
    raw = _call_openai_chat(api_key, model, msg, temperature=0.1)
    try:
        data = json.loads(raw)
        subs = data.get("subtopics", [])
        subs = [s.strip() for s in subs if isinstance(s, str) and s.strip()]
        if len(subs) < min_subtopics:
            extra_needed = min_subtopics - len(subs)
            subs += [f"Additional Subtopic {i+1}" for i in range(extra_needed)]
        seen, uniq = set(), []
        for s in subs:
            key = s.lower()
            if key not in seen:
                uniq.append(s)
                seen.add(key)
        return uniq
    except Exception:
        lines = [ln.strip("-• \t") for ln in raw.splitlines() if ln.strip()]
        return lines[:max(min_subtopics, len(lines))]

def generate_questions(
    api_key: str,
    model: str,
    selected_subtopics: List[str],
    qtype: str,
    n_per_subtopic: int,
    difficulty: str
) -> List[Dict[str, Any]]:
    if not selected_subtopics:
        raise gr.Error("Please select at least one subtopic in the Subtopics tab.")
    qtype_desc = "multiple-choice (MCQ with 4 options)" if qtype == "MCQ" else "short-answer"
    prompt = QUESTION_PROMPT.format(
        n_per_subtopic=n_per_subtopic,
        qtype_desc=qtype_desc,
        difficulty=difficulty,
        qtype=qtype,
        selected_subtopics=json.dumps(selected_subtopics, ensure_ascii=False, indent=2),
    )
    msg = [
        {"role": "system", "content": "You produce strictly valid JSON and follow the schema exactly."},
        {"role": "user", "content": prompt},
    ]
    raw = _call_openai_chat(api_key, model, msg, temperature=0.7, max_tokens=2800)
    try:
        data = json.loads(raw)
        items = data.get("items", [])
    except Exception:
        raise gr.Error("The model did not return valid JSON for questions. Try again or reduce counts.")

    questions: List[Dict[str, Any]] = []
    for it in items:
        qid = str(uuid.uuid4())
        subtopic = (it.get("subtopic") or "").strip()
        question_type = it.get("question_type") or qtype
        question = (it.get("question") or "").strip()
        options = it.get("options") or None
        correct_key = it.get("correct_key") or None
        model_answer = it.get("model_answer") or None
        if question_type == "MCQ":
            if not (isinstance(options, dict) and correct_key in {"A", "B", "C", "D"}):
                continue
        else:
            if not model_answer:
                continue
        questions.append({
            "id": qid,
            "subtopic": subtopic,
            "question_type": question_type,
            "question": question,
            "options": options,
            "correct_key": correct_key,
            "model_answer": model_answer,
        })
    return questions


# --- Policy helpers to force visible divergence between students ----------------

def _derive_policy(student_profile: str) -> Dict[str, Any]:
    """Infer strong/weak areas and target accuracies from a free-form profile."""
    p = student_profile.lower()
    strong_terms, weak_terms = set(), set()

    # Heuristics from profile
    if re.search(r"strong in (definitions?|theor(?:y|ies)|concepts?)", p):
        strong_terms |= {"definition", "definitions", "theory", "theories", "concept", "concepts", "term", "terms"}
    if re.search(r"weak(?:er)? in (definitions?|theor(?:y|ies)|concepts?)", p):
        weak_terms   |= {"definition", "definitions", "theory", "theories", "concept", "concepts", "term", "terms"}

    if re.search(r"strong in (applications?|problem ?solving|calculations?)", p):
        strong_terms |= {"application", "applications", "problem", "problems", "problem solving", "case", "cases", "calculation", "calculations", "practice"}
    if re.search(r"weak(?:er)? in (applications?|problem ?solving|calculations?)", p):
        weak_terms   |= {"application", "applications", "problem", "problems", "problem solving", "case", "cases", "calculation", "calculations", "practice"}

    # Generic defaults if not mentioned
    if not strong_terms and "theor" in p:
        strong_terms |= {"definition","concept","theory","term"}
    if not weak_terms and "careless" in p:
        weak_terms |= {"definition","term"}  # careless → slips on definitional precision

    # Accuracy targets
    overall = 0.65  # baseline realism
    if "anxious" in p:   overall -= 0.05
    if "confident" in p: overall += 0.05

    weak_acc    = 0.45
    strong_acc  = 0.85
    neutral_acc = overall

    careless_rate = 0.15 if "careless" in p else 0.05
    variance = 0.05  # small randomness

    return {
        "strong_terms": sorted(strong_terms),
        "weak_terms": sorted(weak_terms),
        "target_acc": {
            "strong": strong_acc,
            "weak": weak_acc,
            "neutral": neutral_acc
        },
        "overall_target": overall,
        "careless_rate": careless_rate,
        "variance": variance
    }

def _classify_subtopic(name: str, policy: Dict[str, Any]) -> str:
    s = (name or "").lower()
    strong_hits = any(t in s for t in policy["strong_terms"])
    weak_hits   = any(t in s for t in policy["weak_terms"])
    if weak_hits and not strong_hits:
        return "weak"
    if strong_hits and not weak_hits:
        return "strong"
    return "neutral"

def _wrong_option_letter(correct_key: str) -> str:
    pool = ["A","B","C","D"]
    pool = [x for x in pool if x != (correct_key or "").upper()]
    return random.choice(pool) if pool else "A"

def _enforce_profile_variation(
    questions: List[Dict[str, Any]],
    answers: List[Dict[str, Any]],
    policy: Dict[str, Any]
) -> List[Dict[str, Any]]:
    """Post-process MCQ answers to meet target wrong-rate per category. Short answers untouched."""
    # Indexing
    q_by_id = {q["id"]: q for q in questions}
    ans_by_id = {a["id"]: a["answer"] for a in answers}

    # Collect MCQs per category
    buckets = {"strong": [], "weak": [], "neutral": []}
    for q in questions:
        if q.get("question_type") != "MCQ":
            continue
        cat = _classify_subtopic(q.get("subtopic",""), policy)
        buckets[cat].append(q["id"])

    # For each category, compute current and target wrong counts
    for cat, qids in buckets.items():
        if not qids:
            continue
        target_acc = policy["target_acc"][cat]
        # add small variance so runs don't look identical
        target_acc += random.uniform(-policy["variance"], policy["variance"])
        target_acc = max(0.2, min(0.95, target_acc))

        total = len(qids)
        desired_wrong = round(total * (1 - target_acc))

        # Compute current wrongs
        current_wrong = 0
        correct_candidates = []  # qids currently correct → can flip to wrong if needed
        for qid in qids:
            q = q_by_id[qid]
            stu = (ans_by_id.get(qid) or "").strip().upper()
            correct = (q.get("correct_key") or "").strip().upper()
            if stu and correct and stu == correct:
                correct_candidates.append(qid)
            else:
                current_wrong += 1

        need_more_wrong = max(0, desired_wrong - current_wrong)

        # Flip some correct ones to wrong
        if need_more_wrong > 0 and correct_candidates:
            random.shuffle(correct_candidates)
            for qid in correct_candidates[:need_more_wrong]:
                correct = (q_by_id[qid].get("correct_key") or "").strip().upper()
                ans_by_id[qid] = _wrong_option_letter(correct)

        # Optional: sprinkle a few careless slips across all categories
        if random.random() < policy["careless_rate"]:
            for qid in random.sample(qids, k=max(0, min(1, len(qids)))):
                correct = (q_by_id[qid].get("correct_key") or "").strip().upper()
                if ans_by_id.get(qid, "").upper() == correct:
                    ans_by_id[qid] = _wrong_option_letter(correct)

    # Rebuild answers list
    out = []
    for a in answers:
        qid = a["id"]
        out.append({"id": qid, "answer": ans_by_id.get(qid, a["answer"])})
    return out


def simulate_student_answers(
    api_key: str,
    model: str,
    student_profile: str,
    questions: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    # Pack questions with subtopics so the model can bias performance
    qpack = [
        {
            "id": q["id"],
            "subtopic": q["subtopic"],
            "question_type": q["question_type"],
            "question": q["question"],
            "options": q["options"],
        } for q in questions
    ]

    # Derive an explicit policy from the free-text profile
    policy = _derive_policy(student_profile)

    prompt = SIMULATE_STUDENT_PROMPT.format(
        student_profile=student_profile.strip(),
        policy_json=json.dumps(policy, ensure_ascii=False, indent=2),
        questions_json=json.dumps(qpack, ensure_ascii=False, indent=2),
    )
    msg = [
        {"role": "system", "content": "Return strictly valid JSON and keep answers realistic given the policy."},
        {"role": "user", "content": prompt},
    ]
    raw = _call_openai_chat(api_key, model, msg, temperature=0.8, max_tokens=3000)
    try:
        data = json.loads(raw)
        answers = data.get("answers", [])
    except Exception:
        raise gr.Error("Failed to parse student answers JSON.")

    # Normalize
    normalized = []
    for a in answers:
        qid = a.get("id")
        ans = (a.get("answer") or "").strip()
        if qid and ans:
            normalized.append({"id": qid, "answer": ans})

    # Keep only answers for our questions
    q_ids = {q["id"] for q in questions}
    filtered = [a for a in normalized if a["id"] in q_ids]

    # Enforce target variation to visibly differentiate students (MCQ-safe)
    filtered = _enforce_profile_variation(questions, filtered, policy)

    return filtered


def grade_student(
    api_key: str,
    model: str,
    questions: List[Dict[str, Any]],
    student_answers: List[Dict[str, Any]],
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    q_map = {q["id"]: q for q in questions}
    bundle = []
    for sa in student_answers:
        qid = sa["id"]
        if qid in q_map:
            q = q_map[qid]
            bundle.append({
                "id": qid,
                "subtopic": q["subtopic"],
                "question_type": q["question_type"],
                "question": q["question"],
                "options": q["options"],
                "correct_key": q.get("correct_key"),
                "model_answer": q.get("model_answer"),
                "student_answer": sa["answer"],
            })
    prompt = GRADING_PROMPT.format(
        questions_and_keys_json=json.dumps(bundle, ensure_ascii=False, indent=2),
        student_answers_json=json.dumps(student_answers, ensure_ascii=False, indent=2),
    )
    msg = [
        {"role": "system", "content": "Return strictly valid JSON following the schema."},
        {"role": "user", "content": prompt},
    ]
    raw = _call_openai_chat(api_key, model, msg, temperature=0.0, max_tokens=3500)
    try:
        data = json.loads(raw)
        results = data.get("results", [])
        by_subtopic = data.get("by_subtopic", [])
        for r in results:
            r.setdefault("score", 1 if r.get("is_correct") else 0)
        return results, by_subtopic
    except Exception:
        # Heuristic fallback (MCQ only)
        results = []
        tally = {}
        for b in bundle:
            is_correct = False
            if b["question_type"] == "MCQ":
                is_correct = (b["student_answer"].strip().upper() == (b.get("correct_key") or "").upper())
            score = 1 if is_correct else 0
            results.append({"id": b["id"], "subtopic": b["subtopic"], "is_correct": is_correct, "score": score, "rationale": "Heuristic fallback."})
            t = tally.setdefault(b["subtopic"], {"subtopic": b["subtopic"], "total": 0, "correct": 0, "accuracy": 0.0})
            t["total"] += 1
            t["correct"] += score
        for t in tally.values():
            t["accuracy"] = round(t["correct"] / max(1, t["total"]), 3)
        by_subtopic = list(tally.values())
        return results, by_subtopic


def prescribe_homework(
    api_key: str,
    model: str,
    perf1: List[Dict[str, Any]],
    perf2: List[Dict[str, Any]],
) -> Dict[str, Any]:
    prompt = PRESCRIPTION_PROMPT.format(
        perf_1_json=json.dumps(perf1, ensure_ascii=False, indent=2),
        perf_2_json=json.dumps(perf2, ensure_ascii=False, indent=2),
    )
    msg = [
        {"role": "system", "content": "Return strictly valid JSON exactly as requested."},
        {"role": "user", "content": prompt},
    ]
    raw = _call_openai_chat(api_key, model, msg, temperature=0.4, max_tokens=2200)
    try:
        data = json.loads(raw)
        return data
    except Exception:
        return {
            "student_1": {"recap": "N/A", "weak_subtopics": [], "homework": []},
            "student_2": {"recap": "N/A", "weak_subtopics": [], "homework": []},
        }

# Personalized study summary helper
def summarize_student(
    api_key: str,
    model: str,
    perf: List[Dict[str, Any]],
    rx_student: Dict[str, Any],
) -> str:
    prompt = STUDY_SUMMARY_PROMPT.format(
        perf_json=json.dumps(perf, ensure_ascii=False, indent=2),
        hw_json=json.dumps(rx_student or {}, ensure_ascii=False, indent=2),
    )
    msg = [
        {"role": "system", "content": "Write concise Markdown only (no JSON, no preamble). Max ~180 words."},
        {"role": "user", "content": prompt},
    ]
    text = _call_openai_chat(api_key, model, msg, temperature=0.3, max_tokens=500)
    return text.strip()


# --- Gradio UI ------------------------------------------------------------------

with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown("# 🎓 Educational Tutor\nDesign subtopics → generate questions → simulate students → analyze → prescribe homework")

    # App-wide state
    st_api_key = gr.State("")
    st_model = gr.State("gpt-4o-mini")
    st_source_text = gr.State("")
    st_subtopics = gr.State([])                 # List[str]
    st_selected_subtopics = gr.State([])        # List[str]
    st_questions = gr.State([])                 # List[dict]
    st_student1_answers = gr.State([])          # List[dict]
    st_student2_answers = gr.State([])          # List[dict]
    st_grade1 = gr.State([])                    # List[dict] results
    st_grade2 = gr.State([])
    st_perf1 = gr.State([])                     # by_subtopic
    st_perf2 = gr.State([])
    st_rx = gr.State({})                        # prescriptions

    with gr.Tab("1) Setup"):
        with gr.Row():
            api_key_in = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
            model_in = gr.Dropdown(
                label="Model",
                choices=[
                    "gpt-4o-mini",
                    "gpt-4o",
                    "o4-mini",
                    "gpt-4.1-mini",
                    "gpt-4.1",
                    "gpt-3.5-turbo",
                    "gpt-4-turbo",
                ],
                value="gpt-4o-mini",
                allow_custom_value=True,
            )
        save_btn = gr.Button("Save Settings", variant="primary")
        status = gr.Markdown("")

        def _save_settings(api_key, model):
            if not api_key or not model:
                raise gr.Error("Please provide API key and a model.")
            return api_key, model, f"✅ Settings saved: **{model}**"

        save_btn.click(
            _save_settings,
            inputs=[api_key_in, model_in],
            outputs=[st_api_key, st_model, status],
        )

    with gr.Tab("2) Subtopics"):
        source_text = gr.Textbox(lines=12, label="Paste Educational Text", placeholder="Paste the text students will learn...")
        min_sub = gr.Slider(2, 20, value=5, step=1, label="Minimum number of subtopics")
        extract_btn = gr.Button("Extract Subtopics", variant="primary")
        subs_out = gr.CheckboxGroup(label="Select subtopics to include", choices=[])

        def _extract(api_key, model, text, min_n):
            subs = extract_subtopics(api_key, model, text, int(min_n))
            return (
                text,
                subs,
                gr.update(choices=subs, value=subs)
            )

        extract_btn.click(
            _extract,
            inputs=[st_api_key, st_model, source_text, min_sub],
            outputs=[st_source_text, st_subtopics, subs_out],
        )

        def _select_subs(selected, available):
            if not available:
                return []
            safe = [s for s in (selected or []) if s in available]
            return safe

        subs_out.change(
            _select_subs,
            inputs=[subs_out, st_subtopics],
            outputs=st_selected_subtopics
        )

    with gr.Tab("3) Generate Questions"):
        with gr.Row():
            qtype = gr.Radio(["Short Answer", "MCQ"], value="MCQ", label="Question Type")
            n_per_sub = gr.Slider(1, 10, value=3, step=1, label="Questions per selected subtopic")
            difficulty = gr.Dropdown(["easy", "medium", "hard"], value="medium", label="Difficulty")
        gen_btn = gr.Button("Generate Questions", variant="primary")
        q_table = gr.Dataframe(
            headers=["id","subtopic","question_type","question","options","correct_key","model_answer"],
            row_count=(1, "dynamic"),
            type="array",
            label="Generated Questions"
        )
        hint = gr.Markdown("You can edit cells. For MCQ 'options', keep valid JSON, e.g. {\"A\":\"...\",\"B\":\"...\",\"C\":\"...\",\"D\":\"...\"}")

        def _gen_q(api_key, model, selected, qtype_value, n, diff):
            qtype_norm = "MCQ" if qtype_value == "MCQ" else "Short Answer"
            qs = generate_questions(api_key, model, selected or [], qtype_norm, int(n), diff)
            rows = []
            for q in qs:
                rows.append([
                    q.get("id"),
                    q.get("subtopic"),
                    q.get("question_type"),
                    q.get("question"),
                    json.dumps(q.get("options"), ensure_ascii=False) if q.get("options") else None,
                    q.get("correct_key"),
                    q.get("model_answer"),
                ])
            return qs, rows

        gen_btn.click(
            _gen_q,
            inputs=[st_api_key, st_model, st_selected_subtopics, qtype, n_per_sub, difficulty],
            outputs=[st_questions, q_table],
        )

        def _apply_edits(df):
            qs = []
            if not isinstance(df, list):
                return qs
            for row in df:
                if not row:
                    continue
                row = list(row) + [None] * (7 - len(row))
                row = row[:7]
                qid, subtopic, qtype_v, question, options_raw, correct_key, model_answer = row
                if not (qid and question):
                    continue
                options = None
                if isinstance(options_raw, str) and options_raw.strip():
                    try:
                        parsed = json.loads(options_raw)
                        if isinstance(parsed, dict):
                            options = parsed
                    except Exception:
                        options = None
                elif isinstance(options_raw, dict):
                    options = options_raw
                qs.append({
                    "id": qid,
                    "subtopic": subtopic,
                    "question_type": qtype_v,
                    "question": question,
                    "options": options,
                    "correct_key": correct_key,
                    "model_answer": model_answer,
                })
            return qs

        q_table.change(_apply_edits, inputs=q_table, outputs=st_questions)

    with gr.Tab("4) Simulate Students"):
        gr.Markdown("Provide brief profiles. The model will answer as each persona.")
        s1 = gr.Textbox(label="Student 1 Profile", value="Diligent but anxious test-taker. Strong in theory, weaker in applications.")
        s2 = gr.Textbox(label="Student 2 Profile", value="Confident and fast, sometimes careless. Strong in applications, weaker in definitions.")
        sim_btn = gr.Button("Simulate Answers", variant="primary")
        s1_table = gr.Dataframe(headers=["question_id","answer"], row_count=(1, "dynamic"), type="array", label="Student 1 Answers (editable)")
        s2_table = gr.Dataframe(headers=["question_id","answer"], row_count=(1, "dynamic"), type="array", label="Student 2 Answers (editable)")

        def _simulate(api_key, model, prof1, prof2, qs):
            if not qs:
                raise gr.Error("No questions generated yet.")
            a1 = simulate_student_answers(api_key, model, prof1, qs)
            a2 = simulate_student_answers(api_key, model, prof2, qs)
            rows1 = [[x["id"], x["answer"]] for x in a1]
            rows2 = [[x["id"], x["answer"]] for x in a2]
            return a1, a2, rows1, rows2

        sim_btn.click(
            _simulate,
            inputs=[st_api_key, st_model, s1, s2, st_questions],
            outputs=[st_student1_answers, st_student2_answers, s1_table, s2_table],
        )

        def _apply_s_answers(df):
            out = []
            if not isinstance(df, list):
                return out
            for r in df:
                if not r or len(r) < 2:
                    continue
                qid = r[0]
                ans = r[1]
                if qid and ans is not None:
                    out.append({"id": qid, "answer": str(ans)})
            return out

        s1_table.change(_apply_s_answers, inputs=s1_table, outputs=st_student1_answers)
        s2_table.change(_apply_s_answers, inputs=s2_table, outputs=st_student2_answers)

    with gr.Tab("5) Analysis & Homework"):
        grade_btn = gr.Button("Grade & Analyze", variant="primary")
        with gr.Row():
            perf1_tbl = gr.Dataframe(headers=["subtopic","total","correct","accuracy"], row_count=(1, "dynamic"), type="array", label="Student 1 – Per-Subtopic Performance")
            perf2_tbl = gr.Dataframe(headers=["subtopic","total","correct","accuracy"], row_count=(1, "dynamic"), type="array", label="Student 2 – Per-Subtopic Performance")
        report_md = gr.Markdown()
        hw1 = gr.JSON(label="Student 1 – Homework Plan")
        hw2 = gr.JSON(label="Student 2 – Homework Plan")

        # Personalized study summaries
        gr.Markdown("### Student 1 – Personalized Study Summary")
        sum1_md = gr.Markdown()
        gr.Markdown("### Student 2 – Personalized Study Summary")
        sum2_md = gr.Markdown()

        def _grade_and_analyze(api_key, model, qs, a1, a2):
            if not qs or not a1 or not a2:
                raise gr.Error("Need questions and both students' answers first.")
            res1, by1 = grade_student(api_key, model, qs, a1)
            res2, by2 = grade_student(api_key, model, qs, a2)
            table1 = [[b["subtopic"], b["total"], b["correct"], b["accuracy"]] for b in by1]
            table2 = [[b["subtopic"], b["total"], b["correct"], b["accuracy"]] for b in by2]

            def _acc(by):
                if not by: return 0.0
                num = sum(b.get("correct", 0) for b in by)
                den = sum(b.get("total", 0) for b in by)
                return round(num / max(1, den), 3)

            rx_json = prescribe_homework(api_key, model, by1, by2)
            s = f"**Student 1 overall accuracy:** { _acc(by1) }  \n**Student 2 overall accuracy:** { _acc(by2) }  \n"
            s += "\n**Notes:** Lower-accuracy subtopics indicate targets for remediation. See Homework and Personalized Summaries below."

            s1_rx = rx_json.get("student_1", {})
            s2_rx = rx_json.get("student_2", {})

            # generate summaries using performance + homework
            s1_sum = summarize_student(api_key, model, by1, s1_rx)
            s2_sum = summarize_student(api_key, model, by2, s2_rx)

            return (
                res1, res2, by1, by2,
                table1, table2,
                s, s1_rx, s2_rx,
                s1_sum, s2_sum
            )

        grade_btn.click(
            _grade_and_analyze,
            inputs=[st_api_key, st_model, st_questions, st_student1_answers, st_student2_answers],
            outputs=[  # order must match return above
                st_grade1, st_grade2, st_perf1, st_perf2,
                perf1_tbl, perf2_tbl,
                report_md, hw1, hw2,
                sum1_md, sum2_md
            ],
        )

    gr.Markdown("— Built using Gradio + OpenAI —")

if __name__ == "__main__":
    # Set share=True to get a public link
    demo.launch(share=True)