# gradio_edu_app_fixed.py """ Educational Text Tutor – Gradio App (Patched) Fixes: - Properly updates CheckboxGroup choices using gr.update(...) - Dataframes use type="array" to ensure list-of-lists I/O - Robust _apply_edits() to handle empty/short rows and parse errors - Safer student answer table parsing Enhancements: - Personalized Study Summary per student on Analysis & Homework tab - Profile-aware student simulation with targeted accuracy by subtopic category Run: pip install gradio openai python gradio_edu_app_fixed.py """ import json import uuid import re import random from typing import List, Dict, Any, Tuple import gradio as gr # --- Utility: OpenAI call helper ------------------------------------------------ def _call_openai_chat( api_key: str, model: str, messages: List[Dict[str, str]], temperature: float = 0.2, max_tokens: int = 2000, ) -> str: try: from openai import OpenAI client = OpenAI(api_key=api_key) try: resp = client.chat.completions.create( model=model, messages=messages, temperature=temperature, max_tokens=max_tokens, ) return resp.choices[0].message.content except Exception: # Fallback to Responses API joined = "\n".join([f"{m['role'].upper()}: {m['content']}" for m in messages]) resp = client.responses.create( model=model, input=joined, temperature=temperature, max_output_tokens=max_tokens, ) if hasattr(resp, "output_text"): return resp.output_text try: return resp.choices[0].message.content # type: ignore[attr-defined] except Exception: return str(resp) except ImportError: import openai # type: ignore openai.api_key = api_key resp = openai.ChatCompletion.create( # type: ignore model=model, messages=messages, temperature=temperature, max_tokens=max_tokens, ) return resp["choices"][0]["message"]["content"] # --- Prompt templates (ALL literal braces escaped) ------------------------------ SUBTOPIC_PROMPT = """You are a curriculum designer. Extract at least {min_subtopics} clear, non-overlapping subtopics from the EDUCATIONAL TEXT below. Each subtopic should be concise (3–8 words) and collectively cover the main ideas. Return ONLY valid JSON of the form: {{ "subtopics": ["...", "...", "..."] }} EDUCATIONAL TEXT: --- {source_text} --- """ QUESTION_PROMPT = """You are an assessment designer. Create {n_per_subtopic} {qtype_desc} questions for EACH subtopic provided. Vary difficulty around {difficulty} difficulty. Keep questions unambiguous and self-contained. If question_type == "MCQ": provide *exactly four* options ("A","B","C","D") and the correct_key as one of "A"/"B"/"C"/"D". If question_type == "Short Answer": provide a model_answer that is 1–3 sentences. Return ONLY valid JSON in the following schema: {{ "items": [ {{ "subtopic": "String", "question_type": "{qtype}", "question": "String", "options": {{"A": "String", "B": "String", "C": "String", "D": "String"}} OR null, "correct_key": "A|B|C|D" OR null, "model_answer": "String" OR null }}, ... ] }} SUBTOPICS (the generator must cover these and label each item with the matching subtopic): {selected_subtopics} """ # policy-aware simulation prompt (subtopic-aware) SIMULATE_STUDENT_PROMPT = """You will roleplay as a student with this profile: --- {student_profile} --- **Policy (you MUST follow):** {policy_json} Guidelines: - Use the **subtopic** of each question to decide where to excel vs. struggle. - Hit the target accuracy ranges by category (strong/weak/neutral). If needed, deliberately pick a plausible but wrong choice. Never admit you’re doing this. - MCQ: answer ONLY the option key (A/B/C/D). Short Answer: 1–3 sentences; on weak areas, it’s ok to be vague, omit a key detail, or make a misconception. Return ONLY valid JSON: {{ "answers": [ {{"id": "QUESTION_ID", "answer": "String"}}, ... ] }} QUESTIONS (with IDs & subtopics): {questions_json} """ GRADING_PROMPT = """You are a strict teacher using a clear rubric. Grade each student answer against the provided key/model answer. For MCQ: mark correct if the chosen key matches the correct_key. For Short Answer: mark correct if the essential facts match (allow paraphrase), else incorrect. Give a one-sentence rationale. Return ONLY valid JSON with this schema: {{ "results": [ {{ "id": "QUESTION_ID", "subtopic": "String", "is_correct": true/false, "score": 1 or 0, "rationale": "String" }}, ... ], "by_subtopic": [ {{ "subtopic": "String", "total": N, "correct": M, "accuracy": 0.0_to_1.0 }}, ... ] }} QUESTIONS (with answers): {questions_and_keys_json} STUDENT ANSWERS: {student_answers_json} """ PRESCRIPTION_PROMPT = """You are an expert tutor. Based on the per-subtopic performance for two students, write: 1) A concise progress recap for each student (3–5 sentences). 2) A prioritized list of weak subtopics for each student (up to 5). 3) For each weak subtopic and each student, suggest a mini-homework plan: 3 concrete practice tasks (in increasing difficulty). Return ONLY valid JSON: {{ "student_1": {{ "recap": "String", "weak_subtopics": ["..."], "homework": [{{"subtopic":"String","tasks":["...","...","..."]}}] }}, "student_2": {{ "recap": "String", "weak_subtopics": ["..."], "homework": [{{"subtopic":"String","tasks":["...","...","..."]}}] }} }} PERFORMANCE SUMMARY (Student 1): {perf_1_json} PERFORMANCE SUMMARY (Student 2): {perf_2_json} """ # Personalized study summary prompt STUDY_SUMMARY_PROMPT = """You are a learning coach. Using the performance summary and the proposed homework for ONE student, write a short **personalized home-study summary** they can follow on their own. Include, in order: - **Strengths:** 2–3 quick bullets. - **Weak spots:** 2–3 bullets naming subtopics (lowest accuracy first). - **3 study goals** (clear, measurable). - **7-day micro-plan:** Day 1 → Day 7 bullets (one action each). - **Motivation tip** (1 sentence). Constraints: - Keep it concise: 120–180 words total. - Use simple language and Markdown bullets. - Do not mention accuracy numbers; just reflect them implicitly. PERFORMANCE: {perf_json} HOMEWORK (may be empty): {hw_json} """ # --- Core logic ----------------------------------------------------------------- def extract_subtopics(api_key: str, model: str, text: str, min_subtopics: int) -> List[str]: if not api_key or not model: raise gr.Error("Please enter your API key and select a model on the Setup tab.") if not text.strip(): raise gr.Error("Please paste the educational text.") msg = [ {"role": "system", "content": "You produce strictly valid JSON."}, {"role": "user", "content": SUBTOPIC_PROMPT.format(min_subtopics=min_subtopics, source_text=text.strip())}, ] raw = _call_openai_chat(api_key, model, msg, temperature=0.1) try: data = json.loads(raw) subs = data.get("subtopics", []) subs = [s.strip() for s in subs if isinstance(s, str) and s.strip()] if len(subs) < min_subtopics: extra_needed = min_subtopics - len(subs) subs += [f"Additional Subtopic {i+1}" for i in range(extra_needed)] seen, uniq = set(), [] for s in subs: key = s.lower() if key not in seen: uniq.append(s) seen.add(key) return uniq except Exception: lines = [ln.strip("-• \t") for ln in raw.splitlines() if ln.strip()] return lines[:max(min_subtopics, len(lines))] def generate_questions( api_key: str, model: str, selected_subtopics: List[str], qtype: str, n_per_subtopic: int, difficulty: str ) -> List[Dict[str, Any]]: if not selected_subtopics: raise gr.Error("Please select at least one subtopic in the Subtopics tab.") qtype_desc = "multiple-choice (MCQ with 4 options)" if qtype == "MCQ" else "short-answer" prompt = QUESTION_PROMPT.format( n_per_subtopic=n_per_subtopic, qtype_desc=qtype_desc, difficulty=difficulty, qtype=qtype, selected_subtopics=json.dumps(selected_subtopics, ensure_ascii=False, indent=2), ) msg = [ {"role": "system", "content": "You produce strictly valid JSON and follow the schema exactly."}, {"role": "user", "content": prompt}, ] raw = _call_openai_chat(api_key, model, msg, temperature=0.7, max_tokens=2800) try: data = json.loads(raw) items = data.get("items", []) except Exception: raise gr.Error("The model did not return valid JSON for questions. Try again or reduce counts.") questions: List[Dict[str, Any]] = [] for it in items: qid = str(uuid.uuid4()) subtopic = (it.get("subtopic") or "").strip() question_type = it.get("question_type") or qtype question = (it.get("question") or "").strip() options = it.get("options") or None correct_key = it.get("correct_key") or None model_answer = it.get("model_answer") or None if question_type == "MCQ": if not (isinstance(options, dict) and correct_key in {"A", "B", "C", "D"}): continue else: if not model_answer: continue questions.append({ "id": qid, "subtopic": subtopic, "question_type": question_type, "question": question, "options": options, "correct_key": correct_key, "model_answer": model_answer, }) return questions # --- Policy helpers to force visible divergence between students ---------------- def _derive_policy(student_profile: str) -> Dict[str, Any]: """Infer strong/weak areas and target accuracies from a free-form profile.""" p = student_profile.lower() strong_terms, weak_terms = set(), set() # Heuristics from profile if re.search(r"strong in (definitions?|theor(?:y|ies)|concepts?)", p): strong_terms |= {"definition", "definitions", "theory", "theories", "concept", "concepts", "term", "terms"} if re.search(r"weak(?:er)? in (definitions?|theor(?:y|ies)|concepts?)", p): weak_terms |= {"definition", "definitions", "theory", "theories", "concept", "concepts", "term", "terms"} if re.search(r"strong in (applications?|problem ?solving|calculations?)", p): strong_terms |= {"application", "applications", "problem", "problems", "problem solving", "case", "cases", "calculation", "calculations", "practice"} if re.search(r"weak(?:er)? in (applications?|problem ?solving|calculations?)", p): weak_terms |= {"application", "applications", "problem", "problems", "problem solving", "case", "cases", "calculation", "calculations", "practice"} # Generic defaults if not mentioned if not strong_terms and "theor" in p: strong_terms |= {"definition","concept","theory","term"} if not weak_terms and "careless" in p: weak_terms |= {"definition","term"} # careless → slips on definitional precision # Accuracy targets overall = 0.65 # baseline realism if "anxious" in p: overall -= 0.05 if "confident" in p: overall += 0.05 weak_acc = 0.45 strong_acc = 0.85 neutral_acc = overall careless_rate = 0.15 if "careless" in p else 0.05 variance = 0.05 # small randomness return { "strong_terms": sorted(strong_terms), "weak_terms": sorted(weak_terms), "target_acc": { "strong": strong_acc, "weak": weak_acc, "neutral": neutral_acc }, "overall_target": overall, "careless_rate": careless_rate, "variance": variance } def _classify_subtopic(name: str, policy: Dict[str, Any]) -> str: s = (name or "").lower() strong_hits = any(t in s for t in policy["strong_terms"]) weak_hits = any(t in s for t in policy["weak_terms"]) if weak_hits and not strong_hits: return "weak" if strong_hits and not weak_hits: return "strong" return "neutral" def _wrong_option_letter(correct_key: str) -> str: pool = ["A","B","C","D"] pool = [x for x in pool if x != (correct_key or "").upper()] return random.choice(pool) if pool else "A" def _enforce_profile_variation( questions: List[Dict[str, Any]], answers: List[Dict[str, Any]], policy: Dict[str, Any] ) -> List[Dict[str, Any]]: """Post-process MCQ answers to meet target wrong-rate per category. Short answers untouched.""" # Indexing q_by_id = {q["id"]: q for q in questions} ans_by_id = {a["id"]: a["answer"] for a in answers} # Collect MCQs per category buckets = {"strong": [], "weak": [], "neutral": []} for q in questions: if q.get("question_type") != "MCQ": continue cat = _classify_subtopic(q.get("subtopic",""), policy) buckets[cat].append(q["id"]) # For each category, compute current and target wrong counts for cat, qids in buckets.items(): if not qids: continue target_acc = policy["target_acc"][cat] # add small variance so runs don't look identical target_acc += random.uniform(-policy["variance"], policy["variance"]) target_acc = max(0.2, min(0.95, target_acc)) total = len(qids) desired_wrong = round(total * (1 - target_acc)) # Compute current wrongs current_wrong = 0 correct_candidates = [] # qids currently correct → can flip to wrong if needed for qid in qids: q = q_by_id[qid] stu = (ans_by_id.get(qid) or "").strip().upper() correct = (q.get("correct_key") or "").strip().upper() if stu and correct and stu == correct: correct_candidates.append(qid) else: current_wrong += 1 need_more_wrong = max(0, desired_wrong - current_wrong) # Flip some correct ones to wrong if need_more_wrong > 0 and correct_candidates: random.shuffle(correct_candidates) for qid in correct_candidates[:need_more_wrong]: correct = (q_by_id[qid].get("correct_key") or "").strip().upper() ans_by_id[qid] = _wrong_option_letter(correct) # Optional: sprinkle a few careless slips across all categories if random.random() < policy["careless_rate"]: for qid in random.sample(qids, k=max(0, min(1, len(qids)))): correct = (q_by_id[qid].get("correct_key") or "").strip().upper() if ans_by_id.get(qid, "").upper() == correct: ans_by_id[qid] = _wrong_option_letter(correct) # Rebuild answers list out = [] for a in answers: qid = a["id"] out.append({"id": qid, "answer": ans_by_id.get(qid, a["answer"])}) return out def simulate_student_answers( api_key: str, model: str, student_profile: str, questions: List[Dict[str, Any]], ) -> List[Dict[str, Any]]: # Pack questions with subtopics so the model can bias performance qpack = [ { "id": q["id"], "subtopic": q["subtopic"], "question_type": q["question_type"], "question": q["question"], "options": q["options"], } for q in questions ] # Derive an explicit policy from the free-text profile policy = _derive_policy(student_profile) prompt = SIMULATE_STUDENT_PROMPT.format( student_profile=student_profile.strip(), policy_json=json.dumps(policy, ensure_ascii=False, indent=2), questions_json=json.dumps(qpack, ensure_ascii=False, indent=2), ) msg = [ {"role": "system", "content": "Return strictly valid JSON and keep answers realistic given the policy."}, {"role": "user", "content": prompt}, ] raw = _call_openai_chat(api_key, model, msg, temperature=0.8, max_tokens=3000) try: data = json.loads(raw) answers = data.get("answers", []) except Exception: raise gr.Error("Failed to parse student answers JSON.") # Normalize normalized = [] for a in answers: qid = a.get("id") ans = (a.get("answer") or "").strip() if qid and ans: normalized.append({"id": qid, "answer": ans}) # Keep only answers for our questions q_ids = {q["id"] for q in questions} filtered = [a for a in normalized if a["id"] in q_ids] # Enforce target variation to visibly differentiate students (MCQ-safe) filtered = _enforce_profile_variation(questions, filtered, policy) return filtered def grade_student( api_key: str, model: str, questions: List[Dict[str, Any]], student_answers: List[Dict[str, Any]], ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]: q_map = {q["id"]: q for q in questions} bundle = [] for sa in student_answers: qid = sa["id"] if qid in q_map: q = q_map[qid] bundle.append({ "id": qid, "subtopic": q["subtopic"], "question_type": q["question_type"], "question": q["question"], "options": q["options"], "correct_key": q.get("correct_key"), "model_answer": q.get("model_answer"), "student_answer": sa["answer"], }) prompt = GRADING_PROMPT.format( questions_and_keys_json=json.dumps(bundle, ensure_ascii=False, indent=2), student_answers_json=json.dumps(student_answers, ensure_ascii=False, indent=2), ) msg = [ {"role": "system", "content": "Return strictly valid JSON following the schema."}, {"role": "user", "content": prompt}, ] raw = _call_openai_chat(api_key, model, msg, temperature=0.0, max_tokens=3500) try: data = json.loads(raw) results = data.get("results", []) by_subtopic = data.get("by_subtopic", []) for r in results: r.setdefault("score", 1 if r.get("is_correct") else 0) return results, by_subtopic except Exception: # Heuristic fallback (MCQ only) results = [] tally = {} for b in bundle: is_correct = False if b["question_type"] == "MCQ": is_correct = (b["student_answer"].strip().upper() == (b.get("correct_key") or "").upper()) score = 1 if is_correct else 0 results.append({"id": b["id"], "subtopic": b["subtopic"], "is_correct": is_correct, "score": score, "rationale": "Heuristic fallback."}) t = tally.setdefault(b["subtopic"], {"subtopic": b["subtopic"], "total": 0, "correct": 0, "accuracy": 0.0}) t["total"] += 1 t["correct"] += score for t in tally.values(): t["accuracy"] = round(t["correct"] / max(1, t["total"]), 3) by_subtopic = list(tally.values()) return results, by_subtopic def prescribe_homework( api_key: str, model: str, perf1: List[Dict[str, Any]], perf2: List[Dict[str, Any]], ) -> Dict[str, Any]: prompt = PRESCRIPTION_PROMPT.format( perf_1_json=json.dumps(perf1, ensure_ascii=False, indent=2), perf_2_json=json.dumps(perf2, ensure_ascii=False, indent=2), ) msg = [ {"role": "system", "content": "Return strictly valid JSON exactly as requested."}, {"role": "user", "content": prompt}, ] raw = _call_openai_chat(api_key, model, msg, temperature=0.4, max_tokens=2200) try: data = json.loads(raw) return data except Exception: return { "student_1": {"recap": "N/A", "weak_subtopics": [], "homework": []}, "student_2": {"recap": "N/A", "weak_subtopics": [], "homework": []}, } # Personalized study summary helper def summarize_student( api_key: str, model: str, perf: List[Dict[str, Any]], rx_student: Dict[str, Any], ) -> str: prompt = STUDY_SUMMARY_PROMPT.format( perf_json=json.dumps(perf, ensure_ascii=False, indent=2), hw_json=json.dumps(rx_student or {}, ensure_ascii=False, indent=2), ) msg = [ {"role": "system", "content": "Write concise Markdown only (no JSON, no preamble). Max ~180 words."}, {"role": "user", "content": prompt}, ] text = _call_openai_chat(api_key, model, msg, temperature=0.3, max_tokens=500) return text.strip() # --- Gradio UI ------------------------------------------------------------------ with gr.Blocks(css="footer {visibility: hidden}") as demo: gr.Markdown("# 🎓 Educational Tutor\nDesign subtopics → generate questions → simulate students → analyze → prescribe homework") # App-wide state st_api_key = gr.State("") st_model = gr.State("gpt-4o-mini") st_source_text = gr.State("") st_subtopics = gr.State([]) # List[str] st_selected_subtopics = gr.State([]) # List[str] st_questions = gr.State([]) # List[dict] st_student1_answers = gr.State([]) # List[dict] st_student2_answers = gr.State([]) # List[dict] st_grade1 = gr.State([]) # List[dict] results st_grade2 = gr.State([]) st_perf1 = gr.State([]) # by_subtopic st_perf2 = gr.State([]) st_rx = gr.State({}) # prescriptions with gr.Tab("1) Setup"): with gr.Row(): api_key_in = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...") model_in = gr.Dropdown( label="Model", choices=[ "gpt-4o-mini", "gpt-4o", "o4-mini", "gpt-4.1-mini", "gpt-4.1", "gpt-3.5-turbo", "gpt-4-turbo", ], value="gpt-4o-mini", allow_custom_value=True, ) save_btn = gr.Button("Save Settings", variant="primary") status = gr.Markdown("") def _save_settings(api_key, model): if not api_key or not model: raise gr.Error("Please provide API key and a model.") return api_key, model, f"✅ Settings saved: **{model}**" save_btn.click( _save_settings, inputs=[api_key_in, model_in], outputs=[st_api_key, st_model, status], ) with gr.Tab("2) Subtopics"): source_text = gr.Textbox(lines=12, label="Paste Educational Text", placeholder="Paste the text students will learn...") min_sub = gr.Slider(2, 20, value=5, step=1, label="Minimum number of subtopics") extract_btn = gr.Button("Extract Subtopics", variant="primary") subs_out = gr.CheckboxGroup(label="Select subtopics to include", choices=[]) def _extract(api_key, model, text, min_n): subs = extract_subtopics(api_key, model, text, int(min_n)) return ( text, subs, gr.update(choices=subs, value=subs) ) extract_btn.click( _extract, inputs=[st_api_key, st_model, source_text, min_sub], outputs=[st_source_text, st_subtopics, subs_out], ) def _select_subs(selected, available): if not available: return [] safe = [s for s in (selected or []) if s in available] return safe subs_out.change( _select_subs, inputs=[subs_out, st_subtopics], outputs=st_selected_subtopics ) with gr.Tab("3) Generate Questions"): with gr.Row(): qtype = gr.Radio(["Short Answer", "MCQ"], value="MCQ", label="Question Type") n_per_sub = gr.Slider(1, 10, value=3, step=1, label="Questions per selected subtopic") difficulty = gr.Dropdown(["easy", "medium", "hard"], value="medium", label="Difficulty") gen_btn = gr.Button("Generate Questions", variant="primary") q_table = gr.Dataframe( headers=["id","subtopic","question_type","question","options","correct_key","model_answer"], row_count=(1, "dynamic"), type="array", label="Generated Questions" ) hint = gr.Markdown("You can edit cells. For MCQ 'options', keep valid JSON, e.g. {\"A\":\"...\",\"B\":\"...\",\"C\":\"...\",\"D\":\"...\"}") def _gen_q(api_key, model, selected, qtype_value, n, diff): qtype_norm = "MCQ" if qtype_value == "MCQ" else "Short Answer" qs = generate_questions(api_key, model, selected or [], qtype_norm, int(n), diff) rows = [] for q in qs: rows.append([ q.get("id"), q.get("subtopic"), q.get("question_type"), q.get("question"), json.dumps(q.get("options"), ensure_ascii=False) if q.get("options") else None, q.get("correct_key"), q.get("model_answer"), ]) return qs, rows gen_btn.click( _gen_q, inputs=[st_api_key, st_model, st_selected_subtopics, qtype, n_per_sub, difficulty], outputs=[st_questions, q_table], ) def _apply_edits(df): qs = [] if not isinstance(df, list): return qs for row in df: if not row: continue row = list(row) + [None] * (7 - len(row)) row = row[:7] qid, subtopic, qtype_v, question, options_raw, correct_key, model_answer = row if not (qid and question): continue options = None if isinstance(options_raw, str) and options_raw.strip(): try: parsed = json.loads(options_raw) if isinstance(parsed, dict): options = parsed except Exception: options = None elif isinstance(options_raw, dict): options = options_raw qs.append({ "id": qid, "subtopic": subtopic, "question_type": qtype_v, "question": question, "options": options, "correct_key": correct_key, "model_answer": model_answer, }) return qs q_table.change(_apply_edits, inputs=q_table, outputs=st_questions) with gr.Tab("4) Simulate Students"): gr.Markdown("Provide brief profiles. The model will answer as each persona.") s1 = gr.Textbox(label="Student 1 Profile", value="Diligent but anxious test-taker. Strong in theory, weaker in applications.") s2 = gr.Textbox(label="Student 2 Profile", value="Confident and fast, sometimes careless. Strong in applications, weaker in definitions.") sim_btn = gr.Button("Simulate Answers", variant="primary") s1_table = gr.Dataframe(headers=["question_id","answer"], row_count=(1, "dynamic"), type="array", label="Student 1 Answers (editable)") s2_table = gr.Dataframe(headers=["question_id","answer"], row_count=(1, "dynamic"), type="array", label="Student 2 Answers (editable)") def _simulate(api_key, model, prof1, prof2, qs): if not qs: raise gr.Error("No questions generated yet.") a1 = simulate_student_answers(api_key, model, prof1, qs) a2 = simulate_student_answers(api_key, model, prof2, qs) rows1 = [[x["id"], x["answer"]] for x in a1] rows2 = [[x["id"], x["answer"]] for x in a2] return a1, a2, rows1, rows2 sim_btn.click( _simulate, inputs=[st_api_key, st_model, s1, s2, st_questions], outputs=[st_student1_answers, st_student2_answers, s1_table, s2_table], ) def _apply_s_answers(df): out = [] if not isinstance(df, list): return out for r in df: if not r or len(r) < 2: continue qid = r[0] ans = r[1] if qid and ans is not None: out.append({"id": qid, "answer": str(ans)}) return out s1_table.change(_apply_s_answers, inputs=s1_table, outputs=st_student1_answers) s2_table.change(_apply_s_answers, inputs=s2_table, outputs=st_student2_answers) with gr.Tab("5) Analysis & Homework"): grade_btn = gr.Button("Grade & Analyze", variant="primary") with gr.Row(): perf1_tbl = gr.Dataframe(headers=["subtopic","total","correct","accuracy"], row_count=(1, "dynamic"), type="array", label="Student 1 – Per-Subtopic Performance") perf2_tbl = gr.Dataframe(headers=["subtopic","total","correct","accuracy"], row_count=(1, "dynamic"), type="array", label="Student 2 – Per-Subtopic Performance") report_md = gr.Markdown() hw1 = gr.JSON(label="Student 1 – Homework Plan") hw2 = gr.JSON(label="Student 2 – Homework Plan") # Personalized study summaries gr.Markdown("### Student 1 – Personalized Study Summary") sum1_md = gr.Markdown() gr.Markdown("### Student 2 – Personalized Study Summary") sum2_md = gr.Markdown() def _grade_and_analyze(api_key, model, qs, a1, a2): if not qs or not a1 or not a2: raise gr.Error("Need questions and both students' answers first.") res1, by1 = grade_student(api_key, model, qs, a1) res2, by2 = grade_student(api_key, model, qs, a2) table1 = [[b["subtopic"], b["total"], b["correct"], b["accuracy"]] for b in by1] table2 = [[b["subtopic"], b["total"], b["correct"], b["accuracy"]] for b in by2] def _acc(by): if not by: return 0.0 num = sum(b.get("correct", 0) for b in by) den = sum(b.get("total", 0) for b in by) return round(num / max(1, den), 3) rx_json = prescribe_homework(api_key, model, by1, by2) s = f"**Student 1 overall accuracy:** { _acc(by1) } \n**Student 2 overall accuracy:** { _acc(by2) } \n" s += "\n**Notes:** Lower-accuracy subtopics indicate targets for remediation. See Homework and Personalized Summaries below." s1_rx = rx_json.get("student_1", {}) s2_rx = rx_json.get("student_2", {}) # generate summaries using performance + homework s1_sum = summarize_student(api_key, model, by1, s1_rx) s2_sum = summarize_student(api_key, model, by2, s2_rx) return ( res1, res2, by1, by2, table1, table2, s, s1_rx, s2_rx, s1_sum, s2_sum ) grade_btn.click( _grade_and_analyze, inputs=[st_api_key, st_model, st_questions, st_student1_answers, st_student2_answers], outputs=[ # order must match return above st_grade1, st_grade2, st_perf1, st_perf2, perf1_tbl, perf2_tbl, report_md, hw1, hw2, sum1_md, sum2_md ], ) gr.Markdown("— Built using Gradio + OpenAI —") if __name__ == "__main__": # Set share=True to get a public link demo.launch(share=True)