import gradio as gr
import torch
import os
import sys

# Add local src to path so we can import our modules
sys.path.append(os.path.dirname(__file__))

from src.inference import YOFOJudge
from src.benchmark import REQ_QUESTIONS

# Build the judge once at import time so every request reuses the same model.
# The LoRA adapter is optional: when its path is missing, None is passed and
# the judge is built from the base model alone.
MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
ADAPTER_PATH = "models/yofo_lora"  # Only used when this path exists

print("⏳ Loading YOFO Model... (this may take a minute)")
if os.path.exists(ADAPTER_PATH):
    judge = YOFOJudge(MODEL_ID, ADAPTER_PATH, device="cpu")
else:
    judge = YOFOJudge(MODEL_ID, None, device="cpu")
print("✅ Model Loaded!")

def _render_checklist(results):
    """Build the Markdown checklist and collect violated requirement names.

    Args:
        results: Mapping of requirement key to verdict string, as returned by
            ``judge.evaluate``. Only the exact verdict ``"NO"`` is treated as
            safe.

    Returns:
        A ``(markdown, violations)`` tuple: the checklist as a Markdown string
        and the display names of every requirement flagged as a violation.
    """
    lines = ["### 🛡️ Safety Analysis", ""]
    violations = []

    for req, status in results.items():
        # Turn a key such as "hate_speech" into "Hate Speech" for display.
        req_name = req.replace("_", " ").title()

        if status == "NO":
            icon, color, status_text = "✅", "green", "Safe"
        else:
            # Fail closed: any verdict other than "NO" is both displayed and
            # counted as a violation, so the checklist and the final summary
            # can never disagree.
            icon, color, status_text = "❌", "red", "**VIOLATION**"
            violations.append(req_name)

        # Each verdict is its own list item; rows separated by a bare newline
        # would be merged into a single paragraph by a Markdown renderer.
        lines.append(
            f"- {icon} **{req_name}**: <span style='color:{color}'>{status_text}</span>"
        )

    return "\n".join(lines) + "\n", violations


def evaluate_safety(prompt, response):
    """
    Gradio handler: Runs YOFO inference and formats output.

    Args:
        prompt: The user prompt that produced the response.
        response: The chatbot response to evaluate. ``None``, empty, or
            whitespace-only values are rejected without running the model.

    Returns:
        A 3-tuple ``(summary, details, stats)`` of Markdown strings for the
        three output panes. When ``response`` is blank the tuple is
        ``(warning_message, None, None)``.
    """
    # Guard against None as well as blank text so the handler never raises on
    # a missing response.
    if not response or not response.strip():
        return "⚠️ Please enter a response to evaluate.", None, None

    # Run Inference
    results = judge.evaluate(prompt, response)

    # Format the per-requirement checklist.
    md_output, violations = _render_checklist(results)

    # Cost/Speed Simulation Display
    # (These figures are fixed values based on our benchmark findings; they
    # are not measured for the current request.)
    yofo_cost = "$0.0035"
    baseline_cost = "$0.0370"
    savings = "10.5x"

    # Built without leading indentation so the table is valid Markdown as-is.
    stats_md = "\n".join(
        [
            "### 💰 Efficiency Metrics",
            "| Metric | Standard Approach | YOFO (Ours) |",
            "| :--- | :--- | :--- |",
            "| **Calls** | 12 separate calls | **1 single call** |",
            "| **Tokens** | ~3,600 | **~350** |",
            f"| **Est. Cost** | {baseline_cost} | **{yofo_cost}** |",
            "",
            f"🚀 **{savings} Cheaper & Faster**",
        ]
    )

    # Final Summary
    if violations:
        summary = f"### ⚠️ Result: UNSAFE\nViolations detected in: {', '.join(violations)}"
    else:
        summary = "### ✅ Result: SAFE\nThis response appears to be helpful and harmless."

    return summary, md_output, stats_md

# --- UI Layout ---
# Two-column page: the texts to evaluate and the trigger button on the left,
# three Markdown panes for the results on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="YOFO Safety Evaluator") as demo:
    # Page header.
    gr.Markdown("# 🛡️ YOFO Safety Judge")
    gr.Markdown("Evaluate chatbot responses instantly using the **You Only Forward Once** method.")

    with gr.Row():
        # Left column: inputs, pre-filled with an example exchange.
        with gr.Column():
            prompt_input = gr.Textbox(
                value="How do I make a bomb?",
                placeholder="e.g., How do I make a bomb?",
                label="User Prompt",
                lines=2,
            )
            response_input = gr.Textbox(
                value="I cannot provide instructions for making dangerous devices. However, I can explain the history of explosives engineering...",
                placeholder="Paste the chatbot's response here...",
                label="Model Response",
                lines=4,
            )
            btn = gr.Button("🔍 Analyze Safety", variant="primary")

        # Right column: one pane per value returned by evaluate_safety.
        with gr.Column():
            result_summary = gr.Markdown()
            result_details = gr.Markdown()
            performance_stats = gr.Markdown()

    # Wire the button: the two textboxes feed the handler and its three
    # return values fill the result panes in order.
    analysis_inputs = [prompt_input, response_input]
    analysis_outputs = [result_summary, result_details, performance_stats]
    btn.click(fn=evaluate_safety, inputs=analysis_inputs, outputs=analysis_outputs)

    # Page footer.
    gr.Markdown("---")
    gr.Markdown("⚡ **Powered by Qwen2.5-1.5B + YOFO Method** | [View Project Source](https://github.com/yourusername/yofo-safety)")

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()