# app.py
"""Gradio playground for Indonesian NLP tasks (IndoBERT / IndoLEM / IndoNLU).

Pipelines are loaded lazily (and cached, guarded by a lock) to save memory.
Model ids can be swapped in the MODELS dict below.
"""

import threading

import gradio as gr
import numpy as np
from transformers import pipeline

# --- configuration: model ids you might want to swap ---
MODELS = {
    "fill-mask": "indolem/indobert-base-uncased",
    # a sample fine-tuned classifier on IndoNLU (replace with your preferred HF model)
    "sentiment": "ayameRushia/indobert-base-uncased-finetuned-indonlu-smsa",
    # example NER model fine-tuned from Indonesian BERT (replace if you prefer another)
    "ner": "ageng-anugrah/indobert-large-p2-finetuned-ner",
    # QA (if you have a QA model); you can also use a general model but best to use
    # a dedicated fine-tuned QA model
    "qa": "indobenchmark/indobert-base-p1",
    # embeddings / feature extraction: use a model that supports sentence embeddings
    # or feature extraction
    "embeddings": "indobenchmark/indobert-base-p1",
}

# pipeline cache: task name -> loaded transformers pipeline
PIPELINES = {}
PIPELINE_LOCK = threading.Lock()


def get_pipeline(task: str):
    """Lazy-load pipeline for a given task. Thread-safe.

    Returns the cached pipeline when one exists; otherwise constructs it,
    caches it, and returns it. Raises ValueError for an unknown task.
    """
    with PIPELINE_LOCK:
        if task in PIPELINES:
            return PIPELINES[task]
        if task == "fill-mask":
            p = pipeline("fill-mask", model=MODELS["fill-mask"],
                         tokenizer=MODELS["fill-mask"])
        elif task == "sentiment":
            # top_k=None returns a flat list of {label, score} dicts for a single
            # input.  The old return_all_scores=True is deprecated and wraps the
            # results in an extra list, which would break run_sentiment's loop.
            p = pipeline("text-classification", model=MODELS["sentiment"],
                         tokenizer=MODELS["sentiment"], top_k=None)
        elif task == "ner":
            # aggregation_strategy avoids repeated tokens; set to "simple" or None
            # to see raw results
            p = pipeline("token-classification", model=MODELS["ner"],
                         tokenizer=MODELS["ner"], aggregation_strategy="simple")
        elif task == "qa":
            # For QA we return an extractive QA pipeline
            p = pipeline("question-answering", model=MODELS["qa"],
                         tokenizer=MODELS["qa"])
        elif task == "embeddings":
            # Use feature-extraction pipeline (returns token embeddings; we'll
            # average to produce sentence-level)
            p = pipeline("feature-extraction", model=MODELS["embeddings"],
                         tokenizer=MODELS["embeddings"])
        else:
            raise ValueError(f"Unknown task: {task}")
        PIPELINES[task] = p
        return p


# --- functions for each task ---

def run_fill_mask(text):
    """Run masked-token prediction and format the candidates as text."""
    p = get_pipeline("fill-mask")
    # The fill-mask pipeline expects the model's mask token (e.g. [MASK]) to be
    # present in the input; the user must include it for the chosen model.
    try:
        outputs = p(text)
    except Exception as e:
        return f"Error running fill-mask: {e}"
    # Format results
    return "\n".join([f"{o['sequence']} (score: {o['score']:.4f})" for o in outputs])


def run_sentiment(text):
    """Run text classification and list every label with its score."""
    p = get_pipeline("sentiment")
    try:
        outputs = p(text)
    except Exception as e:
        return f"Error running sentiment: {e}"
    # outputs is a flat list of dicts with label/score (thanks to top_k=None)
    return "\n".join([f"{o['label']}: {o['score']:.4f}" for o in outputs])


def run_ner(text):
    """Run token classification and format one entity per line."""
    p = get_pipeline("ner")
    try:
        ents = p(text)
    except Exception as e:
        return f"Error running NER: {e}"
    if not ents:
        return "No entities found."
    # Format: label (span): text
    lines = []
    for e in ents:
        # entity_group exists when aggregation_strategy merges sub-tokens;
        # fall back to the raw entity key otherwise.
        label = e.get("entity_group", e.get("entity"))
        span = e.get("word", "")
        score = e.get("score", 0.0)
        lines.append(f"{label} ({score:.3f}): {span}")
    return "\n".join(lines)


def run_qa(context, question):
    """Run extractive QA over *context* and return the answer with its score."""
    p = get_pipeline("qa")
    try:
        out = p(question=question, context=context)
    except Exception as e:
        return f"Error running QA: {e}"
    return f"Answer: {out.get('answer')} (score: {out.get('score', 0):.4f})"


def run_embeddings(text):
    """Mean-pool token embeddings into a sentence vector and show a preview."""
    p = get_pipeline("embeddings")
    try:
        feats = p(text)  # returns nested token embeddings
    except Exception as e:
        return f"Error extracting embeddings: {e}"
    # average token embeddings to get sentence vector
    arr = np.array(feats)        # shape: (1, seq_len, hidden)
    sent = arr.mean(axis=1)      # (1, hidden)
    vec = sent[0].tolist()
    # For display keep a short preview
    preview = ", ".join([f"{v:.4f}" for v in vec[:8]]) + ("..." if len(vec) > 8 else "")
    return f"Embedding (dim {len(vec)}): [{preview}]"


# --- Gradio UI ---
with gr.Blocks(title="Indonesian NLP Playground (IndoBERT / IndoLEM / IndoNLU)") as demo:
    gr.Markdown(
        "## Indonesian NLP Playground\n"
        "Choose a task, enter Indonesian text, and run IndoBERT / IndoLEM-powered models.\n\n"
        "Models are loaded lazily to save memory. You can replace model ids in the "
        "`MODELS` dict."
    )
    with gr.Row():
        task = gr.Dropdown(
            choices=["fill-mask", "sentiment", "ner", "qa", "embeddings"],
            value="sentiment",
            label="Task",
        )
        input_text = gr.Textbox(lines=4, placeholder="Type Indonesian text here...",
                                label="Input Text")
    # extra inputs for QA
    qa_question = gr.Textbox(lines=2, placeholder="Question (for QA)", visible=False,
                             label="Question (QA only)")
    output = gr.Textbox(lines=10, label="Output")

    def on_task_change(t):
        # Visibility must be changed through the returned update; mutating the
        # component attribute directly has no effect in Gradio.
        return gr.update(visible=(t == "qa"))

    task.change(on_task_change, inputs=[task], outputs=[qa_question])

    def run(selected_task, text, question):
        # Dispatch to the per-task runner; QA additionally consumes the question box.
        if selected_task == "fill-mask":
            return run_fill_mask(text)
        if selected_task == "sentiment":
            return run_sentiment(text)
        if selected_task == "ner":
            return run_ner(text)
        if selected_task == "qa":
            return run_qa(text, question)
        if selected_task == "embeddings":
            return run_embeddings(text)
        return "Unknown task."

    btn = gr.Button("Run")
    btn.click(run, inputs=[task, input_text, qa_question], outputs=[output])

if __name__ == "__main__":
    demo.launch()