# app.py
import gradio as gr
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from langchain.docstore.document import Document
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

# Sample Q&A data
qa_pairs = [
    {"question": "What is the role of a data scientist?",
     "answer": "A data scientist analyzes complex data to help organizations make informed decisions."},
    {"question": "How does machine learning differ from traditional programming?",
     "answer": "Machine learning uses data to train models that make predictions, while traditional programming relies on explicit rules."},
    {"question": "What is overfitting in ML?",
     "answer": "Overfitting occurs when a model learns noise in the training data, performing well on training but poorly on unseen data."},
    {"question": "What is LangChain used for?",
     "answer": "LangChain is a framework for building applications powered by language models, especially for retrieval and chaining tasks."},
    {"question": "What is Chroma in LangChain?",
     "answer": "Chroma is a vector store used to store and retrieve documents based on their embeddings."},
]

# Convert each Q&A pair into a LangChain Document
docs = [
    Document(page_content=f"Q: {q['question']}\nA: {q['answer']}")
    for q in qa_pairs
]

# Split documents into overlapping chunks for retrieval
splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=20)
split_docs = splitter.split_documents(docs)

# Embedding model (runs locally via sentence-transformers)
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Vector store and retriever
db = Chroma.from_documents(split_docs, embedding)
retriever = db.as_retriever()

# Load Flan-T5 as a local text2text-generation pipeline
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
hf_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
llm = HuggingFacePipeline(pipeline=hf_pipeline)

# Retrieval QA chain: "stuff" puts all retrieved chunks into a single prompt
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

# Gradio UI
def answer_query(query):
    return qa_chain.run(query)

demo = gr.Interface(
    fn=answer_query,
    inputs=gr.Textbox(label="Ask a question", placeholder="e.g. What is overfitting?"),
    outputs=gr.Textbox(label="Answer"),
    title="Exact Answer Q&A Chatbot",
    description="Powered by Flan-T5 and Chroma. Runs locally once the models are downloaded.",
)

if __name__ == "__main__":
    demo.launch()
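
# ---------------------------------------------------------------------------
# Optional smoke test (a sketch, not part of the app above). The pip line is
# an assumption inferred from the imports; the code targets langchain 0.1.x,
# where the langchain.* import paths and qa_chain.run() are still available:
#
#   pip install gradio langchain chromadb sentence-transformers transformers torch
#
# To exercise the chain without the UI, comment out demo.launch() and run,
# e.g. in a REPL (`python -i app.py`):
#
#   for q in ("What is overfitting in ML?", "What is Chroma in LangChain?"):
#       print(q, "->", qa_chain.run(q))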