import faiss
import pickle
import os
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline
from langchain.vectorstores import FAISS as LangChainFAISS
from langchain.docstore import InMemoryDocstore
from langchain.schema import Document
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
import gradio as gr
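# NOTE: these import paths target the pre-0.1 LangChain layout; newer releases moved
# HuggingFacePipeline, FAISS, and HuggingFaceEmbeddings into langchain_community
# (and later langchain_huggingface), so pin an older langchain version to reuse this script as-is.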
# Paths (relative to app root)
vector_path = "vector_store_faiss_chroma/faiss_index.index"
metadata_path = "vector_store_faiss_chroma/metadata.pkl"
#model_path = "HuggingFaceModels/falcon-1b-instruct"
#model_path = "tiiuae/Falcon3-1B-Instruct"
#model_path = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
#model_path = "mistralai/Mistral-7B-Instruct-v0.1"
#model_path = "microsoft/phi-2"
model_path = "tiiuae/falcon-rw-1b"
# Load the FAISS index
faiss_index = faiss.read_index(vector_path)
# Load metadata (text chunks)
with open(metadata_path, "rb") as f:
    metadata = pickle.load(f)
# Rebuild LangChain Documents
docs = [Document(page_content=doc["page_content"]) for doc in metadata]
# Link documents to FAISS vectors
docstore = InMemoryDocstore({str(i): docs[i] for i in range(len(docs))})
id_map = {i: str(i) for i in range(len(docs))}
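# This mapping assumes the metadata list is stored in the same order as the vectors
# in the FAISS index, so vector position i resolves to docstore key str(i).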
# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)
# Create a generation pipeline
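# return_full_text=False returns only the newly generated tokens (not the prompt),
# and max_new_tokens caps the length of each answer.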
text_generator_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    return_full_text=False,
    max_new_tokens=128,
    do_sample=True,  # temperature has no effect unless sampling is enabled
    temperature=0.2,
)
# Wrap it as a LangChain LLM
llm = HuggingFacePipeline(pipeline=text_generator_pipeline)
# Re-declare embedding function
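# This must be the same embedding model used when the FAISS index was built,
# otherwise query vectors will not live in the same space as the stored chunk vectors.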
embed_fn = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Create vectorstore and retriever
vectorstore_faiss = LangChainFAISS(
    index=faiss_index,
    docstore=docstore,
    index_to_docstore_id=id_map,
    embedding_function=embed_fn  # used to embed incoming queries at search time
)
# Create a retriever that returns top-k most relevant chunks
retriever = vectorstore_faiss.as_retriever(search_kwargs={"k": 2})
# Create the RAG pipeline (Retriever + LLM)
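# chain_type="stuff" concatenates all retrieved chunks into a single prompt, and
# return_source_documents=True exposes those chunks so the UI can cite them.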
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)
# Chatbot function: takes a user question and returns the generated answer
def ask_rag(query):
    result = qa_chain({"query": query})
    answer = result["result"]
    # Optional: include sources (limited to 2)
    sources = result.get("source_documents", [])
    source_texts = "\n\n".join(
        [f"Source {i+1}:\n{doc.page_content[:300]}..." for i, doc in enumerate(sources[:2])]
    )
    return f"Answer:\n{answer}\n\nSources:\n{source_texts}"
# Gradio UI components
gr.Interface(
    fn=ask_rag,
    inputs=gr.Textbox(lines=2, placeholder="Ask me about UCT admissions, housing, fees..."),
    outputs="text",
    title="University of Cape Town Course Advisor Chatbot",
    description="""
Ask any academic-related question about the University of Cape Town: admissions, programs, housing, fees, or wellness services.
This chatbot uses a Retrieval-Augmented Generation (RAG) pipeline powered by:
- FAISS for semantic search
- Falcon RW 1B for natural language answers
- LangChain for orchestration
Developed by **Serge Tsimba**, Data Science Consultant at **Amdari**/Canada.
Available 24/7. Start by asking: "How can international students apply to UCT?"
""",
    allow_flagging="never"
).launch()