Spaces:

puzan789
/

portlae

Sleeping

App Files Files Community

puzan789 committed on May 2

Commit

2214088

0 Parent(s):

aAded

Browse files

Files changed (27) hide show

.env.example +3 -0
.gitignore +15 -0
.python-version +1 -0
Dockerfile +33 -0
README.md +73 -0
api.py +42 -0
app.py +56 -0
dia.png +0 -0
pyproject.toml +24 -0
ragevaluation/__init__.py +0 -0
ragevaluation/evaluate.py +124 -0
recipe_dataset.csv +50 -0
src/__init__.py +0 -0
src/answerquery/__init__.py +1 -0
src/answerquery/answerquery.py +102 -0
src/embedding/__init__.py +1 -0
src/embedding/embedding.py +13 -0
src/pipeline/__init__.py +1 -0
src/pipeline/pipeline.py +41 -0
src/schemas/__init__.py +1 -0
src/schemas/schemas.py +4 -0
src/settings.py +18 -0
src/utils.py +29 -0
src/vectorstore/__init__.py +2 -0
src/vectorstore/qdrant_document.py +37 -0
src/vectorstore/qdrant_vector_store.py +58 -0
uv.lock +0 -0

.env.example ADDED Viewed

	@@ -0,0 +1,3 @@

+GROQ_API_KEY
+QDRANT_URL
+QDRANT_API_KEY
+SERPER_API_KEY

.gitignore ADDED Viewed

	@@ -0,0 +1,15 @@

+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+# Virtual environments
+.venv
+/notebooks
+.env

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.12

Dockerfile ADDED Viewed

	@@ -0,0 +1,33 @@

+FROM python:3.13-slim
+# Do not buffer Python stdout/stderr, so log lines appear immediately
+ENV PYTHONUNBUFFERED=1
+# Install astral UV runtime
+COPY --from=ghcr.io/astral-sh/uv:0.6.13 /uv /uvx /bin/
+# Put the virtualenv created by `uv sync` first on PATH so `streamlit` resolves from it
+ENV PATH="/app/.venv/bin:$PATH"
+ENV UV_COMPILE_BYTECODE=1
+ENV UV_LINK_MODE=copy
+WORKDIR /app
+# Copy dependency files first
+COPY ./pyproject.toml ./uv.lock /app/
+# Install dependencies
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --no-install-project
+ENV PYTHONPATH=/app
+# Copy project files (including src/)
+COPY ./app.py /app/app.py
+COPY ./src /app/src
+# Final sync; --frozen keeps it pinned to uv.lock, same as the dependency sync above
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen
+EXPOSE 8501
+# Launch Streamlit app
+CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]

README.md ADDED Viewed

	@@ -0,0 +1,73 @@

+# RAG-Based QA System For Recipe
+## Setup
+To install dependencies, use `uv`. First, install `uv` if you haven’t already:
+### Ubuntu/Linux:
+```bash
+curl -LsSf https://astral.sh/uv/install.sh | sh
+```
+### Windows (PowerShell):
+```powershell
+irm https://astral.sh/uv/install.ps1 | iex
+```
+Then, install dependencies:
+```bash
+uv sync
+```
+## Vector Store: Qdrant
+This project uses **Qdrant** as a vector store.
+You can run it locally using Docker:
+```bash
+docker run -p 6333:6333 -p 6334:6334 qdrant/qdrant
+```
+Or use **Qdrant Cloud**, which is already set up in this project.
+To create your own cloud instance, go to: [https://qdrant.tech](https://qdrant.tech)
+## LLM: ChatGroq
+This project uses **ChatGroq**.
+Set the following environment variables:
+```env
+GROQ_API_KEY=your_groq_api_key
+QDRANT_URL=your_qdrant_cloud_url
+QDRANT_API_KEY=your_qdrant_api_key
+SERPER_API_KEY=your_serper_api_key
+```
+## Run the App
+To run the project:
+```bash
+streamlit run app.py
+```
+## Flow Diagram
+![Flow diagram](dia.png)
+## Todo
+- [ ] **Create API with FastAPI**
+  - Set up FastAPI endpoints for question answering.
+- [ ] **Implement Reranking Strategy for Retrieval**
+  - Add a reranking approach to improve document relevance.
+- [ ] **Add Pre-Rephrasing Strategy**
+  - Rephrase user questions before querying Qdrant.

api.py ADDED Viewed

	@@ -0,0 +1,42 @@

+from fastapi import FastAPI, UploadFile,File
+from src import pipeline
+from src.pipeline import QAPipeline
+from langchain_community.document_loaders import CSVLoader
+import shutil
+import os
+# Shared QA pipeline used by the request handlers below.
+# NOTE: this rebinds `pipeline`, shadowing the `from src import pipeline` module import above.
+pipeline= QAPipeline()
+# FastAPI application on which the route decorators below register their handlers
+app= FastAPI()
+@app.post("/")
+async def upload_documents(file:UploadFile=File(...)):
+    """
+    Create a new collection in Qdrant.
+    """
+    try:
+        if not file.filename.endswith('.csv'):
+            return {"error": "The uploaded file is not a CSV file."}
+        temp_path = f"/tmp/{file.filename}"
+        with open(temp_path, "wb") as buffer:
+            shutil.copyfileobj(file.file, buffer)
+        loader = CSVLoader(file_path=temp_path)
+        data = loader.load()
+        await pipeline.upload_documents(data)
+        os.remove(temp_path)
+        return {"message": "Documents uploaded successfully."}
+    except Exception as e:
+        raise e
+@app.get("/answer")
+async def answer_query(query:str):
+    """
+    Answer a query using the Groq model.
+    """
+    try:
+        response = await pipeline.answer_query_(query)
+        return {"response": response}
+    except Exception as e:
+        raise e

app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import streamlit as st
+from src.pipeline import QAPipeline
+from langchain_community.document_loaders import CSVLoader
+import os
+import tempfile
+import asyncio
+import nest_asyncio
+nest_asyncio.apply()
+os.environ["STREAMLIT_WATCHER_IGNORE_PATTERNS"] = "*/torch/*"
+pipeline = QAPipeline()
+st.title("Recipe Q&A")
+# File upload section
+st.header("Upload CSV")
+uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
+if uploaded_file is not None:
+    if st.button("Add Documents"):
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_file:
+            tmp_file.write(uploaded_file.read())
+            tmp_path = tmp_file.name
+        loader = CSVLoader(file_path=tmp_path)
+        data = loader.load()
+        with st.spinner("Uploading documents..."):
+            asyncio.run(pipeline.upload_documents(data))
+        os.remove(tmp_path)
+        st.success("Documents uploaded successfully.")
+# Query section
+st.header("Ask a Question")
+query = st.text_input("Enter your question:")
+if "response" not in st.session_state:
+    st.session_state.response = None
+# Get answer
+if st.button("Get Answer") and query:
+    with st.spinner("Getting answer..."):
+        response = asyncio.run(pipeline.answer_query_(query))
+        st.session_state.response = response
+        st.write("**Answer:**")
+        st.write(response.answer)
+if st.session_state.response:
+    if st.session_state.response.web_search:
+        if st.button("Search the web for this?"):
+            with st.spinner("Searching web..."):
+                web_response = asyncio.run(pipeline.search_web(query))
+                if web_response:
+                    st.write("**Web Search Result:**")
+                    st.write(web_response)
+                else:
+                    st.write("No web search result found.")

dia.png ADDED Viewed

pyproject.toml ADDED Viewed

	@@ -0,0 +1,24 @@

+[project]
+name = "bb"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "langchain>=0.3.24",
+    "langchain-community>=0.3.23",
+    "langchain-groq>=0.3.2",
+    "langchain-huggingface>=0.1.2",
+    "langchain-qdrant>=0.2.0",
+    "pandas>=2.2.3",
+    "streamlit>=1.44.1",
+]
+[dependency-groups]
+dev = [
+    "fastapi[standard]>=0.115.12",
+    "ipykernel>=6.29.5",
+    "ipython>=9.2.0",
+    "langgraph>=0.4.1",
+    "ragas>=0.2.15",
+]

ragevaluation/__init__.py ADDED Viewed

File without changes

ragevaluation/evaluate.py ADDED Viewed

	@@ -0,0 +1,124 @@

+import asyncio
+import csv
+from langchain_groq import ChatGroq
+from pydantic import BaseModel
+from src.pipeline import QAPipeline
+from src.settings import settings
+class LLMResponse(BaseModel):
+    """Structured verdict requested from the evaluator LLM for one question."""
+    # Whether the evaluator judged the model's answer correct against the ground truth
+    is_correct: bool
+    # Whether the evaluator judged that the model appropriately said "I don't know"
+    is_idk: bool
+# Pipeline under evaluation
+pipeline = QAPipeline()
+# Judge model used to grade the pipeline's answers
+llm = ChatGroq(
+    model="llama-3.3-70b-versatile",
+    max_tokens=512,
+    max_retries=2,
+    # NOTE(review): unit is not visible here — if this is seconds, 30000 is over 8 hours; confirm intent
+    timeout=30000,
+    api_key=settings.GROQ_API_KEY,
+)
+# Bind structured evaluation output
+# With include_raw=True the result is indexed by key; evaluate_pipeline reads its "parsed" entry
+llm_evaluator = llm.with_structured_output(LLMResponse, include_raw=True)
+# Sample queries and ground truths
+# queries[i] is graded against ground_truths[i]; the two lists are zipped together
+queries = [
+    "What are the ingredients of tomato soup?",
+    "Recipe for an apple pie",
+    "How do you make a chocolate cake?",
+    "How do you make dal bhat?",
+    "What are the ingredients for making dhido?",
+    "How do you make chana masala?",
+    "Recipe for butter chicken",
+    "Recipe for ramen",
+]
+# "I don't know" marks a question the pipeline is expected to decline
+ground_truths = [
+    "Tomato Soup: tomatoes, onion, garlic, vegetable broth, cream, salt, pepper",
+    "Apple Pie: apples, flour, butter, sugar, cinnamon",
+    "I don't know",
+    "I don't know",
+    "I don't know",
+    "Chana Masala: chickpeas, tomato, onion, garlic, ginger, spices",
+    "Butter Chicken: chicken, tomato puree, cream, butter, spices, garlic, ginger",
+    "Ramen: ramen noodles, broth, boiled egg, green onion, soy sauce",
+]
+# Evaluation function
+async def evaluate_pipeline():
+    correct = 0
+    idk = 0
+    total = len(queries)
+    results = []
+    for q, gt in zip(queries, ground_truths):
+        response = await pipeline.answer_query_(q)
+        answer = response.answer
+        # Evaluation prompt
+        eval_prompt = f"""
+You are an evaluator. Assess whether the model's answer is both factually correct and acknowledges lack of knowledge when necessary.
+Question: {q}
+Model's Answer: {answer}
+Ground Truth: {gt}
+Evaluate the following:
+1. Is the model's answer semantically correct when compared to the ground truth?
+2. Does the model appropriately say "I don't know" or avoid answering if the answer is not available?
+Respond in JSON with two fields:
+- is_correct: true or false
+- is_idk: true or false
+"""
+        result = llm_evaluator.invoke(eval_prompt)
+        parsed = result["parsed"]
+        # Correct = either factually correct or correctly says "I don't know" when GT also says so
+        if parsed.is_correct or (parsed.is_idk and gt.strip().lower() == "i don't know"):
+            correct += 1
+        if parsed.is_idk:
+            idk += 1
+        # Log and store results
+        print(
+            f"Q: {q}\nA: {answer}\nGT: {gt}\nCorrect: {parsed.is_correct}, IDK: {parsed.is_idk}\n{'-' * 60}"
+        )
+        results.append(
+            {
+                "question": q,
+                "model_answer": answer,
+                "ground_truth": gt,
+                "is_correct": parsed.is_correct,
+                "is_idk": parsed.is_idk,
+            }
+        )
+    # Save results to CSV
+    with open("evaluation_results.csv", "w", newline="", encoding="utf-8") as csvfile:
+        writer = csv.DictWriter(
+            csvfile,
+            fieldnames=[
+                "question",
+                "model_answer",
+                "ground_truth",
+                "is_correct",
+                "is_idk",
+            ],
+        )
+        writer.writeheader()
+        writer.writerows(results)
+    # Print summary
+    print(f"\nEvaluation results saved to 'evaluation_results.csv'.")
+    print(f"Total Correct: {correct}/{total} ({(correct / total) * 100:.2f}%)")
+    print(f"'I don't know' Responses: {idk}/{3} ({(idk / 3) * 100:.2f}%)") #here 3 because there a re 3 total i dont know response
+# Entry point
+if __name__ == "__main__":
+    # Drive the async evaluation to completion when this file is run as a script
+    asyncio.run(evaluate_pipeline())

recipe_dataset.csv ADDED Viewed

	@@ -0,0 +1,50 @@

+dish_name,ingredients,instructions
+"Spaghetti Carbonara","spaghetti, eggs, parmesan cheese, bacon, black pepper","Boil pasta until al dente. Fry bacon until crispy. Beat eggs and mix with grated parmesan. Combine pasta with bacon and egg mixture. Season with pepper and serve hot."
+"Chicken Curry","chicken, onion, tomato, garlic, ginger, curry powder, salt, oil","Heat oil in a pan. Sauté onions, garlic, and ginger. Add tomatoes and cook down. Add chicken and curry powder. Simmer until chicken is cooked through. Serve with rice."
+"Grilled Cheese Sandwich","bread slices, cheddar cheese, butter","Butter the bread slices. Place cheese between two slices. Grill on a pan until golden brown and cheese melts."
+"Pancakes","flour, milk, eggs, sugar, baking powder, salt, butter","Mix dry ingredients. Whisk in milk and eggs. Heat butter in a pan and pour batter. Cook until bubbles form, flip and cook the other side."
+"Fried Rice","rice, mixed vegetables, soy sauce, garlic, egg, oil","Cook rice and let it cool. Scramble egg in a pan. Stir-fry garlic and vegetables. Add rice, soy sauce, and mix well."
+"Tomato Soup","tomatoes, onion, garlic, vegetable broth, cream, salt, pepper","Sauté onion and garlic. Add chopped tomatoes and broth. Simmer and blend. Add cream and season before serving."
+"Caesar Salad","lettuce, croutons, parmesan, Caesar dressing, chicken (optional)","Chop lettuce and toss with croutons and grated parmesan. Add dressing and grilled chicken if desired."
+"French Toast","bread slices, eggs, milk, cinnamon, sugar, butter","Whisk eggs, milk, sugar, and cinnamon. Dip bread slices. Fry on buttered pan until golden brown."
+"Veggie Stir Fry","mixed vegetables, soy sauce, garlic, ginger, oil, salt","Heat oil. Add garlic and ginger. Stir-fry veggies. Add soy sauce and cook until tender."
+"Beef Tacos","ground beef, taco shells, onion, tomato, lettuce, cheese, taco seasoning","Cook beef with taco seasoning. Fill taco shells with beef, chopped veggies, and cheese."
+"Mashed Potatoes","potatoes, butter, milk, salt, pepper","Boil potatoes until soft. Mash with butter and milk. Season with salt and pepper."
+"Omelette","eggs, onion, tomato, cheese, salt, pepper, oil","Whisk eggs with salt and pepper. Pour into pan. Add chopped veggies and cheese. Cook until firm."
+"Chocolate Chip Cookies","flour, sugar, butter, eggs, chocolate chips, vanilla extract, baking soda","Cream butter and sugar. Mix in eggs and vanilla. Add dry ingredients and chips. Scoop and bake until golden."
+"Mac and Cheese","macaroni, cheddar cheese, milk, butter, flour, salt","Cook macaroni. Make cheese sauce with butter, flour, milk, and cheese. Mix with pasta and bake if desired."
+"Banana Bread","ripe bananas, flour, sugar, eggs, butter, baking soda","Mash bananas. Mix with sugar, eggs, and butter. Add flour and baking soda. Pour into loaf pan and bake."
+"Veggie Burger","burger buns, veggie patties, lettuce, tomato, onion, cheese, sauce","Cook patties. Assemble burgers with veggies, cheese, and sauce in buns."
+"Butter Chicken","chicken, tomato puree, cream, butter, spices, garlic, ginger","Marinate chicken in spices. Cook in butter, add tomato puree and cream. Simmer until rich."
+"Garlic Bread","baguette, garlic, butter, parsley, salt","Mix garlic with butter and parsley. Spread on bread. Bake until golden and crisp."
+"Stir Fry Noodles","noodles, vegetables, soy sauce, garlic, oil","Boil noodles. Stir-fry garlic and vegetables. Add noodles and soy sauce. Toss well."
+"Chili","ground beef, beans, tomatoes, chili powder, onion, garlic, salt","Cook beef with onion and garlic. Add beans, tomatoes, and chili powder. Simmer until thick."
+"Egg Fried Rice","rice, egg, green onion, soy sauce, garlic, oil","Scramble egg in a pan. Add cooked rice, garlic, soy sauce, and green onions. Stir-fry well."
+"Margarita Pizza","pizza dough, tomato sauce, mozzarella, basil, olive oil","Spread sauce on dough. Top with mozzarella and basil. Bake until cheese melts."
+"Tuna Sandwich","bread, canned tuna, mayonnaise, lettuce, tomato, salt","Mix tuna with mayo. Layer with lettuce and tomato in bread slices."
+"Greek Salad","tomato, cucumber, feta, olives, red onion, olive oil","Chop vegetables. Toss with feta, olives, and olive oil."
+"Chicken Biryani","chicken, basmati rice, yogurt, onion, spices, saffron","Marinate chicken. Cook with onions and spices. Layer with rice and saffron. Steam until done."
+"Caprese Salad","mozzarella, tomato, basil, olive oil, balsamic vinegar","Slice mozzarella and tomatoes. Layer with basil. Drizzle with oil and vinegar."
+"Stuffed Bell Peppers","bell peppers, rice, ground beef, onion, tomato sauce, cheese","Stuff bell peppers with cooked beef and rice. Top with sauce and cheese. Bake until tender."
+"Fish Tacos","fish fillets, taco shells, cabbage, lime, mayonnaise","Cook fish. Assemble tacos with shredded cabbage and lime mayo sauce."
+"Chicken Alfredo","fettuccine, chicken, cream, parmesan, garlic, butter","Cook pasta. Sauté chicken with garlic. Add cream and parmesan. Combine with pasta."
+"Avocado Toast","bread, avocado, lemon juice, chili flakes, salt","Mash avocado with lemon juice and salt. Spread on toasted bread. Sprinkle chili flakes."
+"Vegetable Soup","mixed vegetables, broth, onion, garlic, herbs, salt","Sauté onion and garlic. Add veggies and broth. Simmer with herbs until tender."
+"BBQ Chicken Wings","chicken wings, BBQ sauce, garlic, salt, pepper","Marinate wings. Bake or grill while brushing with BBQ sauce."
+"Ramen","ramen noodles, broth, boiled egg, green onion, soy sauce","Cook noodles in broth. Add egg and soy sauce. Garnish with green onions."
+"Mango Smoothie","mango, yogurt, milk, honey, ice","Blend mango with yogurt, milk, and honey until smooth. Serve cold."
+"Egg Salad","eggs, mayonnaise, mustard, salt, pepper","Boil and chop eggs. Mix with mayo, mustard, salt, and pepper."
+"Tiramisu","ladyfingers, mascarpone, coffee, cocoa powder, sugar","Layer soaked ladyfingers with mascarpone cream. Dust with cocoa powder. Chill and serve."
+"Shrimp Scampi","shrimp, garlic, butter, lemon juice, spaghetti","Sauté garlic in butter. Add shrimp and lemon juice. Toss with cooked spaghetti."
+"Falafel Wrap","falafel, pita bread, lettuce, tomato, tahini sauce","Place falafel in pita. Add veggies and drizzle tahini sauce."
+"Peanut Butter Cookies","peanut butter, sugar, eggs, baking soda","Mix ingredients. Shape into cookies and bake until golden."
+"Eggplant Parmesan","eggplant, marinara sauce, mozzarella, parmesan, breadcrumbs","Bread and fry eggplant. Layer with sauce and cheese. Bake until bubbly."
+"Shepherd’s Pie","ground lamb, mashed potatoes, peas, carrots, onion","Cook meat with vegetables. Top with mashed potatoes. Bake until golden."
+"Apple Pie","apples, flour, butter, sugar, cinnamon","Make filling with apples and cinnamon. Place in crust and bake until golden."
+"Chicken Nuggets","chicken breast, flour, egg, breadcrumbs, salt","Coat chicken in flour, egg, and breadcrumbs. Fry until golden brown."
+"Zucchini Fritters","zucchini, flour, egg, garlic, salt, pepper","Grate zucchini and mix with ingredients. Fry spoonfuls in oil until crisp."
+"Beef Stroganoff","beef, onion, mushroom, sour cream, egg noodles","Sauté beef and veggies. Stir in sour cream. Serve over cooked noodles."
+"Berry Parfait","yogurt, berries, granola, honey","Layer yogurt with berries and granola. Drizzle honey on top."
+"French Crepes","flour, milk, eggs, sugar, butter","Whisk ingredients. Cook thin crepes in buttered pan. Fill as desired."
+"Tofu Stir Fry","tofu, vegetables, soy sauce, garlic, ginger, oil","Cook tofu until golden. Stir-fry with veggies and sauce."
+"Chana Masala","chickpeas, tomato, onion, garlic, ginger, spices","Sauté onion, garlic, and ginger. Add tomatoes and spices. Stir in chickpeas and simmer."

src/__init__.py ADDED Viewed

File without changes

src/answerquery/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .answerquery import AnswerQuery

src/answerquery/answerquery.py ADDED Viewed

	@@ -0,0 +1,102 @@

+from langchain_groq import ChatGroq
+from langchain_community.utilities import GoogleSerperAPIWrapper
+from src.settings import settings
+from src.vectorstore import answer_query_from_existing_collection
+from langchain_core.prompts import PromptTemplate
+from langchain_core.runnables import RunnablePassthrough
+from src.schemas import RagResponse
+class AnswerQuery:
+    def __init__(self, model_name: str = "llama-3.3-70b-versatile"):
+        """
+        Class to handle the Groq model for answering queries.
+        """
+        self.llm = ChatGroq(
+            model_name=model_name,
+            temperature=0.3,
+            max_tokens=512,
+            api_key=settings.GROQ_API_KEY,
+        )
+        self.serper = GoogleSerperAPIWrapper(serper_api_key=settings.SERPER_API_KEY)
+    def format_docs(self,docs):
+        return "\n\n".join(doc.page_content for doc in docs)
+    async def answer_query(
+        self, vectorembedding, query: str, collection_name: str = "recipe"
+    ):
+        """
+        Answer a query using the Groq model.
+        """
+        vector_store = await answer_query_from_existing_collection(
+            vectorembedding=vectorembedding,
+            collection_name_=collection_name,
+        )
+        # Retriever
+        retriever = vector_store.as_retriever(
+            search_type="mmr",
+            search_kwargs={"k": 3, "lambda_mult": 0.5},
+        )
+        template = """
+        Answer using ONLY the context below:
+        Context: {context}
+        Question: {question}
+        If context doesn't match with the question, say,I couldn’t find information about this,and set web_search to true.
+        Otherwise, set web_search to false and answer only according to the context.
+        """
+        prompt = PromptTemplate.from_template(template)
+        chain = (
+            {
+                "context": retriever|self.format_docs,
+                "question": RunnablePassthrough(),
+            }
+            | prompt
+            | self.llm.with_structured_output(
+              RagResponse,
+            )
+        )
+        response = chain.invoke(query)
+        return response
+    async def search_web(self, query: str):
+        """Search the web for a query"""
+        response =  self.serper.run(query)
+        template = """
+        Answer using ONLY the context below:
+        Context: {context}
+        Question: {question}
+        If context doesn't match with the question, say,I couldn’t find information about this.
+        """
+        prompt = PromptTemplate.from_template(template)
+        chain = (
+            {
+                "context": lambda x :response,
+                "question": RunnablePassthrough(),
+            }
+            | prompt
+            | self.llm
+            )
+        response = chain.invoke(query)
+        return response.content
+if __name__ == "__main__":
+    async def main():
+        answer_query = AnswerQuery()
+        query = "What is the capital of France?"
+        response = await answer_query.answer_query(query)
+        print(response)
+    import asyncio
+    asyncio.run(main())

src/embedding/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .embedding import all_minilm_l6_v2

src/embedding/embedding.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from langchain_huggingface import HuggingFaceEmbeddings
+# Embedding model created once at import time and handed out by all_minilm_l6_v2()
+embedding_instance = HuggingFaceEmbeddings(
+    model_name="all-MiniLM-L6-v2",
+    model_kwargs={"device": "cpu"},
+)
+def all_minilm_l6_v2():
+    """
+    Return the  embedding instance.
+    """
+    return embedding_instance

src/pipeline/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .pipeline import QAPipeline

src/pipeline/pipeline.py ADDED Viewed

	@@ -0,0 +1,41 @@

+from qdrant_client import QdrantClient
+from src.vectorstore import QdrantVectorStoreDB
+from src.answerquery import AnswerQuery
+from src.embedding import all_minilm_l6_v2
+from src.settings import settings
+class QAPipeline:
+    """
+    A class that handles the entire QA pipeline.
+    """
+    def __init__(self):
+        self.embeddings=all_minilm_l6_v2()
+        self.qdrant_client=QdrantClient(url=settings.QDRANT_URL, api_key=settings.QDRANT_API_KEY)
+        self.vector_store = QdrantVectorStoreDB(qdrant_client=self.qdrant_client,vector_embedding= self.embeddings)
+        self.answer_query = AnswerQuery()
+    async def upload_documents(self, documents, collection_name:str="recipe"):
+        """
+        Upload documents to the Qdrant vector store.
+        """
+        await self.vector_store.upload_documents(documents, collection_name)
+    async def answer_query_(self, query):
+        """
+        Answer a query using the Groq model.
+        """
+        return await self.answer_query.answer_query(
+            vectorembedding=self.embeddings,
+            query=query,
+        )
+    async def search_web(self, query):
+        """
+        Search the web for a query.
+        """
+        return await self.answer_query.search_web(
+            query=query
+        )

src/schemas/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .schemas import RagResponse

src/schemas/schemas.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from pydantic import BaseModel
+class RagResponse(BaseModel):
+    """Structured reply of the retrieval QA chain."""
+    # Answer text produced by the model
+    answer:str
+    # Set by the model when the context did not match the question (see the QA prompt); defaults to False
+    web_search:bool=False

src/settings.py ADDED Viewed

	@@ -0,0 +1,18 @@

+from pydantic_settings import BaseSettings, SettingsConfigDict
+class Settings(BaseSettings):
+    """
+    Settings for the application.
+
+    All four keys are declared without defaults; ``model_config`` points the
+    loader at a ``.env`` file.
+    """
+    # API key passed to ChatGroq
+    GROQ_API_KEY: str
+    # URL and API key used to reach Qdrant
+    QDRANT_URL:str
+    QDRANT_API_KEY:str
+    # API key passed to GoogleSerperAPIWrapper for web search
+    SERPER_API_KEY:str
+    model_config = SettingsConfigDict(
+        env_file=".env",
+        env_ignore_empty=True,
+        extra="ignore",
+    )
+# Instantiated at import time.
+# NOTE(review): presumably fails here when a required key is missing — confirm
+settings = Settings()

src/utils.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import logging
+import re
+def get_logger(name: str) -> logging.Logger:
+    logger = logging.getLogger(name)
+    if not logger.handlers:  # Prevent adding multiple handlers
+        logger.setLevel(logging.INFO)
+        # Console handler
+        console_handler = logging.StreamHandler()
+        formatter = logging.Formatter('[%(asctime)s] [%(levelname)s] %(message)s')
+        console_handler.setFormatter(formatter)
+        logger.addHandler(console_handler)
+        # File handler
+        file_handler = logging.FileHandler('botanza.log')
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+        logger.propagate = False
+    return logger
+def clean_text(text):
+    """
+    Clean the text by removing special characters and converting to lowercase.
+    """
+    text=text.lower()
+    text = re.sub(r'[^a-z0-9\s]', '', text)
+    return text

src/vectorstore/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from .qdrant_document import upload_document_existing_collection, answer_query_from_existing_collection,upload_document_new_collection
2	+ from .qdrant_vector_store import QdrantVectorStoreDB

src/vectorstore/qdrant_document.py ADDED Viewed

	@@ -0,0 +1,37 @@

+from langchain_qdrant import QdrantVectorStore,RetrievalMode
+from src.settings import settings
+async def answer_query_from_existing_collection(vectorembedding,collection_name_:str):
+    vectorstore=QdrantVectorStore.from_existing_collection(
+        embedding=vectorembedding,
+        collection_name=collection_name_,
+        url=settings.QDRANT_URL,
+        api_key=settings.QDRANT_API_KEY,
+        retrieval_mode=RetrievalMode.DENSE
+    )
+    return vectorstore
+async def upload_document_existing_collection(documents_,vector_embeddings,collection_name_):
+    vector_store=QdrantVectorStore.from_documents(
+        documents=documents_,
+        embedding=vector_embeddings,
+        url=settings.QDRANT_URL,
+        api_key=settings.QDRANT_API_KEY,
+        prefer_grpc=True,
+        collection_name=collection_name_,
+        retrieval_mode=RetrievalMode.DENSE,
+        timeout=None
+    )
+async def upload_document_new_collection(vector_embeddings,collection_name_):
+    vector_store=QdrantVectorStore.from_documents(
+        documents=[],
+        embedding=vector_embeddings,
+        url=settings.QDRANT_URL,
+        api_key=settings.QDRANT_API_KEY,
+        prefer_grpc=True,
+        collection_name=collection_name_,
+        retrieval_mode=RetrievalMode.DENSE,
+        force_recreate=True,
+        timeout=None
+    )

src/vectorstore/qdrant_vector_store.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from src.vectorstore import upload_document_existing_collection,answer_query_from_existing_collection,upload_document_new_collection
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from src.utils import logging,clean_text
+# Module-level logger; `logging` here is the name imported from src.utils above
+logger = logging.getLogger(__name__)
+class QdrantVectorStoreDB:
+    """
+    A class that handles the Qdrant vector store database operations.
+    """
+    def __init__(self,qdrant_client,vector_embedding):
+        self.qdrant_client = qdrant_client
+        self.vector_embedding = vector_embedding
+    async def create_collection(self, collection_name:str):
+        """
+        Create a new collection in Qdrant.
+        """
+        try:
+            await upload_document_new_collection(self.vector_embedding, collection_name)
+            logger.info(f"Collection {collection_name} created successfully.")
+            return
+        except Exception as e:
+            logger.error(f"Error creating chatbot: {e}")
+            raise e
+    async def upload_documents(self,documents,collection_name_:str="recipe"):
+        """Upload Documents to qdrant vectorstore"""
+        try:
+            splitter=RecursiveCharacterTextSplitter(
+            chunk_size=1500,
+            chunk_overlap=150,
+            add_start_index=True
+            )
+            for doc in documents:
+                doc.page_content = clean_text(doc.page_content)
+            chunks=splitter.split_documents(documents)
+            await upload_document_existing_collection(
+                documents_=chunks,
+                vector_embeddings=self.vector_embedding,
+                collection_name_=collection_name_
+            )
+            logger.info(f"Documents uploaded successfully to {collection_name_} collection.")
+            return
+        except Exception as e:
+            logger.error(f"Error uploading documents: {e}")
+            raise e

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff