puzan789 commited on
Commit
2214088
·
0 Parent(s):
.env.example ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ GROQ_API_KEY
2
+ QDRANT_URL
3
+ QDRANT_API_KEY
.gitignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+
12
+ /notebooks
13
+
14
+ .env
15
+
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.12
Dockerfile ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.13-slim
2
+
3
+ ENV PYTHONUNBUFFERED=1
4
+
5
+ # Install astral UV runtime
6
+ COPY --from=ghcr.io/astral-sh/uv:0.6.13 /uv /uvx /bin/
7
+ ENV PATH="/app/.venv/bin:$PATH"
8
+ ENV UV_COMPILE_BYTECODE=1
9
+ ENV UV_LINK_MODE=copy
10
+
11
+ WORKDIR /app
12
+
13
+ # Copy dependency files first
14
+ COPY ./pyproject.toml ./uv.lock /app/
15
+
16
+ # Install dependencies
17
+ RUN --mount=type=cache,target=/root/.cache/uv \
18
+ uv sync --frozen --no-install-project
19
+
20
+ ENV PYTHONPATH=/app
21
+
22
+ # Copy project files (including src/)
23
+ COPY ./app.py /app/app.py
24
+ COPY ./src /app/src
25
+
26
+ # Final sync (optional, but safe)
27
+ RUN --mount=type=cache,target=/root/.cache/uv \
28
+ uv sync
29
+
30
+ EXPOSE 8501
31
+
32
+ # Launch Streamlit app
33
+ CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
README.md ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # RAG-Based QA System for Recipes
3
+
4
+ ## Setup
5
+
6
+ To install dependencies, use `uv`. First, install `uv` if you haven’t already:
7
+
8
+ ### Ubuntu/Linux:
9
+ ```bash
10
+ curl -LsSf https://astral.sh/uv/install.sh | sh
11
+ ```
12
+
13
+ ### Windows (PowerShell):
14
+
15
+ ```powershell
16
+ irm https://astral.sh/uv/install.ps1 | iex
17
+ ```
18
+
19
+ Then, install dependencies:
20
+
21
+ ```bash
22
+ uv sync
23
+ ```
24
+
25
+ ## Vector Store: Qdrant
26
+
27
+ This project uses **Qdrant** as a vector store.
28
+
29
+ You can run it locally using Docker:
30
+
31
+ ```bash
32
+ docker run -p 6333:6333 -p 6334:6334 qdrant/qdrant
33
+ ```
34
+
35
+ Or use **Qdrant Cloud**, which is already set up in this project.
36
+
37
+ To create your own cloud instance, go to: [https://qdrant.tech](https://qdrant.tech)
38
+
39
+ ## LLM: ChatGroq
40
+
41
+ This project uses **ChatGroq**.
42
+
43
+ Set the following environment variables:
44
+
45
+ ```env
46
+ GROQ_API_KEY=your_groq_api_key
47
+ QDRANT_URL=your_qdrant_cloud_url
48
+ QDRANT_API_KEY=your_qdrant_api_key
+ SERPER_API_KEY=your_serper_api_key
49
+ ```
50
+
51
+ ## Run the App
52
+
53
+ To run the project:
54
+
55
+ ```bash
56
+ streamlit run app.py
57
+ ```
58
+
59
+ ## Flow Diagram
60
+ ![Flow diagram](dia.png)
61
+
62
+
63
+ ## Todo
64
+
65
+ - [ ] **Create API with FastAPI**
66
+ - Set up FastAPI endpoints for question answering.
67
+
68
+ - [ ] **Implement Reranking Strategy for Retrieval**
69
+ - Add a reranking approach to improve document relevance.
70
+
71
+ - [ ] **Add Pre-Rephrasing Strategy**
72
+ - Rephrase user questions before querying Qdrant.
73
+
api.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from fastapi import FastAPI, UploadFile,File
3
+ from src import pipeline
4
+ from src.pipeline import QAPipeline
5
+ from langchain_community.document_loaders import CSVLoader
6
+ import shutil
7
+ import os
8
+ pipeline= QAPipeline()
9
+
10
+ app= FastAPI()
11
+
12
@app.post("/")
async def upload_documents(file: UploadFile = File(...)):
    """
    Load an uploaded CSV file and index its rows in the vector store.

    The upload is spooled to a temporary file because ``CSVLoader`` reads
    from a path. The temporary file is always removed, even when loading or
    indexing fails.

    Args:
        file: The uploaded file; its name must end in ``.csv``.

    Returns:
        A dict with a ``message`` key on success, or an ``error`` key when
        the upload is not a CSV file.
    """
    import tempfile  # local import: the module header does not import tempfile

    # The filename can be missing on a malformed multipart upload.
    if not (file.filename or "").endswith(".csv"):
        return {"error": "The uploaded file is not a CSV file."}

    # Never build the path from the client-supplied filename: a name such as
    # "../../x.csv" would escape /tmp, and two concurrent uploads with the
    # same name would overwrite each other.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as buffer:
        shutil.copyfileobj(file.file, buffer)
        temp_path = buffer.name

    try:
        data = CSVLoader(file_path=temp_path).load()
        await pipeline.upload_documents(data)
    finally:
        # Clean up on both the success and the failure path.
        os.remove(temp_path)

    return {"message": "Documents uploaded successfully."}
31
+
32
@app.get("/answer")
async def answer_query(query: str):
    """
    Answer a question through the QA pipeline.

    Args:
        query: The user's question, taken from the ``query`` URL parameter.

    Returns:
        A dict whose ``response`` key holds the value returned by
        ``pipeline.answer_query_``.
    """
    # Exceptions propagate unchanged; wrapping the call in a try/except that
    # only re-raises added nothing.
    response = await pipeline.answer_query_(query)
    return {"response": response}
42
+
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from src.pipeline import QAPipeline
3
+ from langchain_community.document_loaders import CSVLoader
4
+ import os
5
+ import tempfile
6
+ import asyncio
7
+ import nest_asyncio
8
+
9
+ nest_asyncio.apply()
10
+
11
+ os.environ["STREAMLIT_WATCHER_IGNORE_PATTERNS"] = "*/torch/*"
12
+
13
+ pipeline = QAPipeline()
14
+
15
+ st.title("Recipe Q&A")
16
+
17
# File upload section
st.header("Upload CSV")
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
if uploaded_file is not None:
    if st.button("Add Documents"):
        # CSVLoader reads from a path, so the upload is written to disk first.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_file:
            tmp_file.write(uploaded_file.read())
            tmp_path = tmp_file.name
        try:
            loader = CSVLoader(file_path=tmp_path)
            data = loader.load()
            with st.spinner("Uploading documents..."):
                asyncio.run(pipeline.upload_documents(data))
        finally:
            # Remove the temporary file even when loading or uploading fails.
            os.remove(tmp_path)
        st.success("Documents uploaded successfully.")

# Query section
st.header("Ask a Question")
query = st.text_input("Enter your question:")

# Keep the last answer across Streamlit reruns so the web-search button can
# still be offered after the page re-executes.
if "response" not in st.session_state:
    st.session_state.response = None

# Get answer
if st.button("Get Answer") and query:
    with st.spinner("Getting answer..."):
        response = asyncio.run(pipeline.answer_query_(query))
        st.session_state.response = response
        st.write("**Answer:**")
        st.write(response.answer)

# Offer a web search only when the stored answer asked for one.
if st.session_state.response:
    if st.session_state.response.web_search:
        if st.button("Search the web for this?"):
            with st.spinner("Searching web..."):
                web_response = asyncio.run(pipeline.search_web(query))
                if web_response:
                    st.write("**Web Search Result:**")
                    st.write(web_response)
                else:
                    st.write("No web search result found.")
dia.png ADDED
pyproject.toml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "bb"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "langchain>=0.3.24",
9
+ "langchain-community>=0.3.23",
10
+ "langchain-groq>=0.3.2",
11
+ "langchain-huggingface>=0.1.2",
12
+ "langchain-qdrant>=0.2.0",
13
+ "pandas>=2.2.3",
14
+ "streamlit>=1.44.1",
15
+ ]
16
+
17
+ [dependency-groups]
18
+ dev = [
19
+ "fastapi[standard]>=0.115.12",
20
+ "ipykernel>=6.29.5",
21
+ "ipython>=9.2.0",
22
+ "langgraph>=0.4.1",
23
+ "ragas>=0.2.15",
24
+ ]
ragevaluation/__init__.py ADDED
File without changes
ragevaluation/evaluate.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import csv
3
+ from langchain_groq import ChatGroq
4
+ from pydantic import BaseModel
5
+ from src.pipeline import QAPipeline
6
+ from src.settings import settings
7
+
8
+
9
class LLMResponse(BaseModel):
    # Structured verdict the evaluator LLM is asked to return for one answer.
    # Documented with comments rather than a docstring so that the schema
    # handed to the model is left untouched.

    # True when the evaluator judges the answer to match the ground truth.
    is_correct: bool
    # True when the evaluator judges the answer to be an "I don't know" reply.
    is_idk: bool
12
+
13
+
14
+ pipeline = QAPipeline()
15
+
16
# Judge model used to grade the pipeline's answers.
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    max_tokens=512,
    max_retries=2,
    # NOTE(review): the request timeout is understood to be in seconds, so
    # the previous value of 30000 would let a stalled request hang for
    # hours; 30 matches the apparent intent of "30000 ms" — confirm.
    timeout=30,
    api_key=settings.GROQ_API_KEY,
)
23
+
24
+ # Bind structured evaluation output
25
+ llm_evaluator = llm.with_structured_output(LLMResponse, include_raw=True)
26
+
27
+ # Sample queries and ground truths
28
+ queries = [
29
+ "What are the ingredients of tomato soup?",
30
+ "Recipe for an apple pie",
31
+ "How do you make a chocolate cake?",
32
+ "How do you make dal bhat?",
33
+ "What are the ingredients for making dhido?",
34
+ "How do you make chana masala?",
35
+ "Recipe for butter chicken",
36
+ "Recipe for ramen",
37
+ ]
38
+
39
+ ground_truths = [
40
+ "Tomato Soup: tomatoes, onion, garlic, vegetable broth, cream, salt, pepper",
41
+ "Apple Pie: apples, flour, butter, sugar, cinnamon",
42
+ "I don't know",
43
+ "I don't know",
44
+ "I don't know",
45
+ "Chana Masala: chickpeas, tomato, onion, garlic, ginger, spices",
46
+ "Butter Chicken: chicken, tomato puree, cream, butter, spices, garlic, ginger",
47
+ "Ramen: ramen noodles, broth, boiled egg, green onion, soy sauce",
48
+ ]
49
+
50
+ # Evaluation function
51
async def evaluate_pipeline():
    """
    Run every sample query through the QA pipeline and grade the answers.

    Each answer is graded by ``llm_evaluator`` against its ground truth. The
    per-question results are printed and written to
    ``evaluation_results.csv``, followed by a printed summary.

    Raises:
        ValueError: If ``queries`` and ``ground_truths`` differ in length.
    """
    idk_marker = "i don't know"

    correct = 0
    idk = 0
    total = len(queries)
    # Derive the number of questions that should be answered with "I don't
    # know" from the data instead of hard-coding it.
    expected_idk = sum(gt.strip().lower() == idk_marker for gt in ground_truths)
    results = []

    # strict=True: a length mismatch is a data error, not something to
    # silently truncate.
    for q, gt in zip(queries, ground_truths, strict=True):
        response = await pipeline.answer_query_(q)
        answer = response.answer

        # Evaluation prompt
        eval_prompt = (
            "\n"
            "You are an evaluator. Assess whether the model's answer is both factually correct and acknowledges lack of knowledge when necessary.\n"
            "\n"
            f"Question: {q}\n"
            f"Model's Answer: {answer}\n"
            f"Ground Truth: {gt}\n"
            "\n"
            "Evaluate the following:\n"
            "1. Is the model's answer semantically correct when compared to the ground truth?\n"
            '2. Does the model appropriately say "I don\'t know" or avoid answering if the answer is not available?\n'
            "\n"
            "Respond in JSON with two fields:\n"
            "- is_correct: true or false\n"
            "- is_idk: true or false\n"
        )

        # Await the evaluator instead of blocking the event loop.
        result = await llm_evaluator.ainvoke(eval_prompt)
        parsed = result["parsed"]

        if parsed is None:
            # With include_raw=True a reply that cannot be parsed yields
            # parsed=None; grade it as a miss instead of crashing.
            is_correct = False
            is_idk = False
        else:
            is_correct = parsed.is_correct
            is_idk = parsed.is_idk

        gt_is_idk = gt.strip().lower() == idk_marker

        # Correct = either factually correct or correctly says "I don't know"
        # when the ground truth also says so.
        if is_correct or (is_idk and gt_is_idk):
            correct += 1
        # Count only expected "I don't know" answers, so the ratio reported
        # below can never exceed 100%.
        if is_idk and gt_is_idk:
            idk += 1

        # Log and store results
        print(
            f"Q: {q}\nA: {answer}\nGT: {gt}\nCorrect: {is_correct}, IDK: {is_idk}\n{'-' * 60}"
        )

        results.append(
            {
                "question": q,
                "model_answer": answer,
                "ground_truth": gt,
                "is_correct": is_correct,
                "is_idk": is_idk,
            }
        )

    # Save results to CSV
    with open("evaluation_results.csv", "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(
            csvfile,
            fieldnames=[
                "question",
                "model_answer",
                "ground_truth",
                "is_correct",
                "is_idk",
            ],
        )
        writer.writeheader()
        writer.writerows(results)

    # Print summary (guard the divisions against empty inputs)
    correct_pct = (correct / total) * 100 if total else 0.0
    idk_pct = (idk / expected_idk) * 100 if expected_idk else 0.0
    print("\nEvaluation results saved to 'evaluation_results.csv'.")
    print(f"Total Correct: {correct}/{total} ({correct_pct:.2f}%)")
    print(f"'I don't know' Responses: {idk}/{expected_idk} ({idk_pct:.2f}%)")
121
+
122
+ # Entry point
123
+ if __name__ == "__main__":
124
+ asyncio.run(evaluate_pipeline())
recipe_dataset.csv ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dish_name,ingredients,instructions
2
+ "Spaghetti Carbonara","spaghetti, eggs, parmesan cheese, bacon, black pepper","Boil pasta until al dente. Fry bacon until crispy. Beat eggs and mix with grated parmesan. Combine pasta with bacon and egg mixture. Season with pepper and serve hot."
3
+ "Chicken Curry","chicken, onion, tomato, garlic, ginger, curry powder, salt, oil","Heat oil in a pan. Sauté onions, garlic, and ginger. Add tomatoes and cook down. Add chicken and curry powder. Simmer until chicken is cooked through. Serve with rice."
4
+ "Grilled Cheese Sandwich","bread slices, cheddar cheese, butter","Butter the bread slices. Place cheese between two slices. Grill on a pan until golden brown and cheese melts."
5
+ "Pancakes","flour, milk, eggs, sugar, baking powder, salt, butter","Mix dry ingredients. Whisk in milk and eggs. Heat butter in a pan and pour batter. Cook until bubbles form, flip and cook the other side."
6
+ "Fried Rice","rice, mixed vegetables, soy sauce, garlic, egg, oil","Cook rice and let it cool. Scramble egg in a pan. Stir-fry garlic and vegetables. Add rice, soy sauce, and mix well."
7
+ "Tomato Soup","tomatoes, onion, garlic, vegetable broth, cream, salt, pepper","Sauté onion and garlic. Add chopped tomatoes and broth. Simmer and blend. Add cream and season before serving."
8
+ "Caesar Salad","lettuce, croutons, parmesan, Caesar dressing, chicken (optional)","Chop lettuce and toss with croutons and grated parmesan. Add dressing and grilled chicken if desired."
9
+ "French Toast","bread slices, eggs, milk, cinnamon, sugar, butter","Whisk eggs, milk, sugar, and cinnamon. Dip bread slices. Fry on buttered pan until golden brown."
10
+ "Veggie Stir Fry","mixed vegetables, soy sauce, garlic, ginger, oil, salt","Heat oil. Add garlic and ginger. Stir-fry veggies. Add soy sauce and cook until tender."
11
+ "Beef Tacos","ground beef, taco shells, onion, tomato, lettuce, cheese, taco seasoning","Cook beef with taco seasoning. Fill taco shells with beef, chopped veggies, and cheese."
12
+ "Mashed Potatoes","potatoes, butter, milk, salt, pepper","Boil potatoes until soft. Mash with butter and milk. Season with salt and pepper."
13
+ "Omelette","eggs, onion, tomato, cheese, salt, pepper, oil","Whisk eggs with salt and pepper. Pour into pan. Add chopped veggies and cheese. Cook until firm."
14
+ "Chocolate Chip Cookies","flour, sugar, butter, eggs, chocolate chips, vanilla extract, baking soda","Cream butter and sugar. Mix in eggs and vanilla. Add dry ingredients and chips. Scoop and bake until golden."
15
+ "Mac and Cheese","macaroni, cheddar cheese, milk, butter, flour, salt","Cook macaroni. Make cheese sauce with butter, flour, milk, and cheese. Mix with pasta and bake if desired."
16
+ "Banana Bread","ripe bananas, flour, sugar, eggs, butter, baking soda","Mash bananas. Mix with sugar, eggs, and butter. Add flour and baking soda. Pour into loaf pan and bake."
17
+ "Veggie Burger","burger buns, veggie patties, lettuce, tomato, onion, cheese, sauce","Cook patties. Assemble burgers with veggies, cheese, and sauce in buns."
18
+ "Butter Chicken","chicken, tomato puree, cream, butter, spices, garlic, ginger","Marinate chicken in spices. Cook in butter, add tomato puree and cream. Simmer until rich."
19
+ "Garlic Bread","baguette, garlic, butter, parsley, salt","Mix garlic with butter and parsley. Spread on bread. Bake until golden and crisp."
20
+ "Stir Fry Noodles","noodles, vegetables, soy sauce, garlic, oil","Boil noodles. Stir-fry garlic and vegetables. Add noodles and soy sauce. Toss well."
21
+ "Chili","ground beef, beans, tomatoes, chili powder, onion, garlic, salt","Cook beef with onion and garlic. Add beans, tomatoes, and chili powder. Simmer until thick."
22
+ "Egg Fried Rice","rice, egg, green onion, soy sauce, garlic, oil","Scramble egg in a pan. Add cooked rice, garlic, soy sauce, and green onions. Stir-fry well."
23
+ "Margarita Pizza","pizza dough, tomato sauce, mozzarella, basil, olive oil","Spread sauce on dough. Top with mozzarella and basil. Bake until cheese melts."
24
+ "Tuna Sandwich","bread, canned tuna, mayonnaise, lettuce, tomato, salt","Mix tuna with mayo. Layer with lettuce and tomato in bread slices."
25
+ "Greek Salad","tomato, cucumber, feta, olives, red onion, olive oil","Chop vegetables. Toss with feta, olives, and olive oil."
26
+ "Chicken Biryani","chicken, basmati rice, yogurt, onion, spices, saffron","Marinate chicken. Cook with onions and spices. Layer with rice and saffron. Steam until done."
27
+ "Caprese Salad","mozzarella, tomato, basil, olive oil, balsamic vinegar","Slice mozzarella and tomatoes. Layer with basil. Drizzle with oil and vinegar."
28
+ "Stuffed Bell Peppers","bell peppers, rice, ground beef, onion, tomato sauce, cheese","Stuff bell peppers with cooked beef and rice. Top with sauce and cheese. Bake until tender."
29
+ "Fish Tacos","fish fillets, taco shells, cabbage, lime, mayonnaise","Cook fish. Assemble tacos with shredded cabbage and lime mayo sauce."
30
+ "Chicken Alfredo","fettuccine, chicken, cream, parmesan, garlic, butter","Cook pasta. Sauté chicken with garlic. Add cream and parmesan. Combine with pasta."
31
+ "Avocado Toast","bread, avocado, lemon juice, chili flakes, salt","Mash avocado with lemon juice and salt. Spread on toasted bread. Sprinkle chili flakes."
32
+ "Vegetable Soup","mixed vegetables, broth, onion, garlic, herbs, salt","Sauté onion and garlic. Add veggies and broth. Simmer with herbs until tender."
33
+ "BBQ Chicken Wings","chicken wings, BBQ sauce, garlic, salt, pepper","Marinate wings. Bake or grill while brushing with BBQ sauce."
34
+ "Ramen","ramen noodles, broth, boiled egg, green onion, soy sauce","Cook noodles in broth. Add egg and soy sauce. Garnish with green onions."
35
+ "Mango Smoothie","mango, yogurt, milk, honey, ice","Blend mango with yogurt, milk, and honey until smooth. Serve cold."
36
+ "Egg Salad","eggs, mayonnaise, mustard, salt, pepper","Boil and chop eggs. Mix with mayo, mustard, salt, and pepper."
37
+ "Tiramisu","ladyfingers, mascarpone, coffee, cocoa powder, sugar","Layer soaked ladyfingers with mascarpone cream. Dust with cocoa powder. Chill and serve."
38
+ "Shrimp Scampi","shrimp, garlic, butter, lemon juice, spaghetti","Sauté garlic in butter. Add shrimp and lemon juice. Toss with cooked spaghetti."
39
+ "Falafel Wrap","falafel, pita bread, lettuce, tomato, tahini sauce","Place falafel in pita. Add veggies and drizzle tahini sauce."
40
+ "Peanut Butter Cookies","peanut butter, sugar, eggs, baking soda","Mix ingredients. Shape into cookies and bake until golden."
41
+ "Eggplant Parmesan","eggplant, marinara sauce, mozzarella, parmesan, breadcrumbs","Bread and fry eggplant. Layer with sauce and cheese. Bake until bubbly."
42
+ "Shepherd’s Pie","ground lamb, mashed potatoes, peas, carrots, onion","Cook meat with vegetables. Top with mashed potatoes. Bake until golden."
43
+ "Apple Pie","apples, flour, butter, sugar, cinnamon","Make filling with apples and cinnamon. Place in crust and bake until golden."
44
+ "Chicken Nuggets","chicken breast, flour, egg, breadcrumbs, salt","Coat chicken in flour, egg, and breadcrumbs. Fry until golden brown."
45
+ "Zucchini Fritters","zucchini, flour, egg, garlic, salt, pepper","Grate zucchini and mix with ingredients. Fry spoonfuls in oil until crisp."
46
+ "Beef Stroganoff","beef, onion, mushroom, sour cream, egg noodles","Sauté beef and veggies. Stir in sour cream. Serve over cooked noodles."
47
+ "Berry Parfait","yogurt, berries, granola, honey","Layer yogurt with berries and granola. Drizzle honey on top."
48
+ "French Crepes","flour, milk, eggs, sugar, butter","Whisk ingredients. Cook thin crepes in buttered pan. Fill as desired."
49
+ "Tofu Stir Fry","tofu, vegetables, soy sauce, garlic, ginger, oil","Cook tofu until golden. Stir-fry with veggies and sauce."
50
+ "Chana Masala","chickpeas, tomato, onion, garlic, ginger, spices","Sauté onion, garlic, and ginger. Add tomatoes and spices. Stir in chickpeas and simmer."
src/__init__.py ADDED
File without changes
src/answerquery/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .answerquery import AnswerQuery
src/answerquery/answerquery.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_groq import ChatGroq
2
+ from langchain_community.utilities import GoogleSerperAPIWrapper
3
+ from src.settings import settings
4
+ from src.vectorstore import answer_query_from_existing_collection
5
+ from langchain_core.prompts import PromptTemplate
6
+ from langchain_core.runnables import RunnablePassthrough
7
+ from src.schemas import RagResponse
8
+
9
class AnswerQuery:
    """
    Answers questions with a Groq chat model, either from documents retrieved
    out of a Qdrant collection or from Google Serper web-search results.
    """

    def __init__(self, model_name: str = "llama-3.3-70b-versatile"):
        """
        Create the chat model and the web-search wrapper.

        Args:
            model_name: Name of the Groq model to use.
        """
        self.llm = ChatGroq(
            model_name=model_name,
            temperature=0.3,
            max_tokens=512,
            api_key=settings.GROQ_API_KEY,
        )
        self.serper = GoogleSerperAPIWrapper(serper_api_key=settings.SERPER_API_KEY)

    def format_docs(self, docs):
        """Join the ``page_content`` of ``docs`` with blank lines between them."""
        return "\n\n".join(doc.page_content for doc in docs)

    async def answer_query(
        self, vectorembedding, query: str, collection_name: str = "recipe"
    ):
        """
        Answer a query from the documents stored in a collection.

        Args:
            vectorembedding: Embedding model used to open the collection.
            query: The user's question.
            collection_name: Name of the collection to search.

        Returns:
            The chain's structured output, requested as a ``RagResponse``.
        """
        vector_store = await answer_query_from_existing_collection(
            vectorembedding=vectorembedding,
            collection_name_=collection_name,
        )

        # Retriever
        retriever = vector_store.as_retriever(
            search_type="mmr",
            search_kwargs={"k": 3, "lambda_mult": 0.5},
        )
        template = (
            "\n"
            "Answer using ONLY the context below:\n"
            "Context: {context}\n"
            "Question: {question}\n"
            "If context doesn't match with the question, say,I couldn’t find information about this,and set web_search to true.\n"
            "Otherwise, set web_search to false and answer only according to the context.\n"
            "\n"
            "\n"
        )
        prompt = PromptTemplate.from_template(template)
        chain = (
            {
                "context": retriever | self.format_docs,
                "question": RunnablePassthrough(),
            }
            | prompt
            | self.llm.with_structured_output(RagResponse)
        )

        # Use the async entry point: a blocking invoke() inside a coroutine
        # stalls the event loop for the whole retrieval + LLM round trip.
        return await chain.ainvoke(query)

    async def search_web(self, query: str):
        """
        Answer a query from web-search results.

        Args:
            query: The user's question, also used as the search query.

        Returns:
            The text content of the model's reply.
        """
        # Async variant of the Serper call, for the same reason as above.
        search_results = await self.serper.arun(query)
        template = (
            "\n"
            "Answer using ONLY the context below:\n"
            "Context: {context}\n"
            "Question: {question}\n"
            "If context doesn't match with the question, say,I couldn’t find information about this.\n"
        )
        prompt = PromptTemplate.from_template(template)
        chain = (
            {
                # The search results are fixed context; the chain input is
                # only forwarded as the question.
                "context": lambda _: search_results,
                "question": RunnablePassthrough(),
            }
            | prompt
            | self.llm
        )

        reply = await chain.ainvoke(query)
        return reply.content
89
+
90
+
91
+
92
if __name__ == "__main__":
    # Manual smoke test: ask one question against the default collection.
    import asyncio

    from src.embedding import all_minilm_l6_v2

    async def main():
        answer_query = AnswerQuery()
        query = "What is the capital of France?"
        # answer_query() requires the embedding model as well as the query;
        # passing only the query raised a TypeError.
        response = await answer_query.answer_query(
            vectorembedding=all_minilm_l6_v2(),
            query=query,
        )
        print(response)

    asyncio.run(main())
src/embedding/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .embedding import all_minilm_l6_v2
src/embedding/embedding.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_huggingface import HuggingFaceEmbeddings
2
+
3
+
4
+ embedding_instance = HuggingFaceEmbeddings(
5
+ model_name="all-MiniLM-L6-v2",
6
+ model_kwargs={"device": "cpu"},
7
+ )
8
+
9
def all_minilm_l6_v2():
    """Return the module-level ``embedding_instance`` shared by all callers."""
    shared_embeddings = embedding_instance
    return shared_embeddings
src/pipeline/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .pipeline import QAPipeline
src/pipeline/pipeline.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from qdrant_client import QdrantClient
2
+ from src.vectorstore import QdrantVectorStoreDB
3
+ from src.answerquery import AnswerQuery
4
+ from src.embedding import all_minilm_l6_v2
5
+ from src.settings import settings
6
+
7
+
8
class QAPipeline:
    """
    Facade over the QA pipeline: document upload, retrieval-based answering
    and web-search answering.
    """

    def __init__(self):
        """Create the embedding model, Qdrant client, vector store and answerer."""
        self.embeddings = all_minilm_l6_v2()
        self.qdrant_client = QdrantClient(
            url=settings.QDRANT_URL,
            api_key=settings.QDRANT_API_KEY,
        )
        self.vector_store = QdrantVectorStoreDB(
            qdrant_client=self.qdrant_client,
            vector_embedding=self.embeddings,
        )
        self.answer_query = AnswerQuery()

    async def upload_documents(self, documents, collection_name: str = "recipe"):
        """
        Upload documents to the Qdrant vector store.

        Args:
            documents: Documents to index.
            collection_name: Target collection name.
        """
        await self.vector_store.upload_documents(documents, collection_name)

    async def answer_query_(self, query, collection_name: str = "recipe"):
        """
        Answer a query from the documents stored in a collection.

        Args:
            query: The user's question.
            collection_name: Collection to search. It defaults to the same
                collection ``upload_documents`` writes to, so existing
                callers are unaffected.

        Returns:
            Whatever ``AnswerQuery.answer_query`` returns.
        """
        return await self.answer_query.answer_query(
            vectorembedding=self.embeddings,
            query=query,
            collection_name=collection_name,
        )

    async def search_web(self, query):
        """
        Answer a query from web-search results.

        Args:
            query: The user's question.

        Returns:
            Whatever ``AnswerQuery.search_web`` returns.
        """
        return await self.answer_query.search_web(query=query)
src/schemas/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .schemas import RagResponse
src/schemas/schemas.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from pydantic import BaseModel
2
class RagResponse(BaseModel):
    # Structured reply requested from the answering LLM. Documented with
    # comments rather than a docstring so that the schema handed to the model
    # is left untouched.

    # The answer text.
    answer: str
    # Flag the prompt asks the model to set when the context does not match
    # the question; the UI uses it to offer a web search.
    web_search: bool = False
src/settings.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic_settings import BaseSettings, SettingsConfigDict
2
+
3
class Settings(BaseSettings):
    """
    Application configuration, with ``.env`` configured as the env file.
    """

    # API key passed to the Groq chat model.
    GROQ_API_KEY: str
    # URL of the Qdrant instance.
    QDRANT_URL: str
    # API key for the Qdrant instance.
    QDRANT_API_KEY: str
    # API key passed to the Google Serper web-search wrapper.
    SERPER_API_KEY: str

    model_config = SettingsConfigDict(
        extra="ignore",
        env_ignore_empty=True,
        env_file=".env",
    )
17
+
18
+ settings = Settings()
src/utils.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import re
3
def get_logger(name: str) -> logging.Logger:
    """
    Return a logger that writes to the console and to ``botanza.log``.

    The handlers are attached only the first time a given name is requested,
    so repeated calls do not duplicate output.

    Args:
        name: Logger name, typically ``__name__`` of the calling module.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(name)
    if not logger.handlers:  # Prevent adding multiple handlers
        logger.setLevel(logging.INFO)
        formatter = logging.Formatter('[%(asctime)s] [%(levelname)s] %(message)s')

        # Console handler
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)

        # File handler. The encoding is pinned so non-ASCII log messages are
        # written the same way on every platform instead of depending on the
        # locale's default encoding.
        file_handler = logging.FileHandler('botanza.log', encoding='utf-8')
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

        # This logger has its own handlers; do not also hand records to the
        # root logger.
        logger.propagate = False

    return logger
22
+
23
def clean_text(text):
    """
    Normalize text to lowercase ASCII letters, digits and whitespace.

    Accented letters are folded to their base letter first (``é`` -> ``e``),
    so words such as "Sauté" keep all their letters instead of losing the
    accented one. Every remaining character that is not ``a-z``, ``0-9`` or
    whitespace is removed.

    Args:
        text: The string to clean.

    Returns:
        The cleaned, lowercased string.
    """
    import unicodedata  # local import: the module header does not import it

    # NFKD splits an accented letter into base letter + combining mark;
    # dropping the marks leaves the plain letter.
    decomposed = unicodedata.normalize("NFKD", text)
    folded = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    return re.sub(r'[^a-z0-9\s]', '', folded.lower())
src/vectorstore/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .qdrant_document import upload_document_existing_collection, answer_query_from_existing_collection,upload_document_new_collection
2
+ from .qdrant_vector_store import QdrantVectorStoreDB
src/vectorstore/qdrant_document.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_qdrant import QdrantVectorStore,RetrievalMode
2
+ from src.settings import settings
3
+
4
async def answer_query_from_existing_collection(vectorembedding, collection_name_: str):
    """
    Open an existing Qdrant collection as a vector store.

    Args:
        vectorembedding: Embedding model handed to the store.
        collection_name_: Name of the collection to open.

    Returns:
        The ``QdrantVectorStore`` built by ``from_existing_collection``.
    """
    connection_options = {
        "embedding": vectorembedding,
        "collection_name": collection_name_,
        "url": settings.QDRANT_URL,
        "api_key": settings.QDRANT_API_KEY,
        "retrieval_mode": RetrievalMode.DENSE,
    }
    return QdrantVectorStore.from_existing_collection(**connection_options)
13
+
14
async def upload_document_existing_collection(documents_, vector_embeddings, collection_name_):
    """
    Embed documents and store them in a Qdrant collection.

    Args:
        documents_: Documents to embed and store.
        vector_embeddings: Embedding model used to embed the documents.
        collection_name_: Name of the target collection.

    Returns:
        The ``QdrantVectorStore`` created by ``from_documents``. It was
        previously bound to an unused local and discarded; callers that
        ignore the return value are unaffected.
    """
    return QdrantVectorStore.from_documents(
        documents=documents_,
        embedding=vector_embeddings,
        url=settings.QDRANT_URL,
        api_key=settings.QDRANT_API_KEY,
        prefer_grpc=True,
        collection_name=collection_name_,
        retrieval_mode=RetrievalMode.DENSE,
        timeout=None,
    )
25
+
26
async def upload_document_new_collection(vector_embeddings, collection_name_):
    """
    Create a collection by uploading an empty document list.

    ``force_recreate=True`` is passed to ``from_documents``.
    NOTE(review): presumably this drops and rebuilds a collection that
    already exists under this name — confirm before using on live data.

    Args:
        vector_embeddings: Embedding model handed to the store.
        collection_name_: Name of the collection to create.

    Returns:
        The ``QdrantVectorStore`` created by ``from_documents``. It was
        previously bound to an unused local and discarded; callers that
        ignore the return value are unaffected.
    """
    return QdrantVectorStore.from_documents(
        documents=[],
        embedding=vector_embeddings,
        url=settings.QDRANT_URL,
        api_key=settings.QDRANT_API_KEY,
        prefer_grpc=True,
        collection_name=collection_name_,
        retrieval_mode=RetrievalMode.DENSE,
        force_recreate=True,
        timeout=None,
    )
src/vectorstore/qdrant_vector_store.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.vectorstore import upload_document_existing_collection,answer_query_from_existing_collection,upload_document_new_collection
2
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
3
+ from src.utils import logging,clean_text
4
+ logger = logging.getLogger(__name__)
5
+
6
+
7
+
8
+
9
class QdrantVectorStoreDB:
    """
    A class that handles the Qdrant vector store database operations.
    """

    def __init__(self, qdrant_client, vector_embedding):
        """
        Args:
            qdrant_client: Qdrant client instance; stored on the object.
            vector_embedding: Embedding model used for every upload.
        """
        self.qdrant_client = qdrant_client
        self.vector_embedding = vector_embedding

    async def create_collection(self, collection_name: str):
        """
        Create a new collection in Qdrant.

        Args:
            collection_name: Name of the collection to create.

        Raises:
            Exception: Any error from the underlying upload is logged and
                re-raised unchanged.
        """
        try:
            await upload_document_new_collection(self.vector_embedding, collection_name)
        except Exception:
            # The message used to say "creating chatbot", a copy-paste
            # leftover. logger.exception also records the traceback.
            logger.exception("Error creating collection %s", collection_name)
            raise
        logger.info("Collection %s created successfully.", collection_name)

    async def upload_documents(self, documents, collection_name_: str = "recipe"):
        """
        Clean, split and upload documents to the Qdrant vector store.

        NOTE: ``page_content`` of each document is cleaned in place, so the
        caller's document objects are modified.

        Args:
            documents: Documents to upload.
            collection_name_: Name of the target collection.

        Raises:
            Exception: Any error while cleaning, splitting or uploading is
                logged and re-raised unchanged.
        """
        try:
            splitter = RecursiveCharacterTextSplitter(
                chunk_size=1500,
                chunk_overlap=150,
                add_start_index=True,
            )
            for doc in documents:
                doc.page_content = clean_text(doc.page_content)

            chunks = splitter.split_documents(documents)
            await upload_document_existing_collection(
                documents_=chunks,
                vector_embeddings=self.vector_embedding,
                collection_name_=collection_name_,
            )
        except Exception:
            logger.exception("Error uploading documents to %s", collection_name_)
            raise
        logger.info("Documents uploaded successfully to %s collection.", collection_name_)
55
+
56
+
57
+
58
+
uv.lock ADDED
The diff for this file is too large to render. See raw diff