vikramvasudevan committed on
Commit
fd1b271
·
verified ·
1 Parent(s): 9609347

Upload folder using huggingface_hub

Browse files
Files changed (12) hide show
  1. .github/workflows/update_space.yml +28 -28
  2. README.md +1 -0
  3. app.py +74 -74
  4. config.py +108 -108
  5. db.py +35 -35
  6. drive_downloader.py +44 -44
  7. embeddings.py +9 -9
  8. graph_helper.py +103 -103
  9. push_notifications_helper.py +24 -24
  10. sanatan_assistant.py +98 -98
  11. serperdev_helper.py +13 -13
  12. tools.py +28 -28
.github/workflows/update_space.yml CHANGED
@@ -1,28 +1,28 @@
1
- name: Run Python script
2
-
3
- on:
4
- push:
5
- branches:
6
- - main
7
-
8
- jobs:
9
- build:
10
- runs-on: ubuntu-latest
11
-
12
- steps:
13
- - name: Checkout
14
- uses: actions/checkout@v2
15
-
16
- - name: Set up Python
17
- uses: actions/setup-python@v2
18
- with:
19
- python-version: '3.9'
20
-
21
- - name: Install Gradio
22
- run: python -m pip install gradio
23
-
24
- - name: Log in to Hugging Face
25
- run: python -c 'import huggingface_hub; huggingface_hub.login(token="${{ secrets.hf_token }}")'
26
-
27
- - name: Deploy to Spaces
28
- run: gradio deploy
 
1
+ name: Run Python script
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ jobs:
9
+ build:
10
+ runs-on: ubuntu-latest
11
+
12
+ steps:
13
+ - name: Checkout
14
+ uses: actions/checkout@v2
15
+
16
+ - name: Set up Python
17
+ uses: actions/setup-python@v2
18
+ with:
19
+ python-version: '3.9'
20
+
21
+ - name: Install Gradio
22
+ run: python -m pip install gradio
23
+
24
+ - name: Log in to Hugging Face
25
+ run: python -c 'import huggingface_hub; huggingface_hub.login(token="${{ secrets.hf_token }}")'
26
+
27
+ - name: Deploy to Spaces
28
+ run: gradio deploy
README.md CHANGED
@@ -3,4 +3,5 @@ title: sanatan_ai
3
  app_file: app.py
4
  sdk: gradio
5
  sdk_version: 5.38.0
 
6
  ---
 
3
  app_file: app.py
4
  sdk: gradio
5
  sdk_version: 5.38.0
6
+ python_version: 3.12
7
  ---
app.py CHANGED
@@ -1,74 +1,74 @@
1
- import os
2
- import gradio as gr
3
- from config import SanatanConfig
4
- from drive_downloader import ZipDownloader
5
- from graph_helper import generate_graph
6
- import uuid
7
- import logging
8
- from dotenv import load_dotenv
9
-
10
- logging.basicConfig()
11
- logger = logging.getLogger()
12
- logger.setLevel(logging.INFO)
13
-
14
- graph = generate_graph()
15
-
16
- def init():
17
- load_dotenv(override=True)
18
-
19
- downloader = ZipDownloader(
20
- service_account_json=os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON")
21
- )
22
- # 1. Download zip file
23
- zip_path = downloader.download_zip_from_drive(
24
- file_id=os.getenv("CHROMADB_FILE_ID"),
25
- output_path=SanatanConfig.dbStorePath,
26
- )
27
-
28
- # 2. Extract it
29
- downloader.unzip(zip_path, extract_to="./")
30
-
31
- def init_session():
32
- # Generate a unique ID per browser session
33
- return str(uuid.uuid4())
34
-
35
-
36
- def chat(message, history, thread_id):
37
- print("received chat message for thread:", thread_id)
38
- config = {"configurable": {"thread_id": thread_id}}
39
- response = graph.invoke(
40
- {"messages": [{"role": "user", "content": message}]}, config=config
41
- )
42
- return response["messages"][-1].content
43
-
44
-
45
- thread_id = gr.State(init_session)
46
- supported_scriptures = "\n - ".join(
47
- [scripture["title"] for scripture in SanatanConfig.scriptures]
48
- )
49
- intro_messages = [
50
- {"role" : "assistant","content" : f"Namaskaram! I can assist with the following scriptures: \n - {supported_scriptures}"},
51
- {"role" : "assistant","content" : """
52
- #### You can ask me questions like:
53
- - How do all scriptures describe the form of Vishnu?
54
- - What did Krishna teach in Gita?
55
- - How did Arjun react on seeing the Vishwarupa form of Krishna?
56
- - Give some names of Vishnu from Sahasranamam.
57
- """}
58
- ]
59
- chatbot = gr.Chatbot(
60
- elem_id="chatbot",
61
- avatar_images=("assets/avatar_user.png", "assets/adiyen_bot.png"),
62
- value=intro_messages,
63
- label="Sanatan-Bot",
64
- show_copy_button=True,
65
- show_copy_all_button=True,
66
- type="messages",
67
- height=800
68
- )
69
- chatInterface = gr.ChatInterface(title="Sanatan-AI",
70
- fn=chat, additional_inputs=[thread_id], chatbot=chatbot
71
- )
72
- # initializze database
73
- init()
74
- chatInterface.launch()
 
1
+ import os
2
+ import gradio as gr
3
+ from config import SanatanConfig
4
+ from drive_downloader import ZipDownloader
5
+ from graph_helper import generate_graph
6
+ import uuid
7
+ import logging
8
+ from dotenv import load_dotenv
9
+
10
+ logging.basicConfig()
11
+ logger = logging.getLogger()
12
+ logger.setLevel(logging.INFO)
13
+
14
+ graph = generate_graph()
15
+
16
+ def init():
17
+ load_dotenv(override=True)
18
+
19
+ downloader = ZipDownloader(
20
+ service_account_json=os.getenv("GOOGLE_SERVICE_ACCOUNT_JSON")
21
+ )
22
+ # 1. Download zip file
23
+ zip_path = downloader.download_zip_from_drive(
24
+ file_id=os.getenv("CHROMADB_FILE_ID"),
25
+ output_path=SanatanConfig.dbStorePath,
26
+ )
27
+
28
+ # 2. Extract it
29
+ downloader.unzip(zip_path, extract_to="./")
30
+
31
+ def init_session():
32
+ # Generate a unique ID per browser session
33
+ return str(uuid.uuid4())
34
+
35
+
36
+ def chat(message, history, thread_id):
37
+ print("received chat message for thread:", thread_id)
38
+ config = {"configurable": {"thread_id": thread_id}}
39
+ response = graph.invoke(
40
+ {"messages": [{"role": "user", "content": message}]}, config=config
41
+ )
42
+ return response["messages"][-1].content
43
+
44
+
45
+ thread_id = gr.State(init_session)
46
+ supported_scriptures = "\n - ".join(
47
+ [scripture["title"] for scripture in SanatanConfig.scriptures]
48
+ )
49
+ intro_messages = [
50
+ {"role" : "assistant","content" : f"Namaskaram! I can assist with the following scriptures: \n - {supported_scriptures}"},
51
+ {"role" : "assistant","content" : """
52
+ #### You can ask me questions like:
53
+ - How do all scriptures describe the form of Vishnu?
54
+ - What did Krishna teach in Gita?
55
+ - How did Arjun react on seeing the Vishwarupa form of Krishna?
56
+ - Give some names of Vishnu from Sahasranamam.
57
+ """}
58
+ ]
59
+ chatbot = gr.Chatbot(
60
+ elem_id="chatbot",
61
+ avatar_images=("assets/avatar_user.png", "assets/adiyen_bot.png"),
62
+ value=intro_messages,
63
+ label="Sanatan-Bot",
64
+ show_copy_button=True,
65
+ show_copy_all_button=True,
66
+ type="messages",
67
+ height=800
68
+ )
69
+ chatInterface = gr.ChatInterface(title="Sanatan-AI",
70
+ fn=chat, additional_inputs=[thread_id], chatbot=chatbot
71
+ )
72
+ # initializze database
73
+ init()
74
+ chatInterface.launch()
config.py CHANGED
@@ -1,108 +1,108 @@
1
- class SanatanConfig:
2
- # shuklaYajurVedamPdfPath: str = "./data/shukla-yajur-veda.pdf"
3
- # shuklaYajurVedamSmallPdfPath: str = "./data/shukla-yajur-veda-small.pdf"
4
- # vishnuPuranamPdfPath = "./data/vishnu_puranam.pdf"
5
- # datastores = [{"name": "sanskrit_001", "dbStorePath": "./chromadb-store"}, {"name": "nalayiram", "dbStorePath": "./chromadb-store-4000"}]
6
- dbStorePath: str = "./chromadb-store"
7
- # shuklaYajurVedamCollectionName: str = "shukla_yajur_vedam"
8
- # vishnuPuranamCollectionName: str = "vishnu_puranam"
9
- # shuklaYajurVedamOutputDir = "./output/shukla_yajur_vedam"
10
- # vishnuPuranamOutputDir = "./output/vishnu_puranam"
11
- scriptures = [
12
- {
13
- "name": "vishnu_puranam",
14
- "title": "Sri Vishnu Puranam",
15
- "output_dir": "./output/vishnu_puranam",
16
- "collection_name": "vishnu_puranam",
17
- "pdf_path": "./data/vishnu_puranam.pdf",
18
- "language": "san+eng",
19
- "example_labels": [
20
- "Vishnu's form",
21
- "About the five elements",
22
- "About Garuda",
23
- "Weapons of Vishnu",
24
- ],
25
- "examples": [
26
- "describe Vishnu's form",
27
- "five elements and their significance",
28
- "What is the significance of Garuda? Show some verses that describe him.",
29
- "What weapons does Vishnu hold?",
30
- ],
31
- },
32
- {
33
- "name": "shukla_yajur_vedam",
34
- "title": "Shukla Yajur Vedam",
35
- "output_dir": "./output/shukla_yajur_vedam",
36
- "collection_name": "shukla_yajur_vedam",
37
- "pdf_path": "./data/shukla-yajur-veda.pdf",
38
- "language": "san+eng",
39
- "example_labels": [
40
- "About Vedam",
41
- "About the five elements",
42
- "About Brahma",
43
- ],
44
- "examples": [
45
- "Gist of Shukla Yajur Vedam. Give me some sanskrit verses.",
46
- "What is the significance of fire and water. show some sanskrit verses",
47
- "Brahma",
48
- ],
49
- },
50
- {
51
- "name": "bhagavat_gita",
52
- "title": "Bhagavat Gita",
53
- "output_dir": "./output/bhagavat_gita",
54
- "collection_name": "bhagavat_gita",
55
- "pdf_path": "./data/bhagavat_gita.pdf",
56
- "language": "san+eng",
57
- "example_labels": [
58
- "About Arjuna",
59
- "About Karma",
60
- "About birth and death",
61
- "About the battle field",
62
- "About Krishna's form",
63
- ],
64
- "examples": [
65
- "Show some verses where Krishna advises Arjuna",
66
- "What does Krishna say about Karma",
67
- "What does Krishna say about birth and death",
68
- "describe the battle field",
69
- "Vishwarupa",
70
- ],
71
- },
72
- {
73
- "name": "valmiki_ramayanam",
74
- "title": "Valmiki Ramayanam",
75
- "output_dir": "./output/valmiki_ramayanam",
76
- "collection_name": "valmiki_ramayanam",
77
- "pdf_path": "./data/valmiki_ramayanam.pdf",
78
- "language": "san+eng",
79
- "example_labels": [
80
- "About Jatayu",
81
- "About Hanuman",
82
- "About Vali",
83
- "About Sita",
84
- "About Ravana",
85
- ],
86
- "examples": [
87
- "What is the significance of Jatayu? show some sanskrit verses to support the argument",
88
- "Show some verses where Hanuman is mentioned",
89
- "How did Rama kill Vali",
90
- "How was Sita abducted",
91
- "How did Rama kill Ravana?",
92
- ],
93
- },
94
- {
95
- "name": "vishnu_sahasranamam",
96
- "title": "Vishnu Sahasranamam",
97
- "output_dir": "./output/vishnu_sahasranamam",
98
- "collection_name": "vishnu_sahasranamam",
99
- "pdf_path": "./data/vishnu_sahasranamam.pdf",
100
- "language": "san+eng",
101
- "example_labels": ["Vanamali", "1000 names", "Sanskrit text search"],
102
- "examples": [
103
- "Vanamali",
104
- "Show some of the 1000 names of Vishnu along with their meaning",
105
- "show the verse that begins with शुक्लाम्बरधरं",
106
- ],
107
- },
108
- ]
 
1
+ class SanatanConfig:
2
+ # shuklaYajurVedamPdfPath: str = "./data/shukla-yajur-veda.pdf"
3
+ # shuklaYajurVedamSmallPdfPath: str = "./data/shukla-yajur-veda-small.pdf"
4
+ # vishnuPuranamPdfPath = "./data/vishnu_puranam.pdf"
5
+ # datastores = [{"name": "sanskrit_001", "dbStorePath": "./chromadb-store"}, {"name": "nalayiram", "dbStorePath": "./chromadb-store-4000"}]
6
+ dbStorePath: str = "./chromadb-store"
7
+ # shuklaYajurVedamCollectionName: str = "shukla_yajur_vedam"
8
+ # vishnuPuranamCollectionName: str = "vishnu_puranam"
9
+ # shuklaYajurVedamOutputDir = "./output/shukla_yajur_vedam"
10
+ # vishnuPuranamOutputDir = "./output/vishnu_puranam"
11
+ scriptures = [
12
+ {
13
+ "name": "vishnu_puranam",
14
+ "title": "Sri Vishnu Puranam",
15
+ "output_dir": "./output/vishnu_puranam",
16
+ "collection_name": "vishnu_puranam",
17
+ "pdf_path": "./data/vishnu_puranam.pdf",
18
+ "language": "san+eng",
19
+ "example_labels": [
20
+ "Vishnu's form",
21
+ "About the five elements",
22
+ "About Garuda",
23
+ "Weapons of Vishnu",
24
+ ],
25
+ "examples": [
26
+ "describe Vishnu's form",
27
+ "five elements and their significance",
28
+ "What is the significance of Garuda? Show some verses that describe him.",
29
+ "What weapons does Vishnu hold?",
30
+ ],
31
+ },
32
+ {
33
+ "name": "shukla_yajur_vedam",
34
+ "title": "Shukla Yajur Vedam",
35
+ "output_dir": "./output/shukla_yajur_vedam",
36
+ "collection_name": "shukla_yajur_vedam",
37
+ "pdf_path": "./data/shukla-yajur-veda.pdf",
38
+ "language": "san+eng",
39
+ "example_labels": [
40
+ "About Vedam",
41
+ "About the five elements",
42
+ "About Brahma",
43
+ ],
44
+ "examples": [
45
+ "Gist of Shukla Yajur Vedam. Give me some sanskrit verses.",
46
+ "What is the significance of fire and water. show some sanskrit verses",
47
+ "Brahma",
48
+ ],
49
+ },
50
+ {
51
+ "name": "bhagavat_gita",
52
+ "title": "Bhagavat Gita",
53
+ "output_dir": "./output/bhagavat_gita",
54
+ "collection_name": "bhagavat_gita",
55
+ "pdf_path": "./data/bhagavat_gita.pdf",
56
+ "language": "san+eng",
57
+ "example_labels": [
58
+ "About Arjuna",
59
+ "About Karma",
60
+ "About birth and death",
61
+ "About the battle field",
62
+ "About Krishna's form",
63
+ ],
64
+ "examples": [
65
+ "Show some verses where Krishna advises Arjuna",
66
+ "What does Krishna say about Karma",
67
+ "What does Krishna say about birth and death",
68
+ "describe the battle field",
69
+ "Vishwarupa",
70
+ ],
71
+ },
72
+ {
73
+ "name": "valmiki_ramayanam",
74
+ "title": "Valmiki Ramayanam",
75
+ "output_dir": "./output/valmiki_ramayanam",
76
+ "collection_name": "valmiki_ramayanam",
77
+ "pdf_path": "./data/valmiki_ramayanam.pdf",
78
+ "language": "san+eng",
79
+ "example_labels": [
80
+ "About Jatayu",
81
+ "About Hanuman",
82
+ "About Vali",
83
+ "About Sita",
84
+ "About Ravana",
85
+ ],
86
+ "examples": [
87
+ "What is the significance of Jatayu? show some sanskrit verses to support the argument",
88
+ "Show some verses where Hanuman is mentioned",
89
+ "How did Rama kill Vali",
90
+ "How was Sita abducted",
91
+ "How did Rama kill Ravana?",
92
+ ],
93
+ },
94
+ {
95
+ "name": "vishnu_sahasranamam",
96
+ "title": "Vishnu Sahasranamam",
97
+ "output_dir": "./output/vishnu_sahasranamam",
98
+ "collection_name": "vishnu_sahasranamam",
99
+ "pdf_path": "./data/vishnu_sahasranamam.pdf",
100
+ "language": "san+eng",
101
+ "example_labels": ["Vanamali", "1000 names", "Sanskrit text search"],
102
+ "examples": [
103
+ "Vanamali",
104
+ "Show some of the 1000 names of Vishnu along with their meaning",
105
+ "show the verse that begins with शुक्लाम्बरधरं",
106
+ ],
107
+ },
108
+ ]
db.py CHANGED
@@ -1,35 +1,35 @@
1
- import chromadb
2
-
3
- from config import SanatanConfig
4
- from embeddings import get_embedding
5
- import logging
6
-
7
- logger = logging.getLogger(__name__)
8
- logger.setLevel(logging.INFO)
9
-
10
-
11
- class SanatanDatabase:
12
- def __init__(self) -> None:
13
- self.chroma_client = chromadb.PersistentClient(path=SanatanConfig.dbStorePath)
14
-
15
- def does_data_exist(self, collection_name: str) -> bool:
16
- collection = self.chroma_client.get_or_create_collection(name=collection_name)
17
- num_rows = collection.count()
18
- logger.info("num_rows in %s = %d", collection_name, num_rows)
19
- return num_rows > 0
20
-
21
- def load(self, collection_name: str, ids, documents, embeddings, metadatas):
22
- collection = self.chroma_client.get_or_create_collection(name=collection_name)
23
- collection.add(
24
- ids=ids,
25
- documents=documents,
26
- embeddings=embeddings,
27
- metadatas=metadatas,
28
- )
29
-
30
- def search(self, collection_name: str, query: str, n_results=2):
31
- collection = self.chroma_client.get_or_create_collection(name=collection_name)
32
- response = collection.query(
33
- query_embeddings=[get_embedding(query)], n_results=n_results
34
- )
35
- return response
 
1
+ import chromadb
2
+
3
+ from config import SanatanConfig
4
+ from embeddings import get_embedding
5
+ import logging
6
+
7
+ logger = logging.getLogger(__name__)
8
+ logger.setLevel(logging.INFO)
9
+
10
+
11
+ class SanatanDatabase:
12
+ def __init__(self) -> None:
13
+ self.chroma_client = chromadb.PersistentClient(path=SanatanConfig.dbStorePath)
14
+
15
+ def does_data_exist(self, collection_name: str) -> bool:
16
+ collection = self.chroma_client.get_or_create_collection(name=collection_name)
17
+ num_rows = collection.count()
18
+ logger.info("num_rows in %s = %d", collection_name, num_rows)
19
+ return num_rows > 0
20
+
21
+ def load(self, collection_name: str, ids, documents, embeddings, metadatas):
22
+ collection = self.chroma_client.get_or_create_collection(name=collection_name)
23
+ collection.add(
24
+ ids=ids,
25
+ documents=documents,
26
+ embeddings=embeddings,
27
+ metadatas=metadatas,
28
+ )
29
+
30
+ def search(self, collection_name: str, query: str, n_results=2):
31
+ collection = self.chroma_client.get_or_create_collection(name=collection_name)
32
+ response = collection.query(
33
+ query_embeddings=[get_embedding(query)], n_results=n_results
34
+ )
35
+ return response
drive_downloader.py CHANGED
@@ -1,44 +1,44 @@
1
- import os
2
- import json
3
- import io
4
- import zipfile
5
-
6
- from google.oauth2.service_account import Credentials
7
- from googleapiclient.discovery import build
8
- from googleapiclient.http import MediaIoBaseDownload
9
-
10
-
11
- class ZipDownloader:
12
- def __init__(self, service_account_json: str):
13
- self.creds = Credentials.from_service_account_info(json.loads(service_account_json))
14
- self.service = build("drive", "v3", credentials=self.creds)
15
-
16
- def download_zip_from_drive(self, file_id: str, output_path: str) -> str:
17
- """
18
- Downloads a ZIP file from Google Drive and saves it locally.
19
- Returns the path to the downloaded ZIP.
20
- """
21
- request = self.service.files().get_media(fileId=file_id)
22
- local_zip_path = os.path.join(output_path, "downloaded.zip")
23
-
24
- os.makedirs(output_path, exist_ok=True)
25
- fh = io.FileIO(local_zip_path, 'wb')
26
- downloader = MediaIoBaseDownload(fh, request)
27
-
28
- print(f"⬇️ Downloading ZIP file from Drive ID: {file_id}")
29
- done = False
30
- while not done:
31
- status, done = downloader.next_chunk()
32
- print(f" ⏬ Progress: {int(status.progress() * 100)}%")
33
-
34
- print(f"✅ ZIP downloaded to: {local_zip_path}")
35
- return local_zip_path
36
-
37
- def unzip(self, zip_path: str, extract_to: str):
38
- """
39
- Unzips the downloaded ZIP file to a specified directory.
40
- """
41
- print(f"📂 Extracting ZIP: {zip_path} -> {extract_to}")
42
- with zipfile.ZipFile(zip_path, 'r') as zip_ref:
43
- zip_ref.extractall(extract_to)
44
- print("✅ Extraction complete.")
 
1
+ import os
2
+ import json
3
+ import io
4
+ import zipfile
5
+
6
+ from google.oauth2.service_account import Credentials
7
+ from googleapiclient.discovery import build
8
+ from googleapiclient.http import MediaIoBaseDownload
9
+
10
+
11
+ class ZipDownloader:
12
+ def __init__(self, service_account_json: str):
13
+ self.creds = Credentials.from_service_account_info(json.loads(service_account_json))
14
+ self.service = build("drive", "v3", credentials=self.creds)
15
+
16
+ def download_zip_from_drive(self, file_id: str, output_path: str) -> str:
17
+ """
18
+ Downloads a ZIP file from Google Drive and saves it locally.
19
+ Returns the path to the downloaded ZIP.
20
+ """
21
+ request = self.service.files().get_media(fileId=file_id)
22
+ local_zip_path = os.path.join(output_path, "downloaded.zip")
23
+
24
+ os.makedirs(output_path, exist_ok=True)
25
+ fh = io.FileIO(local_zip_path, 'wb')
26
+ downloader = MediaIoBaseDownload(fh, request)
27
+
28
+ print(f"⬇️ Downloading ZIP file from Drive ID: {file_id}")
29
+ done = False
30
+ while not done:
31
+ status, done = downloader.next_chunk()
32
+ print(f" ⏬ Progress: {int(status.progress() * 100)}%")
33
+
34
+ print(f"✅ ZIP downloaded to: {local_zip_path}")
35
+ return local_zip_path
36
+
37
+ def unzip(self, zip_path: str, extract_to: str):
38
+ """
39
+ Unzips the downloaded ZIP file to a specified directory.
40
+ """
41
+ print(f"📂 Extracting ZIP: {zip_path} -> {extract_to}")
42
+ with zipfile.ZipFile(zip_path, 'r') as zip_ref:
43
+ zip_ref.extractall(extract_to)
44
+ print("✅ Extraction complete.")
embeddings.py CHANGED
@@ -1,9 +1,9 @@
1
-
2
- from sentence_transformers import SentenceTransformer
3
-
4
- # Step 1: Load SentenceTransformer model
5
- # model = SentenceTransformer("all-MiniLM-L6-v2")
6
- model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
7
-
8
- def get_embedding(text: str) -> list:
9
- return model.encode(text).tolist()
 
1
+
2
+ from sentence_transformers import SentenceTransformer
3
+
4
+ # Step 1: Load SentenceTransformer model
5
+ # model = SentenceTransformer("all-MiniLM-L6-v2")
6
+ model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
7
+
8
+ def get_embedding(text: str) -> list:
9
+ return model.encode(text).tolist()
graph_helper.py CHANGED
@@ -1,103 +1,103 @@
1
- from typing import Annotated, TypedDict
2
- from langgraph.graph import StateGraph, START, END
3
- from langgraph.checkpoint.memory import MemorySaver
4
- from langgraph.graph.message import add_messages
5
- from langchain_openai import ChatOpenAI
6
- from langgraph.graph.state import CompiledStateGraph
7
- from tools import (
8
- tool_format_scripture_answer,
9
- tool_search_db,
10
- tool_search_web,
11
- tool_push,
12
- )
13
- from langgraph.prebuilt import ToolNode, tools_condition
14
- from langchain_core.messages import SystemMessage, ToolMessage, HumanMessage
15
- import logging
16
-
17
- logger = logging.getLogger(__name__)
18
- logger.setLevel(logging.INFO)
19
-
20
-
21
- class ChatState(TypedDict):
22
- messages: Annotated[list[str], add_messages]
23
-
24
-
25
- def generate_graph() -> CompiledStateGraph:
26
- memory = MemorySaver()
27
- tools = [tool_search_web, tool_push, tool_search_db, tool_format_scripture_answer]
28
- llm = ChatOpenAI(model="gpt-4o-mini").bind_tools(tools)
29
-
30
- def chatNode(state: ChatState) -> ChatState:
31
- # logger.info("messages before LLM: %s", str(state["messages"]))
32
- response = llm.invoke(state["messages"])
33
- # return {"messages": [response]}
34
- return {"messages": state["messages"] + [response]}
35
-
36
- def init_system_prompt_node(state: ChatState) -> ChatState:
37
- messages = state["messages"] or []
38
-
39
- # Check if system prompts were already added
40
- already_has_prompt = any(
41
- isinstance(m, SystemMessage) and "format_scripture_answer" in m.content
42
- for m in messages
43
- )
44
-
45
- if not already_has_prompt:
46
- messages += [
47
- SystemMessage(
48
- content="⚠️ Do NOT summarize or compress the output from the `query` tool. It will be passed directly to `format_scripture_answer` tool that formats the answer **AS IS**. DO NOT REMOVE SANSKRIT TEXTS"
49
- ),
50
- SystemMessage(
51
- content="You MUST call the `format_scripture_answer` tool if the user question is about scripture content and the `query` tool has returned a result."
52
- ),
53
- SystemMessage(
54
- content="If the user's question is about any scripture content (even if multiple scriptures), you must use the `tool_search_db`. Only use `tool_search_web` for general non-scriptural questions."
55
- ),
56
- ]
57
-
58
- return {"messages": messages}
59
-
60
- # def chatNode(state: ChatState) -> ChatState:
61
- # messages = state["messages"]
62
- # system_prompt = None
63
- # new_messages = []
64
-
65
- # for m in messages:
66
- # if isinstance(m, ToolMessage):
67
- # print("m.name = ", m.name)
68
- # if m.name == "format_scripture_answer":
69
- # system_prompt = m.content
70
- # else:
71
- # new_messages.append(m)
72
-
73
- # if system_prompt:
74
- # full_history = [
75
- # SystemMessage(content=system_prompt),
76
- # SystemMessage(
77
- # content="⚠️ Do NOT summarize or compress the context from the query tool. It will be passed directly to another tool that formats the answer."
78
- # ),
79
- # SystemMessage(
80
- # content="You MUST call the `format_scripture_answer` tool if the user question is about scripture content and the query tool has returned a result."
81
- # ),
82
- # ] + new_messages
83
- # else:
84
- # full_history = messages
85
-
86
- # # 🔍 Debug log (optional)
87
- # # print("\n🧠 LLM Full History:")
88
- # # for m in full_history:
89
- # # print(f"- {m.type.upper()}: {m.content[:100]}...\n")
90
-
91
- # ai_response = llm.invoke(full_history)
92
- # return {"messages": messages + [ai_response]}
93
-
94
- graph = StateGraph(ChatState)
95
- graph.add_node("init", init_system_prompt_node)
96
- graph.add_node("chat", chatNode)
97
- graph.add_node("tools", ToolNode(tools))
98
- graph.add_edge(START, "init")
99
- graph.add_edge("init", "chat")
100
- graph.add_conditional_edges("chat", tools_condition, "tools")
101
- graph.add_edge("tools", "chat")
102
-
103
- return graph.compile(checkpointer=memory)
 
1
+ from typing import Annotated, TypedDict
2
+ from langgraph.graph import StateGraph, START, END
3
+ from langgraph.checkpoint.memory import MemorySaver
4
+ from langgraph.graph.message import add_messages
5
+ from langchain_openai import ChatOpenAI
6
+ from langgraph.graph.state import CompiledStateGraph
7
+ from tools import (
8
+ tool_format_scripture_answer,
9
+ tool_search_db,
10
+ tool_search_web,
11
+ tool_push,
12
+ )
13
+ from langgraph.prebuilt import ToolNode, tools_condition
14
+ from langchain_core.messages import SystemMessage, ToolMessage, HumanMessage
15
+ import logging
16
+
17
+ logger = logging.getLogger(__name__)
18
+ logger.setLevel(logging.INFO)
19
+
20
+
21
+ class ChatState(TypedDict):
22
+ messages: Annotated[list[str], add_messages]
23
+
24
+
25
+ def generate_graph() -> CompiledStateGraph:
26
+ memory = MemorySaver()
27
+ tools = [tool_search_web, tool_push, tool_search_db, tool_format_scripture_answer]
28
+ llm = ChatOpenAI(model="gpt-4o-mini").bind_tools(tools)
29
+
30
+ def chatNode(state: ChatState) -> ChatState:
31
+ # logger.info("messages before LLM: %s", str(state["messages"]))
32
+ response = llm.invoke(state["messages"])
33
+ # return {"messages": [response]}
34
+ return {"messages": state["messages"] + [response]}
35
+
36
+ def init_system_prompt_node(state: ChatState) -> ChatState:
37
+ messages = state["messages"] or []
38
+
39
+ # Check if system prompts were already added
40
+ already_has_prompt = any(
41
+ isinstance(m, SystemMessage) and "format_scripture_answer" in m.content
42
+ for m in messages
43
+ )
44
+
45
+ if not already_has_prompt:
46
+ messages += [
47
+ SystemMessage(
48
+ content="⚠️ Do NOT summarize or compress the output from the `query` tool. It will be passed directly to `format_scripture_answer` tool that formats the answer **AS IS**. DO NOT REMOVE SANSKRIT TEXTS"
49
+ ),
50
+ SystemMessage(
51
+ content="You MUST call the `format_scripture_answer` tool if the user question is about scripture content and the `query` tool has returned a result."
52
+ ),
53
+ SystemMessage(
54
+ content="If the user's question is about any scripture content (even if multiple scriptures), you must use the `tool_search_db`. Only use `tool_search_web` for general non-scriptural questions."
55
+ ),
56
+ ]
57
+
58
+ return {"messages": messages}
59
+
60
+ # def chatNode(state: ChatState) -> ChatState:
61
+ # messages = state["messages"]
62
+ # system_prompt = None
63
+ # new_messages = []
64
+
65
+ # for m in messages:
66
+ # if isinstance(m, ToolMessage):
67
+ # print("m.name = ", m.name)
68
+ # if m.name == "format_scripture_answer":
69
+ # system_prompt = m.content
70
+ # else:
71
+ # new_messages.append(m)
72
+
73
+ # if system_prompt:
74
+ # full_history = [
75
+ # SystemMessage(content=system_prompt),
76
+ # SystemMessage(
77
+ # content="⚠️ Do NOT summarize or compress the context from the query tool. It will be passed directly to another tool that formats the answer."
78
+ # ),
79
+ # SystemMessage(
80
+ # content="You MUST call the `format_scripture_answer` tool if the user question is about scripture content and the query tool has returned a result."
81
+ # ),
82
+ # ] + new_messages
83
+ # else:
84
+ # full_history = messages
85
+
86
+ # # 🔍 Debug log (optional)
87
+ # # print("\n🧠 LLM Full History:")
88
+ # # for m in full_history:
89
+ # # print(f"- {m.type.upper()}: {m.content[:100]}...\n")
90
+
91
+ # ai_response = llm.invoke(full_history)
92
+ # return {"messages": messages + [ai_response]}
93
+
94
+ graph = StateGraph(ChatState)
95
+ graph.add_node("init", init_system_prompt_node)
96
+ graph.add_node("chat", chatNode)
97
+ graph.add_node("tools", ToolNode(tools))
98
+ graph.add_edge(START, "init")
99
+ graph.add_edge("init", "chat")
100
+ graph.add_conditional_edges("chat", tools_condition, "tools")
101
+ graph.add_edge("tools", "chat")
102
+
103
+ return graph.compile(checkpointer=memory)
push_notifications_helper.py CHANGED
@@ -1,24 +1,24 @@
1
- import os
2
- import requests
3
- from dotenv import load_dotenv
4
- import logging
5
-
6
- logger = logging.getLogger()
7
- logger.setLevel(logging.INFO)
8
-
9
- load_dotenv(override=True)
10
-
11
- pushover_token = os.getenv("PUSHOVER_TOKEN")
12
- pushover_user = os.getenv("PUSHOVER_USER")
13
- pushover_url = "https://api.pushover.net/1/messages.json"
14
-
15
-
16
- def push(text: str):
17
- """Send a push notification to the user"""
18
- logger.info("Sending a push notification for %s", text)
19
- response = requests.post(
20
- pushover_url,
21
- data={"token": pushover_token, "user": pushover_user, "message": text},
22
- )
23
- logger.info("response = %s", response.json())
24
- logger.info("Sent notification")
 
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+ import logging
5
+
6
+ logger = logging.getLogger()
7
+ logger.setLevel(logging.INFO)
8
+
9
+ load_dotenv(override=True)
10
+
11
+ pushover_token = os.getenv("PUSHOVER_TOKEN")
12
+ pushover_user = os.getenv("PUSHOVER_USER")
13
+ pushover_url = "https://api.pushover.net/1/messages.json"
14
+
15
+
16
+ def push(text: str):
17
+ """Send a push notification to the user"""
18
+ logger.info("Sending a push notification for %s", text)
19
+ response = requests.post(
20
+ pushover_url,
21
+ data={"token": pushover_token, "user": pushover_user, "message": text},
22
+ )
23
+ logger.info("response = %s", response.json())
24
+ logger.info("Sent notification")
sanatan_assistant.py CHANGED
@@ -1,98 +1,98 @@
1
- import logging
2
- from typing import Literal
3
- from dotenv import load_dotenv
4
- from config import SanatanConfig
5
- from db import SanatanDatabase
6
-
7
- load_dotenv(override=True)
8
- logger = logging.getLogger(__name__)
9
- logger.setLevel(logging.INFO)
10
-
11
- sanatanDatabase = SanatanDatabase()
12
- allowedCollections = Literal[
13
- *[scripture["collection_name"] for scripture in SanatanConfig.scriptures]
14
- ]
15
-
16
-
17
- def format_scripture_answer(scripture_title: allowedCollections, question: str, query_tool_output: str):
18
- """
19
- Use this tool to generate a custom system prompt based on the scripture title, question, and query_tool_output.
20
-
21
- This is especially useful when the user has asked a question about a scripture, and the relevant context has been fetched using the `query` tool.
22
-
23
- The generated prompt will guide the assistant to respond using only that scripture’s content, with a clear format including Sanskrit verses, English explanations, and source chapters.
24
- """
25
-
26
- prompt = f"""You are a knowledgeable assistant on the scripture *{scripture_title}*, well-versed in both **Sanskrit** and **English**.
27
-
28
- You must answer the question using **only** the content from *{scripture_title}* provided in the context below.
29
- - Do **not** bring in information from **any other scripture or source**, or from prior knowledge, even if the answer seems obvious or well-known.
30
- - Do **not** quote any Sanskrit verses unless they appear **explicitly** in the provided context.
31
- - Do **not** use verse numbers or line references unless clearly mentioned in the context.
32
- - If the answer cannot be found in the context, clearly say:
33
- **"I do not have enough information from the {scripture_title} to answer this."**
34
-
35
- If the answer is not directly stated in the verses but is present in explanatory notes within the context, you may interpret — but **explicitly mention that it is an interpretation**.
36
-
37
- If the user query is not small talk, use the following response format (in Markdown):
38
-
39
- ### 🧾 Answer
40
- - Present the explanation in clear, concise **English**.
41
- - If it is an interpretation, say so explicitly.
42
-
43
- ### 🔍 Scripture
44
- - Mention the scripture from which the references were taken.
45
-
46
- ### 🔍 Chapter
47
- - Mention the chapter(s) from which the references were taken.
48
- - Only mention chapters if they are explicitly part of the context.
49
-
50
- ### 📜 Supporting Sanskrit Verse(s)
51
- - Quote **only the most relevant** Sanskrit verse(s) from the context.
52
- - Do **not** add verses from memory or outside the context.
53
- - Format each verse clearly, one per line.
54
- - **Avoid transliteration unless no Devanagari is available.**
55
- - Do **not** provide English text in this section.
56
-
57
- ### 🔍 English Translation
58
- - Provide the **corresponding English meaning** for each Sanskrit verse shown.
59
- - Keep it readable and aligned with the verse above.
60
-
61
- ### Notes
62
- - Bullet any extra points or cross-references from explanatory notes **only if present in the context**.
63
- - Do **not** include anything that is not supported or implied in the context.
64
-
65
- **Question:**
66
- {question}
67
-
68
- ---
69
-
70
- **Context:**
71
- {query_tool_output}
72
-
73
- ---
74
-
75
- Respond in **Markdown** format only. Ensure Sanskrit verses are always clearly shown and translated. If a section does not apply (e.g. no verses), you may omit it.
76
- """
77
-
78
- return prompt
79
-
80
-
81
-
82
- def query(collection_name: allowedCollections, query: str, n_results=5):
83
- """
84
- Search a scripture collection.
85
-
86
- Parameters:
87
- - collection_name (str): The name of the scripture collection to search. ...
88
- - query (str): The search query.
89
- - n_results (int): Number of results to return. Default is 5.
90
-
91
- Returns:
92
- - A list of matching results.
93
- """
94
- logger.info("Searching collection [%s] for [%s]", collection_name, query)
95
- response = sanatanDatabase.search(
96
- collection_name=collection_name, query=query, n_results=n_results
97
- )
98
- return "\n\n".join(response["documents"][0])
 
1
+ import logging
2
+ from typing import Literal
3
+ from dotenv import load_dotenv
4
+ from config import SanatanConfig
5
+ from db import SanatanDatabase
6
+
7
# Read variables from a .env file; override=True lets values from .env replace
# variables that are already set in the environment.
load_dotenv(override=True)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Database handle created once at import time and shared by query().
sanatanDatabase = SanatanDatabase()
# Literal type enumerating every configured collection name, built from
# SanatanConfig.scriptures. Star-unpacking inside a subscript requires
# Python 3.11 or newer.
allowedCollections = Literal[
    *[scripture["collection_name"] for scripture in SanatanConfig.scriptures]
]
15
+
16
+
17
# Builds the answer-formatting prompt for a scripture question and returns it
# as a string. tools.py registers this function through
# StructuredTool.from_function() without an explicit description, so the
# docstring below presumably doubles as the tool description shown to the
# model - treat any edit to it as a behavior change (confirm against the
# LangChain documentation).
def format_scripture_answer(scripture_title: allowedCollections, question: str, query_tool_output: str) -> str:
    """
    Use this tool to generate a custom system prompt based on the scripture title, question, and query_tool_output.

    This is especially useful when the user has asked a question about a scripture, and the relevant context has been fetched using the `query` tool.

    The generated prompt will guide the assistant to respond using only that scripture’s content, with a clear format including Sanskrit verses, English explanations, and source chapters.
    """

    # Only scripture_title, question and query_tool_output are interpolated;
    # everything else is fixed template text. The template is runtime text
    # sent to the model, so its wording and line layout are part of the
    # function's behavior.
    prompt = f"""You are a knowledgeable assistant on the scripture *{scripture_title}*, well-versed in both **Sanskrit** and **English**.

You must answer the question using **only** the content from *{scripture_title}* provided in the context below.
- Do **not** bring in information from **any other scripture or source**, or from prior knowledge, even if the answer seems obvious or well-known.
- Do **not** quote any Sanskrit verses unless they appear **explicitly** in the provided context.
- Do **not** use verse numbers or line references unless clearly mentioned in the context.
- If the answer cannot be found in the context, clearly say:
**"I do not have enough information from the {scripture_title} to answer this."**

If the answer is not directly stated in the verses but is present in explanatory notes within the context, you may interpret — but **explicitly mention that it is an interpretation**.

If the user query is not small talk, use the following response format (in Markdown):

### 🧾 Answer
- Present the explanation in clear, concise **English**.
- If it is an interpretation, say so explicitly.

### 🔍 Scripture
- Mention the scripture from which the references were taken.

### 🔍 Chapter
- Mention the chapter(s) from which the references were taken.
- Only mention chapters if they are explicitly part of the context.

### 📜 Supporting Sanskrit Verse(s)
- Quote **only the most relevant** Sanskrit verse(s) from the context.
- Do **not** add verses from memory or outside the context.
- Format each verse clearly, one per line.
- **Avoid transliteration unless no Devanagari is available.**
- Do **not** provide English text in this section.

### 🔍 English Translation
- Provide the **corresponding English meaning** for each Sanskrit verse shown.
- Keep it readable and aligned with the verse above.

### Notes
- Bullet any extra points or cross-references from explanatory notes **only if present in the context**.
- Do **not** include anything that is not supported or implied in the context.

**Question:**
{question}

---

**Context:**
{query_tool_output}

---

Respond in **Markdown** format only. Ensure Sanskrit verses are always clearly shown and translated. If a section does not apply (e.g. no verses), you may omit it.
"""

    # The filled-in template is returned unchanged; nothing is sent anywhere here.
    return prompt
79
+
80
+
81
+
82
def query(collection_name: allowedCollections, query: str, n_results: int = 5) -> str:
    """
    Search a scripture collection and return the matching passages as text.

    Parameters:
    - collection_name: Name of the scripture collection to search; one of the
      collection names configured in SanatanConfig.scriptures.
    - query: The search query.
    - n_results: Number of results to request. Default is 5.

    Returns:
    - A single string containing the matching documents separated by blank
      lines, or an empty string when the search produced no documents.
    """
    logger.info("Searching collection [%s] for [%s]", collection_name, query)
    response = sanatanDatabase.search(
        collection_name=collection_name, query=query, n_results=n_results
    )
    # Only the first entry of "documents" is used; presumably there is one
    # entry per submitted query text and a single query is sent here -
    # confirm against SanatanDatabase.search.
    documents = response["documents"]
    if not documents or not documents[0]:
        # Missing or empty results used to fail on the [0] lookup; report
        # "nothing found" to the caller instead.
        logger.warning(
            "No documents found in collection [%s] for [%s]", collection_name, query
        )
        return ""
    return "\n\n".join(documents[0])
serperdev_helper.py CHANGED
@@ -1,14 +1,14 @@
1
- from langchain_community.utilities import GoogleSerperAPIWrapper
2
- from dotenv import load_dotenv
3
- import logging
4
-
5
- load_dotenv(override=True)
6
- logger = logging.getLogger(__name__)
7
- logger.setLevel(logging.INFO)
8
-
9
- serper = GoogleSerperAPIWrapper()
10
-
11
- def search(query: str):
12
- """Search the web for any given query"""
13
- logger.info("Searching the web for %s", query)
14
  return serper.run(query)
 
1
+ from langchain_community.utilities import GoogleSerperAPIWrapper
2
+ from dotenv import load_dotenv
3
+ import logging
4
+
5
# Read variables from a .env file; override=True lets values from .env replace
# variables that are already set in the environment.
load_dotenv(override=True)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Serper client created once at import time and shared by search().
# Presumably it picks up its API key from the environment loaded above -
# confirm against the GoogleSerperAPIWrapper documentation.
serper = GoogleSerperAPIWrapper()
10
+
11
def search(query: str):
    """Run a web search for ``query`` with the shared Serper client.

    Logs the query, then returns whatever ``serper.run`` produces for it.
    """
    logger.info("Searching the web for %s", query)
    web_result = serper.run(query)
    return web_result
tools.py CHANGED
@@ -1,28 +1,28 @@
1
- from langchain.agents import Tool
2
- from langchain_core.tools import StructuredTool
3
-
4
- from config import SanatanConfig
5
- from push_notifications_helper import push
6
- from serperdev_helper import search as search_web
7
- from sanatan_assistant import format_scripture_answer, query
8
-
9
- tool_push = Tool(
10
- name="push", description="Send a push notification to the user", func=push
11
- )
12
-
13
- allowed_collections = [s["collection_name"] for s in SanatanConfig.scriptures]
14
-
15
- tool_search_db = StructuredTool.from_function(
16
- query,
17
- description=(
18
- "Search within a specific scripture collection. "
19
- f"The collection_name must be one of: {', '.join(allowed_collections)}."
20
- " Use this to find relevant scripture verses or explanations."
21
- ),
22
- )
23
-
24
- tool_search_web = Tool(
25
- name="search_web", description="Search the web for information", func=search_web
26
- )
27
-
28
- tool_format_scripture_answer = StructuredTool.from_function(format_scripture_answer)
 
1
+ from langchain.agents import Tool
2
+ from langchain_core.tools import StructuredTool
3
+
4
+ from config import SanatanConfig
5
+ from push_notifications_helper import push
6
+ from serperdev_helper import search as search_web
7
+ from sanatan_assistant import format_scripture_answer, query
8
+
9
# Tool wrapper around push(); the name and description given here are what
# the tool is registered with.
tool_push = Tool(
    name="push", description="Send a push notification to the user", func=push
)

# Collection names taken from the scripture configuration; they are
# interpolated into the description of the database search tool below.
allowed_collections = [s["collection_name"] for s in SanatanConfig.scriptures]

# Structured tool around query(). The explicit description spells out the
# valid collection_name values.
tool_search_db = StructuredTool.from_function(
    query,
    description=(
        "Search within a specific scripture collection. "
        f"The collection_name must be one of: {', '.join(allowed_collections)}."
        " Use this to find relevant scripture verses or explanations."
    ),
)

# Tool wrapper around the Serper-backed web search helper.
tool_search_web = Tool(
    name="search_web", description="Search the web for information", func=search_web
)

# No description is passed here; presumably from_function() falls back to the
# docstring of format_scripture_answer - confirm against LangChain.
tool_format_scripture_answer = StructuredTool.from_function(format_scripture_answer)