sanatan_ai / sanatan_assistant.py
vikramvasudevan's picture
Upload folder using huggingface_hub
d434239 verified
raw
history blame
7.69 kB
import logging
from typing import Any, Literal
from dotenv import load_dotenv
from config import SanatanConfig
from db import MetadataWhereClause, SanatanDatabase
# Load variables from a .env file into the process environment. override=True
# lets values from the file replace variables that are already set.
load_dotenv(override=True)
# Module-level logger, configured to emit INFO and above.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Shared database and configuration objects used by the functions in this module.
sanatanDatabase = SanatanDatabase()
sanatanConfig = SanatanConfig()
# Literal type built from the "collection_name" of every configured scripture;
# it annotates the collection_name parameter of the functions in this module.
# NOTE(review): star-unpacking inside a subscript requires Python 3.11+ -
# confirm the deployment runtime.
allowedCollections = Literal[
*[scripture["collection_name"] for scripture in sanatanConfig.scriptures]
]
def format_scripture_answer(
    collection_name: allowedCollections, question: str, query_tool_output: str
) -> str:
    """
    Use this tool to generate a custom system prompt based on the scripture title, question, and query_tool_output.
    This is especially useful when the user has asked a question about a scripture, and the relevant context has been fetched using the `query` tool.
    The generated prompt will guide the assistant to respond using only that scripture's content, with a clear format including Sanskrit/Tamil verses, English explanations, and source chapters.

    Parameters:
    - collection_name (str): The name of the scripture collection the answer must be restricted to.
    - question (str): The user's question, embedded verbatim in the prompt.
    - query_tool_output (str): The retrieved context, embedded verbatim in the prompt.
    Returns:
    - The complete prompt as a single string.
    """
    # Resolve the display title up front so the template below stays readable.
    # NOTE(review): assumes get_scripture_by_collection returns a mapping with
    # a "title" key for every allowed collection - confirm against SanatanConfig.
    scripture_title = sanatanConfig.get_scripture_by_collection(
        collection_name=collection_name
    )["title"]

    # The template lines are flush-left on purpose: everything between the
    # triple quotes is prompt text, so indentation would leak into the output.
    prompt = f"""You are a knowledgeable assistant on the scripture *{collection_name}*, well-versed in **Sanskrit** , **English** and **Tamil**.
You must answer the question using **only** the content from *{collection_name}* provided in the context below.
- Do **not** bring in information from **any other scripture or source**, or from prior knowledge, even if the answer seems obvious or well-known.
- Do **not** quote any Sanskrit/Tamil verses unless they appear **explicitly** in the provided context.
- Do **not** use verse numbers or line references unless clearly mentioned in the context.
- If the answer cannot be found in the context, clearly say:
**"I do not have enough information from the {collection_name} to answer this."**
If the answer is not directly stated in the verses but is present in explanatory notes within the context, you may interpret — but **explicitly mention that it is an interpretation**.
If the user query is not small talk, use the following response format (in Markdown):
### 🧾 Answer
- Present a brief summary of your response in concise **English**.
### 🕉️ Scripture
- {scripture_title}
### 🕮 Chapter Title
- Mention the chapter(s) from which the references were taken. Use the field *title* here from the context if available. For example `TVM 1.8.3`
### 🕮 Verse Number
- Mention the *verse number* from which the references were taken.
### 🔗 Reference Link(s)
- Provide reference link(s) (`html_url`) if one is available in the context.
### 📜 Native Verse(s)
- Quote the **original** native verse(s) from the context without any **translation, transliteration**, or **interpretation**.
- Do **not** include **any English text** in this section. Only show the Sanskrit/Tamil verses as-is from the context.
- Do **not repeat these verses** in the translation section — just align the relevant transliteration and translation in the following sections.
### 📜 English Transliteration
- For each verse above, provide the **matching English transliteration**.
- Maintain the **same order** as the verses listed above.
### 📜 English Translation
- Provide the **English meaning** for each verse listed above.
- Again, follow the **same order**.
- Do **not** repeat the original verse here — just the translation.
### 📜 Notes
- Bullet any extra points or cross-references from explanatory notes **only if present in the context**.
- Do **not** include anything that is not supported or implied in the context.
⚠️ Do **not duplicate content** across sections.
- Each section has a distinct purpose.
- If a verse is shown in `📜 Native Verse(s)`, do **not** repeat it in the Translation section.
- Only transliterations and meanings should appear in their respective sections.
**Question:**
{question}
---
**Context:**
{query_tool_output}
---
Respond in **Markdown** format only. Ensure Sanskrit/Tamil verses are always clearly shown and translated. If a section does not apply (e.g. no verses), you may omit it.
"""
    return prompt
def query(collection_name: allowedCollections, query: str, n_results=3):
    """
    Search a scripture collection.

    Parameters:
    - collection_name (str): The name of the scripture collection to search.
    - query (str): The search query.
    - n_results (int): Number of results to return. Default is 3.
    Returns:
    - One string containing a "Document / Metadata / ID" entry per match, with entries separated by a blank line.
    """
    logger.info("Semantic Search: Searching collection [%s] for [%s]", collection_name, query)
    hits = sanatanDatabase.search(
        collection_name=collection_name, query=query, n_results=n_results
    )

    # The three result sequences are paired positionally; zip stops at the
    # shortest of them.
    documents, metadatas, ids = hits["documents"], hits["metadatas"], hits["ids"]
    entries = []
    for doc, meta, id_ in zip(documents, metadatas, ids):
        entries.append(f"Document: {doc}\nMetadata: {meta}\nID: {id_}")
    return "\n\n".join(entries)
def query_by_metadata_field(
    collection_name: allowedCollections,
    query: str,
    metadata_where_clause: MetadataWhereClause,
    n_results=3,
) -> str:
    """
    Search a scripture collection by metadata. Do NOT use this for semantic search. Only use when a specific metadata field is provided.

    Parameters:
    - collection_name (str): The name of the scripture collection to search.
    - query (str): The search query.
    - metadata_where_clause: the filter which is an array of the following type
        - metadata_field (str) : The name of the metadata field. e.g. azhwar_name
        - metadata_search_operator (str) : The search operator e.g. $eq or $in. DO NOT use $regex.
        - metadata_value : Value to search for can be any primitive datatype like str or int (or a list[str] if metadata_search_operator = '$in'). for e.g. Thirumangai Azhwar or '2233' or 2233
    - n_results (int): Number of results to return. Default is 3.
    Returns:
    - One string containing a "Document / Metadata / ID" entry per match, with entries separated by a blank line.
    Raises:
    - Any exception raised by the metadata-field validation or by the database search propagates unchanged.
    """
    logger.info("Searching collection [%s] for [%s]", collection_name, query)

    # Validate the filter before touching the database. The previous bare
    # `try/except: raise` wrapper only re-raised, so calling directly is
    # equivalent.
    # NOTE(review): the return value is discarded, so this only guards the
    # search if is_metadata_field_allowed raises on a disallowed field - confirm.
    sanatanConfig.is_metadata_field_allowed(
        collection_name=collection_name,
        metadata_where_clause=metadata_where_clause,
    )

    response = sanatanDatabase.search_by_metadata(
        collection_name=collection_name,
        query=query,
        metadata_where_clause=metadata_where_clause,
        n_results=n_results,
    )
    # Pair each document with its metadata and id; zip stops at the shortest.
    return "\n\n".join(
        f"Document: {doc}\nMetadata: {meta}\nID: {id_}"
        for doc, meta, id_ in zip(
            response["documents"], response["metadatas"], response["ids"]
        )
    )
def query_by_literal_text(
    collection_name: allowedCollections,
    literal_to_search_for: str,
    n_results=3,
):
    """
    Search a scripture collection by a literal. Do NOT use this for semantic search. Only use when the user specifically asks for literal search.

    Parameters:
    - collection_name (str): The name of the scripture collection to search.
    - literal_to_search_for (str): The literal text to look for.
    - n_results (int): Number of results to return. Default is 3.
    Returns:
    - One string containing a "Document / Metadata / ID" entry per match, with entries separated by a blank line.
    """
    logger.info("Performing literal search in collection [%s] for [%s]", collection_name, literal_to_search_for)
    found = sanatanDatabase.search_for_literal(
        collection_name=collection_name,
        literal_to_search_for=literal_to_search_for,
        n_results=n_results,
    )

    # Walk documents, metadata and ids in lockstep (zip stops at the shortest)
    # and render one text block per match.
    rows = zip(found["documents"], found["metadatas"], found["ids"])
    rendered = [
        f"Document: {doc}\nMetadata: {meta}\nID: {id_}" for doc, meta, id_ in rows
    ]
    return "\n\n".join(rendered)