Spaces:

vikramvasudevan
/

sanatan_ai

Running on CPU Upgrade

App Files Files Community

sanatan_ai / sanatan_assistant.py

vikramvasudevan

Upload folder using huggingface_hub

d434239 verified 3 months ago

raw

history blame

7.69 kB

	import logging
	from typing import Any, Literal
	from dotenv import load_dotenv
	from config import SanatanConfig
	from db import MetadataWhereClause, SanatanDatabase

	# Pull settings from a local .env file before anything below is constructed.
	load_dotenv(override=True)

	# Module-scoped logger; INFO and above are emitted.
	logger = logging.getLogger(__name__)
	logger.setLevel(logging.INFO)

	# Shared handles used by every tool function in this module.
	sanatanDatabase = SanatanDatabase()
	sanatanConfig = SanatanConfig()

	# Literal type enumerating every configured collection name. It annotates the
	# `collection_name` parameter of the tool functions in this module.
	# Subscripting Literal with a tuple is the same as listing its items inline.
	allowedCollections = Literal[
	    tuple(scripture["collection_name"] for scripture in sanatanConfig.scriptures)
	]


	def format_scripture_answer(
	    collection_name: allowedCollections, question: str, query_tool_output: str
	) -> str:
	    """
	    Use this tool to generate a custom system prompt based on the scripture title, question, and query_tool_output.

	    This is especially useful when the user has asked a question about a scripture, and the relevant context has been fetched using the `query` tool.

	    The generated prompt will guide the assistant to respond using only that scripture’s content, with a clear format including Sanskrit/Tamil verses, English explanations, and source chapters.

	    Parameters:
	    - collection_name (str): The scripture collection the question is about.
	    - question (str): The user's question; embedded verbatim in the prompt.
	    - query_tool_output (str): The retrieved context; embedded verbatim in the prompt.

	    Returns:
	    - The fully rendered prompt as a single string.
	    """

	    # Resolve the display title up front so the template below stays readable.
	    scripture_title = sanatanConfig.get_scripture_by_collection(
	        collection_name=collection_name
	    )["title"]

	    # NOTE: section names mentioned in the "Do not duplicate" rules must match
	    # the `###` headings of the response format they refer to.
	    prompt = f"""You are a knowledgeable assistant on the scripture {collection_name}, well-versed in Sanskrit , English and Tamil.

	You must answer the question using only the content from {collection_name} provided in the context below.
	- Do not bring in information from any other scripture or source, or from prior knowledge, even if the answer seems obvious or well-known.
	- Do not quote any Sanskrit/Tamil verses unless they appear explicitly in the provided context.
	- Do not use verse numbers or line references unless clearly mentioned in the context.
	- If the answer cannot be found in the context, clearly say:
	"I do not have enough information from the {collection_name} to answer this."

	If the answer is not directly stated in the verses but is present in explanatory notes within the context, you may interpret — but explicitly mention that it is an interpretation.

	If the user query is not small talk, use the following response format (in Markdown):

	### 🧾 Answer
	- Present a brief summary of your response in concise English.

	### 🕉️ Scripture
	- {scripture_title}

	### 🕮 Chapter Title
	- Mention the chapter(s) from which the references were taken. Use the field title here from the context if available. For example `TVM 1.8.3`

	### 🕮 Verse Number
	- Mention the verse number from which the references were taken.

	### 🔗 Reference Link(s)
	- Provide reference link(s) (`html_url`) if one is available in the context.

	### 📜 Native Verse(s)
	- Quote the original native verse(s) from the context without any translation, transliteration, or interpretation.
	- Do not include any English text in this section. Only show the Sanskrit/Tamil verses as-is from the context.
	- Do not repeat these verses in the translation section — just align the relevant transliteration and translation in the following sections.

	### 📜 English Transliteration
	- For each verse above, provide the matching English transliteration.
	- Maintain the same order as the verses listed above.

	### 📜 English Translation
	- Provide the English meaning for each verse listed above.
	- Again, follow the same order.
	- Do not repeat the original verse here — just the translation.

	### 📜 Notes
	- Bullet any extra points or cross-references from explanatory notes only if present in the context.
	- Do not include anything that is not supported or implied in the context.

	⚠️ Do not duplicate content across sections.
	- Each section has a distinct purpose.
	- If a verse is shown in `📜 Native Verse(s)`, do not repeat it in the Translation section.
	- Only transliterations and meanings should appear in their respective sections.


	Question:
	{question}

	---

	Context:
	{query_tool_output}

	---

	Respond in Markdown format only. Ensure Sanskrit/Tamil verses are always clearly shown and translated. If a section does not apply (e.g. no verses), you may omit it.
	"""

	    return prompt


	def query(collection_name: allowedCollections, query: str, n_results=3):
	    """
	    Run a semantic search against a scripture collection.

	    Parameters:
	    - collection_name (str): The name of the scripture collection to search.
	    - query (str): The search query.
	    - n_results (int): Number of results to return. Default is 3.

	    Returns:
	    - A single string with one "Document / Metadata / ID" entry per match,
	      entries separated by a blank line.
	    """
	    logger.info(
	        "Semantic Search: Searching collection [%s] for [%s]", collection_name, query
	    )

	    hits = sanatanDatabase.search(
	        collection_name=collection_name, query=query, n_results=n_results
	    )

	    # The three result fields are walked in lockstep, one triple per match.
	    triples = zip(hits["documents"], hits["metadatas"], hits["ids"])
	    entries = [
	        f"Document: {text}\nMetadata: {metadata}\nID: {doc_id}"
	        for text, metadata, doc_id in triples
	    ]
	    return "\n\n".join(entries)

	def query_by_metadata_field(
	    collection_name: allowedCollections,
	    query: str,
	    metadata_where_clause: MetadataWhereClause,
	    n_results=3,
	) -> str:
	    """
	    Search a scripture collection by metadata. Do NOT use this for semantic search. Only use when a specific metadata field is provided.

	    Parameters:
	    - collection_name (str): The name of the scripture collection to search.
	    - query (str): The search query.
	    - metadata_where_clause: the filter which is an array of the following type
	        - metadata_field (str) : The name of the metadata field. e.g. azhwar_name
	        - metadata_search_operator (str) : The search operator e.g. $eq or $in. DO NOT use $regex.
	        - metadata_value : Value to search for can be any primitive datatype like str or int (or a list[str] if metadata_search_operator = '$in'). for e.g. Thirumangai Azhwar or '2233' or 2233
	    - n_results (int): Number of results to return. Default is 3.

	    Returns:
	    - A single string with one "Document / Metadata / ID" entry per match,
	      entries separated by a blank line.

	    Raises:
	    - Whatever `sanatanConfig.is_metadata_field_allowed` raises for the given
	      filter; the exception propagates to the caller unchanged.
	    """
	    logger.info("Searching collection [%s] for [%s]", collection_name, query)

	    # Validate the filter before touching the database. No try/except is needed:
	    # an exception raised here already propagates as-is.
	    # NOTE(review): the return value is ignored, so this only guards the search
	    # if the validator raises on a disallowed field — confirm in SanatanConfig.
	    sanatanConfig.is_metadata_field_allowed(
	        collection_name=collection_name,
	        metadata_where_clause=metadata_where_clause,
	    )

	    response = sanatanDatabase.search_by_metadata(
	        collection_name=collection_name,
	        query=query,
	        metadata_where_clause=metadata_where_clause,
	        n_results=n_results,
	    )

	    return "\n\n".join(
	        f"Document: {doc}\nMetadata: {meta}\nID: {id_}"
	        for doc, meta, id_ in zip(
	            response["documents"], response["metadatas"], response["ids"]
	        )
	    )


	def query_by_literal_text(
	    collection_name: allowedCollections,
	    literal_to_search_for: str,
	    n_results=3,
	):
	    """
	    Search a scripture collection for a literal piece of text. Do NOT use this for semantic search. Only use when the user specifically asks for literal search.

	    Parameters:
	    - collection_name (str): The name of the scripture collection to search.
	    - literal_to_search_for (str): The literal text to look for.
	    - n_results (int): Number of results to return. Default is 3.

	    Returns:
	    - A single string with one "Document / Metadata / ID" entry per match,
	      entries separated by a blank line.
	    """
	    logger.info(
	        "Performing literal search in collection [%s] for [%s]",
	        collection_name,
	        literal_to_search_for,
	    )

	    found = sanatanDatabase.search_for_literal(
	        collection_name=collection_name,
	        literal_to_search_for=literal_to_search_for,
	        n_results=n_results,
	    )

	    # The three result fields are walked in lockstep, one triple per match.
	    triples = zip(found["documents"], found["metadatas"], found["ids"])
	    entries = [
	        f"Document: {text}\nMetadata: {metadata}\nID: {doc_id}"
	        for text, metadata, doc_id in triples
	    ]
	    return "\n\n".join(entries)