Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| import logging | |
| from typing import Any, Literal | |
| from dotenv import load_dotenv | |
| from config import SanatanConfig | |
| from db import MetadataWhereClause, SanatanDatabase | |
# Load variables from a .env file before anything below is constructed;
# override=True lets .env values replace variables already set in the
# process environment.
load_dotenv(override=True)

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Module-level instances used by the tool functions defined below.
sanatanDatabase = SanatanDatabase()
sanatanConfig = SanatanConfig()

# Closed set of collection names accepted by the tools, taken from the
# "collection_name" entry of every configured scripture.
# Subscripting with a tuple yields the same Literal as `Literal[*[...]]`,
# but star-unpacking inside a subscript is only valid syntax on
# Python 3.11+, so the tuple form keeps the module importable on older
# interpreters.
allowedCollections = Literal[
    tuple(scripture["collection_name"] for scripture in sanatanConfig.scriptures)
]
def format_scripture_answer(
    collection_name: allowedCollections, question: str, query_tool_output: str
) -> str:
    """
    Use this tool to generate a custom system prompt based on the scripture title, question, and query_tool_output.
    This is especially useful when the user has asked a question about a scripture, and the relevant context has been fetched using the `query` tool.
    The generated prompt will guide the assistant to respond using only that scripture's content, with a clear format including Sanskrit/Tamil verses, English explanations, and source chapters.

    Parameters:
    - collection_name (str): The scripture collection the answer must be restricted to.
    - question (str): The user's question, embedded verbatim in the prompt.
    - query_tool_output (str): The retrieved context, embedded verbatim in the prompt.

    Returns:
    - The fully rendered prompt as a single string.
    """
    # Resolve the display title once, up front, instead of inside the f-string.
    # NOTE(review): the lookup result is subscripted directly, so an unknown
    # collection_name presumably raises here - confirm against SanatanConfig.
    scripture_title = sanatanConfig.get_scripture_by_collection(
        collection_name=collection_name
    )["title"]

    # NOTE(review): several heading glyphs in the prompt below look mis-encoded
    # in this copy of the file (probably emoji / typographic punctuation decoded
    # with the wrong codec). They are left untouched because the original code
    # points cannot all be recovered from here - confirm against the upstream file.
    prompt = f"""You are a knowledgeable assistant on the scripture *{collection_name}*, well-versed in **Sanskrit** , **English** and **Tamil**.
You must answer the question using **only** the content from *{collection_name}* provided in the context below.
- Do **not** bring in information from **any other scripture or source**, or from prior knowledge, even if the answer seems obvious or well-known.
- Do **not** quote any Sanskrit/Tamil verses unless they appear **explicitly** in the provided context.
- Do **not** use verse numbers or line references unless clearly mentioned in the context.
- If the answer cannot be found in the context, clearly say:
**"I do not have enough information from the {collection_name} to answer this."**
If the answer is not directly stated in the verses but is present in explanatory notes within the context, you may interpret โ but **explicitly mention that it is an interpretation**.
If the user query is not small talk, use the following response format (in Markdown):
### ๐งพ Answer
- Present a brief summary of your response in concise **English**.
### ๐๏ธ Scripture
- {scripture_title}
### ๐ฎ Chapter Title
- Mention the chapter(s) from which the references were taken. Use the field *title* here from the context if available. For example `TVM 1.8.3`
### ๐ฎ Verse Number
- Mention the *verse number* from which the references were taken.
### ๐ Reference Link(s)
- Provide reference link(s) (`html_url`) if one is available in the context.
### ๐ Native Verse(s)
- Quote the **original** native verse(s) from the context without any **translation, transliteration**, or **interpretation**.
- Do **not** include **any English text** in this section. Only show the Sanskrit/Tamil verses as-is from the context.
- Do **not repeat these verses** in the translation section โ just align the relevant transliteration and translation in the following sections.
### ๐ English Transliteration
- For each verse above, provide the **matching English transliteration**.
- Maintain the **same order** as the verses listed above.
### ๐ English Translation
- Provide the **English meaning** for each verse listed above.
- Again, follow the **same order**.
- Do **not** repeat the original verse here โ just the translation.
### ๐ Notes
- Bullet any extra points or cross-references from explanatory notes **only if present in the context**.
- Do **not** include anything that is not supported or implied in the context.
โ ๏ธ Do **not duplicate content** across sections.
- Each section has a distinct purpose.
- If a verse is shown in `๐ Native Verse(s)`, do **not** repeat it in the Translation section.
- Only transliterations and meanings should appear in their respective sections.
**Question:**
{question}
---
**Context:**
{query_tool_output}
---
Respond in **Markdown** format only. Ensure Sanskrit/Tamil verses are always clearly shown and translated. If a section does not apply (e.g. no verses), you may omit it.
"""
    return prompt
def query(collection_name: allowedCollections, query: str, n_results=3):
    """
    Run a semantic search against one scripture collection.

    Parameters:
    - collection_name (str): The name of the scripture collection to search.
    - query (str): The search query.
    - n_results (int): Number of results to return. Default is 3.

    Returns:
    - One string containing a "Document / Metadata / ID" entry per result,
      with entries separated by a blank line.
    """
    logger.info(
        "Semantic Search: Searching collection [%s] for [%s]",
        collection_name,
        query,
    )
    hits = sanatanDatabase.search(
        collection_name=collection_name,
        query=query,
        n_results=n_results,
    )

    # Walk the three parallel result lists in lockstep and render each hit.
    rendered = []
    for document, metadata, record_id in zip(
        hits["documents"], hits["metadatas"], hits["ids"]
    ):
        rendered.append(
            f"Document: {document}\nMetadata: {metadata}\nID: {record_id}"
        )
    return "\n\n".join(rendered)
def query_by_metadata_field(
    collection_name: allowedCollections,
    query: str,
    metadata_where_clause: MetadataWhereClause,
    n_results=3,
) -> str:
    """
    Search a scripture collection by metadata. Do NOT use this for semantic search. Only use when a specific metadata field is provided.

    Parameters:
    - collection_name (str): The name of the scripture collection to search.
    - query (str): The search query.
    - metadata_where_clause: the filter which is an array of the following type
        - metadata_field (str) : The name of the metadata field. e.g. azhwar_name
        - metadata_search_operator (str) : The search operator e.g. $eq or $in. DO NOT use $regex.
        - metadata_value : Value to search for can be any primitive datatype like str or int (or a list[str] if metadata_search_operator = '$in'). for e.g. Thirumangai Azhwar or '2233' or 2233
    - n_results (int): Number of results to return. Default is 3.

    Returns:
    - One string containing a "Document / Metadata / ID" entry per result,
      with entries separated by a blank line.

    Raises:
    - Any exception raised by `sanatanConfig.is_metadata_field_allowed`
      propagates to the caller unchanged.
    """
    logger.info("Searching collection [%s] for [%s]", collection_name, query)

    # Check the filter before touching the database. The call's return value
    # is ignored, so it presumably signals a disallowed field by raising -
    # confirm against SanatanConfig. The previous bare `except: raise` wrapper
    # added nothing and has been dropped; errors still propagate as before.
    sanatanConfig.is_metadata_field_allowed(
        collection_name=collection_name,
        metadata_where_clause=metadata_where_clause,
    )

    response = sanatanDatabase.search_by_metadata(
        collection_name=collection_name,
        query=query,
        metadata_where_clause=metadata_where_clause,
        n_results=n_results,
    )
    return "\n\n".join(
        f"Document: {doc}\nMetadata: {meta}\nID: {id_}"
        for doc, meta, id_ in zip(
            response["documents"], response["metadatas"], response["ids"]
        )
    )
def query_by_literal_text(
    collection_name: allowedCollections,
    literal_to_search_for: str,
    n_results=3,
):
    """
    Look up a literal piece of text in a scripture collection.

    Do NOT use this for semantic search. Only use when the user specifically asks for literal search.

    Parameters:
    - collection_name (str): The name of the scripture collection to search.
    - literal_to_search_for (str): The search query.
    - n_results (int): Number of results to return. Default is 3.

    Returns:
    - One string containing a "Document / Metadata / ID" entry per result,
      with entries separated by a blank line.
    """
    logger.info(
        "Performing literal search in collection [%s] for [%s]",
        collection_name,
        literal_to_search_for,
    )
    matches = sanatanDatabase.search_for_literal(
        collection_name=collection_name,
        literal_to_search_for=literal_to_search_for,
        n_results=n_results,
    )

    # The three result lists are parallel; pair them up entry by entry.
    rows = zip(matches["documents"], matches["metadatas"], matches["ids"])
    entries = [
        f"Document: {text}\nMetadata: {details}\nID: {key}"
        for text, details, key in rows
    ]
    return "\n\n".join(entries)