Spaces:

vikramvasudevan
/

sanatan_ai

Running on CPU Upgrade

App Files Files Community

vikramvasudevan committed on Aug 7

Commit

bc05cd4

verified ·

1 Parent(s): 7290ba6

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

app.py +15 -12
graph_helper.py +14 -8
tools.py +35 -23

app.py CHANGED Viewed

@@ -177,10 +177,10 @@ async def chat_streaming(message, history, thread_id):
             def generate_processing_message():
                 return (
                     f"<div class='thinking-bubble'><em>🤔{random.choice(thinking_verbs)} ...</em></div>"
-                    f"<div style='opacity: 0.1' title='{full}'>"
-                    f"<span>{node}:{name or ''}:</span>"
-                    f"<strong>Looking for : [{message}]</strong> {truncated or '...'}"
-                    f"</div>"
                 )
             if (
@@ -194,11 +194,11 @@ async def chat_streaming(message, history, thread_id):
                 html = (
                     f"<div class='thinking-bubble'><em>🤔 {msg.name} tool: {random.choice(thinking_verbs)} ...</em></div>"
-                    f"<div style='opacity: 0.5'>"
-                    f"<strong>Looking for : [{message}]</strong><br>"
-                    f"<strong>Tool Args:</strong> {tooltip or '(no args)'}<br>"
-                    f"{truncated or '...'}"
-                    f"</div>"
                 )
                 yield f"### { ' → '.join(node_tree)}\n{html}"
             elif isinstance(msg, AIMessageChunk):
@@ -206,7 +206,7 @@ async def chat_streaming(message, history, thread_id):
                 def truncate_middle(text, front=50, back=50):
                     if len(text) <= front + back:
                         return text
-                    return f"{text[:front]}…{text[-back:]}"
                 if not msg.content:
                     # logger.warning("*** No Message Chunk!")
@@ -214,7 +214,7 @@ async def chat_streaming(message, history, thread_id):
                 else:
                     # Stream intermediate messages with transparent style
                     if node != final_node:
-                        streamed_response += (msg.content)
                         yield f"### { ' → '.join(node_tree) }\n<div class='intermediate-output'>{escape(truncate_middle(streamed_response))}</div>"
                     else:
                         # Buffer the final validated response instead of yielding
@@ -387,8 +387,11 @@ chatInterface = gr.ChatInterface(
 .intermediate-output {
     opacity: 0.4;
     font-style: italic;
-}
 """,
 )

             def generate_processing_message():
                 return (
                     f"<div class='thinking-bubble'><em>🤔{random.choice(thinking_verbs)} ...</em></div>"
+                    # f"<div style='opacity: 0.1' title='{full}'>"
+                    # f"<span>{node}:{name or ''}:</span>"
+                    # f"<strong>Looking for : [{message}]</strong> {truncated or '...'}"
+                    # f"</div>"
                 )
             if (
                 html = (
                     f"<div class='thinking-bubble'><em>🤔 {msg.name} tool: {random.choice(thinking_verbs)} ...</em></div>"
+                    # f"<div style='opacity: 0.5'>"
+                    # f"<strong>Looking for : [{message}]</strong><br>"
+                    # f"<strong>Tool Args:</strong> {tooltip or '(no args)'}<br>"
+                    # f"{truncated or '...'}"
+                    # f"</div>"
                 )
                 yield f"### { ' → '.join(node_tree)}\n{html}"
             elif isinstance(msg, AIMessageChunk):
                 def truncate_middle(text, front=50, back=50):
                     if len(text) <= front + back:
                         return text
+                    return f"{text[:front]}…{text[-back:]}".replace("\n", "") #remove new lines.
                 if not msg.content:
                     # logger.warning("*** No Message Chunk!")
                 else:
                     # Stream intermediate messages with transparent style
                     if node != final_node:
+                        streamed_response += msg.content
                         yield f"### { ' → '.join(node_tree) }\n<div class='intermediate-output'>{escape(truncate_middle(streamed_response))}</div>"
                     else:
                         # Buffer the final validated response instead of yielding
 .intermediate-output {
     opacity: 0.4;
     font-style: italic;
+    white-space: nowrap;
+    overflow: hidden;
+    text-overflow: ellipsis;
+}
 """,
 )

graph_helper.py CHANGED Viewed

@@ -27,6 +27,7 @@ logger.setLevel(logging.INFO)
 class ChatState(TypedDict):
     messages: Annotated[list[str], add_messages]
 def branching_condition(state: ChatState) -> str:
     last_message = state["messages"][-1]
     if hasattr(last_message, "tool_calls") and last_message.tool_calls:
@@ -58,7 +59,7 @@ def generate_graph() -> CompiledStateGraph:
     def validatorNode(state: ChatState) -> ChatState:
         messages = state["messages"] or []
         # Step 1: Separate out last message
         last_message = messages[-1]
         trimmed_messages = messages[:-1]
@@ -76,7 +77,9 @@ def generate_graph() -> CompiledStateGraph:
                     "Return the fixed version of the assistant's message."
                 )
             ),
-            HumanMessage(content=last_message.content)  # 🟢 convert AI output to Human input
         ]
         # Step 4: Invoke LLM
@@ -85,7 +88,6 @@ def generate_graph() -> CompiledStateGraph:
         # Step 5: Replace old AI message with validated one
         return {"messages": trimmed_messages + [response]}
     def init_system_prompt_node(state: ChatState) -> ChatState:
         messages = state["messages"] or []
@@ -104,7 +106,7 @@ def generate_graph() -> CompiledStateGraph:
                     content="You MUST call the `format_scripture_answer` tool if the user question is about scripture content and the `query` tool has returned a result."
                 ),
                 SystemMessage(
-                    content="If the user's question is about any scripture content (even if multiple scriptures), you must use the `tool_search_db`. Only use `tool_search_web` for general non-scriptural questions."
                 ),
             ]
@@ -118,10 +120,14 @@ def generate_graph() -> CompiledStateGraph:
     graph.add_edge(START, "init")
     graph.add_edge("init", "llm")
     # graph.add_conditional_edges("llm", tools_condition, "tools")
-    graph.add_conditional_edges("llm", branching_condition, {
-        "tools": "tools",
-        "validator": "validator",
-    })
     graph.add_edge("tools", "llm")
     graph.add_edge("validator", END)
     return graph.compile(checkpointer=memory)

 class ChatState(TypedDict):
     messages: Annotated[list[str], add_messages]
 def branching_condition(state: ChatState) -> str:
     last_message = state["messages"][-1]
     if hasattr(last_message, "tool_calls") and last_message.tool_calls:
     def validatorNode(state: ChatState) -> ChatState:
         messages = state["messages"] or []
         # Step 1: Separate out last message
         last_message = messages[-1]
         trimmed_messages = messages[:-1]
                     "Return the fixed version of the assistant's message."
                 )
             ),
+            HumanMessage(
+                content=last_message.content
+            ),  # 🟢 convert AI output to Human input
         ]
         # Step 4: Invoke LLM
         # Step 5: Replace old AI message with validated one
         return {"messages": trimmed_messages + [response]}
     def init_system_prompt_node(state: ChatState) -> ChatState:
         messages = state["messages"] or []
                     content="You MUST call the `format_scripture_answer` tool if the user question is about scripture content and the `query` tool has returned a result."
                 ),
                 SystemMessage(
+                    content="For general scripture queries, always prefer semantic search (tool_search_db). Use metadata or literal search only if the user specifies an exact verse, azhwar, or phrase."
                 ),
             ]
     graph.add_edge(START, "init")
     graph.add_edge("init", "llm")
     # graph.add_conditional_edges("llm", tools_condition, "tools")
+    graph.add_conditional_edges(
+        "llm",
+        branching_condition,
+        {
+            "tools": "tools",
+            "validator": "validator",
+        },
+    )
     graph.add_edge("tools", "llm")
     graph.add_edge("validator", END)
     return graph.compile(checkpointer=memory)

tools.py CHANGED Viewed

@@ -3,10 +3,18 @@ from langchain.agents import Tool
 from langchain_core.tools import StructuredTool
 from config import SanatanConfig
-from nalayiram_helper import get_standardized_azhwar_names, get_standardized_divya_desam_names
 from push_notifications_helper import push
 from serperdev_helper import search as search_web
-from sanatan_assistant import format_scripture_answer, query, query_by_metadata_field, query_by_literal_text
 tool_push = Tool(
     name="push", description="Send a push notification to the user", func=push
@@ -17,41 +25,45 @@ allowed_collections = [s["collection_name"] for s in SanatanConfig.scriptures]
 tool_search_db = StructuredTool.from_function(
     query,
     description=(
-        "Do a semantic vector search within a specific scripture collection. "
-        f"The collection_name must be one of: {', '.join(allowed_collections)}."
-        "Use this to narrow down relevant scripture verses or explanations based on the given query."
     ),
 )
 tool_search_db_for_literal = StructuredTool.from_function(
     query_by_literal_text,
     description=(
-        "Do a literal search within a specific scripture collection (only if user specifically asks for a literal search or if semantic search does not yield relevant results)."
-        f"The collection_name must be one of: {', '.join(allowed_collections)}."
-        "Use this to find relevant scripture verses or explanations based on the given query."
-        # "If the query doesn't yield any relevant results, then call `tool_search_db_by_metadata` tool to search specifically by a given metadata field (only if specific field from metadata has been mentioned)."
-        # f"use this configuration for reference :\n{json.dumps(SanatanConfig.scriptures, indent=1)}\n"
     ),
 )
 tool_search_db_by_metadata = StructuredTool.from_function(
     query_by_metadata_field,
     description=(
-        "Search within a specific scripture collection using a metadata field. use this only when the user provides a specific search criteria for verse number, pasuram number, azhwar name etc"
-        f"The collection_name must be one of: {', '.join(allowed_collections)}."
-        " Use this to find relevant scripture verses or explanations."
-        "if the user asks for a specific azhwar, use the `tool_get_standardized_azhwar_names` tool to get the standard name first and then pass to this tool to filter pasurams based on azhwar_name."
-        "if the user asks for a specific prabandham name, use the `tool_get_standardized_azhwar_names` tool to get the standard prabandham name first and then pass to this tool to filter pasurams based on prabandham_name."
-        "if the user asks for a specific divya desam name, use the `tool_get_standardized_divya_desam_names` tool to get the standard divya desam name first and then pass to this tool to filter pasurams based on `divya_desams`."
-        f"use this configuration for reference :\n{json.dumps(SanatanConfig.scriptures, indent=1)}\n"
-        # "be aware that verse numbers are sometimes stored as strings and sometimes as mumbers, so if str search does not yield results, try passing in the metadata_value as a number instead"
-        # "in the context of divya_prabandham, the verse/pasuram number is stored in metadata as the field `verse` and it is  stored as an int."
-        # "in the context of sahasranamam, the verse/pasuram number is stored in metadata as the field `verse` and it is  stored as an int."
-        # "in the context of kamba_ramayanam, the verse number is stored in metadata as the field `verse_number` and it is  stored as a string datatype."
-        # "for other scriptures, the verse number is stored either as `verse` or `verse_number` fields and it can be either str or int so check for both whichever yields results."
     ),
 )
 tool_search_web = Tool(
     name="search_web", description="Search the web for information", func=search_web
 )
@@ -87,4 +99,4 @@ tool_get_standardized_divya_desam_names = StructuredTool.from_function(
         "Use this tool to standardize the names of the divya desams when the user asks for pasurams written on a specific divya desam."
         "Usually this is followed by passing that standardized divya desam name for a metadata search using the `tool_search_db_by_metadata` tool by using the fiels `divya_desams`."
     ),
-)

 from langchain_core.tools import StructuredTool
 from config import SanatanConfig
+from nalayiram_helper import (
+    get_standardized_azhwar_names,
+    get_standardized_divya_desam_names,
+)
 from push_notifications_helper import push
 from serperdev_helper import search as search_web
+from sanatan_assistant import (
+    format_scripture_answer,
+    query,
+    query_by_metadata_field,
+    query_by_literal_text,
+)
 tool_push = Tool(
     name="push", description="Send a push notification to the user", func=push
 tool_search_db = StructuredTool.from_function(
     query,
     description=(
+        "This is the **PRIMARY** tool to use for most user queries about scripture."
+        " Use this when the user asks **about themes, stories, ideas, emotions, or meanings** in the scriptures."
+        " This tool uses semantic vector search and can understand context and meaning beyond keywords."
+        f" Only use other tools like metadata or literal search if the user explicitly asks for them."
+        f" The collection_name must be one of: {', '.join(allowed_collections)}."
     ),
 )
 tool_search_db_for_literal = StructuredTool.from_function(
     query_by_literal_text,
     description=(
+        "Use this only if the user explicitly says they want a 'literal match' or exact phrase search."
+        " This is not the default. Try semantic search first using `tool_search_db`."
+        f" The collection_name must be one of: {', '.join(allowed_collections)}."
     ),
 )
 tool_search_db_by_metadata = StructuredTool.from_function(
     query_by_metadata_field,
     description=(
+        "Use this tool **only when the user provides explicit metadata criteria**, such as: azhwar name, pasuram number, verse number, prabandham name, or divya desam name."
+        " This is not meant for general queries."
+        f" The collection_name must be one of: {', '.join(allowed_collections)}."
+        "If the user asks for a specific azhwar, use `tool_get_standardized_azhwar_names` first."
+        "If the user asks for a specific prabandham, use `tool_get_standardized_prabandham_names` first."
+        "If the user mentions a divya desam, use `tool_get_standardized_divya_desam_names` first."
+        "If you set metadata_search_operator to $in, then metadata_value must always be a list — even if it contains only a single item."
+        """🔒 Important:
+        When using the tool_get_standardized_azhwar_names, tool_get_standardized_divya_desam_names, or any similar standardization tool, you must use the standardized name exactly as returned by the tool — without modifying, reformatting, translating, or simplifying it in any way.
+        For example, if the tool returns Thirumālirum Solai, you must pass that exact string to tool_search_db_by_metadata. Do not change it to Thirumalirum Solai, Tirumalirumsolai, or anything else.
+        🔍 This is critical for the search to return results correctly.
+        🚫 Any deviation will cause the search to fail or miss results."""
+        f" Reference config:\n{json.dumps(SanatanConfig.scriptures, indent=1)}\n"
     ),
 )
 tool_search_web = Tool(
     name="search_web", description="Search the web for information", func=search_web
 )
         "Use this tool to standardize the names of the divya desams when the user asks for pasurams written on a specific divya desam."
         "Usually this is followed by passing that standardized divya desam name for a metadata search using the `tool_search_db_by_metadata` tool by using the fiels `divya_desams`."
     ),
+)