Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| from metadata import MetadataWhereClause | |
class SanatanConfig:
    """Static configuration for the scripture collections.

    Attributes:
        dbStorePath: Path of the vector-store directory (presumably ChromaDB,
            going by the path name - confirm against the code that opens it).
        scriptures: One dict per collection. Every entry carries the keys
            ``name``, ``title``, ``output_dir``, ``collection_name``,
            ``metadata_fields``, ``pdf_path``, ``source``, ``language``,
            ``example_labels``, ``examples`` and ``llm_hints``; an entry may
            additionally carry ``collection_embedding_fn`` to override the
            default embedding function (see ``get_embedding_for_collection``).
            ``example_labels`` and ``examples`` have the same length in every
            entry and read as positional pairs (label i describes example i).
    """

    # Legacy per-scripture settings, superseded by the `scriptures` list below.
    # shuklaYajurVedamPdfPath: str = "./data/shukla-yajur-veda.pdf"
    # shuklaYajurVedamSmallPdfPath: str = "./data/shukla-yajur-veda-small.pdf"
    # vishnuPuranamPdfPath = "./data/vishnu_puranam.pdf"
    # datastores = [{"name": "sanskrit_001", "dbStorePath": "./chromadb-store"}, {"name": "nalayiram", "dbStorePath": "./chromadb-store-4000"}]
    dbStorePath: str = "./chromadb-store"
    # shuklaYajurVedamCollectionName: str = "shukla_yajur_vedam"
    # vishnuPuranamCollectionName: str = "vishnu_puranam"
    # shuklaYajurVedamOutputDir = "./output/shukla_yajur_vedam"
    # vishnuPuranamOutputDir = "./output/vishnu_puranam"
    scriptures = [
        {
            "name": "vishnu_puranam",
            "title": "Sri Vishnu Puranam",
            "output_dir": "./output/vishnu_puranam",
            "collection_name": "vishnu_puranam",
            "metadata_fields": [
                {"name": "file", "datatype": "str"},
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/vishnu_puranam.pdf",
            "source": "https://dn720005.ca.archive.org/0/items/vishnu-purana-sanskrit-english-ocr/VISHNU-PURANA-Sanskrit-English-OCR.pdf",
            "language": "san+eng",
            "example_labels": [
                "Vishnu's form",
                "About the five elements",
                "About Garuda",
                "Weapons of Vishnu",
                "Vishnu's form (all scriptures)",
            ],
            "examples": [
                "describe Vishnu's form as defined in vishnu puranam",
                "five elements and their significance as per vishnu puranam",
                "What is the significance of Garuda? Show some verses from vishnu puranam that describe him.",
                "What weapons does Vishnu hold as mentioned in vishnu puranam?",
                "How is the form of Vishnu described across the scriptures?",
            ],
            "llm_hints": [],
        },
        {
            "name": "shukla_yajur_vedam",
            "title": "Shukla Yajur Vedam",
            "output_dir": "./output/shukla_yajur_vedam",
            "collection_name": "shukla_yajur_vedam",
            "metadata_fields": [
                {"name": "file", "datatype": "str"},
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/shukla-yajur-veda.pdf",
            "source": "https://www.thearyasamaj.org/uploads/book/2014/04/R1sSjG_eLb_sub_406_yajurveda.pdf",
            "language": "san+eng",
            "example_labels": [
                "About Vedam",
                "About the five elements",
                "About Brahma",
            ],
            "examples": [
                "Gist of Shukla Yajur Vedam. Give me some sanskrit verses.",
                "What is the significance of fire and water. show some sanskrit verses",
                "Brahma",
            ],
            "llm_hints": [],
        },
        {
            "name": "bhagavat_gita",
            "title": "Bhagavat Gita",
            "output_dir": "./output/bhagavat_gita",
            "collection_name": "bhagavat_gita",
            "metadata_fields": [
                {"name": "file", "datatype": "str"},
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/bhagavat_gita.pdf",
            "source": "https://dn790006.ca.archive.org/0/items/in.gov.ignca.279/279_text.pdf",
            "language": "san+eng",
            "example_labels": [
                "About Arjuna",
                "About Karma",
                "About birth and death",
                "About the battle field",
                "About Krishna's form",
                "Krishna's Teachings",
            ],
            "examples": [
                "Show some verses where Krishna advises Arjuna",
                "What does Krishna say about Karma",
                "What does Krishna say about birth and death",
                "describe the battle field",
                # The comma after this entry matters: without it Python joins
                # it with the next literal and the list ends up one item
                # shorter than `example_labels`.
                "How did Arjuna respond upon witnessing Krishna’s Vishwarupa?",
                "What teachings did Krishna share in the Gita?",
            ],
            "llm_hints": [],
        },
        {
            "name": "valmiki_ramayanam",
            "title": "Valmiki Ramayanam",
            "output_dir": "./output/valmiki_ramayanam",
            "collection_name": "valmiki_ramayanam",
            "metadata_fields": [
                {"name": "file", "datatype": "str"},
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/valmiki_ramayanam.pdf",
            "source": "https://ia800509.us.archive.org/28/items/valmiki-ramayana-gita-press-english/Valmiki%20Ramayana%20Gita%20Press%20English.pdf",
            "language": "san+eng",
            "example_labels": [
                "About Jatayu",
                "About Hanuman",
                "About Vali",
                "About Sita",
                "About Ravana",
                "A slokam by name",
                "Vibheeshana sharanagathi slokam",
            ],
            "examples": [
                "What is the significance of Jatayu? show some sanskrit verses to support the argument",
                "Show some verses where Hanuman is mentioned",
                "How did Rama kill Vali",
                "How was Sita abducted",
                "How did Rama kill Ravana?",
                "explain sakrudeva prapannaaya shlokam in ramayana",
                "give the shlokam in ramayanam that vibheeshana uses to perform sharanagathi to rama, give the sanskrit shlokam and its meaning",
            ],
            "llm_hints": [],
        },
        {
            "name": "vishnu_sahasranamam",
            "title": "Vishnu Sahasranamam",
            "output_dir": "./output/vishnu_sahasranamam",
            "collection_name": "vishnu_sahasranamam",
            "metadata_fields": [
                {"name": "chapter", "datatype": "str"},
                {"name": "page_number", "datatype": "int"},
                {"name": "sanskrit", "datatype": "str"},
                {"name": "translation", "datatype": "str"},
                {"name": "transliteration", "datatype": "str"},
                {"name": "verse", "datatype": "int"},
            ],
            "pdf_path": "./data/vishnu_sahasranamam.pdf",
            "source": "https://www.swami-krishnananda.org/vishnu/Sri_Vishnu_Sahasranama_Stotram.pdf",
            "language": "san+eng",
            "example_labels": ["Vanamali", "1000 names", "Sanskrit text search"],
            "examples": [
                "Vanamali",
                "Show some of the 1000 names of Vishnu along with their meaning",
                "show the verse that begins with शुक्लाम्बरधरं",
            ],
            "llm_hints": [],
        },
        {
            "name": "divya_prabandham",
            "title": "4000 Divya Prabandham",
            "output_dir": "./output/divya_prabandham",
            "collection_name": "divya_prabandham",
            "collection_embedding_fn": "openai",
            "metadata_fields": [
                {
                    "name": "prabandham_code",
                    "datatype": "str",
                    "description": "contains the short prabandham_code. e.g. `TPL` for `Thiruppallandu`",
                },
                {
                    "name": "prabandham_name",
                    "datatype": "str",
                    "description": "contains the prabandham name. e.g. `Thiruppallandu`",
                },
                {
                    "name": "azhwar_name",
                    "datatype": "str",
                    "description": "contains the azhwar name. e.g. `Thirumangai Azhwar`",
                },
                {
                    "name": "divya_desams",
                    "datatype": "str",
                    "description": "comma separated list of divya desams. e.g. Thiruneermalai,Thiruvallikkeni",
                },
                # {"name": "html_url", "datatype": "str", "description" : "Reference link for the source"},
                # {"name": "pasuram_en", "datatype": "str", "description" : "Transliteration of pasuram in english"},
                # {"name": "pasuram_ta", "datatype": "str", "description" : "Pasuram lyrics in tamil"},
                {
                    "name": "title",
                    "datatype": "str",
                    # Adjacent literals are concatenated verbatim, so each
                    # sentence carries its own trailing space.
                    "description": (
                        "Title of this pasuram. "
                        "Use this when a specific prabandham code or name is given along with a relative verse number. "
                        "for example:\n"
                        "- `TVM 1.8.3`\n"
                        "- if the user query is 'give me 3rd pasuram in the 8th Thiruvaimozhi of the 1st decade.' - you must convert this representation to the format '{prabandham_code} {nth_decade}.{nth_chapter}.{nth_pasuram}' and pass as filter value to the `title` field. \n"
                        "If no decade is provided but a prabandham name is provided, assume decade = 1"
                    ),
                },
                {
                    "name": "verse",
                    "datatype": "int",
                    "description": (
                        "Absolute verse number or pasuram number. "
                        "Use it only when a specific prabandham name is NOT mentioned in the user query. "
                        "For e.g. 'Give me pasuram 1176'"
                    ),
                },
                # {"name": "wbw_ta", "datatype": "str", "description" : "Word by word meaning in tamil."},
                {
                    "name": "decade",
                    "datatype": "int",
                    "description": (
                        "The decade (or `pathu` in Tamil) that this pasuram belongs to. decade is -1 when there is no associated decade."
                    ),
                },
                {
                    "name": "chapter",
                    "datatype": "int",
                    "description": (
                        "chapter number of this pasuram. is -1 when there is no associated chapter number"
                    ),
                },
                {
                    "name": "position_in_chapter",
                    "datatype": "int",
                    "description": (
                        "Relative verse number or pasuram number within a chapter. "
                        "Use it only when a specific prabandham name is mentioned in the user query. "
                        "For e.g. 'Give me the 5th pasuram from Thirupavai'"
                    ),
                },
            ],
            "pdf_path": "./data/divya_prabandham.pdf",
            "source": "https://uveda.org",
            "language": "tamil",
            "example_labels": [
                "About the five elements",
                "About Garuda",
                "Pasuram about Krishna's Flute",
                "Andal's pasuram",
                "Specific Pasuram (absolute)",
                "Pasuram by Azhwar",
                "Specific pasuram(relative)",
                "Decade and Chapter Search",
            ],
            "examples": [
                "five elements and their significance as defined in divya_prabandham",
                "What is the significance of Garuda? Show some verses from divya prabandham that describe him.",
                "Show me a pasuram that talks about how the animals and birds enjoy Krishna's flute playing.",
                "Give me a pasuram by Andal",
                "Show me Pasuram 1187 ",
                "Show me a pasuram by Thondaradippodi azhwar",
                "Give me the 2nd pasuram in the 3rd Thiruvaimozhi from the 2nd decade",
                "Give me just a few words from the starting lines and reference links of all 11 pasurams from thiruvaimozhi 5th decade 4th chapter.",
            ],
            "llm_hints": [
                "If the user wishes to query at a decade or chapter level for a given prabandham, use the direct metadata query on the appropriate fields once instead of querying the tool multiple times for each pasuram from the chapter."
            ],
        },
        {
            "name": "bhagavata_purana",
            "title": "Bhagavatha Puranam",
            "output_dir": "./output/bhagavata_purana",
            "collection_name": "bhagavata_purana",
            "metadata_fields": [
                {"name": "file", "datatype": "str"},
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/bhagavata_purana.pdf",
            "source": "https://dn790003.ca.archive.org/0/items/bhagavatapuranagitapress_201907/Bhagavata%20Purana%20-%20Gita%20Press_text.pdf",
            "language": "san+eng",
            "example_labels": ["Gajendra Moksham", "Prahalad"],
            "examples": [
                "State some verses that showcase the devotion of Gajendra the elephant",
                "State some verses that showcase the devotion of Prahlada",
            ],
            "llm_hints": [],
        },
        {
            "name": "kamba_ramayanam_en",
            "title": "Kamba Ramayanam (English)",
            # NOTE(review): output_dir and pdf_path are identical to the Tamil
            # `kamba_ramayanam` entry below - confirm this sharing is intended.
            "output_dir": "./output/kamba_ramayanam",
            "collection_name": "kamba_ramayanam_en",
            "metadata_fields": [
                {
                    "name": "file",
                    "datatype": "str",
                    "description": "The name of the Kandam or the chapter.",
                },
                {
                    "name": "padalam_en",
                    "datatype": "str",
                    "description": "The name of the Padalam (Episode) in English.",
                },
                {
                    "name": "padalam_ta",
                    "datatype": "str",
                    "description": "The name of the Padalam (Episode) in Tamil.",
                },
                {"name": "page", "datatype": "int"},
                {"name": "verse_number", "datatype": "int"},
            ],
            "pdf_path": "./data/kamba_ramayanam.pdf",
            "source": "https://www.hindupedia.com/images/1/13/Kamba_Ramayanam_I.pdf",
            "language": "tamil",
            "example_labels": [
                "About Jatayu",
                "About Hanuman",
                "About Vali",
                "About Sita",
                "About Ravana",
            ],
            "examples": [
                "What is the significance of Jatayu? show some sanskrit verses to support the argument",
                "Show some verses where Hanuman is mentioned",
                "How did Rama kill Vali",
                "How was Sita abducted",
                "How did Rama kill Ravana?",
            ],
            "llm_hints": [],
        },
        {
            "name": "kamba_ramayanam",
            "title": "Kamba Ramayanam (Tamil)",
            "output_dir": "./output/kamba_ramayanam",
            "collection_name": "kamba_ramayanam",
            "metadata_fields": [
                {
                    "name": "chunk_index",
                    "datatype": "int",
                    "description": "The index of the chunk",
                },
                {
                    "name": "filename",
                    "datatype": "str",
                    "description": "The name of the file.",
                },
            ],
            "pdf_path": "./data/kamba_ramayanam.pdf",
            "source": "https://archive.org/details/vrajeshkumar_gmail_061/01-%E0%AE%AA%E0%AE%BE%E0%AE%B2%20%E0%AE%95%E0%AE%BE%E0%AE%A3%E0%AF%8D%E0%AE%9F%E0%AE%AE%E0%AF%8D/page/n15/mode/2up",
            "language": "tamil",
            "example_labels": [
                "About Jatayu",
                "About Hanuman",
                "About Vali",
                "About Sita",
                "About Ravana",
            ],
            "examples": [
                "What is the significance of Jatayu? show some sanskrit verses to support the argument",
                "Show some verses where Hanuman is mentioned",
                "How did Rama kill Vali",
                "How was Sita abducted",
                "How did Rama kill Ravana?",
            ],
            "llm_hints": [],
        },
        {
            "name": "chathusloki",
            "title": "Chathusloki by Sri Alavandar",
            "output_dir": "./output/chathusloki",
            "collection_name": "chathusloki",
            "metadata_fields": [
                {
                    "name": "sloka_number",
                    "datatype": "int",
                    "description": "The index of the sloka or verse",
                },
                {
                    "name": "meaning_short",
                    "datatype": "str",
                    "description": "A short meaning of the sanskrit verse in English.",
                },
            ],
            "pdf_path": "./data/chathusloki.pdf",
            "source": "https://www.sadagopan.org/ebook/pdf/Chatusloki%20-%20VS.pdf",
            "language": "san+eng",
            "example_labels": ["Recite a sloka", "Commentary", "Role of Sridevi"],
            "examples": [
                "Recite the 1st sloka from Chathusloki",
                "Show detailed commentary for sloka 2 from Chathusloki",
                "What is the role of Sri Devi in the universe according to the Chathusloki?",
            ],
            "llm_hints": [],
        },
        {
            "name": "sri_stavam",
            "title": "Sri Stavam by Sri Koorathazhwar",
            "output_dir": "./output/sri_stavam",
            "collection_name": "sri_stavam",
            "metadata_fields": [
                {
                    "name": "sloka_number",
                    "datatype": "int",
                    "description": "The index of the sloka or verse",
                },
                {
                    "name": "meaning_short",
                    "datatype": "str",
                    "description": "A short meaning of the sanskrit verse in English.",
                },
                {
                    "name": "sanskrit",
                    "datatype": "str",
                    "description": "Verse in sanskrit",
                },
                {
                    "name": "transliteration",
                    "datatype": "str",
                    "description": "Verse transliterated to English",
                },
            ],
            "pdf_path": "./data/sri_stavam.pdf",
            "source": "https://www.sadagopan.org/ebook/pdf/Sri%20Stavam.pdf",
            "language": "san+eng",
            "example_labels": ["Recite a sloka", "Commentary", "Role of Sridevi"],
            "examples": [
                "Recite the 1st sloka from Sri Stavam",
                "Show detailed commentary for sloka 2 from Sri Stavam",
                "What is the role of Sri Devi in the universe according to the Sri Stavam?",
            ],
            # The hint must name a field declared in `metadata_fields` above;
            # this collection has `sloka_number`, not `verse`.
            "llm_hints": [
                "if the user asks for nth sloka, do a metadata search on the `sloka_number` field."
            ],
        },
        {
            "name": "yt_metadata",
            "title": "Sampradayam in YouTube",
            "output_dir": "./output/yt_metadata",
            "collection_name": "yt_metadata",
            "collection_embedding_fn": "openai",
            "metadata_fields": [
                {
                    "name": "video_id",
                    "datatype": "str",
                    "description": "The video id as in YouTube",
                },
                {
                    "name": "video_title",
                    "datatype": "str",
                    "description": "The title of the video as in YouTube",
                },
                {
                    "name": "description",
                    "datatype": "str",
                    "description": "Description as in YouTube",
                },
                {
                    "name": "channel_url",
                    "datatype": "str",
                    "description": "URL of the YouTube Channel",
                },
                {
                    "name": "channel_title",
                    "datatype": "str",
                    "description": "Title of the YouTube Channel",
                },
            ],
            "pdf_path": "./data/none.pdf",
            "source": "https://youtube.com",
            "language": "san+eng+tam",
            "example_labels": ["Srirangam", "Pasuram video"],
            "examples": [
                "Show me YouTube videos that talk about Srirangam",
                "Show me lyrics of 1st pasuram of 1st decade in the 4th Thiruvaimozhi. Also show the related youtube videos.",
            ],
            "llm_hints": [
                "if the user asks for YouTube videos, DO NOT do a web search, instead do a search on this collection."
            ],
        },
    ]

    def get_scripture_by_collection(self, collection_name: str) -> dict:
        """Return the first scripture entry whose ``collection_name`` matches.

        Args:
            collection_name: Value to match against each entry's
                ``"collection_name"`` key.

        Returns:
            The matching dict from ``self.scriptures`` (the object itself,
            not a copy).

        Raises:
            IndexError: If no entry has that collection name.
        """
        for scripture in self.scriptures:
            if scripture["collection_name"] == collection_name:
                return scripture
        # IndexError is kept because the previous `[...][0]` lookup raised
        # it on a miss; only the message is new.
        raise IndexError(
            f"no scripture is configured for collection [{collection_name}]"
        )

    def is_metadata_field_allowed(
        self, collection_name: str, metadata_where_clause: "MetadataWhereClause"
    ) -> bool:
        """Check that every filter targets a field declared for the collection.

        Args:
            collection_name: Collection whose ``metadata_fields`` define the
                allowed field names.
            metadata_where_clause: Object exposing ``filters``; each filter
                exposes ``metadata_field``.

        Returns:
            ``True`` when every filter's field is declared (including when
            there are no filters).

        Raises:
            IndexError: If ``collection_name`` is not configured.
            Exception: On the first filter whose ``metadata_field`` is not
                declared for the collection; the message lists the allowed
                field definitions.
        """
        scripture = self.get_scripture_by_collection(collection_name=collection_name)
        # Computed once rather than once per filter.
        allowed_names = {field["name"] for field in scripture["metadata_fields"]}
        for where_filter in metadata_where_clause.filters:
            if where_filter.metadata_field not in allowed_names:
                # Single quotes inside the replacement field keep this
                # f-string valid on interpreters older than Python 3.12.
                raise Exception(
                    f"metadata_field: [{where_filter.metadata_field}] not allowed in collection [{collection_name}]. Here are the allowed fields with their descriptions: {scripture['metadata_fields']}"
                )
        return True

    def get_embedding_for_collection(self, collection_name: str) -> str:
        """Return the embedding-function key configured for a collection.

        Args:
            collection_name: Collection to look up.

        Returns:
            The entry's ``"collection_embedding_fn"`` value when present,
            otherwise ``"hf"``.

        Raises:
            IndexError: If ``collection_name`` is not configured.
        """
        scripture = self.get_scripture_by_collection(collection_name)
        # "hf" (huggingface sentence transformers) is the default unless the
        # entry overrides it.
        return scripture.get("collection_embedding_fn", "hf")