File size: 22,639 Bytes
d434239
 
 
fd1b271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d023803
 
 
 
a116972
fd1b271
0dfd87b
fd1b271
 
 
 
 
 
4591f5e
fd1b271
 
4591f5e
 
 
 
 
fd1b271
d434239
fd1b271
 
 
 
 
 
d023803
 
 
 
a116972
fd1b271
0dfd87b
fd1b271
 
 
 
 
 
 
 
 
 
 
d434239
fd1b271
 
 
 
 
 
d023803
 
 
 
a116972
fd1b271
0dfd87b
fd1b271
 
 
 
 
 
 
4591f5e
fd1b271
 
 
 
 
 
4591f5e
 
fd1b271
d434239
fd1b271
 
 
 
 
 
d023803
 
 
 
a116972
fd1b271
0dfd87b
fd1b271
 
 
 
 
 
 
4591f5e
 
fd1b271
 
 
 
 
 
 
4591f5e
 
fd1b271
d434239
fd1b271
 
 
 
 
 
d023803
 
 
 
 
 
 
a116972
fd1b271
b2bbee4
fd1b271
 
 
 
 
 
 
d434239
fd1b271
210796c
 
3da03c3
210796c
 
74c37c0
d023803
 
9d8a7d0
d023803
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec6bd64
 
 
 
 
 
 
d13c5ed
d023803
 
 
 
d13c5ed
 
 
 
 
d023803
 
d434239
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a116972
210796c
0dfd87b
c4f7fa9
210796c
 
 
4591f5e
 
5ba944b
4591f5e
d434239
 
210796c
 
4591f5e
 
 
 
 
 
d434239
 
 
 
 
210796c
 
90898b4
 
 
 
 
d023803
 
 
 
a116972
90898b4
0dfd87b
90898b4
0dfd87b
90898b4
 
0dfd87b
 
d434239
90898b4
32cb5b7
dff332a
 
32cb5b7
34e1691
d023803
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a116972
32cb5b7
dff332a
c4f7fa9
dff332a
 
 
 
 
 
 
 
 
 
 
 
 
 
d434239
dff332a
 
 
 
 
 
d023803
 
 
 
 
 
 
 
 
 
 
a116972
dff332a
32cb5b7
c4f7fa9
32cb5b7
 
 
 
 
 
 
 
 
 
 
 
 
 
d434239
32cb5b7
2e93981
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4591f5e
2e93981
45e4cbc
2e93981
4591f5e
 
d13c5ed
4591f5e
74c37c0
2e93981
6ff7a4f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74c37c0
917d107
74c37c0
6ff7a4f
c7480d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d8fdd61
c7480d6
 
 
 
 
fd1b271
d023803
 
 
 
 
 
 
 
d434239
 
 
d023803
d434239
 
 
 
 
 
 
 
74c37c0
 
 
 
 
 
c7480d6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
from metadata import MetadataWhereClause


class SanatanConfig:
    """Static configuration for the scripture collections.

    `scriptures` holds one dict per collection. Every entry carries the keys
    `name`, `title`, `output_dir`, `collection_name`, `metadata_fields`,
    `pdf_path`, `source`, `language`, `example_labels`, `examples` and
    `llm_hints`; `collection_embedding_fn` is optional and overrides the
    default embedding function for that collection.

    `example_labels` and `examples` are parallel lists (one label per example
    query), so they must always have the same length.
    """

    # shuklaYajurVedamPdfPath: str = "./data/shukla-yajur-veda.pdf"
    # shuklaYajurVedamSmallPdfPath: str = "./data/shukla-yajur-veda-small.pdf"
    # vishnuPuranamPdfPath = "./data/vishnu_puranam.pdf"
    # datastores = [{"name": "sanskrit_001", "dbStorePath": "./chromadb-store"}, {"name": "nalayiram", "dbStorePath": "./chromadb-store-4000"}]
    dbStorePath: str = "./chromadb-store"
    # shuklaYajurVedamCollectionName: str = "shukla_yajur_vedam"
    # vishnuPuranamCollectionName: str = "vishnu_puranam"
    # shuklaYajurVedamOutputDir = "./output/shukla_yajur_vedam"
    # vishnuPuranamOutputDir = "./output/vishnu_puranam"
    scriptures = [
        {
            "name": "vishnu_puranam",
            "title": "Sri Vishnu Puranam",
            "output_dir": "./output/vishnu_puranam",
            "collection_name": "vishnu_puranam",
            "metadata_fields": [
                {"name": "file", "datatype": "str"},
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/vishnu_puranam.pdf",
            "source": "https://dn720005.ca.archive.org/0/items/vishnu-purana-sanskrit-english-ocr/VISHNU-PURANA-Sanskrit-English-OCR.pdf",
            "language": "san+eng",
            "example_labels": [
                "Vishnu's form",
                "About the five elements",
                "About Garuda",
                "Weapons of Vishnu",
                "Vishnu's form (all scriptures)",
            ],
            "examples": [
                "describe Vishnu's form as defined in vishnu puranam",
                "five elements and their significance as per vishnu puranam",
                "What is the significance of Garuda? Show some verses from vishnu puranam that describe him.",
                "What weapons does Vishnu hold as mentioned in vishnu puranam?",
                "How is the form of Vishnu described across the scriptures?",
            ],
            "llm_hints": [],
        },
        {
            "name": "shukla_yajur_vedam",
            "title": "Shukla Yajur Vedam",
            "output_dir": "./output/shukla_yajur_vedam",
            "collection_name": "shukla_yajur_vedam",
            "metadata_fields": [
                {"name": "file", "datatype": "str"},
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/shukla-yajur-veda.pdf",
            "source": "https://www.thearyasamaj.org/uploads/book/2014/04/R1sSjG_eLb_sub_406_yajurveda.pdf",
            "language": "san+eng",
            "example_labels": [
                "About Vedam",
                "About the five elements",
                "About Brahma",
            ],
            "examples": [
                "Gist of Shukla Yajur Vedam. Give me some sanskrit verses.",
                "What is the significance of fire and water. show some sanskrit verses",
                "Brahma",
            ],
            "llm_hints": [],
        },
        {
            "name": "bhagavat_gita",
            "title": "Bhagavat Gita",
            "output_dir": "./output/bhagavat_gita",
            "collection_name": "bhagavat_gita",
            "metadata_fields": [
                {"name": "file", "datatype": "str"},
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/bhagavat_gita.pdf",
            "source": "https://dn790006.ca.archive.org/0/items/in.gov.ignca.279/279_text.pdf",
            "language": "san+eng",
            "example_labels": [
                "About Arjuna",
                "About Karma",
                "About birth and death",
                "About the battle field",
                "About Krishna's form",
                "Krishna's Teachings",
            ],
            "examples": [
                "Show some verses where Krishna advises Arjuna",
                "What does Krishna say about Karma",
                "What does Krishna say about birth and death",
                "describe the battle field",
                # The trailing comma matters: without it this entry and the
                # next are concatenated into one string and the list falls
                # out of step with `example_labels`.
                "How did Arjuna respond upon witnessing Krishna’s Vishwarupa?",
                "What teachings did Krishna share in the Gita?",
            ],
            "llm_hints": [],
        },
        {
            "name": "valmiki_ramayanam",
            "title": "Valmiki Ramayanam",
            "output_dir": "./output/valmiki_ramayanam",
            "collection_name": "valmiki_ramayanam",
            "metadata_fields": [
                {"name": "file", "datatype": "str"},
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/valmiki_ramayanam.pdf",
            "source": "https://ia800509.us.archive.org/28/items/valmiki-ramayana-gita-press-english/Valmiki%20Ramayana%20Gita%20Press%20English.pdf",
            "language": "san+eng",
            "example_labels": [
                "About Jatayu",
                "About Hanuman",
                "About Vali",
                "About Sita",
                "About Ravana",
                "A slokam by name",
                "Vibheeshana sharanagathi slokam",
            ],
            "examples": [
                "What is the significance of Jatayu? show some sanskrit verses to support the argument",
                "Show some verses where Hanuman is mentioned",
                "How did Rama kill Vali",
                "How was Sita abducted",
                "How did Rama kill Ravana?",
                "explain sakrudeva prapannaaya shlokam in ramayana",
                "give the shlokam in ramayanam that vibheeshana uses to perform sharanagathi to rama, give the sanskrit shlokam and its meaning",
            ],
            "llm_hints": [],
        },
        {
            "name": "vishnu_sahasranamam",
            "title": "Vishnu Sahasranamam",
            "output_dir": "./output/vishnu_sahasranamam",
            "collection_name": "vishnu_sahasranamam",
            "metadata_fields": [
                {"name": "chapter", "datatype": "str"},
                {"name": "page_number", "datatype": "int"},
                {"name": "sanskrit", "datatype": "str"},
                {"name": "translation", "datatype": "str"},
                {"name": "transliteration", "datatype": "str"},
                {"name": "verse", "datatype": "int"},
            ],
            "pdf_path": "./data/vishnu_sahasranamam.pdf",
            "source": "https://www.swami-krishnananda.org/vishnu/Sri_Vishnu_Sahasranama_Stotram.pdf",
            "language": "san+eng",
            "example_labels": ["Vanamali", "1000 names", "Sanskrit text search"],
            "examples": [
                "Vanamali",
                "Show some of the 1000 names of Vishnu along with their meaning",
                "show the verse that begins with शुक्लाम्बरधरं",
            ],
            "llm_hints": [],
        },
        {
            "name": "divya_prabandham",
            "title": "4000 Divya Prabandham",
            "output_dir": "./output/divya_prabandham",
            "collection_name": "divya_prabandham",
            "collection_embedding_fn": "openai",
            "metadata_fields": [
                {
                    "name": "prabandham_code",
                    "datatype": "str",
                    "description": "contains the short prabandham_code. e.g. `TPL` for `Thiruppallandu`",
                },
                {
                    "name": "prabandham_name",
                    "datatype": "str",
                    "description": "contains the prabandham name. e.g. `Thiruppallandu`",
                },
                {
                    "name": "azhwar_name",
                    "datatype": "str",
                    "description": "contains the azhwar name. e.g. `Thirumangai Azhwar`",
                },
                {
                    "name": "divya_desams",
                    "datatype": "str",
                    "description": "comma separated list of divya desams. e.g. Thiruneermalai,Thiruvallikkeni",
                },
                # {"name": "html_url", "datatype": "str", "description" : "Reference link for the source"},
                # {"name": "pasuram_en", "datatype": "str", "description" : "Transliteration of pasuram in english"},
                # {"name": "pasuram_ta", "datatype": "str", "description" : "Pasuram lyrics in tamil"},
                {
                    "name": "title",
                    "datatype": "str",
                    "description": (
                        "Title of this pasuram."
                        "Use this when a specific prabandham code or name is given along with a relative verse number."
                        "for example:\n"
                        "- `TVM 1.8.3`\n"
                        "- if the user query is 'give me 3rd pasuram in the 8th Thiruvaimozhi of the 1st decade.' - you must convert this representation to the format '{prabandham_code} {nth_decade}.{nth_chapter}.{nth_pasuram}' and pass as filter vaoue to the `title` field. \n"
                        "If no decade is provided but a prabandham name is provided, assume decade = 1"
                    ),
                },
                {
                    "name": "verse",
                    "datatype": "int",
                    "description": (
                        "Absolute verse number or pasuram number."
                        "Use it only when a specific prabandham name is NOT mentioned in the user query."
                        "For e.g. 'Give me pasuram 1176'"
                    ),
                },
                # {"name": "wbw_ta", "datatype": "str", "description" : "Word by word meaning in tamil."},
                {
                    "name": "decade",
                    "datatype": "int",
                    "description": (
                        "The decade (or `pathu` in Tamil) that this pasuram belongs to. decade is -1 when there is no associated decade."
                    ),
                },
                {
                    "name": "chapter",
                    "datatype": "int",
                    "description": (
                        "chapter number of this pasuram. is -1 when there is no associated chapter number"
                    ),
                },
                {
                    "name": "position_in_chapter",
                    "datatype": "int",
                    "description": (
                        "Relative verse number or pasuram number within a chapter."
                        "Use it only when a specific prabandham name is mentioned in the user query."
                        "For e.g. 'Give me the 5th pasuram from Thirupavai'"
                    ),
                },
            ],
            "pdf_path": "./data/divya_prabandham.pdf",
            "source": "https://uveda.org",
            "language": "tamil",
            "example_labels": [
                "About the five elements",
                "About Garuda",
                "Pasuram about Krishna's Flute",
                "Andal's pasuram",
                "Specific Pasuram (absolute)",
                "Pasuram by Azhwar",
                "Specific pasuram(relative)",
                "Decade and Chapter Search",
            ],
            "examples": [
                "five elements and their significance as defined in divya_prabandham",
                "What is the significance of Garuda? Show some verses from divya prabandham that describe him.",
                "Show me a pasuram that talks about how the animals and birds enjoy Krishna's flute playing.",
                "Give me a pasuram by Andal",
                "Show me Pasuram 1187 ",
                "Show me a pasuram by Thondaradippodi azhwar",
                "Give me the 2nd pasuram in the 3rd Thiruvaimozhi from the 2nd decade",
                "Give me just a few words from the starting lines and reference links of all 11 pasurams from thiruvaimozhi 5th decade 4th chapter.",
            ],
            "llm_hints": [
                "If the user wishes to query at a decade or chapter level for a given prabandham, use the direct metadata query on the appropriate fields once instead of querying the tool multiple times for each pasuram from the chapter."
            ],
        },
        {
            "name": "bhagavata_purana",
            "title": "Bhagavatha Puranam",
            "output_dir": "./output/bhagavata_purana",
            "collection_name": "bhagavata_purana",
            "metadata_fields": [
                {"name": "file", "datatype": "str"},
                {"name": "num_chars", "datatype": "str"},
                {"name": "page", "datatype": "int"},
            ],
            "pdf_path": "./data/bhagavata_purana.pdf",
            "source": "https://dn790003.ca.archive.org/0/items/bhagavatapuranagitapress_201907/Bhagavata%20Purana%20-%20Gita%20Press_text.pdf",
            "language": "san+eng",
            "example_labels": ["Gajendra Moksham", "Prahalad"],
            "examples": [
                "State some verses that showcase the devotion of Gajendra the elephant",
                "State some verses that showcase the devotion of Prahlada",
            ],
            "llm_hints": [],
        },
        {
            "name": "kamba_ramayanam_en",
            "title": "Kamba Ramayanam (English)",
            # NOTE(review): same output_dir as the "kamba_ramayanam" entry
            # below - confirm the two collections are meant to share it.
            "output_dir": "./output/kamba_ramayanam",
            "collection_name": "kamba_ramayanam_en",
            "metadata_fields": [
                {
                    "name": "file",
                    "datatype": "str",
                    "description": "The name of the Kandam or the chapter.",
                },
                {
                    "name": "padalam_en",
                    "datatype": "str",
                    "description": "The name of the Padalam (Episode) in English.",
                },
                {
                    "name": "padalam_ta",
                    "datatype": "str",
                    "description": "The name of the Padalam (Episode) in Tamil.",
                },
                {"name": "page", "datatype": "int"},
                {"name": "verse_number", "datatype": "int"},
            ],
            "pdf_path": "./data/kamba_ramayanam.pdf",
            "source": "https://www.hindupedia.com/images/1/13/Kamba_Ramayanam_I.pdf",
            "language": "tamil",
            "example_labels": [
                "About Jatayu",
                "About Hanuman",
                "About Vali",
                "About Sita",
                "About Ravana",
            ],
            "examples": [
                "What is the significance of Jatayu? show some sanskrit verses to support the argument",
                "Show some verses where Hanuman is mentioned",
                "How did Rama kill Vali",
                "How was Sita abducted",
                "How did Rama kill Ravana?",
            ],
            "llm_hints": [],
        },
        {
            "name": "kamba_ramayanam",
            "title": "Kamba Ramayanam (Tamil)",
            "output_dir": "./output/kamba_ramayanam",
            "collection_name": "kamba_ramayanam",
            "metadata_fields": [
                {
                    "name": "chunk_index",
                    "datatype": "int",
                    "description": "The index of the chunk",
                },
                {
                    "name": "filename",
                    "datatype": "str",
                    "description": "The name of the file.",
                },
            ],
            "pdf_path": "./data/kamba_ramayanam.pdf",
            "source": "https://archive.org/details/vrajeshkumar_gmail_061/01-%E0%AE%AA%E0%AE%BE%E0%AE%B2%20%E0%AE%95%E0%AE%BE%E0%AE%A3%E0%AF%8D%E0%AE%9F%E0%AE%AE%E0%AF%8D/page/n15/mode/2up",
            "language": "tamil",
            "example_labels": [
                "About Jatayu",
                "About Hanuman",
                "About Vali",
                "About Sita",
                "About Ravana",
            ],
            "examples": [
                "What is the significance of Jatayu? show some sanskrit verses to support the argument",
                "Show some verses where Hanuman is mentioned",
                "How did Rama kill Vali",
                "How was Sita abducted",
                "How did Rama kill Ravana?",
            ],
            "llm_hints": [],
        },
        {
            "name": "chathusloki",
            "title": "Chathusloki by Sri Alavandar",
            "output_dir": "./output/chathusloki",
            "collection_name": "chathusloki",
            "metadata_fields": [
                {
                    "name": "sloka_number",
                    "datatype": "int",
                    "description": "The index of the sloka or verse",
                },
                {
                    "name": "meaning_short",
                    "datatype": "str",
                    "description": "A short meaning of the sanskrit verse in English.",
                },
            ],
            "pdf_path": "./data/chathusloki.pdf",
            "source": "https://www.sadagopan.org/ebook/pdf/Chatusloki%20-%20VS.pdf",
            "language": "san+eng",
            "example_labels": ["Recite a sloka", "Commentary", "Role of Sridevi"],
            "examples": [
                "Recite the 1st sloka from Chathusloki",
                "Show detailed commentary for sloka 2 from Chathusloki",
                "What is the role of Sri Devi in the universe according to the Chathusloki?",
            ],
            "llm_hints": [],
        },
        {
            "name": "sri_stavam",
            "title": "Sri Stavam by Sri Koorathazhwar",
            "output_dir": "./output/sri_stavam",
            "collection_name": "sri_stavam",
            "metadata_fields": [
                {
                    "name": "sloka_number",
                    "datatype": "int",
                    "description": "The index of the sloka or verse",
                },
                {
                    "name": "meaning_short",
                    "datatype": "str",
                    "description": "A short meaning of the sanskrit verse in English.",
                },
                {
                    "name": "sanskrit",
                    "datatype": "str",
                    "description": "Verse in sanskrit",
                },
                {
                    "name": "transliteration",
                    "datatype": "str",
                    "description": "Verse transliterated to English",
                },
            ],
            "pdf_path": "./data/sri_stavam.pdf",
            "source": "https://www.sadagopan.org/ebook/pdf/Sri%20Stavam.pdf",
            "language": "san+eng",
            "example_labels": ["Recite a sloka", "Commentary", "Role of Sridevi"],
            "examples": [
                "Recite the 1st sloka from Sri Stavam",
                "Show detailed commentary for sloka 2 from Sri Stavam",
                "What is the role of Sri Devi in the universe according to the Sri Stavam?",
            ],
            "llm_hints": [
                # The hint must name a field declared in `metadata_fields`
                # above; a filter on any other field is rejected by
                # is_metadata_field_allowed().
                "if the user asks for nth sloka, do a metadata search on the `sloka_number` field."
            ],
        },
        {
            "name": "yt_metadata",
            "title": "Sampradayam in YouTube",
            "output_dir": "./output/yt_metadata",
            "collection_name": "yt_metadata",
            "collection_embedding_fn": "openai",
            "metadata_fields": [
                {
                    "name": "video_id",
                    "datatype": "str",
                    "description": "The video id as in YouTube",
                },
                {
                    "name": "video_title",
                    "datatype": "str",
                    "description": "The title of the video as in YouTube",
                },
                {
                    "name": "description",
                    "datatype": "str",
                    "description": "Description as in YouTube",
                },
                {
                    "name": "channel_url",
                    "datatype": "str",
                    "description": "URL of the YouTube Channel",
                },
                {
                    "name": "channel_title",
                    "datatype": "str",
                    "description": "Title of the YouTube Channel",
                },
            ],
            "pdf_path": "./data/none.pdf",
            "source": "https://youtube.com",
            "language": "san+eng+tam",
            "example_labels": ["Srirangam", "Pasuram video"],
            "examples": [
                "Show me YouTube videos that talk about Srirangam",
                "Show me lyrics of 1st pasuram of 1st decade in the 4st Thiruvaimozhi. Also show the related youtube videos.",
            ],
            "llm_hints": [
                "if the user asks for YouTube videos, DO NOT do a web search, instead do a search on this collection."
            ],
        },
    ]

    def get_scripture_by_collection(self, collection_name: str) -> dict:
        """Return the first scripture entry whose `collection_name` matches.

        Args:
            collection_name: value to look up in each entry's
                `collection_name` key.

        Returns:
            The matching dict from `scriptures` (the same object, not a copy).

        Raises:
            IndexError: if no entry is configured for `collection_name`.
        """
        for scripture in self.scriptures:
            if scripture["collection_name"] == collection_name:
                return scripture
        # IndexError is kept for callers that already catch it; the message
        # now names the collection that was not found.
        raise IndexError(
            f"no scripture is configured for collection [{collection_name}]"
        )

    def is_metadata_field_allowed(
        self, collection_name: str, metadata_where_clause: "MetadataWhereClause"
    ) -> bool:
        """Check that every filter targets a field declared for the collection.

        Args:
            collection_name: collection whose `metadata_fields` define the
                allowed field names.
            metadata_where_clause: object exposing `filters`; each filter
                exposes `metadata_field`. The annotation is quoted so it is
                not evaluated when the class body runs.

        Returns:
            True when all filters reference declared fields.

        Raises:
            IndexError: if `collection_name` is not configured.
            Exception: on the first filter whose field is not declared; the
                message lists the allowed fields with their descriptions.
        """
        scripture = self.get_scripture_by_collection(collection_name=collection_name)
        allowed_fields = scripture["metadata_fields"]
        # Built once; the set of allowed names does not change per filter.
        allowed_names = [field["name"] for field in allowed_fields]
        for where_filter in metadata_where_clause.filters:
            if where_filter.metadata_field not in allowed_names:
                # `allowed_fields` is a local so the f-string contains no
                # nested double quotes (a syntax error before Python 3.12).
                raise Exception(
                    f"metadata_field: [{where_filter.metadata_field}] not allowed in collection [{collection_name}]. Here are the allowed fields with their descriptions: {allowed_fields}"
                )
        return True

    def get_embedding_for_collection(self, collection_name: str) -> str:
        """Return the embedding function key configured for a collection.

        Args:
            collection_name: collection to look up.

        Returns:
            The entry's `collection_embedding_fn` when present, otherwise
            "hf" (the default: huggingface sentence transformers).

        Raises:
            IndexError: if `collection_name` is not configured.
        """
        scripture = self.get_scripture_by_collection(collection_name)
        return scripture.get("collection_embedding_fn", "hf")