Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Upload folder using huggingface_hub
Browse files
config.py
CHANGED
|
@@ -424,6 +424,51 @@ class SanatanConfig:
|
|
| 424 |
"if the user asks for nth sloka, do a metadata search on the `verse` field."
|
| 425 |
],
|
| 426 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
]
|
| 428 |
|
| 429 |
def get_scripture_by_collection(self, collection_name: str):
|
|
@@ -451,4 +496,4 @@ class SanatanConfig:
|
|
| 451 |
embedding_fn = "hf" # default is huggingface sentence transformers
|
| 452 |
if "collection_embedding_fn" in scripture:
|
| 453 |
embedding_fn = scripture["collection_embedding_fn"] # overridden in config
|
| 454 |
-
return embedding_fn
|
|
|
|
| 424 |
"if the user asks for nth sloka, do a metadata search on the `verse` field."
|
| 425 |
],
|
| 426 |
},
|
| 427 |
+
{
|
| 428 |
+
"name": "yt_metadata",
|
| 429 |
+
"title": "Sampradayam in YouTube",
|
| 430 |
+
"output_dir": "./output/yt_metadata",
|
| 431 |
+
"collection_name": "yt_metadata",
|
| 432 |
+
"collection_embedding_fn": "openai",
|
| 433 |
+
"metadata_fields": [
|
| 434 |
+
{
|
| 435 |
+
"name": "video_id",
|
| 436 |
+
"datatype": "str",
|
| 437 |
+
"description": "The video id as in YouTube",
|
| 438 |
+
},
|
| 439 |
+
{
|
| 440 |
+
"name": "video_title",
|
| 441 |
+
"datatype": "str",
|
| 442 |
+
"description": "The title of the video as in YouTube",
|
| 443 |
+
},
|
| 444 |
+
{
|
| 445 |
+
"name": "description",
|
| 446 |
+
"datatype": "str",
|
| 447 |
+
"description": "Description as in YouTube",
|
| 448 |
+
},
|
| 449 |
+
{
|
| 450 |
+
"name": "channel_url",
|
| 451 |
+
"datatype": "str",
|
| 452 |
+
"description": "URL of the YouTube Channel",
|
| 453 |
+
},
|
| 454 |
+
{
|
| 455 |
+
"name": "channel_title",
|
| 456 |
+
"datatype": "str",
|
| 457 |
+
"description": "Title of the YouTube Channel",
|
| 458 |
+
},
|
| 459 |
+
],
|
| 460 |
+
"pdf_path": "./data/none.pdf",
|
| 461 |
+
"source": "https://youtube.com",
|
| 462 |
+
"language": "san+eng+tam",
|
| 463 |
+
"example_labels": ["Srirangam", "Pasuram video"],
|
| 464 |
+
"examples": [
|
| 465 |
+
"Show me YouTube videos that talk about Srirangam",
|
| 466 |
+
"Periazhwar Thirumozhi pasuram from YouTube",
|
| 467 |
+
],
|
| 468 |
+
"llm_hints": [
|
| 469 |
+
"if the user asks for YouTube videos, DO NOT do a web search, instead do a search on this collection."
|
| 470 |
+
],
|
| 471 |
+
},
|
| 472 |
]
|
| 473 |
|
| 474 |
def get_scripture_by_collection(self, collection_name: str):
|
|
|
|
| 496 |
embedding_fn = "hf" # default is huggingface sentence transformers
|
| 497 |
if "collection_embedding_fn" in scripture:
|
| 498 |
embedding_fn = scripture["collection_embedding_fn"] # overridden in config
|
| 499 |
+
return embedding_fn
|