Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Upload folder using huggingface_hub
Browse files
db.py
CHANGED
|
@@ -105,6 +105,10 @@ class SanatanDatabase:
|
|
| 105 |
else None
|
| 106 |
)
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
data = collection.get(include=["metadatas", "documents"], where=where_clause)
|
| 109 |
|
| 110 |
if data["metadatas"]:
|
|
|
|
| 105 |
else None
|
| 106 |
)
|
| 107 |
|
| 108 |
+
# If the conversion returns an empty dict, treat it as None
|
| 109 |
+
if isinstance(where_clause, dict) and not where_clause:
|
| 110 |
+
where_clause = None
|
| 111 |
+
|
| 112 |
data = collection.get(include=["metadatas", "documents"], where=where_clause)
|
| 113 |
|
| 114 |
if data["metadatas"]:
|
server.py
CHANGED
|
@@ -285,52 +285,107 @@ async def get_scripture_configs():
|
|
| 285 |
return {"scriptures": sorted(scriptures, key=lambda s: s["title"])}
|
| 286 |
|
| 287 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
@router.post("/scripture/{scripture_name}/search")
|
| 289 |
async def search_scripture_find_first_match(
|
| 290 |
scripture_name: str,
|
| 291 |
-
|
| 292 |
):
|
| 293 |
"""
|
| 294 |
-
Search scripture collection
|
| 295 |
- `scripture_name`: Name of the collection
|
| 296 |
- `filter_obj`: MetadataWhereClause (filters, groups, operator)
|
| 297 |
-
- `
|
| 298 |
"""
|
|
|
|
|
|
|
| 299 |
try:
|
| 300 |
logger.info(
|
| 301 |
-
"
|
| 302 |
scripture_name,
|
| 303 |
filter_obj,
|
|
|
|
| 304 |
)
|
|
|
|
| 305 |
db = SanatanDatabase()
|
| 306 |
config = next(
|
| 307 |
-
(s for s in SanatanConfig().scriptures if s["name"] == scripture_name),
|
|
|
|
| 308 |
)
|
|
|
|
|
|
|
| 309 |
|
|
|
|
| 310 |
results = db.fetch_first_match(
|
| 311 |
collection_name=config["collection_name"],
|
| 312 |
metadata_where_clause=filter_obj,
|
| 313 |
)
|
| 314 |
|
| 315 |
-
# print("results = ", results)
|
| 316 |
-
# Flatten + canonicalize results
|
| 317 |
formatted_results = []
|
| 318 |
for i in range(len(results["metadatas"])):
|
| 319 |
-
|
| 320 |
metadata_doc = results["metadatas"][i]
|
| 321 |
-
metadata_doc["id"] =
|
| 322 |
-
|
| 323 |
document_text = (
|
| 324 |
results["documents"][i] if results.get("documents") else None
|
| 325 |
)
|
| 326 |
-
|
| 327 |
canonical_doc = SanatanConfig().canonicalize_document(
|
| 328 |
scripture_name, document_text, metadata_doc
|
| 329 |
)
|
| 330 |
formatted_results.append(canonical_doc)
|
| 331 |
|
| 332 |
-
#
|
| 333 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
|
| 335 |
except Exception as e:
|
| 336 |
logger.error("Error while searching %s", e, exc_info=True)
|
|
@@ -341,7 +396,7 @@ class ScriptureMultiSearchRequest(BaseModel):
|
|
| 341 |
filter_obj: Optional[MetadataWhereClause] = None
|
| 342 |
page: int = 1
|
| 343 |
page_size: int = 20
|
| 344 |
-
has_audio: Optional[AudioType] = None
|
| 345 |
|
| 346 |
|
| 347 |
@router.post("/scripture/{scripture_name}/search/all")
|
|
@@ -367,7 +422,7 @@ async def search_scripture_find_all_matches(
|
|
| 367 |
filter_obj,
|
| 368 |
page,
|
| 369 |
page_size,
|
| 370 |
-
has_audio
|
| 371 |
)
|
| 372 |
|
| 373 |
db = SanatanDatabase()
|
|
@@ -458,7 +513,7 @@ async def search_scripture_find_all_matches(
|
|
| 458 |
"page": page,
|
| 459 |
"page_size": page_size,
|
| 460 |
}
|
| 461 |
-
|
| 462 |
except Exception as e:
|
| 463 |
logger.error("Error while searching %s", e, exc_info=True)
|
| 464 |
return {"error": str(e)}
|
|
|
|
| 285 |
return {"scriptures": sorted(scriptures, key=lambda s: s["title"])}
|
| 286 |
|
| 287 |
|
| 288 |
+
class ScriptureFirstSearchRequst(BaseModel):
|
| 289 |
+
filter_obj: Optional[MetadataWhereClause] = None
|
| 290 |
+
has_audio: Optional[AudioType] = None
|
| 291 |
+
|
| 292 |
+
|
| 293 |
@router.post("/scripture/{scripture_name}/search")
|
| 294 |
async def search_scripture_find_first_match(
|
| 295 |
scripture_name: str,
|
| 296 |
+
req: ScriptureFirstSearchRequst,
|
| 297 |
):
|
| 298 |
"""
|
| 299 |
+
Search scripture collection and return the first matching result.
|
| 300 |
- `scripture_name`: Name of the collection
|
| 301 |
- `filter_obj`: MetadataWhereClause (filters, groups, operator)
|
| 302 |
+
- `has_audio`: Optional. One of any|none|recitation|virutham|upanyasam|santhai
|
| 303 |
"""
|
| 304 |
+
filter_obj = req.filter_obj
|
| 305 |
+
has_audio = req.has_audio
|
| 306 |
try:
|
| 307 |
logger.info(
|
| 308 |
+
"search_scripture_find_first_match: searching for %s with filters=%s | has_audio=%s",
|
| 309 |
scripture_name,
|
| 310 |
filter_obj,
|
| 311 |
+
has_audio,
|
| 312 |
)
|
| 313 |
+
|
| 314 |
db = SanatanDatabase()
|
| 315 |
config = next(
|
| 316 |
+
(s for s in SanatanConfig().scriptures if s["name"] == scripture_name),
|
| 317 |
+
None,
|
| 318 |
)
|
| 319 |
+
if not config:
|
| 320 |
+
return {"error": f"Scripture '{scripture_name}' not found"}
|
| 321 |
|
| 322 |
+
# 1️⃣ Fetch results (same as before)
|
| 323 |
results = db.fetch_first_match(
|
| 324 |
collection_name=config["collection_name"],
|
| 325 |
metadata_where_clause=filter_obj,
|
| 326 |
)
|
| 327 |
|
|
|
|
|
|
|
| 328 |
formatted_results = []
|
| 329 |
for i in range(len(results["metadatas"])):
|
| 330 |
+
doc_id = results["ids"][i]
|
| 331 |
metadata_doc = results["metadatas"][i]
|
| 332 |
+
metadata_doc["id"] = doc_id
|
| 333 |
+
|
| 334 |
document_text = (
|
| 335 |
results["documents"][i] if results.get("documents") else None
|
| 336 |
)
|
|
|
|
| 337 |
canonical_doc = SanatanConfig().canonicalize_document(
|
| 338 |
scripture_name, document_text, metadata_doc
|
| 339 |
)
|
| 340 |
formatted_results.append(canonical_doc)
|
| 341 |
|
| 342 |
+
# 2️⃣ Apply has_audio filter (same logic as in search_scripture_find_all_matches)
|
| 343 |
+
if has_audio and formatted_results:
|
| 344 |
+
if has_audio == AudioType.none:
|
| 345 |
+
# Get all indices that have any audio
|
| 346 |
+
all_audio_indices = set()
|
| 347 |
+
for atype in [
|
| 348 |
+
AudioType.recitation,
|
| 349 |
+
AudioType.virutham,
|
| 350 |
+
AudioType.upanyasam,
|
| 351 |
+
AudioType.santhai,
|
| 352 |
+
]:
|
| 353 |
+
indices = await svc_get_indices_with_audio(scripture_name, atype)
|
| 354 |
+
all_audio_indices.update(indices)
|
| 355 |
+
|
| 356 |
+
# Keep only those without audio
|
| 357 |
+
formatted_results = [
|
| 358 |
+
r
|
| 359 |
+
for r in formatted_results
|
| 360 |
+
if r["_global_index"] not in all_audio_indices
|
| 361 |
+
]
|
| 362 |
+
|
| 363 |
+
else:
|
| 364 |
+
if has_audio == AudioType.any:
|
| 365 |
+
audio_indices = set()
|
| 366 |
+
for atype in [
|
| 367 |
+
AudioType.recitation,
|
| 368 |
+
AudioType.virutham,
|
| 369 |
+
AudioType.upanyasam,
|
| 370 |
+
AudioType.santhai,
|
| 371 |
+
]:
|
| 372 |
+
indices = await svc_get_indices_with_audio(
|
| 373 |
+
scripture_name, atype
|
| 374 |
+
)
|
| 375 |
+
audio_indices.update(indices)
|
| 376 |
+
else:
|
| 377 |
+
audio_indices = set(
|
| 378 |
+
await svc_get_indices_with_audio(scripture_name, has_audio)
|
| 379 |
+
)
|
| 380 |
+
|
| 381 |
+
formatted_results = [
|
| 382 |
+
r for r in formatted_results if r["_global_index"] in audio_indices
|
| 383 |
+
]
|
| 384 |
+
|
| 385 |
+
# 3️⃣ Return only the first valid result (if any)
|
| 386 |
+
return {
|
| 387 |
+
"results": formatted_results[:1] if formatted_results else [],
|
| 388 |
+
}
|
| 389 |
|
| 390 |
except Exception as e:
|
| 391 |
logger.error("Error while searching %s", e, exc_info=True)
|
|
|
|
| 396 |
filter_obj: Optional[MetadataWhereClause] = None
|
| 397 |
page: int = 1
|
| 398 |
page_size: int = 20
|
| 399 |
+
has_audio: Optional[AudioType] = None
|
| 400 |
|
| 401 |
|
| 402 |
@router.post("/scripture/{scripture_name}/search/all")
|
|
|
|
| 422 |
filter_obj,
|
| 423 |
page,
|
| 424 |
page_size,
|
| 425 |
+
has_audio,
|
| 426 |
)
|
| 427 |
|
| 428 |
db = SanatanDatabase()
|
|
|
|
| 513 |
"page": page,
|
| 514 |
"page_size": page_size,
|
| 515 |
}
|
| 516 |
+
|
| 517 |
except Exception as e:
|
| 518 |
logger.error("Error while searching %s", e, exc_info=True)
|
| 519 |
return {"error": str(e)}
|