vikramvasudevan committed on
Commit
2ff9f44
·
verified ·
1 Parent(s): 48189d1

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. db.py +4 -0
  2. server.py +71 -16
db.py CHANGED
@@ -105,6 +105,10 @@ class SanatanDatabase:
105
  else None
106
  )
107
 
 
 
 
 
108
  data = collection.get(include=["metadatas", "documents"], where=where_clause)
109
 
110
  if data["metadatas"]:
 
105
  else None
106
  )
107
 
108
+ # If the conversion returns an empty dict, treat it as None
109
+ if isinstance(where_clause, dict) and not where_clause:
110
+ where_clause = None
111
+
112
  data = collection.get(include=["metadatas", "documents"], where=where_clause)
113
 
114
  if data["metadatas"]:
server.py CHANGED
@@ -285,52 +285,107 @@ async def get_scripture_configs():
285
  return {"scriptures": sorted(scriptures, key=lambda s: s["title"])}
286
 
287
 
 
 
 
 
 
288
  @router.post("/scripture/{scripture_name}/search")
289
  async def search_scripture_find_first_match(
290
  scripture_name: str,
291
- filter_obj: Optional[MetadataWhereClause] = None,
292
  ):
293
  """
294
- Search scripture collection with optional filters.
295
  - `scripture_name`: Name of the collection
296
  - `filter_obj`: MetadataWhereClause (filters, groups, operator)
297
- - `n_results`: number of random results to return
298
  """
 
 
299
  try:
300
  logger.info(
301
- "search_scripture: searching for %s with filters %s",
302
  scripture_name,
303
  filter_obj,
 
304
  )
 
305
  db = SanatanDatabase()
306
  config = next(
307
- (s for s in SanatanConfig().scriptures if s["name"] == scripture_name), None
 
308
  )
 
 
309
 
 
310
  results = db.fetch_first_match(
311
  collection_name=config["collection_name"],
312
  metadata_where_clause=filter_obj,
313
  )
314
 
315
- # print("results = ", results)
316
- # Flatten + canonicalize results
317
  formatted_results = []
318
  for i in range(len(results["metadatas"])):
319
- id = results["ids"][i]
320
  metadata_doc = results["metadatas"][i]
321
- metadata_doc["id"] = id
322
- # print("metadata_doc = ", metadata_doc)
323
  document_text = (
324
  results["documents"][i] if results.get("documents") else None
325
  )
326
-
327
  canonical_doc = SanatanConfig().canonicalize_document(
328
  scripture_name, document_text, metadata_doc
329
  )
330
  formatted_results.append(canonical_doc)
331
 
332
- # print("formatted_results = ", formatted_results)
333
- return {"results": formatted_results}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
 
335
  except Exception as e:
336
  logger.error("Error while searching %s", e, exc_info=True)
@@ -341,7 +396,7 @@ class ScriptureMultiSearchRequest(BaseModel):
341
  filter_obj: Optional[MetadataWhereClause] = None
342
  page: int = 1
343
  page_size: int = 20
344
- has_audio: Optional[AudioType] = None # new optional field
345
 
346
 
347
  @router.post("/scripture/{scripture_name}/search/all")
@@ -367,7 +422,7 @@ async def search_scripture_find_all_matches(
367
  filter_obj,
368
  page,
369
  page_size,
370
- has_audio
371
  )
372
 
373
  db = SanatanDatabase()
@@ -458,7 +513,7 @@ async def search_scripture_find_all_matches(
458
  "page": page,
459
  "page_size": page_size,
460
  }
461
-
462
  except Exception as e:
463
  logger.error("Error while searching %s", e, exc_info=True)
464
  return {"error": str(e)}
 
285
  return {"scriptures": sorted(scriptures, key=lambda s: s["title"])}
286
 
287
 
288
+ class ScriptureFirstSearchRequst(BaseModel):
289
+ filter_obj: Optional[MetadataWhereClause] = None
290
+ has_audio: Optional[AudioType] = None
291
+
292
+
293
  @router.post("/scripture/{scripture_name}/search")
294
  async def search_scripture_find_first_match(
295
  scripture_name: str,
296
+ req: ScriptureFirstSearchRequst,
297
  ):
298
  """
299
+ Search scripture collection and return the first matching result.
300
  - `scripture_name`: Name of the collection
301
  - `filter_obj`: MetadataWhereClause (filters, groups, operator)
302
+ - `has_audio`: optional. Can take the values any|none|recitation|virutham|upanyasam|santhai
303
  """
304
+ filter_obj = req.filter_obj
305
+ has_audio = req.has_audio
306
  try:
307
  logger.info(
308
+ "search_scripture_find_first_match: searching for %s with filters=%s | has_audio=%s",
309
  scripture_name,
310
  filter_obj,
311
+ has_audio,
312
  )
313
+
314
  db = SanatanDatabase()
315
  config = next(
316
+ (s for s in SanatanConfig().scriptures if s["name"] == scripture_name),
317
+ None,
318
  )
319
+ if not config:
320
+ return {"error": f"Scripture '{scripture_name}' not found"}
321
 
322
+ # 1️⃣ Fetch results (same as before)
323
  results = db.fetch_first_match(
324
  collection_name=config["collection_name"],
325
  metadata_where_clause=filter_obj,
326
  )
327
 
 
 
328
  formatted_results = []
329
  for i in range(len(results["metadatas"])):
330
+ doc_id = results["ids"][i]
331
  metadata_doc = results["metadatas"][i]
332
+ metadata_doc["id"] = doc_id
333
+
334
  document_text = (
335
  results["documents"][i] if results.get("documents") else None
336
  )
 
337
  canonical_doc = SanatanConfig().canonicalize_document(
338
  scripture_name, document_text, metadata_doc
339
  )
340
  formatted_results.append(canonical_doc)
341
 
342
+ # 2️⃣ Apply has_audio filter (same logic as in search_scripture_find_all_matches)
343
+ if has_audio and formatted_results:
344
+ if has_audio == AudioType.none:
345
+ # Get all indices that have any audio
346
+ all_audio_indices = set()
347
+ for atype in [
348
+ AudioType.recitation,
349
+ AudioType.virutham,
350
+ AudioType.upanyasam,
351
+ AudioType.santhai,
352
+ ]:
353
+ indices = await svc_get_indices_with_audio(scripture_name, atype)
354
+ all_audio_indices.update(indices)
355
+
356
+ # Keep only those without audio
357
+ formatted_results = [
358
+ r
359
+ for r in formatted_results
360
+ if r["_global_index"] not in all_audio_indices
361
+ ]
362
+
363
+ else:
364
+ if has_audio == AudioType.any:
365
+ audio_indices = set()
366
+ for atype in [
367
+ AudioType.recitation,
368
+ AudioType.virutham,
369
+ AudioType.upanyasam,
370
+ AudioType.santhai,
371
+ ]:
372
+ indices = await svc_get_indices_with_audio(
373
+ scripture_name, atype
374
+ )
375
+ audio_indices.update(indices)
376
+ else:
377
+ audio_indices = set(
378
+ await svc_get_indices_with_audio(scripture_name, has_audio)
379
+ )
380
+
381
+ formatted_results = [
382
+ r for r in formatted_results if r["_global_index"] in audio_indices
383
+ ]
384
+
385
+ # 3️⃣ Return only the first valid result (if any)
386
+ return {
387
+ "results": formatted_results[:1] if formatted_results else [],
388
+ }
389
 
390
  except Exception as e:
391
  logger.error("Error while searching %s", e, exc_info=True)
 
396
  filter_obj: Optional[MetadataWhereClause] = None
397
  page: int = 1
398
  page_size: int = 20
399
+ has_audio: Optional[AudioType] = None
400
 
401
 
402
  @router.post("/scripture/{scripture_name}/search/all")
 
422
  filter_obj,
423
  page,
424
  page_size,
425
+ has_audio,
426
  )
427
 
428
  db = SanatanDatabase()
 
513
  "page": page,
514
  "page_size": page_size,
515
  }
516
+
517
  except Exception as e:
518
  logger.error("Error while searching %s", e, exc_info=True)
519
  return {"error": str(e)}