Spaces:

vikramvasudevan
/

sanatan_ai

Running on CPU Upgrade

App Files Files Community

vikramvasudevan committed on Sep 19

Commit

07505ba

verified ·

1 Parent(s): b0fac67

Upload folder using huggingface_hub

Browse files

Files changed (10) hide show

assets/video_metadata.json +74 -0
main.py +1 -3
modules/audio/model.py +5 -0
modules/audio/service.py +7 -0
modules/dropbox/audio.py +125 -0
modules/dropbox/client.py +22 -0
modules/dropbox/video.py +68 -0
modules/video/model.py +11 -0
modules/video/service.py +23 -0
server.py +13 -3

assets/video_metadata.json ADDED Viewed

	@@ -0,0 +1,74 @@

+[
+    {
+        "scripture": "divya_prabandham",
+        "global_index": 0,
+        "video_url": "https://www.youtube.com/watch?v=B8pTQ3cTnnQ",
+        "type": "recitation"
+    },
+    {
+        "scripture": "divya_prabandham",
+        "global_index": 1,
+        "video_url": "https://www.youtube.com/watch?v=vHGz7QISQ4k",
+        "type": "recitation"
+    },
+    {
+        "scripture": "divya_prabandham",
+        "global_index": 2,
+        "video_url": "https://www.youtube.com/watch?v=GG_t1CTgq2I",
+        "type": "recitation"
+    },
+    {
+        "scripture": "divya_prabandham",
+        "global_index": 3,
+        "video_url": "https://www.youtube.com/watch?v=FhZ84N0lS3g",
+        "type": "recitation"
+    },
+    {
+        "scripture": "divya_prabandham",
+        "global_index": 4,
+        "video_url": "https://www.youtube.com/watch?v=wOwcnclgg20",
+        "type": "recitation"
+    },
+    {
+        "scripture": "divya_prabandham",
+        "global_index": 5,
+        "video_url": "https://www.youtube.com/watch?v=t8_JA8ejRDQ",
+        "type": "recitation"
+    },
+    {
+        "scripture": "divya_prabandham",
+        "global_index": 6,
+        "video_url": "https://www.youtube.com/watch?v=c8LpKLMuvdM",
+        "type": "recitation"
+    },
+    {
+        "scripture": "divya_prabandham",
+        "global_index": 7,
+        "video_url": "https://www.youtube.com/watch?v=JNg1qh_j_6A",
+        "type": "recitation"
+    },
+    {
+        "scripture": "divya_prabandham",
+        "global_index": 8,
+        "video_url": "https://www.youtube.com/watch?v=o5PU-lSJu6w&pp=0gcJCeAJAYcqIYzv",
+        "type": "recitation"
+    },
+    {
+        "scripture": "divya_prabandham",
+        "global_index": 9,
+        "video_url": "https://www.youtube.com/watch?v=_yI-0ACPP5o",
+        "type": "recitation"
+    },
+    {
+        "scripture": "divya_prabandham",
+        "global_index": 10,
+        "video_url": "https://www.youtube.com/watch?v=CATdhzkENqQ&pp=0gcJCeAJAYcqIYzv",
+        "type": "recitation"
+    },
+    {
+        "scripture": "divya_prabandham",
+        "global_index": 11,
+        "video_url": "https://www.youtube.com/watch?v=_ax75GP3NoY",
+        "type": "recitation"
+    }
+]

main.py CHANGED Viewed

@@ -3,13 +3,11 @@ from contextlib import asynccontextmanager
 from fastapi.responses import RedirectResponse
 import uvicorn
 from fastapi import FastAPI
-from db import SanatanDatabase
-from modules.dropbox import cleanup_audio_url_cache
 from server import router as mobile_router
 from app import gradio_app  # your Blocks object
 import gradio as gr
 import logging
-from fastapi.middleware import Middleware
 from fastapi import Request
 logging.basicConfig(level=logging.INFO)

 from fastapi.responses import RedirectResponse
 import uvicorn
 from fastapi import FastAPI
+from modules.dropbox.audio import cleanup_audio_url_cache
 from server import router as mobile_router
 from app import gradio_app  # your Blocks object
 import gradio as gr
 import logging
 from fastapi import Request
 logging.basicConfig(level=logging.INFO)

modules/audio/model.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from pydantic import BaseModel
+class AudioRequest(BaseModel):
+    """Request payload selecting the scripture entry whose audio URLs are wanted."""
+    # Scripture identifier, e.g. "divya_prabandham".
+    scripture_name: str
+    # Numeric index of the requested entry.
+    global_index: int

modules/audio/service.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from modules.audio.model import AudioRequest
+from modules.dropbox.audio import get_audio_urls
+async def svc_get_audio_urls(req: AudioRequest):
+    urls = await get_audio_urls(req)
+    return urls

modules/dropbox/audio.py ADDED Viewed

	@@ -0,0 +1,125 @@

+import asyncio
+import json
+from fastapi import HTTPException
+import dropbox
+from dropbox.files import FolderMetadata, FileMetadata
+from datetime import datetime, timedelta, timezone
+from modules.audio.model import AudioRequest
+import logging
+from modules.dropbox.client import dbx
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+def list_dropbox_folder_hierarchy(dbx: dropbox.Dropbox, base_path: str = ""):
+    """
+    Recursively fetches the folder/file hierarchy from Dropbox starting at base_path.
+    Includes direct temporary download links for files.
+    Args:
+        dbx (dropbox.Dropbox): Authenticated Dropbox client.
+        base_path (str): Path inside Dropbox ("" means root).
+    Returns:
+        dict: Nested dict with folders -> {subfolders/files with links}.
+    """
+    hierarchy = {}
+    try:
+        print("listing files in", base_path)
+        result = dbx.files_list_folder(base_path)
+        while True:
+            for entry in result.entries:
+                if isinstance(entry, FolderMetadata):
+                    # Recurse into subfolder
+                    hierarchy[entry.name] = list_dropbox_folder_hierarchy(
+                        dbx, entry.path_lower
+                    )
+                elif isinstance(entry, FileMetadata):
+                    try:
+                        link = dbx.files_get_temporary_link(entry.path_lower).link
+                        hierarchy.setdefault("__files__", []).append(
+                            {
+                                "name": entry.name,
+                                "path": entry.path_lower,
+                                "download_url": link,
+                            }
+                        )
+                    except Exception as link_err:
+                        print(
+                            f"Could not generate link for {entry.path_lower}: {link_err}"
+                        )
+            if result.has_more:
+                result = dbx.files_list_folder_continue(result.cursor)
+            else:
+                break
+    except Exception as e:
+        print(f"Error listing folder {base_path}: {e}")
+    return hierarchy
+# In-memory cache of temporary audio links, shaped as
+# {(scripture_name, global_index, type): {"url": ..., "expiry": ...}}
+audio_cache: dict[tuple[str, int, str], dict] = {}
+# Cached links are dropped after 3h30m so they are regenerated ahead of the 4h expiry.
+CACHE_TTL = timedelta(hours=3, minutes=30)  # refresh before 4h expiry
+async def get_audio_urls(req: AudioRequest):
+    base_path = f"/{req.scripture_name}/audio"
+    files_to_check = {
+        "recitation": f"{req.global_index}-recitation.mp3",
+        "santhai": f"{req.global_index}-santhai.mp3",
+    }
+    urls = {}
+    now = datetime.now(timezone.utc)  # timezone-aware UTC datetime
+    for key, filename in files_to_check.items():
+        cache_key = (req.scripture_name, req.global_index, key)
+        # Check cache first
+        cached = audio_cache.get(cache_key)
+        if cached and cached["expiry"] > now:
+            urls[key] = cached["url"]
+            continue
+        # Generate new temporary link
+        file_path = f"{base_path}/{filename}"
+        try:
+            metadata = dbx.files_get_metadata(file_path)
+            if isinstance(metadata, FileMetadata):
+                temp_link = dbx.files_get_temporary_link(file_path).link
+                urls[key] = temp_link
+                # store in cache with expiry
+                audio_cache[cache_key] = {"url": temp_link, "expiry": now + CACHE_TTL}
+        except dropbox.exceptions.ApiError:
+            urls[key] = None
+    if not any(urls.values()):
+        raise HTTPException(status_code=404, detail="No audio files found")
+    return urls
+async def cleanup_audio_url_cache(interval_seconds: int = 600):
+    """Periodically remove expired entries from audio_cache."""
+    while True:
+        now = datetime.now(timezone.utc)
+        expired_keys = [key for key, val in audio_cache.items() if val["expiry"] <= now]
+        for key in expired_keys:
+            del audio_cache[key]
+        # Debug log
+        if expired_keys:
+            print(f"Cleaned up {len(expired_keys)} expired cache entries")
+        await asyncio.sleep(interval_seconds)
+if __name__ == "__main__":
+    # Create Dropbox client with your access token
+    # data = list_dropbox_folder_hierarchy(dbx, "")
+    data = asyncio.run(
+        get_audio_urls(AudioRequest(scripture_name="divya_prabandham", global_index=0))
+    )
+    print(json.dumps(data, indent=2))

modules/dropbox/client.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import os
+from dotenv import load_dotenv
+import dropbox
+load_dotenv()
+REFRESH_TOKEN = os.getenv("DROPBOX_REFRESH_TOKEN")
+APP_KEY = os.getenv("DROPBOX_APP_KEY")
+APP_SECRET = os.getenv("DROPBOX_APP_SECRET")
+if not REFRESH_TOKEN:
+    raise Exception("DROPBOX_REFRESH_TOKEN missing")
+if not APP_KEY:
+    raise Exception("APP_KEY missing")
+if not APP_SECRET:
+    raise Exception("APP_SECRET missing")
+dbx = dropbox.Dropbox(
+    app_key=APP_KEY, app_secret=APP_SECRET, oauth2_refresh_token=REFRESH_TOKEN
+)

modules/dropbox/video.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import asyncio
+import json
+import logging
+from datetime import datetime, timedelta, timezone
+from typing import List
+import dropbox
+from modules.dropbox.client import dbx
+from modules.video.model import VideoMetadata
+# Initialize logger
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+# The Dropbox client (dbx) is the already-configured instance imported from
+# modules.dropbox.client; it is not created here.
+# In-memory cache of parsed video metadata.
+# Key: scripture_name, Value: {"timestamp": datetime, "data": List[VideoMetadata]}
+_video_cache: dict[str, dict] = {}
+CACHE_TTL = timedelta(hours=1)  # Cache time-to-live
+async def fetch_video_urls_from_dropbox(scripture_name: str) -> list[VideoMetadata]:
+    """
+    Fetch video metadata JSON from Dropbox with caching.
+    :param scripture_name: Name of the scripture folder in Dropbox
+    :return: List of VideoMetadata objects
+    """
+    loop = asyncio.get_running_loop()
+    # Check if we have cached data that is still fresh
+    cache_entry = _video_cache.get(scripture_name)
+    if cache_entry:
+        age = datetime.now() - cache_entry["timestamp"]
+        if age < CACHE_TTL:
+            logger.info(f"Using cached video metadata for '{scripture_name}' (age: {age})")
+            return cache_entry["data"]
+    logger.info(f"Fetching video metadata for '{scripture_name}' from Dropbox")
+    try:
+        # Dropbox SDK is synchronous, run in executor
+        metadata, res = await loop.run_in_executor(
+            None, dbx.files_download, f"/{scripture_name}/video_metadata.json"
+        )
+        data = res.content.decode("utf-8")
+        logger.info("video data = %s",data)
+        json_list = json.loads(data)
+        # Convert each JSON item to VideoMetadata
+        video_data = [VideoMetadata(**item) for item in json_list]
+        # Update cache
+        _video_cache[scripture_name] = {
+            "timestamp": datetime.now(),
+            "data": video_data
+        }
+        logger.info(f"Cached {len(video_data)} videos for '{scripture_name}'")
+        return video_data
+    except Exception as e:
+        logger.error(f"Error fetching video metadata for '{scripture_name}' from Dropbox", exc_info=e)
+        # Fallback to cached data if available
+        if cache_entry:
+            logger.warning(f"Returning stale cached data for '{scripture_name}' due to Dropbox error")
+            return cache_entry["data"]
+        else:
+            logger.warning(f"No cached data available for '{scripture_name}', returning empty list")
+            return []

modules/video/model.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from pydantic import BaseModel
+class VideoRequest(BaseModel):
+    """Request payload selecting the scripture entry whose video URLs are wanted."""
+    # Scripture identifier, e.g. "divya_prabandham".
+    scripture_name: str
+    # Numeric index of the requested entry.
+    global_index: int
+class VideoMetadata(BaseModel):
+    """One record of a video_metadata.json file: a video linked to a scripture entry."""
+    # Scripture identifier the video belongs to, e.g. "divya_prabandham".
+    scripture : str
+    # Index of the entry the video belongs to.
+    global_index : int
+    # Link to the video, e.g. a YouTube watch URL.
+    video_url : str
+    # Kind of video, e.g. "recitation".
+    type: str

modules/video/service.py ADDED Viewed

	@@ -0,0 +1,23 @@

+import asyncio
+from modules.dropbox.video import fetch_video_urls_from_dropbox
+from modules.video.model import VideoRequest
+async def svc_get_video_urls(req: VideoRequest):
+    videos = await fetch_video_urls_from_dropbox(scripture_name=req.scripture_name)
+    matching_videos = [
+        video
+        for video in videos
+        if video.global_index == req.global_index
+        and video.scripture == req.scripture_name
+    ]
+    return {video.type: video.video_url for video in matching_videos}
+if __name__ == "__main__":
+    data = asyncio.run(
+        svc_get_video_urls(
+            VideoRequest(global_index=4, scripture_name="divya_prabandham")
+        )
+    )
+    print(data)

server.py CHANGED Viewed

@@ -12,12 +12,16 @@ from chat_utils import chat
 from config import SanatanConfig
 from db import SanatanDatabase
 from metadata import MetadataWhereClause
-from modules.dropbox import AudioRequest, get_audio_urls
 from modules.quiz.answer_validator import validate_answer
 from modules.quiz.models import Question
 from modules.quiz.quiz_helper import generate_question
 import logging
 logging.basicConfig()
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
@@ -326,5 +330,11 @@ async def search_scripture(
 @router.post("/audio")
 async def generate_audio_urls(req: AudioRequest):
     logger.info("generate_audio_urls: %s", req)
-    audio_urls = await get_audio_urls(req)
-    return audio_urls

 from config import SanatanConfig
 from db import SanatanDatabase
 from metadata import MetadataWhereClause
+from modules.audio.model import AudioRequest
+from modules.audio.service import svc_get_audio_urls
 from modules.quiz.answer_validator import validate_answer
 from modules.quiz.models import Question
 from modules.quiz.quiz_helper import generate_question
 import logging
+from modules.video.model import VideoRequest
+from modules.video.service import svc_get_video_urls
 logging.basicConfig()
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 @router.post("/audio")
 async def generate_audio_urls(req: AudioRequest):
+    """POST /audio: log the request and return the result of svc_get_audio_urls."""
     logger.info("generate_audio_urls: %s", req)
+    audio_urls = await svc_get_audio_urls(req)
+    return audio_urls
+@router.post("/video")
+async def generate_audio_urls(req: VideoRequest):
+    logger.info("generate_audio_urls: %s", req)
+    video_urls = await svc_get_video_urls(req)
+    return video_urls