vikramvasudevan committed on
Commit
07505ba
·
verified ·
1 Parent(s): b0fac67

Upload folder using huggingface_hub

Browse files
assets/video_metadata.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "scripture": "divya_prabandham",
4
+ "global_index": 0,
5
+ "video_url": "https://www.youtube.com/watch?v=B8pTQ3cTnnQ",
6
+ "type": "recitation"
7
+ },
8
+ {
9
+ "scripture": "divya_prabandham",
10
+ "global_index": 1,
11
+ "video_url": "https://www.youtube.com/watch?v=vHGz7QISQ4k",
12
+ "type": "recitation"
13
+ },
14
+ {
15
+ "scripture": "divya_prabandham",
16
+ "global_index": 2,
17
+ "video_url": "https://www.youtube.com/watch?v=GG_t1CTgq2I",
18
+ "type": "recitation"
19
+ },
20
+ {
21
+ "scripture": "divya_prabandham",
22
+ "global_index": 3,
23
+ "video_url": "https://www.youtube.com/watch?v=FhZ84N0lS3g",
24
+ "type": "recitation"
25
+ },
26
+ {
27
+ "scripture": "divya_prabandham",
28
+ "global_index": 4,
29
+ "video_url": "https://www.youtube.com/watch?v=wOwcnclgg20",
30
+ "type": "recitation"
31
+ },
32
+ {
33
+ "scripture": "divya_prabandham",
34
+ "global_index": 5,
35
+ "video_url": "https://www.youtube.com/watch?v=t8_JA8ejRDQ",
36
+ "type": "recitation"
37
+ },
38
+ {
39
+ "scripture": "divya_prabandham",
40
+ "global_index": 6,
41
+ "video_url": "https://www.youtube.com/watch?v=c8LpKLMuvdM",
42
+ "type": "recitation"
43
+ },
44
+ {
45
+ "scripture": "divya_prabandham",
46
+ "global_index": 7,
47
+ "video_url": "https://www.youtube.com/watch?v=JNg1qh_j_6A",
48
+ "type": "recitation"
49
+ },
50
+ {
51
+ "scripture": "divya_prabandham",
52
+ "global_index": 8,
53
+ "video_url": "https://www.youtube.com/watch?v=o5PU-lSJu6w&pp=0gcJCeAJAYcqIYzv",
54
+ "type": "recitation"
55
+ },
56
+ {
57
+ "scripture": "divya_prabandham",
58
+ "global_index": 9,
59
+ "video_url": "https://www.youtube.com/watch?v=_yI-0ACPP5o",
60
+ "type": "recitation"
61
+ },
62
+ {
63
+ "scripture": "divya_prabandham",
64
+ "global_index": 10,
65
+ "video_url": "https://www.youtube.com/watch?v=CATdhzkENqQ&pp=0gcJCeAJAYcqIYzv",
66
+ "type": "recitation"
67
+ },
68
+ {
69
+ "scripture": "divya_prabandham",
70
+ "global_index": 11,
71
+ "video_url": "https://www.youtube.com/watch?v=_ax75GP3NoY",
72
+ "type": "recitation"
73
+ }
74
+ ]
main.py CHANGED
@@ -3,13 +3,11 @@ from contextlib import asynccontextmanager
3
  from fastapi.responses import RedirectResponse
4
  import uvicorn
5
  from fastapi import FastAPI
6
- from db import SanatanDatabase
7
- from modules.dropbox import cleanup_audio_url_cache
8
  from server import router as mobile_router
9
  from app import gradio_app # your Blocks object
10
  import gradio as gr
11
  import logging
12
- from fastapi.middleware import Middleware
13
  from fastapi import Request
14
 
15
  logging.basicConfig(level=logging.INFO)
 
3
  from fastapi.responses import RedirectResponse
4
  import uvicorn
5
  from fastapi import FastAPI
6
+ from modules.dropbox.audio import cleanup_audio_url_cache
 
7
  from server import router as mobile_router
8
  from app import gradio_app # your Blocks object
9
  import gradio as gr
10
  import logging
 
11
  from fastapi import Request
12
 
13
  logging.basicConfig(level=logging.INFO)
modules/audio/model.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+
3
class AudioRequest(BaseModel):
    """Request body identifying the scripture entry whose audio links are wanted."""

    # Scripture identifier, e.g. "divya_prabandham".
    scripture_name: str
    # Index of the requested entry within that scripture.
    global_index: int
modules/audio/service.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from modules.audio.model import AudioRequest
2
+ from modules.dropbox.audio import get_audio_urls
3
+
4
+
5
async def svc_get_audio_urls(req: AudioRequest):
    """Service-layer entry point for audio lookups.

    Delegates to ``get_audio_urls`` and returns its result unchanged.
    """
    return await get_audio_urls(req)
modules/dropbox/audio.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ from fastapi import HTTPException
4
+ import dropbox
5
+ from dropbox.files import FolderMetadata, FileMetadata
6
+ from datetime import datetime, timedelta, timezone
7
+ from modules.audio.model import AudioRequest
8
+ import logging
9
+ from modules.dropbox.client import dbx
10
+
11
+ logging.basicConfig()
12
+ logger = logging.getLogger(__name__)
13
+ logger.setLevel(logging.INFO)
14
+
15
def list_dropbox_folder_hierarchy(dbx: dropbox.Dropbox, base_path: str = ""):
    """
    Recursively fetch the folder/file hierarchy from Dropbox starting at base_path.

    Every file entry gets a temporary download link. Failures are logged through
    the module logger and never raised: a file whose link cannot be generated is
    skipped, and a listing error returns whatever was collected so far.

    Args:
        dbx (dropbox.Dropbox): Authenticated Dropbox client.
        base_path (str): Path inside Dropbox ("" means root).

    Returns:
        dict: Maps each sub-folder name to its own nested hierarchy. Files of
        the current folder are collected under the "__files__" key as a list of
        {"name", "path", "download_url"} dicts.
    """
    hierarchy = {}

    try:
        logger.info("listing files in %s", base_path)
        result = dbx.files_list_folder(base_path)

        while True:
            for entry in result.entries:
                if isinstance(entry, FolderMetadata):
                    # Recurse into the sub-folder.
                    hierarchy[entry.name] = list_dropbox_folder_hierarchy(
                        dbx, entry.path_lower
                    )
                elif isinstance(entry, FileMetadata):
                    try:
                        link = dbx.files_get_temporary_link(entry.path_lower).link
                        hierarchy.setdefault("__files__", []).append(
                            {
                                "name": entry.name,
                                "path": entry.path_lower,
                                "download_url": link,
                            }
                        )
                    except Exception as link_err:
                        # Best effort: one bad file must not abort the listing.
                        logger.warning(
                            "Could not generate link for %s: %s",
                            entry.path_lower,
                            link_err,
                        )

            # Listings are paginated; keep following the cursor until exhausted.
            if not result.has_more:
                break
            result = dbx.files_list_folder_continue(result.cursor)

    except Exception as e:
        logger.error("Error listing folder %s: %s", base_path, e)

    return hierarchy
64
+
65
+ # cache = {(scripture_name, global_index, type): {"url": ..., "expiry": ...}}
66
+ audio_cache: dict[tuple[str, int, str], dict] = {}
67
+ CACHE_TTL = timedelta(hours=3, minutes=30) # refresh before 4h expiry
68
+
69
+
70
def _fetch_temporary_link(file_path):
    """Blocking Dropbox lookup: return a temporary link for file_path, or None.

    None is returned when the path does not resolve to a file or when Dropbox
    reports an API error for it.
    """
    try:
        metadata = dbx.files_get_metadata(file_path)
        if isinstance(metadata, FileMetadata):
            return dbx.files_get_temporary_link(file_path).link
    except dropbox.exceptions.ApiError:
        return None
    return None


async def get_audio_urls(req: AudioRequest):
    """Return temporary Dropbox links for the audio files of one entry.

    Looks for ``<global_index>-recitation.mp3`` and ``<global_index>-santhai.mp3``
    under ``/<scripture_name>/audio``. A link is served from ``audio_cache``
    while its entry is unexpired; otherwise a fresh temporary link is requested
    and cached for ``CACHE_TTL``.

    Args:
        req: Identifies the scripture folder and the entry index.

    Returns:
        dict with the keys "recitation" and "santhai", each mapped to a link or
        to ``None`` when that file is unavailable.

    Raises:
        HTTPException: 404 when neither file could be resolved.
    """
    base_path = f"/{req.scripture_name}/audio"
    files_to_check = {
        "recitation": f"{req.global_index}-recitation.mp3",
        "santhai": f"{req.global_index}-santhai.mp3",
    }

    urls = {}
    now = datetime.now(timezone.utc)  # timezone-aware UTC datetime

    for key, filename in files_to_check.items():
        cache_key = (req.scripture_name, req.global_index, key)

        # Serve from cache while the stored link is still considered fresh.
        cached = audio_cache.get(cache_key)
        if cached and cached["expiry"] > now:
            urls[key] = cached["url"]
            continue

        # The Dropbox SDK is synchronous; run it in a worker thread so the
        # event loop keeps serving other requests while we wait on the network.
        temp_link = await asyncio.to_thread(
            _fetch_temporary_link, f"{base_path}/{filename}"
        )

        # Always report the key so the response shape does not depend on why
        # the file was unavailable.
        urls[key] = temp_link
        if temp_link is not None:
            audio_cache[cache_key] = {"url": temp_link, "expiry": now + CACHE_TTL}

    if not any(urls.values()):
        raise HTTPException(status_code=404, detail="No audio files found")

    return urls
105
+
106
+
107
async def cleanup_audio_url_cache(interval_seconds: int = 600):
    """Background loop that evicts expired links from ``audio_cache``.

    Each pass removes every entry whose ``expiry`` is at or before the current
    UTC time, then sleeps ``interval_seconds`` before the next pass. The loop
    never returns on its own.
    """
    while True:
        cutoff = datetime.now(timezone.utc)
        # Collect first, delete afterwards: the dict must not change size
        # while it is being iterated.
        stale = [k for k, entry in audio_cache.items() if entry["expiry"] <= cutoff]
        for k in stale:
            audio_cache.pop(k)
        if stale:
            print(f"Cleaned up {len(stale)} expired cache entries")
        await asyncio.sleep(interval_seconds)
118
+
119
if __name__ == "__main__":
    # Manual smoke test: resolve the audio links of entry 0 and pretty-print them.
    request = AudioRequest(scripture_name="divya_prabandham", global_index=0)
    data = asyncio.run(get_audio_urls(request))
    print(json.dumps(data, indent=2))
modules/dropbox/client.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ import dropbox
4
+
5
# Populate os.environ from a local .env file (python-dotenv) before reading
# the credentials below.
load_dotenv()


def _require_env(name: str) -> str:
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or empty. The message names the
        exact variable so a misconfigured deployment is easy to diagnose.
        (RuntimeError is a subclass of Exception, so existing handlers still
        catch it.)
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f"{name} missing")
    return value


# Fail fast at import time when any Dropbox credential is missing.
REFRESH_TOKEN = _require_env("DROPBOX_REFRESH_TOKEN")
APP_KEY = _require_env("DROPBOX_APP_KEY")
APP_SECRET = _require_env("DROPBOX_APP_SECRET")

# Shared Dropbox client built from the app key/secret and the refresh token.
dbx = dropbox.Dropbox(
    app_key=APP_KEY, app_secret=APP_SECRET, oauth2_refresh_token=REFRESH_TOKEN
)
modules/dropbox/video.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ from datetime import datetime, timedelta
4
+ from typing import List
5
+ import dropbox
6
+ import logging
7
+ from modules.dropbox.client import dbx
8
+ from modules.video.model import VideoMetadata
9
+
10
+ # Initialize logger
11
+ logger = logging.getLogger(__name__)
12
+ logger.setLevel(logging.INFO)
13
+
14
+ # Dropbox client (already configured)
15
+ # dbx = dropbox.Dropbox(app_key=APP_KEY, app_secret=APP_SECRET, oauth2_refresh_token=REFRESH_TOKEN)
16
+
17
+ # Cache dictionary
18
+ # Key: scripture_name, Value: {"timestamp": datetime, "data": List[VideoMetadata]}
19
+ _video_cache: dict[str, dict] = {}
20
+ CACHE_TTL = timedelta(hours=1) # Cache time-to-live
21
+
22
async def fetch_video_urls_from_dropbox(scripture_name: str) -> list[VideoMetadata]:
    """
    Load the video metadata list of a scripture, using a per-scripture cache.

    A cached list younger than ``CACHE_TTL`` is returned directly. Otherwise
    ``/<scripture_name>/video_metadata.json`` is downloaded from Dropbox, parsed
    into ``VideoMetadata`` objects and cached. If anything in that refresh
    fails, the error is logged and the previous cached list is returned when
    one exists, else an empty list.

    :param scripture_name: Name of the scripture folder in Dropbox
    :return: List of VideoMetadata objects
    """
    cache_entry = _video_cache.get(scripture_name)

    # Fast path: the cached copy is still fresh.
    if cache_entry:
        age = datetime.now() - cache_entry["timestamp"]
        if age < CACHE_TTL:
            logger.info(f"Using cached video metadata for '{scripture_name}' (age: {age})")
            return cache_entry["data"]

    loop = asyncio.get_running_loop()
    logger.info(f"Fetching video metadata for '{scripture_name}' from Dropbox")
    try:
        remote_path = f"/{scripture_name}/video_metadata.json"
        # The Dropbox SDK call is synchronous, so it runs in the default executor.
        _, response = await loop.run_in_executor(None, dbx.files_download, remote_path)

        payload = response.content.decode("utf-8")
        logger.info("video data = %s", payload)

        # One VideoMetadata per JSON object in the downloaded array.
        videos = [VideoMetadata(**record) for record in json.loads(payload)]

        _video_cache[scripture_name] = {"timestamp": datetime.now(), "data": videos}
        logger.info(f"Cached {len(videos)} videos for '{scripture_name}'")
        return videos

    except Exception as e:
        logger.error(f"Error fetching video metadata for '{scripture_name}' from Dropbox", exc_info=e)
        if not cache_entry:
            logger.warning(f"No cached data available for '{scripture_name}', returning empty list")
            return []
        # Prefer stale data over no data when the refresh failed.
        logger.warning(f"Returning stale cached data for '{scripture_name}' due to Dropbox error")
        return cache_entry["data"]
modules/video/model.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+
3
class VideoRequest(BaseModel):
    """Request body identifying the scripture entry whose video links are wanted."""

    # Scripture identifier, e.g. "divya_prabandham".
    scripture_name: str
    # Index of the requested entry within that scripture.
    global_index: int
6
+
7
class VideoMetadata(BaseModel):
    """One record of a scripture's video_metadata.json file."""

    # Scripture identifier the video belongs to, e.g. "divya_prabandham".
    scripture : str
    # Index of the scripture entry the video belongs to.
    global_index : int
    # Link to the video, e.g. a YouTube watch URL.
    video_url : str
    # Category label of the video, e.g. "recitation".
    type: str
modules/video/service.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from modules.dropbox.video import fetch_video_urls_from_dropbox
3
+ from modules.video.model import VideoRequest
4
+
5
+
6
async def svc_get_video_urls(req: VideoRequest):
    """Return the video links of one scripture entry, keyed by video type.

    Loads the scripture's video metadata and keeps the records whose
    ``global_index`` and ``scripture`` both match the request. When several
    matching records share a type, the last one in the list wins.
    """
    videos = await fetch_video_urls_from_dropbox(scripture_name=req.scripture_name)

    urls_by_type = {}
    for video in videos:
        if video.global_index != req.global_index:
            continue
        if video.scripture != req.scripture_name:
            continue
        urls_by_type[video.type] = video.video_url
    return urls_by_type
16
+
17
if __name__ == "__main__":
    # Manual smoke test: look up the videos of entry 4 and print the mapping.
    sample_request = VideoRequest(global_index=4, scripture_name="divya_prabandham")
    data = asyncio.run(svc_get_video_urls(sample_request))
    print(data)
server.py CHANGED
@@ -12,12 +12,16 @@ from chat_utils import chat
12
  from config import SanatanConfig
13
  from db import SanatanDatabase
14
  from metadata import MetadataWhereClause
15
- from modules.dropbox import AudioRequest, get_audio_urls
 
16
  from modules.quiz.answer_validator import validate_answer
17
  from modules.quiz.models import Question
18
  from modules.quiz.quiz_helper import generate_question
19
  import logging
20
 
 
 
 
21
  logging.basicConfig()
22
  logger = logging.getLogger(__name__)
23
  logger.setLevel(logging.INFO)
@@ -326,5 +330,11 @@ async def search_scripture(
326
  @router.post("/audio")
327
  async def generate_audio_urls(req: AudioRequest):
328
  logger.info("generate_audio_urls: %s", req)
329
- audio_urls = await get_audio_urls(req)
330
- return audio_urls
 
 
 
 
 
 
 
12
  from config import SanatanConfig
13
  from db import SanatanDatabase
14
  from metadata import MetadataWhereClause
15
+ from modules.audio.model import AudioRequest
16
+ from modules.audio.service import svc_get_audio_urls
17
  from modules.quiz.answer_validator import validate_answer
18
  from modules.quiz.models import Question
19
  from modules.quiz.quiz_helper import generate_question
20
  import logging
21
 
22
+ from modules.video.model import VideoRequest
23
+ from modules.video.service import svc_get_video_urls
24
+
25
  logging.basicConfig()
26
  logger = logging.getLogger(__name__)
27
  logger.setLevel(logging.INFO)
 
330
# POST /audio: log the request, then return whatever the audio service resolves.
@router.post("/audio")
async def generate_audio_urls(req: AudioRequest):
    logger.info("generate_audio_urls: %s", req)
    return await svc_get_audio_urls(req)
335
+
336
# POST /video: log the request, then return the video links resolved by the
# video service. The handler has its own name so it no longer shadows the
# /audio handler at module level, and its log line identifies the right endpoint.
@router.post("/video")
async def generate_video_urls(req: VideoRequest):
    logger.info("generate_video_urls: %s", req)
    video_urls = await svc_get_video_urls(req)
    return video_urls