|
|
"""
|
|
|
Configuration settings for the Corpus Collection Engine
|
|
|
"""
|
|
|
|
|
|
import os
|
|
|
from pathlib import Path
|
|
|
from typing import List, Dict
|
|
|
|
|
|
|
|
|
# Filesystem layout. Every path is derived from PROJECT_ROOT, which is the
# parent of the directory that contains this file.
PROJECT_ROOT = Path(__file__).parent.parent
DATA_DIR = PROJECT_ROOT / "data"
MODELS_DIR = PROJECT_ROOT / "models"
CACHE_DIR = PROJECT_ROOT / ".cache"
|
|
|
|
|
|
|
|
|
# Supported languages: two-letter language code -> English display name.
SUPPORTED_LANGUAGES: Dict[str, str] = {
    'hi': 'Hindi',
    'bn': 'Bengali',
    'ta': 'Tamil',
    'te': 'Telugu',
    'ml': 'Malayalam',
    'kn': 'Kannada',
    'gu': 'Gujarati',
    'mr': 'Marathi',
    'pa': 'Punjabi',
    'or': 'Odia',
    'en': 'English',
}
|
|
|
|
|
|
|
|
|
# Identifiers of the activity types known to the engine.
ACTIVITY_TYPES: List[str] = [
    'meme',
    'recipe',
    'folklore',
    'landmark',
]
|
|
|
|
|
|
|
|
|
# AI settings: model identifiers plus generation parameters.
# NOTE(review): the model identifiers look like Hugging Face Hub repo ids --
# confirm against the code that loads them.
AI_CONFIG = {
    'text_model': 'sarvamai/sarvam-1',
    'vision_model': 'microsoft/DiT-base',
    'max_tokens': 512,
    'temperature': 0.7,
}
|
|
|
|
|
|
|
|
|
# Database settings.
DATABASE_CONFIG = {
    'local_db': 'sqlite:///corpus_collection.db',
    # Read from the DATABASE_URL environment variable at import time;
    # an empty string when the variable is unset.
    'remote_db': os.getenv('DATABASE_URL', ''),
    'batch_size': 100,
}
|
|
|
|
|
|
|
|
|
# PWA settings.
# NOTE(review): 'offline_timeout' and 'sync_interval' are presumably
# milliseconds and 'max_offline_storage' presumably bytes (50 MiB) --
# confirm against the consumers of these values.
PWA_CONFIG = {
    'cache_version': 'v1.0.0',
    'offline_timeout': 5000,
    'sync_interval': 300000,
    'max_offline_storage': 50 * 1024 * 1024,
}
|
|
|
|
|
|
|
|
|
# Validation limits for submitted content.
# NOTE(review): text lengths are presumably character counts and
# 'max_image_size' presumably bytes (10 MiB) -- confirm against the validator.
VALIDATION_CONFIG = {
    'min_text_length': 10,
    'max_text_length': 5000,
    'max_image_size': 10 * 1024 * 1024,
    'allowed_image_types': ['jpg', 'jpeg', 'png', 'webp'],
}
|
|
|
|
|
|
|
|
|
# Create the working directories as a side effect of importing this module.
for directory in [DATA_DIR, MODELS_DIR, CACHE_DIR]:
    # parents=True creates any missing ancestor directories instead of raising
    # FileNotFoundError; exist_ok=True makes repeated imports a no-op.
    directory.mkdir(parents=True, exist_ok=True)