singarajusaiteja's picture
core app upload
dad1de9 verified
raw
history blame
1.68 kB
"""
Configuration settings for the Corpus Collection Engine
"""
import os
from pathlib import Path
from typing import List, Dict
# Project paths.
# Resolve __file__ before climbing: with a relative __file__ (for example
# "settings.py") a bare `.parent.parent` collapses to "." instead of moving up
# a directory, and the result would depend on the current working directory.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DATA_DIR = PROJECT_ROOT / "data"
MODELS_DIR = PROJECT_ROOT / "models"
CACHE_DIR = PROJECT_ROOT / ".cache"
# Supported languages: two-letter language code -> English display name.
# The set is the listed Indic languages plus English.
SUPPORTED_LANGUAGES: Dict[str, str] = {
    'hi': 'Hindi',
    'bn': 'Bengali',
    'ta': 'Tamil',
    'te': 'Telugu',
    'ml': 'Malayalam',
    'kn': 'Kannada',
    'gu': 'Gujarati',
    'mr': 'Marathi',
    'pa': 'Punjabi',
    'or': 'Odia',
    'en': 'English',
}
# Activity types: the identifiers of the collection activities, in this order.
ACTIVITY_TYPES: List[str] = ['meme', 'recipe', 'folklore', 'landmark']
# AI model configuration: identifiers for the text and vision models, plus
# the 'max_tokens' and 'temperature' settings (presumably text-generation
# parameters -- confirm against the code that consumes this dict).
AI_CONFIG = {
    'text_model': 'sarvamai/sarvam-1',
    'vision_model': 'microsoft/DiT-base',
    'max_tokens': 512,
    'temperature': 0.7,
}
# Database configuration.
# 'remote_db' is read once, at import time, from the DATABASE_URL environment
# variable and is the empty string when that variable is not set.
DATABASE_CONFIG = {
    'local_db': 'sqlite:///corpus_collection.db',
    'remote_db': os.environ.get('DATABASE_URL', ''),
    'batch_size': 100,
}
# PWA and offline configuration (durations in milliseconds).
PWA_CONFIG = {
    'cache_version': 'v1.0.0',
    'offline_timeout': 5 * 1000,  # 5 seconds, in milliseconds
    'sync_interval': 5 * 60 * 1000,  # 5 minutes, in milliseconds
    'max_offline_storage': 50 * 1024 ** 2,  # 50MB
}
# Content validation settings: bounds on submitted text length, the maximum
# image size, and the accepted image types.
VALIDATION_CONFIG = {
    'min_text_length': 10,
    'max_text_length': 5000,
    'max_image_size': 10 * 1024 ** 2,  # 10MB
    'allowed_image_types': ['jpg', 'jpeg', 'png', 'webp'],
}
# Make sure the data, models, and cache directories exist. This runs at import
# time; an already-existing directory is not an error (exist_ok=True), and
# missing parent directories are not created.
for directory in (DATA_DIR, MODELS_DIR, CACHE_DIR):
    directory.mkdir(exist_ok=True)