""" HuggingFace Collection Storage Service Uploads LoRA adapters as individual models to HuggingFace Hub Models can be added to the LEMM collection for organization """ import os import logging from pathlib import Path from typing import List, Dict, Optional import shutil import yaml logger = logging.getLogger(__name__) class HFStorageService: """Service for uploading LoRAs as models to HuggingFace Hub""" def __init__(self, username: str = "Gamahea", dataset_repo: str = "lemmdata"): """ Initialize HF storage service Args: username: HuggingFace username dataset_repo: Dataset repository name for storing training artifacts """ self.username = username self.dataset_repo = dataset_repo self.repo_id = f"{username}/{dataset_repo}" self.local_cache = Path("hf_cache") self.local_cache.mkdir(exist_ok=True) logger.info(f"HF Storage initialized for user: {username}") logger.info(f"Dataset Repo: https://huggingface.co/datasets/{self.repo_id}") # Get HF token from environment self.token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") # Try to import huggingface_hub try: from huggingface_hub import HfApi self.api = HfApi(token=self.token) if self.token else HfApi() self.has_hf = True if self.token: logger.info("✅ HuggingFace Hub available with authentication") else: logger.warning("⚠️ HuggingFace Hub available but no token found (uploads may fail)") except ImportError: logger.warning("⚠️ huggingface_hub not available, uploads will be skipped") self.has_hf = False def sync_on_startup(self, loras_dir: Path, datasets_dir: Path = None) -> Dict: """ Sync LoRAs and datasets from HuggingFace dataset repo on startup Downloads missing LoRAs and datasets from the repo to local storage Args: loras_dir: Local directory for LoRA storage datasets_dir: Local directory for dataset storage (optional) Returns: Dict with sync results: {'loras': [...], 'datasets': [...], 'synced': count} """ if not self.has_hf: logger.debug("HF not available, skipping sync") return {'loras': [], 'datasets': [], 'synced': 0} try: # List LoRAs in dataset repo collection_loras = self.list_dataset_loras() if not collection_loras: logger.info("No LoRAs found in dataset repo") return {'loras': [], 'datasets': [], 'synced': 0} logger.info(f"Found {len(collection_loras)} LoRA(s) in dataset repo") # Check which ones are missing locally loras_dir.mkdir(parents=True, exist_ok=True) existing_loras = set(d.name for d in loras_dir.iterdir() if d.is_dir()) synced_count = 0 for lora in collection_loras: lora_name = lora['name'] # Handle name conflicts - add number suffix if needed final_name = lora_name counter = 1 while final_name in existing_loras: final_name = f"{lora_name}_{counter}" counter += 1 target_dir = loras_dir / final_name # Download if not present locally if not target_dir.exists(): logger.info(f"Downloading LoRA from dataset repo: {lora['path']}") if self.download_lora(lora['path'], target_dir): synced_count += 1 existing_loras.add(final_name) if final_name != lora_name: logger.info(f"Downloaded as '{final_name}' (name conflict resolved)") logger.info(f"Synced {synced_count} new LoRA(s) from dataset repo") return {'loras': collection_loras, 'datasets': [], 'synced': synced_count} except Exception as e: logger.error(f"Sync failed: {str(e)}", exc_info=True) return {'loras': [], 'datasets': [], 'synced': 0, 'error': str(e)} def list_dataset_loras(self) -> List[Dict[str, str]]: """ List all LoRA ZIP files stored in the dataset repo Returns: List of dicts with 'name' and 'path' """ if not self.has_hf: logger.debug("HF 

    def list_dataset_loras(self) -> List[Dict[str, str]]:
        """
        List all LoRA ZIP files stored in the dataset repo

        Returns:
            List of dicts with 'name' and 'path'
        """
        if not self.has_hf:
            logger.debug("HF not available, skipping dataset list")
            return []

        try:
            from huggingface_hub import list_repo_files

            # List all files in the repo
            files = list_repo_files(
                repo_id=self.repo_id,
                repo_type="dataset",
                token=self.token
            )

            # Extract LoRA names from ZIP files in the loras/ folder
            loras = []
            for file in files:
                if file.startswith("loras/") and file.endswith(".zip"):
                    lora_name = file[6:-4]  # Strip "loras/" prefix and ".zip" suffix
                    loras.append({
                        'name': lora_name,
                        'path': f"loras/{lora_name}"
                    })

            logger.info(f"Found {len(loras)} LoRA(s) in dataset repo")
            return loras
        except Exception as e:
            logger.error(f"Failed to list dataset LoRAs: {e}")
            return []

    def download_lora(self, lora_path: str, target_dir: Path) -> bool:
        """
        Download a LoRA ZIP file from the dataset repo and extract it

        Args:
            lora_path: Path within the dataset repo (e.g., "loras/jazz-v1")
            target_dir: Local directory to extract to

        Returns:
            True if successful
        """
        if not self.has_hf:
            logger.debug("HF not available, skipping download")
            return False

        try:
            import zipfile

            from huggingface_hub import hf_hub_download

            # The repo stores each LoRA as loras/<name>.zip
            lora_name = lora_path.split('/')[-1]
            zip_filename = f"loras/{lora_name}.zip"
            logger.info(f"Downloading LoRA ZIP from {self.repo_id}/{zip_filename}...")

            # Download the ZIP file into the local HF cache
            zip_path = hf_hub_download(
                repo_id=self.repo_id,
                repo_type="dataset",
                filename=zip_filename,
                token=self.token
            )

            # Extract to the target directory
            target_dir.mkdir(parents=True, exist_ok=True)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                zipf.extractall(target_dir)

            logger.info(f"✅ Downloaded and extracted LoRA to {target_dir}")
            return True
        except Exception as e:
            logger.error(f"Failed to download LoRA: {e}")
            return False
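
    # A minimal round-trip sketch (hypothetical local paths) tying the two
    # methods above together:
    #
    #     storage = HFStorageService()
    #     for entry in storage.list_dataset_loras():
    #         storage.download_lora(entry['path'], Path("loras") / entry['name'])
    #
    # sync_on_startup() wraps this loop and additionally skips LoRAs that
    # already exist locally.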

    def upload_lora(self, lora_dir: Path, training_config: Optional[Dict] = None) -> Optional[Dict]:
        """
        Upload a LoRA adapter as a ZIP file to the HuggingFace dataset repo

        Args:
            lora_dir: Local LoRA directory
            training_config: Optional training configuration dict

        Returns:
            Dict with repo_id and url if successful, None otherwise
        """
        if not self.has_hf:
            logger.info(f"💾 LoRA saved locally: {lora_dir.name}")
            return None

        if not self.token:
            logger.warning("⚠️ No HuggingFace token found - cannot upload")
            logger.info("💡 To enable uploads: log in to HuggingFace or set the HF_TOKEN environment variable")
            logger.info(f"💾 LoRA saved locally: {lora_dir.name}")
            return None

        try:
            import tempfile
            import zipfile

            from huggingface_hub import upload_file

            lora_name = lora_dir.name
            logger.info(f"📤 Creating ZIP and uploading LoRA to dataset repo: {self.repo_id}/loras/{lora_name}.zip...")

            # Create a README.md for the LoRA
            readme_content = self._generate_lora_readme(lora_name, training_config)
            readme_path = lora_dir / "README.md"
            with open(readme_path, 'w', encoding='utf-8') as f:
                f.write(readme_content)

            # Reserve a temp file path; close it immediately so zipfile can
            # reopen it by name (required on Windows)
            with tempfile.NamedTemporaryFile(mode='wb', suffix='.zip', delete=False) as tmp_file:
                zip_path = tmp_file.name

            try:
                with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                    for file_path in lora_dir.rglob('*'):
                        if file_path.is_file():
                            arcname = file_path.relative_to(lora_dir)
                            zipf.write(file_path, arcname)

                # Upload the ZIP file to the loras/ folder in the dataset repo
                upload_file(
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    path_or_fileobj=zip_path,
                    path_in_repo=f"loras/{lora_name}.zip",
                    commit_message=f"Upload LEMM LoRA adapter: {lora_name}",
                    token=self.token
                )
            finally:
                # Clean up the temp file
                if os.path.exists(zip_path):
                    os.unlink(zip_path)

            logger.info(f"✅ Uploaded LoRA: {self.repo_id}/loras/{lora_name}.zip")
            logger.info(f"🔗 View at: https://huggingface.co/datasets/{self.repo_id}/blob/main/loras/{lora_name}.zip")

            return {
                'repo_id': f"{self.repo_id}/loras/{lora_name}.zip",
                'url': f"https://huggingface.co/datasets/{self.repo_id}/blob/main/loras/{lora_name}.zip",
                'dataset_repo': f"https://huggingface.co/datasets/{self.repo_id}"
            }
        except Exception as e:
            logger.error(f"Failed to upload LoRA: {e}")
            logger.info(f"💾 LoRA saved locally: {lora_dir.name}")
            return None

    def _generate_lora_readme(self, lora_name: str, config: Optional[Dict] = None) -> str:
        """Generate README.md content for a LoRA model"""
        config_info = ""
        if config:
            config_info = f"""
## Training Configuration

- **Dataset**: {config.get('dataset', 'N/A')}
- **Epochs**: {config.get('epochs', 'N/A')}
- **Learning Rate**: {config.get('learning_rate', 'N/A')}
- **Batch Size**: {config.get('batch_size', 'N/A')}
- **LoRA Rank**: {config.get('lora_rank', 'N/A')}
"""

        return f"""---
license: mit
tags:
- lora
- music-generation
- diffrhythm2
- lemm
library_name: diffusers
---

# LEMM LoRA: {lora_name}

This is a LoRA (Low-Rank Adaptation) adapter for DiffRhythm2 music generation, trained using LEMM (Let Everyone Make Music).

## About LEMM

LEMM is an AI music generation system that lets you:

- Generate high-quality music with built-in vocals
- Train custom LoRA adapters for specific styles
- Fine-tune models on your own datasets

🎵 **Try it**: [LEMM Space](https://huggingface.co/spaces/Gamahea/lemm-test-100)
{config_info}
## How to Use

### In LEMM Space

1. Visit [LEMM](https://huggingface.co/spaces/Gamahea/lemm-test-100)
2. Go to the "LoRA Management" tab
3. Enter this LoRA name: `{lora_name}`
4. Click "Download from Hub"
5. Use it in generation or as a base for continued training

### In Your Code

This adapter is stored as `loras/{lora_name}.zip` in the LEMM dataset repo:

```python
import zipfile

from huggingface_hub import hf_hub_download

# Download the LoRA ZIP from the LEMM dataset repo
zip_path = hf_hub_download(
    repo_id="{self.repo_id}",
    repo_type="dataset",
    filename="loras/{lora_name}.zip",
)

# Extract the adapter files
with zipfile.ZipFile(zip_path) as zf:
    zf.extractall("./loras/{lora_name}")

# Load and use with DiffRhythm2
# (See the LEMM documentation for integration)
```

## Model Files

- `final_model.pt` - Trained LoRA weights
- `config.yaml` - Training configuration
- `README.md` - This file

## Dataset Repository

Part of the [LEMM Training Data Repository](https://huggingface.co/datasets/{self.repo_id})

## License

MIT License - Free to use and modify
"""

    def upload_dataset(self, dataset_dir: Path, dataset_info: Optional[Dict] = None) -> Optional[Dict]:
        """
        Upload a prepared dataset as a ZIP file to the HF dataset repo

        Args:
            dataset_dir: Local dataset directory
            dataset_info: Optional dataset metadata (currently unused)

        Returns:
            Dict with upload results or None if failed
        """
        if not self.has_hf:
            logger.info(f"💾 Dataset saved locally: {dataset_dir.name}")
            return None

        if not self.token:
            logger.warning("⚠️ No HuggingFace token found - cannot upload dataset")
            logger.info(f"💾 Dataset saved locally: {dataset_dir.name}")
            return None

        try:
            import tempfile
            import zipfile

            from huggingface_hub import upload_file

            dataset_name = dataset_dir.name
            logger.info(f"📤 Creating ZIP and uploading dataset to repo: {self.repo_id}/datasets/{dataset_name}.zip...")

            # Reserve a temp file path; close it immediately so zipfile can
            # reopen it by name (required on Windows)
            with tempfile.NamedTemporaryFile(mode='wb', suffix='.zip', delete=False) as tmp_file:
                zip_path = tmp_file.name

            try:
                with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                    for file_path in dataset_dir.rglob('*'):
                        if file_path.is_file():
                            arcname = file_path.relative_to(dataset_dir)
                            zipf.write(file_path, arcname)

                # Upload the ZIP to the datasets/ folder in the dataset repo
                upload_file(
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    path_or_fileobj=zip_path,
                    path_in_repo=f"datasets/{dataset_name}.zip",
                    commit_message=f"Upload prepared dataset: {dataset_name}",
                    token=self.token
                )
            finally:
                # Clean up the temp file
                if os.path.exists(zip_path):
                    os.unlink(zip_path)

            logger.info(f"✅ Uploaded dataset: {self.repo_id}/datasets/{dataset_name}.zip")
            return {
                'repo_id': f"{self.repo_id}/datasets/{dataset_name}.zip",
                'url': f"https://huggingface.co/datasets/{self.repo_id}/blob/main/datasets/{dataset_name}.zip",
                'dataset_repo': f"https://huggingface.co/datasets/{self.repo_id}"
            }
        except Exception as e:
            logger.error(f"Failed to upload dataset: {e}")
            logger.info(f"💾 Dataset saved locally: {dataset_dir.name}")
            return None
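

# A minimal usage sketch, assuming local "loras/" and "datasets/" directories
# and that HF_TOKEN is set in the environment. The directory and LoRA names
# here are illustrative, not part of the service's API.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    storage = HFStorageService()

    # Pull down any LoRAs in the dataset repo that are missing locally
    results = storage.sync_on_startup(Path("loras"))
    print(f"Synced {results['synced']} LoRA(s) from the Hub")

    # Upload a freshly trained adapter, if one exists (hypothetical path)
    lora_dir = Path("loras/jazz-v1")
    if lora_dir.is_dir():
        uploaded = storage.upload_lora(lora_dir, training_config={"epochs": 10})
        if uploaded:
            print(f"Uploaded to {uploaded['url']}")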