| """ | |
| HuggingFace Collection Storage Service | |
| Uploads LoRA adapters as individual models to HuggingFace Hub | |
| Models can be added to the LEMM collection for organization | |
| """ | |
| import os | |
| import logging | |
| from pathlib import Path | |
| from typing import List, Dict, Optional | |
| import shutil | |
| import yaml | |
| logger = logging.getLogger(__name__) | |

class HFStorageService:
    """Service for uploading LoRA adapters as ZIP archives to a HuggingFace dataset repo"""

    def __init__(self, username: str = "Gamahea", dataset_repo: str = "lemmdata"):
        """
        Initialize the HF storage service.

        Args:
            username: HuggingFace username
            dataset_repo: Dataset repository name for storing training artifacts
        """
        self.username = username
        self.dataset_repo = dataset_repo
        self.repo_id = f"{username}/{dataset_repo}"
        self.local_cache = Path("hf_cache")
        self.local_cache.mkdir(exist_ok=True)
        logger.info(f"HF Storage initialized for user: {username}")
        logger.info(f"Dataset Repo: https://huggingface.co/datasets/{self.repo_id}")

        # Get HF token from environment
        self.token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")

        # Try to import huggingface_hub
        try:
            from huggingface_hub import HfApi
            self.api = HfApi(token=self.token) if self.token else HfApi()
            self.has_hf = True
            if self.token:
                logger.info("✅ HuggingFace Hub available with authentication")
            else:
                logger.warning("⚠️ HuggingFace Hub available but no token found (uploads may fail)")
        except ImportError:
            logger.warning("⚠️ huggingface_hub not available, uploads will be skipped")
            self.has_hf = False

    def sync_on_startup(self, loras_dir: Path, datasets_dir: Optional[Path] = None) -> Dict:
        """
        Sync LoRAs from the HuggingFace dataset repo on startup.

        Downloads LoRAs that exist in the repo but are missing locally.
        (Dataset syncing is not implemented yet, so 'datasets' is always
        returned empty.)

        Args:
            loras_dir: Local directory for LoRA storage
            datasets_dir: Local directory for dataset storage (optional)

        Returns:
            Dict with sync results: {'loras': [...], 'datasets': [...], 'synced': count}
        """
        if not self.has_hf:
            logger.debug("HF not available, skipping sync")
            return {'loras': [], 'datasets': [], 'synced': 0}
        try:
            # List LoRAs in the dataset repo
            collection_loras = self.list_dataset_loras()
            if not collection_loras:
                logger.info("No LoRAs found in dataset repo")
                return {'loras': [], 'datasets': [], 'synced': 0}
            logger.info(f"Found {len(collection_loras)} LoRA(s) in dataset repo")

            # Check which ones are missing locally
            loras_dir.mkdir(parents=True, exist_ok=True)
            existing_loras = set(d.name for d in loras_dir.iterdir() if d.is_dir())

            synced_count = 0
            for lora in collection_loras:
                lora_name = lora['name']

                # Handle name conflicts - add a numeric suffix if needed
                final_name = lora_name
                counter = 1
                while final_name in existing_loras:
                    final_name = f"{lora_name}_{counter}"
                    counter += 1
                target_dir = loras_dir / final_name

                # Download if not present locally
                if not target_dir.exists():
                    logger.info(f"Downloading LoRA from dataset repo: {lora['path']}")
                    if self.download_lora(lora['path'], target_dir):
                        synced_count += 1
                        existing_loras.add(final_name)
                        if final_name != lora_name:
                            logger.info(f"Downloaded as '{final_name}' (name conflict resolved)")

            logger.info(f"Synced {synced_count} new LoRA(s) from dataset repo")
            return {'loras': collection_loras, 'datasets': [], 'synced': synced_count}
        except Exception as e:
            logger.error(f"Sync failed: {str(e)}", exc_info=True)
            return {'loras': [], 'datasets': [], 'synced': 0, 'error': str(e)}

    def list_dataset_loras(self) -> List[Dict[str, str]]:
        """
        List all LoRA ZIP files stored in the dataset repo.

        Returns:
            List of dicts with 'name' and 'path'
        """
        if not self.has_hf:
            logger.debug("HF not available, skipping dataset list")
            return []
        try:
            from huggingface_hub import list_repo_files

            # List all files in the dataset repo
            files = list_repo_files(
                repo_id=self.repo_id,
                repo_type="dataset",
                token=self.token
            )

            # Keep only ZIP files in the loras/ folder
            loras = []
            for file in files:
                if file.startswith("loras/") and file.endswith(".zip"):
                    lora_name = file[6:-4]  # Strip the "loras/" prefix and ".zip" suffix
                    loras.append({
                        'name': lora_name,
                        'path': f"loras/{lora_name}"
                    })
            logger.info(f"Found {len(loras)} LoRA(s) in dataset repo")
            return loras
        except Exception as e:
            logger.error(f"Failed to list dataset LoRAs: {e}")
            return []

    def download_lora(self, lora_path: str, target_dir: Path) -> bool:
        """
        Download a LoRA ZIP file from the dataset repo and extract it.

        Args:
            lora_path: Path within the dataset repo (e.g., "loras/jazz-v1")
            target_dir: Local directory to extract to

        Returns:
            True if successful
        """
        if not self.has_hf:
            logger.debug("HF not available, skipping download")
            return False
        try:
            import zipfile
            from huggingface_hub import hf_hub_download

            # Each LoRA is stored in the repo as loras/<name>.zip
            lora_name = lora_path.split('/')[-1]
            zip_filename = f"loras/{lora_name}.zip"
            logger.info(f"Downloading LoRA ZIP from {self.repo_id}/{zip_filename}...")

            # Download the ZIP into the local HF cache and get its path
            zip_path = hf_hub_download(
                repo_id=self.repo_id,
                repo_type="dataset",
                filename=zip_filename,
                token=self.token
            )

            # Extract to the target directory
            target_dir.mkdir(parents=True, exist_ok=True)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                zipf.extractall(target_dir)
            logger.info(f"✅ Downloaded and extracted LoRA to {target_dir}")
            return True
        except Exception as e:
            logger.error(f"Failed to download LoRA: {e}")
            return False

    def upload_lora(self, lora_dir: Path, training_config: Optional[Dict] = None) -> Optional[Dict]:
        """
        Upload a LoRA adapter as a ZIP file to the HuggingFace dataset repo.

        Args:
            lora_dir: Local LoRA directory
            training_config: Optional training configuration dict

        Returns:
            Dict with repo_id and url if successful, None otherwise
        """
        if not self.has_hf:
            logger.info(f"💾 LoRA saved locally: {lora_dir.name}")
            return None
        if not self.token:
            logger.warning("⚠️ No HuggingFace token found - cannot upload")
            logger.info("💡 To enable uploads: log in to HuggingFace or set the HF_TOKEN environment variable")
            logger.info(f"💾 LoRA saved locally: {lora_dir.name}")
            return None
        try:
            import tempfile
            import zipfile
            from huggingface_hub import upload_file

            lora_name = lora_dir.name
            logger.info(f"📤 Creating ZIP and uploading LoRA to dataset repo: {self.repo_id}/loras/{lora_name}.zip...")

            # Write a README.md into the LoRA directory so it ships in the ZIP
            readme_content = self._generate_lora_readme(lora_name, training_config)
            readme_path = lora_dir / "README.md"
            with open(readme_path, 'w', encoding='utf-8') as f:
                f.write(readme_content)

            # Reserve a temp path for the ZIP (the handle closes immediately)
            with tempfile.NamedTemporaryFile(mode='wb', suffix='.zip', delete=False) as tmp_file:
                zip_path = tmp_file.name
            try:
                with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                    for file_path in lora_dir.rglob('*'):
                        if file_path.is_file():
                            arcname = file_path.relative_to(lora_dir)
                            zipf.write(file_path, arcname)

                # Upload the ZIP to the loras/ folder in the dataset repo
                upload_file(
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    path_or_fileobj=zip_path,
                    path_in_repo=f"loras/{lora_name}.zip",
                    commit_message=f"Upload LEMM LoRA adapter: {lora_name}",
                    token=self.token
                )
            finally:
                # Clean up temp file
                if os.path.exists(zip_path):
                    os.unlink(zip_path)

            logger.info(f"✅ Uploaded LoRA: {self.repo_id}/loras/{lora_name}.zip")
            logger.info(f"🔗 View at: https://huggingface.co/datasets/{self.repo_id}/blob/main/loras/{lora_name}.zip")
            return {
                'repo_id': f"{self.repo_id}/loras/{lora_name}.zip",
                'url': f"https://huggingface.co/datasets/{self.repo_id}/blob/main/loras/{lora_name}.zip",
                'dataset_repo': f"https://huggingface.co/datasets/{self.repo_id}"
            }
        except Exception as e:
            logger.error(f"Failed to upload LoRA: {e}")
            logger.info(f"💾 LoRA saved locally: {lora_dir.name}")
            return None

    def _generate_lora_readme(self, lora_name: str, config: Optional[Dict] = None) -> str:
        """Generate README.md content for a LoRA model"""
        config_info = ""
        if config:
            config_info = f"""
## Training Configuration

- **Dataset**: {config.get('dataset', 'N/A')}
- **Epochs**: {config.get('epochs', 'N/A')}
- **Learning Rate**: {config.get('learning_rate', 'N/A')}
- **Batch Size**: {config.get('batch_size', 'N/A')}
- **LoRA Rank**: {config.get('lora_rank', 'N/A')}
"""
        return f"""---
license: mit
tags:
- lora
- music-generation
- diffrhythm2
- lemm
library_name: diffusers
---

# LEMM LoRA: {lora_name}

This is a LoRA (Low-Rank Adaptation) adapter for DiffRhythm2 music generation, trained using LEMM (Let Everyone Make Music).

## About LEMM

LEMM is an advanced AI music generation system that allows you to:

- Generate high-quality music with built-in vocals
- Train custom LoRA adapters for specific styles
- Fine-tune models on your own datasets

🎵 **Try it**: [LEMM Space](https://huggingface.co/spaces/Gamahea/lemm-test-100)
{config_info}
## How to Use

### In LEMM Space

1. Visit [LEMM](https://huggingface.co/spaces/Gamahea/lemm-test-100)
2. Go to the "LoRA Management" tab
3. Enter this LoRA path: `{self.repo_id}/loras/{lora_name}.zip`
4. Click "Download from Hub"
5. Use in generation or as a base for continued training

### In Your Code

```python
import zipfile
from huggingface_hub import hf_hub_download

# Download the LoRA ZIP from the dataset repo
zip_path = hf_hub_download(
    repo_id="{self.repo_id}",
    repo_type="dataset",
    filename="loras/{lora_name}.zip",
)

# Extract the adapter files
with zipfile.ZipFile(zip_path) as zf:
    zf.extractall("./loras/{lora_name}")

# Load and use with DiffRhythm2
# (See LEMM documentation for integration)
```

## Model Files

- `final_model.pt` - Trained LoRA weights
- `config.yaml` - Training configuration
- `README.md` - This file

## Dataset Repository

Part of the [LEMM Training Data Repository](https://huggingface.co/datasets/{self.repo_id})

## License

MIT License - Free to use and modify
"""

    def upload_dataset(self, dataset_dir: Path, dataset_info: Optional[Dict] = None) -> Optional[Dict]:
        """
        Upload a prepared dataset as a ZIP file to the HF dataset repo.

        Args:
            dataset_dir: Local dataset directory
            dataset_info: Optional dataset metadata

        Returns:
            Dict with upload results or None if failed
        """
        if not self.has_hf:
            logger.info(f"💾 Dataset saved locally: {dataset_dir.name}")
            return None
        if not self.token:
            logger.warning("⚠️ No HuggingFace token found - cannot upload dataset")
            logger.info(f"💾 Dataset saved locally: {dataset_dir.name}")
            return None
        try:
            import tempfile
            import zipfile
            from huggingface_hub import upload_file

            dataset_name = dataset_dir.name
            logger.info(f"📤 Creating ZIP and uploading dataset to repo: {self.repo_id}/datasets/{dataset_name}.zip...")

            # Create ZIP file
            with tempfile.NamedTemporaryFile(mode='wb', suffix='.zip', delete=False) as tmp_file:
                zip_path = tmp_file.name
            try:
                with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
                    for file_path in dataset_dir.rglob('*'):
                        if file_path.is_file():
                            arcname = file_path.relative_to(dataset_dir)
                            zipf.write(file_path, arcname)

                # Upload ZIP to datasets/ folder in dataset repo
                upload_file(
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    path_or_fileobj=zip_path,
                    path_in_repo=f"datasets/{dataset_name}.zip",
                    commit_message=f"Upload prepared dataset: {dataset_name}",
                    token=self.token
                )
            finally:
                # Clean up temp file
                if os.path.exists(zip_path):
                    os.unlink(zip_path)

            logger.info(f"✅ Uploaded dataset: {self.repo_id}/datasets/{dataset_name}.zip")
            return {
                'repo_id': f"{self.repo_id}/datasets/{dataset_name}.zip",
                'url': f"https://huggingface.co/datasets/{self.repo_id}/blob/main/datasets/{dataset_name}.zip",
                'dataset_repo': f"https://huggingface.co/datasets/{self.repo_id}"
            }
        except Exception as e:
            logger.error(f"Failed to upload dataset: {e}")
            logger.info(f"💾 Dataset saved locally: {dataset_dir.name}")
            return None
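

if __name__ == "__main__":
    # Usage sketch, not part of the service: sync remote LoRAs into ./loras.
    # Assumes HF_TOKEN is set in the environment if the dataset repo requires
    # authentication; the local directory name here is an example, not a
    # requirement of the service.
    logging.basicConfig(level=logging.INFO)
    service = HFStorageService()
    results = service.sync_on_startup(loras_dir=Path("loras"))
    print(f"Synced {results['synced']} LoRA(s) from {service.repo_id}")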