Spaces:

Gamahea
/

lemm-test-100

Running on Zero

App Files Files Community

Gamahea committed 6 days ago

Commit

9ce6733

verified ·

1 Parent(s): 63a147f

Force upload clean hf_storage_service.py - fix syntax error

Browse files

Files changed (1) hide show

backend/services/hf_storage_service.py +415 -0

backend/services/hf_storage_service.py ADDED Viewed

	@@ -0,0 +1,415 @@

+"""
+HuggingFace Collection Storage Service
+Uploads LoRA adapters and prepared datasets as ZIP archives to a HuggingFace dataset repo
+LoRA archives stored under loras/ can be downloaded and extracted to local storage on startup
+"""
+import os
+import logging
+from pathlib import Path
+from typing import List, Dict, Optional
+import shutil
+import yaml
+logger = logging.getLogger(__name__)
+class HFStorageService:
+    """Service for uploading LoRAs as models to HuggingFace Hub"""
+    def __init__(self, username: str = "Gamahea", dataset_repo: str = "lemmdata"):
+        """
+        Initialize HF storage service
+        Args:
+            username: HuggingFace username
+            dataset_repo: Dataset repository name for storing training artifacts
+        """
+        self.username = username
+        self.dataset_repo = dataset_repo
+        self.repo_id = f"{username}/{dataset_repo}"
+        self.local_cache = Path("hf_cache")
+        self.local_cache.mkdir(exist_ok=True)
+        logger.info(f"HF Storage initialized for user: {username}")
+        logger.info(f"Dataset Repo: https://huggingface.co/datasets/{self.repo_id}")
+        # Get HF token from environment
+        self.token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
+        # Try to import huggingface_hub
+        try:
+            from huggingface_hub import HfApi
+            self.api = HfApi(token=self.token) if self.token else HfApi()
+            self.has_hf = True
+            if self.token:
+                logger.info("✅ HuggingFace Hub available with authentication")
+            else:
+                logger.warning("⚠️ HuggingFace Hub available but no token found (uploads may fail)")
+        except ImportError:
+            logger.warning("⚠️ huggingface_hub not available, uploads will be skipped")
+            self.has_hf = False
+    def sync_on_startup(self, loras_dir: Path, datasets_dir: Path = None) -> Dict:
+        """
+        Sync LoRAs and datasets from HuggingFace dataset repo on startup
+        Downloads missing LoRAs and datasets from the repo to local storage
+        Args:
+            loras_dir: Local directory for LoRA storage
+            datasets_dir: Local directory for dataset storage (optional)
+        Returns:
+            Dict with sync results: {'loras': [...], 'datasets': [...], 'synced': count}
+        """
+        if not self.has_hf:
+            logger.debug("HF not available, skipping sync")
+            return {'loras': [], 'datasets': [], 'synced': 0}
+        try:
+            # List LoRAs in dataset repo
+            collection_loras = self.list_dataset_loras()
+            if not collection_loras:
+                logger.info("No LoRAs found in dataset repo")
+                return {'loras': [], 'datasets': [], 'synced': 0}
+            logger.info(f"Found {len(collection_loras)} LoRA(s) in dataset repo")
+            # Check which ones are missing locally
+            loras_dir.mkdir(parents=True, exist_ok=True)
+            existing_loras = set(d.name for d in loras_dir.iterdir() if d.is_dir())
+            synced_count = 0
+            for lora in collection_loras:
+                lora_name = lora['name']
+                # Handle name conflicts - add number suffix if needed
+                final_name = lora_name
+                counter = 1
+                while final_name in existing_loras:
+                    final_name = f"{lora_name}_{counter}"
+                    counter += 1
+                target_dir = loras_dir / final_name
+                # Download if not present locally
+                if not target_dir.exists():
+                    logger.info(f"Downloading LoRA from dataset repo: {lora['path']}")
+                    if self.download_lora(lora['path'], target_dir):
+                        synced_count += 1
+                        existing_loras.add(final_name)
+                        if final_name != lora_name:
+                            logger.info(f"Downloaded as '{final_name}' (name conflict resolved)")
+            logger.info(f"Synced {synced_count} new LoRA(s) from dataset repo")
+            return {'loras': collection_loras, 'datasets': [], 'synced': synced_count}
+        except Exception as e:
+            logger.error(f"Sync failed: {str(e)}", exc_info=True)
+            return {'loras': [], 'datasets': [], 'synced': 0, 'error': str(e)}
+    def list_dataset_loras(self) -> List[Dict[str, str]]:
+        """
+        List all LoRA ZIP files stored in the dataset repo
+        Returns:
+            List of dicts with 'name' and 'path'
+        """
+        if not self.has_hf:
+            logger.debug("HF not available, skipping dataset list")
+            return []
+        try:
+            from huggingface_hub import list_repo_files
+            # List all files in the loras/ folder
+            files = list_repo_files(
+                repo_id=self.repo_id,
+                repo_type="dataset",
+                token=self.token
+            )
+            # Extract LoRA names from ZIP files in loras/ folder
+            loras = []
+            for file in files:
+                if file.startswith("loras/") and file.endswith(".zip"):
+                    # Extract name from "loras/name.zip"
+                    lora_name = file[6:-4]  # Remove "loras/" and ".zip"
+                    loras.append({
+                        'name': lora_name,
+                        'path': f"loras/{lora_name}"
+                    })
+            logger.info(f"Found {len(loras)} LoRA(s) in dataset repo")
+            return loras
+        except Exception as e:
+            logger.error(f"Failed to list dataset LoRAs: {e}")
+            return []
+    def download_lora(self, lora_path: str, target_dir: Path) -> bool:
+        """
+        Download a LoRA ZIP file from dataset repo and extract it
+        Args:
+            lora_path: Path within dataset repo (e.g., "loras/jazz-v1")
+            target_dir: Local directory to extract to
+        Returns:
+            True if successful
+        """
+        if not self.has_hf:
+            logger.debug("HF not available, skipping download")
+            return False
+        try:
+            from huggingface_hub import hf_hub_download
+            import zipfile
+            import tempfile
+            # Expect ZIP file
+            lora_name = lora_path.split('/')[-1]
+            zip_filename = f"loras/{lora_name}.zip"
+            logger.info(f"Downloading LoRA ZIP from {self.repo_id}/{zip_filename}...")
+            # Download ZIP file to temp location
+            zip_path = hf_hub_download(
+                repo_id=self.repo_id,
+                repo_type="dataset",
+                filename=zip_filename,
+                token=self.token
+            )
+            # Extract to target directory
+            target_dir.mkdir(parents=True, exist_ok=True)
+            with zipfile.ZipFile(zip_path, 'r') as zipf:
+                zipf.extractall(target_dir)
+            logger.info(f"✅ Downloaded and extracted LoRA to {target_dir}")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to download LoRA: {e}")
+            return False
+    def upload_lora(self, lora_dir: Path, training_config: Optional[Dict] = None) -> Optional[Dict]:
+        """
+        Upload a LoRA adapter as a ZIP file to HuggingFace dataset repo
+        Args:
+            lora_dir: Local LoRA directory
+            training_config: Optional training configuration dict
+        Returns:
+            Dict with repo_id and url if successful, None otherwise
+        """
+        if not self.has_hf:
+            logger.info(f"💾 LoRA saved locally: {lora_dir.name}")
+            return None
+        if not self.token:
+            logger.warning("⚠️ No HuggingFace token found - cannot upload")
+            logger.info("💡 To enable uploads: Log in to HuggingFace or set HF_TOKEN environment variable")
+            logger.info(f"💾 LoRA saved locally: {lora_dir.name}")
+            return None
+        try:
+            from huggingface_hub import upload_file
+            import zipfile
+            import tempfile
+            lora_name = lora_dir.name
+            logger.info(f"📤 Creating ZIP and uploading LoRA to dataset repo: {self.repo_id}/loras/{lora_name}.zip...")
+            # Create README.md for the LoRA
+            readme_content = self._generate_lora_readme(lora_name, training_config)
+            readme_path = lora_dir / "README.md"
+            with open(readme_path, 'w', encoding='utf-8') as f:
+                f.write(readme_content)
+            # Create ZIP file
+            with tempfile.NamedTemporaryFile(mode='wb', suffix='.zip', delete=False) as tmp_file:
+                zip_path = tmp_file.name
+            try:
+                with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
+                    for file_path in lora_dir.rglob('*'):
+                        if file_path.is_file():
+                            arcname = file_path.relative_to(lora_dir)
+                            zipf.write(file_path, arcname)
+                # Upload ZIP file to loras/ folder in dataset repo
+                upload_file(
+                    repo_id=self.repo_id,
+                    repo_type="dataset",
+                    path_or_fileobj=zip_path,
+                    path_in_repo=f"loras/{lora_name}.zip",
+                    commit_message=f"Upload LEMM LoRA adapter: {lora_name}",
+                    token=self.token
+                )
+            finally:
+                # Clean up temp file
+                import os
+                if os.path.exists(zip_path):
+                    os.unlink(zip_path)
+            logger.info(f"✅ Uploaded LoRA: {self.repo_id}/loras/{lora_name}.zip")
+            logger.info(f"🔗 View at: https://huggingface.co/datasets/{self.repo_id}/blob/main/loras/{lora_name}.zip")
+            return {
+                'repo_id': f"{self.repo_id}/loras/{lora_name}.zip",
+                'url': f"https://huggingface.co/datasets/{self.repo_id}/blob/main/loras/{lora_name}.zip",
+                'dataset_repo': f"https://huggingface.co/datasets/{self.repo_id}"
+            }
+        except Exception as e:
+            logger.error(f"Failed to upload LoRA: {e}")
+            logger.info(f"💾 LoRA saved locally: {lora_dir.name}")
+            return None
+    def _generate_lora_readme(self, lora_name: str, config: Optional[Dict] = None) -> str:
+        """
+        Generate README.md content for a LoRA model
+        Args:
+            lora_name: LoRA name, inserted into the title, the model ID and the example code
+            config: Optional training configuration; when truthy, a "Training Configuration"
+                section is rendered from its 'dataset', 'epochs', 'learning_rate',
+                'batch_size' and 'lora_rank' keys ('N/A' for any missing key)
+        Returns:
+            The full README text, starting with a YAML front-matter block
+        """
+        # Stays empty when no config is given, so the section is omitted from the README
+        config_info = ""
+        if config:
+            config_info = f"""
+## Training Configuration
+- **Dataset**: {config.get('dataset', 'N/A')}
+- **Epochs**: {config.get('epochs', 'N/A')}
+- **Learning Rate**: {config.get('learning_rate', 'N/A')}
+- **Batch Size**: {config.get('batch_size', 'N/A')}
+- **LoRA Rank**: {config.get('lora_rank', 'N/A')}
+"""
+        # NOTE(review): the usage instructions below reference a model repo named
+        # "<username>/lemm-lora-<lora_name>", while upload_lora() in this class uploads
+        # "loras/<lora_name>.zip" to the dataset repo (self.repo_id) - confirm that the
+        # model repo actually exists or update the template.
+        return f"""---
+license: mit
+tags:
+- lora
+- music-generation
+- diffrhythm2
+- lemm
+library_name: diffusers
+---
+# LEMM LoRA: {lora_name}
+This is a LoRA (Low-Rank Adaptation) adapter for DiffRhythm2 music generation, trained using LEMM (Let Everyone Make Music).
+## About LEMM
+LEMM is an advanced AI music generation system that allows you to:
+- Generate high-quality music with built-in vocals
+- Train custom LoRA adapters for specific styles
+- Fine-tune models on your own datasets
+🎵 **Try it**: [LEMM Space](https://huggingface.co/spaces/Gamahea/lemm-test-100)
+{config_info}
+## How to Use
+### In LEMM Space
+1. Visit [LEMM](https://huggingface.co/spaces/Gamahea/lemm-test-100)
+2. Go to "LoRA Management" tab
+3. Enter this model ID: `{self.username}/lemm-lora-{lora_name}`
+4. Click "Download from Hub"
+5. Use in generation or as base for continued training
+### In Your Code
+```python
+from pathlib import Path
+from huggingface_hub import snapshot_download
+# Download LoRA
+lora_path = snapshot_download(
+    repo_id="{self.username}/lemm-lora-{lora_name}",
+    local_dir="./loras/{lora_name}"
+)
+# Load and use with DiffRhythm2
+# (See LEMM documentation for integration)
+```
+## Model Files
+- `final_model.pt` - Trained LoRA weights
+- `config.yaml` - Training configuration
+- `README.md` - This file
+## Dataset Repository
+Part of the [LEMM Training Data Repository](https://huggingface.co/datasets/{self.repo_id})
+## License
+MIT License - Free to use and modify
+"""
+    def upload_dataset(self, dataset_dir: Path, dataset_info: Optional[Dict] = None) -> Optional[Dict]:
+        """
+        Upload a prepared dataset as ZIP file to HF dataset repo
+        Args:
+            dataset_dir: Local dataset directory
+            dataset_info: Optional dataset metadata
+        Returns:
+            Dict with upload results or None if failed
+        """
+        if not self.has_hf:
+            logger.info(f"💾 Dataset saved locally: {dataset_dir.name}")
+            return None
+        if not self.token:
+            logger.warning("⚠️ No HuggingFace token found - cannot upload dataset")
+            logger.info(f"💾 Dataset saved locally: {dataset_dir.name}")
+            return None
+        try:
+            from huggingface_hub import upload_file
+            import zipfile
+            import tempfile
+            dataset_name = dataset_dir.name
+            logger.info(f"📤 Creating ZIP and uploading dataset to repo: {self.repo_id}/datasets/{dataset_name}.zip...")
+            # Create ZIP file
+            with tempfile.NamedTemporaryFile(mode='wb', suffix='.zip', delete=False) as tmp_file:
+                zip_path = tmp_file.name
+            try:
+                with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
+                    for file_path in dataset_dir.rglob('*'):
+                        if file_path.is_file():
+                            arcname = file_path.relative_to(dataset_dir)
+                            zipf.write(file_path, arcname)
+                # Upload ZIP to datasets/ folder in dataset repo
+                upload_file(
+                    repo_id=self.repo_id,
+                    repo_type="dataset",
+                    path_or_fileobj=zip_path,
+                    path_in_repo=f"datasets/{dataset_name}.zip",
+                    commit_message=f"Upload prepared dataset: {dataset_name}",
+                    token=self.token
+                )
+            finally:
+                # Clean up temp file
+                import os
+                if os.path.exists(zip_path):
+                    os.unlink(zip_path)
+            logger.info(f"✅ Uploaded dataset: {self.repo_id}/datasets/{dataset_name}.zip")
+            return {
+                'repo_id': f"{self.repo_id}/datasets/{dataset_name}.zip",
+                'url': f"https://huggingface.co/datasets/{self.repo_id}/blob/main/datasets/{dataset_name}.zip",
+                'dataset_repo': f"https://huggingface.co/datasets/{self.repo_id}"
+            }
+        except Exception as e:
+            logger.error(f"Failed to upload dataset: {e}")
+            logger.info(f"💾 Dataset saved locally: {dataset_dir.name}")
+            return None