Gamahea committed
Commit 63a147f · verified · 1 Parent(s): 1262e7a

Delete corrupted file

backend/services/hf_storage_service.py DELETED
@@ -1,359 +0,0 @@
- """
- HuggingFace Dataset Repository Storage Service
- Uploads LoRA adapters and datasets as ZIP files to a centralized dataset repo
- """
- import os
- import logging
- from pathlib import Path
- from typing import List, Dict, Optional
- 
- logger = logging.getLogger(__name__)
- 
- class HFStorageService:
-     """Service for uploading LoRA adapters and datasets as ZIP files to a HuggingFace dataset repo"""
- 
-     def __init__(self, username: str = "Gamahea", dataset_repo: str = "lemmdata"):
-         """
-         Initialize HF storage service
- 
-         Args:
-             username: HuggingFace username
-             dataset_repo: Dataset repository name for storing training artifacts
-         """
-         self.username = username
-         self.dataset_repo = dataset_repo
-         self.repo_id = f"{username}/{dataset_repo}"
-         self.local_cache = Path("hf_cache")
-         self.local_cache.mkdir(exist_ok=True)
- 
-         logger.info(f"HF Storage initialized for user: {username}")
-         logger.info(f"Dataset Repo: https://huggingface.co/datasets/{self.repo_id}")
- 
-         # Get HF token from environment
-         self.token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
- 
-         # Try to import huggingface_hub
-         try:
-             from huggingface_hub import HfApi
-             self.api = HfApi(token=self.token) if self.token else HfApi()
-             self.has_hf = True
-             if self.token:
-                 logger.info("✅ HuggingFace Hub available with authentication")
-             else:
-                 logger.warning("⚠️ HuggingFace Hub available but no token found (uploads may fail)")
-         except ImportError:
-             logger.warning("⚠️ huggingface_hub not available, uploads will be skipped")
-             self.has_hf = False
- 
-     def sync_on_startup(self, loras_dir: Path, datasets_dir: Optional[Path] = None) -> Dict:
-         """
-         Sync LoRAs from the HuggingFace dataset repo on startup
-         Downloads missing LoRAs from the repo to local storage (dataset sync is not yet implemented)
- 
-         Args:
-             loras_dir: Local directory for LoRA storage
-             datasets_dir: Local directory for dataset storage (optional, currently unused)
- 
-         Returns:
-             Dict with sync results: {'loras': [...], 'datasets': [...], 'synced': count}
-         """
-         if not self.has_hf:
-             logger.debug("HF not available, skipping sync")
-             return {'loras': [], 'datasets': [], 'synced': 0}
- 
-         try:
-             # List LoRAs in dataset repo
-             collection_loras = self.list_dataset_loras()
- 
-             if not collection_loras:
-                 logger.info("No LoRAs found in dataset repo")
-                 return {'loras': [], 'datasets': [], 'synced': 0}
- 
-             logger.info(f"Found {len(collection_loras)} LoRA(s) in dataset repo")
- 
-             # Check which ones are missing locally
-             loras_dir.mkdir(parents=True, exist_ok=True)
-             existing_loras = set(d.name for d in loras_dir.iterdir() if d.is_dir())
- 
-             synced_count = 0
-             for lora in collection_loras:
-                 lora_name = lora['name']
-                 target_dir = loras_dir / lora_name
- 
-                 # Skip LoRAs already present locally (e.g. synced on a previous startup);
-                 # re-downloading them under a suffixed name would duplicate adapters
-                 if lora_name in existing_loras or target_dir.exists():
-                     continue
- 
-                 logger.info(f"Downloading LoRA from dataset repo: {lora['path']}")
-                 if self.download_lora(lora['path'], target_dir):
-                     synced_count += 1
-                     existing_loras.add(lora_name)
- 
- logger.info(f"Synced {synced_count} new LoRA(s) from dataset repo")
103
- return {'loras': collection_loras, 'datasets': [], 'synced': synced_count}
104
-
105
- except Exception as e:
106
- logger.error(f"Sync failed: {str(e)}", exc_info=True)
107
- return {'loras': [], 'datasets': [], 'synced': 0, 'error': str(e)}
108
-
109
-     def list_dataset_loras(self) -> List[Dict[str, str]]:
-         """
-         List all LoRA ZIP files stored in the dataset repo
- 
-         Returns:
-             List of dicts with 'name' and 'path'
-         """
-         if not self.has_hf:
-             logger.debug("HF not available, skipping dataset list")
-             return []
- 
-         try:
-             from huggingface_hub import list_repo_files
- 
-             # List all files in the dataset repo
-             files = list_repo_files(
-                 repo_id=self.repo_id,
-                 repo_type="dataset",
-                 token=self.token
-             )
- 
-             # Extract LoRA names from ZIP files in the loras/ folder
-             loras = []
-             for file in files:
-                 if file.startswith("loras/") and file.endswith(".zip"):
-                     # Extract name from "loras/name.zip"
-                     lora_name = file[6:-4]  # Remove "loras/" and ".zip"
-                     loras.append({
-                         'name': lora_name,
-                         'path': f"loras/{lora_name}"
-                     })
- 
-             logger.info(f"Found {len(loras)} LoRA(s) in dataset repo")
-             return loras
- 
-         except Exception as e:
-             logger.error(f"Failed to list dataset LoRAs: {e}")
-             return []
- 
-     def download_lora(self, lora_path: str, target_dir: Path) -> bool:
-         """
-         Download a LoRA ZIP file from dataset repo and extract it
- 
-         Args:
-             lora_path: Path within dataset repo (e.g., "loras/jazz-v1")
-             target_dir: Local directory to extract to
- 
-         Returns:
-             True if successful
-         """
-         if not self.has_hf:
-             logger.debug("HF not available, skipping download")
-             return False
- 
-         try:
-             from huggingface_hub import hf_hub_download
-             import zipfile
- 
-             # Expect ZIP file
-             lora_name = lora_path.split('/')[-1]
-             zip_filename = f"loras/{lora_name}.zip"
- 
-             logger.info(f"Downloading LoRA ZIP from {self.repo_id}/{zip_filename}...")
- 
-             # Download ZIP file into the local HF cache
-             zip_path = hf_hub_download(
-                 repo_id=self.repo_id,
-                 repo_type="dataset",
-                 filename=zip_filename,
-                 token=self.token
-             )
- 
-             # Extract to target directory
-             target_dir.mkdir(parents=True, exist_ok=True)
- 
-             with zipfile.ZipFile(zip_path, 'r') as zipf:
-                 zipf.extractall(target_dir)
- 
-             logger.info(f"✅ Downloaded and extracted LoRA to {target_dir}")
-             return True
- 
-         except Exception as e:
-             logger.error(f"Failed to download LoRA: {e}")
-             return False
- 
-     def upload_lora(self, lora_dir: Path, training_config: Optional[Dict] = None) -> Optional[Dict]:
-         """
-         Upload a LoRA adapter as a ZIP file to HuggingFace dataset repo
- 
-         Args:
-             lora_dir: Local LoRA directory
-             training_config: Optional training configuration dict
- 
-         Returns:
-             Dict with repo_id and url if successful, None otherwise
-         """
-         if not self.has_hf:
-             logger.info(f"💾 LoRA saved locally: {lora_dir.name}")
-             return None
- 
-         if not self.token:
-             logger.warning("⚠️ No HuggingFace token found - cannot upload")
-             logger.info("💡 To enable uploads: Log in to HuggingFace or set HF_TOKEN environment variable")
-             logger.info(f"💾 LoRA saved locally: {lora_dir.name}")
-             return None
- 
-         try:
-             from huggingface_hub import upload_file
-             import zipfile
-             import tempfile
- 
-             lora_name = lora_dir.name
- 
-             logger.info(f"📤 Creating ZIP and uploading LoRA to dataset repo: {self.repo_id}/loras/{lora_name}.zip...")
- 
-             # Create README.md for the LoRA
-             readme_content = self._generate_lora_readme(lora_name, training_config)
-             readme_path = lora_dir / "README.md"
-             with open(readme_path, 'w', encoding='utf-8') as f:
-                 f.write(readme_content)
- 
-             # Create ZIP file
-             with tempfile.NamedTemporaryFile(mode='wb', suffix='.zip', delete=False) as tmp_file:
-                 zip_path = tmp_file.name
- 
-             try:
-                 with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
-                     for file_path in lora_dir.rglob('*'):
-                         if file_path.is_file():
-                             arcname = file_path.relative_to(lora_dir)
-                             zipf.write(file_path, arcname)
- 
-                 # Upload ZIP file to loras/ folder in dataset repo
-                 upload_file(
-                     repo_id=self.repo_id,
-                     repo_type="dataset",
-                     path_or_fileobj=zip_path,
-                     path_in_repo=f"loras/{lora_name}.zip",
-                     commit_message=f"Upload LEMM LoRA adapter: {lora_name}",
-                     token=self.token
-                 )
-             finally:
-                 # Clean up temp file (os is imported at module level)
-                 if os.path.exists(zip_path):
-                     os.unlink(zip_path)
- 
- logger.info(f"✅ Uploaded LoRA: {self.repo_id}/loras/{lora_name}.zip")
258
- logger.info(f"🔗 View at: https://huggingface.co/datasets/{self.repo_id}/blob/main/loras/{lora_name}.zip")
259
-
260
- return {
261
- 'repo_id': f"{self.repo_id}/loras/{lora_name}.zip",
262
- 'url': f"https://huggingface.co/datasets/{self.repo_id}/blob/main/loras/{lora_name}.zip",
263
- 'dataset_repo': f"https://huggingface.co/datasets/{self.repo_id}"
264
- }
265
-
266
- except Exception as e:
267
- logger.error(f"Failed to upload LoRA: {e}")
268
- logger.info(f"💾 LoRA saved locally: {lora_dir.name}")
269
- return None
270
-
271
-     def _generate_lora_readme(self, lora_name: str, config: Optional[Dict] = None) -> str:
-         """Generate README.md content for a LoRA model"""
- 
-         config_info = ""
-         if config:
-             config_info = f"""
- ## Training Configuration
- 
- - **Dataset**: {config.get('dataset', 'N/A')}
- - **Epochs**: {config.get('epochs', 'N/A')}
- - **Learning Rate**: {config.get('learning_rate', 'N/A')}
- - **Batch Size**: {config.get('batch_size', 'N/A')}
- - **LoRA Rank**: {config.get('lora_rank', 'N/A')}
- """
- 
- return f"""---
287
- license: mit
288
- tags:
289
- - lora
290
- - music-generation
291
- - diffrhythm2
292
- - lemm
293
- library_name: diffusers
294
- ---
295
-
296
- # LEMM LoRA: {lora_name}
297
-
298
- This is a LoRA (Low-Rank Adaptation) adapter for DiffRhythm2 music generation, trained using LEMM (Let Everyone Make Music).
299
-
300
- ## About LEMM
301
-
302
- LEMM is an advanced AI music generation system that allows you to:
303
- - Generate high-quality music with built-in vocals
304
- - Train custom LoRA adapters for specific styles
305
- - Fine-tune models on your own datasets
306
-
307
- 🎵 **Try it**: [LEMM Space](https://huggingface.co/spaces/Gamahea/lemm-test-100)
308
- {config_info}
309
- ## How to Use
310
-
311
- ### In LEMM Space
312
- 1. Visit [LEMM](https://huggingface.co/spaces/Gamahea/lemm-test-100)
313
- 2. Go to "LoRA Management" tab
314
- 3. Enter this model ID: `{self.username}/lemm-lora-{lora_name}`
315
- 4. Click "Download from Hub"
316
- 5. Use in generation or as base for continued training
317
-
318
- ### In Your Code
319
- ```python
320
- from pathlib import Path
321
- from huggingface_hub import snapshot_download
322
-
323
- # Download LoRA
324
- lora_path = snapshot_download(
325
- repo_id="{self.username}/lemm-lora-{lora_name}",
326
- local_dir="./loras/{lora_name}"
327
- )
328
-
329
- # Load and use with DiffRhythm2
330
- # (See LEMM documentation for integration)
331
- ```
332
-
333
- ## Model Files
334
-
335
- - `final_model.pt` - Trained LoRA weights
336
- - `config.yaml` - Training configuration
337
- - `README.md` - This file
338
-
339
- ## Dataset Repository
340
-
341
- Part of the [LEMM Training Data Repository](https://huggingface.co/datasets/{self.repo_id})
342
-
343
- ## License
344
-
345
- MIT License - Free to use and modify
346
- """
347
-
348
-     def upload_dataset(self, dataset_dir: Path, dataset_info: Optional[Dict] = None) -> Optional[Dict]:
-         """
-         Upload a prepared dataset to HF dataset repo
- 
-         Args:
-             dataset_dir: Local dataset directory
-             dataset_info: Optional dataset metadata
- 
-         Returns:
-             Dict with upload results or None if failed
- \"\"\"\n if not self.has_hf:\n logger.info(f\"💾 Dataset saved locally: {dataset_dir.name}\")\n return None\n \n if not self.token:\n logger.warning(\"⚠️ No HuggingFace token found - cannot upload dataset\")\n logger.info(f\"💾 Dataset saved locally: {dataset_dir.name}\")\n return None\n \n try:\n from huggingface_hub import upload_folder\n \n dataset_name = dataset_dir.name\n \n logger.info(f\"📤 Uploading dataset to repo: {self.repo_id}/datasets/{dataset_name}...\")\n \n # Upload to datasets/ folder in dataset repo\n upload_folder(\n repo_id=self.repo_id,\n repo_type=\"dataset\",\n folder_path=str(dataset_dir),\n path_in_repo=f\"datasets/{dataset_name}\",\n commit_message=f\"Upload prepared dataset: {dataset_name}\",\n token=self.token\n )\n \n logger.info(f\"✅ Uploaded dataset: {self.repo_id}/datasets/{dataset_name}\")\n \n return {\n 'repo_id': f\"{self.repo_id}/datasets/{dataset_name}\",\n 'url': f\"https://huggingface.co/datasets/{self.repo_id}/tree/main/datasets/{dataset_name}\",\n 'dataset_repo': f\"https://huggingface.co/datasets/{self.repo_id}\"\n }\n \n except Exception as e:\n logger.error(f\"Failed to upload dataset: {e}\")\n logger.info(f\"💾 Dataset saved locally: {dataset_dir.name}\")\n return None
359
-
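For context, a minimal sketch of how this service was presumably wired into the backend's startup path before the deletion. The import path mirrors the deleted file's location; the `backend/loras` and `backend/datasets` directories and the `print` reporting are assumptions for illustration, not taken from this commit:

```python
# Hypothetical startup wiring — paths and import are assumptions, not from this commit
from pathlib import Path

from backend.services.hf_storage_service import HFStorageService

storage = HFStorageService(username="Gamahea", dataset_repo="lemmdata")

# Pull any LoRA ZIPs stored in the dataset repo that are missing locally
result = storage.sync_on_startup(
    loras_dir=Path("backend/loras"),
    datasets_dir=Path("backend/datasets"),  # accepted but not yet used by the service
)
print(f"Synced {result['synced']} LoRA(s) from {storage.repo_id}")
```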
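Continuing from the sketch above, the write path under the same assumptions. The adapter directory and `training_config` values are illustrative; the config keys match the ones `_generate_lora_readme` reads:

```python
# Hypothetical post-training upload — directory and config values are illustrative
lora_dir = Path("backend/loras/jazz-v1")  # a trained adapter directory
info = storage.upload_lora(
    lora_dir,
    training_config={
        "dataset": "jazz-clips",
        "epochs": 10,
        "learning_rate": 1e-4,
        "batch_size": 4,
        "lora_rank": 16,
    },
)
if info:
    print(f"Uploaded ZIP: {info['url']}")  # lands at loras/jazz-v1.zip in the dataset repo
else:
    print("Upload skipped; adapter kept locally")
```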