Gamahea committed on
Commit
9ce6733
·
verified ·
1 Parent(s): 63a147f

Force upload clean hf_storage_service.py - fix syntax error

backend/services/hf_storage_service.py ADDED
@@ -0,0 +1,415 @@
+ """
+ HuggingFace Dataset Storage Service
+ Uploads LoRA adapters and prepared datasets as ZIP archives to a
+ HuggingFace dataset repository, and syncs them back to local storage on startup
+ """
+ import os
+ import logging
+ from pathlib import Path
+ from typing import List, Dict, Optional
+
+ logger = logging.getLogger(__name__)
+
+ class HFStorageService:
+     """Service for uploading LoRAs and datasets to the HuggingFace Hub"""
+
+     def __init__(self, username: str = "Gamahea", dataset_repo: str = "lemmdata"):
+         """
+         Initialize HF storage service
+
+         Args:
+             username: HuggingFace username
+             dataset_repo: Dataset repository name for storing training artifacts
+         """
+         self.username = username
+         self.dataset_repo = dataset_repo
+         self.repo_id = f"{username}/{dataset_repo}"
+         self.local_cache = Path("hf_cache")
+         self.local_cache.mkdir(exist_ok=True)
+
+         logger.info(f"HF Storage initialized for user: {username}")
+         logger.info(f"Dataset repo: https://huggingface.co/datasets/{self.repo_id}")
+
+         # Get HF token from environment
+         self.token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
+
+         # Import huggingface_hub lazily so the service degrades gracefully
+         try:
+             from huggingface_hub import HfApi
+             self.api = HfApi(token=self.token) if self.token else HfApi()
+             self.has_hf = True
+             if self.token:
+                 logger.info("✅ HuggingFace Hub available with authentication")
+             else:
+                 logger.warning("⚠️ HuggingFace Hub available but no token found (uploads may fail)")
+         except ImportError:
+             logger.warning("⚠️ huggingface_hub not available, uploads will be skipped")
+             self.has_hf = False
+
+     def sync_on_startup(self, loras_dir: Path, datasets_dir: Optional[Path] = None) -> Dict:
+         """
+         Sync LoRAs from the HuggingFace dataset repo on startup
+         Downloads LoRAs that exist in the repo but are missing from local storage
+
+         Args:
+             loras_dir: Local directory for LoRA storage
+             datasets_dir: Local directory for dataset storage (optional; dataset
+                 syncing is not implemented yet, so 'datasets' is always empty)
+
+         Returns:
+             Dict with sync results: {'loras': [...], 'datasets': [...], 'synced': count}
+         """
+         if not self.has_hf:
+             logger.debug("HF not available, skipping sync")
+             return {'loras': [], 'datasets': [], 'synced': 0}
+
+         try:
+             # List LoRAs in the dataset repo
+             collection_loras = self.list_dataset_loras()
+
+             if not collection_loras:
+                 logger.info("No LoRAs found in dataset repo")
+                 return {'loras': [], 'datasets': [], 'synced': 0}
+
+             logger.info(f"Found {len(collection_loras)} LoRA(s) in dataset repo")
+
+             # Check which ones are missing locally
+             loras_dir.mkdir(parents=True, exist_ok=True)
+             existing_loras = set(d.name for d in loras_dir.iterdir() if d.is_dir())
+
+             synced_count = 0
+             for lora in collection_loras:
+                 lora_name = lora['name']
+
+                 # A local directory with the same name is treated as the
+                 # already-synced copy, so skip it
+                 if lora_name in existing_loras:
+                     continue
+
+                 target_dir = loras_dir / lora_name
+                 logger.info(f"Downloading LoRA from dataset repo: {lora['path']}")
+                 if self.download_lora(lora['path'], target_dir):
+                     synced_count += 1
+                     existing_loras.add(lora_name)
+
+             logger.info(f"Synced {synced_count} new LoRA(s) from dataset repo")
+             return {'loras': collection_loras, 'datasets': [], 'synced': synced_count}
+
+         except Exception as e:
+             logger.error(f"Sync failed: {str(e)}", exc_info=True)
+             return {'loras': [], 'datasets': [], 'synced': 0, 'error': str(e)}
+
+     def list_dataset_loras(self) -> List[Dict[str, str]]:
+         """
+         List all LoRA ZIP files stored in the dataset repo
+
+         Returns:
+             List of dicts with 'name' and 'path'
+         """
+         if not self.has_hf:
+             logger.debug("HF not available, skipping dataset list")
+             return []
+
+         try:
+             from huggingface_hub import list_repo_files
+
+             # List all files in the repo
+             files = list_repo_files(
+                 repo_id=self.repo_id,
+                 repo_type="dataset",
+                 token=self.token
+             )
+
+             # Extract LoRA names from ZIP files in the loras/ folder
+             loras = []
+             for file in files:
+                 if file.startswith("loras/") and file.endswith(".zip"):
+                     # Extract name from "loras/name.zip"
+                     lora_name = file[6:-4]  # Remove "loras/" and ".zip"
+                     loras.append({
+                         'name': lora_name,
+                         'path': f"loras/{lora_name}"
+                     })
+
+             logger.info(f"Found {len(loras)} LoRA(s) in dataset repo")
+             return loras
+
+         except Exception as e:
+             logger.error(f"Failed to list dataset LoRAs: {e}")
+             return []
+
+     def download_lora(self, lora_path: str, target_dir: Path) -> bool:
+         """
+         Download a LoRA ZIP file from the dataset repo and extract it
+
+         Args:
+             lora_path: Path within dataset repo (e.g., "loras/jazz-v1")
+             target_dir: Local directory to extract to
+
+         Returns:
+             True if successful
+         """
+         if not self.has_hf:
+             logger.debug("HF not available, skipping download")
+             return False
+
+         try:
+             from huggingface_hub import hf_hub_download
+             import zipfile
+
+             # The repo stores each LoRA as loras/<name>.zip
+             lora_name = lora_path.split('/')[-1]
+             zip_filename = f"loras/{lora_name}.zip"
+
+             logger.info(f"Downloading LoRA ZIP from {self.repo_id}/{zip_filename}...")
+
+             # Download the ZIP into the local HF cache
+             zip_path = hf_hub_download(
+                 repo_id=self.repo_id,
+                 repo_type="dataset",
+                 filename=zip_filename,
+                 token=self.token
+             )
+
+             # Extract to target directory
+             target_dir.mkdir(parents=True, exist_ok=True)
+
+             with zipfile.ZipFile(zip_path, 'r') as zipf:
+                 zipf.extractall(target_dir)
+
+             logger.info(f"✅ Downloaded and extracted LoRA to {target_dir}")
+             return True
+
+         except Exception as e:
+             logger.error(f"Failed to download LoRA: {e}")
+             return False
+
+     def upload_lora(self, lora_dir: Path, training_config: Optional[Dict] = None) -> Optional[Dict]:
+         """
+         Upload a LoRA adapter as a ZIP file to the HuggingFace dataset repo
+
+         Args:
+             lora_dir: Local LoRA directory
+             training_config: Optional training configuration dict
+
+         Returns:
+             Dict with repo_id and url if successful, None otherwise
+         """
+         if not self.has_hf:
+             logger.info(f"💾 LoRA saved locally: {lora_dir.name}")
+             return None
+
+         if not self.token:
+             logger.warning("⚠️ No HuggingFace token found - cannot upload")
+             logger.info("💡 To enable uploads: log in to HuggingFace or set the HF_TOKEN environment variable")
+             logger.info(f"💾 LoRA saved locally: {lora_dir.name}")
+             return None
+
+         try:
+             from huggingface_hub import upload_file
+             import zipfile
+             import tempfile
+
+             lora_name = lora_dir.name
+
+             logger.info(f"📤 Creating ZIP and uploading LoRA to dataset repo: {self.repo_id}/loras/{lora_name}.zip...")
+
+             # Create README.md for the LoRA
+             readme_content = self._generate_lora_readme(lora_name, training_config)
+             readme_path = lora_dir / "README.md"
+             with open(readme_path, 'w', encoding='utf-8') as f:
+                 f.write(readme_content)
+
+             # Create a temporary ZIP file
+             with tempfile.NamedTemporaryFile(mode='wb', suffix='.zip', delete=False) as tmp_file:
+                 zip_path = tmp_file.name
+
+             try:
+                 with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
+                     for file_path in lora_dir.rglob('*'):
+                         if file_path.is_file():
+                             arcname = file_path.relative_to(lora_dir)
+                             zipf.write(file_path, arcname)
+
+                 # Upload the ZIP to the loras/ folder in the dataset repo
+                 upload_file(
+                     repo_id=self.repo_id,
+                     repo_type="dataset",
+                     path_or_fileobj=zip_path,
+                     path_in_repo=f"loras/{lora_name}.zip",
+                     commit_message=f"Upload LEMM LoRA adapter: {lora_name}",
+                     token=self.token
+                 )
+             finally:
+                 # Clean up the temp file
+                 if os.path.exists(zip_path):
+                     os.unlink(zip_path)
+
+             logger.info(f"✅ Uploaded LoRA: {self.repo_id}/loras/{lora_name}.zip")
+             logger.info(f"🔗 View at: https://huggingface.co/datasets/{self.repo_id}/blob/main/loras/{lora_name}.zip")
+
+             return {
+                 'repo_id': f"{self.repo_id}/loras/{lora_name}.zip",
+                 'url': f"https://huggingface.co/datasets/{self.repo_id}/blob/main/loras/{lora_name}.zip",
+                 'dataset_repo': f"https://huggingface.co/datasets/{self.repo_id}"
+             }
+
+         except Exception as e:
+             logger.error(f"Failed to upload LoRA: {e}")
+             logger.info(f"💾 LoRA saved locally: {lora_dir.name}")
+             return None
+
+     def _generate_lora_readme(self, lora_name: str, config: Optional[Dict] = None) -> str:
+         """Generate README.md content for a LoRA model"""
+
+         config_info = ""
+         if config:
+             config_info = f"""
+ ## Training Configuration
+
+ - **Dataset**: {config.get('dataset', 'N/A')}
+ - **Epochs**: {config.get('epochs', 'N/A')}
+ - **Learning Rate**: {config.get('learning_rate', 'N/A')}
+ - **Batch Size**: {config.get('batch_size', 'N/A')}
+ - **LoRA Rank**: {config.get('lora_rank', 'N/A')}
+ """
+
+         return f"""---
+ license: mit
+ tags:
+ - lora
+ - music-generation
+ - diffrhythm2
+ - lemm
+ library_name: diffusers
+ ---
+
+ # LEMM LoRA: {lora_name}
+
+ This is a LoRA (Low-Rank Adaptation) adapter for DiffRhythm2 music generation, trained using LEMM (Let Everyone Make Music).
+
+ ## About LEMM
+
+ LEMM is an AI music generation system that lets you:
+ - Generate high-quality music with built-in vocals
+ - Train custom LoRA adapters for specific styles
+ - Fine-tune models on your own datasets
+
+ 🎵 **Try it**: [LEMM Space](https://huggingface.co/spaces/Gamahea/lemm-test-100)
+ {config_info}
+ ## How to Use
+
+ ### In LEMM Space
+ 1. Visit [LEMM](https://huggingface.co/spaces/Gamahea/lemm-test-100)
+ 2. Go to the "LoRA Management" tab
+ 3. Enter this LoRA name: `{lora_name}`
+ 4. Click "Download from Hub"
+ 5. Use in generation or as a base for continued training
+
+ ### In Your Code
+ ```python
+ from huggingface_hub import hf_hub_download
+ import zipfile
+
+ # Download the LoRA ZIP from the dataset repo
+ zip_path = hf_hub_download(
+     repo_id="{self.repo_id}",
+     repo_type="dataset",
+     filename="loras/{lora_name}.zip",
+ )
+
+ # Extract it locally
+ with zipfile.ZipFile(zip_path) as zipf:
+     zipf.extractall("./loras/{lora_name}")
+
+ # Load and use with DiffRhythm2
+ # (See LEMM documentation for integration)
+ ```
+
+ ## Model Files
+
+ - `final_model.pt` - Trained LoRA weights
+ - `config.yaml` - Training configuration
+ - `README.md` - This file
+
+ ## Dataset Repository
+
+ Part of the [LEMM Training Data Repository](https://huggingface.co/datasets/{self.repo_id})
+
+ ## License
+
+ MIT License - Free to use and modify
+ """
+
+     def upload_dataset(self, dataset_dir: Path, dataset_info: Optional[Dict] = None) -> Optional[Dict]:
+         """
+         Upload a prepared dataset as a ZIP file to the HF dataset repo
+
+         Args:
+             dataset_dir: Local dataset directory
+             dataset_info: Optional dataset metadata (currently unused)
+
+         Returns:
+             Dict with upload results or None if failed
+         """
+         if not self.has_hf:
+             logger.info(f"💾 Dataset saved locally: {dataset_dir.name}")
+             return None
+
+         if not self.token:
+             logger.warning("⚠️ No HuggingFace token found - cannot upload dataset")
+             logger.info(f"💾 Dataset saved locally: {dataset_dir.name}")
+             return None
+
+         try:
+             from huggingface_hub import upload_file
+             import zipfile
+             import tempfile
+
+             dataset_name = dataset_dir.name
+
+             logger.info(f"📤 Creating ZIP and uploading dataset to repo: {self.repo_id}/datasets/{dataset_name}.zip...")
+
+             # Create a temporary ZIP file
+             with tempfile.NamedTemporaryFile(mode='wb', suffix='.zip', delete=False) as tmp_file:
+                 zip_path = tmp_file.name
+
+             try:
+                 with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
+                     for file_path in dataset_dir.rglob('*'):
+                         if file_path.is_file():
+                             arcname = file_path.relative_to(dataset_dir)
+                             zipf.write(file_path, arcname)
+
+                 # Upload the ZIP to the datasets/ folder in the dataset repo
+                 upload_file(
+                     repo_id=self.repo_id,
+                     repo_type="dataset",
+                     path_or_fileobj=zip_path,
+                     path_in_repo=f"datasets/{dataset_name}.zip",
+                     commit_message=f"Upload prepared dataset: {dataset_name}",
+                     token=self.token
+                 )
+             finally:
+                 # Clean up the temp file
+                 if os.path.exists(zip_path):
+                     os.unlink(zip_path)
+
+             logger.info(f"✅ Uploaded dataset: {self.repo_id}/datasets/{dataset_name}.zip")
+
+             return {
+                 'repo_id': f"{self.repo_id}/datasets/{dataset_name}.zip",
+                 'url': f"https://huggingface.co/datasets/{self.repo_id}/blob/main/datasets/{dataset_name}.zip",
+                 'dataset_repo': f"https://huggingface.co/datasets/{self.repo_id}"
+             }
+
+         except Exception as e:
+             logger.error(f"Failed to upload dataset: {e}")
+             logger.info(f"💾 Dataset saved locally: {dataset_dir.name}")
+             return None
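
For reference, a minimal usage sketch of the service above (illustrative only: the import path mirrors the file location, the LoRA directory and `training_config` values are hypothetical, and it assumes `HF_TOKEN` is set in the environment):

```python
from pathlib import Path

from backend.services.hf_storage_service import HFStorageService

# Instantiate against the default Gamahea/lemmdata dataset repo
storage = HFStorageService()

# Pull down any LoRAs present in the repo but missing locally
result = storage.sync_on_startup(loras_dir=Path("loras"))
print(f"Synced {result['synced']} LoRA(s)")

# After training, push a finished adapter back up as loras/jazz-v1.zip
info = storage.upload_lora(
    Path("loras/jazz-v1"),  # hypothetical adapter directory
    training_config={
        "dataset": "jazz-clips",  # hypothetical values, rendered into the README
        "epochs": 10,
        "learning_rate": 1e-4,
        "batch_size": 4,
        "lora_rank": 16,
    },
)
if info:
    print(f"Uploaded: {info['url']}")
```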