Spaces: Running on Zero
Fix dataset preparation bug and HF repo authentication
Critical Fixes:
- Fixed NameError: 'datasets_to_process' undefined -> changed to 'dataset_keys'
- Added repo accessibility check to prevent 401 errors
- HF storage now checks if repo exists and is accessible during init
- All upload/download methods check repo_accessible flag before operations
- Changed warnings to debug logs to reduce console noise
HF Repo Handling:
- On init, checks if Gamahea/lemm-dataset exists and is accessible
- If repo not accessible (no token/401), sets repo_accessible=False
- All operations gracefully skip HF uploads/downloads
- LoRAs and datasets still saved locally
- No more 401 Client Error spam in logs
User Impact:
- Dataset preparation now works without crashes
- Training completes successfully
- HF Space works without HF_TOKEN (local storage only)
- If HF_TOKEN configured, uploads work automatically
- app.py +1 -1
- backend/services/hf_storage_service.py +20 -10
|
@@ -1186,7 +1186,7 @@ def prepare_datasets_for_training(selected_datasets, max_samples_per_dataset):
|
|
| 1186 |
# Upload prepared datasets to HF repo
|
| 1187 |
status_messages.append(f"\n📤 Uploading prepared datasets to HuggingFace repo...")
|
| 1188 |
upload_count = 0
|
| 1189 |
-
for dataset_key in datasets_to_process:
|
| 1190 |
dataset_dir = Path("training_data") / dataset_key
|
| 1191 |
if dataset_dir.exists():
|
| 1192 |
if hf_storage.upload_dataset(dataset_dir):
|
|
|
|
| 1186 |
# Upload prepared datasets to HF repo
|
| 1187 |
status_messages.append(f"\n📤 Uploading prepared datasets to HuggingFace repo...")
|
| 1188 |
upload_count = 0
|
| 1189 |
+
for dataset_key in dataset_keys: # Fixed: was datasets_to_process
|
| 1190 |
dataset_dir = Path("training_data") / dataset_key
|
| 1191 |
if dataset_dir.exists():
|
| 1192 |
if hf_storage.upload_dataset(dataset_dir):
|
|
@@ -26,15 +26,25 @@ class HFStorageService:
|
|
| 26 |
|
| 27 |
logger.info(f"HF Storage initialized for repo: {repo_id}")
|
| 28 |
|
| 29 |
-
# Try to import huggingface_hub
|
| 30 |
try:
|
| 31 |
from huggingface_hub import HfApi, hf_hub_download, upload_folder
|
| 32 |
self.api = HfApi()
|
| 33 |
self.has_hf = True
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
except ImportError:
|
| 36 |
logger.warning("⚠️ huggingface_hub not available, using local storage only")
|
| 37 |
self.has_hf = False
|
|
|
|
| 38 |
|
| 39 |
def download_all_loras(self, target_dir: Path) -> List[str]:
|
| 40 |
"""
|
|
@@ -46,8 +56,8 @@ class HFStorageService:
|
|
| 46 |
Returns:
|
| 47 |
List of downloaded LoRA names
|
| 48 |
"""
|
| 49 |
-
if not self.has_hf:
|
| 50 |
-
logger.warning(...)
|
| 51 |
return []
|
| 52 |
|
| 53 |
try:
|
|
@@ -97,8 +107,8 @@ class HFStorageService:
|
|
| 97 |
Returns:
|
| 98 |
List of downloaded dataset keys
|
| 99 |
"""
|
| 100 |
-
if not self.has_hf:
|
| 101 |
-
logger.warning(...)
|
| 102 |
return []
|
| 103 |
|
| 104 |
try:
|
|
@@ -148,8 +158,8 @@ class HFStorageService:
|
|
| 148 |
Returns:
|
| 149 |
True if successful
|
| 150 |
"""
|
| 151 |
-
if not self.has_hf:
|
| 152 |
-
logger.warning(...)
|
| 153 |
return False
|
| 154 |
|
| 155 |
try:
|
|
@@ -182,8 +192,8 @@ class HFStorageService:
|
|
| 182 |
Returns:
|
| 183 |
True if successful
|
| 184 |
"""
|
| 185 |
-
if not self.has_hf:
|
| 186 |
-
logger.warning(...)
|
| 187 |
return False
|
| 188 |
|
| 189 |
try:
|
|
|
|
| 26 |
|
| 27 |
logger.info(f"HF Storage initialized for repo: {repo_id}")
|
| 28 |
|
| 29 |
+
# Try to import huggingface_hub and check authentication
|
| 30 |
try:
|
| 31 |
from huggingface_hub import HfApi, hf_hub_download, upload_folder
|
| 32 |
self.api = HfApi()
|
| 33 |
self.has_hf = True
|
| 34 |
+
|
| 35 |
+
# Check if repo exists and is accessible
|
| 36 |
+
try:
|
| 37 |
+
self.api.repo_info(repo_id=repo_id, repo_type="dataset")
|
| 38 |
+
logger.info(f"✅ HuggingFace Hub available - repo accessible")
|
| 39 |
+
self.repo_accessible = True
|
| 40 |
+
except Exception as e:
|
| 41 |
+
logger.warning(f"⚠️ HF repo not accessible (may need authentication): {e}")
|
| 42 |
+
logger.warning("Uploads/downloads will be skipped. Local storage only.")
|
| 43 |
+
self.repo_accessible = False
|
| 44 |
except ImportError:
|
| 45 |
logger.warning("⚠️ huggingface_hub not available, using local storage only")
|
| 46 |
self.has_hf = False
|
| 47 |
+
self.repo_accessible = False
|
| 48 |
|
| 49 |
def download_all_loras(self, target_dir: Path) -> List[str]:
|
| 50 |
"""
|
|
|
|
| 56 |
Returns:
|
| 57 |
List of downloaded LoRA names
|
| 58 |
"""
|
| 59 |
+
if not self.has_hf or not self.repo_accessible:
|
| 60 |
+
logger.debug("HF repo not accessible, skipping LoRA download")
|
| 61 |
return []
|
| 62 |
|
| 63 |
try:
|
|
|
|
| 107 |
Returns:
|
| 108 |
List of downloaded dataset keys
|
| 109 |
"""
|
| 110 |
+
if not self.has_hf or not self.repo_accessible:
|
| 111 |
+
logger.debug("HF repo not accessible, skipping dataset download")
|
| 112 |
return []
|
| 113 |
|
| 114 |
try:
|
|
|
|
| 158 |
Returns:
|
| 159 |
True if successful
|
| 160 |
"""
|
| 161 |
+
if not self.has_hf or not self.repo_accessible:
|
| 162 |
+
logger.debug(f"HF repo not accessible, LoRA saved locally only: {lora_dir.name}")
|
| 163 |
return False
|
| 164 |
|
| 165 |
try:
|
|
|
|
| 192 |
Returns:
|
| 193 |
True if successful
|
| 194 |
"""
|
| 195 |
+
if not self.has_hf or not self.repo_accessible:
|
| 196 |
+
logger.debug(f"HF repo not accessible, dataset saved locally only: {dataset_dir.name}")
|
| 197 |
return False
|
| 198 |
|
| 199 |
try:
|