Gamahea committed
Commit · fbc33ea
1 Parent(s): 623cee1
Fix missing is_dataset_downloaded methods
- Added is_dataset_downloaded() method to check if a dataset exists
- Added get_downloaded_datasets() method to retrieve all downloaded datasets
- These methods were being called but not defined in the class
backend/services/dataset_service.py
CHANGED
@@ -119,6 +119,43 @@ class DatasetService:
         """
         self.base_dir = Path(base_dir)
         self.base_dir.mkdir(parents=True, exist_ok=True)
+
+    def is_dataset_downloaded(self, dataset_key: str) -> bool:
+        """
+        Check if a dataset has already been downloaded
+
+        Args:
+            dataset_key: Key identifying the dataset
+
+        Returns:
+            True if dataset exists and has metadata file, False otherwise
+        """
+        dataset_dir = self.base_dir / dataset_key
+        metadata_path = dataset_dir / 'dataset_info.json'
+        return metadata_path.exists()
+
+    def get_downloaded_datasets(self) -> Dict[str, Dict]:
+        """
+        Get information about all downloaded datasets
+
+        Returns:
+            Dictionary mapping dataset keys to their metadata
+        """
+        downloaded = {}
+
+        for dataset_key in self.DATASETS.keys():
+            if self.is_dataset_downloaded(dataset_key):
+                dataset_dir = self.base_dir / dataset_key
+                metadata_path = dataset_dir / 'dataset_info.json'
+
+                try:
+                    with open(metadata_path, 'r') as f:
+                        info = json.load(f)
+                    downloaded[dataset_key] = info
+                except Exception as e:
+                    logger.warning(f"Failed to load metadata for {dataset_key}: {e}")
+
+        return downloaded
 
     def download_dataset(self, dataset_key: str, progress_callback=None) -> Dict:
         """