Spaces:
Running
on
Zero
Running
on
Zero
Gamahea
committed on
Commit
·
661fe20
1
Parent(s):
9a8320c
Upgrade to complete datasets with 11TB Pro storage
Browse files
Added: Million Song Dataset, FMA Large, MusicCaps, AudioSet Music, complete NSynth/MAESTRO/Common Voice/LibriSpeech
- app.py +24 -14
- backend/services/dataset_service.py +6 -5
app.py
CHANGED
|
@@ -889,13 +889,18 @@ def download_prepare_datasets(vocal_datasets, symbolic_datasets):
|
|
| 889 |
dataset_map = {
|
| 890 |
# Music datasets
|
| 891 |
"GTZAN Music Genre (1000 tracks, 10 genres)": "gtzan",
|
| 892 |
-
"NSynth
|
| 893 |
-
"MAESTRO Piano Performances (
|
|
|
|
|
|
|
|
|
|
|
|
|
| 894 |
# Vocal & Sound datasets
|
| 895 |
-
"LJSpeech (13k vocal clips
|
| 896 |
-
"Common Voice English (
|
| 897 |
-
"
|
| 898 |
-
"
|
|
|
|
| 899 |
}
|
| 900 |
|
| 901 |
status_messages = []
|
|
@@ -1565,24 +1570,29 @@ with gr.Blocks(
|
|
| 1565 |
vocal_datasets = gr.CheckboxGroup(
|
| 1566 |
choices=[
|
| 1567 |
"GTZAN Music Genre (1000 tracks, 10 genres)",
|
| 1568 |
-
"NSynth
|
| 1569 |
-
"MAESTRO Piano Performances (
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1570 |
],
|
| 1571 |
label="Select Music Datasets",
|
| 1572 |
-
info="
|
| 1573 |
)
|
| 1574 |
|
| 1575 |
with gr.Column():
|
| 1576 |
gr.Markdown("**Vocal & Sound Datasets**")
|
| 1577 |
symbolic_datasets = gr.CheckboxGroup(
|
| 1578 |
choices=[
|
| 1579 |
-
"LJSpeech (13k vocal clips
|
| 1580 |
-
"Common Voice English (
|
| 1581 |
-
"
|
| 1582 |
-
"
|
|
|
|
| 1583 |
],
|
| 1584 |
label="Select Vocal/Sound Datasets",
|
| 1585 |
-
info="
|
| 1586 |
)
|
| 1587 |
|
| 1588 |
dataset_download_btn = gr.Button("📥 Download & Prepare Datasets", variant="secondary")
|
|
|
|
| 889 |
dataset_map = {
|
| 890 |
# Music datasets
|
| 891 |
"GTZAN Music Genre (1000 tracks, 10 genres)": "gtzan",
|
| 892 |
+
"NSynth Complete (300k+ musical notes)": "nsynth",
|
| 893 |
+
"MAESTRO Piano Performances (complete)": "maestro",
|
| 894 |
+
"Million Song Dataset (10k subset)": "million_song",
|
| 895 |
+
"Free Music Archive Large (106k tracks)": "fma_large",
|
| 896 |
+
"MusicCaps (5.5k clips with descriptions)": "musiccaps",
|
| 897 |
+
"AudioSet Music (labeled audio events)": "audioset_music",
|
| 898 |
# Vocal & Sound datasets
|
| 899 |
+
"LJSpeech (13k vocal clips)": "ljspeech",
|
| 900 |
+
"Common Voice English (complete)": "common_voice_en",
|
| 901 |
+
"LibriSpeech Complete (1000 hours)": "librispeech",
|
| 902 |
+
"ESC-50 Environmental Sounds": "esc50",
|
| 903 |
+
"Google Speech Commands": "speech_commands"
|
| 904 |
}
|
| 905 |
|
| 906 |
status_messages = []
|
|
|
|
| 1570 |
vocal_datasets = gr.CheckboxGroup(
|
| 1571 |
choices=[
|
| 1572 |
"GTZAN Music Genre (1000 tracks, 10 genres)",
|
| 1573 |
+
"NSynth Complete (300k+ musical notes)",
|
| 1574 |
+
"MAESTRO Piano Performances (complete)",
|
| 1575 |
+
"Million Song Dataset (10k subset)",
|
| 1576 |
+
"Free Music Archive Large (106k tracks)",
|
| 1577 |
+
"MusicCaps (5.5k clips with descriptions)",
|
| 1578 |
+
"AudioSet Music (labeled audio events)"
|
| 1579 |
],
|
| 1580 |
label="Select Music Datasets",
|
| 1581 |
+
info="Comprehensive music datasets for training (11TB storage available)"
|
| 1582 |
)
|
| 1583 |
|
| 1584 |
with gr.Column():
|
| 1585 |
gr.Markdown("**Vocal & Sound Datasets**")
|
| 1586 |
symbolic_datasets = gr.CheckboxGroup(
|
| 1587 |
choices=[
|
| 1588 |
+
"LJSpeech (13k vocal clips)",
|
| 1589 |
+
"Common Voice English (complete)",
|
| 1590 |
+
"LibriSpeech Complete (1000 hours)",
|
| 1591 |
+
"ESC-50 Environmental Sounds",
|
| 1592 |
+
"Google Speech Commands"
|
| 1593 |
],
|
| 1594 |
label="Select Vocal/Sound Datasets",
|
| 1595 |
+
info="Complete vocal and sound datasets"
|
| 1596 |
)
|
| 1597 |
|
| 1598 |
dataset_download_btn = gr.Button("📥 Download & Prepare Datasets", variant="secondary")
|
backend/services/dataset_service.py
CHANGED
|
@@ -130,12 +130,13 @@ class DatasetService:
|
|
| 130 |
if progress_callback:
|
| 131 |
progress_callback(f"📦 Starting download: {dataset_name}")
|
| 132 |
|
| 133 |
-
#
|
| 134 |
size_gb = dataset_config.get('size_gb', 0)
|
| 135 |
-
if size_gb >
|
| 136 |
-
progress_callback(f"⚠️
|
| 137 |
-
progress_callback(f" This
|
| 138 |
-
|
|
|
|
| 139 |
else:
|
| 140 |
progress_callback(f"ℹ️ Dataset size: ~{size_gb:.1f} GB")
|
| 141 |
|
|
|
|
| 130 |
if progress_callback:
|
| 131 |
progress_callback(f"📦 Starting download: {dataset_name}")
|
| 132 |
|
| 133 |
+
# Show dataset size info
|
| 134 |
size_gb = dataset_config.get('size_gb', 0)
|
| 135 |
+
if size_gb > 100.0:
|
| 136 |
+
progress_callback(f"⚠️ Large dataset: {size_gb:.1f} GB")
|
| 137 |
+
progress_callback(f" This may take significant time to download.")
|
| 138 |
+
elif size_gb > 10.0:
|
| 139 |
+
progress_callback(f"ℹ️ Dataset size: ~{size_gb:.1f} GB (may take a few minutes)")
|
| 140 |
else:
|
| 141 |
progress_callback(f"ℹ️ Dataset size: ~{size_gb:.1f} GB")
|
| 142 |
|