Spaces:
Running
Running
Commit
·
01942d8
1
Parent(s):
d6b3b9f
Add dataset probing and validation
Browse files
app.py
CHANGED
|
@@ -1,10 +1,50 @@
|
|
| 1 |
import gradio as gr
|
|
|
|
| 2 |
|
| 3 |
|
| 4 |
theme = gr.themes.Soft(
|
| 5 |
primary_hue="green",
|
| 6 |
)
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
with gr.Blocks(theme=theme) as iface:
|
| 9 |
with gr.Row():
|
| 10 |
with gr.Column():
|
|
@@ -28,13 +68,38 @@ with gr.Blocks(theme=theme) as iface:
|
|
| 28 |
placeholder="tweet_eval",
|
| 29 |
)
|
| 30 |
|
| 31 |
-
gr.Dropdown(
|
| 32 |
label="Hugging Face dataset subset",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
)
|
| 34 |
|
| 35 |
-
gr.Dropdown(
|
| 36 |
label="Hugging Face dataset split",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
)
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
iface.queue(max_size=20)
|
| 40 |
iface.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import datasets
|
| 3 |
|
| 4 |
|
| 5 |
theme = gr.themes.Soft(
|
| 6 |
primary_hue="green",
|
| 7 |
)
|
| 8 |
|
| 9 |
+
|
| 10 |
+
def check_dataset(dataset_id, dataset_config="default", dataset_split="test"):
    """Probe a Hugging Face dataset and report which selections are valid.

    Only Hub metadata is queried (config names, then split names); the
    dataset itself is not downloaded.

    Args:
        dataset_id: Dataset repository id on the Hugging Face Hub.
        dataset_config: Requested subset (config) name.
        dataset_split: Requested split name.

    Returns:
        A 3-tuple ``(dataset_id, config, split)`` whose shape encodes the
        outcome:

        * ``(None, dataset_config, dataset_split)`` -- the config or split
          names could not be listed (missing, private or broken dataset).
        * ``(dataset_id, [config names], dataset_split)`` -- the requested
          config does not exist; the list holds the available ones.
        * ``(dataset_id, None, [split names])`` -- the requested split does
          not exist; the list holds the available ones.
        * ``(dataset_id, dataset_config, dataset_split)`` -- everything is
          valid.
    """
    try:
        configs = datasets.get_dataset_config_names(dataset_id)
    except Exception:
        # Dataset may not exist (or is private / gated).
        return None, dataset_config, dataset_split

    if dataset_config not in configs:
        # Need to choose dataset subset (config).
        return dataset_id, configs, dataset_split

    try:
        # Listing split names avoids downloading and preparing the whole
        # dataset just to validate a name.
        splits = list(
            datasets.get_dataset_split_names(dataset_id, config_name=dataset_config)
        )
    except Exception:
        # Split metadata is unavailable; report it like an inaccessible
        # dataset instead of crashing the UI callback.
        return None, dataset_config, dataset_split

    if dataset_split not in splits:
        # Need to choose dataset split.
        return dataset_id, None, splits

    return dataset_id, dataset_config, dataset_split
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def try_submit(dataset_id, dataset_config, dataset_split):
    """Validate the selected dataset/config/split and refresh the dropdowns.

    Calls ``check_dataset`` and turns its result into values for the two
    dropdown outputs. A warning toast is raised for each problem found.

    Args:
        dataset_id: Dataset id typed by the user.
        dataset_config: Currently selected subset (config).
        dataset_split: Currently selected split.

    Returns:
        A ``(config, split)`` pair for the config and split dropdowns. Each
        item is either the current value (unchanged) or a dropdown update
        carrying the list of valid choices.
    """
    # Validate dataset
    d_id, config, split = check_dataset(
        dataset_id=dataset_id,
        dataset_config=dataset_config,
        dataset_split=dataset_split,
    )

    if d_id is None:
        gr.Warning(f'Dataset "{dataset_id}" is not accessible. Please set your HF_TOKEN if it is a private dataset.')
        # Nothing else can be validated; leave both dropdowns untouched.
        return dataset_config, dataset_split

    if isinstance(config, list):
        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_config}" config. Please choose a valid config.')
        # Guard the default so an empty list cannot raise IndexError.
        config = gr.Dropdown.update(choices=config, value=config[0] if config else None)
    elif config is None:
        # check_dataset leaves this slot empty when only the split is wrong;
        # keep the user's selection instead of clearing the dropdown.
        config = dataset_config

    if isinstance(split, list):
        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_split}" split. Please choose a valid split.')
        split = gr.Dropdown.update(choices=split, value=split[0] if split else None)
    elif split is None:
        # Same reasoning as for the config: never wipe the current value.
        split = dataset_split

    return config, split
|
| 47 |
+
|
| 48 |
with gr.Blocks(theme=theme) as iface:
|
| 49 |
with gr.Row():
|
| 50 |
with gr.Column():
|
|
|
|
| 68 |
placeholder="tweet_eval",
|
| 69 |
)
|
| 70 |
|
| 71 |
+
dataset_config_input = gr.Dropdown(
|
| 72 |
label="Hugging Face dataset subset",
|
| 73 |
+
choices=[
|
| 74 |
+
"default",
|
| 75 |
+
],
|
| 76 |
+
allow_custom_value=True,
|
| 77 |
+
value="default",
|
| 78 |
)
|
| 79 |
|
| 80 |
+
dataset_split_input = gr.Dropdown(
|
| 81 |
label="Hugging Face dataset split",
|
| 82 |
+
choices=[
|
| 83 |
+
"test",
|
| 84 |
+
],
|
| 85 |
+
allow_custom_value=True,
|
| 86 |
+
value="test",
|
| 87 |
)
|
| 88 |
|
| 89 |
+
with gr.Row():
|
| 90 |
+
run_btn = gr.Button("Validate and submit", variant="primary")
|
| 91 |
+
run_btn.click(
|
| 92 |
+
try_submit,
|
| 93 |
+
inputs=[
|
| 94 |
+
dataset_id_input,
|
| 95 |
+
dataset_config_input,
|
| 96 |
+
dataset_split_input
|
| 97 |
+
],
|
| 98 |
+
outputs=[
|
| 99 |
+
dataset_config_input,
|
| 100 |
+
dataset_split_input
|
| 101 |
+
],
|
| 102 |
+
)
|
| 103 |
+
|
| 104 |
iface.queue(max_size=20)
|
| 105 |
iface.launch()
|