Spaces:
Running
Running
Commit
·
e4b8ab6
1
Parent(s):
c283445
init
Browse files
app.py
CHANGED
|
@@ -66,7 +66,7 @@ def get_available_splits(repo, benchmark):
|
|
| 66 |
def load_details_and_results(repo, subset, split):
|
| 67 |
def worker(example):
|
| 68 |
example["predictions"] = example["predictions"]
|
| 69 |
-
example["gold"] = example["gold"]
|
| 70 |
example["metrics"] = example["metrics"]
|
| 71 |
return example
|
| 72 |
|
|
@@ -198,16 +198,32 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 198 |
)
|
| 199 |
|
| 200 |
with gr.Row():
|
| 201 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
label="Benchmark",
|
| 203 |
choices=[],
|
| 204 |
-
info="Select the benchmark subset"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
)
|
| 206 |
split = gr.Dropdown(
|
| 207 |
label="Split",
|
| 208 |
choices=[],
|
| 209 |
info="Select evaluation."
|
| 210 |
)
|
|
|
|
| 211 |
|
| 212 |
with gr.Row():
|
| 213 |
example_index = gr.Number(
|
|
@@ -229,6 +245,17 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 229 |
def get_active_repo(selection_method, dropdown_value, custom_value):
|
| 230 |
return custom_value if selection_method == "Custom" else dropdown_value
|
| 231 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
# Update the event handlers
|
| 233 |
repo_select.change(
|
| 234 |
fn=toggle_repo_input,
|
|
@@ -236,6 +263,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 236 |
outputs=[repo_dropdown, repo_custom]
|
| 237 |
)
|
| 238 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
# Update the repository change handler to update available benchmarks
|
| 240 |
def update_benchmarks(selection_method, dropdown_value, custom_value):
|
| 241 |
repo = get_active_repo(selection_method, dropdown_value, custom_value)
|
|
@@ -246,35 +279,48 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 246 |
repo_dropdown.change(
|
| 247 |
fn=update_benchmarks,
|
| 248 |
inputs=[repo_select, repo_dropdown, repo_custom],
|
| 249 |
-
outputs=
|
| 250 |
)
|
| 251 |
|
| 252 |
repo_custom.change(
|
| 253 |
fn=update_benchmarks,
|
| 254 |
inputs=[repo_select, repo_dropdown, repo_custom],
|
| 255 |
-
outputs=
|
| 256 |
)
|
| 257 |
|
| 258 |
# Update the benchmark change handler
|
| 259 |
-
|
| 260 |
-
fn=lambda selection_method, dropdown, custom, bench:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
get_active_repo(selection_method, dropdown, custom),
|
| 262 |
-
|
| 263 |
),
|
| 264 |
-
inputs=[repo_select, repo_dropdown, repo_custom,
|
| 265 |
outputs=split
|
| 266 |
)
|
| 267 |
|
| 268 |
# Display results
|
| 269 |
output = gr.HTML()
|
| 270 |
submit_btn.click(
|
| 271 |
-
fn=lambda
|
| 272 |
-
get_active_repo(
|
| 273 |
-
|
| 274 |
split_val,
|
| 275 |
idx
|
| 276 |
),
|
| 277 |
-
inputs=[repo_select, repo_dropdown, repo_custom,
|
| 278 |
outputs=output
|
| 279 |
)
|
| 280 |
|
|
|
|
| 66 |
def load_details_and_results(repo, subset, split):
|
| 67 |
def worker(example):
|
| 68 |
example["predictions"] = example["predictions"]
|
| 69 |
+
example["gold"] = example["gold"]
|
| 70 |
example["metrics"] = example["metrics"]
|
| 71 |
return example
|
| 72 |
|
|
|
|
| 198 |
)
|
| 199 |
|
| 200 |
with gr.Row():
|
| 201 |
+
benchmark_select = gr.Radio(
|
| 202 |
+
choices=["Choose from list", "Custom"],
|
| 203 |
+
label="Benchmark Selection Method",
|
| 204 |
+
value="Choose from list",
|
| 205 |
+
info="Select how you want to specify the benchmark"
|
| 206 |
+
)
|
| 207 |
+
|
| 208 |
+
with gr.Row():
|
| 209 |
+
benchmark_dropdown = gr.Dropdown(
|
| 210 |
label="Benchmark",
|
| 211 |
choices=[],
|
| 212 |
+
info="Select the benchmark subset",
|
| 213 |
+
visible=True
|
| 214 |
+
)
|
| 215 |
+
benchmark_custom = gr.Textbox(
|
| 216 |
+
label="Custom Benchmark",
|
| 217 |
+
placeholder="e.g. lighteval|gpqa:diamond|0",
|
| 218 |
+
visible=False,
|
| 219 |
+
info="Enter custom benchmark name"
|
| 220 |
)
|
| 221 |
split = gr.Dropdown(
|
| 222 |
label="Split",
|
| 223 |
choices=[],
|
| 224 |
info="Select evaluation."
|
| 225 |
)
|
| 226 |
+
load_splits_btn = gr.Button("Load Splits", variant="secondary")
|
| 227 |
|
| 228 |
with gr.Row():
|
| 229 |
example_index = gr.Number(
|
|
|
|
| 245 |
def get_active_repo(selection_method, dropdown_value, custom_value):
|
| 246 |
return custom_value if selection_method == "Custom" else dropdown_value
|
| 247 |
|
| 248 |
+
# Add this function to handle benchmark visibility toggling
|
| 249 |
+
def toggle_benchmark_input(choice):
|
| 250 |
+
return {
|
| 251 |
+
benchmark_dropdown: gr.update(visible=(choice == "Choose from list")),
|
| 252 |
+
benchmark_custom: gr.update(visible=(choice == "Custom"))
|
| 253 |
+
}
|
| 254 |
+
|
| 255 |
+
# Add this function to get the active benchmark name
|
| 256 |
+
def get_active_benchmark(selection_method, dropdown_value, custom_value):
|
| 257 |
+
return custom_value if selection_method == "Custom" else dropdown_value
|
| 258 |
+
|
| 259 |
# Update the event handlers
|
| 260 |
repo_select.change(
|
| 261 |
fn=toggle_repo_input,
|
|
|
|
| 263 |
outputs=[repo_dropdown, repo_custom]
|
| 264 |
)
|
| 265 |
|
| 266 |
+
benchmark_select.change(
|
| 267 |
+
fn=toggle_benchmark_input,
|
| 268 |
+
inputs=[benchmark_select],
|
| 269 |
+
outputs=[benchmark_dropdown, benchmark_custom]
|
| 270 |
+
)
|
| 271 |
+
|
| 272 |
# Update the repository change handler to update available benchmarks
|
| 273 |
def update_benchmarks(selection_method, dropdown_value, custom_value):
|
| 274 |
repo = get_active_repo(selection_method, dropdown_value, custom_value)
|
|
|
|
| 279 |
repo_dropdown.change(
|
| 280 |
fn=update_benchmarks,
|
| 281 |
inputs=[repo_select, repo_dropdown, repo_custom],
|
| 282 |
+
outputs=benchmark_dropdown
|
| 283 |
)
|
| 284 |
|
| 285 |
repo_custom.change(
|
| 286 |
fn=update_benchmarks,
|
| 287 |
inputs=[repo_select, repo_dropdown, repo_custom],
|
| 288 |
+
outputs=benchmark_dropdown
|
| 289 |
)
|
| 290 |
|
| 291 |
# Update the benchmark change handler
|
| 292 |
+
benchmark_dropdown.change(
|
| 293 |
+
fn=lambda selection_method, dropdown, custom, bench: gr.Dropdown(choices=[], value=None),
|
| 294 |
+
inputs=[repo_select, repo_dropdown, repo_custom, benchmark_dropdown],
|
| 295 |
+
outputs=split
|
| 296 |
+
)
|
| 297 |
+
|
| 298 |
+
benchmark_custom.change(
|
| 299 |
+
fn=lambda selection_method, dropdown, custom, bench: gr.Dropdown(choices=[], value=None),
|
| 300 |
+
inputs=[repo_select, repo_dropdown, repo_custom, benchmark_custom],
|
| 301 |
+
outputs=split
|
| 302 |
+
)
|
| 303 |
+
|
| 304 |
+
# Add handler for the load splits button
|
| 305 |
+
load_splits_btn.click(
|
| 306 |
+
fn=lambda selection_method, dropdown, custom, bench_selection_method, bench_dropdown, bench_custom: update_splits(
|
| 307 |
get_active_repo(selection_method, dropdown, custom),
|
| 308 |
+
get_active_benchmark(bench_selection_method, bench_dropdown, bench_custom)
|
| 309 |
),
|
| 310 |
+
inputs=[repo_select, repo_dropdown, repo_custom, benchmark_select, benchmark_dropdown, benchmark_custom],
|
| 311 |
outputs=split
|
| 312 |
)
|
| 313 |
|
| 314 |
# Display results
|
| 315 |
output = gr.HTML()
|
| 316 |
submit_btn.click(
|
| 317 |
+
fn=lambda repo_selection_method, repo_dropdown, repo_custom, bench_selection_method, bench_dropdown, bench_custom, split_val, idx: display_model_details(
|
| 318 |
+
get_active_repo(repo_selection_method, repo_dropdown, repo_custom),
|
| 319 |
+
get_active_benchmark(bench_selection_method, bench_dropdown, bench_custom),
|
| 320 |
split_val,
|
| 321 |
idx
|
| 322 |
),
|
| 323 |
+
inputs=[repo_select, repo_dropdown, repo_custom, benchmark_select, benchmark_dropdown, benchmark_custom, split, example_index],
|
| 324 |
outputs=output
|
| 325 |
)
|
| 326 |
|