Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Implement login for GPQA Details
Browse files
- README.md +1 -0
- app.py +9 -7
- src/constants.py +3 -3
- src/details.py +10 -5
README.md
CHANGED
|
@@ -6,6 +6,7 @@ colorTo: green
|
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 4.44.1
|
| 8 |
app_file: app.py
|
|
|
|
| 9 |
pinned: false
|
| 10 |
short_description: Compare Open LLM Leaderboard results
|
| 11 |
datasets:
|
|
|
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 4.44.1
|
| 8 |
app_file: app.py
|
| 9 |
+
hf_oauth: true
|
| 10 |
pinned: false
|
| 11 |
short_description: Compare Open LLM Leaderboard results
|
| 12 |
datasets:
|
app.py
CHANGED
|
@@ -82,7 +82,7 @@ with gr.Blocks(fill_height=True, fill_width=True) as demo:
|
|
| 82 |
configs = gr.HTML()
|
| 83 |
with gr.Tab("Details"):
|
| 84 |
details_task = gr.Radio(
|
| 85 |
-
|
| 86 |
label="Tasks",
|
| 87 |
info="Evaluation tasks to be loaded",
|
| 88 |
interactive=True,
|
|
@@ -91,11 +91,13 @@ with gr.Blocks(fill_height=True, fill_width=True) as demo:
|
|
| 91 |
label="Task Description",
|
| 92 |
lines=3,
|
| 93 |
)
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
|
|
|
|
|
|
| 99 |
load_details_btn = gr.Button("Load Details", interactive=False)
|
| 100 |
clear_details_btn = gr.Button("Clear Details")
|
| 101 |
sample_idx = gr.Number(
|
|
@@ -166,7 +168,7 @@ with gr.Blocks(fill_height=True, fill_width=True) as demo:
|
|
| 166 |
).then(
|
| 167 |
fn=update_subtasks_component,
|
| 168 |
inputs=details_task,
|
| 169 |
-
outputs=subtask,
|
| 170 |
)
|
| 171 |
gr.on(
|
| 172 |
triggers=[model_id_1.input, model_id_2.input, subtask.input, details_task.input],
|
|
|
|
| 82 |
configs = gr.HTML()
|
| 83 |
with gr.Tab("Details"):
|
| 84 |
details_task = gr.Radio(
|
| 85 |
+
list(constants.TASKS.values()),
|
| 86 |
label="Tasks",
|
| 87 |
info="Evaluation tasks to be loaded",
|
| 88 |
interactive=True,
|
|
|
|
| 91 |
label="Task Description",
|
| 92 |
lines=3,
|
| 93 |
)
|
| 94 |
+
with gr.Row():
|
| 95 |
+
login_btn = gr.LoginButton(size="sm", visible=False)
|
| 96 |
+
subtask = gr.Radio(
|
| 97 |
+
choices=None, # constants.SUBTASKS.get(details_task.value),
|
| 98 |
+
label="Subtasks",
|
| 99 |
+
info="Evaluation subtasks to be loaded (choose one of the Tasks above)",
|
| 100 |
+
)
|
| 101 |
load_details_btn = gr.Button("Load Details", interactive=False)
|
| 102 |
clear_details_btn = gr.Button("Clear Details")
|
| 103 |
sample_idx = gr.Number(
|
|
|
|
| 168 |
).then(
|
| 169 |
fn=update_subtasks_component,
|
| 170 |
inputs=details_task,
|
| 171 |
+
outputs=[login_btn, subtask],
|
| 172 |
)
|
| 173 |
gr.on(
|
| 174 |
triggers=[model_id_1.input, model_id_2.input, subtask.input, details_task.input],
|
src/constants.py
CHANGED
|
@@ -39,9 +39,9 @@ SUBTASKS = {
|
|
| 39 |
("Web of Lies", "leaderboard_bbh_web_of_lies"),
|
| 40 |
],
|
| 41 |
"leaderboard_gpqa": [
|
| 42 |
-
"leaderboard_gpqa_extended",
|
| 43 |
-
"leaderboard_gpqa_diamond",
|
| 44 |
-
"leaderboard_gpqa_main",
|
| 45 |
],
|
| 46 |
"leaderboard_ifeval": [("IFEval", "leaderboard_ifeval")],
|
| 47 |
# "leaderboard_math_hard": [
|
|
|
|
| 39 |
("Web of Lies", "leaderboard_bbh_web_of_lies"),
|
| 40 |
],
|
| 41 |
"leaderboard_gpqa": [
|
| 42 |
+
("Extended", "leaderboard_gpqa_extended"),
|
| 43 |
+
("Diamond", "leaderboard_gpqa_diamond"),
|
| 44 |
+
("Main", "leaderboard_gpqa_main"),
|
| 45 |
],
|
| 46 |
"leaderboard_ifeval": [("IFEval", "leaderboard_ifeval")],
|
| 47 |
# "leaderboard_math_hard": [
|
src/details.py
CHANGED
|
@@ -20,11 +20,16 @@ def update_task_description_component(task):
|
|
| 20 |
)
|
| 21 |
|
| 22 |
|
| 23 |
-
def update_subtasks_component(task):
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
)
|
| 29 |
|
| 30 |
|
|
|
|
| 20 |
)
|
| 21 |
|
| 22 |
|
| 23 |
+
def update_subtasks_component(task, profile: gr.OAuthProfile | None):
    """Build the login-button and subtask-radio components for the selected task.

    The ``"leaderboard_gpqa"`` task is handled specially: the login button is
    made visible only for it, and its subtasks are withheld (``choices=None``)
    while no profile is available.

    Args:
        task: Identifier of the selected task, used as the lookup key into
            ``constants.SUBTASKS``.
        profile: OAuth profile of the current user, or ``None`` when there is
            none. NOTE(review): presumably injected by Gradio based on the
            type annotation rather than passed as an explicit input -- confirm
            against the event wiring.

    Returns:
        A ``(gr.LoginButton, gr.Radio)`` tuple, in that order. The radio's
        selected value is always reset to ``None``.
    """
    # Compute the GPQA check once; it drives both the button visibility and
    # the gating of the subtask choices.
    is_gpqa = task == "leaderboard_gpqa"
    # Withhold GPQA subtasks while no profile is present; every other task is
    # looked up directly (``.get`` yields ``None`` for an unknown task).
    subtasks = None if is_gpqa and not profile else constants.SUBTASKS.get(task)
    return (
        gr.LoginButton(size="sm", visible=is_gpqa),
        gr.Radio(
            choices=subtasks,
            info="Evaluation subtasks to be loaded",
            value=None,
        ),
    )
|
| 34 |
|
| 35 |
|