import logging
import os
import subprocess

import gradio as gr
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_leaderboard import Leaderboard, SelectColumns
from gradio_space_ci import enable_space_ci

from src.display.about import (
    INTRODUCTION_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    AutoEvalColumn,
    fields,
)
from src.envs import API, H4_TOKEN, HF_HOME, REPO_ID, RESET_JUDGEMENT_ENV
from src.leaderboard.build_leaderboard import build_leadearboard_df, download_openbench

# Keep the Space self-contained: no Gradio telemetry, INFO-level logging to stdout.
os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Enable ephemeral Spaces on pull requests for this Space (gradio_space_ci).
enable_space_ci()

# Pull the openbench data needed to build the leaderboard.
download_openbench()

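# Restart this Hugging Face Space via the Hub API.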
def restart_space():
    API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)

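# Assemble the Gradio UI: a leaderboard tab plus a tab for submitting model answers.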
def build_demo():
    demo = gr.Blocks(title="Chatbot Arena Leaderboard", css=custom_css)
    leaderboard_df = build_leadearboard_df()
    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        with gr.Tabs(elem_classes="tab-buttons"):
            with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
                Leaderboard(
                    value=leaderboard_df,
                    datatype=[c.type for c in fields(AutoEvalColumn)],
                    select_columns=SelectColumns(
                        default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
                        cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
                        label="Select Columns to Display:",
                    ),
                    search_columns=[
                        AutoEvalColumn.model.name,
                    ],
                )

            with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
                with gr.Row():
                    gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")

                with gr.Column():
                    model_name_textbox = gr.Textbox(label="Model name")

                    def upload_file(file):
                        # Keep only the file name, then push the uploaded answers into the evaluation dataset repo.
                        file_path = file.name.split("/")[-1] if "/" in file.name else file.name
                        logging.info("New submission: file saved to %s", file_path)
                        API.upload_file(
                            path_or_fileobj=file.name,
                            path_in_repo="model_answers/external/" + file_path,
                            repo_id="Vikhrmodels/openbench-eval",
                            repo_type="dataset",
                        )
                        # Flag that judgements must be recomputed on the next scheduled run.
                        os.environ[RESET_JUDGEMENT_ENV] = "1"
                        return file.name

                    if model_name_textbox:
                        file_output = gr.File()
                        upload_button = gr.UploadButton(
                            "Click to Upload & Submit Answers", file_types=["*"], file_count="single"
                        )
                        upload_button.upload(upload_file, upload_button, file_output)

    return demo

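# Scheduled job: rebuild the results whenever a new submission has set the reset flag.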
def update_board():
    need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
    logging.info("Updating the judgement: %s", need_reset)
    if need_reset != "1":
        return
    os.environ[RESET_JUDGEMENT_ENV] = "0"

    # Rebuild the result tables via the show_result script.
    show_result_file = os.path.join(HF_HOME, "src/gen/show_result.py")
    subprocess.run(["python3", show_result_file, "--output"], check=True)

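# Entry point: flag an initial refresh, schedule periodic updates, and launch the app.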
if __name__ == "__main__":
    os.environ[RESET_JUDGEMENT_ENV] = "1"

    scheduler = BackgroundScheduler()
    scheduler.add_job(update_board, "interval", minutes=10)
    scheduler.start()

    demo_app = build_demo()
    demo_app.launch(debug=True)