Commit
·
b7741fd
1
Parent(s):
b19c539
More verbose
Browse files
- app.py +1 -1
- src/leaderboard/build_leaderboard.py +7 -5
app.py
CHANGED
|
@@ -108,7 +108,7 @@ def update_board():
|
|
| 108 |
show_result_file = os.path.join(HF_HOME, "src/gen/show_result.py")
|
| 109 |
subprocess.run(["python3", show_result_file, "--output"], check=True)
|
| 110 |
|
| 111 |
-
# update the gr item
|
| 112 |
# TODO
|
| 113 |
|
| 114 |
|
|
|
|
| 108 |
show_result_file = os.path.join(HF_HOME, "src/gen/show_result.py")
|
| 109 |
subprocess.run(["python3", show_result_file, "--output"], check=True)
|
| 110 |
|
| 111 |
+
# update the gr item with leaderboard
|
| 112 |
# TODO
|
| 113 |
|
| 114 |
|
src/leaderboard/build_leaderboard.py
CHANGED
|
@@ -61,14 +61,17 @@ def download_openbench():
|
|
| 61 |
# download answers of different models that we trust
|
| 62 |
download_dataset("Vikhrmodels/openbench-eval", EVAL_RESULTS_PATH)
|
| 63 |
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
# copy the trusted model answers to data
|
| 67 |
subprocess.run(
|
| 68 |
[
|
| 69 |
"rsync",
|
| 70 |
-
"-
|
| 71 |
-
"--ignore-existing",
|
| 72 |
f"{EVAL_RESULTS_PATH}/internal/*",
|
| 73 |
f"{DATA_ARENA_PATH}/model_answer/internal/",
|
| 74 |
],
|
|
@@ -79,8 +82,7 @@ def download_openbench():
|
|
| 79 |
subprocess.run(
|
| 80 |
[
|
| 81 |
"rsync",
|
| 82 |
-
"-
|
| 83 |
-
"--ignore-existing",
|
| 84 |
f"{EVAL_RESULTS_PATH}/model_judgment/*",
|
| 85 |
f"{DATA_ARENA_PATH}/model_judgement/",
|
| 86 |
],
|
|
|
|
| 61 |
# download answers of different models that we trust
|
| 62 |
download_dataset("Vikhrmodels/openbench-eval", EVAL_RESULTS_PATH)
|
| 63 |
|
| 64 |
+
logging.info("\nInternal models in openbench-eval:")
|
| 65 |
+
subprocess.run(["ls", f"{EVAL_RESULTS_PATH}/internal/"], check=True)
|
| 66 |
+
|
| 67 |
+
logging.info("\nJudgement in openbench-eval")
|
| 68 |
+
subprocess.run(["ls", f"{EVAL_RESULTS_PATH}/model_judgment/"], check=True)
|
| 69 |
|
| 70 |
# copy the trusted model answers to data
|
| 71 |
subprocess.run(
|
| 72 |
[
|
| 73 |
"rsync",
|
| 74 |
+
"-azPvh",
|
|
|
|
| 75 |
f"{EVAL_RESULTS_PATH}/internal/*",
|
| 76 |
f"{DATA_ARENA_PATH}/model_answer/internal/",
|
| 77 |
],
|
|
|
|
| 82 |
subprocess.run(
|
| 83 |
[
|
| 84 |
"rsync",
|
| 85 |
+
"-azPvh",
|
|
|
|
| 86 |
f"{EVAL_RESULTS_PATH}/model_judgment/*",
|
| 87 |
f"{DATA_ARENA_PATH}/model_judgement/",
|
| 88 |
],
|