Commit
·
9ee8875
1
Parent(s):
103ed5f
Still trying to find out why it crashes
Browse files
src/leaderboard/build_leaderboard.py
CHANGED
|
@@ -61,14 +61,16 @@ def download_openbench():
|
|
| 61 |
# download answers of different models that we trust
|
| 62 |
download_dataset("Vikhrmodels/openbench-eval", EVAL_RESULTS_PATH)
|
| 63 |
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
| 66 |
|
| 67 |
-
|
| 68 |
-
subprocess.run(["ls", f"{EVAL_RESULTS_PATH}/internal/"], check=
|
| 69 |
|
| 70 |
-
|
| 71 |
-
subprocess.run(["ls", f"{EVAL_RESULTS_PATH}/model_judgment/"], check=
|
| 72 |
|
| 73 |
# copy the trusted model answers to data
|
| 74 |
subprocess.run(
|
|
|
|
| 61 |
# download answers of different models that we trust
|
| 62 |
download_dataset("Vikhrmodels/openbench-eval", EVAL_RESULTS_PATH)
|
| 63 |
|
| 64 |
+
subprocess.run(["pwd"], check=False)
|
| 65 |
+
logging.info(EVAL_RESULTS_PATH)
|
| 66 |
+
print("\nFiles openbench-eval:")
|
| 67 |
+
subprocess.run(["ls", EVAL_RESULTS_PATH], check=False)
|
| 68 |
|
| 69 |
+
print("\nInternal models in openbench-eval:")
|
| 70 |
+
subprocess.run(["ls", f"{EVAL_RESULTS_PATH}/internal/"], check=False)
|
| 71 |
|
| 72 |
+
print("\nJudgement in openbench-eval")
|
| 73 |
+
subprocess.run(["ls", f"{EVAL_RESULTS_PATH}/model_judgment/gpt-4-1106-preview"], check=False)
|
| 74 |
|
| 75 |
# copy the trusted model answers to data
|
| 76 |
subprocess.run(
|