Commit
·
9fd3876
1
Parent(s):
c72a31e
Update README.md
Browse files
README.md
CHANGED
|
@@ -72,6 +72,63 @@ Evaluation metrics:
|
|
| 72 |
|anli_r2| 0|acc |0.346|± |0.0151|
|
| 73 |
|anli_r3| 0|acc |0.355|± |0.0138|
|
| 74 |
|drop| 1|f1 |0.0034|± |0.0004|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
|
| 77 |
## Limitations and Bias
|
|
|
|
| 72 |
|anli_r2| 0|acc |0.346|± |0.0151|
|
| 73 |
|anli_r3| 0|acc |0.355|± |0.0138|
|
| 74 |
|drop| 1|f1 |0.0034|± |0.0004|
|
| 75 |
+
|hendrycksTest-abstract_algebra | 1|acc | 0.32|± |0.0952|
|
| 76 |
+
|hendrycksTest-anatomy | 1|acc | 0.44|± |0.1013|
|
| 77 |
+
|hendrycksTest-astronomy | 1|acc | 0.24|± |0.0872|
|
| 78 |
+
|hendrycksTest-business_ethics | 1|acc | 0.24|± |0.0872|
|
| 79 |
+
|hendrycksTest-clinical_knowledge | 1|acc | 0.24|± |0.0872|
|
| 80 |
+
|hendrycksTest-college_biology | 1|acc | 0.20|± |0.0816|
|
| 81 |
+
|hendrycksTest-college_chemistry | 1|acc | 0.40|± |0.1000|
|
| 82 |
+
|hendrycksTest-college_computer_science | 1|acc | 0.36|± |0.0980|
|
| 83 |
+
|hendrycksTest-college_mathematics | 1|acc | 0.48|± |0.1020|
|
| 84 |
+
|hendrycksTest-college_medicine | 1|acc | 0.20|± |0.0816|
|
| 85 |
+
|hendrycksTest-college_physics | 1|acc | 0.44|± |0.1013|
|
| 86 |
+
|hendrycksTest-computer_security | 1|acc | 0.16|± |0.0748|
|
| 87 |
+
|hendrycksTest-conceptual_physics | 1|acc | 0.12|± |0.0663|
|
| 88 |
+
|hendrycksTest-econometrics | 1|acc | 0.16|± |0.0748|
|
| 89 |
+
|hendrycksTest-electrical_engineering | 1|acc | 0.28|± |0.0917|
|
| 90 |
+
|hendrycksTest-elementary_mathematics | 1|acc | 0.36|± |0.0980|
|
| 91 |
+
|hendrycksTest-formal_logic | 1|acc | 0.44|± |0.1013|
|
| 92 |
+
|hendrycksTest-global_facts | 1|acc | 0.20|± |0.0816|
|
| 93 |
+
|hendrycksTest-high_school_biology | 1|acc | 0.20|± |0.0816|
|
| 94 |
+
|hendrycksTest-high_school_chemistry | 1|acc | 0.28|± |0.0917|
|
| 95 |
+
|hendrycksTest-high_school_computer_science | 1|acc | 0.24|± |0.0872|
|
| 96 |
+
|hendrycksTest-high_school_european_history | 1|acc | 0.32|± |0.0952|
|
| 97 |
+
|hendrycksTest-high_school_geography | 1|acc | 0.32|± |0.0952|
|
| 98 |
+
|hendrycksTest-high_school_government_and_politics| 1|acc | 0.28|± |0.0917|
|
| 99 |
+
|hendrycksTest-high_school_macroeconomics | 1|acc | 0.28|± |0.0917|
|
| 100 |
+
|hendrycksTest-high_school_mathematics | 1|acc | 0.20|± |0.0816|
|
| 101 |
+
|hendrycksTest-high_school_microeconomics | 1|acc | 0.24|± |0.0872|
|
| 102 |
+
|hendrycksTest-high_school_physics | 1|acc | 0.28|± |0.0917|
|
| 103 |
+
|hendrycksTest-high_school_psychology | 1|acc | 0.32|± |0.0952|
|
| 104 |
+
|hendrycksTest-high_school_statistics | 1|acc | 0.40|± |0.1000|
|
| 105 |
+
|hendrycksTest-high_school_us_history | 1|acc | 0.32|± |0.0952|
|
| 106 |
+
|hendrycksTest-high_school_world_history | 1|acc | 0.36|± |0.0980|
|
| 107 |
+
|hendrycksTest-human_aging | 1|acc | 0.16|± |0.0748|
|
| 108 |
+
|hendrycksTest-human_sexuality | 1|acc | 0.40|± |0.1000|
|
| 109 |
+
|hendrycksTest-international_law | 1|acc | 0.24|± |0.0872|
|
| 110 |
+
|hendrycksTest-jurisprudence | 1|acc | 0.08|± |0.0554|
|
| 111 |
+
|hendrycksTest-logical_fallacies | 1|acc | 0.52|± |0.1020|
|
| 112 |
+
|hendrycksTest-machine_learning | 1|acc | 0.12|± |0.0663|
|
| 113 |
+
|hendrycksTest-management | 1|acc | 0.12|± |0.0663|
|
| 114 |
+
|hendrycksTest-marketing | 1|acc | 0.16|± |0.0748|
|
| 115 |
+
|hendrycksTest-medical_genetics | 1|acc | 0.12|± |0.0663|
|
| 116 |
+
|hendrycksTest-miscellaneous | 1|acc | 0.36|± |0.0980|
|
| 117 |
+
|hendrycksTest-moral_disputes | 1|acc | 0.08|± |0.0554|
|
| 118 |
+
|hendrycksTest-moral_scenarios | 1|acc | 0.44|± |0.1013|
|
| 119 |
+
|hendrycksTest-nutrition | 1|acc | 0.32|± |0.0952|
|
| 120 |
+
|hendrycksTest-philosophy | 1|acc | 0.44|± |0.1013|
|
| 121 |
+
|hendrycksTest-prehistory | 1|acc | 0.16|± |0.0748|
|
| 122 |
+
|hendrycksTest-professional_accounting | 1|acc | 0.28|± |0.0917|
|
| 123 |
+
|hendrycksTest-professional_law | 1|acc | 0.12|± |0.0663|
|
| 124 |
+
|hendrycksTest-professional_medicine | 1|acc | 0.40|± |0.1000|
|
| 125 |
+
|hendrycksTest-professional_psychology | 1|acc | 0.24|± |0.0872|
|
| 126 |
+
|hendrycksTest-public_relations | 1|acc | 0.08|± |0.0554|
|
| 127 |
+
|hendrycksTest-security_studies | 1|acc | 0.24|± |0.0872|
|
| 128 |
+
|hendrycksTest-sociology | 1|acc | 0.28|± |0.0917|
|
| 129 |
+
|hendrycksTest-us_foreign_policy | 1|acc | 0.24|± |0.0872|
|
| 130 |
+
|hendrycksTest-virology | 1|acc | 0.20|± |0.0816|
|
| 131 |
+
|hendrycksTest-world_religions | 1|acc | 0.16|± |0.0748|
|
| 132 |
|
| 133 |
|
| 134 |
## Limitations and Bias
|