{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.6464124111182935,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 0.0002,
      "loss": 0.9529,
      "step": 10
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0002,
      "loss": 0.7908,
      "step": 20
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0002,
      "loss": 0.7173,
      "step": 30
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0002,
      "loss": 0.7776,
      "step": 40
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0002,
      "loss": 0.6779,
      "step": 50
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0002,
      "loss": 0.7011,
      "step": 60
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0002,
      "loss": 0.7156,
      "step": 70
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.0002,
      "loss": 0.7418,
      "step": 80
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.0002,
      "loss": 0.7465,
      "step": 90
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.0002,
      "loss": 0.7595,
      "step": 100
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0002,
      "loss": 0.6816,
      "step": 110
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0002,
      "loss": 0.7981,
      "step": 120
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0002,
      "loss": 0.6494,
      "step": 130
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0002,
      "loss": 0.7423,
      "step": 140
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0002,
      "loss": 0.7555,
      "step": 150
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0002,
      "loss": 0.5799,
      "step": 160
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0002,
      "loss": 0.699,
      "step": 170
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0002,
      "loss": 0.7154,
      "step": 180
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0002,
      "loss": 0.6096,
      "step": 190
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0002,
      "loss": 0.5713,
      "step": 200
    },
    {
      "epoch": 0.06,
      "eval_loss": 0.6557502150535583,
      "eval_runtime": 92.7109,
      "eval_samples_per_second": 10.786,
      "eval_steps_per_second": 5.393,
      "step": 200
    },
    {
      "epoch": 0.06,
      "mmlu_eval_accuracy": 0.47294392264572244,
      "mmlu_eval_accuracy_abstract_algebra": 0.2727272727272727,
      "mmlu_eval_accuracy_anatomy": 0.6428571428571429,
      "mmlu_eval_accuracy_astronomy": 0.4375,
      "mmlu_eval_accuracy_business_ethics": 0.6363636363636364,
      "mmlu_eval_accuracy_clinical_knowledge": 0.41379310344827586,
      "mmlu_eval_accuracy_college_biology": 0.5,
      "mmlu_eval_accuracy_college_chemistry": 0.25,
      "mmlu_eval_accuracy_college_computer_science": 0.36363636363636365,
      "mmlu_eval_accuracy_college_mathematics": 0.36363636363636365,
      "mmlu_eval_accuracy_college_medicine": 0.3181818181818182,
      "mmlu_eval_accuracy_college_physics": 0.5454545454545454,
      "mmlu_eval_accuracy_computer_security": 0.36363636363636365,
      "mmlu_eval_accuracy_conceptual_physics": 0.46153846153846156,
      "mmlu_eval_accuracy_econometrics": 0.16666666666666666,
      "mmlu_eval_accuracy_electrical_engineering": 0.375,
      "mmlu_eval_accuracy_elementary_mathematics": 0.34146341463414637,
      "mmlu_eval_accuracy_formal_logic": 0.35714285714285715,
      "mmlu_eval_accuracy_global_facts": 0.5,
      "mmlu_eval_accuracy_high_school_biology": 0.40625,
      "mmlu_eval_accuracy_high_school_chemistry": 0.4090909090909091,
      "mmlu_eval_accuracy_high_school_computer_science": 0.3333333333333333,
      "mmlu_eval_accuracy_high_school_european_history": 0.4444444444444444,
      "mmlu_eval_accuracy_high_school_geography": 0.8181818181818182,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.6666666666666666,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.3488372093023256,
      "mmlu_eval_accuracy_high_school_mathematics": 0.20689655172413793,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.46153846153846156,
      "mmlu_eval_accuracy_high_school_physics": 0.35294117647058826,
      "mmlu_eval_accuracy_high_school_psychology": 0.7166666666666667,
      "mmlu_eval_accuracy_high_school_statistics": 0.34782608695652173,
      "mmlu_eval_accuracy_high_school_us_history": 0.5,
      "mmlu_eval_accuracy_high_school_world_history": 0.5769230769230769,
      "mmlu_eval_accuracy_human_aging": 0.6521739130434783,
      "mmlu_eval_accuracy_human_sexuality": 0.4166666666666667,
      "mmlu_eval_accuracy_international_law": 0.8461538461538461,
      "mmlu_eval_accuracy_jurisprudence": 0.18181818181818182,
      "mmlu_eval_accuracy_logical_fallacies": 0.5,
      "mmlu_eval_accuracy_machine_learning": 0.18181818181818182,
      "mmlu_eval_accuracy_management": 0.5454545454545454,
      "mmlu_eval_accuracy_marketing": 0.72,
      "mmlu_eval_accuracy_medical_genetics": 0.7272727272727273,
      "mmlu_eval_accuracy_miscellaneous": 0.6395348837209303,
      "mmlu_eval_accuracy_moral_disputes": 0.5263157894736842,
      "mmlu_eval_accuracy_moral_scenarios": 0.25,
      "mmlu_eval_accuracy_nutrition": 0.6363636363636364,
      "mmlu_eval_accuracy_philosophy": 0.47058823529411764,
      "mmlu_eval_accuracy_prehistory": 0.42857142857142855,
      "mmlu_eval_accuracy_professional_accounting": 0.25806451612903225,
      "mmlu_eval_accuracy_professional_law": 0.3176470588235294,
      "mmlu_eval_accuracy_professional_medicine": 0.4838709677419355,
      "mmlu_eval_accuracy_professional_psychology": 0.391304347826087,
      "mmlu_eval_accuracy_public_relations": 0.6666666666666666,
      "mmlu_eval_accuracy_security_studies": 0.48148148148148145,
      "mmlu_eval_accuracy_sociology": 0.7727272727272727,
      "mmlu_eval_accuracy_us_foreign_policy": 0.7272727272727273,
      "mmlu_eval_accuracy_virology": 0.5,
      "mmlu_eval_accuracy_world_religions": 0.7368421052631579,
      "mmlu_loss": 0.8432865626915472,
      "step": 200
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0002,
      "loss": 0.6339,
      "step": 210
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0002,
      "loss": 0.701,
      "step": 220
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0002,
      "loss": 0.689,
      "step": 230
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0002,
      "loss": 0.6955,
      "step": 240
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0002,
      "loss": 0.6124,
      "step": 250
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0002,
      "loss": 0.6648,
      "step": 260
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0002,
      "loss": 0.7364,
      "step": 270
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0002,
      "loss": 0.558,
      "step": 280
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0002,
      "loss": 0.7089,
      "step": 290
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002,
      "loss": 0.5691,
      "step": 300
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002,
      "loss": 0.7079,
      "step": 310
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002,
      "loss": 0.6558,
      "step": 320
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0002,
      "loss": 0.5887,
      "step": 330
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0002,
      "loss": 0.704,
      "step": 340
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0002,
      "loss": 0.638,
      "step": 350
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0002,
      "loss": 0.708,
      "step": 360
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0002,
      "loss": 0.6203,
      "step": 370
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0002,
      "loss": 0.6288,
      "step": 380
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0002,
      "loss": 0.6725,
      "step": 390
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0002,
      "loss": 0.6849,
      "step": 400
    },
    {
      "epoch": 0.13,
      "eval_loss": 0.6309903860092163,
      "eval_runtime": 92.7675,
      "eval_samples_per_second": 10.78,
      "eval_steps_per_second": 5.39,
      "step": 400
    },
    {
      "epoch": 0.13,
      "mmlu_eval_accuracy": 0.46759845160702646,
      "mmlu_eval_accuracy_abstract_algebra": 0.2727272727272727,
      "mmlu_eval_accuracy_anatomy": 0.5714285714285714,
      "mmlu_eval_accuracy_astronomy": 0.4375,
      "mmlu_eval_accuracy_business_ethics": 0.5454545454545454,
      "mmlu_eval_accuracy_clinical_knowledge": 0.4482758620689655,
      "mmlu_eval_accuracy_college_biology": 0.375,
      "mmlu_eval_accuracy_college_chemistry": 0.125,
      "mmlu_eval_accuracy_college_computer_science": 0.45454545454545453,
      "mmlu_eval_accuracy_college_mathematics": 0.18181818181818182,
      "mmlu_eval_accuracy_college_medicine": 0.4090909090909091,
      "mmlu_eval_accuracy_college_physics": 0.5454545454545454,
      "mmlu_eval_accuracy_computer_security": 0.36363636363636365,
      "mmlu_eval_accuracy_conceptual_physics": 0.5,
      "mmlu_eval_accuracy_econometrics": 0.16666666666666666,
      "mmlu_eval_accuracy_electrical_engineering": 0.4375,
      "mmlu_eval_accuracy_elementary_mathematics": 0.2926829268292683,
      "mmlu_eval_accuracy_formal_logic": 0.2857142857142857,
      "mmlu_eval_accuracy_global_facts": 0.4,
      "mmlu_eval_accuracy_high_school_biology": 0.375,
      "mmlu_eval_accuracy_high_school_chemistry": 0.36363636363636365,
      "mmlu_eval_accuracy_high_school_computer_science": 0.5555555555555556,
      "mmlu_eval_accuracy_high_school_european_history": 0.5555555555555556,
      "mmlu_eval_accuracy_high_school_geography": 0.7727272727272727,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.6190476190476191,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.32558139534883723,
      "mmlu_eval_accuracy_high_school_mathematics": 0.27586206896551724,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.23076923076923078,
      "mmlu_eval_accuracy_high_school_physics": 0.35294117647058826,
      "mmlu_eval_accuracy_high_school_psychology": 0.75,
      "mmlu_eval_accuracy_high_school_statistics": 0.391304347826087,
      "mmlu_eval_accuracy_high_school_us_history": 0.6363636363636364,
      "mmlu_eval_accuracy_high_school_world_history": 0.5,
      "mmlu_eval_accuracy_human_aging": 0.6521739130434783,
      "mmlu_eval_accuracy_human_sexuality": 0.5,
      "mmlu_eval_accuracy_international_law": 0.8461538461538461,
      "mmlu_eval_accuracy_jurisprudence": 0.36363636363636365,
      "mmlu_eval_accuracy_logical_fallacies": 0.6111111111111112,
      "mmlu_eval_accuracy_machine_learning": 0.18181818181818182,
      "mmlu_eval_accuracy_management": 0.6363636363636364,
      "mmlu_eval_accuracy_marketing": 0.72,
      "mmlu_eval_accuracy_medical_genetics": 0.7272727272727273,
      "mmlu_eval_accuracy_miscellaneous": 0.6511627906976745,
      "mmlu_eval_accuracy_moral_disputes": 0.4473684210526316,
      "mmlu_eval_accuracy_moral_scenarios": 0.25,
      "mmlu_eval_accuracy_nutrition": 0.5757575757575758,
      "mmlu_eval_accuracy_philosophy": 0.5,
      "mmlu_eval_accuracy_prehistory": 0.45714285714285713,
      "mmlu_eval_accuracy_professional_accounting": 0.25806451612903225,
      "mmlu_eval_accuracy_professional_law": 0.3176470588235294,
      "mmlu_eval_accuracy_professional_medicine": 0.41935483870967744,
      "mmlu_eval_accuracy_professional_psychology": 0.391304347826087,
      "mmlu_eval_accuracy_public_relations": 0.5833333333333334,
      "mmlu_eval_accuracy_security_studies": 0.48148148148148145,
      "mmlu_eval_accuracy_sociology": 0.7272727272727273,
      "mmlu_eval_accuracy_us_foreign_policy": 0.5454545454545454,
      "mmlu_eval_accuracy_virology": 0.5555555555555556,
      "mmlu_eval_accuracy_world_religions": 0.7368421052631579,
      "mmlu_loss": 0.8323084239996289,
      "step": 400
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0002,
      "loss": 0.5261,
      "step": 410
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0002,
      "loss": 0.6614,
      "step": 420
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0002,
      "loss": 0.6213,
      "step": 430
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0002,
      "loss": 0.7206,
      "step": 440
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002,
      "loss": 0.6944,
      "step": 450
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002,
      "loss": 0.675,
      "step": 460
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002,
      "loss": 0.5907,
      "step": 470
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0002,
      "loss": 0.5778,
      "step": 480
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0002,
      "loss": 0.5583,
      "step": 490
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0002,
      "loss": 0.7171,
      "step": 500
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0002,
      "loss": 0.728,
      "step": 510
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0002,
      "loss": 0.6037,
      "step": 520
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0002,
      "loss": 0.6598,
      "step": 530
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0002,
      "loss": 0.6913,
      "step": 540
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0002,
      "loss": 0.6092,
      "step": 550
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0002,
      "loss": 0.589,
      "step": 560
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0002,
      "loss": 0.5608,
      "step": 570
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0002,
      "loss": 0.6251,
      "step": 580
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0002,
      "loss": 0.6527,
      "step": 590
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0002,
      "loss": 0.6518,
      "step": 600
    },
    {
      "epoch": 0.19,
      "eval_loss": 0.618711531162262,
      "eval_runtime": 92.7537,
      "eval_samples_per_second": 10.781,
      "eval_steps_per_second": 5.391,
      "step": 600
    },
    {
      "epoch": 0.19,
      "mmlu_eval_accuracy": 0.4660645174643213,
      "mmlu_eval_accuracy_abstract_algebra": 0.2727272727272727,
      "mmlu_eval_accuracy_anatomy": 0.5714285714285714,
      "mmlu_eval_accuracy_astronomy": 0.4375,
      "mmlu_eval_accuracy_business_ethics": 0.5454545454545454,
      "mmlu_eval_accuracy_clinical_knowledge": 0.4827586206896552,
      "mmlu_eval_accuracy_college_biology": 0.4375,
      "mmlu_eval_accuracy_college_chemistry": 0.125,
      "mmlu_eval_accuracy_college_computer_science": 0.36363636363636365,
      "mmlu_eval_accuracy_college_mathematics": 0.36363636363636365,
      "mmlu_eval_accuracy_college_medicine": 0.4090909090909091,
      "mmlu_eval_accuracy_college_physics": 0.45454545454545453,
      "mmlu_eval_accuracy_computer_security": 0.36363636363636365,
      "mmlu_eval_accuracy_conceptual_physics": 0.46153846153846156,
      "mmlu_eval_accuracy_econometrics": 0.16666666666666666,
      "mmlu_eval_accuracy_electrical_engineering": 0.375,
      "mmlu_eval_accuracy_elementary_mathematics": 0.3170731707317073,
      "mmlu_eval_accuracy_formal_logic": 0.21428571428571427,
      "mmlu_eval_accuracy_global_facts": 0.4,
      "mmlu_eval_accuracy_high_school_biology": 0.46875,
      "mmlu_eval_accuracy_high_school_chemistry": 0.36363636363636365,
      "mmlu_eval_accuracy_high_school_computer_science": 0.5555555555555556,
      "mmlu_eval_accuracy_high_school_european_history": 0.5555555555555556,
      "mmlu_eval_accuracy_high_school_geography": 0.7727272727272727,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.6190476190476191,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.37209302325581395,
      "mmlu_eval_accuracy_high_school_mathematics": 0.3103448275862069,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.4230769230769231,
      "mmlu_eval_accuracy_high_school_physics": 0.23529411764705882,
      "mmlu_eval_accuracy_high_school_psychology": 0.7333333333333333,
      "mmlu_eval_accuracy_high_school_statistics": 0.34782608695652173,
      "mmlu_eval_accuracy_high_school_us_history": 0.6363636363636364,
      "mmlu_eval_accuracy_high_school_world_history": 0.5769230769230769,
      "mmlu_eval_accuracy_human_aging": 0.6521739130434783,
      "mmlu_eval_accuracy_human_sexuality": 0.4166666666666667,
      "mmlu_eval_accuracy_international_law": 0.7692307692307693,
      "mmlu_eval_accuracy_jurisprudence": 0.2727272727272727,
      "mmlu_eval_accuracy_logical_fallacies": 0.5,
      "mmlu_eval_accuracy_machine_learning": 0.18181818181818182,
      "mmlu_eval_accuracy_management": 0.6363636363636364,
      "mmlu_eval_accuracy_marketing": 0.68,
      "mmlu_eval_accuracy_medical_genetics": 0.7272727272727273,
      "mmlu_eval_accuracy_miscellaneous": 0.6744186046511628,
      "mmlu_eval_accuracy_moral_disputes": 0.47368421052631576,
      "mmlu_eval_accuracy_moral_scenarios": 0.26,
      "mmlu_eval_accuracy_nutrition": 0.5454545454545454,
      "mmlu_eval_accuracy_philosophy": 0.5,
      "mmlu_eval_accuracy_prehistory": 0.4,
      "mmlu_eval_accuracy_professional_accounting": 0.2903225806451613,
      "mmlu_eval_accuracy_professional_law": 0.32941176470588235,
      "mmlu_eval_accuracy_professional_medicine": 0.45161290322580644,
      "mmlu_eval_accuracy_professional_psychology": 0.42028985507246375,
      "mmlu_eval_accuracy_public_relations": 0.6666666666666666,
      "mmlu_eval_accuracy_security_studies": 0.5185185185185185,
      "mmlu_eval_accuracy_sociology": 0.6818181818181818,
      "mmlu_eval_accuracy_us_foreign_policy": 0.5454545454545454,
      "mmlu_eval_accuracy_virology": 0.5555555555555556,
      "mmlu_eval_accuracy_world_religions": 0.6842105263157895,
      "mmlu_loss": 0.8410176051775718,
      "step": 600
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0002,
      "loss": 0.6876,
      "step": 610
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0002,
      "loss": 0.7174,
      "step": 620
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0002,
      "loss": 0.6002,
      "step": 630
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0002,
      "loss": 0.7426,
      "step": 640
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0002,
      "loss": 0.5213,
      "step": 650
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0002,
      "loss": 0.574,
      "step": 660
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002,
      "loss": 0.6669,
      "step": 670
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002,
      "loss": 0.5847,
      "step": 680
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002,
      "loss": 0.7233,
      "step": 690
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0002,
      "loss": 0.6199,
      "step": 700
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0002,
      "loss": 0.5977,
      "step": 710
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0002,
      "loss": 0.6853,
      "step": 720
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0002,
      "loss": 0.5777,
      "step": 730
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0002,
      "loss": 0.5943,
      "step": 740
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0002,
      "loss": 0.6323,
      "step": 750
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0002,
      "loss": 0.6367,
      "step": 760
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0002,
      "loss": 0.6367,
      "step": 770
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0002,
      "loss": 0.6197,
      "step": 780
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.0002,
      "loss": 0.5639,
      "step": 790
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.0002,
      "loss": 0.6281,
      "step": 800
    },
    {
      "epoch": 0.26,
      "eval_loss": 0.6189825534820557,
      "eval_runtime": 92.99,
      "eval_samples_per_second": 10.754,
      "eval_steps_per_second": 5.377,
      "step": 800
    },
    {
      "epoch": 0.26,
      "mmlu_eval_accuracy": 0.48167538061853127,
      "mmlu_eval_accuracy_abstract_algebra": 0.36363636363636365,
      "mmlu_eval_accuracy_anatomy": 0.6428571428571429,
      "mmlu_eval_accuracy_astronomy": 0.4375,
      "mmlu_eval_accuracy_business_ethics": 0.45454545454545453,
      "mmlu_eval_accuracy_clinical_knowledge": 0.4827586206896552,
      "mmlu_eval_accuracy_college_biology": 0.4375,
      "mmlu_eval_accuracy_college_chemistry": 0.125,
      "mmlu_eval_accuracy_college_computer_science": 0.36363636363636365,
      "mmlu_eval_accuracy_college_mathematics": 0.2727272727272727,
      "mmlu_eval_accuracy_college_medicine": 0.4090909090909091,
      "mmlu_eval_accuracy_college_physics": 0.45454545454545453,
      "mmlu_eval_accuracy_computer_security": 0.6363636363636364,
      "mmlu_eval_accuracy_conceptual_physics": 0.46153846153846156,
      "mmlu_eval_accuracy_econometrics": 0.08333333333333333,
      "mmlu_eval_accuracy_electrical_engineering": 0.375,
      "mmlu_eval_accuracy_elementary_mathematics": 0.36585365853658536,
      "mmlu_eval_accuracy_formal_logic": 0.2857142857142857,
      "mmlu_eval_accuracy_global_facts": 0.5,
      "mmlu_eval_accuracy_high_school_biology": 0.40625,
      "mmlu_eval_accuracy_high_school_chemistry": 0.36363636363636365,
      "mmlu_eval_accuracy_high_school_computer_science": 0.4444444444444444,
      "mmlu_eval_accuracy_high_school_european_history": 0.3888888888888889,
      "mmlu_eval_accuracy_high_school_geography": 0.7272727272727273,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.6666666666666666,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.3953488372093023,
      "mmlu_eval_accuracy_high_school_mathematics": 0.2413793103448276,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.4230769230769231,
      "mmlu_eval_accuracy_high_school_physics": 0.35294117647058826,
      "mmlu_eval_accuracy_high_school_psychology": 0.7666666666666667,
      "mmlu_eval_accuracy_high_school_statistics": 0.391304347826087,
      "mmlu_eval_accuracy_high_school_us_history": 0.6818181818181818,
      "mmlu_eval_accuracy_high_school_world_history": 0.5769230769230769,
      "mmlu_eval_accuracy_human_aging": 0.6521739130434783,
      "mmlu_eval_accuracy_human_sexuality": 0.5,
      "mmlu_eval_accuracy_international_law": 0.7692307692307693,
      "mmlu_eval_accuracy_jurisprudence": 0.36363636363636365,
      "mmlu_eval_accuracy_logical_fallacies": 0.6111111111111112,
      "mmlu_eval_accuracy_machine_learning": 0.36363636363636365,
      "mmlu_eval_accuracy_management": 0.7272727272727273,
      "mmlu_eval_accuracy_marketing": 0.72,
      "mmlu_eval_accuracy_medical_genetics": 0.7272727272727273,
      "mmlu_eval_accuracy_miscellaneous": 0.686046511627907,
      "mmlu_eval_accuracy_moral_disputes": 0.5,
      "mmlu_eval_accuracy_moral_scenarios": 0.27,
      "mmlu_eval_accuracy_nutrition": 0.5454545454545454,
      "mmlu_eval_accuracy_philosophy": 0.47058823529411764,
      "mmlu_eval_accuracy_prehistory": 0.5142857142857142,
      "mmlu_eval_accuracy_professional_accounting": 0.25806451612903225,
      "mmlu_eval_accuracy_professional_law": 0.3352941176470588,
      "mmlu_eval_accuracy_professional_medicine": 0.4838709677419355,
      "mmlu_eval_accuracy_professional_psychology": 0.4057971014492754,
      "mmlu_eval_accuracy_public_relations": 0.5,
      "mmlu_eval_accuracy_security_studies": 0.5185185185185185,
      "mmlu_eval_accuracy_sociology": 0.6818181818181818,
      "mmlu_eval_accuracy_us_foreign_policy": 0.6363636363636364,
      "mmlu_eval_accuracy_virology": 0.5,
      "mmlu_eval_accuracy_world_religions": 0.7368421052631579,
      "mmlu_loss": 0.9606302572954105,
      "step": 800
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.0002,
      "loss": 0.637,
      "step": 810
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0002,
      "loss": 0.6902,
      "step": 820
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0002,
      "loss": 0.5872,
      "step": 830
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0002,
      "loss": 0.6115,
      "step": 840
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0002,
      "loss": 0.6499,
      "step": 850
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0002,
      "loss": 0.5977,
      "step": 860
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0002,
      "loss": 0.5873,
      "step": 870
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0002,
      "loss": 0.6776,
      "step": 880
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002,
      "loss": 0.6804,
      "step": 890
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002,
      "loss": 0.5914,
      "step": 900
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002,
      "loss": 0.6313,
      "step": 910
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0002,
      "loss": 0.5869,
      "step": 920
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0002,
      "loss": 0.519,
      "step": 930
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0002,
      "loss": 0.4877,
      "step": 940
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0002,
      "loss": 0.5931,
      "step": 950
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0002,
      "loss": 0.6614,
      "step": 960
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0002,
      "loss": 0.6287,
      "step": 970
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0002,
      "loss": 0.5139,
      "step": 980
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0002,
      "loss": 0.6392,
      "step": 990
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0002,
      "loss": 0.7001,
      "step": 1000
    },
    {
      "epoch": 0.32,
      "eval_loss": 0.5796898007392883,
      "eval_runtime": 92.7581,
      "eval_samples_per_second": 10.781,
      "eval_steps_per_second": 5.39,
      "step": 1000
    },
    {
      "epoch": 0.32,
      "mmlu_eval_accuracy": 0.47460361123181055,
      "mmlu_eval_accuracy_abstract_algebra": 0.36363636363636365,
      "mmlu_eval_accuracy_anatomy": 0.7142857142857143,
      "mmlu_eval_accuracy_astronomy": 0.375,
      "mmlu_eval_accuracy_business_ethics": 0.5454545454545454,
      "mmlu_eval_accuracy_clinical_knowledge": 0.4482758620689655,
      "mmlu_eval_accuracy_college_biology": 0.4375,
      "mmlu_eval_accuracy_college_chemistry": 0.25,
      "mmlu_eval_accuracy_college_computer_science": 0.36363636363636365,
      "mmlu_eval_accuracy_college_mathematics": 0.36363636363636365,
      "mmlu_eval_accuracy_college_medicine": 0.36363636363636365,
      "mmlu_eval_accuracy_college_physics": 0.5454545454545454,
      "mmlu_eval_accuracy_computer_security": 0.36363636363636365,
      "mmlu_eval_accuracy_conceptual_physics": 0.46153846153846156,
      "mmlu_eval_accuracy_econometrics": 0.25,
      "mmlu_eval_accuracy_electrical_engineering": 0.3125,
      "mmlu_eval_accuracy_elementary_mathematics": 0.36585365853658536,
      "mmlu_eval_accuracy_formal_logic": 0.21428571428571427,
      "mmlu_eval_accuracy_global_facts": 0.5,
      "mmlu_eval_accuracy_high_school_biology": 0.53125,
      "mmlu_eval_accuracy_high_school_chemistry": 0.4090909090909091,
      "mmlu_eval_accuracy_high_school_computer_science": 0.4444444444444444,
      "mmlu_eval_accuracy_high_school_european_history": 0.5,
      "mmlu_eval_accuracy_high_school_geography": 0.7727272727272727,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.6666666666666666,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.37209302325581395,
      "mmlu_eval_accuracy_high_school_mathematics": 0.1724137931034483,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.34615384615384615,
      "mmlu_eval_accuracy_high_school_physics": 0.35294117647058826,
      "mmlu_eval_accuracy_high_school_psychology": 0.75,
      "mmlu_eval_accuracy_high_school_statistics": 0.34782608695652173,
      "mmlu_eval_accuracy_high_school_us_history": 0.5454545454545454,
      "mmlu_eval_accuracy_high_school_world_history": 0.46153846153846156,
      "mmlu_eval_accuracy_human_aging": 0.6956521739130435,
      "mmlu_eval_accuracy_human_sexuality": 0.3333333333333333,
      "mmlu_eval_accuracy_international_law": 0.7692307692307693,
      "mmlu_eval_accuracy_jurisprudence": 0.2727272727272727,
      "mmlu_eval_accuracy_logical_fallacies": 0.6111111111111112,
      "mmlu_eval_accuracy_machine_learning": 0.2727272727272727,
      "mmlu_eval_accuracy_management": 0.7272727272727273,
      "mmlu_eval_accuracy_marketing": 0.72,
      "mmlu_eval_accuracy_medical_genetics": 0.6363636363636364,
      "mmlu_eval_accuracy_miscellaneous": 0.6627906976744186,
      "mmlu_eval_accuracy_moral_disputes": 0.4473684210526316,
      "mmlu_eval_accuracy_moral_scenarios": 0.29,
      "mmlu_eval_accuracy_nutrition": 0.5757575757575758,
      "mmlu_eval_accuracy_philosophy": 0.5588235294117647,
      "mmlu_eval_accuracy_prehistory": 0.42857142857142855,
      "mmlu_eval_accuracy_professional_accounting": 0.3225806451612903,
      "mmlu_eval_accuracy_professional_law": 0.3176470588235294,
      "mmlu_eval_accuracy_professional_medicine": 0.41935483870967744,
      "mmlu_eval_accuracy_professional_psychology": 0.4927536231884058,
      "mmlu_eval_accuracy_public_relations": 0.5833333333333334,
      "mmlu_eval_accuracy_security_studies": 0.48148148148148145,
      "mmlu_eval_accuracy_sociology": 0.6818181818181818,
      "mmlu_eval_accuracy_us_foreign_policy": 0.5454545454545454,
      "mmlu_eval_accuracy_virology": 0.6111111111111112,
      "mmlu_eval_accuracy_world_religions": 0.6842105263157895,
      "mmlu_loss": 0.8773848874586836,
      "step": 1000
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.0002,
      "loss": 0.6671,
      "step": 1010
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.0002,
      "loss": 0.5942,
      "step": 1020
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.0002,
      "loss": 0.6236,
      "step": 1030
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.0002,
      "loss": 0.6162,
      "step": 1040
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.0002,
      "loss": 0.734,
      "step": 1050
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.0002,
      "loss": 0.6108,
      "step": 1060
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0002,
      "loss": 0.6669,
      "step": 1070
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0002,
      "loss": 0.6991,
      "step": 1080
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0002,
      "loss": 0.6696,
      "step": 1090
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0002,
      "loss": 0.6511,
      "step": 1100
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0002,
      "loss": 0.6007,
      "step": 1110
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0002,
      "loss": 0.5543,
      "step": 1120
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0002,
      "loss": 0.6399,
      "step": 1130
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0002,
      "loss": 0.569,
      "step": 1140
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0002,
      "loss": 0.6436,
      "step": 1150
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0002,
      "loss": 0.4886,
      "step": 1160
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.0002,
      "loss": 0.5431,
      "step": 1170
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.0002,
      "loss": 0.5516,
      "step": 1180
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.0002,
      "loss": 0.5041,
      "step": 1190
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.0002,
      "loss": 0.6737,
      "step": 1200
    },
    {
      "epoch": 0.39,
      "eval_loss": 0.5647590160369873,
      "eval_runtime": 92.7185,
      "eval_samples_per_second": 10.785,
      "eval_steps_per_second": 5.393,
      "step": 1200
    },
    {
      "epoch": 0.39,
      "mmlu_eval_accuracy": 0.47729744414466774,
      "mmlu_eval_accuracy_abstract_algebra": 0.36363636363636365,
      "mmlu_eval_accuracy_anatomy": 0.6428571428571429,
      "mmlu_eval_accuracy_astronomy": 0.5,
      "mmlu_eval_accuracy_business_ethics": 0.36363636363636365,
      "mmlu_eval_accuracy_clinical_knowledge": 0.5172413793103449,
      "mmlu_eval_accuracy_college_biology": 0.4375,
      "mmlu_eval_accuracy_college_chemistry": 0.125,
      "mmlu_eval_accuracy_college_computer_science": 0.2727272727272727,
      "mmlu_eval_accuracy_college_mathematics": 0.36363636363636365,
      "mmlu_eval_accuracy_college_medicine": 0.45454545454545453,
      "mmlu_eval_accuracy_college_physics": 0.45454545454545453,
      "mmlu_eval_accuracy_computer_security": 0.45454545454545453,
      "mmlu_eval_accuracy_conceptual_physics": 0.5384615384615384,
      "mmlu_eval_accuracy_econometrics": 0.08333333333333333,
      "mmlu_eval_accuracy_electrical_engineering": 0.4375,
      "mmlu_eval_accuracy_elementary_mathematics": 0.34146341463414637,
      "mmlu_eval_accuracy_formal_logic": 0.21428571428571427,
      "mmlu_eval_accuracy_global_facts": 0.5,
      "mmlu_eval_accuracy_high_school_biology": 0.53125,
      "mmlu_eval_accuracy_high_school_chemistry": 0.3181818181818182,
      "mmlu_eval_accuracy_high_school_computer_science": 0.4444444444444444,
      "mmlu_eval_accuracy_high_school_european_history": 0.5555555555555556,
      "mmlu_eval_accuracy_high_school_geography": 0.7727272727272727,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.5714285714285714,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.46511627906976744,
      "mmlu_eval_accuracy_high_school_mathematics": 0.13793103448275862,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.46153846153846156,
      "mmlu_eval_accuracy_high_school_physics": 0.29411764705882354,
      "mmlu_eval_accuracy_high_school_psychology": 0.8,
      "mmlu_eval_accuracy_high_school_statistics": 0.30434782608695654,
      "mmlu_eval_accuracy_high_school_us_history": 0.5454545454545454,
      "mmlu_eval_accuracy_high_school_world_history": 0.5384615384615384,
      "mmlu_eval_accuracy_human_aging": 0.6956521739130435,
      "mmlu_eval_accuracy_human_sexuality": 0.4166666666666667,
      "mmlu_eval_accuracy_international_law": 0.7692307692307693,
      "mmlu_eval_accuracy_jurisprudence": 0.2727272727272727,
      "mmlu_eval_accuracy_logical_fallacies": 0.6111111111111112,
      "mmlu_eval_accuracy_machine_learning": 0.2727272727272727,
      "mmlu_eval_accuracy_management": 0.8181818181818182,
      "mmlu_eval_accuracy_marketing": 0.76,
      "mmlu_eval_accuracy_medical_genetics": 0.7272727272727273,
      "mmlu_eval_accuracy_miscellaneous": 0.6627906976744186,
      "mmlu_eval_accuracy_moral_disputes": 0.5,
      "mmlu_eval_accuracy_moral_scenarios": 0.27,
      "mmlu_eval_accuracy_nutrition": 0.6060606060606061,
      "mmlu_eval_accuracy_philosophy": 0.47058823529411764,
      "mmlu_eval_accuracy_prehistory": 0.5428571428571428,
      "mmlu_eval_accuracy_professional_accounting": 0.25806451612903225,
      "mmlu_eval_accuracy_professional_law": 0.3352941176470588,
      "mmlu_eval_accuracy_professional_medicine": 0.45161290322580644,
      "mmlu_eval_accuracy_professional_psychology": 0.34782608695652173,
      "mmlu_eval_accuracy_public_relations": 0.5,
      "mmlu_eval_accuracy_security_studies": 0.5185185185185185,
      "mmlu_eval_accuracy_sociology": 0.6818181818181818,
      "mmlu_eval_accuracy_us_foreign_policy": 0.7272727272727273,
      "mmlu_eval_accuracy_virology": 0.5,
      "mmlu_eval_accuracy_world_religions": 0.6842105263157895,
      "mmlu_loss": 0.9123951537490825,
      "step": 1200
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.0002,
      "loss": 0.4926,
      "step": 1210
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.0002,
      "loss": 0.5814,
      "step": 1220
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0002,
      "loss": 0.4985,
      "step": 1230
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0002,
      "loss": 0.5966,
      "step": 1240
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0002,
      "loss": 0.6493,
      "step": 1250
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0002,
      "loss": 0.5811,
      "step": 1260
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0002,
      "loss": 0.563,
      "step": 1270
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0002,
      "loss": 0.6206,
      "step": 1280
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.0002,
      "loss": 0.5657,
      "step": 1290
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.0002,
      "loss": 0.6061,
      "step": 1300
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.0002,
      "loss": 0.5776,
      "step": 1310
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0002,
      "loss": 0.6702,
      "step": 1320
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0002,
      "loss": 0.5688,
      "step": 1330
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.0002,
      "loss": 0.5237,
      "step": 1340
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0002,
      "loss": 0.5883,
      "step": 1350
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0002,
      "loss": 0.5206,
      "step": 1360
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0002,
      "loss": 0.6948,
      "step": 1370
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0002,
      "loss": 0.5656,
      "step": 1380
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0002,
      "loss": 0.6026,
      "step": 1390
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0002,
      "loss": 0.6319,
      "step": 1400
    },
    {
      "epoch": 0.45,
      "eval_loss": 0.5552607774734497,
      "eval_runtime": 92.7391,
      "eval_samples_per_second": 10.783,
      "eval_steps_per_second": 5.391,
      "step": 1400
    },
    {
      "epoch": 0.45,
      "mmlu_eval_accuracy": 0.4812787579159337,
      "mmlu_eval_accuracy_abstract_algebra": 0.36363636363636365,
      "mmlu_eval_accuracy_anatomy": 0.6428571428571429,
      "mmlu_eval_accuracy_astronomy": 0.375,
      "mmlu_eval_accuracy_business_ethics": 0.45454545454545453,
      "mmlu_eval_accuracy_clinical_knowledge": 0.41379310344827586,
      "mmlu_eval_accuracy_college_biology": 0.5,
      "mmlu_eval_accuracy_college_chemistry": 0.125,
      "mmlu_eval_accuracy_college_computer_science": 0.45454545454545453,
      "mmlu_eval_accuracy_college_mathematics": 0.2727272727272727,
      "mmlu_eval_accuracy_college_medicine": 0.45454545454545453,
      "mmlu_eval_accuracy_college_physics": 0.45454545454545453,
      "mmlu_eval_accuracy_computer_security": 0.7272727272727273,
      "mmlu_eval_accuracy_conceptual_physics": 0.5,
      "mmlu_eval_accuracy_econometrics": 0.16666666666666666,
      "mmlu_eval_accuracy_electrical_engineering": 0.375,
      "mmlu_eval_accuracy_elementary_mathematics": 0.34146341463414637,
      "mmlu_eval_accuracy_formal_logic": 0.2857142857142857,
      "mmlu_eval_accuracy_global_facts": 0.5,
      "mmlu_eval_accuracy_high_school_biology": 0.5,
      "mmlu_eval_accuracy_high_school_chemistry": 0.36363636363636365,
      "mmlu_eval_accuracy_high_school_computer_science": 0.3333333333333333,
      "mmlu_eval_accuracy_high_school_european_history": 0.5555555555555556,
      "mmlu_eval_accuracy_high_school_geography": 0.8181818181818182,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.5238095238095238,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.37209302325581395,
      "mmlu_eval_accuracy_high_school_mathematics": 0.1724137931034483,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.5384615384615384,
      "mmlu_eval_accuracy_high_school_physics": 0.29411764705882354,
      "mmlu_eval_accuracy_high_school_psychology": 0.7833333333333333,
      "mmlu_eval_accuracy_high_school_statistics": 0.391304347826087,
      "mmlu_eval_accuracy_high_school_us_history": 0.5909090909090909,
      "mmlu_eval_accuracy_high_school_world_history": 0.6153846153846154,
      "mmlu_eval_accuracy_human_aging": 0.6956521739130435,
      "mmlu_eval_accuracy_human_sexuality": 0.4166666666666667,
      "mmlu_eval_accuracy_international_law": 0.7692307692307693,
      "mmlu_eval_accuracy_jurisprudence": 0.2727272727272727,
      "mmlu_eval_accuracy_logical_fallacies": 0.5555555555555556,
      "mmlu_eval_accuracy_machine_learning": 0.2727272727272727,
      "mmlu_eval_accuracy_management": 0.8181818181818182,
      "mmlu_eval_accuracy_marketing": 0.76,
      "mmlu_eval_accuracy_medical_genetics": 0.7272727272727273,
      "mmlu_eval_accuracy_miscellaneous": 0.6744186046511628,
      "mmlu_eval_accuracy_moral_disputes": 0.5263157894736842,
      "mmlu_eval_accuracy_moral_scenarios": 0.25,
      "mmlu_eval_accuracy_nutrition": 0.5757575757575758,
      "mmlu_eval_accuracy_philosophy": 0.47058823529411764,
      "mmlu_eval_accuracy_prehistory": 0.4857142857142857,
      "mmlu_eval_accuracy_professional_accounting": 0.2903225806451613,
      "mmlu_eval_accuracy_professional_law": 0.3,
      "mmlu_eval_accuracy_professional_medicine": 0.4838709677419355,
      "mmlu_eval_accuracy_professional_psychology": 0.391304347826087,
      "mmlu_eval_accuracy_public_relations": 0.5,
      "mmlu_eval_accuracy_security_studies": 0.4444444444444444,
      "mmlu_eval_accuracy_sociology": 0.7272727272727273,
      "mmlu_eval_accuracy_us_foreign_policy": 0.6363636363636364,
      "mmlu_eval_accuracy_virology": 0.4444444444444444,
      "mmlu_eval_accuracy_world_religions": 0.6842105263157895,
      "mmlu_loss": 0.8453598066862896,
      "step": 1400
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0002,
      "loss": 0.4931,
      "step": 1410
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0002,
      "loss": 0.4544,
      "step": 1420
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.0002,
      "loss": 0.563,
      "step": 1430
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0002,
      "loss": 0.4629,
      "step": 1440
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0002,
      "loss": 0.4903,
      "step": 1450
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.0002,
      "loss": 0.5581,
      "step": 1460
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.0002,
      "loss": 0.6571,
      "step": 1470
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.0002,
      "loss": 0.5562,
      "step": 1480
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.0002,
      "loss": 0.4745,
      "step": 1490
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.0002,
      "loss": 0.6383,
      "step": 1500
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.0002,
      "loss": 0.6477,
      "step": 1510
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.0002,
      "loss": 0.5758,
      "step": 1520
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.0002,
      "loss": 0.5195,
      "step": 1530
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0002,
      "loss": 0.5691,
      "step": 1540
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0002,
      "loss": 0.5451,
      "step": 1550
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.0002,
      "loss": 0.5753,
      "step": 1560
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0002,
      "loss": 0.5859,
      "step": 1570
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0002,
      "loss": 0.6325,
      "step": 1580
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0002,
      "loss": 0.7023,
      "step": 1590
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.0002,
      "loss": 0.5009,
      "step": 1600
    },
    {
      "epoch": 0.52,
      "eval_loss": 0.5365247130393982,
      "eval_runtime": 92.8248,
      "eval_samples_per_second": 10.773,
      "eval_steps_per_second": 5.386,
      "step": 1600
    },
    {
      "epoch": 0.52,
      "mmlu_eval_accuracy": 0.4625997452457202,
      "mmlu_eval_accuracy_abstract_algebra": 0.36363636363636365,
      "mmlu_eval_accuracy_anatomy": 0.6428571428571429,
      "mmlu_eval_accuracy_astronomy": 0.4375,
      "mmlu_eval_accuracy_business_ethics": 0.45454545454545453,
      "mmlu_eval_accuracy_clinical_knowledge": 0.4827586206896552,
      "mmlu_eval_accuracy_college_biology": 0.4375,
      "mmlu_eval_accuracy_college_chemistry": 0.0,
      "mmlu_eval_accuracy_college_computer_science": 0.2727272727272727,
      "mmlu_eval_accuracy_college_mathematics": 0.2727272727272727,
      "mmlu_eval_accuracy_college_medicine": 0.4090909090909091,
      "mmlu_eval_accuracy_college_physics": 0.45454545454545453,
      "mmlu_eval_accuracy_computer_security": 0.5454545454545454,
      "mmlu_eval_accuracy_conceptual_physics": 0.46153846153846156,
      "mmlu_eval_accuracy_econometrics": 0.08333333333333333,
      "mmlu_eval_accuracy_electrical_engineering": 0.3125,
      "mmlu_eval_accuracy_elementary_mathematics": 0.34146341463414637,
      "mmlu_eval_accuracy_formal_logic": 0.2857142857142857,
      "mmlu_eval_accuracy_global_facts": 0.5,
      "mmlu_eval_accuracy_high_school_biology": 0.5625,
      "mmlu_eval_accuracy_high_school_chemistry": 0.2727272727272727,
      "mmlu_eval_accuracy_high_school_computer_science": 0.6666666666666666,
      "mmlu_eval_accuracy_high_school_european_history": 0.6666666666666666,
      "mmlu_eval_accuracy_high_school_geography": 0.8181818181818182,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.5238095238095238,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.4186046511627907,
      "mmlu_eval_accuracy_high_school_mathematics": 0.13793103448275862,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.38461538461538464,
      "mmlu_eval_accuracy_high_school_physics": 0.29411764705882354,
      "mmlu_eval_accuracy_high_school_psychology": 0.7333333333333333,
      "mmlu_eval_accuracy_high_school_statistics": 0.30434782608695654,
      "mmlu_eval_accuracy_high_school_us_history": 0.5454545454545454,
      "mmlu_eval_accuracy_high_school_world_history": 0.4230769230769231,
      "mmlu_eval_accuracy_human_aging": 0.6521739130434783,
      "mmlu_eval_accuracy_human_sexuality": 0.25,
      "mmlu_eval_accuracy_international_law": 0.8461538461538461,
      "mmlu_eval_accuracy_jurisprudence": 0.2727272727272727,
      "mmlu_eval_accuracy_logical_fallacies": 0.5555555555555556,
      "mmlu_eval_accuracy_machine_learning": 0.18181818181818182,
      "mmlu_eval_accuracy_management": 0.6363636363636364,
      "mmlu_eval_accuracy_marketing": 0.76,
      "mmlu_eval_accuracy_medical_genetics": 0.7272727272727273,
      "mmlu_eval_accuracy_miscellaneous": 0.6162790697674418,
      "mmlu_eval_accuracy_moral_disputes": 0.5263157894736842,
      "mmlu_eval_accuracy_moral_scenarios": 0.24,
      "mmlu_eval_accuracy_nutrition": 0.5454545454545454,
      "mmlu_eval_accuracy_philosophy": 0.4411764705882353,
      "mmlu_eval_accuracy_prehistory": 0.4857142857142857,
      "mmlu_eval_accuracy_professional_accounting": 0.2903225806451613,
      "mmlu_eval_accuracy_professional_law": 0.34705882352941175,
      "mmlu_eval_accuracy_professional_medicine": 0.45161290322580644,
      "mmlu_eval_accuracy_professional_psychology": 0.463768115942029,
      "mmlu_eval_accuracy_public_relations": 0.6666666666666666,
      "mmlu_eval_accuracy_security_studies": 0.5185185185185185,
      "mmlu_eval_accuracy_sociology": 0.5454545454545454,
      "mmlu_eval_accuracy_us_foreign_policy": 0.5454545454545454,
      "mmlu_eval_accuracy_virology": 0.5555555555555556,
      "mmlu_eval_accuracy_world_religions": 0.7368421052631579,
      "mmlu_loss": 0.8016339474243377,
      "step": 1600
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.0002,
      "loss": 0.5698,
      "step": 1610
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.0002,
      "loss": 0.5115,
      "step": 1620
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0002,
      "loss": 0.5908,
      "step": 1630
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0002,
      "loss": 0.6114,
      "step": 1640
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0002,
      "loss": 0.5335,
      "step": 1650
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.0002,
      "loss": 0.6275,
      "step": 1660
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.0002,
      "loss": 0.4862,
      "step": 1670
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.0002,
      "loss": 0.6334,
      "step": 1680
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.0002,
      "loss": 0.6494,
      "step": 1690
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.0002,
      "loss": 0.5298,
      "step": 1700
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.0002,
      "loss": 0.5359,
      "step": 1710
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.0002,
      "loss": 0.5049,
      "step": 1720
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.0002,
      "loss": 0.5015,
      "step": 1730
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.0002,
      "loss": 0.6523,
      "step": 1740
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.0002,
      "loss": 0.5047,
      "step": 1750
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.0002,
      "loss": 0.5321,
      "step": 1760
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.0002,
      "loss": 0.5595,
      "step": 1770
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0002,
      "loss": 0.7094,
      "step": 1780
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0002,
      "loss": 0.6828,
      "step": 1790
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0002,
      "loss": 0.5355,
      "step": 1800
    },
    {
      "epoch": 0.58,
      "eval_loss": 0.533126711845398,
      "eval_runtime": 92.8481,
      "eval_samples_per_second": 10.77,
      "eval_steps_per_second": 5.385,
      "step": 1800
    },
    {
      "epoch": 0.58,
      "mmlu_eval_accuracy": 0.4894584764122198,
      "mmlu_eval_accuracy_abstract_algebra": 0.36363636363636365,
      "mmlu_eval_accuracy_anatomy": 0.5,
      "mmlu_eval_accuracy_astronomy": 0.4375,
      "mmlu_eval_accuracy_business_ethics": 0.5454545454545454,
      "mmlu_eval_accuracy_clinical_knowledge": 0.3793103448275862,
      "mmlu_eval_accuracy_college_biology": 0.375,
      "mmlu_eval_accuracy_college_chemistry": 0.25,
      "mmlu_eval_accuracy_college_computer_science": 0.36363636363636365,
      "mmlu_eval_accuracy_college_mathematics": 0.2727272727272727,
      "mmlu_eval_accuracy_college_medicine": 0.45454545454545453,
      "mmlu_eval_accuracy_college_physics": 0.45454545454545453,
      "mmlu_eval_accuracy_computer_security": 0.36363636363636365,
      "mmlu_eval_accuracy_conceptual_physics": 0.5384615384615384,
      "mmlu_eval_accuracy_econometrics": 0.16666666666666666,
      "mmlu_eval_accuracy_electrical_engineering": 0.375,
      "mmlu_eval_accuracy_elementary_mathematics": 0.36585365853658536,
      "mmlu_eval_accuracy_formal_logic": 0.21428571428571427,
      "mmlu_eval_accuracy_global_facts": 0.6,
      "mmlu_eval_accuracy_high_school_biology": 0.40625,
      "mmlu_eval_accuracy_high_school_chemistry": 0.36363636363636365,
      "mmlu_eval_accuracy_high_school_computer_science": 0.6666666666666666,
      "mmlu_eval_accuracy_high_school_european_history": 0.6111111111111112,
      "mmlu_eval_accuracy_high_school_geography": 0.8636363636363636,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.6666666666666666,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.3488372093023256,
      "mmlu_eval_accuracy_high_school_mathematics": 0.20689655172413793,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.5,
      "mmlu_eval_accuracy_high_school_physics": 0.17647058823529413,
      "mmlu_eval_accuracy_high_school_psychology": 0.7333333333333333,
      "mmlu_eval_accuracy_high_school_statistics": 0.391304347826087,
      "mmlu_eval_accuracy_high_school_us_history": 0.5909090909090909,
      "mmlu_eval_accuracy_high_school_world_history": 0.46153846153846156,
      "mmlu_eval_accuracy_human_aging": 0.6521739130434783,
      "mmlu_eval_accuracy_human_sexuality": 0.5833333333333334,
      "mmlu_eval_accuracy_international_law": 0.8461538461538461,
      "mmlu_eval_accuracy_jurisprudence": 0.45454545454545453,
      "mmlu_eval_accuracy_logical_fallacies": 0.5,
      "mmlu_eval_accuracy_machine_learning": 0.2727272727272727,
      "mmlu_eval_accuracy_management": 0.8181818181818182,
      "mmlu_eval_accuracy_marketing": 0.72,
      "mmlu_eval_accuracy_medical_genetics": 0.7272727272727273,
      "mmlu_eval_accuracy_miscellaneous": 0.6511627906976745,
      "mmlu_eval_accuracy_moral_disputes": 0.5263157894736842,
      "mmlu_eval_accuracy_moral_scenarios": 0.25,
      "mmlu_eval_accuracy_nutrition": 0.5757575757575758,
      "mmlu_eval_accuracy_philosophy": 0.4411764705882353,
      "mmlu_eval_accuracy_prehistory": 0.4857142857142857,
      "mmlu_eval_accuracy_professional_accounting": 0.3870967741935484,
      "mmlu_eval_accuracy_professional_law": 0.3411764705882353,
      "mmlu_eval_accuracy_professional_medicine": 0.41935483870967744,
      "mmlu_eval_accuracy_professional_psychology": 0.37681159420289856,
      "mmlu_eval_accuracy_public_relations": 0.6666666666666666,
      "mmlu_eval_accuracy_security_studies": 0.5925925925925926,
      "mmlu_eval_accuracy_sociology": 0.7272727272727273,
      "mmlu_eval_accuracy_us_foreign_policy": 0.6363636363636364,
      "mmlu_eval_accuracy_virology": 0.5555555555555556,
      "mmlu_eval_accuracy_world_religions": 0.6842105263157895,
      "mmlu_loss": 0.9346268427978919,
      "step": 1800
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.0002,
      "loss": 0.4742,
      "step": 1810
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.0002,
      "loss": 0.5088,
      "step": 1820
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.0002,
      "loss": 0.459,
      "step": 1830
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.0002,
      "loss": 0.5195,
      "step": 1840
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0002,
      "loss": 0.617,
      "step": 1850
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0002,
      "loss": 0.4523,
      "step": 1860
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0002,
      "loss": 0.646,
      "step": 1870
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.0002,
      "loss": 0.4423,
      "step": 1880
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.0002,
      "loss": 0.5904,
      "step": 1890
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.0002,
      "loss": 0.5508,
      "step": 1900
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.0002,
      "loss": 0.4987,
      "step": 1910
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.0002,
      "loss": 0.5466,
      "step": 1920
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.0002,
      "loss": 0.5342,
      "step": 1930
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.0002,
      "loss": 0.6875,
      "step": 1940
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.0002,
      "loss": 0.5443,
      "step": 1950
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.0002,
      "loss": 0.5367,
      "step": 1960
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.0002,
      "loss": 0.5529,
      "step": 1970
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.0002,
      "loss": 0.438,
      "step": 1980
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.0002,
      "loss": 0.4422,
      "step": 1990
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.0002,
      "loss": 0.5624,
      "step": 2000
    },
    {
      "epoch": 0.65,
      "eval_loss": 0.5018913745880127,
      "eval_runtime": 92.7994,
      "eval_samples_per_second": 10.776,
      "eval_steps_per_second": 5.388,
      "step": 2000
    },
    {
      "epoch": 0.65,
      "mmlu_eval_accuracy": 0.4873839805836957,
      "mmlu_eval_accuracy_abstract_algebra": 0.18181818181818182,
      "mmlu_eval_accuracy_anatomy": 0.5714285714285714,
      "mmlu_eval_accuracy_astronomy": 0.5625,
      "mmlu_eval_accuracy_business_ethics": 0.6363636363636364,
      "mmlu_eval_accuracy_clinical_knowledge": 0.41379310344827586,
      "mmlu_eval_accuracy_college_biology": 0.375,
      "mmlu_eval_accuracy_college_chemistry": 0.25,
      "mmlu_eval_accuracy_college_computer_science": 0.45454545454545453,
      "mmlu_eval_accuracy_college_mathematics": 0.36363636363636365,
      "mmlu_eval_accuracy_college_medicine": 0.45454545454545453,
      "mmlu_eval_accuracy_college_physics": 0.45454545454545453,
      "mmlu_eval_accuracy_computer_security": 0.5454545454545454,
      "mmlu_eval_accuracy_conceptual_physics": 0.4230769230769231,
      "mmlu_eval_accuracy_econometrics": 0.08333333333333333,
      "mmlu_eval_accuracy_electrical_engineering": 0.3125,
      "mmlu_eval_accuracy_elementary_mathematics": 0.36585365853658536,
      "mmlu_eval_accuracy_formal_logic": 0.21428571428571427,
      "mmlu_eval_accuracy_global_facts": 0.5,
      "mmlu_eval_accuracy_high_school_biology": 0.4375,
      "mmlu_eval_accuracy_high_school_chemistry": 0.45454545454545453,
      "mmlu_eval_accuracy_high_school_computer_science": 0.5555555555555556,
      "mmlu_eval_accuracy_high_school_european_history": 0.6111111111111112,
      "mmlu_eval_accuracy_high_school_geography": 0.8181818181818182,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.6190476190476191,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.3953488372093023,
      "mmlu_eval_accuracy_high_school_mathematics": 0.1724137931034483,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.4230769230769231,
      "mmlu_eval_accuracy_high_school_physics": 0.29411764705882354,
      "mmlu_eval_accuracy_high_school_psychology": 0.7666666666666667,
      "mmlu_eval_accuracy_high_school_statistics": 0.391304347826087,
      "mmlu_eval_accuracy_high_school_us_history": 0.6363636363636364,
      "mmlu_eval_accuracy_high_school_world_history": 0.5384615384615384,
      "mmlu_eval_accuracy_human_aging": 0.7391304347826086,
      "mmlu_eval_accuracy_human_sexuality": 0.4166666666666667,
      "mmlu_eval_accuracy_international_law": 0.8461538461538461,
      "mmlu_eval_accuracy_jurisprudence": 0.36363636363636365,
      "mmlu_eval_accuracy_logical_fallacies": 0.5555555555555556,
      "mmlu_eval_accuracy_machine_learning": 0.18181818181818182,
      "mmlu_eval_accuracy_management": 0.8181818181818182,
      "mmlu_eval_accuracy_marketing": 0.72,
      "mmlu_eval_accuracy_medical_genetics": 0.7272727272727273,
      "mmlu_eval_accuracy_miscellaneous": 0.6744186046511628,
      "mmlu_eval_accuracy_moral_disputes": 0.5789473684210527,
      "mmlu_eval_accuracy_moral_scenarios": 0.27,
      "mmlu_eval_accuracy_nutrition": 0.5454545454545454,
      "mmlu_eval_accuracy_philosophy": 0.4411764705882353,
      "mmlu_eval_accuracy_prehistory": 0.4857142857142857,
      "mmlu_eval_accuracy_professional_accounting": 0.3548387096774194,
      "mmlu_eval_accuracy_professional_law": 0.3588235294117647,
      "mmlu_eval_accuracy_professional_medicine": 0.4838709677419355,
      "mmlu_eval_accuracy_professional_psychology": 0.4492753623188406,
      "mmlu_eval_accuracy_public_relations": 0.5833333333333334,
      "mmlu_eval_accuracy_security_studies": 0.48148148148148145,
      "mmlu_eval_accuracy_sociology": 0.5909090909090909,
      "mmlu_eval_accuracy_us_foreign_policy": 0.5454545454545454,
      "mmlu_eval_accuracy_virology": 0.5555555555555556,
      "mmlu_eval_accuracy_world_religions": 0.7368421052631579,
      "mmlu_loss": 0.8898517729972268,
      "step": 2000
    }
  ],
  "max_steps": 5000,
  "num_train_epochs": 2,
  "total_flos": 1.634186455273636e+17,
  "trial_name": null,
  "trial_params": null
}