{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9960768928991763,
  "global_step": 954,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 9.997799572243123e-06,
      "loss": 0.4974,
      "step": 10
    },
    {
      "epoch": 0.06,
      "learning_rate": 9.990195641770761e-06,
      "loss": 0.4841,
      "step": 20
    },
    {
      "epoch": 0.06,
      "eval_loss": 0.4602764844894409,
      "eval_runtime": 62.0223,
      "eval_samples_per_second": 13.431,
      "eval_steps_per_second": 0.855,
      "step": 20
    },
    {
      "epoch": 0.09,
      "learning_rate": 9.9771693033643e-06,
      "loss": 0.4748,
      "step": 30
    },
    {
      "epoch": 0.13,
      "learning_rate": 9.958734711603195e-06,
      "loss": 0.4678,
      "step": 40
    },
    {
      "epoch": 0.13,
      "eval_loss": 0.45144811272621155,
      "eval_runtime": 61.8568,
      "eval_samples_per_second": 13.467,
      "eval_steps_per_second": 0.857,
      "step": 40
    },
    {
      "epoch": 0.16,
      "learning_rate": 9.934911897741493e-06,
      "loss": 0.4687,
      "step": 50
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.905726747941616e-06,
      "loss": 0.4687,
      "step": 60
    },
    {
      "epoch": 0.19,
      "eval_loss": 0.44708773493766785,
      "eval_runtime": 62.0907,
      "eval_samples_per_second": 13.416,
      "eval_steps_per_second": 0.854,
      "step": 60
    },
    {
      "epoch": 0.22,
      "learning_rate": 9.871210975146135e-06,
      "loss": 0.473,
      "step": 70
    },
    {
      "epoch": 0.25,
      "learning_rate": 9.831402084618113e-06,
      "loss": 0.4608,
      "step": 80
    },
    {
      "epoch": 0.25,
      "eval_loss": 0.44395506381988525,
      "eval_runtime": 61.9484,
      "eval_samples_per_second": 13.447,
      "eval_steps_per_second": 0.856,
      "step": 80
    },
    {
      "epoch": 0.28,
      "learning_rate": 9.786343333187412e-06,
      "loss": 0.4542,
      "step": 90
    },
    {
      "epoch": 0.31,
      "learning_rate": 9.736083682247287e-06,
      "loss": 0.4593,
      "step": 100
    },
    {
      "epoch": 0.31,
      "eval_loss": 0.43974635004997253,
      "eval_runtime": 61.9084,
      "eval_samples_per_second": 13.455,
      "eval_steps_per_second": 0.856,
      "step": 100
    },
    {
      "epoch": 0.35,
      "learning_rate": 9.680677744552346e-06,
      "loss": 0.4681,
      "step": 110
    },
    {
      "epoch": 0.38,
      "learning_rate": 9.620185724875652e-06,
      "loss": 0.4488,
      "step": 120
    },
    {
      "epoch": 0.38,
      "eval_loss": 0.435758113861084,
      "eval_runtime": 61.8043,
      "eval_samples_per_second": 13.478,
      "eval_steps_per_second": 0.858,
      "step": 120
    },
    {
      "epoch": 0.41,
      "learning_rate": 9.55467335458948e-06,
      "loss": 0.4478,
      "step": 130
    },
    {
      "epoch": 0.44,
      "learning_rate": 9.484211820240797e-06,
      "loss": 0.4494,
      "step": 140
    },
    {
      "epoch": 0.44,
      "eval_loss": 0.434115469455719,
      "eval_runtime": 61.841,
      "eval_samples_per_second": 13.47,
      "eval_steps_per_second": 0.857,
      "step": 140
    },
    {
      "epoch": 0.47,
      "learning_rate": 9.408877686199078e-06,
      "loss": 0.44,
      "step": 150
    },
    {
      "epoch": 0.5,
      "learning_rate": 9.328752811460542e-06,
      "loss": 0.4477,
      "step": 160
    },
    {
      "epoch": 0.5,
      "eval_loss": 0.4319334030151367,
      "eval_runtime": 61.9956,
      "eval_samples_per_second": 13.436,
      "eval_steps_per_second": 0.855,
      "step": 160
    },
    {
      "epoch": 0.53,
      "learning_rate": 9.243924260699133e-06,
      "loss": 0.4465,
      "step": 170
    },
    {
      "epoch": 0.56,
      "learning_rate": 9.154484209661002e-06,
      "loss": 0.445,
      "step": 180
    },
    {
      "epoch": 0.56,
      "eval_loss": 0.42997926473617554,
      "eval_runtime": 62.0149,
      "eval_samples_per_second": 13.432,
      "eval_steps_per_second": 0.855,
      "step": 180
    },
    {
      "epoch": 0.6,
      "learning_rate": 9.060529845005184e-06,
      "loss": 0.4332,
      "step": 190
    },
    {
      "epoch": 0.63,
      "learning_rate": 8.962163258699397e-06,
      "loss": 0.4366,
      "step": 200
    },
    {
      "epoch": 0.63,
      "eval_loss": 0.4274918735027313,
      "eval_runtime": 61.8797,
      "eval_samples_per_second": 13.462,
      "eval_steps_per_second": 0.857,
      "step": 200
    },
    {
      "epoch": 0.66,
      "learning_rate": 8.859491337085643e-06,
      "loss": 0.428,
      "step": 210
    },
    {
      "epoch": 0.69,
      "learning_rate": 8.752625644736204e-06,
      "loss": 0.442,
      "step": 220
    },
    {
      "epoch": 0.69,
      "eval_loss": 0.42491650581359863,
      "eval_runtime": 62.1183,
      "eval_samples_per_second": 13.41,
      "eval_steps_per_second": 0.853,
      "step": 220
    },
    {
      "epoch": 0.72,
      "learning_rate": 8.641682303226197e-06,
      "loss": 0.4442,
      "step": 230
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.526781864954453e-06,
      "loss": 0.4424,
      "step": 240
    },
    {
      "epoch": 0.75,
      "eval_loss": 0.4228505492210388,
      "eval_runtime": 62.0338,
      "eval_samples_per_second": 13.428,
      "eval_steps_per_second": 0.854,
      "step": 240
    },
    {
      "epoch": 0.78,
      "learning_rate": 8.40804918214979e-06,
      "loss": 0.4301,
      "step": 250
    },
    {
      "epoch": 0.82,
      "learning_rate": 8.28561327120505e-06,
      "loss": 0.4427,
      "step": 260
    },
    {
      "epoch": 0.82,
      "eval_loss": 0.41998758912086487,
      "eval_runtime": 61.9344,
      "eval_samples_per_second": 13.45,
      "eval_steps_per_second": 0.856,
      "step": 260
    },
    {
      "epoch": 0.85,
      "learning_rate": 8.159607172486301e-06,
      "loss": 0.4316,
      "step": 270
    },
    {
      "epoch": 0.88,
      "learning_rate": 8.030167805769537e-06,
      "loss": 0.4372,
      "step": 280
    },
    {
      "epoch": 0.88,
      "eval_loss": 0.4175536036491394,
      "eval_runtime": 61.971,
      "eval_samples_per_second": 13.442,
      "eval_steps_per_second": 0.855,
      "step": 280
    },
    {
      "epoch": 0.91,
      "learning_rate": 7.897435821461964e-06,
      "loss": 0.4398,
      "step": 290
    },
    {
      "epoch": 0.94,
      "learning_rate": 7.761555447769548e-06,
      "loss": 0.4335,
      "step": 300
    },
    {
      "epoch": 0.94,
      "eval_loss": 0.4155929982662201,
      "eval_runtime": 61.9696,
      "eval_samples_per_second": 13.442,
      "eval_steps_per_second": 0.855,
      "step": 300
    },
    {
      "epoch": 0.97,
      "learning_rate": 7.622674333976863e-06,
      "loss": 0.4371,
      "step": 310
    },
    {
      "epoch": 1.0,
      "learning_rate": 7.4809433900095705e-06,
      "loss": 0.4088,
      "step": 320
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.41465404629707336,
      "eval_runtime": 61.9822,
      "eval_samples_per_second": 13.439,
      "eval_steps_per_second": 0.855,
      "step": 320
    },
    {
      "epoch": 1.04,
      "learning_rate": 7.336516622453833e-06,
      "loss": 0.3166,
      "step": 330
    },
    {
      "epoch": 1.07,
      "learning_rate": 7.1895509672108674e-06,
      "loss": 0.3145,
      "step": 340
    },
    {
      "epoch": 1.07,
      "eval_loss": 0.42051395773887634,
      "eval_runtime": 62.5118,
      "eval_samples_per_second": 13.325,
      "eval_steps_per_second": 0.848,
      "step": 340
    },
    {
      "epoch": 1.1,
      "learning_rate": 7.040206118968466e-06,
      "loss": 0.3136,
      "step": 350
    },
    {
      "epoch": 1.13,
      "learning_rate": 6.88864435767478e-06,
      "loss": 0.3151,
      "step": 360
    },
    {
      "epoch": 1.13,
      "eval_loss": 0.4205115735530853,
      "eval_runtime": 62.5792,
      "eval_samples_per_second": 13.311,
      "eval_steps_per_second": 0.847,
      "step": 360
    },
    {
      "epoch": 1.16,
      "learning_rate": 6.735030372202942e-06,
      "loss": 0.3137,
      "step": 370
    },
    {
      "epoch": 1.19,
      "learning_rate": 6.579531081398105e-06,
      "loss": 0.3019,
      "step": 380
    },
    {
      "epoch": 1.19,
      "eval_loss": 0.4216003119945526,
      "eval_runtime": 62.3646,
      "eval_samples_per_second": 13.357,
      "eval_steps_per_second": 0.85,
      "step": 380
    },
    {
      "epoch": 1.22,
      "learning_rate": 6.4223154527013755e-06,
      "loss": 0.3044,
      "step": 390
    },
    {
      "epoch": 1.26,
      "learning_rate": 6.263554318547713e-06,
      "loss": 0.3044,
      "step": 400
    },
    {
      "epoch": 1.26,
      "eval_loss": 0.4185173809528351,
      "eval_runtime": 61.9058,
      "eval_samples_per_second": 13.456,
      "eval_steps_per_second": 0.856,
      "step": 400
    },
    {
      "epoch": 1.29,
      "learning_rate": 6.1034201907373045e-06,
      "loss": 0.305,
      "step": 410
    },
    {
      "epoch": 1.32,
      "learning_rate": 5.942087072982131e-06,
      "loss": 0.3034,
      "step": 420
    },
    {
      "epoch": 1.32,
      "eval_loss": 0.41815003752708435,
      "eval_runtime": 62.0238,
      "eval_samples_per_second": 13.43,
      "eval_steps_per_second": 0.855,
      "step": 420
    },
    {
      "epoch": 1.35,
      "learning_rate": 5.779730271831384e-06,
      "loss": 0.3115,
      "step": 430
    },
    {
      "epoch": 1.38,
      "learning_rate": 5.616526206181215e-06,
      "loss": 0.3026,
      "step": 440
    },
    {
      "epoch": 1.38,
      "eval_loss": 0.41711267828941345,
      "eval_runtime": 62.5134,
      "eval_samples_per_second": 13.325,
      "eval_steps_per_second": 0.848,
      "step": 440
    },
    {
      "epoch": 1.41,
      "learning_rate": 5.4526522155758015e-06,
      "loss": 0.3077,
      "step": 450
    },
    {
      "epoch": 1.45,
      "learning_rate": 5.288286367508009e-06,
      "loss": 0.3062,
      "step": 460
    },
    {
      "epoch": 1.45,
      "eval_loss": 0.41751572489738464,
      "eval_runtime": 62.561,
      "eval_samples_per_second": 13.315,
      "eval_steps_per_second": 0.847,
      "step": 460
    },
    {
      "epoch": 1.48,
      "learning_rate": 5.123607263929075e-06,
      "loss": 0.3076,
      "step": 470
    },
    {
      "epoch": 1.51,
      "learning_rate": 4.958793847177518e-06,
      "loss": 0.315,
      "step": 480
    },
    {
      "epoch": 1.51,
      "eval_loss": 0.41455498337745667,
      "eval_runtime": 62.0669,
      "eval_samples_per_second": 13.421,
      "eval_steps_per_second": 0.854,
      "step": 480
    },
    {
      "epoch": 1.54,
      "learning_rate": 4.7940252055382115e-06,
      "loss": 0.3024,
      "step": 490
    },
    {
      "epoch": 1.57,
      "learning_rate": 4.629480378642832e-06,
      "loss": 0.3124,
      "step": 500
    },
    {
      "epoch": 1.57,
      "eval_loss": 0.41453319787979126,
      "eval_runtime": 61.9067,
      "eval_samples_per_second": 13.456,
      "eval_steps_per_second": 0.856,
      "step": 500
    },
    {
      "epoch": 1.6,
      "learning_rate": 4.46533816292321e-06,
      "loss": 0.31,
      "step": 510
    },
    {
      "epoch": 1.63,
      "learning_rate": 4.301776917328918e-06,
      "loss": 0.3096,
      "step": 520
    },
    {
      "epoch": 1.63,
      "eval_loss": 0.41273748874664307,
      "eval_runtime": 62.1417,
      "eval_samples_per_second": 13.405,
      "eval_steps_per_second": 0.853,
      "step": 520
    },
    {
      "epoch": 1.67,
      "learning_rate": 4.138974369520252e-06,
      "loss": 0.3044,
      "step": 530
    },
    {
      "epoch": 1.7,
      "learning_rate": 3.977107422747163e-06,
      "loss": 0.3178,
      "step": 540
    },
    {
      "epoch": 1.7,
      "eval_loss": 0.4111482501029968,
      "eval_runtime": 62.5639,
      "eval_samples_per_second": 13.314,
      "eval_steps_per_second": 0.847,
      "step": 540
    },
    {
      "epoch": 1.73,
      "learning_rate": 3.816351963624017e-06,
      "loss": 0.3102,
      "step": 550
    },
    {
      "epoch": 1.76,
      "learning_rate": 3.6568826710090353e-06,
      "loss": 0.3044,
      "step": 560
    },
    {
      "epoch": 1.76,
      "eval_loss": 0.4110707640647888,
      "eval_runtime": 62.5689,
      "eval_samples_per_second": 13.313,
      "eval_steps_per_second": 0.847,
      "step": 560
    },
    {
      "epoch": 1.79,
      "learning_rate": 3.4988728261960957e-06,
      "loss": 0.306,
      "step": 570
    },
    {
      "epoch": 1.82,
      "learning_rate": 3.3424941246251574e-06,
      "loss": 0.3078,
      "step": 580
    },
    {
      "epoch": 1.82,
      "eval_loss": 0.4091060757637024,
      "eval_runtime": 62.0676,
      "eval_samples_per_second": 13.421,
      "eval_steps_per_second": 0.854,
      "step": 580
    },
    {
      "epoch": 1.85,
      "learning_rate": 3.1879164893158713e-06,
      "loss": 0.2977,
      "step": 590
    },
    {
      "epoch": 1.89,
      "learning_rate": 3.035307886227156e-06,
      "loss": 0.2967,
      "step": 600
    },
    {
      "epoch": 1.89,
      "eval_loss": 0.4094270169734955,
      "eval_runtime": 62.0655,
      "eval_samples_per_second": 13.421,
      "eval_steps_per_second": 0.854,
      "step": 600
    },
    {
      "epoch": 1.92,
      "learning_rate": 2.8848341417433036e-06,
      "loss": 0.3069,
      "step": 610
    },
    {
      "epoch": 1.95,
      "learning_rate": 2.736658762485005e-06,
      "loss": 0.3068,
      "step": 620
    },
    {
      "epoch": 1.95,
      "eval_loss": 0.4080323278903961,
      "eval_runtime": 62.0098,
      "eval_samples_per_second": 13.433,
      "eval_steps_per_second": 0.855,
      "step": 620
    },
    {
      "epoch": 1.98,
      "learning_rate": 2.590942757641035e-06,
      "loss": 0.3037,
      "step": 630
    },
    {
      "epoch": 2.01,
      "learning_rate": 2.447844464013703e-06,
      "loss": 0.276,
      "step": 640
    },
    {
      "epoch": 2.01,
      "eval_loss": 0.4152510464191437,
      "eval_runtime": 61.9609,
      "eval_samples_per_second": 13.444,
      "eval_steps_per_second": 0.855,
      "step": 640
    },
    {
      "epoch": 2.04,
      "learning_rate": 2.3075193739681182e-06,
      "loss": 0.2327,
      "step": 650
    },
    {
      "epoch": 2.07,
      "learning_rate": 2.170119966472293e-06,
      "loss": 0.2288,
      "step": 660
    },
    {
      "epoch": 2.07,
      "eval_loss": 0.43204566836357117,
      "eval_runtime": 62.0792,
      "eval_samples_per_second": 13.418,
      "eval_steps_per_second": 0.854,
      "step": 660
    },
    {
      "epoch": 2.1,
      "learning_rate": 2.0357955414116075e-06,
      "loss": 0.2267,
      "step": 670
    },
    {
      "epoch": 2.14,
      "learning_rate": 1.9046920573577239e-06,
      "loss": 0.2244,
      "step": 680
    },
    {
      "epoch": 2.14,
      "eval_loss": 0.4292474389076233,
      "eval_runtime": 62.0223,
      "eval_samples_per_second": 13.431,
      "eval_steps_per_second": 0.855,
      "step": 680
    },
    {
      "epoch": 2.17,
      "learning_rate": 1.7769519729682105e-06,
      "loss": 0.2327,
      "step": 690
    },
    {
      "epoch": 2.2,
      "learning_rate": 1.6527140921892066e-06,
      "loss": 0.2336,
      "step": 700
    },
    {
      "epoch": 2.2,
      "eval_loss": 0.427610844373703,
      "eval_runtime": 62.1107,
      "eval_samples_per_second": 13.412,
      "eval_steps_per_second": 0.853,
      "step": 700
    },
    {
      "epoch": 2.23,
      "learning_rate": 1.532113413429357e-06,
      "loss": 0.2386,
      "step": 710
    },
    {
      "epoch": 2.26,
      "learning_rate": 1.4152809828688708e-06,
      "loss": 0.2266,
      "step": 720
    },
    {
      "epoch": 2.26,
      "eval_loss": 0.4290391206741333,
      "eval_runtime": 62.2449,
      "eval_samples_per_second": 13.383,
      "eval_steps_per_second": 0.851,
      "step": 720
    },
    {
      "epoch": 2.29,
      "learning_rate": 1.3023437520631426e-06,
      "loss": 0.2328,
      "step": 730
    },
    {
      "epoch": 2.32,
      "learning_rate": 1.1934244399956206e-06,
      "loss": 0.2312,
      "step": 740
    },
    {
      "epoch": 2.32,
      "eval_loss": 0.42950907349586487,
      "eval_runtime": 62.2915,
      "eval_samples_per_second": 13.373,
      "eval_steps_per_second": 0.851,
      "step": 740
    },
    {
      "epoch": 2.36,
      "learning_rate": 1.0886413997298595e-06,
      "loss": 0.2338,
      "step": 750
    },
    {
      "epoch": 2.39,
      "learning_rate": 9.881084898056197e-07,
      "loss": 0.2277,
      "step": 760
    },
    {
      "epoch": 2.39,
      "eval_loss": 0.4284292161464691,
      "eval_runtime": 62.4097,
      "eval_samples_per_second": 13.347,
      "eval_steps_per_second": 0.849,
      "step": 760
    },
    {
      "epoch": 2.42,
      "learning_rate": 8.919349505187813e-07,
      "loss": 0.2333,
      "step": 770
    },
    {
      "epoch": 2.45,
      "learning_rate": 8.002252852194992e-07,
      "loss": 0.2332,
      "step": 780
    },
    {
      "epoch": 2.45,
      "eval_loss": 0.42790091037750244,
      "eval_runtime": 62.4651,
      "eval_samples_per_second": 13.335,
      "eval_steps_per_second": 0.848,
      "step": 780
    },
    {
      "epoch": 2.48,
      "learning_rate": 7.130791467575676e-07,
      "loss": 0.2257,
      "step": 790
    },
    {
      "epoch": 2.51,
      "learning_rate": 6.305912291984229e-07,
      "loss": 0.2289,
      "step": 800
    },
    {
      "epoch": 2.51,
      "eval_loss": 0.42792582511901855,
      "eval_runtime": 62.5296,
      "eval_samples_per_second": 13.322,
      "eval_steps_per_second": 0.848,
      "step": 800
    },
    {
      "epoch": 2.54,
      "learning_rate": 5.528511649273932e-07,
      "loss": 0.2303,
      "step": 810
    },
    {
      "epoch": 2.58,
      "learning_rate": 4.799434272540576e-07,
      "loss": 0.2279,
      "step": 820
    },
    {
      "epoch": 2.58,
      "eval_loss": 0.4278266131877899,
      "eval_runtime": 62.5218,
      "eval_samples_per_second": 13.323,
      "eval_steps_per_second": 0.848,
      "step": 820
    },
    {
      "epoch": 2.61,
      "learning_rate": 4.1194723862250317e-07,
      "loss": 0.2267,
      "step": 830
    },
    {
      "epoch": 2.64,
      "learning_rate": 3.4893648452724636e-07,
      "loss": 0.2312,
      "step": 840
    },
    {
      "epoch": 2.64,
      "eval_loss": 0.4273243546485901,
      "eval_runtime": 62.5591,
      "eval_samples_per_second": 13.315,
      "eval_steps_per_second": 0.847,
      "step": 840
    },
    {
      "epoch": 2.67,
      "learning_rate": 2.9097963322834597e-07,
      "loss": 0.2306,
      "step": 850
    },
    {
      "epoch": 2.7,
      "learning_rate": 2.3813966135294574e-07,
      "loss": 0.2334,
      "step": 860
    },
    {
      "epoch": 2.7,
      "eval_loss": 0.42646506428718567,
      "eval_runtime": 62.6378,
      "eval_samples_per_second": 13.299,
      "eval_steps_per_second": 0.846,
      "step": 860
    },
    {
      "epoch": 2.73,
      "learning_rate": 1.9047398546410633e-07,
      "loss": 0.2306,
      "step": 870
    },
    {
      "epoch": 2.76,
      "learning_rate": 1.4803439967125022e-07,
      "loss": 0.2278,
      "step": 880
    },
    {
      "epoch": 2.76,
      "eval_loss": 0.42754805088043213,
      "eval_runtime": 62.1476,
      "eval_samples_per_second": 13.404,
      "eval_steps_per_second": 0.853,
      "step": 880
    },
    {
      "epoch": 2.8,
      "learning_rate": 1.1086701935005606e-07,
      "loss": 0.2296,
      "step": 890
    },
    {
      "epoch": 2.83,
      "learning_rate": 7.901223103291833e-08,
      "loss": 0.2295,
      "step": 900
    },
    {
      "epoch": 2.83,
      "eval_loss": 0.4276488721370697,
      "eval_runtime": 61.9957,
      "eval_samples_per_second": 13.436,
      "eval_steps_per_second": 0.855,
      "step": 900
    },
    {
      "epoch": 2.86,
      "learning_rate": 5.250464852444792e-08,
      "loss": 0.2334,
      "step": 910
    },
    {
      "epoch": 2.89,
      "learning_rate": 3.137307528968292e-08,
      "loss": 0.2292,
      "step": 920
    },
    {
      "epoch": 2.89,
      "eval_loss": 0.4273829162120819,
      "eval_runtime": 62.0452,
      "eval_samples_per_second": 13.426,
      "eval_steps_per_second": 0.854,
      "step": 920
    },
    {
      "epoch": 2.92,
      "learning_rate": 1.5640473155894566e-08,
      "loss": 0.2284,
      "step": 930
    },
    {
      "epoch": 2.95,
      "learning_rate": 5.323937361977338e-09,
      "loss": 0.2291,
      "step": 940
    },
    {
      "epoch": 2.95,
      "eval_loss": 0.42734310030937195,
      "eval_runtime": 61.9825,
      "eval_samples_per_second": 13.439,
      "eval_steps_per_second": 0.855,
      "step": 940
    },
    {
      "epoch": 2.98,
      "learning_rate": 4.346779825575853e-10,
      "loss": 0.2288,
      "step": 950
    },
    {
      "epoch": 3.0,
      "step": 954,
      "total_flos": 6.195687991759864e+18,
      "train_loss": 0.1527079766776327,
      "train_runtime": 29559.5606,
      "train_samples_per_second": 4.138,
      "train_steps_per_second": 0.032
    }
  ],
  "max_steps": 954,
  "num_train_epochs": 3,
  "total_flos": 6.195687991759864e+18,
  "trial_name": null,
  "trial_params": null
}