{ "best_metric": null, "best_model_checkpoint": null, "epoch": 195.09022923101935, "global_step": 300000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.33, "learning_rate": 0.00019999862922474268, "loss": 3.8845, "step": 500 }, { "epoch": 0.65, "learning_rate": 0.00019999451693655123, "loss": 3.3985, "step": 1000 }, { "epoch": 0.98, "learning_rate": 0.00019998766324816607, "loss": 3.2874, "step": 1500 }, { "epoch": 1.3, "learning_rate": 0.00019997806834748456, "loss": 3.0969, "step": 2000 }, { "epoch": 1.63, "learning_rate": 0.00019996573249755572, "loss": 3.1114, "step": 2500 }, { "epoch": 1.95, "learning_rate": 0.00019995065603657316, "loss": 3.0458, "step": 3000 }, { "epoch": 2.28, "learning_rate": 0.00019993283937786563, "loss": 2.9674, "step": 3500 }, { "epoch": 2.6, "learning_rate": 0.00019991228300988585, "loss": 3.0031, "step": 4000 }, { "epoch": 2.93, "learning_rate": 0.00019988898749619702, "loss": 2.8428, "step": 4500 }, { "epoch": 3.25, "learning_rate": 0.0001998629534754574, "loss": 2.7971, "step": 5000 }, { "epoch": 3.58, "learning_rate": 0.00019983418166140285, "loss": 2.8229, "step": 5500 }, { "epoch": 3.9, "learning_rate": 0.00019980267284282717, "loss": 2.7483, "step": 6000 }, { "epoch": 4.23, "learning_rate": 0.00019976842788356055, "loss": 2.7053, "step": 6500 }, { "epoch": 4.55, "learning_rate": 0.00019973144772244582, "loss": 2.6547, "step": 7000 }, { "epoch": 4.88, "learning_rate": 0.0001996917333733128, "loss": 2.6413, "step": 7500 }, { "epoch": 5.2, "learning_rate": 0.00019964928592495045, "loss": 2.6228, "step": 8000 }, { "epoch": 5.53, "learning_rate": 0.00019960410654107697, "loss": 2.5562, "step": 8500 }, { "epoch": 5.85, "learning_rate": 0.00019955619646030802, "loss": 2.5558, "step": 9000 }, { "epoch": 6.18, "learning_rate": 0.00019950555699612267, "loss": 2.5479, "step": 9500 }, { "epoch": 6.5, "learning_rate": 0.00019945218953682734, "loss": 2.5797, "step": 10000 }, { "epoch": 6.5, "eval_runtime": 0.0015, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 10000 }, { "epoch": 6.83, "learning_rate": 0.000199396095545518, "loss": 2.5277, "step": 10500 }, { "epoch": 7.15, "learning_rate": 0.00019933727656003963, "loss": 2.4641, "step": 11000 }, { "epoch": 7.48, "learning_rate": 0.00019927573419294456, "loss": 2.4278, "step": 11500 }, { "epoch": 7.8, "learning_rate": 0.0001992114701314478, "loss": 2.3937, "step": 12000 }, { "epoch": 8.13, "learning_rate": 0.00019914448613738106, "loss": 2.3969, "step": 12500 }, { "epoch": 8.45, "learning_rate": 0.00019907478404714436, "loss": 2.38, "step": 13000 }, { "epoch": 8.78, "learning_rate": 0.00019900236577165576, "loss": 2.3317, "step": 13500 }, { "epoch": 9.1, "learning_rate": 0.00019892723329629887, "loss": 2.3562, "step": 14000 }, { "epoch": 9.43, "learning_rate": 0.00019884938868086835, "loss": 2.3601, "step": 14500 }, { "epoch": 9.75, "learning_rate": 0.00019876883405951377, "loss": 2.352, "step": 15000 }, { "epoch": 10.08, "learning_rate": 0.00019868557164068074, "loss": 2.3208, "step": 15500 }, { "epoch": 10.4, "learning_rate": 0.0001985996037070505, "loss": 2.2729, "step": 16000 }, { "epoch": 10.73, "learning_rate": 0.0001985109326154774, "loss": 2.2739, "step": 16500 }, { "epoch": 11.06, "learning_rate": 0.0001984195607969242, "loss": 2.2425, "step": 17000 }, { "epoch": 11.38, "learning_rate": 0.0001983254907563955, "loss": 2.2249, "step": 17500 }, { "epoch": 11.71, "learning_rate": 0.0001982287250728689, "loss": 2.211, "step": 18000 }, { "epoch": 12.03, "learning_rate": 0.0001981292663992245, "loss": 2.2012, "step": 18500 }, { "epoch": 12.36, "learning_rate": 0.00019802711746217218, "loss": 2.2002, "step": 19000 }, { "epoch": 12.68, "learning_rate": 0.00019792228106217658, "loss": 2.1889, "step": 19500 }, { "epoch": 13.01, "learning_rate": 0.00019781476007338058, "loss": 2.1837, "step": 20000 }, { "epoch": 13.01, "eval_runtime": 0.0015, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 20000 }, { "epoch": 13.33, "learning_rate": 0.0001977045574435264, "loss": 2.1589, "step": 20500 }, { "epoch": 13.66, "learning_rate": 0.00019759167619387476, "loss": 2.1809, "step": 21000 }, { "epoch": 13.98, "learning_rate": 0.0001974761194191222, "loss": 2.1251, "step": 21500 }, { "epoch": 14.31, "learning_rate": 0.00019735789028731604, "loss": 2.0866, "step": 22000 }, { "epoch": 14.63, "learning_rate": 0.00019723699203976766, "loss": 2.1219, "step": 22500 }, { "epoch": 14.96, "learning_rate": 0.00019711342799096361, "loss": 2.121, "step": 23000 }, { "epoch": 15.28, "learning_rate": 0.00019698720152847468, "loss": 2.0853, "step": 23500 }, { "epoch": 15.61, "learning_rate": 0.0001968583161128631, "loss": 2.0996, "step": 24000 }, { "epoch": 15.93, "learning_rate": 0.0001967267752775877, "loss": 2.0758, "step": 24500 }, { "epoch": 16.26, "learning_rate": 0.00019659258262890683, "loss": 2.0532, "step": 25000 }, { "epoch": 16.58, "learning_rate": 0.00019645574184577982, "loss": 2.0133, "step": 25500 }, { "epoch": 16.91, "learning_rate": 0.00019631625667976583, "loss": 1.996, "step": 26000 }, { "epoch": 17.23, "learning_rate": 0.00019617413095492114, "loss": 2.0053, "step": 26500 }, { "epoch": 17.56, "learning_rate": 0.0001960293685676943, "loss": 2.0098, "step": 27000 }, { "epoch": 17.88, "learning_rate": 0.0001958819734868193, "loss": 2.0401, "step": 27500 }, { "epoch": 18.21, "learning_rate": 0.00019573194975320673, "loss": 1.9947, "step": 28000 }, { "epoch": 18.53, "learning_rate": 0.00019557930147983302, "loss": 1.9812, "step": 28500 }, { "epoch": 18.86, "learning_rate": 0.0001954240328516277, "loss": 2.0141, "step": 29000 }, { "epoch": 19.18, "learning_rate": 0.00019526614812535864, "loss": 1.9574, "step": 29500 }, { "epoch": 19.51, "learning_rate": 0.00019510565162951537, "loss": 1.9243, "step": 30000 }, { "epoch": 19.51, "eval_runtime": 0.0019, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 30000 }, { "epoch": 19.83, "learning_rate": 0.0001949425477641904, "loss": 1.923, "step": 30500 }, { "epoch": 20.16, "learning_rate": 0.0001947768410009586, "loss": 1.9097, "step": 31000 }, { "epoch": 20.48, "learning_rate": 0.00019460853588275454, "loss": 1.8941, "step": 31500 }, { "epoch": 20.81, "learning_rate": 0.00019443763702374812, "loss": 1.9159, "step": 32000 }, { "epoch": 21.13, "learning_rate": 0.00019426414910921787, "loss": 1.8793, "step": 32500 }, { "epoch": 21.46, "learning_rate": 0.00019408807689542257, "loss": 1.9007, "step": 33000 }, { "epoch": 21.79, "learning_rate": 0.0001939094252094709, "loss": 1.8847, "step": 33500 }, { "epoch": 22.11, "learning_rate": 0.00019372819894918915, "loss": 1.9223, "step": 34000 }, { "epoch": 22.44, "learning_rate": 0.00019354440308298675, "loss": 1.8933, "step": 34500 }, { "epoch": 22.76, "learning_rate": 0.00019335804264972018, "loss": 1.8753, "step": 35000 }, { "epoch": 23.09, "learning_rate": 0.0001931691227585549, "loss": 1.8752, "step": 35500 }, { "epoch": 23.41, "learning_rate": 0.00019297764858882514, "loss": 1.8201, "step": 36000 }, { "epoch": 23.74, "learning_rate": 0.000192783625389892, "loss": 1.8452, "step": 36500 }, { "epoch": 24.06, "learning_rate": 0.0001925870584809995, "loss": 1.8877, "step": 37000 }, { "epoch": 24.39, "learning_rate": 0.0001923879532511287, "loss": 1.8377, "step": 37500 }, { "epoch": 24.71, "learning_rate": 0.00019218631515885006, "loss": 1.8613, "step": 38000 }, { "epoch": 25.04, "learning_rate": 0.00019198214973217378, "loss": 1.8599, "step": 38500 }, { "epoch": 25.36, "learning_rate": 0.00019177546256839812, "loss": 1.8242, "step": 39000 }, { "epoch": 25.69, "learning_rate": 0.00019156625933395614, "loss": 1.8917, "step": 39500 }, { "epoch": 26.01, "learning_rate": 0.0001913545457642601, "loss": 1.8431, "step": 40000 }, { "epoch": 26.01, "eval_runtime": 0.0016, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 40000 }, { "epoch": 26.34, "learning_rate": 0.00019114032766354453, "loss": 1.8362, "step": 40500 }, { "epoch": 26.66, "learning_rate": 0.00019092361090470688, "loss": 1.8107, "step": 41000 }, { "epoch": 26.99, "learning_rate": 0.0001907044014291465, "loss": 1.846, "step": 41500 }, { "epoch": 27.31, "learning_rate": 0.00019048270524660196, "loss": 1.8259, "step": 42000 }, { "epoch": 27.64, "learning_rate": 0.00019025852843498607, "loss": 1.7882, "step": 42500 }, { "epoch": 27.96, "learning_rate": 0.00019003187714021938, "loss": 1.8067, "step": 43000 }, { "epoch": 28.29, "learning_rate": 0.00018980275757606157, "loss": 1.837, "step": 43500 }, { "epoch": 28.61, "learning_rate": 0.0001895711760239413, "loss": 1.7629, "step": 44000 }, { "epoch": 28.94, "learning_rate": 0.00018933713883278376, "loss": 1.7724, "step": 44500 }, { "epoch": 29.26, "learning_rate": 0.0001891006524188368, "loss": 1.7355, "step": 45000 }, { "epoch": 29.59, "learning_rate": 0.0001888617232654949, "loss": 1.7706, "step": 45500 }, { "epoch": 29.91, "learning_rate": 0.00018862035792312147, "loss": 1.7667, "step": 46000 }, { "epoch": 30.24, "learning_rate": 0.00018837656300886937, "loss": 1.7589, "step": 46500 }, { "epoch": 30.56, "learning_rate": 0.0001881303452064992, "loss": 1.7815, "step": 47000 }, { "epoch": 30.89, "learning_rate": 0.00018788171126619653, "loss": 1.7756, "step": 47500 }, { "epoch": 31.21, "learning_rate": 0.00018763066800438636, "loss": 1.7395, "step": 48000 }, { "epoch": 31.54, "learning_rate": 0.00018737722230354655, "loss": 1.7551, "step": 48500 }, { "epoch": 31.86, "learning_rate": 0.00018712138111201895, "loss": 1.7464, "step": 49000 }, { "epoch": 32.19, "learning_rate": 0.00018686315144381913, "loss": 1.7418, "step": 49500 }, { "epoch": 32.52, "learning_rate": 0.00018660254037844388, "loss": 1.7134, "step": 50000 }, { "epoch": 32.52, "eval_runtime": 0.0015, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 50000 }, { "epoch": 32.84, "learning_rate": 0.00018633955506067718, "loss": 1.7335, "step": 50500 }, { "epoch": 33.17, "learning_rate": 0.0001860742027003944, "loss": 1.698, "step": 51000 }, { "epoch": 33.49, "learning_rate": 0.00018580649057236447, "loss": 1.7395, "step": 51500 }, { "epoch": 33.82, "learning_rate": 0.00018553642601605068, "loss": 1.7032, "step": 52000 }, { "epoch": 34.14, "learning_rate": 0.00018526401643540922, "loss": 1.7098, "step": 52500 }, { "epoch": 34.47, "learning_rate": 0.00018498926929868642, "loss": 1.7279, "step": 53000 }, { "epoch": 34.79, "learning_rate": 0.00018471219213821375, "loss": 1.735, "step": 53500 }, { "epoch": 35.12, "learning_rate": 0.00018443279255020152, "loss": 1.6936, "step": 54000 }, { "epoch": 35.44, "learning_rate": 0.00018415107819453062, "loss": 1.6992, "step": 54500 }, { "epoch": 35.77, "learning_rate": 0.00018386705679454242, "loss": 1.706, "step": 55000 }, { "epoch": 36.09, "learning_rate": 0.00018358073613682706, "loss": 1.7532, "step": 55500 }, { "epoch": 36.42, "learning_rate": 0.00018329212407100994, "loss": 1.6917, "step": 56000 }, { "epoch": 36.74, "learning_rate": 0.00018300122850953675, "loss": 1.6996, "step": 56500 }, { "epoch": 37.07, "learning_rate": 0.00018270805742745617, "loss": 1.7492, "step": 57000 }, { "epoch": 37.39, "learning_rate": 0.00018241261886220154, "loss": 1.6722, "step": 57500 }, { "epoch": 37.72, "learning_rate": 0.00018211492091337042, "loss": 1.6911, "step": 58000 }, { "epoch": 38.04, "learning_rate": 0.00018181497174250236, "loss": 1.657, "step": 58500 }, { "epoch": 38.37, "learning_rate": 0.00018151277957285543, "loss": 1.6558, "step": 59000 }, { "epoch": 38.69, "learning_rate": 0.00018120835268918063, "loss": 1.6926, "step": 59500 }, { "epoch": 39.02, "learning_rate": 0.00018090169943749476, "loss": 1.6785, "step": 60000 }, { "epoch": 39.02, "eval_runtime": 0.0012, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 60000 }, { "epoch": 39.34, "learning_rate": 0.00018059282822485158, "loss": 1.6601, "step": 60500 }, { "epoch": 39.67, "learning_rate": 0.00018028174751911146, "loss": 1.7, "step": 61000 }, { "epoch": 39.99, "learning_rate": 0.00017996846584870908, "loss": 1.6339, "step": 61500 }, { "epoch": 40.32, "learning_rate": 0.00017965299180241963, "loss": 1.6275, "step": 62000 }, { "epoch": 40.64, "learning_rate": 0.00017933533402912354, "loss": 1.6315, "step": 62500 }, { "epoch": 40.97, "learning_rate": 0.00017901550123756906, "loss": 1.6423, "step": 63000 }, { "epoch": 41.29, "learning_rate": 0.00017869350219613375, "loss": 1.6913, "step": 63500 }, { "epoch": 41.62, "learning_rate": 0.000178369345732584, "loss": 1.623, "step": 64000 }, { "epoch": 41.94, "learning_rate": 0.000178043040733833, "loss": 1.6645, "step": 64500 }, { "epoch": 42.27, "learning_rate": 0.0001777145961456971, "loss": 1.6585, "step": 65000 }, { "epoch": 42.59, "learning_rate": 0.00017738402097265064, "loss": 1.5515, "step": 65500 }, { "epoch": 42.92, "learning_rate": 0.00017705132427757895, "loss": 1.6737, "step": 66000 }, { "epoch": 43.25, "learning_rate": 0.00017671651518153, "loss": 1.6143, "step": 66500 }, { "epoch": 43.57, "learning_rate": 0.00017637960286346425, "loss": 1.6923, "step": 67000 }, { "epoch": 43.9, "learning_rate": 0.0001760405965600031, "loss": 1.623, "step": 67500 }, { "epoch": 44.22, "learning_rate": 0.00017569950556517566, "loss": 1.6416, "step": 68000 }, { "epoch": 44.55, "learning_rate": 0.0001753563392301638, "loss": 1.6161, "step": 68500 }, { "epoch": 44.87, "learning_rate": 0.00017501110696304596, "loss": 1.6079, "step": 69000 }, { "epoch": 45.2, "learning_rate": 0.00017466381822853915, "loss": 1.5893, "step": 69500 }, { "epoch": 45.52, "learning_rate": 0.00017431448254773944, "loss": 1.6074, "step": 70000 }, { "epoch": 45.52, "eval_runtime": 0.0016, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 70000 }, { "epoch": 45.85, "learning_rate": 0.000173963109497861, "loss": 1.6074, "step": 70500 }, { "epoch": 46.17, "learning_rate": 0.00017360970871197346, "loss": 1.6223, "step": 71000 }, { "epoch": 46.5, "learning_rate": 0.0001732542898787379, "loss": 1.6068, "step": 71500 }, { "epoch": 46.82, "learning_rate": 0.00017289686274214118, "loss": 1.615, "step": 72000 }, { "epoch": 47.15, "learning_rate": 0.00017253743710122875, "loss": 1.5937, "step": 72500 }, { "epoch": 47.47, "learning_rate": 0.00017217602280983623, "loss": 1.6432, "step": 73000 }, { "epoch": 47.8, "learning_rate": 0.00017181262977631888, "loss": 1.6094, "step": 73500 }, { "epoch": 48.12, "learning_rate": 0.00017144726796328034, "loss": 1.6062, "step": 74000 }, { "epoch": 48.45, "learning_rate": 0.00017107994738729926, "loss": 1.5807, "step": 74500 }, { "epoch": 48.77, "learning_rate": 0.00017071067811865476, "loss": 1.6068, "step": 75000 }, { "epoch": 49.1, "learning_rate": 0.00017033947028105039, "loss": 1.5722, "step": 75500 }, { "epoch": 49.42, "learning_rate": 0.00016996633405133655, "loss": 1.5687, "step": 76000 }, { "epoch": 49.75, "learning_rate": 0.00016959127965923142, "loss": 1.6048, "step": 76500 }, { "epoch": 50.07, "learning_rate": 0.0001692143173870407, "loss": 1.5781, "step": 77000 }, { "epoch": 50.4, "learning_rate": 0.0001688354575693754, "loss": 1.5699, "step": 77500 }, { "epoch": 50.72, "learning_rate": 0.00016845471059286887, "loss": 1.5764, "step": 78000 }, { "epoch": 51.05, "learning_rate": 0.0001680720868958918, "loss": 1.577, "step": 78500 }, { "epoch": 51.37, "learning_rate": 0.00016768759696826608, "loss": 1.5438, "step": 79000 }, { "epoch": 51.7, "learning_rate": 0.00016730125135097735, "loss": 1.5751, "step": 79500 }, { "epoch": 52.02, "learning_rate": 0.00016691306063588583, "loss": 1.6465, "step": 80000 }, { "epoch": 52.02, "eval_runtime": 0.0012, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 80000 }, { "epoch": 52.35, "learning_rate": 0.00016652303546543608, "loss": 1.6022, "step": 80500 }, { "epoch": 52.67, "learning_rate": 0.00016613118653236518, "loss": 1.5599, "step": 81000 }, { "epoch": 53.0, "learning_rate": 0.0001657375245794096, "loss": 1.5909, "step": 81500 }, { "epoch": 53.32, "learning_rate": 0.00016534206039901057, "loss": 1.5483, "step": 82000 }, { "epoch": 53.65, "learning_rate": 0.00016494480483301836, "loss": 1.5531, "step": 82500 }, { "epoch": 53.97, "learning_rate": 0.00016454576877239507, "loss": 1.579, "step": 83000 }, { "epoch": 54.3, "learning_rate": 0.00016414496315691581, "loss": 1.5757, "step": 83500 }, { "epoch": 54.63, "learning_rate": 0.000163742398974869, "loss": 1.5794, "step": 84000 }, { "epoch": 54.95, "learning_rate": 0.000163338087262755, "loss": 1.6013, "step": 84500 }, { "epoch": 55.28, "learning_rate": 0.00016293203910498376, "loss": 1.5567, "step": 85000 }, { "epoch": 55.6, "learning_rate": 0.00016252426563357055, "loss": 1.5536, "step": 85500 }, { "epoch": 55.93, "learning_rate": 0.00016211477802783103, "loss": 1.5524, "step": 86000 }, { "epoch": 56.25, "learning_rate": 0.00016170358751407487, "loss": 1.5279, "step": 86500 }, { "epoch": 56.58, "learning_rate": 0.00016129070536529766, "loss": 1.5523, "step": 87000 }, { "epoch": 56.9, "learning_rate": 0.00016087614290087208, "loss": 1.5422, "step": 87500 }, { "epoch": 57.23, "learning_rate": 0.0001604599114862375, "loss": 1.561, "step": 88000 }, { "epoch": 57.55, "learning_rate": 0.00016004202253258842, "loss": 1.5354, "step": 88500 }, { "epoch": 57.88, "learning_rate": 0.0001596224874965616, "loss": 1.5443, "step": 89000 }, { "epoch": 58.2, "learning_rate": 0.00015920131787992197, "loss": 1.509, "step": 89500 }, { "epoch": 58.53, "learning_rate": 0.00015877852522924732, "loss": 1.5282, "step": 90000 }, { "epoch": 58.53, "eval_runtime": 0.0013, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 90000 }, { "epoch": 58.85, "learning_rate": 0.00015835412113561175, "loss": 1.5356, "step": 90500 }, { "epoch": 59.18, "learning_rate": 0.0001579281172342679, "loss": 1.5452, "step": 91000 }, { "epoch": 59.5, "learning_rate": 0.00015750052520432787, "loss": 1.5231, "step": 91500 }, { "epoch": 59.83, "learning_rate": 0.0001570713567684432, "loss": 1.5276, "step": 92000 }, { "epoch": 60.15, "learning_rate": 0.00015664062369248328, "loss": 1.525, "step": 92500 }, { "epoch": 60.48, "learning_rate": 0.00015620833778521307, "loss": 1.5317, "step": 93000 }, { "epoch": 60.8, "learning_rate": 0.00015577451089796905, "loss": 1.5324, "step": 93500 }, { "epoch": 61.13, "learning_rate": 0.00015533915492433443, "loss": 1.5415, "step": 94000 }, { "epoch": 61.45, "learning_rate": 0.0001549022817998132, "loss": 1.5377, "step": 94500 }, { "epoch": 61.78, "learning_rate": 0.00015446390350150273, "loss": 1.4902, "step": 95000 }, { "epoch": 62.1, "learning_rate": 0.0001540240320477655, "loss": 1.4984, "step": 95500 }, { "epoch": 62.43, "learning_rate": 0.00015358267949789966, "loss": 1.5049, "step": 96000 }, { "epoch": 62.75, "learning_rate": 0.00015313985795180828, "loss": 1.527, "step": 96500 }, { "epoch": 63.08, "learning_rate": 0.00015269557954966778, "loss": 1.4838, "step": 97000 }, { "epoch": 63.4, "learning_rate": 0.0001522498564715949, "loss": 1.5343, "step": 97500 }, { "epoch": 63.73, "learning_rate": 0.00015180270093731303, "loss": 1.5185, "step": 98000 }, { "epoch": 64.05, "learning_rate": 0.00015135412520581702, "loss": 1.5109, "step": 98500 }, { "epoch": 64.38, "learning_rate": 0.00015090414157503714, "loss": 1.5228, "step": 99000 }, { "epoch": 64.7, "learning_rate": 0.00015045276238150192, "loss": 1.4978, "step": 99500 }, { "epoch": 65.03, "learning_rate": 0.00015000000000000001, "loss": 1.4764, "step": 100000 }, { "epoch": 65.03, "eval_runtime": 0.0016, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 100000 }, { "epoch": 65.36, "learning_rate": 0.00014954586684324078, "loss": 1.5, "step": 100500 }, { "epoch": 65.68, "learning_rate": 0.00014909037536151409, "loss": 1.5275, "step": 101000 }, { "epoch": 66.01, "learning_rate": 0.00014863353804234905, "loss": 1.4656, "step": 101500 }, { "epoch": 66.33, "learning_rate": 0.00014817536741017152, "loss": 1.5234, "step": 102000 }, { "epoch": 66.66, "learning_rate": 0.00014771587602596084, "loss": 1.5018, "step": 102500 }, { "epoch": 66.98, "learning_rate": 0.00014725507648690543, "loss": 1.4856, "step": 103000 }, { "epoch": 67.31, "learning_rate": 0.00014679298142605734, "loss": 1.5146, "step": 103500 }, { "epoch": 67.63, "learning_rate": 0.00014632960351198618, "loss": 1.477, "step": 104000 }, { "epoch": 67.96, "learning_rate": 0.00014586495544843152, "loss": 1.5255, "step": 104500 }, { "epoch": 68.28, "learning_rate": 0.00014539904997395468, "loss": 1.4731, "step": 105000 }, { "epoch": 68.61, "learning_rate": 0.00014493189986158965, "loss": 1.5142, "step": 105500 }, { "epoch": 68.93, "learning_rate": 0.00014446351791849276, "loss": 1.4807, "step": 106000 }, { "epoch": 69.26, "learning_rate": 0.00014399391698559152, "loss": 1.4664, "step": 106500 }, { "epoch": 69.58, "learning_rate": 0.00014352310993723277, "loss": 1.4428, "step": 107000 }, { "epoch": 69.91, "learning_rate": 0.00014305110968082952, "loss": 1.4612, "step": 107500 }, { "epoch": 70.23, "learning_rate": 0.00014257792915650728, "loss": 1.4578, "step": 108000 }, { "epoch": 70.56, "learning_rate": 0.00014210358133674912, "loss": 1.4706, "step": 108500 }, { "epoch": 70.88, "learning_rate": 0.00014162807922604012, "loss": 1.4724, "step": 109000 }, { "epoch": 71.21, "learning_rate": 0.00014115143586051088, "loss": 1.4551, "step": 109500 }, { "epoch": 71.53, "learning_rate": 0.00014067366430758004, "loss": 1.4767, "step": 110000 }, { "epoch": 71.53, "eval_runtime": 0.0013, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 110000 }, { "epoch": 71.86, "learning_rate": 0.00014019477766559604, "loss": 1.4857, "step": 110500 }, { "epoch": 72.18, "learning_rate": 0.00013971478906347806, "loss": 1.4728, "step": 111000 }, { "epoch": 72.51, "learning_rate": 0.00013923371166035616, "loss": 1.4558, "step": 111500 }, { "epoch": 72.83, "learning_rate": 0.0001387515586452103, "loss": 1.3975, "step": 112000 }, { "epoch": 73.16, "learning_rate": 0.000138268343236509, "loss": 1.507, "step": 112500 }, { "epoch": 73.48, "learning_rate": 0.00013778407868184672, "loss": 1.4588, "step": 113000 }, { "epoch": 73.81, "learning_rate": 0.0001372987782575809, "loss": 1.4787, "step": 113500 }, { "epoch": 74.13, "learning_rate": 0.00013681245526846783, "loss": 1.4403, "step": 114000 }, { "epoch": 74.46, "learning_rate": 0.00013632512304729785, "loss": 1.4818, "step": 114500 }, { "epoch": 74.78, "learning_rate": 0.00013583679495453, "loss": 1.4553, "step": 115000 }, { "epoch": 75.11, "learning_rate": 0.00013534748437792573, "loss": 1.4651, "step": 115500 }, { "epoch": 75.43, "learning_rate": 0.00013485720473218154, "loss": 1.462, "step": 116000 }, { "epoch": 75.76, "learning_rate": 0.00013436596945856164, "loss": 1.4843, "step": 116500 }, { "epoch": 76.09, "learning_rate": 0.00013387379202452917, "loss": 1.4452, "step": 117000 }, { "epoch": 76.41, "learning_rate": 0.0001333806859233771, "loss": 1.4007, "step": 117500 }, { "epoch": 76.74, "learning_rate": 0.00013288666467385833, "loss": 1.4758, "step": 118000 }, { "epoch": 77.06, "learning_rate": 0.00013239174181981495, "loss": 1.505, "step": 118500 }, { "epoch": 77.39, "learning_rate": 0.00013189593092980702, "loss": 1.4362, "step": 119000 }, { "epoch": 77.71, "learning_rate": 0.00013139924559674052, "loss": 1.4407, "step": 119500 }, { "epoch": 78.04, "learning_rate": 0.00013090169943749476, "loss": 1.4661, "step": 120000 }, { "epoch": 78.04, "eval_runtime": 0.0013, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 120000 }, { "epoch": 78.36, "learning_rate": 0.00013040330609254903, "loss": 1.4859, "step": 120500 }, { "epoch": 78.69, "learning_rate": 0.00012990407922560868, "loss": 1.4639, "step": 121000 }, { "epoch": 79.01, "learning_rate": 0.0001294040325232304, "loss": 1.4493, "step": 121500 }, { "epoch": 79.34, "learning_rate": 0.00012890317969444716, "loss": 1.4161, "step": 122000 }, { "epoch": 79.66, "learning_rate": 0.00012840153447039228, "loss": 1.4757, "step": 122500 }, { "epoch": 79.99, "learning_rate": 0.00012789911060392294, "loss": 1.4217, "step": 123000 }, { "epoch": 80.31, "learning_rate": 0.00012739592186924328, "loss": 1.4079, "step": 123500 }, { "epoch": 80.64, "learning_rate": 0.00012689198206152657, "loss": 1.4328, "step": 124000 }, { "epoch": 80.96, "learning_rate": 0.0001263873049965373, "loss": 1.4509, "step": 124500 }, { "epoch": 81.29, "learning_rate": 0.00012588190451025207, "loss": 1.3947, "step": 125000 }, { "epoch": 81.61, "learning_rate": 0.00012537579445848058, "loss": 1.4561, "step": 125500 }, { "epoch": 81.94, "learning_rate": 0.0001248689887164855, "loss": 1.4229, "step": 126000 }, { "epoch": 82.26, "learning_rate": 0.00012436150117860225, "loss": 1.4349, "step": 126500 }, { "epoch": 82.59, "learning_rate": 0.0001238533457578581, "loss": 1.422, "step": 127000 }, { "epoch": 82.91, "learning_rate": 0.00012334453638559057, "loss": 1.4223, "step": 127500 }, { "epoch": 83.24, "learning_rate": 0.00012283508701106557, "loss": 1.404, "step": 128000 }, { "epoch": 83.56, "learning_rate": 0.00012232501160109514, "loss": 1.4299, "step": 128500 }, { "epoch": 83.89, "learning_rate": 0.00012181432413965428, "loss": 1.4128, "step": 129000 }, { "epoch": 84.21, "learning_rate": 0.00012130303862749767, "loss": 1.3965, "step": 129500 }, { "epoch": 84.54, "learning_rate": 0.00012079116908177593, "loss": 1.4154, "step": 130000 }, { "epoch": 84.54, "eval_runtime": 0.0013, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 130000 }, { "epoch": 84.86, "learning_rate": 0.00012027872953565125, "loss": 1.4085, "step": 130500 }, { "epoch": 85.19, "learning_rate": 0.00011976573403791262, "loss": 1.4114, "step": 131000 }, { "epoch": 85.51, "learning_rate": 0.00011925219665259075, "loss": 1.4066, "step": 131500 }, { "epoch": 85.84, "learning_rate": 0.00011873813145857249, "loss": 1.4308, "step": 132000 }, { "epoch": 86.16, "learning_rate": 0.00011822355254921478, "loss": 1.4277, "step": 132500 }, { "epoch": 86.49, "learning_rate": 0.00011770847403195834, "loss": 1.3809, "step": 133000 }, { "epoch": 86.82, "learning_rate": 0.00011719291002794096, "loss": 1.4093, "step": 133500 }, { "epoch": 87.14, "learning_rate": 0.00011667687467161024, "loss": 1.4209, "step": 134000 }, { "epoch": 87.47, "learning_rate": 0.00011616038211033613, "loss": 1.4072, "step": 134500 }, { "epoch": 87.79, "learning_rate": 0.0001156434465040231, "loss": 1.4197, "step": 135000 }, { "epoch": 88.12, "learning_rate": 0.00011512608202472194, "loss": 1.3941, "step": 135500 }, { "epoch": 88.44, "learning_rate": 0.00011460830285624118, "loss": 1.3687, "step": 136000 }, { "epoch": 88.77, "learning_rate": 0.00011409012319375827, "loss": 1.3804, "step": 136500 }, { "epoch": 89.09, "learning_rate": 0.00011357155724343045, "loss": 1.3745, "step": 137000 }, { "epoch": 89.42, "learning_rate": 0.00011305261922200519, "loss": 1.4224, "step": 137500 }, { "epoch": 89.74, "learning_rate": 0.00011253332335643043, "loss": 1.3903, "step": 138000 }, { "epoch": 90.07, "learning_rate": 0.00011201368388346471, "loss": 1.4107, "step": 138500 }, { "epoch": 90.39, "learning_rate": 0.00011149371504928668, "loss": 1.4075, "step": 139000 }, { "epoch": 90.72, "learning_rate": 0.00011097343110910452, "loss": 1.3999, "step": 139500 }, { "epoch": 91.04, "learning_rate": 0.00011045284632676536, "loss": 1.4005, "step": 140000 }, { "epoch": 91.04, "eval_runtime": 0.0012, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 140000 }, { "epoch": 91.37, "learning_rate": 0.00010993197497436391, "loss": 1.4144, "step": 140500 }, { "epoch": 91.69, "learning_rate": 0.00010941083133185146, "loss": 1.4213, "step": 141000 }, { "epoch": 92.02, "learning_rate": 0.00010888942968664417, "loss": 1.3982, "step": 141500 }, { "epoch": 92.34, "learning_rate": 0.00010836778433323158, "loss": 1.4067, "step": 142000 }, { "epoch": 92.67, "learning_rate": 0.0001078459095727845, "loss": 1.4014, "step": 142500 }, { "epoch": 92.99, "learning_rate": 0.00010732381971276318, "loss": 1.408, "step": 143000 }, { "epoch": 93.32, "learning_rate": 0.00010680152906652483, "loss": 1.3757, "step": 143500 }, { "epoch": 93.64, "learning_rate": 0.00010627905195293135, "loss": 1.3949, "step": 144000 }, { "epoch": 93.97, "learning_rate": 0.00010575640269595675, "loss": 1.3828, "step": 144500 }, { "epoch": 94.29, "learning_rate": 0.0001052335956242944, "loss": 1.3768, "step": 145000 }, { "epoch": 94.62, "learning_rate": 0.00010471064507096426, "loss": 1.3962, "step": 145500 }, { "epoch": 94.94, "learning_rate": 0.00010418756537291996, "loss": 1.3791, "step": 146000 }, { "epoch": 95.27, "learning_rate": 0.00010366437087065564, "loss": 1.3751, "step": 146500 }, { "epoch": 95.59, "learning_rate": 0.00010314107590781284, "loss": 1.3748, "step": 147000 }, { "epoch": 95.92, "learning_rate": 0.00010261769483078733, "loss": 1.3773, "step": 147500 }, { "epoch": 96.24, "learning_rate": 0.0001020942419883357, "loss": 1.3847, "step": 148000 }, { "epoch": 96.57, "learning_rate": 0.00010157073173118208, "loss": 1.3949, "step": 148500 }, { "epoch": 96.89, "learning_rate": 0.00010104717841162458, "loss": 1.3732, "step": 149000 }, { "epoch": 97.22, "learning_rate": 0.00010052359638314195, "loss": 1.3609, "step": 149500 }, { "epoch": 97.55, "learning_rate": 0.0001, "loss": 1.3965, "step": 150000 }, { "epoch": 97.55, "eval_runtime": 0.0012, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 150000 }, { "epoch": 97.87, "learning_rate": 9.947640361685804e-05, "loss": 1.3683, "step": 150500 }, { "epoch": 98.2, "learning_rate": 9.895282158837545e-05, "loss": 1.3975, "step": 151000 }, { "epoch": 98.52, "learning_rate": 9.842926826881796e-05, "loss": 1.3746, "step": 151500 }, { "epoch": 98.85, "learning_rate": 9.790575801166432e-05, "loss": 1.3471, "step": 152000 }, { "epoch": 99.17, "learning_rate": 9.73823051692127e-05, "loss": 1.3849, "step": 152500 }, { "epoch": 99.5, "learning_rate": 9.685892409218717e-05, "loss": 1.3635, "step": 153000 }, { "epoch": 99.82, "learning_rate": 9.633562912934436e-05, "loss": 1.3589, "step": 153500 }, { "epoch": 100.15, "learning_rate": 9.581243462708006e-05, "loss": 1.3696, "step": 154000 }, { "epoch": 100.47, "learning_rate": 9.528935492903575e-05, "loss": 1.3769, "step": 154500 }, { "epoch": 100.8, "learning_rate": 9.476640437570562e-05, "loss": 1.3547, "step": 155000 }, { "epoch": 101.12, "learning_rate": 9.424359730404329e-05, "loss": 1.3267, "step": 155500 }, { "epoch": 101.45, "learning_rate": 9.372094804706867e-05, "loss": 1.3589, "step": 156000 }, { "epoch": 101.77, "learning_rate": 9.319847093347522e-05, "loss": 1.3543, "step": 156500 }, { "epoch": 102.1, "learning_rate": 9.267618028723686e-05, "loss": 1.3551, "step": 157000 }, { "epoch": 102.42, "learning_rate": 9.215409042721552e-05, "loss": 1.3655, "step": 157500 }, { "epoch": 102.75, "learning_rate": 9.163221566676847e-05, "loss": 1.3621, "step": 158000 }, { "epoch": 103.07, "learning_rate": 9.111057031335585e-05, "loss": 1.3626, "step": 158500 }, { "epoch": 103.4, "learning_rate": 9.058916866814858e-05, "loss": 1.3766, "step": 159000 }, { "epoch": 103.72, "learning_rate": 9.006802502563612e-05, "loss": 1.3416, "step": 159500 }, { "epoch": 104.05, "learning_rate": 8.954715367323468e-05, "loss": 1.3788, "step": 160000 }, { "epoch": 104.05, "eval_runtime": 0.0016, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 160000 }, { "epoch": 104.37, "learning_rate": 8.902656889089548e-05, "loss": 1.321, "step": 160500 }, { "epoch": 104.7, "learning_rate": 8.850628495071336e-05, "loss": 1.3473, "step": 161000 }, { "epoch": 105.02, "learning_rate": 8.79863161165353e-05, "loss": 1.318, "step": 161500 }, { "epoch": 105.35, "learning_rate": 8.746667664356956e-05, "loss": 1.341, "step": 162000 }, { "epoch": 105.67, "learning_rate": 8.694738077799488e-05, "loss": 1.3762, "step": 162500 }, { "epoch": 106.0, "learning_rate": 8.642844275656957e-05, "loss": 1.3681, "step": 163000 }, { "epoch": 106.32, "learning_rate": 8.590987680624174e-05, "loss": 1.3809, "step": 163500 }, { "epoch": 106.65, "learning_rate": 8.539169714375885e-05, "loss": 1.3593, "step": 164000 }, { "epoch": 106.97, "learning_rate": 8.487391797527808e-05, "loss": 1.3202, "step": 164500 }, { "epoch": 107.3, "learning_rate": 8.435655349597689e-05, "loss": 1.3989, "step": 165000 }, { "epoch": 107.62, "learning_rate": 8.383961788966391e-05, "loss": 1.3576, "step": 165500 }, { "epoch": 107.95, "learning_rate": 8.332312532838978e-05, "loss": 1.3826, "step": 166000 }, { "epoch": 108.28, "learning_rate": 8.280708997205904e-05, "loss": 1.3641, "step": 166500 }, { "epoch": 108.6, "learning_rate": 8.229152596804168e-05, "loss": 1.3701, "step": 167000 }, { "epoch": 108.93, "learning_rate": 8.177644745078526e-05, "loss": 1.3053, "step": 167500 }, { "epoch": 109.25, "learning_rate": 8.126186854142752e-05, "loss": 1.3344, "step": 168000 }, { "epoch": 109.58, "learning_rate": 8.074780334740928e-05, "loss": 1.329, "step": 168500 }, { "epoch": 109.9, "learning_rate": 8.023426596208739e-05, "loss": 1.3179, "step": 169000 }, { "epoch": 110.23, "learning_rate": 7.972127046434878e-05, "loss": 1.3291, "step": 169500 }, { "epoch": 110.55, "learning_rate": 7.920883091822408e-05, "loss": 1.3191, "step": 170000 }, { "epoch": 110.55, "eval_runtime": 0.0014, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 170000 }, { "epoch": 110.88, "learning_rate": 7.869696137250235e-05, "loss": 1.3441, "step": 170500 }, { "epoch": 111.2, "learning_rate": 7.818567586034577e-05, "loss": 1.354, "step": 171000 }, { "epoch": 111.53, "learning_rate": 7.767498839890488e-05, "loss": 1.3219, "step": 171500 }, { "epoch": 111.85, "learning_rate": 7.716491298893442e-05, "loss": 1.3389, "step": 172000 }, { "epoch": 112.18, "learning_rate": 7.66554636144095e-05, "loss": 1.3039, "step": 172500 }, { "epoch": 112.5, "learning_rate": 7.614665424214193e-05, "loss": 1.3437, "step": 173000 }, { "epoch": 112.83, "learning_rate": 7.563849882139776e-05, "loss": 1.3324, "step": 173500 }, { "epoch": 113.15, "learning_rate": 7.513101128351454e-05, "loss": 1.3324, "step": 174000 }, { "epoch": 113.48, "learning_rate": 7.462420554151944e-05, "loss": 1.3431, "step": 174500 }, { "epoch": 113.8, "learning_rate": 7.411809548974792e-05, "loss": 1.3103, "step": 175000 }, { "epoch": 114.13, "learning_rate": 7.361269500346274e-05, "loss": 1.3188, "step": 175500 }, { "epoch": 114.45, "learning_rate": 7.310801793847344e-05, "loss": 1.3246, "step": 176000 }, { "epoch": 114.78, "learning_rate": 7.260407813075676e-05, "loss": 1.3444, "step": 176500 }, { "epoch": 115.1, "learning_rate": 7.210088939607708e-05, "loss": 1.3134, "step": 177000 }, { "epoch": 115.43, "learning_rate": 7.159846552960774e-05, "loss": 1.3163, "step": 177500 }, { "epoch": 115.75, "learning_rate": 7.109682030555283e-05, "loss": 1.3096, "step": 178000 }, { "epoch": 116.08, "learning_rate": 7.059596747676962e-05, "loss": 1.2948, "step": 178500 }, { "epoch": 116.4, "learning_rate": 7.009592077439134e-05, "loss": 1.3084, "step": 179000 }, { "epoch": 116.73, "learning_rate": 6.959669390745097e-05, "loss": 1.3069, "step": 179500 }, { "epoch": 117.05, "learning_rate": 6.909830056250527e-05, "loss": 1.3097, "step": 180000 }, { "epoch": 117.05, "eval_runtime": 0.0013, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 180000 }, { "epoch": 117.38, "learning_rate": 6.860075440325951e-05, "loss": 1.3253, "step": 180500 }, { "epoch": 117.7, "learning_rate": 6.8104069070193e-05, "loss": 1.326, "step": 181000 }, { "epoch": 118.03, "learning_rate": 6.760825818018508e-05, "loss": 1.2957, "step": 181500 }, { "epoch": 118.35, "learning_rate": 6.711333532614168e-05, "loss": 1.316, "step": 182000 }, { "epoch": 118.68, "learning_rate": 6.661931407662292e-05, "loss": 1.3295, "step": 182500 }, { "epoch": 119.01, "learning_rate": 6.612620797547087e-05, "loss": 1.2891, "step": 183000 }, { "epoch": 119.33, "learning_rate": 6.56340305414384e-05, "loss": 1.3008, "step": 183500 }, { "epoch": 119.66, "learning_rate": 6.51427952678185e-05, "loss": 1.3158, "step": 184000 }, { "epoch": 119.98, "learning_rate": 6.465251562207431e-05, "loss": 1.3154, "step": 184500 }, { "epoch": 120.31, "learning_rate": 6.416320504546997e-05, "loss": 1.3166, "step": 185000 }, { "epoch": 120.63, "learning_rate": 6.367487695270217e-05, "loss": 1.3134, "step": 185500 }, { "epoch": 120.96, "learning_rate": 6.318754473153221e-05, "loss": 1.308, "step": 186000 }, { "epoch": 121.28, "learning_rate": 6.27012217424191e-05, "loss": 1.3229, "step": 186500 }, { "epoch": 121.61, "learning_rate": 6.22159213181533e-05, "loss": 1.2987, "step": 187000 }, { "epoch": 121.93, "learning_rate": 6.173165676349103e-05, "loss": 1.3054, "step": 187500 }, { "epoch": 122.26, "learning_rate": 6.12484413547897e-05, "loss": 1.2986, "step": 188000 }, { "epoch": 122.58, "learning_rate": 6.076628833964388e-05, "loss": 1.2911, "step": 188500 }, { "epoch": 122.91, "learning_rate": 6.0285210936521955e-05, "loss": 1.2844, "step": 189000 }, { "epoch": 123.23, "learning_rate": 5.9805222334404e-05, "loss": 1.2987, "step": 189500 }, { "epoch": 123.56, "learning_rate": 5.9326335692419995e-05, "loss": 1.325, "step": 190000 }, { "epoch": 123.56, "eval_runtime": 0.0012, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 190000 }, { "epoch": 123.88, "learning_rate": 5.884856413948913e-05, "loss": 1.3178, "step": 190500 }, { "epoch": 124.21, "learning_rate": 5.83719207739599e-05, "loss": 1.3022, "step": 191000 }, { "epoch": 124.53, "learning_rate": 5.789641866325091e-05, "loss": 1.3017, "step": 191500 }, { "epoch": 124.86, "learning_rate": 5.7422070843492734e-05, "loss": 1.2894, "step": 192000 }, { "epoch": 125.18, "learning_rate": 5.694889031917047e-05, "loss": 1.2886, "step": 192500 }, { "epoch": 125.51, "learning_rate": 5.647689006276726e-05, "loss": 1.3058, "step": 193000 }, { "epoch": 125.83, "learning_rate": 5.6006083014408484e-05, "loss": 1.3201, "step": 193500 }, { "epoch": 126.16, "learning_rate": 5.553648208150728e-05, "loss": 1.2908, "step": 194000 }, { "epoch": 126.48, "learning_rate": 5.506810013841036e-05, "loss": 1.2965, "step": 194500 }, { "epoch": 126.81, "learning_rate": 5.4600950026045326e-05, "loss": 1.296, "step": 195000 }, { "epoch": 127.13, "learning_rate": 5.4135044551568546e-05, "loss": 1.2668, "step": 195500 }, { "epoch": 127.46, "learning_rate": 5.3670396488013854e-05, "loss": 1.3238, "step": 196000 }, { "epoch": 127.78, "learning_rate": 5.320701857394268e-05, "loss": 1.3058, "step": 196500 }, { "epoch": 128.11, "learning_rate": 5.274492351309461e-05, "loss": 1.2915, "step": 197000 }, { "epoch": 128.43, "learning_rate": 5.2284123974039154e-05, "loss": 1.2827, "step": 197500 }, { "epoch": 128.76, "learning_rate": 5.182463258982846e-05, "loss": 1.2545, "step": 198000 }, { "epoch": 129.08, "learning_rate": 5.1366461957650954e-05, "loss": 1.3182, "step": 198500 }, { "epoch": 129.41, "learning_rate": 5.090962463848592e-05, "loss": 1.2939, "step": 199000 }, { "epoch": 129.74, "learning_rate": 5.045413315675924e-05, "loss": 1.3114, "step": 199500 }, { "epoch": 130.06, "learning_rate": 5.000000000000002e-05, "loss": 1.285, "step": 200000 }, { "epoch": 130.06, "eval_runtime": 0.0014, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 200000 }, { "epoch": 130.39, "learning_rate": 4.9547237618498085e-05, "loss": 1.2967, "step": 200500 }, { "epoch": 130.71, "learning_rate": 4.909585842496287e-05, "loss": 1.2901, "step": 201000 }, { "epoch": 131.04, "learning_rate": 4.864587479418302e-05, "loss": 1.2764, "step": 201500 }, { "epoch": 131.36, "learning_rate": 4.8197299062686995e-05, "loss": 1.2933, "step": 202000 }, { "epoch": 131.69, "learning_rate": 4.7750143528405126e-05, "loss": 1.3121, "step": 202500 }, { "epoch": 132.01, "learning_rate": 4.7304420450332244e-05, "loss": 1.2743, "step": 203000 }, { "epoch": 132.34, "learning_rate": 4.68601420481917e-05, "loss": 1.2615, "step": 203500 }, { "epoch": 132.66, "learning_rate": 4.6417320502100316e-05, "loss": 1.2778, "step": 204000 }, { "epoch": 132.99, "learning_rate": 4.59759679522345e-05, "loss": 1.2898, "step": 204500 }, { "epoch": 133.31, "learning_rate": 4.5536096498497295e-05, "loss": 1.2782, "step": 205000 }, { "epoch": 133.64, "learning_rate": 4.5097718200186814e-05, "loss": 1.2726, "step": 205500 }, { "epoch": 133.96, "learning_rate": 4.46608450756656e-05, "loss": 1.2616, "step": 206000 }, { "epoch": 134.29, "learning_rate": 4.4225489102030995e-05, "loss": 1.2849, "step": 206500 }, { "epoch": 134.61, "learning_rate": 4.379166221478697e-05, "loss": 1.253, "step": 207000 }, { "epoch": 134.94, "learning_rate": 4.335937630751674e-05, "loss": 1.2782, "step": 207500 }, { "epoch": 135.26, "learning_rate": 4.2928643231556844e-05, "loss": 1.2926, "step": 208000 }, { "epoch": 135.59, "learning_rate": 4.249947479567218e-05, "loss": 1.3252, "step": 208500 }, { "epoch": 135.91, "learning_rate": 4.207188276573214e-05, "loss": 1.3277, "step": 209000 }, { "epoch": 136.24, "learning_rate": 4.1645878864388266e-05, "loss": 1.269, "step": 209500 }, { "epoch": 136.56, "learning_rate": 4.12214747707527e-05, "loss": 1.322, "step": 210000 }, { "epoch": 136.56, "eval_runtime": 0.0013, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 210000 }, { "epoch": 136.89, "learning_rate": 4.0798682120078044e-05, "loss": 1.3141, "step": 210500 }, { "epoch": 137.21, "learning_rate": 4.037751250343841e-05, "loss": 1.2815, "step": 211000 }, { "epoch": 137.54, "learning_rate": 3.9957977467411615e-05, "loss": 1.2729, "step": 211500 }, { "epoch": 137.86, "learning_rate": 3.954008851376252e-05, "loss": 1.2726, "step": 212000 }, { "epoch": 138.19, "learning_rate": 3.9123857099127936e-05, "loss": 1.2877, "step": 212500 }, { "epoch": 138.51, "learning_rate": 3.8709294634702376e-05, "loss": 1.2954, "step": 213000 }, { "epoch": 138.84, "learning_rate": 3.829641248592515e-05, "loss": 1.2716, "step": 213500 }, { "epoch": 139.16, "learning_rate": 3.788522197216897e-05, "loss": 1.2603, "step": 214000 }, { "epoch": 139.49, "learning_rate": 3.747573436642951e-05, "loss": 1.2564, "step": 214500 }, { "epoch": 139.81, "learning_rate": 3.7067960895016275e-05, "loss": 1.2978, "step": 215000 }, { "epoch": 140.14, "learning_rate": 3.6661912737245e-05, "loss": 1.2693, "step": 215500 }, { "epoch": 140.46, "learning_rate": 3.6257601025131026e-05, "loss": 1.2827, "step": 216000 }, { "epoch": 140.79, "learning_rate": 3.585503684308421e-05, "loss": 1.291, "step": 216500 }, { "epoch": 141.12, "learning_rate": 3.545423122760493e-05, "loss": 1.2743, "step": 217000 }, { "epoch": 141.44, "learning_rate": 3.5055195166981645e-05, "loss": 1.2899, "step": 217500 }, { "epoch": 141.77, "learning_rate": 3.465793960098945e-05, "loss": 1.2654, "step": 218000 }, { "epoch": 142.09, "learning_rate": 3.426247542059041e-05, "loss": 1.2695, "step": 218500 }, { "epoch": 142.42, "learning_rate": 3.386881346763483e-05, "loss": 1.2615, "step": 219000 }, { "epoch": 142.74, "learning_rate": 3.347696453456393e-05, "loss": 1.2506, "step": 219500 }, { "epoch": 143.07, "learning_rate": 3.308693936411421e-05, "loss": 1.2812, "step": 220000 }, { "epoch": 143.07, "eval_runtime": 0.0012, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 220000 }, { "epoch": 143.39, "learning_rate": 3.269874864902269e-05, "loss": 1.2625, "step": 220500 }, { "epoch": 143.72, "learning_rate": 3.231240303173394e-05, "loss": 1.2722, "step": 221000 }, { "epoch": 144.04, "learning_rate": 3.192791310410822e-05, "loss": 1.278, "step": 221500 }, { "epoch": 144.37, "learning_rate": 3.154528940713113e-05, "loss": 1.3162, "step": 222000 }, { "epoch": 144.69, "learning_rate": 3.116454243062459e-05, "loss": 1.2748, "step": 222500 }, { "epoch": 145.02, "learning_rate": 3.078568261295933e-05, "loss": 1.2756, "step": 223000 }, { "epoch": 145.34, "learning_rate": 3.0408720340768572e-05, "loss": 1.2389, "step": 223500 }, { "epoch": 145.67, "learning_rate": 3.0033665948663448e-05, "loss": 1.259, "step": 224000 }, { "epoch": 145.99, "learning_rate": 2.9660529718949627e-05, "loss": 1.2801, "step": 224500 }, { "epoch": 146.32, "learning_rate": 2.9289321881345254e-05, "loss": 1.2844, "step": 225000 }, { "epoch": 146.64, "learning_rate": 2.8920052612700754e-05, "loss": 1.3067, "step": 225500 }, { "epoch": 146.97, "learning_rate": 2.8552732036719687e-05, "loss": 1.2596, "step": 226000 }, { "epoch": 147.29, "learning_rate": 2.8187370223681132e-05, "loss": 1.2737, "step": 226500 }, { "epoch": 147.62, "learning_rate": 2.7823977190163786e-05, "loss": 1.2689, "step": 227000 }, { "epoch": 147.94, "learning_rate": 2.746256289877126e-05, "loss": 1.2418, "step": 227500 }, { "epoch": 148.27, "learning_rate": 2.7103137257858868e-05, "loss": 1.2921, "step": 228000 }, { "epoch": 148.59, "learning_rate": 2.6745710121262136e-05, "loss": 1.2561, "step": 228500 }, { "epoch": 148.92, "learning_rate": 2.639029128802657e-05, "loss": 1.2658, "step": 229000 }, { "epoch": 149.24, "learning_rate": 2.603689050213902e-05, "loss": 1.2722, "step": 229500 }, { "epoch": 149.57, "learning_rate": 2.5685517452260567e-05, "loss": 1.2175, "step": 230000 }, { "epoch": 149.57, "eval_runtime": 0.0012, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 230000 }, { "epoch": 149.89, "learning_rate": 2.5336181771460876e-05, "loss": 1.2331, "step": 230500 }, { "epoch": 150.22, "learning_rate": 2.4988893036954043e-05, "loss": 1.2488, "step": 231000 }, { "epoch": 150.54, "learning_rate": 2.464366076983623e-05, "loss": 1.2598, "step": 231500 }, { "epoch": 150.87, "learning_rate": 2.4300494434824373e-05, "loss": 1.2805, "step": 232000 }, { "epoch": 151.19, "learning_rate": 2.3959403439996907e-05, "loss": 1.2435, "step": 232500 }, { "epoch": 151.52, "learning_rate": 2.362039713653581e-05, "loss": 1.2641, "step": 233000 }, { "epoch": 151.85, "learning_rate": 2.328348481847006e-05, "loss": 1.2625, "step": 233500 }, { "epoch": 152.17, "learning_rate": 2.2948675722421086e-05, "loss": 1.2149, "step": 234000 }, { "epoch": 152.5, "learning_rate": 2.2615979027349387e-05, "loss": 1.2519, "step": 234500 }, { "epoch": 152.82, "learning_rate": 2.2285403854302912e-05, "loss": 1.2497, "step": 235000 }, { "epoch": 153.15, "learning_rate": 2.195695926616702e-05, "loss": 1.2638, "step": 235500 }, { "epoch": 153.47, "learning_rate": 2.163065426741603e-05, "loss": 1.2347, "step": 236000 }, { "epoch": 153.8, "learning_rate": 2.1306497803866277e-05, "loss": 1.2572, "step": 236500 }, { "epoch": 154.12, "learning_rate": 2.098449876243096e-05, "loss": 1.2383, "step": 237000 }, { "epoch": 154.45, "learning_rate": 2.0664665970876496e-05, "loss": 1.2517, "step": 237500 }, { "epoch": 154.77, "learning_rate": 2.0347008197580374e-05, "loss": 1.2381, "step": 238000 }, { "epoch": 155.1, "learning_rate": 2.0031534151290943e-05, "loss": 1.2762, "step": 238500 }, { "epoch": 155.42, "learning_rate": 1.9718252480888566e-05, "loss": 1.2562, "step": 239000 }, { "epoch": 155.75, "learning_rate": 1.9407171775148436e-05, "loss": 1.2569, "step": 239500 }, { "epoch": 156.07, "learning_rate": 1.9098300562505266e-05, "loss": 1.2424, "step": 240000 }, { "epoch": 156.07, "eval_runtime": 0.0012, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 240000 }, { "epoch": 156.4, "learning_rate": 1.879164731081937e-05, "loss": 1.2454, "step": 240500 }, { "epoch": 156.72, "learning_rate": 1.848722042714457e-05, "loss": 1.2628, "step": 241000 }, { "epoch": 157.05, "learning_rate": 1.818502825749764e-05, "loss": 1.2726, "step": 241500 }, { "epoch": 157.37, "learning_rate": 1.78850790866296e-05, "loss": 1.2418, "step": 242000 }, { "epoch": 157.7, "learning_rate": 1.7587381137798432e-05, "loss": 1.27, "step": 242500 }, { "epoch": 158.02, "learning_rate": 1.7291942572543807e-05, "loss": 1.2433, "step": 243000 }, { "epoch": 158.35, "learning_rate": 1.6998771490463262e-05, "loss": 1.2645, "step": 243500 }, { "epoch": 158.67, "learning_rate": 1.6707875928990058e-05, "loss": 1.2575, "step": 244000 }, { "epoch": 159.0, "learning_rate": 1.6419263863172997e-05, "loss": 1.2702, "step": 244500 }, { "epoch": 159.32, "learning_rate": 1.6132943205457606e-05, "loss": 1.254, "step": 245000 }, { "epoch": 159.65, "learning_rate": 1.5848921805469397e-05, "loss": 1.2707, "step": 245500 }, { "epoch": 159.97, "learning_rate": 1.5567207449798515e-05, "loss": 1.2871, "step": 246000 }, { "epoch": 160.3, "learning_rate": 1.528780786178631e-05, "loss": 1.2611, "step": 246500 }, { "epoch": 160.62, "learning_rate": 1.5010730701313625e-05, "loss": 1.2422, "step": 247000 }, { "epoch": 160.95, "learning_rate": 1.4735983564590783e-05, "loss": 1.2355, "step": 247500 }, { "epoch": 161.27, "learning_rate": 1.4463573983949341e-05, "loss": 1.2152, "step": 248000 }, { "epoch": 161.6, "learning_rate": 1.4193509427635543e-05, "loss": 1.2499, "step": 248500 }, { "epoch": 161.92, "learning_rate": 1.3925797299605647e-05, "loss": 1.2226, "step": 249000 }, { "epoch": 162.25, "learning_rate": 1.3660444939322836e-05, "loss": 1.2423, "step": 249500 }, { "epoch": 162.58, "learning_rate": 1.339745962155613e-05, "loss": 1.2512, "step": 250000 }, { "epoch": 162.58, "eval_runtime": 0.0013, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 250000 }, { "epoch": 162.9, "learning_rate": 1.3136848556180892e-05, "loss": 1.2637, "step": 250500 }, { "epoch": 163.23, "learning_rate": 1.2878618887981064e-05, "loss": 1.2572, "step": 251000 }, { "epoch": 163.55, "learning_rate": 1.262277769645348e-05, "loss": 1.2812, "step": 251500 }, { "epoch": 163.88, "learning_rate": 1.2369331995613665e-05, "loss": 1.2265, "step": 252000 }, { "epoch": 164.2, "learning_rate": 1.2118288733803473e-05, "loss": 1.2682, "step": 252500 }, { "epoch": 164.53, "learning_rate": 1.1869654793500784e-05, "loss": 1.2759, "step": 253000 }, { "epoch": 164.85, "learning_rate": 1.1623436991130654e-05, "loss": 1.2526, "step": 253500 }, { "epoch": 165.18, "learning_rate": 1.1379642076878527e-05, "loss": 1.2597, "step": 254000 }, { "epoch": 165.5, "learning_rate": 1.1138276734505104e-05, "loss": 1.203, "step": 254500 }, { "epoch": 165.83, "learning_rate": 1.0899347581163221e-05, "loss": 1.2535, "step": 255000 }, { "epoch": 166.15, "learning_rate": 1.0662861167216243e-05, "loss": 1.2394, "step": 255500 }, { "epoch": 166.48, "learning_rate": 1.042882397605871e-05, "loss": 1.2433, "step": 256000 }, { "epoch": 166.8, "learning_rate": 1.0197242423938446e-05, "loss": 1.2433, "step": 256500 }, { "epoch": 167.13, "learning_rate": 9.968122859780648e-06, "loss": 1.2309, "step": 257000 }, { "epoch": 167.45, "learning_rate": 9.74147156501396e-06, "loss": 1.2279, "step": 257500 }, { "epoch": 167.78, "learning_rate": 9.517294753398064e-06, "loss": 1.2313, "step": 258000 }, { "epoch": 168.1, "learning_rate": 9.295598570853514e-06, "loss": 1.2593, "step": 258500 }, { "epoch": 168.43, "learning_rate": 9.076389095293148e-06, "loss": 1.2374, "step": 259000 }, { "epoch": 168.75, "learning_rate": 8.85967233645547e-06, "loss": 1.2336, "step": 259500 }, { "epoch": 169.08, "learning_rate": 8.645454235739903e-06, "loss": 1.2567, "step": 260000 }, { "epoch": 169.08, "eval_runtime": 0.0013, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 260000 }, { "epoch": 169.4, "learning_rate": 8.433740666043898e-06, "loss": 1.2084, "step": 260500 }, { "epoch": 169.73, "learning_rate": 8.224537431601886e-06, "loss": 1.239, "step": 261000 }, { "epoch": 170.05, "learning_rate": 8.017850267826232e-06, "loss": 1.2482, "step": 261500 }, { "epoch": 170.38, "learning_rate": 7.81368484114996e-06, "loss": 1.2488, "step": 262000 }, { "epoch": 170.7, "learning_rate": 7.612046748871327e-06, "loss": 1.2325, "step": 262500 }, { "epoch": 171.03, "learning_rate": 7.412941519000527e-06, "loss": 1.2575, "step": 263000 }, { "epoch": 171.35, "learning_rate": 7.216374610108012e-06, "loss": 1.2599, "step": 263500 }, { "epoch": 171.68, "learning_rate": 7.022351411174866e-06, "loss": 1.2562, "step": 264000 }, { "epoch": 172.0, "learning_rate": 6.830877241445111e-06, "loss": 1.2147, "step": 264500 }, { "epoch": 172.33, "learning_rate": 6.6419573502798374e-06, "loss": 1.255, "step": 265000 }, { "epoch": 172.65, "learning_rate": 6.455596917013273e-06, "loss": 1.2347, "step": 265500 }, { "epoch": 172.98, "learning_rate": 6.2718010508108545e-06, "loss": 1.2591, "step": 266000 }, { "epoch": 173.31, "learning_rate": 6.090574790529091e-06, "loss": 1.2623, "step": 266500 }, { "epoch": 173.63, "learning_rate": 5.911923104577455e-06, "loss": 1.2462, "step": 267000 }, { "epoch": 173.96, "learning_rate": 5.735850890782157e-06, "loss": 1.2318, "step": 267500 }, { "epoch": 174.28, "learning_rate": 5.562362976251901e-06, "loss": 1.2481, "step": 268000 }, { "epoch": 174.61, "learning_rate": 5.39146411724547e-06, "loss": 1.2649, "step": 268500 }, { "epoch": 174.93, "learning_rate": 5.223158999041444e-06, "loss": 1.2464, "step": 269000 }, { "epoch": 175.26, "learning_rate": 5.057452235809624e-06, "loss": 1.2538, "step": 269500 }, { "epoch": 175.58, "learning_rate": 4.8943483704846475e-06, "loss": 1.2532, "step": 270000 }, { "epoch": 175.58, "eval_runtime": 0.0012, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 270000 }, { "epoch": 175.91, "learning_rate": 4.733851874641382e-06, "loss": 1.256, "step": 270500 }, { "epoch": 176.23, "learning_rate": 4.575967148372317e-06, "loss": 1.2083, "step": 271000 }, { "epoch": 176.56, "learning_rate": 4.420698520166988e-06, "loss": 1.2336, "step": 271500 }, { "epoch": 176.88, "learning_rate": 4.268050246793276e-06, "loss": 1.2793, "step": 272000 }, { "epoch": 177.21, "learning_rate": 4.118026513180695e-06, "loss": 1.2365, "step": 272500 }, { "epoch": 177.53, "learning_rate": 3.970631432305694e-06, "loss": 1.2496, "step": 273000 }, { "epoch": 177.86, "learning_rate": 3.825869045078867e-06, "loss": 1.2603, "step": 273500 }, { "epoch": 178.18, "learning_rate": 3.68374332023419e-06, "loss": 1.2342, "step": 274000 }, { "epoch": 178.51, "learning_rate": 3.5442581542201923e-06, "loss": 1.2548, "step": 274500 }, { "epoch": 178.83, "learning_rate": 3.40741737109318e-06, "loss": 1.2268, "step": 275000 }, { "epoch": 179.16, "learning_rate": 3.273224722412327e-06, "loss": 1.2424, "step": 275500 }, { "epoch": 179.48, "learning_rate": 3.1416838871368924e-06, "loss": 1.2274, "step": 276000 }, { "epoch": 179.81, "learning_rate": 3.012798471525324e-06, "loss": 1.251, "step": 276500 }, { "epoch": 180.13, "learning_rate": 2.8865720090364034e-06, "loss": 1.2266, "step": 277000 }, { "epoch": 180.46, "learning_rate": 2.7630079602323442e-06, "loss": 1.2417, "step": 277500 }, { "epoch": 180.78, "learning_rate": 2.6421097126839712e-06, "loss": 1.2536, "step": 278000 }, { "epoch": 181.11, "learning_rate": 2.5238805808778242e-06, "loss": 1.2233, "step": 278500 }, { "epoch": 181.43, "learning_rate": 2.4083238061252567e-06, "loss": 1.2278, "step": 279000 }, { "epoch": 181.76, "learning_rate": 2.295442556473637e-06, "loss": 1.2092, "step": 279500 }, { "epoch": 182.08, "learning_rate": 2.1852399266194314e-06, "loss": 1.2115, "step": 280000 }, { "epoch": 182.08, "eval_runtime": 0.0012, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 280000 }, { "epoch": 182.41, "learning_rate": 2.0777189378234143e-06, "loss": 1.2362, "step": 280500 }, { "epoch": 182.73, "learning_rate": 1.9728825378278246e-06, "loss": 1.1964, "step": 281000 }, { "epoch": 183.06, "learning_rate": 1.8707336007754873e-06, "loss": 1.2569, "step": 281500 }, { "epoch": 183.38, "learning_rate": 1.771274927131139e-06, "loss": 1.2593, "step": 282000 }, { "epoch": 183.71, "learning_rate": 1.6745092436045494e-06, "loss": 1.2745, "step": 282500 }, { "epoch": 184.04, "learning_rate": 1.580439203075812e-06, "loss": 1.2483, "step": 283000 }, { "epoch": 184.36, "learning_rate": 1.4890673845226133e-06, "loss": 1.2369, "step": 283500 }, { "epoch": 184.69, "learning_rate": 1.400396292949513e-06, "loss": 1.2195, "step": 284000 }, { "epoch": 185.01, "learning_rate": 1.3144283593192752e-06, "loss": 1.236, "step": 284500 }, { "epoch": 185.34, "learning_rate": 1.231165940486234e-06, "loss": 1.2671, "step": 285000 }, { "epoch": 185.66, "learning_rate": 1.1506113191316447e-06, "loss": 1.226, "step": 285500 }, { "epoch": 185.99, "learning_rate": 1.0727667037011668e-06, "loss": 1.2473, "step": 286000 }, { "epoch": 186.31, "learning_rate": 9.976342283442463e-07, "loss": 1.2191, "step": 286500 }, { "epoch": 186.64, "learning_rate": 9.252159528556403e-07, "loss": 1.2208, "step": 287000 }, { "epoch": 186.96, "learning_rate": 8.555138626189618e-07, "loss": 1.2619, "step": 287500 }, { "epoch": 187.29, "learning_rate": 7.885298685522235e-07, "loss": 1.2389, "step": 288000 }, { "epoch": 187.61, "learning_rate": 7.242658070554464e-07, "loss": 1.244, "step": 288500 }, { "epoch": 187.94, "learning_rate": 6.627234399603555e-07, "loss": 1.238, "step": 289000 }, { "epoch": 188.26, "learning_rate": 6.039044544820404e-07, "loss": 1.2443, "step": 289500 }, { "epoch": 188.59, "learning_rate": 5.478104631726711e-07, "loss": 1.2178, "step": 290000 }, { "epoch": 188.59, "eval_runtime": 0.0013, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 290000 }, { "epoch": 188.91, "learning_rate": 4.944430038773762e-07, "loss": 1.2326, "step": 290500 }, { "epoch": 189.24, "learning_rate": 4.438035396920004e-07, "loss": 1.2231, "step": 291000 }, { "epoch": 189.56, "learning_rate": 3.958934589230467e-07, "loss": 1.2526, "step": 291500 }, { "epoch": 189.89, "learning_rate": 3.50714075049563e-07, "loss": 1.2415, "step": 292000 }, { "epoch": 190.21, "learning_rate": 3.0826662668720364e-07, "loss": 1.2414, "step": 292500 }, { "epoch": 190.54, "learning_rate": 2.685522775541904e-07, "loss": 1.2515, "step": 293000 }, { "epoch": 190.86, "learning_rate": 2.315721164394713e-07, "loss": 1.238, "step": 293500 }, { "epoch": 191.19, "learning_rate": 1.973271571728441e-07, "loss": 1.2693, "step": 294000 }, { "epoch": 191.51, "learning_rate": 1.6581833859716788e-07, "loss": 1.2387, "step": 294500 }, { "epoch": 191.84, "learning_rate": 1.3704652454261668e-07, "loss": 1.2411, "step": 295000 }, { "epoch": 192.16, "learning_rate": 1.1101250380300965e-07, "loss": 1.2521, "step": 295500 }, { "epoch": 192.49, "learning_rate": 8.771699011416168e-08, "loss": 1.2672, "step": 296000 }, { "epoch": 192.81, "learning_rate": 6.71606221343768e-08, "loss": 1.259, "step": 296500 }, { "epoch": 193.14, "learning_rate": 4.934396342684e-08, "loss": 1.2573, "step": 297000 }, { "epoch": 193.46, "learning_rate": 3.4267502444274015e-08, "loss": 1.228, "step": 297500 }, { "epoch": 193.79, "learning_rate": 2.193165251545004e-08, "loss": 1.2397, "step": 298000 }, { "epoch": 194.11, "learning_rate": 1.2336751833941229e-08, "loss": 1.2125, "step": 298500 }, { "epoch": 194.44, "learning_rate": 5.483063448785686e-09, "loss": 1.2484, "step": 299000 }, { "epoch": 194.77, "learning_rate": 1.3707752573255405e-09, "loss": 1.2367, "step": 299500 }, { "epoch": 195.09, "learning_rate": 0.0, "loss": 1.2385, "step": 300000 }, { "epoch": 195.09, "eval_runtime": 0.0012, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "step": 300000 } ], "max_steps": 300000, "num_train_epochs": 196, "total_flos": 2.1823985264086426e+18, "trial_name": null, "trial_params": null }