{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.09890621363742146, "eval_steps": 85, "global_step": 850, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0009308820107051431, "grad_norm": 246781.0625, "learning_rate": 9.997671440214229e-06, "loss": 0.5292, "step": 8 }, { "epoch": 0.0018617640214102862, "grad_norm": 187779.84375, "learning_rate": 9.988357201071139e-06, "loss": 0.1916, "step": 16 }, { "epoch": 0.0027926460321154294, "grad_norm": 191587.796875, "learning_rate": 9.979042961928047e-06, "loss": 0.1835, "step": 24 }, { "epoch": 0.0037235280428205724, "grad_norm": 192646.890625, "learning_rate": 9.969728722784958e-06, "loss": 0.1706, "step": 32 }, { "epoch": 0.004654410053525715, "grad_norm": 218090.6875, "learning_rate": 9.960414483641868e-06, "loss": 0.1885, "step": 40 }, { "epoch": 0.005585292064230859, "grad_norm": 193089.328125, "learning_rate": 9.951100244498778e-06, "loss": 0.172, "step": 48 }, { "epoch": 0.006516174074936002, "grad_norm": 196219.21875, "learning_rate": 9.941786005355688e-06, "loss": 0.1658, "step": 56 }, { "epoch": 0.007447056085641145, "grad_norm": 177410.421875, "learning_rate": 9.932471766212598e-06, "loss": 0.1672, "step": 64 }, { "epoch": 0.008377938096346288, "grad_norm": 143304.375, "learning_rate": 9.923157527069508e-06, "loss": 0.1645, "step": 72 }, { "epoch": 0.00930882010705143, "grad_norm": 168312.640625, "learning_rate": 9.913843287926418e-06, "loss": 0.1578, "step": 80 }, { "epoch": 0.009890621363742146, "eval_loss": 0.12741638720035553, "eval_runtime": 5949.5202, "eval_samples_per_second": 2.665, "eval_steps_per_second": 0.083, "eval_wer": 9.4490594862981, "step": 85 }, { "epoch": 0.010239702117756575, "grad_norm": 155638.546875, "learning_rate": 9.904529048783329e-06, "loss": 0.1555, "step": 88 }, { "epoch": 0.011170584128461718, "grad_norm": 125002.9296875, "learning_rate": 9.895214809640237e-06, "loss": 0.1572, "step": 96 }, { "epoch": 0.01210146613916686, "grad_norm": 128746.8671875, "learning_rate": 9.885900570497147e-06, "loss": 0.1426, "step": 104 }, { "epoch": 0.013032348149872005, "grad_norm": 170330.53125, "learning_rate": 9.876586331354059e-06, "loss": 0.1522, "step": 112 }, { "epoch": 0.013963230160577147, "grad_norm": 159400.9375, "learning_rate": 9.86727209221097e-06, "loss": 0.1465, "step": 120 }, { "epoch": 0.01489411217128229, "grad_norm": 176768.453125, "learning_rate": 9.85795785306788e-06, "loss": 0.1289, "step": 128 }, { "epoch": 0.015824994181987434, "grad_norm": 162940.640625, "learning_rate": 9.84864361392479e-06, "loss": 0.1493, "step": 136 }, { "epoch": 0.016755876192692577, "grad_norm": 107537.65625, "learning_rate": 9.839329374781698e-06, "loss": 0.1465, "step": 144 }, { "epoch": 0.01768675820339772, "grad_norm": 179104.046875, "learning_rate": 9.830015135638608e-06, "loss": 0.144, "step": 152 }, { "epoch": 0.01861764021410286, "grad_norm": 170439.359375, "learning_rate": 9.820700896495518e-06, "loss": 0.1578, "step": 160 }, { "epoch": 0.019548522224808004, "grad_norm": 139290.828125, "learning_rate": 9.811386657352428e-06, "loss": 0.1392, "step": 168 }, { "epoch": 0.019781242727484293, "eval_loss": 0.0959169790148735, "eval_runtime": 5931.1781, "eval_samples_per_second": 2.673, "eval_steps_per_second": 0.084, "eval_wer": 6.917470320506699, "step": 170 }, { "epoch": 0.02047940423551315, "grad_norm": 141791.703125, "learning_rate": 9.802072418209339e-06, "loss": 0.138, "step": 176 }, { "epoch": 0.021410286246218293, "grad_norm": 152705.890625, "learning_rate": 9.792758179066249e-06, "loss": 0.139, "step": 184 }, { "epoch": 0.022341168256923435, "grad_norm": 176748.265625, "learning_rate": 9.783443939923159e-06, "loss": 0.1371, "step": 192 }, { "epoch": 0.023272050267628578, "grad_norm": 166765.390625, "learning_rate": 9.774129700780069e-06, "loss": 0.1265, "step": 200 }, { "epoch": 0.02420293227833372, "grad_norm": 155719.375, "learning_rate": 9.764815461636979e-06, "loss": 0.1326, "step": 208 }, { "epoch": 0.025133814289038863, "grad_norm": 155453.3125, "learning_rate": 9.755501222493888e-06, "loss": 0.1279, "step": 216 }, { "epoch": 0.02606469629974401, "grad_norm": 167183.234375, "learning_rate": 9.746186983350798e-06, "loss": 0.1385, "step": 224 }, { "epoch": 0.026995578310449152, "grad_norm": 150657.203125, "learning_rate": 9.736872744207708e-06, "loss": 0.1378, "step": 232 }, { "epoch": 0.027926460321154294, "grad_norm": 137554.734375, "learning_rate": 9.727558505064618e-06, "loss": 0.1358, "step": 240 }, { "epoch": 0.028857342331859437, "grad_norm": 139477.828125, "learning_rate": 9.718244265921528e-06, "loss": 0.1427, "step": 248 }, { "epoch": 0.02967186409122644, "eval_loss": 0.10620440542697906, "eval_runtime": 5969.2466, "eval_samples_per_second": 2.656, "eval_steps_per_second": 0.083, "eval_wer": 7.396161558292754, "step": 255 }, { "epoch": 0.02978822434256458, "grad_norm": 151097.390625, "learning_rate": 9.708930026778438e-06, "loss": 0.1232, "step": 256 }, { "epoch": 0.030719106353269722, "grad_norm": 137314.75, "learning_rate": 9.699615787635348e-06, "loss": 0.1423, "step": 264 }, { "epoch": 0.03164998836397487, "grad_norm": 142663.078125, "learning_rate": 9.690301548492259e-06, "loss": 0.1329, "step": 272 }, { "epoch": 0.03258087037468001, "grad_norm": 168839.84375, "learning_rate": 9.680987309349169e-06, "loss": 0.1446, "step": 280 }, { "epoch": 0.03351175238538515, "grad_norm": 151602.734375, "learning_rate": 9.671673070206077e-06, "loss": 0.1271, "step": 288 }, { "epoch": 0.03444263439609029, "grad_norm": 133047.96875, "learning_rate": 9.662358831062987e-06, "loss": 0.1453, "step": 296 }, { "epoch": 0.03537351640679544, "grad_norm": 148903.75, "learning_rate": 9.653044591919897e-06, "loss": 0.1362, "step": 304 }, { "epoch": 0.036304398417500584, "grad_norm": 170333.4375, "learning_rate": 9.643730352776808e-06, "loss": 0.1684, "step": 312 }, { "epoch": 0.03723528042820572, "grad_norm": 116653.328125, "learning_rate": 9.63441611363372e-06, "loss": 0.147, "step": 320 }, { "epoch": 0.03816616243891087, "grad_norm": 212308.03125, "learning_rate": 9.62510187449063e-06, "loss": 0.1441, "step": 328 }, { "epoch": 0.03909704444961601, "grad_norm": 157418.203125, "learning_rate": 9.615787635347538e-06, "loss": 0.1268, "step": 336 }, { "epoch": 0.039562485454968585, "eval_loss": 0.10842841863632202, "eval_runtime": 5961.2518, "eval_samples_per_second": 2.66, "eval_steps_per_second": 0.083, "eval_wer": 7.3318692510901045, "step": 340 }, { "epoch": 0.040027926460321155, "grad_norm": 148816.703125, "learning_rate": 9.606473396204448e-06, "loss": 0.1297, "step": 344 }, { "epoch": 0.0409588084710263, "grad_norm": 191683.796875, "learning_rate": 9.597159157061358e-06, "loss": 0.1593, "step": 352 }, { "epoch": 0.04188969048173144, "grad_norm": 113932.3203125, "learning_rate": 9.587844917918269e-06, "loss": 0.129, "step": 360 }, { "epoch": 0.042820572492436586, "grad_norm": 117160.140625, "learning_rate": 9.578530678775179e-06, "loss": 0.1244, "step": 368 }, { "epoch": 0.043751454503141725, "grad_norm": 167088.71875, "learning_rate": 9.569216439632089e-06, "loss": 0.1189, "step": 376 }, { "epoch": 0.04468233651384687, "grad_norm": 154759.875, "learning_rate": 9.559902200488999e-06, "loss": 0.1316, "step": 384 }, { "epoch": 0.04561321852455201, "grad_norm": 130896.296875, "learning_rate": 9.550587961345909e-06, "loss": 0.1378, "step": 392 }, { "epoch": 0.046544100535257156, "grad_norm": 130177.3203125, "learning_rate": 9.54127372220282e-06, "loss": 0.1364, "step": 400 }, { "epoch": 0.0474749825459623, "grad_norm": 124157.765625, "learning_rate": 9.531959483059728e-06, "loss": 0.1321, "step": 408 }, { "epoch": 0.04840586455666744, "grad_norm": 167979.953125, "learning_rate": 9.522645243916638e-06, "loss": 0.1409, "step": 416 }, { "epoch": 0.04933674656737259, "grad_norm": 151171.75, "learning_rate": 9.513331004773548e-06, "loss": 0.1374, "step": 424 }, { "epoch": 0.04945310681871073, "eval_loss": 0.11252985894680023, "eval_runtime": 5961.7203, "eval_samples_per_second": 2.66, "eval_steps_per_second": 0.083, "eval_wer": 7.531111747668608, "step": 425 }, { "epoch": 0.050267628578077726, "grad_norm": 89195.203125, "learning_rate": 9.504016765630458e-06, "loss": 0.1177, "step": 432 }, { "epoch": 0.05119851058878287, "grad_norm": 169561.765625, "learning_rate": 9.494702526487368e-06, "loss": 0.1211, "step": 440 }, { "epoch": 0.05212939259948802, "grad_norm": 157022.328125, "learning_rate": 9.485388287344278e-06, "loss": 0.1497, "step": 448 }, { "epoch": 0.05306027461019316, "grad_norm": 161315.6875, "learning_rate": 9.476074048201189e-06, "loss": 0.1396, "step": 456 }, { "epoch": 0.053991156620898303, "grad_norm": 179211.484375, "learning_rate": 9.466759809058099e-06, "loss": 0.1463, "step": 464 }, { "epoch": 0.05492203863160344, "grad_norm": 139493.796875, "learning_rate": 9.457445569915007e-06, "loss": 0.1385, "step": 472 }, { "epoch": 0.05585292064230859, "grad_norm": 121290.3125, "learning_rate": 9.448131330771917e-06, "loss": 0.1257, "step": 480 }, { "epoch": 0.05678380265301373, "grad_norm": 189379.421875, "learning_rate": 9.438817091628828e-06, "loss": 0.1332, "step": 488 }, { "epoch": 0.057714684663718874, "grad_norm": 137769.5, "learning_rate": 9.429502852485738e-06, "loss": 0.1252, "step": 496 }, { "epoch": 0.05864556667442402, "grad_norm": 151028.71875, "learning_rate": 9.420188613342648e-06, "loss": 0.1209, "step": 504 }, { "epoch": 0.05934372818245288, "eval_loss": 0.11397241055965424, "eval_runtime": 5959.2697, "eval_samples_per_second": 2.661, "eval_steps_per_second": 0.083, "eval_wer": 7.744358509182342, "step": 510 }, { "epoch": 0.05957644868512916, "grad_norm": 150909.71875, "learning_rate": 9.410874374199558e-06, "loss": 0.129, "step": 512 }, { "epoch": 0.060507330695834305, "grad_norm": 156785.796875, "learning_rate": 9.401560135056468e-06, "loss": 0.139, "step": 520 }, { "epoch": 0.061438212706539444, "grad_norm": 130850.4453125, "learning_rate": 9.392245895913378e-06, "loss": 0.1275, "step": 528 }, { "epoch": 0.06236909471724459, "grad_norm": 141489.40625, "learning_rate": 9.382931656770288e-06, "loss": 0.1275, "step": 536 }, { "epoch": 0.06329997672794974, "grad_norm": 179663.359375, "learning_rate": 9.373617417627199e-06, "loss": 0.1338, "step": 544 }, { "epoch": 0.06423085873865488, "grad_norm": 165837.234375, "learning_rate": 9.364303178484109e-06, "loss": 0.1333, "step": 552 }, { "epoch": 0.06516174074936001, "grad_norm": 143674.65625, "learning_rate": 9.354988939341019e-06, "loss": 0.1446, "step": 560 }, { "epoch": 0.06609262276006517, "grad_norm": 139713.765625, "learning_rate": 9.345674700197929e-06, "loss": 0.1293, "step": 568 }, { "epoch": 0.0670235047707703, "grad_norm": 120400.5625, "learning_rate": 9.33636046105484e-06, "loss": 0.1223, "step": 576 }, { "epoch": 0.06795438678147545, "grad_norm": 120168.7734375, "learning_rate": 9.32704622191175e-06, "loss": 0.1311, "step": 584 }, { "epoch": 0.06888526879218058, "grad_norm": 183192.234375, "learning_rate": 9.317731982768658e-06, "loss": 0.1373, "step": 592 }, { "epoch": 0.06923434954619502, "eval_loss": 0.11407212913036346, "eval_runtime": 5966.9756, "eval_samples_per_second": 2.657, "eval_steps_per_second": 0.083, "eval_wer": 7.737356376714727, "step": 595 }, { "epoch": 0.06981615080288574, "grad_norm": 147151.234375, "learning_rate": 9.308417743625568e-06, "loss": 0.1222, "step": 600 }, { "epoch": 0.07074703281359088, "grad_norm": 110441.546875, "learning_rate": 9.299103504482478e-06, "loss": 0.1356, "step": 608 }, { "epoch": 0.07167791482429602, "grad_norm": 134779.78125, "learning_rate": 9.289789265339388e-06, "loss": 0.152, "step": 616 }, { "epoch": 0.07260879683500117, "grad_norm": 157262.90625, "learning_rate": 9.280475026196298e-06, "loss": 0.1425, "step": 624 }, { "epoch": 0.07353967884570631, "grad_norm": 94122.4140625, "learning_rate": 9.271160787053209e-06, "loss": 0.1192, "step": 632 }, { "epoch": 0.07447056085641145, "grad_norm": 129110.6875, "learning_rate": 9.261846547910119e-06, "loss": 0.1244, "step": 640 }, { "epoch": 0.0754014428671166, "grad_norm": 137216.5625, "learning_rate": 9.252532308767029e-06, "loss": 0.1355, "step": 648 }, { "epoch": 0.07633232487782174, "grad_norm": 108335.875, "learning_rate": 9.243218069623939e-06, "loss": 0.1094, "step": 656 }, { "epoch": 0.07726320688852688, "grad_norm": 104589.25, "learning_rate": 9.233903830480847e-06, "loss": 0.1215, "step": 664 }, { "epoch": 0.07819408889923202, "grad_norm": 117729.4609375, "learning_rate": 9.224589591337758e-06, "loss": 0.1471, "step": 672 }, { "epoch": 0.07912497090993717, "grad_norm": 142323.6875, "learning_rate": 9.215275352194668e-06, "loss": 0.1302, "step": 680 }, { "epoch": 0.07912497090993717, "eval_loss": 0.11676130443811417, "eval_runtime": 5962.6488, "eval_samples_per_second": 2.659, "eval_steps_per_second": 0.083, "eval_wer": 7.715713421814825, "step": 680 }, { "epoch": 0.08005585292064231, "grad_norm": 138700.578125, "learning_rate": 9.205961113051578e-06, "loss": 0.141, "step": 688 }, { "epoch": 0.08098673493134745, "grad_norm": 129724.2578125, "learning_rate": 9.196646873908488e-06, "loss": 0.1326, "step": 696 }, { "epoch": 0.0819176169420526, "grad_norm": 156790.84375, "learning_rate": 9.187332634765398e-06, "loss": 0.1203, "step": 704 }, { "epoch": 0.08284849895275774, "grad_norm": 142871.296875, "learning_rate": 9.178018395622308e-06, "loss": 0.1484, "step": 712 }, { "epoch": 0.08377938096346288, "grad_norm": 106344.171875, "learning_rate": 9.168704156479218e-06, "loss": 0.1118, "step": 720 }, { "epoch": 0.08471026297416802, "grad_norm": 300278.625, "learning_rate": 9.159389917336129e-06, "loss": 0.1431, "step": 728 }, { "epoch": 0.08564114498487317, "grad_norm": 163834.234375, "learning_rate": 9.150075678193039e-06, "loss": 0.1313, "step": 736 }, { "epoch": 0.08657202699557831, "grad_norm": 161322.71875, "learning_rate": 9.140761439049949e-06, "loss": 0.1366, "step": 744 }, { "epoch": 0.08750290900628345, "grad_norm": 138388.65625, "learning_rate": 9.131447199906859e-06, "loss": 0.1302, "step": 752 }, { "epoch": 0.0884337910169886, "grad_norm": 125799.078125, "learning_rate": 9.12213296076377e-06, "loss": 0.1569, "step": 760 }, { "epoch": 0.0890155922736793, "eval_loss": 0.11295511573553085, "eval_runtime": 5970.6229, "eval_samples_per_second": 2.656, "eval_steps_per_second": 0.083, "eval_wer": 8.026353480378114, "step": 765 }, { "epoch": 0.08936467302769374, "grad_norm": 126927.6484375, "learning_rate": 9.11281872162068e-06, "loss": 0.1428, "step": 768 }, { "epoch": 0.09029555503839888, "grad_norm": 126672.28125, "learning_rate": 9.10350448247759e-06, "loss": 0.1307, "step": 776 }, { "epoch": 0.09122643704910402, "grad_norm": 99957.9453125, "learning_rate": 9.094190243334498e-06, "loss": 0.1465, "step": 784 }, { "epoch": 0.09215731905980917, "grad_norm": 120560.9140625, "learning_rate": 9.084876004191408e-06, "loss": 0.1205, "step": 792 }, { "epoch": 0.09308820107051431, "grad_norm": 147541.890625, "learning_rate": 9.075561765048318e-06, "loss": 0.1418, "step": 800 }, { "epoch": 0.09401908308121945, "grad_norm": 154493.171875, "learning_rate": 9.066247525905228e-06, "loss": 0.1512, "step": 808 }, { "epoch": 0.0949499650919246, "grad_norm": 143224.46875, "learning_rate": 9.056933286762139e-06, "loss": 0.1372, "step": 816 }, { "epoch": 0.09588084710262974, "grad_norm": 112816.7890625, "learning_rate": 9.047619047619049e-06, "loss": 0.1426, "step": 824 }, { "epoch": 0.09681172911333488, "grad_norm": 125948.0703125, "learning_rate": 9.038304808475959e-06, "loss": 0.1477, "step": 832 }, { "epoch": 0.09774261112404002, "grad_norm": 117891.203125, "learning_rate": 9.028990569332869e-06, "loss": 0.1548, "step": 840 }, { "epoch": 0.09867349313474517, "grad_norm": 145955.125, "learning_rate": 9.019676330189779e-06, "loss": 0.134, "step": 848 }, { "epoch": 0.09890621363742146, "eval_loss": 0.11149411648511887, "eval_runtime": 5853.7074, "eval_samples_per_second": 2.709, "eval_steps_per_second": 0.085, "eval_wer": 7.540660110124447, "step": 850 } ], "logging_steps": 8, "max_steps": 8594, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 850, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.85498685407232e+20, "train_batch_size": 32, "trial_name": null, "trial_params": null }