{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.09890621363742146,
  "eval_steps": 85,
  "global_step": 850,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0009308820107051431,
      "grad_norm": 246781.0625,
      "learning_rate": 9.997671440214229e-06,
      "loss": 0.5292,
      "step": 8
    },
    {
      "epoch": 0.0018617640214102862,
      "grad_norm": 187779.84375,
      "learning_rate": 9.988357201071139e-06,
      "loss": 0.1916,
      "step": 16
    },
    {
      "epoch": 0.0027926460321154294,
      "grad_norm": 191587.796875,
      "learning_rate": 9.979042961928047e-06,
      "loss": 0.1835,
      "step": 24
    },
    {
      "epoch": 0.0037235280428205724,
      "grad_norm": 192646.890625,
      "learning_rate": 9.969728722784958e-06,
      "loss": 0.1706,
      "step": 32
    },
    {
      "epoch": 0.004654410053525715,
      "grad_norm": 218090.6875,
      "learning_rate": 9.960414483641868e-06,
      "loss": 0.1885,
      "step": 40
    },
    {
      "epoch": 0.005585292064230859,
      "grad_norm": 193089.328125,
      "learning_rate": 9.951100244498778e-06,
      "loss": 0.172,
      "step": 48
    },
    {
      "epoch": 0.006516174074936002,
      "grad_norm": 196219.21875,
      "learning_rate": 9.941786005355688e-06,
      "loss": 0.1658,
      "step": 56
    },
    {
      "epoch": 0.007447056085641145,
      "grad_norm": 177410.421875,
      "learning_rate": 9.932471766212598e-06,
      "loss": 0.1672,
      "step": 64
    },
    {
      "epoch": 0.008377938096346288,
      "grad_norm": 143304.375,
      "learning_rate": 9.923157527069508e-06,
      "loss": 0.1645,
      "step": 72
    },
    {
      "epoch": 0.00930882010705143,
      "grad_norm": 168312.640625,
      "learning_rate": 9.913843287926418e-06,
      "loss": 0.1578,
      "step": 80
    },
    {
      "epoch": 0.009890621363742146,
      "eval_loss": 0.12741638720035553,
      "eval_runtime": 5949.5202,
      "eval_samples_per_second": 2.665,
      "eval_steps_per_second": 0.083,
      "eval_wer": 9.4490594862981,
      "step": 85
    },
    {
      "epoch": 0.010239702117756575,
      "grad_norm": 155638.546875,
      "learning_rate": 9.904529048783329e-06,
      "loss": 0.1555,
      "step": 88
    },
    {
      "epoch": 0.011170584128461718,
      "grad_norm": 125002.9296875,
      "learning_rate": 9.895214809640237e-06,
      "loss": 0.1572,
      "step": 96
    },
    {
      "epoch": 0.01210146613916686,
      "grad_norm": 128746.8671875,
      "learning_rate": 9.885900570497147e-06,
      "loss": 0.1426,
      "step": 104
    },
    {
      "epoch": 0.013032348149872005,
      "grad_norm": 170330.53125,
      "learning_rate": 9.876586331354059e-06,
      "loss": 0.1522,
      "step": 112
    },
    {
      "epoch": 0.013963230160577147,
      "grad_norm": 159400.9375,
      "learning_rate": 9.86727209221097e-06,
      "loss": 0.1465,
      "step": 120
    },
    {
      "epoch": 0.01489411217128229,
      "grad_norm": 176768.453125,
      "learning_rate": 9.85795785306788e-06,
      "loss": 0.1289,
      "step": 128
    },
    {
      "epoch": 0.015824994181987434,
      "grad_norm": 162940.640625,
      "learning_rate": 9.84864361392479e-06,
      "loss": 0.1493,
      "step": 136
    },
    {
      "epoch": 0.016755876192692577,
      "grad_norm": 107537.65625,
      "learning_rate": 9.839329374781698e-06,
      "loss": 0.1465,
      "step": 144
    },
    {
      "epoch": 0.01768675820339772,
      "grad_norm": 179104.046875,
      "learning_rate": 9.830015135638608e-06,
      "loss": 0.144,
      "step": 152
    },
    {
      "epoch": 0.01861764021410286,
      "grad_norm": 170439.359375,
      "learning_rate": 9.820700896495518e-06,
      "loss": 0.1578,
      "step": 160
    },
    {
      "epoch": 0.019548522224808004,
      "grad_norm": 139290.828125,
      "learning_rate": 9.811386657352428e-06,
      "loss": 0.1392,
      "step": 168
    },
    {
      "epoch": 0.019781242727484293,
      "eval_loss": 0.0959169790148735,
      "eval_runtime": 5931.1781,
      "eval_samples_per_second": 2.673,
      "eval_steps_per_second": 0.084,
      "eval_wer": 6.917470320506699,
      "step": 170
    },
    {
      "epoch": 0.02047940423551315,
      "grad_norm": 141791.703125,
      "learning_rate": 9.802072418209339e-06,
      "loss": 0.138,
      "step": 176
    },
    {
      "epoch": 0.021410286246218293,
      "grad_norm": 152705.890625,
      "learning_rate": 9.792758179066249e-06,
      "loss": 0.139,
      "step": 184
    },
    {
      "epoch": 0.022341168256923435,
      "grad_norm": 176748.265625,
      "learning_rate": 9.783443939923159e-06,
      "loss": 0.1371,
      "step": 192
    },
    {
      "epoch": 0.023272050267628578,
      "grad_norm": 166765.390625,
      "learning_rate": 9.774129700780069e-06,
      "loss": 0.1265,
      "step": 200
    },
    {
      "epoch": 0.02420293227833372,
      "grad_norm": 155719.375,
      "learning_rate": 9.764815461636979e-06,
      "loss": 0.1326,
      "step": 208
    },
    {
      "epoch": 0.025133814289038863,
      "grad_norm": 155453.3125,
      "learning_rate": 9.755501222493888e-06,
      "loss": 0.1279,
      "step": 216
    },
    {
      "epoch": 0.02606469629974401,
      "grad_norm": 167183.234375,
      "learning_rate": 9.746186983350798e-06,
      "loss": 0.1385,
      "step": 224
    },
    {
      "epoch": 0.026995578310449152,
      "grad_norm": 150657.203125,
      "learning_rate": 9.736872744207708e-06,
      "loss": 0.1378,
      "step": 232
    },
    {
      "epoch": 0.027926460321154294,
      "grad_norm": 137554.734375,
      "learning_rate": 9.727558505064618e-06,
      "loss": 0.1358,
      "step": 240
    },
    {
      "epoch": 0.028857342331859437,
      "grad_norm": 139477.828125,
      "learning_rate": 9.718244265921528e-06,
      "loss": 0.1427,
      "step": 248
    },
    {
      "epoch": 0.02967186409122644,
      "eval_loss": 0.10620440542697906,
      "eval_runtime": 5969.2466,
      "eval_samples_per_second": 2.656,
      "eval_steps_per_second": 0.083,
      "eval_wer": 7.396161558292754,
      "step": 255
    },
    {
      "epoch": 0.02978822434256458,
      "grad_norm": 151097.390625,
      "learning_rate": 9.708930026778438e-06,
      "loss": 0.1232,
      "step": 256
    },
    {
      "epoch": 0.030719106353269722,
      "grad_norm": 137314.75,
      "learning_rate": 9.699615787635348e-06,
      "loss": 0.1423,
      "step": 264
    },
    {
      "epoch": 0.03164998836397487,
      "grad_norm": 142663.078125,
      "learning_rate": 9.690301548492259e-06,
      "loss": 0.1329,
      "step": 272
    },
    {
      "epoch": 0.03258087037468001,
      "grad_norm": 168839.84375,
      "learning_rate": 9.680987309349169e-06,
      "loss": 0.1446,
      "step": 280
    },
    {
      "epoch": 0.03351175238538515,
      "grad_norm": 151602.734375,
      "learning_rate": 9.671673070206077e-06,
      "loss": 0.1271,
      "step": 288
    },
    {
      "epoch": 0.03444263439609029,
      "grad_norm": 133047.96875,
      "learning_rate": 9.662358831062987e-06,
      "loss": 0.1453,
      "step": 296
    },
    {
      "epoch": 0.03537351640679544,
      "grad_norm": 148903.75,
      "learning_rate": 9.653044591919897e-06,
      "loss": 0.1362,
      "step": 304
    },
    {
      "epoch": 0.036304398417500584,
      "grad_norm": 170333.4375,
      "learning_rate": 9.643730352776808e-06,
      "loss": 0.1684,
      "step": 312
    },
    {
      "epoch": 0.03723528042820572,
      "grad_norm": 116653.328125,
      "learning_rate": 9.63441611363372e-06,
      "loss": 0.147,
      "step": 320
    },
    {
      "epoch": 0.03816616243891087,
      "grad_norm": 212308.03125,
      "learning_rate": 9.62510187449063e-06,
      "loss": 0.1441,
      "step": 328
    },
    {
      "epoch": 0.03909704444961601,
      "grad_norm": 157418.203125,
      "learning_rate": 9.615787635347538e-06,
      "loss": 0.1268,
      "step": 336
    },
    {
      "epoch": 0.039562485454968585,
      "eval_loss": 0.10842841863632202,
      "eval_runtime": 5961.2518,
      "eval_samples_per_second": 2.66,
      "eval_steps_per_second": 0.083,
      "eval_wer": 7.3318692510901045,
      "step": 340
    },
    {
      "epoch": 0.040027926460321155,
      "grad_norm": 148816.703125,
      "learning_rate": 9.606473396204448e-06,
      "loss": 0.1297,
      "step": 344
    },
    {
      "epoch": 0.0409588084710263,
      "grad_norm": 191683.796875,
      "learning_rate": 9.597159157061358e-06,
      "loss": 0.1593,
      "step": 352
    },
    {
      "epoch": 0.04188969048173144,
      "grad_norm": 113932.3203125,
      "learning_rate": 9.587844917918269e-06,
      "loss": 0.129,
      "step": 360
    },
    {
      "epoch": 0.042820572492436586,
      "grad_norm": 117160.140625,
      "learning_rate": 9.578530678775179e-06,
      "loss": 0.1244,
      "step": 368
    },
    {
      "epoch": 0.043751454503141725,
      "grad_norm": 167088.71875,
      "learning_rate": 9.569216439632089e-06,
      "loss": 0.1189,
      "step": 376
    },
    {
      "epoch": 0.04468233651384687,
      "grad_norm": 154759.875,
      "learning_rate": 9.559902200488999e-06,
      "loss": 0.1316,
      "step": 384
    },
    {
      "epoch": 0.04561321852455201,
      "grad_norm": 130896.296875,
      "learning_rate": 9.550587961345909e-06,
      "loss": 0.1378,
      "step": 392
    },
    {
      "epoch": 0.046544100535257156,
      "grad_norm": 130177.3203125,
      "learning_rate": 9.54127372220282e-06,
      "loss": 0.1364,
      "step": 400
    },
    {
      "epoch": 0.0474749825459623,
      "grad_norm": 124157.765625,
      "learning_rate": 9.531959483059728e-06,
      "loss": 0.1321,
      "step": 408
    },
    {
      "epoch": 0.04840586455666744,
      "grad_norm": 167979.953125,
      "learning_rate": 9.522645243916638e-06,
      "loss": 0.1409,
      "step": 416
    },
    {
      "epoch": 0.04933674656737259,
      "grad_norm": 151171.75,
      "learning_rate": 9.513331004773548e-06,
      "loss": 0.1374,
      "step": 424
    },
    {
      "epoch": 0.04945310681871073,
      "eval_loss": 0.11252985894680023,
      "eval_runtime": 5961.7203,
      "eval_samples_per_second": 2.66,
      "eval_steps_per_second": 0.083,
      "eval_wer": 7.531111747668608,
      "step": 425
    },
    {
      "epoch": 0.050267628578077726,
      "grad_norm": 89195.203125,
      "learning_rate": 9.504016765630458e-06,
      "loss": 0.1177,
      "step": 432
    },
    {
      "epoch": 0.05119851058878287,
      "grad_norm": 169561.765625,
      "learning_rate": 9.494702526487368e-06,
      "loss": 0.1211,
      "step": 440
    },
    {
      "epoch": 0.05212939259948802,
      "grad_norm": 157022.328125,
      "learning_rate": 9.485388287344278e-06,
      "loss": 0.1497,
      "step": 448
    },
    {
      "epoch": 0.05306027461019316,
      "grad_norm": 161315.6875,
      "learning_rate": 9.476074048201189e-06,
      "loss": 0.1396,
      "step": 456
    },
    {
      "epoch": 0.053991156620898303,
      "grad_norm": 179211.484375,
      "learning_rate": 9.466759809058099e-06,
      "loss": 0.1463,
      "step": 464
    },
    {
      "epoch": 0.05492203863160344,
      "grad_norm": 139493.796875,
      "learning_rate": 9.457445569915007e-06,
      "loss": 0.1385,
      "step": 472
    },
    {
      "epoch": 0.05585292064230859,
      "grad_norm": 121290.3125,
      "learning_rate": 9.448131330771917e-06,
      "loss": 0.1257,
      "step": 480
    },
    {
      "epoch": 0.05678380265301373,
      "grad_norm": 189379.421875,
      "learning_rate": 9.438817091628828e-06,
      "loss": 0.1332,
      "step": 488
    },
    {
      "epoch": 0.057714684663718874,
      "grad_norm": 137769.5,
      "learning_rate": 9.429502852485738e-06,
      "loss": 0.1252,
      "step": 496
    },
    {
      "epoch": 0.05864556667442402,
      "grad_norm": 151028.71875,
      "learning_rate": 9.420188613342648e-06,
      "loss": 0.1209,
      "step": 504
    },
    {
      "epoch": 0.05934372818245288,
      "eval_loss": 0.11397241055965424,
      "eval_runtime": 5959.2697,
      "eval_samples_per_second": 2.661,
      "eval_steps_per_second": 0.083,
      "eval_wer": 7.744358509182342,
      "step": 510
    },
    {
      "epoch": 0.05957644868512916,
      "grad_norm": 150909.71875,
      "learning_rate": 9.410874374199558e-06,
      "loss": 0.129,
      "step": 512
    },
    {
      "epoch": 0.060507330695834305,
      "grad_norm": 156785.796875,
      "learning_rate": 9.401560135056468e-06,
      "loss": 0.139,
      "step": 520
    },
    {
      "epoch": 0.061438212706539444,
      "grad_norm": 130850.4453125,
      "learning_rate": 9.392245895913378e-06,
      "loss": 0.1275,
      "step": 528
    },
    {
      "epoch": 0.06236909471724459,
      "grad_norm": 141489.40625,
      "learning_rate": 9.382931656770288e-06,
      "loss": 0.1275,
      "step": 536
    },
    {
      "epoch": 0.06329997672794974,
      "grad_norm": 179663.359375,
      "learning_rate": 9.373617417627199e-06,
      "loss": 0.1338,
      "step": 544
    },
    {
      "epoch": 0.06423085873865488,
      "grad_norm": 165837.234375,
      "learning_rate": 9.364303178484109e-06,
      "loss": 0.1333,
      "step": 552
    },
    {
      "epoch": 0.06516174074936001,
      "grad_norm": 143674.65625,
      "learning_rate": 9.354988939341019e-06,
      "loss": 0.1446,
      "step": 560
    },
    {
      "epoch": 0.06609262276006517,
      "grad_norm": 139713.765625,
      "learning_rate": 9.345674700197929e-06,
      "loss": 0.1293,
      "step": 568
    },
    {
      "epoch": 0.0670235047707703,
      "grad_norm": 120400.5625,
      "learning_rate": 9.33636046105484e-06,
      "loss": 0.1223,
      "step": 576
    },
    {
      "epoch": 0.06795438678147545,
      "grad_norm": 120168.7734375,
      "learning_rate": 9.32704622191175e-06,
      "loss": 0.1311,
      "step": 584
    },
    {
      "epoch": 0.06888526879218058,
      "grad_norm": 183192.234375,
      "learning_rate": 9.317731982768658e-06,
      "loss": 0.1373,
      "step": 592
    },
    {
      "epoch": 0.06923434954619502,
      "eval_loss": 0.11407212913036346,
      "eval_runtime": 5966.9756,
      "eval_samples_per_second": 2.657,
      "eval_steps_per_second": 0.083,
      "eval_wer": 7.737356376714727,
      "step": 595
    },
    {
      "epoch": 0.06981615080288574,
      "grad_norm": 147151.234375,
      "learning_rate": 9.308417743625568e-06,
      "loss": 0.1222,
      "step": 600
    },
    {
      "epoch": 0.07074703281359088,
      "grad_norm": 110441.546875,
      "learning_rate": 9.299103504482478e-06,
      "loss": 0.1356,
      "step": 608
    },
    {
      "epoch": 0.07167791482429602,
      "grad_norm": 134779.78125,
      "learning_rate": 9.289789265339388e-06,
      "loss": 0.152,
      "step": 616
    },
    {
      "epoch": 0.07260879683500117,
      "grad_norm": 157262.90625,
      "learning_rate": 9.280475026196298e-06,
      "loss": 0.1425,
      "step": 624
    },
    {
      "epoch": 0.07353967884570631,
      "grad_norm": 94122.4140625,
      "learning_rate": 9.271160787053209e-06,
      "loss": 0.1192,
      "step": 632
    },
    {
      "epoch": 0.07447056085641145,
      "grad_norm": 129110.6875,
      "learning_rate": 9.261846547910119e-06,
      "loss": 0.1244,
      "step": 640
    },
    {
      "epoch": 0.0754014428671166,
      "grad_norm": 137216.5625,
      "learning_rate": 9.252532308767029e-06,
      "loss": 0.1355,
      "step": 648
    },
    {
      "epoch": 0.07633232487782174,
      "grad_norm": 108335.875,
      "learning_rate": 9.243218069623939e-06,
      "loss": 0.1094,
      "step": 656
    },
    {
      "epoch": 0.07726320688852688,
      "grad_norm": 104589.25,
      "learning_rate": 9.233903830480847e-06,
      "loss": 0.1215,
      "step": 664
    },
    {
      "epoch": 0.07819408889923202,
      "grad_norm": 117729.4609375,
      "learning_rate": 9.224589591337758e-06,
      "loss": 0.1471,
      "step": 672
    },
    {
      "epoch": 0.07912497090993717,
      "grad_norm": 142323.6875,
      "learning_rate": 9.215275352194668e-06,
      "loss": 0.1302,
      "step": 680
    },
    {
      "epoch": 0.07912497090993717,
      "eval_loss": 0.11676130443811417,
      "eval_runtime": 5962.6488,
      "eval_samples_per_second": 2.659,
      "eval_steps_per_second": 0.083,
      "eval_wer": 7.715713421814825,
      "step": 680
    },
    {
      "epoch": 0.08005585292064231,
      "grad_norm": 138700.578125,
      "learning_rate": 9.205961113051578e-06,
      "loss": 0.141,
      "step": 688
    },
    {
      "epoch": 0.08098673493134745,
      "grad_norm": 129724.2578125,
      "learning_rate": 9.196646873908488e-06,
      "loss": 0.1326,
      "step": 696
    },
    {
      "epoch": 0.0819176169420526,
      "grad_norm": 156790.84375,
      "learning_rate": 9.187332634765398e-06,
      "loss": 0.1203,
      "step": 704
    },
    {
      "epoch": 0.08284849895275774,
      "grad_norm": 142871.296875,
      "learning_rate": 9.178018395622308e-06,
      "loss": 0.1484,
      "step": 712
    },
    {
      "epoch": 0.08377938096346288,
      "grad_norm": 106344.171875,
      "learning_rate": 9.168704156479218e-06,
      "loss": 0.1118,
      "step": 720
    },
    {
      "epoch": 0.08471026297416802,
      "grad_norm": 300278.625,
      "learning_rate": 9.159389917336129e-06,
      "loss": 0.1431,
      "step": 728
    },
    {
      "epoch": 0.08564114498487317,
      "grad_norm": 163834.234375,
      "learning_rate": 9.150075678193039e-06,
      "loss": 0.1313,
      "step": 736
    },
    {
      "epoch": 0.08657202699557831,
      "grad_norm": 161322.71875,
      "learning_rate": 9.140761439049949e-06,
      "loss": 0.1366,
      "step": 744
    },
    {
      "epoch": 0.08750290900628345,
      "grad_norm": 138388.65625,
      "learning_rate": 9.131447199906859e-06,
      "loss": 0.1302,
      "step": 752
    },
    {
      "epoch": 0.0884337910169886,
      "grad_norm": 125799.078125,
      "learning_rate": 9.12213296076377e-06,
      "loss": 0.1569,
      "step": 760
    },
    {
      "epoch": 0.0890155922736793,
      "eval_loss": 0.11295511573553085,
      "eval_runtime": 5970.6229,
      "eval_samples_per_second": 2.656,
      "eval_steps_per_second": 0.083,
      "eval_wer": 8.026353480378114,
      "step": 765
    },
    {
      "epoch": 0.08936467302769374,
      "grad_norm": 126927.6484375,
      "learning_rate": 9.11281872162068e-06,
      "loss": 0.1428,
      "step": 768
    },
    {
      "epoch": 0.09029555503839888,
      "grad_norm": 126672.28125,
      "learning_rate": 9.10350448247759e-06,
      "loss": 0.1307,
      "step": 776
    },
    {
      "epoch": 0.09122643704910402,
      "grad_norm": 99957.9453125,
      "learning_rate": 9.094190243334498e-06,
      "loss": 0.1465,
      "step": 784
    },
    {
      "epoch": 0.09215731905980917,
      "grad_norm": 120560.9140625,
      "learning_rate": 9.084876004191408e-06,
      "loss": 0.1205,
      "step": 792
    },
    {
      "epoch": 0.09308820107051431,
      "grad_norm": 147541.890625,
      "learning_rate": 9.075561765048318e-06,
      "loss": 0.1418,
      "step": 800
    },
    {
      "epoch": 0.09401908308121945,
      "grad_norm": 154493.171875,
      "learning_rate": 9.066247525905228e-06,
      "loss": 0.1512,
      "step": 808
    },
    {
      "epoch": 0.0949499650919246,
      "grad_norm": 143224.46875,
      "learning_rate": 9.056933286762139e-06,
      "loss": 0.1372,
      "step": 816
    },
    {
      "epoch": 0.09588084710262974,
      "grad_norm": 112816.7890625,
      "learning_rate": 9.047619047619049e-06,
      "loss": 0.1426,
      "step": 824
    },
    {
      "epoch": 0.09681172911333488,
      "grad_norm": 125948.0703125,
      "learning_rate": 9.038304808475959e-06,
      "loss": 0.1477,
      "step": 832
    },
    {
      "epoch": 0.09774261112404002,
      "grad_norm": 117891.203125,
      "learning_rate": 9.028990569332869e-06,
      "loss": 0.1548,
      "step": 840
    },
    {
      "epoch": 0.09867349313474517,
      "grad_norm": 145955.125,
      "learning_rate": 9.019676330189779e-06,
      "loss": 0.134,
      "step": 848
    },
    {
      "epoch": 0.09890621363742146,
      "eval_loss": 0.11149411648511887,
      "eval_runtime": 5853.7074,
      "eval_samples_per_second": 2.709,
      "eval_steps_per_second": 0.085,
      "eval_wer": 7.540660110124447,
      "step": 850
    }
  ],
  "logging_steps": 8,
  "max_steps": 8594,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 850,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.85498685407232e+20,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}