{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.09890621363742146,
"eval_steps": 85,
"global_step": 850,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0009308820107051431,
"grad_norm": 246781.0625,
"learning_rate": 9.997671440214229e-06,
"loss": 0.5292,
"step": 8
},
{
"epoch": 0.0018617640214102862,
"grad_norm": 187779.84375,
"learning_rate": 9.988357201071139e-06,
"loss": 0.1916,
"step": 16
},
{
"epoch": 0.0027926460321154294,
"grad_norm": 191587.796875,
"learning_rate": 9.979042961928047e-06,
"loss": 0.1835,
"step": 24
},
{
"epoch": 0.0037235280428205724,
"grad_norm": 192646.890625,
"learning_rate": 9.969728722784958e-06,
"loss": 0.1706,
"step": 32
},
{
"epoch": 0.004654410053525715,
"grad_norm": 218090.6875,
"learning_rate": 9.960414483641868e-06,
"loss": 0.1885,
"step": 40
},
{
"epoch": 0.005585292064230859,
"grad_norm": 193089.328125,
"learning_rate": 9.951100244498778e-06,
"loss": 0.172,
"step": 48
},
{
"epoch": 0.006516174074936002,
"grad_norm": 196219.21875,
"learning_rate": 9.941786005355688e-06,
"loss": 0.1658,
"step": 56
},
{
"epoch": 0.007447056085641145,
"grad_norm": 177410.421875,
"learning_rate": 9.932471766212598e-06,
"loss": 0.1672,
"step": 64
},
{
"epoch": 0.008377938096346288,
"grad_norm": 143304.375,
"learning_rate": 9.923157527069508e-06,
"loss": 0.1645,
"step": 72
},
{
"epoch": 0.00930882010705143,
"grad_norm": 168312.640625,
"learning_rate": 9.913843287926418e-06,
"loss": 0.1578,
"step": 80
},
{
"epoch": 0.009890621363742146,
"eval_loss": 0.12741638720035553,
"eval_runtime": 5949.5202,
"eval_samples_per_second": 2.665,
"eval_steps_per_second": 0.083,
"eval_wer": 9.4490594862981,
"step": 85
},
{
"epoch": 0.010239702117756575,
"grad_norm": 155638.546875,
"learning_rate": 9.904529048783329e-06,
"loss": 0.1555,
"step": 88
},
{
"epoch": 0.011170584128461718,
"grad_norm": 125002.9296875,
"learning_rate": 9.895214809640237e-06,
"loss": 0.1572,
"step": 96
},
{
"epoch": 0.01210146613916686,
"grad_norm": 128746.8671875,
"learning_rate": 9.885900570497147e-06,
"loss": 0.1426,
"step": 104
},
{
"epoch": 0.013032348149872005,
"grad_norm": 170330.53125,
"learning_rate": 9.876586331354059e-06,
"loss": 0.1522,
"step": 112
},
{
"epoch": 0.013963230160577147,
"grad_norm": 159400.9375,
"learning_rate": 9.86727209221097e-06,
"loss": 0.1465,
"step": 120
},
{
"epoch": 0.01489411217128229,
"grad_norm": 176768.453125,
"learning_rate": 9.85795785306788e-06,
"loss": 0.1289,
"step": 128
},
{
"epoch": 0.015824994181987434,
"grad_norm": 162940.640625,
"learning_rate": 9.84864361392479e-06,
"loss": 0.1493,
"step": 136
},
{
"epoch": 0.016755876192692577,
"grad_norm": 107537.65625,
"learning_rate": 9.839329374781698e-06,
"loss": 0.1465,
"step": 144
},
{
"epoch": 0.01768675820339772,
"grad_norm": 179104.046875,
"learning_rate": 9.830015135638608e-06,
"loss": 0.144,
"step": 152
},
{
"epoch": 0.01861764021410286,
"grad_norm": 170439.359375,
"learning_rate": 9.820700896495518e-06,
"loss": 0.1578,
"step": 160
},
{
"epoch": 0.019548522224808004,
"grad_norm": 139290.828125,
"learning_rate": 9.811386657352428e-06,
"loss": 0.1392,
"step": 168
},
{
"epoch": 0.019781242727484293,
"eval_loss": 0.0959169790148735,
"eval_runtime": 5931.1781,
"eval_samples_per_second": 2.673,
"eval_steps_per_second": 0.084,
"eval_wer": 6.917470320506699,
"step": 170
},
{
"epoch": 0.02047940423551315,
"grad_norm": 141791.703125,
"learning_rate": 9.802072418209339e-06,
"loss": 0.138,
"step": 176
},
{
"epoch": 0.021410286246218293,
"grad_norm": 152705.890625,
"learning_rate": 9.792758179066249e-06,
"loss": 0.139,
"step": 184
},
{
"epoch": 0.022341168256923435,
"grad_norm": 176748.265625,
"learning_rate": 9.783443939923159e-06,
"loss": 0.1371,
"step": 192
},
{
"epoch": 0.023272050267628578,
"grad_norm": 166765.390625,
"learning_rate": 9.774129700780069e-06,
"loss": 0.1265,
"step": 200
},
{
"epoch": 0.02420293227833372,
"grad_norm": 155719.375,
"learning_rate": 9.764815461636979e-06,
"loss": 0.1326,
"step": 208
},
{
"epoch": 0.025133814289038863,
"grad_norm": 155453.3125,
"learning_rate": 9.755501222493888e-06,
"loss": 0.1279,
"step": 216
},
{
"epoch": 0.02606469629974401,
"grad_norm": 167183.234375,
"learning_rate": 9.746186983350798e-06,
"loss": 0.1385,
"step": 224
},
{
"epoch": 0.026995578310449152,
"grad_norm": 150657.203125,
"learning_rate": 9.736872744207708e-06,
"loss": 0.1378,
"step": 232
},
{
"epoch": 0.027926460321154294,
"grad_norm": 137554.734375,
"learning_rate": 9.727558505064618e-06,
"loss": 0.1358,
"step": 240
},
{
"epoch": 0.028857342331859437,
"grad_norm": 139477.828125,
"learning_rate": 9.718244265921528e-06,
"loss": 0.1427,
"step": 248
},
{
"epoch": 0.02967186409122644,
"eval_loss": 0.10620440542697906,
"eval_runtime": 5969.2466,
"eval_samples_per_second": 2.656,
"eval_steps_per_second": 0.083,
"eval_wer": 7.396161558292754,
"step": 255
},
{
"epoch": 0.02978822434256458,
"grad_norm": 151097.390625,
"learning_rate": 9.708930026778438e-06,
"loss": 0.1232,
"step": 256
},
{
"epoch": 0.030719106353269722,
"grad_norm": 137314.75,
"learning_rate": 9.699615787635348e-06,
"loss": 0.1423,
"step": 264
},
{
"epoch": 0.03164998836397487,
"grad_norm": 142663.078125,
"learning_rate": 9.690301548492259e-06,
"loss": 0.1329,
"step": 272
},
{
"epoch": 0.03258087037468001,
"grad_norm": 168839.84375,
"learning_rate": 9.680987309349169e-06,
"loss": 0.1446,
"step": 280
},
{
"epoch": 0.03351175238538515,
"grad_norm": 151602.734375,
"learning_rate": 9.671673070206077e-06,
"loss": 0.1271,
"step": 288
},
{
"epoch": 0.03444263439609029,
"grad_norm": 133047.96875,
"learning_rate": 9.662358831062987e-06,
"loss": 0.1453,
"step": 296
},
{
"epoch": 0.03537351640679544,
"grad_norm": 148903.75,
"learning_rate": 9.653044591919897e-06,
"loss": 0.1362,
"step": 304
},
{
"epoch": 0.036304398417500584,
"grad_norm": 170333.4375,
"learning_rate": 9.643730352776808e-06,
"loss": 0.1684,
"step": 312
},
{
"epoch": 0.03723528042820572,
"grad_norm": 116653.328125,
"learning_rate": 9.63441611363372e-06,
"loss": 0.147,
"step": 320
},
{
"epoch": 0.03816616243891087,
"grad_norm": 212308.03125,
"learning_rate": 9.62510187449063e-06,
"loss": 0.1441,
"step": 328
},
{
"epoch": 0.03909704444961601,
"grad_norm": 157418.203125,
"learning_rate": 9.615787635347538e-06,
"loss": 0.1268,
"step": 336
},
{
"epoch": 0.039562485454968585,
"eval_loss": 0.10842841863632202,
"eval_runtime": 5961.2518,
"eval_samples_per_second": 2.66,
"eval_steps_per_second": 0.083,
"eval_wer": 7.3318692510901045,
"step": 340
},
{
"epoch": 0.040027926460321155,
"grad_norm": 148816.703125,
"learning_rate": 9.606473396204448e-06,
"loss": 0.1297,
"step": 344
},
{
"epoch": 0.0409588084710263,
"grad_norm": 191683.796875,
"learning_rate": 9.597159157061358e-06,
"loss": 0.1593,
"step": 352
},
{
"epoch": 0.04188969048173144,
"grad_norm": 113932.3203125,
"learning_rate": 9.587844917918269e-06,
"loss": 0.129,
"step": 360
},
{
"epoch": 0.042820572492436586,
"grad_norm": 117160.140625,
"learning_rate": 9.578530678775179e-06,
"loss": 0.1244,
"step": 368
},
{
"epoch": 0.043751454503141725,
"grad_norm": 167088.71875,
"learning_rate": 9.569216439632089e-06,
"loss": 0.1189,
"step": 376
},
{
"epoch": 0.04468233651384687,
"grad_norm": 154759.875,
"learning_rate": 9.559902200488999e-06,
"loss": 0.1316,
"step": 384
},
{
"epoch": 0.04561321852455201,
"grad_norm": 130896.296875,
"learning_rate": 9.550587961345909e-06,
"loss": 0.1378,
"step": 392
},
{
"epoch": 0.046544100535257156,
"grad_norm": 130177.3203125,
"learning_rate": 9.54127372220282e-06,
"loss": 0.1364,
"step": 400
},
{
"epoch": 0.0474749825459623,
"grad_norm": 124157.765625,
"learning_rate": 9.531959483059728e-06,
"loss": 0.1321,
"step": 408
},
{
"epoch": 0.04840586455666744,
"grad_norm": 167979.953125,
"learning_rate": 9.522645243916638e-06,
"loss": 0.1409,
"step": 416
},
{
"epoch": 0.04933674656737259,
"grad_norm": 151171.75,
"learning_rate": 9.513331004773548e-06,
"loss": 0.1374,
"step": 424
},
{
"epoch": 0.04945310681871073,
"eval_loss": 0.11252985894680023,
"eval_runtime": 5961.7203,
"eval_samples_per_second": 2.66,
"eval_steps_per_second": 0.083,
"eval_wer": 7.531111747668608,
"step": 425
},
{
"epoch": 0.050267628578077726,
"grad_norm": 89195.203125,
"learning_rate": 9.504016765630458e-06,
"loss": 0.1177,
"step": 432
},
{
"epoch": 0.05119851058878287,
"grad_norm": 169561.765625,
"learning_rate": 9.494702526487368e-06,
"loss": 0.1211,
"step": 440
},
{
"epoch": 0.05212939259948802,
"grad_norm": 157022.328125,
"learning_rate": 9.485388287344278e-06,
"loss": 0.1497,
"step": 448
},
{
"epoch": 0.05306027461019316,
"grad_norm": 161315.6875,
"learning_rate": 9.476074048201189e-06,
"loss": 0.1396,
"step": 456
},
{
"epoch": 0.053991156620898303,
"grad_norm": 179211.484375,
"learning_rate": 9.466759809058099e-06,
"loss": 0.1463,
"step": 464
},
{
"epoch": 0.05492203863160344,
"grad_norm": 139493.796875,
"learning_rate": 9.457445569915007e-06,
"loss": 0.1385,
"step": 472
},
{
"epoch": 0.05585292064230859,
"grad_norm": 121290.3125,
"learning_rate": 9.448131330771917e-06,
"loss": 0.1257,
"step": 480
},
{
"epoch": 0.05678380265301373,
"grad_norm": 189379.421875,
"learning_rate": 9.438817091628828e-06,
"loss": 0.1332,
"step": 488
},
{
"epoch": 0.057714684663718874,
"grad_norm": 137769.5,
"learning_rate": 9.429502852485738e-06,
"loss": 0.1252,
"step": 496
},
{
"epoch": 0.05864556667442402,
"grad_norm": 151028.71875,
"learning_rate": 9.420188613342648e-06,
"loss": 0.1209,
"step": 504
},
{
"epoch": 0.05934372818245288,
"eval_loss": 0.11397241055965424,
"eval_runtime": 5959.2697,
"eval_samples_per_second": 2.661,
"eval_steps_per_second": 0.083,
"eval_wer": 7.744358509182342,
"step": 510
},
{
"epoch": 0.05957644868512916,
"grad_norm": 150909.71875,
"learning_rate": 9.410874374199558e-06,
"loss": 0.129,
"step": 512
},
{
"epoch": 0.060507330695834305,
"grad_norm": 156785.796875,
"learning_rate": 9.401560135056468e-06,
"loss": 0.139,
"step": 520
},
{
"epoch": 0.061438212706539444,
"grad_norm": 130850.4453125,
"learning_rate": 9.392245895913378e-06,
"loss": 0.1275,
"step": 528
},
{
"epoch": 0.06236909471724459,
"grad_norm": 141489.40625,
"learning_rate": 9.382931656770288e-06,
"loss": 0.1275,
"step": 536
},
{
"epoch": 0.06329997672794974,
"grad_norm": 179663.359375,
"learning_rate": 9.373617417627199e-06,
"loss": 0.1338,
"step": 544
},
{
"epoch": 0.06423085873865488,
"grad_norm": 165837.234375,
"learning_rate": 9.364303178484109e-06,
"loss": 0.1333,
"step": 552
},
{
"epoch": 0.06516174074936001,
"grad_norm": 143674.65625,
"learning_rate": 9.354988939341019e-06,
"loss": 0.1446,
"step": 560
},
{
"epoch": 0.06609262276006517,
"grad_norm": 139713.765625,
"learning_rate": 9.345674700197929e-06,
"loss": 0.1293,
"step": 568
},
{
"epoch": 0.0670235047707703,
"grad_norm": 120400.5625,
"learning_rate": 9.33636046105484e-06,
"loss": 0.1223,
"step": 576
},
{
"epoch": 0.06795438678147545,
"grad_norm": 120168.7734375,
"learning_rate": 9.32704622191175e-06,
"loss": 0.1311,
"step": 584
},
{
"epoch": 0.06888526879218058,
"grad_norm": 183192.234375,
"learning_rate": 9.317731982768658e-06,
"loss": 0.1373,
"step": 592
},
{
"epoch": 0.06923434954619502,
"eval_loss": 0.11407212913036346,
"eval_runtime": 5966.9756,
"eval_samples_per_second": 2.657,
"eval_steps_per_second": 0.083,
"eval_wer": 7.737356376714727,
"step": 595
},
{
"epoch": 0.06981615080288574,
"grad_norm": 147151.234375,
"learning_rate": 9.308417743625568e-06,
"loss": 0.1222,
"step": 600
},
{
"epoch": 0.07074703281359088,
"grad_norm": 110441.546875,
"learning_rate": 9.299103504482478e-06,
"loss": 0.1356,
"step": 608
},
{
"epoch": 0.07167791482429602,
"grad_norm": 134779.78125,
"learning_rate": 9.289789265339388e-06,
"loss": 0.152,
"step": 616
},
{
"epoch": 0.07260879683500117,
"grad_norm": 157262.90625,
"learning_rate": 9.280475026196298e-06,
"loss": 0.1425,
"step": 624
},
{
"epoch": 0.07353967884570631,
"grad_norm": 94122.4140625,
"learning_rate": 9.271160787053209e-06,
"loss": 0.1192,
"step": 632
},
{
"epoch": 0.07447056085641145,
"grad_norm": 129110.6875,
"learning_rate": 9.261846547910119e-06,
"loss": 0.1244,
"step": 640
},
{
"epoch": 0.0754014428671166,
"grad_norm": 137216.5625,
"learning_rate": 9.252532308767029e-06,
"loss": 0.1355,
"step": 648
},
{
"epoch": 0.07633232487782174,
"grad_norm": 108335.875,
"learning_rate": 9.243218069623939e-06,
"loss": 0.1094,
"step": 656
},
{
"epoch": 0.07726320688852688,
"grad_norm": 104589.25,
"learning_rate": 9.233903830480847e-06,
"loss": 0.1215,
"step": 664
},
{
"epoch": 0.07819408889923202,
"grad_norm": 117729.4609375,
"learning_rate": 9.224589591337758e-06,
"loss": 0.1471,
"step": 672
},
{
"epoch": 0.07912497090993717,
"grad_norm": 142323.6875,
"learning_rate": 9.215275352194668e-06,
"loss": 0.1302,
"step": 680
},
{
"epoch": 0.07912497090993717,
"eval_loss": 0.11676130443811417,
"eval_runtime": 5962.6488,
"eval_samples_per_second": 2.659,
"eval_steps_per_second": 0.083,
"eval_wer": 7.715713421814825,
"step": 680
},
{
"epoch": 0.08005585292064231,
"grad_norm": 138700.578125,
"learning_rate": 9.205961113051578e-06,
"loss": 0.141,
"step": 688
},
{
"epoch": 0.08098673493134745,
"grad_norm": 129724.2578125,
"learning_rate": 9.196646873908488e-06,
"loss": 0.1326,
"step": 696
},
{
"epoch": 0.0819176169420526,
"grad_norm": 156790.84375,
"learning_rate": 9.187332634765398e-06,
"loss": 0.1203,
"step": 704
},
{
"epoch": 0.08284849895275774,
"grad_norm": 142871.296875,
"learning_rate": 9.178018395622308e-06,
"loss": 0.1484,
"step": 712
},
{
"epoch": 0.08377938096346288,
"grad_norm": 106344.171875,
"learning_rate": 9.168704156479218e-06,
"loss": 0.1118,
"step": 720
},
{
"epoch": 0.08471026297416802,
"grad_norm": 300278.625,
"learning_rate": 9.159389917336129e-06,
"loss": 0.1431,
"step": 728
},
{
"epoch": 0.08564114498487317,
"grad_norm": 163834.234375,
"learning_rate": 9.150075678193039e-06,
"loss": 0.1313,
"step": 736
},
{
"epoch": 0.08657202699557831,
"grad_norm": 161322.71875,
"learning_rate": 9.140761439049949e-06,
"loss": 0.1366,
"step": 744
},
{
"epoch": 0.08750290900628345,
"grad_norm": 138388.65625,
"learning_rate": 9.131447199906859e-06,
"loss": 0.1302,
"step": 752
},
{
"epoch": 0.0884337910169886,
"grad_norm": 125799.078125,
"learning_rate": 9.12213296076377e-06,
"loss": 0.1569,
"step": 760
},
{
"epoch": 0.0890155922736793,
"eval_loss": 0.11295511573553085,
"eval_runtime": 5970.6229,
"eval_samples_per_second": 2.656,
"eval_steps_per_second": 0.083,
"eval_wer": 8.026353480378114,
"step": 765
},
{
"epoch": 0.08936467302769374,
"grad_norm": 126927.6484375,
"learning_rate": 9.11281872162068e-06,
"loss": 0.1428,
"step": 768
},
{
"epoch": 0.09029555503839888,
"grad_norm": 126672.28125,
"learning_rate": 9.10350448247759e-06,
"loss": 0.1307,
"step": 776
},
{
"epoch": 0.09122643704910402,
"grad_norm": 99957.9453125,
"learning_rate": 9.094190243334498e-06,
"loss": 0.1465,
"step": 784
},
{
"epoch": 0.09215731905980917,
"grad_norm": 120560.9140625,
"learning_rate": 9.084876004191408e-06,
"loss": 0.1205,
"step": 792
},
{
"epoch": 0.09308820107051431,
"grad_norm": 147541.890625,
"learning_rate": 9.075561765048318e-06,
"loss": 0.1418,
"step": 800
},
{
"epoch": 0.09401908308121945,
"grad_norm": 154493.171875,
"learning_rate": 9.066247525905228e-06,
"loss": 0.1512,
"step": 808
},
{
"epoch": 0.0949499650919246,
"grad_norm": 143224.46875,
"learning_rate": 9.056933286762139e-06,
"loss": 0.1372,
"step": 816
},
{
"epoch": 0.09588084710262974,
"grad_norm": 112816.7890625,
"learning_rate": 9.047619047619049e-06,
"loss": 0.1426,
"step": 824
},
{
"epoch": 0.09681172911333488,
"grad_norm": 125948.0703125,
"learning_rate": 9.038304808475959e-06,
"loss": 0.1477,
"step": 832
},
{
"epoch": 0.09774261112404002,
"grad_norm": 117891.203125,
"learning_rate": 9.028990569332869e-06,
"loss": 0.1548,
"step": 840
},
{
"epoch": 0.09867349313474517,
"grad_norm": 145955.125,
"learning_rate": 9.019676330189779e-06,
"loss": 0.134,
"step": 848
},
{
"epoch": 0.09890621363742146,
"eval_loss": 0.11149411648511887,
"eval_runtime": 5853.7074,
"eval_samples_per_second": 2.709,
"eval_steps_per_second": 0.085,
"eval_wer": 7.540660110124447,
"step": 850
}
],
"logging_steps": 8,
"max_steps": 8594,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 850,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.85498685407232e+20,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}