whisper-small-bn-all-600 / trainer_state.json
Rakib's picture
End of training
ed73af4
{
"best_metric": 10.597058661151479,
"best_model_checkpoint": "./checkpoint-2800",
"epoch": 3.3819247627933176,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 2.8153153153153155e-07,
"loss": 2.3778,
"step": 25
},
{
"epoch": 0.06,
"learning_rate": 5.630630630630631e-07,
"loss": 2.2187,
"step": 50
},
{
"epoch": 0.08,
"learning_rate": 8.445945945945947e-07,
"loss": 1.8708,
"step": 75
},
{
"epoch": 0.11,
"learning_rate": 1.1261261261261262e-06,
"loss": 1.4902,
"step": 100
},
{
"epoch": 0.11,
"eval_cer": 1208.7377528430404,
"eval_loss": 1.2635722160339355,
"eval_runtime": 1766.3017,
"eval_samples_per_second": 5.148,
"eval_steps_per_second": 0.161,
"eval_wer": 889.9405871635997,
"step": 100
},
{
"epoch": 0.14,
"learning_rate": 1.4076576576576579e-06,
"loss": 1.1418,
"step": 125
},
{
"epoch": 0.17,
"learning_rate": 1.6891891891891894e-06,
"loss": 0.8588,
"step": 150
},
{
"epoch": 0.2,
"learning_rate": 1.970720720720721e-06,
"loss": 0.683,
"step": 175
},
{
"epoch": 0.23,
"learning_rate": 2.2522522522522524e-06,
"loss": 0.5703,
"step": 200
},
{
"epoch": 0.23,
"eval_cer": 121.77656307170339,
"eval_loss": 0.461249440908432,
"eval_runtime": 1640.8217,
"eval_samples_per_second": 5.542,
"eval_steps_per_second": 0.174,
"eval_wer": 108.64012978243605,
"step": 200
},
{
"epoch": 0.25,
"learning_rate": 2.533783783783784e-06,
"loss": 0.4998,
"step": 225
},
{
"epoch": 0.28,
"learning_rate": 2.8153153153153158e-06,
"loss": 0.4463,
"step": 250
},
{
"epoch": 0.31,
"learning_rate": 3.096846846846847e-06,
"loss": 0.4037,
"step": 275
},
{
"epoch": 0.34,
"learning_rate": 3.3783783783783788e-06,
"loss": 0.3679,
"step": 300
},
{
"epoch": 0.34,
"eval_cer": 17.738466144072024,
"eval_loss": 0.30462178587913513,
"eval_runtime": 1238.7679,
"eval_samples_per_second": 7.34,
"eval_steps_per_second": 0.23,
"eval_wer": 35.2744074641888,
"step": 300
},
{
"epoch": 0.37,
"learning_rate": 3.65990990990991e-06,
"loss": 0.3325,
"step": 325
},
{
"epoch": 0.39,
"learning_rate": 3.941441441441442e-06,
"loss": 0.2873,
"step": 350
},
{
"epoch": 0.42,
"learning_rate": 4.222972972972974e-06,
"loss": 0.2461,
"step": 375
},
{
"epoch": 0.45,
"learning_rate": 4.504504504504505e-06,
"loss": 0.2301,
"step": 400
},
{
"epoch": 0.45,
"eval_cer": 14.885262018013623,
"eval_loss": 0.20585699379444122,
"eval_runtime": 1256.6908,
"eval_samples_per_second": 7.236,
"eval_steps_per_second": 0.227,
"eval_wer": 29.76714754683167,
"step": 400
},
{
"epoch": 0.48,
"learning_rate": 4.7860360360360364e-06,
"loss": 0.2191,
"step": 425
},
{
"epoch": 0.51,
"learning_rate": 5.067567567567568e-06,
"loss": 0.2041,
"step": 450
},
{
"epoch": 0.54,
"learning_rate": 5.3490990990991e-06,
"loss": 0.1964,
"step": 475
},
{
"epoch": 0.56,
"learning_rate": 5.6306306306306316e-06,
"loss": 0.191,
"step": 500
},
{
"epoch": 0.56,
"eval_cer": 12.756076918243874,
"eval_loss": 0.16926029324531555,
"eval_runtime": 1255.9571,
"eval_samples_per_second": 7.24,
"eval_steps_per_second": 0.227,
"eval_wer": 25.452815845688004,
"step": 500
},
{
"epoch": 0.59,
"learning_rate": 5.912162162162162e-06,
"loss": 0.1829,
"step": 525
},
{
"epoch": 0.62,
"learning_rate": 6.193693693693694e-06,
"loss": 0.1734,
"step": 550
},
{
"epoch": 0.65,
"learning_rate": 6.475225225225226e-06,
"loss": 0.1689,
"step": 575
},
{
"epoch": 0.68,
"learning_rate": 6.7567567567567575e-06,
"loss": 0.1605,
"step": 600
},
{
"epoch": 0.68,
"eval_cer": 11.58800028088538,
"eval_loss": 0.14617891609668732,
"eval_runtime": 1253.5295,
"eval_samples_per_second": 7.254,
"eval_steps_per_second": 0.227,
"eval_wer": 22.78454671700549,
"step": 600
},
{
"epoch": 0.7,
"learning_rate": 7.038288288288288e-06,
"loss": 0.158,
"step": 625
},
{
"epoch": 0.73,
"learning_rate": 7.31981981981982e-06,
"loss": 0.153,
"step": 650
},
{
"epoch": 0.76,
"learning_rate": 7.601351351351352e-06,
"loss": 0.1509,
"step": 675
},
{
"epoch": 0.79,
"learning_rate": 7.882882882882884e-06,
"loss": 0.146,
"step": 700
},
{
"epoch": 0.79,
"eval_cer": 10.432119982407704,
"eval_loss": 0.13004416227340698,
"eval_runtime": 1254.5015,
"eval_samples_per_second": 7.248,
"eval_steps_per_second": 0.227,
"eval_wer": 20.554123437705705,
"step": 700
},
{
"epoch": 0.82,
"learning_rate": 8.164414414414416e-06,
"loss": 0.1417,
"step": 725
},
{
"epoch": 0.84,
"learning_rate": 8.445945945945948e-06,
"loss": 0.1392,
"step": 750
},
{
"epoch": 0.87,
"learning_rate": 8.727477477477478e-06,
"loss": 0.1355,
"step": 775
},
{
"epoch": 0.9,
"learning_rate": 9.00900900900901e-06,
"loss": 0.1296,
"step": 800
},
{
"epoch": 0.9,
"eval_cer": 9.857228917889072,
"eval_loss": 0.11560462415218353,
"eval_runtime": 1268.8039,
"eval_samples_per_second": 7.167,
"eval_steps_per_second": 0.225,
"eval_wer": 19.214255683290087,
"step": 800
},
{
"epoch": 0.93,
"learning_rate": 9.290540540540541e-06,
"loss": 0.1262,
"step": 825
},
{
"epoch": 0.96,
"learning_rate": 9.572072072072073e-06,
"loss": 0.1247,
"step": 850
},
{
"epoch": 0.99,
"learning_rate": 9.853603603603605e-06,
"loss": 0.1223,
"step": 875
},
{
"epoch": 1.01,
"learning_rate": 9.999203468625017e-06,
"loss": 0.1212,
"step": 900
},
{
"epoch": 1.01,
"eval_cer": 8.946199362094518,
"eval_loss": 0.10553693026304245,
"eval_runtime": 1266.5433,
"eval_samples_per_second": 7.179,
"eval_steps_per_second": 0.225,
"eval_wer": 17.400444216060578,
"step": 900
},
{
"epoch": 1.04,
"learning_rate": 9.992429130775193e-06,
"loss": 0.1149,
"step": 925
},
{
"epoch": 1.07,
"learning_rate": 9.978751539864958e-06,
"loss": 0.1105,
"step": 950
},
{
"epoch": 1.1,
"learning_rate": 9.958189608505554e-06,
"loss": 0.1092,
"step": 975
},
{
"epoch": 1.13,
"learning_rate": 9.930771768590934e-06,
"loss": 0.1072,
"step": 1000
},
{
"epoch": 1.13,
"eval_cer": 8.267454624075574,
"eval_loss": 0.09784528613090515,
"eval_runtime": 1262.057,
"eval_samples_per_second": 7.205,
"eval_steps_per_second": 0.226,
"eval_wer": 15.92340458060653,
"step": 1000
},
{
"epoch": 1.16,
"learning_rate": 9.896535931983703e-06,
"loss": 0.107,
"step": 1025
},
{
"epoch": 1.18,
"learning_rate": 9.855529438092723e-06,
"loss": 0.1036,
"step": 1050
},
{
"epoch": 1.21,
"learning_rate": 9.807808988414811e-06,
"loss": 0.1035,
"step": 1075
},
{
"epoch": 1.24,
"learning_rate": 9.753440568131056e-06,
"loss": 0.1013,
"step": 1100
},
{
"epoch": 1.24,
"eval_cer": 7.791797407723609,
"eval_loss": 0.09121902287006378,
"eval_runtime": 1263.6249,
"eval_samples_per_second": 7.196,
"eval_steps_per_second": 0.226,
"eval_wer": 15.060453304228547,
"step": 1100
},
{
"epoch": 1.27,
"learning_rate": 9.692499354866194e-06,
"loss": 0.099,
"step": 1125
},
{
"epoch": 1.3,
"learning_rate": 9.62506961473717e-06,
"loss": 0.0989,
"step": 1150
},
{
"epoch": 1.32,
"learning_rate": 9.551244585834649e-06,
"loss": 0.097,
"step": 1175
},
{
"epoch": 1.35,
"learning_rate": 9.471126349298557e-06,
"loss": 0.0952,
"step": 1200
},
{
"epoch": 1.35,
"eval_cer": 7.549718560240675,
"eval_loss": 0.0853806585073471,
"eval_runtime": 1267.0907,
"eval_samples_per_second": 7.176,
"eval_steps_per_second": 0.225,
"eval_wer": 14.320659444262777,
"step": 1200
},
{
"epoch": 1.38,
"learning_rate": 9.384825688165987e-06,
"loss": 0.0946,
"step": 1225
},
{
"epoch": 1.41,
"learning_rate": 9.292461934186572e-06,
"loss": 0.0922,
"step": 1250
},
{
"epoch": 1.44,
"learning_rate": 9.194162802817177e-06,
"loss": 0.0919,
"step": 1275
},
{
"epoch": 1.47,
"learning_rate": 9.090064216624093e-06,
"loss": 0.0915,
"step": 1300
},
{
"epoch": 1.47,
"eval_cer": 6.983327974335947,
"eval_loss": 0.0808597058057785,
"eval_runtime": 1272.1477,
"eval_samples_per_second": 7.148,
"eval_steps_per_second": 0.224,
"eval_wer": 13.316289479383872,
"step": 1300
},
{
"epoch": 1.49,
"learning_rate": 8.980310117336864e-06,
"loss": 0.0894,
"step": 1325
},
{
"epoch": 1.52,
"learning_rate": 8.865052266813686e-06,
"loss": 0.0884,
"step": 1350
},
{
"epoch": 1.55,
"learning_rate": 8.744450037193558e-06,
"loss": 0.0872,
"step": 1375
},
{
"epoch": 1.58,
"learning_rate": 8.61867019052535e-06,
"loss": 0.0843,
"step": 1400
},
{
"epoch": 1.58,
"eval_cer": 6.642200071699689,
"eval_loss": 0.07798563688993454,
"eval_runtime": 1266.8664,
"eval_samples_per_second": 7.178,
"eval_steps_per_second": 0.225,
"eval_wer": 12.717914307919024,
"step": 1400
},
{
"epoch": 1.61,
"learning_rate": 8.48788664817855e-06,
"loss": 0.0858,
"step": 1425
},
{
"epoch": 1.63,
"learning_rate": 8.352280250354445e-06,
"loss": 0.0849,
"step": 1450
},
{
"epoch": 1.66,
"learning_rate": 8.212038506030386e-06,
"loss": 0.0843,
"step": 1475
},
{
"epoch": 1.69,
"learning_rate": 8.067355333682799e-06,
"loss": 0.0819,
"step": 1500
},
{
"epoch": 1.69,
"eval_cer": 6.728683201945501,
"eval_loss": 0.07437845319509506,
"eval_runtime": 1268.04,
"eval_samples_per_second": 7.171,
"eval_steps_per_second": 0.225,
"eval_wer": 12.65888368419041,
"step": 1500
},
{
"epoch": 1.72,
"learning_rate": 7.91843079314751e-06,
"loss": 0.0814,
"step": 1525
},
{
"epoch": 1.75,
"learning_rate": 7.765470808988156e-06,
"loss": 0.0814,
"step": 1550
},
{
"epoch": 1.78,
"learning_rate": 7.608686885755146e-06,
"loss": 0.0795,
"step": 1575
},
{
"epoch": 1.8,
"learning_rate": 7.448295815528956e-06,
"loss": 0.0798,
"step": 1600
},
{
"epoch": 1.8,
"eval_cer": 6.496213591156545,
"eval_loss": 0.07181866466999054,
"eval_runtime": 1262.2284,
"eval_samples_per_second": 7.204,
"eval_steps_per_second": 0.226,
"eval_wer": 12.302151857341244,
"step": 1600
},
{
"epoch": 1.83,
"learning_rate": 7.284519378152104e-06,
"loss": 0.0798,
"step": 1625
},
{
"epoch": 1.86,
"learning_rate": 7.117584034564329e-06,
"loss": 0.0794,
"step": 1650
},
{
"epoch": 1.89,
"learning_rate": 6.947720613665016e-06,
"loss": 0.0778,
"step": 1675
},
{
"epoch": 1.92,
"learning_rate": 6.775163993135843e-06,
"loss": 0.0774,
"step": 1700
},
{
"epoch": 1.92,
"eval_cer": 6.219763243191301,
"eval_loss": 0.06941211223602295,
"eval_runtime": 1257.6258,
"eval_samples_per_second": 7.23,
"eval_steps_per_second": 0.227,
"eval_wer": 11.841373247661071,
"step": 1700
},
{
"epoch": 1.94,
"learning_rate": 6.600152774664997e-06,
"loss": 0.0769,
"step": 1725
},
{
"epoch": 1.97,
"learning_rate": 6.422928954022047e-06,
"loss": 0.0778,
"step": 1750
},
{
"epoch": 2.0,
"learning_rate": 6.243737586439663e-06,
"loss": 0.0784,
"step": 1775
},
{
"epoch": 2.03,
"learning_rate": 6.062826447764883e-06,
"loss": 0.0695,
"step": 1800
},
{
"epoch": 2.03,
"eval_cer": 6.13457366403891,
"eval_loss": 0.06795131415128708,
"eval_runtime": 1263.7565,
"eval_samples_per_second": 7.195,
"eval_steps_per_second": 0.226,
"eval_wer": 11.568303527822959,
"step": 1800
},
{
"epoch": 2.06,
"learning_rate": 5.880445691848471e-06,
"loss": 0.0711,
"step": 1825
},
{
"epoch": 2.09,
"learning_rate": 5.696847504646093e-06,
"loss": 0.069,
"step": 1850
},
{
"epoch": 2.11,
"learning_rate": 5.512285755509618e-06,
"loss": 0.0709,
"step": 1875
},
{
"epoch": 2.14,
"learning_rate": 5.327015646150716e-06,
"loss": 0.0686,
"step": 1900
},
{
"epoch": 2.14,
"eval_cer": 5.975836465574909,
"eval_loss": 0.06619075685739517,
"eval_runtime": 1261.7232,
"eval_samples_per_second": 7.207,
"eval_steps_per_second": 0.226,
"eval_wer": 11.248518925897457,
"step": 1900
},
{
"epoch": 2.17,
"learning_rate": 5.1412933577621346e-06,
"loss": 0.0678,
"step": 1925
},
{
"epoch": 2.2,
"learning_rate": 4.955375696784614e-06,
"loss": 0.0681,
"step": 1950
},
{
"epoch": 2.23,
"learning_rate": 4.769519739809227e-06,
"loss": 0.0676,
"step": 1975
},
{
"epoch": 2.25,
"learning_rate": 4.583982478106189e-06,
"loss": 0.0681,
"step": 2000
},
{
"epoch": 2.25,
"eval_cer": 6.059917286647226,
"eval_loss": 0.06468059122562408,
"eval_runtime": 1264.0065,
"eval_samples_per_second": 7.194,
"eval_steps_per_second": 0.225,
"eval_wer": 11.284192108582374,
"step": 2000
},
{
"epoch": 2.28,
"learning_rate": 4.3990204622716405e-06,
"loss": 0.0667,
"step": 2025
},
{
"epoch": 2.31,
"learning_rate": 4.214889447483755e-06,
"loss": 0.0673,
"step": 2050
},
{
"epoch": 2.34,
"learning_rate": 4.031844039858726e-06,
"loss": 0.068,
"step": 2075
},
{
"epoch": 2.37,
"learning_rate": 3.850137344395598e-06,
"loss": 0.0661,
"step": 2100
},
{
"epoch": 2.37,
"eval_cer": 5.949965443706504,
"eval_loss": 0.06391420215368271,
"eval_runtime": 1259.247,
"eval_samples_per_second": 7.221,
"eval_steps_per_second": 0.226,
"eval_wer": 11.13555384739522,
"step": 2100
},
{
"epoch": 2.4,
"learning_rate": 3.6700206149967698e-06,
"loss": 0.0657,
"step": 2125
},
{
"epoch": 2.42,
"learning_rate": 3.4917429070480825e-06,
"loss": 0.0648,
"step": 2150
},
{
"epoch": 2.45,
"learning_rate": 3.3155507330389004e-06,
"loss": 0.0658,
"step": 2175
},
{
"epoch": 2.48,
"learning_rate": 3.141687721698363e-06,
"loss": 0.0653,
"step": 2200
},
{
"epoch": 2.48,
"eval_cer": 5.811370683697191,
"eval_loss": 0.06310658901929855,
"eval_runtime": 1258.5174,
"eval_samples_per_second": 7.225,
"eval_steps_per_second": 0.226,
"eval_wer": 10.895184545018283,
"step": 2200
},
{
"epoch": 2.51,
"learning_rate": 2.9703942811191423e-06,
"loss": 0.0668,
"step": 2225
},
{
"epoch": 2.54,
"learning_rate": 2.801907266334516e-06,
"loss": 0.0651,
"step": 2250
},
{
"epoch": 2.56,
"learning_rate": 2.6364596518084124e-06,
"loss": 0.0646,
"step": 2275
},
{
"epoch": 2.59,
"learning_rate": 2.474280209291299e-06,
"loss": 0.0636,
"step": 2300
},
{
"epoch": 2.59,
"eval_cer": 5.850177216499799,
"eval_loss": 0.06220058351755142,
"eval_runtime": 1261.9688,
"eval_samples_per_second": 7.205,
"eval_steps_per_second": 0.226,
"eval_wer": 10.938501981135682,
"step": 2300
},
{
"epoch": 2.62,
"learning_rate": 2.3155931914873297e-06,
"loss": 0.0649,
"step": 2325
},
{
"epoch": 2.65,
"learning_rate": 2.1606180219702057e-06,
"loss": 0.065,
"step": 2350
},
{
"epoch": 2.68,
"learning_rate": 2.009568991776456e-06,
"loss": 0.0638,
"step": 2375
},
{
"epoch": 2.71,
"learning_rate": 1.8626549630957397e-06,
"loss": 0.0641,
"step": 2400
},
{
"epoch": 2.71,
"eval_cer": 5.738192650412273,
"eval_loss": 0.06147067993879318,
"eval_runtime": 1257.8302,
"eval_samples_per_second": 7.229,
"eval_steps_per_second": 0.227,
"eval_wer": 10.701530124728736,
"step": 2400
},
{
"epoch": 2.73,
"learning_rate": 1.720079080467828e-06,
"loss": 0.0624,
"step": 2425
},
{
"epoch": 2.76,
"learning_rate": 1.5820384898856433e-06,
"loss": 0.0638,
"step": 2450
},
{
"epoch": 2.79,
"learning_rate": 1.4487240661927627e-06,
"loss": 0.063,
"step": 2475
},
{
"epoch": 2.82,
"learning_rate": 1.3203201491523027e-06,
"loss": 0.0633,
"step": 2500
},
{
"epoch": 2.82,
"eval_cer": 5.703821149929963,
"eval_loss": 0.06121416017413139,
"eval_runtime": 1253.1864,
"eval_samples_per_second": 7.256,
"eval_steps_per_second": 0.227,
"eval_wer": 10.645472266223868,
"step": 2500
},
{
"epoch": 2.85,
"learning_rate": 1.197004288552167e-06,
"loss": 0.0637,
"step": 2525
},
{
"epoch": 2.87,
"learning_rate": 1.078946998699073e-06,
"loss": 0.0635,
"step": 2550
},
{
"epoch": 2.9,
"learning_rate": 9.663115226408732e-07,
"loss": 0.0633,
"step": 2575
},
{
"epoch": 2.93,
"learning_rate": 8.592536064431467e-07,
"loss": 0.0626,
"step": 2600
},
{
"epoch": 2.93,
"eval_cer": 5.8058268932968184,
"eval_loss": 0.06081194430589676,
"eval_runtime": 1267.5166,
"eval_samples_per_second": 7.174,
"eval_steps_per_second": 0.225,
"eval_wer": 10.759711386964849,
"step": 2600
},
{
"epoch": 2.96,
"learning_rate": 7.579212838322164e-07,
"loss": 0.0614,
"step": 2625
},
{
"epoch": 2.99,
"learning_rate": 6.624546715023544e-07,
"loss": 0.0626,
"step": 2650
},
{
"epoch": 3.02,
"learning_rate": 5.729857753702118e-07,
"loss": 0.0616,
"step": 2675
},
{
"epoch": 3.04,
"learning_rate": 4.896383080443934e-07,
"loss": 0.06,
"step": 2700
},
{
"epoch": 3.04,
"eval_cer": 5.732833653025247,
"eval_loss": 0.06054000183939934,
"eval_runtime": 1260.8932,
"eval_samples_per_second": 7.212,
"eval_steps_per_second": 0.226,
"eval_wer": 10.637403332045135,
"step": 2700
},
{
"epoch": 3.07,
"learning_rate": 4.1252751776254373e-07,
"loss": 0.059,
"step": 2725
},
{
"epoch": 3.1,
"learning_rate": 3.417600290325063e-07,
"loss": 0.0609,
"step": 2750
},
{
"epoch": 3.13,
"learning_rate": 2.7743369519788397e-07,
"loss": 0.0605,
"step": 2775
},
{
"epoch": 3.16,
"learning_rate": 2.1963746313188762e-07,
"loss": 0.0584,
"step": 2800
},
{
"epoch": 3.16,
"eval_cer": 5.673699888754606,
"eval_loss": 0.06046656146645546,
"eval_runtime": 1262.3183,
"eval_samples_per_second": 7.203,
"eval_steps_per_second": 0.226,
"eval_wer": 10.597058661151479,
"step": 2800
},
{
"epoch": 3.18,
"learning_rate": 1.684512502465513e-07,
"loss": 0.0595,
"step": 2825
},
{
"epoch": 3.21,
"learning_rate": 1.2394583398738114e-07,
"loss": 0.0599,
"step": 2850
},
{
"epoch": 3.24,
"learning_rate": 8.618275396624742e-08,
"loss": 0.0588,
"step": 2875
},
{
"epoch": 3.27,
"learning_rate": 5.521422686783295e-08,
"loss": 0.0585,
"step": 2900
},
{
"epoch": 3.27,
"eval_cer": 5.687744157768883,
"eval_loss": 0.06037650257349014,
"eval_runtime": 1261.1509,
"eval_samples_per_second": 7.21,
"eval_steps_per_second": 0.226,
"eval_wer": 10.609799083538949,
"step": 2900
},
{
"epoch": 3.3,
"learning_rate": 3.1083074247311715e-08,
"loss": 0.0599,
"step": 2925
},
{
"epoch": 3.33,
"learning_rate": 1.382266331908133e-08,
"loss": 0.0613,
"step": 2950
},
{
"epoch": 3.35,
"learning_rate": 3.4568608184420983e-09,
"loss": 0.0598,
"step": 2975
},
{
"epoch": 3.38,
"learning_rate": 0.0,
"loss": 0.0598,
"step": 3000
},
{
"epoch": 3.38,
"eval_cer": 5.707517010196878,
"eval_loss": 0.06033782660961151,
"eval_runtime": 1268.7447,
"eval_samples_per_second": 7.167,
"eval_steps_per_second": 0.225,
"eval_wer": 10.604278233837713,
"step": 3000
},
{
"epoch": 3.38,
"step": 3000,
"total_flos": 4.435355593138176e+20,
"train_loss": 0.1956545435587565,
"train_runtime": 112740.1771,
"train_samples_per_second": 13.624,
"train_steps_per_second": 0.027
}
],
"max_steps": 3000,
"num_train_epochs": 4,
"total_flos": 4.435355593138176e+20,
"trial_name": null,
"trial_params": null
}