{ "best_metric": 10.597058661151479, "best_model_checkpoint": "./checkpoint-2800", "epoch": 3.3819247627933176, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 2.8153153153153155e-07, "loss": 2.3778, "step": 25 }, { "epoch": 0.06, "learning_rate": 5.630630630630631e-07, "loss": 2.2187, "step": 50 }, { "epoch": 0.08, "learning_rate": 8.445945945945947e-07, "loss": 1.8708, "step": 75 }, { "epoch": 0.11, "learning_rate": 1.1261261261261262e-06, "loss": 1.4902, "step": 100 }, { "epoch": 0.11, "eval_cer": 1208.7377528430404, "eval_loss": 1.2635722160339355, "eval_runtime": 1766.3017, "eval_samples_per_second": 5.148, "eval_steps_per_second": 0.161, "eval_wer": 889.9405871635997, "step": 100 }, { "epoch": 0.14, "learning_rate": 1.4076576576576579e-06, "loss": 1.1418, "step": 125 }, { "epoch": 0.17, "learning_rate": 1.6891891891891894e-06, "loss": 0.8588, "step": 150 }, { "epoch": 0.2, "learning_rate": 1.970720720720721e-06, "loss": 0.683, "step": 175 }, { "epoch": 0.23, "learning_rate": 2.2522522522522524e-06, "loss": 0.5703, "step": 200 }, { "epoch": 0.23, "eval_cer": 121.77656307170339, "eval_loss": 0.461249440908432, "eval_runtime": 1640.8217, "eval_samples_per_second": 5.542, "eval_steps_per_second": 0.174, "eval_wer": 108.64012978243605, "step": 200 }, { "epoch": 0.25, "learning_rate": 2.533783783783784e-06, "loss": 0.4998, "step": 225 }, { "epoch": 0.28, "learning_rate": 2.8153153153153158e-06, "loss": 0.4463, "step": 250 }, { "epoch": 0.31, "learning_rate": 3.096846846846847e-06, "loss": 0.4037, "step": 275 }, { "epoch": 0.34, "learning_rate": 3.3783783783783788e-06, "loss": 0.3679, "step": 300 }, { "epoch": 0.34, "eval_cer": 17.738466144072024, "eval_loss": 0.30462178587913513, "eval_runtime": 1238.7679, "eval_samples_per_second": 7.34, "eval_steps_per_second": 0.23, "eval_wer": 35.2744074641888, "step": 300 }, { "epoch": 0.37, "learning_rate": 3.65990990990991e-06, "loss": 0.3325, "step": 325 }, { "epoch": 0.39, "learning_rate": 3.941441441441442e-06, "loss": 0.2873, "step": 350 }, { "epoch": 0.42, "learning_rate": 4.222972972972974e-06, "loss": 0.2461, "step": 375 }, { "epoch": 0.45, "learning_rate": 4.504504504504505e-06, "loss": 0.2301, "step": 400 }, { "epoch": 0.45, "eval_cer": 14.885262018013623, "eval_loss": 0.20585699379444122, "eval_runtime": 1256.6908, "eval_samples_per_second": 7.236, "eval_steps_per_second": 0.227, "eval_wer": 29.76714754683167, "step": 400 }, { "epoch": 0.48, "learning_rate": 4.7860360360360364e-06, "loss": 0.2191, "step": 425 }, { "epoch": 0.51, "learning_rate": 5.067567567567568e-06, "loss": 0.2041, "step": 450 }, { "epoch": 0.54, "learning_rate": 5.3490990990991e-06, "loss": 0.1964, "step": 475 }, { "epoch": 0.56, "learning_rate": 5.6306306306306316e-06, "loss": 0.191, "step": 500 }, { "epoch": 0.56, "eval_cer": 12.756076918243874, "eval_loss": 0.16926029324531555, "eval_runtime": 1255.9571, "eval_samples_per_second": 7.24, "eval_steps_per_second": 0.227, "eval_wer": 25.452815845688004, "step": 500 }, { "epoch": 0.59, "learning_rate": 5.912162162162162e-06, "loss": 0.1829, "step": 525 }, { "epoch": 0.62, "learning_rate": 6.193693693693694e-06, "loss": 0.1734, "step": 550 }, { "epoch": 0.65, "learning_rate": 6.475225225225226e-06, "loss": 0.1689, "step": 575 }, { "epoch": 0.68, "learning_rate": 6.7567567567567575e-06, "loss": 0.1605, "step": 600 }, { "epoch": 0.68, "eval_cer": 11.58800028088538, "eval_loss": 0.14617891609668732, "eval_runtime": 1253.5295, "eval_samples_per_second": 7.254, "eval_steps_per_second": 0.227, "eval_wer": 22.78454671700549, "step": 600 }, { "epoch": 0.7, "learning_rate": 7.038288288288288e-06, "loss": 0.158, "step": 625 }, { "epoch": 0.73, "learning_rate": 7.31981981981982e-06, "loss": 0.153, "step": 650 }, { "epoch": 0.76, "learning_rate": 7.601351351351352e-06, "loss": 0.1509, "step": 675 }, { "epoch": 0.79, "learning_rate": 7.882882882882884e-06, "loss": 0.146, "step": 700 }, { "epoch": 0.79, "eval_cer": 10.432119982407704, "eval_loss": 0.13004416227340698, "eval_runtime": 1254.5015, "eval_samples_per_second": 7.248, "eval_steps_per_second": 0.227, "eval_wer": 20.554123437705705, "step": 700 }, { "epoch": 0.82, "learning_rate": 8.164414414414416e-06, "loss": 0.1417, "step": 725 }, { "epoch": 0.84, "learning_rate": 8.445945945945948e-06, "loss": 0.1392, "step": 750 }, { "epoch": 0.87, "learning_rate": 8.727477477477478e-06, "loss": 0.1355, "step": 775 }, { "epoch": 0.9, "learning_rate": 9.00900900900901e-06, "loss": 0.1296, "step": 800 }, { "epoch": 0.9, "eval_cer": 9.857228917889072, "eval_loss": 0.11560462415218353, "eval_runtime": 1268.8039, "eval_samples_per_second": 7.167, "eval_steps_per_second": 0.225, "eval_wer": 19.214255683290087, "step": 800 }, { "epoch": 0.93, "learning_rate": 9.290540540540541e-06, "loss": 0.1262, "step": 825 }, { "epoch": 0.96, "learning_rate": 9.572072072072073e-06, "loss": 0.1247, "step": 850 }, { "epoch": 0.99, "learning_rate": 9.853603603603605e-06, "loss": 0.1223, "step": 875 }, { "epoch": 1.01, "learning_rate": 9.999203468625017e-06, "loss": 0.1212, "step": 900 }, { "epoch": 1.01, "eval_cer": 8.946199362094518, "eval_loss": 0.10553693026304245, "eval_runtime": 1266.5433, "eval_samples_per_second": 7.179, "eval_steps_per_second": 0.225, "eval_wer": 17.400444216060578, "step": 900 }, { "epoch": 1.04, "learning_rate": 9.992429130775193e-06, "loss": 0.1149, "step": 925 }, { "epoch": 1.07, "learning_rate": 9.978751539864958e-06, "loss": 0.1105, "step": 950 }, { "epoch": 1.1, "learning_rate": 9.958189608505554e-06, "loss": 0.1092, "step": 975 }, { "epoch": 1.13, "learning_rate": 9.930771768590934e-06, "loss": 0.1072, "step": 1000 }, { "epoch": 1.13, "eval_cer": 8.267454624075574, "eval_loss": 0.09784528613090515, "eval_runtime": 1262.057, "eval_samples_per_second": 7.205, "eval_steps_per_second": 0.226, "eval_wer": 15.92340458060653, "step": 1000 }, { "epoch": 1.16, "learning_rate": 9.896535931983703e-06, "loss": 0.107, "step": 1025 }, { "epoch": 1.18, "learning_rate": 9.855529438092723e-06, "loss": 0.1036, "step": 1050 }, { "epoch": 1.21, "learning_rate": 9.807808988414811e-06, "loss": 0.1035, "step": 1075 }, { "epoch": 1.24, "learning_rate": 9.753440568131056e-06, "loss": 0.1013, "step": 1100 }, { "epoch": 1.24, "eval_cer": 7.791797407723609, "eval_loss": 0.09121902287006378, "eval_runtime": 1263.6249, "eval_samples_per_second": 7.196, "eval_steps_per_second": 0.226, "eval_wer": 15.060453304228547, "step": 1100 }, { "epoch": 1.27, "learning_rate": 9.692499354866194e-06, "loss": 0.099, "step": 1125 }, { "epoch": 1.3, "learning_rate": 9.62506961473717e-06, "loss": 0.0989, "step": 1150 }, { "epoch": 1.32, "learning_rate": 9.551244585834649e-06, "loss": 0.097, "step": 1175 }, { "epoch": 1.35, "learning_rate": 9.471126349298557e-06, "loss": 0.0952, "step": 1200 }, { "epoch": 1.35, "eval_cer": 7.549718560240675, "eval_loss": 0.0853806585073471, "eval_runtime": 1267.0907, "eval_samples_per_second": 7.176, "eval_steps_per_second": 0.225, "eval_wer": 14.320659444262777, "step": 1200 }, { "epoch": 1.38, "learning_rate": 9.384825688165987e-06, "loss": 0.0946, "step": 1225 }, { "epoch": 1.41, "learning_rate": 9.292461934186572e-06, "loss": 0.0922, "step": 1250 }, { "epoch": 1.44, "learning_rate": 9.194162802817177e-06, "loss": 0.0919, "step": 1275 }, { "epoch": 1.47, "learning_rate": 9.090064216624093e-06, "loss": 0.0915, "step": 1300 }, { "epoch": 1.47, "eval_cer": 6.983327974335947, "eval_loss": 0.0808597058057785, "eval_runtime": 1272.1477, "eval_samples_per_second": 7.148, "eval_steps_per_second": 0.224, "eval_wer": 13.316289479383872, "step": 1300 }, { "epoch": 1.49, "learning_rate": 8.980310117336864e-06, "loss": 0.0894, "step": 1325 }, { "epoch": 1.52, "learning_rate": 8.865052266813686e-06, "loss": 0.0884, "step": 1350 }, { "epoch": 1.55, "learning_rate": 8.744450037193558e-06, "loss": 0.0872, "step": 1375 }, { "epoch": 1.58, "learning_rate": 8.61867019052535e-06, "loss": 0.0843, "step": 1400 }, { "epoch": 1.58, "eval_cer": 6.642200071699689, "eval_loss": 0.07798563688993454, "eval_runtime": 1266.8664, "eval_samples_per_second": 7.178, "eval_steps_per_second": 0.225, "eval_wer": 12.717914307919024, "step": 1400 }, { "epoch": 1.61, "learning_rate": 8.48788664817855e-06, "loss": 0.0858, "step": 1425 }, { "epoch": 1.63, "learning_rate": 8.352280250354445e-06, "loss": 0.0849, "step": 1450 }, { "epoch": 1.66, "learning_rate": 8.212038506030386e-06, "loss": 0.0843, "step": 1475 }, { "epoch": 1.69, "learning_rate": 8.067355333682799e-06, "loss": 0.0819, "step": 1500 }, { "epoch": 1.69, "eval_cer": 6.728683201945501, "eval_loss": 0.07437845319509506, "eval_runtime": 1268.04, "eval_samples_per_second": 7.171, "eval_steps_per_second": 0.225, "eval_wer": 12.65888368419041, "step": 1500 }, { "epoch": 1.72, "learning_rate": 7.91843079314751e-06, "loss": 0.0814, "step": 1525 }, { "epoch": 1.75, "learning_rate": 7.765470808988156e-06, "loss": 0.0814, "step": 1550 }, { "epoch": 1.78, "learning_rate": 7.608686885755146e-06, "loss": 0.0795, "step": 1575 }, { "epoch": 1.8, "learning_rate": 7.448295815528956e-06, "loss": 0.0798, "step": 1600 }, { "epoch": 1.8, "eval_cer": 6.496213591156545, "eval_loss": 0.07181866466999054, "eval_runtime": 1262.2284, "eval_samples_per_second": 7.204, "eval_steps_per_second": 0.226, "eval_wer": 12.302151857341244, "step": 1600 }, { "epoch": 1.83, "learning_rate": 7.284519378152104e-06, "loss": 0.0798, "step": 1625 }, { "epoch": 1.86, "learning_rate": 7.117584034564329e-06, "loss": 0.0794, "step": 1650 }, { "epoch": 1.89, "learning_rate": 6.947720613665016e-06, "loss": 0.0778, "step": 1675 }, { "epoch": 1.92, "learning_rate": 6.775163993135843e-06, "loss": 0.0774, "step": 1700 }, { "epoch": 1.92, "eval_cer": 6.219763243191301, "eval_loss": 0.06941211223602295, "eval_runtime": 1257.6258, "eval_samples_per_second": 7.23, "eval_steps_per_second": 0.227, "eval_wer": 11.841373247661071, "step": 1700 }, { "epoch": 1.94, "learning_rate": 6.600152774664997e-06, "loss": 0.0769, "step": 1725 }, { "epoch": 1.97, "learning_rate": 6.422928954022047e-06, "loss": 0.0778, "step": 1750 }, { "epoch": 2.0, "learning_rate": 6.243737586439663e-06, "loss": 0.0784, "step": 1775 }, { "epoch": 2.03, "learning_rate": 6.062826447764883e-06, "loss": 0.0695, "step": 1800 }, { "epoch": 2.03, "eval_cer": 6.13457366403891, "eval_loss": 0.06795131415128708, "eval_runtime": 1263.7565, "eval_samples_per_second": 7.195, "eval_steps_per_second": 0.226, "eval_wer": 11.568303527822959, "step": 1800 }, { "epoch": 2.06, "learning_rate": 5.880445691848471e-06, "loss": 0.0711, "step": 1825 }, { "epoch": 2.09, "learning_rate": 5.696847504646093e-06, "loss": 0.069, "step": 1850 }, { "epoch": 2.11, "learning_rate": 5.512285755509618e-06, "loss": 0.0709, "step": 1875 }, { "epoch": 2.14, "learning_rate": 5.327015646150716e-06, "loss": 0.0686, "step": 1900 }, { "epoch": 2.14, "eval_cer": 5.975836465574909, "eval_loss": 0.06619075685739517, "eval_runtime": 1261.7232, "eval_samples_per_second": 7.207, "eval_steps_per_second": 0.226, "eval_wer": 11.248518925897457, "step": 1900 }, { "epoch": 2.17, "learning_rate": 5.1412933577621346e-06, "loss": 0.0678, "step": 1925 }, { "epoch": 2.2, "learning_rate": 4.955375696784614e-06, "loss": 0.0681, "step": 1950 }, { "epoch": 2.23, "learning_rate": 4.769519739809227e-06, "loss": 0.0676, "step": 1975 }, { "epoch": 2.25, "learning_rate": 4.583982478106189e-06, "loss": 0.0681, "step": 2000 }, { "epoch": 2.25, "eval_cer": 6.059917286647226, "eval_loss": 0.06468059122562408, "eval_runtime": 1264.0065, "eval_samples_per_second": 7.194, "eval_steps_per_second": 0.225, "eval_wer": 11.284192108582374, "step": 2000 }, { "epoch": 2.28, "learning_rate": 4.3990204622716405e-06, "loss": 0.0667, "step": 2025 }, { "epoch": 2.31, "learning_rate": 4.214889447483755e-06, "loss": 0.0673, "step": 2050 }, { "epoch": 2.34, "learning_rate": 4.031844039858726e-06, "loss": 0.068, "step": 2075 }, { "epoch": 2.37, "learning_rate": 3.850137344395598e-06, "loss": 0.0661, "step": 2100 }, { "epoch": 2.37, "eval_cer": 5.949965443706504, "eval_loss": 0.06391420215368271, "eval_runtime": 1259.247, "eval_samples_per_second": 7.221, "eval_steps_per_second": 0.226, "eval_wer": 11.13555384739522, "step": 2100 }, { "epoch": 2.4, "learning_rate": 3.6700206149967698e-06, "loss": 0.0657, "step": 2125 }, { "epoch": 2.42, "learning_rate": 3.4917429070480825e-06, "loss": 0.0648, "step": 2150 }, { "epoch": 2.45, "learning_rate": 3.3155507330389004e-06, "loss": 0.0658, "step": 2175 }, { "epoch": 2.48, "learning_rate": 3.141687721698363e-06, "loss": 0.0653, "step": 2200 }, { "epoch": 2.48, "eval_cer": 5.811370683697191, "eval_loss": 0.06310658901929855, "eval_runtime": 1258.5174, "eval_samples_per_second": 7.225, "eval_steps_per_second": 0.226, "eval_wer": 10.895184545018283, "step": 2200 }, { "epoch": 2.51, "learning_rate": 2.9703942811191423e-06, "loss": 0.0668, "step": 2225 }, { "epoch": 2.54, "learning_rate": 2.801907266334516e-06, "loss": 0.0651, "step": 2250 }, { "epoch": 2.56, "learning_rate": 2.6364596518084124e-06, "loss": 0.0646, "step": 2275 }, { "epoch": 2.59, "learning_rate": 2.474280209291299e-06, "loss": 0.0636, "step": 2300 }, { "epoch": 2.59, "eval_cer": 5.850177216499799, "eval_loss": 0.06220058351755142, "eval_runtime": 1261.9688, "eval_samples_per_second": 7.205, "eval_steps_per_second": 0.226, "eval_wer": 10.938501981135682, "step": 2300 }, { "epoch": 2.62, "learning_rate": 2.3155931914873297e-06, "loss": 0.0649, "step": 2325 }, { "epoch": 2.65, "learning_rate": 2.1606180219702057e-06, "loss": 0.065, "step": 2350 }, { "epoch": 2.68, "learning_rate": 2.009568991776456e-06, "loss": 0.0638, "step": 2375 }, { "epoch": 2.71, "learning_rate": 1.8626549630957397e-06, "loss": 0.0641, "step": 2400 }, { "epoch": 2.71, "eval_cer": 5.738192650412273, "eval_loss": 0.06147067993879318, "eval_runtime": 1257.8302, "eval_samples_per_second": 7.229, "eval_steps_per_second": 0.227, "eval_wer": 10.701530124728736, "step": 2400 }, { "epoch": 2.73, "learning_rate": 1.720079080467828e-06, "loss": 0.0624, "step": 2425 }, { "epoch": 2.76, "learning_rate": 1.5820384898856433e-06, "loss": 0.0638, "step": 2450 }, { "epoch": 2.79, "learning_rate": 1.4487240661927627e-06, "loss": 0.063, "step": 2475 }, { "epoch": 2.82, "learning_rate": 1.3203201491523027e-06, "loss": 0.0633, "step": 2500 }, { "epoch": 2.82, "eval_cer": 5.703821149929963, "eval_loss": 0.06121416017413139, "eval_runtime": 1253.1864, "eval_samples_per_second": 7.256, "eval_steps_per_second": 0.227, "eval_wer": 10.645472266223868, "step": 2500 }, { "epoch": 2.85, "learning_rate": 1.197004288552167e-06, "loss": 0.0637, "step": 2525 }, { "epoch": 2.87, "learning_rate": 1.078946998699073e-06, "loss": 0.0635, "step": 2550 }, { "epoch": 2.9, "learning_rate": 9.663115226408732e-07, "loss": 0.0633, "step": 2575 }, { "epoch": 2.93, "learning_rate": 8.592536064431467e-07, "loss": 0.0626, "step": 2600 }, { "epoch": 2.93, "eval_cer": 5.8058268932968184, "eval_loss": 0.06081194430589676, "eval_runtime": 1267.5166, "eval_samples_per_second": 7.174, "eval_steps_per_second": 0.225, "eval_wer": 10.759711386964849, "step": 2600 }, { "epoch": 2.96, "learning_rate": 7.579212838322164e-07, "loss": 0.0614, "step": 2625 }, { "epoch": 2.99, "learning_rate": 6.624546715023544e-07, "loss": 0.0626, "step": 2650 }, { "epoch": 3.02, "learning_rate": 5.729857753702118e-07, "loss": 0.0616, "step": 2675 }, { "epoch": 3.04, "learning_rate": 4.896383080443934e-07, "loss": 0.06, "step": 2700 }, { "epoch": 3.04, "eval_cer": 5.732833653025247, "eval_loss": 0.06054000183939934, "eval_runtime": 1260.8932, "eval_samples_per_second": 7.212, "eval_steps_per_second": 0.226, "eval_wer": 10.637403332045135, "step": 2700 }, { "epoch": 3.07, "learning_rate": 4.1252751776254373e-07, "loss": 0.059, "step": 2725 }, { "epoch": 3.1, "learning_rate": 3.417600290325063e-07, "loss": 0.0609, "step": 2750 }, { "epoch": 3.13, "learning_rate": 2.7743369519788397e-07, "loss": 0.0605, "step": 2775 }, { "epoch": 3.16, "learning_rate": 2.1963746313188762e-07, "loss": 0.0584, "step": 2800 }, { "epoch": 3.16, "eval_cer": 5.673699888754606, "eval_loss": 0.06046656146645546, "eval_runtime": 1262.3183, "eval_samples_per_second": 7.203, "eval_steps_per_second": 0.226, "eval_wer": 10.597058661151479, "step": 2800 }, { "epoch": 3.18, "learning_rate": 1.684512502465513e-07, "loss": 0.0595, "step": 2825 }, { "epoch": 3.21, "learning_rate": 1.2394583398738114e-07, "loss": 0.0599, "step": 2850 }, { "epoch": 3.24, "learning_rate": 8.618275396624742e-08, "loss": 0.0588, "step": 2875 }, { "epoch": 3.27, "learning_rate": 5.521422686783295e-08, "loss": 0.0585, "step": 2900 }, { "epoch": 3.27, "eval_cer": 5.687744157768883, "eval_loss": 0.06037650257349014, "eval_runtime": 1261.1509, "eval_samples_per_second": 7.21, "eval_steps_per_second": 0.226, "eval_wer": 10.609799083538949, "step": 2900 }, { "epoch": 3.3, "learning_rate": 3.1083074247311715e-08, "loss": 0.0599, "step": 2925 }, { "epoch": 3.33, "learning_rate": 1.382266331908133e-08, "loss": 0.0613, "step": 2950 }, { "epoch": 3.35, "learning_rate": 3.4568608184420983e-09, "loss": 0.0598, "step": 2975 }, { "epoch": 3.38, "learning_rate": 0.0, "loss": 0.0598, "step": 3000 }, { "epoch": 3.38, "eval_cer": 5.707517010196878, "eval_loss": 0.06033782660961151, "eval_runtime": 1268.7447, "eval_samples_per_second": 7.167, "eval_steps_per_second": 0.225, "eval_wer": 10.604278233837713, "step": 3000 }, { "epoch": 3.38, "step": 3000, "total_flos": 4.435355593138176e+20, "train_loss": 0.1956545435587565, "train_runtime": 112740.1771, "train_samples_per_second": 13.624, "train_steps_per_second": 0.027 } ], "max_steps": 3000, "num_train_epochs": 4, "total_flos": 4.435355593138176e+20, "trial_name": null, "trial_params": null }