{
  "best_metric": 10.597058661151479,
  "best_model_checkpoint": "./checkpoint-2800",
  "epoch": 3.3819247627933176,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 2.8153153153153155e-07,
      "loss": 2.3778,
      "step": 25
    },
    {
      "epoch": 0.06,
      "learning_rate": 5.630630630630631e-07,
      "loss": 2.2187,
      "step": 50
    },
    {
      "epoch": 0.08,
      "learning_rate": 8.445945945945947e-07,
      "loss": 1.8708,
      "step": 75
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.1261261261261262e-06,
      "loss": 1.4902,
      "step": 100
    },
    {
      "epoch": 0.11,
      "eval_cer": 1208.7377528430404,
      "eval_loss": 1.2635722160339355,
      "eval_runtime": 1766.3017,
      "eval_samples_per_second": 5.148,
      "eval_steps_per_second": 0.161,
      "eval_wer": 889.9405871635997,
      "step": 100
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.4076576576576579e-06,
      "loss": 1.1418,
      "step": 125
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.6891891891891894e-06,
      "loss": 0.8588,
      "step": 150
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.970720720720721e-06,
      "loss": 0.683,
      "step": 175
    },
    {
      "epoch": 0.23,
      "learning_rate": 2.2522522522522524e-06,
      "loss": 0.5703,
      "step": 200
    },
    {
      "epoch": 0.23,
      "eval_cer": 121.77656307170339,
      "eval_loss": 0.461249440908432,
      "eval_runtime": 1640.8217,
      "eval_samples_per_second": 5.542,
      "eval_steps_per_second": 0.174,
      "eval_wer": 108.64012978243605,
      "step": 200
    },
    {
      "epoch": 0.25,
      "learning_rate": 2.533783783783784e-06,
      "loss": 0.4998,
      "step": 225
    },
    {
      "epoch": 0.28,
      "learning_rate": 2.8153153153153158e-06,
      "loss": 0.4463,
      "step": 250
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.096846846846847e-06,
      "loss": 0.4037,
      "step": 275
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3783783783783788e-06,
      "loss": 0.3679,
      "step": 300
    },
    {
      "epoch": 0.34,
      "eval_cer": 17.738466144072024,
      "eval_loss": 0.30462178587913513,
      "eval_runtime": 1238.7679,
      "eval_samples_per_second": 7.34,
      "eval_steps_per_second": 0.23,
      "eval_wer": 35.2744074641888,
      "step": 300
    },
    {
      "epoch": 0.37,
      "learning_rate": 3.65990990990991e-06,
      "loss": 0.3325,
      "step": 325
    },
    {
      "epoch": 0.39,
      "learning_rate": 3.941441441441442e-06,
      "loss": 0.2873,
      "step": 350
    },
    {
      "epoch": 0.42,
      "learning_rate": 4.222972972972974e-06,
      "loss": 0.2461,
      "step": 375
    },
    {
      "epoch": 0.45,
      "learning_rate": 4.504504504504505e-06,
      "loss": 0.2301,
      "step": 400
    },
    {
      "epoch": 0.45,
      "eval_cer": 14.885262018013623,
      "eval_loss": 0.20585699379444122,
      "eval_runtime": 1256.6908,
      "eval_samples_per_second": 7.236,
      "eval_steps_per_second": 0.227,
      "eval_wer": 29.76714754683167,
      "step": 400
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.7860360360360364e-06,
      "loss": 0.2191,
      "step": 425
    },
    {
      "epoch": 0.51,
      "learning_rate": 5.067567567567568e-06,
      "loss": 0.2041,
      "step": 450
    },
    {
      "epoch": 0.54,
      "learning_rate": 5.3490990990991e-06,
      "loss": 0.1964,
      "step": 475
    },
    {
      "epoch": 0.56,
      "learning_rate": 5.6306306306306316e-06,
      "loss": 0.191,
      "step": 500
    },
    {
      "epoch": 0.56,
      "eval_cer": 12.756076918243874,
      "eval_loss": 0.16926029324531555,
      "eval_runtime": 1255.9571,
      "eval_samples_per_second": 7.24,
      "eval_steps_per_second": 0.227,
      "eval_wer": 25.452815845688004,
      "step": 500
    },
    {
      "epoch": 0.59,
      "learning_rate": 5.912162162162162e-06,
      "loss": 0.1829,
      "step": 525
    },
    {
      "epoch": 0.62,
      "learning_rate": 6.193693693693694e-06,
      "loss": 0.1734,
      "step": 550
    },
    {
      "epoch": 0.65,
      "learning_rate": 6.475225225225226e-06,
      "loss": 0.1689,
      "step": 575
    },
    {
      "epoch": 0.68,
      "learning_rate": 6.7567567567567575e-06,
      "loss": 0.1605,
      "step": 600
    },
    {
      "epoch": 0.68,
      "eval_cer": 11.58800028088538,
      "eval_loss": 0.14617891609668732,
      "eval_runtime": 1253.5295,
      "eval_samples_per_second": 7.254,
      "eval_steps_per_second": 0.227,
      "eval_wer": 22.78454671700549,
      "step": 600
    },
    {
      "epoch": 0.7,
      "learning_rate": 7.038288288288288e-06,
      "loss": 0.158,
      "step": 625
    },
    {
      "epoch": 0.73,
      "learning_rate": 7.31981981981982e-06,
      "loss": 0.153,
      "step": 650
    },
    {
      "epoch": 0.76,
      "learning_rate": 7.601351351351352e-06,
      "loss": 0.1509,
      "step": 675
    },
    {
      "epoch": 0.79,
      "learning_rate": 7.882882882882884e-06,
      "loss": 0.146,
      "step": 700
    },
    {
      "epoch": 0.79,
      "eval_cer": 10.432119982407704,
      "eval_loss": 0.13004416227340698,
      "eval_runtime": 1254.5015,
      "eval_samples_per_second": 7.248,
      "eval_steps_per_second": 0.227,
      "eval_wer": 20.554123437705705,
      "step": 700
    },
    {
      "epoch": 0.82,
      "learning_rate": 8.164414414414416e-06,
      "loss": 0.1417,
      "step": 725
    },
    {
      "epoch": 0.84,
      "learning_rate": 8.445945945945948e-06,
      "loss": 0.1392,
      "step": 750
    },
    {
      "epoch": 0.87,
      "learning_rate": 8.727477477477478e-06,
      "loss": 0.1355,
      "step": 775
    },
    {
      "epoch": 0.9,
      "learning_rate": 9.00900900900901e-06,
      "loss": 0.1296,
      "step": 800
    },
    {
      "epoch": 0.9,
      "eval_cer": 9.857228917889072,
      "eval_loss": 0.11560462415218353,
      "eval_runtime": 1268.8039,
      "eval_samples_per_second": 7.167,
      "eval_steps_per_second": 0.225,
      "eval_wer": 19.214255683290087,
      "step": 800
    },
    {
      "epoch": 0.93,
      "learning_rate": 9.290540540540541e-06,
      "loss": 0.1262,
      "step": 825
    },
    {
      "epoch": 0.96,
      "learning_rate": 9.572072072072073e-06,
      "loss": 0.1247,
      "step": 850
    },
    {
      "epoch": 0.99,
      "learning_rate": 9.853603603603605e-06,
      "loss": 0.1223,
      "step": 875
    },
    {
      "epoch": 1.01,
      "learning_rate": 9.999203468625017e-06,
      "loss": 0.1212,
      "step": 900
    },
    {
      "epoch": 1.01,
      "eval_cer": 8.946199362094518,
      "eval_loss": 0.10553693026304245,
      "eval_runtime": 1266.5433,
      "eval_samples_per_second": 7.179,
      "eval_steps_per_second": 0.225,
      "eval_wer": 17.400444216060578,
      "step": 900
    },
    {
      "epoch": 1.04,
      "learning_rate": 9.992429130775193e-06,
      "loss": 0.1149,
      "step": 925
    },
    {
      "epoch": 1.07,
      "learning_rate": 9.978751539864958e-06,
      "loss": 0.1105,
      "step": 950
    },
    {
      "epoch": 1.1,
      "learning_rate": 9.958189608505554e-06,
      "loss": 0.1092,
      "step": 975
    },
    {
      "epoch": 1.13,
      "learning_rate": 9.930771768590934e-06,
      "loss": 0.1072,
      "step": 1000
    },
    {
      "epoch": 1.13,
      "eval_cer": 8.267454624075574,
      "eval_loss": 0.09784528613090515,
      "eval_runtime": 1262.057,
      "eval_samples_per_second": 7.205,
      "eval_steps_per_second": 0.226,
      "eval_wer": 15.92340458060653,
      "step": 1000
    },
    {
      "epoch": 1.16,
      "learning_rate": 9.896535931983703e-06,
      "loss": 0.107,
      "step": 1025
    },
    {
      "epoch": 1.18,
      "learning_rate": 9.855529438092723e-06,
      "loss": 0.1036,
      "step": 1050
    },
    {
      "epoch": 1.21,
      "learning_rate": 9.807808988414811e-06,
      "loss": 0.1035,
      "step": 1075
    },
    {
      "epoch": 1.24,
      "learning_rate": 9.753440568131056e-06,
      "loss": 0.1013,
      "step": 1100
    },
    {
      "epoch": 1.24,
      "eval_cer": 7.791797407723609,
      "eval_loss": 0.09121902287006378,
      "eval_runtime": 1263.6249,
      "eval_samples_per_second": 7.196,
      "eval_steps_per_second": 0.226,
      "eval_wer": 15.060453304228547,
      "step": 1100
    },
    {
      "epoch": 1.27,
      "learning_rate": 9.692499354866194e-06,
      "loss": 0.099,
      "step": 1125
    },
    {
      "epoch": 1.3,
      "learning_rate": 9.62506961473717e-06,
      "loss": 0.0989,
      "step": 1150
    },
    {
      "epoch": 1.32,
      "learning_rate": 9.551244585834649e-06,
      "loss": 0.097,
      "step": 1175
    },
    {
      "epoch": 1.35,
      "learning_rate": 9.471126349298557e-06,
      "loss": 0.0952,
      "step": 1200
    },
    {
      "epoch": 1.35,
      "eval_cer": 7.549718560240675,
      "eval_loss": 0.0853806585073471,
      "eval_runtime": 1267.0907,
      "eval_samples_per_second": 7.176,
      "eval_steps_per_second": 0.225,
      "eval_wer": 14.320659444262777,
      "step": 1200
    },
    {
      "epoch": 1.38,
      "learning_rate": 9.384825688165987e-06,
      "loss": 0.0946,
      "step": 1225
    },
    {
      "epoch": 1.41,
      "learning_rate": 9.292461934186572e-06,
      "loss": 0.0922,
      "step": 1250
    },
    {
      "epoch": 1.44,
      "learning_rate": 9.194162802817177e-06,
      "loss": 0.0919,
      "step": 1275
    },
    {
      "epoch": 1.47,
      "learning_rate": 9.090064216624093e-06,
      "loss": 0.0915,
      "step": 1300
    },
    {
      "epoch": 1.47,
      "eval_cer": 6.983327974335947,
      "eval_loss": 0.0808597058057785,
      "eval_runtime": 1272.1477,
      "eval_samples_per_second": 7.148,
      "eval_steps_per_second": 0.224,
      "eval_wer": 13.316289479383872,
      "step": 1300
    },
    {
      "epoch": 1.49,
      "learning_rate": 8.980310117336864e-06,
      "loss": 0.0894,
      "step": 1325
    },
    {
      "epoch": 1.52,
      "learning_rate": 8.865052266813686e-06,
      "loss": 0.0884,
      "step": 1350
    },
    {
      "epoch": 1.55,
      "learning_rate": 8.744450037193558e-06,
      "loss": 0.0872,
      "step": 1375
    },
    {
      "epoch": 1.58,
      "learning_rate": 8.61867019052535e-06,
      "loss": 0.0843,
      "step": 1400
    },
    {
      "epoch": 1.58,
      "eval_cer": 6.642200071699689,
      "eval_loss": 0.07798563688993454,
      "eval_runtime": 1266.8664,
      "eval_samples_per_second": 7.178,
      "eval_steps_per_second": 0.225,
      "eval_wer": 12.717914307919024,
      "step": 1400
    },
    {
      "epoch": 1.61,
      "learning_rate": 8.48788664817855e-06,
      "loss": 0.0858,
      "step": 1425
    },
    {
      "epoch": 1.63,
      "learning_rate": 8.352280250354445e-06,
      "loss": 0.0849,
      "step": 1450
    },
    {
      "epoch": 1.66,
      "learning_rate": 8.212038506030386e-06,
      "loss": 0.0843,
      "step": 1475
    },
    {
      "epoch": 1.69,
      "learning_rate": 8.067355333682799e-06,
      "loss": 0.0819,
      "step": 1500
    },
    {
      "epoch": 1.69,
      "eval_cer": 6.728683201945501,
      "eval_loss": 0.07437845319509506,
      "eval_runtime": 1268.04,
      "eval_samples_per_second": 7.171,
      "eval_steps_per_second": 0.225,
      "eval_wer": 12.65888368419041,
      "step": 1500
    },
    {
      "epoch": 1.72,
      "learning_rate": 7.91843079314751e-06,
      "loss": 0.0814,
      "step": 1525
    },
    {
      "epoch": 1.75,
      "learning_rate": 7.765470808988156e-06,
      "loss": 0.0814,
      "step": 1550
    },
    {
      "epoch": 1.78,
      "learning_rate": 7.608686885755146e-06,
      "loss": 0.0795,
      "step": 1575
    },
    {
      "epoch": 1.8,
      "learning_rate": 7.448295815528956e-06,
      "loss": 0.0798,
      "step": 1600
    },
    {
      "epoch": 1.8,
      "eval_cer": 6.496213591156545,
      "eval_loss": 0.07181866466999054,
      "eval_runtime": 1262.2284,
      "eval_samples_per_second": 7.204,
      "eval_steps_per_second": 0.226,
      "eval_wer": 12.302151857341244,
      "step": 1600
    },
    {
      "epoch": 1.83,
      "learning_rate": 7.284519378152104e-06,
      "loss": 0.0798,
      "step": 1625
    },
    {
      "epoch": 1.86,
      "learning_rate": 7.117584034564329e-06,
      "loss": 0.0794,
      "step": 1650
    },
    {
      "epoch": 1.89,
      "learning_rate": 6.947720613665016e-06,
      "loss": 0.0778,
      "step": 1675
    },
    {
      "epoch": 1.92,
      "learning_rate": 6.775163993135843e-06,
      "loss": 0.0774,
      "step": 1700
    },
    {
      "epoch": 1.92,
      "eval_cer": 6.219763243191301,
      "eval_loss": 0.06941211223602295,
      "eval_runtime": 1257.6258,
      "eval_samples_per_second": 7.23,
      "eval_steps_per_second": 0.227,
      "eval_wer": 11.841373247661071,
      "step": 1700
    },
    {
      "epoch": 1.94,
      "learning_rate": 6.600152774664997e-06,
      "loss": 0.0769,
      "step": 1725
    },
    {
      "epoch": 1.97,
      "learning_rate": 6.422928954022047e-06,
      "loss": 0.0778,
      "step": 1750
    },
    {
      "epoch": 2.0,
      "learning_rate": 6.243737586439663e-06,
      "loss": 0.0784,
      "step": 1775
    },
    {
      "epoch": 2.03,
      "learning_rate": 6.062826447764883e-06,
      "loss": 0.0695,
      "step": 1800
    },
    {
      "epoch": 2.03,
      "eval_cer": 6.13457366403891,
      "eval_loss": 0.06795131415128708,
      "eval_runtime": 1263.7565,
      "eval_samples_per_second": 7.195,
      "eval_steps_per_second": 0.226,
      "eval_wer": 11.568303527822959,
      "step": 1800
    },
    {
      "epoch": 2.06,
      "learning_rate": 5.880445691848471e-06,
      "loss": 0.0711,
      "step": 1825
    },
    {
      "epoch": 2.09,
      "learning_rate": 5.696847504646093e-06,
      "loss": 0.069,
      "step": 1850
    },
    {
      "epoch": 2.11,
      "learning_rate": 5.512285755509618e-06,
      "loss": 0.0709,
      "step": 1875
    },
    {
      "epoch": 2.14,
      "learning_rate": 5.327015646150716e-06,
      "loss": 0.0686,
      "step": 1900
    },
    {
      "epoch": 2.14,
      "eval_cer": 5.975836465574909,
      "eval_loss": 0.06619075685739517,
      "eval_runtime": 1261.7232,
      "eval_samples_per_second": 7.207,
      "eval_steps_per_second": 0.226,
      "eval_wer": 11.248518925897457,
      "step": 1900
    },
    {
      "epoch": 2.17,
      "learning_rate": 5.1412933577621346e-06,
      "loss": 0.0678,
      "step": 1925
    },
    {
      "epoch": 2.2,
      "learning_rate": 4.955375696784614e-06,
      "loss": 0.0681,
      "step": 1950
    },
    {
      "epoch": 2.23,
      "learning_rate": 4.769519739809227e-06,
      "loss": 0.0676,
      "step": 1975
    },
    {
      "epoch": 2.25,
      "learning_rate": 4.583982478106189e-06,
      "loss": 0.0681,
      "step": 2000
    },
    {
      "epoch": 2.25,
      "eval_cer": 6.059917286647226,
      "eval_loss": 0.06468059122562408,
      "eval_runtime": 1264.0065,
      "eval_samples_per_second": 7.194,
      "eval_steps_per_second": 0.225,
      "eval_wer": 11.284192108582374,
      "step": 2000
    },
    {
      "epoch": 2.28,
      "learning_rate": 4.3990204622716405e-06,
      "loss": 0.0667,
      "step": 2025
    },
    {
      "epoch": 2.31,
      "learning_rate": 4.214889447483755e-06,
      "loss": 0.0673,
      "step": 2050
    },
    {
      "epoch": 2.34,
      "learning_rate": 4.031844039858726e-06,
      "loss": 0.068,
      "step": 2075
    },
    {
      "epoch": 2.37,
      "learning_rate": 3.850137344395598e-06,
      "loss": 0.0661,
      "step": 2100
    },
    {
      "epoch": 2.37,
      "eval_cer": 5.949965443706504,
      "eval_loss": 0.06391420215368271,
      "eval_runtime": 1259.247,
      "eval_samples_per_second": 7.221,
      "eval_steps_per_second": 0.226,
      "eval_wer": 11.13555384739522,
      "step": 2100
    },
    {
      "epoch": 2.4,
      "learning_rate": 3.6700206149967698e-06,
      "loss": 0.0657,
      "step": 2125
    },
    {
      "epoch": 2.42,
      "learning_rate": 3.4917429070480825e-06,
      "loss": 0.0648,
      "step": 2150
    },
    {
      "epoch": 2.45,
      "learning_rate": 3.3155507330389004e-06,
      "loss": 0.0658,
      "step": 2175
    },
    {
      "epoch": 2.48,
      "learning_rate": 3.141687721698363e-06,
      "loss": 0.0653,
      "step": 2200
    },
    {
      "epoch": 2.48,
      "eval_cer": 5.811370683697191,
      "eval_loss": 0.06310658901929855,
      "eval_runtime": 1258.5174,
      "eval_samples_per_second": 7.225,
      "eval_steps_per_second": 0.226,
      "eval_wer": 10.895184545018283,
      "step": 2200
    },
    {
      "epoch": 2.51,
      "learning_rate": 2.9703942811191423e-06,
      "loss": 0.0668,
      "step": 2225
    },
    {
      "epoch": 2.54,
      "learning_rate": 2.801907266334516e-06,
      "loss": 0.0651,
      "step": 2250
    },
    {
      "epoch": 2.56,
      "learning_rate": 2.6364596518084124e-06,
      "loss": 0.0646,
      "step": 2275
    },
    {
      "epoch": 2.59,
      "learning_rate": 2.474280209291299e-06,
      "loss": 0.0636,
      "step": 2300
    },
    {
      "epoch": 2.59,
      "eval_cer": 5.850177216499799,
      "eval_loss": 0.06220058351755142,
      "eval_runtime": 1261.9688,
      "eval_samples_per_second": 7.205,
      "eval_steps_per_second": 0.226,
      "eval_wer": 10.938501981135682,
      "step": 2300
    },
    {
      "epoch": 2.62,
      "learning_rate": 2.3155931914873297e-06,
      "loss": 0.0649,
      "step": 2325
    },
    {
      "epoch": 2.65,
      "learning_rate": 2.1606180219702057e-06,
      "loss": 0.065,
      "step": 2350
    },
    {
      "epoch": 2.68,
      "learning_rate": 2.009568991776456e-06,
      "loss": 0.0638,
      "step": 2375
    },
    {
      "epoch": 2.71,
      "learning_rate": 1.8626549630957397e-06,
      "loss": 0.0641,
      "step": 2400
    },
    {
      "epoch": 2.71,
      "eval_cer": 5.738192650412273,
      "eval_loss": 0.06147067993879318,
      "eval_runtime": 1257.8302,
      "eval_samples_per_second": 7.229,
      "eval_steps_per_second": 0.227,
      "eval_wer": 10.701530124728736,
      "step": 2400
    },
    {
      "epoch": 2.73,
      "learning_rate": 1.720079080467828e-06,
      "loss": 0.0624,
      "step": 2425
    },
    {
      "epoch": 2.76,
      "learning_rate": 1.5820384898856433e-06,
      "loss": 0.0638,
      "step": 2450
    },
    {
      "epoch": 2.79,
      "learning_rate": 1.4487240661927627e-06,
      "loss": 0.063,
      "step": 2475
    },
    {
      "epoch": 2.82,
      "learning_rate": 1.3203201491523027e-06,
      "loss": 0.0633,
      "step": 2500
    },
    {
      "epoch": 2.82,
      "eval_cer": 5.703821149929963,
      "eval_loss": 0.06121416017413139,
      "eval_runtime": 1253.1864,
      "eval_samples_per_second": 7.256,
      "eval_steps_per_second": 0.227,
      "eval_wer": 10.645472266223868,
      "step": 2500
    },
    {
      "epoch": 2.85,
      "learning_rate": 1.197004288552167e-06,
      "loss": 0.0637,
      "step": 2525
    },
    {
      "epoch": 2.87,
      "learning_rate": 1.078946998699073e-06,
      "loss": 0.0635,
      "step": 2550
    },
    {
      "epoch": 2.9,
      "learning_rate": 9.663115226408732e-07,
      "loss": 0.0633,
      "step": 2575
    },
    {
      "epoch": 2.93,
      "learning_rate": 8.592536064431467e-07,
      "loss": 0.0626,
      "step": 2600
    },
    {
      "epoch": 2.93,
      "eval_cer": 5.8058268932968184,
      "eval_loss": 0.06081194430589676,
      "eval_runtime": 1267.5166,
      "eval_samples_per_second": 7.174,
      "eval_steps_per_second": 0.225,
      "eval_wer": 10.759711386964849,
      "step": 2600
    },
    {
      "epoch": 2.96,
      "learning_rate": 7.579212838322164e-07,
      "loss": 0.0614,
      "step": 2625
    },
    {
      "epoch": 2.99,
      "learning_rate": 6.624546715023544e-07,
      "loss": 0.0626,
      "step": 2650
    },
    {
      "epoch": 3.02,
      "learning_rate": 5.729857753702118e-07,
      "loss": 0.0616,
      "step": 2675
    },
    {
      "epoch": 3.04,
      "learning_rate": 4.896383080443934e-07,
      "loss": 0.06,
      "step": 2700
    },
    {
      "epoch": 3.04,
      "eval_cer": 5.732833653025247,
      "eval_loss": 0.06054000183939934,
      "eval_runtime": 1260.8932,
      "eval_samples_per_second": 7.212,
      "eval_steps_per_second": 0.226,
      "eval_wer": 10.637403332045135,
      "step": 2700
    },
    {
      "epoch": 3.07,
      "learning_rate": 4.1252751776254373e-07,
      "loss": 0.059,
      "step": 2725
    },
    {
      "epoch": 3.1,
      "learning_rate": 3.417600290325063e-07,
      "loss": 0.0609,
      "step": 2750
    },
    {
      "epoch": 3.13,
      "learning_rate": 2.7743369519788397e-07,
      "loss": 0.0605,
      "step": 2775
    },
    {
      "epoch": 3.16,
      "learning_rate": 2.1963746313188762e-07,
      "loss": 0.0584,
      "step": 2800
    },
    {
      "epoch": 3.16,
      "eval_cer": 5.673699888754606,
      "eval_loss": 0.06046656146645546,
      "eval_runtime": 1262.3183,
      "eval_samples_per_second": 7.203,
      "eval_steps_per_second": 0.226,
      "eval_wer": 10.597058661151479,
      "step": 2800
    },
    {
      "epoch": 3.18,
      "learning_rate": 1.684512502465513e-07,
      "loss": 0.0595,
      "step": 2825
    },
    {
      "epoch": 3.21,
      "learning_rate": 1.2394583398738114e-07,
      "loss": 0.0599,
      "step": 2850
    },
    {
      "epoch": 3.24,
      "learning_rate": 8.618275396624742e-08,
      "loss": 0.0588,
      "step": 2875
    },
    {
      "epoch": 3.27,
      "learning_rate": 5.521422686783295e-08,
      "loss": 0.0585,
      "step": 2900
    },
    {
      "epoch": 3.27,
      "eval_cer": 5.687744157768883,
      "eval_loss": 0.06037650257349014,
      "eval_runtime": 1261.1509,
      "eval_samples_per_second": 7.21,
      "eval_steps_per_second": 0.226,
      "eval_wer": 10.609799083538949,
      "step": 2900
    },
    {
      "epoch": 3.3,
      "learning_rate": 3.1083074247311715e-08,
      "loss": 0.0599,
      "step": 2925
    },
    {
      "epoch": 3.33,
      "learning_rate": 1.382266331908133e-08,
      "loss": 0.0613,
      "step": 2950
    },
    {
      "epoch": 3.35,
      "learning_rate": 3.4568608184420983e-09,
      "loss": 0.0598,
      "step": 2975
    },
    {
      "epoch": 3.38,
      "learning_rate": 0.0,
      "loss": 0.0598,
      "step": 3000
    },
    {
      "epoch": 3.38,
      "eval_cer": 5.707517010196878,
      "eval_loss": 0.06033782660961151,
      "eval_runtime": 1268.7447,
      "eval_samples_per_second": 7.167,
      "eval_steps_per_second": 0.225,
      "eval_wer": 10.604278233837713,
      "step": 3000
    },
    {
      "epoch": 3.38,
      "step": 3000,
      "total_flos": 4.435355593138176e+20,
      "train_loss": 0.1956545435587565,
      "train_runtime": 112740.1771,
      "train_samples_per_second": 13.624,
      "train_steps_per_second": 0.027
    }
  ],
  "max_steps": 3000,
  "num_train_epochs": 4,
  "total_flos": 4.435355593138176e+20,
  "trial_name": null,
  "trial_params": null
}