| {"current_steps": 5, "total_steps": 1800, "loss": 0.5046, "lr": 1.1111111111111112e-06, "epoch": 0.05555555555555555, "percentage": 0.28, "elapsed_time": "0:00:01", "remaining_time": "0:06:58", "throughput": 1373.02, "total_tokens": 1600} | |
| {"current_steps": 10, "total_steps": 1800, "loss": 0.7285, "lr": 2.5e-06, "epoch": 0.1111111111111111, "percentage": 0.56, "elapsed_time": "0:00:01", "remaining_time": "0:05:33", "throughput": 1718.0, "total_tokens": 3200} | |
| {"current_steps": 15, "total_steps": 1800, "loss": 0.5145, "lr": 3.888888888888889e-06, "epoch": 0.16666666666666666, "percentage": 0.83, "elapsed_time": "0:00:02", "remaining_time": "0:05:04", "throughput": 1849.69, "total_tokens": 4736} | |
| {"current_steps": 20, "total_steps": 1800, "loss": 0.5433, "lr": 5.277777777777778e-06, "epoch": 0.2222222222222222, "percentage": 1.11, "elapsed_time": "0:00:03", "remaining_time": "0:04:49", "throughput": 1954.36, "total_tokens": 6368} | |
| {"current_steps": 25, "total_steps": 1800, "loss": 0.7358, "lr": 6.666666666666667e-06, "epoch": 0.2777777777777778, "percentage": 1.39, "elapsed_time": "0:00:03", "remaining_time": "0:04:40", "throughput": 1990.93, "total_tokens": 7872} | |
| {"current_steps": 30, "total_steps": 1800, "loss": 0.6497, "lr": 8.055555555555557e-06, "epoch": 0.3333333333333333, "percentage": 1.67, "elapsed_time": "0:00:04", "remaining_time": "0:04:34", "throughput": 2030.85, "total_tokens": 9440} | |
| {"current_steps": 35, "total_steps": 1800, "loss": 0.6108, "lr": 9.444444444444445e-06, "epoch": 0.3888888888888889, "percentage": 1.94, "elapsed_time": "0:00:05", "remaining_time": "0:04:29", "throughput": 2059.83, "total_tokens": 11008} | |
| {"current_steps": 40, "total_steps": 1800, "loss": 0.5964, "lr": 1.0833333333333334e-05, "epoch": 0.4444444444444444, "percentage": 2.22, "elapsed_time": "0:00:06", "remaining_time": "0:04:25", "throughput": 2088.38, "total_tokens": 12608} | |
| {"current_steps": 45, "total_steps": 1800, "loss": 0.592, "lr": 1.2222222222222222e-05, "epoch": 0.5, "percentage": 2.5, "elapsed_time": "0:00:06", "remaining_time": "0:04:22", "throughput": 2101.69, "total_tokens": 14144} | |
| {"current_steps": 50, "total_steps": 1800, "loss": 0.864, "lr": 1.3611111111111111e-05, "epoch": 0.5555555555555556, "percentage": 2.78, "elapsed_time": "0:00:07", "remaining_time": "0:04:19", "throughput": 2121.23, "total_tokens": 15744} | |
| {"current_steps": 55, "total_steps": 1800, "loss": 0.6727, "lr": 1.5e-05, "epoch": 0.6111111111111112, "percentage": 3.06, "elapsed_time": "0:00:08", "remaining_time": "0:04:17", "throughput": 2132.76, "total_tokens": 17312} | |
| {"current_steps": 60, "total_steps": 1800, "loss": 0.491, "lr": 1.638888888888889e-05, "epoch": 0.6666666666666666, "percentage": 3.33, "elapsed_time": "0:00:08", "remaining_time": "0:04:15", "throughput": 2141.91, "total_tokens": 18880} | |
| {"current_steps": 65, "total_steps": 1800, "loss": 0.547, "lr": 1.777777777777778e-05, "epoch": 0.7222222222222222, "percentage": 3.61, "elapsed_time": "0:00:09", "remaining_time": "0:04:13", "throughput": 2146.85, "total_tokens": 20416} | |
| {"current_steps": 70, "total_steps": 1800, "loss": 0.5315, "lr": 1.9166666666666667e-05, "epoch": 0.7777777777777778, "percentage": 3.89, "elapsed_time": "0:00:10", "remaining_time": "0:04:12", "throughput": 2148.25, "total_tokens": 21920} | |
| {"current_steps": 75, "total_steps": 1800, "loss": 0.4814, "lr": 2.0555555555555555e-05, "epoch": 0.8333333333333334, "percentage": 4.17, "elapsed_time": "0:00:10", "remaining_time": "0:04:10", "throughput": 2155.6, "total_tokens": 23488} | |
| {"current_steps": 80, "total_steps": 1800, "loss": 0.6245, "lr": 2.1944444444444445e-05, "epoch": 0.8888888888888888, "percentage": 4.44, "elapsed_time": "0:00:11", "remaining_time": "0:04:09", "throughput": 2161.96, "total_tokens": 25056} | |
| {"current_steps": 85, "total_steps": 1800, "loss": 0.7819, "lr": 2.3333333333333336e-05, "epoch": 0.9444444444444444, "percentage": 4.72, "elapsed_time": "0:00:12", "remaining_time": "0:04:07", "throughput": 2164.93, "total_tokens": 26592} | |
| {"current_steps": 90, "total_steps": 1800, "loss": 0.9281, "lr": 2.4722222222222223e-05, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:12", "remaining_time": "0:04:06", "throughput": 2169.07, "total_tokens": 28192} | |
| {"current_steps": 90, "total_steps": 1800, "eval_loss": 0.549659252166748, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:13", "remaining_time": "0:04:16", "throughput": 2087.24, "total_tokens": 28192} | |
| {"current_steps": 95, "total_steps": 1800, "loss": 0.629, "lr": 2.6111111111111114e-05, "epoch": 1.0555555555555556, "percentage": 5.28, "elapsed_time": "0:00:15", "remaining_time": "0:04:30", "throughput": 1976.67, "total_tokens": 29792} | |
| {"current_steps": 100, "total_steps": 1800, "loss": 0.5807, "lr": 2.7500000000000004e-05, "epoch": 1.1111111111111112, "percentage": 5.56, "elapsed_time": "0:00:15", "remaining_time": "0:04:28", "throughput": 1987.84, "total_tokens": 31360} | |
| {"current_steps": 105, "total_steps": 1800, "loss": 0.4806, "lr": 2.8888888888888888e-05, "epoch": 1.1666666666666667, "percentage": 5.83, "elapsed_time": "0:00:16", "remaining_time": "0:04:25", "throughput": 1993.34, "total_tokens": 32832} | |
| {"current_steps": 110, "total_steps": 1800, "loss": 0.5371, "lr": 3.0277777777777776e-05, "epoch": 1.2222222222222223, "percentage": 6.11, "elapsed_time": "0:00:17", "remaining_time": "0:04:23", "throughput": 2000.22, "total_tokens": 34336} | |
| {"current_steps": 115, "total_steps": 1800, "loss": 0.8599, "lr": 3.1666666666666666e-05, "epoch": 1.2777777777777777, "percentage": 6.39, "elapsed_time": "0:00:17", "remaining_time": "0:04:21", "throughput": 2011.39, "total_tokens": 35936} | |
| {"current_steps": 120, "total_steps": 1800, "loss": 0.6148, "lr": 3.3055555555555553e-05, "epoch": 1.3333333333333333, "percentage": 6.67, "elapsed_time": "0:00:18", "remaining_time": "0:04:19", "throughput": 2021.82, "total_tokens": 37536} | |
| {"current_steps": 125, "total_steps": 1800, "loss": 0.4763, "lr": 3.444444444444445e-05, "epoch": 1.3888888888888888, "percentage": 6.94, "elapsed_time": "0:00:19", "remaining_time": "0:04:18", "throughput": 2026.51, "total_tokens": 39040} | |
| {"current_steps": 130, "total_steps": 1800, "loss": 0.5887, "lr": 3.5833333333333335e-05, "epoch": 1.4444444444444444, "percentage": 7.22, "elapsed_time": "0:00:19", "remaining_time": "0:04:16", "throughput": 2032.08, "total_tokens": 40576} | |
| {"current_steps": 135, "total_steps": 1800, "loss": 0.2671, "lr": 3.722222222222222e-05, "epoch": 1.5, "percentage": 7.5, "elapsed_time": "0:00:20", "remaining_time": "0:04:14", "throughput": 2042.12, "total_tokens": 42208} | |
| {"current_steps": 140, "total_steps": 1800, "loss": 0.3021, "lr": 3.8611111111111116e-05, "epoch": 1.5555555555555556, "percentage": 7.78, "elapsed_time": "0:00:21", "remaining_time": "0:04:13", "throughput": 2047.81, "total_tokens": 43776} | |
| {"current_steps": 145, "total_steps": 1800, "loss": 0.113, "lr": 4e-05, "epoch": 1.6111111111111112, "percentage": 8.06, "elapsed_time": "0:00:22", "remaining_time": "0:04:12", "throughput": 2055.12, "total_tokens": 45376} | |
| {"current_steps": 150, "total_steps": 1800, "loss": 0.178, "lr": 4.138888888888889e-05, "epoch": 1.6666666666666665, "percentage": 8.33, "elapsed_time": "0:00:22", "remaining_time": "0:04:10", "throughput": 2059.69, "total_tokens": 46944} | |
| {"current_steps": 155, "total_steps": 1800, "loss": 0.0566, "lr": 4.277777777777778e-05, "epoch": 1.7222222222222223, "percentage": 8.61, "elapsed_time": "0:00:23", "remaining_time": "0:04:09", "throughput": 2065.25, "total_tokens": 48512} | |
| {"current_steps": 160, "total_steps": 1800, "loss": 0.1192, "lr": 4.4166666666666665e-05, "epoch": 1.7777777777777777, "percentage": 8.89, "elapsed_time": "0:00:24", "remaining_time": "0:04:07", "throughput": 2071.68, "total_tokens": 50112} | |
| {"current_steps": 165, "total_steps": 1800, "loss": 0.27, "lr": 4.555555555555556e-05, "epoch": 1.8333333333333335, "percentage": 9.17, "elapsed_time": "0:00:24", "remaining_time": "0:04:06", "throughput": 2076.55, "total_tokens": 51680} | |
| {"current_steps": 170, "total_steps": 1800, "loss": 0.0716, "lr": 4.6944444444444446e-05, "epoch": 1.8888888888888888, "percentage": 9.44, "elapsed_time": "0:00:25", "remaining_time": "0:04:05", "throughput": 2079.85, "total_tokens": 53216} | |
| {"current_steps": 175, "total_steps": 1800, "loss": 0.3628, "lr": 4.8333333333333334e-05, "epoch": 1.9444444444444444, "percentage": 9.72, "elapsed_time": "0:00:26", "remaining_time": "0:04:04", "throughput": 2081.83, "total_tokens": 54720} | |
| {"current_steps": 180, "total_steps": 1800, "loss": 0.068, "lr": 4.972222222222223e-05, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:27", "remaining_time": "0:04:03", "throughput": 2082.18, "total_tokens": 56256} | |
| {"current_steps": 180, "total_steps": 1800, "eval_loss": 0.04117237403988838, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:27", "remaining_time": "0:04:07", "throughput": 2043.33, "total_tokens": 56256} | |
| {"current_steps": 185, "total_steps": 1800, "loss": 0.065, "lr": 4.9999247861994194e-05, "epoch": 2.0555555555555554, "percentage": 10.28, "elapsed_time": "0:00:28", "remaining_time": "0:04:11", "throughput": 2004.95, "total_tokens": 57856} | |
| {"current_steps": 190, "total_steps": 1800, "loss": 0.0287, "lr": 4.9996192378909786e-05, "epoch": 2.111111111111111, "percentage": 10.56, "elapsed_time": "0:00:29", "remaining_time": "0:04:10", "throughput": 2011.63, "total_tokens": 59456} | |
| {"current_steps": 195, "total_steps": 1800, "loss": 0.1964, "lr": 4.999078682916774e-05, "epoch": 2.1666666666666665, "percentage": 10.83, "elapsed_time": "0:00:30", "remaining_time": "0:04:09", "throughput": 2016.7, "total_tokens": 61024} | |
| {"current_steps": 200, "total_steps": 1800, "loss": 0.0145, "lr": 4.998303172098155e-05, "epoch": 2.2222222222222223, "percentage": 11.11, "elapsed_time": "0:00:30", "remaining_time": "0:04:07", "throughput": 2021.65, "total_tokens": 62592} | |
| {"current_steps": 205, "total_steps": 1800, "loss": 0.1137, "lr": 4.997292778346312e-05, "epoch": 2.2777777777777777, "percentage": 11.39, "elapsed_time": "0:00:31", "remaining_time": "0:04:06", "throughput": 2025.59, "total_tokens": 64128} | |
| {"current_steps": 210, "total_steps": 1800, "loss": 0.155, "lr": 4.996047596655418e-05, "epoch": 2.3333333333333335, "percentage": 11.67, "elapsed_time": "0:00:32", "remaining_time": "0:04:04", "throughput": 2030.49, "total_tokens": 65696} | |
| {"current_steps": 215, "total_steps": 1800, "loss": 0.1587, "lr": 4.994567744093703e-05, "epoch": 2.388888888888889, "percentage": 11.94, "elapsed_time": "0:00:33", "remaining_time": "0:04:03", "throughput": 2033.26, "total_tokens": 67200} | |
| {"current_steps": 220, "total_steps": 1800, "loss": 0.2451, "lr": 4.992853359792444e-05, "epoch": 2.4444444444444446, "percentage": 12.22, "elapsed_time": "0:00:33", "remaining_time": "0:04:02", "throughput": 2038.43, "total_tokens": 68800} | |
| {"current_steps": 225, "total_steps": 1800, "loss": 0.2798, "lr": 4.9909046049328846e-05, "epoch": 2.5, "percentage": 12.5, "elapsed_time": "0:00:34", "remaining_time": "0:04:01", "throughput": 2042.74, "total_tokens": 70368} | |
| {"current_steps": 230, "total_steps": 1800, "loss": 0.1249, "lr": 4.988721662731083e-05, "epoch": 2.5555555555555554, "percentage": 12.78, "elapsed_time": "0:00:35", "remaining_time": "0:03:59", "throughput": 2045.05, "total_tokens": 71872} | |
| {"current_steps": 235, "total_steps": 1800, "loss": 0.192, "lr": 4.9863047384206835e-05, "epoch": 2.611111111111111, "percentage": 13.06, "elapsed_time": "0:00:35", "remaining_time": "0:03:58", "throughput": 2049.9, "total_tokens": 73472} | |
| {"current_steps": 240, "total_steps": 1800, "loss": 0.0599, "lr": 4.983654059233626e-05, "epoch": 2.6666666666666665, "percentage": 13.33, "elapsed_time": "0:00:36", "remaining_time": "0:03:57", "throughput": 2053.72, "total_tokens": 75040} | |
| {"current_steps": 245, "total_steps": 1800, "loss": 0.042, "lr": 4.9807698743787744e-05, "epoch": 2.7222222222222223, "percentage": 13.61, "elapsed_time": "0:00:37", "remaining_time": "0:03:56", "throughput": 2055.53, "total_tokens": 76544} | |
| {"current_steps": 250, "total_steps": 1800, "loss": 0.04, "lr": 4.9776524550184965e-05, "epoch": 2.7777777777777777, "percentage": 13.89, "elapsed_time": "0:00:37", "remaining_time": "0:03:55", "throughput": 2057.31, "total_tokens": 78048} | |
| {"current_steps": 255, "total_steps": 1800, "loss": 0.0117, "lr": 4.974302094243164e-05, "epoch": 2.8333333333333335, "percentage": 14.17, "elapsed_time": "0:00:38", "remaining_time": "0:03:54", "throughput": 2062.52, "total_tokens": 79680} | |
| {"current_steps": 260, "total_steps": 1800, "loss": 0.0255, "lr": 4.970719107043595e-05, "epoch": 2.888888888888889, "percentage": 14.44, "elapsed_time": "0:00:39", "remaining_time": "0:03:52", "throughput": 2064.37, "total_tokens": 81184} | |
| {"current_steps": 265, "total_steps": 1800, "loss": 0.1139, "lr": 4.966903830281449e-05, "epoch": 2.9444444444444446, "percentage": 14.72, "elapsed_time": "0:00:40", "remaining_time": "0:03:51", "throughput": 2068.06, "total_tokens": 82784} | |
| {"current_steps": 270, "total_steps": 1800, "loss": 0.0154, "lr": 4.962856622657541e-05, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:00:40", "remaining_time": "0:03:50", "throughput": 2068.51, "total_tokens": 84320} | |
| {"current_steps": 270, "total_steps": 1800, "eval_loss": 0.0333462655544281, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:00:41", "remaining_time": "0:03:53", "throughput": 2042.55, "total_tokens": 84320} | |
| {"current_steps": 275, "total_steps": 1800, "loss": 0.1616, "lr": 4.9585778646781364e-05, "epoch": 3.0555555555555554, "percentage": 15.28, "elapsed_time": "0:00:42", "remaining_time": "0:03:56", "throughput": 2014.69, "total_tokens": 85920} | |
| {"current_steps": 280, "total_steps": 1800, "loss": 0.0242, "lr": 4.9540679586191605e-05, "epoch": 3.111111111111111, "percentage": 15.56, "elapsed_time": "0:00:43", "remaining_time": "0:03:55", "throughput": 2017.65, "total_tokens": 87456} | |
| {"current_steps": 285, "total_steps": 1800, "loss": 0.1117, "lr": 4.9493273284883854e-05, "epoch": 3.1666666666666665, "percentage": 15.83, "elapsed_time": "0:00:44", "remaining_time": "0:03:54", "throughput": 2021.91, "total_tokens": 89056} | |
| {"current_steps": 290, "total_steps": 1800, "loss": 0.0502, "lr": 4.9443564199855666e-05, "epoch": 3.2222222222222223, "percentage": 16.11, "elapsed_time": "0:00:44", "remaining_time": "0:03:52", "throughput": 2023.87, "total_tokens": 90560} | |
| {"current_steps": 295, "total_steps": 1800, "loss": 0.0859, "lr": 4.939155700460536e-05, "epoch": 3.2777777777777777, "percentage": 16.39, "elapsed_time": "0:00:45", "remaining_time": "0:03:51", "throughput": 2027.0, "total_tokens": 92128} | |
| {"current_steps": 300, "total_steps": 1800, "loss": 0.0761, "lr": 4.933725658869267e-05, "epoch": 3.3333333333333335, "percentage": 16.67, "elapsed_time": "0:00:46", "remaining_time": "0:03:50", "throughput": 2030.97, "total_tokens": 93728} | |
| {"current_steps": 305, "total_steps": 1800, "loss": 0.0546, "lr": 4.9280668057279014e-05, "epoch": 3.388888888888889, "percentage": 16.94, "elapsed_time": "0:00:46", "remaining_time": "0:03:49", "throughput": 2033.26, "total_tokens": 95264} | |
| {"current_steps": 310, "total_steps": 1800, "loss": 0.0572, "lr": 4.9221796730647516e-05, "epoch": 3.4444444444444446, "percentage": 17.22, "elapsed_time": "0:00:47", "remaining_time": "0:03:48", "throughput": 2035.0, "total_tokens": 96768} | |
| {"current_steps": 315, "total_steps": 1800, "loss": 0.1042, "lr": 4.916064814370287e-05, "epoch": 3.5, "percentage": 17.5, "elapsed_time": "0:00:48", "remaining_time": "0:03:47", "throughput": 2039.36, "total_tokens": 98400} | |
| {"current_steps": 320, "total_steps": 1800, "loss": 0.0325, "lr": 4.9097228045450864e-05, "epoch": 3.5555555555555554, "percentage": 17.78, "elapsed_time": "0:00:48", "remaining_time": "0:03:46", "throughput": 2042.28, "total_tokens": 99968} | |
| {"current_steps": 325, "total_steps": 1800, "loss": 0.1191, "lr": 4.9031542398457974e-05, "epoch": 3.611111111111111, "percentage": 18.06, "elapsed_time": "0:00:49", "remaining_time": "0:03:45", "throughput": 2044.53, "total_tokens": 101504} | |
| {"current_steps": 330, "total_steps": 1800, "loss": 0.1714, "lr": 4.896359737829071e-05, "epoch": 3.6666666666666665, "percentage": 18.33, "elapsed_time": "0:00:50", "remaining_time": "0:03:44", "throughput": 2046.09, "total_tokens": 103008} | |
| {"current_steps": 335, "total_steps": 1800, "loss": 0.1025, "lr": 4.889339937293508e-05, "epoch": 3.7222222222222223, "percentage": 18.61, "elapsed_time": "0:00:51", "remaining_time": "0:03:43", "throughput": 2049.56, "total_tokens": 104608} | |
| {"current_steps": 340, "total_steps": 1800, "loss": 0.0776, "lr": 4.8820954982195905e-05, "epoch": 3.7777777777777777, "percentage": 18.89, "elapsed_time": "0:00:51", "remaining_time": "0:03:42", "throughput": 2051.58, "total_tokens": 106144} | |
| {"current_steps": 345, "total_steps": 1800, "loss": 0.0047, "lr": 4.874627101707644e-05, "epoch": 3.8333333333333335, "percentage": 19.17, "elapsed_time": "0:00:52", "remaining_time": "0:03:41", "throughput": 2054.82, "total_tokens": 107744} | |
| {"current_steps": 350, "total_steps": 1800, "loss": 0.1184, "lr": 4.8669354499137955e-05, "epoch": 3.888888888888889, "percentage": 19.44, "elapsed_time": "0:00:53", "remaining_time": "0:03:40", "throughput": 2056.78, "total_tokens": 109280} | |
| {"current_steps": 355, "total_steps": 1800, "loss": 0.157, "lr": 4.859021265983959e-05, "epoch": 3.9444444444444446, "percentage": 19.72, "elapsed_time": "0:00:53", "remaining_time": "0:03:39", "throughput": 2059.13, "total_tokens": 110848} | |
| {"current_steps": 360, "total_steps": 1800, "loss": 0.0329, "lr": 4.850885293985853e-05, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:00:54", "remaining_time": "0:03:38", "throughput": 2060.29, "total_tokens": 112416} | |
| {"current_steps": 360, "total_steps": 1800, "eval_loss": 0.0329216904938221, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:00:55", "remaining_time": "0:03:40", "throughput": 2040.9, "total_tokens": 112416} | |
| {"current_steps": 365, "total_steps": 1800, "loss": 0.0338, "lr": 4.8425282988390376e-05, "epoch": 4.055555555555555, "percentage": 20.28, "elapsed_time": "0:00:56", "remaining_time": "0:03:41", "throughput": 2020.96, "total_tokens": 113984} | |
| {"current_steps": 370, "total_steps": 1800, "loss": 0.1254, "lr": 4.8339510662430046e-05, "epoch": 4.111111111111111, "percentage": 20.56, "elapsed_time": "0:00:57", "remaining_time": "0:03:40", "throughput": 2023.68, "total_tokens": 115552} | |
| {"current_steps": 375, "total_steps": 1800, "loss": 0.031, "lr": 4.825154402603308e-05, "epoch": 4.166666666666667, "percentage": 20.83, "elapsed_time": "0:00:57", "remaining_time": "0:03:39", "throughput": 2026.83, "total_tokens": 117152} | |
| {"current_steps": 380, "total_steps": 1800, "loss": 0.1421, "lr": 4.816139134955746e-05, "epoch": 4.222222222222222, "percentage": 21.11, "elapsed_time": "0:00:58", "remaining_time": "0:03:38", "throughput": 2030.5, "total_tokens": 118784} | |
| {"current_steps": 385, "total_steps": 1800, "loss": 0.0938, "lr": 4.806906110888606e-05, "epoch": 4.277777777777778, "percentage": 21.39, "elapsed_time": "0:00:59", "remaining_time": "0:03:37", "throughput": 2031.37, "total_tokens": 120256} | |
| {"current_steps": 390, "total_steps": 1800, "loss": 0.0677, "lr": 4.797456198462979e-05, "epoch": 4.333333333333333, "percentage": 21.67, "elapsed_time": "0:00:59", "remaining_time": "0:03:36", "throughput": 2033.66, "total_tokens": 121824} | |
| {"current_steps": 395, "total_steps": 1800, "loss": 0.0602, "lr": 4.7877902861311446e-05, "epoch": 4.388888888888889, "percentage": 21.94, "elapsed_time": "0:01:00", "remaining_time": "0:03:35", "throughput": 2035.9, "total_tokens": 123392} | |
| {"current_steps": 400, "total_steps": 1800, "loss": 0.1148, "lr": 4.777909282653042e-05, "epoch": 4.444444444444445, "percentage": 22.22, "elapsed_time": "0:01:01", "remaining_time": "0:03:34", "throughput": 2037.74, "total_tokens": 124928} | |
| {"current_steps": 405, "total_steps": 1800, "loss": 0.3272, "lr": 4.7678141170108345e-05, "epoch": 4.5, "percentage": 22.5, "elapsed_time": "0:01:02", "remaining_time": "0:03:33", "throughput": 2040.09, "total_tokens": 126496} | |
| {"current_steps": 410, "total_steps": 1800, "loss": 0.0376, "lr": 4.757505738321563e-05, "epoch": 4.555555555555555, "percentage": 22.78, "elapsed_time": "0:01:02", "remaining_time": "0:03:32", "throughput": 2042.39, "total_tokens": 128064} | |
| {"current_steps": 415, "total_steps": 1800, "loss": 0.031, "lr": 4.7469851157479177e-05, "epoch": 4.611111111111111, "percentage": 23.06, "elapsed_time": "0:01:03", "remaining_time": "0:03:31", "throughput": 2044.62, "total_tokens": 129632} | |
| {"current_steps": 420, "total_steps": 1800, "loss": 0.0468, "lr": 4.736253238407119e-05, "epoch": 4.666666666666667, "percentage": 23.33, "elapsed_time": "0:01:04", "remaining_time": "0:03:30", "throughput": 2046.83, "total_tokens": 131200} | |
| {"current_steps": 425, "total_steps": 1800, "loss": 0.041, "lr": 4.725311115277924e-05, "epoch": 4.722222222222222, "percentage": 23.61, "elapsed_time": "0:01:04", "remaining_time": "0:03:29", "throughput": 2049.51, "total_tokens": 132800} | |
| {"current_steps": 430, "total_steps": 1800, "loss": 0.0588, "lr": 4.714159775105765e-05, "epoch": 4.777777777777778, "percentage": 23.89, "elapsed_time": "0:01:05", "remaining_time": "0:03:28", "throughput": 2051.0, "total_tokens": 134336} | |
| {"current_steps": 435, "total_steps": 1800, "loss": 0.1, "lr": 4.70280026630603e-05, "epoch": 4.833333333333333, "percentage": 24.17, "elapsed_time": "0:01:06", "remaining_time": "0:03:27", "throughput": 2053.06, "total_tokens": 135904} | |
| {"current_steps": 440, "total_steps": 1800, "loss": 0.0378, "lr": 4.6912336568654925e-05, "epoch": 4.888888888888889, "percentage": 24.44, "elapsed_time": "0:01:06", "remaining_time": "0:03:26", "throughput": 2055.55, "total_tokens": 137504} | |
| {"current_steps": 445, "total_steps": 1800, "loss": 0.0514, "lr": 4.679461034241906e-05, "epoch": 4.944444444444445, "percentage": 24.72, "elapsed_time": "0:01:07", "remaining_time": "0:03:25", "throughput": 2057.02, "total_tokens": 139040} | |
| {"current_steps": 450, "total_steps": 1800, "loss": 0.0093, "lr": 4.667483505261762e-05, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:01:08", "remaining_time": "0:03:24", "throughput": 2056.94, "total_tokens": 140544} | |
| {"current_steps": 450, "total_steps": 1800, "eval_loss": 0.03255601227283478, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:01:08", "remaining_time": "0:03:26", "throughput": 2041.4, "total_tokens": 140544} | |
| {"current_steps": 455, "total_steps": 1800, "loss": 0.1989, "lr": 4.655302196016228e-05, "epoch": 5.055555555555555, "percentage": 25.28, "elapsed_time": "0:01:10", "remaining_time": "0:03:27", "throughput": 2024.96, "total_tokens": 142176} | |
| {"current_steps": 460, "total_steps": 1800, "loss": 0.0232, "lr": 4.642918251755281e-05, "epoch": 5.111111111111111, "percentage": 25.56, "elapsed_time": "0:01:10", "remaining_time": "0:03:26", "throughput": 2027.47, "total_tokens": 143776} | |
| {"current_steps": 465, "total_steps": 1800, "loss": 0.1717, "lr": 4.6303328367800284e-05, "epoch": 5.166666666666667, "percentage": 25.83, "elapsed_time": "0:01:11", "remaining_time": "0:03:25", "throughput": 2029.1, "total_tokens": 145312} | |
| {"current_steps": 470, "total_steps": 1800, "loss": 0.1136, "lr": 4.6175471343332485e-05, "epoch": 5.222222222222222, "percentage": 26.11, "elapsed_time": "0:01:12", "remaining_time": "0:03:24", "throughput": 2031.14, "total_tokens": 146880} | |
| {"current_steps": 475, "total_steps": 1800, "loss": 0.0973, "lr": 4.604562346488144e-05, "epoch": 5.277777777777778, "percentage": 26.39, "elapsed_time": "0:01:13", "remaining_time": "0:03:23", "throughput": 2032.58, "total_tokens": 148416} | |
| {"current_steps": 480, "total_steps": 1800, "loss": 0.0145, "lr": 4.591379694035325e-05, "epoch": 5.333333333333333, "percentage": 26.67, "elapsed_time": "0:01:13", "remaining_time": "0:03:22", "throughput": 2034.13, "total_tokens": 149952} | |
| {"current_steps": 485, "total_steps": 1800, "loss": 0.0283, "lr": 4.5780004163680365e-05, "epoch": 5.388888888888889, "percentage": 26.94, "elapsed_time": "0:01:14", "remaining_time": "0:03:21", "throughput": 2035.64, "total_tokens": 151488} | |
| {"current_steps": 490, "total_steps": 1800, "loss": 0.0516, "lr": 4.5644257713656356e-05, "epoch": 5.444444444444445, "percentage": 27.22, "elapsed_time": "0:01:15", "remaining_time": "0:03:20", "throughput": 2037.04, "total_tokens": 153024} | |
| {"current_steps": 495, "total_steps": 1800, "loss": 0.0163, "lr": 4.550657035275323e-05, "epoch": 5.5, "percentage": 27.5, "elapsed_time": "0:01:15", "remaining_time": "0:03:19", "throughput": 2038.85, "total_tokens": 154592} | |
| {"current_steps": 500, "total_steps": 1800, "loss": 0.0706, "lr": 4.536695502592162e-05, "epoch": 5.555555555555555, "percentage": 27.78, "elapsed_time": "0:01:16", "remaining_time": "0:03:18", "throughput": 2040.31, "total_tokens": 156128} | |
| {"current_steps": 505, "total_steps": 1800, "loss": 0.055, "lr": 4.522542485937369e-05, "epoch": 5.611111111111111, "percentage": 28.06, "elapsed_time": "0:01:17", "remaining_time": "0:03:18", "throughput": 2042.56, "total_tokens": 157728} | |
| {"current_steps": 510, "total_steps": 1800, "loss": 0.143, "lr": 4.5081993159349056e-05, "epoch": 5.666666666666667, "percentage": 28.33, "elapsed_time": "0:01:17", "remaining_time": "0:03:17", "throughput": 2044.48, "total_tokens": 159296} | |
| {"current_steps": 515, "total_steps": 1800, "loss": 0.0284, "lr": 4.493667341086379e-05, "epoch": 5.722222222222222, "percentage": 28.61, "elapsed_time": "0:01:18", "remaining_time": "0:03:16", "throughput": 2046.77, "total_tokens": 160896} | |
| {"current_steps": 520, "total_steps": 1800, "loss": 0.0859, "lr": 4.478947927644258e-05, "epoch": 5.777777777777778, "percentage": 28.89, "elapsed_time": "0:01:19", "remaining_time": "0:03:15", "throughput": 2047.77, "total_tokens": 162400} | |
| {"current_steps": 525, "total_steps": 1800, "loss": 0.0153, "lr": 4.464042459483425e-05, "epoch": 5.833333333333333, "percentage": 29.17, "elapsed_time": "0:01:20", "remaining_time": "0:03:14", "throughput": 2049.46, "total_tokens": 163968} | |
| {"current_steps": 530, "total_steps": 1800, "loss": 0.0893, "lr": 4.448952337971064e-05, "epoch": 5.888888888888889, "percentage": 29.44, "elapsed_time": "0:01:20", "remaining_time": "0:03:13", "throughput": 2051.12, "total_tokens": 165536} | |
| {"current_steps": 535, "total_steps": 1800, "loss": 0.1111, "lr": 4.43367898183491e-05, "epoch": 5.944444444444445, "percentage": 29.72, "elapsed_time": "0:01:21", "remaining_time": "0:03:12", "throughput": 2053.56, "total_tokens": 167168} | |
| {"current_steps": 540, "total_steps": 1800, "loss": 0.0023, "lr": 4.418223827029867e-05, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:01:22", "remaining_time": "0:03:11", "throughput": 2054.77, "total_tokens": 168768} | |
| {"current_steps": 540, "total_steps": 1800, "eval_loss": 0.031396519392728806, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:01:22", "remaining_time": "0:03:12", "throughput": 2041.89, "total_tokens": 168768} | |
| {"current_steps": 545, "total_steps": 1800, "loss": 0.0532, "lr": 4.402588326603002e-05, "epoch": 6.055555555555555, "percentage": 30.28, "elapsed_time": "0:01:24", "remaining_time": "0:03:13", "throughput": 2026.82, "total_tokens": 170336} | |
| {"current_steps": 550, "total_steps": 1800, "loss": 0.0122, "lr": 4.386773950556931e-05, "epoch": 6.111111111111111, "percentage": 30.56, "elapsed_time": "0:01:24", "remaining_time": "0:03:12", "throughput": 2028.48, "total_tokens": 171904} | |
| {"current_steps": 555, "total_steps": 1800, "loss": 0.0254, "lr": 4.3707821857116176e-05, "epoch": 6.166666666666667, "percentage": 30.83, "elapsed_time": "0:01:25", "remaining_time": "0:03:11", "throughput": 2029.68, "total_tokens": 173440} | |
| {"current_steps": 560, "total_steps": 1800, "loss": 0.0156, "lr": 4.354614535564588e-05, "epoch": 6.222222222222222, "percentage": 31.11, "elapsed_time": "0:01:26", "remaining_time": "0:03:10", "throughput": 2030.92, "total_tokens": 174976} | |
| {"current_steps": 565, "total_steps": 1800, "loss": 0.2395, "lr": 4.3382725201495723e-05, "epoch": 6.277777777777778, "percentage": 31.39, "elapsed_time": "0:01:26", "remaining_time": "0:03:09", "throughput": 2033.34, "total_tokens": 176608} | |
| {"current_steps": 570, "total_steps": 1800, "loss": 0.1044, "lr": 4.321757675893596e-05, "epoch": 6.333333333333333, "percentage": 31.67, "elapsed_time": "0:01:27", "remaining_time": "0:03:08", "throughput": 2034.63, "total_tokens": 178144} | |
| {"current_steps": 575, "total_steps": 1800, "loss": 0.0359, "lr": 4.305071555472534e-05, "epoch": 6.388888888888889, "percentage": 31.94, "elapsed_time": "0:01:28", "remaining_time": "0:03:08", "throughput": 2035.91, "total_tokens": 179680} | |
| {"current_steps": 580, "total_steps": 1800, "loss": 0.0707, "lr": 4.288215727665129e-05, "epoch": 6.444444444444445, "percentage": 32.22, "elapsed_time": "0:01:28", "remaining_time": "0:03:07", "throughput": 2037.46, "total_tokens": 181248} | |
| {"current_steps": 585, "total_steps": 1800, "loss": 0.0741, "lr": 4.2711917772055e-05, "epoch": 6.5, "percentage": 32.5, "elapsed_time": "0:01:29", "remaining_time": "0:03:06", "throughput": 2039.41, "total_tokens": 182848} | |
| {"current_steps": 590, "total_steps": 1800, "loss": 0.0499, "lr": 4.254001304634151e-05, "epoch": 6.555555555555555, "percentage": 32.78, "elapsed_time": "0:01:30", "remaining_time": "0:03:05", "throughput": 2039.97, "total_tokens": 184352} | |
| {"current_steps": 595, "total_steps": 1800, "loss": 0.0484, "lr": 4.2366459261474933e-05, "epoch": 6.611111111111111, "percentage": 33.06, "elapsed_time": "0:01:31", "remaining_time": "0:03:04", "throughput": 2041.84, "total_tokens": 185952} | |
| {"current_steps": 600, "total_steps": 1800, "loss": 0.1036, "lr": 4.2191272734458955e-05, "epoch": 6.666666666666667, "percentage": 33.33, "elapsed_time": "0:01:31", "remaining_time": "0:03:03", "throughput": 2043.0, "total_tokens": 187488} | |
| {"current_steps": 605, "total_steps": 1800, "loss": 0.0225, "lr": 4.201446993580276e-05, "epoch": 6.722222222222222, "percentage": 33.61, "elapsed_time": "0:01:32", "remaining_time": "0:03:02", "throughput": 2044.47, "total_tokens": 189056} | |
| {"current_steps": 610, "total_steps": 1800, "loss": 0.0768, "lr": 4.183606748797251e-05, "epoch": 6.777777777777778, "percentage": 33.89, "elapsed_time": "0:01:33", "remaining_time": "0:03:01", "throughput": 2045.57, "total_tokens": 190592} | |
| {"current_steps": 615, "total_steps": 1800, "loss": 0.0737, "lr": 4.1656082163828566e-05, "epoch": 6.833333333333333, "percentage": 34.17, "elapsed_time": "0:01:33", "remaining_time": "0:03:00", "throughput": 2047.36, "total_tokens": 192192} | |
| {"current_steps": 620, "total_steps": 1800, "loss": 0.156, "lr": 4.147453088504854e-05, "epoch": 6.888888888888889, "percentage": 34.44, "elapsed_time": "0:01:34", "remaining_time": "0:02:59", "throughput": 2049.48, "total_tokens": 193824} | |
| {"current_steps": 625, "total_steps": 1800, "loss": 0.0192, "lr": 4.129143072053638e-05, "epoch": 6.944444444444445, "percentage": 34.72, "elapsed_time": "0:01:35", "remaining_time": "0:02:59", "throughput": 2050.82, "total_tokens": 195392} | |
| {"current_steps": 630, "total_steps": 1800, "loss": 0.0313, "lr": 4.110679888481763e-05, "epoch": 7.0, "percentage": 35.0, "elapsed_time": "0:01:36", "remaining_time": "0:02:58", "throughput": 2050.78, "total_tokens": 196896} | |
| {"current_steps": 630, "total_steps": 1800, "eval_loss": 0.032164059579372406, "epoch": 7.0, "percentage": 35.0, "elapsed_time": "0:01:36", "remaining_time": "0:02:59", "throughput": 2039.66, "total_tokens": 196896} | |
| {"current_steps": 635, "total_steps": 1800, "loss": 0.1462, "lr": 4.09206527364209e-05, "epoch": 7.055555555555555, "percentage": 35.28, "elapsed_time": "0:01:37", "remaining_time": "0:02:59", "throughput": 2028.31, "total_tokens": 198496} | |
| {"current_steps": 640, "total_steps": 1800, "loss": 0.0053, "lr": 4.073300977624594e-05, "epoch": 7.111111111111111, "percentage": 35.56, "elapsed_time": "0:01:38", "remaining_time": "0:02:58", "throughput": 2029.8, "total_tokens": 200064} | |
| {"current_steps": 645, "total_steps": 1800, "loss": 0.1039, "lr": 4.054388764591822e-05, "epoch": 7.166666666666667, "percentage": 35.83, "elapsed_time": "0:01:39", "remaining_time": "0:02:57", "throughput": 2030.96, "total_tokens": 201600} | |
| {"current_steps": 650, "total_steps": 1800, "loss": 0.013, "lr": 4.035330412613035e-05, "epoch": 7.222222222222222, "percentage": 36.11, "elapsed_time": "0:01:39", "remaining_time": "0:02:56", "throughput": 2032.46, "total_tokens": 203168} | |
| {"current_steps": 655, "total_steps": 1800, "loss": 0.0344, "lr": 4.0161277134970345e-05, "epoch": 7.277777777777778, "percentage": 36.39, "elapsed_time": "0:01:40", "remaining_time": "0:02:55", "throughput": 2033.56, "total_tokens": 204704} | |
| {"current_steps": 660, "total_steps": 1800, "loss": 0.0252, "lr": 3.996782472623705e-05, "epoch": 7.333333333333333, "percentage": 36.67, "elapsed_time": "0:01:41", "remaining_time": "0:02:55", "throughput": 2035.07, "total_tokens": 206272} | |
| {"current_steps": 665, "total_steps": 1800, "loss": 0.0581, "lr": 3.977296508774278e-05, "epoch": 7.388888888888889, "percentage": 36.94, "elapsed_time": "0:01:42", "remaining_time": "0:02:54", "throughput": 2036.88, "total_tokens": 207872} | |
| {"current_steps": 670, "total_steps": 1800, "loss": 0.0441, "lr": 3.957671653960337e-05, "epoch": 7.444444444444445, "percentage": 37.22, "elapsed_time": "0:01:42", "remaining_time": "0:02:53", "throughput": 2037.78, "total_tokens": 209408} | |
| {"current_steps": 675, "total_steps": 1800, "loss": 0.0953, "lr": 3.9379097532515725e-05, "epoch": 7.5, "percentage": 37.5, "elapsed_time": "0:01:43", "remaining_time": "0:02:52", "throughput": 2038.58, "total_tokens": 210912} | |
| {"current_steps": 680, "total_steps": 1800, "loss": 0.1051, "lr": 3.918012664602317e-05, "epoch": 7.555555555555555, "percentage": 37.78, "elapsed_time": "0:01:44", "remaining_time": "0:02:51", "throughput": 2039.97, "total_tokens": 212480} | |
| {"current_steps": 685, "total_steps": 1800, "loss": 0.07, "lr": 3.897982258676867e-05, "epoch": 7.611111111111111, "percentage": 38.06, "elapsed_time": "0:01:44", "remaining_time": "0:02:50", "throughput": 2040.96, "total_tokens": 214016} | |
| {"current_steps": 690, "total_steps": 1800, "loss": 0.0754, "lr": 3.8778204186736076e-05, "epoch": 7.666666666666667, "percentage": 38.33, "elapsed_time": "0:01:45", "remaining_time": "0:02:49", "throughput": 2042.19, "total_tokens": 215584} | |
| {"current_steps": 695, "total_steps": 1800, "loss": 0.0418, "lr": 3.8575290401479586e-05, "epoch": 7.722222222222222, "percentage": 38.61, "elapsed_time": "0:01:46", "remaining_time": "0:02:48", "throughput": 2044.11, "total_tokens": 217216} | |
| {"current_steps": 700, "total_steps": 1800, "loss": 0.1331, "lr": 3.837110030834161e-05, "epoch": 7.777777777777778, "percentage": 38.89, "elapsed_time": "0:01:46", "remaining_time": "0:02:48", "throughput": 2045.1, "total_tokens": 218752} | |
| {"current_steps": 705, "total_steps": 1800, "loss": 0.0038, "lr": 3.8165653104659185e-05, "epoch": 7.833333333333333, "percentage": 39.17, "elapsed_time": "0:01:47", "remaining_time": "0:02:47", "throughput": 2045.79, "total_tokens": 220256} | |
| {"current_steps": 710, "total_steps": 1800, "loss": 0.1427, "lr": 3.79589681059591e-05, "epoch": 7.888888888888889, "percentage": 39.44, "elapsed_time": "0:01:48", "remaining_time": "0:02:46", "throughput": 2046.63, "total_tokens": 221792} | |
| {"current_steps": 715, "total_steps": 1800, "loss": 0.025, "lr": 3.775106474414188e-05, "epoch": 7.944444444444445, "percentage": 39.72, "elapsed_time": "0:01:49", "remaining_time": "0:02:45", "throughput": 2048.56, "total_tokens": 223424} | |
| {"current_steps": 720, "total_steps": 1800, "loss": 0.0137, "lr": 3.75419625656549e-05, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:01:49", "remaining_time": "0:02:44", "throughput": 2049.53, "total_tokens": 225024} | |
| {"current_steps": 720, "total_steps": 1800, "eval_loss": 0.03138193488121033, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:01:50", "remaining_time": "0:02:45", "throughput": 2039.78, "total_tokens": 225024} | |
| {"current_steps": 725, "total_steps": 1800, "loss": 0.0289, "lr": 3.7331681229654635e-05, "epoch": 8.055555555555555, "percentage": 40.28, "elapsed_time": "0:01:51", "remaining_time": "0:02:45", "throughput": 2028.82, "total_tokens": 226560} | |
| {"current_steps": 730, "total_steps": 1800, "loss": 0.0468, "lr": 3.712024050615843e-05, "epoch": 8.11111111111111, "percentage": 40.56, "elapsed_time": "0:01:52", "remaining_time": "0:02:44", "throughput": 2030.1, "total_tokens": 228128} | |
| {"current_steps": 735, "total_steps": 1800, "loss": 0.0158, "lr": 3.690766027418573e-05, "epoch": 8.166666666666666, "percentage": 40.83, "elapsed_time": "0:01:53", "remaining_time": "0:02:43", "throughput": 2031.67, "total_tokens": 229728} | |
| {"current_steps": 740, "total_steps": 1800, "loss": 0.0354, "lr": 3.6693960519889106e-05, "epoch": 8.222222222222221, "percentage": 41.11, "elapsed_time": "0:01:53", "remaining_time": "0:02:42", "throughput": 2033.21, "total_tokens": 231328} | |
| {"current_steps": 745, "total_steps": 1800, "loss": 0.0475, "lr": 3.6479161334675296e-05, "epoch": 8.277777777777779, "percentage": 41.39, "elapsed_time": "0:01:54", "remaining_time": "0:02:42", "throughput": 2034.47, "total_tokens": 232896} | |
| {"current_steps": 750, "total_steps": 1800, "loss": 0.1457, "lr": 3.626328291331618e-05, "epoch": 8.333333333333334, "percentage": 41.67, "elapsed_time": "0:01:55", "remaining_time": "0:02:41", "throughput": 2036.0, "total_tokens": 234496} | |
| {"current_steps": 755, "total_steps": 1800, "loss": 0.092, "lr": 3.60463455520502e-05, "epoch": 8.38888888888889, "percentage": 41.94, "elapsed_time": "0:01:55", "remaining_time": "0:02:40", "throughput": 2037.22, "total_tokens": 236064} | |
| {"current_steps": 760, "total_steps": 1800, "loss": 0.0455, "lr": 3.582836964667408e-05, "epoch": 8.444444444444445, "percentage": 42.22, "elapsed_time": "0:01:56", "remaining_time": "0:02:39", "throughput": 2038.5, "total_tokens": 237632} | |
| {"current_steps": 765, "total_steps": 1800, "loss": 0.034, "lr": 3.560937569062538e-05, "epoch": 8.5, "percentage": 42.5, "elapsed_time": "0:01:57", "remaining_time": "0:02:38", "throughput": 2039.79, "total_tokens": 239200} | |
| {"current_steps": 770, "total_steps": 1800, "loss": 0.0325, "lr": 3.538938427305573e-05, "epoch": 8.555555555555555, "percentage": 42.78, "elapsed_time": "0:01:57", "remaining_time": "0:02:37", "throughput": 2041.63, "total_tokens": 240832} | |
| {"current_steps": 775, "total_steps": 1800, "loss": 0.1802, "lr": 3.516841607689501e-05, "epoch": 8.61111111111111, "percentage": 43.06, "elapsed_time": "0:01:58", "remaining_time": "0:02:36", "throughput": 2042.05, "total_tokens": 242304} | |
| {"current_steps": 780, "total_steps": 1800, "loss": 0.0515, "lr": 3.494649187690695e-05, "epoch": 8.666666666666666, "percentage": 43.33, "elapsed_time": "0:01:59", "remaining_time": "0:02:36", "throughput": 2042.97, "total_tokens": 243840} | |
| {"current_steps": 785, "total_steps": 1800, "loss": 0.0246, "lr": 3.4723632537735846e-05, "epoch": 8.722222222222221, "percentage": 43.61, "elapsed_time": "0:02:00", "remaining_time": "0:02:35", "throughput": 2043.87, "total_tokens": 245376} | |
| {"current_steps": 790, "total_steps": 1800, "loss": 0.0403, "lr": 3.449985901194498e-05, "epoch": 8.777777777777779, "percentage": 43.89, "elapsed_time": "0:02:00", "remaining_time": "0:02:34", "throughput": 2044.45, "total_tokens": 246880} | |
| {"current_steps": 795, "total_steps": 1800, "loss": 0.0739, "lr": 3.427519233804667e-05, "epoch": 8.833333333333334, "percentage": 44.17, "elapsed_time": "0:02:01", "remaining_time": "0:02:33", "throughput": 2045.61, "total_tokens": 248448} | |
| {"current_steps": 800, "total_steps": 1800, "loss": 0.0263, "lr": 3.404965363852437e-05, "epoch": 8.88888888888889, "percentage": 44.44, "elapsed_time": "0:02:02", "remaining_time": "0:02:32", "throughput": 2046.71, "total_tokens": 250016} | |
| {"current_steps": 805, "total_steps": 1800, "loss": 0.0715, "lr": 3.382326411784672e-05, "epoch": 8.944444444444445, "percentage": 44.72, "elapsed_time": "0:02:02", "remaining_time": "0:02:31", "throughput": 2047.83, "total_tokens": 251584} | |
| {"current_steps": 810, "total_steps": 1800, "loss": 0.0757, "lr": 3.359604506047403e-05, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:02:03", "remaining_time": "0:02:31", "throughput": 2048.34, "total_tokens": 253152} | |
| {"current_steps": 810, "total_steps": 1800, "eval_loss": 0.030915316194295883, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:02:04", "remaining_time": "0:02:31", "throughput": 2039.7, "total_tokens": 253152} | |
| {"current_steps": 815, "total_steps": 1800, "loss": 0.0109, "lr": 3.336801782885712e-05, "epoch": 9.055555555555555, "percentage": 45.28, "elapsed_time": "0:02:05", "remaining_time": "0:02:31", "throughput": 2030.06, "total_tokens": 254720} | |
| {"current_steps": 820, "total_steps": 1800, "loss": 0.0521, "lr": 3.313920386142892e-05, "epoch": 9.11111111111111, "percentage": 45.56, "elapsed_time": "0:02:06", "remaining_time": "0:02:30", "throughput": 2031.13, "total_tokens": 256288} | |
| {"current_steps": 825, "total_steps": 1800, "loss": 0.1917, "lr": 3.290962467058891e-05, "epoch": 9.166666666666666, "percentage": 45.83, "elapsed_time": "0:02:06", "remaining_time": "0:02:29", "throughput": 2031.96, "total_tokens": 257824} | |
| {"current_steps": 830, "total_steps": 1800, "loss": 0.0211, "lr": 3.267930184068057e-05, "epoch": 9.222222222222221, "percentage": 46.11, "elapsed_time": "0:02:07", "remaining_time": "0:02:29", "throughput": 2033.05, "total_tokens": 259392} | |
| {"current_steps": 835, "total_steps": 1800, "loss": 0.0899, "lr": 3.244825702596205e-05, "epoch": 9.277777777777779, "percentage": 46.39, "elapsed_time": "0:02:08", "remaining_time": "0:02:28", "throughput": 2033.59, "total_tokens": 260896} | |
| {"current_steps": 840, "total_steps": 1800, "loss": 0.0153, "lr": 3.2216511948570374e-05, "epoch": 9.333333333333334, "percentage": 46.67, "elapsed_time": "0:02:08", "remaining_time": "0:02:27", "throughput": 2034.69, "total_tokens": 262464} | |
| {"current_steps": 845, "total_steps": 1800, "loss": 0.0088, "lr": 3.198408839647911e-05, "epoch": 9.38888888888889, "percentage": 46.94, "elapsed_time": "0:02:09", "remaining_time": "0:02:26", "throughput": 2035.54, "total_tokens": 264000} | |
| {"current_steps": 850, "total_steps": 1800, "loss": 0.0128, "lr": 3.1751008221450025e-05, "epoch": 9.444444444444445, "percentage": 47.22, "elapsed_time": "0:02:10", "remaining_time": "0:02:25", "throughput": 2036.36, "total_tokens": 265536} | |
| {"current_steps": 855, "total_steps": 1800, "loss": 0.0711, "lr": 3.151729333697854e-05, "epoch": 9.5, "percentage": 47.5, "elapsed_time": "0:02:11", "remaining_time": "0:02:24", "throughput": 2036.94, "total_tokens": 267040} | |
| {"current_steps": 860, "total_steps": 1800, "loss": 0.1078, "lr": 3.1282965716233594e-05, "epoch": 9.555555555555555, "percentage": 47.78, "elapsed_time": "0:02:11", "remaining_time": "0:02:24", "throughput": 2037.73, "total_tokens": 268576} | |
| {"current_steps": 865, "total_steps": 1800, "loss": 0.0054, "lr": 3.104804738999169e-05, "epoch": 9.61111111111111, "percentage": 48.06, "elapsed_time": "0:02:12", "remaining_time": "0:02:23", "throughput": 2038.76, "total_tokens": 270144} | |
| {"current_steps": 870, "total_steps": 1800, "loss": 0.0161, "lr": 3.0812560444565745e-05, "epoch": 9.666666666666666, "percentage": 48.33, "elapsed_time": "0:02:13", "remaining_time": "0:02:22", "throughput": 2039.28, "total_tokens": 271648} | |
| {"current_steps": 875, "total_steps": 1800, "loss": 0.048, "lr": 3.057652701972848e-05, "epoch": 9.722222222222221, "percentage": 48.61, "elapsed_time": "0:02:13", "remaining_time": "0:02:21", "throughput": 2040.79, "total_tokens": 273280} | |
| {"current_steps": 880, "total_steps": 1800, "loss": 0.1125, "lr": 3.0339969306631005e-05, "epoch": 9.777777777777779, "percentage": 48.89, "elapsed_time": "0:02:14", "remaining_time": "0:02:20", "throughput": 2042.05, "total_tokens": 274880} | |
| {"current_steps": 885, "total_steps": 1800, "loss": 0.0188, "lr": 3.0102909545716396e-05, "epoch": 9.833333333333334, "percentage": 49.17, "elapsed_time": "0:02:15", "remaining_time": "0:02:19", "throughput": 2043.27, "total_tokens": 276480} | |
| {"current_steps": 890, "total_steps": 1800, "loss": 0.0734, "lr": 2.9865370024628775e-05, "epoch": 9.88888888888889, "percentage": 49.44, "elapsed_time": "0:02:16", "remaining_time": "0:02:19", "throughput": 2044.51, "total_tokens": 278080} | |
| {"current_steps": 895, "total_steps": 1800, "loss": 0.0415, "lr": 2.9627373076117863e-05, "epoch": 9.944444444444445, "percentage": 49.72, "elapsed_time": "0:02:16", "remaining_time": "0:02:18", "throughput": 2045.74, "total_tokens": 279680} | |
| {"current_steps": 900, "total_steps": 1800, "loss": 0.1181, "lr": 2.9388941075939334e-05, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:02:17", "remaining_time": "0:02:17", "throughput": 2046.67, "total_tokens": 281312} | |
| {"current_steps": 900, "total_steps": 1800, "eval_loss": 0.03135865181684494, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:02:17", "remaining_time": "0:02:17", "throughput": 2038.81, "total_tokens": 281312} | |
| {"current_steps": 905, "total_steps": 1800, "loss": 0.0198, "lr": 2.9150096440751107e-05, "epoch": 10.055555555555555, "percentage": 50.28, "elapsed_time": "0:02:19", "remaining_time": "0:02:17", "throughput": 2030.09, "total_tokens": 282848} | |
| {"current_steps": 910, "total_steps": 1800, "loss": 0.0467, "lr": 2.8910861626005776e-05, "epoch": 10.11111111111111, "percentage": 50.56, "elapsed_time": "0:02:20", "remaining_time": "0:02:16", "throughput": 2031.33, "total_tokens": 284448} | |
| {"current_steps": 915, "total_steps": 1800, "loss": 0.0683, "lr": 2.8671259123839472e-05, "epoch": 10.166666666666666, "percentage": 50.83, "elapsed_time": "0:02:20", "remaining_time": "0:02:16", "throughput": 2032.15, "total_tokens": 285984} | |
| {"current_steps": 920, "total_steps": 1800, "loss": 0.0251, "lr": 2.843131146095719e-05, "epoch": 10.222222222222221, "percentage": 51.11, "elapsed_time": "0:02:21", "remaining_time": "0:02:15", "throughput": 2032.75, "total_tokens": 287488} | |
| {"current_steps": 925, "total_steps": 1800, "loss": 0.0927, "lr": 2.8191041196514873e-05, "epoch": 10.277777777777779, "percentage": 51.39, "elapsed_time": "0:02:22", "remaining_time": "0:02:14", "throughput": 2033.61, "total_tokens": 289024} | |
| {"current_steps": 930, "total_steps": 1800, "loss": 0.0875, "lr": 2.795047091999849e-05, "epoch": 10.333333333333334, "percentage": 51.67, "elapsed_time": "0:02:22", "remaining_time": "0:02:13", "throughput": 2034.23, "total_tokens": 290528} | |
| {"current_steps": 935, "total_steps": 1800, "loss": 0.0106, "lr": 2.770962324910027e-05, "epoch": 10.38888888888889, "percentage": 51.94, "elapsed_time": "0:02:23", "remaining_time": "0:02:12", "throughput": 2035.49, "total_tokens": 292128} | |
| {"current_steps": 940, "total_steps": 1800, "loss": 0.0566, "lr": 2.7468520827592197e-05, "epoch": 10.444444444444445, "percentage": 52.22, "elapsed_time": "0:02:24", "remaining_time": "0:02:11", "throughput": 2036.74, "total_tokens": 293728} | |
| {"current_steps": 945, "total_steps": 1800, "loss": 0.0827, "lr": 2.7227186323197162e-05, "epoch": 10.5, "percentage": 52.5, "elapsed_time": "0:02:24", "remaining_time": "0:02:11", "throughput": 2037.49, "total_tokens": 295264} | |
| {"current_steps": 950, "total_steps": 1800, "loss": 0.0499, "lr": 2.6985642425457757e-05, "epoch": 10.555555555555555, "percentage": 52.78, "elapsed_time": "0:02:25", "remaining_time": "0:02:10", "throughput": 2038.86, "total_tokens": 296896} | |
| {"current_steps": 955, "total_steps": 1800, "loss": 0.027, "lr": 2.674391184360313e-05, "epoch": 10.61111111111111, "percentage": 53.06, "elapsed_time": "0:02:26", "remaining_time": "0:02:09", "throughput": 2039.4, "total_tokens": 298400} | |
| {"current_steps": 960, "total_steps": 1800, "loss": 0.0128, "lr": 2.650201730441392e-05, "epoch": 10.666666666666666, "percentage": 53.33, "elapsed_time": "0:02:27", "remaining_time": "0:02:08", "throughput": 2040.07, "total_tokens": 299936} | |
| {"current_steps": 965, "total_steps": 1800, "loss": 0.0147, "lr": 2.6259981550085504e-05, "epoch": 10.722222222222221, "percentage": 53.61, "elapsed_time": "0:02:27", "remaining_time": "0:02:07", "throughput": 2041.27, "total_tokens": 301536} | |
| {"current_steps": 970, "total_steps": 1800, "loss": 0.0585, "lr": 2.60178273360899e-05, "epoch": 10.777777777777779, "percentage": 53.89, "elapsed_time": "0:02:28", "remaining_time": "0:02:06", "throughput": 2042.24, "total_tokens": 303104} | |
| {"current_steps": 975, "total_steps": 1800, "loss": 0.0269, "lr": 2.5775577429036345e-05, "epoch": 10.833333333333334, "percentage": 54.17, "elapsed_time": "0:02:29", "remaining_time": "0:02:06", "throughput": 2043.16, "total_tokens": 304672} | |
| {"current_steps": 980, "total_steps": 1800, "loss": 0.1381, "lr": 2.553325460453086e-05, "epoch": 10.88888888888889, "percentage": 54.44, "elapsed_time": "0:02:29", "remaining_time": "0:02:05", "throughput": 2044.24, "total_tokens": 306272} | |
| {"current_steps": 985, "total_steps": 1800, "loss": 0.0531, "lr": 2.5290881645034932e-05, "epoch": 10.944444444444445, "percentage": 54.72, "elapsed_time": "0:02:30", "remaining_time": "0:02:04", "throughput": 2044.88, "total_tokens": 307808} | |
| {"current_steps": 990, "total_steps": 1800, "loss": 0.0914, "lr": 2.504848133772358e-05, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:02:31", "remaining_time": "0:02:03", "throughput": 2044.67, "total_tokens": 309280} | |
| {"current_steps": 990, "total_steps": 1800, "eval_loss": 0.03227944299578667, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:02:31", "remaining_time": "0:02:04", "throughput": 2037.53, "total_tokens": 309280} | |
| {"current_steps": 995, "total_steps": 1800, "loss": 0.1405, "lr": 2.4806076472342997e-05, "epoch": 11.055555555555555, "percentage": 55.28, "elapsed_time": "0:02:33", "remaining_time": "0:02:03", "throughput": 2030.12, "total_tokens": 310816} | |
| {"current_steps": 1000, "total_steps": 1800, "loss": 0.0503, "lr": 2.4563689839067913e-05, "epoch": 11.11111111111111, "percentage": 55.56, "elapsed_time": "0:02:33", "remaining_time": "0:02:03", "throughput": 2031.25, "total_tokens": 312416} | |
| {"current_steps": 1005, "total_steps": 1800, "loss": 0.0336, "lr": 2.432134422635893e-05, "epoch": 11.166666666666666, "percentage": 55.83, "elapsed_time": "0:02:34", "remaining_time": "0:02:02", "throughput": 2032.54, "total_tokens": 314048} | |
| {"current_steps": 1010, "total_steps": 1800, "loss": 0.0072, "lr": 2.4079062418820002e-05, "epoch": 11.222222222222221, "percentage": 56.11, "elapsed_time": "0:02:35", "remaining_time": "0:02:01", "throughput": 2033.46, "total_tokens": 315616} | |
| {"current_steps": 1015, "total_steps": 1800, "loss": 0.0356, "lr": 2.3836867195056335e-05, "epoch": 11.277777777777779, "percentage": 56.39, "elapsed_time": "0:02:35", "remaining_time": "0:02:00", "throughput": 2033.95, "total_tokens": 317120} | |
| {"current_steps": 1020, "total_steps": 1800, "loss": 0.0729, "lr": 2.3594781325532784e-05, "epoch": 11.333333333333334, "percentage": 56.67, "elapsed_time": "0:02:36", "remaining_time": "0:01:59", "throughput": 2034.83, "total_tokens": 318688} | |
| {"current_steps": 1025, "total_steps": 1800, "loss": 0.0873, "lr": 2.3352827570433036e-05, "epoch": 11.38888888888889, "percentage": 56.94, "elapsed_time": "0:02:37", "remaining_time": "0:01:58", "throughput": 2035.57, "total_tokens": 320224} | |
| {"current_steps": 1030, "total_steps": 1800, "loss": 0.0398, "lr": 2.3111028677519804e-05, "epoch": 11.444444444444445, "percentage": 57.22, "elapsed_time": "0:02:38", "remaining_time": "0:01:58", "throughput": 2036.51, "total_tokens": 321792} | |
| {"current_steps": 1035, "total_steps": 1800, "loss": 0.0751, "lr": 2.2869407379996088e-05, "epoch": 11.5, "percentage": 57.5, "elapsed_time": "0:02:38", "remaining_time": "0:01:57", "throughput": 2037.45, "total_tokens": 323360} | |
| {"current_steps": 1040, "total_steps": 1800, "loss": 0.0256, "lr": 2.2627986394367938e-05, "epoch": 11.555555555555555, "percentage": 57.78, "elapsed_time": "0:02:39", "remaining_time": "0:01:56", "throughput": 2038.16, "total_tokens": 324896} | |
| {"current_steps": 1045, "total_steps": 1800, "loss": 0.0242, "lr": 2.238678841830867e-05, "epoch": 11.61111111111111, "percentage": 58.06, "elapsed_time": "0:02:40", "remaining_time": "0:01:55", "throughput": 2039.27, "total_tokens": 326496} | |
| {"current_steps": 1050, "total_steps": 1800, "loss": 0.0677, "lr": 2.2145836128524902e-05, "epoch": 11.666666666666666, "percentage": 58.33, "elapsed_time": "0:02:40", "remaining_time": "0:01:54", "throughput": 2040.12, "total_tokens": 328064} | |
| {"current_steps": 1055, "total_steps": 1800, "loss": 0.0345, "lr": 2.1905152178624595e-05, "epoch": 11.722222222222221, "percentage": 58.61, "elapsed_time": "0:02:41", "remaining_time": "0:01:54", "throughput": 2041.17, "total_tokens": 329664} | |
| {"current_steps": 1060, "total_steps": 1800, "loss": 0.0826, "lr": 2.1664759196987182e-05, "epoch": 11.777777777777779, "percentage": 58.89, "elapsed_time": "0:02:42", "remaining_time": "0:01:53", "throughput": 2042.41, "total_tokens": 331296} | |
| {"current_steps": 1065, "total_steps": 1800, "loss": 0.013, "lr": 2.1424679784636144e-05, "epoch": 11.833333333333334, "percentage": 59.17, "elapsed_time": "0:02:42", "remaining_time": "0:01:52", "throughput": 2043.04, "total_tokens": 332832} | |
| {"current_steps": 1070, "total_steps": 1800, "loss": 0.0164, "lr": 2.118493651311413e-05, "epoch": 11.88888888888889, "percentage": 59.44, "elapsed_time": "0:02:43", "remaining_time": "0:01:51", "throughput": 2043.87, "total_tokens": 334400} | |
| {"current_steps": 1075, "total_steps": 1800, "loss": 0.0382, "lr": 2.0945551922360818e-05, "epoch": 11.944444444444445, "percentage": 59.72, "elapsed_time": "0:02:44", "remaining_time": "0:01:50", "throughput": 2044.71, "total_tokens": 335968} | |
| {"current_steps": 1080, "total_steps": 1800, "loss": 0.071, "lr": 2.070654851859383e-05, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:02:45", "remaining_time": "0:01:50", "throughput": 2045.0, "total_tokens": 337536} | |
| {"current_steps": 1080, "total_steps": 1800, "eval_loss": 0.03230804204940796, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:02:45", "remaining_time": "0:01:50", "throughput": 2038.43, "total_tokens": 337536} | |
| {"current_steps": 1085, "total_steps": 1800, "loss": 0.0902, "lr": 2.0467948772192713e-05, "epoch": 12.055555555555555, "percentage": 60.28, "elapsed_time": "0:02:46", "remaining_time": "0:01:50", "throughput": 2031.16, "total_tokens": 339072} | |
| {"current_steps": 1090, "total_steps": 1800, "loss": 0.0549, "lr": 2.022977511558638e-05, "epoch": 12.11111111111111, "percentage": 60.56, "elapsed_time": "0:02:47", "remaining_time": "0:01:49", "throughput": 2032.09, "total_tokens": 340640} | |
| {"current_steps": 1095, "total_steps": 1800, "loss": 0.0427, "lr": 1.9992049941144066e-05, "epoch": 12.166666666666666, "percentage": 60.83, "elapsed_time": "0:02:48", "remaining_time": "0:01:48", "throughput": 2032.96, "total_tokens": 342208} | |
| {"current_steps": 1100, "total_steps": 1800, "loss": 0.032, "lr": 1.9754795599070068e-05, "epoch": 12.222222222222221, "percentage": 61.11, "elapsed_time": "0:02:49", "remaining_time": "0:01:47", "throughput": 2033.88, "total_tokens": 343776} | |
| {"current_steps": 1105, "total_steps": 1800, "loss": 0.0227, "lr": 1.9518034395302414e-05, "epoch": 12.277777777777779, "percentage": 61.39, "elapsed_time": "0:02:49", "remaining_time": "0:01:46", "throughput": 2035.01, "total_tokens": 345376} | |
| {"current_steps": 1110, "total_steps": 1800, "loss": 0.0637, "lr": 1.9281788589415804e-05, "epoch": 12.333333333333334, "percentage": 61.67, "elapsed_time": "0:02:50", "remaining_time": "0:01:45", "throughput": 2036.28, "total_tokens": 347008} | |
| {"current_steps": 1115, "total_steps": 1800, "loss": 0.0281, "lr": 1.9046080392528735e-05, "epoch": 12.38888888888889, "percentage": 61.94, "elapsed_time": "0:02:51", "remaining_time": "0:01:45", "throughput": 2037.17, "total_tokens": 348576} | |
| {"current_steps": 1120, "total_steps": 1800, "loss": 0.0172, "lr": 1.8810931965215356e-05, "epoch": 12.444444444444445, "percentage": 62.22, "elapsed_time": "0:02:51", "remaining_time": "0:01:44", "throughput": 2037.88, "total_tokens": 350112} | |
| {"current_steps": 1125, "total_steps": 1800, "loss": 0.0398, "lr": 1.857636541542195e-05, "epoch": 12.5, "percentage": 62.5, "elapsed_time": "0:02:52", "remaining_time": "0:01:43", "throughput": 2038.55, "total_tokens": 351648} | |
| {"current_steps": 1130, "total_steps": 1800, "loss": 0.0236, "lr": 1.8342402796388445e-05, "epoch": 12.555555555555555, "percentage": 62.78, "elapsed_time": "0:02:53", "remaining_time": "0:01:42", "throughput": 2039.48, "total_tokens": 353216} | |
| {"current_steps": 1135, "total_steps": 1800, "loss": 0.0863, "lr": 1.8109066104575023e-05, "epoch": 12.61111111111111, "percentage": 63.06, "elapsed_time": "0:02:53", "remaining_time": "0:01:41", "throughput": 2040.4, "total_tokens": 354784} | |
| {"current_steps": 1140, "total_steps": 1800, "loss": 0.1332, "lr": 1.7876377277594053e-05, "epoch": 12.666666666666666, "percentage": 63.33, "elapsed_time": "0:02:54", "remaining_time": "0:01:41", "throughput": 2041.32, "total_tokens": 356352} | |
| {"current_steps": 1145, "total_steps": 1800, "loss": 0.0848, "lr": 1.764435819214762e-05, "epoch": 12.722222222222221, "percentage": 63.61, "elapsed_time": "0:02:55", "remaining_time": "0:01:40", "throughput": 2042.22, "total_tokens": 357920} | |
| {"current_steps": 1150, "total_steps": 1800, "loss": 0.1068, "lr": 1.7413030661970742e-05, "epoch": 12.777777777777779, "percentage": 63.89, "elapsed_time": "0:02:55", "remaining_time": "0:01:39", "throughput": 2042.94, "total_tokens": 359456} | |
| {"current_steps": 1155, "total_steps": 1800, "loss": 0.0084, "lr": 1.7182416435780454e-05, "epoch": 12.833333333333334, "percentage": 64.17, "elapsed_time": "0:02:56", "remaining_time": "0:01:38", "throughput": 2043.82, "total_tokens": 361024} | |
| {"current_steps": 1160, "total_steps": 1800, "loss": 0.0087, "lr": 1.695253719523115e-05, "epoch": 12.88888888888889, "percentage": 64.44, "elapsed_time": "0:02:57", "remaining_time": "0:01:37", "throughput": 2044.35, "total_tokens": 362528} | |
| {"current_steps": 1165, "total_steps": 1800, "loss": 0.0241, "lr": 1.672341455287605e-05, "epoch": 12.944444444444445, "percentage": 64.72, "elapsed_time": "0:02:58", "remaining_time": "0:01:37", "throughput": 2045.04, "total_tokens": 364064} | |
| {"current_steps": 1170, "total_steps": 1800, "loss": 0.0213, "lr": 1.649507005013532e-05, "epoch": 13.0, "percentage": 65.0, "elapsed_time": "0:02:58", "remaining_time": "0:01:36", "throughput": 2045.47, "total_tokens": 365632} | |
| {"current_steps": 1170, "total_steps": 1800, "eval_loss": 0.03144800662994385, "epoch": 13.0, "percentage": 65.0, "elapsed_time": "0:02:59", "remaining_time": "0:01:36", "throughput": 2039.53, "total_tokens": 365632} | |
| {"current_steps": 1175, "total_steps": 1800, "loss": 0.0552, "lr": 1.6267525155270773e-05, "epoch": 13.055555555555555, "percentage": 65.28, "elapsed_time": "0:03:00", "remaining_time": "0:01:36", "throughput": 2030.91, "total_tokens": 367232} | |
| {"current_steps": 1180, "total_steps": 1800, "loss": 0.0211, "lr": 1.6040801261367493e-05, "epoch": 13.11111111111111, "percentage": 65.56, "elapsed_time": "0:03:01", "remaining_time": "0:01:35", "throughput": 2031.69, "total_tokens": 368800} | |
| {"current_steps": 1185, "total_steps": 1800, "loss": 0.0226, "lr": 1.5814919684322545e-05, "epoch": 13.166666666666666, "percentage": 65.83, "elapsed_time": "0:03:02", "remaining_time": "0:01:34", "throughput": 2032.31, "total_tokens": 370336} | |
| {"current_steps": 1190, "total_steps": 1800, "loss": 0.0298, "lr": 1.5589901660840896e-05, "epoch": 13.222222222222221, "percentage": 66.11, "elapsed_time": "0:03:02", "remaining_time": "0:01:33", "throughput": 2032.91, "total_tokens": 371872} | |
| {"current_steps": 1195, "total_steps": 1800, "loss": 0.0291, "lr": 1.5365768346438797e-05, "epoch": 13.277777777777779, "percentage": 66.39, "elapsed_time": "0:03:03", "remaining_time": "0:01:32", "throughput": 2033.85, "total_tokens": 373472} | |
| {"current_steps": 1200, "total_steps": 1800, "loss": 0.0384, "lr": 1.5142540813454836e-05, "epoch": 13.333333333333334, "percentage": 66.67, "elapsed_time": "0:03:04", "remaining_time": "0:01:32", "throughput": 2034.6, "total_tokens": 375040} | |
| {"current_steps": 1205, "total_steps": 1800, "loss": 0.0593, "lr": 1.4920240049068748e-05, "epoch": 13.38888888888889, "percentage": 66.94, "elapsed_time": "0:03:05", "remaining_time": "0:01:31", "throughput": 2034.84, "total_tokens": 376512} | |
| {"current_steps": 1210, "total_steps": 1800, "loss": 0.0989, "lr": 1.4698886953328292e-05, "epoch": 13.444444444444445, "percentage": 67.22, "elapsed_time": "0:03:05", "remaining_time": "0:01:30", "throughput": 2035.78, "total_tokens": 378112} | |
| {"current_steps": 1215, "total_steps": 1800, "loss": 0.0304, "lr": 1.4478502337184274e-05, "epoch": 13.5, "percentage": 67.5, "elapsed_time": "0:03:06", "remaining_time": "0:01:29", "throughput": 2034.88, "total_tokens": 379648} | |
| {"current_steps": 1220, "total_steps": 1800, "loss": 0.1141, "lr": 1.4259106920533955e-05, "epoch": 13.555555555555555, "percentage": 67.78, "elapsed_time": "0:03:07", "remaining_time": "0:01:29", "throughput": 2035.51, "total_tokens": 381184} | |
| {"current_steps": 1225, "total_steps": 1800, "loss": 0.0132, "lr": 1.4040721330273062e-05, "epoch": 13.61111111111111, "percentage": 68.06, "elapsed_time": "0:03:07", "remaining_time": "0:01:28", "throughput": 2036.3, "total_tokens": 382752} | |
| {"current_steps": 1230, "total_steps": 1800, "loss": 0.0147, "lr": 1.3823366098356487e-05, "epoch": 13.666666666666666, "percentage": 68.33, "elapsed_time": "0:03:08", "remaining_time": "0:01:27", "throughput": 2037.1, "total_tokens": 384320} | |
| {"current_steps": 1235, "total_steps": 1800, "loss": 0.0466, "lr": 1.3607061659867892e-05, "epoch": 13.722222222222221, "percentage": 68.61, "elapsed_time": "0:03:09", "remaining_time": "0:01:26", "throughput": 2037.92, "total_tokens": 385888} | |
| {"current_steps": 1240, "total_steps": 1800, "loss": 0.0416, "lr": 1.3391828351098578e-05, "epoch": 13.777777777777779, "percentage": 68.89, "elapsed_time": "0:03:10", "remaining_time": "0:01:25", "throughput": 2038.93, "total_tokens": 387488} | |
| {"current_steps": 1245, "total_steps": 1800, "loss": 0.0721, "lr": 1.3177686407635417e-05, "epoch": 13.833333333333334, "percentage": 69.17, "elapsed_time": "0:03:10", "remaining_time": "0:01:25", "throughput": 2039.58, "total_tokens": 389024} | |
| {"current_steps": 1250, "total_steps": 1800, "loss": 0.0988, "lr": 1.29646559624584e-05, "epoch": 13.88888888888889, "percentage": 69.44, "elapsed_time": "0:03:11", "remaining_time": "0:01:24", "throughput": 2039.38, "total_tokens": 390528} | |
| {"current_steps": 1255, "total_steps": 1800, "loss": 0.0317, "lr": 1.2752757044047827e-05, "epoch": 13.944444444444445, "percentage": 69.72, "elapsed_time": "0:03:12", "remaining_time": "0:01:23", "throughput": 2040.2, "total_tokens": 392096} | |
| {"current_steps": 1260, "total_steps": 1800, "loss": 0.0429, "lr": 1.2542009574501246e-05, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:03:12", "remaining_time": "0:01:22", "throughput": 2040.46, "total_tokens": 393632} | |
| {"current_steps": 1260, "total_steps": 1800, "eval_loss": 0.032425910234451294, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:03:13", "remaining_time": "0:01:22", "throughput": 2035.0, "total_tokens": 393632} | |
| {"current_steps": 1265, "total_steps": 1800, "loss": 0.013, "lr": 1.2332433367660442e-05, "epoch": 14.055555555555555, "percentage": 70.28, "elapsed_time": "0:03:14", "remaining_time": "0:01:22", "throughput": 2026.37, "total_tokens": 395136} | |
| {"current_steps": 1270, "total_steps": 1800, "loss": 0.0371, "lr": 1.2124048127248644e-05, "epoch": 14.11111111111111, "percentage": 70.56, "elapsed_time": "0:03:15", "remaining_time": "0:01:21", "throughput": 2026.91, "total_tokens": 396672} | |
| {"current_steps": 1275, "total_steps": 1800, "loss": 0.0286, "lr": 1.1916873445017982e-05, "epoch": 14.166666666666666, "percentage": 70.83, "elapsed_time": "0:03:16", "remaining_time": "0:01:20", "throughput": 2027.84, "total_tokens": 398272} | |
| {"current_steps": 1280, "total_steps": 1800, "loss": 0.0367, "lr": 1.1710928798907556e-05, "epoch": 14.222222222222221, "percentage": 71.11, "elapsed_time": "0:03:17", "remaining_time": "0:01:20", "throughput": 2028.42, "total_tokens": 399808} | |
| {"current_steps": 1285, "total_steps": 1800, "loss": 0.0487, "lr": 1.1506233551212186e-05, "epoch": 14.277777777777779, "percentage": 71.39, "elapsed_time": "0:03:17", "remaining_time": "0:01:19", "throughput": 2028.99, "total_tokens": 401344} | |
| {"current_steps": 1290, "total_steps": 1800, "loss": 0.0173, "lr": 1.1302806946762004e-05, "epoch": 14.333333333333334, "percentage": 71.67, "elapsed_time": "0:03:18", "remaining_time": "0:01:18", "throughput": 2029.74, "total_tokens": 402912} | |
| {"current_steps": 1295, "total_steps": 1800, "loss": 0.0906, "lr": 1.1100668111113166e-05, "epoch": 14.38888888888889, "percentage": 71.94, "elapsed_time": "0:03:19", "remaining_time": "0:01:17", "throughput": 2029.32, "total_tokens": 404512} | |
| {"current_steps": 1300, "total_steps": 1800, "loss": 0.0409, "lr": 1.0899836048749645e-05, "epoch": 14.444444444444445, "percentage": 72.22, "elapsed_time": "0:03:20", "remaining_time": "0:01:16", "throughput": 2029.95, "total_tokens": 406048} | |
| {"current_steps": 1305, "total_steps": 1800, "loss": 0.0137, "lr": 1.0700329641296541e-05, "epoch": 14.5, "percentage": 72.5, "elapsed_time": "0:03:20", "remaining_time": "0:01:16", "throughput": 2030.81, "total_tokens": 407648} | |
| {"current_steps": 1310, "total_steps": 1800, "loss": 0.04, "lr": 1.0502167645744895e-05, "epoch": 14.555555555555555, "percentage": 72.78, "elapsed_time": "0:03:21", "remaining_time": "0:01:15", "throughput": 2031.58, "total_tokens": 409216} | |
| {"current_steps": 1315, "total_steps": 1800, "loss": 0.018, "lr": 1.0305368692688174e-05, "epoch": 14.61111111111111, "percentage": 73.06, "elapsed_time": "0:03:22", "remaining_time": "0:01:14", "throughput": 2032.02, "total_tokens": 410720} | |
| {"current_steps": 1320, "total_steps": 1800, "loss": 0.0953, "lr": 1.01099512845707e-05, "epoch": 14.666666666666666, "percentage": 73.33, "elapsed_time": "0:03:22", "remaining_time": "0:01:13", "throughput": 2032.62, "total_tokens": 412256} | |
| {"current_steps": 1325, "total_steps": 1800, "loss": 0.0141, "lr": 9.91593379394811e-06, "epoch": 14.722222222222221, "percentage": 73.61, "elapsed_time": "0:03:23", "remaining_time": "0:01:12", "throughput": 2033.39, "total_tokens": 413824} | |
| {"current_steps": 1330, "total_steps": 1800, "loss": 0.0411, "lr": 9.723334461760006e-06, "epoch": 14.777777777777779, "percentage": 73.89, "elapsed_time": "0:03:24", "remaining_time": "0:01:12", "throughput": 2034.13, "total_tokens": 415392} | |
| {"current_steps": 1335, "total_steps": 1800, "loss": 0.0173, "lr": 9.532171395615036e-06, "epoch": 14.833333333333334, "percentage": 74.17, "elapsed_time": "0:03:24", "remaining_time": "0:01:11", "throughput": 2035.19, "total_tokens": 417024} | |
| {"current_steps": 1340, "total_steps": 1800, "loss": 0.0501, "lr": 9.342462568088416e-06, "epoch": 14.88888888888889, "percentage": 74.44, "elapsed_time": "0:03:25", "remaining_time": "0:01:10", "throughput": 2035.92, "total_tokens": 418592} | |
| {"current_steps": 1345, "total_steps": 1800, "loss": 0.0458, "lr": 9.154225815032242e-06, "epoch": 14.944444444444445, "percentage": 74.72, "elapsed_time": "0:03:26", "remaining_time": "0:01:09", "throughput": 2036.5, "total_tokens": 420128} | |
| {"current_steps": 1350, "total_steps": 1800, "loss": 0.191, "lr": 8.967478833898612e-06, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:03:27", "remaining_time": "0:01:09", "throughput": 2036.88, "total_tokens": 421696} | |
| {"current_steps": 1350, "total_steps": 1800, "eval_loss": 0.031447865068912506, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:03:27", "remaining_time": "0:01:09", "throughput": 2031.8, "total_tokens": 421696} | |
| {"current_steps": 1355, "total_steps": 1800, "loss": 0.008, "lr": 8.78223918207575e-06, "epoch": 15.055555555555555, "percentage": 75.28, "elapsed_time": "0:03:29", "remaining_time": "0:01:08", "throughput": 2023.67, "total_tokens": 423264} | |
| {"current_steps": 1360, "total_steps": 1800, "loss": 0.0171, "lr": 8.598524275237322e-06, "epoch": 15.11111111111111, "percentage": 75.56, "elapsed_time": "0:03:29", "remaining_time": "0:01:07", "throughput": 2024.48, "total_tokens": 424864} | |
| {"current_steps": 1365, "total_steps": 1800, "loss": 0.1021, "lr": 8.41635138570507e-06, "epoch": 15.166666666666666, "percentage": 75.83, "elapsed_time": "0:03:30", "remaining_time": "0:01:07", "throughput": 2025.25, "total_tokens": 426464} | |
| {"current_steps": 1370, "total_steps": 1800, "loss": 0.0648, "lr": 8.235737640824908e-06, "epoch": 15.222222222222221, "percentage": 76.11, "elapsed_time": "0:03:31", "remaining_time": "0:01:06", "throughput": 2025.94, "total_tokens": 428032} | |
| {"current_steps": 1375, "total_steps": 1800, "loss": 0.0877, "lr": 8.056700021356694e-06, "epoch": 15.277777777777779, "percentage": 76.39, "elapsed_time": "0:03:32", "remaining_time": "0:01:05", "throughput": 2025.6, "total_tokens": 429632} | |
| {"current_steps": 1380, "total_steps": 1800, "loss": 0.0344, "lr": 7.879255359877705e-06, "epoch": 15.333333333333334, "percentage": 76.67, "elapsed_time": "0:03:32", "remaining_time": "0:01:04", "throughput": 2026.47, "total_tokens": 431232} | |
| {"current_steps": 1385, "total_steps": 1800, "loss": 0.0281, "lr": 7.703420339200101e-06, "epoch": 15.38888888888889, "percentage": 76.94, "elapsed_time": "0:03:33", "remaining_time": "0:01:03", "throughput": 2027.29, "total_tokens": 432832} | |
| {"current_steps": 1390, "total_steps": 1800, "loss": 0.005, "lr": 7.529211490802498e-06, "epoch": 15.444444444444445, "percentage": 77.22, "elapsed_time": "0:03:34", "remaining_time": "0:01:03", "throughput": 2028.02, "total_tokens": 434400} | |
| {"current_steps": 1395, "total_steps": 1800, "loss": 0.0228, "lr": 7.3566451932756744e-06, "epoch": 15.5, "percentage": 77.5, "elapsed_time": "0:03:34", "remaining_time": "0:01:02", "throughput": 2028.78, "total_tokens": 435968} | |
| {"current_steps": 1400, "total_steps": 1800, "loss": 0.0176, "lr": 7.185737670782727e-06, "epoch": 15.555555555555555, "percentage": 77.78, "elapsed_time": "0:03:35", "remaining_time": "0:01:01", "throughput": 2029.37, "total_tokens": 437504} | |
| {"current_steps": 1405, "total_steps": 1800, "loss": 0.0193, "lr": 7.016504991533726e-06, "epoch": 15.61111111111111, "percentage": 78.06, "elapsed_time": "0:03:36", "remaining_time": "0:01:00", "throughput": 2030.11, "total_tokens": 439072} | |
| {"current_steps": 1410, "total_steps": 1800, "loss": 0.0193, "lr": 6.848963066275027e-06, "epoch": 15.666666666666666, "percentage": 78.33, "elapsed_time": "0:03:36", "remaining_time": "0:01:00", "throughput": 2030.69, "total_tokens": 440608} | |
| {"current_steps": 1415, "total_steps": 1800, "loss": 0.0615, "lr": 6.683127646793411e-06, "epoch": 15.722222222222221, "percentage": 78.61, "elapsed_time": "0:03:37", "remaining_time": "0:00:59", "throughput": 2031.27, "total_tokens": 442144} | |
| {"current_steps": 1420, "total_steps": 1800, "loss": 0.0944, "lr": 6.519014324435102e-06, "epoch": 15.777777777777779, "percentage": 78.89, "elapsed_time": "0:03:38", "remaining_time": "0:00:58", "throughput": 2031.86, "total_tokens": 443680} | |
| {"current_steps": 1425, "total_steps": 1800, "loss": 0.0493, "lr": 6.356638528639955e-06, "epoch": 15.833333333333334, "percentage": 79.17, "elapsed_time": "0:03:39", "remaining_time": "0:00:57", "throughput": 2032.59, "total_tokens": 445248} | |
| {"current_steps": 1430, "total_steps": 1800, "loss": 0.0348, "lr": 6.196015525490825e-06, "epoch": 15.88888888888889, "percentage": 79.44, "elapsed_time": "0:03:39", "remaining_time": "0:00:56", "throughput": 2033.34, "total_tokens": 446816} | |
| {"current_steps": 1435, "total_steps": 1800, "loss": 0.1467, "lr": 6.037160416278278e-06, "epoch": 15.944444444444445, "percentage": 79.72, "elapsed_time": "0:03:40", "remaining_time": "0:00:56", "throughput": 2034.04, "total_tokens": 448384} | |
| {"current_steps": 1440, "total_steps": 1800, "loss": 0.0206, "lr": 5.880088136080814e-06, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:03:41", "remaining_time": "0:00:55", "throughput": 2034.51, "total_tokens": 449984} | |
| {"current_steps": 1440, "total_steps": 1800, "eval_loss": 0.03053303435444832, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:03:41", "remaining_time": "0:00:55", "throughput": 2029.74, "total_tokens": 449984} | |
| {"current_steps": 1445, "total_steps": 1800, "loss": 0.0467, "lr": 5.724813452360736e-06, "epoch": 16.055555555555557, "percentage": 80.28, "elapsed_time": "0:03:43", "remaining_time": "0:00:54", "throughput": 2022.96, "total_tokens": 451520} | |
| {"current_steps": 1450, "total_steps": 1800, "loss": 0.0764, "lr": 5.571350963575728e-06, "epoch": 16.11111111111111, "percentage": 80.56, "elapsed_time": "0:03:43", "remaining_time": "0:00:54", "throughput": 2023.61, "total_tokens": 453088} | |
| {"current_steps": 1455, "total_steps": 1800, "loss": 0.0392, "lr": 5.4197150978063965e-06, "epoch": 16.166666666666668, "percentage": 80.83, "elapsed_time": "0:03:44", "remaining_time": "0:00:53", "throughput": 2023.98, "total_tokens": 454592} | |
| {"current_steps": 1460, "total_steps": 1800, "loss": 0.0565, "lr": 5.269920111399732e-06, "epoch": 16.22222222222222, "percentage": 81.11, "elapsed_time": "0:03:45", "remaining_time": "0:00:52", "throughput": 2024.45, "total_tokens": 456128} | |
| {"current_steps": 1465, "total_steps": 1800, "loss": 0.0834, "lr": 5.121980087628803e-06, "epoch": 16.27777777777778, "percentage": 81.39, "elapsed_time": "0:03:46", "remaining_time": "0:00:51", "throughput": 2024.21, "total_tokens": 457760} | |
| {"current_steps": 1470, "total_steps": 1800, "loss": 0.0131, "lr": 4.975908935368701e-06, "epoch": 16.333333333333332, "percentage": 81.67, "elapsed_time": "0:03:46", "remaining_time": "0:00:50", "throughput": 2024.92, "total_tokens": 459328} | |
| {"current_steps": 1475, "total_steps": 1800, "loss": 0.013, "lr": 4.831720387788827e-06, "epoch": 16.38888888888889, "percentage": 81.94, "elapsed_time": "0:03:47", "remaining_time": "0:00:50", "throughput": 2025.88, "total_tokens": 460960} | |
| {"current_steps": 1480, "total_steps": 1800, "loss": 0.0327, "lr": 4.689428001061774e-06, "epoch": 16.444444444444443, "percentage": 82.22, "elapsed_time": "0:03:48", "remaining_time": "0:00:49", "throughput": 2026.6, "total_tokens": 462528} | |
| {"current_steps": 1485, "total_steps": 1800, "loss": 0.0178, "lr": 4.549045153088813e-06, "epoch": 16.5, "percentage": 82.5, "elapsed_time": "0:03:48", "remaining_time": "0:00:48", "throughput": 2027.18, "total_tokens": 464064} | |
| {"current_steps": 1490, "total_steps": 1800, "loss": 0.0341, "lr": 4.410585042242124e-06, "epoch": 16.555555555555557, "percentage": 82.78, "elapsed_time": "0:03:49", "remaining_time": "0:00:47", "throughput": 2027.72, "total_tokens": 465600} | |
| {"current_steps": 1495, "total_steps": 1800, "loss": 0.0083, "lr": 4.274060686123959e-06, "epoch": 16.61111111111111, "percentage": 83.06, "elapsed_time": "0:03:50", "remaining_time": "0:00:46", "throughput": 2028.41, "total_tokens": 467168} | |
| {"current_steps": 1500, "total_steps": 1800, "loss": 0.0652, "lr": 4.1394849203427284e-06, "epoch": 16.666666666666668, "percentage": 83.33, "elapsed_time": "0:03:51", "remaining_time": "0:00:46", "throughput": 2029.23, "total_tokens": 468768} | |
| {"current_steps": 1505, "total_steps": 1800, "loss": 0.0779, "lr": 4.006870397306256e-06, "epoch": 16.72222222222222, "percentage": 83.61, "elapsed_time": "0:03:51", "remaining_time": "0:00:45", "throughput": 2029.91, "total_tokens": 470336} | |
| {"current_steps": 1510, "total_steps": 1800, "loss": 0.0191, "lr": 3.876229585032245e-06, "epoch": 16.77777777777778, "percentage": 83.89, "elapsed_time": "0:03:52", "remaining_time": "0:00:44", "throughput": 2030.45, "total_tokens": 471872} | |
| {"current_steps": 1515, "total_steps": 1800, "loss": 0.0345, "lr": 3.7475747659760502e-06, "epoch": 16.833333333333332, "percentage": 84.17, "elapsed_time": "0:03:53", "remaining_time": "0:00:43", "throughput": 2030.98, "total_tokens": 473408} | |
| {"current_steps": 1520, "total_steps": 1800, "loss": 0.0935, "lr": 3.6209180358759394e-06, "epoch": 16.88888888888889, "percentage": 84.44, "elapsed_time": "0:03:53", "remaining_time": "0:00:43", "throughput": 2031.24, "total_tokens": 474880} | |
| {"current_steps": 1525, "total_steps": 1800, "loss": 0.0533, "lr": 3.4962713026158694e-06, "epoch": 16.944444444444443, "percentage": 84.72, "elapsed_time": "0:03:54", "remaining_time": "0:00:42", "throughput": 2031.91, "total_tokens": 476448} | |
| {"current_steps": 1530, "total_steps": 1800, "loss": 0.0522, "lr": 3.373646285105958e-06, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:03:55", "remaining_time": "0:00:41", "throughput": 2032.27, "total_tokens": 478016} | |
| {"current_steps": 1530, "total_steps": 1800, "eval_loss": 0.031296148896217346, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:03:55", "remaining_time": "0:00:41", "throughput": 2027.81, "total_tokens": 478016} | |
| {"current_steps": 1535, "total_steps": 1800, "loss": 0.0107, "lr": 3.2530545121807145e-06, "epoch": 17.055555555555557, "percentage": 85.28, "elapsed_time": "0:03:57", "remaining_time": "0:00:40", "throughput": 2021.6, "total_tokens": 479584} | |
| {"current_steps": 1540, "total_steps": 1800, "loss": 0.0541, "lr": 3.1345073215151066e-06, "epoch": 17.11111111111111, "percentage": 85.56, "elapsed_time": "0:03:57", "remaining_time": "0:00:40", "throughput": 2022.33, "total_tokens": 481184} | |
| {"current_steps": 1545, "total_steps": 1800, "loss": 0.0023, "lr": 3.0180158585586397e-06, "epoch": 17.166666666666668, "percentage": 85.83, "elapsed_time": "0:03:58", "remaining_time": "0:00:39", "throughput": 2022.95, "total_tokens": 482752} | |
| {"current_steps": 1550, "total_steps": 1800, "loss": 0.0237, "lr": 2.9035910754875136e-06, "epoch": 17.22222222222222, "percentage": 86.11, "elapsed_time": "0:03:59", "remaining_time": "0:00:38", "throughput": 2023.42, "total_tokens": 484288} | |
| {"current_steps": 1555, "total_steps": 1800, "loss": 0.0122, "lr": 2.7912437301749026e-06, "epoch": 17.27777777777778, "percentage": 86.39, "elapsed_time": "0:04:00", "remaining_time": "0:00:37", "throughput": 2023.59, "total_tokens": 485824} | |
| {"current_steps": 1560, "total_steps": 1800, "loss": 0.0097, "lr": 2.6809843851795357e-06, "epoch": 17.333333333333332, "percentage": 86.67, "elapsed_time": "0:04:00", "remaining_time": "0:00:37", "throughput": 2023.12, "total_tokens": 487328} | |
| {"current_steps": 1565, "total_steps": 1800, "loss": 0.0152, "lr": 2.57282340675267e-06, "epoch": 17.38888888888889, "percentage": 86.94, "elapsed_time": "0:04:01", "remaining_time": "0:00:36", "throughput": 2023.77, "total_tokens": 488896} | |
| {"current_steps": 1570, "total_steps": 1800, "loss": 0.0522, "lr": 2.4667709638634434e-06, "epoch": 17.444444444444443, "percentage": 87.22, "elapsed_time": "0:04:02", "remaining_time": "0:00:35", "throughput": 2024.27, "total_tokens": 490432} | |
| {"current_steps": 1575, "total_steps": 1800, "loss": 0.0731, "lr": 2.3628370272428564e-06, "epoch": 17.5, "percentage": 87.5, "elapsed_time": "0:04:02", "remaining_time": "0:00:34", "throughput": 2025.08, "total_tokens": 492032} | |
| {"current_steps": 1580, "total_steps": 1800, "loss": 0.1432, "lr": 2.2610313684463177e-06, "epoch": 17.555555555555557, "percentage": 87.78, "elapsed_time": "0:04:03", "remaining_time": "0:00:33", "throughput": 2025.78, "total_tokens": 493600} | |
| {"current_steps": 1585, "total_steps": 1800, "loss": 0.0117, "lr": 2.1613635589349756e-06, "epoch": 17.61111111111111, "percentage": 88.06, "elapsed_time": "0:04:04", "remaining_time": "0:00:33", "throughput": 2026.59, "total_tokens": 495200} | |
| {"current_steps": 1590, "total_steps": 1800, "loss": 0.1051, "lr": 2.063842969175847e-06, "epoch": 17.666666666666668, "percentage": 88.33, "elapsed_time": "0:04:05", "remaining_time": "0:00:32", "throughput": 2027.24, "total_tokens": 496768} | |
| {"current_steps": 1595, "total_steps": 1800, "loss": 0.0318, "lr": 1.968478767760812e-06, "epoch": 17.72222222222222, "percentage": 88.61, "elapsed_time": "0:04:05", "remaining_time": "0:00:31", "throughput": 2027.88, "total_tokens": 498336} | |
| {"current_steps": 1600, "total_steps": 1800, "loss": 0.0489, "lr": 1.8752799205445982e-06, "epoch": 17.77777777777778, "percentage": 88.89, "elapsed_time": "0:04:06", "remaining_time": "0:00:30", "throughput": 2028.65, "total_tokens": 499936} | |
| {"current_steps": 1605, "total_steps": 1800, "loss": 0.058, "lr": 1.784255189801895e-06, "epoch": 17.833333333333332, "percentage": 89.17, "elapsed_time": "0:04:07", "remaining_time": "0:00:30", "throughput": 2029.29, "total_tokens": 501504} | |
| {"current_steps": 1610, "total_steps": 1800, "loss": 0.0404, "lr": 1.6954131334034922e-06, "epoch": 17.88888888888889, "percentage": 89.44, "elapsed_time": "0:04:07", "remaining_time": "0:00:29", "throughput": 2029.76, "total_tokens": 503040} | |
| {"current_steps": 1615, "total_steps": 1800, "loss": 0.0824, "lr": 1.6087621040117157e-06, "epoch": 17.944444444444443, "percentage": 89.72, "elapsed_time": "0:04:08", "remaining_time": "0:00:28", "throughput": 2030.66, "total_tokens": 504672} | |
| {"current_steps": 1620, "total_steps": 1800, "loss": 0.0282, "lr": 1.524310248295152e-06, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:04:09", "remaining_time": "0:00:27", "throughput": 2031.11, "total_tokens": 506272} | |
| {"current_steps": 1620, "total_steps": 1800, "eval_loss": 0.03161947801709175, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:04:09", "remaining_time": "0:00:27", "throughput": 2026.87, "total_tokens": 506272} | |
| {"current_steps": 1625, "total_steps": 1800, "loss": 0.0353, "lr": 1.4420655061626932e-06, "epoch": 18.055555555555557, "percentage": 90.28, "elapsed_time": "0:04:11", "remaining_time": "0:00:27", "throughput": 2021.02, "total_tokens": 507904} | |
| {"current_steps": 1630, "total_steps": 1800, "loss": 0.0085, "lr": 1.362035610017079e-06, "epoch": 18.11111111111111, "percentage": 90.56, "elapsed_time": "0:04:12", "remaining_time": "0:00:26", "throughput": 2021.73, "total_tokens": 509504} | |
| {"current_steps": 1635, "total_steps": 1800, "loss": 0.0286, "lr": 1.2842280840278997e-06, "epoch": 18.166666666666668, "percentage": 90.83, "elapsed_time": "0:04:12", "remaining_time": "0:00:25", "throughput": 2022.19, "total_tokens": 511040} | |
| {"current_steps": 1640, "total_steps": 1800, "loss": 0.0749, "lr": 1.2086502434241865e-06, "epoch": 18.22222222222222, "percentage": 91.11, "elapsed_time": "0:04:13", "remaining_time": "0:00:24", "throughput": 2022.65, "total_tokens": 512576} | |
| {"current_steps": 1645, "total_steps": 1800, "loss": 0.0364, "lr": 1.1353091938067023e-06, "epoch": 18.27777777777778, "percentage": 91.39, "elapsed_time": "0:04:14", "remaining_time": "0:00:23", "throughput": 2023.23, "total_tokens": 514144} | |
| {"current_steps": 1650, "total_steps": 1800, "loss": 0.0779, "lr": 1.0642118304798442e-06, "epoch": 18.333333333333332, "percentage": 91.67, "elapsed_time": "0:04:14", "remaining_time": "0:00:23", "throughput": 2023.92, "total_tokens": 515744} | |
| {"current_steps": 1655, "total_steps": 1800, "loss": 0.0109, "lr": 9.95364837803392e-07, "epoch": 18.38888888888889, "percentage": 91.94, "elapsed_time": "0:04:15", "remaining_time": "0:00:22", "throughput": 2023.53, "total_tokens": 517312} | |
| {"current_steps": 1660, "total_steps": 1800, "loss": 0.031, "lr": 9.287746885640603e-07, "epoch": 18.444444444444443, "percentage": 92.22, "elapsed_time": "0:04:16", "remaining_time": "0:00:21", "throughput": 2024.14, "total_tokens": 518880} | |
| {"current_steps": 1665, "total_steps": 1800, "loss": 0.0066, "lr": 8.64447643366953e-07, "epoch": 18.5, "percentage": 92.5, "elapsed_time": "0:04:17", "remaining_time": "0:00:20", "throughput": 2024.85, "total_tokens": 520480} | |
| {"current_steps": 1670, "total_steps": 1800, "loss": 0.011, "lr": 8.023897500469391e-07, "epoch": 18.555555555555557, "percentage": 92.78, "elapsed_time": "0:04:17", "remaining_time": "0:00:20", "throughput": 2025.33, "total_tokens": 522016} | |
| {"current_steps": 1675, "total_steps": 1800, "loss": 0.0319, "lr": 7.426068431000882e-07, "epoch": 18.61111111111111, "percentage": 93.06, "elapsed_time": "0:04:18", "remaining_time": "0:00:19", "throughput": 2025.86, "total_tokens": 523552} | |
| {"current_steps": 1680, "total_steps": 1800, "loss": 0.0489, "lr": 6.851045431350927e-07, "epoch": 18.666666666666668, "percentage": 93.33, "elapsed_time": "0:04:19", "remaining_time": "0:00:18", "throughput": 2026.36, "total_tokens": 525088} | |
| {"current_steps": 1685, "total_steps": 1800, "loss": 0.0319, "lr": 6.298882563448599e-07, "epoch": 18.72222222222222, "percentage": 93.61, "elapsed_time": "0:04:19", "remaining_time": "0:00:17", "throughput": 2027.11, "total_tokens": 526688} | |
| {"current_steps": 1690, "total_steps": 1800, "loss": 0.1109, "lr": 5.769631739982267e-07, "epoch": 18.77777777777778, "percentage": 93.89, "elapsed_time": "0:04:20", "remaining_time": "0:00:16", "throughput": 2027.85, "total_tokens": 528288} | |
| {"current_steps": 1695, "total_steps": 1800, "loss": 0.0714, "lr": 5.263342719518921e-07, "epoch": 18.833333333333332, "percentage": 94.17, "elapsed_time": "0:04:21", "remaining_time": "0:00:16", "throughput": 2028.58, "total_tokens": 529888} | |
| {"current_steps": 1700, "total_steps": 1800, "loss": 0.1008, "lr": 4.780063101826132e-07, "epoch": 18.88888888888889, "percentage": 94.44, "elapsed_time": "0:04:21", "remaining_time": "0:00:15", "throughput": 2028.82, "total_tokens": 531360} | |
| {"current_steps": 1705, "total_steps": 1800, "loss": 0.0675, "lr": 4.319838323396691e-07, "epoch": 18.944444444444443, "percentage": 94.72, "elapsed_time": "0:04:22", "remaining_time": "0:00:14", "throughput": 2029.32, "total_tokens": 532896} | |
| {"current_steps": 1710, "total_steps": 1800, "loss": 0.0144, "lr": 3.88271165317694e-07, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:04:23", "remaining_time": "0:00:13", "throughput": 2029.52, "total_tokens": 534432} | |
| {"current_steps": 1710, "total_steps": 1800, "eval_loss": 0.03127529099583626, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:04:23", "remaining_time": "0:00:13", "throughput": 2025.52, "total_tokens": 534432} | |
| {"current_steps": 1715, "total_steps": 1800, "loss": 0.0156, "lr": 3.468724188498751e-07, "epoch": 19.055555555555557, "percentage": 95.28, "elapsed_time": "0:04:25", "remaining_time": "0:00:13", "throughput": 2020.11, "total_tokens": 536000} | |
| {"current_steps": 1720, "total_steps": 1800, "loss": 0.0598, "lr": 3.077914851215585e-07, "epoch": 19.11111111111111, "percentage": 95.56, "elapsed_time": "0:04:26", "remaining_time": "0:00:12", "throughput": 2020.67, "total_tokens": 537568} | |
| {"current_steps": 1725, "total_steps": 1800, "loss": 0.0329, "lr": 2.71032038404323e-07, "epoch": 19.166666666666668, "percentage": 95.83, "elapsed_time": "0:04:26", "remaining_time": "0:00:11", "throughput": 2021.13, "total_tokens": 539104} | |
| {"current_steps": 1730, "total_steps": 1800, "loss": 0.0443, "lr": 2.365975347105448e-07, "epoch": 19.22222222222222, "percentage": 96.11, "elapsed_time": "0:04:27", "remaining_time": "0:00:10", "throughput": 2021.68, "total_tokens": 540672} | |
| {"current_steps": 1735, "total_steps": 1800, "loss": 0.0125, "lr": 2.0449121146845774e-07, "epoch": 19.27777777777778, "percentage": 96.39, "elapsed_time": "0:04:28", "remaining_time": "0:00:10", "throughput": 2021.21, "total_tokens": 542208} | |
| {"current_steps": 1740, "total_steps": 1800, "loss": 0.0189, "lr": 1.747160872177883e-07, "epoch": 19.333333333333332, "percentage": 96.67, "elapsed_time": "0:04:28", "remaining_time": "0:00:09", "throughput": 2021.69, "total_tokens": 543744} | |
| {"current_steps": 1745, "total_steps": 1800, "loss": 0.0315, "lr": 1.472749613259661e-07, "epoch": 19.38888888888889, "percentage": 96.94, "elapsed_time": "0:04:29", "remaining_time": "0:00:08", "throughput": 2022.24, "total_tokens": 545312} | |
| {"current_steps": 1750, "total_steps": 1800, "loss": 0.0255, "lr": 1.22170413724923e-07, "epoch": 19.444444444444443, "percentage": 97.22, "elapsed_time": "0:04:30", "remaining_time": "0:00:07", "throughput": 2022.9, "total_tokens": 546912} | |
| {"current_steps": 1755, "total_steps": 1800, "loss": 0.175, "lr": 9.940480466855417e-08, "epoch": 19.5, "percentage": 97.5, "elapsed_time": "0:04:31", "remaining_time": "0:00:06", "throughput": 2023.61, "total_tokens": 548512} | |
| {"current_steps": 1760, "total_steps": 1800, "loss": 0.0038, "lr": 7.898027451078982e-08, "epoch": 19.555555555555557, "percentage": 97.78, "elapsed_time": "0:04:31", "remaining_time": "0:00:06", "throughput": 2024.32, "total_tokens": 550112} | |
| {"current_steps": 1765, "total_steps": 1800, "loss": 0.0717, "lr": 6.089874350439506e-08, "epoch": 19.61111111111111, "percentage": 98.06, "elapsed_time": "0:04:32", "remaining_time": "0:00:05", "throughput": 2024.91, "total_tokens": 551680} | |
| {"current_steps": 1770, "total_steps": 1800, "loss": 0.0192, "lr": 4.516191162040051e-08, "epoch": 19.666666666666668, "percentage": 98.33, "elapsed_time": "0:04:33", "remaining_time": "0:00:04", "throughput": 2025.58, "total_tokens": 553280} | |
| {"current_steps": 1775, "total_steps": 1800, "loss": 0.089, "lr": 3.177125838830786e-08, "epoch": 19.72222222222222, "percentage": 98.61, "elapsed_time": "0:04:33", "remaining_time": "0:00:03", "throughput": 2026.2, "total_tokens": 554848} | |
| {"current_steps": 1780, "total_steps": 1800, "loss": 0.0752, "lr": 2.0728042756967824e-08, "epoch": 19.77777777777778, "percentage": 98.89, "elapsed_time": "0:04:34", "remaining_time": "0:00:03", "throughput": 2027.16, "total_tokens": 556512} | |
| {"current_steps": 1785, "total_steps": 1800, "loss": 0.0118, "lr": 1.2033302976222071e-08, "epoch": 19.833333333333332, "percentage": 99.17, "elapsed_time": "0:04:35", "remaining_time": "0:00:02", "throughput": 2027.88, "total_tokens": 558112} | |
| {"current_steps": 1790, "total_steps": 1800, "loss": 0.0298, "lr": 5.687856499297928e-09, "epoch": 19.88888888888889, "percentage": 99.44, "elapsed_time": "0:04:35", "remaining_time": "0:00:01", "throughput": 2028.6, "total_tokens": 559712} | |
| {"current_steps": 1795, "total_steps": 1800, "loss": 0.0178, "lr": 1.692299905944883e-09, "epoch": 19.944444444444443, "percentage": 99.72, "elapsed_time": "0:04:36", "remaining_time": "0:00:00", "throughput": 2029.21, "total_tokens": 561280} | |
| {"current_steps": 1800, "total_steps": 1800, "loss": 0.0631, "lr": 4.700884634611935e-11, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:04:37", "remaining_time": "0:00:00", "throughput": 2029.5, "total_tokens": 562848} | |
| {"current_steps": 1800, "total_steps": 1800, "eval_loss": 0.03115851804614067, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:04:37", "remaining_time": "0:00:00", "throughput": 2025.73, "total_tokens": 562848} | |
| {"current_steps": 1800, "total_steps": 1800, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:04:38", "remaining_time": "0:00:00", "throughput": 2020.08, "total_tokens": 562848} | |