rbelanec commited on
Commit
6540c1a
verified
1 Parent(s): e3a5c4d

Training in progress, step 9648

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +106 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bae8178482833144749a2a903eae3c648c5c526854433dcd39bf8e30d3a89eb
3
  size 798032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8e82d8eda51412ebef966c5821e7796611535b5b026fcd409f084f21a89ac9d
3
  size 798032
trainer_log.jsonl CHANGED
@@ -1850,3 +1850,109 @@
1850
  {"current_steps": 9165, "total_steps": 10720, "loss": 0.752, "lr": 3.1408248589922083e-06, "epoch": 17.098880597014926, "percentage": 85.49, "elapsed_time": "0:22:15", "remaining_time": "0:03:46", "throughput": 1958.26, "total_tokens": 2615656}
1851
  {"current_steps": 9170, "total_steps": 10720, "loss": 0.6818, "lr": 3.1211022627298692e-06, "epoch": 17.10820895522388, "percentage": 85.54, "elapsed_time": "0:22:16", "remaining_time": "0:03:45", "throughput": 1958.3, "total_tokens": 2617064}
1852
  {"current_steps": 9175, "total_steps": 10720, "loss": 0.5772, "lr": 3.1014376614614036e-06, "epoch": 17.117537313432837, "percentage": 85.59, "elapsed_time": "0:22:17", "remaining_time": "0:03:45", "throughput": 1958.33, "total_tokens": 2618472}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1850
  {"current_steps": 9165, "total_steps": 10720, "loss": 0.752, "lr": 3.1408248589922083e-06, "epoch": 17.098880597014926, "percentage": 85.49, "elapsed_time": "0:22:15", "remaining_time": "0:03:46", "throughput": 1958.26, "total_tokens": 2615656}
1851
  {"current_steps": 9170, "total_steps": 10720, "loss": 0.6818, "lr": 3.1211022627298692e-06, "epoch": 17.10820895522388, "percentage": 85.54, "elapsed_time": "0:22:16", "remaining_time": "0:03:45", "throughput": 1958.3, "total_tokens": 2617064}
1852
  {"current_steps": 9175, "total_steps": 10720, "loss": 0.5772, "lr": 3.1014376614614036e-06, "epoch": 17.117537313432837, "percentage": 85.59, "elapsed_time": "0:22:17", "remaining_time": "0:03:45", "throughput": 1958.33, "total_tokens": 2618472}
1853
+ {"current_steps": 9180, "total_steps": 10720, "loss": 0.5038, "lr": 3.081831107312308e-06, "epoch": 17.12686567164179, "percentage": 85.63, "elapsed_time": "0:22:17", "remaining_time": "0:03:44", "throughput": 1958.36, "total_tokens": 2619880}
1854
+ {"current_steps": 9185, "total_steps": 10720, "loss": 0.7333, "lr": 3.0622826522542196e-06, "epoch": 17.136194029850746, "percentage": 85.68, "elapsed_time": "0:22:18", "remaining_time": "0:03:43", "throughput": 1958.28, "total_tokens": 2621128}
1855
+ {"current_steps": 9190, "total_steps": 10720, "loss": 0.6497, "lr": 3.0427923481047645e-06, "epoch": 17.145522388059703, "percentage": 85.73, "elapsed_time": "0:22:19", "remaining_time": "0:03:42", "throughput": 1958.37, "total_tokens": 2622600}
1856
+ {"current_steps": 9195, "total_steps": 10720, "loss": 0.6129, "lr": 3.023360246527418e-06, "epoch": 17.154850746268657, "percentage": 85.77, "elapsed_time": "0:22:19", "remaining_time": "0:03:42", "throughput": 1958.46, "total_tokens": 2624136}
1857
+ {"current_steps": 9200, "total_steps": 10720, "loss": 0.7486, "lr": 3.0039863990313917e-06, "epoch": 17.16417910447761, "percentage": 85.82, "elapsed_time": "0:22:20", "remaining_time": "0:03:41", "throughput": 1958.5, "total_tokens": 2625544}
1858
+ {"current_steps": 9205, "total_steps": 10720, "loss": 0.6723, "lr": 2.984670856971475e-06, "epoch": 17.17350746268657, "percentage": 85.87, "elapsed_time": "0:22:21", "remaining_time": "0:03:40", "throughput": 1958.29, "total_tokens": 2626856}
1859
+ {"current_steps": 9210, "total_steps": 10720, "loss": 0.8374, "lr": 2.965413671547901e-06, "epoch": 17.182835820895523, "percentage": 85.91, "elapsed_time": "0:22:22", "remaining_time": "0:03:40", "throughput": 1958.27, "total_tokens": 2628200}
1860
+ {"current_steps": 9215, "total_steps": 10720, "loss": 0.6805, "lr": 2.9462148938062123e-06, "epoch": 17.192164179104477, "percentage": 85.96, "elapsed_time": "0:22:22", "remaining_time": "0:03:39", "throughput": 1958.29, "total_tokens": 2629640}
1861
+ {"current_steps": 9220, "total_steps": 10720, "loss": 0.623, "lr": 2.927074574637148e-06, "epoch": 17.20149253731343, "percentage": 86.01, "elapsed_time": "0:22:23", "remaining_time": "0:03:38", "throughput": 1958.36, "total_tokens": 2631080}
1862
+ {"current_steps": 9225, "total_steps": 10720, "loss": 0.9284, "lr": 2.907992764776471e-06, "epoch": 17.21082089552239, "percentage": 86.05, "elapsed_time": "0:22:24", "remaining_time": "0:03:37", "throughput": 1958.35, "total_tokens": 2632424}
1863
+ {"current_steps": 9230, "total_steps": 10720, "loss": 1.1096, "lr": 2.888969514804854e-06, "epoch": 17.220149253731343, "percentage": 86.1, "elapsed_time": "0:22:24", "remaining_time": "0:03:37", "throughput": 1958.33, "total_tokens": 2633736}
1864
+ {"current_steps": 9235, "total_steps": 10720, "loss": 0.5835, "lr": 2.8700048751477527e-06, "epoch": 17.229477611940297, "percentage": 86.15, "elapsed_time": "0:22:25", "remaining_time": "0:03:36", "throughput": 1958.48, "total_tokens": 2635368}
1865
+ {"current_steps": 9240, "total_steps": 10720, "loss": 0.3853, "lr": 2.8510988960752575e-06, "epoch": 17.238805970149254, "percentage": 86.19, "elapsed_time": "0:22:26", "remaining_time": "0:03:35", "throughput": 1958.77, "total_tokens": 2637160}
1866
+ {"current_steps": 9245, "total_steps": 10720, "loss": 0.7172, "lr": 2.8322516277019624e-06, "epoch": 17.24813432835821, "percentage": 86.24, "elapsed_time": "0:22:27", "remaining_time": "0:03:34", "throughput": 1958.79, "total_tokens": 2638536}
1867
+ {"current_steps": 9250, "total_steps": 10720, "loss": 0.6969, "lr": 2.813463119986834e-06, "epoch": 17.257462686567163, "percentage": 86.29, "elapsed_time": "0:22:27", "remaining_time": "0:03:34", "throughput": 1958.78, "total_tokens": 2639880}
1868
+ {"current_steps": 9255, "total_steps": 10720, "loss": 0.5964, "lr": 2.7947334227330897e-06, "epoch": 17.26679104477612, "percentage": 86.33, "elapsed_time": "0:22:28", "remaining_time": "0:03:33", "throughput": 1958.75, "total_tokens": 2641192}
1869
+ {"current_steps": 9260, "total_steps": 10720, "loss": 0.8397, "lr": 2.776062585588063e-06, "epoch": 17.276119402985074, "percentage": 86.38, "elapsed_time": "0:22:29", "remaining_time": "0:03:32", "throughput": 1958.79, "total_tokens": 2642600}
1870
+ {"current_steps": 9265, "total_steps": 10720, "loss": 0.546, "lr": 2.7574506580430287e-06, "epoch": 17.28544776119403, "percentage": 86.43, "elapsed_time": "0:22:29", "remaining_time": "0:03:31", "throughput": 1958.85, "total_tokens": 2644040}
1871
+ {"current_steps": 9270, "total_steps": 10720, "loss": 0.5591, "lr": 2.7388976894331537e-06, "epoch": 17.294776119402986, "percentage": 86.47, "elapsed_time": "0:22:30", "remaining_time": "0:03:31", "throughput": 1958.98, "total_tokens": 2645576}
1872
+ {"current_steps": 9275, "total_steps": 10720, "loss": 0.711, "lr": 2.72040372893729e-06, "epoch": 17.30410447761194, "percentage": 86.52, "elapsed_time": "0:22:31", "remaining_time": "0:03:30", "throughput": 1959.14, "total_tokens": 2647144}
1873
+ {"current_steps": 9280, "total_steps": 10720, "loss": 0.8925, "lr": 2.7019688255778857e-06, "epoch": 17.313432835820894, "percentage": 86.57, "elapsed_time": "0:22:31", "remaining_time": "0:03:29", "throughput": 1959.14, "total_tokens": 2648488}
1874
+ {"current_steps": 9285, "total_steps": 10720, "loss": 0.629, "lr": 2.6835930282208517e-06, "epoch": 17.32276119402985, "percentage": 86.61, "elapsed_time": "0:22:32", "remaining_time": "0:03:29", "throughput": 1959.23, "total_tokens": 2649960}
1875
+ {"current_steps": 9290, "total_steps": 10720, "loss": 0.7025, "lr": 2.6652763855754106e-06, "epoch": 17.332089552238806, "percentage": 86.66, "elapsed_time": "0:22:33", "remaining_time": "0:03:28", "throughput": 1959.2, "total_tokens": 2651272}
1876
+ {"current_steps": 9295, "total_steps": 10720, "loss": 0.5435, "lr": 2.647018946193999e-06, "epoch": 17.34141791044776, "percentage": 86.71, "elapsed_time": "0:22:33", "remaining_time": "0:03:27", "throughput": 1959.17, "total_tokens": 2652584}
1877
+ {"current_steps": 9300, "total_steps": 10720, "loss": 0.7609, "lr": 2.628820758472095e-06, "epoch": 17.350746268656717, "percentage": 86.75, "elapsed_time": "0:22:34", "remaining_time": "0:03:26", "throughput": 1959.21, "total_tokens": 2654056}
1878
+ {"current_steps": 9305, "total_steps": 10720, "loss": 0.638, "lr": 2.610681870648149e-06, "epoch": 17.36007462686567, "percentage": 86.8, "elapsed_time": "0:22:35", "remaining_time": "0:03:26", "throughput": 1959.3, "total_tokens": 2655528}
1879
+ {"current_steps": 9310, "total_steps": 10720, "loss": 0.6193, "lr": 2.5926023308033952e-06, "epoch": 17.369402985074625, "percentage": 86.85, "elapsed_time": "0:22:36", "remaining_time": "0:03:25", "throughput": 1959.32, "total_tokens": 2656904}
1880
+ {"current_steps": 9315, "total_steps": 10720, "loss": 0.4637, "lr": 2.5745821868617792e-06, "epoch": 17.378731343283583, "percentage": 86.89, "elapsed_time": "0:22:36", "remaining_time": "0:03:24", "throughput": 1959.49, "total_tokens": 2658472}
1881
+ {"current_steps": 9320, "total_steps": 10720, "loss": 0.5945, "lr": 2.556621486589783e-06, "epoch": 17.388059701492537, "percentage": 86.94, "elapsed_time": "0:22:37", "remaining_time": "0:03:23", "throughput": 1959.46, "total_tokens": 2659784}
1882
+ {"current_steps": 9325, "total_steps": 10720, "loss": 0.5395, "lr": 2.5387202775963236e-06, "epoch": 17.39738805970149, "percentage": 86.99, "elapsed_time": "0:22:38", "remaining_time": "0:03:23", "throughput": 1959.65, "total_tokens": 2661448}
1883
+ {"current_steps": 9330, "total_steps": 10720, "loss": 0.6108, "lr": 2.520878607332641e-06, "epoch": 17.40671641791045, "percentage": 87.03, "elapsed_time": "0:22:38", "remaining_time": "0:03:22", "throughput": 1959.65, "total_tokens": 2662792}
1884
+ {"current_steps": 9335, "total_steps": 10720, "loss": 0.6506, "lr": 2.5030965230921186e-06, "epoch": 17.416044776119403, "percentage": 87.08, "elapsed_time": "0:22:39", "remaining_time": "0:03:21", "throughput": 1959.53, "total_tokens": 2663976}
1885
+ {"current_steps": 9340, "total_steps": 10720, "loss": 0.6417, "lr": 2.485374072010224e-06, "epoch": 17.425373134328357, "percentage": 87.13, "elapsed_time": "0:22:40", "remaining_time": "0:03:20", "throughput": 1959.69, "total_tokens": 2665544}
1886
+ {"current_steps": 9345, "total_steps": 10720, "loss": 0.8112, "lr": 2.4677113010643486e-06, "epoch": 17.434701492537314, "percentage": 87.17, "elapsed_time": "0:22:40", "remaining_time": "0:03:20", "throughput": 1959.73, "total_tokens": 2666952}
1887
+ {"current_steps": 9350, "total_steps": 10720, "loss": 0.5644, "lr": 2.450108257073683e-06, "epoch": 17.44402985074627, "percentage": 87.22, "elapsed_time": "0:22:41", "remaining_time": "0:03:19", "throughput": 1959.8, "total_tokens": 2668392}
1888
+ {"current_steps": 9355, "total_steps": 10720, "loss": 0.7331, "lr": 2.4325649866990928e-06, "epoch": 17.453358208955223, "percentage": 87.27, "elapsed_time": "0:22:42", "remaining_time": "0:03:18", "throughput": 1959.8, "total_tokens": 2669736}
1889
+ {"current_steps": 9360, "total_steps": 10720, "loss": 0.6641, "lr": 2.4150815364430036e-06, "epoch": 17.46268656716418, "percentage": 87.31, "elapsed_time": "0:22:42", "remaining_time": "0:03:18", "throughput": 1959.94, "total_tokens": 2671272}
1890
+ {"current_steps": 9365, "total_steps": 10720, "loss": 0.6265, "lr": 2.397657952649285e-06, "epoch": 17.472014925373134, "percentage": 87.36, "elapsed_time": "0:22:43", "remaining_time": "0:03:17", "throughput": 1959.99, "total_tokens": 2672680}
1891
+ {"current_steps": 9370, "total_steps": 10720, "loss": 0.7038, "lr": 2.380294281503104e-06, "epoch": 17.48134328358209, "percentage": 87.41, "elapsed_time": "0:22:44", "remaining_time": "0:03:16", "throughput": 1960.11, "total_tokens": 2674248}
1892
+ {"current_steps": 9375, "total_steps": 10720, "loss": 0.6143, "lr": 2.3629905690308126e-06, "epoch": 17.490671641791046, "percentage": 87.45, "elapsed_time": "0:22:45", "remaining_time": "0:03:15", "throughput": 1960.2, "total_tokens": 2675720}
1893
+ {"current_steps": 9380, "total_steps": 10720, "loss": 0.5688, "lr": 2.3457468610998486e-06, "epoch": 17.5, "percentage": 87.5, "elapsed_time": "0:22:45", "remaining_time": "0:03:15", "throughput": 1960.22, "total_tokens": 2677096}
1894
+ {"current_steps": 9385, "total_steps": 10720, "loss": 1.2265, "lr": 2.328563203418574e-06, "epoch": 17.509328358208954, "percentage": 87.55, "elapsed_time": "0:22:46", "remaining_time": "0:03:14", "throughput": 1960.27, "total_tokens": 2678504}
1895
+ {"current_steps": 9390, "total_steps": 10720, "loss": 0.803, "lr": 2.311439641536184e-06, "epoch": 17.51865671641791, "percentage": 87.59, "elapsed_time": "0:22:47", "remaining_time": "0:03:13", "throughput": 1960.34, "total_tokens": 2680008}
1896
+ {"current_steps": 9395, "total_steps": 10720, "loss": 0.6276, "lr": 2.2943762208425646e-06, "epoch": 17.527985074626866, "percentage": 87.64, "elapsed_time": "0:22:47", "remaining_time": "0:03:12", "throughput": 1960.39, "total_tokens": 2681480}
1897
+ {"current_steps": 9400, "total_steps": 10720, "loss": 0.6515, "lr": 2.2773729865682046e-06, "epoch": 17.53731343283582, "percentage": 87.69, "elapsed_time": "0:22:48", "remaining_time": "0:03:12", "throughput": 1960.39, "total_tokens": 2682824}
1898
+ {"current_steps": 9405, "total_steps": 10720, "loss": 0.5848, "lr": 2.2604299837840374e-06, "epoch": 17.546641791044777, "percentage": 87.73, "elapsed_time": "0:22:49", "remaining_time": "0:03:11", "throughput": 1960.39, "total_tokens": 2684232}
1899
+ {"current_steps": 9410, "total_steps": 10720, "loss": 0.7093, "lr": 2.2435472574013433e-06, "epoch": 17.55597014925373, "percentage": 87.78, "elapsed_time": "0:22:49", "remaining_time": "0:03:10", "throughput": 1960.56, "total_tokens": 2685800}
1900
+ {"current_steps": 9415, "total_steps": 10720, "loss": 0.5712, "lr": 2.2267248521716327e-06, "epoch": 17.565298507462686, "percentage": 87.83, "elapsed_time": "0:22:50", "remaining_time": "0:03:09", "throughput": 1960.56, "total_tokens": 2687144}
1901
+ {"current_steps": 9420, "total_steps": 10720, "loss": 0.7221, "lr": 2.209962812686514e-06, "epoch": 17.574626865671643, "percentage": 87.87, "elapsed_time": "0:22:51", "remaining_time": "0:03:09", "throughput": 1960.67, "total_tokens": 2688648}
1902
+ {"current_steps": 9425, "total_steps": 10720, "loss": 0.7375, "lr": 2.1932611833775846e-06, "epoch": 17.583955223880597, "percentage": 87.92, "elapsed_time": "0:22:51", "remaining_time": "0:03:08", "throughput": 1960.75, "total_tokens": 2690120}
1903
+ {"current_steps": 9430, "total_steps": 10720, "loss": 0.594, "lr": 2.1766200085163058e-06, "epoch": 17.59328358208955, "percentage": 87.97, "elapsed_time": "0:22:52", "remaining_time": "0:03:07", "throughput": 1960.8, "total_tokens": 2691528}
1904
+ {"current_steps": 9435, "total_steps": 10720, "loss": 0.6798, "lr": 2.1600393322139034e-06, "epoch": 17.60261194029851, "percentage": 88.01, "elapsed_time": "0:22:53", "remaining_time": "0:03:07", "throughput": 1960.97, "total_tokens": 2693096}
1905
+ {"current_steps": 9440, "total_steps": 10720, "loss": 0.5149, "lr": 2.1435191984212315e-06, "epoch": 17.611940298507463, "percentage": 88.06, "elapsed_time": "0:22:54", "remaining_time": "0:03:06", "throughput": 1961.13, "total_tokens": 2694664}
1906
+ {"current_steps": 9445, "total_steps": 10720, "loss": 0.6447, "lr": 2.1270596509286504e-06, "epoch": 17.621268656716417, "percentage": 88.11, "elapsed_time": "0:22:54", "remaining_time": "0:03:05", "throughput": 1961.09, "total_tokens": 2695944}
1907
+ {"current_steps": 9450, "total_steps": 10720, "loss": 0.6261, "lr": 2.1106607333659463e-06, "epoch": 17.630597014925375, "percentage": 88.15, "elapsed_time": "0:22:55", "remaining_time": "0:03:04", "throughput": 1961.11, "total_tokens": 2697320}
1908
+ {"current_steps": 9455, "total_steps": 10720, "loss": 0.8068, "lr": 2.0943224892021746e-06, "epoch": 17.63992537313433, "percentage": 88.2, "elapsed_time": "0:22:56", "remaining_time": "0:03:04", "throughput": 1961.11, "total_tokens": 2698664}
1909
+ {"current_steps": 9460, "total_steps": 10720, "loss": 0.6833, "lr": 2.078044961745562e-06, "epoch": 17.649253731343283, "percentage": 88.25, "elapsed_time": "0:22:56", "remaining_time": "0:03:03", "throughput": 1961.18, "total_tokens": 2700104}
1910
+ {"current_steps": 9465, "total_steps": 10720, "loss": 0.5058, "lr": 2.061828194143406e-06, "epoch": 17.65858208955224, "percentage": 88.29, "elapsed_time": "0:22:57", "remaining_time": "0:03:02", "throughput": 1961.32, "total_tokens": 2701640}
1911
+ {"current_steps": 9470, "total_steps": 10720, "loss": 0.766, "lr": 2.0456722293819315e-06, "epoch": 17.667910447761194, "percentage": 88.34, "elapsed_time": "0:22:58", "remaining_time": "0:03:01", "throughput": 1961.4, "total_tokens": 2703080}
1912
+ {"current_steps": 9475, "total_steps": 10720, "loss": 0.6095, "lr": 2.0295771102861987e-06, "epoch": 17.67723880597015, "percentage": 88.39, "elapsed_time": "0:22:58", "remaining_time": "0:03:01", "throughput": 1961.44, "total_tokens": 2704488}
1913
+ {"current_steps": 9480, "total_steps": 10720, "loss": 0.7703, "lr": 2.013542879519975e-06, "epoch": 17.686567164179106, "percentage": 88.43, "elapsed_time": "0:22:59", "remaining_time": "0:03:00", "throughput": 1961.47, "total_tokens": 2705864}
1914
+ {"current_steps": 9485, "total_steps": 10720, "loss": 0.6, "lr": 1.997569579585648e-06, "epoch": 17.69589552238806, "percentage": 88.48, "elapsed_time": "0:23:00", "remaining_time": "0:02:59", "throughput": 1961.57, "total_tokens": 2707368}
1915
+ {"current_steps": 9490, "total_steps": 10720, "loss": 0.5134, "lr": 1.9816572528240707e-06, "epoch": 17.705223880597014, "percentage": 88.53, "elapsed_time": "0:23:00", "remaining_time": "0:02:58", "throughput": 1961.63, "total_tokens": 2708808}
1916
+ {"current_steps": 9495, "total_steps": 10720, "loss": 0.9161, "lr": 1.9658059414144834e-06, "epoch": 17.71455223880597, "percentage": 88.57, "elapsed_time": "0:23:01", "remaining_time": "0:02:58", "throughput": 1961.67, "total_tokens": 2710216}
1917
+ {"current_steps": 9500, "total_steps": 10720, "loss": 0.4653, "lr": 1.9500156873743985e-06, "epoch": 17.723880597014926, "percentage": 88.62, "elapsed_time": "0:23:02", "remaining_time": "0:02:57", "throughput": 1961.74, "total_tokens": 2711656}
1918
+ {"current_steps": 9505, "total_steps": 10720, "loss": 0.6761, "lr": 1.934286532559468e-06, "epoch": 17.73320895522388, "percentage": 88.67, "elapsed_time": "0:23:02", "remaining_time": "0:02:56", "throughput": 1961.86, "total_tokens": 2713160}
1919
+ {"current_steps": 9510, "total_steps": 10720, "loss": 0.7755, "lr": 1.9186185186634066e-06, "epoch": 17.742537313432837, "percentage": 88.71, "elapsed_time": "0:23:03", "remaining_time": "0:02:56", "throughput": 1961.9, "total_tokens": 2714568}
1920
+ {"current_steps": 9515, "total_steps": 10720, "loss": 0.6768, "lr": 1.9030116872178316e-06, "epoch": 17.75186567164179, "percentage": 88.76, "elapsed_time": "0:23:04", "remaining_time": "0:02:55", "throughput": 1961.94, "total_tokens": 2715976}
1921
+ {"current_steps": 9520, "total_steps": 10720, "loss": 0.5356, "lr": 1.8874660795922067e-06, "epoch": 17.761194029850746, "percentage": 88.81, "elapsed_time": "0:23:05", "remaining_time": "0:02:54", "throughput": 1961.94, "total_tokens": 2717320}
1922
+ {"current_steps": 9525, "total_steps": 10720, "loss": 0.7166, "lr": 1.8719817369937082e-06, "epoch": 17.770522388059703, "percentage": 88.85, "elapsed_time": "0:23:05", "remaining_time": "0:02:53", "throughput": 1961.94, "total_tokens": 2718664}
1923
+ {"current_steps": 9530, "total_steps": 10720, "loss": 0.8246, "lr": 1.8565587004670898e-06, "epoch": 17.779850746268657, "percentage": 88.9, "elapsed_time": "0:23:06", "remaining_time": "0:02:53", "throughput": 1962.01, "total_tokens": 2720104}
1924
+ {"current_steps": 9535, "total_steps": 10720, "loss": 0.6914, "lr": 1.8411970108946296e-06, "epoch": 17.78917910447761, "percentage": 88.95, "elapsed_time": "0:23:07", "remaining_time": "0:02:52", "throughput": 1962.12, "total_tokens": 2721608}
1925
+ {"current_steps": 9540, "total_steps": 10720, "loss": 0.6322, "lr": 1.8258967089959749e-06, "epoch": 17.798507462686565, "percentage": 88.99, "elapsed_time": "0:23:07", "remaining_time": "0:02:51", "throughput": 1962.17, "total_tokens": 2723016}
1926
+ {"current_steps": 9545, "total_steps": 10720, "loss": 0.855, "lr": 1.8106578353280585e-06, "epoch": 17.807835820895523, "percentage": 89.04, "elapsed_time": "0:23:08", "remaining_time": "0:02:50", "throughput": 1962.12, "total_tokens": 2724296}
1927
+ {"current_steps": 9550, "total_steps": 10720, "loss": 0.7638, "lr": 1.7954804302849793e-06, "epoch": 17.817164179104477, "percentage": 89.09, "elapsed_time": "0:23:09", "remaining_time": "0:02:50", "throughput": 1962.25, "total_tokens": 2725832}
1928
+ {"current_steps": 9555, "total_steps": 10720, "loss": 0.5106, "lr": 1.7803645340978948e-06, "epoch": 17.826492537313435, "percentage": 89.13, "elapsed_time": "0:23:09", "remaining_time": "0:02:49", "throughput": 1962.29, "total_tokens": 2727240}
1929
+ {"current_steps": 9560, "total_steps": 10720, "loss": 0.7506, "lr": 1.7653101868349343e-06, "epoch": 17.83582089552239, "percentage": 89.18, "elapsed_time": "0:23:10", "remaining_time": "0:02:48", "throughput": 1962.3, "total_tokens": 2728616}
1930
+ {"current_steps": 9565, "total_steps": 10720, "loss": 0.7365, "lr": 1.750317428401066e-06, "epoch": 17.845149253731343, "percentage": 89.23, "elapsed_time": "0:23:11", "remaining_time": "0:02:47", "throughput": 1962.27, "total_tokens": 2729928}
1931
+ {"current_steps": 9570, "total_steps": 10720, "loss": 0.8183, "lr": 1.7353862985380027e-06, "epoch": 17.854477611940297, "percentage": 89.27, "elapsed_time": "0:23:11", "remaining_time": "0:02:47", "throughput": 1962.27, "total_tokens": 2731272}
1932
+ {"current_steps": 9575, "total_steps": 10720, "loss": 0.7378, "lr": 1.7205168368240986e-06, "epoch": 17.863805970149254, "percentage": 89.32, "elapsed_time": "0:23:12", "remaining_time": "0:02:46", "throughput": 1962.37, "total_tokens": 2732872}
1933
+ {"current_steps": 9580, "total_steps": 10720, "loss": 0.5925, "lr": 1.7057090826742505e-06, "epoch": 17.87313432835821, "percentage": 89.37, "elapsed_time": "0:23:13", "remaining_time": "0:02:45", "throughput": 1962.67, "total_tokens": 2734760}
1934
+ {"current_steps": 9585, "total_steps": 10720, "loss": 0.8297, "lr": 1.6909630753397716e-06, "epoch": 17.882462686567163, "percentage": 89.41, "elapsed_time": "0:23:14", "remaining_time": "0:02:45", "throughput": 1962.67, "total_tokens": 2736104}
1935
+ {"current_steps": 9590, "total_steps": 10720, "loss": 0.7121, "lr": 1.6762788539083086e-06, "epoch": 17.89179104477612, "percentage": 89.46, "elapsed_time": "0:23:14", "remaining_time": "0:02:44", "throughput": 1962.71, "total_tokens": 2737512}
1936
+ {"current_steps": 9595, "total_steps": 10720, "loss": 0.732, "lr": 1.6616564573037342e-06, "epoch": 17.901119402985074, "percentage": 89.51, "elapsed_time": "0:23:15", "remaining_time": "0:02:43", "throughput": 1962.73, "total_tokens": 2738888}
1937
+ {"current_steps": 9600, "total_steps": 10720, "loss": 0.9011, "lr": 1.6470959242860296e-06, "epoch": 17.91044776119403, "percentage": 89.55, "elapsed_time": "0:23:16", "remaining_time": "0:02:42", "throughput": 1962.59, "total_tokens": 2740040}
1938
+ {"current_steps": 9605, "total_steps": 10720, "loss": 0.6195, "lr": 1.6325972934512018e-06, "epoch": 17.919776119402986, "percentage": 89.6, "elapsed_time": "0:23:16", "remaining_time": "0:02:42", "throughput": 1962.61, "total_tokens": 2741416}
1939
+ {"current_steps": 9610, "total_steps": 10720, "loss": 0.4596, "lr": 1.6181606032311696e-06, "epoch": 17.92910447761194, "percentage": 89.65, "elapsed_time": "0:23:17", "remaining_time": "0:02:41", "throughput": 1962.56, "total_tokens": 2742760}
1940
+ {"current_steps": 9615, "total_steps": 10720, "loss": 0.7076, "lr": 1.6037858918936638e-06, "epoch": 17.938432835820894, "percentage": 89.69, "elapsed_time": "0:23:18", "remaining_time": "0:02:40", "throughput": 1962.42, "total_tokens": 2743912}
1941
+ {"current_steps": 9620, "total_steps": 10720, "loss": 0.6186, "lr": 1.589473197542124e-06, "epoch": 17.94776119402985, "percentage": 89.74, "elapsed_time": "0:23:18", "remaining_time": "0:02:39", "throughput": 1962.5, "total_tokens": 2745416}
1942
+ {"current_steps": 9625, "total_steps": 10720, "loss": 0.8379, "lr": 1.5752225581155993e-06, "epoch": 17.957089552238806, "percentage": 89.79, "elapsed_time": "0:23:19", "remaining_time": "0:02:39", "throughput": 1962.66, "total_tokens": 2746984}
1943
+ {"current_steps": 9630, "total_steps": 10720, "loss": 0.6485, "lr": 1.5610340113886568e-06, "epoch": 17.96641791044776, "percentage": 89.83, "elapsed_time": "0:23:20", "remaining_time": "0:02:38", "throughput": 1962.77, "total_tokens": 2748552}
1944
+ {"current_steps": 9635, "total_steps": 10720, "loss": 0.8664, "lr": 1.5469075949712613e-06, "epoch": 17.975746268656717, "percentage": 89.88, "elapsed_time": "0:23:21", "remaining_time": "0:02:37", "throughput": 1962.79, "total_tokens": 2749992}
1945
+ {"current_steps": 9640, "total_steps": 10720, "loss": 0.5613, "lr": 1.5328433463086904e-06, "epoch": 17.98507462686567, "percentage": 89.93, "elapsed_time": "0:23:21", "remaining_time": "0:02:37", "throughput": 1962.79, "total_tokens": 2751336}
1946
+ {"current_steps": 9645, "total_steps": 10720, "loss": 0.6361, "lr": 1.5188413026814396e-06, "epoch": 17.994402985074625, "percentage": 89.97, "elapsed_time": "0:23:22", "remaining_time": "0:02:36", "throughput": 1962.88, "total_tokens": 2752808}
1947
+ {"current_steps": 9648, "total_steps": 10720, "eval_loss": 0.6946848630905151, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:23:25", "remaining_time": "0:02:36", "throughput": 1959.31, "total_tokens": 2753536}
1948
+ {"current_steps": 9650, "total_steps": 10720, "loss": 0.6194, "lr": 1.5049015012051055e-06, "epoch": 18.003731343283583, "percentage": 90.02, "elapsed_time": "0:23:26", "remaining_time": "0:02:35", "throughput": 1957.7, "total_tokens": 2754048}
1949
+ {"current_steps": 9655, "total_steps": 10720, "loss": 0.6043, "lr": 1.4910239788303027e-06, "epoch": 18.013059701492537, "percentage": 90.07, "elapsed_time": "0:23:27", "remaining_time": "0:02:35", "throughput": 1957.7, "total_tokens": 2755424}
1950
+ {"current_steps": 9660, "total_steps": 10720, "loss": 0.703, "lr": 1.4772087723425559e-06, "epoch": 18.02238805970149, "percentage": 90.11, "elapsed_time": "0:23:28", "remaining_time": "0:02:34", "throughput": 1957.35, "total_tokens": 2756736}
1951
+ {"current_steps": 9665, "total_steps": 10720, "loss": 0.5608, "lr": 1.4634559183622193e-06, "epoch": 18.03171641791045, "percentage": 90.16, "elapsed_time": "0:23:29", "remaining_time": "0:02:33", "throughput": 1957.42, "total_tokens": 2758208}
1952
+ {"current_steps": 9670, "total_steps": 10720, "loss": 0.6379, "lr": 1.4497654533443538e-06, "epoch": 18.041044776119403, "percentage": 90.21, "elapsed_time": "0:23:29", "remaining_time": "0:02:33", "throughput": 1957.45, "total_tokens": 2759616}
1953
+ {"current_steps": 9675, "total_steps": 10720, "loss": 0.5381, "lr": 1.436137413578653e-06, "epoch": 18.050373134328357, "percentage": 90.25, "elapsed_time": "0:23:30", "remaining_time": "0:02:32", "throughput": 1957.43, "total_tokens": 2760960}
1954
+ {"current_steps": 9680, "total_steps": 10720, "loss": 0.756, "lr": 1.4225718351893374e-06, "epoch": 18.059701492537314, "percentage": 90.3, "elapsed_time": "0:23:31", "remaining_time": "0:02:31", "throughput": 1957.37, "total_tokens": 2762240}
1955
+ {"current_steps": 9685, "total_steps": 10720, "loss": 0.6621, "lr": 1.4090687541350538e-06, "epoch": 18.06902985074627, "percentage": 90.35, "elapsed_time": "0:23:31", "remaining_time": "0:02:30", "throughput": 1957.45, "total_tokens": 2763712}
1956
+ {"current_steps": 9690, "total_steps": 10720, "loss": 0.5612, "lr": 1.3956282062087933e-06, "epoch": 18.078358208955223, "percentage": 90.39, "elapsed_time": "0:23:32", "remaining_time": "0:02:30", "throughput": 1957.24, "total_tokens": 2765056}
1957
+ {"current_steps": 9695, "total_steps": 10720, "loss": 0.9437, "lr": 1.3822502270377762e-06, "epoch": 18.08768656716418, "percentage": 90.44, "elapsed_time": "0:23:33", "remaining_time": "0:02:29", "throughput": 1957.28, "total_tokens": 2766528}
1958
+ {"current_steps": 9700, "total_steps": 10720, "loss": 0.6803, "lr": 1.368934852083384e-06, "epoch": 18.097014925373134, "percentage": 90.49, "elapsed_time": "0:23:34", "remaining_time": "0:02:28", "throughput": 1957.41, "total_tokens": 2768064}