rbelanec commited on
Commit
d5b8c3a
verified
1 Parent(s): 3ccd34b

Training in progress, step 24440

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +236 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5bdc357c494a9d052707c56b75f04effa5dee28f4c4078a4b6af02172125e10
3
  size 798032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:976cb1b7e6f315eb8b5a5f29e403e479322ecbb6daad3a38d4fed1d14bbb63d1
3
  size 798032
trainer_log.jsonl CHANGED
@@ -4671,3 +4671,239 @@
4671
  {"current_steps": 23260, "total_steps": 24440, "loss": 0.1342, "lr": 3.5480699778832615e-07, "epoch": 19.034369885433716, "percentage": 95.17, "elapsed_time": "0:56:20", "remaining_time": "0:02:51", "throughput": 2373.62, "total_tokens": 8023888}
4672
  {"current_steps": 23265, "total_steps": 24440, "loss": 0.4486, "lr": 3.5181611590298924e-07, "epoch": 19.03846153846154, "percentage": 95.19, "elapsed_time": "0:56:21", "remaining_time": "0:02:50", "throughput": 2373.64, "total_tokens": 8025616}
4673
  {"current_steps": 23270, "total_steps": 24440, "loss": 0.1787, "lr": 3.4883780407441726e-07, "epoch": 19.04255319148936, "percentage": 95.21, "elapsed_time": "0:56:21", "remaining_time": "0:02:50", "throughput": 2373.68, "total_tokens": 8027408}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4671
  {"current_steps": 23260, "total_steps": 24440, "loss": 0.1342, "lr": 3.5480699778832615e-07, "epoch": 19.034369885433716, "percentage": 95.17, "elapsed_time": "0:56:20", "remaining_time": "0:02:51", "throughput": 2373.62, "total_tokens": 8023888}
4672
  {"current_steps": 23265, "total_steps": 24440, "loss": 0.4486, "lr": 3.5181611590298924e-07, "epoch": 19.03846153846154, "percentage": 95.19, "elapsed_time": "0:56:21", "remaining_time": "0:02:50", "throughput": 2373.64, "total_tokens": 8025616}
4673
  {"current_steps": 23270, "total_steps": 24440, "loss": 0.1787, "lr": 3.4883780407441726e-07, "epoch": 19.04255319148936, "percentage": 95.21, "elapsed_time": "0:56:21", "remaining_time": "0:02:50", "throughput": 2373.68, "total_tokens": 8027408}
4674
+ {"current_steps": 23275, "total_steps": 24440, "loss": 0.1916, "lr": 3.458720638214841e-07, "epoch": 19.046644844517186, "percentage": 95.23, "elapsed_time": "0:56:22", "remaining_time": "0:02:49", "throughput": 2373.68, "total_tokens": 8029104}
4675
+ {"current_steps": 23280, "total_steps": 24440, "loss": 0.1084, "lr": 3.4291889665665766e-07, "epoch": 19.050736497545007, "percentage": 95.25, "elapsed_time": "0:56:23", "remaining_time": "0:02:48", "throughput": 2373.7, "total_tokens": 8030832}
4676
+ {"current_steps": 23285, "total_steps": 24440, "loss": 0.1382, "lr": 3.399783040859944e-07, "epoch": 19.05482815057283, "percentage": 95.27, "elapsed_time": "0:56:23", "remaining_time": "0:02:47", "throughput": 2373.73, "total_tokens": 8032656}
4677
+ {"current_steps": 23290, "total_steps": 24440, "loss": 0.247, "lr": 3.37050287609128e-07, "epoch": 19.058919803600656, "percentage": 95.29, "elapsed_time": "0:56:24", "remaining_time": "0:02:47", "throughput": 2373.76, "total_tokens": 8034416}
4678
+ {"current_steps": 23295, "total_steps": 24440, "loss": 0.2609, "lr": 3.3413484871928635e-07, "epoch": 19.063011456628477, "percentage": 95.32, "elapsed_time": "0:56:25", "remaining_time": "0:02:46", "throughput": 2373.79, "total_tokens": 8036176}
4679
+ {"current_steps": 23300, "total_steps": 24440, "loss": 0.1507, "lr": 3.312319889032883e-07, "epoch": 19.0671031096563, "percentage": 95.34, "elapsed_time": "0:56:26", "remaining_time": "0:02:45", "throughput": 2373.8, "total_tokens": 8037840}
4680
+ {"current_steps": 23305, "total_steps": 24440, "loss": 0.4038, "lr": 3.2834170964152755e-07, "epoch": 19.071194762684126, "percentage": 95.36, "elapsed_time": "0:56:26", "remaining_time": "0:02:44", "throughput": 2373.83, "total_tokens": 8039600}
4681
+ {"current_steps": 23310, "total_steps": 24440, "loss": 0.1246, "lr": 3.2546401240798607e-07, "epoch": 19.075286415711947, "percentage": 95.38, "elapsed_time": "0:56:27", "remaining_time": "0:02:44", "throughput": 2373.84, "total_tokens": 8041296}
4682
+ {"current_steps": 23315, "total_steps": 24440, "loss": 0.1802, "lr": 3.2259889867022886e-07, "epoch": 19.07937806873977, "percentage": 95.4, "elapsed_time": "0:56:28", "remaining_time": "0:02:43", "throughput": 2373.86, "total_tokens": 8043024}
4683
+ {"current_steps": 23320, "total_steps": 24440, "loss": 0.3508, "lr": 3.19746369889401e-07, "epoch": 19.083469721767592, "percentage": 95.42, "elapsed_time": "0:56:28", "remaining_time": "0:02:42", "throughput": 2373.89, "total_tokens": 8044752}
4684
+ {"current_steps": 23325, "total_steps": 24440, "loss": 0.4152, "lr": 3.1690642752023605e-07, "epoch": 19.087561374795417, "percentage": 95.44, "elapsed_time": "0:56:29", "remaining_time": "0:02:42", "throughput": 2373.9, "total_tokens": 8046448}
4685
+ {"current_steps": 23330, "total_steps": 24440, "loss": 0.1328, "lr": 3.140790730110449e-07, "epoch": 19.09165302782324, "percentage": 95.46, "elapsed_time": "0:56:30", "remaining_time": "0:02:41", "throughput": 2373.94, "total_tokens": 8048240}
4686
+ {"current_steps": 23335, "total_steps": 24440, "loss": 0.1662, "lr": 3.112643078037214e-07, "epoch": 19.095744680851062, "percentage": 95.48, "elapsed_time": "0:56:30", "remaining_time": "0:02:40", "throughput": 2373.94, "total_tokens": 8049904}
4687
+ {"current_steps": 23340, "total_steps": 24440, "loss": 0.2905, "lr": 3.084621333337312e-07, "epoch": 19.099836333878887, "percentage": 95.5, "elapsed_time": "0:56:31", "remaining_time": "0:02:39", "throughput": 2373.96, "total_tokens": 8051632}
4688
+ {"current_steps": 23345, "total_steps": 24440, "loss": 0.4205, "lr": 3.0567255103012557e-07, "epoch": 19.10392798690671, "percentage": 95.52, "elapsed_time": "0:56:32", "remaining_time": "0:02:39", "throughput": 2373.98, "total_tokens": 8053424}
4689
+ {"current_steps": 23350, "total_steps": 24440, "loss": 0.1713, "lr": 3.0289556231553607e-07, "epoch": 19.108019639934533, "percentage": 95.54, "elapsed_time": "0:56:33", "remaining_time": "0:02:38", "throughput": 2373.98, "total_tokens": 8055088}
4690
+ {"current_steps": 23355, "total_steps": 24440, "loss": 0.0721, "lr": 3.001311686061686e-07, "epoch": 19.112111292962357, "percentage": 95.56, "elapsed_time": "0:56:33", "remaining_time": "0:02:37", "throughput": 2374.0, "total_tokens": 8056816}
4691
+ {"current_steps": 23360, "total_steps": 24440, "loss": 0.1547, "lr": 2.973793713118039e-07, "epoch": 19.11620294599018, "percentage": 95.58, "elapsed_time": "0:56:34", "remaining_time": "0:02:36", "throughput": 2374.02, "total_tokens": 8058544}
4692
+ {"current_steps": 23365, "total_steps": 24440, "loss": 0.1836, "lr": 2.9464017183579995e-07, "epoch": 19.120294599018003, "percentage": 95.6, "elapsed_time": "0:56:35", "remaining_time": "0:02:36", "throughput": 2374.05, "total_tokens": 8060304}
4693
+ {"current_steps": 23370, "total_steps": 24440, "loss": 0.2951, "lr": 2.91913571575092e-07, "epoch": 19.124386252045827, "percentage": 95.62, "elapsed_time": "0:56:35", "remaining_time": "0:02:35", "throughput": 2374.07, "total_tokens": 8062032}
4694
+ {"current_steps": 23375, "total_steps": 24440, "loss": 0.1345, "lr": 2.8919957192019007e-07, "epoch": 19.128477905073648, "percentage": 95.64, "elapsed_time": "0:56:36", "remaining_time": "0:02:34", "throughput": 2374.09, "total_tokens": 8063728}
4695
+ {"current_steps": 23380, "total_steps": 24440, "loss": 0.1553, "lr": 2.864981742551759e-07, "epoch": 19.132569558101473, "percentage": 95.66, "elapsed_time": "0:56:37", "remaining_time": "0:02:34", "throughput": 2374.11, "total_tokens": 8065456}
4696
+ {"current_steps": 23385, "total_steps": 24440, "loss": 0.2916, "lr": 2.838093799577085e-07, "epoch": 19.136661211129297, "percentage": 95.68, "elapsed_time": "0:56:37", "remaining_time": "0:02:33", "throughput": 2374.13, "total_tokens": 8067248}
4697
+ {"current_steps": 23390, "total_steps": 24440, "loss": 0.2322, "lr": 2.811331903990133e-07, "epoch": 19.14075286415712, "percentage": 95.7, "elapsed_time": "0:56:38", "remaining_time": "0:02:32", "throughput": 2374.16, "total_tokens": 8069104}
4698
+ {"current_steps": 23395, "total_steps": 24440, "loss": 0.2848, "lr": 2.7846960694389024e-07, "epoch": 19.144844517184943, "percentage": 95.72, "elapsed_time": "0:56:39", "remaining_time": "0:02:31", "throughput": 2374.19, "total_tokens": 8070992}
4699
+ {"current_steps": 23400, "total_steps": 24440, "loss": 0.0714, "lr": 2.758186309507138e-07, "epoch": 19.148936170212767, "percentage": 95.74, "elapsed_time": "0:56:40", "remaining_time": "0:02:31", "throughput": 2374.2, "total_tokens": 8072688}
4700
+ {"current_steps": 23405, "total_steps": 24440, "loss": 0.2163, "lr": 2.731802637714276e-07, "epoch": 19.15302782324059, "percentage": 95.77, "elapsed_time": "0:56:40", "remaining_time": "0:02:30", "throughput": 2374.21, "total_tokens": 8074448}
4701
+ {"current_steps": 23410, "total_steps": 24440, "loss": 0.1171, "lr": 2.705545067515386e-07, "epoch": 19.157119476268413, "percentage": 95.79, "elapsed_time": "0:56:41", "remaining_time": "0:02:29", "throughput": 2374.25, "total_tokens": 8076304}
4702
+ {"current_steps": 23415, "total_steps": 24440, "loss": 0.297, "lr": 2.679413612301285e-07, "epoch": 19.161211129296234, "percentage": 95.81, "elapsed_time": "0:56:42", "remaining_time": "0:02:28", "throughput": 2374.28, "total_tokens": 8078064}
4703
+ {"current_steps": 23420, "total_steps": 24440, "loss": 0.1647, "lr": 2.6534082853985063e-07, "epoch": 19.16530278232406, "percentage": 95.83, "elapsed_time": "0:56:43", "remaining_time": "0:02:28", "throughput": 2374.32, "total_tokens": 8079856}
4704
+ {"current_steps": 23425, "total_steps": 24440, "loss": 0.1857, "lr": 2.6275291000691646e-07, "epoch": 19.169394435351883, "percentage": 95.85, "elapsed_time": "0:56:43", "remaining_time": "0:02:27", "throughput": 2374.37, "total_tokens": 8081648}
4705
+ {"current_steps": 23430, "total_steps": 24440, "loss": 0.1146, "lr": 2.601776069511147e-07, "epoch": 19.173486088379704, "percentage": 95.87, "elapsed_time": "0:56:44", "remaining_time": "0:02:26", "throughput": 2374.39, "total_tokens": 8083472}
4706
+ {"current_steps": 23435, "total_steps": 24440, "loss": 0.1975, "lr": 2.576149206857975e-07, "epoch": 19.17757774140753, "percentage": 95.89, "elapsed_time": "0:56:45", "remaining_time": "0:02:26", "throughput": 2374.42, "total_tokens": 8085200}
4707
+ {"current_steps": 23440, "total_steps": 24440, "loss": 0.2644, "lr": 2.55064852517875e-07, "epoch": 19.181669394435353, "percentage": 95.91, "elapsed_time": "0:56:45", "remaining_time": "0:02:25", "throughput": 2374.41, "total_tokens": 8086864}
4708
+ {"current_steps": 23445, "total_steps": 24440, "loss": 0.1771, "lr": 2.525274037478348e-07, "epoch": 19.185761047463174, "percentage": 95.93, "elapsed_time": "0:56:46", "remaining_time": "0:02:24", "throughput": 2374.44, "total_tokens": 8088592}
4709
+ {"current_steps": 23450, "total_steps": 24440, "loss": 0.1166, "lr": 2.500025756697166e-07, "epoch": 19.189852700491, "percentage": 95.95, "elapsed_time": "0:56:47", "remaining_time": "0:02:23", "throughput": 2374.45, "total_tokens": 8090288}
4710
+ {"current_steps": 23455, "total_steps": 24440, "loss": 0.2147, "lr": 2.47490369571135e-07, "epoch": 19.193944353518823, "percentage": 95.97, "elapsed_time": "0:56:47", "remaining_time": "0:02:23", "throughput": 2374.46, "total_tokens": 8091984}
4711
+ {"current_steps": 23460, "total_steps": 24440, "loss": 0.3044, "lr": 2.449907867332596e-07, "epoch": 19.198036006546644, "percentage": 95.99, "elapsed_time": "0:56:48", "remaining_time": "0:02:22", "throughput": 2374.46, "total_tokens": 8093648}
4712
+ {"current_steps": 23465, "total_steps": 24440, "loss": 0.1909, "lr": 2.4250382843082343e-07, "epoch": 19.20212765957447, "percentage": 96.01, "elapsed_time": "0:56:49", "remaining_time": "0:02:21", "throughput": 2374.5, "total_tokens": 8095408}
4713
+ {"current_steps": 23470, "total_steps": 24440, "loss": 0.2667, "lr": 2.4002949593212866e-07, "epoch": 19.20621931260229, "percentage": 96.03, "elapsed_time": "0:56:50", "remaining_time": "0:02:20", "throughput": 2374.53, "total_tokens": 8097168}
4714
+ {"current_steps": 23475, "total_steps": 24440, "loss": 0.1498, "lr": 2.37567790499027e-07, "epoch": 19.210310965630114, "percentage": 96.05, "elapsed_time": "0:56:50", "remaining_time": "0:02:20", "throughput": 2374.55, "total_tokens": 8098864}
4715
+ {"current_steps": 23480, "total_steps": 24440, "loss": 0.1859, "lr": 2.3511871338693926e-07, "epoch": 19.21440261865794, "percentage": 96.07, "elapsed_time": "0:56:51", "remaining_time": "0:02:19", "throughput": 2374.58, "total_tokens": 8100688}
4716
+ {"current_steps": 23485, "total_steps": 24440, "loss": 0.1799, "lr": 2.3268226584484408e-07, "epoch": 19.21849427168576, "percentage": 96.09, "elapsed_time": "0:56:52", "remaining_time": "0:02:18", "throughput": 2374.59, "total_tokens": 8102384}
4717
+ {"current_steps": 23490, "total_steps": 24440, "loss": 0.1386, "lr": 2.3025844911527815e-07, "epoch": 19.222585924713584, "percentage": 96.11, "elapsed_time": "0:56:52", "remaining_time": "0:02:18", "throughput": 2374.63, "total_tokens": 8104176}
4718
+ {"current_steps": 23495, "total_steps": 24440, "loss": 0.1272, "lr": 2.2784726443433602e-07, "epoch": 19.22667757774141, "percentage": 96.13, "elapsed_time": "0:56:53", "remaining_time": "0:02:17", "throughput": 2374.68, "total_tokens": 8106000}
4719
+ {"current_steps": 23500, "total_steps": 24440, "loss": 0.1379, "lr": 2.2544871303167015e-07, "epoch": 19.23076923076923, "percentage": 96.15, "elapsed_time": "0:56:54", "remaining_time": "0:02:16", "throughput": 2374.69, "total_tokens": 8107664}
4720
+ {"current_steps": 23505, "total_steps": 24440, "loss": 0.1692, "lr": 2.230627961304993e-07, "epoch": 19.234860883797054, "percentage": 96.17, "elapsed_time": "0:56:54", "remaining_time": "0:02:15", "throughput": 2374.71, "total_tokens": 8109392}
4721
+ {"current_steps": 23510, "total_steps": 24440, "loss": 0.1943, "lr": 2.2068951494758072e-07, "epoch": 19.238952536824875, "percentage": 96.19, "elapsed_time": "0:56:55", "remaining_time": "0:02:15", "throughput": 2374.73, "total_tokens": 8111088}
4722
+ {"current_steps": 23515, "total_steps": 24440, "loss": 0.1976, "lr": 2.1832887069324348e-07, "epoch": 19.2430441898527, "percentage": 96.22, "elapsed_time": "0:56:56", "remaining_time": "0:02:14", "throughput": 2374.72, "total_tokens": 8112720}
4723
+ {"current_steps": 23520, "total_steps": 24440, "loss": 0.2451, "lr": 2.1598086457136625e-07, "epoch": 19.247135842880525, "percentage": 96.24, "elapsed_time": "0:56:56", "remaining_time": "0:02:13", "throughput": 2374.75, "total_tokens": 8114448}
4724
+ {"current_steps": 23525, "total_steps": 24440, "loss": 0.3983, "lr": 2.1364549777938004e-07, "epoch": 19.251227495908346, "percentage": 96.26, "elapsed_time": "0:56:57", "remaining_time": "0:02:12", "throughput": 2374.75, "total_tokens": 8116112}
4725
+ {"current_steps": 23530, "total_steps": 24440, "loss": 0.2904, "lr": 2.113227715082766e-07, "epoch": 19.25531914893617, "percentage": 96.28, "elapsed_time": "0:56:58", "remaining_time": "0:02:12", "throughput": 2374.76, "total_tokens": 8117808}
4726
+ {"current_steps": 23535, "total_steps": 24440, "loss": 0.3651, "lr": 2.0901268694259734e-07, "epoch": 19.259410801963995, "percentage": 96.3, "elapsed_time": "0:56:59", "remaining_time": "0:02:11", "throughput": 2374.78, "total_tokens": 8119504}
4727
+ {"current_steps": 23540, "total_steps": 24440, "loss": 0.2713, "lr": 2.0671524526043317e-07, "epoch": 19.263502454991816, "percentage": 96.32, "elapsed_time": "0:56:59", "remaining_time": "0:02:10", "throughput": 2374.84, "total_tokens": 8121360}
4728
+ {"current_steps": 23545, "total_steps": 24440, "loss": 0.1901, "lr": 2.0443044763343577e-07, "epoch": 19.26759410801964, "percentage": 96.34, "elapsed_time": "0:57:00", "remaining_time": "0:02:10", "throughput": 2374.86, "total_tokens": 8123120}
4729
+ {"current_steps": 23550, "total_steps": 24440, "loss": 0.2723, "lr": 2.0215829522680084e-07, "epoch": 19.271685761047465, "percentage": 96.36, "elapsed_time": "0:57:01", "remaining_time": "0:02:09", "throughput": 2374.89, "total_tokens": 8124848}
4730
+ {"current_steps": 23555, "total_steps": 24440, "loss": 0.2703, "lr": 1.9989878919928206e-07, "epoch": 19.275777414075286, "percentage": 96.38, "elapsed_time": "0:57:01", "remaining_time": "0:02:08", "throughput": 2374.91, "total_tokens": 8126576}
4731
+ {"current_steps": 23560, "total_steps": 24440, "loss": 0.3243, "lr": 1.9765193070317712e-07, "epoch": 19.27986906710311, "percentage": 96.4, "elapsed_time": "0:57:02", "remaining_time": "0:02:07", "throughput": 2374.93, "total_tokens": 8128336}
4732
+ {"current_steps": 23565, "total_steps": 24440, "loss": 0.2802, "lr": 1.9541772088433607e-07, "epoch": 19.28396072013093, "percentage": 96.42, "elapsed_time": "0:57:03", "remaining_time": "0:02:07", "throughput": 2374.92, "total_tokens": 8129968}
4733
+ {"current_steps": 23570, "total_steps": 24440, "loss": 0.1618, "lr": 1.9319616088215864e-07, "epoch": 19.288052373158756, "percentage": 96.44, "elapsed_time": "0:57:03", "remaining_time": "0:02:06", "throughput": 2374.94, "total_tokens": 8131696}
4734
+ {"current_steps": 23575, "total_steps": 24440, "loss": 0.323, "lr": 1.9098725182959688e-07, "epoch": 19.29214402618658, "percentage": 96.46, "elapsed_time": "0:57:04", "remaining_time": "0:02:05", "throughput": 2374.96, "total_tokens": 8133424}
4735
+ {"current_steps": 23580, "total_steps": 24440, "loss": 0.1071, "lr": 1.8879099485314688e-07, "epoch": 19.2962356792144, "percentage": 96.48, "elapsed_time": "0:57:05", "remaining_time": "0:02:04", "throughput": 2374.99, "total_tokens": 8135184}
4736
+ {"current_steps": 23585, "total_steps": 24440, "loss": 0.1308, "lr": 1.866073910728544e-07, "epoch": 19.300327332242226, "percentage": 96.5, "elapsed_time": "0:57:06", "remaining_time": "0:02:04", "throughput": 2375.03, "total_tokens": 8136944}
4737
+ {"current_steps": 23590, "total_steps": 24440, "loss": 0.2883, "lr": 1.8443644160230923e-07, "epoch": 19.30441898527005, "percentage": 96.52, "elapsed_time": "0:57:06", "remaining_time": "0:02:03", "throughput": 2375.05, "total_tokens": 8138672}
4738
+ {"current_steps": 23595, "total_steps": 24440, "loss": 0.1724, "lr": 1.8227814754865068e-07, "epoch": 19.30851063829787, "percentage": 96.54, "elapsed_time": "0:57:07", "remaining_time": "0:02:02", "throughput": 2375.07, "total_tokens": 8140432}
4739
+ {"current_steps": 23600, "total_steps": 24440, "loss": 0.3135, "lr": 1.8013251001256503e-07, "epoch": 19.312602291325696, "percentage": 96.56, "elapsed_time": "0:57:08", "remaining_time": "0:02:02", "throughput": 2375.08, "total_tokens": 8142096}
4740
+ {"current_steps": 23605, "total_steps": 24440, "loss": 0.3923, "lr": 1.7799953008827975e-07, "epoch": 19.316693944353517, "percentage": 96.58, "elapsed_time": "0:57:08", "remaining_time": "0:02:01", "throughput": 2375.08, "total_tokens": 8143760}
4741
+ {"current_steps": 23610, "total_steps": 24440, "loss": 0.2273, "lr": 1.7587920886357468e-07, "epoch": 19.32078559738134, "percentage": 96.6, "elapsed_time": "0:57:09", "remaining_time": "0:02:00", "throughput": 2375.09, "total_tokens": 8145424}
4742
+ {"current_steps": 23615, "total_steps": 24440, "loss": 0.2819, "lr": 1.7377154741976542e-07, "epoch": 19.324877250409166, "percentage": 96.62, "elapsed_time": "0:57:10", "remaining_time": "0:01:59", "throughput": 2375.09, "total_tokens": 8147088}
4743
+ {"current_steps": 23620, "total_steps": 24440, "loss": 0.1426, "lr": 1.716765468317144e-07, "epoch": 19.328968903436987, "percentage": 96.64, "elapsed_time": "0:57:10", "remaining_time": "0:01:59", "throughput": 2375.12, "total_tokens": 8148848}
4744
+ {"current_steps": 23625, "total_steps": 24440, "loss": 0.3019, "lr": 1.6959420816783089e-07, "epoch": 19.33306055646481, "percentage": 96.67, "elapsed_time": "0:57:11", "remaining_time": "0:01:58", "throughput": 2375.17, "total_tokens": 8150640}
4745
+ {"current_steps": 23630, "total_steps": 24440, "loss": 0.1238, "lr": 1.6752453249005984e-07, "epoch": 19.337152209492636, "percentage": 96.69, "elapsed_time": "0:57:12", "remaining_time": "0:01:57", "throughput": 2375.18, "total_tokens": 8152336}
4746
+ {"current_steps": 23635, "total_steps": 24440, "loss": 0.2059, "lr": 1.6546752085389861e-07, "epoch": 19.341243862520457, "percentage": 96.71, "elapsed_time": "0:57:13", "remaining_time": "0:01:56", "throughput": 2375.2, "total_tokens": 8154096}
4747
+ {"current_steps": 23640, "total_steps": 24440, "loss": 0.2306, "lr": 1.6342317430837472e-07, "epoch": 19.345335515548282, "percentage": 96.73, "elapsed_time": "0:57:13", "remaining_time": "0:01:56", "throughput": 2375.2, "total_tokens": 8155760}
4748
+ {"current_steps": 23645, "total_steps": 24440, "loss": 0.2202, "lr": 1.6139149389606256e-07, "epoch": 19.349427168576106, "percentage": 96.75, "elapsed_time": "0:57:14", "remaining_time": "0:01:55", "throughput": 2375.21, "total_tokens": 8157424}
4749
+ {"current_steps": 23650, "total_steps": 24440, "loss": 0.1626, "lr": 1.5937248065307775e-07, "epoch": 19.353518821603927, "percentage": 96.77, "elapsed_time": "0:57:15", "remaining_time": "0:01:54", "throughput": 2375.21, "total_tokens": 8159056}
4750
+ {"current_steps": 23655, "total_steps": 24440, "loss": 0.1561, "lr": 1.5736613560907444e-07, "epoch": 19.357610474631752, "percentage": 96.79, "elapsed_time": "0:57:15", "remaining_time": "0:01:54", "throughput": 2375.24, "total_tokens": 8160816}
4751
+ {"current_steps": 23660, "total_steps": 24440, "loss": 0.2603, "lr": 1.55372459787248e-07, "epoch": 19.361702127659573, "percentage": 96.81, "elapsed_time": "0:57:16", "remaining_time": "0:01:53", "throughput": 2375.23, "total_tokens": 8162448}
4752
+ {"current_steps": 23665, "total_steps": 24440, "loss": 0.1405, "lr": 1.5339145420433244e-07, "epoch": 19.365793780687397, "percentage": 96.83, "elapsed_time": "0:57:17", "remaining_time": "0:01:52", "throughput": 2375.25, "total_tokens": 8164144}
4753
+ {"current_steps": 23670, "total_steps": 24440, "loss": 0.172, "lr": 1.5142311987059177e-07, "epoch": 19.369885433715222, "percentage": 96.85, "elapsed_time": "0:57:17", "remaining_time": "0:01:51", "throughput": 2375.28, "total_tokens": 8165904}
4754
+ {"current_steps": 23675, "total_steps": 24440, "loss": 0.1503, "lr": 1.4946745778984251e-07, "epoch": 19.373977086743043, "percentage": 96.87, "elapsed_time": "0:57:18", "remaining_time": "0:01:51", "throughput": 2375.32, "total_tokens": 8167728}
4755
+ {"current_steps": 23680, "total_steps": 24440, "loss": 0.1914, "lr": 1.475244689594285e-07, "epoch": 19.378068739770868, "percentage": 96.89, "elapsed_time": "0:57:19", "remaining_time": "0:01:50", "throughput": 2375.33, "total_tokens": 8169424}
4756
+ {"current_steps": 23685, "total_steps": 24440, "loss": 0.2429, "lr": 1.4559415437023493e-07, "epoch": 19.382160392798692, "percentage": 96.91, "elapsed_time": "0:57:19", "remaining_time": "0:01:49", "throughput": 2375.34, "total_tokens": 8171088}
4757
+ {"current_steps": 23690, "total_steps": 24440, "loss": 0.1937, "lr": 1.4367651500667712e-07, "epoch": 19.386252045826513, "percentage": 96.93, "elapsed_time": "0:57:20", "remaining_time": "0:01:48", "throughput": 2375.34, "total_tokens": 8172752}
4758
+ {"current_steps": 23695, "total_steps": 24440, "loss": 0.1395, "lr": 1.4177155184671443e-07, "epoch": 19.390343698854338, "percentage": 96.95, "elapsed_time": "0:57:21", "remaining_time": "0:01:48", "throughput": 2375.34, "total_tokens": 8174416}
4759
+ {"current_steps": 23700, "total_steps": 24440, "loss": 0.1375, "lr": 1.398792658618392e-07, "epoch": 19.39443535188216, "percentage": 96.97, "elapsed_time": "0:57:22", "remaining_time": "0:01:47", "throughput": 2375.37, "total_tokens": 8176144}
4760
+ {"current_steps": 23705, "total_steps": 24440, "loss": 0.2395, "lr": 1.379996580170767e-07, "epoch": 19.398527004909983, "percentage": 96.99, "elapsed_time": "0:57:22", "remaining_time": "0:01:46", "throughput": 2375.36, "total_tokens": 8177776}
4761
+ {"current_steps": 23710, "total_steps": 24440, "loss": 0.2143, "lr": 1.3613272927098796e-07, "epoch": 19.402618657937808, "percentage": 97.01, "elapsed_time": "0:57:23", "remaining_time": "0:01:46", "throughput": 2375.4, "total_tokens": 8179568}
4762
+ {"current_steps": 23715, "total_steps": 24440, "loss": 0.2919, "lr": 1.3427848057566694e-07, "epoch": 19.40671031096563, "percentage": 97.03, "elapsed_time": "0:57:24", "remaining_time": "0:01:45", "throughput": 2375.42, "total_tokens": 8181264}
4763
+ {"current_steps": 23720, "total_steps": 24440, "loss": 0.0887, "lr": 1.3243691287673777e-07, "epoch": 19.410801963993453, "percentage": 97.05, "elapsed_time": "0:57:24", "remaining_time": "0:01:44", "throughput": 2375.4, "total_tokens": 8182864}
4764
+ {"current_steps": 23725, "total_steps": 24440, "loss": 0.2428, "lr": 1.306080271133686e-07, "epoch": 19.414893617021278, "percentage": 97.07, "elapsed_time": "0:57:25", "remaining_time": "0:01:43", "throughput": 2375.42, "total_tokens": 8184592}
4765
+ {"current_steps": 23730, "total_steps": 24440, "loss": 0.2692, "lr": 1.2879182421824675e-07, "epoch": 19.4189852700491, "percentage": 97.09, "elapsed_time": "0:57:26", "remaining_time": "0:01:43", "throughput": 2375.44, "total_tokens": 8186288}
4766
+ {"current_steps": 23735, "total_steps": 24440, "loss": 0.2144, "lr": 1.2698830511760072e-07, "epoch": 19.423076923076923, "percentage": 97.12, "elapsed_time": "0:57:26", "remaining_time": "0:01:42", "throughput": 2375.46, "total_tokens": 8188112}
4767
+ {"current_steps": 23740, "total_steps": 24440, "loss": 0.1416, "lr": 1.251974707311865e-07, "epoch": 19.427168576104748, "percentage": 97.14, "elapsed_time": "0:57:27", "remaining_time": "0:01:41", "throughput": 2375.5, "total_tokens": 8189872}
4768
+ {"current_steps": 23745, "total_steps": 24440, "loss": 0.3567, "lr": 1.2341932197229012e-07, "epoch": 19.43126022913257, "percentage": 97.16, "elapsed_time": "0:57:28", "remaining_time": "0:01:40", "throughput": 2375.5, "total_tokens": 8191536}
4769
+ {"current_steps": 23750, "total_steps": 24440, "loss": 0.2945, "lr": 1.2165385974773348e-07, "epoch": 19.435351882160393, "percentage": 97.18, "elapsed_time": "0:57:29", "remaining_time": "0:01:40", "throughput": 2375.56, "total_tokens": 8193360}
4770
+ {"current_steps": 23755, "total_steps": 24440, "loss": 0.2171, "lr": 1.19901084957863e-07, "epoch": 19.439443535188214, "percentage": 97.2, "elapsed_time": "0:57:29", "remaining_time": "0:01:39", "throughput": 2375.58, "total_tokens": 8195088}
4771
+ {"current_steps": 23760, "total_steps": 24440, "loss": 0.2719, "lr": 1.1816099849656093e-07, "epoch": 19.44353518821604, "percentage": 97.22, "elapsed_time": "0:57:30", "remaining_time": "0:01:38", "throughput": 2375.6, "total_tokens": 8196816}
4772
+ {"current_steps": 23765, "total_steps": 24440, "loss": 0.2232, "lr": 1.1643360125123126e-07, "epoch": 19.447626841243864, "percentage": 97.24, "elapsed_time": "0:57:31", "remaining_time": "0:01:38", "throughput": 2375.63, "total_tokens": 8198544}
4773
+ {"current_steps": 23770, "total_steps": 24440, "loss": 0.2071, "lr": 1.14718894102811e-07, "epoch": 19.451718494271685, "percentage": 97.26, "elapsed_time": "0:57:31", "remaining_time": "0:01:37", "throughput": 2375.64, "total_tokens": 8200240}
4774
+ {"current_steps": 23775, "total_steps": 24440, "loss": 0.0994, "lr": 1.1301687792576454e-07, "epoch": 19.45581014729951, "percentage": 97.28, "elapsed_time": "0:57:32", "remaining_time": "0:01:36", "throughput": 2375.65, "total_tokens": 8201936}
4775
+ {"current_steps": 23780, "total_steps": 24440, "loss": 0.2416, "lr": 1.1132755358808367e-07, "epoch": 19.459901800327334, "percentage": 97.3, "elapsed_time": "0:57:33", "remaining_time": "0:01:35", "throughput": 2375.67, "total_tokens": 8203632}
4776
+ {"current_steps": 23785, "total_steps": 24440, "loss": 0.1386, "lr": 1.0965092195129034e-07, "epoch": 19.463993453355155, "percentage": 97.32, "elapsed_time": "0:57:33", "remaining_time": "0:01:35", "throughput": 2375.67, "total_tokens": 8205296}
4777
+ {"current_steps": 23790, "total_steps": 24440, "loss": 0.2921, "lr": 1.0798698387043115e-07, "epoch": 19.46808510638298, "percentage": 97.34, "elapsed_time": "0:57:34", "remaining_time": "0:01:34", "throughput": 2375.65, "total_tokens": 8206896}
4778
+ {"current_steps": 23795, "total_steps": 24440, "loss": 0.2191, "lr": 1.0633574019407733e-07, "epoch": 19.4721767594108, "percentage": 97.36, "elapsed_time": "0:57:35", "remaining_time": "0:01:33", "throughput": 2375.68, "total_tokens": 8208656}
4779
+ {"current_steps": 23800, "total_steps": 24440, "loss": 0.2403, "lr": 1.0469719176433024e-07, "epoch": 19.476268412438625, "percentage": 97.38, "elapsed_time": "0:57:36", "remaining_time": "0:01:32", "throughput": 2375.67, "total_tokens": 8210352}
4780
+ {"current_steps": 23805, "total_steps": 24440, "loss": 0.217, "lr": 1.0307133941681591e-07, "epoch": 19.48036006546645, "percentage": 97.4, "elapsed_time": "0:57:36", "remaining_time": "0:01:32", "throughput": 2375.67, "total_tokens": 8212016}
4781
+ {"current_steps": 23810, "total_steps": 24440, "loss": 0.3406, "lr": 1.0145818398068774e-07, "epoch": 19.48445171849427, "percentage": 97.42, "elapsed_time": "0:57:37", "remaining_time": "0:01:31", "throughput": 2375.68, "total_tokens": 8213712}
4782
+ {"current_steps": 23815, "total_steps": 24440, "loss": 0.2921, "lr": 9.985772627861545e-08, "epoch": 19.488543371522095, "percentage": 97.44, "elapsed_time": "0:57:38", "remaining_time": "0:01:30", "throughput": 2375.7, "total_tokens": 8215440}
4783
+ {"current_steps": 23820, "total_steps": 24440, "loss": 0.2413, "lr": 9.826996712679892e-08, "epoch": 19.49263502454992, "percentage": 97.46, "elapsed_time": "0:57:38", "remaining_time": "0:01:30", "throughput": 2375.72, "total_tokens": 8217136}
4784
+ {"current_steps": 23825, "total_steps": 24440, "loss": 0.2566, "lr": 9.669490733496544e-08, "epoch": 19.49672667757774, "percentage": 97.48, "elapsed_time": "0:57:39", "remaining_time": "0:01:29", "throughput": 2375.74, "total_tokens": 8218864}
4785
+ {"current_steps": 23830, "total_steps": 24440, "loss": 0.2406, "lr": 9.513254770636137e-08, "epoch": 19.500818330605565, "percentage": 97.5, "elapsed_time": "0:57:40", "remaining_time": "0:01:28", "throughput": 2375.76, "total_tokens": 8220592}
4786
+ {"current_steps": 23835, "total_steps": 24440, "loss": 0.1124, "lr": 9.358288903775769e-08, "epoch": 19.50490998363339, "percentage": 97.52, "elapsed_time": "0:57:40", "remaining_time": "0:01:27", "throughput": 2375.77, "total_tokens": 8222256}
4787
+ {"current_steps": 23840, "total_steps": 24440, "loss": 0.1653, "lr": 9.204593211944723e-08, "epoch": 19.50900163666121, "percentage": 97.55, "elapsed_time": "0:57:41", "remaining_time": "0:01:27", "throughput": 2375.77, "total_tokens": 8223920}
4788
+ {"current_steps": 23845, "total_steps": 24440, "loss": 0.1439, "lr": 9.052167773524744e-08, "epoch": 19.513093289689035, "percentage": 97.57, "elapsed_time": "0:57:42", "remaining_time": "0:01:26", "throughput": 2375.8, "total_tokens": 8225680}
4789
+ {"current_steps": 23850, "total_steps": 24440, "loss": 0.3408, "lr": 8.901012666249208e-08, "epoch": 19.517184942716856, "percentage": 97.59, "elapsed_time": "0:57:42", "remaining_time": "0:01:25", "throughput": 2375.82, "total_tokens": 8227408}
4790
+ {"current_steps": 23855, "total_steps": 24440, "loss": 0.1826, "lr": 8.751127967204509e-08, "epoch": 19.52127659574468, "percentage": 97.61, "elapsed_time": "0:57:43", "remaining_time": "0:01:24", "throughput": 2375.83, "total_tokens": 8229104}
4791
+ {"current_steps": 23860, "total_steps": 24440, "loss": 0.1465, "lr": 8.60251375282839e-08, "epoch": 19.525368248772505, "percentage": 97.63, "elapsed_time": "0:57:44", "remaining_time": "0:01:24", "throughput": 2375.86, "total_tokens": 8230864}
4792
+ {"current_steps": 23865, "total_steps": 24440, "loss": 0.2146, "lr": 8.455170098911059e-08, "epoch": 19.529459901800326, "percentage": 97.65, "elapsed_time": "0:57:45", "remaining_time": "0:01:23", "throughput": 2375.85, "total_tokens": 8232496}
4793
+ {"current_steps": 23870, "total_steps": 24440, "loss": 0.2724, "lr": 8.30909708059463e-08, "epoch": 19.53355155482815, "percentage": 97.67, "elapsed_time": "0:57:45", "remaining_time": "0:01:22", "throughput": 2375.85, "total_tokens": 8234160}
4794
+ {"current_steps": 23875, "total_steps": 24440, "loss": 0.1472, "lr": 8.164294772373681e-08, "epoch": 19.537643207855975, "percentage": 97.69, "elapsed_time": "0:57:46", "remaining_time": "0:01:22", "throughput": 2375.88, "total_tokens": 8235920}
4795
+ {"current_steps": 23880, "total_steps": 24440, "loss": 0.2734, "lr": 8.020763248093589e-08, "epoch": 19.541734860883796, "percentage": 97.71, "elapsed_time": "0:57:47", "remaining_time": "0:01:21", "throughput": 2375.9, "total_tokens": 8237648}
4796
+ {"current_steps": 23885, "total_steps": 24440, "loss": 0.4104, "lr": 7.878502580953296e-08, "epoch": 19.54582651391162, "percentage": 97.73, "elapsed_time": "0:57:47", "remaining_time": "0:01:20", "throughput": 2375.93, "total_tokens": 8239408}
4797
+ {"current_steps": 23890, "total_steps": 24440, "loss": 0.1764, "lr": 7.737512843501993e-08, "epoch": 19.54991816693944, "percentage": 97.75, "elapsed_time": "0:57:48", "remaining_time": "0:01:19", "throughput": 2375.95, "total_tokens": 8241136}
4798
+ {"current_steps": 23895, "total_steps": 24440, "loss": 0.2555, "lr": 7.597794107641887e-08, "epoch": 19.554009819967266, "percentage": 97.77, "elapsed_time": "0:57:49", "remaining_time": "0:01:19", "throughput": 2375.99, "total_tokens": 8242992}
4799
+ {"current_steps": 23900, "total_steps": 24440, "loss": 0.1819, "lr": 7.459346444626814e-08, "epoch": 19.55810147299509, "percentage": 97.79, "elapsed_time": "0:57:49", "remaining_time": "0:01:18", "throughput": 2376.0, "total_tokens": 8244688}
4800
+ {"current_steps": 23905, "total_steps": 24440, "loss": 0.1155, "lr": 7.322169925061684e-08, "epoch": 19.562193126022912, "percentage": 97.81, "elapsed_time": "0:57:50", "remaining_time": "0:01:17", "throughput": 2376.01, "total_tokens": 8246352}
4801
+ {"current_steps": 23910, "total_steps": 24440, "loss": 0.067, "lr": 7.186264618903871e-08, "epoch": 19.566284779050736, "percentage": 97.83, "elapsed_time": "0:57:51", "remaining_time": "0:01:16", "throughput": 2376.04, "total_tokens": 8248176}
4802
+ {"current_steps": 23915, "total_steps": 24440, "loss": 0.1778, "lr": 7.0516305954621e-08, "epoch": 19.57037643207856, "percentage": 97.85, "elapsed_time": "0:57:52", "remaining_time": "0:01:16", "throughput": 2376.06, "total_tokens": 8249904}
4803
+ {"current_steps": 23920, "total_steps": 24440, "loss": 0.2028, "lr": 6.918267923397282e-08, "epoch": 19.574468085106382, "percentage": 97.87, "elapsed_time": "0:57:52", "remaining_time": "0:01:15", "throughput": 2376.08, "total_tokens": 8251600}
4804
+ {"current_steps": 23925, "total_steps": 24440, "loss": 0.1085, "lr": 6.786176670721122e-08, "epoch": 19.578559738134206, "percentage": 97.89, "elapsed_time": "0:57:53", "remaining_time": "0:01:14", "throughput": 2376.12, "total_tokens": 8253456}
4805
+ {"current_steps": 23930, "total_steps": 24440, "loss": 0.3025, "lr": 6.655356904797517e-08, "epoch": 19.58265139116203, "percentage": 97.91, "elapsed_time": "0:57:54", "remaining_time": "0:01:14", "throughput": 2376.14, "total_tokens": 8255248}
4806
+ {"current_steps": 23935, "total_steps": 24440, "loss": 0.3047, "lr": 6.525808692341984e-08, "epoch": 19.586743044189852, "percentage": 97.93, "elapsed_time": "0:57:54", "remaining_time": "0:01:13", "throughput": 2376.15, "total_tokens": 8256944}
4807
+ {"current_steps": 23940, "total_steps": 24440, "loss": 0.1541, "lr": 6.3975320994214e-08, "epoch": 19.590834697217677, "percentage": 97.95, "elapsed_time": "0:57:55", "remaining_time": "0:01:12", "throughput": 2376.16, "total_tokens": 8258640}
4808
+ {"current_steps": 23945, "total_steps": 24440, "loss": 0.3445, "lr": 6.270527191453989e-08, "epoch": 19.594926350245498, "percentage": 97.97, "elapsed_time": "0:57:56", "remaining_time": "0:01:11", "throughput": 2376.18, "total_tokens": 8260336}
4809
+ {"current_steps": 23950, "total_steps": 24440, "loss": 0.1415, "lr": 6.14479403320961e-08, "epoch": 19.599018003273322, "percentage": 98.0, "elapsed_time": "0:57:57", "remaining_time": "0:01:11", "throughput": 2376.18, "total_tokens": 8262000}
4810
+ {"current_steps": 23955, "total_steps": 24440, "loss": 0.1017, "lr": 6.020332688809471e-08, "epoch": 19.603109656301147, "percentage": 98.02, "elapsed_time": "0:57:57", "remaining_time": "0:01:10", "throughput": 2376.22, "total_tokens": 8263792}
4811
+ {"current_steps": 23960, "total_steps": 24440, "loss": 0.1802, "lr": 5.897143221726409e-08, "epoch": 19.607201309328968, "percentage": 98.04, "elapsed_time": "0:57:58", "remaining_time": "0:01:09", "throughput": 2376.23, "total_tokens": 8265488}
4812
+ {"current_steps": 23965, "total_steps": 24440, "loss": 0.1995, "lr": 5.775225694784336e-08, "epoch": 19.611292962356792, "percentage": 98.06, "elapsed_time": "0:57:59", "remaining_time": "0:01:08", "throughput": 2376.27, "total_tokens": 8267248}
4813
+ {"current_steps": 23970, "total_steps": 24440, "loss": 0.2815, "lr": 5.654580170158519e-08, "epoch": 19.615384615384617, "percentage": 98.08, "elapsed_time": "0:57:59", "remaining_time": "0:01:08", "throughput": 2376.27, "total_tokens": 8268912}
4814
+ {"current_steps": 23975, "total_steps": 24440, "loss": 0.1424, "lr": 5.535206709376128e-08, "epoch": 19.619476268412438, "percentage": 98.1, "elapsed_time": "0:58:00", "remaining_time": "0:01:07", "throughput": 2376.3, "total_tokens": 8270672}
4815
+ {"current_steps": 23980, "total_steps": 24440, "loss": 0.269, "lr": 5.417105373314579e-08, "epoch": 19.623567921440262, "percentage": 98.12, "elapsed_time": "0:58:01", "remaining_time": "0:01:06", "throughput": 2376.31, "total_tokens": 8272368}
4816
+ {"current_steps": 23985, "total_steps": 24440, "loss": 0.28, "lr": 5.300276222203193e-08, "epoch": 19.627659574468083, "percentage": 98.14, "elapsed_time": "0:58:01", "remaining_time": "0:01:06", "throughput": 2376.34, "total_tokens": 8274128}
4817
+ {"current_steps": 23990, "total_steps": 24440, "loss": 0.3207, "lr": 5.184719315622644e-08, "epoch": 19.631751227495908, "percentage": 98.16, "elapsed_time": "0:58:02", "remaining_time": "0:01:05", "throughput": 2376.37, "total_tokens": 8275888}
4818
+ {"current_steps": 23995, "total_steps": 24440, "loss": 0.2904, "lr": 5.0704347125044016e-08, "epoch": 19.635842880523732, "percentage": 98.18, "elapsed_time": "0:58:03", "remaining_time": "0:01:04", "throughput": 2376.37, "total_tokens": 8277552}
4819
+ {"current_steps": 24000, "total_steps": 24440, "loss": 0.1892, "lr": 4.95742247113129e-08, "epoch": 19.639934533551553, "percentage": 98.2, "elapsed_time": "0:58:03", "remaining_time": "0:01:03", "throughput": 2376.4, "total_tokens": 8279312}
4820
+ {"current_steps": 24005, "total_steps": 24440, "loss": 0.2243, "lr": 4.845682649136929e-08, "epoch": 19.644026186579378, "percentage": 98.22, "elapsed_time": "0:58:04", "remaining_time": "0:01:03", "throughput": 2376.42, "total_tokens": 8281040}
4821
+ {"current_steps": 24010, "total_steps": 24440, "loss": 0.3631, "lr": 4.735215303506568e-08, "epoch": 19.648117839607202, "percentage": 98.24, "elapsed_time": "0:58:05", "remaining_time": "0:01:02", "throughput": 2376.48, "total_tokens": 8282896}
4822
+ {"current_steps": 24015, "total_steps": 24440, "loss": 0.128, "lr": 4.626020490575978e-08, "epoch": 19.652209492635023, "percentage": 98.26, "elapsed_time": "0:58:06", "remaining_time": "0:01:01", "throughput": 2376.51, "total_tokens": 8284656}
4823
+ {"current_steps": 24020, "total_steps": 24440, "loss": 0.3252, "lr": 4.518098266032278e-08, "epoch": 19.656301145662848, "percentage": 98.28, "elapsed_time": "0:58:06", "remaining_time": "0:01:00", "throughput": 2376.54, "total_tokens": 8286416}
4824
+ {"current_steps": 24025, "total_steps": 24440, "loss": 0.1845, "lr": 4.411448684913666e-08, "epoch": 19.660392798690673, "percentage": 98.3, "elapsed_time": "0:58:07", "remaining_time": "0:01:00", "throughput": 2376.56, "total_tokens": 8288144}
4825
+ {"current_steps": 24030, "total_steps": 24440, "loss": 0.3227, "lr": 4.306071801609412e-08, "epoch": 19.664484451718494, "percentage": 98.32, "elapsed_time": "0:58:08", "remaining_time": "0:00:59", "throughput": 2376.58, "total_tokens": 8289936}
4826
+ {"current_steps": 24035, "total_steps": 24440, "loss": 0.2639, "lr": 4.20196766985903e-08, "epoch": 19.668576104746318, "percentage": 98.34, "elapsed_time": "0:58:08", "remaining_time": "0:00:58", "throughput": 2376.63, "total_tokens": 8291760}
4827
+ {"current_steps": 24040, "total_steps": 24440, "loss": 0.1901, "lr": 4.0991363427536624e-08, "epoch": 19.67266775777414, "percentage": 98.36, "elapsed_time": "0:58:09", "remaining_time": "0:00:58", "throughput": 2376.66, "total_tokens": 8293520}
4828
+ {"current_steps": 24045, "total_steps": 24440, "loss": 0.0775, "lr": 3.99757787273497e-08, "epoch": 19.676759410801964, "percentage": 98.38, "elapsed_time": "0:58:10", "remaining_time": "0:00:57", "throughput": 2376.66, "total_tokens": 8295152}
4829
+ {"current_steps": 24050, "total_steps": 24440, "loss": 0.2015, "lr": 3.8972923115959684e-08, "epoch": 19.680851063829788, "percentage": 98.4, "elapsed_time": "0:58:10", "remaining_time": "0:00:56", "throughput": 2376.69, "total_tokens": 8296912}
4830
+ {"current_steps": 24055, "total_steps": 24440, "loss": 0.3074, "lr": 3.7982797104799126e-08, "epoch": 19.68494271685761, "percentage": 98.42, "elapsed_time": "0:58:11", "remaining_time": "0:00:55", "throughput": 2376.71, "total_tokens": 8298640}
4831
+ {"current_steps": 24060, "total_steps": 24440, "loss": 0.157, "lr": 3.700540119881413e-08, "epoch": 19.689034369885434, "percentage": 98.45, "elapsed_time": "0:58:12", "remaining_time": "0:00:55", "throughput": 2376.73, "total_tokens": 8300432}
4832
+ {"current_steps": 24065, "total_steps": 24440, "loss": 0.1627, "lr": 3.604073589645596e-08, "epoch": 19.69312602291326, "percentage": 98.47, "elapsed_time": "0:58:13", "remaining_time": "0:00:54", "throughput": 2376.76, "total_tokens": 8302160}
4833
+ {"current_steps": 24070, "total_steps": 24440, "loss": 0.2016, "lr": 3.5088801689678317e-08, "epoch": 19.69721767594108, "percentage": 98.49, "elapsed_time": "0:58:13", "remaining_time": "0:00:53", "throughput": 2376.77, "total_tokens": 8303856}
4834
+ {"current_steps": 24075, "total_steps": 24440, "loss": 0.3333, "lr": 3.4149599063953985e-08, "epoch": 19.701309328968904, "percentage": 98.51, "elapsed_time": "0:58:14", "remaining_time": "0:00:52", "throughput": 2376.79, "total_tokens": 8305584}
4835
+ {"current_steps": 24080, "total_steps": 24440, "loss": 0.1498, "lr": 3.322312849825537e-08, "epoch": 19.705400981996725, "percentage": 98.53, "elapsed_time": "0:58:15", "remaining_time": "0:00:52", "throughput": 2376.8, "total_tokens": 8307280}
4836
+ {"current_steps": 24085, "total_steps": 24440, "loss": 0.2085, "lr": 3.23093904650601e-08, "epoch": 19.70949263502455, "percentage": 98.55, "elapsed_time": "0:58:15", "remaining_time": "0:00:51", "throughput": 2376.8, "total_tokens": 8308944}
4837
+ {"current_steps": 24090, "total_steps": 24440, "loss": 0.2848, "lr": 3.1408385430356516e-08, "epoch": 19.713584288052374, "percentage": 98.57, "elapsed_time": "0:58:16", "remaining_time": "0:00:50", "throughput": 2376.83, "total_tokens": 8310672}
4838
+ {"current_steps": 24095, "total_steps": 24440, "loss": 0.3697, "lr": 3.052011385364095e-08, "epoch": 19.717675941080195, "percentage": 98.59, "elapsed_time": "0:58:17", "remaining_time": "0:00:50", "throughput": 2376.82, "total_tokens": 8312304}
4839
+ {"current_steps": 24100, "total_steps": 24440, "loss": 0.2071, "lr": 2.964457618790939e-08, "epoch": 19.72176759410802, "percentage": 98.61, "elapsed_time": "0:58:17", "remaining_time": "0:00:49", "throughput": 2376.84, "total_tokens": 8314000}
4840
+ {"current_steps": 24105, "total_steps": 24440, "loss": 0.1778, "lr": 2.878177287967132e-08, "epoch": 19.725859247135844, "percentage": 98.63, "elapsed_time": "0:58:18", "remaining_time": "0:00:48", "throughput": 2376.86, "total_tokens": 8315728}
4841
+ {"current_steps": 24110, "total_steps": 24440, "loss": 0.1238, "lr": 2.7931704368935884e-08, "epoch": 19.729950900163665, "percentage": 98.65, "elapsed_time": "0:58:19", "remaining_time": "0:00:47", "throughput": 2376.89, "total_tokens": 8317456}
4842
+ {"current_steps": 24115, "total_steps": 24440, "loss": 0.2606, "lr": 2.7094371089220194e-08, "epoch": 19.73404255319149, "percentage": 98.67, "elapsed_time": "0:58:19", "remaining_time": "0:00:47", "throughput": 2376.93, "total_tokens": 8319248}
4843
+ {"current_steps": 24120, "total_steps": 24440, "loss": 0.1526, "lr": 2.626977346754933e-08, "epoch": 19.738134206219314, "percentage": 98.69, "elapsed_time": "0:58:20", "remaining_time": "0:00:46", "throughput": 2376.95, "total_tokens": 8320976}
4844
+ {"current_steps": 24125, "total_steps": 24440, "loss": 0.0784, "lr": 2.5457911924445244e-08, "epoch": 19.742225859247135, "percentage": 98.71, "elapsed_time": "0:58:21", "remaining_time": "0:00:45", "throughput": 2376.98, "total_tokens": 8322704}
4845
+ {"current_steps": 24130, "total_steps": 24440, "loss": 0.1575, "lr": 2.4658786873946182e-08, "epoch": 19.74631751227496, "percentage": 98.73, "elapsed_time": "0:58:22", "remaining_time": "0:00:44", "throughput": 2377.0, "total_tokens": 8324432}
4846
+ {"current_steps": 24135, "total_steps": 24440, "loss": 0.2838, "lr": 2.3872398723587264e-08, "epoch": 19.75040916530278, "percentage": 98.75, "elapsed_time": "0:58:22", "remaining_time": "0:00:44", "throughput": 2377.0, "total_tokens": 8326160}
4847
+ {"current_steps": 24140, "total_steps": 24440, "loss": 0.223, "lr": 2.30987478744088e-08, "epoch": 19.754500818330605, "percentage": 98.77, "elapsed_time": "0:58:23", "remaining_time": "0:00:43", "throughput": 2377.03, "total_tokens": 8327952}
4848
+ {"current_steps": 24145, "total_steps": 24440, "loss": 0.3087, "lr": 2.2337834720959072e-08, "epoch": 19.75859247135843, "percentage": 98.79, "elapsed_time": "0:58:24", "remaining_time": "0:00:42", "throughput": 2377.05, "total_tokens": 8329680}
4849
+ {"current_steps": 24150, "total_steps": 24440, "loss": 0.274, "lr": 2.1589659651283233e-08, "epoch": 19.76268412438625, "percentage": 98.81, "elapsed_time": "0:58:24", "remaining_time": "0:00:42", "throughput": 2377.07, "total_tokens": 8331408}
4850
+ {"current_steps": 24155, "total_steps": 24440, "loss": 0.1449, "lr": 2.085422304693996e-08, "epoch": 19.766775777414075, "percentage": 98.83, "elapsed_time": "0:58:25", "remaining_time": "0:00:41", "throughput": 2377.1, "total_tokens": 8333200}
4851
+ {"current_steps": 24160, "total_steps": 24440, "loss": 0.081, "lr": 2.013152528298201e-08, "epoch": 19.7708674304419, "percentage": 98.85, "elapsed_time": "0:58:26", "remaining_time": "0:00:40", "throughput": 2377.13, "total_tokens": 8334960}
4852
+ {"current_steps": 24165, "total_steps": 24440, "loss": 0.2123, "lr": 1.942156672797568e-08, "epoch": 19.77495908346972, "percentage": 98.87, "elapsed_time": "0:58:27", "remaining_time": "0:00:39", "throughput": 2377.17, "total_tokens": 8336816}
4853
+ {"current_steps": 24170, "total_steps": 24440, "loss": 0.1672, "lr": 1.8724347743978575e-08, "epoch": 19.779050736497545, "percentage": 98.9, "elapsed_time": "0:58:27", "remaining_time": "0:00:39", "throughput": 2377.19, "total_tokens": 8338544}
4854
+ {"current_steps": 24175, "total_steps": 24440, "loss": 0.1988, "lr": 1.8039868686561823e-08, "epoch": 19.78314238952537, "percentage": 98.92, "elapsed_time": "0:58:28", "remaining_time": "0:00:38", "throughput": 2377.23, "total_tokens": 8340400}
4855
+ {"current_steps": 24180, "total_steps": 24440, "loss": 0.3647, "lr": 1.7368129904793416e-08, "epoch": 19.78723404255319, "percentage": 98.94, "elapsed_time": "0:58:29", "remaining_time": "0:00:37", "throughput": 2377.25, "total_tokens": 8342128}
4856
+ {"current_steps": 24185, "total_steps": 24440, "loss": 0.1807, "lr": 1.6709131741246552e-08, "epoch": 19.791325695581016, "percentage": 98.96, "elapsed_time": "0:58:29", "remaining_time": "0:00:37", "throughput": 2377.25, "total_tokens": 8343760}
4857
+ {"current_steps": 24190, "total_steps": 24440, "loss": 0.2171, "lr": 1.6062874531999615e-08, "epoch": 19.795417348608837, "percentage": 98.98, "elapsed_time": "0:58:30", "remaining_time": "0:00:36", "throughput": 2377.26, "total_tokens": 8345456}
4858
+ {"current_steps": 24195, "total_steps": 24440, "loss": 0.2906, "lr": 1.542935860662509e-08, "epoch": 19.79950900163666, "percentage": 99.0, "elapsed_time": "0:58:31", "remaining_time": "0:00:35", "throughput": 2377.26, "total_tokens": 8347120}
4859
+ {"current_steps": 24200, "total_steps": 24440, "loss": 0.1874, "lr": 1.480858428820342e-08, "epoch": 19.803600654664486, "percentage": 99.02, "elapsed_time": "0:58:31", "remaining_time": "0:00:34", "throughput": 2377.26, "total_tokens": 8348784}
4860
+ {"current_steps": 24205, "total_steps": 24440, "loss": 0.2518, "lr": 1.4200551893320257e-08, "epoch": 19.807692307692307, "percentage": 99.04, "elapsed_time": "0:58:32", "remaining_time": "0:00:34", "throughput": 2377.26, "total_tokens": 8350416}
4861
+ {"current_steps": 24210, "total_steps": 24440, "loss": 0.2987, "lr": 1.3605261732058117e-08, "epoch": 19.81178396072013, "percentage": 99.06, "elapsed_time": "0:58:33", "remaining_time": "0:00:33", "throughput": 2377.26, "total_tokens": 8352080}
4862
+ {"current_steps": 24215, "total_steps": 24440, "loss": 0.3453, "lr": 1.3022714108001932e-08, "epoch": 19.815875613747956, "percentage": 99.08, "elapsed_time": "0:58:34", "remaining_time": "0:00:32", "throughput": 2377.31, "total_tokens": 8353872}
4863
+ {"current_steps": 24220, "total_steps": 24440, "loss": 0.2875, "lr": 1.2452909318236283e-08, "epoch": 19.819967266775777, "percentage": 99.1, "elapsed_time": "0:58:34", "remaining_time": "0:00:31", "throughput": 2377.35, "total_tokens": 8355664}
4864
+ {"current_steps": 24225, "total_steps": 24440, "loss": 0.2735, "lr": 1.189584765335372e-08, "epoch": 19.8240589198036, "percentage": 99.12, "elapsed_time": "0:58:35", "remaining_time": "0:00:31", "throughput": 2377.35, "total_tokens": 8357392}
4865
+ {"current_steps": 24230, "total_steps": 24440, "loss": 0.1617, "lr": 1.1351529397440885e-08, "epoch": 19.828150572831422, "percentage": 99.14, "elapsed_time": "0:58:36", "remaining_time": "0:00:30", "throughput": 2377.38, "total_tokens": 8359120}
4866
+ {"current_steps": 24235, "total_steps": 24440, "loss": 0.1804, "lr": 1.0819954828089618e-08, "epoch": 19.832242225859247, "percentage": 99.16, "elapsed_time": "0:58:36", "remaining_time": "0:00:29", "throughput": 2377.39, "total_tokens": 8360816}
4867
+ {"current_steps": 24240, "total_steps": 24440, "loss": 0.1009, "lr": 1.0301124216394176e-08, "epoch": 19.83633387888707, "percentage": 99.18, "elapsed_time": "0:58:37", "remaining_time": "0:00:29", "throughput": 2377.41, "total_tokens": 8362544}
4868
+ {"current_steps": 24245, "total_steps": 24440, "loss": 0.3357, "lr": 9.79503782694291e-09, "epoch": 19.840425531914892, "percentage": 99.2, "elapsed_time": "0:58:38", "remaining_time": "0:00:28", "throughput": 2377.44, "total_tokens": 8364272}
4869
+ {"current_steps": 24250, "total_steps": 24440, "loss": 0.1756, "lr": 9.301695917834919e-09, "epoch": 19.844517184942717, "percentage": 99.22, "elapsed_time": "0:58:38", "remaining_time": "0:00:27", "throughput": 2377.44, "total_tokens": 8365936}
4870
+ {"current_steps": 24255, "total_steps": 24440, "loss": 0.2798, "lr": 8.821098740657841e-09, "epoch": 19.84860883797054, "percentage": 99.24, "elapsed_time": "0:58:39", "remaining_time": "0:00:26", "throughput": 2377.46, "total_tokens": 8367632}
4871
+ {"current_steps": 24260, "total_steps": 24440, "loss": 0.2876, "lr": 8.353246540510062e-09, "epoch": 19.852700490998362, "percentage": 99.26, "elapsed_time": "0:58:40", "remaining_time": "0:00:26", "throughput": 2377.47, "total_tokens": 8369328}
4872
+ {"current_steps": 24265, "total_steps": 24440, "loss": 0.1684, "lr": 7.898139555986838e-09, "epoch": 19.856792144026187, "percentage": 99.28, "elapsed_time": "0:58:40", "remaining_time": "0:00:25", "throughput": 2377.51, "total_tokens": 8371120}
4873
+ {"current_steps": 24270, "total_steps": 24440, "loss": 0.1263, "lr": 7.455778019180293e-09, "epoch": 19.86088379705401, "percentage": 99.3, "elapsed_time": "0:58:41", "remaining_time": "0:00:24", "throughput": 2377.52, "total_tokens": 8372784}
4874
+ {"current_steps": 24275, "total_steps": 24440, "loss": 0.2644, "lr": 7.0261621556877434e-09, "epoch": 19.864975450081833, "percentage": 99.32, "elapsed_time": "0:58:42", "remaining_time": "0:00:23", "throughput": 2377.53, "total_tokens": 8374480}
4875
+ {"current_steps": 24280, "total_steps": 24440, "loss": 0.153, "lr": 6.609292184603377e-09, "epoch": 19.869067103109657, "percentage": 99.35, "elapsed_time": "0:58:43", "remaining_time": "0:00:23", "throughput": 2377.54, "total_tokens": 8376176}
4876
+ {"current_steps": 24285, "total_steps": 24440, "loss": 0.2323, "lr": 6.205168318523802e-09, "epoch": 19.873158756137478, "percentage": 99.37, "elapsed_time": "0:58:43", "remaining_time": "0:00:22", "throughput": 2377.56, "total_tokens": 8377936}
4877
+ {"current_steps": 24290, "total_steps": 24440, "loss": 0.2189, "lr": 5.813790763539717e-09, "epoch": 19.877250409165303, "percentage": 99.39, "elapsed_time": "0:58:44", "remaining_time": "0:00:21", "throughput": 2377.57, "total_tokens": 8379632}
4878
+ {"current_steps": 24295, "total_steps": 24440, "loss": 0.1794, "lr": 5.435159719249794e-09, "epoch": 19.881342062193127, "percentage": 99.41, "elapsed_time": "0:58:45", "remaining_time": "0:00:21", "throughput": 2377.59, "total_tokens": 8381360}
4879
+ {"current_steps": 24300, "total_steps": 24440, "loss": 0.1147, "lr": 5.069275378746796e-09, "epoch": 19.885433715220948, "percentage": 99.43, "elapsed_time": "0:58:45", "remaining_time": "0:00:20", "throughput": 2377.61, "total_tokens": 8383088}
4880
+ {"current_steps": 24305, "total_steps": 24440, "loss": 0.0753, "lr": 4.7161379286231325e-09, "epoch": 19.889525368248773, "percentage": 99.45, "elapsed_time": "0:58:46", "remaining_time": "0:00:19", "throughput": 2377.63, "total_tokens": 8384816}
4881
+ {"current_steps": 24310, "total_steps": 24440, "loss": 0.1728, "lr": 4.3757475489708545e-09, "epoch": 19.893617021276597, "percentage": 99.47, "elapsed_time": "0:58:47", "remaining_time": "0:00:18", "throughput": 2377.65, "total_tokens": 8386512}
4882
+ {"current_steps": 24315, "total_steps": 24440, "loss": 0.3733, "lr": 4.048104413384434e-09, "epoch": 19.89770867430442, "percentage": 99.49, "elapsed_time": "0:58:47", "remaining_time": "0:00:18", "throughput": 2377.66, "total_tokens": 8388208}
4883
+ {"current_steps": 24320, "total_steps": 24440, "loss": 0.3124, "lr": 3.7332086889524385e-09, "epoch": 19.901800327332243, "percentage": 99.51, "elapsed_time": "0:58:48", "remaining_time": "0:00:17", "throughput": 2377.66, "total_tokens": 8389872}
4884
+ {"current_steps": 24325, "total_steps": 24440, "loss": 0.3439, "lr": 3.431060536265851e-09, "epoch": 19.905891980360064, "percentage": 99.53, "elapsed_time": "0:58:49", "remaining_time": "0:00:16", "throughput": 2377.68, "total_tokens": 8391600}
4885
+ {"current_steps": 24330, "total_steps": 24440, "loss": 0.2543, "lr": 3.1416601094153007e-09, "epoch": 19.90998363338789, "percentage": 99.55, "elapsed_time": "0:58:50", "remaining_time": "0:00:15", "throughput": 2377.71, "total_tokens": 8393360}
4886
+ {"current_steps": 24335, "total_steps": 24440, "loss": 0.1804, "lr": 2.8650075559882862e-09, "epoch": 19.914075286415713, "percentage": 99.57, "elapsed_time": "0:58:50", "remaining_time": "0:00:15", "throughput": 2377.73, "total_tokens": 8395088}
4887
+ {"current_steps": 24340, "total_steps": 24440, "loss": 0.2352, "lr": 2.6011030170691732e-09, "epoch": 19.918166939443534, "percentage": 99.59, "elapsed_time": "0:58:51", "remaining_time": "0:00:14", "throughput": 2377.74, "total_tokens": 8396784}
4888
+ {"current_steps": 24345, "total_steps": 24440, "loss": 0.1774, "lr": 2.3499466272475236e-09, "epoch": 19.92225859247136, "percentage": 99.61, "elapsed_time": "0:58:52", "remaining_time": "0:00:13", "throughput": 2377.77, "total_tokens": 8398608}
4889
+ {"current_steps": 24350, "total_steps": 24440, "loss": 0.1448, "lr": 2.111538514606992e-09, "epoch": 19.926350245499183, "percentage": 99.63, "elapsed_time": "0:58:52", "remaining_time": "0:00:13", "throughput": 2377.78, "total_tokens": 8400304}
4890
+ {"current_steps": 24355, "total_steps": 24440, "loss": 0.2918, "lr": 1.8858788007281027e-09, "epoch": 19.930441898527004, "percentage": 99.65, "elapsed_time": "0:58:53", "remaining_time": "0:00:12", "throughput": 2377.81, "total_tokens": 8402032}
4891
+ {"current_steps": 24360, "total_steps": 24440, "loss": 0.1516, "lr": 1.6729676006937979e-09, "epoch": 19.93453355155483, "percentage": 99.67, "elapsed_time": "0:58:54", "remaining_time": "0:00:11", "throughput": 2377.83, "total_tokens": 8403824}
4892
+ {"current_steps": 24365, "total_steps": 24440, "loss": 0.2286, "lr": 1.4728050230866651e-09, "epoch": 19.938625204582653, "percentage": 99.69, "elapsed_time": "0:58:54", "remaining_time": "0:00:10", "throughput": 2377.81, "total_tokens": 8405424}
4893
+ {"current_steps": 24370, "total_steps": 24440, "loss": 0.2599, "lr": 1.2853911699833853e-09, "epoch": 19.942716857610474, "percentage": 99.71, "elapsed_time": "0:58:55", "remaining_time": "0:00:10", "throughput": 2377.83, "total_tokens": 8407120}
4894
+ {"current_steps": 24375, "total_steps": 24440, "loss": 0.1833, "lr": 1.1107261369602828e-09, "epoch": 19.9468085106383, "percentage": 99.73, "elapsed_time": "0:58:56", "remaining_time": "0:00:09", "throughput": 2377.83, "total_tokens": 8408784}
4895
+ {"current_steps": 24380, "total_steps": 24440, "loss": 0.4271, "lr": 9.488100130961019e-10, "epoch": 19.95090016366612, "percentage": 99.75, "elapsed_time": "0:58:57", "remaining_time": "0:00:08", "throughput": 2377.85, "total_tokens": 8410544}
4896
+ {"current_steps": 24385, "total_steps": 24440, "loss": 0.351, "lr": 7.996428809609046e-10, "epoch": 19.954991816693944, "percentage": 99.77, "elapsed_time": "0:58:57", "remaining_time": "0:00:07", "throughput": 2377.87, "total_tokens": 8412336}
4897
+ {"current_steps": 24390, "total_steps": 24440, "loss": 0.2643, "lr": 6.632248166271726e-10, "epoch": 19.95908346972177, "percentage": 99.8, "elapsed_time": "0:58:58", "remaining_time": "0:00:07", "throughput": 2377.91, "total_tokens": 8414224}
4898
+ {"current_steps": 24395, "total_steps": 24440, "loss": 0.2363, "lr": 5.395558896698072e-10, "epoch": 19.96317512274959, "percentage": 99.82, "elapsed_time": "0:58:59", "remaining_time": "0:00:06", "throughput": 2377.92, "total_tokens": 8415888}
4899
+ {"current_steps": 24400, "total_steps": 24440, "loss": 0.3828, "lr": 4.2863616315225176e-10, "epoch": 19.967266775777414, "percentage": 99.84, "elapsed_time": "0:58:59", "remaining_time": "0:00:05", "throughput": 2377.93, "total_tokens": 8417584}
4900
+ {"current_steps": 24405, "total_steps": 24440, "loss": 0.239, "lr": 3.3046569364314493e-10, "epoch": 19.97135842880524, "percentage": 99.86, "elapsed_time": "0:59:00", "remaining_time": "0:00:05", "throughput": 2377.95, "total_tokens": 8419312}
4901
+ {"current_steps": 24410, "total_steps": 24440, "loss": 0.1104, "lr": 2.450445312052185e-10, "epoch": 19.97545008183306, "percentage": 99.88, "elapsed_time": "0:59:01", "remaining_time": "0:00:04", "throughput": 2377.95, "total_tokens": 8420976}
4902
+ {"current_steps": 24415, "total_steps": 24440, "loss": 0.1913, "lr": 1.7237271940639954e-10, "epoch": 19.979541734860884, "percentage": 99.9, "elapsed_time": "0:59:01", "remaining_time": "0:00:03", "throughput": 2377.95, "total_tokens": 8422640}
4903
+ {"current_steps": 24420, "total_steps": 24440, "loss": 0.1702, "lr": 1.1245029530315698e-10, "epoch": 19.983633387888705, "percentage": 99.92, "elapsed_time": "0:59:02", "remaining_time": "0:00:02", "throughput": 2377.97, "total_tokens": 8424336}
4904
+ {"current_steps": 24425, "total_steps": 24440, "loss": 0.1409, "lr": 6.527728945715517e-11, "epoch": 19.98772504091653, "percentage": 99.94, "elapsed_time": "0:59:03", "remaining_time": "0:00:02", "throughput": 2377.99, "total_tokens": 8426064}
4905
+ {"current_steps": 24430, "total_steps": 24440, "loss": 0.2276, "lr": 3.0853725926927034e-11, "epoch": 19.991816693944354, "percentage": 99.96, "elapsed_time": "0:59:04", "remaining_time": "0:00:01", "throughput": 2378.01, "total_tokens": 8427760}
4906
+ {"current_steps": 24435, "total_steps": 24440, "loss": 0.2204, "lr": 9.179622262323052e-12, "epoch": 19.995908346972175, "percentage": 99.98, "elapsed_time": "0:59:04", "remaining_time": "0:00:00", "throughput": 2378.02, "total_tokens": 8429456}
4907
+ {"current_steps": 24440, "total_steps": 24440, "loss": 0.0466, "lr": 2.5498952116453923e-13, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:59:05", "remaining_time": "0:00:00", "throughput": 2377.96, "total_tokens": 8431032}
4908
+ {"current_steps": 24440, "total_steps": 24440, "eval_loss": 0.2953493893146515, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:59:11", "remaining_time": "0:00:00", "throughput": 2374.14, "total_tokens": 8431032}
4909
+ {"current_steps": 24440, "total_steps": 24440, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:59:12", "remaining_time": "0:00:00", "throughput": 2373.47, "total_tokens": 8431032}