rbelanec commited on
Commit
51428b3
verified
1 Parent(s): 6540c1a

Training in progress, step 10184

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +108 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8e82d8eda51412ebef966c5821e7796611535b5b026fcd409f084f21a89ac9d
3
  size 798032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e88b6157b5be0021ce1e33d054d217721ccd9a9dd01fe06e184f28c8f863178
3
  size 798032
trainer_log.jsonl CHANGED
@@ -1956,3 +1956,111 @@
1956
  {"current_steps": 9690, "total_steps": 10720, "loss": 0.5612, "lr": 1.3956282062087933e-06, "epoch": 18.078358208955223, "percentage": 90.39, "elapsed_time": "0:23:32", "remaining_time": "0:02:30", "throughput": 1957.24, "total_tokens": 2765056}
1957
  {"current_steps": 9695, "total_steps": 10720, "loss": 0.9437, "lr": 1.3822502270377762e-06, "epoch": 18.08768656716418, "percentage": 90.44, "elapsed_time": "0:23:33", "remaining_time": "0:02:29", "throughput": 1957.28, "total_tokens": 2766528}
1958
  {"current_steps": 9700, "total_steps": 10720, "loss": 0.6803, "lr": 1.368934852083384e-06, "epoch": 18.097014925373134, "percentage": 90.49, "elapsed_time": "0:23:34", "remaining_time": "0:02:28", "throughput": 1957.41, "total_tokens": 2768064}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1956
  {"current_steps": 9690, "total_steps": 10720, "loss": 0.5612, "lr": 1.3956282062087933e-06, "epoch": 18.078358208955223, "percentage": 90.39, "elapsed_time": "0:23:32", "remaining_time": "0:02:30", "throughput": 1957.24, "total_tokens": 2765056}
1957
  {"current_steps": 9695, "total_steps": 10720, "loss": 0.9437, "lr": 1.3822502270377762e-06, "epoch": 18.08768656716418, "percentage": 90.44, "elapsed_time": "0:23:33", "remaining_time": "0:02:29", "throughput": 1957.28, "total_tokens": 2766528}
1958
  {"current_steps": 9700, "total_steps": 10720, "loss": 0.6803, "lr": 1.368934852083384e-06, "epoch": 18.097014925373134, "percentage": 90.49, "elapsed_time": "0:23:34", "remaining_time": "0:02:28", "throughput": 1957.41, "total_tokens": 2768064}
1959
+ {"current_steps": 9705, "total_steps": 10720, "loss": 0.6564, "lr": 1.3556821166410522e-06, "epoch": 18.10634328358209, "percentage": 90.53, "elapsed_time": "0:23:34", "remaining_time": "0:02:27", "throughput": 1957.51, "total_tokens": 2769568}
1960
+ {"current_steps": 9710, "total_steps": 10720, "loss": 0.5792, "lr": 1.3424920558401611e-06, "epoch": 18.115671641791046, "percentage": 90.58, "elapsed_time": "0:23:35", "remaining_time": "0:02:27", "throughput": 1957.45, "total_tokens": 2770848}
1961
+ {"current_steps": 9715, "total_steps": 10720, "loss": 0.5438, "lr": 1.3293647046439678e-06, "epoch": 18.125, "percentage": 90.62, "elapsed_time": "0:23:36", "remaining_time": "0:02:26", "throughput": 1957.47, "total_tokens": 2772224}
1962
+ {"current_steps": 9720, "total_steps": 10720, "loss": 0.5722, "lr": 1.3163000978495072e-06, "epoch": 18.134328358208954, "percentage": 90.67, "elapsed_time": "0:23:36", "remaining_time": "0:02:25", "throughput": 1957.56, "total_tokens": 2773696}
1963
+ {"current_steps": 9725, "total_steps": 10720, "loss": 0.5266, "lr": 1.3032982700874802e-06, "epoch": 18.14365671641791, "percentage": 90.72, "elapsed_time": "0:23:37", "remaining_time": "0:02:25", "throughput": 1957.65, "total_tokens": 2775168}
1964
+ {"current_steps": 9730, "total_steps": 10720, "loss": 0.695, "lr": 1.2903592558222016e-06, "epoch": 18.152985074626866, "percentage": 90.76, "elapsed_time": "0:23:38", "remaining_time": "0:02:24", "throughput": 1957.87, "total_tokens": 2776832}
1965
+ {"current_steps": 9735, "total_steps": 10720, "loss": 0.7484, "lr": 1.2774830893514583e-06, "epoch": 18.16231343283582, "percentage": 90.81, "elapsed_time": "0:23:38", "remaining_time": "0:02:23", "throughput": 1957.85, "total_tokens": 2778144}
1966
+ {"current_steps": 9740, "total_steps": 10720, "loss": 0.7066, "lr": 1.2646698048064703e-06, "epoch": 18.171641791044777, "percentage": 90.86, "elapsed_time": "0:23:39", "remaining_time": "0:02:22", "throughput": 1957.98, "total_tokens": 2779680}
1967
+ {"current_steps": 9745, "total_steps": 10720, "loss": 0.7921, "lr": 1.2519194361517466e-06, "epoch": 18.18097014925373, "percentage": 90.9, "elapsed_time": "0:23:40", "remaining_time": "0:02:22", "throughput": 1958.1, "total_tokens": 2781248}
1968
+ {"current_steps": 9750, "total_steps": 10720, "loss": 0.7151, "lr": 1.2392320171850546e-06, "epoch": 18.190298507462686, "percentage": 90.95, "elapsed_time": "0:23:41", "remaining_time": "0:02:21", "throughput": 1958.12, "total_tokens": 2782624}
1969
+ {"current_steps": 9755, "total_steps": 10720, "loss": 0.473, "lr": 1.2266075815372701e-06, "epoch": 18.199626865671643, "percentage": 91.0, "elapsed_time": "0:23:41", "remaining_time": "0:02:20", "throughput": 1958.25, "total_tokens": 2784224}
1970
+ {"current_steps": 9760, "total_steps": 10720, "loss": 0.6928, "lr": 1.2140461626723414e-06, "epoch": 18.208955223880597, "percentage": 91.04, "elapsed_time": "0:23:42", "remaining_time": "0:02:19", "throughput": 1958.32, "total_tokens": 2785664}
1971
+ {"current_steps": 9765, "total_steps": 10720, "loss": 0.642, "lr": 1.2015477938871617e-06, "epoch": 18.21828358208955, "percentage": 91.09, "elapsed_time": "0:23:43", "remaining_time": "0:02:19", "throughput": 1958.36, "total_tokens": 2787072}
1972
+ {"current_steps": 9770, "total_steps": 10720, "loss": 0.6552, "lr": 1.1891125083114962e-06, "epoch": 18.22761194029851, "percentage": 91.14, "elapsed_time": "0:23:43", "remaining_time": "0:02:18", "throughput": 1958.41, "total_tokens": 2788480}
1973
+ {"current_steps": 9775, "total_steps": 10720, "loss": 0.8629, "lr": 1.1767403389079057e-06, "epoch": 18.236940298507463, "percentage": 91.18, "elapsed_time": "0:23:44", "remaining_time": "0:02:17", "throughput": 1958.39, "total_tokens": 2789792}
1974
+ {"current_steps": 9780, "total_steps": 10720, "loss": 0.6859, "lr": 1.164431318471626e-06, "epoch": 18.246268656716417, "percentage": 91.23, "elapsed_time": "0:23:45", "remaining_time": "0:02:16", "throughput": 1958.43, "total_tokens": 2791200}
1975
+ {"current_steps": 9785, "total_steps": 10720, "loss": 0.5254, "lr": 1.1521854796305242e-06, "epoch": 18.255597014925375, "percentage": 91.28, "elapsed_time": "0:23:45", "remaining_time": "0:02:16", "throughput": 1958.61, "total_tokens": 2792896}
1976
+ {"current_steps": 9790, "total_steps": 10720, "loss": 0.8092, "lr": 1.1400028548449821e-06, "epoch": 18.26492537313433, "percentage": 91.32, "elapsed_time": "0:23:46", "remaining_time": "0:02:15", "throughput": 1958.68, "total_tokens": 2794336}
1977
+ {"current_steps": 9795, "total_steps": 10720, "loss": 0.7838, "lr": 1.1278834764078123e-06, "epoch": 18.274253731343283, "percentage": 91.37, "elapsed_time": "0:23:47", "remaining_time": "0:02:14", "throughput": 1958.72, "total_tokens": 2795744}
1978
+ {"current_steps": 9800, "total_steps": 10720, "loss": 0.7545, "lr": 1.1158273764441868e-06, "epoch": 18.28358208955224, "percentage": 91.42, "elapsed_time": "0:23:48", "remaining_time": "0:02:14", "throughput": 1958.76, "total_tokens": 2797152}
1979
+ {"current_steps": 9805, "total_steps": 10720, "loss": 0.7285, "lr": 1.103834586911534e-06, "epoch": 18.292910447761194, "percentage": 91.46, "elapsed_time": "0:23:48", "remaining_time": "0:02:13", "throughput": 1958.87, "total_tokens": 2798720}
1980
+ {"current_steps": 9810, "total_steps": 10720, "loss": 0.7454, "lr": 1.0919051395994778e-06, "epoch": 18.30223880597015, "percentage": 91.51, "elapsed_time": "0:23:49", "remaining_time": "0:02:12", "throughput": 1958.82, "total_tokens": 2800000}
1981
+ {"current_steps": 9815, "total_steps": 10720, "loss": 0.5528, "lr": 1.0800390661297261e-06, "epoch": 18.311567164179106, "percentage": 91.56, "elapsed_time": "0:23:50", "remaining_time": "0:02:11", "throughput": 1958.87, "total_tokens": 2801408}
1982
+ {"current_steps": 9820, "total_steps": 10720, "loss": 0.9416, "lr": 1.0682363979560046e-06, "epoch": 18.32089552238806, "percentage": 91.6, "elapsed_time": "0:23:50", "remaining_time": "0:02:11", "throughput": 1958.9, "total_tokens": 2802784}
1983
+ {"current_steps": 9825, "total_steps": 10720, "loss": 0.6448, "lr": 1.056497166363976e-06, "epoch": 18.330223880597014, "percentage": 91.65, "elapsed_time": "0:23:51", "remaining_time": "0:02:10", "throughput": 1958.87, "total_tokens": 2804096}
1984
+ {"current_steps": 9830, "total_steps": 10720, "loss": 0.8478, "lr": 1.0448214024711384e-06, "epoch": 18.33955223880597, "percentage": 91.7, "elapsed_time": "0:23:52", "remaining_time": "0:02:09", "throughput": 1958.92, "total_tokens": 2805504}
1985
+ {"current_steps": 9835, "total_steps": 10720, "loss": 0.5772, "lr": 1.0332091372267566e-06, "epoch": 18.348880597014926, "percentage": 91.74, "elapsed_time": "0:23:52", "remaining_time": "0:02:08", "throughput": 1958.99, "total_tokens": 2806944}
1986
+ {"current_steps": 9840, "total_steps": 10720, "loss": 0.616, "lr": 1.0216604014117837e-06, "epoch": 18.35820895522388, "percentage": 91.79, "elapsed_time": "0:23:53", "remaining_time": "0:02:08", "throughput": 1959.05, "total_tokens": 2808384}
1987
+ {"current_steps": 9845, "total_steps": 10720, "loss": 0.8747, "lr": 1.0101752256387682e-06, "epoch": 18.367537313432837, "percentage": 91.84, "elapsed_time": "0:23:54", "remaining_time": "0:02:07", "throughput": 1959.17, "total_tokens": 2809888}
1988
+ {"current_steps": 9850, "total_steps": 10720, "loss": 0.573, "lr": 9.98753640351785e-07, "epoch": 18.37686567164179, "percentage": 91.88, "elapsed_time": "0:23:54", "remaining_time": "0:02:06", "throughput": 1959.31, "total_tokens": 2811424}
1989
+ {"current_steps": 9855, "total_steps": 10720, "loss": 0.647, "lr": 9.873956758263359e-07, "epoch": 18.386194029850746, "percentage": 91.93, "elapsed_time": "0:23:55", "remaining_time": "0:02:06", "throughput": 1959.28, "total_tokens": 2812736}
1990
+ {"current_steps": 9860, "total_steps": 10720, "loss": 0.5383, "lr": 9.76101362169296e-07, "epoch": 18.395522388059703, "percentage": 91.98, "elapsed_time": "0:23:56", "remaining_time": "0:02:05", "throughput": 1959.44, "total_tokens": 2814304}
1991
+ {"current_steps": 9865, "total_steps": 10720, "loss": 0.7062, "lr": 9.64870729318809e-07, "epoch": 18.404850746268657, "percentage": 92.02, "elapsed_time": "0:23:56", "remaining_time": "0:02:04", "throughput": 1959.5, "total_tokens": 2815744}
1992
+ {"current_steps": 9870, "total_steps": 10720, "loss": 0.7897, "lr": 9.537038070442206e-07, "epoch": 18.41417910447761, "percentage": 92.07, "elapsed_time": "0:23:57", "remaining_time": "0:02:03", "throughput": 1959.52, "total_tokens": 2817120}
1993
+ {"current_steps": 9875, "total_steps": 10720, "loss": 0.518, "lr": 9.42600624945994e-07, "epoch": 18.423507462686565, "percentage": 92.12, "elapsed_time": "0:23:58", "remaining_time": "0:02:03", "throughput": 1959.59, "total_tokens": 2818560}
1994
+ {"current_steps": 9880, "total_steps": 10720, "loss": 0.661, "lr": 9.315612124556477e-07, "epoch": 18.432835820895523, "percentage": 92.16, "elapsed_time": "0:23:59", "remaining_time": "0:02:02", "throughput": 1959.7, "total_tokens": 2820128}
1995
+ {"current_steps": 9885, "total_steps": 10720, "loss": 0.7235, "lr": 9.205855988356466e-07, "epoch": 18.442164179104477, "percentage": 92.21, "elapsed_time": "0:23:59", "remaining_time": "0:02:01", "throughput": 1959.79, "total_tokens": 2821664}
1996
+ {"current_steps": 9890, "total_steps": 10720, "loss": 0.7774, "lr": 9.096738131793542e-07, "epoch": 18.451492537313435, "percentage": 92.26, "elapsed_time": "0:24:00", "remaining_time": "0:02:00", "throughput": 1959.78, "total_tokens": 2823008}
1997
+ {"current_steps": 9895, "total_steps": 10720, "loss": 0.7428, "lr": 8.988258844109393e-07, "epoch": 18.46082089552239, "percentage": 92.3, "elapsed_time": "0:24:01", "remaining_time": "0:02:00", "throughput": 1959.84, "total_tokens": 2824448}
1998
+ {"current_steps": 9900, "total_steps": 10720, "loss": 0.6033, "lr": 8.880418412853059e-07, "epoch": 18.470149253731343, "percentage": 92.35, "elapsed_time": "0:24:01", "remaining_time": "0:01:59", "throughput": 1959.91, "total_tokens": 2825888}
1999
+ {"current_steps": 9905, "total_steps": 10720, "loss": 0.5768, "lr": 8.773217123880073e-07, "epoch": 18.479477611940297, "percentage": 92.4, "elapsed_time": "0:24:02", "remaining_time": "0:01:58", "throughput": 1960.03, "total_tokens": 2827392}
2000
+ {"current_steps": 9910, "total_steps": 10720, "loss": 0.7755, "lr": 8.666655261351908e-07, "epoch": 18.488805970149254, "percentage": 92.44, "elapsed_time": "0:24:03", "remaining_time": "0:01:57", "throughput": 1960.03, "total_tokens": 2828736}
2001
+ {"current_steps": 9915, "total_steps": 10720, "loss": 0.657, "lr": 8.560733107734947e-07, "epoch": 18.49813432835821, "percentage": 92.49, "elapsed_time": "0:24:03", "remaining_time": "0:01:57", "throughput": 1960.16, "total_tokens": 2830336}
2002
+ {"current_steps": 9920, "total_steps": 10720, "loss": 0.7193, "lr": 8.455450943799958e-07, "epoch": 18.507462686567163, "percentage": 92.54, "elapsed_time": "0:24:04", "remaining_time": "0:01:56", "throughput": 1960.11, "total_tokens": 2831616}
2003
+ {"current_steps": 9925, "total_steps": 10720, "loss": 0.887, "lr": 8.35080904862126e-07, "epoch": 18.51679104477612, "percentage": 92.58, "elapsed_time": "0:24:05", "remaining_time": "0:01:55", "throughput": 1960.09, "total_tokens": 2832928}
2004
+ {"current_steps": 9930, "total_steps": 10720, "loss": 0.4844, "lr": 8.246807699576032e-07, "epoch": 18.526119402985074, "percentage": 92.63, "elapsed_time": "0:24:06", "remaining_time": "0:01:55", "throughput": 1960.24, "total_tokens": 2834560}
2005
+ {"current_steps": 9935, "total_steps": 10720, "loss": 0.723, "lr": 8.143447172343471e-07, "epoch": 18.53544776119403, "percentage": 92.68, "elapsed_time": "0:24:06", "remaining_time": "0:01:54", "throughput": 1960.23, "total_tokens": 2835904}
2006
+ {"current_steps": 9940, "total_steps": 10720, "loss": 0.5678, "lr": 8.040727740904113e-07, "epoch": 18.544776119402986, "percentage": 92.72, "elapsed_time": "0:24:07", "remaining_time": "0:01:53", "throughput": 1960.38, "total_tokens": 2837536}
2007
+ {"current_steps": 9945, "total_steps": 10720, "loss": 0.4664, "lr": 7.938649677539267e-07, "epoch": 18.55410447761194, "percentage": 92.77, "elapsed_time": "0:24:08", "remaining_time": "0:01:52", "throughput": 1960.34, "total_tokens": 2838816}
2008
+ {"current_steps": 9950, "total_steps": 10720, "loss": 0.5293, "lr": 7.837213252829989e-07, "epoch": 18.563432835820894, "percentage": 92.82, "elapsed_time": "0:24:08", "remaining_time": "0:01:52", "throughput": 1960.31, "total_tokens": 2840128}
2009
+ {"current_steps": 9955, "total_steps": 10720, "loss": 0.8258, "lr": 7.736418735656586e-07, "epoch": 18.57276119402985, "percentage": 92.86, "elapsed_time": "0:24:09", "remaining_time": "0:01:51", "throughput": 1960.42, "total_tokens": 2841632}
2010
+ {"current_steps": 9960, "total_steps": 10720, "loss": 0.6839, "lr": 7.636266393197866e-07, "epoch": 18.582089552238806, "percentage": 92.91, "elapsed_time": "0:24:10", "remaining_time": "0:01:50", "throughput": 1960.46, "total_tokens": 2843040}
2011
+ {"current_steps": 9965, "total_steps": 10720, "loss": 0.4837, "lr": 7.536756490930358e-07, "epoch": 18.59141791044776, "percentage": 92.96, "elapsed_time": "0:24:10", "remaining_time": "0:01:49", "throughput": 1960.55, "total_tokens": 2844512}
2012
+ {"current_steps": 9970, "total_steps": 10720, "loss": 0.5624, "lr": 7.437889292627787e-07, "epoch": 18.600746268656717, "percentage": 93.0, "elapsed_time": "0:24:11", "remaining_time": "0:01:49", "throughput": 1960.68, "total_tokens": 2846048}
2013
+ {"current_steps": 9975, "total_steps": 10720, "loss": 0.7062, "lr": 7.339665060360018e-07, "epoch": 18.61007462686567, "percentage": 93.05, "elapsed_time": "0:24:12", "remaining_time": "0:01:48", "throughput": 1960.75, "total_tokens": 2847488}
2014
+ {"current_steps": 9980, "total_steps": 10720, "loss": 0.7294, "lr": 7.24208405449281e-07, "epoch": 18.619402985074625, "percentage": 93.1, "elapsed_time": "0:24:12", "remaining_time": "0:01:47", "throughput": 1960.78, "total_tokens": 2848928}
2015
+ {"current_steps": 9985, "total_steps": 10720, "loss": 0.9256, "lr": 7.145146533686725e-07, "epoch": 18.628731343283583, "percentage": 93.14, "elapsed_time": "0:24:13", "remaining_time": "0:01:47", "throughput": 1960.71, "total_tokens": 2850176}
2016
+ {"current_steps": 9990, "total_steps": 10720, "loss": 0.5916, "lr": 7.048852754896806e-07, "epoch": 18.638059701492537, "percentage": 93.19, "elapsed_time": "0:24:14", "remaining_time": "0:01:46", "throughput": 1960.8, "total_tokens": 2851648}
2017
+ {"current_steps": 9995, "total_steps": 10720, "loss": 0.4239, "lr": 6.953202973371514e-07, "epoch": 18.64738805970149, "percentage": 93.24, "elapsed_time": "0:24:15", "remaining_time": "0:01:45", "throughput": 1960.9, "total_tokens": 2853248}
2018
+ {"current_steps": 10000, "total_steps": 10720, "loss": 0.6579, "lr": 6.858197442652369e-07, "epoch": 18.65671641791045, "percentage": 93.28, "elapsed_time": "0:24:15", "remaining_time": "0:01:44", "throughput": 1960.94, "total_tokens": 2854656}
2019
+ {"current_steps": 10005, "total_steps": 10720, "loss": 0.7626, "lr": 6.763836414573232e-07, "epoch": 18.666044776119403, "percentage": 93.33, "elapsed_time": "0:24:16", "remaining_time": "0:01:44", "throughput": 1960.94, "total_tokens": 2856000}
2020
+ {"current_steps": 10010, "total_steps": 10720, "loss": 0.5963, "lr": 6.670120139259328e-07, "epoch": 18.675373134328357, "percentage": 93.38, "elapsed_time": "0:24:17", "remaining_time": "0:01:43", "throughput": 1960.98, "total_tokens": 2857408}
2021
+ {"current_steps": 10015, "total_steps": 10720, "loss": 0.6501, "lr": 6.577048865127028e-07, "epoch": 18.684701492537314, "percentage": 93.42, "elapsed_time": "0:24:17", "remaining_time": "0:01:42", "throughput": 1961.01, "total_tokens": 2858784}
2022
+ {"current_steps": 10020, "total_steps": 10720, "loss": 0.6134, "lr": 6.484622838882903e-07, "epoch": 18.69402985074627, "percentage": 93.47, "elapsed_time": "0:24:18", "remaining_time": "0:01:41", "throughput": 1961.12, "total_tokens": 2860288}
2023
+ {"current_steps": 10025, "total_steps": 10720, "loss": 0.6841, "lr": 6.392842305523172e-07, "epoch": 18.703358208955223, "percentage": 93.52, "elapsed_time": "0:24:19", "remaining_time": "0:01:41", "throughput": 1961.15, "total_tokens": 2861664}
2024
+ {"current_steps": 10030, "total_steps": 10720, "loss": 0.7465, "lr": 6.301707508332977e-07, "epoch": 18.71268656716418, "percentage": 93.56, "elapsed_time": "0:24:19", "remaining_time": "0:01:40", "throughput": 1961.25, "total_tokens": 2863168}
2025
+ {"current_steps": 10035, "total_steps": 10720, "loss": 1.2308, "lr": 6.2112186888858e-07, "epoch": 18.722014925373134, "percentage": 93.61, "elapsed_time": "0:24:20", "remaining_time": "0:01:39", "throughput": 1961.3, "total_tokens": 2864576}
2026
+ {"current_steps": 10040, "total_steps": 10720, "loss": 0.8963, "lr": 6.121376087042913e-07, "epoch": 18.73134328358209, "percentage": 93.66, "elapsed_time": "0:24:21", "remaining_time": "0:01:38", "throughput": 1961.37, "total_tokens": 2866080}
2027
+ {"current_steps": 10045, "total_steps": 10720, "loss": 0.7163, "lr": 6.032179940952509e-07, "epoch": 18.740671641791046, "percentage": 93.7, "elapsed_time": "0:24:21", "remaining_time": "0:01:38", "throughput": 1961.41, "total_tokens": 2867488}
2028
+ {"current_steps": 10050, "total_steps": 10720, "loss": 0.7495, "lr": 5.943630487049295e-07, "epoch": 18.75, "percentage": 93.75, "elapsed_time": "0:24:22", "remaining_time": "0:01:37", "throughput": 1961.42, "total_tokens": 2868832}
2029
+ {"current_steps": 10055, "total_steps": 10720, "loss": 0.648, "lr": 5.855727960053653e-07, "epoch": 18.759328358208954, "percentage": 93.8, "elapsed_time": "0:24:23", "remaining_time": "0:01:36", "throughput": 1961.44, "total_tokens": 2870208}
2030
+ {"current_steps": 10060, "total_steps": 10720, "loss": 0.6361, "lr": 5.768472592971308e-07, "epoch": 18.76865671641791, "percentage": 93.84, "elapsed_time": "0:24:24", "remaining_time": "0:01:36", "throughput": 1961.6, "total_tokens": 2871840}
2031
+ {"current_steps": 10065, "total_steps": 10720, "loss": 0.5297, "lr": 5.681864617092414e-07, "epoch": 18.777985074626866, "percentage": 93.89, "elapsed_time": "0:24:24", "remaining_time": "0:01:35", "throughput": 1961.53, "total_tokens": 2873088}
2032
+ {"current_steps": 10070, "total_steps": 10720, "loss": 0.5447, "lr": 5.595904261991109e-07, "epoch": 18.78731343283582, "percentage": 93.94, "elapsed_time": "0:24:25", "remaining_time": "0:01:34", "throughput": 1961.71, "total_tokens": 2874688}
2033
+ {"current_steps": 10075, "total_steps": 10720, "loss": 0.5077, "lr": 5.510591755524874e-07, "epoch": 18.796641791044777, "percentage": 93.98, "elapsed_time": "0:24:26", "remaining_time": "0:01:33", "throughput": 1961.73, "total_tokens": 2876064}
2034
+ {"current_steps": 10080, "total_steps": 10720, "loss": 0.4801, "lr": 5.425927323833902e-07, "epoch": 18.80597014925373, "percentage": 94.03, "elapsed_time": "0:24:26", "remaining_time": "0:01:33", "throughput": 1961.77, "total_tokens": 2877472}
2035
+ {"current_steps": 10085, "total_steps": 10720, "loss": 0.5506, "lr": 5.341911191340504e-07, "epoch": 18.815298507462686, "percentage": 94.08, "elapsed_time": "0:24:27", "remaining_time": "0:01:32", "throughput": 1961.83, "total_tokens": 2878912}
2036
+ {"current_steps": 10090, "total_steps": 10720, "loss": 0.6319, "lr": 5.258543580748565e-07, "epoch": 18.824626865671643, "percentage": 94.12, "elapsed_time": "0:24:28", "remaining_time": "0:01:31", "throughput": 1961.89, "total_tokens": 2880352}
2037
+ {"current_steps": 10095, "total_steps": 10720, "loss": 0.579, "lr": 5.175824713042926e-07, "epoch": 18.833955223880597, "percentage": 94.17, "elapsed_time": "0:24:28", "remaining_time": "0:01:30", "throughput": 1961.98, "total_tokens": 2881824}
2038
+ {"current_steps": 10100, "total_steps": 10720, "loss": 0.6428, "lr": 5.093754807488693e-07, "epoch": 18.84328358208955, "percentage": 94.22, "elapsed_time": "0:24:29", "remaining_time": "0:01:30", "throughput": 1962.12, "total_tokens": 2883456}
2039
+ {"current_steps": 10105, "total_steps": 10720, "loss": 0.8422, "lr": 5.01233408163082e-07, "epoch": 18.85261194029851, "percentage": 94.26, "elapsed_time": "0:24:30", "remaining_time": "0:01:29", "throughput": 1962.11, "total_tokens": 2884800}
2040
+ {"current_steps": 10110, "total_steps": 10720, "loss": 0.515, "lr": 4.931562751293528e-07, "epoch": 18.861940298507463, "percentage": 94.31, "elapsed_time": "0:24:30", "remaining_time": "0:01:28", "throughput": 1962.22, "total_tokens": 2886368}
2041
+ {"current_steps": 10115, "total_steps": 10720, "loss": 0.7592, "lr": 4.851441030579523e-07, "epoch": 18.871268656716417, "percentage": 94.36, "elapsed_time": "0:24:31", "remaining_time": "0:01:28", "throughput": 1962.24, "total_tokens": 2887744}
2042
+ {"current_steps": 10120, "total_steps": 10720, "loss": 0.64, "lr": 4.771969131869669e-07, "epoch": 18.880597014925375, "percentage": 94.4, "elapsed_time": "0:24:32", "remaining_time": "0:01:27", "throughput": 1962.3, "total_tokens": 2889184}
2043
+ {"current_steps": 10125, "total_steps": 10720, "loss": 0.808, "lr": 4.6931472658223176e-07, "epoch": 18.88992537313433, "percentage": 94.45, "elapsed_time": "0:24:33", "remaining_time": "0:01:26", "throughput": 1962.35, "total_tokens": 2890592}
2044
+ {"current_steps": 10130, "total_steps": 10720, "loss": 0.7487, "lr": 4.614975641372754e-07, "epoch": 18.899253731343283, "percentage": 94.5, "elapsed_time": "0:24:33", "remaining_time": "0:01:25", "throughput": 1962.35, "total_tokens": 2891936}
2045
+ {"current_steps": 10135, "total_steps": 10720, "loss": 0.7323, "lr": 4.5374544657326157e-07, "epoch": 18.90858208955224, "percentage": 94.54, "elapsed_time": "0:24:34", "remaining_time": "0:01:25", "throughput": 1962.43, "total_tokens": 2893408}
2046
+ {"current_steps": 10140, "total_steps": 10720, "loss": 0.9823, "lr": 4.460583944389418e-07, "epoch": 18.917910447761194, "percentage": 94.59, "elapsed_time": "0:24:35", "remaining_time": "0:01:24", "throughput": 1962.46, "total_tokens": 2894784}
2047
+ {"current_steps": 10145, "total_steps": 10720, "loss": 0.852, "lr": 4.3843642811059737e-07, "epoch": 18.92723880597015, "percentage": 94.64, "elapsed_time": "0:24:35", "remaining_time": "0:01:23", "throughput": 1962.48, "total_tokens": 2896160}
2048
+ {"current_steps": 10150, "total_steps": 10720, "loss": 0.4968, "lr": 4.3087956779198356e-07, "epoch": 18.936567164179106, "percentage": 94.68, "elapsed_time": "0:24:36", "remaining_time": "0:01:22", "throughput": 1962.42, "total_tokens": 2897408}
2049
+ {"current_steps": 10155, "total_steps": 10720, "loss": 0.6075, "lr": 4.2338783351427156e-07, "epoch": 18.94589552238806, "percentage": 94.73, "elapsed_time": "0:24:37", "remaining_time": "0:01:22", "throughput": 1962.54, "total_tokens": 2899008}
2050
+ {"current_steps": 10160, "total_steps": 10720, "loss": 0.469, "lr": 4.159612451360151e-07, "epoch": 18.955223880597014, "percentage": 94.78, "elapsed_time": "0:24:37", "remaining_time": "0:01:21", "throughput": 1962.6, "total_tokens": 2900448}
2051
+ {"current_steps": 10165, "total_steps": 10720, "loss": 0.569, "lr": 4.085998223430698e-07, "epoch": 18.96455223880597, "percentage": 94.82, "elapsed_time": "0:24:38", "remaining_time": "0:01:20", "throughput": 1962.72, "total_tokens": 2901984}
2052
+ {"current_steps": 10170, "total_steps": 10720, "loss": 0.9144, "lr": 4.013035846485658e-07, "epoch": 18.973880597014926, "percentage": 94.87, "elapsed_time": "0:24:39", "remaining_time": "0:01:19", "throughput": 1962.76, "total_tokens": 2903392}
2053
+ {"current_steps": 10175, "total_steps": 10720, "loss": 0.638, "lr": 3.940725513928323e-07, "epoch": 18.98320895522388, "percentage": 94.92, "elapsed_time": "0:24:39", "remaining_time": "0:01:19", "throughput": 1962.76, "total_tokens": 2904736}
2054
+ {"current_steps": 10180, "total_steps": 10720, "loss": 0.8247, "lr": 3.8690674174337305e-07, "epoch": 18.992537313432837, "percentage": 94.96, "elapsed_time": "0:24:40", "remaining_time": "0:01:18", "throughput": 1962.7, "total_tokens": 2905984}
2055
+ {"current_steps": 10184, "total_steps": 10720, "eval_loss": 0.6947132349014282, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:24:43", "remaining_time": "0:01:18", "throughput": 1959.33, "total_tokens": 2906984}
2056
+ {"current_steps": 10185, "total_steps": 10720, "loss": 0.6705, "lr": 3.7980617469479953e-07, "epoch": 19.00186567164179, "percentage": 95.01, "elapsed_time": "0:24:44", "remaining_time": "0:01:18", "throughput": 1957.77, "total_tokens": 2907240}
2057
+ {"current_steps": 10190, "total_steps": 10720, "loss": 0.6323, "lr": 3.7277086906877256e-07, "epoch": 19.011194029850746, "percentage": 95.06, "elapsed_time": "0:24:45", "remaining_time": "0:01:17", "throughput": 1957.81, "total_tokens": 2908680}
2058
+ {"current_steps": 10195, "total_steps": 10720, "loss": 0.6044, "lr": 3.65800843513972e-07, "epoch": 19.020522388059703, "percentage": 95.1, "elapsed_time": "0:24:46", "remaining_time": "0:01:16", "throughput": 1957.94, "total_tokens": 2910248}
2059
+ {"current_steps": 10200, "total_steps": 10720, "loss": 0.7154, "lr": 3.588961165060356e-07, "epoch": 19.029850746268657, "percentage": 95.15, "elapsed_time": "0:24:47", "remaining_time": "0:01:15", "throughput": 1957.98, "total_tokens": 2911720}
2060
+ {"current_steps": 10205, "total_steps": 10720, "loss": 0.7465, "lr": 3.5205670634751163e-07, "epoch": 19.03917910447761, "percentage": 95.2, "elapsed_time": "0:24:47", "remaining_time": "0:01:15", "throughput": 1958.0, "total_tokens": 2913128}
2061
+ {"current_steps": 10210, "total_steps": 10720, "loss": 0.7233, "lr": 3.452826311678148e-07, "epoch": 19.04850746268657, "percentage": 95.24, "elapsed_time": "0:24:48", "remaining_time": "0:01:14", "throughput": 1957.97, "total_tokens": 2914440}
2062
+ {"current_steps": 10215, "total_steps": 10720, "loss": 0.6746, "lr": 3.3857390892316764e-07, "epoch": 19.057835820895523, "percentage": 95.29, "elapsed_time": "0:24:49", "remaining_time": "0:01:13", "throughput": 1958.01, "total_tokens": 2915880}
2063
+ {"current_steps": 10220, "total_steps": 10720, "loss": 0.9204, "lr": 3.319305573965703e-07, "epoch": 19.067164179104477, "percentage": 95.34, "elapsed_time": "0:24:49", "remaining_time": "0:01:12", "throughput": 1958.06, "total_tokens": 2917320}
2064
+ {"current_steps": 10225, "total_steps": 10720, "loss": 0.6955, "lr": 3.253525941977309e-07, "epoch": 19.07649253731343, "percentage": 95.38, "elapsed_time": "0:24:50", "remaining_time": "0:01:12", "throughput": 1958.16, "total_tokens": 2918952}
2065
+ {"current_steps": 10230, "total_steps": 10720, "loss": 0.6313, "lr": 3.1884003676303786e-07, "epoch": 19.08582089552239, "percentage": 95.43, "elapsed_time": "0:24:51", "remaining_time": "0:01:11", "throughput": 1958.04, "total_tokens": 2920328}
2066
+ {"current_steps": 10235, "total_steps": 10720, "loss": 0.7169, "lr": 3.1239290235550724e-07, "epoch": 19.095149253731343, "percentage": 95.48, "elapsed_time": "0:24:52", "remaining_time": "0:01:10", "throughput": 1958.08, "total_tokens": 2921736}