Training in progress, step 10720
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +99 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 798032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c1268540463a154ae3b879cf308817ac58cf6169c494d37c3cecb79bb8f0a073
|
| 3 |
size 798032
|
trainer_log.jsonl
CHANGED
|
@@ -2064,3 +2064,102 @@
|
|
| 2064 |
{"current_steps": 10225, "total_steps": 10720, "loss": 0.6955, "lr": 3.253525941977309e-07, "epoch": 19.07649253731343, "percentage": 95.38, "elapsed_time": "0:24:50", "remaining_time": "0:01:12", "throughput": 1958.16, "total_tokens": 2918952}
|
| 2065 |
{"current_steps": 10230, "total_steps": 10720, "loss": 0.6313, "lr": 3.1884003676303786e-07, "epoch": 19.08582089552239, "percentage": 95.43, "elapsed_time": "0:24:51", "remaining_time": "0:01:11", "throughput": 1958.04, "total_tokens": 2920328}
|
| 2066 |
{"current_steps": 10235, "total_steps": 10720, "loss": 0.7169, "lr": 3.1239290235550724e-07, "epoch": 19.095149253731343, "percentage": 95.48, "elapsed_time": "0:24:52", "remaining_time": "0:01:10", "throughput": 1958.08, "total_tokens": 2921736}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2064 |
{"current_steps": 10225, "total_steps": 10720, "loss": 0.6955, "lr": 3.253525941977309e-07, "epoch": 19.07649253731343, "percentage": 95.38, "elapsed_time": "0:24:50", "remaining_time": "0:01:12", "throughput": 1958.16, "total_tokens": 2918952}
|
| 2065 |
{"current_steps": 10230, "total_steps": 10720, "loss": 0.6313, "lr": 3.1884003676303786e-07, "epoch": 19.08582089552239, "percentage": 95.43, "elapsed_time": "0:24:51", "remaining_time": "0:01:11", "throughput": 1958.04, "total_tokens": 2920328}
|
| 2066 |
{"current_steps": 10235, "total_steps": 10720, "loss": 0.7169, "lr": 3.1239290235550724e-07, "epoch": 19.095149253731343, "percentage": 95.48, "elapsed_time": "0:24:52", "remaining_time": "0:01:10", "throughput": 1958.08, "total_tokens": 2921736}
|
| 2067 |
+
{"current_steps": 10240, "total_steps": 10720, "loss": 0.7417, "lr": 3.0601120806473535e-07, "epoch": 19.104477611940297, "percentage": 95.52, "elapsed_time": "0:24:52", "remaining_time": "0:01:09", "throughput": 1958.19, "total_tokens": 2923272}
|
| 2068 |
+
{"current_steps": 10245, "total_steps": 10720, "loss": 0.8859, "lr": 2.9969497080685196e-07, "epoch": 19.113805970149254, "percentage": 95.57, "elapsed_time": "0:24:53", "remaining_time": "0:01:09", "throughput": 1958.26, "total_tokens": 2924744}
|
| 2069 |
+
{"current_steps": 10250, "total_steps": 10720, "loss": 0.5709, "lr": 2.934442073244809e-07, "epoch": 19.12313432835821, "percentage": 95.62, "elapsed_time": "0:24:54", "remaining_time": "0:01:08", "throughput": 1958.34, "total_tokens": 2926216}
|
| 2070 |
+
{"current_steps": 10255, "total_steps": 10720, "loss": 0.7567, "lr": 2.87258934186696e-07, "epoch": 19.132462686567163, "percentage": 95.66, "elapsed_time": "0:24:54", "remaining_time": "0:01:07", "throughput": 1958.44, "total_tokens": 2927720}
|
| 2071 |
+
{"current_steps": 10260, "total_steps": 10720, "loss": 0.6973, "lr": 2.8113916778896575e-07, "epoch": 19.14179104477612, "percentage": 95.71, "elapsed_time": "0:24:55", "remaining_time": "0:01:07", "throughput": 1958.49, "total_tokens": 2929160}
|
| 2072 |
+
{"current_steps": 10265, "total_steps": 10720, "loss": 0.6811, "lr": 2.750849243531223e-07, "epoch": 19.151119402985074, "percentage": 95.76, "elapsed_time": "0:24:56", "remaining_time": "0:01:06", "throughput": 1958.47, "total_tokens": 2930472}
|
| 2073 |
+
{"current_steps": 10270, "total_steps": 10720, "loss": 0.6818, "lr": 2.6909621992731726e-07, "epoch": 19.16044776119403, "percentage": 95.8, "elapsed_time": "0:24:56", "remaining_time": "0:01:05", "throughput": 1958.44, "total_tokens": 2931784}
|
| 2074 |
+
{"current_steps": 10275, "total_steps": 10720, "loss": 0.7448, "lr": 2.6317307038597196e-07, "epoch": 19.169776119402986, "percentage": 95.85, "elapsed_time": "0:24:57", "remaining_time": "0:01:04", "throughput": 1958.5, "total_tokens": 2933224}
|
| 2075 |
+
{"current_steps": 10280, "total_steps": 10720, "loss": 0.71, "lr": 2.573154914297438e-07, "epoch": 19.17910447761194, "percentage": 95.9, "elapsed_time": "0:24:58", "remaining_time": "0:01:04", "throughput": 1958.52, "total_tokens": 2934600}
|
| 2076 |
+
{"current_steps": 10285, "total_steps": 10720, "loss": 0.7602, "lr": 2.515234985854736e-07, "epoch": 19.188432835820894, "percentage": 95.94, "elapsed_time": "0:24:59", "remaining_time": "0:01:03", "throughput": 1958.67, "total_tokens": 2936168}
|
| 2077 |
+
{"current_steps": 10290, "total_steps": 10720, "loss": 0.8304, "lr": 2.45797107206161e-07, "epoch": 19.19776119402985, "percentage": 95.99, "elapsed_time": "0:24:59", "remaining_time": "0:01:02", "throughput": 1958.85, "total_tokens": 2937864}
|
| 2078 |
+
{"current_steps": 10295, "total_steps": 10720, "loss": 0.5908, "lr": 2.401363324709055e-07, "epoch": 19.207089552238806, "percentage": 96.04, "elapsed_time": "0:25:00", "remaining_time": "0:01:01", "throughput": 1958.88, "total_tokens": 2939272}
|
| 2079 |
+
{"current_steps": 10300, "total_steps": 10720, "loss": 0.4307, "lr": 2.3454118938487367e-07, "epoch": 19.21641791044776, "percentage": 96.08, "elapsed_time": "0:25:01", "remaining_time": "0:01:01", "throughput": 1958.87, "total_tokens": 2940616}
|
| 2080 |
+
{"current_steps": 10305, "total_steps": 10720, "loss": 0.6784, "lr": 2.2901169277927127e-07, "epoch": 19.225746268656717, "percentage": 96.13, "elapsed_time": "0:25:01", "remaining_time": "0:01:00", "throughput": 1958.86, "total_tokens": 2941960}
|
| 2081 |
+
{"current_steps": 10310, "total_steps": 10720, "loss": 0.5246, "lr": 2.2354785731128482e-07, "epoch": 19.23507462686567, "percentage": 96.18, "elapsed_time": "0:25:02", "remaining_time": "0:00:59", "throughput": 1958.91, "total_tokens": 2943400}
|
| 2082 |
+
{"current_steps": 10315, "total_steps": 10720, "loss": 0.4646, "lr": 2.18149697464054e-07, "epoch": 19.244402985074625, "percentage": 96.22, "elapsed_time": "0:25:03", "remaining_time": "0:00:59", "throughput": 1959.0, "total_tokens": 2944936}
|
| 2083 |
+
{"current_steps": 10320, "total_steps": 10720, "loss": 0.8452, "lr": 2.128172275466217e-07, "epoch": 19.253731343283583, "percentage": 96.27, "elapsed_time": "0:25:03", "remaining_time": "0:00:58", "throughput": 1959.01, "total_tokens": 2946312}
|
| 2084 |
+
{"current_steps": 10325, "total_steps": 10720, "loss": 0.7365, "lr": 2.0755046169392e-07, "epoch": 19.263059701492537, "percentage": 96.32, "elapsed_time": "0:25:04", "remaining_time": "0:00:57", "throughput": 1958.99, "total_tokens": 2947624}
|
| 2085 |
+
{"current_steps": 10330, "total_steps": 10720, "loss": 0.5902, "lr": 2.0234941386670925e-07, "epoch": 19.27238805970149, "percentage": 96.36, "elapsed_time": "0:25:05", "remaining_time": "0:00:56", "throughput": 1959.07, "total_tokens": 2949160}
|
| 2086 |
+
{"current_steps": 10335, "total_steps": 10720, "loss": 0.7088, "lr": 1.9721409785154466e-07, "epoch": 19.28171641791045, "percentage": 96.41, "elapsed_time": "0:25:06", "remaining_time": "0:00:56", "throughput": 1959.09, "total_tokens": 2950536}
|
| 2087 |
+
{"current_steps": 10340, "total_steps": 10720, "loss": 0.5402, "lr": 1.9214452726075137e-07, "epoch": 19.291044776119403, "percentage": 96.46, "elapsed_time": "0:25:06", "remaining_time": "0:00:55", "throughput": 1959.07, "total_tokens": 2951848}
|
| 2088 |
+
{"current_steps": 10345, "total_steps": 10720, "loss": 0.5823, "lr": 1.871407155323801e-07, "epoch": 19.300373134328357, "percentage": 96.5, "elapsed_time": "0:25:07", "remaining_time": "0:00:54", "throughput": 1959.26, "total_tokens": 2953480}
|
| 2089 |
+
{"current_steps": 10350, "total_steps": 10720, "loss": 0.5044, "lr": 1.8220267593017092e-07, "epoch": 19.309701492537314, "percentage": 96.55, "elapsed_time": "0:25:08", "remaining_time": "0:00:53", "throughput": 1959.38, "total_tokens": 2955016}
|
| 2090 |
+
{"current_steps": 10355, "total_steps": 10720, "loss": 0.6817, "lr": 1.7733042154352008e-07, "epoch": 19.31902985074627, "percentage": 96.6, "elapsed_time": "0:25:08", "remaining_time": "0:00:53", "throughput": 1959.51, "total_tokens": 2956552}
|
| 2091 |
+
{"current_steps": 10360, "total_steps": 10720, "loss": 0.5955, "lr": 1.7252396528744663e-07, "epoch": 19.328358208955223, "percentage": 96.64, "elapsed_time": "0:25:09", "remaining_time": "0:00:52", "throughput": 1959.47, "total_tokens": 2957832}
|
| 2092 |
+
{"current_steps": 10365, "total_steps": 10720, "loss": 0.6036, "lr": 1.6778331990255914e-07, "epoch": 19.33768656716418, "percentage": 96.69, "elapsed_time": "0:25:10", "remaining_time": "0:00:51", "throughput": 1959.58, "total_tokens": 2959336}
|
| 2093 |
+
{"current_steps": 10370, "total_steps": 10720, "loss": 0.8523, "lr": 1.6310849795500848e-07, "epoch": 19.347014925373134, "percentage": 96.74, "elapsed_time": "0:25:10", "remaining_time": "0:00:50", "throughput": 1959.6, "total_tokens": 2960712}
|
| 2094 |
+
{"current_steps": 10375, "total_steps": 10720, "loss": 0.7317, "lr": 1.584995118364796e-07, "epoch": 19.35634328358209, "percentage": 96.78, "elapsed_time": "0:25:11", "remaining_time": "0:00:50", "throughput": 1959.57, "total_tokens": 2962024}
|
| 2095 |
+
{"current_steps": 10380, "total_steps": 10720, "loss": 0.666, "lr": 1.5395637376413585e-07, "epoch": 19.365671641791046, "percentage": 96.83, "elapsed_time": "0:25:12", "remaining_time": "0:00:49", "throughput": 1959.74, "total_tokens": 2963688}
|
| 2096 |
+
{"current_steps": 10385, "total_steps": 10720, "loss": 0.692, "lr": 1.4947909578059971e-07, "epoch": 19.375, "percentage": 96.88, "elapsed_time": "0:25:12", "remaining_time": "0:00:48", "throughput": 1959.74, "total_tokens": 2965032}
|
| 2097 |
+
{"current_steps": 10390, "total_steps": 10720, "loss": 0.8505, "lr": 1.4506768975391382e-07, "epoch": 19.384328358208954, "percentage": 96.92, "elapsed_time": "0:25:13", "remaining_time": "0:00:48", "throughput": 1959.73, "total_tokens": 2966376}
|
| 2098 |
+
{"current_steps": 10395, "total_steps": 10720, "loss": 0.8438, "lr": 1.4072216737751055e-07, "epoch": 19.39365671641791, "percentage": 96.97, "elapsed_time": "0:25:14", "remaining_time": "0:00:47", "throughput": 1959.68, "total_tokens": 2967656}
|
| 2099 |
+
{"current_steps": 10400, "total_steps": 10720, "loss": 0.8172, "lr": 1.3644254017018964e-07, "epoch": 19.402985074626866, "percentage": 97.01, "elapsed_time": "0:25:15", "remaining_time": "0:00:46", "throughput": 1959.83, "total_tokens": 2969224}
|
| 2100 |
+
{"current_steps": 10405, "total_steps": 10720, "loss": 0.6583, "lr": 1.3222881947607123e-07, "epoch": 19.41231343283582, "percentage": 97.06, "elapsed_time": "0:25:15", "remaining_time": "0:00:45", "throughput": 1959.86, "total_tokens": 2970632}
|
| 2101 |
+
{"current_steps": 10410, "total_steps": 10720, "loss": 0.6038, "lr": 1.280810164645846e-07, "epoch": 19.421641791044777, "percentage": 97.11, "elapsed_time": "0:25:16", "remaining_time": "0:00:45", "throughput": 1959.95, "total_tokens": 2972104}
|
| 2102 |
+
{"current_steps": 10415, "total_steps": 10720, "loss": 0.5608, "lr": 1.2399914213042373e-07, "epoch": 19.43097014925373, "percentage": 97.15, "elapsed_time": "0:25:17", "remaining_time": "0:00:44", "throughput": 1960.01, "total_tokens": 2973608}
|
| 2103 |
+
{"current_steps": 10420, "total_steps": 10720, "loss": 0.6067, "lr": 1.1998320729352252e-07, "epoch": 19.440298507462686, "percentage": 97.2, "elapsed_time": "0:25:17", "remaining_time": "0:00:43", "throughput": 1960.04, "total_tokens": 2974984}
|
| 2104 |
+
{"current_steps": 10425, "total_steps": 10720, "loss": 0.4829, "lr": 1.160332225990296e-07, "epoch": 19.449626865671643, "percentage": 97.25, "elapsed_time": "0:25:18", "remaining_time": "0:00:42", "throughput": 1960.22, "total_tokens": 2976712}
|
| 2105 |
+
{"current_steps": 10430, "total_steps": 10720, "loss": 0.787, "lr": 1.1214919851728068e-07, "epoch": 19.458955223880597, "percentage": 97.29, "elapsed_time": "0:25:19", "remaining_time": "0:00:42", "throughput": 1960.22, "total_tokens": 2978056}
|
| 2106 |
+
{"current_steps": 10435, "total_steps": 10720, "loss": 0.6665, "lr": 1.0833114534376798e-07, "epoch": 19.46828358208955, "percentage": 97.34, "elapsed_time": "0:25:19", "remaining_time": "0:00:41", "throughput": 1960.24, "total_tokens": 2979432}
|
| 2107 |
+
{"current_steps": 10440, "total_steps": 10720, "loss": 0.7488, "lr": 1.0457907319909865e-07, "epoch": 19.47761194029851, "percentage": 97.39, "elapsed_time": "0:25:20", "remaining_time": "0:00:40", "throughput": 1960.32, "total_tokens": 2980904}
|
| 2108 |
+
{"current_steps": 10445, "total_steps": 10720, "loss": 0.6916, "lr": 1.0089299202900304e-07, "epoch": 19.486940298507463, "percentage": 97.43, "elapsed_time": "0:25:21", "remaining_time": "0:00:40", "throughput": 1960.46, "total_tokens": 2982536}
|
| 2109 |
+
{"current_steps": 10450, "total_steps": 10720, "loss": 0.9073, "lr": 9.727291160427366e-08, "epoch": 19.496268656716417, "percentage": 97.48, "elapsed_time": "0:25:22", "remaining_time": "0:00:39", "throughput": 1960.57, "total_tokens": 2984040}
|
| 2110 |
+
{"current_steps": 10455, "total_steps": 10720, "loss": 0.5175, "lr": 9.371884152075683e-08, "epoch": 19.505597014925375, "percentage": 97.53, "elapsed_time": "0:25:22", "remaining_time": "0:00:38", "throughput": 1960.71, "total_tokens": 2985608}
|
| 2111 |
+
{"current_steps": 10460, "total_steps": 10720, "loss": 0.5744, "lr": 9.023079119932498e-08, "epoch": 19.51492537313433, "percentage": 97.57, "elapsed_time": "0:25:23", "remaining_time": "0:00:37", "throughput": 1960.79, "total_tokens": 2987080}
|
| 2112 |
+
{"current_steps": 10465, "total_steps": 10720, "loss": 0.8774, "lr": 8.680876988584608e-08, "epoch": 19.524253731343283, "percentage": 97.62, "elapsed_time": "0:25:24", "remaining_time": "0:00:37", "throughput": 1960.75, "total_tokens": 2988360}
|
| 2113 |
+
{"current_steps": 10470, "total_steps": 10720, "loss": 0.7055, "lr": 8.345278665116974e-08, "epoch": 19.53358208955224, "percentage": 97.67, "elapsed_time": "0:25:24", "remaining_time": "0:00:36", "throughput": 1960.88, "total_tokens": 2989960}
|
| 2114 |
+
{"current_steps": 10475, "total_steps": 10720, "loss": 0.7468, "lr": 8.01628503910884e-08, "epoch": 19.542910447761194, "percentage": 97.71, "elapsed_time": "0:25:25", "remaining_time": "0:00:35", "throughput": 1960.88, "total_tokens": 2991304}
|
| 2115 |
+
{"current_steps": 10480, "total_steps": 10720, "loss": 0.7607, "lr": 7.693896982632898e-08, "epoch": 19.55223880597015, "percentage": 97.76, "elapsed_time": "0:25:26", "remaining_time": "0:00:34", "throughput": 1960.84, "total_tokens": 2992584}
|
| 2116 |
+
{"current_steps": 10485, "total_steps": 10720, "loss": 0.6805, "lr": 7.378115350251957e-08, "epoch": 19.561567164179106, "percentage": 97.81, "elapsed_time": "0:25:26", "remaining_time": "0:00:34", "throughput": 1960.98, "total_tokens": 2994216}
|
| 2117 |
+
{"current_steps": 10490, "total_steps": 10720, "loss": 0.672, "lr": 7.068940979017003e-08, "epoch": 19.57089552238806, "percentage": 97.85, "elapsed_time": "0:25:27", "remaining_time": "0:00:33", "throughput": 1960.98, "total_tokens": 2995560}
|
| 2118 |
+
{"current_steps": 10495, "total_steps": 10720, "loss": 0.609, "lr": 6.766374688464971e-08, "epoch": 19.580223880597014, "percentage": 97.9, "elapsed_time": "0:25:28", "remaining_time": "0:00:32", "throughput": 1961.06, "total_tokens": 2997032}
|
| 2119 |
+
{"current_steps": 10500, "total_steps": 10720, "loss": 0.614, "lr": 6.470417280616814e-08, "epoch": 19.58955223880597, "percentage": 97.95, "elapsed_time": "0:25:28", "remaining_time": "0:00:32", "throughput": 1961.04, "total_tokens": 2998344}
|
| 2120 |
+
{"current_steps": 10505, "total_steps": 10720, "loss": 1.0406, "lr": 6.181069539974716e-08, "epoch": 19.598880597014926, "percentage": 97.99, "elapsed_time": "0:25:29", "remaining_time": "0:00:31", "throughput": 1960.98, "total_tokens": 2999592}
|
| 2121 |
+
{"current_steps": 10510, "total_steps": 10720, "loss": 0.4992, "lr": 5.898332233520987e-08, "epoch": 19.60820895522388, "percentage": 98.04, "elapsed_time": "0:25:30", "remaining_time": "0:00:30", "throughput": 1961.05, "total_tokens": 3001064}
|
| 2122 |
+
{"current_steps": 10515, "total_steps": 10720, "loss": 0.5235, "lr": 5.622206110714734e-08, "epoch": 19.617537313432837, "percentage": 98.09, "elapsed_time": "0:25:31", "remaining_time": "0:00:29", "throughput": 1961.0, "total_tokens": 3002344}
|
| 2123 |
+
{"current_steps": 10520, "total_steps": 10720, "loss": 0.707, "lr": 5.352691903491303e-08, "epoch": 19.62686567164179, "percentage": 98.13, "elapsed_time": "0:25:31", "remaining_time": "0:00:29", "throughput": 1960.94, "total_tokens": 3003592}
|
| 2124 |
+
{"current_steps": 10525, "total_steps": 10720, "loss": 0.7635, "lr": 5.089790326259225e-08, "epoch": 19.636194029850746, "percentage": 98.18, "elapsed_time": "0:25:32", "remaining_time": "0:00:28", "throughput": 1960.88, "total_tokens": 3004840}
|
| 2125 |
+
{"current_steps": 10530, "total_steps": 10720, "loss": 0.7534, "lr": 4.83350207589911e-08, "epoch": 19.645522388059703, "percentage": 98.23, "elapsed_time": "0:25:33", "remaining_time": "0:00:27", "throughput": 1960.83, "total_tokens": 3006120}
|
| 2126 |
+
{"current_steps": 10535, "total_steps": 10720, "loss": 0.7359, "lr": 4.583827831761145e-08, "epoch": 19.654850746268657, "percentage": 98.27, "elapsed_time": "0:25:33", "remaining_time": "0:00:26", "throughput": 1960.89, "total_tokens": 3007560}
|
| 2127 |
+
{"current_steps": 10540, "total_steps": 10720, "loss": 0.8041, "lr": 4.340768255663708e-08, "epoch": 19.66417910447761, "percentage": 98.32, "elapsed_time": "0:25:34", "remaining_time": "0:00:26", "throughput": 1960.87, "total_tokens": 3008872}
|
| 2128 |
+
{"current_steps": 10545, "total_steps": 10720, "loss": 0.7634, "lr": 4.1043239918914233e-08, "epoch": 19.673507462686565, "percentage": 98.37, "elapsed_time": "0:25:35", "remaining_time": "0:00:25", "throughput": 1960.84, "total_tokens": 3010184}
|
| 2129 |
+
{"current_steps": 10550, "total_steps": 10720, "loss": 0.5307, "lr": 3.8744956671937784e-08, "epoch": 19.682835820895523, "percentage": 98.41, "elapsed_time": "0:25:35", "remaining_time": "0:00:24", "throughput": 1960.86, "total_tokens": 3011560}
|
| 2130 |
+
{"current_steps": 10555, "total_steps": 10720, "loss": 0.7157, "lr": 3.6512838907828974e-08, "epoch": 19.692164179104477, "percentage": 98.46, "elapsed_time": "0:25:36", "remaining_time": "0:00:24", "throughput": 1961.02, "total_tokens": 3013160}
|
| 2131 |
+
{"current_steps": 10560, "total_steps": 10720, "loss": 0.8588, "lr": 3.4346892543321576e-08, "epoch": 19.701492537313435, "percentage": 98.51, "elapsed_time": "0:25:37", "remaining_time": "0:00:23", "throughput": 1961.1, "total_tokens": 3014632}
|
| 2132 |
+
{"current_steps": 10565, "total_steps": 10720, "loss": 0.5909, "lr": 3.224712331975077e-08, "epoch": 19.71082089552239, "percentage": 98.55, "elapsed_time": "0:25:37", "remaining_time": "0:00:22", "throughput": 1961.09, "total_tokens": 3015976}
|
| 2133 |
+
{"current_steps": 10570, "total_steps": 10720, "loss": 0.6945, "lr": 3.021353680303096e-08, "epoch": 19.720149253731343, "percentage": 98.6, "elapsed_time": "0:25:38", "remaining_time": "0:00:21", "throughput": 1961.13, "total_tokens": 3017384}
|
| 2134 |
+
{"current_steps": 10575, "total_steps": 10720, "loss": 0.5384, "lr": 2.82461383836502e-08, "epoch": 19.729477611940297, "percentage": 98.65, "elapsed_time": "0:25:39", "remaining_time": "0:00:21", "throughput": 1961.23, "total_tokens": 3018888}
|
| 2135 |
+
{"current_steps": 10580, "total_steps": 10720, "loss": 0.8826, "lr": 2.634493327663967e-08, "epoch": 19.738805970149254, "percentage": 98.69, "elapsed_time": "0:25:39", "remaining_time": "0:00:20", "throughput": 1961.23, "total_tokens": 3020232}
|
| 2136 |
+
{"current_steps": 10585, "total_steps": 10720, "loss": 0.8419, "lr": 2.450992652157924e-08, "epoch": 19.74813432835821, "percentage": 98.74, "elapsed_time": "0:25:40", "remaining_time": "0:00:19", "throughput": 1961.28, "total_tokens": 3021640}
|
| 2137 |
+
{"current_steps": 10590, "total_steps": 10720, "loss": 0.6776, "lr": 2.2741122982569694e-08, "epoch": 19.757462686567163, "percentage": 98.79, "elapsed_time": "0:25:41", "remaining_time": "0:00:18", "throughput": 1961.3, "total_tokens": 3023016}
|
| 2138 |
+
{"current_steps": 10595, "total_steps": 10720, "loss": 0.7002, "lr": 2.1038527348229974e-08, "epoch": 19.76679104477612, "percentage": 98.83, "elapsed_time": "0:25:42", "remaining_time": "0:00:18", "throughput": 1961.36, "total_tokens": 3024456}
|
| 2139 |
+
{"current_steps": 10600, "total_steps": 10720, "loss": 0.4083, "lr": 1.940214413167496e-08, "epoch": 19.776119402985074, "percentage": 98.88, "elapsed_time": "0:25:42", "remaining_time": "0:00:17", "throughput": 1961.44, "total_tokens": 3025928}
|
| 2140 |
+
{"current_steps": 10605, "total_steps": 10720, "loss": 0.5945, "lr": 1.7831977670507148e-08, "epoch": 19.78544776119403, "percentage": 98.93, "elapsed_time": "0:25:43", "remaining_time": "0:00:16", "throughput": 1961.42, "total_tokens": 3027240}
|
| 2141 |
+
{"current_steps": 10610, "total_steps": 10720, "loss": 0.6234, "lr": 1.632803212681666e-08, "epoch": 19.794776119402986, "percentage": 98.97, "elapsed_time": "0:25:44", "remaining_time": "0:00:16", "throughput": 1961.46, "total_tokens": 3028648}
|
| 2142 |
+
{"current_steps": 10615, "total_steps": 10720, "loss": 0.7224, "lr": 1.4890311487150698e-08, "epoch": 19.80410447761194, "percentage": 99.02, "elapsed_time": "0:25:44", "remaining_time": "0:00:15", "throughput": 1961.49, "total_tokens": 3030056}
|
| 2143 |
+
{"current_steps": 10620, "total_steps": 10720, "loss": 0.858, "lr": 1.3518819562510776e-08, "epoch": 19.813432835820894, "percentage": 99.07, "elapsed_time": "0:25:45", "remaining_time": "0:00:14", "throughput": 1961.5, "total_tokens": 3031432}
|
| 2144 |
+
{"current_steps": 10625, "total_steps": 10720, "loss": 0.6413, "lr": 1.221355998835272e-08, "epoch": 19.82276119402985, "percentage": 99.11, "elapsed_time": "0:25:46", "remaining_time": "0:00:13", "throughput": 1961.6, "total_tokens": 3032936}
|
| 2145 |
+
{"current_steps": 10630, "total_steps": 10720, "loss": 0.5857, "lr": 1.0974536224561682e-08, "epoch": 19.832089552238806, "percentage": 99.16, "elapsed_time": "0:25:46", "remaining_time": "0:00:13", "throughput": 1961.68, "total_tokens": 3034408}
|
| 2146 |
+
{"current_steps": 10635, "total_steps": 10720, "loss": 0.8918, "lr": 9.801751555452154e-09, "epoch": 19.84141791044776, "percentage": 99.21, "elapsed_time": "0:25:47", "remaining_time": "0:00:12", "throughput": 1961.7, "total_tokens": 3035784}
|
| 2147 |
+
{"current_steps": 10640, "total_steps": 10720, "loss": 0.5075, "lr": 8.695209089759626e-09, "epoch": 19.850746268656717, "percentage": 99.25, "elapsed_time": "0:25:48", "remaining_time": "0:00:11", "throughput": 1961.82, "total_tokens": 3037320}
|
| 2148 |
+
{"current_steps": 10645, "total_steps": 10720, "loss": 0.6397, "lr": 7.654911760621163e-09, "epoch": 19.86007462686567, "percentage": 99.3, "elapsed_time": "0:25:48", "remaining_time": "0:00:10", "throughput": 1961.9, "total_tokens": 3038792}
|
| 2149 |
+
{"current_steps": 10650, "total_steps": 10720, "loss": 0.4452, "lr": 6.680862325583736e-09, "epoch": 19.869402985074625, "percentage": 99.35, "elapsed_time": "0:25:49", "remaining_time": "0:00:10", "throughput": 1962.04, "total_tokens": 3040424}
|
| 2150 |
+
{"current_steps": 10655, "total_steps": 10720, "loss": 0.8456, "lr": 5.7730633665903365e-09, "epoch": 19.878731343283583, "percentage": 99.39, "elapsed_time": "0:25:50", "remaining_time": "0:00:09", "throughput": 1962.03, "total_tokens": 3041736}
|
| 2151 |
+
{"current_steps": 10660, "total_steps": 10720, "loss": 0.8273, "lr": 4.931517289963328e-09, "epoch": 19.888059701492537, "percentage": 99.44, "elapsed_time": "0:25:50", "remaining_time": "0:00:08", "throughput": 1961.96, "total_tokens": 3042984}
|
| 2152 |
+
{"current_steps": 10665, "total_steps": 10720, "loss": 1.012, "lr": 4.156226326415547e-09, "epoch": 19.89738805970149, "percentage": 99.49, "elapsed_time": "0:25:51", "remaining_time": "0:00:08", "throughput": 1961.92, "total_tokens": 3044264}
|
| 2153 |
+
{"current_steps": 10670, "total_steps": 10720, "loss": 0.6836, "lr": 3.4471925310280985e-09, "epoch": 19.90671641791045, "percentage": 99.53, "elapsed_time": "0:25:52", "remaining_time": "0:00:07", "throughput": 1961.94, "total_tokens": 3045704}
|
| 2154 |
+
{"current_steps": 10675, "total_steps": 10720, "loss": 0.5702, "lr": 2.804417783261459e-09, "epoch": 19.916044776119403, "percentage": 99.58, "elapsed_time": "0:25:53", "remaining_time": "0:00:06", "throughput": 1961.99, "total_tokens": 3047144}
|
| 2155 |
+
{"current_steps": 10680, "total_steps": 10720, "loss": 0.6302, "lr": 2.2279037869304964e-09, "epoch": 19.925373134328357, "percentage": 99.63, "elapsed_time": "0:25:53", "remaining_time": "0:00:05", "throughput": 1962.09, "total_tokens": 3048648}
|
| 2156 |
+
{"current_steps": 10685, "total_steps": 10720, "loss": 0.7604, "lr": 1.7176520702238964e-09, "epoch": 19.934701492537314, "percentage": 99.67, "elapsed_time": "0:25:54", "remaining_time": "0:00:05", "throughput": 1962.17, "total_tokens": 3050184}
|
| 2157 |
+
{"current_steps": 10690, "total_steps": 10720, "loss": 0.592, "lr": 1.2736639856736344e-09, "epoch": 19.94402985074627, "percentage": 99.72, "elapsed_time": "0:25:55", "remaining_time": "0:00:04", "throughput": 1962.23, "total_tokens": 3051688}
|
| 2158 |
+
{"current_steps": 10695, "total_steps": 10720, "loss": 0.4236, "lr": 8.959407101716277e-10, "epoch": 19.953358208955223, "percentage": 99.77, "elapsed_time": "0:25:55", "remaining_time": "0:00:03", "throughput": 1962.18, "total_tokens": 3052968}
|
| 2159 |
+
{"current_steps": 10700, "total_steps": 10720, "loss": 0.6928, "lr": 5.844832449641846e-10, "epoch": 19.96268656716418, "percentage": 99.81, "elapsed_time": "0:25:56", "remaining_time": "0:00:02", "throughput": 1962.32, "total_tokens": 3054536}
|
| 2160 |
+
{"current_steps": 10705, "total_steps": 10720, "loss": 0.4705, "lr": 3.3929241563535053e-10, "epoch": 19.972014925373134, "percentage": 99.86, "elapsed_time": "0:25:57", "remaining_time": "0:00:02", "throughput": 1962.43, "total_tokens": 3056072}
|
| 2161 |
+
{"current_steps": 10710, "total_steps": 10720, "loss": 0.8328, "lr": 1.6036887212078634e-10, "epoch": 19.98134328358209, "percentage": 99.91, "elapsed_time": "0:25:58", "remaining_time": "0:00:01", "throughput": 1962.51, "total_tokens": 3057608}
|
| 2162 |
+
{"current_steps": 10715, "total_steps": 10720, "loss": 0.8278, "lr": 4.771308869666591e-11, "epoch": 19.990671641791046, "percentage": 99.95, "elapsed_time": "0:25:58", "remaining_time": "0:00:00", "throughput": 1962.55, "total_tokens": 3059016}
|
| 2163 |
+
{"current_steps": 10720, "total_steps": 10720, "loss": 0.5962, "lr": 1.3253639852273126e-12, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:25:59", "remaining_time": "0:00:00", "throughput": 1962.39, "total_tokens": 3060208}
|
| 2164 |
+
{"current_steps": 10720, "total_steps": 10720, "eval_loss": 0.6943244934082031, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:26:01", "remaining_time": "0:00:00", "throughput": 1959.29, "total_tokens": 3060208}
|
| 2165 |
+
{"current_steps": 10720, "total_steps": 10720, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:26:02", "remaining_time": "0:00:00", "throughput": 1958.17, "total_tokens": 3060208}
|