Training in progress, step 3002
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +32 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 798032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:505315aec3fe201b1b1d645986c61f813991f01a9dcb8b8014dd46173b5a26c9
|
| 3 |
size 798032
|
trainer_log.jsonl
CHANGED
|
@@ -589,3 +589,35 @@
|
|
| 589 |
{"current_steps": 2855, "total_steps": 3160, "loss": 0.1043, "lr": 1.4146675069466403e-06, "epoch": 18.069620253164558, "percentage": 90.35, "elapsed_time": "0:08:39", "remaining_time": "0:00:55", "throughput": 2489.17, "total_tokens": 1292592}
|
| 590 |
{"current_steps": 2860, "total_steps": 3160, "loss": 0.0525, "lr": 1.369237543152016e-06, "epoch": 18.10126582278481, "percentage": 90.51, "elapsed_time": "0:08:40", "remaining_time": "0:00:54", "throughput": 2489.01, "total_tokens": 1294864}
|
| 591 |
{"current_steps": 2865, "total_steps": 3160, "loss": 0.0617, "lr": 1.3245284497695993e-06, "epoch": 18.132911392405063, "percentage": 90.66, "elapsed_time": "0:08:41", "remaining_time": "0:00:53", "throughput": 2489.36, "total_tokens": 1297168}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 589 |
{"current_steps": 2855, "total_steps": 3160, "loss": 0.1043, "lr": 1.4146675069466403e-06, "epoch": 18.069620253164558, "percentage": 90.35, "elapsed_time": "0:08:39", "remaining_time": "0:00:55", "throughput": 2489.17, "total_tokens": 1292592}
|
| 590 |
{"current_steps": 2860, "total_steps": 3160, "loss": 0.0525, "lr": 1.369237543152016e-06, "epoch": 18.10126582278481, "percentage": 90.51, "elapsed_time": "0:08:40", "remaining_time": "0:00:54", "throughput": 2489.01, "total_tokens": 1294864}
|
| 591 |
{"current_steps": 2865, "total_steps": 3160, "loss": 0.0617, "lr": 1.3245284497695993e-06, "epoch": 18.132911392405063, "percentage": 90.66, "elapsed_time": "0:08:41", "remaining_time": "0:00:53", "throughput": 2489.36, "total_tokens": 1297168}
|
| 592 |
+
{"current_steps": 2870, "total_steps": 3160, "loss": 0.0774, "lr": 1.2805415906767621e-06, "epoch": 18.164556962025316, "percentage": 90.82, "elapsed_time": "0:08:41", "remaining_time": "0:00:52", "throughput": 2489.92, "total_tokens": 1299600}
|
| 593 |
+
{"current_steps": 2875, "total_steps": 3160, "loss": 0.0218, "lr": 1.2372783077187117e-06, "epoch": 18.196202531645568, "percentage": 90.98, "elapsed_time": "0:08:42", "remaining_time": "0:00:51", "throughput": 2490.12, "total_tokens": 1301808}
|
| 594 |
+
{"current_steps": 2880, "total_steps": 3160, "loss": 0.0666, "lr": 1.1947399206675369e-06, "epoch": 18.227848101265824, "percentage": 91.14, "elapsed_time": "0:08:43", "remaining_time": "0:00:50", "throughput": 2490.52, "total_tokens": 1304144}
|
| 595 |
+
{"current_steps": 2885, "total_steps": 3160, "loss": 0.1095, "lr": 1.152927727181935e-06, "epoch": 18.259493670886076, "percentage": 91.3, "elapsed_time": "0:08:44", "remaining_time": "0:00:49", "throughput": 2490.72, "total_tokens": 1306352}
|
| 596 |
+
{"current_steps": 2890, "total_steps": 3160, "loss": 0.1, "lr": 1.1118430027676486e-06, "epoch": 18.29113924050633, "percentage": 91.46, "elapsed_time": "0:08:45", "remaining_time": "0:00:49", "throughput": 2490.98, "total_tokens": 1308528}
|
| 597 |
+
{"current_steps": 2895, "total_steps": 3160, "loss": 0.0659, "lr": 1.0714870007385497e-06, "epoch": 18.32278481012658, "percentage": 91.61, "elapsed_time": "0:08:46", "remaining_time": "0:00:48", "throughput": 2491.13, "total_tokens": 1310704}
|
| 598 |
+
{"current_steps": 2900, "total_steps": 3160, "loss": 0.1447, "lr": 1.0318609521783818e-06, "epoch": 18.354430379746834, "percentage": 91.77, "elapsed_time": "0:08:47", "remaining_time": "0:00:47", "throughput": 2491.47, "total_tokens": 1313008}
|
| 599 |
+
{"current_steps": 2905, "total_steps": 3160, "loss": 0.1658, "lr": 9.929660659032475e-07, "epoch": 18.38607594936709, "percentage": 91.93, "elapsed_time": "0:08:47", "remaining_time": "0:00:46", "throughput": 2491.78, "total_tokens": 1315280}
|
| 600 |
+
{"current_steps": 2910, "total_steps": 3160, "loss": 0.1333, "lr": 9.548035284246998e-07, "epoch": 18.417721518987342, "percentage": 92.09, "elapsed_time": "0:08:48", "remaining_time": "0:00:45", "throughput": 2491.98, "total_tokens": 1317488}
|
| 601 |
+
{"current_steps": 2915, "total_steps": 3160, "loss": 0.0966, "lr": 9.173745039135622e-07, "epoch": 18.449367088607595, "percentage": 92.25, "elapsed_time": "0:08:49", "remaining_time": "0:00:44", "throughput": 2492.18, "total_tokens": 1319696}
|
| 602 |
+
{"current_steps": 2920, "total_steps": 3160, "loss": 0.0564, "lr": 8.806801341644022e-07, "epoch": 18.481012658227847, "percentage": 92.41, "elapsed_time": "0:08:50", "remaining_time": "0:00:43", "throughput": 2492.4, "total_tokens": 1321840}
|
| 603 |
+
{"current_steps": 2925, "total_steps": 3160, "loss": 0.0356, "lr": 8.447215385607138e-07, "epoch": 18.5126582278481, "percentage": 92.56, "elapsed_time": "0:08:51", "remaining_time": "0:00:42", "throughput": 2492.6, "total_tokens": 1324048}
|
| 604 |
+
{"current_steps": 2930, "total_steps": 3160, "loss": 0.1526, "lr": 8.094998140407678e-07, "epoch": 18.544303797468356, "percentage": 92.72, "elapsed_time": "0:08:52", "remaining_time": "0:00:41", "throughput": 2493.05, "total_tokens": 1326416}
|
| 605 |
+
{"current_steps": 2935, "total_steps": 3160, "loss": 0.0635, "lr": 7.750160350641467e-07, "epoch": 18.575949367088608, "percentage": 92.88, "elapsed_time": "0:08:52", "remaining_time": "0:00:40", "throughput": 2493.24, "total_tokens": 1328624}
|
| 606 |
+
{"current_steps": 2940, "total_steps": 3160, "loss": 0.0786, "lr": 7.41271253578954e-07, "epoch": 18.60759493670886, "percentage": 93.04, "elapsed_time": "0:08:53", "remaining_time": "0:00:39", "throughput": 2493.67, "total_tokens": 1330992}
|
| 607 |
+
{"current_steps": 2945, "total_steps": 3160, "loss": 0.1048, "lr": 7.082664989897487e-07, "epoch": 18.639240506329113, "percentage": 93.2, "elapsed_time": "0:08:54", "remaining_time": "0:00:39", "throughput": 2494.11, "total_tokens": 1333360}
|
| 608 |
+
{"current_steps": 2950, "total_steps": 3160, "loss": 0.0412, "lr": 6.760027781261336e-07, "epoch": 18.67088607594937, "percentage": 93.35, "elapsed_time": "0:08:55", "remaining_time": "0:00:38", "throughput": 2494.25, "total_tokens": 1335536}
|
| 609 |
+
{"current_steps": 2955, "total_steps": 3160, "loss": 0.1036, "lr": 6.444810752120278e-07, "epoch": 18.70253164556962, "percentage": 93.51, "elapsed_time": "0:08:56", "remaining_time": "0:00:37", "throughput": 2494.63, "total_tokens": 1337872}
|
| 610 |
+
{"current_steps": 2960, "total_steps": 3160, "loss": 0.0896, "lr": 6.137023518356599e-07, "epoch": 18.734177215189874, "percentage": 93.67, "elapsed_time": "0:08:57", "remaining_time": "0:00:36", "throughput": 2495.03, "total_tokens": 1340208}
|
| 611 |
+
{"current_steps": 2965, "total_steps": 3160, "loss": 0.0836, "lr": 5.836675469202247e-07, "epoch": 18.765822784810126, "percentage": 93.83, "elapsed_time": "0:08:58", "remaining_time": "0:00:35", "throughput": 2495.24, "total_tokens": 1342448}
|
| 612 |
+
{"current_steps": 2970, "total_steps": 3160, "loss": 0.0857, "lr": 5.543775766952447e-07, "epoch": 18.79746835443038, "percentage": 93.99, "elapsed_time": "0:08:58", "remaining_time": "0:00:34", "throughput": 2495.53, "total_tokens": 1344720}
|
| 613 |
+
{"current_steps": 2975, "total_steps": 3160, "loss": 0.0607, "lr": 5.258333346686211e-07, "epoch": 18.82911392405063, "percentage": 94.15, "elapsed_time": "0:08:59", "remaining_time": "0:00:33", "throughput": 2495.97, "total_tokens": 1347088}
|
| 614 |
+
{"current_steps": 2980, "total_steps": 3160, "loss": 0.0819, "lr": 4.980356915993661e-07, "epoch": 18.860759493670887, "percentage": 94.3, "elapsed_time": "0:09:00", "remaining_time": "0:00:32", "throughput": 2496.25, "total_tokens": 1349360}
|
| 615 |
+
{"current_steps": 2985, "total_steps": 3160, "loss": 0.0847, "lr": 4.709854954710602e-07, "epoch": 18.89240506329114, "percentage": 94.46, "elapsed_time": "0:09:01", "remaining_time": "0:00:31", "throughput": 2496.49, "total_tokens": 1351600}
|
| 616 |
+
{"current_steps": 2990, "total_steps": 3160, "loss": 0.0962, "lr": 4.4468357146596475e-07, "epoch": 18.924050632911392, "percentage": 94.62, "elapsed_time": "0:09:02", "remaining_time": "0:00:30", "throughput": 2496.74, "total_tokens": 1353776}
|
| 617 |
+
{"current_steps": 2995, "total_steps": 3160, "loss": 0.1347, "lr": 4.191307219398588e-07, "epoch": 18.955696202531644, "percentage": 94.78, "elapsed_time": "0:09:03", "remaining_time": "0:00:29", "throughput": 2497.08, "total_tokens": 1356080}
|
| 618 |
+
{"current_steps": 3000, "total_steps": 3160, "loss": 0.0871, "lr": 3.943277263975559e-07, "epoch": 18.9873417721519, "percentage": 94.94, "elapsed_time": "0:09:03", "remaining_time": "0:00:29", "throughput": 2497.43, "total_tokens": 1358448}
|
| 619 |
+
{"current_steps": 3002, "total_steps": 3160, "eval_loss": 0.21432405710220337, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:09:05", "remaining_time": "0:00:28", "throughput": 2492.19, "total_tokens": 1359120}
|
| 620 |
+
{"current_steps": 3005, "total_steps": 3160, "loss": 0.1245, "lr": 3.7027534146913676e-07, "epoch": 19.018987341772153, "percentage": 95.09, "elapsed_time": "0:09:06", "remaining_time": "0:00:28", "throughput": 2488.69, "total_tokens": 1360464}
|
| 621 |
+
{"current_steps": 3010, "total_steps": 3160, "loss": 0.0609, "lr": 3.469743008868542e-07, "epoch": 19.050632911392405, "percentage": 95.25, "elapsed_time": "0:09:07", "remaining_time": "0:00:27", "throughput": 2489.06, "total_tokens": 1362832}
|
| 622 |
+
{"current_steps": 3015, "total_steps": 3160, "loss": 0.058, "lr": 3.244253154627619e-07, "epoch": 19.082278481012658, "percentage": 95.41, "elapsed_time": "0:09:08", "remaining_time": "0:00:26", "throughput": 2488.94, "total_tokens": 1365072}
|
| 623 |
+
{"current_steps": 3020, "total_steps": 3160, "loss": 0.0696, "lr": 3.026290730670206e-07, "epoch": 19.11392405063291, "percentage": 95.57, "elapsed_time": "0:09:09", "remaining_time": "0:00:25", "throughput": 2489.38, "total_tokens": 1367440}
|