End of training
Browse files- all_results.json +2 -2
- model.safetensors +1 -1
- train_results.json +2 -2
- trainer_state.json +4 -4
all_results.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"total_flos": 0.0,
|
| 3 |
"train_loss": -0.00011151888082939157,
|
| 4 |
-
"train_runtime":
|
| 5 |
"train_samples": 3,
|
| 6 |
-
"train_samples_per_second": 0.
|
| 7 |
"train_steps_per_second": 0.011
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"total_flos": 0.0,
|
| 3 |
"train_loss": -0.00011151888082939157,
|
| 4 |
+
"train_runtime": 1866.5324,
|
| 5 |
"train_samples": 3,
|
| 6 |
+
"train_samples_per_second": 0.043,
|
| 7 |
"train_steps_per_second": 0.011
|
| 8 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1976163472
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63073bffd65cd38e2813613d2c79a4a61e3a6b4d1f1c85be273e56e884cfd0c3
|
| 3 |
size 1976163472
|
train_results.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"total_flos": 0.0,
|
| 3 |
"train_loss": -0.00011151888082939157,
|
| 4 |
-
"train_runtime":
|
| 5 |
"train_samples": 3,
|
| 6 |
-
"train_samples_per_second": 0.
|
| 7 |
"train_steps_per_second": 0.011
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"total_flos": 0.0,
|
| 3 |
"train_loss": -0.00011151888082939157,
|
| 4 |
+
"train_runtime": 1866.5324,
|
| 5 |
"train_samples": 3,
|
| 6 |
+
"train_samples_per_second": 0.043,
|
| 7 |
"train_steps_per_second": 0.011
|
| 8 |
}
|
trainer_state.json
CHANGED
|
@@ -335,7 +335,7 @@
|
|
| 335 |
"completions/min_length": 77.5,
|
| 336 |
"completions/min_terminated_length": 77.5,
|
| 337 |
"epoch": 17.666666666666668,
|
| 338 |
-
"grad_norm": 5.
|
| 339 |
"kl": 0.0,
|
| 340 |
"learning_rate": 3.013156219837776e-08,
|
| 341 |
"loss": -0.0011,
|
|
@@ -374,7 +374,7 @@
|
|
| 374 |
"completions/min_length": 89.5,
|
| 375 |
"completions/min_terminated_length": 89.5,
|
| 376 |
"epoch": 19.666666666666668,
|
| 377 |
-
"grad_norm": 7.
|
| 378 |
"kl": 9.199701889173184e-09,
|
| 379 |
"learning_rate": 3.4096741493194193e-09,
|
| 380 |
"loss": 0.0,
|
|
@@ -404,8 +404,8 @@
|
|
| 404 |
"step": 20,
|
| 405 |
"total_flos": 0.0,
|
| 406 |
"train_loss": -0.00011151888082939157,
|
| 407 |
-
"train_runtime":
|
| 408 |
-
"train_samples_per_second": 0.
|
| 409 |
"train_steps_per_second": 0.011
|
| 410 |
}
|
| 411 |
],
|
|
|
|
| 335 |
"completions/min_length": 77.5,
|
| 336 |
"completions/min_terminated_length": 77.5,
|
| 337 |
"epoch": 17.666666666666668,
|
| 338 |
+
"grad_norm": 5.54299783706665,
|
| 339 |
"kl": 0.0,
|
| 340 |
"learning_rate": 3.013156219837776e-08,
|
| 341 |
"loss": -0.0011,
|
|
|
|
| 374 |
"completions/min_length": 89.5,
|
| 375 |
"completions/min_terminated_length": 89.5,
|
| 376 |
"epoch": 19.666666666666668,
|
| 377 |
+
"grad_norm": 7.460154847649392e-06,
|
| 378 |
"kl": 9.199701889173184e-09,
|
| 379 |
"learning_rate": 3.4096741493194193e-09,
|
| 380 |
"loss": 0.0,
|
|
|
|
| 404 |
"step": 20,
|
| 405 |
"total_flos": 0.0,
|
| 406 |
"train_loss": -0.00011151888082939157,
|
| 407 |
+
"train_runtime": 1866.5324,
|
| 408 |
+
"train_samples_per_second": 0.043,
|
| 409 |
"train_steps_per_second": 0.011
|
| 410 |
}
|
| 411 |
],
|