sandernotenbaert commited on
Commit
b56cd00
·
verified ·
1 Parent(s): d1d4e79

Training in progress, step 7500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6f3d2ee2d998af1140cd807dca278a2d638e32c8832a185b3c1c335accaf4d9
3
  size 1783055976
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48ab6fbe729d04037d979e3727d665bb8a3d92ea87dfd2ed84a826f73114e2ff
3
  size 1783055976
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f3cfb5931120bdb0cf5880d22e5d65a167fb333a2b6082dadeb74986b21b2c6
3
  size 3566173562
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34887632667319441e15da8fbef52c84ef86e33be3a881d2c79ccd5c4dfd7e52
3
  size 3566173562
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c07c9483d2aaa0e0aa4859fa05bacc55a60e0f30ff9c95a3b76854e880483a96
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cfe3540f24c66f9bc38a546448a9e5d9989705fafc8d4b37aa5dafd0c7460f2
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65756de75e6bd7eadc99b49bf68e2f0250c3d3583565151c3f9ae8bcb8dc720a
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a43c200beec982be6ba6814f2f475edee50b971e23470ab6b587b0a72dccb9b7
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ce0422a403e43a28bbd9bd594119fd94d3cecadc3ec09cc0340aa8a5d6e984c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af2924a0eb31db835ecc1090391e433a4e9097b02bdc25d6cb879aecfffdd0e2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.015656680314073006,
6
  "eval_steps": 500,
7
- "global_step": 7000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1100,6 +1100,84 @@
1100
  "eval_samples_per_second": 6.19,
1101
  "eval_steps_per_second": 6.19,
1102
  "step": 7000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1103
  }
1104
  ],
1105
  "logging_steps": 50,
@@ -1119,7 +1197,7 @@
1119
  "attributes": {}
1120
  }
1121
  },
1122
- "total_flos": 7.6591354871808e+16,
1123
  "train_batch_size": 1,
1124
  "trial_name": null,
1125
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.016775014622221078,
6
  "eval_steps": 500,
7
+ "global_step": 7500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1100
  "eval_samples_per_second": 6.19,
1101
  "eval_steps_per_second": 6.19,
1102
  "step": 7000
1103
+ },
1104
+ {
1105
+ "epoch": 0.015768513744887816,
1106
+ "grad_norm": 0.754082977771759,
1107
+ "learning_rate": 8.758915480006959e-06,
1108
+ "loss": 0.3346,
1109
+ "step": 7050
1110
+ },
1111
+ {
1112
+ "epoch": 0.015880347175702622,
1113
+ "grad_norm": 1.140832781791687,
1114
+ "learning_rate": 8.821044260543255e-06,
1115
+ "loss": 0.3276,
1116
+ "step": 7100
1117
+ },
1118
+ {
1119
+ "epoch": 0.015992180606517428,
1120
+ "grad_norm": 1.2793915271759033,
1121
+ "learning_rate": 8.883173041079549e-06,
1122
+ "loss": 0.3247,
1123
+ "step": 7150
1124
+ },
1125
+ {
1126
+ "epoch": 0.016104014037332234,
1127
+ "grad_norm": 1.4779784679412842,
1128
+ "learning_rate": 8.945301821615845e-06,
1129
+ "loss": 0.3184,
1130
+ "step": 7200
1131
+ },
1132
+ {
1133
+ "epoch": 0.016215847468147044,
1134
+ "grad_norm": 0.8328399062156677,
1135
+ "learning_rate": 9.007430602152143e-06,
1136
+ "loss": 0.3283,
1137
+ "step": 7250
1138
+ },
1139
+ {
1140
+ "epoch": 0.01632768089896185,
1141
+ "grad_norm": 1.057673454284668,
1142
+ "learning_rate": 9.069559382688437e-06,
1143
+ "loss": 0.3214,
1144
+ "step": 7300
1145
+ },
1146
+ {
1147
+ "epoch": 0.016439514329776656,
1148
+ "grad_norm": 1.005010724067688,
1149
+ "learning_rate": 9.131688163224733e-06,
1150
+ "loss": 0.314,
1151
+ "step": 7350
1152
+ },
1153
+ {
1154
+ "epoch": 0.016551347760591466,
1155
+ "grad_norm": 1.0946522951126099,
1156
+ "learning_rate": 9.193816943761029e-06,
1157
+ "loss": 0.3425,
1158
+ "step": 7400
1159
+ },
1160
+ {
1161
+ "epoch": 0.016663181191406272,
1162
+ "grad_norm": 0.7940588593482971,
1163
+ "learning_rate": 9.255945724297325e-06,
1164
+ "loss": 0.3276,
1165
+ "step": 7450
1166
+ },
1167
+ {
1168
+ "epoch": 0.016775014622221078,
1169
+ "grad_norm": 0.7163941264152527,
1170
+ "learning_rate": 9.318074504833619e-06,
1171
+ "loss": 0.3222,
1172
+ "step": 7500
1173
+ },
1174
+ {
1175
+ "epoch": 0.016775014622221078,
1176
+ "eval_loss": 0.328526109457016,
1177
+ "eval_runtime": 1452.7965,
1178
+ "eval_samples_per_second": 6.218,
1179
+ "eval_steps_per_second": 6.218,
1180
+ "step": 7500
1181
  }
1182
  ],
1183
  "logging_steps": 50,
 
1197
  "attributes": {}
1198
  }
1199
  },
1200
+ "total_flos": 8.206216593408e+16,
1201
  "train_batch_size": 1,
1202
  "trial_name": null,
1203
  "trial_params": null