Kabuyu commited on
Commit
bef429c
·
verified ·
1 Parent(s): 26382ce

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": -0.030795110389590263,
4
- "train_runtime": 10341.1372,
5
- "train_samples": 88,
6
- "train_samples_per_second": 0.004,
7
  "train_steps_per_second": 0.001
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": -0.007105642557144165,
4
+ "train_runtime": 13988.242,
5
+ "train_samples": 104,
6
+ "train_samples_per_second": 0.003,
7
  "train_steps_per_second": 0.001
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8844bb76d8438458fc3e9ef30e981e4c63a3835196663aac3693c5e6a15bfe0
3
  size 1976163472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3cf4d7c87d918cf4ec9e91d4086e44b37784ce5614f96edb4cf2d11aa85e6df
3
  size 1976163472
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": -0.030795110389590263,
4
- "train_runtime": 10341.1372,
5
- "train_samples": 88,
6
- "train_samples_per_second": 0.004,
7
  "train_steps_per_second": 0.001
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": -0.007105642557144165,
4
+ "train_runtime": 13988.242,
5
+ "train_samples": 104,
6
+ "train_samples_per_second": 0.003,
7
  "train_steps_per_second": 0.001
8
  }
trainer_state.json CHANGED
@@ -2,7 +2,7 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.22727272727272727,
6
  "eval_steps": 500,
7
  "global_step": 10,
8
  "is_hyper_param_search": false,
@@ -16,21 +16,21 @@
16
  "clip_ratio/low_min": 0.0,
17
  "clip_ratio/region_mean": 0.0,
18
  "completions/clipped_ratio": 0.0,
19
- "completions/max_length": 74.5,
20
- "completions/max_terminated_length": 74.5,
21
- "completions/mean_length": 36.875,
22
- "completions/mean_terminated_length": 36.875,
23
- "completions/min_length": 4.0,
24
- "completions/min_terminated_length": 4.0,
25
- "epoch": 0.045454545454545456,
26
  "frac_reward_zero_std": 0.0,
27
- "grad_norm": 30.32355499267578,
28
  "kl": 0.0,
29
  "learning_rate": 5e-07,
30
- "loss": -0.0699,
31
- "num_tokens": 1319.0,
32
- "reward": 0.08947309106588364,
33
- "reward_std": 0.02952927676960826,
34
  "rewards/concensus_correctness_reward_func/mean": 0.0,
35
  "rewards/concensus_correctness_reward_func/std": 0.0,
36
  "rewards/consensus_reward_func/mean": 0.0,
@@ -39,8 +39,8 @@
39
  "rewards/cumulative_reward_2/std": 0.0,
40
  "rewards/final_correctness_reward_func/mean": 0.0,
41
  "rewards/final_correctness_reward_func/std": 0.0,
42
- "rewards/question_recreation_reward_func/mean": 0.08947309292852879,
43
- "rewards/question_recreation_reward_func/std": 0.02744971076026559,
44
  "rewards/soft_format_reward_func/mean": 0.0,
45
  "rewards/soft_format_reward_func/std": 0.0,
46
  "rewards/strict_format_reward_func/mean": 0.0,
@@ -55,22 +55,22 @@
55
  "clip_ratio/low_mean": 0.0,
56
  "clip_ratio/low_min": 0.0,
57
  "clip_ratio/region_mean": 0.0,
58
- "completions/clipped_ratio": 0.125,
59
- "completions/max_length": 130.5,
60
- "completions/max_terminated_length": 35.5,
61
- "completions/mean_length": 45.0,
62
- "completions/mean_terminated_length": 16.541666984558105,
63
- "completions/min_length": 6.5,
64
- "completions/min_terminated_length": 6.5,
65
- "epoch": 0.09090909090909091,
66
- "frac_reward_zero_std": 0.25,
67
- "grad_norm": 71.5344009399414,
68
- "kl": 0.0013015866788919084,
69
  "learning_rate": 4.415111107797445e-07,
70
- "loss": -0.0174,
71
- "num_tokens": 2703.0,
72
- "reward": 0.08281245455145836,
73
- "reward_std": 0.03817772259935737,
74
  "rewards/concensus_correctness_reward_func/mean": 0.0,
75
  "rewards/concensus_correctness_reward_func/std": 0.0,
76
  "rewards/consensus_reward_func/mean": 0.0,
@@ -79,8 +79,8 @@
79
  "rewards/cumulative_reward_2/std": 0.0,
80
  "rewards/final_correctness_reward_func/mean": 0.0,
81
  "rewards/final_correctness_reward_func/std": 0.0,
82
- "rewards/question_recreation_reward_func/mean": 0.08281246200203896,
83
- "rewards/question_recreation_reward_func/std": 0.04765166528522968,
84
  "rewards/soft_format_reward_func/mean": 0.0,
85
  "rewards/soft_format_reward_func/std": 0.0,
86
  "rewards/strict_format_reward_func/mean": 0.0,
@@ -95,22 +95,22 @@
95
  "clip_ratio/low_mean": 0.0,
96
  "clip_ratio/low_min": 0.0,
97
  "clip_ratio/region_mean": 0.0,
98
- "completions/clipped_ratio": 0.0,
99
- "completions/max_length": 177.5,
100
- "completions/max_terminated_length": 177.5,
101
- "completions/mean_length": 76.375,
102
- "completions/mean_terminated_length": 76.375,
103
- "completions/min_length": 23.5,
104
- "completions/min_terminated_length": 23.5,
105
- "epoch": 0.13636363636363635,
106
  "frac_reward_zero_std": 0.0,
107
- "grad_norm": 60.853816986083984,
108
- "kl": 0.0031654702906962484,
109
  "learning_rate": 2.934120444167326e-07,
110
- "loss": -0.0248,
111
- "num_tokens": 4338.0,
112
- "reward": 0.10044452454894781,
113
- "reward_std": 0.021916877012699842,
114
  "rewards/concensus_correctness_reward_func/mean": 0.0,
115
  "rewards/concensus_correctness_reward_func/std": 0.0,
116
  "rewards/consensus_reward_func/mean": 0.0,
@@ -119,8 +119,8 @@
119
  "rewards/cumulative_reward_2/std": 0.0,
120
  "rewards/final_correctness_reward_func/mean": 0.0,
121
  "rewards/final_correctness_reward_func/std": 0.0,
122
- "rewards/question_recreation_reward_func/mean": 0.10044452454894781,
123
- "rewards/question_recreation_reward_func/std": 0.024618458934128284,
124
  "rewards/soft_format_reward_func/mean": 0.0,
125
  "rewards/soft_format_reward_func/std": 0.0,
126
  "rewards/strict_format_reward_func/mean": 0.0,
@@ -136,21 +136,21 @@
136
  "clip_ratio/low_min": 0.0,
137
  "clip_ratio/region_mean": 0.0,
138
  "completions/clipped_ratio": 0.125,
139
- "completions/max_length": 210.5,
140
- "completions/max_terminated_length": 128.0,
141
- "completions/mean_length": 76.5,
142
- "completions/mean_terminated_length": 48.70833396911621,
143
- "completions/min_length": 4.5,
144
- "completions/min_terminated_length": 4.5,
145
- "epoch": 0.18181818181818182,
146
- "frac_reward_zero_std": 0.25,
147
- "grad_norm": 22.12053680419922,
148
- "kl": 0.0013654041031259112,
149
  "learning_rate": 1.2500000000000005e-07,
150
- "loss": 0.0056,
151
- "num_tokens": 5974.0,
152
- "reward": 0.029887909069657326,
153
- "reward_std": 0.018557347357273102,
154
  "rewards/concensus_correctness_reward_func/mean": 0.0,
155
  "rewards/concensus_correctness_reward_func/std": 0.0,
156
  "rewards/consensus_reward_func/mean": 0.0,
@@ -159,8 +159,8 @@
159
  "rewards/cumulative_reward_2/std": 0.0,
160
  "rewards/final_correctness_reward_func/mean": 0.0,
161
  "rewards/final_correctness_reward_func/std": 0.0,
162
- "rewards/question_recreation_reward_func/mean": 0.029887909069657326,
163
- "rewards/question_recreation_reward_func/std": 0.025635237339884043,
164
  "rewards/soft_format_reward_func/mean": 0.0,
165
  "rewards/soft_format_reward_func/std": 0.0,
166
  "rewards/strict_format_reward_func/mean": 0.0,
@@ -175,22 +175,22 @@
175
  "clip_ratio/low_mean": 0.0,
176
  "clip_ratio/low_min": 0.0,
177
  "clip_ratio/region_mean": 0.0,
178
- "completions/clipped_ratio": 0.0,
179
- "completions/max_length": 123.5,
180
- "completions/max_terminated_length": 123.5,
181
- "completions/mean_length": 41.625,
182
- "completions/mean_terminated_length": 41.625,
183
- "completions/min_length": 3.5,
184
- "completions/min_terminated_length": 3.5,
185
- "epoch": 0.22727272727272727,
186
- "frac_reward_zero_std": 0.25,
187
- "grad_norm": 64.85562133789062,
188
- "kl": 0.0105830222601071,
189
  "learning_rate": 1.507684480352292e-08,
190
- "loss": -0.0475,
191
- "num_tokens": 7331.0,
192
- "reward": 0.02319902554154396,
193
- "reward_std": 0.008738046046346426,
194
  "rewards/concensus_correctness_reward_func/mean": 0.0,
195
  "rewards/concensus_correctness_reward_func/std": 0.0,
196
  "rewards/consensus_reward_func/mean": 0.0,
@@ -199,8 +199,8 @@
199
  "rewards/cumulative_reward_2/std": 0.0,
200
  "rewards/final_correctness_reward_func/mean": 0.0,
201
  "rewards/final_correctness_reward_func/std": 0.0,
202
- "rewards/question_recreation_reward_func/mean": 0.02319902554154396,
203
- "rewards/question_recreation_reward_func/std": 0.010856612119823694,
204
  "rewards/soft_format_reward_func/mean": 0.0,
205
  "rewards/soft_format_reward_func/std": 0.0,
206
  "rewards/strict_format_reward_func/mean": 0.0,
@@ -210,18 +210,18 @@
210
  "step": 10
211
  },
212
  {
213
- "epoch": 0.22727272727272727,
214
  "step": 10,
215
  "total_flos": 0.0,
216
- "train_loss": -0.030795110389590263,
217
- "train_runtime": 10341.1372,
218
- "train_samples_per_second": 0.004,
219
  "train_steps_per_second": 0.001
220
  }
221
  ],
222
  "logging_steps": 2,
223
  "max_steps": 10,
224
- "num_input_tokens_seen": 7331,
225
  "num_train_epochs": 1,
226
  "save_steps": 10,
227
  "stateful_callbacks": {
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.19230769230769232,
6
  "eval_steps": 500,
7
  "global_step": 10,
8
  "is_hyper_param_search": false,
 
16
  "clip_ratio/low_min": 0.0,
17
  "clip_ratio/region_mean": 0.0,
18
  "completions/clipped_ratio": 0.0,
19
+ "completions/max_length": 71.0,
20
+ "completions/max_terminated_length": 71.0,
21
+ "completions/mean_length": 30.125,
22
+ "completions/mean_terminated_length": 30.125,
23
+ "completions/min_length": 10.0,
24
+ "completions/min_terminated_length": 10.0,
25
+ "epoch": 0.038461538461538464,
26
  "frac_reward_zero_std": 0.0,
27
+ "grad_norm": 26.099611282348633,
28
  "kl": 0.0,
29
  "learning_rate": 5e-07,
30
+ "loss": -0.1539,
31
+ "num_tokens": 1265.0,
32
+ "reward": 0.17112372070550919,
33
+ "reward_std": 0.06430763006210327,
34
  "rewards/concensus_correctness_reward_func/mean": 0.0,
35
  "rewards/concensus_correctness_reward_func/std": 0.0,
36
  "rewards/consensus_reward_func/mean": 0.0,
 
39
  "rewards/cumulative_reward_2/std": 0.0,
40
  "rewards/final_correctness_reward_func/mean": 0.0,
41
  "rewards/final_correctness_reward_func/std": 0.0,
42
+ "rewards/question_recreation_reward_func/mean": 0.17112372815608978,
43
+ "rewards/question_recreation_reward_func/std": 0.06673818826675415,
44
  "rewards/soft_format_reward_func/mean": 0.0,
45
  "rewards/soft_format_reward_func/std": 0.0,
46
  "rewards/strict_format_reward_func/mean": 0.0,
 
55
  "clip_ratio/low_mean": 0.0,
56
  "clip_ratio/low_min": 0.0,
57
  "clip_ratio/region_mean": 0.0,
58
+ "completions/clipped_ratio": 0.0,
59
+ "completions/max_length": 112.0,
60
+ "completions/max_terminated_length": 112.0,
61
+ "completions/mean_length": 80.125,
62
+ "completions/mean_terminated_length": 80.125,
63
+ "completions/min_length": 53.5,
64
+ "completions/min_terminated_length": 53.5,
65
+ "epoch": 0.07692307692307693,
66
+ "frac_reward_zero_std": 0.0,
67
+ "grad_norm": 15.406155586242676,
68
+ "kl": 0.001192556883324869,
69
  "learning_rate": 4.415111107797445e-07,
70
+ "loss": -0.0825,
71
+ "num_tokens": 2930.0,
72
+ "reward": 0.1593383327126503,
73
+ "reward_std": 0.04592637158930302,
74
  "rewards/concensus_correctness_reward_func/mean": 0.0,
75
  "rewards/concensus_correctness_reward_func/std": 0.0,
76
  "rewards/consensus_reward_func/mean": 0.0,
 
79
  "rewards/cumulative_reward_2/std": 0.0,
80
  "rewards/final_correctness_reward_func/mean": 0.0,
81
  "rewards/final_correctness_reward_func/std": 0.0,
82
+ "rewards/question_recreation_reward_func/mean": 0.1593383327126503,
83
+ "rewards/question_recreation_reward_func/std": 0.04842444974929094,
84
  "rewards/soft_format_reward_func/mean": 0.0,
85
  "rewards/soft_format_reward_func/std": 0.0,
86
  "rewards/strict_format_reward_func/mean": 0.0,
 
95
  "clip_ratio/low_mean": 0.0,
96
  "clip_ratio/low_min": 0.0,
97
  "clip_ratio/region_mean": 0.0,
98
+ "completions/clipped_ratio": 0.25,
99
+ "completions/max_length": 256.0,
100
+ "completions/max_terminated_length": 161.0,
101
+ "completions/mean_length": 132.5,
102
+ "completions/mean_terminated_length": 91.33333587646484,
103
+ "completions/min_length": 36.0,
104
+ "completions/min_terminated_length": 36.0,
105
+ "epoch": 0.11538461538461539,
106
  "frac_reward_zero_std": 0.0,
107
+ "grad_norm": 15.05298900604248,
108
+ "kl": 0.0028286711749387905,
109
  "learning_rate": 2.934120444167326e-07,
110
+ "loss": 0.0432,
111
+ "num_tokens": 5014.0,
112
+ "reward": 0.07234940817579627,
113
+ "reward_std": 0.022284463979303837,
114
  "rewards/concensus_correctness_reward_func/mean": 0.0,
115
  "rewards/concensus_correctness_reward_func/std": 0.0,
116
  "rewards/consensus_reward_func/mean": 0.0,
 
119
  "rewards/cumulative_reward_2/std": 0.0,
120
  "rewards/final_correctness_reward_func/mean": 0.0,
121
  "rewards/final_correctness_reward_func/std": 0.0,
122
+ "rewards/question_recreation_reward_func/mean": 0.07234940817579627,
123
+ "rewards/question_recreation_reward_func/std": 0.02770008658990264,
124
  "rewards/soft_format_reward_func/mean": 0.0,
125
  "rewards/soft_format_reward_func/std": 0.0,
126
  "rewards/strict_format_reward_func/mean": 0.0,
 
136
  "clip_ratio/low_min": 0.0,
137
  "clip_ratio/region_mean": 0.0,
138
  "completions/clipped_ratio": 0.125,
139
+ "completions/max_length": 174.0,
140
+ "completions/max_terminated_length": 94.5,
141
+ "completions/mean_length": 73.875,
142
+ "completions/mean_terminated_length": 50.375,
143
+ "completions/min_length": 27.0,
144
+ "completions/min_terminated_length": 27.0,
145
+ "epoch": 0.15384615384615385,
146
+ "frac_reward_zero_std": 0.0,
147
+ "grad_norm": 47.132999420166016,
148
+ "kl": 0.007395870692562312,
149
  "learning_rate": 1.2500000000000005e-07,
150
+ "loss": 0.0003,
151
+ "num_tokens": 6629.0,
152
+ "reward": 0.0922946073114872,
153
+ "reward_std": 0.03348500933498144,
154
  "rewards/concensus_correctness_reward_func/mean": 0.0,
155
  "rewards/concensus_correctness_reward_func/std": 0.0,
156
  "rewards/consensus_reward_func/mean": 0.0,
 
159
  "rewards/cumulative_reward_2/std": 0.0,
160
  "rewards/final_correctness_reward_func/mean": 0.0,
161
  "rewards/final_correctness_reward_func/std": 0.0,
162
+ "rewards/question_recreation_reward_func/mean": 0.0922946073114872,
163
+ "rewards/question_recreation_reward_func/std": 0.0378275141119957,
164
  "rewards/soft_format_reward_func/mean": 0.0,
165
  "rewards/soft_format_reward_func/std": 0.0,
166
  "rewards/strict_format_reward_func/mean": 0.0,
 
175
  "clip_ratio/low_mean": 0.0,
176
  "clip_ratio/low_min": 0.0,
177
  "clip_ratio/region_mean": 0.0,
178
+ "completions/clipped_ratio": 0.25,
179
+ "completions/max_length": 256.0,
180
+ "completions/max_terminated_length": 153.5,
181
+ "completions/mean_length": 127.875,
182
+ "completions/mean_terminated_length": 85.16666603088379,
183
+ "completions/min_length": 35.0,
184
+ "completions/min_terminated_length": 35.0,
185
+ "epoch": 0.19230769230769232,
186
+ "frac_reward_zero_std": 0.0,
187
+ "grad_norm": 15.468522071838379,
188
+ "kl": 0.005031221779063344,
189
  "learning_rate": 1.507684480352292e-08,
190
+ "loss": 0.1575,
191
+ "num_tokens": 8676.0,
192
+ "reward": 0.028173266910016537,
193
+ "reward_std": 0.012423289939761162,
194
  "rewards/concensus_correctness_reward_func/mean": 0.0,
195
  "rewards/concensus_correctness_reward_func/std": 0.0,
196
  "rewards/consensus_reward_func/mean": 0.0,
 
199
  "rewards/cumulative_reward_2/std": 0.0,
200
  "rewards/final_correctness_reward_func/mean": 0.0,
201
  "rewards/final_correctness_reward_func/std": 0.0,
202
+ "rewards/question_recreation_reward_func/mean": 0.028173265978693962,
203
+ "rewards/question_recreation_reward_func/std": 0.016421337611973286,
204
  "rewards/soft_format_reward_func/mean": 0.0,
205
  "rewards/soft_format_reward_func/std": 0.0,
206
  "rewards/strict_format_reward_func/mean": 0.0,
 
210
  "step": 10
211
  },
212
  {
213
+ "epoch": 0.19230769230769232,
214
  "step": 10,
215
  "total_flos": 0.0,
216
+ "train_loss": -0.007105642557144165,
217
+ "train_runtime": 13988.242,
218
+ "train_samples_per_second": 0.003,
219
  "train_steps_per_second": 0.001
220
  }
221
  ],
222
  "logging_steps": 2,
223
  "max_steps": 10,
224
+ "num_input_tokens_seen": 8676,
225
  "num_train_epochs": 1,
226
  "save_steps": 10,
227
  "stateful_callbacks": {