haedahae commited on
Commit
d8f7ffb
·
verified ·
1 Parent(s): 8d4c1db

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +4 -4
  2. model.safetensors +1 -1
  3. train_results.json +4 -4
  4. trainer_state.json +101 -101
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 9.117599711316871e-06,
4
- "train_runtime": 360.6804,
5
  "train_samples": 160,
6
- "train_samples_per_second": 0.887,
7
- "train_steps_per_second": 0.055
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 1.6838312149047852e-06,
4
+ "train_runtime": 318.2882,
5
  "train_samples": 160,
6
+ "train_samples_per_second": 1.005,
7
+ "train_steps_per_second": 0.063
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d256fae7e77041b593bee4d9ac36959593b8cf3440ad21d9f55a7812557ce754
3
  size 1976163472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8991881f2d6c28b94356372a3c54e0157e3d2dc517e5f8ac2da541cdd5834e7c
3
  size 1976163472
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 9.117599711316871e-06,
4
- "train_runtime": 360.6804,
5
  "train_samples": 160,
6
- "train_samples_per_second": 0.887,
7
- "train_steps_per_second": 0.055
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 1.6838312149047852e-06,
4
+ "train_runtime": 318.2882,
5
  "train_samples": 160,
6
+ "train_samples_per_second": 1.005,
7
+ "train_steps_per_second": 0.063
8
  }
trainer_state.json CHANGED
@@ -10,203 +10,203 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "completion_length": 364.1875,
14
  "epoch": 0.1,
15
- "grad_norm": 7.79181432723999,
16
  "kl": 0.0,
17
  "learning_rate": 5e-07,
18
  "loss": -0.0,
19
- "reward": 0.39489845104981214,
20
- "reward_std": 0.9017761385766789,
21
- "rewards/concensus_correctness_reward_func": 0.0,
22
- "rewards/consensus_reward_func": 0.0,
23
  "rewards/cumulative_reward_2": 0.0,
24
- "rewards/final_correctness_reward_func": 0.1875,
25
- "rewards/question_recreation_reward_func": 0.36814845085609704,
26
- "rewards/soft_format_reward_func": 0.0,
27
  "rewards/strict_format_reward_func": 0.0,
28
- "rewards/xmlcount_reward_func": -0.16074999328702688,
29
  "step": 2
30
  },
31
  {
32
- "completion_length": 432.375,
33
  "epoch": 0.2,
34
- "grad_norm": 9.590618133544922,
35
- "kl": 0.0014355634102685144,
36
  "learning_rate": 4.864543104251586e-07,
37
  "loss": 0.0,
38
- "reward": 0.7185110447462648,
39
- "reward_std": 0.732723499299027,
40
- "rewards/concensus_correctness_reward_func": 0.008187499828636646,
41
- "rewards/consensus_reward_func": 0.0,
42
  "rewards/cumulative_reward_2": 0.0,
43
- "rewards/final_correctness_reward_func": 0.0625,
44
- "rewards/question_recreation_reward_func": 0.3436360324267298,
45
  "rewards/soft_format_reward_func": 0.0,
46
  "rewards/strict_format_reward_func": 0.0,
47
- "rewards/xmlcount_reward_func": 0.30418750178068876,
48
  "step": 4
49
  },
50
  {
51
- "completion_length": 382.71875,
52
  "epoch": 0.3,
53
- "grad_norm": 4.208951473236084,
54
- "kl": 0.0012705847912002355,
55
  "learning_rate": 4.472851273490984e-07,
56
  "loss": 0.0,
57
- "reward": 0.6679772045463324,
58
- "reward_std": 0.8955608154865331,
59
- "rewards/concensus_correctness_reward_func": 0.045499999076128006,
60
  "rewards/consensus_reward_func": 0.0625,
61
  "rewards/cumulative_reward_2": 0.0,
62
- "rewards/final_correctness_reward_func": 0.125,
63
- "rewards/question_recreation_reward_func": 0.3797584424028173,
64
  "rewards/soft_format_reward_func": 0.0,
65
  "rewards/strict_format_reward_func": 0.0,
66
- "rewards/xmlcount_reward_func": 0.05521875782869756,
67
  "step": 6
68
  },
69
  {
70
- "completion_length": 457.59375,
71
  "epoch": 0.4,
72
- "grad_norm": 4.035867691040039,
73
- "kl": 0.002204360036557773,
74
  "learning_rate": 3.867370395306068e-07,
75
  "loss": 0.0,
76
- "reward": 0.5797329153865576,
77
- "reward_std": 0.6650298394015408,
78
- "rewards/concensus_correctness_reward_func": 0.024437500163912773,
79
- "rewards/consensus_reward_func": 0.0,
80
  "rewards/cumulative_reward_2": 0.0,
81
  "rewards/final_correctness_reward_func": 0.0,
82
- "rewards/question_recreation_reward_func": 0.4533579223207198,
83
  "rewards/soft_format_reward_func": 0.0,
84
  "rewards/strict_format_reward_func": 0.0,
85
- "rewards/xmlcount_reward_func": 0.101937499595806,
86
  "step": 8
87
  },
88
  {
89
- "completion_length": 542.875,
90
  "epoch": 0.5,
91
- "grad_norm": 4.634764671325684,
92
- "kl": 0.005208585236687213,
93
  "learning_rate": 3.1137137178519977e-07,
94
  "loss": 0.0,
95
- "reward": 0.9039936233311892,
96
- "reward_std": 1.0827069451333955,
97
- "rewards/concensus_correctness_reward_func": 0.0,
98
- "rewards/consensus_reward_func": 0.0,
99
  "rewards/cumulative_reward_2": 0.0,
100
- "rewards/final_correctness_reward_func": 0.0625,
101
- "rewards/question_recreation_reward_func": 0.5436498466879129,
102
  "rewards/soft_format_reward_func": 0.0,
103
- "rewards/strict_format_reward_func": 0.015625,
104
- "rewards/xmlcount_reward_func": 0.28221875708550215,
105
  "step": 10
106
  },
107
  {
108
- "completion_length": 345.46875,
109
  "epoch": 0.6,
110
- "grad_norm": 6.202835559844971,
111
- "kl": 0.02693040803569602,
112
  "learning_rate": 2.2935516363191693e-07,
113
  "loss": 0.0,
114
- "reward": 0.618082148488611,
115
- "reward_std": 0.7239898710104171,
116
- "rewards/concensus_correctness_reward_func": 0.024687500670552254,
117
- "rewards/consensus_reward_func": 0.125,
118
  "rewards/cumulative_reward_2": 0.0,
119
- "rewards/final_correctness_reward_func": 0.0,
120
- "rewards/question_recreation_reward_func": 0.2905508743133396,
121
  "rewards/soft_format_reward_func": 0.0,
122
- "rewards/strict_format_reward_func": 0.015625,
123
- "rewards/xmlcount_reward_func": 0.16221874905750155,
124
  "step": 12
125
  },
126
  {
127
- "completion_length": 437.09375,
128
  "epoch": 0.7,
129
- "grad_norm": 4.228725910186768,
130
- "kl": 0.006790324652683921,
131
  "learning_rate": 1.4957614383675767e-07,
132
  "loss": 0.0,
133
- "reward": 0.5991453088354319,
134
- "reward_std": 0.561388993752189,
135
- "rewards/concensus_correctness_reward_func": 0.021687500178813934,
136
- "rewards/consensus_reward_func": 0.0,
137
  "rewards/cumulative_reward_2": 0.0,
138
- "rewards/final_correctness_reward_func": 0.0,
139
- "rewards/question_recreation_reward_func": 0.3984578078961931,
140
- "rewards/soft_format_reward_func": 0.015625,
141
  "rewards/strict_format_reward_func": 0.0,
142
- "rewards/xmlcount_reward_func": 0.16337500140070915,
143
  "step": 14
144
  },
145
  {
146
- "completion_length": 378.78125,
147
  "epoch": 0.8,
148
- "grad_norm": 7.585692882537842,
149
- "kl": 0.010340058604924707,
150
  "learning_rate": 8.067960709356478e-08,
151
  "loss": 0.0,
152
- "reward": 0.7918121283873916,
153
- "reward_std": 0.7190128332586028,
154
- "rewards/concensus_correctness_reward_func": 0.03837500046938658,
155
- "rewards/consensus_reward_func": 0.0625,
156
  "rewards/cumulative_reward_2": 0.0,
157
  "rewards/final_correctness_reward_func": 0.1875,
158
- "rewards/question_recreation_reward_func": 0.38859339867485687,
159
  "rewards/soft_format_reward_func": 0.0,
160
  "rewards/strict_format_reward_func": 0.0,
161
- "rewards/xmlcount_reward_func": 0.11484375223517418,
162
  "step": 16
163
  },
164
  {
165
- "completion_length": 405.21875,
166
  "epoch": 0.9,
167
- "grad_norm": 3.7810328006744385,
168
- "kl": 0.005060435782070272,
169
  "learning_rate": 3.013156219837776e-08,
170
  "loss": 0.0,
171
- "reward": 0.6734253508038819,
172
- "reward_std": 0.8879609380383044,
173
- "rewards/concensus_correctness_reward_func": 0.045625001192092896,
174
  "rewards/consensus_reward_func": 0.0,
175
  "rewards/cumulative_reward_2": 0.0,
176
- "rewards/final_correctness_reward_func": 0.125,
177
- "rewards/question_recreation_reward_func": 0.4548003734089434,
178
  "rewards/soft_format_reward_func": 0.0,
179
  "rewards/strict_format_reward_func": 0.0,
180
- "rewards/xmlcount_reward_func": 0.04799999948590994,
181
  "step": 18
182
  },
183
  {
184
- "completion_length": 416.15625,
185
  "epoch": 1.0,
186
- "grad_norm": 5.279428005218506,
187
- "kl": 0.03254748803738039,
188
  "learning_rate": 3.4096741493194193e-09,
189
  "loss": 0.0,
190
- "reward": 2.115490459022112,
191
- "reward_std": 1.9778084652352845,
192
- "rewards/concensus_correctness_reward_func": 0.902562496252358,
193
  "rewards/consensus_reward_func": 0.1875,
194
  "rewards/cumulative_reward_2": 0.0,
195
  "rewards/final_correctness_reward_func": 0.1875,
196
- "rewards/question_recreation_reward_func": 0.5558030122192577,
197
  "rewards/soft_format_reward_func": 0.0,
198
  "rewards/strict_format_reward_func": 0.0,
199
- "rewards/xmlcount_reward_func": 0.28212499618530273,
200
  "step": 20
201
  },
202
  {
203
  "epoch": 1.0,
204
  "step": 20,
205
  "total_flos": 0.0,
206
- "train_loss": 9.117599711316871e-06,
207
- "train_runtime": 360.6804,
208
- "train_samples_per_second": 0.887,
209
- "train_steps_per_second": 0.055
210
  }
211
  ],
212
  "logging_steps": 2,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "completion_length": 351.71875,
14
  "epoch": 0.1,
15
+ "grad_norm": 8.513227462768555,
16
  "kl": 0.0,
17
  "learning_rate": 5e-07,
18
  "loss": -0.0,
19
+ "reward": 0.8320725136436522,
20
+ "reward_std": 0.8263479982997524,
21
+ "rewards/concensus_correctness_reward_func": 0.0755624994635582,
22
+ "rewards/consensus_reward_func": 0.1875,
23
  "rewards/cumulative_reward_2": 0.0,
24
+ "rewards/final_correctness_reward_func": 0.0625,
25
+ "rewards/question_recreation_reward_func": 0.37107251823181286,
26
+ "rewards/soft_format_reward_func": 0.015625,
27
  "rewards/strict_format_reward_func": 0.0,
28
+ "rewards/xmlcount_reward_func": 0.11981249786913395,
29
  "step": 2
30
  },
31
  {
32
+ "completion_length": 403.84375,
33
  "epoch": 0.2,
34
+ "grad_norm": 4.751884460449219,
35
+ "kl": 0.0016504357699886896,
36
  "learning_rate": 4.864543104251586e-07,
37
  "loss": 0.0,
38
+ "reward": 0.8297341071302071,
39
+ "reward_std": 0.9467946260992903,
40
+ "rewards/concensus_correctness_reward_func": 0.10125000029802322,
41
+ "rewards/consensus_reward_func": 0.125,
42
  "rewards/cumulative_reward_2": 0.0,
43
+ "rewards/final_correctness_reward_func": 0.0,
44
+ "rewards/question_recreation_reward_func": 0.40404659567866474,
45
  "rewards/soft_format_reward_func": 0.0,
46
  "rewards/strict_format_reward_func": 0.0,
47
+ "rewards/xmlcount_reward_func": 0.19943749252706766,
48
  "step": 4
49
  },
50
  {
51
+ "completion_length": 401.75,
52
  "epoch": 0.3,
53
+ "grad_norm": 3.9849419593811035,
54
+ "kl": 0.0013705312012461945,
55
  "learning_rate": 4.472851273490984e-07,
56
  "loss": 0.0,
57
+ "reward": 0.7022149509284645,
58
+ "reward_std": 0.5484118369640782,
59
+ "rewards/concensus_correctness_reward_func": 0.0,
60
  "rewards/consensus_reward_func": 0.0625,
61
  "rewards/cumulative_reward_2": 0.0,
62
+ "rewards/final_correctness_reward_func": 0.1875,
63
+ "rewards/question_recreation_reward_func": 0.4711212064139545,
64
  "rewards/soft_format_reward_func": 0.0,
65
  "rewards/strict_format_reward_func": 0.0,
66
+ "rewards/xmlcount_reward_func": -0.018906254321336746,
67
  "step": 6
68
  },
69
  {
70
+ "completion_length": 448.03125,
71
  "epoch": 0.4,
72
+ "grad_norm": 3.4099152088165283,
73
+ "kl": 0.0012933166872244328,
74
  "learning_rate": 3.867370395306068e-07,
75
  "loss": 0.0,
76
+ "reward": 0.8546537263318896,
77
+ "reward_std": 0.7673704327316955,
78
+ "rewards/concensus_correctness_reward_func": 0.0064999996684491634,
79
+ "rewards/consensus_reward_func": 0.1875,
80
  "rewards/cumulative_reward_2": 0.0,
81
  "rewards/final_correctness_reward_func": 0.0,
82
+ "rewards/question_recreation_reward_func": 0.38755996932741255,
83
  "rewards/soft_format_reward_func": 0.0,
84
  "rewards/strict_format_reward_func": 0.0,
85
+ "rewards/xmlcount_reward_func": 0.2730937581509352,
86
  "step": 8
87
  },
88
  {
89
+ "completion_length": 408.53125,
90
  "epoch": 0.5,
91
+ "grad_norm": 4.682156085968018,
92
+ "kl": 0.0020962205962860025,
93
  "learning_rate": 3.1137137178519977e-07,
94
  "loss": 0.0,
95
+ "reward": 2.0500756152905524,
96
+ "reward_std": 1.4114575510611758,
97
+ "rewards/concensus_correctness_reward_func": 0.892937496304512,
98
+ "rewards/consensus_reward_func": 0.25,
99
  "rewards/cumulative_reward_2": 0.0,
100
+ "rewards/final_correctness_reward_func": 0.25,
101
+ "rewards/question_recreation_reward_func": 0.339450626866892,
102
  "rewards/soft_format_reward_func": 0.0,
103
+ "rewards/strict_format_reward_func": 0.0,
104
+ "rewards/xmlcount_reward_func": 0.3176874998025596,
105
  "step": 10
106
  },
107
  {
108
+ "completion_length": 356.15625,
109
  "epoch": 0.6,
110
+ "grad_norm": 7.706838607788086,
111
+ "kl": 0.0017779090485419147,
112
  "learning_rate": 2.2935516363191693e-07,
113
  "loss": 0.0,
114
+ "reward": 0.7448974205181003,
115
+ "reward_std": 0.6400141692720354,
116
+ "rewards/concensus_correctness_reward_func": 0.031375001184642315,
117
+ "rewards/consensus_reward_func": 0.0,
118
  "rewards/cumulative_reward_2": 0.0,
119
+ "rewards/final_correctness_reward_func": 0.125,
120
+ "rewards/question_recreation_reward_func": 0.3690224272431806,
121
  "rewards/soft_format_reward_func": 0.0,
122
+ "rewards/strict_format_reward_func": 0.0,
123
+ "rewards/xmlcount_reward_func": 0.2195000002393499,
124
  "step": 12
125
  },
126
  {
127
+ "completion_length": 444.375,
128
  "epoch": 0.7,
129
+ "grad_norm": 3.907611131668091,
130
+ "kl": 0.0013627830157929566,
131
  "learning_rate": 1.4957614383675767e-07,
132
  "loss": 0.0,
133
+ "reward": 2.271299608051777,
134
+ "reward_std": 1.8669061198015697,
135
+ "rewards/concensus_correctness_reward_func": 0.7388749998062849,
136
+ "rewards/consensus_reward_func": 0.4375,
137
  "rewards/cumulative_reward_2": 0.0,
138
+ "rewards/final_correctness_reward_func": 0.25,
139
+ "rewards/question_recreation_reward_func": 0.6308308928273618,
140
+ "rewards/soft_format_reward_func": 0.0,
141
  "rewards/strict_format_reward_func": 0.0,
142
+ "rewards/xmlcount_reward_func": 0.21409375220537186,
143
  "step": 14
144
  },
145
  {
146
+ "completion_length": 318.375,
147
  "epoch": 0.8,
148
+ "grad_norm": 4.985864162445068,
149
+ "kl": 0.002357518140343018,
150
  "learning_rate": 8.067960709356478e-08,
151
  "loss": 0.0,
152
+ "reward": 1.5525614071812015,
153
+ "reward_std": 0.9043538695550524,
154
+ "rewards/concensus_correctness_reward_func": 0.20749999582767487,
155
+ "rewards/consensus_reward_func": 0.3125,
156
  "rewards/cumulative_reward_2": 0.0,
157
  "rewards/final_correctness_reward_func": 0.1875,
158
+ "rewards/question_recreation_reward_func": 0.4891551311593503,
159
  "rewards/soft_format_reward_func": 0.0,
160
  "rewards/strict_format_reward_func": 0.0,
161
+ "rewards/xmlcount_reward_func": 0.3559062508866191,
162
  "step": 16
163
  },
164
  {
165
+ "completion_length": 335.875,
166
  "epoch": 0.9,
167
+ "grad_norm": 10.401203155517578,
168
+ "kl": 0.0028135806787759066,
169
  "learning_rate": 3.013156219837776e-08,
170
  "loss": 0.0,
171
+ "reward": 1.749845893587917,
172
+ "reward_std": 1.5136296135606244,
173
+ "rewards/concensus_correctness_reward_func": 0.6584999989718199,
174
  "rewards/consensus_reward_func": 0.0,
175
  "rewards/cumulative_reward_2": 0.0,
176
+ "rewards/final_correctness_reward_func": 0.1875,
177
+ "rewards/question_recreation_reward_func": 0.4342209289316088,
178
  "rewards/soft_format_reward_func": 0.0,
179
  "rewards/strict_format_reward_func": 0.0,
180
+ "rewards/xmlcount_reward_func": 0.46962500223889947,
181
  "step": 18
182
  },
183
  {
184
+ "completion_length": 429.78125,
185
  "epoch": 1.0,
186
+ "grad_norm": 4.305922508239746,
187
+ "kl": 0.0020146886017755605,
188
  "learning_rate": 3.4096741493194193e-09,
189
  "loss": 0.0,
190
+ "reward": 1.6820365519961342,
191
+ "reward_std": 1.639612780767493,
192
+ "rewards/concensus_correctness_reward_func": 0.7378125041723251,
193
  "rewards/consensus_reward_func": 0.1875,
194
  "rewards/cumulative_reward_2": 0.0,
195
  "rewards/final_correctness_reward_func": 0.1875,
196
+ "rewards/question_recreation_reward_func": 0.42944281999371015,
197
  "rewards/soft_format_reward_func": 0.0,
198
  "rewards/strict_format_reward_func": 0.0,
199
+ "rewards/xmlcount_reward_func": 0.13978125574067235,
200
  "step": 20
201
  },
202
  {
203
  "epoch": 1.0,
204
  "step": 20,
205
  "total_flos": 0.0,
206
+ "train_loss": 1.6838312149047852e-06,
207
+ "train_runtime": 318.2882,
208
+ "train_samples_per_second": 1.005,
209
+ "train_steps_per_second": 0.063
210
  }
211
  ],
212
  "logging_steps": 2,