Starrrrrry commited on
Commit
5f59383
·
verified ·
1 Parent(s): cdbfd51

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. model.safetensors +3 -0
  2. trainer_state.json +614 -0
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ed10fd4d99df33fd3bc230d6d04f23bcfbe3283c2de9e96ff766e9ef7b7052c
3
+ size 29214685368
trainer_state.json ADDED
@@ -0,0 +1,614 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.25749346208006435,
6
+ "eval_steps": 500,
7
+ "global_step": 20,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "clip_ratio/high_max": 0.0,
14
+ "clip_ratio/high_mean": 0.0,
15
+ "clip_ratio/low_mean": 0.0,
16
+ "clip_ratio/low_min": 0.0,
17
+ "clip_ratio/region_mean": 0.0,
18
+ "completions/clipped_ratio": 0.1611328125,
19
+ "completions/max_length": 10637.0,
20
+ "completions/mean_length": 1210.8232421875,
21
+ "completions/min_length": 21.0,
22
+ "depth_nums": 0.1611328125,
23
+ "entropy": 6.012878332287073,
24
+ "epoch": 0.012874673104003219,
25
+ "grad_norm": 20.75629234313965,
26
+ "learning_rate": 3e-06,
27
+ "loss": -0.0,
28
+ "num_tokens": 4379765.0,
29
+ "reward": 1.2593750953674316,
30
+ "reward_std": 0.6255239248275757,
31
+ "rewards/accuracy_reward_with_llm/mean": 0.556640625,
32
+ "rewards/accuracy_reward_with_llm/std": 0.4970242381095886,
33
+ "rewards/format_reward/mean": 0.7021484375,
34
+ "rewards/format_reward/std": 0.45753735303878784,
35
+ "rewards/visual_gain_reward/mean": 0.0005859375232830644,
36
+ "rewards/visual_gain_reward/std": 0.010814730077981949,
37
+ "segmentation_nums": 0.15234375,
38
+ "step": 1,
39
+ "total_gen_nums": 0.3134765625
40
+ },
41
+ {
42
+ "clip_ratio/high_max": 0.0,
43
+ "clip_ratio/high_mean": 0.0,
44
+ "clip_ratio/low_mean": 0.0,
45
+ "clip_ratio/low_min": 0.0,
46
+ "clip_ratio/region_mean": 0.0,
47
+ "completions/clipped_ratio": 0.1845703125,
48
+ "completions/max_length": 11014.0,
49
+ "completions/mean_length": 1261.671875,
50
+ "completions/min_length": 18.0,
51
+ "depth_nums": 0.1767578125,
52
+ "entropy": 5.868425816297531,
53
+ "epoch": 0.025749346208006438,
54
+ "grad_norm": 28.14173126220703,
55
+ "learning_rate": 2.9987834972573546e-06,
56
+ "loss": 0.0001,
57
+ "num_tokens": 8647497.0,
58
+ "reward": 1.1925780773162842,
59
+ "reward_std": 0.6145570278167725,
60
+ "rewards/accuracy_reward_with_llm/mean": 0.5205078125,
61
+ "rewards/accuracy_reward_with_llm/std": 0.49982336163520813,
62
+ "rewards/format_reward/mean": 0.671875,
63
+ "rewards/format_reward/std": 0.4697600305080414,
64
+ "rewards/visual_gain_reward/mean": 0.00019531250291038305,
65
+ "rewards/visual_gain_reward/std": 0.0062500000931322575,
66
+ "segmentation_nums": 0.142578125,
67
+ "step": 2,
68
+ "total_gen_nums": 0.3193359375
69
+ },
70
+ {
71
+ "clip_ratio/high_max": 0.0,
72
+ "clip_ratio/high_mean": 0.0,
73
+ "clip_ratio/low_mean": 0.0,
74
+ "clip_ratio/low_min": 0.0,
75
+ "clip_ratio/region_mean": 0.0,
76
+ "completions/clipped_ratio": 0.16796875,
77
+ "completions/max_length": 8707.0,
78
+ "completions/mean_length": 1188.65625,
79
+ "completions/min_length": 18.0,
80
+ "depth_nums": 0.2060546875,
81
+ "entropy": 5.8414958491921425,
82
+ "epoch": 0.03862401931200966,
83
+ "grad_norm": 33.402503967285156,
84
+ "learning_rate": 2.9951359622013153e-06,
85
+ "loss": 0.0,
86
+ "num_tokens": 12897429.0,
87
+ "reward": 1.220117211341858,
88
+ "reward_std": 0.6342688798904419,
89
+ "rewards/accuracy_reward_with_llm/mean": 0.5419921875,
90
+ "rewards/accuracy_reward_with_llm/std": 0.4984769821166992,
91
+ "rewards/format_reward/mean": 0.677734375,
92
+ "rewards/format_reward/std": 0.4675724506378174,
93
+ "rewards/visual_gain_reward/mean": 0.0003906250058207661,
94
+ "rewards/visual_gain_reward/std": 0.008834514766931534,
95
+ "segmentation_nums": 0.119140625,
96
+ "step": 3,
97
+ "total_gen_nums": 0.3251953125
98
+ },
99
+ {
100
+ "clip_ratio/high_max": 0.0,
101
+ "clip_ratio/high_mean": 0.0,
102
+ "clip_ratio/low_mean": 0.0,
103
+ "clip_ratio/low_min": 0.0,
104
+ "clip_ratio/region_mean": 0.0,
105
+ "completions/clipped_ratio": 0.1435546875,
106
+ "completions/max_length": 8885.0,
107
+ "completions/mean_length": 1253.751953125,
108
+ "completions/min_length": 24.0,
109
+ "depth_nums": 0.21484375,
110
+ "entropy": 5.834823831915855,
111
+ "epoch": 0.051498692416012876,
112
+ "grad_norm": 13.457406997680664,
113
+ "learning_rate": 2.989063311147081e-06,
114
+ "loss": -0.0,
115
+ "num_tokens": 17253257.0,
116
+ "reward": 1.2726562023162842,
117
+ "reward_std": 0.6112420558929443,
118
+ "rewards/accuracy_reward_with_llm/mean": 0.5498046875,
119
+ "rewards/accuracy_reward_with_llm/std": 0.4977564215660095,
120
+ "rewards/format_reward/mean": 0.72265625,
121
+ "rewards/format_reward/std": 0.44790637493133545,
122
+ "rewards/visual_gain_reward/mean": 0.00019531250291038305,
123
+ "rewards/visual_gain_reward/std": 0.0062500000931322575,
124
+ "segmentation_nums": 0.1337890625,
125
+ "step": 4,
126
+ "total_gen_nums": 0.3486328125
127
+ },
128
+ {
129
+ "clip_ratio/high_max": 0.0,
130
+ "clip_ratio/high_mean": 0.0,
131
+ "clip_ratio/low_mean": 0.0,
132
+ "clip_ratio/low_min": 0.0,
133
+ "clip_ratio/region_mean": 0.0,
134
+ "completions/clipped_ratio": 0.140625,
135
+ "completions/max_length": 8690.0,
136
+ "completions/mean_length": 1261.009765625,
137
+ "completions/min_length": 16.0,
138
+ "depth_nums": 0.2158203125,
139
+ "entropy": 5.8213246911764145,
140
+ "epoch": 0.06437336552001609,
141
+ "grad_norm": 12.64630126953125,
142
+ "learning_rate": 2.9805753939568693e-06,
143
+ "loss": 0.0001,
144
+ "num_tokens": 21637837.0,
145
+ "reward": 1.2509765625,
146
+ "reward_std": 0.6066232323646545,
147
+ "rewards/accuracy_reward_with_llm/mean": 0.51953125,
148
+ "rewards/accuracy_reward_with_llm/std": 0.49986252188682556,
149
+ "rewards/format_reward/mean": 0.7314453125,
150
+ "rewards/format_reward/std": 0.4434242844581604,
151
+ "rewards/visual_gain_reward/mean": 0.0,
152
+ "rewards/visual_gain_reward/std": 0.0,
153
+ "segmentation_nums": 0.1279296875,
154
+ "step": 5,
155
+ "total_gen_nums": 0.34375
156
+ },
157
+ {
158
+ "clip_ratio/high_max": 0.0,
159
+ "clip_ratio/high_mean": 0.0,
160
+ "clip_ratio/low_mean": 0.0,
161
+ "clip_ratio/low_min": 0.0,
162
+ "clip_ratio/region_mean": 0.0,
163
+ "completions/clipped_ratio": 0.1494140625,
164
+ "completions/max_length": 8820.0,
165
+ "completions/mean_length": 1227.6279296875,
166
+ "completions/min_length": 20.0,
167
+ "depth_nums": 0.1904296875,
168
+ "entropy": 5.848804991692305,
169
+ "epoch": 0.07724803862401931,
170
+ "grad_norm": 10.392542839050293,
171
+ "learning_rate": 2.9696859780634016e-06,
172
+ "loss": 0.0,
173
+ "num_tokens": 25954420.0,
174
+ "reward": 1.3287110328674316,
175
+ "reward_std": 0.5927157998085022,
176
+ "rewards/accuracy_reward_with_llm/mean": 0.5888671875,
177
+ "rewards/accuracy_reward_with_llm/std": 0.49227967858314514,
178
+ "rewards/format_reward/mean": 0.7392578125,
179
+ "rewards/format_reward/std": 0.4392540454864502,
180
+ "rewards/visual_gain_reward/mean": 0.0005859375232830644,
181
+ "rewards/visual_gain_reward/std": 0.010814730077981949,
182
+ "segmentation_nums": 0.1474609375,
183
+ "step": 6,
184
+ "total_gen_nums": 0.337890625
185
+ },
186
+ {
187
+ "clip_ratio/high_max": 0.0,
188
+ "clip_ratio/high_mean": 0.0,
189
+ "clip_ratio/low_mean": 0.0,
190
+ "clip_ratio/low_min": 0.0,
191
+ "clip_ratio/region_mean": 0.0,
192
+ "completions/clipped_ratio": 0.076171875,
193
+ "completions/max_length": 8984.0,
194
+ "completions/mean_length": 1092.6708984375,
195
+ "completions/min_length": 22.0,
196
+ "depth_nums": 0.162109375,
197
+ "entropy": 5.889556594192982,
198
+ "epoch": 0.09012271172802253,
199
+ "grad_norm": 62.187469482421875,
200
+ "learning_rate": 2.956412726139078e-06,
201
+ "loss": 0.0,
202
+ "num_tokens": 30148439.0,
203
+ "reward": 1.4267578125,
204
+ "reward_std": 0.5138623118400574,
205
+ "rewards/accuracy_reward_with_llm/mean": 0.5888671875,
206
+ "rewards/accuracy_reward_with_llm/std": 0.49227967858314514,
207
+ "rewards/format_reward/mean": 0.837890625,
208
+ "rewards/format_reward/std": 0.36873120069503784,
209
+ "rewards/visual_gain_reward/mean": 0.0,
210
+ "rewards/visual_gain_reward/std": 0.0,
211
+ "segmentation_nums": 0.146484375,
212
+ "step": 7,
213
+ "total_gen_nums": 0.30859375
214
+ },
215
+ {
216
+ "clip_ratio/high_max": 0.0,
217
+ "clip_ratio/high_mean": 0.0,
218
+ "clip_ratio/low_mean": 0.0,
219
+ "clip_ratio/low_min": 0.0,
220
+ "clip_ratio/region_mean": 0.0,
221
+ "completions/clipped_ratio": 0.09375,
222
+ "completions/max_length": 9044.0,
223
+ "completions/mean_length": 1225.798828125,
224
+ "completions/min_length": 26.0,
225
+ "depth_nums": 0.21875,
226
+ "entropy": 5.749105926603079,
227
+ "epoch": 0.10299738483202575,
228
+ "grad_norm": 4.391378402709961,
229
+ "learning_rate": 2.9407771674470586e-06,
230
+ "loss": 0.0,
231
+ "num_tokens": 34471603.0,
232
+ "reward": 1.3777344226837158,
233
+ "reward_std": 0.5527065992355347,
234
+ "rewards/accuracy_reward_with_llm/mean": 0.568359375,
235
+ "rewards/accuracy_reward_with_llm/std": 0.49554696679115295,
236
+ "rewards/format_reward/mean": 0.80859375,
237
+ "rewards/format_reward/std": 0.3936002850532532,
238
+ "rewards/visual_gain_reward/mean": 0.0007812500116415322,
239
+ "rewards/visual_gain_reward/std": 0.012481658719480038,
240
+ "segmentation_nums": 0.1279296875,
241
+ "step": 8,
242
+ "total_gen_nums": 0.3466796875
243
+ },
244
+ {
245
+ "clip_ratio/high_max": 0.0,
246
+ "clip_ratio/high_mean": 0.0,
247
+ "clip_ratio/low_mean": 0.0,
248
+ "clip_ratio/low_min": 0.0,
249
+ "clip_ratio/region_mean": 0.0,
250
+ "completions/clipped_ratio": 0.080078125,
251
+ "completions/max_length": 8989.0,
252
+ "completions/mean_length": 1115.748046875,
253
+ "completions/min_length": 9.0,
254
+ "depth_nums": 0.185546875,
255
+ "entropy": 5.770555350929499,
256
+ "epoch": 0.11587205793602896,
257
+ "grad_norm": 5.756628036499023,
258
+ "learning_rate": 2.922804662920718e-06,
259
+ "loss": -0.0001,
260
+ "num_tokens": 38669155.0,
261
+ "reward": 1.4249999523162842,
262
+ "reward_std": 0.5238240361213684,
263
+ "rewards/accuracy_reward_with_llm/mean": 0.5947265625,
264
+ "rewards/accuracy_reward_with_llm/std": 0.4911847710609436,
265
+ "rewards/format_reward/mean": 0.830078125,
266
+ "rewards/format_reward/std": 0.3757476806640625,
267
+ "rewards/visual_gain_reward/mean": 0.00019531250291038305,
268
+ "rewards/visual_gain_reward/std": 0.006250000558793545,
269
+ "segmentation_nums": 0.134765625,
270
+ "step": 9,
271
+ "total_gen_nums": 0.3203125
272
+ },
273
+ {
274
+ "clip_ratio/high_max": 0.0,
275
+ "clip_ratio/high_mean": 0.0,
276
+ "clip_ratio/low_mean": 0.0,
277
+ "clip_ratio/low_min": 0.0,
278
+ "clip_ratio/region_mean": 0.0,
279
+ "completions/clipped_ratio": 0.0810546875,
280
+ "completions/max_length": 8784.0,
281
+ "completions/mean_length": 1067.3955078125,
282
+ "completions/min_length": 20.0,
283
+ "depth_nums": 0.19921875,
284
+ "entropy": 5.8339343555271626,
285
+ "epoch": 0.12874673104003218,
286
+ "grad_norm": 3.002086877822876,
287
+ "learning_rate": 2.9025243640281224e-06,
288
+ "loss": -0.0,
289
+ "num_tokens": 42779380.0,
290
+ "reward": 1.4367187023162842,
291
+ "reward_std": 0.4874913692474365,
292
+ "rewards/accuracy_reward_with_llm/mean": 0.5869140625,
293
+ "rewards/accuracy_reward_with_llm/std": 0.4926286041736603,
294
+ "rewards/format_reward/mean": 0.849609375,
295
+ "rewards/format_reward/std": 0.35762855410575867,
296
+ "rewards/visual_gain_reward/mean": 0.00019531250291038305,
297
+ "rewards/visual_gain_reward/std": 0.0062500000931322575,
298
+ "segmentation_nums": 0.109375,
299
+ "step": 10,
300
+ "total_gen_nums": 0.30859375
301
+ },
302
+ {
303
+ "clip_ratio/high_max": 0.0,
304
+ "clip_ratio/high_mean": 0.0,
305
+ "clip_ratio/low_mean": 0.0,
306
+ "clip_ratio/low_min": 0.0,
307
+ "clip_ratio/region_mean": 0.0,
308
+ "completions/clipped_ratio": 0.0888671875,
309
+ "completions/max_length": 5823.0,
310
+ "completions/mean_length": 1022.0263671875,
311
+ "completions/min_length": 3.0,
312
+ "depth_nums": 0.169921875,
313
+ "entropy": 5.852696340531111,
314
+ "epoch": 0.1416214041440354,
315
+ "grad_norm": 2.620856523513794,
316
+ "learning_rate": 2.8799691654882364e-06,
317
+ "loss": -0.0001,
318
+ "num_tokens": 46905439.0,
319
+ "reward": 1.451562523841858,
320
+ "reward_std": 0.5207765102386475,
321
+ "rewards/accuracy_reward_with_llm/mean": 0.611328125,
322
+ "rewards/accuracy_reward_with_llm/std": 0.4876866936683655,
323
+ "rewards/format_reward/mean": 0.83984375,
324
+ "rewards/format_reward/std": 0.36693012714385986,
325
+ "rewards/visual_gain_reward/mean": 0.0003906250058207661,
326
+ "rewards/visual_gain_reward/std": 0.008834514766931534,
327
+ "segmentation_nums": 0.1298828125,
328
+ "step": 11,
329
+ "total_gen_nums": 0.2998046875
330
+ },
331
+ {
332
+ "clip_ratio/high_max": 0.0,
333
+ "clip_ratio/high_mean": 0.0,
334
+ "clip_ratio/low_mean": 0.0,
335
+ "clip_ratio/low_min": 0.0,
336
+ "clip_ratio/region_mean": 0.0,
337
+ "completions/clipped_ratio": 0.0556640625,
338
+ "completions/max_length": 9018.0,
339
+ "completions/mean_length": 1051.802734375,
340
+ "completions/min_length": 33.0,
341
+ "depth_nums": 0.18359375,
342
+ "entropy": 5.979498442262411,
343
+ "epoch": 0.15449607724803863,
344
+ "grad_norm": 4.12182092666626,
345
+ "learning_rate": 2.8551756519155732e-06,
346
+ "loss": 0.0,
347
+ "num_tokens": 51027613.0,
348
+ "reward": 1.531640648841858,
349
+ "reward_std": 0.4414806663990021,
350
+ "rewards/accuracy_reward_with_llm/mean": 0.6396484375,
351
+ "rewards/accuracy_reward_with_llm/std": 0.480336993932724,
352
+ "rewards/format_reward/mean": 0.8916015625,
353
+ "rewards/format_reward/std": 0.3110348582267761,
354
+ "rewards/visual_gain_reward/mean": 0.0003906250058207661,
355
+ "rewards/visual_gain_reward/std": 0.008834514766931534,
356
+ "segmentation_nums": 0.1142578125,
357
+ "step": 12,
358
+ "total_gen_nums": 0.2978515625
359
+ },
360
+ {
361
+ "clip_ratio/high_max": 0.0,
362
+ "clip_ratio/high_mean": 0.0,
363
+ "clip_ratio/low_mean": 0.0,
364
+ "clip_ratio/low_min": 0.0,
365
+ "clip_ratio/region_mean": 0.0,
366
+ "completions/clipped_ratio": 0.0302734375,
367
+ "completions/max_length": 6003.0,
368
+ "completions/mean_length": 804.115234375,
369
+ "completions/min_length": 30.0,
370
+ "depth_nums": 0.140625,
371
+ "entropy": 6.25120921805501,
372
+ "epoch": 0.16737075035204185,
373
+ "grad_norm": 3.7815661430358887,
374
+ "learning_rate": 2.8281840384798147e-06,
375
+ "loss": -0.0,
376
+ "num_tokens": 54883153.0,
377
+ "reward": 1.576562523841858,
378
+ "reward_std": 0.410561740398407,
379
+ "rewards/accuracy_reward_with_llm/mean": 0.6474609375,
380
+ "rewards/accuracy_reward_with_llm/std": 0.4779941439628601,
381
+ "rewards/format_reward/mean": 0.9287109375,
382
+ "rewards/format_reward/std": 0.2574327886104584,
383
+ "rewards/visual_gain_reward/mean": 0.0003906250058207661,
384
+ "rewards/visual_gain_reward/std": 0.008834514766931534,
385
+ "segmentation_nums": 0.05859375,
386
+ "step": 13,
387
+ "total_gen_nums": 0.19921875
388
+ },
389
+ {
390
+ "clip_ratio/high_max": 0.0,
391
+ "clip_ratio/high_mean": 0.0,
392
+ "clip_ratio/low_mean": 0.0,
393
+ "clip_ratio/low_min": 0.0,
394
+ "clip_ratio/region_mean": 0.0,
395
+ "completions/clipped_ratio": 0.0400390625,
396
+ "completions/max_length": 8630.0,
397
+ "completions/mean_length": 996.5263671875,
398
+ "completions/min_length": 41.0,
399
+ "depth_nums": 0.1728515625,
400
+ "entropy": 6.108686868101358,
401
+ "epoch": 0.18024542345604505,
402
+ "grad_norm": 6.139822483062744,
403
+ "learning_rate": 2.7990381056766585e-06,
404
+ "loss": 0.0,
405
+ "num_tokens": 58956744.0,
406
+ "reward": 1.508398413658142,
407
+ "reward_std": 0.45154306292533875,
408
+ "rewards/accuracy_reward_with_llm/mean": 0.6005859375,
409
+ "rewards/accuracy_reward_with_llm/std": 0.490017294883728,
410
+ "rewards/format_reward/mean": 0.9072265625,
411
+ "rewards/format_reward/std": 0.29025644063949585,
412
+ "rewards/visual_gain_reward/mean": 0.0005859375232830644,
413
+ "rewards/visual_gain_reward/std": 0.010814730077981949,
414
+ "segmentation_nums": 0.0927734375,
415
+ "step": 14,
416
+ "total_gen_nums": 0.265625
417
+ },
418
+ {
419
+ "clip_ratio/high_max": 0.0,
420
+ "clip_ratio/high_mean": 0.0,
421
+ "clip_ratio/low_mean": 0.0,
422
+ "clip_ratio/low_min": 0.0,
423
+ "clip_ratio/region_mean": 0.0,
424
+ "completions/clipped_ratio": 0.029296875,
425
+ "completions/max_length": 8819.0,
426
+ "completions/mean_length": 1031.1953125,
427
+ "completions/min_length": 29.0,
428
+ "depth_nums": 0.1884765625,
429
+ "entropy": 6.060374926775694,
430
+ "epoch": 0.19312009656004828,
431
+ "grad_norm": 7.209692001342773,
432
+ "learning_rate": 2.7677851283156924e-06,
433
+ "loss": -0.0,
434
+ "num_tokens": 63076588.0,
435
+ "reward": 1.534570336341858,
436
+ "reward_std": 0.45015260577201843,
437
+ "rewards/accuracy_reward_with_llm/mean": 0.6083984375,
438
+ "rewards/accuracy_reward_with_llm/std": 0.4883468747138977,
439
+ "rewards/format_reward/mean": 0.92578125,
440
+ "rewards/format_reward/std": 0.26225462555885315,
441
+ "rewards/visual_gain_reward/mean": 0.0003906250058207661,
442
+ "rewards/visual_gain_reward/std": 0.008834514766931534,
443
+ "segmentation_nums": 0.08203125,
444
+ "step": 15,
445
+ "total_gen_nums": 0.2705078125
446
+ },
447
+ {
448
+ "clip_ratio/high_max": 0.0,
449
+ "clip_ratio/high_mean": 0.0,
450
+ "clip_ratio/low_mean": 0.0,
451
+ "clip_ratio/low_min": 0.0,
452
+ "clip_ratio/region_mean": 0.0,
453
+ "completions/clipped_ratio": 0.0380859375,
454
+ "completions/max_length": 8755.0,
455
+ "completions/mean_length": 924.4208984375,
456
+ "completions/min_length": 36.0,
457
+ "depth_nums": 0.1435546875,
458
+ "entropy": 6.233527477830648,
459
+ "epoch": 0.2059947696640515,
460
+ "grad_norm": 4.891477108001709,
461
+ "learning_rate": 2.7344757988404844e-06,
462
+ "loss": 0.0,
463
+ "num_tokens": 67082447.0,
464
+ "reward": 1.54296875,
465
+ "reward_std": 0.4315545856952667,
466
+ "rewards/accuracy_reward_with_llm/mean": 0.62109375,
467
+ "rewards/accuracy_reward_with_llm/std": 0.4853517711162567,
468
+ "rewards/format_reward/mean": 0.921875,
469
+ "rewards/format_reward/std": 0.26849931478500366,
470
+ "rewards/visual_gain_reward/mean": 0.0,
471
+ "rewards/visual_gain_reward/std": 0.0,
472
+ "segmentation_nums": 0.0732421875,
473
+ "step": 16,
474
+ "total_gen_nums": 0.216796875
475
+ },
476
+ {
477
+ "clip_ratio/high_max": 0.0,
478
+ "clip_ratio/high_mean": 0.0,
479
+ "clip_ratio/low_mean": 0.0,
480
+ "clip_ratio/low_min": 0.0,
481
+ "clip_ratio/region_mean": 0.0,
482
+ "completions/clipped_ratio": 0.0380859375,
483
+ "completions/max_length": 6920.0,
484
+ "completions/mean_length": 941.1455078125,
485
+ "completions/min_length": 9.0,
486
+ "depth_nums": 0.1611328125,
487
+ "entropy": 6.030625708401203,
488
+ "epoch": 0.21886944276805473,
489
+ "grad_norm": 2.913377285003662,
490
+ "learning_rate": 2.699164145105252e-06,
491
+ "loss": -0.0,
492
+ "num_tokens": 71067000.0,
493
+ "reward": 1.5294921398162842,
494
+ "reward_std": 0.3852996826171875,
495
+ "rewards/accuracy_reward_with_llm/mean": 0.6005859375,
496
+ "rewards/accuracy_reward_with_llm/std": 0.490017294883728,
497
+ "rewards/format_reward/mean": 0.9287109375,
498
+ "rewards/format_reward/std": 0.2574327886104584,
499
+ "rewards/visual_gain_reward/mean": 0.00019531250291038305,
500
+ "rewards/visual_gain_reward/std": 0.006250000558793545,
501
+ "segmentation_nums": 0.0751953125,
502
+ "step": 17,
503
+ "total_gen_nums": 0.236328125
504
+ },
505
+ {
506
+ "clip_ratio/high_max": 0.0,
507
+ "clip_ratio/high_mean": 0.0,
508
+ "clip_ratio/low_mean": 0.0,
509
+ "clip_ratio/low_min": 0.0,
510
+ "clip_ratio/region_mean": 0.0,
511
+ "completions/clipped_ratio": 0.0302734375,
512
+ "completions/max_length": 8720.0,
513
+ "completions/mean_length": 976.4716796875,
514
+ "completions/min_length": 48.0,
515
+ "depth_nums": 0.1728515625,
516
+ "entropy": 6.0241236835718155,
517
+ "epoch": 0.23174411587205793,
518
+ "grad_norm": 1.8226603269577026,
519
+ "learning_rate": 2.6619074427414816e-06,
520
+ "loss": -0.0,
521
+ "num_tokens": 75093701.0,
522
+ "reward": 1.563867211341858,
523
+ "reward_std": 0.3692563772201538,
524
+ "rewards/accuracy_reward_with_llm/mean": 0.6259765625,
525
+ "rewards/accuracy_reward_with_llm/std": 0.4841061532497406,
526
+ "rewards/format_reward/mean": 0.9375,
527
+ "rewards/format_reward/std": 0.242179736495018,
528
+ "rewards/visual_gain_reward/mean": 0.0003906250058207661,
529
+ "rewards/visual_gain_reward/std": 0.008834514766931534,
530
+ "segmentation_nums": 0.08203125,
531
+ "step": 18,
532
+ "total_gen_nums": 0.2548828125
533
+ },
534
+ {
535
+ "clip_ratio/high_max": 0.0,
536
+ "clip_ratio/high_mean": 0.0,
537
+ "clip_ratio/low_mean": 0.0,
538
+ "clip_ratio/low_min": 0.0,
539
+ "clip_ratio/region_mean": 0.0,
540
+ "completions/clipped_ratio": 0.03125,
541
+ "completions/max_length": 9009.0,
542
+ "completions/mean_length": 965.1865234375,
543
+ "completions/min_length": 41.0,
544
+ "depth_nums": 0.1689453125,
545
+ "entropy": 6.073840655386448,
546
+ "epoch": 0.24461878897606115,
547
+ "grad_norm": 2.7203147411346436,
548
+ "learning_rate": 2.6227661222566517e-06,
549
+ "loss": 0.0,
550
+ "num_tokens": 79134800.0,
551
+ "reward": 1.5759766101837158,
552
+ "reward_std": 0.3747516870498657,
553
+ "rewards/accuracy_reward_with_llm/mean": 0.6435546875,
554
+ "rewards/accuracy_reward_with_llm/std": 0.4791829288005829,
555
+ "rewards/format_reward/mean": 0.931640625,
556
+ "rewards/format_reward/std": 0.25248491764068604,
557
+ "rewards/visual_gain_reward/mean": 0.0007812500116415322,
558
+ "rewards/visual_gain_reward/std": 0.012481658719480038,
559
+ "segmentation_nums": 0.0693359375,
560
+ "step": 19,
561
+ "total_gen_nums": 0.23828125
562
+ },
563
+ {
564
+ "clip_ratio/high_max": 0.0,
565
+ "clip_ratio/high_mean": 0.0,
566
+ "clip_ratio/low_mean": 0.0,
567
+ "clip_ratio/low_min": 0.0,
568
+ "clip_ratio/region_mean": 0.0,
569
+ "completions/clipped_ratio": 0.0244140625,
570
+ "completions/max_length": 8861.0,
571
+ "completions/mean_length": 1025.93359375,
572
+ "completions/min_length": 42.0,
573
+ "depth_nums": 0.1904296875,
574
+ "entropy": 5.9863284938037395,
575
+ "epoch": 0.25749346208006435,
576
+ "grad_norm": 2.001307249069214,
577
+ "learning_rate": 2.581803671015722e-06,
578
+ "loss": 0.0001,
579
+ "num_tokens": 83173602.0,
580
+ "reward": 1.569726586341858,
581
+ "reward_std": 0.38102471828460693,
582
+ "rewards/accuracy_reward_with_llm/mean": 0.640625,
583
+ "rewards/accuracy_reward_with_llm/std": 0.480051726102829,
584
+ "rewards/format_reward/mean": 0.9287109375,
585
+ "rewards/format_reward/std": 0.2574327886104584,
586
+ "rewards/visual_gain_reward/mean": 0.0003906250058207661,
587
+ "rewards/visual_gain_reward/std": 0.008834514766931534,
588
+ "segmentation_nums": 0.0634765625,
589
+ "step": 20,
590
+ "total_gen_nums": 0.25390625
591
+ }
592
+ ],
593
+ "logging_steps": 1.0,
594
+ "max_steps": 78,
595
+ "num_input_tokens_seen": 83173602,
596
+ "num_train_epochs": 1,
597
+ "save_steps": 10,
598
+ "stateful_callbacks": {
599
+ "TrainerControl": {
600
+ "args": {
601
+ "should_epoch_stop": false,
602
+ "should_evaluate": false,
603
+ "should_log": false,
604
+ "should_save": true,
605
+ "should_training_stop": false
606
+ },
607
+ "attributes": {}
608
+ }
609
+ },
610
+ "total_flos": 0.0,
611
+ "train_batch_size": 1,
612
+ "trial_name": null,
613
+ "trial_params": null
614
+ }