Abner0803 committed on
Commit
820a108
·
verified ·
1 Parent(s): 5d0bf56

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,1052 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|d0_0|>": 151669,
11
+ "<|d0_100|>": 151769,
12
+ "<|d0_101|>": 151770,
13
+ "<|d0_102|>": 151771,
14
+ "<|d0_103|>": 151772,
15
+ "<|d0_104|>": 151773,
16
+ "<|d0_105|>": 151774,
17
+ "<|d0_106|>": 151775,
18
+ "<|d0_107|>": 151776,
19
+ "<|d0_108|>": 151777,
20
+ "<|d0_109|>": 151778,
21
+ "<|d0_10|>": 151679,
22
+ "<|d0_110|>": 151779,
23
+ "<|d0_111|>": 151780,
24
+ "<|d0_112|>": 151781,
25
+ "<|d0_113|>": 151782,
26
+ "<|d0_114|>": 151783,
27
+ "<|d0_115|>": 151784,
28
+ "<|d0_116|>": 151785,
29
+ "<|d0_117|>": 151786,
30
+ "<|d0_118|>": 151787,
31
+ "<|d0_119|>": 151788,
32
+ "<|d0_11|>": 151680,
33
+ "<|d0_120|>": 151789,
34
+ "<|d0_121|>": 151790,
35
+ "<|d0_122|>": 151791,
36
+ "<|d0_123|>": 151792,
37
+ "<|d0_124|>": 151793,
38
+ "<|d0_125|>": 151794,
39
+ "<|d0_126|>": 151795,
40
+ "<|d0_127|>": 151796,
41
+ "<|d0_128|>": 151797,
42
+ "<|d0_129|>": 151798,
43
+ "<|d0_12|>": 151681,
44
+ "<|d0_130|>": 151799,
45
+ "<|d0_131|>": 151800,
46
+ "<|d0_132|>": 151801,
47
+ "<|d0_133|>": 151802,
48
+ "<|d0_134|>": 151803,
49
+ "<|d0_135|>": 151804,
50
+ "<|d0_136|>": 151805,
51
+ "<|d0_137|>": 151806,
52
+ "<|d0_138|>": 151807,
53
+ "<|d0_139|>": 151808,
54
+ "<|d0_13|>": 151682,
55
+ "<|d0_140|>": 151809,
56
+ "<|d0_141|>": 151810,
57
+ "<|d0_142|>": 151811,
58
+ "<|d0_143|>": 151812,
59
+ "<|d0_144|>": 151813,
60
+ "<|d0_145|>": 151814,
61
+ "<|d0_146|>": 151815,
62
+ "<|d0_147|>": 151816,
63
+ "<|d0_148|>": 151817,
64
+ "<|d0_149|>": 151818,
65
+ "<|d0_14|>": 151683,
66
+ "<|d0_150|>": 151819,
67
+ "<|d0_151|>": 151820,
68
+ "<|d0_152|>": 151821,
69
+ "<|d0_153|>": 151822,
70
+ "<|d0_154|>": 151823,
71
+ "<|d0_155|>": 151824,
72
+ "<|d0_156|>": 151825,
73
+ "<|d0_157|>": 151826,
74
+ "<|d0_158|>": 151827,
75
+ "<|d0_159|>": 151828,
76
+ "<|d0_15|>": 151684,
77
+ "<|d0_160|>": 151829,
78
+ "<|d0_161|>": 151830,
79
+ "<|d0_162|>": 151831,
80
+ "<|d0_163|>": 151832,
81
+ "<|d0_164|>": 151833,
82
+ "<|d0_165|>": 151834,
83
+ "<|d0_166|>": 151835,
84
+ "<|d0_167|>": 151836,
85
+ "<|d0_168|>": 151837,
86
+ "<|d0_169|>": 151838,
87
+ "<|d0_16|>": 151685,
88
+ "<|d0_170|>": 151839,
89
+ "<|d0_171|>": 151840,
90
+ "<|d0_172|>": 151841,
91
+ "<|d0_173|>": 151842,
92
+ "<|d0_174|>": 151843,
93
+ "<|d0_175|>": 151844,
94
+ "<|d0_176|>": 151845,
95
+ "<|d0_177|>": 151846,
96
+ "<|d0_178|>": 151847,
97
+ "<|d0_179|>": 151848,
98
+ "<|d0_17|>": 151686,
99
+ "<|d0_180|>": 151849,
100
+ "<|d0_181|>": 151850,
101
+ "<|d0_182|>": 151851,
102
+ "<|d0_183|>": 151852,
103
+ "<|d0_184|>": 151853,
104
+ "<|d0_185|>": 151854,
105
+ "<|d0_186|>": 151855,
106
+ "<|d0_187|>": 151856,
107
+ "<|d0_188|>": 151857,
108
+ "<|d0_189|>": 151858,
109
+ "<|d0_18|>": 151687,
110
+ "<|d0_190|>": 151859,
111
+ "<|d0_191|>": 151860,
112
+ "<|d0_192|>": 151861,
113
+ "<|d0_193|>": 151862,
114
+ "<|d0_194|>": 151863,
115
+ "<|d0_195|>": 151864,
116
+ "<|d0_196|>": 151865,
117
+ "<|d0_197|>": 151866,
118
+ "<|d0_198|>": 151867,
119
+ "<|d0_199|>": 151868,
120
+ "<|d0_19|>": 151688,
121
+ "<|d0_1|>": 151670,
122
+ "<|d0_200|>": 151869,
123
+ "<|d0_201|>": 151870,
124
+ "<|d0_202|>": 151871,
125
+ "<|d0_203|>": 151872,
126
+ "<|d0_204|>": 151873,
127
+ "<|d0_205|>": 151874,
128
+ "<|d0_206|>": 151875,
129
+ "<|d0_207|>": 151876,
130
+ "<|d0_208|>": 151877,
131
+ "<|d0_209|>": 151878,
132
+ "<|d0_20|>": 151689,
133
+ "<|d0_210|>": 151879,
134
+ "<|d0_211|>": 151880,
135
+ "<|d0_212|>": 151881,
136
+ "<|d0_213|>": 151882,
137
+ "<|d0_214|>": 151883,
138
+ "<|d0_215|>": 151884,
139
+ "<|d0_216|>": 151885,
140
+ "<|d0_217|>": 151886,
141
+ "<|d0_218|>": 151887,
142
+ "<|d0_219|>": 151888,
143
+ "<|d0_21|>": 151690,
144
+ "<|d0_220|>": 151889,
145
+ "<|d0_221|>": 151890,
146
+ "<|d0_222|>": 151891,
147
+ "<|d0_223|>": 151892,
148
+ "<|d0_224|>": 151893,
149
+ "<|d0_225|>": 151894,
150
+ "<|d0_226|>": 151895,
151
+ "<|d0_227|>": 151896,
152
+ "<|d0_228|>": 151897,
153
+ "<|d0_229|>": 151898,
154
+ "<|d0_22|>": 151691,
155
+ "<|d0_230|>": 151899,
156
+ "<|d0_231|>": 151900,
157
+ "<|d0_232|>": 151901,
158
+ "<|d0_233|>": 151902,
159
+ "<|d0_234|>": 151903,
160
+ "<|d0_235|>": 151904,
161
+ "<|d0_236|>": 151905,
162
+ "<|d0_237|>": 151906,
163
+ "<|d0_238|>": 151907,
164
+ "<|d0_239|>": 151908,
165
+ "<|d0_23|>": 151692,
166
+ "<|d0_240|>": 151909,
167
+ "<|d0_241|>": 151910,
168
+ "<|d0_242|>": 151911,
169
+ "<|d0_243|>": 151912,
170
+ "<|d0_244|>": 151913,
171
+ "<|d0_245|>": 151914,
172
+ "<|d0_246|>": 151915,
173
+ "<|d0_247|>": 151916,
174
+ "<|d0_248|>": 151917,
175
+ "<|d0_249|>": 151918,
176
+ "<|d0_24|>": 151693,
177
+ "<|d0_250|>": 151919,
178
+ "<|d0_251|>": 151920,
179
+ "<|d0_252|>": 151921,
180
+ "<|d0_253|>": 151922,
181
+ "<|d0_254|>": 151923,
182
+ "<|d0_255|>": 151924,
183
+ "<|d0_25|>": 151694,
184
+ "<|d0_26|>": 151695,
185
+ "<|d0_27|>": 151696,
186
+ "<|d0_28|>": 151697,
187
+ "<|d0_29|>": 151698,
188
+ "<|d0_2|>": 151671,
189
+ "<|d0_30|>": 151699,
190
+ "<|d0_31|>": 151700,
191
+ "<|d0_32|>": 151701,
192
+ "<|d0_33|>": 151702,
193
+ "<|d0_34|>": 151703,
194
+ "<|d0_35|>": 151704,
195
+ "<|d0_36|>": 151705,
196
+ "<|d0_37|>": 151706,
197
+ "<|d0_38|>": 151707,
198
+ "<|d0_39|>": 151708,
199
+ "<|d0_3|>": 151672,
200
+ "<|d0_40|>": 151709,
201
+ "<|d0_41|>": 151710,
202
+ "<|d0_42|>": 151711,
203
+ "<|d0_43|>": 151712,
204
+ "<|d0_44|>": 151713,
205
+ "<|d0_45|>": 151714,
206
+ "<|d0_46|>": 151715,
207
+ "<|d0_47|>": 151716,
208
+ "<|d0_48|>": 151717,
209
+ "<|d0_49|>": 151718,
210
+ "<|d0_4|>": 151673,
211
+ "<|d0_50|>": 151719,
212
+ "<|d0_51|>": 151720,
213
+ "<|d0_52|>": 151721,
214
+ "<|d0_53|>": 151722,
215
+ "<|d0_54|>": 151723,
216
+ "<|d0_55|>": 151724,
217
+ "<|d0_56|>": 151725,
218
+ "<|d0_57|>": 151726,
219
+ "<|d0_58|>": 151727,
220
+ "<|d0_59|>": 151728,
221
+ "<|d0_5|>": 151674,
222
+ "<|d0_60|>": 151729,
223
+ "<|d0_61|>": 151730,
224
+ "<|d0_62|>": 151731,
225
+ "<|d0_63|>": 151732,
226
+ "<|d0_64|>": 151733,
227
+ "<|d0_65|>": 151734,
228
+ "<|d0_66|>": 151735,
229
+ "<|d0_67|>": 151736,
230
+ "<|d0_68|>": 151737,
231
+ "<|d0_69|>": 151738,
232
+ "<|d0_6|>": 151675,
233
+ "<|d0_70|>": 151739,
234
+ "<|d0_71|>": 151740,
235
+ "<|d0_72|>": 151741,
236
+ "<|d0_73|>": 151742,
237
+ "<|d0_74|>": 151743,
238
+ "<|d0_75|>": 151744,
239
+ "<|d0_76|>": 151745,
240
+ "<|d0_77|>": 151746,
241
+ "<|d0_78|>": 151747,
242
+ "<|d0_79|>": 151748,
243
+ "<|d0_7|>": 151676,
244
+ "<|d0_80|>": 151749,
245
+ "<|d0_81|>": 151750,
246
+ "<|d0_82|>": 151751,
247
+ "<|d0_83|>": 151752,
248
+ "<|d0_84|>": 151753,
249
+ "<|d0_85|>": 151754,
250
+ "<|d0_86|>": 151755,
251
+ "<|d0_87|>": 151756,
252
+ "<|d0_88|>": 151757,
253
+ "<|d0_89|>": 151758,
254
+ "<|d0_8|>": 151677,
255
+ "<|d0_90|>": 151759,
256
+ "<|d0_91|>": 151760,
257
+ "<|d0_92|>": 151761,
258
+ "<|d0_93|>": 151762,
259
+ "<|d0_94|>": 151763,
260
+ "<|d0_95|>": 151764,
261
+ "<|d0_96|>": 151765,
262
+ "<|d0_97|>": 151766,
263
+ "<|d0_98|>": 151767,
264
+ "<|d0_99|>": 151768,
265
+ "<|d0_9|>": 151678,
266
+ "<|d1_0|>": 151925,
267
+ "<|d1_100|>": 152025,
268
+ "<|d1_101|>": 152026,
269
+ "<|d1_102|>": 152027,
270
+ "<|d1_103|>": 152028,
271
+ "<|d1_104|>": 152029,
272
+ "<|d1_105|>": 152030,
273
+ "<|d1_106|>": 152031,
274
+ "<|d1_107|>": 152032,
275
+ "<|d1_108|>": 152033,
276
+ "<|d1_109|>": 152034,
277
+ "<|d1_10|>": 151935,
278
+ "<|d1_110|>": 152035,
279
+ "<|d1_111|>": 152036,
280
+ "<|d1_112|>": 152037,
281
+ "<|d1_113|>": 152038,
282
+ "<|d1_114|>": 152039,
283
+ "<|d1_115|>": 152040,
284
+ "<|d1_116|>": 152041,
285
+ "<|d1_117|>": 152042,
286
+ "<|d1_118|>": 152043,
287
+ "<|d1_119|>": 152044,
288
+ "<|d1_11|>": 151936,
289
+ "<|d1_120|>": 152045,
290
+ "<|d1_121|>": 152046,
291
+ "<|d1_122|>": 152047,
292
+ "<|d1_123|>": 152048,
293
+ "<|d1_124|>": 152049,
294
+ "<|d1_125|>": 152050,
295
+ "<|d1_126|>": 152051,
296
+ "<|d1_127|>": 152052,
297
+ "<|d1_128|>": 152053,
298
+ "<|d1_129|>": 152054,
299
+ "<|d1_12|>": 151937,
300
+ "<|d1_130|>": 152055,
301
+ "<|d1_131|>": 152056,
302
+ "<|d1_132|>": 152057,
303
+ "<|d1_133|>": 152058,
304
+ "<|d1_134|>": 152059,
305
+ "<|d1_135|>": 152060,
306
+ "<|d1_136|>": 152061,
307
+ "<|d1_137|>": 152062,
308
+ "<|d1_138|>": 152063,
309
+ "<|d1_139|>": 152064,
310
+ "<|d1_13|>": 151938,
311
+ "<|d1_140|>": 152065,
312
+ "<|d1_141|>": 152066,
313
+ "<|d1_142|>": 152067,
314
+ "<|d1_143|>": 152068,
315
+ "<|d1_144|>": 152069,
316
+ "<|d1_145|>": 152070,
317
+ "<|d1_146|>": 152071,
318
+ "<|d1_147|>": 152072,
319
+ "<|d1_148|>": 152073,
320
+ "<|d1_149|>": 152074,
321
+ "<|d1_14|>": 151939,
322
+ "<|d1_150|>": 152075,
323
+ "<|d1_151|>": 152076,
324
+ "<|d1_152|>": 152077,
325
+ "<|d1_153|>": 152078,
326
+ "<|d1_154|>": 152079,
327
+ "<|d1_155|>": 152080,
328
+ "<|d1_156|>": 152081,
329
+ "<|d1_157|>": 152082,
330
+ "<|d1_158|>": 152083,
331
+ "<|d1_159|>": 152084,
332
+ "<|d1_15|>": 151940,
333
+ "<|d1_160|>": 152085,
334
+ "<|d1_161|>": 152086,
335
+ "<|d1_162|>": 152087,
336
+ "<|d1_163|>": 152088,
337
+ "<|d1_164|>": 152089,
338
+ "<|d1_165|>": 152090,
339
+ "<|d1_166|>": 152091,
340
+ "<|d1_167|>": 152092,
341
+ "<|d1_168|>": 152093,
342
+ "<|d1_169|>": 152094,
343
+ "<|d1_16|>": 151941,
344
+ "<|d1_170|>": 152095,
345
+ "<|d1_171|>": 152096,
346
+ "<|d1_172|>": 152097,
347
+ "<|d1_173|>": 152098,
348
+ "<|d1_174|>": 152099,
349
+ "<|d1_175|>": 152100,
350
+ "<|d1_176|>": 152101,
351
+ "<|d1_177|>": 152102,
352
+ "<|d1_178|>": 152103,
353
+ "<|d1_179|>": 152104,
354
+ "<|d1_17|>": 151942,
355
+ "<|d1_180|>": 152105,
356
+ "<|d1_181|>": 152106,
357
+ "<|d1_182|>": 152107,
358
+ "<|d1_183|>": 152108,
359
+ "<|d1_184|>": 152109,
360
+ "<|d1_185|>": 152110,
361
+ "<|d1_186|>": 152111,
362
+ "<|d1_187|>": 152112,
363
+ "<|d1_188|>": 152113,
364
+ "<|d1_189|>": 152114,
365
+ "<|d1_18|>": 151943,
366
+ "<|d1_190|>": 152115,
367
+ "<|d1_191|>": 152116,
368
+ "<|d1_192|>": 152117,
369
+ "<|d1_193|>": 152118,
370
+ "<|d1_194|>": 152119,
371
+ "<|d1_195|>": 152120,
372
+ "<|d1_196|>": 152121,
373
+ "<|d1_197|>": 152122,
374
+ "<|d1_198|>": 152123,
375
+ "<|d1_199|>": 152124,
376
+ "<|d1_19|>": 151944,
377
+ "<|d1_1|>": 151926,
378
+ "<|d1_200|>": 152125,
379
+ "<|d1_201|>": 152126,
380
+ "<|d1_202|>": 152127,
381
+ "<|d1_203|>": 152128,
382
+ "<|d1_204|>": 152129,
383
+ "<|d1_205|>": 152130,
384
+ "<|d1_206|>": 152131,
385
+ "<|d1_207|>": 152132,
386
+ "<|d1_208|>": 152133,
387
+ "<|d1_209|>": 152134,
388
+ "<|d1_20|>": 151945,
389
+ "<|d1_210|>": 152135,
390
+ "<|d1_211|>": 152136,
391
+ "<|d1_212|>": 152137,
392
+ "<|d1_213|>": 152138,
393
+ "<|d1_214|>": 152139,
394
+ "<|d1_215|>": 152140,
395
+ "<|d1_216|>": 152141,
396
+ "<|d1_217|>": 152142,
397
+ "<|d1_218|>": 152143,
398
+ "<|d1_219|>": 152144,
399
+ "<|d1_21|>": 151946,
400
+ "<|d1_220|>": 152145,
401
+ "<|d1_221|>": 152146,
402
+ "<|d1_222|>": 152147,
403
+ "<|d1_223|>": 152148,
404
+ "<|d1_224|>": 152149,
405
+ "<|d1_225|>": 152150,
406
+ "<|d1_226|>": 152151,
407
+ "<|d1_227|>": 152152,
408
+ "<|d1_228|>": 152153,
409
+ "<|d1_229|>": 152154,
410
+ "<|d1_22|>": 151947,
411
+ "<|d1_230|>": 152155,
412
+ "<|d1_231|>": 152156,
413
+ "<|d1_232|>": 152157,
414
+ "<|d1_233|>": 152158,
415
+ "<|d1_234|>": 152159,
416
+ "<|d1_235|>": 152160,
417
+ "<|d1_236|>": 152161,
418
+ "<|d1_237|>": 152162,
419
+ "<|d1_238|>": 152163,
420
+ "<|d1_239|>": 152164,
421
+ "<|d1_23|>": 151948,
422
+ "<|d1_240|>": 152165,
423
+ "<|d1_241|>": 152166,
424
+ "<|d1_242|>": 152167,
425
+ "<|d1_243|>": 152168,
426
+ "<|d1_244|>": 152169,
427
+ "<|d1_245|>": 152170,
428
+ "<|d1_246|>": 152171,
429
+ "<|d1_247|>": 152172,
430
+ "<|d1_248|>": 152173,
431
+ "<|d1_249|>": 152174,
432
+ "<|d1_24|>": 151949,
433
+ "<|d1_250|>": 152175,
434
+ "<|d1_251|>": 152176,
435
+ "<|d1_252|>": 152177,
436
+ "<|d1_253|>": 152178,
437
+ "<|d1_254|>": 152179,
438
+ "<|d1_255|>": 152180,
439
+ "<|d1_25|>": 151950,
440
+ "<|d1_26|>": 151951,
441
+ "<|d1_27|>": 151952,
442
+ "<|d1_28|>": 151953,
443
+ "<|d1_29|>": 151954,
444
+ "<|d1_2|>": 151927,
445
+ "<|d1_30|>": 151955,
446
+ "<|d1_31|>": 151956,
447
+ "<|d1_32|>": 151957,
448
+ "<|d1_33|>": 151958,
449
+ "<|d1_34|>": 151959,
450
+ "<|d1_35|>": 151960,
451
+ "<|d1_36|>": 151961,
452
+ "<|d1_37|>": 151962,
453
+ "<|d1_38|>": 151963,
454
+ "<|d1_39|>": 151964,
455
+ "<|d1_3|>": 151928,
456
+ "<|d1_40|>": 151965,
457
+ "<|d1_41|>": 151966,
458
+ "<|d1_42|>": 151967,
459
+ "<|d1_43|>": 151968,
460
+ "<|d1_44|>": 151969,
461
+ "<|d1_45|>": 151970,
462
+ "<|d1_46|>": 151971,
463
+ "<|d1_47|>": 151972,
464
+ "<|d1_48|>": 151973,
465
+ "<|d1_49|>": 151974,
466
+ "<|d1_4|>": 151929,
467
+ "<|d1_50|>": 151975,
468
+ "<|d1_51|>": 151976,
469
+ "<|d1_52|>": 151977,
470
+ "<|d1_53|>": 151978,
471
+ "<|d1_54|>": 151979,
472
+ "<|d1_55|>": 151980,
473
+ "<|d1_56|>": 151981,
474
+ "<|d1_57|>": 151982,
475
+ "<|d1_58|>": 151983,
476
+ "<|d1_59|>": 151984,
477
+ "<|d1_5|>": 151930,
478
+ "<|d1_60|>": 151985,
479
+ "<|d1_61|>": 151986,
480
+ "<|d1_62|>": 151987,
481
+ "<|d1_63|>": 151988,
482
+ "<|d1_64|>": 151989,
483
+ "<|d1_65|>": 151990,
484
+ "<|d1_66|>": 151991,
485
+ "<|d1_67|>": 151992,
486
+ "<|d1_68|>": 151993,
487
+ "<|d1_69|>": 151994,
488
+ "<|d1_6|>": 151931,
489
+ "<|d1_70|>": 151995,
490
+ "<|d1_71|>": 151996,
491
+ "<|d1_72|>": 151997,
492
+ "<|d1_73|>": 151998,
493
+ "<|d1_74|>": 151999,
494
+ "<|d1_75|>": 152000,
495
+ "<|d1_76|>": 152001,
496
+ "<|d1_77|>": 152002,
497
+ "<|d1_78|>": 152003,
498
+ "<|d1_79|>": 152004,
499
+ "<|d1_7|>": 151932,
500
+ "<|d1_80|>": 152005,
501
+ "<|d1_81|>": 152006,
502
+ "<|d1_82|>": 152007,
503
+ "<|d1_83|>": 152008,
504
+ "<|d1_84|>": 152009,
505
+ "<|d1_85|>": 152010,
506
+ "<|d1_86|>": 152011,
507
+ "<|d1_87|>": 152012,
508
+ "<|d1_88|>": 152013,
509
+ "<|d1_89|>": 152014,
510
+ "<|d1_8|>": 151933,
511
+ "<|d1_90|>": 152015,
512
+ "<|d1_91|>": 152016,
513
+ "<|d1_92|>": 152017,
514
+ "<|d1_93|>": 152018,
515
+ "<|d1_94|>": 152019,
516
+ "<|d1_95|>": 152020,
517
+ "<|d1_96|>": 152021,
518
+ "<|d1_97|>": 152022,
519
+ "<|d1_98|>": 152023,
520
+ "<|d1_99|>": 152024,
521
+ "<|d1_9|>": 151934,
522
+ "<|d2_0|>": 152181,
523
+ "<|d2_100|>": 152281,
524
+ "<|d2_101|>": 152282,
525
+ "<|d2_102|>": 152283,
526
+ "<|d2_103|>": 152284,
527
+ "<|d2_104|>": 152285,
528
+ "<|d2_105|>": 152286,
529
+ "<|d2_106|>": 152287,
530
+ "<|d2_107|>": 152288,
531
+ "<|d2_108|>": 152289,
532
+ "<|d2_109|>": 152290,
533
+ "<|d2_10|>": 152191,
534
+ "<|d2_110|>": 152291,
535
+ "<|d2_111|>": 152292,
536
+ "<|d2_112|>": 152293,
537
+ "<|d2_113|>": 152294,
538
+ "<|d2_114|>": 152295,
539
+ "<|d2_115|>": 152296,
540
+ "<|d2_116|>": 152297,
541
+ "<|d2_117|>": 152298,
542
+ "<|d2_118|>": 152299,
543
+ "<|d2_119|>": 152300,
544
+ "<|d2_11|>": 152192,
545
+ "<|d2_120|>": 152301,
546
+ "<|d2_121|>": 152302,
547
+ "<|d2_122|>": 152303,
548
+ "<|d2_123|>": 152304,
549
+ "<|d2_124|>": 152305,
550
+ "<|d2_125|>": 152306,
551
+ "<|d2_126|>": 152307,
552
+ "<|d2_127|>": 152308,
553
+ "<|d2_128|>": 152309,
554
+ "<|d2_129|>": 152310,
555
+ "<|d2_12|>": 152193,
556
+ "<|d2_130|>": 152311,
557
+ "<|d2_131|>": 152312,
558
+ "<|d2_132|>": 152313,
559
+ "<|d2_133|>": 152314,
560
+ "<|d2_134|>": 152315,
561
+ "<|d2_135|>": 152316,
562
+ "<|d2_136|>": 152317,
563
+ "<|d2_137|>": 152318,
564
+ "<|d2_138|>": 152319,
565
+ "<|d2_139|>": 152320,
566
+ "<|d2_13|>": 152194,
567
+ "<|d2_140|>": 152321,
568
+ "<|d2_141|>": 152322,
569
+ "<|d2_142|>": 152323,
570
+ "<|d2_143|>": 152324,
571
+ "<|d2_144|>": 152325,
572
+ "<|d2_145|>": 152326,
573
+ "<|d2_146|>": 152327,
574
+ "<|d2_147|>": 152328,
575
+ "<|d2_148|>": 152329,
576
+ "<|d2_149|>": 152330,
577
+ "<|d2_14|>": 152195,
578
+ "<|d2_150|>": 152331,
579
+ "<|d2_151|>": 152332,
580
+ "<|d2_152|>": 152333,
581
+ "<|d2_153|>": 152334,
582
+ "<|d2_154|>": 152335,
583
+ "<|d2_155|>": 152336,
584
+ "<|d2_156|>": 152337,
585
+ "<|d2_157|>": 152338,
586
+ "<|d2_158|>": 152339,
587
+ "<|d2_159|>": 152340,
588
+ "<|d2_15|>": 152196,
589
+ "<|d2_160|>": 152341,
590
+ "<|d2_161|>": 152342,
591
+ "<|d2_162|>": 152343,
592
+ "<|d2_163|>": 152344,
593
+ "<|d2_164|>": 152345,
594
+ "<|d2_165|>": 152346,
595
+ "<|d2_166|>": 152347,
596
+ "<|d2_167|>": 152348,
597
+ "<|d2_168|>": 152349,
598
+ "<|d2_169|>": 152350,
599
+ "<|d2_16|>": 152197,
600
+ "<|d2_170|>": 152351,
601
+ "<|d2_171|>": 152352,
602
+ "<|d2_172|>": 152353,
603
+ "<|d2_173|>": 152354,
604
+ "<|d2_174|>": 152355,
605
+ "<|d2_175|>": 152356,
606
+ "<|d2_176|>": 152357,
607
+ "<|d2_177|>": 152358,
608
+ "<|d2_178|>": 152359,
609
+ "<|d2_179|>": 152360,
610
+ "<|d2_17|>": 152198,
611
+ "<|d2_180|>": 152361,
612
+ "<|d2_181|>": 152362,
613
+ "<|d2_182|>": 152363,
614
+ "<|d2_183|>": 152364,
615
+ "<|d2_184|>": 152365,
616
+ "<|d2_185|>": 152366,
617
+ "<|d2_186|>": 152367,
618
+ "<|d2_187|>": 152368,
619
+ "<|d2_188|>": 152369,
620
+ "<|d2_189|>": 152370,
621
+ "<|d2_18|>": 152199,
622
+ "<|d2_190|>": 152371,
623
+ "<|d2_191|>": 152372,
624
+ "<|d2_192|>": 152373,
625
+ "<|d2_193|>": 152374,
626
+ "<|d2_194|>": 152375,
627
+ "<|d2_195|>": 152376,
628
+ "<|d2_196|>": 152377,
629
+ "<|d2_197|>": 152378,
630
+ "<|d2_198|>": 152379,
631
+ "<|d2_199|>": 152380,
632
+ "<|d2_19|>": 152200,
633
+ "<|d2_1|>": 152182,
634
+ "<|d2_200|>": 152381,
635
+ "<|d2_201|>": 152382,
636
+ "<|d2_202|>": 152383,
637
+ "<|d2_203|>": 152384,
638
+ "<|d2_204|>": 152385,
639
+ "<|d2_205|>": 152386,
640
+ "<|d2_206|>": 152387,
641
+ "<|d2_207|>": 152388,
642
+ "<|d2_208|>": 152389,
643
+ "<|d2_209|>": 152390,
644
+ "<|d2_20|>": 152201,
645
+ "<|d2_210|>": 152391,
646
+ "<|d2_211|>": 152392,
647
+ "<|d2_212|>": 152393,
648
+ "<|d2_213|>": 152394,
649
+ "<|d2_214|>": 152395,
650
+ "<|d2_215|>": 152396,
651
+ "<|d2_216|>": 152397,
652
+ "<|d2_217|>": 152398,
653
+ "<|d2_218|>": 152399,
654
+ "<|d2_219|>": 152400,
655
+ "<|d2_21|>": 152202,
656
+ "<|d2_220|>": 152401,
657
+ "<|d2_221|>": 152402,
658
+ "<|d2_222|>": 152403,
659
+ "<|d2_223|>": 152404,
660
+ "<|d2_224|>": 152405,
661
+ "<|d2_225|>": 152406,
662
+ "<|d2_226|>": 152407,
663
+ "<|d2_227|>": 152408,
664
+ "<|d2_228|>": 152409,
665
+ "<|d2_229|>": 152410,
666
+ "<|d2_22|>": 152203,
667
+ "<|d2_230|>": 152411,
668
+ "<|d2_231|>": 152412,
669
+ "<|d2_232|>": 152413,
670
+ "<|d2_233|>": 152414,
671
+ "<|d2_234|>": 152415,
672
+ "<|d2_235|>": 152416,
673
+ "<|d2_236|>": 152417,
674
+ "<|d2_237|>": 152418,
675
+ "<|d2_238|>": 152419,
676
+ "<|d2_239|>": 152420,
677
+ "<|d2_23|>": 152204,
678
+ "<|d2_240|>": 152421,
679
+ "<|d2_241|>": 152422,
680
+ "<|d2_242|>": 152423,
681
+ "<|d2_243|>": 152424,
682
+ "<|d2_244|>": 152425,
683
+ "<|d2_245|>": 152426,
684
+ "<|d2_246|>": 152427,
685
+ "<|d2_247|>": 152428,
686
+ "<|d2_248|>": 152429,
687
+ "<|d2_249|>": 152430,
688
+ "<|d2_24|>": 152205,
689
+ "<|d2_250|>": 152431,
690
+ "<|d2_251|>": 152432,
691
+ "<|d2_252|>": 152433,
692
+ "<|d2_253|>": 152434,
693
+ "<|d2_254|>": 152435,
694
+ "<|d2_255|>": 152436,
695
+ "<|d2_25|>": 152206,
696
+ "<|d2_26|>": 152207,
697
+ "<|d2_27|>": 152208,
698
+ "<|d2_28|>": 152209,
699
+ "<|d2_29|>": 152210,
700
+ "<|d2_2|>": 152183,
701
+ "<|d2_30|>": 152211,
702
+ "<|d2_31|>": 152212,
703
+ "<|d2_32|>": 152213,
704
+ "<|d2_33|>": 152214,
705
+ "<|d2_34|>": 152215,
706
+ "<|d2_35|>": 152216,
707
+ "<|d2_36|>": 152217,
708
+ "<|d2_37|>": 152218,
709
+ "<|d2_38|>": 152219,
710
+ "<|d2_39|>": 152220,
711
+ "<|d2_3|>": 152184,
712
+ "<|d2_40|>": 152221,
713
+ "<|d2_41|>": 152222,
714
+ "<|d2_42|>": 152223,
715
+ "<|d2_43|>": 152224,
716
+ "<|d2_44|>": 152225,
717
+ "<|d2_45|>": 152226,
718
+ "<|d2_46|>": 152227,
719
+ "<|d2_47|>": 152228,
720
+ "<|d2_48|>": 152229,
721
+ "<|d2_49|>": 152230,
722
+ "<|d2_4|>": 152185,
723
+ "<|d2_50|>": 152231,
724
+ "<|d2_51|>": 152232,
725
+ "<|d2_52|>": 152233,
726
+ "<|d2_53|>": 152234,
727
+ "<|d2_54|>": 152235,
728
+ "<|d2_55|>": 152236,
729
+ "<|d2_56|>": 152237,
730
+ "<|d2_57|>": 152238,
731
+ "<|d2_58|>": 152239,
732
+ "<|d2_59|>": 152240,
733
+ "<|d2_5|>": 152186,
734
+ "<|d2_60|>": 152241,
735
+ "<|d2_61|>": 152242,
736
+ "<|d2_62|>": 152243,
737
+ "<|d2_63|>": 152244,
738
+ "<|d2_64|>": 152245,
739
+ "<|d2_65|>": 152246,
740
+ "<|d2_66|>": 152247,
741
+ "<|d2_67|>": 152248,
742
+ "<|d2_68|>": 152249,
743
+ "<|d2_69|>": 152250,
744
+ "<|d2_6|>": 152187,
745
+ "<|d2_70|>": 152251,
746
+ "<|d2_71|>": 152252,
747
+ "<|d2_72|>": 152253,
748
+ "<|d2_73|>": 152254,
749
+ "<|d2_74|>": 152255,
750
+ "<|d2_75|>": 152256,
751
+ "<|d2_76|>": 152257,
752
+ "<|d2_77|>": 152258,
753
+ "<|d2_78|>": 152259,
754
+ "<|d2_79|>": 152260,
755
+ "<|d2_7|>": 152188,
756
+ "<|d2_80|>": 152261,
757
+ "<|d2_81|>": 152262,
758
+ "<|d2_82|>": 152263,
759
+ "<|d2_83|>": 152264,
760
+ "<|d2_84|>": 152265,
761
+ "<|d2_85|>": 152266,
762
+ "<|d2_86|>": 152267,
763
+ "<|d2_87|>": 152268,
764
+ "<|d2_88|>": 152269,
765
+ "<|d2_89|>": 152270,
766
+ "<|d2_8|>": 152189,
767
+ "<|d2_90|>": 152271,
768
+ "<|d2_91|>": 152272,
769
+ "<|d2_92|>": 152273,
770
+ "<|d2_93|>": 152274,
771
+ "<|d2_94|>": 152275,
772
+ "<|d2_95|>": 152276,
773
+ "<|d2_96|>": 152277,
774
+ "<|d2_97|>": 152278,
775
+ "<|d2_98|>": 152279,
776
+ "<|d2_99|>": 152280,
777
+ "<|d2_9|>": 152190,
778
+ "<|d3_0|>": 152437,
779
+ "<|d3_100|>": 152537,
780
+ "<|d3_101|>": 152538,
781
+ "<|d3_102|>": 152539,
782
+ "<|d3_103|>": 152540,
783
+ "<|d3_104|>": 152541,
784
+ "<|d3_105|>": 152542,
785
+ "<|d3_106|>": 152543,
786
+ "<|d3_107|>": 152544,
787
+ "<|d3_108|>": 152545,
788
+ "<|d3_109|>": 152546,
789
+ "<|d3_10|>": 152447,
790
+ "<|d3_110|>": 152547,
791
+ "<|d3_111|>": 152548,
792
+ "<|d3_112|>": 152549,
793
+ "<|d3_113|>": 152550,
794
+ "<|d3_114|>": 152551,
795
+ "<|d3_115|>": 152552,
796
+ "<|d3_116|>": 152553,
797
+ "<|d3_117|>": 152554,
798
+ "<|d3_118|>": 152555,
799
+ "<|d3_119|>": 152556,
800
+ "<|d3_11|>": 152448,
801
+ "<|d3_120|>": 152557,
802
+ "<|d3_121|>": 152558,
803
+ "<|d3_122|>": 152559,
804
+ "<|d3_123|>": 152560,
805
+ "<|d3_124|>": 152561,
806
+ "<|d3_125|>": 152562,
807
+ "<|d3_126|>": 152563,
808
+ "<|d3_127|>": 152564,
809
+ "<|d3_128|>": 152565,
810
+ "<|d3_129|>": 152566,
811
+ "<|d3_12|>": 152449,
812
+ "<|d3_130|>": 152567,
813
+ "<|d3_131|>": 152568,
814
+ "<|d3_132|>": 152569,
815
+ "<|d3_133|>": 152570,
816
+ "<|d3_134|>": 152571,
817
+ "<|d3_135|>": 152572,
818
+ "<|d3_136|>": 152573,
819
+ "<|d3_137|>": 152574,
820
+ "<|d3_138|>": 152575,
821
+ "<|d3_139|>": 152576,
822
+ "<|d3_13|>": 152450,
823
+ "<|d3_140|>": 152577,
824
+ "<|d3_141|>": 152578,
825
+ "<|d3_142|>": 152579,
826
+ "<|d3_143|>": 152580,
827
+ "<|d3_144|>": 152581,
828
+ "<|d3_145|>": 152582,
829
+ "<|d3_146|>": 152583,
830
+ "<|d3_147|>": 152584,
831
+ "<|d3_148|>": 152585,
832
+ "<|d3_149|>": 152586,
833
+ "<|d3_14|>": 152451,
834
+ "<|d3_150|>": 152587,
835
+ "<|d3_151|>": 152588,
836
+ "<|d3_152|>": 152589,
837
+ "<|d3_153|>": 152590,
838
+ "<|d3_154|>": 152591,
839
+ "<|d3_155|>": 152592,
840
+ "<|d3_156|>": 152593,
841
+ "<|d3_157|>": 152594,
842
+ "<|d3_158|>": 152595,
843
+ "<|d3_159|>": 152596,
844
+ "<|d3_15|>": 152452,
845
+ "<|d3_160|>": 152597,
846
+ "<|d3_161|>": 152598,
847
+ "<|d3_162|>": 152599,
848
+ "<|d3_163|>": 152600,
849
+ "<|d3_164|>": 152601,
850
+ "<|d3_165|>": 152602,
851
+ "<|d3_166|>": 152603,
852
+ "<|d3_167|>": 152604,
853
+ "<|d3_168|>": 152605,
854
+ "<|d3_169|>": 152606,
855
+ "<|d3_16|>": 152453,
856
+ "<|d3_170|>": 152607,
857
+ "<|d3_171|>": 152608,
858
+ "<|d3_172|>": 152609,
859
+ "<|d3_173|>": 152610,
860
+ "<|d3_174|>": 152611,
861
+ "<|d3_175|>": 152612,
862
+ "<|d3_176|>": 152613,
863
+ "<|d3_177|>": 152614,
864
+ "<|d3_178|>": 152615,
865
+ "<|d3_179|>": 152616,
866
+ "<|d3_17|>": 152454,
867
+ "<|d3_180|>": 152617,
868
+ "<|d3_181|>": 152618,
869
+ "<|d3_182|>": 152619,
870
+ "<|d3_183|>": 152620,
871
+ "<|d3_184|>": 152621,
872
+ "<|d3_185|>": 152622,
873
+ "<|d3_186|>": 152623,
874
+ "<|d3_187|>": 152624,
875
+ "<|d3_188|>": 152625,
876
+ "<|d3_189|>": 152626,
877
+ "<|d3_18|>": 152455,
878
+ "<|d3_190|>": 152627,
879
+ "<|d3_191|>": 152628,
880
+ "<|d3_192|>": 152629,
881
+ "<|d3_193|>": 152630,
882
+ "<|d3_194|>": 152631,
883
+ "<|d3_195|>": 152632,
884
+ "<|d3_196|>": 152633,
885
+ "<|d3_197|>": 152634,
886
+ "<|d3_198|>": 152635,
887
+ "<|d3_199|>": 152636,
888
+ "<|d3_19|>": 152456,
889
+ "<|d3_1|>": 152438,
890
+ "<|d3_200|>": 152637,
891
+ "<|d3_201|>": 152638,
892
+ "<|d3_202|>": 152639,
893
+ "<|d3_203|>": 152640,
894
+ "<|d3_204|>": 152641,
895
+ "<|d3_205|>": 152642,
896
+ "<|d3_206|>": 152643,
897
+ "<|d3_207|>": 152644,
898
+ "<|d3_208|>": 152645,
899
+ "<|d3_209|>": 152646,
900
+ "<|d3_20|>": 152457,
901
+ "<|d3_210|>": 152647,
902
+ "<|d3_211|>": 152648,
903
+ "<|d3_212|>": 152649,
904
+ "<|d3_213|>": 152650,
905
+ "<|d3_214|>": 152651,
906
+ "<|d3_215|>": 152652,
907
+ "<|d3_216|>": 152653,
908
+ "<|d3_217|>": 152654,
909
+ "<|d3_218|>": 152655,
910
+ "<|d3_219|>": 152656,
911
+ "<|d3_21|>": 152458,
912
+ "<|d3_220|>": 152657,
913
+ "<|d3_221|>": 152658,
914
+ "<|d3_222|>": 152659,
915
+ "<|d3_223|>": 152660,
916
+ "<|d3_224|>": 152661,
917
+ "<|d3_225|>": 152662,
918
+ "<|d3_226|>": 152663,
919
+ "<|d3_227|>": 152664,
920
+ "<|d3_228|>": 152665,
921
+ "<|d3_229|>": 152666,
922
+ "<|d3_22|>": 152459,
923
+ "<|d3_230|>": 152667,
924
+ "<|d3_231|>": 152668,
925
+ "<|d3_232|>": 152669,
926
+ "<|d3_233|>": 152670,
927
+ "<|d3_234|>": 152671,
928
+ "<|d3_235|>": 152672,
929
+ "<|d3_236|>": 152673,
930
+ "<|d3_237|>": 152674,
931
+ "<|d3_238|>": 152675,
932
+ "<|d3_239|>": 152676,
933
+ "<|d3_23|>": 152460,
934
+ "<|d3_240|>": 152677,
935
+ "<|d3_241|>": 152678,
936
+ "<|d3_242|>": 152679,
937
+ "<|d3_243|>": 152680,
938
+ "<|d3_244|>": 152681,
939
+ "<|d3_245|>": 152682,
940
+ "<|d3_246|>": 152683,
941
+ "<|d3_247|>": 152684,
942
+ "<|d3_248|>": 152685,
943
+ "<|d3_249|>": 152686,
944
+ "<|d3_24|>": 152461,
945
+ "<|d3_250|>": 152687,
946
+ "<|d3_251|>": 152688,
947
+ "<|d3_252|>": 152689,
948
+ "<|d3_253|>": 152690,
949
+ "<|d3_254|>": 152691,
950
+ "<|d3_255|>": 152692,
951
+ "<|d3_25|>": 152462,
952
+ "<|d3_26|>": 152463,
953
+ "<|d3_27|>": 152464,
954
+ "<|d3_28|>": 152465,
955
+ "<|d3_29|>": 152466,
956
+ "<|d3_2|>": 152439,
957
+ "<|d3_30|>": 152467,
958
+ "<|d3_31|>": 152468,
959
+ "<|d3_32|>": 152469,
960
+ "<|d3_33|>": 152470,
961
+ "<|d3_34|>": 152471,
962
+ "<|d3_35|>": 152472,
963
+ "<|d3_36|>": 152473,
964
+ "<|d3_37|>": 152474,
965
+ "<|d3_38|>": 152475,
966
+ "<|d3_39|>": 152476,
967
+ "<|d3_3|>": 152440,
968
+ "<|d3_40|>": 152477,
969
+ "<|d3_41|>": 152478,
970
+ "<|d3_42|>": 152479,
971
+ "<|d3_43|>": 152480,
972
+ "<|d3_44|>": 152481,
973
+ "<|d3_45|>": 152482,
974
+ "<|d3_46|>": 152483,
975
+ "<|d3_47|>": 152484,
976
+ "<|d3_48|>": 152485,
977
+ "<|d3_49|>": 152486,
978
+ "<|d3_4|>": 152441,
979
+ "<|d3_50|>": 152487,
980
+ "<|d3_51|>": 152488,
981
+ "<|d3_52|>": 152489,
982
+ "<|d3_53|>": 152490,
983
+ "<|d3_54|>": 152491,
984
+ "<|d3_55|>": 152492,
985
+ "<|d3_56|>": 152493,
986
+ "<|d3_57|>": 152494,
987
+ "<|d3_58|>": 152495,
988
+ "<|d3_59|>": 152496,
989
+ "<|d3_5|>": 152442,
990
+ "<|d3_60|>": 152497,
991
+ "<|d3_61|>": 152498,
992
+ "<|d3_62|>": 152499,
993
+ "<|d3_63|>": 152500,
994
+ "<|d3_64|>": 152501,
995
+ "<|d3_65|>": 152502,
996
+ "<|d3_66|>": 152503,
997
+ "<|d3_67|>": 152504,
998
+ "<|d3_68|>": 152505,
999
+ "<|d3_69|>": 152506,
1000
+ "<|d3_6|>": 152443,
1001
+ "<|d3_70|>": 152507,
1002
+ "<|d3_71|>": 152508,
1003
+ "<|d3_72|>": 152509,
1004
+ "<|d3_73|>": 152510,
1005
+ "<|d3_74|>": 152511,
1006
+ "<|d3_75|>": 152512,
1007
+ "<|d3_76|>": 152513,
1008
+ "<|d3_77|>": 152514,
1009
+ "<|d3_78|>": 152515,
1010
+ "<|d3_79|>": 152516,
1011
+ "<|d3_7|>": 152444,
1012
+ "<|d3_80|>": 152517,
1013
+ "<|d3_81|>": 152518,
1014
+ "<|d3_82|>": 152519,
1015
+ "<|d3_83|>": 152520,
1016
+ "<|d3_84|>": 152521,
1017
+ "<|d3_85|>": 152522,
1018
+ "<|d3_86|>": 152523,
1019
+ "<|d3_87|>": 152524,
1020
+ "<|d3_88|>": 152525,
1021
+ "<|d3_89|>": 152526,
1022
+ "<|d3_8|>": 152445,
1023
+ "<|d3_90|>": 152527,
1024
+ "<|d3_91|>": 152528,
1025
+ "<|d3_92|>": 152529,
1026
+ "<|d3_93|>": 152530,
1027
+ "<|d3_94|>": 152531,
1028
+ "<|d3_95|>": 152532,
1029
+ "<|d3_96|>": 152533,
1030
+ "<|d3_97|>": 152534,
1031
+ "<|d3_98|>": 152535,
1032
+ "<|d3_99|>": 152536,
1033
+ "<|d3_9|>": 152446,
1034
+ "<|endoftext|>": 151643,
1035
+ "<|file_sep|>": 151664,
1036
+ "<|fim_middle|>": 151660,
1037
+ "<|fim_pad|>": 151662,
1038
+ "<|fim_prefix|>": 151659,
1039
+ "<|fim_suffix|>": 151661,
1040
+ "<|im_end|>": 151645,
1041
+ "<|im_start|>": 151644,
1042
+ "<|image_pad|>": 151655,
1043
+ "<|object_ref_end|>": 151647,
1044
+ "<|object_ref_start|>": 151646,
1045
+ "<|quad_end|>": 151651,
1046
+ "<|quad_start|>": 151650,
1047
+ "<|repo_name|>": 151663,
1048
+ "<|video_pad|>": 151656,
1049
+ "<|vision_end|>": 151653,
1050
+ "<|vision_pad|>": 151654,
1051
+ "<|vision_start|>": 151652
1052
+ }
chat_template.jinja ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if message.content is string %}
27
+ {%- set content = message.content %}
28
+ {%- else %}
29
+ {%- set content = '' %}
30
+ {%- endif %}
31
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
32
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
33
+ {%- elif message.role == "assistant" %}
34
+ {%- set reasoning_content = '' %}
35
+ {%- if message.reasoning_content is string %}
36
+ {%- set reasoning_content = message.reasoning_content %}
37
+ {%- else %}
38
+ {%- if '</think>' in content %}
39
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
40
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
41
+ {%- endif %}
42
+ {%- endif %}
43
+ {%- if loop.index0 > ns.last_query_index %}
44
+ {%- if loop.last or (not loop.last and reasoning_content) %}
45
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
46
+ {%- else %}
47
+ {{- '<|im_start|>' + message.role + '\n' + content }}
48
+ {%- endif %}
49
+ {%- else %}
50
+ {{- '<|im_start|>' + message.role + '\n' + content }}
51
+ {%- endif %}
52
+ {%- if message.tool_calls %}
53
+ {%- for tool_call in message.tool_calls %}
54
+ {%- if (loop.first and content) or (not loop.first) %}
55
+ {{- '\n' }}
56
+ {%- endif %}
57
+ {%- if tool_call.function %}
58
+ {%- set tool_call = tool_call.function %}
59
+ {%- endif %}
60
+ {{- '<tool_call>\n{"name": "' }}
61
+ {{- tool_call.name }}
62
+ {{- '", "arguments": ' }}
63
+ {%- if tool_call.arguments is string %}
64
+ {{- tool_call.arguments }}
65
+ {%- else %}
66
+ {{- tool_call.arguments | tojson }}
67
+ {%- endif %}
68
+ {{- '}\n</tool_call>' }}
69
+ {%- endfor %}
70
+ {%- endif %}
71
+ {{- '<|im_end|>\n' }}
72
+ {%- elif message.role == "tool" %}
73
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
74
+ {{- '<|im_start|>user' }}
75
+ {%- endif %}
76
+ {{- '\n<tool_response>\n' }}
77
+ {{- content }}
78
+ {{- '\n</tool_response>' }}
79
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
80
+ {{- '<|im_end|>\n' }}
81
+ {%- endif %}
82
+ {%- endif %}
83
+ {%- endfor %}
84
+ {%- if add_generation_prompt %}
85
+ {{- '<|im_start|>assistant\n' }}
86
+ {%- if enable_thinking is defined and enable_thinking is false %}
87
+ {{- '<think>\n\n</think>\n\n' }}
88
+ {%- endif %}
89
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "dtype": "float32",
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_types": [
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention"
43
+ ],
44
+ "max_position_embeddings": 40960,
45
+ "max_window_layers": 28,
46
+ "model_type": "qwen3",
47
+ "num_attention_heads": 16,
48
+ "num_hidden_layers": 28,
49
+ "num_key_value_heads": 8,
50
+ "rms_norm_eps": 1e-06,
51
+ "rope_scaling": null,
52
+ "rope_theta": 1000000,
53
+ "sliding_window": null,
54
+ "tie_word_embeddings": true,
55
+ "transformers_version": "4.56.1",
56
+ "use_cache": false,
57
+ "use_sliding_window": false,
58
+ "vocab_size": 152693
59
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "temperature": 0.6,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
+ "transformers_version": "4.56.1"
13
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cd3bd3bffacd3cf25ee3318b3badb44ccfde5e2f963f6ca2a5acee9e7532390
3
+ size 1506531744
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7d3f1d26dcfdbc174b6e331fb6f098f228415dd39d53cbaf637312abae9a09e
3
+ size 3013253306
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d6dbaea400350134c87609f2a6e7d568a19190ab54bccf06fc0c89f270eb6d2
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1033b16d48bfd019e213c24993bc27ceb3ea27083616ea8cc3a7a6be4da61de2
3
+ size 1064
special_tokens_map.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6a6445ce802be05913d6a5f9c3b20a81a5e0a0e9eed6d2500290208e77870f5
3
+ size 11613702
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
trainer_state.json ADDED
@@ -0,0 +1,1800 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 99.37735849056604,
6
+ "eval_steps": 500,
7
+ "global_step": 7950,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0,
14
+ "eval_loss": 12.89406681060791,
15
+ "eval_runtime": 4.5915,
16
+ "eval_samples_per_second": 44.865,
17
+ "eval_steps_per_second": 11.325,
18
+ "memory/device_mem_reserved(gib)": 7.41,
19
+ "memory/max_mem_active(gib)": 7.36,
20
+ "memory/max_mem_allocated(gib)": 7.36,
21
+ "step": 0
22
+ },
23
+ {
24
+ "epoch": 0.6289308176100629,
25
+ "grad_norm": 45.51484298706055,
26
+ "learning_rate": 6.163522012578617e-06,
27
+ "loss": 10.6397,
28
+ "memory/device_mem_reserved(gib)": 22.98,
29
+ "memory/max_mem_active(gib)": 21.19,
30
+ "memory/max_mem_allocated(gib)": 21.19,
31
+ "step": 50
32
+ },
33
+ {
34
+ "epoch": 1.251572327044025,
35
+ "grad_norm": 12.857139587402344,
36
+ "learning_rate": 1.2452830188679246e-05,
37
+ "loss": 4.0542,
38
+ "memory/device_mem_reserved(gib)": 22.98,
39
+ "memory/max_mem_active(gib)": 21.19,
40
+ "memory/max_mem_allocated(gib)": 21.19,
41
+ "step": 100
42
+ },
43
+ {
44
+ "epoch": 1.880503144654088,
45
+ "grad_norm": 24.607196807861328,
46
+ "learning_rate": 1.8742138364779874e-05,
47
+ "loss": 2.7236,
48
+ "memory/device_mem_reserved(gib)": 22.98,
49
+ "memory/max_mem_active(gib)": 21.19,
50
+ "memory/max_mem_allocated(gib)": 21.19,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 2.50314465408805,
55
+ "grad_norm": 20.296478271484375,
56
+ "learning_rate": 2.5031446540880503e-05,
57
+ "loss": 1.9898,
58
+ "memory/device_mem_reserved(gib)": 22.98,
59
+ "memory/max_mem_active(gib)": 21.19,
60
+ "memory/max_mem_allocated(gib)": 21.19,
61
+ "step": 200
62
+ },
63
+ {
64
+ "epoch": 3.1257861635220126,
65
+ "grad_norm": 59.58812713623047,
66
+ "learning_rate": 3.132075471698113e-05,
67
+ "loss": 1.6871,
68
+ "memory/device_mem_reserved(gib)": 22.98,
69
+ "memory/max_mem_active(gib)": 21.19,
70
+ "memory/max_mem_allocated(gib)": 21.19,
71
+ "step": 250
72
+ },
73
+ {
74
+ "epoch": 3.7547169811320753,
75
+ "grad_norm": 19.767593383789062,
76
+ "learning_rate": 3.761006289308177e-05,
77
+ "loss": 1.5739,
78
+ "memory/device_mem_reserved(gib)": 22.98,
79
+ "memory/max_mem_active(gib)": 21.19,
80
+ "memory/max_mem_allocated(gib)": 21.19,
81
+ "step": 300
82
+ },
83
+ {
84
+ "epoch": 4.377358490566038,
85
+ "grad_norm": 16.475500106811523,
86
+ "learning_rate": 4.3899371069182394e-05,
87
+ "loss": 1.4301,
88
+ "memory/device_mem_reserved(gib)": 22.98,
89
+ "memory/max_mem_active(gib)": 21.19,
90
+ "memory/max_mem_allocated(gib)": 21.19,
91
+ "step": 350
92
+ },
93
+ {
94
+ "epoch": 5.0,
95
+ "grad_norm": 16.650306701660156,
96
+ "learning_rate": 5.018867924528302e-05,
97
+ "loss": 1.2817,
98
+ "memory/device_mem_reserved(gib)": 22.98,
99
+ "memory/max_mem_active(gib)": 21.19,
100
+ "memory/max_mem_allocated(gib)": 21.19,
101
+ "step": 400
102
+ },
103
+ {
104
+ "epoch": 5.628930817610063,
105
+ "grad_norm": 10.766618728637695,
106
+ "learning_rate": 5.6477987421383646e-05,
107
+ "loss": 1.0323,
108
+ "memory/device_mem_reserved(gib)": 22.98,
109
+ "memory/max_mem_active(gib)": 21.19,
110
+ "memory/max_mem_allocated(gib)": 21.19,
111
+ "step": 450
112
+ },
113
+ {
114
+ "epoch": 6.251572327044025,
115
+ "grad_norm": 11.595151901245117,
116
+ "learning_rate": 6.276729559748428e-05,
117
+ "loss": 0.8638,
118
+ "memory/device_mem_reserved(gib)": 22.98,
119
+ "memory/max_mem_active(gib)": 21.19,
120
+ "memory/max_mem_allocated(gib)": 21.19,
121
+ "step": 500
122
+ },
123
+ {
124
+ "epoch": 6.251572327044025,
125
+ "eval_loss": 0.9813075661659241,
126
+ "eval_runtime": 3.2032,
127
+ "eval_samples_per_second": 64.31,
128
+ "eval_steps_per_second": 16.234,
129
+ "memory/device_mem_reserved(gib)": 22.98,
130
+ "memory/max_mem_active(gib)": 21.19,
131
+ "memory/max_mem_allocated(gib)": 21.19,
132
+ "step": 500
133
+ },
134
+ {
135
+ "epoch": 6.880503144654088,
136
+ "grad_norm": 11.030454635620117,
137
+ "learning_rate": 6.90566037735849e-05,
138
+ "loss": 0.6994,
139
+ "memory/device_mem_reserved(gib)": 22.98,
140
+ "memory/max_mem_active(gib)": 21.19,
141
+ "memory/max_mem_allocated(gib)": 21.19,
142
+ "step": 550
143
+ },
144
+ {
145
+ "epoch": 7.50314465408805,
146
+ "grad_norm": 9.649336814880371,
147
+ "learning_rate": 7.534591194968554e-05,
148
+ "loss": 0.5307,
149
+ "memory/device_mem_reserved(gib)": 22.98,
150
+ "memory/max_mem_active(gib)": 21.19,
151
+ "memory/max_mem_allocated(gib)": 21.19,
152
+ "step": 600
153
+ },
154
+ {
155
+ "epoch": 8.125786163522013,
156
+ "grad_norm": 6.349382400512695,
157
+ "learning_rate": 8.163522012578617e-05,
158
+ "loss": 0.4517,
159
+ "memory/device_mem_reserved(gib)": 22.98,
160
+ "memory/max_mem_active(gib)": 21.19,
161
+ "memory/max_mem_allocated(gib)": 21.19,
162
+ "step": 650
163
+ },
164
+ {
165
+ "epoch": 8.754716981132075,
166
+ "grad_norm": 7.1547746658325195,
167
+ "learning_rate": 8.79245283018868e-05,
168
+ "loss": 0.3415,
169
+ "memory/device_mem_reserved(gib)": 22.98,
170
+ "memory/max_mem_active(gib)": 21.19,
171
+ "memory/max_mem_allocated(gib)": 21.19,
172
+ "step": 700
173
+ },
174
+ {
175
+ "epoch": 9.377358490566039,
176
+ "grad_norm": 5.784008979797363,
177
+ "learning_rate": 9.421383647798742e-05,
178
+ "loss": 0.2882,
179
+ "memory/device_mem_reserved(gib)": 22.98,
180
+ "memory/max_mem_active(gib)": 21.19,
181
+ "memory/max_mem_allocated(gib)": 21.19,
182
+ "step": 750
183
+ },
184
+ {
185
+ "epoch": 10.0,
186
+ "grad_norm": 6.659787178039551,
187
+ "learning_rate": 9.999992288473615e-05,
188
+ "loss": 0.2553,
189
+ "memory/device_mem_reserved(gib)": 22.98,
190
+ "memory/max_mem_active(gib)": 21.19,
191
+ "memory/max_mem_allocated(gib)": 21.19,
192
+ "step": 800
193
+ },
194
+ {
195
+ "epoch": 10.628930817610064,
196
+ "grad_norm": 4.395882606506348,
197
+ "learning_rate": 9.998594639794501e-05,
198
+ "loss": 0.1754,
199
+ "memory/device_mem_reserved(gib)": 22.98,
200
+ "memory/max_mem_active(gib)": 21.19,
201
+ "memory/max_mem_allocated(gib)": 21.19,
202
+ "step": 850
203
+ },
204
+ {
205
+ "epoch": 11.251572327044025,
206
+ "grad_norm": 4.044342517852783,
207
+ "learning_rate": 9.994787912603747e-05,
208
+ "loss": 0.158,
209
+ "memory/device_mem_reserved(gib)": 22.98,
210
+ "memory/max_mem_active(gib)": 21.19,
211
+ "memory/max_mem_allocated(gib)": 21.19,
212
+ "step": 900
213
+ },
214
+ {
215
+ "epoch": 11.880503144654089,
216
+ "grad_norm": 3.8275651931762695,
217
+ "learning_rate": 9.988573941557954e-05,
218
+ "loss": 0.1206,
219
+ "memory/device_mem_reserved(gib)": 22.98,
220
+ "memory/max_mem_active(gib)": 21.19,
221
+ "memory/max_mem_allocated(gib)": 21.19,
222
+ "step": 950
223
+ },
224
+ {
225
+ "epoch": 12.50314465408805,
226
+ "grad_norm": 3.210361957550049,
227
+ "learning_rate": 9.979955721487716e-05,
228
+ "loss": 0.0884,
229
+ "memory/device_mem_reserved(gib)": 22.98,
230
+ "memory/max_mem_active(gib)": 21.19,
231
+ "memory/max_mem_allocated(gib)": 21.19,
232
+ "step": 1000
233
+ },
234
+ {
235
+ "epoch": 12.50314465408805,
236
+ "eval_loss": 0.4656066298484802,
237
+ "eval_runtime": 3.193,
238
+ "eval_samples_per_second": 64.516,
239
+ "eval_steps_per_second": 16.286,
240
+ "memory/device_mem_reserved(gib)": 22.98,
241
+ "memory/max_mem_active(gib)": 21.19,
242
+ "memory/max_mem_allocated(gib)": 21.19,
243
+ "step": 1000
244
+ },
245
+ {
246
+ "epoch": 13.125786163522013,
247
+ "grad_norm": 3.0777196884155273,
248
+ "learning_rate": 9.968937405954233e-05,
249
+ "loss": 0.0776,
250
+ "memory/device_mem_reserved(gib)": 22.98,
251
+ "memory/max_mem_active(gib)": 21.19,
252
+ "memory/max_mem_allocated(gib)": 21.19,
253
+ "step": 1050
254
+ },
255
+ {
256
+ "epoch": 13.754716981132075,
257
+ "grad_norm": 2.625596761703491,
258
+ "learning_rate": 9.955524305247519e-05,
259
+ "loss": 0.0561,
260
+ "memory/device_mem_reserved(gib)": 22.98,
261
+ "memory/max_mem_active(gib)": 21.19,
262
+ "memory/max_mem_allocated(gib)": 21.19,
263
+ "step": 1100
264
+ },
265
+ {
266
+ "epoch": 14.377358490566039,
267
+ "grad_norm": 2.118314743041992,
268
+ "learning_rate": 9.93972288382709e-05,
269
+ "loss": 0.0505,
270
+ "memory/device_mem_reserved(gib)": 22.98,
271
+ "memory/max_mem_active(gib)": 21.19,
272
+ "memory/max_mem_allocated(gib)": 21.19,
273
+ "step": 1150
274
+ },
275
+ {
276
+ "epoch": 15.0,
277
+ "grad_norm": 2.850794553756714,
278
+ "learning_rate": 9.921540757206413e-05,
279
+ "loss": 0.0474,
280
+ "memory/device_mem_reserved(gib)": 22.98,
281
+ "memory/max_mem_active(gib)": 21.19,
282
+ "memory/max_mem_allocated(gib)": 21.19,
283
+ "step": 1200
284
+ },
285
+ {
286
+ "epoch": 15.628930817610064,
287
+ "grad_norm": 1.7832540273666382,
288
+ "learning_rate": 9.900986688282601e-05,
289
+ "loss": 0.0369,
290
+ "memory/device_mem_reserved(gib)": 22.98,
291
+ "memory/max_mem_active(gib)": 21.19,
292
+ "memory/max_mem_allocated(gib)": 21.19,
293
+ "step": 1250
294
+ },
295
+ {
296
+ "epoch": 16.251572327044027,
297
+ "grad_norm": 2.3697540760040283,
298
+ "learning_rate": 9.878070583113123e-05,
299
+ "loss": 0.0361,
300
+ "memory/device_mem_reserved(gib)": 22.98,
301
+ "memory/max_mem_active(gib)": 21.19,
302
+ "memory/max_mem_allocated(gib)": 21.19,
303
+ "step": 1300
304
+ },
305
+ {
306
+ "epoch": 16.88050314465409,
307
+ "grad_norm": 1.7535895109176636,
308
+ "learning_rate": 9.85280348614157e-05,
309
+ "loss": 0.0306,
310
+ "memory/device_mem_reserved(gib)": 22.98,
311
+ "memory/max_mem_active(gib)": 21.19,
312
+ "memory/max_mem_allocated(gib)": 21.19,
313
+ "step": 1350
314
+ },
315
+ {
316
+ "epoch": 17.50314465408805,
317
+ "grad_norm": 1.4870631694793701,
318
+ "learning_rate": 9.825197574874774e-05,
319
+ "loss": 0.0255,
320
+ "memory/device_mem_reserved(gib)": 22.98,
321
+ "memory/max_mem_active(gib)": 21.19,
322
+ "memory/max_mem_allocated(gib)": 21.19,
323
+ "step": 1400
324
+ },
325
+ {
326
+ "epoch": 18.12578616352201,
327
+ "grad_norm": 1.5221953392028809,
328
+ "learning_rate": 9.79526615401384e-05,
329
+ "loss": 0.0249,
330
+ "memory/device_mem_reserved(gib)": 22.98,
331
+ "memory/max_mem_active(gib)": 21.19,
332
+ "memory/max_mem_allocated(gib)": 21.19,
333
+ "step": 1450
334
+ },
335
+ {
336
+ "epoch": 18.754716981132077,
337
+ "grad_norm": 1.6464166641235352,
338
+ "learning_rate": 9.76302364904193e-05,
339
+ "loss": 0.0239,
340
+ "memory/device_mem_reserved(gib)": 22.98,
341
+ "memory/max_mem_active(gib)": 21.19,
342
+ "memory/max_mem_allocated(gib)": 21.19,
343
+ "step": 1500
344
+ },
345
+ {
346
+ "epoch": 18.754716981132077,
347
+ "eval_loss": 0.4606185555458069,
348
+ "eval_runtime": 3.1934,
349
+ "eval_samples_per_second": 64.508,
350
+ "eval_steps_per_second": 16.284,
351
+ "memory/device_mem_reserved(gib)": 22.98,
352
+ "memory/max_mem_active(gib)": 21.19,
353
+ "memory/max_mem_allocated(gib)": 21.19,
354
+ "step": 1500
355
+ },
356
+ {
357
+ "epoch": 19.37735849056604,
358
+ "grad_norm": 0.9110147953033447,
359
+ "learning_rate": 9.728485599271889e-05,
360
+ "loss": 0.0193,
361
+ "memory/device_mem_reserved(gib)": 22.98,
362
+ "memory/max_mem_active(gib)": 21.19,
363
+ "memory/max_mem_allocated(gib)": 21.19,
364
+ "step": 1550
365
+ },
366
+ {
367
+ "epoch": 20.0,
368
+ "grad_norm": 1.9765591621398926,
369
+ "learning_rate": 9.691668650357051e-05,
370
+ "loss": 0.0181,
371
+ "memory/device_mem_reserved(gib)": 22.98,
372
+ "memory/max_mem_active(gib)": 21.19,
373
+ "memory/max_mem_allocated(gib)": 21.19,
374
+ "step": 1600
375
+ },
376
+ {
377
+ "epoch": 20.628930817610062,
378
+ "grad_norm": 0.8914468288421631,
379
+ "learning_rate": 9.65259054626885e-05,
380
+ "loss": 0.0137,
381
+ "memory/device_mem_reserved(gib)": 22.98,
382
+ "memory/max_mem_active(gib)": 21.19,
383
+ "memory/max_mem_allocated(gib)": 21.19,
384
+ "step": 1650
385
+ },
386
+ {
387
+ "epoch": 21.251572327044027,
388
+ "grad_norm": 0.8400326371192932,
389
+ "learning_rate": 9.61127012074509e-05,
390
+ "loss": 0.0141,
391
+ "memory/device_mem_reserved(gib)": 22.98,
392
+ "memory/max_mem_active(gib)": 21.19,
393
+ "memory/max_mem_allocated(gib)": 21.19,
394
+ "step": 1700
395
+ },
396
+ {
397
+ "epoch": 21.88050314465409,
398
+ "grad_norm": 1.2185256481170654,
399
+ "learning_rate": 9.567727288213005e-05,
400
+ "loss": 0.0132,
401
+ "memory/device_mem_reserved(gib)": 22.98,
402
+ "memory/max_mem_active(gib)": 21.19,
403
+ "memory/max_mem_allocated(gib)": 21.19,
404
+ "step": 1750
405
+ },
406
+ {
407
+ "epoch": 22.50314465408805,
408
+ "grad_norm": 1.3600502014160156,
409
+ "learning_rate": 9.521983034191473e-05,
410
+ "loss": 0.0112,
411
+ "memory/device_mem_reserved(gib)": 22.98,
412
+ "memory/max_mem_active(gib)": 21.19,
413
+ "memory/max_mem_allocated(gib)": 21.19,
414
+ "step": 1800
415
+ },
416
+ {
417
+ "epoch": 23.12578616352201,
418
+ "grad_norm": 0.7943138480186462,
419
+ "learning_rate": 9.474059405177029e-05,
420
+ "loss": 0.0094,
421
+ "memory/device_mem_reserved(gib)": 22.98,
422
+ "memory/max_mem_active(gib)": 21.19,
423
+ "memory/max_mem_allocated(gib)": 21.19,
424
+ "step": 1850
425
+ },
426
+ {
427
+ "epoch": 23.754716981132077,
428
+ "grad_norm": 0.9606059789657593,
429
+ "learning_rate": 9.423979498018521e-05,
430
+ "loss": 0.0067,
431
+ "memory/device_mem_reserved(gib)": 22.98,
432
+ "memory/max_mem_active(gib)": 21.19,
433
+ "memory/max_mem_allocated(gib)": 21.19,
434
+ "step": 1900
435
+ },
436
+ {
437
+ "epoch": 24.37735849056604,
438
+ "grad_norm": 1.3146306276321411,
439
+ "learning_rate": 9.371767448785566e-05,
440
+ "loss": 0.0059,
441
+ "memory/device_mem_reserved(gib)": 22.98,
442
+ "memory/max_mem_active(gib)": 21.19,
443
+ "memory/max_mem_allocated(gib)": 21.19,
444
+ "step": 1950
445
+ },
446
+ {
447
+ "epoch": 25.0,
448
+ "grad_norm": 0.18922117352485657,
449
+ "learning_rate": 9.317448421136134e-05,
450
+ "loss": 0.0034,
451
+ "memory/device_mem_reserved(gib)": 22.98,
452
+ "memory/max_mem_active(gib)": 21.19,
453
+ "memory/max_mem_allocated(gib)": 21.19,
454
+ "step": 2000
455
+ },
456
+ {
457
+ "epoch": 25.0,
458
+ "eval_loss": 0.4182418882846832,
459
+ "eval_runtime": 3.2062,
460
+ "eval_samples_per_second": 64.251,
461
+ "eval_steps_per_second": 16.219,
462
+ "memory/device_mem_reserved(gib)": 22.98,
463
+ "memory/max_mem_active(gib)": 21.19,
464
+ "memory/max_mem_allocated(gib)": 21.19,
465
+ "step": 2000
466
+ },
467
+ {
468
+ "epoch": 25.628930817610062,
469
+ "grad_norm": 0.5072735548019409,
470
+ "learning_rate": 9.261048594188899e-05,
471
+ "loss": 0.0015,
472
+ "memory/device_mem_reserved(gib)": 22.98,
473
+ "memory/max_mem_active(gib)": 21.19,
474
+ "memory/max_mem_allocated(gib)": 21.19,
475
+ "step": 2050
476
+ },
477
+ {
478
+ "epoch": 26.251572327044027,
479
+ "grad_norm": 0.014916314743459225,
480
+ "learning_rate": 9.202595149906186e-05,
481
+ "loss": 0.0009,
482
+ "memory/device_mem_reserved(gib)": 22.98,
483
+ "memory/max_mem_active(gib)": 21.19,
484
+ "memory/max_mem_allocated(gib)": 21.19,
485
+ "step": 2100
486
+ },
487
+ {
488
+ "epoch": 26.88050314465409,
489
+ "grad_norm": 0.01302888710051775,
490
+ "learning_rate": 9.142116259993581e-05,
491
+ "loss": 0.0005,
492
+ "memory/device_mem_reserved(gib)": 22.98,
493
+ "memory/max_mem_active(gib)": 21.19,
494
+ "memory/max_mem_allocated(gib)": 21.19,
495
+ "step": 2150
496
+ },
497
+ {
498
+ "epoch": 27.50314465408805,
499
+ "grad_norm": 0.027999432757496834,
500
+ "learning_rate": 9.079641072322556e-05,
501
+ "loss": 0.0004,
502
+ "memory/device_mem_reserved(gib)": 22.98,
503
+ "memory/max_mem_active(gib)": 21.19,
504
+ "memory/max_mem_allocated(gib)": 21.19,
505
+ "step": 2200
506
+ },
507
+ {
508
+ "epoch": 28.12578616352201,
509
+ "grad_norm": 0.09736159443855286,
510
+ "learning_rate": 9.015199696882614e-05,
511
+ "loss": 0.0005,
512
+ "memory/device_mem_reserved(gib)": 22.98,
513
+ "memory/max_mem_active(gib)": 21.19,
514
+ "memory/max_mem_allocated(gib)": 21.19,
515
+ "step": 2250
516
+ },
517
+ {
518
+ "epoch": 28.754716981132077,
519
+ "grad_norm": 0.0072149247862398624,
520
+ "learning_rate": 8.948823191269741e-05,
521
+ "loss": 0.0004,
522
+ "memory/device_mem_reserved(gib)": 22.98,
523
+ "memory/max_mem_active(gib)": 21.19,
524
+ "memory/max_mem_allocated(gib)": 21.19,
525
+ "step": 2300
526
+ },
527
+ {
528
+ "epoch": 29.37735849056604,
529
+ "grad_norm": 0.006900622975081205,
530
+ "learning_rate": 8.880543545718172e-05,
531
+ "loss": 0.0003,
532
+ "memory/device_mem_reserved(gib)": 22.98,
533
+ "memory/max_mem_active(gib)": 21.19,
534
+ "memory/max_mem_allocated(gib)": 21.19,
535
+ "step": 2350
536
+ },
537
+ {
538
+ "epoch": 30.0,
539
+ "grad_norm": 0.23880253732204437,
540
+ "learning_rate": 8.810393667682645e-05,
541
+ "loss": 0.0004,
542
+ "memory/device_mem_reserved(gib)": 22.98,
543
+ "memory/max_mem_active(gib)": 21.19,
544
+ "memory/max_mem_allocated(gib)": 21.19,
545
+ "step": 2400
546
+ },
547
+ {
548
+ "epoch": 30.628930817610062,
549
+ "grad_norm": 0.00512400409206748,
550
+ "learning_rate": 8.738407365978627e-05,
551
+ "loss": 0.0004,
552
+ "memory/device_mem_reserved(gib)": 22.98,
553
+ "memory/max_mem_active(gib)": 21.19,
554
+ "memory/max_mem_allocated(gib)": 21.19,
555
+ "step": 2450
556
+ },
557
+ {
558
+ "epoch": 31.251572327044027,
559
+ "grad_norm": 0.005065740551799536,
560
+ "learning_rate": 8.664619334488112e-05,
561
+ "loss": 0.0003,
562
+ "memory/device_mem_reserved(gib)": 22.98,
563
+ "memory/max_mem_active(gib)": 21.19,
564
+ "memory/max_mem_allocated(gib)": 21.19,
565
+ "step": 2500
566
+ },
567
+ {
568
+ "epoch": 31.251572327044027,
569
+ "eval_loss": 0.4146847128868103,
570
+ "eval_runtime": 3.199,
571
+ "eval_samples_per_second": 64.396,
572
+ "eval_steps_per_second": 16.255,
573
+ "memory/device_mem_reserved(gib)": 22.98,
574
+ "memory/max_mem_active(gib)": 21.19,
575
+ "memory/max_mem_allocated(gib)": 21.19,
576
+ "step": 2500
577
+ },
578
+ {
579
+ "epoch": 31.88050314465409,
580
+ "grad_norm": 0.005813656374812126,
581
+ "learning_rate": 8.589065135438866e-05,
582
+ "loss": 0.0004,
583
+ "memory/device_mem_reserved(gib)": 22.98,
584
+ "memory/max_mem_active(gib)": 21.19,
585
+ "memory/max_mem_allocated(gib)": 21.19,
586
+ "step": 2550
587
+ },
588
+ {
589
+ "epoch": 32.503144654088054,
590
+ "grad_norm": 0.004174518398940563,
591
+ "learning_rate": 8.511781182265162e-05,
592
+ "loss": 0.0003,
593
+ "memory/device_mem_reserved(gib)": 22.98,
594
+ "memory/max_mem_active(gib)": 21.19,
595
+ "memory/max_mem_allocated(gib)": 21.19,
596
+ "step": 2600
597
+ },
598
+ {
599
+ "epoch": 33.125786163522015,
600
+ "grad_norm": 0.005025345832109451,
601
+ "learning_rate": 8.432804722058296e-05,
602
+ "loss": 0.0003,
603
+ "memory/device_mem_reserved(gib)": 22.98,
604
+ "memory/max_mem_active(gib)": 21.19,
605
+ "memory/max_mem_allocated(gib)": 21.19,
606
+ "step": 2650
607
+ },
608
+ {
609
+ "epoch": 33.75471698113208,
610
+ "grad_norm": 0.003451400436460972,
611
+ "learning_rate": 8.352173817615298e-05,
612
+ "loss": 0.0003,
613
+ "memory/device_mem_reserved(gib)": 22.98,
614
+ "memory/max_mem_active(gib)": 21.19,
615
+ "memory/max_mem_allocated(gib)": 21.19,
616
+ "step": 2700
617
+ },
618
+ {
619
+ "epoch": 34.37735849056604,
620
+ "grad_norm": 0.005046437028795481,
621
+ "learning_rate": 8.269927329094524e-05,
622
+ "loss": 0.0003,
623
+ "memory/device_mem_reserved(gib)": 22.98,
624
+ "memory/max_mem_active(gib)": 21.19,
625
+ "memory/max_mem_allocated(gib)": 21.19,
626
+ "step": 2750
627
+ },
628
+ {
629
+ "epoch": 35.0,
630
+ "grad_norm": 0.007225444074720144,
631
+ "learning_rate": 8.186104895286962e-05,
632
+ "loss": 0.0003,
633
+ "memory/device_mem_reserved(gib)": 22.98,
634
+ "memory/max_mem_active(gib)": 21.19,
635
+ "memory/max_mem_allocated(gib)": 21.19,
636
+ "step": 2800
637
+ },
638
+ {
639
+ "epoch": 35.62893081761006,
640
+ "grad_norm": 0.06628947705030441,
641
+ "learning_rate": 8.100746914512264e-05,
642
+ "loss": 0.0003,
643
+ "memory/device_mem_reserved(gib)": 22.98,
644
+ "memory/max_mem_active(gib)": 21.19,
645
+ "memory/max_mem_allocated(gib)": 21.19,
646
+ "step": 2850
647
+ },
648
+ {
649
+ "epoch": 36.25157232704402,
650
+ "grad_norm": 0.035715293139219284,
651
+ "learning_rate": 8.013894525148739e-05,
652
+ "loss": 0.0003,
653
+ "memory/device_mem_reserved(gib)": 22.98,
654
+ "memory/max_mem_active(gib)": 21.19,
655
+ "memory/max_mem_allocated(gib)": 21.19,
656
+ "step": 2900
657
+ },
658
+ {
659
+ "epoch": 36.880503144654085,
660
+ "grad_norm": 0.09320684522390366,
661
+ "learning_rate": 7.925589585806647e-05,
662
+ "loss": 0.0003,
663
+ "memory/device_mem_reserved(gib)": 22.98,
664
+ "memory/max_mem_active(gib)": 21.19,
665
+ "memory/max_mem_allocated(gib)": 21.19,
666
+ "step": 2950
667
+ },
668
+ {
669
+ "epoch": 37.503144654088054,
670
+ "grad_norm": 0.0035134428180754185,
671
+ "learning_rate": 7.835874655154409e-05,
672
+ "loss": 0.0003,
673
+ "memory/device_mem_reserved(gib)": 22.98,
674
+ "memory/max_mem_active(gib)": 21.19,
675
+ "memory/max_mem_allocated(gib)": 21.19,
676
+ "step": 3000
677
+ },
678
+ {
679
+ "epoch": 37.503144654088054,
680
+ "eval_loss": 0.4172806143760681,
681
+ "eval_runtime": 3.205,
682
+ "eval_samples_per_second": 64.274,
683
+ "eval_steps_per_second": 16.224,
684
+ "memory/device_mem_reserved(gib)": 22.98,
685
+ "memory/max_mem_active(gib)": 21.19,
686
+ "memory/max_mem_allocated(gib)": 21.19,
687
+ "step": 3000
688
+ },
689
+ {
690
+ "epoch": 38.125786163522015,
691
+ "grad_norm": 0.053384799510240555,
692
+ "learning_rate": 7.744792971407403e-05,
693
+ "loss": 0.0003,
694
+ "memory/device_mem_reserved(gib)": 22.98,
695
+ "memory/max_mem_active(gib)": 21.19,
696
+ "memory/max_mem_allocated(gib)": 21.19,
697
+ "step": 3050
698
+ },
699
+ {
700
+ "epoch": 38.75471698113208,
701
+ "grad_norm": 0.003731059841811657,
702
+ "learning_rate": 7.652388431489248e-05,
703
+ "loss": 0.0003,
704
+ "memory/device_mem_reserved(gib)": 22.98,
705
+ "memory/max_mem_active(gib)": 21.19,
706
+ "memory/max_mem_allocated(gib)": 21.19,
707
+ "step": 3100
708
+ },
709
+ {
710
+ "epoch": 39.37735849056604,
711
+ "grad_norm": 0.0034423263277858496,
712
+ "learning_rate": 7.558705569875639e-05,
713
+ "loss": 0.0002,
714
+ "memory/device_mem_reserved(gib)": 22.98,
715
+ "memory/max_mem_active(gib)": 21.19,
716
+ "memory/max_mem_allocated(gib)": 21.19,
717
+ "step": 3150
718
+ },
719
+ {
720
+ "epoch": 40.0,
721
+ "grad_norm": 0.15515735745429993,
722
+ "learning_rate": 7.463789537130901e-05,
723
+ "loss": 0.0003,
724
+ "memory/device_mem_reserved(gib)": 22.98,
725
+ "memory/max_mem_active(gib)": 21.19,
726
+ "memory/max_mem_allocated(gib)": 21.19,
727
+ "step": 3200
728
+ },
729
+ {
730
+ "epoch": 40.62893081761006,
731
+ "grad_norm": 0.0036120035219937563,
732
+ "learning_rate": 7.367686078147627e-05,
733
+ "loss": 0.0003,
734
+ "memory/device_mem_reserved(gib)": 22.98,
735
+ "memory/max_mem_active(gib)": 21.19,
736
+ "memory/max_mem_allocated(gib)": 21.19,
737
+ "step": 3250
738
+ },
739
+ {
740
+ "epoch": 41.25157232704402,
741
+ "grad_norm": 0.0032184540759772062,
742
+ "learning_rate": 7.270441510099862e-05,
743
+ "loss": 0.0003,
744
+ "memory/device_mem_reserved(gib)": 22.98,
745
+ "memory/max_mem_active(gib)": 21.19,
746
+ "memory/max_mem_allocated(gib)": 21.19,
747
+ "step": 3300
748
+ },
749
+ {
750
+ "epoch": 41.880503144654085,
751
+ "grad_norm": 0.0027952201198786497,
752
+ "learning_rate": 7.1721027001205e-05,
753
+ "loss": 0.0003,
754
+ "memory/device_mem_reserved(gib)": 22.98,
755
+ "memory/max_mem_active(gib)": 21.19,
756
+ "memory/max_mem_allocated(gib)": 21.19,
757
+ "step": 3350
758
+ },
759
+ {
760
+ "epoch": 42.503144654088054,
761
+ "grad_norm": 0.0036804918199777603,
762
+ "learning_rate": 7.072717042713608e-05,
763
+ "loss": 0.0002,
764
+ "memory/device_mem_reserved(gib)": 22.98,
765
+ "memory/max_mem_active(gib)": 21.19,
766
+ "memory/max_mem_allocated(gib)": 21.19,
767
+ "step": 3400
768
+ },
769
+ {
770
+ "epoch": 43.125786163522015,
771
+ "grad_norm": 0.07971662282943726,
772
+ "learning_rate": 6.97233243691259e-05,
773
+ "loss": 0.0003,
774
+ "memory/device_mem_reserved(gib)": 22.98,
775
+ "memory/max_mem_active(gib)": 21.19,
776
+ "memory/max_mem_allocated(gib)": 21.19,
777
+ "step": 3450
778
+ },
779
+ {
780
+ "epoch": 43.75471698113208,
781
+ "grad_norm": 0.0031471922993659973,
782
+ "learning_rate": 6.870997263195189e-05,
783
+ "loss": 0.0002,
784
+ "memory/device_mem_reserved(gib)": 22.98,
785
+ "memory/max_mem_active(gib)": 21.19,
786
+ "memory/max_mem_allocated(gib)": 21.19,
787
+ "step": 3500
788
+ },
789
+ {
790
+ "epoch": 43.75471698113208,
791
+ "eval_loss": 0.4193211495876312,
792
+ "eval_runtime": 3.2022,
793
+ "eval_samples_per_second": 64.332,
794
+ "eval_steps_per_second": 16.239,
795
+ "memory/device_mem_reserved(gib)": 22.98,
796
+ "memory/max_mem_active(gib)": 21.19,
797
+ "memory/max_mem_allocated(gib)": 21.19,
798
+ "step": 3500
799
+ },
800
+ {
801
+ "epoch": 44.37735849056604,
802
+ "grad_norm": 0.0884564146399498,
803
+ "learning_rate": 6.768760360166471e-05,
804
+ "loss": 0.0003,
805
+ "memory/device_mem_reserved(gib)": 22.98,
806
+ "memory/max_mem_active(gib)": 21.19,
807
+ "memory/max_mem_allocated(gib)": 21.19,
808
+ "step": 3550
809
+ },
810
+ {
811
+ "epoch": 45.0,
812
+ "grad_norm": 0.003808132605627179,
813
+ "learning_rate": 6.665671001020987e-05,
814
+ "loss": 0.0002,
815
+ "memory/device_mem_reserved(gib)": 22.98,
816
+ "memory/max_mem_active(gib)": 21.19,
817
+ "memory/max_mem_allocated(gib)": 21.19,
818
+ "step": 3600
819
+ },
820
+ {
821
+ "epoch": 45.62893081761006,
822
+ "grad_norm": 0.00299069844186306,
823
+ "learning_rate": 6.561778869795514e-05,
824
+ "loss": 0.0002,
825
+ "memory/device_mem_reserved(gib)": 22.98,
826
+ "memory/max_mem_active(gib)": 21.19,
827
+ "memory/max_mem_allocated(gib)": 21.19,
828
+ "step": 3650
829
+ },
830
+ {
831
+ "epoch": 46.25157232704402,
832
+ "grad_norm": 0.0026494229678064585,
833
+ "learning_rate": 6.457134037423757e-05,
834
+ "loss": 0.0003,
835
+ "memory/device_mem_reserved(gib)": 22.98,
836
+ "memory/max_mem_active(gib)": 21.19,
837
+ "memory/max_mem_allocated(gib)": 21.19,
838
+ "step": 3700
839
+ },
840
+ {
841
+ "epoch": 46.880503144654085,
842
+ "grad_norm": 0.002516557229682803,
843
+ "learning_rate": 6.351786937604631e-05,
844
+ "loss": 0.0003,
845
+ "memory/device_mem_reserved(gib)": 22.98,
846
+ "memory/max_mem_active(gib)": 21.19,
847
+ "memory/max_mem_allocated(gib)": 21.19,
848
+ "step": 3750
849
+ },
850
+ {
851
+ "epoch": 47.503144654088054,
852
+ "grad_norm": 0.0023329760879278183,
853
+ "learning_rate": 6.245788342495654e-05,
854
+ "loss": 0.0002,
855
+ "memory/device_mem_reserved(gib)": 22.98,
856
+ "memory/max_mem_active(gib)": 21.19,
857
+ "memory/max_mem_allocated(gib)": 21.19,
858
+ "step": 3800
859
+ },
860
+ {
861
+ "epoch": 48.125786163522015,
862
+ "grad_norm": 0.06959404051303864,
863
+ "learning_rate": 6.13918933824326e-05,
864
+ "loss": 0.0003,
865
+ "memory/device_mem_reserved(gib)": 22.98,
866
+ "memory/max_mem_active(gib)": 21.19,
867
+ "memory/max_mem_allocated(gib)": 21.19,
868
+ "step": 3850
869
+ },
870
+ {
871
+ "epoch": 48.75471698113208,
872
+ "grad_norm": 0.002554183593019843,
873
+ "learning_rate": 6.032041300361773e-05,
874
+ "loss": 0.0003,
875
+ "memory/device_mem_reserved(gib)": 22.98,
876
+ "memory/max_mem_active(gib)": 21.19,
877
+ "memory/max_mem_allocated(gib)": 21.19,
878
+ "step": 3900
879
+ },
880
+ {
881
+ "epoch": 49.37735849056604,
882
+ "grad_norm": 0.002376449992880225,
883
+ "learning_rate": 5.924395868972912e-05,
884
+ "loss": 0.0002,
885
+ "memory/device_mem_reserved(gib)": 22.98,
886
+ "memory/max_mem_active(gib)": 21.19,
887
+ "memory/max_mem_allocated(gib)": 21.19,
888
+ "step": 3950
889
+ },
890
+ {
891
+ "epoch": 50.0,
892
+ "grad_norm": 0.1847931295633316,
893
+ "learning_rate": 5.8163049239177784e-05,
894
+ "loss": 0.0003,
895
+ "memory/device_mem_reserved(gib)": 22.98,
896
+ "memory/max_mem_active(gib)": 21.19,
897
+ "memory/max_mem_allocated(gib)": 21.19,
898
+ "step": 4000
899
+ },
900
+ {
901
+ "epoch": 50.0,
902
+ "eval_loss": 0.41984716057777405,
903
+ "eval_runtime": 3.345,
904
+ "eval_samples_per_second": 61.585,
905
+ "eval_steps_per_second": 15.546,
906
+ "memory/device_mem_reserved(gib)": 22.98,
907
+ "memory/max_mem_active(gib)": 21.19,
908
+ "memory/max_mem_allocated(gib)": 21.19,
909
+ "step": 4000
910
+ },
911
+ {
912
+ "epoch": 50.62893081761006,
913
+ "grad_norm": 0.001976967789232731,
914
+ "learning_rate": 5.707820559753301e-05,
915
+ "loss": 0.0002,
916
+ "memory/device_mem_reserved(gib)": 22.98,
917
+ "memory/max_mem_active(gib)": 21.19,
918
+ "memory/max_mem_allocated(gib)": 21.19,
919
+ "step": 4050
920
+ },
921
+ {
922
+ "epoch": 51.25157232704402,
923
+ "grad_norm": 0.002146355574950576,
924
+ "learning_rate": 5.598995060645207e-05,
925
+ "loss": 0.0002,
926
+ "memory/device_mem_reserved(gib)": 22.98,
927
+ "memory/max_mem_active(gib)": 21.19,
928
+ "memory/max_mem_allocated(gib)": 21.19,
929
+ "step": 4100
930
+ },
931
+ {
932
+ "epoch": 51.880503144654085,
933
+ "grad_norm": 0.08366034924983978,
934
+ "learning_rate": 5.4898808751696115e-05,
935
+ "loss": 0.0003,
936
+ "memory/device_mem_reserved(gib)": 22.98,
937
+ "memory/max_mem_active(gib)": 21.19,
938
+ "memory/max_mem_allocated(gib)": 21.19,
939
+ "step": 4150
940
+ },
941
+ {
942
+ "epoch": 52.503144654088054,
943
+ "grad_norm": 0.002259905682876706,
944
+ "learning_rate": 5.380530591035358e-05,
945
+ "loss": 0.0002,
946
+ "memory/device_mem_reserved(gib)": 22.98,
947
+ "memory/max_mem_active(gib)": 21.19,
948
+ "memory/max_mem_allocated(gib)": 21.19,
949
+ "step": 4200
950
+ },
951
+ {
952
+ "epoch": 53.125786163522015,
953
+ "grad_norm": 0.002441129647195339,
954
+ "learning_rate": 5.2709969097393106e-05,
955
+ "loss": 0.0003,
956
+ "memory/device_mem_reserved(gib)": 22.98,
957
+ "memory/max_mem_active(gib)": 21.19,
958
+ "memory/max_mem_allocated(gib)": 21.19,
959
+ "step": 4250
960
+ },
961
+ {
962
+ "epoch": 53.75471698113208,
963
+ "grad_norm": 0.002149126259610057,
964
+ "learning_rate": 5.161332621166817e-05,
965
+ "loss": 0.0002,
966
+ "memory/device_mem_reserved(gib)": 22.98,
967
+ "memory/max_mem_active(gib)": 21.19,
968
+ "memory/max_mem_allocated(gib)": 21.19,
969
+ "step": 4300
970
+ },
971
+ {
972
+ "epoch": 54.37735849056604,
973
+ "grad_norm": 0.001962183741852641,
974
+ "learning_rate": 5.0515905781495456e-05,
975
+ "loss": 0.0002,
976
+ "memory/device_mem_reserved(gib)": 22.98,
977
+ "memory/max_mem_active(gib)": 21.19,
978
+ "memory/max_mem_allocated(gib)": 21.19,
979
+ "step": 4350
980
+ },
981
+ {
982
+ "epoch": 55.0,
983
+ "grad_norm": 0.002487902995198965,
984
+ "learning_rate": 4.941823670993016e-05,
985
+ "loss": 0.0003,
986
+ "memory/device_mem_reserved(gib)": 22.98,
987
+ "memory/max_mem_active(gib)": 21.19,
988
+ "memory/max_mem_allocated(gib)": 21.19,
989
+ "step": 4400
990
+ },
991
+ {
992
+ "epoch": 55.62893081761006,
993
+ "grad_norm": 0.0021235146559774876,
994
+ "learning_rate": 4.8320848019860454e-05,
995
+ "loss": 0.0002,
996
+ "memory/device_mem_reserved(gib)": 22.98,
997
+ "memory/max_mem_active(gib)": 21.19,
998
+ "memory/max_mem_allocated(gib)": 21.19,
999
+ "step": 4450
1000
+ },
1001
+ {
1002
+ "epoch": 56.25157232704402,
1003
+ "grad_norm": 0.002039574086666107,
1004
+ "learning_rate": 4.722426859904432e-05,
1005
+ "loss": 0.0002,
1006
+ "memory/device_mem_reserved(gib)": 22.98,
1007
+ "memory/max_mem_active(gib)": 21.19,
1008
+ "memory/max_mem_allocated(gib)": 21.19,
1009
+ "step": 4500
1010
+ },
1011
+ {
1012
+ "epoch": 56.25157232704402,
1013
+ "eval_loss": 0.42212262749671936,
1014
+ "eval_runtime": 3.3228,
1015
+ "eval_samples_per_second": 61.996,
1016
+ "eval_steps_per_second": 15.649,
1017
+ "memory/device_mem_reserved(gib)": 22.98,
1018
+ "memory/max_mem_active(gib)": 21.19,
1019
+ "memory/max_mem_allocated(gib)": 21.19,
1020
+ "step": 4500
1021
+ },
1022
+ {
1023
+ "epoch": 56.880503144654085,
1024
+ "grad_norm": 0.0019425100181251764,
1025
+ "learning_rate": 4.612902694521153e-05,
1026
+ "loss": 0.0002,
1027
+ "memory/device_mem_reserved(gib)": 22.98,
1028
+ "memory/max_mem_active(gib)": 21.19,
1029
+ "memory/max_mem_allocated(gib)": 21.19,
1030
+ "step": 4550
1031
+ },
1032
+ {
1033
+ "epoch": 57.503144654088054,
1034
+ "grad_norm": 0.0017751130508258939,
1035
+ "learning_rate": 4.503565091135342e-05,
1036
+ "loss": 0.0002,
1037
+ "memory/device_mem_reserved(gib)": 22.98,
1038
+ "memory/max_mem_active(gib)": 21.19,
1039
+ "memory/max_mem_allocated(gib)": 21.19,
1040
+ "step": 4600
1041
+ },
1042
+ {
1043
+ "epoch": 58.125786163522015,
1044
+ "grad_norm": 0.0020404935348778963,
1045
+ "learning_rate": 4.3944667451323694e-05,
1046
+ "loss": 0.0002,
1047
+ "memory/device_mem_reserved(gib)": 22.98,
1048
+ "memory/max_mem_active(gib)": 21.19,
1049
+ "memory/max_mem_allocated(gib)": 21.19,
1050
+ "step": 4650
1051
+ },
1052
+ {
1053
+ "epoch": 58.75471698113208,
1054
+ "grad_norm": 0.001929473364725709,
1055
+ "learning_rate": 4.285660236587222e-05,
1056
+ "loss": 0.0002,
1057
+ "memory/device_mem_reserved(gib)": 22.98,
1058
+ "memory/max_mem_active(gib)": 21.19,
1059
+ "memory/max_mem_allocated(gib)": 21.19,
1060
+ "step": 4700
1061
+ },
1062
+ {
1063
+ "epoch": 59.37735849056604,
1064
+ "grad_norm": 0.0017358898185193539,
1065
+ "learning_rate": 4.177198004923492e-05,
1066
+ "loss": 0.0002,
1067
+ "memory/device_mem_reserved(gib)": 22.98,
1068
+ "memory/max_mem_active(gib)": 21.19,
1069
+ "memory/max_mem_allocated(gib)": 21.19,
1070
+ "step": 4750
1071
+ },
1072
+ {
1073
+ "epoch": 60.0,
1074
+ "grad_norm": 0.0023231045342981815,
1075
+ "learning_rate": 4.0691323236401194e-05,
1076
+ "loss": 0.0002,
1077
+ "memory/device_mem_reserved(gib)": 22.98,
1078
+ "memory/max_mem_active(gib)": 21.19,
1079
+ "memory/max_mem_allocated(gib)": 21.19,
1080
+ "step": 4800
1081
+ },
1082
+ {
1083
+ "epoch": 60.62893081761006,
1084
+ "grad_norm": 0.08456777036190033,
1085
+ "learning_rate": 3.961515275118134e-05,
1086
+ "loss": 0.0002,
1087
+ "memory/device_mem_reserved(gib)": 22.98,
1088
+ "memory/max_mem_active(gib)": 21.19,
1089
+ "memory/max_mem_allocated(gib)": 21.19,
1090
+ "step": 4850
1091
+ },
1092
+ {
1093
+ "epoch": 61.25157232704402,
1094
+ "grad_norm": 0.0015685250982642174,
1095
+ "learning_rate": 3.854398725519479e-05,
1096
+ "loss": 0.0002,
1097
+ "memory/device_mem_reserved(gib)": 22.98,
1098
+ "memory/max_mem_active(gib)": 21.19,
1099
+ "memory/max_mem_allocated(gib)": 21.19,
1100
+ "step": 4900
1101
+ },
1102
+ {
1103
+ "epoch": 61.880503144654085,
1104
+ "grad_norm": 0.0017422462115064263,
1105
+ "learning_rate": 3.747834299790057e-05,
1106
+ "loss": 0.0002,
1107
+ "memory/device_mem_reserved(gib)": 22.98,
1108
+ "memory/max_mem_active(gib)": 21.19,
1109
+ "memory/max_mem_allocated(gib)": 21.19,
1110
+ "step": 4950
1111
+ },
1112
+ {
1113
+ "epoch": 62.503144654088054,
1114
+ "grad_norm": 0.0019026623340323567,
1115
+ "learning_rate": 3.641873356779035e-05,
1116
+ "loss": 0.0002,
1117
+ "memory/device_mem_reserved(gib)": 22.98,
1118
+ "memory/max_mem_active(gib)": 21.19,
1119
+ "memory/max_mem_allocated(gib)": 21.19,
1120
+ "step": 5000
1121
+ },
1122
+ {
1123
+ "epoch": 62.503144654088054,
1124
+ "eval_loss": 0.4231208860874176,
1125
+ "eval_runtime": 3.3761,
1126
+ "eval_samples_per_second": 61.017,
1127
+ "eval_steps_per_second": 15.402,
1128
+ "memory/device_mem_reserved(gib)": 22.98,
1129
+ "memory/max_mem_active(gib)": 21.19,
1130
+ "memory/max_mem_allocated(gib)": 21.19,
1131
+ "step": 5000
1132
+ },
1133
+ {
1134
+ "epoch": 63.125786163522015,
1135
+ "grad_norm": 0.00161712896078825,
1136
+ "learning_rate": 3.536566964486373e-05,
1137
+ "loss": 0.0002,
1138
+ "memory/device_mem_reserved(gib)": 22.98,
1139
+ "memory/max_mem_active(gib)": 21.19,
1140
+ "memory/max_mem_allocated(gib)": 21.19,
1141
+ "step": 5050
1142
+ },
1143
+ {
1144
+ "epoch": 63.75471698113208,
1145
+ "grad_norm": 0.0018402227433398366,
1146
+ "learning_rate": 3.4319658754505536e-05,
1147
+ "loss": 0.0002,
1148
+ "memory/device_mem_reserved(gib)": 22.98,
1149
+ "memory/max_mem_active(gib)": 21.19,
1150
+ "memory/max_mem_allocated(gib)": 21.19,
1151
+ "step": 5100
1152
+ },
1153
+ {
1154
+ "epoch": 64.37735849056604,
1155
+ "grad_norm": 0.0017006592825055122,
1156
+ "learning_rate": 3.3281205022883296e-05,
1157
+ "loss": 0.0002,
1158
+ "memory/device_mem_reserved(gib)": 22.98,
1159
+ "memory/max_mem_active(gib)": 21.19,
1160
+ "memory/max_mem_allocated(gib)": 21.19,
1161
+ "step": 5150
1162
+ },
1163
+ {
1164
+ "epoch": 65.0,
1165
+ "grad_norm": 0.002361567923799157,
1166
+ "learning_rate": 3.2250808933983124e-05,
1167
+ "loss": 0.0002,
1168
+ "memory/device_mem_reserved(gib)": 22.98,
1169
+ "memory/max_mem_active(gib)": 21.19,
1170
+ "memory/max_mem_allocated(gib)": 21.19,
1171
+ "step": 5200
1172
+ },
1173
+ {
1174
+ "epoch": 65.62893081761007,
1175
+ "grad_norm": 0.0015942014288157225,
1176
+ "learning_rate": 3.122896708840084e-05,
1177
+ "loss": 0.0002,
1178
+ "memory/device_mem_reserved(gib)": 22.98,
1179
+ "memory/max_mem_active(gib)": 21.19,
1180
+ "memory/max_mem_allocated(gib)": 21.19,
1181
+ "step": 5250
1182
+ },
1183
+ {
1184
+ "epoch": 66.25157232704403,
1185
+ "grad_norm": 0.06022707000374794,
1186
+ "learning_rate": 3.0216171964004658e-05,
1187
+ "loss": 0.0002,
1188
+ "memory/device_mem_reserved(gib)": 22.98,
1189
+ "memory/max_mem_active(gib)": 21.19,
1190
+ "memory/max_mem_allocated(gib)": 21.19,
1191
+ "step": 5300
1192
+ },
1193
+ {
1194
+ "epoch": 66.88050314465409,
1195
+ "grad_norm": 0.001938488450832665,
1196
+ "learning_rate": 2.9212911678585043e-05,
1197
+ "loss": 0.0002,
1198
+ "memory/device_mem_reserved(gib)": 22.98,
1199
+ "memory/max_mem_active(gib)": 21.19,
1200
+ "memory/max_mem_allocated(gib)": 21.19,
1201
+ "step": 5350
1202
+ },
1203
+ {
1204
+ "epoch": 67.50314465408805,
1205
+ "grad_norm": 0.0015994412824511528,
1206
+ "learning_rate": 2.821966975460566e-05,
1207
+ "loss": 0.0001,
1208
+ "memory/device_mem_reserved(gib)": 22.98,
1209
+ "memory/max_mem_active(gib)": 21.19,
1210
+ "memory/max_mem_allocated(gib)": 21.19,
1211
+ "step": 5400
1212
+ },
1213
+ {
1214
+ "epoch": 68.12578616352201,
1215
+ "grad_norm": 0.0013752554077655077,
1216
+ "learning_rate": 2.723692488616924e-05,
1217
+ "loss": 0.0002,
1218
+ "memory/device_mem_reserved(gib)": 22.98,
1219
+ "memory/max_mem_active(gib)": 21.19,
1220
+ "memory/max_mem_allocated(gib)": 21.19,
1221
+ "step": 5450
1222
+ },
1223
+ {
1224
+ "epoch": 68.75471698113208,
1225
+ "grad_norm": 0.0018416305538266897,
1226
+ "learning_rate": 2.6265150708310316e-05,
1227
+ "loss": 0.0002,
1228
+ "memory/device_mem_reserved(gib)": 22.98,
1229
+ "memory/max_mem_active(gib)": 21.19,
1230
+ "memory/max_mem_allocated(gib)": 21.19,
1231
+ "step": 5500
1232
+ },
1233
+ {
1234
+ "epoch": 68.75471698113208,
1235
+ "eval_loss": 0.42299509048461914,
1236
+ "eval_runtime": 3.3086,
1237
+ "eval_samples_per_second": 62.261,
1238
+ "eval_steps_per_second": 15.716,
1239
+ "memory/device_mem_reserved(gib)": 22.98,
1240
+ "memory/max_mem_active(gib)": 21.19,
1241
+ "memory/max_mem_allocated(gib)": 21.19,
1242
+ "step": 5500
1243
+ },
1244
+ {
1245
+ "epoch": 69.37735849056604,
1246
+ "grad_norm": 0.05967230722308159,
1247
+ "learning_rate": 2.530481556872636e-05,
1248
+ "loss": 0.0001,
1249
+ "memory/device_mem_reserved(gib)": 22.98,
1250
+ "memory/max_mem_active(gib)": 21.19,
1251
+ "memory/max_mem_allocated(gib)": 21.19,
1252
+ "step": 5550
1253
+ },
1254
+ {
1255
+ "epoch": 70.0,
1256
+ "grad_norm": 0.002312347525730729,
1257
+ "learning_rate": 2.435638230205701e-05,
1258
+ "loss": 0.0002,
1259
+ "memory/device_mem_reserved(gib)": 22.98,
1260
+ "memory/max_mem_active(gib)": 21.19,
1261
+ "memory/max_mem_allocated(gib)": 21.19,
1262
+ "step": 5600
1263
+ },
1264
+ {
1265
+ "epoch": 70.62893081761007,
1266
+ "grad_norm": 0.07449054718017578,
1267
+ "learning_rate": 2.3420308006820345e-05,
1268
+ "loss": 0.0002,
1269
+ "memory/device_mem_reserved(gib)": 22.98,
1270
+ "memory/max_mem_active(gib)": 21.19,
1271
+ "memory/max_mem_allocated(gib)": 21.19,
1272
+ "step": 5650
1273
+ },
1274
+ {
1275
+ "epoch": 71.25157232704403,
1276
+ "grad_norm": 0.0014853739412501454,
1277
+ "learning_rate": 2.2497043825113824e-05,
1278
+ "loss": 0.0002,
1279
+ "memory/device_mem_reserved(gib)": 22.98,
1280
+ "memory/max_mem_active(gib)": 21.19,
1281
+ "memory/max_mem_allocated(gib)": 21.19,
1282
+ "step": 5700
1283
+ },
1284
+ {
1285
+ "epoch": 71.88050314465409,
1286
+ "grad_norm": 0.0014424559194594622,
1287
+ "learning_rate": 2.158703472518569e-05,
1288
+ "loss": 0.0002,
1289
+ "memory/device_mem_reserved(gib)": 22.98,
1290
+ "memory/max_mem_active(gib)": 21.19,
1291
+ "memory/max_mem_allocated(gib)": 21.19,
1292
+ "step": 5750
1293
+ },
1294
+ {
1295
+ "epoch": 72.50314465408805,
1296
+ "grad_norm": 0.0015837795799598098,
1297
+ "learning_rate": 2.0690719286982125e-05,
1298
+ "loss": 0.0002,
1299
+ "memory/device_mem_reserved(gib)": 22.98,
1300
+ "memory/max_mem_active(gib)": 21.19,
1301
+ "memory/max_mem_allocated(gib)": 21.19,
1302
+ "step": 5800
1303
+ },
1304
+ {
1305
+ "epoch": 73.12578616352201,
1306
+ "grad_norm": 0.06110122799873352,
1307
+ "learning_rate": 1.9808529490773042e-05,
1308
+ "loss": 0.0002,
1309
+ "memory/device_mem_reserved(gib)": 22.98,
1310
+ "memory/max_mem_active(gib)": 21.19,
1311
+ "memory/max_mem_allocated(gib)": 21.19,
1312
+ "step": 5850
1313
+ },
1314
+ {
1315
+ "epoch": 73.75471698113208,
1316
+ "grad_norm": 0.0015458406414836645,
1317
+ "learning_rate": 1.894089050895879e-05,
1318
+ "loss": 0.0002,
1319
+ "memory/device_mem_reserved(gib)": 22.98,
1320
+ "memory/max_mem_active(gib)": 21.19,
1321
+ "memory/max_mem_allocated(gib)": 21.19,
1322
+ "step": 5900
1323
+ },
1324
+ {
1325
+ "epoch": 74.37735849056604,
1326
+ "grad_norm": 0.0013154478510841727,
1327
+ "learning_rate": 1.8088220501157755e-05,
1328
+ "loss": 0.0002,
1329
+ "memory/device_mem_reserved(gib)": 22.98,
1330
+ "memory/max_mem_active(gib)": 21.19,
1331
+ "memory/max_mem_allocated(gib)": 21.19,
1332
+ "step": 5950
1333
+ },
1334
+ {
1335
+ "epoch": 75.0,
1336
+ "grad_norm": 0.0016847378574311733,
1337
+ "learning_rate": 1.7250930412673794e-05,
1338
+ "loss": 0.0002,
1339
+ "memory/device_mem_reserved(gib)": 22.98,
1340
+ "memory/max_mem_active(gib)": 21.19,
1341
+ "memory/max_mem_allocated(gib)": 21.19,
1342
+ "step": 6000
1343
+ },
1344
+ {
1345
+ "epoch": 75.0,
1346
+ "eval_loss": 0.42366164922714233,
1347
+ "eval_runtime": 3.3318,
1348
+ "eval_samples_per_second": 61.828,
1349
+ "eval_steps_per_second": 15.607,
1350
+ "memory/device_mem_reserved(gib)": 22.98,
1351
+ "memory/max_mem_active(gib)": 21.19,
1352
+ "memory/max_mem_allocated(gib)": 21.19,
1353
+ "step": 6000
1354
+ },
1355
+ {
1356
+ "epoch": 75.62893081761007,
1357
+ "grad_norm": 0.06749001890420914,
1358
+ "learning_rate": 1.6429423776440807e-05,
1359
+ "loss": 0.0002,
1360
+ "memory/device_mem_reserved(gib)": 22.98,
1361
+ "memory/max_mem_active(gib)": 21.19,
1362
+ "memory/max_mem_allocated(gib)": 21.19,
1363
+ "step": 6050
1364
+ },
1365
+ {
1366
+ "epoch": 76.25157232704403,
1367
+ "grad_norm": 0.0014320407062768936,
1368
+ "learning_rate": 1.5624096518539412e-05,
1369
+ "loss": 0.0002,
1370
+ "memory/device_mem_reserved(gib)": 22.98,
1371
+ "memory/max_mem_active(gib)": 21.19,
1372
+ "memory/max_mem_allocated(gib)": 21.19,
1373
+ "step": 6100
1374
+ },
1375
+ {
1376
+ "epoch": 76.88050314465409,
1377
+ "grad_norm": 0.0012947659706696868,
1378
+ "learning_rate": 1.4835336767380053e-05,
1379
+ "loss": 0.0002,
1380
+ "memory/device_mem_reserved(gib)": 22.98,
1381
+ "memory/max_mem_active(gib)": 21.19,
1382
+ "memory/max_mem_allocated(gib)": 21.19,
1383
+ "step": 6150
1384
+ },
1385
+ {
1386
+ "epoch": 77.50314465408805,
1387
+ "grad_norm": 0.06402043998241425,
1388
+ "learning_rate": 1.4063524666643956e-05,
1389
+ "loss": 0.0002,
1390
+ "memory/device_mem_reserved(gib)": 22.98,
1391
+ "memory/max_mem_active(gib)": 21.19,
1392
+ "memory/max_mem_allocated(gib)": 21.19,
1393
+ "step": 6200
1394
+ },
1395
+ {
1396
+ "epoch": 78.12578616352201,
1397
+ "grad_norm": 0.0012907255440950394,
1398
+ "learning_rate": 1.3309032192072463e-05,
1399
+ "loss": 0.0002,
1400
+ "memory/device_mem_reserved(gib)": 22.98,
1401
+ "memory/max_mem_active(gib)": 21.19,
1402
+ "memory/max_mem_allocated(gib)": 21.19,
1403
+ "step": 6250
1404
+ },
1405
+ {
1406
+ "epoch": 78.75471698113208,
1407
+ "grad_norm": 0.0012127637164667249,
1408
+ "learning_rate": 1.2572222972192955e-05,
1409
+ "loss": 0.0001,
1410
+ "memory/device_mem_reserved(gib)": 22.98,
1411
+ "memory/max_mem_active(gib)": 21.19,
1412
+ "memory/max_mem_allocated(gib)": 21.19,
1413
+ "step": 6300
1414
+ },
1415
+ {
1416
+ "epoch": 79.37735849056604,
1417
+ "grad_norm": 0.056700922548770905,
1418
+ "learning_rate": 1.1853452113067543e-05,
1419
+ "loss": 0.0002,
1420
+ "memory/device_mem_reserved(gib)": 22.98,
1421
+ "memory/max_mem_active(gib)": 21.19,
1422
+ "memory/max_mem_allocated(gib)": 21.19,
1423
+ "step": 6350
1424
+ },
1425
+ {
1426
+ "epoch": 80.0,
1427
+ "grad_norm": 0.0018291111337020993,
1428
+ "learning_rate": 1.1153066027149422e-05,
1429
+ "loss": 0.0002,
1430
+ "memory/device_mem_reserved(gib)": 22.98,
1431
+ "memory/max_mem_active(gib)": 21.19,
1432
+ "memory/max_mem_allocated(gib)": 21.19,
1433
+ "step": 6400
1434
+ },
1435
+ {
1436
+ "epoch": 80.62893081761007,
1437
+ "grad_norm": 0.001427468378096819,
1438
+ "learning_rate": 1.0471402266328844e-05,
1439
+ "loss": 0.0001,
1440
+ "memory/device_mem_reserved(gib)": 22.98,
1441
+ "memory/max_mem_active(gib)": 21.19,
1442
+ "memory/max_mem_allocated(gib)": 21.19,
1443
+ "step": 6450
1444
+ },
1445
+ {
1446
+ "epoch": 81.25157232704403,
1447
+ "grad_norm": 0.0013415232533589005,
1448
+ "learning_rate": 9.808789359249726e-06,
1449
+ "loss": 0.0002,
1450
+ "memory/device_mem_reserved(gib)": 22.98,
1451
+ "memory/max_mem_active(gib)": 21.19,
1452
+ "memory/max_mem_allocated(gib)": 21.19,
1453
+ "step": 6500
1454
+ },
1455
+ {
1456
+ "epoch": 81.25157232704403,
1457
+ "eval_loss": 0.4232681393623352,
1458
+ "eval_runtime": 3.3357,
1459
+ "eval_samples_per_second": 61.757,
1460
+ "eval_steps_per_second": 15.589,
1461
+ "memory/device_mem_reserved(gib)": 22.98,
1462
+ "memory/max_mem_active(gib)": 21.19,
1463
+ "memory/max_mem_allocated(gib)": 21.19,
1464
+ "step": 6500
1465
+ },
1466
+ {
1467
+ "epoch": 81.88050314465409,
1468
+ "grad_norm": 0.0014300766633823514,
1469
+ "learning_rate": 9.165546652974766e-06,
1470
+ "loss": 0.0002,
1471
+ "memory/device_mem_reserved(gib)": 22.98,
1472
+ "memory/max_mem_active(gib)": 21.19,
1473
+ "memory/max_mem_allocated(gib)": 21.19,
1474
+ "step": 6550
1475
+ },
1476
+ {
1477
+ "epoch": 82.50314465408805,
1478
+ "grad_norm": 0.0013741519069299102,
1479
+ "learning_rate": 8.541984159075866e-06,
1480
+ "loss": 0.0002,
1481
+ "memory/device_mem_reserved(gib)": 22.98,
1482
+ "memory/max_mem_active(gib)": 21.19,
1483
+ "memory/max_mem_allocated(gib)": 21.19,
1484
+ "step": 6600
1485
+ },
1486
+ {
1487
+ "epoch": 83.12578616352201,
1488
+ "grad_norm": 0.05915085971355438,
1489
+ "learning_rate": 7.938402404223605e-06,
1490
+ "loss": 0.0002,
1491
+ "memory/device_mem_reserved(gib)": 22.98,
1492
+ "memory/max_mem_active(gib)": 21.19,
1493
+ "memory/max_mem_allocated(gib)": 21.19,
1494
+ "step": 6650
1495
+ },
1496
+ {
1497
+ "epoch": 83.75471698113208,
1498
+ "grad_norm": 0.0013567082351073623,
1499
+ "learning_rate": 7.3550922853480915e-06,
1500
+ "loss": 0.0002,
1501
+ "memory/device_mem_reserved(gib)": 22.98,
1502
+ "memory/max_mem_active(gib)": 21.19,
1503
+ "memory/max_mem_allocated(gib)": 21.19,
1504
+ "step": 6700
1505
+ },
1506
+ {
1507
+ "epoch": 84.37735849056604,
1508
+ "grad_norm": 0.0014516132650896907,
1509
+ "learning_rate": 6.792334929440886e-06,
1510
+ "loss": 0.0001,
1511
+ "memory/device_mem_reserved(gib)": 22.98,
1512
+ "memory/max_mem_active(gib)": 21.19,
1513
+ "memory/max_mem_allocated(gib)": 21.19,
1514
+ "step": 6750
1515
+ },
1516
+ {
1517
+ "epoch": 85.0,
1518
+ "grad_norm": 0.0018640623893588781,
1519
+ "learning_rate": 6.250401558065444e-06,
1520
+ "loss": 0.0002,
1521
+ "memory/device_mem_reserved(gib)": 22.98,
1522
+ "memory/max_mem_active(gib)": 21.19,
1523
+ "memory/max_mem_allocated(gib)": 21.19,
1524
+ "step": 6800
1525
+ },
1526
+ {
1527
+ "epoch": 85.62893081761007,
1528
+ "grad_norm": 0.0013895193114876747,
1529
+ "learning_rate": 5.729553356641643e-06,
1530
+ "loss": 0.0002,
1531
+ "memory/device_mem_reserved(gib)": 22.98,
1532
+ "memory/max_mem_active(gib)": 21.19,
1533
+ "memory/max_mem_allocated(gib)": 21.19,
1534
+ "step": 6850
1535
+ },
1536
+ {
1537
+ "epoch": 86.25157232704403,
1538
+ "grad_norm": 0.0012052542297169566,
1539
+ "learning_rate": 5.230041348567105e-06,
1540
+ "loss": 0.0002,
1541
+ "memory/device_mem_reserved(gib)": 22.98,
1542
+ "memory/max_mem_active(gib)": 21.19,
1543
+ "memory/max_mem_allocated(gib)": 21.19,
1544
+ "step": 6900
1545
+ },
1546
+ {
1547
+ "epoch": 86.88050314465409,
1548
+ "grad_norm": 0.0013040176127105951,
1549
+ "learning_rate": 4.7521062742362285e-06,
1550
+ "loss": 0.0002,
1551
+ "memory/device_mem_reserved(gib)": 22.98,
1552
+ "memory/max_mem_active(gib)": 21.19,
1553
+ "memory/max_mem_allocated(gib)": 21.19,
1554
+ "step": 6950
1555
+ },
1556
+ {
1557
+ "epoch": 87.50314465408805,
1558
+ "grad_norm": 0.0013780539156869054,
1559
+ "learning_rate": 4.295978475015033e-06,
1560
+ "loss": 0.0001,
1561
+ "memory/device_mem_reserved(gib)": 22.98,
1562
+ "memory/max_mem_active(gib)": 21.19,
1563
+ "memory/max_mem_allocated(gib)": 21.19,
1564
+ "step": 7000
1565
+ },
1566
+ {
1567
+ "epoch": 87.50314465408805,
1568
+ "eval_loss": 0.42496076226234436,
1569
+ "eval_runtime": 3.3047,
1570
+ "eval_samples_per_second": 62.335,
1571
+ "eval_steps_per_second": 15.735,
1572
+ "memory/device_mem_reserved(gib)": 22.98,
1573
+ "memory/max_mem_active(gib)": 21.19,
1574
+ "memory/max_mem_allocated(gib)": 21.19,
1575
+ "step": 7000
1576
+ },
1577
+ {
1578
+ "epoch": 88.12578616352201,
1579
+ "grad_norm": 0.0012548089725896716,
1580
+ "learning_rate": 3.861877782227885e-06,
1581
+ "loss": 0.0002,
1582
+ "memory/device_mem_reserved(gib)": 22.98,
1583
+ "memory/max_mem_active(gib)": 21.19,
1584
+ "memory/max_mem_allocated(gib)": 21.19,
1585
+ "step": 7050
1586
+ },
1587
+ {
1588
+ "epoch": 88.75471698113208,
1589
+ "grad_norm": 0.0012777590891346335,
1590
+ "learning_rate": 3.4500134112095518e-06,
1591
+ "loss": 0.0002,
1592
+ "memory/device_mem_reserved(gib)": 22.98,
1593
+ "memory/max_mem_active(gib)": 21.19,
1594
+ "memory/max_mem_allocated(gib)": 21.19,
1595
+ "step": 7100
1596
+ },
1597
+ {
1598
+ "epoch": 89.37735849056604,
1599
+ "grad_norm": 0.0012396974489092827,
1600
+ "learning_rate": 3.060583860473587e-06,
1601
+ "loss": 0.0001,
1602
+ "memory/device_mem_reserved(gib)": 22.98,
1603
+ "memory/max_mem_active(gib)": 21.19,
1604
+ "memory/max_mem_allocated(gib)": 21.19,
1605
+ "step": 7150
1606
+ },
1607
+ {
1608
+ "epoch": 90.0,
1609
+ "grad_norm": 0.001860588090494275,
1610
+ "learning_rate": 2.6937768160457954e-06,
1611
+ "loss": 0.0002,
1612
+ "memory/device_mem_reserved(gib)": 22.98,
1613
+ "memory/max_mem_active(gib)": 21.19,
1614
+ "memory/max_mem_allocated(gib)": 21.19,
1615
+ "step": 7200
1616
+ },
1617
+ {
1618
+ "epoch": 90.62893081761007,
1619
+ "grad_norm": 0.001240984071046114,
1620
+ "learning_rate": 2.3497690610086777e-06,
1621
+ "loss": 0.0002,
1622
+ "memory/device_mem_reserved(gib)": 22.98,
1623
+ "memory/max_mem_active(gib)": 21.19,
1624
+ "memory/max_mem_allocated(gib)": 21.19,
1625
+ "step": 7250
1626
+ },
1627
+ {
1628
+ "epoch": 91.25157232704403,
1629
+ "grad_norm": 0.001372280647046864,
1630
+ "learning_rate": 2.028726390300667e-06,
1631
+ "loss": 0.0001,
1632
+ "memory/device_mem_reserved(gib)": 22.98,
1633
+ "memory/max_mem_active(gib)": 21.19,
1634
+ "memory/max_mem_allocated(gib)": 21.19,
1635
+ "step": 7300
1636
+ },
1637
+ {
1638
+ "epoch": 91.88050314465409,
1639
+ "grad_norm": 0.0013157941866666079,
1640
+ "learning_rate": 1.7308035308110204e-06,
1641
+ "loss": 0.0002,
1642
+ "memory/device_mem_reserved(gib)": 22.98,
1643
+ "memory/max_mem_active(gib)": 21.19,
1644
+ "memory/max_mem_allocated(gib)": 21.19,
1645
+ "step": 7350
1646
+ },
1647
+ {
1648
+ "epoch": 92.50314465408805,
1649
+ "grad_norm": 0.06030140072107315,
1650
+ "learning_rate": 1.4561440668090508e-06,
1651
+ "loss": 0.0002,
1652
+ "memory/device_mem_reserved(gib)": 22.98,
1653
+ "memory/max_mem_active(gib)": 21.19,
1654
+ "memory/max_mem_allocated(gib)": 21.19,
1655
+ "step": 7400
1656
+ },
1657
+ {
1658
+ "epoch": 93.12578616352201,
1659
+ "grad_norm": 0.07388274371623993,
1660
+ "learning_rate": 1.2048803707434997e-06,
1661
+ "loss": 0.0002,
1662
+ "memory/device_mem_reserved(gib)": 22.98,
1663
+ "memory/max_mem_active(gib)": 21.19,
1664
+ "memory/max_mem_allocated(gib)": 21.19,
1665
+ "step": 7450
1666
+ },
1667
+ {
1668
+ "epoch": 93.75471698113208,
1669
+ "grad_norm": 0.0011325875530019403,
1670
+ "learning_rate": 9.771335394454906e-07,
1671
+ "loss": 0.0002,
1672
+ "memory/device_mem_reserved(gib)": 22.98,
1673
+ "memory/max_mem_active(gib)": 21.19,
1674
+ "memory/max_mem_allocated(gib)": 21.19,
1675
+ "step": 7500
1676
+ },
1677
+ {
1678
+ "epoch": 93.75471698113208,
1679
+ "eval_loss": 0.4239003360271454,
1680
+ "eval_runtime": 3.3346,
1681
+ "eval_samples_per_second": 61.777,
1682
+ "eval_steps_per_second": 15.594,
1683
+ "memory/device_mem_reserved(gib)": 22.98,
1684
+ "memory/max_mem_active(gib)": 21.19,
1685
+ "memory/max_mem_allocated(gib)": 21.19,
1686
+ "step": 7500
1687
+ },
1688
+ {
1689
+ "epoch": 94.37735849056604,
1690
+ "grad_norm": 0.001379797118715942,
1691
+ "learning_rate": 7.73013335765771e-07,
1692
+ "loss": 0.0002,
1693
+ "memory/device_mem_reserved(gib)": 22.98,
1694
+ "memory/max_mem_active(gib)": 21.19,
1695
+ "memory/max_mem_allocated(gib)": 21.19,
1696
+ "step": 7550
1697
+ },
1698
+ {
1699
+ "epoch": 95.0,
1700
+ "grad_norm": 0.002098705852404237,
1701
+ "learning_rate": 5.92618135674361e-07,
1702
+ "loss": 0.0001,
1703
+ "memory/device_mem_reserved(gib)": 22.98,
1704
+ "memory/max_mem_active(gib)": 21.19,
1705
+ "memory/max_mem_allocated(gib)": 21.19,
1706
+ "step": 7600
1707
+ },
1708
+ {
1709
+ "epoch": 95.62893081761007,
1710
+ "grad_norm": 0.0013479177141562104,
1711
+ "learning_rate": 4.36034880848174e-07,
1712
+ "loss": 0.0002,
1713
+ "memory/device_mem_reserved(gib)": 22.98,
1714
+ "memory/max_mem_active(gib)": 21.19,
1715
+ "memory/max_mem_allocated(gib)": 21.19,
1716
+ "step": 7650
1717
+ },
1718
+ {
1719
+ "epoch": 96.25157232704403,
1720
+ "grad_norm": 0.10964928567409515,
1721
+ "learning_rate": 3.033390367693401e-07,
1722
+ "loss": 0.0002,
1723
+ "memory/device_mem_reserved(gib)": 22.98,
1724
+ "memory/max_mem_active(gib)": 21.19,
1725
+ "memory/max_mem_allocated(gib)": 21.19,
1726
+ "step": 7700
1727
+ },
1728
+ {
1729
+ "epoch": 96.88050314465409,
1730
+ "grad_norm": 0.001252847840078175,
1731
+ "learning_rate": 1.945945563545437e-07,
1732
+ "loss": 0.0002,
1733
+ "memory/device_mem_reserved(gib)": 22.98,
1734
+ "memory/max_mem_active(gib)": 21.19,
1735
+ "memory/max_mem_allocated(gib)": 21.19,
1736
+ "step": 7750
1737
+ },
1738
+ {
1739
+ "epoch": 97.50314465408805,
1740
+ "grad_norm": 0.0012745157582685351,
1741
+ "learning_rate": 1.0985384913285157e-07,
1742
+ "loss": 0.0001,
1743
+ "memory/device_mem_reserved(gib)": 22.98,
1744
+ "memory/max_mem_active(gib)": 21.19,
1745
+ "memory/max_mem_allocated(gib)": 21.19,
1746
+ "step": 7800
1747
+ },
1748
+ {
1749
+ "epoch": 98.12578616352201,
1750
+ "grad_norm": 0.057910941541194916,
1751
+ "learning_rate": 4.915775598685057e-08,
1752
+ "loss": 0.0002,
1753
+ "memory/device_mem_reserved(gib)": 22.98,
1754
+ "memory/max_mem_active(gib)": 21.19,
1755
+ "memory/max_mem_allocated(gib)": 21.19,
1756
+ "step": 7850
1757
+ },
1758
+ {
1759
+ "epoch": 98.75471698113208,
1760
+ "grad_norm": 0.0011539831757545471,
1761
+ "learning_rate": 1.2535529469370755e-08,
1762
+ "loss": 0.0002,
1763
+ "memory/device_mem_reserved(gib)": 22.98,
1764
+ "memory/max_mem_active(gib)": 21.19,
1765
+ "memory/max_mem_allocated(gib)": 21.19,
1766
+ "step": 7900
1767
+ },
1768
+ {
1769
+ "epoch": 99.37735849056604,
1770
+ "grad_norm": 0.001350544043816626,
1771
+ "learning_rate": 4.819705151648535e-12,
1772
+ "loss": 0.0002,
1773
+ "memory/device_mem_reserved(gib)": 22.98,
1774
+ "memory/max_mem_active(gib)": 21.19,
1775
+ "memory/max_mem_allocated(gib)": 21.19,
1776
+ "step": 7950
1777
+ }
1778
+ ],
1779
+ "logging_steps": 50,
1780
+ "max_steps": 7950,
1781
+ "num_input_tokens_seen": 0,
1782
+ "num_train_epochs": 100,
1783
+ "save_steps": 1000,
1784
+ "stateful_callbacks": {
1785
+ "TrainerControl": {
1786
+ "args": {
1787
+ "should_epoch_stop": false,
1788
+ "should_evaluate": false,
1789
+ "should_log": false,
1790
+ "should_save": true,
1791
+ "should_training_stop": true
1792
+ },
1793
+ "attributes": {}
1794
+ }
1795
+ },
1796
+ "total_flos": 1.566101899604656e+18,
1797
+ "train_batch_size": 4,
1798
+ "trial_name": null,
1799
+ "trial_params": null
1800
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f6d80e56946b670b563dd8c5e70630088c9699e7938abc4b3bbf4a4a0838343
3
+ size 6840
vocab.json ADDED
The diff for this file is too large to render. See raw diff