panda992 committed on
Commit
b25c508
·
verified ·
1 Parent(s): 2e63b3f

🍻 cheers

Browse files
README.md CHANGED
@@ -3,6 +3,7 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: panda992/fish_disease_datasets
5
  tags:
 
6
  - generated_from_trainer
7
  metrics:
8
  - accuracy
@@ -16,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # fish_disease_datasets
18
 
19
- This model is a fine-tuned version of [panda992/fish_disease_datasets](https://huggingface.co/panda992/fish_disease_datasets) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.0675
22
  - Accuracy: 0.9810
 
3
  license: apache-2.0
4
  base_model: panda992/fish_disease_datasets
5
  tags:
6
+ - image-classification
7
  - generated_from_trainer
8
  metrics:
9
  - accuracy
 
17
 
18
  # fish_disease_datasets
19
 
20
+ This model is a fine-tuned version of [panda992/fish_disease_datasets](https://huggingface.co/panda992/fish_disease_datasets) on the fish_disease_datasets dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.0675
23
  - Accuracy: 0.9810
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.9728260869565217,
4
- "eval_loss": 0.10525541752576828,
5
- "eval_runtime": 1.7228,
6
- "eval_samples_per_second": 213.607,
7
- "eval_steps_per_second": 26.701,
8
  "total_flos": 6.45382209997357e+17,
9
- "train_loss": 0.4479620875293062,
10
- "train_runtime": 145.1835,
11
- "train_samples_per_second": 57.362,
12
- "train_steps_per_second": 3.609
13
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.9809782608695652,
4
+ "eval_loss": 0.0674990862607956,
5
+ "eval_runtime": 1.7056,
6
+ "eval_samples_per_second": 215.763,
7
+ "eval_steps_per_second": 26.97,
8
  "total_flos": 6.45382209997357e+17,
9
+ "train_loss": 0.13892418100633694,
10
+ "train_runtime": 142.587,
11
+ "train_samples_per_second": 58.406,
12
+ "train_steps_per_second": 3.675
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.9728260869565217,
4
- "eval_loss": 0.10525541752576828,
5
- "eval_runtime": 1.7228,
6
- "eval_samples_per_second": 213.607,
7
- "eval_steps_per_second": 26.701
8
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.9809782608695652,
4
+ "eval_loss": 0.0674990862607956,
5
+ "eval_runtime": 1.7056,
6
+ "eval_samples_per_second": 215.763,
7
+ "eval_steps_per_second": 26.97
8
  }
runs/May10_06-47-56_35deed949fff/events.out.tfevents.1746859873.35deed949fff.185.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb9c7c10eebac90047314e50b42f8b2322a33fb966e97b57edd5d2f795b919cb
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 6.45382209997357e+17,
4
- "train_loss": 0.4479620875293062,
5
- "train_runtime": 145.1835,
6
- "train_samples_per_second": 57.362,
7
- "train_steps_per_second": 3.609
8
  }
 
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 6.45382209997357e+17,
4
+ "train_loss": 0.13892418100633694,
5
+ "train_runtime": 142.587,
6
+ "train_samples_per_second": 58.406,
7
+ "train_steps_per_second": 3.675
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 500,
3
- "best_metric": 0.10525541752576828,
4
  "best_model_checkpoint": "fish_disease_datasets/checkpoint-500",
5
  "epoch": 4.0,
6
  "eval_steps": 100,
@@ -11,421 +11,421 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.07633587786259542,
14
- "grad_norm": 1.9409185647964478,
15
  "learning_rate": 0.00019656488549618322,
16
- "loss": 1.8218,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.15267175572519084,
21
- "grad_norm": 2.1680898666381836,
22
  "learning_rate": 0.00019274809160305345,
23
- "loss": 1.5952,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.22900763358778625,
28
- "grad_norm": 2.6097843647003174,
29
  "learning_rate": 0.00018893129770992367,
30
- "loss": 1.3356,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.3053435114503817,
35
- "grad_norm": 3.603273391723633,
36
  "learning_rate": 0.0001851145038167939,
37
- "loss": 1.2841,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.3816793893129771,
42
- "grad_norm": 2.5564615726470947,
43
  "learning_rate": 0.00018129770992366412,
44
- "loss": 1.1569,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 0.4580152671755725,
49
- "grad_norm": 1.7292225360870361,
50
  "learning_rate": 0.00017748091603053437,
51
- "loss": 0.9338,
52
  "step": 60
53
  },
54
  {
55
  "epoch": 0.5343511450381679,
56
- "grad_norm": 2.0024733543395996,
57
  "learning_rate": 0.0001736641221374046,
58
- "loss": 0.8578,
59
  "step": 70
60
  },
61
  {
62
  "epoch": 0.6106870229007634,
63
- "grad_norm": 3.2344448566436768,
64
  "learning_rate": 0.00016984732824427482,
65
- "loss": 0.9114,
66
  "step": 80
67
  },
68
  {
69
  "epoch": 0.6870229007633588,
70
- "grad_norm": 2.080090045928955,
71
  "learning_rate": 0.00016603053435114505,
72
- "loss": 0.7153,
73
  "step": 90
74
  },
75
  {
76
  "epoch": 0.7633587786259542,
77
- "grad_norm": 3.047137975692749,
78
  "learning_rate": 0.00016221374045801527,
79
- "loss": 0.7341,
80
  "step": 100
81
  },
82
  {
83
  "epoch": 0.7633587786259542,
84
- "eval_accuracy": 0.8994565217391305,
85
- "eval_loss": 0.47246649861335754,
86
- "eval_runtime": 1.6845,
87
- "eval_samples_per_second": 218.467,
88
- "eval_steps_per_second": 27.308,
89
  "step": 100
90
  },
91
  {
92
  "epoch": 0.8396946564885496,
93
- "grad_norm": 2.9488649368286133,
94
  "learning_rate": 0.0001583969465648855,
95
- "loss": 0.6839,
96
  "step": 110
97
  },
98
  {
99
  "epoch": 0.916030534351145,
100
- "grad_norm": 7.901376724243164,
101
  "learning_rate": 0.00015458015267175574,
102
- "loss": 0.6448,
103
  "step": 120
104
  },
105
  {
106
  "epoch": 0.9923664122137404,
107
- "grad_norm": 2.6855387687683105,
108
  "learning_rate": 0.00015076335877862594,
109
- "loss": 0.5792,
110
  "step": 130
111
  },
112
  {
113
  "epoch": 1.0687022900763359,
114
- "grad_norm": 3.498868703842163,
115
  "learning_rate": 0.0001469465648854962,
116
- "loss": 0.4598,
117
  "step": 140
118
  },
119
  {
120
  "epoch": 1.1450381679389312,
121
- "grad_norm": 1.6161444187164307,
122
  "learning_rate": 0.00014312977099236642,
123
- "loss": 0.346,
124
  "step": 150
125
  },
126
  {
127
  "epoch": 1.2213740458015268,
128
- "grad_norm": 3.7343335151672363,
129
  "learning_rate": 0.00013931297709923664,
130
- "loss": 0.3722,
131
  "step": 160
132
  },
133
  {
134
  "epoch": 1.297709923664122,
135
- "grad_norm": 3.1330108642578125,
136
  "learning_rate": 0.0001354961832061069,
137
- "loss": 0.3455,
138
  "step": 170
139
  },
140
  {
141
  "epoch": 1.3740458015267176,
142
- "grad_norm": 3.1974072456359863,
143
  "learning_rate": 0.0001316793893129771,
144
- "loss": 0.4522,
145
  "step": 180
146
  },
147
  {
148
  "epoch": 1.450381679389313,
149
- "grad_norm": 4.500912189483643,
150
  "learning_rate": 0.00012786259541984734,
151
- "loss": 0.5162,
152
  "step": 190
153
  },
154
  {
155
  "epoch": 1.5267175572519083,
156
- "grad_norm": 1.8331102132797241,
157
  "learning_rate": 0.00012404580152671757,
158
- "loss": 0.4232,
159
  "step": 200
160
  },
161
  {
162
  "epoch": 1.5267175572519083,
163
- "eval_accuracy": 0.9538043478260869,
164
- "eval_loss": 0.24539989233016968,
165
- "eval_runtime": 1.9758,
166
- "eval_samples_per_second": 186.25,
167
- "eval_steps_per_second": 23.281,
168
  "step": 200
169
  },
170
  {
171
  "epoch": 1.6030534351145038,
172
- "grad_norm": 5.288910388946533,
173
  "learning_rate": 0.0001202290076335878,
174
- "loss": 0.3414,
175
  "step": 210
176
  },
177
  {
178
  "epoch": 1.6793893129770994,
179
- "grad_norm": 3.0168659687042236,
180
  "learning_rate": 0.00011641221374045803,
181
- "loss": 0.3141,
182
  "step": 220
183
  },
184
  {
185
  "epoch": 1.7557251908396947,
186
- "grad_norm": 3.6415915489196777,
187
  "learning_rate": 0.00011259541984732824,
188
- "loss": 0.3561,
189
  "step": 230
190
  },
191
  {
192
  "epoch": 1.83206106870229,
193
- "grad_norm": 4.600650787353516,
194
  "learning_rate": 0.00010877862595419848,
195
- "loss": 0.3937,
196
  "step": 240
197
  },
198
  {
199
  "epoch": 1.9083969465648853,
200
- "grad_norm": 1.985364317893982,
201
  "learning_rate": 0.00010496183206106871,
202
- "loss": 0.325,
203
  "step": 250
204
  },
205
  {
206
  "epoch": 1.984732824427481,
207
- "grad_norm": 4.19254732131958,
208
  "learning_rate": 0.00010114503816793894,
209
- "loss": 0.3214,
210
  "step": 260
211
  },
212
  {
213
  "epoch": 2.0610687022900764,
214
- "grad_norm": 0.3229253888130188,
215
  "learning_rate": 9.732824427480916e-05,
216
- "loss": 0.2053,
217
  "step": 270
218
  },
219
  {
220
  "epoch": 2.1374045801526718,
221
- "grad_norm": 4.743760108947754,
222
  "learning_rate": 9.351145038167939e-05,
223
- "loss": 0.3318,
224
  "step": 280
225
  },
226
  {
227
  "epoch": 2.213740458015267,
228
- "grad_norm": 2.43632435798645,
229
  "learning_rate": 8.969465648854962e-05,
230
- "loss": 0.2364,
231
  "step": 290
232
  },
233
  {
234
  "epoch": 2.2900763358778624,
235
- "grad_norm": 0.9254801869392395,
236
  "learning_rate": 8.587786259541986e-05,
237
- "loss": 0.1134,
238
  "step": 300
239
  },
240
  {
241
  "epoch": 2.2900763358778624,
242
- "eval_accuracy": 0.9755434782608695,
243
- "eval_loss": 0.16888892650604248,
244
- "eval_runtime": 1.9035,
245
- "eval_samples_per_second": 193.324,
246
- "eval_steps_per_second": 24.166,
247
  "step": 300
248
  },
249
  {
250
  "epoch": 2.366412213740458,
251
- "grad_norm": 6.0736894607543945,
252
  "learning_rate": 8.206106870229007e-05,
253
- "loss": 0.2712,
254
  "step": 310
255
  },
256
  {
257
  "epoch": 2.4427480916030535,
258
- "grad_norm": 7.306392192840576,
259
  "learning_rate": 7.824427480916031e-05,
260
- "loss": 0.3705,
261
  "step": 320
262
  },
263
  {
264
  "epoch": 2.519083969465649,
265
- "grad_norm": 0.37736839056015015,
266
  "learning_rate": 7.442748091603053e-05,
267
- "loss": 0.2431,
268
  "step": 330
269
  },
270
  {
271
  "epoch": 2.595419847328244,
272
- "grad_norm": 3.2378218173980713,
273
  "learning_rate": 7.061068702290077e-05,
274
- "loss": 0.2495,
275
  "step": 340
276
  },
277
  {
278
  "epoch": 2.67175572519084,
279
- "grad_norm": 4.0994086265563965,
280
  "learning_rate": 6.6793893129771e-05,
281
- "loss": 0.2218,
282
  "step": 350
283
  },
284
  {
285
  "epoch": 2.7480916030534353,
286
- "grad_norm": 0.19563277065753937,
287
  "learning_rate": 6.297709923664122e-05,
288
- "loss": 0.2717,
289
  "step": 360
290
  },
291
  {
292
  "epoch": 2.8244274809160306,
293
- "grad_norm": 2.251434803009033,
294
  "learning_rate": 5.916030534351146e-05,
295
- "loss": 0.134,
296
  "step": 370
297
  },
298
  {
299
  "epoch": 2.900763358778626,
300
- "grad_norm": 2.093945026397705,
301
  "learning_rate": 5.534351145038168e-05,
302
- "loss": 0.1724,
303
  "step": 380
304
  },
305
  {
306
  "epoch": 2.9770992366412212,
307
- "grad_norm": 2.778428554534912,
308
  "learning_rate": 5.152671755725191e-05,
309
- "loss": 0.2283,
310
  "step": 390
311
  },
312
  {
313
  "epoch": 3.053435114503817,
314
- "grad_norm": 6.020068168640137,
315
  "learning_rate": 4.7709923664122144e-05,
316
- "loss": 0.2237,
317
  "step": 400
318
  },
319
  {
320
  "epoch": 3.053435114503817,
321
- "eval_accuracy": 0.9592391304347826,
322
- "eval_loss": 0.16388548910617828,
323
- "eval_runtime": 1.6041,
324
- "eval_samples_per_second": 229.414,
325
- "eval_steps_per_second": 28.677,
326
  "step": 400
327
  },
328
  {
329
  "epoch": 3.1297709923664123,
330
- "grad_norm": 0.23176027834415436,
331
  "learning_rate": 4.389312977099237e-05,
332
- "loss": 0.2231,
333
  "step": 410
334
  },
335
  {
336
  "epoch": 3.2061068702290076,
337
- "grad_norm": 2.9403698444366455,
338
  "learning_rate": 4.00763358778626e-05,
339
- "loss": 0.1379,
340
  "step": 420
341
  },
342
  {
343
  "epoch": 3.282442748091603,
344
- "grad_norm": 3.401761531829834,
345
  "learning_rate": 3.625954198473282e-05,
346
- "loss": 0.2339,
347
  "step": 430
348
  },
349
  {
350
  "epoch": 3.3587786259541983,
351
- "grad_norm": 2.4639177322387695,
352
  "learning_rate": 3.2442748091603054e-05,
353
- "loss": 0.1795,
354
  "step": 440
355
  },
356
  {
357
  "epoch": 3.435114503816794,
358
- "grad_norm": 3.926365613937378,
359
  "learning_rate": 2.862595419847328e-05,
360
- "loss": 0.1326,
361
  "step": 450
362
  },
363
  {
364
  "epoch": 3.5114503816793894,
365
- "grad_norm": 3.4511287212371826,
366
  "learning_rate": 2.4809160305343512e-05,
367
- "loss": 0.2162,
368
  "step": 460
369
  },
370
  {
371
  "epoch": 3.5877862595419847,
372
- "grad_norm": 0.1071397215127945,
373
  "learning_rate": 2.099236641221374e-05,
374
- "loss": 0.0821,
375
  "step": 470
376
  },
377
  {
378
  "epoch": 3.66412213740458,
379
- "grad_norm": 0.11275670677423477,
380
  "learning_rate": 1.717557251908397e-05,
381
- "loss": 0.0965,
382
  "step": 480
383
  },
384
  {
385
  "epoch": 3.7404580152671754,
386
- "grad_norm": 1.8266428709030151,
387
  "learning_rate": 1.3358778625954198e-05,
388
- "loss": 0.2015,
389
  "step": 490
390
  },
391
  {
392
  "epoch": 3.816793893129771,
393
- "grad_norm": 1.146474003791809,
394
  "learning_rate": 9.541984732824428e-06,
395
- "loss": 0.1431,
396
  "step": 500
397
  },
398
  {
399
  "epoch": 3.816793893129771,
400
- "eval_accuracy": 0.9728260869565217,
401
- "eval_loss": 0.10525541752576828,
402
- "eval_runtime": 1.5956,
403
- "eval_samples_per_second": 230.634,
404
- "eval_steps_per_second": 28.829,
405
  "step": 500
406
  },
407
  {
408
  "epoch": 3.8931297709923665,
409
- "grad_norm": 4.4021992683410645,
410
  "learning_rate": 5.725190839694657e-06,
411
- "loss": 0.1294,
412
  "step": 510
413
  },
414
  {
415
  "epoch": 3.969465648854962,
416
- "grad_norm": 0.27708983421325684,
417
  "learning_rate": 1.908396946564886e-06,
418
- "loss": 0.1359,
419
  "step": 520
420
  },
421
  {
422
  "epoch": 4.0,
423
  "step": 524,
424
  "total_flos": 6.45382209997357e+17,
425
- "train_loss": 0.4479620875293062,
426
- "train_runtime": 145.1835,
427
- "train_samples_per_second": 57.362,
428
- "train_steps_per_second": 3.609
429
  }
430
  ],
431
  "logging_steps": 10,
 
1
  {
2
  "best_global_step": 500,
3
+ "best_metric": 0.0674990862607956,
4
  "best_model_checkpoint": "fish_disease_datasets/checkpoint-500",
5
  "epoch": 4.0,
6
  "eval_steps": 100,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.07633587786259542,
14
+ "grad_norm": 8.140775680541992,
15
  "learning_rate": 0.00019656488549618322,
16
+ "loss": 0.2332,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.15267175572519084,
21
+ "grad_norm": 5.731508731842041,
22
  "learning_rate": 0.00019274809160305345,
23
+ "loss": 0.2397,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.22900763358778625,
28
+ "grad_norm": 4.3719563484191895,
29
  "learning_rate": 0.00018893129770992367,
30
+ "loss": 0.2881,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.3053435114503817,
35
+ "grad_norm": 2.272369623184204,
36
  "learning_rate": 0.0001851145038167939,
37
+ "loss": 0.2598,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.3816793893129771,
42
+ "grad_norm": 3.4309451580047607,
43
  "learning_rate": 0.00018129770992366412,
44
+ "loss": 0.1827,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 0.4580152671755725,
49
+ "grad_norm": 1.1197692155838013,
50
  "learning_rate": 0.00017748091603053437,
51
+ "loss": 0.152,
52
  "step": 60
53
  },
54
  {
55
  "epoch": 0.5343511450381679,
56
+ "grad_norm": 4.275225639343262,
57
  "learning_rate": 0.0001736641221374046,
58
+ "loss": 0.2208,
59
  "step": 70
60
  },
61
  {
62
  "epoch": 0.6106870229007634,
63
+ "grad_norm": 3.2157912254333496,
64
  "learning_rate": 0.00016984732824427482,
65
+ "loss": 0.3555,
66
  "step": 80
67
  },
68
  {
69
  "epoch": 0.6870229007633588,
70
+ "grad_norm": 5.242416858673096,
71
  "learning_rate": 0.00016603053435114505,
72
+ "loss": 0.163,
73
  "step": 90
74
  },
75
  {
76
  "epoch": 0.7633587786259542,
77
+ "grad_norm": 0.5541224479675293,
78
  "learning_rate": 0.00016221374045801527,
79
+ "loss": 0.2052,
80
  "step": 100
81
  },
82
  {
83
  "epoch": 0.7633587786259542,
84
+ "eval_accuracy": 0.9483695652173914,
85
+ "eval_loss": 0.1684044897556305,
86
+ "eval_runtime": 1.6021,
87
+ "eval_samples_per_second": 229.704,
88
+ "eval_steps_per_second": 28.713,
89
  "step": 100
90
  },
91
  {
92
  "epoch": 0.8396946564885496,
93
+ "grad_norm": 0.14907874166965485,
94
  "learning_rate": 0.0001583969465648855,
95
+ "loss": 0.1426,
96
  "step": 110
97
  },
98
  {
99
  "epoch": 0.916030534351145,
100
+ "grad_norm": 1.8745291233062744,
101
  "learning_rate": 0.00015458015267175574,
102
+ "loss": 0.1216,
103
  "step": 120
104
  },
105
  {
106
  "epoch": 0.9923664122137404,
107
+ "grad_norm": 5.384662628173828,
108
  "learning_rate": 0.00015076335877862594,
109
+ "loss": 0.17,
110
  "step": 130
111
  },
112
  {
113
  "epoch": 1.0687022900763359,
114
+ "grad_norm": 4.790631294250488,
115
  "learning_rate": 0.0001469465648854962,
116
+ "loss": 0.0845,
117
  "step": 140
118
  },
119
  {
120
  "epoch": 1.1450381679389312,
121
+ "grad_norm": 4.6353230476379395,
122
  "learning_rate": 0.00014312977099236642,
123
+ "loss": 0.1092,
124
  "step": 150
125
  },
126
  {
127
  "epoch": 1.2213740458015268,
128
+ "grad_norm": 0.8423321843147278,
129
  "learning_rate": 0.00013931297709923664,
130
+ "loss": 0.126,
131
  "step": 160
132
  },
133
  {
134
  "epoch": 1.297709923664122,
135
+ "grad_norm": 0.4985540211200714,
136
  "learning_rate": 0.0001354961832061069,
137
+ "loss": 0.1103,
138
  "step": 170
139
  },
140
  {
141
  "epoch": 1.3740458015267176,
142
+ "grad_norm": 4.837319850921631,
143
  "learning_rate": 0.0001316793893129771,
144
+ "loss": 0.1849,
145
  "step": 180
146
  },
147
  {
148
  "epoch": 1.450381679389313,
149
+ "grad_norm": 4.57224702835083,
150
  "learning_rate": 0.00012786259541984734,
151
+ "loss": 0.195,
152
  "step": 190
153
  },
154
  {
155
  "epoch": 1.5267175572519083,
156
+ "grad_norm": 0.5998224020004272,
157
  "learning_rate": 0.00012404580152671757,
158
+ "loss": 0.05,
159
  "step": 200
160
  },
161
  {
162
  "epoch": 1.5267175572519083,
163
+ "eval_accuracy": 0.9592391304347826,
164
+ "eval_loss": 0.16003353893756866,
165
+ "eval_runtime": 1.5924,
166
+ "eval_samples_per_second": 231.101,
167
+ "eval_steps_per_second": 28.888,
168
  "step": 200
169
  },
170
  {
171
  "epoch": 1.6030534351145038,
172
+ "grad_norm": 2.8493752479553223,
173
  "learning_rate": 0.0001202290076335878,
174
+ "loss": 0.3001,
175
  "step": 210
176
  },
177
  {
178
  "epoch": 1.6793893129770994,
179
+ "grad_norm": 0.12870310246944427,
180
  "learning_rate": 0.00011641221374045803,
181
+ "loss": 0.0944,
182
  "step": 220
183
  },
184
  {
185
  "epoch": 1.7557251908396947,
186
+ "grad_norm": 6.482563018798828,
187
  "learning_rate": 0.00011259541984732824,
188
+ "loss": 0.0905,
189
  "step": 230
190
  },
191
  {
192
  "epoch": 1.83206106870229,
193
+ "grad_norm": 1.615530252456665,
194
  "learning_rate": 0.00010877862595419848,
195
+ "loss": 0.1326,
196
  "step": 240
197
  },
198
  {
199
  "epoch": 1.9083969465648853,
200
+ "grad_norm": 2.059842586517334,
201
  "learning_rate": 0.00010496183206106871,
202
+ "loss": 0.1274,
203
  "step": 250
204
  },
205
  {
206
  "epoch": 1.984732824427481,
207
+ "grad_norm": 2.750730276107788,
208
  "learning_rate": 0.00010114503816793894,
209
+ "loss": 0.2225,
210
  "step": 260
211
  },
212
  {
213
  "epoch": 2.0610687022900764,
214
+ "grad_norm": 0.04552418366074562,
215
  "learning_rate": 9.732824427480916e-05,
216
+ "loss": 0.0418,
217
  "step": 270
218
  },
219
  {
220
  "epoch": 2.1374045801526718,
221
+ "grad_norm": 3.4131999015808105,
222
  "learning_rate": 9.351145038167939e-05,
223
+ "loss": 0.1439,
224
  "step": 280
225
  },
226
  {
227
  "epoch": 2.213740458015267,
228
+ "grad_norm": 1.8281826972961426,
229
  "learning_rate": 8.969465648854962e-05,
230
+ "loss": 0.1084,
231
  "step": 290
232
  },
233
  {
234
  "epoch": 2.2900763358778624,
235
+ "grad_norm": 0.03664281591773033,
236
  "learning_rate": 8.587786259541986e-05,
237
+ "loss": 0.0579,
238
  "step": 300
239
  },
240
  {
241
  "epoch": 2.2900763358778624,
242
+ "eval_accuracy": 0.9510869565217391,
243
+ "eval_loss": 0.161894753575325,
244
+ "eval_runtime": 1.5917,
245
+ "eval_samples_per_second": 231.199,
246
+ "eval_steps_per_second": 28.9,
247
  "step": 300
248
  },
249
  {
250
  "epoch": 2.366412213740458,
251
+ "grad_norm": 10.416833877563477,
252
  "learning_rate": 8.206106870229007e-05,
253
+ "loss": 0.1502,
254
  "step": 310
255
  },
256
  {
257
  "epoch": 2.4427480916030535,
258
+ "grad_norm": 2.426678419113159,
259
  "learning_rate": 7.824427480916031e-05,
260
+ "loss": 0.2334,
261
  "step": 320
262
  },
263
  {
264
  "epoch": 2.519083969465649,
265
+ "grad_norm": 5.191472053527832,
266
  "learning_rate": 7.442748091603053e-05,
267
+ "loss": 0.0829,
268
  "step": 330
269
  },
270
  {
271
  "epoch": 2.595419847328244,
272
+ "grad_norm": 2.1241517066955566,
273
  "learning_rate": 7.061068702290077e-05,
274
+ "loss": 0.066,
275
  "step": 340
276
  },
277
  {
278
  "epoch": 2.67175572519084,
279
+ "grad_norm": 0.10309349745512009,
280
  "learning_rate": 6.6793893129771e-05,
281
+ "loss": 0.1356,
282
  "step": 350
283
  },
284
  {
285
  "epoch": 2.7480916030534353,
286
+ "grad_norm": 0.8078840374946594,
287
  "learning_rate": 6.297709923664122e-05,
288
+ "loss": 0.09,
289
  "step": 360
290
  },
291
  {
292
  "epoch": 2.8244274809160306,
293
+ "grad_norm": 0.7653095126152039,
294
  "learning_rate": 5.916030534351146e-05,
295
+ "loss": 0.0678,
296
  "step": 370
297
  },
298
  {
299
  "epoch": 2.900763358778626,
300
+ "grad_norm": 0.1392490267753601,
301
  "learning_rate": 5.534351145038168e-05,
302
+ "loss": 0.0532,
303
  "step": 380
304
  },
305
  {
306
  "epoch": 2.9770992366412212,
307
+ "grad_norm": 7.884419918060303,
308
  "learning_rate": 5.152671755725191e-05,
309
+ "loss": 0.1382,
310
  "step": 390
311
  },
312
  {
313
  "epoch": 3.053435114503817,
314
+ "grad_norm": 10.337115287780762,
315
  "learning_rate": 4.7709923664122144e-05,
316
+ "loss": 0.1181,
317
  "step": 400
318
  },
319
  {
320
  "epoch": 3.053435114503817,
321
+ "eval_accuracy": 0.9809782608695652,
322
+ "eval_loss": 0.06854937225580215,
323
+ "eval_runtime": 2.5796,
324
+ "eval_samples_per_second": 142.659,
325
+ "eval_steps_per_second": 17.832,
326
  "step": 400
327
  },
328
  {
329
  "epoch": 3.1297709923664123,
330
+ "grad_norm": 0.06403453648090363,
331
  "learning_rate": 4.389312977099237e-05,
332
+ "loss": 0.1245,
333
  "step": 410
334
  },
335
  {
336
  "epoch": 3.2061068702290076,
337
+ "grad_norm": 3.0858404636383057,
338
  "learning_rate": 4.00763358778626e-05,
339
+ "loss": 0.0901,
340
  "step": 420
341
  },
342
  {
343
  "epoch": 3.282442748091603,
344
+ "grad_norm": 1.1947516202926636,
345
  "learning_rate": 3.625954198473282e-05,
346
+ "loss": 0.1319,
347
  "step": 430
348
  },
349
  {
350
  "epoch": 3.3587786259541983,
351
+ "grad_norm": 2.4845542907714844,
352
  "learning_rate": 3.2442748091603054e-05,
353
+ "loss": 0.0932,
354
  "step": 440
355
  },
356
  {
357
  "epoch": 3.435114503816794,
358
+ "grad_norm": 0.7602748870849609,
359
  "learning_rate": 2.862595419847328e-05,
360
+ "loss": 0.0749,
361
  "step": 450
362
  },
363
  {
364
  "epoch": 3.5114503816793894,
365
+ "grad_norm": 3.669577121734619,
366
  "learning_rate": 2.4809160305343512e-05,
367
+ "loss": 0.1067,
368
  "step": 460
369
  },
370
  {
371
  "epoch": 3.5877862595419847,
372
+ "grad_norm": 0.02125421166419983,
373
  "learning_rate": 2.099236641221374e-05,
374
+ "loss": 0.0278,
375
  "step": 470
376
  },
377
  {
378
  "epoch": 3.66412213740458,
379
+ "grad_norm": 0.025344278663396835,
380
  "learning_rate": 1.717557251908397e-05,
381
+ "loss": 0.0717,
382
  "step": 480
383
  },
384
  {
385
  "epoch": 3.7404580152671754,
386
+ "grad_norm": 0.454380601644516,
387
  "learning_rate": 1.3358778625954198e-05,
388
+ "loss": 0.1241,
389
  "step": 490
390
  },
391
  {
392
  "epoch": 3.816793893129771,
393
+ "grad_norm": 0.49031591415405273,
394
  "learning_rate": 9.541984732824428e-06,
395
+ "loss": 0.1321,
396
  "step": 500
397
  },
398
  {
399
  "epoch": 3.816793893129771,
400
+ "eval_accuracy": 0.9809782608695652,
401
+ "eval_loss": 0.0674990862607956,
402
+ "eval_runtime": 1.6192,
403
+ "eval_samples_per_second": 227.276,
404
+ "eval_steps_per_second": 28.409,
405
  "step": 500
406
  },
407
  {
408
  "epoch": 3.8931297709923665,
409
+ "grad_norm": 7.950806617736816,
410
  "learning_rate": 5.725190839694657e-06,
411
+ "loss": 0.1726,
412
  "step": 510
413
  },
414
  {
415
  "epoch": 3.969465648854962,
416
+ "grad_norm": 0.29777708649635315,
417
  "learning_rate": 1.908396946564886e-06,
418
+ "loss": 0.0653,
419
  "step": 520
420
  },
421
  {
422
  "epoch": 4.0,
423
  "step": 524,
424
  "total_flos": 6.45382209997357e+17,
425
+ "train_loss": 0.13892418100633694,
426
+ "train_runtime": 142.587,
427
+ "train_samples_per_second": 58.406,
428
+ "train_steps_per_second": 3.675
429
  }
430
  ],
431
  "logging_steps": 10,