| { | |
| "best_global_step": 500, | |
| "best_metric": 0.08664274215698242, | |
| "best_model_checkpoint": "fish_disease_datasets/checkpoint-500", | |
| "epoch": 4.0, | |
| "eval_steps": 100, | |
| "global_step": 524, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.07633587786259542, | |
| "grad_norm": 2.0032925605773926, | |
| "learning_rate": 0.00019656488549618322, | |
| "loss": 1.8167, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.15267175572519084, | |
| "grad_norm": 2.0694987773895264, | |
| "learning_rate": 0.00019274809160305345, | |
| "loss": 1.5113, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.22900763358778625, | |
| "grad_norm": 1.8945856094360352, | |
| "learning_rate": 0.00018893129770992367, | |
| "loss": 1.3072, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.3053435114503817, | |
| "grad_norm": 1.994113564491272, | |
| "learning_rate": 0.0001851145038167939, | |
| "loss": 1.0743, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.3816793893129771, | |
| "grad_norm": 2.0848724842071533, | |
| "learning_rate": 0.00018129770992366412, | |
| "loss": 0.7789, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.4580152671755725, | |
| "grad_norm": 2.2140491008758545, | |
| "learning_rate": 0.00017748091603053437, | |
| "loss": 0.7127, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.5343511450381679, | |
| "grad_norm": 2.714928388595581, | |
| "learning_rate": 0.0001736641221374046, | |
| "loss": 0.592, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.6106870229007634, | |
| "grad_norm": 0.919693112373352, | |
| "learning_rate": 0.00016984732824427482, | |
| "loss": 0.4939, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.6870229007633588, | |
| "grad_norm": 3.400426149368286, | |
| "learning_rate": 0.00016603053435114505, | |
| "loss": 0.4973, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.7633587786259542, | |
| "grad_norm": 6.015654563903809, | |
| "learning_rate": 0.00016221374045801527, | |
| "loss": 0.3865, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.7633587786259542, | |
| "eval_accuracy": 0.8913043478260869, | |
| "eval_loss": 0.4161355495452881, | |
| "eval_runtime": 1.7389, | |
| "eval_samples_per_second": 211.632, | |
| "eval_steps_per_second": 26.454, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.8396946564885496, | |
| "grad_norm": 2.465869665145874, | |
| "learning_rate": 0.0001583969465648855, | |
| "loss": 0.3979, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.916030534351145, | |
| "grad_norm": 3.375448226928711, | |
| "learning_rate": 0.00015458015267175574, | |
| "loss": 0.3569, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.9923664122137404, | |
| "grad_norm": 1.7015740871429443, | |
| "learning_rate": 0.00015076335877862594, | |
| "loss": 0.4148, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.0687022900763359, | |
| "grad_norm": 0.402433305978775, | |
| "learning_rate": 0.0001469465648854962, | |
| "loss": 0.2044, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.1450381679389312, | |
| "grad_norm": 2.2606430053710938, | |
| "learning_rate": 0.00014312977099236642, | |
| "loss": 0.1364, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.2213740458015268, | |
| "grad_norm": 1.087276816368103, | |
| "learning_rate": 0.00013931297709923664, | |
| "loss": 0.2527, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.297709923664122, | |
| "grad_norm": 3.1737782955169678, | |
| "learning_rate": 0.0001354961832061069, | |
| "loss": 0.1463, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.3740458015267176, | |
| "grad_norm": 0.22228187322616577, | |
| "learning_rate": 0.0001316793893129771, | |
| "loss": 0.136, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.450381679389313, | |
| "grad_norm": 6.465834617614746, | |
| "learning_rate": 0.00012786259541984734, | |
| "loss": 0.1747, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.5267175572519083, | |
| "grad_norm": 1.6935511827468872, | |
| "learning_rate": 0.00012404580152671757, | |
| "loss": 0.1206, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.5267175572519083, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.2170080989599228, | |
| "eval_runtime": 1.5107, | |
| "eval_samples_per_second": 243.59, | |
| "eval_steps_per_second": 30.449, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.6030534351145038, | |
| "grad_norm": 2.237194061279297, | |
| "learning_rate": 0.0001202290076335878, | |
| "loss": 0.1119, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.6793893129770994, | |
| "grad_norm": 0.3265667259693146, | |
| "learning_rate": 0.00011641221374045803, | |
| "loss": 0.1954, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.7557251908396947, | |
| "grad_norm": 4.540020942687988, | |
| "learning_rate": 0.00011259541984732824, | |
| "loss": 0.1225, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.83206106870229, | |
| "grad_norm": 4.758110046386719, | |
| "learning_rate": 0.00010877862595419848, | |
| "loss": 0.1258, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.9083969465648853, | |
| "grad_norm": 0.1943335086107254, | |
| "learning_rate": 0.00010496183206106871, | |
| "loss": 0.0803, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.984732824427481, | |
| "grad_norm": 3.112128734588623, | |
| "learning_rate": 0.00010114503816793894, | |
| "loss": 0.0871, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.0610687022900764, | |
| "grad_norm": 2.004229784011841, | |
| "learning_rate": 9.732824427480916e-05, | |
| "loss": 0.1014, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.1374045801526718, | |
| "grad_norm": 2.089447498321533, | |
| "learning_rate": 9.351145038167939e-05, | |
| "loss": 0.0635, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.213740458015267, | |
| "grad_norm": 0.177068829536438, | |
| "learning_rate": 8.969465648854962e-05, | |
| "loss": 0.0506, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.2900763358778624, | |
| "grad_norm": 1.701705813407898, | |
| "learning_rate": 8.587786259541986e-05, | |
| "loss": 0.1132, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.2900763358778624, | |
| "eval_accuracy": 0.967391304347826, | |
| "eval_loss": 0.13170665502548218, | |
| "eval_runtime": 1.8599, | |
| "eval_samples_per_second": 197.858, | |
| "eval_steps_per_second": 24.732, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.366412213740458, | |
| "grad_norm": 0.22077667713165283, | |
| "learning_rate": 8.206106870229007e-05, | |
| "loss": 0.0579, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.4427480916030535, | |
| "grad_norm": 4.027545928955078, | |
| "learning_rate": 7.824427480916031e-05, | |
| "loss": 0.0684, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.519083969465649, | |
| "grad_norm": 0.08583887666463852, | |
| "learning_rate": 7.442748091603053e-05, | |
| "loss": 0.0709, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.595419847328244, | |
| "grad_norm": 0.07327867299318314, | |
| "learning_rate": 7.061068702290077e-05, | |
| "loss": 0.0301, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.67175572519084, | |
| "grad_norm": 0.08430969715118408, | |
| "learning_rate": 6.6793893129771e-05, | |
| "loss": 0.0843, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.7480916030534353, | |
| "grad_norm": 0.08751753717660904, | |
| "learning_rate": 6.297709923664122e-05, | |
| "loss": 0.0345, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.8244274809160306, | |
| "grad_norm": 0.06909404695034027, | |
| "learning_rate": 5.916030534351146e-05, | |
| "loss": 0.0416, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.900763358778626, | |
| "grad_norm": 0.06500696390867233, | |
| "learning_rate": 5.534351145038168e-05, | |
| "loss": 0.066, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.9770992366412212, | |
| "grad_norm": 0.08221684396266937, | |
| "learning_rate": 5.152671755725191e-05, | |
| "loss": 0.0433, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 3.053435114503817, | |
| "grad_norm": 1.0126067399978638, | |
| "learning_rate": 4.7709923664122144e-05, | |
| "loss": 0.0547, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.053435114503817, | |
| "eval_accuracy": 0.9809782608695652, | |
| "eval_loss": 0.08785175532102585, | |
| "eval_runtime": 1.8079, | |
| "eval_samples_per_second": 203.548, | |
| "eval_steps_per_second": 25.443, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.1297709923664123, | |
| "grad_norm": 0.07123812288045883, | |
| "learning_rate": 4.389312977099237e-05, | |
| "loss": 0.0272, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 3.2061068702290076, | |
| "grad_norm": 0.06167351081967354, | |
| "learning_rate": 4.00763358778626e-05, | |
| "loss": 0.0386, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 3.282442748091603, | |
| "grad_norm": 0.06654069572687149, | |
| "learning_rate": 3.625954198473282e-05, | |
| "loss": 0.0312, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 3.3587786259541983, | |
| "grad_norm": 0.12739847600460052, | |
| "learning_rate": 3.2442748091603054e-05, | |
| "loss": 0.0446, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 3.435114503816794, | |
| "grad_norm": 0.8813410997390747, | |
| "learning_rate": 2.862595419847328e-05, | |
| "loss": 0.0417, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 3.5114503816793894, | |
| "grad_norm": 0.9467947483062744, | |
| "learning_rate": 2.4809160305343512e-05, | |
| "loss": 0.027, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 3.5877862595419847, | |
| "grad_norm": 0.22488470375537872, | |
| "learning_rate": 2.099236641221374e-05, | |
| "loss": 0.021, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 3.66412213740458, | |
| "grad_norm": 0.05164281651377678, | |
| "learning_rate": 1.717557251908397e-05, | |
| "loss": 0.0254, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 3.7404580152671754, | |
| "grad_norm": 0.055942848324775696, | |
| "learning_rate": 1.3358778625954198e-05, | |
| "loss": 0.0376, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 3.816793893129771, | |
| "grad_norm": 0.17778228223323822, | |
| "learning_rate": 9.541984732824428e-06, | |
| "loss": 0.0209, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.816793893129771, | |
| "eval_accuracy": 0.9728260869565217, | |
| "eval_loss": 0.08664274215698242, | |
| "eval_runtime": 1.5113, | |
| "eval_samples_per_second": 243.505, | |
| "eval_steps_per_second": 30.438, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.8931297709923665, | |
| "grad_norm": 0.05874630808830261, | |
| "learning_rate": 5.725190839694657e-06, | |
| "loss": 0.0193, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 3.969465648854962, | |
| "grad_norm": 0.07613587379455566, | |
| "learning_rate": 1.908396946564886e-06, | |
| "loss": 0.0316, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "step": 524, | |
| "total_flos": 6.45382209997357e+17, | |
| "train_loss": 0.2574100203186501, | |
| "train_runtime": 180.8127, | |
| "train_samples_per_second": 46.059, | |
| "train_steps_per_second": 2.898 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 524, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.45382209997357e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |