| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 6.0, | |
| "global_step": 9564, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.7386030949393564e-05, | |
| "loss": 1.4207, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_accuracy": 0.9008888888888889, | |
| "eval_loss": 0.7066789269447327, | |
| "eval_runtime": 2.6965, | |
| "eval_samples_per_second": 3337.621, | |
| "eval_steps_per_second": 417.203, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.477206189878712e-05, | |
| "loss": 0.5086, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_accuracy": 0.9516666666666667, | |
| "eval_loss": 0.3055577874183655, | |
| "eval_runtime": 2.6576, | |
| "eval_samples_per_second": 3386.509, | |
| "eval_steps_per_second": 423.314, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.215809284818068e-05, | |
| "loss": 0.2731, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_accuracy": 0.9648888888888889, | |
| "eval_loss": 0.18555375933647156, | |
| "eval_runtime": 2.6597, | |
| "eval_samples_per_second": 3383.793, | |
| "eval_steps_per_second": 422.974, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.954412379757424e-05, | |
| "loss": 0.1976, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "eval_accuracy": 0.9701111111111111, | |
| "eval_loss": 0.14159560203552246, | |
| "eval_runtime": 2.715, | |
| "eval_samples_per_second": 3314.86, | |
| "eval_steps_per_second": 414.357, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 3.69301547469678e-05, | |
| "loss": 0.1565, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_accuracy": 0.9738888888888889, | |
| "eval_loss": 0.11081045866012573, | |
| "eval_runtime": 2.6963, | |
| "eval_samples_per_second": 3337.905, | |
| "eval_steps_per_second": 417.238, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 3.431618569636136e-05, | |
| "loss": 0.128, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "eval_accuracy": 0.976, | |
| "eval_loss": 0.09747562557458878, | |
| "eval_runtime": 2.6961, | |
| "eval_samples_per_second": 3338.209, | |
| "eval_steps_per_second": 417.276, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 3.170221664575492e-05, | |
| "loss": 0.1133, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "eval_accuracy": 0.9788888888888889, | |
| "eval_loss": 0.08474569022655487, | |
| "eval_runtime": 2.7245, | |
| "eval_samples_per_second": 3303.375, | |
| "eval_steps_per_second": 412.922, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 2.9088247595148475e-05, | |
| "loss": 0.1031, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "eval_accuracy": 0.9804444444444445, | |
| "eval_loss": 0.07724875211715698, | |
| "eval_runtime": 2.6363, | |
| "eval_samples_per_second": 3413.847, | |
| "eval_steps_per_second": 426.731, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.6474278544542037e-05, | |
| "loss": 0.09, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_accuracy": 0.9818888888888889, | |
| "eval_loss": 0.0697416290640831, | |
| "eval_runtime": 2.6295, | |
| "eval_samples_per_second": 3422.689, | |
| "eval_steps_per_second": 427.836, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 2.386030949393559e-05, | |
| "loss": 0.0871, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "eval_accuracy": 0.9815555555555555, | |
| "eval_loss": 0.066066212952137, | |
| "eval_runtime": 2.6946, | |
| "eval_samples_per_second": 3340.06, | |
| "eval_steps_per_second": 417.507, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 2.1246340443329153e-05, | |
| "loss": 0.0733, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "eval_accuracy": 0.9822222222222222, | |
| "eval_loss": 0.06342040002346039, | |
| "eval_runtime": 2.6897, | |
| "eval_samples_per_second": 3346.09, | |
| "eval_steps_per_second": 418.261, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 1.863237139272271e-05, | |
| "loss": 0.0761, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "eval_accuracy": 0.983, | |
| "eval_loss": 0.06072380393743515, | |
| "eval_runtime": 2.6938, | |
| "eval_samples_per_second": 3340.98, | |
| "eval_steps_per_second": 417.623, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 1.601840234211627e-05, | |
| "loss": 0.0739, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "eval_accuracy": 0.9832222222222222, | |
| "eval_loss": 0.05795769765973091, | |
| "eval_runtime": 2.6767, | |
| "eval_samples_per_second": 3362.391, | |
| "eval_steps_per_second": 420.299, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 1.340443329150983e-05, | |
| "loss": 0.0643, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "eval_accuracy": 0.9844444444444445, | |
| "eval_loss": 0.05685265362262726, | |
| "eval_runtime": 2.6876, | |
| "eval_samples_per_second": 3348.672, | |
| "eval_steps_per_second": 418.584, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 1.0790464240903388e-05, | |
| "loss": 0.0678, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "eval_accuracy": 0.984, | |
| "eval_loss": 0.05617769435048103, | |
| "eval_runtime": 2.6484, | |
| "eval_samples_per_second": 3398.278, | |
| "eval_steps_per_second": 424.785, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 8.176495190296946e-06, | |
| "loss": 0.0617, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "eval_accuracy": 0.9853333333333333, | |
| "eval_loss": 0.053985536098480225, | |
| "eval_runtime": 2.672, | |
| "eval_samples_per_second": 3368.244, | |
| "eval_steps_per_second": 421.03, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 5.562526139690506e-06, | |
| "loss": 0.0571, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "eval_accuracy": 0.9847777777777778, | |
| "eval_loss": 0.05352585390210152, | |
| "eval_runtime": 2.7082, | |
| "eval_samples_per_second": 3323.274, | |
| "eval_steps_per_second": 415.409, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 2.9485570890840656e-06, | |
| "loss": 0.0608, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "eval_accuracy": 0.9851111111111112, | |
| "eval_loss": 0.053133774548769, | |
| "eval_runtime": 2.6753, | |
| "eval_samples_per_second": 3364.134, | |
| "eval_steps_per_second": 420.517, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 3.345880384776244e-07, | |
| "loss": 0.0571, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "eval_accuracy": 0.9847777777777778, | |
| "eval_loss": 0.05344167724251747, | |
| "eval_runtime": 2.6425, | |
| "eval_samples_per_second": 3405.863, | |
| "eval_steps_per_second": 425.733, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "step": 9564, | |
| "total_flos": 264960533376000.0, | |
| "train_loss": 0.1922683648263396, | |
| "train_runtime": 134.4457, | |
| "train_samples_per_second": 2276.012, | |
| "train_steps_per_second": 71.137 | |
| } | |
| ], | |
| "max_steps": 9564, | |
| "num_train_epochs": 6, | |
| "total_flos": 264960533376000.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |