| { | |
| "best_metric": 0.8917858651698956, | |
| "best_model_checkpoint": "/kaggle/working/sweep_10/checkpoint-290", | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 290, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.8620689655172413, | |
| "grad_norm": 1.1105767488479614, | |
| "learning_rate": 8.823529411764707e-06, | |
| "loss": 0.1464, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.8466257668711656, | |
| "eval_f1_macro": 0.6189095670064528, | |
| "eval_loss": 0.1092783659696579, | |
| "eval_precision_macro": 0.9203821656050956, | |
| "eval_recall_macro": 0.5967741935483871, | |
| "eval_runtime": 0.9405, | |
| "eval_samples_per_second": 173.321, | |
| "eval_steps_per_second": 6.38, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 1.7241379310344827, | |
| "grad_norm": 4.811639308929443, | |
| "learning_rate": 6.985294117647059e-06, | |
| "loss": 0.1023, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8834355828220859, | |
| "eval_f1_macro": 0.8289422811378073, | |
| "eval_loss": 0.06354018300771713, | |
| "eval_precision_macro": 0.8046747967479675, | |
| "eval_recall_macro": 0.8663245356793744, | |
| "eval_runtime": 0.9481, | |
| "eval_samples_per_second": 171.927, | |
| "eval_steps_per_second": 6.329, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 2.586206896551724, | |
| "grad_norm": 2.146620273590088, | |
| "learning_rate": 5.147058823529411e-06, | |
| "loss": 0.0693, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9263803680981595, | |
| "eval_f1_macro": 0.8582608695652174, | |
| "eval_loss": 0.09711485356092453, | |
| "eval_precision_macro": 0.9583333333333333, | |
| "eval_recall_macro": 0.8064516129032258, | |
| "eval_runtime": 0.9496, | |
| "eval_samples_per_second": 171.655, | |
| "eval_steps_per_second": 6.319, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 3.4482758620689653, | |
| "grad_norm": 3.015160322189331, | |
| "learning_rate": 3.308823529411765e-06, | |
| "loss": 0.0577, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.6441717791411042, | |
| "eval_f1_macro": 0.6175566343042072, | |
| "eval_loss": 0.09405244886875153, | |
| "eval_precision_macro": 0.6741573033707865, | |
| "eval_recall_macro": 0.7803030303030303, | |
| "eval_runtime": 0.9464, | |
| "eval_samples_per_second": 172.229, | |
| "eval_steps_per_second": 6.34, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 4.310344827586207, | |
| "grad_norm": 1.337144374847412, | |
| "learning_rate": 1.4705882352941177e-06, | |
| "loss": 0.0689, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9325153374233128, | |
| "eval_f1_macro": 0.8917858651698956, | |
| "eval_loss": 0.047600872814655304, | |
| "eval_precision_macro": 0.8871660305343512, | |
| "eval_recall_macro": 0.8966275659824048, | |
| "eval_runtime": 0.9309, | |
| "eval_samples_per_second": 175.093, | |
| "eval_steps_per_second": 6.445, | |
| "step": 290 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 290, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 2, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 823562615180892.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |