{
  "best_global_step": 120,
  "best_metric": 0.7559308736188931,
  "best_model_checkpoint": "/content/gemma_lora_imb/checkpoint-120",
  "epoch": 1.9047619047619047,
  "eval_steps": 20,
  "global_step": 120,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.31746031746031744,
      "grad_norm": 98.11976623535156,
      "learning_rate": 8.492063492063492e-06,
      "loss": 2.2416,
      "step": 20
    },
    {
      "epoch": 0.31746031746031744,
      "eval_f1_macro": 0.3599461767195109,
      "eval_loss": 1.7048946619033813,
      "eval_runtime": 38.4621,
      "eval_samples_per_second": 51.999,
      "eval_steps_per_second": 1.638,
      "step": 20
    },
    {
      "epoch": 0.6349206349206349,
      "grad_norm": 8.736888885498047,
      "learning_rate": 6.9047619047619055e-06,
      "loss": 1.2264,
      "step": 40
    },
    {
      "epoch": 0.6349206349206349,
      "eval_f1_macro": 0.559413732908684,
      "eval_loss": 1.029550552368164,
      "eval_runtime": 38.4686,
      "eval_samples_per_second": 51.99,
      "eval_steps_per_second": 1.638,
      "step": 40
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 32.25969314575195,
      "learning_rate": 5.317460317460318e-06,
      "loss": 0.703,
      "step": 60
    },
    {
      "epoch": 0.9523809523809523,
      "eval_f1_macro": 0.6382031896392115,
      "eval_loss": 1.148618221282959,
      "eval_runtime": 38.479,
      "eval_samples_per_second": 51.976,
      "eval_steps_per_second": 1.637,
      "step": 60
    },
    {
      "epoch": 1.2698412698412698,
      "grad_norm": 14.99205493927002,
      "learning_rate": 3.7301587301587305e-06,
      "loss": 0.4368,
      "step": 80
    },
    {
      "epoch": 1.2698412698412698,
      "eval_f1_macro": 0.7212127024041496,
      "eval_loss": 0.8294157385826111,
      "eval_runtime": 38.5268,
      "eval_samples_per_second": 51.912,
      "eval_steps_per_second": 1.635,
      "step": 80
    },
    {
      "epoch": 1.5873015873015874,
      "grad_norm": 5.438352108001709,
      "learning_rate": 2.1428571428571427e-06,
      "loss": 0.3713,
      "step": 100
    },
    {
      "epoch": 1.5873015873015874,
      "eval_f1_macro": 0.7422904035381885,
      "eval_loss": 0.5684086084365845,
      "eval_runtime": 38.5351,
      "eval_samples_per_second": 51.901,
      "eval_steps_per_second": 1.635,
      "step": 100
    },
    {
      "epoch": 1.9047619047619047,
      "grad_norm": 9.52097225189209,
      "learning_rate": 5.555555555555555e-07,
      "loss": 0.2307,
      "step": 120
    },
    {
      "epoch": 1.9047619047619047,
      "eval_f1_macro": 0.7559308736188931,
      "eval_loss": 0.5474382638931274,
      "eval_runtime": 38.5464,
      "eval_samples_per_second": 51.886,
      "eval_steps_per_second": 1.634,
      "step": 120
    }
  ],
  "logging_steps": 20,
  "max_steps": 126,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 20,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.4825056756023296e+16,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}