| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.838709677419355, | |
| "eval_steps": 100, | |
| "global_step": 45, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.06451612903225806, | |
| "grad_norm": 23.374803412764983, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3092, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.3225806451612903, | |
| "grad_norm": 9.912797872940994, | |
| "learning_rate": 1.9760758775559275e-05, | |
| "loss": 1.1097, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.6451612903225806, | |
| "grad_norm": 2.55903680303641, | |
| "learning_rate": 1.833997817889878e-05, | |
| "loss": 0.8245, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.967741935483871, | |
| "grad_norm": 1.821033764230243, | |
| "learning_rate": 1.581858915557953e-05, | |
| "loss": 0.7069, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 1.2580645161290323, | |
| "grad_norm": 1.0077080917561687, | |
| "learning_rate": 1.2529333823916807e-05, | |
| "loss": 0.6359, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 1.5806451612903225, | |
| "grad_norm": 1.2382608394592693, | |
| "learning_rate": 8.906287916221259e-06, | |
| "loss": 0.6027, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 1.903225806451613, | |
| "grad_norm": 1.1112565004772472, | |
| "learning_rate": 5.427576766953615e-06, | |
| "loss": 0.5911, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 2.193548387096774, | |
| "grad_norm": 0.8443207154565046, | |
| "learning_rate": 2.5522781725621814e-06, | |
| "loss": 0.5516, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 2.5161290322580645, | |
| "grad_norm": 0.8699964797536935, | |
| "learning_rate": 6.598389126745209e-07, | |
| "loss": 0.5359, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 2.838709677419355, | |
| "grad_norm": 0.8264176913325781, | |
| "learning_rate": 0.0, | |
| "loss": 0.5253, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 2.838709677419355, | |
| "step": 45, | |
| "total_flos": 3.320604324475699e+16, | |
| "train_loss": 0.6803963449266222, | |
| "train_runtime": 1169.4096, | |
| "train_samples_per_second": 0.626, | |
| "train_steps_per_second": 0.038 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 45, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.320604324475699e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |