{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 183,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16597510373443983,
      "grad_norm": 510.4101257324219,
      "learning_rate": 1.8000000000000001e-06,
      "loss": 57.8057,
      "step": 10
    },
    {
      "epoch": 0.33195020746887965,
      "grad_norm": 498.0323486328125,
      "learning_rate": 3.8000000000000005e-06,
      "loss": 55.2929,
      "step": 20
    },
    {
      "epoch": 0.4979253112033195,
      "grad_norm": 419.85296630859375,
      "learning_rate": 5.8e-06,
      "loss": 47.3227,
      "step": 30
    },
    {
      "epoch": 0.6639004149377593,
      "grad_norm": 380.8525390625,
      "learning_rate": 7.800000000000002e-06,
      "loss": 38.025,
      "step": 40
    },
    {
      "epoch": 0.8298755186721992,
      "grad_norm": 322.61480712890625,
      "learning_rate": 9.800000000000001e-06,
      "loss": 30.559,
      "step": 50
    },
    {
      "epoch": 0.995850622406639,
      "grad_norm": 324.3410339355469,
      "learning_rate": 1.18e-05,
      "loss": 26.6962,
      "step": 60
    },
    {
      "epoch": 1.1493775933609958,
      "grad_norm": 316.6482238769531,
      "learning_rate": 1.38e-05,
      "loss": 20.3688,
      "step": 70
    },
    {
      "epoch": 1.3153526970954357,
      "grad_norm": 307.2823486328125,
      "learning_rate": 1.58e-05,
      "loss": 17.6791,
      "step": 80
    },
    {
      "epoch": 1.4813278008298756,
      "grad_norm": 298.7247619628906,
      "learning_rate": 1.7800000000000002e-05,
      "loss": 12.7207,
      "step": 90
    },
    {
      "epoch": 1.6473029045643153,
      "grad_norm": 233.05892944335938,
      "learning_rate": 1.98e-05,
      "loss": 7.7271,
      "step": 100
    },
    {
      "epoch": 1.8132780082987552,
      "grad_norm": 123.60958099365234,
      "learning_rate": 1.783132530120482e-05,
      "loss": 3.8698,
      "step": 110
    },
    {
      "epoch": 1.979253112033195,
      "grad_norm": 51.369815826416016,
      "learning_rate": 1.5421686746987955e-05,
      "loss": 2.031,
      "step": 120
    },
    {
      "epoch": 2.132780082987552,
      "grad_norm": 25.313810348510742,
      "learning_rate": 1.3012048192771085e-05,
      "loss": 1.2163,
      "step": 130
    },
    {
      "epoch": 2.2987551867219915,
      "grad_norm": 14.643232345581055,
      "learning_rate": 1.0602409638554219e-05,
      "loss": 1.0068,
      "step": 140
    },
    {
      "epoch": 2.4647302904564317,
      "grad_norm": 10.708518981933594,
      "learning_rate": 8.19277108433735e-06,
      "loss": 0.8702,
      "step": 150
    },
    {
      "epoch": 2.6307053941908713,
      "grad_norm": 8.732069969177246,
      "learning_rate": 5.783132530120482e-06,
      "loss": 0.7605,
      "step": 160
    },
    {
      "epoch": 2.796680497925311,
      "grad_norm": 8.132953643798828,
      "learning_rate": 3.3734939759036146e-06,
      "loss": 0.7393,
      "step": 170
    },
    {
      "epoch": 2.962655601659751,
      "grad_norm": 7.966629505157471,
      "learning_rate": 9.638554216867472e-07,
      "loss": 0.7033,
      "step": 180
    }
  ],
  "logging_steps": 10,
  "max_steps": 183,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}