{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 40, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.32, "grad_norm": 0.016916594177412977, "learning_rate": 0.0, "loss": 1.855, "step": 1 }, { "epoch": 0.64, "grad_norm": 0.017764729438934316, "learning_rate": 1e-05, "loss": 1.9926, "step": 2 }, { "epoch": 0.96, "grad_norm": 0.057052856900927554, "learning_rate": 9.983786540671052e-06, "loss": 1.9848, "step": 3 }, { "epoch": 1.0, "grad_norm": 0.057052856900927554, "learning_rate": 9.935251313189564e-06, "loss": 1.9359, "step": 4 }, { "epoch": 1.32, "grad_norm": 0.17417676141134789, "learning_rate": 9.854709087130261e-06, "loss": 1.855, "step": 5 }, { "epoch": 1.6400000000000001, "grad_norm": 0.01827000018622016, "learning_rate": 9.742682209735727e-06, "loss": 1.9922, "step": 6 }, { "epoch": 1.96, "grad_norm": 0.06176588244404544, "learning_rate": 9.599897218294122e-06, "loss": 1.9845, "step": 7 }, { "epoch": 2.0, "grad_norm": 0.06176588244404544, "learning_rate": 9.427280128266049e-06, "loss": 1.9369, "step": 8 }, { "epoch": 2.32, "grad_norm": 0.14595643091645755, "learning_rate": 9.225950427718974e-06, "loss": 1.8548, "step": 9 }, { "epoch": 2.64, "grad_norm": 0.021857116329411633, "learning_rate": 8.997213817017508e-06, "loss": 1.9923, "step": 10 }, { "epoch": 2.96, "grad_norm": 0.08999185200429612, "learning_rate": 8.742553740855507e-06, "loss": 1.9846, "step": 11 }, { "epoch": 3.0, "grad_norm": 0.08999185200429612, "learning_rate": 8.463621767547998e-06, "loss": 1.9367, "step": 12 }, { "epoch": 3.32, "grad_norm": 0.160582199492949, "learning_rate": 8.162226877976886e-06, "loss": 1.8548, "step": 13 }, { "epoch": 3.64, "grad_norm": 0.01769788592303312, "learning_rate": 7.84032373365578e-06, "loss": 1.9921, "step": 14 }, { "epoch": 3.96, "grad_norm": 0.038205714080758, "learning_rate": 7.500000000000001e-06, "loss": 1.9841, "step": 15 }, { "epoch": 4.0, "grad_norm": 0.038205714080758, "learning_rate": 7.143462807015271e-06, "loss": 1.9361, "step": 16 }, { "epoch": 4.32, "grad_norm": 0.2366511908051694, "learning_rate": 6.773024435212678e-06, "loss": 1.8548, "step": 17 }, { "epoch": 4.64, "grad_norm": 0.04879141353799034, "learning_rate": 6.391087319582264e-06, "loss": 1.9922, "step": 18 }, { "epoch": 4.96, "grad_norm": 0.034924688915375805, "learning_rate": 6.000128468880223e-06, "loss": 1.9839, "step": 19 }, { "epoch": 5.0, "grad_norm": 0.034924688915375805, "learning_rate": 5.6026834012766155e-06, "loss": 1.9351, "step": 20 }, { "epoch": 5.32, "grad_norm": 0.6160927282559335, "learning_rate": 5.201329700547077e-06, "loss": 1.8548, "step": 21 }, { "epoch": 5.64, "grad_norm": 0.028056079275503732, "learning_rate": 4.798670299452926e-06, "loss": 1.9922, "step": 22 }, { "epoch": 5.96, "grad_norm": 0.03227351117560889, "learning_rate": 4.397316598723385e-06, "loss": 1.9837, "step": 23 }, { "epoch": 6.0, "grad_norm": 0.03227351117560889, "learning_rate": 3.999871531119779e-06, "loss": 1.936, "step": 24 }, { "epoch": 6.32, "grad_norm": 0.23832671432217362, "learning_rate": 3.6089126804177373e-06, "loss": 1.8549, "step": 25 }, { "epoch": 6.64, "grad_norm": 0.021342564833274163, "learning_rate": 3.226975564787322e-06, "loss": 1.992, "step": 26 }, { "epoch": 6.96, "grad_norm": 0.0317716838127563, "learning_rate": 2.8565371929847286e-06, "loss": 1.9836, "step": 27 }, { "epoch": 7.0, "grad_norm": 0.0317716838127563, "learning_rate": 2.5000000000000015e-06, "loss": 1.9349, "step": 28 }, { "epoch": 7.32, "grad_norm": 0.17707467504287808, "learning_rate": 2.159676266344222e-06, "loss": 1.8547, "step": 29 }, { "epoch": 7.64, "grad_norm": 0.020579457767089776, "learning_rate": 1.8377731220231144e-06, "loss": 1.992, "step": 30 }, { "epoch": 7.96, "grad_norm": 0.03172880072777926, "learning_rate": 1.5363782324520033e-06, "loss": 1.9832, "step": 31 }, { "epoch": 8.0, "grad_norm": 0.24177038558002523, "learning_rate": 1.257446259144494e-06, "loss": 1.9352, "step": 32 }, { "epoch": 8.32, "grad_norm": 0.026730695403794896, "learning_rate": 1.0027861829824953e-06, "loss": 1.8548, "step": 33 }, { "epoch": 8.64, "grad_norm": 0.023669965523795133, "learning_rate": 7.740495722810271e-07, "loss": 1.9921, "step": 34 }, { "epoch": 8.96, "grad_norm": 0.14375080871048362, "learning_rate": 5.727198717339511e-07, "loss": 1.9837, "step": 35 }, { "epoch": 9.0, "grad_norm": 0.14375080871048362, "learning_rate": 4.001027817058789e-07, "loss": 1.9359, "step": 36 }, { "epoch": 9.32, "grad_norm": 0.154064477409402, "learning_rate": 2.573177902642726e-07, "loss": 1.8547, "step": 37 }, { "epoch": 9.64, "grad_norm": 0.021139596720734836, "learning_rate": 1.4529091286973994e-07, "loss": 1.9919, "step": 38 }, { "epoch": 9.96, "grad_norm": 0.0639147174796051, "learning_rate": 6.474868681043578e-08, "loss": 1.9835, "step": 39 }, { "epoch": 10.0, "grad_norm": 0.0639147174796051, "learning_rate": 1.6213459328950355e-08, "loss": 1.9351, "step": 40 } ], "logging_steps": 1, "max_steps": 40, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 312913466228736.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }