{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 40, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.32, "grad_norm": 0.012638984824819675, "learning_rate": 0.0, "loss": 1.855, "step": 1 }, { "epoch": 0.64, "grad_norm": 0.012340458206891919, "learning_rate": 1e-06, "loss": 1.9926, "step": 2 }, { "epoch": 0.96, "grad_norm": 0.03581427227272684, "learning_rate": 9.98378654067105e-07, "loss": 1.9847, "step": 3 }, { "epoch": 1.0, "grad_norm": 0.03581427227272684, "learning_rate": 9.935251313189563e-07, "loss": 1.9369, "step": 4 }, { "epoch": 1.32, "grad_norm": 0.07949289920887785, "learning_rate": 9.85470908713026e-07, "loss": 1.855, "step": 5 }, { "epoch": 1.6400000000000001, "grad_norm": 0.011520378413809698, "learning_rate": 9.742682209735727e-07, "loss": 1.9926, "step": 6 }, { "epoch": 1.96, "grad_norm": 0.03799569152816728, "learning_rate": 9.599897218294121e-07, "loss": 1.9848, "step": 7 }, { "epoch": 2.0, "grad_norm": 0.03799569152816728, "learning_rate": 9.427280128266049e-07, "loss": 1.9376, "step": 8 }, { "epoch": 2.32, "grad_norm": 0.07345131274085664, "learning_rate": 9.225950427718974e-07, "loss": 1.8549, "step": 9 }, { "epoch": 2.64, "grad_norm": 0.010980750834152606, "learning_rate": 8.997213817017506e-07, "loss": 1.9925, "step": 10 }, { "epoch": 2.96, "grad_norm": 0.033117274234404914, "learning_rate": 8.742553740855505e-07, "loss": 1.9849, "step": 11 }, { "epoch": 3.0, "grad_norm": 0.033117274234404914, "learning_rate": 8.463621767547997e-07, "loss": 1.9374, "step": 12 }, { "epoch": 3.32, "grad_norm": 0.0932577390005764, "learning_rate": 8.162226877976886e-07, "loss": 1.855, "step": 13 }, { "epoch": 3.64, "grad_norm": 0.013729171616592057, "learning_rate": 7.840323733655778e-07, "loss": 1.9923, "step": 14 }, { "epoch": 3.96, "grad_norm": 0.02238884716887795, "learning_rate": 7.5e-07, "loss": 1.9848, "step": 15 }, { "epoch": 4.0, "grad_norm": 0.02238884716887795, "learning_rate": 7.14346280701527e-07, "loss": 1.9365, "step": 16 }, { "epoch": 4.32, "grad_norm": 0.09307472193875552, "learning_rate": 6.773024435212677e-07, "loss": 1.8547, "step": 17 }, { "epoch": 4.64, "grad_norm": 0.014080112498125649, "learning_rate": 6.391087319582263e-07, "loss": 1.9923, "step": 18 }, { "epoch": 4.96, "grad_norm": 0.0681000224516119, "learning_rate": 6.000128468880222e-07, "loss": 1.9848, "step": 19 }, { "epoch": 5.0, "grad_norm": 0.0681000224516119, "learning_rate": 5.602683401276614e-07, "loss": 1.9364, "step": 20 }, { "epoch": 5.32, "grad_norm": 0.2855909207567928, "learning_rate": 5.201329700547076e-07, "loss": 1.8547, "step": 21 }, { "epoch": 5.64, "grad_norm": 0.014081499502207754, "learning_rate": 4.798670299452926e-07, "loss": 1.9924, "step": 22 }, { "epoch": 5.96, "grad_norm": 0.020495914128444336, "learning_rate": 4.397316598723385e-07, "loss": 1.9848, "step": 23 }, { "epoch": 6.0, "grad_norm": 0.020495914128444336, "learning_rate": 3.9998715311197783e-07, "loss": 1.9378, "step": 24 }, { "epoch": 6.32, "grad_norm": 0.09100068727172535, "learning_rate": 3.6089126804177364e-07, "loss": 1.8547, "step": 25 }, { "epoch": 6.64, "grad_norm": 0.0171770009707001, "learning_rate": 3.2269755647873214e-07, "loss": 1.9922, "step": 26 }, { "epoch": 6.96, "grad_norm": 0.015409134120937642, "learning_rate": 2.856537192984728e-07, "loss": 1.9848, "step": 27 }, { "epoch": 7.0, "grad_norm": 0.015409134120937642, "learning_rate": 2.500000000000001e-07, "loss": 1.9365, "step": 28 }, { "epoch": 7.32, "grad_norm": 0.08008822639984792, "learning_rate": 2.1596762663442213e-07, "loss": 1.855, "step": 29 }, { "epoch": 7.64, "grad_norm": 0.012415766111407774, "learning_rate": 1.837773122023114e-07, "loss": 1.9924, "step": 30 }, { "epoch": 7.96, "grad_norm": 0.01431160321057798, "learning_rate": 1.536378232452003e-07, "loss": 1.9846, "step": 31 }, { "epoch": 8.0, "grad_norm": 0.0978141751305368, "learning_rate": 1.257446259144494e-07, "loss": 1.9373, "step": 32 }, { "epoch": 8.32, "grad_norm": 0.02208980981289174, "learning_rate": 1.0027861829824952e-07, "loss": 1.8551, "step": 33 }, { "epoch": 8.64, "grad_norm": 0.012216527071483598, "learning_rate": 7.740495722810269e-08, "loss": 1.9924, "step": 34 }, { "epoch": 8.96, "grad_norm": 0.04589178497022974, "learning_rate": 5.72719871733951e-08, "loss": 1.9847, "step": 35 }, { "epoch": 9.0, "grad_norm": 0.04589178497022974, "learning_rate": 4.0010278170587884e-08, "loss": 1.9374, "step": 36 }, { "epoch": 9.32, "grad_norm": 0.06320807250237533, "learning_rate": 2.5731779026427257e-08, "loss": 1.8548, "step": 37 }, { "epoch": 9.64, "grad_norm": 0.011534721147345644, "learning_rate": 1.4529091286973993e-08, "loss": 1.9923, "step": 38 }, { "epoch": 9.96, "grad_norm": 0.03774292060691598, "learning_rate": 6.474868681043577e-09, "loss": 1.9848, "step": 39 }, { "epoch": 10.0, "grad_norm": 0.03774292060691598, "learning_rate": 1.6213459328950352e-09, "loss": 1.9375, "step": 40 }, { "epoch": 10.0, "step": 40, "total_flos": 332965594791936.0, "train_loss": 1.942303839325905, "train_runtime": 1492.0318, "train_samples_per_second": 0.664, "train_steps_per_second": 0.027 } ], "logging_steps": 1, "max_steps": 40, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 332965594791936.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }