{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 40,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.32,
      "grad_norm": 0.016585721146764668,
      "learning_rate": 0.0,
      "loss": 1.855,
      "step": 1
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.01677702929181964,
      "learning_rate": 1e-06,
      "loss": 1.9926,
      "step": 2
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.05073890028052113,
      "learning_rate": 9.98378654067105e-07,
      "loss": 1.9849,
      "step": 3
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.05073890028052113,
      "learning_rate": 9.935251313189563e-07,
      "loss": 1.9371,
      "step": 4
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.1536509017148004,
      "learning_rate": 9.85470908713026e-07,
      "loss": 1.8549,
      "step": 5
    },
    {
      "epoch": 1.6400000000000001,
      "grad_norm": 0.015792830580800027,
      "learning_rate": 9.742682209735727e-07,
      "loss": 1.9926,
      "step": 6
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.05261997433384393,
      "learning_rate": 9.599897218294121e-07,
      "loss": 1.9848,
      "step": 7
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.05261997433384393,
      "learning_rate": 9.427280128266049e-07,
      "loss": 1.9361,
      "step": 8
    },
    {
      "epoch": 2.32,
      "grad_norm": 0.2987070312238438,
      "learning_rate": 9.225950427718974e-07,
      "loss": 1.8548,
      "step": 9
    },
    {
      "epoch": 2.64,
      "grad_norm": 0.017241858336362693,
      "learning_rate": 8.997213817017506e-07,
      "loss": 1.9925,
      "step": 10
    },
    {
      "epoch": 2.96,
      "grad_norm": 0.05300382819081621,
      "learning_rate": 8.742553740855505e-07,
      "loss": 1.9848,
      "step": 11
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.05300382819081621,
      "learning_rate": 8.463621767547997e-07,
      "loss": 1.937,
      "step": 12
    },
    {
      "epoch": 3.32,
      "grad_norm": 0.11174785150926758,
      "learning_rate": 8.162226877976886e-07,
      "loss": 1.8547,
      "step": 13
    },
    {
      "epoch": 3.64,
      "grad_norm": 0.016349349252879262,
      "learning_rate": 7.840323733655778e-07,
      "loss": 1.9926,
      "step": 14
    },
    {
      "epoch": 3.96,
      "grad_norm": 0.03459460400919377,
      "learning_rate": 7.5e-07,
      "loss": 1.9848,
      "step": 15
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.03459460400919377,
      "learning_rate": 7.14346280701527e-07,
      "loss": 1.9362,
      "step": 16
    },
    {
      "epoch": 4.32,
      "grad_norm": 0.3422091567988848,
      "learning_rate": 6.773024435212677e-07,
      "loss": 1.855,
      "step": 17
    },
    {
      "epoch": 4.64,
      "grad_norm": 0.01652565440862823,
      "learning_rate": 6.391087319582263e-07,
      "loss": 1.9925,
      "step": 18
    },
    {
      "epoch": 4.96,
      "grad_norm": 0.028052540792361507,
      "learning_rate": 6.000128468880222e-07,
      "loss": 1.9848,
      "step": 19
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.028052540792361507,
      "learning_rate": 5.602683401276614e-07,
      "loss": 1.937,
      "step": 20
    },
    {
      "epoch": 5.32,
      "grad_norm": 0.15562528914810267,
      "learning_rate": 5.201329700547076e-07,
      "loss": 1.8548,
      "step": 21
    },
    {
      "epoch": 5.64,
      "grad_norm": 0.019674143915993505,
      "learning_rate": 4.798670299452926e-07,
      "loss": 1.9924,
      "step": 22
    },
    {
      "epoch": 5.96,
      "grad_norm": 0.09623527051626159,
      "learning_rate": 4.397316598723385e-07,
      "loss": 1.9849,
      "step": 23
    },
    {
      "epoch": 6.0,
      "grad_norm": 0.09623527051626159,
      "learning_rate": 3.9998715311197783e-07,
      "loss": 1.9368,
      "step": 24
    },
    {
      "epoch": 6.32,
      "grad_norm": 0.11985804134909249,
      "learning_rate": 3.6089126804177364e-07,
      "loss": 1.8548,
      "step": 25
    },
    {
      "epoch": 6.64,
      "grad_norm": 0.019682397443786706,
      "learning_rate": 3.2269755647873214e-07,
      "loss": 1.9925,
      "step": 26
    },
    {
      "epoch": 6.96,
      "grad_norm": 0.023053888216924723,
      "learning_rate": 2.856537192984728e-07,
      "loss": 1.985,
      "step": 27
    },
    {
      "epoch": 7.0,
      "grad_norm": 0.023053888216924723,
      "learning_rate": 2.500000000000001e-07,
      "loss": 1.9369,
      "step": 28
    },
    {
      "epoch": 7.32,
      "grad_norm": 0.21719404416250201,
      "learning_rate": 2.1596762663442213e-07,
      "loss": 1.8548,
      "step": 29
    },
    {
      "epoch": 7.64,
      "grad_norm": 0.021249978280051787,
      "learning_rate": 1.837773122023114e-07,
      "loss": 1.9924,
      "step": 30
    },
    {
      "epoch": 7.96,
      "grad_norm": 0.019648806884757328,
      "learning_rate": 1.536378232452003e-07,
      "loss": 1.9849,
      "step": 31
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.410349000849933,
      "learning_rate": 1.257446259144494e-07,
      "loss": 1.9361,
      "step": 32
    },
    {
      "epoch": 8.32,
      "grad_norm": 0.018918373716908438,
      "learning_rate": 1.0027861829824952e-07,
      "loss": 1.8548,
      "step": 33
    },
    {
      "epoch": 8.64,
      "grad_norm": 0.016569819767445457,
      "learning_rate": 7.740495722810269e-08,
      "loss": 1.9925,
      "step": 34
    },
    {
      "epoch": 8.96,
      "grad_norm": 0.107009520948546,
      "learning_rate": 5.72719871733951e-08,
      "loss": 1.9847,
      "step": 35
    },
    {
      "epoch": 9.0,
      "grad_norm": 0.107009520948546,
      "learning_rate": 4.0010278170587884e-08,
      "loss": 1.9376,
      "step": 36
    },
    {
      "epoch": 9.32,
      "grad_norm": 0.08365770322839408,
      "learning_rate": 2.5731779026427257e-08,
      "loss": 1.8548,
      "step": 37
    },
    {
      "epoch": 9.64,
      "grad_norm": 0.016124977942298727,
      "learning_rate": 1.4529091286973993e-08,
      "loss": 1.9923,
      "step": 38
    },
    {
      "epoch": 9.96,
      "grad_norm": 0.0871056137523833,
      "learning_rate": 6.474868681043577e-09,
      "loss": 1.9847,
      "step": 39
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.0871056137523833,
      "learning_rate": 1.6213459328950352e-09,
      "loss": 1.9372,
      "step": 40
    },
    {
      "epoch": 10.0,
      "step": 40,
      "total_flos": 174553779142656.0,
      "train_loss": 1.942233791947365,
      "train_runtime": 1437.3222,
      "train_samples_per_second": 0.689,
      "train_steps_per_second": 0.028
    }
  ],
  "logging_steps": 1,
  "max_steps": 40,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 174553779142656.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}