{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 40,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.32,
      "grad_norm": 0.012638984824819675,
      "learning_rate": 0.0,
      "loss": 1.855,
      "step": 1
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.012340458206891919,
      "learning_rate": 1e-06,
      "loss": 1.9926,
      "step": 2
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.03581427227272684,
      "learning_rate": 9.98378654067105e-07,
      "loss": 1.9847,
      "step": 3
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.03581427227272684,
      "learning_rate": 9.935251313189563e-07,
      "loss": 1.9369,
      "step": 4
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.07949289920887785,
      "learning_rate": 9.85470908713026e-07,
      "loss": 1.855,
      "step": 5
    },
    {
      "epoch": 1.6400000000000001,
      "grad_norm": 0.011520378413809698,
      "learning_rate": 9.742682209735727e-07,
      "loss": 1.9926,
      "step": 6
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.03799569152816728,
      "learning_rate": 9.599897218294121e-07,
      "loss": 1.9848,
      "step": 7
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.03799569152816728,
      "learning_rate": 9.427280128266049e-07,
      "loss": 1.9376,
      "step": 8
    },
    {
      "epoch": 2.32,
      "grad_norm": 0.07345131274085664,
      "learning_rate": 9.225950427718974e-07,
      "loss": 1.8549,
      "step": 9
    },
    {
      "epoch": 2.64,
      "grad_norm": 0.010980750834152606,
      "learning_rate": 8.997213817017506e-07,
      "loss": 1.9925,
      "step": 10
    },
    {
      "epoch": 2.96,
      "grad_norm": 0.033117274234404914,
      "learning_rate": 8.742553740855505e-07,
      "loss": 1.9849,
      "step": 11
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.033117274234404914,
      "learning_rate": 8.463621767547997e-07,
      "loss": 1.9374,
      "step": 12
    },
    {
      "epoch": 3.32,
      "grad_norm": 0.0932577390005764,
      "learning_rate": 8.162226877976886e-07,
      "loss": 1.855,
      "step": 13
    },
    {
      "epoch": 3.64,
      "grad_norm": 0.013729171616592057,
      "learning_rate": 7.840323733655778e-07,
      "loss": 1.9923,
      "step": 14
    },
    {
      "epoch": 3.96,
      "grad_norm": 0.02238884716887795,
      "learning_rate": 7.5e-07,
      "loss": 1.9848,
      "step": 15
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.02238884716887795,
      "learning_rate": 7.14346280701527e-07,
      "loss": 1.9365,
      "step": 16
    },
    {
      "epoch": 4.32,
      "grad_norm": 0.09307472193875552,
      "learning_rate": 6.773024435212677e-07,
      "loss": 1.8547,
      "step": 17
    },
    {
      "epoch": 4.64,
      "grad_norm": 0.014080112498125649,
      "learning_rate": 6.391087319582263e-07,
      "loss": 1.9923,
      "step": 18
    },
    {
      "epoch": 4.96,
      "grad_norm": 0.0681000224516119,
      "learning_rate": 6.000128468880222e-07,
      "loss": 1.9848,
      "step": 19
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.0681000224516119,
      "learning_rate": 5.602683401276614e-07,
      "loss": 1.9364,
      "step": 20
    },
    {
      "epoch": 5.32,
      "grad_norm": 0.2855909207567928,
      "learning_rate": 5.201329700547076e-07,
      "loss": 1.8547,
      "step": 21
    },
    {
      "epoch": 5.64,
      "grad_norm": 0.014081499502207754,
      "learning_rate": 4.798670299452926e-07,
      "loss": 1.9924,
      "step": 22
    },
    {
      "epoch": 5.96,
      "grad_norm": 0.020495914128444336,
      "learning_rate": 4.397316598723385e-07,
      "loss": 1.9848,
      "step": 23
    },
    {
      "epoch": 6.0,
      "grad_norm": 0.020495914128444336,
      "learning_rate": 3.9998715311197783e-07,
      "loss": 1.9378,
      "step": 24
    },
    {
      "epoch": 6.32,
      "grad_norm": 0.09100068727172535,
      "learning_rate": 3.6089126804177364e-07,
      "loss": 1.8547,
      "step": 25
    },
    {
      "epoch": 6.64,
      "grad_norm": 0.0171770009707001,
      "learning_rate": 3.2269755647873214e-07,
      "loss": 1.9922,
      "step": 26
    },
    {
      "epoch": 6.96,
      "grad_norm": 0.015409134120937642,
      "learning_rate": 2.856537192984728e-07,
      "loss": 1.9848,
      "step": 27
    },
    {
      "epoch": 7.0,
      "grad_norm": 0.015409134120937642,
      "learning_rate": 2.500000000000001e-07,
      "loss": 1.9365,
      "step": 28
    },
    {
      "epoch": 7.32,
      "grad_norm": 0.08008822639984792,
      "learning_rate": 2.1596762663442213e-07,
      "loss": 1.855,
      "step": 29
    },
    {
      "epoch": 7.64,
      "grad_norm": 0.012415766111407774,
      "learning_rate": 1.837773122023114e-07,
      "loss": 1.9924,
      "step": 30
    },
    {
      "epoch": 7.96,
      "grad_norm": 0.01431160321057798,
      "learning_rate": 1.536378232452003e-07,
      "loss": 1.9846,
      "step": 31
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.0978141751305368,
      "learning_rate": 1.257446259144494e-07,
      "loss": 1.9373,
      "step": 32
    },
    {
      "epoch": 8.32,
      "grad_norm": 0.02208980981289174,
      "learning_rate": 1.0027861829824952e-07,
      "loss": 1.8551,
      "step": 33
    },
    {
      "epoch": 8.64,
      "grad_norm": 0.012216527071483598,
      "learning_rate": 7.740495722810269e-08,
      "loss": 1.9924,
      "step": 34
    },
    {
      "epoch": 8.96,
      "grad_norm": 0.04589178497022974,
      "learning_rate": 5.72719871733951e-08,
      "loss": 1.9847,
      "step": 35
    },
    {
      "epoch": 9.0,
      "grad_norm": 0.04589178497022974,
      "learning_rate": 4.0010278170587884e-08,
      "loss": 1.9374,
      "step": 36
    },
    {
      "epoch": 9.32,
      "grad_norm": 0.06320807250237533,
      "learning_rate": 2.5731779026427257e-08,
      "loss": 1.8548,
      "step": 37
    },
    {
      "epoch": 9.64,
      "grad_norm": 0.011534721147345644,
      "learning_rate": 1.4529091286973993e-08,
      "loss": 1.9923,
      "step": 38
    },
    {
      "epoch": 9.96,
      "grad_norm": 0.03774292060691598,
      "learning_rate": 6.474868681043577e-09,
      "loss": 1.9848,
      "step": 39
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.03774292060691598,
      "learning_rate": 1.6213459328950352e-09,
      "loss": 1.9375,
      "step": 40
    },
    {
      "epoch": 10.0,
      "step": 40,
      "total_flos": 332965594791936.0,
      "train_loss": 1.942303839325905,
      "train_runtime": 1492.0318,
      "train_samples_per_second": 0.664,
      "train_steps_per_second": 0.027
    }
  ],
  "logging_steps": 1,
  "max_steps": 40,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 332965594791936.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}