{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 40,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.32,
      "grad_norm": 0.008281990499941126,
      "learning_rate": 0.0,
      "loss": 1.855,
      "step": 1
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.008393080292653378,
      "learning_rate": 5e-06,
      "loss": 1.9926,
      "step": 2
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.029356247388450375,
      "learning_rate": 4.991893270335526e-06,
      "loss": 1.9848,
      "step": 3
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.029356247388450375,
      "learning_rate": 4.967625656594782e-06,
      "loss": 1.9369,
      "step": 4
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.04989466478864689,
      "learning_rate": 4.927354543565131e-06,
      "loss": 1.855,
      "step": 5
    },
    {
      "epoch": 1.6400000000000001,
      "grad_norm": 0.007937928881715916,
      "learning_rate": 4.8713411048678635e-06,
      "loss": 1.9924,
      "step": 6
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.025729209518294046,
      "learning_rate": 4.799948609147061e-06,
      "loss": 1.9848,
      "step": 7
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.025729209518294046,
      "learning_rate": 4.7136400641330245e-06,
      "loss": 1.9374,
      "step": 8
    },
    {
      "epoch": 2.32,
      "grad_norm": 0.07029971966612308,
      "learning_rate": 4.612975213859487e-06,
      "loss": 1.8549,
      "step": 9
    },
    {
      "epoch": 2.64,
      "grad_norm": 0.008939585590866585,
      "learning_rate": 4.498606908508754e-06,
      "loss": 1.9923,
      "step": 10
    },
    {
      "epoch": 2.96,
      "grad_norm": 0.023278242233569228,
      "learning_rate": 4.3712768704277535e-06,
      "loss": 1.985,
      "step": 11
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.023278242233569228,
      "learning_rate": 4.231810883773999e-06,
      "loss": 1.9366,
      "step": 12
    },
    {
      "epoch": 3.32,
      "grad_norm": 0.07990192517108737,
      "learning_rate": 4.081113438988443e-06,
      "loss": 1.8549,
      "step": 13
    },
    {
      "epoch": 3.64,
      "grad_norm": 0.008270658943252188,
      "learning_rate": 3.92016186682789e-06,
      "loss": 1.9924,
      "step": 14
    },
    {
      "epoch": 3.96,
      "grad_norm": 0.017104235701761287,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 1.9847,
      "step": 15
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.017104235701761287,
      "learning_rate": 3.5717314035076355e-06,
      "loss": 1.9366,
      "step": 16
    },
    {
      "epoch": 4.32,
      "grad_norm": 0.13307810310143006,
      "learning_rate": 3.386512217606339e-06,
      "loss": 1.855,
      "step": 17
    },
    {
      "epoch": 4.64,
      "grad_norm": 0.00970396249321868,
      "learning_rate": 3.195543659791132e-06,
      "loss": 1.9923,
      "step": 18
    },
    {
      "epoch": 4.96,
      "grad_norm": 0.028438345179473263,
      "learning_rate": 3.0000642344401115e-06,
      "loss": 1.9849,
      "step": 19
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.028438345179473263,
      "learning_rate": 2.8013417006383078e-06,
      "loss": 1.9364,
      "step": 20
    },
    {
      "epoch": 5.32,
      "grad_norm": 0.09073185510459704,
      "learning_rate": 2.6006648502735384e-06,
      "loss": 1.8551,
      "step": 21
    },
    {
      "epoch": 5.64,
      "grad_norm": 0.009996241739521737,
      "learning_rate": 2.399335149726463e-06,
      "loss": 1.9922,
      "step": 22
    },
    {
      "epoch": 5.96,
      "grad_norm": 0.013471113132443889,
      "learning_rate": 2.1986582993616926e-06,
      "loss": 1.9846,
      "step": 23
    },
    {
      "epoch": 6.0,
      "grad_norm": 0.013471113132443889,
      "learning_rate": 1.9999357655598894e-06,
      "loss": 1.9376,
      "step": 24
    },
    {
      "epoch": 6.32,
      "grad_norm": 0.10771276943319022,
      "learning_rate": 1.8044563402088686e-06,
      "loss": 1.8548,
      "step": 25
    },
    {
      "epoch": 6.64,
      "grad_norm": 0.010875730943091112,
      "learning_rate": 1.613487782393661e-06,
      "loss": 1.9925,
      "step": 26
    },
    {
      "epoch": 6.96,
      "grad_norm": 0.012476490558621243,
      "learning_rate": 1.4282685964923643e-06,
      "loss": 1.9851,
      "step": 27
    },
    {
      "epoch": 7.0,
      "grad_norm": 0.012476490558621243,
      "learning_rate": 1.2500000000000007e-06,
      "loss": 1.9366,
      "step": 28
    },
    {
      "epoch": 7.32,
      "grad_norm": 0.09585279564744115,
      "learning_rate": 1.079838133172111e-06,
      "loss": 1.8548,
      "step": 29
    },
    {
      "epoch": 7.64,
      "grad_norm": 0.010003265664870921,
      "learning_rate": 9.188865610115572e-07,
      "loss": 1.9925,
      "step": 30
    },
    {
      "epoch": 7.96,
      "grad_norm": 0.010857427499039972,
      "learning_rate": 7.681891162260016e-07,
      "loss": 1.9846,
      "step": 31
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.13937310629960933,
      "learning_rate": 6.28723129572247e-07,
      "loss": 1.9372,
      "step": 32
    },
    {
      "epoch": 8.32,
      "grad_norm": 0.010124718301379253,
      "learning_rate": 5.013930914912477e-07,
      "loss": 1.8548,
      "step": 33
    },
    {
      "epoch": 8.64,
      "grad_norm": 0.010170427323459208,
      "learning_rate": 3.8702478614051353e-07,
      "loss": 1.9924,
      "step": 34
    },
    {
      "epoch": 8.96,
      "grad_norm": 0.032160442525003156,
      "learning_rate": 2.8635993586697555e-07,
      "loss": 1.9848,
      "step": 35
    },
    {
      "epoch": 9.0,
      "grad_norm": 0.032160442525003156,
      "learning_rate": 2.0005139085293945e-07,
      "loss": 1.9365,
      "step": 36
    },
    {
      "epoch": 9.32,
      "grad_norm": 0.21934852923645737,
      "learning_rate": 1.286588951321363e-07,
      "loss": 1.8547,
      "step": 37
    },
    {
      "epoch": 9.64,
      "grad_norm": 0.009250123584571853,
      "learning_rate": 7.264545643486997e-08,
      "loss": 1.9925,
      "step": 38
    },
    {
      "epoch": 9.96,
      "grad_norm": 0.029391449274511882,
      "learning_rate": 3.237434340521789e-08,
      "loss": 1.9849,
      "step": 39
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.029391449274511882,
      "learning_rate": 8.106729664475178e-09,
      "loss": 1.9368,
      "step": 40
    },
    {
      "epoch": 10.0,
      "step": 40,
      "total_flos": 174553779142656.0,
      "train_loss": 1.9422493666410445,
      "train_runtime": 1457.2923,
      "train_samples_per_second": 0.679,
      "train_steps_per_second": 0.027
    }
  ],
  "logging_steps": 1,
  "max_steps": 40,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 174553779142656.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}