{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 276,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1092896174863388,
      "grad_norm": 7.8006248254154436,
      "learning_rate": 1.6071428571428574e-06,
      "loss": 1.2223,
      "step": 10
    },
    {
      "epoch": 0.2185792349726776,
      "grad_norm": 4.844388222783676,
      "learning_rate": 3.3928571428571435e-06,
      "loss": 1.0903,
      "step": 20
    },
    {
      "epoch": 0.32786885245901637,
      "grad_norm": 3.0957110922340183,
      "learning_rate": 4.999799414013322e-06,
      "loss": 0.9481,
      "step": 30
    },
    {
      "epoch": 0.4371584699453552,
      "grad_norm": 2.680281559117405,
      "learning_rate": 4.975768018471877e-06,
      "loss": 0.8858,
      "step": 40
    },
    {
      "epoch": 0.546448087431694,
      "grad_norm": 2.7043491664434707,
      "learning_rate": 4.912060841339536e-06,
      "loss": 0.8931,
      "step": 50
    },
    {
      "epoch": 0.6557377049180327,
      "grad_norm": 2.806617891201655,
      "learning_rate": 4.809698831278217e-06,
      "loss": 0.8647,
      "step": 60
    },
    {
      "epoch": 0.7650273224043715,
      "grad_norm": 2.988877065894127,
      "learning_rate": 4.670322405614621e-06,
      "loss": 0.8276,
      "step": 70
    },
    {
      "epoch": 0.8743169398907104,
      "grad_norm": 3.224849095250869,
      "learning_rate": 4.4961651615930344e-06,
      "loss": 0.8418,
      "step": 80
    },
    {
      "epoch": 0.9836065573770492,
      "grad_norm": 2.8043508949300717,
      "learning_rate": 4.290018081536807e-06,
      "loss": 0.829,
      "step": 90
    },
    {
      "epoch": 1.0874316939890711,
      "grad_norm": 3.097062688827004,
      "learning_rate": 4.0551848055539345e-06,
      "loss": 0.6056,
      "step": 100
    },
    {
      "epoch": 1.1967213114754098,
      "grad_norm": 2.7435175097760536,
      "learning_rate": 3.795428688570505e-06,
      "loss": 0.5148,
      "step": 110
    },
    {
      "epoch": 1.3060109289617485,
      "grad_norm": 2.8323835125718384,
      "learning_rate": 3.514912490137268e-06,
      "loss": 0.478,
      "step": 120
    },
    {
      "epoch": 1.4153005464480874,
      "grad_norm": 2.8362148177995863,
      "learning_rate": 3.2181316635191125e-06,
      "loss": 0.4782,
      "step": 130
    },
    {
      "epoch": 1.5245901639344264,
      "grad_norm": 2.6159666839429807,
      "learning_rate": 2.909842313152888e-06,
      "loss": 0.4866,
      "step": 140
    },
    {
      "epoch": 1.633879781420765,
      "grad_norm": 2.6862167736013034,
      "learning_rate": 2.5949849750018486e-06,
      "loss": 0.4925,
      "step": 150
    },
    {
      "epoch": 1.7431693989071038,
      "grad_norm": 2.9005042165979877,
      "learning_rate": 2.27860544127575e-06,
      "loss": 0.4672,
      "step": 160
    },
    {
      "epoch": 1.8524590163934427,
      "grad_norm": 2.8760498712590588,
      "learning_rate": 1.9657738983516227e-06,
      "loss": 0.4581,
      "step": 170
    },
    {
      "epoch": 1.9617486338797814,
      "grad_norm": 2.8992839199365017,
      "learning_rate": 1.6615036737622574e-06,
      "loss": 0.4614,
      "step": 180
    },
    {
      "epoch": 2.0655737704918034,
      "grad_norm": 2.6886295405633924,
      "learning_rate": 1.3706708943843822e-06,
      "loss": 0.348,
      "step": 190
    },
    {
      "epoch": 2.1748633879781423,
      "grad_norm": 2.841014116152583,
      "learning_rate": 1.0979363433559892e-06,
      "loss": 0.2581,
      "step": 200
    },
    {
      "epoch": 2.2841530054644807,
      "grad_norm": 2.5452200095961905,
      "learning_rate": 8.476707680161486e-07,
      "loss": 0.2529,
      "step": 210
    },
    {
      "epoch": 2.3934426229508197,
      "grad_norm": 2.7836057891970567,
      "learning_rate": 6.238848358558439e-07,
      "loss": 0.2509,
      "step": 220
    },
    {
      "epoch": 2.5027322404371586,
      "grad_norm": 2.8036652568167026,
      "learning_rate": 4.3016486098094667e-07,
      "loss": 0.2387,
      "step": 230
    },
    {
      "epoch": 2.612021857923497,
      "grad_norm": 2.5075873714323333,
      "learning_rate": 2.696153311122704e-07,
      "loss": 0.2508,
      "step": 240
    },
    {
      "epoch": 2.721311475409836,
      "grad_norm": 2.5704796097332814,
      "learning_rate": 1.448091561646628e-07,
      "loss": 0.2736,
      "step": 250
    },
    {
      "epoch": 2.830601092896175,
      "grad_norm": 2.639131043723551,
      "learning_rate": 5.774643570378296e-08,
      "loss": 0.273,
      "step": 260
    },
    {
      "epoch": 2.939890710382514,
      "grad_norm": 2.499486520403044,
      "learning_rate": 9.822406058697665e-09,
      "loss": 0.2575,
      "step": 270
    },
    {
      "epoch": 3.0,
      "step": 276,
      "total_flos": 14707949568000.0,
      "train_loss": 0.5563258748987446,
      "train_runtime": 4077.0893,
      "train_samples_per_second": 2.145,
      "train_steps_per_second": 0.068
    }
  ],
  "logging_steps": 10,
  "max_steps": 276,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 14707949568000.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}