{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 260, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07736943907156674, "grad_norm": 1.0296438932418823, "learning_rate": 0.0001930769230769231, "loss": 2.7908, "step": 10 }, { "epoch": 0.15473887814313347, "grad_norm": 0.9864906668663025, "learning_rate": 0.0001853846153846154, "loss": 1.9878, "step": 20 }, { "epoch": 0.23210831721470018, "grad_norm": 0.8276298642158508, "learning_rate": 0.0001776923076923077, "loss": 1.8722, "step": 30 }, { "epoch": 0.30947775628626695, "grad_norm": 0.9250670075416565, "learning_rate": 0.00017, "loss": 1.7767, "step": 40 }, { "epoch": 0.38684719535783363, "grad_norm": 0.8650696277618408, "learning_rate": 0.0001623076923076923, "loss": 1.8079, "step": 50 }, { "epoch": 0.46421663442940037, "grad_norm": 0.8957677483558655, "learning_rate": 0.00015461538461538464, "loss": 1.786, "step": 60 }, { "epoch": 0.5415860735009671, "grad_norm": 0.7478697299957275, "learning_rate": 0.00014692307692307693, "loss": 1.7628, "step": 70 }, { "epoch": 0.6189555125725339, "grad_norm": 0.7914655804634094, "learning_rate": 0.00013923076923076923, "loss": 1.712, "step": 80 }, { "epoch": 0.6963249516441006, "grad_norm": 0.8356083631515503, "learning_rate": 0.00013153846153846156, "loss": 1.7362, "step": 90 }, { "epoch": 0.7736943907156673, "grad_norm": 0.8357738256454468, "learning_rate": 0.00012384615384615385, "loss": 1.7215, "step": 100 }, { "epoch": 0.851063829787234, "grad_norm": 0.8000754714012146, "learning_rate": 0.00011615384615384617, "loss": 1.6271, "step": 110 }, { "epoch": 0.9284332688588007, "grad_norm": 0.7836863994598389, "learning_rate": 0.00010846153846153846, "loss": 1.7314, "step": 120 }, { "epoch": 1.0, "grad_norm": 1.5980422496795654, "learning_rate": 0.00010076923076923077, "loss": 1.6623, "step": 130 }, { "epoch": 1.0773694390715667, "grad_norm": 0.8195829391479492, "learning_rate": 9.307692307692309e-05, "loss": 1.5811, "step": 140 }, { "epoch": 1.1547388781431334, "grad_norm": 0.824920654296875, "learning_rate": 8.538461538461538e-05, "loss": 1.5525, "step": 150 }, { "epoch": 1.2321083172147003, "grad_norm": 0.9823306798934937, "learning_rate": 7.76923076923077e-05, "loss": 1.6602, "step": 160 }, { "epoch": 1.309477756286267, "grad_norm": 0.8831573724746704, "learning_rate": 7e-05, "loss": 1.5839, "step": 170 }, { "epoch": 1.3868471953578336, "grad_norm": 1.0566450357437134, "learning_rate": 6.23076923076923e-05, "loss": 1.6407, "step": 180 }, { "epoch": 1.4642166344294003, "grad_norm": 0.8523500561714172, "learning_rate": 5.461538461538461e-05, "loss": 1.5857, "step": 190 }, { "epoch": 1.5415860735009672, "grad_norm": 0.9045595526695251, "learning_rate": 4.692307692307693e-05, "loss": 1.5332, "step": 200 }, { "epoch": 1.618955512572534, "grad_norm": 0.941744863986969, "learning_rate": 3.923076923076923e-05, "loss": 1.5534, "step": 210 }, { "epoch": 1.6963249516441006, "grad_norm": 0.8504011631011963, "learning_rate": 3.153846153846154e-05, "loss": 1.5462, "step": 220 }, { "epoch": 1.7736943907156673, "grad_norm": 0.8891894817352295, "learning_rate": 2.384615384615385e-05, "loss": 1.6007, "step": 230 }, { "epoch": 1.851063829787234, "grad_norm": 0.8817383050918579, "learning_rate": 1.6153846153846154e-05, "loss": 1.5908, "step": 240 }, { "epoch": 1.9284332688588006, "grad_norm": 0.9529927372932434, "learning_rate": 8.461538461538462e-06, "loss": 1.5941, "step": 250 }, { "epoch": 2.0, "grad_norm": 1.8291347026824951, "learning_rate": 7.692307692307694e-07, "loss": 1.5418, "step": 260 } ], "logging_steps": 10, "max_steps": 260, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2925171077384038e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }