{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.6423841059602649,
  "eval_steps": 500,
  "global_step": 97,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.033112582781456956,
      "grad_norm": 26.791088104248047,
      "learning_rate": 0.0001999783578606323,
      "loss": 10.8058,
      "num_input_tokens_seen": 98464,
      "step": 5
    },
    {
      "epoch": 0.06622516556291391,
      "grad_norm": 22.559255599975586,
      "learning_rate": 0.00019984613426472932,
      "loss": 2.5613,
      "num_input_tokens_seen": 196880,
      "step": 10
    },
    {
      "epoch": 0.09933774834437085,
      "grad_norm": 8.285969734191895,
      "learning_rate": 0.00019959386925858942,
      "loss": 1.4579,
      "num_input_tokens_seen": 295472,
      "step": 15
    },
    {
      "epoch": 0.13245033112582782,
      "grad_norm": 7.880527496337891,
      "learning_rate": 0.0001992218661313415,
      "loss": 1.1407,
      "num_input_tokens_seen": 394096,
      "step": 20
    },
    {
      "epoch": 0.16556291390728478,
      "grad_norm": 14.62392807006836,
      "learning_rate": 0.00019873057212894398,
      "loss": 0.7902,
      "num_input_tokens_seen": 492464,
      "step": 25
    },
    {
      "epoch": 0.1986754966887417,
      "grad_norm": 12.557646751403809,
      "learning_rate": 0.00019812057791647686,
      "loss": 0.983,
      "num_input_tokens_seen": 590896,
      "step": 30
    },
    {
      "epoch": 0.23178807947019867,
      "grad_norm": 12.462843894958496,
      "learning_rate": 0.0001973926168680066,
      "loss": 0.9299,
      "num_input_tokens_seen": 689328,
      "step": 35
    },
    {
      "epoch": 0.26490066225165565,
      "grad_norm": 3.7140164375305176,
      "learning_rate": 0.00019654756418487667,
      "loss": 0.6314,
      "num_input_tokens_seen": 788032,
      "step": 40
    },
    {
      "epoch": 0.2980132450331126,
      "grad_norm": 16.123748779296875,
      "learning_rate": 0.00019558643584348476,
      "loss": 0.8311,
      "num_input_tokens_seen": 886144,
      "step": 45
    },
    {
      "epoch": 0.33112582781456956,
      "grad_norm": 7.482938289642334,
      "learning_rate": 0.00019451038737381077,
      "loss": 0.5493,
      "num_input_tokens_seen": 984672,
      "step": 50
    },
    {
      "epoch": 0.36423841059602646,
      "grad_norm": 4.410764694213867,
      "learning_rate": 0.00019332071247016476,
      "loss": 0.4694,
      "num_input_tokens_seen": 1083232,
      "step": 55
    },
    {
      "epoch": 0.3973509933774834,
      "grad_norm": 6.899860858917236,
      "learning_rate": 0.00019201884143582495,
      "loss": 0.5595,
      "num_input_tokens_seen": 1181568,
      "step": 60
    },
    {
      "epoch": 0.4304635761589404,
      "grad_norm": 3.364258050918579,
      "learning_rate": 0.0001906063394634356,
      "loss": 0.2787,
      "num_input_tokens_seen": 1279936,
      "step": 65
    },
    {
      "epoch": 0.46357615894039733,
      "grad_norm": 20.896175384521484,
      "learning_rate": 0.00018970643640796642,
      "loss": 0.5269,
      "num_input_tokens_seen": 1378544,
      "step": 70
    },
    {
      "epoch": 0.4966887417218543,
      "grad_norm": 3.4167935848236084,
      "learning_rate": 0.00018812051176267307,
      "loss": 0.7782,
      "num_input_tokens_seen": 1476896,
      "step": 75
    },
    {
      "epoch": 0.5298013245033113,
      "grad_norm": 10.354905128479004,
      "learning_rate": 0.00018642864300065767,
      "loss": 0.5458,
      "num_input_tokens_seen": 1574912,
      "step": 80
    },
    {
      "epoch": 0.5629139072847682,
      "grad_norm": 3.3909523487091064,
      "learning_rate": 0.00018463286419478255,
      "loss": 0.3148,
      "num_input_tokens_seen": 1673056,
      "step": 85
    },
    {
      "epoch": 0.5960264900662252,
      "grad_norm": 13.916143417358398,
      "learning_rate": 0.00018273533434521263,
      "loss": 0.301,
      "num_input_tokens_seen": 1771536,
      "step": 90
    },
    {
      "epoch": 0.6291390728476821,
      "grad_norm": 4.097564697265625,
      "learning_rate": 0.0001807383347837268,
      "loss": 0.6369,
      "num_input_tokens_seen": 1869952,
      "step": 95
    },
    {
      "epoch": 0.6423841059602649,
      "num_input_tokens_seen": 1916832,
      "step": 97,
      "total_flos": 8.57490291718226e+16,
      "train_loss": 1.3051114364997627,
      "train_runtime": 5464.1136,
      "train_samples_per_second": 1.326,
      "train_steps_per_second": 0.083
    }
  ],
  "logging_steps": 5,
  "max_steps": 453,
  "num_input_tokens_seen": 1916832,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 8.57490291718226e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}