| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9893390191897654, | |
| "eval_steps": 100, | |
| "global_step": 58, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 637.0468978881836, | |
| "epoch": 0.017057569296375266, | |
| "grad_norm": 0.29794225096702576, | |
| "kl": 0.0, | |
| "learning_rate": 5e-07, | |
| "loss": 0.0399, | |
| "reward": 0.5323661006987095, | |
| "reward_std": 0.4141998440027237, | |
| "rewards/accuracy_reward": 0.522321455180645, | |
| "rewards/format_reward": 0.010044643306173384, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 620.5482950210571, | |
| "epoch": 0.08528784648187633, | |
| "grad_norm": 1.4550719261169434, | |
| "kl": 0.00039458274841308594, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.0399, | |
| "reward": 0.5259486855939031, | |
| "reward_std": 0.40668863616883755, | |
| "rewards/accuracy_reward": 0.5150669887661934, | |
| "rewards/format_reward": 0.010881697031436488, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 623.8926574707032, | |
| "epoch": 0.17057569296375266, | |
| "grad_norm": 4.9805378913879395, | |
| "kl": 0.016739654541015624, | |
| "learning_rate": 2.956412726139078e-06, | |
| "loss": 0.0634, | |
| "reward": 0.6223214566707611, | |
| "reward_std": 0.36663368344306946, | |
| "rewards/accuracy_reward": 0.6098214544355869, | |
| "rewards/format_reward": 0.01250000074505806, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 615.7777038574219, | |
| "epoch": 0.255863539445629, | |
| "grad_norm": 0.47206947207450867, | |
| "kl": 0.013239669799804687, | |
| "learning_rate": 2.7836719084521715e-06, | |
| "loss": 0.0924, | |
| "reward": 0.6946428909897804, | |
| "reward_std": 0.3172408826649189, | |
| "rewards/accuracy_reward": 0.6904018193483352, | |
| "rewards/format_reward": 0.004241071664728225, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 599.7145324707031, | |
| "epoch": 0.3411513859275053, | |
| "grad_norm": 0.2272499054670334, | |
| "kl": 0.01047210693359375, | |
| "learning_rate": 2.4946839873611927e-06, | |
| "loss": 0.0599, | |
| "reward": 0.7276786029338836, | |
| "reward_std": 0.27400995828211305, | |
| "rewards/accuracy_reward": 0.724107176065445, | |
| "rewards/format_reward": 0.00357142873108387, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 603.7484680175781, | |
| "epoch": 0.42643923240938164, | |
| "grad_norm": 0.3881862163543701, | |
| "kl": 0.0113189697265625, | |
| "learning_rate": 2.1156192081791355e-06, | |
| "loss": 0.0633, | |
| "reward": 0.7245536014437676, | |
| "reward_std": 0.2838048007339239, | |
| "rewards/accuracy_reward": 0.7116071715950966, | |
| "rewards/format_reward": 0.012946429196745157, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 600.9332847595215, | |
| "epoch": 0.511727078891258, | |
| "grad_norm": 0.3461417853832245, | |
| "kl": 0.013724517822265626, | |
| "learning_rate": 1.6808050203829845e-06, | |
| "loss": 0.055, | |
| "reward": 0.7535714626312255, | |
| "reward_std": 0.3158104930073023, | |
| "rewards/accuracy_reward": 0.6966518193483353, | |
| "rewards/format_reward": 0.056919645587913695, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 575.5292694091797, | |
| "epoch": 0.5970149253731343, | |
| "grad_norm": 0.3223617970943451, | |
| "kl": 0.03236236572265625, | |
| "learning_rate": 1.2296174432791415e-06, | |
| "loss": 0.0614, | |
| "reward": 0.8600446820259094, | |
| "reward_std": 0.3953482583165169, | |
| "rewards/accuracy_reward": 0.6738839574158192, | |
| "rewards/format_reward": 0.18616072218865157, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 552.3160972595215, | |
| "epoch": 0.6823027718550106, | |
| "grad_norm": 0.49858179688453674, | |
| "kl": 0.02544708251953125, | |
| "learning_rate": 8.029152419343472e-07, | |
| "loss": 0.0709, | |
| "reward": 0.9142857566475868, | |
| "reward_std": 0.4068648174405098, | |
| "rewards/accuracy_reward": 0.7058036103844643, | |
| "rewards/format_reward": 0.20848215278238058, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 556.3649818420411, | |
| "epoch": 0.767590618336887, | |
| "grad_norm": 0.4330020546913147, | |
| "kl": 0.035955810546875, | |
| "learning_rate": 4.3933982822017883e-07, | |
| "loss": 0.069, | |
| "reward": 0.9526786163449288, | |
| "reward_std": 0.4379459634423256, | |
| "rewards/accuracy_reward": 0.6968750283122063, | |
| "rewards/format_reward": 0.25580358393490316, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 558.1154296875, | |
| "epoch": 0.8528784648187633, | |
| "grad_norm": 0.6382198333740234, | |
| "kl": 0.044219970703125, | |
| "learning_rate": 1.718159615201853e-07, | |
| "loss": 0.0789, | |
| "reward": 0.9250000432133675, | |
| "reward_std": 0.4314121402800083, | |
| "rewards/accuracy_reward": 0.6897321730852127, | |
| "rewards/format_reward": 0.23526786901056768, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 548.7268096923829, | |
| "epoch": 0.9381663113006397, | |
| "grad_norm": 0.397476464509964, | |
| "kl": 0.0381134033203125, | |
| "learning_rate": 2.4570139579284723e-08, | |
| "loss": 0.0697, | |
| "reward": 0.9647321805357933, | |
| "reward_std": 0.45185114070773125, | |
| "rewards/accuracy_reward": 0.7183036014437676, | |
| "rewards/format_reward": 0.24642858281731606, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 540.5064531962076, | |
| "epoch": 0.9893390191897654, | |
| "kl": 0.037785847981770836, | |
| "reward": 0.955729216337204, | |
| "reward_std": 0.44279487431049347, | |
| "rewards/accuracy_reward": 0.7120536069075266, | |
| "rewards/format_reward": 0.24367560787747303, | |
| "step": 58, | |
| "total_flos": 0.0, | |
| "train_loss": 0.06606895855531611, | |
| "train_runtime": 9129.9094, | |
| "train_samples_per_second": 0.821, | |
| "train_steps_per_second": 0.006 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 58, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |