{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9523809523809523, "eval_steps": 500, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09523809523809523, "grad_norm": 10.040461540222168, "learning_rate": 0.0, "logits/chosen": 0.5517578125, "logits/rejected": 0.5625, "logps/chosen": -384.5, "logps/rejected": -438.0, "loss": 0.6982, "rewards/accuracies": 0.0833333358168602, "rewards/chosen": -0.016666412353515625, "rewards/margins": -0.009382247924804688, "rewards/rejected": -0.00731658935546875, "step": 1 }, { "epoch": 0.19047619047619047, "grad_norm": 8.76193904876709, "learning_rate": 1e-06, "logits/chosen": 0.54248046875, "logits/rejected": 0.54052734375, "logps/chosen": -347.5, "logps/rejected": -419.5, "loss": 0.6898, "rewards/accuracies": 0.109375, "rewards/chosen": -0.010042190551757812, "rewards/margins": 0.00848388671875, "rewards/rejected": -0.01854705810546875, "step": 2 }, { "epoch": 0.2857142857142857, "grad_norm": 7.859212875366211, "learning_rate": 9.728616793536587e-07, "logits/chosen": 0.546875, "logits/rejected": 0.5322265625, "logps/chosen": -372.0, "logps/rejected": -420.0, "loss": 0.6895, "rewards/accuracies": 0.0807291716337204, "rewards/chosen": 0.007147789001464844, "rewards/margins": 0.0082244873046875, "rewards/rejected": -0.0010318756103515625, "step": 3 }, { "epoch": 0.38095238095238093, "grad_norm": 8.137142181396484, "learning_rate": 8.9471999940354e-07, "logits/chosen": 0.50927734375, "logits/rejected": 0.55322265625, "logps/chosen": -374.5, "logps/rejected": -434.5, "loss": 0.6903, "rewards/accuracies": 0.109375, "rewards/chosen": -0.02203369140625, "rewards/margins": 0.0063877105712890625, "rewards/rejected": -0.028411865234375, "step": 4 }, { "epoch": 0.47619047619047616, "grad_norm": 6.646923542022705, "learning_rate": 7.75e-07, "logits/chosen": 0.45166015625, "logits/rejected": 0.46630859375, "logps/chosen": -373.5, "logps/rejected": -412.5, "loss": 0.6938, "rewards/accuracies": 0.1093750074505806, "rewards/chosen": -0.02127838134765625, "rewards/margins": 0.0022153854370117188, "rewards/rejected": -0.0235137939453125, "step": 5 }, { "epoch": 0.5714285714285714, "grad_norm": 8.178302764892578, "learning_rate": 6.281416799501187e-07, "logits/chosen": 0.533203125, "logits/rejected": 0.544921875, "logps/chosen": -387.0, "logps/rejected": -442.5, "loss": 0.6883, "rewards/accuracies": 0.1223958358168602, "rewards/chosen": -0.029693603515625, "rewards/margins": 0.0110931396484375, "rewards/rejected": -0.040740966796875, "step": 6 }, { "epoch": 0.6666666666666666, "grad_norm": 9.006941795349121, "learning_rate": 4.7185832004988133e-07, "logits/chosen": 0.5537109375, "logits/rejected": 0.5390625, "logps/chosen": -368.0, "logps/rejected": -418.0, "loss": 0.6898, "rewards/accuracies": 0.140625, "rewards/chosen": -0.062744140625, "rewards/margins": 0.009137153625488281, "rewards/rejected": -0.0718994140625, "step": 7 }, { "epoch": 0.7619047619047619, "grad_norm": 6.875649929046631, "learning_rate": 3.250000000000001e-07, "logits/chosen": 0.5703125, "logits/rejected": 0.5859375, "logps/chosen": -377.5, "logps/rejected": -427.0, "loss": 0.6864, "rewards/accuracies": 0.1614583432674408, "rewards/chosen": -0.06878662109375, "rewards/margins": 0.01445770263671875, "rewards/rejected": -0.083251953125, "step": 8 }, { "epoch": 0.8571428571428571, "grad_norm": 7.995317459106445, "learning_rate": 2.0528000059645995e-07, "logits/chosen": 0.5068359375, "logits/rejected": 0.544921875, "logps/chosen": -388.0, "logps/rejected": -447.0, "loss": 0.6867, "rewards/accuracies": 0.1822916716337204, "rewards/chosen": -0.0706787109375, "rewards/margins": 0.016702651977539062, "rewards/rejected": -0.0875244140625, "step": 9 }, { "epoch": 0.9523809523809523, "grad_norm": 9.462100982666016, "learning_rate": 1.2713832064634125e-07, "logits/chosen": 0.5576171875, "logits/rejected": 0.560546875, "logps/chosen": -362.0, "logps/rejected": -437.0, "loss": 0.6829, "rewards/accuracies": 0.1953125, "rewards/chosen": -0.0859375, "rewards/margins": 0.022670745849609375, "rewards/rejected": -0.108642578125, "step": 10 }, { "epoch": 0.9523809523809523, "step": 10, "total_flos": 0.0, "train_loss": 0.6895670831203461, "train_runtime": 273.9395, "train_samples_per_second": 14.602, "train_steps_per_second": 0.037 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }