| { |
| "epoch": 2.986666666666667, |
| "eval_logits/chosen": -0.26991021633148193, |
| "eval_logits/rejected": 0.17896275222301483, |
| "eval_logps/chosen": -0.16233521699905396, |
| "eval_logps/rejected": -0.90810227394104, |
| "eval_loss": 0.20663729310035706, |
| "eval_odds_ratio_loss": 1.8880867958068848, |
| "eval_rewards/accuracies": 0.8299999833106995, |
| "eval_rewards/chosen": -0.016233522444963455, |
| "eval_rewards/margins": 0.0745767131447792, |
| "eval_rewards/rejected": -0.09081023186445236, |
| "eval_runtime": 6.9404, |
| "eval_samples_per_second": 14.408, |
| "eval_sft_loss": 0.017828578129410744, |
| "eval_steps_per_second": 7.204, |
| "total_flos": 3.471912421559501e+16, |
| "train_loss": 1.00575195536727, |
| "train_runtime": 639.1511, |
| "train_samples_per_second": 4.224, |
| "train_steps_per_second": 0.263 |
| } |