| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 15.89591957421644, | |
| "eval_steps": 400, | |
| "global_step": 3360, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.15138971023063277, | |
| "grad_norm": 27.229875564575195, | |
| "learning_rate": 4.7619047619047613e-08, | |
| "log_odds_chosen": -0.0525120347738266, | |
| "log_odds_ratio": -0.7864450216293335, | |
| "logits/chosen": 1.5500602722167969, | |
| "logits/rejected": 1.3292943239212036, | |
| "logps/chosen": -1.191162109375, | |
| "logps/rejected": -1.1635648012161255, | |
| "loss": 1.6156, | |
| "nll_loss": 1.46018648147583, | |
| "rewards/accuracies": 0.43359375, | |
| "rewards/chosen": -0.1786743402481079, | |
| "rewards/margins": -0.004139607772231102, | |
| "rewards/rejected": -0.17453473806381226, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.30277942046126555, | |
| "grad_norm": 27.179594039916992, | |
| "learning_rate": 9.523809523809523e-08, | |
| "log_odds_chosen": 0.019624141976237297, | |
| "log_odds_ratio": -0.748144805431366, | |
| "logits/chosen": 1.5089401006698608, | |
| "logits/rejected": 1.3729290962219238, | |
| "logps/chosen": -1.2469431161880493, | |
| "logps/rejected": -1.26250422000885, | |
| "loss": 1.5984, | |
| "nll_loss": 1.5403207540512085, | |
| "rewards/accuracies": 0.5078125, | |
| "rewards/chosen": -0.1870414763689041, | |
| "rewards/margins": 0.002334160730242729, | |
| "rewards/rejected": -0.1893756240606308, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.4541691306918983, | |
| "grad_norm": 25.431949615478516, | |
| "learning_rate": 1.4285714285714285e-07, | |
| "log_odds_chosen": -0.053292229771614075, | |
| "log_odds_ratio": -0.78084397315979, | |
| "logits/chosen": 1.5771101713180542, | |
| "logits/rejected": 1.4359058141708374, | |
| "logps/chosen": -1.20406174659729, | |
| "logps/rejected": -1.1857094764709473, | |
| "loss": 1.6007, | |
| "nll_loss": 1.4434431791305542, | |
| "rewards/accuracies": 0.421875, | |
| "rewards/chosen": -0.18060927093029022, | |
| "rewards/margins": -0.0027528139762580395, | |
| "rewards/rejected": -0.1778564453125, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.6055588409225311, | |
| "grad_norm": 21.6727294921875, | |
| "learning_rate": 1.9047619047619045e-07, | |
| "log_odds_chosen": 0.05749227851629257, | |
| "log_odds_ratio": -0.7303333878517151, | |
| "logits/chosen": 1.468267560005188, | |
| "logits/rejected": 1.4105079174041748, | |
| "logps/chosen": -1.216729998588562, | |
| "logps/rejected": -1.2735037803649902, | |
| "loss": 1.5421, | |
| "nll_loss": 1.4667391777038574, | |
| "rewards/accuracies": 0.48046875, | |
| "rewards/chosen": -0.18250951170921326, | |
| "rewards/margins": 0.008516057394444942, | |
| "rewards/rejected": -0.19102558493614197, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.7569485511531638, | |
| "grad_norm": 18.762540817260742, | |
| "learning_rate": 2.3809523809523806e-07, | |
| "log_odds_chosen": -0.01149587519466877, | |
| "log_odds_ratio": -0.7697539329528809, | |
| "logits/chosen": 1.5571284294128418, | |
| "logits/rejected": 1.4197614192962646, | |
| "logps/chosen": -1.1996402740478516, | |
| "logps/rejected": -1.206023931503296, | |
| "loss": 1.4771, | |
| "nll_loss": 1.3902133703231812, | |
| "rewards/accuracies": 0.4609375, | |
| "rewards/chosen": -0.17994605004787445, | |
| "rewards/margins": 0.000957544194534421, | |
| "rewards/rejected": -0.1809035986661911, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.9083382613837966, | |
| "grad_norm": 14.901942253112793, | |
| "learning_rate": 2.857142857142857e-07, | |
| "log_odds_chosen": -0.0564657598733902, | |
| "log_odds_ratio": -0.791755735874176, | |
| "logits/chosen": 1.7110127210617065, | |
| "logits/rejected": 1.559685468673706, | |
| "logps/chosen": -1.201224446296692, | |
| "logps/rejected": -1.1592237949371338, | |
| "loss": 1.4155, | |
| "nll_loss": 1.3239426612854004, | |
| "rewards/accuracies": 0.50390625, | |
| "rewards/chosen": -0.18018370866775513, | |
| "rewards/margins": -0.00630012946203351, | |
| "rewards/rejected": -0.1738835722208023, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.0597279716144294, | |
| "grad_norm": 14.1319580078125, | |
| "learning_rate": 3.333333333333333e-07, | |
| "log_odds_chosen": -0.07105285674333572, | |
| "log_odds_ratio": -0.7984029650688171, | |
| "logits/chosen": 1.623414397239685, | |
| "logits/rejected": 1.496307134628296, | |
| "logps/chosen": -1.1715890169143677, | |
| "logps/rejected": -1.1218650341033936, | |
| "loss": 1.3513, | |
| "nll_loss": 1.2719416618347168, | |
| "rewards/accuracies": 0.46484375, | |
| "rewards/chosen": -0.1757383644580841, | |
| "rewards/margins": -0.007458594627678394, | |
| "rewards/rejected": -0.16827978193759918, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.2111176818450622, | |
| "grad_norm": 13.425606727600098, | |
| "learning_rate": 3.809523809523809e-07, | |
| "log_odds_chosen": 0.10578853636980057, | |
| "log_odds_ratio": -0.70930016040802, | |
| "logits/chosen": 1.6068717241287231, | |
| "logits/rejected": 1.3819518089294434, | |
| "logps/chosen": -1.1136094331741333, | |
| "logps/rejected": -1.1912662982940674, | |
| "loss": 1.3252, | |
| "nll_loss": 1.2232904434204102, | |
| "rewards/accuracies": 0.546875, | |
| "rewards/chosen": -0.16704143583774567, | |
| "rewards/margins": 0.0116485096514225, | |
| "rewards/rejected": -0.17868994176387787, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.362507392075695, | |
| "grad_norm": 11.342605590820312, | |
| "learning_rate": 4.285714285714285e-07, | |
| "log_odds_chosen": 0.13081349432468414, | |
| "log_odds_ratio": -0.6838027238845825, | |
| "logits/chosen": 1.4501845836639404, | |
| "logits/rejected": 1.3310956954956055, | |
| "logps/chosen": -1.0919809341430664, | |
| "logps/rejected": -1.1673154830932617, | |
| "loss": 1.2755, | |
| "nll_loss": 1.1733828783035278, | |
| "rewards/accuracies": 0.578125, | |
| "rewards/chosen": -0.16379712522029877, | |
| "rewards/margins": 0.011300182901322842, | |
| "rewards/rejected": -0.1750973016023636, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.5138971023063275, | |
| "grad_norm": 12.543194770812988, | |
| "learning_rate": 4.761904761904761e-07, | |
| "log_odds_chosen": 0.19491538405418396, | |
| "log_odds_ratio": -0.6595159769058228, | |
| "logits/chosen": 1.4903298616409302, | |
| "logits/rejected": 1.3049672842025757, | |
| "logps/chosen": -1.0332714319229126, | |
| "logps/rejected": -1.1428489685058594, | |
| "loss": 1.2223, | |
| "nll_loss": 1.0796581506729126, | |
| "rewards/accuracies": 0.6171875, | |
| "rewards/chosen": -0.15499071776866913, | |
| "rewards/margins": 0.0164366252720356, | |
| "rewards/rejected": -0.17142733931541443, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.6652868125369604, | |
| "grad_norm": 8.179709434509277, | |
| "learning_rate": 4.999654636727764e-07, | |
| "log_odds_chosen": 0.14331884682178497, | |
| "log_odds_ratio": -0.6748344302177429, | |
| "logits/chosen": 1.4205052852630615, | |
| "logits/rejected": 1.3244390487670898, | |
| "logps/chosen": -1.0807911157608032, | |
| "logps/rejected": -1.159712314605713, | |
| "loss": 1.1776, | |
| "nll_loss": 1.0815861225128174, | |
| "rewards/accuracies": 0.609375, | |
| "rewards/chosen": -0.16211867332458496, | |
| "rewards/margins": 0.011838208884000778, | |
| "rewards/rejected": -0.17395688593387604, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.8166765227675932, | |
| "grad_norm": 9.002681732177734, | |
| "learning_rate": 4.996892303047305e-07, | |
| "log_odds_chosen": 0.15229541063308716, | |
| "log_odds_ratio": -0.6689931154251099, | |
| "logits/chosen": 1.3082184791564941, | |
| "logits/rejected": 1.208222508430481, | |
| "logps/chosen": -1.0531638860702515, | |
| "logps/rejected": -1.1305123567581177, | |
| "loss": 1.1209, | |
| "nll_loss": 1.026604175567627, | |
| "rewards/accuracies": 0.62890625, | |
| "rewards/chosen": -0.15797458589076996, | |
| "rewards/margins": 0.01160226296633482, | |
| "rewards/rejected": -0.16957685351371765, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.8923713778829097, | |
| "eval_log_odds_chosen": 0.9119634628295898, | |
| "eval_log_odds_ratio": -0.3477023243904114, | |
| "eval_logits/chosen": 0.8482466340065002, | |
| "eval_logits/rejected": 0.7518002986907959, | |
| "eval_logps/chosen": -0.7484418153762817, | |
| "eval_logps/rejected": -1.3053876161575317, | |
| "eval_loss": 0.84984290599823, | |
| "eval_nll_loss": 0.7749183773994446, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": -0.11226626485586166, | |
| "eval_rewards/margins": 0.0835418850183487, | |
| "eval_rewards/rejected": -0.19580814242362976, | |
| "eval_runtime": 1.7821, | |
| "eval_samples_per_second": 76.877, | |
| "eval_steps_per_second": 10.101, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.968066232998226, | |
| "grad_norm": 10.631780624389648, | |
| "learning_rate": 4.991370688303038e-07, | |
| "log_odds_chosen": 0.20428910851478577, | |
| "log_odds_ratio": -0.6562178134918213, | |
| "logits/chosen": 1.2808618545532227, | |
| "logits/rejected": 1.1230928897857666, | |
| "logps/chosen": -1.0897853374481201, | |
| "logps/rejected": -1.1929757595062256, | |
| "loss": 1.0818, | |
| "nll_loss": 1.0095133781433105, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.16346779465675354, | |
| "rewards/margins": 0.015478584915399551, | |
| "rewards/rejected": -0.1789463758468628, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.119455943228859, | |
| "grad_norm": 9.022457122802734, | |
| "learning_rate": 4.983095894354857e-07, | |
| "log_odds_chosen": 0.22953583300113678, | |
| "log_odds_ratio": -0.6313825845718384, | |
| "logits/chosen": 1.349506139755249, | |
| "logits/rejected": 1.1360180377960205, | |
| "logps/chosen": -1.0178955793380737, | |
| "logps/rejected": -1.142075538635254, | |
| "loss": 1.0595, | |
| "nll_loss": 0.9702749848365784, | |
| "rewards/accuracies": 0.63671875, | |
| "rewards/chosen": -0.15268434584140778, | |
| "rewards/margins": 0.018626993522047997, | |
| "rewards/rejected": -0.17131134867668152, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.2708456534594914, | |
| "grad_norm": 8.519028663635254, | |
| "learning_rate": 4.972077065562821e-07, | |
| "log_odds_chosen": 0.20490483939647675, | |
| "log_odds_ratio": -0.6597353219985962, | |
| "logits/chosen": 1.2364730834960938, | |
| "logits/rejected": 1.1246590614318848, | |
| "logps/chosen": -1.0860800743103027, | |
| "logps/rejected": -1.1871649026870728, | |
| "loss": 1.0455, | |
| "nll_loss": 0.9942155480384827, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.16291199624538422, | |
| "rewards/margins": 0.015162724070250988, | |
| "rewards/rejected": -0.17807474732398987, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.4222353636901244, | |
| "grad_norm": 7.702695369720459, | |
| "learning_rate": 4.958326378681848e-07, | |
| "log_odds_chosen": 0.3035791516304016, | |
| "log_odds_ratio": -0.6072664260864258, | |
| "logits/chosen": 1.2193742990493774, | |
| "logits/rejected": 1.0568186044692993, | |
| "logps/chosen": -1.029651165008545, | |
| "logps/rejected": -1.1974754333496094, | |
| "loss": 1.031, | |
| "nll_loss": 0.9461196660995483, | |
| "rewards/accuracies": 0.7109375, | |
| "rewards/chosen": -0.15444767475128174, | |
| "rewards/margins": 0.02517363429069519, | |
| "rewards/rejected": -0.17962132394313812, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.573625073920757, | |
| "grad_norm": 8.201448440551758, | |
| "learning_rate": 4.941859029405353e-07, | |
| "log_odds_chosen": 0.35751351714134216, | |
| "log_odds_ratio": -0.5834794044494629, | |
| "logits/chosen": 1.2276177406311035, | |
| "logits/rejected": 1.0265512466430664, | |
| "logps/chosen": -1.0028650760650635, | |
| "logps/rejected": -1.1897025108337402, | |
| "loss": 1.0218, | |
| "nll_loss": 0.9072933793067932, | |
| "rewards/accuracies": 0.73828125, | |
| "rewards/chosen": -0.15042978525161743, | |
| "rewards/margins": 0.028025589883327484, | |
| "rewards/rejected": -0.17845536768436432, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.72501478415139, | |
| "grad_norm": 7.634998798370361, | |
| "learning_rate": 4.922693215572695e-07, | |
| "log_odds_chosen": 0.45870620012283325, | |
| "log_odds_ratio": -0.54433274269104, | |
| "logits/chosen": 1.1980278491973877, | |
| "logits/rejected": 1.0682458877563477, | |
| "logps/chosen": -0.978523313999176, | |
| "logps/rejected": -1.243023157119751, | |
| "loss": 0.9966, | |
| "nll_loss": 0.921144962310791, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -0.14677852392196655, | |
| "rewards/margins": 0.03967496007680893, | |
| "rewards/rejected": -0.1864534616470337, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 2.8764044943820224, | |
| "grad_norm": 7.217565059661865, | |
| "learning_rate": 4.900850117058999e-07, | |
| "log_odds_chosen": 0.47186481952667236, | |
| "log_odds_ratio": -0.5484339594841003, | |
| "logits/chosen": 1.152608871459961, | |
| "logits/rejected": 1.015822172164917, | |
| "logps/chosen": -1.01084566116333, | |
| "logps/rejected": -1.277451992034912, | |
| "loss": 0.9987, | |
| "nll_loss": 0.9013168215751648, | |
| "rewards/accuracies": 0.7421875, | |
| "rewards/chosen": -0.1516268402338028, | |
| "rewards/margins": 0.03999098762869835, | |
| "rewards/rejected": -0.19161783158779144, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 3.0277942046126554, | |
| "grad_norm": 6.927852630615234, | |
| "learning_rate": 4.876353872369572e-07, | |
| "log_odds_chosen": 0.48829925060272217, | |
| "log_odds_ratio": -0.5393761396408081, | |
| "logits/chosen": 1.0784587860107422, | |
| "logits/rejected": 0.9411880970001221, | |
| "logps/chosen": -1.0088391304016113, | |
| "logps/rejected": -1.271460771560669, | |
| "loss": 0.9837, | |
| "nll_loss": 0.9265193343162537, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -0.15132588148117065, | |
| "rewards/margins": 0.039393242448568344, | |
| "rewards/rejected": -0.1907191127538681, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 3.179183914843288, | |
| "grad_norm": 6.685938358306885, | |
| "learning_rate": 4.849231551964771e-07, | |
| "log_odds_chosen": 0.562548041343689, | |
| "log_odds_ratio": -0.5139177441596985, | |
| "logits/chosen": 1.1162034273147583, | |
| "logits/rejected": 0.927276611328125, | |
| "logps/chosen": -0.9777481555938721, | |
| "logps/rejected": -1.3050942420959473, | |
| "loss": 0.9845, | |
| "nll_loss": 0.8839849233627319, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.14666223526000977, | |
| "rewards/margins": 0.04910193011164665, | |
| "rewards/rejected": -0.19576415419578552, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 3.330573625073921, | |
| "grad_norm": 5.24590539932251, | |
| "learning_rate": 4.819513128344813e-07, | |
| "log_odds_chosen": 0.4602447748184204, | |
| "log_odds_ratio": -0.5505639314651489, | |
| "logits/chosen": 1.1351033449172974, | |
| "logits/rejected": 0.9407525062561035, | |
| "logps/chosen": -0.9991594552993774, | |
| "logps/rejected": -1.2416499853134155, | |
| "loss": 0.9658, | |
| "nll_loss": 0.8639576435089111, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.14987392723560333, | |
| "rewards/margins": 0.036373574286699295, | |
| "rewards/rejected": -0.18624748289585114, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 3.4819633353045534, | |
| "grad_norm": 6.936483860015869, | |
| "learning_rate": 4.787231442927586e-07, | |
| "log_odds_chosen": 0.5815439820289612, | |
| "log_odds_ratio": -0.5048896074295044, | |
| "logits/chosen": 1.0991628170013428, | |
| "logits/rejected": 0.8964717984199524, | |
| "logps/chosen": -0.9201152920722961, | |
| "logps/rejected": -1.242402195930481, | |
| "loss": 0.9681, | |
| "nll_loss": 0.8501954674720764, | |
| "rewards/accuracies": 0.7890625, | |
| "rewards/chosen": -0.13801729679107666, | |
| "rewards/margins": 0.04834304004907608, | |
| "rewards/rejected": -0.18636034429073334, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 3.6333530455351863, | |
| "grad_norm": 6.671252250671387, | |
| "learning_rate": 4.752422169756047e-07, | |
| "log_odds_chosen": 0.494718998670578, | |
| "log_odds_ratio": -0.5431851148605347, | |
| "logits/chosen": 1.1249089241027832, | |
| "logits/rejected": 1.0157999992370605, | |
| "logps/chosen": -1.0117340087890625, | |
| "logps/rejected": -1.293691635131836, | |
| "loss": 0.978, | |
| "nll_loss": 0.9269427061080933, | |
| "rewards/accuracies": 0.7578125, | |
| "rewards/chosen": -0.15176010131835938, | |
| "rewards/margins": 0.042293645441532135, | |
| "rewards/rejected": -0.1940537393093109, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 3.7847427557658193, | |
| "grad_norm": 7.028476715087891, | |
| "learning_rate": 4.715123776075336e-07, | |
| "log_odds_chosen": 0.5061647891998291, | |
| "log_odds_ratio": -0.5440715551376343, | |
| "logits/chosen": 1.0956813097000122, | |
| "logits/rejected": 0.9653363823890686, | |
| "logps/chosen": -1.0257270336151123, | |
| "logps/rejected": -1.3192400932312012, | |
| "loss": 0.9528, | |
| "nll_loss": 0.8593652844429016, | |
| "rewards/accuracies": 0.75390625, | |
| "rewards/chosen": -0.15385906398296356, | |
| "rewards/margins": 0.044026970863342285, | |
| "rewards/rejected": -0.19788604974746704, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.7847427557658193, | |
| "eval_log_odds_chosen": 1.0081945657730103, | |
| "eval_log_odds_ratio": -0.3223256468772888, | |
| "eval_logits/chosen": 0.7514240145683289, | |
| "eval_logits/rejected": 0.6671679615974426, | |
| "eval_logps/chosen": -0.7009862065315247, | |
| "eval_logps/rejected": -1.2981789112091064, | |
| "eval_loss": 0.7884585857391357, | |
| "eval_nll_loss": 0.7162714600563049, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": -0.10514792799949646, | |
| "eval_rewards/margins": 0.08957889676094055, | |
| "eval_rewards/rejected": -0.1947268396615982, | |
| "eval_runtime": 1.7846, | |
| "eval_samples_per_second": 76.767, | |
| "eval_steps_per_second": 10.086, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.936132465996452, | |
| "grad_norm": 6.8125834465026855, | |
| "learning_rate": 4.675377479823153e-07, | |
| "log_odds_chosen": 0.6920242309570312, | |
| "log_odds_ratio": -0.4726037383079529, | |
| "logits/chosen": 1.0377426147460938, | |
| "logits/rejected": 0.9017472863197327, | |
| "logps/chosen": -0.922009289264679, | |
| "logps/rejected": -1.3119571208953857, | |
| "loss": 0.9638, | |
| "nll_loss": 0.8638713359832764, | |
| "rewards/accuracies": 0.82421875, | |
| "rewards/chosen": -0.13830138742923737, | |
| "rewards/margins": 0.05849217250943184, | |
| "rewards/rejected": -0.1967935562133789, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 4.087522176227084, | |
| "grad_norm": 6.372574806213379, | |
| "learning_rate": 4.6332272040803887e-07, | |
| "log_odds_chosen": 0.6877175569534302, | |
| "log_odds_ratio": -0.46182751655578613, | |
| "logits/chosen": 1.1110641956329346, | |
| "logits/rejected": 0.9074443578720093, | |
| "logps/chosen": -0.9194135069847107, | |
| "logps/rejected": -1.3282839059829712, | |
| "loss": 0.9604, | |
| "nll_loss": 0.8556405901908875, | |
| "rewards/accuracies": 0.83203125, | |
| "rewards/chosen": -0.13791203498840332, | |
| "rewards/margins": 0.06133056432008743, | |
| "rewards/rejected": -0.19924262166023254, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 4.238911886457718, | |
| "grad_norm": 6.756438255310059, | |
| "learning_rate": 4.588719528532341e-07, | |
| "log_odds_chosen": 0.6642757058143616, | |
| "log_odds_ratio": -0.4779506325721741, | |
| "logits/chosen": 1.0594482421875, | |
| "logits/rejected": 0.9946908354759216, | |
| "logps/chosen": -0.965737521648407, | |
| "logps/rejected": -1.359665870666504, | |
| "loss": 0.954, | |
| "nll_loss": 0.8865021467208862, | |
| "rewards/accuracies": 0.77734375, | |
| "rewards/chosen": -0.1448606252670288, | |
| "rewards/margins": 0.059089258313179016, | |
| "rewards/rejected": -0.20394988358020782, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 4.39030159668835, | |
| "grad_norm": 7.890772342681885, | |
| "learning_rate": 4.5419036379941414e-07, | |
| "log_odds_chosen": 0.7298649549484253, | |
| "log_odds_ratio": -0.4601740837097168, | |
| "logits/chosen": 1.1765400171279907, | |
| "logits/rejected": 0.9228672981262207, | |
| "logps/chosen": -0.9468764066696167, | |
| "logps/rejected": -1.3811423778533936, | |
| "loss": 0.9453, | |
| "nll_loss": 0.8525615930557251, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -0.1420314460992813, | |
| "rewards/margins": 0.06513990461826324, | |
| "rewards/rejected": -0.20717135071754456, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 4.541691306918983, | |
| "grad_norm": 6.184362888336182, | |
| "learning_rate": 4.492831268057306e-07, | |
| "log_odds_chosen": 0.7427738904953003, | |
| "log_odds_ratio": -0.46771693229675293, | |
| "logits/chosen": 1.0225163698196411, | |
| "logits/rejected": 0.9077222943305969, | |
| "logps/chosen": -0.9970439672470093, | |
| "logps/rejected": -1.456943154335022, | |
| "loss": 0.938, | |
| "nll_loss": 0.8868236541748047, | |
| "rewards/accuracies": 0.77734375, | |
| "rewards/chosen": -0.14955660700798035, | |
| "rewards/margins": 0.06898489594459534, | |
| "rewards/rejected": -0.2185414880514145, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 4.693081017149615, | |
| "grad_norm": 6.933351993560791, | |
| "learning_rate": 4.441556647917446e-07, | |
| "log_odds_chosen": 0.8609212636947632, | |
| "log_odds_ratio": -0.43812429904937744, | |
| "logits/chosen": 1.0455502271652222, | |
| "logits/rejected": 0.906272292137146, | |
| "logps/chosen": -0.9208173155784607, | |
| "logps/rejected": -1.457839012145996, | |
| "loss": 0.9434, | |
| "nll_loss": 0.8511086106300354, | |
| "rewards/accuracies": 0.83203125, | |
| "rewards/chosen": -0.13812260329723358, | |
| "rewards/margins": 0.08055327087640762, | |
| "rewards/rejected": -0.2186758816242218, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 4.844470727380249, | |
| "grad_norm": 6.150376796722412, | |
| "learning_rate": 4.3881364404463375e-07, | |
| "log_odds_chosen": 0.9446333050727844, | |
| "log_odds_ratio": -0.4172128438949585, | |
| "logits/chosen": 1.110432744026184, | |
| "logits/rejected": 0.8510321974754333, | |
| "logps/chosen": -0.9673236608505249, | |
| "logps/rejected": -1.5632784366607666, | |
| "loss": 0.9264, | |
| "nll_loss": 0.8780388832092285, | |
| "rewards/accuracies": 0.81640625, | |
| "rewards/chosen": -0.14509856700897217, | |
| "rewards/margins": 0.08939322084188461, | |
| "rewards/rejected": -0.234491765499115, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 4.995860437610881, | |
| "grad_norm": 5.648180961608887, | |
| "learning_rate": 4.332629679574565e-07, | |
| "log_odds_chosen": 0.9642012715339661, | |
| "log_odds_ratio": -0.42083150148391724, | |
| "logits/chosen": 1.0487498044967651, | |
| "logits/rejected": 0.8362730741500854, | |
| "logps/chosen": -0.9618784189224243, | |
| "logps/rejected": -1.5873997211456299, | |
| "loss": 0.925, | |
| "nll_loss": 0.8492802381515503, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -0.1442817747592926, | |
| "rewards/margins": 0.09382818639278412, | |
| "rewards/rejected": -0.23810997605323792, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 5.147250147841514, | |
| "grad_norm": 6.209354400634766, | |
| "learning_rate": 4.2750977050539503e-07, | |
| "log_odds_chosen": 1.127962350845337, | |
| "log_odds_ratio": -0.3810023367404938, | |
| "logits/chosen": 0.9537469148635864, | |
| "logits/rejected": 0.8183348178863525, | |
| "logps/chosen": -0.9366539120674133, | |
| "logps/rejected": -1.6753818988800049, | |
| "loss": 0.9233, | |
| "nll_loss": 0.8444766998291016, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -0.140498086810112, | |
| "rewards/margins": 0.11080917716026306, | |
| "rewards/rejected": -0.25130727887153625, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 5.298639858072146, | |
| "grad_norm": 6.09550666809082, | |
| "learning_rate": 4.2156040946718343e-07, | |
| "log_odds_chosen": 1.1001328229904175, | |
| "log_odds_ratio": -0.40834715962409973, | |
| "logits/chosen": 0.9538164734840393, | |
| "logits/rejected": 0.8565899133682251, | |
| "logps/chosen": -0.9979989528656006, | |
| "logps/rejected": -1.736957311630249, | |
| "loss": 0.9264, | |
| "nll_loss": 0.8773810863494873, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -0.1496998369693756, | |
| "rewards/margins": 0.11084374785423279, | |
| "rewards/rejected": -0.2605435848236084, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 5.45002956830278, | |
| "grad_norm": 5.165525913238525, | |
| "learning_rate": 4.154214593992149e-07, | |
| "log_odds_chosen": 1.4560502767562866, | |
| "log_odds_ratio": -0.36019906401634216, | |
| "logits/chosen": 1.0402196645736694, | |
| "logits/rejected": 0.8284226655960083, | |
| "logps/chosen": -0.9208565950393677, | |
| "logps/rejected": -1.9513992071151733, | |
| "loss": 0.9184, | |
| "nll_loss": 0.8742519617080688, | |
| "rewards/accuracies": 0.83984375, | |
| "rewards/chosen": -0.13812850415706635, | |
| "rewards/margins": 0.15458139777183533, | |
| "rewards/rejected": -0.2927098870277405, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 5.601419278533412, | |
| "grad_norm": 5.565188407897949, | |
| "learning_rate": 4.090997043700909e-07, | |
| "log_odds_chosen": 1.8058509826660156, | |
| "log_odds_ratio": -0.34831157326698303, | |
| "logits/chosen": 0.9435930252075195, | |
| "logits/rejected": 0.7780628204345703, | |
| "logps/chosen": -0.9824676513671875, | |
| "logps/rejected": -2.3647959232330322, | |
| "loss": 0.915, | |
| "nll_loss": 0.9026926159858704, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -0.1473701447248459, | |
| "rewards/margins": 0.2073492407798767, | |
| "rewards/rejected": -0.3547194004058838, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 5.677114133648729, | |
| "eval_log_odds_chosen": 1.515297532081604, | |
| "eval_log_odds_ratio": -0.21902640163898468, | |
| "eval_logits/chosen": 0.5878681540489197, | |
| "eval_logits/rejected": 0.514284610748291, | |
| "eval_logps/chosen": -0.6912536025047302, | |
| "eval_logps/rejected": -1.6357617378234863, | |
| "eval_loss": 0.7435688972473145, | |
| "eval_nll_loss": 0.6848150491714478, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": -0.10368803888559341, | |
| "eval_rewards/margins": 0.14167624711990356, | |
| "eval_rewards/rejected": -0.2453642636537552, | |
| "eval_runtime": 1.7754, | |
| "eval_samples_per_second": 77.164, | |
| "eval_steps_per_second": 10.138, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 5.752808988764045, | |
| "grad_norm": 5.417468547821045, | |
| "learning_rate": 4.0260213046364076e-07, | |
| "log_odds_chosen": 2.085019111633301, | |
| "log_odds_ratio": -0.31234151124954224, | |
| "logits/chosen": 0.9959389567375183, | |
| "logits/rejected": 0.7742877006530762, | |
| "logps/chosen": -0.9441136121749878, | |
| "logps/rejected": -2.5619935989379883, | |
| "loss": 0.9004, | |
| "nll_loss": 0.8535679578781128, | |
| "rewards/accuracies": 0.8359375, | |
| "rewards/chosen": -0.1416170299053192, | |
| "rewards/margins": 0.24268200993537903, | |
| "rewards/rejected": -0.38429906964302063, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 5.904198698994678, | |
| "grad_norm": 7.9577178955078125, | |
| "learning_rate": 3.959359180586975e-07, | |
| "log_odds_chosen": 2.5801219940185547, | |
| "log_odds_ratio": -0.31198883056640625, | |
| "logits/chosen": 0.9304694533348083, | |
| "logits/rejected": 0.7099679112434387, | |
| "logps/chosen": -1.004030466079712, | |
| "logps/rejected": -3.1341824531555176, | |
| "loss": 0.9199, | |
| "nll_loss": 0.888052225112915, | |
| "rewards/accuracies": 0.8515625, | |
| "rewards/chosen": -0.15060456097126007, | |
| "rewards/margins": 0.3195228576660156, | |
| "rewards/rejected": -0.4701274335384369, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 6.055588409225311, | |
| "grad_norm": 7.205864906311035, | |
| "learning_rate": 3.891084338941603e-07, | |
| "log_odds_chosen": 2.632976770401001, | |
| "log_odds_ratio": -0.33525609970092773, | |
| "logits/chosen": 0.8941175937652588, | |
| "logits/rejected": 0.712418794631958, | |
| "logps/chosen": -0.9687196016311646, | |
| "logps/rejected": -3.1566426753997803, | |
| "loss": 0.9156, | |
| "nll_loss": 0.8564908504486084, | |
| "rewards/accuracies": 0.84765625, | |
| "rewards/chosen": -0.14530794322490692, | |
| "rewards/margins": 0.3281884789466858, | |
| "rewards/rejected": -0.4734964370727539, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 6.206978119455943, | |
| "grad_norm": 5.557631015777588, | |
| "learning_rate": 3.8212722292811383e-07, | |
| "log_odds_chosen": 2.9796371459960938, | |
| "log_odds_ratio": -0.3066112995147705, | |
| "logits/chosen": 0.9908494353294373, | |
| "logits/rejected": 0.7001262903213501, | |
| "logps/chosen": -0.9531494975090027, | |
| "logps/rejected": -3.451514720916748, | |
| "loss": 0.8945, | |
| "nll_loss": 0.8682339191436768, | |
| "rewards/accuracies": 0.83203125, | |
| "rewards/chosen": -0.1429724246263504, | |
| "rewards/margins": 0.3747548460960388, | |
| "rewards/rejected": -0.517727255821228, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 6.358367829686576, | |
| "grad_norm": 7.154934883117676, | |
| "learning_rate": 3.75e-07, | |
| "log_odds_chosen": 3.150144338607788, | |
| "log_odds_ratio": -0.30607521533966064, | |
| "logits/chosen": 0.8532112240791321, | |
| "logits/rejected": 0.688872754573822, | |
| "logps/chosen": -0.9391156435012817, | |
| "logps/rejected": -3.6156792640686035, | |
| "loss": 0.9013, | |
| "nll_loss": 0.8500258922576904, | |
| "rewards/accuracies": 0.84765625, | |
| "rewards/chosen": -0.14086736738681793, | |
| "rewards/margins": 0.40148457884788513, | |
| "rewards/rejected": -0.5423519611358643, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 6.509757539917208, | |
| "grad_norm": 5.582004070281982, | |
| "learning_rate": 3.67734641305055e-07, | |
| "log_odds_chosen": 3.518749475479126, | |
| "log_odds_ratio": -0.26204630732536316, | |
| "logits/chosen": 1.0231519937515259, | |
| "logits/rejected": 0.6429997682571411, | |
| "logps/chosen": -0.8629344701766968, | |
| "logps/rejected": -3.8456180095672607, | |
| "loss": 0.9023, | |
| "nll_loss": 0.801094114780426, | |
| "rewards/accuracies": 0.8671875, | |
| "rewards/chosen": -0.12944017350673676, | |
| "rewards/margins": 0.4474025368690491, | |
| "rewards/rejected": -0.576842725276947, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 6.661147250147842, | |
| "grad_norm": 6.333008766174316, | |
| "learning_rate": 3.6033917569043597e-07, | |
| "log_odds_chosen": 3.5107364654541016, | |
| "log_odds_ratio": -0.25962206721305847, | |
| "logits/chosen": 0.9015189409255981, | |
| "logits/rejected": 0.604630172252655, | |
| "logps/chosen": -0.9094609618186951, | |
| "logps/rejected": -3.901463270187378, | |
| "loss": 0.8982, | |
| "nll_loss": 0.8305466175079346, | |
| "rewards/accuracies": 0.87109375, | |
| "rewards/chosen": -0.1364191472530365, | |
| "rewards/margins": 0.44880032539367676, | |
| "rewards/rejected": -0.5852195024490356, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 6.812536960378474, | |
| "grad_norm": 9.112639427185059, | |
| "learning_rate": 3.528217757826529e-07, | |
| "log_odds_chosen": 3.822404384613037, | |
| "log_odds_ratio": -0.28991812467575073, | |
| "logits/chosen": 0.9384167790412903, | |
| "logits/rejected": 0.6002436280250549, | |
| "logps/chosen": -0.966259241104126, | |
| "logps/rejected": -4.308917999267578, | |
| "loss": 0.899, | |
| "nll_loss": 0.8516695499420166, | |
| "rewards/accuracies": 0.83203125, | |
| "rewards/chosen": -0.1449388712644577, | |
| "rewards/margins": 0.5013989210128784, | |
| "rewards/rejected": -0.6463377475738525, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 6.963926670609107, | |
| "grad_norm": 7.574125289916992, | |
| "learning_rate": 3.4519074895611236e-07, | |
| "log_odds_chosen": 3.943324327468872, | |
| "log_odds_ratio": -0.26691746711730957, | |
| "logits/chosen": 0.9103025197982788, | |
| "logits/rejected": 0.6238164901733398, | |
| "logps/chosen": -0.8985946774482727, | |
| "logps/rejected": -4.343371391296387, | |
| "loss": 0.8962, | |
| "nll_loss": 0.8179515600204468, | |
| "rewards/accuracies": 0.8984375, | |
| "rewards/chosen": -0.13478921353816986, | |
| "rewards/margins": 0.516716480255127, | |
| "rewards/rejected": -0.651505708694458, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 7.11531638083974, | |
| "grad_norm": 6.7364115715026855, | |
| "learning_rate": 3.374545281527537e-07, | |
| "log_odds_chosen": 4.374906539916992, | |
| "log_odds_ratio": -0.2600148320198059, | |
| "logits/chosen": 0.9600415229797363, | |
| "logits/rejected": 0.6132468581199646, | |
| "logps/chosen": -0.9232965707778931, | |
| "logps/rejected": -4.797858715057373, | |
| "loss": 0.8895, | |
| "nll_loss": 0.8346379995346069, | |
| "rewards/accuracies": 0.87109375, | |
| "rewards/chosen": -0.13849450647830963, | |
| "rewards/margins": 0.5811843872070312, | |
| "rewards/rejected": -0.7196788787841797, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 7.266706091070373, | |
| "grad_norm": 8.94677448272705, | |
| "learning_rate": 3.296216625629211e-07, | |
| "log_odds_chosen": 3.412320375442505, | |
| "log_odds_ratio": -0.2966606616973877, | |
| "logits/chosen": 0.9029962420463562, | |
| "logits/rejected": 0.6578757762908936, | |
| "logps/chosen": -0.9623314738273621, | |
| "logps/rejected": -3.89890193939209, | |
| "loss": 0.8925, | |
| "nll_loss": 0.8433880805969238, | |
| "rewards/accuracies": 0.8671875, | |
| "rewards/chosen": -0.14434972405433655, | |
| "rewards/margins": 0.4404855966567993, | |
| "rewards/rejected": -0.5848353505134583, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 7.418095801301005, | |
| "grad_norm": 5.7957353591918945, | |
| "learning_rate": 3.2170080817777257e-07, | |
| "log_odds_chosen": 4.052781581878662, | |
| "log_odds_ratio": -0.2798649072647095, | |
| "logits/chosen": 0.9068763256072998, | |
| "logits/rejected": 0.6250233054161072, | |
| "logps/chosen": -0.9688931107521057, | |
| "logps/rejected": -4.529140472412109, | |
| "loss": 0.9004, | |
| "nll_loss": 0.8530284762382507, | |
| "rewards/accuracies": 0.859375, | |
| "rewards/chosen": -0.14533399045467377, | |
| "rewards/margins": 0.534037172794342, | |
| "rewards/rejected": -0.6793711185455322, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 7.569485511531638, | |
| "grad_norm": 6.567281723022461, | |
| "learning_rate": 3.137007182236637e-07, | |
| "log_odds_chosen": 3.9496092796325684, | |
| "log_odds_ratio": -0.25981855392456055, | |
| "logits/chosen": 0.9131721258163452, | |
| "logits/rejected": 0.6535216569900513, | |
| "logps/chosen": -0.9185097813606262, | |
| "logps/rejected": -4.3686933517456055, | |
| "loss": 0.892, | |
| "nll_loss": 0.8527241945266724, | |
| "rewards/accuracies": 0.87890625, | |
| "rewards/chosen": -0.1377764791250229, | |
| "rewards/margins": 0.5175275206565857, | |
| "rewards/rejected": -0.655303955078125, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 7.569485511531638, | |
| "eval_log_odds_chosen": 1.6673216819763184, | |
| "eval_log_odds_ratio": -0.19718672335147858, | |
| "eval_logits/chosen": 0.5625311136245728, | |
| "eval_logits/rejected": 0.4984322190284729, | |
| "eval_logps/chosen": -0.6667929887771606, | |
| "eval_logps/rejected": -1.6969513893127441, | |
| "eval_loss": 0.7337117195129395, | |
| "eval_nll_loss": 0.6776795387268066, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": -0.1000189557671547, | |
| "eval_rewards/margins": 0.15452374517917633, | |
| "eval_rewards/rejected": -0.2545427083969116, | |
| "eval_runtime": 1.7831, | |
| "eval_samples_per_second": 76.831, | |
| "eval_steps_per_second": 10.095, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 7.720875221762271, | |
| "grad_norm": 8.987198829650879, | |
| "learning_rate": 3.056302334890786e-07, | |
| "log_odds_chosen": 3.926710844039917, | |
| "log_odds_ratio": -0.2714899480342865, | |
| "logits/chosen": 0.9191571474075317, | |
| "logits/rejected": 0.5953992605209351, | |
| "logps/chosen": -0.9368714094161987, | |
| "logps/rejected": -4.384879112243652, | |
| "loss": 0.888, | |
| "nll_loss": 0.8406177759170532, | |
| "rewards/accuracies": 0.8671875, | |
| "rewards/chosen": -0.14053073525428772, | |
| "rewards/margins": 0.5172011852264404, | |
| "rewards/rejected": -0.6577318906784058, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 7.872264931992904, | |
| "grad_norm": 5.6181230545043945, | |
| "learning_rate": 2.974982725547975e-07, | |
| "log_odds_chosen": 3.617192506790161, | |
| "log_odds_ratio": -0.32495206594467163, | |
| "logits/chosen": 0.8457501530647278, | |
| "logits/rejected": 0.6253533363342285, | |
| "logps/chosen": -1.0049875974655151, | |
| "logps/rejected": -4.156848907470703, | |
| "loss": 0.8977, | |
| "nll_loss": 0.8724310994148254, | |
| "rewards/accuracies": 0.85546875, | |
| "rewards/chosen": -0.1507481336593628, | |
| "rewards/margins": 0.47277915477752686, | |
| "rewards/rejected": -0.6235272884368896, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 8.023654642223537, | |
| "grad_norm": 5.31005334854126, | |
| "learning_rate": 2.893138219380963e-07, | |
| "log_odds_chosen": 4.234038829803467, | |
| "log_odds_ratio": -0.30224624276161194, | |
| "logits/chosen": 0.922171950340271, | |
| "logits/rejected": 0.5847682952880859, | |
| "logps/chosen": -0.9686514139175415, | |
| "logps/rejected": -4.734119892120361, | |
| "loss": 0.8864, | |
| "nll_loss": 0.8605988025665283, | |
| "rewards/accuracies": 0.859375, | |
| "rewards/chosen": -0.14529772102832794, | |
| "rewards/margins": 0.5648203492164612, | |
| "rewards/rejected": -0.7101180553436279, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 8.175044352454169, | |
| "grad_norm": 4.773166179656982, | |
| "learning_rate": 2.810859261618713e-07, | |
| "log_odds_chosen": 4.176573753356934, | |
| "log_odds_ratio": -0.2747136056423187, | |
| "logits/chosen": 0.9669155478477478, | |
| "logits/rejected": 0.6131560206413269, | |
| "logps/chosen": -0.941318690776825, | |
| "logps/rejected": -4.627261638641357, | |
| "loss": 0.8908, | |
| "nll_loss": 0.8429233431816101, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.1411978155374527, | |
| "rewards/margins": 0.5528914928436279, | |
| "rewards/rejected": -0.6940892934799194, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 8.326434062684802, | |
| "grad_norm": 7.928328990936279, | |
| "learning_rate": 2.728236777596621e-07, | |
| "log_odds_chosen": 4.232769012451172, | |
| "log_odds_ratio": -0.2622223496437073, | |
| "logits/chosen": 0.8704826831817627, | |
| "logits/rejected": 0.6309795379638672, | |
| "logps/chosen": -0.9345431327819824, | |
| "logps/rejected": -4.6482343673706055, | |
| "loss": 0.8856, | |
| "nll_loss": 0.849586009979248, | |
| "rewards/accuracies": 0.87109375, | |
| "rewards/chosen": -0.14018146693706512, | |
| "rewards/margins": 0.5570536851882935, | |
| "rewards/rejected": -0.6972352266311646, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 8.477823772915436, | |
| "grad_norm": 7.50920295715332, | |
| "learning_rate": 2.6453620722761895e-07, | |
| "log_odds_chosen": 3.835066795349121, | |
| "log_odds_ratio": -0.2713623344898224, | |
| "logits/chosen": 0.8469685316085815, | |
| "logits/rejected": 0.5746083855628967, | |
| "logps/chosen": -0.9510048031806946, | |
| "logps/rejected": -4.287370204925537, | |
| "loss": 0.9005, | |
| "nll_loss": 0.8307653069496155, | |
| "rewards/accuracies": 0.8828125, | |
| "rewards/chosen": -0.14265072345733643, | |
| "rewards/margins": 0.5004547238349915, | |
| "rewards/rejected": -0.6431055068969727, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 8.629213483146067, | |
| "grad_norm": 4.420612812042236, | |
| "learning_rate": 2.5623267293451823e-07, | |
| "log_odds_chosen": 4.375966548919678, | |
| "log_odds_ratio": -0.26864683628082275, | |
| "logits/chosen": 0.8368352055549622, | |
| "logits/rejected": 0.5574530959129333, | |
| "logps/chosen": -0.9161982536315918, | |
| "logps/rejected": -4.765947341918945, | |
| "loss": 0.8711, | |
| "nll_loss": 0.8084649443626404, | |
| "rewards/accuracies": 0.890625, | |
| "rewards/chosen": -0.13742974400520325, | |
| "rewards/margins": 0.577462375164032, | |
| "rewards/rejected": -0.7148921489715576, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 8.7806031933767, | |
| "grad_norm": 5.372297286987305, | |
| "learning_rate": 2.4792225100097575e-07, | |
| "log_odds_chosen": 4.036057472229004, | |
| "log_odds_ratio": -0.28991392254829407, | |
| "logits/chosen": 0.8479549884796143, | |
| "logits/rejected": 0.6130175590515137, | |
| "logps/chosen": -0.996108889579773, | |
| "logps/rejected": -4.58188009262085, | |
| "loss": 0.8868, | |
| "nll_loss": 0.8779551386833191, | |
| "rewards/accuracies": 0.8671875, | |
| "rewards/chosen": -0.14941634237766266, | |
| "rewards/margins": 0.5378656387329102, | |
| "rewards/rejected": -0.6872820258140564, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 8.931992903607332, | |
| "grad_norm": 7.286854267120361, | |
| "learning_rate": 2.3961412515904335e-07, | |
| "log_odds_chosen": 4.6014862060546875, | |
| "log_odds_ratio": -0.23827242851257324, | |
| "logits/chosen": 0.8562659621238708, | |
| "logits/rejected": 0.5500348806381226, | |
| "logps/chosen": -0.8830623030662537, | |
| "logps/rejected": -4.95844841003418, | |
| "loss": 0.8922, | |
| "nll_loss": 0.8241187930107117, | |
| "rewards/accuracies": 0.91796875, | |
| "rewards/chosen": -0.132459357380867, | |
| "rewards/margins": 0.6113079190254211, | |
| "rewards/rejected": -0.743767261505127, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 9.083382613837966, | |
| "grad_norm": 5.4477949142456055, | |
| "learning_rate": 2.3131747660339394e-07, | |
| "log_odds_chosen": 4.262630939483643, | |
| "log_odds_ratio": -0.25891953706741333, | |
| "logits/chosen": 0.7675349712371826, | |
| "logits/rejected": 0.51315838098526, | |
| "logps/chosen": -0.9374942779541016, | |
| "logps/rejected": -4.686108589172363, | |
| "loss": 0.8815, | |
| "nll_loss": 0.8197700381278992, | |
| "rewards/accuracies": 0.91015625, | |
| "rewards/chosen": -0.14062415063381195, | |
| "rewards/margins": 0.562292218208313, | |
| "rewards/rejected": -0.7029163837432861, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 9.234772324068599, | |
| "grad_norm": 5.214437484741211, | |
| "learning_rate": 2.2304147384531036e-07, | |
| "log_odds_chosen": 4.728519439697266, | |
| "log_odds_ratio": -0.26717641949653625, | |
| "logits/chosen": 0.8268774747848511, | |
| "logits/rejected": 0.5454421639442444, | |
| "logps/chosen": -0.9330585598945618, | |
| "logps/rejected": -5.14246129989624, | |
| "loss": 0.8819, | |
| "nll_loss": 0.8327500820159912, | |
| "rewards/accuracies": 0.86328125, | |
| "rewards/chosen": -0.13995879888534546, | |
| "rewards/margins": 0.6314104795455933, | |
| "rewards/rejected": -0.771369218826294, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 9.38616203429923, | |
| "grad_norm": 6.283268928527832, | |
| "learning_rate": 2.1479526258069083e-07, | |
| "log_odds_chosen": 4.715708255767822, | |
| "log_odds_ratio": -0.24205940961837769, | |
| "logits/chosen": 0.920197069644928, | |
| "logits/rejected": 0.5509434342384338, | |
| "logps/chosen": -0.9300947189331055, | |
| "logps/rejected": -5.131900787353516, | |
| "loss": 0.8768, | |
| "nll_loss": 0.8383646011352539, | |
| "rewards/accuracies": 0.87890625, | |
| "rewards/chosen": -0.13951420783996582, | |
| "rewards/margins": 0.6302710175514221, | |
| "rewards/rejected": -0.7697851657867432, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 9.461856889414548, | |
| "eval_log_odds_chosen": 1.7867234945297241, | |
| "eval_log_odds_ratio": -0.18122754991054535, | |
| "eval_logits/chosen": 0.5212496519088745, | |
| "eval_logits/rejected": 0.4629932940006256, | |
| "eval_logps/chosen": -0.6597533226013184, | |
| "eval_logps/rejected": -1.7732388973236084, | |
| "eval_loss": 0.7272647619247437, | |
| "eval_nll_loss": 0.6736801266670227, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": -0.09896300733089447, | |
| "eval_rewards/margins": 0.16702282428741455, | |
| "eval_rewards/rejected": -0.2659858167171478, | |
| "eval_runtime": 1.766, | |
| "eval_samples_per_second": 77.575, | |
| "eval_steps_per_second": 10.192, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 9.537551744529864, | |
| "grad_norm": 4.955827713012695, | |
| "learning_rate": 2.065879555832674e-07, | |
| "log_odds_chosen": 4.195652008056641, | |
| "log_odds_ratio": -0.25658515095710754, | |
| "logits/chosen": 0.8670744895935059, | |
| "logits/rejected": 0.6037735939025879, | |
| "logps/chosen": -0.9333707094192505, | |
| "logps/rejected": -4.614899158477783, | |
| "loss": 0.8862, | |
| "nll_loss": 0.8495485782623291, | |
| "rewards/accuracies": 0.87109375, | |
| "rewards/chosen": -0.14000560343265533, | |
| "rewards/margins": 0.5522292852401733, | |
| "rewards/rejected": -0.6922348737716675, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 9.688941454760498, | |
| "grad_norm": 5.796345233917236, | |
| "learning_rate": 1.984286226342056e-07, | |
| "log_odds_chosen": 4.511747360229492, | |
| "log_odds_ratio": -0.27976271510124207, | |
| "logits/chosen": 0.8292367458343506, | |
| "logits/rejected": 0.5408206582069397, | |
| "logps/chosen": -0.9318006038665771, | |
| "logps/rejected": -4.940333843231201, | |
| "loss": 0.8937, | |
| "nll_loss": 0.824824869632721, | |
| "rewards/accuracies": 0.86328125, | |
| "rewards/chosen": -0.13977007567882538, | |
| "rewards/margins": 0.6012800931930542, | |
| "rewards/rejected": -0.7410501837730408, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 9.84033116499113, | |
| "grad_norm": 5.052858352661133, | |
| "learning_rate": 1.9032628049921556e-07, | |
| "log_odds_chosen": 4.3274006843566895, | |
| "log_odds_ratio": -0.2658219337463379, | |
| "logits/chosen": 0.7942694425582886, | |
| "logits/rejected": 0.5170871615409851, | |
| "logps/chosen": -0.9714781045913696, | |
| "logps/rejected": -4.801671028137207, | |
| "loss": 0.896, | |
| "nll_loss": 0.8439369201660156, | |
| "rewards/accuracies": 0.8828125, | |
| "rewards/chosen": -0.14572171866893768, | |
| "rewards/margins": 0.5745289325714111, | |
| "rewards/rejected": -0.72025066614151, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 9.991720875221763, | |
| "grad_norm": 5.781661510467529, | |
| "learning_rate": 1.8228988296424875e-07, | |
| "log_odds_chosen": 4.903880596160889, | |
| "log_odds_ratio": -0.24629831314086914, | |
| "logits/chosen": 0.888929009437561, | |
| "logits/rejected": 0.5151562690734863, | |
| "logps/chosen": -0.9433965682983398, | |
| "logps/rejected": -5.327086925506592, | |
| "loss": 0.8761, | |
| "nll_loss": 0.8339080214500427, | |
| "rewards/accuracies": 0.8828125, | |
| "rewards/chosen": -0.1415095031261444, | |
| "rewards/margins": 0.6575536131858826, | |
| "rewards/rejected": -0.7990630865097046, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 10.143110585452394, | |
| "grad_norm": 8.988626480102539, | |
| "learning_rate": 1.7432831094079352e-07, | |
| "log_odds_chosen": 4.3950042724609375, | |
| "log_odds_ratio": -0.28862205147743225, | |
| "logits/chosen": 0.8134148120880127, | |
| "logits/rejected": 0.5847084522247314, | |
| "logps/chosen": -1.0293586254119873, | |
| "logps/rejected": -4.959186553955078, | |
| "loss": 0.8813, | |
| "nll_loss": 0.8774588108062744, | |
| "rewards/accuracies": 0.85546875, | |
| "rewards/chosen": -0.15440379083156586, | |
| "rewards/margins": 0.5894742608070374, | |
| "rewards/rejected": -0.7438780069351196, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 10.294500295683028, | |
| "grad_norm": 5.275697231292725, | |
| "learning_rate": 1.6645036265170313e-07, | |
| "log_odds_chosen": 5.46366548538208, | |
| "log_odds_ratio": -0.27606436610221863, | |
| "logits/chosen": 0.8438766598701477, | |
| "logits/rejected": 0.5199805498123169, | |
| "logps/chosen": -0.9803435802459717, | |
| "logps/rejected": -5.95693826675415, | |
| "loss": 0.8932, | |
| "nll_loss": 0.8328185677528381, | |
| "rewards/accuracies": 0.86328125, | |
| "rewards/chosen": -0.14705155789852142, | |
| "rewards/margins": 0.7464891076087952, | |
| "rewards/rejected": -0.8935407400131226, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 10.445890005913661, | |
| "grad_norm": 6.923160552978516, | |
| "learning_rate": 1.5866474390840124e-07, | |
| "log_odds_chosen": 4.528408050537109, | |
| "log_odds_ratio": -0.25843387842178345, | |
| "logits/chosen": 0.8353314399719238, | |
| "logits/rejected": 0.5368306636810303, | |
| "logps/chosen": -0.9630373120307922, | |
| "logps/rejected": -4.988365173339844, | |
| "loss": 0.887, | |
| "nll_loss": 0.855586051940918, | |
| "rewards/accuracies": 0.8984375, | |
| "rewards/chosen": -0.14445561170578003, | |
| "rewards/margins": 0.6037992238998413, | |
| "rewards/rejected": -0.7482547760009766, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 10.597279716144293, | |
| "grad_norm": 4.630692005157471, | |
| "learning_rate": 1.5098005849021078e-07, | |
| "log_odds_chosen": 4.724957466125488, | |
| "log_odds_ratio": -0.2811046242713928, | |
| "logits/chosen": 0.858523428440094, | |
| "logits/rejected": 0.5616721510887146, | |
| "logps/chosen": -0.9630488753318787, | |
| "logps/rejected": -5.188055038452148, | |
| "loss": 0.8694, | |
| "nll_loss": 0.858130693435669, | |
| "rewards/accuracies": 0.86328125, | |
| "rewards/chosen": -0.14445732533931732, | |
| "rewards/margins": 0.6337509155273438, | |
| "rewards/rejected": -0.778208315372467, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 10.748669426374926, | |
| "grad_norm": 6.591275215148926, | |
| "learning_rate": 1.4340479863643656e-07, | |
| "log_odds_chosen": 4.770135402679443, | |
| "log_odds_ratio": -0.2736424207687378, | |
| "logits/chosen": 0.7936345934867859, | |
| "logits/rejected": 0.5366979837417603, | |
| "logps/chosen": -0.9466649889945984, | |
| "logps/rejected": -5.203468322753906, | |
| "loss": 0.8882, | |
| "nll_loss": 0.8353475332260132, | |
| "rewards/accuracies": 0.85546875, | |
| "rewards/chosen": -0.141999751329422, | |
| "rewards/margins": 0.6385205984115601, | |
| "rewards/rejected": -0.7805203795433044, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 10.90005913660556, | |
| "grad_norm": 5.084187984466553, | |
| "learning_rate": 1.3594733566170925e-07, | |
| "log_odds_chosen": 4.994205474853516, | |
| "log_odds_ratio": -0.30068373680114746, | |
| "logits/chosen": 0.8074924945831299, | |
| "logits/rejected": 0.522384524345398, | |
| "logps/chosen": -0.9593102335929871, | |
| "logps/rejected": -5.447037220001221, | |
| "loss": 0.8834, | |
| "nll_loss": 0.8404646515846252, | |
| "rewards/accuracies": 0.8203125, | |
| "rewards/chosen": -0.14389653503894806, | |
| "rewards/margins": 0.673159122467041, | |
| "rewards/rejected": -0.8170557022094727, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 11.051448846836191, | |
| "grad_norm": 5.465320110321045, | |
| "learning_rate": 1.2861591070496192e-07, | |
| "log_odds_chosen": 4.723004341125488, | |
| "log_odds_ratio": -0.25821179151535034, | |
| "logits/chosen": 0.8570014238357544, | |
| "logits/rejected": 0.5345165133476257, | |
| "logps/chosen": -0.9341294765472412, | |
| "logps/rejected": -5.139418601989746, | |
| "loss": 0.8586, | |
| "nll_loss": 0.8409022092819214, | |
| "rewards/accuracies": 0.89453125, | |
| "rewards/chosen": -0.14011943340301514, | |
| "rewards/margins": 0.6307933330535889, | |
| "rewards/rejected": -0.770912766456604, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 11.202838557066825, | |
| "grad_norm": 5.109860420227051, | |
| "learning_rate": 1.2141862562226164e-07, | |
| "log_odds_chosen": 4.454768180847168, | |
| "log_odds_ratio": -0.24100762605667114, | |
| "logits/chosen": 0.7960795760154724, | |
| "logits/rejected": 0.511499285697937, | |
| "logps/chosen": -0.9228134751319885, | |
| "logps/rejected": -4.836323261260986, | |
| "loss": 0.8823, | |
| "nll_loss": 0.8171857595443726, | |
| "rewards/accuracies": 0.8984375, | |
| "rewards/chosen": -0.13842202723026276, | |
| "rewards/margins": 0.5870264172554016, | |
| "rewards/rejected": -0.725448489189148, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 11.354228267297458, | |
| "grad_norm": 4.377430438995361, | |
| "learning_rate": 1.1436343403356016e-07, | |
| "log_odds_chosen": 4.902271270751953, | |
| "log_odds_ratio": -0.24684929847717285, | |
| "logits/chosen": 0.8083094358444214, | |
| "logits/rejected": 0.5171899199485779, | |
| "logps/chosen": -0.9131155610084534, | |
| "logps/rejected": -5.286437034606934, | |
| "loss": 0.8823, | |
| "nll_loss": 0.8163360953330994, | |
| "rewards/accuracies": 0.89453125, | |
| "rewards/chosen": -0.13696734607219696, | |
| "rewards/margins": 0.6559982299804688, | |
| "rewards/rejected": -0.7929655313491821, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 11.354228267297458, | |
| "eval_log_odds_chosen": 1.8271435499191284, | |
| "eval_log_odds_ratio": -0.17579954862594604, | |
| "eval_logits/chosen": 0.49480167031288147, | |
| "eval_logits/rejected": 0.437588095664978, | |
| "eval_logps/chosen": -0.657990038394928, | |
| "eval_logps/rejected": -1.80092453956604, | |
| "eval_loss": 0.7247140407562256, | |
| "eval_nll_loss": 0.6719491481781006, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": -0.09869851171970367, | |
| "eval_rewards/margins": 0.1714402139186859, | |
| "eval_rewards/rejected": -0.2701387107372284, | |
| "eval_runtime": 1.7829, | |
| "eval_samples_per_second": 76.839, | |
| "eval_steps_per_second": 10.096, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 11.50561797752809, | |
| "grad_norm": 6.997631072998047, | |
| "learning_rate": 1.0745813253325956e-07, | |
| "log_odds_chosen": 4.850367069244385, | |
| "log_odds_ratio": -0.23768070340156555, | |
| "logits/chosen": 0.8919247984886169, | |
| "logits/rejected": 0.5253655910491943, | |
| "logps/chosen": -0.9427354335784912, | |
| "logps/rejected": -5.268039226531982, | |
| "loss": 0.8907, | |
| "nll_loss": 0.8325998783111572, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -0.14141032099723816, | |
| "rewards/margins": 0.6487956643104553, | |
| "rewards/rejected": -0.7902059555053711, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 11.657007687758723, | |
| "grad_norm": 6.977694988250732, | |
| "learning_rate": 1.007103520743035e-07, | |
| "log_odds_chosen": 4.430591106414795, | |
| "log_odds_ratio": -0.2736847698688507, | |
| "logits/chosen": 0.772072434425354, | |
| "logits/rejected": 0.5454930067062378, | |
| "logps/chosen": -0.972098171710968, | |
| "logps/rejected": -4.8969268798828125, | |
| "loss": 0.8668, | |
| "nll_loss": 0.864302396774292, | |
| "rewards/accuracies": 0.8828125, | |
| "rewards/chosen": -0.14581473171710968, | |
| "rewards/margins": 0.5887242555618286, | |
| "rewards/rejected": -0.7345390319824219, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 11.808397397989355, | |
| "grad_norm": 9.901198387145996, | |
| "learning_rate": 9.412754953531663e-08, | |
| "log_odds_chosen": 5.721859455108643, | |
| "log_odds_ratio": -0.2376585453748703, | |
| "logits/chosen": 0.9159454107284546, | |
| "logits/rejected": 0.49302536249160767, | |
| "logps/chosen": -0.905667781829834, | |
| "logps/rejected": -6.09743070602417, | |
| "loss": 0.8778, | |
| "nll_loss": 0.8210791945457458, | |
| "rewards/accuracies": 0.8984375, | |
| "rewards/chosen": -0.135850191116333, | |
| "rewards/margins": 0.7787644267082214, | |
| "rewards/rejected": -0.9146146178245544, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 11.959787108219988, | |
| "grad_norm": 6.6628241539001465, | |
| "learning_rate": 8.771699948011203e-08, | |
| "log_odds_chosen": 4.282519817352295, | |
| "log_odds_ratio": -0.2792586088180542, | |
| "logits/chosen": 0.790172815322876, | |
| "logits/rejected": 0.563973069190979, | |
| "logps/chosen": -0.9786302447319031, | |
| "logps/rejected": -4.77611780166626, | |
| "loss": 0.8802, | |
| "nll_loss": 0.8442527651786804, | |
| "rewards/accuracies": 0.84765625, | |
| "rewards/chosen": -0.14679455757141113, | |
| "rewards/margins": 0.5696231722831726, | |
| "rewards/rejected": -0.716417670249939, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 12.111176818450621, | |
| "grad_norm": 5.591745853424072, | |
| "learning_rate": 8.148578611867113e-08, | |
| "log_odds_chosen": 4.849425315856934, | |
| "log_odds_ratio": -0.29553845524787903, | |
| "logits/chosen": 0.8502916097640991, | |
| "logits/rejected": 0.5881719589233398, | |
| "logps/chosen": -0.9942740201950073, | |
| "logps/rejected": -5.380496025085449, | |
| "loss": 0.8794, | |
| "nll_loss": 0.894903302192688, | |
| "rewards/accuracies": 0.83984375, | |
| "rewards/chosen": -0.1491411030292511, | |
| "rewards/margins": 0.6579334139823914, | |
| "rewards/rejected": -0.8070744276046753, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 12.262566528681253, | |
| "grad_norm": 4.799871921539307, | |
| "learning_rate": 7.544079547848181e-08, | |
| "log_odds_chosen": 4.629427909851074, | |
| "log_odds_ratio": -0.2579698860645294, | |
| "logits/chosen": 0.8144665360450745, | |
| "logits/rejected": 0.5345531702041626, | |
| "logps/chosen": -0.9962482452392578, | |
| "logps/rejected": -5.126289367675781, | |
| "loss": 0.8853, | |
| "nll_loss": 0.8719948530197144, | |
| "rewards/accuracies": 0.87890625, | |
| "rewards/chosen": -0.14943724870681763, | |
| "rewards/margins": 0.6195061802864075, | |
| "rewards/rejected": -0.7689434885978699, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 12.413956238911886, | |
| "grad_norm": 5.2031779289245605, | |
| "learning_rate": 6.958870779488446e-08, | |
| "log_odds_chosen": 5.763055801391602, | |
| "log_odds_ratio": -0.24303670227527618, | |
| "logits/chosen": 0.85135418176651, | |
| "logits/rejected": 0.5018079876899719, | |
| "logps/chosen": -0.9315154552459717, | |
| "logps/rejected": -6.163926124572754, | |
| "loss": 0.8732, | |
| "nll_loss": 0.8289435505867004, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.13972733914852142, | |
| "rewards/margins": 0.78486168384552, | |
| "rewards/rejected": -0.9245890378952026, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 12.56534594914252, | |
| "grad_norm": 4.5774712562561035, | |
| "learning_rate": 6.393599012883707e-08, | |
| "log_odds_chosen": 4.685327529907227, | |
| "log_odds_ratio": -0.2833007574081421, | |
| "logits/chosen": 0.7489104270935059, | |
| "logits/rejected": 0.5779923796653748, | |
| "logps/chosen": -0.9675414562225342, | |
| "logps/rejected": -5.168377876281738, | |
| "loss": 0.8694, | |
| "nll_loss": 0.8434449434280396, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.14513123035430908, | |
| "rewards/margins": 0.6301255226135254, | |
| "rewards/rejected": -0.7752567529678345, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 12.716735659373152, | |
| "grad_norm": 5.854611396789551, | |
| "learning_rate": 5.848888922025552e-08, | |
| "log_odds_chosen": 5.014122009277344, | |
| "log_odds_ratio": -0.23267918825149536, | |
| "logits/chosen": 0.8252905607223511, | |
| "logits/rejected": 0.4858684539794922, | |
| "logps/chosen": -0.8893996477127075, | |
| "logps/rejected": -5.326512336730957, | |
| "loss": 0.878, | |
| "nll_loss": 0.8163630366325378, | |
| "rewards/accuracies": 0.8828125, | |
| "rewards/chosen": -0.13340994715690613, | |
| "rewards/margins": 0.6655669212341309, | |
| "rewards/rejected": -0.7989768981933594, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 12.868125369603785, | |
| "grad_norm": 5.542015075683594, | |
| "learning_rate": 5.325342458482779e-08, | |
| "log_odds_chosen": 5.052638530731201, | |
| "log_odds_ratio": -0.2500526010990143, | |
| "logits/chosen": 0.8215246796607971, | |
| "logits/rejected": 0.573950469493866, | |
| "logps/chosen": -0.8597905039787292, | |
| "logps/rejected": -5.335259437561035, | |
| "loss": 0.8812, | |
| "nll_loss": 0.8173032999038696, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.12896858155727386, | |
| "rewards/margins": 0.6713204383850098, | |
| "rewards/rejected": -0.8002889156341553, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 13.019515079834418, | |
| "grad_norm": 7.424806118011475, | |
| "learning_rate": 4.823538186193096e-08, | |
| "log_odds_chosen": 5.35725212097168, | |
| "log_odds_ratio": -0.23181939125061035, | |
| "logits/chosen": 0.8148990273475647, | |
| "logits/rejected": 0.4551333785057068, | |
| "logps/chosen": -0.9124429225921631, | |
| "logps/rejected": -5.717087268829346, | |
| "loss": 0.8778, | |
| "nll_loss": 0.8277573585510254, | |
| "rewards/accuracies": 0.91015625, | |
| "rewards/chosen": -0.13686645030975342, | |
| "rewards/margins": 0.7206966876983643, | |
| "rewards/rejected": -0.8575630784034729, | |
| "step": 2752 | |
| }, | |
| { | |
| "epoch": 13.17090479006505, | |
| "grad_norm": 6.039958953857422, | |
| "learning_rate": 4.3440306421001324e-08, | |
| "log_odds_chosen": 5.5131001472473145, | |
| "log_odds_ratio": -0.24206629395484924, | |
| "logits/chosen": 0.873075008392334, | |
| "logits/rejected": 0.5212752223014832, | |
| "logps/chosen": -0.8922577500343323, | |
| "logps/rejected": -5.86539888381958, | |
| "loss": 0.8901, | |
| "nll_loss": 0.8136817216873169, | |
| "rewards/accuracies": 0.88671875, | |
| "rewards/chosen": -0.13383866846561432, | |
| "rewards/margins": 0.745971143245697, | |
| "rewards/rejected": -0.8798097968101501, | |
| "step": 2784 | |
| }, | |
| { | |
| "epoch": 13.246599645180366, | |
| "eval_log_odds_chosen": 1.8457978963851929, | |
| "eval_log_odds_ratio": -0.17291945219039917, | |
| "eval_logits/chosen": 0.5009181499481201, | |
| "eval_logits/rejected": 0.446205198764801, | |
| "eval_logps/chosen": -0.6597917675971985, | |
| "eval_logps/rejected": -1.8198742866516113, | |
| "eval_loss": 0.7256795763969421, | |
| "eval_nll_loss": 0.6736116409301758, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": -0.09896877408027649, | |
| "eval_rewards/margins": 0.17401237785816193, | |
| "eval_rewards/rejected": -0.2729811668395996, | |
| "eval_runtime": 1.7675, | |
| "eval_samples_per_second": 77.512, | |
| "eval_steps_per_second": 10.184, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 13.322294500295683, | |
| "grad_norm": 4.649291515350342, | |
| "learning_rate": 3.887349723342303e-08, | |
| "log_odds_chosen": 5.655206203460693, | |
| "log_odds_ratio": -0.22572118043899536, | |
| "logits/chosen": 0.8335084915161133, | |
| "logits/rejected": 0.4884824752807617, | |
| "logps/chosen": -0.8668183088302612, | |
| "logps/rejected": -5.9456024169921875, | |
| "loss": 0.8773, | |
| "nll_loss": 0.7930561900138855, | |
| "rewards/accuracies": 0.90234375, | |
| "rewards/chosen": -0.13002273440361023, | |
| "rewards/margins": 0.7618176937103271, | |
| "rewards/rejected": -0.8918405175209045, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 13.473684210526315, | |
| "grad_norm": 5.706801414489746, | |
| "learning_rate": 3.454000101670901e-08, | |
| "log_odds_chosen": 4.356830596923828, | |
| "log_odds_ratio": -0.24235375225543976, | |
| "logits/chosen": 0.7453078031539917, | |
| "logits/rejected": 0.5251801609992981, | |
| "logps/chosen": -0.9370274543762207, | |
| "logps/rejected": -4.772754192352295, | |
| "loss": 0.8771, | |
| "nll_loss": 0.8157171010971069, | |
| "rewards/accuracies": 0.88671875, | |
| "rewards/chosen": -0.14055413007736206, | |
| "rewards/margins": 0.5753591060638428, | |
| "rewards/rejected": -0.7159131765365601, | |
| "step": 2848 | |
| }, | |
| { | |
| "epoch": 13.625073920756948, | |
| "grad_norm": 6.6824140548706055, | |
| "learning_rate": 3.044460665744283e-08, | |
| "log_odds_chosen": 4.974400043487549, | |
| "log_odds_ratio": -0.24002020061016083, | |
| "logits/chosen": 0.7889403700828552, | |
| "logits/rejected": 0.4931294322013855, | |
| "logps/chosen": -0.9762779474258423, | |
| "logps/rejected": -5.440495491027832, | |
| "loss": 0.8849, | |
| "nll_loss": 0.8199655413627625, | |
| "rewards/accuracies": 0.8984375, | |
| "rewards/chosen": -0.14644168317317963, | |
| "rewards/margins": 0.6696327328681946, | |
| "rewards/rejected": -0.8160744905471802, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 13.776463630987582, | |
| "grad_norm": 9.858070373535156, | |
| "learning_rate": 2.659183991914696e-08, | |
| "log_odds_chosen": 4.271711349487305, | |
| "log_odds_ratio": -0.25790902972221375, | |
| "logits/chosen": 0.7586400508880615, | |
| "logits/rejected": 0.5483137369155884, | |
| "logps/chosen": -0.9079785346984863, | |
| "logps/rejected": -4.65129280090332, | |
| "loss": 0.8755, | |
| "nll_loss": 0.8172128200531006, | |
| "rewards/accuracies": 0.87890625, | |
| "rewards/chosen": -0.1361967921257019, | |
| "rewards/margins": 0.561497151851654, | |
| "rewards/rejected": -0.697693943977356, | |
| "step": 2912 | |
| }, | |
| { | |
| "epoch": 13.927853341218214, | |
| "grad_norm": 4.99421501159668, | |
| "learning_rate": 2.298595844092377e-08, | |
| "log_odds_chosen": 5.054343223571777, | |
| "log_odds_ratio": -0.2358601987361908, | |
| "logits/chosen": 0.7982761859893799, | |
| "logits/rejected": 0.5060718655586243, | |
| "logps/chosen": -0.9570282697677612, | |
| "logps/rejected": -5.482752799987793, | |
| "loss": 0.8707, | |
| "nll_loss": 0.8115738034248352, | |
| "rewards/accuracies": 0.90234375, | |
| "rewards/chosen": -0.143554225564003, | |
| "rewards/margins": 0.6788586974143982, | |
| "rewards/rejected": -0.82241290807724, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 14.079243051448847, | |
| "grad_norm": 17.175851821899414, | |
| "learning_rate": 1.9630947032398066e-08, | |
| "log_odds_chosen": 5.8499908447265625, | |
| "log_odds_ratio": -0.22148607671260834, | |
| "logits/chosen": 0.817506730556488, | |
| "logits/rejected": 0.44914665818214417, | |
| "logps/chosen": -0.8968250751495361, | |
| "logps/rejected": -6.185724258422852, | |
| "loss": 0.8673, | |
| "nll_loss": 0.8207356333732605, | |
| "rewards/accuracies": 0.921875, | |
| "rewards/chosen": -0.13452376425266266, | |
| "rewards/margins": 0.7933349013328552, | |
| "rewards/rejected": -0.9278587698936462, | |
| "step": 2976 | |
| }, | |
| { | |
| "epoch": 14.23063276167948, | |
| "grad_norm": 7.170802593231201, | |
| "learning_rate": 1.653051327015911e-08, | |
| "log_odds_chosen": 4.76658296585083, | |
| "log_odds_ratio": -0.24812592566013336, | |
| "logits/chosen": 0.8145585060119629, | |
| "logits/rejected": 0.5187351703643799, | |
| "logps/chosen": -0.9258391261100769, | |
| "logps/rejected": -5.176287651062012, | |
| "loss": 0.8781, | |
| "nll_loss": 0.8292718529701233, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -0.13887587189674377, | |
| "rewards/margins": 0.6375671625137329, | |
| "rewards/rejected": -0.7764431834220886, | |
| "step": 3008 | |
| }, | |
| { | |
| "epoch": 14.382022471910112, | |
| "grad_norm": 5.404478073120117, | |
| "learning_rate": 1.368808340056879e-08, | |
| "log_odds_chosen": 5.262024879455566, | |
| "log_odds_ratio": -0.22128547728061676, | |
| "logits/chosen": 0.7849254608154297, | |
| "logits/rejected": 0.4733457863330841, | |
| "logps/chosen": -0.9194588661193848, | |
| "logps/rejected": -5.613149166107178, | |
| "loss": 0.8665, | |
| "nll_loss": 0.8110780715942383, | |
| "rewards/accuracies": 0.8828125, | |
| "rewards/chosen": -0.1379188597202301, | |
| "rewards/margins": 0.704053521156311, | |
| "rewards/rejected": -0.8419723510742188, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 14.533412182140745, | |
| "grad_norm": 4.717693328857422, | |
| "learning_rate": 1.1106798553464802e-08, | |
| "log_odds_chosen": 5.532874584197998, | |
| "log_odds_ratio": -0.23889514803886414, | |
| "logits/chosen": 0.887575626373291, | |
| "logits/rejected": 0.503061056137085, | |
| "logps/chosen": -0.9478439092636108, | |
| "logps/rejected": -5.9591827392578125, | |
| "loss": 0.8689, | |
| "nll_loss": 0.8499802947044373, | |
| "rewards/accuracies": 0.9140625, | |
| "rewards/chosen": -0.1421765685081482, | |
| "rewards/margins": 0.7517008185386658, | |
| "rewards/rejected": -0.893877387046814, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 14.684801892371379, | |
| "grad_norm": 7.475513458251953, | |
| "learning_rate": 8.789511270941269e-09, | |
| "log_odds_chosen": 4.4497551918029785, | |
| "log_odds_ratio": -0.27013376355171204, | |
| "logits/chosen": 0.7935608625411987, | |
| "logits/rejected": 0.5559485554695129, | |
| "logps/chosen": -0.9605445861816406, | |
| "logps/rejected": -4.917541980743408, | |
| "loss": 0.8786, | |
| "nll_loss": 0.8641871213912964, | |
| "rewards/accuracies": 0.8828125, | |
| "rewards/chosen": -0.1440816968679428, | |
| "rewards/margins": 0.5935496091842651, | |
| "rewards/rejected": -0.7376313209533691, | |
| "step": 3104 | |
| }, | |
| { | |
| "epoch": 14.83619160260201, | |
| "grad_norm": 6.8675408363342285, | |
| "learning_rate": 6.738782355044048e-09, | |
| "log_odds_chosen": 4.509281635284424, | |
| "log_odds_ratio": -0.27578622102737427, | |
| "logits/chosen": 0.7721443772315979, | |
| "logits/rejected": 0.5139036774635315, | |
| "logps/chosen": -0.9913955926895142, | |
| "logps/rejected": -5.035284042358398, | |
| "loss": 0.8838, | |
| "nll_loss": 0.8683611154556274, | |
| "rewards/accuracies": 0.88671875, | |
| "rewards/chosen": -0.14870934188365936, | |
| "rewards/margins": 0.6065833568572998, | |
| "rewards/rejected": -0.7552926540374756, | |
| "step": 3136 | |
| }, | |
| { | |
| "epoch": 14.987581312832644, | |
| "grad_norm": 7.102670669555664, | |
| "learning_rate": 4.956878037864043e-09, | |
| "log_odds_chosen": 4.306816101074219, | |
| "log_odds_ratio": -0.30200034379959106, | |
| "logits/chosen": 0.8607514500617981, | |
| "logits/rejected": 0.591871440410614, | |
| "logps/chosen": -0.9792557954788208, | |
| "logps/rejected": -4.773169040679932, | |
| "loss": 0.8869, | |
| "nll_loss": 0.8911793231964111, | |
| "rewards/accuracies": 0.859375, | |
| "rewards/chosen": -0.1468883752822876, | |
| "rewards/margins": 0.569087028503418, | |
| "rewards/rejected": -0.7159753441810608, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 15.138971023063275, | |
| "grad_norm": 4.992292881011963, | |
| "learning_rate": 3.4457674771554422e-09, | |
| "log_odds_chosen": 4.759942054748535, | |
| "log_odds_ratio": -0.2575688362121582, | |
| "logits/chosen": 0.7185624241828918, | |
| "logits/rejected": 0.42112159729003906, | |
| "logps/chosen": -0.9415456652641296, | |
| "logps/rejected": -5.170385360717773, | |
| "loss": 0.858, | |
| "nll_loss": 0.8277443647384644, | |
| "rewards/accuracies": 0.8828125, | |
| "rewards/chosen": -0.14123186469078064, | |
| "rewards/margins": 0.6343258619308472, | |
| "rewards/rejected": -0.775557816028595, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 15.138971023063275, | |
| "eval_log_odds_chosen": 1.8564934730529785, | |
| "eval_log_odds_ratio": -0.17145967483520508, | |
| "eval_logits/chosen": 0.48080742359161377, | |
| "eval_logits/rejected": 0.4276208281517029, | |
| "eval_logps/chosen": -0.6593887209892273, | |
| "eval_logps/rejected": -1.8252443075180054, | |
| "eval_loss": 0.7243954539299011, | |
| "eval_nll_loss": 0.6726279854774475, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": -0.09890830516815186, | |
| "eval_rewards/margins": 0.17487837374210358, | |
| "eval_rewards/rejected": -0.27378666400909424, | |
| "eval_runtime": 1.7744, | |
| "eval_samples_per_second": 77.208, | |
| "eval_steps_per_second": 10.144, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 15.290360733293909, | |
| "grad_norm": 6.751287937164307, | |
| "learning_rate": 2.2071205802468297e-09, | |
| "log_odds_chosen": 4.854089736938477, | |
| "log_odds_ratio": -0.2636704742908478, | |
| "logits/chosen": 0.7567326426506042, | |
| "logits/rejected": 0.527582585811615, | |
| "logps/chosen": -0.9423821568489075, | |
| "logps/rejected": -5.277737617492676, | |
| "loss": 0.8852, | |
| "nll_loss": 0.844541609287262, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.14135733246803284, | |
| "rewards/margins": 0.6503032445907593, | |
| "rewards/rejected": -0.7916606068611145, | |
| "step": 3232 | |
| }, | |
| { | |
| "epoch": 15.441750443524542, | |
| "grad_norm": 5.534750938415527, | |
| "learning_rate": 1.2423061586496476e-09, | |
| "log_odds_chosen": 5.184489727020264, | |
| "log_odds_ratio": -0.24983780086040497, | |
| "logits/chosen": 0.8209244608879089, | |
| "logits/rejected": 0.5052769780158997, | |
| "logps/chosen": -0.9556353688240051, | |
| "logps/rejected": -5.621804237365723, | |
| "loss": 0.8706, | |
| "nll_loss": 0.8418364524841309, | |
| "rewards/accuracies": 0.88671875, | |
| "rewards/chosen": -0.14334531128406525, | |
| "rewards/margins": 0.699925422668457, | |
| "rewards/rejected": -0.8432707786560059, | |
| "step": 3264 | |
| }, | |
| { | |
| "epoch": 15.593140153755174, | |
| "grad_norm": 5.217104434967041, | |
| "learning_rate": 5.523904154037528e-10, | |
| "log_odds_chosen": 5.348480701446533, | |
| "log_odds_ratio": -0.2507275640964508, | |
| "logits/chosen": 0.8220376372337341, | |
| "logits/rejected": 0.5271560549736023, | |
| "logps/chosen": -0.9200209975242615, | |
| "logps/rejected": -5.755062103271484, | |
| "loss": 0.887, | |
| "nll_loss": 0.8451349139213562, | |
| "rewards/accuracies": 0.85546875, | |
| "rewards/chosen": -0.1380031555891037, | |
| "rewards/margins": 0.7252561450004578, | |
| "rewards/rejected": -0.8632593154907227, | |
| "step": 3296 | |
| }, | |
| { | |
| "epoch": 15.744529863985807, | |
| "grad_norm": 6.9226460456848145, | |
| "learning_rate": 1.3813576683111006e-10, | |
| "log_odds_chosen": 4.370879650115967, | |
| "log_odds_ratio": -0.24154168367385864, | |
| "logits/chosen": 0.7712342739105225, | |
| "logits/rejected": 0.5409867763519287, | |
| "logps/chosen": -0.9708598256111145, | |
| "logps/rejected": -4.831565856933594, | |
| "loss": 0.8729, | |
| "nll_loss": 0.8363229036331177, | |
| "rewards/accuracies": 0.921875, | |
| "rewards/chosen": -0.14562898874282837, | |
| "rewards/margins": 0.5791059136390686, | |
| "rewards/rejected": -0.724734902381897, | |
| "step": 3328 | |
| }, | |
| { | |
| "epoch": 15.89591957421644, | |
| "grad_norm": 7.291532516479492, | |
| "learning_rate": 0.0, | |
| "log_odds_chosen": 5.1468186378479, | |
| "log_odds_ratio": -0.2334214597940445, | |
| "logits/chosen": 0.8100905418395996, | |
| "logits/rejected": 0.48369458317756653, | |
| "logps/chosen": -0.8902687430381775, | |
| "logps/rejected": -5.482838153839111, | |
| "loss": 0.883, | |
| "nll_loss": 0.8243392705917358, | |
| "rewards/accuracies": 0.890625, | |
| "rewards/chosen": -0.1335403174161911, | |
| "rewards/margins": 0.688885509967804, | |
| "rewards/rejected": -0.8224257826805115, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 15.89591957421644, | |
| "eval_log_odds_chosen": 1.8541311025619507, | |
| "eval_log_odds_ratio": -0.17156726121902466, | |
| "eval_logits/chosen": 0.4940509796142578, | |
| "eval_logits/rejected": 0.4394443929195404, | |
| "eval_logps/chosen": -0.6573522090911865, | |
| "eval_logps/rejected": -1.8197245597839355, | |
| "eval_loss": 0.7246665954589844, | |
| "eval_nll_loss": 0.6722227334976196, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": -0.09860283136367798, | |
| "eval_rewards/margins": 0.17435584962368011, | |
| "eval_rewards/rejected": -0.2729586660861969, | |
| "eval_runtime": 1.7715, | |
| "eval_samples_per_second": 77.334, | |
| "eval_steps_per_second": 10.161, | |
| "step": 3360 | |
| } | |
| ], | |
| "logging_steps": 32, | |
| "max_steps": 3360, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 16, | |
| "save_steps": 400, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |