[
  {
    "loss": 0.6931,
    "grad_norm": 5.219141483306885,
    "learning_rate": 0.00018,
    "rewards/chosen": 0.024159394204616547,
    "rewards/rejected": 0.0037573135923594236,
    "rewards/accuracies": 0.42500001192092896,
    "rewards/margins": 0.020402083173394203,
    "logps/chosen": -146.83187866210938,
    "logps/rejected": -130.77682495117188,
    "logits/chosen": -0.2618408203125,
    "logits/rejected": -0.405269056558609,
    "epoch": 0.5925925925925926,
    "step": 10
  },
  {
    "loss": 0.682,
    "grad_norm": 6.610161781311035,
    "learning_rate": 0.00019333333333333333,
    "rewards/chosen": -0.0013131718151271343,
    "rewards/rejected": -0.18291252851486206,
    "rewards/accuracies": 0.594936728477478,
    "rewards/margins": 0.18159937858581543,
    "logps/chosen": -144.95765686035156,
    "logps/rejected": -133.71145629882812,
    "logits/chosen": 0.21812255680561066,
    "logits/rejected": 0.1329454481601715,
    "epoch": 1.1777777777777778,
    "step": 20
  },
  {
    "loss": 0.3351,
    "grad_norm": 2.0350332260131836,
    "learning_rate": 0.00018500000000000002,
    "rewards/chosen": 0.19238564372062683,
    "rewards/rejected": -1.004982590675354,
    "rewards/accuracies": 0.925000011920929,
    "rewards/margins": 1.1973682641983032,
    "logps/chosen": -140.00352478027344,
    "logps/rejected": -134.99171447753906,
    "logits/chosen": 0.40491190552711487,
    "logits/rejected": 0.2921258509159088,
    "epoch": 1.7703703703703704,
    "step": 30
  },
  {
    "loss": 0.1806,
    "grad_norm": 2.579374313354492,
    "learning_rate": 0.00017666666666666666,
    "rewards/chosen": 0.3689553439617157,
    "rewards/rejected": -2.125339984893799,
    "rewards/accuracies": 0.9620253443717957,
    "rewards/margins": 2.494295358657837,
    "logps/chosen": -143.1613006591797,
    "logps/rejected": -159.1355438232422,
    "logits/chosen": 0.11085856705904007,
    "logits/rejected": 0.032480403780937195,
    "epoch": 2.3555555555555556,
    "step": 40
  },
  {
    "loss": 0.1049,
    "grad_norm": 0.8431211709976196,
    "learning_rate": 0.00016833333333333335,
    "rewards/chosen": -0.37566089630126953,
    "rewards/rejected": -4.225518226623535,
    "rewards/accuracies": 0.9624999761581421,
    "rewards/margins": 3.8498573303222656,
    "logps/chosen": -146.19427490234375,
    "logps/rejected": -168.6130828857422,
    "logits/chosen": -0.39661893248558044,
    "logits/rejected": -0.4452442228794098,
    "epoch": 2.948148148148148,
    "step": 50
  },
  {
    "loss": 0.0286,
    "grad_norm": 1.5647461414337158,
    "learning_rate": 0.00016,
    "rewards/chosen": -1.5753328800201416,
    "rewards/rejected": -7.724185943603516,
    "rewards/accuracies": 0.9873417615890503,
    "rewards/margins": 6.148852825164795,
    "logps/chosen": -164.2067413330078,
    "logps/rejected": -209.5588836669922,
    "logits/chosen": -0.9004085659980774,
    "logits/rejected": -0.9173569679260254,
    "epoch": 3.533333333333333,
    "step": 60
  },
  {
    "loss": 0.0204,
    "grad_norm": 0.05377896502614021,
    "learning_rate": 0.00015166666666666668,
    "rewards/chosen": -3.3284900188446045,
    "rewards/rejected": -11.170860290527344,
    "rewards/accuracies": 0.9873417615890503,
    "rewards/margins": 7.842370986938477,
    "logps/chosen": -170.9961395263672,
    "logps/rejected": -241.1328125,
    "logits/chosen": -1.1217529773712158,
    "logits/rejected": -1.1465003490447998,
    "epoch": 4.118518518518519,
    "step": 70
  },
  {
    "loss": 0.0098,
    "grad_norm": 0.05912935361266136,
    "learning_rate": 0.00014333333333333334,
    "rewards/chosen": -5.450322151184082,
    "rewards/rejected": -15.317463874816895,
    "rewards/accuracies": 0.987500011920929,
    "rewards/margins": 9.867142677307129,
    "logps/chosen": -200.71902465820312,
    "logps/rejected": -283.8058166503906,
    "logits/chosen": -1.3752106428146362,
    "logits/rejected": -1.3843052387237549,
    "epoch": 4.711111111111111,
    "step": 80
  },
  {
    "loss": 0.0103,
    "grad_norm": 0.08618709444999695,
    "learning_rate": 0.00013500000000000003,
    "rewards/chosen": -9.06311321258545,
    "rewards/rejected": -20.052576065063477,
    "rewards/accuracies": 0.9873417615890503,
    "rewards/margins": 10.989459991455078,
    "logps/chosen": -237.59788513183594,
    "logps/rejected": -331.6789245605469,
    "logits/chosen": -1.3256553411483765,
    "logits/rejected": -1.3671971559524536,
    "epoch": 5.296296296296296,
    "step": 90
  },
  {
    "loss": 0.0176,
    "grad_norm": 0.0007632412016391754,
    "learning_rate": 0.00012666666666666666,
    "rewards/chosen": -6.595943450927734,
    "rewards/rejected": -18.24319839477539,
    "rewards/accuracies": 0.9750000238418579,
    "rewards/margins": 11.647254943847656,
    "logps/chosen": -212.84860229492188,
    "logps/rejected": -312.7068786621094,
    "logits/chosen": -1.4357213973999023,
    "logits/rejected": -1.4478198289871216,
    "epoch": 5.888888888888889,
    "step": 100
  },
  {
    "loss": 0.0002,
    "grad_norm": 0.021229052916169167,
    "learning_rate": 0.00011833333333333334,
    "rewards/chosen": -7.2799482345581055,
    "rewards/rejected": -19.018455505371094,
    "rewards/accuracies": 1.0,
    "rewards/margins": 11.738507270812988,
    "logps/chosen": -217.70120239257812,
    "logps/rejected": -321.6396484375,
    "logits/chosen": -1.3416528701782227,
    "logits/rejected": -1.3565353155136108,
    "epoch": 6.474074074074074,
    "step": 110
  },
  {
    "loss": 0.0174,
    "grad_norm": 0.011705581098794937,
    "learning_rate": 0.00011000000000000002,
    "rewards/chosen": -6.8897318840026855,
    "rewards/rejected": -18.329551696777344,
    "rewards/accuracies": 0.9746835231781006,
    "rewards/margins": 11.4398193359375,
    "logps/chosen": -211.3556365966797,
    "logps/rejected": -315.4004211425781,
    "logits/chosen": -1.3887122869491577,
    "logits/rejected": -1.4279637336730957,
    "epoch": 7.059259259259259,
    "step": 120
  },
  {
    "loss": 0.0088,
    "grad_norm": 0.006066357716917992,
    "learning_rate": 0.00010166666666666667,
    "rewards/chosen": -7.70062255859375,
    "rewards/rejected": -19.32270050048828,
    "rewards/accuracies": 0.987500011920929,
    "rewards/margins": 11.622076034545898,
    "logps/chosen": -218.02810668945312,
    "logps/rejected": -321.9827880859375,
    "logits/chosen": -1.3698838949203491,
    "logits/rejected": -1.379957675933838,
    "epoch": 7.651851851851852,
    "step": 130
  },
  {
    "loss": 0.0088,
    "grad_norm": 0.014796565286815166,
    "learning_rate": 9.333333333333334e-05,
    "rewards/chosen": -8.16592788696289,
    "rewards/rejected": -20.24009132385254,
    "rewards/accuracies": 0.9873417615890503,
    "rewards/margins": 12.074161529541016,
    "logps/chosen": -228.3029022216797,
    "logps/rejected": -333.3442687988281,
    "logits/chosen": -1.312727689743042,
    "logits/rejected": -1.3648468255996704,
    "epoch": 8.237037037037037,
    "step": 140
  },
  {
    "loss": 0.0174,
    "grad_norm": 0.010800166986882687,
    "learning_rate": 8.5e-05,
    "rewards/chosen": -7.864575386047363,
    "rewards/rejected": -20.18320083618164,
    "rewards/accuracies": 0.9750000238418579,
    "rewards/margins": 12.318623542785645,
    "logps/chosen": -219.8230438232422,
    "logps/rejected": -329.8297424316406,
    "logits/chosen": -1.3495450019836426,
    "logits/rejected": -1.400110125541687,
    "epoch": 8.829629629629629,
    "step": 150
  },
  {
    "loss": 0.0,
    "grad_norm": 0.0027752986643463373,
    "learning_rate": 7.666666666666667e-05,
    "rewards/chosen": -7.922427177429199,
    "rewards/rejected": -20.483713150024414,
    "rewards/accuracies": 1.0,
    "rewards/margins": 12.561285018920898,
    "logps/chosen": -236.33514404296875,
    "logps/rejected": -339.6175231933594,
    "logits/chosen": -1.387330412864685,
    "logits/rejected": -1.379171371459961,
    "epoch": 9.414814814814815,
    "step": 160
  },
  {
    "loss": 0.0174,
    "grad_norm": 0.022991616278886795,
    "learning_rate": 6.833333333333333e-05,
    "rewards/chosen": -8.79272174835205,
    "rewards/rejected": -21.603660583496094,
    "rewards/accuracies": 0.9746835231781006,
    "rewards/margins": 12.81093978881836,
    "logps/chosen": -223.5404510498047,
    "logps/rejected": -344.2777099609375,
    "logits/chosen": -1.332489013671875,
    "logits/rejected": -1.4169538021087646,
    "epoch": 10.0,
    "step": 170
  },
  {
    "loss": 0.0001,
    "grad_norm": 0.008427063003182411,
    "learning_rate": 6e-05,
    "rewards/chosen": -9.925287246704102,
    "rewards/rejected": -22.660680770874023,
    "rewards/accuracies": 1.0,
    "rewards/margins": 12.735391616821289,
    "logps/chosen": -246.78573608398438,
    "logps/rejected": -358.0868225097656,
    "logits/chosen": -1.3674745559692383,
    "logits/rejected": -1.393139123916626,
    "epoch": 10.592592592592592,
    "step": 180
  },
  {
    "loss": 0.0174,
    "grad_norm": 0.00592564232647419,
    "learning_rate": 5.166666666666667e-05,
    "rewards/chosen": -7.4557271003723145,
    "rewards/rejected": -20.30694580078125,
    "rewards/accuracies": 0.9746835231781006,
    "rewards/margins": 12.851216316223145,
    "logps/chosen": -212.6167449951172,
    "logps/rejected": -328.70428466796875,
    "logits/chosen": -1.3256752490997314,
    "logits/rejected": -1.3827478885650635,
    "epoch": 11.177777777777777,
    "step": 190
  },
  {
    "loss": 0.0174,
    "grad_norm": 0.0025185132399201393,
    "learning_rate": 4.3333333333333334e-05,
    "rewards/chosen": -8.556998252868652,
    "rewards/rejected": -21.165555953979492,
    "rewards/accuracies": 0.9750000238418579,
    "rewards/margins": 12.60855770111084,
    "logps/chosen": -230.914794921875,
    "logps/rejected": -344.1314392089844,
    "logits/chosen": -1.3429509401321411,
    "logits/rejected": -1.3791346549987793,
    "epoch": 11.77037037037037,
    "step": 200
  },
  {
    "loss": 0.0087,
    "grad_norm": 0.0017514040227979422,
    "learning_rate": 3.5e-05,
    "rewards/chosen": -10.171927452087402,
    "rewards/rejected": -23.150039672851562,
    "rewards/accuracies": 0.9873417615890503,
    "rewards/margins": 12.97811508178711,
    "logps/chosen": -251.9849395751953,
    "logps/rejected": -366.4695739746094,
    "logits/chosen": -1.3986672163009644,
    "logits/rejected": -1.4266730546951294,
    "epoch": 12.355555555555556,
    "step": 210
  },
  {
    "loss": 0.0087,
    "grad_norm": 0.006925302557647228,
    "learning_rate": 2.6666666666666667e-05,
    "rewards/chosen": -9.249361038208008,
    "rewards/rejected": -22.13502311706543,
    "rewards/accuracies": 0.987500011920929,
    "rewards/margins": 12.885663032531738,
    "logps/chosen": -235.34341430664062,
    "logps/rejected": -348.89794921875,
    "logits/chosen": -1.3144868612289429,
    "logits/rejected": -1.3764691352844238,
    "epoch": 12.948148148148148,
    "step": 220
  },
  {
    "loss": 0.0087,
    "grad_norm": 0.007612653076648712,
    "learning_rate": 1.8333333333333333e-05,
    "rewards/chosen": -9.676132202148438,
    "rewards/rejected": -22.771854400634766,
    "rewards/accuracies": 0.9873417615890503,
    "rewards/margins": 13.095723152160645,
    "logps/chosen": -238.63157653808594,
    "logps/rejected": -359.6723327636719,
    "logits/chosen": -1.3230102062225342,
    "logits/rejected": -1.3840538263320923,
    "epoch": 13.533333333333333,
    "step": 230
  },
  {
    "loss": 0.0087,
    "grad_norm": 0.00543614849448204,
    "learning_rate": 1e-05,
    "rewards/chosen": -9.432672500610352,
    "rewards/rejected": -22.142011642456055,
    "rewards/accuracies": 0.9873417615890503,
    "rewards/margins": 12.709343910217285,
    "logps/chosen": -243.76123046875,
    "logps/rejected": -351.6514892578125,
    "logits/chosen": -1.4037139415740967,
    "logits/rejected": -1.439239263534546,
    "epoch": 14.118518518518519,
    "step": 240
  },
  {
    "loss": 0.0,
    "grad_norm": 0.0029208704363554716,
    "learning_rate": 1.6666666666666667e-06,
    "rewards/chosen": -9.081149101257324,
    "rewards/rejected": -22.619098663330078,
    "rewards/accuracies": 1.0,
    "rewards/margins": 13.537951469421387,
    "logps/chosen": -238.7053985595703,
    "logps/rejected": -358.72821044921875,
    "logits/chosen": -1.3238470554351807,
    "logits/rejected": -1.3789857625961304,
    "epoch": 14.71111111111111,
    "step": 250
  },
  {
    "train_runtime": 1619.2606,
    "train_samples_per_second": 1.235,
    "train_steps_per_second": 0.154,
    "total_flos": 0.0,
    "train_loss": 0.08888284659932834,
    "epoch": 14.71111111111111,
    "step": 250
  }
]