| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 38.48, | |
| "eval_steps": 500, | |
| "global_step": 500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 4.544456958770752, | |
| "learning_rate": 0.00016, | |
| "logits/chosen": -0.15711309015750885, | |
| "logits/rejected": -0.17393989861011505, | |
| "logps/chosen": -146.22421264648438, | |
| "logps/rejected": -135.63302612304688, | |
| "loss": 0.7347, | |
| "rewards/accuracies": 0.3375000059604645, | |
| "rewards/chosen": -0.016495775431394577, | |
| "rewards/margins": -0.05580342561006546, | |
| "rewards/rejected": 0.03930765017867088, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 3.64050030708313, | |
| "learning_rate": 0.00019673469387755104, | |
| "logits/chosen": 0.22568197548389435, | |
| "logits/rejected": 0.24968074262142181, | |
| "logps/chosen": -140.56317138671875, | |
| "logps/rejected": -136.4517822265625, | |
| "loss": 0.5906, | |
| "rewards/accuracies": 0.6973684430122375, | |
| "rewards/chosen": 0.14059841632843018, | |
| "rewards/margins": 0.35573944449424744, | |
| "rewards/rejected": -0.21514104306697845, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 2.210195779800415, | |
| "learning_rate": 0.0001926530612244898, | |
| "logits/chosen": 0.3498680293560028, | |
| "logits/rejected": 0.3381582796573639, | |
| "logps/chosen": -139.37893676757812, | |
| "logps/rejected": -140.91236877441406, | |
| "loss": 0.3742, | |
| "rewards/accuracies": 0.9342105388641357, | |
| "rewards/chosen": 0.26392117142677307, | |
| "rewards/margins": 1.149539828300476, | |
| "rewards/rejected": -0.8856186866760254, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "grad_norm": 1.595664143562317, | |
| "learning_rate": 0.00018857142857142857, | |
| "logits/chosen": 0.33004075288772583, | |
| "logits/rejected": 0.3743434250354767, | |
| "logps/chosen": -142.27561950683594, | |
| "logps/rejected": -155.76829528808594, | |
| "loss": 0.2073, | |
| "rewards/accuracies": 0.9736841917037964, | |
| "rewards/chosen": 0.2223319262266159, | |
| "rewards/margins": 2.211127519607544, | |
| "rewards/rejected": -1.9887956380844116, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "grad_norm": 0.8180229663848877, | |
| "learning_rate": 0.00018448979591836735, | |
| "logits/chosen": -0.3215048909187317, | |
| "logits/rejected": -0.17942988872528076, | |
| "logps/chosen": -146.15176391601562, | |
| "logps/rejected": -177.8779296875, | |
| "loss": 0.064, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -0.18169204890727997, | |
| "rewards/margins": 4.198596954345703, | |
| "rewards/rejected": -4.380288600921631, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "grad_norm": 0.2704571485519409, | |
| "learning_rate": 0.00018040816326530615, | |
| "logits/chosen": -0.8548356890678406, | |
| "logits/rejected": -0.7396419644355774, | |
| "logps/chosen": -180.8902130126953, | |
| "logps/rejected": -247.38720703125, | |
| "loss": 0.0253, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -3.894167423248291, | |
| "rewards/margins": 7.3716654777526855, | |
| "rewards/rejected": -11.265832901000977, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "grad_norm": 0.04568086564540863, | |
| "learning_rate": 0.0001763265306122449, | |
| "logits/chosen": -1.0966769456863403, | |
| "logits/rejected": -0.9558103680610657, | |
| "logps/chosen": -216.4236297607422, | |
| "logps/rejected": -299.0456848144531, | |
| "loss": 0.0086, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.186522006988525, | |
| "rewards/margins": 9.163583755493164, | |
| "rewards/rejected": -16.35010528564453, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "grad_norm": 0.030845321714878082, | |
| "learning_rate": 0.00017224489795918368, | |
| "logits/chosen": -0.8765879273414612, | |
| "logits/rejected": -0.710603654384613, | |
| "logps/chosen": -208.9407196044922, | |
| "logps/rejected": -290.63800048828125, | |
| "loss": 0.0222, | |
| "rewards/accuracies": 0.9736841917037964, | |
| "rewards/chosen": -6.255966663360596, | |
| "rewards/margins": 9.707125663757324, | |
| "rewards/rejected": -15.963091850280762, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "grad_norm": 0.06601617485284805, | |
| "learning_rate": 0.00016816326530612246, | |
| "logits/chosen": -0.7680649757385254, | |
| "logits/rejected": -0.5714871287345886, | |
| "logps/chosen": -177.8467254638672, | |
| "logps/rejected": -262.7944030761719, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.312117338180542, | |
| "rewards/margins": 9.123556137084961, | |
| "rewards/rejected": -12.435674667358398, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "grad_norm": 0.02727358043193817, | |
| "learning_rate": 0.00016408163265306124, | |
| "logits/chosen": -0.7908716797828674, | |
| "logits/rejected": -0.5970498919487, | |
| "logps/chosen": -179.80191040039062, | |
| "logps/rejected": -264.28271484375, | |
| "loss": 0.0091, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -3.6998748779296875, | |
| "rewards/margins": 9.295143127441406, | |
| "rewards/rejected": -12.995016098022461, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "grad_norm": 0.052736785262823105, | |
| "learning_rate": 0.00016, | |
| "logits/chosen": -0.6252234578132629, | |
| "logits/rejected": -0.4707661271095276, | |
| "logps/chosen": -185.8507537841797, | |
| "logps/rejected": -273.919677734375, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.9508209228515625, | |
| "rewards/margins": 9.781594276428223, | |
| "rewards/rejected": -13.732414245605469, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "grad_norm": 0.010697088204324245, | |
| "learning_rate": 0.0001559183673469388, | |
| "logits/chosen": -0.9722104072570801, | |
| "logits/rejected": -0.7994766235351562, | |
| "logps/chosen": -193.7563018798828, | |
| "logps/rejected": -286.2286682128906, | |
| "loss": 0.0088, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -5.037867069244385, | |
| "rewards/margins": 10.28415298461914, | |
| "rewards/rejected": -15.322019577026367, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": null, | |
| "learning_rate": 0.00015183673469387757, | |
| "logits/chosen": -0.9392030835151672, | |
| "logits/rejected": -0.7859267592430115, | |
| "logps/chosen": -208.22225952148438, | |
| "logps/rejected": -304.37054443359375, | |
| "loss": 0.0174, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -6.531684875488281, | |
| "rewards/margins": 10.490182876586914, | |
| "rewards/rejected": -17.021865844726562, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 10.8, | |
| "grad_norm": 0.004046889953315258, | |
| "learning_rate": 0.00014816326530612246, | |
| "logits/chosen": -0.9692522883415222, | |
| "logits/rejected": -0.8100675344467163, | |
| "logps/chosen": -207.4010772705078, | |
| "logps/rejected": -305.93890380859375, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -6.667567253112793, | |
| "rewards/margins": 10.732935905456543, | |
| "rewards/rejected": -17.400503158569336, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 11.56, | |
| "grad_norm": 0.0038029830902814865, | |
| "learning_rate": 0.00014408163265306124, | |
| "logits/chosen": -0.9378202557563782, | |
| "logits/rejected": -0.7926595211029053, | |
| "logps/chosen": -211.8084259033203, | |
| "logps/rejected": -317.88360595703125, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -6.8508453369140625, | |
| "rewards/margins": 11.4996337890625, | |
| "rewards/rejected": -18.350479125976562, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 12.32, | |
| "grad_norm": 0.006996906362473965, | |
| "learning_rate": 0.00014, | |
| "logits/chosen": -0.9922997355461121, | |
| "logits/rejected": -0.8554012775421143, | |
| "logps/chosen": -225.67135620117188, | |
| "logps/rejected": -333.3880615234375, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -7.963207244873047, | |
| "rewards/margins": 11.519519805908203, | |
| "rewards/rejected": -19.482725143432617, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 13.08, | |
| "grad_norm": 0.004378203302621841, | |
| "learning_rate": 0.0001359183673469388, | |
| "logits/chosen": -0.9898152351379395, | |
| "logits/rejected": -0.8856151103973389, | |
| "logps/chosen": -217.1047821044922, | |
| "logps/rejected": -322.1737976074219, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.197223663330078, | |
| "rewards/margins": 11.574359893798828, | |
| "rewards/rejected": -18.771583557128906, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 13.88, | |
| "grad_norm": 0.006621514912694693, | |
| "learning_rate": 0.00013183673469387757, | |
| "logits/chosen": -1.0127683877944946, | |
| "logits/rejected": -0.8718019723892212, | |
| "logps/chosen": -220.30392456054688, | |
| "logps/rejected": -324.86114501953125, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -7.355535984039307, | |
| "rewards/margins": 11.52188777923584, | |
| "rewards/rejected": -18.877422332763672, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 14.64, | |
| "grad_norm": 0.0018915284890681505, | |
| "learning_rate": 0.00012775510204081632, | |
| "logits/chosen": -1.0274461507797241, | |
| "logits/rejected": -0.8911333680152893, | |
| "logps/chosen": -222.65452575683594, | |
| "logps/rejected": -327.3719787597656, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -8.228594779968262, | |
| "rewards/margins": 11.633173942565918, | |
| "rewards/rejected": -19.861770629882812, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 15.4, | |
| "grad_norm": 0.004701931029558182, | |
| "learning_rate": 0.0001236734693877551, | |
| "logits/chosen": -0.9393897652626038, | |
| "logits/rejected": -0.8236327171325684, | |
| "logps/chosen": -233.4442901611328, | |
| "logps/rejected": -337.7091979980469, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.586127281188965, | |
| "rewards/margins": 11.664649963378906, | |
| "rewards/rejected": -20.250778198242188, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 16.16, | |
| "grad_norm": 0.0026641907170414925, | |
| "learning_rate": 0.00011959183673469388, | |
| "logits/chosen": -1.07357656955719, | |
| "logits/rejected": -0.9411842226982117, | |
| "logps/chosen": -217.97422790527344, | |
| "logps/rejected": -331.9872131347656, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -7.6344499588012695, | |
| "rewards/margins": 11.908411026000977, | |
| "rewards/rejected": -19.542861938476562, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 16.96, | |
| "grad_norm": 0.003238040255382657, | |
| "learning_rate": 0.00011551020408163267, | |
| "logits/chosen": -0.9635592699050903, | |
| "logits/rejected": -0.8426879644393921, | |
| "logps/chosen": -231.2787322998047, | |
| "logps/rejected": -343.9873962402344, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.459344863891602, | |
| "rewards/margins": 12.18354320526123, | |
| "rewards/rejected": -20.642887115478516, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 17.72, | |
| "grad_norm": 0.004305878188461065, | |
| "learning_rate": 0.00011142857142857144, | |
| "logits/chosen": -0.9933919310569763, | |
| "logits/rejected": -0.9030781984329224, | |
| "logps/chosen": -227.8687286376953, | |
| "logps/rejected": -337.8186950683594, | |
| "loss": 0.026, | |
| "rewards/accuracies": 0.9736841917037964, | |
| "rewards/chosen": -8.76144790649414, | |
| "rewards/margins": 11.788559913635254, | |
| "rewards/rejected": -20.550006866455078, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 18.48, | |
| "grad_norm": 0.005013110116124153, | |
| "learning_rate": 0.00010734693877551021, | |
| "logits/chosen": -1.106400489807129, | |
| "logits/rejected": -0.9791207313537598, | |
| "logps/chosen": -235.90512084960938, | |
| "logps/rejected": -355.18023681640625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -9.072104454040527, | |
| "rewards/margins": 12.6239652633667, | |
| "rewards/rejected": -21.696069717407227, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 19.24, | |
| "grad_norm": 0.0031214996706694365, | |
| "learning_rate": 0.00010326530612244899, | |
| "logits/chosen": -0.9300792813301086, | |
| "logits/rejected": -0.8107971549034119, | |
| "logps/chosen": -226.8355712890625, | |
| "logps/rejected": -337.895751953125, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -8.232963562011719, | |
| "rewards/margins": 12.14808177947998, | |
| "rewards/rejected": -20.381046295166016, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.0045935832895338535, | |
| "learning_rate": 9.918367346938776e-05, | |
| "logits/chosen": -1.0340404510498047, | |
| "logits/rejected": -0.9297473430633545, | |
| "logps/chosen": -234.9521942138672, | |
| "logps/rejected": -347.18572998046875, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -9.113563537597656, | |
| "rewards/margins": 12.3864107131958, | |
| "rewards/rejected": -21.499975204467773, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 20.8, | |
| "grad_norm": 0.0013183593982830644, | |
| "learning_rate": 9.510204081632653e-05, | |
| "logits/chosen": -1.018434762954712, | |
| "logits/rejected": -0.9138419032096863, | |
| "logps/chosen": -236.2734832763672, | |
| "logps/rejected": -347.966064453125, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -8.8914794921875, | |
| "rewards/margins": 12.274205207824707, | |
| "rewards/rejected": -21.16568374633789, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 21.56, | |
| "grad_norm": 0.002180259209126234, | |
| "learning_rate": 9.102040816326532e-05, | |
| "logits/chosen": -0.9927906394004822, | |
| "logits/rejected": -0.9210112690925598, | |
| "logps/chosen": -230.18836975097656, | |
| "logps/rejected": -351.64263916015625, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -9.216283798217773, | |
| "rewards/margins": 12.91382884979248, | |
| "rewards/rejected": -22.13011360168457, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 22.32, | |
| "grad_norm": 0.0018169954419136047, | |
| "learning_rate": 8.693877551020408e-05, | |
| "logits/chosen": -1.0399179458618164, | |
| "logits/rejected": -0.9215599894523621, | |
| "logps/chosen": -243.0306854248047, | |
| "logps/rejected": -352.8495178222656, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -9.664202690124512, | |
| "rewards/margins": 12.071381568908691, | |
| "rewards/rejected": -21.73558235168457, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 23.08, | |
| "grad_norm": 0.0032243800815194845, | |
| "learning_rate": 8.285714285714287e-05, | |
| "logits/chosen": -1.0440365076065063, | |
| "logits/rejected": -0.909357488155365, | |
| "logps/chosen": -233.0224609375, | |
| "logps/rejected": -356.0140686035156, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.971783638000488, | |
| "rewards/margins": 12.992220878601074, | |
| "rewards/rejected": -21.964004516601562, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 23.88, | |
| "grad_norm": 0.005289977416396141, | |
| "learning_rate": 7.877551020408164e-05, | |
| "logits/chosen": -1.0011231899261475, | |
| "logits/rejected": -0.890425980091095, | |
| "logps/chosen": -247.11978149414062, | |
| "logps/rejected": -363.4911804199219, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -10.250194549560547, | |
| "rewards/margins": 12.585700035095215, | |
| "rewards/rejected": -22.835895538330078, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 24.64, | |
| "grad_norm": 0.0014915637439116836, | |
| "learning_rate": 7.469387755102041e-05, | |
| "logits/chosen": -1.0355056524276733, | |
| "logits/rejected": -0.9172827005386353, | |
| "logps/chosen": -237.4662628173828, | |
| "logps/rejected": -356.5818786621094, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -9.470785140991211, | |
| "rewards/margins": 12.789039611816406, | |
| "rewards/rejected": -22.259824752807617, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 25.4, | |
| "grad_norm": 0.003703987691551447, | |
| "learning_rate": 7.061224489795919e-05, | |
| "logits/chosen": -0.9380186200141907, | |
| "logits/rejected": -0.8530542254447937, | |
| "logps/chosen": -240.56121826171875, | |
| "logps/rejected": -359.228271484375, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -9.604844093322754, | |
| "rewards/margins": 12.866401672363281, | |
| "rewards/rejected": -22.47124481201172, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 26.16, | |
| "grad_norm": 0.007798569742590189, | |
| "learning_rate": 6.653061224489796e-05, | |
| "logits/chosen": -1.1456927061080933, | |
| "logits/rejected": -1.036659598350525, | |
| "logps/chosen": -246.2130126953125, | |
| "logps/rejected": -367.6144714355469, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -10.008074760437012, | |
| "rewards/margins": 12.975547790527344, | |
| "rewards/rejected": -22.983623504638672, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 26.96, | |
| "grad_norm": 0.0026127954479306936, | |
| "learning_rate": 6.244897959183675e-05, | |
| "logits/chosen": -0.9940476417541504, | |
| "logits/rejected": -0.8720202445983887, | |
| "logps/chosen": -234.91995239257812, | |
| "logps/rejected": -348.8143615722656, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -9.309648513793945, | |
| "rewards/margins": 12.443445205688477, | |
| "rewards/rejected": -21.753093719482422, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 27.72, | |
| "grad_norm": 0.0053157140500843525, | |
| "learning_rate": 5.836734693877551e-05, | |
| "logits/chosen": -0.9642550945281982, | |
| "logits/rejected": -0.8671989440917969, | |
| "logps/chosen": -247.78762817382812, | |
| "logps/rejected": -368.91265869140625, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -10.423563003540039, | |
| "rewards/margins": 12.920208930969238, | |
| "rewards/rejected": -23.34377098083496, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 28.48, | |
| "grad_norm": 0.0024100164882838726, | |
| "learning_rate": 5.428571428571428e-05, | |
| "logits/chosen": -1.0245031118392944, | |
| "logits/rejected": -0.9381424784660339, | |
| "logps/chosen": -250.5137481689453, | |
| "logps/rejected": -367.24285888671875, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -10.55148696899414, | |
| "rewards/margins": 13.011177062988281, | |
| "rewards/rejected": -23.562665939331055, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 29.24, | |
| "grad_norm": 0.006509924773126841, | |
| "learning_rate": 5.0204081632653066e-05, | |
| "logits/chosen": -1.0565561056137085, | |
| "logits/rejected": -0.9717170596122742, | |
| "logps/chosen": -231.0986328125, | |
| "logps/rejected": -354.435791015625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.792767524719238, | |
| "rewards/margins": 12.750945091247559, | |
| "rewards/rejected": -21.543716430664062, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 0.005250515416264534, | |
| "learning_rate": 4.612244897959184e-05, | |
| "logits/chosen": -1.0130590200424194, | |
| "logits/rejected": -0.9052179455757141, | |
| "logps/chosen": -254.4619140625, | |
| "logps/rejected": -371.8909606933594, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -10.911393165588379, | |
| "rewards/margins": 12.962776184082031, | |
| "rewards/rejected": -23.874168395996094, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 30.8, | |
| "grad_norm": 0.003937486559152603, | |
| "learning_rate": 4.2040816326530615e-05, | |
| "logits/chosen": -1.068474531173706, | |
| "logits/rejected": -0.9814627766609192, | |
| "logps/chosen": -243.24130249023438, | |
| "logps/rejected": -363.50213623046875, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -9.871267318725586, | |
| "rewards/margins": 12.944598197937012, | |
| "rewards/rejected": -22.815866470336914, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 31.56, | |
| "grad_norm": 0.0019006684888154268, | |
| "learning_rate": 3.795918367346939e-05, | |
| "logits/chosen": -0.9314201474189758, | |
| "logits/rejected": -0.8595296740531921, | |
| "logps/chosen": -246.16455078125, | |
| "logps/rejected": -366.99896240234375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -10.104809761047363, | |
| "rewards/margins": 13.133968353271484, | |
| "rewards/rejected": -23.2387752532959, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 32.32, | |
| "grad_norm": 0.0005793002783320844, | |
| "learning_rate": 3.387755102040816e-05, | |
| "logits/chosen": -1.098380208015442, | |
| "logits/rejected": -0.9939666390419006, | |
| "logps/chosen": -252.05137634277344, | |
| "logps/rejected": -373.0602722167969, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -10.78331470489502, | |
| "rewards/margins": 12.870674133300781, | |
| "rewards/rejected": -23.65399169921875, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 33.08, | |
| "grad_norm": 0.0039499541744589806, | |
| "learning_rate": 2.9795918367346944e-05, | |
| "logits/chosen": -0.9925128817558289, | |
| "logits/rejected": -0.8669744729995728, | |
| "logps/chosen": -248.5987548828125, | |
| "logps/rejected": -369.866943359375, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -10.700756072998047, | |
| "rewards/margins": 13.224184036254883, | |
| "rewards/rejected": -23.924942016601562, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 33.88, | |
| "grad_norm": 0.0036473730579018593, | |
| "learning_rate": 2.5714285714285714e-05, | |
| "logits/chosen": -1.0634641647338867, | |
| "logits/rejected": -0.9340164065361023, | |
| "logps/chosen": -245.3182830810547, | |
| "logps/rejected": -368.24359130859375, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -10.03348445892334, | |
| "rewards/margins": 13.248746871948242, | |
| "rewards/rejected": -23.2822322845459, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 34.64, | |
| "grad_norm": 0.0020049242302775383, | |
| "learning_rate": 2.1632653061224492e-05, | |
| "logits/chosen": -0.9656567573547363, | |
| "logits/rejected": -0.9384468197822571, | |
| "logps/chosen": -248.10145568847656, | |
| "logps/rejected": -374.0014953613281, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -10.594840049743652, | |
| "rewards/margins": 13.160733222961426, | |
| "rewards/rejected": -23.755571365356445, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 35.4, | |
| "grad_norm": 0.0025986225809901953, | |
| "learning_rate": 1.7551020408163266e-05, | |
| "logits/chosen": -1.0361719131469727, | |
| "logits/rejected": -0.925805926322937, | |
| "logps/chosen": -250.07704162597656, | |
| "logps/rejected": -365.67340087890625, | |
| "loss": 0.0173, | |
| "rewards/accuracies": 0.9736841917037964, | |
| "rewards/chosen": -10.339940071105957, | |
| "rewards/margins": 12.819701194763184, | |
| "rewards/rejected": -23.159643173217773, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 36.16, | |
| "grad_norm": 0.000490883132442832, | |
| "learning_rate": 1.3469387755102042e-05, | |
| "logits/chosen": -1.0601824522018433, | |
| "logits/rejected": -0.9313357472419739, | |
| "logps/chosen": -246.32838439941406, | |
| "logps/rejected": -368.8094787597656, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -9.99472427368164, | |
| "rewards/margins": 13.403642654418945, | |
| "rewards/rejected": -23.398366928100586, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 36.96, | |
| "grad_norm": 0.0015991979744285345, | |
| "learning_rate": 9.387755102040816e-06, | |
| "logits/chosen": -0.973480224609375, | |
| "logits/rejected": -0.883003830909729, | |
| "logps/chosen": -242.6292724609375, | |
| "logps/rejected": -364.950927734375, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -10.180352210998535, | |
| "rewards/margins": 12.991645812988281, | |
| "rewards/rejected": -23.171995162963867, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 37.72, | |
| "grad_norm": 0.0009250708390027285, | |
| "learning_rate": 5.306122448979592e-06, | |
| "logits/chosen": -0.9940276741981506, | |
| "logits/rejected": -0.9116230010986328, | |
| "logps/chosen": -246.79559326171875, | |
| "logps/rejected": -374.49420166015625, | |
| "loss": 0.0087, | |
| "rewards/accuracies": 0.9868420958518982, | |
| "rewards/chosen": -10.589569091796875, | |
| "rewards/margins": 13.223187446594238, | |
| "rewards/rejected": -23.81275749206543, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 38.48, | |
| "grad_norm": 0.001163547858595848, | |
| "learning_rate": 1.2244897959183673e-06, | |
| "logits/chosen": -1.030373215675354, | |
| "logits/rejected": -0.9079036712646484, | |
| "logps/chosen": -249.69741821289062, | |
| "logps/rejected": -373.4053039550781, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -10.532529830932617, | |
| "rewards/margins": 13.417372703552246, | |
| "rewards/rejected": -23.949905395507812, | |
| "step": 500 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 39, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |