diff --git "a/modpo/use_reward/0.9helpful_0.1harmless/checkpoint-6000/trainer_state.json" "b/modpo/use_reward/0.9helpful_0.1harmless/checkpoint-6000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/modpo/use_reward/0.9helpful_0.1harmless/checkpoint-6000/trainer_state.json" @@ -0,0 +1,7849 @@ +{ + "best_metric": 0.7478973269462585, + "best_model_checkpoint": "./output/modpo/lm/(0.9)helpful+(1-0.9)harmless/checkpoint-6000", + "epoch": 1.5, + "eval_steps": 3000, + "global_step": 6000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "accuracy": 0.48750001192092896, + "epoch": 0.0, + "learning_rate": 9.999991842021366e-06, + "logps/chosen": -93.38034057617188, + "logps/margins": 11.913398742675781, + "logps/rejected": -105.29373931884766, + "loss": 1.8336, + "rewards/chosen": 5.109144687652588, + "rewards/margins": 0.11058555543422699, + "rewards/rejected": 4.99855899810791, + "step": 10 + }, + { + "accuracy": 0.5, + "epoch": 0.01, + "learning_rate": 9.999951060762224e-06, + "logps/chosen": -135.76004028320312, + "logps/margins": -6.75750732421875, + "logps/rejected": -129.00253295898438, + "loss": 1.902, + "rewards/chosen": 6.5460524559021, + "rewards/margins": -0.06892473995685577, + "rewards/rejected": 6.614976406097412, + "step": 20 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.01, + "learning_rate": 9.999885057043291e-06, + "logps/chosen": -114.8847427368164, + "logps/margins": -1.0581402778625488, + "logps/rejected": -113.82661437988281, + "loss": 1.8892, + "rewards/chosen": 5.887767791748047, + "rewards/margins": 0.1564546674489975, + "rewards/rejected": 5.731313705444336, + "step": 30 + }, + { + "accuracy": 0.5, + "epoch": 0.01, + "learning_rate": 9.999779163833078e-06, + "logps/chosen": -120.33304595947266, + "logps/margins": 2.6488749980926514, + "logps/rejected": -122.98191833496094, + "loss": 1.8469, + "rewards/chosen": 6.306436538696289, + "rewards/margins": 0.2671842575073242, + "rewards/rejected": 6.039252281188965, + "step": 40 + }, + { + "accuracy": 0.5, + "epoch": 0.01, + "learning_rate": 9.999639002125162e-06, + "logps/chosen": -111.88919830322266, + "logps/margins": 2.995131015777588, + "logps/rejected": -114.88432312011719, + "loss": 1.5436, + "rewards/chosen": 6.004732608795166, + "rewards/margins": 0.041681695729494095, + "rewards/rejected": 5.963050842285156, + "step": 50 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.01, + "learning_rate": 9.999464572880208e-06, + "logps/chosen": -102.3431167602539, + "logps/margins": -4.245372772216797, + "logps/rejected": -98.09774017333984, + "loss": 1.6289, + "rewards/chosen": 5.343186855316162, + "rewards/margins": 0.5225377678871155, + "rewards/rejected": 4.82064962387085, + "step": 60 + }, + { + "accuracy": 0.4375, + "epoch": 0.02, + "learning_rate": 9.999255877293756e-06, + "logps/chosen": -94.01886749267578, + "logps/margins": 10.370506286621094, + "logps/rejected": -104.3893814086914, + "loss": 1.6697, + "rewards/chosen": 4.6346635818481445, + "rewards/margins": 0.017035793513059616, + "rewards/rejected": 4.6176276206970215, + "step": 70 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.02, + "learning_rate": 9.999012916796205e-06, + "logps/chosen": -106.21893310546875, + "logps/margins": -1.2109102010726929, + "logps/rejected": -105.00801849365234, + "loss": 1.5927, + "rewards/chosen": 5.781027317047119, + "rewards/margins": 0.2608667016029358, + "rewards/rejected": 5.52016019821167, + "step": 80 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.02, + "learning_rate": 9.998735693052809e-06, + "logps/chosen": -122.74066162109375, + "logps/margins": -12.237990379333496, + "logps/rejected": -110.5026626586914, + "loss": 1.8353, + "rewards/chosen": 6.233822822570801, + "rewards/margins": 0.4213257431983948, + "rewards/rejected": 5.812496662139893, + "step": 90 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.03, + "learning_rate": 9.998424207963658e-06, + "logps/chosen": -114.6319580078125, + "logps/margins": 10.312301635742188, + "logps/rejected": -124.94425964355469, + "loss": 1.9814, + "rewards/chosen": 5.596517086029053, + "rewards/margins": -0.6197859644889832, + "rewards/rejected": 6.216302871704102, + "step": 100 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.03, + "learning_rate": 9.998114579692461e-06, + "logps/chosen": -118.44438171386719, + "logps/margins": -1.2328144311904907, + "logps/rejected": -117.21158599853516, + "loss": 1.8886, + "rewards/chosen": 5.977663993835449, + "rewards/margins": 0.4643685221672058, + "rewards/rejected": 5.513296604156494, + "step": 110 + }, + { + "accuracy": 0.5, + "epoch": 0.03, + "learning_rate": 9.997738004122153e-06, + "logps/chosen": -123.51136779785156, + "logps/margins": -12.292215347290039, + "logps/rejected": -111.2191390991211, + "loss": 1.8606, + "rewards/chosen": 6.552567958831787, + "rewards/margins": 0.5511636734008789, + "rewards/rejected": 6.001404762268066, + "step": 120 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.03, + "learning_rate": 9.997327174044255e-06, + "logps/chosen": -120.5638656616211, + "logps/margins": 4.9105987548828125, + "logps/rejected": -125.47447204589844, + "loss": 1.6104, + "rewards/chosen": 6.336331367492676, + "rewards/margins": 0.21816179156303406, + "rewards/rejected": 6.118170261383057, + "step": 130 + }, + { + "accuracy": 0.4375, + "epoch": 0.04, + "learning_rate": 9.996882092274593e-06, + "logps/chosen": -130.65631103515625, + "logps/margins": -0.399627685546875, + "logps/rejected": -130.2566680908203, + "loss": 1.6149, + "rewards/chosen": 6.756723880767822, + "rewards/margins": 0.40364760160446167, + "rewards/rejected": 6.353075981140137, + "step": 140 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.04, + "learning_rate": 9.996402761863761e-06, + "logps/chosen": -126.5574951171875, + "logps/margins": -22.661352157592773, + "logps/rejected": -103.89615631103516, + "loss": 1.9684, + "rewards/chosen": 6.470550537109375, + "rewards/margins": 1.592828392982483, + "rewards/rejected": 4.877722263336182, + "step": 150 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.04, + "learning_rate": 9.995889186097093e-06, + "logps/chosen": -116.50477600097656, + "logps/margins": -6.241939544677734, + "logps/rejected": -110.26285552978516, + "loss": 2.0023, + "rewards/chosen": 6.077791213989258, + "rewards/margins": 0.7048730850219727, + "rewards/rejected": 5.372918128967285, + "step": 160 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.04, + "learning_rate": 9.995341368494632e-06, + "logps/chosen": -107.73106384277344, + "logps/margins": 2.258758544921875, + "logps/rejected": -109.98982238769531, + "loss": 1.9799, + "rewards/chosen": 5.525664329528809, + "rewards/margins": 0.11249864101409912, + "rewards/rejected": 5.41316556930542, + "step": 170 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.04, + "learning_rate": 9.994759312811127e-06, + "logps/chosen": -108.63682556152344, + "logps/margins": 8.882070541381836, + "logps/rejected": -117.5188980102539, + "loss": 1.9222, + "rewards/chosen": 5.461306095123291, + "rewards/margins": -0.3686821460723877, + "rewards/rejected": 5.8299880027771, + "step": 180 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.05, + "learning_rate": 9.994143023035987e-06, + "logps/chosen": -127.97639465332031, + "logps/margins": -1.8207848072052002, + "logps/rejected": -126.1556167602539, + "loss": 2.1024, + "rewards/chosen": 6.416459560394287, + "rewards/margins": 0.29830387234687805, + "rewards/rejected": 6.118155479431152, + "step": 190 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.05, + "learning_rate": 9.993492503393263e-06, + "logps/chosen": -103.14668273925781, + "logps/margins": 0.341788113117218, + "logps/rejected": -103.48848724365234, + "loss": 1.8638, + "rewards/chosen": 5.473768711090088, + "rewards/margins": 0.33015722036361694, + "rewards/rejected": 5.143611431121826, + "step": 200 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.05, + "learning_rate": 9.992807758341618e-06, + "logps/chosen": -133.42172241210938, + "logps/margins": -10.071492195129395, + "logps/rejected": -123.3502197265625, + "loss": 1.9081, + "rewards/chosen": 7.246514320373535, + "rewards/margins": 1.0005252361297607, + "rewards/rejected": 6.2459893226623535, + "step": 210 + }, + { + "accuracy": 0.5, + "epoch": 0.06, + "learning_rate": 9.992088792574298e-06, + "logps/chosen": -118.6468505859375, + "logps/margins": -12.179803848266602, + "logps/rejected": -106.46702575683594, + "loss": 1.6112, + "rewards/chosen": 5.844839572906494, + "rewards/margins": 0.9412676692008972, + "rewards/rejected": 4.903571605682373, + "step": 220 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.06, + "learning_rate": 9.991335611019095e-06, + "logps/chosen": -117.39437103271484, + "logps/margins": 8.628446578979492, + "logps/rejected": -126.0228271484375, + "loss": 1.6355, + "rewards/chosen": 6.054928779602051, + "rewards/margins": 0.04293825477361679, + "rewards/rejected": 6.011991024017334, + "step": 230 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.06, + "learning_rate": 9.990548218838316e-06, + "logps/chosen": -139.26937866210938, + "logps/margins": 0.9801594018936157, + "logps/rejected": -140.24954223632812, + "loss": 1.8812, + "rewards/chosen": 7.323525905609131, + "rewards/margins": 0.39287251234054565, + "rewards/rejected": 6.9306535720825195, + "step": 240 + }, + { + "accuracy": 0.4375, + "epoch": 0.06, + "learning_rate": 9.989726621428749e-06, + "logps/chosen": -120.38094329833984, + "logps/margins": 6.584187984466553, + "logps/rejected": -126.96512603759766, + "loss": 2.05, + "rewards/chosen": 5.8485941886901855, + "rewards/margins": -0.35422176122665405, + "rewards/rejected": 6.202816009521484, + "step": 250 + }, + { + "accuracy": 0.375, + "epoch": 0.07, + "learning_rate": 9.988870824421626e-06, + "logps/chosen": -107.37776947021484, + "logps/margins": 21.43488121032715, + "logps/rejected": -128.81265258789062, + "loss": 1.9466, + "rewards/chosen": 5.4471845626831055, + "rewards/margins": -0.8748068809509277, + "rewards/rejected": 6.321991443634033, + "step": 260 + }, + { + "accuracy": 0.5625, + "epoch": 0.07, + "learning_rate": 9.98798083368258e-06, + "logps/chosen": -123.7821044921875, + "logps/margins": 3.1715950965881348, + "logps/rejected": -126.95368957519531, + "loss": 1.891, + "rewards/chosen": 6.255987167358398, + "rewards/margins": 0.27523988485336304, + "rewards/rejected": 5.980746269226074, + "step": 270 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 0.07, + "learning_rate": 9.987056655311611e-06, + "logps/chosen": -85.76194763183594, + "logps/margins": 25.137910842895508, + "logps/rejected": -110.89986419677734, + "loss": 2.0658, + "rewards/chosen": 4.518533706665039, + "rewards/margins": -0.9006655812263489, + "rewards/rejected": 5.419199466705322, + "step": 280 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.07, + "learning_rate": 9.986098295643039e-06, + "logps/chosen": -97.52912902832031, + "logps/margins": -6.8081374168396, + "logps/rejected": -90.72099304199219, + "loss": 1.9598, + "rewards/chosen": 4.835160255432129, + "rewards/margins": 0.4367671608924866, + "rewards/rejected": 4.398393154144287, + "step": 290 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.07, + "learning_rate": 9.985105761245461e-06, + "logps/chosen": -166.57162475585938, + "logps/margins": -12.8536958694458, + "logps/rejected": -153.71792602539062, + "loss": 1.8972, + "rewards/chosen": 7.88947057723999, + "rewards/margins": 0.5230575203895569, + "rewards/rejected": 7.3664140701293945, + "step": 300 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.08, + "learning_rate": 9.984183266512048e-06, + "logps/chosen": -99.32716369628906, + "logps/margins": -9.03243350982666, + "logps/rejected": -90.29472351074219, + "loss": 1.797, + "rewards/chosen": 5.267740726470947, + "rewards/margins": 0.7921133041381836, + "rewards/rejected": 4.4756269454956055, + "step": 310 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.08, + "learning_rate": 9.983125819064725e-06, + "logps/chosen": -121.1572265625, + "logps/margins": -7.001384735107422, + "logps/rejected": -114.15582275390625, + "loss": 1.5913, + "rewards/chosen": 6.0754194259643555, + "rewards/margins": 0.7616773843765259, + "rewards/rejected": 5.313742160797119, + "step": 320 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.08, + "learning_rate": 9.98203421726176e-06, + "logps/chosen": -141.78111267089844, + "logps/margins": -14.702291488647461, + "logps/rejected": -127.07881927490234, + "loss": 1.6056, + "rewards/chosen": 7.1940436363220215, + "rewards/margins": 1.245888590812683, + "rewards/rejected": 5.948155879974365, + "step": 330 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 0.09, + "learning_rate": 9.980908468584996e-06, + "logps/chosen": -113.09223937988281, + "logps/margins": 12.976480484008789, + "logps/rejected": -126.06871032714844, + "loss": 2.0549, + "rewards/chosen": 6.2704925537109375, + "rewards/margins": 0.014498258009552956, + "rewards/rejected": 6.2559943199157715, + "step": 340 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.09, + "learning_rate": 9.979748580750312e-06, + "logps/chosen": -123.32918548583984, + "logps/margins": -6.59561014175415, + "logps/rejected": -116.73358154296875, + "loss": 1.6623, + "rewards/chosen": 6.254411697387695, + "rewards/margins": 0.42041030526161194, + "rewards/rejected": 5.834001064300537, + "step": 350 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.09, + "learning_rate": 9.978554561707585e-06, + "logps/chosen": -108.12400817871094, + "logps/margins": -3.0909698009490967, + "logps/rejected": -105.03304290771484, + "loss": 1.5683, + "rewards/chosen": 5.455635070800781, + "rewards/margins": 0.2906327247619629, + "rewards/rejected": 5.165002346038818, + "step": 360 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.09, + "learning_rate": 9.977326419640625e-06, + "logps/chosen": -119.7560806274414, + "logps/margins": -12.51281452178955, + "logps/rejected": -107.24327087402344, + "loss": 1.8813, + "rewards/chosen": 5.812981605529785, + "rewards/margins": 0.7436602711677551, + "rewards/rejected": 5.069321632385254, + "step": 370 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.1, + "learning_rate": 9.976064162967119e-06, + "logps/chosen": -133.084716796875, + "logps/margins": -29.818653106689453, + "logps/rejected": -103.26605224609375, + "loss": 1.9166, + "rewards/chosen": 6.449902534484863, + "rewards/margins": 1.5401164293289185, + "rewards/rejected": 4.909786224365234, + "step": 380 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.1, + "learning_rate": 9.974767800338575e-06, + "logps/chosen": -127.6087646484375, + "logps/margins": 6.958619594573975, + "logps/rejected": -134.5673828125, + "loss": 2.1036, + "rewards/chosen": 6.486325740814209, + "rewards/margins": 0.15423394739627838, + "rewards/rejected": 6.332091808319092, + "step": 390 + }, + { + "accuracy": 0.5625, + "epoch": 0.1, + "learning_rate": 9.97343734064027e-06, + "logps/chosen": -113.54975891113281, + "logps/margins": 3.7485451698303223, + "logps/rejected": -117.2983169555664, + "loss": 1.6023, + "rewards/chosen": 5.503276348114014, + "rewards/margins": -0.21575994789600372, + "rewards/rejected": 5.719037055969238, + "step": 400 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.1, + "learning_rate": 9.972072792991174e-06, + "logps/chosen": -119.14912414550781, + "logps/margins": 2.4335358142852783, + "logps/rejected": -121.5826644897461, + "loss": 1.7093, + "rewards/chosen": 5.5079240798950195, + "rewards/margins": 0.09962362051010132, + "rewards/rejected": 5.408300399780273, + "step": 410 + }, + { + "accuracy": 0.5, + "epoch": 0.1, + "learning_rate": 9.970674166743902e-06, + "logps/chosen": -111.63450622558594, + "logps/margins": 17.147808074951172, + "logps/rejected": -128.7823028564453, + "loss": 1.6341, + "rewards/chosen": 5.540095329284668, + "rewards/margins": -0.38307538628578186, + "rewards/rejected": 5.923171043395996, + "step": 420 + }, + { + "accuracy": 0.5625, + "epoch": 0.11, + "learning_rate": 9.96924147148464e-06, + "logps/chosen": -136.34762573242188, + "logps/margins": -22.253097534179688, + "logps/rejected": -114.09454345703125, + "loss": 1.3059, + "rewards/chosen": 6.375910758972168, + "rewards/margins": 1.2692499160766602, + "rewards/rejected": 5.106661796569824, + "step": 430 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.11, + "learning_rate": 9.967774717033087e-06, + "logps/chosen": -124.4723892211914, + "logps/margins": -15.860456466674805, + "logps/rejected": -108.6119384765625, + "loss": 1.5872, + "rewards/chosen": 5.366484642028809, + "rewards/margins": 0.8536316156387329, + "rewards/rejected": 4.512853145599365, + "step": 440 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.11, + "learning_rate": 9.966273913442378e-06, + "logps/chosen": -133.6871337890625, + "logps/margins": 1.6693938970565796, + "logps/rejected": -135.3565216064453, + "loss": 1.6193, + "rewards/chosen": 6.031922340393066, + "rewards/margins": -0.014263915829360485, + "rewards/rejected": 6.0461859703063965, + "step": 450 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.12, + "learning_rate": 9.964739070999025e-06, + "logps/chosen": -118.98359680175781, + "logps/margins": -11.851815223693848, + "logps/rejected": -107.13179016113281, + "loss": 1.6527, + "rewards/chosen": 5.6537957191467285, + "rewards/margins": 1.0875290632247925, + "rewards/rejected": 4.566267013549805, + "step": 460 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.12, + "learning_rate": 9.963170200222842e-06, + "logps/chosen": -142.10147094726562, + "logps/margins": 4.2556023597717285, + "logps/rejected": -146.35708618164062, + "loss": 1.7692, + "rewards/chosen": 5.802268028259277, + "rewards/margins": -0.14006440341472626, + "rewards/rejected": 5.9423322677612305, + "step": 470 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.12, + "learning_rate": 9.961567311866875e-06, + "logps/chosen": -125.03279113769531, + "logps/margins": -12.079760551452637, + "logps/rejected": -112.95301818847656, + "loss": 1.7471, + "rewards/chosen": 5.379918575286865, + "rewards/margins": 0.44170933961868286, + "rewards/rejected": 4.938209533691406, + "step": 480 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.12, + "learning_rate": 9.959930416917323e-06, + "logps/chosen": -118.28129577636719, + "logps/margins": 4.664868354797363, + "logps/rejected": -122.94615173339844, + "loss": 1.563, + "rewards/chosen": 4.769405364990234, + "rewards/margins": -0.01859002187848091, + "rewards/rejected": 4.787995338439941, + "step": 490 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.12, + "learning_rate": 9.958259526593465e-06, + "logps/chosen": -120.03038024902344, + "logps/margins": 13.22276496887207, + "logps/rejected": -133.25315856933594, + "loss": 1.6885, + "rewards/chosen": 4.757286071777344, + "rewards/margins": -0.2749316394329071, + "rewards/rejected": 5.032217979431152, + "step": 500 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.13, + "learning_rate": 9.956554652347591e-06, + "logps/chosen": -124.68904876708984, + "logps/margins": 8.64969539642334, + "logps/rejected": -133.3387451171875, + "loss": 1.6476, + "rewards/chosen": 4.9756550788879395, + "rewards/margins": 0.06309598684310913, + "rewards/rejected": 4.912558555603027, + "step": 510 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.13, + "learning_rate": 9.954815805864911e-06, + "logps/chosen": -136.79421997070312, + "logps/margins": -9.166430473327637, + "logps/rejected": -127.6278076171875, + "loss": 1.4208, + "rewards/chosen": 5.172430515289307, + "rewards/margins": 0.9554030299186707, + "rewards/rejected": 4.217027187347412, + "step": 520 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.13, + "learning_rate": 9.953042999063482e-06, + "logps/chosen": -117.29981994628906, + "logps/margins": -2.5968971252441406, + "logps/rejected": -114.70291900634766, + "loss": 1.1629, + "rewards/chosen": 3.88728666305542, + "rewards/margins": 0.352276086807251, + "rewards/rejected": 3.535010576248169, + "step": 530 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.14, + "learning_rate": 9.951236244094127e-06, + "logps/chosen": -147.85391235351562, + "logps/margins": -23.046194076538086, + "logps/rejected": -124.8077163696289, + "loss": 1.1118, + "rewards/chosen": 4.093080997467041, + "rewards/margins": 0.7096647024154663, + "rewards/rejected": 3.383415937423706, + "step": 540 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.14, + "learning_rate": 9.949395553340349e-06, + "logps/chosen": -137.44674682617188, + "logps/margins": -6.84769344329834, + "logps/rejected": -130.5990753173828, + "loss": 1.1553, + "rewards/chosen": 3.6311817169189453, + "rewards/margins": 0.5994359254837036, + "rewards/rejected": 3.0317459106445312, + "step": 550 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.14, + "learning_rate": 9.947520939418245e-06, + "logps/chosen": -131.57958984375, + "logps/margins": 6.997740268707275, + "logps/rejected": -138.57733154296875, + "loss": 1.2034, + "rewards/chosen": 3.0507590770721436, + "rewards/margins": 0.17111361026763916, + "rewards/rejected": 2.879645824432373, + "step": 560 + }, + { + "accuracy": 0.4375, + "epoch": 0.14, + "learning_rate": 9.945612415176426e-06, + "logps/chosen": -140.58132934570312, + "logps/margins": -19.616926193237305, + "logps/rejected": -120.96439361572266, + "loss": 1.0881, + "rewards/chosen": 2.8486058712005615, + "rewards/margins": 0.25832873582839966, + "rewards/rejected": 2.5902771949768066, + "step": 570 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.14, + "learning_rate": 9.943669993695919e-06, + "logps/chosen": -151.5093231201172, + "logps/margins": -18.318321228027344, + "logps/rejected": -133.1909942626953, + "loss": 1.1161, + "rewards/chosen": 2.3254706859588623, + "rewards/margins": 0.2963281571865082, + "rewards/rejected": 2.029142379760742, + "step": 580 + }, + { + "accuracy": 0.5625, + "epoch": 0.15, + "learning_rate": 9.941693688290085e-06, + "logps/chosen": -147.8477020263672, + "logps/margins": 4.324751377105713, + "logps/rejected": -152.1724395751953, + "loss": 1.0862, + "rewards/chosen": 2.0108516216278076, + "rewards/margins": 0.4088834226131439, + "rewards/rejected": 1.6019681692123413, + "step": 590 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.15, + "learning_rate": 9.939683512504528e-06, + "logps/chosen": -149.92286682128906, + "logps/margins": -1.7963898181915283, + "logps/rejected": -148.12648010253906, + "loss": 1.1338, + "rewards/chosen": 1.4565500020980835, + "rewards/margins": -0.2254774123430252, + "rewards/rejected": 1.6820274591445923, + "step": 600 + }, + { + "accuracy": 0.4000000059604645, + "epoch": 0.15, + "learning_rate": 9.937639480116993e-06, + "logps/chosen": -149.3422393798828, + "logps/margins": 5.6771135330200195, + "logps/rejected": -155.01934814453125, + "loss": 1.122, + "rewards/chosen": 1.8707154989242554, + "rewards/margins": -0.23694105446338654, + "rewards/rejected": 2.107656717300415, + "step": 610 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.15, + "learning_rate": 9.935561605137289e-06, + "logps/chosen": -154.9318389892578, + "logps/margins": -26.140798568725586, + "logps/rejected": -128.79103088378906, + "loss": 1.0635, + "rewards/chosen": 1.8631260395050049, + "rewards/margins": 0.3234782814979553, + "rewards/rejected": 1.5396478176116943, + "step": 620 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.16, + "learning_rate": 9.933449901807171e-06, + "logps/chosen": -161.14865112304688, + "logps/margins": 23.34848403930664, + "logps/rejected": -184.49713134765625, + "loss": 0.9723, + "rewards/chosen": 2.019709587097168, + "rewards/margins": -0.17707836627960205, + "rewards/rejected": 2.1967883110046387, + "step": 630 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.16, + "learning_rate": 9.931520457528119e-06, + "logps/chosen": -189.56741333007812, + "logps/margins": -4.537158012390137, + "logps/rejected": -185.0302276611328, + "loss": 0.9951, + "rewards/chosen": 2.1462416648864746, + "rewards/margins": 0.35901370644569397, + "rewards/rejected": 1.7872282266616821, + "step": 640 + }, + { + "accuracy": 0.5, + "epoch": 0.16, + "learning_rate": 9.92934452039859e-06, + "logps/chosen": -164.42535400390625, + "logps/margins": -43.8677864074707, + "logps/rejected": -120.55757904052734, + "loss": 1.1578, + "rewards/chosen": 1.4558143615722656, + "rewards/margins": -0.1470411866903305, + "rewards/rejected": 1.6028554439544678, + "step": 650 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.17, + "learning_rate": 9.927134797530561e-06, + "logps/chosen": -144.9272003173828, + "logps/margins": 6.433265686035156, + "logps/rejected": -151.36045837402344, + "loss": 1.0144, + "rewards/chosen": 1.3912265300750732, + "rewards/margins": 0.060493774712085724, + "rewards/rejected": 1.3307329416275024, + "step": 660 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.17, + "learning_rate": 9.924891304069477e-06, + "logps/chosen": -139.90447998046875, + "logps/margins": -4.661585330963135, + "logps/rejected": -135.24290466308594, + "loss": 0.9865, + "rewards/chosen": 1.7835805416107178, + "rewards/margins": -0.09399493783712387, + "rewards/rejected": 1.8775756359100342, + "step": 670 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.17, + "learning_rate": 9.92261405539225e-06, + "logps/chosen": -162.5684814453125, + "logps/margins": -37.87208557128906, + "logps/rejected": -124.6963882446289, + "loss": 1.0156, + "rewards/chosen": 1.867201566696167, + "rewards/margins": 0.2595711350440979, + "rewards/rejected": 1.6076304912567139, + "step": 680 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.17, + "learning_rate": 9.920303067107145e-06, + "logps/chosen": -157.85446166992188, + "logps/margins": -10.372437477111816, + "logps/rejected": -147.48202514648438, + "loss": 1.0458, + "rewards/chosen": 2.28122615814209, + "rewards/margins": 0.5393844842910767, + "rewards/rejected": 1.7418416738510132, + "step": 690 + }, + { + "accuracy": 0.625, + "epoch": 0.17, + "learning_rate": 9.917958355053681e-06, + "logps/chosen": -168.91946411132812, + "logps/margins": -25.6357479095459, + "logps/rejected": -143.28372192382812, + "loss": 1.0233, + "rewards/chosen": 2.2451210021972656, + "rewards/margins": 0.22837433218955994, + "rewards/rejected": 2.016746997833252, + "step": 700 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.18, + "learning_rate": 9.915579935302521e-06, + "logps/chosen": -142.89210510253906, + "logps/margins": 1.3719890117645264, + "logps/rejected": -144.2640838623047, + "loss": 1.0359, + "rewards/chosen": 1.5756165981292725, + "rewards/margins": 0.19118764996528625, + "rewards/rejected": 1.3844289779663086, + "step": 710 + }, + { + "accuracy": 0.4375, + "epoch": 0.18, + "learning_rate": 9.913167824155358e-06, + "logps/chosen": -140.5428924560547, + "logps/margins": -9.356057167053223, + "logps/rejected": -131.1868438720703, + "loss": 0.9801, + "rewards/chosen": 1.5717885494232178, + "rewards/margins": -0.061538565903902054, + "rewards/rejected": 1.6333271265029907, + "step": 720 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.18, + "learning_rate": 9.910722038144809e-06, + "logps/chosen": -147.37010192871094, + "logps/margins": 3.831080675125122, + "logps/rejected": -151.201171875, + "loss": 0.9843, + "rewards/chosen": 1.8470379114151, + "rewards/margins": 0.03712614253163338, + "rewards/rejected": 1.8099114894866943, + "step": 730 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.18, + "learning_rate": 9.908242594034293e-06, + "logps/chosen": -140.7128448486328, + "logps/margins": 5.858675479888916, + "logps/rejected": -146.57150268554688, + "loss": 1.0252, + "rewards/chosen": 1.9413522481918335, + "rewards/margins": -0.4406326413154602, + "rewards/rejected": 2.3819847106933594, + "step": 740 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.19, + "learning_rate": 9.905729508817931e-06, + "logps/chosen": -159.4577178955078, + "logps/margins": -2.965106248855591, + "logps/rejected": -156.4925994873047, + "loss": 1.0375, + "rewards/chosen": 1.6923128366470337, + "rewards/margins": 0.30112066864967346, + "rewards/rejected": 1.3911921977996826, + "step": 750 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.19, + "learning_rate": 9.90318279972041e-06, + "logps/chosen": -146.78695678710938, + "logps/margins": 10.177630424499512, + "logps/rejected": -156.96456909179688, + "loss": 0.9355, + "rewards/chosen": 1.7363805770874023, + "rewards/margins": 0.20642626285552979, + "rewards/rejected": 1.5299543142318726, + "step": 760 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.19, + "learning_rate": 9.900862027536175e-06, + "logps/chosen": -180.95803833007812, + "logps/margins": -19.27467918395996, + "logps/rejected": -161.683349609375, + "loss": 1.0141, + "rewards/chosen": 2.0001168251037598, + "rewards/margins": 0.4025591015815735, + "rewards/rejected": 1.597557783126831, + "step": 770 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.2, + "learning_rate": 9.898251481343773e-06, + "logps/chosen": -177.97366333007812, + "logps/margins": 13.344332695007324, + "logps/rejected": -191.31797790527344, + "loss": 1.0125, + "rewards/chosen": 1.648980736732483, + "rewards/margins": 0.008140301331877708, + "rewards/rejected": 1.6408401727676392, + "step": 780 + }, + { + "accuracy": 0.4375, + "epoch": 0.2, + "learning_rate": 9.895607362524631e-06, + "logps/chosen": -181.28817749023438, + "logps/margins": -24.692012786865234, + "logps/rejected": -156.59619140625, + "loss": 0.9706, + "rewards/chosen": 1.7742103338241577, + "rewards/margins": -0.08051016181707382, + "rewards/rejected": 1.8547203540802002, + "step": 790 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.2, + "learning_rate": 9.892929689201545e-06, + "logps/chosen": -178.93397521972656, + "logps/margins": -11.522749900817871, + "logps/rejected": -167.41122436523438, + "loss": 0.9685, + "rewards/chosen": 1.7500654458999634, + "rewards/margins": -0.028841054067015648, + "rewards/rejected": 1.7789065837860107, + "step": 800 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.2, + "learning_rate": 9.890218479727294e-06, + "logps/chosen": -177.42764282226562, + "logps/margins": -15.491392135620117, + "logps/rejected": -161.93624877929688, + "loss": 0.9007, + "rewards/chosen": 2.0393176078796387, + "rewards/margins": 0.34074297547340393, + "rewards/rejected": 1.6985746622085571, + "step": 810 + }, + { + "accuracy": 0.4375, + "epoch": 0.2, + "learning_rate": 9.887473752684515e-06, + "logps/chosen": -161.78909301757812, + "logps/margins": -2.778823137283325, + "logps/rejected": -159.01028442382812, + "loss": 1.0436, + "rewards/chosen": 1.682908058166504, + "rewards/margins": -0.168172687292099, + "rewards/rejected": 1.8510808944702148, + "step": 820 + }, + { + "accuracy": 0.38749998807907104, + "epoch": 0.21, + "learning_rate": 9.884695526885574e-06, + "logps/chosen": -151.64749145507812, + "logps/margins": -0.1611984223127365, + "logps/rejected": -151.48629760742188, + "loss": 0.9551, + "rewards/chosen": 1.3659042119979858, + "rewards/margins": -0.38882842659950256, + "rewards/rejected": 1.7547328472137451, + "step": 830 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.21, + "learning_rate": 9.881883821372436e-06, + "logps/chosen": -140.57131958007812, + "logps/margins": -5.530011177062988, + "logps/rejected": -135.0413055419922, + "loss": 0.9358, + "rewards/chosen": 1.2723175287246704, + "rewards/margins": -0.11282980442047119, + "rewards/rejected": 1.3851473331451416, + "step": 840 + }, + { + "accuracy": 0.4124999940395355, + "epoch": 0.21, + "learning_rate": 9.879038655416536e-06, + "logps/chosen": -179.29159545898438, + "logps/margins": -5.520005226135254, + "logps/rejected": -173.77159118652344, + "loss": 1.0266, + "rewards/chosen": 1.6179869174957275, + "rewards/margins": -0.3334459364414215, + "rewards/rejected": 1.9514325857162476, + "step": 850 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.21, + "learning_rate": 9.876160048518646e-06, + "logps/chosen": -146.00396728515625, + "logps/margins": -1.2213438749313354, + "logps/rejected": -144.7826385498047, + "loss": 0.8996, + "rewards/chosen": 1.306060791015625, + "rewards/margins": -0.033823203295469284, + "rewards/rejected": 1.3398840427398682, + "step": 860 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.22, + "learning_rate": 9.87324802040875e-06, + "logps/chosen": -150.81924438476562, + "logps/margins": -2.4973397254943848, + "logps/rejected": -148.3218994140625, + "loss": 1.0671, + "rewards/chosen": 1.421643853187561, + "rewards/margins": 0.08586404472589493, + "rewards/rejected": 1.3357797861099243, + "step": 870 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.22, + "learning_rate": 9.870302591045892e-06, + "logps/chosen": -180.90975952148438, + "logps/margins": -19.462383270263672, + "logps/rejected": -161.44740295410156, + "loss": 0.8835, + "rewards/chosen": 1.8036632537841797, + "rewards/margins": 0.12437786906957626, + "rewards/rejected": 1.6792854070663452, + "step": 880 + }, + { + "accuracy": 0.4000000059604645, + "epoch": 0.22, + "learning_rate": 9.86732378061805e-06, + "logps/chosen": -160.3754119873047, + "logps/margins": -9.997312545776367, + "logps/rejected": -150.37808227539062, + "loss": 0.9699, + "rewards/chosen": 1.781908392906189, + "rewards/margins": -0.18765194714069366, + "rewards/rejected": 1.9695602655410767, + "step": 890 + }, + { + "accuracy": 0.4000000059604645, + "epoch": 0.23, + "learning_rate": 9.864311609542002e-06, + "logps/chosen": -127.0546646118164, + "logps/margins": 17.639873504638672, + "logps/rejected": -144.69454956054688, + "loss": 0.9206, + "rewards/chosen": 1.5364660024642944, + "rewards/margins": -0.029984693974256516, + "rewards/rejected": 1.566450834274292, + "step": 900 + }, + { + "accuracy": 0.5, + "epoch": 0.23, + "learning_rate": 9.861266098463169e-06, + "logps/chosen": -147.8013153076172, + "logps/margins": -11.108197212219238, + "logps/rejected": -136.693115234375, + "loss": 0.9103, + "rewards/chosen": 2.1111650466918945, + "rewards/margins": 0.1486923098564148, + "rewards/rejected": 1.962472677230835, + "step": 910 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.23, + "learning_rate": 9.858187268255496e-06, + "logps/chosen": -140.09344482421875, + "logps/margins": -4.577602386474609, + "logps/rejected": -135.51583862304688, + "loss": 0.9162, + "rewards/chosen": 2.0921835899353027, + "rewards/margins": 0.14385966956615448, + "rewards/rejected": 1.9483239650726318, + "step": 920 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.23, + "learning_rate": 9.85507514002129e-06, + "logps/chosen": -196.1652374267578, + "logps/margins": -25.233646392822266, + "logps/rejected": -170.93161010742188, + "loss": 0.8237, + "rewards/chosen": 2.5406811237335205, + "rewards/margins": 0.5653001070022583, + "rewards/rejected": 1.9753808975219727, + "step": 930 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.23, + "learning_rate": 9.851929735091086e-06, + "logps/chosen": -163.54891967773438, + "logps/margins": -39.696372985839844, + "logps/rejected": -123.85255432128906, + "loss": 0.8814, + "rewards/chosen": 1.9840795993804932, + "rewards/margins": 0.36643046140670776, + "rewards/rejected": 1.6176488399505615, + "step": 940 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.24, + "learning_rate": 9.848751075023494e-06, + "logps/chosen": -152.39613342285156, + "logps/margins": -6.493553161621094, + "logps/rejected": -145.90255737304688, + "loss": 0.9086, + "rewards/chosen": 2.0433950424194336, + "rewards/margins": 0.1634274423122406, + "rewards/rejected": 1.8799673318862915, + "step": 950 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.24, + "learning_rate": 9.84553918160506e-06, + "logps/chosen": -157.0005645751953, + "logps/margins": 5.283432960510254, + "logps/rejected": -162.2840118408203, + "loss": 0.9004, + "rewards/chosen": 2.185926914215088, + "rewards/margins": 0.35132330656051636, + "rewards/rejected": 1.8346033096313477, + "step": 960 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.24, + "learning_rate": 9.842294076850113e-06, + "logps/chosen": -161.79470825195312, + "logps/margins": 6.562838077545166, + "logps/rejected": -168.35755920410156, + "loss": 0.9151, + "rewards/chosen": 1.789467453956604, + "rewards/margins": 0.3674093782901764, + "rewards/rejected": 1.4220579862594604, + "step": 970 + }, + { + "accuracy": 0.5625, + "epoch": 0.24, + "learning_rate": 9.839015783000597e-06, + "logps/chosen": -175.66624450683594, + "logps/margins": -25.768230438232422, + "logps/rejected": -149.89797973632812, + "loss": 0.8288, + "rewards/chosen": 1.8941459655761719, + "rewards/margins": 0.39253947138786316, + "rewards/rejected": 1.5016063451766968, + "step": 980 + }, + { + "accuracy": 0.625, + "epoch": 0.25, + "learning_rate": 9.835704322525954e-06, + "logps/chosen": -167.6163787841797, + "logps/margins": -3.1078941822052, + "logps/rejected": -164.5084991455078, + "loss": 0.8941, + "rewards/chosen": 1.8167266845703125, + "rewards/margins": 0.3749098479747772, + "rewards/rejected": 1.441817045211792, + "step": 990 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.25, + "learning_rate": 9.832359718122939e-06, + "logps/chosen": -158.74981689453125, + "logps/margins": -21.72418785095215, + "logps/rejected": -137.025634765625, + "loss": 0.8179, + "rewards/chosen": 1.9152021408081055, + "rewards/margins": 0.3552405536174774, + "rewards/rejected": 1.5599615573883057, + "step": 1000 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.25, + "learning_rate": 9.828981992715479e-06, + "logps/chosen": -145.79006958007812, + "logps/margins": -7.068762302398682, + "logps/rejected": -138.72128295898438, + "loss": 0.8845, + "rewards/chosen": 2.168658494949341, + "rewards/margins": 0.2922838628292084, + "rewards/rejected": 1.87637460231781, + "step": 1010 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.26, + "learning_rate": 9.825571169454511e-06, + "logps/chosen": -168.78416442871094, + "logps/margins": -13.750897407531738, + "logps/rejected": -155.0332489013672, + "loss": 0.8311, + "rewards/chosen": 2.1091105937957764, + "rewards/margins": 0.5144209861755371, + "rewards/rejected": 1.5946893692016602, + "step": 1020 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.26, + "learning_rate": 9.822127271717825e-06, + "logps/chosen": -136.5363311767578, + "logps/margins": 1.720564842224121, + "logps/rejected": -138.2569122314453, + "loss": 0.8649, + "rewards/chosen": 1.9865185022354126, + "rewards/margins": 0.4811386466026306, + "rewards/rejected": 1.5053794384002686, + "step": 1030 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.26, + "learning_rate": 9.818650323109904e-06, + "logps/chosen": -170.8836669921875, + "logps/margins": 10.731355667114258, + "logps/rejected": -181.61502075195312, + "loss": 0.8166, + "rewards/chosen": 1.8703758716583252, + "rewards/margins": 0.29043930768966675, + "rewards/rejected": 1.5799365043640137, + "step": 1040 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.26, + "learning_rate": 9.815140347461764e-06, + "logps/chosen": -140.1612091064453, + "logps/margins": -8.15324592590332, + "logps/rejected": -132.00796508789062, + "loss": 0.8072, + "rewards/chosen": 1.5995724201202393, + "rewards/margins": 0.1591363251209259, + "rewards/rejected": 1.4404361248016357, + "step": 1050 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.27, + "learning_rate": 9.811597368830783e-06, + "logps/chosen": -139.02783203125, + "logps/margins": 1.2959182262420654, + "logps/rejected": -140.32376098632812, + "loss": 0.9571, + "rewards/chosen": 1.4544225931167603, + "rewards/margins": -0.052755583077669144, + "rewards/rejected": 1.5071781873703003, + "step": 1060 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.27, + "learning_rate": 9.808021411500546e-06, + "logps/chosen": -136.28179931640625, + "logps/margins": -6.942041873931885, + "logps/rejected": -129.3397674560547, + "loss": 0.861, + "rewards/chosen": 1.8051027059555054, + "rewards/margins": 0.35802793502807617, + "rewards/rejected": 1.4470746517181396, + "step": 1070 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.27, + "learning_rate": 9.80441249998067e-06, + "logps/chosen": -162.54974365234375, + "logps/margins": -6.373490333557129, + "logps/rejected": -156.17626953125, + "loss": 0.8857, + "rewards/chosen": 1.5471500158309937, + "rewards/margins": 0.1165885329246521, + "rewards/rejected": 1.4305615425109863, + "step": 1080 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.27, + "learning_rate": 9.800770659006646e-06, + "logps/chosen": -178.4582977294922, + "logps/margins": -22.30353355407715, + "logps/rejected": -156.15475463867188, + "loss": 0.8928, + "rewards/chosen": 1.782330870628357, + "rewards/margins": 0.24549439549446106, + "rewards/rejected": 1.5368363857269287, + "step": 1090 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.28, + "learning_rate": 9.79709591353966e-06, + "logps/chosen": -138.05850219726562, + "logps/margins": 25.72446060180664, + "logps/rejected": -163.782958984375, + "loss": 0.9741, + "rewards/chosen": 1.8192415237426758, + "rewards/margins": -0.07427935302257538, + "rewards/rejected": 1.8935210704803467, + "step": 1100 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.28, + "learning_rate": 9.793388288766426e-06, + "logps/chosen": -162.1980743408203, + "logps/margins": -8.0133638381958, + "logps/rejected": -154.18472290039062, + "loss": 0.823, + "rewards/chosen": 1.9859386682510376, + "rewards/margins": 0.21398241817951202, + "rewards/rejected": 1.771956205368042, + "step": 1110 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.28, + "learning_rate": 9.78964781009901e-06, + "logps/chosen": -154.9339599609375, + "logps/margins": 22.017454147338867, + "logps/rejected": -176.951416015625, + "loss": 0.8848, + "rewards/chosen": 1.8680915832519531, + "rewards/margins": 0.042823903262615204, + "rewards/rejected": 1.8252675533294678, + "step": 1120 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.28, + "learning_rate": 9.785874503174662e-06, + "logps/chosen": -144.37045288085938, + "logps/margins": -26.884754180908203, + "logps/rejected": -117.4857177734375, + "loss": 0.9758, + "rewards/chosen": 1.4734928607940674, + "rewards/margins": 0.02336726151406765, + "rewards/rejected": 1.4501255750656128, + "step": 1130 + }, + { + "accuracy": 0.5, + "epoch": 0.28, + "learning_rate": 9.782068393855638e-06, + "logps/chosen": -136.8752899169922, + "logps/margins": 5.071300983428955, + "logps/rejected": -141.9465789794922, + "loss": 0.9041, + "rewards/chosen": 1.4244577884674072, + "rewards/margins": -0.05741150304675102, + "rewards/rejected": 1.4818692207336426, + "step": 1140 + }, + { + "accuracy": 0.5, + "epoch": 0.29, + "learning_rate": 9.778229508229018e-06, + "logps/chosen": -163.36964416503906, + "logps/margins": -4.506762504577637, + "logps/rejected": -158.86288452148438, + "loss": 0.8715, + "rewards/chosen": 1.357474684715271, + "rewards/margins": -0.045003920793533325, + "rewards/rejected": 1.402478575706482, + "step": 1150 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.29, + "learning_rate": 9.774357872606535e-06, + "logps/chosen": -186.77279663085938, + "logps/margins": -25.582441329956055, + "logps/rejected": -161.19036865234375, + "loss": 0.9185, + "rewards/chosen": 1.7494417428970337, + "rewards/margins": 0.10148294270038605, + "rewards/rejected": 1.647958755493164, + "step": 1160 + }, + { + "accuracy": 0.5, + "epoch": 0.29, + "learning_rate": 9.770453513524386e-06, + "logps/chosen": -146.69717407226562, + "logps/margins": -8.776657104492188, + "logps/rejected": -137.92051696777344, + "loss": 0.9027, + "rewards/chosen": 1.8887031078338623, + "rewards/margins": -0.03893054649233818, + "rewards/rejected": 1.9276340007781982, + "step": 1170 + }, + { + "accuracy": 0.5, + "epoch": 0.29, + "learning_rate": 9.766516457743058e-06, + "logps/chosen": -160.04222106933594, + "logps/margins": 5.474094390869141, + "logps/rejected": -165.5163116455078, + "loss": 0.9204, + "rewards/chosen": 1.7510411739349365, + "rewards/margins": 0.10351963341236115, + "rewards/rejected": 1.6475216150283813, + "step": 1180 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.3, + "learning_rate": 9.762546732247141e-06, + "logps/chosen": -142.36509704589844, + "logps/margins": -12.649444580078125, + "logps/rejected": -129.7156524658203, + "loss": 0.8391, + "rewards/chosen": 1.8815829753875732, + "rewards/margins": 0.42103347182273865, + "rewards/rejected": 1.4605494737625122, + "step": 1190 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.3, + "learning_rate": 9.758544364245142e-06, + "logps/chosen": -183.9442138671875, + "logps/margins": -11.90736198425293, + "logps/rejected": -172.03683471679688, + "loss": 0.8927, + "rewards/chosen": 2.1199896335601807, + "rewards/margins": -0.1310155689716339, + "rewards/rejected": 2.251004934310913, + "step": 1200 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.3, + "learning_rate": 9.7545093811693e-06, + "logps/chosen": -144.1505126953125, + "logps/margins": -5.138739109039307, + "logps/rejected": -139.0117950439453, + "loss": 0.8977, + "rewards/chosen": 1.6401937007904053, + "rewards/margins": -0.007590307388454676, + "rewards/rejected": 1.6477839946746826, + "step": 1210 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.3, + "learning_rate": 9.750441810675398e-06, + "logps/chosen": -144.27529907226562, + "logps/margins": 1.4390274286270142, + "logps/rejected": -145.71432495117188, + "loss": 0.8574, + "rewards/chosen": 1.7657264471054077, + "rewards/margins": -0.02588719129562378, + "rewards/rejected": 1.7916133403778076, + "step": 1220 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.31, + "learning_rate": 9.746341680642572e-06, + "logps/chosen": -144.88253784179688, + "logps/margins": -5.341070652008057, + "logps/rejected": -139.54147338867188, + "loss": 0.8674, + "rewards/chosen": 1.7323579788208008, + "rewards/margins": 0.19564639031887054, + "rewards/rejected": 1.5367116928100586, + "step": 1230 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.31, + "learning_rate": 9.742209019173125e-06, + "logps/chosen": -157.19650268554688, + "logps/margins": -16.224735260009766, + "logps/rejected": -140.97178649902344, + "loss": 0.9133, + "rewards/chosen": 1.8641220331192017, + "rewards/margins": -0.02793651446700096, + "rewards/rejected": 1.8920583724975586, + "step": 1240 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.31, + "learning_rate": 9.738043854592327e-06, + "logps/chosen": -186.19308471679688, + "logps/margins": 13.080360412597656, + "logps/rejected": -199.27345275878906, + "loss": 0.8, + "rewards/chosen": 2.0433402061462402, + "rewards/margins": 0.1339813470840454, + "rewards/rejected": 1.9093587398529053, + "step": 1250 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.32, + "learning_rate": 9.733846215448226e-06, + "logps/chosen": -166.62429809570312, + "logps/margins": -30.089427947998047, + "logps/rejected": -136.5348663330078, + "loss": 0.8331, + "rewards/chosen": 1.8805596828460693, + "rewards/margins": 0.24542757868766785, + "rewards/rejected": 1.6351318359375, + "step": 1260 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.32, + "learning_rate": 9.72961613051145e-06, + "logps/chosen": -171.31813049316406, + "logps/margins": -6.958993434906006, + "logps/rejected": -164.35914611816406, + "loss": 0.852, + "rewards/chosen": 2.167374610900879, + "rewards/margins": 0.27035635709762573, + "rewards/rejected": 1.8970181941986084, + "step": 1270 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.32, + "learning_rate": 9.72535362877501e-06, + "logps/chosen": -154.11856079101562, + "logps/margins": -23.472820281982422, + "logps/rejected": -130.64576721191406, + "loss": 0.775, + "rewards/chosen": 1.6765483617782593, + "rewards/margins": 0.23718352615833282, + "rewards/rejected": 1.439365029335022, + "step": 1280 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.32, + "learning_rate": 9.721058739454104e-06, + "logps/chosen": -145.79397583007812, + "logps/margins": -10.878741264343262, + "logps/rejected": -134.9152374267578, + "loss": 1.0619, + "rewards/chosen": 2.1911463737487793, + "rewards/margins": 0.34823596477508545, + "rewards/rejected": 1.8429104089736938, + "step": 1290 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.33, + "learning_rate": 9.716731491985912e-06, + "logps/chosen": -170.170166015625, + "logps/margins": -16.27873992919922, + "logps/rejected": -153.89141845703125, + "loss": 0.8089, + "rewards/chosen": 1.8226245641708374, + "rewards/margins": 0.2551000118255615, + "rewards/rejected": 1.5675245523452759, + "step": 1300 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.33, + "learning_rate": 9.712371916029398e-06, + "logps/chosen": -122.67264556884766, + "logps/margins": -7.548191070556641, + "logps/rejected": -115.12443542480469, + "loss": 0.8954, + "rewards/chosen": 1.513864278793335, + "rewards/margins": 0.12121949344873428, + "rewards/rejected": 1.392645001411438, + "step": 1310 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.33, + "learning_rate": 9.707980041465107e-06, + "logps/chosen": -140.32586669921875, + "logps/margins": 39.533992767333984, + "logps/rejected": -179.85984802246094, + "loss": 0.8803, + "rewards/chosen": 1.7652714252471924, + "rewards/margins": 0.18915463984012604, + "rewards/rejected": 1.5761165618896484, + "step": 1320 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.33, + "learning_rate": 9.703555898394953e-06, + "logps/chosen": -129.5006103515625, + "logps/margins": 2.545534610748291, + "logps/rejected": -132.04612731933594, + "loss": 0.8371, + "rewards/chosen": 1.8392177820205688, + "rewards/margins": 0.05049833655357361, + "rewards/rejected": 1.7887195348739624, + "step": 1330 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.34, + "learning_rate": 9.699099517142028e-06, + "logps/chosen": -156.70346069335938, + "logps/margins": 4.285260200500488, + "logps/rejected": -160.9887237548828, + "loss": 0.8255, + "rewards/chosen": 1.8164058923721313, + "rewards/margins": -0.15966984629631042, + "rewards/rejected": 1.9760758876800537, + "step": 1340 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.34, + "learning_rate": 9.694610928250374e-06, + "logps/chosen": -164.55393981933594, + "logps/margins": 1.9614883661270142, + "logps/rejected": -166.5154266357422, + "loss": 0.8349, + "rewards/chosen": 1.6731479167938232, + "rewards/margins": -0.0794978216290474, + "rewards/rejected": 1.752645492553711, + "step": 1350 + }, + { + "accuracy": 0.5625, + "epoch": 0.34, + "learning_rate": 9.690090162484795e-06, + "logps/chosen": -150.24972534179688, + "logps/margins": 22.32929039001465, + "logps/rejected": -172.57901000976562, + "loss": 0.8213, + "rewards/chosen": 1.6465768814086914, + "rewards/margins": 0.19454586505889893, + "rewards/rejected": 1.452030897140503, + "step": 1360 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.34, + "learning_rate": 9.685537250830625e-06, + "logps/chosen": -147.19168090820312, + "logps/margins": -8.287646293640137, + "logps/rejected": -138.90403747558594, + "loss": 0.8349, + "rewards/chosen": 2.058687925338745, + "rewards/margins": 0.2557194232940674, + "rewards/rejected": 1.802968978881836, + "step": 1370 + }, + { + "accuracy": 0.5625, + "epoch": 0.34, + "learning_rate": 9.680952224493534e-06, + "logps/chosen": -178.67041015625, + "logps/margins": -31.229793548583984, + "logps/rejected": -147.44061279296875, + "loss": 0.7511, + "rewards/chosen": 2.066157817840576, + "rewards/margins": 0.2523805499076843, + "rewards/rejected": 1.8137773275375366, + "step": 1380 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.35, + "learning_rate": 9.676335114899301e-06, + "logps/chosen": -139.00100708007812, + "logps/margins": 17.114728927612305, + "logps/rejected": -156.11573791503906, + "loss": 0.8169, + "rewards/chosen": 1.6733551025390625, + "rewards/margins": 0.125624418258667, + "rewards/rejected": 1.547730803489685, + "step": 1390 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.35, + "learning_rate": 9.671685953693606e-06, + "logps/chosen": -146.45240783691406, + "logps/margins": -3.9639840126037598, + "logps/rejected": -142.4884033203125, + "loss": 0.824, + "rewards/chosen": 1.6949371099472046, + "rewards/margins": 0.1053905338048935, + "rewards/rejected": 1.58954656124115, + "step": 1400 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.35, + "learning_rate": 9.66700477274181e-06, + "logps/chosen": -191.86099243164062, + "logps/margins": -63.5861701965332, + "logps/rejected": -128.27481079101562, + "loss": 0.7761, + "rewards/chosen": 1.8620039224624634, + "rewards/margins": 0.4337918758392334, + "rewards/rejected": 1.4282119274139404, + "step": 1410 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.35, + "learning_rate": 9.662291604128739e-06, + "logps/chosen": -151.45095825195312, + "logps/margins": -31.19417381286621, + "logps/rejected": -120.25679016113281, + "loss": 0.8699, + "rewards/chosen": 2.004847288131714, + "rewards/margins": 0.2191537618637085, + "rewards/rejected": 1.7856934070587158, + "step": 1420 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.36, + "learning_rate": 9.65754648015846e-06, + "logps/chosen": -161.12332153320312, + "logps/margins": -21.93655014038086, + "logps/rejected": -139.18673706054688, + "loss": 0.9375, + "rewards/chosen": 1.6424764394760132, + "rewards/margins": -0.1259138137102127, + "rewards/rejected": 1.7683902978897095, + "step": 1430 + }, + { + "accuracy": 0.675000011920929, + "epoch": 0.36, + "learning_rate": 9.652769433354065e-06, + "logps/chosen": -161.1542510986328, + "logps/margins": -8.030868530273438, + "logps/rejected": -153.12338256835938, + "loss": 0.8442, + "rewards/chosen": 1.9806150197982788, + "rewards/margins": 0.5255073308944702, + "rewards/rejected": 1.4551074504852295, + "step": 1440 + }, + { + "accuracy": 0.4375, + "epoch": 0.36, + "learning_rate": 9.647960496457444e-06, + "logps/chosen": -166.8245086669922, + "logps/margins": -14.479487419128418, + "logps/rejected": -152.34503173828125, + "loss": 0.9106, + "rewards/chosen": 1.7062251567840576, + "rewards/margins": -0.04488358646631241, + "rewards/rejected": 1.7511088848114014, + "step": 1450 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.36, + "learning_rate": 9.643119702429057e-06, + "logps/chosen": -173.20440673828125, + "logps/margins": -28.684823989868164, + "logps/rejected": -144.5195770263672, + "loss": 0.824, + "rewards/chosen": 1.960404634475708, + "rewards/margins": 0.31568005681037903, + "rewards/rejected": 1.6447244882583618, + "step": 1460 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.37, + "learning_rate": 9.638247084447723e-06, + "logps/chosen": -164.98948669433594, + "logps/margins": -8.795886039733887, + "logps/rejected": -156.193603515625, + "loss": 0.8262, + "rewards/chosen": 2.0697386264801025, + "rewards/margins": 0.5673502087593079, + "rewards/rejected": 1.50238835811615, + "step": 1470 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.37, + "learning_rate": 9.633342675910374e-06, + "logps/chosen": -153.81039428710938, + "logps/margins": -25.131587982177734, + "logps/rejected": -128.67880249023438, + "loss": 0.8208, + "rewards/chosen": 1.3760143518447876, + "rewards/margins": 0.07183907926082611, + "rewards/rejected": 1.3041752576828003, + "step": 1480 + }, + { + "accuracy": 0.5625, + "epoch": 0.37, + "learning_rate": 9.628406510431836e-06, + "logps/chosen": -151.3573760986328, + "logps/margins": -28.149948120117188, + "logps/rejected": -123.2074203491211, + "loss": 0.8767, + "rewards/chosen": 1.5835094451904297, + "rewards/margins": 0.44074511528015137, + "rewards/rejected": 1.1427643299102783, + "step": 1490 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.38, + "learning_rate": 9.6234386218446e-06, + "logps/chosen": -129.8954620361328, + "logps/margins": 9.33775520324707, + "logps/rejected": -139.23321533203125, + "loss": 0.8792, + "rewards/chosen": 1.6803743839263916, + "rewards/margins": 0.04734629765152931, + "rewards/rejected": 1.6330280303955078, + "step": 1500 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.38, + "learning_rate": 9.618439044198587e-06, + "logps/chosen": -145.76075744628906, + "logps/margins": -8.094935417175293, + "logps/rejected": -137.6658172607422, + "loss": 0.8854, + "rewards/chosen": 1.7104465961456299, + "rewards/margins": 0.07147760689258575, + "rewards/rejected": 1.638968825340271, + "step": 1510 + }, + { + "accuracy": 0.5, + "epoch": 0.38, + "learning_rate": 9.61340781176091e-06, + "logps/chosen": -149.36721801757812, + "logps/margins": -21.91195297241211, + "logps/rejected": -127.45526123046875, + "loss": 0.7673, + "rewards/chosen": 1.8591524362564087, + "rewards/margins": 0.3141605257987976, + "rewards/rejected": 1.5449917316436768, + "step": 1520 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.38, + "learning_rate": 9.608344959015649e-06, + "logps/chosen": -147.9269561767578, + "logps/margins": 7.347940921783447, + "logps/rejected": -155.27488708496094, + "loss": 0.8568, + "rewards/chosen": 1.672999382019043, + "rewards/margins": -0.042715318500995636, + "rewards/rejected": 1.715714693069458, + "step": 1530 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.39, + "learning_rate": 9.60325052066361e-06, + "logps/chosen": -151.431396484375, + "logps/margins": 4.354189395904541, + "logps/rejected": -155.78561401367188, + "loss": 0.848, + "rewards/chosen": 1.6201680898666382, + "rewards/margins": 0.04147641360759735, + "rewards/rejected": 1.5786917209625244, + "step": 1540 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.39, + "learning_rate": 9.598124531622084e-06, + "logps/chosen": -187.59036254882812, + "logps/margins": -24.431259155273438, + "logps/rejected": -163.15908813476562, + "loss": 0.8519, + "rewards/chosen": 1.748784065246582, + "rewards/margins": 0.050236739218235016, + "rewards/rejected": 1.69854736328125, + "step": 1550 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.39, + "learning_rate": 9.592967027024609e-06, + "logps/chosen": -166.96377563476562, + "logps/margins": -7.707032203674316, + "logps/rejected": -159.25674438476562, + "loss": 0.835, + "rewards/chosen": 1.425602674484253, + "rewards/margins": 0.29081496596336365, + "rewards/rejected": 1.1347877979278564, + "step": 1560 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.39, + "learning_rate": 9.587778042220736e-06, + "logps/chosen": -141.1848602294922, + "logps/margins": 20.280067443847656, + "logps/rejected": -161.4649200439453, + "loss": 0.8764, + "rewards/chosen": 1.8093258142471313, + "rewards/margins": -0.010090534575283527, + "rewards/rejected": 1.8194164037704468, + "step": 1570 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.4, + "learning_rate": 9.582557612775778e-06, + "logps/chosen": -164.12722778320312, + "logps/margins": -4.764612674713135, + "logps/rejected": -159.36260986328125, + "loss": 0.7378, + "rewards/chosen": 1.9242103099822998, + "rewards/margins": 0.3095531761646271, + "rewards/rejected": 1.6146570444107056, + "step": 1580 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.4, + "learning_rate": 9.577305774470568e-06, + "logps/chosen": -186.56993103027344, + "logps/margins": -32.362693786621094, + "logps/rejected": -154.20726013183594, + "loss": 0.7723, + "rewards/chosen": 1.6604382991790771, + "rewards/margins": 0.27905914187431335, + "rewards/rejected": 1.3813793659210205, + "step": 1590 + }, + { + "accuracy": 0.625, + "epoch": 0.4, + "learning_rate": 9.572022563301222e-06, + "logps/chosen": -151.0451202392578, + "logps/margins": -11.142895698547363, + "logps/rejected": -139.90220642089844, + "loss": 0.7617, + "rewards/chosen": 1.8053767681121826, + "rewards/margins": 0.327472448348999, + "rewards/rejected": 1.4779040813446045, + "step": 1600 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.4, + "learning_rate": 9.566708015478878e-06, + "logps/chosen": -178.99655151367188, + "logps/margins": 4.333693027496338, + "logps/rejected": -183.33023071289062, + "loss": 0.8338, + "rewards/chosen": 1.819528579711914, + "rewards/margins": 0.2881702780723572, + "rewards/rejected": 1.5313583612442017, + "step": 1610 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.41, + "learning_rate": 9.56136216742946e-06, + "logps/chosen": -155.27374267578125, + "logps/margins": 9.862548828125, + "logps/rejected": -165.1362762451172, + "loss": 0.8148, + "rewards/chosen": 1.8142101764678955, + "rewards/margins": 0.33326777815818787, + "rewards/rejected": 1.4809424877166748, + "step": 1620 + }, + { + "accuracy": 0.625, + "epoch": 0.41, + "learning_rate": 9.555985055793422e-06, + "logps/chosen": -133.46585083007812, + "logps/margins": -10.51818561553955, + "logps/rejected": -122.94764709472656, + "loss": 0.8442, + "rewards/chosen": 1.9428646564483643, + "rewards/margins": 0.40966564416885376, + "rewards/rejected": 1.5331989526748657, + "step": 1630 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.41, + "learning_rate": 9.550576717425501e-06, + "logps/chosen": -177.87936401367188, + "logps/margins": -38.132808685302734, + "logps/rejected": -139.74655151367188, + "loss": 0.8373, + "rewards/chosen": 1.8098485469818115, + "rewards/margins": 0.018934685736894608, + "rewards/rejected": 1.7909138202667236, + "step": 1640 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.41, + "learning_rate": 9.545137189394459e-06, + "logps/chosen": -176.143798828125, + "logps/margins": -5.887171745300293, + "logps/rejected": -170.2566375732422, + "loss": 0.9063, + "rewards/chosen": 1.7588393688201904, + "rewards/margins": -0.03469324856996536, + "rewards/rejected": 1.7935327291488647, + "step": 1650 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.41, + "learning_rate": 9.53966650898283e-06, + "logps/chosen": -145.28883361816406, + "logps/margins": -11.124507904052734, + "logps/rejected": -134.164306640625, + "loss": 0.8712, + "rewards/chosen": 1.575321912765503, + "rewards/margins": 0.09707476943731308, + "rewards/rejected": 1.4782471656799316, + "step": 1660 + }, + { + "accuracy": 0.5, + "epoch": 0.42, + "learning_rate": 9.534164713686677e-06, + "logps/chosen": -144.61868286132812, + "logps/margins": 25.188129425048828, + "logps/rejected": -169.8068389892578, + "loss": 0.8506, + "rewards/chosen": 1.3740068674087524, + "rewards/margins": -0.22523269057273865, + "rewards/rejected": 1.599239468574524, + "step": 1670 + }, + { + "accuracy": 0.5625, + "epoch": 0.42, + "learning_rate": 9.528631841215312e-06, + "logps/chosen": -137.08810424804688, + "logps/margins": 1.6299006938934326, + "logps/rejected": -138.71800231933594, + "loss": 0.8598, + "rewards/chosen": 1.5404658317565918, + "rewards/margins": 0.2575059235095978, + "rewards/rejected": 1.2829598188400269, + "step": 1680 + }, + { + "accuracy": 0.625, + "epoch": 0.42, + "learning_rate": 9.52306792949106e-06, + "logps/chosen": -158.11549377441406, + "logps/margins": -13.466079711914062, + "logps/rejected": -144.64942932128906, + "loss": 0.793, + "rewards/chosen": 1.9619176387786865, + "rewards/margins": 0.30079352855682373, + "rewards/rejected": 1.6611239910125732, + "step": 1690 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.42, + "learning_rate": 9.517473016648977e-06, + "logps/chosen": -157.74221801757812, + "logps/margins": 13.248675346374512, + "logps/rejected": -170.99087524414062, + "loss": 0.8689, + "rewards/chosen": 1.416425108909607, + "rewards/margins": -0.04252084344625473, + "rewards/rejected": 1.458945870399475, + "step": 1700 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.43, + "learning_rate": 9.511847141036616e-06, + "logps/chosen": -184.86428833007812, + "logps/margins": -30.957168579101562, + "logps/rejected": -153.9071044921875, + "loss": 0.7791, + "rewards/chosen": 1.7445602416992188, + "rewards/margins": 0.12616640329360962, + "rewards/rejected": 1.6183936595916748, + "step": 1710 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.43, + "learning_rate": 9.50619034121374e-06, + "logps/chosen": -155.2756805419922, + "logps/margins": 5.653558731079102, + "logps/rejected": -160.9292449951172, + "loss": 0.857, + "rewards/chosen": 1.6249401569366455, + "rewards/margins": 0.06854955852031708, + "rewards/rejected": 1.5563905239105225, + "step": 1720 + }, + { + "accuracy": 0.4375, + "epoch": 0.43, + "learning_rate": 9.500502655952064e-06, + "logps/chosen": -154.45733642578125, + "logps/margins": 3.067988634109497, + "logps/rejected": -157.52532958984375, + "loss": 0.9216, + "rewards/chosen": 1.165889024734497, + "rewards/margins": -0.16678759455680847, + "rewards/rejected": 1.3326765298843384, + "step": 1730 + }, + { + "accuracy": 0.4375, + "epoch": 0.43, + "learning_rate": 9.494784124234999e-06, + "logps/chosen": -179.69869995117188, + "logps/margins": -22.62604331970215, + "logps/rejected": -157.07266235351562, + "loss": 0.8273, + "rewards/chosen": 1.5295450687408447, + "rewards/margins": -0.042375583201646805, + "rewards/rejected": 1.57192063331604, + "step": 1740 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 0.44, + "learning_rate": 9.489034785257372e-06, + "logps/chosen": -179.21920776367188, + "logps/margins": -34.96796417236328, + "logps/rejected": -144.251220703125, + "loss": 0.9165, + "rewards/chosen": 1.5738520622253418, + "rewards/margins": -0.12565232813358307, + "rewards/rejected": 1.6995042562484741, + "step": 1750 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.44, + "learning_rate": 9.483254678425166e-06, + "logps/chosen": -162.9087371826172, + "logps/margins": -13.896394729614258, + "logps/rejected": -149.01234436035156, + "loss": 0.7608, + "rewards/chosen": 2.1526412963867188, + "rewards/margins": 0.2928185760974884, + "rewards/rejected": 1.8598226308822632, + "step": 1760 + }, + { + "accuracy": 0.5625, + "epoch": 0.44, + "learning_rate": 9.477443843355248e-06, + "logps/chosen": -171.17800903320312, + "logps/margins": -12.703057289123535, + "logps/rejected": -158.47496032714844, + "loss": 0.9419, + "rewards/chosen": 2.2187118530273438, + "rewards/margins": 0.2649349570274353, + "rewards/rejected": 1.9537767171859741, + "step": 1770 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.45, + "learning_rate": 9.471602319875092e-06, + "logps/chosen": -163.45155334472656, + "logps/margins": -3.0442728996276855, + "logps/rejected": -160.40728759765625, + "loss": 0.8717, + "rewards/chosen": 2.1406242847442627, + "rewards/margins": 0.17071543633937836, + "rewards/rejected": 1.9699089527130127, + "step": 1780 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.45, + "learning_rate": 9.465730148022516e-06, + "logps/chosen": -162.66729736328125, + "logps/margins": -4.228088855743408, + "logps/rejected": -158.439208984375, + "loss": 0.7452, + "rewards/chosen": 1.8978217840194702, + "rewards/margins": 0.3565039336681366, + "rewards/rejected": 1.5413178205490112, + "step": 1790 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.45, + "learning_rate": 9.459827368045398e-06, + "logps/chosen": -157.06532287597656, + "logps/margins": 12.102384567260742, + "logps/rejected": -169.16770935058594, + "loss": 0.8809, + "rewards/chosen": 1.3664157390594482, + "rewards/margins": -0.11095012724399567, + "rewards/rejected": 1.4773657321929932, + "step": 1800 + }, + { + "accuracy": 0.5625, + "epoch": 0.45, + "learning_rate": 9.453894020401405e-06, + "logps/chosen": -170.4252166748047, + "logps/margins": -24.851381301879883, + "logps/rejected": -145.57382202148438, + "loss": 0.854, + "rewards/chosen": 1.6877434253692627, + "rewards/margins": 0.0690455287694931, + "rewards/rejected": 1.6186978816986084, + "step": 1810 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.46, + "learning_rate": 9.447930145757714e-06, + "logps/chosen": -159.48239135742188, + "logps/margins": -30.633930206298828, + "logps/rejected": -128.84844970703125, + "loss": 0.8265, + "rewards/chosen": 1.6804593801498413, + "rewards/margins": 0.17598025500774384, + "rewards/rejected": 1.504479169845581, + "step": 1820 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.46, + "learning_rate": 9.44193578499074e-06, + "logps/chosen": -153.574951171875, + "logps/margins": 16.195648193359375, + "logps/rejected": -169.77061462402344, + "loss": 0.8313, + "rewards/chosen": 2.1873607635498047, + "rewards/margins": 0.2899624705314636, + "rewards/rejected": 1.8973983526229858, + "step": 1830 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.46, + "learning_rate": 9.435910979185838e-06, + "logps/chosen": -165.07798767089844, + "logps/margins": -10.375341415405273, + "logps/rejected": -154.70262145996094, + "loss": 0.775, + "rewards/chosen": 2.0324389934539795, + "rewards/margins": 0.29568594694137573, + "rewards/rejected": 1.7367531061172485, + "step": 1840 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.46, + "learning_rate": 9.429855769637046e-06, + "logps/chosen": -167.1012725830078, + "logps/margins": -13.259292602539062, + "logps/rejected": -153.8419647216797, + "loss": 0.7373, + "rewards/chosen": 1.9836927652359009, + "rewards/margins": 0.2919122576713562, + "rewards/rejected": 1.691780686378479, + "step": 1850 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.47, + "learning_rate": 9.423770197846782e-06, + "logps/chosen": -163.0049285888672, + "logps/margins": -33.225242614746094, + "logps/rejected": -129.77969360351562, + "loss": 0.7788, + "rewards/chosen": 1.8734190464019775, + "rewards/margins": 0.4123324453830719, + "rewards/rejected": 1.4610862731933594, + "step": 1860 + }, + { + "accuracy": 0.675000011920929, + "epoch": 0.47, + "learning_rate": 9.41765430552557e-06, + "logps/chosen": -149.49099731445312, + "logps/margins": -6.378541946411133, + "logps/rejected": -143.11245727539062, + "loss": 0.7847, + "rewards/chosen": 2.011195182800293, + "rewards/margins": 0.5788583159446716, + "rewards/rejected": 1.4323368072509766, + "step": 1870 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.47, + "learning_rate": 9.41150813459175e-06, + "logps/chosen": -166.43115234375, + "logps/margins": -41.93206024169922, + "logps/rejected": -124.49909973144531, + "loss": 0.8225, + "rewards/chosen": 2.0830183029174805, + "rewards/margins": 0.31741800904273987, + "rewards/rejected": 1.7656002044677734, + "step": 1880 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.47, + "learning_rate": 9.405331727171188e-06, + "logps/chosen": -167.52420043945312, + "logps/margins": -13.432635307312012, + "logps/rejected": -154.09158325195312, + "loss": 0.7891, + "rewards/chosen": 1.6620880365371704, + "rewards/margins": 0.29735979437828064, + "rewards/rejected": 1.3647282123565674, + "step": 1890 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.47, + "learning_rate": 9.399125125596998e-06, + "logps/chosen": -126.37520599365234, + "logps/margins": -0.17889738082885742, + "logps/rejected": -126.19630432128906, + "loss": 0.8321, + "rewards/chosen": 1.6267598867416382, + "rewards/margins": 0.07309775054454803, + "rewards/rejected": 1.5536620616912842, + "step": 1900 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.48, + "learning_rate": 9.392888372409241e-06, + "logps/chosen": -140.39883422851562, + "logps/margins": -4.906195640563965, + "logps/rejected": -135.4926300048828, + "loss": 0.9009, + "rewards/chosen": 1.4131845235824585, + "rewards/margins": -0.08801586925983429, + "rewards/rejected": 1.5012004375457764, + "step": 1910 + }, + { + "accuracy": 0.5, + "epoch": 0.48, + "learning_rate": 9.386621510354637e-06, + "logps/chosen": -161.00296020507812, + "logps/margins": -9.53292179107666, + "logps/rejected": -151.47003173828125, + "loss": 0.8814, + "rewards/chosen": 1.7925631999969482, + "rewards/margins": -0.0049194516614079475, + "rewards/rejected": 1.7974828481674194, + "step": 1920 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.48, + "learning_rate": 9.380324582386271e-06, + "logps/chosen": -177.1588134765625, + "logps/margins": -4.3390045166015625, + "logps/rejected": -172.81980895996094, + "loss": 0.7887, + "rewards/chosen": 2.100454330444336, + "rewards/margins": -0.054205410182476044, + "rewards/rejected": 2.1546597480773926, + "step": 1930 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.48, + "learning_rate": 9.373997631663305e-06, + "logps/chosen": -171.18698120117188, + "logps/margins": 7.4255876541137695, + "logps/rejected": -178.61257934570312, + "loss": 0.7667, + "rewards/chosen": 1.9432332515716553, + "rewards/margins": 0.1751747578382492, + "rewards/rejected": 1.7680585384368896, + "step": 1940 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.49, + "learning_rate": 9.36764070155067e-06, + "logps/chosen": -159.49673461914062, + "logps/margins": -13.000417709350586, + "logps/rejected": -146.49630737304688, + "loss": 0.8482, + "rewards/chosen": 1.8596569299697876, + "rewards/margins": 0.3872297406196594, + "rewards/rejected": 1.4724268913269043, + "step": 1950 + }, + { + "accuracy": 0.5, + "epoch": 0.49, + "learning_rate": 9.361253835618781e-06, + "logps/chosen": -156.4537811279297, + "logps/margins": -0.1926700621843338, + "logps/rejected": -156.26109313964844, + "loss": 0.8376, + "rewards/chosen": 1.8370577096939087, + "rewards/margins": 0.30063915252685547, + "rewards/rejected": 1.5364184379577637, + "step": 1960 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.49, + "learning_rate": 9.354837077643233e-06, + "logps/chosen": -190.13194274902344, + "logps/margins": -39.07542419433594, + "logps/rejected": -151.05653381347656, + "loss": 0.8207, + "rewards/chosen": 1.7069008350372314, + "rewards/margins": 0.30920490622520447, + "rewards/rejected": 1.3976958990097046, + "step": 1970 + }, + { + "accuracy": 0.625, + "epoch": 0.49, + "learning_rate": 9.348390471604495e-06, + "logps/chosen": -136.9534912109375, + "logps/margins": 15.227800369262695, + "logps/rejected": -152.18128967285156, + "loss": 0.8268, + "rewards/chosen": 1.5990936756134033, + "rewards/margins": 0.10957352817058563, + "rewards/rejected": 1.4895203113555908, + "step": 1980 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.5, + "learning_rate": 9.341914061687627e-06, + "logps/chosen": -166.63194274902344, + "logps/margins": -14.423248291015625, + "logps/rejected": -152.2086944580078, + "loss": 0.7765, + "rewards/chosen": 1.6230442523956299, + "rewards/margins": 0.3474811017513275, + "rewards/rejected": 1.2755630016326904, + "step": 1990 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.5, + "learning_rate": 9.33540789228195e-06, + "logps/chosen": -120.91500091552734, + "logps/margins": 8.692987442016602, + "logps/rejected": -129.60800170898438, + "loss": 0.8261, + "rewards/chosen": 1.6760156154632568, + "rewards/margins": 0.18651649355888367, + "rewards/rejected": 1.4894990921020508, + "step": 2000 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.5, + "learning_rate": 9.328872007980766e-06, + "logps/chosen": -167.70864868164062, + "logps/margins": -10.464682579040527, + "logps/rejected": -157.24395751953125, + "loss": 0.7936, + "rewards/chosen": 1.7660713195800781, + "rewards/margins": 0.13920119404792786, + "rewards/rejected": 1.6268701553344727, + "step": 2010 + }, + { + "accuracy": 0.5625, + "epoch": 0.51, + "learning_rate": 9.322306453581044e-06, + "logps/chosen": -158.08399963378906, + "logps/margins": -10.94042682647705, + "logps/rejected": -147.14356994628906, + "loss": 0.8144, + "rewards/chosen": 1.985290765762329, + "rewards/margins": 0.20295262336730957, + "rewards/rejected": 1.7823379039764404, + "step": 2020 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.51, + "learning_rate": 9.315711274083104e-06, + "logps/chosen": -146.8514862060547, + "logps/margins": 36.926002502441406, + "logps/rejected": -183.77749633789062, + "loss": 0.839, + "rewards/chosen": 1.5715597867965698, + "rewards/margins": -0.21107907593250275, + "rewards/rejected": 1.7826389074325562, + "step": 2030 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.51, + "learning_rate": 9.309086514690325e-06, + "logps/chosen": -165.84413146972656, + "logps/margins": -4.257134437561035, + "logps/rejected": -161.58700561523438, + "loss": 0.7648, + "rewards/chosen": 1.877524971961975, + "rewards/margins": 0.29181772470474243, + "rewards/rejected": 1.5857069492340088, + "step": 2040 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.51, + "learning_rate": 9.30243222080882e-06, + "logps/chosen": -162.48243713378906, + "logps/margins": -14.9164457321167, + "logps/rejected": -147.5659942626953, + "loss": 0.7444, + "rewards/chosen": 2.0277488231658936, + "rewards/margins": 0.3224651515483856, + "rewards/rejected": 1.705283761024475, + "step": 2050 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.52, + "learning_rate": 9.295748438047135e-06, + "logps/chosen": -167.5883026123047, + "logps/margins": -22.718687057495117, + "logps/rejected": -144.86959838867188, + "loss": 0.7477, + "rewards/chosen": 1.823158860206604, + "rewards/margins": 0.20282845199108124, + "rewards/rejected": 1.6203302145004272, + "step": 2060 + }, + { + "accuracy": 0.5625, + "epoch": 0.52, + "learning_rate": 9.289035212215934e-06, + "logps/chosen": -142.55181884765625, + "logps/margins": -10.19264030456543, + "logps/rejected": -132.35916137695312, + "loss": 0.7773, + "rewards/chosen": 1.799380898475647, + "rewards/margins": 0.1436457335948944, + "rewards/rejected": 1.6557352542877197, + "step": 2070 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.52, + "learning_rate": 9.28229258932768e-06, + "logps/chosen": -169.29144287109375, + "logps/margins": -27.8690185546875, + "logps/rejected": -141.42242431640625, + "loss": 0.7503, + "rewards/chosen": 1.7960469722747803, + "rewards/margins": 0.26247820258140564, + "rewards/rejected": 1.5335689783096313, + "step": 2080 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.52, + "learning_rate": 9.275520615596327e-06, + "logps/chosen": -152.72715759277344, + "logps/margins": -0.012831497006118298, + "logps/rejected": -152.71432495117188, + "loss": 0.7708, + "rewards/chosen": 1.7320560216903687, + "rewards/margins": 0.003945007920265198, + "rewards/rejected": 1.728110909461975, + "step": 2090 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.53, + "learning_rate": 9.268719337436996e-06, + "logps/chosen": -162.87667846679688, + "logps/margins": 2.2590041160583496, + "logps/rejected": -165.1356964111328, + "loss": 0.8203, + "rewards/chosen": 1.7294566631317139, + "rewards/margins": 0.255315363407135, + "rewards/rejected": 1.4741413593292236, + "step": 2100 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.53, + "learning_rate": 9.261888801465665e-06, + "logps/chosen": -159.4783477783203, + "logps/margins": -2.8032524585723877, + "logps/rejected": -156.67507934570312, + "loss": 0.8294, + "rewards/chosen": 1.7062733173370361, + "rewards/margins": 0.056447289884090424, + "rewards/rejected": 1.6498260498046875, + "step": 2110 + }, + { + "accuracy": 0.5, + "epoch": 0.53, + "learning_rate": 9.255029054498845e-06, + "logps/chosen": -152.52377319335938, + "logps/margins": 2.105691432952881, + "logps/rejected": -154.62945556640625, + "loss": 0.7723, + "rewards/chosen": 1.4842714071273804, + "rewards/margins": 0.08453354984521866, + "rewards/rejected": 1.39973783493042, + "step": 2120 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.53, + "learning_rate": 9.248140143553253e-06, + "logps/chosen": -125.8509521484375, + "logps/margins": 7.844496726989746, + "logps/rejected": -133.69544982910156, + "loss": 0.7172, + "rewards/chosen": 1.461629867553711, + "rewards/margins": 0.22742874920368195, + "rewards/rejected": 1.234201192855835, + "step": 2130 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.54, + "learning_rate": 9.2412221158455e-06, + "logps/chosen": -158.1907958984375, + "logps/margins": -40.407691955566406, + "logps/rejected": -117.7831039428711, + "loss": 0.8188, + "rewards/chosen": 1.3421199321746826, + "rewards/margins": -0.09975908696651459, + "rewards/rejected": 1.4418790340423584, + "step": 2140 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.54, + "learning_rate": 9.234275018791769e-06, + "logps/chosen": -148.055908203125, + "logps/margins": -5.959725379943848, + "logps/rejected": -142.09622192382812, + "loss": 0.8061, + "rewards/chosen": 1.774924635887146, + "rewards/margins": 0.25871556997299194, + "rewards/rejected": 1.5162090063095093, + "step": 2150 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.54, + "learning_rate": 9.227298900007474e-06, + "logps/chosen": -137.8963165283203, + "logps/margins": 13.159306526184082, + "logps/rejected": -151.05563354492188, + "loss": 0.8412, + "rewards/chosen": 1.4557017087936401, + "rewards/margins": 0.05740945786237717, + "rewards/rejected": 1.3982923030853271, + "step": 2160 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.54, + "learning_rate": 9.220293807306948e-06, + "logps/chosen": -178.25088500976562, + "logps/margins": -25.431116104125977, + "logps/rejected": -152.8197479248047, + "loss": 0.8634, + "rewards/chosen": 1.5417152643203735, + "rewards/margins": -0.1308407038450241, + "rewards/rejected": 1.672555923461914, + "step": 2170 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.55, + "learning_rate": 9.213259788703118e-06, + "logps/chosen": -182.5172576904297, + "logps/margins": -21.872121810913086, + "logps/rejected": -160.6451416015625, + "loss": 0.8366, + "rewards/chosen": 1.80950129032135, + "rewards/margins": 0.14453944563865662, + "rewards/rejected": 1.664961814880371, + "step": 2180 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.55, + "learning_rate": 9.206196892407158e-06, + "logps/chosen": -168.9132537841797, + "logps/margins": -35.30933380126953, + "logps/rejected": -133.6039276123047, + "loss": 0.7842, + "rewards/chosen": 1.8597646951675415, + "rewards/margins": 0.14125025272369385, + "rewards/rejected": 1.7185144424438477, + "step": 2190 + }, + { + "accuracy": 0.4375, + "epoch": 0.55, + "learning_rate": 9.199105166828179e-06, + "logps/chosen": -139.2642364501953, + "logps/margins": 16.201541900634766, + "logps/rejected": -155.46578979492188, + "loss": 0.8081, + "rewards/chosen": 1.5757122039794922, + "rewards/margins": -0.13621333241462708, + "rewards/rejected": 1.711925745010376, + "step": 2200 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.55, + "learning_rate": 9.19198466057288e-06, + "logps/chosen": -134.79681396484375, + "logps/margins": 3.2388598918914795, + "logps/rejected": -138.03567504882812, + "loss": 0.8749, + "rewards/chosen": 1.8258867263793945, + "rewards/margins": 0.27075594663619995, + "rewards/rejected": 1.555130958557129, + "step": 2210 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.56, + "learning_rate": 9.18483542244523e-06, + "logps/chosen": -178.60345458984375, + "logps/margins": -18.947158813476562, + "logps/rejected": -159.6562957763672, + "loss": 0.7734, + "rewards/chosen": 1.7505261898040771, + "rewards/margins": 0.2822708785533905, + "rewards/rejected": 1.4682552814483643, + "step": 2220 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.56, + "learning_rate": 9.177657501446125e-06, + "logps/chosen": -160.14938354492188, + "logps/margins": -24.04581069946289, + "logps/rejected": -136.1035614013672, + "loss": 0.7546, + "rewards/chosen": 1.8193641901016235, + "rewards/margins": 0.5835358500480652, + "rewards/rejected": 1.2358283996582031, + "step": 2230 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.56, + "learning_rate": 9.170450946773047e-06, + "logps/chosen": -139.8365936279297, + "logps/margins": 10.286653518676758, + "logps/rejected": -150.12326049804688, + "loss": 0.877, + "rewards/chosen": 1.694026231765747, + "rewards/margins": -0.005369952414184809, + "rewards/rejected": 1.6993963718414307, + "step": 2240 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.56, + "learning_rate": 9.163215807819742e-06, + "logps/chosen": -141.0439453125, + "logps/margins": -5.759264945983887, + "logps/rejected": -135.28466796875, + "loss": 0.8019, + "rewards/chosen": 1.4926296472549438, + "rewards/margins": 0.10668959468603134, + "rewards/rejected": 1.3859398365020752, + "step": 2250 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.56, + "learning_rate": 9.155952134175866e-06, + "logps/chosen": -147.4812469482422, + "logps/margins": 25.673351287841797, + "logps/rejected": -173.1545867919922, + "loss": 0.788, + "rewards/chosen": 1.9739952087402344, + "rewards/margins": -0.09373383224010468, + "rewards/rejected": 2.0677289962768555, + "step": 2260 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.57, + "learning_rate": 9.148659975626657e-06, + "logps/chosen": -143.13723754882812, + "logps/margins": -16.106327056884766, + "logps/rejected": -127.0309066772461, + "loss": 0.7857, + "rewards/chosen": 1.7816076278686523, + "rewards/margins": 0.26208606362342834, + "rewards/rejected": 1.5195214748382568, + "step": 2270 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.57, + "learning_rate": 9.141339382152584e-06, + "logps/chosen": -162.29852294921875, + "logps/margins": -20.860191345214844, + "logps/rejected": -141.43833923339844, + "loss": 0.7919, + "rewards/chosen": 1.8563302755355835, + "rewards/margins": 0.21059107780456543, + "rewards/rejected": 1.645738959312439, + "step": 2280 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.57, + "learning_rate": 9.133990403929013e-06, + "logps/chosen": -156.83230590820312, + "logps/margins": -16.177400588989258, + "logps/rejected": -140.6549072265625, + "loss": 0.7496, + "rewards/chosen": 1.7892961502075195, + "rewards/margins": 0.25427499413490295, + "rewards/rejected": 1.5350210666656494, + "step": 2290 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.57, + "learning_rate": 9.126613091325856e-06, + "logps/chosen": -159.8083953857422, + "logps/margins": -0.3494918942451477, + "logps/rejected": -159.45889282226562, + "loss": 0.7925, + "rewards/chosen": 1.592434287071228, + "rewards/margins": -0.017310649156570435, + "rewards/rejected": 1.6097447872161865, + "step": 2300 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.58, + "learning_rate": 9.119207494907233e-06, + "logps/chosen": -151.39175415039062, + "logps/margins": 18.52858543395996, + "logps/rejected": -169.9203338623047, + "loss": 0.7989, + "rewards/chosen": 1.9851707220077515, + "rewards/margins": 0.09721750766038895, + "rewards/rejected": 1.8879531621932983, + "step": 2310 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.58, + "learning_rate": 9.111773665431114e-06, + "logps/chosen": -145.84954833984375, + "logps/margins": 8.649538040161133, + "logps/rejected": -154.4990997314453, + "loss": 0.8464, + "rewards/chosen": 1.5865757465362549, + "rewards/margins": -0.05357605218887329, + "rewards/rejected": 1.6401517391204834, + "step": 2320 + }, + { + "accuracy": 0.625, + "epoch": 0.58, + "learning_rate": 9.104311653848988e-06, + "logps/chosen": -165.7419891357422, + "logps/margins": -16.4604434967041, + "logps/rejected": -149.2815704345703, + "loss": 0.7278, + "rewards/chosen": 2.0504238605499268, + "rewards/margins": 0.5390018224716187, + "rewards/rejected": 1.511421799659729, + "step": 2330 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.58, + "learning_rate": 9.096821511305494e-06, + "logps/chosen": -175.6136932373047, + "logps/margins": -11.866948127746582, + "logps/rejected": -163.7467498779297, + "loss": 0.8684, + "rewards/chosen": 1.8378212451934814, + "rewards/margins": 0.17367655038833618, + "rewards/rejected": 1.664144515991211, + "step": 2340 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.59, + "learning_rate": 9.089303289138091e-06, + "logps/chosen": -150.4571533203125, + "logps/margins": -9.216192245483398, + "logps/rejected": -141.24095153808594, + "loss": 0.7793, + "rewards/chosen": 1.6893303394317627, + "rewards/margins": -0.04371819272637367, + "rewards/rejected": 1.7330482006072998, + "step": 2350 + }, + { + "accuracy": 0.5, + "epoch": 0.59, + "learning_rate": 9.081757038876689e-06, + "logps/chosen": -144.45582580566406, + "logps/margins": 5.97637414932251, + "logps/rejected": -150.4322052001953, + "loss": 0.7776, + "rewards/chosen": 1.6179802417755127, + "rewards/margins": 0.029508357867598534, + "rewards/rejected": 1.5884718894958496, + "step": 2360 + }, + { + "accuracy": 0.5625, + "epoch": 0.59, + "learning_rate": 9.074182812243301e-06, + "logps/chosen": -143.5775909423828, + "logps/margins": -18.096410751342773, + "logps/rejected": -125.48116302490234, + "loss": 0.7718, + "rewards/chosen": 1.6561262607574463, + "rewards/margins": 0.27639085054397583, + "rewards/rejected": 1.3797352313995361, + "step": 2370 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.59, + "learning_rate": 9.0665806611517e-06, + "logps/chosen": -140.8172607421875, + "logps/margins": -11.822988510131836, + "logps/rejected": -128.99429321289062, + "loss": 0.821, + "rewards/chosen": 1.6826212406158447, + "rewards/margins": 0.013142955489456654, + "rewards/rejected": 1.669478178024292, + "step": 2380 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 0.6, + "learning_rate": 9.058950637707043e-06, + "logps/chosen": -162.18556213378906, + "logps/margins": -21.36416244506836, + "logps/rejected": -140.82139587402344, + "loss": 0.8002, + "rewards/chosen": 1.8264729976654053, + "rewards/margins": 0.42425379157066345, + "rewards/rejected": 1.4022191762924194, + "step": 2390 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.6, + "learning_rate": 9.051292794205526e-06, + "logps/chosen": -139.4217071533203, + "logps/margins": 7.258561134338379, + "logps/rejected": -146.68026733398438, + "loss": 0.8115, + "rewards/chosen": 1.5714809894561768, + "rewards/margins": 0.19860979914665222, + "rewards/rejected": 1.3728711605072021, + "step": 2400 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.6, + "learning_rate": 9.043607183134029e-06, + "logps/chosen": -170.65122985839844, + "logps/margins": -13.439526557922363, + "logps/rejected": -157.2117156982422, + "loss": 0.854, + "rewards/chosen": 1.9065945148468018, + "rewards/margins": 0.10046534240245819, + "rewards/rejected": 1.8061290979385376, + "step": 2410 + }, + { + "accuracy": 0.5625, + "epoch": 0.6, + "learning_rate": 9.035893857169746e-06, + "logps/chosen": -143.10107421875, + "logps/margins": 4.725135803222656, + "logps/rejected": -147.8262176513672, + "loss": 0.7349, + "rewards/chosen": 1.816597580909729, + "rewards/margins": 0.16226616501808167, + "rewards/rejected": 1.6543312072753906, + "step": 2420 + }, + { + "accuracy": 0.5, + "epoch": 0.61, + "learning_rate": 9.028152869179831e-06, + "logps/chosen": -153.50521850585938, + "logps/margins": 3.902616024017334, + "logps/rejected": -157.4078369140625, + "loss": 0.754, + "rewards/chosen": 1.8235887289047241, + "rewards/margins": 0.17439498007297516, + "rewards/rejected": 1.6491937637329102, + "step": 2430 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.61, + "learning_rate": 9.020384272221035e-06, + "logps/chosen": -144.56881713867188, + "logps/margins": 0.3802814483642578, + "logps/rejected": -144.9490966796875, + "loss": 0.7922, + "rewards/chosen": 2.1358447074890137, + "rewards/margins": 0.341838538646698, + "rewards/rejected": 1.794006109237671, + "step": 2440 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.61, + "learning_rate": 9.012588119539337e-06, + "logps/chosen": -174.88278198242188, + "logps/margins": -21.781993865966797, + "logps/rejected": -153.1007843017578, + "loss": 0.8242, + "rewards/chosen": 1.9694465398788452, + "rewards/margins": 0.4216902256011963, + "rewards/rejected": 1.5477564334869385, + "step": 2450 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.61, + "learning_rate": 9.004764464569584e-06, + "logps/chosen": -142.15045166015625, + "logps/margins": -4.6801066398620605, + "logps/rejected": -137.47035217285156, + "loss": 0.82, + "rewards/chosen": 1.8222211599349976, + "rewards/margins": 0.20227757096290588, + "rewards/rejected": 1.619943618774414, + "step": 2460 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.62, + "learning_rate": 8.996913360935129e-06, + "logps/chosen": -147.394775390625, + "logps/margins": -2.0654754638671875, + "logps/rejected": -145.3292999267578, + "loss": 0.8503, + "rewards/chosen": 1.5807874202728271, + "rewards/margins": 0.2154150754213333, + "rewards/rejected": 1.3653721809387207, + "step": 2470 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.62, + "learning_rate": 8.98903486244745e-06, + "logps/chosen": -180.33253479003906, + "logps/margins": -11.23771858215332, + "logps/rejected": -169.09481811523438, + "loss": 0.8198, + "rewards/chosen": 1.9343461990356445, + "rewards/margins": 0.06712658703327179, + "rewards/rejected": 1.8672195672988892, + "step": 2480 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.62, + "learning_rate": 8.981129023105795e-06, + "logps/chosen": -166.7818145751953, + "logps/margins": -7.165487766265869, + "logps/rejected": -159.61634826660156, + "loss": 0.8073, + "rewards/chosen": 1.6975730657577515, + "rewards/margins": -0.007704681716859341, + "rewards/rejected": 1.7052780389785767, + "step": 2490 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.62, + "learning_rate": 8.973195897096806e-06, + "logps/chosen": -147.55386352539062, + "logps/margins": -2.1087851524353027, + "logps/rejected": -145.445068359375, + "loss": 0.7808, + "rewards/chosen": 1.3104054927825928, + "rewards/margins": -0.2292346954345703, + "rewards/rejected": 1.5396400690078735, + "step": 2500 + }, + { + "accuracy": 0.5, + "epoch": 0.63, + "learning_rate": 8.965235538794145e-06, + "logps/chosen": -153.317138671875, + "logps/margins": -13.002995491027832, + "logps/rejected": -140.3141632080078, + "loss": 0.8174, + "rewards/chosen": 1.7759571075439453, + "rewards/margins": 0.1918468177318573, + "rewards/rejected": 1.5841103792190552, + "step": 2510 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.63, + "learning_rate": 8.957248002758125e-06, + "logps/chosen": -160.869384765625, + "logps/margins": 21.718250274658203, + "logps/rejected": -182.58761596679688, + "loss": 0.844, + "rewards/chosen": 1.6739715337753296, + "rewards/margins": -0.03881732374429703, + "rewards/rejected": 1.7127888202667236, + "step": 2520 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 0.63, + "learning_rate": 8.949233343735339e-06, + "logps/chosen": -172.44351196289062, + "logps/margins": -13.763504028320312, + "logps/rejected": -158.67999267578125, + "loss": 0.8251, + "rewards/chosen": 1.7791717052459717, + "rewards/margins": 0.35052114725112915, + "rewards/rejected": 1.4286506175994873, + "step": 2530 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.64, + "learning_rate": 8.941191616658275e-06, + "logps/chosen": -155.2109832763672, + "logps/margins": -3.1990745067596436, + "logps/rejected": -152.01187133789062, + "loss": 0.8031, + "rewards/chosen": 1.6604912281036377, + "rewards/margins": 0.10237333923578262, + "rewards/rejected": 1.5581179857254028, + "step": 2540 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.64, + "learning_rate": 8.933122876644953e-06, + "logps/chosen": -178.10671997070312, + "logps/margins": -14.75318431854248, + "logps/rejected": -163.3535614013672, + "loss": 0.8632, + "rewards/chosen": 1.9766952991485596, + "rewards/margins": 0.22956128418445587, + "rewards/rejected": 1.7471338510513306, + "step": 2550 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.64, + "learning_rate": 8.925027178998528e-06, + "logps/chosen": -170.93984985351562, + "logps/margins": 4.341712951660156, + "logps/rejected": -175.2815704345703, + "loss": 0.7558, + "rewards/chosen": 1.8331180810928345, + "rewards/margins": 0.30583077669143677, + "rewards/rejected": 1.527287244796753, + "step": 2560 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.64, + "learning_rate": 8.916904579206937e-06, + "logps/chosen": -183.05691528320312, + "logps/margins": 8.751852989196777, + "logps/rejected": -191.8087615966797, + "loss": 0.7759, + "rewards/chosen": 1.9488487243652344, + "rewards/margins": 0.5405310392379761, + "rewards/rejected": 1.4083175659179688, + "step": 2570 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.65, + "learning_rate": 8.908755132942494e-06, + "logps/chosen": -136.88320922851562, + "logps/margins": -11.020042419433594, + "logps/rejected": -125.8631591796875, + "loss": 0.7794, + "rewards/chosen": 1.530692458152771, + "rewards/margins": 0.2433866560459137, + "rewards/rejected": 1.2873058319091797, + "step": 2580 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.65, + "learning_rate": 8.900578896061524e-06, + "logps/chosen": -146.7117919921875, + "logps/margins": -11.104670524597168, + "logps/rejected": -135.6071319580078, + "loss": 0.8857, + "rewards/chosen": 1.2212272882461548, + "rewards/margins": 0.07540423423051834, + "rewards/rejected": 1.1458228826522827, + "step": 2590 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.65, + "learning_rate": 8.89237592460397e-06, + "logps/chosen": -138.83975219726562, + "logps/margins": -2.215001344680786, + "logps/rejected": -136.624755859375, + "loss": 0.8233, + "rewards/chosen": 1.7345571517944336, + "rewards/margins": -0.03856398165225983, + "rewards/rejected": 1.7731212377548218, + "step": 2600 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.65, + "learning_rate": 8.884146274793022e-06, + "logps/chosen": -160.31529235839844, + "logps/margins": 14.85515308380127, + "logps/rejected": -175.1704559326172, + "loss": 0.8134, + "rewards/chosen": 1.4943859577178955, + "rewards/margins": -0.018301963806152344, + "rewards/rejected": 1.5126876831054688, + "step": 2610 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 0.66, + "learning_rate": 8.875890003034713e-06, + "logps/chosen": -146.08267211914062, + "logps/margins": 40.0573616027832, + "logps/rejected": -186.1400146484375, + "loss": 0.7523, + "rewards/chosen": 1.6075210571289062, + "rewards/margins": 0.4434809684753418, + "rewards/rejected": 1.164040207862854, + "step": 2620 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.66, + "learning_rate": 8.86760716591755e-06, + "logps/chosen": -155.77761840820312, + "logps/margins": 11.430095672607422, + "logps/rejected": -167.20773315429688, + "loss": 0.8126, + "rewards/chosen": 1.6409187316894531, + "rewards/margins": 0.4476473927497864, + "rewards/rejected": 1.1932713985443115, + "step": 2630 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 0.66, + "learning_rate": 8.859297820212118e-06, + "logps/chosen": -188.03599548339844, + "logps/margins": -8.601591110229492, + "logps/rejected": -179.43441772460938, + "loss": 0.792, + "rewards/chosen": 1.3785462379455566, + "rewards/margins": -0.18648740649223328, + "rewards/rejected": 1.5650336742401123, + "step": 2640 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.66, + "learning_rate": 8.850962022870692e-06, + "logps/chosen": -151.62315368652344, + "logps/margins": -5.000039100646973, + "logps/rejected": -146.62313842773438, + "loss": 0.8002, + "rewards/chosen": 1.631996512413025, + "rewards/margins": 0.4164932668209076, + "rewards/rejected": 1.2155033349990845, + "step": 2650 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.67, + "learning_rate": 8.842599831026846e-06, + "logps/chosen": -197.48471069335938, + "logps/margins": -24.33269691467285, + "logps/rejected": -173.15200805664062, + "loss": 0.7711, + "rewards/chosen": 1.63047194480896, + "rewards/margins": 0.1193113699555397, + "rewards/rejected": 1.5111606121063232, + "step": 2660 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.67, + "learning_rate": 8.83421130199506e-06, + "logps/chosen": -148.03115844726562, + "logps/margins": 3.6409599781036377, + "logps/rejected": -151.67210388183594, + "loss": 0.7547, + "rewards/chosen": 1.974037766456604, + "rewards/margins": 0.2518533766269684, + "rewards/rejected": 1.722184181213379, + "step": 2670 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.67, + "learning_rate": 8.825796493270332e-06, + "logps/chosen": -147.1207275390625, + "logps/margins": -1.126124620437622, + "logps/rejected": -145.99459838867188, + "loss": 0.7721, + "rewards/chosen": 1.8635289669036865, + "rewards/margins": 0.28442710638046265, + "rewards/rejected": 1.579101800918579, + "step": 2680 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.67, + "learning_rate": 8.817355462527779e-06, + "logps/chosen": -141.459716796875, + "logps/margins": 5.246458530426025, + "logps/rejected": -146.7061767578125, + "loss": 0.774, + "rewards/chosen": 1.573882818222046, + "rewards/margins": 0.14829522371292114, + "rewards/rejected": 1.4255876541137695, + "step": 2690 + }, + { + "accuracy": 0.625, + "epoch": 0.68, + "learning_rate": 8.808888267622243e-06, + "logps/chosen": -156.78652954101562, + "logps/margins": -9.923416137695312, + "logps/rejected": -146.8631134033203, + "loss": 0.7625, + "rewards/chosen": 1.564741611480713, + "rewards/margins": 0.2138366401195526, + "rewards/rejected": 1.3509048223495483, + "step": 2700 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.68, + "learning_rate": 8.800394966587905e-06, + "logps/chosen": -161.2109375, + "logps/margins": -22.525259017944336, + "logps/rejected": -138.68565368652344, + "loss": 0.7972, + "rewards/chosen": 1.6655502319335938, + "rewards/margins": 0.21709592640399933, + "rewards/rejected": 1.4484546184539795, + "step": 2710 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.68, + "learning_rate": 8.791875617637861e-06, + "logps/chosen": -167.04269409179688, + "logps/margins": -10.494729995727539, + "logps/rejected": -156.54795837402344, + "loss": 0.8381, + "rewards/chosen": 1.614736557006836, + "rewards/margins": 0.13579224050045013, + "rewards/rejected": 1.4789444208145142, + "step": 2720 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.68, + "learning_rate": 8.783330279163753e-06, + "logps/chosen": -123.72306060791016, + "logps/margins": -10.220602035522461, + "logps/rejected": -113.5024642944336, + "loss": 0.7595, + "rewards/chosen": 1.708743691444397, + "rewards/margins": 0.11826448142528534, + "rewards/rejected": 1.5904791355133057, + "step": 2730 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.69, + "learning_rate": 8.77475900973535e-06, + "logps/chosen": -192.87059020996094, + "logps/margins": -20.477632522583008, + "logps/rejected": -172.3929443359375, + "loss": 0.7957, + "rewards/chosen": 1.6575543880462646, + "rewards/margins": 0.050416022539138794, + "rewards/rejected": 1.6071382761001587, + "step": 2740 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.69, + "learning_rate": 8.766161868100147e-06, + "logps/chosen": -183.8955078125, + "logps/margins": 1.8340591192245483, + "logps/rejected": -185.72958374023438, + "loss": 0.767, + "rewards/chosen": 1.5932084321975708, + "rewards/margins": -0.040756307542324066, + "rewards/rejected": 1.6339647769927979, + "step": 2750 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.69, + "learning_rate": 8.757538913182977e-06, + "logps/chosen": -129.14199829101562, + "logps/margins": 30.8277645111084, + "logps/rejected": -159.96975708007812, + "loss": 0.81, + "rewards/chosen": 1.3314985036849976, + "rewards/margins": -0.10257778316736221, + "rewards/rejected": 1.4340763092041016, + "step": 2760 + }, + { + "accuracy": 0.5625, + "epoch": 0.69, + "learning_rate": 8.748890204085593e-06, + "logps/chosen": -130.80186462402344, + "logps/margins": 19.50129508972168, + "logps/rejected": -150.30316162109375, + "loss": 0.8098, + "rewards/chosen": 1.486842393875122, + "rewards/margins": 0.145850270986557, + "rewards/rejected": 1.3409919738769531, + "step": 2770 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.69, + "learning_rate": 8.740215800086262e-06, + "logps/chosen": -150.61740112304688, + "logps/margins": 2.5855257511138916, + "logps/rejected": -153.20294189453125, + "loss": 0.7317, + "rewards/chosen": 1.428755521774292, + "rewards/margins": 0.2611759305000305, + "rewards/rejected": 1.1675795316696167, + "step": 2780 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.7, + "learning_rate": 8.731515760639375e-06, + "logps/chosen": -149.7344207763672, + "logps/margins": -28.57843017578125, + "logps/rejected": -121.1559829711914, + "loss": 0.7053, + "rewards/chosen": 1.700934648513794, + "rewards/margins": 0.31255167722702026, + "rewards/rejected": 1.388382911682129, + "step": 2790 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 0.7, + "learning_rate": 8.722790145375018e-06, + "logps/chosen": -165.7180938720703, + "logps/margins": -15.831774711608887, + "logps/rejected": -149.8863067626953, + "loss": 0.8156, + "rewards/chosen": 2.0403616428375244, + "rewards/margins": 0.31420427560806274, + "rewards/rejected": 1.7261574268341064, + "step": 2800 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.7, + "learning_rate": 8.714039014098577e-06, + "logps/chosen": -138.14642333984375, + "logps/margins": 24.827730178833008, + "logps/rejected": -162.97415161132812, + "loss": 0.7578, + "rewards/chosen": 1.554478645324707, + "rewards/margins": 0.22009606659412384, + "rewards/rejected": 1.3343826532363892, + "step": 2810 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.7, + "learning_rate": 8.705262426790328e-06, + "logps/chosen": -144.08914184570312, + "logps/margins": 11.892007827758789, + "logps/rejected": -155.9811553955078, + "loss": 0.8661, + "rewards/chosen": 1.545195460319519, + "rewards/margins": -0.10685305297374725, + "rewards/rejected": 1.6520484685897827, + "step": 2820 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.71, + "learning_rate": 8.69646044360502e-06, + "logps/chosen": -156.28567504882812, + "logps/margins": -11.327496528625488, + "logps/rejected": -144.95816040039062, + "loss": 0.8077, + "rewards/chosen": 1.528448462486267, + "rewards/margins": -0.09929704666137695, + "rewards/rejected": 1.6277453899383545, + "step": 2830 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.71, + "learning_rate": 8.687633124871462e-06, + "logps/chosen": -150.17543029785156, + "logps/margins": 6.499469757080078, + "logps/rejected": -156.67489624023438, + "loss": 0.7569, + "rewards/chosen": 1.5286836624145508, + "rewards/margins": 0.01687273196876049, + "rewards/rejected": 1.5118108987808228, + "step": 2840 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.71, + "learning_rate": 8.678780531092122e-06, + "logps/chosen": -164.65780639648438, + "logps/margins": -2.193402051925659, + "logps/rejected": -162.4644012451172, + "loss": 0.7836, + "rewards/chosen": 1.6761707067489624, + "rewards/margins": 0.13601139187812805, + "rewards/rejected": 1.5401592254638672, + "step": 2850 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.71, + "learning_rate": 8.669902722942695e-06, + "logps/chosen": -179.60025024414062, + "logps/margins": 8.734827041625977, + "logps/rejected": -188.33505249023438, + "loss": 0.8083, + "rewards/chosen": 1.4502613544464111, + "rewards/margins": 0.0022860795725136995, + "rewards/rejected": 1.4479749202728271, + "step": 2860 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.72, + "learning_rate": 8.6609997612717e-06, + "logps/chosen": -185.20169067382812, + "logps/margins": -14.612690925598145, + "logps/rejected": -170.5889892578125, + "loss": 0.8609, + "rewards/chosen": 1.4797416925430298, + "rewards/margins": 0.02350521646440029, + "rewards/rejected": 1.4562366008758545, + "step": 2870 + }, + { + "accuracy": 0.625, + "epoch": 0.72, + "learning_rate": 8.652071707100054e-06, + "logps/chosen": -139.81324768066406, + "logps/margins": -10.358976364135742, + "logps/rejected": -129.45425415039062, + "loss": 0.7497, + "rewards/chosen": 1.483909010887146, + "rewards/margins": 0.2548331618309021, + "rewards/rejected": 1.2290759086608887, + "step": 2880 + }, + { + "accuracy": 0.5, + "epoch": 0.72, + "learning_rate": 8.64311862162066e-06, + "logps/chosen": -153.6216278076172, + "logps/margins": -6.6676788330078125, + "logps/rejected": -146.9539337158203, + "loss": 0.7835, + "rewards/chosen": 1.508525013923645, + "rewards/margins": 0.02880988083779812, + "rewards/rejected": 1.47971510887146, + "step": 2890 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.72, + "learning_rate": 8.63414056619799e-06, + "logps/chosen": -144.7294921875, + "logps/margins": 7.182837009429932, + "logps/rejected": -151.91233825683594, + "loss": 0.8166, + "rewards/chosen": 1.8214155435562134, + "rewards/margins": 0.23993360996246338, + "rewards/rejected": 1.58148193359375, + "step": 2900 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.73, + "learning_rate": 8.625137602367653e-06, + "logps/chosen": -163.61148071289062, + "logps/margins": 3.5040078163146973, + "logps/rejected": -167.115478515625, + "loss": 0.7534, + "rewards/chosen": 1.5655899047851562, + "rewards/margins": 0.2786282002925873, + "rewards/rejected": 1.2869617938995361, + "step": 2910 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.73, + "learning_rate": 8.616109791835984e-06, + "logps/chosen": -163.35218811035156, + "logps/margins": -1.1953474283218384, + "logps/rejected": -162.15682983398438, + "loss": 0.7732, + "rewards/chosen": 1.7934242486953735, + "rewards/margins": 0.08110041171312332, + "rewards/rejected": 1.712323784828186, + "step": 2920 + }, + { + "accuracy": 0.5625, + "epoch": 0.73, + "learning_rate": 8.607057196479617e-06, + "logps/chosen": -148.98927307128906, + "logps/margins": -19.40814781188965, + "logps/rejected": -129.5811309814453, + "loss": 0.8467, + "rewards/chosen": 1.4410368204116821, + "rewards/margins": -0.014748156070709229, + "rewards/rejected": 1.4557850360870361, + "step": 2930 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.73, + "learning_rate": 8.597979878345062e-06, + "logps/chosen": -162.67930603027344, + "logps/margins": -0.1245359405875206, + "logps/rejected": -162.5547637939453, + "loss": 0.7511, + "rewards/chosen": 1.7473443746566772, + "rewards/margins": 0.48985424637794495, + "rewards/rejected": 1.2574901580810547, + "step": 2940 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.74, + "learning_rate": 8.588877899648276e-06, + "logps/chosen": -175.69664001464844, + "logps/margins": -20.07693862915039, + "logps/rejected": -155.6197052001953, + "loss": 0.828, + "rewards/chosen": 1.8812503814697266, + "rewards/margins": 0.3405774235725403, + "rewards/rejected": 1.540673017501831, + "step": 2950 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.74, + "learning_rate": 8.579751322774244e-06, + "logps/chosen": -147.95489501953125, + "logps/margins": -2.609814405441284, + "logps/rejected": -145.34506225585938, + "loss": 0.8053, + "rewards/chosen": 1.5963928699493408, + "rewards/margins": 0.03403860330581665, + "rewards/rejected": 1.562354326248169, + "step": 2960 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.74, + "learning_rate": 8.570600210276541e-06, + "logps/chosen": -136.46490478515625, + "logps/margins": -5.8197021484375, + "logps/rejected": -130.6452178955078, + "loss": 0.7715, + "rewards/chosen": 1.5920674800872803, + "rewards/margins": 0.10355864465236664, + "rewards/rejected": 1.4885088205337524, + "step": 2970 + }, + { + "accuracy": 0.4124999940395355, + "epoch": 0.74, + "learning_rate": 8.561424624876917e-06, + "logps/chosen": -138.11569213867188, + "logps/margins": 10.209531784057617, + "logps/rejected": -148.32522583007812, + "loss": 0.8056, + "rewards/chosen": 1.659637212753296, + "rewards/margins": -0.0970558226108551, + "rewards/rejected": 1.7566931247711182, + "step": 2980 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.75, + "learning_rate": 8.552224629464854e-06, + "logps/chosen": -155.06893920898438, + "logps/margins": -21.292409896850586, + "logps/rejected": -133.77651977539062, + "loss": 0.7958, + "rewards/chosen": 1.474408507347107, + "rewards/margins": -0.11505673080682755, + "rewards/rejected": 1.5894651412963867, + "step": 2990 + }, + { + "accuracy": 0.6875, + "epoch": 0.75, + "learning_rate": 8.543000287097141e-06, + "logps/chosen": -156.76683044433594, + "logps/margins": 13.566311836242676, + "logps/rejected": -170.33316040039062, + "loss": 0.7411, + "rewards/chosen": 1.4821707010269165, + "rewards/margins": 0.5528401136398315, + "rewards/rejected": 0.9293305277824402, + "step": 3000 + }, + { + "epoch": 0.75, + "eval_accuracy": 0.5486257928118393, + "eval_logps/chosen": -155.9010772705078, + "eval_logps/margins": -6.521376609802246, + "eval_logps/rejected": -149.37969970703125, + "eval_loss": 0.8003174066543579, + "eval_rewards/chosen": 1.7650580406188965, + "eval_rewards/margins": 0.16896478831768036, + "eval_rewards/rejected": 1.5960932970046997, + "eval_runtime": 1294.3661, + "eval_samples_per_second": 10.963, + "eval_steps_per_second": 1.371, + "step": 3000 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.75, + "learning_rate": 8.53375166099744e-06, + "logps/chosen": -161.08810424804688, + "logps/margins": -0.3329780697822571, + "logps/rejected": -160.755126953125, + "loss": 0.8118, + "rewards/chosen": 1.634568452835083, + "rewards/margins": 0.07734532654285431, + "rewards/rejected": 1.5572230815887451, + "step": 3010 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.76, + "learning_rate": 8.524478814555855e-06, + "logps/chosen": -185.60482788085938, + "logps/margins": -4.625077247619629, + "logps/rejected": -180.97975158691406, + "loss": 0.8838, + "rewards/chosen": 2.093295097351074, + "rewards/margins": 0.30580079555511475, + "rewards/rejected": 1.787494421005249, + "step": 3020 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.76, + "learning_rate": 8.515181811328498e-06, + "logps/chosen": -150.15780639648438, + "logps/margins": 7.099617004394531, + "logps/rejected": -157.25741577148438, + "loss": 0.7537, + "rewards/chosen": 1.8199164867401123, + "rewards/margins": 0.25283852219581604, + "rewards/rejected": 1.5670779943466187, + "step": 3030 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.76, + "learning_rate": 8.505860715037054e-06, + "logps/chosen": -144.1829833984375, + "logps/margins": -16.597780227661133, + "logps/rejected": -127.58522033691406, + "loss": 0.7326, + "rewards/chosen": 1.6525452136993408, + "rewards/margins": 0.22734513878822327, + "rewards/rejected": 1.4252002239227295, + "step": 3040 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.76, + "learning_rate": 8.49651558956833e-06, + "logps/chosen": -167.9637451171875, + "logps/margins": -9.961614608764648, + "logps/rejected": -158.00213623046875, + "loss": 0.8065, + "rewards/chosen": 1.675106406211853, + "rewards/margins": 0.17599111795425415, + "rewards/rejected": 1.4991153478622437, + "step": 3050 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.77, + "learning_rate": 8.487146498973841e-06, + "logps/chosen": -174.6428680419922, + "logps/margins": -25.455190658569336, + "logps/rejected": -149.1876678466797, + "loss": 0.8235, + "rewards/chosen": 2.4818341732025146, + "rewards/margins": 0.07841117680072784, + "rewards/rejected": 2.4034228324890137, + "step": 3060 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.77, + "learning_rate": 8.477753507469357e-06, + "logps/chosen": -174.24676513671875, + "logps/margins": -13.05420970916748, + "logps/rejected": -161.19256591796875, + "loss": 0.8725, + "rewards/chosen": 1.5876998901367188, + "rewards/margins": 0.03590340167284012, + "rewards/rejected": 1.5517964363098145, + "step": 3070 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 0.77, + "learning_rate": 8.468336679434461e-06, + "logps/chosen": -192.79833984375, + "logps/margins": -43.42304992675781, + "logps/rejected": -149.3752899169922, + "loss": 0.7302, + "rewards/chosen": 1.7570441961288452, + "rewards/margins": 0.40183025598526, + "rewards/rejected": 1.3552137613296509, + "step": 3080 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.77, + "learning_rate": 8.458896079412114e-06, + "logps/chosen": -192.26625061035156, + "logps/margins": -50.38763427734375, + "logps/rejected": -141.87860107421875, + "loss": 0.7633, + "rewards/chosen": 2.1395068168640137, + "rewards/margins": 0.3148060441017151, + "rewards/rejected": 1.8247007131576538, + "step": 3090 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.78, + "learning_rate": 8.44943177210821e-06, + "logps/chosen": -146.1753387451172, + "logps/margins": 11.312209129333496, + "logps/rejected": -157.487548828125, + "loss": 0.7106, + "rewards/chosen": 2.024766683578491, + "rewards/margins": 0.24736681580543518, + "rewards/rejected": 1.7773997783660889, + "step": 3100 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.78, + "learning_rate": 8.439943822391132e-06, + "logps/chosen": -183.7640838623047, + "logps/margins": -50.22600173950195, + "logps/rejected": -133.53811645507812, + "loss": 0.7581, + "rewards/chosen": 1.8503338098526, + "rewards/margins": 0.027590256184339523, + "rewards/rejected": 1.8227436542510986, + "step": 3110 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.78, + "learning_rate": 8.43043229529131e-06, + "logps/chosen": -122.57987213134766, + "logps/margins": 15.709098815917969, + "logps/rejected": -138.28897094726562, + "loss": 0.7385, + "rewards/chosen": 1.671735405921936, + "rewards/margins": 0.1854889839887619, + "rewards/rejected": 1.4862463474273682, + "step": 3120 + }, + { + "accuracy": 0.5, + "epoch": 0.78, + "learning_rate": 8.420897256000771e-06, + "logps/chosen": -143.1592254638672, + "logps/margins": -12.776369094848633, + "logps/rejected": -130.3828582763672, + "loss": 0.7978, + "rewards/chosen": 1.4812790155410767, + "rewards/margins": -0.010179603472352028, + "rewards/rejected": 1.491458535194397, + "step": 3130 + }, + { + "accuracy": 0.5625, + "epoch": 0.79, + "learning_rate": 8.411338769872697e-06, + "logps/chosen": -158.930908203125, + "logps/margins": 1.2776397466659546, + "logps/rejected": -160.2085418701172, + "loss": 0.8615, + "rewards/chosen": 1.7491512298583984, + "rewards/margins": -0.10457686334848404, + "rewards/rejected": 1.85372793674469, + "step": 3140 + }, + { + "accuracy": 0.5625, + "epoch": 0.79, + "learning_rate": 8.401756902420975e-06, + "logps/chosen": -134.58473205566406, + "logps/margins": 25.656137466430664, + "logps/rejected": -160.24087524414062, + "loss": 0.7601, + "rewards/chosen": 1.51541268825531, + "rewards/margins": 0.3289361000061035, + "rewards/rejected": 1.186476707458496, + "step": 3150 + }, + { + "accuracy": 0.5625, + "epoch": 0.79, + "learning_rate": 8.39215171931974e-06, + "logps/chosen": -155.1742706298828, + "logps/margins": -8.295679092407227, + "logps/rejected": -146.8785858154297, + "loss": 0.7783, + "rewards/chosen": 1.5980865955352783, + "rewards/margins": 0.29294320940971375, + "rewards/rejected": 1.3051433563232422, + "step": 3160 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.79, + "learning_rate": 8.382523286402947e-06, + "logps/chosen": -183.69366455078125, + "logps/margins": -3.2288055419921875, + "logps/rejected": -180.46484375, + "loss": 0.7377, + "rewards/chosen": 1.7234938144683838, + "rewards/margins": 0.37495914101600647, + "rewards/rejected": 1.3485344648361206, + "step": 3170 + }, + { + "accuracy": 0.5625, + "epoch": 0.8, + "learning_rate": 8.37287166966389e-06, + "logps/chosen": -145.9056854248047, + "logps/margins": -2.2215399742126465, + "logps/rejected": -143.68414306640625, + "loss": 0.7742, + "rewards/chosen": 1.5664255619049072, + "rewards/margins": 0.25297456979751587, + "rewards/rejected": 1.3134510517120361, + "step": 3180 + }, + { + "accuracy": 0.5625, + "epoch": 0.8, + "learning_rate": 8.363196935254778e-06, + "logps/chosen": -126.447998046875, + "logps/margins": 23.140649795532227, + "logps/rejected": -149.58863830566406, + "loss": 0.8042, + "rewards/chosen": 1.3213412761688232, + "rewards/margins": 0.06879094988107681, + "rewards/rejected": 1.252550482749939, + "step": 3190 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.8, + "learning_rate": 8.35349914948626e-06, + "logps/chosen": -153.10629272460938, + "logps/margins": -7.21134090423584, + "logps/rejected": -145.8949432373047, + "loss": 0.8203, + "rewards/chosen": 1.3600586652755737, + "rewards/margins": 0.21659858524799347, + "rewards/rejected": 1.1434600353240967, + "step": 3200 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.8, + "learning_rate": 8.34377837882698e-06, + "logps/chosen": -162.39382934570312, + "logps/margins": 7.168750762939453, + "logps/rejected": -169.5625762939453, + "loss": 0.7405, + "rewards/chosen": 1.3610525131225586, + "rewards/margins": 0.3161638379096985, + "rewards/rejected": 1.0448886156082153, + "step": 3210 + }, + { + "accuracy": 0.625, + "epoch": 0.81, + "learning_rate": 8.334034689903121e-06, + "logps/chosen": -160.90206909179688, + "logps/margins": -21.66839027404785, + "logps/rejected": -139.2336883544922, + "loss": 0.7945, + "rewards/chosen": 1.3712489604949951, + "rewards/margins": 0.28245019912719727, + "rewards/rejected": 1.0887987613677979, + "step": 3220 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.81, + "learning_rate": 8.324268149497954e-06, + "logps/chosen": -158.0965576171875, + "logps/margins": -7.221676826477051, + "logps/rejected": -150.8748779296875, + "loss": 0.7716, + "rewards/chosen": 1.2841920852661133, + "rewards/margins": 0.15127582848072052, + "rewards/rejected": 1.1329162120819092, + "step": 3230 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.81, + "learning_rate": 8.314478824551364e-06, + "logps/chosen": -128.3480987548828, + "logps/margins": 14.737256050109863, + "logps/rejected": -143.08535766601562, + "loss": 0.7549, + "rewards/chosen": 1.4126204252243042, + "rewards/margins": 0.26671385765075684, + "rewards/rejected": 1.1459064483642578, + "step": 3240 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.81, + "learning_rate": 8.30466678215941e-06, + "logps/chosen": -193.5419921875, + "logps/margins": -31.942550659179688, + "logps/rejected": -161.5994110107422, + "loss": 0.8155, + "rewards/chosen": 1.6444562673568726, + "rewards/margins": 0.03188382461667061, + "rewards/rejected": 1.612572431564331, + "step": 3250 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.81, + "learning_rate": 8.294832089573853e-06, + "logps/chosen": -161.07583618164062, + "logps/margins": -4.5086164474487305, + "logps/rejected": -156.5672149658203, + "loss": 0.7744, + "rewards/chosen": 1.5193272829055786, + "rewards/margins": 0.2577361464500427, + "rewards/rejected": 1.2615910768508911, + "step": 3260 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.82, + "learning_rate": 8.284974814201694e-06, + "logps/chosen": -146.56365966796875, + "logps/margins": 14.04448413848877, + "logps/rejected": -160.608154296875, + "loss": 0.8143, + "rewards/chosen": 1.751704216003418, + "rewards/margins": 0.15864914655685425, + "rewards/rejected": 1.5930547714233398, + "step": 3270 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.82, + "learning_rate": 8.275095023604724e-06, + "logps/chosen": -179.9011993408203, + "logps/margins": -13.970852851867676, + "logps/rejected": -165.93035888671875, + "loss": 0.7845, + "rewards/chosen": 1.7380592823028564, + "rewards/margins": 0.36463767290115356, + "rewards/rejected": 1.3734214305877686, + "step": 3280 + }, + { + "accuracy": 0.5, + "epoch": 0.82, + "learning_rate": 8.26519278549905e-06, + "logps/chosen": -169.5118865966797, + "logps/margins": 15.470805168151855, + "logps/rejected": -184.98269653320312, + "loss": 0.8802, + "rewards/chosen": 1.6773399114608765, + "rewards/margins": -0.0064588068053126335, + "rewards/rejected": 1.6837987899780273, + "step": 3290 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.82, + "learning_rate": 8.255268167754632e-06, + "logps/chosen": -138.21778869628906, + "logps/margins": -0.12071685492992401, + "logps/rejected": -138.09707641601562, + "loss": 0.7784, + "rewards/chosen": 1.7837855815887451, + "rewards/margins": 0.18200163543224335, + "rewards/rejected": 1.601784110069275, + "step": 3300 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.83, + "learning_rate": 8.245321238394827e-06, + "logps/chosen": -168.6961669921875, + "logps/margins": -19.171979904174805, + "logps/rejected": -149.52420043945312, + "loss": 0.7968, + "rewards/chosen": 2.0667340755462646, + "rewards/margins": 0.529453694820404, + "rewards/rejected": 1.5372803211212158, + "step": 3310 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.83, + "learning_rate": 8.235352065595908e-06, + "logps/chosen": -173.46902465820312, + "logps/margins": -6.0329108238220215, + "logps/rejected": -167.4361114501953, + "loss": 0.7435, + "rewards/chosen": 1.6746422052383423, + "rewards/margins": 0.10841004550457001, + "rewards/rejected": 1.5662320852279663, + "step": 3320 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.83, + "learning_rate": 8.225360717686606e-06, + "logps/chosen": -156.3230743408203, + "logps/margins": -6.365569114685059, + "logps/rejected": -149.95748901367188, + "loss": 0.7835, + "rewards/chosen": 1.6193897724151611, + "rewards/margins": 0.1265874207019806, + "rewards/rejected": 1.4928020238876343, + "step": 3330 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.83, + "learning_rate": 8.215347263147649e-06, + "logps/chosen": -210.0120086669922, + "logps/margins": -30.997217178344727, + "logps/rejected": -179.01480102539062, + "loss": 0.8086, + "rewards/chosen": 1.8879203796386719, + "rewards/margins": 0.15007278323173523, + "rewards/rejected": 1.7378475666046143, + "step": 3340 + }, + { + "accuracy": 0.5625, + "epoch": 0.84, + "learning_rate": 8.205311770611269e-06, + "logps/chosen": -149.6796417236328, + "logps/margins": 11.477659225463867, + "logps/rejected": -161.1573028564453, + "loss": 0.7589, + "rewards/chosen": 1.603755235671997, + "rewards/margins": 0.21250459551811218, + "rewards/rejected": 1.3912506103515625, + "step": 3350 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.84, + "learning_rate": 8.19525430886076e-06, + "logps/chosen": -175.27981567382812, + "logps/margins": -27.390121459960938, + "logps/rejected": -147.88967895507812, + "loss": 0.7069, + "rewards/chosen": 2.0394558906555176, + "rewards/margins": 0.36864930391311646, + "rewards/rejected": 1.6708062887191772, + "step": 3360 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.84, + "learning_rate": 8.185174946829986e-06, + "logps/chosen": -175.32638549804688, + "logps/margins": -30.108501434326172, + "logps/rejected": -145.2178955078125, + "loss": 0.734, + "rewards/chosen": 1.8845609426498413, + "rewards/margins": 0.43025127053260803, + "rewards/rejected": 1.4543099403381348, + "step": 3370 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.84, + "learning_rate": 8.175073753602918e-06, + "logps/chosen": -154.2449493408203, + "logps/margins": -1.7641388177871704, + "logps/rejected": -152.4807891845703, + "loss": 0.74, + "rewards/chosen": 1.7165447473526, + "rewards/margins": 0.28324785828590393, + "rewards/rejected": 1.433296799659729, + "step": 3380 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.85, + "learning_rate": 8.164950798413153e-06, + "logps/chosen": -153.0354461669922, + "logps/margins": 1.106896996498108, + "logps/rejected": -154.14234924316406, + "loss": 0.7639, + "rewards/chosen": 1.8354475498199463, + "rewards/margins": 0.21832025051116943, + "rewards/rejected": 1.6171271800994873, + "step": 3390 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.85, + "learning_rate": 8.154806150643456e-06, + "logps/chosen": -177.07888793945312, + "logps/margins": -6.0497541427612305, + "logps/rejected": -171.0291290283203, + "loss": 0.7419, + "rewards/chosen": 1.5892397165298462, + "rewards/margins": 0.21471253037452698, + "rewards/rejected": 1.3745272159576416, + "step": 3400 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.85, + "learning_rate": 8.144639879825262e-06, + "logps/chosen": -175.3135528564453, + "logps/margins": -18.72549057006836, + "logps/rejected": -156.5880584716797, + "loss": 0.7904, + "rewards/chosen": 2.0188117027282715, + "rewards/margins": 0.30751943588256836, + "rewards/rejected": 1.7112922668457031, + "step": 3410 + }, + { + "accuracy": 0.625, + "epoch": 0.85, + "learning_rate": 8.134452055638211e-06, + "logps/chosen": -165.5194854736328, + "logps/margins": -20.98834800720215, + "logps/rejected": -144.5311279296875, + "loss": 0.6977, + "rewards/chosen": 1.8513362407684326, + "rewards/margins": 0.3126828372478485, + "rewards/rejected": 1.5386536121368408, + "step": 3420 + }, + { + "accuracy": 0.5625, + "epoch": 0.86, + "learning_rate": 8.124242747909678e-06, + "logps/chosen": -186.35317993164062, + "logps/margins": -33.190338134765625, + "logps/rejected": -153.162841796875, + "loss": 0.7514, + "rewards/chosen": 2.014575719833374, + "rewards/margins": 0.35961395502090454, + "rewards/rejected": 1.6549618244171143, + "step": 3430 + }, + { + "accuracy": 0.5625, + "epoch": 0.86, + "learning_rate": 8.11401202661428e-06, + "logps/chosen": -154.21261596679688, + "logps/margins": 1.2508293390274048, + "logps/rejected": -155.46347045898438, + "loss": 0.7763, + "rewards/chosen": 1.75021493434906, + "rewards/margins": 0.11648325622081757, + "rewards/rejected": 1.6337318420410156, + "step": 3440 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.86, + "learning_rate": 8.103759961873403e-06, + "logps/chosen": -131.79953002929688, + "logps/margins": 12.465619087219238, + "logps/rejected": -144.26516723632812, + "loss": 0.7711, + "rewards/chosen": 1.8068921566009521, + "rewards/margins": -0.018017753958702087, + "rewards/rejected": 1.8249099254608154, + "step": 3450 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.86, + "learning_rate": 8.093486623954723e-06, + "logps/chosen": -146.01734924316406, + "logps/margins": 10.133278846740723, + "logps/rejected": -156.150634765625, + "loss": 0.7455, + "rewards/chosen": 1.706298828125, + "rewards/margins": 0.08658869564533234, + "rewards/rejected": 1.6197102069854736, + "step": 3460 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.87, + "learning_rate": 8.08319208327172e-06, + "logps/chosen": -191.7599639892578, + "logps/margins": 2.3573758602142334, + "logps/rejected": -194.11734008789062, + "loss": 0.7013, + "rewards/chosen": 1.9074863195419312, + "rewards/margins": 0.4116293787956238, + "rewards/rejected": 1.4958570003509521, + "step": 3470 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 0.87, + "learning_rate": 8.072876410383199e-06, + "logps/chosen": -147.97479248046875, + "logps/margins": -15.253326416015625, + "logps/rejected": -132.72146606445312, + "loss": 0.708, + "rewards/chosen": 1.3487708568572998, + "rewards/margins": 0.23281045258045197, + "rewards/rejected": 1.1159604787826538, + "step": 3480 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.87, + "learning_rate": 8.062539675992807e-06, + "logps/chosen": -138.44216918945312, + "logps/margins": 36.819217681884766, + "logps/rejected": -175.26136779785156, + "loss": 0.8048, + "rewards/chosen": 1.5128448009490967, + "rewards/margins": 0.09899057447910309, + "rewards/rejected": 1.4138542413711548, + "step": 3490 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.88, + "learning_rate": 8.052181950948544e-06, + "logps/chosen": -171.80516052246094, + "logps/margins": -51.980369567871094, + "logps/rejected": -119.8248062133789, + "loss": 0.7365, + "rewards/chosen": 1.331697702407837, + "rewards/margins": 0.1663578301668167, + "rewards/rejected": 1.1653398275375366, + "step": 3500 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.88, + "learning_rate": 8.041803306242282e-06, + "logps/chosen": -168.68783569335938, + "logps/margins": -8.771944046020508, + "logps/rejected": -159.91590881347656, + "loss": 0.8014, + "rewards/chosen": 1.6454660892486572, + "rewards/margins": 0.2971499562263489, + "rewards/rejected": 1.3483160734176636, + "step": 3510 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.88, + "learning_rate": 8.031403813009273e-06, + "logps/chosen": -150.67105102539062, + "logps/margins": 19.517623901367188, + "logps/rejected": -170.1886749267578, + "loss": 0.8019, + "rewards/chosen": 2.0029842853546143, + "rewards/margins": 0.2589420974254608, + "rewards/rejected": 1.744042158126831, + "step": 3520 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.88, + "learning_rate": 8.020983542527669e-06, + "logps/chosen": -161.2472381591797, + "logps/margins": 6.7901434898376465, + "logps/rejected": -168.03738403320312, + "loss": 0.7681, + "rewards/chosen": 1.5617696046829224, + "rewards/margins": 0.023355094715952873, + "rewards/rejected": 1.5384143590927124, + "step": 3530 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.89, + "learning_rate": 8.010542566218026e-06, + "logps/chosen": -125.4128189086914, + "logps/margins": 8.50660514831543, + "logps/rejected": -133.91943359375, + "loss": 0.7969, + "rewards/chosen": 1.4624067544937134, + "rewards/margins": 0.07958179712295532, + "rewards/rejected": 1.3828251361846924, + "step": 3540 + }, + { + "accuracy": 0.625, + "epoch": 0.89, + "learning_rate": 8.000080955642819e-06, + "logps/chosen": -186.24436950683594, + "logps/margins": -36.11909103393555, + "logps/rejected": -150.12530517578125, + "loss": 0.7742, + "rewards/chosen": 1.564686894416809, + "rewards/margins": 0.33360210061073303, + "rewards/rejected": 1.2310845851898193, + "step": 3550 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.89, + "learning_rate": 7.989598782505954e-06, + "logps/chosen": -143.9381866455078, + "logps/margins": 32.61821746826172, + "logps/rejected": -176.55642700195312, + "loss": 0.838, + "rewards/chosen": 1.272360920906067, + "rewards/margins": -0.05570871755480766, + "rewards/rejected": 1.3280696868896484, + "step": 3560 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.89, + "learning_rate": 7.979096118652267e-06, + "logps/chosen": -183.2843780517578, + "logps/margins": 1.9387588500976562, + "logps/rejected": -185.22312927246094, + "loss": 0.7738, + "rewards/chosen": 1.6671069860458374, + "rewards/margins": 0.4146422743797302, + "rewards/rejected": 1.252464771270752, + "step": 3570 + }, + { + "accuracy": 0.5625, + "epoch": 0.9, + "learning_rate": 7.96857303606704e-06, + "logps/chosen": -153.29017639160156, + "logps/margins": -39.799129486083984, + "logps/rejected": -113.49104309082031, + "loss": 0.7831, + "rewards/chosen": 1.3374580144882202, + "rewards/margins": 0.10456130653619766, + "rewards/rejected": 1.2328965663909912, + "step": 3580 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.9, + "learning_rate": 7.958029606875503e-06, + "logps/chosen": -128.26589965820312, + "logps/margins": 15.221063613891602, + "logps/rejected": -143.48695373535156, + "loss": 0.7696, + "rewards/chosen": 1.7023261785507202, + "rewards/margins": 0.2918190360069275, + "rewards/rejected": 1.4105072021484375, + "step": 3590 + }, + { + "accuracy": 0.5, + "epoch": 0.9, + "learning_rate": 7.947465903342348e-06, + "logps/chosen": -153.58782958984375, + "logps/margins": 15.419158935546875, + "logps/rejected": -169.00698852539062, + "loss": 0.8004, + "rewards/chosen": 1.6826026439666748, + "rewards/margins": -0.01102372445166111, + "rewards/rejected": 1.6936264038085938, + "step": 3600 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.9, + "learning_rate": 7.936881997871217e-06, + "logps/chosen": -138.57852172851562, + "logps/margins": -13.574409484863281, + "logps/rejected": -125.00411224365234, + "loss": 0.7191, + "rewards/chosen": 1.5303868055343628, + "rewards/margins": 0.2576327919960022, + "rewards/rejected": 1.2727539539337158, + "step": 3610 + }, + { + "accuracy": 0.625, + "epoch": 0.91, + "learning_rate": 7.926277963004227e-06, + "logps/chosen": -185.51663208007812, + "logps/margins": -20.25797462463379, + "logps/rejected": -165.25863647460938, + "loss": 0.7274, + "rewards/chosen": 2.054831027984619, + "rewards/margins": 0.3326185345649719, + "rewards/rejected": 1.7222124338150024, + "step": 3620 + }, + { + "accuracy": 0.5625, + "epoch": 0.91, + "learning_rate": 7.915653871421458e-06, + "logps/chosen": -152.35568237304688, + "logps/margins": 16.110992431640625, + "logps/rejected": -168.46670532226562, + "loss": 0.7801, + "rewards/chosen": 1.678218126296997, + "rewards/margins": 0.03800428658723831, + "rewards/rejected": 1.640213966369629, + "step": 3630 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.91, + "learning_rate": 7.905009795940451e-06, + "logps/chosen": -157.21725463867188, + "logps/margins": 0.5339088439941406, + "logps/rejected": -157.7511444091797, + "loss": 0.771, + "rewards/chosen": 1.8164851665496826, + "rewards/margins": 0.3296557068824768, + "rewards/rejected": 1.4868295192718506, + "step": 3640 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.91, + "learning_rate": 7.894345809515728e-06, + "logps/chosen": -143.79592895507812, + "logps/margins": -19.141103744506836, + "logps/rejected": -124.65482330322266, + "loss": 0.747, + "rewards/chosen": 1.6596828699111938, + "rewards/margins": 0.2845556139945984, + "rewards/rejected": 1.3751273155212402, + "step": 3650 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.92, + "learning_rate": 7.883661985238277e-06, + "logps/chosen": -142.795166015625, + "logps/margins": 24.80935287475586, + "logps/rejected": -167.60450744628906, + "loss": 0.7595, + "rewards/chosen": 1.5907258987426758, + "rewards/margins": 0.24582405388355255, + "rewards/rejected": 1.3449019193649292, + "step": 3660 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.92, + "learning_rate": 7.872958396335052e-06, + "logps/chosen": -144.52029418945312, + "logps/margins": 11.030893325805664, + "logps/rejected": -155.55117797851562, + "loss": 0.8323, + "rewards/chosen": 1.3688607215881348, + "rewards/margins": -0.032265789806842804, + "rewards/rejected": 1.4011263847351074, + "step": 3670 + }, + { + "accuracy": 0.5, + "epoch": 0.92, + "learning_rate": 7.862235116168476e-06, + "logps/chosen": -162.84774780273438, + "logps/margins": 13.034228324890137, + "logps/rejected": -175.8819580078125, + "loss": 0.7692, + "rewards/chosen": 1.3408589363098145, + "rewards/margins": -0.12653210759162903, + "rewards/rejected": 1.467391014099121, + "step": 3680 + }, + { + "accuracy": 0.5625, + "epoch": 0.92, + "learning_rate": 7.851492218235936e-06, + "logps/chosen": -145.5940704345703, + "logps/margins": -8.614776611328125, + "logps/rejected": -136.97927856445312, + "loss": 0.8084, + "rewards/chosen": 1.659777283668518, + "rewards/margins": 0.23170706629753113, + "rewards/rejected": 1.428070306777954, + "step": 3690 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.93, + "learning_rate": 7.840729776169277e-06, + "logps/chosen": -144.14027404785156, + "logps/margins": 8.698692321777344, + "logps/rejected": -152.83897399902344, + "loss": 0.753, + "rewards/chosen": 1.6009562015533447, + "rewards/margins": 0.2810685634613037, + "rewards/rejected": 1.319887638092041, + "step": 3700 + }, + { + "accuracy": 0.625, + "epoch": 0.93, + "learning_rate": 7.829947863734302e-06, + "logps/chosen": -148.00088500976562, + "logps/margins": -4.441626071929932, + "logps/rejected": -143.55926513671875, + "loss": 0.6968, + "rewards/chosen": 1.6761270761489868, + "rewards/margins": 0.43606337904930115, + "rewards/rejected": 1.2400636672973633, + "step": 3710 + }, + { + "accuracy": 0.5625, + "epoch": 0.93, + "learning_rate": 7.819146554830265e-06, + "logps/chosen": -177.2362060546875, + "logps/margins": -36.520179748535156, + "logps/rejected": -140.7160186767578, + "loss": 0.7705, + "rewards/chosen": 1.4532076120376587, + "rewards/margins": 0.19381985068321228, + "rewards/rejected": 1.259387731552124, + "step": 3720 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.93, + "learning_rate": 7.808325923489361e-06, + "logps/chosen": -180.3607635498047, + "logps/margins": -6.179944038391113, + "logps/rejected": -174.18081665039062, + "loss": 0.7403, + "rewards/chosen": 2.095268487930298, + "rewards/margins": 0.3455480635166168, + "rewards/rejected": 1.7497203350067139, + "step": 3730 + }, + { + "accuracy": 0.625, + "epoch": 0.94, + "learning_rate": 7.79748604387622e-06, + "logps/chosen": -145.1133575439453, + "logps/margins": -23.09562110900879, + "logps/rejected": -122.01773834228516, + "loss": 0.7348, + "rewards/chosen": 1.813936471939087, + "rewards/margins": 0.3782232701778412, + "rewards/rejected": 1.4357131719589233, + "step": 3740 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.94, + "learning_rate": 7.786626990287403e-06, + "logps/chosen": -160.62728881835938, + "logps/margins": -16.29179573059082, + "logps/rejected": -144.33551025390625, + "loss": 0.7393, + "rewards/chosen": 1.7499898672103882, + "rewards/margins": 0.27588844299316406, + "rewards/rejected": 1.4741013050079346, + "step": 3750 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.94, + "learning_rate": 7.775748837150887e-06, + "logps/chosen": -138.15023803710938, + "logps/margins": 11.254546165466309, + "logps/rejected": -149.40476989746094, + "loss": 0.7397, + "rewards/chosen": 1.5744158029556274, + "rewards/margins": -0.11831261217594147, + "rewards/rejected": 1.6927284002304077, + "step": 3760 + }, + { + "accuracy": 0.5625, + "epoch": 0.94, + "learning_rate": 7.764851659025557e-06, + "logps/chosen": -182.69862365722656, + "logps/margins": -28.24844741821289, + "logps/rejected": -154.45018005371094, + "loss": 0.8041, + "rewards/chosen": 1.814875602722168, + "rewards/margins": 0.14602433145046234, + "rewards/rejected": 1.6688512563705444, + "step": 3770 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.94, + "learning_rate": 7.753935530600701e-06, + "logps/chosen": -148.72219848632812, + "logps/margins": 26.870590209960938, + "logps/rejected": -175.59280395507812, + "loss": 0.7701, + "rewards/chosen": 1.6487045288085938, + "rewards/margins": 0.2798656225204468, + "rewards/rejected": 1.3688390254974365, + "step": 3780 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.95, + "learning_rate": 7.743000526695483e-06, + "logps/chosen": -154.10372924804688, + "logps/margins": -19.46083641052246, + "logps/rejected": -134.6428985595703, + "loss": 0.814, + "rewards/chosen": 1.5845978260040283, + "rewards/margins": 0.22639739513397217, + "rewards/rejected": 1.3582004308700562, + "step": 3790 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.95, + "learning_rate": 7.732046722258449e-06, + "logps/chosen": -128.5774383544922, + "logps/margins": 44.12200927734375, + "logps/rejected": -172.69947814941406, + "loss": 0.7305, + "rewards/chosen": 1.4345492124557495, + "rewards/margins": 0.1706922948360443, + "rewards/rejected": 1.2638570070266724, + "step": 3800 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.95, + "learning_rate": 7.721074192366995e-06, + "logps/chosen": -182.56686401367188, + "logps/margins": -63.1977424621582, + "logps/rejected": -119.369140625, + "loss": 0.753, + "rewards/chosen": 1.6460587978363037, + "rewards/margins": 0.32916659116744995, + "rewards/rejected": 1.3168920278549194, + "step": 3810 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.95, + "learning_rate": 7.710083012226867e-06, + "logps/chosen": -153.7138671875, + "logps/margins": 25.897411346435547, + "logps/rejected": -179.61126708984375, + "loss": 0.7474, + "rewards/chosen": 1.5397789478302002, + "rewards/margins": 0.23249849677085876, + "rewards/rejected": 1.3072805404663086, + "step": 3820 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.96, + "learning_rate": 7.69907325717164e-06, + "logps/chosen": -141.46884155273438, + "logps/margins": -4.953783988952637, + "logps/rejected": -136.51507568359375, + "loss": 0.6873, + "rewards/chosen": 1.8768560886383057, + "rewards/margins": 0.4181802272796631, + "rewards/rejected": 1.4586756229400635, + "step": 3830 + }, + { + "accuracy": 0.625, + "epoch": 0.96, + "learning_rate": 7.6880450026622e-06, + "logps/chosen": -154.4681854248047, + "logps/margins": 18.503726959228516, + "logps/rejected": -172.97189331054688, + "loss": 0.7152, + "rewards/chosen": 1.681918740272522, + "rewards/margins": 0.39809325337409973, + "rewards/rejected": 1.2838256359100342, + "step": 3840 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.96, + "learning_rate": 7.676998324286227e-06, + "logps/chosen": -146.54550170898438, + "logps/margins": 2.264216899871826, + "logps/rejected": -148.80972290039062, + "loss": 0.7201, + "rewards/chosen": 1.6331093311309814, + "rewards/margins": 0.2702735960483551, + "rewards/rejected": 1.3628356456756592, + "step": 3850 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.96, + "learning_rate": 7.665933297757681e-06, + "logps/chosen": -188.87542724609375, + "logps/margins": -4.7278594970703125, + "logps/rejected": -184.14756774902344, + "loss": 0.8157, + "rewards/chosen": 1.738433599472046, + "rewards/margins": 0.28007808327674866, + "rewards/rejected": 1.4583555459976196, + "step": 3860 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.97, + "learning_rate": 7.654849998916279e-06, + "logps/chosen": -137.9560546875, + "logps/margins": 15.675437927246094, + "logps/rejected": -153.63150024414062, + "loss": 0.7777, + "rewards/chosen": 1.5735784769058228, + "rewards/margins": 0.2500787377357483, + "rewards/rejected": 1.3234997987747192, + "step": 3870 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.97, + "learning_rate": 7.643748503726972e-06, + "logps/chosen": -148.77391052246094, + "logps/margins": 11.068714141845703, + "logps/rejected": -159.84262084960938, + "loss": 0.7857, + "rewards/chosen": 1.5443099737167358, + "rewards/margins": 0.19855117797851562, + "rewards/rejected": 1.3457587957382202, + "step": 3880 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 0.97, + "learning_rate": 7.63262888827944e-06, + "logps/chosen": -172.26815795898438, + "logps/margins": -22.975425720214844, + "logps/rejected": -149.29270935058594, + "loss": 0.7007, + "rewards/chosen": 1.791285753250122, + "rewards/margins": 0.5659340023994446, + "rewards/rejected": 1.2253516912460327, + "step": 3890 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.97, + "learning_rate": 7.621491228787549e-06, + "logps/chosen": -135.41494750976562, + "logps/margins": 11.223118782043457, + "logps/rejected": -146.63807678222656, + "loss": 0.749, + "rewards/chosen": 1.467739462852478, + "rewards/margins": 0.27108117938041687, + "rewards/rejected": 1.1966582536697388, + "step": 3900 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.98, + "learning_rate": 7.610335601588841e-06, + "logps/chosen": -139.9392547607422, + "logps/margins": -2.3184967041015625, + "logps/rejected": -137.62075805664062, + "loss": 0.72, + "rewards/chosen": 1.1532618999481201, + "rewards/margins": 0.17669668793678284, + "rewards/rejected": 0.9765651822090149, + "step": 3910 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.98, + "learning_rate": 7.5991620831440115e-06, + "logps/chosen": -151.81661987304688, + "logps/margins": -13.098960876464844, + "logps/rejected": -138.7176513671875, + "loss": 0.7655, + "rewards/chosen": 1.327757716178894, + "rewards/margins": 0.21372659504413605, + "rewards/rejected": 1.1140310764312744, + "step": 3920 + }, + { + "accuracy": 0.5, + "epoch": 0.98, + "learning_rate": 7.587970750036382e-06, + "logps/chosen": -139.4608917236328, + "logps/margins": 43.11005401611328, + "logps/rejected": -182.57093811035156, + "loss": 0.7331, + "rewards/chosen": 1.573331594467163, + "rewards/margins": 0.09147181361913681, + "rewards/rejected": 1.481859803199768, + "step": 3930 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.98, + "learning_rate": 7.576761678971374e-06, + "logps/chosen": -185.39028930664062, + "logps/margins": -18.38593292236328, + "logps/rejected": -167.00436401367188, + "loss": 0.7898, + "rewards/chosen": 1.3942126035690308, + "rewards/margins": -0.03146573528647423, + "rewards/rejected": 1.4256784915924072, + "step": 3940 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.99, + "learning_rate": 7.565534946775987e-06, + "logps/chosen": -149.90420532226562, + "logps/margins": 9.147459030151367, + "logps/rejected": -159.05166625976562, + "loss": 0.7562, + "rewards/chosen": 1.4092555046081543, + "rewards/margins": 0.2562856078147888, + "rewards/rejected": 1.1529698371887207, + "step": 3950 + }, + { + "accuracy": 0.625, + "epoch": 0.99, + "learning_rate": 7.554290630398266e-06, + "logps/chosen": -192.16375732421875, + "logps/margins": -16.23194694519043, + "logps/rejected": -175.93179321289062, + "loss": 0.7734, + "rewards/chosen": 1.9337066411972046, + "rewards/margins": 0.36547115445137024, + "rewards/rejected": 1.5682355165481567, + "step": 3960 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.99, + "learning_rate": 7.5430288069067845e-06, + "logps/chosen": -148.0521240234375, + "logps/margins": 13.075075149536133, + "logps/rejected": -161.12718200683594, + "loss": 0.7528, + "rewards/chosen": 1.4726979732513428, + "rewards/margins": 0.19843187928199768, + "rewards/rejected": 1.2742661237716675, + "step": 3970 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.99, + "learning_rate": 7.531749553490104e-06, + "logps/chosen": -164.43051147460938, + "logps/margins": -4.897665977478027, + "logps/rejected": -159.53282165527344, + "loss": 0.904, + "rewards/chosen": 1.2066371440887451, + "rewards/margins": 0.09104237705469131, + "rewards/rejected": 1.115594744682312, + "step": 3980 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.0, + "learning_rate": 7.520452947456253e-06, + "logps/chosen": -136.290283203125, + "logps/margins": -15.671384811401367, + "logps/rejected": -120.618896484375, + "loss": 0.8051, + "rewards/chosen": 1.5731546878814697, + "rewards/margins": 0.22923961281776428, + "rewards/rejected": 1.3439149856567383, + "step": 3990 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.0, + "learning_rate": 7.509139066232199e-06, + "logps/chosen": -177.18353271484375, + "logps/margins": -12.226296424865723, + "logps/rejected": -164.95724487304688, + "loss": 0.7727, + "rewards/chosen": 1.8766725063323975, + "rewards/margins": 0.3457058370113373, + "rewards/rejected": 1.5309667587280273, + "step": 4000 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 1.0, + "learning_rate": 7.497807987363302e-06, + "logps/chosen": -174.06900024414062, + "logps/margins": 15.997894287109375, + "logps/rejected": -190.06689453125, + "loss": 0.7832, + "rewards/chosen": 1.6984426975250244, + "rewards/margins": 0.02764594554901123, + "rewards/rejected": 1.6707966327667236, + "step": 4010 + }, + { + "accuracy": 0.5625, + "epoch": 1.0, + "learning_rate": 7.486459788512807e-06, + "logps/chosen": -172.26309204101562, + "logps/margins": 13.097381591796875, + "logps/rejected": -185.3604736328125, + "loss": 0.7865, + "rewards/chosen": 1.7627757787704468, + "rewards/margins": 0.2671945095062256, + "rewards/rejected": 1.4955812692642212, + "step": 4020 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.01, + "learning_rate": 7.475094547461292e-06, + "logps/chosen": -160.47525024414062, + "logps/margins": -0.2944812774658203, + "logps/rejected": -160.18075561523438, + "loss": 0.737, + "rewards/chosen": 1.5085080862045288, + "rewards/margins": 0.24001213908195496, + "rewards/rejected": 1.2684959173202515, + "step": 4030 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.01, + "learning_rate": 7.46371234210615e-06, + "logps/chosen": -167.26171875, + "logps/margins": 2.0640063285827637, + "logps/rejected": -169.3257293701172, + "loss": 0.7642, + "rewards/chosen": 1.8065824508666992, + "rewards/margins": 0.19954144954681396, + "rewards/rejected": 1.6070410013198853, + "step": 4040 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.01, + "learning_rate": 7.4523132504610385e-06, + "logps/chosen": -176.74020385742188, + "logps/margins": 2.258197546005249, + "logps/rejected": -178.9984130859375, + "loss": 0.7187, + "rewards/chosen": 1.6201982498168945, + "rewards/margins": 0.30109286308288574, + "rewards/rejected": 1.3191055059432983, + "step": 4050 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.01, + "learning_rate": 7.440897350655356e-06, + "logps/chosen": -169.9674530029297, + "logps/margins": 3.152961015701294, + "logps/rejected": -173.12039184570312, + "loss": 0.686, + "rewards/chosen": 1.5426915884017944, + "rewards/margins": 0.5120700001716614, + "rewards/rejected": 1.0306216478347778, + "step": 4060 + }, + { + "accuracy": 0.5, + "epoch": 1.02, + "learning_rate": 7.429464720933708e-06, + "logps/chosen": -172.28445434570312, + "logps/margins": 5.591904640197754, + "logps/rejected": -177.8763427734375, + "loss": 0.7539, + "rewards/chosen": 1.620485544204712, + "rewards/margins": 0.15299120545387268, + "rewards/rejected": 1.467494249343872, + "step": 4070 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.02, + "learning_rate": 7.4180154396553635e-06, + "logps/chosen": -182.07839965820312, + "logps/margins": -23.49332046508789, + "logps/rejected": -158.5850830078125, + "loss": 0.734, + "rewards/chosen": 1.6804590225219727, + "rewards/margins": 0.20921507477760315, + "rewards/rejected": 1.4712437391281128, + "step": 4080 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.02, + "learning_rate": 7.406549585293723e-06, + "logps/chosen": -141.35646057128906, + "logps/margins": -2.136355400085449, + "logps/rejected": -139.2200927734375, + "loss": 0.6839, + "rewards/chosen": 1.5659420490264893, + "rewards/margins": 0.4100092053413391, + "rewards/rejected": 1.155932903289795, + "step": 4090 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.02, + "learning_rate": 7.395067236435779e-06, + "logps/chosen": -166.07357788085938, + "logps/margins": -18.360103607177734, + "logps/rejected": -147.71347045898438, + "loss": 0.7753, + "rewards/chosen": 1.3077415227890015, + "rewards/margins": -0.0014196217525750399, + "rewards/rejected": 1.3091611862182617, + "step": 4100 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.03, + "learning_rate": 7.383568471781575e-06, + "logps/chosen": -154.1851043701172, + "logps/margins": -15.830960273742676, + "logps/rejected": -138.35415649414062, + "loss": 0.7385, + "rewards/chosen": 1.4739139080047607, + "rewards/margins": 0.2409222573041916, + "rewards/rejected": 1.2329918146133423, + "step": 4110 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.03, + "learning_rate": 7.372053370143671e-06, + "logps/chosen": -171.49594116210938, + "logps/margins": -2.2147507667541504, + "logps/rejected": -169.2811737060547, + "loss": 0.7531, + "rewards/chosen": 1.4803273677825928, + "rewards/margins": 0.22889061272144318, + "rewards/rejected": 1.251436710357666, + "step": 4120 + }, + { + "accuracy": 0.4375, + "epoch": 1.03, + "learning_rate": 7.360522010446598e-06, + "logps/chosen": -172.559814453125, + "logps/margins": -4.311659812927246, + "logps/rejected": -168.2481689453125, + "loss": 0.7692, + "rewards/chosen": 1.453960657119751, + "rewards/margins": 0.009570717811584473, + "rewards/rejected": 1.4443899393081665, + "step": 4130 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.03, + "learning_rate": 7.348974471726324e-06, + "logps/chosen": -169.0891876220703, + "logps/margins": 14.942840576171875, + "logps/rejected": -184.03204345703125, + "loss": 0.7398, + "rewards/chosen": 2.0780797004699707, + "rewards/margins": 0.4016752243041992, + "rewards/rejected": 1.676404595375061, + "step": 4140 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 1.04, + "learning_rate": 7.337410833129702e-06, + "logps/chosen": -177.57479858398438, + "logps/margins": -49.22270584106445, + "logps/rejected": -128.35211181640625, + "loss": 0.7239, + "rewards/chosen": 1.6473169326782227, + "rewards/margins": 0.14460408687591553, + "rewards/rejected": 1.5027129650115967, + "step": 4150 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.04, + "learning_rate": 7.325831173913941e-06, + "logps/chosen": -146.4510955810547, + "logps/margins": -3.8535125255584717, + "logps/rejected": -142.59756469726562, + "loss": 0.7712, + "rewards/chosen": 1.5185668468475342, + "rewards/margins": 0.22370903193950653, + "rewards/rejected": 1.2948578596115112, + "step": 4160 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.04, + "learning_rate": 7.3142355734460505e-06, + "logps/chosen": -149.79026794433594, + "logps/margins": 1.4765808582305908, + "logps/rejected": -151.26686096191406, + "loss": 0.7457, + "rewards/chosen": 1.6303132772445679, + "rewards/margins": 0.054227955639362335, + "rewards/rejected": 1.5760853290557861, + "step": 4170 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.04, + "learning_rate": 7.3026241112023e-06, + "logps/chosen": -144.48402404785156, + "logps/margins": 1.231209397315979, + "logps/rejected": -145.71524047851562, + "loss": 0.7475, + "rewards/chosen": 1.5024534463882446, + "rewards/margins": 0.03385138511657715, + "rewards/rejected": 1.4686022996902466, + "step": 4180 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.05, + "learning_rate": 7.290996866767679e-06, + "logps/chosen": -164.1485137939453, + "logps/margins": -20.309356689453125, + "logps/rejected": -143.8391571044922, + "loss": 0.7513, + "rewards/chosen": 1.7277233600616455, + "rewards/margins": 0.5337013602256775, + "rewards/rejected": 1.1940219402313232, + "step": 4190 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.05, + "learning_rate": 7.279353919835349e-06, + "logps/chosen": -177.1483612060547, + "logps/margins": -28.922061920166016, + "logps/rejected": -148.226318359375, + "loss": 0.6995, + "rewards/chosen": 1.7972309589385986, + "rewards/margins": 0.4310983717441559, + "rewards/rejected": 1.366132378578186, + "step": 4200 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.05, + "learning_rate": 7.267695350206094e-06, + "logps/chosen": -144.38973999023438, + "logps/margins": 10.877660751342773, + "logps/rejected": -155.2673797607422, + "loss": 0.8322, + "rewards/chosen": 1.438655138015747, + "rewards/margins": 0.23612526059150696, + "rewards/rejected": 1.2025299072265625, + "step": 4210 + }, + { + "accuracy": 0.699999988079071, + "epoch": 1.05, + "learning_rate": 7.256021237787775e-06, + "logps/chosen": -155.82058715820312, + "logps/margins": 0.8385831117630005, + "logps/rejected": -156.65916442871094, + "loss": 0.728, + "rewards/chosen": 1.9981924295425415, + "rewards/margins": 0.5415070652961731, + "rewards/rejected": 1.4566853046417236, + "step": 4220 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.06, + "learning_rate": 7.244331662594787e-06, + "logps/chosen": -167.79994201660156, + "logps/margins": -6.7847795486450195, + "logps/rejected": -161.01515197753906, + "loss": 0.8022, + "rewards/chosen": 1.7649271488189697, + "rewards/margins": 0.01399625837802887, + "rewards/rejected": 1.7509310245513916, + "step": 4230 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.06, + "learning_rate": 7.232626704747502e-06, + "logps/chosen": -125.04264831542969, + "logps/margins": 12.3101806640625, + "logps/rejected": -137.3528289794922, + "loss": 0.7487, + "rewards/chosen": 1.5529954433441162, + "rewards/margins": 0.21140269935131073, + "rewards/rejected": 1.341592788696289, + "step": 4240 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.06, + "learning_rate": 7.22090644447173e-06, + "logps/chosen": -137.6590118408203, + "logps/margins": 24.768798828125, + "logps/rejected": -162.42779541015625, + "loss": 0.8526, + "rewards/chosen": 1.300793170928955, + "rewards/margins": -0.022921044379472733, + "rewards/rejected": 1.323714256286621, + "step": 4250 + }, + { + "accuracy": 0.5625, + "epoch": 1.06, + "learning_rate": 7.209170962098161e-06, + "logps/chosen": -157.95462036132812, + "logps/margins": -33.26263427734375, + "logps/rejected": -124.69200134277344, + "loss": 0.7308, + "rewards/chosen": 1.5474984645843506, + "rewards/margins": 0.1565721333026886, + "rewards/rejected": 1.3909262418746948, + "step": 4260 + }, + { + "accuracy": 0.625, + "epoch": 1.07, + "learning_rate": 7.197420338061818e-06, + "logps/chosen": -163.55955505371094, + "logps/margins": -10.182560920715332, + "logps/rejected": -153.3769989013672, + "loss": 0.7416, + "rewards/chosen": 1.7411181926727295, + "rewards/margins": 0.4385937750339508, + "rewards/rejected": 1.3025243282318115, + "step": 4270 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.07, + "learning_rate": 7.1856546529015055e-06, + "logps/chosen": -123.6063003540039, + "logps/margins": 13.551968574523926, + "logps/rejected": -137.1582794189453, + "loss": 0.7675, + "rewards/chosen": 1.3951785564422607, + "rewards/margins": 0.17083539068698883, + "rewards/rejected": 1.2243432998657227, + "step": 4280 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.07, + "learning_rate": 7.173873987259254e-06, + "logps/chosen": -179.62075805664062, + "logps/margins": -38.27435302734375, + "logps/rejected": -141.34640502929688, + "loss": 0.7412, + "rewards/chosen": 1.5116660594940186, + "rewards/margins": 0.26735085248947144, + "rewards/rejected": 1.2443150281906128, + "step": 4290 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.07, + "learning_rate": 7.1620784218797724e-06, + "logps/chosen": -170.80270385742188, + "logps/margins": -0.8492755889892578, + "logps/rejected": -169.95346069335938, + "loss": 0.7269, + "rewards/chosen": 1.8155040740966797, + "rewards/margins": 0.3146992325782776, + "rewards/rejected": 1.5008049011230469, + "step": 4300 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.08, + "learning_rate": 7.150268037609893e-06, + "logps/chosen": -145.50621032714844, + "logps/margins": -4.444832801818848, + "logps/rejected": -141.06137084960938, + "loss": 0.743, + "rewards/chosen": 1.6890052556991577, + "rewards/margins": 0.23061080276966095, + "rewards/rejected": 1.4583944082260132, + "step": 4310 + }, + { + "accuracy": 0.625, + "epoch": 1.08, + "learning_rate": 7.138442915398021e-06, + "logps/chosen": -149.55722045898438, + "logps/margins": -9.289469718933105, + "logps/rejected": -140.2677459716797, + "loss": 0.7399, + "rewards/chosen": 1.3576009273529053, + "rewards/margins": 0.11820483207702637, + "rewards/rejected": 1.239396095275879, + "step": 4320 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.08, + "learning_rate": 7.126603136293564e-06, + "logps/chosen": -181.40245056152344, + "logps/margins": -5.146039009094238, + "logps/rejected": -176.2564239501953, + "loss": 0.7677, + "rewards/chosen": 1.7800486087799072, + "rewards/margins": -0.07062134891748428, + "rewards/rejected": 1.8506698608398438, + "step": 4330 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.08, + "learning_rate": 7.114748781446397e-06, + "logps/chosen": -163.51194763183594, + "logps/margins": -5.786983489990234, + "logps/rejected": -157.7249755859375, + "loss": 0.7623, + "rewards/chosen": 1.6984193325042725, + "rewards/margins": 0.06969909369945526, + "rewards/rejected": 1.6287205219268799, + "step": 4340 + }, + { + "accuracy": 0.5, + "epoch": 1.09, + "learning_rate": 7.102879932106296e-06, + "logps/chosen": -161.2931365966797, + "logps/margins": 5.0004563331604, + "logps/rejected": -166.29360961914062, + "loss": 0.7716, + "rewards/chosen": 1.6429948806762695, + "rewards/margins": -0.09241975098848343, + "rewards/rejected": 1.735414743423462, + "step": 4350 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.09, + "learning_rate": 7.090996669622381e-06, + "logps/chosen": -154.58047485351562, + "logps/margins": -6.111647129058838, + "logps/rejected": -148.46884155273438, + "loss": 0.7228, + "rewards/chosen": 1.878414511680603, + "rewards/margins": 0.49233540892601013, + "rewards/rejected": 1.3860793113708496, + "step": 4360 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.09, + "learning_rate": 7.079099075442559e-06, + "logps/chosen": -169.3294677734375, + "logps/margins": -27.80454444885254, + "logps/rejected": -141.52493286132812, + "loss": 0.7339, + "rewards/chosen": 1.7300500869750977, + "rewards/margins": 0.06426367908716202, + "rewards/rejected": 1.6657863855361938, + "step": 4370 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.09, + "learning_rate": 7.067187231112969e-06, + "logps/chosen": -134.58961486816406, + "logps/margins": -16.743486404418945, + "logps/rejected": -117.84611511230469, + "loss": 0.6995, + "rewards/chosen": 1.6379743814468384, + "rewards/margins": 0.5354331135749817, + "rewards/rejected": 1.102541208267212, + "step": 4380 + }, + { + "accuracy": 0.699999988079071, + "epoch": 1.1, + "learning_rate": 7.055261218277418e-06, + "logps/chosen": -149.31375122070312, + "logps/margins": -3.707923412322998, + "logps/rejected": -145.6058349609375, + "loss": 0.67, + "rewards/chosen": 1.926243543624878, + "rewards/margins": 0.724867582321167, + "rewards/rejected": 1.2013760805130005, + "step": 4390 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.1, + "learning_rate": 7.043321118676826e-06, + "logps/chosen": -187.04171752929688, + "logps/margins": -15.380317687988281, + "logps/rejected": -171.66140747070312, + "loss": 0.6769, + "rewards/chosen": 1.7952638864517212, + "rewards/margins": 0.5859988927841187, + "rewards/rejected": 1.2092649936676025, + "step": 4400 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.1, + "learning_rate": 7.03136701414866e-06, + "logps/chosen": -156.5198516845703, + "logps/margins": -12.496341705322266, + "logps/rejected": -144.02349853515625, + "loss": 0.7027, + "rewards/chosen": 1.772512674331665, + "rewards/margins": 0.2056950330734253, + "rewards/rejected": 1.5668176412582397, + "step": 4410 + }, + { + "accuracy": 0.5625, + "epoch": 1.1, + "learning_rate": 7.019398986626381e-06, + "logps/chosen": -163.7215118408203, + "logps/margins": -28.276840209960938, + "logps/rejected": -135.44467163085938, + "loss": 0.7138, + "rewards/chosen": 1.6778713464736938, + "rewards/margins": 0.12970879673957825, + "rewards/rejected": 1.548162579536438, + "step": 4420 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.11, + "learning_rate": 7.007417118138879e-06, + "logps/chosen": -154.2147979736328, + "logps/margins": -1.8716331720352173, + "logps/rejected": -152.34317016601562, + "loss": 0.7362, + "rewards/chosen": 1.58733332157135, + "rewards/margins": 0.19882899522781372, + "rewards/rejected": 1.3885042667388916, + "step": 4430 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.11, + "learning_rate": 6.9954214908099064e-06, + "logps/chosen": -141.78665161132812, + "logps/margins": -21.271657943725586, + "logps/rejected": -120.51497650146484, + "loss": 0.7502, + "rewards/chosen": 1.447582483291626, + "rewards/margins": 0.28070706129074097, + "rewards/rejected": 1.1668753623962402, + "step": 4440 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.11, + "learning_rate": 6.9834121868575165e-06, + "logps/chosen": -182.0108642578125, + "logps/margins": -37.0256233215332, + "logps/rejected": -144.98524475097656, + "loss": 0.717, + "rewards/chosen": 1.8737337589263916, + "rewards/margins": 0.47903576493263245, + "rewards/rejected": 1.394697666168213, + "step": 4450 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.11, + "learning_rate": 6.97138928859351e-06, + "logps/chosen": -147.23580932617188, + "logps/margins": 17.645111083984375, + "logps/rejected": -164.8809051513672, + "loss": 0.7096, + "rewards/chosen": 1.4505765438079834, + "rewards/margins": 0.0757400244474411, + "rewards/rejected": 1.3748365640640259, + "step": 4460 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.12, + "learning_rate": 6.959352878422856e-06, + "logps/chosen": -186.58334350585938, + "logps/margins": -21.638961791992188, + "logps/rejected": -164.94439697265625, + "loss": 0.6958, + "rewards/chosen": 1.5360299348831177, + "rewards/margins": 0.12578359246253967, + "rewards/rejected": 1.4102461338043213, + "step": 4470 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.12, + "learning_rate": 6.947303038843141e-06, + "logps/chosen": -158.3206787109375, + "logps/margins": 2.165442943572998, + "logps/rejected": -160.4861297607422, + "loss": 0.7901, + "rewards/chosen": 1.6433191299438477, + "rewards/margins": 0.22361640632152557, + "rewards/rejected": 1.4197026491165161, + "step": 4480 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.12, + "learning_rate": 6.935239852443989e-06, + "logps/chosen": -159.32958984375, + "logps/margins": -23.872114181518555, + "logps/rejected": -135.4574737548828, + "loss": 0.7102, + "rewards/chosen": 1.581380009651184, + "rewards/margins": 0.24028635025024414, + "rewards/rejected": 1.34109365940094, + "step": 4490 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.12, + "learning_rate": 6.92316340190651e-06, + "logps/chosen": -198.69090270996094, + "logps/margins": -23.158008575439453, + "logps/rejected": -175.5328826904297, + "loss": 0.7108, + "rewards/chosen": 1.8736053705215454, + "rewards/margins": 0.3234180808067322, + "rewards/rejected": 1.550187349319458, + "step": 4500 + }, + { + "accuracy": 0.625, + "epoch": 1.13, + "learning_rate": 6.911073770002718e-06, + "logps/chosen": -157.5779266357422, + "logps/margins": -30.79327392578125, + "logps/rejected": -126.78465270996094, + "loss": 0.7329, + "rewards/chosen": 1.625817894935608, + "rewards/margins": 0.45610690116882324, + "rewards/rejected": 1.1697109937667847, + "step": 4510 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.13, + "learning_rate": 6.898971039594983e-06, + "logps/chosen": -153.0753173828125, + "logps/margins": -8.957990646362305, + "logps/rejected": -144.11732482910156, + "loss": 0.7981, + "rewards/chosen": 1.60171639919281, + "rewards/margins": 0.17321477830410004, + "rewards/rejected": 1.4285017251968384, + "step": 4520 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.13, + "learning_rate": 6.886855293635444e-06, + "logps/chosen": -164.86683654785156, + "logps/margins": -13.63398265838623, + "logps/rejected": -151.2328643798828, + "loss": 0.7408, + "rewards/chosen": 1.8711875677108765, + "rewards/margins": 0.42857685685157776, + "rewards/rejected": 1.4426108598709106, + "step": 4530 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.14, + "learning_rate": 6.874726615165453e-06, + "logps/chosen": -138.4912109375, + "logps/margins": -21.73735809326172, + "logps/rejected": -116.75386047363281, + "loss": 0.7853, + "rewards/chosen": 1.2705638408660889, + "rewards/margins": -0.026124369353055954, + "rewards/rejected": 1.296688199043274, + "step": 4540 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.14, + "learning_rate": 6.862585087314998e-06, + "logps/chosen": -157.77963256835938, + "logps/margins": -24.891592025756836, + "logps/rejected": -132.88804626464844, + "loss": 0.7262, + "rewards/chosen": 1.5259357690811157, + "rewards/margins": 0.2608209252357483, + "rewards/rejected": 1.2651147842407227, + "step": 4550 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.14, + "learning_rate": 6.850430793302137e-06, + "logps/chosen": -134.70266723632812, + "logps/margins": 32.7626838684082, + "logps/rejected": -167.46533203125, + "loss": 0.778, + "rewards/chosen": 1.5278652906417847, + "rewards/margins": 0.14634697139263153, + "rewards/rejected": 1.3815182447433472, + "step": 4560 + }, + { + "accuracy": 0.5625, + "epoch": 1.14, + "learning_rate": 6.838263816432433e-06, + "logps/chosen": -144.55160522460938, + "logps/margins": -26.96480941772461, + "logps/rejected": -117.58680725097656, + "loss": 0.7416, + "rewards/chosen": 1.9209918975830078, + "rewards/margins": 0.3755248785018921, + "rewards/rejected": 1.5454667806625366, + "step": 4570 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.15, + "learning_rate": 6.826084240098369e-06, + "logps/chosen": -157.5438690185547, + "logps/margins": 4.361745834350586, + "logps/rejected": -161.90560913085938, + "loss": 0.7426, + "rewards/chosen": 1.524963140487671, + "rewards/margins": 0.08754493296146393, + "rewards/rejected": 1.4374183416366577, + "step": 4580 + }, + { + "accuracy": 0.5625, + "epoch": 1.15, + "learning_rate": 6.813892147778789e-06, + "logps/chosen": -156.1680450439453, + "logps/margins": -16.739023208618164, + "logps/rejected": -139.42901611328125, + "loss": 0.758, + "rewards/chosen": 1.4516359567642212, + "rewards/margins": 0.1226608008146286, + "rewards/rejected": 1.328974962234497, + "step": 4590 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.15, + "learning_rate": 6.801687623038324e-06, + "logps/chosen": -158.90335083007812, + "logps/margins": 2.980297803878784, + "logps/rejected": -161.88365173339844, + "loss": 0.7656, + "rewards/chosen": 1.615722417831421, + "rewards/margins": 0.21170561015605927, + "rewards/rejected": 1.4040168523788452, + "step": 4600 + }, + { + "accuracy": 0.5625, + "epoch": 1.15, + "learning_rate": 6.789470749526814e-06, + "logps/chosen": -164.73312377929688, + "logps/margins": -19.807016372680664, + "logps/rejected": -144.92611694335938, + "loss": 0.7119, + "rewards/chosen": 1.5639922618865967, + "rewards/margins": 0.19748859107494354, + "rewards/rejected": 1.3665037155151367, + "step": 4610 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.16, + "learning_rate": 6.777241610978736e-06, + "logps/chosen": -167.4090576171875, + "logps/margins": -9.23045539855957, + "logps/rejected": -158.17860412597656, + "loss": 0.7487, + "rewards/chosen": 1.492275595664978, + "rewards/margins": 0.11757062375545502, + "rewards/rejected": 1.3747050762176514, + "step": 4620 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.16, + "learning_rate": 6.765000291212632e-06, + "logps/chosen": -145.50167846679688, + "logps/margins": 26.4615535736084, + "logps/rejected": -171.96322631835938, + "loss": 0.7526, + "rewards/chosen": 1.7057723999023438, + "rewards/margins": 0.305040568113327, + "rewards/rejected": 1.4007318019866943, + "step": 4630 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.16, + "learning_rate": 6.752746874130538e-06, + "logps/chosen": -151.9162139892578, + "logps/margins": -17.291059494018555, + "logps/rejected": -134.62515258789062, + "loss": 0.7187, + "rewards/chosen": 1.5880920886993408, + "rewards/margins": 0.4144747853279114, + "rewards/rejected": 1.1736172437667847, + "step": 4640 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.16, + "learning_rate": 6.740481443717403e-06, + "logps/chosen": -147.86163330078125, + "logps/margins": -6.114023685455322, + "logps/rejected": -141.74758911132812, + "loss": 0.7385, + "rewards/chosen": 1.6136209964752197, + "rewards/margins": 0.17959676682949066, + "rewards/rejected": 1.4340239763259888, + "step": 4650 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.17, + "learning_rate": 6.728204084040513e-06, + "logps/chosen": -184.00587463378906, + "logps/margins": -40.49638366699219, + "logps/rejected": -143.50949096679688, + "loss": 0.7737, + "rewards/chosen": 1.5602309703826904, + "rewards/margins": 0.15864290297031403, + "rewards/rejected": 1.4015882015228271, + "step": 4660 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.17, + "learning_rate": 6.715914879248918e-06, + "logps/chosen": -178.43313598632812, + "logps/margins": -22.022329330444336, + "logps/rejected": -156.41079711914062, + "loss": 0.8003, + "rewards/chosen": 1.5573582649230957, + "rewards/margins": 0.1599697321653366, + "rewards/rejected": 1.3973884582519531, + "step": 4670 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.17, + "learning_rate": 6.703613913572857e-06, + "logps/chosen": -159.41444396972656, + "logps/margins": -21.514019012451172, + "logps/rejected": -137.90042114257812, + "loss": 0.7118, + "rewards/chosen": 1.942657470703125, + "rewards/margins": 0.40954598784446716, + "rewards/rejected": 1.533111572265625, + "step": 4680 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.17, + "learning_rate": 6.691301271323178e-06, + "logps/chosen": -140.05186462402344, + "logps/margins": -15.965902328491211, + "logps/rejected": -124.08597564697266, + "loss": 0.7296, + "rewards/chosen": 1.7811996936798096, + "rewards/margins": 0.3652705252170563, + "rewards/rejected": 1.4159290790557861, + "step": 4690 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.18, + "learning_rate": 6.678977036890754e-06, + "logps/chosen": -157.95156860351562, + "logps/margins": 6.859846591949463, + "logps/rejected": -164.8114013671875, + "loss": 0.7019, + "rewards/chosen": 1.6833372116088867, + "rewards/margins": 0.11221089214086533, + "rewards/rejected": 1.5711263418197632, + "step": 4700 + }, + { + "accuracy": 0.737500011920929, + "epoch": 1.18, + "learning_rate": 6.666641294745923e-06, + "logps/chosen": -146.15841674804688, + "logps/margins": 13.32597541809082, + "logps/rejected": -159.48439025878906, + "loss": 0.7545, + "rewards/chosen": 1.709477186203003, + "rewards/margins": 0.25687000155448914, + "rewards/rejected": 1.452607274055481, + "step": 4710 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.18, + "learning_rate": 6.654294129437885e-06, + "logps/chosen": -151.46261596679688, + "logps/margins": -9.845657348632812, + "logps/rejected": -141.61697387695312, + "loss": 0.7667, + "rewards/chosen": 1.5328562259674072, + "rewards/margins": 0.08666034042835236, + "rewards/rejected": 1.4461958408355713, + "step": 4720 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.18, + "learning_rate": 6.641935625594138e-06, + "logps/chosen": -178.7781982421875, + "logps/margins": 28.1187801361084, + "logps/rejected": -206.89700317382812, + "loss": 0.7624, + "rewards/chosen": 1.5296825170516968, + "rewards/margins": 0.3762456774711609, + "rewards/rejected": 1.1534368991851807, + "step": 4730 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.19, + "learning_rate": 6.629565867919897e-06, + "logps/chosen": -153.3395538330078, + "logps/margins": -14.405820846557617, + "logps/rejected": -138.9337158203125, + "loss": 0.732, + "rewards/chosen": 1.5936164855957031, + "rewards/margins": 0.404587984085083, + "rewards/rejected": 1.1890285015106201, + "step": 4740 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.19, + "learning_rate": 6.617184941197509e-06, + "logps/chosen": -158.10215759277344, + "logps/margins": 1.7747207880020142, + "logps/rejected": -159.87689208984375, + "loss": 0.7268, + "rewards/chosen": 1.8418605327606201, + "rewards/margins": 0.3878012001514435, + "rewards/rejected": 1.454059362411499, + "step": 4750 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.19, + "learning_rate": 6.604792930285875e-06, + "logps/chosen": -157.2828826904297, + "logps/margins": -21.42497444152832, + "logps/rejected": -135.85789489746094, + "loss": 0.7455, + "rewards/chosen": 1.6213194131851196, + "rewards/margins": 0.3128574788570404, + "rewards/rejected": 1.3084617853164673, + "step": 4760 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.19, + "learning_rate": 6.592389920119864e-06, + "logps/chosen": -171.2572021484375, + "logps/margins": -20.275569915771484, + "logps/rejected": -150.98162841796875, + "loss": 0.8146, + "rewards/chosen": 1.721016526222229, + "rewards/margins": 0.12398360669612885, + "rewards/rejected": 1.597032904624939, + "step": 4770 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.2, + "learning_rate": 6.579975995709736e-06, + "logps/chosen": -142.03707885742188, + "logps/margins": -21.605384826660156, + "logps/rejected": -120.43168640136719, + "loss": 0.7, + "rewards/chosen": 1.5253794193267822, + "rewards/margins": 0.2672179639339447, + "rewards/rejected": 1.2581614255905151, + "step": 4780 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.2, + "learning_rate": 6.567551242140555e-06, + "logps/chosen": -157.65887451171875, + "logps/margins": 5.36818265914917, + "logps/rejected": -163.02706909179688, + "loss": 0.8064, + "rewards/chosen": 1.6819689273834229, + "rewards/margins": 0.06451436132192612, + "rewards/rejected": 1.6174545288085938, + "step": 4790 + }, + { + "accuracy": 0.625, + "epoch": 1.2, + "learning_rate": 6.555115744571615e-06, + "logps/chosen": -147.1795654296875, + "logps/margins": 21.539039611816406, + "logps/rejected": -168.71861267089844, + "loss": 0.7622, + "rewards/chosen": 1.353946566581726, + "rewards/margins": 0.3413313627243042, + "rewards/rejected": 1.0126152038574219, + "step": 4800 + }, + { + "accuracy": 0.625, + "epoch": 1.2, + "learning_rate": 6.542669588235841e-06, + "logps/chosen": -161.65402221679688, + "logps/margins": 7.2505202293396, + "logps/rejected": -168.90457153320312, + "loss": 0.7022, + "rewards/chosen": 1.7740504741668701, + "rewards/margins": 0.35845574736595154, + "rewards/rejected": 1.4155948162078857, + "step": 4810 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.21, + "learning_rate": 6.53021285843922e-06, + "logps/chosen": -159.63368225097656, + "logps/margins": -11.669804573059082, + "logps/rejected": -147.96389770507812, + "loss": 0.7178, + "rewards/chosen": 1.607292890548706, + "rewards/margins": 0.34129029512405396, + "rewards/rejected": 1.2660025358200073, + "step": 4820 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.21, + "learning_rate": 6.517745640560201e-06, + "logps/chosen": -159.4026336669922, + "logps/margins": -11.522550582885742, + "logps/rejected": -147.88009643554688, + "loss": 0.7043, + "rewards/chosen": 1.55764901638031, + "rewards/margins": 0.2731221318244934, + "rewards/rejected": 1.284527063369751, + "step": 4830 + }, + { + "accuracy": 0.5625, + "epoch": 1.21, + "learning_rate": 6.505268020049127e-06, + "logps/chosen": -143.4359588623047, + "logps/margins": 10.558084487915039, + "logps/rejected": -153.99404907226562, + "loss": 0.7497, + "rewards/chosen": 1.4685932397842407, + "rewards/margins": 0.2947949469089508, + "rewards/rejected": 1.1737983226776123, + "step": 4840 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.21, + "learning_rate": 6.492780082427637e-06, + "logps/chosen": -189.3929901123047, + "logps/margins": -49.71562957763672, + "logps/rejected": -139.6773681640625, + "loss": 0.7285, + "rewards/chosen": 1.7643178701400757, + "rewards/margins": 0.4061128497123718, + "rewards/rejected": 1.3582050800323486, + "step": 4850 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.22, + "learning_rate": 6.480281913288083e-06, + "logps/chosen": -160.0203399658203, + "logps/margins": -16.416759490966797, + "logps/rejected": -143.60357666015625, + "loss": 0.6886, + "rewards/chosen": 1.394260287284851, + "rewards/margins": 0.09722080826759338, + "rewards/rejected": 1.2970393896102905, + "step": 4860 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.22, + "learning_rate": 6.467773598292946e-06, + "logps/chosen": -155.70057678222656, + "logps/margins": 16.508405685424805, + "logps/rejected": -172.20895385742188, + "loss": 0.7475, + "rewards/chosen": 1.60134756565094, + "rewards/margins": 0.18941007554531097, + "rewards/rejected": 1.4119374752044678, + "step": 4870 + }, + { + "accuracy": 0.38749998807907104, + "epoch": 1.22, + "learning_rate": 6.455255223174243e-06, + "logps/chosen": -157.41146850585938, + "logps/margins": 18.128217697143555, + "logps/rejected": -175.53970336914062, + "loss": 0.824, + "rewards/chosen": 1.5767641067504883, + "rewards/margins": -0.034236349165439606, + "rewards/rejected": 1.611000418663025, + "step": 4880 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.22, + "learning_rate": 6.442726873732947e-06, + "logps/chosen": -152.3926544189453, + "logps/margins": -25.8064022064209, + "logps/rejected": -126.58624267578125, + "loss": 0.706, + "rewards/chosen": 1.5150153636932373, + "rewards/margins": 0.406000554561615, + "rewards/rejected": 1.109014868736267, + "step": 4890 + }, + { + "accuracy": 0.625, + "epoch": 1.23, + "learning_rate": 6.430188635838392e-06, + "logps/chosen": -172.89962768554688, + "logps/margins": -40.604454040527344, + "logps/rejected": -132.29519653320312, + "loss": 0.7495, + "rewards/chosen": 1.4899805784225464, + "rewards/margins": 0.25234854221343994, + "rewards/rejected": 1.237632155418396, + "step": 4900 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.23, + "learning_rate": 6.41764059542769e-06, + "logps/chosen": -141.91195678710938, + "logps/margins": 7.650518894195557, + "logps/rejected": -149.56246948242188, + "loss": 0.7188, + "rewards/chosen": 1.6412073373794556, + "rewards/margins": 0.34154170751571655, + "rewards/rejected": 1.2996656894683838, + "step": 4910 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.23, + "learning_rate": 6.40508283850514e-06, + "logps/chosen": -169.83633422851562, + "logps/margins": -10.035348892211914, + "logps/rejected": -159.8009796142578, + "loss": 0.7436, + "rewards/chosen": 1.6321032047271729, + "rewards/margins": 0.18890276551246643, + "rewards/rejected": 1.4432004690170288, + "step": 4920 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.23, + "learning_rate": 6.3925154511416345e-06, + "logps/chosen": -127.94288635253906, + "logps/margins": 8.8143310546875, + "logps/rejected": -136.75721740722656, + "loss": 0.7437, + "rewards/chosen": 1.6725658178329468, + "rewards/margins": 0.2667856812477112, + "rewards/rejected": 1.4057800769805908, + "step": 4930 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.23, + "learning_rate": 6.3799385194740735e-06, + "logps/chosen": -122.64558410644531, + "logps/margins": 12.790786743164062, + "logps/rejected": -135.43637084960938, + "loss": 0.7081, + "rewards/chosen": 1.461777925491333, + "rewards/margins": 0.3406018614768982, + "rewards/rejected": 1.12117600440979, + "step": 4940 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.24, + "learning_rate": 6.367352129704777e-06, + "logps/chosen": -149.3013916015625, + "logps/margins": 16.090312957763672, + "logps/rejected": -165.3917236328125, + "loss": 0.725, + "rewards/chosen": 1.5528943538665771, + "rewards/margins": 0.3267061114311218, + "rewards/rejected": 1.2261884212493896, + "step": 4950 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 1.24, + "learning_rate": 6.354756368100888e-06, + "logps/chosen": -179.0587615966797, + "logps/margins": -17.813461303710938, + "logps/rejected": -161.2452850341797, + "loss": 0.7819, + "rewards/chosen": 1.5369234085083008, + "rewards/margins": -0.058191489428281784, + "rewards/rejected": 1.5951149463653564, + "step": 4960 + }, + { + "accuracy": 0.5625, + "epoch": 1.24, + "learning_rate": 6.342151320993788e-06, + "logps/chosen": -122.87980651855469, + "logps/margins": 21.917587280273438, + "logps/rejected": -144.7974090576172, + "loss": 0.7713, + "rewards/chosen": 1.431814432144165, + "rewards/margins": 0.16931791603565216, + "rewards/rejected": 1.2624963521957397, + "step": 4970 + }, + { + "accuracy": 0.5625, + "epoch": 1.25, + "learning_rate": 6.329537074778494e-06, + "logps/chosen": -179.00137329101562, + "logps/margins": -8.288309097290039, + "logps/rejected": -170.71307373046875, + "loss": 0.7655, + "rewards/chosen": 1.9044349193572998, + "rewards/margins": 0.2563362717628479, + "rewards/rejected": 1.6480985879898071, + "step": 4980 + }, + { + "accuracy": 0.699999988079071, + "epoch": 1.25, + "learning_rate": 6.316913715913082e-06, + "logps/chosen": -142.3838348388672, + "logps/margins": 5.806654930114746, + "logps/rejected": -148.19049072265625, + "loss": 0.7863, + "rewards/chosen": 1.7120916843414307, + "rewards/margins": 0.5562489628791809, + "rewards/rejected": 1.1558425426483154, + "step": 4990 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.25, + "learning_rate": 6.304281330918079e-06, + "logps/chosen": -130.83453369140625, + "logps/margins": 2.3205955028533936, + "logps/rejected": -133.15513610839844, + "loss": 0.7871, + "rewards/chosen": 1.5653244256973267, + "rewards/margins": 0.38230443000793457, + "rewards/rejected": 1.1830198764801025, + "step": 5000 + }, + { + "accuracy": 0.625, + "epoch": 1.25, + "learning_rate": 6.291640006375882e-06, + "logps/chosen": -151.2340850830078, + "logps/margins": -10.201196670532227, + "logps/rejected": -141.0328826904297, + "loss": 0.8121, + "rewards/chosen": 1.6454102993011475, + "rewards/margins": 0.23128366470336914, + "rewards/rejected": 1.4141267538070679, + "step": 5010 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.25, + "learning_rate": 6.27898982893016e-06, + "logps/chosen": -171.12440490722656, + "logps/margins": -1.9663136005401611, + "logps/rejected": -169.1580810546875, + "loss": 0.7431, + "rewards/chosen": 1.7115182876586914, + "rewards/margins": 0.37104958295822144, + "rewards/rejected": 1.3404687643051147, + "step": 5020 + }, + { + "accuracy": 0.5625, + "epoch": 1.26, + "learning_rate": 6.2663308852852525e-06, + "logps/chosen": -155.58973693847656, + "logps/margins": -4.143744468688965, + "logps/rejected": -151.4459991455078, + "loss": 0.7712, + "rewards/chosen": 1.5143144130706787, + "rewards/margins": 0.32183441519737244, + "rewards/rejected": 1.1924798488616943, + "step": 5030 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.26, + "learning_rate": 6.253663262205593e-06, + "logps/chosen": -177.77529907226562, + "logps/margins": -58.832725524902344, + "logps/rejected": -118.94258117675781, + "loss": 0.788, + "rewards/chosen": 1.6441223621368408, + "rewards/margins": 0.2770431637763977, + "rewards/rejected": 1.3670790195465088, + "step": 5040 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.26, + "learning_rate": 6.240987046515096e-06, + "logps/chosen": -131.17593383789062, + "logps/margins": 10.835644721984863, + "logps/rejected": -142.01156616210938, + "loss": 0.7118, + "rewards/chosen": 1.3152341842651367, + "rewards/margins": 0.2057148963212967, + "rewards/rejected": 1.109519362449646, + "step": 5050 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.27, + "learning_rate": 6.228302325096574e-06, + "logps/chosen": -158.71505737304688, + "logps/margins": -12.218968391418457, + "logps/rejected": -146.49607849121094, + "loss": 0.7528, + "rewards/chosen": 2.0128684043884277, + "rewards/margins": 0.41734856367111206, + "rewards/rejected": 1.5955199003219604, + "step": 5060 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.27, + "learning_rate": 6.215609184891133e-06, + "logps/chosen": -174.6935272216797, + "logps/margins": -22.03658103942871, + "logps/rejected": -152.65695190429688, + "loss": 0.7107, + "rewards/chosen": 1.7201868295669556, + "rewards/margins": 0.35576948523521423, + "rewards/rejected": 1.3644174337387085, + "step": 5070 + }, + { + "accuracy": 0.5625, + "epoch": 1.27, + "learning_rate": 6.202907712897591e-06, + "logps/chosen": -148.1775665283203, + "logps/margins": 13.041943550109863, + "logps/rejected": -161.21949768066406, + "loss": 0.8075, + "rewards/chosen": 1.9800361394882202, + "rewards/margins": 0.23238544166088104, + "rewards/rejected": 1.7476507425308228, + "step": 5080 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.27, + "learning_rate": 6.190197996171861e-06, + "logps/chosen": -169.82705688476562, + "logps/margins": 31.24056053161621, + "logps/rejected": -201.06761169433594, + "loss": 0.7652, + "rewards/chosen": 1.46670663356781, + "rewards/margins": 0.1525311917066574, + "rewards/rejected": 1.3141753673553467, + "step": 5090 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.27, + "learning_rate": 6.177480121826372e-06, + "logps/chosen": -176.33309936523438, + "logps/margins": -26.30515480041504, + "logps/rejected": -150.02793884277344, + "loss": 0.7385, + "rewards/chosen": 1.586025595664978, + "rewards/margins": 0.0009313821792602539, + "rewards/rejected": 1.5850943326950073, + "step": 5100 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.28, + "learning_rate": 6.1647541770294614e-06, + "logps/chosen": -157.5048828125, + "logps/margins": 11.542614936828613, + "logps/rejected": -169.04751586914062, + "loss": 0.7027, + "rewards/chosen": 1.8223068714141846, + "rewards/margins": 0.4998514652252197, + "rewards/rejected": 1.3224552869796753, + "step": 5110 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.28, + "learning_rate": 6.152020249004786e-06, + "logps/chosen": -157.6554412841797, + "logps/margins": -12.17426586151123, + "logps/rejected": -145.48118591308594, + "loss": 0.6801, + "rewards/chosen": 1.6239858865737915, + "rewards/margins": 0.3804410696029663, + "rewards/rejected": 1.2435449361801147, + "step": 5120 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.28, + "learning_rate": 6.139278425030717e-06, + "logps/chosen": -159.12953186035156, + "logps/margins": -6.698736667633057, + "logps/rejected": -152.4307861328125, + "loss": 0.6877, + "rewards/chosen": 1.6081244945526123, + "rewards/margins": 0.40253764390945435, + "rewards/rejected": 1.2055867910385132, + "step": 5130 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.28, + "learning_rate": 6.126528792439743e-06, + "logps/chosen": -189.52659606933594, + "logps/margins": -40.89472579956055, + "logps/rejected": -148.63186645507812, + "loss": 0.7399, + "rewards/chosen": 1.3909708261489868, + "rewards/margins": 0.011243830434978008, + "rewards/rejected": 1.3797270059585571, + "step": 5140 + }, + { + "accuracy": 0.625, + "epoch": 1.29, + "learning_rate": 6.113771438617877e-06, + "logps/chosen": -150.76974487304688, + "logps/margins": 18.153573989868164, + "logps/rejected": -168.92333984375, + "loss": 0.7704, + "rewards/chosen": 1.578174114227295, + "rewards/margins": 0.29279306530952454, + "rewards/rejected": 1.2853810787200928, + "step": 5150 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.29, + "learning_rate": 6.101006451004049e-06, + "logps/chosen": -142.00057983398438, + "logps/margins": -0.3727971911430359, + "logps/rejected": -141.62779235839844, + "loss": 0.7128, + "rewards/chosen": 1.6239397525787354, + "rewards/margins": 0.49854373931884766, + "rewards/rejected": 1.1253958940505981, + "step": 5160 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.29, + "learning_rate": 6.088233917089513e-06, + "logps/chosen": -141.65484619140625, + "logps/margins": -8.830339431762695, + "logps/rejected": -132.8245086669922, + "loss": 0.6555, + "rewards/chosen": 1.7338759899139404, + "rewards/margins": 0.5189031362533569, + "rewards/rejected": 1.2149730920791626, + "step": 5170 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.29, + "learning_rate": 6.075453924417248e-06, + "logps/chosen": -168.49122619628906, + "logps/margins": -42.011863708496094, + "logps/rejected": -126.4793472290039, + "loss": 0.7436, + "rewards/chosen": 1.810417890548706, + "rewards/margins": 0.20606985688209534, + "rewards/rejected": 1.6043481826782227, + "step": 5180 + }, + { + "accuracy": 0.625, + "epoch": 1.3, + "learning_rate": 6.06266656058135e-06, + "logps/chosen": -157.50521850585938, + "logps/margins": -12.730794906616211, + "logps/rejected": -144.7744140625, + "loss": 0.6994, + "rewards/chosen": 1.4022176265716553, + "rewards/margins": 0.22986188530921936, + "rewards/rejected": 1.1723556518554688, + "step": 5190 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.3, + "learning_rate": 6.04987191322644e-06, + "logps/chosen": -173.8582763671875, + "logps/margins": -15.063423156738281, + "logps/rejected": -158.7948455810547, + "loss": 0.691, + "rewards/chosen": 1.5703957080841064, + "rewards/margins": 0.2514503598213196, + "rewards/rejected": 1.3189454078674316, + "step": 5200 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.3, + "learning_rate": 6.037070070047063e-06, + "logps/chosen": -175.35873413085938, + "logps/margins": -50.71623992919922, + "logps/rejected": -124.64250183105469, + "loss": 0.7064, + "rewards/chosen": 2.007868766784668, + "rewards/margins": 0.5471641421318054, + "rewards/rejected": 1.4607045650482178, + "step": 5210 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.3, + "learning_rate": 6.0242611187870756e-06, + "logps/chosen": -135.90957641601562, + "logps/margins": -12.127862930297852, + "logps/rejected": -123.78172302246094, + "loss": 0.8495, + "rewards/chosen": 1.7278273105621338, + "rewards/margins": 0.31238415837287903, + "rewards/rejected": 1.4154431819915771, + "step": 5220 + }, + { + "accuracy": 0.625, + "epoch": 1.31, + "learning_rate": 6.011445147239063e-06, + "logps/chosen": -146.40159606933594, + "logps/margins": -18.971403121948242, + "logps/rejected": -127.43019104003906, + "loss": 0.7001, + "rewards/chosen": 1.6193259954452515, + "rewards/margins": 0.3653852045536041, + "rewards/rejected": 1.2539408206939697, + "step": 5230 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.31, + "learning_rate": 5.998622243243723e-06, + "logps/chosen": -187.36436462402344, + "logps/margins": 1.5915634632110596, + "logps/rejected": -188.95594787597656, + "loss": 0.6876, + "rewards/chosen": 1.7392375469207764, + "rewards/margins": 0.4315492510795593, + "rewards/rejected": 1.3076883554458618, + "step": 5240 + }, + { + "accuracy": 0.5625, + "epoch": 1.31, + "learning_rate": 5.985792494689265e-06, + "logps/chosen": -145.41183471679688, + "logps/margins": 3.6851959228515625, + "logps/rejected": -149.09703063964844, + "loss": 0.7216, + "rewards/chosen": 1.6869869232177734, + "rewards/margins": 0.29782360792160034, + "rewards/rejected": 1.3891632556915283, + "step": 5250 + }, + { + "accuracy": 0.625, + "epoch": 1.31, + "learning_rate": 5.97295598951082e-06, + "logps/chosen": -164.96902465820312, + "logps/margins": -0.19436874985694885, + "logps/rejected": -164.77464294433594, + "loss": 0.7062, + "rewards/chosen": 1.6675602197647095, + "rewards/margins": 0.4102051854133606, + "rewards/rejected": 1.2573550939559937, + "step": 5260 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.32, + "learning_rate": 5.960112815689819e-06, + "logps/chosen": -145.9226837158203, + "logps/margins": -0.42312487959861755, + "logps/rejected": -145.49954223632812, + "loss": 0.7508, + "rewards/chosen": 1.3987632989883423, + "rewards/margins": 0.17990709841251373, + "rewards/rejected": 1.2188560962677002, + "step": 5270 + }, + { + "accuracy": 0.5625, + "epoch": 1.32, + "learning_rate": 5.9472630612534055e-06, + "logps/chosen": -135.58197021484375, + "logps/margins": -0.3163507580757141, + "logps/rejected": -135.265625, + "loss": 0.7564, + "rewards/chosen": 1.4992902278900146, + "rewards/margins": 0.23811094462871552, + "rewards/rejected": 1.2611792087554932, + "step": 5280 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.32, + "learning_rate": 5.934406814273829e-06, + "logps/chosen": -167.3329620361328, + "logps/margins": -12.887275695800781, + "logps/rejected": -154.44570922851562, + "loss": 0.756, + "rewards/chosen": 1.5215914249420166, + "rewards/margins": 0.21805009245872498, + "rewards/rejected": 1.3035413026809692, + "step": 5290 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.32, + "learning_rate": 5.921544162867829e-06, + "logps/chosen": -161.7301788330078, + "logps/margins": -26.398529052734375, + "logps/rejected": -135.33164978027344, + "loss": 0.7043, + "rewards/chosen": 1.704332709312439, + "rewards/margins": 0.3039229214191437, + "rewards/rejected": 1.4004098176956177, + "step": 5300 + }, + { + "accuracy": 0.5625, + "epoch": 1.33, + "learning_rate": 5.908675195196053e-06, + "logps/chosen": -148.4464111328125, + "logps/margins": 16.045602798461914, + "logps/rejected": -164.4920196533203, + "loss": 0.7021, + "rewards/chosen": 1.5473792552947998, + "rewards/margins": 0.17474989593029022, + "rewards/rejected": 1.3726295232772827, + "step": 5310 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.33, + "learning_rate": 5.895799999462433e-06, + "logps/chosen": -134.166015625, + "logps/margins": 1.5058866739273071, + "logps/rejected": -135.67190551757812, + "loss": 0.8065, + "rewards/chosen": 1.5211851596832275, + "rewards/margins": 0.44475632905960083, + "rewards/rejected": 1.076428771018982, + "step": 5320 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.33, + "learning_rate": 5.882918663913587e-06, + "logps/chosen": -171.63241577148438, + "logps/margins": 27.762609481811523, + "logps/rejected": -199.39501953125, + "loss": 0.7176, + "rewards/chosen": 1.5653254985809326, + "rewards/margins": 0.18104076385498047, + "rewards/rejected": 1.3842847347259521, + "step": 5330 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.33, + "learning_rate": 5.870031276838223e-06, + "logps/chosen": -137.03872680664062, + "logps/margins": -9.805635452270508, + "logps/rejected": -127.23310852050781, + "loss": 0.7345, + "rewards/chosen": 1.5989949703216553, + "rewards/margins": 0.30325326323509216, + "rewards/rejected": 1.2957415580749512, + "step": 5340 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.34, + "learning_rate": 5.857137926566516e-06, + "logps/chosen": -160.0192108154297, + "logps/margins": 25.99019432067871, + "logps/rejected": -186.0093994140625, + "loss": 0.7601, + "rewards/chosen": 1.7115033864974976, + "rewards/margins": -0.004513204097747803, + "rewards/rejected": 1.716016411781311, + "step": 5350 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.34, + "learning_rate": 5.84423870146952e-06, + "logps/chosen": -182.6631622314453, + "logps/margins": 1.3142541646957397, + "logps/rejected": -183.97738647460938, + "loss": 0.7329, + "rewards/chosen": 1.8308374881744385, + "rewards/margins": 0.31798094511032104, + "rewards/rejected": 1.5128564834594727, + "step": 5360 + }, + { + "accuracy": 0.5, + "epoch": 1.34, + "learning_rate": 5.831333689958555e-06, + "logps/chosen": -172.5367889404297, + "logps/margins": -6.605926513671875, + "logps/rejected": -165.93087768554688, + "loss": 0.7356, + "rewards/chosen": 1.5800632238388062, + "rewards/margins": 0.2258564978837967, + "rewards/rejected": 1.3542068004608154, + "step": 5370 + }, + { + "accuracy": 0.625, + "epoch": 1.34, + "learning_rate": 5.818422980484597e-06, + "logps/chosen": -134.66600036621094, + "logps/margins": 29.83827781677246, + "logps/rejected": -164.5042724609375, + "loss": 0.704, + "rewards/chosen": 1.3328368663787842, + "rewards/margins": 0.3366268575191498, + "rewards/rejected": 0.9962100982666016, + "step": 5380 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.35, + "learning_rate": 5.805506661537678e-06, + "logps/chosen": -176.3031463623047, + "logps/margins": -45.98903274536133, + "logps/rejected": -130.31411743164062, + "loss": 0.7184, + "rewards/chosen": 1.4929618835449219, + "rewards/margins": 0.0719807967543602, + "rewards/rejected": 1.4209811687469482, + "step": 5390 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.35, + "learning_rate": 5.792584821646278e-06, + "logps/chosen": -129.8586883544922, + "logps/margins": -2.243013381958008, + "logps/rejected": -127.61567687988281, + "loss": 0.6933, + "rewards/chosen": 1.510141134262085, + "rewards/margins": 0.6418448686599731, + "rewards/rejected": 0.8682962656021118, + "step": 5400 + }, + { + "accuracy": 0.675000011920929, + "epoch": 1.35, + "learning_rate": 5.779657549376716e-06, + "logps/chosen": -189.4259033203125, + "logps/margins": -22.510643005371094, + "logps/rejected": -166.91526794433594, + "loss": 0.6758, + "rewards/chosen": 1.813315749168396, + "rewards/margins": 0.5916121006011963, + "rewards/rejected": 1.2217038869857788, + "step": 5410 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.35, + "learning_rate": 5.766724933332545e-06, + "logps/chosen": -186.01217651367188, + "logps/margins": -31.01999282836914, + "logps/rejected": -154.9921875, + "loss": 0.7017, + "rewards/chosen": 1.8551937341690063, + "rewards/margins": 0.42761191725730896, + "rewards/rejected": 1.4275819063186646, + "step": 5420 + }, + { + "accuracy": 0.5625, + "epoch": 1.36, + "learning_rate": 5.753787062153947e-06, + "logps/chosen": -128.2163543701172, + "logps/margins": 39.49677658081055, + "logps/rejected": -167.713134765625, + "loss": 0.7297, + "rewards/chosen": 1.3552345037460327, + "rewards/margins": 0.11643465608358383, + "rewards/rejected": 1.2387999296188354, + "step": 5430 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.36, + "learning_rate": 5.7408440245171185e-06, + "logps/chosen": -170.32110595703125, + "logps/margins": -10.857378005981445, + "logps/rejected": -159.46372985839844, + "loss": 0.7105, + "rewards/chosen": 2.0298686027526855, + "rewards/margins": 0.4292038083076477, + "rewards/rejected": 1.6006648540496826, + "step": 5440 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.36, + "learning_rate": 5.72789590913367e-06, + "logps/chosen": -166.81875610351562, + "logps/margins": 28.72918128967285, + "logps/rejected": -195.54794311523438, + "loss": 0.7614, + "rewards/chosen": 2.0543618202209473, + "rewards/margins": 0.49189358949661255, + "rewards/rejected": 1.562468409538269, + "step": 5450 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.36, + "learning_rate": 5.714942804750012e-06, + "logps/chosen": -173.94088745117188, + "logps/margins": 4.52652645111084, + "logps/rejected": -178.46742248535156, + "loss": 0.8208, + "rewards/chosen": 1.8147386312484741, + "rewards/margins": 0.3165207803249359, + "rewards/rejected": 1.4982178211212158, + "step": 5460 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.37, + "learning_rate": 5.7019848001467516e-06, + "logps/chosen": -205.4329376220703, + "logps/margins": -48.174537658691406, + "logps/rejected": -157.25839233398438, + "loss": 0.718, + "rewards/chosen": 1.7369821071624756, + "rewards/margins": 0.18487076461315155, + "rewards/rejected": 1.5521115064620972, + "step": 5470 + }, + { + "accuracy": 0.5625, + "epoch": 1.37, + "learning_rate": 5.6890219841380835e-06, + "logps/chosen": -137.05026245117188, + "logps/margins": 24.883108139038086, + "logps/rejected": -161.93338012695312, + "loss": 0.6838, + "rewards/chosen": 1.8056952953338623, + "rewards/margins": 0.3463909924030304, + "rewards/rejected": 1.4593042135238647, + "step": 5480 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.37, + "learning_rate": 5.676054445571175e-06, + "logps/chosen": -145.9791259765625, + "logps/margins": 8.895891189575195, + "logps/rejected": -154.87503051757812, + "loss": 0.7424, + "rewards/chosen": 1.5609519481658936, + "rewards/margins": 0.13086891174316406, + "rewards/rejected": 1.430083155632019, + "step": 5490 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.38, + "learning_rate": 5.663082273325568e-06, + "logps/chosen": -160.75375366210938, + "logps/margins": -1.261610984802246, + "logps/rejected": -159.4921417236328, + "loss": 0.7634, + "rewards/chosen": 1.6964995861053467, + "rewards/margins": 0.3085925281047821, + "rewards/rejected": 1.3879071474075317, + "step": 5500 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.38, + "learning_rate": 5.6501055563125574e-06, + "logps/chosen": -163.81549072265625, + "logps/margins": 1.4159587621688843, + "logps/rejected": -165.2314453125, + "loss": 0.7543, + "rewards/chosen": 1.4497385025024414, + "rewards/margins": 0.10968782752752304, + "rewards/rejected": 1.3400506973266602, + "step": 5510 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.38, + "learning_rate": 5.637124383474592e-06, + "logps/chosen": -207.0496063232422, + "logps/margins": -26.114797592163086, + "logps/rejected": -180.93484497070312, + "loss": 0.7674, + "rewards/chosen": 1.6942847967147827, + "rewards/margins": 0.32077234983444214, + "rewards/rejected": 1.3735123872756958, + "step": 5520 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.38, + "learning_rate": 5.624138843784662e-06, + "logps/chosen": -183.96987915039062, + "logps/margins": -3.3738930225372314, + "logps/rejected": -180.59597778320312, + "loss": 0.7426, + "rewards/chosen": 1.6988093852996826, + "rewards/margins": 0.48210129141807556, + "rewards/rejected": 1.2167081832885742, + "step": 5530 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.39, + "learning_rate": 5.611149026245683e-06, + "logps/chosen": -149.31764221191406, + "logps/margins": -3.167083263397217, + "logps/rejected": -146.15054321289062, + "loss": 0.7478, + "rewards/chosen": 1.484277606010437, + "rewards/margins": 0.0687042698264122, + "rewards/rejected": 1.415573239326477, + "step": 5540 + }, + { + "accuracy": 0.5625, + "epoch": 1.39, + "learning_rate": 5.598155019889896e-06, + "logps/chosen": -156.6920928955078, + "logps/margins": 10.109004020690918, + "logps/rejected": -166.8011016845703, + "loss": 0.7236, + "rewards/chosen": 1.4883196353912354, + "rewards/margins": 0.16127082705497742, + "rewards/rejected": 1.327048659324646, + "step": 5550 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.39, + "learning_rate": 5.5851569137782465e-06, + "logps/chosen": -154.56295776367188, + "logps/margins": -15.01945686340332, + "logps/rejected": -139.54348754882812, + "loss": 0.797, + "rewards/chosen": 1.2589267492294312, + "rewards/margins": -0.056695032864809036, + "rewards/rejected": 1.3156219720840454, + "step": 5560 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.39, + "learning_rate": 5.5721547969997866e-06, + "logps/chosen": -133.49951171875, + "logps/margins": 6.751640319824219, + "logps/rejected": -140.2511444091797, + "loss": 0.7185, + "rewards/chosen": 1.2730587720870972, + "rewards/margins": 0.09053818881511688, + "rewards/rejected": 1.1825206279754639, + "step": 5570 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.4, + "learning_rate": 5.5604495364333085e-06, + "logps/chosen": -152.4004669189453, + "logps/margins": -5.293655872344971, + "logps/rejected": -147.10678100585938, + "loss": 0.7691, + "rewards/chosen": 1.4804632663726807, + "rewards/margins": 0.14566269516944885, + "rewards/rejected": 1.3348006010055542, + "step": 5580 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.4, + "learning_rate": 5.547440044926198e-06, + "logps/chosen": -140.5923614501953, + "logps/margins": 6.542186737060547, + "logps/rejected": -147.13455200195312, + "loss": 0.782, + "rewards/chosen": 1.136541724205017, + "rewards/margins": 0.13275261223316193, + "rewards/rejected": 1.0037891864776611, + "step": 5590 + }, + { + "accuracy": 0.5625, + "epoch": 1.4, + "learning_rate": 5.534426801263779e-06, + "logps/chosen": -169.9979248046875, + "logps/margins": -22.774517059326172, + "logps/rejected": -147.22340393066406, + "loss": 0.7946, + "rewards/chosen": 1.4123528003692627, + "rewards/margins": 0.162205770611763, + "rewards/rejected": 1.2501471042633057, + "step": 5600 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.4, + "learning_rate": 5.521409894638854e-06, + "logps/chosen": -178.98597717285156, + "logps/margins": -35.57769775390625, + "logps/rejected": -143.4082794189453, + "loss": 0.6927, + "rewards/chosen": 1.6132652759552002, + "rewards/margins": 0.3370493948459625, + "rewards/rejected": 1.27621591091156, + "step": 5610 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.41, + "learning_rate": 5.5083894142693315e-06, + "logps/chosen": -144.6505126953125, + "logps/margins": 1.944797158241272, + "logps/rejected": -146.59530639648438, + "loss": 0.7037, + "rewards/chosen": 1.405866026878357, + "rewards/margins": 0.2136956751346588, + "rewards/rejected": 1.192170262336731, + "step": 5620 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.41, + "learning_rate": 5.495365449397613e-06, + "logps/chosen": -149.2288055419922, + "logps/margins": -12.753352165222168, + "logps/rejected": -136.47544860839844, + "loss": 0.7394, + "rewards/chosen": 1.3791271448135376, + "rewards/margins": 0.18719109892845154, + "rewards/rejected": 1.1919360160827637, + "step": 5630 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.41, + "learning_rate": 5.482338089289982e-06, + "logps/chosen": -183.57162475585938, + "logps/margins": -28.113910675048828, + "logps/rejected": -155.45773315429688, + "loss": 0.7237, + "rewards/chosen": 1.741651177406311, + "rewards/margins": 0.24294829368591309, + "rewards/rejected": 1.4987030029296875, + "step": 5640 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.41, + "learning_rate": 5.469307423235997e-06, + "logps/chosen": -157.01309204101562, + "logps/margins": -14.2610445022583, + "logps/rejected": -142.7520294189453, + "loss": 0.7091, + "rewards/chosen": 1.6885029077529907, + "rewards/margins": 0.3885074257850647, + "rewards/rejected": 1.2999956607818604, + "step": 5650 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 1.42, + "learning_rate": 5.456273540547871e-06, + "logps/chosen": -162.21449279785156, + "logps/margins": -14.72693157196045, + "logps/rejected": -147.48757934570312, + "loss": 0.8143, + "rewards/chosen": 1.530364990234375, + "rewards/margins": -0.1417171210050583, + "rewards/rejected": 1.6720821857452393, + "step": 5660 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.42, + "learning_rate": 5.443236530559863e-06, + "logps/chosen": -170.67613220214844, + "logps/margins": -4.8558807373046875, + "logps/rejected": -165.8202667236328, + "loss": 0.7288, + "rewards/chosen": 1.4650720357894897, + "rewards/margins": 0.26263687014579773, + "rewards/rejected": 1.2024351358413696, + "step": 5670 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.42, + "learning_rate": 5.430196482627673e-06, + "logps/chosen": -158.334228515625, + "logps/margins": 11.41904354095459, + "logps/rejected": -169.75326538085938, + "loss": 0.765, + "rewards/chosen": 1.6002607345581055, + "rewards/margins": 0.24975450336933136, + "rewards/rejected": 1.35050630569458, + "step": 5680 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.42, + "learning_rate": 5.417153486127818e-06, + "logps/chosen": -151.82003784179688, + "logps/margins": -10.875391960144043, + "logps/rejected": -140.9446258544922, + "loss": 0.723, + "rewards/chosen": 1.6592031717300415, + "rewards/margins": 0.2735028862953186, + "rewards/rejected": 1.3857002258300781, + "step": 5690 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.43, + "learning_rate": 5.404107630457024e-06, + "logps/chosen": -161.78799438476562, + "logps/margins": -13.234598159790039, + "logps/rejected": -148.5534210205078, + "loss": 0.754, + "rewards/chosen": 1.5500133037567139, + "rewards/margins": 0.30869191884994507, + "rewards/rejected": 1.2413215637207031, + "step": 5700 + }, + { + "accuracy": 0.625, + "epoch": 1.43, + "learning_rate": 5.3910590050316165e-06, + "logps/chosen": -160.0657196044922, + "logps/margins": -13.919367790222168, + "logps/rejected": -146.14637756347656, + "loss": 0.7357, + "rewards/chosen": 1.483858585357666, + "rewards/margins": 0.419452041387558, + "rewards/rejected": 1.064406394958496, + "step": 5710 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.43, + "learning_rate": 5.378007699286904e-06, + "logps/chosen": -154.39894104003906, + "logps/margins": -9.410590171813965, + "logps/rejected": -144.9883575439453, + "loss": 0.7421, + "rewards/chosen": 1.5918755531311035, + "rewards/margins": 0.2058297097682953, + "rewards/rejected": 1.3860459327697754, + "step": 5720 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.43, + "learning_rate": 5.364953802676563e-06, + "logps/chosen": -139.1897735595703, + "logps/margins": -17.51617431640625, + "logps/rejected": -121.673583984375, + "loss": 0.7099, + "rewards/chosen": 1.3949676752090454, + "rewards/margins": 0.2602354884147644, + "rewards/rejected": 1.1347322463989258, + "step": 5730 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.44, + "learning_rate": 5.351897404672033e-06, + "logps/chosen": -136.61865234375, + "logps/margins": -13.783647537231445, + "logps/rejected": -122.8349838256836, + "loss": 0.7439, + "rewards/chosen": 1.2710367441177368, + "rewards/margins": 0.032409533858299255, + "rewards/rejected": 1.2386271953582764, + "step": 5740 + }, + { + "accuracy": 0.625, + "epoch": 1.44, + "learning_rate": 5.338838594761896e-06, + "logps/chosen": -168.38601684570312, + "logps/margins": -28.6624698638916, + "logps/rejected": -139.72354125976562, + "loss": 0.7537, + "rewards/chosen": 1.7158035039901733, + "rewards/margins": 0.3373875916004181, + "rewards/rejected": 1.378415822982788, + "step": 5750 + }, + { + "accuracy": 0.5625, + "epoch": 1.44, + "learning_rate": 5.325777462451262e-06, + "logps/chosen": -146.7727813720703, + "logps/margins": 12.768696784973145, + "logps/rejected": -159.54147338867188, + "loss": 0.8007, + "rewards/chosen": 1.3416997194290161, + "rewards/margins": 0.13176873326301575, + "rewards/rejected": 1.2099311351776123, + "step": 5760 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.44, + "learning_rate": 5.3127140972611654e-06, + "logps/chosen": -156.87179565429688, + "logps/margins": 8.120512008666992, + "logps/rejected": -164.99227905273438, + "loss": 0.7656, + "rewards/chosen": 1.3799902200698853, + "rewards/margins": 0.03770449757575989, + "rewards/rejected": 1.3422856330871582, + "step": 5770 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.45, + "learning_rate": 5.299648588727937e-06, + "logps/chosen": -153.733642578125, + "logps/margins": -9.389876365661621, + "logps/rejected": -144.34378051757812, + "loss": 0.7328, + "rewards/chosen": 1.4924728870391846, + "rewards/margins": 0.3293708562850952, + "rewards/rejected": 1.163102149963379, + "step": 5780 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.45, + "learning_rate": 5.286581026402603e-06, + "logps/chosen": -153.72698974609375, + "logps/margins": -9.940195083618164, + "logps/rejected": -143.7867889404297, + "loss": 0.6922, + "rewards/chosen": 1.8377565145492554, + "rewards/margins": 0.35638314485549927, + "rewards/rejected": 1.4813735485076904, + "step": 5790 + }, + { + "accuracy": 0.5625, + "epoch": 1.45, + "learning_rate": 5.273511499850267e-06, + "logps/chosen": -160.94082641601562, + "logps/margins": -29.102447509765625, + "logps/rejected": -131.83839416503906, + "loss": 0.7204, + "rewards/chosen": 1.5206208229064941, + "rewards/margins": 0.4060136675834656, + "rewards/rejected": 1.1146070957183838, + "step": 5800 + }, + { + "accuracy": 0.625, + "epoch": 1.45, + "learning_rate": 5.26044009864949e-06, + "logps/chosen": -171.05935668945312, + "logps/margins": 5.292855262756348, + "logps/rejected": -176.35220336914062, + "loss": 0.7462, + "rewards/chosen": 1.9989153146743774, + "rewards/margins": 0.5080349445343018, + "rewards/rejected": 1.4908804893493652, + "step": 5810 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.46, + "learning_rate": 5.247366912391689e-06, + "logps/chosen": -185.25357055664062, + "logps/margins": -32.15409469604492, + "logps/rejected": -153.0994873046875, + "loss": 0.75, + "rewards/chosen": 1.4722042083740234, + "rewards/margins": 0.14152461290359497, + "rewards/rejected": 1.3306795358657837, + "step": 5820 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.46, + "learning_rate": 5.234292030680509e-06, + "logps/chosen": -157.81483459472656, + "logps/margins": -3.082869052886963, + "logps/rejected": -154.73196411132812, + "loss": 0.7286, + "rewards/chosen": 1.8751846551895142, + "rewards/margins": 0.24605628848075867, + "rewards/rejected": 1.6291286945343018, + "step": 5830 + }, + { + "accuracy": 0.6875, + "epoch": 1.46, + "learning_rate": 5.221215543131221e-06, + "logps/chosen": -163.16171264648438, + "logps/margins": -11.626922607421875, + "logps/rejected": -151.53477478027344, + "loss": 0.6477, + "rewards/chosen": 1.732105016708374, + "rewards/margins": 0.3726832866668701, + "rewards/rejected": 1.359421730041504, + "step": 5840 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.46, + "learning_rate": 5.208137539370101e-06, + "logps/chosen": -149.0413360595703, + "logps/margins": 13.026446342468262, + "logps/rejected": -162.06777954101562, + "loss": 0.7375, + "rewards/chosen": 1.8997814655303955, + "rewards/margins": 0.2037171870470047, + "rewards/rejected": 1.6960642337799072, + "step": 5850 + }, + { + "accuracy": 0.625, + "epoch": 1.47, + "learning_rate": 5.195058109033813e-06, + "logps/chosen": -162.4295654296875, + "logps/margins": -4.865842342376709, + "logps/rejected": -157.56370544433594, + "loss": 0.735, + "rewards/chosen": 1.7543100118637085, + "rewards/margins": 0.43926873803138733, + "rewards/rejected": 1.3150413036346436, + "step": 5860 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.47, + "learning_rate": 5.181977341768805e-06, + "logps/chosen": -140.98873901367188, + "logps/margins": 17.297122955322266, + "logps/rejected": -158.285888671875, + "loss": 0.742, + "rewards/chosen": 1.3781960010528564, + "rewards/margins": 0.1770985871553421, + "rewards/rejected": 1.2010974884033203, + "step": 5870 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.47, + "learning_rate": 5.1688953272306855e-06, + "logps/chosen": -154.7940216064453, + "logps/margins": -24.776287078857422, + "logps/rejected": -130.01773071289062, + "loss": 0.6457, + "rewards/chosen": 1.7411186695098877, + "rewards/margins": 0.4411458373069763, + "rewards/rejected": 1.2999727725982666, + "step": 5880 + }, + { + "accuracy": 0.7250000238418579, + "epoch": 1.47, + "learning_rate": 5.155812155083609e-06, + "logps/chosen": -173.3274383544922, + "logps/margins": -27.61617088317871, + "logps/rejected": -145.71127319335938, + "loss": 0.6555, + "rewards/chosen": 1.591841459274292, + "rewards/margins": 0.47192493081092834, + "rewards/rejected": 1.1199166774749756, + "step": 5890 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.48, + "learning_rate": 5.142727914999669e-06, + "logps/chosen": -150.91864013671875, + "logps/margins": 19.20001220703125, + "logps/rejected": -170.11865234375, + "loss": 0.7354, + "rewards/chosen": 1.711281418800354, + "rewards/margins": 0.2782706320285797, + "rewards/rejected": 1.4330108165740967, + "step": 5900 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.48, + "learning_rate": 5.129642696658279e-06, + "logps/chosen": -154.39859008789062, + "logps/margins": 27.85101318359375, + "logps/rejected": -182.24960327148438, + "loss": 0.7114, + "rewards/chosen": 1.7411203384399414, + "rewards/margins": 0.5508637428283691, + "rewards/rejected": 1.1902567148208618, + "step": 5910 + }, + { + "accuracy": 0.5625, + "epoch": 1.48, + "learning_rate": 5.11655658974555e-06, + "logps/chosen": -168.35702514648438, + "logps/margins": -27.318897247314453, + "logps/rejected": -141.03811645507812, + "loss": 0.7338, + "rewards/chosen": 1.6101325750350952, + "rewards/margins": 0.3776584267616272, + "rewards/rejected": 1.2324742078781128, + "step": 5920 + }, + { + "accuracy": 0.6875, + "epoch": 1.48, + "learning_rate": 5.103469683953694e-06, + "logps/chosen": -144.71237182617188, + "logps/margins": -13.303802490234375, + "logps/rejected": -131.40859985351562, + "loss": 0.7454, + "rewards/chosen": 1.5730059146881104, + "rewards/margins": 0.43756207823753357, + "rewards/rejected": 1.135443925857544, + "step": 5930 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.48, + "learning_rate": 5.0903820689803874e-06, + "logps/chosen": -162.18429565429688, + "logps/margins": -10.229942321777344, + "logps/rejected": -151.95436096191406, + "loss": 0.7485, + "rewards/chosen": 1.626556396484375, + "rewards/margins": 0.3839971423149109, + "rewards/rejected": 1.2425591945648193, + "step": 5940 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.49, + "learning_rate": 5.077293834528175e-06, + "logps/chosen": -175.92588806152344, + "logps/margins": -4.947401523590088, + "logps/rejected": -170.978515625, + "loss": 0.723, + "rewards/chosen": 2.0475027561187744, + "rewards/margins": 0.44176197052001953, + "rewards/rejected": 1.6057409048080444, + "step": 5950 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.49, + "learning_rate": 5.064205070303848e-06, + "logps/chosen": -152.53414916992188, + "logps/margins": -5.0932488441467285, + "logps/rejected": -147.44091796875, + "loss": 0.7117, + "rewards/chosen": 1.7001174688339233, + "rewards/margins": 0.4611106812953949, + "rewards/rejected": 1.2390069961547852, + "step": 5960 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.49, + "learning_rate": 5.051115866017823e-06, + "logps/chosen": -164.5131072998047, + "logps/margins": 13.223596572875977, + "logps/rejected": -177.7366943359375, + "loss": 0.7198, + "rewards/chosen": 1.655583381652832, + "rewards/margins": 0.4308537542819977, + "rewards/rejected": 1.2247296571731567, + "step": 5970 + }, + { + "accuracy": 0.5, + "epoch": 1.5, + "learning_rate": 5.038026311383536e-06, + "logps/chosen": -146.76358032226562, + "logps/margins": -16.049083709716797, + "logps/rejected": -130.71450805664062, + "loss": 0.7681, + "rewards/chosen": 1.3532880544662476, + "rewards/margins": -0.13548573851585388, + "rewards/rejected": 1.4887738227844238, + "step": 5980 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.5, + "learning_rate": 5.0249364961168255e-06, + "logps/chosen": -153.42922973632812, + "logps/margins": 18.665233612060547, + "logps/rejected": -172.09446716308594, + "loss": 0.7755, + "rewards/chosen": 1.3128784894943237, + "rewards/margins": 0.15676501393318176, + "rewards/rejected": 1.1561133861541748, + "step": 5990 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.5, + "learning_rate": 5.011846509935314e-06, + "logps/chosen": -145.02706909179688, + "logps/margins": 3.740046739578247, + "logps/rejected": -148.76710510253906, + "loss": 0.7661, + "rewards/chosen": 1.772761583328247, + "rewards/margins": 0.23846474289894104, + "rewards/rejected": 1.5342966318130493, + "step": 6000 + }, + { + "epoch": 1.5, + "eval_accuracy": 0.580338266384778, + "eval_logps/chosen": -159.01666259765625, + "eval_logps/margins": -5.753211975097656, + "eval_logps/rejected": -153.26345825195312, + "eval_loss": 0.7478973269462585, + "eval_rewards/chosen": 1.418882131576538, + "eval_rewards/margins": 0.2543162703514099, + "eval_rewards/rejected": 1.1645658016204834, + "eval_runtime": 1290.8664, + "eval_samples_per_second": 10.993, + "eval_steps_per_second": 1.374, + "step": 6000 + } + ], + "logging_steps": 10, + "max_steps": 12000, + "num_train_epochs": 3, + "save_steps": 3000, + "total_flos": 4.4912043125626634e+18, + "trial_name": null, + "trial_params": null +}