{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 4556,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00877963125548727,
      "grad_norm": 109.13970328247358,
      "learning_rate": 9.980245829675154e-07,
      "logits/chosen": -2.030468702316284,
      "logits/rejected": -1.919531226158142,
      "logps/chosen": -433.3999938964844,
      "logps/rejected": -303.70001220703125,
      "loss": 0.6555,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": -0.0002197265566792339,
      "rewards/margins": 0.09664078056812286,
      "rewards/rejected": -0.09693603217601776,
      "step": 10
    },
    {
      "epoch": 0.01755926251097454,
      "grad_norm": 94.34540367670509,
      "learning_rate": 9.958296751536435e-07,
      "logits/chosen": -2.0238280296325684,
      "logits/rejected": -1.886328101158142,
      "logps/chosen": -482.5,
      "logps/rejected": -309.8500061035156,
      "loss": 0.5293,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.28374022245407104,
      "rewards/margins": 0.501757800579071,
      "rewards/rejected": -0.21784362196922302,
      "step": 20
    },
    {
      "epoch": 0.02633889376646181,
      "grad_norm": 84.05289271642286,
      "learning_rate": 9.936347673397717e-07,
      "logits/chosen": -2.0140624046325684,
      "logits/rejected": -1.7859375476837158,
      "logps/chosen": -377.1000061035156,
      "logps/rejected": -266.32501220703125,
      "loss": 0.4752,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": 0.7591797113418579,
      "rewards/margins": 0.7768310308456421,
      "rewards/rejected": -0.01757202111184597,
      "step": 30
    },
    {
      "epoch": 0.03511852502194908,
      "grad_norm": 70.26702505015753,
      "learning_rate": 9.914398595259e-07,
      "logits/chosen": -1.951171875,
      "logits/rejected": -1.865234375,
      "logps/chosen": -364.8999938964844,
      "logps/rejected": -247.0,
      "loss": 0.4345,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": 0.990429699420929,
      "rewards/margins": 1.031835913658142,
      "rewards/rejected": -0.04274902492761612,
      "step": 40
    },
    {
      "epoch": 0.043898156277436345,
      "grad_norm": 62.654357226046166,
      "learning_rate": 9.89244951712028e-07,
      "logits/chosen": -2.075390577316284,
      "logits/rejected": -1.98046875,
      "logps/chosen": -431.1499938964844,
      "logps/rejected": -273.75,
      "loss": 0.3164,
      "rewards/accuracies": 0.875,
      "rewards/chosen": 1.0437500476837158,
      "rewards/margins": 1.5490233898162842,
      "rewards/rejected": -0.5049804449081421,
      "step": 50
    },
    {
      "epoch": 0.05267778753292362,
      "grad_norm": 72.32495506595563,
      "learning_rate": 9.870500438981562e-07,
      "logits/chosen": -1.935156226158142,
      "logits/rejected": -1.8683593273162842,
      "logps/chosen": -501.0,
      "logps/rejected": -326.8500061035156,
      "loss": 0.275,
      "rewards/accuracies": 0.875,
      "rewards/chosen": 1.001074194908142,
      "rewards/margins": 1.902929663658142,
      "rewards/rejected": -0.9029541015625,
      "step": 60
    },
    {
      "epoch": 0.061457418788410885,
      "grad_norm": 90.82689626044017,
      "learning_rate": 9.848551360842844e-07,
      "logits/chosen": -2.0765624046325684,
      "logits/rejected": -1.990234375,
      "logps/chosen": -455.8500061035156,
      "logps/rejected": -292.8500061035156,
      "loss": 0.3594,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": 0.942919909954071,
      "rewards/margins": 1.808203101158142,
      "rewards/rejected": -0.8660644292831421,
      "step": 70
    },
    {
      "epoch": 0.07023705004389816,
      "grad_norm": 48.71442518982463,
      "learning_rate": 9.826602282704126e-07,
      "logits/chosen": -1.946874976158142,
      "logits/rejected": -1.884374976158142,
      "logps/chosen": -441.29998779296875,
      "logps/rejected": -303.3999938964844,
      "loss": 0.3085,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": 0.9786132574081421,
      "rewards/margins": 1.95703125,
      "rewards/rejected": -0.9785400629043579,
      "step": 80
    },
    {
      "epoch": 0.07901668129938542,
      "grad_norm": 110.03171960576965,
      "learning_rate": 9.804653204565408e-07,
      "logits/chosen": -2.08203125,
      "logits/rejected": -2.0140624046325684,
      "logps/chosen": -477.29998779296875,
      "logps/rejected": -298.6000061035156,
      "loss": 0.2688,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": 0.789599597454071,
      "rewards/margins": 2.289843797683716,
      "rewards/rejected": -1.500390648841858,
      "step": 90
    },
    {
      "epoch": 0.08779631255487269,
      "grad_norm": 90.10158868954922,
      "learning_rate": 9.78270412642669e-07,
      "logits/chosen": -2.047656297683716,
      "logits/rejected": -2.063281297683716,
      "logps/chosen": -441.04998779296875,
      "logps/rejected": -264.3500061035156,
      "loss": 0.2648,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": 1.027929663658142,
      "rewards/margins": 2.478710889816284,
      "rewards/rejected": -1.4507324695587158,
      "step": 100
    },
    {
      "epoch": 0.09657594381035997,
      "grad_norm": 63.29128281876568,
      "learning_rate": 9.760755048287971e-07,
      "logits/chosen": -2.0367188453674316,
      "logits/rejected": -1.9226562976837158,
      "logps/chosen": -435.79998779296875,
      "logps/rejected": -316.75,
      "loss": 0.3114,
      "rewards/accuracies": 0.856249988079071,
      "rewards/chosen": 1.017480492591858,
      "rewards/margins": 2.360156297683716,
      "rewards/rejected": -1.344335913658142,
      "step": 110
    },
    {
      "epoch": 0.10535557506584724,
      "grad_norm": 82.60117425160554,
      "learning_rate": 9.738805970149253e-07,
      "logits/chosen": -2.029296875,
      "logits/rejected": -2.008984327316284,
      "logps/chosen": -460.75,
      "logps/rejected": -306.20001220703125,
      "loss": 0.24,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 1.074609398841858,
      "rewards/margins": 2.743359327316284,
      "rewards/rejected": -1.669921875,
      "step": 120
    },
    {
      "epoch": 0.1141352063213345,
      "grad_norm": 75.6782650414838,
      "learning_rate": 9.716856892010535e-07,
      "logits/chosen": -2.0703125,
      "logits/rejected": -2.0328125953674316,
      "logps/chosen": -462.25,
      "logps/rejected": -339.0,
      "loss": 0.3127,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": 0.9510253667831421,
      "rewards/margins": 2.646679639816284,
      "rewards/rejected": -1.694433569908142,
      "step": 130
    },
    {
      "epoch": 0.12291483757682177,
      "grad_norm": 85.8365746604714,
      "learning_rate": 9.694907813871816e-07,
      "logits/chosen": -1.9753906726837158,
      "logits/rejected": -1.9874999523162842,
      "logps/chosen": -428.70001220703125,
      "logps/rejected": -305.3999938964844,
      "loss": 0.3302,
      "rewards/accuracies": 0.856249988079071,
      "rewards/chosen": 0.566821277141571,
      "rewards/margins": 2.488476514816284,
      "rewards/rejected": -1.921875,
      "step": 140
    },
    {
      "epoch": 0.13169446883230904,
      "grad_norm": 111.78470030134876,
      "learning_rate": 9.672958735733098e-07,
      "logits/chosen": -2.1148438453674316,
      "logits/rejected": -2.053906202316284,
      "logps/chosen": -423.3999938964844,
      "logps/rejected": -276.25,
      "loss": 0.2385,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 0.37567138671875,
      "rewards/margins": 2.6773438453674316,
      "rewards/rejected": -2.303906202316284,
      "step": 150
    },
    {
      "epoch": 0.14047410008779632,
      "grad_norm": 109.6378007960495,
      "learning_rate": 9.651009657594382e-07,
      "logits/chosen": -2.0562500953674316,
      "logits/rejected": -2.002734422683716,
      "logps/chosen": -395.6000061035156,
      "logps/rejected": -296.67498779296875,
      "loss": 0.2739,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": 0.6416991949081421,
      "rewards/margins": 2.7640624046325684,
      "rewards/rejected": -2.121875047683716,
      "step": 160
    },
    {
      "epoch": 0.14925373134328357,
      "grad_norm": 67.17728743004999,
      "learning_rate": 9.629060579455661e-07,
      "logits/chosen": -2.1363282203674316,
      "logits/rejected": -2.057812452316284,
      "logps/chosen": -425.5,
      "logps/rejected": -270.3500061035156,
      "loss": 0.2559,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": 0.841381847858429,
      "rewards/margins": 2.6683592796325684,
      "rewards/rejected": -1.825585961341858,
      "step": 170
    },
    {
      "epoch": 0.15803336259877085,
      "grad_norm": 94.40359155524563,
      "learning_rate": 9.607111501316945e-07,
      "logits/chosen": -2.1382813453674316,
      "logits/rejected": -2.098437547683716,
      "logps/chosen": -471.1000061035156,
      "logps/rejected": -287.95001220703125,
      "loss": 0.2294,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": 0.947033703327179,
      "rewards/margins": 2.9976563453674316,
      "rewards/rejected": -2.0517578125,
      "step": 180
    },
    {
      "epoch": 0.16681299385425813,
      "grad_norm": 72.03506390577603,
      "learning_rate": 9.585162423178225e-07,
      "logits/chosen": -2.045703172683716,
      "logits/rejected": -2.084765672683716,
      "logps/chosen": -501.29998779296875,
      "logps/rejected": -317.1000061035156,
      "loss": 0.2127,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 0.67205810546875,
      "rewards/margins": 3.350781202316284,
      "rewards/rejected": -2.676953077316284,
      "step": 190
    },
    {
      "epoch": 0.17559262510974538,
      "grad_norm": 93.27955751185753,
      "learning_rate": 9.563213345039509e-07,
      "logits/chosen": -2.211718797683716,
      "logits/rejected": -2.0687499046325684,
      "logps/chosen": -463.8500061035156,
      "logps/rejected": -325.3500061035156,
      "loss": 0.2497,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": -0.2869018614292145,
      "rewards/margins": 3.149609327316284,
      "rewards/rejected": -3.4410157203674316,
      "step": 200
    },
    {
      "epoch": 0.18437225636523266,
      "grad_norm": 77.82334585299272,
      "learning_rate": 9.541264266900788e-07,
      "logits/chosen": -2.124218702316284,
      "logits/rejected": -2.1273436546325684,
      "logps/chosen": -436.95001220703125,
      "logps/rejected": -288.70001220703125,
      "loss": 0.2713,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -0.07237549126148224,
      "rewards/margins": 3.153515577316284,
      "rewards/rejected": -3.227734327316284,
      "step": 210
    },
    {
      "epoch": 0.19315188762071994,
      "grad_norm": 21.694976416377187,
      "learning_rate": 9.519315188762071e-07,
      "logits/chosen": -2.134765625,
      "logits/rejected": -2.01171875,
      "logps/chosen": -459.70001220703125,
      "logps/rejected": -327.8999938964844,
      "loss": 0.2954,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": 0.06580200046300888,
      "rewards/margins": 3.3824219703674316,
      "rewards/rejected": -3.31640625,
      "step": 220
    },
    {
      "epoch": 0.2019315188762072,
      "grad_norm": 30.56832606851288,
      "learning_rate": 9.497366110623354e-07,
      "logits/chosen": -2.142578125,
      "logits/rejected": -2.0542969703674316,
      "logps/chosen": -458.1000061035156,
      "logps/rejected": -332.29998779296875,
      "loss": 0.2406,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 0.21712036430835724,
      "rewards/margins": 3.201171875,
      "rewards/rejected": -2.986328125,
      "step": 230
    },
    {
      "epoch": 0.21071115013169447,
      "grad_norm": 30.77078164453209,
      "learning_rate": 9.475417032484635e-07,
      "logits/chosen": -2.12109375,
      "logits/rejected": -1.9968750476837158,
      "logps/chosen": -446.70001220703125,
      "logps/rejected": -306.20001220703125,
      "loss": 0.238,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 0.6296142339706421,
      "rewards/margins": 3.125781297683716,
      "rewards/rejected": -2.493359327316284,
      "step": 240
    },
    {
      "epoch": 0.21949078138718173,
      "grad_norm": 33.56293554427966,
      "learning_rate": 9.453467954345917e-07,
      "logits/chosen": -2.2210936546325684,
      "logits/rejected": -2.1546874046325684,
      "logps/chosen": -431.70001220703125,
      "logps/rejected": -295.29998779296875,
      "loss": 0.2395,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": 0.33845216035842896,
      "rewards/margins": 3.3363280296325684,
      "rewards/rejected": -2.9957032203674316,
      "step": 250
    },
    {
      "epoch": 0.228270412642669,
      "grad_norm": 55.524382065908405,
      "learning_rate": 9.431518876207198e-07,
      "logits/chosen": -2.1773438453674316,
      "logits/rejected": -2.0042967796325684,
      "logps/chosen": -427.20001220703125,
      "logps/rejected": -320.70001220703125,
      "loss": 0.255,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": -0.26520997285842896,
      "rewards/margins": 3.2484374046325684,
      "rewards/rejected": -3.512500047683716,
      "step": 260
    },
    {
      "epoch": 0.2370500438981563,
      "grad_norm": 54.01974298101663,
      "learning_rate": 9.409569798068481e-07,
      "logits/chosen": -2.1273436546325684,
      "logits/rejected": -2.111328125,
      "logps/chosen": -374.75,
      "logps/rejected": -274.70001220703125,
      "loss": 0.2185,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": 0.4251342713832855,
      "rewards/margins": 3.160937547683716,
      "rewards/rejected": -2.7337889671325684,
      "step": 270
    },
    {
      "epoch": 0.24582967515364354,
      "grad_norm": 64.18071883073675,
      "learning_rate": 9.387620719929763e-07,
      "logits/chosen": -1.9445312023162842,
      "logits/rejected": -2.003124952316284,
      "logps/chosen": -430.5,
      "logps/rejected": -334.75,
      "loss": 0.2238,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": 0.799877941608429,
      "rewards/margins": 3.1640625,
      "rewards/rejected": -2.3648438453674316,
      "step": 280
    },
    {
      "epoch": 0.2546093064091308,
      "grad_norm": 41.89305555971381,
      "learning_rate": 9.365671641791044e-07,
      "logits/chosen": -2.055859327316284,
      "logits/rejected": -2.1539063453674316,
      "logps/chosen": -415.29998779296875,
      "logps/rejected": -249.14999389648438,
      "loss": 0.2134,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 0.30503541231155396,
      "rewards/margins": 3.417773485183716,
      "rewards/rejected": -3.1148438453674316,
      "step": 290
    },
    {
      "epoch": 0.2633889376646181,
      "grad_norm": 35.25365172838076,
      "learning_rate": 9.343722563652326e-07,
      "logits/chosen": -2.0835938453674316,
      "logits/rejected": -2.063671827316284,
      "logps/chosen": -478.1000061035156,
      "logps/rejected": -307.6499938964844,
      "loss": 0.1894,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": 0.4594482481479645,
      "rewards/margins": 3.591015577316284,
      "rewards/rejected": -3.1304688453674316,
      "step": 300
    },
    {
      "epoch": 0.2721685689201054,
      "grad_norm": 45.43698598182238,
      "learning_rate": 9.321773485513608e-07,
      "logits/chosen": -2.1117186546325684,
      "logits/rejected": -2.143359422683716,
      "logps/chosen": -392.5,
      "logps/rejected": -290.6000061035156,
      "loss": 0.2509,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 0.20820312201976776,
      "rewards/margins": 3.137890577316284,
      "rewards/rejected": -2.92578125,
      "step": 310
    },
    {
      "epoch": 0.28094820017559263,
      "grad_norm": 24.08317849479442,
      "learning_rate": 9.29982440737489e-07,
      "logits/chosen": -2.1488280296325684,
      "logits/rejected": -2.112109422683716,
      "logps/chosen": -464.6000061035156,
      "logps/rejected": -310.8500061035156,
      "loss": 0.1627,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": 0.8848632574081421,
      "rewards/margins": 3.76171875,
      "rewards/rejected": -2.8753905296325684,
      "step": 320
    },
    {
      "epoch": 0.2897278314310799,
      "grad_norm": 67.04668935007606,
      "learning_rate": 9.277875329236171e-07,
      "logits/chosen": -2.0374999046325684,
      "logits/rejected": -2.1058592796325684,
      "logps/chosen": -438.5,
      "logps/rejected": -311.6000061035156,
      "loss": 0.1902,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": 0.5768798589706421,
      "rewards/margins": 3.610156297683716,
      "rewards/rejected": -3.032421827316284,
      "step": 330
    },
    {
      "epoch": 0.29850746268656714,
      "grad_norm": 105.4845968633654,
      "learning_rate": 9.255926251097453e-07,
      "logits/chosen": -2.0992188453674316,
      "logits/rejected": -2.1871094703674316,
      "logps/chosen": -420.79998779296875,
      "logps/rejected": -290.29998779296875,
      "loss": 0.2165,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": 0.36976319551467896,
      "rewards/margins": 3.784374952316284,
      "rewards/rejected": -3.4117188453674316,
      "step": 340
    },
    {
      "epoch": 0.30728709394205445,
      "grad_norm": 72.50177354695649,
      "learning_rate": 9.233977172958736e-07,
      "logits/chosen": -2.2191405296325684,
      "logits/rejected": -2.1753907203674316,
      "logps/chosen": -434.8999938964844,
      "logps/rejected": -325.79998779296875,
      "loss": 0.1292,
      "rewards/accuracies": 0.956250011920929,
      "rewards/chosen": 0.178955078125,
      "rewards/margins": 4.399218559265137,
      "rewards/rejected": -4.219531059265137,
      "step": 350
    },
    {
      "epoch": 0.3160667251975417,
      "grad_norm": 75.1023432074706,
      "learning_rate": 9.212028094820017e-07,
      "logits/chosen": -2.11328125,
      "logits/rejected": -2.141406297683716,
      "logps/chosen": -442.3999938964844,
      "logps/rejected": -273.45001220703125,
      "loss": 0.172,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": 0.5267333984375,
      "rewards/margins": 4.272656440734863,
      "rewards/rejected": -3.748046875,
      "step": 360
    },
    {
      "epoch": 0.32484635645302895,
      "grad_norm": 50.469284265081235,
      "learning_rate": 9.190079016681299e-07,
      "logits/chosen": -2.0835938453674316,
      "logits/rejected": -2.088671922683716,
      "logps/chosen": -452.3999938964844,
      "logps/rejected": -303.3500061035156,
      "loss": 0.2088,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 0.18303832411766052,
      "rewards/margins": 4.36328125,
      "rewards/rejected": -4.1796875,
      "step": 370
    },
    {
      "epoch": 0.33362598770851626,
      "grad_norm": 16.754396284484763,
      "learning_rate": 9.16812993854258e-07,
      "logits/chosen": -2.130859375,
      "logits/rejected": -2.141406297683716,
      "logps/chosen": -466.1000061035156,
      "logps/rejected": -335.54998779296875,
      "loss": 0.2428,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": 0.03295288234949112,
      "rewards/margins": 3.80078125,
      "rewards/rejected": -3.7671875953674316,
      "step": 380
    },
    {
      "epoch": 0.3424056189640035,
      "grad_norm": 78.8803531723786,
      "learning_rate": 9.146180860403863e-07,
      "logits/chosen": -2.076953172683716,
      "logits/rejected": -2.0972657203674316,
      "logps/chosen": -440.04998779296875,
      "logps/rejected": -332.75,
      "loss": 0.2099,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": 0.579296886920929,
      "rewards/margins": 4.063281059265137,
      "rewards/rejected": -3.4820313453674316,
      "step": 390
    },
    {
      "epoch": 0.35118525021949076,
      "grad_norm": 14.433067656955,
      "learning_rate": 9.124231782265145e-07,
      "logits/chosen": -2.079296827316284,
      "logits/rejected": -2.067578077316284,
      "logps/chosen": -472.5,
      "logps/rejected": -318.95001220703125,
      "loss": 0.1494,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": 1.1320312023162842,
      "rewards/margins": 4.486718654632568,
      "rewards/rejected": -3.356250047683716,
      "step": 400
    },
    {
      "epoch": 0.35996488147497807,
      "grad_norm": 66.59493048581147,
      "learning_rate": 9.102282704126426e-07,
      "logits/chosen": -2.1039061546325684,
      "logits/rejected": -2.1117186546325684,
      "logps/chosen": -432.3500061035156,
      "logps/rejected": -323.3999938964844,
      "loss": 0.282,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": 0.4548583924770355,
      "rewards/margins": 3.774218797683716,
      "rewards/rejected": -3.3207030296325684,
      "step": 410
    },
    {
      "epoch": 0.3687445127304653,
      "grad_norm": 134.96750879284693,
      "learning_rate": 9.080333625987708e-07,
      "logits/chosen": -2.1214842796325684,
      "logits/rejected": -2.075390577316284,
      "logps/chosen": -430.75,
      "logps/rejected": -299.54998779296875,
      "loss": 0.2632,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 0.25281983613967896,
      "rewards/margins": 4.217187404632568,
      "rewards/rejected": -3.969531297683716,
      "step": 420
    },
    {
      "epoch": 0.3775241439859526,
      "grad_norm": 53.570790263433366,
      "learning_rate": 9.05838454784899e-07,
      "logits/chosen": -2.1441407203674316,
      "logits/rejected": -2.1468749046325684,
      "logps/chosen": -462.6499938964844,
      "logps/rejected": -325.54998779296875,
      "loss": 0.1967,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -0.22116699814796448,
      "rewards/margins": 3.889843702316284,
      "rewards/rejected": -4.114062309265137,
      "step": 430
    },
    {
      "epoch": 0.3863037752414399,
      "grad_norm": 57.08040279451106,
      "learning_rate": 9.036435469710271e-07,
      "logits/chosen": -2.112109422683716,
      "logits/rejected": -2.118359327316284,
      "logps/chosen": -416.5,
      "logps/rejected": -311.70001220703125,
      "loss": 0.2331,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -0.5199950933456421,
      "rewards/margins": 4.294531345367432,
      "rewards/rejected": -4.817968845367432,
      "step": 440
    },
    {
      "epoch": 0.39508340649692714,
      "grad_norm": 95.43206409640776,
      "learning_rate": 9.014486391571554e-07,
      "logits/chosen": -2.216796875,
      "logits/rejected": -2.139453172683716,
      "logps/chosen": -439.70001220703125,
      "logps/rejected": -336.5,
      "loss": 0.2249,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -0.24812011420726776,
      "rewards/margins": 4.46875,
      "rewards/rejected": -4.710156440734863,
      "step": 450
    },
    {
      "epoch": 0.4038630377524144,
      "grad_norm": 57.93147205581145,
      "learning_rate": 8.992537313432835e-07,
      "logits/chosen": -2.2828125953674316,
      "logits/rejected": -2.182812452316284,
      "logps/chosen": -411.29998779296875,
      "logps/rejected": -306.70001220703125,
      "loss": 0.2234,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 0.8202148675918579,
      "rewards/margins": 3.5902342796325684,
      "rewards/rejected": -2.7685546875,
      "step": 460
    },
    {
      "epoch": 0.41264266900790164,
      "grad_norm": 92.79683259836257,
      "learning_rate": 8.970588235294118e-07,
      "logits/chosen": -2.178906202316284,
      "logits/rejected": -2.2320313453674316,
      "logps/chosen": -433.04998779296875,
      "logps/rejected": -263.95001220703125,
      "loss": 0.2153,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": 0.9303222894668579,
      "rewards/margins": 3.6566405296325684,
      "rewards/rejected": -2.723559617996216,
      "step": 470
    },
    {
      "epoch": 0.42142230026338895,
      "grad_norm": 32.576034948593,
      "learning_rate": 8.948639157155398e-07,
      "logits/chosen": -2.012890577316284,
      "logits/rejected": -2.1917967796325684,
      "logps/chosen": -472.29998779296875,
      "logps/rejected": -313.8500061035156,
      "loss": 0.1522,
      "rewards/accuracies": 0.9312499761581421,
      "rewards/chosen": -0.0063232420943677425,
      "rewards/margins": 4.173047065734863,
      "rewards/rejected": -4.177343845367432,
      "step": 480
    },
    {
      "epoch": 0.4302019315188762,
      "grad_norm": 65.15745347067617,
      "learning_rate": 8.926690079016681e-07,
      "logits/chosen": -2.140625,
      "logits/rejected": -2.1429686546325684,
      "logps/chosen": -391.29998779296875,
      "logps/rejected": -326.20001220703125,
      "loss": 0.1283,
      "rewards/accuracies": 0.9437500238418579,
      "rewards/chosen": -0.711108386516571,
      "rewards/margins": 4.59375,
      "rewards/rejected": -5.305468559265137,
      "step": 490
    },
    {
      "epoch": 0.43898156277436345,
      "grad_norm": 97.60751286979364,
      "learning_rate": 8.904741000877962e-07,
      "logits/chosen": -2.1156249046325684,
      "logits/rejected": -2.221484422683716,
      "logps/chosen": -481.45001220703125,
      "logps/rejected": -335.20001220703125,
      "loss": 0.2104,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -0.555676281452179,
      "rewards/margins": 4.49609375,
      "rewards/rejected": -5.053124904632568,
      "step": 500
    },
    {
      "epoch": 0.44776119402985076,
      "grad_norm": 54.498651704384,
      "learning_rate": 8.882791922739245e-07,
      "logits/chosen": -2.060546875,
      "logits/rejected": -2.046875,
      "logps/chosen": -517.0999755859375,
      "logps/rejected": -383.6000061035156,
      "loss": 0.2556,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -0.3011230528354645,
      "rewards/margins": 4.125781059265137,
      "rewards/rejected": -4.425000190734863,
      "step": 510
    },
    {
      "epoch": 0.456540825285338,
      "grad_norm": 74.72413097614373,
      "learning_rate": 8.860842844600526e-07,
      "logits/chosen": -2.03125,
      "logits/rejected": -2.072265625,
      "logps/chosen": -433.6499938964844,
      "logps/rejected": -318.8500061035156,
      "loss": 0.2302,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -0.05378418043255806,
      "rewards/margins": 4.114062309265137,
      "rewards/rejected": -4.168359279632568,
      "step": 520
    },
    {
      "epoch": 0.46532045654082527,
      "grad_norm": 38.340095947553,
      "learning_rate": 8.838893766461808e-07,
      "logits/chosen": -2.131640672683716,
      "logits/rejected": -2.0914063453674316,
      "logps/chosen": -405.3999938964844,
      "logps/rejected": -278.95001220703125,
      "loss": 0.1763,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": -0.0306396484375,
      "rewards/margins": 4.125781059265137,
      "rewards/rejected": -4.15234375,
      "step": 530
    },
    {
      "epoch": 0.4741000877963126,
      "grad_norm": 46.83712424011487,
      "learning_rate": 8.81694468832309e-07,
      "logits/chosen": -2.0882811546325684,
      "logits/rejected": -2.0777344703674316,
      "logps/chosen": -491.70001220703125,
      "logps/rejected": -329.79998779296875,
      "loss": 0.1081,
      "rewards/accuracies": 0.956250011920929,
      "rewards/chosen": 0.144866943359375,
      "rewards/margins": 4.575781345367432,
      "rewards/rejected": -4.434374809265137,
      "step": 540
    },
    {
      "epoch": 0.4828797190517998,
      "grad_norm": 83.49552287784984,
      "learning_rate": 8.794995610184372e-07,
      "logits/chosen": -2.149218797683716,
      "logits/rejected": -2.098437547683716,
      "logps/chosen": -389.95001220703125,
      "logps/rejected": -314.29998779296875,
      "loss": 0.1915,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -0.42718505859375,
      "rewards/margins": 4.41796875,
      "rewards/rejected": -4.846875190734863,
      "step": 550
    },
    {
      "epoch": 0.4916593503072871,
      "grad_norm": 44.480640119351804,
      "learning_rate": 8.773046532045653e-07,
      "logits/chosen": -2.104687452316284,
      "logits/rejected": -2.112109422683716,
      "logps/chosen": -414.79998779296875,
      "logps/rejected": -322.3999938964844,
      "loss": 0.2297,
      "rewards/accuracies": 0.9437500238418579,
      "rewards/chosen": 0.07548828423023224,
      "rewards/margins": 4.655468940734863,
      "rewards/rejected": -4.583593845367432,
      "step": 560
    },
    {
      "epoch": 0.5004389815627743,
      "grad_norm": 68.92253975153191,
      "learning_rate": 8.751097453906936e-07,
      "logits/chosen": -2.1968750953674316,
      "logits/rejected": -2.147656202316284,
      "logps/chosen": -432.25,
      "logps/rejected": -394.5,
      "loss": 0.2615,
      "rewards/accuracies": 0.875,
      "rewards/chosen": -0.9510253667831421,
      "rewards/margins": 4.291406154632568,
      "rewards/rejected": -5.2421875,
      "step": 570
    },
    {
      "epoch": 0.5092186128182616,
      "grad_norm": 108.15215870599476,
      "learning_rate": 8.729148375768217e-07,
      "logits/chosen": -2.1171875,
      "logits/rejected": -2.160937547683716,
      "logps/chosen": -450.8999938964844,
      "logps/rejected": -343.70001220703125,
      "loss": 0.1849,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -1.55517578125,
      "rewards/margins": 4.383593559265137,
      "rewards/rejected": -5.936718940734863,
      "step": 580
    },
    {
      "epoch": 0.517998244073749,
      "grad_norm": 15.306293998296644,
      "learning_rate": 8.7071992976295e-07,
      "logits/chosen": -2.1265625953674316,
      "logits/rejected": -2.1500000953674316,
      "logps/chosen": -471.95001220703125,
      "logps/rejected": -336.95001220703125,
      "loss": 0.2001,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -0.23862305283546448,
      "rewards/margins": 4.780468940734863,
      "rewards/rejected": -5.028124809265137,
      "step": 590
    },
    {
      "epoch": 0.5267778753292361,
      "grad_norm": 66.51668565473275,
      "learning_rate": 8.68525021949078e-07,
      "logits/chosen": -2.112109422683716,
      "logits/rejected": -2.1917967796325684,
      "logps/chosen": -449.95001220703125,
      "logps/rejected": -316.8500061035156,
      "loss": 0.2073,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -0.45073240995407104,
      "rewards/margins": 4.410937309265137,
      "rewards/rejected": -4.859375,
      "step": 600
    },
    {
      "epoch": 0.5355575065847235,
      "grad_norm": 64.80150769604948,
      "learning_rate": 8.663301141352063e-07,
      "logits/chosen": -2.072265625,
      "logits/rejected": -2.1429686546325684,
      "logps/chosen": -456.5,
      "logps/rejected": -331.5,
      "loss": 0.1895,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": 0.0469970703125,
      "rewards/margins": 4.651562690734863,
      "rewards/rejected": -4.599999904632568,
      "step": 610
    },
    {
      "epoch": 0.5443371378402108,
      "grad_norm": 101.53106401670574,
      "learning_rate": 8.641352063213345e-07,
      "logits/chosen": -2.223437547683716,
      "logits/rejected": -2.255859375,
      "logps/chosen": -395.3999938964844,
      "logps/rejected": -304.95001220703125,
      "loss": 0.1795,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -0.731152355670929,
      "rewards/margins": 4.413671970367432,
      "rewards/rejected": -5.14453125,
      "step": 620
    },
    {
      "epoch": 0.553116769095698,
      "grad_norm": 73.80132473437388,
      "learning_rate": 8.619402985074626e-07,
      "logits/chosen": -2.1773438453674316,
      "logits/rejected": -2.2093749046325684,
      "logps/chosen": -471.8999938964844,
      "logps/rejected": -320.0,
      "loss": 0.1802,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -0.28009033203125,
      "rewards/margins": 4.625781059265137,
      "rewards/rejected": -4.90625,
      "step": 630
    },
    {
      "epoch": 0.5618964003511853,
      "grad_norm": 104.32149056296203,
      "learning_rate": 8.597453906935908e-07,
      "logits/chosen": -2.166796922683716,
      "logits/rejected": -2.1624999046325684,
      "logps/chosen": -458.79998779296875,
      "logps/rejected": -333.29998779296875,
      "loss": 0.1938,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": -0.10056152194738388,
      "rewards/margins": 4.796093940734863,
      "rewards/rejected": -4.893750190734863,
      "step": 640
    },
    {
      "epoch": 0.5706760316066726,
      "grad_norm": 50.95752771225326,
      "learning_rate": 8.57550482879719e-07,
      "logits/chosen": -2.178906202316284,
      "logits/rejected": -2.205078125,
      "logps/chosen": -467.6000061035156,
      "logps/rejected": -313.45001220703125,
      "loss": 0.2282,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": -0.3135742247104645,
| "rewards/margins": 5.034375190734863, | |
| "rewards/rejected": -5.346875190734863, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.5794556628621598, | |
| "grad_norm": 71.82484643634139, | |
| "learning_rate": 8.553555750658472e-07, | |
| "logits/chosen": -2.168750047683716, | |
| "logits/rejected": -2.262890577316284, | |
| "logps/chosen": -438.70001220703125, | |
| "logps/rejected": -336.1000061035156, | |
| "loss": 0.1772, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": -0.5922485589981079, | |
| "rewards/margins": 4.578906059265137, | |
| "rewards/rejected": -5.165625095367432, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 85.27753406534178, | |
| "learning_rate": 8.531606672519753e-07, | |
| "logits/chosen": -2.216015577316284, | |
| "logits/rejected": -2.1796875, | |
| "logps/chosen": -466.95001220703125, | |
| "logps/rejected": -359.45001220703125, | |
| "loss": 0.1464, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": -0.520068347454071, | |
| "rewards/margins": 5.157031059265137, | |
| "rewards/rejected": -5.673437595367432, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.5970149253731343, | |
| "grad_norm": 106.36339279941318, | |
| "learning_rate": 8.509657594381035e-07, | |
| "logits/chosen": -2.1597657203674316, | |
| "logits/rejected": -2.207812547683716, | |
| "logps/chosen": -448.0, | |
| "logps/rejected": -354.0, | |
| "loss": 0.1332, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": -1.391717553138733, | |
| "rewards/margins": 4.916406154632568, | |
| "rewards/rejected": -6.3046875, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.6057945566286216, | |
| "grad_norm": 109.28671964199744, | |
| "learning_rate": 8.487708516242318e-07, | |
| "logits/chosen": -2.3355469703674316, | |
| "logits/rejected": -2.342968702316284, | |
| "logps/chosen": -459.8999938964844, | |
| "logps/rejected": -347.1499938964844, | |
| "loss": 0.2749, | |
| "rewards/accuracies": 0.8812500238418579, | |
| "rewards/chosen": -0.57513427734375, | |
| "rewards/margins": 4.717968940734863, | |
| "rewards/rejected": -5.296093940734863, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.6145741878841089, | |
| "grad_norm": 95.56702401243868, | |
| "learning_rate": 8.465759438103599e-07, | |
| "logits/chosen": -2.2109375, | |
| "logits/rejected": -2.282421827316284, | |
| "logps/chosen": -426.75, | |
| "logps/rejected": -296.6000061035156, | |
| "loss": 0.246, | |
| "rewards/accuracies": 0.887499988079071, | |
| "rewards/chosen": 0.03012695349752903, | |
| "rewards/margins": 4.321484565734863, | |
| "rewards/rejected": -4.290625095367432, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6233538191395961, | |
| "grad_norm": 113.23103347450649, | |
| "learning_rate": 8.443810359964881e-07, | |
| "logits/chosen": -2.3062500953674316, | |
| "logits/rejected": -2.252734422683716, | |
| "logps/chosen": -433.1499938964844, | |
| "logps/rejected": -321.75, | |
| "loss": 0.2098, | |
| "rewards/accuracies": 0.893750011920929, | |
| "rewards/chosen": -0.11279296875, | |
| "rewards/margins": 4.678124904632568, | |
| "rewards/rejected": -4.792187690734863, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.6321334503950834, | |
| "grad_norm": 36.876855762131605, | |
| "learning_rate": 8.421861281826162e-07, | |
| "logits/chosen": -2.1468749046325684, | |
| "logits/rejected": -2.17578125, | |
| "logps/chosen": -421.6000061035156, | |
| "logps/rejected": -307.6499938964844, | |
| "loss": 0.1765, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.70428466796875, | |
| "rewards/margins": 4.767968654632568, | |
| "rewards/rejected": -5.47265625, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.6409130816505707, | |
| "grad_norm": 97.63938648530551, | |
| "learning_rate": 8.399912203687445e-07, | |
| "logits/chosen": -2.181640625, | |
| "logits/rejected": -2.1820311546325684, | |
| "logps/chosen": -480.1000061035156, | |
| "logps/rejected": -339.25, | |
| "loss": 0.155, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -0.7850586175918579, | |
| "rewards/margins": 5.161718845367432, | |
| "rewards/rejected": -5.943749904632568, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.6496927129060579, | |
| "grad_norm": 75.71291991172833, | |
| "learning_rate": 8.377963125548727e-07, | |
| "logits/chosen": -2.098828077316284, | |
| "logits/rejected": -2.1371092796325684, | |
| "logps/chosen": -439.0, | |
| "logps/rejected": -320.79998779296875, | |
| "loss": 0.3184, | |
| "rewards/accuracies": 0.887499988079071, | |
| "rewards/chosen": -0.5986328125, | |
| "rewards/margins": 4.768750190734863, | |
| "rewards/rejected": -5.3671875, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.6584723441615452, | |
| "grad_norm": 111.53159387834462, | |
| "learning_rate": 8.356014047410008e-07, | |
| "logits/chosen": -2.115234375, | |
| "logits/rejected": -2.1976561546325684, | |
| "logps/chosen": -435.29998779296875, | |
| "logps/rejected": -317.54998779296875, | |
| "loss": 0.3034, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.20980224013328552, | |
| "rewards/margins": 4.228125095367432, | |
| "rewards/rejected": -4.436718940734863, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.6672519754170325, | |
| "grad_norm": 61.33023200058874, | |
| "learning_rate": 8.33406496927129e-07, | |
| "logits/chosen": -2.161328077316284, | |
| "logits/rejected": -2.1996092796325684, | |
| "logps/chosen": -439.3999938964844, | |
| "logps/rejected": -315.6499938964844, | |
| "loss": 0.1935, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.4091750979423523, | |
| "rewards/margins": 3.9925780296325684, | |
| "rewards/rejected": -4.400781154632568, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.6760316066725197, | |
| "grad_norm": 37.83314141749024, | |
| "learning_rate": 8.312115891132572e-07, | |
| "logits/chosen": -2.27734375, | |
| "logits/rejected": -2.2066407203674316, | |
| "logps/chosen": -433.45001220703125, | |
| "logps/rejected": -318.8999938964844, | |
| "loss": 0.1838, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": -0.5521606206893921, | |
| "rewards/margins": 4.80859375, | |
| "rewards/rejected": -5.366406440734863, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.684811237928007, | |
| "grad_norm": 69.75077577447426, | |
| "learning_rate": 8.290166812993854e-07, | |
| "logits/chosen": -2.1675782203674316, | |
| "logits/rejected": -2.2613282203674316, | |
| "logps/chosen": -456.95001220703125, | |
| "logps/rejected": -315.75, | |
| "loss": 0.1864, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": -0.14486083388328552, | |
| "rewards/margins": 5.21484375, | |
| "rewards/rejected": -5.359375, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.6935908691834943, | |
| "grad_norm": 90.50776580259476, | |
| "learning_rate": 8.268217734855135e-07, | |
| "logits/chosen": -2.2406249046325684, | |
| "logits/rejected": -2.294921875, | |
| "logps/chosen": -472.8999938964844, | |
| "logps/rejected": -342.6499938964844, | |
| "loss": 0.1657, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/chosen": -0.4073547422885895, | |
| "rewards/margins": 4.453906059265137, | |
| "rewards/rejected": -4.85546875, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.7023705004389815, | |
| "grad_norm": 45.30350511046526, | |
| "learning_rate": 8.246268656716417e-07, | |
| "logits/chosen": -2.313281297683716, | |
| "logits/rejected": -2.301562547683716, | |
| "logps/chosen": -475.8999938964844, | |
| "logps/rejected": -351.1000061035156, | |
| "loss": 0.1385, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -0.6451660394668579, | |
| "rewards/margins": 4.946875095367432, | |
| "rewards/rejected": -5.586718559265137, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.7111501316944688, | |
| "grad_norm": 62.7359014369885, | |
| "learning_rate": 8.2243195785777e-07, | |
| "logits/chosen": -2.2718749046325684, | |
| "logits/rejected": -2.317578077316284, | |
| "logps/chosen": -471.5, | |
| "logps/rejected": -359.25, | |
| "loss": 0.1708, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": -0.73858642578125, | |
| "rewards/margins": 4.953125, | |
| "rewards/rejected": -5.6953125, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.7199297629499561, | |
| "grad_norm": 64.39391927677985, | |
| "learning_rate": 8.20237050043898e-07, | |
| "logits/chosen": -2.265625, | |
| "logits/rejected": -2.338671922683716, | |
| "logps/chosen": -472.8999938964844, | |
| "logps/rejected": -308.8500061035156, | |
| "loss": 0.1875, | |
| "rewards/accuracies": 0.918749988079071, | |
| "rewards/chosen": -0.84967041015625, | |
| "rewards/margins": 4.270312309265137, | |
| "rewards/rejected": -5.124218940734863, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.7287093942054433, | |
| "grad_norm": 46.31488820683875, | |
| "learning_rate": 8.180421422300263e-07, | |
| "logits/chosen": -2.2105469703674316, | |
| "logits/rejected": -2.2093749046325684, | |
| "logps/chosen": -505.6499938964844, | |
| "logps/rejected": -330.3500061035156, | |
| "loss": 0.2332, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": 0.0020751953125, | |
| "rewards/margins": 4.325390815734863, | |
| "rewards/rejected": -4.324999809265137, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.7374890254609306, | |
| "grad_norm": 68.37218163162461, | |
| "learning_rate": 8.158472344161544e-07, | |
| "logits/chosen": -2.1753907203674316, | |
| "logits/rejected": -2.266406297683716, | |
| "logps/chosen": -474.20001220703125, | |
| "logps/rejected": -354.1499938964844, | |
| "loss": 0.1858, | |
| "rewards/accuracies": 0.918749988079071, | |
| "rewards/chosen": 0.12888184189796448, | |
| "rewards/margins": 4.827343940734863, | |
| "rewards/rejected": -4.699999809265137, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.746268656716418, | |
| "grad_norm": 70.37051115875163, | |
| "learning_rate": 8.136523266022827e-07, | |
| "logits/chosen": -2.2289061546325684, | |
| "logits/rejected": -2.350781202316284, | |
| "logps/chosen": -455.5, | |
| "logps/rejected": -324.6499938964844, | |
| "loss": 0.1995, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.42094725370407104, | |
| "rewards/margins": 4.266406059265137, | |
| "rewards/rejected": -4.69140625, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.7550482879719052, | |
| "grad_norm": 23.265883596850873, | |
| "learning_rate": 8.114574187884108e-07, | |
| "logits/chosen": -2.145703077316284, | |
| "logits/rejected": -2.197265625, | |
| "logps/chosen": -440.3999938964844, | |
| "logps/rejected": -330.79998779296875, | |
| "loss": 0.1503, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": 0.5169433355331421, | |
| "rewards/margins": 4.733593940734863, | |
| "rewards/rejected": -4.217968940734863, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.7638279192273925, | |
| "grad_norm": 39.06071564098125, | |
| "learning_rate": 8.09262510974539e-07, | |
| "logits/chosen": -2.132031202316284, | |
| "logits/rejected": -2.100390672683716, | |
| "logps/chosen": -433.1000061035156, | |
| "logps/rejected": -332.70001220703125, | |
| "loss": 0.182, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": 0.3395141661167145, | |
| "rewards/margins": 4.525781154632568, | |
| "rewards/rejected": -4.186718940734863, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.7726075504828798, | |
| "grad_norm": 58.16494119873086, | |
| "learning_rate": 8.070676031606672e-07, | |
| "logits/chosen": -2.0980467796325684, | |
| "logits/rejected": -2.188281297683716, | |
| "logps/chosen": -477.0, | |
| "logps/rejected": -340.3999938964844, | |
| "loss": 0.1971, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": 0.788586437702179, | |
| "rewards/margins": 4.595312595367432, | |
| "rewards/rejected": -3.807421922683716, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.781387181738367, | |
| "grad_norm": 54.01268795395281, | |
| "learning_rate": 8.048726953467954e-07, | |
| "logits/chosen": -2.2171874046325684, | |
| "logits/rejected": -2.188281297683716, | |
| "logps/chosen": -417.3500061035156, | |
| "logps/rejected": -315.1000061035156, | |
| "loss": 0.186, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": 0.21174316108226776, | |
| "rewards/margins": 4.379296779632568, | |
| "rewards/rejected": -4.167578220367432, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.7901668129938543, | |
| "grad_norm": 27.117656990648012, | |
| "learning_rate": 8.026777875329235e-07, | |
| "logits/chosen": -2.2152342796325684, | |
| "logits/rejected": -2.3082032203674316, | |
| "logps/chosen": -422.3500061035156, | |
| "logps/rejected": -318.95001220703125, | |
| "loss": 0.2889, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -0.11767578125, | |
| "rewards/margins": 4.21875, | |
| "rewards/rejected": -4.3359375, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7989464442493416, | |
| "grad_norm": 52.013178205321424, | |
| "learning_rate": 8.004828797190518e-07, | |
| "logits/chosen": -2.166015625, | |
| "logits/rejected": -2.127734422683716, | |
| "logps/chosen": -420.6000061035156, | |
| "logps/rejected": -326.75, | |
| "loss": 0.229, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": -0.07258300483226776, | |
| "rewards/margins": 4.283593654632568, | |
| "rewards/rejected": -4.352343559265137, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.8077260755048288, | |
| "grad_norm": 42.8864089493482, | |
| "learning_rate": 7.982879719051799e-07, | |
| "logits/chosen": -2.176562547683716, | |
| "logits/rejected": -2.2035155296325684, | |
| "logps/chosen": -516.2999877929688, | |
| "logps/rejected": -329.29998779296875, | |
| "loss": 0.1272, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.08149413764476776, | |
| "rewards/margins": 4.810937404632568, | |
| "rewards/rejected": -4.896093845367432, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.8165057067603161, | |
| "grad_norm": 81.29850288294224, | |
| "learning_rate": 7.960930640913082e-07, | |
| "logits/chosen": -2.1382813453674316, | |
| "logits/rejected": -2.200000047683716, | |
| "logps/chosen": -464.8500061035156, | |
| "logps/rejected": -386.79998779296875, | |
| "loss": 0.1435, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -0.658398449420929, | |
| "rewards/margins": 5.216406345367432, | |
| "rewards/rejected": -5.87109375, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.8252853380158033, | |
| "grad_norm": 23.07930640174056, | |
| "learning_rate": 7.938981562774362e-07, | |
| "logits/chosen": -2.2132811546325684, | |
| "logits/rejected": -2.2265625, | |
| "logps/chosen": -451.3500061035156, | |
| "logps/rejected": -341.95001220703125, | |
| "loss": 0.172, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -1.641845703125, | |
| "rewards/margins": 5.371874809265137, | |
| "rewards/rejected": -7.012499809265137, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.8340649692712906, | |
| "grad_norm": 115.8755700408629, | |
| "learning_rate": 7.917032484635645e-07, | |
| "logits/chosen": -2.2152342796325684, | |
| "logits/rejected": -2.204296827316284, | |
| "logps/chosen": -462.70001220703125, | |
| "logps/rejected": -384.8999938964844, | |
| "loss": 0.1867, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": -2.431445360183716, | |
| "rewards/margins": 5.3515625, | |
| "rewards/rejected": -7.782812595367432, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.8428446005267779, | |
| "grad_norm": 25.653630387821707, | |
| "learning_rate": 7.895083406496926e-07, | |
| "logits/chosen": -2.328906297683716, | |
| "logits/rejected": -2.303515672683716, | |
| "logps/chosen": -490.75, | |
| "logps/rejected": -339.6499938964844, | |
| "loss": 0.2299, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": -2.1357421875, | |
| "rewards/margins": 5.127343654632568, | |
| "rewards/rejected": -7.268750190734863, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.8516242317822651, | |
| "grad_norm": 95.14751058027227, | |
| "learning_rate": 7.873134328358209e-07, | |
| "logits/chosen": -2.1656250953674316, | |
| "logits/rejected": -2.266406297683716, | |
| "logps/chosen": -484.3999938964844, | |
| "logps/rejected": -329.70001220703125, | |
| "loss": 0.1852, | |
| "rewards/accuracies": 0.893750011920929, | |
| "rewards/chosen": -1.1531493663787842, | |
| "rewards/margins": 5.088281154632568, | |
| "rewards/rejected": -6.245312690734863, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.8604038630377524, | |
| "grad_norm": 28.295653106475484, | |
| "learning_rate": 7.85118525021949e-07, | |
| "logits/chosen": -2.206249952316284, | |
| "logits/rejected": -2.283203125, | |
| "logps/chosen": -517.25, | |
| "logps/rejected": -332.8999938964844, | |
| "loss": 0.2181, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -0.5602051019668579, | |
| "rewards/margins": 4.998437404632568, | |
| "rewards/rejected": -5.55859375, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.8691834942932397, | |
| "grad_norm": 115.92989249439722, | |
| "learning_rate": 7.829236172080772e-07, | |
| "logits/chosen": -2.171875, | |
| "logits/rejected": -2.2203125953674316, | |
| "logps/chosen": -470.6000061035156, | |
| "logps/rejected": -338.5, | |
| "loss": 0.1899, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": -0.2669921815395355, | |
| "rewards/margins": 4.7734375, | |
| "rewards/rejected": -5.040625095367432, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.8779631255487269, | |
| "grad_norm": 130.84667844083398, | |
| "learning_rate": 7.807287093942054e-07, | |
| "logits/chosen": -2.073437452316284, | |
| "logits/rejected": -2.165234327316284, | |
| "logps/chosen": -475.1000061035156, | |
| "logps/rejected": -363.6000061035156, | |
| "loss": 0.1733, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": -0.2905029356479645, | |
| "rewards/margins": 5.186718940734863, | |
| "rewards/rejected": -5.477343559265137, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.8867427568042142, | |
| "grad_norm": 37.77710702099301, | |
| "learning_rate": 7.785338015803336e-07, | |
| "logits/chosen": -2.264843702316284, | |
| "logits/rejected": -2.3125, | |
| "logps/chosen": -441.20001220703125, | |
| "logps/rejected": -335.3999938964844, | |
| "loss": 0.2014, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/chosen": -0.19730225205421448, | |
| "rewards/margins": 4.876562595367432, | |
| "rewards/rejected": -5.076562404632568, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.8955223880597015, | |
| "grad_norm": 30.576564253058173, | |
| "learning_rate": 7.763388937664617e-07, | |
| "logits/chosen": -2.2066407203674316, | |
| "logits/rejected": -2.313281297683716, | |
| "logps/chosen": -487.29998779296875, | |
| "logps/rejected": -327.1000061035156, | |
| "loss": 0.1331, | |
| "rewards/accuracies": 0.9437500238418579, | |
| "rewards/chosen": -0.11490478366613388, | |
| "rewards/margins": 4.994531154632568, | |
| "rewards/rejected": -5.107031345367432, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.9043020193151887, | |
| "grad_norm": 57.88214656404599, | |
| "learning_rate": 7.7414398595259e-07, | |
| "logits/chosen": -2.331249952316284, | |
| "logits/rejected": -2.314453125, | |
| "logps/chosen": -412.75, | |
| "logps/rejected": -349.20001220703125, | |
| "loss": 0.1739, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -0.825244128704071, | |
| "rewards/margins": 5.032812595367432, | |
| "rewards/rejected": -5.860156059265137, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.913081650570676, | |
| "grad_norm": 23.925090656985137, | |
| "learning_rate": 7.719490781387181e-07, | |
| "logits/chosen": -2.193359375, | |
| "logits/rejected": -2.276171922683716, | |
| "logps/chosen": -449.0, | |
| "logps/rejected": -335.70001220703125, | |
| "loss": 0.17, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.765881359577179, | |
| "rewards/margins": 5.408593654632568, | |
| "rewards/rejected": -6.17578125, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.9218612818261633, | |
| "grad_norm": 83.0727283656847, | |
| "learning_rate": 7.697541703248464e-07, | |
| "logits/chosen": -2.247265577316284, | |
| "logits/rejected": -2.362499952316284, | |
| "logps/chosen": -452.04998779296875, | |
| "logps/rejected": -311.1000061035156, | |
| "loss": 0.2202, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": -0.20871582627296448, | |
| "rewards/margins": 4.963281154632568, | |
| "rewards/rejected": -5.170312404632568, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.9306409130816505, | |
| "grad_norm": 96.27268091601239, | |
| "learning_rate": 7.675592625109744e-07, | |
| "logits/chosen": -2.1890625953674316, | |
| "logits/rejected": -2.270312547683716, | |
| "logps/chosen": -452.70001220703125, | |
| "logps/rejected": -333.8500061035156, | |
| "loss": 0.1036, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": -0.754589855670929, | |
| "rewards/margins": 5.392968654632568, | |
| "rewards/rejected": -6.146093845367432, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.9394205443371378, | |
| "grad_norm": 91.99247970516416, | |
| "learning_rate": 7.653643546971027e-07, | |
| "logits/chosen": -2.2914061546325684, | |
| "logits/rejected": -2.3394532203674316, | |
| "logps/chosen": -438.25, | |
| "logps/rejected": -351.75, | |
| "loss": 0.2778, | |
| "rewards/accuracies": 0.8687499761581421, | |
| "rewards/chosen": -1.72076416015625, | |
| "rewards/margins": 4.728125095367432, | |
| "rewards/rejected": -6.448437690734863, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.9482001755926251, | |
| "grad_norm": 9.422589898632934, | |
| "learning_rate": 7.631694468832308e-07, | |
| "logits/chosen": -2.285937547683716, | |
| "logits/rejected": -2.401562452316284, | |
| "logps/chosen": -471.3999938964844, | |
| "logps/rejected": -344.95001220703125, | |
| "loss": 0.1715, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -1.692724585533142, | |
| "rewards/margins": 5.327343940734863, | |
| "rewards/rejected": -7.0234375, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.9569798068481123, | |
| "grad_norm": 19.40091159798641, | |
| "learning_rate": 7.60974539069359e-07, | |
| "logits/chosen": -2.3257813453674316, | |
| "logits/rejected": -2.374218702316284, | |
| "logps/chosen": -494.70001220703125, | |
| "logps/rejected": -354.3999938964844, | |
| "loss": 0.1768, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": -0.658764660358429, | |
| "rewards/margins": 5.016406059265137, | |
| "rewards/rejected": -5.670312404632568, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.9657594381035997, | |
| "grad_norm": 24.834680629028092, | |
| "learning_rate": 7.587796312554873e-07, | |
| "logits/chosen": -2.194531202316284, | |
| "logits/rejected": -2.2320313453674316, | |
| "logps/chosen": -450.1000061035156, | |
| "logps/rejected": -335.54998779296875, | |
| "loss": 0.1382, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": -0.794995129108429, | |
| "rewards/margins": 5.7734375, | |
| "rewards/rejected": -6.565625190734863, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.974539069359087, | |
| "grad_norm": 109.26386816223865, | |
| "learning_rate": 7.565847234416154e-07, | |
| "logits/chosen": -2.2054686546325684, | |
| "logits/rejected": -2.198046922683716, | |
| "logps/chosen": -441.20001220703125, | |
| "logps/rejected": -355.6499938964844, | |
| "loss": 0.2044, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": -1.019433617591858, | |
| "rewards/margins": 5.417187690734863, | |
| "rewards/rejected": -6.432812690734863, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.9833187006145742, | |
| "grad_norm": 92.40061486472263, | |
| "learning_rate": 7.543898156277437e-07, | |
| "logits/chosen": -2.3023438453674316, | |
| "logits/rejected": -2.2542967796325684, | |
| "logps/chosen": -435.79998779296875, | |
| "logps/rejected": -353.29998779296875, | |
| "loss": 0.2875, | |
| "rewards/accuracies": 0.8812500238418579, | |
| "rewards/chosen": -1.7038085460662842, | |
| "rewards/margins": 4.735156059265137, | |
| "rewards/rejected": -6.439062595367432, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.9920983318700615, | |
| "grad_norm": 24.081621299054948, | |
| "learning_rate": 7.521949078138717e-07, | |
| "logits/chosen": -2.21484375, | |
| "logits/rejected": -2.35546875, | |
| "logps/chosen": -475.20001220703125, | |
| "logps/rejected": -367.0, | |
| "loss": 0.133, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -1.2808716297149658, | |
| "rewards/margins": 5.571093559265137, | |
| "rewards/rejected": -6.848437309265137, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.0008779631255487, | |
| "grad_norm": 6.542707754283824, | |
| "learning_rate": 7.5e-07, | |
| "logits/chosen": -2.3101563453674316, | |
| "logits/rejected": -2.3359375, | |
| "logps/chosen": -491.29998779296875, | |
| "logps/rejected": -352.25, | |
| "loss": 0.2204, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": -0.9757324457168579, | |
| "rewards/margins": 5.400000095367432, | |
| "rewards/rejected": -6.376562595367432, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.009657594381036, | |
| "grad_norm": 8.22464752681467, | |
| "learning_rate": 7.478050921861282e-07, | |
| "logits/chosen": -2.3226561546325684, | |
| "logits/rejected": -2.3960938453674316, | |
| "logps/chosen": -445.0, | |
| "logps/rejected": -348.29998779296875, | |
| "loss": 0.0239, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8250488042831421, | |
| "rewards/margins": 6.173437595367432, | |
| "rewards/rejected": -6.996874809265137, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.0184372256365233, | |
| "grad_norm": 41.28518142183959, | |
| "learning_rate": 7.456101843722563e-07, | |
| "logits/chosen": -2.25390625, | |
| "logits/rejected": -2.2808594703674316, | |
| "logps/chosen": -423.8500061035156, | |
| "logps/rejected": -352.0, | |
| "loss": 0.0459, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -0.4247680604457855, | |
| "rewards/margins": 6.464062690734863, | |
| "rewards/rejected": -6.889062404632568, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.0272168568920106, | |
| "grad_norm": 6.788851461800516, | |
| "learning_rate": 7.434152765583845e-07, | |
| "logits/chosen": -2.253124952316284, | |
| "logits/rejected": -2.298046827316284, | |
| "logps/chosen": -467.3999938964844, | |
| "logps/rejected": -344.20001220703125, | |
| "loss": 0.0384, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -0.13165283203125, | |
| "rewards/margins": 6.7421875, | |
| "rewards/rejected": -6.875, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.035996488147498, | |
| "grad_norm": 21.063475879709234, | |
| "learning_rate": 7.412203687445126e-07, | |
| "logits/chosen": -2.20703125, | |
| "logits/rejected": -2.329296827316284, | |
| "logps/chosen": -453.29998779296875, | |
| "logps/rejected": -335.70001220703125, | |
| "loss": 0.0376, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": 0.20769043266773224, | |
| "rewards/margins": 7.021874904632568, | |
| "rewards/rejected": -6.810937404632568, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.044776119402985, | |
| "grad_norm": 79.63007232998406, | |
| "learning_rate": 7.390254609306409e-07, | |
| "logits/chosen": -2.280468702316284, | |
| "logits/rejected": -2.380859375, | |
| "logps/chosen": -430.70001220703125, | |
| "logps/rejected": -341.6000061035156, | |
| "loss": 0.0512, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 0.20472411811351776, | |
| "rewards/margins": 6.78125, | |
| "rewards/rejected": -6.576562404632568, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.0535557506584723, | |
| "grad_norm": 25.593289570345775, | |
| "learning_rate": 7.368305531167692e-07, | |
| "logits/chosen": -2.227734327316284, | |
| "logits/rejected": -2.4281249046325684, | |
| "logps/chosen": -452.29998779296875, | |
| "logps/rejected": -384.0, | |
| "loss": 0.0357, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.604321300983429, | |
| "rewards/margins": 8.114062309265137, | |
| "rewards/rejected": -8.712499618530273, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.0623353819139596, | |
| "grad_norm": 2.353433977029733, | |
| "learning_rate": 7.346356453028972e-07, | |
| "logits/chosen": -2.303515672683716, | |
| "logits/rejected": -2.484375, | |
| "logps/chosen": -466.1000061035156, | |
| "logps/rejected": -357.45001220703125, | |
| "loss": 0.0377, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.4786376953125, | |
| "rewards/margins": 7.521874904632568, | |
| "rewards/rejected": -7.9921875, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.071115013169447, | |
| "grad_norm": 3.5237004339105806, | |
| "learning_rate": 7.324407374890255e-07, | |
| "logits/chosen": -2.309765577316284, | |
| "logits/rejected": -2.4203124046325684, | |
| "logps/chosen": -458.70001220703125, | |
| "logps/rejected": -345.25, | |
| "loss": 0.0287, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.2781005799770355, | |
| "rewards/margins": 6.956250190734863, | |
| "rewards/rejected": -7.228125095367432, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.0798946444249342, | |
| "grad_norm": 14.636595390488218, | |
| "learning_rate": 7.302458296751536e-07, | |
| "logits/chosen": -2.471874952316284, | |
| "logits/rejected": -2.535937547683716, | |
| "logps/chosen": -451.0, | |
| "logps/rejected": -338.95001220703125, | |
| "loss": 0.0188, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.637646496295929, | |
| "rewards/margins": 7.251562595367432, | |
| "rewards/rejected": -7.889062404632568, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.0886742756804215, | |
| "grad_norm": 24.305981968774155, | |
| "learning_rate": 7.280509218612819e-07, | |
| "logits/chosen": -2.372265577316284, | |
| "logits/rejected": -2.434765577316284, | |
| "logps/chosen": -438.8999938964844, | |
| "logps/rejected": -368.79998779296875, | |
| "loss": 0.0254, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.565893530845642, | |
| "rewards/margins": 7.984375, | |
| "rewards/rejected": -9.551562309265137, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.0974539069359086, | |
| "grad_norm": 6.908149573647215, | |
| "learning_rate": 7.258560140474099e-07, | |
| "logits/chosen": -2.4320311546325684, | |
| "logits/rejected": -2.5992188453674316, | |
| "logps/chosen": -484.79998779296875, | |
| "logps/rejected": -365.95001220703125, | |
| "loss": 0.0308, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.1583251953125, | |
| "rewards/margins": 8.490625381469727, | |
| "rewards/rejected": -9.653124809265137, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.106233538191396, | |
| "grad_norm": 7.488206058151286, | |
| "learning_rate": 7.236611062335382e-07, | |
| "logits/chosen": -2.481640577316284, | |
| "logits/rejected": -2.547656297683716, | |
| "logps/chosen": -420.3999938964844, | |
| "logps/rejected": -364.8999938964844, | |
| "loss": 0.0359, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -0.4010253846645355, | |
| "rewards/margins": 8.009374618530273, | |
| "rewards/rejected": -8.410937309265137, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.1150131694468832, | |
| "grad_norm": 62.40763934297754, | |
| "learning_rate": 7.214661984196664e-07, | |
| "logits/chosen": -2.417187452316284, | |
| "logits/rejected": -2.485546827316284, | |
| "logps/chosen": -422.8999938964844, | |
| "logps/rejected": -380.8999938964844, | |
| "loss": 0.0562, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -0.884228527545929, | |
| "rewards/margins": 6.974999904632568, | |
| "rewards/rejected": -7.856249809265137, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.1237928007023705, | |
| "grad_norm": 3.1149240594299874, | |
| "learning_rate": 7.192712906057946e-07, | |
| "logits/chosen": -2.407031297683716, | |
| "logits/rejected": -2.5687499046325684, | |
| "logps/chosen": -468.3500061035156, | |
| "logps/rejected": -362.1499938964844, | |
| "loss": 0.05, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -0.22341307997703552, | |
| "rewards/margins": 7.40625, | |
| "rewards/rejected": -7.629687309265137, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.1325724319578578, | |
| "grad_norm": 9.019174379006817, | |
| "learning_rate": 7.170763827919227e-07, | |
| "logits/chosen": -2.330859422683716, | |
| "logits/rejected": -2.530468702316284, | |
| "logps/chosen": -480.1000061035156, | |
| "logps/rejected": -358.1000061035156, | |
| "loss": 0.0456, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": -2.103515625, | |
| "rewards/margins": 7.824999809265137, | |
| "rewards/rejected": -9.931249618530273, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.1413520632133451, | |
| "grad_norm": 51.7296547225416, | |
| "learning_rate": 7.148814749780509e-07, | |
| "logits/chosen": -2.400390625, | |
| "logits/rejected": -2.5234375, | |
| "logps/chosen": -423.5, | |
| "logps/rejected": -331.8999938964844, | |
| "loss": 0.0611, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 0.03999023512005806, | |
| "rewards/margins": 7.240624904632568, | |
| "rewards/rejected": -7.203125, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.1501316944688322, | |
| "grad_norm": 58.973834938303774, | |
| "learning_rate": 7.126865671641791e-07, | |
| "logits/chosen": -2.443359375, | |
| "logits/rejected": -2.5875000953674316, | |
| "logps/chosen": -449.29998779296875, | |
| "logps/rejected": -337.20001220703125, | |
| "loss": 0.1015, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": -0.162109375, | |
| "rewards/margins": 6.690625190734863, | |
| "rewards/rejected": -6.845312595367432, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.1589113257243195, | |
| "grad_norm": 10.928685659671538, | |
| "learning_rate": 7.104916593503074e-07, | |
| "logits/chosen": -2.4921875, | |
| "logits/rejected": -2.639843702316284, | |
| "logps/chosen": -477.79998779296875, | |
| "logps/rejected": -373.45001220703125, | |
| "loss": 0.0348, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.7756103277206421, | |
| "rewards/margins": 7.276562690734863, | |
| "rewards/rejected": -8.048437118530273, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.1676909569798068, | |
| "grad_norm": 17.470305548338914, | |
| "learning_rate": 7.082967515364354e-07, | |
| "logits/chosen": -2.4144530296325684, | |
| "logits/rejected": -2.4917969703674316, | |
| "logps/chosen": -461.79998779296875, | |
| "logps/rejected": -378.0, | |
| "loss": 0.0258, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -0.6912902593612671, | |
| "rewards/margins": 7.7421875, | |
| "rewards/rejected": -8.421875, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.1764705882352942, | |
| "grad_norm": 10.471402034890781, | |
| "learning_rate": 7.061018437225637e-07, | |
| "logits/chosen": -2.36328125, | |
| "logits/rejected": -2.473437547683716, | |
| "logps/chosen": -430.3999938964844, | |
| "logps/rejected": -379.6000061035156, | |
| "loss": 0.022, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.176000952720642, | |
| "rewards/margins": 7.246874809265137, | |
| "rewards/rejected": -8.426562309265137, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.1852502194907815, | |
| "grad_norm": 7.716569511212797, | |
| "learning_rate": 7.039069359086918e-07, | |
| "logits/chosen": -2.414843797683716, | |
| "logits/rejected": -2.467968702316284, | |
| "logps/chosen": -484.04998779296875, | |
| "logps/rejected": -368.8999938964844, | |
| "loss": 0.0496, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.749951183795929, | |
| "rewards/margins": 7.645312309265137, | |
| "rewards/rejected": -8.393750190734863, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.1940298507462686, | |
| "grad_norm": 7.332905932679851, | |
| "learning_rate": 7.0171202809482e-07, | |
| "logits/chosen": -2.4296875, | |
| "logits/rejected": -2.659374952316284, | |
| "logps/chosen": -506.79998779296875, | |
| "logps/rejected": -396.70001220703125, | |
| "loss": 0.0295, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -1.0792968273162842, | |
| "rewards/margins": 7.714062690734863, | |
| "rewards/rejected": -8.800000190734863, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.2028094820017559, | |
| "grad_norm": 36.022927358889355, | |
| "learning_rate": 6.995171202809481e-07, | |
| "logits/chosen": -2.48046875, | |
| "logits/rejected": -2.6460938453674316, | |
| "logps/chosen": -403.20001220703125, | |
| "logps/rejected": -373.75, | |
| "loss": 0.0339, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": -1.180517554283142, | |
| "rewards/margins": 7.921875, | |
| "rewards/rejected": -9.095312118530273, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.2115891132572432, | |
| "grad_norm": 10.415996727273347, | |
| "learning_rate": 6.973222124670764e-07, | |
| "logits/chosen": -2.440624952316284, | |
| "logits/rejected": -2.660937547683716, | |
| "logps/chosen": -423.70001220703125, | |
| "logps/rejected": -351.95001220703125, | |
| "loss": 0.0193, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -0.9304443597793579, | |
| "rewards/margins": 8.326562881469727, | |
| "rewards/rejected": -9.254687309265137, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.2203687445127305, | |
| "grad_norm": 2.6800723062854965, | |
| "learning_rate": 6.951273046532046e-07, | |
| "logits/chosen": -2.440624952316284, | |
| "logits/rejected": -2.5093750953674316, | |
| "logps/chosen": -463.1000061035156, | |
| "logps/rejected": -376.20001220703125, | |
| "loss": 0.0259, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.8814361691474915, | |
| "rewards/margins": 7.842187404632568, | |
| "rewards/rejected": -8.7265625, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.2291483757682178, | |
| "grad_norm": 3.333937633914336, | |
| "learning_rate": 6.929323968393327e-07, | |
| "logits/chosen": -2.477343797683716, | |
| "logits/rejected": -2.6484375, | |
| "logps/chosen": -434.6000061035156, | |
| "logps/rejected": -327.95001220703125, | |
| "loss": 0.0239, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -1.211328148841858, | |
| "rewards/margins": 7.989062309265137, | |
| "rewards/rejected": -9.1953125, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.237928007023705, | |
| "grad_norm": 7.236800984080168, | |
| "learning_rate": 6.907374890254609e-07, | |
| "logits/chosen": -2.4906249046325684, | |
| "logits/rejected": -2.651171922683716, | |
| "logps/chosen": -431.79998779296875, | |
| "logps/rejected": -377.20001220703125, | |
| "loss": 0.057, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -2.7494139671325684, | |
| "rewards/margins": 8.004687309265137, | |
| "rewards/rejected": -10.75, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.2467076382791924, | |
| "grad_norm": 89.68578759479496, | |
| "learning_rate": 6.885425812115891e-07, | |
| "logits/chosen": -2.45703125, | |
| "logits/rejected": -2.631640672683716, | |
| "logps/chosen": -431.8999938964844, | |
| "logps/rejected": -333.5, | |
| "loss": 0.0424, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.768823266029358, | |
| "rewards/margins": 8.817187309265137, | |
| "rewards/rejected": -10.579687118530273, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.2554872695346795, | |
| "grad_norm": 53.9181708740632, | |
| "learning_rate": 6.863476733977173e-07, | |
| "logits/chosen": -2.473437547683716, | |
| "logits/rejected": -2.6742186546325684, | |
| "logps/chosen": -445.95001220703125, | |
| "logps/rejected": -347.8999938964844, | |
| "loss": 0.0283, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.28544920682907104, | |
| "rewards/margins": 7.209374904632568, | |
| "rewards/rejected": -6.915625095367432, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.2642669007901668, | |
| "grad_norm": 36.434854944054194, | |
| "learning_rate": 6.841527655838455e-07, | |
| "logits/chosen": -2.4273438453674316, | |
| "logits/rejected": -2.5718750953674316, | |
| "logps/chosen": -393.6499938964844, | |
| "logps/rejected": -352.54998779296875, | |
| "loss": 0.0599, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -0.8592987060546875, | |
| "rewards/margins": 7.4375, | |
| "rewards/rejected": -8.293749809265137, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.273046532045654, | |
| "grad_norm": 20.67431604894428, | |
| "learning_rate": 6.819578577699736e-07, | |
| "logits/chosen": -2.5101561546325684, | |
| "logits/rejected": -2.667187452316284, | |
| "logps/chosen": -483.3999938964844, | |
| "logps/rejected": -385.54998779296875, | |
| "loss": 0.1111, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -1.441137671470642, | |
| "rewards/margins": 8.453125, | |
| "rewards/rejected": -9.901562690734863, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.2818261633011414, | |
| "grad_norm": 5.114805909142126, | |
| "learning_rate": 6.797629499561019e-07, | |
| "logits/chosen": -2.419140577316284, | |
| "logits/rejected": -2.6820311546325684, | |
| "logps/chosen": -501.1000061035156, | |
| "logps/rejected": -376.1000061035156, | |
| "loss": 0.0536, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -2.1053709983825684, | |
| "rewards/margins": 7.839062690734863, | |
| "rewards/rejected": -9.946874618530273, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.2906057945566287, | |
| "grad_norm": 67.90946806886183, | |
| "learning_rate": 6.7756804214223e-07, | |
| "logits/chosen": -2.391796827316284, | |
| "logits/rejected": -2.6578125953674316, | |
| "logps/chosen": -495.20001220703125, | |
| "logps/rejected": -385.04998779296875, | |
| "loss": 0.0534, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -1.4423828125, | |
| "rewards/margins": 8.024999618530273, | |
| "rewards/rejected": -9.467187881469727, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.2993854258121158, | |
| "grad_norm": 5.842886581660366, | |
| "learning_rate": 6.753731343283582e-07, | |
| "logits/chosen": -2.4515624046325684, | |
| "logits/rejected": -2.633593797683716, | |
| "logps/chosen": -466.25, | |
| "logps/rejected": -351.45001220703125, | |
| "loss": 0.0498, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": -0.8983398675918579, | |
| "rewards/margins": 7.671875, | |
| "rewards/rejected": -8.564062118530273, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.308165057067603, | |
| "grad_norm": 51.42331842998411, | |
| "learning_rate": 6.731782265144864e-07, | |
| "logits/chosen": -2.4828124046325684, | |
| "logits/rejected": -2.587890625, | |
| "logps/chosen": -478.04998779296875, | |
| "logps/rejected": -361.8500061035156, | |
| "loss": 0.0868, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -0.9744628667831421, | |
| "rewards/margins": 7.604687690734863, | |
| "rewards/rejected": -8.589062690734863, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.3169446883230904, | |
| "grad_norm": 29.35151098932549, | |
| "learning_rate": 6.709833187006146e-07, | |
| "logits/chosen": -2.450390577316284, | |
| "logits/rejected": -2.62890625, | |
| "logps/chosen": -452.3999938964844, | |
| "logps/rejected": -376.5, | |
| "loss": 0.0501, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -1.2184326648712158, | |
| "rewards/margins": 7.607812404632568, | |
| "rewards/rejected": -8.829687118530273, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.3257243195785777, | |
| "grad_norm": 70.94386203474886, | |
| "learning_rate": 6.687884108867427e-07, | |
| "logits/chosen": -2.520312547683716, | |
| "logits/rejected": -2.586718797683716, | |
| "logps/chosen": -424.70001220703125, | |
| "logps/rejected": -373.3999938964844, | |
| "loss": 0.0401, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": -1.43011474609375, | |
| "rewards/margins": 8.6015625, | |
| "rewards/rejected": -10.028124809265137, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.334503950834065, | |
| "grad_norm": 7.153019837799803, | |
| "learning_rate": 6.665935030728709e-07, | |
| "logits/chosen": -2.5101561546325684, | |
| "logits/rejected": -2.599609375, | |
| "logps/chosen": -476.45001220703125, | |
| "logps/rejected": -377.29998779296875, | |
| "loss": 0.0388, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.012719750404358, | |
| "rewards/margins": 8.139062881469727, | |
| "rewards/rejected": -9.149999618530273, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.3432835820895521, | |
| "grad_norm": 12.472620043787545, | |
| "learning_rate": 6.643985952589991e-07, | |
| "logits/chosen": -2.526562452316284, | |
| "logits/rejected": -2.586718797683716, | |
| "logps/chosen": -453.3999938964844, | |
| "logps/rejected": -389.04998779296875, | |
| "loss": 0.0622, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": -0.970898449420929, | |
| "rewards/margins": 7.5078125, | |
| "rewards/rejected": -8.485937118530273, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.3520632133450394, | |
| "grad_norm": 34.93917929216004, | |
| "learning_rate": 6.622036874451273e-07, | |
| "logits/chosen": -2.4429688453674316, | |
| "logits/rejected": -2.542187452316284, | |
| "logps/chosen": -462.1000061035156, | |
| "logps/rejected": -384.1499938964844, | |
| "loss": 0.0327, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -0.6491333246231079, | |
| "rewards/margins": 7.932812690734863, | |
| "rewards/rejected": -8.582812309265137, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.3608428446005267, | |
| "grad_norm": 31.69065386752026, | |
| "learning_rate": 6.600087796312554e-07, | |
| "logits/chosen": -2.530468702316284, | |
| "logits/rejected": -2.643749952316284, | |
| "logps/chosen": -428.0, | |
| "logps/rejected": -347.04998779296875, | |
| "loss": 0.0812, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -1.160363793373108, | |
| "rewards/margins": 7.675000190734863, | |
| "rewards/rejected": -8.839062690734863, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.369622475856014, | |
| "grad_norm": 37.12700415966545, | |
| "learning_rate": 6.578138718173837e-07, | |
| "logits/chosen": -2.553906202316284, | |
| "logits/rejected": -2.7085938453674316, | |
| "logps/chosen": -477.1499938964844, | |
| "logps/rejected": -364.3999938964844, | |
| "loss": 0.0502, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": -2.14990234375, | |
| "rewards/margins": 7.875, | |
| "rewards/rejected": -10.029687881469727, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.3784021071115014, | |
| "grad_norm": 2.414722031121448, | |
| "learning_rate": 6.556189640035118e-07, | |
| "logits/chosen": -2.5269532203674316, | |
| "logits/rejected": -2.6585936546325684, | |
| "logps/chosen": -457.79998779296875, | |
| "logps/rejected": -397.1000061035156, | |
| "loss": 0.0694, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -2.302929639816284, | |
| "rewards/margins": 7.3125, | |
| "rewards/rejected": -9.612500190734863, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.3871817383669887, | |
| "grad_norm": 19.525703531957156, | |
| "learning_rate": 6.534240561896401e-07, | |
| "logits/chosen": -2.616406202316284, | |
| "logits/rejected": -2.7835936546325684, | |
| "logps/chosen": -461.0, | |
| "logps/rejected": -383.79998779296875, | |
| "loss": 0.0539, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.888281226158142, | |
| "rewards/margins": 7.506249904632568, | |
| "rewards/rejected": -9.395312309265137, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.395961369622476, | |
| "grad_norm": 2.9624857660743156, | |
| "learning_rate": 6.512291483757681e-07, | |
| "logits/chosen": -2.538281202316284, | |
| "logits/rejected": -2.813281297683716, | |
| "logps/chosen": -439.1000061035156, | |
| "logps/rejected": -342.29998779296875, | |
| "loss": 0.0346, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.653417944908142, | |
| "rewards/margins": 7.9453125, | |
| "rewards/rejected": -9.598437309265137, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.404741000877963, | |
| "grad_norm": 8.60748646976303, | |
| "learning_rate": 6.490342405618964e-07, | |
| "logits/chosen": -2.616406202316284, | |
| "logits/rejected": -2.72265625, | |
| "logps/chosen": -466.25, | |
| "logps/rejected": -348.79998779296875, | |
| "loss": 0.0614, | |
| "rewards/accuracies": 0.956250011920929, | |
| "rewards/chosen": -1.9064209461212158, | |
| "rewards/margins": 8.167187690734863, | |
| "rewards/rejected": -10.078125, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.4135206321334504, | |
| "grad_norm": 6.861518501910524, | |
| "learning_rate": 6.468393327480246e-07, | |
| "logits/chosen": -2.447265625, | |
| "logits/rejected": -2.660937547683716, | |
| "logps/chosen": -444.8999938964844, | |
| "logps/rejected": -371.70001220703125, | |
| "loss": 0.0209, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -0.6099609136581421, | |
| "rewards/margins": 8.2265625, | |
| "rewards/rejected": -8.84375, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.4223002633889377, | |
| "grad_norm": 9.835804113502677, | |
| "learning_rate": 6.446444249341528e-07, | |
| "logits/chosen": -2.4820313453674316, | |
| "logits/rejected": -2.59765625, | |
| "logps/chosen": -476.29998779296875, | |
| "logps/rejected": -406.79998779296875, | |
| "loss": 0.0651, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -0.7592102289199829, | |
| "rewards/margins": 8.393750190734863, | |
| "rewards/rejected": -9.149999618530273, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.431079894644425, | |
| "grad_norm": 55.43335867914973, | |
| "learning_rate": 6.424495171202809e-07, | |
| "logits/chosen": -2.457812547683716, | |
| "logits/rejected": -2.585156202316284, | |
| "logps/chosen": -458.6000061035156, | |
| "logps/rejected": -384.1000061035156, | |
| "loss": 0.0505, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -0.7320556640625, | |
| "rewards/margins": 7.793749809265137, | |
| "rewards/rejected": -8.53125, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.4398595258999123, | |
| "grad_norm": 30.97088906696782, | |
| "learning_rate": 6.402546093064091e-07, | |
| "logits/chosen": -2.5726561546325684, | |
| "logits/rejected": -2.6273436546325684, | |
| "logps/chosen": -437.8500061035156, | |
| "logps/rejected": -370.29998779296875, | |
| "loss": 0.0581, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.1980469226837158, | |
| "rewards/margins": 7.556250095367432, | |
| "rewards/rejected": -8.754687309265137, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.4486391571553994, | |
| "grad_norm": 47.70033602734044, | |
| "learning_rate": 6.380597014925373e-07, | |
| "logits/chosen": -2.4625000953674316, | |
| "logits/rejected": -2.6890625953674316, | |
| "logps/chosen": -445.20001220703125, | |
| "logps/rejected": -371.6000061035156, | |
| "loss": 0.0534, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": -1.353124976158142, | |
| "rewards/margins": 8.296875, | |
| "rewards/rejected": -9.642187118530273, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.4574187884108867, | |
| "grad_norm": 0.3432780004349103, | |
| "learning_rate": 6.358647936786655e-07, | |
| "logits/chosen": -2.5367188453674316, | |
| "logits/rejected": -2.7222657203674316, | |
| "logps/chosen": -451.20001220703125, | |
| "logps/rejected": -385.1499938964844, | |
| "loss": 0.0263, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -1.0514647960662842, | |
| "rewards/margins": 8.571874618530273, | |
| "rewards/rejected": -9.6171875, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.466198419666374, | |
| "grad_norm": 0.6019082419077392, | |
| "learning_rate": 6.336698858647936e-07, | |
| "logits/chosen": -2.498046875, | |
| "logits/rejected": -2.707812547683716, | |
| "logps/chosen": -478.75, | |
| "logps/rejected": -374.95001220703125, | |
| "loss": 0.0508, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -1.147314429283142, | |
| "rewards/margins": 8.162500381469727, | |
| "rewards/rejected": -9.3046875, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.4749780509218613, | |
| "grad_norm": 24.064089504214195, | |
| "learning_rate": 6.314749780509219e-07, | |
| "logits/chosen": -2.473828077316284, | |
| "logits/rejected": -2.5875000953674316, | |
| "logps/chosen": -436.70001220703125, | |
| "logps/rejected": -392.3999938964844, | |
| "loss": 0.0591, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -2.1285400390625, | |
| "rewards/margins": 8.354687690734863, | |
| "rewards/rejected": -10.489062309265137, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.4837576821773486, | |
| "grad_norm": 16.2290064209153, | |
| "learning_rate": 6.2928007023705e-07, | |
| "logits/chosen": -2.51171875, | |
| "logits/rejected": -2.7249999046325684, | |
| "logps/chosen": -420.75, | |
| "logps/rejected": -354.20001220703125, | |
| "loss": 0.0631, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -2.022113084793091, | |
| "rewards/margins": 7.995312690734863, | |
| "rewards/rejected": -10.017187118530273, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.4925373134328357, | |
| "grad_norm": 3.229196553799357, | |
| "learning_rate": 6.270851624231783e-07, | |
| "logits/chosen": -2.6390624046325684, | |
| "logits/rejected": -2.8882813453674316, | |
| "logps/chosen": -467.3999938964844, | |
| "logps/rejected": -371.1499938964844, | |
| "loss": 0.0201, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.3235716819763184, | |
| "rewards/margins": 7.943749904632568, | |
| "rewards/rejected": -10.271875381469727, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.5013169446883232, | |
| "grad_norm": 9.188257643019353, | |
| "learning_rate": 6.248902546093063e-07, | |
| "logits/chosen": -2.526562452316284, | |
| "logits/rejected": -2.805468797683716, | |
| "logps/chosen": -434.70001220703125, | |
| "logps/rejected": -361.75, | |
| "loss": 0.0408, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -2.5238280296325684, | |
| "rewards/margins": 7.942187309265137, | |
| "rewards/rejected": -10.471875190734863, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.5100965759438103, | |
| "grad_norm": 29.26452195738192, | |
| "learning_rate": 6.226953467954346e-07, | |
| "logits/chosen": -2.541015625, | |
| "logits/rejected": -2.875, | |
| "logps/chosen": -426.1000061035156, | |
| "logps/rejected": -355.0, | |
| "loss": 0.027, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.9853515625, | |
| "rewards/margins": 8.178125381469727, | |
| "rewards/rejected": -10.165624618530273, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.5188762071992976, | |
| "grad_norm": 19.205836789188172, | |
| "learning_rate": 6.205004389815628e-07, | |
| "logits/chosen": -2.590625047683716, | |
| "logits/rejected": -2.907031297683716, | |
| "logps/chosen": -460.95001220703125, | |
| "logps/rejected": -368.0, | |
| "loss": 0.0297, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.270410180091858, | |
| "rewards/margins": 9.171875, | |
| "rewards/rejected": -10.434374809265137, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.527655838454785, | |
| "grad_norm": 34.625243306663386, | |
| "learning_rate": 6.18305531167691e-07, | |
| "logits/chosen": -2.589062452316284, | |
| "logits/rejected": -2.7984375953674316, | |
| "logps/chosen": -440.70001220703125, | |
| "logps/rejected": -359.20001220703125, | |
| "loss": 0.0908, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -1.383886694908142, | |
| "rewards/margins": 8.3046875, | |
| "rewards/rejected": -9.681249618530273, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.536435469710272, | |
| "grad_norm": 2.9213182931390484, | |
| "learning_rate": 6.161106233538191e-07, | |
| "logits/chosen": -2.582812547683716, | |
| "logits/rejected": -2.809375047683716, | |
| "logps/chosen": -479.5, | |
| "logps/rejected": -404.04998779296875, | |
| "loss": 0.0387, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -0.9427734613418579, | |
| "rewards/margins": 8.5859375, | |
| "rewards/rejected": -9.524999618530273, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.5452151009657595, | |
| "grad_norm": 68.38428797919153, | |
| "learning_rate": 6.139157155399473e-07, | |
| "logits/chosen": -2.579296827316284, | |
| "logits/rejected": -2.831249952316284, | |
| "logps/chosen": -397.1000061035156, | |
| "logps/rejected": -356.8500061035156, | |
| "loss": 0.0485, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -1.036279320716858, | |
| "rewards/margins": 7.775000095367432, | |
| "rewards/rejected": -8.817187309265137, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.5539947322212466, | |
| "grad_norm": 1.0331151816332567, | |
| "learning_rate": 6.117208077260755e-07, | |
| "logits/chosen": -2.587109327316284, | |
| "logits/rejected": -2.7484374046325684, | |
| "logps/chosen": -428.3999938964844, | |
| "logps/rejected": -368.29998779296875, | |
| "loss": 0.0768, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": -0.949902355670929, | |
| "rewards/margins": 7.314062595367432, | |
| "rewards/rejected": -8.260937690734863, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.562774363476734, | |
| "grad_norm": 82.7667900525252, | |
| "learning_rate": 6.095258999122037e-07, | |
| "logits/chosen": -2.481250047683716, | |
| "logits/rejected": -2.7109375, | |
| "logps/chosen": -439.79998779296875, | |
| "logps/rejected": -377.04998779296875, | |
| "loss": 0.0503, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": -0.32332152128219604, | |
| "rewards/margins": 8.734375, | |
| "rewards/rejected": -9.060937881469727, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.5715539947322212, | |
| "grad_norm": 28.160790924621615, | |
| "learning_rate": 6.073309920983318e-07, | |
| "logits/chosen": -2.498046875, | |
| "logits/rejected": -2.690624952316284, | |
| "logps/chosen": -462.20001220703125, | |
| "logps/rejected": -377.8999938964844, | |
| "loss": 0.0719, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": -1.471777319908142, | |
| "rewards/margins": 8.529687881469727, | |
| "rewards/rejected": -9.998437881469727, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.5803336259877085, | |
| "grad_norm": 13.087605043884519, | |
| "learning_rate": 6.051360842844601e-07, | |
| "logits/chosen": -2.686718702316284, | |
| "logits/rejected": -2.9164061546325684, | |
| "logps/chosen": -460.29998779296875, | |
| "logps/rejected": -366.75, | |
| "loss": 0.0456, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -2.2251954078674316, | |
| "rewards/margins": 8.2109375, | |
| "rewards/rejected": -10.446874618530273, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.5891132572431959, | |
| "grad_norm": 82.74475452839468, | |
| "learning_rate": 6.029411764705882e-07, | |
| "logits/chosen": -2.6402344703674316, | |
| "logits/rejected": -2.848437547683716, | |
| "logps/chosen": -443.3500061035156, | |
| "logps/rejected": -392.1000061035156, | |
| "loss": 0.0368, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -1.8682129383087158, | |
| "rewards/margins": 8.699999809265137, | |
| "rewards/rejected": -10.578125, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.597892888498683, | |
| "grad_norm": 51.98127865086831, | |
| "learning_rate": 6.007462686567164e-07, | |
| "logits/chosen": -2.5582032203674316, | |
| "logits/rejected": -2.913281202316284, | |
| "logps/chosen": -467.75, | |
| "logps/rejected": -381.0, | |
| "loss": 0.0429, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -1.9124755859375, | |
| "rewards/margins": 8.715624809265137, | |
| "rewards/rejected": -10.629687309265137, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.6066725197541705, | |
| "grad_norm": 13.687291910833912, | |
| "learning_rate": 5.985513608428445e-07, | |
| "logits/chosen": -2.706249952316284, | |
| "logits/rejected": -2.821093797683716, | |
| "logps/chosen": -465.54998779296875, | |
| "logps/rejected": -404.3999938964844, | |
| "loss": 0.0303, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -1.164697289466858, | |
| "rewards/margins": 8.839062690734863, | |
| "rewards/rejected": -10.0078125, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.6154521510096576, | |
| "grad_norm": 3.141632413942404, | |
| "learning_rate": 5.963564530289728e-07, | |
| "logits/chosen": -2.7421875, | |
| "logits/rejected": -2.9671874046325684, | |
| "logps/chosen": -451.95001220703125, | |
| "logps/rejected": -354.6000061035156, | |
| "loss": 0.0487, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -1.1751708984375, | |
| "rewards/margins": 8.1796875, | |
| "rewards/rejected": -9.3515625, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.6242317822651449, | |
| "grad_norm": 40.215753609476586, | |
| "learning_rate": 5.94161545215101e-07, | |
| "logits/chosen": -2.473437547683716, | |
| "logits/rejected": -2.79296875, | |
| "logps/chosen": -446.70001220703125, | |
| "logps/rejected": -378.04998779296875, | |
| "loss": 0.0354, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.3036377429962158, | |
| "rewards/margins": 8.565625190734863, | |
| "rewards/rejected": -9.868749618530273, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.6330114135206322, | |
| "grad_norm": 121.84868653993065, | |
| "learning_rate": 5.919666374012291e-07, | |
| "logits/chosen": -2.530468702316284, | |
| "logits/rejected": -2.73046875, | |
| "logps/chosen": -490.20001220703125, | |
| "logps/rejected": -394.25, | |
| "loss": 0.0511, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": -0.5744994878768921, | |
| "rewards/margins": 9.151562690734863, | |
| "rewards/rejected": -9.725000381469727, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.6417910447761193, | |
| "grad_norm": 1.4363450893315823, | |
| "learning_rate": 5.897717295873573e-07, | |
| "logits/chosen": -2.5914063453674316, | |
| "logits/rejected": -2.7925782203674316, | |
| "logps/chosen": -502.6499938964844, | |
| "logps/rejected": -402.29998779296875, | |
| "loss": 0.0501, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": -1.456884741783142, | |
| "rewards/margins": 8.784375190734863, | |
| "rewards/rejected": -10.245312690734863, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.6505706760316068, | |
| "grad_norm": 6.331791348918632, | |
| "learning_rate": 5.875768217734855e-07, | |
| "logits/chosen": -2.6546874046325684, | |
| "logits/rejected": -2.8804688453674316, | |
| "logps/chosen": -502.8999938964844, | |
| "logps/rejected": -385.04998779296875, | |
| "loss": 0.0838, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -2.12109375, | |
| "rewards/margins": 8.404687881469727, | |
| "rewards/rejected": -10.53125, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.6593503072870939, | |
| "grad_norm": 30.203770737498132, | |
| "learning_rate": 5.853819139596137e-07, | |
| "logits/chosen": -2.573437452316284, | |
| "logits/rejected": -2.7835936546325684, | |
| "logps/chosen": -459.1499938964844, | |
| "logps/rejected": -374.3999938964844, | |
| "loss": 0.0291, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.6189453601837158, | |
| "rewards/margins": 8.415624618530273, | |
| "rewards/rejected": -10.03125, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.6681299385425812, | |
| "grad_norm": 17.78176630586189, | |
| "learning_rate": 5.831870061457419e-07, | |
| "logits/chosen": -2.495312452316284, | |
| "logits/rejected": -2.623046875, | |
| "logps/chosen": -482.29998779296875, | |
| "logps/rejected": -361.1000061035156, | |
| "loss": 0.0488, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -1.176123023033142, | |
| "rewards/margins": 8.346875190734863, | |
| "rewards/rejected": -9.520312309265137, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.6769095697980685, | |
| "grad_norm": 5.365005743269785, | |
| "learning_rate": 5.8099209833187e-07, | |
| "logits/chosen": -2.4820313453674316, | |
| "logits/rejected": -2.6820311546325684, | |
| "logps/chosen": -498.1000061035156, | |
| "logps/rejected": -376.1000061035156, | |
| "loss": 0.0314, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.484277367591858, | |
| "rewards/margins": 9.003125190734863, | |
| "rewards/rejected": -10.487500190734863, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.6856892010535558, | |
| "grad_norm": 4.911837616919007, | |
| "learning_rate": 5.787971905179983e-07, | |
| "logits/chosen": -2.551953077316284, | |
| "logits/rejected": -2.7515625953674316, | |
| "logps/chosen": -507.5, | |
| "logps/rejected": -418.3999938964844, | |
| "loss": 0.0599, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -1.892431616783142, | |
| "rewards/margins": 9.432812690734863, | |
| "rewards/rejected": -11.321874618530273, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.694468832309043, | |
| "grad_norm": 48.41935696742479, | |
| "learning_rate": 5.766022827041263e-07, | |
| "logits/chosen": -2.6695313453674316, | |
| "logits/rejected": -2.874218702316284, | |
| "logps/chosen": -478.5, | |
| "logps/rejected": -369.3999938964844, | |
| "loss": 0.0661, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": -2.2271971702575684, | |
| "rewards/margins": 8.535937309265137, | |
| "rewards/rejected": -10.762499809265137, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.7032484635645302, | |
| "grad_norm": 3.1904332195658536, | |
| "learning_rate": 5.744073748902546e-07, | |
| "logits/chosen": -2.678906202316284, | |
| "logits/rejected": -2.8359375, | |
| "logps/chosen": -474.70001220703125, | |
| "logps/rejected": -378.45001220703125, | |
| "loss": 0.0354, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": -1.5466797351837158, | |
| "rewards/margins": 8.540624618530273, | |
| "rewards/rejected": -10.090624809265137, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.7120280948200177, | |
| "grad_norm": 36.581197901006014, | |
| "learning_rate": 5.722124670763828e-07, | |
| "logits/chosen": -2.6664061546325684, | |
| "logits/rejected": -2.796875, | |
| "logps/chosen": -479.3999938964844, | |
| "logps/rejected": -392.1000061035156, | |
| "loss": 0.0301, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -1.533593773841858, | |
| "rewards/margins": 8.985937118530273, | |
| "rewards/rejected": -10.5234375, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.7208077260755048, | |
| "grad_norm": 3.0076297690669938, | |
| "learning_rate": 5.70017559262511e-07, | |
| "logits/chosen": -2.46484375, | |
| "logits/rejected": -2.8765625953674316, | |
| "logps/chosen": -519.5999755859375, | |
| "logps/rejected": -390.6000061035156, | |
| "loss": 0.0343, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -2.000195264816284, | |
| "rewards/margins": 8.259374618530273, | |
| "rewards/rejected": -10.254687309265137, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.7295873573309921, | |
| "grad_norm": 75.69219904133588, | |
| "learning_rate": 5.678226514486391e-07, | |
| "logits/chosen": -2.6109375953674316, | |
| "logits/rejected": -2.823437452316284, | |
| "logps/chosen": -477.5, | |
| "logps/rejected": -376.29998779296875, | |
| "loss": 0.1164, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": -1.6555664539337158, | |
| "rewards/margins": 8.425000190734863, | |
| "rewards/rejected": -10.078125, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.7383669885864794, | |
| "grad_norm": 4.499107693623519, | |
| "learning_rate": 5.656277436347673e-07, | |
| "logits/chosen": -2.5121092796325684, | |
| "logits/rejected": -2.8316407203674316, | |
| "logps/chosen": -501.1000061035156, | |
| "logps/rejected": -397.29998779296875, | |
| "loss": 0.0534, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -2.2420897483825684, | |
| "rewards/margins": 8.145312309265137, | |
| "rewards/rejected": -10.384374618530273, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.7471466198419665, | |
| "grad_norm": 2.123299764443924, | |
| "learning_rate": 5.634328358208955e-07, | |
| "logits/chosen": -2.59765625, | |
| "logits/rejected": -2.8656249046325684, | |
| "logps/chosen": -508.0, | |
| "logps/rejected": -381.0, | |
| "loss": 0.0178, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.7275390625, | |
| "rewards/margins": 8.609375, | |
| "rewards/rejected": -11.34375, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.755926251097454, | |
| "grad_norm": 0.238167592399743, | |
| "learning_rate": 5.612379280070237e-07, | |
| "logits/chosen": -2.5492186546325684, | |
| "logits/rejected": -2.825000047683716, | |
| "logps/chosen": -455.3500061035156, | |
| "logps/rejected": -415.70001220703125, | |
| "loss": 0.0175, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.5980956554412842, | |
| "rewards/margins": 9.026562690734863, | |
| "rewards/rejected": -10.631250381469727, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.7647058823529411, | |
| "grad_norm": 57.651481695029794, | |
| "learning_rate": 5.590430201931518e-07, | |
| "logits/chosen": -2.609375, | |
| "logits/rejected": -2.796093702316284, | |
| "logps/chosen": -462.1000061035156, | |
| "logps/rejected": -383.6000061035156, | |
| "loss": 0.0649, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -1.4416015148162842, | |
| "rewards/margins": 8.684374809265137, | |
| "rewards/rejected": -10.123437881469727, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.7734855136084284, | |
| "grad_norm": 8.324861608533597, | |
| "learning_rate": 5.568481123792801e-07, | |
| "logits/chosen": -2.5218749046325684, | |
| "logits/rejected": -2.78125, | |
| "logps/chosen": -503.20001220703125, | |
| "logps/rejected": -389.0, | |
| "loss": 0.0448, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.0042724609375, | |
| "rewards/margins": 8.893750190734863, | |
| "rewards/rejected": -9.904687881469727, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.7822651448639157, | |
| "grad_norm": 22.411601827446088, | |
| "learning_rate": 5.546532045654082e-07, | |
| "logits/chosen": -2.596484422683716, | |
| "logits/rejected": -2.8179688453674316, | |
| "logps/chosen": -490.5, | |
| "logps/rejected": -391.75, | |
| "loss": 0.0258, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -1.495214819908142, | |
| "rewards/margins": 8.793749809265137, | |
| "rewards/rejected": -10.284375190734863, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.7910447761194028, | |
| "grad_norm": 4.2346820725814895, | |
| "learning_rate": 5.524582967515365e-07, | |
| "logits/chosen": -2.577343702316284, | |
| "logits/rejected": -2.766406297683716, | |
| "logps/chosen": -474.3500061035156, | |
| "logps/rejected": -425.25, | |
| "loss": 0.0597, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -1.630590796470642, | |
| "rewards/margins": 8.478124618530273, | |
| "rewards/rejected": -10.107812881469727, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.7998244073748904, | |
| "grad_norm": 12.385046369282417, | |
| "learning_rate": 5.502633889376645e-07, | |
| "logits/chosen": -2.6156249046325684, | |
| "logits/rejected": -2.82421875, | |
| "logps/chosen": -449.8999938964844, | |
| "logps/rejected": -390.3500061035156, | |
| "loss": 0.0174, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.770031750202179, | |
| "rewards/margins": 8.639062881469727, | |
| "rewards/rejected": -9.403124809265137, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.8086040386303774, | |
| "grad_norm": 1.8844697228375282, | |
| "learning_rate": 5.480684811237928e-07, | |
| "logits/chosen": -2.5484375953674316, | |
| "logits/rejected": -2.852343797683716, | |
| "logps/chosen": -513.5999755859375, | |
| "logps/rejected": -415.79998779296875, | |
| "loss": 0.0112, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.4247069358825684, | |
| "rewards/margins": 9.189062118530273, | |
| "rewards/rejected": -11.615625381469727, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.8173836698858647, | |
| "grad_norm": 2.637432316749487, | |
| "learning_rate": 5.45873573309921e-07, | |
| "logits/chosen": -2.5562500953674316, | |
| "logits/rejected": -2.8031249046325684, | |
| "logps/chosen": -480.0, | |
| "logps/rejected": -414.1499938964844, | |
| "loss": 0.024, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.6226563453674316, | |
| "rewards/margins": 9.015625, | |
| "rewards/rejected": -11.640625, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.826163301141352, | |
| "grad_norm": 4.752570108114763, | |
| "learning_rate": 5.436786654960492e-07, | |
| "logits/chosen": -2.745312452316284, | |
| "logits/rejected": -3.008593797683716, | |
| "logps/chosen": -468.04998779296875, | |
| "logps/rejected": -398.8999938964844, | |
| "loss": 0.0567, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -1.5981934070587158, | |
| "rewards/margins": 8.998437881469727, | |
| "rewards/rejected": -10.603124618530273, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.8349429323968394, | |
| "grad_norm": 1.4557584230319258, | |
| "learning_rate": 5.414837576821773e-07, | |
| "logits/chosen": -2.598437547683716, | |
| "logits/rejected": -2.8257813453674316, | |
| "logps/chosen": -522.2999877929688, | |
| "logps/rejected": -383.0, | |
| "loss": 0.0337, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -1.663232445716858, | |
| "rewards/margins": 8.885937690734863, | |
| "rewards/rejected": -10.542187690734863, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.8437225636523267, | |
| "grad_norm": 5.671437996698917, | |
| "learning_rate": 5.392888498683055e-07, | |
| "logits/chosen": -2.612499952316284, | |
| "logits/rejected": -2.9242186546325684, | |
| "logps/chosen": -479.1000061035156, | |
| "logps/rejected": -371.6000061035156, | |
| "loss": 0.0371, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -1.3997681140899658, | |
| "rewards/margins": 9.600000381469727, | |
| "rewards/rejected": -10.996874809265137, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.8525021949078138, | |
| "grad_norm": 25.852137847817737, | |
| "learning_rate": 5.370939420544337e-07, | |
| "logits/chosen": -2.69921875, | |
| "logits/rejected": -2.957812547683716, | |
| "logps/chosen": -494.70001220703125, | |
| "logps/rejected": -387.70001220703125, | |
| "loss": 0.0307, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -2.2505860328674316, | |
| "rewards/margins": 9.481249809265137, | |
| "rewards/rejected": -11.734375, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.8612818261633013, | |
| "grad_norm": 10.007930071076435, | |
| "learning_rate": 5.348990342405619e-07, | |
| "logits/chosen": -2.621875047683716, | |
| "logits/rejected": -2.8687500953674316, | |
| "logps/chosen": -485.5, | |
| "logps/rejected": -425.6000061035156, | |
| "loss": 0.0136, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.8060545921325684, | |
| "rewards/margins": 9.565625190734863, | |
| "rewards/rejected": -12.375, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.8700614574187884, | |
| "grad_norm": 29.969263411854946, | |
| "learning_rate": 5.3270412642669e-07, | |
| "logits/chosen": -2.702343702316284, | |
| "logits/rejected": -2.9140625, | |
| "logps/chosen": -483.1000061035156, | |
| "logps/rejected": -437.20001220703125, | |
| "loss": 0.0255, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -2.2657227516174316, | |
| "rewards/margins": 9.53125, | |
| "rewards/rejected": -11.793749809265137, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.8788410886742757, | |
| "grad_norm": 74.83505274593155, | |
| "learning_rate": 5.305092186128183e-07, | |
| "logits/chosen": -2.729296922683716, | |
| "logits/rejected": -3.0093750953674316, | |
| "logps/chosen": -441.0, | |
| "logps/rejected": -382.45001220703125, | |
| "loss": 0.1658, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": -2.8695311546325684, | |
| "rewards/margins": 8.7734375, | |
| "rewards/rejected": -11.637499809265137, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.887620719929763, | |
| "grad_norm": 54.97304214370293, | |
| "learning_rate": 5.283143107989464e-07, | |
| "logits/chosen": -2.768749952316284, | |
| "logits/rejected": -2.9820313453674316, | |
| "logps/chosen": -455.8500061035156, | |
| "logps/rejected": -388.20001220703125, | |
| "loss": 0.0439, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -1.9685547351837158, | |
| "rewards/margins": 8.732812881469727, | |
| "rewards/rejected": -10.704687118530273, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.89640035118525, | |
| "grad_norm": 9.651110238221477, | |
| "learning_rate": 5.261194029850747e-07, | |
| "logits/chosen": -2.710156202316284, | |
| "logits/rejected": -2.95703125, | |
| "logps/chosen": -459.70001220703125, | |
| "logps/rejected": -403.8500061035156, | |
| "loss": 0.0299, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -1.7095215320587158, | |
| "rewards/margins": 8.949999809265137, | |
| "rewards/rejected": -10.65625, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.9051799824407376, | |
| "grad_norm": 2.1942154568623855, | |
| "learning_rate": 5.239244951712027e-07, | |
| "logits/chosen": -2.678906202316284, | |
| "logits/rejected": -3.046093702316284, | |
| "logps/chosen": -429.8999938964844, | |
| "logps/rejected": -370.5, | |
| "loss": 0.0716, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -2.79638671875, | |
| "rewards/margins": 8.962499618530273, | |
| "rewards/rejected": -11.759374618530273, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.9139596136962247, | |
| "grad_norm": 11.58708748732068, | |
| "learning_rate": 5.21729587357331e-07, | |
| "logits/chosen": -2.587890625, | |
| "logits/rejected": -2.9703125953674316, | |
| "logps/chosen": -500.20001220703125, | |
| "logps/rejected": -372.04998779296875, | |
| "loss": 0.0123, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.0755858421325684, | |
| "rewards/margins": 9.807812690734863, | |
| "rewards/rejected": -11.881250381469727, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.922739244951712, | |
| "grad_norm": 11.266654651590073, | |
| "learning_rate": 5.195346795434592e-07, | |
| "logits/chosen": -2.7164063453674316, | |
| "logits/rejected": -2.87890625, | |
| "logps/chosen": -435.1000061035156, | |
| "logps/rejected": -369.20001220703125, | |
| "loss": 0.0449, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -1.085302710533142, | |
| "rewards/margins": 9.196874618530273, | |
| "rewards/rejected": -10.271875381469727, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.9315188762071993, | |
| "grad_norm": 6.332808464658899, | |
| "learning_rate": 5.173397717295873e-07, | |
| "logits/chosen": -2.698046922683716, | |
| "logits/rejected": -2.944531202316284, | |
| "logps/chosen": -426.20001220703125, | |
| "logps/rejected": -388.3999938964844, | |
| "loss": 0.0275, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": -1.277307152748108, | |
| "rewards/margins": 9.339062690734863, | |
| "rewards/rejected": -10.609375, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.9402985074626866, | |
| "grad_norm": 2.899822450260841, | |
| "learning_rate": 5.151448639157155e-07, | |
| "logits/chosen": -2.604687452316284, | |
| "logits/rejected": -2.8359375, | |
| "logps/chosen": -485.29998779296875, | |
| "logps/rejected": -378.3999938964844, | |
| "loss": 0.0142, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.796435534954071, | |
| "rewards/margins": 9.4296875, | |
| "rewards/rejected": -10.225000381469727, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.949078138718174, | |
| "grad_norm": 90.00031641616339, | |
| "learning_rate": 5.129499561018437e-07, | |
| "logits/chosen": -2.688281297683716, | |
| "logits/rejected": -2.9507813453674316, | |
| "logps/chosen": -497.20001220703125, | |
| "logps/rejected": -399.8999938964844, | |
| "loss": 0.0402, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.4178709983825684, | |
| "rewards/margins": 9.600000381469727, | |
| "rewards/rejected": -12.018750190734863, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.957857769973661, | |
| "grad_norm": 5.759216959258573, | |
| "learning_rate": 5.107550482879719e-07, | |
| "logits/chosen": -2.7828125953674316, | |
| "logits/rejected": -3.0269532203674316, | |
| "logps/chosen": -476.79998779296875, | |
| "logps/rejected": -377.6000061035156, | |
| "loss": 0.0213, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -3.258007764816284, | |
| "rewards/margins": 9.470312118530273, | |
| "rewards/rejected": -12.728124618530273, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.9666374012291485, | |
| "grad_norm": 76.46826217334423, | |
| "learning_rate": 5.085601404741001e-07, | |
| "logits/chosen": -2.688281297683716, | |
| "logits/rejected": -3.0257811546325684, | |
| "logps/chosen": -433.95001220703125, | |
| "logps/rejected": -363.3500061035156, | |
| "loss": 0.0891, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": -1.761865258216858, | |
| "rewards/margins": 8.635937690734863, | |
| "rewards/rejected": -10.393750190734863, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.9754170324846356, | |
| "grad_norm": 7.416795070241535, | |
| "learning_rate": 5.063652326602282e-07, | |
| "logits/chosen": -2.62109375, | |
| "logits/rejected": -2.850781202316284, | |
| "logps/chosen": -495.6000061035156, | |
| "logps/rejected": -415.5, | |
| "loss": 0.0447, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": -1.2946288585662842, | |
| "rewards/margins": 8.646875381469727, | |
| "rewards/rejected": -9.932812690734863, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.984196663740123, | |
| "grad_norm": 80.48079644613428, | |
| "learning_rate": 5.041703248463565e-07, | |
| "logits/chosen": -2.6468749046325684, | |
| "logits/rejected": -2.8335938453674316, | |
| "logps/chosen": -536.5, | |
| "logps/rejected": -458.1000061035156, | |
| "loss": 0.0504, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.196874976158142, | |
| "rewards/margins": 9.204687118530273, | |
| "rewards/rejected": -10.399999618530273, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.9929762949956102, | |
| "grad_norm": 60.45942940519937, | |
| "learning_rate": 5.019754170324846e-07, | |
| "logits/chosen": -2.6734375953674316, | |
| "logits/rejected": -2.932812452316284, | |
| "logps/chosen": -393.25, | |
| "logps/rejected": -378.3999938964844, | |
| "loss": 0.0345, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.770361304283142, | |
| "rewards/margins": 8.771875381469727, | |
| "rewards/rejected": -10.548437118530273, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.0017559262510973, | |
| "grad_norm": 5.829231629804558, | |
| "learning_rate": 4.997805092186128e-07, | |
| "logits/chosen": -2.75390625, | |
| "logits/rejected": -3.000781297683716, | |
| "logps/chosen": -391.45001220703125, | |
| "logps/rejected": -397.0, | |
| "loss": 0.0342, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.8310546875, | |
| "rewards/margins": 8.748437881469727, | |
| "rewards/rejected": -10.582812309265137, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.010535557506585, | |
| "grad_norm": 8.129958503101284, | |
| "learning_rate": 4.97585601404741e-07, | |
| "logits/chosen": -2.6031250953674316, | |
| "logits/rejected": -2.879687547683716, | |
| "logps/chosen": -450.3500061035156, | |
| "logps/rejected": -412.20001220703125, | |
| "loss": 0.0347, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -1.5906250476837158, | |
| "rewards/margins": 9.707812309265137, | |
| "rewards/rejected": -11.295312881469727, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.019315188762072, | |
| "grad_norm": 1.256296094856506, | |
| "learning_rate": 4.953906935908692e-07, | |
| "logits/chosen": -2.6742186546325684, | |
| "logits/rejected": -2.9398436546325684, | |
| "logps/chosen": -486.1000061035156, | |
| "logps/rejected": -398.3999938964844, | |
| "loss": 0.0065, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.4610352516174316, | |
| "rewards/margins": 9.824999809265137, | |
| "rewards/rejected": -12.287500381469727, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.0280948200175595, | |
| "grad_norm": 2.4979103671971536, | |
| "learning_rate": 4.931957857769974e-07, | |
| "logits/chosen": -2.625, | |
| "logits/rejected": -2.9765625, | |
| "logps/chosen": -543.5, | |
| "logps/rejected": -409.20001220703125, | |
| "loss": 0.0122, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -1.585778832435608, | |
| "rewards/margins": 10.546875, | |
| "rewards/rejected": -12.131250381469727, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.0368744512730466, | |
| "grad_norm": 1.6401242067323407, | |
| "learning_rate": 4.910008779631255e-07, | |
| "logits/chosen": -2.764843702316284, | |
| "logits/rejected": -2.98828125, | |
| "logps/chosen": -469.3999938964844, | |
| "logps/rejected": -410.20001220703125, | |
| "loss": 0.0016, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.1011719703674316, | |
| "rewards/margins": 9.9609375, | |
| "rewards/rejected": -12.059374809265137, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.0456540825285336, | |
| "grad_norm": 5.31502201409783, | |
| "learning_rate": 4.888059701492537e-07, | |
| "logits/chosen": -2.859375, | |
| "logits/rejected": -3.11328125, | |
| "logps/chosen": -462.1000061035156, | |
| "logps/rejected": -402.20001220703125, | |
| "loss": 0.0048, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.7080078125, | |
| "rewards/margins": 10.503125190734863, | |
| "rewards/rejected": -13.206250190734863, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.054433713784021, | |
| "grad_norm": 3.0086319393185024, | |
| "learning_rate": 4.866110623353819e-07, | |
| "logits/chosen": -2.7691407203674316, | |
| "logits/rejected": -3.0718750953674316, | |
| "logps/chosen": -404.75, | |
| "logps/rejected": -369.8500061035156, | |
| "loss": 0.0048, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.324414014816284, | |
| "rewards/margins": 10.246874809265137, | |
| "rewards/rejected": -12.587499618530273, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.0632133450395083, | |
| "grad_norm": 0.7091409954115243, | |
| "learning_rate": 4.8441615452151e-07, | |
| "logits/chosen": -2.7242188453674316, | |
| "logits/rejected": -3.082812547683716, | |
| "logps/chosen": -466.0, | |
| "logps/rejected": -405.3999938964844, | |
| "loss": 0.0028, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.139453172683716, | |
| "rewards/margins": 10.290624618530273, | |
| "rewards/rejected": -12.425000190734863, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.071992976294996, | |
| "grad_norm": 0.9127856913215332, | |
| "learning_rate": 4.822212467076382e-07, | |
| "logits/chosen": -2.7789063453674316, | |
| "logits/rejected": -3.0687499046325684, | |
| "logps/chosen": -468.70001220703125, | |
| "logps/rejected": -414.0, | |
| "loss": 0.0047, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.1996092796325684, | |
| "rewards/margins": 10.240625381469727, | |
| "rewards/rejected": -12.449999809265137, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.080772607550483, | |
| "grad_norm": 0.4108646918329786, | |
| "learning_rate": 4.800263388937664e-07, | |
| "logits/chosen": -2.79296875, | |
| "logits/rejected": -3.06640625, | |
| "logps/chosen": -404.8999938964844, | |
| "logps/rejected": -381.70001220703125, | |
| "loss": 0.003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.2166380882263184, | |
| "rewards/margins": 10.6875, | |
| "rewards/rejected": -12.903124809265137, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.08955223880597, | |
| "grad_norm": 25.929695524989565, | |
| "learning_rate": 4.778314310798946e-07, | |
| "logits/chosen": -2.796875, | |
| "logits/rejected": -3.08203125, | |
| "logps/chosen": -489.29998779296875, | |
| "logps/rejected": -403.1499938964844, | |
| "loss": 0.044, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -3.372265577316284, | |
| "rewards/margins": 11.120312690734863, | |
| "rewards/rejected": -14.490625381469727, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.0983318700614575, | |
| "grad_norm": 3.2995867770152203, | |
| "learning_rate": 4.7563652326602285e-07, | |
| "logits/chosen": -2.645312547683716, | |
| "logits/rejected": -3.0921874046325684, | |
| "logps/chosen": -487.3999938964844, | |
| "logps/rejected": -392.8999938964844, | |
| "loss": 0.0033, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.2496094703674316, | |
| "rewards/margins": 10.725000381469727, | |
| "rewards/rejected": -12.984375, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.1071115013169446, | |
| "grad_norm": 0.7473818449492524, | |
| "learning_rate": 4.73441615452151e-07, | |
| "logits/chosen": -2.5804686546325684, | |
| "logits/rejected": -2.932812452316284, | |
| "logps/chosen": -485.8500061035156, | |
| "logps/rejected": -416.20001220703125, | |
| "loss": 0.0033, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.7834961414337158, | |
| "rewards/margins": 11.267187118530273, | |
| "rewards/rejected": -13.068750381469727, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.115891132572432, | |
| "grad_norm": 3.7988199924865587, | |
| "learning_rate": 4.712467076382792e-07, | |
| "logits/chosen": -2.7671875953674316, | |
| "logits/rejected": -3.0835938453674316, | |
| "logps/chosen": -476.8999938964844, | |
| "logps/rejected": -423.79998779296875, | |
| "loss": 0.0143, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.292919874191284, | |
| "rewards/margins": 10.571874618530273, | |
| "rewards/rejected": -12.865625381469727, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.124670763827919, | |
| "grad_norm": 16.27799332682975, | |
| "learning_rate": 4.6905179982440737e-07, | |
| "logits/chosen": -2.671875, | |
| "logits/rejected": -3.042187452316284, | |
| "logps/chosen": -421.8999938964844, | |
| "logps/rejected": -403.8500061035156, | |
| "loss": 0.0073, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.303417921066284, | |
| "rewards/margins": 10.759374618530273, | |
| "rewards/rejected": -13.046875, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.1334503950834067, | |
| "grad_norm": 7.129915553152379, | |
| "learning_rate": 4.6685689201053554e-07, | |
| "logits/chosen": -2.733593702316284, | |
| "logits/rejected": -3.1640625, | |
| "logps/chosen": -477.0, | |
| "logps/rejected": -409.0, | |
| "loss": 0.0082, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.944433569908142, | |
| "rewards/margins": 10.596875190734863, | |
| "rewards/rejected": -12.546875, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.142230026338894, | |
| "grad_norm": 0.48165144700266316, | |
| "learning_rate": 4.646619841966637e-07, | |
| "logits/chosen": -2.680468797683716, | |
| "logits/rejected": -3.049999952316284, | |
| "logps/chosen": -499.6000061035156, | |
| "logps/rejected": -414.04998779296875, | |
| "loss": 0.0039, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.22021484375, | |
| "rewards/margins": 10.978124618530273, | |
| "rewards/rejected": -12.1875, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.151009657594381, | |
| "grad_norm": 51.656135188406964, | |
| "learning_rate": 4.6246707638279194e-07, | |
| "logits/chosen": -2.6664061546325684, | |
| "logits/rejected": -3.0484375953674316, | |
| "logps/chosen": -527.0999755859375, | |
| "logps/rejected": -414.0, | |
| "loss": 0.0085, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.46142578125, | |
| "rewards/margins": 10.625, | |
| "rewards/rejected": -13.087499618530273, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.1597892888498684, | |
| "grad_norm": 0.6427210650270349, | |
| "learning_rate": 4.602721685689201e-07, | |
| "logits/chosen": -2.8109374046325684, | |
| "logits/rejected": -3.11328125, | |
| "logps/chosen": -435.1000061035156, | |
| "logps/rejected": -410.8999938964844, | |
| "loss": 0.007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.098876953125, | |
| "rewards/margins": 10.529687881469727, | |
| "rewards/rejected": -12.625, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 2.1685689201053555, | |
| "grad_norm": 0.47656816634790133, | |
| "learning_rate": 4.580772607550483e-07, | |
| "logits/chosen": -2.747265577316284, | |
| "logits/rejected": -3.2109375, | |
| "logps/chosen": -433.29998779296875, | |
| "logps/rejected": -375.5, | |
| "loss": 0.0155, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -2.5643553733825684, | |
| "rewards/margins": 10.326562881469727, | |
| "rewards/rejected": -12.890625, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 2.177348551360843, | |
| "grad_norm": 0.34832012632633447, | |
| "learning_rate": 4.5588235294117646e-07, | |
| "logits/chosen": -2.87890625, | |
| "logits/rejected": -3.200000047683716, | |
| "logps/chosen": -481.79998779296875, | |
| "logps/rejected": -385.8999938964844, | |
| "loss": 0.0098, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.3958497047424316, | |
| "rewards/margins": 10.46875, | |
| "rewards/rejected": -12.862500190734863, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 2.18612818261633, | |
| "grad_norm": 6.7496892019060555, | |
| "learning_rate": 4.5368744512730464e-07, | |
| "logits/chosen": -2.6851563453674316, | |
| "logits/rejected": -3.10546875, | |
| "logps/chosen": -507.3999938964844, | |
| "logps/rejected": -424.1000061035156, | |
| "loss": 0.0022, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.153027296066284, | |
| "rewards/margins": 11.878125190734863, | |
| "rewards/rejected": -14.024999618530273, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 2.194907813871817, | |
| "grad_norm": 0.6121482329361724, | |
| "learning_rate": 4.5149253731343286e-07, | |
| "logits/chosen": -2.6484375, | |
| "logits/rejected": -2.8570313453674316, | |
| "logps/chosen": -488.5, | |
| "logps/rejected": -436.5, | |
| "loss": 0.0035, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.054492235183716, | |
| "rewards/margins": 10.856249809265137, | |
| "rewards/rejected": -12.903124809265137, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.2036874451273047, | |
| "grad_norm": 6.349396051477548, | |
| "learning_rate": 4.4929762949956104e-07, | |
| "logits/chosen": -2.692187547683716, | |
| "logits/rejected": -3.0640625953674316, | |
| "logps/chosen": -496.20001220703125, | |
| "logps/rejected": -457.1000061035156, | |
| "loss": 0.024, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -1.9005615711212158, | |
| "rewards/margins": 11.1796875, | |
| "rewards/rejected": -13.081250190734863, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 2.212467076382792, | |
| "grad_norm": 3.644255831457209, | |
| "learning_rate": 4.471027216856892e-07, | |
| "logits/chosen": -2.703125, | |
| "logits/rejected": -3.024218797683716, | |
| "logps/chosen": -487.3500061035156, | |
| "logps/rejected": -425.6000061035156, | |
| "loss": 0.0085, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.2077393531799316, | |
| "rewards/margins": 11.196874618530273, | |
| "rewards/rejected": -13.412500381469727, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 2.2212467076382794, | |
| "grad_norm": 16.285576198010308, | |
| "learning_rate": 4.449078138718174e-07, | |
| "logits/chosen": -2.735156297683716, | |
| "logits/rejected": -3.051562547683716, | |
| "logps/chosen": -448.70001220703125, | |
| "logps/rejected": -380.8999938964844, | |
| "loss": 0.0043, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.8092772960662842, | |
| "rewards/margins": 11.115625381469727, | |
| "rewards/rejected": -12.918749809265137, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 2.2300263388937664, | |
| "grad_norm": 2.1018638746025315, | |
| "learning_rate": 4.4271290605794556e-07, | |
| "logits/chosen": -2.746875047683716, | |
| "logits/rejected": -3.100781202316284, | |
| "logps/chosen": -465.5, | |
| "logps/rejected": -397.20001220703125, | |
| "loss": 0.0066, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.140600562095642, | |
| "rewards/margins": 10.571874618530273, | |
| "rewards/rejected": -11.71875, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 2.2388059701492535, | |
| "grad_norm": 0.25354455787351377, | |
| "learning_rate": 4.4051799824407373e-07, | |
| "logits/chosen": -2.7249999046325684, | |
| "logits/rejected": -2.9632811546325684, | |
| "logps/chosen": -440.0, | |
| "logps/rejected": -415.1499938964844, | |
| "loss": 0.002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2507812976837158, | |
| "rewards/margins": 10.503125190734863, | |
| "rewards/rejected": -11.756250381469727, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 2.247585601404741, | |
| "grad_norm": 4.465127542028957, | |
| "learning_rate": 4.3832309043020195e-07, | |
| "logits/chosen": -2.7445311546325684, | |
| "logits/rejected": -3.1460938453674316, | |
| "logps/chosen": -437.8999938964844, | |
| "logps/rejected": -379.29998779296875, | |
| "loss": 0.0046, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.8772461414337158, | |
| "rewards/margins": 11.028124809265137, | |
| "rewards/rejected": -12.899999618530273, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 2.256365232660228, | |
| "grad_norm": 0.6486833863520731, | |
| "learning_rate": 4.3612818261633013e-07, | |
| "logits/chosen": -2.6429686546325684, | |
| "logits/rejected": -3.1187500953674316, | |
| "logps/chosen": -466.1000061035156, | |
| "logps/rejected": -410.3999938964844, | |
| "loss": 0.0084, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.922656297683716, | |
| "rewards/margins": 10.970312118530273, | |
| "rewards/rejected": -13.890625, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 2.2651448639157157, | |
| "grad_norm": 0.7044609478091358, | |
| "learning_rate": 4.339332748024583e-07, | |
| "logits/chosen": -2.774218797683716, | |
| "logits/rejected": -3.1351561546325684, | |
| "logps/chosen": -504.20001220703125, | |
| "logps/rejected": -411.3999938964844, | |
| "loss": 0.011, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.546679735183716, | |
| "rewards/margins": 10.440625190734863, | |
| "rewards/rejected": -12.981249809265137, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 2.2739244951712028, | |
| "grad_norm": 114.96753115572733, | |
| "learning_rate": 4.317383669885865e-07, | |
| "logits/chosen": -2.8578124046325684, | |
| "logits/rejected": -3.203906297683716, | |
| "logps/chosen": -423.8500061035156, | |
| "logps/rejected": -413.20001220703125, | |
| "loss": 0.0242, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -3.119335889816284, | |
| "rewards/margins": 10.962499618530273, | |
| "rewards/rejected": -14.065625190734863, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 2.2827041264266903, | |
| "grad_norm": 0.16763494251044442, | |
| "learning_rate": 4.2954345917471465e-07, | |
| "logits/chosen": -2.909374952316284, | |
| "logits/rejected": -3.171093702316284, | |
| "logps/chosen": -424.54998779296875, | |
| "logps/rejected": -419.79998779296875, | |
| "loss": 0.0073, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.5365233421325684, | |
| "rewards/margins": 11.149999618530273, | |
| "rewards/rejected": -13.690625190734863, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.2914837576821774, | |
| "grad_norm": 21.805969916190726, | |
| "learning_rate": 4.273485513608428e-07, | |
| "logits/chosen": -2.8460936546325684, | |
| "logits/rejected": -3.143749952316284, | |
| "logps/chosen": -454.20001220703125, | |
| "logps/rejected": -421.20001220703125, | |
| "loss": 0.0025, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.786914110183716, | |
| "rewards/margins": 11.381250381469727, | |
| "rewards/rejected": -14.171875, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 2.3002633889376645, | |
| "grad_norm": 0.8865558015025492, | |
| "learning_rate": 4.2515364354697105e-07, | |
| "logits/chosen": -2.8375000953674316, | |
| "logits/rejected": -3.27734375, | |
| "logps/chosen": -507.20001220703125, | |
| "logps/rejected": -414.79998779296875, | |
| "loss": 0.0044, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.6654295921325684, | |
| "rewards/margins": 10.876562118530273, | |
| "rewards/rejected": -13.543749809265137, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 2.309043020193152, | |
| "grad_norm": 0.43481201424948623, | |
| "learning_rate": 4.229587357330992e-07, | |
| "logits/chosen": -2.903125047683716, | |
| "logits/rejected": -3.1382813453674316, | |
| "logps/chosen": -442.79998779296875, | |
| "logps/rejected": -444.20001220703125, | |
| "loss": 0.0042, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.9332032203674316, | |
| "rewards/margins": 11.135937690734863, | |
| "rewards/rejected": -14.068750381469727, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 2.317822651448639, | |
| "grad_norm": 3.0035868757732205, | |
| "learning_rate": 4.207638279192274e-07, | |
| "logits/chosen": -2.7164063453674316, | |
| "logits/rejected": -3.1546874046325684, | |
| "logps/chosen": -477.5, | |
| "logps/rejected": -438.20001220703125, | |
| "loss": 0.008, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.118945360183716, | |
| "rewards/margins": 11.784375190734863, | |
| "rewards/rejected": -13.90625, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 2.3266022827041266, | |
| "grad_norm": 0.1748044867695441, | |
| "learning_rate": 4.1856892010535557e-07, | |
| "logits/chosen": -2.784374952316284, | |
| "logits/rejected": -3.0390625, | |
| "logps/chosen": -446.1000061035156, | |
| "logps/rejected": -414.70001220703125, | |
| "loss": 0.0032, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.2357420921325684, | |
| "rewards/margins": 11.015625, | |
| "rewards/rejected": -14.246874809265137, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.3353819139596137, | |
| "grad_norm": 0.7395235440829846, | |
| "learning_rate": 4.1637401229148374e-07, | |
| "logits/chosen": -2.6539063453674316, | |
| "logits/rejected": -3.0687499046325684, | |
| "logps/chosen": -503.29998779296875, | |
| "logps/rejected": -451.5, | |
| "loss": 0.0025, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.322460889816284, | |
| "rewards/margins": 11.384374618530273, | |
| "rewards/rejected": -14.699999809265137, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 2.344161545215101, | |
| "grad_norm": 1.370237299000094, | |
| "learning_rate": 4.141791044776119e-07, | |
| "logits/chosen": -2.7249999046325684, | |
| "logits/rejected": -2.989062547683716, | |
| "logps/chosen": -484.79998779296875, | |
| "logps/rejected": -436.20001220703125, | |
| "loss": 0.0023, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.90771484375, | |
| "rewards/margins": 11.446874618530273, | |
| "rewards/rejected": -13.362500190734863, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 2.3529411764705883, | |
| "grad_norm": 3.562965363362462, | |
| "learning_rate": 4.1198419666374014e-07, | |
| "logits/chosen": -2.762500047683716, | |
| "logits/rejected": -3.2132811546325684, | |
| "logps/chosen": -522.5999755859375, | |
| "logps/rejected": -448.5, | |
| "loss": 0.003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.107067823410034, | |
| "rewards/margins": 11.3125, | |
| "rewards/rejected": -13.421875, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 2.3617208077260754, | |
| "grad_norm": 0.39954500286815386, | |
| "learning_rate": 4.097892888498683e-07, | |
| "logits/chosen": -2.9007811546325684, | |
| "logits/rejected": -3.207812547683716, | |
| "logps/chosen": -437.95001220703125, | |
| "logps/rejected": -403.79998779296875, | |
| "loss": 0.0031, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.5396728515625, | |
| "rewards/margins": 10.618749618530273, | |
| "rewards/rejected": -13.165624618530273, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 2.370500438981563, | |
| "grad_norm": 7.2235979515104995, | |
| "learning_rate": 4.075943810359965e-07, | |
| "logits/chosen": -2.797656297683716, | |
| "logits/rejected": -3.2367186546325684, | |
| "logps/chosen": -428.0, | |
| "logps/rejected": -392.29998779296875, | |
| "loss": 0.0031, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.944140672683716, | |
| "rewards/margins": 11.212499618530273, | |
| "rewards/rejected": -14.149999618530273, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.37928007023705, | |
| "grad_norm": 0.09996865617916992, | |
| "learning_rate": 4.0539947322212466e-07, | |
| "logits/chosen": -2.68359375, | |
| "logits/rejected": -3.0218749046325684, | |
| "logps/chosen": -549.0999755859375, | |
| "logps/rejected": -460.5, | |
| "loss": 0.0063, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.9423828125, | |
| "rewards/margins": 11.543749809265137, | |
| "rewards/rejected": -13.490625381469727, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 2.388059701492537, | |
| "grad_norm": 0.9165577405271063, | |
| "learning_rate": 4.0320456540825283e-07, | |
| "logits/chosen": -2.735156297683716, | |
| "logits/rejected": -3.075000047683716, | |
| "logps/chosen": -451.5, | |
| "logps/rejected": -430.8999938964844, | |
| "loss": 0.0031, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.525952100753784, | |
| "rewards/margins": 10.753125190734863, | |
| "rewards/rejected": -13.284375190734863, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 2.3968393327480246, | |
| "grad_norm": 2.5092185290788347, | |
| "learning_rate": 4.0100965759438106e-07, | |
| "logits/chosen": -2.7953124046325684, | |
| "logits/rejected": -3.149218797683716, | |
| "logps/chosen": -451.6000061035156, | |
| "logps/rejected": -390.8500061035156, | |
| "loss": 0.0018, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.133593797683716, | |
| "rewards/margins": 10.518750190734863, | |
| "rewards/rejected": -12.646875381469727, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 2.4056189640035117, | |
| "grad_norm": 4.217551683564792, | |
| "learning_rate": 3.9881474978050923e-07, | |
| "logits/chosen": -2.750781297683716, | |
| "logits/rejected": -3.1624999046325684, | |
| "logps/chosen": -510.0, | |
| "logps/rejected": -433.6000061035156, | |
| "loss": 0.0032, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.56591796875, | |
| "rewards/margins": 11.643750190734863, | |
| "rewards/rejected": -14.209375381469727, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 2.4143985952589992, | |
| "grad_norm": 0.5296646290506889, | |
| "learning_rate": 3.966198419666374e-07, | |
| "logits/chosen": -2.77734375, | |
| "logits/rejected": -3.1773438453674316, | |
| "logps/chosen": -439.20001220703125, | |
| "logps/rejected": -409.95001220703125, | |
| "loss": 0.0021, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.0345091819763184, | |
| "rewards/margins": 11.918749809265137, | |
| "rewards/rejected": -13.956250190734863, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.4231782265144863, | |
| "grad_norm": 3.787800599099169, | |
| "learning_rate": 3.944249341527656e-07, | |
| "logits/chosen": -2.7476563453674316, | |
| "logits/rejected": -3.090625047683716, | |
| "logps/chosen": -429.0, | |
| "logps/rejected": -414.29998779296875, | |
| "loss": 0.0095, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -1.340087890625, | |
| "rewards/margins": 10.671875, | |
| "rewards/rejected": -12.015625, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 2.431957857769974, | |
| "grad_norm": 0.12784068177739766, | |
| "learning_rate": 3.9223002633889375e-07, | |
| "logits/chosen": -2.7945313453674316, | |
| "logits/rejected": -3.2171874046325684, | |
| "logps/chosen": -502.6000061035156, | |
| "logps/rejected": -423.3999938964844, | |
| "loss": 0.0024, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.700781226158142, | |
| "rewards/margins": 11.206250190734863, | |
| "rewards/rejected": -12.899999618530273, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 2.440737489025461, | |
| "grad_norm": 7.938389431158632, | |
| "learning_rate": 3.9003511852502193e-07, | |
| "logits/chosen": -2.85546875, | |
| "logits/rejected": -3.266406297683716, | |
| "logps/chosen": -430.95001220703125, | |
| "logps/rejected": -364.6499938964844, | |
| "loss": 0.0125, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -1.839135766029358, | |
| "rewards/margins": 11.03125, | |
| "rewards/rejected": -12.875, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 2.449517120280948, | |
| "grad_norm": 14.998778482568488, | |
| "learning_rate": 3.8784021071115015e-07, | |
| "logits/chosen": -2.8882813453674316, | |
| "logits/rejected": -3.35546875, | |
| "logps/chosen": -489.29998779296875, | |
| "logps/rejected": -411.8999938964844, | |
| "loss": 0.0229, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.9105162620544434, | |
| "rewards/margins": 10.8984375, | |
| "rewards/rejected": -13.8125, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 2.4582967515364356, | |
| "grad_norm": 1.7565187000826334, | |
| "learning_rate": 3.8564530289727833e-07, | |
| "logits/chosen": -2.78515625, | |
| "logits/rejected": -3.2281250953674316, | |
| "logps/chosen": -435.5, | |
| "logps/rejected": -379.79998779296875, | |
| "loss": 0.0013, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.641796827316284, | |
| "rewards/margins": 11.184374809265137, | |
| "rewards/rejected": -13.837499618530273, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.4670763827919227, | |
| "grad_norm": 0.341595067850225, | |
| "learning_rate": 3.834503950834065e-07, | |
| "logits/chosen": -2.7679686546325684, | |
| "logits/rejected": -3.137500047683716, | |
| "logps/chosen": -445.5, | |
| "logps/rejected": -389.1499938964844, | |
| "loss": 0.0121, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.1552734375, | |
| "rewards/margins": 10.665624618530273, | |
| "rewards/rejected": -12.806249618530273, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 2.47585601404741, | |
| "grad_norm": 0.3086115567290912, | |
| "learning_rate": 3.8125548726953467e-07, | |
| "logits/chosen": -2.753124952316284, | |
| "logits/rejected": -3.176562547683716, | |
| "logps/chosen": -475.70001220703125, | |
| "logps/rejected": -385.70001220703125, | |
| "loss": 0.0056, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.162426710128784, | |
| "rewards/margins": 10.217187881469727, | |
| "rewards/rejected": -12.381250381469727, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 2.4846356453028973, | |
| "grad_norm": 3.008743939193885, | |
| "learning_rate": 3.7906057945566285e-07, | |
| "logits/chosen": -2.813281297683716, | |
| "logits/rejected": -3.2015624046325684, | |
| "logps/chosen": -493.3999938964844, | |
| "logps/rejected": -422.0, | |
| "loss": 0.003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.0386719703674316, | |
| "rewards/margins": 11.518750190734863, | |
| "rewards/rejected": -13.553125381469727, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 2.493415276558385, | |
| "grad_norm": 0.28240199788216075, | |
| "learning_rate": 3.76865671641791e-07, | |
| "logits/chosen": -2.7445311546325684, | |
| "logits/rejected": -3.05078125, | |
| "logps/chosen": -471.8999938964844, | |
| "logps/rejected": -413.8999938964844, | |
| "loss": 0.0141, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.1889891624450684, | |
| "rewards/margins": 10.887499809265137, | |
| "rewards/rejected": -13.081250190734863, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 2.502194907813872, | |
| "grad_norm": 1.569333218820518, | |
| "learning_rate": 3.7467076382791925e-07, | |
| "logits/chosen": -2.850781202316284, | |
| "logits/rejected": -3.149218797683716, | |
| "logps/chosen": -434.3999938964844, | |
| "logps/rejected": -441.20001220703125, | |
| "loss": 0.0035, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.505859375, | |
| "rewards/margins": 11.535937309265137, | |
| "rewards/rejected": -15.037500381469727, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 2.510974539069359, | |
| "grad_norm": 0.2789923961660158, | |
| "learning_rate": 3.724758560140474e-07, | |
| "logits/chosen": -2.8296875953674316, | |
| "logits/rejected": -3.1500000953674316, | |
| "logps/chosen": -447.8999938964844, | |
| "logps/rejected": -423.29998779296875, | |
| "loss": 0.01, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.0689454078674316, | |
| "rewards/margins": 11.237500190734863, | |
| "rewards/rejected": -14.300000190734863, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 2.5197541703248465, | |
| "grad_norm": 0.4753340620513279, | |
| "learning_rate": 3.702809482001756e-07, | |
| "logits/chosen": -2.858593702316284, | |
| "logits/rejected": -3.129687547683716, | |
| "logps/chosen": -484.1000061035156, | |
| "logps/rejected": -421.20001220703125, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.6117186546325684, | |
| "rewards/margins": 11.662500381469727, | |
| "rewards/rejected": -14.262499809265137, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 2.5285338015803336, | |
| "grad_norm": 1.0295897199859099, | |
| "learning_rate": 3.6808604038630377e-07, | |
| "logits/chosen": -2.856250047683716, | |
| "logits/rejected": -3.2750000953674316, | |
| "logps/chosen": -487.1000061035156, | |
| "logps/rejected": -439.20001220703125, | |
| "loss": 0.0124, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.9287109375, | |
| "rewards/margins": 11.756250381469727, | |
| "rewards/rejected": -14.684374809265137, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 2.5373134328358207, | |
| "grad_norm": 0.12552949327545365, | |
| "learning_rate": 3.6589113257243194e-07, | |
| "logits/chosen": -2.8968749046325684, | |
| "logits/rejected": -3.2046875953674316, | |
| "logps/chosen": -480.6000061035156, | |
| "logps/rejected": -426.5, | |
| "loss": 0.0017, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.5074219703674316, | |
| "rewards/margins": 12.118749618530273, | |
| "rewards/rejected": -15.612500190734863, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 2.546093064091308, | |
| "grad_norm": 11.535142224971468, | |
| "learning_rate": 3.636962247585601e-07, | |
| "logits/chosen": -2.7578125, | |
| "logits/rejected": -3.184375047683716, | |
| "logps/chosen": -523.7000122070312, | |
| "logps/rejected": -426.3999938964844, | |
| "loss": 0.0133, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -2.697070360183716, | |
| "rewards/margins": 11.251562118530273, | |
| "rewards/rejected": -13.953125, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.5548726953467953, | |
| "grad_norm": 1.3369389375281253, | |
| "learning_rate": 3.6150131694468834e-07, | |
| "logits/chosen": -2.858593702316284, | |
| "logits/rejected": -3.0882811546325684, | |
| "logps/chosen": -428.25, | |
| "logps/rejected": -428.6000061035156, | |
| "loss": 0.0015, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.26708984375, | |
| "rewards/margins": 11.596875190734863, | |
| "rewards/rejected": -13.862500190734863, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 2.563652326602283, | |
| "grad_norm": 0.12741373891156255, | |
| "learning_rate": 3.593064091308165e-07, | |
| "logits/chosen": -2.840625047683716, | |
| "logits/rejected": -3.215625047683716, | |
| "logps/chosen": -508.8999938964844, | |
| "logps/rejected": -448.25, | |
| "loss": 0.0113, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.0849609375, | |
| "rewards/margins": 11.515625, | |
| "rewards/rejected": -13.59375, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 2.57243195785777, | |
| "grad_norm": 1.7791509170089177, | |
| "learning_rate": 3.571115013169447e-07, | |
| "logits/chosen": -2.850781202316284, | |
| "logits/rejected": -3.1734375953674316, | |
| "logps/chosen": -472.6499938964844, | |
| "logps/rejected": -426.6000061035156, | |
| "loss": 0.0033, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.8910155296325684, | |
| "rewards/margins": 11.371874809265137, | |
| "rewards/rejected": -14.259374618530273, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 2.5812115891132574, | |
| "grad_norm": 0.2907557433975636, | |
| "learning_rate": 3.5491659350307286e-07, | |
| "logits/chosen": -2.8359375, | |
| "logits/rejected": -3.0765624046325684, | |
| "logps/chosen": -467.79998779296875, | |
| "logps/rejected": -441.79998779296875, | |
| "loss": 0.0019, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.855175733566284, | |
| "rewards/margins": 11.368749618530273, | |
| "rewards/rejected": -14.228124618530273, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 2.5899912203687445, | |
| "grad_norm": 1.0476924646665897, | |
| "learning_rate": 3.5272168568920103e-07, | |
| "logits/chosen": -2.727343797683716, | |
| "logits/rejected": -3.180468797683716, | |
| "logps/chosen": -464.5, | |
| "logps/rejected": -406.8999938964844, | |
| "loss": 0.0084, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.3169798851013184, | |
| "rewards/margins": 11.553125381469727, | |
| "rewards/rejected": -13.865625381469727, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.5987708516242316, | |
| "grad_norm": 9.104389180024516, | |
| "learning_rate": 3.505267778753292e-07, | |
| "logits/chosen": -2.9476561546325684, | |
| "logits/rejected": -3.44140625, | |
| "logps/chosen": -449.75, | |
| "logps/rejected": -373.79998779296875, | |
| "loss": 0.0281, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.9820313453674316, | |
| "rewards/margins": 10.881250381469727, | |
| "rewards/rejected": -13.856249809265137, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 2.607550482879719, | |
| "grad_norm": 25.49941033653934, | |
| "learning_rate": 3.4833187006145743e-07, | |
| "logits/chosen": -2.8851561546325684, | |
| "logits/rejected": -3.385937452316284, | |
| "logps/chosen": -506.6000061035156, | |
| "logps/rejected": -430.70001220703125, | |
| "loss": 0.0025, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.6386475563049316, | |
| "rewards/margins": 11.34375, | |
| "rewards/rejected": -14.981249809265137, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 2.616330114135206, | |
| "grad_norm": 0.6357750064402232, | |
| "learning_rate": 3.461369622475856e-07, | |
| "logits/chosen": -2.815624952316284, | |
| "logits/rejected": -3.2109375, | |
| "logps/chosen": -494.70001220703125, | |
| "logps/rejected": -411.0, | |
| "loss": 0.0111, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.4117188453674316, | |
| "rewards/margins": 11.503125190734863, | |
| "rewards/rejected": -13.909375190734863, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 2.6251097453906937, | |
| "grad_norm": 12.462486367828244, | |
| "learning_rate": 3.439420544337138e-07, | |
| "logits/chosen": -2.7679686546325684, | |
| "logits/rejected": -3.109375, | |
| "logps/chosen": -472.8999938964844, | |
| "logps/rejected": -466.3999938964844, | |
| "loss": 0.0021, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.8158690929412842, | |
| "rewards/margins": 11.871874809265137, | |
| "rewards/rejected": -13.6875, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 2.633889376646181, | |
| "grad_norm": 0.6622488809590633, | |
| "learning_rate": 3.4174714661984195e-07, | |
| "logits/chosen": -2.90625, | |
| "logits/rejected": -3.272656202316284, | |
| "logps/chosen": -507.1000061035156, | |
| "logps/rejected": -424.20001220703125, | |
| "loss": 0.0322, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -3.3148436546325684, | |
| "rewards/margins": 11.407812118530273, | |
| "rewards/rejected": -14.71875, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.6426690079016684, | |
| "grad_norm": 17.74994267414667, | |
| "learning_rate": 3.395522388059701e-07, | |
| "logits/chosen": -2.96875, | |
| "logits/rejected": -3.3101563453674316, | |
| "logps/chosen": -444.0, | |
| "logps/rejected": -430.29998779296875, | |
| "loss": 0.0088, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.209606885910034, | |
| "rewards/margins": 10.965624809265137, | |
| "rewards/rejected": -14.162500381469727, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 2.6514486391571555, | |
| "grad_norm": 1.6508557021991002, | |
| "learning_rate": 3.3735733099209835e-07, | |
| "logits/chosen": -2.7171874046325684, | |
| "logits/rejected": -3.1539063453674316, | |
| "logps/chosen": -521.7999877929688, | |
| "logps/rejected": -429.3999938964844, | |
| "loss": 0.0016, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.3973631858825684, | |
| "rewards/margins": 11.565625190734863, | |
| "rewards/rejected": -13.962499618530273, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 2.6602282704126425, | |
| "grad_norm": 2.4941847948107765, | |
| "learning_rate": 3.351624231782265e-07, | |
| "logits/chosen": -2.8359375, | |
| "logits/rejected": -3.3125, | |
| "logps/chosen": -461.45001220703125, | |
| "logps/rejected": -391.3999938964844, | |
| "loss": 0.0106, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.6383941173553467, | |
| "rewards/margins": 10.6796875, | |
| "rewards/rejected": -13.324999809265137, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 2.66900790166813, | |
| "grad_norm": 0.46773944577867005, | |
| "learning_rate": 3.329675153643547e-07, | |
| "logits/chosen": -2.7796874046325684, | |
| "logits/rejected": -3.0296874046325684, | |
| "logps/chosen": -473.70001220703125, | |
| "logps/rejected": -428.5, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.0389404296875, | |
| "rewards/margins": 11.693750381469727, | |
| "rewards/rejected": -13.728124618530273, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 2.677787532923617, | |
| "grad_norm": 7.796743106497015, | |
| "learning_rate": 3.3077260755048287e-07, | |
| "logits/chosen": -2.70703125, | |
| "logits/rejected": -3.0531249046325684, | |
| "logps/chosen": -510.1000061035156, | |
| "logps/rejected": -426.8999938964844, | |
| "loss": 0.0025, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.334277391433716, | |
| "rewards/margins": 11.012499809265137, | |
| "rewards/rejected": -13.340624809265137, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 2.6865671641791042, | |
| "grad_norm": 149.5400140191504, | |
| "learning_rate": 3.2857769973661104e-07, | |
| "logits/chosen": -2.819531202316284, | |
| "logits/rejected": -3.1507811546325684, | |
| "logps/chosen": -508.20001220703125, | |
| "logps/rejected": -429.1000061035156, | |
| "loss": 0.0251, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -2.698779344558716, | |
| "rewards/margins": 11.685937881469727, | |
| "rewards/rejected": -14.387499809265137, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 2.6953467954345918, | |
| "grad_norm": 2.294379394738642, | |
| "learning_rate": 3.263827919227392e-07, | |
| "logits/chosen": -2.754687547683716, | |
| "logits/rejected": -3.042187452316284, | |
| "logps/chosen": -467.1000061035156, | |
| "logps/rejected": -426.8999938964844, | |
| "loss": 0.0181, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -3.1273436546325684, | |
| "rewards/margins": 11.746874809265137, | |
| "rewards/rejected": -14.878125190734863, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 2.704126426690079, | |
| "grad_norm": 0.5390688743726256, | |
| "learning_rate": 3.2418788410886744e-07, | |
| "logits/chosen": -2.8515625, | |
| "logits/rejected": -3.12109375, | |
| "logps/chosen": -478.8500061035156, | |
| "logps/rejected": -417.70001220703125, | |
| "loss": 0.0028, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.1832032203674316, | |
| "rewards/margins": 11.684374809265137, | |
| "rewards/rejected": -14.887499809265137, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 2.7129060579455664, | |
| "grad_norm": 0.2930461290342309, | |
| "learning_rate": 3.219929762949956e-07, | |
| "logits/chosen": -2.7109375, | |
| "logits/rejected": -2.9144530296325684, | |
| "logps/chosen": -461.45001220703125, | |
| "logps/rejected": -432.20001220703125, | |
| "loss": 0.0066, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.153515577316284, | |
| "rewards/margins": 11.628125190734863, | |
| "rewards/rejected": -13.787500381469727, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 2.7216856892010535, | |
| "grad_norm": 0.8512322206144427, | |
| "learning_rate": 3.197980684811238e-07, | |
| "logits/chosen": -2.813281297683716, | |
| "logits/rejected": -3.2750000953674316, | |
| "logps/chosen": -471.6000061035156, | |
| "logps/rejected": -413.5, | |
| "loss": 0.0041, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.836328148841858, | |
| "rewards/margins": 11.196874618530273, | |
| "rewards/rejected": -13.043749809265137, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.730465320456541, | |
| "grad_norm": 0.5905195511439048, | |
| "learning_rate": 3.1760316066725196e-07, | |
| "logits/chosen": -2.6429686546325684, | |
| "logits/rejected": -3.124218702316284, | |
| "logps/chosen": -437.0, | |
| "logps/rejected": -411.79998779296875, | |
| "loss": 0.0074, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -1.979760766029358, | |
| "rewards/margins": 11.25, | |
| "rewards/rejected": -13.21875, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 2.739244951712028, | |
| "grad_norm": 1.3860649479622287, | |
| "learning_rate": 3.1540825285338014e-07, | |
| "logits/chosen": -2.9164061546325684, | |
| "logits/rejected": -3.3031249046325684, | |
| "logps/chosen": -489.20001220703125, | |
| "logps/rejected": -410.29998779296875, | |
| "loss": 0.0349, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.767773389816284, | |
| "rewards/margins": 11.106249809265137, | |
| "rewards/rejected": -13.881250381469727, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 2.748024582967515, | |
| "grad_norm": 0.5854640407417739, | |
| "learning_rate": 3.132133450395083e-07, | |
| "logits/chosen": -2.59375, | |
| "logits/rejected": -3.090625047683716, | |
| "logps/chosen": -472.25, | |
| "logps/rejected": -410.29998779296875, | |
| "loss": 0.0037, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.4012694358825684, | |
| "rewards/margins": 11.078125, | |
| "rewards/rejected": -13.481249809265137, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 2.7568042142230027, | |
| "grad_norm": 0.271113356300562, | |
| "learning_rate": 3.1101843722563654e-07, | |
| "logits/chosen": -2.82421875, | |
| "logits/rejected": -3.1851563453674316, | |
| "logps/chosen": -513.5999755859375, | |
| "logps/rejected": -429.70001220703125, | |
| "loss": 0.0047, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.95703125, | |
| "rewards/margins": 11.831250190734863, | |
| "rewards/rejected": -14.806249618530273, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 2.76558384547849, | |
| "grad_norm": 34.269855016463474, | |
| "learning_rate": 3.088235294117647e-07, | |
| "logits/chosen": -3.0484375953674316, | |
| "logits/rejected": -3.395312547683716, | |
| "logps/chosen": -468.75, | |
| "logps/rejected": -441.70001220703125, | |
| "loss": 0.0196, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -4.3515625, | |
| "rewards/margins": 11.784375190734863, | |
| "rewards/rejected": -16.137500762939453, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 2.7743634767339773, | |
| "grad_norm": 0.18088218776182413, | |
| "learning_rate": 3.066286215978929e-07, | |
| "logits/chosen": -2.702343702316284, | |
| "logits/rejected": -3.0570311546325684, | |
| "logps/chosen": -509.5, | |
| "logps/rejected": -448.5, | |
| "loss": 0.0047, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.2892088890075684, | |
| "rewards/margins": 11.959375381469727, | |
| "rewards/rejected": -14.243749618530273, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 2.7831431079894644, | |
| "grad_norm": 3.5616361764442743, | |
| "learning_rate": 3.0443371378402106e-07, | |
| "logits/chosen": -2.729687452316284, | |
| "logits/rejected": -3.254687547683716, | |
| "logps/chosen": -493.0, | |
| "logps/rejected": -406.20001220703125, | |
| "loss": 0.002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.224902391433716, | |
| "rewards/margins": 11.518750190734863, | |
| "rewards/rejected": -13.743749618530273, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 2.791922739244952, | |
| "grad_norm": 0.7910837882655565, | |
| "learning_rate": 3.0223880597014923e-07, | |
| "logits/chosen": -2.774609327316284, | |
| "logits/rejected": -3.2164063453674316, | |
| "logps/chosen": -524.7999877929688, | |
| "logps/rejected": -430.29998779296875, | |
| "loss": 0.004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.0347657203674316, | |
| "rewards/margins": 10.639062881469727, | |
| "rewards/rejected": -12.671875, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 2.800702370500439, | |
| "grad_norm": 1.2630786654471164, | |
| "learning_rate": 3.000438981562774e-07, | |
| "logits/chosen": -2.74609375, | |
| "logits/rejected": -3.1742186546325684, | |
| "logps/chosen": -462.54998779296875, | |
| "logps/rejected": -444.8999938964844, | |
| "loss": 0.0029, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.286328077316284, | |
| "rewards/margins": 11.484375, | |
| "rewards/rejected": -13.762499809265137, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 2.809482001755926, | |
| "grad_norm": 0.3941320072196412, | |
| "learning_rate": 2.9784899034240563e-07, | |
| "logits/chosen": -2.8304686546325684, | |
| "logits/rejected": -3.268749952316284, | |
| "logps/chosen": -471.1000061035156, | |
| "logps/rejected": -432.79998779296875, | |
| "loss": 0.0083, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.32177734375, | |
| "rewards/margins": 11.768750190734863, | |
| "rewards/rejected": -15.090624809265137, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.8182616330114136, | |
| "grad_norm": 0.3071148935777903, | |
| "learning_rate": 2.956540825285338e-07, | |
| "logits/chosen": -2.749218702316284, | |
| "logits/rejected": -3.198437452316284, | |
| "logps/chosen": -468.79998779296875, | |
| "logps/rejected": -430.79998779296875, | |
| "loss": 0.0015, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.77734375, | |
| "rewards/margins": 11.734375, | |
| "rewards/rejected": -15.512499809265137, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 2.8270412642669007, | |
| "grad_norm": 0.5084614550875632, | |
| "learning_rate": 2.93459174714662e-07, | |
| "logits/chosen": -2.87109375, | |
| "logits/rejected": -3.3648438453674316, | |
| "logps/chosen": -489.29998779296875, | |
| "logps/rejected": -413.20001220703125, | |
| "loss": 0.0058, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.2464842796325684, | |
| "rewards/margins": 11.649999618530273, | |
| "rewards/rejected": -14.896875381469727, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 2.835820895522388, | |
| "grad_norm": 24.3273411326105, | |
| "learning_rate": 2.9126426690079015e-07, | |
| "logits/chosen": -2.961718797683716, | |
| "logits/rejected": -3.401562452316284, | |
| "logps/chosen": -399.6499938964844, | |
| "logps/rejected": -395.6000061035156, | |
| "loss": 0.0019, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.007617235183716, | |
| "rewards/margins": 11.540624618530273, | |
| "rewards/rejected": -14.553125381469727, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 2.8446005267778753, | |
| "grad_norm": 3.314565321429036, | |
| "learning_rate": 2.890693590869183e-07, | |
| "logits/chosen": -2.6656250953674316, | |
| "logits/rejected": -3.0718750953674316, | |
| "logps/chosen": -487.5, | |
| "logps/rejected": -427.5, | |
| "loss": 0.0026, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.892187476158142, | |
| "rewards/margins": 11.556249618530273, | |
| "rewards/rejected": -13.453125, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 2.853380158033363, | |
| "grad_norm": 28.686295534814914, | |
| "learning_rate": 2.868744512730465e-07, | |
| "logits/chosen": -2.7359375953674316, | |
| "logits/rejected": -3.233593702316284, | |
| "logps/chosen": -465.79998779296875, | |
| "logps/rejected": -410.79998779296875, | |
| "loss": 0.0031, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.409374952316284, | |
| "rewards/margins": 11.899999618530273, | |
| "rewards/rejected": -14.303125381469727, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.86215978928885, | |
| "grad_norm": 7.789187486173813, | |
| "learning_rate": 2.846795434591747e-07, | |
| "logits/chosen": -2.7984375953674316, | |
| "logits/rejected": -3.1656250953674316, | |
| "logps/chosen": -487.8999938964844, | |
| "logps/rejected": -417.6000061035156, | |
| "loss": 0.0088, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.659960985183716, | |
| "rewards/margins": 11.609375, | |
| "rewards/rejected": -14.278124809265137, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 2.870939420544337, | |
| "grad_norm": 29.108873641287463, | |
| "learning_rate": 2.824846356453029e-07, | |
| "logits/chosen": -2.765625, | |
| "logits/rejected": -3.1890625953674316, | |
| "logps/chosen": -452.29998779296875, | |
| "logps/rejected": -428.20001220703125, | |
| "loss": 0.0194, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.977343797683716, | |
| "rewards/margins": 11.050000190734863, | |
| "rewards/rejected": -14.024999618530273, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 2.8797190517998246, | |
| "grad_norm": 0.6865625237809583, | |
| "learning_rate": 2.8028972783143107e-07, | |
| "logits/chosen": -2.668750047683716, | |
| "logits/rejected": -3.0523438453674316, | |
| "logps/chosen": -534.9000244140625, | |
| "logps/rejected": -444.5, | |
| "loss": 0.0024, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.4755859375, | |
| "rewards/margins": 12.193750381469727, | |
| "rewards/rejected": -14.668749809265137, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 2.8884986830553117, | |
| "grad_norm": 11.89063702085244, | |
| "learning_rate": 2.7809482001755924e-07, | |
| "logits/chosen": -2.8031249046325684, | |
| "logits/rejected": -3.246875047683716, | |
| "logps/chosen": -533.4000244140625, | |
| "logps/rejected": -439.79998779296875, | |
| "loss": 0.014, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -2.4878907203674316, | |
| "rewards/margins": 11.518750190734863, | |
| "rewards/rejected": -14.003125190734863, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 2.8972783143107987, | |
| "grad_norm": 0.37840473401428426, | |
| "learning_rate": 2.758999122036874e-07, | |
| "logits/chosen": -2.8296875953674316, | |
| "logits/rejected": -3.260937452316284, | |
| "logps/chosen": -453.1000061035156, | |
| "logps/rejected": -409.1000061035156, | |
| "loss": 0.0304, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.133007764816284, | |
| "rewards/margins": 11.653124809265137, | |
| "rewards/rejected": -13.78125, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.9060579455662863, | |
| "grad_norm": 0.16595628402576, | |
| "learning_rate": 2.7370500438981564e-07, | |
| "logits/chosen": -2.8531250953674316, | |
| "logits/rejected": -3.231250047683716, | |
| "logps/chosen": -423.8500061035156, | |
| "logps/rejected": -414.1000061035156, | |
| "loss": 0.0024, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.3470702171325684, | |
| "rewards/margins": 11.168749809265137, | |
| "rewards/rejected": -13.512499809265137, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 2.9148375768217734, | |
| "grad_norm": 0.45341250829508684, | |
| "learning_rate": 2.715100965759438e-07, | |
| "logits/chosen": -2.8515625, | |
| "logits/rejected": -3.274218797683716, | |
| "logps/chosen": -516.5, | |
| "logps/rejected": -402.1000061035156, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.4305663108825684, | |
| "rewards/margins": 11.706250190734863, | |
| "rewards/rejected": -14.149999618530273, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 2.923617208077261, | |
| "grad_norm": 18.752800779260905, | |
| "learning_rate": 2.69315188762072e-07, | |
| "logits/chosen": -2.821093797683716, | |
| "logits/rejected": -3.246875047683716, | |
| "logps/chosen": -474.0, | |
| "logps/rejected": -414.0, | |
| "loss": 0.007, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.3584961891174316, | |
| "rewards/margins": 12.050000190734863, | |
| "rewards/rejected": -14.409375190734863, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 2.932396839332748, | |
| "grad_norm": 0.2712213729846258, | |
| "learning_rate": 2.6712028094820016e-07, | |
| "logits/chosen": -2.8539061546325684, | |
| "logits/rejected": -3.1859374046325684, | |
| "logps/chosen": -513.5999755859375, | |
| "logps/rejected": -430.70001220703125, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.98095703125, | |
| "rewards/margins": 12.356249809265137, | |
| "rewards/rejected": -15.328125, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 2.9411764705882355, | |
| "grad_norm": 1.4837851904643853, | |
| "learning_rate": 2.6492537313432834e-07, | |
| "logits/chosen": -2.98046875, | |
| "logits/rejected": -3.4320311546325684, | |
| "logps/chosen": -419.1000061035156, | |
| "logps/rejected": -374.8999938964844, | |
| "loss": 0.0078, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.9925780296325684, | |
| "rewards/margins": 11.024999618530273, | |
| "rewards/rejected": -14.015625, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.9499561018437226, | |
| "grad_norm": 0.5966690636532355, | |
| "learning_rate": 2.627304653204565e-07, | |
| "logits/chosen": -2.9429688453674316, | |
| "logits/rejected": -3.34765625, | |
| "logps/chosen": -479.8999938964844, | |
| "logps/rejected": -446.6000061035156, | |
| "loss": 0.005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.4771971702575684, | |
| "rewards/margins": 12.046875, | |
| "rewards/rejected": -15.537500381469727, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 2.9587357330992097, | |
| "grad_norm": 0.7037761701425428, | |
| "learning_rate": 2.6053555750658474e-07, | |
| "logits/chosen": -2.8265624046325684, | |
| "logits/rejected": -3.30859375, | |
| "logps/chosen": -489.3500061035156, | |
| "logps/rejected": -447.0, | |
| "loss": 0.0133, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -3.331591844558716, | |
| "rewards/margins": 12.596875190734863, | |
| "rewards/rejected": -15.9375, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 2.967515364354697, | |
| "grad_norm": 4.5995831378514564, | |
| "learning_rate": 2.583406496927129e-07, | |
| "logits/chosen": -2.8671875, | |
| "logits/rejected": -3.1796875, | |
| "logps/chosen": -469.1000061035156, | |
| "logps/rejected": -450.20001220703125, | |
| "loss": 0.0032, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.9496092796325684, | |
| "rewards/margins": 12.296875, | |
| "rewards/rejected": -15.259374618530273, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 2.9762949956101843, | |
| "grad_norm": 1.9942442507224407, | |
| "learning_rate": 2.561457418788411e-07, | |
| "logits/chosen": -2.729687452316284, | |
| "logits/rejected": -3.401562452316284, | |
| "logps/chosen": -499.6000061035156, | |
| "logps/rejected": -394.70001220703125, | |
| "loss": 0.005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.23828125, | |
| "rewards/margins": 11.303125381469727, | |
| "rewards/rejected": -13.546875, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 2.9850746268656714, | |
| "grad_norm": 0.826774420947246, | |
| "learning_rate": 2.5395083406496926e-07, | |
| "logits/chosen": -2.7992186546325684, | |
| "logits/rejected": -3.25, | |
| "logps/chosen": -455.0, | |
| "logps/rejected": -406.3999938964844, | |
| "loss": 0.0046, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.381542921066284, | |
| "rewards/margins": 11.965624809265137, | |
| "rewards/rejected": -14.34375, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.993854258121159, | |
| "grad_norm": 0.21107534256101781, | |
| "learning_rate": 2.5175592625109743e-07, | |
| "logits/chosen": -2.8421874046325684, | |
| "logits/rejected": -3.336718797683716, | |
| "logps/chosen": -425.54998779296875, | |
| "logps/rejected": -428.1000061035156, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.1407227516174316, | |
| "rewards/margins": 12.328125, | |
| "rewards/rejected": -15.462499618530273, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 3.002633889376646, | |
| "grad_norm": 0.19105478413577648, | |
| "learning_rate": 2.4956101843722566e-07, | |
| "logits/chosen": -2.676953077316284, | |
| "logits/rejected": -3.2578125, | |
| "logps/chosen": -477.0, | |
| "logps/rejected": -429.1499938964844, | |
| "loss": 0.0042, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.495800733566284, | |
| "rewards/margins": 11.5625, | |
| "rewards/rejected": -14.056249618530273, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 3.0114135206321335, | |
| "grad_norm": 0.2897671146845593, | |
| "learning_rate": 2.4736611062335383e-07, | |
| "logits/chosen": -2.76171875, | |
| "logits/rejected": -3.1703124046325684, | |
| "logps/chosen": -527.0999755859375, | |
| "logps/rejected": -449.6000061035156, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.3095703125, | |
| "rewards/margins": 12.865625381469727, | |
| "rewards/rejected": -15.175000190734863, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 3.0201931518876206, | |
| "grad_norm": 0.13142725375963107, | |
| "learning_rate": 2.45171202809482e-07, | |
| "logits/chosen": -2.952343702316284, | |
| "logits/rejected": -3.26953125, | |
| "logps/chosen": -443.70001220703125, | |
| "logps/rejected": -444.1000061035156, | |
| "loss": 0.0259, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -3.306835889816284, | |
| "rewards/margins": 12.168749809265137, | |
| "rewards/rejected": -15.481249809265137, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 3.028972783143108, | |
| "grad_norm": 0.1804390208209594, | |
| "learning_rate": 2.429762949956102e-07, | |
| "logits/chosen": -2.70703125, | |
| "logits/rejected": -3.2992186546325684, | |
| "logps/chosen": -509.1000061035156, | |
| "logps/rejected": -423.79998779296875, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.696484327316284, | |
| "rewards/margins": 12.456250190734863, | |
| "rewards/rejected": -15.146875381469727, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 3.0377524143985952, | |
| "grad_norm": 1.5459458361152458, | |
| "learning_rate": 2.4078138718173835e-07, | |
| "logits/chosen": -2.91015625, | |
| "logits/rejected": -3.4976563453674316, | |
| "logps/chosen": -398.1499938964844, | |
| "logps/rejected": -395.70001220703125, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.157177686691284, | |
| "rewards/margins": 12.5625, | |
| "rewards/rejected": -14.71875, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 3.0465320456540823, | |
| "grad_norm": 0.29451979000339223, | |
| "learning_rate": 2.385864793678666e-07, | |
| "logits/chosen": -2.78515625, | |
| "logits/rejected": -3.194531202316284, | |
| "logps/chosen": -451.3500061035156, | |
| "logps/rejected": -419.8999938964844, | |
| "loss": 0.0044, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.0389161109924316, | |
| "rewards/margins": 12.375, | |
| "rewards/rejected": -14.396875381469727, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 3.05531167690957, | |
| "grad_norm": 0.22625894057676185, | |
| "learning_rate": 2.3639157155399472e-07, | |
| "logits/chosen": -2.859375, | |
| "logits/rejected": -3.3023438453674316, | |
| "logps/chosen": -486.29998779296875, | |
| "logps/rejected": -440.5, | |
| "loss": 0.0012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.752148389816284, | |
| "rewards/margins": 12.478124618530273, | |
| "rewards/rejected": -15.221875190734863, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 3.064091308165057, | |
| "grad_norm": 0.433111943744663, | |
| "learning_rate": 2.341966637401229e-07, | |
| "logits/chosen": -2.9000000953674316, | |
| "logits/rejected": -3.335156202316284, | |
| "logps/chosen": -552.7999877929688, | |
| "logps/rejected": -435.5, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.7406249046325684, | |
| "rewards/margins": 13.028124809265137, | |
| "rewards/rejected": -15.774999618530273, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 3.0728709394205445, | |
| "grad_norm": 0.17475867501196074, | |
| "learning_rate": 2.320017559262511e-07, | |
| "logits/chosen": -2.8726563453674316, | |
| "logits/rejected": -3.2320313453674316, | |
| "logps/chosen": -474.75, | |
| "logps/rejected": -458.5, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.07421875, | |
| "rewards/margins": 12.300000190734863, | |
| "rewards/rejected": -15.375, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.0816505706760315, | |
| "grad_norm": 1.1673770666973489, | |
| "learning_rate": 2.2980684811237927e-07, | |
| "logits/chosen": -2.9242186546325684, | |
| "logits/rejected": -3.30078125, | |
| "logps/chosen": -495.8999938964844, | |
| "logps/rejected": -478.20001220703125, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.9310545921325684, | |
| "rewards/margins": 12.453125, | |
| "rewards/rejected": -15.387499809265137, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 3.090430201931519, | |
| "grad_norm": 1.8334127424859914, | |
| "learning_rate": 2.2761194029850744e-07, | |
| "logits/chosen": -2.8765625953674316, | |
| "logits/rejected": -3.499218702316284, | |
| "logps/chosen": -460.70001220703125, | |
| "logps/rejected": -415.29998779296875, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.2349610328674316, | |
| "rewards/margins": 12.709375381469727, | |
| "rewards/rejected": -14.931249618530273, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 3.099209833187006, | |
| "grad_norm": 0.0662337109745872, | |
| "learning_rate": 2.2541703248463564e-07, | |
| "logits/chosen": -2.848437547683716, | |
| "logits/rejected": -3.24609375, | |
| "logps/chosen": -494.5, | |
| "logps/rejected": -438.79998779296875, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.72412109375, | |
| "rewards/margins": 12.690625190734863, | |
| "rewards/rejected": -15.421875, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 3.1079894644424932, | |
| "grad_norm": 0.17196436735775314, | |
| "learning_rate": 2.2322212467076381e-07, | |
| "logits/chosen": -2.953125, | |
| "logits/rejected": -3.3843750953674316, | |
| "logps/chosen": -461.3500061035156, | |
| "logps/rejected": -419.5, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.92626953125, | |
| "rewards/margins": 12.571874618530273, | |
| "rewards/rejected": -15.5, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 3.1167690956979808, | |
| "grad_norm": 11.34050266346943, | |
| "learning_rate": 2.21027216856892e-07, | |
| "logits/chosen": -2.932812452316284, | |
| "logits/rejected": -3.453125, | |
| "logps/chosen": -472.04998779296875, | |
| "logps/rejected": -416.8999938964844, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.991406202316284, | |
| "rewards/margins": 13.496874809265137, | |
| "rewards/rejected": -16.484375, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 3.125548726953468, | |
| "grad_norm": 2.835787372901712, | |
| "learning_rate": 2.188323090430202e-07, | |
| "logits/chosen": -2.8671875, | |
| "logits/rejected": -3.4242186546325684, | |
| "logps/chosen": -440.45001220703125, | |
| "logps/rejected": -394.1000061035156, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.6015625, | |
| "rewards/margins": 12.134374618530273, | |
| "rewards/rejected": -14.737500190734863, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 3.1343283582089554, | |
| "grad_norm": 0.2898615380402753, | |
| "learning_rate": 2.1663740122914836e-07, | |
| "logits/chosen": -2.809375047683716, | |
| "logits/rejected": -3.27734375, | |
| "logps/chosen": -472.1000061035156, | |
| "logps/rejected": -454.6000061035156, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.3189697265625, | |
| "rewards/margins": 12.465624809265137, | |
| "rewards/rejected": -14.787500381469727, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 3.1431079894644425, | |
| "grad_norm": 0.22346892383750283, | |
| "learning_rate": 2.1444249341527653e-07, | |
| "logits/chosen": -2.819531202316284, | |
| "logits/rejected": -3.2406249046325684, | |
| "logps/chosen": -458.70001220703125, | |
| "logps/rejected": -412.6000061035156, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.282910108566284, | |
| "rewards/margins": 13.006250381469727, | |
| "rewards/rejected": -15.300000190734863, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 3.1518876207199296, | |
| "grad_norm": 0.06165003089752172, | |
| "learning_rate": 2.1224758560140473e-07, | |
| "logits/chosen": -2.8843750953674316, | |
| "logits/rejected": -3.4000000953674316, | |
| "logps/chosen": -436.25, | |
| "logps/rejected": -404.70001220703125, | |
| "loss": 0.0039, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.8697266578674316, | |
| "rewards/margins": 12.5625, | |
| "rewards/rejected": -15.434374809265137, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 3.160667251975417, | |
| "grad_norm": 0.35852430505146193, | |
| "learning_rate": 2.100526777875329e-07, | |
| "logits/chosen": -2.905468702316284, | |
| "logits/rejected": -3.313281297683716, | |
| "logps/chosen": -464.1000061035156, | |
| "logps/rejected": -479.8999938964844, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.8792967796325684, | |
| "rewards/margins": 13.137499809265137, | |
| "rewards/rejected": -16.021875381469727, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 3.169446883230904, | |
| "grad_norm": 0.6426315052360575, | |
| "learning_rate": 2.0785776997366108e-07, | |
| "logits/chosen": -2.995312452316284, | |
| "logits/rejected": -3.46875, | |
| "logps/chosen": -429.1000061035156, | |
| "logps/rejected": -421.5, | |
| "loss": 0.0043, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.863818407058716, | |
| "rewards/margins": 12.074999809265137, | |
| "rewards/rejected": -14.9375, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 3.1782265144863917, | |
| "grad_norm": 0.2104731802633413, | |
| "learning_rate": 2.0566286215978928e-07, | |
| "logits/chosen": -2.723437547683716, | |
| "logits/rejected": -3.2964844703674316, | |
| "logps/chosen": -528.0999755859375, | |
| "logps/rejected": -425.8999938964844, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.5757813453674316, | |
| "rewards/margins": 12.834375381469727, | |
| "rewards/rejected": -15.415624618530273, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 3.187006145741879, | |
| "grad_norm": 0.21403310051401384, | |
| "learning_rate": 2.0346795434591745e-07, | |
| "logits/chosen": -2.922656297683716, | |
| "logits/rejected": -3.47265625, | |
| "logps/chosen": -483.3500061035156, | |
| "logps/rejected": -429.3999938964844, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.660595655441284, | |
| "rewards/margins": 12.615625381469727, | |
| "rewards/rejected": -15.274999618530273, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 3.195785776997366, | |
| "grad_norm": 0.11778338364967891, | |
| "learning_rate": 2.0127304653204563e-07, | |
| "logits/chosen": -2.9906249046325684, | |
| "logits/rejected": -3.38671875, | |
| "logps/chosen": -503.8999938964844, | |
| "logps/rejected": -439.6000061035156, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.6205077171325684, | |
| "rewards/margins": 12.443750381469727, | |
| "rewards/rejected": -15.068750381469727, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 3.2045654082528534, | |
| "grad_norm": 1.2168006025364513, | |
| "learning_rate": 1.9907813871817383e-07, | |
| "logits/chosen": -2.82421875, | |
| "logits/rejected": -3.30078125, | |
| "logps/chosen": -473.45001220703125, | |
| "logps/rejected": -408.79998779296875, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.6533203125, | |
| "rewards/margins": 11.981249809265137, | |
| "rewards/rejected": -14.640625, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 3.2133450395083405, | |
| "grad_norm": 0.31806520029939034, | |
| "learning_rate": 1.96883230904302e-07, | |
| "logits/chosen": -2.9124999046325684, | |
| "logits/rejected": -3.325000047683716, | |
| "logps/chosen": -460.3999938964844, | |
| "logps/rejected": -434.0, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.6673827171325684, | |
| "rewards/margins": 12.850000381469727, | |
| "rewards/rejected": -15.515625, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 3.222124670763828, | |
| "grad_norm": 0.0645329869839452, | |
| "learning_rate": 1.946883230904302e-07, | |
| "logits/chosen": -2.768749952316284, | |
| "logits/rejected": -3.24609375, | |
| "logps/chosen": -485.0, | |
| "logps/rejected": -485.5, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.003857374191284, | |
| "rewards/margins": 13.121874809265137, | |
| "rewards/rejected": -16.125, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 3.230904302019315, | |
| "grad_norm": 0.24265794854362419, | |
| "learning_rate": 1.9249341527655837e-07, | |
| "logits/chosen": -2.921093702316284, | |
| "logits/rejected": -3.3343749046325684, | |
| "logps/chosen": -481.1000061035156, | |
| "logps/rejected": -446.0, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.3812012672424316, | |
| "rewards/margins": 12.559374809265137, | |
| "rewards/rejected": -14.943750381469727, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 3.2396839332748026, | |
| "grad_norm": 0.17333913417238933, | |
| "learning_rate": 1.9029850746268655e-07, | |
| "logits/chosen": -2.835156202316284, | |
| "logits/rejected": -3.473437547683716, | |
| "logps/chosen": -474.6499938964844, | |
| "logps/rejected": -402.20001220703125, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.8431639671325684, | |
| "rewards/margins": 12.606249809265137, | |
| "rewards/rejected": -15.446874618530273, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 3.2484635645302897, | |
| "grad_norm": 0.1394520929874817, | |
| "learning_rate": 1.8810359964881475e-07, | |
| "logits/chosen": -2.7242188453674316, | |
| "logits/rejected": -3.1968750953674316, | |
| "logps/chosen": -476.6000061035156, | |
| "logps/rejected": -433.3999938964844, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.120898485183716, | |
| "rewards/margins": 12.612500190734863, | |
| "rewards/rejected": -14.731249809265137, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 3.257243195785777, | |
| "grad_norm": 124.02747783437341, | |
| "learning_rate": 1.8590869183494292e-07, | |
| "logits/chosen": -2.684765577316284, | |
| "logits/rejected": -3.2796874046325684, | |
| "logps/chosen": -494.25, | |
| "logps/rejected": -471.1000061035156, | |
| "loss": 0.0055, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -3.1326050758361816, | |
| "rewards/margins": 12.896875381469727, | |
| "rewards/rejected": -16.03125, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 3.2660228270412643, | |
| "grad_norm": 0.4924574534539922, | |
| "learning_rate": 1.837137840210711e-07, | |
| "logits/chosen": -2.9351563453674316, | |
| "logits/rejected": -3.358593702316284, | |
| "logps/chosen": -492.75, | |
| "logps/rejected": -443.6000061035156, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.913769483566284, | |
| "rewards/margins": 12.78125, | |
| "rewards/rejected": -15.693750381469727, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 3.2748024582967514, | |
| "grad_norm": 0.654673041651116, | |
| "learning_rate": 1.815188762071993e-07, | |
| "logits/chosen": -2.8746094703674316, | |
| "logits/rejected": -3.2484374046325684, | |
| "logps/chosen": -500.20001220703125, | |
| "logps/rejected": -467.70001220703125, | |
| "loss": 0.0024, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.586865186691284, | |
| "rewards/margins": 13.371874809265137, | |
| "rewards/rejected": -15.959375381469727, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 3.283582089552239, | |
| "grad_norm": 0.07458317569216595, | |
| "learning_rate": 1.7932396839332747e-07, | |
| "logits/chosen": -2.9453125, | |
| "logits/rejected": -3.311718702316284, | |
| "logps/chosen": -451.45001220703125, | |
| "logps/rejected": -462.1000061035156, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.230175733566284, | |
| "rewards/margins": 13.628125190734863, | |
| "rewards/rejected": -16.850000381469727, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 3.292361720807726, | |
| "grad_norm": 0.10553653536181591, | |
| "learning_rate": 1.7712906057945564e-07, | |
| "logits/chosen": -2.953125, | |
| "logits/rejected": -3.4906249046325684, | |
| "logps/chosen": -508.29998779296875, | |
| "logps/rejected": -426.29998779296875, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.3525390625, | |
| "rewards/margins": 12.456250190734863, | |
| "rewards/rejected": -15.803125381469727, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 3.3011413520632136, | |
| "grad_norm": 0.05722732486084981, | |
| "learning_rate": 1.7493415276558384e-07, | |
| "logits/chosen": -2.8828125, | |
| "logits/rejected": -3.340625047683716, | |
| "logps/chosen": -443.1000061035156, | |
| "logps/rejected": -435.79998779296875, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.0640625953674316, | |
| "rewards/margins": 13.106249809265137, | |
| "rewards/rejected": -16.165624618530273, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 3.3099209833187007, | |
| "grad_norm": 0.0754017491576997, | |
| "learning_rate": 1.72739244951712e-07, | |
| "logits/chosen": -2.901562452316284, | |
| "logits/rejected": -3.44140625, | |
| "logps/chosen": -500.79998779296875, | |
| "logps/rejected": -424.5, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.3499999046325684, | |
| "rewards/margins": 12.615625381469727, | |
| "rewards/rejected": -15.971875190734863, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 3.3187006145741877, | |
| "grad_norm": 0.2651092306336141, | |
| "learning_rate": 1.7054433713784019e-07, | |
| "logits/chosen": -2.936718702316284, | |
| "logits/rejected": -3.3203125, | |
| "logps/chosen": -445.54998779296875, | |
| "logps/rejected": -410.29998779296875, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.1976561546325684, | |
| "rewards/margins": 12.009374618530273, | |
| "rewards/rejected": -15.212499618530273, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 3.3274802458296753, | |
| "grad_norm": 0.12226431439251058, | |
| "learning_rate": 1.6834942932396839e-07, | |
| "logits/chosen": -2.9281249046325684, | |
| "logits/rejected": -3.426562547683716, | |
| "logps/chosen": -476.8500061035156, | |
| "logps/rejected": -428.1000061035156, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.4535155296325684, | |
| "rewards/margins": 13.168749809265137, | |
| "rewards/rejected": -16.625, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 3.3362598770851624, | |
| "grad_norm": 1.0456090219350036, | |
| "learning_rate": 1.6615452151009656e-07, | |
| "logits/chosen": -2.8851561546325684, | |
| "logits/rejected": -3.362499952316284, | |
| "logps/chosen": -509.29998779296875, | |
| "logps/rejected": -448.70001220703125, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.881640672683716, | |
| "rewards/margins": 12.943750381469727, | |
| "rewards/rejected": -15.809374809265137, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 3.3450395083406494, | |
| "grad_norm": 0.19182329344783963, | |
| "learning_rate": 1.6395961369622473e-07, | |
| "logits/chosen": -2.77734375, | |
| "logits/rejected": -3.137500047683716, | |
| "logps/chosen": -507.8999938964844, | |
| "logps/rejected": -511.20001220703125, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.092578172683716, | |
| "rewards/margins": 13.212499618530273, | |
| "rewards/rejected": -16.309375762939453, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 3.353819139596137, | |
| "grad_norm": 4.527202299510661, | |
| "learning_rate": 1.6176470588235293e-07, | |
| "logits/chosen": -2.94140625, | |
| "logits/rejected": -3.397656202316284, | |
| "logps/chosen": -473.3999938964844, | |
| "logps/rejected": -428.3999938964844, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.9330077171325684, | |
| "rewards/margins": 12.568750381469727, | |
| "rewards/rejected": -15.5, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 3.362598770851624, | |
| "grad_norm": 0.8945758518684351, | |
| "learning_rate": 1.595697980684811e-07, | |
| "logits/chosen": -2.896484375, | |
| "logits/rejected": -3.499218702316284, | |
| "logps/chosen": -465.1000061035156, | |
| "logps/rejected": -421.5, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.2679443359375, | |
| "rewards/margins": 12.378125190734863, | |
| "rewards/rejected": -14.640625, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 3.3713784021071116, | |
| "grad_norm": 0.05290599153908576, | |
| "learning_rate": 1.5737489025460928e-07, | |
| "logits/chosen": -2.921875, | |
| "logits/rejected": -3.282031297683716, | |
| "logps/chosen": -449.75, | |
| "logps/rejected": -464.1000061035156, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.8628907203674316, | |
| "rewards/margins": 12.287500381469727, | |
| "rewards/rejected": -15.146875381469727, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 3.3801580333625987, | |
| "grad_norm": 0.8799763925896169, | |
| "learning_rate": 1.5517998244073748e-07, | |
| "logits/chosen": -3.0648436546325684, | |
| "logits/rejected": -3.616406202316284, | |
| "logps/chosen": -424.1000061035156, | |
| "logps/rejected": -387.1000061035156, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.1626954078674316, | |
| "rewards/margins": 12.384374618530273, | |
| "rewards/rejected": -15.546875, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 3.388937664618086, | |
| "grad_norm": 2.747670141209021, | |
| "learning_rate": 1.5298507462686565e-07, | |
| "logits/chosen": -2.9296875, | |
| "logits/rejected": -3.4390625953674316, | |
| "logps/chosen": -464.79998779296875, | |
| "logps/rejected": -426.3999938964844, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.5855469703674316, | |
| "rewards/margins": 12.675000190734863, | |
| "rewards/rejected": -15.25, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 3.3977172958735733, | |
| "grad_norm": 0.6049742078697031, | |
| "learning_rate": 1.5079016681299383e-07, | |
| "logits/chosen": -2.885937452316284, | |
| "logits/rejected": -3.149218797683716, | |
| "logps/chosen": -456.8999938964844, | |
| "logps/rejected": -453.20001220703125, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.618115186691284, | |
| "rewards/margins": 12.5625, | |
| "rewards/rejected": -15.184374809265137, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 3.4064969271290604, | |
| "grad_norm": 0.1470989199812354, | |
| "learning_rate": 1.4859525899912203e-07, | |
| "logits/chosen": -3.04296875, | |
| "logits/rejected": -3.522656202316284, | |
| "logps/chosen": -493.1000061035156, | |
| "logps/rejected": -409.70001220703125, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.778125047683716, | |
| "rewards/margins": 12.265625, | |
| "rewards/rejected": -15.043749809265137, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 3.415276558384548, | |
| "grad_norm": 0.13161265388801072, | |
| "learning_rate": 1.464003511852502e-07, | |
| "logits/chosen": -2.9781250953674316, | |
| "logits/rejected": -3.413281202316284, | |
| "logps/chosen": -437.8500061035156, | |
| "logps/rejected": -437.8999938964844, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.8871092796325684, | |
| "rewards/margins": 11.887499809265137, | |
| "rewards/rejected": -14.778124809265137, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 3.424056189640035, | |
| "grad_norm": 1.196199409101085, | |
| "learning_rate": 1.4420544337137837e-07, | |
| "logits/chosen": -2.8851561546325684, | |
| "logits/rejected": -3.518749952316284, | |
| "logps/chosen": -499.5, | |
| "logps/rejected": -418.79998779296875, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.387890577316284, | |
| "rewards/margins": 12.559374809265137, | |
| "rewards/rejected": -15.943750381469727, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 3.4328358208955225, | |
| "grad_norm": 0.7215686703181198, | |
| "learning_rate": 1.4201053555750657e-07, | |
| "logits/chosen": -3.0687499046325684, | |
| "logits/rejected": -3.596874952316284, | |
| "logps/chosen": -484.0, | |
| "logps/rejected": -444.5, | |
| "loss": 0.0014, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.642578125, | |
| "rewards/margins": 12.634374618530273, | |
| "rewards/rejected": -16.271875381469727, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 3.4416154521510096, | |
| "grad_norm": 0.4406738843864271, | |
| "learning_rate": 1.3981562774363474e-07, | |
| "logits/chosen": -2.7437500953674316, | |
| "logits/rejected": -3.1546874046325684, | |
| "logps/chosen": -532.2999877929688, | |
| "logps/rejected": -455.70001220703125, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.461230516433716, | |
| "rewards/margins": 13.050000190734863, | |
| "rewards/rejected": -15.524999618530273, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 3.450395083406497, | |
| "grad_norm": 0.4340808016288137, | |
| "learning_rate": 1.3762071992976294e-07, | |
| "logits/chosen": -2.964062452316284, | |
| "logits/rejected": -3.3984375, | |
| "logps/chosen": -490.0, | |
| "logps/rejected": -420.70001220703125, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.340625047683716, | |
| "rewards/margins": 13.475000381469727, | |
| "rewards/rejected": -16.809375762939453, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 3.4591747146619842, | |
| "grad_norm": 0.05533862491991608, | |
| "learning_rate": 1.3542581211589112e-07, | |
| "logits/chosen": -2.9375, | |
| "logits/rejected": -3.2437500953674316, | |
| "logps/chosen": -458.8999938964844, | |
| "logps/rejected": -433.79998779296875, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.80859375, | |
| "rewards/margins": 12.756250381469727, | |
| "rewards/rejected": -15.565625190734863, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 3.4679543459174713, | |
| "grad_norm": 0.18822112601303712, | |
| "learning_rate": 1.332309043020193e-07, | |
| "logits/chosen": -2.940624952316284, | |
| "logits/rejected": -3.43359375, | |
| "logps/chosen": -486.20001220703125, | |
| "logps/rejected": -410.20001220703125, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.6832032203674316, | |
| "rewards/margins": 12.743749618530273, | |
| "rewards/rejected": -16.412500381469727, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 3.476733977172959, | |
| "grad_norm": 0.18996959944850028, | |
| "learning_rate": 1.310359964881475e-07, | |
| "logits/chosen": -2.9937500953674316, | |
| "logits/rejected": -3.518749952316284, | |
| "logps/chosen": -435.8999938964844, | |
| "logps/rejected": -429.79998779296875, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.096874952316284, | |
| "rewards/margins": 13.153124809265137, | |
| "rewards/rejected": -16.231250762939453, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 3.485513608428446, | |
| "grad_norm": 0.01910957476568621, | |
| "learning_rate": 1.2884108867427566e-07, | |
| "logits/chosen": -2.807812452316284, | |
| "logits/rejected": -3.321093797683716, | |
| "logps/chosen": -529.7999877929688, | |
| "logps/rejected": -440.70001220703125, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.3363280296325684, | |
| "rewards/margins": 13.03125, | |
| "rewards/rejected": -16.368749618530273, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 3.4942932396839335, | |
| "grad_norm": 2.102750056933607, | |
| "learning_rate": 1.2664618086040384e-07, | |
| "logits/chosen": -2.7914061546325684, | |
| "logits/rejected": -3.3765625953674316, | |
| "logps/chosen": -512.9500122070312, | |
| "logps/rejected": -459.5, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.6820311546325684, | |
| "rewards/margins": 13.162500381469727, | |
| "rewards/rejected": -15.850000381469727, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 3.5030728709394205, | |
| "grad_norm": 0.07518352660108933, | |
| "learning_rate": 1.2445127304653204e-07, | |
| "logits/chosen": -2.8296875953674316, | |
| "logits/rejected": -3.547656297683716, | |
| "logps/chosen": -479.29998779296875, | |
| "logps/rejected": -404.79998779296875, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.124755859375, | |
| "rewards/margins": 12.378125190734863, | |
| "rewards/rejected": -15.496874809265137, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 3.511852502194908, | |
| "grad_norm": 3.3562862303540464, | |
| "learning_rate": 1.2225636523266024e-07, | |
| "logits/chosen": -2.762500047683716, | |
| "logits/rejected": -3.207812547683716, | |
| "logps/chosen": -510.6499938964844, | |
| "logps/rejected": -437.20001220703125, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.0020508766174316, | |
| "rewards/margins": 12.731249809265137, | |
| "rewards/rejected": -14.737500190734863, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.520632133450395, | |
| "grad_norm": 0.44469118787885575, | |
| "learning_rate": 1.200614574187884e-07, | |
| "logits/chosen": -2.9765625, | |
| "logits/rejected": -3.510937452316284, | |
| "logps/chosen": -426.20001220703125, | |
| "logps/rejected": -392.3999938964844, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.1279296875, | |
| "rewards/margins": 12.796875, | |
| "rewards/rejected": -15.918749809265137, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 3.5294117647058822, | |
| "grad_norm": 0.13597357049994596, | |
| "learning_rate": 1.1786654960491658e-07, | |
| "logits/chosen": -3.0078125, | |
| "logits/rejected": -3.5, | |
| "logps/chosen": -432.8999938964844, | |
| "logps/rejected": -436.6000061035156, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.1011719703674316, | |
| "rewards/margins": 12.603124618530273, | |
| "rewards/rejected": -15.696874618530273, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 3.53819139596137, | |
| "grad_norm": 0.15523896636363424, | |
| "learning_rate": 1.1567164179104477e-07, | |
| "logits/chosen": -2.8578124046325684, | |
| "logits/rejected": -3.108593702316284, | |
| "logps/chosen": -462.20001220703125, | |
| "logps/rejected": -485.20001220703125, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.6551756858825684, | |
| "rewards/margins": 12.740625381469727, | |
| "rewards/rejected": -15.403124809265137, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 3.546971027216857, | |
| "grad_norm": 1.652462109336473, | |
| "learning_rate": 1.1347673397717296e-07, | |
| "logits/chosen": -2.854687452316284, | |
| "logits/rejected": -3.3343749046325684, | |
| "logps/chosen": -548.9000244140625, | |
| "logps/rejected": -451.6000061035156, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.1659178733825684, | |
| "rewards/margins": 12.746874809265137, | |
| "rewards/rejected": -14.921875, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 3.555750658472344, | |
| "grad_norm": 0.19301163849638503, | |
| "learning_rate": 1.1128182616330113e-07, | |
| "logits/chosen": -2.844531297683716, | |
| "logits/rejected": -3.2750000953674316, | |
| "logps/chosen": -447.04998779296875, | |
| "logps/rejected": -453.20001220703125, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.0361328125, | |
| "rewards/margins": 12.875, | |
| "rewards/rejected": -15.921875, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 3.5645302897278315, | |
| "grad_norm": 0.9313979779632242, | |
| "learning_rate": 1.0908691834942932e-07, | |
| "logits/chosen": -2.90234375, | |
| "logits/rejected": -3.3148436546325684, | |
| "logps/chosen": -487.1000061035156, | |
| "logps/rejected": -476.0, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.663769483566284, | |
| "rewards/margins": 13.459375381469727, | |
| "rewards/rejected": -16.131250381469727, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 3.5733099209833186, | |
| "grad_norm": 0.07980997449800085, | |
| "learning_rate": 1.068920105355575e-07, | |
| "logits/chosen": -2.9242186546325684, | |
| "logits/rejected": -3.2398438453674316, | |
| "logps/chosen": -483.20001220703125, | |
| "logps/rejected": -446.1000061035156, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.184765577316284, | |
| "rewards/margins": 12.918749809265137, | |
| "rewards/rejected": -16.100000381469727, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 3.582089552238806, | |
| "grad_norm": 0.05193347424244529, | |
| "learning_rate": 1.0469710272168568e-07, | |
| "logits/chosen": -2.9625000953674316, | |
| "logits/rejected": -3.453125, | |
| "logps/chosen": -482.20001220703125, | |
| "logps/rejected": -425.20001220703125, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.33544921875, | |
| "rewards/margins": 12.084375381469727, | |
| "rewards/rejected": -15.40625, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 3.590869183494293, | |
| "grad_norm": 0.23734802974722582, | |
| "learning_rate": 1.0250219490781386e-07, | |
| "logits/chosen": -2.9375, | |
| "logits/rejected": -3.4000000953674316, | |
| "logps/chosen": -401.25, | |
| "logps/rejected": -408.5, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.783496141433716, | |
| "rewards/margins": 12.915624618530273, | |
| "rewards/rejected": -15.675000190734863, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 3.5996488147497807, | |
| "grad_norm": 0.1646840228622721, | |
| "learning_rate": 1.0030728709394205e-07, | |
| "logits/chosen": -2.9429688453674316, | |
| "logits/rejected": -3.484375, | |
| "logps/chosen": -508.1000061035156, | |
| "logps/rejected": -455.20001220703125, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.922070264816284, | |
| "rewards/margins": 13.053125381469727, | |
| "rewards/rejected": -15.975000381469727, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 3.608428446005268, | |
| "grad_norm": 0.5424737470781746, | |
| "learning_rate": 9.811237928007022e-08, | |
| "logits/chosen": -2.8812499046325684, | |
| "logits/rejected": -3.5445313453674316, | |
| "logps/chosen": -420.6000061035156, | |
| "logps/rejected": -414.1000061035156, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.350781202316284, | |
| "rewards/margins": 13.543749809265137, | |
| "rewards/rejected": -16.893749237060547, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 3.617208077260755, | |
| "grad_norm": 0.319465348304568, | |
| "learning_rate": 9.591747146619841e-08, | |
| "logits/chosen": -3.020312547683716, | |
| "logits/rejected": -3.4609375, | |
| "logps/chosen": -461.20001220703125, | |
| "logps/rejected": -412.0, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.85498046875, | |
| "rewards/margins": 12.981249809265137, | |
| "rewards/rejected": -15.834375381469727, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 3.6259877085162424, | |
| "grad_norm": 21.26942592839781, | |
| "learning_rate": 9.37225636523266e-08, | |
| "logits/chosen": -2.8734374046325684, | |
| "logits/rejected": -3.3578124046325684, | |
| "logps/chosen": -510.6000061035156, | |
| "logps/rejected": -421.1000061035156, | |
| "loss": 0.0014, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.2734375, | |
| "rewards/margins": 12.959375381469727, | |
| "rewards/rejected": -15.234375, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 3.6347673397717295, | |
| "grad_norm": 0.05669587340693758, | |
| "learning_rate": 9.152765583845478e-08, | |
| "logits/chosen": -3.003124952316284, | |
| "logits/rejected": -3.500781297683716, | |
| "logps/chosen": -498.20001220703125, | |
| "logps/rejected": -400.79998779296875, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.163403272628784, | |
| "rewards/margins": 13.096875190734863, | |
| "rewards/rejected": -16.256250381469727, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 3.6435469710272166, | |
| "grad_norm": 0.19390133786865785, | |
| "learning_rate": 8.933274802458296e-08, | |
| "logits/chosen": -2.840625047683716, | |
| "logits/rejected": -3.250781297683716, | |
| "logps/chosen": -510.3999938964844, | |
| "logps/rejected": -480.0, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.2867188453674316, | |
| "rewards/margins": 12.990625381469727, | |
| "rewards/rejected": -16.278125762939453, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 3.652326602282704, | |
| "grad_norm": 0.022028018683772615, | |
| "learning_rate": 8.713784021071114e-08, | |
| "logits/chosen": -2.9195313453674316, | |
| "logits/rejected": -3.37109375, | |
| "logps/chosen": -470.45001220703125, | |
| "logps/rejected": -413.8999938964844, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.0035157203674316, | |
| "rewards/margins": 12.774999618530273, | |
| "rewards/rejected": -15.771875381469727, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 3.6611062335381916, | |
| "grad_norm": 0.7821369463512009, | |
| "learning_rate": 8.494293239683933e-08, | |
| "logits/chosen": -2.860156297683716, | |
| "logits/rejected": -3.299999952316284, | |
| "logps/chosen": -528.5, | |
| "logps/rejected": -498.1000061035156, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.1226563453674316, | |
| "rewards/margins": 12.753125190734863, | |
| "rewards/rejected": -15.868749618530273, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 3.6698858647936787, | |
| "grad_norm": 0.06689871902320767, | |
| "learning_rate": 8.27480245829675e-08, | |
| "logits/chosen": -2.8968749046325684, | |
| "logits/rejected": -3.452343702316284, | |
| "logps/chosen": -490.0, | |
| "logps/rejected": -421.1000061035156, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.5009765625, | |
| "rewards/margins": 12.368749618530273, | |
| "rewards/rejected": -14.881250381469727, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 3.678665496049166, | |
| "grad_norm": 0.18775309242892332, | |
| "learning_rate": 8.055311676909569e-08, | |
| "logits/chosen": -2.973437547683716, | |
| "logits/rejected": -3.4476561546325684, | |
| "logps/chosen": -469.3999938964844, | |
| "logps/rejected": -434.5, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.11767578125, | |
| "rewards/margins": 12.856249809265137, | |
| "rewards/rejected": -15.975000381469727, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 3.6874451273046533, | |
| "grad_norm": 0.06338303708779713, | |
| "learning_rate": 7.835820895522388e-08, | |
| "logits/chosen": -2.8414063453674316, | |
| "logits/rejected": -3.2328124046325684, | |
| "logps/chosen": -445.6000061035156, | |
| "logps/rejected": -461.20001220703125, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.0074219703674316, | |
| "rewards/margins": 12.118749618530273, | |
| "rewards/rejected": -15.131250381469727, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 3.6962247585601404, | |
| "grad_norm": 0.373425027629608, | |
| "learning_rate": 7.616330114135205e-08, | |
| "logits/chosen": -2.965625047683716, | |
| "logits/rejected": -3.452343702316284, | |
| "logps/chosen": -432.6000061035156, | |
| "logps/rejected": -431.6000061035156, | |
| "loss": 0.0017, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.4754395484924316, | |
| "rewards/margins": 12.737500190734863, | |
| "rewards/rejected": -16.225000381469727, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 3.7050043898156275, | |
| "grad_norm": 7.129266016403341, | |
| "learning_rate": 7.396839332748024e-08, | |
| "logits/chosen": -2.8765625953674316, | |
| "logits/rejected": -3.4007811546325684, | |
| "logps/chosen": -476.3500061035156, | |
| "logps/rejected": -473.0, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.4273438453674316, | |
| "rewards/margins": 13.015625, | |
| "rewards/rejected": -16.440624237060547, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 3.713784021071115, | |
| "grad_norm": 0.09490785771518713, | |
| "learning_rate": 7.177348551360842e-08, | |
| "logits/chosen": -2.934375047683716, | |
| "logits/rejected": -3.37109375, | |
| "logps/chosen": -502.04998779296875, | |
| "logps/rejected": -446.8999938964844, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.301562547683716, | |
| "rewards/margins": 12.709375381469727, | |
| "rewards/rejected": -16.015625, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 3.722563652326602, | |
| "grad_norm": 0.08737448223316606, | |
| "learning_rate": 6.95785776997366e-08, | |
| "logits/chosen": -2.8617186546325684, | |
| "logits/rejected": -3.359375, | |
| "logps/chosen": -503.20001220703125, | |
| "logps/rejected": -444.8999938964844, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.017773389816284, | |
| "rewards/margins": 12.971875190734863, | |
| "rewards/rejected": -15.990625381469727, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 3.7313432835820897, | |
| "grad_norm": 0.19127685117675428, | |
| "learning_rate": 6.738366988586478e-08, | |
| "logits/chosen": -2.8296875953674316, | |
| "logits/rejected": -3.3851561546325684, | |
| "logps/chosen": -479.95001220703125, | |
| "logps/rejected": -426.6000061035156, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.4801268577575684, | |
| "rewards/margins": 13.0625, | |
| "rewards/rejected": -15.540624618530273, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 3.7401229148375768, | |
| "grad_norm": 0.27325222538850813, | |
| "learning_rate": 6.518876207199297e-08, | |
| "logits/chosen": -2.86328125, | |
| "logits/rejected": -3.424999952316284, | |
| "logps/chosen": -486.8999938964844, | |
| "logps/rejected": -438.5, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.8695311546325684, | |
| "rewards/margins": 12.915624618530273, | |
| "rewards/rejected": -15.793749809265137, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 3.7489025460930643, | |
| "grad_norm": 0.24953992223000473, | |
| "learning_rate": 6.299385425812117e-08, | |
| "logits/chosen": -2.9359374046325684, | |
| "logits/rejected": -3.3921875953674316, | |
| "logps/chosen": -455.5, | |
| "logps/rejected": -437.8999938964844, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.7079100608825684, | |
| "rewards/margins": 12.928125381469727, | |
| "rewards/rejected": -15.637499809265137, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 3.7576821773485514, | |
| "grad_norm": 0.16534712472113114, | |
| "learning_rate": 6.079894644424934e-08, | |
| "logits/chosen": -2.7734375, | |
| "logits/rejected": -3.375781297683716, | |
| "logps/chosen": -504.45001220703125, | |
| "logps/rejected": -443.6000061035156, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.8353271484375, | |
| "rewards/margins": 12.565625190734863, | |
| "rewards/rejected": -15.403124809265137, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 3.7664618086040385, | |
| "grad_norm": 0.05811605261647762, | |
| "learning_rate": 5.860403863037752e-08, | |
| "logits/chosen": -2.9749999046325684, | |
| "logits/rejected": -3.44921875, | |
| "logps/chosen": -461.70001220703125, | |
| "logps/rejected": -453.8999938964844, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.014843702316284, | |
| "rewards/margins": 12.471875190734863, | |
| "rewards/rejected": -15.490625381469727, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 3.775241439859526, | |
| "grad_norm": 0.48854372959318854, | |
| "learning_rate": 5.64091308165057e-08, | |
| "logits/chosen": -2.74609375, | |
| "logits/rejected": -3.2984375953674316, | |
| "logps/chosen": -545.4000244140625, | |
| "logps/rejected": -437.29998779296875, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.6949219703674316, | |
| "rewards/margins": 12.440625190734863, | |
| "rewards/rejected": -15.128125190734863, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 3.784021071115013, | |
| "grad_norm": 0.5697026376379704, | |
| "learning_rate": 5.421422300263389e-08, | |
| "logits/chosen": -3.024218797683716, | |
| "logits/rejected": -3.446093797683716, | |
| "logps/chosen": -413.75, | |
| "logps/rejected": -412.70001220703125, | |
| "loss": 0.0023, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.653515577316284, | |
| "rewards/margins": 13.046875, | |
| "rewards/rejected": -16.700000762939453, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 3.7928007023705006, | |
| "grad_norm": 0.1639430491168361, | |
| "learning_rate": 5.201931518876207e-08, | |
| "logits/chosen": -2.9820313453674316, | |
| "logits/rejected": -3.42578125, | |
| "logps/chosen": -431.6499938964844, | |
| "logps/rejected": -433.5, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.415234327316284, | |
| "rewards/margins": 13.534375190734863, | |
| "rewards/rejected": -16.946874618530273, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 3.8015803336259877, | |
| "grad_norm": 0.09088752958966294, | |
| "learning_rate": 4.982440737489025e-08, | |
| "logits/chosen": -2.981250047683716, | |
| "logits/rejected": -3.617968797683716, | |
| "logps/chosen": -454.6000061035156, | |
| "logps/rejected": -408.25, | |
| "loss": 0.0078, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -3.1500000953674316, | |
| "rewards/margins": 11.981249809265137, | |
| "rewards/rejected": -15.131250381469727, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 3.810359964881475, | |
| "grad_norm": 0.0965181394541503, | |
| "learning_rate": 4.7629499561018435e-08, | |
| "logits/chosen": -2.8359375, | |
| "logits/rejected": -3.3187499046325684, | |
| "logps/chosen": -478.20001220703125, | |
| "logps/rejected": -421.6000061035156, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.8701171875, | |
| "rewards/margins": 13.481249809265137, | |
| "rewards/rejected": -16.34375, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 3.8191395961369623, | |
| "grad_norm": 0.3430323438027083, | |
| "learning_rate": 4.5434591747146615e-08, | |
| "logits/chosen": -2.88671875, | |
| "logits/rejected": -3.2578125, | |
| "logps/chosen": -489.0, | |
| "logps/rejected": -440.70001220703125, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.102246046066284, | |
| "rewards/margins": 12.925000190734863, | |
| "rewards/rejected": -16.037500381469727, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 3.8279192273924494, | |
| "grad_norm": 0.13442608564589695, | |
| "learning_rate": 4.32396839332748e-08, | |
| "logits/chosen": -2.7007813453674316, | |
| "logits/rejected": -3.272656202316284, | |
| "logps/chosen": -453.1000061035156, | |
| "logps/rejected": -455.5, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.218493700027466, | |
| "rewards/margins": 12.668749809265137, | |
| "rewards/rejected": -15.875, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 3.836698858647937, | |
| "grad_norm": 0.27484934916126125, | |
| "learning_rate": 4.104477611940298e-08, | |
| "logits/chosen": -2.9585938453674316, | |
| "logits/rejected": -3.4828124046325684, | |
| "logps/chosen": -482.29998779296875, | |
| "logps/rejected": -414.8999938964844, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.061328172683716, | |
| "rewards/margins": 12.524999618530273, | |
| "rewards/rejected": -15.574999809265137, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 3.845478489903424, | |
| "grad_norm": 0.8196527442730976, | |
| "learning_rate": 3.884986830553116e-08, | |
| "logits/chosen": -2.9703125953674316, | |
| "logits/rejected": -3.4437499046325684, | |
| "logps/chosen": -499.0, | |
| "logps/rejected": -430.20001220703125, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.223437547683716, | |
| "rewards/margins": 12.559374809265137, | |
| "rewards/rejected": -15.774999618530273, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 3.854258121158911, | |
| "grad_norm": 0.46506898711620936, | |
| "learning_rate": 3.665496049165935e-08, | |
| "logits/chosen": -2.871875047683716, | |
| "logits/rejected": -3.2562499046325684, | |
| "logps/chosen": -466.0, | |
| "logps/rejected": -442.0, | |
| "loss": 0.0187, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -3.1322264671325684, | |
| "rewards/margins": 12.928125381469727, | |
| "rewards/rejected": -16.065624237060547, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 3.8630377524143986, | |
| "grad_norm": 0.03219121616056209, | |
| "learning_rate": 3.446005267778753e-08, | |
| "logits/chosen": -2.9515624046325684, | |
| "logits/rejected": -3.196093797683716, | |
| "logps/chosen": -485.0, | |
| "logps/rejected": -483.29998779296875, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.470654249191284, | |
| "rewards/margins": 12.703125, | |
| "rewards/rejected": -15.178125381469727, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 3.8718173836698857, | |
| "grad_norm": 0.2560706183120807, | |
| "learning_rate": 3.226514486391571e-08, | |
| "logits/chosen": -2.7203125953674316, | |
| "logits/rejected": -3.237499952316284, | |
| "logps/chosen": -490.8999938964844, | |
| "logps/rejected": -467.79998779296875, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.8060059547424316, | |
| "rewards/margins": 12.925000190734863, | |
| "rewards/rejected": -15.734375, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 3.8805970149253732, | |
| "grad_norm": 0.05563468285862625, | |
| "learning_rate": 3.00702370500439e-08, | |
| "logits/chosen": -2.987499952316284, | |
| "logits/rejected": -3.4281249046325684, | |
| "logps/chosen": -487.6499938964844, | |
| "logps/rejected": -436.0, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.027148485183716, | |
| "rewards/margins": 13.168749809265137, | |
| "rewards/rejected": -16.200000762939453, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 3.8893766461808603, | |
| "grad_norm": 0.21685540754942034, | |
| "learning_rate": 2.7875329236172078e-08, | |
| "logits/chosen": -2.9359374046325684, | |
| "logits/rejected": -3.35546875, | |
| "logps/chosen": -516.5999755859375, | |
| "logps/rejected": -444.29998779296875, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.76318359375, | |
| "rewards/margins": 12.846875190734863, | |
| "rewards/rejected": -15.609375, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 3.898156277436348, | |
| "grad_norm": 0.17466063974336163, | |
| "learning_rate": 2.568042142230026e-08, | |
| "logits/chosen": -2.9828124046325684, | |
| "logits/rejected": -3.33203125, | |
| "logps/chosen": -455.70001220703125, | |
| "logps/rejected": -438.79998779296875, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.195117235183716, | |
| "rewards/margins": 12.921875, | |
| "rewards/rejected": -16.109375, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 3.906935908691835, | |
| "grad_norm": 0.0476552217836039, | |
| "learning_rate": 2.3485513608428444e-08, | |
| "logits/chosen": -2.973437547683716, | |
| "logits/rejected": -3.2750000953674316, | |
| "logps/chosen": -488.54998779296875, | |
| "logps/rejected": -493.3999938964844, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.513476610183716, | |
| "rewards/margins": 13.78125, | |
| "rewards/rejected": -16.296875, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 3.915715539947322, | |
| "grad_norm": 0.08905525732564612, | |
| "learning_rate": 2.1290605794556627e-08, | |
| "logits/chosen": -2.874218702316284, | |
| "logits/rejected": -3.3203125, | |
| "logps/chosen": -505.29998779296875, | |
| "logps/rejected": -462.70001220703125, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.136914014816284, | |
| "rewards/margins": 12.728124618530273, | |
| "rewards/rejected": -15.865625381469727, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 3.9244951712028096, | |
| "grad_norm": 0.1668928334643476, | |
| "learning_rate": 1.909569798068481e-08, | |
| "logits/chosen": -2.823437452316284, | |
| "logits/rejected": -3.495312452316284, | |
| "logps/chosen": -453.20001220703125, | |
| "logps/rejected": -405.6000061035156, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.267773389816284, | |
| "rewards/margins": 13.134374618530273, | |
| "rewards/rejected": -16.409374237060547, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 3.9332748024582966, | |
| "grad_norm": 0.09234843085234354, | |
| "learning_rate": 1.690079016681299e-08, | |
| "logits/chosen": -2.960156202316284, | |
| "logits/rejected": -3.48828125, | |
| "logps/chosen": -499.29998779296875, | |
| "logps/rejected": -430.79998779296875, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.131640672683716, | |
| "rewards/margins": 12.934374809265137, | |
| "rewards/rejected": -16.071874618530273, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 3.942054433713784, | |
| "grad_norm": 0.1280532747188893, | |
| "learning_rate": 1.4705882352941176e-08, | |
| "logits/chosen": -2.805468797683716, | |
| "logits/rejected": -3.268749952316284, | |
| "logps/chosen": -482.25, | |
| "logps/rejected": -416.8999938964844, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.1329102516174316, | |
| "rewards/margins": 12.287500381469727, | |
| "rewards/rejected": -15.421875, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 3.9508340649692713, | |
| "grad_norm": 0.09592585371257221, | |
| "learning_rate": 1.2510974539069359e-08, | |
| "logits/chosen": -3.022265672683716, | |
| "logits/rejected": -3.438281297683716, | |
| "logps/chosen": -442.3999938964844, | |
| "logps/rejected": -428.70001220703125, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.1246094703674316, | |
| "rewards/margins": 12.428125381469727, | |
| "rewards/rejected": -15.5625, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.959613696224759, | |
| "grad_norm": 0.3078398776322869, | |
| "learning_rate": 1.031606672519754e-08, | |
| "logits/chosen": -2.8140625953674316, | |
| "logits/rejected": -3.491406202316284, | |
| "logps/chosen": -532.5, | |
| "logps/rejected": -414.8999938964844, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.138476610183716, | |
| "rewards/margins": 12.71875, | |
| "rewards/rejected": -15.846875190734863, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 3.968393327480246, | |
| "grad_norm": 0.3337277488774481, | |
| "learning_rate": 8.121158911325724e-09, | |
| "logits/chosen": -2.910937547683716, | |
| "logits/rejected": -3.335156202316284, | |
| "logps/chosen": -491.20001220703125, | |
| "logps/rejected": -435.70001220703125, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.9751954078674316, | |
| "rewards/margins": 12.503125190734863, | |
| "rewards/rejected": -15.481249809265137, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 3.977172958735733, | |
| "grad_norm": 0.7026092340484815, | |
| "learning_rate": 5.926251097453907e-09, | |
| "logits/chosen": -2.8515625, | |
| "logits/rejected": -3.3929686546325684, | |
| "logps/chosen": -485.8999938964844, | |
| "logps/rejected": -450.1000061035156, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.555346727371216, | |
| "rewards/margins": 13.0, | |
| "rewards/rejected": -15.553125381469727, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 3.9859525899912205, | |
| "grad_norm": 6.171708025221414, | |
| "learning_rate": 3.731343283582089e-09, | |
| "logits/chosen": -2.8125, | |
| "logits/rejected": -3.36328125, | |
| "logps/chosen": -459.20001220703125, | |
| "logps/rejected": -406.0, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.0492186546325684, | |
| "rewards/margins": 12.556249618530273, | |
| "rewards/rejected": -15.618749618530273, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 3.9947322212467076, | |
| "grad_norm": 1.3860823114375178, | |
| "learning_rate": 1.5364354697102721e-09, | |
| "logits/chosen": -2.910937547683716, | |
| "logits/rejected": -3.3671875, | |
| "logps/chosen": -423.8500061035156, | |
| "logps/rejected": -408.29998779296875, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.051953077316284, | |
| "rewards/margins": 12.409375190734863, | |
| "rewards/rejected": -15.449999809265137, | |
| "step": 4550 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 4556, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |