| { | |
| "best_global_step": 351, | |
| "best_metric": 0.08202474, | |
| "best_model_checkpoint": "/ckpts/models/wohu_ui_llm/7b/test_dpo_0730_8723_3e-6_beta_005_add_claude2_epoch3/v0-20250801-071953/checkpoint-351", | |
| "epoch": 2.9957446808510637, | |
| "eval_steps": 100, | |
| "global_step": 351, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00851063829787234, | |
| "grad_norm": 113.12678527832031, | |
| "learning_rate": 1.6666666666666665e-07, | |
| "logits/chosen": -2.26171875, | |
| "logits/rejected": -2.27734375, | |
| "logps/chosen": -421.0, | |
| "logps/rejected": -97.875, | |
| "loss": 1.2347412109375, | |
| "memory(GiB)": 63.45, | |
| "nll_loss": 0.54248046875, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1, | |
| "train_speed(iter/s)": 0.015536 | |
| }, | |
| { | |
| "epoch": 0.0425531914893617, | |
| "grad_norm": 116.12273406982422, | |
| "learning_rate": 8.333333333333334e-07, | |
| "logits/chosen": -2.2197265625, | |
| "logits/rejected": -2.2900390625, | |
| "logps/chosen": -518.375, | |
| "logps/rejected": -105.0625, | |
| "loss": 1.295806884765625, | |
| "memory(GiB)": 78.07, | |
| "nll_loss": 0.623046875, | |
| "rewards/accuracies": 0.44921875, | |
| "rewards/chosen": 0.04306221008300781, | |
| "rewards/margins": 0.03983926773071289, | |
| "rewards/rejected": 0.0031530149281024933, | |
| "step": 5, | |
| "train_speed(iter/s)": 0.020262 | |
| }, | |
| { | |
| "epoch": 0.0851063829787234, | |
| "grad_norm": 16.63855743408203, | |
| "learning_rate": 1.6666666666666669e-06, | |
| "logits/chosen": -2.178906202316284, | |
| "logits/rejected": -2.296875, | |
| "logps/chosen": -496.29998779296875, | |
| "logps/rejected": -100.19999694824219, | |
| "loss": 0.902490234375, | |
| "memory(GiB)": 78.07, | |
| "nll_loss": 0.584765613079071, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.335205078125, | |
| "rewards/margins": 1.311669945716858, | |
| "rewards/rejected": 0.024295806884765625, | |
| "step": 10, | |
| "train_speed(iter/s)": 0.023151 | |
| }, | |
| { | |
| "epoch": 0.1276595744680851, | |
| "grad_norm": 3.581925392150879, | |
| "learning_rate": 2.5e-06, | |
| "logits/chosen": -1.9874999523162842, | |
| "logits/rejected": -2.1171875, | |
| "logps/chosen": -399.20001220703125, | |
| "logps/rejected": -105.57499694824219, | |
| "loss": 0.502459716796875, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.4808593690395355, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.135937690734863, | |
| "rewards/margins": 5.94140625, | |
| "rewards/rejected": 0.19367675483226776, | |
| "step": 15, | |
| "train_speed(iter/s)": 0.02441 | |
| }, | |
| { | |
| "epoch": 0.1702127659574468, | |
| "grad_norm": 1.2561246156692505, | |
| "learning_rate": 2.99973299484371e-06, | |
| "logits/chosen": -0.9429687261581421, | |
| "logits/rejected": -1.2861328125, | |
| "logps/chosen": -267.79998779296875, | |
| "logps/rejected": -103.30000305175781, | |
| "loss": 0.3127166748046875, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.3118652403354645, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 12.771875381469727, | |
| "rewards/margins": 12.581250190734863, | |
| "rewards/rejected": 0.18445129692554474, | |
| "step": 20, | |
| "train_speed(iter/s)": 0.024676 | |
| }, | |
| { | |
| "epoch": 0.2127659574468085, | |
| "grad_norm": 0.6227205395698547, | |
| "learning_rate": 2.9967302783835462e-06, | |
| "logits/chosen": 0.38526612520217896, | |
| "logits/rejected": 0.3094726502895355, | |
| "logps/chosen": -193.35000610351562, | |
| "logps/rejected": -125.125, | |
| "loss": 0.229217529296875, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.22915038466453552, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 16.259374618530273, | |
| "rewards/margins": 17.034374237060547, | |
| "rewards/rejected": -0.7733398675918579, | |
| "step": 25, | |
| "train_speed(iter/s)": 0.024962 | |
| }, | |
| { | |
| "epoch": 0.2553191489361702, | |
| "grad_norm": 0.41129618883132935, | |
| "learning_rate": 2.9903977914295545e-06, | |
| "logits/chosen": 0.7916015386581421, | |
| "logits/rejected": 1.0089843273162842, | |
| "logps/chosen": -168.39999389648438, | |
| "logps/rejected": -163.8000030517578, | |
| "loss": 0.19467926025390625, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.19462890923023224, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 17.774999618530273, | |
| "rewards/margins": 20.4375, | |
| "rewards/rejected": -2.672656297683716, | |
| "step": 30, | |
| "train_speed(iter/s)": 0.025268 | |
| }, | |
| { | |
| "epoch": 0.2978723404255319, | |
| "grad_norm": 0.4016430974006653, | |
| "learning_rate": 2.9807496218427986e-06, | |
| "logits/chosen": 0.7188476324081421, | |
| "logits/rejected": 1.051367163658142, | |
| "logps/chosen": -147.5749969482422, | |
| "logps/rejected": -167.4499969482422, | |
| "loss": 0.1833892822265625, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.18344727158546448, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 17.71875, | |
| "rewards/margins": 21.081249237060547, | |
| "rewards/rejected": -3.3648438453674316, | |
| "step": 35, | |
| "train_speed(iter/s)": 0.025286 | |
| }, | |
| { | |
| "epoch": 0.3404255319148936, | |
| "grad_norm": 0.31658557057380676, | |
| "learning_rate": 2.967807233871629e-06, | |
| "logits/chosen": 0.45478516817092896, | |
| "logits/rejected": 0.924609363079071, | |
| "logps/chosen": -145.5749969482422, | |
| "logps/rejected": -178.89999389648438, | |
| "loss": 0.17320556640625, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.17319336533546448, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.15625, | |
| "rewards/margins": 22.78125, | |
| "rewards/rejected": -3.624218702316284, | |
| "step": 40, | |
| "train_speed(iter/s)": 0.025482 | |
| }, | |
| { | |
| "epoch": 0.3829787234042553, | |
| "grad_norm": 0.27831006050109863, | |
| "learning_rate": 2.9515994204002487e-06, | |
| "logits/chosen": 0.1028236374258995, | |
| "logits/rejected": 0.770703136920929, | |
| "logps/chosen": -139.27499389648438, | |
| "logps/rejected": -189.64999389648438, | |
| "loss": 0.1634765625, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.16347655653953552, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.056249618530273, | |
| "rewards/margins": 23.15625, | |
| "rewards/rejected": -4.110937595367432, | |
| "step": 45, | |
| "train_speed(iter/s)": 0.025643 | |
| }, | |
| { | |
| "epoch": 0.425531914893617, | |
| "grad_norm": 0.25902220606803894, | |
| "learning_rate": 2.93216223889328e-06, | |
| "logits/chosen": -0.0480804443359375, | |
| "logits/rejected": 0.71923828125, | |
| "logps/chosen": -133.72500610351562, | |
| "logps/rejected": -198.5, | |
| "loss": 0.15597991943359374, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.15595702826976776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.168750762939453, | |
| "rewards/margins": 24.75, | |
| "rewards/rejected": -4.5859375, | |
| "step": 50, | |
| "train_speed(iter/s)": 0.025474 | |
| }, | |
| { | |
| "epoch": 0.46808510638297873, | |
| "grad_norm": 0.27047035098075867, | |
| "learning_rate": 2.9095389311788626e-06, | |
| "logits/chosen": -0.18197020888328552, | |
| "logits/rejected": 0.5546875, | |
| "logps/chosen": -125.92500305175781, | |
| "logps/rejected": -208.14999389648438, | |
| "loss": 0.14620513916015626, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.14619140326976776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.799999237060547, | |
| "rewards/margins": 24.693750381469727, | |
| "rewards/rejected": -4.910937309265137, | |
| "step": 55, | |
| "train_speed(iter/s)": 0.025306 | |
| }, | |
| { | |
| "epoch": 0.5106382978723404, | |
| "grad_norm": 0.2637348473072052, | |
| "learning_rate": 2.8837798272487033e-06, | |
| "logits/chosen": -0.17402306199073792, | |
| "logits/rejected": 0.616503894329071, | |
| "logps/chosen": -130.97500610351562, | |
| "logps/rejected": -213.89999389648438, | |
| "loss": 0.14871978759765625, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.14863280951976776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.625, | |
| "rewards/margins": 25.737499237060547, | |
| "rewards/rejected": -5.128125190734863, | |
| "step": 60, | |
| "train_speed(iter/s)": 0.025358 | |
| }, | |
| { | |
| "epoch": 0.5531914893617021, | |
| "grad_norm": 0.2757234573364258, | |
| "learning_rate": 2.8549422332891285e-06, | |
| "logits/chosen": -0.1392822265625, | |
| "logits/rejected": 0.5840820074081421, | |
| "logps/chosen": -122.125, | |
| "logps/rejected": -214.10000610351562, | |
| "loss": 0.1463531494140625, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.14619140326976776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.606250762939453, | |
| "rewards/margins": 25.912500381469727, | |
| "rewards/rejected": -5.298437595367432, | |
| "step": 65, | |
| "train_speed(iter/s)": 0.02546 | |
| }, | |
| { | |
| "epoch": 0.5957446808510638, | |
| "grad_norm": 0.283324658870697, | |
| "learning_rate": 2.823090304192217e-06, | |
| "logits/chosen": -0.08134154975414276, | |
| "logits/rejected": 0.64697265625, | |
| "logps/chosen": -113.42500305175781, | |
| "logps/rejected": -220.1999969482422, | |
| "loss": 0.13967132568359375, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.1396484375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.106250762939453, | |
| "rewards/margins": 24.731250762939453, | |
| "rewards/rejected": -5.606249809265137, | |
| "step": 70, | |
| "train_speed(iter/s)": 0.025562 | |
| }, | |
| { | |
| "epoch": 0.6382978723404256, | |
| "grad_norm": 0.25037115812301636, | |
| "learning_rate": 2.7882949008306392e-06, | |
| "logits/chosen": -0.18788452446460724, | |
| "logits/rejected": 0.564404308795929, | |
| "logps/chosen": -116.57499694824219, | |
| "logps/rejected": -237.60000610351562, | |
| "loss": 0.1353363037109375, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.13532714545726776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.306249618530273, | |
| "rewards/margins": 26.40625, | |
| "rewards/rejected": -6.090624809265137, | |
| "step": 75, | |
| "train_speed(iter/s)": 0.025539 | |
| }, | |
| { | |
| "epoch": 0.6808510638297872, | |
| "grad_norm": 0.2523050308227539, | |
| "learning_rate": 2.750633432413728e-06, | |
| "logits/chosen": -0.17770537734031677, | |
| "logits/rejected": 0.498748779296875, | |
| "logps/chosen": -112.0250015258789, | |
| "logps/rejected": -236.9499969482422, | |
| "loss": 0.13072662353515624, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.13068847358226776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 19.984375, | |
| "rewards/margins": 26.318750381469727, | |
| "rewards/rejected": -6.315625190734863, | |
| "step": 80, | |
| "train_speed(iter/s)": 0.025618 | |
| }, | |
| { | |
| "epoch": 0.723404255319149, | |
| "grad_norm": 0.2926895320415497, | |
| "learning_rate": 2.7101896842754866e-06, | |
| "logits/chosen": -0.23759765923023224, | |
| "logits/rejected": 0.45625001192092896, | |
| "logps/chosen": -111.875, | |
| "logps/rejected": -246.39999389648438, | |
| "loss": 0.12862091064453124, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.128662109375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.412500381469727, | |
| "rewards/margins": 27.200000762939453, | |
| "rewards/rejected": -6.787499904632568, | |
| "step": 85, | |
| "train_speed(iter/s)": 0.025658 | |
| }, | |
| { | |
| "epoch": 0.7659574468085106, | |
| "grad_norm": 0.2679479420185089, | |
| "learning_rate": 2.6670536314776595e-06, | |
| "logits/chosen": -0.18338623642921448, | |
| "logits/rejected": 0.4960083067417145, | |
| "logps/chosen": -108.4000015258789, | |
| "logps/rejected": -243.9499969482422, | |
| "loss": 0.13025360107421874, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.1302490234375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.603124618530273, | |
| "rewards/margins": 27.668750762939453, | |
| "rewards/rejected": -7.056250095367432, | |
| "step": 90, | |
| "train_speed(iter/s)": 0.025811 | |
| }, | |
| { | |
| "epoch": 0.8085106382978723, | |
| "grad_norm": 0.2539767622947693, | |
| "learning_rate": 2.6213212386425304e-06, | |
| "logits/chosen": -0.35136717557907104, | |
| "logits/rejected": 0.42353516817092896, | |
| "logps/chosen": -111.375, | |
| "logps/rejected": -262.45001220703125, | |
| "loss": 0.12735443115234374, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.1273193359375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.912500381469727, | |
| "rewards/margins": 28.243749618530273, | |
| "rewards/rejected": -7.326562404632568, | |
| "step": 95, | |
| "train_speed(iter/s)": 0.025843 | |
| }, | |
| { | |
| "epoch": 0.851063829787234, | |
| "grad_norm": 0.2883126735687256, | |
| "learning_rate": 2.573094246460773e-06, | |
| "logits/chosen": -0.3926025331020355, | |
| "logits/rejected": 0.3401855528354645, | |
| "logps/chosen": -101.8499984741211, | |
| "logps/rejected": -255.14999389648438, | |
| "loss": 0.12263336181640624, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.12258300930261612, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.381250381469727, | |
| "rewards/margins": 27.981250762939453, | |
| "rewards/rejected": -7.606249809265137, | |
| "step": 100, | |
| "train_speed(iter/s)": 0.025837 | |
| }, | |
| { | |
| "epoch": 0.851063829787234, | |
| "eval_logits/chosen": -0.3551269471645355, | |
| "eval_logits/rejected": 0.3993896543979645, | |
| "eval_logps/chosen": -114.19999694824219, | |
| "eval_logps/rejected": -283.3999938964844, | |
| "eval_loss": 0.12689127027988434, | |
| "eval_nll_loss": 0.13129882514476776, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": 21.575000762939453, | |
| "eval_rewards/margins": 29.962499618530273, | |
| "eval_rewards/rejected": -8.393750190734863, | |
| "eval_runtime": 16.796, | |
| "eval_samples_per_second": 4.465, | |
| "eval_steps_per_second": 0.595, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.8936170212765957, | |
| "grad_norm": 0.26771247386932373, | |
| "learning_rate": 2.5224799453492994e-06, | |
| "logits/chosen": -0.45463865995407104, | |
| "logits/rejected": 0.3071838319301605, | |
| "logps/chosen": -103.67500305175781, | |
| "logps/rejected": -268.0, | |
| "loss": 0.12566070556640624, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.12568359076976776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 21.1875, | |
| "rewards/margins": 29.40625, | |
| "rewards/rejected": -8.196874618530273, | |
| "step": 105, | |
| "train_speed(iter/s)": 0.025625 | |
| }, | |
| { | |
| "epoch": 0.9361702127659575, | |
| "grad_norm": 0.26143553853034973, | |
| "learning_rate": 2.469590936762654e-06, | |
| "logits/chosen": -0.6083984375, | |
| "logits/rejected": 0.19863280653953552, | |
| "logps/chosen": -102.7750015258789, | |
| "logps/rejected": -276.70001220703125, | |
| "loss": 0.11554946899414062, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.11552734673023224, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 21.356250762939453, | |
| "rewards/margins": 29.475000381469727, | |
| "rewards/rejected": -8.104687690734863, | |
| "step": 110, | |
| "train_speed(iter/s)": 0.025585 | |
| }, | |
| { | |
| "epoch": 0.9787234042553191, | |
| "grad_norm": 0.26631999015808105, | |
| "learning_rate": 2.414544882688961e-06, | |
| "logits/chosen": -0.5418945550918579, | |
| "logits/rejected": 0.3081298768520355, | |
| "logps/chosen": -96.5999984741211, | |
| "logps/rejected": -275.20001220703125, | |
| "loss": 0.11951446533203125, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.11955566704273224, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.450000762939453, | |
| "rewards/margins": 28.706249237060547, | |
| "rewards/rejected": -8.2578125, | |
| "step": 115, | |
| "train_speed(iter/s)": 0.025519 | |
| }, | |
| { | |
| "epoch": 1.025531914893617, | |
| "grad_norm": 0.261348694562912, | |
| "learning_rate": 2.3574642438877183e-06, | |
| "logits/chosen": -0.6530877947807312, | |
| "logits/rejected": 0.1812860369682312, | |
| "logps/chosen": -95.26190185546875, | |
| "logps/rejected": -286.047607421875, | |
| "loss": 0.1169525146484375, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.1113978773355484, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 21.404762268066406, | |
| "rewards/margins": 30.095237731933594, | |
| "rewards/rejected": -8.691964149475098, | |
| "step": 120, | |
| "train_speed(iter/s)": 0.025517 | |
| }, | |
| { | |
| "epoch": 1.0680851063829788, | |
| "grad_norm": 0.266865998506546, | |
| "learning_rate": 2.2984760074517883e-06, | |
| "logits/chosen": -0.744921863079071, | |
| "logits/rejected": 0.12298583984375, | |
| "logps/chosen": -91.2249984741211, | |
| "logps/rejected": -293.79998779296875, | |
| "loss": 0.10560760498046876, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.10561523586511612, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 21.450000762939453, | |
| "rewards/margins": 30.8125, | |
| "rewards/rejected": -9.362500190734863, | |
| "step": 125, | |
| "train_speed(iter/s)": 0.025403 | |
| }, | |
| { | |
| "epoch": 1.1106382978723404, | |
| "grad_norm": 0.24889616668224335, | |
| "learning_rate": 2.2377114042996625e-06, | |
| "logits/chosen": -0.736035168170929, | |
| "logits/rejected": 0.17665405571460724, | |
| "logps/chosen": -87.07499694824219, | |
| "logps/rejected": -295.8999938964844, | |
| "loss": 0.10625762939453125, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.10622558742761612, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.903125762939453, | |
| "rewards/margins": 30.087499618530273, | |
| "rewards/rejected": -9.206250190734863, | |
| "step": 130, | |
| "train_speed(iter/s)": 0.025476 | |
| }, | |
| { | |
| "epoch": 1.1531914893617021, | |
| "grad_norm": 0.2701774835586548, | |
| "learning_rate": 2.17530561722651e-06, | |
| "logits/chosen": -0.8929687738418579, | |
| "logits/rejected": 0.006848144344985485, | |
| "logps/chosen": -84.75, | |
| "logps/rejected": -301.04998779296875, | |
| "loss": 0.099554443359375, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.09956054389476776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 21.587499618530273, | |
| "rewards/margins": 31.524999618530273, | |
| "rewards/rejected": -9.928125381469727, | |
| "step": 135, | |
| "train_speed(iter/s)": 0.02541 | |
| }, | |
| { | |
| "epoch": 1.195744680851064, | |
| "grad_norm": 0.27444973587989807, | |
| "learning_rate": 2.1113974801634947e-06, | |
| "logits/chosen": -0.9662109613418579, | |
| "logits/rejected": 0.02514800988137722, | |
| "logps/chosen": -83.9749984741211, | |
| "logps/rejected": -317.3999938964844, | |
| "loss": 0.0990264892578125, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.09904785454273224, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 21.65625, | |
| "rewards/margins": 31.981250762939453, | |
| "rewards/rejected": -10.328125, | |
| "step": 140, | |
| "train_speed(iter/s)": 0.025373 | |
| }, | |
| { | |
| "epoch": 1.2382978723404254, | |
| "grad_norm": 0.29677021503448486, | |
| "learning_rate": 2.046129169314426e-06, | |
| "logits/chosen": -1.015039086341858, | |
| "logits/rejected": -0.07387695461511612, | |
| "logps/chosen": -83.88749694824219, | |
| "logps/rejected": -319.5, | |
| "loss": 0.0983306884765625, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.09833984076976776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 21.225000381469727, | |
| "rewards/margins": 31.84375, | |
| "rewards/rejected": -10.637499809265137, | |
| "step": 145, | |
| "train_speed(iter/s)": 0.025449 | |
| }, | |
| { | |
| "epoch": 1.2808510638297872, | |
| "grad_norm": 0.2784733474254608, | |
| "learning_rate": 1.979645886856868e-06, | |
| "logits/chosen": -1.034570336341858, | |
| "logits/rejected": -0.11180724948644638, | |
| "logps/chosen": -82.82499694824219, | |
| "logps/rejected": -329.3999938964844, | |
| "loss": 0.09817352294921874, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.09816894680261612, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 21.762500762939453, | |
| "rewards/margins": 32.525001525878906, | |
| "rewards/rejected": -10.774999618530273, | |
| "step": 150, | |
| "train_speed(iter/s)": 0.025502 | |
| }, | |
| { | |
| "epoch": 1.323404255319149, | |
| "grad_norm": 0.30967381596565247, | |
| "learning_rate": 1.9120955379113745e-06, | |
| "logits/chosen": -1.0148437023162842, | |
| "logits/rejected": -0.06700439751148224, | |
| "logps/chosen": -80.7874984741211, | |
| "logps/rejected": -321.0, | |
| "loss": 0.0952880859375, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.09526367485523224, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 20.943750381469727, | |
| "rewards/margins": 31.568750381469727, | |
| "rewards/rejected": -10.643750190734863, | |
| "step": 155, | |
| "train_speed(iter/s)": 0.025434 | |
| }, | |
| { | |
| "epoch": 1.3659574468085105, | |
| "grad_norm": 0.28575485944747925, | |
| "learning_rate": 1.843628401497495e-06, | |
| "logits/chosen": -1.0359375476837158, | |
| "logits/rejected": -0.08933410793542862, | |
| "logps/chosen": -82.32499694824219, | |
| "logps/rejected": -332.3999938964844, | |
| "loss": 0.09707794189453126, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.09709472954273224, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.137500762939453, | |
| "rewards/margins": 33.33124923706055, | |
| "rewards/rejected": -11.215624809265137, | |
| "step": 160, | |
| "train_speed(iter/s)": 0.02552 | |
| }, | |
| { | |
| "epoch": 1.4085106382978723, | |
| "grad_norm": 0.30471956729888916, | |
| "learning_rate": 1.7743967962085799e-06, | |
| "logits/chosen": -1.074609398841858, | |
| "logits/rejected": -0.05779419094324112, | |
| "logps/chosen": -77.69999694824219, | |
| "logps/rejected": -341.1000061035156, | |
| "loss": 0.0906494140625, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.09067382663488388, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.637500762939453, | |
| "rewards/margins": 34.0625, | |
| "rewards/rejected": -11.471875190734863, | |
| "step": 165, | |
| "train_speed(iter/s)": 0.025578 | |
| }, | |
| { | |
| "epoch": 1.451063829787234, | |
| "grad_norm": 0.2834922671318054, | |
| "learning_rate": 1.7045547413491502e-06, | |
| "logits/chosen": -1.1212890148162842, | |
| "logits/rejected": -0.20931701362133026, | |
| "logps/chosen": -79.4625015258789, | |
| "logps/rejected": -334.29998779296875, | |
| "loss": 0.09400634765625, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.09396972507238388, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 21.221874237060547, | |
| "rewards/margins": 32.66875076293945, | |
| "rewards/rejected": -11.440625190734863, | |
| "step": 170, | |
| "train_speed(iter/s)": 0.025547 | |
| }, | |
| { | |
| "epoch": 1.4936170212765958, | |
| "grad_norm": 0.33174580335617065, | |
| "learning_rate": 1.6342576142887001e-06, | |
| "logits/chosen": -1.1828124523162842, | |
| "logits/rejected": -0.18878021836280823, | |
| "logps/chosen": -75.125, | |
| "logps/rejected": -336.8999938964844, | |
| "loss": 0.09259796142578125, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.09261474758386612, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.381250381469727, | |
| "rewards/margins": 34.01874923706055, | |
| "rewards/rejected": -11.65625, | |
| "step": 175, | |
| "train_speed(iter/s)": 0.025586 | |
| }, | |
| { | |
| "epoch": 1.5361702127659576, | |
| "grad_norm": 0.3023318648338318, | |
| "learning_rate": 1.5636618047942224e-06, | |
| "logits/chosen": -1.2580077648162842, | |
| "logits/rejected": -0.2625732421875, | |
| "logps/chosen": -75.9749984741211, | |
| "logps/rejected": -345.70001220703125, | |
| "loss": 0.08780364990234375, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.08784179389476776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.193750381469727, | |
| "rewards/margins": 34.10625076293945, | |
| "rewards/rejected": -11.934374809265137, | |
| "step": 180, | |
| "train_speed(iter/s)": 0.025549 | |
| }, | |
| { | |
| "epoch": 1.578723404255319, | |
| "grad_norm": 0.3265296220779419, | |
| "learning_rate": 1.492924367110452e-06, | |
| "logits/chosen": -1.228124976158142, | |
| "logits/rejected": -0.18481139838695526, | |
| "logps/chosen": -72.94999694824219, | |
| "logps/rejected": -332.3999938964844, | |
| "loss": 0.0884490966796875, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.08845214545726776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 21.912500381469727, | |
| "rewards/margins": 33.243751525878906, | |
| "rewards/rejected": -11.318750381469727, | |
| "step": 185, | |
| "train_speed(iter/s)": 0.025616 | |
| }, | |
| { | |
| "epoch": 1.6212765957446809, | |
| "grad_norm": 0.3522721529006958, | |
| "learning_rate": 1.4222026705618485e-06, | |
| "logits/chosen": -1.308984398841858, | |
| "logits/rejected": -0.3495544493198395, | |
| "logps/chosen": -82.75, | |
| "logps/rejected": -339.0, | |
| "loss": 0.09106903076171875, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.09104003757238388, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.306249618530273, | |
| "rewards/margins": 33.86249923706055, | |
| "rewards/rejected": -11.59375, | |
| "step": 190, | |
| "train_speed(iter/s)": 0.025589 | |
| }, | |
| { | |
| "epoch": 1.6638297872340426, | |
| "grad_norm": 0.3345926105976105, | |
| "learning_rate": 1.3516540494536255e-06, | |
| "logits/chosen": -1.242578148841858, | |
| "logits/rejected": -0.24477234482765198, | |
| "logps/chosen": -80.61250305175781, | |
| "logps/rejected": -340.1000061035156, | |
| "loss": 0.0941619873046875, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.0941162109375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.962499618530273, | |
| "rewards/margins": 34.537498474121094, | |
| "rewards/rejected": -11.559374809265137, | |
| "step": 195, | |
| "train_speed(iter/s)": 0.025589 | |
| }, | |
| { | |
| "epoch": 1.7063829787234042, | |
| "grad_norm": 0.28689754009246826, | |
| "learning_rate": 1.281435453050683e-06, | |
| "logits/chosen": -1.349609375, | |
| "logits/rejected": -0.32838135957717896, | |
| "logps/chosen": -75.3375015258789, | |
| "logps/rejected": -342.29998779296875, | |
| "loss": 0.08746185302734374, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.08747558295726776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.237499237060547, | |
| "rewards/margins": 34.03125, | |
| "rewards/rejected": -11.774999618530273, | |
| "step": 200, | |
| "train_speed(iter/s)": 0.025601 | |
| }, | |
| { | |
| "epoch": 1.7063829787234042, | |
| "eval_logits/chosen": -1.2234375476837158, | |
| "eval_logits/rejected": -0.24904784560203552, | |
| "eval_logps/chosen": -88.7249984741211, | |
| "eval_logps/rejected": -361.3999938964844, | |
| "eval_loss": 0.09561848640441895, | |
| "eval_nll_loss": 0.10009765625, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": 22.875, | |
| "eval_rewards/margins": 35.17499923706055, | |
| "eval_rewards/rejected": -12.3125, | |
| "eval_runtime": 16.8954, | |
| "eval_samples_per_second": 4.439, | |
| "eval_steps_per_second": 0.592, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.748936170212766, | |
| "grad_norm": 0.3166359066963196, | |
| "learning_rate": 1.211703096413141e-06, | |
| "logits/chosen": -1.329687476158142, | |
| "logits/rejected": -0.3259124755859375, | |
| "logps/chosen": -77.3125, | |
| "logps/rejected": -346.0, | |
| "loss": 0.0876129150390625, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.08759765326976776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.806249618530273, | |
| "rewards/margins": 34.587501525878906, | |
| "rewards/rejected": -11.800000190734863, | |
| "step": 205, | |
| "train_speed(iter/s)": 0.025467 | |
| }, | |
| { | |
| "epoch": 1.7914893617021277, | |
| "grad_norm": 0.3028061091899872, | |
| "learning_rate": 1.1426121128652528e-06, | |
| "logits/chosen": -1.3445312976837158, | |
| "logits/rejected": -0.32697755098342896, | |
| "logps/chosen": -74.69999694824219, | |
| "logps/rejected": -338.3999938964844, | |
| "loss": 0.08412704467773438, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.08408202975988388, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 21.924999237060547, | |
| "rewards/margins": 33.65625, | |
| "rewards/rejected": -11.725000381469727, | |
| "step": 210, | |
| "train_speed(iter/s)": 0.025437 | |
| }, | |
| { | |
| "epoch": 1.8340425531914892, | |
| "grad_norm": 0.3295106291770935, | |
| "learning_rate": 1.0743162088708549e-06, | |
| "logits/chosen": -1.3125, | |
| "logits/rejected": -0.33466798067092896, | |
| "logps/chosen": -72.98750305175781, | |
| "logps/rejected": -345.0, | |
| "loss": 0.08744964599609376, | |
| "memory(GiB)": 78.11, | |
| "nll_loss": 0.08747558295726776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.737499237060547, | |
| "rewards/margins": 34.631248474121094, | |
| "rewards/rejected": -11.878125190734863, | |
| "step": 215, | |
| "train_speed(iter/s)": 0.025475 | |
| }, | |
| { | |
| "epoch": 1.8765957446808512, | |
| "grad_norm": 0.35723093152046204, | |
| "learning_rate": 1.006967322083147e-06, | |
| "logits/chosen": -1.4054687023162842, | |
| "logits/rejected": -0.357666015625, | |
| "logps/chosen": -67.86250305175781, | |
| "logps/rejected": -345.5, | |
| "loss": 0.08179931640625, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.08175048977136612, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.371875762939453, | |
| "rewards/margins": 34.068748474121094, | |
| "rewards/rejected": -11.684374809265137, | |
| "step": 220, | |
| "train_speed(iter/s)": 0.025478 | |
| }, | |
| { | |
| "epoch": 1.9191489361702128, | |
| "grad_norm": 0.31711524724960327, | |
| "learning_rate": 9.407152833295372e-07, | |
| "logits/chosen": -1.478515625, | |
| "logits/rejected": -0.43549805879592896, | |
| "logps/chosen": -71.625, | |
| "logps/rejected": -349.5, | |
| "loss": 0.08211593627929688, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.08212890475988388, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.71875, | |
| "rewards/margins": 34.82500076293945, | |
| "rewards/rejected": -12.143750190734863, | |
| "step": 225, | |
| "train_speed(iter/s)": 0.025434 | |
| }, | |
| { | |
| "epoch": 1.9617021276595743, | |
| "grad_norm": 0.33292317390441895, | |
| "learning_rate": 8.757074832835386e-07, | |
| "logits/chosen": -1.5011718273162842, | |
| "logits/rejected": -0.40864259004592896, | |
| "logps/chosen": -72.36250305175781, | |
| "logps/rejected": -353.0, | |
| "loss": 0.081207275390625, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.08120117336511612, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 23.90625, | |
| "rewards/margins": 35.91875076293945, | |
| "rewards/rejected": -12.018750190734863, | |
| "step": 230, | |
| "train_speed(iter/s)": 0.025453 | |
| }, | |
| { | |
| "epoch": 2.008510638297872, | |
| "grad_norm": 0.32713377475738525, | |
| "learning_rate": 8.12088544565264e-07, | |
| "logits/chosen": -1.4508928060531616, | |
| "logits/rejected": -0.4289202094078064, | |
| "logps/chosen": -65.23809814453125, | |
| "logps/rejected": -349.047607421875, | |
| "loss": 0.08363189697265624, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.0796247199177742, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 21.845237731933594, | |
| "rewards/margins": 34.005950927734375, | |
| "rewards/rejected": -12.142857551574707, | |
| "step": 235, | |
| "train_speed(iter/s)": 0.025437 | |
| }, | |
| { | |
| "epoch": 2.051063829787234, | |
| "grad_norm": 0.2999580204486847, | |
| "learning_rate": 7.500000000000003e-07, | |
| "logits/chosen": -1.605078101158142, | |
| "logits/rejected": -0.5160156488418579, | |
| "logps/chosen": -58.01250076293945, | |
| "logps/rejected": -359.3999938964844, | |
| "loss": 0.06568679809570313, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06569824367761612, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 23.456249237060547, | |
| "rewards/margins": 35.88750076293945, | |
| "rewards/rejected": -12.465624809265137, | |
| "step": 240, | |
| "train_speed(iter/s)": 0.025403 | |
| }, | |
| { | |
| "epoch": 2.0936170212765957, | |
| "grad_norm": 0.3553922772407532, | |
| "learning_rate": 6.895799777506399e-07, | |
| "logits/chosen": -1.6183593273162842, | |
| "logits/rejected": -0.5166015625, | |
| "logps/chosen": -61.787498474121094, | |
| "logps/rejected": -365.5, | |
| "loss": 0.07020797729492187, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.07022704929113388, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.4375, | |
| "rewards/margins": 35.25, | |
| "rewards/rejected": -12.784375190734863, | |
| "step": 245, | |
| "train_speed(iter/s)": 0.025384 | |
| }, | |
| { | |
| "epoch": 2.1361702127659576, | |
| "grad_norm": 0.30416449904441833, | |
| "learning_rate": 6.3096289402445e-07, | |
| "logits/chosen": -1.5906250476837158, | |
| "logits/rejected": -0.5077148675918579, | |
| "logps/chosen": -57.6875, | |
| "logps/rejected": -379.20001220703125, | |
| "loss": 0.06732330322265626, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06728515774011612, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.893749237060547, | |
| "rewards/margins": 36.212501525878906, | |
| "rewards/rejected": -13.306249618530273, | |
| "step": 250, | |
| "train_speed(iter/s)": 0.025433 | |
| }, | |
| { | |
| "epoch": 2.178723404255319, | |
| "grad_norm": 0.29929059743881226, | |
| "learning_rate": 5.742791540378176e-07, | |
| "logits/chosen": -1.740234375, | |
| "logits/rejected": -0.644824206829071, | |
| "logps/chosen": -60.212501525878906, | |
| "logps/rejected": -368.1000061035156, | |
| "loss": 0.0677215576171875, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06768798828125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.662500381469727, | |
| "rewards/margins": 35.6875, | |
| "rewards/rejected": -13.021875381469727, | |
| "step": 255, | |
| "train_speed(iter/s)": 0.025403 | |
| }, | |
| { | |
| "epoch": 2.2212765957446807, | |
| "grad_norm": 0.3583555817604065, | |
| "learning_rate": 5.196548619042311e-07, | |
| "logits/chosen": -1.7507812976837158, | |
| "logits/rejected": -0.62744140625, | |
| "logps/chosen": -57.537498474121094, | |
| "logps/rejected": -370.5, | |
| "loss": 0.06677093505859374, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06679687649011612, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.850000381469727, | |
| "rewards/margins": 36.04999923706055, | |
| "rewards/rejected": -13.178125381469727, | |
| "step": 260, | |
| "train_speed(iter/s)": 0.025386 | |
| }, | |
| { | |
| "epoch": 2.2638297872340427, | |
| "grad_norm": 0.35006001591682434, | |
| "learning_rate": 4.672115400909117e-07, | |
| "logits/chosen": -1.760156273841858, | |
| "logits/rejected": -0.6236327886581421, | |
| "logps/chosen": -55.13750076293945, | |
| "logps/rejected": -373.20001220703125, | |
| "loss": 0.0646148681640625, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06462402641773224, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.743749618530273, | |
| "rewards/margins": 35.89374923706055, | |
| "rewards/rejected": -13.184374809265137, | |
| "step": 265, | |
| "train_speed(iter/s)": 0.02541 | |
| }, | |
| { | |
| "epoch": 2.3063829787234043, | |
| "grad_norm": 0.3760732412338257, | |
| "learning_rate": 4.170658590682134e-07, | |
| "logits/chosen": -1.755859375, | |
| "logits/rejected": -0.6622070074081421, | |
| "logps/chosen": -54.17499923706055, | |
| "logps/rejected": -383.8999938964844, | |
| "loss": 0.06567840576171875, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06573486328125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 23.168750762939453, | |
| "rewards/margins": 36.662498474121094, | |
| "rewards/rejected": -13.5, | |
| "step": 270, | |
| "train_speed(iter/s)": 0.025453 | |
| }, | |
| { | |
| "epoch": 2.348936170212766, | |
| "grad_norm": 0.32026490569114685, | |
| "learning_rate": 3.6932937775324586e-07, | |
| "logits/chosen": -1.851953148841858, | |
| "logits/rejected": -0.682812511920929, | |
| "logps/chosen": -53.837501525878906, | |
| "logps/rejected": -378.70001220703125, | |
| "loss": 0.063421630859375, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06342773139476776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 23.049999237060547, | |
| "rewards/margins": 36.58124923706055, | |
| "rewards/rejected": -13.506250381469727, | |
| "step": 275, | |
| "train_speed(iter/s)": 0.02545 | |
| }, | |
| { | |
| "epoch": 2.391489361702128, | |
| "grad_norm": 0.3174494504928589, | |
| "learning_rate": 3.2410829532515156e-07, | |
| "logits/chosen": -1.8175780773162842, | |
| "logits/rejected": -0.7105468511581421, | |
| "logps/chosen": -59.625, | |
| "logps/rejected": -367.0, | |
| "loss": 0.0679901123046875, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06796874850988388, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 23.868749618530273, | |
| "rewards/margins": 37.25, | |
| "rewards/rejected": -13.409375190734863, | |
| "step": 280, | |
| "train_speed(iter/s)": 0.025469 | |
| }, | |
| { | |
| "epoch": 2.4340425531914893, | |
| "grad_norm": 0.33984753489494324, | |
| "learning_rate": 2.8150321496417134e-07, | |
| "logits/chosen": -1.87109375, | |
| "logits/rejected": -0.7925781011581421, | |
| "logps/chosen": -59.212501525878906, | |
| "logps/rejected": -390.3999938964844, | |
| "loss": 0.06616058349609374, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06617431342601776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 23.549999237060547, | |
| "rewards/margins": 37.57500076293945, | |
| "rewards/rejected": -14.046875, | |
| "step": 285, | |
| "train_speed(iter/s)": 0.025429 | |
| }, | |
| { | |
| "epoch": 2.476595744680851, | |
| "grad_norm": 0.3459691107273102, | |
| "learning_rate": 2.4160892004010924e-07, | |
| "logits/chosen": -1.845703125, | |
| "logits/rejected": -0.737109363079071, | |
| "logps/chosen": -54.82500076293945, | |
| "logps/rejected": -380.3999938964844, | |
| "loss": 0.06555709838867188, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06560058891773224, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 23.674999237060547, | |
| "rewards/margins": 37.45000076293945, | |
| "rewards/rejected": -13.78125, | |
| "step": 290, | |
| "train_speed(iter/s)": 0.025456 | |
| }, | |
| { | |
| "epoch": 2.519148936170213, | |
| "grad_norm": 0.33621102571487427, | |
| "learning_rate": 2.0451416324810927e-07, | |
| "logits/chosen": -1.851171851158142, | |
| "logits/rejected": -0.726367175579071, | |
| "logps/chosen": -55.17499923706055, | |
| "logps/rejected": -389.0, | |
| "loss": 0.06555557250976562, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06556396186351776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.631250381469727, | |
| "rewards/margins": 36.45000076293945, | |
| "rewards/rejected": -13.834375381469727, | |
| "step": 295, | |
| "train_speed(iter/s)": 0.025462 | |
| }, | |
| { | |
| "epoch": 2.5617021276595744, | |
| "grad_norm": 0.3450789153575897, | |
| "learning_rate": 1.7030146916085187e-07, | |
| "logits/chosen": -1.864843726158142, | |
| "logits/rejected": -0.7413085699081421, | |
| "logps/chosen": -53.76250076293945, | |
| "logps/rejected": -380.79998779296875, | |
| "loss": 0.0641754150390625, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06414794921875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.737499237060547, | |
| "rewards/margins": 36.493751525878906, | |
| "rewards/rejected": -13.712499618530273, | |
| "step": 300, | |
| "train_speed(iter/s)": 0.025502 | |
| }, | |
| { | |
| "epoch": 2.5617021276595744, | |
| "eval_logits/chosen": -1.7765624523162842, | |
| "eval_logits/rejected": -0.6953125, | |
| "eval_logps/chosen": -78.2750015258789, | |
| "eval_logps/rejected": -407.79998779296875, | |
| "eval_loss": 0.08268880099058151, | |
| "eval_nll_loss": 0.08737792819738388, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": 23.412500381469727, | |
| "eval_rewards/margins": 38.0, | |
| "eval_rewards/rejected": -14.606249809265137, | |
| "eval_runtime": 17.0595, | |
| "eval_samples_per_second": 4.396, | |
| "eval_steps_per_second": 0.586, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.604255319148936, | |
| "grad_norm": 0.3593633770942688, | |
| "learning_rate": 1.3904695063643336e-07, | |
| "logits/chosen": -1.878515601158142, | |
| "logits/rejected": -0.77197265625, | |
| "logps/chosen": -57.875, | |
| "logps/rejected": -386.3999938964844, | |
| "loss": 0.064154052734375, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06413574516773224, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 23.649999618530273, | |
| "rewards/margins": 37.650001525878906, | |
| "rewards/rejected": -13.984375, | |
| "step": 305, | |
| "train_speed(iter/s)": 0.025388 | |
| }, | |
| { | |
| "epoch": 2.646808510638298, | |
| "grad_norm": 0.36554569005966187, | |
| "learning_rate": 1.1082013949036119e-07, | |
| "logits/chosen": -1.890625, | |
| "logits/rejected": -0.768261730670929, | |
| "logps/chosen": -59.54999923706055, | |
| "logps/rejected": -386.1000061035156, | |
| "loss": 0.06463623046875, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06468506157398224, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 24.274999618530273, | |
| "rewards/margins": 38.20000076293945, | |
| "rewards/rejected": -13.925000190734863, | |
| "step": 310, | |
| "train_speed(iter/s)": 0.025368 | |
| }, | |
| { | |
| "epoch": 2.6893617021276595, | |
| "grad_norm": 0.3650209605693817, | |
| "learning_rate": 8.568383180837369e-08, | |
| "logits/chosen": -1.8703124523162842, | |
| "logits/rejected": -0.714648425579071, | |
| "logps/chosen": -53.162498474121094, | |
| "logps/rejected": -390.29998779296875, | |
| "loss": 0.062223052978515624, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06223144382238388, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 23.568750381469727, | |
| "rewards/margins": 37.318748474121094, | |
| "rewards/rejected": -13.746874809265137, | |
| "step": 315, | |
| "train_speed(iter/s)": 0.025378 | |
| }, | |
| { | |
| "epoch": 2.731914893617021, | |
| "grad_norm": 0.37099653482437134, | |
| "learning_rate": 6.369394824421365e-08, | |
| "logits/chosen": -1.875390648841858, | |
| "logits/rejected": -0.8294922113418579, | |
| "logps/chosen": -60.73749923706055, | |
| "logps/rejected": -385.8999938964844, | |
| "loss": 0.06792373657226562, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06791992485523224, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 23.037500381469727, | |
| "rewards/margins": 37.11249923706055, | |
| "rewards/rejected": -14.065625190734863, | |
| "step": 320, | |
| "train_speed(iter/s)": 0.02535 | |
| }, | |
| { | |
| "epoch": 2.774468085106383, | |
| "grad_norm": 0.35642552375793457, | |
| "learning_rate": 4.489940961314881e-08, | |
| "logits/chosen": -1.8425781726837158, | |
| "logits/rejected": -0.7470703125, | |
| "logps/chosen": -60.037498474121094, | |
| "logps/rejected": -395.1000061035156, | |
| "loss": 0.06998977661132813, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06997070461511612, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 24.143749237060547, | |
| "rewards/margins": 37.962501525878906, | |
| "rewards/rejected": -13.831250190734863, | |
| "step": 325, | |
| "train_speed(iter/s)": 0.025386 | |
| }, | |
| { | |
| "epoch": 2.8170212765957445, | |
| "grad_norm": 0.32820969820022583, | |
| "learning_rate": 2.9342028058009896e-08, | |
| "logits/chosen": -1.870703101158142, | |
| "logits/rejected": -0.7451171875, | |
| "logps/chosen": -54.76250076293945, | |
| "logps/rejected": -387.29998779296875, | |
| "loss": 0.06422500610351563, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06422118842601776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.887500762939453, | |
| "rewards/margins": 36.88750076293945, | |
| "rewards/rejected": -14.028124809265137, | |
| "step": 330, | |
| "train_speed(iter/s)": 0.02541 | |
| }, | |
| { | |
| "epoch": 2.8595744680851065, | |
| "grad_norm": 0.35879823565483093, | |
| "learning_rate": 1.7056414029866018e-08, | |
| "logits/chosen": -1.8328125476837158, | |
| "logits/rejected": -0.7754882574081421, | |
| "logps/chosen": -53.400001525878906, | |
| "logps/rejected": -388.29998779296875, | |
| "loss": 0.064886474609375, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06489257514476776, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.431249618530273, | |
| "rewards/margins": 36.58124923706055, | |
| "rewards/rejected": -14.153124809265137, | |
| "step": 335, | |
| "train_speed(iter/s)": 0.025437 | |
| }, | |
| { | |
| "epoch": 2.902127659574468, | |
| "grad_norm": 0.32765164971351624, | |
| "learning_rate": 8.069899290277683e-09, | |
| "logits/chosen": -1.848046898841858, | |
| "logits/rejected": -0.744140625, | |
| "logps/chosen": -54.67499923706055, | |
| "logps/rejected": -380.5, | |
| "loss": 0.0650665283203125, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06510009616613388, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.681249618530273, | |
| "rewards/margins": 36.70000076293945, | |
| "rewards/rejected": -14.009374618530273, | |
| "step": 340, | |
| "train_speed(iter/s)": 0.025437 | |
| }, | |
| { | |
| "epoch": 2.94468085106383, | |
| "grad_norm": 0.370295912027359, | |
| "learning_rate": 2.4024761064254664e-09, | |
| "logits/chosen": -1.872656226158142, | |
| "logits/rejected": -0.7118164300918579, | |
| "logps/chosen": -57.849998474121094, | |
| "logps/rejected": -377.5, | |
| "loss": 0.06773147583007813, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06773681938648224, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 23.737499237060547, | |
| "rewards/margins": 37.337501525878906, | |
| "rewards/rejected": -13.590624809265137, | |
| "step": 345, | |
| "train_speed(iter/s)": 0.02546 | |
| }, | |
| { | |
| "epoch": 2.9872340425531916, | |
| "grad_norm": 0.3416670560836792, | |
| "learning_rate": 6.675277438356054e-11, | |
| "logits/chosen": -1.8679687976837158, | |
| "logits/rejected": -0.7328125238418579, | |
| "logps/chosen": -56.537498474121094, | |
| "logps/rejected": -390.0, | |
| "loss": 0.06469192504882812, | |
| "memory(GiB)": 86.43, | |
| "nll_loss": 0.06467285007238388, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 22.943750381469727, | |
| "rewards/margins": 36.98749923706055, | |
| "rewards/rejected": -14.0625, | |
| "step": 350, | |
| "train_speed(iter/s)": 0.025452 | |
| }, | |
| { | |
| "epoch": 2.9957446808510637, | |
| "eval_logits/chosen": -1.7937500476837158, | |
| "eval_logits/rejected": -0.7124999761581421, | |
| "eval_logps/chosen": -77.80000305175781, | |
| "eval_logps/rejected": -408.6000061035156, | |
| "eval_loss": 0.08202473819255829, | |
| "eval_nll_loss": 0.08659668266773224, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": 23.399999618530273, | |
| "eval_rewards/margins": 38.087501525878906, | |
| "eval_rewards/rejected": -14.662500381469727, | |
| "eval_runtime": 17.0317, | |
| "eval_samples_per_second": 4.404, | |
| "eval_steps_per_second": 0.587, | |
| "step": 351 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 351, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.038030084192076e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |