Llama-Poro-2-8B-Instruct / trainer_state.json
jonabur's picture
Upload folder using huggingface_hub
a101b75 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1446,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002074688796680498,
"grad_norm": 8.200224942309267,
"learning_rate": 3.4482758620689654e-09,
"logits/chosen": -0.7109375,
"logits/rejected": -0.94140625,
"logps/chosen": -336.0,
"logps/rejected": -288.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.02074688796680498,
"grad_norm": 8.985547005814288,
"learning_rate": 3.448275862068965e-08,
"logits/chosen": -1.40625,
"logits/rejected": -1.46875,
"logps/chosen": -380.0,
"logps/rejected": -326.0,
"loss": 0.692,
"rewards/accuracies": 0.1111111119389534,
"rewards/chosen": -0.0011138916015625,
"rewards/margins": -0.00055694580078125,
"rewards/rejected": -0.00055694580078125,
"step": 10
},
{
"epoch": 0.04149377593360996,
"grad_norm": 7.537541071519504,
"learning_rate": 6.89655172413793e-08,
"logits/chosen": -1.4296875,
"logits/rejected": -1.4453125,
"logps/chosen": -328.0,
"logps/rejected": -366.0,
"loss": 0.6914,
"rewards/accuracies": 0.25,
"rewards/chosen": 0.000751495361328125,
"rewards/margins": -0.00225830078125,
"rewards/rejected": 0.0030059814453125,
"step": 20
},
{
"epoch": 0.06224066390041494,
"grad_norm": 8.46410953809254,
"learning_rate": 1.0344827586206897e-07,
"logits/chosen": -1.375,
"logits/rejected": -1.46875,
"logps/chosen": -420.0,
"logps/rejected": -374.0,
"loss": 0.692,
"rewards/accuracies": 0.20000000298023224,
"rewards/chosen": 0.00238037109375,
"rewards/margins": 0.001129150390625,
"rewards/rejected": 0.001251220703125,
"step": 30
},
{
"epoch": 0.08298755186721991,
"grad_norm": 8.157147362592257,
"learning_rate": 1.379310344827586e-07,
"logits/chosen": -1.453125,
"logits/rejected": -1.5078125,
"logps/chosen": -432.0,
"logps/rejected": -388.0,
"loss": 0.6911,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": 0.00250244140625,
"rewards/margins": 0.0052490234375,
"rewards/rejected": -0.00274658203125,
"step": 40
},
{
"epoch": 0.1037344398340249,
"grad_norm": 8.08592590477226,
"learning_rate": 1.7241379310344828e-07,
"logits/chosen": -1.40625,
"logits/rejected": -1.4296875,
"logps/chosen": -340.0,
"logps/rejected": -300.0,
"loss": 0.6909,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": 0.0054931640625,
"rewards/margins": 0.00150299072265625,
"rewards/rejected": 0.003997802734375,
"step": 50
},
{
"epoch": 0.12448132780082988,
"grad_norm": 8.524351634398997,
"learning_rate": 2.0689655172413793e-07,
"logits/chosen": -1.4375,
"logits/rejected": -1.4375,
"logps/chosen": -506.0,
"logps/rejected": -478.0,
"loss": 0.6896,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": 0.02001953125,
"rewards/margins": 0.0030059814453125,
"rewards/rejected": 0.0169677734375,
"step": 60
},
{
"epoch": 0.14522821576763487,
"grad_norm": 7.5678704019111365,
"learning_rate": 2.413793103448276e-07,
"logits/chosen": -1.4296875,
"logits/rejected": -1.46875,
"logps/chosen": -386.0,
"logps/rejected": -262.0,
"loss": 0.6877,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.028076171875,
"rewards/margins": 0.020263671875,
"rewards/rejected": 0.00775146484375,
"step": 70
},
{
"epoch": 0.16597510373443983,
"grad_norm": 7.513277000409331,
"learning_rate": 2.758620689655172e-07,
"logits/chosen": -1.3515625,
"logits/rejected": -1.375,
"logps/chosen": -320.0,
"logps/rejected": -312.0,
"loss": 0.6847,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": 0.029052734375,
"rewards/margins": 0.001739501953125,
"rewards/rejected": 0.0272216796875,
"step": 80
},
{
"epoch": 0.18672199170124482,
"grad_norm": 7.275100713194908,
"learning_rate": 3.103448275862069e-07,
"logits/chosen": -1.40625,
"logits/rejected": -1.4140625,
"logps/chosen": -452.0,
"logps/rejected": -404.0,
"loss": 0.6811,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.062255859375,
"rewards/margins": 0.04150390625,
"rewards/rejected": 0.020751953125,
"step": 90
},
{
"epoch": 0.2074688796680498,
"grad_norm": 7.279491499537065,
"learning_rate": 3.4482758620689656e-07,
"logits/chosen": -1.46875,
"logits/rejected": -1.4921875,
"logps/chosen": -488.0,
"logps/rejected": -470.0,
"loss": 0.6749,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.08154296875,
"rewards/margins": 0.04296875,
"rewards/rejected": 0.038818359375,
"step": 100
},
{
"epoch": 0.22821576763485477,
"grad_norm": 7.0878060086303085,
"learning_rate": 3.793103448275862e-07,
"logits/chosen": -1.4375,
"logits/rejected": -1.40625,
"logps/chosen": -346.0,
"logps/rejected": -350.0,
"loss": 0.6672,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.078125,
"rewards/margins": 0.041748046875,
"rewards/rejected": 0.03662109375,
"step": 110
},
{
"epoch": 0.24896265560165975,
"grad_norm": 6.836599327469131,
"learning_rate": 4.1379310344827586e-07,
"logits/chosen": -1.453125,
"logits/rejected": -1.453125,
"logps/chosen": -436.0,
"logps/rejected": -328.0,
"loss": 0.6642,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.12109375,
"rewards/margins": 0.09521484375,
"rewards/rejected": 0.0260009765625,
"step": 120
},
{
"epoch": 0.2697095435684647,
"grad_norm": 7.277406568232138,
"learning_rate": 4.482758620689655e-07,
"logits/chosen": -1.5078125,
"logits/rejected": -1.421875,
"logps/chosen": -368.0,
"logps/rejected": -350.0,
"loss": 0.6436,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.0615234375,
"rewards/margins": 0.0908203125,
"rewards/rejected": -0.029052734375,
"step": 130
},
{
"epoch": 0.29045643153526973,
"grad_norm": 7.649612249719239,
"learning_rate": 4.827586206896552e-07,
"logits/chosen": -1.3984375,
"logits/rejected": -1.390625,
"logps/chosen": -362.0,
"logps/rejected": -322.0,
"loss": 0.6473,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.046875,
"rewards/margins": 0.150390625,
"rewards/rejected": -0.103515625,
"step": 140
},
{
"epoch": 0.3112033195020747,
"grad_norm": 7.208353024534284,
"learning_rate": 4.99981778257793e-07,
"logits/chosen": -1.453125,
"logits/rejected": -1.4375,
"logps/chosen": -436.0,
"logps/rejected": -432.0,
"loss": 0.6378,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.05517578125,
"rewards/margins": 0.0341796875,
"rewards/rejected": -0.0888671875,
"step": 150
},
{
"epoch": 0.33195020746887965,
"grad_norm": 8.231246609192565,
"learning_rate": 4.998360202572815e-07,
"logits/chosen": -1.3359375,
"logits/rejected": -1.3984375,
"logps/chosen": -372.0,
"logps/rejected": -376.0,
"loss": 0.6266,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.003570556640625,
"rewards/margins": 0.1337890625,
"rewards/rejected": -0.1298828125,
"step": 160
},
{
"epoch": 0.35269709543568467,
"grad_norm": 7.711663165069283,
"learning_rate": 4.995445892440316e-07,
"logits/chosen": -1.40625,
"logits/rejected": -1.2578125,
"logps/chosen": -388.0,
"logps/rejected": -434.0,
"loss": 0.631,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.1064453125,
"rewards/margins": 0.083984375,
"rewards/rejected": -0.1904296875,
"step": 170
},
{
"epoch": 0.37344398340248963,
"grad_norm": 8.152087874731409,
"learning_rate": 4.991076551440359e-07,
"logits/chosen": -1.2890625,
"logits/rejected": -1.2421875,
"logps/chosen": -450.0,
"logps/rejected": -448.0,
"loss": 0.6192,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.021484375,
"rewards/margins": 0.1630859375,
"rewards/rejected": -0.1416015625,
"step": 180
},
{
"epoch": 0.3941908713692946,
"grad_norm": 8.294099174165812,
"learning_rate": 4.985254727224266e-07,
"logits/chosen": -1.2578125,
"logits/rejected": -1.21875,
"logps/chosen": -436.0,
"logps/rejected": -402.0,
"loss": 0.6019,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.146484375,
"rewards/margins": 0.232421875,
"rewards/rejected": -0.08642578125,
"step": 190
},
{
"epoch": 0.4149377593360996,
"grad_norm": 8.447421779277066,
"learning_rate": 4.977983814349285e-07,
"logits/chosen": -1.3125,
"logits/rejected": -1.359375,
"logps/chosen": -468.0,
"logps/rejected": -396.0,
"loss": 0.608,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.220703125,
"rewards/margins": 0.0311279296875,
"rewards/rejected": -0.251953125,
"step": 200
},
{
"epoch": 0.43568464730290457,
"grad_norm": 9.723114067333197,
"learning_rate": 4.969268052299307e-07,
"logits/chosen": -1.1796875,
"logits/rejected": -1.234375,
"logps/chosen": -356.0,
"logps/rejected": -366.0,
"loss": 0.5878,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.1279296875,
"rewards/margins": 0.29296875,
"rewards/rejected": -0.421875,
"step": 210
},
{
"epoch": 0.45643153526970953,
"grad_norm": 10.716738710931661,
"learning_rate": 4.959112523012938e-07,
"logits/chosen": -1.328125,
"logits/rejected": -1.359375,
"logps/chosen": -480.0,
"logps/rejected": -460.0,
"loss": 0.5766,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.328125,
"rewards/margins": 0.3671875,
"rewards/rejected": -0.6953125,
"step": 220
},
{
"epoch": 0.47717842323651455,
"grad_norm": 9.819110125640536,
"learning_rate": 4.947523147920345e-07,
"logits/chosen": -1.3203125,
"logits/rejected": -1.234375,
"logps/chosen": -548.0,
"logps/rejected": -450.0,
"loss": 0.5689,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.68359375,
"rewards/margins": 0.330078125,
"rewards/rejected": -1.015625,
"step": 230
},
{
"epoch": 0.4979253112033195,
"grad_norm": 11.406864477616395,
"learning_rate": 4.934506684490621e-07,
"logits/chosen": -1.2109375,
"logits/rejected": -1.1953125,
"logps/chosen": -448.0,
"logps/rejected": -442.0,
"loss": 0.5737,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.6796875,
"rewards/margins": 0.375,
"rewards/rejected": -1.0546875,
"step": 240
},
{
"epoch": 0.5186721991701245,
"grad_norm": 12.451204475119791,
"learning_rate": 4.920070722291682e-07,
"logits/chosen": -1.3515625,
"logits/rejected": -1.4140625,
"logps/chosen": -520.0,
"logps/rejected": -552.0,
"loss": 0.5527,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.8515625,
"rewards/margins": 0.7890625,
"rewards/rejected": -1.640625,
"step": 250
},
{
"epoch": 0.5394190871369294,
"grad_norm": 14.291210178551019,
"learning_rate": 4.904223678564975e-07,
"logits/chosen": -1.171875,
"logits/rejected": -1.1015625,
"logps/chosen": -498.0,
"logps/rejected": -450.0,
"loss": 0.5554,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.953125,
"rewards/margins": 0.32421875,
"rewards/rejected": -1.2734375,
"step": 260
},
{
"epoch": 0.5601659751037344,
"grad_norm": 13.863578096384135,
"learning_rate": 4.886974793317607e-07,
"logits/chosen": -1.171875,
"logits/rejected": -1.28125,
"logps/chosen": -516.0,
"logps/rejected": -532.0,
"loss": 0.5048,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.8203125,
"rewards/margins": 0.875,
"rewards/rejected": -1.6875,
"step": 270
},
{
"epoch": 0.5809128630705395,
"grad_norm": 12.53026425282876,
"learning_rate": 4.86833412393473e-07,
"logits/chosen": -1.0703125,
"logits/rejected": -1.1328125,
"logps/chosen": -432.0,
"logps/rejected": -452.0,
"loss": 0.5557,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.68359375,
"rewards/margins": 0.80078125,
"rewards/rejected": -1.4921875,
"step": 280
},
{
"epoch": 0.6016597510373444,
"grad_norm": 14.022892671657644,
"learning_rate": 4.848312539315334e-07,
"logits/chosen": -1.375,
"logits/rejected": -1.359375,
"logps/chosen": -488.0,
"logps/rejected": -454.0,
"loss": 0.5069,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.7890625,
"rewards/margins": 0.55859375,
"rewards/rejected": -1.34375,
"step": 290
},
{
"epoch": 0.6224066390041494,
"grad_norm": 16.89545464924121,
"learning_rate": 4.826921713534873e-07,
"logits/chosen": -1.2265625,
"logits/rejected": -1.28125,
"logps/chosen": -520.0,
"logps/rejected": -548.0,
"loss": 0.5104,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.67578125,
"rewards/margins": 0.74609375,
"rewards/rejected": -1.421875,
"step": 300
},
{
"epoch": 0.6431535269709544,
"grad_norm": 14.047474903550272,
"learning_rate": 4.804174119038404e-07,
"logits/chosen": -1.171875,
"logits/rejected": -1.15625,
"logps/chosen": -472.0,
"logps/rejected": -498.0,
"loss": 0.5325,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.640625,
"rewards/margins": 0.69140625,
"rewards/rejected": -1.328125,
"step": 310
},
{
"epoch": 0.6639004149377593,
"grad_norm": 11.967189628642249,
"learning_rate": 4.78008301936823e-07,
"logits/chosen": -1.2265625,
"logits/rejected": -1.1875,
"logps/chosen": -458.0,
"logps/rejected": -504.0,
"loss": 0.514,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.59375,
"rewards/margins": 0.734375,
"rewards/rejected": -1.328125,
"step": 320
},
{
"epoch": 0.6846473029045643,
"grad_norm": 13.820746186375771,
"learning_rate": 4.754662461430258e-07,
"logits/chosen": -1.3203125,
"logits/rejected": -1.328125,
"logps/chosen": -612.0,
"logps/rejected": -556.0,
"loss": 0.5133,
"rewards/accuracies": 0.75,
"rewards/chosen": -1.15625,
"rewards/margins": 0.69140625,
"rewards/rejected": -1.84375,
"step": 330
},
{
"epoch": 0.7053941908713693,
"grad_norm": 15.47060039648899,
"learning_rate": 4.727927267303612e-07,
"logits/chosen": -1.171875,
"logits/rejected": -1.09375,
"logps/chosen": -452.0,
"logps/rejected": -454.0,
"loss": 0.5407,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -1.265625,
"rewards/margins": 0.5234375,
"rewards/rejected": -1.7890625,
"step": 340
},
{
"epoch": 0.7261410788381742,
"grad_norm": 11.835480559229415,
"learning_rate": 4.699893025598255e-07,
"logits/chosen": -1.1875,
"logits/rejected": -1.1953125,
"logps/chosen": -484.0,
"logps/rejected": -490.0,
"loss": 0.5124,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -1.1796875,
"rewards/margins": 0.384765625,
"rewards/rejected": -1.5703125,
"step": 350
},
{
"epoch": 0.7468879668049793,
"grad_norm": 17.32570297851737,
"learning_rate": 4.67057608236567e-07,
"logits/chosen": -1.0703125,
"logits/rejected": -1.0,
"logps/chosen": -436.0,
"logps/rejected": -468.0,
"loss": 0.4606,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -1.234375,
"rewards/margins": 0.796875,
"rewards/rejected": -2.03125,
"step": 360
},
{
"epoch": 0.7676348547717843,
"grad_norm": 14.712913827549949,
"learning_rate": 4.6399935315678893e-07,
"logits/chosen": -1.0703125,
"logits/rejected": -1.0859375,
"logps/chosen": -552.0,
"logps/rejected": -498.0,
"loss": 0.4847,
"rewards/accuracies": 0.75,
"rewards/chosen": -1.78125,
"rewards/margins": 0.6328125,
"rewards/rejected": -2.40625,
"step": 370
},
{
"epoch": 0.7883817427385892,
"grad_norm": 15.253980446488892,
"learning_rate": 4.608163205110447e-07,
"logits/chosen": -1.3125,
"logits/rejected": -1.359375,
"logps/chosen": -544.0,
"logps/rejected": -572.0,
"loss": 0.4847,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.890625,
"rewards/margins": 0.765625,
"rewards/rejected": -1.65625,
"step": 380
},
{
"epoch": 0.8091286307053942,
"grad_norm": 14.996759932569487,
"learning_rate": 4.5751036624450445e-07,
"logits/chosen": -1.4140625,
"logits/rejected": -1.34375,
"logps/chosen": -552.0,
"logps/rejected": -540.0,
"loss": 0.4827,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -1.046875,
"rewards/margins": 0.375,
"rewards/rejected": -1.421875,
"step": 390
},
{
"epoch": 0.8298755186721992,
"grad_norm": 16.766050665345595,
"learning_rate": 4.540834179748012e-07,
"logits/chosen": -1.171875,
"logits/rejected": -1.25,
"logps/chosen": -420.0,
"logps/rejected": -528.0,
"loss": 0.4556,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -1.21875,
"rewards/margins": 0.60546875,
"rewards/rejected": -1.8203125,
"step": 400
},
{
"epoch": 0.8506224066390041,
"grad_norm": 19.558534780127147,
"learning_rate": 4.5053747386808564e-07,
"logits/chosen": -1.1953125,
"logits/rejected": -1.3203125,
"logps/chosen": -508.0,
"logps/rejected": -688.0,
"loss": 0.4707,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -1.3671875,
"rewards/margins": 0.9296875,
"rewards/rejected": -2.296875,
"step": 410
},
{
"epoch": 0.8713692946058091,
"grad_norm": 14.183474677606634,
"learning_rate": 4.4687460147394706e-07,
"logits/chosen": -1.3125,
"logits/rejected": -1.3515625,
"logps/chosen": -532.0,
"logps/rejected": -588.0,
"loss": 0.4869,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -1.1796875,
"rewards/margins": 0.73046875,
"rewards/rejected": -1.9140625,
"step": 420
},
{
"epoch": 0.8921161825726142,
"grad_norm": 13.143331743152638,
"learning_rate": 4.4309693651987726e-07,
"logits/chosen": -1.328125,
"logits/rejected": -1.28125,
"logps/chosen": -624.0,
"logps/rejected": -600.0,
"loss": 0.4787,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -1.1484375,
"rewards/margins": 1.0390625,
"rewards/rejected": -2.1875,
"step": 430
},
{
"epoch": 0.9128630705394191,
"grad_norm": 16.10745504932835,
"learning_rate": 4.3920668166598273e-07,
"logits/chosen": -1.3671875,
"logits/rejected": -1.40625,
"logps/chosen": -476.0,
"logps/rejected": -520.0,
"loss": 0.423,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -1.0703125,
"rewards/margins": 0.88671875,
"rewards/rejected": -1.9609375,
"step": 440
},
{
"epoch": 0.9336099585062241,
"grad_norm": 17.6229119736944,
"learning_rate": 4.352061052206695e-07,
"logits/chosen": -1.28125,
"logits/rejected": -1.2578125,
"logps/chosen": -580.0,
"logps/rejected": -644.0,
"loss": 0.4367,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -2.140625,
"rewards/margins": 0.88671875,
"rewards/rejected": -3.03125,
"step": 450
},
{
"epoch": 0.9543568464730291,
"grad_norm": 27.688196316137688,
"learning_rate": 4.3109753981805045e-07,
"logits/chosen": -1.3515625,
"logits/rejected": -1.3828125,
"logps/chosen": -696.0,
"logps/rejected": -692.0,
"loss": 0.4332,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -1.890625,
"rewards/margins": 1.3125,
"rewards/rejected": -3.1875,
"step": 460
},
{
"epoch": 0.975103734439834,
"grad_norm": 19.37927464187824,
"learning_rate": 4.2688338105784584e-07,
"logits/chosen": -1.203125,
"logits/rejected": -1.2890625,
"logps/chosen": -556.0,
"logps/rejected": -568.0,
"loss": 0.4428,
"rewards/accuracies": 0.75,
"rewards/chosen": -1.2890625,
"rewards/margins": 1.09375,
"rewards/rejected": -2.390625,
"step": 470
},
{
"epoch": 0.995850622406639,
"grad_norm": 16.81355148557565,
"learning_rate": 4.2256608610857014e-07,
"logits/chosen": -1.3984375,
"logits/rejected": -1.390625,
"logps/chosen": -604.0,
"logps/rejected": -576.0,
"loss": 0.4534,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -1.3828125,
"rewards/margins": 1.0390625,
"rewards/rejected": -2.421875,
"step": 480
},
{
"epoch": 1.016597510373444,
"grad_norm": 22.885104807982884,
"learning_rate": 4.181481722748197e-07,
"logits/chosen": -1.3671875,
"logits/rejected": -1.328125,
"logps/chosen": -446.0,
"logps/rejected": -548.0,
"loss": 0.3546,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -1.4375,
"rewards/margins": 1.0546875,
"rewards/rejected": -2.5,
"step": 490
},
{
"epoch": 1.037344398340249,
"grad_norm": 28.388093085355763,
"learning_rate": 4.136322155294968e-07,
"logits/chosen": -1.1875,
"logits/rejected": -1.3046875,
"logps/chosen": -656.0,
"logps/rejected": -740.0,
"loss": 0.3066,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -2.25,
"rewards/margins": 1.2890625,
"rewards/rejected": -3.53125,
"step": 500
},
{
"epoch": 1.058091286307054,
"grad_norm": 18.000547586432397,
"learning_rate": 4.090208490118253e-07,
"logits/chosen": -1.21875,
"logits/rejected": -1.3671875,
"logps/chosen": -792.0,
"logps/rejected": -820.0,
"loss": 0.3002,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -2.234375,
"rewards/margins": 1.640625,
"rewards/rejected": -3.859375,
"step": 510
},
{
"epoch": 1.0788381742738589,
"grad_norm": 26.116853952186087,
"learning_rate": 4.0431676149203457e-07,
"logits/chosen": -1.25,
"logits/rejected": -1.2421875,
"logps/chosen": -544.0,
"logps/rejected": -660.0,
"loss": 0.305,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -1.8828125,
"rewards/margins": 1.9453125,
"rewards/rejected": -3.828125,
"step": 520
},
{
"epoch": 1.099585062240664,
"grad_norm": 20.665661382654836,
"learning_rate": 3.995226958036058e-07,
"logits/chosen": -1.0625,
"logits/rejected": -1.125,
"logps/chosen": -628.0,
"logps/rejected": -728.0,
"loss": 0.282,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -2.53125,
"rewards/margins": 1.625,
"rewards/rejected": -4.15625,
"step": 530
},
{
"epoch": 1.120331950207469,
"grad_norm": 20.874625813560073,
"learning_rate": 3.9464144724399605e-07,
"logits/chosen": -1.1328125,
"logits/rejected": -1.109375,
"logps/chosen": -656.0,
"logps/rejected": -780.0,
"loss": 0.2842,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -1.9140625,
"rewards/margins": 2.1875,
"rewards/rejected": -4.09375,
"step": 540
},
{
"epoch": 1.1410788381742738,
"grad_norm": 15.838820640241945,
"learning_rate": 3.896758619447714e-07,
"logits/chosen": -1.1796875,
"logits/rejected": -1.21875,
"logps/chosen": -608.0,
"logps/rejected": -804.0,
"loss": 0.2805,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -2.484375,
"rewards/margins": 2.078125,
"rewards/rejected": -4.5625,
"step": 550
},
{
"epoch": 1.161825726141079,
"grad_norm": 16.332425845632645,
"learning_rate": 3.846288352121003e-07,
"logits/chosen": -1.3203125,
"logits/rejected": -1.265625,
"logps/chosen": -696.0,
"logps/rejected": -712.0,
"loss": 0.3048,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -2.765625,
"rewards/margins": 1.0234375,
"rewards/rejected": -3.78125,
"step": 560
},
{
"epoch": 1.1825726141078838,
"grad_norm": 17.929954538214847,
"learning_rate": 3.795033098385744e-07,
"logits/chosen": -1.28125,
"logits/rejected": -1.3125,
"logps/chosen": -620.0,
"logps/rejected": -832.0,
"loss": 0.2894,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -2.609375,
"rewards/margins": 1.71875,
"rewards/rejected": -4.34375,
"step": 570
},
{
"epoch": 1.2033195020746887,
"grad_norm": 32.79066665207477,
"learning_rate": 3.7430227438734086e-07,
"logits/chosen": -1.2265625,
"logits/rejected": -1.1875,
"logps/chosen": -656.0,
"logps/rejected": -764.0,
"loss": 0.2766,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -2.640625,
"rewards/margins": 1.609375,
"rewards/rejected": -4.25,
"step": 580
},
{
"epoch": 1.2240663900414939,
"grad_norm": 18.82509345910388,
"learning_rate": 3.690287614495481e-07,
"logits/chosen": -1.328125,
"logits/rejected": -1.3203125,
"logps/chosen": -740.0,
"logps/rejected": -920.0,
"loss": 0.2188,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -2.75,
"rewards/margins": 2.296875,
"rewards/rejected": -5.0625,
"step": 590
},
{
"epoch": 1.2448132780082988,
"grad_norm": 22.18730206624369,
"learning_rate": 3.6368584587611854e-07,
"logits/chosen": -1.2734375,
"logits/rejected": -1.3203125,
"logps/chosen": -692.0,
"logps/rejected": -920.0,
"loss": 0.2944,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -3.125,
"rewards/margins": 2.28125,
"rewards/rejected": -5.40625,
"step": 600
},
{
"epoch": 1.2655601659751037,
"grad_norm": 16.250841109003115,
"learning_rate": 3.582766429848818e-07,
"logits/chosen": -1.34375,
"logits/rejected": -1.375,
"logps/chosen": -652.0,
"logps/rejected": -756.0,
"loss": 0.2651,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -2.59375,
"rewards/margins": 1.75,
"rewards/rejected": -4.34375,
"step": 610
},
{
"epoch": 1.2863070539419086,
"grad_norm": 32.2790592968402,
"learning_rate": 3.528043067441123e-07,
"logits/chosen": -1.2265625,
"logits/rejected": -1.28125,
"logps/chosen": -516.0,
"logps/rejected": -724.0,
"loss": 0.2655,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -2.5,
"rewards/margins": 1.7734375,
"rewards/rejected": -4.28125,
"step": 620
},
{
"epoch": 1.3070539419087137,
"grad_norm": 21.008974688174074,
"learning_rate": 3.472720279335305e-07,
"logits/chosen": -1.3515625,
"logits/rejected": -1.3125,
"logps/chosen": -716.0,
"logps/rejected": -856.0,
"loss": 0.2614,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -3.0,
"rewards/margins": 1.8828125,
"rewards/rejected": -4.875,
"step": 630
},
{
"epoch": 1.3278008298755186,
"grad_norm": 24.817331119096536,
"learning_rate": 3.4168303228384097e-07,
"logits/chosen": -1.2578125,
"logits/rejected": -1.34375,
"logps/chosen": -744.0,
"logps/rejected": -880.0,
"loss": 0.2602,
"rewards/accuracies": 0.75,
"rewards/chosen": -2.984375,
"rewards/margins": 1.375,
"rewards/rejected": -4.375,
"step": 640
},
{
"epoch": 1.3485477178423237,
"grad_norm": 16.172899710422996,
"learning_rate": 3.36040578595891e-07,
"logits/chosen": -1.2109375,
"logits/rejected": -1.234375,
"logps/chosen": -704.0,
"logps/rejected": -928.0,
"loss": 0.2566,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -2.8125,
"rewards/margins": 2.09375,
"rewards/rejected": -4.90625,
"step": 650
},
{
"epoch": 1.3692946058091287,
"grad_norm": 28.674908041856455,
"learning_rate": 3.303479568405467e-07,
"logits/chosen": -1.2578125,
"logits/rejected": -1.34375,
"logps/chosen": -692.0,
"logps/rejected": -712.0,
"loss": 0.248,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -2.59375,
"rewards/margins": 1.40625,
"rewards/rejected": -4.0,
"step": 660
},
{
"epoch": 1.3900414937759336,
"grad_norm": 18.929031877783473,
"learning_rate": 3.246084862403949e-07,
"logits/chosen": -1.0546875,
"logits/rejected": -1.1875,
"logps/chosen": -724.0,
"logps/rejected": -856.0,
"loss": 0.2285,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -2.609375,
"rewards/margins": 2.390625,
"rewards/rejected": -5.0,
"step": 670
},
{
"epoch": 1.4107883817427385,
"grad_norm": 27.863022449197903,
"learning_rate": 3.188255133343896e-07,
"logits/chosen": -1.3359375,
"logits/rejected": -1.390625,
"logps/chosen": -744.0,
"logps/rejected": -1012.0,
"loss": 0.2616,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -3.328125,
"rewards/margins": 2.359375,
"rewards/rejected": -5.6875,
"step": 680
},
{
"epoch": 1.4315352697095436,
"grad_norm": 20.6381381654102,
"learning_rate": 3.1300241002656964e-07,
"logits/chosen": -1.2578125,
"logits/rejected": -1.3125,
"logps/chosen": -744.0,
"logps/rejected": -932.0,
"loss": 0.2343,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -3.046875,
"rewards/margins": 2.515625,
"rewards/rejected": -5.5625,
"step": 690
},
{
"epoch": 1.4522821576763485,
"grad_norm": 21.416162109964308,
"learning_rate": 3.071425716199882e-07,
"logits/chosen": -1.265625,
"logits/rejected": -1.3203125,
"logps/chosen": -708.0,
"logps/rejected": -940.0,
"loss": 0.2137,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.765625,
"rewards/margins": 2.734375,
"rewards/rejected": -5.5,
"step": 700
},
{
"epoch": 1.4730290456431536,
"grad_norm": 27.537624866222753,
"learning_rate": 3.0124941483699753e-07,
"logits/chosen": -1.34375,
"logits/rejected": -1.34375,
"logps/chosen": -892.0,
"logps/rejected": -1064.0,
"loss": 0.2099,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -4.34375,
"rewards/margins": 1.875,
"rewards/rejected": -6.21875,
"step": 710
},
{
"epoch": 1.4937759336099585,
"grad_norm": 17.582919177615466,
"learning_rate": 2.953263758270459e-07,
"logits/chosen": -1.21875,
"logits/rejected": -1.328125,
"logps/chosen": -612.0,
"logps/rejected": -720.0,
"loss": 0.236,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -2.390625,
"rewards/margins": 1.9453125,
"rewards/rejected": -4.34375,
"step": 720
},
{
"epoch": 1.5145228215767634,
"grad_norm": 33.007588469499844,
"learning_rate": 2.8937690816314577e-07,
"logits/chosen": -1.328125,
"logits/rejected": -1.390625,
"logps/chosen": -744.0,
"logps/rejected": -980.0,
"loss": 0.2203,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -3.515625,
"rewards/margins": 2.5625,
"rewards/rejected": -6.0625,
"step": 730
},
{
"epoch": 1.5352697095435683,
"grad_norm": 24.645168136425823,
"learning_rate": 2.834044808281841e-07,
"logits/chosen": -1.3046875,
"logits/rejected": -1.4375,
"logps/chosen": -664.0,
"logps/rejected": -868.0,
"loss": 0.229,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.6875,
"rewards/margins": 2.75,
"rewards/rejected": -5.4375,
"step": 740
},
{
"epoch": 1.5560165975103735,
"grad_norm": 19.904894364928772,
"learning_rate": 2.774125761922463e-07,
"logits/chosen": -1.1953125,
"logits/rejected": -1.21875,
"logps/chosen": -580.0,
"logps/rejected": -884.0,
"loss": 0.2046,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -2.453125,
"rewards/margins": 2.90625,
"rewards/rejected": -5.34375,
"step": 750
},
{
"epoch": 1.5767634854771784,
"grad_norm": 30.15619672097981,
"learning_rate": 2.714046879821358e-07,
"logits/chosen": -1.1875,
"logits/rejected": -1.328125,
"logps/chosen": -716.0,
"logps/rejected": -992.0,
"loss": 0.2351,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -3.484375,
"rewards/margins": 2.9375,
"rewards/rejected": -6.40625,
"step": 760
},
{
"epoch": 1.5975103734439835,
"grad_norm": 27.783614402532425,
"learning_rate": 2.653843192442699e-07,
"logits/chosen": -1.2734375,
"logits/rejected": -1.234375,
"logps/chosen": -724.0,
"logps/rejected": -860.0,
"loss": 0.2573,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.203125,
"rewards/margins": 2.046875,
"rewards/rejected": -5.25,
"step": 770
},
{
"epoch": 1.6182572614107884,
"grad_norm": 19.560666870988193,
"learning_rate": 2.5935498030214397e-07,
"logits/chosen": -1.3515625,
"logits/rejected": -1.40625,
"logps/chosen": -700.0,
"logps/rejected": -860.0,
"loss": 0.2086,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -3.0,
"rewards/margins": 1.8828125,
"rewards/rejected": -4.875,
"step": 780
},
{
"epoch": 1.6390041493775933,
"grad_norm": 21.195930951645135,
"learning_rate": 2.533201867095504e-07,
"logits/chosen": -1.375,
"logits/rejected": -1.34375,
"logps/chosen": -756.0,
"logps/rejected": -1048.0,
"loss": 0.2734,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.53125,
"rewards/margins": 3.640625,
"rewards/rejected": -7.1875,
"step": 790
},
{
"epoch": 1.6597510373443982,
"grad_norm": 20.462891895112204,
"learning_rate": 2.472834572007493e-07,
"logits/chosen": -1.4140625,
"logits/rejected": -1.4765625,
"logps/chosen": -728.0,
"logps/rejected": -872.0,
"loss": 0.2306,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -2.421875,
"rewards/margins": 2.84375,
"rewards/rejected": -5.28125,
"step": 800
},
{
"epoch": 1.6804979253112033,
"grad_norm": 23.905476075586677,
"learning_rate": 2.4124831163878427e-07,
"logits/chosen": -1.234375,
"logits/rejected": -1.3125,
"logps/chosen": -700.0,
"logps/rejected": -936.0,
"loss": 0.2081,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.3125,
"rewards/margins": 2.609375,
"rewards/rejected": -5.9375,
"step": 810
},
{
"epoch": 1.7012448132780082,
"grad_norm": 23.605115040621836,
"learning_rate": 2.3521826896313965e-07,
"logits/chosen": -1.2734375,
"logits/rejected": -1.4140625,
"logps/chosen": -704.0,
"logps/rejected": -1104.0,
"loss": 0.2039,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -3.59375,
"rewards/margins": 4.0,
"rewards/rejected": -7.59375,
"step": 820
},
{
"epoch": 1.7219917012448134,
"grad_norm": 25.173345988155088,
"learning_rate": 2.2919684513793704e-07,
"logits/chosen": -1.2421875,
"logits/rejected": -1.4296875,
"logps/chosen": -736.0,
"logps/rejected": -972.0,
"loss": 0.2168,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.609375,
"rewards/margins": 2.84375,
"rewards/rejected": -6.4375,
"step": 830
},
{
"epoch": 1.7427385892116183,
"grad_norm": 18.48915801624125,
"learning_rate": 2.2318755110186602e-07,
"logits/chosen": -1.2734375,
"logits/rejected": -1.3671875,
"logps/chosen": -688.0,
"logps/rejected": -828.0,
"loss": 0.2236,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.4375,
"rewards/margins": 2.546875,
"rewards/rejected": -5.0,
"step": 840
},
{
"epoch": 1.7634854771784232,
"grad_norm": 18.093111350178464,
"learning_rate": 2.171938907210457e-07,
"logits/chosen": -1.2421875,
"logits/rejected": -1.2109375,
"logps/chosen": -632.0,
"logps/rejected": -904.0,
"loss": 0.203,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -2.890625,
"rewards/margins": 2.390625,
"rewards/rejected": -5.28125,
"step": 850
},
{
"epoch": 1.784232365145228,
"grad_norm": 19.412483996631583,
"learning_rate": 2.1121935874600914e-07,
"logits/chosen": -1.2265625,
"logits/rejected": -1.3203125,
"logps/chosen": -704.0,
"logps/rejected": -944.0,
"loss": 0.2065,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.25,
"rewards/margins": 2.71875,
"rewards/rejected": -5.96875,
"step": 860
},
{
"epoch": 1.8049792531120332,
"grad_norm": 27.072030223868076,
"learning_rate": 2.052674387740039e-07,
"logits/chosen": -1.34375,
"logits/rejected": -1.3046875,
"logps/chosen": -736.0,
"logps/rejected": -1008.0,
"loss": 0.2191,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -3.5,
"rewards/margins": 2.875,
"rewards/rejected": -6.375,
"step": 870
},
{
"epoch": 1.8257261410788381,
"grad_norm": 23.115028154031677,
"learning_rate": 1.9934160121779511e-07,
"logits/chosen": -1.140625,
"logits/rejected": -1.25,
"logps/chosen": -812.0,
"logps/rejected": -1000.0,
"loss": 0.2042,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -3.765625,
"rewards/margins": 2.296875,
"rewards/rejected": -6.0625,
"step": 880
},
{
"epoch": 1.8464730290456433,
"grad_norm": 21.41388111371229,
"learning_rate": 1.9344530128215644e-07,
"logits/chosen": -1.2734375,
"logits/rejected": -1.3515625,
"logps/chosen": -752.0,
"logps/rejected": -852.0,
"loss": 0.2198,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -2.59375,
"rewards/margins": 2.453125,
"rewards/rejected": -5.03125,
"step": 890
},
{
"epoch": 1.8672199170124482,
"grad_norm": 21.253233874014462,
"learning_rate": 1.8758197694922812e-07,
"logits/chosen": -1.25,
"logits/rejected": -1.3671875,
"logps/chosen": -740.0,
"logps/rejected": -944.0,
"loss": 0.2285,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -2.953125,
"rewards/margins": 2.078125,
"rewards/rejected": -5.03125,
"step": 900
},
{
"epoch": 1.887966804979253,
"grad_norm": 20.368884562679153,
"learning_rate": 1.8175504697391728e-07,
"logits/chosen": -1.265625,
"logits/rejected": -1.296875,
"logps/chosen": -852.0,
"logps/rejected": -968.0,
"loss": 0.1723,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -3.953125,
"rewards/margins": 2.015625,
"rewards/rejected": -5.96875,
"step": 910
},
{
"epoch": 1.908713692946058,
"grad_norm": 16.864214805797634,
"learning_rate": 1.7596790889050907e-07,
"logits/chosen": -1.34375,
"logits/rejected": -1.34375,
"logps/chosen": -720.0,
"logps/rejected": -940.0,
"loss": 0.1957,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -3.0625,
"rewards/margins": 2.78125,
"rewards/rejected": -5.84375,
"step": 920
},
{
"epoch": 1.929460580912863,
"grad_norm": 16.073322912809243,
"learning_rate": 1.702239370316515e-07,
"logits/chosen": -1.2578125,
"logits/rejected": -1.34375,
"logps/chosen": -792.0,
"logps/rejected": -1056.0,
"loss": 0.1968,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.5,
"rewards/margins": 3.4375,
"rewards/rejected": -6.9375,
"step": 930
},
{
"epoch": 1.950207468879668,
"grad_norm": 22.36544108559203,
"learning_rate": 1.645264805608674e-07,
"logits/chosen": -1.3046875,
"logits/rejected": -1.2890625,
"logps/chosen": -816.0,
"logps/rejected": -1032.0,
"loss": 0.1829,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -3.75,
"rewards/margins": 2.296875,
"rewards/rejected": -6.0625,
"step": 940
},
{
"epoch": 1.9709543568464731,
"grad_norm": 27.387195026328936,
"learning_rate": 1.58878861519743e-07,
"logits/chosen": -1.21875,
"logits/rejected": -1.28125,
"logps/chosen": -624.0,
"logps/rejected": -928.0,
"loss": 0.2129,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.125,
"rewards/margins": 2.9375,
"rewards/rejected": -6.0625,
"step": 950
},
{
"epoch": 1.991701244813278,
"grad_norm": 36.05924674346635,
"learning_rate": 1.5328437289093015e-07,
"logits/chosen": -1.3203125,
"logits/rejected": -1.3515625,
"logps/chosen": -712.0,
"logps/rejected": -1020.0,
"loss": 0.1837,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -3.65625,
"rewards/margins": 2.78125,
"rewards/rejected": -6.4375,
"step": 960
},
{
"epoch": 2.012448132780083,
"grad_norm": 9.806579172400033,
"learning_rate": 1.4774627667809223e-07,
"logits/chosen": -1.3046875,
"logits/rejected": -1.3515625,
"logps/chosen": -700.0,
"logps/rejected": -1008.0,
"loss": 0.1358,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.171875,
"rewards/margins": 3.421875,
"rewards/rejected": -6.5625,
"step": 970
},
{
"epoch": 2.033195020746888,
"grad_norm": 11.092351348103373,
"learning_rate": 1.4226780200391267e-07,
"logits/chosen": -0.91796875,
"logits/rejected": -1.0,
"logps/chosen": -756.0,
"logps/rejected": -1160.0,
"loss": 0.0784,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.0625,
"rewards/margins": 3.78125,
"rewards/rejected": -7.84375,
"step": 980
},
{
"epoch": 2.0539419087136928,
"grad_norm": 10.816346221719781,
"learning_rate": 1.3685214322727596e-07,
"logits/chosen": -1.046875,
"logits/rejected": -1.203125,
"logps/chosen": -880.0,
"logps/rejected": -1272.0,
"loss": 0.0719,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.59375,
"rewards/margins": 4.875,
"rewards/rejected": -9.4375,
"step": 990
},
{
"epoch": 2.074688796680498,
"grad_norm": 10.729589636729887,
"learning_rate": 1.3150245808071854e-07,
"logits/chosen": -1.265625,
"logits/rejected": -1.265625,
"logps/chosen": -1020.0,
"logps/rejected": -1288.0,
"loss": 0.0733,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.21875,
"rewards/margins": 3.71875,
"rewards/rejected": -8.9375,
"step": 1000
},
{
"epoch": 2.095435684647303,
"grad_norm": 7.237413191543059,
"learning_rate": 1.2622186582923566e-07,
"logits/chosen": -1.203125,
"logits/rejected": -1.28125,
"logps/chosen": -996.0,
"logps/rejected": -1264.0,
"loss": 0.0756,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.53125,
"rewards/margins": 3.25,
"rewards/rejected": -7.78125,
"step": 1010
},
{
"epoch": 2.116182572614108,
"grad_norm": 14.390311697685057,
"learning_rate": 1.2101344545151713e-07,
"logits/chosen": -1.234375,
"logits/rejected": -1.2890625,
"logps/chosen": -856.0,
"logps/rejected": -1168.0,
"loss": 0.0688,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.71875,
"rewards/margins": 3.578125,
"rewards/rejected": -8.3125,
"step": 1020
},
{
"epoch": 2.136929460580913,
"grad_norm": 9.371742649196001,
"learning_rate": 1.1588023384467335e-07,
"logits/chosen": -1.2578125,
"logits/rejected": -1.34375,
"logps/chosen": -848.0,
"logps/rejected": -1240.0,
"loss": 0.0873,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.78125,
"rewards/margins": 4.53125,
"rewards/rejected": -9.3125,
"step": 1030
},
{
"epoch": 2.1576763485477177,
"grad_norm": 13.411003854877661,
"learning_rate": 1.1082522405349834e-07,
"logits/chosen": -1.21875,
"logits/rejected": -1.2734375,
"logps/chosen": -744.0,
"logps/rejected": -1144.0,
"loss": 0.0694,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.546875,
"rewards/margins": 4.875,
"rewards/rejected": -8.4375,
"step": 1040
},
{
"epoch": 2.1784232365145226,
"grad_norm": 10.33738561356746,
"learning_rate": 1.0585136352530172e-07,
"logits/chosen": -1.4296875,
"logits/rejected": -1.484375,
"logps/chosen": -876.0,
"logps/rejected": -1184.0,
"loss": 0.0812,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -4.5625,
"rewards/margins": 3.015625,
"rewards/rejected": -7.59375,
"step": 1050
},
{
"epoch": 2.199170124481328,
"grad_norm": 15.189705649161667,
"learning_rate": 1.0096155239132675e-07,
"logits/chosen": -1.296875,
"logits/rejected": -1.328125,
"logps/chosen": -672.0,
"logps/rejected": -924.0,
"loss": 0.0763,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.625,
"rewards/margins": 3.21875,
"rewards/rejected": -6.84375,
"step": 1060
},
{
"epoch": 2.219917012448133,
"grad_norm": 9.763294237607763,
"learning_rate": 9.615864177575836e-08,
"logits/chosen": -1.2109375,
"logits/rejected": -1.3125,
"logps/chosen": -1112.0,
"logps/rejected": -1456.0,
"loss": 0.0735,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.53125,
"rewards/margins": 5.21875,
"rewards/rejected": -10.75,
"step": 1070
},
{
"epoch": 2.240663900414938,
"grad_norm": 12.16820558108209,
"learning_rate": 9.144543213330493e-08,
"logits/chosen": -1.390625,
"logits/rejected": -1.390625,
"logps/chosen": -848.0,
"logps/rejected": -1296.0,
"loss": 0.0737,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -4.3125,
"rewards/margins": 4.9375,
"rewards/rejected": -9.25,
"step": 1080
},
{
"epoch": 2.2614107883817427,
"grad_norm": 13.349946012287942,
"learning_rate": 8.682467161632508e-08,
"logits/chosen": -1.3359375,
"logits/rejected": -1.421875,
"logps/chosen": -764.0,
"logps/rejected": -1216.0,
"loss": 0.0706,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.25,
"rewards/margins": 4.34375,
"rewards/rejected": -8.5625,
"step": 1090
},
{
"epoch": 2.2821576763485476,
"grad_norm": 20.08687451523014,
"learning_rate": 8.229905447244942e-08,
"logits/chosen": -1.328125,
"logits/rejected": -1.3828125,
"logps/chosen": -908.0,
"logps/rejected": -1176.0,
"loss": 0.0642,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.4375,
"rewards/margins": 3.53125,
"rewards/rejected": -8.0,
"step": 1100
},
{
"epoch": 2.3029045643153525,
"grad_norm": 16.153630097446893,
"learning_rate": 7.787121947363393e-08,
"logits/chosen": -1.1171875,
"logits/rejected": -1.1953125,
"logps/chosen": -964.0,
"logps/rejected": -1360.0,
"loss": 0.0691,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.21875,
"rewards/margins": 4.34375,
"rewards/rejected": -9.5625,
"step": 1110
},
{
"epoch": 2.323651452282158,
"grad_norm": 14.370101853953887,
"learning_rate": 7.354374837755919e-08,
"logits/chosen": -1.1328125,
"logits/rejected": -1.21875,
"logps/chosen": -892.0,
"logps/rejected": -1272.0,
"loss": 0.0749,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.8125,
"rewards/margins": 4.625,
"rewards/rejected": -9.4375,
"step": 1120
},
{
"epoch": 2.3443983402489628,
"grad_norm": 15.871062321763814,
"learning_rate": 6.931916442227335e-08,
"logits/chosen": -1.28125,
"logits/rejected": -1.28125,
"logps/chosen": -808.0,
"logps/rejected": -1184.0,
"loss": 0.068,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.34375,
"rewards/margins": 4.1875,
"rewards/rejected": -8.5,
"step": 1130
},
{
"epoch": 2.3651452282157677,
"grad_norm": 8.312067272107306,
"learning_rate": 6.519993085495622e-08,
"logits/chosen": -1.1640625,
"logits/rejected": -1.3046875,
"logps/chosen": -928.0,
"logps/rejected": -1440.0,
"loss": 0.0656,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.96875,
"rewards/margins": 5.375,
"rewards/rejected": -10.375,
"step": 1140
},
{
"epoch": 2.3858921161825726,
"grad_norm": 10.430795094415418,
"learning_rate": 6.118844949566293e-08,
"logits/chosen": -1.2578125,
"logits/rejected": -1.3515625,
"logps/chosen": -900.0,
"logps/rejected": -1384.0,
"loss": 0.0564,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.875,
"rewards/margins": 5.71875,
"rewards/rejected": -10.5625,
"step": 1150
},
{
"epoch": 2.4066390041493775,
"grad_norm": 22.177489465859505,
"learning_rate": 5.728705933688349e-08,
"logits/chosen": -1.2265625,
"logits/rejected": -1.2890625,
"logps/chosen": -908.0,
"logps/rejected": -1344.0,
"loss": 0.0803,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.875,
"rewards/margins": 4.28125,
"rewards/rejected": -9.125,
"step": 1160
},
{
"epoch": 2.4273858921161824,
"grad_norm": 6.857181586976422,
"learning_rate": 5.3498035179736475e-08,
"logits/chosen": -1.21875,
"logits/rejected": -1.3203125,
"logps/chosen": -768.0,
"logps/rejected": -1144.0,
"loss": 0.0637,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.65625,
"rewards/margins": 3.875,
"rewards/rejected": -8.5625,
"step": 1170
},
{
"epoch": 2.4481327800829877,
"grad_norm": 18.641976214083737,
"learning_rate": 4.98235863075899e-08,
"logits/chosen": -1.203125,
"logits/rejected": -1.234375,
"logps/chosen": -844.0,
"logps/rejected": -1304.0,
"loss": 0.0823,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -4.75,
"rewards/margins": 4.75,
"rewards/rejected": -9.5,
"step": 1180
},
{
"epoch": 2.4688796680497926,
"grad_norm": 11.12170347340474,
"learning_rate": 4.626585519788476e-08,
"logits/chosen": -1.28125,
"logits/rejected": -1.28125,
"logps/chosen": -776.0,
"logps/rejected": -1272.0,
"loss": 0.0621,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.859375,
"rewards/margins": 5.21875,
"rewards/rejected": -9.0625,
"step": 1190
},
{
"epoch": 2.4896265560165975,
"grad_norm": 10.533918362287494,
"learning_rate": 4.2826916272911154e-08,
"logits/chosen": -1.1640625,
"logits/rejected": -1.2890625,
"logps/chosen": -916.0,
"logps/rejected": -1256.0,
"loss": 0.0647,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -4.96875,
"rewards/margins": 4.09375,
"rewards/rejected": -9.0625,
"step": 1200
},
{
"epoch": 2.5103734439834025,
"grad_norm": 11.17720015590518,
"learning_rate": 3.950877469026523e-08,
"logits/chosen": -1.2734375,
"logits/rejected": -1.3046875,
"logps/chosen": -1128.0,
"logps/rejected": -1528.0,
"loss": 0.0642,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.625,
"rewards/margins": 5.1875,
"rewards/rejected": -10.8125,
"step": 1210
},
{
"epoch": 2.5311203319502074,
"grad_norm": 19.957831888824806,
"learning_rate": 3.631336517369313e-08,
"logits/chosen": -1.1953125,
"logits/rejected": -1.328125,
"logps/chosen": -768.0,
"logps/rejected": -1160.0,
"loss": 0.0622,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -4.28125,
"rewards/margins": 4.125,
"rewards/rejected": -8.4375,
"step": 1220
},
{
"epoch": 2.5518672199170123,
"grad_norm": 17.996461125043027,
"learning_rate": 3.3242550885002805e-08,
"logits/chosen": -1.1953125,
"logits/rejected": -1.2421875,
"logps/chosen": -1008.0,
"logps/rejected": -1400.0,
"loss": 0.0673,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.0,
"rewards/margins": 5.03125,
"rewards/rejected": -10.0,
"step": 1230
},
{
"epoch": 2.572614107883817,
"grad_norm": 12.137212294540161,
"learning_rate": 3.029812233770215e-08,
"logits/chosen": -1.2421875,
"logits/rejected": -1.234375,
"logps/chosen": -776.0,
"logps/rejected": -1112.0,
"loss": 0.0709,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -4.03125,
"rewards/margins": 3.65625,
"rewards/rejected": -7.6875,
"step": 1240
},
{
"epoch": 2.5933609958506225,
"grad_norm": 6.03151820091559,
"learning_rate": 2.74817963529958e-08,
"logits/chosen": -1.1171875,
"logits/rejected": -1.171875,
"logps/chosen": -672.0,
"logps/rejected": -1160.0,
"loss": 0.0584,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.765625,
"rewards/margins": 4.8125,
"rewards/rejected": -8.5625,
"step": 1250
},
{
"epoch": 2.6141078838174274,
"grad_norm": 25.243724218800992,
"learning_rate": 2.479521505875079e-08,
"logits/chosen": -1.265625,
"logits/rejected": -1.2578125,
"logps/chosen": -976.0,
"logps/rejected": -1352.0,
"loss": 0.0643,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.53125,
"rewards/margins": 4.8125,
"rewards/rejected": -10.375,
"step": 1260
},
{
"epoch": 2.6348547717842323,
"grad_norm": 9.357215641019858,
"learning_rate": 2.223994493201342e-08,
"logits/chosen": -1.2265625,
"logits/rejected": -1.3125,
"logps/chosen": -888.0,
"logps/rejected": -1272.0,
"loss": 0.0645,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -4.84375,
"rewards/margins": 4.15625,
"rewards/rejected": -9.0,
"step": 1270
},
{
"epoch": 2.6556016597510372,
"grad_norm": 11.426915475943359,
"learning_rate": 1.9817475885636868e-08,
"logits/chosen": -1.109375,
"logits/rejected": -1.265625,
"logps/chosen": -924.0,
"logps/rejected": -1344.0,
"loss": 0.0652,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.8125,
"rewards/margins": 4.90625,
"rewards/rejected": -9.6875,
"step": 1280
},
{
"epoch": 2.6763485477178426,
"grad_norm": 13.561447852898182,
"learning_rate": 1.7529220399550376e-08,
"logits/chosen": -1.140625,
"logits/rejected": -1.25,
"logps/chosen": -988.0,
"logps/rejected": -1472.0,
"loss": 0.0567,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.625,
"rewards/margins": 5.28125,
"rewards/rejected": -10.875,
"step": 1290
},
{
"epoch": 2.6970954356846475,
"grad_norm": 8.09337248601455,
"learning_rate": 1.5376512697178713e-08,
"logits/chosen": -1.234375,
"logits/rejected": -1.2265625,
"logps/chosen": -824.0,
"logps/rejected": -1224.0,
"loss": 0.063,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.5625,
"rewards/margins": 4.71875,
"rewards/rejected": -9.25,
"step": 1300
},
{
"epoch": 2.7178423236514524,
"grad_norm": 16.861726238832322,
"learning_rate": 1.3360607967490307e-08,
"logits/chosen": -1.1640625,
"logits/rejected": -1.15625,
"logps/chosen": -1048.0,
"logps/rejected": -1400.0,
"loss": 0.0647,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.78125,
"rewards/margins": 4.34375,
"rewards/rejected": -10.125,
"step": 1310
},
{
"epoch": 2.7385892116182573,
"grad_norm": 20.632985201134645,
"learning_rate": 1.1482681633128738e-08,
"logits/chosen": -1.3046875,
"logits/rejected": -1.3515625,
"logps/chosen": -920.0,
"logps/rejected": -1200.0,
"loss": 0.0816,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -4.40625,
"rewards/margins": 3.65625,
"rewards/rejected": -8.0625,
"step": 1320
},
{
"epoch": 2.759336099585062,
"grad_norm": 9.84758156918914,
"learning_rate": 9.743828665053466e-09,
"logits/chosen": -1.1796875,
"logits/rejected": -1.203125,
"logps/chosen": -868.0,
"logps/rejected": -1296.0,
"loss": 0.05,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.625,
"rewards/margins": 5.0,
"rewards/rejected": -9.625,
"step": 1330
},
{
"epoch": 2.780082987551867,
"grad_norm": 11.328737169791557,
"learning_rate": 8.145062944090425e-09,
"logits/chosen": -1.2578125,
"logits/rejected": -1.1953125,
"logps/chosen": -928.0,
"logps/rejected": -1368.0,
"loss": 0.0804,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.0,
"rewards/margins": 4.25,
"rewards/rejected": -9.25,
"step": 1340
},
{
"epoch": 2.800829875518672,
"grad_norm": 24.41038502693378,
"learning_rate": 6.687316669763937e-09,
"logits/chosen": -1.2265625,
"logits/rejected": -1.3125,
"logps/chosen": -808.0,
"logps/rejected": -1200.0,
"loss": 0.0622,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.46875,
"rewards/margins": 4.28125,
"rewards/rejected": -8.75,
"step": 1350
},
{
"epoch": 2.821576763485477,
"grad_norm": 15.889369514063155,
"learning_rate": 5.371439816754892e-09,
"logits/chosen": -1.1640625,
"logits/rejected": -1.234375,
"logps/chosen": -856.0,
"logps/rejected": -1136.0,
"loss": 0.0684,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -4.96875,
"rewards/margins": 3.5,
"rewards/rejected": -8.4375,
"step": 1360
},
{
"epoch": 2.8423236514522823,
"grad_norm": 13.23599572717501,
"learning_rate": 4.198199639302152e-09,
"logits/chosen": -1.1796875,
"logits/rejected": -1.21875,
"logps/chosen": -800.0,
"logps/rejected": -1280.0,
"loss": 0.0608,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.59375,
"rewards/margins": 4.71875,
"rewards/rejected": -9.3125,
"step": 1370
},
{
"epoch": 2.863070539419087,
"grad_norm": 9.4244826040974,
"learning_rate": 3.1682802238362506e-09,
"logits/chosen": -1.1953125,
"logits/rejected": -1.2578125,
"logps/chosen": -820.0,
"logps/rejected": -1280.0,
"loss": 0.0549,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.40625,
"rewards/margins": 4.59375,
"rewards/rejected": -9.0,
"step": 1380
},
{
"epoch": 2.883817427385892,
"grad_norm": 9.572805205888423,
"learning_rate": 2.2822820901060025e-09,
"logits/chosen": -1.3046875,
"logits/rejected": -1.2265625,
"logps/chosen": -920.0,
"logps/rejected": -1168.0,
"loss": 0.0583,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.5625,
"rewards/margins": 4.0625,
"rewards/rejected": -8.625,
"step": 1390
},
{
"epoch": 2.904564315352697,
"grad_norm": 5.8710007402955835,
"learning_rate": 1.5407218410307398e-09,
"logits/chosen": -1.3125,
"logits/rejected": -1.3828125,
"logps/chosen": -824.0,
"logps/rejected": -1168.0,
"loss": 0.0627,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -5.09375,
"rewards/margins": 3.34375,
"rewards/rejected": -8.4375,
"step": 1400
},
{
"epoch": 2.9253112033195023,
"grad_norm": 8.81585615187788,
"learning_rate": 9.440318614823417e-10,
"logits/chosen": -1.234375,
"logits/rejected": -1.2421875,
"logps/chosen": -788.0,
"logps/rejected": -1128.0,
"loss": 0.0704,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -4.34375,
"rewards/margins": 3.9375,
"rewards/rejected": -8.3125,
"step": 1410
},
{
"epoch": 2.9460580912863072,
"grad_norm": 8.442496137212597,
"learning_rate": 4.925600661726537e-10,
"logits/chosen": -1.2421875,
"logits/rejected": -1.3359375,
"logps/chosen": -916.0,
"logps/rejected": -1320.0,
"loss": 0.0613,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.46875,
"rewards/margins": 4.84375,
"rewards/rejected": -9.3125,
"step": 1420
},
{
"epoch": 2.966804979253112,
"grad_norm": 14.014913089264976,
"learning_rate": 1.8656969679323176e-10,
"logits/chosen": -1.2734375,
"logits/rejected": -1.2890625,
"logps/chosen": -936.0,
"logps/rejected": -1256.0,
"loss": 0.0667,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.0,
"rewards/margins": 4.40625,
"rewards/rejected": -9.375,
"step": 1430
},
{
"epoch": 2.987551867219917,
"grad_norm": 8.079064512428207,
"learning_rate": 2.6239168525898915e-11,
"logits/chosen": -1.25,
"logits/rejected": -1.1328125,
"logps/chosen": -860.0,
"logps/rejected": -1152.0,
"loss": 0.0733,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -4.65625,
"rewards/margins": 3.859375,
"rewards/rejected": -8.5,
"step": 1440
},
{
"epoch": 3.0,
"step": 1446,
"total_flos": 0.0,
"train_loss": 0.29230043576466097,
"train_runtime": 30177.164,
"train_samples_per_second": 3.062,
"train_steps_per_second": 0.048
}
],
"logging_steps": 10,
"max_steps": 1446,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}