| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9965714285714286, | |
| "eval_steps": 500, | |
| "global_step": 109, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.009142857142857144, | |
| "grad_norm": 0.6255323886871338, | |
| "learning_rate": 2.9993770144857767e-06, | |
| "logits/chosen": -2.1389834880828857, | |
| "logits/rejected": -2.141430139541626, | |
| "logps/chosen": -19.425989151000977, | |
| "logps/rejected": -21.582773208618164, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.018285714285714287, | |
| "grad_norm": 0.6554356217384338, | |
| "learning_rate": 2.997508575424375e-06, | |
| "logits/chosen": -2.1365740299224854, | |
| "logits/rejected": -2.1396265029907227, | |
| "logps/chosen": -20.762622833251953, | |
| "logps/rejected": -22.60515785217285, | |
| "loss": 0.6943, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.0017696216236799955, | |
| "rewards/margins": -0.002286846749484539, | |
| "rewards/rejected": 0.0005172253004275262, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.027428571428571427, | |
| "grad_norm": 0.6278586387634277, | |
| "learning_rate": 2.9943962348297537e-06, | |
| "logits/chosen": -2.1222903728485107, | |
| "logits/rejected": -2.125791549682617, | |
| "logps/chosen": -21.520599365234375, | |
| "logps/rejected": -24.2766170501709, | |
| "loss": 0.6956, | |
| "rewards/accuracies": 0.390625, | |
| "rewards/chosen": -0.0007706253090873361, | |
| "rewards/margins": -0.0048321266658604145, | |
| "rewards/rejected": 0.004061501007527113, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.036571428571428574, | |
| "grad_norm": 0.6467044949531555, | |
| "learning_rate": 2.9900425779593876e-06, | |
| "logits/chosen": -2.1400036811828613, | |
| "logits/rejected": -2.1466779708862305, | |
| "logps/chosen": -19.16310691833496, | |
| "logps/rejected": -25.431270599365234, | |
| "loss": 0.6923, | |
| "rewards/accuracies": 0.515625, | |
| "rewards/chosen": 0.0013154743937775493, | |
| "rewards/margins": 0.00192451779730618, | |
| "rewards/rejected": -0.0006090432871133089, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.045714285714285714, | |
| "grad_norm": 0.6181118488311768, | |
| "learning_rate": 2.9844512211668286e-06, | |
| "logits/chosen": -2.1338605880737305, | |
| "logits/rejected": -2.137308359146118, | |
| "logps/chosen": -20.26681137084961, | |
| "logps/rejected": -21.6939754486084, | |
| "loss": 0.6914, | |
| "rewards/accuracies": 0.546875, | |
| "rewards/chosen": 0.0007669397164136171, | |
| "rewards/margins": 0.003516831435263157, | |
| "rewards/rejected": -0.002749891486018896, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.054857142857142854, | |
| "grad_norm": 0.6369356513023376, | |
| "learning_rate": 2.977626808897792e-06, | |
| "logits/chosen": -2.148895740509033, | |
| "logits/rejected": -2.151296377182007, | |
| "logps/chosen": -19.613313674926758, | |
| "logps/rejected": -21.868637084960938, | |
| "loss": 0.6921, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.0020878687500953674, | |
| "rewards/margins": 0.002298696432262659, | |
| "rewards/rejected": -0.00021082756575196981, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 0.643375813961029, | |
| "learning_rate": 2.9695750098322613e-06, | |
| "logits/chosen": -2.154219150543213, | |
| "logits/rejected": -2.1564598083496094, | |
| "logps/chosen": -19.349716186523438, | |
| "logps/rejected": -22.341163635253906, | |
| "loss": 0.6945, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": -0.004500311333686113, | |
| "rewards/margins": -0.0025411711540073156, | |
| "rewards/rejected": -0.001959140645340085, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.07314285714285715, | |
| "grad_norm": 0.6222244501113892, | |
| "learning_rate": 2.9603025121758102e-06, | |
| "logits/chosen": -2.126340627670288, | |
| "logits/rejected": -2.130244731903076, | |
| "logps/chosen": -19.825477600097656, | |
| "logps/rejected": -23.661293029785156, | |
| "loss": 0.6917, | |
| "rewards/accuracies": 0.578125, | |
| "rewards/chosen": 0.0011193343671038747, | |
| "rewards/margins": 0.0031258827075362206, | |
| "rewards/rejected": -0.0020065484568476677, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.08228571428571428, | |
| "grad_norm": 0.5961363911628723, | |
| "learning_rate": 2.9498170181040663e-06, | |
| "logits/chosen": -2.14841365814209, | |
| "logits/rejected": -2.14998197555542, | |
| "logps/chosen": -17.929092407226562, | |
| "logps/rejected": -19.984407424926758, | |
| "loss": 0.6922, | |
| "rewards/accuracies": 0.578125, | |
| "rewards/chosen": -0.0016110084252431989, | |
| "rewards/margins": 0.001981835812330246, | |
| "rewards/rejected": -0.003592844121158123, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.09142857142857143, | |
| "grad_norm": 0.6394132375717163, | |
| "learning_rate": 2.938127237364918e-06, | |
| "logits/chosen": -2.1390151977539062, | |
| "logits/rejected": -2.14105486869812, | |
| "logps/chosen": -19.9459228515625, | |
| "logps/rejected": -21.593914031982422, | |
| "loss": 0.6949, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.0058130137622356415, | |
| "rewards/margins": -0.003376076463609934, | |
| "rewards/rejected": -0.0024369372986257076, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.10057142857142858, | |
| "grad_norm": 0.637840986251831, | |
| "learning_rate": 2.925242880043786e-06, | |
| "logits/chosen": -2.1370978355407715, | |
| "logits/rejected": -2.1393895149230957, | |
| "logps/chosen": -20.649080276489258, | |
| "logps/rejected": -23.88674545288086, | |
| "loss": 0.6941, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": -0.0013475671876221895, | |
| "rewards/margins": -0.0018120227614417672, | |
| "rewards/rejected": 0.0004644556902348995, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.10971428571428571, | |
| "grad_norm": 0.624940037727356, | |
| "learning_rate": 2.911174648497964e-06, | |
| "logits/chosen": -2.1435601711273193, | |
| "logits/rejected": -2.146998882293701, | |
| "logps/chosen": -19.336463928222656, | |
| "logps/rejected": -22.77804183959961, | |
| "loss": 0.6907, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.0014964112779125571, | |
| "rewards/margins": 0.005147767253220081, | |
| "rewards/rejected": -0.006644178181886673, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.11885714285714286, | |
| "grad_norm": 0.6471104621887207, | |
| "learning_rate": 2.895934228466738e-06, | |
| "logits/chosen": -2.136577606201172, | |
| "logits/rejected": -2.1388959884643555, | |
| "logps/chosen": -20.625932693481445, | |
| "logps/rejected": -23.377975463867188, | |
| "loss": 0.6886, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.0014629564248025417, | |
| "rewards/margins": 0.00928102433681488, | |
| "rewards/rejected": -0.007818068377673626, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.629192054271698, | |
| "learning_rate": 2.879534279364654e-06, | |
| "logits/chosen": -2.1251070499420166, | |
| "logits/rejected": -2.1296639442443848, | |
| "logps/chosen": -17.793655395507812, | |
| "logps/rejected": -24.011507034301758, | |
| "loss": 0.6917, | |
| "rewards/accuracies": 0.515625, | |
| "rewards/chosen": -0.000675417366437614, | |
| "rewards/margins": 0.0030865983571857214, | |
| "rewards/rejected": -0.0037620156072080135, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.13714285714285715, | |
| "grad_norm": 0.659120500087738, | |
| "learning_rate": 2.8619884237660125e-06, | |
| "logits/chosen": -2.1358160972595215, | |
| "logits/rejected": -2.142625331878662, | |
| "logps/chosen": -18.37673568725586, | |
| "logps/rejected": -25.251014709472656, | |
| "loss": 0.6907, | |
| "rewards/accuracies": 0.578125, | |
| "rewards/chosen": -0.00043936213478446007, | |
| "rewards/margins": 0.004951969254761934, | |
| "rewards/rejected": -0.005391330923885107, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.1462857142857143, | |
| "grad_norm": 0.6336076259613037, | |
| "learning_rate": 2.843311236089309e-06, | |
| "logits/chosen": -2.1342644691467285, | |
| "logits/rejected": -2.1355390548706055, | |
| "logps/chosen": -20.63397216796875, | |
| "logps/rejected": -21.67581558227539, | |
| "loss": 0.6923, | |
| "rewards/accuracies": 0.578125, | |
| "rewards/chosen": -0.006054366007447243, | |
| "rewards/margins": 0.0018282074015587568, | |
| "rewards/rejected": -0.007882573641836643, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.15542857142857142, | |
| "grad_norm": 0.6278834342956543, | |
| "learning_rate": 2.8235182304910364e-06, | |
| "logits/chosen": -2.1471428871154785, | |
| "logits/rejected": -2.148350477218628, | |
| "logps/chosen": -21.62627410888672, | |
| "logps/rejected": -22.867630004882812, | |
| "loss": 0.6914, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.002310897456482053, | |
| "rewards/margins": 0.00360050518065691, | |
| "rewards/rejected": -0.005911402404308319, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.16457142857142856, | |
| "grad_norm": 0.6396936178207397, | |
| "learning_rate": 2.8026258479788888e-06, | |
| "logits/chosen": -2.131674289703369, | |
| "logits/rejected": -2.1344425678253174, | |
| "logps/chosen": -17.968589782714844, | |
| "logps/rejected": -23.94507598876953, | |
| "loss": 0.6891, | |
| "rewards/accuracies": 0.640625, | |
| "rewards/chosen": -0.0026061469689011574, | |
| "rewards/margins": 0.008287503384053707, | |
| "rewards/rejected": -0.010893651284277439, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.1737142857142857, | |
| "grad_norm": 0.6427000164985657, | |
| "learning_rate": 2.780651442755083e-06, | |
| "logits/chosen": -2.1325266361236572, | |
| "logits/rejected": -2.1359243392944336, | |
| "logps/chosen": -19.952186584472656, | |
| "logps/rejected": -20.840421676635742, | |
| "loss": 0.6887, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.002477221190929413, | |
| "rewards/margins": 0.009127501398324966, | |
| "rewards/rejected": -0.006650280207395554, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.18285714285714286, | |
| "grad_norm": 0.634149968624115, | |
| "learning_rate": 2.7576132678011365e-06, | |
| "logits/chosen": -2.137594223022461, | |
| "logits/rejected": -2.1397337913513184, | |
| "logps/chosen": -20.24038314819336, | |
| "logps/rejected": -21.273605346679688, | |
| "loss": 0.6886, | |
| "rewards/accuracies": 0.609375, | |
| "rewards/chosen": -0.0009703578543849289, | |
| "rewards/margins": 0.009282448329031467, | |
| "rewards/rejected": -0.010252806358039379, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.7092023491859436, | |
| "learning_rate": 2.7335304597160764e-06, | |
| "logits/chosen": -2.1394314765930176, | |
| "logits/rejected": -2.1454715728759766, | |
| "logps/chosen": -21.92709732055664, | |
| "logps/rejected": -28.169654846191406, | |
| "loss": 0.6845, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.0013219192624092102, | |
| "rewards/margins": 0.017537159845232964, | |
| "rewards/rejected": -0.016215242445468903, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.20114285714285715, | |
| "grad_norm": 0.6428853869438171, | |
| "learning_rate": 2.7084230228206746e-06, | |
| "logits/chosen": -2.1274845600128174, | |
| "logits/rejected": -2.128504991531372, | |
| "logps/chosen": -19.982959747314453, | |
| "logps/rejected": -23.259571075439453, | |
| "loss": 0.688, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": -0.0038342936895787716, | |
| "rewards/margins": 0.010476754978299141, | |
| "rewards/rejected": -0.0143110491335392, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.2102857142857143, | |
| "grad_norm": 0.6467615962028503, | |
| "learning_rate": 2.6823118125409112e-06, | |
| "logits/chosen": -2.1434879302978516, | |
| "logits/rejected": -2.14566707611084, | |
| "logps/chosen": -20.100147247314453, | |
| "logps/rejected": -23.975025177001953, | |
| "loss": 0.6897, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.001528523163869977, | |
| "rewards/margins": 0.0071428027004003525, | |
| "rewards/rejected": -0.008671325631439686, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.21942857142857142, | |
| "grad_norm": 0.6638103127479553, | |
| "learning_rate": 2.6552185180844704e-06, | |
| "logits/chosen": -2.1213717460632324, | |
| "logits/rejected": -2.1236109733581543, | |
| "logps/chosen": -21.576557159423828, | |
| "logps/rejected": -23.23206329345703, | |
| "loss": 0.6861, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": 0.0032195569947361946, | |
| "rewards/margins": 0.01447179913520813, | |
| "rewards/rejected": -0.011252242140471935, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.22857142857142856, | |
| "grad_norm": 0.6348288655281067, | |
| "learning_rate": 2.6271656444246578e-06, | |
| "logits/chosen": -2.1333892345428467, | |
| "logits/rejected": -2.1361846923828125, | |
| "logps/chosen": -19.42316436767578, | |
| "logps/rejected": -22.707563400268555, | |
| "loss": 0.6831, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 0.0022394396364688873, | |
| "rewards/margins": 0.020372504368424416, | |
| "rewards/rejected": -0.018133066594600677, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.2377142857142857, | |
| "grad_norm": 0.6526222825050354, | |
| "learning_rate": 2.598176493606703e-06, | |
| "logits/chosen": -2.1356377601623535, | |
| "logits/rejected": -2.1370201110839844, | |
| "logps/chosen": -20.537616729736328, | |
| "logps/rejected": -24.898578643798828, | |
| "loss": 0.6859, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.002131823683157563, | |
| "rewards/margins": 0.014900727197527885, | |
| "rewards/rejected": -0.017032550647854805, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.24685714285714286, | |
| "grad_norm": 0.6682783365249634, | |
| "learning_rate": 2.568275145391978e-06, | |
| "logits/chosen": -2.1460518836975098, | |
| "logits/rejected": -2.1491003036499023, | |
| "logps/chosen": -20.905759811401367, | |
| "logps/rejected": -24.251680374145508, | |
| "loss": 0.6857, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": 0.0005356475012376904, | |
| "rewards/margins": 0.015149888582527637, | |
| "rewards/rejected": -0.014614241197705269, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 0.6456180214881897, | |
| "learning_rate": 2.5374864372562077e-06, | |
| "logits/chosen": -2.1365909576416016, | |
| "logits/rejected": -2.1375560760498047, | |
| "logps/chosen": -21.477279663085938, | |
| "logps/rejected": -22.589874267578125, | |
| "loss": 0.6853, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -0.0013170776655897498, | |
| "rewards/margins": 0.01594378799200058, | |
| "rewards/rejected": -0.01726086437702179, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.2651428571428571, | |
| "grad_norm": 0.6599003672599792, | |
| "learning_rate": 2.505835943758286e-06, | |
| "logits/chosen": -2.1302995681762695, | |
| "logits/rejected": -2.1338701248168945, | |
| "logps/chosen": -20.774627685546875, | |
| "logps/rejected": -24.625228881835938, | |
| "loss": 0.6838, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.0004698322154581547, | |
| "rewards/margins": 0.019142411649227142, | |
| "rewards/rejected": -0.018672579899430275, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.2742857142857143, | |
| "grad_norm": 0.6650639176368713, | |
| "learning_rate": 2.4733499552968357e-06, | |
| "logits/chosen": -2.1260218620300293, | |
| "logits/rejected": -2.128187894821167, | |
| "logps/chosen": -20.981136322021484, | |
| "logps/rejected": -23.799392700195312, | |
| "loss": 0.683, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.00021876831306144595, | |
| "rewards/margins": 0.020755982026457787, | |
| "rewards/rejected": -0.02053721249103546, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.2834285714285714, | |
| "grad_norm": 0.6870555877685547, | |
| "learning_rate": 2.440055456272159e-06, | |
| "logits/chosen": -2.1325454711914062, | |
| "logits/rejected": -2.1314170360565186, | |
| "logps/chosen": -20.572166442871094, | |
| "logps/rejected": -19.940898895263672, | |
| "loss": 0.6861, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.00320088560692966, | |
| "rewards/margins": 0.014535932801663876, | |
| "rewards/rejected": -0.01773681864142418, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.2925714285714286, | |
| "grad_norm": 0.6859702467918396, | |
| "learning_rate": 2.4059801026717166e-06, | |
| "logits/chosen": -2.138218402862549, | |
| "logits/rejected": -2.1400537490844727, | |
| "logps/chosen": -20.59479331970215, | |
| "logps/rejected": -24.294113159179688, | |
| "loss": 0.6824, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.0018003403674811125, | |
| "rewards/margins": 0.02209661900997162, | |
| "rewards/rejected": -0.02029627561569214, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.3017142857142857, | |
| "grad_norm": 0.6709543466567993, | |
| "learning_rate": 2.3711521990977554e-06, | |
| "logits/chosen": -2.134920120239258, | |
| "logits/rejected": -2.137303352355957, | |
| "logps/chosen": -21.195552825927734, | |
| "logps/rejected": -24.645339965820312, | |
| "loss": 0.6847, | |
| "rewards/accuracies": 0.640625, | |
| "rewards/chosen": -0.00019013590645045042, | |
| "rewards/margins": 0.0173664353787899, | |
| "rewards/rejected": -0.01755657233297825, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.31085714285714283, | |
| "grad_norm": 0.6602835655212402, | |
| "learning_rate": 2.3356006752561658e-06, | |
| "logits/chosen": -2.1185295581817627, | |
| "logits/rejected": -2.122647762298584, | |
| "logps/chosen": -17.77151870727539, | |
| "logps/rejected": -25.318552017211914, | |
| "loss": 0.6817, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.0011812887387350202, | |
| "rewards/margins": 0.023368019610643387, | |
| "rewards/rejected": -0.022186731919646263, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.6702331900596619, | |
| "learning_rate": 2.299355061926096e-06, | |
| "logits/chosen": -2.1439552307128906, | |
| "logits/rejected": -2.148176670074463, | |
| "logps/chosen": -19.662979125976562, | |
| "logps/rejected": -25.61541748046875, | |
| "loss": 0.6812, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.0001765764318406582, | |
| "rewards/margins": 0.02442769892513752, | |
| "rewards/rejected": -0.024604275822639465, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.3291428571428571, | |
| "grad_norm": 0.6621116399765015, | |
| "learning_rate": 2.262445466430292e-06, | |
| "logits/chosen": -2.138071060180664, | |
| "logits/rejected": -2.139529228210449, | |
| "logps/chosen": -19.943336486816406, | |
| "logps/rejected": -23.18177032470703, | |
| "loss": 0.6822, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.006531356833875179, | |
| "rewards/margins": 0.022531913593411446, | |
| "rewards/rejected": -0.0290632676333189, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.3382857142857143, | |
| "grad_norm": 0.7125285863876343, | |
| "learning_rate": 2.2249025476265262e-06, | |
| "logits/chosen": -2.1278233528137207, | |
| "logits/rejected": -2.1309316158294678, | |
| "logps/chosen": -21.678462982177734, | |
| "logps/rejected": -23.819469451904297, | |
| "loss": 0.6809, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.003998810425400734, | |
| "rewards/margins": 0.025094730779528618, | |
| "rewards/rejected": -0.029093541204929352, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.3474285714285714, | |
| "grad_norm": 0.6747680902481079, | |
| "learning_rate": 2.1867574904409007e-06, | |
| "logits/chosen": -2.128553628921509, | |
| "logits/rejected": -2.1311511993408203, | |
| "logps/chosen": -18.51136589050293, | |
| "logps/rejected": -24.083953857421875, | |
| "loss": 0.6797, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.00025194010231643915, | |
| "rewards/margins": 0.027810033410787582, | |
| "rewards/rejected": -0.027558093890547752, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.3565714285714286, | |
| "grad_norm": 0.6505147218704224, | |
| "learning_rate": 2.1480419799641695e-06, | |
| "logits/chosen": -2.1170382499694824, | |
| "logits/rejected": -2.1211585998535156, | |
| "logps/chosen": -18.79464340209961, | |
| "logps/rejected": -23.59588050842285, | |
| "loss": 0.6836, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -0.006827862001955509, | |
| "rewards/margins": 0.019631091505289078, | |
| "rewards/rejected": -0.026458950713276863, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.3657142857142857, | |
| "grad_norm": 0.6365678310394287, | |
| "learning_rate": 2.1087881751326035e-06, | |
| "logits/chosen": -2.1277003288269043, | |
| "logits/rejected": -2.1313459873199463, | |
| "logps/chosen": -20.50314712524414, | |
| "logps/rejected": -22.63813018798828, | |
| "loss": 0.6812, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.0014799695927649736, | |
| "rewards/margins": 0.024493195116519928, | |
| "rewards/rejected": -0.023013222962617874, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.37485714285714283, | |
| "grad_norm": 0.6673828959465027, | |
| "learning_rate": 2.0690286820152535e-06, | |
| "logits/chosen": -2.1289217472076416, | |
| "logits/rejected": -2.131746768951416, | |
| "logps/chosen": -20.128999710083008, | |
| "logps/rejected": -23.057846069335938, | |
| "loss": 0.6726, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": 0.004846580792218447, | |
| "rewards/margins": 0.042193807661533356, | |
| "rewards/rejected": -0.037347227334976196, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 0.6725500226020813, | |
| "learning_rate": 2.028796526729806e-06, | |
| "logits/chosen": -2.121724843978882, | |
| "logits/rejected": -2.125291347503662, | |
| "logps/chosen": -19.95975112915039, | |
| "logps/rejected": -24.067777633666992, | |
| "loss": 0.6773, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.000510699232108891, | |
| "rewards/margins": 0.03263135999441147, | |
| "rewards/rejected": -0.03212066367268562, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.3931428571428571, | |
| "grad_norm": 0.7096243500709534, | |
| "learning_rate": 1.9881251280095263e-06, | |
| "logits/chosen": -2.12835693359375, | |
| "logits/rejected": -2.1325571537017822, | |
| "logps/chosen": -19.971481323242188, | |
| "logps/rejected": -24.266193389892578, | |
| "loss": 0.6759, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 0.0037878660950809717, | |
| "rewards/margins": 0.03536036238074303, | |
| "rewards/rejected": -0.03157249093055725, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.4022857142857143, | |
| "grad_norm": 0.6290874481201172, | |
| "learning_rate": 1.9470482694440755e-06, | |
| "logits/chosen": -2.139394760131836, | |
| "logits/rejected": -2.1419851779937744, | |
| "logps/chosen": -17.84711265563965, | |
| "logps/rejected": -22.699108123779297, | |
| "loss": 0.6802, | |
| "rewards/accuracies": 0.640625, | |
| "rewards/chosen": -0.0038270740769803524, | |
| "rewards/margins": 0.027179870754480362, | |
| "rewards/rejected": -0.031006945297122, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.4114285714285714, | |
| "grad_norm": 0.6832275986671448, | |
| "learning_rate": 1.9056000714172617e-06, | |
| "logits/chosen": -2.138123035430908, | |
| "logits/rejected": -2.142123222351074, | |
| "logps/chosen": -19.396350860595703, | |
| "logps/rejected": -22.903085708618164, | |
| "loss": 0.6728, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.000809194054454565, | |
| "rewards/margins": 0.042555954307317734, | |
| "rewards/rejected": -0.04336514696478844, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.4205714285714286, | |
| "grad_norm": 0.6851588487625122, | |
| "learning_rate": 1.8638149627650335e-06, | |
| "logits/chosen": -2.1379756927490234, | |
| "logits/rejected": -2.1380934715270996, | |
| "logps/chosen": -21.08904266357422, | |
| "logps/rejected": -23.63918685913086, | |
| "loss": 0.674, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": 0.001778717152774334, | |
| "rewards/margins": 0.03978656232357025, | |
| "rewards/rejected": -0.03800784423947334, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.4297142857142857, | |
| "grad_norm": 0.6948539614677429, | |
| "learning_rate": 1.8217276521772582e-06, | |
| "logits/chosen": -2.1302433013916016, | |
| "logits/rejected": -2.1331663131713867, | |
| "logps/chosen": -20.23948860168457, | |
| "logps/rejected": -23.1295223236084, | |
| "loss": 0.6725, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -0.004014923237264156, | |
| "rewards/margins": 0.042658429592847824, | |
| "rewards/rejected": -0.046673357486724854, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.43885714285714283, | |
| "grad_norm": 0.6569979190826416, | |
| "learning_rate": 1.7793730993670408e-06, | |
| "logits/chosen": -2.1294007301330566, | |
| "logits/rejected": -2.1324024200439453, | |
| "logps/chosen": -20.591182708740234, | |
| "logps/rejected": -23.661182403564453, | |
| "loss": 0.6775, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.004753855522722006, | |
| "rewards/margins": 0.03247044235467911, | |
| "rewards/rejected": -0.03722430020570755, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 0.6771251559257507, | |
| "learning_rate": 1.736786486031531e-06, | |
| "logits/chosen": -2.126737117767334, | |
| "logits/rejected": -2.1294384002685547, | |
| "logps/chosen": -20.071245193481445, | |
| "logps/rejected": -22.262264251708984, | |
| "loss": 0.6708, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": 0.005568951368331909, | |
| "rewards/margins": 0.04644326865673065, | |
| "rewards/rejected": -0.04087432101368904, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.45714285714285713, | |
| "grad_norm": 0.6473885774612427, | |
| "learning_rate": 1.6940031866283395e-06, | |
| "logits/chosen": -2.1336934566497803, | |
| "logits/rejected": -2.1349339485168457, | |
| "logps/chosen": -19.305700302124023, | |
| "logps/rejected": -21.597030639648438, | |
| "loss": 0.6798, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": -0.002087415661662817, | |
| "rewards/margins": 0.02759050950407982, | |
| "rewards/rejected": -0.029677925631403923, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.4662857142857143, | |
| "grad_norm": 0.6721633672714233, | |
| "learning_rate": 1.6510587389918377e-06, | |
| "logits/chosen": -2.1234569549560547, | |
| "logits/rejected": -2.1260986328125, | |
| "logps/chosen": -20.71694564819336, | |
| "logps/rejected": -24.932897567749023, | |
| "loss": 0.6813, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -0.011087682098150253, | |
| "rewards/margins": 0.024922657757997513, | |
| "rewards/rejected": -0.036010339856147766, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.4754285714285714, | |
| "grad_norm": 0.7036443948745728, | |
| "learning_rate": 1.6079888148137507e-06, | |
| "logits/chosen": -2.1245672702789307, | |
| "logits/rejected": -2.1277780532836914, | |
| "logps/chosen": -21.870973587036133, | |
| "logps/rejected": -24.941219329833984, | |
| "loss": 0.6717, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -0.0029917103238403797, | |
| "rewards/margins": 0.04497722536325455, | |
| "rewards/rejected": -0.04796893894672394, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.4845714285714286, | |
| "grad_norm": 0.68109130859375, | |
| "learning_rate": 1.564829190012561e-06, | |
| "logits/chosen": -2.1461524963378906, | |
| "logits/rejected": -2.1497020721435547, | |
| "logps/chosen": -21.60137176513672, | |
| "logps/rejected": -25.58949089050293, | |
| "loss": 0.6722, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.005693153943866491, | |
| "rewards/margins": 0.043281424790620804, | |
| "rewards/rejected": -0.04897458106279373, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.4937142857142857, | |
| "grad_norm": 0.6599106192588806, | |
| "learning_rate": 1.521615715016336e-06, | |
| "logits/chosen": -2.140432357788086, | |
| "logits/rejected": -2.1406521797180176, | |
| "logps/chosen": -20.149822235107422, | |
| "logps/rejected": -22.249767303466797, | |
| "loss": 0.6777, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -0.00302593014203012, | |
| "rewards/margins": 0.0317457839846611, | |
| "rewards/rejected": -0.03477171063423157, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.5028571428571429, | |
| "grad_norm": 0.7043587565422058, | |
| "learning_rate": 1.4783842849836645e-06, | |
| "logits/chosen": -2.1249215602874756, | |
| "logits/rejected": -2.1282291412353516, | |
| "logps/chosen": -20.65789031982422, | |
| "logps/rejected": -23.718164443969727, | |
| "loss": 0.668, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": 0.0033555193804204464, | |
| "rewards/margins": 0.05225363373756409, | |
| "rewards/rejected": -0.048898108303546906, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 0.6670368313789368, | |
| "learning_rate": 1.435170809987439e-06, | |
| "logits/chosen": -2.122504949569702, | |
| "logits/rejected": -2.126192569732666, | |
| "logps/chosen": -20.417633056640625, | |
| "logps/rejected": -24.24279022216797, | |
| "loss": 0.6724, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -0.006906700320541859, | |
| "rewards/margins": 0.04406347870826721, | |
| "rewards/rejected": -0.0509701743721962, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.5211428571428571, | |
| "grad_norm": 0.6748237013816833, | |
| "learning_rate": 1.3920111851862494e-06, | |
| "logits/chosen": -2.1295788288116455, | |
| "logits/rejected": -2.132110834121704, | |
| "logps/chosen": -20.603960037231445, | |
| "logps/rejected": -23.54561996459961, | |
| "loss": 0.6711, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -0.011474862694740295, | |
| "rewards/margins": 0.04592112824320793, | |
| "rewards/rejected": -0.057395994663238525, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.5302857142857142, | |
| "grad_norm": 0.6662198901176453, | |
| "learning_rate": 1.3489412610081626e-06, | |
| "logits/chosen": -2.1300594806671143, | |
| "logits/rejected": -2.132218837738037, | |
| "logps/chosen": -20.97345542907715, | |
| "logps/rejected": -24.11111068725586, | |
| "loss": 0.6737, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -0.015223701484501362, | |
| "rewards/margins": 0.04077855125069618, | |
| "rewards/rejected": -0.05600225552916527, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.5394285714285715, | |
| "grad_norm": 0.6957614421844482, | |
| "learning_rate": 1.3059968133716607e-06, | |
| "logits/chosen": -2.132567882537842, | |
| "logits/rejected": -2.132495880126953, | |
| "logps/chosen": -21.196874618530273, | |
| "logps/rejected": -23.613279342651367, | |
| "loss": 0.6747, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -0.008941511623561382, | |
| "rewards/margins": 0.03851575776934624, | |
| "rewards/rejected": -0.047457270324230194, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.5485714285714286, | |
| "grad_norm": 0.7162770628929138, | |
| "learning_rate": 1.2632135139684691e-06, | |
| "logits/chosen": -2.1271543502807617, | |
| "logits/rejected": -2.130880117416382, | |
| "logps/chosen": -20.923919677734375, | |
| "logps/rejected": -25.65717315673828, | |
| "loss": 0.6663, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": 0.009123304858803749, | |
| "rewards/margins": 0.05547190085053444, | |
| "rewards/rejected": -0.04634860157966614, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.5577142857142857, | |
| "grad_norm": 0.6612498164176941, | |
| "learning_rate": 1.2206269006329595e-06, | |
| "logits/chosen": -2.116666316986084, | |
| "logits/rejected": -2.1212408542633057, | |
| "logps/chosen": -20.269481658935547, | |
| "logps/rejected": -24.660919189453125, | |
| "loss": 0.6721, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.013498620130121708, | |
| "rewards/margins": 0.0441305935382843, | |
| "rewards/rejected": -0.057629212737083435, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.5668571428571428, | |
| "grad_norm": 0.6904810667037964, | |
| "learning_rate": 1.178272347822742e-06, | |
| "logits/chosen": -2.1359639167785645, | |
| "logits/rejected": -2.137200355529785, | |
| "logps/chosen": -21.87899398803711, | |
| "logps/rejected": -22.924833297729492, | |
| "loss": 0.6645, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.008273976854979992, | |
| "rewards/margins": 0.06065124645829201, | |
| "rewards/rejected": -0.05237726867198944, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 0.6719346642494202, | |
| "learning_rate": 1.1361850372349668e-06, | |
| "logits/chosen": -2.134481906890869, | |
| "logits/rejected": -2.136564016342163, | |
| "logps/chosen": -20.749956130981445, | |
| "logps/rejected": -24.487869262695312, | |
| "loss": 0.6714, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.005040324293076992, | |
| "rewards/margins": 0.045264218002557755, | |
| "rewards/rejected": -0.05030454322695732, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.5851428571428572, | |
| "grad_norm": 0.6895278692245483, | |
| "learning_rate": 1.0943999285827381e-06, | |
| "logits/chosen": -2.1309783458709717, | |
| "logits/rejected": -2.133222818374634, | |
| "logps/chosen": -21.91169548034668, | |
| "logps/rejected": -23.928085327148438, | |
| "loss": 0.6741, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": -0.007841155864298344, | |
| "rewards/margins": 0.03996167331933975, | |
| "rewards/rejected": -0.04780282825231552, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.5942857142857143, | |
| "grad_norm": 0.6835947036743164, | |
| "learning_rate": 1.0529517305559246e-06, | |
| "logits/chosen": -2.1413941383361816, | |
| "logits/rejected": -2.1449737548828125, | |
| "logps/chosen": -19.570405960083008, | |
| "logps/rejected": -23.024538040161133, | |
| "loss": 0.6683, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.0007449511904269457, | |
| "rewards/margins": 0.051878269761800766, | |
| "rewards/rejected": -0.05262322351336479, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.6034285714285714, | |
| "grad_norm": 0.7086966633796692, | |
| "learning_rate": 1.0118748719904738e-06, | |
| "logits/chosen": -2.1314806938171387, | |
| "logits/rejected": -2.132997512817383, | |
| "logps/chosen": -22.395124435424805, | |
| "logps/rejected": -24.68596839904785, | |
| "loss": 0.6714, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -0.004053800366818905, | |
| "rewards/margins": 0.045246005058288574, | |
| "rewards/rejected": -0.049299806356430054, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.6125714285714285, | |
| "grad_norm": 0.7053619623184204, | |
| "learning_rate": 9.712034732701943e-07, | |
| "logits/chosen": -2.136747360229492, | |
| "logits/rejected": -2.1409151554107666, | |
| "logps/chosen": -19.340253829956055, | |
| "logps/rejected": -26.333112716674805, | |
| "loss": 0.6711, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -0.012234330177307129, | |
| "rewards/margins": 0.04644213989377022, | |
| "rewards/rejected": -0.05867646634578705, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.6217142857142857, | |
| "grad_norm": 0.7142196893692017, | |
| "learning_rate": 9.309713179847465e-07, | |
| "logits/chosen": -2.1288576126098633, | |
| "logits/rejected": -2.132416009902954, | |
| "logps/chosen": -21.31295394897461, | |
| "logps/rejected": -24.50257682800293, | |
| "loss": 0.6644, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -0.0032868993002921343, | |
| "rewards/margins": 0.05996156856417656, | |
| "rewards/rejected": -0.06324847042560577, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.6308571428571429, | |
| "grad_norm": 0.7126966714859009, | |
| "learning_rate": 8.912118248673967e-07, | |
| "logits/chosen": -2.118403434753418, | |
| "logits/rejected": -2.1224937438964844, | |
| "logps/chosen": -20.098617553710938, | |
| "logps/rejected": -24.383617401123047, | |
| "loss": 0.6529, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 0.013771215453743935, | |
| "rewards/margins": 0.08454546332359314, | |
| "rewards/rejected": -0.07077424228191376, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.7309445142745972, | |
| "learning_rate": 8.519580200358309e-07, | |
| "logits/chosen": -2.1299290657043457, | |
| "logits/rejected": -2.1309103965759277, | |
| "logps/chosen": -19.261966705322266, | |
| "logps/rejected": -21.874065399169922, | |
| "loss": 0.6644, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -0.0036550310906022787, | |
| "rewards/margins": 0.06022891029715538, | |
| "rewards/rejected": -0.0638839453458786, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.6491428571428571, | |
| "grad_norm": 0.6933106780052185, | |
| "learning_rate": 8.132425095591e-07, | |
| "logits/chosen": -2.126209259033203, | |
| "logits/rejected": -2.1274337768554688, | |
| "logps/chosen": -21.729415893554688, | |
| "logps/rejected": -23.67267417907715, | |
| "loss": 0.6612, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -0.0012235536705702543, | |
| "rewards/margins": 0.066755510866642, | |
| "rewards/rejected": -0.06797906756401062, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.6582857142857143, | |
| "grad_norm": 0.6878139972686768, | |
| "learning_rate": 7.750974523734742e-07, | |
| "logits/chosen": -2.120508909225464, | |
| "logits/rejected": -2.1226325035095215, | |
| "logps/chosen": -19.833683013916016, | |
| "logps/rejected": -24.432552337646484, | |
| "loss": 0.6564, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.005440461914986372, | |
| "rewards/margins": 0.07887633144855499, | |
| "rewards/rejected": -0.08431679010391235, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.6674285714285715, | |
| "grad_norm": 0.6848965883255005, | |
| "learning_rate": 7.375545335697085e-07, | |
| "logits/chosen": -2.1300716400146484, | |
| "logits/rejected": -2.1321635246276855, | |
| "logps/chosen": -20.999731063842773, | |
| "logps/rejected": -24.038249969482422, | |
| "loss": 0.662, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.003927034325897694, | |
| "rewards/margins": 0.06512106209993362, | |
| "rewards/rejected": -0.06904809176921844, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.6765714285714286, | |
| "grad_norm": 0.7239253520965576, | |
| "learning_rate": 7.00644938073904e-07, | |
| "logits/chosen": -2.136348247528076, | |
| "logits/rejected": -2.137990951538086, | |
| "logps/chosen": -21.84921646118164, | |
| "logps/rejected": -25.642963409423828, | |
| "loss": 0.6528, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 0.001280774362385273, | |
| "rewards/margins": 0.08441222459077835, | |
| "rewards/rejected": -0.08313144743442535, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.6857142857142857, | |
| "grad_norm": 0.6780479550361633, | |
| "learning_rate": 6.643993247438348e-07, | |
| "logits/chosen": -2.122738838195801, | |
| "logits/rejected": -2.127403497695923, | |
| "logps/chosen": -19.919052124023438, | |
| "logps/rejected": -23.570457458496094, | |
| "loss": 0.6607, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 0.0010643948335200548, | |
| "rewards/margins": 0.06831549108028412, | |
| "rewards/rejected": -0.06725109368562698, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.6948571428571428, | |
| "grad_norm": 0.7106800675392151, | |
| "learning_rate": 6.288478009022447e-07, | |
| "logits/chosen": -2.1340596675872803, | |
| "logits/rejected": -2.1375958919525146, | |
| "logps/chosen": -20.532428741455078, | |
| "logps/rejected": -23.923845291137695, | |
| "loss": 0.6577, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 0.0032222855370491743, | |
| "rewards/margins": 0.07417334616184235, | |
| "rewards/rejected": -0.07095105946063995, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.6871652007102966, | |
| "learning_rate": 5.940198973282838e-07, | |
| "logits/chosen": -2.1262307167053223, | |
| "logits/rejected": -2.131108283996582, | |
| "logps/chosen": -18.413406372070312, | |
| "logps/rejected": -23.341732025146484, | |
| "loss": 0.6645, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.01665385626256466, | |
| "rewards/margins": 0.060584962368011475, | |
| "rewards/rejected": -0.07723881304264069, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.7131428571428572, | |
| "grad_norm": 0.6611953973770142, | |
| "learning_rate": 5.599445437278412e-07, | |
| "logits/chosen": -2.135463237762451, | |
| "logits/rejected": -2.1379756927490234, | |
| "logps/chosen": -18.5693359375, | |
| "logps/rejected": -21.502685546875, | |
| "loss": 0.6567, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": 0.005435650702565908, | |
| "rewards/margins": 0.0763789713382721, | |
| "rewards/rejected": -0.07094332575798035, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.7222857142857143, | |
| "grad_norm": 0.7486832141876221, | |
| "learning_rate": 5.266500447031646e-07, | |
| "logits/chosen": -2.1247940063476562, | |
| "logits/rejected": -2.122842788696289, | |
| "logps/chosen": -21.570556640625, | |
| "logps/rejected": -21.164236068725586, | |
| "loss": 0.6655, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.01216865424066782, | |
| "rewards/margins": 0.05833979696035385, | |
| "rewards/rejected": -0.0705084502696991, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.7314285714285714, | |
| "grad_norm": 0.6928249001502991, | |
| "learning_rate": 4.941640562417138e-07, | |
| "logits/chosen": -2.1150875091552734, | |
| "logits/rejected": -2.1165449619293213, | |
| "logps/chosen": -21.510713577270508, | |
| "logps/rejected": -24.29326057434082, | |
| "loss": 0.657, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.006187473423779011, | |
| "rewards/margins": 0.07648099958896637, | |
| "rewards/rejected": -0.0826684832572937, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.7405714285714285, | |
| "grad_norm": 0.7375283241271973, | |
| "learning_rate": 4.6251356274379226e-07, | |
| "logits/chosen": -2.1273298263549805, | |
| "logits/rejected": -2.129077434539795, | |
| "logps/chosen": -22.487407684326172, | |
| "logps/rejected": -24.35370635986328, | |
| "loss": 0.6587, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -0.004263547249138355, | |
| "rewards/margins": 0.0724744200706482, | |
| "rewards/rejected": -0.07673796266317368, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.7497142857142857, | |
| "grad_norm": 0.7093409299850464, | |
| "learning_rate": 4.317248546080218e-07, | |
| "logits/chosen": -2.1207175254821777, | |
| "logits/rejected": -2.124617099761963, | |
| "logps/chosen": -19.91944694519043, | |
| "logps/rejected": -25.45476722717285, | |
| "loss": 0.6658, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.011447591707110405, | |
| "rewards/margins": 0.05775396525859833, | |
| "rewards/rejected": -0.06920155882835388, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.7588571428571429, | |
| "grad_norm": 0.659227728843689, | |
| "learning_rate": 4.018235063932971e-07, | |
| "logits/chosen": -2.129696846008301, | |
| "logits/rejected": -2.1302928924560547, | |
| "logps/chosen": -19.911724090576172, | |
| "logps/rejected": -21.486099243164062, | |
| "loss": 0.6629, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.015592245385050774, | |
| "rewards/margins": 0.0647309422492981, | |
| "rewards/rejected": -0.08032318949699402, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 0.6856977343559265, | |
| "learning_rate": 3.7283435557534184e-07, | |
| "logits/chosen": -2.115324020385742, | |
| "logits/rejected": -2.118356704711914, | |
| "logps/chosen": -20.232105255126953, | |
| "logps/rejected": -25.640562057495117, | |
| "loss": 0.6603, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -0.0029051026795059443, | |
| "rewards/margins": 0.07027439773082733, | |
| "rewards/rejected": -0.07317950576543808, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.7771428571428571, | |
| "grad_norm": 0.9134950637817383, | |
| "learning_rate": 3.447814819155292e-07, | |
| "logits/chosen": -2.1142709255218506, | |
| "logits/rejected": -2.1176223754882812, | |
| "logps/chosen": -22.407655715942383, | |
| "logps/rejected": -25.338939666748047, | |
| "loss": 0.6606, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -0.003580818185582757, | |
| "rewards/margins": 0.06903493404388428, | |
| "rewards/rejected": -0.07261575758457184, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.7862857142857143, | |
| "grad_norm": 0.8176380395889282, | |
| "learning_rate": 3.1768818745908876e-07, | |
| "logits/chosen": -2.128533363342285, | |
| "logits/rejected": -2.130959987640381, | |
| "logps/chosen": -21.104782104492188, | |
| "logps/rejected": -23.96923065185547, | |
| "loss": 0.6615, | |
| "rewards/accuracies": 0.734375, | |
| "rewards/chosen": -0.01085490919649601, | |
| "rewards/margins": 0.06671939790248871, | |
| "rewards/rejected": -0.07757431268692017, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.7954285714285714, | |
| "grad_norm": 0.7210907340049744, | |
| "learning_rate": 2.915769771793256e-07, | |
| "logits/chosen": -2.115241050720215, | |
| "logits/rejected": -2.1181540489196777, | |
| "logps/chosen": -19.796510696411133, | |
| "logps/rejected": -24.347612380981445, | |
| "loss": 0.6599, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 0.0054216571152210236, | |
| "rewards/margins": 0.07189285755157471, | |
| "rewards/rejected": -0.06647119671106339, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.8045714285714286, | |
| "grad_norm": 0.7129194140434265, | |
| "learning_rate": 2.6646954028392375e-07, | |
| "logits/chosen": -2.1180641651153564, | |
| "logits/rejected": -2.118509531021118, | |
| "logps/chosen": -21.633447647094727, | |
| "logps/rejected": -24.8947811126709, | |
| "loss": 0.6714, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -0.018895957618951797, | |
| "rewards/margins": 0.04705498740077019, | |
| "rewards/rejected": -0.06595094501972198, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.8137142857142857, | |
| "grad_norm": 0.751731276512146, | |
| "learning_rate": 2.4238673219886385e-07, | |
| "logits/chosen": -2.1097123622894287, | |
| "logits/rejected": -2.1142430305480957, | |
| "logps/chosen": -21.052400588989258, | |
| "logps/rejected": -25.112659454345703, | |
| "loss": 0.6497, | |
| "rewards/accuracies": 0.859375, | |
| "rewards/chosen": 0.006454586982727051, | |
| "rewards/margins": 0.09120012819766998, | |
| "rewards/rejected": -0.08474554121494293, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.8228571428571428, | |
| "grad_norm": 0.7012993693351746, | |
| "learning_rate": 2.1934855724491708e-07, | |
| "logits/chosen": -2.108950138092041, | |
| "logits/rejected": -2.1133596897125244, | |
| "logps/chosen": -20.4837646484375, | |
| "logps/rejected": -24.65631866455078, | |
| "loss": 0.6582, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.00821665208786726, | |
| "rewards/margins": 0.07355347275733948, | |
| "rewards/rejected": -0.08177012950181961, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 0.7529146075248718, | |
| "learning_rate": 1.9737415202111148e-07, | |
| "logits/chosen": -2.118992805480957, | |
| "logits/rejected": -2.1215620040893555, | |
| "logps/chosen": -21.363391876220703, | |
| "logps/rejected": -26.736839294433594, | |
| "loss": 0.6614, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -0.017014339566230774, | |
| "rewards/margins": 0.06742921471595764, | |
| "rewards/rejected": -0.08444354683160782, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.8411428571428572, | |
| "grad_norm": 0.6690794229507446, | |
| "learning_rate": 1.764817695089636e-07, | |
| "logits/chosen": -2.1333415508270264, | |
| "logits/rejected": -2.1368355751037598, | |
| "logps/chosen": -18.803630828857422, | |
| "logps/rejected": -24.878826141357422, | |
| "loss": 0.6552, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": 0.0014820651849731803, | |
| "rewards/margins": 0.08085457980632782, | |
| "rewards/rejected": -0.07937251776456833, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.8502857142857143, | |
| "grad_norm": 0.6794025301933289, | |
| "learning_rate": 1.566887639106911e-07, | |
| "logits/chosen": -2.12078595161438, | |
| "logits/rejected": -2.1245594024658203, | |
| "logps/chosen": -20.484092712402344, | |
| "logps/rejected": -24.03988265991211, | |
| "loss": 0.6592, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -0.0017876154743134975, | |
| "rewards/margins": 0.07191066443920135, | |
| "rewards/rejected": -0.07369828224182129, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.8594285714285714, | |
| "grad_norm": 0.6425016522407532, | |
| "learning_rate": 1.380115762339877e-07, | |
| "logits/chosen": -2.126593589782715, | |
| "logits/rejected": -2.125735282897949, | |
| "logps/chosen": -19.677227020263672, | |
| "logps/rejected": -20.280244827270508, | |
| "loss": 0.6657, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -0.014084220863878727, | |
| "rewards/margins": 0.05799565464258194, | |
| "rewards/rejected": -0.07207988202571869, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.8685714285714285, | |
| "grad_norm": 0.7096425294876099, | |
| "learning_rate": 1.204657206353459e-07, | |
| "logits/chosen": -2.1364822387695312, | |
| "logits/rejected": -2.1406309604644775, | |
| "logps/chosen": -19.647085189819336, | |
| "logps/rejected": -25.55003547668457, | |
| "loss": 0.6538, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 0.0021409899927675724, | |
| "rewards/margins": 0.0829853042960167, | |
| "rewards/rejected": -0.08084432035684586, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.8777142857142857, | |
| "grad_norm": 0.806024968624115, | |
| "learning_rate": 1.0406577153326192e-07, | |
| "logits/chosen": -2.12524676322937, | |
| "logits/rejected": -2.128009557723999, | |
| "logps/chosen": -21.744781494140625, | |
| "logps/rejected": -24.876564025878906, | |
| "loss": 0.664, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -0.014198469929397106, | |
| "rewards/margins": 0.060674287378787994, | |
| "rewards/rejected": -0.07487276196479797, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.8868571428571429, | |
| "grad_norm": 0.6857902407646179, | |
| "learning_rate": 8.882535150203569e-08, | |
| "logits/chosen": -2.128683567047119, | |
| "logits/rejected": -2.1316354274749756, | |
| "logps/chosen": -18.787628173828125, | |
| "logps/rejected": -23.368488311767578, | |
| "loss": 0.6702, | |
| "rewards/accuracies": 0.703125, | |
| "rewards/chosen": -0.024203235283493996, | |
| "rewards/margins": 0.049591515213251114, | |
| "rewards/rejected": -0.07379475235939026, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 0.6921999454498291, | |
| "learning_rate": 7.475711995621387e-08, | |
| "logits/chosen": -2.1213159561157227, | |
| "logits/rejected": -2.1230628490448, | |
| "logps/chosen": -21.5747127532959, | |
| "logps/rejected": -24.342790603637695, | |
| "loss": 0.6603, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.022949904203414917, | |
| "rewards/margins": 0.0700790211558342, | |
| "rewards/rejected": -0.09302891790866852, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.9051428571428571, | |
| "grad_norm": 0.7130371928215027, | |
| "learning_rate": 6.187276263508168e-08, | |
| "logits/chosen": -2.1171250343322754, | |
| "logits/rejected": -2.119697093963623, | |
| "logps/chosen": -21.54928970336914, | |
| "logps/rejected": -25.877193450927734, | |
| "loss": 0.6673, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.009239297360181808, | |
| "rewards/margins": 0.055657997727394104, | |
| "rewards/rejected": -0.06489729136228561, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.9142857142857143, | |
| "grad_norm": 0.7120369076728821, | |
| "learning_rate": 5.018298189593368e-08, | |
| "logits/chosen": -2.140258312225342, | |
| "logits/rejected": -2.1442980766296387, | |
| "logps/chosen": -20.42232322692871, | |
| "logps/rejected": -25.16228485107422, | |
| "loss": 0.6531, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": 0.0013755280524492264, | |
| "rewards/margins": 0.08448025584220886, | |
| "rewards/rejected": -0.08310472220182419, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.9234285714285714, | |
| "grad_norm": 0.6894267797470093, | |
| "learning_rate": 3.969748782418991e-08, | |
| "logits/chosen": -2.140516757965088, | |
| "logits/rejected": -2.143148899078369, | |
| "logps/chosen": -20.96661376953125, | |
| "logps/rejected": -24.055721282958984, | |
| "loss": 0.6593, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -0.004207253456115723, | |
| "rewards/margins": 0.07119009643793106, | |
| "rewards/rejected": -0.07539734244346619, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.9325714285714286, | |
| "grad_norm": 0.7290534377098083, | |
| "learning_rate": 3.042499016773881e-08, | |
| "logits/chosen": -2.132014751434326, | |
| "logits/rejected": -2.1332242488861084, | |
| "logps/chosen": -19.451780319213867, | |
| "logps/rejected": -21.663698196411133, | |
| "loss": 0.6617, | |
| "rewards/accuracies": 0.765625, | |
| "rewards/chosen": -0.00491691380739212, | |
| "rewards/margins": 0.06624950468540192, | |
| "rewards/rejected": -0.07116641849279404, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.9417142857142857, | |
| "grad_norm": 0.7135753035545349, | |
| "learning_rate": 2.2373191102207647e-08, | |
| "logits/chosen": -2.1299057006835938, | |
| "logits/rejected": -2.1332318782806396, | |
| "logps/chosen": -19.843944549560547, | |
| "logps/rejected": -24.937877655029297, | |
| "loss": 0.6494, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 0.0014770530397072434, | |
| "rewards/margins": 0.09241947531700134, | |
| "rewards/rejected": -0.09094242751598358, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.9508571428571428, | |
| "grad_norm": 0.6818587183952332, | |
| "learning_rate": 1.5548778833171463e-08, | |
| "logits/chosen": -2.130626916885376, | |
| "logits/rejected": -2.132134437561035, | |
| "logps/chosen": -21.063608169555664, | |
| "logps/rejected": -22.76825714111328, | |
| "loss": 0.6539, | |
| "rewards/accuracies": 0.828125, | |
| "rewards/chosen": -0.0026874844916164875, | |
| "rewards/margins": 0.0823042243719101, | |
| "rewards/rejected": -0.0849917083978653, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.6808786392211914, | |
| "learning_rate": 9.957422040612507e-09, | |
| "logits/chosen": -2.117967128753662, | |
| "logits/rejected": -2.1230428218841553, | |
| "logps/chosen": -19.646337509155273, | |
| "logps/rejected": -24.299863815307617, | |
| "loss": 0.6662, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.016201000660657883, | |
| "rewards/margins": 0.05751265585422516, | |
| "rewards/rejected": -0.07371365278959274, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.9691428571428572, | |
| "grad_norm": 0.6646621227264404, | |
| "learning_rate": 5.6037651702463e-09, | |
| "logits/chosen": -2.1203553676605225, | |
| "logits/rejected": -2.1216330528259277, | |
| "logps/chosen": -20.876747131347656, | |
| "logps/rejected": -23.389541625976562, | |
| "loss": 0.661, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.013487475924193859, | |
| "rewards/margins": 0.06747360527515411, | |
| "rewards/rejected": -0.08096107840538025, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.9782857142857143, | |
| "grad_norm": 0.651802659034729, | |
| "learning_rate": 2.491424575625123e-09, | |
| "logits/chosen": -2.1184940338134766, | |
| "logits/rejected": -2.1203343868255615, | |
| "logps/chosen": -19.80478858947754, | |
| "logps/rejected": -23.504154205322266, | |
| "loss": 0.6654, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -0.012541299685835838, | |
| "rewards/margins": 0.058582596480846405, | |
| "rewards/rejected": -0.07112389802932739, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.9874285714285714, | |
| "grad_norm": 0.6968909502029419, | |
| "learning_rate": 6.229855142232399e-10, | |
| "logits/chosen": -2.1146626472473145, | |
| "logits/rejected": -2.1161539554595947, | |
| "logps/chosen": -19.548887252807617, | |
| "logps/rejected": -22.608421325683594, | |
| "loss": 0.6522, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.004665360786020756, | |
| "rewards/margins": 0.08596684038639069, | |
| "rewards/rejected": -0.09063220024108887, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.9965714285714286, | |
| "grad_norm": 0.7107558250427246, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -2.1318564414978027, | |
| "logits/rejected": -2.134704351425171, | |
| "logps/chosen": -21.569801330566406, | |
| "logps/rejected": -25.938987731933594, | |
| "loss": 0.6582, | |
| "rewards/accuracies": 0.796875, | |
| "rewards/chosen": -0.003806858789175749, | |
| "rewards/margins": 0.07306241989135742, | |
| "rewards/rejected": -0.07686927914619446, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.9965714285714286, | |
| "step": 109, | |
| "total_flos": 2.557771887987917e+18, | |
| "train_loss": 0.6735316744638146, | |
| "train_runtime": 12905.5168, | |
| "train_samples_per_second": 0.542, | |
| "train_steps_per_second": 0.008 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 109, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.557771887987917e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |