LLaVA-v1.6-Vicuna-13B-SENTINEL / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9965714285714286,
"eval_steps": 500,
"global_step": 109,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.009142857142857144,
"grad_norm": 0.6255323886871338,
"learning_rate": 2.9993770144857767e-06,
"logits/chosen": -2.1389834880828857,
"logits/rejected": -2.141430139541626,
"logps/chosen": -19.425989151000977,
"logps/rejected": -21.582773208618164,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.018285714285714287,
"grad_norm": 0.6554356217384338,
"learning_rate": 2.997508575424375e-06,
"logits/chosen": -2.1365740299224854,
"logits/rejected": -2.1396265029907227,
"logps/chosen": -20.762622833251953,
"logps/rejected": -22.60515785217285,
"loss": 0.6943,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.0017696216236799955,
"rewards/margins": -0.002286846749484539,
"rewards/rejected": 0.0005172253004275262,
"step": 2
},
{
"epoch": 0.027428571428571427,
"grad_norm": 0.6278586387634277,
"learning_rate": 2.9943962348297537e-06,
"logits/chosen": -2.1222903728485107,
"logits/rejected": -2.125791549682617,
"logps/chosen": -21.520599365234375,
"logps/rejected": -24.2766170501709,
"loss": 0.6956,
"rewards/accuracies": 0.390625,
"rewards/chosen": -0.0007706253090873361,
"rewards/margins": -0.0048321266658604145,
"rewards/rejected": 0.004061501007527113,
"step": 3
},
{
"epoch": 0.036571428571428574,
"grad_norm": 0.6467044949531555,
"learning_rate": 2.9900425779593876e-06,
"logits/chosen": -2.1400036811828613,
"logits/rejected": -2.1466779708862305,
"logps/chosen": -19.16310691833496,
"logps/rejected": -25.431270599365234,
"loss": 0.6923,
"rewards/accuracies": 0.515625,
"rewards/chosen": 0.0013154743937775493,
"rewards/margins": 0.00192451779730618,
"rewards/rejected": -0.0006090432871133089,
"step": 4
},
{
"epoch": 0.045714285714285714,
"grad_norm": 0.6181118488311768,
"learning_rate": 2.9844512211668286e-06,
"logits/chosen": -2.1338605880737305,
"logits/rejected": -2.137308359146118,
"logps/chosen": -20.26681137084961,
"logps/rejected": -21.6939754486084,
"loss": 0.6914,
"rewards/accuracies": 0.546875,
"rewards/chosen": 0.0007669397164136171,
"rewards/margins": 0.003516831435263157,
"rewards/rejected": -0.002749891486018896,
"step": 5
},
{
"epoch": 0.054857142857142854,
"grad_norm": 0.6369356513023376,
"learning_rate": 2.977626808897792e-06,
"logits/chosen": -2.148895740509033,
"logits/rejected": -2.151296377182007,
"logps/chosen": -19.613313674926758,
"logps/rejected": -21.868637084960938,
"loss": 0.6921,
"rewards/accuracies": 0.53125,
"rewards/chosen": 0.0020878687500953674,
"rewards/margins": 0.002298696432262659,
"rewards/rejected": -0.00021082756575196981,
"step": 6
},
{
"epoch": 0.064,
"grad_norm": 0.643375813961029,
"learning_rate": 2.9695750098322613e-06,
"logits/chosen": -2.154219150543213,
"logits/rejected": -2.1564598083496094,
"logps/chosen": -19.349716186523438,
"logps/rejected": -22.341163635253906,
"loss": 0.6945,
"rewards/accuracies": 0.484375,
"rewards/chosen": -0.004500311333686113,
"rewards/margins": -0.0025411711540073156,
"rewards/rejected": -0.001959140645340085,
"step": 7
},
{
"epoch": 0.07314285714285715,
"grad_norm": 0.6222244501113892,
"learning_rate": 2.9603025121758102e-06,
"logits/chosen": -2.126340627670288,
"logits/rejected": -2.130244731903076,
"logps/chosen": -19.825477600097656,
"logps/rejected": -23.661293029785156,
"loss": 0.6917,
"rewards/accuracies": 0.578125,
"rewards/chosen": 0.0011193343671038747,
"rewards/margins": 0.0031258827075362206,
"rewards/rejected": -0.0020065484568476677,
"step": 8
},
{
"epoch": 0.08228571428571428,
"grad_norm": 0.5961363911628723,
"learning_rate": 2.9498170181040663e-06,
"logits/chosen": -2.14841365814209,
"logits/rejected": -2.14998197555542,
"logps/chosen": -17.929092407226562,
"logps/rejected": -19.984407424926758,
"loss": 0.6922,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.0016110084252431989,
"rewards/margins": 0.001981835812330246,
"rewards/rejected": -0.003592844121158123,
"step": 9
},
{
"epoch": 0.09142857142857143,
"grad_norm": 0.6394132375717163,
"learning_rate": 2.938127237364918e-06,
"logits/chosen": -2.1390151977539062,
"logits/rejected": -2.14105486869812,
"logps/chosen": -19.9459228515625,
"logps/rejected": -21.593914031982422,
"loss": 0.6949,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.0058130137622356415,
"rewards/margins": -0.003376076463609934,
"rewards/rejected": -0.0024369372986257076,
"step": 10
},
{
"epoch": 0.10057142857142858,
"grad_norm": 0.637840986251831,
"learning_rate": 2.925242880043786e-06,
"logits/chosen": -2.1370978355407715,
"logits/rejected": -2.1393895149230957,
"logps/chosen": -20.649080276489258,
"logps/rejected": -23.88674545288086,
"loss": 0.6941,
"rewards/accuracies": 0.484375,
"rewards/chosen": -0.0013475671876221895,
"rewards/margins": -0.0018120227614417672,
"rewards/rejected": 0.0004644556902348995,
"step": 11
},
{
"epoch": 0.10971428571428571,
"grad_norm": 0.624940037727356,
"learning_rate": 2.911174648497964e-06,
"logits/chosen": -2.1435601711273193,
"logits/rejected": -2.146998882293701,
"logps/chosen": -19.336463928222656,
"logps/rejected": -22.77804183959961,
"loss": 0.6907,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.0014964112779125571,
"rewards/margins": 0.005147767253220081,
"rewards/rejected": -0.006644178181886673,
"step": 12
},
{
"epoch": 0.11885714285714286,
"grad_norm": 0.6471104621887207,
"learning_rate": 2.895934228466738e-06,
"logits/chosen": -2.136577606201172,
"logits/rejected": -2.1388959884643555,
"logps/chosen": -20.625932693481445,
"logps/rejected": -23.377975463867188,
"loss": 0.6886,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.0014629564248025417,
"rewards/margins": 0.00928102433681488,
"rewards/rejected": -0.007818068377673626,
"step": 13
},
{
"epoch": 0.128,
"grad_norm": 0.629192054271698,
"learning_rate": 2.879534279364654e-06,
"logits/chosen": -2.1251070499420166,
"logits/rejected": -2.1296639442443848,
"logps/chosen": -17.793655395507812,
"logps/rejected": -24.011507034301758,
"loss": 0.6917,
"rewards/accuracies": 0.515625,
"rewards/chosen": -0.000675417366437614,
"rewards/margins": 0.0030865983571857214,
"rewards/rejected": -0.0037620156072080135,
"step": 14
},
{
"epoch": 0.13714285714285715,
"grad_norm": 0.659120500087738,
"learning_rate": 2.8619884237660125e-06,
"logits/chosen": -2.1358160972595215,
"logits/rejected": -2.142625331878662,
"logps/chosen": -18.37673568725586,
"logps/rejected": -25.251014709472656,
"loss": 0.6907,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.00043936213478446007,
"rewards/margins": 0.004951969254761934,
"rewards/rejected": -0.005391330923885107,
"step": 15
},
{
"epoch": 0.1462857142857143,
"grad_norm": 0.6336076259613037,
"learning_rate": 2.843311236089309e-06,
"logits/chosen": -2.1342644691467285,
"logits/rejected": -2.1355390548706055,
"logps/chosen": -20.63397216796875,
"logps/rejected": -21.67581558227539,
"loss": 0.6923,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.006054366007447243,
"rewards/margins": 0.0018282074015587568,
"rewards/rejected": -0.007882573641836643,
"step": 16
},
{
"epoch": 0.15542857142857142,
"grad_norm": 0.6278834342956543,
"learning_rate": 2.8235182304910364e-06,
"logits/chosen": -2.1471428871154785,
"logits/rejected": -2.148350477218628,
"logps/chosen": -21.62627410888672,
"logps/rejected": -22.867630004882812,
"loss": 0.6914,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.002310897456482053,
"rewards/margins": 0.00360050518065691,
"rewards/rejected": -0.005911402404308319,
"step": 17
},
{
"epoch": 0.16457142857142856,
"grad_norm": 0.6396936178207397,
"learning_rate": 2.8026258479788888e-06,
"logits/chosen": -2.131674289703369,
"logits/rejected": -2.1344425678253174,
"logps/chosen": -17.968589782714844,
"logps/rejected": -23.94507598876953,
"loss": 0.6891,
"rewards/accuracies": 0.640625,
"rewards/chosen": -0.0026061469689011574,
"rewards/margins": 0.008287503384053707,
"rewards/rejected": -0.010893651284277439,
"step": 18
},
{
"epoch": 0.1737142857142857,
"grad_norm": 0.6427000164985657,
"learning_rate": 2.780651442755083e-06,
"logits/chosen": -2.1325266361236572,
"logits/rejected": -2.1359243392944336,
"logps/chosen": -19.952186584472656,
"logps/rejected": -20.840421676635742,
"loss": 0.6887,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.002477221190929413,
"rewards/margins": 0.009127501398324966,
"rewards/rejected": -0.006650280207395554,
"step": 19
},
{
"epoch": 0.18285714285714286,
"grad_norm": 0.634149968624115,
"learning_rate": 2.7576132678011365e-06,
"logits/chosen": -2.137594223022461,
"logits/rejected": -2.1397337913513184,
"logps/chosen": -20.24038314819336,
"logps/rejected": -21.273605346679688,
"loss": 0.6886,
"rewards/accuracies": 0.609375,
"rewards/chosen": -0.0009703578543849289,
"rewards/margins": 0.009282448329031467,
"rewards/rejected": -0.010252806358039379,
"step": 20
},
{
"epoch": 0.192,
"grad_norm": 0.7092023491859436,
"learning_rate": 2.7335304597160764e-06,
"logits/chosen": -2.1394314765930176,
"logits/rejected": -2.1454715728759766,
"logps/chosen": -21.92709732055664,
"logps/rejected": -28.169654846191406,
"loss": 0.6845,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.0013219192624092102,
"rewards/margins": 0.017537159845232964,
"rewards/rejected": -0.016215242445468903,
"step": 21
},
{
"epoch": 0.20114285714285715,
"grad_norm": 0.6428853869438171,
"learning_rate": 2.7084230228206746e-06,
"logits/chosen": -2.1274845600128174,
"logits/rejected": -2.128504991531372,
"logps/chosen": -19.982959747314453,
"logps/rejected": -23.259571075439453,
"loss": 0.688,
"rewards/accuracies": 0.671875,
"rewards/chosen": -0.0038342936895787716,
"rewards/margins": 0.010476754978299141,
"rewards/rejected": -0.0143110491335392,
"step": 22
},
{
"epoch": 0.2102857142857143,
"grad_norm": 0.6467615962028503,
"learning_rate": 2.6823118125409112e-06,
"logits/chosen": -2.1434879302978516,
"logits/rejected": -2.14566707611084,
"logps/chosen": -20.100147247314453,
"logps/rejected": -23.975025177001953,
"loss": 0.6897,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.001528523163869977,
"rewards/margins": 0.0071428027004003525,
"rewards/rejected": -0.008671325631439686,
"step": 23
},
{
"epoch": 0.21942857142857142,
"grad_norm": 0.6638103127479553,
"learning_rate": 2.6552185180844704e-06,
"logits/chosen": -2.1213717460632324,
"logits/rejected": -2.1236109733581543,
"logps/chosen": -21.576557159423828,
"logps/rejected": -23.23206329345703,
"loss": 0.6861,
"rewards/accuracies": 0.671875,
"rewards/chosen": 0.0032195569947361946,
"rewards/margins": 0.01447179913520813,
"rewards/rejected": -0.011252242140471935,
"step": 24
},
{
"epoch": 0.22857142857142856,
"grad_norm": 0.6348288655281067,
"learning_rate": 2.6271656444246578e-06,
"logits/chosen": -2.1333892345428467,
"logits/rejected": -2.1361846923828125,
"logps/chosen": -19.42316436767578,
"logps/rejected": -22.707563400268555,
"loss": 0.6831,
"rewards/accuracies": 0.71875,
"rewards/chosen": 0.0022394396364688873,
"rewards/margins": 0.020372504368424416,
"rewards/rejected": -0.018133066594600677,
"step": 25
},
{
"epoch": 0.2377142857142857,
"grad_norm": 0.6526222825050354,
"learning_rate": 2.598176493606703e-06,
"logits/chosen": -2.1356377601623535,
"logits/rejected": -2.1370201110839844,
"logps/chosen": -20.537616729736328,
"logps/rejected": -24.898578643798828,
"loss": 0.6859,
"rewards/accuracies": 0.5625,
"rewards/chosen": -0.002131823683157563,
"rewards/margins": 0.014900727197527885,
"rewards/rejected": -0.017032550647854805,
"step": 26
},
{
"epoch": 0.24685714285714286,
"grad_norm": 0.6682783365249634,
"learning_rate": 2.568275145391978e-06,
"logits/chosen": -2.1460518836975098,
"logits/rejected": -2.1491003036499023,
"logps/chosen": -20.905759811401367,
"logps/rejected": -24.251680374145508,
"loss": 0.6857,
"rewards/accuracies": 0.703125,
"rewards/chosen": 0.0005356475012376904,
"rewards/margins": 0.015149888582527637,
"rewards/rejected": -0.014614241197705269,
"step": 27
},
{
"epoch": 0.256,
"grad_norm": 0.6456180214881897,
"learning_rate": 2.5374864372562077e-06,
"logits/chosen": -2.1365909576416016,
"logits/rejected": -2.1375560760498047,
"logps/chosen": -21.477279663085938,
"logps/rejected": -22.589874267578125,
"loss": 0.6853,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.0013170776655897498,
"rewards/margins": 0.01594378799200058,
"rewards/rejected": -0.01726086437702179,
"step": 28
},
{
"epoch": 0.2651428571428571,
"grad_norm": 0.6599003672599792,
"learning_rate": 2.505835943758286e-06,
"logits/chosen": -2.1302995681762695,
"logits/rejected": -2.1338701248168945,
"logps/chosen": -20.774627685546875,
"logps/rejected": -24.625228881835938,
"loss": 0.6838,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.0004698322154581547,
"rewards/margins": 0.019142411649227142,
"rewards/rejected": -0.018672579899430275,
"step": 29
},
{
"epoch": 0.2742857142857143,
"grad_norm": 0.6650639176368713,
"learning_rate": 2.4733499552968357e-06,
"logits/chosen": -2.1260218620300293,
"logits/rejected": -2.128187894821167,
"logps/chosen": -20.981136322021484,
"logps/rejected": -23.799392700195312,
"loss": 0.683,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.00021876831306144595,
"rewards/margins": 0.020755982026457787,
"rewards/rejected": -0.02053721249103546,
"step": 30
},
{
"epoch": 0.2834285714285714,
"grad_norm": 0.6870555877685547,
"learning_rate": 2.440055456272159e-06,
"logits/chosen": -2.1325454711914062,
"logits/rejected": -2.1314170360565186,
"logps/chosen": -20.572166442871094,
"logps/rejected": -19.940898895263672,
"loss": 0.6861,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.00320088560692966,
"rewards/margins": 0.014535932801663876,
"rewards/rejected": -0.01773681864142418,
"step": 31
},
{
"epoch": 0.2925714285714286,
"grad_norm": 0.6859702467918396,
"learning_rate": 2.4059801026717166e-06,
"logits/chosen": -2.138218402862549,
"logits/rejected": -2.1400537490844727,
"logps/chosen": -20.59479331970215,
"logps/rejected": -24.294113159179688,
"loss": 0.6824,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.0018003403674811125,
"rewards/margins": 0.02209661900997162,
"rewards/rejected": -0.02029627561569214,
"step": 32
},
{
"epoch": 0.3017142857142857,
"grad_norm": 0.6709543466567993,
"learning_rate": 2.3711521990977554e-06,
"logits/chosen": -2.134920120239258,
"logits/rejected": -2.137303352355957,
"logps/chosen": -21.195552825927734,
"logps/rejected": -24.645339965820312,
"loss": 0.6847,
"rewards/accuracies": 0.640625,
"rewards/chosen": -0.00019013590645045042,
"rewards/margins": 0.0173664353787899,
"rewards/rejected": -0.01755657233297825,
"step": 33
},
{
"epoch": 0.31085714285714283,
"grad_norm": 0.6602835655212402,
"learning_rate": 2.3356006752561658e-06,
"logits/chosen": -2.1185295581817627,
"logits/rejected": -2.122647762298584,
"logps/chosen": -17.77151870727539,
"logps/rejected": -25.318552017211914,
"loss": 0.6817,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.0011812887387350202,
"rewards/margins": 0.023368019610643387,
"rewards/rejected": -0.022186731919646263,
"step": 34
},
{
"epoch": 0.32,
"grad_norm": 0.6702331900596619,
"learning_rate": 2.299355061926096e-06,
"logits/chosen": -2.1439552307128906,
"logits/rejected": -2.148176670074463,
"logps/chosen": -19.662979125976562,
"logps/rejected": -25.61541748046875,
"loss": 0.6812,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.0001765764318406582,
"rewards/margins": 0.02442769892513752,
"rewards/rejected": -0.024604275822639465,
"step": 35
},
{
"epoch": 0.3291428571428571,
"grad_norm": 0.6621116399765015,
"learning_rate": 2.262445466430292e-06,
"logits/chosen": -2.138071060180664,
"logits/rejected": -2.139529228210449,
"logps/chosen": -19.943336486816406,
"logps/rejected": -23.18177032470703,
"loss": 0.6822,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.006531356833875179,
"rewards/margins": 0.022531913593411446,
"rewards/rejected": -0.0290632676333189,
"step": 36
},
{
"epoch": 0.3382857142857143,
"grad_norm": 0.7125285863876343,
"learning_rate": 2.2249025476265262e-06,
"logits/chosen": -2.1278233528137207,
"logits/rejected": -2.1309316158294678,
"logps/chosen": -21.678462982177734,
"logps/rejected": -23.819469451904297,
"loss": 0.6809,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.003998810425400734,
"rewards/margins": 0.025094730779528618,
"rewards/rejected": -0.029093541204929352,
"step": 37
},
{
"epoch": 0.3474285714285714,
"grad_norm": 0.6747680902481079,
"learning_rate": 2.1867574904409007e-06,
"logits/chosen": -2.128553628921509,
"logits/rejected": -2.1311511993408203,
"logps/chosen": -18.51136589050293,
"logps/rejected": -24.083953857421875,
"loss": 0.6797,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.00025194010231643915,
"rewards/margins": 0.027810033410787582,
"rewards/rejected": -0.027558093890547752,
"step": 38
},
{
"epoch": 0.3565714285714286,
"grad_norm": 0.6505147218704224,
"learning_rate": 2.1480419799641695e-06,
"logits/chosen": -2.1170382499694824,
"logits/rejected": -2.1211585998535156,
"logps/chosen": -18.79464340209961,
"logps/rejected": -23.59588050842285,
"loss": 0.6836,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.006827862001955509,
"rewards/margins": 0.019631091505289078,
"rewards/rejected": -0.026458950713276863,
"step": 39
},
{
"epoch": 0.3657142857142857,
"grad_norm": 0.6365678310394287,
"learning_rate": 2.1087881751326035e-06,
"logits/chosen": -2.1277003288269043,
"logits/rejected": -2.1313459873199463,
"logps/chosen": -20.50314712524414,
"logps/rejected": -22.63813018798828,
"loss": 0.6812,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.0014799695927649736,
"rewards/margins": 0.024493195116519928,
"rewards/rejected": -0.023013222962617874,
"step": 40
},
{
"epoch": 0.37485714285714283,
"grad_norm": 0.6673828959465027,
"learning_rate": 2.0690286820152535e-06,
"logits/chosen": -2.1289217472076416,
"logits/rejected": -2.131746768951416,
"logps/chosen": -20.128999710083008,
"logps/rejected": -23.057846069335938,
"loss": 0.6726,
"rewards/accuracies": 0.78125,
"rewards/chosen": 0.004846580792218447,
"rewards/margins": 0.042193807661533356,
"rewards/rejected": -0.037347227334976196,
"step": 41
},
{
"epoch": 0.384,
"grad_norm": 0.6725500226020813,
"learning_rate": 2.028796526729806e-06,
"logits/chosen": -2.121724843978882,
"logits/rejected": -2.125291347503662,
"logps/chosen": -19.95975112915039,
"logps/rejected": -24.067777633666992,
"loss": 0.6773,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.000510699232108891,
"rewards/margins": 0.03263135999441147,
"rewards/rejected": -0.03212066367268562,
"step": 42
},
{
"epoch": 0.3931428571428571,
"grad_norm": 0.7096243500709534,
"learning_rate": 1.9881251280095263e-06,
"logits/chosen": -2.12835693359375,
"logits/rejected": -2.1325571537017822,
"logps/chosen": -19.971481323242188,
"logps/rejected": -24.266193389892578,
"loss": 0.6759,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.0037878660950809717,
"rewards/margins": 0.03536036238074303,
"rewards/rejected": -0.03157249093055725,
"step": 43
},
{
"epoch": 0.4022857142857143,
"grad_norm": 0.6290874481201172,
"learning_rate": 1.9470482694440755e-06,
"logits/chosen": -2.139394760131836,
"logits/rejected": -2.1419851779937744,
"logps/chosen": -17.84711265563965,
"logps/rejected": -22.699108123779297,
"loss": 0.6802,
"rewards/accuracies": 0.640625,
"rewards/chosen": -0.0038270740769803524,
"rewards/margins": 0.027179870754480362,
"rewards/rejected": -0.031006945297122,
"step": 44
},
{
"epoch": 0.4114285714285714,
"grad_norm": 0.6832275986671448,
"learning_rate": 1.9056000714172617e-06,
"logits/chosen": -2.138123035430908,
"logits/rejected": -2.142123222351074,
"logps/chosen": -19.396350860595703,
"logps/rejected": -22.903085708618164,
"loss": 0.6728,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.000809194054454565,
"rewards/margins": 0.042555954307317734,
"rewards/rejected": -0.04336514696478844,
"step": 45
},
{
"epoch": 0.4205714285714286,
"grad_norm": 0.6851588487625122,
"learning_rate": 1.8638149627650335e-06,
"logits/chosen": -2.1379756927490234,
"logits/rejected": -2.1380934715270996,
"logps/chosen": -21.08904266357422,
"logps/rejected": -23.63918685913086,
"loss": 0.674,
"rewards/accuracies": 0.78125,
"rewards/chosen": 0.001778717152774334,
"rewards/margins": 0.03978656232357025,
"rewards/rejected": -0.03800784423947334,
"step": 46
},
{
"epoch": 0.4297142857142857,
"grad_norm": 0.6948539614677429,
"learning_rate": 1.8217276521772582e-06,
"logits/chosen": -2.1302433013916016,
"logits/rejected": -2.1331663131713867,
"logps/chosen": -20.23948860168457,
"logps/rejected": -23.1295223236084,
"loss": 0.6725,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.004014923237264156,
"rewards/margins": 0.042658429592847824,
"rewards/rejected": -0.046673357486724854,
"step": 47
},
{
"epoch": 0.43885714285714283,
"grad_norm": 0.6569979190826416,
"learning_rate": 1.7793730993670408e-06,
"logits/chosen": -2.1294007301330566,
"logits/rejected": -2.1324024200439453,
"logps/chosen": -20.591182708740234,
"logps/rejected": -23.661182403564453,
"loss": 0.6775,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.004753855522722006,
"rewards/margins": 0.03247044235467911,
"rewards/rejected": -0.03722430020570755,
"step": 48
},
{
"epoch": 0.448,
"grad_norm": 0.6771251559257507,
"learning_rate": 1.736786486031531e-06,
"logits/chosen": -2.126737117767334,
"logits/rejected": -2.1294384002685547,
"logps/chosen": -20.071245193481445,
"logps/rejected": -22.262264251708984,
"loss": 0.6708,
"rewards/accuracies": 0.703125,
"rewards/chosen": 0.005568951368331909,
"rewards/margins": 0.04644326865673065,
"rewards/rejected": -0.04087432101368904,
"step": 49
},
{
"epoch": 0.45714285714285713,
"grad_norm": 0.6473885774612427,
"learning_rate": 1.6940031866283395e-06,
"logits/chosen": -2.1336934566497803,
"logits/rejected": -2.1349339485168457,
"logps/chosen": -19.305700302124023,
"logps/rejected": -21.597030639648438,
"loss": 0.6798,
"rewards/accuracies": 0.671875,
"rewards/chosen": -0.002087415661662817,
"rewards/margins": 0.02759050950407982,
"rewards/rejected": -0.029677925631403923,
"step": 50
},
{
"epoch": 0.4662857142857143,
"grad_norm": 0.6721633672714233,
"learning_rate": 1.6510587389918377e-06,
"logits/chosen": -2.1234569549560547,
"logits/rejected": -2.1260986328125,
"logps/chosen": -20.71694564819336,
"logps/rejected": -24.932897567749023,
"loss": 0.6813,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.011087682098150253,
"rewards/margins": 0.024922657757997513,
"rewards/rejected": -0.036010339856147766,
"step": 51
},
{
"epoch": 0.4754285714285714,
"grad_norm": 0.7036443948745728,
"learning_rate": 1.6079888148137507e-06,
"logits/chosen": -2.1245672702789307,
"logits/rejected": -2.1277780532836914,
"logps/chosen": -21.870973587036133,
"logps/rejected": -24.941219329833984,
"loss": 0.6717,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.0029917103238403797,
"rewards/margins": 0.04497722536325455,
"rewards/rejected": -0.04796893894672394,
"step": 52
},
{
"epoch": 0.4845714285714286,
"grad_norm": 0.68109130859375,
"learning_rate": 1.564829190012561e-06,
"logits/chosen": -2.1461524963378906,
"logits/rejected": -2.1497020721435547,
"logps/chosen": -21.60137176513672,
"logps/rejected": -25.58949089050293,
"loss": 0.6722,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.005693153943866491,
"rewards/margins": 0.043281424790620804,
"rewards/rejected": -0.04897458106279373,
"step": 53
},
{
"epoch": 0.4937142857142857,
"grad_norm": 0.6599106192588806,
"learning_rate": 1.521615715016336e-06,
"logits/chosen": -2.140432357788086,
"logits/rejected": -2.1406521797180176,
"logps/chosen": -20.149822235107422,
"logps/rejected": -22.249767303466797,
"loss": 0.6777,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.00302593014203012,
"rewards/margins": 0.0317457839846611,
"rewards/rejected": -0.03477171063423157,
"step": 54
},
{
"epoch": 0.5028571428571429,
"grad_norm": 0.7043587565422058,
"learning_rate": 1.4783842849836645e-06,
"logits/chosen": -2.1249215602874756,
"logits/rejected": -2.1282291412353516,
"logps/chosen": -20.65789031982422,
"logps/rejected": -23.718164443969727,
"loss": 0.668,
"rewards/accuracies": 0.765625,
"rewards/chosen": 0.0033555193804204464,
"rewards/margins": 0.05225363373756409,
"rewards/rejected": -0.048898108303546906,
"step": 55
},
{
"epoch": 0.512,
"grad_norm": 0.6670368313789368,
"learning_rate": 1.435170809987439e-06,
"logits/chosen": -2.122504949569702,
"logits/rejected": -2.126192569732666,
"logps/chosen": -20.417633056640625,
"logps/rejected": -24.24279022216797,
"loss": 0.6724,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.006906700320541859,
"rewards/margins": 0.04406347870826721,
"rewards/rejected": -0.0509701743721962,
"step": 56
},
{
"epoch": 0.5211428571428571,
"grad_norm": 0.6748237013816833,
"learning_rate": 1.3920111851862494e-06,
"logits/chosen": -2.1295788288116455,
"logits/rejected": -2.132110834121704,
"logps/chosen": -20.603960037231445,
"logps/rejected": -23.54561996459961,
"loss": 0.6711,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.011474862694740295,
"rewards/margins": 0.04592112824320793,
"rewards/rejected": -0.057395994663238525,
"step": 57
},
{
"epoch": 0.5302857142857142,
"grad_norm": 0.6662198901176453,
"learning_rate": 1.3489412610081626e-06,
"logits/chosen": -2.1300594806671143,
"logits/rejected": -2.132218837738037,
"logps/chosen": -20.97345542907715,
"logps/rejected": -24.11111068725586,
"loss": 0.6737,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.015223701484501362,
"rewards/margins": 0.04077855125069618,
"rewards/rejected": -0.05600225552916527,
"step": 58
},
{
"epoch": 0.5394285714285715,
"grad_norm": 0.6957614421844482,
"learning_rate": 1.3059968133716607e-06,
"logits/chosen": -2.132567882537842,
"logits/rejected": -2.132495880126953,
"logps/chosen": -21.196874618530273,
"logps/rejected": -23.613279342651367,
"loss": 0.6747,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.008941511623561382,
"rewards/margins": 0.03851575776934624,
"rewards/rejected": -0.047457270324230194,
"step": 59
},
{
"epoch": 0.5485714285714286,
"grad_norm": 0.7162770628929138,
"learning_rate": 1.2632135139684691e-06,
"logits/chosen": -2.1271543502807617,
"logits/rejected": -2.130880117416382,
"logps/chosen": -20.923919677734375,
"logps/rejected": -25.65717315673828,
"loss": 0.6663,
"rewards/accuracies": 0.78125,
"rewards/chosen": 0.009123304858803749,
"rewards/margins": 0.05547190085053444,
"rewards/rejected": -0.04634860157966614,
"step": 60
},
{
"epoch": 0.5577142857142857,
"grad_norm": 0.6612498164176941,
"learning_rate": 1.2206269006329595e-06,
"logits/chosen": -2.116666316986084,
"logits/rejected": -2.1212408542633057,
"logps/chosen": -20.269481658935547,
"logps/rejected": -24.660919189453125,
"loss": 0.6721,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.013498620130121708,
"rewards/margins": 0.0441305935382843,
"rewards/rejected": -0.057629212737083435,
"step": 61
},
{
"epoch": 0.5668571428571428,
"grad_norm": 0.6904810667037964,
"learning_rate": 1.178272347822742e-06,
"logits/chosen": -2.1359639167785645,
"logits/rejected": -2.137200355529785,
"logps/chosen": -21.87899398803711,
"logps/rejected": -22.924833297729492,
"loss": 0.6645,
"rewards/accuracies": 0.65625,
"rewards/chosen": 0.008273976854979992,
"rewards/margins": 0.06065124645829201,
"rewards/rejected": -0.05237726867198944,
"step": 62
},
{
"epoch": 0.576,
"grad_norm": 0.6719346642494202,
"learning_rate": 1.1361850372349668e-06,
"logits/chosen": -2.134481906890869,
"logits/rejected": -2.136564016342163,
"logps/chosen": -20.749956130981445,
"logps/rejected": -24.487869262695312,
"loss": 0.6714,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.005040324293076992,
"rewards/margins": 0.045264218002557755,
"rewards/rejected": -0.05030454322695732,
"step": 63
},
{
"epoch": 0.5851428571428572,
"grad_norm": 0.6895278692245483,
"learning_rate": 1.0943999285827381e-06,
"logits/chosen": -2.1309783458709717,
"logits/rejected": -2.133222818374634,
"logps/chosen": -21.91169548034668,
"logps/rejected": -23.928085327148438,
"loss": 0.6741,
"rewards/accuracies": 0.671875,
"rewards/chosen": -0.007841155864298344,
"rewards/margins": 0.03996167331933975,
"rewards/rejected": -0.04780282825231552,
"step": 64
},
{
"epoch": 0.5942857142857143,
"grad_norm": 0.6835947036743164,
"learning_rate": 1.0529517305559246e-06,
"logits/chosen": -2.1413941383361816,
"logits/rejected": -2.1449737548828125,
"logps/chosen": -19.570405960083008,
"logps/rejected": -23.024538040161133,
"loss": 0.6683,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.0007449511904269457,
"rewards/margins": 0.051878269761800766,
"rewards/rejected": -0.05262322351336479,
"step": 65
},
{
"epoch": 0.6034285714285714,
"grad_norm": 0.7086966633796692,
"learning_rate": 1.0118748719904738e-06,
"logits/chosen": -2.1314806938171387,
"logits/rejected": -2.132997512817383,
"logps/chosen": -22.395124435424805,
"logps/rejected": -24.68596839904785,
"loss": 0.6714,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.004053800366818905,
"rewards/margins": 0.045246005058288574,
"rewards/rejected": -0.049299806356430054,
"step": 66
},
{
"epoch": 0.6125714285714285,
"grad_norm": 0.7053619623184204,
"learning_rate": 9.712034732701943e-07,
"logits/chosen": -2.136747360229492,
"logits/rejected": -2.1409151554107666,
"logps/chosen": -19.340253829956055,
"logps/rejected": -26.333112716674805,
"loss": 0.6711,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.012234330177307129,
"rewards/margins": 0.04644213989377022,
"rewards/rejected": -0.05867646634578705,
"step": 67
},
{
"epoch": 0.6217142857142857,
"grad_norm": 0.7142196893692017,
"learning_rate": 9.309713179847465e-07,
"logits/chosen": -2.1288576126098633,
"logits/rejected": -2.132416009902954,
"logps/chosen": -21.31295394897461,
"logps/rejected": -24.50257682800293,
"loss": 0.6644,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.0032868993002921343,
"rewards/margins": 0.05996156856417656,
"rewards/rejected": -0.06324847042560577,
"step": 68
},
{
"epoch": 0.6308571428571429,
"grad_norm": 0.7126966714859009,
"learning_rate": 8.912118248673967e-07,
"logits/chosen": -2.118403434753418,
"logits/rejected": -2.1224937438964844,
"logps/chosen": -20.098617553710938,
"logps/rejected": -24.383617401123047,
"loss": 0.6529,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.013771215453743935,
"rewards/margins": 0.08454546332359314,
"rewards/rejected": -0.07077424228191376,
"step": 69
},
{
"epoch": 0.64,
"grad_norm": 0.7309445142745972,
"learning_rate": 8.519580200358309e-07,
"logits/chosen": -2.1299290657043457,
"logits/rejected": -2.1309103965759277,
"logps/chosen": -19.261966705322266,
"logps/rejected": -21.874065399169922,
"loss": 0.6644,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.0036550310906022787,
"rewards/margins": 0.06022891029715538,
"rewards/rejected": -0.0638839453458786,
"step": 70
},
{
"epoch": 0.6491428571428571,
"grad_norm": 0.6933106780052185,
"learning_rate": 8.132425095591e-07,
"logits/chosen": -2.126209259033203,
"logits/rejected": -2.1274337768554688,
"logps/chosen": -21.729415893554688,
"logps/rejected": -23.67267417907715,
"loss": 0.6612,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.0012235536705702543,
"rewards/margins": 0.066755510866642,
"rewards/rejected": -0.06797906756401062,
"step": 71
},
{
"epoch": 0.6582857142857143,
"grad_norm": 0.6878139972686768,
"learning_rate": 7.750974523734742e-07,
"logits/chosen": -2.120508909225464,
"logits/rejected": -2.1226325035095215,
"logps/chosen": -19.833683013916016,
"logps/rejected": -24.432552337646484,
"loss": 0.6564,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.005440461914986372,
"rewards/margins": 0.07887633144855499,
"rewards/rejected": -0.08431679010391235,
"step": 72
},
{
"epoch": 0.6674285714285715,
"grad_norm": 0.6848965883255005,
"learning_rate": 7.375545335697085e-07,
"logits/chosen": -2.1300716400146484,
"logits/rejected": -2.1321635246276855,
"logps/chosen": -20.999731063842773,
"logps/rejected": -24.038249969482422,
"loss": 0.662,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.003927034325897694,
"rewards/margins": 0.06512106209993362,
"rewards/rejected": -0.06904809176921844,
"step": 73
},
{
"epoch": 0.6765714285714286,
"grad_norm": 0.7239253520965576,
"learning_rate": 7.00644938073904e-07,
"logits/chosen": -2.136348247528076,
"logits/rejected": -2.137990951538086,
"logps/chosen": -21.84921646118164,
"logps/rejected": -25.642963409423828,
"loss": 0.6528,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.001280774362385273,
"rewards/margins": 0.08441222459077835,
"rewards/rejected": -0.08313144743442535,
"step": 74
},
{
"epoch": 0.6857142857142857,
"grad_norm": 0.6780479550361633,
"learning_rate": 6.643993247438348e-07,
"logits/chosen": -2.122738838195801,
"logits/rejected": -2.127403497695923,
"logps/chosen": -19.919052124023438,
"logps/rejected": -23.570457458496094,
"loss": 0.6607,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.0010643948335200548,
"rewards/margins": 0.06831549108028412,
"rewards/rejected": -0.06725109368562698,
"step": 75
},
{
"epoch": 0.6948571428571428,
"grad_norm": 0.7106800675392151,
"learning_rate": 6.288478009022447e-07,
"logits/chosen": -2.1340596675872803,
"logits/rejected": -2.1375958919525146,
"logps/chosen": -20.532428741455078,
"logps/rejected": -23.923845291137695,
"loss": 0.6577,
"rewards/accuracies": 0.84375,
"rewards/chosen": 0.0032222855370491743,
"rewards/margins": 0.07417334616184235,
"rewards/rejected": -0.07095105946063995,
"step": 76
},
{
"epoch": 0.704,
"grad_norm": 0.6871652007102966,
"learning_rate": 5.940198973282838e-07,
"logits/chosen": -2.1262307167053223,
"logits/rejected": -2.131108283996582,
"logps/chosen": -18.413406372070312,
"logps/rejected": -23.341732025146484,
"loss": 0.6645,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.01665385626256466,
"rewards/margins": 0.060584962368011475,
"rewards/rejected": -0.07723881304264069,
"step": 77
},
{
"epoch": 0.7131428571428572,
"grad_norm": 0.6611953973770142,
"learning_rate": 5.599445437278412e-07,
"logits/chosen": -2.135463237762451,
"logits/rejected": -2.1379756927490234,
"logps/chosen": -18.5693359375,
"logps/rejected": -21.502685546875,
"loss": 0.6567,
"rewards/accuracies": 0.765625,
"rewards/chosen": 0.005435650702565908,
"rewards/margins": 0.0763789713382721,
"rewards/rejected": -0.07094332575798035,
"step": 78
},
{
"epoch": 0.7222857142857143,
"grad_norm": 0.7486832141876221,
"learning_rate": 5.266500447031646e-07,
"logits/chosen": -2.1247940063476562,
"logits/rejected": -2.122842788696289,
"logps/chosen": -21.570556640625,
"logps/rejected": -21.164236068725586,
"loss": 0.6655,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.01216865424066782,
"rewards/margins": 0.05833979696035385,
"rewards/rejected": -0.0705084502696991,
"step": 79
},
{
"epoch": 0.7314285714285714,
"grad_norm": 0.6928249001502991,
"learning_rate": 4.941640562417138e-07,
"logits/chosen": -2.1150875091552734,
"logits/rejected": -2.1165449619293213,
"logps/chosen": -21.510713577270508,
"logps/rejected": -24.29326057434082,
"loss": 0.657,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.006187473423779011,
"rewards/margins": 0.07648099958896637,
"rewards/rejected": -0.0826684832572937,
"step": 80
},
{
"epoch": 0.7405714285714285,
"grad_norm": 0.7375283241271973,
"learning_rate": 4.6251356274379226e-07,
"logits/chosen": -2.1273298263549805,
"logits/rejected": -2.129077434539795,
"logps/chosen": -22.487407684326172,
"logps/rejected": -24.35370635986328,
"loss": 0.6587,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.004263547249138355,
"rewards/margins": 0.0724744200706482,
"rewards/rejected": -0.07673796266317368,
"step": 81
},
{
"epoch": 0.7497142857142857,
"grad_norm": 0.7093409299850464,
"learning_rate": 4.317248546080218e-07,
"logits/chosen": -2.1207175254821777,
"logits/rejected": -2.124617099761963,
"logps/chosen": -19.91944694519043,
"logps/rejected": -25.45476722717285,
"loss": 0.6658,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.011447591707110405,
"rewards/margins": 0.05775396525859833,
"rewards/rejected": -0.06920155882835388,
"step": 82
},
{
"epoch": 0.7588571428571429,
"grad_norm": 0.659227728843689,
"learning_rate": 4.018235063932971e-07,
"logits/chosen": -2.129696846008301,
"logits/rejected": -2.1302928924560547,
"logps/chosen": -19.911724090576172,
"logps/rejected": -21.486099243164062,
"loss": 0.6629,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.015592245385050774,
"rewards/margins": 0.0647309422492981,
"rewards/rejected": -0.08032318949699402,
"step": 83
},
{
"epoch": 0.768,
"grad_norm": 0.6856977343559265,
"learning_rate": 3.7283435557534184e-07,
"logits/chosen": -2.115324020385742,
"logits/rejected": -2.118356704711914,
"logps/chosen": -20.232105255126953,
"logps/rejected": -25.640562057495117,
"loss": 0.6603,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.0029051026795059443,
"rewards/margins": 0.07027439773082733,
"rewards/rejected": -0.07317950576543808,
"step": 84
},
{
"epoch": 0.7771428571428571,
"grad_norm": 0.9134950637817383,
"learning_rate": 3.447814819155292e-07,
"logits/chosen": -2.1142709255218506,
"logits/rejected": -2.1176223754882812,
"logps/chosen": -22.407655715942383,
"logps/rejected": -25.338939666748047,
"loss": 0.6606,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.003580818185582757,
"rewards/margins": 0.06903493404388428,
"rewards/rejected": -0.07261575758457184,
"step": 85
},
{
"epoch": 0.7862857142857143,
"grad_norm": 0.8176380395889282,
"learning_rate": 3.1768818745908876e-07,
"logits/chosen": -2.128533363342285,
"logits/rejected": -2.130959987640381,
"logps/chosen": -21.104782104492188,
"logps/rejected": -23.96923065185547,
"loss": 0.6615,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.01085490919649601,
"rewards/margins": 0.06671939790248871,
"rewards/rejected": -0.07757431268692017,
"step": 86
},
{
"epoch": 0.7954285714285714,
"grad_norm": 0.7210907340049744,
"learning_rate": 2.915769771793256e-07,
"logits/chosen": -2.115241050720215,
"logits/rejected": -2.1181540489196777,
"logps/chosen": -19.796510696411133,
"logps/rejected": -24.347612380981445,
"loss": 0.6599,
"rewards/accuracies": 0.71875,
"rewards/chosen": 0.0054216571152210236,
"rewards/margins": 0.07189285755157471,
"rewards/rejected": -0.06647119671106339,
"step": 87
},
{
"epoch": 0.8045714285714286,
"grad_norm": 0.7129194140434265,
"learning_rate": 2.6646954028392375e-07,
"logits/chosen": -2.1180641651153564,
"logits/rejected": -2.118509531021118,
"logps/chosen": -21.633447647094727,
"logps/rejected": -24.8947811126709,
"loss": 0.6714,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.018895957618951797,
"rewards/margins": 0.04705498740077019,
"rewards/rejected": -0.06595094501972198,
"step": 88
},
{
"epoch": 0.8137142857142857,
"grad_norm": 0.751731276512146,
"learning_rate": 2.4238673219886385e-07,
"logits/chosen": -2.1097123622894287,
"logits/rejected": -2.1142430305480957,
"logps/chosen": -21.052400588989258,
"logps/rejected": -25.112659454345703,
"loss": 0.6497,
"rewards/accuracies": 0.859375,
"rewards/chosen": 0.006454586982727051,
"rewards/margins": 0.09120012819766998,
"rewards/rejected": -0.08474554121494293,
"step": 89
},
{
"epoch": 0.8228571428571428,
"grad_norm": 0.7012993693351746,
"learning_rate": 2.1934855724491708e-07,
"logits/chosen": -2.108950138092041,
"logits/rejected": -2.1133596897125244,
"logps/chosen": -20.4837646484375,
"logps/rejected": -24.65631866455078,
"loss": 0.6582,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.00821665208786726,
"rewards/margins": 0.07355347275733948,
"rewards/rejected": -0.08177012950181961,
"step": 90
},
{
"epoch": 0.832,
"grad_norm": 0.7529146075248718,
"learning_rate": 1.9737415202111148e-07,
"logits/chosen": -2.118992805480957,
"logits/rejected": -2.1215620040893555,
"logps/chosen": -21.363391876220703,
"logps/rejected": -26.736839294433594,
"loss": 0.6614,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.017014339566230774,
"rewards/margins": 0.06742921471595764,
"rewards/rejected": -0.08444354683160782,
"step": 91
},
{
"epoch": 0.8411428571428572,
"grad_norm": 0.6690794229507446,
"learning_rate": 1.764817695089636e-07,
"logits/chosen": -2.1333415508270264,
"logits/rejected": -2.1368355751037598,
"logps/chosen": -18.803630828857422,
"logps/rejected": -24.878826141357422,
"loss": 0.6552,
"rewards/accuracies": 0.78125,
"rewards/chosen": 0.0014820651849731803,
"rewards/margins": 0.08085457980632782,
"rewards/rejected": -0.07937251776456833,
"step": 92
},
{
"epoch": 0.8502857142857143,
"grad_norm": 0.6794025301933289,
"learning_rate": 1.566887639106911e-07,
"logits/chosen": -2.12078595161438,
"logits/rejected": -2.1245594024658203,
"logps/chosen": -20.484092712402344,
"logps/rejected": -24.03988265991211,
"loss": 0.6592,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.0017876154743134975,
"rewards/margins": 0.07191066443920135,
"rewards/rejected": -0.07369828224182129,
"step": 93
},
{
"epoch": 0.8594285714285714,
"grad_norm": 0.6425016522407532,
"learning_rate": 1.380115762339877e-07,
"logits/chosen": -2.126593589782715,
"logits/rejected": -2.125735282897949,
"logps/chosen": -19.677227020263672,
"logps/rejected": -20.280244827270508,
"loss": 0.6657,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.014084220863878727,
"rewards/margins": 0.05799565464258194,
"rewards/rejected": -0.07207988202571869,
"step": 94
},
{
"epoch": 0.8685714285714285,
"grad_norm": 0.7096425294876099,
"learning_rate": 1.204657206353459e-07,
"logits/chosen": -2.1364822387695312,
"logits/rejected": -2.1406309604644775,
"logps/chosen": -19.647085189819336,
"logps/rejected": -25.55003547668457,
"loss": 0.6538,
"rewards/accuracies": 0.84375,
"rewards/chosen": 0.0021409899927675724,
"rewards/margins": 0.0829853042960167,
"rewards/rejected": -0.08084432035684586,
"step": 95
},
{
"epoch": 0.8777142857142857,
"grad_norm": 0.806024968624115,
"learning_rate": 1.0406577153326192e-07,
"logits/chosen": -2.12524676322937,
"logits/rejected": -2.128009557723999,
"logps/chosen": -21.744781494140625,
"logps/rejected": -24.876564025878906,
"loss": 0.664,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.014198469929397106,
"rewards/margins": 0.060674287378787994,
"rewards/rejected": -0.07487276196479797,
"step": 96
},
{
"epoch": 0.8868571428571429,
"grad_norm": 0.6857902407646179,
"learning_rate": 8.882535150203569e-08,
"logits/chosen": -2.128683567047119,
"logits/rejected": -2.1316354274749756,
"logps/chosen": -18.787628173828125,
"logps/rejected": -23.368488311767578,
"loss": 0.6702,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.024203235283493996,
"rewards/margins": 0.049591515213251114,
"rewards/rejected": -0.07379475235939026,
"step": 97
},
{
"epoch": 0.896,
"grad_norm": 0.6921999454498291,
"learning_rate": 7.475711995621387e-08,
"logits/chosen": -2.1213159561157227,
"logits/rejected": -2.1230628490448,
"logps/chosen": -21.5747127532959,
"logps/rejected": -24.342790603637695,
"loss": 0.6603,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.022949904203414917,
"rewards/margins": 0.0700790211558342,
"rewards/rejected": -0.09302891790866852,
"step": 98
},
{
"epoch": 0.9051428571428571,
"grad_norm": 0.7130371928215027,
"learning_rate": 6.187276263508168e-08,
"logits/chosen": -2.1171250343322754,
"logits/rejected": -2.119697093963623,
"logps/chosen": -21.54928970336914,
"logps/rejected": -25.877193450927734,
"loss": 0.6673,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.009239297360181808,
"rewards/margins": 0.055657997727394104,
"rewards/rejected": -0.06489729136228561,
"step": 99
},
{
"epoch": 0.9142857142857143,
"grad_norm": 0.7120369076728821,
"learning_rate": 5.018298189593368e-08,
"logits/chosen": -2.140258312225342,
"logits/rejected": -2.1442980766296387,
"logps/chosen": -20.42232322692871,
"logps/rejected": -25.16228485107422,
"loss": 0.6531,
"rewards/accuracies": 0.78125,
"rewards/chosen": 0.0013755280524492264,
"rewards/margins": 0.08448025584220886,
"rewards/rejected": -0.08310472220182419,
"step": 100
},
{
"epoch": 0.9234285714285714,
"grad_norm": 0.6894267797470093,
"learning_rate": 3.969748782418991e-08,
"logits/chosen": -2.140516757965088,
"logits/rejected": -2.143148899078369,
"logps/chosen": -20.96661376953125,
"logps/rejected": -24.055721282958984,
"loss": 0.6593,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.004207253456115723,
"rewards/margins": 0.07119009643793106,
"rewards/rejected": -0.07539734244346619,
"step": 101
},
{
"epoch": 0.9325714285714286,
"grad_norm": 0.7290534377098083,
"learning_rate": 3.042499016773881e-08,
"logits/chosen": -2.132014751434326,
"logits/rejected": -2.1332242488861084,
"logps/chosen": -19.451780319213867,
"logps/rejected": -21.663698196411133,
"loss": 0.6617,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.00491691380739212,
"rewards/margins": 0.06624950468540192,
"rewards/rejected": -0.07116641849279404,
"step": 102
},
{
"epoch": 0.9417142857142857,
"grad_norm": 0.7135753035545349,
"learning_rate": 2.2373191102207647e-08,
"logits/chosen": -2.1299057006835938,
"logits/rejected": -2.1332318782806396,
"logps/chosen": -19.843944549560547,
"logps/rejected": -24.937877655029297,
"loss": 0.6494,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.0014770530397072434,
"rewards/margins": 0.09241947531700134,
"rewards/rejected": -0.09094242751598358,
"step": 103
},
{
"epoch": 0.9508571428571428,
"grad_norm": 0.6818587183952332,
"learning_rate": 1.5548778833171463e-08,
"logits/chosen": -2.130626916885376,
"logits/rejected": -2.132134437561035,
"logps/chosen": -21.063608169555664,
"logps/rejected": -22.76825714111328,
"loss": 0.6539,
"rewards/accuracies": 0.828125,
"rewards/chosen": -0.0026874844916164875,
"rewards/margins": 0.0823042243719101,
"rewards/rejected": -0.0849917083978653,
"step": 104
},
{
"epoch": 0.96,
"grad_norm": 0.6808786392211914,
"learning_rate": 9.957422040612507e-09,
"logits/chosen": -2.117967128753662,
"logits/rejected": -2.1230428218841553,
"logps/chosen": -19.646337509155273,
"logps/rejected": -24.299863815307617,
"loss": 0.6662,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.016201000660657883,
"rewards/margins": 0.05751265585422516,
"rewards/rejected": -0.07371365278959274,
"step": 105
},
{
"epoch": 0.9691428571428572,
"grad_norm": 0.6646621227264404,
"learning_rate": 5.6037651702463e-09,
"logits/chosen": -2.1203553676605225,
"logits/rejected": -2.1216330528259277,
"logps/chosen": -20.876747131347656,
"logps/rejected": -23.389541625976562,
"loss": 0.661,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.013487475924193859,
"rewards/margins": 0.06747360527515411,
"rewards/rejected": -0.08096107840538025,
"step": 106
},
{
"epoch": 0.9782857142857143,
"grad_norm": 0.651802659034729,
"learning_rate": 2.491424575625123e-09,
"logits/chosen": -2.1184940338134766,
"logits/rejected": -2.1203343868255615,
"logps/chosen": -19.80478858947754,
"logps/rejected": -23.504154205322266,
"loss": 0.6654,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.012541299685835838,
"rewards/margins": 0.058582596480846405,
"rewards/rejected": -0.07112389802932739,
"step": 107
},
{
"epoch": 0.9874285714285714,
"grad_norm": 0.6968909502029419,
"learning_rate": 6.229855142232399e-10,
"logits/chosen": -2.1146626472473145,
"logits/rejected": -2.1161539554595947,
"logps/chosen": -19.548887252807617,
"logps/rejected": -22.608421325683594,
"loss": 0.6522,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.004665360786020756,
"rewards/margins": 0.08596684038639069,
"rewards/rejected": -0.09063220024108887,
"step": 108
},
{
"epoch": 0.9965714285714286,
"grad_norm": 0.7107558250427246,
"learning_rate": 0.0,
"logits/chosen": -2.1318564414978027,
"logits/rejected": -2.134704351425171,
"logps/chosen": -21.569801330566406,
"logps/rejected": -25.938987731933594,
"loss": 0.6582,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.003806858789175749,
"rewards/margins": 0.07306241989135742,
"rewards/rejected": -0.07686927914619446,
"step": 109
},
{
"epoch": 0.9965714285714286,
"step": 109,
"total_flos": 2.557771887987917e+18,
"train_loss": 0.6735316744638146,
"train_runtime": 12905.5168,
"train_samples_per_second": 0.542,
"train_steps_per_second": 0.008
}
],
"logging_steps": 1.0,
"max_steps": 109,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.557771887987917e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
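
For reference, a minimal sketch of how this training log could be inspected offline, assuming the file has been downloaded locally as trainer_state.json and that matplotlib is installed; the key names ("step", "loss", "rewards/margins", "rewards/accuracies") follow the entries above, and the filter skips the final epoch-summary entry, which reports "train_loss" instead of "loss".

import json

import matplotlib.pyplot as plt

# Load the trainer state exported by transformers.Trainer at the end of training.
# The path is an assumption; point it at wherever this file lives locally.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step entries that carry the DPO-style metrics.
steps = [e for e in state["log_history"] if "loss" in e and "rewards/margins" in e]

x = [e["step"] for e in steps]
loss = [e["loss"] for e in steps]
margins = [e["rewards/margins"] for e in steps]
acc = [e["rewards/accuracies"] for e in steps]

# Plot loss, reward margin, and preference accuracy against the training step.
fig, axes = plt.subplots(1, 3, figsize=(12, 3), sharex=True)
for ax, y, title in zip(axes, (loss, margins, acc),
                        ("loss", "rewards/margins", "rewards/accuracies")):
    ax.plot(x, y)
    ax.set_xlabel("step")
    ax.set_title(title)
fig.tight_layout()
plt.show()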