[
{
"loss": 0.6931,
"grad_norm": 5.219141483306885,
"learning_rate": 0.00018,
"rewards/chosen": 0.024159394204616547,
"rewards/rejected": 0.0037573135923594236,
"rewards/accuracies": 0.42500001192092896,
"rewards/margins": 0.020402083173394203,
"logps/chosen": -146.83187866210938,
"logps/rejected": -130.77682495117188,
"logits/chosen": -0.2618408203125,
"logits/rejected": -0.405269056558609,
"epoch": 0.5925925925925926,
"step": 10
},
{
"loss": 0.682,
"grad_norm": 6.610161781311035,
"learning_rate": 0.00019333333333333333,
"rewards/chosen": -0.0013131718151271343,
"rewards/rejected": -0.18291252851486206,
"rewards/accuracies": 0.594936728477478,
"rewards/margins": 0.18159937858581543,
"logps/chosen": -144.95765686035156,
"logps/rejected": -133.71145629882812,
"logits/chosen": 0.21812255680561066,
"logits/rejected": 0.1329454481601715,
"epoch": 1.1777777777777778,
"step": 20
},
{
"loss": 0.3351,
"grad_norm": 2.0350332260131836,
"learning_rate": 0.00018500000000000002,
"rewards/chosen": 0.19238564372062683,
"rewards/rejected": -1.004982590675354,
"rewards/accuracies": 0.925000011920929,
"rewards/margins": 1.1973682641983032,
"logps/chosen": -140.00352478027344,
"logps/rejected": -134.99171447753906,
"logits/chosen": 0.40491190552711487,
"logits/rejected": 0.2921258509159088,
"epoch": 1.7703703703703704,
"step": 30
},
{
"loss": 0.1806,
"grad_norm": 2.579374313354492,
"learning_rate": 0.00017666666666666666,
"rewards/chosen": 0.3689553439617157,
"rewards/rejected": -2.125339984893799,
"rewards/accuracies": 0.9620253443717957,
"rewards/margins": 2.494295358657837,
"logps/chosen": -143.1613006591797,
"logps/rejected": -159.1355438232422,
"logits/chosen": 0.11085856705904007,
"logits/rejected": 0.032480403780937195,
"epoch": 2.3555555555555556,
"step": 40
},
{
"loss": 0.1049,
"grad_norm": 0.8431211709976196,
"learning_rate": 0.00016833333333333335,
"rewards/chosen": -0.37566089630126953,
"rewards/rejected": -4.225518226623535,
"rewards/accuracies": 0.9624999761581421,
"rewards/margins": 3.8498573303222656,
"logps/chosen": -146.19427490234375,
"logps/rejected": -168.6130828857422,
"logits/chosen": -0.39661893248558044,
"logits/rejected": -0.4452442228794098,
"epoch": 2.948148148148148,
"step": 50
},
{
"loss": 0.0286,
"grad_norm": 1.5647461414337158,
"learning_rate": 0.00016,
"rewards/chosen": -1.5753328800201416,
"rewards/rejected": -7.724185943603516,
"rewards/accuracies": 0.9873417615890503,
"rewards/margins": 6.148852825164795,
"logps/chosen": -164.2067413330078,
"logps/rejected": -209.5588836669922,
"logits/chosen": -0.9004085659980774,
"logits/rejected": -0.9173569679260254,
"epoch": 3.533333333333333,
"step": 60
},
{
"loss": 0.0204,
"grad_norm": 0.05377896502614021,
"learning_rate": 0.00015166666666666668,
"rewards/chosen": -3.3284900188446045,
"rewards/rejected": -11.170860290527344,
"rewards/accuracies": 0.9873417615890503,
"rewards/margins": 7.842370986938477,
"logps/chosen": -170.9961395263672,
"logps/rejected": -241.1328125,
"logits/chosen": -1.1217529773712158,
"logits/rejected": -1.1465003490447998,
"epoch": 4.118518518518519,
"step": 70
},
{
"loss": 0.0098,
"grad_norm": 0.05912935361266136,
"learning_rate": 0.00014333333333333334,
"rewards/chosen": -5.450322151184082,
"rewards/rejected": -15.317463874816895,
"rewards/accuracies": 0.987500011920929,
"rewards/margins": 9.867142677307129,
"logps/chosen": -200.71902465820312,
"logps/rejected": -283.8058166503906,
"logits/chosen": -1.3752106428146362,
"logits/rejected": -1.3843052387237549,
"epoch": 4.711111111111111,
"step": 80
},
{
"loss": 0.0103,
"grad_norm": 0.08618709444999695,
"learning_rate": 0.00013500000000000003,
"rewards/chosen": -9.06311321258545,
"rewards/rejected": -20.052576065063477,
"rewards/accuracies": 0.9873417615890503,
"rewards/margins": 10.989459991455078,
"logps/chosen": -237.59788513183594,
"logps/rejected": -331.6789245605469,
"logits/chosen": -1.3256553411483765,
"logits/rejected": -1.3671971559524536,
"epoch": 5.296296296296296,
"step": 90
},
{
"loss": 0.0176,
"grad_norm": 0.0007632412016391754,
"learning_rate": 0.00012666666666666666,
"rewards/chosen": -6.595943450927734,
"rewards/rejected": -18.24319839477539,
"rewards/accuracies": 0.9750000238418579,
"rewards/margins": 11.647254943847656,
"logps/chosen": -212.84860229492188,
"logps/rejected": -312.7068786621094,
"logits/chosen": -1.4357213973999023,
"logits/rejected": -1.4478198289871216,
"epoch": 5.888888888888889,
"step": 100
},
{
"loss": 0.0002,
"grad_norm": 0.021229052916169167,
"learning_rate": 0.00011833333333333334,
"rewards/chosen": -7.2799482345581055,
"rewards/rejected": -19.018455505371094,
"rewards/accuracies": 1.0,
"rewards/margins": 11.738507270812988,
"logps/chosen": -217.70120239257812,
"logps/rejected": -321.6396484375,
"logits/chosen": -1.3416528701782227,
"logits/rejected": -1.3565353155136108,
"epoch": 6.474074074074074,
"step": 110
},
{
"loss": 0.0174,
"grad_norm": 0.011705581098794937,
"learning_rate": 0.00011000000000000002,
"rewards/chosen": -6.8897318840026855,
"rewards/rejected": -18.329551696777344,
"rewards/accuracies": 0.9746835231781006,
"rewards/margins": 11.4398193359375,
"logps/chosen": -211.3556365966797,
"logps/rejected": -315.4004211425781,
"logits/chosen": -1.3887122869491577,
"logits/rejected": -1.4279637336730957,
"epoch": 7.059259259259259,
"step": 120
},
{
"loss": 0.0088,
"grad_norm": 0.006066357716917992,
"learning_rate": 0.00010166666666666667,
"rewards/chosen": -7.70062255859375,
"rewards/rejected": -19.32270050048828,
"rewards/accuracies": 0.987500011920929,
"rewards/margins": 11.622076034545898,
"logps/chosen": -218.02810668945312,
"logps/rejected": -321.9827880859375,
"logits/chosen": -1.3698838949203491,
"logits/rejected": -1.379957675933838,
"epoch": 7.651851851851852,
"step": 130
},
{
"loss": 0.0088,
"grad_norm": 0.014796565286815166,
"learning_rate": 9.333333333333334e-05,
"rewards/chosen": -8.16592788696289,
"rewards/rejected": -20.24009132385254,
"rewards/accuracies": 0.9873417615890503,
"rewards/margins": 12.074161529541016,
"logps/chosen": -228.3029022216797,
"logps/rejected": -333.3442687988281,
"logits/chosen": -1.312727689743042,
"logits/rejected": -1.3648468255996704,
"epoch": 8.237037037037037,
"step": 140
},
{
"loss": 0.0174,
"grad_norm": 0.010800166986882687,
"learning_rate": 8.5e-05,
"rewards/chosen": -7.864575386047363,
"rewards/rejected": -20.18320083618164,
"rewards/accuracies": 0.9750000238418579,
"rewards/margins": 12.318623542785645,
"logps/chosen": -219.8230438232422,
"logps/rejected": -329.8297424316406,
"logits/chosen": -1.3495450019836426,
"logits/rejected": -1.400110125541687,
"epoch": 8.829629629629629,
"step": 150
},
{
"loss": 0.0,
"grad_norm": 0.0027752986643463373,
"learning_rate": 7.666666666666667e-05,
"rewards/chosen": -7.922427177429199,
"rewards/rejected": -20.483713150024414,
"rewards/accuracies": 1.0,
"rewards/margins": 12.561285018920898,
"logps/chosen": -236.33514404296875,
"logps/rejected": -339.6175231933594,
"logits/chosen": -1.387330412864685,
"logits/rejected": -1.379171371459961,
"epoch": 9.414814814814815,
"step": 160
},
{
"loss": 0.0174,
"grad_norm": 0.022991616278886795,
"learning_rate": 6.833333333333333e-05,
"rewards/chosen": -8.79272174835205,
"rewards/rejected": -21.603660583496094,
"rewards/accuracies": 0.9746835231781006,
"rewards/margins": 12.81093978881836,
"logps/chosen": -223.5404510498047,
"logps/rejected": -344.2777099609375,
"logits/chosen": -1.332489013671875,
"logits/rejected": -1.4169538021087646,
"epoch": 10.0,
"step": 170
},
{
"loss": 0.0001,
"grad_norm": 0.008427063003182411,
"learning_rate": 6e-05,
"rewards/chosen": -9.925287246704102,
"rewards/rejected": -22.660680770874023,
"rewards/accuracies": 1.0,
"rewards/margins": 12.735391616821289,
"logps/chosen": -246.78573608398438,
"logps/rejected": -358.0868225097656,
"logits/chosen": -1.3674745559692383,
"logits/rejected": -1.393139123916626,
"epoch": 10.592592592592592,
"step": 180
},
{
"loss": 0.0174,
"grad_norm": 0.00592564232647419,
"learning_rate": 5.166666666666667e-05,
"rewards/chosen": -7.4557271003723145,
"rewards/rejected": -20.30694580078125,
"rewards/accuracies": 0.9746835231781006,
"rewards/margins": 12.851216316223145,
"logps/chosen": -212.6167449951172,
"logps/rejected": -328.70428466796875,
"logits/chosen": -1.3256752490997314,
"logits/rejected": -1.3827478885650635,
"epoch": 11.177777777777777,
"step": 190
},
{
"loss": 0.0174,
"grad_norm": 0.0025185132399201393,
"learning_rate": 4.3333333333333334e-05,
"rewards/chosen": -8.556998252868652,
"rewards/rejected": -21.165555953979492,
"rewards/accuracies": 0.9750000238418579,
"rewards/margins": 12.60855770111084,
"logps/chosen": -230.914794921875,
"logps/rejected": -344.1314392089844,
"logits/chosen": -1.3429509401321411,
"logits/rejected": -1.3791346549987793,
"epoch": 11.77037037037037,
"step": 200
},
{
"loss": 0.0087,
"grad_norm": 0.0017514040227979422,
"learning_rate": 3.5e-05,
"rewards/chosen": -10.171927452087402,
"rewards/rejected": -23.150039672851562,
"rewards/accuracies": 0.9873417615890503,
"rewards/margins": 12.97811508178711,
"logps/chosen": -251.9849395751953,
"logps/rejected": -366.4695739746094,
"logits/chosen": -1.3986672163009644,
"logits/rejected": -1.4266730546951294,
"epoch": 12.355555555555556,
"step": 210
},
{
"loss": 0.0087,
"grad_norm": 0.006925302557647228,
"learning_rate": 2.6666666666666667e-05,
"rewards/chosen": -9.249361038208008,
"rewards/rejected": -22.13502311706543,
"rewards/accuracies": 0.987500011920929,
"rewards/margins": 12.885663032531738,
"logps/chosen": -235.34341430664062,
"logps/rejected": -348.89794921875,
"logits/chosen": -1.3144868612289429,
"logits/rejected": -1.3764691352844238,
"epoch": 12.948148148148148,
"step": 220
},
{
"loss": 0.0087,
"grad_norm": 0.007612653076648712,
"learning_rate": 1.8333333333333333e-05,
"rewards/chosen": -9.676132202148438,
"rewards/rejected": -22.771854400634766,
"rewards/accuracies": 0.9873417615890503,
"rewards/margins": 13.095723152160645,
"logps/chosen": -238.63157653808594,
"logps/rejected": -359.6723327636719,
"logits/chosen": -1.3230102062225342,
"logits/rejected": -1.3840538263320923,
"epoch": 13.533333333333333,
"step": 230
},
{
"loss": 0.0087,
"grad_norm": 0.00543614849448204,
"learning_rate": 1e-05,
"rewards/chosen": -9.432672500610352,
"rewards/rejected": -22.142011642456055,
"rewards/accuracies": 0.9873417615890503,
"rewards/margins": 12.709343910217285,
"logps/chosen": -243.76123046875,
"logps/rejected": -351.6514892578125,
"logits/chosen": -1.4037139415740967,
"logits/rejected": -1.439239263534546,
"epoch": 14.118518518518519,
"step": 240
},
{
"loss": 0.0,
"grad_norm": 0.0029208704363554716,
"learning_rate": 1.6666666666666667e-06,
"rewards/chosen": -9.081149101257324,
"rewards/rejected": -22.619098663330078,
"rewards/accuracies": 1.0,
"rewards/margins": 13.537951469421387,
"logps/chosen": -238.7053985595703,
"logps/rejected": -358.72821044921875,
"logits/chosen": -1.3238470554351807,
"logits/rejected": -1.3789857625961304,
"epoch": 14.71111111111111,
"step": 250
},
{
"train_runtime": 1619.2606,
"train_samples_per_second": 1.235,
"train_steps_per_second": 0.154,
"total_flos": 0.0,
"train_loss": 0.08888284659932834,
"epoch": 14.71111111111111,
"step": 250
}
]