DSR1-Qwen-32B-fc-v4 / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 14.884210526315789,
"eval_steps": 500,
"global_step": 119,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12631578947368421,
"grad_norm": 1.9382071495056152,
"learning_rate": 8.333333333333333e-07,
"loss": 0.5018,
"step": 1
},
{
"epoch": 0.25263157894736843,
"grad_norm": 1.9020211696624756,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.4715,
"step": 2
},
{
"epoch": 0.37894736842105264,
"grad_norm": 2.1491496562957764,
"learning_rate": 2.5e-06,
"loss": 0.5222,
"step": 3
},
{
"epoch": 0.5052631578947369,
"grad_norm": 1.9012116193771362,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.4848,
"step": 4
},
{
"epoch": 0.631578947368421,
"grad_norm": 1.9676955938339233,
"learning_rate": 4.166666666666667e-06,
"loss": 0.4981,
"step": 5
},
{
"epoch": 0.7578947368421053,
"grad_norm": 1.5441391468048096,
"learning_rate": 5e-06,
"loss": 0.468,
"step": 6
},
{
"epoch": 0.8842105263157894,
"grad_norm": 1.542313575744629,
"learning_rate": 5.833333333333334e-06,
"loss": 0.4635,
"step": 7
},
{
"epoch": 1.0,
"grad_norm": 1.542313575744629,
"learning_rate": 6.666666666666667e-06,
"loss": 0.4412,
"step": 8
},
{
"epoch": 1.1263157894736842,
"grad_norm": 1.0980747938156128,
"learning_rate": 7.500000000000001e-06,
"loss": 0.4205,
"step": 9
},
{
"epoch": 1.2526315789473683,
"grad_norm": 0.8775039911270142,
"learning_rate": 8.333333333333334e-06,
"loss": 0.4191,
"step": 10
},
{
"epoch": 1.3789473684210527,
"grad_norm": 0.4762185215950012,
"learning_rate": 9.166666666666666e-06,
"loss": 0.4331,
"step": 11
},
{
"epoch": 1.5052631578947369,
"grad_norm": 1.0445164442062378,
"learning_rate": 1e-05,
"loss": 0.376,
"step": 12
},
{
"epoch": 1.631578947368421,
"grad_norm": 1.0010490417480469,
"learning_rate": 9.997845031134992e-06,
"loss": 0.4098,
"step": 13
},
{
"epoch": 1.7578947368421054,
"grad_norm": 0.9467893838882446,
"learning_rate": 9.991381982096293e-06,
"loss": 0.3882,
"step": 14
},
{
"epoch": 1.8842105263157896,
"grad_norm": 0.7490498423576355,
"learning_rate": 9.98061642395168e-06,
"loss": 0.408,
"step": 15
},
{
"epoch": 2.0,
"grad_norm": 0.8292229175567627,
"learning_rate": 9.965557636478203e-06,
"loss": 0.3526,
"step": 16
},
{
"epoch": 2.126315789473684,
"grad_norm": 0.6992197036743164,
"learning_rate": 9.94621860016312e-06,
"loss": 0.3457,
"step": 17
},
{
"epoch": 2.2526315789473683,
"grad_norm": 0.4710032045841217,
"learning_rate": 9.922615985014887e-06,
"loss": 0.3445,
"step": 18
},
{
"epoch": 2.3789473684210525,
"grad_norm": 0.3899461030960083,
"learning_rate": 9.894770136193814e-06,
"loss": 0.3548,
"step": 19
},
{
"epoch": 2.5052631578947366,
"grad_norm": 0.40362292528152466,
"learning_rate": 9.862705056474795e-06,
"loss": 0.3454,
"step": 20
},
{
"epoch": 2.6315789473684212,
"grad_norm": 0.36370548605918884,
"learning_rate": 9.826448385557208e-06,
"loss": 0.3503,
"step": 21
},
{
"epoch": 2.7578947368421054,
"grad_norm": 0.35196444392204285,
"learning_rate": 9.786031376239842e-06,
"loss": 0.3332,
"step": 22
},
{
"epoch": 2.8842105263157896,
"grad_norm": 0.3312518894672394,
"learning_rate": 9.741488867481377e-06,
"loss": 0.3266,
"step": 23
},
{
"epoch": 3.0,
"grad_norm": 0.3002198338508606,
"learning_rate": 9.692859254369631e-06,
"loss": 0.3333,
"step": 24
},
{
"epoch": 3.126315789473684,
"grad_norm": 0.31604424118995667,
"learning_rate": 9.640184455025472e-06,
"loss": 0.3105,
"step": 25
},
{
"epoch": 3.2526315789473683,
"grad_norm": 0.2710190713405609,
"learning_rate": 9.583509874469924e-06,
"loss": 0.303,
"step": 26
},
{
"epoch": 3.3789473684210525,
"grad_norm": 0.2576391100883484,
"learning_rate": 9.522884365485599e-06,
"loss": 0.3219,
"step": 27
},
{
"epoch": 3.5052631578947366,
"grad_norm": 0.23554575443267822,
"learning_rate": 9.458360186506212e-06,
"loss": 0.292,
"step": 28
},
{
"epoch": 3.6315789473684212,
"grad_norm": 0.22661598026752472,
"learning_rate": 9.389992956570463e-06,
"loss": 0.3079,
"step": 29
},
{
"epoch": 3.7578947368421054,
"grad_norm": 0.238206148147583,
"learning_rate": 9.317841607379106e-06,
"loss": 0.3033,
"step": 30
},
{
"epoch": 3.8842105263157896,
"grad_norm": 0.2077244073152542,
"learning_rate": 9.241968332496576e-06,
"loss": 0.2923,
"step": 31
},
{
"epoch": 4.0,
"grad_norm": 0.21579864621162415,
"learning_rate": 9.162438533740891e-06,
"loss": 0.2884,
"step": 32
},
{
"epoch": 4.126315789473685,
"grad_norm": 0.21424686908721924,
"learning_rate": 9.07932076480812e-06,
"loss": 0.2787,
"step": 33
},
{
"epoch": 4.252631578947368,
"grad_norm": 0.2355010062456131,
"learning_rate": 8.99268667217993e-06,
"loss": 0.2795,
"step": 34
},
{
"epoch": 4.378947368421053,
"grad_norm": 0.19621430337429047,
"learning_rate": 8.90261093336523e-06,
"loss": 0.2726,
"step": 35
},
{
"epoch": 4.505263157894737,
"grad_norm": 0.2064390927553177,
"learning_rate": 8.809171192529074e-06,
"loss": 0.283,
"step": 36
},
{
"epoch": 4.631578947368421,
"grad_norm": 0.22728653252124786,
"learning_rate": 8.712447993564362e-06,
"loss": 0.2521,
"step": 37
},
{
"epoch": 4.757894736842105,
"grad_norm": 0.19322557747364044,
"learning_rate": 8.612524710664012e-06,
"loss": 0.247,
"step": 38
},
{
"epoch": 4.88421052631579,
"grad_norm": 0.1852494776248932,
"learning_rate": 8.509487476453442e-06,
"loss": 0.2585,
"step": 39
},
{
"epoch": 5.0,
"grad_norm": 0.22497323155403137,
"learning_rate": 8.403425107745315e-06,
"loss": 0.2697,
"step": 40
},
{
"epoch": 5.126315789473685,
"grad_norm": 0.23915345966815948,
"learning_rate": 8.294429028980555e-06,
"loss": 0.2322,
"step": 41
},
{
"epoch": 5.252631578947368,
"grad_norm": 0.1770770251750946,
"learning_rate": 8.182593193421625e-06,
"loss": 0.23,
"step": 42
},
{
"epoch": 5.378947368421053,
"grad_norm": 0.1872120201587677,
"learning_rate": 8.06801400216597e-06,
"loss": 0.232,
"step": 43
},
{
"epoch": 5.505263157894737,
"grad_norm": 0.2505616247653961,
"learning_rate": 7.950790221049485e-06,
"loss": 0.2359,
"step": 44
},
{
"epoch": 5.631578947368421,
"grad_norm": 0.19844704866409302,
"learning_rate": 7.831022895511586e-06,
"loss": 0.2451,
"step": 45
},
{
"epoch": 5.757894736842105,
"grad_norm": 0.1715887188911438,
"learning_rate": 7.708815263495307e-06,
"loss": 0.2282,
"step": 46
},
{
"epoch": 5.88421052631579,
"grad_norm": 0.2623206079006195,
"learning_rate": 7.584272666457471e-06,
"loss": 0.224,
"step": 47
},
{
"epoch": 6.0,
"grad_norm": 0.23196087777614594,
"learning_rate": 7.457502458565673e-06,
"loss": 0.2259,
"step": 48
},
{
"epoch": 6.126315789473685,
"grad_norm": 0.2110043466091156,
"learning_rate": 7.328613914160319e-06,
"loss": 0.2162,
"step": 49
},
{
"epoch": 6.252631578947368,
"grad_norm": 0.19842451810836792,
"learning_rate": 7.1977181335615085e-06,
"loss": 0.2009,
"step": 50
},
{
"epoch": 6.378947368421053,
"grad_norm": 0.20280607044696808,
"learning_rate": 7.064927947301942e-06,
"loss": 0.2042,
"step": 51
},
{
"epoch": 6.505263157894737,
"grad_norm": 0.19076986610889435,
"learning_rate": 6.9303578188684085e-06,
"loss": 0.1802,
"step": 52
},
{
"epoch": 6.631578947368421,
"grad_norm": 0.17408154904842377,
"learning_rate": 6.79412374603568e-06,
"loss": 0.1857,
"step": 53
},
{
"epoch": 6.757894736842105,
"grad_norm": 0.22106344997882843,
"learning_rate": 6.65634316087788e-06,
"loss": 0.197,
"step": 54
},
{
"epoch": 6.88421052631579,
"grad_norm": 0.20985056459903717,
"learning_rate": 6.5171348285434965e-06,
"loss": 0.1894,
"step": 55
},
{
"epoch": 7.0,
"grad_norm": 0.1847132444381714,
"learning_rate": 6.3766187448813e-06,
"loss": 0.1941,
"step": 56
},
{
"epoch": 7.126315789473685,
"grad_norm": 0.17527417838573456,
"learning_rate": 6.234916033005421e-06,
"loss": 0.1592,
"step": 57
},
{
"epoch": 7.252631578947368,
"grad_norm": 0.18749035894870758,
"learning_rate": 6.0921488388887315e-06,
"loss": 0.1629,
"step": 58
},
{
"epoch": 7.378947368421053,
"grad_norm": 0.19064860045909882,
"learning_rate": 5.948440226074539e-06,
"loss": 0.1691,
"step": 59
},
{
"epoch": 7.505263157894737,
"grad_norm": 0.1939338892698288,
"learning_rate": 5.803914069597342e-06,
"loss": 0.1469,
"step": 60
},
{
"epoch": 7.631578947368421,
"grad_norm": 0.19160349667072296,
"learning_rate": 5.658694949204094e-06,
"loss": 0.1536,
"step": 61
},
{
"epoch": 7.757894736842105,
"grad_norm": 0.19728262722492218,
"learning_rate": 5.512908041968018e-06,
"loss": 0.1572,
"step": 62
},
{
"epoch": 7.88421052631579,
"grad_norm": 0.4989687502384186,
"learning_rate": 5.36667901438752e-06,
"loss": 0.1571,
"step": 63
},
{
"epoch": 8.0,
"grad_norm": 0.1953885853290558,
"learning_rate": 5.220133914063239e-06,
"loss": 0.1567,
"step": 64
},
{
"epoch": 8.126315789473685,
"grad_norm": 0.2087051123380661,
"learning_rate": 5.073399061046584e-06,
"loss": 0.1345,
"step": 65
},
{
"epoch": 8.25263157894737,
"grad_norm": 0.21436747908592224,
"learning_rate": 4.926600938953418e-06,
"loss": 0.1302,
"step": 66
},
{
"epoch": 8.378947368421052,
"grad_norm": 0.20336535573005676,
"learning_rate": 4.779866085936762e-06,
"loss": 0.128,
"step": 67
},
{
"epoch": 8.505263157894737,
"grad_norm": 0.2767126262187958,
"learning_rate": 4.6333209856124814e-06,
"loss": 0.121,
"step": 68
},
{
"epoch": 8.631578947368421,
"grad_norm": 0.18408919870853424,
"learning_rate": 4.487091958031984e-06,
"loss": 0.1195,
"step": 69
},
{
"epoch": 8.757894736842106,
"grad_norm": 0.2123204469680786,
"learning_rate": 4.341305050795907e-06,
"loss": 0.128,
"step": 70
},
{
"epoch": 8.884210526315789,
"grad_norm": 0.2557663023471832,
"learning_rate": 4.19608593040266e-06,
"loss": 0.12,
"step": 71
},
{
"epoch": 9.0,
"grad_norm": 0.23377615213394165,
"learning_rate": 4.051559773925462e-06,
"loss": 0.1289,
"step": 72
},
{
"epoch": 9.126315789473685,
"grad_norm": 0.26173460483551025,
"learning_rate": 3.907851161111269e-06,
"loss": 0.1108,
"step": 73
},
{
"epoch": 9.25263157894737,
"grad_norm": 0.1853621006011963,
"learning_rate": 3.7650839669945804e-06,
"loss": 0.0998,
"step": 74
},
{
"epoch": 9.378947368421052,
"grad_norm": 0.25105205178260803,
"learning_rate": 3.623381255118702e-06,
"loss": 0.1048,
"step": 75
},
{
"epoch": 9.505263157894737,
"grad_norm": 0.3097490072250366,
"learning_rate": 3.4828651714565056e-06,
"loss": 0.1017,
"step": 76
},
{
"epoch": 9.631578947368421,
"grad_norm": 0.18099191784858704,
"learning_rate": 3.3436568391221215e-06,
"loss": 0.0921,
"step": 77
},
{
"epoch": 9.757894736842106,
"grad_norm": 0.17075076699256897,
"learning_rate": 3.2058762539643214e-06,
"loss": 0.0917,
"step": 78
},
{
"epoch": 9.884210526315789,
"grad_norm": 0.26677173376083374,
"learning_rate": 3.0696421811315923e-06,
"loss": 0.0915,
"step": 79
},
{
"epoch": 10.0,
"grad_norm": 0.24106067419052124,
"learning_rate": 2.9350720526980592e-06,
"loss": 0.0872,
"step": 80
},
{
"epoch": 10.126315789473685,
"grad_norm": 0.24568013846874237,
"learning_rate": 2.8022818664384945e-06,
"loss": 0.0834,
"step": 81
},
{
"epoch": 10.25263157894737,
"grad_norm": 0.16468282043933868,
"learning_rate": 2.671386085839682e-06,
"loss": 0.0797,
"step": 82
},
{
"epoch": 10.378947368421052,
"grad_norm": 0.18845616281032562,
"learning_rate": 2.542497541434329e-06,
"loss": 0.0752,
"step": 83
},
{
"epoch": 10.505263157894737,
"grad_norm": 0.277291476726532,
"learning_rate": 2.4157273335425296e-06,
"loss": 0.076,
"step": 84
},
{
"epoch": 10.631578947368421,
"grad_norm": 0.20707395672798157,
"learning_rate": 2.291184736504695e-06,
"loss": 0.0725,
"step": 85
},
{
"epoch": 10.757894736842106,
"grad_norm": 0.16026121377944946,
"learning_rate": 2.168977104488415e-06,
"loss": 0.0725,
"step": 86
},
{
"epoch": 10.884210526315789,
"grad_norm": 0.18469811975955963,
"learning_rate": 2.049209778950518e-06,
"loss": 0.0753,
"step": 87
},
{
"epoch": 11.0,
"grad_norm": 0.240610271692276,
"learning_rate": 1.9319859978340312e-06,
"loss": 0.0745,
"step": 88
},
{
"epoch": 11.126315789473685,
"grad_norm": 0.2793143689632416,
"learning_rate": 1.8174068065783768e-06,
"loss": 0.0637,
"step": 89
},
{
"epoch": 11.25263157894737,
"grad_norm": 0.21911287307739258,
"learning_rate": 1.7055709710194452e-06,
"loss": 0.0673,
"step": 90
},
{
"epoch": 11.378947368421052,
"grad_norm": 0.15809978544712067,
"learning_rate": 1.5965748922546876e-06,
"loss": 0.0604,
"step": 91
},
{
"epoch": 11.505263157894737,
"grad_norm": 0.18286827206611633,
"learning_rate": 1.490512523546559e-06,
"loss": 0.057,
"step": 92
},
{
"epoch": 11.631578947368421,
"grad_norm": 0.308363676071167,
"learning_rate": 1.38747528933599e-06,
"loss": 0.0639,
"step": 93
},
{
"epoch": 11.757894736842106,
"grad_norm": 0.28524667024612427,
"learning_rate": 1.28755200643564e-06,
"loss": 0.0613,
"step": 94
},
{
"epoch": 11.884210526315789,
"grad_norm": 0.18594607710838318,
"learning_rate": 1.190828807470929e-06,
"loss": 0.0597,
"step": 95
},
{
"epoch": 12.0,
"grad_norm": 0.1585705280303955,
"learning_rate": 1.0973890666347703e-06,
"loss": 0.0542,
"step": 96
},
{
"epoch": 12.126315789473685,
"grad_norm": 0.2194358855485916,
"learning_rate": 1.0073133278200702e-06,
"loss": 0.0534,
"step": 97
},
{
"epoch": 12.25263157894737,
"grad_norm": 0.2553711235523224,
"learning_rate": 9.206792351918809e-07,
"loss": 0.0555,
"step": 98
},
{
"epoch": 12.378947368421052,
"grad_norm": 0.23654919862747192,
"learning_rate": 8.375614662591097e-07,
"loss": 0.0509,
"step": 99
},
{
"epoch": 12.505263157894737,
"grad_norm": 0.22062428295612335,
"learning_rate": 7.580316675034255e-07,
"loss": 0.0547,
"step": 100
},
{
"epoch": 12.631578947368421,
"grad_norm": 0.17180566489696503,
"learning_rate": 6.821583926208947e-07,
"loss": 0.0533,
"step": 101
},
{
"epoch": 12.757894736842106,
"grad_norm": 0.11739594489336014,
"learning_rate": 6.100070434295379e-07,
"loss": 0.0507,
"step": 102
},
{
"epoch": 12.884210526315789,
"grad_norm": 0.14421358704566956,
"learning_rate": 5.416398134937878e-07,
"loss": 0.0469,
"step": 103
},
{
"epoch": 13.0,
"grad_norm": 0.14421358704566956,
"learning_rate": 4.771156345144018e-07,
"loss": 0.0498,
"step": 104
},
{
"epoch": 13.126315789473685,
"grad_norm": 0.38636109232902527,
"learning_rate": 4.1649012553007795e-07,
"loss": 0.049,
"step": 105
},
{
"epoch": 13.25263157894737,
"grad_norm": 0.15430928766727448,
"learning_rate": 3.5981554497452886e-07,
"loss": 0.0467,
"step": 106
},
{
"epoch": 13.378947368421052,
"grad_norm": 0.16978979110717773,
"learning_rate": 3.0714074563037043e-07,
"loss": 0.0432,
"step": 107
},
{
"epoch": 13.505263157894737,
"grad_norm": 0.15888580679893494,
"learning_rate": 2.585111325186235e-07,
"loss": 0.0483,
"step": 108
},
{
"epoch": 13.631578947368421,
"grad_norm": 0.16612868010997772,
"learning_rate": 2.1396862376015904e-07,
"loss": 0.0465,
"step": 109
},
{
"epoch": 13.757894736842106,
"grad_norm": 0.12936703860759735,
"learning_rate": 1.7355161444279346e-07,
"loss": 0.0486,
"step": 110
},
{
"epoch": 13.884210526315789,
"grad_norm": 0.12057878822088242,
"learning_rate": 1.372949435252058e-07,
"loss": 0.0465,
"step": 111
},
{
"epoch": 14.0,
"grad_norm": 0.12300478667020798,
"learning_rate": 1.0522986380618606e-07,
"loss": 0.0491,
"step": 112
},
{
"epoch": 14.126315789473685,
"grad_norm": 0.1282297968864441,
"learning_rate": 7.738401498511406e-08,
"loss": 0.0466,
"step": 113
},
{
"epoch": 14.25263157894737,
"grad_norm": 0.12143048644065857,
"learning_rate": 5.378139983688135e-08,
"loss": 0.0509,
"step": 114
},
{
"epoch": 14.378947368421052,
"grad_norm": 0.11874306946992874,
"learning_rate": 3.444236352179831e-08,
"loss": 0.0479,
"step": 115
},
{
"epoch": 14.505263157894737,
"grad_norm": 0.12550322711467743,
"learning_rate": 1.9383576048320752e-08,
"loss": 0.0445,
"step": 116
},
{
"epoch": 14.631578947368421,
"grad_norm": 0.11551317572593689,
"learning_rate": 8.618017903708198e-09,
"loss": 0.0489,
"step": 117
},
{
"epoch": 14.757894736842106,
"grad_norm": 0.12495766580104828,
"learning_rate": 2.154968865007989e-09,
"loss": 0.0439,
"step": 118
},
{
"epoch": 14.884210526315789,
"grad_norm": 0.10572399199008942,
"learning_rate": 0.0,
"loss": 0.0444,
"step": 119
},
{
"epoch": 14.884210526315789,
"step": 119,
"total_flos": 381420994822144.0,
"train_loss": 0.19363727301609615,
"train_runtime": 21617.7907,
"train_samples_per_second": 0.595,
"train_steps_per_second": 0.006
}
],
"logging_steps": 1,
"max_steps": 119,
"num_input_tokens_seen": 0,
"num_train_epochs": 17,
"save_steps": 32,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 381420994822144.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
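
The JSON above is the complete trainer state as uploaded. Below is a minimal illustrative sketch (not part of the original file) of how one might load it and plot the per-step training loss and learning-rate schedule recorded in "log_history". It assumes the file has been saved locally as trainer_state.json and uses matplotlib purely as an example; the final summary entry reports "train_loss" rather than "loss", so it is filtered out before plotting.

    # Sketch only: load the trainer state above and plot loss / learning rate.
    # Assumes the JSON is saved locally as "trainer_state.json".
    import json

    import matplotlib.pyplot as plt

    with open("trainer_state.json") as f:
        state = json.load(f)

    # Keep only per-step log entries; the trailing summary entry has no "loss" key.
    logs = [entry for entry in state["log_history"] if "loss" in entry]
    steps = [entry["step"] for entry in logs]
    losses = [entry["loss"] for entry in logs]
    lrs = [entry["learning_rate"] for entry in logs]

    fig, (ax_loss, ax_lr) = plt.subplots(1, 2, figsize=(10, 4))
    ax_loss.plot(steps, losses)
    ax_loss.set_xlabel("global step")
    ax_loss.set_ylabel("training loss")
    ax_lr.plot(steps, lrs)
    ax_lr.set_xlabel("global step")
    ax_lr.set_ylabel("learning rate")
    fig.suptitle("DSR1-Qwen-32B-fc-v4 trainer_state.json")
    fig.tight_layout()
    plt.show()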