{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9979661016949153,
"eval_steps": 500,
"global_step": 276,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003615819209039548,
"grad_norm": 14.63072256054265,
"learning_rate": 0.0,
"loss": 1.4293,
"step": 1
},
{
"epoch": 0.007231638418079096,
"grad_norm": 13.703421994615557,
"learning_rate": 1.7857142857142858e-07,
"loss": 1.4193,
"step": 2
},
{
"epoch": 0.010847457627118645,
"grad_norm": 13.716616968172858,
"learning_rate": 3.5714285714285716e-07,
"loss": 1.3691,
"step": 3
},
{
"epoch": 0.014463276836158192,
"grad_norm": 13.81492601698403,
"learning_rate": 5.357142857142857e-07,
"loss": 1.4337,
"step": 4
},
{
"epoch": 0.01807909604519774,
"grad_norm": 13.013807610008277,
"learning_rate": 7.142857142857143e-07,
"loss": 1.335,
"step": 5
},
{
"epoch": 0.02169491525423729,
"grad_norm": 13.316675508543259,
"learning_rate": 8.928571428571429e-07,
"loss": 1.4104,
"step": 6
},
{
"epoch": 0.025310734463276835,
"grad_norm": 12.54293399835329,
"learning_rate": 1.0714285714285714e-06,
"loss": 1.3364,
"step": 7
},
{
"epoch": 0.028926553672316384,
"grad_norm": 11.706383970770567,
"learning_rate": 1.25e-06,
"loss": 1.3289,
"step": 8
},
{
"epoch": 0.03254237288135593,
"grad_norm": 9.135561727877839,
"learning_rate": 1.4285714285714286e-06,
"loss": 1.2395,
"step": 9
},
{
"epoch": 0.03615819209039548,
"grad_norm": 8.24371141549115,
"learning_rate": 1.6071428571428574e-06,
"loss": 1.2597,
"step": 10
},
{
"epoch": 0.03977401129943503,
"grad_norm": 5.641164800490142,
"learning_rate": 1.7857142857142859e-06,
"loss": 1.1323,
"step": 11
},
{
"epoch": 0.04338983050847458,
"grad_norm": 5.154352679322036,
"learning_rate": 1.9642857142857144e-06,
"loss": 1.1555,
"step": 12
},
{
"epoch": 0.04700564971751412,
"grad_norm": 4.902918571287102,
"learning_rate": 2.1428571428571427e-06,
"loss": 1.1208,
"step": 13
},
{
"epoch": 0.05062146892655367,
"grad_norm": 4.651473817511105,
"learning_rate": 2.321428571428572e-06,
"loss": 1.1211,
"step": 14
},
{
"epoch": 0.05423728813559322,
"grad_norm": 7.855915558603105,
"learning_rate": 2.5e-06,
"loss": 1.0921,
"step": 15
},
{
"epoch": 0.05785310734463277,
"grad_norm": 9.766906290977035,
"learning_rate": 2.6785714285714285e-06,
"loss": 1.0519,
"step": 16
},
{
"epoch": 0.061468926553672316,
"grad_norm": 7.745681824751476,
"learning_rate": 2.8571428571428573e-06,
"loss": 1.0559,
"step": 17
},
{
"epoch": 0.06508474576271187,
"grad_norm": 4.81594013217655,
"learning_rate": 3.0357142857142856e-06,
"loss": 1.0629,
"step": 18
},
{
"epoch": 0.06870056497175141,
"grad_norm": 4.091417378072179,
"learning_rate": 3.2142857142857147e-06,
"loss": 1.0244,
"step": 19
},
{
"epoch": 0.07231638418079096,
"grad_norm": 2.8101858455670237,
"learning_rate": 3.3928571428571435e-06,
"loss": 0.9697,
"step": 20
},
{
"epoch": 0.0759322033898305,
"grad_norm": 3.180728091735543,
"learning_rate": 3.5714285714285718e-06,
"loss": 0.9601,
"step": 21
},
{
"epoch": 0.07954802259887006,
"grad_norm": 3.6493251911677658,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.9409,
"step": 22
},
{
"epoch": 0.0831638418079096,
"grad_norm": 3.415956781592975,
"learning_rate": 3.928571428571429e-06,
"loss": 0.9486,
"step": 23
},
{
"epoch": 0.08677966101694916,
"grad_norm": 3.0472462438826,
"learning_rate": 4.107142857142857e-06,
"loss": 0.8874,
"step": 24
},
{
"epoch": 0.0903954802259887,
"grad_norm": 2.264144754905512,
"learning_rate": 4.2857142857142855e-06,
"loss": 0.8994,
"step": 25
},
{
"epoch": 0.09401129943502824,
"grad_norm": 2.191754581281806,
"learning_rate": 4.464285714285715e-06,
"loss": 0.8975,
"step": 26
},
{
"epoch": 0.0976271186440678,
"grad_norm": 2.3668184420746385,
"learning_rate": 4.642857142857144e-06,
"loss": 0.8581,
"step": 27
},
{
"epoch": 0.10124293785310734,
"grad_norm": 2.7515269578252193,
"learning_rate": 4.821428571428572e-06,
"loss": 0.8423,
"step": 28
},
{
"epoch": 0.1048587570621469,
"grad_norm": 2.3735525312849837,
"learning_rate": 5e-06,
"loss": 0.8205,
"step": 29
},
{
"epoch": 0.10847457627118644,
"grad_norm": 2.032163014842967,
"learning_rate": 4.999799414013322e-06,
"loss": 0.8754,
"step": 30
},
{
"epoch": 0.112090395480226,
"grad_norm": 2.0590504400576997,
"learning_rate": 4.999197688241076e-06,
"loss": 0.8267,
"step": 31
},
{
"epoch": 0.11570621468926554,
"grad_norm": 2.092420106610846,
"learning_rate": 4.998194919241471e-06,
"loss": 0.8312,
"step": 32
},
{
"epoch": 0.11932203389830509,
"grad_norm": 1.9381311525751297,
"learning_rate": 4.996791267927632e-06,
"loss": 0.7878,
"step": 33
},
{
"epoch": 0.12293785310734463,
"grad_norm": 1.7374506458276127,
"learning_rate": 4.994986959541788e-06,
"loss": 0.7816,
"step": 34
},
{
"epoch": 0.12655367231638417,
"grad_norm": 2.0807346358096166,
"learning_rate": 4.9927822836191185e-06,
"loss": 0.7818,
"step": 35
},
{
"epoch": 0.13016949152542373,
"grad_norm": 1.73060241181151,
"learning_rate": 4.990177593941303e-06,
"loss": 0.7674,
"step": 36
},
{
"epoch": 0.1337853107344633,
"grad_norm": 1.7060389302256593,
"learning_rate": 4.987173308479738e-06,
"loss": 0.7652,
"step": 37
},
{
"epoch": 0.13740112994350281,
"grad_norm": 1.6025020216781503,
"learning_rate": 4.9837699093284765e-06,
"loss": 0.7457,
"step": 38
},
{
"epoch": 0.14101694915254237,
"grad_norm": 1.7291972266390814,
"learning_rate": 4.9799679426268575e-06,
"loss": 0.7943,
"step": 39
},
{
"epoch": 0.14463276836158193,
"grad_norm": 1.6572877077102874,
"learning_rate": 4.975768018471877e-06,
"loss": 0.7815,
"step": 40
},
{
"epoch": 0.14824858757062148,
"grad_norm": 1.5680833894820443,
"learning_rate": 4.971170810820279e-06,
"loss": 0.7557,
"step": 41
},
{
"epoch": 0.151864406779661,
"grad_norm": 1.4822606907798626,
"learning_rate": 4.966177057380409e-06,
"loss": 0.7561,
"step": 42
},
{
"epoch": 0.15548022598870057,
"grad_norm": 1.6178903613745546,
"learning_rate": 4.960787559493836e-06,
"loss": 0.7474,
"step": 43
},
{
"epoch": 0.15909604519774012,
"grad_norm": 1.5096967013865987,
"learning_rate": 4.955003182006761e-06,
"loss": 0.716,
"step": 44
},
{
"epoch": 0.16271186440677965,
"grad_norm": 1.4671027619246142,
"learning_rate": 4.948824853131237e-06,
"loss": 0.7353,
"step": 45
},
{
"epoch": 0.1663276836158192,
"grad_norm": 1.4778933913112275,
"learning_rate": 4.942253564296217e-06,
"loss": 0.7358,
"step": 46
},
{
"epoch": 0.16994350282485876,
"grad_norm": 1.4672254672280391,
"learning_rate": 4.935290369988468e-06,
"loss": 0.7419,
"step": 47
},
{
"epoch": 0.17355932203389832,
"grad_norm": 1.4415543659264245,
"learning_rate": 4.927936387583348e-06,
"loss": 0.748,
"step": 48
},
{
"epoch": 0.17717514124293784,
"grad_norm": 1.3940953807201077,
"learning_rate": 4.920192797165511e-06,
"loss": 0.7571,
"step": 49
},
{
"epoch": 0.1807909604519774,
"grad_norm": 1.4519222988783331,
"learning_rate": 4.912060841339536e-06,
"loss": 0.7127,
"step": 50
},
{
"epoch": 0.18440677966101696,
"grad_norm": 1.4865547512367276,
"learning_rate": 4.9035418250305314e-06,
"loss": 0.7272,
"step": 51
},
{
"epoch": 0.18802259887005648,
"grad_norm": 1.5008090646455723,
"learning_rate": 4.894637115274728e-06,
"loss": 0.7258,
"step": 52
},
{
"epoch": 0.19163841807909604,
"grad_norm": 1.7037112279423843,
"learning_rate": 4.8853481410001225e-06,
"loss": 0.7316,
"step": 53
},
{
"epoch": 0.1952542372881356,
"grad_norm": 1.440963552161302,
"learning_rate": 4.875676392797169e-06,
"loss": 0.7551,
"step": 54
},
{
"epoch": 0.19887005649717515,
"grad_norm": 1.440064113753515,
"learning_rate": 4.865623422679593e-06,
"loss": 0.7446,
"step": 55
},
{
"epoch": 0.20248587570621468,
"grad_norm": 1.6503815942075044,
"learning_rate": 4.855190843835338e-06,
"loss": 0.6955,
"step": 56
},
{
"epoch": 0.20610169491525424,
"grad_norm": 1.3995241998208656,
"learning_rate": 4.844380330367701e-06,
"loss": 0.7214,
"step": 57
},
{
"epoch": 0.2097175141242938,
"grad_norm": 1.461439825228087,
"learning_rate": 4.833193617026692e-06,
"loss": 0.7386,
"step": 58
},
{
"epoch": 0.21333333333333335,
"grad_norm": 1.5840510779057535,
"learning_rate": 4.821632498930656e-06,
"loss": 0.7156,
"step": 59
},
{
"epoch": 0.21694915254237288,
"grad_norm": 1.5720479925852917,
"learning_rate": 4.809698831278217e-06,
"loss": 0.6961,
"step": 60
},
{
"epoch": 0.22056497175141243,
"grad_norm": 1.6408543211624975,
"learning_rate": 4.797394529050577e-06,
"loss": 0.7223,
"step": 61
},
{
"epoch": 0.224180790960452,
"grad_norm": 1.4636770967664614,
"learning_rate": 4.784721566704217e-06,
"loss": 0.7157,
"step": 62
},
{
"epoch": 0.22779661016949151,
"grad_norm": 1.5059556334177817,
"learning_rate": 4.771681977854062e-06,
"loss": 0.7091,
"step": 63
},
{
"epoch": 0.23141242937853107,
"grad_norm": 1.3659765286811016,
"learning_rate": 4.75827785494715e-06,
"loss": 0.7012,
"step": 64
},
{
"epoch": 0.23502824858757063,
"grad_norm": 1.418860606361076,
"learning_rate": 4.744511348926855e-06,
"loss": 0.7124,
"step": 65
},
{
"epoch": 0.23864406779661018,
"grad_norm": 1.4260454819088881,
"learning_rate": 4.730384668887731e-06,
"loss": 0.7215,
"step": 66
},
{
"epoch": 0.2422598870056497,
"grad_norm": 1.5391993842680236,
"learning_rate": 4.715900081721021e-06,
"loss": 0.6946,
"step": 67
},
{
"epoch": 0.24587570621468927,
"grad_norm": 1.552630241762705,
"learning_rate": 4.7010599117508936e-06,
"loss": 0.7109,
"step": 68
},
{
"epoch": 0.24949152542372882,
"grad_norm": 1.5270739656846004,
"learning_rate": 4.685866540361456e-06,
"loss": 0.7092,
"step": 69
},
{
"epoch": 0.25310734463276835,
"grad_norm": 1.5114873681601728,
"learning_rate": 4.670322405614621e-06,
"loss": 0.7055,
"step": 70
},
{
"epoch": 0.25672316384180793,
"grad_norm": 1.5299853438436042,
"learning_rate": 4.654430001858874e-06,
"loss": 0.6878,
"step": 71
},
{
"epoch": 0.26033898305084746,
"grad_norm": 1.445451157672883,
"learning_rate": 4.638191879329005e-06,
"loss": 0.7222,
"step": 72
},
{
"epoch": 0.263954802259887,
"grad_norm": 1.4221665529233392,
"learning_rate": 4.621610643736878e-06,
"loss": 0.7237,
"step": 73
},
{
"epoch": 0.2675706214689266,
"grad_norm": 1.469857395185097,
"learning_rate": 4.6046889558532925e-06,
"loss": 0.6966,
"step": 74
},
{
"epoch": 0.2711864406779661,
"grad_norm": 1.5144096217089977,
"learning_rate": 4.587429531081019e-06,
"loss": 0.7018,
"step": 75
},
{
"epoch": 0.27480225988700563,
"grad_norm": 1.3933861614255332,
"learning_rate": 4.569835139019054e-06,
"loss": 0.6548,
"step": 76
},
{
"epoch": 0.2784180790960452,
"grad_norm": 1.7309884671892724,
"learning_rate": 4.551908603018191e-06,
"loss": 0.6976,
"step": 77
},
{
"epoch": 0.28203389830508474,
"grad_norm": 1.5328106326816477,
"learning_rate": 4.53365279972796e-06,
"loss": 0.693,
"step": 78
},
{
"epoch": 0.28564971751412427,
"grad_norm": 1.5098349501089556,
"learning_rate": 4.515070658635013e-06,
"loss": 0.697,
"step": 79
},
{
"epoch": 0.28926553672316385,
"grad_norm": 1.7412535656694954,
"learning_rate": 4.4961651615930344e-06,
"loss": 0.7115,
"step": 80
},
{
"epoch": 0.2928813559322034,
"grad_norm": 1.5076941472793814,
"learning_rate": 4.476939342344246e-06,
"loss": 0.7163,
"step": 81
},
{
"epoch": 0.29649717514124296,
"grad_norm": 1.4730053806555845,
"learning_rate": 4.457396286032589e-06,
"loss": 0.6886,
"step": 82
},
{
"epoch": 0.3001129943502825,
"grad_norm": 1.4855157413144469,
"learning_rate": 4.437539128708647e-06,
"loss": 0.7339,
"step": 83
},
{
"epoch": 0.303728813559322,
"grad_norm": 1.5405150766486688,
"learning_rate": 4.417371056826417e-06,
"loss": 0.7149,
"step": 84
},
{
"epoch": 0.3073446327683616,
"grad_norm": 1.5508769466195937,
"learning_rate": 4.396895306731978e-06,
"loss": 0.6822,
"step": 85
},
{
"epoch": 0.31096045197740113,
"grad_norm": 1.4588960420622106,
"learning_rate": 4.376115164144157e-06,
"loss": 0.6836,
"step": 86
},
{
"epoch": 0.31457627118644066,
"grad_norm": 1.6627413334009173,
"learning_rate": 4.355033963627277e-06,
"loss": 0.7131,
"step": 87
},
{
"epoch": 0.31819209039548024,
"grad_norm": 1.5553517341256553,
"learning_rate": 4.333655088056065e-06,
"loss": 0.6854,
"step": 88
},
{
"epoch": 0.32180790960451977,
"grad_norm": 1.6684230900983013,
"learning_rate": 4.3119819680728e-06,
"loss": 0.7094,
"step": 89
},
{
"epoch": 0.3254237288135593,
"grad_norm": 1.3726285594917855,
"learning_rate": 4.290018081536807e-06,
"loss": 0.6872,
"step": 90
},
{
"epoch": 0.3290395480225989,
"grad_norm": 1.380724539985608,
"learning_rate": 4.267766952966369e-06,
"loss": 0.7139,
"step": 91
},
{
"epoch": 0.3326553672316384,
"grad_norm": 1.5348446771711333,
"learning_rate": 4.245232152973148e-06,
"loss": 0.6778,
"step": 92
},
{
"epoch": 0.336271186440678,
"grad_norm": 1.5229418737475282,
"learning_rate": 4.222417297689217e-06,
"loss": 0.6689,
"step": 93
},
{
"epoch": 0.3398870056497175,
"grad_norm": 1.5070456041355724,
"learning_rate": 4.199326048186783e-06,
"loss": 0.6894,
"step": 94
},
{
"epoch": 0.34350282485875705,
"grad_norm": 1.3616666744687627,
"learning_rate": 4.175962109890697e-06,
"loss": 0.6554,
"step": 95
},
{
"epoch": 0.34711864406779663,
"grad_norm": 1.5391506984706091,
"learning_rate": 4.152329231983852e-06,
"loss": 0.7023,
"step": 96
},
{
"epoch": 0.35073446327683616,
"grad_norm": 1.4241158917993244,
"learning_rate": 4.128431206805556e-06,
"loss": 0.7107,
"step": 97
},
{
"epoch": 0.3543502824858757,
"grad_norm": 1.4631603034598988,
"learning_rate": 4.104271869242975e-06,
"loss": 0.6894,
"step": 98
},
{
"epoch": 0.3579661016949153,
"grad_norm": 1.4094826388778878,
"learning_rate": 4.07985509611576e-06,
"loss": 0.677,
"step": 99
},
{
"epoch": 0.3615819209039548,
"grad_norm": 1.4740189681153695,
"learning_rate": 4.0551848055539345e-06,
"loss": 0.6699,
"step": 100
},
{
"epoch": 0.36519774011299433,
"grad_norm": 1.4525217649150708,
"learning_rate": 4.030264956369158e-06,
"loss": 0.7368,
"step": 101
},
{
"epoch": 0.3688135593220339,
"grad_norm": 1.451460518174865,
"learning_rate": 4.005099547419458e-06,
"loss": 0.7034,
"step": 102
},
{
"epoch": 0.37242937853107344,
"grad_norm": 1.5038951264951017,
"learning_rate": 3.979692616967543e-06,
"loss": 0.6837,
"step": 103
},
{
"epoch": 0.37604519774011297,
"grad_norm": 1.4157220265249935,
"learning_rate": 3.9540482420327845e-06,
"loss": 0.6875,
"step": 104
},
{
"epoch": 0.37966101694915255,
"grad_norm": 1.4737148597287204,
"learning_rate": 3.9281705377369814e-06,
"loss": 0.6901,
"step": 105
},
{
"epoch": 0.3832768361581921,
"grad_norm": 1.4677149428122727,
"learning_rate": 3.902063656644012e-06,
"loss": 0.6792,
"step": 106
},
{
"epoch": 0.38689265536723166,
"grad_norm": 1.3434999781262245,
"learning_rate": 3.875731788093478e-06,
"loss": 0.6876,
"step": 107
},
{
"epoch": 0.3905084745762712,
"grad_norm": 1.4987089091978376,
"learning_rate": 3.84917915752845e-06,
"loss": 0.6761,
"step": 108
},
{
"epoch": 0.3941242937853107,
"grad_norm": 1.508794866255814,
"learning_rate": 3.8224100258174066e-06,
"loss": 0.6838,
"step": 109
},
{
"epoch": 0.3977401129943503,
"grad_norm": 1.4688146101109116,
"learning_rate": 3.795428688570505e-06,
"loss": 0.684,
"step": 110
},
{
"epoch": 0.40135593220338983,
"grad_norm": 1.4606514616212791,
"learning_rate": 3.7682394754502687e-06,
"loss": 0.6824,
"step": 111
},
{
"epoch": 0.40497175141242936,
"grad_norm": 1.4096709625800348,
"learning_rate": 3.7408467494768104e-06,
"loss": 0.6969,
"step": 112
},
{
"epoch": 0.40858757062146894,
"grad_norm": 1.418524365345472,
"learning_rate": 3.7132549063277033e-06,
"loss": 0.7097,
"step": 113
},
{
"epoch": 0.41220338983050847,
"grad_norm": 1.5410281672091906,
"learning_rate": 3.685468373632613e-06,
"loss": 0.6746,
"step": 114
},
{
"epoch": 0.415819209039548,
"grad_norm": 1.3641313015018903,
"learning_rate": 3.657491610262802e-06,
"loss": 0.6448,
"step": 115
},
{
"epoch": 0.4194350282485876,
"grad_norm": 1.3041765820835833,
"learning_rate": 3.6293291056156178e-06,
"loss": 0.6819,
"step": 116
},
{
"epoch": 0.4230508474576271,
"grad_norm": 1.506905844856063,
"learning_rate": 3.600985378894086e-06,
"loss": 0.6876,
"step": 117
},
{
"epoch": 0.4266666666666667,
"grad_norm": 1.376689012221553,
"learning_rate": 3.572464978381719e-06,
"loss": 0.684,
"step": 118
},
{
"epoch": 0.4302824858757062,
"grad_norm": 1.340240336011346,
"learning_rate": 3.5437724807126583e-06,
"loss": 0.6505,
"step": 119
},
{
"epoch": 0.43389830508474575,
"grad_norm": 1.325670474835959,
"learning_rate": 3.514912490137268e-06,
"loss": 0.6357,
"step": 120
},
{
"epoch": 0.43751412429378533,
"grad_norm": 1.2967281789427187,
"learning_rate": 3.4858896377832966e-06,
"loss": 0.6716,
"step": 121
},
{
"epoch": 0.44112994350282486,
"grad_norm": 1.4289334076188702,
"learning_rate": 3.4567085809127247e-06,
"loss": 0.6749,
"step": 122
},
{
"epoch": 0.4447457627118644,
"grad_norm": 1.4006278853440177,
"learning_rate": 3.42737400217442e-06,
"loss": 0.6675,
"step": 123
},
{
"epoch": 0.448361581920904,
"grad_norm": 1.4903303746050145,
"learning_rate": 3.397890608852718e-06,
"loss": 0.6795,
"step": 124
},
{
"epoch": 0.4519774011299435,
"grad_norm": 1.5237327914091412,
"learning_rate": 3.3682631321120507e-06,
"loss": 0.6834,
"step": 125
},
{
"epoch": 0.45559322033898303,
"grad_norm": 1.375637272974929,
"learning_rate": 3.3384963262377434e-06,
"loss": 0.6546,
"step": 126
},
{
"epoch": 0.4592090395480226,
"grad_norm": 1.4010303116849099,
"learning_rate": 3.3085949678730953e-06,
"loss": 0.6687,
"step": 127
},
{
"epoch": 0.46282485875706214,
"grad_norm": 1.3723912301345371,
"learning_rate": 3.278563855252885e-06,
"loss": 0.6927,
"step": 128
},
{
"epoch": 0.46644067796610167,
"grad_norm": 1.4580226139492987,
"learning_rate": 3.248407807433396e-06,
"loss": 0.6843,
"step": 129
},
{
"epoch": 0.47005649717514125,
"grad_norm": 1.5657029406507326,
"learning_rate": 3.2181316635191125e-06,
"loss": 0.6639,
"step": 130
},
{
"epoch": 0.4736723163841808,
"grad_norm": 1.5943610613148829,
"learning_rate": 3.1877402818861954e-06,
"loss": 0.6655,
"step": 131
},
{
"epoch": 0.47728813559322036,
"grad_norm": 1.398551732301804,
"learning_rate": 3.157238539402862e-06,
"loss": 0.6648,
"step": 132
},
{
"epoch": 0.4809039548022599,
"grad_norm": 1.3527863119261647,
"learning_rate": 3.1266313306468018e-06,
"loss": 0.6793,
"step": 133
},
{
"epoch": 0.4845197740112994,
"grad_norm": 1.4133737666494006,
"learning_rate": 3.095923567119748e-06,
"loss": 0.6808,
"step": 134
},
{
"epoch": 0.488135593220339,
"grad_norm": 1.3489274410441074,
"learning_rate": 3.0651201764593375e-06,
"loss": 0.669,
"step": 135
},
{
"epoch": 0.49175141242937853,
"grad_norm": 1.4710077216567483,
"learning_rate": 3.034226101648377e-06,
"loss": 0.6685,
"step": 136
},
{
"epoch": 0.49536723163841806,
"grad_norm": 1.4143201023235143,
"learning_rate": 3.0032463002216504e-06,
"loss": 0.6803,
"step": 137
},
{
"epoch": 0.49898305084745764,
"grad_norm": 1.350434140774409,
"learning_rate": 2.972185743470386e-06,
"loss": 0.6293,
"step": 138
},
{
"epoch": 0.5025988700564972,
"grad_norm": 1.4061918089975518,
"learning_rate": 2.941049415644522e-06,
"loss": 0.6981,
"step": 139
},
{
"epoch": 0.5062146892655367,
"grad_norm": 1.4466820101061297,
"learning_rate": 2.909842313152888e-06,
"loss": 0.6738,
"step": 140
},
{
"epoch": 0.5098305084745762,
"grad_norm": 1.5124850873525673,
"learning_rate": 2.878569443761442e-06,
"loss": 0.7131,
"step": 141
},
{
"epoch": 0.5134463276836159,
"grad_norm": 1.4743009883750753,
"learning_rate": 2.847235825789673e-06,
"loss": 0.7016,
"step": 142
},
{
"epoch": 0.5170621468926554,
"grad_norm": 1.3586041525935764,
"learning_rate": 2.8158464873053236e-06,
"loss": 0.6724,
"step": 143
},
{
"epoch": 0.5206779661016949,
"grad_norm": 1.4996529158631906,
"learning_rate": 2.784406465317538e-06,
"loss": 0.6662,
"step": 144
},
{
"epoch": 0.5242937853107345,
"grad_norm": 1.4671966292049852,
"learning_rate": 2.752920804968581e-06,
"loss": 0.6631,
"step": 145
},
{
"epoch": 0.527909604519774,
"grad_norm": 1.444812582502839,
"learning_rate": 2.7213945587242507e-06,
"loss": 0.6513,
"step": 146
},
{
"epoch": 0.5315254237288135,
"grad_norm": 1.3053540444757412,
"learning_rate": 2.689832785563116e-06,
"loss": 0.6555,
"step": 147
},
{
"epoch": 0.5351412429378531,
"grad_norm": 1.314006962084444,
"learning_rate": 2.658240550164704e-06,
"loss": 0.6661,
"step": 148
},
{
"epoch": 0.5387570621468927,
"grad_norm": 1.4304673510029906,
"learning_rate": 2.626622922096782e-06,
"loss": 0.6621,
"step": 149
},
{
"epoch": 0.5423728813559322,
"grad_norm": 1.4876534124839516,
"learning_rate": 2.5949849750018486e-06,
"loss": 0.6758,
"step": 150
},
{
"epoch": 0.5459887005649717,
"grad_norm": 1.3200607589115334,
"learning_rate": 2.56333178578297e-06,
"loss": 0.6559,
"step": 151
},
{
"epoch": 0.5496045197740113,
"grad_norm": 1.3240990359086642,
"learning_rate": 2.5316684337891005e-06,
"loss": 0.6232,
"step": 152
},
{
"epoch": 0.5532203389830509,
"grad_norm": 1.3124913285203368,
"learning_rate": 2.5e-06,
"loss": 0.6373,
"step": 153
},
{
"epoch": 0.5568361581920904,
"grad_norm": 1.3981996964149215,
"learning_rate": 2.4683315662109003e-06,
"loss": 0.6779,
"step": 154
},
{
"epoch": 0.56045197740113,
"grad_norm": 1.3816310911971024,
"learning_rate": 2.436668214217031e-06,
"loss": 0.654,
"step": 155
},
{
"epoch": 0.5640677966101695,
"grad_norm": 1.263049809743652,
"learning_rate": 2.4050150249981522e-06,
"loss": 0.6625,
"step": 156
},
{
"epoch": 0.567683615819209,
"grad_norm": 1.3247706606665524,
"learning_rate": 2.3733770779032185e-06,
"loss": 0.6862,
"step": 157
},
{
"epoch": 0.5712994350282485,
"grad_norm": 1.3384592393063528,
"learning_rate": 2.341759449835297e-06,
"loss": 0.669,
"step": 158
},
{
"epoch": 0.5749152542372882,
"grad_norm": 1.3084651079974374,
"learning_rate": 2.310167214436885e-06,
"loss": 0.6389,
"step": 159
},
{
"epoch": 0.5785310734463277,
"grad_norm": 1.3374680724124108,
"learning_rate": 2.27860544127575e-06,
"loss": 0.6472,
"step": 160
},
{
"epoch": 0.5821468926553672,
"grad_norm": 1.3122640789722633,
"learning_rate": 2.24707919503142e-06,
"loss": 0.6579,
"step": 161
},
{
"epoch": 0.5857627118644068,
"grad_norm": 1.4412496554625216,
"learning_rate": 2.2155935346824634e-06,
"loss": 0.6481,
"step": 162
},
{
"epoch": 0.5893785310734463,
"grad_norm": 1.3863338838498946,
"learning_rate": 2.1841535126946777e-06,
"loss": 0.6535,
"step": 163
},
{
"epoch": 0.5929943502824859,
"grad_norm": 1.345502076046215,
"learning_rate": 2.1527641742103282e-06,
"loss": 0.6707,
"step": 164
},
{
"epoch": 0.5966101694915255,
"grad_norm": 1.3847319032734033,
"learning_rate": 2.1214305562385592e-06,
"loss": 0.6663,
"step": 165
},
{
"epoch": 0.600225988700565,
"grad_norm": 1.430665603476504,
"learning_rate": 2.0901576868471125e-06,
"loss": 0.6747,
"step": 166
},
{
"epoch": 0.6038418079096045,
"grad_norm": 1.353261054674096,
"learning_rate": 2.05895058435548e-06,
"loss": 0.6512,
"step": 167
},
{
"epoch": 0.607457627118644,
"grad_norm": 1.2859249411363938,
"learning_rate": 2.0278142565296153e-06,
"loss": 0.6324,
"step": 168
},
{
"epoch": 0.6110734463276836,
"grad_norm": 1.4013926433738113,
"learning_rate": 1.9967536997783495e-06,
"loss": 0.6679,
"step": 169
},
{
"epoch": 0.6146892655367232,
"grad_norm": 1.3862043238014647,
"learning_rate": 1.9657738983516227e-06,
"loss": 0.6729,
"step": 170
},
{
"epoch": 0.6183050847457627,
"grad_norm": 1.3586507239524463,
"learning_rate": 1.934879823540663e-06,
"loss": 0.6493,
"step": 171
},
{
"epoch": 0.6219209039548023,
"grad_norm": 1.3297630157555045,
"learning_rate": 1.9040764328802523e-06,
"loss": 0.6398,
"step": 172
},
{
"epoch": 0.6255367231638418,
"grad_norm": 1.3817085459115725,
"learning_rate": 1.8733686693531986e-06,
"loss": 0.6582,
"step": 173
},
{
"epoch": 0.6291525423728813,
"grad_norm": 1.3202548993972594,
"learning_rate": 1.842761460597138e-06,
"loss": 0.6532,
"step": 174
},
{
"epoch": 0.632768361581921,
"grad_norm": 1.3288961390749972,
"learning_rate": 1.812259718113805e-06,
"loss": 0.6603,
"step": 175
},
{
"epoch": 0.6363841807909605,
"grad_norm": 1.329943461477084,
"learning_rate": 1.7818683364808883e-06,
"loss": 0.658,
"step": 176
},
{
"epoch": 0.64,
"grad_norm": 1.3692273444745175,
"learning_rate": 1.7515921925666053e-06,
"loss": 0.6317,
"step": 177
},
{
"epoch": 0.6436158192090395,
"grad_norm": 1.421706203526152,
"learning_rate": 1.7214361447471156e-06,
"loss": 0.677,
"step": 178
},
{
"epoch": 0.6472316384180791,
"grad_norm": 1.3083895534561967,
"learning_rate": 1.6914050321269049e-06,
"loss": 0.6736,
"step": 179
},
{
"epoch": 0.6508474576271186,
"grad_norm": 1.3408157323699283,
"learning_rate": 1.6615036737622574e-06,
"loss": 0.6802,
"step": 180
},
{
"epoch": 0.6544632768361582,
"grad_norm": 1.2866436449644132,
"learning_rate": 1.6317368678879497e-06,
"loss": 0.646,
"step": 181
},
{
"epoch": 0.6580790960451978,
"grad_norm": 1.4469309073814418,
"learning_rate": 1.6021093911472825e-06,
"loss": 0.6502,
"step": 182
},
{
"epoch": 0.6616949152542373,
"grad_norm": 1.3285438490415578,
"learning_rate": 1.572625997825581e-06,
"loss": 0.6392,
"step": 183
},
{
"epoch": 0.6653107344632768,
"grad_norm": 1.3767595914556963,
"learning_rate": 1.5432914190872757e-06,
"loss": 0.6478,
"step": 184
},
{
"epoch": 0.6689265536723163,
"grad_norm": 1.3913230479527472,
"learning_rate": 1.5141103622167042e-06,
"loss": 0.6624,
"step": 185
},
{
"epoch": 0.672542372881356,
"grad_norm": 1.3717018869154762,
"learning_rate": 1.4850875098627326e-06,
"loss": 0.6519,
"step": 186
},
{
"epoch": 0.6761581920903955,
"grad_norm": 1.3411020703523342,
"learning_rate": 1.456227519287343e-06,
"loss": 0.6382,
"step": 187
},
{
"epoch": 0.679774011299435,
"grad_norm": 1.238291654237968,
"learning_rate": 1.4275350216182824e-06,
"loss": 0.6391,
"step": 188
},
{
"epoch": 0.6833898305084746,
"grad_norm": 1.374517850534095,
"learning_rate": 1.3990146211059141e-06,
"loss": 0.6456,
"step": 189
},
{
"epoch": 0.6870056497175141,
"grad_norm": 1.306148052181935,
"learning_rate": 1.3706708943843822e-06,
"loss": 0.6441,
"step": 190
},
{
"epoch": 0.6906214689265536,
"grad_norm": 1.3876372946236282,
"learning_rate": 1.3425083897371983e-06,
"loss": 0.6603,
"step": 191
},
{
"epoch": 0.6942372881355933,
"grad_norm": 1.3904204488306329,
"learning_rate": 1.3145316263673874e-06,
"loss": 0.6721,
"step": 192
},
{
"epoch": 0.6978531073446328,
"grad_norm": 1.510808405530817,
"learning_rate": 1.286745093672298e-06,
"loss": 0.649,
"step": 193
},
{
"epoch": 0.7014689265536723,
"grad_norm": 1.4312145193748698,
"learning_rate": 1.2591532505231906e-06,
"loss": 0.6573,
"step": 194
},
{
"epoch": 0.7050847457627119,
"grad_norm": 1.4024105046257231,
"learning_rate": 1.2317605245497324e-06,
"loss": 0.6727,
"step": 195
},
{
"epoch": 0.7087005649717514,
"grad_norm": 1.3505688502581619,
"learning_rate": 1.204571311429496e-06,
"loss": 0.6131,
"step": 196
},
{
"epoch": 0.7123163841807909,
"grad_norm": 1.319472329102849,
"learning_rate": 1.1775899741825947e-06,
"loss": 0.6434,
"step": 197
},
{
"epoch": 0.7159322033898305,
"grad_norm": 1.5152192403656248,
"learning_rate": 1.1508208424715511e-06,
"loss": 0.656,
"step": 198
},
{
"epoch": 0.7195480225988701,
"grad_norm": 1.6136474853206006,
"learning_rate": 1.1242682119065217e-06,
"loss": 0.6613,
"step": 199
},
{
"epoch": 0.7231638418079096,
"grad_norm": 1.312425015581362,
"learning_rate": 1.0979363433559892e-06,
"loss": 0.6577,
"step": 200
},
{
"epoch": 0.7267796610169491,
"grad_norm": 1.3953598075891687,
"learning_rate": 1.0718294622630188e-06,
"loss": 0.6905,
"step": 201
},
{
"epoch": 0.7303954802259887,
"grad_norm": 1.372903007290825,
"learning_rate": 1.045951757967215e-06,
"loss": 0.6448,
"step": 202
},
{
"epoch": 0.7340112994350283,
"grad_norm": 1.4652703276389691,
"learning_rate": 1.0203073830324566e-06,
"loss": 0.6395,
"step": 203
},
{
"epoch": 0.7376271186440678,
"grad_norm": 1.366422271732463,
"learning_rate": 9.949004525805423e-07,
"loss": 0.6148,
"step": 204
},
{
"epoch": 0.7412429378531074,
"grad_norm": 1.2886663538886012,
"learning_rate": 9.697350436308428e-07,
"loss": 0.6322,
"step": 205
},
{
"epoch": 0.7448587570621469,
"grad_norm": 1.3574509449302405,
"learning_rate": 9.448151944460657e-07,
"loss": 0.6835,
"step": 206
},
{
"epoch": 0.7484745762711864,
"grad_norm": 1.393654049362733,
"learning_rate": 9.201449038842403e-07,
"loss": 0.6713,
"step": 207
},
{
"epoch": 0.7520903954802259,
"grad_norm": 1.3962653237548226,
"learning_rate": 8.957281307570254e-07,
"loss": 0.6349,
"step": 208
},
{
"epoch": 0.7557062146892656,
"grad_norm": 1.2716349996486196,
"learning_rate": 8.71568793194445e-07,
"loss": 0.6395,
"step": 209
},
{
"epoch": 0.7593220338983051,
"grad_norm": 1.3461355380403557,
"learning_rate": 8.476707680161486e-07,
"loss": 0.6566,
"step": 210
},
{
"epoch": 0.7629378531073446,
"grad_norm": 1.3005123962225364,
"learning_rate": 8.240378901093035e-07,
"loss": 0.6498,
"step": 211
},
{
"epoch": 0.7665536723163842,
"grad_norm": 1.2709444859864987,
"learning_rate": 8.006739518132179e-07,
"loss": 0.6702,
"step": 212
},
{
"epoch": 0.7701694915254237,
"grad_norm": 1.3327493213768535,
"learning_rate": 7.775827023107835e-07,
"loss": 0.6403,
"step": 213
},
{
"epoch": 0.7737853107344633,
"grad_norm": 1.4047092975707265,
"learning_rate": 7.547678470268526e-07,
"loss": 0.6492,
"step": 214
},
{
"epoch": 0.7774011299435029,
"grad_norm": 1.383631976222337,
"learning_rate": 7.322330470336314e-07,
"loss": 0.6289,
"step": 215
},
{
"epoch": 0.7810169491525424,
"grad_norm": 1.2567546465681303,
"learning_rate": 7.099819184631929e-07,
"loss": 0.6393,
"step": 216
},
{
"epoch": 0.7846327683615819,
"grad_norm": 1.2354895441269147,
"learning_rate": 6.880180319272006e-07,
"loss": 0.6429,
"step": 217
},
{
"epoch": 0.7882485875706214,
"grad_norm": 1.3478465932161185,
"learning_rate": 6.663449119439358e-07,
"loss": 0.6652,
"step": 218
},
{
"epoch": 0.791864406779661,
"grad_norm": 1.3094969549541247,
"learning_rate": 6.449660363727236e-07,
"loss": 0.6424,
"step": 219
},
{
"epoch": 0.7954802259887006,
"grad_norm": 1.385053278123551,
"learning_rate": 6.238848358558439e-07,
"loss": 0.6409,
"step": 220
},
{
"epoch": 0.7990960451977401,
"grad_norm": 1.3349840769877994,
"learning_rate": 6.031046932680229e-07,
"loss": 0.6815,
"step": 221
},
{
"epoch": 0.8027118644067797,
"grad_norm": 1.4435164246937722,
"learning_rate": 5.826289431735832e-07,
"loss": 0.6489,
"step": 222
},
{
"epoch": 0.8063276836158192,
"grad_norm": 1.3356172924249725,
"learning_rate": 5.624608712913531e-07,
"loss": 0.6298,
"step": 223
},
{
"epoch": 0.8099435028248587,
"grad_norm": 1.2977048398488058,
"learning_rate": 5.426037139674117e-07,
"loss": 0.6509,
"step": 224
},
{
"epoch": 0.8135593220338984,
"grad_norm": 1.3774919529523362,
"learning_rate": 5.23060657655754e-07,
"loss": 0.6573,
"step": 225
},
{
"epoch": 0.8171751412429379,
"grad_norm": 1.3410009004205188,
"learning_rate": 5.038348384069663e-07,
"loss": 0.633,
"step": 226
},
{
"epoch": 0.8207909604519774,
"grad_norm": 1.2928727452886148,
"learning_rate": 4.84929341364988e-07,
"loss": 0.6754,
"step": 227
},
{
"epoch": 0.8244067796610169,
"grad_norm": 1.358332904803305,
"learning_rate": 4.6634720027204093e-07,
"loss": 0.6614,
"step": 228
},
{
"epoch": 0.8280225988700565,
"grad_norm": 1.4907573288328078,
"learning_rate": 4.480913969818099e-07,
"loss": 0.6281,
"step": 229
},
{
"epoch": 0.831638418079096,
"grad_norm": 1.3392158203262607,
"learning_rate": 4.3016486098094667e-07,
"loss": 0.6161,
"step": 230
},
{
"epoch": 0.8352542372881356,
"grad_norm": 1.2444012140753318,
"learning_rate": 4.125704689189819e-07,
"loss": 0.6247,
"step": 231
},
{
"epoch": 0.8388700564971752,
"grad_norm": 1.4784447467052926,
"learning_rate": 3.953110441467073e-07,
"loss": 0.6586,
"step": 232
},
{
"epoch": 0.8424858757062147,
"grad_norm": 1.366997421968513,
"learning_rate": 3.7838935626312246e-07,
"loss": 0.6463,
"step": 233
},
{
"epoch": 0.8461016949152542,
"grad_norm": 1.4541952821449111,
"learning_rate": 3.6180812067099477e-07,
"loss": 0.6726,
"step": 234
},
{
"epoch": 0.8497175141242937,
"grad_norm": 1.3494835181921292,
"learning_rate": 3.455699981411259e-07,
"loss": 0.6378,
"step": 235
},
{
"epoch": 0.8533333333333334,
"grad_norm": 1.3091535029497192,
"learning_rate": 3.296775943853789e-07,
"loss": 0.6381,
"step": 236
},
{
"epoch": 0.8569491525423729,
"grad_norm": 1.292004503291086,
"learning_rate": 3.141334596385448e-07,
"loss": 0.6361,
"step": 237
},
{
"epoch": 0.8605649717514124,
"grad_norm": 1.3072359435785652,
"learning_rate": 2.9894008824910726e-07,
"loss": 0.6311,
"step": 238
},
{
"epoch": 0.864180790960452,
"grad_norm": 1.3418443898914727,
"learning_rate": 2.840999182789797e-07,
"loss": 0.6584,
"step": 239
},
{
"epoch": 0.8677966101694915,
"grad_norm": 1.3790407833555425,
"learning_rate": 2.696153311122704e-07,
"loss": 0.6275,
"step": 240
},
{
"epoch": 0.871412429378531,
"grad_norm": 1.3668643411134043,
"learning_rate": 2.5548865107314606e-07,
"loss": 0.6574,
"step": 241
},
{
"epoch": 0.8750282485875707,
"grad_norm": 1.267748779732102,
"learning_rate": 2.4172214505285006e-07,
"loss": 0.6394,
"step": 242
},
{
"epoch": 0.8786440677966102,
"grad_norm": 1.2912726516960031,
"learning_rate": 2.2831802214593774e-07,
"loss": 0.6352,
"step": 243
},
{
"epoch": 0.8822598870056497,
"grad_norm": 1.3408535402209096,
"learning_rate": 2.1527843329578328e-07,
"loss": 0.6332,
"step": 244
},
{
"epoch": 0.8858757062146893,
"grad_norm": 1.369964120309017,
"learning_rate": 2.026054709494235e-07,
"loss": 0.6488,
"step": 245
},
{
"epoch": 0.8894915254237288,
"grad_norm": 1.2985394908560597,
"learning_rate": 1.9030116872178317e-07,
"loss": 0.6268,
"step": 246
},
{
"epoch": 0.8931073446327683,
"grad_norm": 1.2695253299944342,
"learning_rate": 1.7836750106934475e-07,
"loss": 0.6098,
"step": 247
},
{
"epoch": 0.896723163841808,
"grad_norm": 1.2614680998276686,
"learning_rate": 1.6680638297330854e-07,
"loss": 0.6328,
"step": 248
},
{
"epoch": 0.9003389830508475,
"grad_norm": 1.3022416023187458,
"learning_rate": 1.5561966963229925e-07,
"loss": 0.6353,
"step": 249
},
{
"epoch": 0.903954802259887,
"grad_norm": 1.3192179782937912,
"learning_rate": 1.448091561646628e-07,
"loss": 0.6212,
"step": 250
},
{
"epoch": 0.9075706214689265,
"grad_norm": 1.391821357807215,
"learning_rate": 1.3437657732040783e-07,
"loss": 0.6581,
"step": 251
},
{
"epoch": 0.9111864406779661,
"grad_norm": 1.348097897319632,
"learning_rate": 1.243236072028317e-07,
"loss": 0.6483,
"step": 252
},
{
"epoch": 0.9148022598870057,
"grad_norm": 1.3327973766461398,
"learning_rate": 1.1465185899987797e-07,
"loss": 0.6688,
"step": 253
},
{
"epoch": 0.9184180790960452,
"grad_norm": 1.3050936769777537,
"learning_rate": 1.0536288472527162e-07,
"loss": 0.6702,
"step": 254
},
{
"epoch": 0.9220338983050848,
"grad_norm": 1.3816378424721178,
"learning_rate": 9.645817496946902e-08,
"loss": 0.663,
"step": 255
},
{
"epoch": 0.9256497175141243,
"grad_norm": 1.3585454766173655,
"learning_rate": 8.79391586604636e-08,
"loss": 0.64,
"step": 256
},
{
"epoch": 0.9292655367231638,
"grad_norm": 1.3292531553316056,
"learning_rate": 7.980720283448957e-08,
"loss": 0.6335,
"step": 257
},
{
"epoch": 0.9328813559322033,
"grad_norm": 1.2599441985317357,
"learning_rate": 7.206361241665266e-08,
"loss": 0.6403,
"step": 258
},
{
"epoch": 0.936497175141243,
"grad_norm": 1.3958906990596642,
"learning_rate": 6.470963001153268e-08,
"loss": 0.6383,
"step": 259
},
{
"epoch": 0.9401129943502825,
"grad_norm": 1.295975911169058,
"learning_rate": 5.774643570378296e-08,
"loss": 0.6485,
"step": 260
},
{
"epoch": 0.943728813559322,
"grad_norm": 1.247115424184442,
"learning_rate": 5.117514686876379e-08,
"loss": 0.6479,
"step": 261
},
{
"epoch": 0.9473446327683616,
"grad_norm": 1.2578135030925344,
"learning_rate": 4.4996817993239464e-08,
"loss": 0.6363,
"step": 262
},
{
"epoch": 0.9509604519774011,
"grad_norm": 1.2567862444318008,
"learning_rate": 3.9212440506164465e-08,
"loss": 0.6417,
"step": 263
},
{
"epoch": 0.9545762711864407,
"grad_norm": 1.2972287074987505,
"learning_rate": 3.382294261959157e-08,
"loss": 0.676,
"step": 264
},
{
"epoch": 0.9581920903954803,
"grad_norm": 1.3028064182497767,
"learning_rate": 2.8829189179721552e-08,
"loss": 0.6502,
"step": 265
},
{
"epoch": 0.9618079096045198,
"grad_norm": 1.3401154575841365,
"learning_rate": 2.423198152812306e-08,
"loss": 0.6271,
"step": 266
},
{
"epoch": 0.9654237288135593,
"grad_norm": 1.332097876741386,
"learning_rate": 2.0032057373142453e-08,
"loss": 0.6312,
"step": 267
},
{
"epoch": 0.9690395480225988,
"grad_norm": 1.2752118722459802,
"learning_rate": 1.6230090671524312e-08,
"loss": 0.6313,
"step": 268
},
{
"epoch": 0.9726553672316384,
"grad_norm": 1.315897104629199,
"learning_rate": 1.2826691520262114e-08,
"loss": 0.6328,
"step": 269
},
{
"epoch": 0.976271186440678,
"grad_norm": 1.341142823473401,
"learning_rate": 9.822406058697665e-09,
"loss": 0.6247,
"step": 270
},
{
"epoch": 0.9798870056497175,
"grad_norm": 1.3751891651375663,
"learning_rate": 7.217716380881479e-09,
"loss": 0.6185,
"step": 271
},
{
"epoch": 0.9835028248587571,
"grad_norm": 1.2993917620171413,
"learning_rate": 5.0130404582127144e-09,
"loss": 0.6308,
"step": 272
},
{
"epoch": 0.9871186440677966,
"grad_norm": 1.306024529686539,
"learning_rate": 3.208732072368104e-09,
"loss": 0.6383,
"step": 273
},
{
"epoch": 0.9907344632768361,
"grad_norm": 1.3213672912672132,
"learning_rate": 1.8050807585293095e-09,
"loss": 0.6559,
"step": 274
},
{
"epoch": 0.9943502824858758,
"grad_norm": 1.3174616601564904,
"learning_rate": 8.023117589237017e-10,
"loss": 0.6701,
"step": 275
},
{
"epoch": 0.9979661016949153,
"grad_norm": 1.3624163578590245,
"learning_rate": 2.0058598667854755e-10,
"loss": 0.6445,
"step": 276
},
{
"epoch": 0.9979661016949153,
"step": 276,
"total_flos": 100810460790784.0,
"train_loss": 0.7222011056931122,
"train_runtime": 4200.972,
"train_samples_per_second": 8.426,
"train_steps_per_second": 0.066
}
],
"logging_steps": 1,
"max_steps": 276,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 100810460790784.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}