{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999359426045737, "eval_steps": 500, "global_step": 7805, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 1.7584967613220215, "learning_rate": 4.2553191489361707e-08, "loss": 0.7626, "step": 1 }, { "epoch": 0.0, "grad_norm": 6.582927227020264, "learning_rate": 8.510638297872341e-08, "loss": 0.951, "step": 2 }, { "epoch": 0.0, "grad_norm": 8.378379821777344, "learning_rate": 1.276595744680851e-07, "loss": 0.9451, "step": 3 }, { "epoch": 0.0, "grad_norm": 9.346311569213867, "learning_rate": 1.7021276595744683e-07, "loss": 1.0475, "step": 4 }, { "epoch": 0.0, "grad_norm": 6.422606945037842, "learning_rate": 2.1276595744680852e-07, "loss": 0.8773, "step": 5 }, { "epoch": 0.0, "grad_norm": 13.563347816467285, "learning_rate": 2.553191489361702e-07, "loss": 0.8474, "step": 6 }, { "epoch": 0.0, "grad_norm": 6.2637224197387695, "learning_rate": 2.9787234042553196e-07, "loss": 0.9445, "step": 7 }, { "epoch": 0.0, "grad_norm": 7.488187313079834, "learning_rate": 3.4042553191489365e-07, "loss": 1.0331, "step": 8 }, { "epoch": 0.0, "grad_norm": 9.318852424621582, "learning_rate": 3.8297872340425535e-07, "loss": 1.0007, "step": 9 }, { "epoch": 0.0, "grad_norm": 8.960872650146484, "learning_rate": 4.2553191489361704e-07, "loss": 0.9706, "step": 10 }, { "epoch": 0.0, "grad_norm": 7.051608562469482, "learning_rate": 4.6808510638297873e-07, "loss": 1.0274, "step": 11 }, { "epoch": 0.0, "grad_norm": 5.044897556304932, "learning_rate": 5.106382978723404e-07, "loss": 0.9102, "step": 12 }, { "epoch": 0.0, "grad_norm": 5.335330009460449, "learning_rate": 5.531914893617021e-07, "loss": 1.0717, "step": 13 }, { "epoch": 0.0, "grad_norm": 5.157832145690918, "learning_rate": 5.957446808510639e-07, "loss": 0.9263, "step": 14 }, { "epoch": 0.0, "grad_norm": 5.903883934020996, "learning_rate": 6.382978723404255e-07, "loss": 1.1029, "step": 15 }, { "epoch": 0.0, "grad_norm": 3.430100679397583, "learning_rate": 6.808510638297873e-07, "loss": 0.8821, "step": 16 }, { "epoch": 0.0, "grad_norm": 4.4080023765563965, "learning_rate": 7.234042553191489e-07, "loss": 0.9419, "step": 17 }, { "epoch": 0.0, "grad_norm": 3.959429979324341, "learning_rate": 7.659574468085107e-07, "loss": 0.922, "step": 18 }, { "epoch": 0.0, "grad_norm": 3.123316764831543, "learning_rate": 8.085106382978725e-07, "loss": 0.7857, "step": 19 }, { "epoch": 0.0, "grad_norm": 3.357064723968506, "learning_rate": 8.510638297872341e-07, "loss": 0.8643, "step": 20 }, { "epoch": 0.0, "grad_norm": 3.7573935985565186, "learning_rate": 8.936170212765959e-07, "loss": 0.9179, "step": 21 }, { "epoch": 0.0, "grad_norm": 6.969594478607178, "learning_rate": 9.361702127659575e-07, "loss": 0.8299, "step": 22 }, { "epoch": 0.0, "grad_norm": 2.6956169605255127, "learning_rate": 9.787234042553193e-07, "loss": 0.7627, "step": 23 }, { "epoch": 0.0, "grad_norm": 3.103651523590088, "learning_rate": 1.0212765957446809e-06, "loss": 0.8709, "step": 24 }, { "epoch": 0.0, "grad_norm": 2.5962374210357666, "learning_rate": 1.0638297872340427e-06, "loss": 0.6788, "step": 25 }, { "epoch": 0.0, "grad_norm": 3.108245849609375, "learning_rate": 1.1063829787234042e-06, "loss": 0.8235, "step": 26 }, { "epoch": 0.0, "grad_norm": 2.832303762435913, "learning_rate": 1.148936170212766e-06, "loss": 0.8102, "step": 27 }, { "epoch": 0.0, "grad_norm": 3.666902780532837, "learning_rate": 1.1914893617021278e-06, "loss": 0.7508, "step": 28 }, { "epoch": 0.0, "grad_norm": 2.241725206375122, "learning_rate": 1.2340425531914894e-06, "loss": 0.8194, "step": 29 }, { "epoch": 0.0, "grad_norm": 2.413132905960083, "learning_rate": 1.276595744680851e-06, "loss": 0.8131, "step": 30 }, { "epoch": 0.0, "grad_norm": 1.7967190742492676, "learning_rate": 1.3191489361702128e-06, "loss": 0.766, "step": 31 }, { "epoch": 0.0, "grad_norm": 1.8691911697387695, "learning_rate": 1.3617021276595746e-06, "loss": 0.8, "step": 32 }, { "epoch": 0.0, "grad_norm": 1.7689558267593384, "learning_rate": 1.4042553191489364e-06, "loss": 0.8018, "step": 33 }, { "epoch": 0.0, "grad_norm": 1.7399364709854126, "learning_rate": 1.4468085106382978e-06, "loss": 0.5927, "step": 34 }, { "epoch": 0.0, "grad_norm": 1.6319252252578735, "learning_rate": 1.4893617021276596e-06, "loss": 0.6501, "step": 35 }, { "epoch": 0.0, "grad_norm": 1.570933222770691, "learning_rate": 1.5319148936170214e-06, "loss": 0.7304, "step": 36 }, { "epoch": 0.0, "grad_norm": 1.6395325660705566, "learning_rate": 1.5744680851063832e-06, "loss": 0.5934, "step": 37 }, { "epoch": 0.0, "grad_norm": 1.7731611728668213, "learning_rate": 1.617021276595745e-06, "loss": 0.775, "step": 38 }, { "epoch": 0.0, "grad_norm": 1.7520402669906616, "learning_rate": 1.6595744680851064e-06, "loss": 0.6107, "step": 39 }, { "epoch": 0.01, "grad_norm": 1.782818078994751, "learning_rate": 1.7021276595744682e-06, "loss": 0.7435, "step": 40 }, { "epoch": 0.01, "grad_norm": 1.6297168731689453, "learning_rate": 1.74468085106383e-06, "loss": 0.738, "step": 41 }, { "epoch": 0.01, "grad_norm": 1.5183852910995483, "learning_rate": 1.7872340425531918e-06, "loss": 0.7191, "step": 42 }, { "epoch": 0.01, "grad_norm": 2.116650104522705, "learning_rate": 1.8297872340425531e-06, "loss": 0.7662, "step": 43 }, { "epoch": 0.01, "grad_norm": 1.5137921571731567, "learning_rate": 1.872340425531915e-06, "loss": 0.7391, "step": 44 }, { "epoch": 0.01, "grad_norm": 1.1828657388687134, "learning_rate": 1.9148936170212767e-06, "loss": 0.7095, "step": 45 }, { "epoch": 0.01, "grad_norm": 1.526063084602356, "learning_rate": 1.9574468085106385e-06, "loss": 0.7774, "step": 46 }, { "epoch": 0.01, "grad_norm": 1.8120944499969482, "learning_rate": 2.0000000000000003e-06, "loss": 0.6718, "step": 47 }, { "epoch": 0.01, "grad_norm": 1.4887276887893677, "learning_rate": 2.0425531914893617e-06, "loss": 0.6486, "step": 48 }, { "epoch": 0.01, "grad_norm": 1.4536372423171997, "learning_rate": 2.0851063829787235e-06, "loss": 0.7203, "step": 49 }, { "epoch": 0.01, "grad_norm": 1.6725234985351562, "learning_rate": 2.1276595744680853e-06, "loss": 0.6735, "step": 50 }, { "epoch": 0.01, "grad_norm": 1.526537537574768, "learning_rate": 2.170212765957447e-06, "loss": 0.6525, "step": 51 }, { "epoch": 0.01, "grad_norm": 3.7510359287261963, "learning_rate": 2.2127659574468085e-06, "loss": 0.5954, "step": 52 }, { "epoch": 0.01, "grad_norm": 1.2007803916931152, "learning_rate": 2.2553191489361703e-06, "loss": 0.7316, "step": 53 }, { "epoch": 0.01, "grad_norm": 1.4945361614227295, "learning_rate": 2.297872340425532e-06, "loss": 0.7241, "step": 54 }, { "epoch": 0.01, "grad_norm": 1.3967207670211792, "learning_rate": 2.340425531914894e-06, "loss": 0.6345, "step": 55 }, { "epoch": 0.01, "grad_norm": 1.3292012214660645, "learning_rate": 2.3829787234042557e-06, "loss": 0.7134, "step": 56 }, { "epoch": 0.01, "grad_norm": 1.5092567205429077, "learning_rate": 2.425531914893617e-06, "loss": 0.6689, "step": 57 }, { "epoch": 0.01, "grad_norm": 1.9081007242202759, "learning_rate": 2.468085106382979e-06, "loss": 0.8589, "step": 58 }, { "epoch": 0.01, "grad_norm": 1.3839454650878906, "learning_rate": 2.5106382978723402e-06, "loss": 0.6063, "step": 59 }, { "epoch": 0.01, "grad_norm": 1.3715382814407349, "learning_rate": 2.553191489361702e-06, "loss": 0.5894, "step": 60 }, { "epoch": 0.01, "grad_norm": 1.2574067115783691, "learning_rate": 2.595744680851064e-06, "loss": 0.6662, "step": 61 }, { "epoch": 0.01, "grad_norm": 1.4591063261032104, "learning_rate": 2.6382978723404256e-06, "loss": 0.6227, "step": 62 }, { "epoch": 0.01, "grad_norm": 2.287207841873169, "learning_rate": 2.6808510638297874e-06, "loss": 0.639, "step": 63 }, { "epoch": 0.01, "grad_norm": 1.7553712129592896, "learning_rate": 2.7234042553191492e-06, "loss": 0.6952, "step": 64 }, { "epoch": 0.01, "grad_norm": 1.2830992937088013, "learning_rate": 2.765957446808511e-06, "loss": 0.6323, "step": 65 }, { "epoch": 0.01, "grad_norm": 1.0832946300506592, "learning_rate": 2.808510638297873e-06, "loss": 0.6222, "step": 66 }, { "epoch": 0.01, "grad_norm": 1.8043638467788696, "learning_rate": 2.8510638297872346e-06, "loss": 0.6623, "step": 67 }, { "epoch": 0.01, "grad_norm": 1.494361162185669, "learning_rate": 2.8936170212765956e-06, "loss": 0.6664, "step": 68 }, { "epoch": 0.01, "grad_norm": 1.4487807750701904, "learning_rate": 2.9361702127659574e-06, "loss": 0.6772, "step": 69 }, { "epoch": 0.01, "grad_norm": 1.7595090866088867, "learning_rate": 2.978723404255319e-06, "loss": 0.6911, "step": 70 }, { "epoch": 0.01, "grad_norm": 1.4395978450775146, "learning_rate": 3.021276595744681e-06, "loss": 0.6153, "step": 71 }, { "epoch": 0.01, "grad_norm": 1.4251700639724731, "learning_rate": 3.0638297872340428e-06, "loss": 0.6041, "step": 72 }, { "epoch": 0.01, "grad_norm": 1.3661823272705078, "learning_rate": 3.1063829787234046e-06, "loss": 0.6282, "step": 73 }, { "epoch": 0.01, "grad_norm": 1.562219262123108, "learning_rate": 3.1489361702127664e-06, "loss": 0.5959, "step": 74 }, { "epoch": 0.01, "grad_norm": 2.0947787761688232, "learning_rate": 3.191489361702128e-06, "loss": 0.6361, "step": 75 }, { "epoch": 0.01, "grad_norm": 2.0909323692321777, "learning_rate": 3.23404255319149e-06, "loss": 0.659, "step": 76 }, { "epoch": 0.01, "grad_norm": 2.1692888736724854, "learning_rate": 3.276595744680851e-06, "loss": 0.6357, "step": 77 }, { "epoch": 0.01, "grad_norm": 1.6602727174758911, "learning_rate": 3.3191489361702127e-06, "loss": 0.6931, "step": 78 }, { "epoch": 0.01, "grad_norm": 1.3999605178833008, "learning_rate": 3.3617021276595745e-06, "loss": 0.6471, "step": 79 }, { "epoch": 0.01, "grad_norm": 1.3343745470046997, "learning_rate": 3.4042553191489363e-06, "loss": 0.6078, "step": 80 }, { "epoch": 0.01, "grad_norm": 1.4638773202896118, "learning_rate": 3.446808510638298e-06, "loss": 0.5754, "step": 81 }, { "epoch": 0.01, "grad_norm": 2.1183791160583496, "learning_rate": 3.48936170212766e-06, "loss": 0.6925, "step": 82 }, { "epoch": 0.01, "grad_norm": 2.213529348373413, "learning_rate": 3.5319148936170217e-06, "loss": 0.6461, "step": 83 }, { "epoch": 0.01, "grad_norm": 1.4197733402252197, "learning_rate": 3.5744680851063835e-06, "loss": 0.5335, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.487101435661316, "learning_rate": 3.6170212765957453e-06, "loss": 0.5241, "step": 85 }, { "epoch": 0.01, "grad_norm": 1.4877684116363525, "learning_rate": 3.6595744680851063e-06, "loss": 0.7288, "step": 86 }, { "epoch": 0.01, "grad_norm": 1.5158016681671143, "learning_rate": 3.702127659574468e-06, "loss": 0.7598, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.8782508373260498, "learning_rate": 3.74468085106383e-06, "loss": 0.5951, "step": 88 }, { "epoch": 0.01, "grad_norm": 1.9978234767913818, "learning_rate": 3.7872340425531917e-06, "loss": 0.7127, "step": 89 }, { "epoch": 0.01, "grad_norm": 5.292247772216797, "learning_rate": 3.8297872340425535e-06, "loss": 0.5761, "step": 90 }, { "epoch": 0.01, "grad_norm": 1.4210480451583862, "learning_rate": 3.872340425531915e-06, "loss": 0.6722, "step": 91 }, { "epoch": 0.01, "grad_norm": 1.5236140489578247, "learning_rate": 3.914893617021277e-06, "loss": 0.6568, "step": 92 }, { "epoch": 0.01, "grad_norm": 1.1957457065582275, "learning_rate": 3.957446808510639e-06, "loss": 0.6213, "step": 93 }, { "epoch": 0.01, "grad_norm": 1.5347182750701904, "learning_rate": 4.000000000000001e-06, "loss": 0.574, "step": 94 }, { "epoch": 0.01, "grad_norm": 1.1585361957550049, "learning_rate": 4.042553191489362e-06, "loss": 0.5121, "step": 95 }, { "epoch": 0.01, "grad_norm": 1.6174169778823853, "learning_rate": 4.085106382978723e-06, "loss": 0.6843, "step": 96 }, { "epoch": 0.01, "grad_norm": 1.3933274745941162, "learning_rate": 4.127659574468085e-06, "loss": 0.7019, "step": 97 }, { "epoch": 0.01, "grad_norm": 3.139785051345825, "learning_rate": 4.170212765957447e-06, "loss": 0.6227, "step": 98 }, { "epoch": 0.01, "grad_norm": 1.218185544013977, "learning_rate": 4.212765957446809e-06, "loss": 0.6092, "step": 99 }, { "epoch": 0.01, "grad_norm": 1.3104459047317505, "learning_rate": 4.255319148936171e-06, "loss": 0.6566, "step": 100 }, { "epoch": 0.01, "grad_norm": 1.3607113361358643, "learning_rate": 4.297872340425532e-06, "loss": 0.6997, "step": 101 }, { "epoch": 0.01, "grad_norm": 1.6485787630081177, "learning_rate": 4.340425531914894e-06, "loss": 0.7079, "step": 102 }, { "epoch": 0.01, "grad_norm": 3.3833348751068115, "learning_rate": 4.382978723404256e-06, "loss": 0.5843, "step": 103 }, { "epoch": 0.01, "grad_norm": 1.7810088396072388, "learning_rate": 4.425531914893617e-06, "loss": 0.6966, "step": 104 }, { "epoch": 0.01, "grad_norm": 1.511935830116272, "learning_rate": 4.468085106382979e-06, "loss": 0.6428, "step": 105 }, { "epoch": 0.01, "grad_norm": 1.4718260765075684, "learning_rate": 4.5106382978723406e-06, "loss": 0.6144, "step": 106 }, { "epoch": 0.01, "grad_norm": 1.8542895317077637, "learning_rate": 4.553191489361702e-06, "loss": 0.6564, "step": 107 }, { "epoch": 0.01, "grad_norm": 1.34366774559021, "learning_rate": 4.595744680851064e-06, "loss": 0.5284, "step": 108 }, { "epoch": 0.01, "grad_norm": 1.454941987991333, "learning_rate": 4.638297872340426e-06, "loss": 0.8061, "step": 109 }, { "epoch": 0.01, "grad_norm": 1.738389015197754, "learning_rate": 4.680851063829788e-06, "loss": 0.6759, "step": 110 }, { "epoch": 0.01, "grad_norm": 1.6170207262039185, "learning_rate": 4.7234042553191496e-06, "loss": 0.6429, "step": 111 }, { "epoch": 0.01, "grad_norm": 1.628777027130127, "learning_rate": 4.765957446808511e-06, "loss": 0.6829, "step": 112 }, { "epoch": 0.01, "grad_norm": 1.252131700515747, "learning_rate": 4.808510638297872e-06, "loss": 0.5218, "step": 113 }, { "epoch": 0.01, "grad_norm": 1.4437956809997559, "learning_rate": 4.851063829787234e-06, "loss": 0.667, "step": 114 }, { "epoch": 0.01, "grad_norm": 6.339471817016602, "learning_rate": 4.893617021276596e-06, "loss": 0.668, "step": 115 }, { "epoch": 0.01, "grad_norm": 1.4656023979187012, "learning_rate": 4.936170212765958e-06, "loss": 0.6703, "step": 116 }, { "epoch": 0.01, "grad_norm": 8.32915210723877, "learning_rate": 4.9787234042553195e-06, "loss": 0.6228, "step": 117 }, { "epoch": 0.02, "grad_norm": 1.32205069065094, "learning_rate": 5.0212765957446805e-06, "loss": 0.5615, "step": 118 }, { "epoch": 0.02, "grad_norm": 1.2250367403030396, "learning_rate": 5.063829787234042e-06, "loss": 0.6659, "step": 119 }, { "epoch": 0.02, "grad_norm": 1.2720946073532104, "learning_rate": 5.106382978723404e-06, "loss": 0.5885, "step": 120 }, { "epoch": 0.02, "grad_norm": 1.233543872833252, "learning_rate": 5.148936170212766e-06, "loss": 0.7094, "step": 121 }, { "epoch": 0.02, "grad_norm": 1.4377548694610596, "learning_rate": 5.191489361702128e-06, "loss": 0.7314, "step": 122 }, { "epoch": 0.02, "grad_norm": 2.2915871143341064, "learning_rate": 5.2340425531914895e-06, "loss": 0.6415, "step": 123 }, { "epoch": 0.02, "grad_norm": 2.056044101715088, "learning_rate": 5.276595744680851e-06, "loss": 0.6211, "step": 124 }, { "epoch": 0.02, "grad_norm": 2.617048740386963, "learning_rate": 5.319148936170213e-06, "loss": 0.6948, "step": 125 }, { "epoch": 0.02, "grad_norm": 1.7886021137237549, "learning_rate": 5.361702127659575e-06, "loss": 0.7082, "step": 126 }, { "epoch": 0.02, "grad_norm": 1.2329413890838623, "learning_rate": 5.404255319148937e-06, "loss": 0.6325, "step": 127 }, { "epoch": 0.02, "grad_norm": 1.183271050453186, "learning_rate": 5.4468085106382985e-06, "loss": 0.5629, "step": 128 }, { "epoch": 0.02, "grad_norm": 1.68038809299469, "learning_rate": 5.48936170212766e-06, "loss": 0.6629, "step": 129 }, { "epoch": 0.02, "grad_norm": 1.1709634065628052, "learning_rate": 5.531914893617022e-06, "loss": 0.7344, "step": 130 }, { "epoch": 0.02, "grad_norm": 1.245133638381958, "learning_rate": 5.574468085106384e-06, "loss": 0.7115, "step": 131 }, { "epoch": 0.02, "grad_norm": 1.9844391345977783, "learning_rate": 5.617021276595746e-06, "loss": 0.6059, "step": 132 }, { "epoch": 0.02, "grad_norm": 3.47454571723938, "learning_rate": 5.6595744680851075e-06, "loss": 0.6277, "step": 133 }, { "epoch": 0.02, "grad_norm": 1.5599538087844849, "learning_rate": 5.702127659574469e-06, "loss": 0.7082, "step": 134 }, { "epoch": 0.02, "grad_norm": 1.6010850667953491, "learning_rate": 5.744680851063831e-06, "loss": 0.6496, "step": 135 }, { "epoch": 0.02, "grad_norm": 1.419260859489441, "learning_rate": 5.787234042553191e-06, "loss": 0.7087, "step": 136 }, { "epoch": 0.02, "grad_norm": 1.7564315795898438, "learning_rate": 5.829787234042553e-06, "loss": 0.6404, "step": 137 }, { "epoch": 0.02, "grad_norm": 1.2922577857971191, "learning_rate": 5.872340425531915e-06, "loss": 0.7446, "step": 138 }, { "epoch": 0.02, "grad_norm": 1.5558539628982544, "learning_rate": 5.9148936170212766e-06, "loss": 0.6687, "step": 139 }, { "epoch": 0.02, "grad_norm": 1.5046948194503784, "learning_rate": 5.957446808510638e-06, "loss": 0.661, "step": 140 }, { "epoch": 0.02, "grad_norm": 1.169022560119629, "learning_rate": 6e-06, "loss": 0.6933, "step": 141 }, { "epoch": 0.02, "grad_norm": 1.2964138984680176, "learning_rate": 6.042553191489362e-06, "loss": 0.621, "step": 142 }, { "epoch": 0.02, "grad_norm": 1.2966022491455078, "learning_rate": 6.085106382978724e-06, "loss": 0.6333, "step": 143 }, { "epoch": 0.02, "grad_norm": 2.7783942222595215, "learning_rate": 6.1276595744680855e-06, "loss": 0.6224, "step": 144 }, { "epoch": 0.02, "grad_norm": 2.098170280456543, "learning_rate": 6.170212765957447e-06, "loss": 0.6892, "step": 145 }, { "epoch": 0.02, "grad_norm": 1.532152533531189, "learning_rate": 6.212765957446809e-06, "loss": 0.6353, "step": 146 }, { "epoch": 0.02, "grad_norm": 2.6122751235961914, "learning_rate": 6.255319148936171e-06, "loss": 0.603, "step": 147 }, { "epoch": 0.02, "grad_norm": 1.3150262832641602, "learning_rate": 6.297872340425533e-06, "loss": 0.68, "step": 148 }, { "epoch": 0.02, "grad_norm": 1.8408077955245972, "learning_rate": 6.3404255319148945e-06, "loss": 0.7014, "step": 149 }, { "epoch": 0.02, "grad_norm": 1.3194700479507446, "learning_rate": 6.382978723404256e-06, "loss": 0.6772, "step": 150 }, { "epoch": 0.02, "grad_norm": 1.445401668548584, "learning_rate": 6.425531914893618e-06, "loss": 0.5895, "step": 151 }, { "epoch": 0.02, "grad_norm": 1.475083827972412, "learning_rate": 6.46808510638298e-06, "loss": 0.6744, "step": 152 }, { "epoch": 0.02, "grad_norm": 3.218212127685547, "learning_rate": 6.510638297872342e-06, "loss": 0.6075, "step": 153 }, { "epoch": 0.02, "grad_norm": 1.3714830875396729, "learning_rate": 6.553191489361702e-06, "loss": 0.6051, "step": 154 }, { "epoch": 0.02, "grad_norm": 1.9937951564788818, "learning_rate": 6.595744680851064e-06, "loss": 0.7034, "step": 155 }, { "epoch": 0.02, "grad_norm": 1.379773497581482, "learning_rate": 6.6382978723404254e-06, "loss": 0.6591, "step": 156 }, { "epoch": 0.02, "grad_norm": 1.3940249681472778, "learning_rate": 6.680851063829787e-06, "loss": 0.6154, "step": 157 }, { "epoch": 0.02, "grad_norm": 1.862964391708374, "learning_rate": 6.723404255319149e-06, "loss": 0.6576, "step": 158 }, { "epoch": 0.02, "grad_norm": 1.3830742835998535, "learning_rate": 6.765957446808511e-06, "loss": 0.6628, "step": 159 }, { "epoch": 0.02, "grad_norm": 1.437272548675537, "learning_rate": 6.808510638297873e-06, "loss": 0.6593, "step": 160 }, { "epoch": 0.02, "grad_norm": 1.473165512084961, "learning_rate": 6.8510638297872344e-06, "loss": 0.7078, "step": 161 }, { "epoch": 0.02, "grad_norm": 1.078347086906433, "learning_rate": 6.893617021276596e-06, "loss": 0.5887, "step": 162 }, { "epoch": 0.02, "grad_norm": 1.8319272994995117, "learning_rate": 6.936170212765958e-06, "loss": 0.6499, "step": 163 }, { "epoch": 0.02, "grad_norm": 1.3383046388626099, "learning_rate": 6.97872340425532e-06, "loss": 0.7129, "step": 164 }, { "epoch": 0.02, "grad_norm": 1.6647669076919556, "learning_rate": 7.021276595744682e-06, "loss": 0.5852, "step": 165 }, { "epoch": 0.02, "grad_norm": 1.294133186340332, "learning_rate": 7.0638297872340434e-06, "loss": 0.6741, "step": 166 }, { "epoch": 0.02, "grad_norm": 2.3639767169952393, "learning_rate": 7.106382978723405e-06, "loss": 0.5949, "step": 167 }, { "epoch": 0.02, "grad_norm": 1.6028077602386475, "learning_rate": 7.148936170212767e-06, "loss": 0.653, "step": 168 }, { "epoch": 0.02, "grad_norm": 1.474517583847046, "learning_rate": 7.191489361702129e-06, "loss": 0.6168, "step": 169 }, { "epoch": 0.02, "grad_norm": 1.2433522939682007, "learning_rate": 7.234042553191491e-06, "loss": 0.7361, "step": 170 }, { "epoch": 0.02, "grad_norm": 1.665745735168457, "learning_rate": 7.2765957446808524e-06, "loss": 0.6894, "step": 171 }, { "epoch": 0.02, "grad_norm": 1.8916876316070557, "learning_rate": 7.3191489361702125e-06, "loss": 0.6372, "step": 172 }, { "epoch": 0.02, "grad_norm": 1.5415910482406616, "learning_rate": 7.361702127659574e-06, "loss": 0.5954, "step": 173 }, { "epoch": 0.02, "grad_norm": 2.2413551807403564, "learning_rate": 7.404255319148936e-06, "loss": 0.69, "step": 174 }, { "epoch": 0.02, "grad_norm": 1.9613412618637085, "learning_rate": 7.446808510638298e-06, "loss": 0.6342, "step": 175 }, { "epoch": 0.02, "grad_norm": 1.9772624969482422, "learning_rate": 7.48936170212766e-06, "loss": 0.6331, "step": 176 }, { "epoch": 0.02, "grad_norm": 1.7678852081298828, "learning_rate": 7.5319148936170215e-06, "loss": 0.629, "step": 177 }, { "epoch": 0.02, "grad_norm": 2.1412477493286133, "learning_rate": 7.574468085106383e-06, "loss": 0.622, "step": 178 }, { "epoch": 0.02, "grad_norm": 1.3353233337402344, "learning_rate": 7.617021276595745e-06, "loss": 0.6866, "step": 179 }, { "epoch": 0.02, "grad_norm": 1.1513804197311401, "learning_rate": 7.659574468085107e-06, "loss": 0.6276, "step": 180 }, { "epoch": 0.02, "grad_norm": 1.2835663557052612, "learning_rate": 7.702127659574469e-06, "loss": 0.6185, "step": 181 }, { "epoch": 0.02, "grad_norm": 1.3194074630737305, "learning_rate": 7.74468085106383e-06, "loss": 0.6201, "step": 182 }, { "epoch": 0.02, "grad_norm": 1.5225788354873657, "learning_rate": 7.787234042553192e-06, "loss": 0.5948, "step": 183 }, { "epoch": 0.02, "grad_norm": 1.1869926452636719, "learning_rate": 7.829787234042554e-06, "loss": 0.5794, "step": 184 }, { "epoch": 0.02, "grad_norm": 1.3368916511535645, "learning_rate": 7.872340425531916e-06, "loss": 0.7929, "step": 185 }, { "epoch": 0.02, "grad_norm": 1.3683775663375854, "learning_rate": 7.914893617021278e-06, "loss": 0.7516, "step": 186 }, { "epoch": 0.02, "grad_norm": 1.7117352485656738, "learning_rate": 7.95744680851064e-06, "loss": 0.6344, "step": 187 }, { "epoch": 0.02, "grad_norm": 1.4180941581726074, "learning_rate": 8.000000000000001e-06, "loss": 0.6158, "step": 188 }, { "epoch": 0.02, "grad_norm": 1.504319667816162, "learning_rate": 8.042553191489363e-06, "loss": 0.79, "step": 189 }, { "epoch": 0.02, "grad_norm": 1.4916445016860962, "learning_rate": 8.085106382978723e-06, "loss": 0.7096, "step": 190 }, { "epoch": 0.02, "grad_norm": 1.3161081075668335, "learning_rate": 8.127659574468085e-06, "loss": 0.6206, "step": 191 }, { "epoch": 0.02, "grad_norm": 1.3922781944274902, "learning_rate": 8.170212765957447e-06, "loss": 0.6443, "step": 192 }, { "epoch": 0.02, "grad_norm": 1.5196666717529297, "learning_rate": 8.212765957446809e-06, "loss": 0.6352, "step": 193 }, { "epoch": 0.02, "grad_norm": 1.4567962884902954, "learning_rate": 8.25531914893617e-06, "loss": 0.5775, "step": 194 }, { "epoch": 0.02, "grad_norm": 1.2705228328704834, "learning_rate": 8.297872340425532e-06, "loss": 0.6438, "step": 195 }, { "epoch": 0.03, "grad_norm": 1.4779032468795776, "learning_rate": 8.340425531914894e-06, "loss": 0.6037, "step": 196 }, { "epoch": 0.03, "grad_norm": 1.4720426797866821, "learning_rate": 8.382978723404256e-06, "loss": 0.594, "step": 197 }, { "epoch": 0.03, "grad_norm": 1.6737343072891235, "learning_rate": 8.425531914893618e-06, "loss": 0.6083, "step": 198 }, { "epoch": 0.03, "grad_norm": 1.425614356994629, "learning_rate": 8.46808510638298e-06, "loss": 0.6219, "step": 199 }, { "epoch": 0.03, "grad_norm": 1.4513856172561646, "learning_rate": 8.510638297872341e-06, "loss": 0.6389, "step": 200 }, { "epoch": 0.03, "grad_norm": 1.7207236289978027, "learning_rate": 8.553191489361703e-06, "loss": 0.5883, "step": 201 }, { "epoch": 0.03, "grad_norm": 1.8808568716049194, "learning_rate": 8.595744680851065e-06, "loss": 0.5775, "step": 202 }, { "epoch": 0.03, "grad_norm": 2.1202540397644043, "learning_rate": 8.638297872340427e-06, "loss": 0.5386, "step": 203 }, { "epoch": 0.03, "grad_norm": 1.2687338590621948, "learning_rate": 8.680851063829788e-06, "loss": 0.6925, "step": 204 }, { "epoch": 0.03, "grad_norm": 1.3964853286743164, "learning_rate": 8.72340425531915e-06, "loss": 0.6858, "step": 205 }, { "epoch": 0.03, "grad_norm": 1.5686947107315063, "learning_rate": 8.765957446808512e-06, "loss": 0.6917, "step": 206 }, { "epoch": 0.03, "grad_norm": 1.251679539680481, "learning_rate": 8.808510638297874e-06, "loss": 0.7344, "step": 207 }, { "epoch": 0.03, "grad_norm": 3.4568703174591064, "learning_rate": 8.851063829787234e-06, "loss": 0.6002, "step": 208 }, { "epoch": 0.03, "grad_norm": 1.5010696649551392, "learning_rate": 8.893617021276596e-06, "loss": 0.6429, "step": 209 }, { "epoch": 0.03, "grad_norm": 1.468229055404663, "learning_rate": 8.936170212765958e-06, "loss": 0.5552, "step": 210 }, { "epoch": 0.03, "grad_norm": 1.107020378112793, "learning_rate": 8.97872340425532e-06, "loss": 0.5931, "step": 211 }, { "epoch": 0.03, "grad_norm": 1.6930768489837646, "learning_rate": 9.021276595744681e-06, "loss": 0.6574, "step": 212 }, { "epoch": 0.03, "grad_norm": 1.752554178237915, "learning_rate": 9.063829787234043e-06, "loss": 0.653, "step": 213 }, { "epoch": 0.03, "grad_norm": 1.4542021751403809, "learning_rate": 9.106382978723405e-06, "loss": 0.6302, "step": 214 }, { "epoch": 0.03, "grad_norm": 1.3564616441726685, "learning_rate": 9.148936170212767e-06, "loss": 0.6823, "step": 215 }, { "epoch": 0.03, "grad_norm": 1.2571592330932617, "learning_rate": 9.191489361702128e-06, "loss": 0.659, "step": 216 }, { "epoch": 0.03, "grad_norm": 1.4564090967178345, "learning_rate": 9.23404255319149e-06, "loss": 0.6032, "step": 217 }, { "epoch": 0.03, "grad_norm": 1.5122485160827637, "learning_rate": 9.276595744680852e-06, "loss": 0.6931, "step": 218 }, { "epoch": 0.03, "grad_norm": 1.4033244848251343, "learning_rate": 9.319148936170214e-06, "loss": 0.6148, "step": 219 }, { "epoch": 0.03, "grad_norm": 1.2605109214782715, "learning_rate": 9.361702127659576e-06, "loss": 0.6233, "step": 220 }, { "epoch": 0.03, "grad_norm": 1.4508942365646362, "learning_rate": 9.404255319148937e-06, "loss": 0.6114, "step": 221 }, { "epoch": 0.03, "grad_norm": 2.8977622985839844, "learning_rate": 9.446808510638299e-06, "loss": 0.5908, "step": 222 }, { "epoch": 0.03, "grad_norm": 1.4348738193511963, "learning_rate": 9.489361702127661e-06, "loss": 0.6485, "step": 223 }, { "epoch": 0.03, "grad_norm": 1.2617361545562744, "learning_rate": 9.531914893617023e-06, "loss": 0.6773, "step": 224 }, { "epoch": 0.03, "grad_norm": 1.2666085958480835, "learning_rate": 9.574468085106385e-06, "loss": 0.5397, "step": 225 }, { "epoch": 0.03, "grad_norm": 1.5835410356521606, "learning_rate": 9.617021276595745e-06, "loss": 0.6741, "step": 226 }, { "epoch": 0.03, "grad_norm": 1.776307225227356, "learning_rate": 9.659574468085106e-06, "loss": 0.6239, "step": 227 }, { "epoch": 0.03, "grad_norm": 1.3855375051498413, "learning_rate": 9.702127659574468e-06, "loss": 0.648, "step": 228 }, { "epoch": 0.03, "grad_norm": 1.173299789428711, "learning_rate": 9.74468085106383e-06, "loss": 0.6081, "step": 229 }, { "epoch": 0.03, "grad_norm": 1.3490828275680542, "learning_rate": 9.787234042553192e-06, "loss": 0.662, "step": 230 }, { "epoch": 0.03, "grad_norm": 1.2873908281326294, "learning_rate": 9.829787234042554e-06, "loss": 0.6217, "step": 231 }, { "epoch": 0.03, "grad_norm": 1.5484164953231812, "learning_rate": 9.872340425531915e-06, "loss": 0.6764, "step": 232 }, { "epoch": 0.03, "grad_norm": 1.4617042541503906, "learning_rate": 9.914893617021277e-06, "loss": 0.7215, "step": 233 }, { "epoch": 0.03, "grad_norm": 1.3531588315963745, "learning_rate": 9.957446808510639e-06, "loss": 0.589, "step": 234 }, { "epoch": 0.03, "grad_norm": 1.4462039470672607, "learning_rate": 1e-05, "loss": 0.6209, "step": 235 }, { "epoch": 0.03, "grad_norm": 1.5413349866867065, "learning_rate": 9.999999569425815e-06, "loss": 0.6695, "step": 236 }, { "epoch": 0.03, "grad_norm": 1.396835207939148, "learning_rate": 9.999998277703333e-06, "loss": 0.5872, "step": 237 }, { "epoch": 0.03, "grad_norm": 1.3538330793380737, "learning_rate": 9.999996124832776e-06, "loss": 0.7435, "step": 238 }, { "epoch": 0.03, "grad_norm": 1.7953016757965088, "learning_rate": 9.999993110814515e-06, "loss": 0.6416, "step": 239 }, { "epoch": 0.03, "grad_norm": 1.1062463521957397, "learning_rate": 9.999989235649068e-06, "loss": 0.717, "step": 240 }, { "epoch": 0.03, "grad_norm": 1.3287006616592407, "learning_rate": 9.999984499337105e-06, "loss": 0.61, "step": 241 }, { "epoch": 0.03, "grad_norm": 1.226269006729126, "learning_rate": 9.99997890187944e-06, "loss": 0.6009, "step": 242 }, { "epoch": 0.03, "grad_norm": 1.6038551330566406, "learning_rate": 9.99997244327704e-06, "loss": 0.7504, "step": 243 }, { "epoch": 0.03, "grad_norm": 1.3797829151153564, "learning_rate": 9.999965123531012e-06, "loss": 0.5251, "step": 244 }, { "epoch": 0.03, "grad_norm": 1.4653568267822266, "learning_rate": 9.999956942642622e-06, "loss": 0.6416, "step": 245 }, { "epoch": 0.03, "grad_norm": 1.0714013576507568, "learning_rate": 9.999947900613274e-06, "loss": 0.5643, "step": 246 }, { "epoch": 0.03, "grad_norm": 1.486270785331726, "learning_rate": 9.999937997444528e-06, "loss": 0.6592, "step": 247 }, { "epoch": 0.03, "grad_norm": 1.3784781694412231, "learning_rate": 9.999927233138092e-06, "loss": 0.722, "step": 248 }, { "epoch": 0.03, "grad_norm": 1.3505719900131226, "learning_rate": 9.999915607695814e-06, "loss": 0.6928, "step": 249 }, { "epoch": 0.03, "grad_norm": 1.3439764976501465, "learning_rate": 9.999903121119701e-06, "loss": 0.5936, "step": 250 }, { "epoch": 0.03, "grad_norm": 1.3594400882720947, "learning_rate": 9.999889773411903e-06, "loss": 0.6679, "step": 251 }, { "epoch": 0.03, "grad_norm": 1.1484427452087402, "learning_rate": 9.999875564574717e-06, "loss": 0.6183, "step": 252 }, { "epoch": 0.03, "grad_norm": 1.2887957096099854, "learning_rate": 9.999860494610595e-06, "loss": 0.5997, "step": 253 }, { "epoch": 0.03, "grad_norm": 1.2470365762710571, "learning_rate": 9.999844563522123e-06, "loss": 0.6587, "step": 254 }, { "epoch": 0.03, "grad_norm": 1.387832522392273, "learning_rate": 9.999827771312053e-06, "loss": 0.596, "step": 255 }, { "epoch": 0.03, "grad_norm": 1.2869324684143066, "learning_rate": 9.999810117983275e-06, "loss": 0.6862, "step": 256 }, { "epoch": 0.03, "grad_norm": 1.3541619777679443, "learning_rate": 9.99979160353883e-06, "loss": 0.6792, "step": 257 }, { "epoch": 0.03, "grad_norm": 1.6047093868255615, "learning_rate": 9.999772227981905e-06, "loss": 0.6403, "step": 258 }, { "epoch": 0.03, "grad_norm": 1.1892220973968506, "learning_rate": 9.999751991315838e-06, "loss": 0.6682, "step": 259 }, { "epoch": 0.03, "grad_norm": 1.4554706811904907, "learning_rate": 9.999730893544115e-06, "loss": 0.5767, "step": 260 }, { "epoch": 0.03, "grad_norm": 1.3690476417541504, "learning_rate": 9.999708934670366e-06, "loss": 0.7132, "step": 261 }, { "epoch": 0.03, "grad_norm": 1.6959372758865356, "learning_rate": 9.99968611469838e-06, "loss": 0.6062, "step": 262 }, { "epoch": 0.03, "grad_norm": 1.7080563306808472, "learning_rate": 9.99966243363208e-06, "loss": 0.6362, "step": 263 }, { "epoch": 0.03, "grad_norm": 1.2266885042190552, "learning_rate": 9.999637891475549e-06, "loss": 0.6942, "step": 264 }, { "epoch": 0.03, "grad_norm": 1.2976837158203125, "learning_rate": 9.99961248823301e-06, "loss": 0.7392, "step": 265 }, { "epoch": 0.03, "grad_norm": 1.6553566455841064, "learning_rate": 9.999586223908845e-06, "loss": 0.6517, "step": 266 }, { "epoch": 0.03, "grad_norm": 1.199306607246399, "learning_rate": 9.999559098507571e-06, "loss": 0.6287, "step": 267 }, { "epoch": 0.03, "grad_norm": 1.3642069101333618, "learning_rate": 9.999531112033863e-06, "loss": 0.5917, "step": 268 }, { "epoch": 0.03, "grad_norm": 1.6378116607666016, "learning_rate": 9.99950226449254e-06, "loss": 0.6069, "step": 269 }, { "epoch": 0.03, "grad_norm": 2.4352381229400635, "learning_rate": 9.99947255588857e-06, "loss": 0.6093, "step": 270 }, { "epoch": 0.03, "grad_norm": 1.4313955307006836, "learning_rate": 9.999441986227071e-06, "loss": 0.5984, "step": 271 }, { "epoch": 0.03, "grad_norm": 1.6048554182052612, "learning_rate": 9.999410555513308e-06, "loss": 0.6339, "step": 272 }, { "epoch": 0.03, "grad_norm": 1.5470483303070068, "learning_rate": 9.999378263752691e-06, "loss": 0.6551, "step": 273 }, { "epoch": 0.04, "grad_norm": 1.2593071460723877, "learning_rate": 9.999345110950787e-06, "loss": 0.6564, "step": 274 }, { "epoch": 0.04, "grad_norm": 1.4662338495254517, "learning_rate": 9.999311097113303e-06, "loss": 0.503, "step": 275 }, { "epoch": 0.04, "grad_norm": 1.6436269283294678, "learning_rate": 9.999276222246094e-06, "loss": 0.7033, "step": 276 }, { "epoch": 0.04, "grad_norm": 2.233489990234375, "learning_rate": 9.999240486355173e-06, "loss": 0.6238, "step": 277 }, { "epoch": 0.04, "grad_norm": 1.1347347497940063, "learning_rate": 9.999203889446691e-06, "loss": 0.6618, "step": 278 }, { "epoch": 0.04, "grad_norm": 1.7216888666152954, "learning_rate": 9.999166431526952e-06, "loss": 0.5455, "step": 279 }, { "epoch": 0.04, "grad_norm": 1.1763736009597778, "learning_rate": 9.999128112602406e-06, "loss": 0.5321, "step": 280 }, { "epoch": 0.04, "grad_norm": 1.6581827402114868, "learning_rate": 9.999088932679653e-06, "loss": 0.6222, "step": 281 }, { "epoch": 0.04, "grad_norm": 1.3219795227050781, "learning_rate": 9.999048891765443e-06, "loss": 0.601, "step": 282 }, { "epoch": 0.04, "grad_norm": 1.4004895687103271, "learning_rate": 9.999007989866671e-06, "loss": 0.6141, "step": 283 }, { "epoch": 0.04, "grad_norm": 2.341827630996704, "learning_rate": 9.99896622699038e-06, "loss": 0.6072, "step": 284 }, { "epoch": 0.04, "grad_norm": 1.2262059450149536, "learning_rate": 9.998923603143767e-06, "loss": 0.7096, "step": 285 }, { "epoch": 0.04, "grad_norm": 1.5562580823898315, "learning_rate": 9.998880118334167e-06, "loss": 0.6483, "step": 286 }, { "epoch": 0.04, "grad_norm": 1.8266093730926514, "learning_rate": 9.998835772569075e-06, "loss": 0.5435, "step": 287 }, { "epoch": 0.04, "grad_norm": 1.291782259941101, "learning_rate": 9.998790565856124e-06, "loss": 0.5632, "step": 288 }, { "epoch": 0.04, "grad_norm": 1.3557615280151367, "learning_rate": 9.998744498203104e-06, "loss": 0.7585, "step": 289 }, { "epoch": 0.04, "grad_norm": 1.248962640762329, "learning_rate": 9.998697569617947e-06, "loss": 0.6625, "step": 290 }, { "epoch": 0.04, "grad_norm": 1.591097116470337, "learning_rate": 9.998649780108737e-06, "loss": 0.6722, "step": 291 }, { "epoch": 0.04, "grad_norm": 1.3779689073562622, "learning_rate": 9.998601129683703e-06, "loss": 0.6342, "step": 292 }, { "epoch": 0.04, "grad_norm": 2.6783649921417236, "learning_rate": 9.998551618351225e-06, "loss": 0.5614, "step": 293 }, { "epoch": 0.04, "grad_norm": 1.789115071296692, "learning_rate": 9.998501246119828e-06, "loss": 0.6905, "step": 294 }, { "epoch": 0.04, "grad_norm": 1.309683084487915, "learning_rate": 9.998450012998192e-06, "loss": 0.619, "step": 295 }, { "epoch": 0.04, "grad_norm": 1.9988428354263306, "learning_rate": 9.998397918995138e-06, "loss": 0.5937, "step": 296 }, { "epoch": 0.04, "grad_norm": 1.2818232774734497, "learning_rate": 9.998344964119639e-06, "loss": 0.6503, "step": 297 }, { "epoch": 0.04, "grad_norm": 1.3331990242004395, "learning_rate": 9.998291148380813e-06, "loss": 0.7047, "step": 298 }, { "epoch": 0.04, "grad_norm": 1.3238475322723389, "learning_rate": 9.998236471787933e-06, "loss": 0.758, "step": 299 }, { "epoch": 0.04, "grad_norm": 2.516418218612671, "learning_rate": 9.998180934350413e-06, "loss": 0.6936, "step": 300 }, { "epoch": 0.04, "grad_norm": 1.0435734987258911, "learning_rate": 9.998124536077819e-06, "loss": 0.6652, "step": 301 }, { "epoch": 0.04, "grad_norm": 1.999271035194397, "learning_rate": 9.998067276979863e-06, "loss": 0.639, "step": 302 }, { "epoch": 0.04, "grad_norm": 1.2757498025894165, "learning_rate": 9.99800915706641e-06, "loss": 0.5991, "step": 303 }, { "epoch": 0.04, "grad_norm": 1.3763235807418823, "learning_rate": 9.997950176347469e-06, "loss": 0.6986, "step": 304 }, { "epoch": 0.04, "grad_norm": 1.3771135807037354, "learning_rate": 9.997890334833195e-06, "loss": 0.7234, "step": 305 }, { "epoch": 0.04, "grad_norm": 1.662296175956726, "learning_rate": 9.997829632533897e-06, "loss": 0.7195, "step": 306 }, { "epoch": 0.04, "grad_norm": 1.1534143686294556, "learning_rate": 9.99776806946003e-06, "loss": 0.6387, "step": 307 }, { "epoch": 0.04, "grad_norm": 1.804293155670166, "learning_rate": 9.997705645622195e-06, "loss": 0.6533, "step": 308 }, { "epoch": 0.04, "grad_norm": 1.2307173013687134, "learning_rate": 9.997642361031147e-06, "loss": 0.6438, "step": 309 }, { "epoch": 0.04, "grad_norm": 1.4131581783294678, "learning_rate": 9.997578215697782e-06, "loss": 0.5973, "step": 310 }, { "epoch": 0.04, "grad_norm": 1.2833446264266968, "learning_rate": 9.997513209633149e-06, "loss": 0.6023, "step": 311 }, { "epoch": 0.04, "grad_norm": 1.623981237411499, "learning_rate": 9.997447342848443e-06, "loss": 0.604, "step": 312 }, { "epoch": 0.04, "grad_norm": 1.3072586059570312, "learning_rate": 9.99738061535501e-06, "loss": 0.6669, "step": 313 }, { "epoch": 0.04, "grad_norm": 1.3638581037521362, "learning_rate": 9.997313027164342e-06, "loss": 0.7149, "step": 314 }, { "epoch": 0.04, "grad_norm": 1.2896875143051147, "learning_rate": 9.997244578288079e-06, "loss": 0.6452, "step": 315 }, { "epoch": 0.04, "grad_norm": 1.1254551410675049, "learning_rate": 9.99717526873801e-06, "loss": 0.6556, "step": 316 }, { "epoch": 0.04, "grad_norm": 3.2857415676116943, "learning_rate": 9.997105098526073e-06, "loss": 0.6838, "step": 317 }, { "epoch": 0.04, "grad_norm": 2.3155784606933594, "learning_rate": 9.997034067664352e-06, "loss": 0.6333, "step": 318 }, { "epoch": 0.04, "grad_norm": 1.4330530166625977, "learning_rate": 9.996962176165081e-06, "loss": 0.6069, "step": 319 }, { "epoch": 0.04, "grad_norm": 1.828263282775879, "learning_rate": 9.996889424040644e-06, "loss": 0.6589, "step": 320 }, { "epoch": 0.04, "grad_norm": 1.317088007926941, "learning_rate": 9.996815811303566e-06, "loss": 0.6511, "step": 321 }, { "epoch": 0.04, "grad_norm": 2.6714189052581787, "learning_rate": 9.996741337966531e-06, "loss": 0.6292, "step": 322 }, { "epoch": 0.04, "grad_norm": 1.569022297859192, "learning_rate": 9.996666004042364e-06, "loss": 0.6129, "step": 323 }, { "epoch": 0.04, "grad_norm": 1.6436303853988647, "learning_rate": 9.996589809544036e-06, "loss": 0.6159, "step": 324 }, { "epoch": 0.04, "grad_norm": 1.2534624338150024, "learning_rate": 9.996512754484675e-06, "loss": 0.6133, "step": 325 }, { "epoch": 0.04, "grad_norm": 2.129281759262085, "learning_rate": 9.996434838877549e-06, "loss": 0.6453, "step": 326 }, { "epoch": 0.04, "grad_norm": 1.673301100730896, "learning_rate": 9.996356062736077e-06, "loss": 0.6298, "step": 327 }, { "epoch": 0.04, "grad_norm": 1.3405145406723022, "learning_rate": 9.99627642607383e-06, "loss": 0.6878, "step": 328 }, { "epoch": 0.04, "grad_norm": 1.0584392547607422, "learning_rate": 9.996195928904522e-06, "loss": 0.6118, "step": 329 }, { "epoch": 0.04, "grad_norm": 1.4414960145950317, "learning_rate": 9.996114571242015e-06, "loss": 0.5883, "step": 330 }, { "epoch": 0.04, "grad_norm": 1.4856294393539429, "learning_rate": 9.996032353100324e-06, "loss": 0.5985, "step": 331 }, { "epoch": 0.04, "grad_norm": 1.4282467365264893, "learning_rate": 9.995949274493608e-06, "loss": 0.6427, "step": 332 }, { "epoch": 0.04, "grad_norm": 1.0834569931030273, "learning_rate": 9.995865335436177e-06, "loss": 0.7503, "step": 333 }, { "epoch": 0.04, "grad_norm": 1.4855166673660278, "learning_rate": 9.995780535942485e-06, "loss": 0.6853, "step": 334 }, { "epoch": 0.04, "grad_norm": 2.0462682247161865, "learning_rate": 9.99569487602714e-06, "loss": 0.6613, "step": 335 }, { "epoch": 0.04, "grad_norm": 1.3587714433670044, "learning_rate": 9.995608355704893e-06, "loss": 0.6956, "step": 336 }, { "epoch": 0.04, "grad_norm": 2.249725103378296, "learning_rate": 9.995520974990646e-06, "loss": 0.6199, "step": 337 }, { "epoch": 0.04, "grad_norm": 1.6236398220062256, "learning_rate": 9.99543273389945e-06, "loss": 0.6971, "step": 338 }, { "epoch": 0.04, "grad_norm": 1.2508240938186646, "learning_rate": 9.995343632446501e-06, "loss": 0.7062, "step": 339 }, { "epoch": 0.04, "grad_norm": 1.3989686965942383, "learning_rate": 9.995253670647146e-06, "loss": 0.7234, "step": 340 }, { "epoch": 0.04, "grad_norm": 1.2701517343521118, "learning_rate": 9.995162848516878e-06, "loss": 0.6243, "step": 341 }, { "epoch": 0.04, "grad_norm": 2.829824924468994, "learning_rate": 9.995071166071339e-06, "loss": 0.575, "step": 342 }, { "epoch": 0.04, "grad_norm": 1.5379947423934937, "learning_rate": 9.994978623326321e-06, "loss": 0.6044, "step": 343 }, { "epoch": 0.04, "grad_norm": 1.1864334344863892, "learning_rate": 9.994885220297763e-06, "loss": 0.6259, "step": 344 }, { "epoch": 0.04, "grad_norm": 1.7398015260696411, "learning_rate": 9.994790957001748e-06, "loss": 0.7088, "step": 345 }, { "epoch": 0.04, "grad_norm": 1.7878941297531128, "learning_rate": 9.994695833454515e-06, "loss": 0.6905, "step": 346 }, { "epoch": 0.04, "grad_norm": 1.316868543624878, "learning_rate": 9.994599849672446e-06, "loss": 0.6109, "step": 347 }, { "epoch": 0.04, "grad_norm": 1.9868359565734863, "learning_rate": 9.994503005672072e-06, "loss": 0.6019, "step": 348 }, { "epoch": 0.04, "grad_norm": 1.3597651720046997, "learning_rate": 9.994405301470072e-06, "loss": 0.5773, "step": 349 }, { "epoch": 0.04, "grad_norm": 1.5507285594940186, "learning_rate": 9.994306737083275e-06, "loss": 0.6289, "step": 350 }, { "epoch": 0.04, "grad_norm": 1.1398341655731201, "learning_rate": 9.994207312528655e-06, "loss": 0.6136, "step": 351 }, { "epoch": 0.05, "grad_norm": 1.3331668376922607, "learning_rate": 9.994107027823334e-06, "loss": 0.6668, "step": 352 }, { "epoch": 0.05, "grad_norm": 1.4200987815856934, "learning_rate": 9.994005882984588e-06, "loss": 0.6294, "step": 353 }, { "epoch": 0.05, "grad_norm": 1.577136754989624, "learning_rate": 9.993903878029838e-06, "loss": 0.607, "step": 354 }, { "epoch": 0.05, "grad_norm": 1.2228367328643799, "learning_rate": 9.993801012976647e-06, "loss": 0.6131, "step": 355 }, { "epoch": 0.05, "grad_norm": 1.4739247560501099, "learning_rate": 9.993697287842735e-06, "loss": 0.666, "step": 356 }, { "epoch": 0.05, "grad_norm": 1.5883527994155884, "learning_rate": 9.993592702645966e-06, "loss": 0.7499, "step": 357 }, { "epoch": 0.05, "grad_norm": 1.7352406978607178, "learning_rate": 9.993487257404352e-06, "loss": 0.6809, "step": 358 }, { "epoch": 0.05, "grad_norm": 1.7410889863967896, "learning_rate": 9.993380952136057e-06, "loss": 0.6538, "step": 359 }, { "epoch": 0.05, "grad_norm": 1.5513193607330322, "learning_rate": 9.993273786859384e-06, "loss": 0.63, "step": 360 }, { "epoch": 0.05, "grad_norm": 1.3114519119262695, "learning_rate": 9.993165761592795e-06, "loss": 0.7388, "step": 361 }, { "epoch": 0.05, "grad_norm": 1.255658745765686, "learning_rate": 9.993056876354892e-06, "loss": 0.6261, "step": 362 }, { "epoch": 0.05, "grad_norm": 1.130070686340332, "learning_rate": 9.992947131164432e-06, "loss": 0.6732, "step": 363 }, { "epoch": 0.05, "grad_norm": 1.0119282007217407, "learning_rate": 9.992836526040312e-06, "loss": 0.5841, "step": 364 }, { "epoch": 0.05, "grad_norm": 1.1341041326522827, "learning_rate": 9.992725061001585e-06, "loss": 0.6315, "step": 365 }, { "epoch": 0.05, "grad_norm": 1.1530150175094604, "learning_rate": 9.992612736067446e-06, "loss": 0.5805, "step": 366 }, { "epoch": 0.05, "grad_norm": 1.2198214530944824, "learning_rate": 9.992499551257243e-06, "loss": 0.6518, "step": 367 }, { "epoch": 0.05, "grad_norm": 1.5688096284866333, "learning_rate": 9.992385506590467e-06, "loss": 0.6451, "step": 368 }, { "epoch": 0.05, "grad_norm": 1.202008605003357, "learning_rate": 9.992270602086764e-06, "loss": 0.7313, "step": 369 }, { "epoch": 0.05, "grad_norm": 1.660534143447876, "learning_rate": 9.992154837765919e-06, "loss": 0.658, "step": 370 }, { "epoch": 0.05, "grad_norm": 1.2800931930541992, "learning_rate": 9.992038213647875e-06, "loss": 0.6472, "step": 371 }, { "epoch": 0.05, "grad_norm": 1.4618778228759766, "learning_rate": 9.991920729752713e-06, "loss": 0.6719, "step": 372 }, { "epoch": 0.05, "grad_norm": 1.3455302715301514, "learning_rate": 9.991802386100672e-06, "loss": 0.638, "step": 373 }, { "epoch": 0.05, "grad_norm": 1.2282071113586426, "learning_rate": 9.99168318271213e-06, "loss": 0.7115, "step": 374 }, { "epoch": 0.05, "grad_norm": 1.1178747415542603, "learning_rate": 9.991563119607622e-06, "loss": 0.5796, "step": 375 }, { "epoch": 0.05, "grad_norm": 1.1349022388458252, "learning_rate": 9.991442196807823e-06, "loss": 0.5702, "step": 376 }, { "epoch": 0.05, "grad_norm": 1.206775426864624, "learning_rate": 9.991320414333559e-06, "loss": 0.6191, "step": 377 }, { "epoch": 0.05, "grad_norm": 2.0808825492858887, "learning_rate": 9.991197772205808e-06, "loss": 0.6128, "step": 378 }, { "epoch": 0.05, "grad_norm": 1.1514902114868164, "learning_rate": 9.99107427044569e-06, "loss": 0.7401, "step": 379 }, { "epoch": 0.05, "grad_norm": 1.1866756677627563, "learning_rate": 9.990949909074477e-06, "loss": 0.6648, "step": 380 }, { "epoch": 0.05, "grad_norm": 1.040029764175415, "learning_rate": 9.990824688113584e-06, "loss": 0.6372, "step": 381 }, { "epoch": 0.05, "grad_norm": 1.2645063400268555, "learning_rate": 9.990698607584584e-06, "loss": 0.6317, "step": 382 }, { "epoch": 0.05, "grad_norm": 1.032130479812622, "learning_rate": 9.990571667509187e-06, "loss": 0.6034, "step": 383 }, { "epoch": 0.05, "grad_norm": 1.203916072845459, "learning_rate": 9.990443867909258e-06, "loss": 0.6927, "step": 384 }, { "epoch": 0.05, "grad_norm": 1.2320196628570557, "learning_rate": 9.990315208806807e-06, "loss": 0.6196, "step": 385 }, { "epoch": 0.05, "grad_norm": 1.374750018119812, "learning_rate": 9.990185690223995e-06, "loss": 0.6235, "step": 386 }, { "epoch": 0.05, "grad_norm": 1.7351794242858887, "learning_rate": 9.990055312183125e-06, "loss": 0.5726, "step": 387 }, { "epoch": 0.05, "grad_norm": 1.2510026693344116, "learning_rate": 9.989924074706654e-06, "loss": 0.7773, "step": 388 }, { "epoch": 0.05, "grad_norm": 1.2288150787353516, "learning_rate": 9.989791977817187e-06, "loss": 0.6208, "step": 389 }, { "epoch": 0.05, "grad_norm": 1.2345741987228394, "learning_rate": 9.989659021537471e-06, "loss": 0.6605, "step": 390 }, { "epoch": 0.05, "grad_norm": 1.257174015045166, "learning_rate": 9.989525205890407e-06, "loss": 0.6108, "step": 391 }, { "epoch": 0.05, "grad_norm": 1.563079595565796, "learning_rate": 9.989390530899044e-06, "loss": 0.5989, "step": 392 }, { "epoch": 0.05, "grad_norm": 1.1823904514312744, "learning_rate": 9.989254996586575e-06, "loss": 0.5835, "step": 393 }, { "epoch": 0.05, "grad_norm": 1.3369593620300293, "learning_rate": 9.989118602976343e-06, "loss": 0.6985, "step": 394 }, { "epoch": 0.05, "grad_norm": 2.9907262325286865, "learning_rate": 9.98898135009184e-06, "loss": 0.6928, "step": 395 }, { "epoch": 0.05, "grad_norm": 4.113388538360596, "learning_rate": 9.988843237956703e-06, "loss": 0.5624, "step": 396 }, { "epoch": 0.05, "grad_norm": 1.2897573709487915, "learning_rate": 9.98870426659472e-06, "loss": 0.6339, "step": 397 }, { "epoch": 0.05, "grad_norm": 1.5871529579162598, "learning_rate": 9.988564436029826e-06, "loss": 0.627, "step": 398 }, { "epoch": 0.05, "grad_norm": 1.316611409187317, "learning_rate": 9.988423746286105e-06, "loss": 0.6233, "step": 399 }, { "epoch": 0.05, "grad_norm": 1.2401022911071777, "learning_rate": 9.988282197387787e-06, "loss": 0.7542, "step": 400 }, { "epoch": 0.05, "grad_norm": 1.3629307746887207, "learning_rate": 9.988139789359252e-06, "loss": 0.6359, "step": 401 }, { "epoch": 0.05, "grad_norm": 1.324845552444458, "learning_rate": 9.987996522225025e-06, "loss": 0.7213, "step": 402 }, { "epoch": 0.05, "grad_norm": 2.809295177459717, "learning_rate": 9.987852396009783e-06, "loss": 0.634, "step": 403 }, { "epoch": 0.05, "grad_norm": 1.5875015258789062, "learning_rate": 9.987707410738346e-06, "loss": 0.6653, "step": 404 }, { "epoch": 0.05, "grad_norm": 1.2210239171981812, "learning_rate": 9.987561566435688e-06, "loss": 0.7018, "step": 405 }, { "epoch": 0.05, "grad_norm": 1.1161646842956543, "learning_rate": 9.987414863126926e-06, "loss": 0.768, "step": 406 }, { "epoch": 0.05, "grad_norm": 1.0861924886703491, "learning_rate": 9.987267300837327e-06, "loss": 0.5714, "step": 407 }, { "epoch": 0.05, "grad_norm": 1.3647254705429077, "learning_rate": 9.987118879592303e-06, "loss": 0.7023, "step": 408 }, { "epoch": 0.05, "grad_norm": 1.4136096239089966, "learning_rate": 9.986969599417422e-06, "loss": 0.6671, "step": 409 }, { "epoch": 0.05, "grad_norm": 1.715958595275879, "learning_rate": 9.98681946033839e-06, "loss": 0.6588, "step": 410 }, { "epoch": 0.05, "grad_norm": 1.387445330619812, "learning_rate": 9.986668462381065e-06, "loss": 0.7723, "step": 411 }, { "epoch": 0.05, "grad_norm": 1.0558326244354248, "learning_rate": 9.98651660557146e-06, "loss": 0.6056, "step": 412 }, { "epoch": 0.05, "grad_norm": 1.917008638381958, "learning_rate": 9.98636388993572e-06, "loss": 0.6206, "step": 413 }, { "epoch": 0.05, "grad_norm": 1.2889763116836548, "learning_rate": 9.986210315500154e-06, "loss": 0.7506, "step": 414 }, { "epoch": 0.05, "grad_norm": 1.4434642791748047, "learning_rate": 9.986055882291208e-06, "loss": 0.5823, "step": 415 }, { "epoch": 0.05, "grad_norm": 1.9407073259353638, "learning_rate": 9.985900590335483e-06, "loss": 0.7348, "step": 416 }, { "epoch": 0.05, "grad_norm": 1.259628415107727, "learning_rate": 9.985744439659724e-06, "loss": 0.5983, "step": 417 }, { "epoch": 0.05, "grad_norm": 2.4348812103271484, "learning_rate": 9.985587430290822e-06, "loss": 0.6106, "step": 418 }, { "epoch": 0.05, "grad_norm": 1.2677141427993774, "learning_rate": 9.985429562255822e-06, "loss": 0.6385, "step": 419 }, { "epoch": 0.05, "grad_norm": 1.1897544860839844, "learning_rate": 9.985270835581914e-06, "loss": 0.7339, "step": 420 }, { "epoch": 0.05, "grad_norm": 1.1162315607070923, "learning_rate": 9.985111250296434e-06, "loss": 0.7, "step": 421 }, { "epoch": 0.05, "grad_norm": 1.4913219213485718, "learning_rate": 9.984950806426865e-06, "loss": 0.5671, "step": 422 }, { "epoch": 0.05, "grad_norm": 1.8393346071243286, "learning_rate": 9.984789504000844e-06, "loss": 0.7219, "step": 423 }, { "epoch": 0.05, "grad_norm": 1.3586616516113281, "learning_rate": 9.98462734304615e-06, "loss": 0.6634, "step": 424 }, { "epoch": 0.05, "grad_norm": 1.41817307472229, "learning_rate": 9.984464323590712e-06, "loss": 0.5488, "step": 425 }, { "epoch": 0.05, "grad_norm": 1.3771775960922241, "learning_rate": 9.984300445662608e-06, "loss": 0.64, "step": 426 }, { "epoch": 0.05, "grad_norm": 1.3530077934265137, "learning_rate": 9.984135709290061e-06, "loss": 0.5721, "step": 427 }, { "epoch": 0.05, "grad_norm": 1.2935279607772827, "learning_rate": 9.983970114501447e-06, "loss": 0.7433, "step": 428 }, { "epoch": 0.05, "grad_norm": 1.2527360916137695, "learning_rate": 9.983803661325282e-06, "loss": 0.6697, "step": 429 }, { "epoch": 0.06, "grad_norm": 1.7714976072311401, "learning_rate": 9.983636349790235e-06, "loss": 0.626, "step": 430 }, { "epoch": 0.06, "grad_norm": 1.2465953826904297, "learning_rate": 9.983468179925124e-06, "loss": 0.6589, "step": 431 }, { "epoch": 0.06, "grad_norm": 1.5122458934783936, "learning_rate": 9.983299151758912e-06, "loss": 0.5727, "step": 432 }, { "epoch": 0.06, "grad_norm": 1.4289777278900146, "learning_rate": 9.98312926532071e-06, "loss": 0.6082, "step": 433 }, { "epoch": 0.06, "grad_norm": 1.2806988954544067, "learning_rate": 9.982958520639778e-06, "loss": 0.6652, "step": 434 }, { "epoch": 0.06, "grad_norm": 1.4954502582550049, "learning_rate": 9.982786917745523e-06, "loss": 0.6851, "step": 435 }, { "epoch": 0.06, "grad_norm": 2.040862560272217, "learning_rate": 9.982614456667502e-06, "loss": 0.6674, "step": 436 }, { "epoch": 0.06, "grad_norm": 1.2818940877914429, "learning_rate": 9.982441137435414e-06, "loss": 0.6397, "step": 437 }, { "epoch": 0.06, "grad_norm": 1.2709026336669922, "learning_rate": 9.982266960079113e-06, "loss": 0.6375, "step": 438 }, { "epoch": 0.06, "grad_norm": 1.2646985054016113, "learning_rate": 9.982091924628596e-06, "loss": 0.7385, "step": 439 }, { "epoch": 0.06, "grad_norm": 1.0162150859832764, "learning_rate": 9.981916031114011e-06, "loss": 0.5205, "step": 440 }, { "epoch": 0.06, "grad_norm": 1.4943784475326538, "learning_rate": 9.98173927956565e-06, "loss": 0.6367, "step": 441 }, { "epoch": 0.06, "grad_norm": 1.2825793027877808, "learning_rate": 9.981561670013955e-06, "loss": 0.6305, "step": 442 }, { "epoch": 0.06, "grad_norm": 1.1641693115234375, "learning_rate": 9.981383202489517e-06, "loss": 0.6917, "step": 443 }, { "epoch": 0.06, "grad_norm": 1.4706101417541504, "learning_rate": 9.981203877023074e-06, "loss": 0.6046, "step": 444 }, { "epoch": 0.06, "grad_norm": 1.171685814857483, "learning_rate": 9.981023693645509e-06, "loss": 0.5857, "step": 445 }, { "epoch": 0.06, "grad_norm": 1.3791475296020508, "learning_rate": 9.980842652387855e-06, "loss": 0.542, "step": 446 }, { "epoch": 0.06, "grad_norm": 1.1576347351074219, "learning_rate": 9.980660753281296e-06, "loss": 0.6192, "step": 447 }, { "epoch": 0.06, "grad_norm": 1.4999419450759888, "learning_rate": 9.980477996357154e-06, "loss": 0.6426, "step": 448 }, { "epoch": 0.06, "grad_norm": 1.2739951610565186, "learning_rate": 9.980294381646912e-06, "loss": 0.6035, "step": 449 }, { "epoch": 0.06, "grad_norm": 1.450174331665039, "learning_rate": 9.98010990918219e-06, "loss": 0.667, "step": 450 }, { "epoch": 0.06, "grad_norm": 1.3379954099655151, "learning_rate": 9.979924578994761e-06, "loss": 0.6993, "step": 451 }, { "epoch": 0.06, "grad_norm": 1.5418367385864258, "learning_rate": 9.979738391116543e-06, "loss": 0.5721, "step": 452 }, { "epoch": 0.06, "grad_norm": 1.351285696029663, "learning_rate": 9.979551345579606e-06, "loss": 0.6084, "step": 453 }, { "epoch": 0.06, "grad_norm": 1.3503434658050537, "learning_rate": 9.97936344241616e-06, "loss": 0.7051, "step": 454 }, { "epoch": 0.06, "grad_norm": 1.6836271286010742, "learning_rate": 9.979174681658575e-06, "loss": 0.6324, "step": 455 }, { "epoch": 0.06, "grad_norm": 1.482871174812317, "learning_rate": 9.978985063339353e-06, "loss": 0.634, "step": 456 }, { "epoch": 0.06, "grad_norm": 1.6858736276626587, "learning_rate": 9.978794587491156e-06, "loss": 0.6554, "step": 457 }, { "epoch": 0.06, "grad_norm": 1.5458368062973022, "learning_rate": 9.97860325414679e-06, "loss": 0.599, "step": 458 }, { "epoch": 0.06, "grad_norm": 1.42569899559021, "learning_rate": 9.978411063339205e-06, "loss": 0.6472, "step": 459 }, { "epoch": 0.06, "grad_norm": 1.1632460355758667, "learning_rate": 9.978218015101508e-06, "loss": 0.651, "step": 460 }, { "epoch": 0.06, "grad_norm": 1.2948800325393677, "learning_rate": 9.978024109466942e-06, "loss": 0.5811, "step": 461 }, { "epoch": 0.06, "grad_norm": 2.991281509399414, "learning_rate": 9.977829346468906e-06, "loss": 0.6956, "step": 462 }, { "epoch": 0.06, "grad_norm": 1.260887622833252, "learning_rate": 9.977633726140942e-06, "loss": 0.6101, "step": 463 }, { "epoch": 0.06, "grad_norm": 1.1584416627883911, "learning_rate": 9.977437248516744e-06, "loss": 0.5652, "step": 464 }, { "epoch": 0.06, "grad_norm": 1.210117220878601, "learning_rate": 9.977239913630149e-06, "loss": 0.7002, "step": 465 }, { "epoch": 0.06, "grad_norm": 1.4283682107925415, "learning_rate": 9.977041721515146e-06, "loss": 0.6748, "step": 466 }, { "epoch": 0.06, "grad_norm": 1.113726258277893, "learning_rate": 9.976842672205868e-06, "loss": 0.6123, "step": 467 }, { "epoch": 0.06, "grad_norm": 1.1056057214736938, "learning_rate": 9.976642765736597e-06, "loss": 0.6603, "step": 468 }, { "epoch": 0.06, "grad_norm": 1.267026424407959, "learning_rate": 9.976442002141767e-06, "loss": 0.6771, "step": 469 }, { "epoch": 0.06, "grad_norm": 1.5912866592407227, "learning_rate": 9.976240381455949e-06, "loss": 0.6103, "step": 470 }, { "epoch": 0.06, "grad_norm": 1.5797346830368042, "learning_rate": 9.976037903713872e-06, "loss": 0.6114, "step": 471 }, { "epoch": 0.06, "grad_norm": 1.223134160041809, "learning_rate": 9.975834568950406e-06, "loss": 0.6158, "step": 472 }, { "epoch": 0.06, "grad_norm": 1.211577296257019, "learning_rate": 9.975630377200575e-06, "loss": 0.6709, "step": 473 }, { "epoch": 0.06, "grad_norm": 1.0716959238052368, "learning_rate": 9.975425328499546e-06, "loss": 0.6498, "step": 474 }, { "epoch": 0.06, "grad_norm": 1.189151644706726, "learning_rate": 9.975219422882631e-06, "loss": 0.6125, "step": 475 }, { "epoch": 0.06, "grad_norm": 1.2349804639816284, "learning_rate": 9.975012660385298e-06, "loss": 0.6524, "step": 476 }, { "epoch": 0.06, "grad_norm": 1.7489399909973145, "learning_rate": 9.974805041043154e-06, "loss": 0.7575, "step": 477 }, { "epoch": 0.06, "grad_norm": 1.7448736429214478, "learning_rate": 9.974596564891958e-06, "loss": 0.6242, "step": 478 }, { "epoch": 0.06, "grad_norm": 1.4200482368469238, "learning_rate": 9.974387231967618e-06, "loss": 0.7357, "step": 479 }, { "epoch": 0.06, "grad_norm": 1.3394062519073486, "learning_rate": 9.974177042306184e-06, "loss": 0.7014, "step": 480 }, { "epoch": 0.06, "grad_norm": 1.8164784908294678, "learning_rate": 9.973965995943857e-06, "loss": 0.745, "step": 481 }, { "epoch": 0.06, "grad_norm": 1.347047209739685, "learning_rate": 9.973754092916989e-06, "loss": 0.7665, "step": 482 }, { "epoch": 0.06, "grad_norm": 1.2569183111190796, "learning_rate": 9.973541333262073e-06, "loss": 0.604, "step": 483 }, { "epoch": 0.06, "grad_norm": 1.4116610288619995, "learning_rate": 9.973327717015753e-06, "loss": 0.6642, "step": 484 }, { "epoch": 0.06, "grad_norm": 1.4871090650558472, "learning_rate": 9.97311324421482e-06, "loss": 0.6648, "step": 485 }, { "epoch": 0.06, "grad_norm": 1.491804599761963, "learning_rate": 9.972897914896215e-06, "loss": 0.7223, "step": 486 }, { "epoch": 0.06, "grad_norm": 1.3295527696609497, "learning_rate": 9.97268172909702e-06, "loss": 0.6121, "step": 487 }, { "epoch": 0.06, "grad_norm": 1.0643301010131836, "learning_rate": 9.972464686854473e-06, "loss": 0.6677, "step": 488 }, { "epoch": 0.06, "grad_norm": 1.3908393383026123, "learning_rate": 9.97224678820595e-06, "loss": 0.616, "step": 489 }, { "epoch": 0.06, "grad_norm": 1.06988525390625, "learning_rate": 9.972028033188984e-06, "loss": 0.6551, "step": 490 }, { "epoch": 0.06, "grad_norm": 1.0374271869659424, "learning_rate": 9.97180842184125e-06, "loss": 0.7074, "step": 491 }, { "epoch": 0.06, "grad_norm": 1.0564719438552856, "learning_rate": 9.97158795420057e-06, "loss": 0.6798, "step": 492 }, { "epoch": 0.06, "grad_norm": 1.26225745677948, "learning_rate": 9.971366630304917e-06, "loss": 0.6457, "step": 493 }, { "epoch": 0.06, "grad_norm": 1.0807775259017944, "learning_rate": 9.971144450192408e-06, "loss": 0.588, "step": 494 }, { "epoch": 0.06, "grad_norm": 1.1355947256088257, "learning_rate": 9.97092141390131e-06, "loss": 0.7856, "step": 495 }, { "epoch": 0.06, "grad_norm": 1.3302438259124756, "learning_rate": 9.970697521470036e-06, "loss": 0.614, "step": 496 }, { "epoch": 0.06, "grad_norm": 1.4943525791168213, "learning_rate": 9.970472772937148e-06, "loss": 0.6594, "step": 497 }, { "epoch": 0.06, "grad_norm": 1.7916415929794312, "learning_rate": 9.970247168341352e-06, "loss": 0.68, "step": 498 }, { "epoch": 0.06, "grad_norm": 1.2984795570373535, "learning_rate": 9.970020707721505e-06, "loss": 0.676, "step": 499 }, { "epoch": 0.06, "grad_norm": 1.271872878074646, "learning_rate": 9.969793391116613e-06, "loss": 0.7291, "step": 500 }, { "epoch": 0.06, "grad_norm": 1.3915470838546753, "learning_rate": 9.969565218565823e-06, "loss": 0.6427, "step": 501 }, { "epoch": 0.06, "grad_norm": 1.4289871454238892, "learning_rate": 9.969336190108434e-06, "loss": 0.683, "step": 502 }, { "epoch": 0.06, "grad_norm": 1.4833756685256958, "learning_rate": 9.969106305783894e-06, "loss": 0.6101, "step": 503 }, { "epoch": 0.06, "grad_norm": 1.1719868183135986, "learning_rate": 9.96887556563179e-06, "loss": 0.611, "step": 504 }, { "epoch": 0.06, "grad_norm": 1.2994431257247925, "learning_rate": 9.968643969691869e-06, "loss": 0.6425, "step": 505 }, { "epoch": 0.06, "grad_norm": 1.4861727952957153, "learning_rate": 9.968411518004013e-06, "loss": 0.6118, "step": 506 }, { "epoch": 0.06, "grad_norm": 1.2328635454177856, "learning_rate": 9.968178210608261e-06, "loss": 0.5606, "step": 507 }, { "epoch": 0.07, "grad_norm": 1.6703741550445557, "learning_rate": 9.967944047544795e-06, "loss": 0.6405, "step": 508 }, { "epoch": 0.07, "grad_norm": 1.5043929815292358, "learning_rate": 9.967709028853943e-06, "loss": 0.6459, "step": 509 }, { "epoch": 0.07, "grad_norm": 1.377638339996338, "learning_rate": 9.967473154576185e-06, "loss": 0.6761, "step": 510 }, { "epoch": 0.07, "grad_norm": 1.2160335779190063, "learning_rate": 9.96723642475214e-06, "loss": 0.627, "step": 511 }, { "epoch": 0.07, "grad_norm": 1.4559895992279053, "learning_rate": 9.966998839422587e-06, "loss": 0.6755, "step": 512 }, { "epoch": 0.07, "grad_norm": 1.4201894998550415, "learning_rate": 9.966760398628441e-06, "loss": 0.6708, "step": 513 }, { "epoch": 0.07, "grad_norm": 1.3131368160247803, "learning_rate": 9.966521102410769e-06, "loss": 0.6067, "step": 514 }, { "epoch": 0.07, "grad_norm": 1.503257393836975, "learning_rate": 9.966280950810787e-06, "loss": 0.6305, "step": 515 }, { "epoch": 0.07, "grad_norm": 1.0093584060668945, "learning_rate": 9.966039943869853e-06, "loss": 0.5931, "step": 516 }, { "epoch": 0.07, "grad_norm": 1.2581803798675537, "learning_rate": 9.965798081629478e-06, "loss": 0.5784, "step": 517 }, { "epoch": 0.07, "grad_norm": 1.0922319889068604, "learning_rate": 9.965555364131316e-06, "loss": 0.6432, "step": 518 }, { "epoch": 0.07, "grad_norm": 1.2308220863342285, "learning_rate": 9.965311791417173e-06, "loss": 0.6207, "step": 519 }, { "epoch": 0.07, "grad_norm": 2.4796323776245117, "learning_rate": 9.965067363528996e-06, "loss": 0.697, "step": 520 }, { "epoch": 0.07, "grad_norm": 3.5409436225891113, "learning_rate": 9.964822080508884e-06, "loss": 0.5826, "step": 521 }, { "epoch": 0.07, "grad_norm": 2.550527811050415, "learning_rate": 9.964575942399085e-06, "loss": 0.6782, "step": 522 }, { "epoch": 0.07, "grad_norm": 1.4845041036605835, "learning_rate": 9.964328949241987e-06, "loss": 0.6793, "step": 523 }, { "epoch": 0.07, "grad_norm": 1.7661019563674927, "learning_rate": 9.964081101080133e-06, "loss": 0.6368, "step": 524 }, { "epoch": 0.07, "grad_norm": 1.430008053779602, "learning_rate": 9.963832397956206e-06, "loss": 0.6865, "step": 525 }, { "epoch": 0.07, "grad_norm": 1.7027018070220947, "learning_rate": 9.963582839913042e-06, "loss": 0.6343, "step": 526 }, { "epoch": 0.07, "grad_norm": 1.6025042533874512, "learning_rate": 9.963332426993623e-06, "loss": 0.6292, "step": 527 }, { "epoch": 0.07, "grad_norm": 1.252992033958435, "learning_rate": 9.963081159241077e-06, "loss": 0.682, "step": 528 }, { "epoch": 0.07, "grad_norm": 1.218106746673584, "learning_rate": 9.96282903669868e-06, "loss": 0.5634, "step": 529 }, { "epoch": 0.07, "grad_norm": 1.6767704486846924, "learning_rate": 9.962576059409854e-06, "loss": 0.6234, "step": 530 }, { "epoch": 0.07, "grad_norm": 1.111166000366211, "learning_rate": 9.96232222741817e-06, "loss": 0.6472, "step": 531 }, { "epoch": 0.07, "grad_norm": 1.2849923372268677, "learning_rate": 9.962067540767345e-06, "loss": 0.5941, "step": 532 }, { "epoch": 0.07, "grad_norm": 5.2195000648498535, "learning_rate": 9.961811999501245e-06, "loss": 0.5667, "step": 533 }, { "epoch": 0.07, "grad_norm": 1.106995701789856, "learning_rate": 9.96155560366388e-06, "loss": 0.6523, "step": 534 }, { "epoch": 0.07, "grad_norm": 2.545809268951416, "learning_rate": 9.96129835329941e-06, "loss": 0.5382, "step": 535 }, { "epoch": 0.07, "grad_norm": 1.297607183456421, "learning_rate": 9.96104024845214e-06, "loss": 0.545, "step": 536 }, { "epoch": 0.07, "grad_norm": 1.8389314413070679, "learning_rate": 9.960781289166524e-06, "loss": 0.7501, "step": 537 }, { "epoch": 0.07, "grad_norm": 1.588292121887207, "learning_rate": 9.960521475487164e-06, "loss": 0.5668, "step": 538 }, { "epoch": 0.07, "grad_norm": 1.2533057928085327, "learning_rate": 9.960260807458806e-06, "loss": 0.6534, "step": 539 }, { "epoch": 0.07, "grad_norm": 1.1930246353149414, "learning_rate": 9.959999285126344e-06, "loss": 0.615, "step": 540 }, { "epoch": 0.07, "grad_norm": 1.2591867446899414, "learning_rate": 9.959736908534821e-06, "loss": 0.676, "step": 541 }, { "epoch": 0.07, "grad_norm": 1.2490293979644775, "learning_rate": 9.959473677729426e-06, "loss": 0.7228, "step": 542 }, { "epoch": 0.07, "grad_norm": 0.9905379414558411, "learning_rate": 9.959209592755495e-06, "loss": 0.6434, "step": 543 }, { "epoch": 0.07, "grad_norm": 1.484781265258789, "learning_rate": 9.958944653658513e-06, "loss": 0.7403, "step": 544 }, { "epoch": 0.07, "grad_norm": 1.3250967264175415, "learning_rate": 9.958678860484106e-06, "loss": 0.7106, "step": 545 }, { "epoch": 0.07, "grad_norm": 1.2482023239135742, "learning_rate": 9.958412213278057e-06, "loss": 0.6753, "step": 546 }, { "epoch": 0.07, "grad_norm": 1.0016635656356812, "learning_rate": 9.958144712086286e-06, "loss": 0.6783, "step": 547 }, { "epoch": 0.07, "grad_norm": 1.143125057220459, "learning_rate": 9.957876356954867e-06, "loss": 0.7045, "step": 548 }, { "epoch": 0.07, "grad_norm": 1.036476969718933, "learning_rate": 9.957607147930017e-06, "loss": 0.6173, "step": 549 }, { "epoch": 0.07, "grad_norm": 1.2345410585403442, "learning_rate": 9.957337085058103e-06, "loss": 0.6915, "step": 550 }, { "epoch": 0.07, "grad_norm": 1.4582602977752686, "learning_rate": 9.957066168385639e-06, "loss": 0.6074, "step": 551 }, { "epoch": 0.07, "grad_norm": 1.2883025407791138, "learning_rate": 9.956794397959283e-06, "loss": 0.6289, "step": 552 }, { "epoch": 0.07, "grad_norm": 1.3238660097122192, "learning_rate": 9.956521773825842e-06, "loss": 0.6089, "step": 553 }, { "epoch": 0.07, "grad_norm": 1.081383228302002, "learning_rate": 9.956248296032271e-06, "loss": 0.6748, "step": 554 }, { "epoch": 0.07, "grad_norm": 1.2733744382858276, "learning_rate": 9.955973964625671e-06, "loss": 0.6841, "step": 555 }, { "epoch": 0.07, "grad_norm": 1.1720517873764038, "learning_rate": 9.95569877965329e-06, "loss": 0.6472, "step": 556 }, { "epoch": 0.07, "grad_norm": 1.179827094078064, "learning_rate": 9.955422741162521e-06, "loss": 0.6, "step": 557 }, { "epoch": 0.07, "grad_norm": 1.3065040111541748, "learning_rate": 9.95514584920091e-06, "loss": 0.6316, "step": 558 }, { "epoch": 0.07, "grad_norm": 1.4125103950500488, "learning_rate": 9.95486810381614e-06, "loss": 0.6008, "step": 559 }, { "epoch": 0.07, "grad_norm": 1.5217939615249634, "learning_rate": 9.954589505056054e-06, "loss": 0.704, "step": 560 }, { "epoch": 0.07, "grad_norm": 1.4721038341522217, "learning_rate": 9.95431005296863e-06, "loss": 0.6193, "step": 561 }, { "epoch": 0.07, "grad_norm": 1.0634781122207642, "learning_rate": 9.954029747601999e-06, "loss": 0.5784, "step": 562 }, { "epoch": 0.07, "grad_norm": 1.4376946687698364, "learning_rate": 9.95374858900444e-06, "loss": 0.6244, "step": 563 }, { "epoch": 0.07, "grad_norm": 1.3898831605911255, "learning_rate": 9.953466577224374e-06, "loss": 0.6728, "step": 564 }, { "epoch": 0.07, "grad_norm": 1.651649832725525, "learning_rate": 9.953183712310373e-06, "loss": 0.5602, "step": 565 }, { "epoch": 0.07, "grad_norm": 2.1175930500030518, "learning_rate": 9.952899994311153e-06, "loss": 0.6814, "step": 566 }, { "epoch": 0.07, "grad_norm": 1.2130919694900513, "learning_rate": 9.952615423275584e-06, "loss": 0.6513, "step": 567 }, { "epoch": 0.07, "grad_norm": 1.3236795663833618, "learning_rate": 9.952329999252673e-06, "loss": 0.602, "step": 568 }, { "epoch": 0.07, "grad_norm": 2.570998191833496, "learning_rate": 9.952043722291577e-06, "loss": 0.7349, "step": 569 }, { "epoch": 0.07, "grad_norm": 1.0845404863357544, "learning_rate": 9.951756592441606e-06, "loss": 0.6471, "step": 570 }, { "epoch": 0.07, "grad_norm": 1.2199392318725586, "learning_rate": 9.95146860975221e-06, "loss": 0.6956, "step": 571 }, { "epoch": 0.07, "grad_norm": 1.1437824964523315, "learning_rate": 9.951179774272988e-06, "loss": 0.5707, "step": 572 }, { "epoch": 0.07, "grad_norm": 1.277314305305481, "learning_rate": 9.950890086053686e-06, "loss": 0.6485, "step": 573 }, { "epoch": 0.07, "grad_norm": 1.1325360536575317, "learning_rate": 9.9505995451442e-06, "loss": 0.574, "step": 574 }, { "epoch": 0.07, "grad_norm": 1.1564849615097046, "learning_rate": 9.950308151594563e-06, "loss": 0.595, "step": 575 }, { "epoch": 0.07, "grad_norm": 1.2639933824539185, "learning_rate": 9.950015905454969e-06, "loss": 0.712, "step": 576 }, { "epoch": 0.07, "grad_norm": 2.110800266265869, "learning_rate": 9.949722806775746e-06, "loss": 0.6254, "step": 577 }, { "epoch": 0.07, "grad_norm": 1.1644377708435059, "learning_rate": 9.949428855607377e-06, "loss": 0.6832, "step": 578 }, { "epoch": 0.07, "grad_norm": 1.162166953086853, "learning_rate": 9.949134052000488e-06, "loss": 0.742, "step": 579 }, { "epoch": 0.07, "grad_norm": 1.1934207677841187, "learning_rate": 9.948838396005854e-06, "loss": 0.7117, "step": 580 }, { "epoch": 0.07, "grad_norm": 1.3645777702331543, "learning_rate": 9.948541887674396e-06, "loss": 0.6821, "step": 581 }, { "epoch": 0.07, "grad_norm": 1.4982502460479736, "learning_rate": 9.94824452705718e-06, "loss": 0.654, "step": 582 }, { "epoch": 0.07, "grad_norm": 4.82792329788208, "learning_rate": 9.947946314205421e-06, "loss": 0.6821, "step": 583 }, { "epoch": 0.07, "grad_norm": 1.4962670803070068, "learning_rate": 9.94764724917048e-06, "loss": 0.6436, "step": 584 }, { "epoch": 0.07, "grad_norm": 1.179075837135315, "learning_rate": 9.947347332003865e-06, "loss": 0.6445, "step": 585 }, { "epoch": 0.08, "grad_norm": 1.9640148878097534, "learning_rate": 9.94704656275723e-06, "loss": 0.6306, "step": 586 }, { "epoch": 0.08, "grad_norm": 1.0042561292648315, "learning_rate": 9.946744941482379e-06, "loss": 0.713, "step": 587 }, { "epoch": 0.08, "grad_norm": 1.8437522649765015, "learning_rate": 9.946442468231257e-06, "loss": 0.7496, "step": 588 }, { "epoch": 0.08, "grad_norm": 1.3848373889923096, "learning_rate": 9.94613914305596e-06, "loss": 0.6736, "step": 589 }, { "epoch": 0.08, "grad_norm": 1.5635427236557007, "learning_rate": 9.94583496600873e-06, "loss": 0.6535, "step": 590 }, { "epoch": 0.08, "grad_norm": 1.3483617305755615, "learning_rate": 9.945529937141953e-06, "loss": 0.616, "step": 591 }, { "epoch": 0.08, "grad_norm": 1.2669429779052734, "learning_rate": 9.945224056508168e-06, "loss": 0.6904, "step": 592 }, { "epoch": 0.08, "grad_norm": 1.7648189067840576, "learning_rate": 9.944917324160055e-06, "loss": 0.6421, "step": 593 }, { "epoch": 0.08, "grad_norm": 1.4274234771728516, "learning_rate": 9.944609740150441e-06, "loss": 0.6098, "step": 594 }, { "epoch": 0.08, "grad_norm": 1.2724729776382446, "learning_rate": 9.944301304532302e-06, "loss": 0.5873, "step": 595 }, { "epoch": 0.08, "grad_norm": 1.306372880935669, "learning_rate": 9.943992017358761e-06, "loss": 0.6709, "step": 596 }, { "epoch": 0.08, "grad_norm": 1.3919581174850464, "learning_rate": 9.943681878683085e-06, "loss": 0.6482, "step": 597 }, { "epoch": 0.08, "grad_norm": 1.333129644393921, "learning_rate": 9.94337088855869e-06, "loss": 0.7064, "step": 598 }, { "epoch": 0.08, "grad_norm": 1.256874680519104, "learning_rate": 9.943059047039137e-06, "loss": 0.6856, "step": 599 }, { "epoch": 0.08, "grad_norm": 1.1219730377197266, "learning_rate": 9.942746354178133e-06, "loss": 0.6434, "step": 600 }, { "epoch": 0.08, "grad_norm": 1.3746181726455688, "learning_rate": 9.942432810029536e-06, "loss": 0.6124, "step": 601 }, { "epoch": 0.08, "grad_norm": 1.2980557680130005, "learning_rate": 9.942118414647346e-06, "loss": 0.6688, "step": 602 }, { "epoch": 0.08, "grad_norm": 1.4423713684082031, "learning_rate": 9.941803168085711e-06, "loss": 0.6483, "step": 603 }, { "epoch": 0.08, "grad_norm": 1.7107197046279907, "learning_rate": 9.941487070398928e-06, "loss": 0.6004, "step": 604 }, { "epoch": 0.08, "grad_norm": 1.115032434463501, "learning_rate": 9.941170121641434e-06, "loss": 0.5836, "step": 605 }, { "epoch": 0.08, "grad_norm": 1.354966402053833, "learning_rate": 9.940852321867821e-06, "loss": 0.6629, "step": 606 }, { "epoch": 0.08, "grad_norm": 1.070405125617981, "learning_rate": 9.940533671132821e-06, "loss": 0.6276, "step": 607 }, { "epoch": 0.08, "grad_norm": 1.133563756942749, "learning_rate": 9.940214169491318e-06, "loss": 0.715, "step": 608 }, { "epoch": 0.08, "grad_norm": 1.1300113201141357, "learning_rate": 9.939893816998337e-06, "loss": 0.6603, "step": 609 }, { "epoch": 0.08, "grad_norm": 1.367214322090149, "learning_rate": 9.939572613709052e-06, "loss": 0.6713, "step": 610 }, { "epoch": 0.08, "grad_norm": 1.45294988155365, "learning_rate": 9.939250559678787e-06, "loss": 0.579, "step": 611 }, { "epoch": 0.08, "grad_norm": 1.3254419565200806, "learning_rate": 9.938927654963007e-06, "loss": 0.579, "step": 612 }, { "epoch": 0.08, "grad_norm": 1.1809929609298706, "learning_rate": 9.938603899617323e-06, "loss": 0.6424, "step": 613 }, { "epoch": 0.08, "grad_norm": 1.836148977279663, "learning_rate": 9.9382792936975e-06, "loss": 0.598, "step": 614 }, { "epoch": 0.08, "grad_norm": 1.1036103963851929, "learning_rate": 9.937953837259444e-06, "loss": 0.5946, "step": 615 }, { "epoch": 0.08, "grad_norm": 1.2749247550964355, "learning_rate": 9.937627530359206e-06, "loss": 0.6951, "step": 616 }, { "epoch": 0.08, "grad_norm": 1.2078628540039062, "learning_rate": 9.937300373052987e-06, "loss": 0.6501, "step": 617 }, { "epoch": 0.08, "grad_norm": 1.239443302154541, "learning_rate": 9.936972365397133e-06, "loss": 0.6449, "step": 618 }, { "epoch": 0.08, "grad_norm": 1.7138959169387817, "learning_rate": 9.936643507448138e-06, "loss": 0.714, "step": 619 }, { "epoch": 0.08, "grad_norm": 1.0761655569076538, "learning_rate": 9.93631379926264e-06, "loss": 0.6237, "step": 620 }, { "epoch": 0.08, "grad_norm": 1.2124683856964111, "learning_rate": 9.935983240897422e-06, "loss": 0.6217, "step": 621 }, { "epoch": 0.08, "grad_norm": 1.4442805051803589, "learning_rate": 9.93565183240942e-06, "loss": 0.6459, "step": 622 }, { "epoch": 0.08, "grad_norm": 1.2149344682693481, "learning_rate": 9.935319573855711e-06, "loss": 0.6465, "step": 623 }, { "epoch": 0.08, "grad_norm": 1.1919761896133423, "learning_rate": 9.934986465293518e-06, "loss": 0.5959, "step": 624 }, { "epoch": 0.08, "grad_norm": 1.3023431301116943, "learning_rate": 9.934652506780214e-06, "loss": 0.6582, "step": 625 }, { "epoch": 0.08, "grad_norm": 1.142003059387207, "learning_rate": 9.934317698373317e-06, "loss": 0.6515, "step": 626 }, { "epoch": 0.08, "grad_norm": 1.3569769859313965, "learning_rate": 9.93398204013049e-06, "loss": 0.7018, "step": 627 }, { "epoch": 0.08, "grad_norm": 1.1718806028366089, "learning_rate": 9.933645532109544e-06, "loss": 0.5533, "step": 628 }, { "epoch": 0.08, "grad_norm": 1.1356302499771118, "learning_rate": 9.933308174368434e-06, "loss": 0.6077, "step": 629 }, { "epoch": 0.08, "grad_norm": 1.1580795049667358, "learning_rate": 9.932969966965267e-06, "loss": 0.6484, "step": 630 }, { "epoch": 0.08, "grad_norm": 1.378556728363037, "learning_rate": 9.932630909958287e-06, "loss": 0.6518, "step": 631 }, { "epoch": 0.08, "grad_norm": 1.0749226808547974, "learning_rate": 9.932291003405893e-06, "loss": 0.6614, "step": 632 }, { "epoch": 0.08, "grad_norm": 1.4288339614868164, "learning_rate": 9.931950247366625e-06, "loss": 0.7169, "step": 633 }, { "epoch": 0.08, "grad_norm": 2.120861291885376, "learning_rate": 9.931608641899172e-06, "loss": 0.6148, "step": 634 }, { "epoch": 0.08, "grad_norm": 1.725447654724121, "learning_rate": 9.931266187062372e-06, "loss": 0.5805, "step": 635 }, { "epoch": 0.08, "grad_norm": 1.3710678815841675, "learning_rate": 9.930922882915201e-06, "loss": 0.6566, "step": 636 }, { "epoch": 0.08, "grad_norm": 1.3103550672531128, "learning_rate": 9.93057872951679e-06, "loss": 0.6861, "step": 637 }, { "epoch": 0.08, "grad_norm": 1.3011488914489746, "learning_rate": 9.93023372692641e-06, "loss": 0.6222, "step": 638 }, { "epoch": 0.08, "grad_norm": 1.2335342168807983, "learning_rate": 9.92988787520348e-06, "loss": 0.7169, "step": 639 }, { "epoch": 0.08, "grad_norm": 1.844463586807251, "learning_rate": 9.929541174407568e-06, "loss": 0.6041, "step": 640 }, { "epoch": 0.08, "grad_norm": 1.6094704866409302, "learning_rate": 9.929193624598386e-06, "loss": 0.6485, "step": 641 }, { "epoch": 0.08, "grad_norm": 1.2318650484085083, "learning_rate": 9.928845225835791e-06, "loss": 0.7301, "step": 642 }, { "epoch": 0.08, "grad_norm": 1.3937469720840454, "learning_rate": 9.928495978179788e-06, "loss": 0.658, "step": 643 }, { "epoch": 0.08, "grad_norm": 1.3361302614212036, "learning_rate": 9.928145881690527e-06, "loss": 0.6271, "step": 644 }, { "epoch": 0.08, "grad_norm": 1.590607762336731, "learning_rate": 9.927794936428308e-06, "loss": 0.6345, "step": 645 }, { "epoch": 0.08, "grad_norm": 1.0950250625610352, "learning_rate": 9.927443142453573e-06, "loss": 0.5692, "step": 646 }, { "epoch": 0.08, "grad_norm": 3.009345531463623, "learning_rate": 9.92709049982691e-06, "loss": 0.6775, "step": 647 }, { "epoch": 0.08, "grad_norm": 1.5916540622711182, "learning_rate": 9.926737008609055e-06, "loss": 0.6645, "step": 648 }, { "epoch": 0.08, "grad_norm": 1.0630630254745483, "learning_rate": 9.92638266886089e-06, "loss": 0.596, "step": 649 }, { "epoch": 0.08, "grad_norm": 1.2478364706039429, "learning_rate": 9.926027480643442e-06, "loss": 0.6118, "step": 650 }, { "epoch": 0.08, "grad_norm": 1.2530955076217651, "learning_rate": 9.925671444017887e-06, "loss": 0.658, "step": 651 }, { "epoch": 0.08, "grad_norm": 1.3152321577072144, "learning_rate": 9.925314559045543e-06, "loss": 0.6898, "step": 652 }, { "epoch": 0.08, "grad_norm": 1.2925292253494263, "learning_rate": 9.924956825787877e-06, "loss": 0.6682, "step": 653 }, { "epoch": 0.08, "grad_norm": 1.472712755203247, "learning_rate": 9.924598244306502e-06, "loss": 0.6382, "step": 654 }, { "epoch": 0.08, "grad_norm": 1.6111242771148682, "learning_rate": 9.924238814663174e-06, "loss": 0.5262, "step": 655 }, { "epoch": 0.08, "grad_norm": 1.210485816001892, "learning_rate": 9.9238785369198e-06, "loss": 0.6865, "step": 656 }, { "epoch": 0.08, "grad_norm": 1.3140826225280762, "learning_rate": 9.92351741113843e-06, "loss": 0.6448, "step": 657 }, { "epoch": 0.08, "grad_norm": 1.125855803489685, "learning_rate": 9.92315543738126e-06, "loss": 0.6277, "step": 658 }, { "epoch": 0.08, "grad_norm": 0.9021656513214111, "learning_rate": 9.922792615710632e-06, "loss": 0.6673, "step": 659 }, { "epoch": 0.08, "grad_norm": 1.304652452468872, "learning_rate": 9.922428946189038e-06, "loss": 0.6477, "step": 660 }, { "epoch": 0.08, "grad_norm": 1.2016267776489258, "learning_rate": 9.922064428879108e-06, "loss": 0.7264, "step": 661 }, { "epoch": 0.08, "grad_norm": 1.3767685890197754, "learning_rate": 9.921699063843624e-06, "loss": 0.6343, "step": 662 }, { "epoch": 0.08, "grad_norm": 1.5952433347702026, "learning_rate": 9.921332851145516e-06, "loss": 0.6332, "step": 663 }, { "epoch": 0.09, "grad_norm": 1.1251158714294434, "learning_rate": 9.920965790847852e-06, "loss": 0.6287, "step": 664 }, { "epoch": 0.09, "grad_norm": 1.7617466449737549, "learning_rate": 9.920597883013854e-06, "loss": 0.6315, "step": 665 }, { "epoch": 0.09, "grad_norm": 1.5821009874343872, "learning_rate": 9.920229127706884e-06, "loss": 0.5471, "step": 666 }, { "epoch": 0.09, "grad_norm": 1.1487936973571777, "learning_rate": 9.919859524990456e-06, "loss": 0.7097, "step": 667 }, { "epoch": 0.09, "grad_norm": 1.3274495601654053, "learning_rate": 9.919489074928223e-06, "loss": 0.5928, "step": 668 }, { "epoch": 0.09, "grad_norm": 1.1028430461883545, "learning_rate": 9.91911777758399e-06, "loss": 0.6415, "step": 669 }, { "epoch": 0.09, "grad_norm": 1.2603744268417358, "learning_rate": 9.918745633021706e-06, "loss": 0.7122, "step": 670 }, { "epoch": 0.09, "grad_norm": 1.1494251489639282, "learning_rate": 9.918372641305463e-06, "loss": 0.6002, "step": 671 }, { "epoch": 0.09, "grad_norm": 1.1268144845962524, "learning_rate": 9.917998802499502e-06, "loss": 0.677, "step": 672 }, { "epoch": 0.09, "grad_norm": 1.5660556554794312, "learning_rate": 9.91762411666821e-06, "loss": 0.6431, "step": 673 }, { "epoch": 0.09, "grad_norm": 1.1017109155654907, "learning_rate": 9.917248583876116e-06, "loss": 0.7173, "step": 674 }, { "epoch": 0.09, "grad_norm": 1.498296856880188, "learning_rate": 9.916872204187902e-06, "loss": 0.6641, "step": 675 }, { "epoch": 0.09, "grad_norm": 1.0133637189865112, "learning_rate": 9.91649497766839e-06, "loss": 0.6279, "step": 676 }, { "epoch": 0.09, "grad_norm": 1.355507731437683, "learning_rate": 9.91611690438255e-06, "loss": 0.5455, "step": 677 }, { "epoch": 0.09, "grad_norm": 1.849907636642456, "learning_rate": 9.915737984395498e-06, "loss": 0.7237, "step": 678 }, { "epoch": 0.09, "grad_norm": 1.1791698932647705, "learning_rate": 9.915358217772491e-06, "loss": 0.6523, "step": 679 }, { "epoch": 0.09, "grad_norm": 1.3907570838928223, "learning_rate": 9.91497760457894e-06, "loss": 0.6599, "step": 680 }, { "epoch": 0.09, "grad_norm": 1.5050311088562012, "learning_rate": 9.914596144880399e-06, "loss": 0.6543, "step": 681 }, { "epoch": 0.09, "grad_norm": 1.2648143768310547, "learning_rate": 9.914213838742566e-06, "loss": 0.5308, "step": 682 }, { "epoch": 0.09, "grad_norm": 1.1902687549591064, "learning_rate": 9.913830686231281e-06, "loss": 0.6256, "step": 683 }, { "epoch": 0.09, "grad_norm": 1.0023192167282104, "learning_rate": 9.913446687412541e-06, "loss": 0.6192, "step": 684 }, { "epoch": 0.09, "grad_norm": 1.0021028518676758, "learning_rate": 9.913061842352475e-06, "loss": 0.6225, "step": 685 }, { "epoch": 0.09, "grad_norm": 2.0987367630004883, "learning_rate": 9.912676151117371e-06, "loss": 0.6478, "step": 686 }, { "epoch": 0.09, "grad_norm": 1.1188229322433472, "learning_rate": 9.912289613773653e-06, "loss": 0.5543, "step": 687 }, { "epoch": 0.09, "grad_norm": 1.3304802179336548, "learning_rate": 9.911902230387896e-06, "loss": 0.6396, "step": 688 }, { "epoch": 0.09, "grad_norm": 1.0771523714065552, "learning_rate": 9.911514001026816e-06, "loss": 0.6503, "step": 689 }, { "epoch": 0.09, "grad_norm": 1.0926355123519897, "learning_rate": 9.911124925757282e-06, "loss": 0.6256, "step": 690 }, { "epoch": 0.09, "grad_norm": 1.1526925563812256, "learning_rate": 9.9107350046463e-06, "loss": 0.6386, "step": 691 }, { "epoch": 0.09, "grad_norm": 1.0600298643112183, "learning_rate": 9.91034423776103e-06, "loss": 0.6494, "step": 692 }, { "epoch": 0.09, "grad_norm": 1.321643352508545, "learning_rate": 9.90995262516877e-06, "loss": 0.5935, "step": 693 }, { "epoch": 0.09, "grad_norm": 1.1695914268493652, "learning_rate": 9.909560166936968e-06, "loss": 0.7052, "step": 694 }, { "epoch": 0.09, "grad_norm": 1.3552606105804443, "learning_rate": 9.909166863133218e-06, "loss": 0.6771, "step": 695 }, { "epoch": 0.09, "grad_norm": 1.070600152015686, "learning_rate": 9.908772713825259e-06, "loss": 0.6489, "step": 696 }, { "epoch": 0.09, "grad_norm": 1.541309118270874, "learning_rate": 9.908377719080976e-06, "loss": 0.7063, "step": 697 }, { "epoch": 0.09, "grad_norm": 1.2539750337600708, "learning_rate": 9.907981878968395e-06, "loss": 0.7322, "step": 698 }, { "epoch": 0.09, "grad_norm": 1.3420991897583008, "learning_rate": 9.907585193555696e-06, "loss": 0.5876, "step": 699 }, { "epoch": 0.09, "grad_norm": 5.159241676330566, "learning_rate": 9.907187662911195e-06, "loss": 0.6435, "step": 700 }, { "epoch": 0.09, "grad_norm": 1.334041953086853, "learning_rate": 9.906789287103364e-06, "loss": 0.67, "step": 701 }, { "epoch": 0.09, "grad_norm": 1.2002819776535034, "learning_rate": 9.90639006620081e-06, "loss": 0.6775, "step": 702 }, { "epoch": 0.09, "grad_norm": 1.6984732151031494, "learning_rate": 9.905990000272295e-06, "loss": 0.6121, "step": 703 }, { "epoch": 0.09, "grad_norm": 1.382564663887024, "learning_rate": 9.90558908938672e-06, "loss": 0.6936, "step": 704 }, { "epoch": 0.09, "grad_norm": 1.2500004768371582, "learning_rate": 9.905187333613134e-06, "loss": 0.6399, "step": 705 }, { "epoch": 0.09, "grad_norm": 3.314711332321167, "learning_rate": 9.904784733020732e-06, "loss": 0.639, "step": 706 }, { "epoch": 0.09, "grad_norm": 1.5994131565093994, "learning_rate": 9.904381287678853e-06, "loss": 0.5754, "step": 707 }, { "epoch": 0.09, "grad_norm": 1.1765432357788086, "learning_rate": 9.903976997656982e-06, "loss": 0.5971, "step": 708 }, { "epoch": 0.09, "grad_norm": 1.9509575366973877, "learning_rate": 9.903571863024752e-06, "loss": 0.6634, "step": 709 }, { "epoch": 0.09, "grad_norm": 1.1447938680648804, "learning_rate": 9.903165883851936e-06, "loss": 0.6595, "step": 710 }, { "epoch": 0.09, "grad_norm": 1.166501760482788, "learning_rate": 9.902759060208456e-06, "loss": 0.6014, "step": 711 }, { "epoch": 0.09, "grad_norm": 1.1991925239562988, "learning_rate": 9.90235139216438e-06, "loss": 0.6136, "step": 712 }, { "epoch": 0.09, "grad_norm": 1.2937411069869995, "learning_rate": 9.901942879789923e-06, "loss": 0.5599, "step": 713 }, { "epoch": 0.09, "grad_norm": 1.0895254611968994, "learning_rate": 9.90153352315544e-06, "loss": 0.6231, "step": 714 }, { "epoch": 0.09, "grad_norm": 1.2510082721710205, "learning_rate": 9.901123322331434e-06, "loss": 0.6087, "step": 715 }, { "epoch": 0.09, "grad_norm": 0.9192419648170471, "learning_rate": 9.900712277388556e-06, "loss": 0.5165, "step": 716 }, { "epoch": 0.09, "grad_norm": 1.6960457563400269, "learning_rate": 9.900300388397597e-06, "loss": 0.7067, "step": 717 }, { "epoch": 0.09, "grad_norm": 1.0721408128738403, "learning_rate": 9.899887655429501e-06, "loss": 0.5991, "step": 718 }, { "epoch": 0.09, "grad_norm": 1.2546442747116089, "learning_rate": 9.89947407855535e-06, "loss": 0.5862, "step": 719 }, { "epoch": 0.09, "grad_norm": 1.3291999101638794, "learning_rate": 9.899059657846373e-06, "loss": 0.6478, "step": 720 }, { "epoch": 0.09, "grad_norm": 1.157029628753662, "learning_rate": 9.898644393373946e-06, "loss": 0.6432, "step": 721 }, { "epoch": 0.09, "grad_norm": 1.4094780683517456, "learning_rate": 9.898228285209593e-06, "loss": 0.582, "step": 722 }, { "epoch": 0.09, "grad_norm": 1.2219111919403076, "learning_rate": 9.897811333424977e-06, "loss": 0.5938, "step": 723 }, { "epoch": 0.09, "grad_norm": 1.2780520915985107, "learning_rate": 9.897393538091912e-06, "loss": 0.5572, "step": 724 }, { "epoch": 0.09, "grad_norm": 2.0696425437927246, "learning_rate": 9.896974899282352e-06, "loss": 0.6494, "step": 725 }, { "epoch": 0.09, "grad_norm": 1.4121901988983154, "learning_rate": 9.896555417068401e-06, "loss": 0.6419, "step": 726 }, { "epoch": 0.09, "grad_norm": 1.3450160026550293, "learning_rate": 9.896135091522305e-06, "loss": 0.7238, "step": 727 }, { "epoch": 0.09, "grad_norm": 1.0523320436477661, "learning_rate": 9.895713922716457e-06, "loss": 0.5947, "step": 728 }, { "epoch": 0.09, "grad_norm": 1.5163980722427368, "learning_rate": 9.895291910723396e-06, "loss": 0.5785, "step": 729 }, { "epoch": 0.09, "grad_norm": 1.4099388122558594, "learning_rate": 9.894869055615803e-06, "loss": 0.5904, "step": 730 }, { "epoch": 0.09, "grad_norm": 0.9844495058059692, "learning_rate": 9.894445357466507e-06, "loss": 0.6382, "step": 731 }, { "epoch": 0.09, "grad_norm": 1.2758938074111938, "learning_rate": 9.894020816348483e-06, "loss": 0.6405, "step": 732 }, { "epoch": 0.09, "grad_norm": 1.246328353881836, "learning_rate": 9.893595432334846e-06, "loss": 0.6006, "step": 733 }, { "epoch": 0.09, "grad_norm": 1.2072193622589111, "learning_rate": 9.893169205498864e-06, "loss": 0.6452, "step": 734 }, { "epoch": 0.09, "grad_norm": 1.2706694602966309, "learning_rate": 9.892742135913942e-06, "loss": 0.619, "step": 735 }, { "epoch": 0.09, "grad_norm": 1.2876821756362915, "learning_rate": 9.892314223653636e-06, "loss": 0.6796, "step": 736 }, { "epoch": 0.09, "grad_norm": 1.3346693515777588, "learning_rate": 9.891885468791645e-06, "loss": 0.6494, "step": 737 }, { "epoch": 0.09, "grad_norm": 1.2320876121520996, "learning_rate": 9.891455871401814e-06, "loss": 0.6095, "step": 738 }, { "epoch": 0.09, "grad_norm": 1.2823511362075806, "learning_rate": 9.89102543155813e-06, "loss": 0.7429, "step": 739 }, { "epoch": 0.09, "grad_norm": 1.3542308807373047, "learning_rate": 9.890594149334731e-06, "loss": 0.6251, "step": 740 }, { "epoch": 0.09, "grad_norm": 2.2784230709075928, "learning_rate": 9.890162024805895e-06, "loss": 0.693, "step": 741 }, { "epoch": 0.1, "grad_norm": 1.167242169380188, "learning_rate": 9.889729058046045e-06, "loss": 0.6828, "step": 742 }, { "epoch": 0.1, "grad_norm": 1.067370891571045, "learning_rate": 9.889295249129754e-06, "loss": 0.5998, "step": 743 }, { "epoch": 0.1, "grad_norm": 1.2341341972351074, "learning_rate": 9.888860598131733e-06, "loss": 0.5873, "step": 744 }, { "epoch": 0.1, "grad_norm": 1.1772515773773193, "learning_rate": 9.888425105126845e-06, "loss": 0.6183, "step": 745 }, { "epoch": 0.1, "grad_norm": 0.9825636744499207, "learning_rate": 9.887988770190093e-06, "loss": 0.6205, "step": 746 }, { "epoch": 0.1, "grad_norm": 1.7665997743606567, "learning_rate": 9.887551593396628e-06, "loss": 0.6367, "step": 747 }, { "epoch": 0.1, "grad_norm": 1.423568606376648, "learning_rate": 9.887113574821743e-06, "loss": 0.635, "step": 748 }, { "epoch": 0.1, "grad_norm": 1.1755050420761108, "learning_rate": 9.88667471454088e-06, "loss": 0.5415, "step": 749 }, { "epoch": 0.1, "grad_norm": 1.1046626567840576, "learning_rate": 9.886235012629623e-06, "loss": 0.6031, "step": 750 }, { "epoch": 0.1, "grad_norm": 1.0809575319290161, "learning_rate": 9.885794469163702e-06, "loss": 0.6192, "step": 751 }, { "epoch": 0.1, "grad_norm": 1.5112090110778809, "learning_rate": 9.88535308421899e-06, "loss": 0.6352, "step": 752 }, { "epoch": 0.1, "grad_norm": 1.2142413854599, "learning_rate": 9.884910857871508e-06, "loss": 0.6814, "step": 753 }, { "epoch": 0.1, "grad_norm": 1.40277099609375, "learning_rate": 9.88446779019742e-06, "loss": 0.6682, "step": 754 }, { "epoch": 0.1, "grad_norm": 1.2482854127883911, "learning_rate": 9.884023881273038e-06, "loss": 0.7463, "step": 755 }, { "epoch": 0.1, "grad_norm": 1.209838628768921, "learning_rate": 9.883579131174813e-06, "loss": 0.6347, "step": 756 }, { "epoch": 0.1, "grad_norm": 1.2466986179351807, "learning_rate": 9.883133539979342e-06, "loss": 0.5901, "step": 757 }, { "epoch": 0.1, "grad_norm": 1.2496932744979858, "learning_rate": 9.882687107763374e-06, "loss": 0.6438, "step": 758 }, { "epoch": 0.1, "grad_norm": 1.1028106212615967, "learning_rate": 9.882239834603798e-06, "loss": 0.6648, "step": 759 }, { "epoch": 0.1, "grad_norm": 1.3057689666748047, "learning_rate": 9.881791720577643e-06, "loss": 0.634, "step": 760 }, { "epoch": 0.1, "grad_norm": 1.1680352687835693, "learning_rate": 9.88134276576209e-06, "loss": 0.6429, "step": 761 }, { "epoch": 0.1, "grad_norm": 1.238755702972412, "learning_rate": 9.880892970234464e-06, "loss": 0.7034, "step": 762 }, { "epoch": 0.1, "grad_norm": 1.2210910320281982, "learning_rate": 9.880442334072232e-06, "loss": 0.6392, "step": 763 }, { "epoch": 0.1, "grad_norm": 1.2049086093902588, "learning_rate": 9.879990857353007e-06, "loss": 0.5435, "step": 764 }, { "epoch": 0.1, "grad_norm": 1.2110891342163086, "learning_rate": 9.879538540154545e-06, "loss": 0.6339, "step": 765 }, { "epoch": 0.1, "grad_norm": 1.2696014642715454, "learning_rate": 9.87908538255475e-06, "loss": 0.5821, "step": 766 }, { "epoch": 0.1, "grad_norm": 1.348198652267456, "learning_rate": 9.87863138463167e-06, "loss": 0.7056, "step": 767 }, { "epoch": 0.1, "grad_norm": 1.1112827062606812, "learning_rate": 9.878176546463495e-06, "loss": 0.5591, "step": 768 }, { "epoch": 0.1, "grad_norm": 1.0482698678970337, "learning_rate": 9.877720868128562e-06, "loss": 0.6431, "step": 769 }, { "epoch": 0.1, "grad_norm": 1.2081336975097656, "learning_rate": 9.877264349705353e-06, "loss": 0.6349, "step": 770 }, { "epoch": 0.1, "grad_norm": 1.1520190238952637, "learning_rate": 9.876806991272497e-06, "loss": 0.6484, "step": 771 }, { "epoch": 0.1, "grad_norm": 1.0996439456939697, "learning_rate": 9.876348792908757e-06, "loss": 0.7195, "step": 772 }, { "epoch": 0.1, "grad_norm": 1.3486486673355103, "learning_rate": 9.875889754693056e-06, "loss": 0.675, "step": 773 }, { "epoch": 0.1, "grad_norm": 1.2174115180969238, "learning_rate": 9.875429876704448e-06, "loss": 0.6746, "step": 774 }, { "epoch": 0.1, "grad_norm": 1.3793160915374756, "learning_rate": 9.874969159022143e-06, "loss": 0.6585, "step": 775 }, { "epoch": 0.1, "grad_norm": 1.225388526916504, "learning_rate": 9.874507601725486e-06, "loss": 0.6186, "step": 776 }, { "epoch": 0.1, "grad_norm": 1.5237325429916382, "learning_rate": 9.874045204893975e-06, "loss": 0.6844, "step": 777 }, { "epoch": 0.1, "grad_norm": 1.6531785726547241, "learning_rate": 9.873581968607243e-06, "loss": 0.617, "step": 778 }, { "epoch": 0.1, "grad_norm": 1.1168662309646606, "learning_rate": 9.87311789294508e-06, "loss": 0.6097, "step": 779 }, { "epoch": 0.1, "grad_norm": 1.150830864906311, "learning_rate": 9.872652977987409e-06, "loss": 0.5975, "step": 780 }, { "epoch": 0.1, "grad_norm": 1.5129787921905518, "learning_rate": 9.8721872238143e-06, "loss": 0.594, "step": 781 }, { "epoch": 0.1, "grad_norm": 1.2734906673431396, "learning_rate": 9.871720630505975e-06, "loss": 0.5761, "step": 782 }, { "epoch": 0.1, "grad_norm": 1.1508820056915283, "learning_rate": 9.871253198142792e-06, "loss": 0.4496, "step": 783 }, { "epoch": 0.1, "grad_norm": 1.085600733757019, "learning_rate": 9.870784926805258e-06, "loss": 0.5349, "step": 784 }, { "epoch": 0.1, "grad_norm": 1.2703758478164673, "learning_rate": 9.870315816574022e-06, "loss": 0.501, "step": 785 }, { "epoch": 0.1, "grad_norm": 1.0857547521591187, "learning_rate": 9.86984586752988e-06, "loss": 0.6313, "step": 786 }, { "epoch": 0.1, "grad_norm": 1.0965255498886108, "learning_rate": 9.869375079753772e-06, "loss": 0.6636, "step": 787 }, { "epoch": 0.1, "grad_norm": 1.169797658920288, "learning_rate": 9.868903453326776e-06, "loss": 0.7224, "step": 788 }, { "epoch": 0.1, "grad_norm": 1.5188205242156982, "learning_rate": 9.868430988330129e-06, "loss": 0.6751, "step": 789 }, { "epoch": 0.1, "grad_norm": 1.0894997119903564, "learning_rate": 9.867957684845196e-06, "loss": 0.6496, "step": 790 }, { "epoch": 0.1, "grad_norm": 1.124399185180664, "learning_rate": 9.867483542953498e-06, "loss": 0.7526, "step": 791 }, { "epoch": 0.1, "grad_norm": 1.100160837173462, "learning_rate": 9.867008562736695e-06, "loss": 0.6073, "step": 792 }, { "epoch": 0.1, "grad_norm": 1.3616927862167358, "learning_rate": 9.86653274427659e-06, "loss": 0.6064, "step": 793 }, { "epoch": 0.1, "grad_norm": 1.117180585861206, "learning_rate": 9.86605608765514e-06, "loss": 0.5775, "step": 794 }, { "epoch": 0.1, "grad_norm": 1.1300320625305176, "learning_rate": 9.865578592954433e-06, "loss": 0.5791, "step": 795 }, { "epoch": 0.1, "grad_norm": 1.2053767442703247, "learning_rate": 9.86510026025671e-06, "loss": 0.6415, "step": 796 }, { "epoch": 0.1, "grad_norm": 1.2116667032241821, "learning_rate": 9.864621089644356e-06, "loss": 0.6081, "step": 797 }, { "epoch": 0.1, "grad_norm": 1.282387614250183, "learning_rate": 9.864141081199893e-06, "loss": 0.6175, "step": 798 }, { "epoch": 0.1, "grad_norm": 1.0022624731063843, "learning_rate": 9.863660235005999e-06, "loss": 0.6809, "step": 799 }, { "epoch": 0.1, "grad_norm": 1.160461664199829, "learning_rate": 9.863178551145484e-06, "loss": 0.6991, "step": 800 }, { "epoch": 0.1, "grad_norm": 1.3416701555252075, "learning_rate": 9.862696029701316e-06, "loss": 0.5662, "step": 801 }, { "epoch": 0.1, "grad_norm": 1.1150017976760864, "learning_rate": 9.86221267075659e-06, "loss": 0.5754, "step": 802 }, { "epoch": 0.1, "grad_norm": 1.3248844146728516, "learning_rate": 9.861728474394564e-06, "loss": 0.6595, "step": 803 }, { "epoch": 0.1, "grad_norm": 1.3990356922149658, "learning_rate": 9.861243440698622e-06, "loss": 0.643, "step": 804 }, { "epoch": 0.1, "grad_norm": 1.2016390562057495, "learning_rate": 9.860757569752309e-06, "loss": 0.6551, "step": 805 }, { "epoch": 0.1, "grad_norm": 1.7251582145690918, "learning_rate": 9.860270861639303e-06, "loss": 0.6475, "step": 806 }, { "epoch": 0.1, "grad_norm": 1.329265832901001, "learning_rate": 9.859783316443429e-06, "loss": 0.6333, "step": 807 }, { "epoch": 0.1, "grad_norm": 1.7398295402526855, "learning_rate": 9.859294934248658e-06, "loss": 0.6681, "step": 808 }, { "epoch": 0.1, "grad_norm": 1.1610875129699707, "learning_rate": 9.858805715139102e-06, "loss": 0.643, "step": 809 }, { "epoch": 0.1, "grad_norm": 1.1122682094573975, "learning_rate": 9.858315659199023e-06, "loss": 0.599, "step": 810 }, { "epoch": 0.1, "grad_norm": 1.0971416234970093, "learning_rate": 9.857824766512818e-06, "loss": 0.5669, "step": 811 }, { "epoch": 0.1, "grad_norm": 1.1175240278244019, "learning_rate": 9.857333037165038e-06, "loss": 0.6802, "step": 812 }, { "epoch": 0.1, "grad_norm": 1.5853863954544067, "learning_rate": 9.85684047124037e-06, "loss": 0.6693, "step": 813 }, { "epoch": 0.1, "grad_norm": 1.930460810661316, "learning_rate": 9.85634706882365e-06, "loss": 0.5844, "step": 814 }, { "epoch": 0.1, "grad_norm": 1.2769625186920166, "learning_rate": 9.855852829999857e-06, "loss": 0.672, "step": 815 }, { "epoch": 0.1, "grad_norm": 1.0591260194778442, "learning_rate": 9.855357754854112e-06, "loss": 0.617, "step": 816 }, { "epoch": 0.1, "grad_norm": 1.2074815034866333, "learning_rate": 9.854861843471683e-06, "loss": 0.6256, "step": 817 }, { "epoch": 0.1, "grad_norm": 1.4503984451293945, "learning_rate": 9.854365095937982e-06, "loss": 0.6393, "step": 818 }, { "epoch": 0.1, "grad_norm": 1.3213568925857544, "learning_rate": 9.85386751233856e-06, "loss": 0.74, "step": 819 }, { "epoch": 0.11, "grad_norm": 1.231770396232605, "learning_rate": 9.853369092759118e-06, "loss": 0.6627, "step": 820 }, { "epoch": 0.11, "grad_norm": 1.1289067268371582, "learning_rate": 9.852869837285497e-06, "loss": 0.73, "step": 821 }, { "epoch": 0.11, "grad_norm": 1.8980963230133057, "learning_rate": 9.852369746003685e-06, "loss": 0.6039, "step": 822 }, { "epoch": 0.11, "grad_norm": 3.139829397201538, "learning_rate": 9.851868818999813e-06, "loss": 0.5732, "step": 823 }, { "epoch": 0.11, "grad_norm": 1.1257266998291016, "learning_rate": 9.851367056360154e-06, "loss": 0.6825, "step": 824 }, { "epoch": 0.11, "grad_norm": 1.271867275238037, "learning_rate": 9.85086445817113e-06, "loss": 0.6534, "step": 825 }, { "epoch": 0.11, "grad_norm": 1.1219916343688965, "learning_rate": 9.850361024519298e-06, "loss": 0.7195, "step": 826 }, { "epoch": 0.11, "grad_norm": 1.4811643362045288, "learning_rate": 9.849856755491367e-06, "loss": 0.6099, "step": 827 }, { "epoch": 0.11, "grad_norm": 1.355452299118042, "learning_rate": 9.849351651174188e-06, "loss": 0.6789, "step": 828 }, { "epoch": 0.11, "grad_norm": 1.5336081981658936, "learning_rate": 9.848845711654754e-06, "loss": 0.6725, "step": 829 }, { "epoch": 0.11, "grad_norm": 1.1408854722976685, "learning_rate": 9.8483389370202e-06, "loss": 0.6471, "step": 830 }, { "epoch": 0.11, "grad_norm": 1.3404558897018433, "learning_rate": 9.847831327357813e-06, "loss": 0.7116, "step": 831 }, { "epoch": 0.11, "grad_norm": 1.0383011102676392, "learning_rate": 9.847322882755015e-06, "loss": 0.7415, "step": 832 }, { "epoch": 0.11, "grad_norm": 1.4247316122055054, "learning_rate": 9.846813603299377e-06, "loss": 0.6714, "step": 833 }, { "epoch": 0.11, "grad_norm": 1.263272762298584, "learning_rate": 9.846303489078608e-06, "loss": 0.6267, "step": 834 }, { "epoch": 0.11, "grad_norm": 0.9594109654426575, "learning_rate": 9.845792540180569e-06, "loss": 0.6712, "step": 835 }, { "epoch": 0.11, "grad_norm": 1.1622424125671387, "learning_rate": 9.845280756693262e-06, "loss": 0.6655, "step": 836 }, { "epoch": 0.11, "grad_norm": 1.1319442987442017, "learning_rate": 9.844768138704825e-06, "loss": 0.531, "step": 837 }, { "epoch": 0.11, "grad_norm": 1.1469734907150269, "learning_rate": 9.844254686303552e-06, "loss": 0.5315, "step": 838 }, { "epoch": 0.11, "grad_norm": 1.3223508596420288, "learning_rate": 9.843740399577872e-06, "loss": 0.5838, "step": 839 }, { "epoch": 0.11, "grad_norm": 1.5986974239349365, "learning_rate": 9.843225278616363e-06, "loss": 0.7203, "step": 840 }, { "epoch": 0.11, "grad_norm": 1.020804762840271, "learning_rate": 9.842709323507737e-06, "loss": 0.6836, "step": 841 }, { "epoch": 0.11, "grad_norm": 1.5095237493515015, "learning_rate": 9.842192534340864e-06, "loss": 0.6815, "step": 842 }, { "epoch": 0.11, "grad_norm": 1.7738572359085083, "learning_rate": 9.84167491120475e-06, "loss": 0.6532, "step": 843 }, { "epoch": 0.11, "grad_norm": 1.0000008344650269, "learning_rate": 9.84115645418854e-06, "loss": 0.6143, "step": 844 }, { "epoch": 0.11, "grad_norm": 1.1826478242874146, "learning_rate": 9.840637163381534e-06, "loss": 0.6351, "step": 845 }, { "epoch": 0.11, "grad_norm": 1.4132177829742432, "learning_rate": 9.840117038873165e-06, "loss": 0.744, "step": 846 }, { "epoch": 0.11, "grad_norm": 1.2735340595245361, "learning_rate": 9.839596080753015e-06, "loss": 0.7583, "step": 847 }, { "epoch": 0.11, "grad_norm": 1.9481948614120483, "learning_rate": 9.83907428911081e-06, "loss": 0.6783, "step": 848 }, { "epoch": 0.11, "grad_norm": 1.3666658401489258, "learning_rate": 9.838551664036414e-06, "loss": 0.6517, "step": 849 }, { "epoch": 0.11, "grad_norm": 1.2822260856628418, "learning_rate": 9.838028205619843e-06, "loss": 0.6163, "step": 850 }, { "epoch": 0.11, "grad_norm": 1.2314839363098145, "learning_rate": 9.837503913951249e-06, "loss": 0.6752, "step": 851 }, { "epoch": 0.11, "grad_norm": 1.1731816530227661, "learning_rate": 9.836978789120933e-06, "loss": 0.648, "step": 852 }, { "epoch": 0.11, "grad_norm": 1.2162363529205322, "learning_rate": 9.836452831219334e-06, "loss": 0.5972, "step": 853 }, { "epoch": 0.11, "grad_norm": 1.1823762655258179, "learning_rate": 9.83592604033704e-06, "loss": 0.5839, "step": 854 }, { "epoch": 0.11, "grad_norm": 1.037787675857544, "learning_rate": 9.835398416564781e-06, "loss": 0.6314, "step": 855 }, { "epoch": 0.11, "grad_norm": 2.1973769664764404, "learning_rate": 9.834869959993427e-06, "loss": 0.6461, "step": 856 }, { "epoch": 0.11, "grad_norm": 1.4603095054626465, "learning_rate": 9.834340670713994e-06, "loss": 0.548, "step": 857 }, { "epoch": 0.11, "grad_norm": 1.1810789108276367, "learning_rate": 9.833810548817644e-06, "loss": 0.5911, "step": 858 }, { "epoch": 0.11, "grad_norm": 1.5874276161193848, "learning_rate": 9.833279594395676e-06, "loss": 0.6555, "step": 859 }, { "epoch": 0.11, "grad_norm": 1.2780070304870605, "learning_rate": 9.832747807539538e-06, "loss": 0.6349, "step": 860 }, { "epoch": 0.11, "grad_norm": 1.2569433450698853, "learning_rate": 9.83221518834082e-06, "loss": 0.7218, "step": 861 }, { "epoch": 0.11, "grad_norm": 1.1834901571273804, "learning_rate": 9.831681736891255e-06, "loss": 0.5845, "step": 862 }, { "epoch": 0.11, "grad_norm": 1.2185614109039307, "learning_rate": 9.831147453282717e-06, "loss": 0.6264, "step": 863 }, { "epoch": 0.11, "grad_norm": 1.341444969177246, "learning_rate": 9.830612337607227e-06, "loss": 0.7407, "step": 864 }, { "epoch": 0.11, "grad_norm": 1.4523122310638428, "learning_rate": 9.830076389956949e-06, "loss": 0.6082, "step": 865 }, { "epoch": 0.11, "grad_norm": 1.6930763721466064, "learning_rate": 9.829539610424186e-06, "loss": 0.621, "step": 866 }, { "epoch": 0.11, "grad_norm": 1.5043352842330933, "learning_rate": 9.82900199910139e-06, "loss": 0.6928, "step": 867 }, { "epoch": 0.11, "grad_norm": 1.6809231042861938, "learning_rate": 9.828463556081153e-06, "loss": 0.6384, "step": 868 }, { "epoch": 0.11, "grad_norm": 1.4405546188354492, "learning_rate": 9.827924281456211e-06, "loss": 0.6553, "step": 869 }, { "epoch": 0.11, "grad_norm": 1.1939656734466553, "learning_rate": 9.827384175319443e-06, "loss": 0.6822, "step": 870 }, { "epoch": 0.11, "grad_norm": 1.065596103668213, "learning_rate": 9.82684323776387e-06, "loss": 0.6755, "step": 871 }, { "epoch": 0.11, "grad_norm": 1.822675347328186, "learning_rate": 9.82630146888266e-06, "loss": 0.5842, "step": 872 }, { "epoch": 0.11, "grad_norm": 1.0467437505722046, "learning_rate": 9.825758868769117e-06, "loss": 0.6246, "step": 873 }, { "epoch": 0.11, "grad_norm": 1.2898199558258057, "learning_rate": 9.8252154375167e-06, "loss": 0.6213, "step": 874 }, { "epoch": 0.11, "grad_norm": 1.1529115438461304, "learning_rate": 9.824671175218999e-06, "loss": 0.6626, "step": 875 }, { "epoch": 0.11, "grad_norm": 1.5466557741165161, "learning_rate": 9.82412608196975e-06, "loss": 0.5785, "step": 876 }, { "epoch": 0.11, "grad_norm": 0.961628794670105, "learning_rate": 9.82358015786284e-06, "loss": 0.5305, "step": 877 }, { "epoch": 0.11, "grad_norm": 1.2553181648254395, "learning_rate": 9.823033402992292e-06, "loss": 0.6527, "step": 878 }, { "epoch": 0.11, "grad_norm": 1.308699607849121, "learning_rate": 9.82248581745227e-06, "loss": 0.6968, "step": 879 }, { "epoch": 0.11, "grad_norm": 2.1388115882873535, "learning_rate": 9.821937401337086e-06, "loss": 0.71, "step": 880 }, { "epoch": 0.11, "grad_norm": 1.1120529174804688, "learning_rate": 9.821388154741198e-06, "loss": 0.7006, "step": 881 }, { "epoch": 0.11, "grad_norm": 1.2875672578811646, "learning_rate": 9.820838077759195e-06, "loss": 0.638, "step": 882 }, { "epoch": 0.11, "grad_norm": 1.338017463684082, "learning_rate": 9.820287170485822e-06, "loss": 0.5622, "step": 883 }, { "epoch": 0.11, "grad_norm": 1.2129225730895996, "learning_rate": 9.81973543301596e-06, "loss": 0.6085, "step": 884 }, { "epoch": 0.11, "grad_norm": 1.669423222541809, "learning_rate": 9.819182865444633e-06, "loss": 0.6449, "step": 885 }, { "epoch": 0.11, "grad_norm": 1.1424647569656372, "learning_rate": 9.81862946786701e-06, "loss": 0.5807, "step": 886 }, { "epoch": 0.11, "grad_norm": 1.2412691116333008, "learning_rate": 9.818075240378406e-06, "loss": 0.6645, "step": 887 }, { "epoch": 0.11, "grad_norm": 1.2484862804412842, "learning_rate": 9.817520183074273e-06, "loss": 0.6166, "step": 888 }, { "epoch": 0.11, "grad_norm": 1.2282848358154297, "learning_rate": 9.816964296050208e-06, "loss": 0.5975, "step": 889 }, { "epoch": 0.11, "grad_norm": 1.0314067602157593, "learning_rate": 9.81640757940195e-06, "loss": 0.6313, "step": 890 }, { "epoch": 0.11, "grad_norm": 1.192475438117981, "learning_rate": 9.815850033225384e-06, "loss": 0.6075, "step": 891 }, { "epoch": 0.11, "grad_norm": 1.1968879699707031, "learning_rate": 9.815291657616537e-06, "loss": 0.6369, "step": 892 }, { "epoch": 0.11, "grad_norm": 1.4490315914154053, "learning_rate": 9.814732452671575e-06, "loss": 0.6103, "step": 893 }, { "epoch": 0.11, "grad_norm": 1.05226731300354, "learning_rate": 9.81417241848681e-06, "loss": 0.6934, "step": 894 }, { "epoch": 0.11, "grad_norm": 1.841873049736023, "learning_rate": 9.813611555158701e-06, "loss": 0.6649, "step": 895 }, { "epoch": 0.11, "grad_norm": 1.2420374155044556, "learning_rate": 9.81304986278384e-06, "loss": 0.6011, "step": 896 }, { "epoch": 0.11, "grad_norm": 1.2118569612503052, "learning_rate": 9.81248734145897e-06, "loss": 0.6009, "step": 897 }, { "epoch": 0.12, "grad_norm": 1.2946357727050781, "learning_rate": 9.811923991280972e-06, "loss": 0.6431, "step": 898 }, { "epoch": 0.12, "grad_norm": 1.1636987924575806, "learning_rate": 9.811359812346872e-06, "loss": 0.741, "step": 899 }, { "epoch": 0.12, "grad_norm": 1.2798045873641968, "learning_rate": 9.810794804753838e-06, "loss": 0.5853, "step": 900 }, { "epoch": 0.12, "grad_norm": 1.2087842226028442, "learning_rate": 9.810228968599185e-06, "loss": 0.675, "step": 901 }, { "epoch": 0.12, "grad_norm": 1.2338616847991943, "learning_rate": 9.809662303980361e-06, "loss": 0.62, "step": 902 }, { "epoch": 0.12, "grad_norm": 1.2784311771392822, "learning_rate": 9.809094810994966e-06, "loss": 0.6567, "step": 903 }, { "epoch": 0.12, "grad_norm": 1.1460515260696411, "learning_rate": 9.808526489740739e-06, "loss": 0.6384, "step": 904 }, { "epoch": 0.12, "grad_norm": 1.9330402612686157, "learning_rate": 9.80795734031556e-06, "loss": 0.5802, "step": 905 }, { "epoch": 0.12, "grad_norm": 1.233659029006958, "learning_rate": 9.807387362817455e-06, "loss": 0.7364, "step": 906 }, { "epoch": 0.12, "grad_norm": 3.561555862426758, "learning_rate": 9.806816557344592e-06, "loss": 0.6519, "step": 907 }, { "epoch": 0.12, "grad_norm": 1.063315510749817, "learning_rate": 9.806244923995275e-06, "loss": 0.6765, "step": 908 }, { "epoch": 0.12, "grad_norm": 1.2617602348327637, "learning_rate": 9.805672462867966e-06, "loss": 0.6217, "step": 909 }, { "epoch": 0.12, "grad_norm": 1.2094106674194336, "learning_rate": 9.80509917406125e-06, "loss": 0.5438, "step": 910 }, { "epoch": 0.12, "grad_norm": 1.0630391836166382, "learning_rate": 9.804525057673871e-06, "loss": 0.5533, "step": 911 }, { "epoch": 0.12, "grad_norm": 1.280405879020691, "learning_rate": 9.803950113804705e-06, "loss": 0.6173, "step": 912 }, { "epoch": 0.12, "grad_norm": 0.9976212978363037, "learning_rate": 9.803374342552776e-06, "loss": 0.6336, "step": 913 }, { "epoch": 0.12, "grad_norm": 1.2395212650299072, "learning_rate": 9.80279774401725e-06, "loss": 0.639, "step": 914 }, { "epoch": 0.12, "grad_norm": 1.0815329551696777, "learning_rate": 9.802220318297432e-06, "loss": 0.5816, "step": 915 }, { "epoch": 0.12, "grad_norm": 1.3019007444381714, "learning_rate": 9.801642065492773e-06, "loss": 0.5943, "step": 916 }, { "epoch": 0.12, "grad_norm": 1.2104711532592773, "learning_rate": 9.801062985702865e-06, "loss": 0.6206, "step": 917 }, { "epoch": 0.12, "grad_norm": 1.823017954826355, "learning_rate": 9.800483079027443e-06, "loss": 0.6293, "step": 918 }, { "epoch": 0.12, "grad_norm": 1.2773282527923584, "learning_rate": 9.799902345566384e-06, "loss": 0.5979, "step": 919 }, { "epoch": 0.12, "grad_norm": 1.3274632692337036, "learning_rate": 9.799320785419709e-06, "loss": 0.6451, "step": 920 }, { "epoch": 0.12, "grad_norm": 1.7359416484832764, "learning_rate": 9.798738398687577e-06, "loss": 0.6662, "step": 921 }, { "epoch": 0.12, "grad_norm": 1.245658278465271, "learning_rate": 9.798155185470296e-06, "loss": 0.726, "step": 922 }, { "epoch": 0.12, "grad_norm": 2.3268086910247803, "learning_rate": 9.797571145868308e-06, "loss": 0.6302, "step": 923 }, { "epoch": 0.12, "grad_norm": 1.6008139848709106, "learning_rate": 9.796986279982204e-06, "loss": 0.6036, "step": 924 }, { "epoch": 0.12, "grad_norm": 1.4884922504425049, "learning_rate": 9.796400587912717e-06, "loss": 0.5932, "step": 925 }, { "epoch": 0.12, "grad_norm": 1.7857106924057007, "learning_rate": 9.795814069760719e-06, "loss": 0.7126, "step": 926 }, { "epoch": 0.12, "grad_norm": 1.5720033645629883, "learning_rate": 9.795226725627225e-06, "loss": 0.6674, "step": 927 }, { "epoch": 0.12, "grad_norm": 1.4257798194885254, "learning_rate": 9.794638555613396e-06, "loss": 0.6579, "step": 928 }, { "epoch": 0.12, "grad_norm": 1.1718521118164062, "learning_rate": 9.794049559820531e-06, "loss": 0.7428, "step": 929 }, { "epoch": 0.12, "grad_norm": 1.1296699047088623, "learning_rate": 9.79345973835007e-06, "loss": 0.6206, "step": 930 }, { "epoch": 0.12, "grad_norm": 1.188733458518982, "learning_rate": 9.7928690913036e-06, "loss": 0.6334, "step": 931 }, { "epoch": 0.12, "grad_norm": 1.2753381729125977, "learning_rate": 9.792277618782848e-06, "loss": 0.5784, "step": 932 }, { "epoch": 0.12, "grad_norm": 1.5454579591751099, "learning_rate": 9.791685320889683e-06, "loss": 0.6151, "step": 933 }, { "epoch": 0.12, "grad_norm": 1.210261344909668, "learning_rate": 9.791092197726118e-06, "loss": 0.6163, "step": 934 }, { "epoch": 0.12, "grad_norm": 1.2641156911849976, "learning_rate": 9.790498249394301e-06, "loss": 0.5978, "step": 935 }, { "epoch": 0.12, "grad_norm": 1.322339415550232, "learning_rate": 9.789903475996533e-06, "loss": 0.6684, "step": 936 }, { "epoch": 0.12, "grad_norm": 1.0136418342590332, "learning_rate": 9.78930787763525e-06, "loss": 0.6621, "step": 937 }, { "epoch": 0.12, "grad_norm": 1.7304836511611938, "learning_rate": 9.78871145441303e-06, "loss": 0.679, "step": 938 }, { "epoch": 0.12, "grad_norm": 1.1343514919281006, "learning_rate": 9.788114206432597e-06, "loss": 0.6472, "step": 939 }, { "epoch": 0.12, "grad_norm": 1.1854240894317627, "learning_rate": 9.787516133796816e-06, "loss": 0.5903, "step": 940 }, { "epoch": 0.12, "grad_norm": 1.0253652334213257, "learning_rate": 9.786917236608687e-06, "loss": 0.6461, "step": 941 }, { "epoch": 0.12, "grad_norm": 1.077979564666748, "learning_rate": 9.786317514971364e-06, "loss": 0.6138, "step": 942 }, { "epoch": 0.12, "grad_norm": 1.2683407068252563, "learning_rate": 9.785716968988133e-06, "loss": 0.6181, "step": 943 }, { "epoch": 0.12, "grad_norm": 1.0985451936721802, "learning_rate": 9.785115598762429e-06, "loss": 0.6436, "step": 944 }, { "epoch": 0.12, "grad_norm": 1.1492769718170166, "learning_rate": 9.784513404397823e-06, "loss": 0.6205, "step": 945 }, { "epoch": 0.12, "grad_norm": 1.1750531196594238, "learning_rate": 9.783910385998034e-06, "loss": 0.6014, "step": 946 }, { "epoch": 0.12, "grad_norm": 1.1315195560455322, "learning_rate": 9.783306543666916e-06, "loss": 0.6288, "step": 947 }, { "epoch": 0.12, "grad_norm": 1.1699100732803345, "learning_rate": 9.78270187750847e-06, "loss": 0.6545, "step": 948 }, { "epoch": 0.12, "grad_norm": 1.1685606241226196, "learning_rate": 9.782096387626839e-06, "loss": 0.6522, "step": 949 }, { "epoch": 0.12, "grad_norm": 1.6117463111877441, "learning_rate": 9.781490074126305e-06, "loss": 0.593, "step": 950 }, { "epoch": 0.12, "grad_norm": 1.1152682304382324, "learning_rate": 9.780882937111292e-06, "loss": 0.6196, "step": 951 }, { "epoch": 0.12, "grad_norm": 1.2452787160873413, "learning_rate": 9.780274976686369e-06, "loss": 0.6037, "step": 952 }, { "epoch": 0.12, "grad_norm": 1.2231415510177612, "learning_rate": 9.779666192956243e-06, "loss": 0.5567, "step": 953 }, { "epoch": 0.12, "grad_norm": 1.1500630378723145, "learning_rate": 9.779056586025766e-06, "loss": 0.6495, "step": 954 }, { "epoch": 0.12, "grad_norm": 1.3221664428710938, "learning_rate": 9.77844615599993e-06, "loss": 0.6374, "step": 955 }, { "epoch": 0.12, "grad_norm": 1.1846741437911987, "learning_rate": 9.777834902983869e-06, "loss": 0.6598, "step": 956 }, { "epoch": 0.12, "grad_norm": 1.418726921081543, "learning_rate": 9.77722282708286e-06, "loss": 0.7151, "step": 957 }, { "epoch": 0.12, "grad_norm": 1.4351948499679565, "learning_rate": 9.776609928402319e-06, "loss": 0.6353, "step": 958 }, { "epoch": 0.12, "grad_norm": 1.146484136581421, "learning_rate": 9.775996207047804e-06, "loss": 0.6261, "step": 959 }, { "epoch": 0.12, "grad_norm": 1.1938923597335815, "learning_rate": 9.775381663125021e-06, "loss": 0.6846, "step": 960 }, { "epoch": 0.12, "grad_norm": 1.3629497289657593, "learning_rate": 9.774766296739808e-06, "loss": 0.6335, "step": 961 }, { "epoch": 0.12, "grad_norm": 1.2427971363067627, "learning_rate": 9.774150107998153e-06, "loss": 0.7141, "step": 962 }, { "epoch": 0.12, "grad_norm": 1.049538016319275, "learning_rate": 9.77353309700618e-06, "loss": 0.6309, "step": 963 }, { "epoch": 0.12, "grad_norm": 1.2472624778747559, "learning_rate": 9.772915263870155e-06, "loss": 0.6833, "step": 964 }, { "epoch": 0.12, "grad_norm": 1.1295534372329712, "learning_rate": 9.77229660869649e-06, "loss": 0.5353, "step": 965 }, { "epoch": 0.12, "grad_norm": 1.565324306488037, "learning_rate": 9.771677131591734e-06, "loss": 0.6867, "step": 966 }, { "epoch": 0.12, "grad_norm": 1.5009647607803345, "learning_rate": 9.771056832662581e-06, "loss": 0.6216, "step": 967 }, { "epoch": 0.12, "grad_norm": 1.4722943305969238, "learning_rate": 9.770435712015863e-06, "loss": 0.6958, "step": 968 }, { "epoch": 0.12, "grad_norm": 1.2581201791763306, "learning_rate": 9.769813769758556e-06, "loss": 0.455, "step": 969 }, { "epoch": 0.12, "grad_norm": 1.1791751384735107, "learning_rate": 9.769191005997781e-06, "loss": 0.6739, "step": 970 }, { "epoch": 0.12, "grad_norm": 1.7730474472045898, "learning_rate": 9.76856742084079e-06, "loss": 0.6605, "step": 971 }, { "epoch": 0.12, "grad_norm": 1.2831331491470337, "learning_rate": 9.767943014394982e-06, "loss": 0.6259, "step": 972 }, { "epoch": 0.12, "grad_norm": 1.7151639461517334, "learning_rate": 9.767317786767907e-06, "loss": 0.6003, "step": 973 }, { "epoch": 0.12, "grad_norm": 1.258449912071228, "learning_rate": 9.76669173806724e-06, "loss": 0.6271, "step": 974 }, { "epoch": 0.12, "grad_norm": 1.1486563682556152, "learning_rate": 9.766064868400808e-06, "loss": 0.6293, "step": 975 }, { "epoch": 0.13, "grad_norm": 1.290198564529419, "learning_rate": 9.765437177876576e-06, "loss": 0.6359, "step": 976 }, { "epoch": 0.13, "grad_norm": 1.152216911315918, "learning_rate": 9.764808666602651e-06, "loss": 0.6303, "step": 977 }, { "epoch": 0.13, "grad_norm": 1.1518402099609375, "learning_rate": 9.764179334687284e-06, "loss": 0.6305, "step": 978 }, { "epoch": 0.13, "grad_norm": 1.1398547887802124, "learning_rate": 9.76354918223886e-06, "loss": 0.5392, "step": 979 }, { "epoch": 0.13, "grad_norm": 1.361898422241211, "learning_rate": 9.762918209365914e-06, "loss": 0.6505, "step": 980 }, { "epoch": 0.13, "grad_norm": 1.2657636404037476, "learning_rate": 9.762286416177114e-06, "loss": 0.631, "step": 981 }, { "epoch": 0.13, "grad_norm": 1.4761649370193481, "learning_rate": 9.761653802781276e-06, "loss": 0.6788, "step": 982 }, { "epoch": 0.13, "grad_norm": 1.5255000591278076, "learning_rate": 9.761020369287358e-06, "loss": 0.572, "step": 983 }, { "epoch": 0.13, "grad_norm": 1.4596574306488037, "learning_rate": 9.76038611580445e-06, "loss": 0.6187, "step": 984 }, { "epoch": 0.13, "grad_norm": 1.4689172506332397, "learning_rate": 9.759751042441792e-06, "loss": 0.5464, "step": 985 }, { "epoch": 0.13, "grad_norm": 1.1188050508499146, "learning_rate": 9.759115149308761e-06, "loss": 0.6616, "step": 986 }, { "epoch": 0.13, "grad_norm": 1.3672010898590088, "learning_rate": 9.758478436514882e-06, "loss": 0.7279, "step": 987 }, { "epoch": 0.13, "grad_norm": 1.522152304649353, "learning_rate": 9.75784090416981e-06, "loss": 0.5857, "step": 988 }, { "epoch": 0.13, "grad_norm": 1.1300358772277832, "learning_rate": 9.757202552383349e-06, "loss": 0.6825, "step": 989 }, { "epoch": 0.13, "grad_norm": 1.252014398574829, "learning_rate": 9.756563381265443e-06, "loss": 0.6484, "step": 990 }, { "epoch": 0.13, "grad_norm": 1.564777135848999, "learning_rate": 9.755923390926173e-06, "loss": 0.6302, "step": 991 }, { "epoch": 0.13, "grad_norm": 1.6608165502548218, "learning_rate": 9.755282581475769e-06, "loss": 0.6619, "step": 992 }, { "epoch": 0.13, "grad_norm": 2.2679226398468018, "learning_rate": 9.754640953024595e-06, "loss": 0.6166, "step": 993 }, { "epoch": 0.13, "grad_norm": 1.119726538658142, "learning_rate": 9.753998505683157e-06, "loss": 0.6459, "step": 994 }, { "epoch": 0.13, "grad_norm": 1.1786524057388306, "learning_rate": 9.753355239562106e-06, "loss": 0.6327, "step": 995 }, { "epoch": 0.13, "grad_norm": 1.1883645057678223, "learning_rate": 9.75271115477223e-06, "loss": 0.6711, "step": 996 }, { "epoch": 0.13, "grad_norm": 1.6034826040267944, "learning_rate": 9.75206625142446e-06, "loss": 0.6559, "step": 997 }, { "epoch": 0.13, "grad_norm": 1.1943557262420654, "learning_rate": 9.751420529629869e-06, "loss": 0.5873, "step": 998 }, { "epoch": 0.13, "grad_norm": 1.274271011352539, "learning_rate": 9.750773989499665e-06, "loss": 0.6383, "step": 999 }, { "epoch": 0.13, "grad_norm": 1.3029786348342896, "learning_rate": 9.750126631145207e-06, "loss": 0.6523, "step": 1000 }, { "epoch": 0.13, "grad_norm": 1.4161441326141357, "learning_rate": 9.749478454677986e-06, "loss": 0.6542, "step": 1001 }, { "epoch": 0.13, "grad_norm": 1.0572928190231323, "learning_rate": 9.748829460209638e-06, "loss": 0.6725, "step": 1002 }, { "epoch": 0.13, "grad_norm": 1.0757194757461548, "learning_rate": 9.748179647851939e-06, "loss": 0.5555, "step": 1003 }, { "epoch": 0.13, "grad_norm": 2.006316900253296, "learning_rate": 9.747529017716806e-06, "loss": 0.6257, "step": 1004 }, { "epoch": 0.13, "grad_norm": 1.1826972961425781, "learning_rate": 9.746877569916297e-06, "loss": 0.6045, "step": 1005 }, { "epoch": 0.13, "grad_norm": 1.1543837785720825, "learning_rate": 9.74622530456261e-06, "loss": 0.5696, "step": 1006 }, { "epoch": 0.13, "grad_norm": 1.5225355625152588, "learning_rate": 9.745572221768085e-06, "loss": 0.6135, "step": 1007 }, { "epoch": 0.13, "grad_norm": 1.3935890197753906, "learning_rate": 9.744918321645203e-06, "loss": 0.6532, "step": 1008 }, { "epoch": 0.13, "grad_norm": 1.2201215028762817, "learning_rate": 9.744263604306582e-06, "loss": 0.654, "step": 1009 }, { "epoch": 0.13, "grad_norm": 1.4088938236236572, "learning_rate": 9.743608069864988e-06, "loss": 0.6236, "step": 1010 }, { "epoch": 0.13, "grad_norm": 1.1685870885849, "learning_rate": 9.742951718433321e-06, "loss": 0.6695, "step": 1011 }, { "epoch": 0.13, "grad_norm": 1.9566943645477295, "learning_rate": 9.742294550124625e-06, "loss": 0.6839, "step": 1012 }, { "epoch": 0.13, "grad_norm": 1.222412347793579, "learning_rate": 9.741636565052081e-06, "loss": 0.614, "step": 1013 }, { "epoch": 0.13, "grad_norm": 1.131980299949646, "learning_rate": 9.740977763329018e-06, "loss": 0.598, "step": 1014 }, { "epoch": 0.13, "grad_norm": 1.1899813413619995, "learning_rate": 9.740318145068902e-06, "loss": 0.6678, "step": 1015 }, { "epoch": 0.13, "grad_norm": 1.1226192712783813, "learning_rate": 9.739657710385332e-06, "loss": 0.6595, "step": 1016 }, { "epoch": 0.13, "grad_norm": 3.9692671298980713, "learning_rate": 9.73899645939206e-06, "loss": 0.6617, "step": 1017 }, { "epoch": 0.13, "grad_norm": 1.3050158023834229, "learning_rate": 9.738334392202971e-06, "loss": 0.6292, "step": 1018 }, { "epoch": 0.13, "grad_norm": 1.244711995124817, "learning_rate": 9.737671508932093e-06, "loss": 0.6136, "step": 1019 }, { "epoch": 0.13, "grad_norm": 1.4500633478164673, "learning_rate": 9.737007809693595e-06, "loss": 0.6355, "step": 1020 }, { "epoch": 0.13, "grad_norm": 1.2068707942962646, "learning_rate": 9.736343294601784e-06, "loss": 0.5697, "step": 1021 }, { "epoch": 0.13, "grad_norm": 1.1867763996124268, "learning_rate": 9.735677963771112e-06, "loss": 0.672, "step": 1022 }, { "epoch": 0.13, "grad_norm": 1.561131477355957, "learning_rate": 9.735011817316166e-06, "loss": 0.6256, "step": 1023 }, { "epoch": 0.13, "grad_norm": 4.072286128997803, "learning_rate": 9.734344855351678e-06, "loss": 0.6563, "step": 1024 }, { "epoch": 0.13, "grad_norm": 1.22327721118927, "learning_rate": 9.733677077992515e-06, "loss": 0.6342, "step": 1025 }, { "epoch": 0.13, "grad_norm": 1.7818228006362915, "learning_rate": 9.733008485353694e-06, "loss": 0.645, "step": 1026 }, { "epoch": 0.13, "grad_norm": 1.4120960235595703, "learning_rate": 9.732339077550364e-06, "loss": 0.6384, "step": 1027 }, { "epoch": 0.13, "grad_norm": 1.8205273151397705, "learning_rate": 9.731668854697814e-06, "loss": 0.6097, "step": 1028 }, { "epoch": 0.13, "grad_norm": 1.6517037153244019, "learning_rate": 9.730997816911479e-06, "loss": 0.6568, "step": 1029 }, { "epoch": 0.13, "grad_norm": 1.1279886960983276, "learning_rate": 9.730325964306931e-06, "loss": 0.5864, "step": 1030 }, { "epoch": 0.13, "grad_norm": 1.175146222114563, "learning_rate": 9.729653296999883e-06, "loss": 0.6494, "step": 1031 }, { "epoch": 0.13, "grad_norm": 1.0476584434509277, "learning_rate": 9.72897981510619e-06, "loss": 0.5538, "step": 1032 }, { "epoch": 0.13, "grad_norm": 1.0945863723754883, "learning_rate": 9.728305518741844e-06, "loss": 0.589, "step": 1033 }, { "epoch": 0.13, "grad_norm": 1.1936923265457153, "learning_rate": 9.727630408022977e-06, "loss": 0.5998, "step": 1034 }, { "epoch": 0.13, "grad_norm": 1.1819581985473633, "learning_rate": 9.726954483065864e-06, "loss": 0.6506, "step": 1035 }, { "epoch": 0.13, "grad_norm": 1.1652607917785645, "learning_rate": 9.726277743986923e-06, "loss": 0.6516, "step": 1036 }, { "epoch": 0.13, "grad_norm": 1.4037582874298096, "learning_rate": 9.725600190902704e-06, "loss": 0.7143, "step": 1037 }, { "epoch": 0.13, "grad_norm": 1.1429693698883057, "learning_rate": 9.724921823929905e-06, "loss": 0.6593, "step": 1038 }, { "epoch": 0.13, "grad_norm": 1.021639347076416, "learning_rate": 9.724242643185357e-06, "loss": 0.6161, "step": 1039 }, { "epoch": 0.13, "grad_norm": 1.2161128520965576, "learning_rate": 9.72356264878604e-06, "loss": 0.6372, "step": 1040 }, { "epoch": 0.13, "grad_norm": 1.1113938093185425, "learning_rate": 9.722881840849066e-06, "loss": 0.63, "step": 1041 }, { "epoch": 0.13, "grad_norm": 1.1752476692199707, "learning_rate": 9.722200219491692e-06, "loss": 0.7094, "step": 1042 }, { "epoch": 0.13, "grad_norm": 1.142798662185669, "learning_rate": 9.72151778483131e-06, "loss": 0.6925, "step": 1043 }, { "epoch": 0.13, "grad_norm": 1.1876989603042603, "learning_rate": 9.72083453698546e-06, "loss": 0.6793, "step": 1044 }, { "epoch": 0.13, "grad_norm": 1.0826714038848877, "learning_rate": 9.720150476071814e-06, "loss": 0.6886, "step": 1045 }, { "epoch": 0.13, "grad_norm": 2.2857930660247803, "learning_rate": 9.71946560220819e-06, "loss": 0.5505, "step": 1046 }, { "epoch": 0.13, "grad_norm": 1.1982847452163696, "learning_rate": 9.718779915512542e-06, "loss": 0.651, "step": 1047 }, { "epoch": 0.13, "grad_norm": 1.4459384679794312, "learning_rate": 9.718093416102967e-06, "loss": 0.6223, "step": 1048 }, { "epoch": 0.13, "grad_norm": 1.1260446310043335, "learning_rate": 9.717406104097702e-06, "loss": 0.6248, "step": 1049 }, { "epoch": 0.13, "grad_norm": 1.0635147094726562, "learning_rate": 9.716717979615118e-06, "loss": 0.5919, "step": 1050 }, { "epoch": 0.13, "grad_norm": 1.1763317584991455, "learning_rate": 9.716029042773733e-06, "loss": 0.636, "step": 1051 }, { "epoch": 0.13, "grad_norm": 1.2086479663848877, "learning_rate": 9.715339293692203e-06, "loss": 0.651, "step": 1052 }, { "epoch": 0.13, "grad_norm": 1.3193252086639404, "learning_rate": 9.714648732489322e-06, "loss": 0.633, "step": 1053 }, { "epoch": 0.14, "grad_norm": 1.6744786500930786, "learning_rate": 9.713957359284025e-06, "loss": 0.5887, "step": 1054 }, { "epoch": 0.14, "grad_norm": 1.2904282808303833, "learning_rate": 9.713265174195388e-06, "loss": 0.68, "step": 1055 }, { "epoch": 0.14, "grad_norm": 1.4331204891204834, "learning_rate": 9.712572177342627e-06, "loss": 0.5989, "step": 1056 }, { "epoch": 0.14, "grad_norm": 0.9341638684272766, "learning_rate": 9.711878368845094e-06, "loss": 0.647, "step": 1057 }, { "epoch": 0.14, "grad_norm": 1.3745675086975098, "learning_rate": 9.711183748822284e-06, "loss": 0.6102, "step": 1058 }, { "epoch": 0.14, "grad_norm": 1.2217185497283936, "learning_rate": 9.710488317393832e-06, "loss": 0.6091, "step": 1059 }, { "epoch": 0.14, "grad_norm": 2.1755502223968506, "learning_rate": 9.709792074679511e-06, "loss": 0.6482, "step": 1060 }, { "epoch": 0.14, "grad_norm": 1.2285881042480469, "learning_rate": 9.709095020799236e-06, "loss": 0.7853, "step": 1061 }, { "epoch": 0.14, "grad_norm": 1.1444462537765503, "learning_rate": 9.70839715587306e-06, "loss": 0.6351, "step": 1062 }, { "epoch": 0.14, "grad_norm": 1.0068621635437012, "learning_rate": 9.707698480021174e-06, "loss": 0.7285, "step": 1063 }, { "epoch": 0.14, "grad_norm": 1.1805295944213867, "learning_rate": 9.706998993363913e-06, "loss": 0.5867, "step": 1064 }, { "epoch": 0.14, "grad_norm": 1.2351415157318115, "learning_rate": 9.70629869602175e-06, "loss": 0.5677, "step": 1065 }, { "epoch": 0.14, "grad_norm": 1.22136652469635, "learning_rate": 9.705597588115295e-06, "loss": 0.647, "step": 1066 }, { "epoch": 0.14, "grad_norm": 1.124104619026184, "learning_rate": 9.7048956697653e-06, "loss": 0.5704, "step": 1067 }, { "epoch": 0.14, "grad_norm": 1.2475918531417847, "learning_rate": 9.704192941092657e-06, "loss": 0.6191, "step": 1068 }, { "epoch": 0.14, "grad_norm": 1.219774842262268, "learning_rate": 9.703489402218394e-06, "loss": 0.7046, "step": 1069 }, { "epoch": 0.14, "grad_norm": 1.1891069412231445, "learning_rate": 9.702785053263685e-06, "loss": 0.5343, "step": 1070 }, { "epoch": 0.14, "grad_norm": 1.167614459991455, "learning_rate": 9.702079894349838e-06, "loss": 0.6189, "step": 1071 }, { "epoch": 0.14, "grad_norm": 1.0407310724258423, "learning_rate": 9.701373925598304e-06, "loss": 0.6745, "step": 1072 }, { "epoch": 0.14, "grad_norm": 1.5812993049621582, "learning_rate": 9.70066714713067e-06, "loss": 0.632, "step": 1073 }, { "epoch": 0.14, "grad_norm": 1.2237656116485596, "learning_rate": 9.699959559068664e-06, "loss": 0.738, "step": 1074 }, { "epoch": 0.14, "grad_norm": 1.2793015241622925, "learning_rate": 9.699251161534153e-06, "loss": 0.7889, "step": 1075 }, { "epoch": 0.14, "grad_norm": 1.167056679725647, "learning_rate": 9.698541954649145e-06, "loss": 0.6292, "step": 1076 }, { "epoch": 0.14, "grad_norm": 1.5537493228912354, "learning_rate": 9.69783193853579e-06, "loss": 0.6604, "step": 1077 }, { "epoch": 0.14, "grad_norm": 1.2457462549209595, "learning_rate": 9.697121113316367e-06, "loss": 0.618, "step": 1078 }, { "epoch": 0.14, "grad_norm": 1.6209555864334106, "learning_rate": 9.696409479113306e-06, "loss": 0.5909, "step": 1079 }, { "epoch": 0.14, "grad_norm": 1.1111366748809814, "learning_rate": 9.695697036049173e-06, "loss": 0.6108, "step": 1080 }, { "epoch": 0.14, "grad_norm": 1.2718392610549927, "learning_rate": 9.694983784246665e-06, "loss": 0.7539, "step": 1081 }, { "epoch": 0.14, "grad_norm": 1.2289928197860718, "learning_rate": 9.69426972382863e-06, "loss": 0.5455, "step": 1082 }, { "epoch": 0.14, "grad_norm": 1.0418940782546997, "learning_rate": 9.693554854918052e-06, "loss": 0.5804, "step": 1083 }, { "epoch": 0.14, "grad_norm": 1.1238722801208496, "learning_rate": 9.692839177638049e-06, "loss": 0.6448, "step": 1084 }, { "epoch": 0.14, "grad_norm": 1.0625381469726562, "learning_rate": 9.692122692111883e-06, "loss": 0.6399, "step": 1085 }, { "epoch": 0.14, "grad_norm": 1.4463095664978027, "learning_rate": 9.691405398462956e-06, "loss": 0.6596, "step": 1086 }, { "epoch": 0.14, "grad_norm": 1.1603282690048218, "learning_rate": 9.690687296814803e-06, "loss": 0.615, "step": 1087 }, { "epoch": 0.14, "grad_norm": 1.1188900470733643, "learning_rate": 9.689968387291107e-06, "loss": 0.5733, "step": 1088 }, { "epoch": 0.14, "grad_norm": 1.2128100395202637, "learning_rate": 9.68924867001568e-06, "loss": 0.6144, "step": 1089 }, { "epoch": 0.14, "grad_norm": 1.295868158340454, "learning_rate": 9.688528145112486e-06, "loss": 0.6801, "step": 1090 }, { "epoch": 0.14, "grad_norm": 1.2260469198226929, "learning_rate": 9.687806812705616e-06, "loss": 0.5645, "step": 1091 }, { "epoch": 0.14, "grad_norm": 1.2258944511413574, "learning_rate": 9.687084672919305e-06, "loss": 0.5889, "step": 1092 }, { "epoch": 0.14, "grad_norm": 1.4465066194534302, "learning_rate": 9.686361725877926e-06, "loss": 0.6063, "step": 1093 }, { "epoch": 0.14, "grad_norm": 1.3785881996154785, "learning_rate": 9.685637971705998e-06, "loss": 0.5688, "step": 1094 }, { "epoch": 0.14, "grad_norm": 1.1346842050552368, "learning_rate": 9.684913410528164e-06, "loss": 0.609, "step": 1095 }, { "epoch": 0.14, "grad_norm": 1.1224477291107178, "learning_rate": 9.68418804246922e-06, "loss": 0.6036, "step": 1096 }, { "epoch": 0.14, "grad_norm": 1.019282579421997, "learning_rate": 9.683461867654098e-06, "loss": 0.6989, "step": 1097 }, { "epoch": 0.14, "grad_norm": 1.0686557292938232, "learning_rate": 9.682734886207863e-06, "loss": 0.586, "step": 1098 }, { "epoch": 0.14, "grad_norm": 1.1529673337936401, "learning_rate": 9.682007098255724e-06, "loss": 0.5873, "step": 1099 }, { "epoch": 0.14, "grad_norm": 1.140615463256836, "learning_rate": 9.681278503923026e-06, "loss": 0.5277, "step": 1100 }, { "epoch": 0.14, "grad_norm": 1.3383703231811523, "learning_rate": 9.680549103335259e-06, "loss": 0.6375, "step": 1101 }, { "epoch": 0.14, "grad_norm": 1.2095407247543335, "learning_rate": 9.679818896618044e-06, "loss": 0.7485, "step": 1102 }, { "epoch": 0.14, "grad_norm": 1.5559945106506348, "learning_rate": 9.679087883897142e-06, "loss": 0.5913, "step": 1103 }, { "epoch": 0.14, "grad_norm": 1.2311608791351318, "learning_rate": 9.678356065298463e-06, "loss": 0.6464, "step": 1104 }, { "epoch": 0.14, "grad_norm": 1.1227388381958008, "learning_rate": 9.677623440948039e-06, "loss": 0.6501, "step": 1105 }, { "epoch": 0.14, "grad_norm": 1.120994210243225, "learning_rate": 9.676890010972055e-06, "loss": 0.5631, "step": 1106 }, { "epoch": 0.14, "grad_norm": 1.5825780630111694, "learning_rate": 9.676155775496828e-06, "loss": 0.6043, "step": 1107 }, { "epoch": 0.14, "grad_norm": 1.1823532581329346, "learning_rate": 9.675420734648814e-06, "loss": 0.5822, "step": 1108 }, { "epoch": 0.14, "grad_norm": 1.5761380195617676, "learning_rate": 9.674684888554611e-06, "loss": 0.6976, "step": 1109 }, { "epoch": 0.14, "grad_norm": 1.2008821964263916, "learning_rate": 9.673948237340951e-06, "loss": 0.5651, "step": 1110 }, { "epoch": 0.14, "grad_norm": 1.1293318271636963, "learning_rate": 9.67321078113471e-06, "loss": 0.6455, "step": 1111 }, { "epoch": 0.14, "grad_norm": 1.1750528812408447, "learning_rate": 9.6724725200629e-06, "loss": 0.6694, "step": 1112 }, { "epoch": 0.14, "grad_norm": 1.3616904020309448, "learning_rate": 9.671733454252668e-06, "loss": 0.6136, "step": 1113 }, { "epoch": 0.14, "grad_norm": 1.360683560371399, "learning_rate": 9.670993583831305e-06, "loss": 0.6378, "step": 1114 }, { "epoch": 0.14, "grad_norm": 1.126110553741455, "learning_rate": 9.67025290892624e-06, "loss": 0.584, "step": 1115 }, { "epoch": 0.14, "grad_norm": 1.2391899824142456, "learning_rate": 9.669511429665035e-06, "loss": 0.6807, "step": 1116 }, { "epoch": 0.14, "grad_norm": 1.075810432434082, "learning_rate": 9.668769146175401e-06, "loss": 0.6883, "step": 1117 }, { "epoch": 0.14, "grad_norm": 0.9843566417694092, "learning_rate": 9.668026058585175e-06, "loss": 0.6793, "step": 1118 }, { "epoch": 0.14, "grad_norm": 1.44784677028656, "learning_rate": 9.667282167022343e-06, "loss": 0.6444, "step": 1119 }, { "epoch": 0.14, "grad_norm": 1.0862517356872559, "learning_rate": 9.666537471615024e-06, "loss": 0.5965, "step": 1120 }, { "epoch": 0.14, "grad_norm": 1.401402235031128, "learning_rate": 9.665791972491476e-06, "loss": 0.6598, "step": 1121 }, { "epoch": 0.14, "grad_norm": 1.1191637516021729, "learning_rate": 9.665045669780098e-06, "loss": 0.7505, "step": 1122 }, { "epoch": 0.14, "grad_norm": 1.114385962486267, "learning_rate": 9.664298563609422e-06, "loss": 0.655, "step": 1123 }, { "epoch": 0.14, "grad_norm": 4.621027946472168, "learning_rate": 9.663550654108124e-06, "loss": 0.58, "step": 1124 }, { "epoch": 0.14, "grad_norm": 1.0535495281219482, "learning_rate": 9.662801941405016e-06, "loss": 0.5866, "step": 1125 }, { "epoch": 0.14, "grad_norm": 1.036341905593872, "learning_rate": 9.662052425629049e-06, "loss": 0.5704, "step": 1126 }, { "epoch": 0.14, "grad_norm": 1.1747620105743408, "learning_rate": 9.661302106909311e-06, "loss": 0.5937, "step": 1127 }, { "epoch": 0.14, "grad_norm": 1.3271484375, "learning_rate": 9.66055098537503e-06, "loss": 0.6407, "step": 1128 }, { "epoch": 0.14, "grad_norm": 1.1150386333465576, "learning_rate": 9.659799061155573e-06, "loss": 0.6461, "step": 1129 }, { "epoch": 0.14, "grad_norm": 1.1988625526428223, "learning_rate": 9.659046334380438e-06, "loss": 0.6438, "step": 1130 }, { "epoch": 0.14, "grad_norm": 1.3866018056869507, "learning_rate": 9.658292805179272e-06, "loss": 0.6631, "step": 1131 }, { "epoch": 0.15, "grad_norm": 1.2857260704040527, "learning_rate": 9.657538473681855e-06, "loss": 0.6402, "step": 1132 }, { "epoch": 0.15, "grad_norm": 1.7626216411590576, "learning_rate": 9.656783340018103e-06, "loss": 0.6901, "step": 1133 }, { "epoch": 0.15, "grad_norm": 1.3316880464553833, "learning_rate": 9.656027404318073e-06, "loss": 0.655, "step": 1134 }, { "epoch": 0.15, "grad_norm": 1.3176456689834595, "learning_rate": 9.65527066671196e-06, "loss": 0.6157, "step": 1135 }, { "epoch": 0.15, "grad_norm": 1.27821683883667, "learning_rate": 9.654513127330097e-06, "loss": 0.752, "step": 1136 }, { "epoch": 0.15, "grad_norm": 1.4629007577896118, "learning_rate": 9.653754786302953e-06, "loss": 0.6666, "step": 1137 }, { "epoch": 0.15, "grad_norm": 1.0769317150115967, "learning_rate": 9.652995643761139e-06, "loss": 0.6648, "step": 1138 }, { "epoch": 0.15, "grad_norm": 1.0857163667678833, "learning_rate": 9.652235699835402e-06, "loss": 0.5751, "step": 1139 }, { "epoch": 0.15, "grad_norm": 1.2306089401245117, "learning_rate": 9.651474954656626e-06, "loss": 0.6447, "step": 1140 }, { "epoch": 0.15, "grad_norm": 1.1074614524841309, "learning_rate": 9.650713408355832e-06, "loss": 0.6232, "step": 1141 }, { "epoch": 0.15, "grad_norm": 1.069425106048584, "learning_rate": 9.649951061064184e-06, "loss": 0.6284, "step": 1142 }, { "epoch": 0.15, "grad_norm": 1.4408040046691895, "learning_rate": 9.649187912912978e-06, "loss": 0.603, "step": 1143 }, { "epoch": 0.15, "grad_norm": 1.965793490409851, "learning_rate": 9.648423964033653e-06, "loss": 0.609, "step": 1144 }, { "epoch": 0.15, "grad_norm": 1.2444179058074951, "learning_rate": 9.647659214557784e-06, "loss": 0.6883, "step": 1145 }, { "epoch": 0.15, "grad_norm": 1.3600820302963257, "learning_rate": 9.646893664617082e-06, "loss": 0.6466, "step": 1146 }, { "epoch": 0.15, "grad_norm": 1.187540888786316, "learning_rate": 9.646127314343398e-06, "loss": 0.6567, "step": 1147 }, { "epoch": 0.15, "grad_norm": 1.062788486480713, "learning_rate": 9.64536016386872e-06, "loss": 0.6549, "step": 1148 }, { "epoch": 0.15, "grad_norm": 1.4273498058319092, "learning_rate": 9.644592213325174e-06, "loss": 0.573, "step": 1149 }, { "epoch": 0.15, "grad_norm": 2.6031153202056885, "learning_rate": 9.643823462845023e-06, "loss": 0.6893, "step": 1150 }, { "epoch": 0.15, "grad_norm": 1.0803120136260986, "learning_rate": 9.643053912560672e-06, "loss": 0.6714, "step": 1151 }, { "epoch": 0.15, "grad_norm": 1.4752882719039917, "learning_rate": 9.642283562604658e-06, "loss": 0.6817, "step": 1152 }, { "epoch": 0.15, "grad_norm": 1.3621220588684082, "learning_rate": 9.641512413109656e-06, "loss": 0.6725, "step": 1153 }, { "epoch": 0.15, "grad_norm": 1.622391700744629, "learning_rate": 9.640740464208484e-06, "loss": 0.6874, "step": 1154 }, { "epoch": 0.15, "grad_norm": 1.2322818040847778, "learning_rate": 9.639967716034095e-06, "loss": 0.6397, "step": 1155 }, { "epoch": 0.15, "grad_norm": 1.1383590698242188, "learning_rate": 9.639194168719577e-06, "loss": 0.6025, "step": 1156 }, { "epoch": 0.15, "grad_norm": 1.2494959831237793, "learning_rate": 9.638419822398159e-06, "loss": 0.6905, "step": 1157 }, { "epoch": 0.15, "grad_norm": 1.1969854831695557, "learning_rate": 9.637644677203205e-06, "loss": 0.5309, "step": 1158 }, { "epoch": 0.15, "grad_norm": 1.3315047025680542, "learning_rate": 9.636868733268218e-06, "loss": 0.691, "step": 1159 }, { "epoch": 0.15, "grad_norm": 1.0063773393630981, "learning_rate": 9.636091990726843e-06, "loss": 0.5789, "step": 1160 }, { "epoch": 0.15, "grad_norm": 0.9836358428001404, "learning_rate": 9.635314449712852e-06, "loss": 0.6658, "step": 1161 }, { "epoch": 0.15, "grad_norm": 1.1989636421203613, "learning_rate": 9.634536110360164e-06, "loss": 0.6999, "step": 1162 }, { "epoch": 0.15, "grad_norm": 1.0682141780853271, "learning_rate": 9.633756972802832e-06, "loss": 0.6808, "step": 1163 }, { "epoch": 0.15, "grad_norm": 1.0833097696304321, "learning_rate": 9.632977037175046e-06, "loss": 0.6291, "step": 1164 }, { "epoch": 0.15, "grad_norm": 1.168710470199585, "learning_rate": 9.632196303611131e-06, "loss": 0.721, "step": 1165 }, { "epoch": 0.15, "grad_norm": 1.2726129293441772, "learning_rate": 9.63141477224556e-06, "loss": 0.6047, "step": 1166 }, { "epoch": 0.15, "grad_norm": 1.4029903411865234, "learning_rate": 9.63063244321293e-06, "loss": 0.6346, "step": 1167 }, { "epoch": 0.15, "grad_norm": 1.2573045492172241, "learning_rate": 9.629849316647983e-06, "loss": 0.5604, "step": 1168 }, { "epoch": 0.15, "grad_norm": 1.3267803192138672, "learning_rate": 9.629065392685594e-06, "loss": 0.7928, "step": 1169 }, { "epoch": 0.15, "grad_norm": 1.286224126815796, "learning_rate": 9.628280671460784e-06, "loss": 0.6406, "step": 1170 }, { "epoch": 0.15, "grad_norm": 1.2955398559570312, "learning_rate": 9.627495153108698e-06, "loss": 0.6572, "step": 1171 }, { "epoch": 0.15, "grad_norm": 1.24276864528656, "learning_rate": 9.626708837764634e-06, "loss": 0.6414, "step": 1172 }, { "epoch": 0.15, "grad_norm": 1.2331914901733398, "learning_rate": 9.62592172556401e-06, "loss": 0.5933, "step": 1173 }, { "epoch": 0.15, "grad_norm": 1.0565133094787598, "learning_rate": 9.625133816642395e-06, "loss": 0.7286, "step": 1174 }, { "epoch": 0.15, "grad_norm": 1.4214918613433838, "learning_rate": 9.624345111135492e-06, "loss": 0.6858, "step": 1175 }, { "epoch": 0.15, "grad_norm": 1.2732834815979004, "learning_rate": 9.623555609179134e-06, "loss": 0.6623, "step": 1176 }, { "epoch": 0.15, "grad_norm": 1.498653769493103, "learning_rate": 9.6227653109093e-06, "loss": 0.6133, "step": 1177 }, { "epoch": 0.15, "grad_norm": 1.5062004327774048, "learning_rate": 9.621974216462101e-06, "loss": 0.6736, "step": 1178 }, { "epoch": 0.15, "grad_norm": 1.379712462425232, "learning_rate": 9.621182325973792e-06, "loss": 0.6587, "step": 1179 }, { "epoch": 0.15, "grad_norm": 1.242719292640686, "learning_rate": 9.620389639580753e-06, "loss": 0.7805, "step": 1180 }, { "epoch": 0.15, "grad_norm": 1.2989170551300049, "learning_rate": 9.619596157419514e-06, "loss": 0.6942, "step": 1181 }, { "epoch": 0.15, "grad_norm": 1.712216854095459, "learning_rate": 9.618801879626734e-06, "loss": 0.6364, "step": 1182 }, { "epoch": 0.15, "grad_norm": 2.460629940032959, "learning_rate": 9.618006806339209e-06, "loss": 0.6022, "step": 1183 }, { "epoch": 0.15, "grad_norm": 1.011130928993225, "learning_rate": 9.617210937693876e-06, "loss": 0.6697, "step": 1184 }, { "epoch": 0.15, "grad_norm": 0.9472834467887878, "learning_rate": 9.616414273827808e-06, "loss": 0.5716, "step": 1185 }, { "epoch": 0.15, "grad_norm": 1.0987147092819214, "learning_rate": 9.615616814878213e-06, "loss": 0.6417, "step": 1186 }, { "epoch": 0.15, "grad_norm": 1.320693850517273, "learning_rate": 9.614818560982439e-06, "loss": 0.6858, "step": 1187 }, { "epoch": 0.15, "grad_norm": 1.2989826202392578, "learning_rate": 9.614019512277966e-06, "loss": 0.6196, "step": 1188 }, { "epoch": 0.15, "grad_norm": 1.199810266494751, "learning_rate": 9.613219668902417e-06, "loss": 0.7115, "step": 1189 }, { "epoch": 0.15, "grad_norm": 1.0295907258987427, "learning_rate": 9.612419030993545e-06, "loss": 0.7165, "step": 1190 }, { "epoch": 0.15, "grad_norm": 1.7425963878631592, "learning_rate": 9.611617598689248e-06, "loss": 0.5757, "step": 1191 }, { "epoch": 0.15, "grad_norm": 0.9693262577056885, "learning_rate": 9.610815372127553e-06, "loss": 0.534, "step": 1192 }, { "epoch": 0.15, "grad_norm": 1.3094513416290283, "learning_rate": 9.610012351446629e-06, "loss": 0.5866, "step": 1193 }, { "epoch": 0.15, "grad_norm": 1.1087934970855713, "learning_rate": 9.60920853678478e-06, "loss": 0.6526, "step": 1194 }, { "epoch": 0.15, "grad_norm": 0.9788431525230408, "learning_rate": 9.608403928280445e-06, "loss": 0.5877, "step": 1195 }, { "epoch": 0.15, "grad_norm": 1.0494896173477173, "learning_rate": 9.607598526072203e-06, "loss": 0.6029, "step": 1196 }, { "epoch": 0.15, "grad_norm": 1.3554272651672363, "learning_rate": 9.606792330298767e-06, "loss": 0.6737, "step": 1197 }, { "epoch": 0.15, "grad_norm": 1.2967965602874756, "learning_rate": 9.60598534109899e-06, "loss": 0.7037, "step": 1198 }, { "epoch": 0.15, "grad_norm": 1.304180383682251, "learning_rate": 9.605177558611859e-06, "loss": 0.6131, "step": 1199 }, { "epoch": 0.15, "grad_norm": 1.786306619644165, "learning_rate": 9.604368982976495e-06, "loss": 0.6623, "step": 1200 }, { "epoch": 0.15, "grad_norm": 1.139390468597412, "learning_rate": 9.603559614332162e-06, "loss": 0.5915, "step": 1201 }, { "epoch": 0.15, "grad_norm": 1.1185011863708496, "learning_rate": 9.602749452818256e-06, "loss": 0.5222, "step": 1202 }, { "epoch": 0.15, "grad_norm": 1.157008409500122, "learning_rate": 9.601938498574312e-06, "loss": 0.6954, "step": 1203 }, { "epoch": 0.15, "grad_norm": 2.68686842918396, "learning_rate": 9.601126751739997e-06, "loss": 0.6317, "step": 1204 }, { "epoch": 0.15, "grad_norm": 1.0472536087036133, "learning_rate": 9.600314212455125e-06, "loss": 0.6819, "step": 1205 }, { "epoch": 0.15, "grad_norm": 3.0865917205810547, "learning_rate": 9.599500880859632e-06, "loss": 0.572, "step": 1206 }, { "epoch": 0.15, "grad_norm": 1.095560073852539, "learning_rate": 9.5986867570936e-06, "loss": 0.6667, "step": 1207 }, { "epoch": 0.15, "grad_norm": 1.0914700031280518, "learning_rate": 9.597871841297246e-06, "loss": 0.6668, "step": 1208 }, { "epoch": 0.15, "grad_norm": 1.1000221967697144, "learning_rate": 9.597056133610923e-06, "loss": 0.6105, "step": 1209 }, { "epoch": 0.16, "grad_norm": 1.5570249557495117, "learning_rate": 9.596239634175119e-06, "loss": 0.6424, "step": 1210 }, { "epoch": 0.16, "grad_norm": 1.3458069562911987, "learning_rate": 9.595422343130462e-06, "loss": 0.6504, "step": 1211 }, { "epoch": 0.16, "grad_norm": 1.21104896068573, "learning_rate": 9.59460426061771e-06, "loss": 0.6194, "step": 1212 }, { "epoch": 0.16, "grad_norm": 1.3381779193878174, "learning_rate": 9.593785386777763e-06, "loss": 0.5563, "step": 1213 }, { "epoch": 0.16, "grad_norm": 1.3582426309585571, "learning_rate": 9.592965721751657e-06, "loss": 0.5941, "step": 1214 }, { "epoch": 0.16, "grad_norm": 1.5648891925811768, "learning_rate": 9.59214526568056e-06, "loss": 0.5831, "step": 1215 }, { "epoch": 0.16, "grad_norm": 1.3309880495071411, "learning_rate": 9.591324018705779e-06, "loss": 0.5284, "step": 1216 }, { "epoch": 0.16, "grad_norm": 1.4098135232925415, "learning_rate": 9.590501980968758e-06, "loss": 0.7147, "step": 1217 }, { "epoch": 0.16, "grad_norm": 1.2456073760986328, "learning_rate": 9.589679152611078e-06, "loss": 0.5923, "step": 1218 }, { "epoch": 0.16, "grad_norm": 1.2749463319778442, "learning_rate": 9.58885553377445e-06, "loss": 0.6064, "step": 1219 }, { "epoch": 0.16, "grad_norm": 1.382809042930603, "learning_rate": 9.58803112460073e-06, "loss": 0.6162, "step": 1220 }, { "epoch": 0.16, "grad_norm": 1.5532960891723633, "learning_rate": 9.587205925231904e-06, "loss": 0.6222, "step": 1221 }, { "epoch": 0.16, "grad_norm": 1.1475423574447632, "learning_rate": 9.586379935810096e-06, "loss": 0.5918, "step": 1222 }, { "epoch": 0.16, "grad_norm": 1.1791549921035767, "learning_rate": 9.585553156477565e-06, "loss": 0.5603, "step": 1223 }, { "epoch": 0.16, "grad_norm": 1.2540336847305298, "learning_rate": 9.584725587376708e-06, "loss": 0.6537, "step": 1224 }, { "epoch": 0.16, "grad_norm": 1.3627375364303589, "learning_rate": 9.583897228650056e-06, "loss": 0.6247, "step": 1225 }, { "epoch": 0.16, "grad_norm": 1.172321081161499, "learning_rate": 9.583068080440278e-06, "loss": 0.7451, "step": 1226 }, { "epoch": 0.16, "grad_norm": 1.1737582683563232, "learning_rate": 9.582238142890179e-06, "loss": 0.6437, "step": 1227 }, { "epoch": 0.16, "grad_norm": 1.0629295110702515, "learning_rate": 9.581407416142696e-06, "loss": 0.6622, "step": 1228 }, { "epoch": 0.16, "grad_norm": 1.3306161165237427, "learning_rate": 9.580575900340904e-06, "loss": 0.7027, "step": 1229 }, { "epoch": 0.16, "grad_norm": 1.119742512702942, "learning_rate": 9.57974359562802e-06, "loss": 0.6573, "step": 1230 }, { "epoch": 0.16, "grad_norm": 1.1853952407836914, "learning_rate": 9.578910502147387e-06, "loss": 0.7601, "step": 1231 }, { "epoch": 0.16, "grad_norm": 2.302206516265869, "learning_rate": 9.578076620042492e-06, "loss": 0.616, "step": 1232 }, { "epoch": 0.16, "grad_norm": 1.1335538625717163, "learning_rate": 9.57724194945695e-06, "loss": 0.5944, "step": 1233 }, { "epoch": 0.16, "grad_norm": 1.0384563207626343, "learning_rate": 9.576406490534518e-06, "loss": 0.6307, "step": 1234 }, { "epoch": 0.16, "grad_norm": 1.1229547262191772, "learning_rate": 9.575570243419087e-06, "loss": 0.585, "step": 1235 }, { "epoch": 0.16, "grad_norm": 1.213402271270752, "learning_rate": 9.574733208254684e-06, "loss": 0.5698, "step": 1236 }, { "epoch": 0.16, "grad_norm": 1.1010196208953857, "learning_rate": 9.57389538518547e-06, "loss": 0.593, "step": 1237 }, { "epoch": 0.16, "grad_norm": 1.357301115989685, "learning_rate": 9.573056774355745e-06, "loss": 0.6061, "step": 1238 }, { "epoch": 0.16, "grad_norm": 1.095462679862976, "learning_rate": 9.57221737590994e-06, "loss": 0.6389, "step": 1239 }, { "epoch": 0.16, "grad_norm": 1.4295891523361206, "learning_rate": 9.571377189992627e-06, "loss": 0.6627, "step": 1240 }, { "epoch": 0.16, "grad_norm": 1.268510341644287, "learning_rate": 9.570536216748509e-06, "loss": 0.6064, "step": 1241 }, { "epoch": 0.16, "grad_norm": 1.1977204084396362, "learning_rate": 9.569694456322428e-06, "loss": 0.6375, "step": 1242 }, { "epoch": 0.16, "grad_norm": 1.0961164236068726, "learning_rate": 9.56885190885936e-06, "loss": 0.6977, "step": 1243 }, { "epoch": 0.16, "grad_norm": 1.396316409111023, "learning_rate": 9.568008574504415e-06, "loss": 0.6978, "step": 1244 }, { "epoch": 0.16, "grad_norm": 2.1277964115142822, "learning_rate": 9.56716445340284e-06, "loss": 0.5666, "step": 1245 }, { "epoch": 0.16, "grad_norm": 1.1821579933166504, "learning_rate": 9.566319545700021e-06, "loss": 0.6249, "step": 1246 }, { "epoch": 0.16, "grad_norm": 1.187343955039978, "learning_rate": 9.565473851541473e-06, "loss": 0.6518, "step": 1247 }, { "epoch": 0.16, "grad_norm": 1.382359504699707, "learning_rate": 9.564627371072853e-06, "loss": 0.6206, "step": 1248 }, { "epoch": 0.16, "grad_norm": 1.0749714374542236, "learning_rate": 9.563780104439945e-06, "loss": 0.6691, "step": 1249 }, { "epoch": 0.16, "grad_norm": 1.6738566160202026, "learning_rate": 9.562932051788677e-06, "loss": 0.6744, "step": 1250 }, { "epoch": 0.16, "grad_norm": 1.1276246309280396, "learning_rate": 9.56208321326511e-06, "loss": 0.6447, "step": 1251 }, { "epoch": 0.16, "grad_norm": 1.1615641117095947, "learning_rate": 9.561233589015435e-06, "loss": 0.4971, "step": 1252 }, { "epoch": 0.16, "grad_norm": 1.220762848854065, "learning_rate": 9.560383179185986e-06, "loss": 0.6402, "step": 1253 }, { "epoch": 0.16, "grad_norm": 1.1979705095291138, "learning_rate": 9.559531983923226e-06, "loss": 0.6028, "step": 1254 }, { "epoch": 0.16, "grad_norm": 1.3428279161453247, "learning_rate": 9.558680003373762e-06, "loss": 0.65, "step": 1255 }, { "epoch": 0.16, "grad_norm": 1.0979018211364746, "learning_rate": 9.557827237684324e-06, "loss": 0.582, "step": 1256 }, { "epoch": 0.16, "grad_norm": 1.1736618280410767, "learning_rate": 9.556973687001782e-06, "loss": 0.6034, "step": 1257 }, { "epoch": 0.16, "grad_norm": 1.021039366722107, "learning_rate": 9.55611935147315e-06, "loss": 0.5749, "step": 1258 }, { "epoch": 0.16, "grad_norm": 1.1807692050933838, "learning_rate": 9.555264231245566e-06, "loss": 0.652, "step": 1259 }, { "epoch": 0.16, "grad_norm": 0.9832674264907837, "learning_rate": 9.554408326466306e-06, "loss": 0.6777, "step": 1260 }, { "epoch": 0.16, "grad_norm": 1.0194718837738037, "learning_rate": 9.553551637282785e-06, "loss": 0.6897, "step": 1261 }, { "epoch": 0.16, "grad_norm": 1.3178449869155884, "learning_rate": 9.552694163842548e-06, "loss": 0.6381, "step": 1262 }, { "epoch": 0.16, "grad_norm": 1.1459720134735107, "learning_rate": 9.55183590629328e-06, "loss": 0.6509, "step": 1263 }, { "epoch": 0.16, "grad_norm": 1.71978759765625, "learning_rate": 9.550976864782792e-06, "loss": 0.6781, "step": 1264 }, { "epoch": 0.16, "grad_norm": 1.1249512434005737, "learning_rate": 9.550117039459045e-06, "loss": 0.5807, "step": 1265 }, { "epoch": 0.16, "grad_norm": 1.0362927913665771, "learning_rate": 9.549256430470123e-06, "loss": 0.6622, "step": 1266 }, { "epoch": 0.16, "grad_norm": 1.005173683166504, "learning_rate": 9.548395037964247e-06, "loss": 0.6316, "step": 1267 }, { "epoch": 0.16, "grad_norm": 1.2679580450057983, "learning_rate": 9.547532862089775e-06, "loss": 0.5937, "step": 1268 }, { "epoch": 0.16, "grad_norm": 1.0311354398727417, "learning_rate": 9.5466699029952e-06, "loss": 0.5042, "step": 1269 }, { "epoch": 0.16, "grad_norm": 0.9762603044509888, "learning_rate": 9.545806160829147e-06, "loss": 0.6358, "step": 1270 }, { "epoch": 0.16, "grad_norm": 0.9036137461662292, "learning_rate": 9.544941635740382e-06, "loss": 0.6065, "step": 1271 }, { "epoch": 0.16, "grad_norm": 1.0758832693099976, "learning_rate": 9.544076327877799e-06, "loss": 0.5666, "step": 1272 }, { "epoch": 0.16, "grad_norm": 1.2641098499298096, "learning_rate": 9.543210237390429e-06, "loss": 0.6395, "step": 1273 }, { "epoch": 0.16, "grad_norm": 1.3716449737548828, "learning_rate": 9.54234336442744e-06, "loss": 0.7927, "step": 1274 }, { "epoch": 0.16, "grad_norm": 1.1750115156173706, "learning_rate": 9.541475709138135e-06, "loss": 0.5781, "step": 1275 }, { "epoch": 0.16, "grad_norm": 1.270869493484497, "learning_rate": 9.540607271671947e-06, "loss": 0.7835, "step": 1276 }, { "epoch": 0.16, "grad_norm": 1.7524586915969849, "learning_rate": 9.539738052178446e-06, "loss": 0.6404, "step": 1277 }, { "epoch": 0.16, "grad_norm": 1.035844326019287, "learning_rate": 9.538868050807342e-06, "loss": 0.6465, "step": 1278 }, { "epoch": 0.16, "grad_norm": 1.166962742805481, "learning_rate": 9.53799726770847e-06, "loss": 0.5387, "step": 1279 }, { "epoch": 0.16, "grad_norm": 1.160826563835144, "learning_rate": 9.537125703031809e-06, "loss": 0.6005, "step": 1280 }, { "epoch": 0.16, "grad_norm": 1.0476465225219727, "learning_rate": 9.536253356927465e-06, "loss": 0.6716, "step": 1281 }, { "epoch": 0.16, "grad_norm": 1.0793278217315674, "learning_rate": 9.535380229545684e-06, "loss": 0.7254, "step": 1282 }, { "epoch": 0.16, "grad_norm": 1.1872631311416626, "learning_rate": 9.534506321036842e-06, "loss": 0.6274, "step": 1283 }, { "epoch": 0.16, "grad_norm": 1.5636075735092163, "learning_rate": 9.533631631551455e-06, "loss": 0.6149, "step": 1284 }, { "epoch": 0.16, "grad_norm": 1.2059067487716675, "learning_rate": 9.532756161240166e-06, "loss": 0.6378, "step": 1285 }, { "epoch": 0.16, "grad_norm": 1.154067039489746, "learning_rate": 9.531879910253762e-06, "loss": 0.7198, "step": 1286 }, { "epoch": 0.16, "grad_norm": 1.2851213216781616, "learning_rate": 9.531002878743158e-06, "loss": 0.6087, "step": 1287 }, { "epoch": 0.17, "grad_norm": 1.1828138828277588, "learning_rate": 9.530125066859404e-06, "loss": 0.7095, "step": 1288 }, { "epoch": 0.17, "grad_norm": 1.218656063079834, "learning_rate": 9.529246474753684e-06, "loss": 0.6411, "step": 1289 }, { "epoch": 0.17, "grad_norm": 1.1605652570724487, "learning_rate": 9.528367102577321e-06, "loss": 0.6413, "step": 1290 }, { "epoch": 0.17, "grad_norm": 1.1710295677185059, "learning_rate": 9.527486950481765e-06, "loss": 0.6295, "step": 1291 }, { "epoch": 0.17, "grad_norm": 0.972027063369751, "learning_rate": 9.526606018618608e-06, "loss": 0.5839, "step": 1292 }, { "epoch": 0.17, "grad_norm": 1.12654709815979, "learning_rate": 9.52572430713957e-06, "loss": 0.5989, "step": 1293 }, { "epoch": 0.17, "grad_norm": 1.2400918006896973, "learning_rate": 9.52484181619651e-06, "loss": 0.5958, "step": 1294 }, { "epoch": 0.17, "grad_norm": 1.2699034214019775, "learning_rate": 9.523958545941417e-06, "loss": 0.6808, "step": 1295 }, { "epoch": 0.17, "grad_norm": 1.1967825889587402, "learning_rate": 9.523074496526418e-06, "loss": 0.6116, "step": 1296 }, { "epoch": 0.17, "grad_norm": 1.2007198333740234, "learning_rate": 9.522189668103771e-06, "loss": 0.564, "step": 1297 }, { "epoch": 0.17, "grad_norm": 1.3140076398849487, "learning_rate": 9.521304060825872e-06, "loss": 0.6417, "step": 1298 }, { "epoch": 0.17, "grad_norm": 1.6398093700408936, "learning_rate": 9.520417674845246e-06, "loss": 0.6366, "step": 1299 }, { "epoch": 0.17, "grad_norm": 1.0856890678405762, "learning_rate": 9.519530510314558e-06, "loss": 0.6931, "step": 1300 }, { "epoch": 0.17, "grad_norm": 1.219240665435791, "learning_rate": 9.518642567386603e-06, "loss": 0.6608, "step": 1301 }, { "epoch": 0.17, "grad_norm": 1.2921411991119385, "learning_rate": 9.517753846214309e-06, "loss": 0.6584, "step": 1302 }, { "epoch": 0.17, "grad_norm": 1.0977832078933716, "learning_rate": 9.51686434695074e-06, "loss": 0.6219, "step": 1303 }, { "epoch": 0.17, "grad_norm": 1.0890233516693115, "learning_rate": 9.515974069749098e-06, "loss": 0.6462, "step": 1304 }, { "epoch": 0.17, "grad_norm": 1.3405479192733765, "learning_rate": 9.515083014762714e-06, "loss": 0.5702, "step": 1305 }, { "epoch": 0.17, "grad_norm": 0.9640637636184692, "learning_rate": 9.514191182145052e-06, "loss": 0.6735, "step": 1306 }, { "epoch": 0.17, "grad_norm": 1.3195781707763672, "learning_rate": 9.513298572049714e-06, "loss": 0.6042, "step": 1307 }, { "epoch": 0.17, "grad_norm": 1.054443120956421, "learning_rate": 9.512405184630432e-06, "loss": 0.6659, "step": 1308 }, { "epoch": 0.17, "grad_norm": 2.662334442138672, "learning_rate": 9.511511020041076e-06, "loss": 0.5981, "step": 1309 }, { "epoch": 0.17, "grad_norm": 1.3885622024536133, "learning_rate": 9.510616078435646e-06, "loss": 0.7479, "step": 1310 }, { "epoch": 0.17, "grad_norm": 1.1111118793487549, "learning_rate": 9.509720359968279e-06, "loss": 0.6262, "step": 1311 }, { "epoch": 0.17, "grad_norm": 1.2182552814483643, "learning_rate": 9.508823864793242e-06, "loss": 0.6059, "step": 1312 }, { "epoch": 0.17, "grad_norm": 1.1193416118621826, "learning_rate": 9.507926593064941e-06, "loss": 0.6478, "step": 1313 }, { "epoch": 0.17, "grad_norm": 1.3522484302520752, "learning_rate": 9.50702854493791e-06, "loss": 0.6145, "step": 1314 }, { "epoch": 0.17, "grad_norm": 1.071323275566101, "learning_rate": 9.506129720566823e-06, "loss": 0.6033, "step": 1315 }, { "epoch": 0.17, "grad_norm": 1.270236611366272, "learning_rate": 9.50523012010648e-06, "loss": 0.5783, "step": 1316 }, { "epoch": 0.17, "grad_norm": 1.0845481157302856, "learning_rate": 9.504329743711822e-06, "loss": 0.6448, "step": 1317 }, { "epoch": 0.17, "grad_norm": 1.229878544807434, "learning_rate": 9.50342859153792e-06, "loss": 0.6432, "step": 1318 }, { "epoch": 0.17, "grad_norm": 1.2673510313034058, "learning_rate": 9.50252666373998e-06, "loss": 0.6371, "step": 1319 }, { "epoch": 0.17, "grad_norm": 1.5885368585586548, "learning_rate": 9.501623960473336e-06, "loss": 0.669, "step": 1320 }, { "epoch": 0.17, "grad_norm": 1.237269401550293, "learning_rate": 9.500720481893466e-06, "loss": 0.6404, "step": 1321 }, { "epoch": 0.17, "grad_norm": 1.3405085802078247, "learning_rate": 9.499816228155972e-06, "loss": 0.625, "step": 1322 }, { "epoch": 0.17, "grad_norm": 0.9338662624359131, "learning_rate": 9.498911199416596e-06, "loss": 0.5723, "step": 1323 }, { "epoch": 0.17, "grad_norm": 1.9581477642059326, "learning_rate": 9.49800539583121e-06, "loss": 0.5658, "step": 1324 }, { "epoch": 0.17, "grad_norm": 1.2048728466033936, "learning_rate": 9.497098817555818e-06, "loss": 0.628, "step": 1325 }, { "epoch": 0.17, "grad_norm": 1.456356406211853, "learning_rate": 9.496191464746564e-06, "loss": 0.6503, "step": 1326 }, { "epoch": 0.17, "grad_norm": 1.1231420040130615, "learning_rate": 9.495283337559718e-06, "loss": 0.5696, "step": 1327 }, { "epoch": 0.17, "grad_norm": 1.0166943073272705, "learning_rate": 9.494374436151688e-06, "loss": 0.63, "step": 1328 }, { "epoch": 0.17, "grad_norm": 1.5634657144546509, "learning_rate": 9.493464760679012e-06, "loss": 0.6734, "step": 1329 }, { "epoch": 0.17, "grad_norm": 1.0731712579727173, "learning_rate": 9.492554311298363e-06, "loss": 0.6909, "step": 1330 }, { "epoch": 0.17, "grad_norm": 1.0333720445632935, "learning_rate": 9.49164308816655e-06, "loss": 0.6646, "step": 1331 }, { "epoch": 0.17, "grad_norm": 0.9412813186645508, "learning_rate": 9.49073109144051e-06, "loss": 0.6295, "step": 1332 }, { "epoch": 0.17, "grad_norm": 1.3674019575119019, "learning_rate": 9.489818321277318e-06, "loss": 0.6832, "step": 1333 }, { "epoch": 0.17, "grad_norm": 1.2104706764221191, "learning_rate": 9.488904777834178e-06, "loss": 0.6606, "step": 1334 }, { "epoch": 0.17, "grad_norm": 1.2919623851776123, "learning_rate": 9.487990461268431e-06, "loss": 0.6844, "step": 1335 }, { "epoch": 0.17, "grad_norm": 1.6904457807540894, "learning_rate": 9.48707537173755e-06, "loss": 0.6159, "step": 1336 }, { "epoch": 0.17, "grad_norm": 0.9881489872932434, "learning_rate": 9.486159509399138e-06, "loss": 0.5704, "step": 1337 }, { "epoch": 0.17, "grad_norm": 1.1692382097244263, "learning_rate": 9.485242874410936e-06, "loss": 0.6502, "step": 1338 }, { "epoch": 0.17, "grad_norm": 1.3247543573379517, "learning_rate": 9.484325466930814e-06, "loss": 0.6244, "step": 1339 }, { "epoch": 0.17, "grad_norm": 1.2117483615875244, "learning_rate": 9.483407287116777e-06, "loss": 0.5659, "step": 1340 }, { "epoch": 0.17, "grad_norm": 0.9740604162216187, "learning_rate": 9.482488335126964e-06, "loss": 0.6486, "step": 1341 }, { "epoch": 0.17, "grad_norm": 1.1840041875839233, "learning_rate": 9.481568611119647e-06, "loss": 0.7439, "step": 1342 }, { "epoch": 0.17, "grad_norm": 1.2369718551635742, "learning_rate": 9.480648115253225e-06, "loss": 0.5795, "step": 1343 }, { "epoch": 0.17, "grad_norm": 1.4914673566818237, "learning_rate": 9.479726847686241e-06, "loss": 0.5856, "step": 1344 }, { "epoch": 0.17, "grad_norm": 1.54400634765625, "learning_rate": 9.478804808577359e-06, "loss": 0.6174, "step": 1345 }, { "epoch": 0.17, "grad_norm": 1.0934762954711914, "learning_rate": 9.477881998085386e-06, "loss": 0.6775, "step": 1346 }, { "epoch": 0.17, "grad_norm": 1.1581801176071167, "learning_rate": 9.476958416369253e-06, "loss": 0.619, "step": 1347 }, { "epoch": 0.17, "grad_norm": 1.2244946956634521, "learning_rate": 9.47603406358803e-06, "loss": 0.5794, "step": 1348 }, { "epoch": 0.17, "grad_norm": 1.1591740846633911, "learning_rate": 9.47510893990092e-06, "loss": 0.6477, "step": 1349 }, { "epoch": 0.17, "grad_norm": 1.1715471744537354, "learning_rate": 9.474183045467255e-06, "loss": 0.6397, "step": 1350 }, { "epoch": 0.17, "grad_norm": 1.2296302318572998, "learning_rate": 9.473256380446501e-06, "loss": 0.7369, "step": 1351 }, { "epoch": 0.17, "grad_norm": 1.7023544311523438, "learning_rate": 9.472328944998256e-06, "loss": 0.53, "step": 1352 }, { "epoch": 0.17, "grad_norm": 1.0957367420196533, "learning_rate": 9.471400739282258e-06, "loss": 0.5876, "step": 1353 }, { "epoch": 0.17, "grad_norm": 1.7862164974212646, "learning_rate": 9.470471763458364e-06, "loss": 0.666, "step": 1354 }, { "epoch": 0.17, "grad_norm": 1.7836625576019287, "learning_rate": 9.469542017686574e-06, "loss": 0.6425, "step": 1355 }, { "epoch": 0.17, "grad_norm": 1.832993984222412, "learning_rate": 9.468611502127021e-06, "loss": 0.5768, "step": 1356 }, { "epoch": 0.17, "grad_norm": 1.1718406677246094, "learning_rate": 9.467680216939964e-06, "loss": 0.5895, "step": 1357 }, { "epoch": 0.17, "grad_norm": 1.15514075756073, "learning_rate": 9.466748162285797e-06, "loss": 0.5945, "step": 1358 }, { "epoch": 0.17, "grad_norm": 1.2014521360397339, "learning_rate": 9.46581533832505e-06, "loss": 0.6036, "step": 1359 }, { "epoch": 0.17, "grad_norm": 1.222567081451416, "learning_rate": 9.464881745218382e-06, "loss": 0.6194, "step": 1360 }, { "epoch": 0.17, "grad_norm": 1.3445652723312378, "learning_rate": 9.463947383126586e-06, "loss": 0.6184, "step": 1361 }, { "epoch": 0.17, "grad_norm": 1.1077783107757568, "learning_rate": 9.463012252210586e-06, "loss": 0.6802, "step": 1362 }, { "epoch": 0.17, "grad_norm": 1.173924446105957, "learning_rate": 9.46207635263144e-06, "loss": 0.6404, "step": 1363 }, { "epoch": 0.17, "grad_norm": 1.214298129081726, "learning_rate": 9.461139684550335e-06, "loss": 0.6512, "step": 1364 }, { "epoch": 0.17, "grad_norm": 1.7848788499832153, "learning_rate": 9.460202248128598e-06, "loss": 0.5918, "step": 1365 }, { "epoch": 0.18, "grad_norm": 1.0078115463256836, "learning_rate": 9.45926404352768e-06, "loss": 0.517, "step": 1366 }, { "epoch": 0.18, "grad_norm": 1.179234504699707, "learning_rate": 9.458325070909169e-06, "loss": 0.7112, "step": 1367 }, { "epoch": 0.18, "grad_norm": 1.8004214763641357, "learning_rate": 9.457385330434782e-06, "loss": 0.6046, "step": 1368 }, { "epoch": 0.18, "grad_norm": 1.1646499633789062, "learning_rate": 9.456444822266373e-06, "loss": 0.5833, "step": 1369 }, { "epoch": 0.18, "grad_norm": 1.1174657344818115, "learning_rate": 9.455503546565923e-06, "loss": 0.6349, "step": 1370 }, { "epoch": 0.18, "grad_norm": 1.5000091791152954, "learning_rate": 9.45456150349555e-06, "loss": 0.6107, "step": 1371 }, { "epoch": 0.18, "grad_norm": 1.1921900510787964, "learning_rate": 9.453618693217498e-06, "loss": 0.6262, "step": 1372 }, { "epoch": 0.18, "grad_norm": 1.1785988807678223, "learning_rate": 9.452675115894151e-06, "loss": 0.6661, "step": 1373 }, { "epoch": 0.18, "grad_norm": 1.1983801126480103, "learning_rate": 9.45173077168802e-06, "loss": 0.6726, "step": 1374 }, { "epoch": 0.18, "grad_norm": 1.2910903692245483, "learning_rate": 9.450785660761747e-06, "loss": 0.6717, "step": 1375 }, { "epoch": 0.18, "grad_norm": 1.1190389394760132, "learning_rate": 9.44983978327811e-06, "loss": 0.6953, "step": 1376 }, { "epoch": 0.18, "grad_norm": 1.6207910776138306, "learning_rate": 9.448893139400016e-06, "loss": 0.6883, "step": 1377 }, { "epoch": 0.18, "grad_norm": 1.0816600322723389, "learning_rate": 9.447945729290507e-06, "loss": 0.6447, "step": 1378 }, { "epoch": 0.18, "grad_norm": 2.4702744483947754, "learning_rate": 9.446997553112753e-06, "loss": 0.5832, "step": 1379 }, { "epoch": 0.18, "grad_norm": 0.8984103202819824, "learning_rate": 9.446048611030061e-06, "loss": 0.6695, "step": 1380 }, { "epoch": 0.18, "grad_norm": 1.11966073513031, "learning_rate": 9.445098903205863e-06, "loss": 0.6151, "step": 1381 }, { "epoch": 0.18, "grad_norm": 1.413903832435608, "learning_rate": 9.44414842980373e-06, "loss": 0.5619, "step": 1382 }, { "epoch": 0.18, "grad_norm": 1.174336314201355, "learning_rate": 9.443197190987359e-06, "loss": 0.7763, "step": 1383 }, { "epoch": 0.18, "grad_norm": 1.1478570699691772, "learning_rate": 9.442245186920585e-06, "loss": 0.5953, "step": 1384 }, { "epoch": 0.18, "grad_norm": 1.0508737564086914, "learning_rate": 9.44129241776737e-06, "loss": 0.6232, "step": 1385 }, { "epoch": 0.18, "grad_norm": 1.4047367572784424, "learning_rate": 9.440338883691807e-06, "loss": 0.611, "step": 1386 }, { "epoch": 0.18, "grad_norm": 1.1691255569458008, "learning_rate": 9.439384584858125e-06, "loss": 0.6286, "step": 1387 }, { "epoch": 0.18, "grad_norm": 1.2848888635635376, "learning_rate": 9.43842952143068e-06, "loss": 0.6801, "step": 1388 }, { "epoch": 0.18, "grad_norm": 1.1584075689315796, "learning_rate": 9.437473693573969e-06, "loss": 0.6356, "step": 1389 }, { "epoch": 0.18, "grad_norm": 1.8564170598983765, "learning_rate": 9.436517101452607e-06, "loss": 0.6309, "step": 1390 }, { "epoch": 0.18, "grad_norm": 1.3005411624908447, "learning_rate": 9.435559745231348e-06, "loss": 0.6847, "step": 1391 }, { "epoch": 0.18, "grad_norm": 1.392625331878662, "learning_rate": 9.434601625075082e-06, "loss": 0.5639, "step": 1392 }, { "epoch": 0.18, "grad_norm": 1.3917368650436401, "learning_rate": 9.43364274114882e-06, "loss": 0.7253, "step": 1393 }, { "epoch": 0.18, "grad_norm": 1.6492226123809814, "learning_rate": 9.432683093617716e-06, "loss": 0.6228, "step": 1394 }, { "epoch": 0.18, "grad_norm": 1.330742359161377, "learning_rate": 9.431722682647044e-06, "loss": 0.7072, "step": 1395 }, { "epoch": 0.18, "grad_norm": 1.6217615604400635, "learning_rate": 9.43076150840222e-06, "loss": 0.5747, "step": 1396 }, { "epoch": 0.18, "grad_norm": 1.2197303771972656, "learning_rate": 9.429799571048784e-06, "loss": 0.6557, "step": 1397 }, { "epoch": 0.18, "grad_norm": 1.0048884153366089, "learning_rate": 9.428836870752411e-06, "loss": 0.707, "step": 1398 }, { "epoch": 0.18, "grad_norm": 1.2903577089309692, "learning_rate": 9.427873407678907e-06, "loss": 0.7498, "step": 1399 }, { "epoch": 0.18, "grad_norm": 1.230726718902588, "learning_rate": 9.426909181994208e-06, "loss": 0.6526, "step": 1400 }, { "epoch": 0.18, "grad_norm": 1.1689950227737427, "learning_rate": 9.425944193864382e-06, "loss": 0.6607, "step": 1401 }, { "epoch": 0.18, "grad_norm": 1.2050648927688599, "learning_rate": 9.42497844345563e-06, "loss": 0.7587, "step": 1402 }, { "epoch": 0.18, "grad_norm": 1.1159378290176392, "learning_rate": 9.424011930934283e-06, "loss": 0.6073, "step": 1403 }, { "epoch": 0.18, "grad_norm": 1.159040927886963, "learning_rate": 9.4230446564668e-06, "loss": 0.6005, "step": 1404 }, { "epoch": 0.18, "grad_norm": 1.3513133525848389, "learning_rate": 9.422076620219777e-06, "loss": 0.6888, "step": 1405 }, { "epoch": 0.18, "grad_norm": 1.4324787855148315, "learning_rate": 9.42110782235994e-06, "loss": 0.6781, "step": 1406 }, { "epoch": 0.18, "grad_norm": 1.0883930921554565, "learning_rate": 9.420138263054143e-06, "loss": 0.6243, "step": 1407 }, { "epoch": 0.18, "grad_norm": 1.2407677173614502, "learning_rate": 9.419167942469372e-06, "loss": 0.6807, "step": 1408 }, { "epoch": 0.18, "grad_norm": 1.275864839553833, "learning_rate": 9.418196860772746e-06, "loss": 0.6301, "step": 1409 }, { "epoch": 0.18, "grad_norm": 2.2127833366394043, "learning_rate": 9.417225018131513e-06, "loss": 0.6004, "step": 1410 }, { "epoch": 0.18, "grad_norm": 1.683578610420227, "learning_rate": 9.416252414713056e-06, "loss": 0.5999, "step": 1411 }, { "epoch": 0.18, "grad_norm": 1.1180976629257202, "learning_rate": 9.415279050684882e-06, "loss": 0.6629, "step": 1412 }, { "epoch": 0.18, "grad_norm": 1.122758388519287, "learning_rate": 9.414304926214637e-06, "loss": 0.582, "step": 1413 }, { "epoch": 0.18, "grad_norm": 1.2862956523895264, "learning_rate": 9.413330041470092e-06, "loss": 0.5903, "step": 1414 }, { "epoch": 0.18, "grad_norm": 1.222602367401123, "learning_rate": 9.412354396619151e-06, "loss": 0.6605, "step": 1415 }, { "epoch": 0.18, "grad_norm": 1.1338920593261719, "learning_rate": 9.411377991829851e-06, "loss": 0.5989, "step": 1416 }, { "epoch": 0.18, "grad_norm": 1.2186603546142578, "learning_rate": 9.410400827270356e-06, "loss": 0.669, "step": 1417 }, { "epoch": 0.18, "grad_norm": 1.038238763809204, "learning_rate": 9.409422903108963e-06, "loss": 0.6425, "step": 1418 }, { "epoch": 0.18, "grad_norm": 1.0504441261291504, "learning_rate": 9.4084442195141e-06, "loss": 0.5588, "step": 1419 }, { "epoch": 0.18, "grad_norm": 1.3752822875976562, "learning_rate": 9.407464776654326e-06, "loss": 0.5757, "step": 1420 }, { "epoch": 0.18, "grad_norm": 1.1799776554107666, "learning_rate": 9.406484574698328e-06, "loss": 0.7499, "step": 1421 }, { "epoch": 0.18, "grad_norm": 1.192615270614624, "learning_rate": 9.405503613814927e-06, "loss": 0.4951, "step": 1422 }, { "epoch": 0.18, "grad_norm": 1.10984206199646, "learning_rate": 9.404521894173075e-06, "loss": 0.5502, "step": 1423 }, { "epoch": 0.18, "grad_norm": 1.2776201963424683, "learning_rate": 9.403539415941852e-06, "loss": 0.7015, "step": 1424 }, { "epoch": 0.18, "grad_norm": 1.2026793956756592, "learning_rate": 9.402556179290468e-06, "loss": 0.6019, "step": 1425 }, { "epoch": 0.18, "grad_norm": 1.2137603759765625, "learning_rate": 9.401572184388268e-06, "loss": 0.6522, "step": 1426 }, { "epoch": 0.18, "grad_norm": 1.404771327972412, "learning_rate": 9.400587431404726e-06, "loss": 0.6633, "step": 1427 }, { "epoch": 0.18, "grad_norm": 1.0881354808807373, "learning_rate": 9.399601920509442e-06, "loss": 0.6346, "step": 1428 }, { "epoch": 0.18, "grad_norm": 1.1489696502685547, "learning_rate": 9.398615651872154e-06, "loss": 0.5722, "step": 1429 }, { "epoch": 0.18, "grad_norm": 1.202345609664917, "learning_rate": 9.397628625662724e-06, "loss": 0.6491, "step": 1430 }, { "epoch": 0.18, "grad_norm": 1.0614575147628784, "learning_rate": 9.39664084205115e-06, "loss": 0.6658, "step": 1431 }, { "epoch": 0.18, "grad_norm": 1.1777621507644653, "learning_rate": 9.395652301207556e-06, "loss": 0.6096, "step": 1432 }, { "epoch": 0.18, "grad_norm": 0.9109401106834412, "learning_rate": 9.394663003302197e-06, "loss": 0.5908, "step": 1433 }, { "epoch": 0.18, "grad_norm": 1.0313465595245361, "learning_rate": 9.393672948505461e-06, "loss": 0.6819, "step": 1434 }, { "epoch": 0.18, "grad_norm": 1.7536683082580566, "learning_rate": 9.392682136987865e-06, "loss": 0.6045, "step": 1435 }, { "epoch": 0.18, "grad_norm": 1.0943320989608765, "learning_rate": 9.391690568920055e-06, "loss": 0.5611, "step": 1436 }, { "epoch": 0.18, "grad_norm": 1.105167031288147, "learning_rate": 9.390698244472808e-06, "loss": 0.6414, "step": 1437 }, { "epoch": 0.18, "grad_norm": 1.1738401651382446, "learning_rate": 9.389705163817034e-06, "loss": 0.7103, "step": 1438 }, { "epoch": 0.18, "grad_norm": 1.1961818933486938, "learning_rate": 9.388711327123769e-06, "loss": 0.6541, "step": 1439 }, { "epoch": 0.18, "grad_norm": 1.2521352767944336, "learning_rate": 9.38771673456418e-06, "loss": 0.6377, "step": 1440 }, { "epoch": 0.18, "grad_norm": 1.9367631673812866, "learning_rate": 9.386721386309569e-06, "loss": 0.6122, "step": 1441 }, { "epoch": 0.18, "grad_norm": 1.1017770767211914, "learning_rate": 9.385725282531364e-06, "loss": 0.5612, "step": 1442 }, { "epoch": 0.18, "grad_norm": 1.280482530593872, "learning_rate": 9.38472842340112e-06, "loss": 0.672, "step": 1443 }, { "epoch": 0.18, "grad_norm": 1.5452642440795898, "learning_rate": 9.383730809090528e-06, "loss": 0.6426, "step": 1444 }, { "epoch": 0.19, "grad_norm": 1.0411386489868164, "learning_rate": 9.382732439771409e-06, "loss": 0.5851, "step": 1445 }, { "epoch": 0.19, "grad_norm": 1.0093711614608765, "learning_rate": 9.381733315615708e-06, "loss": 0.662, "step": 1446 }, { "epoch": 0.19, "grad_norm": 1.6546435356140137, "learning_rate": 9.380733436795506e-06, "loss": 0.5422, "step": 1447 }, { "epoch": 0.19, "grad_norm": 1.1968332529067993, "learning_rate": 9.379732803483011e-06, "loss": 0.6345, "step": 1448 }, { "epoch": 0.19, "grad_norm": 1.473617672920227, "learning_rate": 9.378731415850561e-06, "loss": 0.5548, "step": 1449 }, { "epoch": 0.19, "grad_norm": 1.1579792499542236, "learning_rate": 9.377729274070627e-06, "loss": 0.6089, "step": 1450 }, { "epoch": 0.19, "grad_norm": 1.1918591260910034, "learning_rate": 9.376726378315806e-06, "loss": 0.5618, "step": 1451 }, { "epoch": 0.19, "grad_norm": 1.340204119682312, "learning_rate": 9.375722728758826e-06, "loss": 0.5787, "step": 1452 }, { "epoch": 0.19, "grad_norm": 1.30586576461792, "learning_rate": 9.374718325572547e-06, "loss": 0.683, "step": 1453 }, { "epoch": 0.19, "grad_norm": 1.144736409187317, "learning_rate": 9.373713168929954e-06, "loss": 0.6424, "step": 1454 }, { "epoch": 0.19, "grad_norm": 1.587199091911316, "learning_rate": 9.372707259004168e-06, "loss": 0.6204, "step": 1455 }, { "epoch": 0.19, "grad_norm": 1.1502057313919067, "learning_rate": 9.371700595968437e-06, "loss": 0.6197, "step": 1456 }, { "epoch": 0.19, "grad_norm": 1.1308425664901733, "learning_rate": 9.370693179996133e-06, "loss": 0.5867, "step": 1457 }, { "epoch": 0.19, "grad_norm": 1.8421180248260498, "learning_rate": 9.369685011260768e-06, "loss": 0.6537, "step": 1458 }, { "epoch": 0.19, "grad_norm": 1.224798560142517, "learning_rate": 9.368676089935978e-06, "loss": 0.6643, "step": 1459 }, { "epoch": 0.19, "grad_norm": 1.2730331420898438, "learning_rate": 9.367666416195526e-06, "loss": 0.6882, "step": 1460 }, { "epoch": 0.19, "grad_norm": 1.4292099475860596, "learning_rate": 9.366655990213311e-06, "loss": 0.6895, "step": 1461 }, { "epoch": 0.19, "grad_norm": 1.8112746477127075, "learning_rate": 9.365644812163356e-06, "loss": 0.7695, "step": 1462 }, { "epoch": 0.19, "grad_norm": 1.171579360961914, "learning_rate": 9.36463288221982e-06, "loss": 0.648, "step": 1463 }, { "epoch": 0.19, "grad_norm": 1.1482553482055664, "learning_rate": 9.363620200556983e-06, "loss": 0.6616, "step": 1464 }, { "epoch": 0.19, "grad_norm": 1.1553841829299927, "learning_rate": 9.36260676734926e-06, "loss": 0.5983, "step": 1465 }, { "epoch": 0.19, "grad_norm": 1.32172691822052, "learning_rate": 9.361592582771195e-06, "loss": 0.5921, "step": 1466 }, { "epoch": 0.19, "grad_norm": 0.9891936779022217, "learning_rate": 9.36057764699746e-06, "loss": 0.6451, "step": 1467 }, { "epoch": 0.19, "grad_norm": 1.3381367921829224, "learning_rate": 9.359561960202857e-06, "loss": 0.6476, "step": 1468 }, { "epoch": 0.19, "grad_norm": 1.140139102935791, "learning_rate": 9.35854552256232e-06, "loss": 0.6169, "step": 1469 }, { "epoch": 0.19, "grad_norm": 1.0977221727371216, "learning_rate": 9.357528334250905e-06, "loss": 0.5992, "step": 1470 }, { "epoch": 0.19, "grad_norm": 1.3049180507659912, "learning_rate": 9.356510395443804e-06, "loss": 0.5462, "step": 1471 }, { "epoch": 0.19, "grad_norm": 1.2993820905685425, "learning_rate": 9.35549170631634e-06, "loss": 0.6289, "step": 1472 }, { "epoch": 0.19, "grad_norm": 1.1152647733688354, "learning_rate": 9.354472267043955e-06, "loss": 0.6773, "step": 1473 }, { "epoch": 0.19, "grad_norm": 1.593841552734375, "learning_rate": 9.353452077802233e-06, "loss": 0.6024, "step": 1474 }, { "epoch": 0.19, "grad_norm": 1.0295051336288452, "learning_rate": 9.352431138766875e-06, "loss": 0.556, "step": 1475 }, { "epoch": 0.19, "grad_norm": 1.2775508165359497, "learning_rate": 9.35140945011372e-06, "loss": 0.6177, "step": 1476 }, { "epoch": 0.19, "grad_norm": 1.1952461004257202, "learning_rate": 9.350387012018734e-06, "loss": 0.6117, "step": 1477 }, { "epoch": 0.19, "grad_norm": 1.298227071762085, "learning_rate": 9.34936382465801e-06, "loss": 0.6345, "step": 1478 }, { "epoch": 0.19, "grad_norm": 1.2173748016357422, "learning_rate": 9.348339888207771e-06, "loss": 0.6348, "step": 1479 }, { "epoch": 0.19, "grad_norm": 1.5317448377609253, "learning_rate": 9.347315202844371e-06, "loss": 0.6326, "step": 1480 }, { "epoch": 0.19, "grad_norm": 1.0272226333618164, "learning_rate": 9.346289768744288e-06, "loss": 0.7068, "step": 1481 }, { "epoch": 0.19, "grad_norm": 0.982128381729126, "learning_rate": 9.345263586084135e-06, "loss": 0.5463, "step": 1482 }, { "epoch": 0.19, "grad_norm": 0.9588432908058167, "learning_rate": 9.344236655040649e-06, "loss": 0.6088, "step": 1483 }, { "epoch": 0.19, "grad_norm": 1.1015230417251587, "learning_rate": 9.343208975790699e-06, "loss": 0.6073, "step": 1484 }, { "epoch": 0.19, "grad_norm": 1.1615246534347534, "learning_rate": 9.342180548511283e-06, "loss": 0.5965, "step": 1485 }, { "epoch": 0.19, "grad_norm": 1.2907776832580566, "learning_rate": 9.341151373379527e-06, "loss": 0.6704, "step": 1486 }, { "epoch": 0.19, "grad_norm": 1.2365469932556152, "learning_rate": 9.340121450572681e-06, "loss": 0.732, "step": 1487 }, { "epoch": 0.19, "grad_norm": 1.2405096292495728, "learning_rate": 9.339090780268133e-06, "loss": 0.5716, "step": 1488 }, { "epoch": 0.19, "grad_norm": 1.2856647968292236, "learning_rate": 9.338059362643393e-06, "loss": 0.6443, "step": 1489 }, { "epoch": 0.19, "grad_norm": 1.0891637802124023, "learning_rate": 9.337027197876103e-06, "loss": 0.6259, "step": 1490 }, { "epoch": 0.19, "grad_norm": 1.1378039121627808, "learning_rate": 9.33599428614403e-06, "loss": 0.6413, "step": 1491 }, { "epoch": 0.19, "grad_norm": 1.0595463514328003, "learning_rate": 9.334960627625075e-06, "loss": 0.593, "step": 1492 }, { "epoch": 0.19, "grad_norm": 1.307681918144226, "learning_rate": 9.333926222497263e-06, "loss": 0.5555, "step": 1493 }, { "epoch": 0.19, "grad_norm": 2.9554941654205322, "learning_rate": 9.332891070938749e-06, "loss": 0.6792, "step": 1494 }, { "epoch": 0.19, "grad_norm": 1.0907893180847168, "learning_rate": 9.331855173127817e-06, "loss": 0.6127, "step": 1495 }, { "epoch": 0.19, "grad_norm": 1.8585306406021118, "learning_rate": 9.33081852924288e-06, "loss": 0.5657, "step": 1496 }, { "epoch": 0.19, "grad_norm": 1.3523632287979126, "learning_rate": 9.329781139462479e-06, "loss": 0.6613, "step": 1497 }, { "epoch": 0.19, "grad_norm": 1.0348542928695679, "learning_rate": 9.328743003965283e-06, "loss": 0.5763, "step": 1498 }, { "epoch": 0.19, "grad_norm": 1.102908968925476, "learning_rate": 9.32770412293009e-06, "loss": 0.6235, "step": 1499 }, { "epoch": 0.19, "grad_norm": 4.521458148956299, "learning_rate": 9.326664496535825e-06, "loss": 0.7002, "step": 1500 }, { "epoch": 0.19, "grad_norm": 1.6622076034545898, "learning_rate": 9.325624124961542e-06, "loss": 0.587, "step": 1501 }, { "epoch": 0.19, "grad_norm": 1.0094268321990967, "learning_rate": 9.324583008386425e-06, "loss": 0.6609, "step": 1502 }, { "epoch": 0.19, "grad_norm": 1.205629825592041, "learning_rate": 9.323541146989788e-06, "loss": 0.5841, "step": 1503 }, { "epoch": 0.19, "grad_norm": 1.3504383563995361, "learning_rate": 9.322498540951067e-06, "loss": 0.7027, "step": 1504 }, { "epoch": 0.19, "grad_norm": 1.421724557876587, "learning_rate": 9.321455190449828e-06, "loss": 0.6843, "step": 1505 }, { "epoch": 0.19, "grad_norm": 1.1611723899841309, "learning_rate": 9.32041109566577e-06, "loss": 0.6121, "step": 1506 }, { "epoch": 0.19, "grad_norm": 1.239842176437378, "learning_rate": 9.319366256778717e-06, "loss": 0.7677, "step": 1507 }, { "epoch": 0.19, "grad_norm": 1.2777602672576904, "learning_rate": 9.318320673968622e-06, "loss": 0.7695, "step": 1508 }, { "epoch": 0.19, "grad_norm": 1.2827951908111572, "learning_rate": 9.317274347415561e-06, "loss": 0.6129, "step": 1509 }, { "epoch": 0.19, "grad_norm": 1.05486261844635, "learning_rate": 9.316227277299748e-06, "loss": 0.6093, "step": 1510 }, { "epoch": 0.19, "grad_norm": 1.1314477920532227, "learning_rate": 9.315179463801518e-06, "loss": 0.6235, "step": 1511 }, { "epoch": 0.19, "grad_norm": 1.0239319801330566, "learning_rate": 9.314130907101332e-06, "loss": 0.5927, "step": 1512 }, { "epoch": 0.19, "grad_norm": 1.2008589506149292, "learning_rate": 9.313081607379786e-06, "loss": 0.6737, "step": 1513 }, { "epoch": 0.19, "grad_norm": 1.1876063346862793, "learning_rate": 9.3120315648176e-06, "loss": 0.5764, "step": 1514 }, { "epoch": 0.19, "grad_norm": 1.021418571472168, "learning_rate": 9.310980779595623e-06, "loss": 0.6656, "step": 1515 }, { "epoch": 0.19, "grad_norm": 0.923373818397522, "learning_rate": 9.309929251894828e-06, "loss": 0.6674, "step": 1516 }, { "epoch": 0.19, "grad_norm": 1.0926281213760376, "learning_rate": 9.308876981896326e-06, "loss": 0.6407, "step": 1517 }, { "epoch": 0.19, "grad_norm": 1.034195065498352, "learning_rate": 9.307823969781342e-06, "loss": 0.6774, "step": 1518 }, { "epoch": 0.19, "grad_norm": 1.0667943954467773, "learning_rate": 9.30677021573124e-06, "loss": 0.7005, "step": 1519 }, { "epoch": 0.19, "grad_norm": 1.644808292388916, "learning_rate": 9.305715719927507e-06, "loss": 0.6978, "step": 1520 }, { "epoch": 0.19, "grad_norm": 1.2867639064788818, "learning_rate": 9.30466048255176e-06, "loss": 0.6115, "step": 1521 }, { "epoch": 0.19, "grad_norm": 1.1789501905441284, "learning_rate": 9.303604503785737e-06, "loss": 0.7755, "step": 1522 }, { "epoch": 0.2, "grad_norm": 1.2231398820877075, "learning_rate": 9.302547783811312e-06, "loss": 0.741, "step": 1523 }, { "epoch": 0.2, "grad_norm": 1.3856559991836548, "learning_rate": 9.301490322810487e-06, "loss": 0.632, "step": 1524 }, { "epoch": 0.2, "grad_norm": 1.1552003622055054, "learning_rate": 9.300432120965384e-06, "loss": 0.6781, "step": 1525 }, { "epoch": 0.2, "grad_norm": 1.2242902517318726, "learning_rate": 9.299373178458255e-06, "loss": 0.6174, "step": 1526 }, { "epoch": 0.2, "grad_norm": 1.118219017982483, "learning_rate": 9.298313495471486e-06, "loss": 0.6034, "step": 1527 }, { "epoch": 0.2, "grad_norm": 1.7142438888549805, "learning_rate": 9.297253072187585e-06, "loss": 0.6345, "step": 1528 }, { "epoch": 0.2, "grad_norm": 1.1490740776062012, "learning_rate": 9.296191908789186e-06, "loss": 0.6571, "step": 1529 }, { "epoch": 0.2, "grad_norm": 1.1473088264465332, "learning_rate": 9.295130005459055e-06, "loss": 0.5983, "step": 1530 }, { "epoch": 0.2, "grad_norm": 1.0597072839736938, "learning_rate": 9.294067362380081e-06, "loss": 0.6132, "step": 1531 }, { "epoch": 0.2, "grad_norm": 1.013634443283081, "learning_rate": 9.293003979735284e-06, "loss": 0.5525, "step": 1532 }, { "epoch": 0.2, "grad_norm": 1.376117467880249, "learning_rate": 9.291939857707812e-06, "loss": 0.5656, "step": 1533 }, { "epoch": 0.2, "grad_norm": 1.3028066158294678, "learning_rate": 9.290874996480935e-06, "loss": 0.6226, "step": 1534 }, { "epoch": 0.2, "grad_norm": 1.4443479776382446, "learning_rate": 9.289809396238054e-06, "loss": 0.5352, "step": 1535 }, { "epoch": 0.2, "grad_norm": 1.0546255111694336, "learning_rate": 9.288743057162698e-06, "loss": 0.586, "step": 1536 }, { "epoch": 0.2, "grad_norm": 1.3574661016464233, "learning_rate": 9.287675979438526e-06, "loss": 0.6401, "step": 1537 }, { "epoch": 0.2, "grad_norm": 1.0744688510894775, "learning_rate": 9.286608163249314e-06, "loss": 0.6846, "step": 1538 }, { "epoch": 0.2, "grad_norm": 1.2303810119628906, "learning_rate": 9.285539608778976e-06, "loss": 0.6477, "step": 1539 }, { "epoch": 0.2, "grad_norm": 1.0581923723220825, "learning_rate": 9.284470316211545e-06, "loss": 0.6514, "step": 1540 }, { "epoch": 0.2, "grad_norm": 1.0219451189041138, "learning_rate": 9.283400285731188e-06, "loss": 0.6836, "step": 1541 }, { "epoch": 0.2, "grad_norm": 1.6948091983795166, "learning_rate": 9.282329517522196e-06, "loss": 0.6725, "step": 1542 }, { "epoch": 0.2, "grad_norm": 1.1440834999084473, "learning_rate": 9.281258011768985e-06, "loss": 0.5548, "step": 1543 }, { "epoch": 0.2, "grad_norm": 1.2690272331237793, "learning_rate": 9.280185768656103e-06, "loss": 0.5611, "step": 1544 }, { "epoch": 0.2, "grad_norm": 1.544965386390686, "learning_rate": 9.279112788368218e-06, "loss": 0.6296, "step": 1545 }, { "epoch": 0.2, "grad_norm": 1.2523674964904785, "learning_rate": 9.278039071090135e-06, "loss": 0.671, "step": 1546 }, { "epoch": 0.2, "grad_norm": 1.0705721378326416, "learning_rate": 9.276964617006772e-06, "loss": 0.653, "step": 1547 }, { "epoch": 0.2, "grad_norm": 1.2598583698272705, "learning_rate": 9.27588942630319e-06, "loss": 0.6127, "step": 1548 }, { "epoch": 0.2, "grad_norm": 1.0954736471176147, "learning_rate": 9.274813499164563e-06, "loss": 0.5496, "step": 1549 }, { "epoch": 0.2, "grad_norm": 1.3450437784194946, "learning_rate": 9.273736835776199e-06, "loss": 0.6587, "step": 1550 }, { "epoch": 0.2, "grad_norm": 1.1436505317687988, "learning_rate": 9.272659436323535e-06, "loss": 0.5947, "step": 1551 }, { "epoch": 0.2, "grad_norm": 1.2184737920761108, "learning_rate": 9.271581300992125e-06, "loss": 0.6579, "step": 1552 }, { "epoch": 0.2, "grad_norm": 1.0481288433074951, "learning_rate": 9.27050242996766e-06, "loss": 0.5941, "step": 1553 }, { "epoch": 0.2, "grad_norm": 1.0766609907150269, "learning_rate": 9.269422823435953e-06, "loss": 0.7407, "step": 1554 }, { "epoch": 0.2, "grad_norm": 1.0673633813858032, "learning_rate": 9.268342481582944e-06, "loss": 0.6136, "step": 1555 }, { "epoch": 0.2, "grad_norm": 1.2412662506103516, "learning_rate": 9.267261404594698e-06, "loss": 0.6064, "step": 1556 }, { "epoch": 0.2, "grad_norm": 1.0127815008163452, "learning_rate": 9.266179592657414e-06, "loss": 0.5653, "step": 1557 }, { "epoch": 0.2, "grad_norm": 1.0416415929794312, "learning_rate": 9.265097045957405e-06, "loss": 0.6664, "step": 1558 }, { "epoch": 0.2, "grad_norm": 1.0409266948699951, "learning_rate": 9.264013764681123e-06, "loss": 0.5291, "step": 1559 }, { "epoch": 0.2, "grad_norm": 1.155242919921875, "learning_rate": 9.26292974901514e-06, "loss": 0.6171, "step": 1560 }, { "epoch": 0.2, "grad_norm": 1.2648565769195557, "learning_rate": 9.261844999146153e-06, "loss": 0.618, "step": 1561 }, { "epoch": 0.2, "grad_norm": 1.3313794136047363, "learning_rate": 9.260759515260991e-06, "loss": 0.6499, "step": 1562 }, { "epoch": 0.2, "grad_norm": 0.9920714497566223, "learning_rate": 9.259673297546606e-06, "loss": 0.6458, "step": 1563 }, { "epoch": 0.2, "grad_norm": 0.902812123298645, "learning_rate": 9.258586346190077e-06, "loss": 0.5976, "step": 1564 }, { "epoch": 0.2, "grad_norm": 1.122768521308899, "learning_rate": 9.257498661378608e-06, "loss": 0.6489, "step": 1565 }, { "epoch": 0.2, "grad_norm": 1.1595557928085327, "learning_rate": 9.256410243299532e-06, "loss": 0.6044, "step": 1566 }, { "epoch": 0.2, "grad_norm": 1.0857417583465576, "learning_rate": 9.255321092140305e-06, "loss": 0.6606, "step": 1567 }, { "epoch": 0.2, "grad_norm": 1.2797273397445679, "learning_rate": 9.254231208088514e-06, "loss": 0.5624, "step": 1568 }, { "epoch": 0.2, "grad_norm": 1.0136442184448242, "learning_rate": 9.253140591331868e-06, "loss": 0.6768, "step": 1569 }, { "epoch": 0.2, "grad_norm": 1.0762922763824463, "learning_rate": 9.252049242058202e-06, "loss": 0.693, "step": 1570 }, { "epoch": 0.2, "grad_norm": 1.4632151126861572, "learning_rate": 9.250957160455483e-06, "loss": 0.658, "step": 1571 }, { "epoch": 0.2, "grad_norm": 1.5439205169677734, "learning_rate": 9.249864346711794e-06, "loss": 0.602, "step": 1572 }, { "epoch": 0.2, "grad_norm": 3.6240251064300537, "learning_rate": 9.248770801015355e-06, "loss": 0.6716, "step": 1573 }, { "epoch": 0.2, "grad_norm": 1.2808942794799805, "learning_rate": 9.247676523554503e-06, "loss": 0.6153, "step": 1574 }, { "epoch": 0.2, "grad_norm": 1.0833419561386108, "learning_rate": 9.24658151451771e-06, "loss": 0.5805, "step": 1575 }, { "epoch": 0.2, "grad_norm": 2.3130383491516113, "learning_rate": 9.245485774093563e-06, "loss": 0.6547, "step": 1576 }, { "epoch": 0.2, "grad_norm": 1.1251847743988037, "learning_rate": 9.244389302470785e-06, "loss": 0.7021, "step": 1577 }, { "epoch": 0.2, "grad_norm": 1.2331297397613525, "learning_rate": 9.243292099838222e-06, "loss": 0.7117, "step": 1578 }, { "epoch": 0.2, "grad_norm": 1.0849312543869019, "learning_rate": 9.24219416638484e-06, "loss": 0.6178, "step": 1579 }, { "epoch": 0.2, "grad_norm": 1.2171435356140137, "learning_rate": 9.24109550229974e-06, "loss": 0.568, "step": 1580 }, { "epoch": 0.2, "grad_norm": 0.9565621614456177, "learning_rate": 9.239996107772144e-06, "loss": 0.6697, "step": 1581 }, { "epoch": 0.2, "grad_norm": 1.4817333221435547, "learning_rate": 9.238895982991398e-06, "loss": 0.6317, "step": 1582 }, { "epoch": 0.2, "grad_norm": 1.3165608644485474, "learning_rate": 9.23779512814698e-06, "loss": 0.6251, "step": 1583 }, { "epoch": 0.2, "grad_norm": 1.244059681892395, "learning_rate": 9.236693543428485e-06, "loss": 0.6704, "step": 1584 }, { "epoch": 0.2, "grad_norm": 1.1906737089157104, "learning_rate": 9.235591229025643e-06, "loss": 0.6598, "step": 1585 }, { "epoch": 0.2, "grad_norm": 1.268912434577942, "learning_rate": 9.234488185128304e-06, "loss": 0.5556, "step": 1586 }, { "epoch": 0.2, "grad_norm": 1.1951463222503662, "learning_rate": 9.233384411926442e-06, "loss": 0.7621, "step": 1587 }, { "epoch": 0.2, "grad_norm": 1.4412353038787842, "learning_rate": 9.232279909610163e-06, "loss": 0.7506, "step": 1588 }, { "epoch": 0.2, "grad_norm": 1.2029129266738892, "learning_rate": 9.231174678369695e-06, "loss": 0.6591, "step": 1589 }, { "epoch": 0.2, "grad_norm": 1.1525003910064697, "learning_rate": 9.23006871839539e-06, "loss": 0.6395, "step": 1590 }, { "epoch": 0.2, "grad_norm": 1.305783748626709, "learning_rate": 9.228962029877724e-06, "loss": 0.6779, "step": 1591 }, { "epoch": 0.2, "grad_norm": 1.1824591159820557, "learning_rate": 9.227854613007308e-06, "loss": 0.7231, "step": 1592 }, { "epoch": 0.2, "grad_norm": 1.8407436609268188, "learning_rate": 9.22674646797487e-06, "loss": 0.5819, "step": 1593 }, { "epoch": 0.2, "grad_norm": 1.2329612970352173, "learning_rate": 9.225637594971265e-06, "loss": 0.7076, "step": 1594 }, { "epoch": 0.2, "grad_norm": 1.214455246925354, "learning_rate": 9.224527994187471e-06, "loss": 0.6108, "step": 1595 }, { "epoch": 0.2, "grad_norm": 1.1704137325286865, "learning_rate": 9.223417665814599e-06, "loss": 0.6927, "step": 1596 }, { "epoch": 0.2, "grad_norm": 1.149852991104126, "learning_rate": 9.222306610043877e-06, "loss": 0.602, "step": 1597 }, { "epoch": 0.2, "grad_norm": 1.2092525959014893, "learning_rate": 9.221194827066664e-06, "loss": 0.5947, "step": 1598 }, { "epoch": 0.2, "grad_norm": 1.0760937929153442, "learning_rate": 9.22008231707444e-06, "loss": 0.5526, "step": 1599 }, { "epoch": 0.2, "grad_norm": 1.1572587490081787, "learning_rate": 9.218969080258816e-06, "loss": 0.6291, "step": 1600 }, { "epoch": 0.21, "grad_norm": 1.240919828414917, "learning_rate": 9.217855116811519e-06, "loss": 0.6204, "step": 1601 }, { "epoch": 0.21, "grad_norm": 1.2830955982208252, "learning_rate": 9.21674042692441e-06, "loss": 0.6191, "step": 1602 }, { "epoch": 0.21, "grad_norm": 1.104110598564148, "learning_rate": 9.215625010789469e-06, "loss": 0.6335, "step": 1603 }, { "epoch": 0.21, "grad_norm": 1.0325019359588623, "learning_rate": 9.214508868598807e-06, "loss": 0.5526, "step": 1604 }, { "epoch": 0.21, "grad_norm": 1.1608110666275024, "learning_rate": 9.213392000544656e-06, "loss": 0.6592, "step": 1605 }, { "epoch": 0.21, "grad_norm": 1.1397209167480469, "learning_rate": 9.212274406819373e-06, "loss": 0.6102, "step": 1606 }, { "epoch": 0.21, "grad_norm": 2.098311424255371, "learning_rate": 9.211156087615442e-06, "loss": 0.6441, "step": 1607 }, { "epoch": 0.21, "grad_norm": 1.3748606443405151, "learning_rate": 9.210037043125469e-06, "loss": 0.6898, "step": 1608 }, { "epoch": 0.21, "grad_norm": 1.2580170631408691, "learning_rate": 9.208917273542188e-06, "loss": 0.6937, "step": 1609 }, { "epoch": 0.21, "grad_norm": 1.1992050409317017, "learning_rate": 9.207796779058456e-06, "loss": 0.7689, "step": 1610 }, { "epoch": 0.21, "grad_norm": 1.173262596130371, "learning_rate": 9.206675559867254e-06, "loss": 0.5427, "step": 1611 }, { "epoch": 0.21, "grad_norm": 1.1421955823898315, "learning_rate": 9.205553616161692e-06, "loss": 0.648, "step": 1612 }, { "epoch": 0.21, "grad_norm": 1.1522105932235718, "learning_rate": 9.204430948135e-06, "loss": 0.6322, "step": 1613 }, { "epoch": 0.21, "grad_norm": 1.0081772804260254, "learning_rate": 9.203307555980536e-06, "loss": 0.6584, "step": 1614 }, { "epoch": 0.21, "grad_norm": 1.1464192867279053, "learning_rate": 9.20218343989178e-06, "loss": 0.7091, "step": 1615 }, { "epoch": 0.21, "grad_norm": 1.0025413036346436, "learning_rate": 9.20105860006234e-06, "loss": 0.6502, "step": 1616 }, { "epoch": 0.21, "grad_norm": 1.2751092910766602, "learning_rate": 9.199933036685946e-06, "loss": 0.6899, "step": 1617 }, { "epoch": 0.21, "grad_norm": 1.9596514701843262, "learning_rate": 9.198806749956453e-06, "loss": 0.6055, "step": 1618 }, { "epoch": 0.21, "grad_norm": 1.1237578392028809, "learning_rate": 9.197679740067842e-06, "loss": 0.7279, "step": 1619 }, { "epoch": 0.21, "grad_norm": 1.0528181791305542, "learning_rate": 9.196552007214215e-06, "loss": 0.6048, "step": 1620 }, { "epoch": 0.21, "grad_norm": 1.0963466167449951, "learning_rate": 9.195423551589803e-06, "loss": 0.6251, "step": 1621 }, { "epoch": 0.21, "grad_norm": 1.2052900791168213, "learning_rate": 9.194294373388962e-06, "loss": 0.7226, "step": 1622 }, { "epoch": 0.21, "grad_norm": 1.6359459161758423, "learning_rate": 9.193164472806165e-06, "loss": 0.6516, "step": 1623 }, { "epoch": 0.21, "grad_norm": 1.1933037042617798, "learning_rate": 9.192033850036018e-06, "loss": 0.6134, "step": 1624 }, { "epoch": 0.21, "grad_norm": 1.69362211227417, "learning_rate": 9.190902505273247e-06, "loss": 0.6487, "step": 1625 }, { "epoch": 0.21, "grad_norm": 1.2256443500518799, "learning_rate": 9.189770438712701e-06, "loss": 0.5863, "step": 1626 }, { "epoch": 0.21, "grad_norm": 1.431551218032837, "learning_rate": 9.188637650549357e-06, "loss": 0.6076, "step": 1627 }, { "epoch": 0.21, "grad_norm": 1.1389654874801636, "learning_rate": 9.187504140978316e-06, "loss": 0.6828, "step": 1628 }, { "epoch": 0.21, "grad_norm": 1.3692309856414795, "learning_rate": 9.1863699101948e-06, "loss": 0.609, "step": 1629 }, { "epoch": 0.21, "grad_norm": 1.4854897260665894, "learning_rate": 9.18523495839416e-06, "loss": 0.5828, "step": 1630 }, { "epoch": 0.21, "grad_norm": 1.3503080606460571, "learning_rate": 9.184099285771865e-06, "loss": 0.6239, "step": 1631 }, { "epoch": 0.21, "grad_norm": 1.0891162157058716, "learning_rate": 9.182962892523515e-06, "loss": 0.7232, "step": 1632 }, { "epoch": 0.21, "grad_norm": 2.0536813735961914, "learning_rate": 9.181825778844826e-06, "loss": 0.5698, "step": 1633 }, { "epoch": 0.21, "grad_norm": 1.0989372730255127, "learning_rate": 9.180687944931646e-06, "loss": 0.7344, "step": 1634 }, { "epoch": 0.21, "grad_norm": 1.1523288488388062, "learning_rate": 9.179549390979946e-06, "loss": 0.6036, "step": 1635 }, { "epoch": 0.21, "grad_norm": 1.1294163465499878, "learning_rate": 9.178410117185811e-06, "loss": 0.6317, "step": 1636 }, { "epoch": 0.21, "grad_norm": 1.0369744300842285, "learning_rate": 9.177270123745466e-06, "loss": 0.6539, "step": 1637 }, { "epoch": 0.21, "grad_norm": 1.0784070491790771, "learning_rate": 9.176129410855248e-06, "loss": 0.7082, "step": 1638 }, { "epoch": 0.21, "grad_norm": 1.0242846012115479, "learning_rate": 9.17498797871162e-06, "loss": 0.6233, "step": 1639 }, { "epoch": 0.21, "grad_norm": 1.3577667474746704, "learning_rate": 9.173845827511176e-06, "loss": 0.7456, "step": 1640 }, { "epoch": 0.21, "grad_norm": 1.2746890783309937, "learning_rate": 9.172702957450622e-06, "loss": 0.6227, "step": 1641 }, { "epoch": 0.21, "grad_norm": 1.0363303422927856, "learning_rate": 9.171559368726798e-06, "loss": 0.6176, "step": 1642 }, { "epoch": 0.21, "grad_norm": 1.156578540802002, "learning_rate": 9.170415061536661e-06, "loss": 0.5688, "step": 1643 }, { "epoch": 0.21, "grad_norm": 1.0655452013015747, "learning_rate": 9.1692700360773e-06, "loss": 0.6034, "step": 1644 }, { "epoch": 0.21, "grad_norm": 1.3639652729034424, "learning_rate": 9.168124292545917e-06, "loss": 0.6289, "step": 1645 }, { "epoch": 0.21, "grad_norm": 1.4169421195983887, "learning_rate": 9.166977831139845e-06, "loss": 0.6277, "step": 1646 }, { "epoch": 0.21, "grad_norm": 1.0737788677215576, "learning_rate": 9.165830652056537e-06, "loss": 0.5081, "step": 1647 }, { "epoch": 0.21, "grad_norm": 1.884833574295044, "learning_rate": 9.164682755493574e-06, "loss": 0.5832, "step": 1648 }, { "epoch": 0.21, "grad_norm": 1.2018709182739258, "learning_rate": 9.163534141648658e-06, "loss": 0.602, "step": 1649 }, { "epoch": 0.21, "grad_norm": 1.1860284805297852, "learning_rate": 9.162384810719612e-06, "loss": 0.605, "step": 1650 }, { "epoch": 0.21, "grad_norm": 1.697682499885559, "learning_rate": 9.161234762904386e-06, "loss": 0.5906, "step": 1651 }, { "epoch": 0.21, "grad_norm": 1.6724482774734497, "learning_rate": 9.160083998401053e-06, "loss": 0.7244, "step": 1652 }, { "epoch": 0.21, "grad_norm": 1.3993253707885742, "learning_rate": 9.158932517407806e-06, "loss": 0.703, "step": 1653 }, { "epoch": 0.21, "grad_norm": 1.4852396249771118, "learning_rate": 9.15778032012297e-06, "loss": 0.6691, "step": 1654 }, { "epoch": 0.21, "grad_norm": 1.278127670288086, "learning_rate": 9.15662740674498e-06, "loss": 0.6629, "step": 1655 }, { "epoch": 0.21, "grad_norm": 1.3181184530258179, "learning_rate": 9.155473777472408e-06, "loss": 0.6775, "step": 1656 }, { "epoch": 0.21, "grad_norm": 1.3667001724243164, "learning_rate": 9.15431943250394e-06, "loss": 0.5703, "step": 1657 }, { "epoch": 0.21, "grad_norm": 1.3015559911727905, "learning_rate": 9.15316437203839e-06, "loss": 0.65, "step": 1658 }, { "epoch": 0.21, "grad_norm": 1.3116071224212646, "learning_rate": 9.152008596274695e-06, "loss": 0.5931, "step": 1659 }, { "epoch": 0.21, "grad_norm": 1.37589430809021, "learning_rate": 9.15085210541191e-06, "loss": 0.5934, "step": 1660 }, { "epoch": 0.21, "grad_norm": 1.3749538660049438, "learning_rate": 9.149694899649218e-06, "loss": 0.6775, "step": 1661 }, { "epoch": 0.21, "grad_norm": 1.1061633825302124, "learning_rate": 9.148536979185927e-06, "loss": 0.6038, "step": 1662 }, { "epoch": 0.21, "grad_norm": 2.6514101028442383, "learning_rate": 9.147378344221462e-06, "loss": 0.5828, "step": 1663 }, { "epoch": 0.21, "grad_norm": 1.279590129852295, "learning_rate": 9.146218994955378e-06, "loss": 0.548, "step": 1664 }, { "epoch": 0.21, "grad_norm": 1.0067777633666992, "learning_rate": 9.145058931587345e-06, "loss": 0.5791, "step": 1665 }, { "epoch": 0.21, "grad_norm": 1.2838937044143677, "learning_rate": 9.143898154317164e-06, "loss": 0.5998, "step": 1666 }, { "epoch": 0.21, "grad_norm": 1.0655570030212402, "learning_rate": 9.142736663344754e-06, "loss": 0.7026, "step": 1667 }, { "epoch": 0.21, "grad_norm": 1.2458590269088745, "learning_rate": 9.141574458870156e-06, "loss": 0.7518, "step": 1668 }, { "epoch": 0.21, "grad_norm": 1.3690664768218994, "learning_rate": 9.140411541093539e-06, "loss": 0.5947, "step": 1669 }, { "epoch": 0.21, "grad_norm": 1.4112671613693237, "learning_rate": 9.139247910215192e-06, "loss": 0.6794, "step": 1670 }, { "epoch": 0.21, "grad_norm": 1.0723782777786255, "learning_rate": 9.138083566435525e-06, "loss": 0.6019, "step": 1671 }, { "epoch": 0.21, "grad_norm": 1.5119447708129883, "learning_rate": 9.136918509955074e-06, "loss": 0.6187, "step": 1672 }, { "epoch": 0.21, "grad_norm": 1.17743980884552, "learning_rate": 9.135752740974495e-06, "loss": 0.5778, "step": 1673 }, { "epoch": 0.21, "grad_norm": 1.1854586601257324, "learning_rate": 9.13458625969457e-06, "loss": 0.6281, "step": 1674 }, { "epoch": 0.21, "grad_norm": 0.9571298360824585, "learning_rate": 9.133419066316198e-06, "loss": 0.5768, "step": 1675 }, { "epoch": 0.21, "grad_norm": 1.1036064624786377, "learning_rate": 9.13225116104041e-06, "loss": 0.5698, "step": 1676 }, { "epoch": 0.21, "grad_norm": 1.1489185094833374, "learning_rate": 9.131082544068346e-06, "loss": 0.6844, "step": 1677 }, { "epoch": 0.21, "grad_norm": 1.2321306467056274, "learning_rate": 9.129913215601286e-06, "loss": 0.6463, "step": 1678 }, { "epoch": 0.22, "grad_norm": 1.2575052976608276, "learning_rate": 9.128743175840615e-06, "loss": 0.5624, "step": 1679 }, { "epoch": 0.22, "grad_norm": 1.1586254835128784, "learning_rate": 9.127572424987853e-06, "loss": 0.7156, "step": 1680 }, { "epoch": 0.22, "grad_norm": 1.7823494672775269, "learning_rate": 9.126400963244636e-06, "loss": 0.5968, "step": 1681 }, { "epoch": 0.22, "grad_norm": 1.0591415166854858, "learning_rate": 9.125228790812726e-06, "loss": 0.7415, "step": 1682 }, { "epoch": 0.22, "grad_norm": 1.2111726999282837, "learning_rate": 9.124055907894004e-06, "loss": 0.6182, "step": 1683 }, { "epoch": 0.22, "grad_norm": 1.14926278591156, "learning_rate": 9.12288231469048e-06, "loss": 0.6119, "step": 1684 }, { "epoch": 0.22, "grad_norm": 1.0378811359405518, "learning_rate": 9.121708011404275e-06, "loss": 0.6497, "step": 1685 }, { "epoch": 0.22, "grad_norm": 1.4201372861862183, "learning_rate": 9.120532998237642e-06, "loss": 0.7219, "step": 1686 }, { "epoch": 0.22, "grad_norm": 1.1894242763519287, "learning_rate": 9.119357275392954e-06, "loss": 0.7498, "step": 1687 }, { "epoch": 0.22, "grad_norm": 1.3526839017868042, "learning_rate": 9.118180843072705e-06, "loss": 0.6255, "step": 1688 }, { "epoch": 0.22, "grad_norm": 1.2016130685806274, "learning_rate": 9.117003701479508e-06, "loss": 0.6505, "step": 1689 }, { "epoch": 0.22, "grad_norm": 1.1583237648010254, "learning_rate": 9.115825850816106e-06, "loss": 0.5844, "step": 1690 }, { "epoch": 0.22, "grad_norm": 1.1343616247177124, "learning_rate": 9.114647291285358e-06, "loss": 0.6809, "step": 1691 }, { "epoch": 0.22, "grad_norm": 1.766956090927124, "learning_rate": 9.113468023090251e-06, "loss": 0.6212, "step": 1692 }, { "epoch": 0.22, "grad_norm": 1.2435486316680908, "learning_rate": 9.112288046433883e-06, "loss": 0.6192, "step": 1693 }, { "epoch": 0.22, "grad_norm": 1.0251524448394775, "learning_rate": 9.111107361519485e-06, "loss": 0.5751, "step": 1694 }, { "epoch": 0.22, "grad_norm": 1.0255200862884521, "learning_rate": 9.109925968550405e-06, "loss": 0.5642, "step": 1695 }, { "epoch": 0.22, "grad_norm": 1.2505881786346436, "learning_rate": 9.108743867730115e-06, "loss": 0.6587, "step": 1696 }, { "epoch": 0.22, "grad_norm": 1.3478747606277466, "learning_rate": 9.107561059262207e-06, "loss": 0.5907, "step": 1697 }, { "epoch": 0.22, "grad_norm": 1.4244675636291504, "learning_rate": 9.106377543350396e-06, "loss": 0.6059, "step": 1698 }, { "epoch": 0.22, "grad_norm": 0.9292632937431335, "learning_rate": 9.105193320198518e-06, "loss": 0.5866, "step": 1699 }, { "epoch": 0.22, "grad_norm": 0.9981103539466858, "learning_rate": 9.104008390010532e-06, "loss": 0.6674, "step": 1700 }, { "epoch": 0.22, "grad_norm": 1.207874059677124, "learning_rate": 9.102822752990517e-06, "loss": 0.6106, "step": 1701 }, { "epoch": 0.22, "grad_norm": 1.0549036264419556, "learning_rate": 9.101636409342676e-06, "loss": 0.583, "step": 1702 }, { "epoch": 0.22, "grad_norm": 1.4138764142990112, "learning_rate": 9.100449359271333e-06, "loss": 0.7072, "step": 1703 }, { "epoch": 0.22, "grad_norm": 1.2411020994186401, "learning_rate": 9.099261602980933e-06, "loss": 0.6662, "step": 1704 }, { "epoch": 0.22, "grad_norm": 1.9189475774765015, "learning_rate": 9.098073140676043e-06, "loss": 0.6922, "step": 1705 }, { "epoch": 0.22, "grad_norm": 1.334502100944519, "learning_rate": 9.096883972561347e-06, "loss": 0.7025, "step": 1706 }, { "epoch": 0.22, "grad_norm": 1.3240032196044922, "learning_rate": 9.095694098841662e-06, "loss": 0.6391, "step": 1707 }, { "epoch": 0.22, "grad_norm": 1.224280595779419, "learning_rate": 9.094503519721917e-06, "loss": 0.5683, "step": 1708 }, { "epoch": 0.22, "grad_norm": 1.2366915941238403, "learning_rate": 9.093312235407166e-06, "loss": 0.6315, "step": 1709 }, { "epoch": 0.22, "grad_norm": 1.2290258407592773, "learning_rate": 9.09212024610258e-06, "loss": 0.6049, "step": 1710 }, { "epoch": 0.22, "grad_norm": 1.0810284614562988, "learning_rate": 9.090927552013457e-06, "loss": 0.6721, "step": 1711 }, { "epoch": 0.22, "grad_norm": 1.1469855308532715, "learning_rate": 9.089734153345215e-06, "loss": 0.6771, "step": 1712 }, { "epoch": 0.22, "grad_norm": 1.5117367506027222, "learning_rate": 9.088540050303392e-06, "loss": 0.6422, "step": 1713 }, { "epoch": 0.22, "grad_norm": 1.0608218908309937, "learning_rate": 9.087345243093646e-06, "loss": 0.6211, "step": 1714 }, { "epoch": 0.22, "grad_norm": 1.1350992918014526, "learning_rate": 9.086149731921763e-06, "loss": 0.6627, "step": 1715 }, { "epoch": 0.22, "grad_norm": 1.1392055749893188, "learning_rate": 9.084953516993642e-06, "loss": 0.574, "step": 1716 }, { "epoch": 0.22, "grad_norm": 1.4839973449707031, "learning_rate": 9.083756598515307e-06, "loss": 0.6127, "step": 1717 }, { "epoch": 0.22, "grad_norm": 1.2805958986282349, "learning_rate": 9.082558976692904e-06, "loss": 0.6159, "step": 1718 }, { "epoch": 0.22, "grad_norm": 0.9901695847511292, "learning_rate": 9.081360651732698e-06, "loss": 0.5861, "step": 1719 }, { "epoch": 0.22, "grad_norm": 1.3135817050933838, "learning_rate": 9.080161623841077e-06, "loss": 0.646, "step": 1720 }, { "epoch": 0.22, "grad_norm": 1.2557127475738525, "learning_rate": 9.078961893224548e-06, "loss": 0.6455, "step": 1721 }, { "epoch": 0.22, "grad_norm": 1.4702677726745605, "learning_rate": 9.07776146008974e-06, "loss": 0.6288, "step": 1722 }, { "epoch": 0.22, "grad_norm": 1.4625836610794067, "learning_rate": 9.076560324643405e-06, "loss": 0.7098, "step": 1723 }, { "epoch": 0.22, "grad_norm": 1.4593355655670166, "learning_rate": 9.075358487092413e-06, "loss": 0.6319, "step": 1724 }, { "epoch": 0.22, "grad_norm": 1.3008965253829956, "learning_rate": 9.074155947643757e-06, "loss": 0.6806, "step": 1725 }, { "epoch": 0.22, "grad_norm": 1.2681013345718384, "learning_rate": 9.07295270650455e-06, "loss": 0.575, "step": 1726 }, { "epoch": 0.22, "grad_norm": 1.2825214862823486, "learning_rate": 9.071748763882025e-06, "loss": 0.5942, "step": 1727 }, { "epoch": 0.22, "grad_norm": 1.12113356590271, "learning_rate": 9.070544119983536e-06, "loss": 0.6244, "step": 1728 }, { "epoch": 0.22, "grad_norm": 1.394400715827942, "learning_rate": 9.069338775016558e-06, "loss": 0.7336, "step": 1729 }, { "epoch": 0.22, "grad_norm": 1.3107945919036865, "learning_rate": 9.06813272918869e-06, "loss": 0.6463, "step": 1730 }, { "epoch": 0.22, "grad_norm": 1.9889616966247559, "learning_rate": 9.066925982707647e-06, "loss": 0.5611, "step": 1731 }, { "epoch": 0.22, "grad_norm": 1.0906484127044678, "learning_rate": 9.065718535781266e-06, "loss": 0.6513, "step": 1732 }, { "epoch": 0.22, "grad_norm": 1.320393443107605, "learning_rate": 9.064510388617507e-06, "loss": 0.6201, "step": 1733 }, { "epoch": 0.22, "grad_norm": 1.2902657985687256, "learning_rate": 9.063301541424447e-06, "loss": 0.662, "step": 1734 }, { "epoch": 0.22, "grad_norm": 1.093662142753601, "learning_rate": 9.062091994410286e-06, "loss": 0.6321, "step": 1735 }, { "epoch": 0.22, "grad_norm": 1.2275786399841309, "learning_rate": 9.060881747783347e-06, "loss": 0.5982, "step": 1736 }, { "epoch": 0.22, "grad_norm": 1.4073659181594849, "learning_rate": 9.059670801752065e-06, "loss": 0.6775, "step": 1737 }, { "epoch": 0.22, "grad_norm": 1.3687350749969482, "learning_rate": 9.058459156525003e-06, "loss": 0.7289, "step": 1738 }, { "epoch": 0.22, "grad_norm": 1.213643193244934, "learning_rate": 9.057246812310844e-06, "loss": 0.6809, "step": 1739 }, { "epoch": 0.22, "grad_norm": 1.2109642028808594, "learning_rate": 9.056033769318387e-06, "loss": 0.6457, "step": 1740 }, { "epoch": 0.22, "grad_norm": 1.1009825468063354, "learning_rate": 9.054820027756556e-06, "loss": 0.6372, "step": 1741 }, { "epoch": 0.22, "grad_norm": 0.8893618583679199, "learning_rate": 9.05360558783439e-06, "loss": 0.6498, "step": 1742 }, { "epoch": 0.22, "grad_norm": 1.9988924264907837, "learning_rate": 9.052390449761057e-06, "loss": 0.6498, "step": 1743 }, { "epoch": 0.22, "grad_norm": 1.142358422279358, "learning_rate": 9.051174613745836e-06, "loss": 0.5587, "step": 1744 }, { "epoch": 0.22, "grad_norm": 1.34147047996521, "learning_rate": 9.049958079998132e-06, "loss": 0.6156, "step": 1745 }, { "epoch": 0.22, "grad_norm": 1.456156849861145, "learning_rate": 9.048740848727467e-06, "loss": 0.6106, "step": 1746 }, { "epoch": 0.22, "grad_norm": 1.0447920560836792, "learning_rate": 9.047522920143483e-06, "loss": 0.6259, "step": 1747 }, { "epoch": 0.22, "grad_norm": 2.416192054748535, "learning_rate": 9.046304294455945e-06, "loss": 0.5878, "step": 1748 }, { "epoch": 0.22, "grad_norm": 1.2161362171173096, "learning_rate": 9.045084971874738e-06, "loss": 0.6203, "step": 1749 }, { "epoch": 0.22, "grad_norm": 1.0151557922363281, "learning_rate": 9.043864952609863e-06, "loss": 0.6412, "step": 1750 }, { "epoch": 0.22, "grad_norm": 1.4537842273712158, "learning_rate": 9.042644236871445e-06, "loss": 0.6007, "step": 1751 }, { "epoch": 0.22, "grad_norm": 1.3863743543624878, "learning_rate": 9.041422824869729e-06, "loss": 0.6443, "step": 1752 }, { "epoch": 0.22, "grad_norm": 1.5908379554748535, "learning_rate": 9.040200716815073e-06, "loss": 0.5611, "step": 1753 }, { "epoch": 0.22, "grad_norm": 1.6172839403152466, "learning_rate": 9.038977912917963e-06, "loss": 0.6122, "step": 1754 }, { "epoch": 0.22, "grad_norm": 1.8064736127853394, "learning_rate": 9.037754413389006e-06, "loss": 0.6604, "step": 1755 }, { "epoch": 0.22, "grad_norm": 1.4816584587097168, "learning_rate": 9.03653021843892e-06, "loss": 0.6819, "step": 1756 }, { "epoch": 0.23, "grad_norm": 1.1716893911361694, "learning_rate": 9.035305328278549e-06, "loss": 0.6599, "step": 1757 }, { "epoch": 0.23, "grad_norm": 1.0522955656051636, "learning_rate": 9.034079743118857e-06, "loss": 0.6139, "step": 1758 }, { "epoch": 0.23, "grad_norm": 1.0910663604736328, "learning_rate": 9.032853463170925e-06, "loss": 0.7266, "step": 1759 }, { "epoch": 0.23, "grad_norm": 1.1195260286331177, "learning_rate": 9.031626488645955e-06, "loss": 0.5692, "step": 1760 }, { "epoch": 0.23, "grad_norm": 1.161078929901123, "learning_rate": 9.030398819755268e-06, "loss": 0.6371, "step": 1761 }, { "epoch": 0.23, "grad_norm": 1.5670857429504395, "learning_rate": 9.029170456710303e-06, "loss": 0.5881, "step": 1762 }, { "epoch": 0.23, "grad_norm": 2.10579252243042, "learning_rate": 9.027941399722626e-06, "loss": 0.6167, "step": 1763 }, { "epoch": 0.23, "grad_norm": 1.177972435951233, "learning_rate": 9.026711649003911e-06, "loss": 0.6471, "step": 1764 }, { "epoch": 0.23, "grad_norm": 1.4338864088058472, "learning_rate": 9.025481204765963e-06, "loss": 0.6252, "step": 1765 }, { "epoch": 0.23, "grad_norm": 1.052895188331604, "learning_rate": 9.024250067220697e-06, "loss": 0.6478, "step": 1766 }, { "epoch": 0.23, "grad_norm": 1.422892689704895, "learning_rate": 9.023018236580154e-06, "loss": 0.6252, "step": 1767 }, { "epoch": 0.23, "grad_norm": 1.3154759407043457, "learning_rate": 9.02178571305649e-06, "loss": 0.5794, "step": 1768 }, { "epoch": 0.23, "grad_norm": 1.0219439268112183, "learning_rate": 9.020552496861982e-06, "loss": 0.6179, "step": 1769 }, { "epoch": 0.23, "grad_norm": 4.240836143493652, "learning_rate": 9.019318588209028e-06, "loss": 0.6145, "step": 1770 }, { "epoch": 0.23, "grad_norm": 1.2236559391021729, "learning_rate": 9.018083987310143e-06, "loss": 0.7196, "step": 1771 }, { "epoch": 0.23, "grad_norm": 1.407920479774475, "learning_rate": 9.01684869437796e-06, "loss": 0.5701, "step": 1772 }, { "epoch": 0.23, "grad_norm": 1.0528749227523804, "learning_rate": 9.015612709625236e-06, "loss": 0.6588, "step": 1773 }, { "epoch": 0.23, "grad_norm": 1.439102053642273, "learning_rate": 9.014376033264845e-06, "loss": 0.6773, "step": 1774 }, { "epoch": 0.23, "grad_norm": 0.9866570830345154, "learning_rate": 9.013138665509776e-06, "loss": 0.6061, "step": 1775 }, { "epoch": 0.23, "grad_norm": 1.7695170640945435, "learning_rate": 9.011900606573142e-06, "loss": 0.6408, "step": 1776 }, { "epoch": 0.23, "grad_norm": 1.3377400636672974, "learning_rate": 9.010661856668172e-06, "loss": 0.7959, "step": 1777 }, { "epoch": 0.23, "grad_norm": 1.6860862970352173, "learning_rate": 9.00942241600822e-06, "loss": 0.55, "step": 1778 }, { "epoch": 0.23, "grad_norm": 1.0897938013076782, "learning_rate": 9.00818228480675e-06, "loss": 0.6778, "step": 1779 }, { "epoch": 0.23, "grad_norm": 1.2090890407562256, "learning_rate": 9.00694146327735e-06, "loss": 0.7092, "step": 1780 }, { "epoch": 0.23, "grad_norm": 1.0635035037994385, "learning_rate": 9.005699951633727e-06, "loss": 0.4973, "step": 1781 }, { "epoch": 0.23, "grad_norm": 1.029817819595337, "learning_rate": 9.004457750089709e-06, "loss": 0.7106, "step": 1782 }, { "epoch": 0.23, "grad_norm": 1.2057615518569946, "learning_rate": 9.003214858859234e-06, "loss": 0.7264, "step": 1783 }, { "epoch": 0.23, "grad_norm": 1.096470594406128, "learning_rate": 9.001971278156367e-06, "loss": 0.6101, "step": 1784 }, { "epoch": 0.23, "grad_norm": 1.1284945011138916, "learning_rate": 9.000727008195293e-06, "loss": 0.5879, "step": 1785 }, { "epoch": 0.23, "grad_norm": 1.3269438743591309, "learning_rate": 8.999482049190308e-06, "loss": 0.6274, "step": 1786 }, { "epoch": 0.23, "grad_norm": 1.038558006286621, "learning_rate": 8.998236401355835e-06, "loss": 0.5697, "step": 1787 }, { "epoch": 0.23, "grad_norm": 1.3907923698425293, "learning_rate": 8.996990064906408e-06, "loss": 0.7388, "step": 1788 }, { "epoch": 0.23, "grad_norm": 1.2428247928619385, "learning_rate": 8.995743040056683e-06, "loss": 0.6643, "step": 1789 }, { "epoch": 0.23, "grad_norm": 1.2280868291854858, "learning_rate": 8.994495327021438e-06, "loss": 0.6722, "step": 1790 }, { "epoch": 0.23, "grad_norm": 1.2366783618927002, "learning_rate": 8.993246926015562e-06, "loss": 0.6621, "step": 1791 }, { "epoch": 0.23, "grad_norm": 1.1328561305999756, "learning_rate": 8.99199783725407e-06, "loss": 0.5697, "step": 1792 }, { "epoch": 0.23, "grad_norm": 1.3674986362457275, "learning_rate": 8.990748060952091e-06, "loss": 0.6256, "step": 1793 }, { "epoch": 0.23, "grad_norm": 1.234496831893921, "learning_rate": 8.989497597324872e-06, "loss": 0.6446, "step": 1794 }, { "epoch": 0.23, "grad_norm": 1.1295185089111328, "learning_rate": 8.988246446587781e-06, "loss": 0.6178, "step": 1795 }, { "epoch": 0.23, "grad_norm": 1.1279648542404175, "learning_rate": 8.986994608956305e-06, "loss": 0.6405, "step": 1796 }, { "epoch": 0.23, "grad_norm": 1.143036961555481, "learning_rate": 8.985742084646048e-06, "loss": 0.6375, "step": 1797 }, { "epoch": 0.23, "grad_norm": 1.3107563257217407, "learning_rate": 8.984488873872728e-06, "loss": 0.6425, "step": 1798 }, { "epoch": 0.23, "grad_norm": 1.1655327081680298, "learning_rate": 8.983234976852187e-06, "loss": 0.7571, "step": 1799 }, { "epoch": 0.23, "grad_norm": 1.195489764213562, "learning_rate": 8.981980393800384e-06, "loss": 0.6266, "step": 1800 }, { "epoch": 0.23, "grad_norm": 1.3523964881896973, "learning_rate": 8.980725124933396e-06, "loss": 0.6549, "step": 1801 }, { "epoch": 0.23, "grad_norm": 1.1617556810379028, "learning_rate": 8.979469170467415e-06, "loss": 0.62, "step": 1802 }, { "epoch": 0.23, "grad_norm": 1.08810293674469, "learning_rate": 8.978212530618756e-06, "loss": 0.6809, "step": 1803 }, { "epoch": 0.23, "grad_norm": 1.0384219884872437, "learning_rate": 8.976955205603849e-06, "loss": 0.5748, "step": 1804 }, { "epoch": 0.23, "grad_norm": 0.9001789093017578, "learning_rate": 8.975697195639242e-06, "loss": 0.5481, "step": 1805 }, { "epoch": 0.23, "grad_norm": 1.583070993423462, "learning_rate": 8.974438500941603e-06, "loss": 0.6438, "step": 1806 }, { "epoch": 0.23, "grad_norm": 1.3476251363754272, "learning_rate": 8.973179121727713e-06, "loss": 0.6684, "step": 1807 }, { "epoch": 0.23, "grad_norm": 1.0399832725524902, "learning_rate": 8.97191905821448e-06, "loss": 0.6654, "step": 1808 }, { "epoch": 0.23, "grad_norm": 1.1862304210662842, "learning_rate": 8.97065831061892e-06, "loss": 0.6808, "step": 1809 }, { "epoch": 0.23, "grad_norm": 1.1737608909606934, "learning_rate": 8.969396879158173e-06, "loss": 0.6566, "step": 1810 }, { "epoch": 0.23, "grad_norm": 1.5188488960266113, "learning_rate": 8.968134764049495e-06, "loss": 0.6548, "step": 1811 }, { "epoch": 0.23, "grad_norm": 1.1486880779266357, "learning_rate": 8.96687196551026e-06, "loss": 0.6055, "step": 1812 }, { "epoch": 0.23, "grad_norm": 1.047976016998291, "learning_rate": 8.965608483757958e-06, "loss": 0.5543, "step": 1813 }, { "epoch": 0.23, "grad_norm": 1.347322702407837, "learning_rate": 8.964344319010196e-06, "loss": 0.6678, "step": 1814 }, { "epoch": 0.23, "grad_norm": 2.521556854248047, "learning_rate": 8.963079471484707e-06, "loss": 0.6393, "step": 1815 }, { "epoch": 0.23, "grad_norm": 2.818633794784546, "learning_rate": 8.96181394139933e-06, "loss": 0.6287, "step": 1816 }, { "epoch": 0.23, "grad_norm": 1.1293917894363403, "learning_rate": 8.960547728972028e-06, "loss": 0.6093, "step": 1817 }, { "epoch": 0.23, "grad_norm": 4.344512462615967, "learning_rate": 8.959280834420882e-06, "loss": 0.5448, "step": 1818 }, { "epoch": 0.23, "grad_norm": 1.3196654319763184, "learning_rate": 8.958013257964086e-06, "loss": 0.7376, "step": 1819 }, { "epoch": 0.23, "grad_norm": 1.4868218898773193, "learning_rate": 8.956744999819958e-06, "loss": 0.6157, "step": 1820 }, { "epoch": 0.23, "grad_norm": 1.2535372972488403, "learning_rate": 8.955476060206928e-06, "loss": 0.5815, "step": 1821 }, { "epoch": 0.23, "grad_norm": 1.2110216617584229, "learning_rate": 8.954206439343543e-06, "loss": 0.6472, "step": 1822 }, { "epoch": 0.23, "grad_norm": 1.1209574937820435, "learning_rate": 8.95293613744847e-06, "loss": 0.6802, "step": 1823 }, { "epoch": 0.23, "grad_norm": 1.10364830493927, "learning_rate": 8.951665154740495e-06, "loss": 0.6275, "step": 1824 }, { "epoch": 0.23, "grad_norm": 1.5042253732681274, "learning_rate": 8.950393491438518e-06, "loss": 0.5856, "step": 1825 }, { "epoch": 0.23, "grad_norm": 1.1649775505065918, "learning_rate": 8.949121147761556e-06, "loss": 0.7074, "step": 1826 }, { "epoch": 0.23, "grad_norm": 1.2775204181671143, "learning_rate": 8.947848123928747e-06, "loss": 0.6768, "step": 1827 }, { "epoch": 0.23, "grad_norm": 1.109620213508606, "learning_rate": 8.94657442015934e-06, "loss": 0.6156, "step": 1828 }, { "epoch": 0.23, "grad_norm": 1.454298973083496, "learning_rate": 8.945300036672709e-06, "loss": 0.5578, "step": 1829 }, { "epoch": 0.23, "grad_norm": 1.3531197309494019, "learning_rate": 8.944024973688334e-06, "loss": 0.637, "step": 1830 }, { "epoch": 0.23, "grad_norm": 1.2573683261871338, "learning_rate": 8.942749231425824e-06, "loss": 0.6422, "step": 1831 }, { "epoch": 0.23, "grad_norm": 1.0720746517181396, "learning_rate": 8.941472810104898e-06, "loss": 0.6234, "step": 1832 }, { "epoch": 0.23, "grad_norm": 1.301047444343567, "learning_rate": 8.940195709945395e-06, "loss": 0.6037, "step": 1833 }, { "epoch": 0.23, "grad_norm": 1.0918940305709839, "learning_rate": 8.938917931167268e-06, "loss": 0.6048, "step": 1834 }, { "epoch": 0.24, "grad_norm": 1.1322157382965088, "learning_rate": 8.93763947399059e-06, "loss": 0.6611, "step": 1835 }, { "epoch": 0.24, "grad_norm": 1.2955923080444336, "learning_rate": 8.936360338635546e-06, "loss": 0.6222, "step": 1836 }, { "epoch": 0.24, "grad_norm": 1.2364767789840698, "learning_rate": 8.935080525322443e-06, "loss": 0.6594, "step": 1837 }, { "epoch": 0.24, "grad_norm": 1.4350496530532837, "learning_rate": 8.933800034271706e-06, "loss": 0.6246, "step": 1838 }, { "epoch": 0.24, "grad_norm": 1.5731194019317627, "learning_rate": 8.932518865703868e-06, "loss": 0.6258, "step": 1839 }, { "epoch": 0.24, "grad_norm": 1.1843388080596924, "learning_rate": 8.931237019839587e-06, "loss": 0.5611, "step": 1840 }, { "epoch": 0.24, "grad_norm": 1.2916220426559448, "learning_rate": 8.929954496899636e-06, "loss": 0.6868, "step": 1841 }, { "epoch": 0.24, "grad_norm": 1.1749646663665771, "learning_rate": 8.928671297104901e-06, "loss": 0.6049, "step": 1842 }, { "epoch": 0.24, "grad_norm": 1.2070494890213013, "learning_rate": 8.927387420676387e-06, "loss": 0.6208, "step": 1843 }, { "epoch": 0.24, "grad_norm": 1.1378085613250732, "learning_rate": 8.92610286783522e-06, "loss": 0.7296, "step": 1844 }, { "epoch": 0.24, "grad_norm": 1.2360167503356934, "learning_rate": 8.924817638802634e-06, "loss": 0.6041, "step": 1845 }, { "epoch": 0.24, "grad_norm": 1.0370523929595947, "learning_rate": 8.923531733799984e-06, "loss": 0.6482, "step": 1846 }, { "epoch": 0.24, "grad_norm": 1.1742146015167236, "learning_rate": 8.922245153048742e-06, "loss": 0.5543, "step": 1847 }, { "epoch": 0.24, "grad_norm": 0.9824729561805725, "learning_rate": 8.920957896770495e-06, "loss": 0.6877, "step": 1848 }, { "epoch": 0.24, "grad_norm": 1.5906468629837036, "learning_rate": 8.919669965186946e-06, "loss": 0.6197, "step": 1849 }, { "epoch": 0.24, "grad_norm": 1.0606296062469482, "learning_rate": 8.918381358519916e-06, "loss": 0.6899, "step": 1850 }, { "epoch": 0.24, "grad_norm": 1.3511884212493896, "learning_rate": 8.917092076991342e-06, "loss": 0.677, "step": 1851 }, { "epoch": 0.24, "grad_norm": 1.168999195098877, "learning_rate": 8.915802120823274e-06, "loss": 0.5945, "step": 1852 }, { "epoch": 0.24, "grad_norm": 1.0430333614349365, "learning_rate": 8.914511490237883e-06, "loss": 0.5796, "step": 1853 }, { "epoch": 0.24, "grad_norm": 1.0085581541061401, "learning_rate": 8.913220185457455e-06, "loss": 0.5656, "step": 1854 }, { "epoch": 0.24, "grad_norm": 1.1767834424972534, "learning_rate": 8.911928206704388e-06, "loss": 0.6895, "step": 1855 }, { "epoch": 0.24, "grad_norm": 0.9236755967140198, "learning_rate": 8.910635554201199e-06, "loss": 0.6282, "step": 1856 }, { "epoch": 0.24, "grad_norm": 1.189577341079712, "learning_rate": 8.909342228170523e-06, "loss": 0.6037, "step": 1857 }, { "epoch": 0.24, "grad_norm": 1.07065749168396, "learning_rate": 8.90804822883511e-06, "loss": 0.6259, "step": 1858 }, { "epoch": 0.24, "grad_norm": 1.224584698677063, "learning_rate": 8.906753556417822e-06, "loss": 0.5854, "step": 1859 }, { "epoch": 0.24, "grad_norm": 1.136846899986267, "learning_rate": 8.905458211141642e-06, "loss": 0.6162, "step": 1860 }, { "epoch": 0.24, "grad_norm": 1.1061546802520752, "learning_rate": 8.904162193229667e-06, "loss": 0.6708, "step": 1861 }, { "epoch": 0.24, "grad_norm": 1.1807464361190796, "learning_rate": 8.90286550290511e-06, "loss": 0.6058, "step": 1862 }, { "epoch": 0.24, "grad_norm": 1.3680741786956787, "learning_rate": 8.901568140391298e-06, "loss": 0.5691, "step": 1863 }, { "epoch": 0.24, "grad_norm": 1.45723557472229, "learning_rate": 8.900270105911676e-06, "loss": 0.6373, "step": 1864 }, { "epoch": 0.24, "grad_norm": 1.1721493005752563, "learning_rate": 8.898971399689804e-06, "loss": 0.6277, "step": 1865 }, { "epoch": 0.24, "grad_norm": 1.2810320854187012, "learning_rate": 8.89767202194936e-06, "loss": 0.647, "step": 1866 }, { "epoch": 0.24, "grad_norm": 1.231530785560608, "learning_rate": 8.896371972914131e-06, "loss": 0.6348, "step": 1867 }, { "epoch": 0.24, "grad_norm": 1.2142878770828247, "learning_rate": 8.895071252808025e-06, "loss": 0.6003, "step": 1868 }, { "epoch": 0.24, "grad_norm": 1.8936800956726074, "learning_rate": 8.893769861855068e-06, "loss": 0.6248, "step": 1869 }, { "epoch": 0.24, "grad_norm": 1.1831871271133423, "learning_rate": 8.892467800279396e-06, "loss": 0.6167, "step": 1870 }, { "epoch": 0.24, "grad_norm": 1.351455807685852, "learning_rate": 8.891165068305263e-06, "loss": 0.6668, "step": 1871 }, { "epoch": 0.24, "grad_norm": 1.335632562637329, "learning_rate": 8.889861666157038e-06, "loss": 0.6294, "step": 1872 }, { "epoch": 0.24, "grad_norm": 1.1500890254974365, "learning_rate": 8.888557594059204e-06, "loss": 0.6425, "step": 1873 }, { "epoch": 0.24, "grad_norm": 1.1159440279006958, "learning_rate": 8.887252852236365e-06, "loss": 0.6158, "step": 1874 }, { "epoch": 0.24, "grad_norm": 1.4507861137390137, "learning_rate": 8.885947440913232e-06, "loss": 0.6023, "step": 1875 }, { "epoch": 0.24, "grad_norm": 1.169786810874939, "learning_rate": 8.884641360314636e-06, "loss": 0.6775, "step": 1876 }, { "epoch": 0.24, "grad_norm": 1.3057630062103271, "learning_rate": 8.883334610665527e-06, "loss": 0.5631, "step": 1877 }, { "epoch": 0.24, "grad_norm": 1.4330273866653442, "learning_rate": 8.88202719219096e-06, "loss": 0.6352, "step": 1878 }, { "epoch": 0.24, "grad_norm": 1.038002610206604, "learning_rate": 8.880719105116116e-06, "loss": 0.6006, "step": 1879 }, { "epoch": 0.24, "grad_norm": 0.988121747970581, "learning_rate": 8.879410349666284e-06, "loss": 0.5431, "step": 1880 }, { "epoch": 0.24, "grad_norm": 3.9103357791900635, "learning_rate": 8.87810092606687e-06, "loss": 0.6212, "step": 1881 }, { "epoch": 0.24, "grad_norm": 1.3641910552978516, "learning_rate": 8.876790834543398e-06, "loss": 0.7146, "step": 1882 }, { "epoch": 0.24, "grad_norm": 1.3473871946334839, "learning_rate": 8.875480075321506e-06, "loss": 0.6231, "step": 1883 }, { "epoch": 0.24, "grad_norm": 1.0874309539794922, "learning_rate": 8.87416864862694e-06, "loss": 0.6109, "step": 1884 }, { "epoch": 0.24, "grad_norm": 1.3928847312927246, "learning_rate": 8.872856554685569e-06, "loss": 0.6334, "step": 1885 }, { "epoch": 0.24, "grad_norm": 1.6365965604782104, "learning_rate": 8.871543793723378e-06, "loss": 0.6463, "step": 1886 }, { "epoch": 0.24, "grad_norm": 1.2042670249938965, "learning_rate": 8.870230365966459e-06, "loss": 0.5847, "step": 1887 }, { "epoch": 0.24, "grad_norm": 1.115470290184021, "learning_rate": 8.868916271641025e-06, "loss": 0.6091, "step": 1888 }, { "epoch": 0.24, "grad_norm": 1.1585477590560913, "learning_rate": 8.867601510973402e-06, "loss": 0.7887, "step": 1889 }, { "epoch": 0.24, "grad_norm": 1.2588825225830078, "learning_rate": 8.86628608419003e-06, "loss": 0.4943, "step": 1890 }, { "epoch": 0.24, "grad_norm": 1.139786720275879, "learning_rate": 8.864969991517465e-06, "loss": 0.5754, "step": 1891 }, { "epoch": 0.24, "grad_norm": 1.179455280303955, "learning_rate": 8.86365323318238e-06, "loss": 0.5672, "step": 1892 }, { "epoch": 0.24, "grad_norm": 1.169512152671814, "learning_rate": 8.862335809411556e-06, "loss": 0.5903, "step": 1893 }, { "epoch": 0.24, "grad_norm": 1.500291347503662, "learning_rate": 8.861017720431893e-06, "loss": 0.7036, "step": 1894 }, { "epoch": 0.24, "grad_norm": 1.1209545135498047, "learning_rate": 8.859698966470404e-06, "loss": 0.6231, "step": 1895 }, { "epoch": 0.24, "grad_norm": 1.0172909498214722, "learning_rate": 8.858379547754222e-06, "loss": 0.6046, "step": 1896 }, { "epoch": 0.24, "grad_norm": 1.06539785861969, "learning_rate": 8.857059464510586e-06, "loss": 0.5999, "step": 1897 }, { "epoch": 0.24, "grad_norm": 1.3168922662734985, "learning_rate": 8.855738716966857e-06, "loss": 0.5925, "step": 1898 }, { "epoch": 0.24, "grad_norm": 0.9954319596290588, "learning_rate": 8.854417305350503e-06, "loss": 0.5778, "step": 1899 }, { "epoch": 0.24, "grad_norm": 1.1088371276855469, "learning_rate": 8.853095229889112e-06, "loss": 0.5955, "step": 1900 }, { "epoch": 0.24, "grad_norm": 1.4451113939285278, "learning_rate": 8.851772490810386e-06, "loss": 0.5305, "step": 1901 }, { "epoch": 0.24, "grad_norm": 1.0991573333740234, "learning_rate": 8.850449088342138e-06, "loss": 0.7227, "step": 1902 }, { "epoch": 0.24, "grad_norm": 1.025597333908081, "learning_rate": 8.849125022712297e-06, "loss": 0.5666, "step": 1903 }, { "epoch": 0.24, "grad_norm": 1.0423365831375122, "learning_rate": 8.847800294148908e-06, "loss": 0.6944, "step": 1904 }, { "epoch": 0.24, "grad_norm": 1.0984046459197998, "learning_rate": 8.846474902880128e-06, "loss": 0.6633, "step": 1905 }, { "epoch": 0.24, "grad_norm": 1.16025710105896, "learning_rate": 8.845148849134228e-06, "loss": 0.7596, "step": 1906 }, { "epoch": 0.24, "grad_norm": 1.2320754528045654, "learning_rate": 8.843822133139595e-06, "loss": 0.5805, "step": 1907 }, { "epoch": 0.24, "grad_norm": 1.1268361806869507, "learning_rate": 8.842494755124728e-06, "loss": 0.6417, "step": 1908 }, { "epoch": 0.24, "grad_norm": 1.4205336570739746, "learning_rate": 8.84116671531824e-06, "loss": 0.684, "step": 1909 }, { "epoch": 0.24, "grad_norm": 1.1660581827163696, "learning_rate": 8.839838013948861e-06, "loss": 0.6393, "step": 1910 }, { "epoch": 0.24, "grad_norm": 1.339078426361084, "learning_rate": 8.838508651245432e-06, "loss": 0.6015, "step": 1911 }, { "epoch": 0.24, "grad_norm": 1.1819093227386475, "learning_rate": 8.837178627436907e-06, "loss": 0.5398, "step": 1912 }, { "epoch": 0.25, "grad_norm": 1.0426304340362549, "learning_rate": 8.835847942752357e-06, "loss": 0.5841, "step": 1913 }, { "epoch": 0.25, "grad_norm": 1.4607350826263428, "learning_rate": 8.834516597420968e-06, "loss": 0.5586, "step": 1914 }, { "epoch": 0.25, "grad_norm": 1.166911244392395, "learning_rate": 8.833184591672033e-06, "loss": 0.6171, "step": 1915 }, { "epoch": 0.25, "grad_norm": 1.135672688484192, "learning_rate": 8.831851925734963e-06, "loss": 0.6544, "step": 1916 }, { "epoch": 0.25, "grad_norm": 0.9493619799613953, "learning_rate": 8.830518599839286e-06, "loss": 0.6211, "step": 1917 }, { "epoch": 0.25, "grad_norm": 1.2883530855178833, "learning_rate": 8.829184614214637e-06, "loss": 0.6184, "step": 1918 }, { "epoch": 0.25, "grad_norm": 1.0521215200424194, "learning_rate": 8.82784996909077e-06, "loss": 0.6081, "step": 1919 }, { "epoch": 0.25, "grad_norm": 1.2523962259292603, "learning_rate": 8.82651466469755e-06, "loss": 0.6249, "step": 1920 }, { "epoch": 0.25, "grad_norm": 1.0267293453216553, "learning_rate": 8.825178701264957e-06, "loss": 0.5879, "step": 1921 }, { "epoch": 0.25, "grad_norm": 1.191742181777954, "learning_rate": 8.82384207902308e-06, "loss": 0.7142, "step": 1922 }, { "epoch": 0.25, "grad_norm": 1.205351710319519, "learning_rate": 8.822504798202128e-06, "loss": 0.6608, "step": 1923 }, { "epoch": 0.25, "grad_norm": 1.1526292562484741, "learning_rate": 8.821166859032419e-06, "loss": 0.6627, "step": 1924 }, { "epoch": 0.25, "grad_norm": 1.3558284044265747, "learning_rate": 8.819828261744388e-06, "loss": 0.6308, "step": 1925 }, { "epoch": 0.25, "grad_norm": 1.1486369371414185, "learning_rate": 8.81848900656858e-06, "loss": 0.6342, "step": 1926 }, { "epoch": 0.25, "grad_norm": 2.2741756439208984, "learning_rate": 8.817149093735654e-06, "loss": 0.5495, "step": 1927 }, { "epoch": 0.25, "grad_norm": 1.269870638847351, "learning_rate": 8.815808523476383e-06, "loss": 0.645, "step": 1928 }, { "epoch": 0.25, "grad_norm": 1.1454700231552124, "learning_rate": 8.814467296021652e-06, "loss": 0.6326, "step": 1929 }, { "epoch": 0.25, "grad_norm": 1.2435215711593628, "learning_rate": 8.813125411602463e-06, "loss": 0.6898, "step": 1930 }, { "epoch": 0.25, "grad_norm": 1.1720244884490967, "learning_rate": 8.811782870449925e-06, "loss": 0.6177, "step": 1931 }, { "epoch": 0.25, "grad_norm": 1.859727382659912, "learning_rate": 8.810439672795266e-06, "loss": 0.6215, "step": 1932 }, { "epoch": 0.25, "grad_norm": 1.085492491722107, "learning_rate": 8.809095818869823e-06, "loss": 0.6814, "step": 1933 }, { "epoch": 0.25, "grad_norm": 1.1132259368896484, "learning_rate": 8.807751308905049e-06, "loss": 0.6428, "step": 1934 }, { "epoch": 0.25, "grad_norm": 1.0994184017181396, "learning_rate": 8.806406143132507e-06, "loss": 0.6146, "step": 1935 }, { "epoch": 0.25, "grad_norm": 1.1863535642623901, "learning_rate": 8.805060321783873e-06, "loss": 0.7188, "step": 1936 }, { "epoch": 0.25, "grad_norm": 1.321311116218567, "learning_rate": 8.803713845090942e-06, "loss": 0.5959, "step": 1937 }, { "epoch": 0.25, "grad_norm": 1.1888467073440552, "learning_rate": 8.802366713285612e-06, "loss": 0.6004, "step": 1938 }, { "epoch": 0.25, "grad_norm": 1.0325742959976196, "learning_rate": 8.801018926599904e-06, "loss": 0.5642, "step": 1939 }, { "epoch": 0.25, "grad_norm": 1.123363971710205, "learning_rate": 8.799670485265944e-06, "loss": 0.708, "step": 1940 }, { "epoch": 0.25, "grad_norm": 1.2410331964492798, "learning_rate": 8.798321389515974e-06, "loss": 0.6157, "step": 1941 }, { "epoch": 0.25, "grad_norm": 1.289198398590088, "learning_rate": 8.796971639582347e-06, "loss": 0.6785, "step": 1942 }, { "epoch": 0.25, "grad_norm": 1.5738413333892822, "learning_rate": 8.795621235697531e-06, "loss": 0.6329, "step": 1943 }, { "epoch": 0.25, "grad_norm": 1.011083722114563, "learning_rate": 8.79427017809411e-06, "loss": 0.5909, "step": 1944 }, { "epoch": 0.25, "grad_norm": 1.3928797245025635, "learning_rate": 8.792918467004767e-06, "loss": 0.5906, "step": 1945 }, { "epoch": 0.25, "grad_norm": 0.9257722496986389, "learning_rate": 8.791566102662315e-06, "loss": 0.5623, "step": 1946 }, { "epoch": 0.25, "grad_norm": 1.2923310995101929, "learning_rate": 8.790213085299668e-06, "loss": 0.6385, "step": 1947 }, { "epoch": 0.25, "grad_norm": 1.4501917362213135, "learning_rate": 8.788859415149856e-06, "loss": 0.5815, "step": 1948 }, { "epoch": 0.25, "grad_norm": 1.2182177305221558, "learning_rate": 8.787505092446022e-06, "loss": 0.6108, "step": 1949 }, { "epoch": 0.25, "grad_norm": 1.0286319255828857, "learning_rate": 8.786150117421418e-06, "loss": 0.5499, "step": 1950 }, { "epoch": 0.25, "grad_norm": 1.2858717441558838, "learning_rate": 8.784794490309414e-06, "loss": 0.5579, "step": 1951 }, { "epoch": 0.25, "grad_norm": 0.9833030104637146, "learning_rate": 8.783438211343487e-06, "loss": 0.6406, "step": 1952 }, { "epoch": 0.25, "grad_norm": 1.0329986810684204, "learning_rate": 8.78208128075723e-06, "loss": 0.6064, "step": 1953 }, { "epoch": 0.25, "grad_norm": 1.3916441202163696, "learning_rate": 8.780723698784346e-06, "loss": 0.6235, "step": 1954 }, { "epoch": 0.25, "grad_norm": 1.7905875444412231, "learning_rate": 8.77936546565865e-06, "loss": 0.6405, "step": 1955 }, { "epoch": 0.25, "grad_norm": 1.13296377658844, "learning_rate": 8.778006581614073e-06, "loss": 0.6188, "step": 1956 }, { "epoch": 0.25, "grad_norm": 1.1696197986602783, "learning_rate": 8.776647046884651e-06, "loss": 0.5943, "step": 1957 }, { "epoch": 0.25, "grad_norm": 1.2003610134124756, "learning_rate": 8.77528686170454e-06, "loss": 0.6799, "step": 1958 }, { "epoch": 0.25, "grad_norm": 1.1605165004730225, "learning_rate": 8.773926026308002e-06, "loss": 0.7245, "step": 1959 }, { "epoch": 0.25, "grad_norm": 1.8035575151443481, "learning_rate": 8.772564540929414e-06, "loss": 0.6746, "step": 1960 }, { "epoch": 0.25, "grad_norm": 1.1232562065124512, "learning_rate": 8.771202405803263e-06, "loss": 0.6389, "step": 1961 }, { "epoch": 0.25, "grad_norm": 1.3930180072784424, "learning_rate": 8.769839621164152e-06, "loss": 0.5986, "step": 1962 }, { "epoch": 0.25, "grad_norm": 1.1030189990997314, "learning_rate": 8.768476187246789e-06, "loss": 0.579, "step": 1963 }, { "epoch": 0.25, "grad_norm": 1.1885334253311157, "learning_rate": 8.767112104286003e-06, "loss": 0.6221, "step": 1964 }, { "epoch": 0.25, "grad_norm": 1.8045053482055664, "learning_rate": 8.765747372516723e-06, "loss": 0.59, "step": 1965 }, { "epoch": 0.25, "grad_norm": 1.1481117010116577, "learning_rate": 8.764381992174001e-06, "loss": 0.6523, "step": 1966 }, { "epoch": 0.25, "grad_norm": 1.3269027471542358, "learning_rate": 8.763015963492996e-06, "loss": 0.6512, "step": 1967 }, { "epoch": 0.25, "grad_norm": 0.9991157054901123, "learning_rate": 8.761649286708975e-06, "loss": 0.5713, "step": 1968 }, { "epoch": 0.25, "grad_norm": 1.0284345149993896, "learning_rate": 8.760281962057324e-06, "loss": 0.6516, "step": 1969 }, { "epoch": 0.25, "grad_norm": 1.1772648096084595, "learning_rate": 8.758913989773536e-06, "loss": 0.7054, "step": 1970 }, { "epoch": 0.25, "grad_norm": 1.2631373405456543, "learning_rate": 8.757545370093216e-06, "loss": 0.727, "step": 1971 }, { "epoch": 0.25, "grad_norm": 1.1898554563522339, "learning_rate": 8.756176103252082e-06, "loss": 0.601, "step": 1972 }, { "epoch": 0.25, "grad_norm": 1.192469835281372, "learning_rate": 8.754806189485959e-06, "loss": 0.5665, "step": 1973 }, { "epoch": 0.25, "grad_norm": 0.9782890677452087, "learning_rate": 8.75343562903079e-06, "loss": 0.5972, "step": 1974 }, { "epoch": 0.25, "grad_norm": 1.5428776741027832, "learning_rate": 8.752064422122625e-06, "loss": 0.6663, "step": 1975 }, { "epoch": 0.25, "grad_norm": 1.1398931741714478, "learning_rate": 8.750692568997629e-06, "loss": 0.666, "step": 1976 }, { "epoch": 0.25, "grad_norm": 1.2742382287979126, "learning_rate": 8.74932006989207e-06, "loss": 0.6918, "step": 1977 }, { "epoch": 0.25, "grad_norm": 1.2991219758987427, "learning_rate": 8.747946925042341e-06, "loss": 0.6471, "step": 1978 }, { "epoch": 0.25, "grad_norm": 1.6621350049972534, "learning_rate": 8.746573134684932e-06, "loss": 0.5951, "step": 1979 }, { "epoch": 0.25, "grad_norm": 1.5337145328521729, "learning_rate": 8.745198699056452e-06, "loss": 0.5993, "step": 1980 }, { "epoch": 0.25, "grad_norm": 1.0378293991088867, "learning_rate": 8.74382361839362e-06, "loss": 0.6211, "step": 1981 }, { "epoch": 0.25, "grad_norm": 1.1881955862045288, "learning_rate": 8.742447892933266e-06, "loss": 0.6135, "step": 1982 }, { "epoch": 0.25, "grad_norm": 1.375791311264038, "learning_rate": 8.741071522912331e-06, "loss": 0.7713, "step": 1983 }, { "epoch": 0.25, "grad_norm": 1.263593077659607, "learning_rate": 8.739694508567866e-06, "loss": 0.6795, "step": 1984 }, { "epoch": 0.25, "grad_norm": 1.0440095663070679, "learning_rate": 8.738316850137034e-06, "loss": 0.4923, "step": 1985 }, { "epoch": 0.25, "grad_norm": 1.112327218055725, "learning_rate": 8.736938547857109e-06, "loss": 0.6235, "step": 1986 }, { "epoch": 0.25, "grad_norm": 1.110875129699707, "learning_rate": 8.735559601965475e-06, "loss": 0.6164, "step": 1987 }, { "epoch": 0.25, "grad_norm": 1.1973077058792114, "learning_rate": 8.734180012699628e-06, "loss": 0.5878, "step": 1988 }, { "epoch": 0.25, "grad_norm": 1.1539205312728882, "learning_rate": 8.732799780297174e-06, "loss": 0.7529, "step": 1989 }, { "epoch": 0.25, "grad_norm": 1.2167625427246094, "learning_rate": 8.731418904995829e-06, "loss": 0.6641, "step": 1990 }, { "epoch": 0.26, "grad_norm": 1.0853583812713623, "learning_rate": 8.730037387033422e-06, "loss": 0.598, "step": 1991 }, { "epoch": 0.26, "grad_norm": 1.1032205820083618, "learning_rate": 8.72865522664789e-06, "loss": 0.643, "step": 1992 }, { "epoch": 0.26, "grad_norm": 1.2810516357421875, "learning_rate": 8.727272424077284e-06, "loss": 0.5398, "step": 1993 }, { "epoch": 0.26, "grad_norm": 1.2989377975463867, "learning_rate": 8.725888979559762e-06, "loss": 0.5852, "step": 1994 }, { "epoch": 0.26, "grad_norm": 1.120161533355713, "learning_rate": 8.724504893333596e-06, "loss": 0.7027, "step": 1995 }, { "epoch": 0.26, "grad_norm": 1.3071115016937256, "learning_rate": 8.723120165637165e-06, "loss": 0.5965, "step": 1996 }, { "epoch": 0.26, "grad_norm": 1.1043663024902344, "learning_rate": 8.72173479670896e-06, "loss": 0.6167, "step": 1997 }, { "epoch": 0.26, "grad_norm": 1.1847256422042847, "learning_rate": 8.720348786787583e-06, "loss": 0.6122, "step": 1998 }, { "epoch": 0.26, "grad_norm": 1.1380722522735596, "learning_rate": 8.718962136111749e-06, "loss": 0.6251, "step": 1999 }, { "epoch": 0.26, "grad_norm": 1.1924020051956177, "learning_rate": 8.717574844920274e-06, "loss": 0.5994, "step": 2000 }, { "epoch": 0.26, "grad_norm": 1.000066876411438, "learning_rate": 8.716186913452097e-06, "loss": 0.6475, "step": 2001 }, { "epoch": 0.26, "grad_norm": 1.0465896129608154, "learning_rate": 8.714798341946258e-06, "loss": 0.6021, "step": 2002 }, { "epoch": 0.26, "grad_norm": 1.0609678030014038, "learning_rate": 8.71340913064191e-06, "loss": 0.5697, "step": 2003 }, { "epoch": 0.26, "grad_norm": 1.1682281494140625, "learning_rate": 8.712019279778319e-06, "loss": 0.6211, "step": 2004 }, { "epoch": 0.26, "grad_norm": 1.4044312238693237, "learning_rate": 8.710628789594855e-06, "loss": 0.6077, "step": 2005 }, { "epoch": 0.26, "grad_norm": 1.1312463283538818, "learning_rate": 8.709237660331003e-06, "loss": 0.5921, "step": 2006 }, { "epoch": 0.26, "grad_norm": 1.1439876556396484, "learning_rate": 8.70784589222636e-06, "loss": 0.5924, "step": 2007 }, { "epoch": 0.26, "grad_norm": 1.3385566473007202, "learning_rate": 8.706453485520622e-06, "loss": 0.5244, "step": 2008 }, { "epoch": 0.26, "grad_norm": 1.2704468965530396, "learning_rate": 8.70506044045361e-06, "loss": 0.5937, "step": 2009 }, { "epoch": 0.26, "grad_norm": 1.1238104104995728, "learning_rate": 8.703666757265246e-06, "loss": 0.6783, "step": 2010 }, { "epoch": 0.26, "grad_norm": 1.1684532165527344, "learning_rate": 8.702272436195562e-06, "loss": 0.6352, "step": 2011 }, { "epoch": 0.26, "grad_norm": 1.1618826389312744, "learning_rate": 8.700877477484704e-06, "loss": 0.6676, "step": 2012 }, { "epoch": 0.26, "grad_norm": 1.2076810598373413, "learning_rate": 8.699481881372922e-06, "loss": 0.6103, "step": 2013 }, { "epoch": 0.26, "grad_norm": 1.1898448467254639, "learning_rate": 8.698085648100581e-06, "loss": 0.5716, "step": 2014 }, { "epoch": 0.26, "grad_norm": 1.7246116399765015, "learning_rate": 8.696688777908154e-06, "loss": 0.5495, "step": 2015 }, { "epoch": 0.26, "grad_norm": 1.511298656463623, "learning_rate": 8.695291271036221e-06, "loss": 0.6376, "step": 2016 }, { "epoch": 0.26, "grad_norm": 1.2623487710952759, "learning_rate": 8.69389312772548e-06, "loss": 0.6639, "step": 2017 }, { "epoch": 0.26, "grad_norm": 1.220610499382019, "learning_rate": 8.692494348216726e-06, "loss": 0.6363, "step": 2018 }, { "epoch": 0.26, "grad_norm": 1.2728426456451416, "learning_rate": 8.691094932750875e-06, "loss": 0.6267, "step": 2019 }, { "epoch": 0.26, "grad_norm": 1.6735302209854126, "learning_rate": 8.689694881568945e-06, "loss": 0.6491, "step": 2020 }, { "epoch": 0.26, "grad_norm": 1.3414764404296875, "learning_rate": 8.688294194912066e-06, "loss": 0.6314, "step": 2021 }, { "epoch": 0.26, "grad_norm": 1.5352545976638794, "learning_rate": 8.686892873021481e-06, "loss": 0.6437, "step": 2022 }, { "epoch": 0.26, "grad_norm": 1.3221575021743774, "learning_rate": 8.685490916138536e-06, "loss": 0.6519, "step": 2023 }, { "epoch": 0.26, "grad_norm": 1.5757750272750854, "learning_rate": 8.684088324504694e-06, "loss": 0.6486, "step": 2024 }, { "epoch": 0.26, "grad_norm": 1.2225582599639893, "learning_rate": 8.682685098361518e-06, "loss": 0.7509, "step": 2025 }, { "epoch": 0.26, "grad_norm": 1.0095558166503906, "learning_rate": 8.681281237950688e-06, "loss": 0.6723, "step": 2026 }, { "epoch": 0.26, "grad_norm": 0.9800388216972351, "learning_rate": 8.67987674351399e-06, "loss": 0.5663, "step": 2027 }, { "epoch": 0.26, "grad_norm": 1.777536392211914, "learning_rate": 8.678471615293317e-06, "loss": 0.6395, "step": 2028 }, { "epoch": 0.26, "grad_norm": 1.0923036336898804, "learning_rate": 8.677065853530679e-06, "loss": 0.5539, "step": 2029 }, { "epoch": 0.26, "grad_norm": 1.2774169445037842, "learning_rate": 8.675659458468186e-06, "loss": 0.6695, "step": 2030 }, { "epoch": 0.26, "grad_norm": 1.2363957166671753, "learning_rate": 8.67425243034806e-06, "loss": 0.6806, "step": 2031 }, { "epoch": 0.26, "grad_norm": 1.3562581539154053, "learning_rate": 8.672844769412637e-06, "loss": 0.6637, "step": 2032 }, { "epoch": 0.26, "grad_norm": 1.290300965309143, "learning_rate": 8.671436475904353e-06, "loss": 0.6027, "step": 2033 }, { "epoch": 0.26, "grad_norm": 1.1578395366668701, "learning_rate": 8.670027550065763e-06, "loss": 0.6095, "step": 2034 }, { "epoch": 0.26, "grad_norm": 3.5597174167633057, "learning_rate": 8.668617992139524e-06, "loss": 0.6364, "step": 2035 }, { "epoch": 0.26, "grad_norm": 0.9307906031608582, "learning_rate": 8.667207802368403e-06, "loss": 0.671, "step": 2036 }, { "epoch": 0.26, "grad_norm": 1.2509186267852783, "learning_rate": 8.665796980995275e-06, "loss": 0.6422, "step": 2037 }, { "epoch": 0.26, "grad_norm": 2.086897373199463, "learning_rate": 8.66438552826313e-06, "loss": 0.6447, "step": 2038 }, { "epoch": 0.26, "grad_norm": 1.172615885734558, "learning_rate": 8.662973444415058e-06, "loss": 0.562, "step": 2039 }, { "epoch": 0.26, "grad_norm": 1.004035234451294, "learning_rate": 8.661560729694262e-06, "loss": 0.515, "step": 2040 }, { "epoch": 0.26, "grad_norm": 1.1104687452316284, "learning_rate": 8.660147384344055e-06, "loss": 0.5915, "step": 2041 }, { "epoch": 0.26, "grad_norm": 1.2272541522979736, "learning_rate": 8.658733408607856e-06, "loss": 0.5539, "step": 2042 }, { "epoch": 0.26, "grad_norm": 1.0483099222183228, "learning_rate": 8.657318802729194e-06, "loss": 0.5457, "step": 2043 }, { "epoch": 0.26, "grad_norm": 1.2460724115371704, "learning_rate": 8.655903566951706e-06, "loss": 0.6596, "step": 2044 }, { "epoch": 0.26, "grad_norm": 0.8781019449234009, "learning_rate": 8.654487701519139e-06, "loss": 0.5678, "step": 2045 }, { "epoch": 0.26, "grad_norm": 1.4468796253204346, "learning_rate": 8.653071206675344e-06, "loss": 0.6264, "step": 2046 }, { "epoch": 0.26, "grad_norm": 5.015211582183838, "learning_rate": 8.651654082664285e-06, "loss": 0.5887, "step": 2047 }, { "epoch": 0.26, "grad_norm": 1.4183422327041626, "learning_rate": 8.650236329730034e-06, "loss": 0.6769, "step": 2048 }, { "epoch": 0.26, "grad_norm": 2.0147705078125, "learning_rate": 8.648817948116767e-06, "loss": 0.6663, "step": 2049 }, { "epoch": 0.26, "grad_norm": 1.3201289176940918, "learning_rate": 8.647398938068775e-06, "loss": 0.6155, "step": 2050 }, { "epoch": 0.26, "grad_norm": 1.1508480310440063, "learning_rate": 8.645979299830452e-06, "loss": 0.6329, "step": 2051 }, { "epoch": 0.26, "grad_norm": 1.3195370435714722, "learning_rate": 8.644559033646303e-06, "loss": 0.6606, "step": 2052 }, { "epoch": 0.26, "grad_norm": 1.2289732694625854, "learning_rate": 8.643138139760935e-06, "loss": 0.5887, "step": 2053 }, { "epoch": 0.26, "grad_norm": 1.2333985567092896, "learning_rate": 8.641716618419076e-06, "loss": 0.7597, "step": 2054 }, { "epoch": 0.26, "grad_norm": 1.6038957834243774, "learning_rate": 8.640294469865548e-06, "loss": 0.563, "step": 2055 }, { "epoch": 0.26, "grad_norm": 1.6648248434066772, "learning_rate": 8.638871694345293e-06, "loss": 0.6064, "step": 2056 }, { "epoch": 0.26, "grad_norm": 1.1484403610229492, "learning_rate": 8.637448292103346e-06, "loss": 0.6087, "step": 2057 }, { "epoch": 0.26, "grad_norm": 1.367751955986023, "learning_rate": 8.636024263384868e-06, "loss": 0.6503, "step": 2058 }, { "epoch": 0.26, "grad_norm": 1.4821258783340454, "learning_rate": 8.634599608435115e-06, "loss": 0.7098, "step": 2059 }, { "epoch": 0.26, "grad_norm": 2.0166525840759277, "learning_rate": 8.633174327499456e-06, "loss": 0.6259, "step": 2060 }, { "epoch": 0.26, "grad_norm": 1.2003066539764404, "learning_rate": 8.631748420823365e-06, "loss": 0.7077, "step": 2061 }, { "epoch": 0.26, "grad_norm": 1.448062539100647, "learning_rate": 8.630321888652426e-06, "loss": 0.694, "step": 2062 }, { "epoch": 0.26, "grad_norm": 2.6138148307800293, "learning_rate": 8.628894731232332e-06, "loss": 0.6076, "step": 2063 }, { "epoch": 0.26, "grad_norm": 1.1501845121383667, "learning_rate": 8.62746694880888e-06, "loss": 0.5773, "step": 2064 }, { "epoch": 0.26, "grad_norm": 1.4958134889602661, "learning_rate": 8.626038541627977e-06, "loss": 0.5991, "step": 2065 }, { "epoch": 0.26, "grad_norm": 1.0958569049835205, "learning_rate": 8.624609509935637e-06, "loss": 0.5845, "step": 2066 }, { "epoch": 0.26, "grad_norm": 1.1663223505020142, "learning_rate": 8.623179853977984e-06, "loss": 0.5782, "step": 2067 }, { "epoch": 0.26, "grad_norm": 1.1979085206985474, "learning_rate": 8.621749574001241e-06, "loss": 0.6255, "step": 2068 }, { "epoch": 0.27, "grad_norm": 1.3670035600662231, "learning_rate": 8.620318670251752e-06, "loss": 0.6218, "step": 2069 }, { "epoch": 0.27, "grad_norm": 1.0638803243637085, "learning_rate": 8.618887142975956e-06, "loss": 0.696, "step": 2070 }, { "epoch": 0.27, "grad_norm": 2.1462111473083496, "learning_rate": 8.617454992420407e-06, "loss": 0.6679, "step": 2071 }, { "epoch": 0.27, "grad_norm": 1.3509697914123535, "learning_rate": 8.616022218831764e-06, "loss": 0.6722, "step": 2072 }, { "epoch": 0.27, "grad_norm": 1.0871156454086304, "learning_rate": 8.61458882245679e-06, "loss": 0.5614, "step": 2073 }, { "epoch": 0.27, "grad_norm": 1.0885635614395142, "learning_rate": 8.613154803542362e-06, "loss": 0.6469, "step": 2074 }, { "epoch": 0.27, "grad_norm": 1.2749704122543335, "learning_rate": 8.611720162335459e-06, "loss": 0.6835, "step": 2075 }, { "epoch": 0.27, "grad_norm": 1.2243422269821167, "learning_rate": 8.61028489908317e-06, "loss": 0.5278, "step": 2076 }, { "epoch": 0.27, "grad_norm": 1.17266845703125, "learning_rate": 8.608849014032687e-06, "loss": 0.6967, "step": 2077 }, { "epoch": 0.27, "grad_norm": 1.2027003765106201, "learning_rate": 8.607412507431316e-06, "loss": 0.7052, "step": 2078 }, { "epoch": 0.27, "grad_norm": 1.0147993564605713, "learning_rate": 8.605975379526463e-06, "loss": 0.6634, "step": 2079 }, { "epoch": 0.27, "grad_norm": 1.7742810249328613, "learning_rate": 8.604537630565644e-06, "loss": 0.6074, "step": 2080 }, { "epoch": 0.27, "grad_norm": 1.3109650611877441, "learning_rate": 8.603099260796486e-06, "loss": 0.6322, "step": 2081 }, { "epoch": 0.27, "grad_norm": 1.1907052993774414, "learning_rate": 8.601660270466714e-06, "loss": 0.6935, "step": 2082 }, { "epoch": 0.27, "grad_norm": 1.3169562816619873, "learning_rate": 8.600220659824166e-06, "loss": 0.6313, "step": 2083 }, { "epoch": 0.27, "grad_norm": 1.1314212083816528, "learning_rate": 8.598780429116788e-06, "loss": 0.5839, "step": 2084 }, { "epoch": 0.27, "grad_norm": 1.2062609195709229, "learning_rate": 8.59733957859263e-06, "loss": 0.6198, "step": 2085 }, { "epoch": 0.27, "grad_norm": 0.9913787245750427, "learning_rate": 8.595898108499845e-06, "loss": 0.6855, "step": 2086 }, { "epoch": 0.27, "grad_norm": 1.212138056755066, "learning_rate": 8.594456019086702e-06, "loss": 0.5885, "step": 2087 }, { "epoch": 0.27, "grad_norm": 1.0849419832229614, "learning_rate": 8.59301331060157e-06, "loss": 0.5638, "step": 2088 }, { "epoch": 0.27, "grad_norm": 4.176388263702393, "learning_rate": 8.591569983292924e-06, "loss": 0.5899, "step": 2089 }, { "epoch": 0.27, "grad_norm": 1.0811898708343506, "learning_rate": 8.590126037409353e-06, "loss": 0.6131, "step": 2090 }, { "epoch": 0.27, "grad_norm": 1.1859910488128662, "learning_rate": 8.588681473199543e-06, "loss": 0.5915, "step": 2091 }, { "epoch": 0.27, "grad_norm": 1.8125941753387451, "learning_rate": 8.587236290912292e-06, "loss": 0.66, "step": 2092 }, { "epoch": 0.27, "grad_norm": 1.1952428817749023, "learning_rate": 8.585790490796502e-06, "loss": 0.5988, "step": 2093 }, { "epoch": 0.27, "grad_norm": 1.172929048538208, "learning_rate": 8.584344073101185e-06, "loss": 0.6717, "step": 2094 }, { "epoch": 0.27, "grad_norm": 1.0805124044418335, "learning_rate": 8.582897038075455e-06, "loss": 0.5679, "step": 2095 }, { "epoch": 0.27, "grad_norm": 1.1243839263916016, "learning_rate": 8.581449385968536e-06, "loss": 0.6338, "step": 2096 }, { "epoch": 0.27, "grad_norm": 1.2426873445510864, "learning_rate": 8.580001117029755e-06, "loss": 0.5016, "step": 2097 }, { "epoch": 0.27, "grad_norm": 1.2226959466934204, "learning_rate": 8.57855223150855e-06, "loss": 0.628, "step": 2098 }, { "epoch": 0.27, "grad_norm": 1.5704655647277832, "learning_rate": 8.577102729654457e-06, "loss": 0.6069, "step": 2099 }, { "epoch": 0.27, "grad_norm": 1.1772339344024658, "learning_rate": 8.575652611717127e-06, "loss": 0.6016, "step": 2100 }, { "epoch": 0.27, "grad_norm": 1.0823158025741577, "learning_rate": 8.574201877946314e-06, "loss": 0.6074, "step": 2101 }, { "epoch": 0.27, "grad_norm": 1.330729365348816, "learning_rate": 8.572750528591875e-06, "loss": 0.6382, "step": 2102 }, { "epoch": 0.27, "grad_norm": 1.2223377227783203, "learning_rate": 8.571298563903775e-06, "loss": 0.6634, "step": 2103 }, { "epoch": 0.27, "grad_norm": 1.497209906578064, "learning_rate": 8.56984598413209e-06, "loss": 0.6778, "step": 2104 }, { "epoch": 0.27, "grad_norm": 3.1404335498809814, "learning_rate": 8.568392789526992e-06, "loss": 0.6256, "step": 2105 }, { "epoch": 0.27, "grad_norm": 1.1916518211364746, "learning_rate": 8.566938980338765e-06, "loss": 0.7344, "step": 2106 }, { "epoch": 0.27, "grad_norm": 1.0679166316986084, "learning_rate": 8.565484556817802e-06, "loss": 0.6275, "step": 2107 }, { "epoch": 0.27, "grad_norm": 1.1432031393051147, "learning_rate": 8.564029519214594e-06, "loss": 0.6351, "step": 2108 }, { "epoch": 0.27, "grad_norm": 1.1834468841552734, "learning_rate": 8.562573867779741e-06, "loss": 0.6035, "step": 2109 }, { "epoch": 0.27, "grad_norm": 1.60970938205719, "learning_rate": 8.561117602763954e-06, "loss": 0.6716, "step": 2110 }, { "epoch": 0.27, "grad_norm": 1.1802395582199097, "learning_rate": 8.559660724418041e-06, "loss": 0.5264, "step": 2111 }, { "epoch": 0.27, "grad_norm": 3.7724010944366455, "learning_rate": 8.558203232992923e-06, "loss": 0.6596, "step": 2112 }, { "epoch": 0.27, "grad_norm": 1.0022841691970825, "learning_rate": 8.556745128739618e-06, "loss": 0.6672, "step": 2113 }, { "epoch": 0.27, "grad_norm": 1.4700846672058105, "learning_rate": 8.55528641190926e-06, "loss": 0.5608, "step": 2114 }, { "epoch": 0.27, "grad_norm": 1.1476560831069946, "learning_rate": 8.553827082753084e-06, "loss": 0.5078, "step": 2115 }, { "epoch": 0.27, "grad_norm": 1.1997826099395752, "learning_rate": 8.552367141522423e-06, "loss": 0.5914, "step": 2116 }, { "epoch": 0.27, "grad_norm": 1.022222876548767, "learning_rate": 8.550906588468728e-06, "loss": 0.6681, "step": 2117 }, { "epoch": 0.27, "grad_norm": 1.3832002878189087, "learning_rate": 8.549445423843548e-06, "loss": 0.6029, "step": 2118 }, { "epoch": 0.27, "grad_norm": 1.0408334732055664, "learning_rate": 8.54798364789854e-06, "loss": 0.5953, "step": 2119 }, { "epoch": 0.27, "grad_norm": 1.0350518226623535, "learning_rate": 8.546521260885463e-06, "loss": 0.5858, "step": 2120 }, { "epoch": 0.27, "grad_norm": 1.097774624824524, "learning_rate": 8.545058263056186e-06, "loss": 0.6631, "step": 2121 }, { "epoch": 0.27, "grad_norm": 1.5874125957489014, "learning_rate": 8.543594654662677e-06, "loss": 0.6732, "step": 2122 }, { "epoch": 0.27, "grad_norm": 1.0040942430496216, "learning_rate": 8.542130435957014e-06, "loss": 0.6054, "step": 2123 }, { "epoch": 0.27, "grad_norm": 1.172186255455017, "learning_rate": 8.540665607191383e-06, "loss": 0.6075, "step": 2124 }, { "epoch": 0.27, "grad_norm": 1.285057783126831, "learning_rate": 8.539200168618067e-06, "loss": 0.5955, "step": 2125 }, { "epoch": 0.27, "grad_norm": 1.245982050895691, "learning_rate": 8.537734120489459e-06, "loss": 0.5765, "step": 2126 }, { "epoch": 0.27, "grad_norm": 1.9209935665130615, "learning_rate": 8.536267463058055e-06, "loss": 0.65, "step": 2127 }, { "epoch": 0.27, "grad_norm": 1.3906763792037964, "learning_rate": 8.534800196576459e-06, "loss": 0.6005, "step": 2128 }, { "epoch": 0.27, "grad_norm": 1.8860881328582764, "learning_rate": 8.533332321297374e-06, "loss": 0.6604, "step": 2129 }, { "epoch": 0.27, "grad_norm": 1.790269374847412, "learning_rate": 8.531863837473617e-06, "loss": 0.6665, "step": 2130 }, { "epoch": 0.27, "grad_norm": 1.052520990371704, "learning_rate": 8.530394745358101e-06, "loss": 0.5778, "step": 2131 }, { "epoch": 0.27, "grad_norm": 1.5288801193237305, "learning_rate": 8.528925045203847e-06, "loss": 0.6731, "step": 2132 }, { "epoch": 0.27, "grad_norm": 0.9974155426025391, "learning_rate": 8.527454737263983e-06, "loss": 0.5731, "step": 2133 }, { "epoch": 0.27, "grad_norm": 1.3304848670959473, "learning_rate": 8.52598382179174e-06, "loss": 0.5752, "step": 2134 }, { "epoch": 0.27, "grad_norm": 0.973964512348175, "learning_rate": 8.524512299040451e-06, "loss": 0.6316, "step": 2135 }, { "epoch": 0.27, "grad_norm": 1.7174031734466553, "learning_rate": 8.523040169263555e-06, "loss": 0.6612, "step": 2136 }, { "epoch": 0.27, "grad_norm": 1.5883331298828125, "learning_rate": 8.5215674327146e-06, "loss": 0.6166, "step": 2137 }, { "epoch": 0.27, "grad_norm": 0.9867196083068848, "learning_rate": 8.520094089647233e-06, "loss": 0.557, "step": 2138 }, { "epoch": 0.27, "grad_norm": 1.4658268690109253, "learning_rate": 8.518620140315209e-06, "loss": 0.5743, "step": 2139 }, { "epoch": 0.27, "grad_norm": 0.9947718381881714, "learning_rate": 8.517145584972383e-06, "loss": 0.558, "step": 2140 }, { "epoch": 0.27, "grad_norm": 1.1221495866775513, "learning_rate": 8.515670423872719e-06, "loss": 0.6239, "step": 2141 }, { "epoch": 0.27, "grad_norm": 1.916927695274353, "learning_rate": 8.514194657270283e-06, "loss": 0.6223, "step": 2142 }, { "epoch": 0.27, "grad_norm": 1.1117417812347412, "learning_rate": 8.512718285419246e-06, "loss": 0.5865, "step": 2143 }, { "epoch": 0.27, "grad_norm": 1.2808547019958496, "learning_rate": 8.511241308573884e-06, "loss": 0.6683, "step": 2144 }, { "epoch": 0.27, "grad_norm": 1.0311280488967896, "learning_rate": 8.509763726988573e-06, "loss": 0.5753, "step": 2145 }, { "epoch": 0.27, "grad_norm": 1.4085116386413574, "learning_rate": 8.5082855409178e-06, "loss": 0.5724, "step": 2146 }, { "epoch": 0.28, "grad_norm": 2.5789756774902344, "learning_rate": 8.506806750616152e-06, "loss": 0.5779, "step": 2147 }, { "epoch": 0.28, "grad_norm": 1.2261545658111572, "learning_rate": 8.505327356338318e-06, "loss": 0.7003, "step": 2148 }, { "epoch": 0.28, "grad_norm": 1.2848753929138184, "learning_rate": 8.503847358339094e-06, "loss": 0.534, "step": 2149 }, { "epoch": 0.28, "grad_norm": 1.2150418758392334, "learning_rate": 8.502366756873384e-06, "loss": 0.5801, "step": 2150 }, { "epoch": 0.28, "grad_norm": 1.183171033859253, "learning_rate": 8.500885552196187e-06, "loss": 0.6252, "step": 2151 }, { "epoch": 0.28, "grad_norm": 1.2070298194885254, "learning_rate": 8.499403744562613e-06, "loss": 0.611, "step": 2152 }, { "epoch": 0.28, "grad_norm": 1.6139317750930786, "learning_rate": 8.497921334227872e-06, "loss": 0.6351, "step": 2153 }, { "epoch": 0.28, "grad_norm": 1.6720945835113525, "learning_rate": 8.496438321447278e-06, "loss": 0.6369, "step": 2154 }, { "epoch": 0.28, "grad_norm": 1.2238283157348633, "learning_rate": 8.49495470647625e-06, "loss": 0.68, "step": 2155 }, { "epoch": 0.28, "grad_norm": 1.3400514125823975, "learning_rate": 8.493470489570314e-06, "loss": 0.6404, "step": 2156 }, { "epoch": 0.28, "grad_norm": 1.198529839515686, "learning_rate": 8.491985670985093e-06, "loss": 0.6745, "step": 2157 }, { "epoch": 0.28, "grad_norm": 3.067074775695801, "learning_rate": 8.490500250976314e-06, "loss": 0.6279, "step": 2158 }, { "epoch": 0.28, "grad_norm": 1.331629991531372, "learning_rate": 8.489014229799816e-06, "loss": 0.6106, "step": 2159 }, { "epoch": 0.28, "grad_norm": 1.2290736436843872, "learning_rate": 8.487527607711535e-06, "loss": 0.5825, "step": 2160 }, { "epoch": 0.28, "grad_norm": 1.1484627723693848, "learning_rate": 8.486040384967509e-06, "loss": 0.5505, "step": 2161 }, { "epoch": 0.28, "grad_norm": 1.2274521589279175, "learning_rate": 8.484552561823885e-06, "loss": 0.6092, "step": 2162 }, { "epoch": 0.28, "grad_norm": 1.2703810930252075, "learning_rate": 8.483064138536906e-06, "loss": 0.5541, "step": 2163 }, { "epoch": 0.28, "grad_norm": 1.1794027090072632, "learning_rate": 8.481575115362926e-06, "loss": 0.6615, "step": 2164 }, { "epoch": 0.28, "grad_norm": 1.208961844444275, "learning_rate": 8.480085492558398e-06, "loss": 0.5989, "step": 2165 }, { "epoch": 0.28, "grad_norm": 1.370457649230957, "learning_rate": 8.47859527037988e-06, "loss": 0.5694, "step": 2166 }, { "epoch": 0.28, "grad_norm": 1.082960605621338, "learning_rate": 8.47710444908403e-06, "loss": 0.5625, "step": 2167 }, { "epoch": 0.28, "grad_norm": 1.1672261953353882, "learning_rate": 8.475613028927615e-06, "loss": 0.7696, "step": 2168 }, { "epoch": 0.28, "grad_norm": 1.1371452808380127, "learning_rate": 8.4741210101675e-06, "loss": 0.5821, "step": 2169 }, { "epoch": 0.28, "grad_norm": 1.1140472888946533, "learning_rate": 8.472628393060654e-06, "loss": 0.5798, "step": 2170 }, { "epoch": 0.28, "grad_norm": 1.0004754066467285, "learning_rate": 8.471135177864152e-06, "loss": 0.5552, "step": 2171 }, { "epoch": 0.28, "grad_norm": 1.2164877653121948, "learning_rate": 8.469641364835171e-06, "loss": 0.5519, "step": 2172 }, { "epoch": 0.28, "grad_norm": 2.1060073375701904, "learning_rate": 8.468146954230984e-06, "loss": 0.7057, "step": 2173 }, { "epoch": 0.28, "grad_norm": 1.1200610399246216, "learning_rate": 8.466651946308979e-06, "loss": 0.6117, "step": 2174 }, { "epoch": 0.28, "grad_norm": 1.214603304862976, "learning_rate": 8.465156341326639e-06, "loss": 0.6462, "step": 2175 }, { "epoch": 0.28, "grad_norm": 1.2249237298965454, "learning_rate": 8.46366013954155e-06, "loss": 0.7087, "step": 2176 }, { "epoch": 0.28, "grad_norm": 0.9753620028495789, "learning_rate": 8.462163341211404e-06, "loss": 0.6127, "step": 2177 }, { "epoch": 0.28, "grad_norm": 1.143880844116211, "learning_rate": 8.460665946593994e-06, "loss": 0.6184, "step": 2178 }, { "epoch": 0.28, "grad_norm": 1.178911566734314, "learning_rate": 8.459167955947217e-06, "loss": 0.6362, "step": 2179 }, { "epoch": 0.28, "grad_norm": 1.4393823146820068, "learning_rate": 8.457669369529067e-06, "loss": 0.7749, "step": 2180 }, { "epoch": 0.28, "grad_norm": 2.1207737922668457, "learning_rate": 8.456170187597647e-06, "loss": 0.6919, "step": 2181 }, { "epoch": 0.28, "grad_norm": 1.2718356847763062, "learning_rate": 8.454670410411165e-06, "loss": 0.5967, "step": 2182 }, { "epoch": 0.28, "grad_norm": 1.3145051002502441, "learning_rate": 8.453170038227922e-06, "loss": 0.7233, "step": 2183 }, { "epoch": 0.28, "grad_norm": 1.1284637451171875, "learning_rate": 8.451669071306326e-06, "loss": 0.629, "step": 2184 }, { "epoch": 0.28, "grad_norm": 1.1894221305847168, "learning_rate": 8.450167509904892e-06, "loss": 0.5804, "step": 2185 }, { "epoch": 0.28, "grad_norm": 1.2713896036148071, "learning_rate": 8.448665354282233e-06, "loss": 0.6278, "step": 2186 }, { "epoch": 0.28, "grad_norm": 1.4796535968780518, "learning_rate": 8.447162604697062e-06, "loss": 0.6252, "step": 2187 }, { "epoch": 0.28, "grad_norm": 2.35587739944458, "learning_rate": 8.445659261408199e-06, "loss": 0.7211, "step": 2188 }, { "epoch": 0.28, "grad_norm": 1.649792194366455, "learning_rate": 8.444155324674564e-06, "loss": 0.6718, "step": 2189 }, { "epoch": 0.28, "grad_norm": 1.4509642124176025, "learning_rate": 8.442650794755178e-06, "loss": 0.6138, "step": 2190 }, { "epoch": 0.28, "grad_norm": 1.5314606428146362, "learning_rate": 8.44114567190917e-06, "loss": 0.6393, "step": 2191 }, { "epoch": 0.28, "grad_norm": 1.026334524154663, "learning_rate": 8.439639956395763e-06, "loss": 0.5199, "step": 2192 }, { "epoch": 0.28, "grad_norm": 1.0074406862258911, "learning_rate": 8.438133648474284e-06, "loss": 0.6609, "step": 2193 }, { "epoch": 0.28, "grad_norm": 1.685986876487732, "learning_rate": 8.43662674840417e-06, "loss": 0.64, "step": 2194 }, { "epoch": 0.28, "grad_norm": 1.3212658166885376, "learning_rate": 8.435119256444948e-06, "loss": 0.6855, "step": 2195 }, { "epoch": 0.28, "grad_norm": 1.209119439125061, "learning_rate": 8.433611172856258e-06, "loss": 0.7623, "step": 2196 }, { "epoch": 0.28, "grad_norm": 1.0640321969985962, "learning_rate": 8.432102497897832e-06, "loss": 0.5945, "step": 2197 }, { "epoch": 0.28, "grad_norm": 1.2218296527862549, "learning_rate": 8.430593231829512e-06, "loss": 0.5931, "step": 2198 }, { "epoch": 0.28, "grad_norm": 1.5449182987213135, "learning_rate": 8.429083374911238e-06, "loss": 0.5279, "step": 2199 }, { "epoch": 0.28, "grad_norm": 1.1568078994750977, "learning_rate": 8.427572927403049e-06, "loss": 0.5968, "step": 2200 }, { "epoch": 0.28, "grad_norm": 1.2066130638122559, "learning_rate": 8.426061889565094e-06, "loss": 0.569, "step": 2201 }, { "epoch": 0.28, "grad_norm": 1.4190328121185303, "learning_rate": 8.424550261657614e-06, "loss": 0.6139, "step": 2202 }, { "epoch": 0.28, "grad_norm": 1.1991509199142456, "learning_rate": 8.423038043940958e-06, "loss": 0.6717, "step": 2203 }, { "epoch": 0.28, "grad_norm": 1.6494675874710083, "learning_rate": 8.421525236675577e-06, "loss": 0.5841, "step": 2204 }, { "epoch": 0.28, "grad_norm": 1.3361454010009766, "learning_rate": 8.420011840122016e-06, "loss": 0.6102, "step": 2205 }, { "epoch": 0.28, "grad_norm": 1.5138863325119019, "learning_rate": 8.418497854540933e-06, "loss": 0.6498, "step": 2206 }, { "epoch": 0.28, "grad_norm": 1.3878949880599976, "learning_rate": 8.416983280193076e-06, "loss": 0.6866, "step": 2207 }, { "epoch": 0.28, "grad_norm": 1.4841134548187256, "learning_rate": 8.415468117339302e-06, "loss": 0.6434, "step": 2208 }, { "epoch": 0.28, "grad_norm": 1.2668837308883667, "learning_rate": 8.413952366240565e-06, "loss": 0.6114, "step": 2209 }, { "epoch": 0.28, "grad_norm": 1.4674394130706787, "learning_rate": 8.412436027157927e-06, "loss": 0.5745, "step": 2210 }, { "epoch": 0.28, "grad_norm": 1.2802519798278809, "learning_rate": 8.410919100352543e-06, "loss": 0.564, "step": 2211 }, { "epoch": 0.28, "grad_norm": 1.2879337072372437, "learning_rate": 8.409401586085673e-06, "loss": 0.6732, "step": 2212 }, { "epoch": 0.28, "grad_norm": 1.2128925323486328, "learning_rate": 8.407883484618679e-06, "loss": 0.6827, "step": 2213 }, { "epoch": 0.28, "grad_norm": 1.12970769405365, "learning_rate": 8.406364796213023e-06, "loss": 0.6273, "step": 2214 }, { "epoch": 0.28, "grad_norm": 1.1036797761917114, "learning_rate": 8.404845521130268e-06, "loss": 0.5723, "step": 2215 }, { "epoch": 0.28, "grad_norm": 1.1636890172958374, "learning_rate": 8.403325659632076e-06, "loss": 0.6296, "step": 2216 }, { "epoch": 0.28, "grad_norm": 1.6161547899246216, "learning_rate": 8.401805211980215e-06, "loss": 0.6374, "step": 2217 }, { "epoch": 0.28, "grad_norm": 1.2114921808242798, "learning_rate": 8.400284178436551e-06, "loss": 0.6444, "step": 2218 }, { "epoch": 0.28, "grad_norm": 1.136370062828064, "learning_rate": 8.39876255926305e-06, "loss": 0.6796, "step": 2219 }, { "epoch": 0.28, "grad_norm": 10.106849670410156, "learning_rate": 8.397240354721782e-06, "loss": 0.6263, "step": 2220 }, { "epoch": 0.28, "grad_norm": 1.2132163047790527, "learning_rate": 8.395717565074913e-06, "loss": 0.6485, "step": 2221 }, { "epoch": 0.28, "grad_norm": 1.0989460945129395, "learning_rate": 8.394194190584714e-06, "loss": 0.6106, "step": 2222 }, { "epoch": 0.28, "grad_norm": 1.1754080057144165, "learning_rate": 8.392670231513557e-06, "loss": 0.585, "step": 2223 }, { "epoch": 0.28, "grad_norm": 1.3587006330490112, "learning_rate": 8.39114568812391e-06, "loss": 0.6329, "step": 2224 }, { "epoch": 0.29, "grad_norm": 1.0616475343704224, "learning_rate": 8.389620560678345e-06, "loss": 0.6499, "step": 2225 }, { "epoch": 0.29, "grad_norm": 1.4234185218811035, "learning_rate": 8.388094849439536e-06, "loss": 0.5861, "step": 2226 }, { "epoch": 0.29, "grad_norm": 1.3070238828659058, "learning_rate": 8.386568554670255e-06, "loss": 0.7642, "step": 2227 }, { "epoch": 0.29, "grad_norm": 1.2934914827346802, "learning_rate": 8.385041676633375e-06, "loss": 0.6081, "step": 2228 }, { "epoch": 0.29, "grad_norm": 2.055109739303589, "learning_rate": 8.383514215591868e-06, "loss": 0.5796, "step": 2229 }, { "epoch": 0.29, "grad_norm": 1.8594862222671509, "learning_rate": 8.381986171808811e-06, "loss": 0.5904, "step": 2230 }, { "epoch": 0.29, "grad_norm": 1.1786482334136963, "learning_rate": 8.380457545547378e-06, "loss": 0.6771, "step": 2231 }, { "epoch": 0.29, "grad_norm": 1.8738934993743896, "learning_rate": 8.378928337070844e-06, "loss": 0.5631, "step": 2232 }, { "epoch": 0.29, "grad_norm": 1.595661997795105, "learning_rate": 8.37739854664258e-06, "loss": 0.5903, "step": 2233 }, { "epoch": 0.29, "grad_norm": 1.2699490785598755, "learning_rate": 8.375868174526066e-06, "loss": 0.7472, "step": 2234 }, { "epoch": 0.29, "grad_norm": 1.3659288883209229, "learning_rate": 8.374337220984879e-06, "loss": 0.5874, "step": 2235 }, { "epoch": 0.29, "grad_norm": 1.3856401443481445, "learning_rate": 8.372805686282688e-06, "loss": 0.5405, "step": 2236 }, { "epoch": 0.29, "grad_norm": 1.9804805517196655, "learning_rate": 8.371273570683273e-06, "loss": 0.6157, "step": 2237 }, { "epoch": 0.29, "grad_norm": 1.1596951484680176, "learning_rate": 8.369740874450511e-06, "loss": 0.6362, "step": 2238 }, { "epoch": 0.29, "grad_norm": 1.279098391532898, "learning_rate": 8.368207597848375e-06, "loss": 0.6012, "step": 2239 }, { "epoch": 0.29, "grad_norm": 1.4010846614837646, "learning_rate": 8.36667374114094e-06, "loss": 0.5266, "step": 2240 }, { "epoch": 0.29, "grad_norm": 0.9738383293151855, "learning_rate": 8.365139304592384e-06, "loss": 0.6336, "step": 2241 }, { "epoch": 0.29, "grad_norm": 1.3508878946304321, "learning_rate": 8.363604288466984e-06, "loss": 0.6368, "step": 2242 }, { "epoch": 0.29, "grad_norm": 1.1931465864181519, "learning_rate": 8.362068693029111e-06, "loss": 0.5803, "step": 2243 }, { "epoch": 0.29, "grad_norm": 1.0445038080215454, "learning_rate": 8.360532518543241e-06, "loss": 0.5703, "step": 2244 }, { "epoch": 0.29, "grad_norm": 1.1057722568511963, "learning_rate": 8.358995765273953e-06, "loss": 0.6061, "step": 2245 }, { "epoch": 0.29, "grad_norm": 2.1264703273773193, "learning_rate": 8.357458433485917e-06, "loss": 0.6588, "step": 2246 }, { "epoch": 0.29, "grad_norm": 1.4543817043304443, "learning_rate": 8.355920523443909e-06, "loss": 0.5947, "step": 2247 }, { "epoch": 0.29, "grad_norm": 0.9155209064483643, "learning_rate": 8.354382035412803e-06, "loss": 0.5496, "step": 2248 }, { "epoch": 0.29, "grad_norm": 1.251805067062378, "learning_rate": 8.35284296965757e-06, "loss": 0.5388, "step": 2249 }, { "epoch": 0.29, "grad_norm": 1.2307274341583252, "learning_rate": 8.351303326443287e-06, "loss": 0.6713, "step": 2250 }, { "epoch": 0.29, "grad_norm": 1.3618934154510498, "learning_rate": 8.349763106035123e-06, "loss": 0.6442, "step": 2251 }, { "epoch": 0.29, "grad_norm": 1.1276440620422363, "learning_rate": 8.348222308698348e-06, "loss": 0.6474, "step": 2252 }, { "epoch": 0.29, "grad_norm": 1.5264719724655151, "learning_rate": 8.34668093469834e-06, "loss": 0.623, "step": 2253 }, { "epoch": 0.29, "grad_norm": 1.4504443407058716, "learning_rate": 8.34513898430056e-06, "loss": 0.6397, "step": 2254 }, { "epoch": 0.29, "grad_norm": 1.4074475765228271, "learning_rate": 8.343596457770586e-06, "loss": 0.6265, "step": 2255 }, { "epoch": 0.29, "grad_norm": 1.2215962409973145, "learning_rate": 8.342053355374082e-06, "loss": 0.708, "step": 2256 }, { "epoch": 0.29, "grad_norm": 1.6315170526504517, "learning_rate": 8.340509677376817e-06, "loss": 0.6566, "step": 2257 }, { "epoch": 0.29, "grad_norm": 1.2518130540847778, "learning_rate": 8.338965424044658e-06, "loss": 0.7536, "step": 2258 }, { "epoch": 0.29, "grad_norm": 1.4042388200759888, "learning_rate": 8.337420595643574e-06, "loss": 0.6747, "step": 2259 }, { "epoch": 0.29, "grad_norm": 1.046313762664795, "learning_rate": 8.335875192439627e-06, "loss": 0.6222, "step": 2260 }, { "epoch": 0.29, "grad_norm": 1.1476646661758423, "learning_rate": 8.33432921469898e-06, "loss": 0.6875, "step": 2261 }, { "epoch": 0.29, "grad_norm": 1.1270971298217773, "learning_rate": 8.332782662687902e-06, "loss": 0.5926, "step": 2262 }, { "epoch": 0.29, "grad_norm": 1.2763153314590454, "learning_rate": 8.331235536672748e-06, "loss": 0.5782, "step": 2263 }, { "epoch": 0.29, "grad_norm": 1.3246185779571533, "learning_rate": 8.329687836919986e-06, "loss": 0.6623, "step": 2264 }, { "epoch": 0.29, "grad_norm": 1.4971126317977905, "learning_rate": 8.328139563696172e-06, "loss": 0.6468, "step": 2265 }, { "epoch": 0.29, "grad_norm": 1.484387993812561, "learning_rate": 8.326590717267964e-06, "loss": 0.6747, "step": 2266 }, { "epoch": 0.29, "grad_norm": 1.130797266960144, "learning_rate": 8.32504129790212e-06, "loss": 0.6821, "step": 2267 }, { "epoch": 0.29, "grad_norm": 1.0058528184890747, "learning_rate": 8.323491305865498e-06, "loss": 0.5863, "step": 2268 }, { "epoch": 0.29, "grad_norm": 1.406995415687561, "learning_rate": 8.321940741425049e-06, "loss": 0.6819, "step": 2269 }, { "epoch": 0.29, "grad_norm": 1.8498761653900146, "learning_rate": 8.32038960484783e-06, "loss": 0.7176, "step": 2270 }, { "epoch": 0.29, "grad_norm": 1.4135112762451172, "learning_rate": 8.31883789640099e-06, "loss": 0.5951, "step": 2271 }, { "epoch": 0.29, "grad_norm": 1.5905379056930542, "learning_rate": 8.317285616351782e-06, "loss": 0.7059, "step": 2272 }, { "epoch": 0.29, "grad_norm": 1.0527902841567993, "learning_rate": 8.315732764967552e-06, "loss": 0.6129, "step": 2273 }, { "epoch": 0.29, "grad_norm": 1.336744785308838, "learning_rate": 8.314179342515746e-06, "loss": 0.6918, "step": 2274 }, { "epoch": 0.29, "grad_norm": 1.7440773248672485, "learning_rate": 8.312625349263914e-06, "loss": 0.6386, "step": 2275 }, { "epoch": 0.29, "grad_norm": 1.086266279220581, "learning_rate": 8.311070785479699e-06, "loss": 0.6303, "step": 2276 }, { "epoch": 0.29, "grad_norm": 1.1209901571273804, "learning_rate": 8.309515651430837e-06, "loss": 0.6507, "step": 2277 }, { "epoch": 0.29, "grad_norm": 1.4283497333526611, "learning_rate": 8.307959947385174e-06, "loss": 0.6424, "step": 2278 }, { "epoch": 0.29, "grad_norm": 1.2685678005218506, "learning_rate": 8.306403673610646e-06, "loss": 0.5953, "step": 2279 }, { "epoch": 0.29, "grad_norm": 1.394755482673645, "learning_rate": 8.304846830375294e-06, "loss": 0.5581, "step": 2280 }, { "epoch": 0.29, "grad_norm": 1.204455852508545, "learning_rate": 8.303289417947244e-06, "loss": 0.5786, "step": 2281 }, { "epoch": 0.29, "grad_norm": 1.3802586793899536, "learning_rate": 8.301731436594737e-06, "loss": 0.5582, "step": 2282 }, { "epoch": 0.29, "grad_norm": 1.6759288311004639, "learning_rate": 8.3001728865861e-06, "loss": 0.6505, "step": 2283 }, { "epoch": 0.29, "grad_norm": 1.07804536819458, "learning_rate": 8.298613768189761e-06, "loss": 0.6687, "step": 2284 }, { "epoch": 0.29, "grad_norm": 1.4512630701065063, "learning_rate": 8.297054081674247e-06, "loss": 0.6112, "step": 2285 }, { "epoch": 0.29, "grad_norm": 2.449326753616333, "learning_rate": 8.29549382730818e-06, "loss": 0.628, "step": 2286 }, { "epoch": 0.29, "grad_norm": 1.163280725479126, "learning_rate": 8.293933005360288e-06, "loss": 0.6082, "step": 2287 }, { "epoch": 0.29, "grad_norm": 1.181201696395874, "learning_rate": 8.292371616099388e-06, "loss": 0.6441, "step": 2288 }, { "epoch": 0.29, "grad_norm": 1.1229546070098877, "learning_rate": 8.290809659794397e-06, "loss": 0.6509, "step": 2289 }, { "epoch": 0.29, "grad_norm": 1.2174752950668335, "learning_rate": 8.289247136714328e-06, "loss": 0.6646, "step": 2290 }, { "epoch": 0.29, "grad_norm": 1.106174111366272, "learning_rate": 8.287684047128298e-06, "loss": 0.6211, "step": 2291 }, { "epoch": 0.29, "grad_norm": 1.5392602682113647, "learning_rate": 8.286120391305515e-06, "loss": 0.5977, "step": 2292 }, { "epoch": 0.29, "grad_norm": 1.2090442180633545, "learning_rate": 8.284556169515287e-06, "loss": 0.6219, "step": 2293 }, { "epoch": 0.29, "grad_norm": 1.6874985694885254, "learning_rate": 8.282991382027022e-06, "loss": 0.6698, "step": 2294 }, { "epoch": 0.29, "grad_norm": 1.1028480529785156, "learning_rate": 8.281426029110218e-06, "loss": 0.6597, "step": 2295 }, { "epoch": 0.29, "grad_norm": 1.1251074075698853, "learning_rate": 8.279860111034478e-06, "loss": 0.6606, "step": 2296 }, { "epoch": 0.29, "grad_norm": 1.540422797203064, "learning_rate": 8.278293628069502e-06, "loss": 0.6238, "step": 2297 }, { "epoch": 0.29, "grad_norm": 1.1604392528533936, "learning_rate": 8.276726580485082e-06, "loss": 0.6685, "step": 2298 }, { "epoch": 0.29, "grad_norm": 1.4422894716262817, "learning_rate": 8.27515896855111e-06, "loss": 0.6697, "step": 2299 }, { "epoch": 0.29, "grad_norm": 1.4221534729003906, "learning_rate": 8.273590792537574e-06, "loss": 0.6167, "step": 2300 }, { "epoch": 0.29, "grad_norm": 1.291616678237915, "learning_rate": 8.272022052714563e-06, "loss": 0.6258, "step": 2301 }, { "epoch": 0.29, "grad_norm": 0.9711089134216309, "learning_rate": 8.270452749352261e-06, "loss": 0.5395, "step": 2302 }, { "epoch": 0.3, "grad_norm": 1.145838975906372, "learning_rate": 8.268882882720946e-06, "loss": 0.5787, "step": 2303 }, { "epoch": 0.3, "grad_norm": 1.0817339420318604, "learning_rate": 8.267312453090997e-06, "loss": 0.6168, "step": 2304 }, { "epoch": 0.3, "grad_norm": 1.2782442569732666, "learning_rate": 8.26574146073289e-06, "loss": 0.673, "step": 2305 }, { "epoch": 0.3, "grad_norm": 1.1187020540237427, "learning_rate": 8.264169905917194e-06, "loss": 0.6032, "step": 2306 }, { "epoch": 0.3, "grad_norm": 1.3407741785049438, "learning_rate": 8.262597788914579e-06, "loss": 0.6716, "step": 2307 }, { "epoch": 0.3, "grad_norm": 1.4748417139053345, "learning_rate": 8.26102510999581e-06, "loss": 0.6684, "step": 2308 }, { "epoch": 0.3, "grad_norm": 1.8912734985351562, "learning_rate": 8.259451869431746e-06, "loss": 0.5978, "step": 2309 }, { "epoch": 0.3, "grad_norm": 0.9286829233169556, "learning_rate": 8.257878067493351e-06, "loss": 0.6408, "step": 2310 }, { "epoch": 0.3, "grad_norm": 1.2534960508346558, "learning_rate": 8.256303704451679e-06, "loss": 0.5862, "step": 2311 }, { "epoch": 0.3, "grad_norm": 1.3470648527145386, "learning_rate": 8.254728780577877e-06, "loss": 0.6486, "step": 2312 }, { "epoch": 0.3, "grad_norm": 1.1892026662826538, "learning_rate": 8.253153296143199e-06, "loss": 0.6312, "step": 2313 }, { "epoch": 0.3, "grad_norm": 2.13631534576416, "learning_rate": 8.251577251418987e-06, "loss": 0.5599, "step": 2314 }, { "epoch": 0.3, "grad_norm": 1.4602019786834717, "learning_rate": 8.250000646676688e-06, "loss": 0.638, "step": 2315 }, { "epoch": 0.3, "grad_norm": 1.0925565958023071, "learning_rate": 8.248423482187832e-06, "loss": 0.6265, "step": 2316 }, { "epoch": 0.3, "grad_norm": 1.1063237190246582, "learning_rate": 8.246845758224062e-06, "loss": 0.6283, "step": 2317 }, { "epoch": 0.3, "grad_norm": 1.2904975414276123, "learning_rate": 8.245267475057102e-06, "loss": 0.6167, "step": 2318 }, { "epoch": 0.3, "grad_norm": 1.1062041521072388, "learning_rate": 8.243688632958783e-06, "loss": 0.6804, "step": 2319 }, { "epoch": 0.3, "grad_norm": 1.2432068586349487, "learning_rate": 8.242109232201028e-06, "loss": 0.6152, "step": 2320 }, { "epoch": 0.3, "grad_norm": 1.199753761291504, "learning_rate": 8.240529273055852e-06, "loss": 0.6969, "step": 2321 }, { "epoch": 0.3, "grad_norm": 2.155482769012451, "learning_rate": 8.238948755795379e-06, "loss": 0.69, "step": 2322 }, { "epoch": 0.3, "grad_norm": 1.2352306842803955, "learning_rate": 8.237367680691817e-06, "loss": 0.6873, "step": 2323 }, { "epoch": 0.3, "grad_norm": 1.3099703788757324, "learning_rate": 8.235786048017473e-06, "loss": 0.6747, "step": 2324 }, { "epoch": 0.3, "grad_norm": 1.2565714120864868, "learning_rate": 8.234203858044751e-06, "loss": 0.6278, "step": 2325 }, { "epoch": 0.3, "grad_norm": 1.7045984268188477, "learning_rate": 8.232621111046154e-06, "loss": 0.6427, "step": 2326 }, { "epoch": 0.3, "grad_norm": 1.1133400201797485, "learning_rate": 8.231037807294275e-06, "loss": 0.6679, "step": 2327 }, { "epoch": 0.3, "grad_norm": 1.3851282596588135, "learning_rate": 8.229453947061807e-06, "loss": 0.6312, "step": 2328 }, { "epoch": 0.3, "grad_norm": 1.1454262733459473, "learning_rate": 8.227869530621538e-06, "loss": 0.6135, "step": 2329 }, { "epoch": 0.3, "grad_norm": 1.3275219202041626, "learning_rate": 8.226284558246351e-06, "loss": 0.6552, "step": 2330 }, { "epoch": 0.3, "grad_norm": 1.5105386972427368, "learning_rate": 8.224699030209227e-06, "loss": 0.6731, "step": 2331 }, { "epoch": 0.3, "grad_norm": 1.1528339385986328, "learning_rate": 8.223112946783237e-06, "loss": 0.5775, "step": 2332 }, { "epoch": 0.3, "grad_norm": 1.2737232446670532, "learning_rate": 8.221526308241556e-06, "loss": 0.5996, "step": 2333 }, { "epoch": 0.3, "grad_norm": 1.409363031387329, "learning_rate": 8.219939114857446e-06, "loss": 0.6846, "step": 2334 }, { "epoch": 0.3, "grad_norm": 1.1390594244003296, "learning_rate": 8.218351366904273e-06, "loss": 0.6503, "step": 2335 }, { "epoch": 0.3, "grad_norm": 1.7003358602523804, "learning_rate": 8.216763064655493e-06, "loss": 0.6394, "step": 2336 }, { "epoch": 0.3, "grad_norm": 1.6137683391571045, "learning_rate": 8.215174208384658e-06, "loss": 0.5789, "step": 2337 }, { "epoch": 0.3, "grad_norm": 1.193477988243103, "learning_rate": 8.213584798365416e-06, "loss": 0.5051, "step": 2338 }, { "epoch": 0.3, "grad_norm": 1.2043871879577637, "learning_rate": 8.211994834871511e-06, "loss": 0.5932, "step": 2339 }, { "epoch": 0.3, "grad_norm": 1.538806676864624, "learning_rate": 8.21040431817678e-06, "loss": 0.6492, "step": 2340 }, { "epoch": 0.3, "grad_norm": 1.2634645700454712, "learning_rate": 8.208813248555163e-06, "loss": 0.6181, "step": 2341 }, { "epoch": 0.3, "grad_norm": 1.2859795093536377, "learning_rate": 8.207221626280683e-06, "loss": 0.6666, "step": 2342 }, { "epoch": 0.3, "grad_norm": 1.1655632257461548, "learning_rate": 8.205629451627469e-06, "loss": 0.6052, "step": 2343 }, { "epoch": 0.3, "grad_norm": 6.354539394378662, "learning_rate": 8.204036724869737e-06, "loss": 0.6652, "step": 2344 }, { "epoch": 0.3, "grad_norm": 1.6453803777694702, "learning_rate": 8.202443446281804e-06, "loss": 0.5975, "step": 2345 }, { "epoch": 0.3, "grad_norm": 1.1627188920974731, "learning_rate": 8.20084961613808e-06, "loss": 0.6252, "step": 2346 }, { "epoch": 0.3, "grad_norm": 0.9727002382278442, "learning_rate": 8.199255234713068e-06, "loss": 0.5488, "step": 2347 }, { "epoch": 0.3, "grad_norm": 1.367080807685852, "learning_rate": 8.197660302281371e-06, "loss": 0.59, "step": 2348 }, { "epoch": 0.3, "grad_norm": 1.4162267446517944, "learning_rate": 8.196064819117681e-06, "loss": 0.6033, "step": 2349 }, { "epoch": 0.3, "grad_norm": 1.5463422536849976, "learning_rate": 8.194468785496788e-06, "loss": 0.6141, "step": 2350 }, { "epoch": 0.3, "grad_norm": 1.220804214477539, "learning_rate": 8.192872201693575e-06, "loss": 0.6256, "step": 2351 }, { "epoch": 0.3, "grad_norm": 1.263001799583435, "learning_rate": 8.191275067983026e-06, "loss": 0.6838, "step": 2352 }, { "epoch": 0.3, "grad_norm": 0.9969754815101624, "learning_rate": 8.189677384640212e-06, "loss": 0.6114, "step": 2353 }, { "epoch": 0.3, "grad_norm": 1.2792723178863525, "learning_rate": 8.188079151940299e-06, "loss": 0.6809, "step": 2354 }, { "epoch": 0.3, "grad_norm": 1.3698155879974365, "learning_rate": 8.186480370158552e-06, "loss": 0.6168, "step": 2355 }, { "epoch": 0.3, "grad_norm": 1.3380119800567627, "learning_rate": 8.18488103957033e-06, "loss": 0.6018, "step": 2356 }, { "epoch": 0.3, "grad_norm": 1.2656079530715942, "learning_rate": 8.183281160451083e-06, "loss": 0.5869, "step": 2357 }, { "epoch": 0.3, "grad_norm": 1.461011290550232, "learning_rate": 8.181680733076359e-06, "loss": 0.5784, "step": 2358 }, { "epoch": 0.3, "grad_norm": 1.3184751272201538, "learning_rate": 8.180079757721799e-06, "loss": 0.6599, "step": 2359 }, { "epoch": 0.3, "grad_norm": 1.169732689857483, "learning_rate": 8.178478234663139e-06, "loss": 0.6553, "step": 2360 }, { "epoch": 0.3, "grad_norm": 1.2968778610229492, "learning_rate": 8.176876164176206e-06, "loss": 0.669, "step": 2361 }, { "epoch": 0.3, "grad_norm": 1.166947603225708, "learning_rate": 8.175273546536929e-06, "loss": 0.6507, "step": 2362 }, { "epoch": 0.3, "grad_norm": 1.2829275131225586, "learning_rate": 8.17367038202132e-06, "loss": 0.5774, "step": 2363 }, { "epoch": 0.3, "grad_norm": 1.1029894351959229, "learning_rate": 8.172066670905498e-06, "loss": 0.6448, "step": 2364 }, { "epoch": 0.3, "grad_norm": 1.3434944152832031, "learning_rate": 8.170462413465666e-06, "loss": 0.6137, "step": 2365 }, { "epoch": 0.3, "grad_norm": 1.5390658378601074, "learning_rate": 8.168857609978125e-06, "loss": 0.647, "step": 2366 }, { "epoch": 0.3, "grad_norm": 1.3299118280410767, "learning_rate": 8.16725226071927e-06, "loss": 0.6634, "step": 2367 }, { "epoch": 0.3, "grad_norm": 1.6107890605926514, "learning_rate": 8.165646365965589e-06, "loss": 0.5927, "step": 2368 }, { "epoch": 0.3, "grad_norm": 1.2683249711990356, "learning_rate": 8.164039925993667e-06, "loss": 0.6726, "step": 2369 }, { "epoch": 0.3, "grad_norm": 1.6438405513763428, "learning_rate": 8.162432941080178e-06, "loss": 0.619, "step": 2370 }, { "epoch": 0.3, "grad_norm": 1.1879276037216187, "learning_rate": 8.160825411501896e-06, "loss": 0.5904, "step": 2371 }, { "epoch": 0.3, "grad_norm": 1.1308679580688477, "learning_rate": 8.159217337535682e-06, "loss": 0.6671, "step": 2372 }, { "epoch": 0.3, "grad_norm": 1.3544384241104126, "learning_rate": 8.157608719458493e-06, "loss": 0.5496, "step": 2373 }, { "epoch": 0.3, "grad_norm": 6.0907440185546875, "learning_rate": 8.155999557547384e-06, "loss": 0.6856, "step": 2374 }, { "epoch": 0.3, "grad_norm": 1.3868075609207153, "learning_rate": 8.154389852079501e-06, "loss": 0.6667, "step": 2375 }, { "epoch": 0.3, "grad_norm": 1.0029851198196411, "learning_rate": 8.15277960333208e-06, "loss": 0.6324, "step": 2376 }, { "epoch": 0.3, "grad_norm": 1.873964548110962, "learning_rate": 8.151168811582455e-06, "loss": 0.6404, "step": 2377 }, { "epoch": 0.3, "grad_norm": 1.0714986324310303, "learning_rate": 8.149557477108051e-06, "loss": 0.6066, "step": 2378 }, { "epoch": 0.3, "grad_norm": 1.5956631898880005, "learning_rate": 8.147945600186391e-06, "loss": 0.6414, "step": 2379 }, { "epoch": 0.3, "grad_norm": 1.2539238929748535, "learning_rate": 8.146333181095086e-06, "loss": 0.5592, "step": 2380 }, { "epoch": 0.31, "grad_norm": 1.2881016731262207, "learning_rate": 8.14472022011184e-06, "loss": 0.5886, "step": 2381 }, { "epoch": 0.31, "grad_norm": 3.226897954940796, "learning_rate": 8.143106717514455e-06, "loss": 0.7155, "step": 2382 }, { "epoch": 0.31, "grad_norm": 1.4589011669158936, "learning_rate": 8.141492673580825e-06, "loss": 0.7016, "step": 2383 }, { "epoch": 0.31, "grad_norm": 1.2750200033187866, "learning_rate": 8.139878088588934e-06, "loss": 0.6133, "step": 2384 }, { "epoch": 0.31, "grad_norm": 1.2676771879196167, "learning_rate": 8.138262962816865e-06, "loss": 0.6297, "step": 2385 }, { "epoch": 0.31, "grad_norm": 1.572501301765442, "learning_rate": 8.136647296542787e-06, "loss": 0.6115, "step": 2386 }, { "epoch": 0.31, "grad_norm": 1.544345736503601, "learning_rate": 8.135031090044966e-06, "loss": 0.6698, "step": 2387 }, { "epoch": 0.31, "grad_norm": 1.247854232788086, "learning_rate": 8.133414343601762e-06, "loss": 0.6356, "step": 2388 }, { "epoch": 0.31, "grad_norm": 1.2449547052383423, "learning_rate": 8.131797057491627e-06, "loss": 0.6794, "step": 2389 }, { "epoch": 0.31, "grad_norm": 1.5108448266983032, "learning_rate": 8.130179231993105e-06, "loss": 0.6564, "step": 2390 }, { "epoch": 0.31, "grad_norm": 1.2895339727401733, "learning_rate": 8.128560867384832e-06, "loss": 0.5596, "step": 2391 }, { "epoch": 0.31, "grad_norm": 1.3409855365753174, "learning_rate": 8.126941963945541e-06, "loss": 0.6147, "step": 2392 }, { "epoch": 0.31, "grad_norm": 1.2308881282806396, "learning_rate": 8.125322521954055e-06, "loss": 0.5847, "step": 2393 }, { "epoch": 0.31, "grad_norm": 1.4125090837478638, "learning_rate": 8.12370254168929e-06, "loss": 0.6779, "step": 2394 }, { "epoch": 0.31, "grad_norm": 1.1895586252212524, "learning_rate": 8.122082023430251e-06, "loss": 0.7764, "step": 2395 }, { "epoch": 0.31, "grad_norm": 1.083396077156067, "learning_rate": 8.120460967456043e-06, "loss": 0.6141, "step": 2396 }, { "epoch": 0.31, "grad_norm": 1.1598103046417236, "learning_rate": 8.118839374045861e-06, "loss": 0.612, "step": 2397 }, { "epoch": 0.31, "grad_norm": 1.1906448602676392, "learning_rate": 8.117217243478988e-06, "loss": 0.6277, "step": 2398 }, { "epoch": 0.31, "grad_norm": 1.1712722778320312, "learning_rate": 8.115594576034804e-06, "loss": 0.7004, "step": 2399 }, { "epoch": 0.31, "grad_norm": 1.3918662071228027, "learning_rate": 8.113971371992782e-06, "loss": 0.5189, "step": 2400 }, { "epoch": 0.31, "grad_norm": 1.191245436668396, "learning_rate": 8.112347631632484e-06, "loss": 0.6203, "step": 2401 }, { "epoch": 0.31, "grad_norm": 1.500771403312683, "learning_rate": 8.11072335523357e-06, "loss": 0.5052, "step": 2402 }, { "epoch": 0.31, "grad_norm": 1.3032631874084473, "learning_rate": 8.109098543075784e-06, "loss": 0.6056, "step": 2403 }, { "epoch": 0.31, "grad_norm": 1.1832846403121948, "learning_rate": 8.107473195438969e-06, "loss": 0.6169, "step": 2404 }, { "epoch": 0.31, "grad_norm": 1.2408926486968994, "learning_rate": 8.105847312603057e-06, "loss": 0.5983, "step": 2405 }, { "epoch": 0.31, "grad_norm": 1.0566129684448242, "learning_rate": 8.104220894848073e-06, "loss": 0.6529, "step": 2406 }, { "epoch": 0.31, "grad_norm": 1.521499752998352, "learning_rate": 8.102593942454138e-06, "loss": 0.6247, "step": 2407 }, { "epoch": 0.31, "grad_norm": 1.2609105110168457, "learning_rate": 8.100966455701458e-06, "loss": 0.551, "step": 2408 }, { "epoch": 0.31, "grad_norm": 1.3678559064865112, "learning_rate": 8.099338434870336e-06, "loss": 0.6309, "step": 2409 }, { "epoch": 0.31, "grad_norm": 1.0221734046936035, "learning_rate": 8.097709880241165e-06, "loss": 0.5807, "step": 2410 }, { "epoch": 0.31, "grad_norm": 1.6654701232910156, "learning_rate": 8.09608079209443e-06, "loss": 0.6289, "step": 2411 }, { "epoch": 0.31, "grad_norm": 1.2061890363693237, "learning_rate": 8.094451170710708e-06, "loss": 0.6315, "step": 2412 }, { "epoch": 0.31, "grad_norm": 1.0013035535812378, "learning_rate": 8.09282101637067e-06, "loss": 0.5325, "step": 2413 }, { "epoch": 0.31, "grad_norm": 3.319288730621338, "learning_rate": 8.091190329355076e-06, "loss": 0.6432, "step": 2414 }, { "epoch": 0.31, "grad_norm": 1.1914440393447876, "learning_rate": 8.089559109944777e-06, "loss": 0.5529, "step": 2415 }, { "epoch": 0.31, "grad_norm": 1.162363886833191, "learning_rate": 8.087927358420723e-06, "loss": 0.6628, "step": 2416 }, { "epoch": 0.31, "grad_norm": 1.818101406097412, "learning_rate": 8.086295075063942e-06, "loss": 0.6276, "step": 2417 }, { "epoch": 0.31, "grad_norm": 1.2434170246124268, "learning_rate": 8.084662260155567e-06, "loss": 0.6482, "step": 2418 }, { "epoch": 0.31, "grad_norm": 0.9897717833518982, "learning_rate": 8.083028913976816e-06, "loss": 0.6063, "step": 2419 }, { "epoch": 0.31, "grad_norm": 1.3943613767623901, "learning_rate": 8.081395036808999e-06, "loss": 0.6482, "step": 2420 }, { "epoch": 0.31, "grad_norm": 1.2112592458724976, "learning_rate": 8.079760628933518e-06, "loss": 0.6589, "step": 2421 }, { "epoch": 0.31, "grad_norm": 1.2801096439361572, "learning_rate": 8.078125690631868e-06, "loss": 0.5905, "step": 2422 }, { "epoch": 0.31, "grad_norm": 1.4379853010177612, "learning_rate": 8.076490222185631e-06, "loss": 0.5817, "step": 2423 }, { "epoch": 0.31, "grad_norm": 1.9312883615493774, "learning_rate": 8.074854223876487e-06, "loss": 0.6578, "step": 2424 }, { "epoch": 0.31, "grad_norm": 3.0586395263671875, "learning_rate": 8.073217695986203e-06, "loss": 0.5962, "step": 2425 }, { "epoch": 0.31, "grad_norm": 2.1898674964904785, "learning_rate": 8.071580638796634e-06, "loss": 0.6595, "step": 2426 }, { "epoch": 0.31, "grad_norm": 1.2980766296386719, "learning_rate": 8.069943052589734e-06, "loss": 0.6066, "step": 2427 }, { "epoch": 0.31, "grad_norm": 1.2575178146362305, "learning_rate": 8.068304937647542e-06, "loss": 0.5541, "step": 2428 }, { "epoch": 0.31, "grad_norm": 1.1975334882736206, "learning_rate": 8.066666294252189e-06, "loss": 0.6744, "step": 2429 }, { "epoch": 0.31, "grad_norm": 1.5488165616989136, "learning_rate": 8.0650271226859e-06, "loss": 0.6803, "step": 2430 }, { "epoch": 0.31, "grad_norm": 1.2515982389450073, "learning_rate": 8.063387423230987e-06, "loss": 0.6455, "step": 2431 }, { "epoch": 0.31, "grad_norm": 1.3792338371276855, "learning_rate": 8.061747196169855e-06, "loss": 0.5893, "step": 2432 }, { "epoch": 0.31, "grad_norm": 1.6035951375961304, "learning_rate": 8.060106441785003e-06, "loss": 0.5758, "step": 2433 }, { "epoch": 0.31, "grad_norm": 2.494800090789795, "learning_rate": 8.058465160359016e-06, "loss": 0.6529, "step": 2434 }, { "epoch": 0.31, "grad_norm": 1.2441433668136597, "learning_rate": 8.05682335217457e-06, "loss": 0.5876, "step": 2435 }, { "epoch": 0.31, "grad_norm": 1.5442471504211426, "learning_rate": 8.055181017514432e-06, "loss": 0.6346, "step": 2436 }, { "epoch": 0.31, "grad_norm": 1.3310129642486572, "learning_rate": 8.053538156661465e-06, "loss": 0.6639, "step": 2437 }, { "epoch": 0.31, "grad_norm": 1.222756028175354, "learning_rate": 8.051894769898615e-06, "loss": 0.6473, "step": 2438 }, { "epoch": 0.31, "grad_norm": 1.3063236474990845, "learning_rate": 8.050250857508923e-06, "loss": 0.7353, "step": 2439 }, { "epoch": 0.31, "grad_norm": 1.1706254482269287, "learning_rate": 8.04860641977552e-06, "loss": 0.6611, "step": 2440 }, { "epoch": 0.31, "grad_norm": 1.5362704992294312, "learning_rate": 8.046961456981625e-06, "loss": 0.6244, "step": 2441 }, { "epoch": 0.31, "grad_norm": 1.1532371044158936, "learning_rate": 8.045315969410551e-06, "loss": 0.6642, "step": 2442 }, { "epoch": 0.31, "grad_norm": 1.4653925895690918, "learning_rate": 8.043669957345701e-06, "loss": 0.6008, "step": 2443 }, { "epoch": 0.31, "grad_norm": 2.7584710121154785, "learning_rate": 8.042023421070566e-06, "loss": 0.6796, "step": 2444 }, { "epoch": 0.31, "grad_norm": 1.38533616065979, "learning_rate": 8.040376360868727e-06, "loss": 0.7932, "step": 2445 }, { "epoch": 0.31, "grad_norm": 2.1703217029571533, "learning_rate": 8.038728777023858e-06, "loss": 0.5941, "step": 2446 }, { "epoch": 0.31, "grad_norm": 4.146225452423096, "learning_rate": 8.037080669819723e-06, "loss": 0.6693, "step": 2447 }, { "epoch": 0.31, "grad_norm": 1.3640509843826294, "learning_rate": 8.035432039540172e-06, "loss": 0.7157, "step": 2448 }, { "epoch": 0.31, "grad_norm": 1.1412841081619263, "learning_rate": 8.03378288646915e-06, "loss": 0.4853, "step": 2449 }, { "epoch": 0.31, "grad_norm": 1.9124466180801392, "learning_rate": 8.03213321089069e-06, "loss": 0.5493, "step": 2450 }, { "epoch": 0.31, "grad_norm": 1.4567307233810425, "learning_rate": 8.030483013088913e-06, "loss": 0.6635, "step": 2451 }, { "epoch": 0.31, "grad_norm": 1.292675495147705, "learning_rate": 8.028832293348036e-06, "loss": 0.7629, "step": 2452 }, { "epoch": 0.31, "grad_norm": 1.2411820888519287, "learning_rate": 8.027181051952359e-06, "loss": 0.6375, "step": 2453 }, { "epoch": 0.31, "grad_norm": 1.1143420934677124, "learning_rate": 8.025529289186276e-06, "loss": 0.5945, "step": 2454 }, { "epoch": 0.31, "grad_norm": 1.4121794700622559, "learning_rate": 8.023877005334268e-06, "loss": 0.5983, "step": 2455 }, { "epoch": 0.31, "grad_norm": 1.5333738327026367, "learning_rate": 8.022224200680911e-06, "loss": 0.5918, "step": 2456 }, { "epoch": 0.31, "grad_norm": 1.6830614805221558, "learning_rate": 8.020570875510863e-06, "loss": 0.5903, "step": 2457 }, { "epoch": 0.31, "grad_norm": 1.3144832849502563, "learning_rate": 8.018917030108874e-06, "loss": 0.665, "step": 2458 }, { "epoch": 0.32, "grad_norm": 3.2713377475738525, "learning_rate": 8.017262664759793e-06, "loss": 0.6369, "step": 2459 }, { "epoch": 0.32, "grad_norm": 1.5499030351638794, "learning_rate": 8.015607779748544e-06, "loss": 0.5759, "step": 2460 }, { "epoch": 0.32, "grad_norm": 1.8569972515106201, "learning_rate": 8.01395237536015e-06, "loss": 0.6703, "step": 2461 }, { "epoch": 0.32, "grad_norm": 1.4563158750534058, "learning_rate": 8.01229645187972e-06, "loss": 0.6818, "step": 2462 }, { "epoch": 0.32, "grad_norm": 1.1788994073867798, "learning_rate": 8.010640009592454e-06, "loss": 0.6173, "step": 2463 }, { "epoch": 0.32, "grad_norm": 1.48537015914917, "learning_rate": 8.008983048783639e-06, "loss": 0.5923, "step": 2464 }, { "epoch": 0.32, "grad_norm": 1.9768660068511963, "learning_rate": 8.007325569738654e-06, "loss": 0.6231, "step": 2465 }, { "epoch": 0.32, "grad_norm": 1.359007716178894, "learning_rate": 8.005667572742964e-06, "loss": 0.6858, "step": 2466 }, { "epoch": 0.32, "grad_norm": 1.6291216611862183, "learning_rate": 8.00400905808213e-06, "loss": 0.6691, "step": 2467 }, { "epoch": 0.32, "grad_norm": 1.2007826566696167, "learning_rate": 8.002350026041792e-06, "loss": 0.6349, "step": 2468 }, { "epoch": 0.32, "grad_norm": 1.9453998804092407, "learning_rate": 8.000690476907688e-06, "loss": 0.5912, "step": 2469 }, { "epoch": 0.32, "grad_norm": 1.2745121717453003, "learning_rate": 7.999030410965642e-06, "loss": 0.6899, "step": 2470 }, { "epoch": 0.32, "grad_norm": 1.5417898893356323, "learning_rate": 7.997369828501565e-06, "loss": 0.5747, "step": 2471 }, { "epoch": 0.32, "grad_norm": 1.322919249534607, "learning_rate": 7.995708729801459e-06, "loss": 0.5912, "step": 2472 }, { "epoch": 0.32, "grad_norm": 2.1955883502960205, "learning_rate": 7.994047115151414e-06, "loss": 0.5515, "step": 2473 }, { "epoch": 0.32, "grad_norm": 1.2016857862472534, "learning_rate": 7.992384984837608e-06, "loss": 0.5988, "step": 2474 }, { "epoch": 0.32, "grad_norm": 1.1391526460647583, "learning_rate": 7.99072233914631e-06, "loss": 0.7175, "step": 2475 }, { "epoch": 0.32, "grad_norm": 1.383262276649475, "learning_rate": 7.98905917836388e-06, "loss": 0.5499, "step": 2476 }, { "epoch": 0.32, "grad_norm": 1.3291974067687988, "learning_rate": 7.987395502776762e-06, "loss": 0.5684, "step": 2477 }, { "epoch": 0.32, "grad_norm": 1.3303680419921875, "learning_rate": 7.98573131267149e-06, "loss": 0.6121, "step": 2478 }, { "epoch": 0.32, "grad_norm": 1.3416320085525513, "learning_rate": 7.984066608334684e-06, "loss": 0.58, "step": 2479 }, { "epoch": 0.32, "grad_norm": 1.2517801523208618, "learning_rate": 7.982401390053061e-06, "loss": 0.6141, "step": 2480 }, { "epoch": 0.32, "grad_norm": 1.685667634010315, "learning_rate": 7.980735658113416e-06, "loss": 0.5259, "step": 2481 }, { "epoch": 0.32, "grad_norm": 1.3155723810195923, "learning_rate": 7.97906941280264e-06, "loss": 0.5923, "step": 2482 }, { "epoch": 0.32, "grad_norm": 1.6275346279144287, "learning_rate": 7.97740265440771e-06, "loss": 0.6981, "step": 2483 }, { "epoch": 0.32, "grad_norm": 1.539910078048706, "learning_rate": 7.975735383215691e-06, "loss": 0.7108, "step": 2484 }, { "epoch": 0.32, "grad_norm": 1.0558339357376099, "learning_rate": 7.974067599513737e-06, "loss": 0.6255, "step": 2485 }, { "epoch": 0.32, "grad_norm": 1.4272794723510742, "learning_rate": 7.972399303589087e-06, "loss": 0.646, "step": 2486 }, { "epoch": 0.32, "grad_norm": 1.5255314111709595, "learning_rate": 7.970730495729075e-06, "loss": 0.6481, "step": 2487 }, { "epoch": 0.32, "grad_norm": 1.7075724601745605, "learning_rate": 7.969061176221118e-06, "loss": 0.6343, "step": 2488 }, { "epoch": 0.32, "grad_norm": 1.2317755222320557, "learning_rate": 7.96739134535272e-06, "loss": 0.7467, "step": 2489 }, { "epoch": 0.32, "grad_norm": 1.4913558959960938, "learning_rate": 7.965721003411477e-06, "loss": 0.6367, "step": 2490 }, { "epoch": 0.32, "grad_norm": 1.5637904405593872, "learning_rate": 7.964050150685075e-06, "loss": 0.6279, "step": 2491 }, { "epoch": 0.32, "grad_norm": 1.2378253936767578, "learning_rate": 7.962378787461278e-06, "loss": 0.6074, "step": 2492 }, { "epoch": 0.32, "grad_norm": 1.2458785772323608, "learning_rate": 7.960706914027947e-06, "loss": 0.6126, "step": 2493 }, { "epoch": 0.32, "grad_norm": 1.1778990030288696, "learning_rate": 7.95903453067303e-06, "loss": 0.6499, "step": 2494 }, { "epoch": 0.32, "grad_norm": 1.3699263334274292, "learning_rate": 7.95736163768456e-06, "loss": 0.6635, "step": 2495 }, { "epoch": 0.32, "grad_norm": 1.1903271675109863, "learning_rate": 7.955688235350659e-06, "loss": 0.6467, "step": 2496 }, { "epoch": 0.32, "grad_norm": 0.9940861463546753, "learning_rate": 7.954014323959535e-06, "loss": 0.5876, "step": 2497 }, { "epoch": 0.32, "grad_norm": 1.2723509073257446, "learning_rate": 7.952339903799486e-06, "loss": 0.6072, "step": 2498 }, { "epoch": 0.32, "grad_norm": 1.031148076057434, "learning_rate": 7.950664975158898e-06, "loss": 0.6026, "step": 2499 }, { "epoch": 0.32, "grad_norm": 1.7933772802352905, "learning_rate": 7.948989538326241e-06, "loss": 0.7027, "step": 2500 }, { "epoch": 0.32, "grad_norm": 1.2486510276794434, "learning_rate": 7.947313593590078e-06, "loss": 0.5948, "step": 2501 }, { "epoch": 0.32, "grad_norm": 1.5051732063293457, "learning_rate": 7.945637141239054e-06, "loss": 0.6531, "step": 2502 }, { "epoch": 0.32, "grad_norm": 1.544805884361267, "learning_rate": 7.943960181561905e-06, "loss": 0.6543, "step": 2503 }, { "epoch": 0.32, "grad_norm": 1.2853175401687622, "learning_rate": 7.942282714847453e-06, "loss": 0.6587, "step": 2504 }, { "epoch": 0.32, "grad_norm": 1.3751949071884155, "learning_rate": 7.940604741384607e-06, "loss": 0.5903, "step": 2505 }, { "epoch": 0.32, "grad_norm": 1.3573917150497437, "learning_rate": 7.938926261462366e-06, "loss": 0.6358, "step": 2506 }, { "epoch": 0.32, "grad_norm": 0.9741413593292236, "learning_rate": 7.937247275369813e-06, "loss": 0.61, "step": 2507 }, { "epoch": 0.32, "grad_norm": 1.3532620668411255, "learning_rate": 7.935567783396116e-06, "loss": 0.5756, "step": 2508 }, { "epoch": 0.32, "grad_norm": 1.2103369235992432, "learning_rate": 7.933887785830536e-06, "loss": 0.6417, "step": 2509 }, { "epoch": 0.32, "grad_norm": 1.3352247476577759, "learning_rate": 7.93220728296242e-06, "loss": 0.6468, "step": 2510 }, { "epoch": 0.32, "grad_norm": 1.1334822177886963, "learning_rate": 7.930526275081198e-06, "loss": 0.629, "step": 2511 }, { "epoch": 0.32, "grad_norm": 1.796984314918518, "learning_rate": 7.92884476247639e-06, "loss": 0.637, "step": 2512 }, { "epoch": 0.32, "grad_norm": 1.3944237232208252, "learning_rate": 7.927162745437605e-06, "loss": 0.602, "step": 2513 }, { "epoch": 0.32, "grad_norm": 1.1634482145309448, "learning_rate": 7.92548022425453e-06, "loss": 0.7354, "step": 2514 }, { "epoch": 0.32, "grad_norm": 1.0452791452407837, "learning_rate": 7.923797199216952e-06, "loss": 0.5221, "step": 2515 }, { "epoch": 0.32, "grad_norm": 1.141053557395935, "learning_rate": 7.922113670614733e-06, "loss": 0.6103, "step": 2516 }, { "epoch": 0.32, "grad_norm": 1.371309518814087, "learning_rate": 7.92042963873783e-06, "loss": 0.5625, "step": 2517 }, { "epoch": 0.32, "grad_norm": 1.4770622253417969, "learning_rate": 7.91874510387628e-06, "loss": 0.6978, "step": 2518 }, { "epoch": 0.32, "grad_norm": 1.2017892599105835, "learning_rate": 7.917060066320213e-06, "loss": 0.5845, "step": 2519 }, { "epoch": 0.32, "grad_norm": 1.2020307779312134, "learning_rate": 7.91537452635984e-06, "loss": 0.5964, "step": 2520 }, { "epoch": 0.32, "grad_norm": 1.0251448154449463, "learning_rate": 7.913688484285462e-06, "loss": 0.6008, "step": 2521 }, { "epoch": 0.32, "grad_norm": 1.8153685331344604, "learning_rate": 7.912001940387466e-06, "loss": 0.736, "step": 2522 }, { "epoch": 0.32, "grad_norm": 2.3994553089141846, "learning_rate": 7.910314894956326e-06, "loss": 0.6798, "step": 2523 }, { "epoch": 0.32, "grad_norm": 1.541852593421936, "learning_rate": 7.908627348282599e-06, "loss": 0.6723, "step": 2524 }, { "epoch": 0.32, "grad_norm": 1.0899004936218262, "learning_rate": 7.906939300656929e-06, "loss": 0.6825, "step": 2525 }, { "epoch": 0.32, "grad_norm": 1.5158523321151733, "learning_rate": 7.905250752370051e-06, "loss": 0.5933, "step": 2526 }, { "epoch": 0.32, "grad_norm": 1.0821444988250732, "learning_rate": 7.903561703712784e-06, "loss": 0.5961, "step": 2527 }, { "epoch": 0.32, "grad_norm": 1.3640822172164917, "learning_rate": 7.90187215497603e-06, "loss": 0.6481, "step": 2528 }, { "epoch": 0.32, "grad_norm": 1.0577113628387451, "learning_rate": 7.900182106450778e-06, "loss": 0.6615, "step": 2529 }, { "epoch": 0.32, "grad_norm": 1.1909829378128052, "learning_rate": 7.898491558428108e-06, "loss": 0.6597, "step": 2530 }, { "epoch": 0.32, "grad_norm": 1.1416188478469849, "learning_rate": 7.896800511199182e-06, "loss": 0.7269, "step": 2531 }, { "epoch": 0.32, "grad_norm": 1.210830807685852, "learning_rate": 7.895108965055247e-06, "loss": 0.6678, "step": 2532 }, { "epoch": 0.32, "grad_norm": 0.9497708678245544, "learning_rate": 7.893416920287638e-06, "loss": 0.5491, "step": 2533 }, { "epoch": 0.32, "grad_norm": 1.2646074295043945, "learning_rate": 7.891724377187774e-06, "loss": 0.5835, "step": 2534 }, { "epoch": 0.32, "grad_norm": 1.4362199306488037, "learning_rate": 7.890031336047166e-06, "loss": 0.6361, "step": 2535 }, { "epoch": 0.32, "grad_norm": 1.1478391885757446, "learning_rate": 7.8883377971574e-06, "loss": 0.6115, "step": 2536 }, { "epoch": 0.33, "grad_norm": 1.1613198518753052, "learning_rate": 7.886643760810156e-06, "loss": 0.6171, "step": 2537 }, { "epoch": 0.33, "grad_norm": 1.247196912765503, "learning_rate": 7.884949227297199e-06, "loss": 0.6298, "step": 2538 }, { "epoch": 0.33, "grad_norm": 1.3772872686386108, "learning_rate": 7.883254196910375e-06, "loss": 0.5992, "step": 2539 }, { "epoch": 0.33, "grad_norm": 1.6356278657913208, "learning_rate": 7.88155866994162e-06, "loss": 0.6667, "step": 2540 }, { "epoch": 0.33, "grad_norm": 1.2067662477493286, "learning_rate": 7.879862646682955e-06, "loss": 0.6492, "step": 2541 }, { "epoch": 0.33, "grad_norm": 1.1001278162002563, "learning_rate": 7.878166127426483e-06, "loss": 0.6088, "step": 2542 }, { "epoch": 0.33, "grad_norm": 0.9857989549636841, "learning_rate": 7.876469112464395e-06, "loss": 0.6095, "step": 2543 }, { "epoch": 0.33, "grad_norm": 1.3550429344177246, "learning_rate": 7.874771602088971e-06, "loss": 0.6157, "step": 2544 }, { "epoch": 0.33, "grad_norm": 0.9245561361312866, "learning_rate": 7.873073596592571e-06, "loss": 0.5456, "step": 2545 }, { "epoch": 0.33, "grad_norm": 1.329111099243164, "learning_rate": 7.871375096267641e-06, "loss": 0.6259, "step": 2546 }, { "epoch": 0.33, "grad_norm": 1.0523360967636108, "learning_rate": 7.869676101406713e-06, "loss": 0.6267, "step": 2547 }, { "epoch": 0.33, "grad_norm": 1.33540940284729, "learning_rate": 7.867976612302405e-06, "loss": 0.6859, "step": 2548 }, { "epoch": 0.33, "grad_norm": 1.6070891618728638, "learning_rate": 7.86627662924742e-06, "loss": 0.6134, "step": 2549 }, { "epoch": 0.33, "grad_norm": 1.088706374168396, "learning_rate": 7.864576152534544e-06, "loss": 0.6603, "step": 2550 }, { "epoch": 0.33, "grad_norm": 1.3226513862609863, "learning_rate": 7.862875182456652e-06, "loss": 0.5793, "step": 2551 }, { "epoch": 0.33, "grad_norm": 1.0518193244934082, "learning_rate": 7.861173719306697e-06, "loss": 0.5619, "step": 2552 }, { "epoch": 0.33, "grad_norm": 1.0934220552444458, "learning_rate": 7.859471763377726e-06, "loss": 0.6339, "step": 2553 }, { "epoch": 0.33, "grad_norm": 1.7915741205215454, "learning_rate": 7.857769314962865e-06, "loss": 0.6733, "step": 2554 }, { "epoch": 0.33, "grad_norm": 1.3046362400054932, "learning_rate": 7.856066374355326e-06, "loss": 0.6231, "step": 2555 }, { "epoch": 0.33, "grad_norm": 1.4739090204238892, "learning_rate": 7.854362941848406e-06, "loss": 0.6874, "step": 2556 }, { "epoch": 0.33, "grad_norm": 1.2384624481201172, "learning_rate": 7.852659017735484e-06, "loss": 0.642, "step": 2557 }, { "epoch": 0.33, "grad_norm": 0.99631267786026, "learning_rate": 7.850954602310032e-06, "loss": 0.5691, "step": 2558 }, { "epoch": 0.33, "grad_norm": 1.1139394044876099, "learning_rate": 7.849249695865595e-06, "loss": 0.6083, "step": 2559 }, { "epoch": 0.33, "grad_norm": 1.2851378917694092, "learning_rate": 7.847544298695812e-06, "loss": 0.7062, "step": 2560 }, { "epoch": 0.33, "grad_norm": 1.2008907794952393, "learning_rate": 7.845838411094403e-06, "loss": 0.5856, "step": 2561 }, { "epoch": 0.33, "grad_norm": 1.2814170122146606, "learning_rate": 7.844132033355169e-06, "loss": 0.6046, "step": 2562 }, { "epoch": 0.33, "grad_norm": 1.5419236421585083, "learning_rate": 7.842425165772003e-06, "loss": 0.6394, "step": 2563 }, { "epoch": 0.33, "grad_norm": 1.3178516626358032, "learning_rate": 7.840717808638878e-06, "loss": 0.5834, "step": 2564 }, { "epoch": 0.33, "grad_norm": 1.137864112854004, "learning_rate": 7.839009962249849e-06, "loss": 0.7019, "step": 2565 }, { "epoch": 0.33, "grad_norm": 1.5136849880218506, "learning_rate": 7.837301626899059e-06, "loss": 0.6362, "step": 2566 }, { "epoch": 0.33, "grad_norm": 1.5012842416763306, "learning_rate": 7.835592802880733e-06, "loss": 0.6163, "step": 2567 }, { "epoch": 0.33, "grad_norm": 1.2922011613845825, "learning_rate": 7.833883490489183e-06, "loss": 0.6593, "step": 2568 }, { "epoch": 0.33, "grad_norm": 1.0815269947052002, "learning_rate": 7.832173690018803e-06, "loss": 0.641, "step": 2569 }, { "epoch": 0.33, "grad_norm": 1.347912073135376, "learning_rate": 7.83046340176407e-06, "loss": 0.5536, "step": 2570 }, { "epoch": 0.33, "grad_norm": 1.4981647729873657, "learning_rate": 7.82875262601955e-06, "loss": 0.7163, "step": 2571 }, { "epoch": 0.33, "grad_norm": 1.0309983491897583, "learning_rate": 7.827041363079884e-06, "loss": 0.6477, "step": 2572 }, { "epoch": 0.33, "grad_norm": 1.9935951232910156, "learning_rate": 7.825329613239804e-06, "loss": 0.663, "step": 2573 }, { "epoch": 0.33, "grad_norm": 1.2904274463653564, "learning_rate": 7.823617376794128e-06, "loss": 0.5916, "step": 2574 }, { "epoch": 0.33, "grad_norm": 1.2341314554214478, "learning_rate": 7.82190465403775e-06, "loss": 0.6036, "step": 2575 }, { "epoch": 0.33, "grad_norm": 1.3546459674835205, "learning_rate": 7.820191445265653e-06, "loss": 0.6393, "step": 2576 }, { "epoch": 0.33, "grad_norm": 1.1507740020751953, "learning_rate": 7.818477750772901e-06, "loss": 0.6983, "step": 2577 }, { "epoch": 0.33, "grad_norm": 3.3480865955352783, "learning_rate": 7.816763570854644e-06, "loss": 0.6082, "step": 2578 }, { "epoch": 0.33, "grad_norm": 1.4413642883300781, "learning_rate": 7.815048905806116e-06, "loss": 0.6031, "step": 2579 }, { "epoch": 0.33, "grad_norm": 1.2350773811340332, "learning_rate": 7.813333755922631e-06, "loss": 0.6398, "step": 2580 }, { "epoch": 0.33, "grad_norm": 2.1799776554107666, "learning_rate": 7.811618121499591e-06, "loss": 0.6729, "step": 2581 }, { "epoch": 0.33, "grad_norm": 1.3736685514450073, "learning_rate": 7.809902002832477e-06, "loss": 0.6516, "step": 2582 }, { "epoch": 0.33, "grad_norm": 1.426729679107666, "learning_rate": 7.808185400216857e-06, "loss": 0.6552, "step": 2583 }, { "epoch": 0.33, "grad_norm": 1.6423767805099487, "learning_rate": 7.806468313948379e-06, "loss": 0.6658, "step": 2584 }, { "epoch": 0.33, "grad_norm": 1.1850903034210205, "learning_rate": 7.804750744322777e-06, "loss": 0.4809, "step": 2585 }, { "epoch": 0.33, "grad_norm": 1.1379947662353516, "learning_rate": 7.80303269163587e-06, "loss": 0.5726, "step": 2586 }, { "epoch": 0.33, "grad_norm": 1.541176438331604, "learning_rate": 7.801314156183554e-06, "loss": 0.5963, "step": 2587 }, { "epoch": 0.33, "grad_norm": 1.38937509059906, "learning_rate": 7.799595138261815e-06, "loss": 0.5938, "step": 2588 }, { "epoch": 0.33, "grad_norm": 1.0433942079544067, "learning_rate": 7.797875638166717e-06, "loss": 0.7079, "step": 2589 }, { "epoch": 0.33, "grad_norm": 1.3225352764129639, "learning_rate": 7.796155656194409e-06, "loss": 0.6547, "step": 2590 }, { "epoch": 0.33, "grad_norm": 1.3787798881530762, "learning_rate": 7.794435192641124e-06, "loss": 0.6904, "step": 2591 }, { "epoch": 0.33, "grad_norm": 1.0460058450698853, "learning_rate": 7.792714247803174e-06, "loss": 0.5865, "step": 2592 }, { "epoch": 0.33, "grad_norm": 1.1611684560775757, "learning_rate": 7.79099282197696e-06, "loss": 0.7466, "step": 2593 }, { "epoch": 0.33, "grad_norm": 1.1869133710861206, "learning_rate": 7.789270915458962e-06, "loss": 0.6646, "step": 2594 }, { "epoch": 0.33, "grad_norm": 1.3539026975631714, "learning_rate": 7.787548528545743e-06, "loss": 0.5528, "step": 2595 }, { "epoch": 0.33, "grad_norm": 1.1651065349578857, "learning_rate": 7.785825661533947e-06, "loss": 0.6363, "step": 2596 }, { "epoch": 0.33, "grad_norm": 1.2617387771606445, "learning_rate": 7.784102314720307e-06, "loss": 0.669, "step": 2597 }, { "epoch": 0.33, "grad_norm": 1.5752850770950317, "learning_rate": 7.782378488401632e-06, "loss": 0.5657, "step": 2598 }, { "epoch": 0.33, "grad_norm": 1.5625871419906616, "learning_rate": 7.780654182874816e-06, "loss": 0.6113, "step": 2599 }, { "epoch": 0.33, "grad_norm": 1.0509943962097168, "learning_rate": 7.778929398436835e-06, "loss": 0.6342, "step": 2600 }, { "epoch": 0.33, "grad_norm": 1.2715318202972412, "learning_rate": 7.777204135384749e-06, "loss": 0.5642, "step": 2601 }, { "epoch": 0.33, "grad_norm": 1.5984022617340088, "learning_rate": 7.7754783940157e-06, "loss": 0.6316, "step": 2602 }, { "epoch": 0.33, "grad_norm": 1.0083099603652954, "learning_rate": 7.773752174626911e-06, "loss": 0.639, "step": 2603 }, { "epoch": 0.33, "grad_norm": 1.0958874225616455, "learning_rate": 7.77202547751569e-06, "loss": 0.6832, "step": 2604 }, { "epoch": 0.33, "grad_norm": 1.1940189599990845, "learning_rate": 7.770298302979422e-06, "loss": 0.6686, "step": 2605 }, { "epoch": 0.33, "grad_norm": 1.1370244026184082, "learning_rate": 7.768570651315582e-06, "loss": 0.5631, "step": 2606 }, { "epoch": 0.33, "grad_norm": 1.4601707458496094, "learning_rate": 7.766842522821718e-06, "loss": 0.6766, "step": 2607 }, { "epoch": 0.33, "grad_norm": 0.9551748633384705, "learning_rate": 7.76511391779547e-06, "loss": 0.5999, "step": 2608 }, { "epoch": 0.33, "grad_norm": 1.3081096410751343, "learning_rate": 7.76338483653455e-06, "loss": 0.5324, "step": 2609 }, { "epoch": 0.33, "grad_norm": 1.218382716178894, "learning_rate": 7.761655279336762e-06, "loss": 0.6574, "step": 2610 }, { "epoch": 0.33, "grad_norm": 1.2414915561676025, "learning_rate": 7.759925246499984e-06, "loss": 0.6467, "step": 2611 }, { "epoch": 0.33, "grad_norm": 1.1774640083312988, "learning_rate": 7.75819473832218e-06, "loss": 0.6357, "step": 2612 }, { "epoch": 0.33, "grad_norm": 1.1158899068832397, "learning_rate": 7.756463755101395e-06, "loss": 0.5574, "step": 2613 }, { "epoch": 0.33, "grad_norm": 1.0917646884918213, "learning_rate": 7.754732297135756e-06, "loss": 0.61, "step": 2614 }, { "epoch": 0.34, "grad_norm": 1.4236118793487549, "learning_rate": 7.753000364723471e-06, "loss": 0.6406, "step": 2615 }, { "epoch": 0.34, "grad_norm": 1.2008647918701172, "learning_rate": 7.75126795816283e-06, "loss": 0.6095, "step": 2616 }, { "epoch": 0.34, "grad_norm": 2.6084930896759033, "learning_rate": 7.749535077752204e-06, "loss": 0.5763, "step": 2617 }, { "epoch": 0.34, "grad_norm": 1.2119183540344238, "learning_rate": 7.747801723790046e-06, "loss": 0.5449, "step": 2618 }, { "epoch": 0.34, "grad_norm": 1.2325401306152344, "learning_rate": 7.746067896574893e-06, "loss": 0.7414, "step": 2619 }, { "epoch": 0.34, "grad_norm": 1.1843703985214233, "learning_rate": 7.744333596405363e-06, "loss": 0.6557, "step": 2620 }, { "epoch": 0.34, "grad_norm": 1.0715175867080688, "learning_rate": 7.742598823580149e-06, "loss": 0.63, "step": 2621 }, { "epoch": 0.34, "grad_norm": 2.040466070175171, "learning_rate": 7.740863578398033e-06, "loss": 0.5549, "step": 2622 }, { "epoch": 0.34, "grad_norm": 1.321418285369873, "learning_rate": 7.739127861157878e-06, "loss": 0.6138, "step": 2623 }, { "epoch": 0.34, "grad_norm": 1.3999861478805542, "learning_rate": 7.737391672158621e-06, "loss": 0.5579, "step": 2624 }, { "epoch": 0.34, "grad_norm": 1.2321044206619263, "learning_rate": 7.73565501169929e-06, "loss": 0.6219, "step": 2625 }, { "epoch": 0.34, "grad_norm": 1.1841264963150024, "learning_rate": 7.733917880078988e-06, "loss": 0.5434, "step": 2626 }, { "epoch": 0.34, "grad_norm": 1.1282161474227905, "learning_rate": 7.732180277596899e-06, "loss": 0.5728, "step": 2627 }, { "epoch": 0.34, "grad_norm": 1.2175527811050415, "learning_rate": 7.730442204552292e-06, "loss": 0.6357, "step": 2628 }, { "epoch": 0.34, "grad_norm": 1.1946020126342773, "learning_rate": 7.72870366124451e-06, "loss": 0.6907, "step": 2629 }, { "epoch": 0.34, "grad_norm": 1.3675916194915771, "learning_rate": 7.726964647972987e-06, "loss": 0.7276, "step": 2630 }, { "epoch": 0.34, "grad_norm": 1.191794753074646, "learning_rate": 7.725225165037233e-06, "loss": 0.6401, "step": 2631 }, { "epoch": 0.34, "grad_norm": 1.3735675811767578, "learning_rate": 7.723485212736835e-06, "loss": 0.7791, "step": 2632 }, { "epoch": 0.34, "grad_norm": 1.5447452068328857, "learning_rate": 7.721744791371466e-06, "loss": 0.7008, "step": 2633 }, { "epoch": 0.34, "grad_norm": 2.7708098888397217, "learning_rate": 7.72000390124088e-06, "loss": 0.7301, "step": 2634 }, { "epoch": 0.34, "grad_norm": 1.259595513343811, "learning_rate": 7.718262542644906e-06, "loss": 0.6117, "step": 2635 }, { "epoch": 0.34, "grad_norm": 1.14170503616333, "learning_rate": 7.716520715883463e-06, "loss": 0.6366, "step": 2636 }, { "epoch": 0.34, "grad_norm": 1.3256770372390747, "learning_rate": 7.714778421256538e-06, "loss": 0.6714, "step": 2637 }, { "epoch": 0.34, "grad_norm": 1.6231776475906372, "learning_rate": 7.71303565906421e-06, "loss": 0.5758, "step": 2638 }, { "epoch": 0.34, "grad_norm": 1.823754906654358, "learning_rate": 7.711292429606635e-06, "loss": 0.6082, "step": 2639 }, { "epoch": 0.34, "grad_norm": 1.233652114868164, "learning_rate": 7.70954873318405e-06, "loss": 0.613, "step": 2640 }, { "epoch": 0.34, "grad_norm": 1.0892643928527832, "learning_rate": 7.707804570096769e-06, "loss": 0.5392, "step": 2641 }, { "epoch": 0.34, "grad_norm": 1.5985088348388672, "learning_rate": 7.706059940645187e-06, "loss": 0.6543, "step": 2642 }, { "epoch": 0.34, "grad_norm": 1.1690950393676758, "learning_rate": 7.704314845129785e-06, "loss": 0.6247, "step": 2643 }, { "epoch": 0.34, "grad_norm": 1.276746392250061, "learning_rate": 7.702569283851117e-06, "loss": 0.6876, "step": 2644 }, { "epoch": 0.34, "grad_norm": 1.1838765144348145, "learning_rate": 7.700823257109821e-06, "loss": 0.5706, "step": 2645 }, { "epoch": 0.34, "grad_norm": 1.1753582954406738, "learning_rate": 7.699076765206617e-06, "loss": 0.7981, "step": 2646 }, { "epoch": 0.34, "grad_norm": 1.190260410308838, "learning_rate": 7.6973298084423e-06, "loss": 0.6326, "step": 2647 }, { "epoch": 0.34, "grad_norm": 1.8324419260025024, "learning_rate": 7.695582387117749e-06, "loss": 0.678, "step": 2648 }, { "epoch": 0.34, "grad_norm": 1.1149442195892334, "learning_rate": 7.693834501533921e-06, "loss": 0.6484, "step": 2649 }, { "epoch": 0.34, "grad_norm": 1.1948319673538208, "learning_rate": 7.692086151991855e-06, "loss": 0.5729, "step": 2650 }, { "epoch": 0.34, "grad_norm": 1.7144792079925537, "learning_rate": 7.690337338792667e-06, "loss": 0.683, "step": 2651 }, { "epoch": 0.34, "grad_norm": 1.9441317319869995, "learning_rate": 7.688588062237557e-06, "loss": 0.6081, "step": 2652 }, { "epoch": 0.34, "grad_norm": 1.2331782579421997, "learning_rate": 7.6868383226278e-06, "loss": 0.6088, "step": 2653 }, { "epoch": 0.34, "grad_norm": 1.6664066314697266, "learning_rate": 7.685088120264754e-06, "loss": 0.6171, "step": 2654 }, { "epoch": 0.34, "grad_norm": 1.3156743049621582, "learning_rate": 7.683337455449856e-06, "loss": 0.6272, "step": 2655 }, { "epoch": 0.34, "grad_norm": 1.1555067300796509, "learning_rate": 7.681586328484621e-06, "loss": 0.5953, "step": 2656 }, { "epoch": 0.34, "grad_norm": 1.3859845399856567, "learning_rate": 7.679834739670649e-06, "loss": 0.634, "step": 2657 }, { "epoch": 0.34, "grad_norm": 2.6940677165985107, "learning_rate": 7.67808268930961e-06, "loss": 0.6804, "step": 2658 }, { "epoch": 0.34, "grad_norm": 1.2090493440628052, "learning_rate": 7.676330177703262e-06, "loss": 0.6196, "step": 2659 }, { "epoch": 0.34, "grad_norm": 1.1433385610580444, "learning_rate": 7.674577205153441e-06, "loss": 0.6498, "step": 2660 }, { "epoch": 0.34, "grad_norm": 1.3377647399902344, "learning_rate": 7.672823771962059e-06, "loss": 0.5986, "step": 2661 }, { "epoch": 0.34, "grad_norm": 1.0680303573608398, "learning_rate": 7.671069878431107e-06, "loss": 0.5319, "step": 2662 }, { "epoch": 0.34, "grad_norm": 1.3417402505874634, "learning_rate": 7.669315524862662e-06, "loss": 0.6823, "step": 2663 }, { "epoch": 0.34, "grad_norm": 1.365355372428894, "learning_rate": 7.667560711558875e-06, "loss": 0.6342, "step": 2664 }, { "epoch": 0.34, "grad_norm": 1.3211275339126587, "learning_rate": 7.665805438821973e-06, "loss": 0.6097, "step": 2665 }, { "epoch": 0.34, "grad_norm": 1.580230474472046, "learning_rate": 7.664049706954271e-06, "loss": 0.6559, "step": 2666 }, { "epoch": 0.34, "grad_norm": 1.0732578039169312, "learning_rate": 7.662293516258154e-06, "loss": 0.5693, "step": 2667 }, { "epoch": 0.34, "grad_norm": 1.8319809436798096, "learning_rate": 7.660536867036092e-06, "loss": 0.6538, "step": 2668 }, { "epoch": 0.34, "grad_norm": 1.520002007484436, "learning_rate": 7.658779759590634e-06, "loss": 0.5934, "step": 2669 }, { "epoch": 0.34, "grad_norm": 1.2555063962936401, "learning_rate": 7.657022194224402e-06, "loss": 0.6547, "step": 2670 }, { "epoch": 0.34, "grad_norm": 2.653493642807007, "learning_rate": 7.655264171240105e-06, "loss": 0.6805, "step": 2671 }, { "epoch": 0.34, "grad_norm": 1.030552625656128, "learning_rate": 7.653505690940522e-06, "loss": 0.5611, "step": 2672 }, { "epoch": 0.34, "grad_norm": 1.2763005495071411, "learning_rate": 7.65174675362852e-06, "loss": 0.6387, "step": 2673 }, { "epoch": 0.34, "grad_norm": 1.7102898359298706, "learning_rate": 7.649987359607039e-06, "loss": 0.6756, "step": 2674 }, { "epoch": 0.34, "grad_norm": 1.2029314041137695, "learning_rate": 7.648227509179095e-06, "loss": 0.7813, "step": 2675 }, { "epoch": 0.34, "grad_norm": 1.1546415090560913, "learning_rate": 7.646467202647794e-06, "loss": 0.6681, "step": 2676 }, { "epoch": 0.34, "grad_norm": 1.1073461771011353, "learning_rate": 7.644706440316308e-06, "loss": 0.6578, "step": 2677 }, { "epoch": 0.34, "grad_norm": 1.2768898010253906, "learning_rate": 7.642945222487892e-06, "loss": 0.7158, "step": 2678 }, { "epoch": 0.34, "grad_norm": 1.2239513397216797, "learning_rate": 7.641183549465881e-06, "loss": 0.6005, "step": 2679 }, { "epoch": 0.34, "grad_norm": 1.0863401889801025, "learning_rate": 7.639421421553687e-06, "loss": 0.6159, "step": 2680 }, { "epoch": 0.34, "grad_norm": 1.596559762954712, "learning_rate": 7.637658839054805e-06, "loss": 0.6288, "step": 2681 }, { "epoch": 0.34, "grad_norm": 1.3041114807128906, "learning_rate": 7.635895802272796e-06, "loss": 0.5878, "step": 2682 }, { "epoch": 0.34, "grad_norm": 1.0410126447677612, "learning_rate": 7.634132311511316e-06, "loss": 0.5978, "step": 2683 }, { "epoch": 0.34, "grad_norm": 0.9961788654327393, "learning_rate": 7.632368367074083e-06, "loss": 0.5053, "step": 2684 }, { "epoch": 0.34, "grad_norm": 1.2974720001220703, "learning_rate": 7.630603969264905e-06, "loss": 0.6118, "step": 2685 }, { "epoch": 0.34, "grad_norm": 1.1592649221420288, "learning_rate": 7.628839118387662e-06, "loss": 0.6189, "step": 2686 }, { "epoch": 0.34, "grad_norm": 1.093314528465271, "learning_rate": 7.627073814746315e-06, "loss": 0.6362, "step": 2687 }, { "epoch": 0.34, "grad_norm": 1.052543044090271, "learning_rate": 7.625308058644898e-06, "loss": 0.6143, "step": 2688 }, { "epoch": 0.34, "grad_norm": 1.3054975271224976, "learning_rate": 7.623541850387531e-06, "loss": 0.6378, "step": 2689 }, { "epoch": 0.34, "grad_norm": 1.0570430755615234, "learning_rate": 7.621775190278407e-06, "loss": 0.5995, "step": 2690 }, { "epoch": 0.34, "grad_norm": 1.4440031051635742, "learning_rate": 7.620008078621793e-06, "loss": 0.6158, "step": 2691 }, { "epoch": 0.34, "grad_norm": 1.3761212825775146, "learning_rate": 7.618240515722044e-06, "loss": 0.5676, "step": 2692 }, { "epoch": 0.35, "grad_norm": 1.1331719160079956, "learning_rate": 7.616472501883583e-06, "loss": 0.6331, "step": 2693 }, { "epoch": 0.35, "grad_norm": 1.8078550100326538, "learning_rate": 7.614704037410915e-06, "loss": 0.5285, "step": 2694 }, { "epoch": 0.35, "grad_norm": 1.2421823740005493, "learning_rate": 7.6129351226086225e-06, "loss": 0.7476, "step": 2695 }, { "epoch": 0.35, "grad_norm": 1.6171449422836304, "learning_rate": 7.6111657577813644e-06, "loss": 0.6853, "step": 2696 }, { "epoch": 0.35, "grad_norm": 2.045614719390869, "learning_rate": 7.609395943233877e-06, "loss": 0.6106, "step": 2697 }, { "epoch": 0.35, "grad_norm": 1.2099361419677734, "learning_rate": 7.6076256792709805e-06, "loss": 0.5939, "step": 2698 }, { "epoch": 0.35, "grad_norm": 1.3107006549835205, "learning_rate": 7.605854966197559e-06, "loss": 0.6542, "step": 2699 }, { "epoch": 0.35, "grad_norm": 1.4774034023284912, "learning_rate": 7.604083804318587e-06, "loss": 0.6179, "step": 2700 }, { "epoch": 0.35, "grad_norm": 1.5129690170288086, "learning_rate": 7.60231219393911e-06, "loss": 0.6467, "step": 2701 }, { "epoch": 0.35, "grad_norm": 1.1982402801513672, "learning_rate": 7.600540135364252e-06, "loss": 0.6278, "step": 2702 }, { "epoch": 0.35, "grad_norm": 1.2928173542022705, "learning_rate": 7.598767628899213e-06, "loss": 0.7124, "step": 2703 }, { "epoch": 0.35, "grad_norm": 1.1297682523727417, "learning_rate": 7.596994674849272e-06, "loss": 0.5936, "step": 2704 }, { "epoch": 0.35, "grad_norm": 1.0973412990570068, "learning_rate": 7.595221273519784e-06, "loss": 0.7179, "step": 2705 }, { "epoch": 0.35, "grad_norm": 1.5251829624176025, "learning_rate": 7.59344742521618e-06, "loss": 0.6319, "step": 2706 }, { "epoch": 0.35, "grad_norm": 1.0643885135650635, "learning_rate": 7.591673130243973e-06, "loss": 0.6339, "step": 2707 }, { "epoch": 0.35, "grad_norm": 1.0274592638015747, "learning_rate": 7.589898388908745e-06, "loss": 0.6236, "step": 2708 }, { "epoch": 0.35, "grad_norm": 1.272946834564209, "learning_rate": 7.588123201516164e-06, "loss": 0.6329, "step": 2709 }, { "epoch": 0.35, "grad_norm": 1.2281945943832397, "learning_rate": 7.5863475683719655e-06, "loss": 0.5959, "step": 2710 }, { "epoch": 0.35, "grad_norm": 1.1847566366195679, "learning_rate": 7.584571489781968e-06, "loss": 0.5932, "step": 2711 }, { "epoch": 0.35, "grad_norm": 1.0989586114883423, "learning_rate": 7.582794966052064e-06, "loss": 0.6339, "step": 2712 }, { "epoch": 0.35, "grad_norm": 1.114815354347229, "learning_rate": 7.581017997488225e-06, "loss": 0.5454, "step": 2713 }, { "epoch": 0.35, "grad_norm": 2.860734224319458, "learning_rate": 7.579240584396497e-06, "loss": 0.5895, "step": 2714 }, { "epoch": 0.35, "grad_norm": 1.2328461408615112, "learning_rate": 7.577462727083002e-06, "loss": 0.6297, "step": 2715 }, { "epoch": 0.35, "grad_norm": 1.1195461750030518, "learning_rate": 7.575684425853944e-06, "loss": 0.5487, "step": 2716 }, { "epoch": 0.35, "grad_norm": 1.1979864835739136, "learning_rate": 7.573905681015594e-06, "loss": 0.6874, "step": 2717 }, { "epoch": 0.35, "grad_norm": 1.1667231321334839, "learning_rate": 7.57212649287431e-06, "loss": 0.5755, "step": 2718 }, { "epoch": 0.35, "grad_norm": 2.0767219066619873, "learning_rate": 7.570346861736515e-06, "loss": 0.6358, "step": 2719 }, { "epoch": 0.35, "grad_norm": 1.3232417106628418, "learning_rate": 7.568566787908719e-06, "loss": 0.5362, "step": 2720 }, { "epoch": 0.35, "grad_norm": 1.3619623184204102, "learning_rate": 7.5667862716975e-06, "loss": 0.6142, "step": 2721 }, { "epoch": 0.35, "grad_norm": 1.1791231632232666, "learning_rate": 7.565005313409519e-06, "loss": 0.5849, "step": 2722 }, { "epoch": 0.35, "grad_norm": 1.304181694984436, "learning_rate": 7.563223913351507e-06, "loss": 0.6559, "step": 2723 }, { "epoch": 0.35, "grad_norm": 1.1328728199005127, "learning_rate": 7.561442071830279e-06, "loss": 0.5632, "step": 2724 }, { "epoch": 0.35, "grad_norm": 1.1923437118530273, "learning_rate": 7.559659789152714e-06, "loss": 0.5817, "step": 2725 }, { "epoch": 0.35, "grad_norm": 1.4104726314544678, "learning_rate": 7.557877065625778e-06, "loss": 0.7269, "step": 2726 }, { "epoch": 0.35, "grad_norm": 1.1886721849441528, "learning_rate": 7.5560939015565084e-06, "loss": 0.524, "step": 2727 }, { "epoch": 0.35, "grad_norm": 1.6470292806625366, "learning_rate": 7.554310297252019e-06, "loss": 0.6261, "step": 2728 }, { "epoch": 0.35, "grad_norm": 1.882947564125061, "learning_rate": 7.552526253019499e-06, "loss": 0.6656, "step": 2729 }, { "epoch": 0.35, "grad_norm": 1.4039462804794312, "learning_rate": 7.550741769166215e-06, "loss": 0.6302, "step": 2730 }, { "epoch": 0.35, "grad_norm": 2.5081090927124023, "learning_rate": 7.548956845999504e-06, "loss": 0.652, "step": 2731 }, { "epoch": 0.35, "grad_norm": 1.2422338724136353, "learning_rate": 7.547171483826788e-06, "loss": 0.6617, "step": 2732 }, { "epoch": 0.35, "grad_norm": 1.51920485496521, "learning_rate": 7.545385682955558e-06, "loss": 0.5829, "step": 2733 }, { "epoch": 0.35, "grad_norm": 1.3961273431777954, "learning_rate": 7.543599443693379e-06, "loss": 0.6472, "step": 2734 }, { "epoch": 0.35, "grad_norm": 1.0059709548950195, "learning_rate": 7.541812766347898e-06, "loss": 0.6372, "step": 2735 }, { "epoch": 0.35, "grad_norm": 1.0821688175201416, "learning_rate": 7.540025651226832e-06, "loss": 0.618, "step": 2736 }, { "epoch": 0.35, "grad_norm": 1.056602954864502, "learning_rate": 7.538238098637976e-06, "loss": 0.5796, "step": 2737 }, { "epoch": 0.35, "grad_norm": 1.2009097337722778, "learning_rate": 7.536450108889197e-06, "loss": 0.5918, "step": 2738 }, { "epoch": 0.35, "grad_norm": 1.1832275390625, "learning_rate": 7.534661682288444e-06, "loss": 0.6911, "step": 2739 }, { "epoch": 0.35, "grad_norm": 1.2712024450302124, "learning_rate": 7.532872819143735e-06, "loss": 0.57, "step": 2740 }, { "epoch": 0.35, "grad_norm": 1.5780545473098755, "learning_rate": 7.531083519763164e-06, "loss": 0.5392, "step": 2741 }, { "epoch": 0.35, "grad_norm": 1.6948215961456299, "learning_rate": 7.5292937844549045e-06, "loss": 0.6263, "step": 2742 }, { "epoch": 0.35, "grad_norm": 1.145394206047058, "learning_rate": 7.527503613527198e-06, "loss": 0.5929, "step": 2743 }, { "epoch": 0.35, "grad_norm": 1.0543674230575562, "learning_rate": 7.525713007288371e-06, "loss": 0.5308, "step": 2744 }, { "epoch": 0.35, "grad_norm": 1.533777117729187, "learning_rate": 7.523921966046813e-06, "loss": 0.6327, "step": 2745 }, { "epoch": 0.35, "grad_norm": 1.6052230596542358, "learning_rate": 7.522130490110999e-06, "loss": 0.6528, "step": 2746 }, { "epoch": 0.35, "grad_norm": 1.4751596450805664, "learning_rate": 7.52033857978947e-06, "loss": 0.6004, "step": 2747 }, { "epoch": 0.35, "grad_norm": 1.4990054368972778, "learning_rate": 7.51854623539085e-06, "loss": 0.5579, "step": 2748 }, { "epoch": 0.35, "grad_norm": 1.8240008354187012, "learning_rate": 7.516753457223831e-06, "loss": 0.6331, "step": 2749 }, { "epoch": 0.35, "grad_norm": 1.3702977895736694, "learning_rate": 7.514960245597185e-06, "loss": 0.6204, "step": 2750 }, { "epoch": 0.35, "grad_norm": 1.5224536657333374, "learning_rate": 7.5131666008197546e-06, "loss": 0.6065, "step": 2751 }, { "epoch": 0.35, "grad_norm": 1.307070016860962, "learning_rate": 7.51137252320046e-06, "loss": 0.7614, "step": 2752 }, { "epoch": 0.35, "grad_norm": 1.2971265316009521, "learning_rate": 7.509578013048293e-06, "loss": 0.5903, "step": 2753 }, { "epoch": 0.35, "grad_norm": 1.0652785301208496, "learning_rate": 7.507783070672323e-06, "loss": 0.6088, "step": 2754 }, { "epoch": 0.35, "grad_norm": 1.1220684051513672, "learning_rate": 7.505987696381692e-06, "loss": 0.6447, "step": 2755 }, { "epoch": 0.35, "grad_norm": 1.2321696281433105, "learning_rate": 7.504191890485615e-06, "loss": 0.6243, "step": 2756 }, { "epoch": 0.35, "grad_norm": 1.2906885147094727, "learning_rate": 7.502395653293385e-06, "loss": 0.6823, "step": 2757 }, { "epoch": 0.35, "grad_norm": 1.1404194831848145, "learning_rate": 7.500598985114367e-06, "loss": 0.54, "step": 2758 }, { "epoch": 0.35, "grad_norm": 1.316081166267395, "learning_rate": 7.498801886258001e-06, "loss": 0.597, "step": 2759 }, { "epoch": 0.35, "grad_norm": 1.2815004587173462, "learning_rate": 7.497004357033799e-06, "loss": 0.6086, "step": 2760 }, { "epoch": 0.35, "grad_norm": 1.0035674571990967, "learning_rate": 7.495206397751351e-06, "loss": 0.6706, "step": 2761 }, { "epoch": 0.35, "grad_norm": 2.31266188621521, "learning_rate": 7.493408008720317e-06, "loss": 0.5874, "step": 2762 }, { "epoch": 0.35, "grad_norm": 1.275524377822876, "learning_rate": 7.491609190250435e-06, "loss": 0.5634, "step": 2763 }, { "epoch": 0.35, "grad_norm": 1.0364302396774292, "learning_rate": 7.489809942651514e-06, "loss": 0.5669, "step": 2764 }, { "epoch": 0.35, "grad_norm": 1.401902198791504, "learning_rate": 7.488010266233436e-06, "loss": 0.6698, "step": 2765 }, { "epoch": 0.35, "grad_norm": 1.1657154560089111, "learning_rate": 7.486210161306161e-06, "loss": 0.585, "step": 2766 }, { "epoch": 0.35, "grad_norm": 1.433221697807312, "learning_rate": 7.48440962817972e-06, "loss": 0.5722, "step": 2767 }, { "epoch": 0.35, "grad_norm": 1.1307737827301025, "learning_rate": 7.482608667164218e-06, "loss": 0.6006, "step": 2768 }, { "epoch": 0.35, "grad_norm": 1.26017165184021, "learning_rate": 7.480807278569835e-06, "loss": 0.7052, "step": 2769 }, { "epoch": 0.35, "grad_norm": 1.2488316297531128, "learning_rate": 7.479005462706821e-06, "loss": 0.5859, "step": 2770 }, { "epoch": 0.36, "grad_norm": 1.1443097591400146, "learning_rate": 7.477203219885505e-06, "loss": 0.632, "step": 2771 }, { "epoch": 0.36, "grad_norm": 1.0260684490203857, "learning_rate": 7.4754005504162855e-06, "loss": 0.6807, "step": 2772 }, { "epoch": 0.36, "grad_norm": 1.0291800498962402, "learning_rate": 7.4735974546096335e-06, "loss": 0.6448, "step": 2773 }, { "epoch": 0.36, "grad_norm": 1.8409909009933472, "learning_rate": 7.4717939327761005e-06, "loss": 0.6562, "step": 2774 }, { "epoch": 0.36, "grad_norm": 1.0962021350860596, "learning_rate": 7.4699899852263e-06, "loss": 0.6475, "step": 2775 }, { "epoch": 0.36, "grad_norm": 1.6759026050567627, "learning_rate": 7.468185612270932e-06, "loss": 0.6217, "step": 2776 }, { "epoch": 0.36, "grad_norm": 1.2551426887512207, "learning_rate": 7.466380814220758e-06, "loss": 0.6965, "step": 2777 }, { "epoch": 0.36, "grad_norm": 1.2845110893249512, "learning_rate": 7.464575591386623e-06, "loss": 0.6527, "step": 2778 }, { "epoch": 0.36, "grad_norm": 1.096612572669983, "learning_rate": 7.462769944079433e-06, "loss": 0.6857, "step": 2779 }, { "epoch": 0.36, "grad_norm": 1.328682780265808, "learning_rate": 7.460963872610181e-06, "loss": 0.6129, "step": 2780 }, { "epoch": 0.36, "grad_norm": 1.3041599988937378, "learning_rate": 7.45915737728992e-06, "loss": 0.6, "step": 2781 }, { "epoch": 0.36, "grad_norm": 1.4073518514633179, "learning_rate": 7.457350458429788e-06, "loss": 0.697, "step": 2782 }, { "epoch": 0.36, "grad_norm": 1.2230504751205444, "learning_rate": 7.455543116340985e-06, "loss": 0.665, "step": 2783 }, { "epoch": 0.36, "grad_norm": 1.3002861738204956, "learning_rate": 7.453735351334791e-06, "loss": 0.6305, "step": 2784 }, { "epoch": 0.36, "grad_norm": 1.2461925745010376, "learning_rate": 7.451927163722557e-06, "loss": 0.6243, "step": 2785 }, { "epoch": 0.36, "grad_norm": 1.6128621101379395, "learning_rate": 7.450118553815707e-06, "loss": 0.6508, "step": 2786 }, { "epoch": 0.36, "grad_norm": 1.3560428619384766, "learning_rate": 7.448309521925737e-06, "loss": 0.6631, "step": 2787 }, { "epoch": 0.36, "grad_norm": 1.214652180671692, "learning_rate": 7.4465000683642144e-06, "loss": 0.6243, "step": 2788 }, { "epoch": 0.36, "grad_norm": 2.2300350666046143, "learning_rate": 7.444690193442783e-06, "loss": 0.6499, "step": 2789 }, { "epoch": 0.36, "grad_norm": 1.3689312934875488, "learning_rate": 7.442879897473156e-06, "loss": 0.6187, "step": 2790 }, { "epoch": 0.36, "grad_norm": 1.1015273332595825, "learning_rate": 7.441069180767119e-06, "loss": 0.5566, "step": 2791 }, { "epoch": 0.36, "grad_norm": 1.6342823505401611, "learning_rate": 7.439258043636532e-06, "loss": 0.5791, "step": 2792 }, { "epoch": 0.36, "grad_norm": 1.212996482849121, "learning_rate": 7.437446486393327e-06, "loss": 0.535, "step": 2793 }, { "epoch": 0.36, "grad_norm": 1.1164084672927856, "learning_rate": 7.435634509349509e-06, "loss": 0.7325, "step": 2794 }, { "epoch": 0.36, "grad_norm": 1.421766996383667, "learning_rate": 7.433822112817151e-06, "loss": 0.5267, "step": 2795 }, { "epoch": 0.36, "grad_norm": 1.4409089088439941, "learning_rate": 7.4320092971084044e-06, "loss": 0.5939, "step": 2796 }, { "epoch": 0.36, "grad_norm": 1.0344882011413574, "learning_rate": 7.430196062535488e-06, "loss": 0.5837, "step": 2797 }, { "epoch": 0.36, "grad_norm": 1.0946214199066162, "learning_rate": 7.428382409410697e-06, "loss": 0.5718, "step": 2798 }, { "epoch": 0.36, "grad_norm": 1.3646178245544434, "learning_rate": 7.426568338046394e-06, "loss": 0.5607, "step": 2799 }, { "epoch": 0.36, "grad_norm": 1.1723817586898804, "learning_rate": 7.4247538487550154e-06, "loss": 0.6713, "step": 2800 }, { "epoch": 0.36, "grad_norm": 1.326438546180725, "learning_rate": 7.422938941849073e-06, "loss": 0.6182, "step": 2801 }, { "epoch": 0.36, "grad_norm": 1.0464811325073242, "learning_rate": 7.421123617641145e-06, "loss": 0.5515, "step": 2802 }, { "epoch": 0.36, "grad_norm": 1.1844979524612427, "learning_rate": 7.4193078764438855e-06, "loss": 0.5557, "step": 2803 }, { "epoch": 0.36, "grad_norm": 1.2062512636184692, "learning_rate": 7.417491718570018e-06, "loss": 0.6877, "step": 2804 }, { "epoch": 0.36, "grad_norm": 1.0828931331634521, "learning_rate": 7.41567514433234e-06, "loss": 0.6256, "step": 2805 }, { "epoch": 0.36, "grad_norm": 1.0971540212631226, "learning_rate": 7.413858154043718e-06, "loss": 0.5836, "step": 2806 }, { "epoch": 0.36, "grad_norm": 1.0136876106262207, "learning_rate": 7.412040748017094e-06, "loss": 0.6453, "step": 2807 }, { "epoch": 0.36, "grad_norm": 1.2080310583114624, "learning_rate": 7.410222926565477e-06, "loss": 0.695, "step": 2808 }, { "epoch": 0.36, "grad_norm": 1.1714882850646973, "learning_rate": 7.408404690001949e-06, "loss": 0.6441, "step": 2809 }, { "epoch": 0.36, "grad_norm": 1.214609980583191, "learning_rate": 7.406586038639667e-06, "loss": 0.6754, "step": 2810 }, { "epoch": 0.36, "grad_norm": 1.2446070909500122, "learning_rate": 7.404766972791856e-06, "loss": 0.5658, "step": 2811 }, { "epoch": 0.36, "grad_norm": 1.5085375308990479, "learning_rate": 7.402947492771811e-06, "loss": 0.6991, "step": 2812 }, { "epoch": 0.36, "grad_norm": 2.154221534729004, "learning_rate": 7.4011275988929046e-06, "loss": 0.6379, "step": 2813 }, { "epoch": 0.36, "grad_norm": 1.2423889636993408, "learning_rate": 7.399307291468572e-06, "loss": 0.6439, "step": 2814 }, { "epoch": 0.36, "grad_norm": 1.5794448852539062, "learning_rate": 7.3974865708123276e-06, "loss": 0.6704, "step": 2815 }, { "epoch": 0.36, "grad_norm": 1.1966044902801514, "learning_rate": 7.395665437237751e-06, "loss": 0.6989, "step": 2816 }, { "epoch": 0.36, "grad_norm": 1.102135419845581, "learning_rate": 7.3938438910584964e-06, "loss": 0.6813, "step": 2817 }, { "epoch": 0.36, "grad_norm": 1.2617212533950806, "learning_rate": 7.392021932588289e-06, "loss": 0.6861, "step": 2818 }, { "epoch": 0.36, "grad_norm": 1.1992825269699097, "learning_rate": 7.3901995621409224e-06, "loss": 0.5791, "step": 2819 }, { "epoch": 0.36, "grad_norm": 1.2392593622207642, "learning_rate": 7.3883767800302665e-06, "loss": 0.7588, "step": 2820 }, { "epoch": 0.36, "grad_norm": 1.4153352975845337, "learning_rate": 7.386553586570253e-06, "loss": 0.62, "step": 2821 }, { "epoch": 0.36, "grad_norm": 1.2339427471160889, "learning_rate": 7.384729982074894e-06, "loss": 0.6189, "step": 2822 }, { "epoch": 0.36, "grad_norm": 1.1156781911849976, "learning_rate": 7.382905966858267e-06, "loss": 0.57, "step": 2823 }, { "epoch": 0.36, "grad_norm": 1.5094579458236694, "learning_rate": 7.381081541234522e-06, "loss": 0.6152, "step": 2824 }, { "epoch": 0.36, "grad_norm": 1.3721705675125122, "learning_rate": 7.3792567055178766e-06, "loss": 0.5664, "step": 2825 }, { "epoch": 0.36, "grad_norm": 1.3138307332992554, "learning_rate": 7.377431460022625e-06, "loss": 0.6344, "step": 2826 }, { "epoch": 0.36, "grad_norm": 1.6809388399124146, "learning_rate": 7.375605805063128e-06, "loss": 0.5832, "step": 2827 }, { "epoch": 0.36, "grad_norm": 1.2338505983352661, "learning_rate": 7.373779740953817e-06, "loss": 0.5376, "step": 2828 }, { "epoch": 0.36, "grad_norm": 1.1951864957809448, "learning_rate": 7.371953268009194e-06, "loss": 0.5897, "step": 2829 }, { "epoch": 0.36, "grad_norm": 1.2772202491760254, "learning_rate": 7.370126386543833e-06, "loss": 0.6407, "step": 2830 }, { "epoch": 0.36, "grad_norm": 1.6042275428771973, "learning_rate": 7.368299096872377e-06, "loss": 0.6627, "step": 2831 }, { "epoch": 0.36, "grad_norm": 1.3527517318725586, "learning_rate": 7.366471399309538e-06, "loss": 0.589, "step": 2832 }, { "epoch": 0.36, "grad_norm": 1.1194247007369995, "learning_rate": 7.3646432941701015e-06, "loss": 0.6234, "step": 2833 }, { "epoch": 0.36, "grad_norm": 1.0248486995697021, "learning_rate": 7.362814781768919e-06, "loss": 0.551, "step": 2834 }, { "epoch": 0.36, "grad_norm": 1.1566435098648071, "learning_rate": 7.360985862420919e-06, "loss": 0.6106, "step": 2835 }, { "epoch": 0.36, "grad_norm": 1.258584976196289, "learning_rate": 7.359156536441091e-06, "loss": 0.6558, "step": 2836 }, { "epoch": 0.36, "grad_norm": 1.1506608724594116, "learning_rate": 7.357326804144502e-06, "loss": 0.6423, "step": 2837 }, { "epoch": 0.36, "grad_norm": 1.1835181713104248, "learning_rate": 7.355496665846285e-06, "loss": 0.672, "step": 2838 }, { "epoch": 0.36, "grad_norm": 1.3529503345489502, "learning_rate": 7.353666121861646e-06, "loss": 0.5715, "step": 2839 }, { "epoch": 0.36, "grad_norm": 1.514220952987671, "learning_rate": 7.3518351725058555e-06, "loss": 0.637, "step": 2840 }, { "epoch": 0.36, "grad_norm": 1.547484278678894, "learning_rate": 7.350003818094261e-06, "loss": 0.6553, "step": 2841 }, { "epoch": 0.36, "grad_norm": 2.531700849533081, "learning_rate": 7.348172058942274e-06, "loss": 0.6472, "step": 2842 }, { "epoch": 0.36, "grad_norm": 1.1948038339614868, "learning_rate": 7.346339895365378e-06, "loss": 0.6535, "step": 2843 }, { "epoch": 0.36, "grad_norm": 1.2925516366958618, "learning_rate": 7.344507327679127e-06, "loss": 0.5542, "step": 2844 }, { "epoch": 0.36, "grad_norm": 1.377945899963379, "learning_rate": 7.342674356199141e-06, "loss": 0.687, "step": 2845 }, { "epoch": 0.36, "grad_norm": 1.2123630046844482, "learning_rate": 7.340840981241115e-06, "loss": 0.5739, "step": 2846 }, { "epoch": 0.36, "grad_norm": 1.087882161140442, "learning_rate": 7.339007203120809e-06, "loss": 0.6949, "step": 2847 }, { "epoch": 0.36, "grad_norm": 1.039701223373413, "learning_rate": 7.3371730221540545e-06, "loss": 0.577, "step": 2848 }, { "epoch": 0.36, "grad_norm": 1.535933494567871, "learning_rate": 7.335338438656752e-06, "loss": 0.6101, "step": 2849 }, { "epoch": 0.37, "grad_norm": 1.4305620193481445, "learning_rate": 7.333503452944872e-06, "loss": 0.5939, "step": 2850 }, { "epoch": 0.37, "grad_norm": 1.1246757507324219, "learning_rate": 7.331668065334449e-06, "loss": 0.6691, "step": 2851 }, { "epoch": 0.37, "grad_norm": 1.2173248529434204, "learning_rate": 7.329832276141597e-06, "loss": 0.7914, "step": 2852 }, { "epoch": 0.37, "grad_norm": 1.0413439273834229, "learning_rate": 7.327996085682491e-06, "loss": 0.6105, "step": 2853 }, { "epoch": 0.37, "grad_norm": 1.1989972591400146, "learning_rate": 7.326159494273377e-06, "loss": 0.7641, "step": 2854 }, { "epoch": 0.37, "grad_norm": 1.2141451835632324, "learning_rate": 7.324322502230571e-06, "loss": 0.6149, "step": 2855 }, { "epoch": 0.37, "grad_norm": 1.3219760656356812, "learning_rate": 7.322485109870458e-06, "loss": 0.5544, "step": 2856 }, { "epoch": 0.37, "grad_norm": 1.2472971677780151, "learning_rate": 7.320647317509493e-06, "loss": 0.628, "step": 2857 }, { "epoch": 0.37, "grad_norm": 1.1860524415969849, "learning_rate": 7.318809125464194e-06, "loss": 0.5704, "step": 2858 }, { "epoch": 0.37, "grad_norm": 1.7199898958206177, "learning_rate": 7.316970534051155e-06, "loss": 0.5817, "step": 2859 }, { "epoch": 0.37, "grad_norm": 1.1000601053237915, "learning_rate": 7.315131543587035e-06, "loss": 0.5683, "step": 2860 }, { "epoch": 0.37, "grad_norm": 1.0615863800048828, "learning_rate": 7.3132921543885646e-06, "loss": 0.5399, "step": 2861 }, { "epoch": 0.37, "grad_norm": 1.2328920364379883, "learning_rate": 7.31145236677254e-06, "loss": 0.6103, "step": 2862 }, { "epoch": 0.37, "grad_norm": 1.1900357007980347, "learning_rate": 7.309612181055827e-06, "loss": 0.5467, "step": 2863 }, { "epoch": 0.37, "grad_norm": 1.1210532188415527, "learning_rate": 7.30777159755536e-06, "loss": 0.6925, "step": 2864 }, { "epoch": 0.37, "grad_norm": 1.3341383934020996, "learning_rate": 7.305930616588144e-06, "loss": 0.5621, "step": 2865 }, { "epoch": 0.37, "grad_norm": 1.700534701347351, "learning_rate": 7.304089238471248e-06, "loss": 0.695, "step": 2866 }, { "epoch": 0.37, "grad_norm": 1.206648588180542, "learning_rate": 7.302247463521813e-06, "loss": 0.59, "step": 2867 }, { "epoch": 0.37, "grad_norm": 1.0011547803878784, "learning_rate": 7.300405292057048e-06, "loss": 0.5618, "step": 2868 }, { "epoch": 0.37, "grad_norm": 1.4958430528640747, "learning_rate": 7.298562724394227e-06, "loss": 0.4853, "step": 2869 }, { "epoch": 0.37, "grad_norm": 1.4243090152740479, "learning_rate": 7.296719760850699e-06, "loss": 0.7581, "step": 2870 }, { "epoch": 0.37, "grad_norm": 1.1251380443572998, "learning_rate": 7.294876401743873e-06, "loss": 0.6223, "step": 2871 }, { "epoch": 0.37, "grad_norm": 1.3010609149932861, "learning_rate": 7.293032647391234e-06, "loss": 0.635, "step": 2872 }, { "epoch": 0.37, "grad_norm": 1.1540263891220093, "learning_rate": 7.291188498110328e-06, "loss": 0.611, "step": 2873 }, { "epoch": 0.37, "grad_norm": 2.1613638401031494, "learning_rate": 7.289343954218772e-06, "loss": 0.6606, "step": 2874 }, { "epoch": 0.37, "grad_norm": 1.032372236251831, "learning_rate": 7.287499016034255e-06, "loss": 0.6623, "step": 2875 }, { "epoch": 0.37, "grad_norm": 1.2156139612197876, "learning_rate": 7.285653683874527e-06, "loss": 0.6258, "step": 2876 }, { "epoch": 0.37, "grad_norm": 1.3962489366531372, "learning_rate": 7.2838079580574076e-06, "loss": 0.5691, "step": 2877 }, { "epoch": 0.37, "grad_norm": 1.1535193920135498, "learning_rate": 7.28196183890079e-06, "loss": 0.6384, "step": 2878 }, { "epoch": 0.37, "grad_norm": 1.083708643913269, "learning_rate": 7.2801153267226266e-06, "loss": 0.5573, "step": 2879 }, { "epoch": 0.37, "grad_norm": 1.3152596950531006, "learning_rate": 7.278268421840944e-06, "loss": 0.6413, "step": 2880 }, { "epoch": 0.37, "grad_norm": 1.2211905717849731, "learning_rate": 7.276421124573834e-06, "loss": 0.608, "step": 2881 }, { "epoch": 0.37, "grad_norm": 1.245803713798523, "learning_rate": 7.274573435239454e-06, "loss": 0.555, "step": 2882 }, { "epoch": 0.37, "grad_norm": 1.663479208946228, "learning_rate": 7.272725354156034e-06, "loss": 0.5815, "step": 2883 }, { "epoch": 0.37, "grad_norm": 1.306326150894165, "learning_rate": 7.270876881641864e-06, "loss": 0.5868, "step": 2884 }, { "epoch": 0.37, "grad_norm": 1.315184235572815, "learning_rate": 7.269028018015311e-06, "loss": 0.6078, "step": 2885 }, { "epoch": 0.37, "grad_norm": 1.3038302659988403, "learning_rate": 7.267178763594797e-06, "loss": 0.6068, "step": 2886 }, { "epoch": 0.37, "grad_norm": 1.1452289819717407, "learning_rate": 7.265329118698828e-06, "loss": 0.5734, "step": 2887 }, { "epoch": 0.37, "grad_norm": 1.0434083938598633, "learning_rate": 7.263479083645961e-06, "loss": 0.6097, "step": 2888 }, { "epoch": 0.37, "grad_norm": 1.2132070064544678, "learning_rate": 7.2616286587548305e-06, "loss": 0.5651, "step": 2889 }, { "epoch": 0.37, "grad_norm": 1.0834686756134033, "learning_rate": 7.25977784434413e-06, "loss": 0.6347, "step": 2890 }, { "epoch": 0.37, "grad_norm": 1.3891212940216064, "learning_rate": 7.25792664073263e-06, "loss": 0.5851, "step": 2891 }, { "epoch": 0.37, "grad_norm": 1.2323148250579834, "learning_rate": 7.256075048239158e-06, "loss": 0.7112, "step": 2892 }, { "epoch": 0.37, "grad_norm": 1.1010910272598267, "learning_rate": 7.254223067182618e-06, "loss": 0.6333, "step": 2893 }, { "epoch": 0.37, "grad_norm": 1.0941414833068848, "learning_rate": 7.252370697881971e-06, "loss": 0.5294, "step": 2894 }, { "epoch": 0.37, "grad_norm": 1.3933345079421997, "learning_rate": 7.250517940656253e-06, "loss": 0.6761, "step": 2895 }, { "epoch": 0.37, "grad_norm": 1.3738077878952026, "learning_rate": 7.248664795824565e-06, "loss": 0.6789, "step": 2896 }, { "epoch": 0.37, "grad_norm": 1.0183112621307373, "learning_rate": 7.246811263706069e-06, "loss": 0.5176, "step": 2897 }, { "epoch": 0.37, "grad_norm": 1.2617154121398926, "learning_rate": 7.244957344620003e-06, "loss": 0.7175, "step": 2898 }, { "epoch": 0.37, "grad_norm": 1.2727289199829102, "learning_rate": 7.243103038885664e-06, "loss": 0.6891, "step": 2899 }, { "epoch": 0.37, "grad_norm": 1.2516908645629883, "learning_rate": 7.241248346822421e-06, "loss": 0.6941, "step": 2900 }, { "epoch": 0.37, "grad_norm": 1.441074013710022, "learning_rate": 7.2393932687497015e-06, "loss": 0.6303, "step": 2901 }, { "epoch": 0.37, "grad_norm": 1.305977702140808, "learning_rate": 7.237537804987013e-06, "loss": 0.7342, "step": 2902 }, { "epoch": 0.37, "grad_norm": 1.399827480316162, "learning_rate": 7.2356819558539124e-06, "loss": 0.6366, "step": 2903 }, { "epoch": 0.37, "grad_norm": 1.2844167947769165, "learning_rate": 7.233825721670042e-06, "loss": 0.5527, "step": 2904 }, { "epoch": 0.37, "grad_norm": 1.3156801462173462, "learning_rate": 7.231969102755093e-06, "loss": 0.6165, "step": 2905 }, { "epoch": 0.37, "grad_norm": 1.2262427806854248, "learning_rate": 7.230112099428832e-06, "loss": 0.578, "step": 2906 }, { "epoch": 0.37, "grad_norm": 1.332987904548645, "learning_rate": 7.228254712011091e-06, "loss": 0.5501, "step": 2907 }, { "epoch": 0.37, "grad_norm": 1.1426584720611572, "learning_rate": 7.226396940821767e-06, "loss": 0.5759, "step": 2908 }, { "epoch": 0.37, "grad_norm": 1.636731505393982, "learning_rate": 7.224538786180824e-06, "loss": 0.6399, "step": 2909 }, { "epoch": 0.37, "grad_norm": 1.7168676853179932, "learning_rate": 7.222680248408289e-06, "loss": 0.674, "step": 2910 }, { "epoch": 0.37, "grad_norm": 1.3796486854553223, "learning_rate": 7.22082132782426e-06, "loss": 0.6683, "step": 2911 }, { "epoch": 0.37, "grad_norm": 1.0893101692199707, "learning_rate": 7.218962024748896e-06, "loss": 0.5658, "step": 2912 }, { "epoch": 0.37, "grad_norm": 1.2268693447113037, "learning_rate": 7.217102339502426e-06, "loss": 0.6162, "step": 2913 }, { "epoch": 0.37, "grad_norm": 1.2177835702896118, "learning_rate": 7.215242272405142e-06, "loss": 0.6081, "step": 2914 }, { "epoch": 0.37, "grad_norm": 1.2131762504577637, "learning_rate": 7.213381823777404e-06, "loss": 0.5518, "step": 2915 }, { "epoch": 0.37, "grad_norm": 1.3434334993362427, "learning_rate": 7.2115209939396345e-06, "loss": 0.6446, "step": 2916 }, { "epoch": 0.37, "grad_norm": 1.2754311561584473, "learning_rate": 7.209659783212326e-06, "loss": 0.6655, "step": 2917 }, { "epoch": 0.37, "grad_norm": 1.3804233074188232, "learning_rate": 7.207798191916031e-06, "loss": 0.6211, "step": 2918 }, { "epoch": 0.37, "grad_norm": 1.2941632270812988, "learning_rate": 7.205936220371374e-06, "loss": 0.5307, "step": 2919 }, { "epoch": 0.37, "grad_norm": 1.4709851741790771, "learning_rate": 7.20407386889904e-06, "loss": 0.5958, "step": 2920 }, { "epoch": 0.37, "grad_norm": 1.3792892694473267, "learning_rate": 7.202211137819781e-06, "loss": 0.6584, "step": 2921 }, { "epoch": 0.37, "grad_norm": 1.2968083620071411, "learning_rate": 7.2003480274544156e-06, "loss": 0.5921, "step": 2922 }, { "epoch": 0.37, "grad_norm": 1.2979252338409424, "learning_rate": 7.198484538123826e-06, "loss": 0.6601, "step": 2923 }, { "epoch": 0.37, "grad_norm": 1.3300632238388062, "learning_rate": 7.196620670148961e-06, "loss": 0.7856, "step": 2924 }, { "epoch": 0.37, "grad_norm": 1.1163116693496704, "learning_rate": 7.194756423850833e-06, "loss": 0.6127, "step": 2925 }, { "epoch": 0.37, "grad_norm": 1.256772518157959, "learning_rate": 7.192891799550522e-06, "loss": 0.5977, "step": 2926 }, { "epoch": 0.37, "grad_norm": 1.180251955986023, "learning_rate": 7.19102679756917e-06, "loss": 0.6473, "step": 2927 }, { "epoch": 0.38, "grad_norm": 1.2117708921432495, "learning_rate": 7.189161418227987e-06, "loss": 0.6191, "step": 2928 }, { "epoch": 0.38, "grad_norm": 1.150450587272644, "learning_rate": 7.187295661848243e-06, "loss": 0.7571, "step": 2929 }, { "epoch": 0.38, "grad_norm": 1.3137658834457397, "learning_rate": 7.185429528751285e-06, "loss": 0.5918, "step": 2930 }, { "epoch": 0.38, "grad_norm": 1.3359402418136597, "learning_rate": 7.1835630192585085e-06, "loss": 0.5988, "step": 2931 }, { "epoch": 0.38, "grad_norm": 1.3789992332458496, "learning_rate": 7.1816961336913835e-06, "loss": 0.6197, "step": 2932 }, { "epoch": 0.38, "grad_norm": 1.2217789888381958, "learning_rate": 7.179828872371446e-06, "loss": 0.5909, "step": 2933 }, { "epoch": 0.38, "grad_norm": 1.4157116413116455, "learning_rate": 7.17796123562029e-06, "loss": 0.5482, "step": 2934 }, { "epoch": 0.38, "grad_norm": 1.2854241132736206, "learning_rate": 7.176093223759581e-06, "loss": 0.5544, "step": 2935 }, { "epoch": 0.38, "grad_norm": 1.2791540622711182, "learning_rate": 7.174224837111044e-06, "loss": 0.6145, "step": 2936 }, { "epoch": 0.38, "grad_norm": 1.3771109580993652, "learning_rate": 7.172356075996473e-06, "loss": 0.6808, "step": 2937 }, { "epoch": 0.38, "grad_norm": 1.5977014303207397, "learning_rate": 7.170486940737722e-06, "loss": 0.7052, "step": 2938 }, { "epoch": 0.38, "grad_norm": 1.30767023563385, "learning_rate": 7.168617431656713e-06, "loss": 0.674, "step": 2939 }, { "epoch": 0.38, "grad_norm": 1.5938276052474976, "learning_rate": 7.166747549075428e-06, "loss": 0.6467, "step": 2940 }, { "epoch": 0.38, "grad_norm": 1.34315824508667, "learning_rate": 7.164877293315921e-06, "loss": 0.5739, "step": 2941 }, { "epoch": 0.38, "grad_norm": 1.2254630327224731, "learning_rate": 7.163006664700302e-06, "loss": 0.6603, "step": 2942 }, { "epoch": 0.38, "grad_norm": 1.1479889154434204, "learning_rate": 7.16113566355075e-06, "loss": 0.6333, "step": 2943 }, { "epoch": 0.38, "grad_norm": 1.111104130744934, "learning_rate": 7.159264290189506e-06, "loss": 0.6006, "step": 2944 }, { "epoch": 0.38, "grad_norm": 1.1808959245681763, "learning_rate": 7.157392544938877e-06, "loss": 0.5819, "step": 2945 }, { "epoch": 0.38, "grad_norm": 1.5967776775360107, "learning_rate": 7.155520428121233e-06, "loss": 0.649, "step": 2946 }, { "epoch": 0.38, "grad_norm": 1.1286388635635376, "learning_rate": 7.153647940059007e-06, "loss": 0.6107, "step": 2947 }, { "epoch": 0.38, "grad_norm": 1.261406421661377, "learning_rate": 7.1517750810746986e-06, "loss": 0.6149, "step": 2948 }, { "epoch": 0.38, "grad_norm": 1.5292327404022217, "learning_rate": 7.149901851490869e-06, "loss": 0.6509, "step": 2949 }, { "epoch": 0.38, "grad_norm": 1.308585524559021, "learning_rate": 7.148028251630144e-06, "loss": 0.5977, "step": 2950 }, { "epoch": 0.38, "grad_norm": 1.1514670848846436, "learning_rate": 7.146154281815213e-06, "loss": 0.6166, "step": 2951 }, { "epoch": 0.38, "grad_norm": 1.0874031782150269, "learning_rate": 7.144279942368829e-06, "loss": 0.6517, "step": 2952 }, { "epoch": 0.38, "grad_norm": 1.1332695484161377, "learning_rate": 7.1424052336138094e-06, "loss": 0.5677, "step": 2953 }, { "epoch": 0.38, "grad_norm": 1.1511764526367188, "learning_rate": 7.140530155873033e-06, "loss": 0.5318, "step": 2954 }, { "epoch": 0.38, "grad_norm": 1.207506537437439, "learning_rate": 7.138654709469446e-06, "loss": 0.6273, "step": 2955 }, { "epoch": 0.38, "grad_norm": 1.4010735750198364, "learning_rate": 7.136778894726055e-06, "loss": 0.6545, "step": 2956 }, { "epoch": 0.38, "grad_norm": 1.8122620582580566, "learning_rate": 7.134902711965932e-06, "loss": 0.6774, "step": 2957 }, { "epoch": 0.38, "grad_norm": 1.2734031677246094, "learning_rate": 7.133026161512209e-06, "loss": 0.5889, "step": 2958 }, { "epoch": 0.38, "grad_norm": 1.1605252027511597, "learning_rate": 7.131149243688086e-06, "loss": 0.7358, "step": 2959 }, { "epoch": 0.38, "grad_norm": 1.0211987495422363, "learning_rate": 7.1292719588168225e-06, "loss": 0.5536, "step": 2960 }, { "epoch": 0.38, "grad_norm": 1.1764460802078247, "learning_rate": 7.127394307221743e-06, "loss": 0.6021, "step": 2961 }, { "epoch": 0.38, "grad_norm": 1.449872612953186, "learning_rate": 7.125516289226236e-06, "loss": 0.6723, "step": 2962 }, { "epoch": 0.38, "grad_norm": 1.2078107595443726, "learning_rate": 7.123637905153749e-06, "loss": 0.5864, "step": 2963 }, { "epoch": 0.38, "grad_norm": 1.082673192024231, "learning_rate": 7.121759155327799e-06, "loss": 0.6392, "step": 2964 }, { "epoch": 0.38, "grad_norm": 2.1781044006347656, "learning_rate": 7.11988004007196e-06, "loss": 0.645, "step": 2965 }, { "epoch": 0.38, "grad_norm": 1.0570480823516846, "learning_rate": 7.118000559709872e-06, "loss": 0.56, "step": 2966 }, { "epoch": 0.38, "grad_norm": 1.205815076828003, "learning_rate": 7.1161207145652385e-06, "loss": 0.5751, "step": 2967 }, { "epoch": 0.38, "grad_norm": 3.2318918704986572, "learning_rate": 7.114240504961824e-06, "loss": 0.6159, "step": 2968 }, { "epoch": 0.38, "grad_norm": 1.0701419115066528, "learning_rate": 7.112359931223456e-06, "loss": 0.6113, "step": 2969 }, { "epoch": 0.38, "grad_norm": 1.3909010887145996, "learning_rate": 7.110478993674023e-06, "loss": 0.6169, "step": 2970 }, { "epoch": 0.38, "grad_norm": 1.2270265817642212, "learning_rate": 7.108597692637483e-06, "loss": 0.6437, "step": 2971 }, { "epoch": 0.38, "grad_norm": 1.7526935338974, "learning_rate": 7.106716028437848e-06, "loss": 0.5907, "step": 2972 }, { "epoch": 0.38, "grad_norm": 1.3577011823654175, "learning_rate": 7.104834001399198e-06, "loss": 0.6427, "step": 2973 }, { "epoch": 0.38, "grad_norm": 1.416495680809021, "learning_rate": 7.102951611845675e-06, "loss": 0.6881, "step": 2974 }, { "epoch": 0.38, "grad_norm": 1.134766936302185, "learning_rate": 7.1010688601014786e-06, "loss": 0.6554, "step": 2975 }, { "epoch": 0.38, "grad_norm": 1.2442423105239868, "learning_rate": 7.099185746490878e-06, "loss": 0.5727, "step": 2976 }, { "epoch": 0.38, "grad_norm": 1.257924199104309, "learning_rate": 7.0973022713382e-06, "loss": 0.6091, "step": 2977 }, { "epoch": 0.38, "grad_norm": 1.4289751052856445, "learning_rate": 7.0954184349678355e-06, "loss": 0.6965, "step": 2978 }, { "epoch": 0.38, "grad_norm": 1.3827462196350098, "learning_rate": 7.0935342377042346e-06, "loss": 0.7763, "step": 2979 }, { "epoch": 0.38, "grad_norm": 1.0459548234939575, "learning_rate": 7.091649679871915e-06, "loss": 0.618, "step": 2980 }, { "epoch": 0.38, "grad_norm": 1.6075551509857178, "learning_rate": 7.0897647617954536e-06, "loss": 0.5657, "step": 2981 }, { "epoch": 0.38, "grad_norm": 1.1650707721710205, "learning_rate": 7.087879483799487e-06, "loss": 0.746, "step": 2982 }, { "epoch": 0.38, "grad_norm": 1.0505053997039795, "learning_rate": 7.085993846208718e-06, "loss": 0.6344, "step": 2983 }, { "epoch": 0.38, "grad_norm": 1.2678617238998413, "learning_rate": 7.0841078493479066e-06, "loss": 0.6389, "step": 2984 }, { "epoch": 0.38, "grad_norm": 1.1751099824905396, "learning_rate": 7.082221493541881e-06, "loss": 0.5815, "step": 2985 }, { "epoch": 0.38, "grad_norm": 1.1204549074172974, "learning_rate": 7.080334779115525e-06, "loss": 0.6307, "step": 2986 }, { "epoch": 0.38, "grad_norm": 1.218030571937561, "learning_rate": 7.078447706393788e-06, "loss": 0.6361, "step": 2987 }, { "epoch": 0.38, "grad_norm": 1.160930871963501, "learning_rate": 7.07656027570168e-06, "loss": 0.6299, "step": 2988 }, { "epoch": 0.38, "grad_norm": 1.3285592794418335, "learning_rate": 7.07467248736427e-06, "loss": 0.6738, "step": 2989 }, { "epoch": 0.38, "grad_norm": 1.2137503623962402, "learning_rate": 7.072784341706696e-06, "loss": 0.6647, "step": 2990 }, { "epoch": 0.38, "grad_norm": 1.0857856273651123, "learning_rate": 7.0708958390541485e-06, "loss": 0.5601, "step": 2991 }, { "epoch": 0.38, "grad_norm": 1.3946149349212646, "learning_rate": 7.069006979731885e-06, "loss": 0.6107, "step": 2992 }, { "epoch": 0.38, "grad_norm": 1.3853344917297363, "learning_rate": 7.067117764065226e-06, "loss": 0.7235, "step": 2993 }, { "epoch": 0.38, "grad_norm": 1.084587574005127, "learning_rate": 7.065228192379545e-06, "loss": 0.5808, "step": 2994 }, { "epoch": 0.38, "grad_norm": 1.4154893159866333, "learning_rate": 7.0633382650002854e-06, "loss": 0.7262, "step": 2995 }, { "epoch": 0.38, "grad_norm": 1.2447575330734253, "learning_rate": 7.061447982252949e-06, "loss": 0.6407, "step": 2996 }, { "epoch": 0.38, "grad_norm": 1.1402736902236938, "learning_rate": 7.0595573444631e-06, "loss": 0.5344, "step": 2997 }, { "epoch": 0.38, "grad_norm": 1.4848835468292236, "learning_rate": 7.0576663519563584e-06, "loss": 0.6223, "step": 2998 }, { "epoch": 0.38, "grad_norm": 1.1726362705230713, "learning_rate": 7.0557750050584115e-06, "loss": 0.6763, "step": 2999 }, { "epoch": 0.38, "grad_norm": 1.2380672693252563, "learning_rate": 7.0538833040950065e-06, "loss": 0.6007, "step": 3000 }, { "epoch": 0.38, "grad_norm": 1.149147629737854, "learning_rate": 7.051991249391947e-06, "loss": 0.5811, "step": 3001 }, { "epoch": 0.38, "grad_norm": 1.3859102725982666, "learning_rate": 7.0500988412751044e-06, "loss": 0.5532, "step": 3002 }, { "epoch": 0.38, "grad_norm": 1.054567813873291, "learning_rate": 7.048206080070407e-06, "loss": 0.6674, "step": 3003 }, { "epoch": 0.38, "grad_norm": 1.1755194664001465, "learning_rate": 7.046312966103843e-06, "loss": 0.6072, "step": 3004 }, { "epoch": 0.38, "grad_norm": 1.3620301485061646, "learning_rate": 7.044419499701462e-06, "loss": 0.6302, "step": 3005 }, { "epoch": 0.39, "grad_norm": 1.4070069789886475, "learning_rate": 7.042525681189377e-06, "loss": 0.6966, "step": 3006 }, { "epoch": 0.39, "grad_norm": 1.1201298236846924, "learning_rate": 7.0406315108937605e-06, "loss": 0.646, "step": 3007 }, { "epoch": 0.39, "grad_norm": 1.182099461555481, "learning_rate": 7.038736989140843e-06, "loss": 0.6491, "step": 3008 }, { "epoch": 0.39, "grad_norm": 1.2445439100265503, "learning_rate": 7.036842116256919e-06, "loss": 0.6441, "step": 3009 }, { "epoch": 0.39, "grad_norm": 0.9912326335906982, "learning_rate": 7.034946892568339e-06, "loss": 0.6075, "step": 3010 }, { "epoch": 0.39, "grad_norm": 1.477307915687561, "learning_rate": 7.03305131840152e-06, "loss": 0.5633, "step": 3011 }, { "epoch": 0.39, "grad_norm": 1.2548702955245972, "learning_rate": 7.031155394082935e-06, "loss": 0.6102, "step": 3012 }, { "epoch": 0.39, "grad_norm": 1.1897332668304443, "learning_rate": 7.029259119939118e-06, "loss": 0.6613, "step": 3013 }, { "epoch": 0.39, "grad_norm": 1.2398009300231934, "learning_rate": 7.027362496296662e-06, "loss": 0.6149, "step": 3014 }, { "epoch": 0.39, "grad_norm": 1.3539067506790161, "learning_rate": 7.025465523482225e-06, "loss": 0.5495, "step": 3015 }, { "epoch": 0.39, "grad_norm": 1.1485373973846436, "learning_rate": 7.023568201822519e-06, "loss": 0.5772, "step": 3016 }, { "epoch": 0.39, "grad_norm": 1.3308528661727905, "learning_rate": 7.021670531644324e-06, "loss": 0.5792, "step": 3017 }, { "epoch": 0.39, "grad_norm": 1.2606019973754883, "learning_rate": 7.01977251327447e-06, "loss": 0.6744, "step": 3018 }, { "epoch": 0.39, "grad_norm": 1.0969258546829224, "learning_rate": 7.017874147039855e-06, "loss": 0.6242, "step": 3019 }, { "epoch": 0.39, "grad_norm": 1.1134408712387085, "learning_rate": 7.0159754332674316e-06, "loss": 0.6511, "step": 3020 }, { "epoch": 0.39, "grad_norm": 1.1563353538513184, "learning_rate": 7.014076372284217e-06, "loss": 0.712, "step": 3021 }, { "epoch": 0.39, "grad_norm": 1.1367099285125732, "learning_rate": 7.012176964417284e-06, "loss": 0.6286, "step": 3022 }, { "epoch": 0.39, "grad_norm": 1.443542718887329, "learning_rate": 7.010277209993769e-06, "loss": 0.6477, "step": 3023 }, { "epoch": 0.39, "grad_norm": 1.0937227010726929, "learning_rate": 7.008377109340865e-06, "loss": 0.6286, "step": 3024 }, { "epoch": 0.39, "grad_norm": 1.4116562604904175, "learning_rate": 7.006476662785825e-06, "loss": 0.6758, "step": 3025 }, { "epoch": 0.39, "grad_norm": 0.9480895400047302, "learning_rate": 7.004575870655963e-06, "loss": 0.5669, "step": 3026 }, { "epoch": 0.39, "grad_norm": 1.328150987625122, "learning_rate": 7.002674733278652e-06, "loss": 0.6269, "step": 3027 }, { "epoch": 0.39, "grad_norm": 1.224358320236206, "learning_rate": 7.000773250981325e-06, "loss": 0.626, "step": 3028 }, { "epoch": 0.39, "grad_norm": 1.514603853225708, "learning_rate": 6.998871424091472e-06, "loss": 0.6341, "step": 3029 }, { "epoch": 0.39, "grad_norm": 1.0662037134170532, "learning_rate": 6.996969252936645e-06, "loss": 0.5783, "step": 3030 }, { "epoch": 0.39, "grad_norm": 1.2578132152557373, "learning_rate": 6.995066737844454e-06, "loss": 0.6042, "step": 3031 }, { "epoch": 0.39, "grad_norm": 1.1706691980361938, "learning_rate": 6.993163879142567e-06, "loss": 0.6136, "step": 3032 }, { "epoch": 0.39, "grad_norm": 0.9942347407341003, "learning_rate": 6.991260677158717e-06, "loss": 0.6262, "step": 3033 }, { "epoch": 0.39, "grad_norm": 1.1480199098587036, "learning_rate": 6.989357132220686e-06, "loss": 0.5928, "step": 3034 }, { "epoch": 0.39, "grad_norm": 1.1092981100082397, "learning_rate": 6.987453244656328e-06, "loss": 0.6324, "step": 3035 }, { "epoch": 0.39, "grad_norm": 1.462157130241394, "learning_rate": 6.985549014793542e-06, "loss": 0.6114, "step": 3036 }, { "epoch": 0.39, "grad_norm": 0.9729678630828857, "learning_rate": 6.983644442960299e-06, "loss": 0.644, "step": 3037 }, { "epoch": 0.39, "grad_norm": 1.6944612264633179, "learning_rate": 6.9817395294846165e-06, "loss": 0.5726, "step": 3038 }, { "epoch": 0.39, "grad_norm": 1.0340354442596436, "learning_rate": 6.979834274694583e-06, "loss": 0.6147, "step": 3039 }, { "epoch": 0.39, "grad_norm": 1.3940993547439575, "learning_rate": 6.977928678918335e-06, "loss": 0.6238, "step": 3040 }, { "epoch": 0.39, "grad_norm": 1.217641830444336, "learning_rate": 6.976022742484076e-06, "loss": 0.6713, "step": 3041 }, { "epoch": 0.39, "grad_norm": 5.02081298828125, "learning_rate": 6.974116465720064e-06, "loss": 0.6449, "step": 3042 }, { "epoch": 0.39, "grad_norm": 1.6640965938568115, "learning_rate": 6.972209848954617e-06, "loss": 0.6568, "step": 3043 }, { "epoch": 0.39, "grad_norm": 1.213199496269226, "learning_rate": 6.97030289251611e-06, "loss": 0.5438, "step": 3044 }, { "epoch": 0.39, "grad_norm": 1.1289682388305664, "learning_rate": 6.968395596732977e-06, "loss": 0.6994, "step": 3045 }, { "epoch": 0.39, "grad_norm": 1.1272958517074585, "learning_rate": 6.9664879619337115e-06, "loss": 0.5982, "step": 3046 }, { "epoch": 0.39, "grad_norm": 1.7126679420471191, "learning_rate": 6.964579988446866e-06, "loss": 0.5872, "step": 3047 }, { "epoch": 0.39, "grad_norm": 1.227146863937378, "learning_rate": 6.962671676601048e-06, "loss": 0.6131, "step": 3048 }, { "epoch": 0.39, "grad_norm": 1.314324140548706, "learning_rate": 6.960763026724926e-06, "loss": 0.584, "step": 3049 }, { "epoch": 0.39, "grad_norm": 1.2320308685302734, "learning_rate": 6.958854039147227e-06, "loss": 0.5748, "step": 3050 }, { "epoch": 0.39, "grad_norm": 1.2381142377853394, "learning_rate": 6.956944714196735e-06, "loss": 0.6293, "step": 3051 }, { "epoch": 0.39, "grad_norm": 1.3332390785217285, "learning_rate": 6.9550350522022935e-06, "loss": 0.5833, "step": 3052 }, { "epoch": 0.39, "grad_norm": 1.1489558219909668, "learning_rate": 6.953125053492801e-06, "loss": 0.5628, "step": 3053 }, { "epoch": 0.39, "grad_norm": 1.03483247756958, "learning_rate": 6.951214718397217e-06, "loss": 0.5724, "step": 3054 }, { "epoch": 0.39, "grad_norm": 1.5205141305923462, "learning_rate": 6.9493040472445575e-06, "loss": 0.6287, "step": 3055 }, { "epoch": 0.39, "grad_norm": 1.2520228624343872, "learning_rate": 6.947393040363897e-06, "loss": 0.6165, "step": 3056 }, { "epoch": 0.39, "grad_norm": 1.1673003435134888, "learning_rate": 6.945481698084366e-06, "loss": 0.6758, "step": 3057 }, { "epoch": 0.39, "grad_norm": 1.1318930387496948, "learning_rate": 6.943570020735158e-06, "loss": 0.6002, "step": 3058 }, { "epoch": 0.39, "grad_norm": 1.2879729270935059, "learning_rate": 6.941658008645518e-06, "loss": 0.6208, "step": 3059 }, { "epoch": 0.39, "grad_norm": 1.3986260890960693, "learning_rate": 6.939745662144751e-06, "loss": 0.5997, "step": 3060 }, { "epoch": 0.39, "grad_norm": 1.7398829460144043, "learning_rate": 6.9378329815622215e-06, "loss": 0.6191, "step": 3061 }, { "epoch": 0.39, "grad_norm": 1.0883538722991943, "learning_rate": 6.935919967227348e-06, "loss": 0.6555, "step": 3062 }, { "epoch": 0.39, "grad_norm": 1.441008448600769, "learning_rate": 6.93400661946961e-06, "loss": 0.6771, "step": 3063 }, { "epoch": 0.39, "grad_norm": 1.193451166152954, "learning_rate": 6.932092938618541e-06, "loss": 0.6785, "step": 3064 }, { "epoch": 0.39, "grad_norm": 1.0834167003631592, "learning_rate": 6.930178925003735e-06, "loss": 0.6142, "step": 3065 }, { "epoch": 0.39, "grad_norm": 1.0183688402175903, "learning_rate": 6.928264578954841e-06, "loss": 0.6609, "step": 3066 }, { "epoch": 0.39, "grad_norm": 1.2965633869171143, "learning_rate": 6.926349900801568e-06, "loss": 0.6554, "step": 3067 }, { "epoch": 0.39, "grad_norm": 1.4456558227539062, "learning_rate": 6.924434890873677e-06, "loss": 0.6321, "step": 3068 }, { "epoch": 0.39, "grad_norm": 1.5659126043319702, "learning_rate": 6.922519549500994e-06, "loss": 0.6931, "step": 3069 }, { "epoch": 0.39, "grad_norm": 1.1102631092071533, "learning_rate": 6.920603877013393e-06, "loss": 0.6349, "step": 3070 }, { "epoch": 0.39, "grad_norm": 1.2046291828155518, "learning_rate": 6.918687873740815e-06, "loss": 0.6647, "step": 3071 }, { "epoch": 0.39, "grad_norm": 1.7100063562393188, "learning_rate": 6.916771540013246e-06, "loss": 0.6549, "step": 3072 }, { "epoch": 0.39, "grad_norm": 1.1494594812393188, "learning_rate": 6.914854876160741e-06, "loss": 0.619, "step": 3073 }, { "epoch": 0.39, "grad_norm": 1.156104564666748, "learning_rate": 6.912937882513404e-06, "loss": 0.7542, "step": 3074 }, { "epoch": 0.39, "grad_norm": 1.092430591583252, "learning_rate": 6.911020559401399e-06, "loss": 0.5925, "step": 3075 }, { "epoch": 0.39, "grad_norm": 1.2527555227279663, "learning_rate": 6.909102907154946e-06, "loss": 0.6515, "step": 3076 }, { "epoch": 0.39, "grad_norm": 1.0143706798553467, "learning_rate": 6.90718492610432e-06, "loss": 0.5752, "step": 3077 }, { "epoch": 0.39, "grad_norm": 1.188001036643982, "learning_rate": 6.905266616579857e-06, "loss": 0.5966, "step": 3078 }, { "epoch": 0.39, "grad_norm": 1.197407841682434, "learning_rate": 6.903347978911944e-06, "loss": 0.5806, "step": 3079 }, { "epoch": 0.39, "grad_norm": 1.2781100273132324, "learning_rate": 6.9014290134310294e-06, "loss": 0.6362, "step": 3080 }, { "epoch": 0.39, "grad_norm": 1.1337027549743652, "learning_rate": 6.899509720467614e-06, "loss": 0.6279, "step": 3081 }, { "epoch": 0.39, "grad_norm": 1.2396838665008545, "learning_rate": 6.897590100352261e-06, "loss": 0.5464, "step": 3082 }, { "epoch": 0.39, "grad_norm": 0.947368860244751, "learning_rate": 6.89567015341558e-06, "loss": 0.6205, "step": 3083 }, { "epoch": 0.4, "grad_norm": 1.04353928565979, "learning_rate": 6.893749879988248e-06, "loss": 0.6318, "step": 3084 }, { "epoch": 0.4, "grad_norm": 1.1901612281799316, "learning_rate": 6.89182928040099e-06, "loss": 0.5634, "step": 3085 }, { "epoch": 0.4, "grad_norm": 1.215627670288086, "learning_rate": 6.8899083549845914e-06, "loss": 0.6177, "step": 3086 }, { "epoch": 0.4, "grad_norm": 1.5702704191207886, "learning_rate": 6.8879871040698935e-06, "loss": 0.6184, "step": 3087 }, { "epoch": 0.4, "grad_norm": 1.1875890493392944, "learning_rate": 6.886065527987791e-06, "loss": 0.5802, "step": 3088 }, { "epoch": 0.4, "grad_norm": 1.155051350593567, "learning_rate": 6.884143627069236e-06, "loss": 0.6456, "step": 3089 }, { "epoch": 0.4, "grad_norm": 1.4234286546707153, "learning_rate": 6.882221401645239e-06, "loss": 0.6865, "step": 3090 }, { "epoch": 0.4, "grad_norm": 1.3715044260025024, "learning_rate": 6.880298852046863e-06, "loss": 0.5297, "step": 3091 }, { "epoch": 0.4, "grad_norm": 1.273527979850769, "learning_rate": 6.878375978605227e-06, "loss": 0.5526, "step": 3092 }, { "epoch": 0.4, "grad_norm": 1.063097596168518, "learning_rate": 6.8764527816515105e-06, "loss": 0.551, "step": 3093 }, { "epoch": 0.4, "grad_norm": 1.2897167205810547, "learning_rate": 6.874529261516941e-06, "loss": 0.5874, "step": 3094 }, { "epoch": 0.4, "grad_norm": 1.6665688753128052, "learning_rate": 6.872605418532808e-06, "loss": 0.5858, "step": 3095 }, { "epoch": 0.4, "grad_norm": 1.0794209241867065, "learning_rate": 6.870681253030453e-06, "loss": 0.6313, "step": 3096 }, { "epoch": 0.4, "grad_norm": 1.3062844276428223, "learning_rate": 6.868756765341278e-06, "loss": 0.686, "step": 3097 }, { "epoch": 0.4, "grad_norm": 1.1371840238571167, "learning_rate": 6.866831955796731e-06, "loss": 0.525, "step": 3098 }, { "epoch": 0.4, "grad_norm": 1.2220420837402344, "learning_rate": 6.864906824728326e-06, "loss": 0.6384, "step": 3099 }, { "epoch": 0.4, "grad_norm": 4.069206237792969, "learning_rate": 6.862981372467626e-06, "loss": 0.6486, "step": 3100 }, { "epoch": 0.4, "grad_norm": 1.6746448278427124, "learning_rate": 6.861055599346249e-06, "loss": 0.5713, "step": 3101 }, { "epoch": 0.4, "grad_norm": 1.100169062614441, "learning_rate": 6.859129505695874e-06, "loss": 0.5867, "step": 3102 }, { "epoch": 0.4, "grad_norm": 1.2251650094985962, "learning_rate": 6.85720309184823e-06, "loss": 0.61, "step": 3103 }, { "epoch": 0.4, "grad_norm": 1.1724867820739746, "learning_rate": 6.855276358135102e-06, "loss": 0.6248, "step": 3104 }, { "epoch": 0.4, "grad_norm": 1.0926048755645752, "learning_rate": 6.853349304888331e-06, "loss": 0.5075, "step": 3105 }, { "epoch": 0.4, "grad_norm": 1.4889264106750488, "learning_rate": 6.851421932439815e-06, "loss": 0.5743, "step": 3106 }, { "epoch": 0.4, "grad_norm": 1.1425611972808838, "learning_rate": 6.8494942411215e-06, "loss": 0.68, "step": 3107 }, { "epoch": 0.4, "grad_norm": 1.138596534729004, "learning_rate": 6.847566231265397e-06, "loss": 0.6524, "step": 3108 }, { "epoch": 0.4, "grad_norm": 1.3879531621932983, "learning_rate": 6.845637903203562e-06, "loss": 0.6623, "step": 3109 }, { "epoch": 0.4, "grad_norm": 1.7340507507324219, "learning_rate": 6.843709257268112e-06, "loss": 0.6271, "step": 3110 }, { "epoch": 0.4, "grad_norm": 1.5906254053115845, "learning_rate": 6.841780293791218e-06, "loss": 0.5912, "step": 3111 }, { "epoch": 0.4, "grad_norm": 1.439652681350708, "learning_rate": 6.839851013105103e-06, "loss": 0.6353, "step": 3112 }, { "epoch": 0.4, "grad_norm": 1.2272002696990967, "learning_rate": 6.837921415542048e-06, "loss": 0.6631, "step": 3113 }, { "epoch": 0.4, "grad_norm": 1.2197409868240356, "learning_rate": 6.835991501434387e-06, "loss": 0.6065, "step": 3114 }, { "epoch": 0.4, "grad_norm": 1.2662650346755981, "learning_rate": 6.834061271114507e-06, "loss": 0.6074, "step": 3115 }, { "epoch": 0.4, "grad_norm": 1.422365665435791, "learning_rate": 6.832130724914852e-06, "loss": 0.676, "step": 3116 }, { "epoch": 0.4, "grad_norm": 1.490148663520813, "learning_rate": 6.830199863167919e-06, "loss": 0.5937, "step": 3117 }, { "epoch": 0.4, "grad_norm": 1.249938726425171, "learning_rate": 6.828268686206259e-06, "loss": 0.6307, "step": 3118 }, { "epoch": 0.4, "grad_norm": 1.534359335899353, "learning_rate": 6.82633719436248e-06, "loss": 0.6081, "step": 3119 }, { "epoch": 0.4, "grad_norm": 1.2044472694396973, "learning_rate": 6.82440538796924e-06, "loss": 0.5735, "step": 3120 }, { "epoch": 0.4, "grad_norm": 1.0795295238494873, "learning_rate": 6.8224732673592555e-06, "loss": 0.5773, "step": 3121 }, { "epoch": 0.4, "grad_norm": 1.2564455270767212, "learning_rate": 6.820540832865293e-06, "loss": 0.592, "step": 3122 }, { "epoch": 0.4, "grad_norm": 1.1027977466583252, "learning_rate": 6.818608084820176e-06, "loss": 0.6072, "step": 3123 }, { "epoch": 0.4, "grad_norm": 1.121226191520691, "learning_rate": 6.816675023556781e-06, "loss": 0.6798, "step": 3124 }, { "epoch": 0.4, "grad_norm": 1.1786526441574097, "learning_rate": 6.814741649408039e-06, "loss": 0.6565, "step": 3125 }, { "epoch": 0.4, "grad_norm": 1.2946691513061523, "learning_rate": 6.812807962706933e-06, "loss": 0.5838, "step": 3126 }, { "epoch": 0.4, "grad_norm": 1.294685959815979, "learning_rate": 6.810873963786501e-06, "loss": 0.5232, "step": 3127 }, { "epoch": 0.4, "grad_norm": 2.5822598934173584, "learning_rate": 6.808939652979839e-06, "loss": 0.6297, "step": 3128 }, { "epoch": 0.4, "grad_norm": 1.2533141374588013, "learning_rate": 6.807005030620088e-06, "loss": 0.6384, "step": 3129 }, { "epoch": 0.4, "grad_norm": 1.3660823106765747, "learning_rate": 6.805070097040451e-06, "loss": 0.6749, "step": 3130 }, { "epoch": 0.4, "grad_norm": 1.3122096061706543, "learning_rate": 6.803134852574177e-06, "loss": 0.6865, "step": 3131 }, { "epoch": 0.4, "grad_norm": 1.1023629903793335, "learning_rate": 6.801199297554577e-06, "loss": 0.6321, "step": 3132 }, { "epoch": 0.4, "grad_norm": 1.2410534620285034, "learning_rate": 6.799263432315006e-06, "loss": 0.6166, "step": 3133 }, { "epoch": 0.4, "grad_norm": 1.456033706665039, "learning_rate": 6.797327257188882e-06, "loss": 0.6063, "step": 3134 }, { "epoch": 0.4, "grad_norm": 1.1916273832321167, "learning_rate": 6.79539077250967e-06, "loss": 0.6006, "step": 3135 }, { "epoch": 0.4, "grad_norm": 1.2367602586746216, "learning_rate": 6.793453978610889e-06, "loss": 0.6278, "step": 3136 }, { "epoch": 0.4, "grad_norm": 1.3319041728973389, "learning_rate": 6.791516875826115e-06, "loss": 0.6671, "step": 3137 }, { "epoch": 0.4, "grad_norm": 1.1352344751358032, "learning_rate": 6.789579464488971e-06, "loss": 0.6093, "step": 3138 }, { "epoch": 0.4, "grad_norm": 1.1920417547225952, "learning_rate": 6.787641744933141e-06, "loss": 0.6582, "step": 3139 }, { "epoch": 0.4, "grad_norm": 1.1601824760437012, "learning_rate": 6.785703717492355e-06, "loss": 0.5819, "step": 3140 }, { "epoch": 0.4, "grad_norm": 1.0924068689346313, "learning_rate": 6.783765382500399e-06, "loss": 0.6438, "step": 3141 }, { "epoch": 0.4, "grad_norm": 1.2986295223236084, "learning_rate": 6.781826740291112e-06, "loss": 0.6269, "step": 3142 }, { "epoch": 0.4, "grad_norm": 0.9878475666046143, "learning_rate": 6.7798877911983865e-06, "loss": 0.5701, "step": 3143 }, { "epoch": 0.4, "grad_norm": 1.0973353385925293, "learning_rate": 6.7779485355561656e-06, "loss": 0.5951, "step": 3144 }, { "epoch": 0.4, "grad_norm": 1.1205912828445435, "learning_rate": 6.776008973698449e-06, "loss": 0.6099, "step": 3145 }, { "epoch": 0.4, "grad_norm": 1.1063035726547241, "learning_rate": 6.7740691059592844e-06, "loss": 0.5666, "step": 3146 }, { "epoch": 0.4, "grad_norm": 1.3901326656341553, "learning_rate": 6.7721289326727765e-06, "loss": 0.6367, "step": 3147 }, { "epoch": 0.4, "grad_norm": 1.5512032508850098, "learning_rate": 6.77018845417308e-06, "loss": 0.6069, "step": 3148 }, { "epoch": 0.4, "grad_norm": 1.5061674118041992, "learning_rate": 6.768247670794401e-06, "loss": 0.6052, "step": 3149 }, { "epoch": 0.4, "grad_norm": 1.2669321298599243, "learning_rate": 6.766306582871004e-06, "loss": 0.593, "step": 3150 }, { "epoch": 0.4, "grad_norm": 1.0490036010742188, "learning_rate": 6.764365190737197e-06, "loss": 0.58, "step": 3151 }, { "epoch": 0.4, "grad_norm": 1.3043758869171143, "learning_rate": 6.7624234947273495e-06, "loss": 0.6879, "step": 3152 }, { "epoch": 0.4, "grad_norm": 1.293900728225708, "learning_rate": 6.7604814951758764e-06, "loss": 0.6152, "step": 3153 }, { "epoch": 0.4, "grad_norm": 2.1668155193328857, "learning_rate": 6.758539192417251e-06, "loss": 0.5602, "step": 3154 }, { "epoch": 0.4, "grad_norm": 1.1483577489852905, "learning_rate": 6.7565965867859914e-06, "loss": 0.6185, "step": 3155 }, { "epoch": 0.4, "grad_norm": 1.1590529680252075, "learning_rate": 6.754653678616676e-06, "loss": 0.6432, "step": 3156 }, { "epoch": 0.4, "grad_norm": 1.140228509902954, "learning_rate": 6.752710468243927e-06, "loss": 0.6195, "step": 3157 }, { "epoch": 0.4, "grad_norm": 1.4454078674316406, "learning_rate": 6.7507669560024265e-06, "loss": 0.6264, "step": 3158 }, { "epoch": 0.4, "grad_norm": 1.2619370222091675, "learning_rate": 6.748823142226902e-06, "loss": 0.7389, "step": 3159 }, { "epoch": 0.4, "grad_norm": 1.2213263511657715, "learning_rate": 6.746879027252138e-06, "loss": 0.5855, "step": 3160 }, { "epoch": 0.4, "grad_norm": 1.2946794033050537, "learning_rate": 6.74493461141297e-06, "loss": 0.5703, "step": 3161 }, { "epoch": 0.41, "grad_norm": 1.256300926208496, "learning_rate": 6.7429898950442794e-06, "loss": 0.6776, "step": 3162 }, { "epoch": 0.41, "grad_norm": 1.374864101409912, "learning_rate": 6.741044878481009e-06, "loss": 0.6003, "step": 3163 }, { "epoch": 0.41, "grad_norm": 1.5750633478164673, "learning_rate": 6.739099562058146e-06, "loss": 0.7092, "step": 3164 }, { "epoch": 0.41, "grad_norm": 2.048159122467041, "learning_rate": 6.737153946110732e-06, "loss": 0.6243, "step": 3165 }, { "epoch": 0.41, "grad_norm": 2.874873399734497, "learning_rate": 6.735208030973858e-06, "loss": 0.5676, "step": 3166 }, { "epoch": 0.41, "grad_norm": 1.1411672830581665, "learning_rate": 6.7332618169826725e-06, "loss": 0.5944, "step": 3167 }, { "epoch": 0.41, "grad_norm": 1.4558788537979126, "learning_rate": 6.731315304472366e-06, "loss": 0.5991, "step": 3168 }, { "epoch": 0.41, "grad_norm": 1.4933147430419922, "learning_rate": 6.7293684937781915e-06, "loss": 0.6637, "step": 3169 }, { "epoch": 0.41, "grad_norm": 1.090269923210144, "learning_rate": 6.727421385235443e-06, "loss": 0.597, "step": 3170 }, { "epoch": 0.41, "grad_norm": 1.5020686388015747, "learning_rate": 6.7254739791794735e-06, "loss": 0.6184, "step": 3171 }, { "epoch": 0.41, "grad_norm": 1.2008216381072998, "learning_rate": 6.7235262759456824e-06, "loss": 0.5001, "step": 3172 }, { "epoch": 0.41, "grad_norm": 1.278684377670288, "learning_rate": 6.721578275869521e-06, "loss": 0.6604, "step": 3173 }, { "epoch": 0.41, "grad_norm": 1.2600221633911133, "learning_rate": 6.719629979286495e-06, "loss": 0.6642, "step": 3174 }, { "epoch": 0.41, "grad_norm": 1.3807469606399536, "learning_rate": 6.717681386532158e-06, "loss": 0.5636, "step": 3175 }, { "epoch": 0.41, "grad_norm": 1.2727843523025513, "learning_rate": 6.7157324979421145e-06, "loss": 0.6672, "step": 3176 }, { "epoch": 0.41, "grad_norm": 1.1497278213500977, "learning_rate": 6.71378331385202e-06, "loss": 0.7587, "step": 3177 }, { "epoch": 0.41, "grad_norm": 1.1401848793029785, "learning_rate": 6.711833834597587e-06, "loss": 0.5935, "step": 3178 }, { "epoch": 0.41, "grad_norm": 1.0336487293243408, "learning_rate": 6.709884060514568e-06, "loss": 0.5936, "step": 3179 }, { "epoch": 0.41, "grad_norm": 1.3813772201538086, "learning_rate": 6.707933991938777e-06, "loss": 0.6915, "step": 3180 }, { "epoch": 0.41, "grad_norm": 1.2658019065856934, "learning_rate": 6.705983629206068e-06, "loss": 0.5932, "step": 3181 }, { "epoch": 0.41, "grad_norm": 1.535305142402649, "learning_rate": 6.704032972652357e-06, "loss": 0.6805, "step": 3182 }, { "epoch": 0.41, "grad_norm": 1.2994247674942017, "learning_rate": 6.7020820226136e-06, "loss": 0.6971, "step": 3183 }, { "epoch": 0.41, "grad_norm": 1.518620252609253, "learning_rate": 6.700130779425812e-06, "loss": 0.6603, "step": 3184 }, { "epoch": 0.41, "grad_norm": 1.4807312488555908, "learning_rate": 6.698179243425053e-06, "loss": 0.5437, "step": 3185 }, { "epoch": 0.41, "grad_norm": 1.1867648363113403, "learning_rate": 6.696227414947436e-06, "loss": 0.7545, "step": 3186 }, { "epoch": 0.41, "grad_norm": 1.30292809009552, "learning_rate": 6.694275294329125e-06, "loss": 0.6676, "step": 3187 }, { "epoch": 0.41, "grad_norm": 1.5975512266159058, "learning_rate": 6.69232288190633e-06, "loss": 0.6476, "step": 3188 }, { "epoch": 0.41, "grad_norm": 1.0806468725204468, "learning_rate": 6.690370178015318e-06, "loss": 0.6023, "step": 3189 }, { "epoch": 0.41, "grad_norm": 1.43095862865448, "learning_rate": 6.688417182992399e-06, "loss": 0.6321, "step": 3190 }, { "epoch": 0.41, "grad_norm": 1.135010838508606, "learning_rate": 6.686463897173942e-06, "loss": 0.6086, "step": 3191 }, { "epoch": 0.41, "grad_norm": 1.3971952199935913, "learning_rate": 6.684510320896354e-06, "loss": 0.5269, "step": 3192 }, { "epoch": 0.41, "grad_norm": 1.2091296911239624, "learning_rate": 6.6825564544961055e-06, "loss": 0.6593, "step": 3193 }, { "epoch": 0.41, "grad_norm": 1.231992483139038, "learning_rate": 6.680602298309703e-06, "loss": 0.6894, "step": 3194 }, { "epoch": 0.41, "grad_norm": 1.6504241228103638, "learning_rate": 6.678647852673717e-06, "loss": 0.6522, "step": 3195 }, { "epoch": 0.41, "grad_norm": 1.6003928184509277, "learning_rate": 6.676693117924757e-06, "loss": 0.5055, "step": 3196 }, { "epoch": 0.41, "grad_norm": 1.3327730894088745, "learning_rate": 6.674738094399488e-06, "loss": 0.6239, "step": 3197 }, { "epoch": 0.41, "grad_norm": 1.1069480180740356, "learning_rate": 6.672782782434622e-06, "loss": 0.5595, "step": 3198 }, { "epoch": 0.41, "grad_norm": 1.0996955633163452, "learning_rate": 6.670827182366922e-06, "loss": 0.6455, "step": 3199 }, { "epoch": 0.41, "grad_norm": 1.1967326402664185, "learning_rate": 6.668871294533202e-06, "loss": 0.6504, "step": 3200 }, { "epoch": 0.41, "grad_norm": 1.4416142702102661, "learning_rate": 6.666915119270322e-06, "loss": 0.7059, "step": 3201 }, { "epoch": 0.41, "grad_norm": 1.0445725917816162, "learning_rate": 6.664958656915195e-06, "loss": 0.5707, "step": 3202 }, { "epoch": 0.41, "grad_norm": 1.2745569944381714, "learning_rate": 6.663001907804778e-06, "loss": 0.6299, "step": 3203 }, { "epoch": 0.41, "grad_norm": 1.199661374092102, "learning_rate": 6.661044872276086e-06, "loss": 0.6521, "step": 3204 }, { "epoch": 0.41, "grad_norm": 1.2170096635818481, "learning_rate": 6.6590875506661764e-06, "loss": 0.5463, "step": 3205 }, { "epoch": 0.41, "grad_norm": 1.6790763139724731, "learning_rate": 6.65712994331216e-06, "loss": 0.6315, "step": 3206 }, { "epoch": 0.41, "grad_norm": 1.6332316398620605, "learning_rate": 6.655172050551191e-06, "loss": 0.4971, "step": 3207 }, { "epoch": 0.41, "grad_norm": 1.081221580505371, "learning_rate": 6.653213872720481e-06, "loss": 0.7278, "step": 3208 }, { "epoch": 0.41, "grad_norm": 1.130863904953003, "learning_rate": 6.651255410157282e-06, "loss": 0.6655, "step": 3209 }, { "epoch": 0.41, "grad_norm": 1.1781548261642456, "learning_rate": 6.649296663198903e-06, "loss": 0.6444, "step": 3210 }, { "epoch": 0.41, "grad_norm": 1.1700100898742676, "learning_rate": 6.6473376321826965e-06, "loss": 0.6042, "step": 3211 }, { "epoch": 0.41, "grad_norm": 1.1747479438781738, "learning_rate": 6.645378317446066e-06, "loss": 0.6241, "step": 3212 }, { "epoch": 0.41, "grad_norm": 1.5794117450714111, "learning_rate": 6.643418719326466e-06, "loss": 0.6243, "step": 3213 }, { "epoch": 0.41, "grad_norm": 1.0308579206466675, "learning_rate": 6.6414588381613935e-06, "loss": 0.6249, "step": 3214 }, { "epoch": 0.41, "grad_norm": 1.6672332286834717, "learning_rate": 6.6394986742884e-06, "loss": 0.5955, "step": 3215 }, { "epoch": 0.41, "grad_norm": 1.2605336904525757, "learning_rate": 6.637538228045084e-06, "loss": 0.6468, "step": 3216 }, { "epoch": 0.41, "grad_norm": 1.1389272212982178, "learning_rate": 6.635577499769093e-06, "loss": 0.5899, "step": 3217 }, { "epoch": 0.41, "grad_norm": 1.2226412296295166, "learning_rate": 6.633616489798121e-06, "loss": 0.5962, "step": 3218 }, { "epoch": 0.41, "grad_norm": 1.1442811489105225, "learning_rate": 6.631655198469915e-06, "loss": 0.7649, "step": 3219 }, { "epoch": 0.41, "grad_norm": 1.2384653091430664, "learning_rate": 6.629693626122262e-06, "loss": 0.5698, "step": 3220 }, { "epoch": 0.41, "grad_norm": 1.148032546043396, "learning_rate": 6.62773177309301e-06, "loss": 0.6537, "step": 3221 }, { "epoch": 0.41, "grad_norm": 1.6926991939544678, "learning_rate": 6.625769639720045e-06, "loss": 0.5875, "step": 3222 }, { "epoch": 0.41, "grad_norm": 1.2067431211471558, "learning_rate": 6.623807226341303e-06, "loss": 0.6435, "step": 3223 }, { "epoch": 0.41, "grad_norm": 1.403552532196045, "learning_rate": 6.621844533294772e-06, "loss": 0.6821, "step": 3224 }, { "epoch": 0.41, "grad_norm": 1.3150187730789185, "learning_rate": 6.619881560918485e-06, "loss": 0.5826, "step": 3225 }, { "epoch": 0.41, "grad_norm": 2.287637948989868, "learning_rate": 6.6179183095505265e-06, "loss": 0.6228, "step": 3226 }, { "epoch": 0.41, "grad_norm": 1.7602990865707397, "learning_rate": 6.6159547795290214e-06, "loss": 0.5653, "step": 3227 }, { "epoch": 0.41, "grad_norm": 1.151167869567871, "learning_rate": 6.613990971192152e-06, "loss": 0.5097, "step": 3228 }, { "epoch": 0.41, "grad_norm": 1.2814674377441406, "learning_rate": 6.6120268848781445e-06, "loss": 0.6207, "step": 3229 }, { "epoch": 0.41, "grad_norm": 1.2942862510681152, "learning_rate": 6.610062520925271e-06, "loss": 0.6376, "step": 3230 }, { "epoch": 0.41, "grad_norm": 2.0902457237243652, "learning_rate": 6.608097879671853e-06, "loss": 0.6226, "step": 3231 }, { "epoch": 0.41, "grad_norm": 1.3513823747634888, "learning_rate": 6.606132961456264e-06, "loss": 0.6445, "step": 3232 }, { "epoch": 0.41, "grad_norm": 1.0098199844360352, "learning_rate": 6.604167766616916e-06, "loss": 0.564, "step": 3233 }, { "epoch": 0.41, "grad_norm": 1.3201072216033936, "learning_rate": 6.602202295492277e-06, "loss": 0.5242, "step": 3234 }, { "epoch": 0.41, "grad_norm": 1.1454012393951416, "learning_rate": 6.600236548420858e-06, "loss": 0.6332, "step": 3235 }, { "epoch": 0.41, "grad_norm": 1.72659170627594, "learning_rate": 6.59827052574122e-06, "loss": 0.6348, "step": 3236 }, { "epoch": 0.41, "grad_norm": 1.327935814857483, "learning_rate": 6.59630422779197e-06, "loss": 0.5882, "step": 3237 }, { "epoch": 0.41, "grad_norm": 1.219178557395935, "learning_rate": 6.594337654911761e-06, "loss": 0.72, "step": 3238 }, { "epoch": 0.41, "grad_norm": 1.1300914287567139, "learning_rate": 6.592370807439299e-06, "loss": 0.6251, "step": 3239 }, { "epoch": 0.42, "grad_norm": 1.0695475339889526, "learning_rate": 6.5904036857133315e-06, "loss": 0.5239, "step": 3240 }, { "epoch": 0.42, "grad_norm": 1.289373755455017, "learning_rate": 6.588436290072655e-06, "loss": 0.5697, "step": 3241 }, { "epoch": 0.42, "grad_norm": 1.647684931755066, "learning_rate": 6.586468620856114e-06, "loss": 0.6084, "step": 3242 }, { "epoch": 0.42, "grad_norm": 1.1704531908035278, "learning_rate": 6.5845006784025985e-06, "loss": 0.7122, "step": 3243 }, { "epoch": 0.42, "grad_norm": 1.6329240798950195, "learning_rate": 6.582532463051048e-06, "loss": 0.6969, "step": 3244 }, { "epoch": 0.42, "grad_norm": 0.9630757570266724, "learning_rate": 6.580563975140447e-06, "loss": 0.6295, "step": 3245 }, { "epoch": 0.42, "grad_norm": 1.0210407972335815, "learning_rate": 6.578595215009827e-06, "loss": 0.6567, "step": 3246 }, { "epoch": 0.42, "grad_norm": 1.281633734703064, "learning_rate": 6.576626182998267e-06, "loss": 0.5795, "step": 3247 }, { "epoch": 0.42, "grad_norm": 1.1632440090179443, "learning_rate": 6.574656879444894e-06, "loss": 0.592, "step": 3248 }, { "epoch": 0.42, "grad_norm": 1.1041927337646484, "learning_rate": 6.5726873046888795e-06, "loss": 0.6129, "step": 3249 }, { "epoch": 0.42, "grad_norm": 1.37405526638031, "learning_rate": 6.570717459069442e-06, "loss": 0.7053, "step": 3250 }, { "epoch": 0.42, "grad_norm": 1.7148224115371704, "learning_rate": 6.56874734292585e-06, "loss": 0.6797, "step": 3251 }, { "epoch": 0.42, "grad_norm": 1.98465096950531, "learning_rate": 6.5667769565974126e-06, "loss": 0.643, "step": 3252 }, { "epoch": 0.42, "grad_norm": 1.3943524360656738, "learning_rate": 6.56480630042349e-06, "loss": 0.6549, "step": 3253 }, { "epoch": 0.42, "grad_norm": 1.2270232439041138, "learning_rate": 6.562835374743488e-06, "loss": 0.6189, "step": 3254 }, { "epoch": 0.42, "grad_norm": 1.2784587144851685, "learning_rate": 6.5608641798968596e-06, "loss": 0.6567, "step": 3255 }, { "epoch": 0.42, "grad_norm": 1.17548406124115, "learning_rate": 6.558892716223102e-06, "loss": 0.7443, "step": 3256 }, { "epoch": 0.42, "grad_norm": 1.4248778820037842, "learning_rate": 6.556920984061759e-06, "loss": 0.6288, "step": 3257 }, { "epoch": 0.42, "grad_norm": 4.57545804977417, "learning_rate": 6.554948983752423e-06, "loss": 0.7004, "step": 3258 }, { "epoch": 0.42, "grad_norm": 1.1954271793365479, "learning_rate": 6.552976715634729e-06, "loss": 0.5588, "step": 3259 }, { "epoch": 0.42, "grad_norm": 1.479419231414795, "learning_rate": 6.551004180048361e-06, "loss": 0.6446, "step": 3260 }, { "epoch": 0.42, "grad_norm": 1.04899001121521, "learning_rate": 6.549031377333049e-06, "loss": 0.5631, "step": 3261 }, { "epoch": 0.42, "grad_norm": 1.1985399723052979, "learning_rate": 6.5470583078285685e-06, "loss": 0.5939, "step": 3262 }, { "epoch": 0.42, "grad_norm": 1.081529974937439, "learning_rate": 6.545084971874738e-06, "loss": 0.6133, "step": 3263 }, { "epoch": 0.42, "grad_norm": 1.3592029809951782, "learning_rate": 6.5431113698114255e-06, "loss": 0.6758, "step": 3264 }, { "epoch": 0.42, "grad_norm": 1.38568115234375, "learning_rate": 6.541137501978547e-06, "loss": 0.6327, "step": 3265 }, { "epoch": 0.42, "grad_norm": 1.2125675678253174, "learning_rate": 6.539163368716057e-06, "loss": 0.5552, "step": 3266 }, { "epoch": 0.42, "grad_norm": 1.0356985330581665, "learning_rate": 6.537188970363961e-06, "loss": 0.5857, "step": 3267 }, { "epoch": 0.42, "grad_norm": 1.4390578269958496, "learning_rate": 6.53521430726231e-06, "loss": 0.6635, "step": 3268 }, { "epoch": 0.42, "grad_norm": 1.1902811527252197, "learning_rate": 6.5332393797512e-06, "loss": 0.73, "step": 3269 }, { "epoch": 0.42, "grad_norm": 1.1815040111541748, "learning_rate": 6.531264188170769e-06, "loss": 0.678, "step": 3270 }, { "epoch": 0.42, "grad_norm": 1.6778348684310913, "learning_rate": 6.529288732861207e-06, "loss": 0.6705, "step": 3271 }, { "epoch": 0.42, "grad_norm": 1.5308274030685425, "learning_rate": 6.527313014162745e-06, "loss": 0.6068, "step": 3272 }, { "epoch": 0.42, "grad_norm": 1.2709729671478271, "learning_rate": 6.525337032415658e-06, "loss": 0.5982, "step": 3273 }, { "epoch": 0.42, "grad_norm": 1.3264434337615967, "learning_rate": 6.523360787960273e-06, "loss": 0.599, "step": 3274 }, { "epoch": 0.42, "grad_norm": 2.82342791557312, "learning_rate": 6.521384281136955e-06, "loss": 0.6239, "step": 3275 }, { "epoch": 0.42, "grad_norm": 1.3357826471328735, "learning_rate": 6.519407512286119e-06, "loss": 0.5771, "step": 3276 }, { "epoch": 0.42, "grad_norm": 1.4777655601501465, "learning_rate": 6.51743048174822e-06, "loss": 0.6048, "step": 3277 }, { "epoch": 0.42, "grad_norm": 1.4173051118850708, "learning_rate": 6.515453189863765e-06, "loss": 0.5204, "step": 3278 }, { "epoch": 0.42, "grad_norm": 1.2135194540023804, "learning_rate": 6.513475636973301e-06, "loss": 0.5671, "step": 3279 }, { "epoch": 0.42, "grad_norm": 1.3434330224990845, "learning_rate": 6.511497823417418e-06, "loss": 0.576, "step": 3280 }, { "epoch": 0.42, "grad_norm": 1.3612425327301025, "learning_rate": 6.50951974953676e-06, "loss": 0.6269, "step": 3281 }, { "epoch": 0.42, "grad_norm": 1.213051438331604, "learning_rate": 6.507541415672007e-06, "loss": 0.6763, "step": 3282 }, { "epoch": 0.42, "grad_norm": 1.151252031326294, "learning_rate": 6.505562822163887e-06, "loss": 0.556, "step": 3283 }, { "epoch": 0.42, "grad_norm": 2.0518524646759033, "learning_rate": 6.503583969353173e-06, "loss": 0.7004, "step": 3284 }, { "epoch": 0.42, "grad_norm": 1.3543423414230347, "learning_rate": 6.501604857580681e-06, "loss": 0.6353, "step": 3285 }, { "epoch": 0.42, "grad_norm": 1.3768010139465332, "learning_rate": 6.499625487187276e-06, "loss": 0.5926, "step": 3286 }, { "epoch": 0.42, "grad_norm": 1.7199901342391968, "learning_rate": 6.497645858513858e-06, "loss": 0.631, "step": 3287 }, { "epoch": 0.42, "grad_norm": 1.2099894285202026, "learning_rate": 6.4956659719013835e-06, "loss": 0.5909, "step": 3288 }, { "epoch": 0.42, "grad_norm": 1.031480312347412, "learning_rate": 6.493685827690846e-06, "loss": 0.6396, "step": 3289 }, { "epoch": 0.42, "grad_norm": 1.2236201763153076, "learning_rate": 6.491705426223285e-06, "loss": 0.6766, "step": 3290 }, { "epoch": 0.42, "grad_norm": 1.6073354482650757, "learning_rate": 6.4897247678397845e-06, "loss": 0.6743, "step": 3291 }, { "epoch": 0.42, "grad_norm": 1.1248873472213745, "learning_rate": 6.487743852881472e-06, "loss": 0.5832, "step": 3292 }, { "epoch": 0.42, "grad_norm": 1.0797927379608154, "learning_rate": 6.485762681689521e-06, "loss": 0.6391, "step": 3293 }, { "epoch": 0.42, "grad_norm": 1.4567921161651611, "learning_rate": 6.483781254605146e-06, "loss": 0.6303, "step": 3294 }, { "epoch": 0.42, "grad_norm": 1.9019827842712402, "learning_rate": 6.48179957196961e-06, "loss": 0.5922, "step": 3295 }, { "epoch": 0.42, "grad_norm": 1.6069629192352295, "learning_rate": 6.479817634124216e-06, "loss": 0.6879, "step": 3296 }, { "epoch": 0.42, "grad_norm": 1.3667546510696411, "learning_rate": 6.477835441410311e-06, "loss": 0.65, "step": 3297 }, { "epoch": 0.42, "grad_norm": 1.415429711341858, "learning_rate": 6.475852994169294e-06, "loss": 0.6479, "step": 3298 }, { "epoch": 0.42, "grad_norm": 1.2002050876617432, "learning_rate": 6.473870292742592e-06, "loss": 0.5623, "step": 3299 }, { "epoch": 0.42, "grad_norm": 1.5991908311843872, "learning_rate": 6.471887337471693e-06, "loss": 0.6927, "step": 3300 }, { "epoch": 0.42, "grad_norm": 1.442886471748352, "learning_rate": 6.4699041286981155e-06, "loss": 0.6233, "step": 3301 }, { "epoch": 0.42, "grad_norm": 1.619641661643982, "learning_rate": 6.46792066676343e-06, "loss": 0.6882, "step": 3302 }, { "epoch": 0.42, "grad_norm": 1.233254313468933, "learning_rate": 6.465936952009245e-06, "loss": 0.6169, "step": 3303 }, { "epoch": 0.42, "grad_norm": 1.1515586376190186, "learning_rate": 6.463952984777218e-06, "loss": 0.5435, "step": 3304 }, { "epoch": 0.42, "grad_norm": 1.2402045726776123, "learning_rate": 6.461968765409041e-06, "loss": 0.7376, "step": 3305 }, { "epoch": 0.42, "grad_norm": 1.3234292268753052, "learning_rate": 6.4599842942464645e-06, "loss": 0.7769, "step": 3306 }, { "epoch": 0.42, "grad_norm": 1.9401755332946777, "learning_rate": 6.457999571631266e-06, "loss": 0.6768, "step": 3307 }, { "epoch": 0.42, "grad_norm": 2.493912696838379, "learning_rate": 6.456014597905278e-06, "loss": 0.5967, "step": 3308 }, { "epoch": 0.42, "grad_norm": 1.2056974172592163, "learning_rate": 6.454029373410369e-06, "loss": 0.5962, "step": 3309 }, { "epoch": 0.42, "grad_norm": 1.219796061515808, "learning_rate": 6.452043898488456e-06, "loss": 0.6247, "step": 3310 }, { "epoch": 0.42, "grad_norm": 1.3913002014160156, "learning_rate": 6.450058173481493e-06, "loss": 0.5459, "step": 3311 }, { "epoch": 0.42, "grad_norm": 1.591672658920288, "learning_rate": 6.448072198731485e-06, "loss": 0.5614, "step": 3312 }, { "epoch": 0.42, "grad_norm": 1.4829857349395752, "learning_rate": 6.4460859745804735e-06, "loss": 0.6168, "step": 3313 }, { "epoch": 0.42, "grad_norm": 1.0522713661193848, "learning_rate": 6.444099501370545e-06, "loss": 0.5752, "step": 3314 }, { "epoch": 0.42, "grad_norm": 1.390140175819397, "learning_rate": 6.442112779443832e-06, "loss": 0.6122, "step": 3315 }, { "epoch": 0.42, "grad_norm": 1.25529944896698, "learning_rate": 6.440125809142503e-06, "loss": 0.5556, "step": 3316 }, { "epoch": 0.42, "grad_norm": 1.3403065204620361, "learning_rate": 6.438138590808776e-06, "loss": 0.6308, "step": 3317 }, { "epoch": 0.43, "grad_norm": 1.25216543674469, "learning_rate": 6.436151124784906e-06, "loss": 0.6417, "step": 3318 }, { "epoch": 0.43, "grad_norm": 1.2277629375457764, "learning_rate": 6.434163411413197e-06, "loss": 0.6425, "step": 3319 }, { "epoch": 0.43, "grad_norm": 1.0705773830413818, "learning_rate": 6.432175451035991e-06, "loss": 0.5152, "step": 3320 }, { "epoch": 0.43, "grad_norm": 1.466159701347351, "learning_rate": 6.430187243995674e-06, "loss": 0.6207, "step": 3321 }, { "epoch": 0.43, "grad_norm": 1.1634601354599, "learning_rate": 6.428198790634672e-06, "loss": 0.5681, "step": 3322 }, { "epoch": 0.43, "grad_norm": 1.6939680576324463, "learning_rate": 6.4262100912954585e-06, "loss": 0.593, "step": 3323 }, { "epoch": 0.43, "grad_norm": 1.3650009632110596, "learning_rate": 6.424221146320547e-06, "loss": 0.6422, "step": 3324 }, { "epoch": 0.43, "grad_norm": 1.1898249387741089, "learning_rate": 6.422231956052489e-06, "loss": 0.6359, "step": 3325 }, { "epoch": 0.43, "grad_norm": 1.2429956197738647, "learning_rate": 6.420242520833886e-06, "loss": 0.6454, "step": 3326 }, { "epoch": 0.43, "grad_norm": 1.1976206302642822, "learning_rate": 6.418252841007376e-06, "loss": 0.5683, "step": 3327 }, { "epoch": 0.43, "grad_norm": 1.0774952173233032, "learning_rate": 6.416262916915642e-06, "loss": 0.6135, "step": 3328 }, { "epoch": 0.43, "grad_norm": 1.288520336151123, "learning_rate": 6.414272748901405e-06, "loss": 0.6605, "step": 3329 }, { "epoch": 0.43, "grad_norm": 2.0824525356292725, "learning_rate": 6.412282337307436e-06, "loss": 0.5897, "step": 3330 }, { "epoch": 0.43, "grad_norm": 1.8679497241973877, "learning_rate": 6.410291682476537e-06, "loss": 0.5329, "step": 3331 }, { "epoch": 0.43, "grad_norm": 1.217800498008728, "learning_rate": 6.4083007847515634e-06, "loss": 0.6684, "step": 3332 }, { "epoch": 0.43, "grad_norm": 1.3628228902816772, "learning_rate": 6.406309644475404e-06, "loss": 0.6208, "step": 3333 }, { "epoch": 0.43, "grad_norm": 1.4911750555038452, "learning_rate": 6.404318261990992e-06, "loss": 0.6059, "step": 3334 }, { "epoch": 0.43, "grad_norm": 1.5707592964172363, "learning_rate": 6.402326637641303e-06, "loss": 0.5948, "step": 3335 }, { "epoch": 0.43, "grad_norm": 1.3157973289489746, "learning_rate": 6.400334771769355e-06, "loss": 0.6137, "step": 3336 }, { "epoch": 0.43, "grad_norm": 1.2203538417816162, "learning_rate": 6.398342664718204e-06, "loss": 0.607, "step": 3337 }, { "epoch": 0.43, "grad_norm": 1.2806761264801025, "learning_rate": 6.396350316830954e-06, "loss": 0.5865, "step": 3338 }, { "epoch": 0.43, "grad_norm": 1.370572805404663, "learning_rate": 6.394357728450741e-06, "loss": 0.6144, "step": 3339 }, { "epoch": 0.43, "grad_norm": 1.2665327787399292, "learning_rate": 6.392364899920751e-06, "loss": 0.5818, "step": 3340 }, { "epoch": 0.43, "grad_norm": 1.244334101676941, "learning_rate": 6.39037183158421e-06, "loss": 0.6458, "step": 3341 }, { "epoch": 0.43, "grad_norm": 1.2308834791183472, "learning_rate": 6.388378523784379e-06, "loss": 0.7208, "step": 3342 }, { "epoch": 0.43, "grad_norm": 1.2577228546142578, "learning_rate": 6.386384976864569e-06, "loss": 0.7069, "step": 3343 }, { "epoch": 0.43, "grad_norm": 1.1200151443481445, "learning_rate": 6.384391191168124e-06, "loss": 0.6288, "step": 3344 }, { "epoch": 0.43, "grad_norm": 1.3991978168487549, "learning_rate": 6.382397167038438e-06, "loss": 0.616, "step": 3345 }, { "epoch": 0.43, "grad_norm": 1.689049482345581, "learning_rate": 6.3804029048189355e-06, "loss": 0.6469, "step": 3346 }, { "epoch": 0.43, "grad_norm": 1.1596959829330444, "learning_rate": 6.378408404853093e-06, "loss": 0.538, "step": 3347 }, { "epoch": 0.43, "grad_norm": 1.1679819822311401, "learning_rate": 6.376413667484417e-06, "loss": 0.7596, "step": 3348 }, { "epoch": 0.43, "grad_norm": 1.8966916799545288, "learning_rate": 6.374418693056464e-06, "loss": 0.6038, "step": 3349 }, { "epoch": 0.43, "grad_norm": 1.218559741973877, "learning_rate": 6.37242348191283e-06, "loss": 0.6026, "step": 3350 }, { "epoch": 0.43, "grad_norm": 1.2759473323822021, "learning_rate": 6.370428034397144e-06, "loss": 0.5726, "step": 3351 }, { "epoch": 0.43, "grad_norm": 1.0636835098266602, "learning_rate": 6.368432350853085e-06, "loss": 0.6214, "step": 3352 }, { "epoch": 0.43, "grad_norm": 1.555844783782959, "learning_rate": 6.366436431624368e-06, "loss": 0.622, "step": 3353 }, { "epoch": 0.43, "grad_norm": 1.8688308000564575, "learning_rate": 6.3644402770547496e-06, "loss": 0.6356, "step": 3354 }, { "epoch": 0.43, "grad_norm": 0.9006859660148621, "learning_rate": 6.3624438874880256e-06, "loss": 0.6228, "step": 3355 }, { "epoch": 0.43, "grad_norm": 1.367267370223999, "learning_rate": 6.360447263268037e-06, "loss": 0.5737, "step": 3356 }, { "epoch": 0.43, "grad_norm": 1.1289016008377075, "learning_rate": 6.358450404738656e-06, "loss": 0.6597, "step": 3357 }, { "epoch": 0.43, "grad_norm": 1.5956635475158691, "learning_rate": 6.356453312243807e-06, "loss": 0.5691, "step": 3358 }, { "epoch": 0.43, "grad_norm": 1.2158634662628174, "learning_rate": 6.354455986127445e-06, "loss": 0.5911, "step": 3359 }, { "epoch": 0.43, "grad_norm": 2.0845446586608887, "learning_rate": 6.352458426733571e-06, "loss": 0.5655, "step": 3360 }, { "epoch": 0.43, "grad_norm": 1.3381253480911255, "learning_rate": 6.3504606344062215e-06, "loss": 0.6276, "step": 3361 }, { "epoch": 0.43, "grad_norm": 1.382815957069397, "learning_rate": 6.348462609489477e-06, "loss": 0.6372, "step": 3362 }, { "epoch": 0.43, "grad_norm": 1.607489824295044, "learning_rate": 6.346464352327456e-06, "loss": 0.7139, "step": 3363 }, { "epoch": 0.43, "grad_norm": 1.351166009902954, "learning_rate": 6.3444658632643195e-06, "loss": 0.5938, "step": 3364 }, { "epoch": 0.43, "grad_norm": 2.418544292449951, "learning_rate": 6.342467142644264e-06, "loss": 0.5654, "step": 3365 }, { "epoch": 0.43, "grad_norm": 1.2091163396835327, "learning_rate": 6.340468190811531e-06, "loss": 0.6736, "step": 3366 }, { "epoch": 0.43, "grad_norm": 1.4336068630218506, "learning_rate": 6.338469008110399e-06, "loss": 0.693, "step": 3367 }, { "epoch": 0.43, "grad_norm": 1.4184355735778809, "learning_rate": 6.336469594885183e-06, "loss": 0.6291, "step": 3368 }, { "epoch": 0.43, "grad_norm": 1.5481655597686768, "learning_rate": 6.3344699514802465e-06, "loss": 0.6017, "step": 3369 }, { "epoch": 0.43, "grad_norm": 1.1255567073822021, "learning_rate": 6.332470078239983e-06, "loss": 0.6172, "step": 3370 }, { "epoch": 0.43, "grad_norm": 1.1182490587234497, "learning_rate": 6.330469975508834e-06, "loss": 0.584, "step": 3371 }, { "epoch": 0.43, "grad_norm": 1.1752885580062866, "learning_rate": 6.328469643631273e-06, "loss": 0.7249, "step": 3372 }, { "epoch": 0.43, "grad_norm": 1.3815187215805054, "learning_rate": 6.326469082951819e-06, "loss": 0.635, "step": 3373 }, { "epoch": 0.43, "grad_norm": 1.2596144676208496, "learning_rate": 6.3244682938150255e-06, "loss": 0.5281, "step": 3374 }, { "epoch": 0.43, "grad_norm": 1.3565584421157837, "learning_rate": 6.3224672765654905e-06, "loss": 0.6269, "step": 3375 }, { "epoch": 0.43, "grad_norm": 1.5856893062591553, "learning_rate": 6.320466031547847e-06, "loss": 0.6211, "step": 3376 }, { "epoch": 0.43, "grad_norm": 1.1341127157211304, "learning_rate": 6.31846455910677e-06, "loss": 0.5976, "step": 3377 }, { "epoch": 0.43, "grad_norm": 1.2035582065582275, "learning_rate": 6.316462859586971e-06, "loss": 0.5688, "step": 3378 }, { "epoch": 0.43, "grad_norm": 1.234596610069275, "learning_rate": 6.314460933333201e-06, "loss": 0.613, "step": 3379 }, { "epoch": 0.43, "grad_norm": 1.051673412322998, "learning_rate": 6.312458780690254e-06, "loss": 0.4786, "step": 3380 }, { "epoch": 0.43, "grad_norm": 1.180688500404358, "learning_rate": 6.310456402002958e-06, "loss": 0.6154, "step": 3381 }, { "epoch": 0.43, "grad_norm": 1.144311547279358, "learning_rate": 6.308453797616184e-06, "loss": 0.7581, "step": 3382 }, { "epoch": 0.43, "grad_norm": 1.3280012607574463, "learning_rate": 6.306450967874836e-06, "loss": 0.6299, "step": 3383 }, { "epoch": 0.43, "grad_norm": 1.2186527252197266, "learning_rate": 6.304447913123866e-06, "loss": 0.5792, "step": 3384 }, { "epoch": 0.43, "grad_norm": 1.3583048582077026, "learning_rate": 6.3024446337082555e-06, "loss": 0.6775, "step": 3385 }, { "epoch": 0.43, "grad_norm": 1.3401097059249878, "learning_rate": 6.300441129973032e-06, "loss": 0.6299, "step": 3386 }, { "epoch": 0.43, "grad_norm": 1.4296989440917969, "learning_rate": 6.298437402263254e-06, "loss": 0.6503, "step": 3387 }, { "epoch": 0.43, "grad_norm": 1.3577499389648438, "learning_rate": 6.296433450924027e-06, "loss": 0.6072, "step": 3388 }, { "epoch": 0.43, "grad_norm": 1.4429153203964233, "learning_rate": 6.2944292763004885e-06, "loss": 0.5407, "step": 3389 }, { "epoch": 0.43, "grad_norm": 1.1351635456085205, "learning_rate": 6.292424878737817e-06, "loss": 0.5831, "step": 3390 }, { "epoch": 0.43, "grad_norm": 1.355637550354004, "learning_rate": 6.290420258581229e-06, "loss": 0.6465, "step": 3391 }, { "epoch": 0.43, "grad_norm": 1.0525286197662354, "learning_rate": 6.288415416175981e-06, "loss": 0.5857, "step": 3392 }, { "epoch": 0.43, "grad_norm": 1.313124656677246, "learning_rate": 6.286410351867367e-06, "loss": 0.5992, "step": 3393 }, { "epoch": 0.43, "grad_norm": 2.2575294971466064, "learning_rate": 6.284405066000715e-06, "loss": 0.5893, "step": 3394 }, { "epoch": 0.43, "grad_norm": 1.1958906650543213, "learning_rate": 6.282399558921398e-06, "loss": 0.5755, "step": 3395 }, { "epoch": 0.44, "grad_norm": 1.1793793439865112, "learning_rate": 6.280393830974822e-06, "loss": 0.6098, "step": 3396 }, { "epoch": 0.44, "grad_norm": 1.2796311378479004, "learning_rate": 6.278387882506434e-06, "loss": 0.6819, "step": 3397 }, { "epoch": 0.44, "grad_norm": 1.8948205709457397, "learning_rate": 6.276381713861717e-06, "loss": 0.6131, "step": 3398 }, { "epoch": 0.44, "grad_norm": 1.889975905418396, "learning_rate": 6.274375325386195e-06, "loss": 0.6074, "step": 3399 }, { "epoch": 0.44, "grad_norm": 1.6620714664459229, "learning_rate": 6.272368717425423e-06, "loss": 0.5899, "step": 3400 }, { "epoch": 0.44, "grad_norm": 1.221790075302124, "learning_rate": 6.270361890325003e-06, "loss": 0.5863, "step": 3401 }, { "epoch": 0.44, "grad_norm": 1.1769517660140991, "learning_rate": 6.268354844430569e-06, "loss": 0.5553, "step": 3402 }, { "epoch": 0.44, "grad_norm": 1.1347368955612183, "learning_rate": 6.266347580087791e-06, "loss": 0.7537, "step": 3403 }, { "epoch": 0.44, "grad_norm": 1.2931528091430664, "learning_rate": 6.264340097642385e-06, "loss": 0.5663, "step": 3404 }, { "epoch": 0.44, "grad_norm": 1.5123640298843384, "learning_rate": 6.2623323974400944e-06, "loss": 0.571, "step": 3405 }, { "epoch": 0.44, "grad_norm": 1.1496232748031616, "learning_rate": 6.260324479826706e-06, "loss": 0.7423, "step": 3406 }, { "epoch": 0.44, "grad_norm": 1.2281783819198608, "learning_rate": 6.258316345148042e-06, "loss": 0.6239, "step": 3407 }, { "epoch": 0.44, "grad_norm": 1.229547142982483, "learning_rate": 6.256307993749965e-06, "loss": 0.5493, "step": 3408 }, { "epoch": 0.44, "grad_norm": 1.5989558696746826, "learning_rate": 6.254299425978371e-06, "loss": 0.5408, "step": 3409 }, { "epoch": 0.44, "grad_norm": 1.251488208770752, "learning_rate": 6.252290642179197e-06, "loss": 0.6038, "step": 3410 }, { "epoch": 0.44, "grad_norm": 3.296891450881958, "learning_rate": 6.25028164269841e-06, "loss": 0.6108, "step": 3411 }, { "epoch": 0.44, "grad_norm": 1.524884819984436, "learning_rate": 6.248272427882027e-06, "loss": 0.683, "step": 3412 }, { "epoch": 0.44, "grad_norm": 1.0750436782836914, "learning_rate": 6.246262998076088e-06, "loss": 0.69, "step": 3413 }, { "epoch": 0.44, "grad_norm": 1.3640230894088745, "learning_rate": 6.24425335362668e-06, "loss": 0.6142, "step": 3414 }, { "epoch": 0.44, "grad_norm": 1.7753424644470215, "learning_rate": 6.242243494879923e-06, "loss": 0.6733, "step": 3415 }, { "epoch": 0.44, "grad_norm": 1.3016899824142456, "learning_rate": 6.240233422181972e-06, "loss": 0.5632, "step": 3416 }, { "epoch": 0.44, "grad_norm": 1.370159387588501, "learning_rate": 6.2382231358790224e-06, "loss": 0.6562, "step": 3417 }, { "epoch": 0.44, "grad_norm": 1.9591825008392334, "learning_rate": 6.236212636317305e-06, "loss": 0.6253, "step": 3418 }, { "epoch": 0.44, "grad_norm": 1.2164580821990967, "learning_rate": 6.23420192384309e-06, "loss": 0.5545, "step": 3419 }, { "epoch": 0.44, "grad_norm": 1.5938736200332642, "learning_rate": 6.232190998802679e-06, "loss": 0.5702, "step": 3420 }, { "epoch": 0.44, "grad_norm": 1.2773017883300781, "learning_rate": 6.230179861542413e-06, "loss": 0.6788, "step": 3421 }, { "epoch": 0.44, "grad_norm": 1.3805594444274902, "learning_rate": 6.2281685124086714e-06, "loss": 0.5953, "step": 3422 }, { "epoch": 0.44, "grad_norm": 1.4324122667312622, "learning_rate": 6.226156951747866e-06, "loss": 0.5984, "step": 3423 }, { "epoch": 0.44, "grad_norm": 1.1922259330749512, "learning_rate": 6.224145179906448e-06, "loss": 0.6201, "step": 3424 }, { "epoch": 0.44, "grad_norm": 1.930484652519226, "learning_rate": 6.222133197230904e-06, "loss": 0.6431, "step": 3425 }, { "epoch": 0.44, "grad_norm": 1.209879755973816, "learning_rate": 6.22012100406776e-06, "loss": 0.5888, "step": 3426 }, { "epoch": 0.44, "grad_norm": 1.4153494834899902, "learning_rate": 6.21810860076357e-06, "loss": 0.6015, "step": 3427 }, { "epoch": 0.44, "grad_norm": 1.4506012201309204, "learning_rate": 6.216095987664935e-06, "loss": 0.6142, "step": 3428 }, { "epoch": 0.44, "grad_norm": 1.0460131168365479, "learning_rate": 6.214083165118483e-06, "loss": 0.5344, "step": 3429 }, { "epoch": 0.44, "grad_norm": 1.4760301113128662, "learning_rate": 6.212070133470884e-06, "loss": 0.6131, "step": 3430 }, { "epoch": 0.44, "grad_norm": 1.3164763450622559, "learning_rate": 6.210056893068839e-06, "loss": 0.607, "step": 3431 }, { "epoch": 0.44, "grad_norm": 1.4441686868667603, "learning_rate": 6.208043444259091e-06, "loss": 0.7008, "step": 3432 }, { "epoch": 0.44, "grad_norm": 1.0722826719284058, "learning_rate": 6.206029787388412e-06, "loss": 0.5562, "step": 3433 }, { "epoch": 0.44, "grad_norm": 1.1804120540618896, "learning_rate": 6.204015922803617e-06, "loss": 0.5505, "step": 3434 }, { "epoch": 0.44, "grad_norm": 1.25029718875885, "learning_rate": 6.202001850851551e-06, "loss": 0.6006, "step": 3435 }, { "epoch": 0.44, "grad_norm": 1.1206964254379272, "learning_rate": 6.199987571879101e-06, "loss": 0.7474, "step": 3436 }, { "epoch": 0.44, "grad_norm": 1.8449739217758179, "learning_rate": 6.197973086233179e-06, "loss": 0.651, "step": 3437 }, { "epoch": 0.44, "grad_norm": 1.151208758354187, "learning_rate": 6.195958394260744e-06, "loss": 0.5435, "step": 3438 }, { "epoch": 0.44, "grad_norm": 1.3795284032821655, "learning_rate": 6.1939434963087845e-06, "loss": 0.6664, "step": 3439 }, { "epoch": 0.44, "grad_norm": 1.3607579469680786, "learning_rate": 6.191928392724325e-06, "loss": 0.5944, "step": 3440 }, { "epoch": 0.44, "grad_norm": 1.5826529264450073, "learning_rate": 6.1899130838544275e-06, "loss": 0.645, "step": 3441 }, { "epoch": 0.44, "grad_norm": 1.0307071208953857, "learning_rate": 6.187897570046184e-06, "loss": 0.5332, "step": 3442 }, { "epoch": 0.44, "grad_norm": 1.3897781372070312, "learning_rate": 6.185881851646732e-06, "loss": 0.652, "step": 3443 }, { "epoch": 0.44, "grad_norm": 0.9002603888511658, "learning_rate": 6.1838659290032346e-06, "loss": 0.5409, "step": 3444 }, { "epoch": 0.44, "grad_norm": 1.2274670600891113, "learning_rate": 6.181849802462895e-06, "loss": 0.5836, "step": 3445 }, { "epoch": 0.44, "grad_norm": 1.3531590700149536, "learning_rate": 6.179833472372947e-06, "loss": 0.598, "step": 3446 }, { "epoch": 0.44, "grad_norm": 1.2528719902038574, "learning_rate": 6.177816939080666e-06, "loss": 0.5959, "step": 3447 }, { "epoch": 0.44, "grad_norm": 1.0928164720535278, "learning_rate": 6.175800202933355e-06, "loss": 0.5685, "step": 3448 }, { "epoch": 0.44, "grad_norm": 1.4518722295761108, "learning_rate": 6.17378326427836e-06, "loss": 0.6363, "step": 3449 }, { "epoch": 0.44, "grad_norm": 1.127073884010315, "learning_rate": 6.171766123463054e-06, "loss": 0.79, "step": 3450 }, { "epoch": 0.44, "grad_norm": 1.7623597383499146, "learning_rate": 6.1697487808348525e-06, "loss": 0.6219, "step": 3451 }, { "epoch": 0.44, "grad_norm": 1.348456621170044, "learning_rate": 6.167731236741199e-06, "loss": 0.5674, "step": 3452 }, { "epoch": 0.44, "grad_norm": 1.6265017986297607, "learning_rate": 6.1657134915295735e-06, "loss": 0.5659, "step": 3453 }, { "epoch": 0.44, "grad_norm": 1.057990550994873, "learning_rate": 6.163695545547494e-06, "loss": 0.6351, "step": 3454 }, { "epoch": 0.44, "grad_norm": 3.3038055896759033, "learning_rate": 6.16167739914251e-06, "loss": 0.6228, "step": 3455 }, { "epoch": 0.44, "grad_norm": 1.0282056331634521, "learning_rate": 6.159659052662207e-06, "loss": 0.5298, "step": 3456 }, { "epoch": 0.44, "grad_norm": 1.179836630821228, "learning_rate": 6.157640506454201e-06, "loss": 0.5805, "step": 3457 }, { "epoch": 0.44, "grad_norm": 1.278550148010254, "learning_rate": 6.15562176086615e-06, "loss": 0.6652, "step": 3458 }, { "epoch": 0.44, "grad_norm": 1.3121113777160645, "learning_rate": 6.153602816245738e-06, "loss": 0.6242, "step": 3459 }, { "epoch": 0.44, "grad_norm": 1.404284119606018, "learning_rate": 6.1515836729406874e-06, "loss": 0.6148, "step": 3460 }, { "epoch": 0.44, "grad_norm": 1.0569359064102173, "learning_rate": 6.149564331298758e-06, "loss": 0.605, "step": 3461 }, { "epoch": 0.44, "grad_norm": 1.422186017036438, "learning_rate": 6.147544791667738e-06, "loss": 0.6296, "step": 3462 }, { "epoch": 0.44, "grad_norm": 1.2817769050598145, "learning_rate": 6.145525054395453e-06, "loss": 0.6151, "step": 3463 }, { "epoch": 0.44, "grad_norm": 3.4974191188812256, "learning_rate": 6.143505119829759e-06, "loss": 0.5476, "step": 3464 }, { "epoch": 0.44, "grad_norm": 1.151680827140808, "learning_rate": 6.141484988318554e-06, "loss": 0.588, "step": 3465 }, { "epoch": 0.44, "grad_norm": 1.0980405807495117, "learning_rate": 6.139464660209757e-06, "loss": 0.5949, "step": 3466 }, { "epoch": 0.44, "grad_norm": 2.5018017292022705, "learning_rate": 6.137444135851336e-06, "loss": 0.5604, "step": 3467 }, { "epoch": 0.44, "grad_norm": 1.1532684564590454, "learning_rate": 6.1354234155912795e-06, "loss": 0.6558, "step": 3468 }, { "epoch": 0.44, "grad_norm": 1.3535691499710083, "learning_rate": 6.133402499777621e-06, "loss": 0.6153, "step": 3469 }, { "epoch": 0.44, "grad_norm": 1.7174805402755737, "learning_rate": 6.131381388758417e-06, "loss": 0.5909, "step": 3470 }, { "epoch": 0.44, "grad_norm": 1.1755709648132324, "learning_rate": 6.1293600828817655e-06, "loss": 0.5663, "step": 3471 }, { "epoch": 0.44, "grad_norm": 1.8840426206588745, "learning_rate": 6.127338582495796e-06, "loss": 0.6317, "step": 3472 }, { "epoch": 0.44, "grad_norm": 1.3639739751815796, "learning_rate": 6.125316887948668e-06, "loss": 0.6246, "step": 3473 }, { "epoch": 0.45, "grad_norm": 1.087632179260254, "learning_rate": 6.123294999588579e-06, "loss": 0.5259, "step": 3474 }, { "epoch": 0.45, "grad_norm": 1.6002978086471558, "learning_rate": 6.1212729177637595e-06, "loss": 0.5469, "step": 3475 }, { "epoch": 0.45, "grad_norm": 1.169373869895935, "learning_rate": 6.119250642822469e-06, "loss": 0.646, "step": 3476 }, { "epoch": 0.45, "grad_norm": 2.6361258029937744, "learning_rate": 6.117228175113006e-06, "loss": 0.6835, "step": 3477 }, { "epoch": 0.45, "grad_norm": 1.1000622510910034, "learning_rate": 6.115205514983699e-06, "loss": 0.621, "step": 3478 }, { "epoch": 0.45, "grad_norm": 1.3290541172027588, "learning_rate": 6.1131826627829085e-06, "loss": 0.6461, "step": 3479 }, { "epoch": 0.45, "grad_norm": 1.3654758930206299, "learning_rate": 6.11115961885903e-06, "loss": 0.6303, "step": 3480 }, { "epoch": 0.45, "grad_norm": 1.3180681467056274, "learning_rate": 6.109136383560494e-06, "loss": 0.6891, "step": 3481 }, { "epoch": 0.45, "grad_norm": 1.4984737634658813, "learning_rate": 6.107112957235759e-06, "loss": 0.6644, "step": 3482 }, { "epoch": 0.45, "grad_norm": 1.4471309185028076, "learning_rate": 6.10508934023332e-06, "loss": 0.5985, "step": 3483 }, { "epoch": 0.45, "grad_norm": 1.3030848503112793, "learning_rate": 6.103065532901704e-06, "loss": 0.7308, "step": 3484 }, { "epoch": 0.45, "grad_norm": 1.357759714126587, "learning_rate": 6.101041535589469e-06, "loss": 0.6637, "step": 3485 }, { "epoch": 0.45, "grad_norm": 1.5871968269348145, "learning_rate": 6.099017348645212e-06, "loss": 0.6919, "step": 3486 }, { "epoch": 0.45, "grad_norm": 1.0771772861480713, "learning_rate": 6.096992972417552e-06, "loss": 0.6352, "step": 3487 }, { "epoch": 0.45, "grad_norm": 1.3609187602996826, "learning_rate": 6.094968407255153e-06, "loss": 0.6257, "step": 3488 }, { "epoch": 0.45, "grad_norm": 1.1798498630523682, "learning_rate": 6.092943653506701e-06, "loss": 0.59, "step": 3489 }, { "epoch": 0.45, "grad_norm": 1.488000512123108, "learning_rate": 6.090918711520918e-06, "loss": 0.6466, "step": 3490 }, { "epoch": 0.45, "grad_norm": 1.2035020589828491, "learning_rate": 6.088893581646562e-06, "loss": 0.5723, "step": 3491 }, { "epoch": 0.45, "grad_norm": 1.004976749420166, "learning_rate": 6.086868264232418e-06, "loss": 0.6055, "step": 3492 }, { "epoch": 0.45, "grad_norm": 2.643667697906494, "learning_rate": 6.084842759627309e-06, "loss": 0.5617, "step": 3493 }, { "epoch": 0.45, "grad_norm": 2.7203829288482666, "learning_rate": 6.082817068180081e-06, "loss": 0.6339, "step": 3494 }, { "epoch": 0.45, "grad_norm": 1.1511204242706299, "learning_rate": 6.0807911902396255e-06, "loss": 0.6117, "step": 3495 }, { "epoch": 0.45, "grad_norm": 1.4033151865005493, "learning_rate": 6.078765126154854e-06, "loss": 0.5826, "step": 3496 }, { "epoch": 0.45, "grad_norm": 1.21249520778656, "learning_rate": 6.076738876274718e-06, "loss": 0.6407, "step": 3497 }, { "epoch": 0.45, "grad_norm": 1.0692471265792847, "learning_rate": 6.074712440948194e-06, "loss": 0.586, "step": 3498 }, { "epoch": 0.45, "grad_norm": 1.459241509437561, "learning_rate": 6.0726858205242974e-06, "loss": 0.581, "step": 3499 }, { "epoch": 0.45, "grad_norm": 1.218548059463501, "learning_rate": 6.070659015352072e-06, "loss": 0.5934, "step": 3500 }, { "epoch": 0.45, "grad_norm": 1.2451094388961792, "learning_rate": 6.068632025780592e-06, "loss": 0.6442, "step": 3501 }, { "epoch": 0.45, "grad_norm": 1.7080090045928955, "learning_rate": 6.066604852158966e-06, "loss": 0.6385, "step": 3502 }, { "epoch": 0.45, "grad_norm": 1.4963022470474243, "learning_rate": 6.064577494836334e-06, "loss": 0.5995, "step": 3503 }, { "epoch": 0.45, "grad_norm": 1.1907035112380981, "learning_rate": 6.0625499541618686e-06, "loss": 0.6659, "step": 3504 }, { "epoch": 0.45, "grad_norm": 0.9348851442337036, "learning_rate": 6.060522230484769e-06, "loss": 0.5889, "step": 3505 }, { "epoch": 0.45, "grad_norm": 1.1017441749572754, "learning_rate": 6.058494324154272e-06, "loss": 0.6732, "step": 3506 }, { "epoch": 0.45, "grad_norm": 1.096999168395996, "learning_rate": 6.056466235519641e-06, "loss": 0.5974, "step": 3507 }, { "epoch": 0.45, "grad_norm": 1.2315013408660889, "learning_rate": 6.054437964930175e-06, "loss": 0.6482, "step": 3508 }, { "epoch": 0.45, "grad_norm": 1.2487025260925293, "learning_rate": 6.052409512735202e-06, "loss": 0.6511, "step": 3509 }, { "epoch": 0.45, "grad_norm": 1.2520768642425537, "learning_rate": 6.0503808792840825e-06, "loss": 0.6488, "step": 3510 }, { "epoch": 0.45, "grad_norm": 1.1143486499786377, "learning_rate": 6.048352064926204e-06, "loss": 0.6537, "step": 3511 }, { "epoch": 0.45, "grad_norm": 1.371921181678772, "learning_rate": 6.046323070010993e-06, "loss": 0.5731, "step": 3512 }, { "epoch": 0.45, "grad_norm": 2.219484806060791, "learning_rate": 6.0442938948879006e-06, "loss": 0.5992, "step": 3513 }, { "epoch": 0.45, "grad_norm": 1.838356614112854, "learning_rate": 6.04226453990641e-06, "loss": 0.591, "step": 3514 }, { "epoch": 0.45, "grad_norm": 1.2704648971557617, "learning_rate": 6.040235005416037e-06, "loss": 0.6266, "step": 3515 }, { "epoch": 0.45, "grad_norm": 1.7678539752960205, "learning_rate": 6.038205291766328e-06, "loss": 0.6643, "step": 3516 }, { "epoch": 0.45, "grad_norm": 1.7963787317276, "learning_rate": 6.036175399306861e-06, "loss": 0.628, "step": 3517 }, { "epoch": 0.45, "grad_norm": 1.342311143875122, "learning_rate": 6.034145328387239e-06, "loss": 0.6382, "step": 3518 }, { "epoch": 0.45, "grad_norm": 1.220090627670288, "learning_rate": 6.032115079357108e-06, "loss": 0.5598, "step": 3519 }, { "epoch": 0.45, "grad_norm": 1.0853689908981323, "learning_rate": 6.0300846525661305e-06, "loss": 0.5895, "step": 3520 }, { "epoch": 0.45, "grad_norm": 1.2728458642959595, "learning_rate": 6.028054048364011e-06, "loss": 0.645, "step": 3521 }, { "epoch": 0.45, "grad_norm": 1.1695315837860107, "learning_rate": 6.026023267100474e-06, "loss": 0.5413, "step": 3522 }, { "epoch": 0.45, "grad_norm": 1.3852046728134155, "learning_rate": 6.023992309125288e-06, "loss": 0.6413, "step": 3523 }, { "epoch": 0.45, "grad_norm": 5.460791110992432, "learning_rate": 6.021961174788239e-06, "loss": 0.6022, "step": 3524 }, { "epoch": 0.45, "grad_norm": 1.5186138153076172, "learning_rate": 6.019929864439149e-06, "loss": 0.6042, "step": 3525 }, { "epoch": 0.45, "grad_norm": 1.498050332069397, "learning_rate": 6.017898378427871e-06, "loss": 0.7024, "step": 3526 }, { "epoch": 0.45, "grad_norm": 1.3453694581985474, "learning_rate": 6.015866717104288e-06, "loss": 0.6183, "step": 3527 }, { "epoch": 0.45, "grad_norm": 1.3376102447509766, "learning_rate": 6.01383488081831e-06, "loss": 0.6636, "step": 3528 }, { "epoch": 0.45, "grad_norm": 1.0858659744262695, "learning_rate": 6.01180286991988e-06, "loss": 0.7165, "step": 3529 }, { "epoch": 0.45, "grad_norm": 1.561975121498108, "learning_rate": 6.009770684758973e-06, "loss": 0.6641, "step": 3530 }, { "epoch": 0.45, "grad_norm": 1.422297716140747, "learning_rate": 6.007738325685588e-06, "loss": 0.6278, "step": 3531 }, { "epoch": 0.45, "grad_norm": 1.3532339334487915, "learning_rate": 6.005705793049762e-06, "loss": 0.5749, "step": 3532 }, { "epoch": 0.45, "grad_norm": 1.1300181150436401, "learning_rate": 6.0036730872015524e-06, "loss": 0.6098, "step": 3533 }, { "epoch": 0.45, "grad_norm": 1.3219990730285645, "learning_rate": 6.001640208491056e-06, "loss": 0.5893, "step": 3534 }, { "epoch": 0.45, "grad_norm": 2.5730981826782227, "learning_rate": 5.9996071572683914e-06, "loss": 0.553, "step": 3535 }, { "epoch": 0.45, "grad_norm": 1.663468837738037, "learning_rate": 5.997573933883714e-06, "loss": 0.5648, "step": 3536 }, { "epoch": 0.45, "grad_norm": 1.451084017753601, "learning_rate": 5.995540538687199e-06, "loss": 0.6901, "step": 3537 }, { "epoch": 0.45, "grad_norm": 3.200803518295288, "learning_rate": 5.993506972029064e-06, "loss": 0.595, "step": 3538 }, { "epoch": 0.45, "grad_norm": 1.3902348279953003, "learning_rate": 5.991473234259546e-06, "loss": 0.6697, "step": 3539 }, { "epoch": 0.45, "grad_norm": 2.0828068256378174, "learning_rate": 5.989439325728916e-06, "loss": 0.6027, "step": 3540 }, { "epoch": 0.45, "grad_norm": 1.1694917678833008, "learning_rate": 5.987405246787474e-06, "loss": 0.5812, "step": 3541 }, { "epoch": 0.45, "grad_norm": 1.7300596237182617, "learning_rate": 5.985370997785547e-06, "loss": 0.5532, "step": 3542 }, { "epoch": 0.45, "grad_norm": 1.1379985809326172, "learning_rate": 5.983336579073495e-06, "loss": 0.6126, "step": 3543 }, { "epoch": 0.45, "grad_norm": 1.147905945777893, "learning_rate": 5.981301991001703e-06, "loss": 0.5544, "step": 3544 }, { "epoch": 0.45, "grad_norm": 1.108622431755066, "learning_rate": 5.979267233920589e-06, "loss": 0.5836, "step": 3545 }, { "epoch": 0.45, "grad_norm": 1.3943743705749512, "learning_rate": 5.9772323081805985e-06, "loss": 0.6146, "step": 3546 }, { "epoch": 0.45, "grad_norm": 1.359615683555603, "learning_rate": 5.975197214132207e-06, "loss": 0.6164, "step": 3547 }, { "epoch": 0.45, "grad_norm": 1.324974775314331, "learning_rate": 5.9731619521259156e-06, "loss": 0.6021, "step": 3548 }, { "epoch": 0.45, "grad_norm": 1.6578129529953003, "learning_rate": 5.97112652251226e-06, "loss": 0.5934, "step": 3549 }, { "epoch": 0.45, "grad_norm": 1.3419653177261353, "learning_rate": 5.969090925641799e-06, "loss": 0.7274, "step": 3550 }, { "epoch": 0.45, "grad_norm": 1.1800451278686523, "learning_rate": 5.9670551618651225e-06, "loss": 0.7183, "step": 3551 }, { "epoch": 0.46, "grad_norm": 1.0364329814910889, "learning_rate": 5.96501923153285e-06, "loss": 0.5909, "step": 3552 }, { "epoch": 0.46, "grad_norm": 1.199339509010315, "learning_rate": 5.9629831349956325e-06, "loss": 0.5893, "step": 3553 }, { "epoch": 0.46, "grad_norm": 1.2047299146652222, "learning_rate": 5.960946872604141e-06, "loss": 0.6378, "step": 3554 }, { "epoch": 0.46, "grad_norm": 1.2108663320541382, "learning_rate": 5.958910444709083e-06, "loss": 0.5957, "step": 3555 }, { "epoch": 0.46, "grad_norm": 1.2545571327209473, "learning_rate": 5.956873851661192e-06, "loss": 0.5564, "step": 3556 }, { "epoch": 0.46, "grad_norm": 1.1796643733978271, "learning_rate": 5.954837093811229e-06, "loss": 0.6311, "step": 3557 }, { "epoch": 0.46, "grad_norm": 1.4320603609085083, "learning_rate": 5.952800171509985e-06, "loss": 0.6326, "step": 3558 }, { "epoch": 0.46, "grad_norm": 1.1937462091445923, "learning_rate": 5.950763085108277e-06, "loss": 0.6688, "step": 3559 }, { "epoch": 0.46, "grad_norm": 1.2120587825775146, "learning_rate": 5.948725834956952e-06, "loss": 0.6689, "step": 3560 }, { "epoch": 0.46, "grad_norm": 1.330200433731079, "learning_rate": 5.946688421406886e-06, "loss": 0.5936, "step": 3561 }, { "epoch": 0.46, "grad_norm": 1.3225212097167969, "learning_rate": 5.9446508448089825e-06, "loss": 0.6483, "step": 3562 }, { "epoch": 0.46, "grad_norm": 1.0410070419311523, "learning_rate": 5.942613105514171e-06, "loss": 0.5963, "step": 3563 }, { "epoch": 0.46, "grad_norm": 1.1120356321334839, "learning_rate": 5.940575203873411e-06, "loss": 0.6553, "step": 3564 }, { "epoch": 0.46, "grad_norm": 1.1769014596939087, "learning_rate": 5.9385371402376914e-06, "loss": 0.6784, "step": 3565 }, { "epoch": 0.46, "grad_norm": 2.005772590637207, "learning_rate": 5.936498914958025e-06, "loss": 0.6325, "step": 3566 }, { "epoch": 0.46, "grad_norm": 1.3367499113082886, "learning_rate": 5.934460528385456e-06, "loss": 0.6275, "step": 3567 }, { "epoch": 0.46, "grad_norm": 1.1705892086029053, "learning_rate": 5.932421980871054e-06, "loss": 0.5585, "step": 3568 }, { "epoch": 0.46, "grad_norm": 1.4280080795288086, "learning_rate": 5.930383272765918e-06, "loss": 0.5989, "step": 3569 }, { "epoch": 0.46, "grad_norm": 1.7109063863754272, "learning_rate": 5.928344404421174e-06, "loss": 0.5889, "step": 3570 }, { "epoch": 0.46, "grad_norm": 1.2693599462509155, "learning_rate": 5.926305376187976e-06, "loss": 0.6736, "step": 3571 }, { "epoch": 0.46, "grad_norm": 1.3794386386871338, "learning_rate": 5.924266188417503e-06, "loss": 0.5768, "step": 3572 }, { "epoch": 0.46, "grad_norm": 1.2329838275909424, "learning_rate": 5.922226841460968e-06, "loss": 0.6015, "step": 3573 }, { "epoch": 0.46, "grad_norm": 1.6483304500579834, "learning_rate": 5.920187335669602e-06, "loss": 0.6534, "step": 3574 }, { "epoch": 0.46, "grad_norm": 1.2386940717697144, "learning_rate": 5.918147671394674e-06, "loss": 0.5888, "step": 3575 }, { "epoch": 0.46, "grad_norm": 1.224981427192688, "learning_rate": 5.91610784898747e-06, "loss": 0.525, "step": 3576 }, { "epoch": 0.46, "grad_norm": 1.332261085510254, "learning_rate": 5.91406786879931e-06, "loss": 0.5301, "step": 3577 }, { "epoch": 0.46, "grad_norm": 1.2706077098846436, "learning_rate": 5.912027731181539e-06, "loss": 0.5354, "step": 3578 }, { "epoch": 0.46, "grad_norm": 1.3086779117584229, "learning_rate": 5.909987436485527e-06, "loss": 0.5638, "step": 3579 }, { "epoch": 0.46, "grad_norm": 2.0201406478881836, "learning_rate": 5.907946985062678e-06, "loss": 0.6415, "step": 3580 }, { "epoch": 0.46, "grad_norm": 1.47654390335083, "learning_rate": 5.905906377264413e-06, "loss": 0.6439, "step": 3581 }, { "epoch": 0.46, "grad_norm": 1.4309604167938232, "learning_rate": 5.90386561344219e-06, "loss": 0.6235, "step": 3582 }, { "epoch": 0.46, "grad_norm": 1.3017194271087646, "learning_rate": 5.901824693947486e-06, "loss": 0.6122, "step": 3583 }, { "epoch": 0.46, "grad_norm": 1.1456342935562134, "learning_rate": 5.899783619131809e-06, "loss": 0.5598, "step": 3584 }, { "epoch": 0.46, "grad_norm": 2.045018196105957, "learning_rate": 5.897742389346691e-06, "loss": 0.6183, "step": 3585 }, { "epoch": 0.46, "grad_norm": 1.2723512649536133, "learning_rate": 5.8957010049436955e-06, "loss": 0.5801, "step": 3586 }, { "epoch": 0.46, "grad_norm": 1.087829828262329, "learning_rate": 5.893659466274407e-06, "loss": 0.5959, "step": 3587 }, { "epoch": 0.46, "grad_norm": 1.7654427289962769, "learning_rate": 5.891617773690439e-06, "loss": 0.6837, "step": 3588 }, { "epoch": 0.46, "grad_norm": 1.581198811531067, "learning_rate": 5.8895759275434326e-06, "loss": 0.5811, "step": 3589 }, { "epoch": 0.46, "grad_norm": 1.239762544631958, "learning_rate": 5.887533928185054e-06, "loss": 0.5911, "step": 3590 }, { "epoch": 0.46, "grad_norm": 1.2970668077468872, "learning_rate": 5.8854917759669975e-06, "loss": 0.6369, "step": 3591 }, { "epoch": 0.46, "grad_norm": 1.130383014678955, "learning_rate": 5.88344947124098e-06, "loss": 0.576, "step": 3592 }, { "epoch": 0.46, "grad_norm": 1.180657148361206, "learning_rate": 5.881407014358748e-06, "loss": 0.5828, "step": 3593 }, { "epoch": 0.46, "grad_norm": 1.4534460306167603, "learning_rate": 5.879364405672072e-06, "loss": 0.6106, "step": 3594 }, { "epoch": 0.46, "grad_norm": 1.3066177368164062, "learning_rate": 5.877321645532752e-06, "loss": 0.6507, "step": 3595 }, { "epoch": 0.46, "grad_norm": 1.6938061714172363, "learning_rate": 5.87527873429261e-06, "loss": 0.6446, "step": 3596 }, { "epoch": 0.46, "grad_norm": 2.4884603023529053, "learning_rate": 5.873235672303497e-06, "loss": 0.6266, "step": 3597 }, { "epoch": 0.46, "grad_norm": 1.3594398498535156, "learning_rate": 5.871192459917287e-06, "loss": 0.5882, "step": 3598 }, { "epoch": 0.46, "grad_norm": 1.6385754346847534, "learning_rate": 5.869149097485887e-06, "loss": 0.7136, "step": 3599 }, { "epoch": 0.46, "grad_norm": 1.3254238367080688, "learning_rate": 5.867105585361218e-06, "loss": 0.5929, "step": 3600 }, { "epoch": 0.46, "grad_norm": 1.1824506521224976, "learning_rate": 5.865061923895238e-06, "loss": 0.6646, "step": 3601 }, { "epoch": 0.46, "grad_norm": 1.4841578006744385, "learning_rate": 5.863018113439925e-06, "loss": 0.6157, "step": 3602 }, { "epoch": 0.46, "grad_norm": 1.1699411869049072, "learning_rate": 5.860974154347282e-06, "loss": 0.5475, "step": 3603 }, { "epoch": 0.46, "grad_norm": 1.5281282663345337, "learning_rate": 5.858930046969341e-06, "loss": 0.6103, "step": 3604 }, { "epoch": 0.46, "grad_norm": 1.1899720430374146, "learning_rate": 5.856885791658158e-06, "loss": 0.6138, "step": 3605 }, { "epoch": 0.46, "grad_norm": 1.1821370124816895, "learning_rate": 5.854841388765816e-06, "loss": 0.583, "step": 3606 }, { "epoch": 0.46, "grad_norm": 2.455402135848999, "learning_rate": 5.8527968386444174e-06, "loss": 0.6418, "step": 3607 }, { "epoch": 0.46, "grad_norm": 1.7864829301834106, "learning_rate": 5.8507521416461e-06, "loss": 0.5578, "step": 3608 }, { "epoch": 0.46, "grad_norm": 1.501908540725708, "learning_rate": 5.848707298123017e-06, "loss": 0.6361, "step": 3609 }, { "epoch": 0.46, "grad_norm": 1.2245491743087769, "learning_rate": 5.846662308427352e-06, "loss": 0.6692, "step": 3610 }, { "epoch": 0.46, "grad_norm": 1.385204792022705, "learning_rate": 5.844617172911313e-06, "loss": 0.639, "step": 3611 }, { "epoch": 0.46, "grad_norm": 1.3009589910507202, "learning_rate": 5.8425718919271356e-06, "loss": 0.6582, "step": 3612 }, { "epoch": 0.46, "grad_norm": 1.1194837093353271, "learning_rate": 5.8405264658270735e-06, "loss": 0.5595, "step": 3613 }, { "epoch": 0.46, "grad_norm": 1.1085587739944458, "learning_rate": 5.838480894963412e-06, "loss": 0.5775, "step": 3614 }, { "epoch": 0.46, "grad_norm": 1.2732248306274414, "learning_rate": 5.836435179688461e-06, "loss": 0.5688, "step": 3615 }, { "epoch": 0.46, "grad_norm": 1.1878724098205566, "learning_rate": 5.83438932035455e-06, "loss": 0.6376, "step": 3616 }, { "epoch": 0.46, "grad_norm": 1.7219138145446777, "learning_rate": 5.8323433173140395e-06, "loss": 0.6421, "step": 3617 }, { "epoch": 0.46, "grad_norm": 1.5152086019515991, "learning_rate": 5.830297170919309e-06, "loss": 0.6828, "step": 3618 }, { "epoch": 0.46, "grad_norm": 1.371113657951355, "learning_rate": 5.828250881522769e-06, "loss": 0.6614, "step": 3619 }, { "epoch": 0.46, "grad_norm": 1.0953738689422607, "learning_rate": 5.826204449476848e-06, "loss": 0.6094, "step": 3620 }, { "epoch": 0.46, "grad_norm": 1.253603219985962, "learning_rate": 5.824157875134005e-06, "loss": 0.6148, "step": 3621 }, { "epoch": 0.46, "grad_norm": 1.2168421745300293, "learning_rate": 5.822111158846718e-06, "loss": 0.6631, "step": 3622 }, { "epoch": 0.46, "grad_norm": 1.5407350063323975, "learning_rate": 5.820064300967494e-06, "loss": 0.6399, "step": 3623 }, { "epoch": 0.46, "grad_norm": 1.245928168296814, "learning_rate": 5.8180173018488625e-06, "loss": 0.4972, "step": 3624 }, { "epoch": 0.46, "grad_norm": 1.1214627027511597, "learning_rate": 5.815970161843379e-06, "loss": 0.5604, "step": 3625 }, { "epoch": 0.46, "grad_norm": 1.016115427017212, "learning_rate": 5.81392288130362e-06, "loss": 0.6543, "step": 3626 }, { "epoch": 0.46, "grad_norm": 1.1197785139083862, "learning_rate": 5.811875460582189e-06, "loss": 0.6655, "step": 3627 }, { "epoch": 0.46, "grad_norm": 1.4029167890548706, "learning_rate": 5.80982790003171e-06, "loss": 0.6326, "step": 3628 }, { "epoch": 0.46, "grad_norm": 1.0858590602874756, "learning_rate": 5.807780200004838e-06, "loss": 0.6388, "step": 3629 }, { "epoch": 0.47, "grad_norm": 1.1979694366455078, "learning_rate": 5.805732360854243e-06, "loss": 0.5826, "step": 3630 }, { "epoch": 0.47, "grad_norm": 2.004387617111206, "learning_rate": 5.803684382932626e-06, "loss": 0.6769, "step": 3631 }, { "epoch": 0.47, "grad_norm": 1.0634325742721558, "learning_rate": 5.801636266592712e-06, "loss": 0.629, "step": 3632 }, { "epoch": 0.47, "grad_norm": 1.3099150657653809, "learning_rate": 5.799588012187243e-06, "loss": 0.6693, "step": 3633 }, { "epoch": 0.47, "grad_norm": 1.3465301990509033, "learning_rate": 5.797539620068992e-06, "loss": 0.6112, "step": 3634 }, { "epoch": 0.47, "grad_norm": 1.356348991394043, "learning_rate": 5.79549109059075e-06, "loss": 0.6714, "step": 3635 }, { "epoch": 0.47, "grad_norm": 1.0583562850952148, "learning_rate": 5.793442424105339e-06, "loss": 0.5755, "step": 3636 }, { "epoch": 0.47, "grad_norm": 1.416865348815918, "learning_rate": 5.791393620965597e-06, "loss": 0.6358, "step": 3637 }, { "epoch": 0.47, "grad_norm": 1.24928617477417, "learning_rate": 5.789344681524389e-06, "loss": 0.6334, "step": 3638 }, { "epoch": 0.47, "grad_norm": 1.223817229270935, "learning_rate": 5.7872956061346045e-06, "loss": 0.6467, "step": 3639 }, { "epoch": 0.47, "grad_norm": 0.9584287405014038, "learning_rate": 5.785246395149152e-06, "loss": 0.5455, "step": 3640 }, { "epoch": 0.47, "grad_norm": 0.9955974221229553, "learning_rate": 5.783197048920972e-06, "loss": 0.6464, "step": 3641 }, { "epoch": 0.47, "grad_norm": 1.016220211982727, "learning_rate": 5.781147567803017e-06, "loss": 0.5736, "step": 3642 }, { "epoch": 0.47, "grad_norm": 1.1931318044662476, "learning_rate": 5.779097952148273e-06, "loss": 0.6193, "step": 3643 }, { "epoch": 0.47, "grad_norm": 1.3940238952636719, "learning_rate": 5.77704820230974e-06, "loss": 0.691, "step": 3644 }, { "epoch": 0.47, "grad_norm": 1.2070850133895874, "learning_rate": 5.77499831864045e-06, "loss": 0.6053, "step": 3645 }, { "epoch": 0.47, "grad_norm": 1.3859649896621704, "learning_rate": 5.772948301493451e-06, "loss": 0.6045, "step": 3646 }, { "epoch": 0.47, "grad_norm": 1.1878433227539062, "learning_rate": 5.770898151221819e-06, "loss": 0.5722, "step": 3647 }, { "epoch": 0.47, "grad_norm": 1.1642296314239502, "learning_rate": 5.768847868178646e-06, "loss": 0.6328, "step": 3648 }, { "epoch": 0.47, "grad_norm": 8.618324279785156, "learning_rate": 5.766797452717059e-06, "loss": 0.6084, "step": 3649 }, { "epoch": 0.47, "grad_norm": 1.2117552757263184, "learning_rate": 5.764746905190195e-06, "loss": 0.5757, "step": 3650 }, { "epoch": 0.47, "grad_norm": 1.7341634035110474, "learning_rate": 5.762696225951222e-06, "loss": 0.6151, "step": 3651 }, { "epoch": 0.47, "grad_norm": 1.1232070922851562, "learning_rate": 5.760645415353325e-06, "loss": 0.5258, "step": 3652 }, { "epoch": 0.47, "grad_norm": 1.6535828113555908, "learning_rate": 5.7585944737497165e-06, "loss": 0.6518, "step": 3653 }, { "epoch": 0.47, "grad_norm": 1.4652224779129028, "learning_rate": 5.756543401493628e-06, "loss": 0.6398, "step": 3654 }, { "epoch": 0.47, "grad_norm": 1.3363951444625854, "learning_rate": 5.754492198938318e-06, "loss": 0.6407, "step": 3655 }, { "epoch": 0.47, "grad_norm": 1.152831792831421, "learning_rate": 5.752440866437062e-06, "loss": 0.537, "step": 3656 }, { "epoch": 0.47, "grad_norm": 1.4328360557556152, "learning_rate": 5.750389404343159e-06, "loss": 0.6318, "step": 3657 }, { "epoch": 0.47, "grad_norm": 1.8562523126602173, "learning_rate": 5.748337813009934e-06, "loss": 0.5985, "step": 3658 }, { "epoch": 0.47, "grad_norm": 0.9919341802597046, "learning_rate": 5.74628609279073e-06, "loss": 0.6107, "step": 3659 }, { "epoch": 0.47, "grad_norm": 1.3245062828063965, "learning_rate": 5.744234244038918e-06, "loss": 0.526, "step": 3660 }, { "epoch": 0.47, "grad_norm": 1.3959466218948364, "learning_rate": 5.742182267107883e-06, "loss": 0.6278, "step": 3661 }, { "epoch": 0.47, "grad_norm": 1.1084336042404175, "learning_rate": 5.740130162351039e-06, "loss": 0.5771, "step": 3662 }, { "epoch": 0.47, "grad_norm": 1.2701683044433594, "learning_rate": 5.738077930121817e-06, "loss": 0.5676, "step": 3663 }, { "epoch": 0.47, "grad_norm": 1.0271109342575073, "learning_rate": 5.736025570773675e-06, "loss": 0.6557, "step": 3664 }, { "epoch": 0.47, "grad_norm": 1.5346267223358154, "learning_rate": 5.7339730846600875e-06, "loss": 0.4982, "step": 3665 }, { "epoch": 0.47, "grad_norm": 1.2214652299880981, "learning_rate": 5.7319204721345536e-06, "loss": 0.6193, "step": 3666 }, { "epoch": 0.47, "grad_norm": 1.1910988092422485, "learning_rate": 5.729867733550597e-06, "loss": 0.5806, "step": 3667 }, { "epoch": 0.47, "grad_norm": 1.1290035247802734, "learning_rate": 5.727814869261758e-06, "loss": 0.611, "step": 3668 }, { "epoch": 0.47, "grad_norm": 1.1266953945159912, "learning_rate": 5.725761879621601e-06, "loss": 0.628, "step": 3669 }, { "epoch": 0.47, "grad_norm": 1.3203330039978027, "learning_rate": 5.723708764983712e-06, "loss": 0.6415, "step": 3670 }, { "epoch": 0.47, "grad_norm": 1.3828309774398804, "learning_rate": 5.721655525701699e-06, "loss": 0.5446, "step": 3671 }, { "epoch": 0.47, "grad_norm": 1.2213470935821533, "learning_rate": 5.719602162129189e-06, "loss": 0.6274, "step": 3672 }, { "epoch": 0.47, "grad_norm": 1.2695316076278687, "learning_rate": 5.7175486746198325e-06, "loss": 0.58, "step": 3673 }, { "epoch": 0.47, "grad_norm": 1.0185368061065674, "learning_rate": 5.715495063527301e-06, "loss": 0.6232, "step": 3674 }, { "epoch": 0.47, "grad_norm": 1.2112317085266113, "learning_rate": 5.713441329205289e-06, "loss": 0.6422, "step": 3675 }, { "epoch": 0.47, "grad_norm": 1.6289145946502686, "learning_rate": 5.711387472007509e-06, "loss": 0.6632, "step": 3676 }, { "epoch": 0.47, "grad_norm": 1.5209150314331055, "learning_rate": 5.709333492287698e-06, "loss": 0.6227, "step": 3677 }, { "epoch": 0.47, "grad_norm": 1.4312928915023804, "learning_rate": 5.7072793903996085e-06, "loss": 0.66, "step": 3678 }, { "epoch": 0.47, "grad_norm": 1.2010070085525513, "learning_rate": 5.705225166697022e-06, "loss": 0.6936, "step": 3679 }, { "epoch": 0.47, "grad_norm": 1.0900729894638062, "learning_rate": 5.703170821533733e-06, "loss": 0.6763, "step": 3680 }, { "epoch": 0.47, "grad_norm": 1.209028720855713, "learning_rate": 5.701116355263563e-06, "loss": 0.5245, "step": 3681 }, { "epoch": 0.47, "grad_norm": 1.0546902418136597, "learning_rate": 5.69906176824035e-06, "loss": 0.5515, "step": 3682 }, { "epoch": 0.47, "grad_norm": 1.3346909284591675, "learning_rate": 5.697007060817958e-06, "loss": 0.57, "step": 3683 }, { "epoch": 0.47, "grad_norm": 1.3135395050048828, "learning_rate": 5.694952233350268e-06, "loss": 0.5915, "step": 3684 }, { "epoch": 0.47, "grad_norm": 1.2499960660934448, "learning_rate": 5.692897286191179e-06, "loss": 0.5783, "step": 3685 }, { "epoch": 0.47, "grad_norm": 1.2707788944244385, "learning_rate": 5.690842219694619e-06, "loss": 0.6796, "step": 3686 }, { "epoch": 0.47, "grad_norm": 1.1295020580291748, "learning_rate": 5.688787034214525e-06, "loss": 0.6514, "step": 3687 }, { "epoch": 0.47, "grad_norm": 1.445489764213562, "learning_rate": 5.6867317301048676e-06, "loss": 0.7057, "step": 3688 }, { "epoch": 0.47, "grad_norm": 1.3477917909622192, "learning_rate": 5.684676307719626e-06, "loss": 0.5777, "step": 3689 }, { "epoch": 0.47, "grad_norm": 1.2112998962402344, "learning_rate": 5.682620767412808e-06, "loss": 0.6103, "step": 3690 }, { "epoch": 0.47, "grad_norm": 1.5050208568572998, "learning_rate": 5.6805651095384375e-06, "loss": 0.6307, "step": 3691 }, { "epoch": 0.47, "grad_norm": 1.221442461013794, "learning_rate": 5.678509334450559e-06, "loss": 0.5603, "step": 3692 }, { "epoch": 0.47, "grad_norm": 1.4392536878585815, "learning_rate": 5.67645344250324e-06, "loss": 0.6022, "step": 3693 }, { "epoch": 0.47, "grad_norm": 1.1971774101257324, "learning_rate": 5.6743974340505645e-06, "loss": 0.5572, "step": 3694 }, { "epoch": 0.47, "grad_norm": 1.7652573585510254, "learning_rate": 5.672341309446639e-06, "loss": 0.6265, "step": 3695 }, { "epoch": 0.47, "grad_norm": 1.4171807765960693, "learning_rate": 5.670285069045588e-06, "loss": 0.6642, "step": 3696 }, { "epoch": 0.47, "grad_norm": 1.8250502347946167, "learning_rate": 5.668228713201559e-06, "loss": 0.6942, "step": 3697 }, { "epoch": 0.47, "grad_norm": 1.0191811323165894, "learning_rate": 5.666172242268715e-06, "loss": 0.5493, "step": 3698 }, { "epoch": 0.47, "grad_norm": 1.2496845722198486, "learning_rate": 5.664115656601243e-06, "loss": 0.6399, "step": 3699 }, { "epoch": 0.47, "grad_norm": 1.2776678800582886, "learning_rate": 5.662058956553348e-06, "loss": 0.6093, "step": 3700 }, { "epoch": 0.47, "grad_norm": 1.5822676420211792, "learning_rate": 5.660002142479255e-06, "loss": 0.5506, "step": 3701 }, { "epoch": 0.47, "grad_norm": 1.53562593460083, "learning_rate": 5.657945214733208e-06, "loss": 0.6304, "step": 3702 }, { "epoch": 0.47, "grad_norm": 1.2964261770248413, "learning_rate": 5.655888173669472e-06, "loss": 0.6556, "step": 3703 }, { "epoch": 0.47, "grad_norm": 1.3869433403015137, "learning_rate": 5.653831019642327e-06, "loss": 0.7004, "step": 3704 }, { "epoch": 0.47, "grad_norm": 1.1649458408355713, "learning_rate": 5.6517737530060815e-06, "loss": 0.5943, "step": 3705 }, { "epoch": 0.47, "grad_norm": 1.3618543148040771, "learning_rate": 5.649716374115053e-06, "loss": 0.6762, "step": 3706 }, { "epoch": 0.47, "grad_norm": 1.0227161645889282, "learning_rate": 5.6476588833235846e-06, "loss": 0.5979, "step": 3707 }, { "epoch": 0.48, "grad_norm": 1.4849395751953125, "learning_rate": 5.645601280986038e-06, "loss": 0.5881, "step": 3708 }, { "epoch": 0.48, "grad_norm": 1.639639139175415, "learning_rate": 5.643543567456793e-06, "loss": 0.5817, "step": 3709 }, { "epoch": 0.48, "grad_norm": 1.9866191148757935, "learning_rate": 5.64148574309025e-06, "loss": 0.6601, "step": 3710 }, { "epoch": 0.48, "grad_norm": 1.609009027481079, "learning_rate": 5.639427808240825e-06, "loss": 0.5858, "step": 3711 }, { "epoch": 0.48, "grad_norm": 1.3619134426116943, "learning_rate": 5.637369763262959e-06, "loss": 0.7077, "step": 3712 }, { "epoch": 0.48, "grad_norm": 1.349634051322937, "learning_rate": 5.635311608511105e-06, "loss": 0.6249, "step": 3713 }, { "epoch": 0.48, "grad_norm": 1.1673775911331177, "learning_rate": 5.63325334433974e-06, "loss": 0.6222, "step": 3714 }, { "epoch": 0.48, "grad_norm": 1.4045964479446411, "learning_rate": 5.631194971103357e-06, "loss": 0.5758, "step": 3715 }, { "epoch": 0.48, "grad_norm": 1.5967957973480225, "learning_rate": 5.6291364891564704e-06, "loss": 0.6434, "step": 3716 }, { "epoch": 0.48, "grad_norm": 1.5817066431045532, "learning_rate": 5.62707789885361e-06, "loss": 0.6438, "step": 3717 }, { "epoch": 0.48, "grad_norm": 1.5195118188858032, "learning_rate": 5.6250192005493285e-06, "loss": 0.6614, "step": 3718 }, { "epoch": 0.48, "grad_norm": 1.3318872451782227, "learning_rate": 5.622960394598194e-06, "loss": 0.598, "step": 3719 }, { "epoch": 0.48, "grad_norm": 1.229845643043518, "learning_rate": 5.620901481354792e-06, "loss": 0.5759, "step": 3720 }, { "epoch": 0.48, "grad_norm": 1.2453598976135254, "learning_rate": 5.6188424611737325e-06, "loss": 0.671, "step": 3721 }, { "epoch": 0.48, "grad_norm": 1.5366243124008179, "learning_rate": 5.616783334409637e-06, "loss": 0.7277, "step": 3722 }, { "epoch": 0.48, "grad_norm": 1.2466989755630493, "learning_rate": 5.614724101417148e-06, "loss": 0.6157, "step": 3723 }, { "epoch": 0.48, "grad_norm": 1.4630953073501587, "learning_rate": 5.612664762550927e-06, "loss": 0.6182, "step": 3724 }, { "epoch": 0.48, "grad_norm": 1.7095898389816284, "learning_rate": 5.6106053181656535e-06, "loss": 0.6675, "step": 3725 }, { "epoch": 0.48, "grad_norm": 1.4824260473251343, "learning_rate": 5.608545768616025e-06, "loss": 0.5571, "step": 3726 }, { "epoch": 0.48, "grad_norm": 1.4626764059066772, "learning_rate": 5.6064861142567575e-06, "loss": 0.6716, "step": 3727 }, { "epoch": 0.48, "grad_norm": 1.6166949272155762, "learning_rate": 5.604426355442584e-06, "loss": 0.6997, "step": 3728 }, { "epoch": 0.48, "grad_norm": 1.882332444190979, "learning_rate": 5.602366492528256e-06, "loss": 0.6062, "step": 3729 }, { "epoch": 0.48, "grad_norm": 1.456566333770752, "learning_rate": 5.6003065258685444e-06, "loss": 0.5865, "step": 3730 }, { "epoch": 0.48, "grad_norm": 1.3899471759796143, "learning_rate": 5.5982464558182335e-06, "loss": 0.5982, "step": 3731 }, { "epoch": 0.48, "grad_norm": 1.3733237981796265, "learning_rate": 5.596186282732132e-06, "loss": 0.6052, "step": 3732 }, { "epoch": 0.48, "grad_norm": 1.0841834545135498, "learning_rate": 5.59412600696506e-06, "loss": 0.5684, "step": 3733 }, { "epoch": 0.48, "grad_norm": 1.2034820318222046, "learning_rate": 5.59206562887186e-06, "loss": 0.5606, "step": 3734 }, { "epoch": 0.48, "grad_norm": 1.2253649234771729, "learning_rate": 5.5900051488073896e-06, "loss": 0.5615, "step": 3735 }, { "epoch": 0.48, "grad_norm": 2.54776930809021, "learning_rate": 5.587944567126525e-06, "loss": 0.6376, "step": 3736 }, { "epoch": 0.48, "grad_norm": 1.1381703615188599, "learning_rate": 5.585883884184158e-06, "loss": 0.594, "step": 3737 }, { "epoch": 0.48, "grad_norm": 1.3217531442642212, "learning_rate": 5.583823100335202e-06, "loss": 0.7467, "step": 3738 }, { "epoch": 0.48, "grad_norm": 1.229705810546875, "learning_rate": 5.581762215934582e-06, "loss": 0.4965, "step": 3739 }, { "epoch": 0.48, "grad_norm": 1.090378761291504, "learning_rate": 5.579701231337247e-06, "loss": 0.6012, "step": 3740 }, { "epoch": 0.48, "grad_norm": 1.2407397031784058, "learning_rate": 5.577640146898156e-06, "loss": 0.6052, "step": 3741 }, { "epoch": 0.48, "grad_norm": 2.4652295112609863, "learning_rate": 5.57557896297229e-06, "loss": 0.6058, "step": 3742 }, { "epoch": 0.48, "grad_norm": 1.1912661790847778, "learning_rate": 5.5735176799146486e-06, "loss": 0.5969, "step": 3743 }, { "epoch": 0.48, "grad_norm": 1.246437668800354, "learning_rate": 5.571456298080243e-06, "loss": 0.6003, "step": 3744 }, { "epoch": 0.48, "grad_norm": 1.1998566389083862, "learning_rate": 5.569394817824106e-06, "loss": 0.612, "step": 3745 }, { "epoch": 0.48, "grad_norm": 1.8297500610351562, "learning_rate": 5.567333239501284e-06, "loss": 0.5909, "step": 3746 }, { "epoch": 0.48, "grad_norm": 1.1892491579055786, "learning_rate": 5.565271563466845e-06, "loss": 0.5661, "step": 3747 }, { "epoch": 0.48, "grad_norm": 1.076907753944397, "learning_rate": 5.5632097900758676e-06, "loss": 0.5741, "step": 3748 }, { "epoch": 0.48, "grad_norm": 1.0066630840301514, "learning_rate": 5.561147919683451e-06, "loss": 0.5962, "step": 3749 }, { "epoch": 0.48, "grad_norm": 2.04477858543396, "learning_rate": 5.559085952644711e-06, "loss": 0.5698, "step": 3750 }, { "epoch": 0.48, "grad_norm": 1.407403588294983, "learning_rate": 5.5570238893147795e-06, "loss": 0.6276, "step": 3751 }, { "epoch": 0.48, "grad_norm": 1.328902244567871, "learning_rate": 5.554961730048806e-06, "loss": 0.579, "step": 3752 }, { "epoch": 0.48, "grad_norm": 1.0437157154083252, "learning_rate": 5.5528994752019535e-06, "loss": 0.6284, "step": 3753 }, { "epoch": 0.48, "grad_norm": 1.2737390995025635, "learning_rate": 5.550837125129406e-06, "loss": 0.6508, "step": 3754 }, { "epoch": 0.48, "grad_norm": 1.1763293743133545, "learning_rate": 5.548774680186359e-06, "loss": 0.5689, "step": 3755 }, { "epoch": 0.48, "grad_norm": 1.2019954919815063, "learning_rate": 5.5467121407280275e-06, "loss": 0.7561, "step": 3756 }, { "epoch": 0.48, "grad_norm": 1.4074658155441284, "learning_rate": 5.544649507109642e-06, "loss": 0.6113, "step": 3757 }, { "epoch": 0.48, "grad_norm": 1.317198395729065, "learning_rate": 5.5425867796864496e-06, "loss": 0.6282, "step": 3758 }, { "epoch": 0.48, "grad_norm": 1.7032959461212158, "learning_rate": 5.54052395881371e-06, "loss": 0.6331, "step": 3759 }, { "epoch": 0.48, "grad_norm": 1.3925243616104126, "learning_rate": 5.5384610448467095e-06, "loss": 0.563, "step": 3760 }, { "epoch": 0.48, "grad_norm": 2.3951807022094727, "learning_rate": 5.536398038140735e-06, "loss": 0.596, "step": 3761 }, { "epoch": 0.48, "grad_norm": 1.2922788858413696, "learning_rate": 5.534334939051104e-06, "loss": 0.5835, "step": 3762 }, { "epoch": 0.48, "grad_norm": 1.4560922384262085, "learning_rate": 5.53227174793314e-06, "loss": 0.5856, "step": 3763 }, { "epoch": 0.48, "grad_norm": 1.7121245861053467, "learning_rate": 5.530208465142186e-06, "loss": 0.5596, "step": 3764 }, { "epoch": 0.48, "grad_norm": 1.4115746021270752, "learning_rate": 5.5281450910336e-06, "loss": 0.6768, "step": 3765 }, { "epoch": 0.48, "grad_norm": 2.208176612854004, "learning_rate": 5.526081625962758e-06, "loss": 0.5841, "step": 3766 }, { "epoch": 0.48, "grad_norm": 1.182554841041565, "learning_rate": 5.524018070285047e-06, "loss": 0.6007, "step": 3767 }, { "epoch": 0.48, "grad_norm": 1.3877570629119873, "learning_rate": 5.521954424355876e-06, "loss": 0.6617, "step": 3768 }, { "epoch": 0.48, "grad_norm": 1.7248724699020386, "learning_rate": 5.519890688530666e-06, "loss": 0.6391, "step": 3769 }, { "epoch": 0.48, "grad_norm": 1.0161206722259521, "learning_rate": 5.517826863164849e-06, "loss": 0.4795, "step": 3770 }, { "epoch": 0.48, "grad_norm": 1.1962370872497559, "learning_rate": 5.515762948613882e-06, "loss": 0.6225, "step": 3771 }, { "epoch": 0.48, "grad_norm": 1.3718147277832031, "learning_rate": 5.51369894523323e-06, "loss": 0.6185, "step": 3772 }, { "epoch": 0.48, "grad_norm": 1.4110352993011475, "learning_rate": 5.5116348533783755e-06, "loss": 0.6968, "step": 3773 }, { "epoch": 0.48, "grad_norm": 1.2778741121292114, "learning_rate": 5.509570673404819e-06, "loss": 0.5913, "step": 3774 }, { "epoch": 0.48, "grad_norm": 0.9584821462631226, "learning_rate": 5.50750640566807e-06, "loss": 0.5655, "step": 3775 }, { "epoch": 0.48, "grad_norm": 1.1385620832443237, "learning_rate": 5.505442050523655e-06, "loss": 0.703, "step": 3776 }, { "epoch": 0.48, "grad_norm": 1.690279483795166, "learning_rate": 5.503377608327124e-06, "loss": 0.5459, "step": 3777 }, { "epoch": 0.48, "grad_norm": 1.3032708168029785, "learning_rate": 5.50131307943403e-06, "loss": 0.6358, "step": 3778 }, { "epoch": 0.48, "grad_norm": 1.129211187362671, "learning_rate": 5.499248464199949e-06, "loss": 0.5988, "step": 3779 }, { "epoch": 0.48, "grad_norm": 1.3283401727676392, "learning_rate": 5.497183762980467e-06, "loss": 0.6212, "step": 3780 }, { "epoch": 0.48, "grad_norm": 1.4811829328536987, "learning_rate": 5.495118976131187e-06, "loss": 0.5867, "step": 3781 }, { "epoch": 0.48, "grad_norm": 1.861901044845581, "learning_rate": 5.493054104007728e-06, "loss": 0.6727, "step": 3782 }, { "epoch": 0.48, "grad_norm": 1.4480305910110474, "learning_rate": 5.49098914696572e-06, "loss": 0.6174, "step": 3783 }, { "epoch": 0.48, "grad_norm": 1.423263669013977, "learning_rate": 5.488924105360812e-06, "loss": 0.6232, "step": 3784 }, { "epoch": 0.48, "grad_norm": 1.5136548280715942, "learning_rate": 5.486858979548663e-06, "loss": 0.5616, "step": 3785 }, { "epoch": 0.49, "grad_norm": 1.1748825311660767, "learning_rate": 5.484793769884953e-06, "loss": 0.5792, "step": 3786 }, { "epoch": 0.49, "grad_norm": 1.2067316770553589, "learning_rate": 5.482728476725369e-06, "loss": 0.5731, "step": 3787 }, { "epoch": 0.49, "grad_norm": 1.8080260753631592, "learning_rate": 5.480663100425616e-06, "loss": 0.6952, "step": 3788 }, { "epoch": 0.49, "grad_norm": 1.5747623443603516, "learning_rate": 5.478597641341414e-06, "loss": 0.5669, "step": 3789 }, { "epoch": 0.49, "grad_norm": 1.224440574645996, "learning_rate": 5.476532099828498e-06, "loss": 0.5727, "step": 3790 }, { "epoch": 0.49, "grad_norm": 1.2634854316711426, "learning_rate": 5.474466476242611e-06, "loss": 0.6208, "step": 3791 }, { "epoch": 0.49, "grad_norm": 1.3283199071884155, "learning_rate": 5.472400770939519e-06, "loss": 0.6815, "step": 3792 }, { "epoch": 0.49, "grad_norm": 1.3618751764297485, "learning_rate": 5.470334984274995e-06, "loss": 0.58, "step": 3793 }, { "epoch": 0.49, "grad_norm": 1.400259256362915, "learning_rate": 5.468269116604831e-06, "loss": 0.6144, "step": 3794 }, { "epoch": 0.49, "grad_norm": 1.376218557357788, "learning_rate": 5.466203168284829e-06, "loss": 0.6464, "step": 3795 }, { "epoch": 0.49, "grad_norm": 1.2561371326446533, "learning_rate": 5.464137139670806e-06, "loss": 0.5849, "step": 3796 }, { "epoch": 0.49, "grad_norm": 1.5874736309051514, "learning_rate": 5.4620710311185976e-06, "loss": 0.6173, "step": 3797 }, { "epoch": 0.49, "grad_norm": 1.9282337427139282, "learning_rate": 5.460004842984044e-06, "loss": 0.5781, "step": 3798 }, { "epoch": 0.49, "grad_norm": 1.2490649223327637, "learning_rate": 5.457938575623008e-06, "loss": 0.6326, "step": 3799 }, { "epoch": 0.49, "grad_norm": 1.4292243719100952, "learning_rate": 5.455872229391357e-06, "loss": 0.5963, "step": 3800 }, { "epoch": 0.49, "grad_norm": 1.1780860424041748, "learning_rate": 5.453805804644984e-06, "loss": 0.5695, "step": 3801 }, { "epoch": 0.49, "grad_norm": 1.3638889789581299, "learning_rate": 5.451739301739782e-06, "loss": 0.5805, "step": 3802 }, { "epoch": 0.49, "grad_norm": 1.8172688484191895, "learning_rate": 5.449672721031668e-06, "loss": 0.5556, "step": 3803 }, { "epoch": 0.49, "grad_norm": 1.3854845762252808, "learning_rate": 5.447606062876569e-06, "loss": 0.6247, "step": 3804 }, { "epoch": 0.49, "grad_norm": 1.3911125659942627, "learning_rate": 5.445539327630423e-06, "loss": 0.5315, "step": 3805 }, { "epoch": 0.49, "grad_norm": 1.301125407218933, "learning_rate": 5.443472515649184e-06, "loss": 0.6608, "step": 3806 }, { "epoch": 0.49, "grad_norm": 1.013878583908081, "learning_rate": 5.441405627288817e-06, "loss": 0.5742, "step": 3807 }, { "epoch": 0.49, "grad_norm": 2.087625026702881, "learning_rate": 5.439338662905305e-06, "loss": 0.6331, "step": 3808 }, { "epoch": 0.49, "grad_norm": 1.1464579105377197, "learning_rate": 5.437271622854635e-06, "loss": 0.6458, "step": 3809 }, { "epoch": 0.49, "grad_norm": 1.453547477722168, "learning_rate": 5.435204507492819e-06, "loss": 0.6398, "step": 3810 }, { "epoch": 0.49, "grad_norm": 1.6091513633728027, "learning_rate": 5.433137317175868e-06, "loss": 0.5774, "step": 3811 }, { "epoch": 0.49, "grad_norm": 1.2415236234664917, "learning_rate": 5.431070052259821e-06, "loss": 0.7783, "step": 3812 }, { "epoch": 0.49, "grad_norm": 1.310966968536377, "learning_rate": 5.42900271310072e-06, "loss": 0.7047, "step": 3813 }, { "epoch": 0.49, "grad_norm": 1.3866527080535889, "learning_rate": 5.426935300054621e-06, "loss": 0.6902, "step": 3814 }, { "epoch": 0.49, "grad_norm": 1.3021939992904663, "learning_rate": 5.424867813477593e-06, "loss": 0.5693, "step": 3815 }, { "epoch": 0.49, "grad_norm": 1.2108157873153687, "learning_rate": 5.422800253725722e-06, "loss": 0.5726, "step": 3816 }, { "epoch": 0.49, "grad_norm": 1.936347246170044, "learning_rate": 5.4207326211550995e-06, "loss": 0.6049, "step": 3817 }, { "epoch": 0.49, "grad_norm": 1.1145234107971191, "learning_rate": 5.418664916121835e-06, "loss": 0.5592, "step": 3818 }, { "epoch": 0.49, "grad_norm": 5.271641254425049, "learning_rate": 5.416597138982048e-06, "loss": 0.5187, "step": 3819 }, { "epoch": 0.49, "grad_norm": 1.0878288745880127, "learning_rate": 5.414529290091872e-06, "loss": 0.6153, "step": 3820 }, { "epoch": 0.49, "grad_norm": 1.5007294416427612, "learning_rate": 5.412461369807451e-06, "loss": 0.621, "step": 3821 }, { "epoch": 0.49, "grad_norm": 1.5491704940795898, "learning_rate": 5.4103933784849435e-06, "loss": 0.723, "step": 3822 }, { "epoch": 0.49, "grad_norm": 1.1629546880722046, "learning_rate": 5.408325316480518e-06, "loss": 0.5476, "step": 3823 }, { "epoch": 0.49, "grad_norm": 1.2649697065353394, "learning_rate": 5.406257184150355e-06, "loss": 0.6794, "step": 3824 }, { "epoch": 0.49, "grad_norm": 1.6518555879592896, "learning_rate": 5.404188981850651e-06, "loss": 0.6568, "step": 3825 }, { "epoch": 0.49, "grad_norm": 1.191433310508728, "learning_rate": 5.4021207099376095e-06, "loss": 0.6148, "step": 3826 }, { "epoch": 0.49, "grad_norm": 1.2688812017440796, "learning_rate": 5.40005236876745e-06, "loss": 0.6096, "step": 3827 }, { "epoch": 0.49, "grad_norm": 1.7701101303100586, "learning_rate": 5.3979839586964e-06, "loss": 0.5587, "step": 3828 }, { "epoch": 0.49, "grad_norm": 1.3165203332901, "learning_rate": 5.3959154800807025e-06, "loss": 0.5454, "step": 3829 }, { "epoch": 0.49, "grad_norm": 1.3157920837402344, "learning_rate": 5.393846933276612e-06, "loss": 0.6478, "step": 3830 }, { "epoch": 0.49, "grad_norm": 1.2996734380722046, "learning_rate": 5.391778318640392e-06, "loss": 0.5351, "step": 3831 }, { "epoch": 0.49, "grad_norm": 1.2892003059387207, "learning_rate": 5.38970963652832e-06, "loss": 0.5722, "step": 3832 }, { "epoch": 0.49, "grad_norm": 1.160461187362671, "learning_rate": 5.387640887296683e-06, "loss": 0.6198, "step": 3833 }, { "epoch": 0.49, "grad_norm": 1.233054757118225, "learning_rate": 5.385572071301785e-06, "loss": 0.5894, "step": 3834 }, { "epoch": 0.49, "grad_norm": 1.3636419773101807, "learning_rate": 5.383503188899932e-06, "loss": 0.6296, "step": 3835 }, { "epoch": 0.49, "grad_norm": 1.6694684028625488, "learning_rate": 5.381434240447451e-06, "loss": 0.6466, "step": 3836 }, { "epoch": 0.49, "grad_norm": 1.753450632095337, "learning_rate": 5.379365226300673e-06, "loss": 0.6464, "step": 3837 }, { "epoch": 0.49, "grad_norm": 1.236421823501587, "learning_rate": 5.377296146815949e-06, "loss": 0.5271, "step": 3838 }, { "epoch": 0.49, "grad_norm": 1.3160569667816162, "learning_rate": 5.37522700234963e-06, "loss": 0.7266, "step": 3839 }, { "epoch": 0.49, "grad_norm": 1.5594069957733154, "learning_rate": 5.373157793258088e-06, "loss": 0.6761, "step": 3840 }, { "epoch": 0.49, "grad_norm": 1.5676730871200562, "learning_rate": 5.3710885198977004e-06, "loss": 0.5312, "step": 3841 }, { "epoch": 0.49, "grad_norm": 1.762484073638916, "learning_rate": 5.369019182624858e-06, "loss": 0.6218, "step": 3842 }, { "epoch": 0.49, "grad_norm": 1.2499971389770508, "learning_rate": 5.366949781795961e-06, "loss": 0.6244, "step": 3843 }, { "epoch": 0.49, "grad_norm": 1.2467316389083862, "learning_rate": 5.364880317767424e-06, "loss": 0.6309, "step": 3844 }, { "epoch": 0.49, "grad_norm": 1.1873960494995117, "learning_rate": 5.362810790895668e-06, "loss": 0.5959, "step": 3845 }, { "epoch": 0.49, "grad_norm": 1.2639389038085938, "learning_rate": 5.360741201537127e-06, "loss": 0.6166, "step": 3846 }, { "epoch": 0.49, "grad_norm": 1.4507334232330322, "learning_rate": 5.358671550048249e-06, "loss": 0.5928, "step": 3847 }, { "epoch": 0.49, "grad_norm": 1.1256029605865479, "learning_rate": 5.356601836785484e-06, "loss": 0.5958, "step": 3848 }, { "epoch": 0.49, "grad_norm": 1.2243516445159912, "learning_rate": 5.354532062105303e-06, "loss": 0.5836, "step": 3849 }, { "epoch": 0.49, "grad_norm": 1.5672364234924316, "learning_rate": 5.352462226364179e-06, "loss": 0.6359, "step": 3850 }, { "epoch": 0.49, "grad_norm": 1.424888014793396, "learning_rate": 5.350392329918601e-06, "loss": 0.6901, "step": 3851 }, { "epoch": 0.49, "grad_norm": 3.7046003341674805, "learning_rate": 5.348322373125065e-06, "loss": 0.6442, "step": 3852 }, { "epoch": 0.49, "grad_norm": 1.4008851051330566, "learning_rate": 5.346252356340082e-06, "loss": 0.5696, "step": 3853 }, { "epoch": 0.49, "grad_norm": 1.6207784414291382, "learning_rate": 5.344182279920167e-06, "loss": 0.5765, "step": 3854 }, { "epoch": 0.49, "grad_norm": 1.579346776008606, "learning_rate": 5.342112144221851e-06, "loss": 0.6231, "step": 3855 }, { "epoch": 0.49, "grad_norm": 1.2518047094345093, "learning_rate": 5.340041949601672e-06, "loss": 0.5404, "step": 3856 }, { "epoch": 0.49, "grad_norm": 1.0116968154907227, "learning_rate": 5.337971696416178e-06, "loss": 0.7541, "step": 3857 }, { "epoch": 0.49, "grad_norm": 1.3027863502502441, "learning_rate": 5.33590138502193e-06, "loss": 0.626, "step": 3858 }, { "epoch": 0.49, "grad_norm": 2.156329870223999, "learning_rate": 5.3338310157754934e-06, "loss": 0.6029, "step": 3859 }, { "epoch": 0.49, "grad_norm": 1.7596616744995117, "learning_rate": 5.331760589033452e-06, "loss": 0.6789, "step": 3860 }, { "epoch": 0.49, "grad_norm": 1.0322740077972412, "learning_rate": 5.329690105152392e-06, "loss": 0.637, "step": 3861 }, { "epoch": 0.49, "grad_norm": 1.2053539752960205, "learning_rate": 5.3276195644889115e-06, "loss": 0.5274, "step": 3862 }, { "epoch": 0.49, "grad_norm": 1.20881986618042, "learning_rate": 5.325548967399621e-06, "loss": 0.636, "step": 3863 }, { "epoch": 0.5, "grad_norm": 1.379366159439087, "learning_rate": 5.323478314241138e-06, "loss": 0.598, "step": 3864 }, { "epoch": 0.5, "grad_norm": 1.3362188339233398, "learning_rate": 5.321407605370091e-06, "loss": 0.7068, "step": 3865 }, { "epoch": 0.5, "grad_norm": 1.4871771335601807, "learning_rate": 5.319336841143116e-06, "loss": 0.6618, "step": 3866 }, { "epoch": 0.5, "grad_norm": 1.288758397102356, "learning_rate": 5.31726602191686e-06, "loss": 0.7501, "step": 3867 }, { "epoch": 0.5, "grad_norm": 1.3813121318817139, "learning_rate": 5.315195148047981e-06, "loss": 0.6301, "step": 3868 }, { "epoch": 0.5, "grad_norm": 1.2369877099990845, "learning_rate": 5.313124219893145e-06, "loss": 0.5641, "step": 3869 }, { "epoch": 0.5, "grad_norm": 1.0300804376602173, "learning_rate": 5.311053237809026e-06, "loss": 0.6737, "step": 3870 }, { "epoch": 0.5, "grad_norm": 1.3140389919281006, "learning_rate": 5.30898220215231e-06, "loss": 0.5421, "step": 3871 }, { "epoch": 0.5, "grad_norm": 1.4346362352371216, "learning_rate": 5.306911113279689e-06, "loss": 0.6743, "step": 3872 }, { "epoch": 0.5, "grad_norm": 1.0767395496368408, "learning_rate": 5.3048399715478675e-06, "loss": 0.6462, "step": 3873 }, { "epoch": 0.5, "grad_norm": 1.3019919395446777, "learning_rate": 5.302768777313556e-06, "loss": 0.7012, "step": 3874 }, { "epoch": 0.5, "grad_norm": 1.2964775562286377, "learning_rate": 5.300697530933479e-06, "loss": 0.4923, "step": 3875 }, { "epoch": 0.5, "grad_norm": 1.0694615840911865, "learning_rate": 5.2986262327643615e-06, "loss": 0.5454, "step": 3876 }, { "epoch": 0.5, "grad_norm": 1.3932045698165894, "learning_rate": 5.296554883162947e-06, "loss": 0.5652, "step": 3877 }, { "epoch": 0.5, "grad_norm": 1.147936463356018, "learning_rate": 5.29448348248598e-06, "loss": 0.6513, "step": 3878 }, { "epoch": 0.5, "grad_norm": 1.2141468524932861, "learning_rate": 5.29241203109022e-06, "loss": 0.5956, "step": 3879 }, { "epoch": 0.5, "grad_norm": 1.1196589469909668, "learning_rate": 5.2903405293324316e-06, "loss": 0.5876, "step": 3880 }, { "epoch": 0.5, "grad_norm": 1.196601152420044, "learning_rate": 5.288268977569386e-06, "loss": 0.6035, "step": 3881 }, { "epoch": 0.5, "grad_norm": 1.4775373935699463, "learning_rate": 5.286197376157871e-06, "loss": 0.6348, "step": 3882 }, { "epoch": 0.5, "grad_norm": 1.283056378364563, "learning_rate": 5.284125725454674e-06, "loss": 0.64, "step": 3883 }, { "epoch": 0.5, "grad_norm": 1.263865351676941, "learning_rate": 5.282054025816597e-06, "loss": 0.6002, "step": 3884 }, { "epoch": 0.5, "grad_norm": 1.2261013984680176, "learning_rate": 5.279982277600445e-06, "loss": 0.5812, "step": 3885 }, { "epoch": 0.5, "grad_norm": 3.103773355484009, "learning_rate": 5.277910481163038e-06, "loss": 0.6105, "step": 3886 }, { "epoch": 0.5, "grad_norm": 1.412146806716919, "learning_rate": 5.275838636861198e-06, "loss": 0.6886, "step": 3887 }, { "epoch": 0.5, "grad_norm": 2.211158037185669, "learning_rate": 5.273766745051761e-06, "loss": 0.5951, "step": 3888 }, { "epoch": 0.5, "grad_norm": 1.4581794738769531, "learning_rate": 5.271694806091564e-06, "loss": 0.64, "step": 3889 }, { "epoch": 0.5, "grad_norm": 1.1060121059417725, "learning_rate": 5.269622820337462e-06, "loss": 0.5375, "step": 3890 }, { "epoch": 0.5, "grad_norm": 3.2724769115448, "learning_rate": 5.267550788146308e-06, "loss": 0.5919, "step": 3891 }, { "epoch": 0.5, "grad_norm": 1.2122722864151, "learning_rate": 5.26547870987497e-06, "loss": 0.6125, "step": 3892 }, { "epoch": 0.5, "grad_norm": 1.1100267171859741, "learning_rate": 5.26340658588032e-06, "loss": 0.5679, "step": 3893 }, { "epoch": 0.5, "grad_norm": 1.2956206798553467, "learning_rate": 5.261334416519239e-06, "loss": 0.6334, "step": 3894 }, { "epoch": 0.5, "grad_norm": 1.3694170713424683, "learning_rate": 5.259262202148616e-06, "loss": 0.6584, "step": 3895 }, { "epoch": 0.5, "grad_norm": 1.3362345695495605, "learning_rate": 5.257189943125349e-06, "loss": 0.5786, "step": 3896 }, { "epoch": 0.5, "grad_norm": 1.2397748231887817, "learning_rate": 5.255117639806343e-06, "loss": 0.5881, "step": 3897 }, { "epoch": 0.5, "grad_norm": 1.4001562595367432, "learning_rate": 5.253045292548508e-06, "loss": 0.646, "step": 3898 }, { "epoch": 0.5, "grad_norm": 1.3735246658325195, "learning_rate": 5.250972901708765e-06, "loss": 0.8063, "step": 3899 }, { "epoch": 0.5, "grad_norm": 1.1073076725006104, "learning_rate": 5.248900467644041e-06, "loss": 0.6368, "step": 3900 }, { "epoch": 0.5, "grad_norm": 1.3196773529052734, "learning_rate": 5.246827990711269e-06, "loss": 0.5578, "step": 3901 }, { "epoch": 0.5, "grad_norm": 1.3190702199935913, "learning_rate": 5.244755471267394e-06, "loss": 0.5742, "step": 3902 }, { "epoch": 0.5, "grad_norm": 1.3288342952728271, "learning_rate": 5.242682909669364e-06, "loss": 0.5448, "step": 3903 }, { "epoch": 0.5, "grad_norm": 1.3225739002227783, "learning_rate": 5.240610306274134e-06, "loss": 0.6489, "step": 3904 }, { "epoch": 0.5, "grad_norm": 1.309205174446106, "learning_rate": 5.238537661438671e-06, "loss": 0.6241, "step": 3905 }, { "epoch": 0.5, "grad_norm": 1.2318326234817505, "learning_rate": 5.236464975519944e-06, "loss": 0.5419, "step": 3906 }, { "epoch": 0.5, "grad_norm": 1.4670156240463257, "learning_rate": 5.234392248874931e-06, "loss": 0.6127, "step": 3907 }, { "epoch": 0.5, "grad_norm": 2.364734411239624, "learning_rate": 5.232319481860618e-06, "loss": 0.6175, "step": 3908 }, { "epoch": 0.5, "grad_norm": 1.1357091665267944, "learning_rate": 5.2302466748339954e-06, "loss": 0.6176, "step": 3909 }, { "epoch": 0.5, "grad_norm": 1.4520831108093262, "learning_rate": 5.228173828152063e-06, "loss": 0.5557, "step": 3910 }, { "epoch": 0.5, "grad_norm": 1.7567130327224731, "learning_rate": 5.226100942171827e-06, "loss": 0.5868, "step": 3911 }, { "epoch": 0.5, "grad_norm": 1.0778729915618896, "learning_rate": 5.2240280172503e-06, "loss": 0.6589, "step": 3912 }, { "epoch": 0.5, "grad_norm": 1.256453037261963, "learning_rate": 5.221955053744498e-06, "loss": 0.6077, "step": 3913 }, { "epoch": 0.5, "grad_norm": 4.119685649871826, "learning_rate": 5.219882052011451e-06, "loss": 0.6429, "step": 3914 }, { "epoch": 0.5, "grad_norm": 1.4709926843643188, "learning_rate": 5.217809012408191e-06, "loss": 0.621, "step": 3915 }, { "epoch": 0.5, "grad_norm": 1.3248754739761353, "learning_rate": 5.215735935291755e-06, "loss": 0.656, "step": 3916 }, { "epoch": 0.5, "grad_norm": 1.7260463237762451, "learning_rate": 5.213662821019187e-06, "loss": 0.6932, "step": 3917 }, { "epoch": 0.5, "grad_norm": 1.4484518766403198, "learning_rate": 5.211589669947544e-06, "loss": 0.5989, "step": 3918 }, { "epoch": 0.5, "grad_norm": 1.3106611967086792, "learning_rate": 5.209516482433879e-06, "loss": 0.682, "step": 3919 }, { "epoch": 0.5, "grad_norm": 1.3350433111190796, "learning_rate": 5.207443258835261e-06, "loss": 0.6008, "step": 3920 }, { "epoch": 0.5, "grad_norm": 1.8616951704025269, "learning_rate": 5.205369999508756e-06, "loss": 0.5976, "step": 3921 }, { "epoch": 0.5, "grad_norm": 1.2377861738204956, "learning_rate": 5.203296704811443e-06, "loss": 0.5425, "step": 3922 }, { "epoch": 0.5, "grad_norm": 1.428403615951538, "learning_rate": 5.201223375100406e-06, "loss": 0.6628, "step": 3923 }, { "epoch": 0.5, "grad_norm": 1.4202046394348145, "learning_rate": 5.199150010732731e-06, "loss": 0.567, "step": 3924 }, { "epoch": 0.5, "grad_norm": 1.4062016010284424, "learning_rate": 5.1970766120655155e-06, "loss": 0.6508, "step": 3925 }, { "epoch": 0.5, "grad_norm": 1.2854269742965698, "learning_rate": 5.195003179455859e-06, "loss": 0.6857, "step": 3926 }, { "epoch": 0.5, "grad_norm": 1.979161024093628, "learning_rate": 5.192929713260869e-06, "loss": 0.6199, "step": 3927 }, { "epoch": 0.5, "grad_norm": 1.3159161806106567, "learning_rate": 5.190856213837656e-06, "loss": 0.6078, "step": 3928 }, { "epoch": 0.5, "grad_norm": 1.0987286567687988, "learning_rate": 5.1887826815433415e-06, "loss": 0.5123, "step": 3929 }, { "epoch": 0.5, "grad_norm": 1.5091686248779297, "learning_rate": 5.186709116735046e-06, "loss": 0.596, "step": 3930 }, { "epoch": 0.5, "grad_norm": 1.3786001205444336, "learning_rate": 5.1846355197699e-06, "loss": 0.6173, "step": 3931 }, { "epoch": 0.5, "grad_norm": 1.4815407991409302, "learning_rate": 5.182561891005039e-06, "loss": 0.5635, "step": 3932 }, { "epoch": 0.5, "grad_norm": 1.4058189392089844, "learning_rate": 5.180488230797602e-06, "loss": 0.5896, "step": 3933 }, { "epoch": 0.5, "grad_norm": 1.2490657567977905, "learning_rate": 5.178414539504737e-06, "loss": 0.596, "step": 3934 }, { "epoch": 0.5, "grad_norm": 1.224664330482483, "learning_rate": 5.176340817483592e-06, "loss": 0.6107, "step": 3935 }, { "epoch": 0.5, "grad_norm": 1.3340404033660889, "learning_rate": 5.174267065091329e-06, "loss": 0.5858, "step": 3936 }, { "epoch": 0.5, "grad_norm": 1.558334469795227, "learning_rate": 5.172193282685104e-06, "loss": 0.5914, "step": 3937 }, { "epoch": 0.5, "grad_norm": 1.1823575496673584, "learning_rate": 5.170119470622086e-06, "loss": 0.5338, "step": 3938 }, { "epoch": 0.5, "grad_norm": 1.632826328277588, "learning_rate": 5.168045629259446e-06, "loss": 0.7068, "step": 3939 }, { "epoch": 0.5, "grad_norm": 1.282450556755066, "learning_rate": 5.1659717589543635e-06, "loss": 0.6205, "step": 3940 }, { "epoch": 0.5, "grad_norm": 2.351825714111328, "learning_rate": 5.163897860064019e-06, "loss": 0.6188, "step": 3941 }, { "epoch": 0.51, "grad_norm": 1.1424978971481323, "learning_rate": 5.1618239329456e-06, "loss": 0.5913, "step": 3942 }, { "epoch": 0.51, "grad_norm": 1.2732305526733398, "learning_rate": 5.159749977956295e-06, "loss": 0.572, "step": 3943 }, { "epoch": 0.51, "grad_norm": 1.20172917842865, "learning_rate": 5.157675995453306e-06, "loss": 0.7332, "step": 3944 }, { "epoch": 0.51, "grad_norm": 1.1711151599884033, "learning_rate": 5.155601985793831e-06, "loss": 0.5454, "step": 3945 }, { "epoch": 0.51, "grad_norm": 1.4528971910476685, "learning_rate": 5.153527949335077e-06, "loss": 0.5712, "step": 3946 }, { "epoch": 0.51, "grad_norm": 1.6643458604812622, "learning_rate": 5.151453886434255e-06, "loss": 0.6729, "step": 3947 }, { "epoch": 0.51, "grad_norm": 1.3720951080322266, "learning_rate": 5.149379797448577e-06, "loss": 0.6249, "step": 3948 }, { "epoch": 0.51, "grad_norm": 1.305267572402954, "learning_rate": 5.147305682735266e-06, "loss": 0.5595, "step": 3949 }, { "epoch": 0.51, "grad_norm": 1.398755669593811, "learning_rate": 5.145231542651547e-06, "loss": 0.5929, "step": 3950 }, { "epoch": 0.51, "grad_norm": 1.1850718259811401, "learning_rate": 5.143157377554645e-06, "loss": 0.6145, "step": 3951 }, { "epoch": 0.51, "grad_norm": 1.3608309030532837, "learning_rate": 5.1410831878017945e-06, "loss": 0.6691, "step": 3952 }, { "epoch": 0.51, "grad_norm": 1.1051334142684937, "learning_rate": 5.139008973750234e-06, "loss": 0.6059, "step": 3953 }, { "epoch": 0.51, "grad_norm": 1.0188237428665161, "learning_rate": 5.136934735757202e-06, "loss": 0.6137, "step": 3954 }, { "epoch": 0.51, "grad_norm": 1.2129838466644287, "learning_rate": 5.1348604741799455e-06, "loss": 0.5999, "step": 3955 }, { "epoch": 0.51, "grad_norm": 2.062683343887329, "learning_rate": 5.1327861893757125e-06, "loss": 0.5963, "step": 3956 }, { "epoch": 0.51, "grad_norm": 1.2500804662704468, "learning_rate": 5.1307118817017575e-06, "loss": 0.6215, "step": 3957 }, { "epoch": 0.51, "grad_norm": 1.3206661939620972, "learning_rate": 5.12863755151534e-06, "loss": 0.7026, "step": 3958 }, { "epoch": 0.51, "grad_norm": 2.201270580291748, "learning_rate": 5.1265631991737165e-06, "loss": 0.6585, "step": 3959 }, { "epoch": 0.51, "grad_norm": 1.1174263954162598, "learning_rate": 5.124488825034155e-06, "loss": 0.5546, "step": 3960 }, { "epoch": 0.51, "grad_norm": 1.306179165840149, "learning_rate": 5.122414429453923e-06, "loss": 0.6222, "step": 3961 }, { "epoch": 0.51, "grad_norm": 1.289539098739624, "learning_rate": 5.120340012790296e-06, "loss": 0.6138, "step": 3962 }, { "epoch": 0.51, "grad_norm": 1.3443716764450073, "learning_rate": 5.118265575400546e-06, "loss": 0.5832, "step": 3963 }, { "epoch": 0.51, "grad_norm": 1.2395848035812378, "learning_rate": 5.116191117641955e-06, "loss": 0.6501, "step": 3964 }, { "epoch": 0.51, "grad_norm": 1.1485309600830078, "learning_rate": 5.114116639871804e-06, "loss": 0.579, "step": 3965 }, { "epoch": 0.51, "grad_norm": 1.2162903547286987, "learning_rate": 5.112042142447384e-06, "loss": 0.5577, "step": 3966 }, { "epoch": 0.51, "grad_norm": 1.1788960695266724, "learning_rate": 5.10996762572598e-06, "loss": 0.5424, "step": 3967 }, { "epoch": 0.51, "grad_norm": 1.1972217559814453, "learning_rate": 5.10789309006489e-06, "loss": 0.5713, "step": 3968 }, { "epoch": 0.51, "grad_norm": 1.1885600090026855, "learning_rate": 5.105818535821406e-06, "loss": 0.5696, "step": 3969 }, { "epoch": 0.51, "grad_norm": 1.4530361890792847, "learning_rate": 5.103743963352832e-06, "loss": 0.6195, "step": 3970 }, { "epoch": 0.51, "grad_norm": 1.3970437049865723, "learning_rate": 5.101669373016469e-06, "loss": 0.7098, "step": 3971 }, { "epoch": 0.51, "grad_norm": 1.6319563388824463, "learning_rate": 5.099594765169621e-06, "loss": 0.5933, "step": 3972 }, { "epoch": 0.51, "grad_norm": 1.417358636856079, "learning_rate": 5.097520140169599e-06, "loss": 0.6566, "step": 3973 }, { "epoch": 0.51, "grad_norm": 1.314659833908081, "learning_rate": 5.095445498373717e-06, "loss": 0.6062, "step": 3974 }, { "epoch": 0.51, "grad_norm": 1.4447764158248901, "learning_rate": 5.0933708401392864e-06, "loss": 0.5829, "step": 3975 }, { "epoch": 0.51, "grad_norm": 1.1412814855575562, "learning_rate": 5.091296165823627e-06, "loss": 0.5649, "step": 3976 }, { "epoch": 0.51, "grad_norm": 3.5301032066345215, "learning_rate": 5.08922147578406e-06, "loss": 0.5886, "step": 3977 }, { "epoch": 0.51, "grad_norm": 1.2669841051101685, "learning_rate": 5.0871467703779054e-06, "loss": 0.5541, "step": 3978 }, { "epoch": 0.51, "grad_norm": 1.5283225774765015, "learning_rate": 5.0850720499624915e-06, "loss": 0.5673, "step": 3979 }, { "epoch": 0.51, "grad_norm": 1.3331176042556763, "learning_rate": 5.082997314895146e-06, "loss": 0.6687, "step": 3980 }, { "epoch": 0.51, "grad_norm": 1.5164918899536133, "learning_rate": 5.080922565533201e-06, "loss": 0.6221, "step": 3981 }, { "epoch": 0.51, "grad_norm": 1.4119714498519897, "learning_rate": 5.078847802233987e-06, "loss": 0.5823, "step": 3982 }, { "epoch": 0.51, "grad_norm": 1.263611912727356, "learning_rate": 5.076773025354843e-06, "loss": 0.6335, "step": 3983 }, { "epoch": 0.51, "grad_norm": 1.6149969100952148, "learning_rate": 5.074698235253106e-06, "loss": 0.6478, "step": 3984 }, { "epoch": 0.51, "grad_norm": 1.2421183586120605, "learning_rate": 5.072623432286116e-06, "loss": 0.7456, "step": 3985 }, { "epoch": 0.51, "grad_norm": 1.2369145154953003, "learning_rate": 5.070548616811216e-06, "loss": 0.5444, "step": 3986 }, { "epoch": 0.51, "grad_norm": 1.4720882177352905, "learning_rate": 5.0684737891857505e-06, "loss": 0.5883, "step": 3987 }, { "epoch": 0.51, "grad_norm": 1.7480475902557373, "learning_rate": 5.066398949767068e-06, "loss": 0.5908, "step": 3988 }, { "epoch": 0.51, "grad_norm": 1.277265191078186, "learning_rate": 5.064324098912513e-06, "loss": 0.5804, "step": 3989 }, { "epoch": 0.51, "grad_norm": 1.826635479927063, "learning_rate": 5.062249236979442e-06, "loss": 0.6328, "step": 3990 }, { "epoch": 0.51, "grad_norm": 1.3039029836654663, "learning_rate": 5.060174364325202e-06, "loss": 0.6261, "step": 3991 }, { "epoch": 0.51, "grad_norm": 1.1858707666397095, "learning_rate": 5.058099481307154e-06, "loss": 0.5912, "step": 3992 }, { "epoch": 0.51, "grad_norm": 1.1001001596450806, "learning_rate": 5.05602458828265e-06, "loss": 0.5876, "step": 3993 }, { "epoch": 0.51, "grad_norm": 1.4584338665008545, "learning_rate": 5.053949685609051e-06, "loss": 0.5939, "step": 3994 }, { "epoch": 0.51, "grad_norm": 1.3201522827148438, "learning_rate": 5.051874773643713e-06, "loss": 0.654, "step": 3995 }, { "epoch": 0.51, "grad_norm": 1.044450044631958, "learning_rate": 5.049799852744001e-06, "loss": 0.58, "step": 3996 }, { "epoch": 0.51, "grad_norm": 1.269258975982666, "learning_rate": 5.047724923267277e-06, "loss": 0.5994, "step": 3997 }, { "epoch": 0.51, "grad_norm": 1.2389514446258545, "learning_rate": 5.045649985570904e-06, "loss": 0.5532, "step": 3998 }, { "epoch": 0.51, "grad_norm": 1.3213895559310913, "learning_rate": 5.0435750400122485e-06, "loss": 0.6093, "step": 3999 }, { "epoch": 0.51, "grad_norm": 1.4534727334976196, "learning_rate": 5.041500086948677e-06, "loss": 0.6433, "step": 4000 }, { "epoch": 0.51, "grad_norm": 1.4285792112350464, "learning_rate": 5.039425126737563e-06, "loss": 0.6478, "step": 4001 }, { "epoch": 0.51, "grad_norm": 1.21049165725708, "learning_rate": 5.0373501597362685e-06, "loss": 0.6199, "step": 4002 }, { "epoch": 0.51, "grad_norm": 1.1804089546203613, "learning_rate": 5.03527518630217e-06, "loss": 0.5829, "step": 4003 }, { "epoch": 0.51, "grad_norm": 1.5666120052337646, "learning_rate": 5.033200206792637e-06, "loss": 0.616, "step": 4004 }, { "epoch": 0.51, "grad_norm": 1.9499365091323853, "learning_rate": 5.031125221565044e-06, "loss": 0.6225, "step": 4005 }, { "epoch": 0.51, "grad_norm": 2.4018373489379883, "learning_rate": 5.029050230976763e-06, "loss": 0.6091, "step": 4006 }, { "epoch": 0.51, "grad_norm": 1.8645708560943604, "learning_rate": 5.026975235385172e-06, "loss": 0.6104, "step": 4007 }, { "epoch": 0.51, "grad_norm": 1.2363499402999878, "learning_rate": 5.024900235147643e-06, "loss": 0.6544, "step": 4008 }, { "epoch": 0.51, "grad_norm": 1.315367341041565, "learning_rate": 5.022825230621555e-06, "loss": 0.6242, "step": 4009 }, { "epoch": 0.51, "grad_norm": 1.2281227111816406, "learning_rate": 5.020750222164286e-06, "loss": 0.5227, "step": 4010 }, { "epoch": 0.51, "grad_norm": 1.3124263286590576, "learning_rate": 5.0186752101332124e-06, "loss": 0.6848, "step": 4011 }, { "epoch": 0.51, "grad_norm": 1.246238350868225, "learning_rate": 5.016600194885714e-06, "loss": 0.5938, "step": 4012 }, { "epoch": 0.51, "grad_norm": 1.2616819143295288, "learning_rate": 5.014525176779168e-06, "loss": 0.6353, "step": 4013 }, { "epoch": 0.51, "grad_norm": 1.8460921049118042, "learning_rate": 5.012450156170957e-06, "loss": 0.5932, "step": 4014 }, { "epoch": 0.51, "grad_norm": 1.32743501663208, "learning_rate": 5.0103751334184595e-06, "loss": 0.6467, "step": 4015 }, { "epoch": 0.51, "grad_norm": 1.421920657157898, "learning_rate": 5.008300108879055e-06, "loss": 0.521, "step": 4016 }, { "epoch": 0.51, "grad_norm": 1.3539751768112183, "learning_rate": 5.006225082910126e-06, "loss": 0.5817, "step": 4017 }, { "epoch": 0.51, "grad_norm": 1.1650025844573975, "learning_rate": 5.004150055869053e-06, "loss": 0.6017, "step": 4018 }, { "epoch": 0.51, "grad_norm": 1.121670126914978, "learning_rate": 5.0020750281132165e-06, "loss": 0.546, "step": 4019 }, { "epoch": 0.52, "grad_norm": 1.4239189624786377, "learning_rate": 5e-06, "loss": 0.5849, "step": 4020 }, { "epoch": 0.52, "grad_norm": 1.4270631074905396, "learning_rate": 4.997924971886784e-06, "loss": 0.5985, "step": 4021 }, { "epoch": 0.52, "grad_norm": 1.1389853954315186, "learning_rate": 4.995849944130948e-06, "loss": 0.5686, "step": 4022 }, { "epoch": 0.52, "grad_norm": 1.3634734153747559, "learning_rate": 4.993774917089876e-06, "loss": 0.6108, "step": 4023 }, { "epoch": 0.52, "grad_norm": 1.4653452634811401, "learning_rate": 4.991699891120947e-06, "loss": 0.5871, "step": 4024 }, { "epoch": 0.52, "grad_norm": 1.2894748449325562, "learning_rate": 4.989624866581544e-06, "loss": 0.575, "step": 4025 }, { "epoch": 0.52, "grad_norm": 1.3853498697280884, "learning_rate": 4.987549843829045e-06, "loss": 0.6293, "step": 4026 }, { "epoch": 0.52, "grad_norm": 1.2562291622161865, "learning_rate": 4.985474823220835e-06, "loss": 0.6276, "step": 4027 }, { "epoch": 0.52, "grad_norm": 1.1109954118728638, "learning_rate": 4.983399805114289e-06, "loss": 0.5868, "step": 4028 }, { "epoch": 0.52, "grad_norm": 1.2795517444610596, "learning_rate": 4.981324789866788e-06, "loss": 0.5745, "step": 4029 }, { "epoch": 0.52, "grad_norm": 1.9505984783172607, "learning_rate": 4.979249777835715e-06, "loss": 0.5709, "step": 4030 }, { "epoch": 0.52, "grad_norm": 1.2079765796661377, "learning_rate": 4.977174769378445e-06, "loss": 0.6797, "step": 4031 }, { "epoch": 0.52, "grad_norm": 1.466506004333496, "learning_rate": 4.975099764852359e-06, "loss": 0.6298, "step": 4032 }, { "epoch": 0.52, "grad_norm": 1.308029294013977, "learning_rate": 4.973024764614829e-06, "loss": 0.5299, "step": 4033 }, { "epoch": 0.52, "grad_norm": 4.230959892272949, "learning_rate": 4.970949769023238e-06, "loss": 0.6072, "step": 4034 }, { "epoch": 0.52, "grad_norm": 1.2437947988510132, "learning_rate": 4.968874778434957e-06, "loss": 0.6981, "step": 4035 }, { "epoch": 0.52, "grad_norm": 1.1782584190368652, "learning_rate": 4.966799793207364e-06, "loss": 0.5982, "step": 4036 }, { "epoch": 0.52, "grad_norm": 1.3160803318023682, "learning_rate": 4.964724813697831e-06, "loss": 0.6438, "step": 4037 }, { "epoch": 0.52, "grad_norm": 1.471976637840271, "learning_rate": 4.962649840263733e-06, "loss": 0.5819, "step": 4038 }, { "epoch": 0.52, "grad_norm": 1.1718621253967285, "learning_rate": 4.960574873262439e-06, "loss": 0.545, "step": 4039 }, { "epoch": 0.52, "grad_norm": 1.1031982898712158, "learning_rate": 4.9584999130513235e-06, "loss": 0.551, "step": 4040 }, { "epoch": 0.52, "grad_norm": 1.2355008125305176, "learning_rate": 4.956424959987753e-06, "loss": 0.538, "step": 4041 }, { "epoch": 0.52, "grad_norm": 1.5801515579223633, "learning_rate": 4.954350014429099e-06, "loss": 0.622, "step": 4042 }, { "epoch": 0.52, "grad_norm": 1.3405274152755737, "learning_rate": 4.952275076732726e-06, "loss": 0.6235, "step": 4043 }, { "epoch": 0.52, "grad_norm": 1.0359275341033936, "learning_rate": 4.950200147256002e-06, "loss": 0.743, "step": 4044 }, { "epoch": 0.52, "grad_norm": 1.0925586223602295, "learning_rate": 4.948125226356288e-06, "loss": 0.662, "step": 4045 }, { "epoch": 0.52, "grad_norm": 1.484156847000122, "learning_rate": 4.94605031439095e-06, "loss": 0.6197, "step": 4046 }, { "epoch": 0.52, "grad_norm": 1.1978555917739868, "learning_rate": 4.943975411717351e-06, "loss": 0.6148, "step": 4047 }, { "epoch": 0.52, "grad_norm": 1.3440272808074951, "learning_rate": 4.941900518692846e-06, "loss": 0.5443, "step": 4048 }, { "epoch": 0.52, "grad_norm": 1.8050079345703125, "learning_rate": 4.939825635674798e-06, "loss": 0.6654, "step": 4049 }, { "epoch": 0.52, "grad_norm": 1.0395833253860474, "learning_rate": 4.93775076302056e-06, "loss": 0.6051, "step": 4050 }, { "epoch": 0.52, "grad_norm": 2.008521556854248, "learning_rate": 4.935675901087488e-06, "loss": 0.6138, "step": 4051 }, { "epoch": 0.52, "grad_norm": 2.4628641605377197, "learning_rate": 4.933601050232935e-06, "loss": 0.5635, "step": 4052 }, { "epoch": 0.52, "grad_norm": 1.2769087553024292, "learning_rate": 4.931526210814251e-06, "loss": 0.6141, "step": 4053 }, { "epoch": 0.52, "grad_norm": 1.3242411613464355, "learning_rate": 4.929451383188785e-06, "loss": 0.6098, "step": 4054 }, { "epoch": 0.52, "grad_norm": 1.0932570695877075, "learning_rate": 4.927376567713886e-06, "loss": 0.6458, "step": 4055 }, { "epoch": 0.52, "grad_norm": 1.3448100090026855, "learning_rate": 4.925301764746895e-06, "loss": 0.5961, "step": 4056 }, { "epoch": 0.52, "grad_norm": 1.2128664255142212, "learning_rate": 4.923226974645158e-06, "loss": 0.5711, "step": 4057 }, { "epoch": 0.52, "grad_norm": 1.6496801376342773, "learning_rate": 4.921152197766014e-06, "loss": 0.5831, "step": 4058 }, { "epoch": 0.52, "grad_norm": 1.2741731405258179, "learning_rate": 4.919077434466802e-06, "loss": 0.6547, "step": 4059 }, { "epoch": 0.52, "grad_norm": 1.3321490287780762, "learning_rate": 4.917002685104855e-06, "loss": 0.5447, "step": 4060 }, { "epoch": 0.52, "grad_norm": 1.141733169555664, "learning_rate": 4.914927950037511e-06, "loss": 0.5685, "step": 4061 }, { "epoch": 0.52, "grad_norm": 1.9314861297607422, "learning_rate": 4.912853229622096e-06, "loss": 0.5964, "step": 4062 }, { "epoch": 0.52, "grad_norm": 1.4189141988754272, "learning_rate": 4.910778524215941e-06, "loss": 0.586, "step": 4063 }, { "epoch": 0.52, "grad_norm": 1.2865058183670044, "learning_rate": 4.908703834176373e-06, "loss": 0.5715, "step": 4064 }, { "epoch": 0.52, "grad_norm": 1.4331640005111694, "learning_rate": 4.906629159860713e-06, "loss": 0.6381, "step": 4065 }, { "epoch": 0.52, "grad_norm": 1.0844879150390625, "learning_rate": 4.904554501626284e-06, "loss": 0.5739, "step": 4066 }, { "epoch": 0.52, "grad_norm": 1.2306838035583496, "learning_rate": 4.9024798598304006e-06, "loss": 0.7441, "step": 4067 }, { "epoch": 0.52, "grad_norm": 1.091158390045166, "learning_rate": 4.90040523483038e-06, "loss": 0.6085, "step": 4068 }, { "epoch": 0.52, "grad_norm": 1.3969615697860718, "learning_rate": 4.898330626983533e-06, "loss": 0.6604, "step": 4069 }, { "epoch": 0.52, "grad_norm": 1.0094318389892578, "learning_rate": 4.89625603664717e-06, "loss": 0.5522, "step": 4070 }, { "epoch": 0.52, "grad_norm": 1.2802209854125977, "learning_rate": 4.894181464178595e-06, "loss": 0.6044, "step": 4071 }, { "epoch": 0.52, "grad_norm": 1.6502206325531006, "learning_rate": 4.892106909935111e-06, "loss": 0.6496, "step": 4072 }, { "epoch": 0.52, "grad_norm": 1.3228180408477783, "learning_rate": 4.890032374274021e-06, "loss": 0.6288, "step": 4073 }, { "epoch": 0.52, "grad_norm": 1.3824211359024048, "learning_rate": 4.887957857552617e-06, "loss": 0.6468, "step": 4074 }, { "epoch": 0.52, "grad_norm": 0.9041395783424377, "learning_rate": 4.885883360128197e-06, "loss": 0.5782, "step": 4075 }, { "epoch": 0.52, "grad_norm": 1.6577094793319702, "learning_rate": 4.883808882358047e-06, "loss": 0.6067, "step": 4076 }, { "epoch": 0.52, "grad_norm": 1.2962143421173096, "learning_rate": 4.881734424599456e-06, "loss": 0.6659, "step": 4077 }, { "epoch": 0.52, "grad_norm": 1.3377918004989624, "learning_rate": 4.879659987209707e-06, "loss": 0.6366, "step": 4078 }, { "epoch": 0.52, "grad_norm": 2.1966018676757812, "learning_rate": 4.877585570546078e-06, "loss": 0.6419, "step": 4079 }, { "epoch": 0.52, "grad_norm": 1.3197449445724487, "learning_rate": 4.875511174965846e-06, "loss": 0.6256, "step": 4080 }, { "epoch": 0.52, "grad_norm": 1.4747912883758545, "learning_rate": 4.8734368008262835e-06, "loss": 0.6736, "step": 4081 }, { "epoch": 0.52, "grad_norm": 1.2974356412887573, "learning_rate": 4.871362448484662e-06, "loss": 0.5823, "step": 4082 }, { "epoch": 0.52, "grad_norm": 1.4338997602462769, "learning_rate": 4.869288118298242e-06, "loss": 0.5833, "step": 4083 }, { "epoch": 0.52, "grad_norm": 1.3909071683883667, "learning_rate": 4.867213810624288e-06, "loss": 0.5486, "step": 4084 }, { "epoch": 0.52, "grad_norm": 1.427019476890564, "learning_rate": 4.865139525820055e-06, "loss": 0.555, "step": 4085 }, { "epoch": 0.52, "grad_norm": 1.493001937866211, "learning_rate": 4.8630652642428e-06, "loss": 0.6544, "step": 4086 }, { "epoch": 0.52, "grad_norm": 1.26246178150177, "learning_rate": 4.860991026249768e-06, "loss": 0.6591, "step": 4087 }, { "epoch": 0.52, "grad_norm": 1.4320074319839478, "learning_rate": 4.858916812198206e-06, "loss": 0.6442, "step": 4088 }, { "epoch": 0.52, "grad_norm": 1.3701486587524414, "learning_rate": 4.856842622445356e-06, "loss": 0.5566, "step": 4089 }, { "epoch": 0.52, "grad_norm": 1.2770544290542603, "learning_rate": 4.854768457348456e-06, "loss": 0.6557, "step": 4090 }, { "epoch": 0.52, "grad_norm": 1.5645573139190674, "learning_rate": 4.852694317264735e-06, "loss": 0.6767, "step": 4091 }, { "epoch": 0.52, "grad_norm": 1.349003791809082, "learning_rate": 4.850620202551425e-06, "loss": 0.6173, "step": 4092 }, { "epoch": 0.52, "grad_norm": 1.3395682573318481, "learning_rate": 4.848546113565748e-06, "loss": 0.5781, "step": 4093 }, { "epoch": 0.52, "grad_norm": 1.317745566368103, "learning_rate": 4.846472050664925e-06, "loss": 0.6592, "step": 4094 }, { "epoch": 0.52, "grad_norm": 4.009605884552002, "learning_rate": 4.84439801420617e-06, "loss": 0.5211, "step": 4095 }, { "epoch": 0.52, "grad_norm": 1.3194994926452637, "learning_rate": 4.842324004546696e-06, "loss": 0.6196, "step": 4096 }, { "epoch": 0.52, "grad_norm": 1.368104338645935, "learning_rate": 4.8402500220437054e-06, "loss": 0.6548, "step": 4097 }, { "epoch": 0.53, "grad_norm": 1.1761075258255005, "learning_rate": 4.838176067054401e-06, "loss": 0.6217, "step": 4098 }, { "epoch": 0.53, "grad_norm": 1.5087071657180786, "learning_rate": 4.836102139935982e-06, "loss": 0.5141, "step": 4099 }, { "epoch": 0.53, "grad_norm": 1.0981336832046509, "learning_rate": 4.8340282410456365e-06, "loss": 0.5627, "step": 4100 }, { "epoch": 0.53, "grad_norm": 1.1873528957366943, "learning_rate": 4.831954370740554e-06, "loss": 0.6115, "step": 4101 }, { "epoch": 0.53, "grad_norm": 1.1071884632110596, "learning_rate": 4.829880529377915e-06, "loss": 0.5372, "step": 4102 }, { "epoch": 0.53, "grad_norm": 1.2068363428115845, "learning_rate": 4.8278067173148975e-06, "loss": 0.6624, "step": 4103 }, { "epoch": 0.53, "grad_norm": 1.3392771482467651, "learning_rate": 4.825732934908672e-06, "loss": 0.6454, "step": 4104 }, { "epoch": 0.53, "grad_norm": 3.0413925647735596, "learning_rate": 4.8236591825164085e-06, "loss": 0.6027, "step": 4105 }, { "epoch": 0.53, "grad_norm": 1.4464915990829468, "learning_rate": 4.821585460495264e-06, "loss": 0.6546, "step": 4106 }, { "epoch": 0.53, "grad_norm": 1.405840277671814, "learning_rate": 4.819511769202399e-06, "loss": 0.5513, "step": 4107 }, { "epoch": 0.53, "grad_norm": 1.2467281818389893, "learning_rate": 4.817438108994963e-06, "loss": 0.5584, "step": 4108 }, { "epoch": 0.53, "grad_norm": 1.293278455734253, "learning_rate": 4.815364480230103e-06, "loss": 0.6054, "step": 4109 }, { "epoch": 0.53, "grad_norm": 1.4756916761398315, "learning_rate": 4.813290883264956e-06, "loss": 0.6539, "step": 4110 }, { "epoch": 0.53, "grad_norm": 1.106099009513855, "learning_rate": 4.811217318456661e-06, "loss": 0.7242, "step": 4111 }, { "epoch": 0.53, "grad_norm": 1.1641430854797363, "learning_rate": 4.809143786162345e-06, "loss": 0.605, "step": 4112 }, { "epoch": 0.53, "grad_norm": 1.113264799118042, "learning_rate": 4.807070286739134e-06, "loss": 0.753, "step": 4113 }, { "epoch": 0.53, "grad_norm": 1.1319680213928223, "learning_rate": 4.804996820544144e-06, "loss": 0.7126, "step": 4114 }, { "epoch": 0.53, "grad_norm": 1.2857635021209717, "learning_rate": 4.8029233879344845e-06, "loss": 0.6106, "step": 4115 }, { "epoch": 0.53, "grad_norm": 1.3838752508163452, "learning_rate": 4.800849989267269e-06, "loss": 0.5595, "step": 4116 }, { "epoch": 0.53, "grad_norm": 6.1764984130859375, "learning_rate": 4.798776624899595e-06, "loss": 0.5441, "step": 4117 }, { "epoch": 0.53, "grad_norm": 1.6189913749694824, "learning_rate": 4.796703295188557e-06, "loss": 0.6184, "step": 4118 }, { "epoch": 0.53, "grad_norm": 2.234065532684326, "learning_rate": 4.7946300004912454e-06, "loss": 0.671, "step": 4119 }, { "epoch": 0.53, "grad_norm": 1.2225993871688843, "learning_rate": 4.7925567411647405e-06, "loss": 0.6519, "step": 4120 }, { "epoch": 0.53, "grad_norm": 1.2532271146774292, "learning_rate": 4.790483517566122e-06, "loss": 0.5927, "step": 4121 }, { "epoch": 0.53, "grad_norm": 1.2144299745559692, "learning_rate": 4.788410330052457e-06, "loss": 0.724, "step": 4122 }, { "epoch": 0.53, "grad_norm": 1.2604823112487793, "learning_rate": 4.7863371789808135e-06, "loss": 0.5651, "step": 4123 }, { "epoch": 0.53, "grad_norm": 1.168423056602478, "learning_rate": 4.784264064708247e-06, "loss": 0.5987, "step": 4124 }, { "epoch": 0.53, "grad_norm": 1.171149730682373, "learning_rate": 4.782190987591811e-06, "loss": 0.6428, "step": 4125 }, { "epoch": 0.53, "grad_norm": 1.292298674583435, "learning_rate": 4.7801179479885495e-06, "loss": 0.6348, "step": 4126 }, { "epoch": 0.53, "grad_norm": 1.250784993171692, "learning_rate": 4.778044946255503e-06, "loss": 0.5781, "step": 4127 }, { "epoch": 0.53, "grad_norm": 1.4512914419174194, "learning_rate": 4.775971982749703e-06, "loss": 0.5359, "step": 4128 }, { "epoch": 0.53, "grad_norm": 1.359748363494873, "learning_rate": 4.773899057828176e-06, "loss": 0.5784, "step": 4129 }, { "epoch": 0.53, "grad_norm": 1.3749535083770752, "learning_rate": 4.771826171847939e-06, "loss": 0.6338, "step": 4130 }, { "epoch": 0.53, "grad_norm": 1.980396032333374, "learning_rate": 4.769753325166008e-06, "loss": 0.6662, "step": 4131 }, { "epoch": 0.53, "grad_norm": 1.5456019639968872, "learning_rate": 4.7676805181393835e-06, "loss": 0.6045, "step": 4132 }, { "epoch": 0.53, "grad_norm": 3.6582868099212646, "learning_rate": 4.76560775112507e-06, "loss": 0.5481, "step": 4133 }, { "epoch": 0.53, "grad_norm": 1.1082854270935059, "learning_rate": 4.763535024480057e-06, "loss": 0.6318, "step": 4134 }, { "epoch": 0.53, "grad_norm": 2.2785942554473877, "learning_rate": 4.761462338561329e-06, "loss": 0.611, "step": 4135 }, { "epoch": 0.53, "grad_norm": 1.0957773923873901, "learning_rate": 4.759389693725867e-06, "loss": 0.706, "step": 4136 }, { "epoch": 0.53, "grad_norm": 0.9537548422813416, "learning_rate": 4.757317090330638e-06, "loss": 0.5881, "step": 4137 }, { "epoch": 0.53, "grad_norm": 1.0327272415161133, "learning_rate": 4.755244528732608e-06, "loss": 0.5027, "step": 4138 }, { "epoch": 0.53, "grad_norm": 1.2874075174331665, "learning_rate": 4.753172009288732e-06, "loss": 0.6377, "step": 4139 }, { "epoch": 0.53, "grad_norm": 1.2741062641143799, "learning_rate": 4.751099532355962e-06, "loss": 0.6127, "step": 4140 }, { "epoch": 0.53, "grad_norm": 1.2608203887939453, "learning_rate": 4.749027098291237e-06, "loss": 0.583, "step": 4141 }, { "epoch": 0.53, "grad_norm": 1.517383337020874, "learning_rate": 4.7469547074514946e-06, "loss": 0.5961, "step": 4142 }, { "epoch": 0.53, "grad_norm": 1.231284499168396, "learning_rate": 4.7448823601936585e-06, "loss": 0.583, "step": 4143 }, { "epoch": 0.53, "grad_norm": 1.1564208269119263, "learning_rate": 4.742810056874652e-06, "loss": 0.6192, "step": 4144 }, { "epoch": 0.53, "grad_norm": 1.2633081674575806, "learning_rate": 4.740737797851385e-06, "loss": 0.5658, "step": 4145 }, { "epoch": 0.53, "grad_norm": 1.1155922412872314, "learning_rate": 4.7386655834807634e-06, "loss": 0.5894, "step": 4146 }, { "epoch": 0.53, "grad_norm": 1.3662333488464355, "learning_rate": 4.736593414119682e-06, "loss": 0.6027, "step": 4147 }, { "epoch": 0.53, "grad_norm": 1.2502559423446655, "learning_rate": 4.734521290125032e-06, "loss": 0.5672, "step": 4148 }, { "epoch": 0.53, "grad_norm": 1.3026888370513916, "learning_rate": 4.732449211853693e-06, "loss": 0.5624, "step": 4149 }, { "epoch": 0.53, "grad_norm": 1.1376659870147705, "learning_rate": 4.730377179662538e-06, "loss": 0.5888, "step": 4150 }, { "epoch": 0.53, "grad_norm": 1.2101504802703857, "learning_rate": 4.728305193908436e-06, "loss": 0.6549, "step": 4151 }, { "epoch": 0.53, "grad_norm": 1.048017144203186, "learning_rate": 4.72623325494824e-06, "loss": 0.6031, "step": 4152 }, { "epoch": 0.53, "grad_norm": 1.2536982297897339, "learning_rate": 4.7241613631388034e-06, "loss": 0.5735, "step": 4153 }, { "epoch": 0.53, "grad_norm": 1.2073613405227661, "learning_rate": 4.722089518836964e-06, "loss": 0.6656, "step": 4154 }, { "epoch": 0.53, "grad_norm": 1.3333324193954468, "learning_rate": 4.720017722399557e-06, "loss": 0.6033, "step": 4155 }, { "epoch": 0.53, "grad_norm": 1.8105237483978271, "learning_rate": 4.717945974183405e-06, "loss": 0.4994, "step": 4156 }, { "epoch": 0.53, "grad_norm": 1.501945972442627, "learning_rate": 4.715874274545328e-06, "loss": 0.5396, "step": 4157 }, { "epoch": 0.53, "grad_norm": 1.8984172344207764, "learning_rate": 4.71380262384213e-06, "loss": 0.549, "step": 4158 }, { "epoch": 0.53, "grad_norm": 1.550038456916809, "learning_rate": 4.711731022430615e-06, "loss": 0.5852, "step": 4159 }, { "epoch": 0.53, "grad_norm": 1.7350627183914185, "learning_rate": 4.70965947066757e-06, "loss": 0.6284, "step": 4160 }, { "epoch": 0.53, "grad_norm": 1.5158087015151978, "learning_rate": 4.707587968909782e-06, "loss": 0.5684, "step": 4161 }, { "epoch": 0.53, "grad_norm": 1.1972614526748657, "learning_rate": 4.705516517514021e-06, "loss": 0.5638, "step": 4162 }, { "epoch": 0.53, "grad_norm": 1.1058855056762695, "learning_rate": 4.703445116837055e-06, "loss": 0.5002, "step": 4163 }, { "epoch": 0.53, "grad_norm": 1.3835372924804688, "learning_rate": 4.701373767235641e-06, "loss": 0.6697, "step": 4164 }, { "epoch": 0.53, "grad_norm": 1.174512267112732, "learning_rate": 4.699302469066524e-06, "loss": 0.6428, "step": 4165 }, { "epoch": 0.53, "grad_norm": 1.310792326927185, "learning_rate": 4.6972312226864445e-06, "loss": 0.652, "step": 4166 }, { "epoch": 0.53, "grad_norm": 1.3059464693069458, "learning_rate": 4.6951600284521324e-06, "loss": 0.5807, "step": 4167 }, { "epoch": 0.53, "grad_norm": 1.334140658378601, "learning_rate": 4.6930888867203115e-06, "loss": 0.6129, "step": 4168 }, { "epoch": 0.53, "grad_norm": 1.4384865760803223, "learning_rate": 4.691017797847692e-06, "loss": 0.6122, "step": 4169 }, { "epoch": 0.53, "grad_norm": 1.3629262447357178, "learning_rate": 4.688946762190975e-06, "loss": 0.6201, "step": 4170 }, { "epoch": 0.53, "grad_norm": 1.473082423210144, "learning_rate": 4.686875780106856e-06, "loss": 0.5797, "step": 4171 }, { "epoch": 0.53, "grad_norm": 1.335864782333374, "learning_rate": 4.68480485195202e-06, "loss": 0.6164, "step": 4172 }, { "epoch": 0.53, "grad_norm": 2.127086639404297, "learning_rate": 4.682733978083142e-06, "loss": 0.6167, "step": 4173 }, { "epoch": 0.53, "grad_norm": 1.2139432430267334, "learning_rate": 4.680663158856886e-06, "loss": 0.6562, "step": 4174 }, { "epoch": 0.53, "grad_norm": 1.4341139793395996, "learning_rate": 4.678592394629912e-06, "loss": 0.6285, "step": 4175 }, { "epoch": 0.54, "grad_norm": 1.3252298831939697, "learning_rate": 4.676521685758863e-06, "loss": 0.6012, "step": 4176 }, { "epoch": 0.54, "grad_norm": 1.3366296291351318, "learning_rate": 4.6744510326003805e-06, "loss": 0.6053, "step": 4177 }, { "epoch": 0.54, "grad_norm": 1.302324652671814, "learning_rate": 4.672380435511089e-06, "loss": 0.6217, "step": 4178 }, { "epoch": 0.54, "grad_norm": 1.673362374305725, "learning_rate": 4.67030989484761e-06, "loss": 0.6395, "step": 4179 }, { "epoch": 0.54, "grad_norm": 1.6342353820800781, "learning_rate": 4.668239410966549e-06, "loss": 0.5935, "step": 4180 }, { "epoch": 0.54, "grad_norm": 1.6448659896850586, "learning_rate": 4.666168984224508e-06, "loss": 0.555, "step": 4181 }, { "epoch": 0.54, "grad_norm": 1.3737086057662964, "learning_rate": 4.664098614978073e-06, "loss": 0.5609, "step": 4182 }, { "epoch": 0.54, "grad_norm": 1.202628254890442, "learning_rate": 4.662028303583823e-06, "loss": 0.6397, "step": 4183 }, { "epoch": 0.54, "grad_norm": 1.1492021083831787, "learning_rate": 4.6599580503983295e-06, "loss": 0.6714, "step": 4184 }, { "epoch": 0.54, "grad_norm": 1.1646534204483032, "learning_rate": 4.657887855778149e-06, "loss": 0.6116, "step": 4185 }, { "epoch": 0.54, "grad_norm": 1.2870683670043945, "learning_rate": 4.655817720079834e-06, "loss": 0.5639, "step": 4186 }, { "epoch": 0.54, "grad_norm": 2.9178974628448486, "learning_rate": 4.6537476436599184e-06, "loss": 0.6177, "step": 4187 }, { "epoch": 0.54, "grad_norm": 1.0415689945220947, "learning_rate": 4.651677626874936e-06, "loss": 0.5541, "step": 4188 }, { "epoch": 0.54, "grad_norm": 1.0987578630447388, "learning_rate": 4.6496076700814e-06, "loss": 0.6257, "step": 4189 }, { "epoch": 0.54, "grad_norm": 1.4872864484786987, "learning_rate": 4.647537773635823e-06, "loss": 0.6029, "step": 4190 }, { "epoch": 0.54, "grad_norm": 1.2052521705627441, "learning_rate": 4.645467937894699e-06, "loss": 0.6159, "step": 4191 }, { "epoch": 0.54, "grad_norm": 1.0160893201828003, "learning_rate": 4.643398163214517e-06, "loss": 0.6487, "step": 4192 }, { "epoch": 0.54, "grad_norm": 1.498758316040039, "learning_rate": 4.641328449951753e-06, "loss": 0.5912, "step": 4193 }, { "epoch": 0.54, "grad_norm": 1.1858739852905273, "learning_rate": 4.6392587984628735e-06, "loss": 0.7092, "step": 4194 }, { "epoch": 0.54, "grad_norm": 1.1768958568572998, "learning_rate": 4.637189209104333e-06, "loss": 0.5921, "step": 4195 }, { "epoch": 0.54, "grad_norm": 1.3898215293884277, "learning_rate": 4.635119682232577e-06, "loss": 0.6398, "step": 4196 }, { "epoch": 0.54, "grad_norm": 1.3137660026550293, "learning_rate": 4.63305021820404e-06, "loss": 0.6571, "step": 4197 }, { "epoch": 0.54, "grad_norm": 1.1577122211456299, "learning_rate": 4.6309808173751445e-06, "loss": 0.7469, "step": 4198 }, { "epoch": 0.54, "grad_norm": 1.4007368087768555, "learning_rate": 4.628911480102301e-06, "loss": 0.6021, "step": 4199 }, { "epoch": 0.54, "grad_norm": 1.1606431007385254, "learning_rate": 4.626842206741912e-06, "loss": 0.6898, "step": 4200 }, { "epoch": 0.54, "grad_norm": 1.4867026805877686, "learning_rate": 4.62477299765037e-06, "loss": 0.6388, "step": 4201 }, { "epoch": 0.54, "grad_norm": 1.663015365600586, "learning_rate": 4.622703853184052e-06, "loss": 0.5711, "step": 4202 }, { "epoch": 0.54, "grad_norm": 1.1145719289779663, "learning_rate": 4.620634773699327e-06, "loss": 0.603, "step": 4203 }, { "epoch": 0.54, "grad_norm": 1.4312100410461426, "learning_rate": 4.61856575955255e-06, "loss": 0.6286, "step": 4204 }, { "epoch": 0.54, "grad_norm": 1.1849942207336426, "learning_rate": 4.6164968111000695e-06, "loss": 0.6667, "step": 4205 }, { "epoch": 0.54, "grad_norm": 1.0929937362670898, "learning_rate": 4.614427928698217e-06, "loss": 0.6663, "step": 4206 }, { "epoch": 0.54, "grad_norm": 0.972935140132904, "learning_rate": 4.612359112703318e-06, "loss": 0.5812, "step": 4207 }, { "epoch": 0.54, "grad_norm": 1.4837443828582764, "learning_rate": 4.610290363471681e-06, "loss": 0.6155, "step": 4208 }, { "epoch": 0.54, "grad_norm": 1.6648123264312744, "learning_rate": 4.608221681359609e-06, "loss": 0.5247, "step": 4209 }, { "epoch": 0.54, "grad_norm": 1.3424453735351562, "learning_rate": 4.606153066723389e-06, "loss": 0.6489, "step": 4210 }, { "epoch": 0.54, "grad_norm": 1.4238646030426025, "learning_rate": 4.604084519919298e-06, "loss": 0.5952, "step": 4211 }, { "epoch": 0.54, "grad_norm": 1.4087281227111816, "learning_rate": 4.602016041303601e-06, "loss": 0.6647, "step": 4212 }, { "epoch": 0.54, "grad_norm": 1.5249015092849731, "learning_rate": 4.599947631232552e-06, "loss": 0.6362, "step": 4213 }, { "epoch": 0.54, "grad_norm": 1.3257591724395752, "learning_rate": 4.597879290062393e-06, "loss": 0.5909, "step": 4214 }, { "epoch": 0.54, "grad_norm": 1.1432000398635864, "learning_rate": 4.595811018149351e-06, "loss": 0.5854, "step": 4215 }, { "epoch": 0.54, "grad_norm": 1.2500845193862915, "learning_rate": 4.5937428158496475e-06, "loss": 0.5857, "step": 4216 }, { "epoch": 0.54, "grad_norm": 1.0443065166473389, "learning_rate": 4.591674683519483e-06, "loss": 0.6195, "step": 4217 }, { "epoch": 0.54, "grad_norm": 1.2430527210235596, "learning_rate": 4.589606621515057e-06, "loss": 0.5721, "step": 4218 }, { "epoch": 0.54, "grad_norm": 1.3105758428573608, "learning_rate": 4.5875386301925495e-06, "loss": 0.4901, "step": 4219 }, { "epoch": 0.54, "grad_norm": 1.6490211486816406, "learning_rate": 4.5854707099081285e-06, "loss": 0.6874, "step": 4220 }, { "epoch": 0.54, "grad_norm": 1.3691102266311646, "learning_rate": 4.583402861017953e-06, "loss": 0.594, "step": 4221 }, { "epoch": 0.54, "grad_norm": 1.2332993745803833, "learning_rate": 4.5813350838781665e-06, "loss": 0.6166, "step": 4222 }, { "epoch": 0.54, "grad_norm": 1.3578057289123535, "learning_rate": 4.579267378844902e-06, "loss": 0.6297, "step": 4223 }, { "epoch": 0.54, "grad_norm": 1.7097007036209106, "learning_rate": 4.577199746274279e-06, "loss": 0.6119, "step": 4224 }, { "epoch": 0.54, "grad_norm": 1.160226583480835, "learning_rate": 4.575132186522408e-06, "loss": 0.5266, "step": 4225 }, { "epoch": 0.54, "grad_norm": 1.177698016166687, "learning_rate": 4.5730646999453805e-06, "loss": 0.6148, "step": 4226 }, { "epoch": 0.54, "grad_norm": 1.4060474634170532, "learning_rate": 4.570997286899282e-06, "loss": 0.5874, "step": 4227 }, { "epoch": 0.54, "grad_norm": 1.2073032855987549, "learning_rate": 4.56892994774018e-06, "loss": 0.5137, "step": 4228 }, { "epoch": 0.54, "grad_norm": 1.2490034103393555, "learning_rate": 4.566862682824133e-06, "loss": 0.58, "step": 4229 }, { "epoch": 0.54, "grad_norm": 1.3035485744476318, "learning_rate": 4.564795492507184e-06, "loss": 0.6719, "step": 4230 }, { "epoch": 0.54, "grad_norm": 1.219054102897644, "learning_rate": 4.562728377145367e-06, "loss": 0.6331, "step": 4231 }, { "epoch": 0.54, "grad_norm": 1.202539324760437, "learning_rate": 4.560661337094698e-06, "loss": 0.5764, "step": 4232 }, { "epoch": 0.54, "grad_norm": 1.1667770147323608, "learning_rate": 4.558594372711185e-06, "loss": 0.5885, "step": 4233 }, { "epoch": 0.54, "grad_norm": 1.0304358005523682, "learning_rate": 4.556527484350819e-06, "loss": 0.5146, "step": 4234 }, { "epoch": 0.54, "grad_norm": 1.0976370573043823, "learning_rate": 4.554460672369578e-06, "loss": 0.574, "step": 4235 }, { "epoch": 0.54, "grad_norm": 1.336747646331787, "learning_rate": 4.552393937123432e-06, "loss": 0.5512, "step": 4236 }, { "epoch": 0.54, "grad_norm": 1.2262814044952393, "learning_rate": 4.550327278968333e-06, "loss": 0.5616, "step": 4237 }, { "epoch": 0.54, "grad_norm": 1.2212550640106201, "learning_rate": 4.548260698260219e-06, "loss": 0.6184, "step": 4238 }, { "epoch": 0.54, "grad_norm": 1.4734658002853394, "learning_rate": 4.546194195355018e-06, "loss": 0.5742, "step": 4239 }, { "epoch": 0.54, "grad_norm": 1.4658452272415161, "learning_rate": 4.544127770608644e-06, "loss": 0.6211, "step": 4240 }, { "epoch": 0.54, "grad_norm": 1.2137492895126343, "learning_rate": 4.542061424376995e-06, "loss": 0.5261, "step": 4241 }, { "epoch": 0.54, "grad_norm": 1.2533836364746094, "learning_rate": 4.539995157015957e-06, "loss": 0.6744, "step": 4242 }, { "epoch": 0.54, "grad_norm": 1.436880111694336, "learning_rate": 4.537928968881404e-06, "loss": 0.6731, "step": 4243 }, { "epoch": 0.54, "grad_norm": 1.1234179735183716, "learning_rate": 4.535862860329195e-06, "loss": 0.5358, "step": 4244 }, { "epoch": 0.54, "grad_norm": 1.4436432123184204, "learning_rate": 4.533796831715172e-06, "loss": 0.6199, "step": 4245 }, { "epoch": 0.54, "grad_norm": 1.6011847257614136, "learning_rate": 4.531730883395171e-06, "loss": 0.5899, "step": 4246 }, { "epoch": 0.54, "grad_norm": 1.1307777166366577, "learning_rate": 4.529665015725006e-06, "loss": 0.6098, "step": 4247 }, { "epoch": 0.54, "grad_norm": 1.4225749969482422, "learning_rate": 4.527599229060483e-06, "loss": 0.6267, "step": 4248 }, { "epoch": 0.54, "grad_norm": 1.1333550214767456, "learning_rate": 4.5255335237573905e-06, "loss": 0.6978, "step": 4249 }, { "epoch": 0.54, "grad_norm": 1.0923420190811157, "learning_rate": 4.5234679001715055e-06, "loss": 0.6762, "step": 4250 }, { "epoch": 0.54, "grad_norm": 1.4398398399353027, "learning_rate": 4.521402358658587e-06, "loss": 0.684, "step": 4251 }, { "epoch": 0.54, "grad_norm": 1.3944870233535767, "learning_rate": 4.519336899574384e-06, "loss": 0.6649, "step": 4252 }, { "epoch": 0.54, "grad_norm": 1.357353925704956, "learning_rate": 4.517271523274632e-06, "loss": 0.5606, "step": 4253 }, { "epoch": 0.55, "grad_norm": 1.7038229703903198, "learning_rate": 4.515206230115047e-06, "loss": 0.6472, "step": 4254 }, { "epoch": 0.55, "grad_norm": 1.4553661346435547, "learning_rate": 4.5131410204513375e-06, "loss": 0.5576, "step": 4255 }, { "epoch": 0.55, "grad_norm": 1.0828001499176025, "learning_rate": 4.511075894639189e-06, "loss": 0.6354, "step": 4256 }, { "epoch": 0.55, "grad_norm": 1.3921860456466675, "learning_rate": 4.509010853034281e-06, "loss": 0.5799, "step": 4257 }, { "epoch": 0.55, "grad_norm": 1.2910650968551636, "learning_rate": 4.506945895992274e-06, "loss": 0.6111, "step": 4258 }, { "epoch": 0.55, "grad_norm": 1.2620890140533447, "learning_rate": 4.5048810238688145e-06, "loss": 0.7103, "step": 4259 }, { "epoch": 0.55, "grad_norm": 1.2186883687973022, "learning_rate": 4.502816237019534e-06, "loss": 0.6864, "step": 4260 }, { "epoch": 0.55, "grad_norm": 1.271498680114746, "learning_rate": 4.5007515358000525e-06, "loss": 0.5766, "step": 4261 }, { "epoch": 0.55, "grad_norm": 1.3539847135543823, "learning_rate": 4.498686920565972e-06, "loss": 0.6176, "step": 4262 }, { "epoch": 0.55, "grad_norm": 1.1289831399917603, "learning_rate": 4.496622391672878e-06, "loss": 0.5878, "step": 4263 }, { "epoch": 0.55, "grad_norm": 1.1196657419204712, "learning_rate": 4.494557949476347e-06, "loss": 0.5116, "step": 4264 }, { "epoch": 0.55, "grad_norm": 1.3079795837402344, "learning_rate": 4.492493594331934e-06, "loss": 0.524, "step": 4265 }, { "epoch": 0.55, "grad_norm": 1.2875686883926392, "learning_rate": 4.490429326595185e-06, "loss": 0.5328, "step": 4266 }, { "epoch": 0.55, "grad_norm": 1.1164149045944214, "learning_rate": 4.488365146621626e-06, "loss": 0.6377, "step": 4267 }, { "epoch": 0.55, "grad_norm": 1.228834867477417, "learning_rate": 4.486301054766773e-06, "loss": 0.5413, "step": 4268 }, { "epoch": 0.55, "grad_norm": 2.0576958656311035, "learning_rate": 4.484237051386119e-06, "loss": 0.6451, "step": 4269 }, { "epoch": 0.55, "grad_norm": 1.1337889432907104, "learning_rate": 4.482173136835152e-06, "loss": 0.68, "step": 4270 }, { "epoch": 0.55, "grad_norm": 1.1818243265151978, "learning_rate": 4.480109311469336e-06, "loss": 0.5929, "step": 4271 }, { "epoch": 0.55, "grad_norm": 2.773174524307251, "learning_rate": 4.4780455756441245e-06, "loss": 0.5298, "step": 4272 }, { "epoch": 0.55, "grad_norm": 1.5445469617843628, "learning_rate": 4.475981929714953e-06, "loss": 0.5569, "step": 4273 }, { "epoch": 0.55, "grad_norm": 1.3637791872024536, "learning_rate": 4.473918374037244e-06, "loss": 0.6169, "step": 4274 }, { "epoch": 0.55, "grad_norm": 1.1337825059890747, "learning_rate": 4.471854908966402e-06, "loss": 0.5918, "step": 4275 }, { "epoch": 0.55, "grad_norm": 1.2219849824905396, "learning_rate": 4.469791534857816e-06, "loss": 0.5439, "step": 4276 }, { "epoch": 0.55, "grad_norm": 2.176208734512329, "learning_rate": 4.467728252066862e-06, "loss": 0.6635, "step": 4277 }, { "epoch": 0.55, "grad_norm": 1.3340530395507812, "learning_rate": 4.465665060948897e-06, "loss": 0.5424, "step": 4278 }, { "epoch": 0.55, "grad_norm": 1.166491985321045, "learning_rate": 4.4636019618592655e-06, "loss": 0.665, "step": 4279 }, { "epoch": 0.55, "grad_norm": 1.9526617527008057, "learning_rate": 4.461538955153292e-06, "loss": 0.6259, "step": 4280 }, { "epoch": 0.55, "grad_norm": 1.2087116241455078, "learning_rate": 4.4594760411862905e-06, "loss": 0.7804, "step": 4281 }, { "epoch": 0.55, "grad_norm": 1.6400904655456543, "learning_rate": 4.457413220313553e-06, "loss": 0.6266, "step": 4282 }, { "epoch": 0.55, "grad_norm": 1.6623104810714722, "learning_rate": 4.455350492890361e-06, "loss": 0.6399, "step": 4283 }, { "epoch": 0.55, "grad_norm": 1.3035902976989746, "learning_rate": 4.453287859271975e-06, "loss": 0.5671, "step": 4284 }, { "epoch": 0.55, "grad_norm": 1.1311049461364746, "learning_rate": 4.451225319813644e-06, "loss": 0.5505, "step": 4285 }, { "epoch": 0.55, "grad_norm": 1.258715033531189, "learning_rate": 4.449162874870595e-06, "loss": 0.62, "step": 4286 }, { "epoch": 0.55, "grad_norm": 1.3356287479400635, "learning_rate": 4.4471005247980464e-06, "loss": 0.6583, "step": 4287 }, { "epoch": 0.55, "grad_norm": 1.2244027853012085, "learning_rate": 4.445038269951195e-06, "loss": 0.5537, "step": 4288 }, { "epoch": 0.55, "grad_norm": 1.3109939098358154, "learning_rate": 4.4429761106852204e-06, "loss": 0.5879, "step": 4289 }, { "epoch": 0.55, "grad_norm": 1.1747775077819824, "learning_rate": 4.44091404735529e-06, "loss": 0.5629, "step": 4290 }, { "epoch": 0.55, "grad_norm": 1.4858828783035278, "learning_rate": 4.4388520803165495e-06, "loss": 0.6296, "step": 4291 }, { "epoch": 0.55, "grad_norm": 2.4959919452667236, "learning_rate": 4.436790209924134e-06, "loss": 0.6197, "step": 4292 }, { "epoch": 0.55, "grad_norm": 1.1516481637954712, "learning_rate": 4.434728436533156e-06, "loss": 0.7115, "step": 4293 }, { "epoch": 0.55, "grad_norm": 1.2591958045959473, "learning_rate": 4.4326667604987165e-06, "loss": 0.571, "step": 4294 }, { "epoch": 0.55, "grad_norm": 1.4871764183044434, "learning_rate": 4.430605182175895e-06, "loss": 0.5495, "step": 4295 }, { "epoch": 0.55, "grad_norm": 1.2093896865844727, "learning_rate": 4.428543701919758e-06, "loss": 0.5496, "step": 4296 }, { "epoch": 0.55, "grad_norm": 1.2225619554519653, "learning_rate": 4.426482320085352e-06, "loss": 0.5976, "step": 4297 }, { "epoch": 0.55, "grad_norm": 1.4069699048995972, "learning_rate": 4.424421037027711e-06, "loss": 0.5725, "step": 4298 }, { "epoch": 0.55, "grad_norm": 1.7598546743392944, "learning_rate": 4.422359853101846e-06, "loss": 0.5402, "step": 4299 }, { "epoch": 0.55, "grad_norm": 1.1938350200653076, "learning_rate": 4.420298768662756e-06, "loss": 0.5808, "step": 4300 }, { "epoch": 0.55, "grad_norm": 1.3357658386230469, "learning_rate": 4.418237784065419e-06, "loss": 0.6606, "step": 4301 }, { "epoch": 0.55, "grad_norm": 1.4759631156921387, "learning_rate": 4.416176899664801e-06, "loss": 0.6102, "step": 4302 }, { "epoch": 0.55, "grad_norm": 1.2692142724990845, "learning_rate": 4.4141161158158426e-06, "loss": 0.6524, "step": 4303 }, { "epoch": 0.55, "grad_norm": 1.1206196546554565, "learning_rate": 4.412055432873475e-06, "loss": 0.6809, "step": 4304 }, { "epoch": 0.55, "grad_norm": 1.9297462701797485, "learning_rate": 4.409994851192611e-06, "loss": 0.5622, "step": 4305 }, { "epoch": 0.55, "grad_norm": 1.0943596363067627, "learning_rate": 4.40793437112814e-06, "loss": 0.5449, "step": 4306 }, { "epoch": 0.55, "grad_norm": 1.434758186340332, "learning_rate": 4.4058739930349406e-06, "loss": 0.6168, "step": 4307 }, { "epoch": 0.55, "grad_norm": 1.8183618783950806, "learning_rate": 4.403813717267869e-06, "loss": 0.5821, "step": 4308 }, { "epoch": 0.55, "grad_norm": 1.0242024660110474, "learning_rate": 4.401753544181767e-06, "loss": 0.6293, "step": 4309 }, { "epoch": 0.55, "grad_norm": 1.1580708026885986, "learning_rate": 4.399693474131456e-06, "loss": 0.6038, "step": 4310 }, { "epoch": 0.55, "grad_norm": 1.2843397855758667, "learning_rate": 4.3976335074717446e-06, "loss": 0.6346, "step": 4311 }, { "epoch": 0.55, "grad_norm": 1.1573116779327393, "learning_rate": 4.3955736445574176e-06, "loss": 0.6021, "step": 4312 }, { "epoch": 0.55, "grad_norm": 1.339087963104248, "learning_rate": 4.393513885743243e-06, "loss": 0.5863, "step": 4313 }, { "epoch": 0.55, "grad_norm": 1.3947805166244507, "learning_rate": 4.391454231383976e-06, "loss": 0.649, "step": 4314 }, { "epoch": 0.55, "grad_norm": 1.3123061656951904, "learning_rate": 4.389394681834348e-06, "loss": 0.6246, "step": 4315 }, { "epoch": 0.55, "grad_norm": 1.151753306388855, "learning_rate": 4.387335237449076e-06, "loss": 0.709, "step": 4316 }, { "epoch": 0.55, "grad_norm": 1.03976571559906, "learning_rate": 4.385275898582855e-06, "loss": 0.549, "step": 4317 }, { "epoch": 0.55, "grad_norm": 1.5990352630615234, "learning_rate": 4.383216665590366e-06, "loss": 0.7688, "step": 4318 }, { "epoch": 0.55, "grad_norm": 1.2555058002471924, "learning_rate": 4.381157538826269e-06, "loss": 0.6593, "step": 4319 }, { "epoch": 0.55, "grad_norm": 1.5851774215698242, "learning_rate": 4.379098518645207e-06, "loss": 0.6126, "step": 4320 }, { "epoch": 0.55, "grad_norm": 1.2325047254562378, "learning_rate": 4.377039605401807e-06, "loss": 0.6097, "step": 4321 }, { "epoch": 0.55, "grad_norm": 1.3471224308013916, "learning_rate": 4.374980799450672e-06, "loss": 0.5994, "step": 4322 }, { "epoch": 0.55, "grad_norm": 2.4533209800720215, "learning_rate": 4.372922101146391e-06, "loss": 0.5687, "step": 4323 }, { "epoch": 0.55, "grad_norm": 1.748429536819458, "learning_rate": 4.370863510843531e-06, "loss": 0.6614, "step": 4324 }, { "epoch": 0.55, "grad_norm": 1.2647942304611206, "learning_rate": 4.368805028896645e-06, "loss": 0.6273, "step": 4325 }, { "epoch": 0.55, "grad_norm": 1.4914000034332275, "learning_rate": 4.366746655660262e-06, "loss": 0.7031, "step": 4326 }, { "epoch": 0.55, "grad_norm": 1.345897912979126, "learning_rate": 4.364688391488897e-06, "loss": 0.5652, "step": 4327 }, { "epoch": 0.55, "grad_norm": 1.1223891973495483, "learning_rate": 4.362630236737043e-06, "loss": 0.6655, "step": 4328 }, { "epoch": 0.55, "grad_norm": 1.1209384202957153, "learning_rate": 4.360572191759176e-06, "loss": 0.6175, "step": 4329 }, { "epoch": 0.55, "grad_norm": 1.4959664344787598, "learning_rate": 4.358514256909751e-06, "loss": 0.6079, "step": 4330 }, { "epoch": 0.55, "grad_norm": 1.6316180229187012, "learning_rate": 4.356456432543208e-06, "loss": 0.6025, "step": 4331 }, { "epoch": 0.55, "grad_norm": 1.7658164501190186, "learning_rate": 4.354398719013964e-06, "loss": 0.5352, "step": 4332 }, { "epoch": 0.56, "grad_norm": 1.239953875541687, "learning_rate": 4.352341116676418e-06, "loss": 0.6356, "step": 4333 }, { "epoch": 0.56, "grad_norm": 1.2205899953842163, "learning_rate": 4.350283625884949e-06, "loss": 0.7296, "step": 4334 }, { "epoch": 0.56, "grad_norm": 1.6176862716674805, "learning_rate": 4.348226246993922e-06, "loss": 0.6232, "step": 4335 }, { "epoch": 0.56, "grad_norm": 1.2962661981582642, "learning_rate": 4.346168980357674e-06, "loss": 0.6093, "step": 4336 }, { "epoch": 0.56, "grad_norm": 1.1365058422088623, "learning_rate": 4.344111826330529e-06, "loss": 0.6139, "step": 4337 }, { "epoch": 0.56, "grad_norm": 1.4743257761001587, "learning_rate": 4.342054785266792e-06, "loss": 0.6462, "step": 4338 }, { "epoch": 0.56, "grad_norm": 1.2239124774932861, "learning_rate": 4.339997857520745e-06, "loss": 0.58, "step": 4339 }, { "epoch": 0.56, "grad_norm": 1.2178657054901123, "learning_rate": 4.337941043446653e-06, "loss": 0.6802, "step": 4340 }, { "epoch": 0.56, "grad_norm": 0.9617922902107239, "learning_rate": 4.335884343398757e-06, "loss": 0.6315, "step": 4341 }, { "epoch": 0.56, "grad_norm": 1.1992239952087402, "learning_rate": 4.333827757731286e-06, "loss": 0.6106, "step": 4342 }, { "epoch": 0.56, "grad_norm": 1.2823426723480225, "learning_rate": 4.331771286798442e-06, "loss": 0.7401, "step": 4343 }, { "epoch": 0.56, "grad_norm": 1.0556762218475342, "learning_rate": 4.329714930954414e-06, "loss": 0.5582, "step": 4344 }, { "epoch": 0.56, "grad_norm": 1.2360563278198242, "learning_rate": 4.327658690553362e-06, "loss": 0.5358, "step": 4345 }, { "epoch": 0.56, "grad_norm": 1.4432363510131836, "learning_rate": 4.325602565949437e-06, "loss": 0.6498, "step": 4346 }, { "epoch": 0.56, "grad_norm": 1.0684760808944702, "learning_rate": 4.3235465574967615e-06, "loss": 0.5381, "step": 4347 }, { "epoch": 0.56, "grad_norm": 1.2283833026885986, "learning_rate": 4.321490665549442e-06, "loss": 0.525, "step": 4348 }, { "epoch": 0.56, "grad_norm": 1.2738137245178223, "learning_rate": 4.319434890461563e-06, "loss": 0.6444, "step": 4349 }, { "epoch": 0.56, "grad_norm": 1.3298536539077759, "learning_rate": 4.317379232587194e-06, "loss": 0.6234, "step": 4350 }, { "epoch": 0.56, "grad_norm": 1.158233642578125, "learning_rate": 4.315323692280375e-06, "loss": 0.5949, "step": 4351 }, { "epoch": 0.56, "grad_norm": 3.0666298866271973, "learning_rate": 4.313268269895134e-06, "loss": 0.5992, "step": 4352 }, { "epoch": 0.56, "grad_norm": 1.5501093864440918, "learning_rate": 4.3112129657854755e-06, "loss": 0.5746, "step": 4353 }, { "epoch": 0.56, "grad_norm": 1.7205291986465454, "learning_rate": 4.3091577803053816e-06, "loss": 0.6069, "step": 4354 }, { "epoch": 0.56, "grad_norm": 1.564469337463379, "learning_rate": 4.3071027138088206e-06, "loss": 0.5885, "step": 4355 }, { "epoch": 0.56, "grad_norm": 1.302836298942566, "learning_rate": 4.305047766649733e-06, "loss": 0.6223, "step": 4356 }, { "epoch": 0.56, "grad_norm": 1.3741014003753662, "learning_rate": 4.302992939182042e-06, "loss": 0.5507, "step": 4357 }, { "epoch": 0.56, "grad_norm": 1.358909249305725, "learning_rate": 4.30093823175965e-06, "loss": 0.6304, "step": 4358 }, { "epoch": 0.56, "grad_norm": 1.1862109899520874, "learning_rate": 4.298883644736438e-06, "loss": 0.7494, "step": 4359 }, { "epoch": 0.56, "grad_norm": 1.1762068271636963, "learning_rate": 4.296829178466268e-06, "loss": 0.6076, "step": 4360 }, { "epoch": 0.56, "grad_norm": 1.2821547985076904, "learning_rate": 4.294774833302981e-06, "loss": 0.627, "step": 4361 }, { "epoch": 0.56, "grad_norm": 1.2723315954208374, "learning_rate": 4.292720609600393e-06, "loss": 0.6285, "step": 4362 }, { "epoch": 0.56, "grad_norm": 1.2723183631896973, "learning_rate": 4.290666507712304e-06, "loss": 0.6313, "step": 4363 }, { "epoch": 0.56, "grad_norm": 1.1899631023406982, "learning_rate": 4.288612527992492e-06, "loss": 0.6145, "step": 4364 }, { "epoch": 0.56, "grad_norm": 1.3209925889968872, "learning_rate": 4.286558670794712e-06, "loss": 0.5391, "step": 4365 }, { "epoch": 0.56, "grad_norm": 1.0585249662399292, "learning_rate": 4.284504936472701e-06, "loss": 0.5716, "step": 4366 }, { "epoch": 0.56, "grad_norm": 1.8191659450531006, "learning_rate": 4.28245132538017e-06, "loss": 0.5629, "step": 4367 }, { "epoch": 0.56, "grad_norm": 1.2652943134307861, "learning_rate": 4.2803978378708145e-06, "loss": 0.6255, "step": 4368 }, { "epoch": 0.56, "grad_norm": 1.146340250968933, "learning_rate": 4.278344474298304e-06, "loss": 0.726, "step": 4369 }, { "epoch": 0.56, "grad_norm": 1.114557147026062, "learning_rate": 4.276291235016291e-06, "loss": 0.6706, "step": 4370 }, { "epoch": 0.56, "grad_norm": 1.4655970335006714, "learning_rate": 4.274238120378401e-06, "loss": 0.5154, "step": 4371 }, { "epoch": 0.56, "grad_norm": 1.245018720626831, "learning_rate": 4.272185130738243e-06, "loss": 0.5518, "step": 4372 }, { "epoch": 0.56, "grad_norm": 1.4143431186676025, "learning_rate": 4.270132266449404e-06, "loss": 0.589, "step": 4373 }, { "epoch": 0.56, "grad_norm": 1.2347571849822998, "learning_rate": 4.268079527865447e-06, "loss": 0.712, "step": 4374 }, { "epoch": 0.56, "grad_norm": 1.2976598739624023, "learning_rate": 4.266026915339915e-06, "loss": 0.5632, "step": 4375 }, { "epoch": 0.56, "grad_norm": 1.4560409784317017, "learning_rate": 4.263974429226327e-06, "loss": 0.6189, "step": 4376 }, { "epoch": 0.56, "grad_norm": 1.3681960105895996, "learning_rate": 4.261922069878185e-06, "loss": 0.4996, "step": 4377 }, { "epoch": 0.56, "grad_norm": 2.385707139968872, "learning_rate": 4.259869837648963e-06, "loss": 0.615, "step": 4378 }, { "epoch": 0.56, "grad_norm": 2.20070743560791, "learning_rate": 4.2578177328921185e-06, "loss": 0.5422, "step": 4379 }, { "epoch": 0.56, "grad_norm": 1.4489797353744507, "learning_rate": 4.255765755961083e-06, "loss": 0.6436, "step": 4380 }, { "epoch": 0.56, "grad_norm": 1.207725167274475, "learning_rate": 4.253713907209271e-06, "loss": 0.5272, "step": 4381 }, { "epoch": 0.56, "grad_norm": 1.0956413745880127, "learning_rate": 4.251662186990067e-06, "loss": 0.5381, "step": 4382 }, { "epoch": 0.56, "grad_norm": 1.241393804550171, "learning_rate": 4.249610595656843e-06, "loss": 0.6241, "step": 4383 }, { "epoch": 0.56, "grad_norm": 1.1352612972259521, "learning_rate": 4.24755913356294e-06, "loss": 0.6223, "step": 4384 }, { "epoch": 0.56, "grad_norm": 1.1019214391708374, "learning_rate": 4.245507801061684e-06, "loss": 0.7584, "step": 4385 }, { "epoch": 0.56, "grad_norm": 1.3050336837768555, "learning_rate": 4.243456598506373e-06, "loss": 0.6316, "step": 4386 }, { "epoch": 0.56, "grad_norm": 1.0004106760025024, "learning_rate": 4.241405526250285e-06, "loss": 0.5286, "step": 4387 }, { "epoch": 0.56, "grad_norm": 1.244195818901062, "learning_rate": 4.239354584646677e-06, "loss": 0.5851, "step": 4388 }, { "epoch": 0.56, "grad_norm": 1.2075914144515991, "learning_rate": 4.2373037740487785e-06, "loss": 0.5609, "step": 4389 }, { "epoch": 0.56, "grad_norm": 1.2431285381317139, "learning_rate": 4.235253094809804e-06, "loss": 0.5163, "step": 4390 }, { "epoch": 0.56, "grad_norm": 1.424965739250183, "learning_rate": 4.233202547282941e-06, "loss": 0.5798, "step": 4391 }, { "epoch": 0.56, "grad_norm": 1.2569266557693481, "learning_rate": 4.231152131821353e-06, "loss": 0.6726, "step": 4392 }, { "epoch": 0.56, "grad_norm": 1.3034515380859375, "learning_rate": 4.2291018487781825e-06, "loss": 0.6889, "step": 4393 }, { "epoch": 0.56, "grad_norm": 1.1181299686431885, "learning_rate": 4.227051698506551e-06, "loss": 0.622, "step": 4394 }, { "epoch": 0.56, "grad_norm": 1.356863260269165, "learning_rate": 4.225001681359552e-06, "loss": 0.6016, "step": 4395 }, { "epoch": 0.56, "grad_norm": 1.473215937614441, "learning_rate": 4.222951797690262e-06, "loss": 0.603, "step": 4396 }, { "epoch": 0.56, "grad_norm": 1.1657586097717285, "learning_rate": 4.220902047851729e-06, "loss": 0.6301, "step": 4397 }, { "epoch": 0.56, "grad_norm": 1.2193200588226318, "learning_rate": 4.218852432196984e-06, "loss": 0.5981, "step": 4398 }, { "epoch": 0.56, "grad_norm": 2.201629400253296, "learning_rate": 4.21680295107903e-06, "loss": 0.5681, "step": 4399 }, { "epoch": 0.56, "grad_norm": 1.4651731252670288, "learning_rate": 4.2147536048508485e-06, "loss": 0.5787, "step": 4400 }, { "epoch": 0.56, "grad_norm": 0.9583895206451416, "learning_rate": 4.212704393865398e-06, "loss": 0.521, "step": 4401 }, { "epoch": 0.56, "grad_norm": 1.2025455236434937, "learning_rate": 4.210655318475613e-06, "loss": 0.6295, "step": 4402 }, { "epoch": 0.56, "grad_norm": 1.3904603719711304, "learning_rate": 4.208606379034405e-06, "loss": 0.584, "step": 4403 }, { "epoch": 0.56, "grad_norm": 1.3118081092834473, "learning_rate": 4.206557575894664e-06, "loss": 0.6188, "step": 4404 }, { "epoch": 0.56, "grad_norm": 1.2359284162521362, "learning_rate": 4.204508909409253e-06, "loss": 0.5612, "step": 4405 }, { "epoch": 0.56, "grad_norm": 1.6139267683029175, "learning_rate": 4.202460379931009e-06, "loss": 0.5957, "step": 4406 }, { "epoch": 0.56, "grad_norm": 1.1521854400634766, "learning_rate": 4.200411987812758e-06, "loss": 0.613, "step": 4407 }, { "epoch": 0.56, "grad_norm": 1.0886516571044922, "learning_rate": 4.198363733407289e-06, "loss": 0.6028, "step": 4408 }, { "epoch": 0.56, "grad_norm": 1.284321904182434, "learning_rate": 4.196315617067374e-06, "loss": 0.5777, "step": 4409 }, { "epoch": 0.56, "grad_norm": 1.2444514036178589, "learning_rate": 4.194267639145758e-06, "loss": 0.5497, "step": 4410 }, { "epoch": 0.57, "grad_norm": 1.4131932258605957, "learning_rate": 4.192219799995164e-06, "loss": 0.6017, "step": 4411 }, { "epoch": 0.57, "grad_norm": 2.238851308822632, "learning_rate": 4.190172099968291e-06, "loss": 0.5908, "step": 4412 }, { "epoch": 0.57, "grad_norm": 1.365478754043579, "learning_rate": 4.1881245394178125e-06, "loss": 0.5623, "step": 4413 }, { "epoch": 0.57, "grad_norm": 1.6913090944290161, "learning_rate": 4.186077118696381e-06, "loss": 0.6189, "step": 4414 }, { "epoch": 0.57, "grad_norm": 1.0305039882659912, "learning_rate": 4.184029838156622e-06, "loss": 0.6424, "step": 4415 }, { "epoch": 0.57, "grad_norm": 0.9532804489135742, "learning_rate": 4.181982698151138e-06, "loss": 0.6773, "step": 4416 }, { "epoch": 0.57, "grad_norm": 1.6363182067871094, "learning_rate": 4.179935699032507e-06, "loss": 0.5869, "step": 4417 }, { "epoch": 0.57, "grad_norm": 1.2440401315689087, "learning_rate": 4.177888841153285e-06, "loss": 0.6201, "step": 4418 }, { "epoch": 0.57, "grad_norm": 1.4306551218032837, "learning_rate": 4.175842124865998e-06, "loss": 0.6048, "step": 4419 }, { "epoch": 0.57, "grad_norm": 1.1270860433578491, "learning_rate": 4.1737955505231546e-06, "loss": 0.6119, "step": 4420 }, { "epoch": 0.57, "grad_norm": 1.3283473253250122, "learning_rate": 4.171749118477234e-06, "loss": 0.549, "step": 4421 }, { "epoch": 0.57, "grad_norm": 1.4524391889572144, "learning_rate": 4.1697028290806935e-06, "loss": 0.5849, "step": 4422 }, { "epoch": 0.57, "grad_norm": 1.351607084274292, "learning_rate": 4.167656682685962e-06, "loss": 0.6161, "step": 4423 }, { "epoch": 0.57, "grad_norm": 1.2113388776779175, "learning_rate": 4.165610679645451e-06, "loss": 0.5219, "step": 4424 }, { "epoch": 0.57, "grad_norm": 2.863964557647705, "learning_rate": 4.1635648203115405e-06, "loss": 0.5498, "step": 4425 }, { "epoch": 0.57, "grad_norm": 1.9458630084991455, "learning_rate": 4.161519105036588e-06, "loss": 0.4855, "step": 4426 }, { "epoch": 0.57, "grad_norm": 1.1281429529190063, "learning_rate": 4.159473534172927e-06, "loss": 0.5676, "step": 4427 }, { "epoch": 0.57, "grad_norm": 1.1383236646652222, "learning_rate": 4.157428108072866e-06, "loss": 0.6055, "step": 4428 }, { "epoch": 0.57, "grad_norm": 1.510728120803833, "learning_rate": 4.155382827088688e-06, "loss": 0.5496, "step": 4429 }, { "epoch": 0.57, "grad_norm": 1.6128290891647339, "learning_rate": 4.1533376915726495e-06, "loss": 0.5808, "step": 4430 }, { "epoch": 0.57, "grad_norm": 2.0385468006134033, "learning_rate": 4.151292701876986e-06, "loss": 0.6876, "step": 4431 }, { "epoch": 0.57, "grad_norm": 1.1657432317733765, "learning_rate": 4.149247858353902e-06, "loss": 0.5627, "step": 4432 }, { "epoch": 0.57, "grad_norm": 1.0777486562728882, "learning_rate": 4.147203161355583e-06, "loss": 0.5524, "step": 4433 }, { "epoch": 0.57, "grad_norm": 1.4103708267211914, "learning_rate": 4.145158611234186e-06, "loss": 0.5984, "step": 4434 }, { "epoch": 0.57, "grad_norm": 1.323095679283142, "learning_rate": 4.143114208341843e-06, "loss": 0.7533, "step": 4435 }, { "epoch": 0.57, "grad_norm": 1.0617965459823608, "learning_rate": 4.14106995303066e-06, "loss": 0.5806, "step": 4436 }, { "epoch": 0.57, "grad_norm": 1.3179047107696533, "learning_rate": 4.1390258456527195e-06, "loss": 0.5494, "step": 4437 }, { "epoch": 0.57, "grad_norm": 3.672276020050049, "learning_rate": 4.136981886560078e-06, "loss": 0.5966, "step": 4438 }, { "epoch": 0.57, "grad_norm": 1.2726572751998901, "learning_rate": 4.134938076104764e-06, "loss": 0.6048, "step": 4439 }, { "epoch": 0.57, "grad_norm": 1.0546799898147583, "learning_rate": 4.132894414638782e-06, "loss": 0.6254, "step": 4440 }, { "epoch": 0.57, "grad_norm": 1.2751909494400024, "learning_rate": 4.130850902514114e-06, "loss": 0.5718, "step": 4441 }, { "epoch": 0.57, "grad_norm": 1.2318024635314941, "learning_rate": 4.128807540082714e-06, "loss": 0.5496, "step": 4442 }, { "epoch": 0.57, "grad_norm": 1.266778826713562, "learning_rate": 4.126764327696504e-06, "loss": 0.5568, "step": 4443 }, { "epoch": 0.57, "grad_norm": 1.3783042430877686, "learning_rate": 4.124721265707392e-06, "loss": 0.5641, "step": 4444 }, { "epoch": 0.57, "grad_norm": 2.1089837551116943, "learning_rate": 4.12267835446725e-06, "loss": 0.6204, "step": 4445 }, { "epoch": 0.57, "grad_norm": 3.281911611557007, "learning_rate": 4.12063559432793e-06, "loss": 0.7028, "step": 4446 }, { "epoch": 0.57, "grad_norm": 1.1332265138626099, "learning_rate": 4.118592985641254e-06, "loss": 0.6078, "step": 4447 }, { "epoch": 0.57, "grad_norm": 1.2196893692016602, "learning_rate": 4.116550528759023e-06, "loss": 0.5784, "step": 4448 }, { "epoch": 0.57, "grad_norm": 1.2832058668136597, "learning_rate": 4.114508224033004e-06, "loss": 0.6051, "step": 4449 }, { "epoch": 0.57, "grad_norm": 1.4253997802734375, "learning_rate": 4.112466071814947e-06, "loss": 0.6712, "step": 4450 }, { "epoch": 0.57, "grad_norm": 1.711785912513733, "learning_rate": 4.110424072456568e-06, "loss": 0.6563, "step": 4451 }, { "epoch": 0.57, "grad_norm": 1.2868982553482056, "learning_rate": 4.108382226309563e-06, "loss": 0.5904, "step": 4452 }, { "epoch": 0.57, "grad_norm": 1.2525250911712646, "learning_rate": 4.106340533725595e-06, "loss": 0.6214, "step": 4453 }, { "epoch": 0.57, "grad_norm": 1.175255537033081, "learning_rate": 4.104298995056307e-06, "loss": 0.5653, "step": 4454 }, { "epoch": 0.57, "grad_norm": 1.409961223602295, "learning_rate": 4.102257610653311e-06, "loss": 0.605, "step": 4455 }, { "epoch": 0.57, "grad_norm": 1.28078293800354, "learning_rate": 4.100216380868194e-06, "loss": 0.5668, "step": 4456 }, { "epoch": 0.57, "grad_norm": 1.1241258382797241, "learning_rate": 4.098175306052515e-06, "loss": 0.6365, "step": 4457 }, { "epoch": 0.57, "grad_norm": 1.2419236898422241, "learning_rate": 4.09613438655781e-06, "loss": 0.6215, "step": 4458 }, { "epoch": 0.57, "grad_norm": 1.4537636041641235, "learning_rate": 4.0940936227355866e-06, "loss": 0.5763, "step": 4459 }, { "epoch": 0.57, "grad_norm": 1.1015124320983887, "learning_rate": 4.0920530149373235e-06, "loss": 0.5656, "step": 4460 }, { "epoch": 0.57, "grad_norm": 1.080503225326538, "learning_rate": 4.090012563514473e-06, "loss": 0.606, "step": 4461 }, { "epoch": 0.57, "grad_norm": 1.4011242389678955, "learning_rate": 4.087972268818463e-06, "loss": 0.6446, "step": 4462 }, { "epoch": 0.57, "grad_norm": 1.2129254341125488, "learning_rate": 4.085932131200691e-06, "loss": 0.5713, "step": 4463 }, { "epoch": 0.57, "grad_norm": 1.1942564249038696, "learning_rate": 4.083892151012531e-06, "loss": 0.6235, "step": 4464 }, { "epoch": 0.57, "grad_norm": 1.2527570724487305, "learning_rate": 4.081852328605327e-06, "loss": 0.5275, "step": 4465 }, { "epoch": 0.57, "grad_norm": 1.1266459226608276, "learning_rate": 4.079812664330398e-06, "loss": 0.6578, "step": 4466 }, { "epoch": 0.57, "grad_norm": 1.1888070106506348, "learning_rate": 4.0777731585390335e-06, "loss": 0.5591, "step": 4467 }, { "epoch": 0.57, "grad_norm": 1.3004099130630493, "learning_rate": 4.0757338115824975e-06, "loss": 0.5872, "step": 4468 }, { "epoch": 0.57, "grad_norm": 3.164547920227051, "learning_rate": 4.073694623812026e-06, "loss": 0.5892, "step": 4469 }, { "epoch": 0.57, "grad_norm": 1.132218599319458, "learning_rate": 4.071655595578829e-06, "loss": 0.5648, "step": 4470 }, { "epoch": 0.57, "grad_norm": 1.187025547027588, "learning_rate": 4.0696167272340845e-06, "loss": 0.5044, "step": 4471 }, { "epoch": 0.57, "grad_norm": 1.381439208984375, "learning_rate": 4.0675780191289496e-06, "loss": 0.6377, "step": 4472 }, { "epoch": 0.57, "grad_norm": 1.3431545495986938, "learning_rate": 4.065539471614547e-06, "loss": 0.5823, "step": 4473 }, { "epoch": 0.57, "grad_norm": 1.775613784790039, "learning_rate": 4.063501085041976e-06, "loss": 0.5882, "step": 4474 }, { "epoch": 0.57, "grad_norm": 1.6177005767822266, "learning_rate": 4.06146285976231e-06, "loss": 0.6739, "step": 4475 }, { "epoch": 0.57, "grad_norm": 1.1213589906692505, "learning_rate": 4.059424796126589e-06, "loss": 0.6788, "step": 4476 }, { "epoch": 0.57, "grad_norm": 1.4037939310073853, "learning_rate": 4.0573868944858306e-06, "loss": 0.5024, "step": 4477 }, { "epoch": 0.57, "grad_norm": 1.1332634687423706, "learning_rate": 4.055349155191018e-06, "loss": 0.7279, "step": 4478 }, { "epoch": 0.57, "grad_norm": 1.2475149631500244, "learning_rate": 4.0533115785931146e-06, "loss": 0.6189, "step": 4479 }, { "epoch": 0.57, "grad_norm": 1.490715742111206, "learning_rate": 4.051274165043049e-06, "loss": 0.6506, "step": 4480 }, { "epoch": 0.57, "grad_norm": 1.3428494930267334, "learning_rate": 4.049236914891726e-06, "loss": 0.56, "step": 4481 }, { "epoch": 0.57, "grad_norm": 1.3903584480285645, "learning_rate": 4.047199828490017e-06, "loss": 0.5654, "step": 4482 }, { "epoch": 0.57, "grad_norm": 1.2388803958892822, "learning_rate": 4.045162906188773e-06, "loss": 0.5477, "step": 4483 }, { "epoch": 0.57, "grad_norm": 1.5961958169937134, "learning_rate": 4.04312614833881e-06, "loss": 0.574, "step": 4484 }, { "epoch": 0.57, "grad_norm": 1.4322210550308228, "learning_rate": 4.041089555290919e-06, "loss": 0.5648, "step": 4485 }, { "epoch": 0.57, "grad_norm": 1.3006025552749634, "learning_rate": 4.039053127395861e-06, "loss": 0.6208, "step": 4486 }, { "epoch": 0.57, "grad_norm": 1.0778475999832153, "learning_rate": 4.037016865004371e-06, "loss": 0.5335, "step": 4487 }, { "epoch": 0.57, "grad_norm": 1.3122655153274536, "learning_rate": 4.0349807684671506e-06, "loss": 0.606, "step": 4488 }, { "epoch": 0.58, "grad_norm": 1.462798833847046, "learning_rate": 4.03294483813488e-06, "loss": 0.6424, "step": 4489 }, { "epoch": 0.58, "grad_norm": 1.512628436088562, "learning_rate": 4.030909074358204e-06, "loss": 0.5838, "step": 4490 }, { "epoch": 0.58, "grad_norm": 1.626233696937561, "learning_rate": 4.028873477487741e-06, "loss": 0.6137, "step": 4491 }, { "epoch": 0.58, "grad_norm": 1.5814064741134644, "learning_rate": 4.026838047874084e-06, "loss": 0.6584, "step": 4492 }, { "epoch": 0.58, "grad_norm": 1.6172866821289062, "learning_rate": 4.024802785867793e-06, "loss": 0.6242, "step": 4493 }, { "epoch": 0.58, "grad_norm": 1.1439348459243774, "learning_rate": 4.0227676918194015e-06, "loss": 0.7038, "step": 4494 }, { "epoch": 0.58, "grad_norm": 1.3968899250030518, "learning_rate": 4.020732766079411e-06, "loss": 0.6014, "step": 4495 }, { "epoch": 0.58, "grad_norm": 1.4990570545196533, "learning_rate": 4.018698008998298e-06, "loss": 0.623, "step": 4496 }, { "epoch": 0.58, "grad_norm": 1.8007217645645142, "learning_rate": 4.0166634209265065e-06, "loss": 0.6442, "step": 4497 }, { "epoch": 0.58, "grad_norm": 1.3919003009796143, "learning_rate": 4.014629002214454e-06, "loss": 0.46, "step": 4498 }, { "epoch": 0.58, "grad_norm": 1.3041036128997803, "learning_rate": 4.012594753212528e-06, "loss": 0.6084, "step": 4499 }, { "epoch": 0.58, "grad_norm": 1.221053123474121, "learning_rate": 4.010560674271085e-06, "loss": 0.5738, "step": 4500 }, { "epoch": 0.58, "grad_norm": 1.5155267715454102, "learning_rate": 4.0085267657404544e-06, "loss": 0.7307, "step": 4501 }, { "epoch": 0.58, "grad_norm": 1.1297587156295776, "learning_rate": 4.006493027970938e-06, "loss": 0.6133, "step": 4502 }, { "epoch": 0.58, "grad_norm": 1.395195484161377, "learning_rate": 4.004459461312802e-06, "loss": 0.7651, "step": 4503 }, { "epoch": 0.58, "grad_norm": 1.0992610454559326, "learning_rate": 4.0024260661162895e-06, "loss": 0.4897, "step": 4504 }, { "epoch": 0.58, "grad_norm": 1.1246989965438843, "learning_rate": 4.000392842731611e-06, "loss": 0.5924, "step": 4505 }, { "epoch": 0.58, "grad_norm": 1.2189942598342896, "learning_rate": 3.998359791508946e-06, "loss": 0.6192, "step": 4506 }, { "epoch": 0.58, "grad_norm": 1.5760741233825684, "learning_rate": 3.996326912798449e-06, "loss": 0.6763, "step": 4507 }, { "epoch": 0.58, "grad_norm": 1.6541393995285034, "learning_rate": 3.994294206950241e-06, "loss": 0.6857, "step": 4508 }, { "epoch": 0.58, "grad_norm": 1.0716215372085571, "learning_rate": 3.992261674314411e-06, "loss": 0.6769, "step": 4509 }, { "epoch": 0.58, "grad_norm": 1.1231220960617065, "learning_rate": 3.990229315241028e-06, "loss": 0.5404, "step": 4510 }, { "epoch": 0.58, "grad_norm": 1.6681180000305176, "learning_rate": 3.988197130080121e-06, "loss": 0.5339, "step": 4511 }, { "epoch": 0.58, "grad_norm": 1.169209361076355, "learning_rate": 3.986165119181692e-06, "loss": 0.5946, "step": 4512 }, { "epoch": 0.58, "grad_norm": 1.410796046257019, "learning_rate": 3.9841332828957135e-06, "loss": 0.6796, "step": 4513 }, { "epoch": 0.58, "grad_norm": 1.1149036884307861, "learning_rate": 3.98210162157213e-06, "loss": 0.5849, "step": 4514 }, { "epoch": 0.58, "grad_norm": 1.4105883836746216, "learning_rate": 3.980070135560852e-06, "loss": 0.5759, "step": 4515 }, { "epoch": 0.58, "grad_norm": 1.4017486572265625, "learning_rate": 3.978038825211763e-06, "loss": 0.5776, "step": 4516 }, { "epoch": 0.58, "grad_norm": 1.292477011680603, "learning_rate": 3.9760076908747134e-06, "loss": 0.7173, "step": 4517 }, { "epoch": 0.58, "grad_norm": 1.4200750589370728, "learning_rate": 3.973976732899526e-06, "loss": 0.5984, "step": 4518 }, { "epoch": 0.58, "grad_norm": 1.526354432106018, "learning_rate": 3.971945951635992e-06, "loss": 0.5828, "step": 4519 }, { "epoch": 0.58, "grad_norm": 0.998432457447052, "learning_rate": 3.969915347433871e-06, "loss": 0.5829, "step": 4520 }, { "epoch": 0.58, "grad_norm": 4.682458400726318, "learning_rate": 3.967884920642895e-06, "loss": 0.6694, "step": 4521 }, { "epoch": 0.58, "grad_norm": 1.5824471712112427, "learning_rate": 3.965854671612762e-06, "loss": 0.5997, "step": 4522 }, { "epoch": 0.58, "grad_norm": 6.456887245178223, "learning_rate": 3.963824600693143e-06, "loss": 0.6215, "step": 4523 }, { "epoch": 0.58, "grad_norm": 1.255520224571228, "learning_rate": 3.961794708233675e-06, "loss": 0.7175, "step": 4524 }, { "epoch": 0.58, "grad_norm": 1.1447798013687134, "learning_rate": 3.959764994583965e-06, "loss": 0.5813, "step": 4525 }, { "epoch": 0.58, "grad_norm": 1.238195538520813, "learning_rate": 3.957735460093591e-06, "loss": 0.5689, "step": 4526 }, { "epoch": 0.58, "grad_norm": 1.5053225755691528, "learning_rate": 3.955706105112101e-06, "loss": 0.591, "step": 4527 }, { "epoch": 0.58, "grad_norm": 1.2886848449707031, "learning_rate": 3.953676929989008e-06, "loss": 0.5764, "step": 4528 }, { "epoch": 0.58, "grad_norm": 1.453669786453247, "learning_rate": 3.951647935073796e-06, "loss": 0.6344, "step": 4529 }, { "epoch": 0.58, "grad_norm": 1.3581910133361816, "learning_rate": 3.949619120715918e-06, "loss": 0.5262, "step": 4530 }, { "epoch": 0.58, "grad_norm": 1.4336278438568115, "learning_rate": 3.947590487264799e-06, "loss": 0.5272, "step": 4531 }, { "epoch": 0.58, "grad_norm": 1.4319508075714111, "learning_rate": 3.945562035069826e-06, "loss": 0.6207, "step": 4532 }, { "epoch": 0.58, "grad_norm": 1.5566030740737915, "learning_rate": 3.9435337644803605e-06, "loss": 0.6633, "step": 4533 }, { "epoch": 0.58, "grad_norm": 1.4970437288284302, "learning_rate": 3.94150567584573e-06, "loss": 0.6113, "step": 4534 }, { "epoch": 0.58, "grad_norm": 1.2610963582992554, "learning_rate": 3.9394777695152335e-06, "loss": 0.6422, "step": 4535 }, { "epoch": 0.58, "grad_norm": 1.2423081398010254, "learning_rate": 3.937450045838133e-06, "loss": 0.6177, "step": 4536 }, { "epoch": 0.58, "grad_norm": 1.597859501838684, "learning_rate": 3.935422505163667e-06, "loss": 0.6175, "step": 4537 }, { "epoch": 0.58, "grad_norm": 1.2870022058486938, "learning_rate": 3.933395147841035e-06, "loss": 0.5798, "step": 4538 }, { "epoch": 0.58, "grad_norm": 1.2589836120605469, "learning_rate": 3.931367974219411e-06, "loss": 0.5974, "step": 4539 }, { "epoch": 0.58, "grad_norm": 1.2614821195602417, "learning_rate": 3.9293409846479305e-06, "loss": 0.6749, "step": 4540 }, { "epoch": 0.58, "grad_norm": 1.2686032056808472, "learning_rate": 3.927314179475705e-06, "loss": 0.5984, "step": 4541 }, { "epoch": 0.58, "grad_norm": 1.2367477416992188, "learning_rate": 3.925287559051808e-06, "loss": 0.6273, "step": 4542 }, { "epoch": 0.58, "grad_norm": 1.1769745349884033, "learning_rate": 3.923261123725283e-06, "loss": 0.5428, "step": 4543 }, { "epoch": 0.58, "grad_norm": 1.3887510299682617, "learning_rate": 3.921234873845146e-06, "loss": 0.5769, "step": 4544 }, { "epoch": 0.58, "grad_norm": 1.007226586341858, "learning_rate": 3.9192088097603745e-06, "loss": 0.6352, "step": 4545 }, { "epoch": 0.58, "grad_norm": 1.2268894910812378, "learning_rate": 3.917182931819918e-06, "loss": 0.6749, "step": 4546 }, { "epoch": 0.58, "grad_norm": 1.3286808729171753, "learning_rate": 3.915157240372693e-06, "loss": 0.5088, "step": 4547 }, { "epoch": 0.58, "grad_norm": 1.3561018705368042, "learning_rate": 3.913131735767583e-06, "loss": 0.5868, "step": 4548 }, { "epoch": 0.58, "grad_norm": 1.3705759048461914, "learning_rate": 3.911106418353439e-06, "loss": 0.6115, "step": 4549 }, { "epoch": 0.58, "grad_norm": 1.358817458152771, "learning_rate": 3.909081288479083e-06, "loss": 0.5725, "step": 4550 }, { "epoch": 0.58, "grad_norm": 1.503664493560791, "learning_rate": 3.907056346493301e-06, "loss": 0.6032, "step": 4551 }, { "epoch": 0.58, "grad_norm": 1.1593071222305298, "learning_rate": 3.905031592744849e-06, "loss": 0.629, "step": 4552 }, { "epoch": 0.58, "grad_norm": 0.9405057430267334, "learning_rate": 3.9030070275824486e-06, "loss": 0.5977, "step": 4553 }, { "epoch": 0.58, "grad_norm": 1.4616215229034424, "learning_rate": 3.90098265135479e-06, "loss": 0.6383, "step": 4554 }, { "epoch": 0.58, "grad_norm": 1.344404697418213, "learning_rate": 3.898958464410532e-06, "loss": 0.6543, "step": 4555 }, { "epoch": 0.58, "grad_norm": 1.2103294134140015, "learning_rate": 3.896934467098298e-06, "loss": 0.598, "step": 4556 }, { "epoch": 0.58, "grad_norm": 1.1870449781417847, "learning_rate": 3.894910659766682e-06, "loss": 0.6187, "step": 4557 }, { "epoch": 0.58, "grad_norm": 2.1117770671844482, "learning_rate": 3.892887042764243e-06, "loss": 0.6356, "step": 4558 }, { "epoch": 0.58, "grad_norm": 1.8922291994094849, "learning_rate": 3.890863616439509e-06, "loss": 0.5096, "step": 4559 }, { "epoch": 0.58, "grad_norm": 1.3100972175598145, "learning_rate": 3.888840381140971e-06, "loss": 0.6282, "step": 4560 }, { "epoch": 0.58, "grad_norm": 1.8961695432662964, "learning_rate": 3.886817337217092e-06, "loss": 0.5745, "step": 4561 }, { "epoch": 0.58, "grad_norm": 1.7968751192092896, "learning_rate": 3.884794485016302e-06, "loss": 0.4665, "step": 4562 }, { "epoch": 0.58, "grad_norm": 1.0768951177597046, "learning_rate": 3.882771824886994e-06, "loss": 0.583, "step": 4563 }, { "epoch": 0.58, "grad_norm": 1.303018569946289, "learning_rate": 3.8807493571775315e-06, "loss": 0.5878, "step": 4564 }, { "epoch": 0.58, "grad_norm": 1.1549627780914307, "learning_rate": 3.878727082236241e-06, "loss": 0.6445, "step": 4565 }, { "epoch": 0.58, "grad_norm": 1.1280014514923096, "learning_rate": 3.876705000411422e-06, "loss": 0.627, "step": 4566 }, { "epoch": 0.59, "grad_norm": 1.0545580387115479, "learning_rate": 3.874683112051333e-06, "loss": 0.6427, "step": 4567 }, { "epoch": 0.59, "grad_norm": 1.2090257406234741, "learning_rate": 3.872661417504207e-06, "loss": 0.7049, "step": 4568 }, { "epoch": 0.59, "grad_norm": 1.2235411405563354, "learning_rate": 3.870639917118235e-06, "loss": 0.7095, "step": 4569 }, { "epoch": 0.59, "grad_norm": 1.0587208271026611, "learning_rate": 3.868618611241584e-06, "loss": 0.5291, "step": 4570 }, { "epoch": 0.59, "grad_norm": 1.2914259433746338, "learning_rate": 3.866597500222381e-06, "loss": 0.5862, "step": 4571 }, { "epoch": 0.59, "grad_norm": 1.3580983877182007, "learning_rate": 3.864576584408722e-06, "loss": 0.515, "step": 4572 }, { "epoch": 0.59, "grad_norm": 1.3612672090530396, "learning_rate": 3.862555864148666e-06, "loss": 0.6386, "step": 4573 }, { "epoch": 0.59, "grad_norm": 1.3382236957550049, "learning_rate": 3.860535339790245e-06, "loss": 0.5627, "step": 4574 }, { "epoch": 0.59, "grad_norm": 1.3669838905334473, "learning_rate": 3.85851501168145e-06, "loss": 0.6867, "step": 4575 }, { "epoch": 0.59, "grad_norm": 1.110496997833252, "learning_rate": 3.856494880170243e-06, "loss": 0.5651, "step": 4576 }, { "epoch": 0.59, "grad_norm": 1.1593047380447388, "learning_rate": 3.854474945604549e-06, "loss": 0.4934, "step": 4577 }, { "epoch": 0.59, "grad_norm": 1.3233683109283447, "learning_rate": 3.852455208332262e-06, "loss": 0.6082, "step": 4578 }, { "epoch": 0.59, "grad_norm": 1.193565011024475, "learning_rate": 3.850435668701243e-06, "loss": 0.531, "step": 4579 }, { "epoch": 0.59, "grad_norm": 1.1400476694107056, "learning_rate": 3.8484163270593125e-06, "loss": 0.5821, "step": 4580 }, { "epoch": 0.59, "grad_norm": 1.239013910293579, "learning_rate": 3.846397183754265e-06, "loss": 0.5321, "step": 4581 }, { "epoch": 0.59, "grad_norm": 1.63629150390625, "learning_rate": 3.844378239133852e-06, "loss": 0.5889, "step": 4582 }, { "epoch": 0.59, "grad_norm": 1.2488521337509155, "learning_rate": 3.8423594935458e-06, "loss": 0.6494, "step": 4583 }, { "epoch": 0.59, "grad_norm": 1.3007025718688965, "learning_rate": 3.840340947337795e-06, "loss": 0.6275, "step": 4584 }, { "epoch": 0.59, "grad_norm": 1.5868581533432007, "learning_rate": 3.838322600857491e-06, "loss": 0.6701, "step": 4585 }, { "epoch": 0.59, "grad_norm": 0.9399994015693665, "learning_rate": 3.8363044544525065e-06, "loss": 0.5922, "step": 4586 }, { "epoch": 0.59, "grad_norm": 1.3345903158187866, "learning_rate": 3.834286508470428e-06, "loss": 0.6308, "step": 4587 }, { "epoch": 0.59, "grad_norm": 1.9581162929534912, "learning_rate": 3.832268763258803e-06, "loss": 0.6132, "step": 4588 }, { "epoch": 0.59, "grad_norm": 1.2127094268798828, "learning_rate": 3.83025121916515e-06, "loss": 0.6929, "step": 4589 }, { "epoch": 0.59, "grad_norm": 1.5069866180419922, "learning_rate": 3.8282338765369466e-06, "loss": 0.5396, "step": 4590 }, { "epoch": 0.59, "grad_norm": 1.1287541389465332, "learning_rate": 3.8262167357216426e-06, "loss": 0.6039, "step": 4591 }, { "epoch": 0.59, "grad_norm": 1.0839670896530151, "learning_rate": 3.824199797066646e-06, "loss": 0.6734, "step": 4592 }, { "epoch": 0.59, "grad_norm": 1.1110575199127197, "learning_rate": 3.822183060919337e-06, "loss": 0.6552, "step": 4593 }, { "epoch": 0.59, "grad_norm": 1.0983967781066895, "learning_rate": 3.820166527627054e-06, "loss": 0.5722, "step": 4594 }, { "epoch": 0.59, "grad_norm": 1.3963110446929932, "learning_rate": 3.818150197537106e-06, "loss": 0.6369, "step": 4595 }, { "epoch": 0.59, "grad_norm": 1.2948497533798218, "learning_rate": 3.816134070996766e-06, "loss": 0.5612, "step": 4596 }, { "epoch": 0.59, "grad_norm": 1.1768401861190796, "learning_rate": 3.8141181483532676e-06, "loss": 0.586, "step": 4597 }, { "epoch": 0.59, "grad_norm": 1.5991896390914917, "learning_rate": 3.8121024299538156e-06, "loss": 0.6508, "step": 4598 }, { "epoch": 0.59, "grad_norm": 1.1931565999984741, "learning_rate": 3.8100869161455746e-06, "loss": 0.5943, "step": 4599 }, { "epoch": 0.59, "grad_norm": 1.2148854732513428, "learning_rate": 3.8080716072756767e-06, "loss": 0.648, "step": 4600 }, { "epoch": 0.59, "grad_norm": 1.4304548501968384, "learning_rate": 3.8060565036912167e-06, "loss": 0.674, "step": 4601 }, { "epoch": 0.59, "grad_norm": 1.529685616493225, "learning_rate": 3.8040416057392577e-06, "loss": 0.5341, "step": 4602 }, { "epoch": 0.59, "grad_norm": 1.5385857820510864, "learning_rate": 3.802026913766823e-06, "loss": 0.678, "step": 4603 }, { "epoch": 0.59, "grad_norm": 1.180269479751587, "learning_rate": 3.8000124281209015e-06, "loss": 0.5744, "step": 4604 }, { "epoch": 0.59, "grad_norm": 1.1694934368133545, "learning_rate": 3.7979981491484496e-06, "loss": 0.5707, "step": 4605 }, { "epoch": 0.59, "grad_norm": 1.2115732431411743, "learning_rate": 3.795984077196384e-06, "loss": 0.5016, "step": 4606 }, { "epoch": 0.59, "grad_norm": 1.5277433395385742, "learning_rate": 3.7939702126115895e-06, "loss": 0.6791, "step": 4607 }, { "epoch": 0.59, "grad_norm": 1.4278790950775146, "learning_rate": 3.7919565557409115e-06, "loss": 0.6131, "step": 4608 }, { "epoch": 0.59, "grad_norm": 1.4480127096176147, "learning_rate": 3.789943106931164e-06, "loss": 0.532, "step": 4609 }, { "epoch": 0.59, "grad_norm": 1.3539552688598633, "learning_rate": 3.7879298665291194e-06, "loss": 0.5845, "step": 4610 }, { "epoch": 0.59, "grad_norm": 2.717895030975342, "learning_rate": 3.7859168348815177e-06, "loss": 0.5879, "step": 4611 }, { "epoch": 0.59, "grad_norm": 1.146493911743164, "learning_rate": 3.7839040123350664e-06, "loss": 0.7074, "step": 4612 }, { "epoch": 0.59, "grad_norm": 1.0468714237213135, "learning_rate": 3.7818913992364298e-06, "loss": 0.4987, "step": 4613 }, { "epoch": 0.59, "grad_norm": 1.3573884963989258, "learning_rate": 3.7798789959322417e-06, "loss": 0.7328, "step": 4614 }, { "epoch": 0.59, "grad_norm": 4.123680591583252, "learning_rate": 3.7778668027690957e-06, "loss": 0.603, "step": 4615 }, { "epoch": 0.59, "grad_norm": 1.4465726613998413, "learning_rate": 3.7758548200935537e-06, "loss": 0.5605, "step": 4616 }, { "epoch": 0.59, "grad_norm": 1.3835780620574951, "learning_rate": 3.7738430482521355e-06, "loss": 0.6137, "step": 4617 }, { "epoch": 0.59, "grad_norm": 1.2412621974945068, "learning_rate": 3.771831487591331e-06, "loss": 0.5755, "step": 4618 }, { "epoch": 0.59, "grad_norm": 1.3044785261154175, "learning_rate": 3.7698201384575883e-06, "loss": 0.6081, "step": 4619 }, { "epoch": 0.59, "grad_norm": 1.197967290878296, "learning_rate": 3.767809001197323e-06, "loss": 0.6496, "step": 4620 }, { "epoch": 0.59, "grad_norm": 1.3317675590515137, "learning_rate": 3.7657980761569114e-06, "loss": 0.6448, "step": 4621 }, { "epoch": 0.59, "grad_norm": 1.4491106271743774, "learning_rate": 3.763787363682696e-06, "loss": 0.7453, "step": 4622 }, { "epoch": 0.59, "grad_norm": 1.172399640083313, "learning_rate": 3.7617768641209797e-06, "loss": 0.5975, "step": 4623 }, { "epoch": 0.59, "grad_norm": 1.3938887119293213, "learning_rate": 3.7597665778180307e-06, "loss": 0.6943, "step": 4624 }, { "epoch": 0.59, "grad_norm": 1.1383371353149414, "learning_rate": 3.75775650512008e-06, "loss": 0.5426, "step": 4625 }, { "epoch": 0.59, "grad_norm": 1.595054268836975, "learning_rate": 3.755746646373322e-06, "loss": 0.6339, "step": 4626 }, { "epoch": 0.59, "grad_norm": 1.3873826265335083, "learning_rate": 3.7537370019239135e-06, "loss": 0.597, "step": 4627 }, { "epoch": 0.59, "grad_norm": 1.1629124879837036, "learning_rate": 3.7517275721179736e-06, "loss": 0.6172, "step": 4628 }, { "epoch": 0.59, "grad_norm": 1.3999043703079224, "learning_rate": 3.7497183573015893e-06, "loss": 0.5355, "step": 4629 }, { "epoch": 0.59, "grad_norm": 1.1866925954818726, "learning_rate": 3.7477093578208047e-06, "loss": 0.5889, "step": 4630 }, { "epoch": 0.59, "grad_norm": 1.3769731521606445, "learning_rate": 3.74570057402163e-06, "loss": 0.5985, "step": 4631 }, { "epoch": 0.59, "grad_norm": 1.4890729188919067, "learning_rate": 3.743692006250036e-06, "loss": 0.5906, "step": 4632 }, { "epoch": 0.59, "grad_norm": 1.5705612897872925, "learning_rate": 3.741683654851959e-06, "loss": 0.6187, "step": 4633 }, { "epoch": 0.59, "grad_norm": 1.6368554830551147, "learning_rate": 3.739675520173296e-06, "loss": 0.6418, "step": 4634 }, { "epoch": 0.59, "grad_norm": 1.1601102352142334, "learning_rate": 3.737667602559908e-06, "loss": 0.5984, "step": 4635 }, { "epoch": 0.59, "grad_norm": 2.6461944580078125, "learning_rate": 3.7356599023576166e-06, "loss": 0.5892, "step": 4636 }, { "epoch": 0.59, "grad_norm": 1.2127076387405396, "learning_rate": 3.7336524199122094e-06, "loss": 0.5839, "step": 4637 }, { "epoch": 0.59, "grad_norm": 1.202880859375, "learning_rate": 3.7316451555694327e-06, "loss": 0.6829, "step": 4638 }, { "epoch": 0.59, "grad_norm": 1.4662011861801147, "learning_rate": 3.7296381096749983e-06, "loss": 0.5418, "step": 4639 }, { "epoch": 0.59, "grad_norm": 1.1767808198928833, "learning_rate": 3.7276312825745775e-06, "loss": 0.5559, "step": 4640 }, { "epoch": 0.59, "grad_norm": 1.2574052810668945, "learning_rate": 3.7256246746138082e-06, "loss": 0.6815, "step": 4641 }, { "epoch": 0.59, "grad_norm": 1.4866012334823608, "learning_rate": 3.7236182861382843e-06, "loss": 0.6487, "step": 4642 }, { "epoch": 0.59, "grad_norm": 1.2232147455215454, "learning_rate": 3.721612117493568e-06, "loss": 0.6718, "step": 4643 }, { "epoch": 0.59, "grad_norm": 1.3642147779464722, "learning_rate": 3.7196061690251795e-06, "loss": 0.5534, "step": 4644 }, { "epoch": 0.6, "grad_norm": 1.121066927909851, "learning_rate": 3.7176004410786047e-06, "loss": 0.5487, "step": 4645 }, { "epoch": 0.6, "grad_norm": 1.6986517906188965, "learning_rate": 3.7155949339992856e-06, "loss": 0.7287, "step": 4646 }, { "epoch": 0.6, "grad_norm": 1.4121288061141968, "learning_rate": 3.713589648132634e-06, "loss": 0.5755, "step": 4647 }, { "epoch": 0.6, "grad_norm": 1.3280876874923706, "learning_rate": 3.7115845838240193e-06, "loss": 0.5657, "step": 4648 }, { "epoch": 0.6, "grad_norm": 1.3368446826934814, "learning_rate": 3.7095797414187707e-06, "loss": 0.6326, "step": 4649 }, { "epoch": 0.6, "grad_norm": 1.0597262382507324, "learning_rate": 3.707575121262185e-06, "loss": 0.5701, "step": 4650 }, { "epoch": 0.6, "grad_norm": 1.1828985214233398, "learning_rate": 3.7055707236995123e-06, "loss": 0.5832, "step": 4651 }, { "epoch": 0.6, "grad_norm": 1.4886174201965332, "learning_rate": 3.7035665490759743e-06, "loss": 0.6071, "step": 4652 }, { "epoch": 0.6, "grad_norm": 1.7236698865890503, "learning_rate": 3.7015625977367476e-06, "loss": 0.6029, "step": 4653 }, { "epoch": 0.6, "grad_norm": 1.32156240940094, "learning_rate": 3.6995588700269697e-06, "loss": 0.5823, "step": 4654 }, { "epoch": 0.6, "grad_norm": 1.207321047782898, "learning_rate": 3.6975553662917453e-06, "loss": 0.5992, "step": 4655 }, { "epoch": 0.6, "grad_norm": 1.2201660871505737, "learning_rate": 3.695552086876135e-06, "loss": 0.4492, "step": 4656 }, { "epoch": 0.6, "grad_norm": 1.4512239694595337, "learning_rate": 3.6935490321251655e-06, "loss": 0.6295, "step": 4657 }, { "epoch": 0.6, "grad_norm": 1.6261647939682007, "learning_rate": 3.691546202383819e-06, "loss": 0.5875, "step": 4658 }, { "epoch": 0.6, "grad_norm": 1.0876991748809814, "learning_rate": 3.689543597997044e-06, "loss": 0.5359, "step": 4659 }, { "epoch": 0.6, "grad_norm": 1.2004252672195435, "learning_rate": 3.687541219309748e-06, "loss": 0.5935, "step": 4660 }, { "epoch": 0.6, "grad_norm": 1.3604297637939453, "learning_rate": 3.685539066666802e-06, "loss": 0.5974, "step": 4661 }, { "epoch": 0.6, "grad_norm": 1.2416634559631348, "learning_rate": 3.683537140413032e-06, "loss": 0.6314, "step": 4662 }, { "epoch": 0.6, "grad_norm": 1.1395695209503174, "learning_rate": 3.6815354408932314e-06, "loss": 0.6032, "step": 4663 }, { "epoch": 0.6, "grad_norm": 1.3960171937942505, "learning_rate": 3.6795339684521535e-06, "loss": 0.6214, "step": 4664 }, { "epoch": 0.6, "grad_norm": 1.3174827098846436, "learning_rate": 3.67753272343451e-06, "loss": 0.6025, "step": 4665 }, { "epoch": 0.6, "grad_norm": 1.3568823337554932, "learning_rate": 3.675531706184975e-06, "loss": 0.6243, "step": 4666 }, { "epoch": 0.6, "grad_norm": 1.2531613111495972, "learning_rate": 3.6735309170481825e-06, "loss": 0.588, "step": 4667 }, { "epoch": 0.6, "grad_norm": 1.1361048221588135, "learning_rate": 3.6715303563687286e-06, "loss": 0.6366, "step": 4668 }, { "epoch": 0.6, "grad_norm": 1.0308409929275513, "learning_rate": 3.6695300244911676e-06, "loss": 0.485, "step": 4669 }, { "epoch": 0.6, "grad_norm": 1.2207977771759033, "learning_rate": 3.667529921760018e-06, "loss": 0.6058, "step": 4670 }, { "epoch": 0.6, "grad_norm": 1.139939546585083, "learning_rate": 3.6655300485197556e-06, "loss": 0.5526, "step": 4671 }, { "epoch": 0.6, "grad_norm": 1.3506957292556763, "learning_rate": 3.663530405114818e-06, "loss": 0.6159, "step": 4672 }, { "epoch": 0.6, "grad_norm": 1.240322232246399, "learning_rate": 3.6615309918896034e-06, "loss": 0.5767, "step": 4673 }, { "epoch": 0.6, "grad_norm": 3.595827579498291, "learning_rate": 3.6595318091884707e-06, "loss": 0.6388, "step": 4674 }, { "epoch": 0.6, "grad_norm": 1.5169620513916016, "learning_rate": 3.6575328573557367e-06, "loss": 0.6659, "step": 4675 }, { "epoch": 0.6, "grad_norm": 1.1293915510177612, "learning_rate": 3.655534136735682e-06, "loss": 0.5856, "step": 4676 }, { "epoch": 0.6, "grad_norm": 1.3506858348846436, "learning_rate": 3.6535356476725447e-06, "loss": 0.6416, "step": 4677 }, { "epoch": 0.6, "grad_norm": 1.2166670560836792, "learning_rate": 3.6515373905105254e-06, "loss": 0.6027, "step": 4678 }, { "epoch": 0.6, "grad_norm": 2.4438283443450928, "learning_rate": 3.6495393655937806e-06, "loss": 0.6248, "step": 4679 }, { "epoch": 0.6, "grad_norm": 1.3515195846557617, "learning_rate": 3.6475415732664297e-06, "loss": 0.5362, "step": 4680 }, { "epoch": 0.6, "grad_norm": 1.5036596059799194, "learning_rate": 3.6455440138725553e-06, "loss": 0.6032, "step": 4681 }, { "epoch": 0.6, "grad_norm": 1.2506636381149292, "learning_rate": 3.6435466877561933e-06, "loss": 0.6498, "step": 4682 }, { "epoch": 0.6, "grad_norm": 1.2424179315567017, "learning_rate": 3.6415495952613446e-06, "loss": 0.6529, "step": 4683 }, { "epoch": 0.6, "grad_norm": 1.3675919771194458, "learning_rate": 3.639552736731965e-06, "loss": 0.5896, "step": 4684 }, { "epoch": 0.6, "grad_norm": 1.2366334199905396, "learning_rate": 3.6375561125119752e-06, "loss": 0.7092, "step": 4685 }, { "epoch": 0.6, "grad_norm": 1.2184380292892456, "learning_rate": 3.635559722945252e-06, "loss": 0.6131, "step": 4686 }, { "epoch": 0.6, "grad_norm": 1.219530701637268, "learning_rate": 3.6335635683756343e-06, "loss": 0.5639, "step": 4687 }, { "epoch": 0.6, "grad_norm": 1.363353967666626, "learning_rate": 3.6315676491469165e-06, "loss": 0.5838, "step": 4688 }, { "epoch": 0.6, "grad_norm": 1.2268239259719849, "learning_rate": 3.629571965602858e-06, "loss": 0.5609, "step": 4689 }, { "epoch": 0.6, "grad_norm": 1.1857258081436157, "learning_rate": 3.6275765180871723e-06, "loss": 0.6544, "step": 4690 }, { "epoch": 0.6, "grad_norm": 1.2621301412582397, "learning_rate": 3.625581306943537e-06, "loss": 0.5046, "step": 4691 }, { "epoch": 0.6, "grad_norm": 1.828747034072876, "learning_rate": 3.623586332515584e-06, "loss": 0.6152, "step": 4692 }, { "epoch": 0.6, "grad_norm": 1.2840453386306763, "learning_rate": 3.6215915951469105e-06, "loss": 0.7242, "step": 4693 }, { "epoch": 0.6, "grad_norm": 2.8199918270111084, "learning_rate": 3.6195970951810653e-06, "loss": 0.5506, "step": 4694 }, { "epoch": 0.6, "grad_norm": 1.6158698797225952, "learning_rate": 3.6176028329615654e-06, "loss": 0.6238, "step": 4695 }, { "epoch": 0.6, "grad_norm": 1.960234522819519, "learning_rate": 3.615608808831877e-06, "loss": 0.6093, "step": 4696 }, { "epoch": 0.6, "grad_norm": 1.1626479625701904, "learning_rate": 3.6136150231354317e-06, "loss": 0.6119, "step": 4697 }, { "epoch": 0.6, "grad_norm": 1.4083420038223267, "learning_rate": 3.611621476215621e-06, "loss": 0.5713, "step": 4698 }, { "epoch": 0.6, "grad_norm": 1.3749487400054932, "learning_rate": 3.609628168415791e-06, "loss": 0.6795, "step": 4699 }, { "epoch": 0.6, "grad_norm": 1.4019120931625366, "learning_rate": 3.6076351000792487e-06, "loss": 0.643, "step": 4700 }, { "epoch": 0.6, "grad_norm": 1.2229111194610596, "learning_rate": 3.60564227154926e-06, "loss": 0.6225, "step": 4701 }, { "epoch": 0.6, "grad_norm": 1.121282696723938, "learning_rate": 3.6036496831690483e-06, "loss": 0.5681, "step": 4702 }, { "epoch": 0.6, "grad_norm": 1.2437260150909424, "learning_rate": 3.601657335281797e-06, "loss": 0.6019, "step": 4703 }, { "epoch": 0.6, "grad_norm": 1.2536563873291016, "learning_rate": 3.5996652282306467e-06, "loss": 0.6497, "step": 4704 }, { "epoch": 0.6, "grad_norm": 1.2499635219573975, "learning_rate": 3.5976733623586986e-06, "loss": 0.574, "step": 4705 }, { "epoch": 0.6, "grad_norm": 1.0934405326843262, "learning_rate": 3.5956817380090092e-06, "loss": 0.617, "step": 4706 }, { "epoch": 0.6, "grad_norm": 1.4827922582626343, "learning_rate": 3.5936903555245983e-06, "loss": 0.6322, "step": 4707 }, { "epoch": 0.6, "grad_norm": 1.30734121799469, "learning_rate": 3.5916992152484382e-06, "loss": 0.6527, "step": 4708 }, { "epoch": 0.6, "grad_norm": 1.2423964738845825, "learning_rate": 3.5897083175234644e-06, "loss": 0.632, "step": 4709 }, { "epoch": 0.6, "grad_norm": 1.3838889598846436, "learning_rate": 3.587717662692567e-06, "loss": 0.6033, "step": 4710 }, { "epoch": 0.6, "grad_norm": 1.2591181993484497, "learning_rate": 3.5857272510985964e-06, "loss": 0.6277, "step": 4711 }, { "epoch": 0.6, "grad_norm": 1.1490346193313599, "learning_rate": 3.583737083084361e-06, "loss": 0.5655, "step": 4712 }, { "epoch": 0.6, "grad_norm": 1.445289134979248, "learning_rate": 3.5817471589926266e-06, "loss": 0.6682, "step": 4713 }, { "epoch": 0.6, "grad_norm": 1.6664557456970215, "learning_rate": 3.579757479166115e-06, "loss": 0.5634, "step": 4714 }, { "epoch": 0.6, "grad_norm": 1.124857783317566, "learning_rate": 3.5777680439475115e-06, "loss": 0.7383, "step": 4715 }, { "epoch": 0.6, "grad_norm": 1.228276014328003, "learning_rate": 3.5757788536794547e-06, "loss": 0.5617, "step": 4716 }, { "epoch": 0.6, "grad_norm": 1.1751571893692017, "learning_rate": 3.573789908704542e-06, "loss": 0.5534, "step": 4717 }, { "epoch": 0.6, "grad_norm": 1.3536722660064697, "learning_rate": 3.5718012093653294e-06, "loss": 0.568, "step": 4718 }, { "epoch": 0.6, "grad_norm": 1.338544249534607, "learning_rate": 3.5698127560043277e-06, "loss": 0.554, "step": 4719 }, { "epoch": 0.6, "grad_norm": 1.0579848289489746, "learning_rate": 3.567824548964011e-06, "loss": 0.4984, "step": 4720 }, { "epoch": 0.6, "grad_norm": 1.7330485582351685, "learning_rate": 3.565836588586804e-06, "loss": 0.6076, "step": 4721 }, { "epoch": 0.6, "grad_norm": 1.2623368501663208, "learning_rate": 3.5638488752150956e-06, "loss": 0.6481, "step": 4722 }, { "epoch": 0.61, "grad_norm": 1.1102206707000732, "learning_rate": 3.5618614091912264e-06, "loss": 0.5571, "step": 4723 }, { "epoch": 0.61, "grad_norm": 1.2276370525360107, "learning_rate": 3.559874190857499e-06, "loss": 0.7092, "step": 4724 }, { "epoch": 0.61, "grad_norm": 1.190426230430603, "learning_rate": 3.55788722055617e-06, "loss": 0.6076, "step": 4725 }, { "epoch": 0.61, "grad_norm": 1.1113755702972412, "learning_rate": 3.555900498629456e-06, "loss": 0.6253, "step": 4726 }, { "epoch": 0.61, "grad_norm": 1.146645426750183, "learning_rate": 3.5539140254195277e-06, "loss": 0.5932, "step": 4727 }, { "epoch": 0.61, "grad_norm": 1.3581435680389404, "learning_rate": 3.5519278012685164e-06, "loss": 0.6639, "step": 4728 }, { "epoch": 0.61, "grad_norm": 1.19074285030365, "learning_rate": 3.5499418265185082e-06, "loss": 0.6644, "step": 4729 }, { "epoch": 0.61, "grad_norm": 1.2494276762008667, "learning_rate": 3.547956101511547e-06, "loss": 0.5994, "step": 4730 }, { "epoch": 0.61, "grad_norm": 1.3655177354812622, "learning_rate": 3.545970626589631e-06, "loss": 0.6564, "step": 4731 }, { "epoch": 0.61, "grad_norm": 1.3489915132522583, "learning_rate": 3.5439854020947216e-06, "loss": 0.7015, "step": 4732 }, { "epoch": 0.61, "grad_norm": 1.1399732828140259, "learning_rate": 3.5420004283687338e-06, "loss": 0.594, "step": 4733 }, { "epoch": 0.61, "grad_norm": 1.2338383197784424, "learning_rate": 3.5400157057535354e-06, "loss": 0.6912, "step": 4734 }, { "epoch": 0.61, "grad_norm": 1.1922136545181274, "learning_rate": 3.5380312345909582e-06, "loss": 0.5775, "step": 4735 }, { "epoch": 0.61, "grad_norm": 1.2153799533843994, "learning_rate": 3.5360470152227846e-06, "loss": 0.5943, "step": 4736 }, { "epoch": 0.61, "grad_norm": 1.3712053298950195, "learning_rate": 3.5340630479907567e-06, "loss": 0.6236, "step": 4737 }, { "epoch": 0.61, "grad_norm": 1.0961687564849854, "learning_rate": 3.532079333236571e-06, "loss": 0.5619, "step": 4738 }, { "epoch": 0.61, "grad_norm": 1.1054195165634155, "learning_rate": 3.5300958713018858e-06, "loss": 0.5613, "step": 4739 }, { "epoch": 0.61, "grad_norm": 1.196414589881897, "learning_rate": 3.528112662528308e-06, "loss": 0.6143, "step": 4740 }, { "epoch": 0.61, "grad_norm": 1.1598870754241943, "learning_rate": 3.5261297072574085e-06, "loss": 0.6768, "step": 4741 }, { "epoch": 0.61, "grad_norm": 1.3299384117126465, "learning_rate": 3.524147005830708e-06, "loss": 0.6039, "step": 4742 }, { "epoch": 0.61, "grad_norm": 1.8477388620376587, "learning_rate": 3.522164558589689e-06, "loss": 0.5782, "step": 4743 }, { "epoch": 0.61, "grad_norm": 1.4437540769577026, "learning_rate": 3.5201823658757855e-06, "loss": 0.6431, "step": 4744 }, { "epoch": 0.61, "grad_norm": 1.136198878288269, "learning_rate": 3.5182004280303927e-06, "loss": 0.5964, "step": 4745 }, { "epoch": 0.61, "grad_norm": 1.2844723463058472, "learning_rate": 3.516218745394857e-06, "loss": 0.5537, "step": 4746 }, { "epoch": 0.61, "grad_norm": 1.1594425439834595, "learning_rate": 3.5142373183104823e-06, "loss": 0.7255, "step": 4747 }, { "epoch": 0.61, "grad_norm": 1.6344101428985596, "learning_rate": 3.5122561471185292e-06, "loss": 0.5981, "step": 4748 }, { "epoch": 0.61, "grad_norm": 1.394809603691101, "learning_rate": 3.5102752321602163e-06, "loss": 0.5609, "step": 4749 }, { "epoch": 0.61, "grad_norm": 2.0609498023986816, "learning_rate": 3.508294573776716e-06, "loss": 0.5798, "step": 4750 }, { "epoch": 0.61, "grad_norm": 1.3028736114501953, "learning_rate": 3.5063141723091555e-06, "loss": 0.5482, "step": 4751 }, { "epoch": 0.61, "grad_norm": 1.1946570873260498, "learning_rate": 3.504334028098617e-06, "loss": 0.5992, "step": 4752 }, { "epoch": 0.61, "grad_norm": 1.6412652730941772, "learning_rate": 3.502354141486143e-06, "loss": 0.6044, "step": 4753 }, { "epoch": 0.61, "grad_norm": 2.494856119155884, "learning_rate": 3.5003745128127263e-06, "loss": 0.5425, "step": 4754 }, { "epoch": 0.61, "grad_norm": 1.1414670944213867, "learning_rate": 3.4983951424193196e-06, "loss": 0.756, "step": 4755 }, { "epoch": 0.61, "grad_norm": 1.2341411113739014, "learning_rate": 3.4964160306468274e-06, "loss": 0.5684, "step": 4756 }, { "epoch": 0.61, "grad_norm": 1.4616705179214478, "learning_rate": 3.4944371778361143e-06, "loss": 0.5613, "step": 4757 }, { "epoch": 0.61, "grad_norm": 1.233243465423584, "learning_rate": 3.4924585843279933e-06, "loss": 0.6797, "step": 4758 }, { "epoch": 0.61, "grad_norm": 1.328782081604004, "learning_rate": 3.4904802504632406e-06, "loss": 0.6308, "step": 4759 }, { "epoch": 0.61, "grad_norm": 1.412166714668274, "learning_rate": 3.488502176582582e-06, "loss": 0.5519, "step": 4760 }, { "epoch": 0.61, "grad_norm": 1.19275963306427, "learning_rate": 3.4865243630267022e-06, "loss": 0.5534, "step": 4761 }, { "epoch": 0.61, "grad_norm": 1.3603435754776, "learning_rate": 3.484546810136237e-06, "loss": 0.71, "step": 4762 }, { "epoch": 0.61, "grad_norm": 1.1178488731384277, "learning_rate": 3.4825695182517823e-06, "loss": 0.6962, "step": 4763 }, { "epoch": 0.61, "grad_norm": 1.11331307888031, "learning_rate": 3.4805924877138837e-06, "loss": 0.6168, "step": 4764 }, { "epoch": 0.61, "grad_norm": 1.4276987314224243, "learning_rate": 3.478615718863047e-06, "loss": 0.6107, "step": 4765 }, { "epoch": 0.61, "grad_norm": 1.1094125509262085, "learning_rate": 3.4766392120397274e-06, "loss": 0.5935, "step": 4766 }, { "epoch": 0.61, "grad_norm": 1.393486738204956, "learning_rate": 3.4746629675843413e-06, "loss": 0.6608, "step": 4767 }, { "epoch": 0.61, "grad_norm": 1.3646267652511597, "learning_rate": 3.472686985837257e-06, "loss": 0.7118, "step": 4768 }, { "epoch": 0.61, "grad_norm": 1.2422285079956055, "learning_rate": 3.470711267138794e-06, "loss": 0.5005, "step": 4769 }, { "epoch": 0.61, "grad_norm": 1.4949270486831665, "learning_rate": 3.4687358118292325e-06, "loss": 0.5937, "step": 4770 }, { "epoch": 0.61, "grad_norm": 1.2646280527114868, "learning_rate": 3.4667606202488014e-06, "loss": 0.5595, "step": 4771 }, { "epoch": 0.61, "grad_norm": 1.8542555570602417, "learning_rate": 3.464785692737691e-06, "loss": 0.6481, "step": 4772 }, { "epoch": 0.61, "grad_norm": 1.4289233684539795, "learning_rate": 3.4628110296360394e-06, "loss": 0.6218, "step": 4773 }, { "epoch": 0.61, "grad_norm": 1.189062237739563, "learning_rate": 3.460836631283945e-06, "loss": 0.5669, "step": 4774 }, { "epoch": 0.61, "grad_norm": 1.6110448837280273, "learning_rate": 3.4588624980214547e-06, "loss": 0.6313, "step": 4775 }, { "epoch": 0.61, "grad_norm": 1.2245473861694336, "learning_rate": 3.4568886301885753e-06, "loss": 0.651, "step": 4776 }, { "epoch": 0.61, "grad_norm": 1.3276503086090088, "learning_rate": 3.4549150281252635e-06, "loss": 0.6339, "step": 4777 }, { "epoch": 0.61, "grad_norm": 1.3305314779281616, "learning_rate": 3.4529416921714344e-06, "loss": 0.6083, "step": 4778 }, { "epoch": 0.61, "grad_norm": 1.6480473279953003, "learning_rate": 3.450968622666952e-06, "loss": 0.6153, "step": 4779 }, { "epoch": 0.61, "grad_norm": 1.2591640949249268, "learning_rate": 3.4489958199516404e-06, "loss": 0.7308, "step": 4780 }, { "epoch": 0.61, "grad_norm": 1.382493019104004, "learning_rate": 3.4470232843652728e-06, "loss": 0.6487, "step": 4781 }, { "epoch": 0.61, "grad_norm": 1.3461976051330566, "learning_rate": 3.4450510162475797e-06, "loss": 0.664, "step": 4782 }, { "epoch": 0.61, "grad_norm": 1.8919711112976074, "learning_rate": 3.4430790159382414e-06, "loss": 0.6032, "step": 4783 }, { "epoch": 0.61, "grad_norm": 1.4023423194885254, "learning_rate": 3.441107283776899e-06, "loss": 0.6103, "step": 4784 }, { "epoch": 0.61, "grad_norm": 1.211699366569519, "learning_rate": 3.4391358201031412e-06, "loss": 0.5951, "step": 4785 }, { "epoch": 0.61, "grad_norm": 1.2939444780349731, "learning_rate": 3.4371646252565114e-06, "loss": 0.6142, "step": 4786 }, { "epoch": 0.61, "grad_norm": 1.2708574533462524, "learning_rate": 3.4351936995765112e-06, "loss": 0.5485, "step": 4787 }, { "epoch": 0.61, "grad_norm": 1.2561215162277222, "learning_rate": 3.4332230434025887e-06, "loss": 0.5625, "step": 4788 }, { "epoch": 0.61, "grad_norm": 1.0576465129852295, "learning_rate": 3.4312526570741524e-06, "loss": 0.693, "step": 4789 }, { "epoch": 0.61, "grad_norm": 1.220123052597046, "learning_rate": 3.4292825409305586e-06, "loss": 0.641, "step": 4790 }, { "epoch": 0.61, "grad_norm": 1.1228139400482178, "learning_rate": 3.4273126953111226e-06, "loss": 0.7457, "step": 4791 }, { "epoch": 0.61, "grad_norm": 1.302809715270996, "learning_rate": 3.425343120555107e-06, "loss": 0.6077, "step": 4792 }, { "epoch": 0.61, "grad_norm": 1.169256567955017, "learning_rate": 3.4233738170017338e-06, "loss": 0.6243, "step": 4793 }, { "epoch": 0.61, "grad_norm": 1.4685825109481812, "learning_rate": 3.4214047849901743e-06, "loss": 0.566, "step": 4794 }, { "epoch": 0.61, "grad_norm": 1.632744550704956, "learning_rate": 3.4194360248595547e-06, "loss": 0.5817, "step": 4795 }, { "epoch": 0.61, "grad_norm": 1.2418441772460938, "learning_rate": 3.417467536948954e-06, "loss": 0.5698, "step": 4796 }, { "epoch": 0.61, "grad_norm": 1.720788598060608, "learning_rate": 3.415499321597403e-06, "loss": 0.6484, "step": 4797 }, { "epoch": 0.61, "grad_norm": 1.3634127378463745, "learning_rate": 3.4135313791438885e-06, "loss": 0.7072, "step": 4798 }, { "epoch": 0.61, "grad_norm": 1.0684276819229126, "learning_rate": 3.411563709927347e-06, "loss": 0.6948, "step": 4799 }, { "epoch": 0.61, "grad_norm": 1.3760558366775513, "learning_rate": 3.409596314286669e-06, "loss": 0.5563, "step": 4800 }, { "epoch": 0.62, "grad_norm": 1.910668969154358, "learning_rate": 3.4076291925607017e-06, "loss": 0.692, "step": 4801 }, { "epoch": 0.62, "grad_norm": 1.1838176250457764, "learning_rate": 3.4056623450882388e-06, "loss": 0.6427, "step": 4802 }, { "epoch": 0.62, "grad_norm": 1.3909626007080078, "learning_rate": 3.403695772208032e-06, "loss": 0.5647, "step": 4803 }, { "epoch": 0.62, "grad_norm": 1.0790553092956543, "learning_rate": 3.4017294742587812e-06, "loss": 0.6149, "step": 4804 }, { "epoch": 0.62, "grad_norm": 1.1320269107818604, "learning_rate": 3.399763451579144e-06, "loss": 0.7715, "step": 4805 }, { "epoch": 0.62, "grad_norm": 1.1932052373886108, "learning_rate": 3.3977977045077247e-06, "loss": 0.6292, "step": 4806 }, { "epoch": 0.62, "grad_norm": 1.2008845806121826, "learning_rate": 3.3958322333830864e-06, "loss": 0.6292, "step": 4807 }, { "epoch": 0.62, "grad_norm": 1.3319114446640015, "learning_rate": 3.393867038543738e-06, "loss": 0.5772, "step": 4808 }, { "epoch": 0.62, "grad_norm": 1.5973466634750366, "learning_rate": 3.3919021203281475e-06, "loss": 0.6426, "step": 4809 }, { "epoch": 0.62, "grad_norm": 1.542841911315918, "learning_rate": 3.389937479074731e-06, "loss": 0.5787, "step": 4810 }, { "epoch": 0.62, "grad_norm": 1.9632922410964966, "learning_rate": 3.3879731151218575e-06, "loss": 0.5883, "step": 4811 }, { "epoch": 0.62, "grad_norm": 1.3410135507583618, "learning_rate": 3.3860090288078496e-06, "loss": 0.6418, "step": 4812 }, { "epoch": 0.62, "grad_norm": 1.061713695526123, "learning_rate": 3.3840452204709806e-06, "loss": 0.6125, "step": 4813 }, { "epoch": 0.62, "grad_norm": 1.0856437683105469, "learning_rate": 3.382081690449477e-06, "loss": 0.5871, "step": 4814 }, { "epoch": 0.62, "grad_norm": 1.18121337890625, "learning_rate": 3.3801184390815173e-06, "loss": 0.5499, "step": 4815 }, { "epoch": 0.62, "grad_norm": 1.4576302766799927, "learning_rate": 3.37815546670523e-06, "loss": 0.5686, "step": 4816 }, { "epoch": 0.62, "grad_norm": 0.9863116145133972, "learning_rate": 3.3761927736586976e-06, "loss": 0.58, "step": 4817 }, { "epoch": 0.62, "grad_norm": 1.1071988344192505, "learning_rate": 3.3742303602799565e-06, "loss": 0.6325, "step": 4818 }, { "epoch": 0.62, "grad_norm": 1.518639087677002, "learning_rate": 3.3722682269069906e-06, "loss": 0.5877, "step": 4819 }, { "epoch": 0.62, "grad_norm": 1.3329269886016846, "learning_rate": 3.370306373877738e-06, "loss": 0.5854, "step": 4820 }, { "epoch": 0.62, "grad_norm": 1.4260133504867554, "learning_rate": 3.368344801530087e-06, "loss": 0.6094, "step": 4821 }, { "epoch": 0.62, "grad_norm": 1.498528242111206, "learning_rate": 3.3663835102018803e-06, "loss": 0.5885, "step": 4822 }, { "epoch": 0.62, "grad_norm": 1.4872510433197021, "learning_rate": 3.364422500230908e-06, "loss": 0.581, "step": 4823 }, { "epoch": 0.62, "grad_norm": 1.251771092414856, "learning_rate": 3.3624617719549178e-06, "loss": 0.6102, "step": 4824 }, { "epoch": 0.62, "grad_norm": 1.566983938217163, "learning_rate": 3.3605013257116016e-06, "loss": 0.6619, "step": 4825 }, { "epoch": 0.62, "grad_norm": 1.4458528757095337, "learning_rate": 3.3585411618386086e-06, "loss": 0.5325, "step": 4826 }, { "epoch": 0.62, "grad_norm": 1.2375273704528809, "learning_rate": 3.356581280673536e-06, "loss": 0.5656, "step": 4827 }, { "epoch": 0.62, "grad_norm": 1.2488701343536377, "learning_rate": 3.3546216825539347e-06, "loss": 0.5597, "step": 4828 }, { "epoch": 0.62, "grad_norm": 1.3934226036071777, "learning_rate": 3.3526623678173043e-06, "loss": 0.5244, "step": 4829 }, { "epoch": 0.62, "grad_norm": 1.497615098953247, "learning_rate": 3.350703336801099e-06, "loss": 0.5815, "step": 4830 }, { "epoch": 0.62, "grad_norm": 1.1210520267486572, "learning_rate": 3.3487445898427195e-06, "loss": 0.5517, "step": 4831 }, { "epoch": 0.62, "grad_norm": 1.2002229690551758, "learning_rate": 3.346786127279522e-06, "loss": 0.6144, "step": 4832 }, { "epoch": 0.62, "grad_norm": 1.2714262008666992, "learning_rate": 3.34482794944881e-06, "loss": 0.5459, "step": 4833 }, { "epoch": 0.62, "grad_norm": 1.2544283866882324, "learning_rate": 3.3428700566878407e-06, "loss": 0.5844, "step": 4834 }, { "epoch": 0.62, "grad_norm": 1.3227342367172241, "learning_rate": 3.340912449333824e-06, "loss": 0.5983, "step": 4835 }, { "epoch": 0.62, "grad_norm": 1.1427699327468872, "learning_rate": 3.3389551277239143e-06, "loss": 0.5442, "step": 4836 }, { "epoch": 0.62, "grad_norm": 1.4273881912231445, "learning_rate": 3.3369980921952227e-06, "loss": 0.5977, "step": 4837 }, { "epoch": 0.62, "grad_norm": 1.286092758178711, "learning_rate": 3.335041343084807e-06, "loss": 0.7324, "step": 4838 }, { "epoch": 0.62, "grad_norm": 1.303217887878418, "learning_rate": 3.3330848807296796e-06, "loss": 0.6638, "step": 4839 }, { "epoch": 0.62, "grad_norm": 1.4378916025161743, "learning_rate": 3.331128705466799e-06, "loss": 0.63, "step": 4840 }, { "epoch": 0.62, "grad_norm": 1.227399468421936, "learning_rate": 3.3291728176330786e-06, "loss": 0.625, "step": 4841 }, { "epoch": 0.62, "grad_norm": 1.3684594631195068, "learning_rate": 3.327217217565379e-06, "loss": 0.6153, "step": 4842 }, { "epoch": 0.62, "grad_norm": 1.5865708589553833, "learning_rate": 3.325261905600514e-06, "loss": 0.6505, "step": 4843 }, { "epoch": 0.62, "grad_norm": 1.1837784051895142, "learning_rate": 3.3233068820752447e-06, "loss": 0.5805, "step": 4844 }, { "epoch": 0.62, "grad_norm": 1.0995562076568604, "learning_rate": 3.321352147326285e-06, "loss": 0.7422, "step": 4845 }, { "epoch": 0.62, "grad_norm": 1.242751121520996, "learning_rate": 3.3193977016902988e-06, "loss": 0.6068, "step": 4846 }, { "epoch": 0.62, "grad_norm": 1.5467709302902222, "learning_rate": 3.317443545503898e-06, "loss": 0.5951, "step": 4847 }, { "epoch": 0.62, "grad_norm": 1.178579568862915, "learning_rate": 3.315489679103648e-06, "loss": 0.515, "step": 4848 }, { "epoch": 0.62, "grad_norm": 1.3723570108413696, "learning_rate": 3.3135361028260604e-06, "loss": 0.5833, "step": 4849 }, { "epoch": 0.62, "grad_norm": 2.414771795272827, "learning_rate": 3.3115828170076026e-06, "loss": 0.6358, "step": 4850 }, { "epoch": 0.62, "grad_norm": 1.168471336364746, "learning_rate": 3.3096298219846835e-06, "loss": 0.5542, "step": 4851 }, { "epoch": 0.62, "grad_norm": 1.5970467329025269, "learning_rate": 3.3076771180936707e-06, "loss": 0.7044, "step": 4852 }, { "epoch": 0.62, "grad_norm": 1.1945412158966064, "learning_rate": 3.305724705670877e-06, "loss": 0.5253, "step": 4853 }, { "epoch": 0.62, "grad_norm": 1.2077255249023438, "learning_rate": 3.3037725850525648e-06, "loss": 0.6126, "step": 4854 }, { "epoch": 0.62, "grad_norm": 1.2469735145568848, "learning_rate": 3.3018207565749484e-06, "loss": 0.5933, "step": 4855 }, { "epoch": 0.62, "grad_norm": 1.3545241355895996, "learning_rate": 3.2998692205741893e-06, "loss": 0.5633, "step": 4856 }, { "epoch": 0.62, "grad_norm": 1.2162749767303467, "learning_rate": 3.2979179773864013e-06, "loss": 0.5882, "step": 4857 }, { "epoch": 0.62, "grad_norm": 1.2614243030548096, "learning_rate": 3.295967027347645e-06, "loss": 0.5937, "step": 4858 }, { "epoch": 0.62, "grad_norm": 1.402371883392334, "learning_rate": 3.2940163707939333e-06, "loss": 0.5585, "step": 4859 }, { "epoch": 0.62, "grad_norm": 1.107991099357605, "learning_rate": 3.2920660080612245e-06, "loss": 0.6597, "step": 4860 }, { "epoch": 0.62, "grad_norm": 1.2759180068969727, "learning_rate": 3.2901159394854324e-06, "loss": 0.5066, "step": 4861 }, { "epoch": 0.62, "grad_norm": 1.3145313262939453, "learning_rate": 3.2881661654024144e-06, "loss": 0.5645, "step": 4862 }, { "epoch": 0.62, "grad_norm": 1.1362555027008057, "learning_rate": 3.2862166861479806e-06, "loss": 0.5143, "step": 4863 }, { "epoch": 0.62, "grad_norm": 1.101493000984192, "learning_rate": 3.284267502057888e-06, "loss": 0.6588, "step": 4864 }, { "epoch": 0.62, "grad_norm": 1.3010179996490479, "learning_rate": 3.2823186134678455e-06, "loss": 0.5626, "step": 4865 }, { "epoch": 0.62, "grad_norm": 1.1528490781784058, "learning_rate": 3.280370020713507e-06, "loss": 0.6245, "step": 4866 }, { "epoch": 0.62, "grad_norm": 1.2665293216705322, "learning_rate": 3.2784217241304815e-06, "loss": 0.5826, "step": 4867 }, { "epoch": 0.62, "grad_norm": 1.4382033348083496, "learning_rate": 3.2764737240543192e-06, "loss": 0.565, "step": 4868 }, { "epoch": 0.62, "grad_norm": 1.322492003440857, "learning_rate": 3.2745260208205273e-06, "loss": 0.5892, "step": 4869 }, { "epoch": 0.62, "grad_norm": 1.3495713472366333, "learning_rate": 3.2725786147645577e-06, "loss": 0.6144, "step": 4870 }, { "epoch": 0.62, "grad_norm": 1.2086135149002075, "learning_rate": 3.2706315062218085e-06, "loss": 0.5433, "step": 4871 }, { "epoch": 0.62, "grad_norm": 1.6504979133605957, "learning_rate": 3.268684695527634e-06, "loss": 0.5627, "step": 4872 }, { "epoch": 0.62, "grad_norm": 1.2472084760665894, "learning_rate": 3.2667381830173287e-06, "loss": 0.5854, "step": 4873 }, { "epoch": 0.62, "grad_norm": 1.8455642461776733, "learning_rate": 3.2647919690261433e-06, "loss": 0.5472, "step": 4874 }, { "epoch": 0.62, "grad_norm": 1.2269060611724854, "learning_rate": 3.26284605388927e-06, "loss": 0.5151, "step": 4875 }, { "epoch": 0.62, "grad_norm": 0.9887154698371887, "learning_rate": 3.2609004379418564e-06, "loss": 0.6177, "step": 4876 }, { "epoch": 0.62, "grad_norm": 1.5012866258621216, "learning_rate": 3.2589551215189925e-06, "loss": 0.694, "step": 4877 }, { "epoch": 0.62, "grad_norm": 1.2509353160858154, "learning_rate": 3.257010104955722e-06, "loss": 0.6014, "step": 4878 }, { "epoch": 0.63, "grad_norm": 1.3519951105117798, "learning_rate": 3.255065388587032e-06, "loss": 0.6751, "step": 4879 }, { "epoch": 0.63, "grad_norm": 1.2042193412780762, "learning_rate": 3.253120972747863e-06, "loss": 0.5931, "step": 4880 }, { "epoch": 0.63, "grad_norm": 1.2597451210021973, "learning_rate": 3.251176857773099e-06, "loss": 0.6325, "step": 4881 }, { "epoch": 0.63, "grad_norm": 1.296736478805542, "learning_rate": 3.249233043997576e-06, "loss": 0.6089, "step": 4882 }, { "epoch": 0.63, "grad_norm": 1.2353603839874268, "learning_rate": 3.2472895317560744e-06, "loss": 0.6336, "step": 4883 }, { "epoch": 0.63, "grad_norm": 1.2520846128463745, "learning_rate": 3.2453463213833267e-06, "loss": 0.5977, "step": 4884 }, { "epoch": 0.63, "grad_norm": 1.338379979133606, "learning_rate": 3.2434034132140085e-06, "loss": 0.6035, "step": 4885 }, { "epoch": 0.63, "grad_norm": 1.2667759656906128, "learning_rate": 3.241460807582749e-06, "loss": 0.7181, "step": 4886 }, { "epoch": 0.63, "grad_norm": 1.2850943803787231, "learning_rate": 3.2395185048241235e-06, "loss": 0.4562, "step": 4887 }, { "epoch": 0.63, "grad_norm": 1.7538964748382568, "learning_rate": 3.2375765052726505e-06, "loss": 0.5707, "step": 4888 }, { "epoch": 0.63, "grad_norm": 1.2799808979034424, "learning_rate": 3.2356348092628038e-06, "loss": 0.692, "step": 4889 }, { "epoch": 0.63, "grad_norm": 1.681443452835083, "learning_rate": 3.2336934171289974e-06, "loss": 0.5608, "step": 4890 }, { "epoch": 0.63, "grad_norm": 1.5989010334014893, "learning_rate": 3.2317523292055998e-06, "loss": 0.5823, "step": 4891 }, { "epoch": 0.63, "grad_norm": 1.2030177116394043, "learning_rate": 3.2298115458269212e-06, "loss": 0.6447, "step": 4892 }, { "epoch": 0.63, "grad_norm": 1.3958220481872559, "learning_rate": 3.227871067327225e-06, "loss": 0.6051, "step": 4893 }, { "epoch": 0.63, "grad_norm": 1.4750139713287354, "learning_rate": 3.225930894040717e-06, "loss": 0.6222, "step": 4894 }, { "epoch": 0.63, "grad_norm": 1.5289214849472046, "learning_rate": 3.2239910263015524e-06, "loss": 0.626, "step": 4895 }, { "epoch": 0.63, "grad_norm": 1.1837035417556763, "learning_rate": 3.222051464443836e-06, "loss": 0.5143, "step": 4896 }, { "epoch": 0.63, "grad_norm": 1.1531528234481812, "learning_rate": 3.220112208801615e-06, "loss": 0.6023, "step": 4897 }, { "epoch": 0.63, "grad_norm": 1.0396391153335571, "learning_rate": 3.21817325970889e-06, "loss": 0.5225, "step": 4898 }, { "epoch": 0.63, "grad_norm": 1.2080007791519165, "learning_rate": 3.216234617499603e-06, "loss": 0.4965, "step": 4899 }, { "epoch": 0.63, "grad_norm": 1.1739264726638794, "learning_rate": 3.2142962825076477e-06, "loss": 0.5772, "step": 4900 }, { "epoch": 0.63, "grad_norm": 1.0638878345489502, "learning_rate": 3.2123582550668608e-06, "loss": 0.6668, "step": 4901 }, { "epoch": 0.63, "grad_norm": 1.3319416046142578, "learning_rate": 3.210420535511031e-06, "loss": 0.5559, "step": 4902 }, { "epoch": 0.63, "grad_norm": 1.216744065284729, "learning_rate": 3.2084831241738866e-06, "loss": 0.4514, "step": 4903 }, { "epoch": 0.63, "grad_norm": 1.1501775979995728, "learning_rate": 3.206546021389111e-06, "loss": 0.5571, "step": 4904 }, { "epoch": 0.63, "grad_norm": 1.2810231447219849, "learning_rate": 3.2046092274903316e-06, "loss": 0.7026, "step": 4905 }, { "epoch": 0.63, "grad_norm": 1.1379876136779785, "learning_rate": 3.2026727428111186e-06, "loss": 0.5862, "step": 4906 }, { "epoch": 0.63, "grad_norm": 1.143991231918335, "learning_rate": 3.200736567684995e-06, "loss": 0.5613, "step": 4907 }, { "epoch": 0.63, "grad_norm": 1.5698902606964111, "learning_rate": 3.198800702445425e-06, "loss": 0.5612, "step": 4908 }, { "epoch": 0.63, "grad_norm": 1.3888661861419678, "learning_rate": 3.196865147425824e-06, "loss": 0.6665, "step": 4909 }, { "epoch": 0.63, "grad_norm": 1.4403868913650513, "learning_rate": 3.19492990295955e-06, "loss": 0.5879, "step": 4910 }, { "epoch": 0.63, "grad_norm": 1.532800316810608, "learning_rate": 3.1929949693799134e-06, "loss": 0.7634, "step": 4911 }, { "epoch": 0.63, "grad_norm": 1.1193608045578003, "learning_rate": 3.1910603470201616e-06, "loss": 0.6415, "step": 4912 }, { "epoch": 0.63, "grad_norm": 1.3783601522445679, "learning_rate": 3.189126036213499e-06, "loss": 0.6293, "step": 4913 }, { "epoch": 0.63, "grad_norm": 1.1927382946014404, "learning_rate": 3.1871920372930687e-06, "loss": 0.5678, "step": 4914 }, { "epoch": 0.63, "grad_norm": 1.2124135494232178, "learning_rate": 3.185258350591963e-06, "loss": 0.6184, "step": 4915 }, { "epoch": 0.63, "grad_norm": 1.0941423177719116, "learning_rate": 3.1833249764432206e-06, "loss": 0.5018, "step": 4916 }, { "epoch": 0.63, "grad_norm": 1.1445602178573608, "learning_rate": 3.1813919151798265e-06, "loss": 0.654, "step": 4917 }, { "epoch": 0.63, "grad_norm": 1.6601332426071167, "learning_rate": 3.1794591671347087e-06, "loss": 0.5667, "step": 4918 }, { "epoch": 0.63, "grad_norm": 1.3656716346740723, "learning_rate": 3.177526732640747e-06, "loss": 0.5714, "step": 4919 }, { "epoch": 0.63, "grad_norm": 1.346827507019043, "learning_rate": 3.1755946120307605e-06, "loss": 0.583, "step": 4920 }, { "epoch": 0.63, "grad_norm": 1.2119982242584229, "learning_rate": 3.173662805637521e-06, "loss": 0.5986, "step": 4921 }, { "epoch": 0.63, "grad_norm": 1.3897769451141357, "learning_rate": 3.1717313137937415e-06, "loss": 0.5864, "step": 4922 }, { "epoch": 0.63, "grad_norm": 1.1082748174667358, "learning_rate": 3.1698001368320817e-06, "loss": 0.5421, "step": 4923 }, { "epoch": 0.63, "grad_norm": 1.4113824367523193, "learning_rate": 3.16786927508515e-06, "loss": 0.588, "step": 4924 }, { "epoch": 0.63, "grad_norm": 1.2338011264801025, "learning_rate": 3.1659387288854937e-06, "loss": 0.5927, "step": 4925 }, { "epoch": 0.63, "grad_norm": 1.165490746498108, "learning_rate": 3.164008498565615e-06, "loss": 0.5956, "step": 4926 }, { "epoch": 0.63, "grad_norm": 1.206478476524353, "learning_rate": 3.1620785844579526e-06, "loss": 0.589, "step": 4927 }, { "epoch": 0.63, "grad_norm": 1.202146291732788, "learning_rate": 3.160148986894899e-06, "loss": 0.6487, "step": 4928 }, { "epoch": 0.63, "grad_norm": 1.1670382022857666, "learning_rate": 3.1582197062087837e-06, "loss": 0.495, "step": 4929 }, { "epoch": 0.63, "grad_norm": 1.248085618019104, "learning_rate": 3.15629074273189e-06, "loss": 0.6223, "step": 4930 }, { "epoch": 0.63, "grad_norm": 1.2666771411895752, "learning_rate": 3.15436209679644e-06, "loss": 0.6845, "step": 4931 }, { "epoch": 0.63, "grad_norm": 1.1898845434188843, "learning_rate": 3.1524337687346065e-06, "loss": 0.5541, "step": 4932 }, { "epoch": 0.63, "grad_norm": 1.34114670753479, "learning_rate": 3.150505758878501e-06, "loss": 0.5768, "step": 4933 }, { "epoch": 0.63, "grad_norm": 1.2350118160247803, "learning_rate": 3.1485780675601878e-06, "loss": 0.5804, "step": 4934 }, { "epoch": 0.63, "grad_norm": 1.3063546419143677, "learning_rate": 3.1466506951116697e-06, "loss": 0.6739, "step": 4935 }, { "epoch": 0.63, "grad_norm": 1.5891927480697632, "learning_rate": 3.1447236418648997e-06, "loss": 0.5966, "step": 4936 }, { "epoch": 0.63, "grad_norm": 1.3554737567901611, "learning_rate": 3.1427969081517705e-06, "loss": 0.5389, "step": 4937 }, { "epoch": 0.63, "grad_norm": 1.4271050691604614, "learning_rate": 3.1408704943041257e-06, "loss": 0.6583, "step": 4938 }, { "epoch": 0.63, "grad_norm": 1.2718795537948608, "learning_rate": 3.1389444006537517e-06, "loss": 0.5947, "step": 4939 }, { "epoch": 0.63, "grad_norm": 1.1423945426940918, "learning_rate": 3.1370186275323756e-06, "loss": 0.5624, "step": 4940 }, { "epoch": 0.63, "grad_norm": 1.107884168624878, "learning_rate": 3.135093175271676e-06, "loss": 0.6333, "step": 4941 }, { "epoch": 0.63, "grad_norm": 1.4218220710754395, "learning_rate": 3.1331680442032697e-06, "loss": 0.6028, "step": 4942 }, { "epoch": 0.63, "grad_norm": 1.4798823595046997, "learning_rate": 3.131243234658724e-06, "loss": 0.5927, "step": 4943 }, { "epoch": 0.63, "grad_norm": 1.1230974197387695, "learning_rate": 3.1293187469695472e-06, "loss": 0.5974, "step": 4944 }, { "epoch": 0.63, "grad_norm": 1.2540388107299805, "learning_rate": 3.127394581467193e-06, "loss": 0.5366, "step": 4945 }, { "epoch": 0.63, "grad_norm": 1.489526629447937, "learning_rate": 3.1254707384830607e-06, "loss": 0.6562, "step": 4946 }, { "epoch": 0.63, "grad_norm": 1.336727261543274, "learning_rate": 3.123547218348491e-06, "loss": 0.6517, "step": 4947 }, { "epoch": 0.63, "grad_norm": 1.2091395854949951, "learning_rate": 3.121624021394774e-06, "loss": 0.6301, "step": 4948 }, { "epoch": 0.63, "grad_norm": 1.2998344898223877, "learning_rate": 3.1197011479531386e-06, "loss": 0.7585, "step": 4949 }, { "epoch": 0.63, "grad_norm": 1.2029203176498413, "learning_rate": 3.1177785983547633e-06, "loss": 0.5922, "step": 4950 }, { "epoch": 0.63, "grad_norm": 1.2441179752349854, "learning_rate": 3.1158563729307658e-06, "loss": 0.6311, "step": 4951 }, { "epoch": 0.63, "grad_norm": 1.2800577878952026, "learning_rate": 3.113934472012212e-06, "loss": 0.5742, "step": 4952 }, { "epoch": 0.63, "grad_norm": 1.2300503253936768, "learning_rate": 3.112012895930109e-06, "loss": 0.5904, "step": 4953 }, { "epoch": 0.63, "grad_norm": 1.1221928596496582, "learning_rate": 3.110091645015409e-06, "loss": 0.5441, "step": 4954 }, { "epoch": 0.63, "grad_norm": 1.1386147737503052, "learning_rate": 3.1081707195990115e-06, "loss": 0.6479, "step": 4955 }, { "epoch": 0.63, "grad_norm": 0.9600458145141602, "learning_rate": 3.1062501200117536e-06, "loss": 0.4795, "step": 4956 }, { "epoch": 0.64, "grad_norm": 1.1221802234649658, "learning_rate": 3.1043298465844207e-06, "loss": 0.5593, "step": 4957 }, { "epoch": 0.64, "grad_norm": 1.3925119638442993, "learning_rate": 3.1024098996477407e-06, "loss": 0.5899, "step": 4958 }, { "epoch": 0.64, "grad_norm": 1.0606712102890015, "learning_rate": 3.1004902795323867e-06, "loss": 0.4922, "step": 4959 }, { "epoch": 0.64, "grad_norm": 1.3476324081420898, "learning_rate": 3.098570986568972e-06, "loss": 0.6115, "step": 4960 }, { "epoch": 0.64, "grad_norm": 1.3782424926757812, "learning_rate": 3.096652021088057e-06, "loss": 0.6623, "step": 4961 }, { "epoch": 0.64, "grad_norm": 1.2778935432434082, "learning_rate": 3.0947333834201443e-06, "loss": 0.6427, "step": 4962 }, { "epoch": 0.64, "grad_norm": 3.0849034786224365, "learning_rate": 3.092815073895681e-06, "loss": 0.573, "step": 4963 }, { "epoch": 0.64, "grad_norm": 1.0979350805282593, "learning_rate": 3.0908970928450555e-06, "loss": 0.5269, "step": 4964 }, { "epoch": 0.64, "grad_norm": 1.2356542348861694, "learning_rate": 3.0889794405986024e-06, "loss": 0.5587, "step": 4965 }, { "epoch": 0.64, "grad_norm": 1.2701390981674194, "learning_rate": 3.087062117486597e-06, "loss": 0.6439, "step": 4966 }, { "epoch": 0.64, "grad_norm": 1.3336454629898071, "learning_rate": 3.0851451238392604e-06, "loss": 0.5936, "step": 4967 }, { "epoch": 0.64, "grad_norm": 1.7004276514053345, "learning_rate": 3.0832284599867544e-06, "loss": 0.5704, "step": 4968 }, { "epoch": 0.64, "grad_norm": 1.0348007678985596, "learning_rate": 3.0813121262591885e-06, "loss": 0.5852, "step": 4969 }, { "epoch": 0.64, "grad_norm": 1.3528045415878296, "learning_rate": 3.0793961229866077e-06, "loss": 0.6092, "step": 4970 }, { "epoch": 0.64, "grad_norm": 1.1565823554992676, "learning_rate": 3.0774804504990064e-06, "loss": 0.6212, "step": 4971 }, { "epoch": 0.64, "grad_norm": 1.185302734375, "learning_rate": 3.0755651091263233e-06, "loss": 0.5614, "step": 4972 }, { "epoch": 0.64, "grad_norm": 1.3281807899475098, "learning_rate": 3.073650099198433e-06, "loss": 0.6672, "step": 4973 }, { "epoch": 0.64, "grad_norm": 1.251400351524353, "learning_rate": 3.07173542104516e-06, "loss": 0.7054, "step": 4974 }, { "epoch": 0.64, "grad_norm": 0.98326575756073, "learning_rate": 3.069821074996266e-06, "loss": 0.6104, "step": 4975 }, { "epoch": 0.64, "grad_norm": 1.5410773754119873, "learning_rate": 3.067907061381461e-06, "loss": 0.6519, "step": 4976 }, { "epoch": 0.64, "grad_norm": 1.4055252075195312, "learning_rate": 3.0659933805303914e-06, "loss": 0.607, "step": 4977 }, { "epoch": 0.64, "grad_norm": 1.695743203163147, "learning_rate": 3.0640800327726537e-06, "loss": 0.6483, "step": 4978 }, { "epoch": 0.64, "grad_norm": 1.2075164318084717, "learning_rate": 3.06216701843778e-06, "loss": 0.5343, "step": 4979 }, { "epoch": 0.64, "grad_norm": 1.335493564605713, "learning_rate": 3.060254337855251e-06, "loss": 0.6638, "step": 4980 }, { "epoch": 0.64, "grad_norm": 1.1143360137939453, "learning_rate": 3.0583419913544833e-06, "loss": 0.5355, "step": 4981 }, { "epoch": 0.64, "grad_norm": 1.7671163082122803, "learning_rate": 3.056429979264844e-06, "loss": 0.5906, "step": 4982 }, { "epoch": 0.64, "grad_norm": 1.333821415901184, "learning_rate": 3.0545183019156345e-06, "loss": 0.6186, "step": 4983 }, { "epoch": 0.64, "grad_norm": 1.1221269369125366, "learning_rate": 3.052606959636106e-06, "loss": 0.634, "step": 4984 }, { "epoch": 0.64, "grad_norm": 1.378235936164856, "learning_rate": 3.0506959527554445e-06, "loss": 0.5971, "step": 4985 }, { "epoch": 0.64, "grad_norm": 1.267785668373108, "learning_rate": 3.0487852816027853e-06, "loss": 0.5958, "step": 4986 }, { "epoch": 0.64, "grad_norm": 1.2087866067886353, "learning_rate": 3.046874946507201e-06, "loss": 0.6207, "step": 4987 }, { "epoch": 0.64, "grad_norm": 1.871625542640686, "learning_rate": 3.0449649477977073e-06, "loss": 0.5814, "step": 4988 }, { "epoch": 0.64, "grad_norm": 1.2489285469055176, "learning_rate": 3.0430552858032647e-06, "loss": 0.6081, "step": 4989 }, { "epoch": 0.64, "grad_norm": 1.1966968774795532, "learning_rate": 3.0411459608527727e-06, "loss": 0.5906, "step": 4990 }, { "epoch": 0.64, "grad_norm": 1.507742166519165, "learning_rate": 3.039236973275075e-06, "loss": 0.628, "step": 4991 }, { "epoch": 0.64, "grad_norm": 1.4480714797973633, "learning_rate": 3.037328323398953e-06, "loss": 0.5951, "step": 4992 }, { "epoch": 0.64, "grad_norm": 1.2698642015457153, "learning_rate": 3.035420011553136e-06, "loss": 0.5878, "step": 4993 }, { "epoch": 0.64, "grad_norm": 1.0168004035949707, "learning_rate": 3.0335120380662897e-06, "loss": 0.6092, "step": 4994 }, { "epoch": 0.64, "grad_norm": 1.1975743770599365, "learning_rate": 3.0316044032670245e-06, "loss": 0.607, "step": 4995 }, { "epoch": 0.64, "grad_norm": 1.311187982559204, "learning_rate": 3.0296971074838923e-06, "loss": 0.5956, "step": 4996 }, { "epoch": 0.64, "grad_norm": 1.199485182762146, "learning_rate": 3.027790151045384e-06, "loss": 0.7287, "step": 4997 }, { "epoch": 0.64, "grad_norm": 1.2564228773117065, "learning_rate": 3.0258835342799362e-06, "loss": 0.6196, "step": 4998 }, { "epoch": 0.64, "grad_norm": 1.117231011390686, "learning_rate": 3.023977257515924e-06, "loss": 0.5185, "step": 4999 }, { "epoch": 0.64, "grad_norm": 1.2204550504684448, "learning_rate": 3.022071321081666e-06, "loss": 0.5253, "step": 5000 }, { "epoch": 0.64, "grad_norm": 1.2541391849517822, "learning_rate": 3.020165725305419e-06, "loss": 0.6425, "step": 5001 }, { "epoch": 0.64, "grad_norm": 1.467839241027832, "learning_rate": 3.018260470515385e-06, "loss": 0.6367, "step": 5002 }, { "epoch": 0.64, "grad_norm": 1.1253759860992432, "learning_rate": 3.016355557039704e-06, "loss": 0.5926, "step": 5003 }, { "epoch": 0.64, "grad_norm": 1.2686272859573364, "learning_rate": 3.0144509852064597e-06, "loss": 0.534, "step": 5004 }, { "epoch": 0.64, "grad_norm": 1.152122139930725, "learning_rate": 3.0125467553436737e-06, "loss": 0.5331, "step": 5005 }, { "epoch": 0.64, "grad_norm": 1.1917579174041748, "learning_rate": 3.0106428677793133e-06, "loss": 0.596, "step": 5006 }, { "epoch": 0.64, "grad_norm": 1.1018012762069702, "learning_rate": 3.008739322841285e-06, "loss": 0.6628, "step": 5007 }, { "epoch": 0.64, "grad_norm": 1.526096224784851, "learning_rate": 3.0068361208574336e-06, "loss": 0.604, "step": 5008 }, { "epoch": 0.64, "grad_norm": 1.2660118341445923, "learning_rate": 3.0049332621555483e-06, "loss": 0.6269, "step": 5009 }, { "epoch": 0.64, "grad_norm": 1.051699161529541, "learning_rate": 3.003030747063357e-06, "loss": 0.5862, "step": 5010 }, { "epoch": 0.64, "grad_norm": 1.3319251537322998, "learning_rate": 3.0011285759085296e-06, "loss": 0.5735, "step": 5011 }, { "epoch": 0.64, "grad_norm": 1.586374282836914, "learning_rate": 2.9992267490186766e-06, "loss": 0.6117, "step": 5012 }, { "epoch": 0.64, "grad_norm": 1.3304932117462158, "learning_rate": 2.9973252667213494e-06, "loss": 0.6032, "step": 5013 }, { "epoch": 0.64, "grad_norm": 1.2678347826004028, "learning_rate": 2.995424129344038e-06, "loss": 0.4959, "step": 5014 }, { "epoch": 0.64, "grad_norm": 1.3165990114212036, "learning_rate": 2.993523337214177e-06, "loss": 0.6297, "step": 5015 }, { "epoch": 0.64, "grad_norm": 1.1281373500823975, "learning_rate": 2.9916228906591366e-06, "loss": 0.5204, "step": 5016 }, { "epoch": 0.64, "grad_norm": 1.138863205909729, "learning_rate": 2.9897227900062327e-06, "loss": 0.6399, "step": 5017 }, { "epoch": 0.64, "grad_norm": 1.1743173599243164, "learning_rate": 2.9878230355827166e-06, "loss": 0.5562, "step": 5018 }, { "epoch": 0.64, "grad_norm": 1.163933277130127, "learning_rate": 2.985923627715785e-06, "loss": 0.614, "step": 5019 }, { "epoch": 0.64, "grad_norm": 1.2621997594833374, "learning_rate": 2.9840245667325697e-06, "loss": 0.6404, "step": 5020 }, { "epoch": 0.64, "grad_norm": 1.3806610107421875, "learning_rate": 2.982125852960148e-06, "loss": 0.6053, "step": 5021 }, { "epoch": 0.64, "grad_norm": 1.8941912651062012, "learning_rate": 2.9802274867255306e-06, "loss": 0.6629, "step": 5022 }, { "epoch": 0.64, "grad_norm": 1.2895264625549316, "learning_rate": 2.9783294683556764e-06, "loss": 0.6593, "step": 5023 }, { "epoch": 0.64, "grad_norm": 1.5828664302825928, "learning_rate": 2.9764317981774804e-06, "loss": 0.6568, "step": 5024 }, { "epoch": 0.64, "grad_norm": 1.4575058221817017, "learning_rate": 2.9745344765177753e-06, "loss": 0.6443, "step": 5025 }, { "epoch": 0.64, "grad_norm": 1.212241530418396, "learning_rate": 2.9726375037033396e-06, "loss": 0.5902, "step": 5026 }, { "epoch": 0.64, "grad_norm": 1.538864016532898, "learning_rate": 2.9707408800608837e-06, "loss": 0.5993, "step": 5027 }, { "epoch": 0.64, "grad_norm": 1.0669389963150024, "learning_rate": 2.968844605917067e-06, "loss": 0.5779, "step": 5028 }, { "epoch": 0.64, "grad_norm": 1.2678580284118652, "learning_rate": 2.9669486815984807e-06, "loss": 0.6404, "step": 5029 }, { "epoch": 0.64, "grad_norm": 1.2878882884979248, "learning_rate": 2.965053107431662e-06, "loss": 0.5885, "step": 5030 }, { "epoch": 0.64, "grad_norm": 1.1472769975662231, "learning_rate": 2.9631578837430826e-06, "loss": 0.6865, "step": 5031 }, { "epoch": 0.64, "grad_norm": 1.0584124326705933, "learning_rate": 2.9612630108591576e-06, "loss": 0.5281, "step": 5032 }, { "epoch": 0.64, "grad_norm": 1.6062891483306885, "learning_rate": 2.9593684891062403e-06, "loss": 0.5883, "step": 5033 }, { "epoch": 0.64, "grad_norm": 1.0802760124206543, "learning_rate": 2.9574743188106235e-06, "loss": 0.5788, "step": 5034 }, { "epoch": 0.65, "grad_norm": 1.1777443885803223, "learning_rate": 2.9555805002985394e-06, "loss": 0.5501, "step": 5035 }, { "epoch": 0.65, "grad_norm": 1.3648144006729126, "learning_rate": 2.9536870338961597e-06, "loss": 0.6221, "step": 5036 }, { "epoch": 0.65, "grad_norm": 1.4176534414291382, "learning_rate": 2.9517939199295965e-06, "loss": 0.6233, "step": 5037 }, { "epoch": 0.65, "grad_norm": 1.1230452060699463, "learning_rate": 2.9499011587248972e-06, "loss": 0.5559, "step": 5038 }, { "epoch": 0.65, "grad_norm": 1.0202972888946533, "learning_rate": 2.948008750608055e-06, "loss": 0.6254, "step": 5039 }, { "epoch": 0.65, "grad_norm": 1.4414222240447998, "learning_rate": 2.9461166959049943e-06, "loss": 0.5967, "step": 5040 }, { "epoch": 0.65, "grad_norm": 1.1397916078567505, "learning_rate": 2.9442249949415893e-06, "loss": 0.4967, "step": 5041 }, { "epoch": 0.65, "grad_norm": 1.192447543144226, "learning_rate": 2.942333648043643e-06, "loss": 0.5351, "step": 5042 }, { "epoch": 0.65, "grad_norm": 1.623815894126892, "learning_rate": 2.9404426555369012e-06, "loss": 0.5818, "step": 5043 }, { "epoch": 0.65, "grad_norm": 1.3485163450241089, "learning_rate": 2.9385520177470517e-06, "loss": 0.5429, "step": 5044 }, { "epoch": 0.65, "grad_norm": 1.3247160911560059, "learning_rate": 2.936661734999715e-06, "loss": 0.5124, "step": 5045 }, { "epoch": 0.65, "grad_norm": 1.489458441734314, "learning_rate": 2.934771807620457e-06, "loss": 0.6052, "step": 5046 }, { "epoch": 0.65, "grad_norm": 1.4193159341812134, "learning_rate": 2.932882235934776e-06, "loss": 0.5938, "step": 5047 }, { "epoch": 0.65, "grad_norm": 1.929944396018982, "learning_rate": 2.9309930202681156e-06, "loss": 0.5478, "step": 5048 }, { "epoch": 0.65, "grad_norm": 1.3525385856628418, "learning_rate": 2.9291041609458528e-06, "loss": 0.5828, "step": 5049 }, { "epoch": 0.65, "grad_norm": 1.367734670639038, "learning_rate": 2.9272156582933063e-06, "loss": 0.5198, "step": 5050 }, { "epoch": 0.65, "grad_norm": 1.2847005128860474, "learning_rate": 2.9253275126357313e-06, "loss": 0.5772, "step": 5051 }, { "epoch": 0.65, "grad_norm": 1.4280390739440918, "learning_rate": 2.9234397242983232e-06, "loss": 0.5525, "step": 5052 }, { "epoch": 0.65, "grad_norm": 1.293440580368042, "learning_rate": 2.921552293606214e-06, "loss": 0.5723, "step": 5053 }, { "epoch": 0.65, "grad_norm": 1.1732693910598755, "learning_rate": 2.919665220884478e-06, "loss": 0.6308, "step": 5054 }, { "epoch": 0.65, "grad_norm": 1.4701364040374756, "learning_rate": 2.917778506458121e-06, "loss": 0.5309, "step": 5055 }, { "epoch": 0.65, "grad_norm": 1.3998725414276123, "learning_rate": 2.9158921506520943e-06, "loss": 0.5675, "step": 5056 }, { "epoch": 0.65, "grad_norm": 1.3049033880233765, "learning_rate": 2.9140061537912835e-06, "loss": 0.6115, "step": 5057 }, { "epoch": 0.65, "grad_norm": 1.081899881362915, "learning_rate": 2.9121205162005134e-06, "loss": 0.5292, "step": 5058 }, { "epoch": 0.65, "grad_norm": 1.247861623764038, "learning_rate": 2.9102352382045464e-06, "loss": 0.5237, "step": 5059 }, { "epoch": 0.65, "grad_norm": 1.3318337202072144, "learning_rate": 2.908350320128086e-06, "loss": 0.6684, "step": 5060 }, { "epoch": 0.65, "grad_norm": 1.313628077507019, "learning_rate": 2.906465762295766e-06, "loss": 0.6259, "step": 5061 }, { "epoch": 0.65, "grad_norm": 1.4944661855697632, "learning_rate": 2.904581565032166e-06, "loss": 0.5594, "step": 5062 }, { "epoch": 0.65, "grad_norm": 1.2283939123153687, "learning_rate": 2.902697728661801e-06, "loss": 0.5856, "step": 5063 }, { "epoch": 0.65, "grad_norm": 1.3918577432632446, "learning_rate": 2.9008142535091246e-06, "loss": 0.5818, "step": 5064 }, { "epoch": 0.65, "grad_norm": 1.163896918296814, "learning_rate": 2.898931139898523e-06, "loss": 0.6553, "step": 5065 }, { "epoch": 0.65, "grad_norm": 1.1158233880996704, "learning_rate": 2.897048388154328e-06, "loss": 0.5073, "step": 5066 }, { "epoch": 0.65, "grad_norm": 1.0618435144424438, "learning_rate": 2.895165998600803e-06, "loss": 0.5781, "step": 5067 }, { "epoch": 0.65, "grad_norm": 1.5787274837493896, "learning_rate": 2.893283971562154e-06, "loss": 0.597, "step": 5068 }, { "epoch": 0.65, "grad_norm": 1.4554435014724731, "learning_rate": 2.891402307362519e-06, "loss": 0.5957, "step": 5069 }, { "epoch": 0.65, "grad_norm": 1.2838584184646606, "learning_rate": 2.8895210063259775e-06, "loss": 0.565, "step": 5070 }, { "epoch": 0.65, "grad_norm": 1.2869141101837158, "learning_rate": 2.887640068776546e-06, "loss": 0.5993, "step": 5071 }, { "epoch": 0.65, "grad_norm": 1.1139177083969116, "learning_rate": 2.885759495038179e-06, "loss": 0.5741, "step": 5072 }, { "epoch": 0.65, "grad_norm": 2.5964291095733643, "learning_rate": 2.883879285434763e-06, "loss": 0.6509, "step": 5073 }, { "epoch": 0.65, "grad_norm": 0.9919441938400269, "learning_rate": 2.8819994402901276e-06, "loss": 0.4702, "step": 5074 }, { "epoch": 0.65, "grad_norm": 1.2516149282455444, "learning_rate": 2.8801199599280417e-06, "loss": 0.6224, "step": 5075 }, { "epoch": 0.65, "grad_norm": 1.0911529064178467, "learning_rate": 2.878240844672202e-06, "loss": 0.5022, "step": 5076 }, { "epoch": 0.65, "grad_norm": 1.3920502662658691, "learning_rate": 2.876362094846251e-06, "loss": 0.6245, "step": 5077 }, { "epoch": 0.65, "grad_norm": 1.443747878074646, "learning_rate": 2.874483710773765e-06, "loss": 0.5352, "step": 5078 }, { "epoch": 0.65, "grad_norm": 1.816619634628296, "learning_rate": 2.8726056927782587e-06, "loss": 0.6492, "step": 5079 }, { "epoch": 0.65, "grad_norm": 1.262075662612915, "learning_rate": 2.8707280411831796e-06, "loss": 0.5654, "step": 5080 }, { "epoch": 0.65, "grad_norm": 1.3001986742019653, "learning_rate": 2.868850756311915e-06, "loss": 0.5528, "step": 5081 }, { "epoch": 0.65, "grad_norm": 1.3010063171386719, "learning_rate": 2.8669738384877915e-06, "loss": 0.5112, "step": 5082 }, { "epoch": 0.65, "grad_norm": 1.350829839706421, "learning_rate": 2.8650972880340704e-06, "loss": 0.6077, "step": 5083 }, { "epoch": 0.65, "grad_norm": 1.1886601448059082, "learning_rate": 2.8632211052739463e-06, "loss": 0.6004, "step": 5084 }, { "epoch": 0.65, "grad_norm": 2.6389498710632324, "learning_rate": 2.861345290530555e-06, "loss": 0.6008, "step": 5085 }, { "epoch": 0.65, "grad_norm": 2.1080713272094727, "learning_rate": 2.8594698441269696e-06, "loss": 0.5678, "step": 5086 }, { "epoch": 0.65, "grad_norm": 1.1639949083328247, "learning_rate": 2.8575947663861935e-06, "loss": 0.6014, "step": 5087 }, { "epoch": 0.65, "grad_norm": 1.455990195274353, "learning_rate": 2.855720057631173e-06, "loss": 0.5077, "step": 5088 }, { "epoch": 0.65, "grad_norm": 1.3436378240585327, "learning_rate": 2.853845718184789e-06, "loss": 0.5635, "step": 5089 }, { "epoch": 0.65, "grad_norm": 1.1918082237243652, "learning_rate": 2.851971748369859e-06, "loss": 0.5182, "step": 5090 }, { "epoch": 0.65, "grad_norm": 1.5144091844558716, "learning_rate": 2.8500981485091313e-06, "loss": 0.69, "step": 5091 }, { "epoch": 0.65, "grad_norm": 1.447807788848877, "learning_rate": 2.848224918925301e-06, "loss": 0.6154, "step": 5092 }, { "epoch": 0.65, "grad_norm": 1.535978078842163, "learning_rate": 2.8463520599409945e-06, "loss": 0.5446, "step": 5093 }, { "epoch": 0.65, "grad_norm": 1.2916719913482666, "learning_rate": 2.844479571878769e-06, "loss": 0.5821, "step": 5094 }, { "epoch": 0.65, "grad_norm": 1.3413225412368774, "learning_rate": 2.8426074550611237e-06, "loss": 0.5991, "step": 5095 }, { "epoch": 0.65, "grad_norm": 1.7014421224594116, "learning_rate": 2.840735709810495e-06, "loss": 0.6615, "step": 5096 }, { "epoch": 0.65, "grad_norm": 1.217027187347412, "learning_rate": 2.838864336449253e-06, "loss": 0.7111, "step": 5097 }, { "epoch": 0.65, "grad_norm": 1.2105580568313599, "learning_rate": 2.8369933352997003e-06, "loss": 0.552, "step": 5098 }, { "epoch": 0.65, "grad_norm": 1.383817434310913, "learning_rate": 2.8351227066840805e-06, "loss": 0.6348, "step": 5099 }, { "epoch": 0.65, "grad_norm": 1.2009333372116089, "learning_rate": 2.8332524509245718e-06, "loss": 0.539, "step": 5100 }, { "epoch": 0.65, "grad_norm": 1.331572413444519, "learning_rate": 2.8313825683432906e-06, "loss": 0.5791, "step": 5101 }, { "epoch": 0.65, "grad_norm": 1.6446709632873535, "learning_rate": 2.8295130592622797e-06, "loss": 0.6404, "step": 5102 }, { "epoch": 0.65, "grad_norm": 1.5512125492095947, "learning_rate": 2.8276439240035287e-06, "loss": 0.542, "step": 5103 }, { "epoch": 0.65, "grad_norm": 1.3419227600097656, "learning_rate": 2.8257751628889564e-06, "loss": 0.5675, "step": 5104 }, { "epoch": 0.65, "grad_norm": 1.3091492652893066, "learning_rate": 2.8239067762404216e-06, "loss": 0.6319, "step": 5105 }, { "epoch": 0.65, "grad_norm": 1.235048770904541, "learning_rate": 2.822038764379712e-06, "loss": 0.6524, "step": 5106 }, { "epoch": 0.65, "grad_norm": 1.292203664779663, "learning_rate": 2.820171127628557e-06, "loss": 0.5984, "step": 5107 }, { "epoch": 0.65, "grad_norm": 3.168729543685913, "learning_rate": 2.818303866308618e-06, "loss": 0.6435, "step": 5108 }, { "epoch": 0.65, "grad_norm": 1.1043180227279663, "learning_rate": 2.8164369807414936e-06, "loss": 0.6302, "step": 5109 }, { "epoch": 0.65, "grad_norm": 1.3122977018356323, "learning_rate": 2.8145704712487167e-06, "loss": 0.6408, "step": 5110 }, { "epoch": 0.65, "grad_norm": 1.1558114290237427, "learning_rate": 2.8127043381517553e-06, "loss": 0.5507, "step": 5111 }, { "epoch": 0.65, "grad_norm": 1.5054841041564941, "learning_rate": 2.810838581772015e-06, "loss": 0.6062, "step": 5112 }, { "epoch": 0.66, "grad_norm": 1.2152769565582275, "learning_rate": 2.8089732024308316e-06, "loss": 0.623, "step": 5113 }, { "epoch": 0.66, "grad_norm": 1.117659091949463, "learning_rate": 2.807108200449479e-06, "loss": 0.5331, "step": 5114 }, { "epoch": 0.66, "grad_norm": 1.4485636949539185, "learning_rate": 2.805243576149167e-06, "loss": 0.6216, "step": 5115 }, { "epoch": 0.66, "grad_norm": 1.1245951652526855, "learning_rate": 2.8033793298510415e-06, "loss": 0.6768, "step": 5116 }, { "epoch": 0.66, "grad_norm": 1.2359367609024048, "learning_rate": 2.8015154618761754e-06, "loss": 0.611, "step": 5117 }, { "epoch": 0.66, "grad_norm": 1.2246320247650146, "learning_rate": 2.7996519725455857e-06, "loss": 0.5716, "step": 5118 }, { "epoch": 0.66, "grad_norm": 1.1994571685791016, "learning_rate": 2.7977888621802196e-06, "loss": 0.5599, "step": 5119 }, { "epoch": 0.66, "grad_norm": 1.1807100772857666, "learning_rate": 2.7959261311009623e-06, "loss": 0.583, "step": 5120 }, { "epoch": 0.66, "grad_norm": 1.2274185419082642, "learning_rate": 2.794063779628628e-06, "loss": 0.5591, "step": 5121 }, { "epoch": 0.66, "grad_norm": 1.1783159971237183, "learning_rate": 2.79220180808397e-06, "loss": 0.5576, "step": 5122 }, { "epoch": 0.66, "grad_norm": 1.355054497718811, "learning_rate": 2.790340216787676e-06, "loss": 0.6449, "step": 5123 }, { "epoch": 0.66, "grad_norm": 1.0858644247055054, "learning_rate": 2.788479006060368e-06, "loss": 0.7109, "step": 5124 }, { "epoch": 0.66, "grad_norm": 1.1023110151290894, "learning_rate": 2.7866181762225964e-06, "loss": 0.5975, "step": 5125 }, { "epoch": 0.66, "grad_norm": 1.26510751247406, "learning_rate": 2.7847577275948573e-06, "loss": 0.6797, "step": 5126 }, { "epoch": 0.66, "grad_norm": 1.3188729286193848, "learning_rate": 2.7828976604975756e-06, "loss": 0.6283, "step": 5127 }, { "epoch": 0.66, "grad_norm": 1.3665924072265625, "learning_rate": 2.7810379752511045e-06, "loss": 0.539, "step": 5128 }, { "epoch": 0.66, "grad_norm": 1.152752161026001, "learning_rate": 2.779178672175741e-06, "loss": 0.5775, "step": 5129 }, { "epoch": 0.66, "grad_norm": 1.2595551013946533, "learning_rate": 2.777319751591711e-06, "loss": 0.6191, "step": 5130 }, { "epoch": 0.66, "grad_norm": 1.1859368085861206, "learning_rate": 2.7754612138191784e-06, "loss": 0.5934, "step": 5131 }, { "epoch": 0.66, "grad_norm": 1.0668327808380127, "learning_rate": 2.7736030591782337e-06, "loss": 0.7114, "step": 5132 }, { "epoch": 0.66, "grad_norm": 1.3377395868301392, "learning_rate": 2.7717452879889094e-06, "loss": 0.6773, "step": 5133 }, { "epoch": 0.66, "grad_norm": 1.3250497579574585, "learning_rate": 2.7698879005711684e-06, "loss": 0.5363, "step": 5134 }, { "epoch": 0.66, "grad_norm": 1.2125672101974487, "learning_rate": 2.768030897244909e-06, "loss": 0.5669, "step": 5135 }, { "epoch": 0.66, "grad_norm": 1.2120472192764282, "learning_rate": 2.76617427832996e-06, "loss": 0.5805, "step": 5136 }, { "epoch": 0.66, "grad_norm": 1.2930200099945068, "learning_rate": 2.764318044146087e-06, "loss": 0.6083, "step": 5137 }, { "epoch": 0.66, "grad_norm": 1.3229888677597046, "learning_rate": 2.762462195012991e-06, "loss": 0.5454, "step": 5138 }, { "epoch": 0.66, "grad_norm": 1.3981189727783203, "learning_rate": 2.7606067312503006e-06, "loss": 0.6181, "step": 5139 }, { "epoch": 0.66, "grad_norm": 1.3310023546218872, "learning_rate": 2.7587516531775826e-06, "loss": 0.5589, "step": 5140 }, { "epoch": 0.66, "grad_norm": 1.1334054470062256, "learning_rate": 2.7568969611143377e-06, "loss": 0.5911, "step": 5141 }, { "epoch": 0.66, "grad_norm": 1.3404936790466309, "learning_rate": 2.755042655379998e-06, "loss": 0.624, "step": 5142 }, { "epoch": 0.66, "grad_norm": 1.3292677402496338, "learning_rate": 2.7531887362939314e-06, "loss": 0.598, "step": 5143 }, { "epoch": 0.66, "grad_norm": 1.5356954336166382, "learning_rate": 2.751335204175436e-06, "loss": 0.6055, "step": 5144 }, { "epoch": 0.66, "grad_norm": 1.472981333732605, "learning_rate": 2.7494820593437483e-06, "loss": 0.6197, "step": 5145 }, { "epoch": 0.66, "grad_norm": 1.6259320974349976, "learning_rate": 2.74762930211803e-06, "loss": 0.5957, "step": 5146 }, { "epoch": 0.66, "grad_norm": 1.2277649641036987, "learning_rate": 2.745776932817384e-06, "loss": 0.5525, "step": 5147 }, { "epoch": 0.66, "grad_norm": 1.352315068244934, "learning_rate": 2.743924951760842e-06, "loss": 0.5822, "step": 5148 }, { "epoch": 0.66, "grad_norm": 1.4033784866333008, "learning_rate": 2.7420733592673727e-06, "loss": 0.5787, "step": 5149 }, { "epoch": 0.66, "grad_norm": 1.3330798149108887, "learning_rate": 2.740222155655871e-06, "loss": 0.5817, "step": 5150 }, { "epoch": 0.66, "grad_norm": 1.3742144107818604, "learning_rate": 2.7383713412451716e-06, "loss": 0.5179, "step": 5151 }, { "epoch": 0.66, "grad_norm": 1.388723373413086, "learning_rate": 2.736520916354039e-06, "loss": 0.4809, "step": 5152 }, { "epoch": 0.66, "grad_norm": 1.3881044387817383, "learning_rate": 2.734670881301174e-06, "loss": 0.5648, "step": 5153 }, { "epoch": 0.66, "grad_norm": 1.7049732208251953, "learning_rate": 2.732821236405203e-06, "loss": 0.5736, "step": 5154 }, { "epoch": 0.66, "grad_norm": 1.0168077945709229, "learning_rate": 2.730971981984692e-06, "loss": 0.6677, "step": 5155 }, { "epoch": 0.66, "grad_norm": 1.2047303915023804, "learning_rate": 2.729123118358137e-06, "loss": 0.6059, "step": 5156 }, { "epoch": 0.66, "grad_norm": 1.1460449695587158, "learning_rate": 2.7272746458439705e-06, "loss": 0.6915, "step": 5157 }, { "epoch": 0.66, "grad_norm": 1.4450008869171143, "learning_rate": 2.7254265647605483e-06, "loss": 0.6629, "step": 5158 }, { "epoch": 0.66, "grad_norm": 1.5762354135513306, "learning_rate": 2.723578875426166e-06, "loss": 0.6685, "step": 5159 }, { "epoch": 0.66, "grad_norm": 1.3964108228683472, "learning_rate": 2.721731578159057e-06, "loss": 0.6259, "step": 5160 }, { "epoch": 0.66, "grad_norm": 1.066676139831543, "learning_rate": 2.7198846732773743e-06, "loss": 0.7189, "step": 5161 }, { "epoch": 0.66, "grad_norm": 1.1809970140457153, "learning_rate": 2.718038161099211e-06, "loss": 0.6223, "step": 5162 }, { "epoch": 0.66, "grad_norm": 1.1365050077438354, "learning_rate": 2.716192041942592e-06, "loss": 0.5089, "step": 5163 }, { "epoch": 0.66, "grad_norm": 1.1270561218261719, "learning_rate": 2.7143463161254755e-06, "loss": 0.555, "step": 5164 }, { "epoch": 0.66, "grad_norm": 3.720628261566162, "learning_rate": 2.712500983965747e-06, "loss": 0.6347, "step": 5165 }, { "epoch": 0.66, "grad_norm": 1.4290040731430054, "learning_rate": 2.710656045781228e-06, "loss": 0.5405, "step": 5166 }, { "epoch": 0.66, "grad_norm": 2.559084415435791, "learning_rate": 2.7088115018896725e-06, "loss": 0.6434, "step": 5167 }, { "epoch": 0.66, "grad_norm": 1.1094157695770264, "learning_rate": 2.706967352608768e-06, "loss": 0.5956, "step": 5168 }, { "epoch": 0.66, "grad_norm": 1.2786990404129028, "learning_rate": 2.7051235982561275e-06, "loss": 0.515, "step": 5169 }, { "epoch": 0.66, "grad_norm": 1.1976306438446045, "learning_rate": 2.703280239149302e-06, "loss": 0.5842, "step": 5170 }, { "epoch": 0.66, "grad_norm": 1.3613533973693848, "learning_rate": 2.701437275605773e-06, "loss": 0.6508, "step": 5171 }, { "epoch": 0.66, "grad_norm": 1.403113842010498, "learning_rate": 2.699594707942955e-06, "loss": 0.6158, "step": 5172 }, { "epoch": 0.66, "grad_norm": 1.390093445777893, "learning_rate": 2.6977525364781887e-06, "loss": 0.6485, "step": 5173 }, { "epoch": 0.66, "grad_norm": 1.1422367095947266, "learning_rate": 2.695910761528754e-06, "loss": 0.6894, "step": 5174 }, { "epoch": 0.66, "grad_norm": 1.3628041744232178, "learning_rate": 2.694069383411857e-06, "loss": 0.5549, "step": 5175 }, { "epoch": 0.66, "grad_norm": 1.215430498123169, "learning_rate": 2.692228402444642e-06, "loss": 0.5934, "step": 5176 }, { "epoch": 0.66, "grad_norm": 1.1754097938537598, "learning_rate": 2.6903878189441734e-06, "loss": 0.5572, "step": 5177 }, { "epoch": 0.66, "grad_norm": 1.1725589036941528, "learning_rate": 2.6885476332274598e-06, "loss": 0.608, "step": 5178 }, { "epoch": 0.66, "grad_norm": 1.1601642370224, "learning_rate": 2.6867078456114367e-06, "loss": 0.6104, "step": 5179 }, { "epoch": 0.66, "grad_norm": 1.180969476699829, "learning_rate": 2.6848684564129657e-06, "loss": 0.5651, "step": 5180 }, { "epoch": 0.66, "grad_norm": 1.457452654838562, "learning_rate": 2.683029465948846e-06, "loss": 0.6624, "step": 5181 }, { "epoch": 0.66, "grad_norm": 1.2275680303573608, "learning_rate": 2.6811908745358068e-06, "loss": 0.7376, "step": 5182 }, { "epoch": 0.66, "grad_norm": 1.2394523620605469, "learning_rate": 2.6793526824905102e-06, "loss": 0.6944, "step": 5183 }, { "epoch": 0.66, "grad_norm": 1.4745672941207886, "learning_rate": 2.677514890129543e-06, "loss": 0.5372, "step": 5184 }, { "epoch": 0.66, "grad_norm": 1.3252058029174805, "learning_rate": 2.6756774977694295e-06, "loss": 0.6018, "step": 5185 }, { "epoch": 0.66, "grad_norm": 1.290786623954773, "learning_rate": 2.6738405057266255e-06, "loss": 0.613, "step": 5186 }, { "epoch": 0.66, "grad_norm": 1.545142650604248, "learning_rate": 2.6720039143175116e-06, "loss": 0.6156, "step": 5187 }, { "epoch": 0.66, "grad_norm": 1.6830646991729736, "learning_rate": 2.6701677238584046e-06, "loss": 0.5804, "step": 5188 }, { "epoch": 0.66, "grad_norm": 1.2544286251068115, "learning_rate": 2.6683319346655523e-06, "loss": 0.5945, "step": 5189 }, { "epoch": 0.66, "grad_norm": 1.4033396244049072, "learning_rate": 2.666496547055133e-06, "loss": 0.5992, "step": 5190 }, { "epoch": 0.67, "grad_norm": 1.134125828742981, "learning_rate": 2.6646615613432507e-06, "loss": 0.6052, "step": 5191 }, { "epoch": 0.67, "grad_norm": 1.1849713325500488, "learning_rate": 2.6628269778459475e-06, "loss": 0.6269, "step": 5192 }, { "epoch": 0.67, "grad_norm": 1.2358061075210571, "learning_rate": 2.660992796879193e-06, "loss": 0.6058, "step": 5193 }, { "epoch": 0.67, "grad_norm": 1.204371452331543, "learning_rate": 2.659159018758886e-06, "loss": 0.5892, "step": 5194 }, { "epoch": 0.67, "grad_norm": 1.3742873668670654, "learning_rate": 2.6573256438008594e-06, "loss": 0.5877, "step": 5195 }, { "epoch": 0.67, "grad_norm": 1.4056979417800903, "learning_rate": 2.655492672320874e-06, "loss": 0.6435, "step": 5196 }, { "epoch": 0.67, "grad_norm": 1.357175350189209, "learning_rate": 2.653660104634624e-06, "loss": 0.6442, "step": 5197 }, { "epoch": 0.67, "grad_norm": 1.4769208431243896, "learning_rate": 2.6518279410577276e-06, "loss": 0.5422, "step": 5198 }, { "epoch": 0.67, "grad_norm": 1.2639411687850952, "learning_rate": 2.6499961819057396e-06, "loss": 0.6191, "step": 5199 }, { "epoch": 0.67, "grad_norm": 1.4961445331573486, "learning_rate": 2.648164827494144e-06, "loss": 0.6537, "step": 5200 }, { "epoch": 0.67, "grad_norm": 1.1992703676223755, "learning_rate": 2.6463338781383563e-06, "loss": 0.5623, "step": 5201 }, { "epoch": 0.67, "grad_norm": 1.429692029953003, "learning_rate": 2.6445033341537164e-06, "loss": 0.5856, "step": 5202 }, { "epoch": 0.67, "grad_norm": 1.4961892366409302, "learning_rate": 2.642673195855499e-06, "loss": 0.6466, "step": 5203 }, { "epoch": 0.67, "grad_norm": 1.2756520509719849, "learning_rate": 2.6408434635589096e-06, "loss": 0.5343, "step": 5204 }, { "epoch": 0.67, "grad_norm": 1.2954007387161255, "learning_rate": 2.6390141375790834e-06, "loss": 0.6814, "step": 5205 }, { "epoch": 0.67, "grad_norm": 1.1817203760147095, "learning_rate": 2.637185218231082e-06, "loss": 0.5147, "step": 5206 }, { "epoch": 0.67, "grad_norm": 1.356261134147644, "learning_rate": 2.6353567058299006e-06, "loss": 0.6249, "step": 5207 }, { "epoch": 0.67, "grad_norm": 1.4610151052474976, "learning_rate": 2.633528600690463e-06, "loss": 0.6509, "step": 5208 }, { "epoch": 0.67, "grad_norm": 1.2745957374572754, "learning_rate": 2.6317009031276264e-06, "loss": 0.7069, "step": 5209 }, { "epoch": 0.67, "grad_norm": 1.1911793947219849, "learning_rate": 2.6298736134561686e-06, "loss": 0.5957, "step": 5210 }, { "epoch": 0.67, "grad_norm": 1.2115494012832642, "learning_rate": 2.6280467319908052e-06, "loss": 0.4903, "step": 5211 }, { "epoch": 0.67, "grad_norm": 1.1515758037567139, "learning_rate": 2.6262202590461843e-06, "loss": 0.6614, "step": 5212 }, { "epoch": 0.67, "grad_norm": 1.148162841796875, "learning_rate": 2.6243941949368728e-06, "loss": 0.6236, "step": 5213 }, { "epoch": 0.67, "grad_norm": 1.7739685773849487, "learning_rate": 2.622568539977375e-06, "loss": 0.6557, "step": 5214 }, { "epoch": 0.67, "grad_norm": 1.455483078956604, "learning_rate": 2.620743294482123e-06, "loss": 0.6547, "step": 5215 }, { "epoch": 0.67, "grad_norm": 1.486600399017334, "learning_rate": 2.6189184587654813e-06, "loss": 0.6379, "step": 5216 }, { "epoch": 0.67, "grad_norm": 1.2012253999710083, "learning_rate": 2.617094033141735e-06, "loss": 0.546, "step": 5217 }, { "epoch": 0.67, "grad_norm": 1.324657917022705, "learning_rate": 2.615270017925107e-06, "loss": 0.6812, "step": 5218 }, { "epoch": 0.67, "grad_norm": 2.661184310913086, "learning_rate": 2.6134464134297476e-06, "loss": 0.6015, "step": 5219 }, { "epoch": 0.67, "grad_norm": 1.1693406105041504, "learning_rate": 2.611623219969737e-06, "loss": 0.5996, "step": 5220 }, { "epoch": 0.67, "grad_norm": 1.1396194696426392, "learning_rate": 2.609800437859078e-06, "loss": 0.6311, "step": 5221 }, { "epoch": 0.67, "grad_norm": 1.285706877708435, "learning_rate": 2.607978067411712e-06, "loss": 0.6305, "step": 5222 }, { "epoch": 0.67, "grad_norm": 1.161902666091919, "learning_rate": 2.606156108941504e-06, "loss": 0.6345, "step": 5223 }, { "epoch": 0.67, "grad_norm": 1.4399522542953491, "learning_rate": 2.6043345627622513e-06, "loss": 0.5897, "step": 5224 }, { "epoch": 0.67, "grad_norm": 1.349932312965393, "learning_rate": 2.6025134291876745e-06, "loss": 0.6166, "step": 5225 }, { "epoch": 0.67, "grad_norm": 1.8268938064575195, "learning_rate": 2.6006927085314295e-06, "loss": 0.6521, "step": 5226 }, { "epoch": 0.67, "grad_norm": 1.7306489944458008, "learning_rate": 2.5988724011070967e-06, "loss": 0.5793, "step": 5227 }, { "epoch": 0.67, "grad_norm": 1.346104621887207, "learning_rate": 2.597052507228189e-06, "loss": 0.596, "step": 5228 }, { "epoch": 0.67, "grad_norm": 1.2005935907363892, "learning_rate": 2.5952330272081446e-06, "loss": 0.6136, "step": 5229 }, { "epoch": 0.67, "grad_norm": 1.2340837717056274, "learning_rate": 2.5934139613603326e-06, "loss": 0.5841, "step": 5230 }, { "epoch": 0.67, "grad_norm": 1.3165960311889648, "learning_rate": 2.591595309998052e-06, "loss": 0.59, "step": 5231 }, { "epoch": 0.67, "grad_norm": 1.0761138200759888, "learning_rate": 2.5897770734345253e-06, "loss": 0.5112, "step": 5232 }, { "epoch": 0.67, "grad_norm": 1.1540740728378296, "learning_rate": 2.5879592519829065e-06, "loss": 0.5531, "step": 5233 }, { "epoch": 0.67, "grad_norm": 1.2632431983947754, "learning_rate": 2.586141845956284e-06, "loss": 0.5473, "step": 5234 }, { "epoch": 0.67, "grad_norm": 1.3133299350738525, "learning_rate": 2.5843248556676615e-06, "loss": 0.5852, "step": 5235 }, { "epoch": 0.67, "grad_norm": 1.290602207183838, "learning_rate": 2.582508281429983e-06, "loss": 0.5788, "step": 5236 }, { "epoch": 0.67, "grad_norm": 1.1755783557891846, "learning_rate": 2.5806921235561157e-06, "loss": 0.528, "step": 5237 }, { "epoch": 0.67, "grad_norm": 1.6222681999206543, "learning_rate": 2.578876382358857e-06, "loss": 0.6104, "step": 5238 }, { "epoch": 0.67, "grad_norm": 1.2842351198196411, "learning_rate": 2.5770610581509292e-06, "loss": 0.528, "step": 5239 }, { "epoch": 0.67, "grad_norm": 1.1351863145828247, "learning_rate": 2.5752461512449854e-06, "loss": 0.6943, "step": 5240 }, { "epoch": 0.67, "grad_norm": 2.076362133026123, "learning_rate": 2.5734316619536072e-06, "loss": 0.611, "step": 5241 }, { "epoch": 0.67, "grad_norm": 1.2188138961791992, "learning_rate": 2.571617590589306e-06, "loss": 0.6463, "step": 5242 }, { "epoch": 0.67, "grad_norm": 1.3224034309387207, "learning_rate": 2.5698039374645134e-06, "loss": 0.6016, "step": 5243 }, { "epoch": 0.67, "grad_norm": 1.2040905952453613, "learning_rate": 2.5679907028915972e-06, "loss": 0.5456, "step": 5244 }, { "epoch": 0.67, "grad_norm": 1.2921534776687622, "learning_rate": 2.5661778871828503e-06, "loss": 0.6099, "step": 5245 }, { "epoch": 0.67, "grad_norm": 1.2232110500335693, "learning_rate": 2.564365490650493e-06, "loss": 0.6297, "step": 5246 }, { "epoch": 0.67, "grad_norm": 1.3627408742904663, "learning_rate": 2.562553513606674e-06, "loss": 0.5766, "step": 5247 }, { "epoch": 0.67, "grad_norm": 1.229413390159607, "learning_rate": 2.5607419563634682e-06, "loss": 0.5605, "step": 5248 }, { "epoch": 0.67, "grad_norm": 1.2325100898742676, "learning_rate": 2.558930819232883e-06, "loss": 0.6456, "step": 5249 }, { "epoch": 0.67, "grad_norm": 1.1989376544952393, "learning_rate": 2.5571201025268468e-06, "loss": 0.641, "step": 5250 }, { "epoch": 0.67, "grad_norm": 1.2480047941207886, "learning_rate": 2.5553098065572186e-06, "loss": 0.5573, "step": 5251 }, { "epoch": 0.67, "grad_norm": 1.2484368085861206, "learning_rate": 2.553499931635786e-06, "loss": 0.6038, "step": 5252 }, { "epoch": 0.67, "grad_norm": 1.4819079637527466, "learning_rate": 2.5516904780742658e-06, "loss": 0.6092, "step": 5253 }, { "epoch": 0.67, "grad_norm": 1.2783153057098389, "learning_rate": 2.5498814461842942e-06, "loss": 0.6426, "step": 5254 }, { "epoch": 0.67, "grad_norm": 1.3205689191818237, "learning_rate": 2.548072836277443e-06, "loss": 0.6066, "step": 5255 }, { "epoch": 0.67, "grad_norm": 1.2751951217651367, "learning_rate": 2.5462646486652094e-06, "loss": 0.5752, "step": 5256 }, { "epoch": 0.67, "grad_norm": 1.27857506275177, "learning_rate": 2.5444568836590174e-06, "loss": 0.667, "step": 5257 }, { "epoch": 0.67, "grad_norm": 1.5317552089691162, "learning_rate": 2.5426495415702146e-06, "loss": 0.6109, "step": 5258 }, { "epoch": 0.67, "grad_norm": 1.290687918663025, "learning_rate": 2.54084262271008e-06, "loss": 0.6179, "step": 5259 }, { "epoch": 0.67, "grad_norm": 1.1564019918441772, "learning_rate": 2.5390361273898207e-06, "loss": 0.5787, "step": 5260 }, { "epoch": 0.67, "grad_norm": 1.4289751052856445, "learning_rate": 2.537230055920569e-06, "loss": 0.5601, "step": 5261 }, { "epoch": 0.67, "grad_norm": 1.3122379779815674, "learning_rate": 2.5354244086133784e-06, "loss": 0.6218, "step": 5262 }, { "epoch": 0.67, "grad_norm": 1.3078715801239014, "learning_rate": 2.533619185779241e-06, "loss": 0.5187, "step": 5263 }, { "epoch": 0.67, "grad_norm": 1.357385277748108, "learning_rate": 2.531814387729069e-06, "loss": 0.648, "step": 5264 }, { "epoch": 0.67, "grad_norm": 1.2903344631195068, "learning_rate": 2.5300100147737007e-06, "loss": 0.5953, "step": 5265 }, { "epoch": 0.67, "grad_norm": 1.2929610013961792, "learning_rate": 2.5282060672239016e-06, "loss": 0.519, "step": 5266 }, { "epoch": 0.67, "grad_norm": 1.800912618637085, "learning_rate": 2.526402545390367e-06, "loss": 0.6361, "step": 5267 }, { "epoch": 0.67, "grad_norm": 2.328589916229248, "learning_rate": 2.524599449583718e-06, "loss": 0.6149, "step": 5268 }, { "epoch": 0.68, "grad_norm": 1.3592402935028076, "learning_rate": 2.5227967801144972e-06, "loss": 0.5711, "step": 5269 }, { "epoch": 0.68, "grad_norm": 1.2721518278121948, "learning_rate": 2.5209945372931798e-06, "loss": 0.5685, "step": 5270 }, { "epoch": 0.68, "grad_norm": 1.2351329326629639, "learning_rate": 2.519192721430166e-06, "loss": 0.5586, "step": 5271 }, { "epoch": 0.68, "grad_norm": 2.6479928493499756, "learning_rate": 2.5173913328357836e-06, "loss": 0.5282, "step": 5272 }, { "epoch": 0.68, "grad_norm": 1.0487334728240967, "learning_rate": 2.515590371820281e-06, "loss": 0.6852, "step": 5273 }, { "epoch": 0.68, "grad_norm": 1.3234119415283203, "learning_rate": 2.5137898386938396e-06, "loss": 0.5542, "step": 5274 }, { "epoch": 0.68, "grad_norm": 1.2834035158157349, "learning_rate": 2.511989733766565e-06, "loss": 0.5604, "step": 5275 }, { "epoch": 0.68, "grad_norm": 1.3077657222747803, "learning_rate": 2.510190057348489e-06, "loss": 0.656, "step": 5276 }, { "epoch": 0.68, "grad_norm": 1.3545429706573486, "learning_rate": 2.508390809749567e-06, "loss": 0.5638, "step": 5277 }, { "epoch": 0.68, "grad_norm": 1.1976977586746216, "learning_rate": 2.5065919912796845e-06, "loss": 0.5891, "step": 5278 }, { "epoch": 0.68, "grad_norm": 1.1058452129364014, "learning_rate": 2.5047936022486503e-06, "loss": 0.5514, "step": 5279 }, { "epoch": 0.68, "grad_norm": 1.2233741283416748, "learning_rate": 2.5029956429662017e-06, "loss": 0.5231, "step": 5280 }, { "epoch": 0.68, "grad_norm": 1.187469244003296, "learning_rate": 2.5011981137419993e-06, "loss": 0.6487, "step": 5281 }, { "epoch": 0.68, "grad_norm": 1.358062744140625, "learning_rate": 2.4994010148856325e-06, "loss": 0.6192, "step": 5282 }, { "epoch": 0.68, "grad_norm": 1.3537427186965942, "learning_rate": 2.4976043467066164e-06, "loss": 0.5947, "step": 5283 }, { "epoch": 0.68, "grad_norm": 1.1666181087493896, "learning_rate": 2.495808109514386e-06, "loss": 0.5528, "step": 5284 }, { "epoch": 0.68, "grad_norm": 1.244334101676941, "learning_rate": 2.4940123036183095e-06, "loss": 0.5853, "step": 5285 }, { "epoch": 0.68, "grad_norm": 1.467995524406433, "learning_rate": 2.4922169293276793e-06, "loss": 0.5277, "step": 5286 }, { "epoch": 0.68, "grad_norm": 1.112685203552246, "learning_rate": 2.4904219869517083e-06, "loss": 0.6663, "step": 5287 }, { "epoch": 0.68, "grad_norm": 1.6050626039505005, "learning_rate": 2.488627476799542e-06, "loss": 0.583, "step": 5288 }, { "epoch": 0.68, "grad_norm": 1.614817500114441, "learning_rate": 2.486833399180246e-06, "loss": 0.5568, "step": 5289 }, { "epoch": 0.68, "grad_norm": 1.259895920753479, "learning_rate": 2.485039754402817e-06, "loss": 0.6091, "step": 5290 }, { "epoch": 0.68, "grad_norm": 1.1175788640975952, "learning_rate": 2.4832465427761704e-06, "loss": 0.6088, "step": 5291 }, { "epoch": 0.68, "grad_norm": 1.3290629386901855, "learning_rate": 2.481453764609152e-06, "loss": 0.6004, "step": 5292 }, { "epoch": 0.68, "grad_norm": 1.1337230205535889, "learning_rate": 2.4796614202105313e-06, "loss": 0.4849, "step": 5293 }, { "epoch": 0.68, "grad_norm": 1.3604735136032104, "learning_rate": 2.477869509889005e-06, "loss": 0.5763, "step": 5294 }, { "epoch": 0.68, "grad_norm": 1.551064372062683, "learning_rate": 2.476078033953189e-06, "loss": 0.5731, "step": 5295 }, { "epoch": 0.68, "grad_norm": 1.4335790872573853, "learning_rate": 2.4742869927116297e-06, "loss": 0.5623, "step": 5296 }, { "epoch": 0.68, "grad_norm": 1.4575905799865723, "learning_rate": 2.4724963864728026e-06, "loss": 0.6491, "step": 5297 }, { "epoch": 0.68, "grad_norm": 1.3047614097595215, "learning_rate": 2.470706215545097e-06, "loss": 0.5279, "step": 5298 }, { "epoch": 0.68, "grad_norm": 1.3483816385269165, "learning_rate": 2.4689164802368366e-06, "loss": 0.636, "step": 5299 }, { "epoch": 0.68, "grad_norm": 1.285593032836914, "learning_rate": 2.4671271808562664e-06, "loss": 0.5903, "step": 5300 }, { "epoch": 0.68, "grad_norm": 1.0162348747253418, "learning_rate": 2.465338317711558e-06, "loss": 0.5889, "step": 5301 }, { "epoch": 0.68, "grad_norm": 1.4397096633911133, "learning_rate": 2.4635498911108042e-06, "loss": 0.5798, "step": 5302 }, { "epoch": 0.68, "grad_norm": 1.4754639863967896, "learning_rate": 2.4617619013620257e-06, "loss": 0.6134, "step": 5303 }, { "epoch": 0.68, "grad_norm": 1.1740292310714722, "learning_rate": 2.4599743487731686e-06, "loss": 0.5692, "step": 5304 }, { "epoch": 0.68, "grad_norm": 1.4390628337860107, "learning_rate": 2.458187233652104e-06, "loss": 0.6033, "step": 5305 }, { "epoch": 0.68, "grad_norm": 1.326101541519165, "learning_rate": 2.456400556306622e-06, "loss": 0.5816, "step": 5306 }, { "epoch": 0.68, "grad_norm": 1.6704646348953247, "learning_rate": 2.454614317044443e-06, "loss": 0.5938, "step": 5307 }, { "epoch": 0.68, "grad_norm": 1.2074356079101562, "learning_rate": 2.452828516173212e-06, "loss": 0.5318, "step": 5308 }, { "epoch": 0.68, "grad_norm": 1.418071985244751, "learning_rate": 2.451043154000497e-06, "loss": 0.6688, "step": 5309 }, { "epoch": 0.68, "grad_norm": 1.3660101890563965, "learning_rate": 2.449258230833788e-06, "loss": 0.5901, "step": 5310 }, { "epoch": 0.68, "grad_norm": 1.3401819467544556, "learning_rate": 2.4474737469805026e-06, "loss": 0.6244, "step": 5311 }, { "epoch": 0.68, "grad_norm": 1.1151803731918335, "learning_rate": 2.445689702747982e-06, "loss": 0.6078, "step": 5312 }, { "epoch": 0.68, "grad_norm": 1.3116505146026611, "learning_rate": 2.443906098443494e-06, "loss": 0.5469, "step": 5313 }, { "epoch": 0.68, "grad_norm": 1.2464821338653564, "learning_rate": 2.4421229343742224e-06, "loss": 0.573, "step": 5314 }, { "epoch": 0.68, "grad_norm": 1.6851762533187866, "learning_rate": 2.4403402108472863e-06, "loss": 0.6493, "step": 5315 }, { "epoch": 0.68, "grad_norm": 1.3235703706741333, "learning_rate": 2.4385579281697235e-06, "loss": 0.5459, "step": 5316 }, { "epoch": 0.68, "grad_norm": 1.267038106918335, "learning_rate": 2.436776086648493e-06, "loss": 0.6437, "step": 5317 }, { "epoch": 0.68, "grad_norm": 1.639235258102417, "learning_rate": 2.4349946865904815e-06, "loss": 0.5845, "step": 5318 }, { "epoch": 0.68, "grad_norm": 1.2454874515533447, "learning_rate": 2.4332137283024997e-06, "loss": 0.537, "step": 5319 }, { "epoch": 0.68, "grad_norm": 1.361704707145691, "learning_rate": 2.431433212091284e-06, "loss": 0.5855, "step": 5320 }, { "epoch": 0.68, "grad_norm": 1.4523319005966187, "learning_rate": 2.4296531382634864e-06, "loss": 0.626, "step": 5321 }, { "epoch": 0.68, "grad_norm": 1.2574588060379028, "learning_rate": 2.427873507125692e-06, "loss": 0.6157, "step": 5322 }, { "epoch": 0.68, "grad_norm": 2.1008527278900146, "learning_rate": 2.4260943189844055e-06, "loss": 0.593, "step": 5323 }, { "epoch": 0.68, "grad_norm": 1.3145480155944824, "learning_rate": 2.4243155741460583e-06, "loss": 0.5892, "step": 5324 }, { "epoch": 0.68, "grad_norm": 1.1647745370864868, "learning_rate": 2.422537272916998e-06, "loss": 0.5774, "step": 5325 }, { "epoch": 0.68, "grad_norm": 1.1349399089813232, "learning_rate": 2.4207594156035042e-06, "loss": 0.5195, "step": 5326 }, { "epoch": 0.68, "grad_norm": 1.4423859119415283, "learning_rate": 2.4189820025117762e-06, "loss": 0.6294, "step": 5327 }, { "epoch": 0.68, "grad_norm": 1.2302815914154053, "learning_rate": 2.417205033947938e-06, "loss": 0.5202, "step": 5328 }, { "epoch": 0.68, "grad_norm": 1.4219542741775513, "learning_rate": 2.415428510218035e-06, "loss": 0.6441, "step": 5329 }, { "epoch": 0.68, "grad_norm": 1.2709050178527832, "learning_rate": 2.413652431628036e-06, "loss": 0.6086, "step": 5330 }, { "epoch": 0.68, "grad_norm": 1.3104877471923828, "learning_rate": 2.4118767984838376e-06, "loss": 0.5737, "step": 5331 }, { "epoch": 0.68, "grad_norm": 1.25847327709198, "learning_rate": 2.4101016110912547e-06, "loss": 0.5736, "step": 5332 }, { "epoch": 0.68, "grad_norm": 1.170403003692627, "learning_rate": 2.4083268697560276e-06, "loss": 0.5607, "step": 5333 }, { "epoch": 0.68, "grad_norm": 1.3111367225646973, "learning_rate": 2.406552574783821e-06, "loss": 0.5241, "step": 5334 }, { "epoch": 0.68, "grad_norm": 1.4980742931365967, "learning_rate": 2.404778726480218e-06, "loss": 0.6526, "step": 5335 }, { "epoch": 0.68, "grad_norm": 1.3784537315368652, "learning_rate": 2.4030053251507298e-06, "loss": 0.5576, "step": 5336 }, { "epoch": 0.68, "grad_norm": 1.2792404890060425, "learning_rate": 2.401232371100788e-06, "loss": 0.6557, "step": 5337 }, { "epoch": 0.68, "grad_norm": 1.282593846321106, "learning_rate": 2.3994598646357505e-06, "loss": 0.5845, "step": 5338 }, { "epoch": 0.68, "grad_norm": 1.3621476888656616, "learning_rate": 2.397687806060891e-06, "loss": 0.5418, "step": 5339 }, { "epoch": 0.68, "grad_norm": 1.4283099174499512, "learning_rate": 2.3959161956814136e-06, "loss": 0.588, "step": 5340 }, { "epoch": 0.68, "grad_norm": 1.1404139995574951, "learning_rate": 2.394145033802441e-06, "loss": 0.577, "step": 5341 }, { "epoch": 0.68, "grad_norm": 1.461607813835144, "learning_rate": 2.392374320729023e-06, "loss": 0.6742, "step": 5342 }, { "epoch": 0.68, "grad_norm": 1.1831786632537842, "learning_rate": 2.390604056766124e-06, "loss": 0.6059, "step": 5343 }, { "epoch": 0.68, "grad_norm": 1.6188664436340332, "learning_rate": 2.3888342422186376e-06, "loss": 0.5266, "step": 5344 }, { "epoch": 0.68, "grad_norm": 1.6657450199127197, "learning_rate": 2.3870648773913796e-06, "loss": 0.6137, "step": 5345 }, { "epoch": 0.68, "grad_norm": 1.1973358392715454, "learning_rate": 2.3852959625890888e-06, "loss": 0.6311, "step": 5346 }, { "epoch": 0.69, "grad_norm": 1.5780502557754517, "learning_rate": 2.38352749811642e-06, "loss": 0.6452, "step": 5347 }, { "epoch": 0.69, "grad_norm": 1.6269686222076416, "learning_rate": 2.3817594842779566e-06, "loss": 0.5092, "step": 5348 }, { "epoch": 0.69, "grad_norm": 1.3493552207946777, "learning_rate": 2.379991921378208e-06, "loss": 0.6042, "step": 5349 }, { "epoch": 0.69, "grad_norm": 1.241943359375, "learning_rate": 2.378224809721595e-06, "loss": 0.5149, "step": 5350 }, { "epoch": 0.69, "grad_norm": 1.2631124258041382, "learning_rate": 2.3764581496124693e-06, "loss": 0.5638, "step": 5351 }, { "epoch": 0.69, "grad_norm": 1.1910855770111084, "learning_rate": 2.374691941355102e-06, "loss": 0.5865, "step": 5352 }, { "epoch": 0.69, "grad_norm": 4.200222015380859, "learning_rate": 2.372926185253688e-06, "loss": 0.5512, "step": 5353 }, { "epoch": 0.69, "grad_norm": 1.0969185829162598, "learning_rate": 2.3711608816123393e-06, "loss": 0.5978, "step": 5354 }, { "epoch": 0.69, "grad_norm": 1.5703184604644775, "learning_rate": 2.3693960307350953e-06, "loss": 0.5803, "step": 5355 }, { "epoch": 0.69, "grad_norm": 1.877293586730957, "learning_rate": 2.367631632925917e-06, "loss": 0.5711, "step": 5356 }, { "epoch": 0.69, "grad_norm": 1.0474374294281006, "learning_rate": 2.365867688488686e-06, "loss": 0.5735, "step": 5357 }, { "epoch": 0.69, "grad_norm": 1.2668507099151611, "learning_rate": 2.364104197727204e-06, "loss": 0.5971, "step": 5358 }, { "epoch": 0.69, "grad_norm": 1.335062861442566, "learning_rate": 2.362341160945197e-06, "loss": 0.5533, "step": 5359 }, { "epoch": 0.69, "grad_norm": 1.19700026512146, "learning_rate": 2.360578578446312e-06, "loss": 0.5404, "step": 5360 }, { "epoch": 0.69, "grad_norm": 1.3640122413635254, "learning_rate": 2.358816450534121e-06, "loss": 0.636, "step": 5361 }, { "epoch": 0.69, "grad_norm": 1.0688934326171875, "learning_rate": 2.3570547775121106e-06, "loss": 0.6033, "step": 5362 }, { "epoch": 0.69, "grad_norm": 1.1979340314865112, "learning_rate": 2.3552935596836947e-06, "loss": 0.554, "step": 5363 }, { "epoch": 0.69, "grad_norm": 1.4938658475875854, "learning_rate": 2.353532797352207e-06, "loss": 0.5977, "step": 5364 }, { "epoch": 0.69, "grad_norm": 1.255603551864624, "learning_rate": 2.3517724908209037e-06, "loss": 0.5904, "step": 5365 }, { "epoch": 0.69, "grad_norm": 1.557724118232727, "learning_rate": 2.3500126403929624e-06, "loss": 0.6582, "step": 5366 }, { "epoch": 0.69, "grad_norm": 1.2635000944137573, "learning_rate": 2.3482532463714803e-06, "loss": 0.5664, "step": 5367 }, { "epoch": 0.69, "grad_norm": 1.5641952753067017, "learning_rate": 2.3464943090594794e-06, "loss": 0.6332, "step": 5368 }, { "epoch": 0.69, "grad_norm": 1.1680552959442139, "learning_rate": 2.3447358287598975e-06, "loss": 0.625, "step": 5369 }, { "epoch": 0.69, "grad_norm": 1.9742357730865479, "learning_rate": 2.3429778057755994e-06, "loss": 0.5614, "step": 5370 }, { "epoch": 0.69, "grad_norm": 1.2470797300338745, "learning_rate": 2.3412202404093675e-06, "loss": 0.5732, "step": 5371 }, { "epoch": 0.69, "grad_norm": 1.2727478742599487, "learning_rate": 2.3394631329639094e-06, "loss": 0.5777, "step": 5372 }, { "epoch": 0.69, "grad_norm": 1.1417211294174194, "learning_rate": 2.337706483741848e-06, "loss": 0.6026, "step": 5373 }, { "epoch": 0.69, "grad_norm": 1.1219482421875, "learning_rate": 2.3359502930457306e-06, "loss": 0.5525, "step": 5374 }, { "epoch": 0.69, "grad_norm": 1.3681706190109253, "learning_rate": 2.334194561178027e-06, "loss": 0.5578, "step": 5375 }, { "epoch": 0.69, "grad_norm": 1.2046383619308472, "learning_rate": 2.3324392884411277e-06, "loss": 0.5576, "step": 5376 }, { "epoch": 0.69, "grad_norm": 3.306382894515991, "learning_rate": 2.3306844751373384e-06, "loss": 0.5266, "step": 5377 }, { "epoch": 0.69, "grad_norm": 1.108659267425537, "learning_rate": 2.328930121568893e-06, "loss": 0.6962, "step": 5378 }, { "epoch": 0.69, "grad_norm": 1.2322014570236206, "learning_rate": 2.3271762280379446e-06, "loss": 0.5757, "step": 5379 }, { "epoch": 0.69, "grad_norm": 1.400467038154602, "learning_rate": 2.3254227948465613e-06, "loss": 0.5913, "step": 5380 }, { "epoch": 0.69, "grad_norm": 1.3882756233215332, "learning_rate": 2.3236698222967392e-06, "loss": 0.5503, "step": 5381 }, { "epoch": 0.69, "grad_norm": 1.5921300649642944, "learning_rate": 2.321917310690392e-06, "loss": 0.5449, "step": 5382 }, { "epoch": 0.69, "grad_norm": 1.7357591390609741, "learning_rate": 2.3201652603293535e-06, "loss": 0.5941, "step": 5383 }, { "epoch": 0.69, "grad_norm": 1.2445812225341797, "learning_rate": 2.318413671515379e-06, "loss": 0.5878, "step": 5384 }, { "epoch": 0.69, "grad_norm": 1.2913782596588135, "learning_rate": 2.3166625445501444e-06, "loss": 0.6032, "step": 5385 }, { "epoch": 0.69, "grad_norm": 1.1985303163528442, "learning_rate": 2.3149118797352484e-06, "loss": 0.5549, "step": 5386 }, { "epoch": 0.69, "grad_norm": 2.0271339416503906, "learning_rate": 2.3131616773722015e-06, "loss": 0.5392, "step": 5387 }, { "epoch": 0.69, "grad_norm": 1.3953230381011963, "learning_rate": 2.3114119377624444e-06, "loss": 0.6916, "step": 5388 }, { "epoch": 0.69, "grad_norm": 1.073756456375122, "learning_rate": 2.309662661207334e-06, "loss": 0.6164, "step": 5389 }, { "epoch": 0.69, "grad_norm": 1.0587557554244995, "learning_rate": 2.3079138480081474e-06, "loss": 0.5769, "step": 5390 }, { "epoch": 0.69, "grad_norm": 1.3357759714126587, "learning_rate": 2.3061654984660808e-06, "loss": 0.6175, "step": 5391 }, { "epoch": 0.69, "grad_norm": 1.4631446599960327, "learning_rate": 2.3044176128822533e-06, "loss": 0.5939, "step": 5392 }, { "epoch": 0.69, "grad_norm": 1.3898251056671143, "learning_rate": 2.3026701915577017e-06, "loss": 0.5235, "step": 5393 }, { "epoch": 0.69, "grad_norm": 1.3785183429718018, "learning_rate": 2.3009232347933858e-06, "loss": 0.647, "step": 5394 }, { "epoch": 0.69, "grad_norm": 1.1423410177230835, "learning_rate": 2.299176742890181e-06, "loss": 0.5851, "step": 5395 }, { "epoch": 0.69, "grad_norm": 1.523036003112793, "learning_rate": 2.297430716148885e-06, "loss": 0.5871, "step": 5396 }, { "epoch": 0.69, "grad_norm": 1.1296528577804565, "learning_rate": 2.295685154870217e-06, "loss": 0.5476, "step": 5397 }, { "epoch": 0.69, "grad_norm": 1.4960590600967407, "learning_rate": 2.2939400593548157e-06, "loss": 0.6542, "step": 5398 }, { "epoch": 0.69, "grad_norm": 1.2516299486160278, "learning_rate": 2.2921954299032324e-06, "loss": 0.5171, "step": 5399 }, { "epoch": 0.69, "grad_norm": 1.3827989101409912, "learning_rate": 2.29045126681595e-06, "loss": 0.6073, "step": 5400 }, { "epoch": 0.69, "grad_norm": 1.7401831150054932, "learning_rate": 2.2887075703933654e-06, "loss": 0.5981, "step": 5401 }, { "epoch": 0.69, "grad_norm": 1.0633769035339355, "learning_rate": 2.2869643409357907e-06, "loss": 0.7061, "step": 5402 }, { "epoch": 0.69, "grad_norm": 1.2310799360275269, "learning_rate": 2.2852215787434636e-06, "loss": 0.5535, "step": 5403 }, { "epoch": 0.69, "grad_norm": 1.2269682884216309, "learning_rate": 2.28347928411654e-06, "loss": 0.606, "step": 5404 }, { "epoch": 0.69, "grad_norm": 1.3008508682250977, "learning_rate": 2.2817374573550956e-06, "loss": 0.5632, "step": 5405 }, { "epoch": 0.69, "grad_norm": 1.3585628271102905, "learning_rate": 2.2799960987591217e-06, "loss": 0.604, "step": 5406 }, { "epoch": 0.69, "grad_norm": 1.6831282377243042, "learning_rate": 2.278255208628534e-06, "loss": 0.6772, "step": 5407 }, { "epoch": 0.69, "grad_norm": 1.3406838178634644, "learning_rate": 2.2765147872631656e-06, "loss": 0.6374, "step": 5408 }, { "epoch": 0.69, "grad_norm": 1.5567865371704102, "learning_rate": 2.274774834962769e-06, "loss": 0.6538, "step": 5409 }, { "epoch": 0.69, "grad_norm": 1.564103364944458, "learning_rate": 2.273035352027013e-06, "loss": 0.6838, "step": 5410 }, { "epoch": 0.69, "grad_norm": 1.5919641256332397, "learning_rate": 2.271296338755491e-06, "loss": 0.6681, "step": 5411 }, { "epoch": 0.69, "grad_norm": 1.4023327827453613, "learning_rate": 2.269557795447711e-06, "loss": 0.5633, "step": 5412 }, { "epoch": 0.69, "grad_norm": 1.1743390560150146, "learning_rate": 2.267819722403104e-06, "loss": 0.519, "step": 5413 }, { "epoch": 0.69, "grad_norm": 1.3634856939315796, "learning_rate": 2.2660821199210147e-06, "loss": 0.6134, "step": 5414 }, { "epoch": 0.69, "grad_norm": 1.3316655158996582, "learning_rate": 2.2643449883007113e-06, "loss": 0.6312, "step": 5415 }, { "epoch": 0.69, "grad_norm": 1.128868818283081, "learning_rate": 2.26260832784138e-06, "loss": 0.45, "step": 5416 }, { "epoch": 0.69, "grad_norm": 1.427899718284607, "learning_rate": 2.2608721388421233e-06, "loss": 0.6127, "step": 5417 }, { "epoch": 0.69, "grad_norm": 16.15768051147461, "learning_rate": 2.259136421601967e-06, "loss": 0.6433, "step": 5418 }, { "epoch": 0.69, "grad_norm": 1.452759027481079, "learning_rate": 2.257401176419851e-06, "loss": 0.5636, "step": 5419 }, { "epoch": 0.69, "grad_norm": 1.5467267036437988, "learning_rate": 2.2556664035946396e-06, "loss": 0.6037, "step": 5420 }, { "epoch": 0.69, "grad_norm": 1.701102375984192, "learning_rate": 2.253932103425107e-06, "loss": 0.5748, "step": 5421 }, { "epoch": 0.69, "grad_norm": 1.346367359161377, "learning_rate": 2.2521982762099544e-06, "loss": 0.6572, "step": 5422 }, { "epoch": 0.69, "grad_norm": 1.1707795858383179, "learning_rate": 2.2504649222477977e-06, "loss": 0.6369, "step": 5423 }, { "epoch": 0.69, "grad_norm": 1.1806703805923462, "learning_rate": 2.248732041837173e-06, "loss": 0.616, "step": 5424 }, { "epoch": 0.7, "grad_norm": 1.6231908798217773, "learning_rate": 2.2469996352765307e-06, "loss": 0.5244, "step": 5425 }, { "epoch": 0.7, "grad_norm": 1.244320034980774, "learning_rate": 2.2452677028642445e-06, "loss": 0.6273, "step": 5426 }, { "epoch": 0.7, "grad_norm": 5.8792548179626465, "learning_rate": 2.2435362448986058e-06, "loss": 0.6268, "step": 5427 }, { "epoch": 0.7, "grad_norm": 1.297541618347168, "learning_rate": 2.241805261677821e-06, "loss": 0.5361, "step": 5428 }, { "epoch": 0.7, "grad_norm": 1.4641773700714111, "learning_rate": 2.240074753500017e-06, "loss": 0.5059, "step": 5429 }, { "epoch": 0.7, "grad_norm": 1.5284756422042847, "learning_rate": 2.2383447206632385e-06, "loss": 0.5743, "step": 5430 }, { "epoch": 0.7, "grad_norm": 1.548749327659607, "learning_rate": 2.2366151634654513e-06, "loss": 0.6469, "step": 5431 }, { "epoch": 0.7, "grad_norm": 1.4602359533309937, "learning_rate": 2.2348860822045327e-06, "loss": 0.5945, "step": 5432 }, { "epoch": 0.7, "grad_norm": 1.2962963581085205, "learning_rate": 2.233157477178281e-06, "loss": 0.63, "step": 5433 }, { "epoch": 0.7, "grad_norm": 1.1280606985092163, "learning_rate": 2.23142934868442e-06, "loss": 0.5783, "step": 5434 }, { "epoch": 0.7, "grad_norm": 1.4649392366409302, "learning_rate": 2.229701697020578e-06, "loss": 0.6784, "step": 5435 }, { "epoch": 0.7, "grad_norm": 1.415880799293518, "learning_rate": 2.2279745224843108e-06, "loss": 0.6329, "step": 5436 }, { "epoch": 0.7, "grad_norm": 1.3583704233169556, "learning_rate": 2.2262478253730885e-06, "loss": 0.5051, "step": 5437 }, { "epoch": 0.7, "grad_norm": 1.3856008052825928, "learning_rate": 2.224521605984302e-06, "loss": 0.6311, "step": 5438 }, { "epoch": 0.7, "grad_norm": 1.266484022140503, "learning_rate": 2.2227958646152525e-06, "loss": 0.6008, "step": 5439 }, { "epoch": 0.7, "grad_norm": 1.2216291427612305, "learning_rate": 2.2210706015631666e-06, "loss": 0.5565, "step": 5440 }, { "epoch": 0.7, "grad_norm": 1.1825257539749146, "learning_rate": 2.2193458171251857e-06, "loss": 0.6352, "step": 5441 }, { "epoch": 0.7, "grad_norm": 0.9359205365180969, "learning_rate": 2.217621511598371e-06, "loss": 0.5556, "step": 5442 }, { "epoch": 0.7, "grad_norm": 1.3200385570526123, "learning_rate": 2.2158976852796956e-06, "loss": 0.5984, "step": 5443 }, { "epoch": 0.7, "grad_norm": 1.6388661861419678, "learning_rate": 2.2141743384660542e-06, "loss": 0.6564, "step": 5444 }, { "epoch": 0.7, "grad_norm": 1.454256296157837, "learning_rate": 2.212451471454259e-06, "loss": 0.5588, "step": 5445 }, { "epoch": 0.7, "grad_norm": 1.3445405960083008, "learning_rate": 2.2107290845410405e-06, "loss": 0.5404, "step": 5446 }, { "epoch": 0.7, "grad_norm": 1.1842902898788452, "learning_rate": 2.209007178023042e-06, "loss": 0.6045, "step": 5447 }, { "epoch": 0.7, "grad_norm": 1.4644899368286133, "learning_rate": 2.2072857521968276e-06, "loss": 0.5565, "step": 5448 }, { "epoch": 0.7, "grad_norm": 1.2165268659591675, "learning_rate": 2.205564807358878e-06, "loss": 0.6259, "step": 5449 }, { "epoch": 0.7, "grad_norm": 1.3028157949447632, "learning_rate": 2.203844343805594e-06, "loss": 0.5122, "step": 5450 }, { "epoch": 0.7, "grad_norm": 1.135292410850525, "learning_rate": 2.2021243618332833e-06, "loss": 0.54, "step": 5451 }, { "epoch": 0.7, "grad_norm": 1.196251630783081, "learning_rate": 2.2004048617381844e-06, "loss": 0.4666, "step": 5452 }, { "epoch": 0.7, "grad_norm": 1.4005775451660156, "learning_rate": 2.1986858438164464e-06, "loss": 0.5846, "step": 5453 }, { "epoch": 0.7, "grad_norm": 1.7641587257385254, "learning_rate": 2.196967308364131e-06, "loss": 0.578, "step": 5454 }, { "epoch": 0.7, "grad_norm": 1.2260916233062744, "learning_rate": 2.1952492556772226e-06, "loss": 0.5843, "step": 5455 }, { "epoch": 0.7, "grad_norm": 1.4907604455947876, "learning_rate": 2.1935316860516214e-06, "loss": 0.6536, "step": 5456 }, { "epoch": 0.7, "grad_norm": 1.4494532346725464, "learning_rate": 2.191814599783146e-06, "loss": 0.5521, "step": 5457 }, { "epoch": 0.7, "grad_norm": 1.3400883674621582, "learning_rate": 2.190097997167525e-06, "loss": 0.576, "step": 5458 }, { "epoch": 0.7, "grad_norm": 1.1813629865646362, "learning_rate": 2.1883818785004108e-06, "loss": 0.7461, "step": 5459 }, { "epoch": 0.7, "grad_norm": 1.175892949104309, "learning_rate": 2.1866662440773694e-06, "loss": 0.6967, "step": 5460 }, { "epoch": 0.7, "grad_norm": 1.5057560205459595, "learning_rate": 2.1849510941938864e-06, "loss": 0.637, "step": 5461 }, { "epoch": 0.7, "grad_norm": 1.2532135248184204, "learning_rate": 2.1832364291453572e-06, "loss": 0.6955, "step": 5462 }, { "epoch": 0.7, "grad_norm": 1.2681691646575928, "learning_rate": 2.181522249227101e-06, "loss": 0.6327, "step": 5463 }, { "epoch": 0.7, "grad_norm": 1.4270204305648804, "learning_rate": 2.179808554734349e-06, "loss": 0.5954, "step": 5464 }, { "epoch": 0.7, "grad_norm": 1.4886845350265503, "learning_rate": 2.1780953459622526e-06, "loss": 0.5569, "step": 5465 }, { "epoch": 0.7, "grad_norm": 1.3294905424118042, "learning_rate": 2.1763826232058745e-06, "loss": 0.6214, "step": 5466 }, { "epoch": 0.7, "grad_norm": 1.3741010427474976, "learning_rate": 2.174670386760197e-06, "loss": 0.6151, "step": 5467 }, { "epoch": 0.7, "grad_norm": 2.0721983909606934, "learning_rate": 2.1729586369201184e-06, "loss": 0.7426, "step": 5468 }, { "epoch": 0.7, "grad_norm": 1.39492666721344, "learning_rate": 2.1712473739804524e-06, "loss": 0.6307, "step": 5469 }, { "epoch": 0.7, "grad_norm": 1.437631368637085, "learning_rate": 2.16953659823593e-06, "loss": 0.5988, "step": 5470 }, { "epoch": 0.7, "grad_norm": 1.322919487953186, "learning_rate": 2.1678263099811973e-06, "loss": 0.6527, "step": 5471 }, { "epoch": 0.7, "grad_norm": 1.1208932399749756, "learning_rate": 2.166116509510819e-06, "loss": 0.5896, "step": 5472 }, { "epoch": 0.7, "grad_norm": 1.3424168825149536, "learning_rate": 2.1644071971192687e-06, "loss": 0.5752, "step": 5473 }, { "epoch": 0.7, "grad_norm": 1.3916758298873901, "learning_rate": 2.162698373100943e-06, "loss": 0.6444, "step": 5474 }, { "epoch": 0.7, "grad_norm": 1.0904909372329712, "learning_rate": 2.1609900377501526e-06, "loss": 0.6417, "step": 5475 }, { "epoch": 0.7, "grad_norm": 1.8144199848175049, "learning_rate": 2.1592821913611246e-06, "loss": 0.5684, "step": 5476 }, { "epoch": 0.7, "grad_norm": 1.6254833936691284, "learning_rate": 2.157574834227998e-06, "loss": 0.6259, "step": 5477 }, { "epoch": 0.7, "grad_norm": 1.1566519737243652, "learning_rate": 2.1558679666448314e-06, "loss": 0.5346, "step": 5478 }, { "epoch": 0.7, "grad_norm": 1.5531538724899292, "learning_rate": 2.1541615889056e-06, "loss": 0.6, "step": 5479 }, { "epoch": 0.7, "grad_norm": 1.1958260536193848, "learning_rate": 2.1524557013041897e-06, "loss": 0.6024, "step": 5480 }, { "epoch": 0.7, "grad_norm": 1.4889098405838013, "learning_rate": 2.1507503041344063e-06, "loss": 0.6399, "step": 5481 }, { "epoch": 0.7, "grad_norm": 1.2659668922424316, "learning_rate": 2.14904539768997e-06, "loss": 0.5371, "step": 5482 }, { "epoch": 0.7, "grad_norm": 1.2588521242141724, "learning_rate": 2.1473409822645174e-06, "loss": 0.6951, "step": 5483 }, { "epoch": 0.7, "grad_norm": 1.2568881511688232, "learning_rate": 2.1456370581515967e-06, "loss": 0.5739, "step": 5484 }, { "epoch": 0.7, "grad_norm": 1.5543744564056396, "learning_rate": 2.143933625644674e-06, "loss": 0.5692, "step": 5485 }, { "epoch": 0.7, "grad_norm": 1.441428542137146, "learning_rate": 2.142230685037136e-06, "loss": 0.6467, "step": 5486 }, { "epoch": 0.7, "grad_norm": 1.2142436504364014, "learning_rate": 2.140528236622274e-06, "loss": 0.4952, "step": 5487 }, { "epoch": 0.7, "grad_norm": 1.1973832845687866, "learning_rate": 2.138826280693303e-06, "loss": 0.5627, "step": 5488 }, { "epoch": 0.7, "grad_norm": 1.733954668045044, "learning_rate": 2.137124817543349e-06, "loss": 0.604, "step": 5489 }, { "epoch": 0.7, "grad_norm": 1.3623613119125366, "learning_rate": 2.1354238474654574e-06, "loss": 0.5886, "step": 5490 }, { "epoch": 0.7, "grad_norm": 1.325785517692566, "learning_rate": 2.133723370752581e-06, "loss": 0.578, "step": 5491 }, { "epoch": 0.7, "grad_norm": 1.2375460863113403, "learning_rate": 2.1320233876975955e-06, "loss": 0.5326, "step": 5492 }, { "epoch": 0.7, "grad_norm": 1.4318372011184692, "learning_rate": 2.130323898593287e-06, "loss": 0.5755, "step": 5493 }, { "epoch": 0.7, "grad_norm": 1.3102978467941284, "learning_rate": 2.128624903732361e-06, "loss": 0.6431, "step": 5494 }, { "epoch": 0.7, "grad_norm": 1.4908846616744995, "learning_rate": 2.12692640340743e-06, "loss": 0.5952, "step": 5495 }, { "epoch": 0.7, "grad_norm": 1.2359824180603027, "learning_rate": 2.1252283979110292e-06, "loss": 0.6169, "step": 5496 }, { "epoch": 0.7, "grad_norm": 1.3551256656646729, "learning_rate": 2.1235308875356048e-06, "loss": 0.6417, "step": 5497 }, { "epoch": 0.7, "grad_norm": 1.139798641204834, "learning_rate": 2.1218338725735203e-06, "loss": 0.5594, "step": 5498 }, { "epoch": 0.7, "grad_norm": 1.2601267099380493, "learning_rate": 2.1201373533170484e-06, "loss": 0.6317, "step": 5499 }, { "epoch": 0.7, "grad_norm": 1.3188377618789673, "learning_rate": 2.1184413300583823e-06, "loss": 0.6327, "step": 5500 }, { "epoch": 0.7, "grad_norm": 1.4169940948486328, "learning_rate": 2.116745803089627e-06, "loss": 0.6216, "step": 5501 }, { "epoch": 0.7, "grad_norm": 1.5327837467193604, "learning_rate": 2.115050772702803e-06, "loss": 0.6618, "step": 5502 }, { "epoch": 0.71, "grad_norm": 1.3896936178207397, "learning_rate": 2.1133562391898444e-06, "loss": 0.5753, "step": 5503 }, { "epoch": 0.71, "grad_norm": 1.2866517305374146, "learning_rate": 2.1116622028426004e-06, "loss": 0.6075, "step": 5504 }, { "epoch": 0.71, "grad_norm": 1.4563747644424438, "learning_rate": 2.109968663952836e-06, "loss": 0.6092, "step": 5505 }, { "epoch": 0.71, "grad_norm": 1.2761017084121704, "learning_rate": 2.108275622812226e-06, "loss": 0.6336, "step": 5506 }, { "epoch": 0.71, "grad_norm": 1.3175703287124634, "learning_rate": 2.1065830797123628e-06, "loss": 0.5825, "step": 5507 }, { "epoch": 0.71, "grad_norm": 1.1784989833831787, "learning_rate": 2.1048910349447533e-06, "loss": 0.5514, "step": 5508 }, { "epoch": 0.71, "grad_norm": 1.3874294757843018, "learning_rate": 2.1031994888008196e-06, "loss": 0.5477, "step": 5509 }, { "epoch": 0.71, "grad_norm": 1.0878545045852661, "learning_rate": 2.1015084415718926e-06, "loss": 0.5669, "step": 5510 }, { "epoch": 0.71, "grad_norm": 1.1752663850784302, "learning_rate": 2.0998178935492223e-06, "loss": 0.5278, "step": 5511 }, { "epoch": 0.71, "grad_norm": 1.5478566884994507, "learning_rate": 2.098127845023972e-06, "loss": 0.5717, "step": 5512 }, { "epoch": 0.71, "grad_norm": 1.4689644575119019, "learning_rate": 2.0964382962872187e-06, "loss": 0.6032, "step": 5513 }, { "epoch": 0.71, "grad_norm": 1.3008317947387695, "learning_rate": 2.0947492476299503e-06, "loss": 0.6083, "step": 5514 }, { "epoch": 0.71, "grad_norm": 1.0789536237716675, "learning_rate": 2.0930606993430722e-06, "loss": 0.5137, "step": 5515 }, { "epoch": 0.71, "grad_norm": 1.4409828186035156, "learning_rate": 2.091372651717404e-06, "loss": 0.6374, "step": 5516 }, { "epoch": 0.71, "grad_norm": 1.2810938358306885, "learning_rate": 2.0896851050436774e-06, "loss": 0.6584, "step": 5517 }, { "epoch": 0.71, "grad_norm": 1.3894275426864624, "learning_rate": 2.0879980596125353e-06, "loss": 0.5872, "step": 5518 }, { "epoch": 0.71, "grad_norm": 1.4865697622299194, "learning_rate": 2.086311515714537e-06, "loss": 0.4925, "step": 5519 }, { "epoch": 0.71, "grad_norm": 1.483933448791504, "learning_rate": 2.084625473640161e-06, "loss": 0.6136, "step": 5520 }, { "epoch": 0.71, "grad_norm": 1.3818782567977905, "learning_rate": 2.0829399336797883e-06, "loss": 0.6241, "step": 5521 }, { "epoch": 0.71, "grad_norm": 1.9844701290130615, "learning_rate": 2.08125489612372e-06, "loss": 0.641, "step": 5522 }, { "epoch": 0.71, "grad_norm": 1.4045764207839966, "learning_rate": 2.079570361262171e-06, "loss": 0.5807, "step": 5523 }, { "epoch": 0.71, "grad_norm": 1.5632199048995972, "learning_rate": 2.0778863293852686e-06, "loss": 0.607, "step": 5524 }, { "epoch": 0.71, "grad_norm": 1.273563265800476, "learning_rate": 2.07620280078305e-06, "loss": 0.5969, "step": 5525 }, { "epoch": 0.71, "grad_norm": 1.6909412145614624, "learning_rate": 2.0745197757454704e-06, "loss": 0.6227, "step": 5526 }, { "epoch": 0.71, "grad_norm": 1.1659879684448242, "learning_rate": 2.0728372545623987e-06, "loss": 0.559, "step": 5527 }, { "epoch": 0.71, "grad_norm": 2.438142776489258, "learning_rate": 2.0711552375236115e-06, "loss": 0.5949, "step": 5528 }, { "epoch": 0.71, "grad_norm": 1.3141118288040161, "learning_rate": 2.0694737249188036e-06, "loss": 0.4984, "step": 5529 }, { "epoch": 0.71, "grad_norm": 1.1047041416168213, "learning_rate": 2.0677927170375812e-06, "loss": 0.6739, "step": 5530 }, { "epoch": 0.71, "grad_norm": 1.4703797101974487, "learning_rate": 2.0661122141694655e-06, "loss": 0.5354, "step": 5531 }, { "epoch": 0.71, "grad_norm": 1.1246980428695679, "learning_rate": 2.0644322166038867e-06, "loss": 0.5546, "step": 5532 }, { "epoch": 0.71, "grad_norm": 1.181230902671814, "learning_rate": 2.06275272463019e-06, "loss": 0.6074, "step": 5533 }, { "epoch": 0.71, "grad_norm": 1.4038329124450684, "learning_rate": 2.061073738537635e-06, "loss": 0.5793, "step": 5534 }, { "epoch": 0.71, "grad_norm": 1.2831275463104248, "learning_rate": 2.059395258615394e-06, "loss": 0.6463, "step": 5535 }, { "epoch": 0.71, "grad_norm": 1.1998370885849, "learning_rate": 2.057717285152547e-06, "loss": 0.743, "step": 5536 }, { "epoch": 0.71, "grad_norm": 1.8671464920043945, "learning_rate": 2.056039818438095e-06, "loss": 0.5823, "step": 5537 }, { "epoch": 0.71, "grad_norm": 1.3943527936935425, "learning_rate": 2.0543628587609472e-06, "loss": 0.5931, "step": 5538 }, { "epoch": 0.71, "grad_norm": 1.3470430374145508, "learning_rate": 2.052686406409923e-06, "loss": 0.5477, "step": 5539 }, { "epoch": 0.71, "grad_norm": 1.0583560466766357, "learning_rate": 2.0510104616737597e-06, "loss": 0.6012, "step": 5540 }, { "epoch": 0.71, "grad_norm": 1.5672448873519897, "learning_rate": 2.0493350248411033e-06, "loss": 0.6135, "step": 5541 }, { "epoch": 0.71, "grad_norm": 1.6713660955429077, "learning_rate": 2.047660096200516e-06, "loss": 0.653, "step": 5542 }, { "epoch": 0.71, "grad_norm": 1.2293089628219604, "learning_rate": 2.045985676040467e-06, "loss": 0.5229, "step": 5543 }, { "epoch": 0.71, "grad_norm": 1.250230312347412, "learning_rate": 2.044311764649343e-06, "loss": 0.6772, "step": 5544 }, { "epoch": 0.71, "grad_norm": 1.6269879341125488, "learning_rate": 2.042638362315441e-06, "loss": 0.6468, "step": 5545 }, { "epoch": 0.71, "grad_norm": 1.4112759828567505, "learning_rate": 2.040965469326972e-06, "loss": 0.6261, "step": 5546 }, { "epoch": 0.71, "grad_norm": 1.4061042070388794, "learning_rate": 2.0392930859720545e-06, "loss": 0.5911, "step": 5547 }, { "epoch": 0.71, "grad_norm": 1.2331053018569946, "learning_rate": 2.037621212538724e-06, "loss": 0.5203, "step": 5548 }, { "epoch": 0.71, "grad_norm": 1.3158512115478516, "learning_rate": 2.035949849314928e-06, "loss": 0.4839, "step": 5549 }, { "epoch": 0.71, "grad_norm": 1.7110259532928467, "learning_rate": 2.0342789965885246e-06, "loss": 0.601, "step": 5550 }, { "epoch": 0.71, "grad_norm": 1.6905916929244995, "learning_rate": 2.0326086546472823e-06, "loss": 0.6474, "step": 5551 }, { "epoch": 0.71, "grad_norm": 1.428787112236023, "learning_rate": 2.0309388237788847e-06, "loss": 0.5905, "step": 5552 }, { "epoch": 0.71, "grad_norm": 1.4208601713180542, "learning_rate": 2.029269504270926e-06, "loss": 0.6783, "step": 5553 }, { "epoch": 0.71, "grad_norm": 1.5845845937728882, "learning_rate": 2.027600696410913e-06, "loss": 0.6614, "step": 5554 }, { "epoch": 0.71, "grad_norm": 1.4801743030548096, "learning_rate": 2.025932400486264e-06, "loss": 0.6549, "step": 5555 }, { "epoch": 0.71, "grad_norm": 1.0526586771011353, "learning_rate": 2.0242646167843083e-06, "loss": 0.5649, "step": 5556 }, { "epoch": 0.71, "grad_norm": 1.4187215566635132, "learning_rate": 2.022597345592291e-06, "loss": 0.6142, "step": 5557 }, { "epoch": 0.71, "grad_norm": 1.489784836769104, "learning_rate": 2.02093058719736e-06, "loss": 0.5635, "step": 5558 }, { "epoch": 0.71, "grad_norm": 1.1324917078018188, "learning_rate": 2.0192643418865843e-06, "loss": 0.512, "step": 5559 }, { "epoch": 0.71, "grad_norm": 1.250471591949463, "learning_rate": 2.0175986099469395e-06, "loss": 0.6027, "step": 5560 }, { "epoch": 0.71, "grad_norm": 1.311180830001831, "learning_rate": 2.0159333916653166e-06, "loss": 0.5209, "step": 5561 }, { "epoch": 0.71, "grad_norm": 1.4973576068878174, "learning_rate": 2.0142686873285124e-06, "loss": 0.5737, "step": 5562 }, { "epoch": 0.71, "grad_norm": 1.524202823638916, "learning_rate": 2.012604497223239e-06, "loss": 0.5775, "step": 5563 }, { "epoch": 0.71, "grad_norm": 1.5545145273208618, "learning_rate": 2.01094082163612e-06, "loss": 0.6578, "step": 5564 }, { "epoch": 0.71, "grad_norm": 1.5071074962615967, "learning_rate": 2.009277660853691e-06, "loss": 0.7238, "step": 5565 }, { "epoch": 0.71, "grad_norm": 1.6860618591308594, "learning_rate": 2.007615015162395e-06, "loss": 0.6873, "step": 5566 }, { "epoch": 0.71, "grad_norm": 1.1676408052444458, "learning_rate": 2.0059528848485895e-06, "loss": 0.6382, "step": 5567 }, { "epoch": 0.71, "grad_norm": 1.1281388998031616, "learning_rate": 2.0042912701985436e-06, "loss": 0.6241, "step": 5568 }, { "epoch": 0.71, "grad_norm": 1.5055862665176392, "learning_rate": 2.002630171498438e-06, "loss": 0.6282, "step": 5569 }, { "epoch": 0.71, "grad_norm": 1.5386683940887451, "learning_rate": 2.0009695890343583e-06, "loss": 0.6475, "step": 5570 }, { "epoch": 0.71, "grad_norm": 1.3176437616348267, "learning_rate": 1.999309523092311e-06, "loss": 0.5033, "step": 5571 }, { "epoch": 0.71, "grad_norm": 1.1751028299331665, "learning_rate": 1.997649973958208e-06, "loss": 0.5784, "step": 5572 }, { "epoch": 0.71, "grad_norm": 1.2589142322540283, "learning_rate": 1.9959909419178713e-06, "loss": 0.6164, "step": 5573 }, { "epoch": 0.71, "grad_norm": 1.3741481304168701, "learning_rate": 1.9943324272570356e-06, "loss": 0.5355, "step": 5574 }, { "epoch": 0.71, "grad_norm": 1.4822330474853516, "learning_rate": 1.992674430261349e-06, "loss": 0.5507, "step": 5575 }, { "epoch": 0.71, "grad_norm": 1.3982889652252197, "learning_rate": 1.9910169512163634e-06, "loss": 0.6258, "step": 5576 }, { "epoch": 0.71, "grad_norm": 1.3766522407531738, "learning_rate": 1.9893599904075485e-06, "loss": 0.594, "step": 5577 }, { "epoch": 0.71, "grad_norm": 1.1951097249984741, "learning_rate": 1.987703548120281e-06, "loss": 0.5951, "step": 5578 }, { "epoch": 0.71, "grad_norm": 1.265201449394226, "learning_rate": 1.9860476246398526e-06, "loss": 0.6276, "step": 5579 }, { "epoch": 0.71, "grad_norm": 2.538569927215576, "learning_rate": 1.984392220251458e-06, "loss": 0.6001, "step": 5580 }, { "epoch": 0.72, "grad_norm": 1.507003903388977, "learning_rate": 1.982737335240209e-06, "loss": 0.6099, "step": 5581 }, { "epoch": 0.72, "grad_norm": 1.2398253679275513, "learning_rate": 1.9810829698911256e-06, "loss": 0.5759, "step": 5582 }, { "epoch": 0.72, "grad_norm": 1.2879706621170044, "learning_rate": 1.9794291244891406e-06, "loss": 0.5272, "step": 5583 }, { "epoch": 0.72, "grad_norm": 1.5107345581054688, "learning_rate": 1.977775799319092e-06, "loss": 0.6362, "step": 5584 }, { "epoch": 0.72, "grad_norm": 1.378507137298584, "learning_rate": 1.976122994665733e-06, "loss": 0.6266, "step": 5585 }, { "epoch": 0.72, "grad_norm": 1.3231158256530762, "learning_rate": 1.9744707108137257e-06, "loss": 0.6275, "step": 5586 }, { "epoch": 0.72, "grad_norm": 1.7076232433319092, "learning_rate": 1.9728189480476436e-06, "loss": 0.6385, "step": 5587 }, { "epoch": 0.72, "grad_norm": 1.4472925662994385, "learning_rate": 1.9711677066519645e-06, "loss": 0.6796, "step": 5588 }, { "epoch": 0.72, "grad_norm": 1.3523715734481812, "learning_rate": 1.9695169869110864e-06, "loss": 0.6968, "step": 5589 }, { "epoch": 0.72, "grad_norm": 1.466336727142334, "learning_rate": 1.967866789109313e-06, "loss": 0.5789, "step": 5590 }, { "epoch": 0.72, "grad_norm": 1.324497103691101, "learning_rate": 1.9662171135308517e-06, "loss": 0.5396, "step": 5591 }, { "epoch": 0.72, "grad_norm": 1.0975810289382935, "learning_rate": 1.9645679604598297e-06, "loss": 0.5384, "step": 5592 }, { "epoch": 0.72, "grad_norm": 1.4148670434951782, "learning_rate": 1.9629193301802786e-06, "loss": 0.5924, "step": 5593 }, { "epoch": 0.72, "grad_norm": 1.2906337976455688, "learning_rate": 1.9612712229761434e-06, "loss": 0.6046, "step": 5594 }, { "epoch": 0.72, "grad_norm": 1.1885030269622803, "learning_rate": 1.959623639131274e-06, "loss": 0.5818, "step": 5595 }, { "epoch": 0.72, "grad_norm": 1.5290043354034424, "learning_rate": 1.9579765789294354e-06, "loss": 0.6608, "step": 5596 }, { "epoch": 0.72, "grad_norm": 1.3768508434295654, "learning_rate": 1.9563300426542992e-06, "loss": 0.5653, "step": 5597 }, { "epoch": 0.72, "grad_norm": 1.3716613054275513, "learning_rate": 1.95468403058945e-06, "loss": 0.666, "step": 5598 }, { "epoch": 0.72, "grad_norm": 1.6025148630142212, "learning_rate": 1.9530385430183763e-06, "loss": 0.6309, "step": 5599 }, { "epoch": 0.72, "grad_norm": 1.2713145017623901, "learning_rate": 1.9513935802244816e-06, "loss": 0.6173, "step": 5600 }, { "epoch": 0.72, "grad_norm": 1.2051814794540405, "learning_rate": 1.9497491424910774e-06, "loss": 0.5658, "step": 5601 }, { "epoch": 0.72, "grad_norm": 1.1207643747329712, "learning_rate": 1.948105230101387e-06, "loss": 0.5222, "step": 5602 }, { "epoch": 0.72, "grad_norm": 1.8980365991592407, "learning_rate": 1.9464618433385367e-06, "loss": 0.5671, "step": 5603 }, { "epoch": 0.72, "grad_norm": 1.4103658199310303, "learning_rate": 1.9448189824855684e-06, "loss": 0.6214, "step": 5604 }, { "epoch": 0.72, "grad_norm": 1.3645200729370117, "learning_rate": 1.9431766478254315e-06, "loss": 0.5669, "step": 5605 }, { "epoch": 0.72, "grad_norm": 1.2241871356964111, "learning_rate": 1.941534839640985e-06, "loss": 0.5364, "step": 5606 }, { "epoch": 0.72, "grad_norm": 1.241102933883667, "learning_rate": 1.9398935582149964e-06, "loss": 0.6437, "step": 5607 }, { "epoch": 0.72, "grad_norm": 2.057539939880371, "learning_rate": 1.9382528038301437e-06, "loss": 0.622, "step": 5608 }, { "epoch": 0.72, "grad_norm": 1.2099688053131104, "learning_rate": 1.9366125767690145e-06, "loss": 0.5541, "step": 5609 }, { "epoch": 0.72, "grad_norm": 1.6069209575653076, "learning_rate": 1.9349728773141017e-06, "loss": 0.5802, "step": 5610 }, { "epoch": 0.72, "grad_norm": 1.1698077917099, "learning_rate": 1.9333337057478123e-06, "loss": 0.5953, "step": 5611 }, { "epoch": 0.72, "grad_norm": 1.2990256547927856, "learning_rate": 1.9316950623524595e-06, "loss": 0.6457, "step": 5612 }, { "epoch": 0.72, "grad_norm": 1.0718307495117188, "learning_rate": 1.930056947410268e-06, "loss": 0.5546, "step": 5613 }, { "epoch": 0.72, "grad_norm": 1.0749974250793457, "learning_rate": 1.928419361203367e-06, "loss": 0.6744, "step": 5614 }, { "epoch": 0.72, "grad_norm": 1.2324750423431396, "learning_rate": 1.9267823040137983e-06, "loss": 0.6432, "step": 5615 }, { "epoch": 0.72, "grad_norm": 1.1767945289611816, "learning_rate": 1.9251457761235127e-06, "loss": 0.6066, "step": 5616 }, { "epoch": 0.72, "grad_norm": 1.249362826347351, "learning_rate": 1.92350977781437e-06, "loss": 0.5928, "step": 5617 }, { "epoch": 0.72, "grad_norm": 1.3877533674240112, "learning_rate": 1.9218743093681346e-06, "loss": 0.5676, "step": 5618 }, { "epoch": 0.72, "grad_norm": 1.1825809478759766, "learning_rate": 1.920239371066484e-06, "loss": 0.6075, "step": 5619 }, { "epoch": 0.72, "grad_norm": 1.3485913276672363, "learning_rate": 1.9186049631910047e-06, "loss": 0.616, "step": 5620 }, { "epoch": 0.72, "grad_norm": 1.5328774452209473, "learning_rate": 1.916971086023188e-06, "loss": 0.6823, "step": 5621 }, { "epoch": 0.72, "grad_norm": 1.6213551759719849, "learning_rate": 1.915337739844434e-06, "loss": 0.5982, "step": 5622 }, { "epoch": 0.72, "grad_norm": 1.1525039672851562, "learning_rate": 1.9137049249360583e-06, "loss": 0.568, "step": 5623 }, { "epoch": 0.72, "grad_norm": 1.1615984439849854, "learning_rate": 1.9120726415792795e-06, "loss": 0.6214, "step": 5624 }, { "epoch": 0.72, "grad_norm": 1.1085628271102905, "learning_rate": 1.9104408900552226e-06, "loss": 0.6147, "step": 5625 }, { "epoch": 0.72, "grad_norm": 1.7933579683303833, "learning_rate": 1.9088096706449245e-06, "loss": 0.6051, "step": 5626 }, { "epoch": 0.72, "grad_norm": 1.3298068046569824, "learning_rate": 1.9071789836293313e-06, "loss": 0.5936, "step": 5627 }, { "epoch": 0.72, "grad_norm": 1.3054797649383545, "learning_rate": 1.9055488292892927e-06, "loss": 0.6075, "step": 5628 }, { "epoch": 0.72, "grad_norm": 1.7392698526382446, "learning_rate": 1.9039192079055712e-06, "loss": 0.6097, "step": 5629 }, { "epoch": 0.72, "grad_norm": 1.3165106773376465, "learning_rate": 1.902290119758836e-06, "loss": 0.526, "step": 5630 }, { "epoch": 0.72, "grad_norm": 1.294399380683899, "learning_rate": 1.9006615651296662e-06, "loss": 0.4764, "step": 5631 }, { "epoch": 0.72, "grad_norm": 1.2079136371612549, "learning_rate": 1.8990335442985436e-06, "loss": 0.6266, "step": 5632 }, { "epoch": 0.72, "grad_norm": 1.8871737718582153, "learning_rate": 1.897406057545863e-06, "loss": 0.6647, "step": 5633 }, { "epoch": 0.72, "grad_norm": 1.2210822105407715, "learning_rate": 1.895779105151927e-06, "loss": 0.5954, "step": 5634 }, { "epoch": 0.72, "grad_norm": 1.3488540649414062, "learning_rate": 1.8941526873969463e-06, "loss": 0.5809, "step": 5635 }, { "epoch": 0.72, "grad_norm": 1.655834674835205, "learning_rate": 1.8925268045610345e-06, "loss": 0.5514, "step": 5636 }, { "epoch": 0.72, "grad_norm": 1.7508749961853027, "learning_rate": 1.890901456924219e-06, "loss": 0.5246, "step": 5637 }, { "epoch": 0.72, "grad_norm": 1.218916416168213, "learning_rate": 1.8892766447664323e-06, "loss": 0.5255, "step": 5638 }, { "epoch": 0.72, "grad_norm": 1.2823469638824463, "learning_rate": 1.8876523683675163e-06, "loss": 0.6096, "step": 5639 }, { "epoch": 0.72, "grad_norm": 1.282475233078003, "learning_rate": 1.8860286280072187e-06, "loss": 0.5822, "step": 5640 }, { "epoch": 0.72, "grad_norm": 1.4252816438674927, "learning_rate": 1.884405423965196e-06, "loss": 0.5581, "step": 5641 }, { "epoch": 0.72, "grad_norm": 1.3148558139801025, "learning_rate": 1.8827827565210143e-06, "loss": 0.5631, "step": 5642 }, { "epoch": 0.72, "grad_norm": 1.354689359664917, "learning_rate": 1.881160625954141e-06, "loss": 0.5763, "step": 5643 }, { "epoch": 0.72, "grad_norm": 1.8449751138687134, "learning_rate": 1.8795390325439572e-06, "loss": 0.6018, "step": 5644 }, { "epoch": 0.72, "grad_norm": 1.3221594095230103, "learning_rate": 1.8779179765697491e-06, "loss": 0.6552, "step": 5645 }, { "epoch": 0.72, "grad_norm": 1.4569640159606934, "learning_rate": 1.8762974583107129e-06, "loss": 0.5821, "step": 5646 }, { "epoch": 0.72, "grad_norm": 1.3788806200027466, "learning_rate": 1.8746774780459465e-06, "loss": 0.5294, "step": 5647 }, { "epoch": 0.72, "grad_norm": 1.5517175197601318, "learning_rate": 1.8730580360544593e-06, "loss": 0.6109, "step": 5648 }, { "epoch": 0.72, "grad_norm": 1.6753023862838745, "learning_rate": 1.8714391326151681e-06, "loss": 0.5427, "step": 5649 }, { "epoch": 0.72, "grad_norm": 1.5441654920578003, "learning_rate": 1.8698207680068974e-06, "loss": 0.6389, "step": 5650 }, { "epoch": 0.72, "grad_norm": 1.1922584772109985, "learning_rate": 1.8682029425083748e-06, "loss": 0.5781, "step": 5651 }, { "epoch": 0.72, "grad_norm": 1.3633989095687866, "learning_rate": 1.8665856563982392e-06, "loss": 0.5629, "step": 5652 }, { "epoch": 0.72, "grad_norm": 1.3253165483474731, "learning_rate": 1.864968909955035e-06, "loss": 0.554, "step": 5653 }, { "epoch": 0.72, "grad_norm": 1.3885520696640015, "learning_rate": 1.8633527034572164e-06, "loss": 0.6109, "step": 5654 }, { "epoch": 0.72, "grad_norm": 1.899064302444458, "learning_rate": 1.8617370371831373e-06, "loss": 0.6412, "step": 5655 }, { "epoch": 0.72, "grad_norm": 1.9599201679229736, "learning_rate": 1.8601219114110646e-06, "loss": 0.5653, "step": 5656 }, { "epoch": 0.72, "grad_norm": 1.2851388454437256, "learning_rate": 1.858507326419176e-06, "loss": 0.5802, "step": 5657 }, { "epoch": 0.72, "grad_norm": 1.6403940916061401, "learning_rate": 1.8568932824855457e-06, "loss": 0.5969, "step": 5658 }, { "epoch": 0.73, "grad_norm": 1.4266154766082764, "learning_rate": 1.8552797798881611e-06, "loss": 0.554, "step": 5659 }, { "epoch": 0.73, "grad_norm": 1.2188271284103394, "learning_rate": 1.8536668189049156e-06, "loss": 0.5139, "step": 5660 }, { "epoch": 0.73, "grad_norm": 1.4466283321380615, "learning_rate": 1.8520543998136104e-06, "loss": 0.6892, "step": 5661 }, { "epoch": 0.73, "grad_norm": 1.507656455039978, "learning_rate": 1.8504425228919492e-06, "loss": 0.6445, "step": 5662 }, { "epoch": 0.73, "grad_norm": 1.2851141691207886, "learning_rate": 1.8488311884175458e-06, "loss": 0.69, "step": 5663 }, { "epoch": 0.73, "grad_norm": 1.239793062210083, "learning_rate": 1.8472203966679208e-06, "loss": 0.65, "step": 5664 }, { "epoch": 0.73, "grad_norm": 1.2325901985168457, "learning_rate": 1.8456101479205012e-06, "loss": 0.5284, "step": 5665 }, { "epoch": 0.73, "grad_norm": 1.43928861618042, "learning_rate": 1.8440004424526165e-06, "loss": 0.6087, "step": 5666 }, { "epoch": 0.73, "grad_norm": 1.4355437755584717, "learning_rate": 1.8423912805415078e-06, "loss": 0.6277, "step": 5667 }, { "epoch": 0.73, "grad_norm": 1.8221622705459595, "learning_rate": 1.84078266246432e-06, "loss": 0.5687, "step": 5668 }, { "epoch": 0.73, "grad_norm": 1.6198118925094604, "learning_rate": 1.839174588498107e-06, "loss": 0.5901, "step": 5669 }, { "epoch": 0.73, "grad_norm": 1.7814929485321045, "learning_rate": 1.837567058919823e-06, "loss": 0.5777, "step": 5670 }, { "epoch": 0.73, "grad_norm": 1.416741967201233, "learning_rate": 1.8359600740063344e-06, "loss": 0.5138, "step": 5671 }, { "epoch": 0.73, "grad_norm": 1.3309919834136963, "learning_rate": 1.8343536340344136e-06, "loss": 0.5628, "step": 5672 }, { "epoch": 0.73, "grad_norm": 1.2797962427139282, "learning_rate": 1.8327477392807314e-06, "loss": 0.5647, "step": 5673 }, { "epoch": 0.73, "grad_norm": 1.2392196655273438, "learning_rate": 1.831142390021876e-06, "loss": 0.5267, "step": 5674 }, { "epoch": 0.73, "grad_norm": 1.2258974313735962, "learning_rate": 1.8295375865343363e-06, "loss": 0.631, "step": 5675 }, { "epoch": 0.73, "grad_norm": 1.6016523838043213, "learning_rate": 1.8279333290945035e-06, "loss": 0.6077, "step": 5676 }, { "epoch": 0.73, "grad_norm": 1.2385517358779907, "learning_rate": 1.8263296179786798e-06, "loss": 0.6142, "step": 5677 }, { "epoch": 0.73, "grad_norm": 1.3300591707229614, "learning_rate": 1.8247264534630727e-06, "loss": 0.5558, "step": 5678 }, { "epoch": 0.73, "grad_norm": 1.3536458015441895, "learning_rate": 1.8231238358237952e-06, "loss": 0.5652, "step": 5679 }, { "epoch": 0.73, "grad_norm": 1.294861078262329, "learning_rate": 1.8215217653368632e-06, "loss": 0.6216, "step": 5680 }, { "epoch": 0.73, "grad_norm": 1.1894181966781616, "learning_rate": 1.8199202422782026e-06, "loss": 0.5569, "step": 5681 }, { "epoch": 0.73, "grad_norm": 1.3334403038024902, "learning_rate": 1.8183192669236422e-06, "loss": 0.6186, "step": 5682 }, { "epoch": 0.73, "grad_norm": 1.4797943830490112, "learning_rate": 1.8167188395489194e-06, "loss": 0.5627, "step": 5683 }, { "epoch": 0.73, "grad_norm": 1.333863377571106, "learning_rate": 1.8151189604296727e-06, "loss": 0.6105, "step": 5684 }, { "epoch": 0.73, "grad_norm": 1.8466845750808716, "learning_rate": 1.8135196298414498e-06, "loss": 0.7075, "step": 5685 }, { "epoch": 0.73, "grad_norm": 1.2560466527938843, "learning_rate": 1.8119208480597033e-06, "loss": 0.6165, "step": 5686 }, { "epoch": 0.73, "grad_norm": 1.3292875289916992, "learning_rate": 1.8103226153597919e-06, "loss": 0.6225, "step": 5687 }, { "epoch": 0.73, "grad_norm": 2.3358840942382812, "learning_rate": 1.8087249320169758e-06, "loss": 0.6418, "step": 5688 }, { "epoch": 0.73, "grad_norm": 1.621479868888855, "learning_rate": 1.807127798306425e-06, "loss": 0.535, "step": 5689 }, { "epoch": 0.73, "grad_norm": 1.5514147281646729, "learning_rate": 1.8055312145032139e-06, "loss": 0.6259, "step": 5690 }, { "epoch": 0.73, "grad_norm": 1.329399585723877, "learning_rate": 1.8039351808823203e-06, "loss": 0.56, "step": 5691 }, { "epoch": 0.73, "grad_norm": 1.4586081504821777, "learning_rate": 1.8023396977186297e-06, "loss": 0.5567, "step": 5692 }, { "epoch": 0.73, "grad_norm": 1.481757402420044, "learning_rate": 1.8007447652869314e-06, "loss": 0.6491, "step": 5693 }, { "epoch": 0.73, "grad_norm": 1.280112624168396, "learning_rate": 1.7991503838619219e-06, "loss": 0.6109, "step": 5694 }, { "epoch": 0.73, "grad_norm": 1.3548251390457153, "learning_rate": 1.7975565537181971e-06, "loss": 0.5796, "step": 5695 }, { "epoch": 0.73, "grad_norm": 1.166089415550232, "learning_rate": 1.7959632751302636e-06, "loss": 0.5353, "step": 5696 }, { "epoch": 0.73, "grad_norm": 1.4079132080078125, "learning_rate": 1.794370548372532e-06, "loss": 0.5729, "step": 5697 }, { "epoch": 0.73, "grad_norm": 6.476421356201172, "learning_rate": 1.7927783737193182e-06, "loss": 0.6065, "step": 5698 }, { "epoch": 0.73, "grad_norm": 1.1923269033432007, "learning_rate": 1.7911867514448384e-06, "loss": 0.614, "step": 5699 }, { "epoch": 0.73, "grad_norm": 1.501766324043274, "learning_rate": 1.7895956818232191e-06, "loss": 0.6037, "step": 5700 }, { "epoch": 0.73, "grad_norm": 1.4375709295272827, "learning_rate": 1.7880051651284897e-06, "loss": 0.6155, "step": 5701 }, { "epoch": 0.73, "grad_norm": 1.1795188188552856, "learning_rate": 1.786415201634586e-06, "loss": 0.606, "step": 5702 }, { "epoch": 0.73, "grad_norm": 1.210554599761963, "learning_rate": 1.784825791615344e-06, "loss": 0.5175, "step": 5703 }, { "epoch": 0.73, "grad_norm": 1.2811566591262817, "learning_rate": 1.7832369353445078e-06, "loss": 0.5747, "step": 5704 }, { "epoch": 0.73, "grad_norm": 1.2850680351257324, "learning_rate": 1.7816486330957272e-06, "loss": 0.6138, "step": 5705 }, { "epoch": 0.73, "grad_norm": 1.2182974815368652, "learning_rate": 1.780060885142555e-06, "loss": 0.5599, "step": 5706 }, { "epoch": 0.73, "grad_norm": 1.3909426927566528, "learning_rate": 1.778473691758445e-06, "loss": 0.5889, "step": 5707 }, { "epoch": 0.73, "grad_norm": 1.6075366735458374, "learning_rate": 1.7768870532167625e-06, "loss": 0.5588, "step": 5708 }, { "epoch": 0.73, "grad_norm": 1.3977075815200806, "learning_rate": 1.7753009697907753e-06, "loss": 0.5676, "step": 5709 }, { "epoch": 0.73, "grad_norm": 1.174627661705017, "learning_rate": 1.7737154417536495e-06, "loss": 0.5714, "step": 5710 }, { "epoch": 0.73, "grad_norm": 1.6572093963623047, "learning_rate": 1.7721304693784624e-06, "loss": 0.6938, "step": 5711 }, { "epoch": 0.73, "grad_norm": 1.2030808925628662, "learning_rate": 1.770546052938193e-06, "loss": 0.5375, "step": 5712 }, { "epoch": 0.73, "grad_norm": 1.3561108112335205, "learning_rate": 1.7689621927057265e-06, "loss": 0.5343, "step": 5713 }, { "epoch": 0.73, "grad_norm": 1.1456682682037354, "learning_rate": 1.7673788889538473e-06, "loss": 0.5621, "step": 5714 }, { "epoch": 0.73, "grad_norm": 1.3912715911865234, "learning_rate": 1.7657961419552488e-06, "loss": 0.5994, "step": 5715 }, { "epoch": 0.73, "grad_norm": 1.5939741134643555, "learning_rate": 1.7642139519825274e-06, "loss": 0.5317, "step": 5716 }, { "epoch": 0.73, "grad_norm": 1.3256257772445679, "learning_rate": 1.7626323193081852e-06, "loss": 0.5304, "step": 5717 }, { "epoch": 0.73, "grad_norm": 1.559259057044983, "learning_rate": 1.761051244204622e-06, "loss": 0.6115, "step": 5718 }, { "epoch": 0.73, "grad_norm": 1.6052910089492798, "learning_rate": 1.7594707269441473e-06, "loss": 0.5905, "step": 5719 }, { "epoch": 0.73, "grad_norm": 1.3079288005828857, "learning_rate": 1.757890767798976e-06, "loss": 0.6405, "step": 5720 }, { "epoch": 0.73, "grad_norm": 1.5783737897872925, "learning_rate": 1.7563113670412197e-06, "loss": 0.6213, "step": 5721 }, { "epoch": 0.73, "grad_norm": 1.4406654834747314, "learning_rate": 1.7547325249428999e-06, "loss": 0.6082, "step": 5722 }, { "epoch": 0.73, "grad_norm": 1.1159170866012573, "learning_rate": 1.7531542417759401e-06, "loss": 0.6584, "step": 5723 }, { "epoch": 0.73, "grad_norm": 1.4534189701080322, "learning_rate": 1.7515765178121696e-06, "loss": 0.5921, "step": 5724 }, { "epoch": 0.73, "grad_norm": 1.9966347217559814, "learning_rate": 1.7499993533233134e-06, "loss": 0.6042, "step": 5725 }, { "epoch": 0.73, "grad_norm": 1.3243257999420166, "learning_rate": 1.7484227485810119e-06, "loss": 0.5952, "step": 5726 }, { "epoch": 0.73, "grad_norm": 1.409643530845642, "learning_rate": 1.7468467038568033e-06, "loss": 0.5421, "step": 5727 }, { "epoch": 0.73, "grad_norm": 1.2723798751831055, "learning_rate": 1.7452712194221243e-06, "loss": 0.647, "step": 5728 }, { "epoch": 0.73, "grad_norm": 1.3207684755325317, "learning_rate": 1.743696295548324e-06, "loss": 0.5333, "step": 5729 }, { "epoch": 0.73, "grad_norm": 1.3573734760284424, "learning_rate": 1.7421219325066495e-06, "loss": 0.5481, "step": 5730 }, { "epoch": 0.73, "grad_norm": 1.479612946510315, "learning_rate": 1.7405481305682547e-06, "loss": 0.5887, "step": 5731 }, { "epoch": 0.73, "grad_norm": 1.5791213512420654, "learning_rate": 1.7389748900041926e-06, "loss": 0.5848, "step": 5732 }, { "epoch": 0.73, "grad_norm": 1.5573316812515259, "learning_rate": 1.7374022110854222e-06, "loss": 0.6232, "step": 5733 }, { "epoch": 0.73, "grad_norm": 1.2642827033996582, "learning_rate": 1.7358300940828067e-06, "loss": 0.5412, "step": 5734 }, { "epoch": 0.73, "grad_norm": 1.6646677255630493, "learning_rate": 1.7342585392671117e-06, "loss": 0.5998, "step": 5735 }, { "epoch": 0.73, "grad_norm": 1.2101836204528809, "learning_rate": 1.7326875469090037e-06, "loss": 0.582, "step": 5736 }, { "epoch": 0.73, "grad_norm": 1.327369213104248, "learning_rate": 1.7311171172790548e-06, "loss": 0.5807, "step": 5737 }, { "epoch": 0.74, "grad_norm": 1.1370022296905518, "learning_rate": 1.72954725064774e-06, "loss": 0.503, "step": 5738 }, { "epoch": 0.74, "grad_norm": 1.4957923889160156, "learning_rate": 1.7279779472854386e-06, "loss": 0.5385, "step": 5739 }, { "epoch": 0.74, "grad_norm": 1.2059375047683716, "learning_rate": 1.7264092074624278e-06, "loss": 0.5874, "step": 5740 }, { "epoch": 0.74, "grad_norm": 1.1997199058532715, "learning_rate": 1.7248410314488928e-06, "loss": 0.5358, "step": 5741 }, { "epoch": 0.74, "grad_norm": 1.4809540510177612, "learning_rate": 1.7232734195149197e-06, "loss": 0.5952, "step": 5742 }, { "epoch": 0.74, "grad_norm": 1.9461658000946045, "learning_rate": 1.7217063719304988e-06, "loss": 0.6296, "step": 5743 }, { "epoch": 0.74, "grad_norm": 1.942325472831726, "learning_rate": 1.7201398889655207e-06, "loss": 0.6345, "step": 5744 }, { "epoch": 0.74, "grad_norm": 1.3077179193496704, "learning_rate": 1.718573970889782e-06, "loss": 0.7035, "step": 5745 }, { "epoch": 0.74, "grad_norm": 1.5481005907058716, "learning_rate": 1.71700861797298e-06, "loss": 0.5672, "step": 5746 }, { "epoch": 0.74, "grad_norm": 1.3350780010223389, "learning_rate": 1.7154438304847132e-06, "loss": 0.5974, "step": 5747 }, { "epoch": 0.74, "grad_norm": 1.4484832286834717, "learning_rate": 1.7138796086944854e-06, "loss": 0.6045, "step": 5748 }, { "epoch": 0.74, "grad_norm": 1.4450817108154297, "learning_rate": 1.712315952871702e-06, "loss": 0.5568, "step": 5749 }, { "epoch": 0.74, "grad_norm": 1.2990732192993164, "learning_rate": 1.7107528632856729e-06, "loss": 0.541, "step": 5750 }, { "epoch": 0.74, "grad_norm": 1.9606150388717651, "learning_rate": 1.7091903402056054e-06, "loss": 0.584, "step": 5751 }, { "epoch": 0.74, "grad_norm": 1.5782921314239502, "learning_rate": 1.707628383900613e-06, "loss": 0.5366, "step": 5752 }, { "epoch": 0.74, "grad_norm": 1.3492310047149658, "learning_rate": 1.7060669946397112e-06, "loss": 0.6352, "step": 5753 }, { "epoch": 0.74, "grad_norm": 1.2007477283477783, "learning_rate": 1.7045061726918204e-06, "loss": 0.5359, "step": 5754 }, { "epoch": 0.74, "grad_norm": 1.260575771331787, "learning_rate": 1.7029459183257557e-06, "loss": 0.5743, "step": 5755 }, { "epoch": 0.74, "grad_norm": 1.4031670093536377, "learning_rate": 1.7013862318102415e-06, "loss": 0.5996, "step": 5756 }, { "epoch": 0.74, "grad_norm": 1.4298655986785889, "learning_rate": 1.6998271134139022e-06, "loss": 0.627, "step": 5757 }, { "epoch": 0.74, "grad_norm": 2.1103055477142334, "learning_rate": 1.698268563405266e-06, "loss": 0.5912, "step": 5758 }, { "epoch": 0.74, "grad_norm": 1.184146761894226, "learning_rate": 1.6967105820527558e-06, "loss": 0.6081, "step": 5759 }, { "epoch": 0.74, "grad_norm": 1.3072179555892944, "learning_rate": 1.6951531696247075e-06, "loss": 0.6095, "step": 5760 }, { "epoch": 0.74, "grad_norm": 1.2722811698913574, "learning_rate": 1.6935963263893546e-06, "loss": 0.4975, "step": 5761 }, { "epoch": 0.74, "grad_norm": 1.443372368812561, "learning_rate": 1.6920400526148272e-06, "loss": 0.6409, "step": 5762 }, { "epoch": 0.74, "grad_norm": 1.277919054031372, "learning_rate": 1.6904843485691635e-06, "loss": 0.6582, "step": 5763 }, { "epoch": 0.74, "grad_norm": 1.1344646215438843, "learning_rate": 1.6889292145203028e-06, "loss": 0.5916, "step": 5764 }, { "epoch": 0.74, "grad_norm": 1.1989878416061401, "learning_rate": 1.6873746507360865e-06, "loss": 0.4848, "step": 5765 }, { "epoch": 0.74, "grad_norm": 1.2986321449279785, "learning_rate": 1.685820657484254e-06, "loss": 0.6413, "step": 5766 }, { "epoch": 0.74, "grad_norm": 1.6236834526062012, "learning_rate": 1.684267235032449e-06, "loss": 0.6122, "step": 5767 }, { "epoch": 0.74, "grad_norm": 1.3555066585540771, "learning_rate": 1.68271438364822e-06, "loss": 0.5595, "step": 5768 }, { "epoch": 0.74, "grad_norm": 1.2782316207885742, "learning_rate": 1.6811621035990106e-06, "loss": 0.7395, "step": 5769 }, { "epoch": 0.74, "grad_norm": 1.4683347940444946, "learning_rate": 1.6796103951521708e-06, "loss": 0.5275, "step": 5770 }, { "epoch": 0.74, "grad_norm": 1.3068945407867432, "learning_rate": 1.678059258574951e-06, "loss": 0.6087, "step": 5771 }, { "epoch": 0.74, "grad_norm": 1.8507400751113892, "learning_rate": 1.676508694134505e-06, "loss": 0.5883, "step": 5772 }, { "epoch": 0.74, "grad_norm": 1.1494866609573364, "learning_rate": 1.6749587020978814e-06, "loss": 0.5752, "step": 5773 }, { "epoch": 0.74, "grad_norm": 1.5824859142303467, "learning_rate": 1.673409282732038e-06, "loss": 0.5977, "step": 5774 }, { "epoch": 0.74, "grad_norm": 1.35085129737854, "learning_rate": 1.67186043630383e-06, "loss": 0.5602, "step": 5775 }, { "epoch": 0.74, "grad_norm": 1.6015706062316895, "learning_rate": 1.670312163080015e-06, "loss": 0.598, "step": 5776 }, { "epoch": 0.74, "grad_norm": 1.6979427337646484, "learning_rate": 1.6687644633272516e-06, "loss": 0.6116, "step": 5777 }, { "epoch": 0.74, "grad_norm": 1.282360315322876, "learning_rate": 1.6672173373120992e-06, "loss": 0.5501, "step": 5778 }, { "epoch": 0.74, "grad_norm": 1.518515944480896, "learning_rate": 1.6656707853010207e-06, "loss": 0.564, "step": 5779 }, { "epoch": 0.74, "grad_norm": 1.1478134393692017, "learning_rate": 1.6641248075603756e-06, "loss": 0.7633, "step": 5780 }, { "epoch": 0.74, "grad_norm": 1.7522804737091064, "learning_rate": 1.6625794043564275e-06, "loss": 0.517, "step": 5781 }, { "epoch": 0.74, "grad_norm": 1.3169957399368286, "learning_rate": 1.661034575955342e-06, "loss": 0.6518, "step": 5782 }, { "epoch": 0.74, "grad_norm": 3.0078976154327393, "learning_rate": 1.659490322623185e-06, "loss": 0.5437, "step": 5783 }, { "epoch": 0.74, "grad_norm": 1.5404016971588135, "learning_rate": 1.6579466446259201e-06, "loss": 0.6104, "step": 5784 }, { "epoch": 0.74, "grad_norm": 1.531265377998352, "learning_rate": 1.6564035422294156e-06, "loss": 0.6512, "step": 5785 }, { "epoch": 0.74, "grad_norm": 1.444911241531372, "learning_rate": 1.65486101569944e-06, "loss": 0.5998, "step": 5786 }, { "epoch": 0.74, "grad_norm": 1.3478665351867676, "learning_rate": 1.653319065301664e-06, "loss": 0.6803, "step": 5787 }, { "epoch": 0.74, "grad_norm": 1.3380131721496582, "learning_rate": 1.651777691301653e-06, "loss": 0.6165, "step": 5788 }, { "epoch": 0.74, "grad_norm": 1.335648536682129, "learning_rate": 1.6502368939648794e-06, "loss": 0.6804, "step": 5789 }, { "epoch": 0.74, "grad_norm": 1.409081220626831, "learning_rate": 1.6486966735567144e-06, "loss": 0.5307, "step": 5790 }, { "epoch": 0.74, "grad_norm": 1.3683109283447266, "learning_rate": 1.6471570303424318e-06, "loss": 0.5651, "step": 5791 }, { "epoch": 0.74, "grad_norm": 1.4809428453445435, "learning_rate": 1.6456179645871996e-06, "loss": 0.5627, "step": 5792 }, { "epoch": 0.74, "grad_norm": 1.6425578594207764, "learning_rate": 1.6440794765560913e-06, "loss": 0.6125, "step": 5793 }, { "epoch": 0.74, "grad_norm": 1.5959701538085938, "learning_rate": 1.6425415665140842e-06, "loss": 0.507, "step": 5794 }, { "epoch": 0.74, "grad_norm": 1.0912171602249146, "learning_rate": 1.6410042347260486e-06, "loss": 0.6124, "step": 5795 }, { "epoch": 0.74, "grad_norm": 1.6113317012786865, "learning_rate": 1.6394674814567585e-06, "loss": 0.5317, "step": 5796 }, { "epoch": 0.74, "grad_norm": 1.3795167207717896, "learning_rate": 1.6379313069708896e-06, "loss": 0.665, "step": 5797 }, { "epoch": 0.74, "grad_norm": 1.1050100326538086, "learning_rate": 1.6363957115330187e-06, "loss": 0.6964, "step": 5798 }, { "epoch": 0.74, "grad_norm": 1.3911715745925903, "learning_rate": 1.6348606954076169e-06, "loss": 0.6012, "step": 5799 }, { "epoch": 0.74, "grad_norm": 1.1959513425827026, "learning_rate": 1.6333262588590609e-06, "loss": 0.7361, "step": 5800 }, { "epoch": 0.74, "grad_norm": 1.2428171634674072, "learning_rate": 1.631792402151627e-06, "loss": 0.62, "step": 5801 }, { "epoch": 0.74, "grad_norm": 2.339919328689575, "learning_rate": 1.6302591255494916e-06, "loss": 0.6668, "step": 5802 }, { "epoch": 0.74, "grad_norm": 1.3819876909255981, "learning_rate": 1.6287264293167277e-06, "loss": 0.6304, "step": 5803 }, { "epoch": 0.74, "grad_norm": 1.578447937965393, "learning_rate": 1.627194313717313e-06, "loss": 0.6149, "step": 5804 }, { "epoch": 0.74, "grad_norm": 1.1103376150131226, "learning_rate": 1.6256627790151224e-06, "loss": 0.5619, "step": 5805 }, { "epoch": 0.74, "grad_norm": 1.9362802505493164, "learning_rate": 1.6241318254739346e-06, "loss": 0.5938, "step": 5806 }, { "epoch": 0.74, "grad_norm": 1.4348499774932861, "learning_rate": 1.6226014533574208e-06, "loss": 0.6009, "step": 5807 }, { "epoch": 0.74, "grad_norm": 1.4850698709487915, "learning_rate": 1.6210716629291584e-06, "loss": 0.6183, "step": 5808 }, { "epoch": 0.74, "grad_norm": 1.44414484500885, "learning_rate": 1.6195424544526228e-06, "loss": 0.6009, "step": 5809 }, { "epoch": 0.74, "grad_norm": 1.3758606910705566, "learning_rate": 1.6180138281911889e-06, "loss": 0.6444, "step": 5810 }, { "epoch": 0.74, "grad_norm": 1.1687239408493042, "learning_rate": 1.6164857844081316e-06, "loss": 0.4735, "step": 5811 }, { "epoch": 0.74, "grad_norm": 1.3271052837371826, "learning_rate": 1.6149583233666254e-06, "loss": 0.6004, "step": 5812 }, { "epoch": 0.74, "grad_norm": 1.2638598680496216, "learning_rate": 1.6134314453297467e-06, "loss": 0.7543, "step": 5813 }, { "epoch": 0.74, "grad_norm": 1.4526175260543823, "learning_rate": 1.611905150560465e-06, "loss": 0.5601, "step": 5814 }, { "epoch": 0.74, "grad_norm": 1.159395694732666, "learning_rate": 1.6103794393216554e-06, "loss": 0.5431, "step": 5815 }, { "epoch": 0.75, "grad_norm": 1.4904710054397583, "learning_rate": 1.6088543118760908e-06, "loss": 0.5712, "step": 5816 }, { "epoch": 0.75, "grad_norm": 1.4881311655044556, "learning_rate": 1.6073297684864453e-06, "loss": 0.5665, "step": 5817 }, { "epoch": 0.75, "grad_norm": 2.3589932918548584, "learning_rate": 1.6058058094152862e-06, "loss": 0.6032, "step": 5818 }, { "epoch": 0.75, "grad_norm": 1.4021669626235962, "learning_rate": 1.6042824349250873e-06, "loss": 0.6179, "step": 5819 }, { "epoch": 0.75, "grad_norm": 1.3111644983291626, "learning_rate": 1.6027596452782202e-06, "loss": 0.5615, "step": 5820 }, { "epoch": 0.75, "grad_norm": 1.3525159358978271, "learning_rate": 1.6012374407369514e-06, "loss": 0.5584, "step": 5821 }, { "epoch": 0.75, "grad_norm": 1.5664325952529907, "learning_rate": 1.5997158215634506e-06, "loss": 0.5334, "step": 5822 }, { "epoch": 0.75, "grad_norm": 1.4455913305282593, "learning_rate": 1.5981947880197862e-06, "loss": 0.6552, "step": 5823 }, { "epoch": 0.75, "grad_norm": 1.6153526306152344, "learning_rate": 1.596674340367927e-06, "loss": 0.5421, "step": 5824 }, { "epoch": 0.75, "grad_norm": 1.32578706741333, "learning_rate": 1.5951544788697354e-06, "loss": 0.621, "step": 5825 }, { "epoch": 0.75, "grad_norm": 1.0666056871414185, "learning_rate": 1.5936352037869796e-06, "loss": 0.6811, "step": 5826 }, { "epoch": 0.75, "grad_norm": 1.09562349319458, "learning_rate": 1.5921165153813222e-06, "loss": 0.6983, "step": 5827 }, { "epoch": 0.75, "grad_norm": 1.482304573059082, "learning_rate": 1.5905984139143277e-06, "loss": 0.5927, "step": 5828 }, { "epoch": 0.75, "grad_norm": 1.3417950868606567, "learning_rate": 1.5890808996474576e-06, "loss": 0.593, "step": 5829 }, { "epoch": 0.75, "grad_norm": 1.6582002639770508, "learning_rate": 1.5875639728420727e-06, "loss": 0.5227, "step": 5830 }, { "epoch": 0.75, "grad_norm": 1.452853798866272, "learning_rate": 1.586047633759435e-06, "loss": 0.6726, "step": 5831 }, { "epoch": 0.75, "grad_norm": 1.5506370067596436, "learning_rate": 1.5845318826606997e-06, "loss": 0.5796, "step": 5832 }, { "epoch": 0.75, "grad_norm": 1.3531577587127686, "learning_rate": 1.5830167198069256e-06, "loss": 0.6651, "step": 5833 }, { "epoch": 0.75, "grad_norm": 1.3325982093811035, "learning_rate": 1.581502145459069e-06, "loss": 0.6107, "step": 5834 }, { "epoch": 0.75, "grad_norm": 1.6698253154754639, "learning_rate": 1.5799881598779853e-06, "loss": 0.6907, "step": 5835 }, { "epoch": 0.75, "grad_norm": 1.4126887321472168, "learning_rate": 1.5784747633244257e-06, "loss": 0.6099, "step": 5836 }, { "epoch": 0.75, "grad_norm": 1.450058937072754, "learning_rate": 1.5769619560590426e-06, "loss": 0.6664, "step": 5837 }, { "epoch": 0.75, "grad_norm": 4.739638805389404, "learning_rate": 1.5754497383423866e-06, "loss": 0.5682, "step": 5838 }, { "epoch": 0.75, "grad_norm": 1.2308710813522339, "learning_rate": 1.573938110434909e-06, "loss": 0.568, "step": 5839 }, { "epoch": 0.75, "grad_norm": 1.7075868844985962, "learning_rate": 1.5724270725969521e-06, "loss": 0.5979, "step": 5840 }, { "epoch": 0.75, "grad_norm": 1.3854655027389526, "learning_rate": 1.570916625088764e-06, "loss": 0.5529, "step": 5841 }, { "epoch": 0.75, "grad_norm": 1.426127314567566, "learning_rate": 1.5694067681704888e-06, "loss": 0.5614, "step": 5842 }, { "epoch": 0.75, "grad_norm": 1.2803055047988892, "learning_rate": 1.5678975021021703e-06, "loss": 0.6027, "step": 5843 }, { "epoch": 0.75, "grad_norm": 1.465036392211914, "learning_rate": 1.5663888271437434e-06, "loss": 0.6433, "step": 5844 }, { "epoch": 0.75, "grad_norm": 1.2361197471618652, "learning_rate": 1.5648807435550518e-06, "loss": 0.643, "step": 5845 }, { "epoch": 0.75, "grad_norm": 1.3847153186798096, "learning_rate": 1.5633732515958322e-06, "loss": 0.6341, "step": 5846 }, { "epoch": 0.75, "grad_norm": 1.526636004447937, "learning_rate": 1.5618663515257166e-06, "loss": 0.5841, "step": 5847 }, { "epoch": 0.75, "grad_norm": 1.0692555904388428, "learning_rate": 1.5603600436042393e-06, "loss": 0.6289, "step": 5848 }, { "epoch": 0.75, "grad_norm": 1.4534244537353516, "learning_rate": 1.5588543280908309e-06, "loss": 0.5165, "step": 5849 }, { "epoch": 0.75, "grad_norm": 1.500308632850647, "learning_rate": 1.5573492052448226e-06, "loss": 0.6331, "step": 5850 }, { "epoch": 0.75, "grad_norm": 1.619732141494751, "learning_rate": 1.5558446753254374e-06, "loss": 0.6026, "step": 5851 }, { "epoch": 0.75, "grad_norm": 1.5240439176559448, "learning_rate": 1.5543407385918019e-06, "loss": 0.5902, "step": 5852 }, { "epoch": 0.75, "grad_norm": 1.284821629524231, "learning_rate": 1.5528373953029386e-06, "loss": 0.5572, "step": 5853 }, { "epoch": 0.75, "grad_norm": 1.4876408576965332, "learning_rate": 1.5513346457177692e-06, "loss": 0.55, "step": 5854 }, { "epoch": 0.75, "grad_norm": 1.397421956062317, "learning_rate": 1.5498324900951083e-06, "loss": 0.5822, "step": 5855 }, { "epoch": 0.75, "grad_norm": 1.2312592267990112, "learning_rate": 1.5483309286936743e-06, "loss": 0.5442, "step": 5856 }, { "epoch": 0.75, "grad_norm": 1.2165515422821045, "learning_rate": 1.54682996177208e-06, "loss": 0.6374, "step": 5857 }, { "epoch": 0.75, "grad_norm": 1.3601298332214355, "learning_rate": 1.5453295895888382e-06, "loss": 0.5892, "step": 5858 }, { "epoch": 0.75, "grad_norm": 1.664817214012146, "learning_rate": 1.5438298124023537e-06, "loss": 0.5797, "step": 5859 }, { "epoch": 0.75, "grad_norm": 2.5244088172912598, "learning_rate": 1.542330630470935e-06, "loss": 0.4757, "step": 5860 }, { "epoch": 0.75, "grad_norm": 1.2353243827819824, "learning_rate": 1.5408320440527874e-06, "loss": 0.5268, "step": 5861 }, { "epoch": 0.75, "grad_norm": 1.2477408647537231, "learning_rate": 1.5393340534060064e-06, "loss": 0.6622, "step": 5862 }, { "epoch": 0.75, "grad_norm": 1.1838089227676392, "learning_rate": 1.5378366587885956e-06, "loss": 0.5516, "step": 5863 }, { "epoch": 0.75, "grad_norm": 1.4416934251785278, "learning_rate": 1.5363398604584496e-06, "loss": 0.5599, "step": 5864 }, { "epoch": 0.75, "grad_norm": 1.2726426124572754, "learning_rate": 1.5348436586733623e-06, "loss": 0.5677, "step": 5865 }, { "epoch": 0.75, "grad_norm": 1.5306282043457031, "learning_rate": 1.5333480536910217e-06, "loss": 0.6393, "step": 5866 }, { "epoch": 0.75, "grad_norm": 1.202629566192627, "learning_rate": 1.5318530457690163e-06, "loss": 0.6512, "step": 5867 }, { "epoch": 0.75, "grad_norm": 1.521677851676941, "learning_rate": 1.5303586351648326e-06, "loss": 0.6393, "step": 5868 }, { "epoch": 0.75, "grad_norm": 1.428450107574463, "learning_rate": 1.5288648221358488e-06, "loss": 0.6125, "step": 5869 }, { "epoch": 0.75, "grad_norm": 1.4109379053115845, "learning_rate": 1.5273716069393467e-06, "loss": 0.5075, "step": 5870 }, { "epoch": 0.75, "grad_norm": 1.4165418148040771, "learning_rate": 1.525878989832501e-06, "loss": 0.6775, "step": 5871 }, { "epoch": 0.75, "grad_norm": 1.4802438020706177, "learning_rate": 1.5243869710723875e-06, "loss": 0.6267, "step": 5872 }, { "epoch": 0.75, "grad_norm": 1.338913917541504, "learning_rate": 1.5228955509159715e-06, "loss": 0.6234, "step": 5873 }, { "epoch": 0.75, "grad_norm": 1.6580629348754883, "learning_rate": 1.5214047296201218e-06, "loss": 0.5374, "step": 5874 }, { "epoch": 0.75, "grad_norm": 1.2051814794540405, "learning_rate": 1.5199145074416028e-06, "loss": 0.5343, "step": 5875 }, { "epoch": 0.75, "grad_norm": 1.2296867370605469, "learning_rate": 1.5184248846370763e-06, "loss": 0.6521, "step": 5876 }, { "epoch": 0.75, "grad_norm": 2.1033120155334473, "learning_rate": 1.5169358614630958e-06, "loss": 0.66, "step": 5877 }, { "epoch": 0.75, "grad_norm": 1.2728708982467651, "learning_rate": 1.5154474381761175e-06, "loss": 0.6069, "step": 5878 }, { "epoch": 0.75, "grad_norm": 1.303300380706787, "learning_rate": 1.5139596150324915e-06, "loss": 0.5893, "step": 5879 }, { "epoch": 0.75, "grad_norm": 1.422898530960083, "learning_rate": 1.5124723922884655e-06, "loss": 0.5823, "step": 5880 }, { "epoch": 0.75, "grad_norm": 1.917449712753296, "learning_rate": 1.5109857702001834e-06, "loss": 0.6907, "step": 5881 }, { "epoch": 0.75, "grad_norm": 1.2992802858352661, "learning_rate": 1.5094997490236857e-06, "loss": 0.6293, "step": 5882 }, { "epoch": 0.75, "grad_norm": 1.5122466087341309, "learning_rate": 1.50801432901491e-06, "loss": 0.6306, "step": 5883 }, { "epoch": 0.75, "grad_norm": 1.4652949571609497, "learning_rate": 1.5065295104296884e-06, "loss": 0.5773, "step": 5884 }, { "epoch": 0.75, "grad_norm": 1.2318650484085083, "learning_rate": 1.5050452935237502e-06, "loss": 0.5216, "step": 5885 }, { "epoch": 0.75, "grad_norm": 1.5187735557556152, "learning_rate": 1.5035616785527235e-06, "loss": 0.662, "step": 5886 }, { "epoch": 0.75, "grad_norm": 1.2845804691314697, "learning_rate": 1.5020786657721309e-06, "loss": 0.5735, "step": 5887 }, { "epoch": 0.75, "grad_norm": 1.2593872547149658, "learning_rate": 1.5005962554373887e-06, "loss": 0.5358, "step": 5888 }, { "epoch": 0.75, "grad_norm": 1.33612060546875, "learning_rate": 1.4991144478038133e-06, "loss": 0.5702, "step": 5889 }, { "epoch": 0.75, "grad_norm": 1.3789081573486328, "learning_rate": 1.497633243126616e-06, "loss": 0.5369, "step": 5890 }, { "epoch": 0.75, "grad_norm": 4.179154396057129, "learning_rate": 1.4961526416609062e-06, "loss": 0.6509, "step": 5891 }, { "epoch": 0.75, "grad_norm": 1.2960028648376465, "learning_rate": 1.4946726436616844e-06, "loss": 0.4924, "step": 5892 }, { "epoch": 0.75, "grad_norm": 1.3278669118881226, "learning_rate": 1.493193249383851e-06, "loss": 0.6282, "step": 5893 }, { "epoch": 0.76, "grad_norm": 1.4293935298919678, "learning_rate": 1.4917144590822015e-06, "loss": 0.6909, "step": 5894 }, { "epoch": 0.76, "grad_norm": 1.6118907928466797, "learning_rate": 1.4902362730114296e-06, "loss": 0.6674, "step": 5895 }, { "epoch": 0.76, "grad_norm": 1.550476312637329, "learning_rate": 1.4887586914261175e-06, "loss": 0.5613, "step": 5896 }, { "epoch": 0.76, "grad_norm": 1.3202171325683594, "learning_rate": 1.4872817145807545e-06, "loss": 0.5946, "step": 5897 }, { "epoch": 0.76, "grad_norm": 1.505103588104248, "learning_rate": 1.4858053427297186e-06, "loss": 0.5916, "step": 5898 }, { "epoch": 0.76, "grad_norm": 1.4744017124176025, "learning_rate": 1.4843295761272824e-06, "loss": 0.6262, "step": 5899 }, { "epoch": 0.76, "grad_norm": 1.3989981412887573, "learning_rate": 1.482854415027618e-06, "loss": 0.6148, "step": 5900 }, { "epoch": 0.76, "grad_norm": 1.4221875667572021, "learning_rate": 1.4813798596847916e-06, "loss": 0.57, "step": 5901 }, { "epoch": 0.76, "grad_norm": 1.3431564569473267, "learning_rate": 1.479905910352768e-06, "loss": 0.5682, "step": 5902 }, { "epoch": 0.76, "grad_norm": 1.2961328029632568, "learning_rate": 1.4784325672854005e-06, "loss": 0.6361, "step": 5903 }, { "epoch": 0.76, "grad_norm": 1.5604289770126343, "learning_rate": 1.4769598307364452e-06, "loss": 0.6184, "step": 5904 }, { "epoch": 0.76, "grad_norm": 1.340232014656067, "learning_rate": 1.4754877009595509e-06, "loss": 0.5844, "step": 5905 }, { "epoch": 0.76, "grad_norm": 1.2040913105010986, "learning_rate": 1.4740161782082623e-06, "loss": 0.6196, "step": 5906 }, { "epoch": 0.76, "grad_norm": 1.2919450998306274, "learning_rate": 1.4725452627360182e-06, "loss": 0.623, "step": 5907 }, { "epoch": 0.76, "grad_norm": 1.9808508157730103, "learning_rate": 1.471074954796154e-06, "loss": 0.6147, "step": 5908 }, { "epoch": 0.76, "grad_norm": 1.3140724897384644, "learning_rate": 1.4696052546419005e-06, "loss": 0.5737, "step": 5909 }, { "epoch": 0.76, "grad_norm": 1.031933069229126, "learning_rate": 1.4681361625263857e-06, "loss": 0.6697, "step": 5910 }, { "epoch": 0.76, "grad_norm": 1.5908561944961548, "learning_rate": 1.4666676787026273e-06, "loss": 0.6361, "step": 5911 }, { "epoch": 0.76, "grad_norm": 1.4421895742416382, "learning_rate": 1.4651998034235438e-06, "loss": 0.6496, "step": 5912 }, { "epoch": 0.76, "grad_norm": 2.0249710083007812, "learning_rate": 1.463732536941947e-06, "loss": 0.6466, "step": 5913 }, { "epoch": 0.76, "grad_norm": 1.8768399953842163, "learning_rate": 1.4622658795105427e-06, "loss": 0.693, "step": 5914 }, { "epoch": 0.76, "grad_norm": 1.4486182928085327, "learning_rate": 1.460799831381934e-06, "loss": 0.5236, "step": 5915 }, { "epoch": 0.76, "grad_norm": 1.1291416883468628, "learning_rate": 1.4593343928086185e-06, "loss": 0.6452, "step": 5916 }, { "epoch": 0.76, "grad_norm": 1.273863434791565, "learning_rate": 1.4578695640429863e-06, "loss": 0.5713, "step": 5917 }, { "epoch": 0.76, "grad_norm": 1.5281238555908203, "learning_rate": 1.4564053453373245e-06, "loss": 0.6862, "step": 5918 }, { "epoch": 0.76, "grad_norm": 1.6069231033325195, "learning_rate": 1.4549417369438162e-06, "loss": 0.5982, "step": 5919 }, { "epoch": 0.76, "grad_norm": 1.2721714973449707, "learning_rate": 1.4534787391145388e-06, "loss": 0.5969, "step": 5920 }, { "epoch": 0.76, "grad_norm": 1.1203700304031372, "learning_rate": 1.452016352101462e-06, "loss": 0.5516, "step": 5921 }, { "epoch": 0.76, "grad_norm": 1.6137348413467407, "learning_rate": 1.4505545761564526e-06, "loss": 0.6058, "step": 5922 }, { "epoch": 0.76, "grad_norm": 1.4966486692428589, "learning_rate": 1.4490934115312721e-06, "loss": 0.6258, "step": 5923 }, { "epoch": 0.76, "grad_norm": 1.3931201696395874, "learning_rate": 1.4476328584775784e-06, "loss": 0.5624, "step": 5924 }, { "epoch": 0.76, "grad_norm": 1.4084396362304688, "learning_rate": 1.4461729172469191e-06, "loss": 0.52, "step": 5925 }, { "epoch": 0.76, "grad_norm": 1.3359655141830444, "learning_rate": 1.4447135880907397e-06, "loss": 0.5936, "step": 5926 }, { "epoch": 0.76, "grad_norm": 1.4408459663391113, "learning_rate": 1.4432548712603816e-06, "loss": 0.5981, "step": 5927 }, { "epoch": 0.76, "grad_norm": 1.4925096035003662, "learning_rate": 1.4417967670070799e-06, "loss": 0.556, "step": 5928 }, { "epoch": 0.76, "grad_norm": 2.2388222217559814, "learning_rate": 1.44033927558196e-06, "loss": 0.5848, "step": 5929 }, { "epoch": 0.76, "grad_norm": 1.3931220769882202, "learning_rate": 1.4388823972360455e-06, "loss": 0.5938, "step": 5930 }, { "epoch": 0.76, "grad_norm": 1.208591103553772, "learning_rate": 1.4374261322202592e-06, "loss": 0.6013, "step": 5931 }, { "epoch": 0.76, "grad_norm": 1.465299129486084, "learning_rate": 1.435970480785408e-06, "loss": 0.6509, "step": 5932 }, { "epoch": 0.76, "grad_norm": 1.5342435836791992, "learning_rate": 1.4345154431821995e-06, "loss": 0.636, "step": 5933 }, { "epoch": 0.76, "grad_norm": 1.555898666381836, "learning_rate": 1.4330610196612348e-06, "loss": 0.5484, "step": 5934 }, { "epoch": 0.76, "grad_norm": 1.423935055732727, "learning_rate": 1.4316072104730106e-06, "loss": 0.5434, "step": 5935 }, { "epoch": 0.76, "grad_norm": 1.2774029970169067, "learning_rate": 1.4301540158679123e-06, "loss": 0.5129, "step": 5936 }, { "epoch": 0.76, "grad_norm": 1.258390188217163, "learning_rate": 1.4287014360962247e-06, "loss": 0.5213, "step": 5937 }, { "epoch": 0.76, "grad_norm": 1.3220731019973755, "learning_rate": 1.427249471408126e-06, "loss": 0.5337, "step": 5938 }, { "epoch": 0.76, "grad_norm": 1.4194802045822144, "learning_rate": 1.4257981220536883e-06, "loss": 0.567, "step": 5939 }, { "epoch": 0.76, "grad_norm": 1.372889757156372, "learning_rate": 1.424347388282874e-06, "loss": 0.6144, "step": 5940 }, { "epoch": 0.76, "grad_norm": 1.7539900541305542, "learning_rate": 1.4228972703455441e-06, "loss": 0.6644, "step": 5941 }, { "epoch": 0.76, "grad_norm": 1.544344186782837, "learning_rate": 1.4214477684914524e-06, "loss": 0.6441, "step": 5942 }, { "epoch": 0.76, "grad_norm": 1.3730299472808838, "learning_rate": 1.4199988829702472e-06, "loss": 0.5675, "step": 5943 }, { "epoch": 0.76, "grad_norm": 1.2083449363708496, "learning_rate": 1.4185506140314665e-06, "loss": 0.5938, "step": 5944 }, { "epoch": 0.76, "grad_norm": 1.5624059438705444, "learning_rate": 1.4171029619245468e-06, "loss": 0.6969, "step": 5945 }, { "epoch": 0.76, "grad_norm": 1.3644176721572876, "learning_rate": 1.4156559268988168e-06, "loss": 0.5768, "step": 5946 }, { "epoch": 0.76, "grad_norm": 1.3280311822891235, "learning_rate": 1.414209509203499e-06, "loss": 0.6175, "step": 5947 }, { "epoch": 0.76, "grad_norm": 1.4516396522521973, "learning_rate": 1.4127637090877094e-06, "loss": 0.5231, "step": 5948 }, { "epoch": 0.76, "grad_norm": 1.298318862915039, "learning_rate": 1.4113185268004576e-06, "loss": 0.5446, "step": 5949 }, { "epoch": 0.76, "grad_norm": 1.2986234426498413, "learning_rate": 1.4098739625906482e-06, "loss": 0.5896, "step": 5950 }, { "epoch": 0.76, "grad_norm": 1.3965665102005005, "learning_rate": 1.4084300167070758e-06, "loss": 0.6212, "step": 5951 }, { "epoch": 0.76, "grad_norm": 1.5937730073928833, "learning_rate": 1.4069866893984307e-06, "loss": 0.5914, "step": 5952 }, { "epoch": 0.76, "grad_norm": 2.7656188011169434, "learning_rate": 1.4055439809132987e-06, "loss": 0.5545, "step": 5953 }, { "epoch": 0.76, "grad_norm": 1.46426260471344, "learning_rate": 1.4041018915001564e-06, "loss": 0.58, "step": 5954 }, { "epoch": 0.76, "grad_norm": 1.5463647842407227, "learning_rate": 1.4026604214073731e-06, "loss": 0.5886, "step": 5955 }, { "epoch": 0.76, "grad_norm": 1.4944566488265991, "learning_rate": 1.4012195708832132e-06, "loss": 0.5805, "step": 5956 }, { "epoch": 0.76, "grad_norm": 1.733450174331665, "learning_rate": 1.3997793401758347e-06, "loss": 0.5651, "step": 5957 }, { "epoch": 0.76, "grad_norm": 1.2912808656692505, "learning_rate": 1.398339729533289e-06, "loss": 0.6123, "step": 5958 }, { "epoch": 0.76, "grad_norm": 1.4565380811691284, "learning_rate": 1.3969007392035166e-06, "loss": 0.656, "step": 5959 }, { "epoch": 0.76, "grad_norm": 1.2773964405059814, "learning_rate": 1.3954623694343566e-06, "loss": 0.5466, "step": 5960 }, { "epoch": 0.76, "grad_norm": 1.2596515417099, "learning_rate": 1.3940246204735398e-06, "loss": 0.5493, "step": 5961 }, { "epoch": 0.76, "grad_norm": 1.3928008079528809, "learning_rate": 1.392587492568686e-06, "loss": 0.5857, "step": 5962 }, { "epoch": 0.76, "grad_norm": 1.3344323635101318, "learning_rate": 1.3911509859673138e-06, "loss": 0.5673, "step": 5963 }, { "epoch": 0.76, "grad_norm": 1.650927186012268, "learning_rate": 1.3897151009168297e-06, "loss": 0.5837, "step": 5964 }, { "epoch": 0.76, "grad_norm": 1.2089368104934692, "learning_rate": 1.3882798376645413e-06, "loss": 0.5227, "step": 5965 }, { "epoch": 0.76, "grad_norm": 1.2477643489837646, "learning_rate": 1.386845196457638e-06, "loss": 0.5825, "step": 5966 }, { "epoch": 0.76, "grad_norm": 1.5391136407852173, "learning_rate": 1.3854111775432093e-06, "loss": 0.6396, "step": 5967 }, { "epoch": 0.76, "grad_norm": 1.506758213043213, "learning_rate": 1.383977781168238e-06, "loss": 0.5131, "step": 5968 }, { "epoch": 0.76, "grad_norm": 1.46280038356781, "learning_rate": 1.3825450075795937e-06, "loss": 0.5954, "step": 5969 }, { "epoch": 0.76, "grad_norm": 1.0830283164978027, "learning_rate": 1.3811128570240445e-06, "loss": 0.6811, "step": 5970 }, { "epoch": 0.76, "grad_norm": 1.7907627820968628, "learning_rate": 1.3796813297482486e-06, "loss": 0.6008, "step": 5971 }, { "epoch": 0.77, "grad_norm": 1.3350781202316284, "learning_rate": 1.3782504259987601e-06, "loss": 0.538, "step": 5972 }, { "epoch": 0.77, "grad_norm": 1.362600326538086, "learning_rate": 1.3768201460220187e-06, "loss": 0.5904, "step": 5973 }, { "epoch": 0.77, "grad_norm": 1.149887204170227, "learning_rate": 1.375390490064364e-06, "loss": 0.5439, "step": 5974 }, { "epoch": 0.77, "grad_norm": 1.1738979816436768, "learning_rate": 1.373961458372024e-06, "loss": 0.535, "step": 5975 }, { "epoch": 0.77, "grad_norm": 1.3399025201797485, "learning_rate": 1.3725330511911223e-06, "loss": 0.5719, "step": 5976 }, { "epoch": 0.77, "grad_norm": 1.3920693397521973, "learning_rate": 1.3711052687676701e-06, "loss": 0.6096, "step": 5977 }, { "epoch": 0.77, "grad_norm": 1.3506783246994019, "learning_rate": 1.3696781113475754e-06, "loss": 0.594, "step": 5978 }, { "epoch": 0.77, "grad_norm": 1.2706949710845947, "learning_rate": 1.3682515791766372e-06, "loss": 0.5891, "step": 5979 }, { "epoch": 0.77, "grad_norm": 1.2764166593551636, "learning_rate": 1.3668256725005475e-06, "loss": 0.6208, "step": 5980 }, { "epoch": 0.77, "grad_norm": 1.4557310342788696, "learning_rate": 1.3654003915648873e-06, "loss": 0.558, "step": 5981 }, { "epoch": 0.77, "grad_norm": 1.1343717575073242, "learning_rate": 1.3639757366151323e-06, "loss": 0.5511, "step": 5982 }, { "epoch": 0.77, "grad_norm": 1.4977566003799438, "learning_rate": 1.3625517078966544e-06, "loss": 0.5928, "step": 5983 }, { "epoch": 0.77, "grad_norm": 1.3123750686645508, "learning_rate": 1.3611283056547097e-06, "loss": 0.5015, "step": 5984 }, { "epoch": 0.77, "grad_norm": 1.268973469734192, "learning_rate": 1.3597055301344515e-06, "loss": 0.6172, "step": 5985 }, { "epoch": 0.77, "grad_norm": 1.3133175373077393, "learning_rate": 1.3582833815809244e-06, "loss": 0.6634, "step": 5986 }, { "epoch": 0.77, "grad_norm": 1.5407078266143799, "learning_rate": 1.356861860239065e-06, "loss": 0.6416, "step": 5987 }, { "epoch": 0.77, "grad_norm": 1.2365789413452148, "learning_rate": 1.3554409663536993e-06, "loss": 0.6078, "step": 5988 }, { "epoch": 0.77, "grad_norm": 1.4890007972717285, "learning_rate": 1.3540207001695489e-06, "loss": 0.5228, "step": 5989 }, { "epoch": 0.77, "grad_norm": 1.1936070919036865, "learning_rate": 1.3526010619312252e-06, "loss": 0.6939, "step": 5990 }, { "epoch": 0.77, "grad_norm": 1.4938281774520874, "learning_rate": 1.3511820518832342e-06, "loss": 0.628, "step": 5991 }, { "epoch": 0.77, "grad_norm": 1.7732173204421997, "learning_rate": 1.3497636702699684e-06, "loss": 0.6403, "step": 5992 }, { "epoch": 0.77, "grad_norm": 1.5048422813415527, "learning_rate": 1.3483459173357167e-06, "loss": 0.6572, "step": 5993 }, { "epoch": 0.77, "grad_norm": 1.2302240133285522, "learning_rate": 1.3469287933246577e-06, "loss": 0.562, "step": 5994 }, { "epoch": 0.77, "grad_norm": 1.197856068611145, "learning_rate": 1.3455122984808644e-06, "loss": 0.677, "step": 5995 }, { "epoch": 0.77, "grad_norm": 1.3732661008834839, "learning_rate": 1.3440964330482958e-06, "loss": 0.6358, "step": 5996 }, { "epoch": 0.77, "grad_norm": 1.2321995496749878, "learning_rate": 1.3426811972708076e-06, "loss": 0.6068, "step": 5997 }, { "epoch": 0.77, "grad_norm": 1.1939703226089478, "learning_rate": 1.3412665913921451e-06, "loss": 0.5675, "step": 5998 }, { "epoch": 0.77, "grad_norm": 1.3941285610198975, "learning_rate": 1.339852615655946e-06, "loss": 0.612, "step": 5999 }, { "epoch": 0.77, "grad_norm": 1.4940359592437744, "learning_rate": 1.3384392703057386e-06, "loss": 0.653, "step": 6000 }, { "epoch": 0.77, "grad_norm": 1.482255220413208, "learning_rate": 1.3370265555849427e-06, "loss": 0.495, "step": 6001 }, { "epoch": 0.77, "grad_norm": 1.4018694162368774, "learning_rate": 1.335614471736872e-06, "loss": 0.6908, "step": 6002 }, { "epoch": 0.77, "grad_norm": 1.2565648555755615, "learning_rate": 1.334203019004725e-06, "loss": 0.5861, "step": 6003 }, { "epoch": 0.77, "grad_norm": 1.4649759531021118, "learning_rate": 1.3327921976315977e-06, "loss": 0.5378, "step": 6004 }, { "epoch": 0.77, "grad_norm": 1.391389012336731, "learning_rate": 1.3313820078604761e-06, "loss": 0.6189, "step": 6005 }, { "epoch": 0.77, "grad_norm": 1.4627043008804321, "learning_rate": 1.3299724499342376e-06, "loss": 0.6309, "step": 6006 }, { "epoch": 0.77, "grad_norm": 1.310528039932251, "learning_rate": 1.3285635240956469e-06, "loss": 0.6002, "step": 6007 }, { "epoch": 0.77, "grad_norm": 1.153878092765808, "learning_rate": 1.3271552305873648e-06, "loss": 0.6072, "step": 6008 }, { "epoch": 0.77, "grad_norm": 1.3677489757537842, "learning_rate": 1.3257475696519417e-06, "loss": 0.6208, "step": 6009 }, { "epoch": 0.77, "grad_norm": 1.3258711099624634, "learning_rate": 1.3243405415318166e-06, "loss": 0.5862, "step": 6010 }, { "epoch": 0.77, "grad_norm": 1.423606038093567, "learning_rate": 1.322934146469323e-06, "loss": 0.5903, "step": 6011 }, { "epoch": 0.77, "grad_norm": 1.480432152748108, "learning_rate": 1.3215283847066828e-06, "loss": 0.4928, "step": 6012 }, { "epoch": 0.77, "grad_norm": 1.5109148025512695, "learning_rate": 1.3201232564860128e-06, "loss": 0.6378, "step": 6013 }, { "epoch": 0.77, "grad_norm": 1.531599998474121, "learning_rate": 1.3187187620493137e-06, "loss": 0.6011, "step": 6014 }, { "epoch": 0.77, "grad_norm": 1.9274752140045166, "learning_rate": 1.3173149016384829e-06, "loss": 0.5542, "step": 6015 }, { "epoch": 0.77, "grad_norm": 1.15962553024292, "learning_rate": 1.3159116754953073e-06, "loss": 0.5979, "step": 6016 }, { "epoch": 0.77, "grad_norm": 1.3300819396972656, "learning_rate": 1.3145090838614633e-06, "loss": 0.538, "step": 6017 }, { "epoch": 0.77, "grad_norm": 1.3785098791122437, "learning_rate": 1.3131071269785196e-06, "loss": 0.6167, "step": 6018 }, { "epoch": 0.77, "grad_norm": 1.139225721359253, "learning_rate": 1.3117058050879344e-06, "loss": 0.7076, "step": 6019 }, { "epoch": 0.77, "grad_norm": 1.5697603225708008, "learning_rate": 1.3103051184310577e-06, "loss": 0.614, "step": 6020 }, { "epoch": 0.77, "grad_norm": 1.3997340202331543, "learning_rate": 1.3089050672491278e-06, "loss": 0.5386, "step": 6021 }, { "epoch": 0.77, "grad_norm": 1.4354143142700195, "learning_rate": 1.3075056517832758e-06, "loss": 0.6331, "step": 6022 }, { "epoch": 0.77, "grad_norm": 1.3406672477722168, "learning_rate": 1.306106872274522e-06, "loss": 0.5646, "step": 6023 }, { "epoch": 0.77, "grad_norm": 1.3164324760437012, "learning_rate": 1.30470872896378e-06, "loss": 0.582, "step": 6024 }, { "epoch": 0.77, "grad_norm": 1.4579137563705444, "learning_rate": 1.3033112220918482e-06, "loss": 0.6018, "step": 6025 }, { "epoch": 0.77, "grad_norm": 1.4018653631210327, "learning_rate": 1.3019143518994203e-06, "loss": 0.6285, "step": 6026 }, { "epoch": 0.77, "grad_norm": 1.6690030097961426, "learning_rate": 1.3005181186270794e-06, "loss": 0.6455, "step": 6027 }, { "epoch": 0.77, "grad_norm": 1.20956552028656, "learning_rate": 1.2991225225152988e-06, "loss": 0.5629, "step": 6028 }, { "epoch": 0.77, "grad_norm": 1.3824257850646973, "learning_rate": 1.297727563804439e-06, "loss": 0.5708, "step": 6029 }, { "epoch": 0.77, "grad_norm": 1.7339993715286255, "learning_rate": 1.2963332427347552e-06, "loss": 0.6303, "step": 6030 }, { "epoch": 0.77, "grad_norm": 1.2081109285354614, "learning_rate": 1.2949395595463904e-06, "loss": 0.6148, "step": 6031 }, { "epoch": 0.77, "grad_norm": 1.271765947341919, "learning_rate": 1.2935465144793797e-06, "loss": 0.5637, "step": 6032 }, { "epoch": 0.77, "grad_norm": 1.3769280910491943, "learning_rate": 1.2921541077736428e-06, "loss": 0.565, "step": 6033 }, { "epoch": 0.77, "grad_norm": 1.288026213645935, "learning_rate": 1.2907623396689973e-06, "loss": 0.5762, "step": 6034 }, { "epoch": 0.77, "grad_norm": 2.817467451095581, "learning_rate": 1.289371210405147e-06, "loss": 0.6836, "step": 6035 }, { "epoch": 0.77, "grad_norm": 1.3072487115859985, "learning_rate": 1.2879807202216826e-06, "loss": 0.5799, "step": 6036 }, { "epoch": 0.77, "grad_norm": 1.3482693433761597, "learning_rate": 1.2865908693580903e-06, "loss": 0.6028, "step": 6037 }, { "epoch": 0.77, "grad_norm": 1.5620173215866089, "learning_rate": 1.2852016580537424e-06, "loss": 0.5214, "step": 6038 }, { "epoch": 0.77, "grad_norm": 1.521825909614563, "learning_rate": 1.2838130865479049e-06, "loss": 0.6226, "step": 6039 }, { "epoch": 0.77, "grad_norm": 1.7323329448699951, "learning_rate": 1.2824251550797268e-06, "loss": 0.5745, "step": 6040 }, { "epoch": 0.77, "grad_norm": 1.4529424905776978, "learning_rate": 1.281037863888253e-06, "loss": 0.6783, "step": 6041 }, { "epoch": 0.77, "grad_norm": 1.3162099123001099, "learning_rate": 1.2796512132124172e-06, "loss": 0.6138, "step": 6042 }, { "epoch": 0.77, "grad_norm": 1.3417941331863403, "learning_rate": 1.278265203291042e-06, "loss": 0.651, "step": 6043 }, { "epoch": 0.77, "grad_norm": 1.2643450498580933, "learning_rate": 1.2768798343628368e-06, "loss": 0.5403, "step": 6044 }, { "epoch": 0.77, "grad_norm": 1.5022494792938232, "learning_rate": 1.2754951066664056e-06, "loss": 0.592, "step": 6045 }, { "epoch": 0.77, "grad_norm": 1.4565330743789673, "learning_rate": 1.274111020440238e-06, "loss": 0.5484, "step": 6046 }, { "epoch": 0.77, "grad_norm": 1.3392274379730225, "learning_rate": 1.2727275759227175e-06, "loss": 0.6221, "step": 6047 }, { "epoch": 0.77, "grad_norm": 1.425985336303711, "learning_rate": 1.271344773352111e-06, "loss": 0.5557, "step": 6048 }, { "epoch": 0.77, "grad_norm": 1.3691293001174927, "learning_rate": 1.2699626129665798e-06, "loss": 0.6133, "step": 6049 }, { "epoch": 0.78, "grad_norm": 1.5187207460403442, "learning_rate": 1.2685810950041722e-06, "loss": 0.6545, "step": 6050 }, { "epoch": 0.78, "grad_norm": 1.3241124153137207, "learning_rate": 1.267200219702827e-06, "loss": 0.5776, "step": 6051 }, { "epoch": 0.78, "grad_norm": 1.4314247369766235, "learning_rate": 1.2658199873003723e-06, "loss": 0.5092, "step": 6052 }, { "epoch": 0.78, "grad_norm": 1.4745780229568481, "learning_rate": 1.264440398034525e-06, "loss": 0.561, "step": 6053 }, { "epoch": 0.78, "grad_norm": 1.2822953462600708, "learning_rate": 1.2630614521428919e-06, "loss": 0.601, "step": 6054 }, { "epoch": 0.78, "grad_norm": 1.345860242843628, "learning_rate": 1.2616831498629668e-06, "loss": 0.5509, "step": 6055 }, { "epoch": 0.78, "grad_norm": 1.3719156980514526, "learning_rate": 1.2603054914321343e-06, "loss": 0.5773, "step": 6056 }, { "epoch": 0.78, "grad_norm": 1.6491966247558594, "learning_rate": 1.2589284770876692e-06, "loss": 0.5813, "step": 6057 }, { "epoch": 0.78, "grad_norm": 1.3380208015441895, "learning_rate": 1.2575521070667352e-06, "loss": 0.6066, "step": 6058 }, { "epoch": 0.78, "grad_norm": 1.6886008977890015, "learning_rate": 1.256176381606381e-06, "loss": 0.6094, "step": 6059 }, { "epoch": 0.78, "grad_norm": 1.4857572317123413, "learning_rate": 1.2548013009435495e-06, "loss": 0.5453, "step": 6060 }, { "epoch": 0.78, "grad_norm": 1.2749769687652588, "learning_rate": 1.2534268653150705e-06, "loss": 0.5806, "step": 6061 }, { "epoch": 0.78, "grad_norm": 1.563644528388977, "learning_rate": 1.2520530749576615e-06, "loss": 0.6023, "step": 6062 }, { "epoch": 0.78, "grad_norm": 1.2830228805541992, "learning_rate": 1.2506799301079298e-06, "loss": 0.5755, "step": 6063 }, { "epoch": 0.78, "grad_norm": 1.7365049123764038, "learning_rate": 1.2493074310023728e-06, "loss": 0.5773, "step": 6064 }, { "epoch": 0.78, "grad_norm": 1.2732226848602295, "learning_rate": 1.2479355778773761e-06, "loss": 0.5507, "step": 6065 }, { "epoch": 0.78, "grad_norm": 1.5290393829345703, "learning_rate": 1.2465643709692115e-06, "loss": 0.6324, "step": 6066 }, { "epoch": 0.78, "grad_norm": 2.335749387741089, "learning_rate": 1.245193810514041e-06, "loss": 0.6078, "step": 6067 }, { "epoch": 0.78, "grad_norm": 1.4709378480911255, "learning_rate": 1.2438238967479205e-06, "loss": 0.6015, "step": 6068 }, { "epoch": 0.78, "grad_norm": 1.3493144512176514, "learning_rate": 1.2424546299067846e-06, "loss": 0.6496, "step": 6069 }, { "epoch": 0.78, "grad_norm": 1.2655116319656372, "learning_rate": 1.2410860102264644e-06, "loss": 0.5568, "step": 6070 }, { "epoch": 0.78, "grad_norm": 1.2244620323181152, "learning_rate": 1.239718037942676e-06, "loss": 0.6423, "step": 6071 }, { "epoch": 0.78, "grad_norm": 1.434319019317627, "learning_rate": 1.238350713291026e-06, "loss": 0.6144, "step": 6072 }, { "epoch": 0.78, "grad_norm": 1.324418067932129, "learning_rate": 1.2369840365070063e-06, "loss": 0.5737, "step": 6073 }, { "epoch": 0.78, "grad_norm": 1.387671709060669, "learning_rate": 1.235618007826e-06, "loss": 0.6031, "step": 6074 }, { "epoch": 0.78, "grad_norm": 1.4040278196334839, "learning_rate": 1.2342526274832778e-06, "loss": 0.6111, "step": 6075 }, { "epoch": 0.78, "grad_norm": 1.373987078666687, "learning_rate": 1.2328878957140006e-06, "loss": 0.5173, "step": 6076 }, { "epoch": 0.78, "grad_norm": 1.1661827564239502, "learning_rate": 1.2315238127532126e-06, "loss": 0.5731, "step": 6077 }, { "epoch": 0.78, "grad_norm": 1.3571733236312866, "learning_rate": 1.2301603788358501e-06, "loss": 0.6028, "step": 6078 }, { "epoch": 0.78, "grad_norm": 1.2105902433395386, "learning_rate": 1.2287975941967377e-06, "loss": 0.5522, "step": 6079 }, { "epoch": 0.78, "grad_norm": 1.1787046194076538, "learning_rate": 1.2274354590705888e-06, "loss": 0.5634, "step": 6080 }, { "epoch": 0.78, "grad_norm": 1.513954520225525, "learning_rate": 1.2260739736920002e-06, "loss": 0.5923, "step": 6081 }, { "epoch": 0.78, "grad_norm": 1.3857628107070923, "learning_rate": 1.224713138295462e-06, "loss": 0.5761, "step": 6082 }, { "epoch": 0.78, "grad_norm": 2.2354791164398193, "learning_rate": 1.22335295311535e-06, "loss": 0.5485, "step": 6083 }, { "epoch": 0.78, "grad_norm": 1.4245437383651733, "learning_rate": 1.2219934183859283e-06, "loss": 0.6352, "step": 6084 }, { "epoch": 0.78, "grad_norm": 1.455147385597229, "learning_rate": 1.22063453434135e-06, "loss": 0.6363, "step": 6085 }, { "epoch": 0.78, "grad_norm": 1.2811365127563477, "learning_rate": 1.2192763012156544e-06, "loss": 0.5653, "step": 6086 }, { "epoch": 0.78, "grad_norm": 1.1058675050735474, "learning_rate": 1.2179187192427716e-06, "loss": 0.708, "step": 6087 }, { "epoch": 0.78, "grad_norm": 1.4116437435150146, "learning_rate": 1.216561788656514e-06, "loss": 0.6089, "step": 6088 }, { "epoch": 0.78, "grad_norm": 1.2147979736328125, "learning_rate": 1.2152055096905868e-06, "loss": 0.5706, "step": 6089 }, { "epoch": 0.78, "grad_norm": 1.238149881362915, "learning_rate": 1.2138498825785822e-06, "loss": 0.5763, "step": 6090 }, { "epoch": 0.78, "grad_norm": 1.7066261768341064, "learning_rate": 1.21249490755398e-06, "loss": 0.5693, "step": 6091 }, { "epoch": 0.78, "grad_norm": 1.3570530414581299, "learning_rate": 1.2111405848501451e-06, "loss": 0.6234, "step": 6092 }, { "epoch": 0.78, "grad_norm": 1.259835958480835, "learning_rate": 1.2097869147003328e-06, "loss": 0.5633, "step": 6093 }, { "epoch": 0.78, "grad_norm": 1.6666969060897827, "learning_rate": 1.2084338973376853e-06, "loss": 0.575, "step": 6094 }, { "epoch": 0.78, "grad_norm": 0.9863353967666626, "learning_rate": 1.207081532995234e-06, "loss": 0.6823, "step": 6095 }, { "epoch": 0.78, "grad_norm": 1.477820873260498, "learning_rate": 1.2057298219058933e-06, "loss": 0.6011, "step": 6096 }, { "epoch": 0.78, "grad_norm": 1.3085917234420776, "learning_rate": 1.204378764302469e-06, "loss": 0.5428, "step": 6097 }, { "epoch": 0.78, "grad_norm": 1.393518090248108, "learning_rate": 1.2030283604176545e-06, "loss": 0.6187, "step": 6098 }, { "epoch": 0.78, "grad_norm": 1.3972924947738647, "learning_rate": 1.2016786104840296e-06, "loss": 0.6148, "step": 6099 }, { "epoch": 0.78, "grad_norm": 1.4566419124603271, "learning_rate": 1.2003295147340587e-06, "loss": 0.6794, "step": 6100 }, { "epoch": 0.78, "grad_norm": 1.3721485137939453, "learning_rate": 1.1989810734000962e-06, "loss": 0.5104, "step": 6101 }, { "epoch": 0.78, "grad_norm": 1.3800498247146606, "learning_rate": 1.1976332867143887e-06, "loss": 0.4953, "step": 6102 }, { "epoch": 0.78, "grad_norm": 1.5365797281265259, "learning_rate": 1.1962861549090593e-06, "loss": 0.607, "step": 6103 }, { "epoch": 0.78, "grad_norm": 1.18382728099823, "learning_rate": 1.1949396782161272e-06, "loss": 0.725, "step": 6104 }, { "epoch": 0.78, "grad_norm": 1.2551465034484863, "learning_rate": 1.1935938568674943e-06, "loss": 0.6576, "step": 6105 }, { "epoch": 0.78, "grad_norm": 1.2952631711959839, "learning_rate": 1.1922486910949527e-06, "loss": 0.5513, "step": 6106 }, { "epoch": 0.78, "grad_norm": 1.188146948814392, "learning_rate": 1.1909041811301775e-06, "loss": 0.5895, "step": 6107 }, { "epoch": 0.78, "grad_norm": 1.3294532299041748, "learning_rate": 1.189560327204734e-06, "loss": 0.5671, "step": 6108 }, { "epoch": 0.78, "grad_norm": 1.3476665019989014, "learning_rate": 1.188217129550076e-06, "loss": 0.6287, "step": 6109 }, { "epoch": 0.78, "grad_norm": 1.3214592933654785, "learning_rate": 1.1868745883975386e-06, "loss": 0.5985, "step": 6110 }, { "epoch": 0.78, "grad_norm": 1.6524791717529297, "learning_rate": 1.1855327039783481e-06, "loss": 0.6008, "step": 6111 }, { "epoch": 0.78, "grad_norm": 1.334662914276123, "learning_rate": 1.1841914765236178e-06, "loss": 0.5857, "step": 6112 }, { "epoch": 0.78, "grad_norm": 5.622629642486572, "learning_rate": 1.182850906264348e-06, "loss": 0.663, "step": 6113 }, { "epoch": 0.78, "grad_norm": 1.1818311214447021, "learning_rate": 1.1815109934314218e-06, "loss": 0.5183, "step": 6114 }, { "epoch": 0.78, "grad_norm": 1.55926513671875, "learning_rate": 1.1801717382556133e-06, "loss": 0.6356, "step": 6115 }, { "epoch": 0.78, "grad_norm": 1.465349793434143, "learning_rate": 1.1788331409675812e-06, "loss": 0.5734, "step": 6116 }, { "epoch": 0.78, "grad_norm": 1.333842396736145, "learning_rate": 1.1774952017978748e-06, "loss": 0.54, "step": 6117 }, { "epoch": 0.78, "grad_norm": 1.6432085037231445, "learning_rate": 1.1761579209769226e-06, "loss": 0.6167, "step": 6118 }, { "epoch": 0.78, "grad_norm": 1.2257115840911865, "learning_rate": 1.174821298735045e-06, "loss": 0.5268, "step": 6119 }, { "epoch": 0.78, "grad_norm": 1.3372501134872437, "learning_rate": 1.1734853353024517e-06, "loss": 0.6231, "step": 6120 }, { "epoch": 0.78, "grad_norm": 1.5983772277832031, "learning_rate": 1.172150030909231e-06, "loss": 0.6376, "step": 6121 }, { "epoch": 0.78, "grad_norm": 1.557028889656067, "learning_rate": 1.1708153857853637e-06, "loss": 0.661, "step": 6122 }, { "epoch": 0.78, "grad_norm": 1.520965576171875, "learning_rate": 1.1694814001607152e-06, "loss": 0.6603, "step": 6123 }, { "epoch": 0.78, "grad_norm": 1.3232431411743164, "learning_rate": 1.1681480742650387e-06, "loss": 0.5791, "step": 6124 }, { "epoch": 0.78, "grad_norm": 1.9522755146026611, "learning_rate": 1.1668154083279692e-06, "loss": 0.5742, "step": 6125 }, { "epoch": 0.78, "grad_norm": 1.4064555168151855, "learning_rate": 1.165483402579034e-06, "loss": 0.6266, "step": 6126 }, { "epoch": 0.78, "grad_norm": 1.2465460300445557, "learning_rate": 1.1641520572476428e-06, "loss": 0.5173, "step": 6127 }, { "epoch": 0.79, "grad_norm": 1.656630039215088, "learning_rate": 1.162821372563095e-06, "loss": 0.5574, "step": 6128 }, { "epoch": 0.79, "grad_norm": 1.7703921794891357, "learning_rate": 1.1614913487545704e-06, "loss": 0.6125, "step": 6129 }, { "epoch": 0.79, "grad_norm": 1.557174801826477, "learning_rate": 1.1601619860511403e-06, "loss": 0.5712, "step": 6130 }, { "epoch": 0.79, "grad_norm": 1.303775429725647, "learning_rate": 1.1588332846817608e-06, "loss": 0.5705, "step": 6131 }, { "epoch": 0.79, "grad_norm": 1.4587818384170532, "learning_rate": 1.1575052448752744e-06, "loss": 0.5648, "step": 6132 }, { "epoch": 0.79, "grad_norm": 1.3065063953399658, "learning_rate": 1.1561778668604068e-06, "loss": 0.5471, "step": 6133 }, { "epoch": 0.79, "grad_norm": 1.1926515102386475, "learning_rate": 1.1548511508657733e-06, "loss": 0.5889, "step": 6134 }, { "epoch": 0.79, "grad_norm": 1.834707260131836, "learning_rate": 1.153525097119873e-06, "loss": 0.5979, "step": 6135 }, { "epoch": 0.79, "grad_norm": 1.8028165102005005, "learning_rate": 1.1521997058510926e-06, "loss": 0.6267, "step": 6136 }, { "epoch": 0.79, "grad_norm": 1.2593283653259277, "learning_rate": 1.1508749772877032e-06, "loss": 0.604, "step": 6137 }, { "epoch": 0.79, "grad_norm": 1.511449933052063, "learning_rate": 1.1495509116578628e-06, "loss": 0.6545, "step": 6138 }, { "epoch": 0.79, "grad_norm": 1.41291344165802, "learning_rate": 1.1482275091896155e-06, "loss": 0.4631, "step": 6139 }, { "epoch": 0.79, "grad_norm": 1.5056250095367432, "learning_rate": 1.1469047701108888e-06, "loss": 0.6018, "step": 6140 }, { "epoch": 0.79, "grad_norm": 1.5309189558029175, "learning_rate": 1.145582694649498e-06, "loss": 0.6386, "step": 6141 }, { "epoch": 0.79, "grad_norm": 1.509054183959961, "learning_rate": 1.144261283033144e-06, "loss": 0.5576, "step": 6142 }, { "epoch": 0.79, "grad_norm": 2.0752272605895996, "learning_rate": 1.1429405354894147e-06, "loss": 0.6352, "step": 6143 }, { "epoch": 0.79, "grad_norm": 1.3260945081710815, "learning_rate": 1.1416204522457792e-06, "loss": 0.5944, "step": 6144 }, { "epoch": 0.79, "grad_norm": 1.1211093664169312, "learning_rate": 1.140301033529596e-06, "loss": 0.7431, "step": 6145 }, { "epoch": 0.79, "grad_norm": 1.5786157846450806, "learning_rate": 1.1389822795681088e-06, "loss": 0.573, "step": 6146 }, { "epoch": 0.79, "grad_norm": 1.4324467182159424, "learning_rate": 1.1376641905884472e-06, "loss": 0.5515, "step": 6147 }, { "epoch": 0.79, "grad_norm": 1.2469350099563599, "learning_rate": 1.1363467668176221e-06, "loss": 0.532, "step": 6148 }, { "epoch": 0.79, "grad_norm": 1.782609462738037, "learning_rate": 1.135030008482535e-06, "loss": 0.5981, "step": 6149 }, { "epoch": 0.79, "grad_norm": 1.5515788793563843, "learning_rate": 1.1337139158099708e-06, "loss": 0.6569, "step": 6150 }, { "epoch": 0.79, "grad_norm": 1.6036688089370728, "learning_rate": 1.1323984890266005e-06, "loss": 0.6541, "step": 6151 }, { "epoch": 0.79, "grad_norm": 1.3179755210876465, "learning_rate": 1.1310837283589771e-06, "loss": 0.6114, "step": 6152 }, { "epoch": 0.79, "grad_norm": 1.550906777381897, "learning_rate": 1.1297696340335412e-06, "loss": 0.614, "step": 6153 }, { "epoch": 0.79, "grad_norm": 1.434679388999939, "learning_rate": 1.1284562062766236e-06, "loss": 0.5633, "step": 6154 }, { "epoch": 0.79, "grad_norm": 1.4772613048553467, "learning_rate": 1.1271434453144308e-06, "loss": 0.609, "step": 6155 }, { "epoch": 0.79, "grad_norm": 1.4170340299606323, "learning_rate": 1.1258313513730613e-06, "loss": 0.6429, "step": 6156 }, { "epoch": 0.79, "grad_norm": 1.5838710069656372, "learning_rate": 1.1245199246784965e-06, "loss": 0.6858, "step": 6157 }, { "epoch": 0.79, "grad_norm": 2.0961270332336426, "learning_rate": 1.1232091654566024e-06, "loss": 0.5789, "step": 6158 }, { "epoch": 0.79, "grad_norm": 1.436582326889038, "learning_rate": 1.1218990739331297e-06, "loss": 0.6391, "step": 6159 }, { "epoch": 0.79, "grad_norm": 1.3351346254348755, "learning_rate": 1.120589650333717e-06, "loss": 0.5651, "step": 6160 }, { "epoch": 0.79, "grad_norm": 1.1929622888565063, "learning_rate": 1.1192808948838863e-06, "loss": 0.552, "step": 6161 }, { "epoch": 0.79, "grad_norm": 1.3724346160888672, "learning_rate": 1.1179728078090413e-06, "loss": 0.6234, "step": 6162 }, { "epoch": 0.79, "grad_norm": 1.2918885946273804, "learning_rate": 1.1166653893344753e-06, "loss": 0.5676, "step": 6163 }, { "epoch": 0.79, "grad_norm": 1.776462435722351, "learning_rate": 1.115358639685364e-06, "loss": 0.5481, "step": 6164 }, { "epoch": 0.79, "grad_norm": 1.4188241958618164, "learning_rate": 1.11405255908677e-06, "loss": 0.5834, "step": 6165 }, { "epoch": 0.79, "grad_norm": 1.6242364645004272, "learning_rate": 1.112747147763637e-06, "loss": 0.6114, "step": 6166 }, { "epoch": 0.79, "grad_norm": 1.0123103857040405, "learning_rate": 1.1114424059407957e-06, "loss": 0.7003, "step": 6167 }, { "epoch": 0.79, "grad_norm": 1.5645592212677002, "learning_rate": 1.1101383338429627e-06, "loss": 0.6156, "step": 6168 }, { "epoch": 0.79, "grad_norm": 1.3179627656936646, "learning_rate": 1.1088349316947388e-06, "loss": 0.5178, "step": 6169 }, { "epoch": 0.79, "grad_norm": 1.7976934909820557, "learning_rate": 1.107532199720604e-06, "loss": 0.5481, "step": 6170 }, { "epoch": 0.79, "grad_norm": 1.527823567390442, "learning_rate": 1.1062301381449313e-06, "loss": 0.6192, "step": 6171 }, { "epoch": 0.79, "grad_norm": 1.5737080574035645, "learning_rate": 1.1049287471919756e-06, "loss": 0.6406, "step": 6172 }, { "epoch": 0.79, "grad_norm": 1.55455482006073, "learning_rate": 1.1036280270858712e-06, "loss": 0.703, "step": 6173 }, { "epoch": 0.79, "grad_norm": 1.3198282718658447, "learning_rate": 1.1023279780506425e-06, "loss": 0.6418, "step": 6174 }, { "epoch": 0.79, "grad_norm": 2.4118762016296387, "learning_rate": 1.1010286003101967e-06, "loss": 0.6689, "step": 6175 }, { "epoch": 0.79, "grad_norm": 1.9782161712646484, "learning_rate": 1.0997298940883261e-06, "loss": 0.5931, "step": 6176 }, { "epoch": 0.79, "grad_norm": 2.633495807647705, "learning_rate": 1.0984318596087035e-06, "loss": 0.6079, "step": 6177 }, { "epoch": 0.79, "grad_norm": 1.5618376731872559, "learning_rate": 1.0971344970948911e-06, "loss": 0.5518, "step": 6178 }, { "epoch": 0.79, "grad_norm": 1.2417227029800415, "learning_rate": 1.095837806770333e-06, "loss": 0.5463, "step": 6179 }, { "epoch": 0.79, "grad_norm": 2.2190401554107666, "learning_rate": 1.0945417888583593e-06, "loss": 0.648, "step": 6180 }, { "epoch": 0.79, "grad_norm": 1.2779728174209595, "learning_rate": 1.0932464435821789e-06, "loss": 0.5593, "step": 6181 }, { "epoch": 0.79, "grad_norm": 1.9929521083831787, "learning_rate": 1.0919517711648914e-06, "loss": 0.5269, "step": 6182 }, { "epoch": 0.79, "grad_norm": 1.4795068502426147, "learning_rate": 1.090657771829477e-06, "loss": 0.6332, "step": 6183 }, { "epoch": 0.79, "grad_norm": 1.3366481065750122, "learning_rate": 1.0893644457988029e-06, "loss": 0.5933, "step": 6184 }, { "epoch": 0.79, "grad_norm": 1.3400702476501465, "learning_rate": 1.0880717932956148e-06, "loss": 0.5539, "step": 6185 }, { "epoch": 0.79, "grad_norm": 1.3519129753112793, "learning_rate": 1.0867798145425468e-06, "loss": 0.5936, "step": 6186 }, { "epoch": 0.79, "grad_norm": 1.3224157094955444, "learning_rate": 1.085488509762117e-06, "loss": 0.5686, "step": 6187 }, { "epoch": 0.79, "grad_norm": 1.4233057498931885, "learning_rate": 1.084197879176726e-06, "loss": 0.7274, "step": 6188 }, { "epoch": 0.79, "grad_norm": 1.6368494033813477, "learning_rate": 1.0829079230086587e-06, "loss": 0.5718, "step": 6189 }, { "epoch": 0.79, "grad_norm": 1.5278626680374146, "learning_rate": 1.0816186414800838e-06, "loss": 0.5603, "step": 6190 }, { "epoch": 0.79, "grad_norm": 1.375484824180603, "learning_rate": 1.0803300348130552e-06, "loss": 0.564, "step": 6191 }, { "epoch": 0.79, "grad_norm": 2.002624273300171, "learning_rate": 1.0790421032295067e-06, "loss": 0.5248, "step": 6192 }, { "epoch": 0.79, "grad_norm": 1.4766088724136353, "learning_rate": 1.0777548469512588e-06, "loss": 0.5901, "step": 6193 }, { "epoch": 0.79, "grad_norm": 1.3245599269866943, "learning_rate": 1.0764682662000165e-06, "loss": 0.5882, "step": 6194 }, { "epoch": 0.79, "grad_norm": 1.1820677518844604, "learning_rate": 1.0751823611973677e-06, "loss": 0.5516, "step": 6195 }, { "epoch": 0.79, "grad_norm": 1.272409439086914, "learning_rate": 1.0738971321647811e-06, "loss": 0.6358, "step": 6196 }, { "epoch": 0.79, "grad_norm": 1.234097957611084, "learning_rate": 1.0726125793236124e-06, "loss": 0.5142, "step": 6197 }, { "epoch": 0.79, "grad_norm": 1.3627111911773682, "learning_rate": 1.0713287028950997e-06, "loss": 0.5465, "step": 6198 }, { "epoch": 0.79, "grad_norm": 1.6191880702972412, "learning_rate": 1.0700455031003665e-06, "loss": 0.6199, "step": 6199 }, { "epoch": 0.79, "grad_norm": 1.6177563667297363, "learning_rate": 1.0687629801604143e-06, "loss": 0.6624, "step": 6200 }, { "epoch": 0.79, "grad_norm": 1.1564007997512817, "learning_rate": 1.0674811342961332e-06, "loss": 0.5907, "step": 6201 }, { "epoch": 0.79, "grad_norm": 1.260568618774414, "learning_rate": 1.0661999657282968e-06, "loss": 0.5627, "step": 6202 }, { "epoch": 0.79, "grad_norm": 1.5022335052490234, "learning_rate": 1.0649194746775576e-06, "loss": 0.6852, "step": 6203 }, { "epoch": 0.79, "grad_norm": 1.435604453086853, "learning_rate": 1.063639661364454e-06, "loss": 0.6222, "step": 6204 }, { "epoch": 0.79, "grad_norm": 1.1896655559539795, "learning_rate": 1.0623605260094105e-06, "loss": 0.5527, "step": 6205 }, { "epoch": 0.8, "grad_norm": 1.4578163623809814, "learning_rate": 1.0610820688327323e-06, "loss": 0.6038, "step": 6206 }, { "epoch": 0.8, "grad_norm": 1.404301643371582, "learning_rate": 1.0598042900546051e-06, "loss": 0.554, "step": 6207 }, { "epoch": 0.8, "grad_norm": 1.2354761362075806, "learning_rate": 1.058527189895101e-06, "loss": 0.6951, "step": 6208 }, { "epoch": 0.8, "grad_norm": 1.9446297883987427, "learning_rate": 1.0572507685741767e-06, "loss": 0.5824, "step": 6209 }, { "epoch": 0.8, "grad_norm": 1.273754596710205, "learning_rate": 1.055975026311667e-06, "loss": 0.6316, "step": 6210 }, { "epoch": 0.8, "grad_norm": 1.49468195438385, "learning_rate": 1.0546999633272935e-06, "loss": 0.6304, "step": 6211 }, { "epoch": 0.8, "grad_norm": 1.2683830261230469, "learning_rate": 1.0534255798406602e-06, "loss": 0.5711, "step": 6212 }, { "epoch": 0.8, "grad_norm": 1.4558610916137695, "learning_rate": 1.0521518760712545e-06, "loss": 0.5594, "step": 6213 }, { "epoch": 0.8, "grad_norm": 1.4848997592926025, "learning_rate": 1.0508788522384443e-06, "loss": 0.5832, "step": 6214 }, { "epoch": 0.8, "grad_norm": 1.1660897731781006, "learning_rate": 1.0496065085614827e-06, "loss": 0.5497, "step": 6215 }, { "epoch": 0.8, "grad_norm": 1.3938348293304443, "learning_rate": 1.0483348452595056e-06, "loss": 0.6248, "step": 6216 }, { "epoch": 0.8, "grad_norm": 1.2313079833984375, "learning_rate": 1.0470638625515316e-06, "loss": 0.549, "step": 6217 }, { "epoch": 0.8, "grad_norm": 1.4544310569763184, "learning_rate": 1.0457935606564596e-06, "loss": 0.5853, "step": 6218 }, { "epoch": 0.8, "grad_norm": 1.7911359071731567, "learning_rate": 1.044523939793075e-06, "loss": 0.6357, "step": 6219 }, { "epoch": 0.8, "grad_norm": 1.5014184713363647, "learning_rate": 1.0432550001800435e-06, "loss": 0.6137, "step": 6220 }, { "epoch": 0.8, "grad_norm": 1.3622461557388306, "learning_rate": 1.0419867420359141e-06, "loss": 0.5823, "step": 6221 }, { "epoch": 0.8, "grad_norm": 1.2310843467712402, "learning_rate": 1.0407191655791188e-06, "loss": 0.5025, "step": 6222 }, { "epoch": 0.8, "grad_norm": 1.638079047203064, "learning_rate": 1.039452271027972e-06, "loss": 0.6757, "step": 6223 }, { "epoch": 0.8, "grad_norm": 1.6458463668823242, "learning_rate": 1.038186058600672e-06, "loss": 0.4908, "step": 6224 }, { "epoch": 0.8, "grad_norm": 1.4010100364685059, "learning_rate": 1.036920528515295e-06, "loss": 0.6098, "step": 6225 }, { "epoch": 0.8, "grad_norm": 1.987982153892517, "learning_rate": 1.0356556809898044e-06, "loss": 0.59, "step": 6226 }, { "epoch": 0.8, "grad_norm": 1.2834595441818237, "learning_rate": 1.0343915162420443e-06, "loss": 0.5335, "step": 6227 }, { "epoch": 0.8, "grad_norm": 1.4950449466705322, "learning_rate": 1.0331280344897425e-06, "loss": 0.5466, "step": 6228 }, { "epoch": 0.8, "grad_norm": 1.2922565937042236, "learning_rate": 1.0318652359505065e-06, "loss": 0.5139, "step": 6229 }, { "epoch": 0.8, "grad_norm": 1.3083853721618652, "learning_rate": 1.0306031208418277e-06, "loss": 0.5349, "step": 6230 }, { "epoch": 0.8, "grad_norm": 1.3209140300750732, "learning_rate": 1.0293416893810803e-06, "loss": 0.5987, "step": 6231 }, { "epoch": 0.8, "grad_norm": 1.6582406759262085, "learning_rate": 1.028080941785522e-06, "loss": 0.6124, "step": 6232 }, { "epoch": 0.8, "grad_norm": 1.5571374893188477, "learning_rate": 1.0268208782722876e-06, "loss": 0.6194, "step": 6233 }, { "epoch": 0.8, "grad_norm": 6.914717197418213, "learning_rate": 1.0255614990583995e-06, "loss": 0.5848, "step": 6234 }, { "epoch": 0.8, "grad_norm": 1.1963386535644531, "learning_rate": 1.0243028043607588e-06, "loss": 0.5973, "step": 6235 }, { "epoch": 0.8, "grad_norm": 2.909210205078125, "learning_rate": 1.023044794396153e-06, "loss": 0.6383, "step": 6236 }, { "epoch": 0.8, "grad_norm": 1.247591495513916, "learning_rate": 1.0217874693812458e-06, "loss": 0.643, "step": 6237 }, { "epoch": 0.8, "grad_norm": 1.2099329233169556, "learning_rate": 1.0205308295325844e-06, "loss": 0.6963, "step": 6238 }, { "epoch": 0.8, "grad_norm": 1.6623411178588867, "learning_rate": 1.0192748750666053e-06, "loss": 0.5453, "step": 6239 }, { "epoch": 0.8, "grad_norm": 1.3051793575286865, "learning_rate": 1.0180196061996162e-06, "loss": 0.5942, "step": 6240 }, { "epoch": 0.8, "grad_norm": 1.3945056200027466, "learning_rate": 1.0167650231478132e-06, "loss": 0.5567, "step": 6241 }, { "epoch": 0.8, "grad_norm": 1.3888925313949585, "learning_rate": 1.0155111261272726e-06, "loss": 0.5803, "step": 6242 }, { "epoch": 0.8, "grad_norm": 1.3073126077651978, "learning_rate": 1.014257915353954e-06, "loss": 0.5863, "step": 6243 }, { "epoch": 0.8, "grad_norm": 1.203535556793213, "learning_rate": 1.0130053910436955e-06, "loss": 0.5271, "step": 6244 }, { "epoch": 0.8, "grad_norm": 1.2935106754302979, "learning_rate": 1.0117535534122191e-06, "loss": 0.5767, "step": 6245 }, { "epoch": 0.8, "grad_norm": 1.3383389711380005, "learning_rate": 1.0105024026751292e-06, "loss": 0.6398, "step": 6246 }, { "epoch": 0.8, "grad_norm": 1.4551408290863037, "learning_rate": 1.0092519390479116e-06, "loss": 0.6012, "step": 6247 }, { "epoch": 0.8, "grad_norm": 1.4838967323303223, "learning_rate": 1.0080021627459319e-06, "loss": 0.6087, "step": 6248 }, { "epoch": 0.8, "grad_norm": 1.7100906372070312, "learning_rate": 1.0067530739844394e-06, "loss": 0.4806, "step": 6249 }, { "epoch": 0.8, "grad_norm": 1.233081579208374, "learning_rate": 1.005504672978564e-06, "loss": 0.6828, "step": 6250 }, { "epoch": 0.8, "grad_norm": 1.1398285627365112, "learning_rate": 1.0042569599433183e-06, "loss": 0.5084, "step": 6251 }, { "epoch": 0.8, "grad_norm": 1.179826021194458, "learning_rate": 1.003009935093594e-06, "loss": 0.5361, "step": 6252 }, { "epoch": 0.8, "grad_norm": 1.7407220602035522, "learning_rate": 1.0017635986441664e-06, "loss": 0.5924, "step": 6253 }, { "epoch": 0.8, "grad_norm": 1.390802025794983, "learning_rate": 1.0005179508096929e-06, "loss": 0.6056, "step": 6254 }, { "epoch": 0.8, "grad_norm": 1.1375401020050049, "learning_rate": 9.992729918047085e-07, "loss": 0.5618, "step": 6255 }, { "epoch": 0.8, "grad_norm": 1.4758859872817993, "learning_rate": 9.980287218436324e-07, "loss": 0.6488, "step": 6256 }, { "epoch": 0.8, "grad_norm": 4.653335094451904, "learning_rate": 9.967851411407685e-07, "loss": 0.686, "step": 6257 }, { "epoch": 0.8, "grad_norm": 1.4219043254852295, "learning_rate": 9.955422499102941e-07, "loss": 0.5705, "step": 6258 }, { "epoch": 0.8, "grad_norm": 1.2233293056488037, "learning_rate": 9.943000483662735e-07, "loss": 0.4663, "step": 6259 }, { "epoch": 0.8, "grad_norm": 1.3139796257019043, "learning_rate": 9.930585367226508e-07, "loss": 0.6691, "step": 6260 }, { "epoch": 0.8, "grad_norm": 1.184753179550171, "learning_rate": 9.918177151932522e-07, "loss": 0.5962, "step": 6261 }, { "epoch": 0.8, "grad_norm": 1.4073246717453003, "learning_rate": 9.90577583991782e-07, "loss": 0.5851, "step": 6262 }, { "epoch": 0.8, "grad_norm": 1.6594538688659668, "learning_rate": 9.893381433318277e-07, "loss": 0.6553, "step": 6263 }, { "epoch": 0.8, "grad_norm": 1.5163381099700928, "learning_rate": 9.88099393426859e-07, "loss": 0.6235, "step": 6264 }, { "epoch": 0.8, "grad_norm": 1.2041316032409668, "learning_rate": 9.868613344902262e-07, "loss": 0.5982, "step": 6265 }, { "epoch": 0.8, "grad_norm": 1.3843533992767334, "learning_rate": 9.856239667351569e-07, "loss": 0.5743, "step": 6266 }, { "epoch": 0.8, "grad_norm": 1.2988375425338745, "learning_rate": 9.843872903747641e-07, "loss": 0.6007, "step": 6267 }, { "epoch": 0.8, "grad_norm": 1.8280378580093384, "learning_rate": 9.831513056220405e-07, "loss": 0.5599, "step": 6268 }, { "epoch": 0.8, "grad_norm": 1.389329433441162, "learning_rate": 9.819160126898598e-07, "loss": 0.568, "step": 6269 }, { "epoch": 0.8, "grad_norm": 4.785888195037842, "learning_rate": 9.806814117909742e-07, "loss": 0.6587, "step": 6270 }, { "epoch": 0.8, "grad_norm": 1.6062169075012207, "learning_rate": 9.794475031380195e-07, "loss": 0.6054, "step": 6271 }, { "epoch": 0.8, "grad_norm": 1.4643789529800415, "learning_rate": 9.782142869435119e-07, "loss": 0.5829, "step": 6272 }, { "epoch": 0.8, "grad_norm": 1.3099743127822876, "learning_rate": 9.769817634198475e-07, "loss": 0.611, "step": 6273 }, { "epoch": 0.8, "grad_norm": 1.5942202806472778, "learning_rate": 9.757499327793035e-07, "loss": 0.5887, "step": 6274 }, { "epoch": 0.8, "grad_norm": 3.8324639797210693, "learning_rate": 9.745187952340374e-07, "loss": 0.5849, "step": 6275 }, { "epoch": 0.8, "grad_norm": 1.438779592514038, "learning_rate": 9.732883509960895e-07, "loss": 0.5977, "step": 6276 }, { "epoch": 0.8, "grad_norm": 1.2395045757293701, "learning_rate": 9.720586002773757e-07, "loss": 0.5873, "step": 6277 }, { "epoch": 0.8, "grad_norm": 1.4779880046844482, "learning_rate": 9.70829543289697e-07, "loss": 0.5937, "step": 6278 }, { "epoch": 0.8, "grad_norm": 1.641114592552185, "learning_rate": 9.696011802447337e-07, "loss": 0.5943, "step": 6279 }, { "epoch": 0.8, "grad_norm": 1.8073025941848755, "learning_rate": 9.683735113540471e-07, "loss": 0.5827, "step": 6280 }, { "epoch": 0.8, "grad_norm": 1.2064876556396484, "learning_rate": 9.671465368290761e-07, "loss": 0.587, "step": 6281 }, { "epoch": 0.8, "grad_norm": 1.4570554494857788, "learning_rate": 9.659202568811437e-07, "loss": 0.5751, "step": 6282 }, { "epoch": 0.8, "grad_norm": 1.5656616687774658, "learning_rate": 9.64694671721451e-07, "loss": 0.5457, "step": 6283 }, { "epoch": 0.81, "grad_norm": 1.6131380796432495, "learning_rate": 9.634697815610815e-07, "loss": 0.5962, "step": 6284 }, { "epoch": 0.81, "grad_norm": 1.3118836879730225, "learning_rate": 9.622455866109958e-07, "loss": 0.5983, "step": 6285 }, { "epoch": 0.81, "grad_norm": 1.4274793863296509, "learning_rate": 9.610220870820374e-07, "loss": 0.6167, "step": 6286 }, { "epoch": 0.81, "grad_norm": 1.510737419128418, "learning_rate": 9.59799283184929e-07, "loss": 0.6158, "step": 6287 }, { "epoch": 0.81, "grad_norm": 1.8258140087127686, "learning_rate": 9.585771751302753e-07, "loss": 0.6055, "step": 6288 }, { "epoch": 0.81, "grad_norm": 1.1926887035369873, "learning_rate": 9.573557631285568e-07, "loss": 0.687, "step": 6289 }, { "epoch": 0.81, "grad_norm": 1.6286548376083374, "learning_rate": 9.56135047390137e-07, "loss": 0.6492, "step": 6290 }, { "epoch": 0.81, "grad_norm": 1.3712109327316284, "learning_rate": 9.549150281252633e-07, "loss": 0.5782, "step": 6291 }, { "epoch": 0.81, "grad_norm": 1.3473654985427856, "learning_rate": 9.536957055440555e-07, "loss": 0.6219, "step": 6292 }, { "epoch": 0.81, "grad_norm": 1.4779305458068848, "learning_rate": 9.524770798565174e-07, "loss": 0.5425, "step": 6293 }, { "epoch": 0.81, "grad_norm": 1.329278826713562, "learning_rate": 9.512591512725344e-07, "loss": 0.6175, "step": 6294 }, { "epoch": 0.81, "grad_norm": 1.6562693119049072, "learning_rate": 9.500419200018695e-07, "loss": 0.5724, "step": 6295 }, { "epoch": 0.81, "grad_norm": 1.2669323682785034, "learning_rate": 9.488253862541641e-07, "loss": 0.5622, "step": 6296 }, { "epoch": 0.81, "grad_norm": 1.161300539970398, "learning_rate": 9.476095502389432e-07, "loss": 0.5179, "step": 6297 }, { "epoch": 0.81, "grad_norm": 1.5029854774475098, "learning_rate": 9.463944121656088e-07, "loss": 0.6479, "step": 6298 }, { "epoch": 0.81, "grad_norm": 2.1170594692230225, "learning_rate": 9.451799722434462e-07, "loss": 0.5316, "step": 6299 }, { "epoch": 0.81, "grad_norm": 1.3949368000030518, "learning_rate": 9.439662306816144e-07, "loss": 0.6038, "step": 6300 }, { "epoch": 0.81, "grad_norm": 1.2971605062484741, "learning_rate": 9.427531876891577e-07, "loss": 0.618, "step": 6301 }, { "epoch": 0.81, "grad_norm": 1.5163462162017822, "learning_rate": 9.415408434749984e-07, "loss": 0.6335, "step": 6302 }, { "epoch": 0.81, "grad_norm": 1.2938801050186157, "learning_rate": 9.403291982479368e-07, "loss": 0.6646, "step": 6303 }, { "epoch": 0.81, "grad_norm": 1.369942545890808, "learning_rate": 9.391182522166548e-07, "loss": 0.5532, "step": 6304 }, { "epoch": 0.81, "grad_norm": 1.317028284072876, "learning_rate": 9.379080055897133e-07, "loss": 0.5684, "step": 6305 }, { "epoch": 0.81, "grad_norm": 1.504163146018982, "learning_rate": 9.366984585755545e-07, "loss": 0.6464, "step": 6306 }, { "epoch": 0.81, "grad_norm": 1.22713041305542, "learning_rate": 9.354896113824935e-07, "loss": 0.6153, "step": 6307 }, { "epoch": 0.81, "grad_norm": 1.341958999633789, "learning_rate": 9.342814642187336e-07, "loss": 0.5821, "step": 6308 }, { "epoch": 0.81, "grad_norm": 1.6462088823318481, "learning_rate": 9.330740172923547e-07, "loss": 0.4702, "step": 6309 }, { "epoch": 0.81, "grad_norm": 1.349832534790039, "learning_rate": 9.318672708113113e-07, "loss": 0.6108, "step": 6310 }, { "epoch": 0.81, "grad_norm": 1.3375132083892822, "learning_rate": 9.306612249834429e-07, "loss": 0.6344, "step": 6311 }, { "epoch": 0.81, "grad_norm": 1.3548191785812378, "learning_rate": 9.294558800164655e-07, "loss": 0.5802, "step": 6312 }, { "epoch": 0.81, "grad_norm": 1.3934367895126343, "learning_rate": 9.282512361179779e-07, "loss": 0.509, "step": 6313 }, { "epoch": 0.81, "grad_norm": 1.4095515012741089, "learning_rate": 9.270472934954516e-07, "loss": 0.5798, "step": 6314 }, { "epoch": 0.81, "grad_norm": 1.2284436225891113, "learning_rate": 9.258440523562434e-07, "loss": 0.6162, "step": 6315 }, { "epoch": 0.81, "grad_norm": 1.4372409582138062, "learning_rate": 9.246415129075869e-07, "loss": 0.6458, "step": 6316 }, { "epoch": 0.81, "grad_norm": 1.468173861503601, "learning_rate": 9.234396753565966e-07, "loss": 0.5612, "step": 6317 }, { "epoch": 0.81, "grad_norm": 1.2699594497680664, "learning_rate": 9.222385399102613e-07, "loss": 0.7351, "step": 6318 }, { "epoch": 0.81, "grad_norm": 1.6350229978561401, "learning_rate": 9.210381067754542e-07, "loss": 0.5474, "step": 6319 }, { "epoch": 0.81, "grad_norm": 1.289797306060791, "learning_rate": 9.198383761589247e-07, "loss": 0.5446, "step": 6320 }, { "epoch": 0.81, "grad_norm": 1.2473235130310059, "learning_rate": 9.186393482673045e-07, "loss": 0.5478, "step": 6321 }, { "epoch": 0.81, "grad_norm": 3.0618834495544434, "learning_rate": 9.17441023307098e-07, "loss": 0.6916, "step": 6322 }, { "epoch": 0.81, "grad_norm": 1.326453447341919, "learning_rate": 9.162434014846944e-07, "loss": 0.6769, "step": 6323 }, { "epoch": 0.81, "grad_norm": 1.895493745803833, "learning_rate": 9.150464830063593e-07, "loss": 0.5819, "step": 6324 }, { "epoch": 0.81, "grad_norm": 1.2672816514968872, "learning_rate": 9.13850268078238e-07, "loss": 0.6084, "step": 6325 }, { "epoch": 0.81, "grad_norm": 1.3241233825683594, "learning_rate": 9.126547569063538e-07, "loss": 0.5891, "step": 6326 }, { "epoch": 0.81, "grad_norm": 1.417624831199646, "learning_rate": 9.114599496966093e-07, "loss": 0.5614, "step": 6327 }, { "epoch": 0.81, "grad_norm": 1.364613652229309, "learning_rate": 9.10265846654787e-07, "loss": 0.5463, "step": 6328 }, { "epoch": 0.81, "grad_norm": 1.3848798274993896, "learning_rate": 9.090724479865443e-07, "loss": 0.6403, "step": 6329 }, { "epoch": 0.81, "grad_norm": 1.2594083547592163, "learning_rate": 9.07879753897421e-07, "loss": 0.5747, "step": 6330 }, { "epoch": 0.81, "grad_norm": 1.2425966262817383, "learning_rate": 9.066877645928351e-07, "loss": 0.5759, "step": 6331 }, { "epoch": 0.81, "grad_norm": 1.5962731838226318, "learning_rate": 9.054964802780836e-07, "loss": 0.6179, "step": 6332 }, { "epoch": 0.81, "grad_norm": 1.6983554363250732, "learning_rate": 9.043059011583378e-07, "loss": 0.529, "step": 6333 }, { "epoch": 0.81, "grad_norm": 1.4406030178070068, "learning_rate": 9.031160274386524e-07, "loss": 0.5584, "step": 6334 }, { "epoch": 0.81, "grad_norm": 1.1120250225067139, "learning_rate": 9.019268593239589e-07, "loss": 0.5092, "step": 6335 }, { "epoch": 0.81, "grad_norm": 1.3159717321395874, "learning_rate": 9.007383970190692e-07, "loss": 0.5571, "step": 6336 }, { "epoch": 0.81, "grad_norm": 1.4577220678329468, "learning_rate": 8.995506407286681e-07, "loss": 0.5735, "step": 6337 }, { "epoch": 0.81, "grad_norm": 2.6757638454437256, "learning_rate": 8.983635906573246e-07, "loss": 0.6404, "step": 6338 }, { "epoch": 0.81, "grad_norm": 1.3575419187545776, "learning_rate": 8.971772470094841e-07, "loss": 0.5763, "step": 6339 }, { "epoch": 0.81, "grad_norm": 1.1019004583358765, "learning_rate": 8.959916099894706e-07, "loss": 0.6899, "step": 6340 }, { "epoch": 0.81, "grad_norm": 1.2935214042663574, "learning_rate": 8.948066798014832e-07, "loss": 0.6393, "step": 6341 }, { "epoch": 0.81, "grad_norm": 1.3130396604537964, "learning_rate": 8.936224566496049e-07, "loss": 0.5713, "step": 6342 }, { "epoch": 0.81, "grad_norm": 1.318345308303833, "learning_rate": 8.924389407377948e-07, "loss": 0.544, "step": 6343 }, { "epoch": 0.81, "grad_norm": 1.3377659320831299, "learning_rate": 8.912561322698859e-07, "loss": 0.5473, "step": 6344 }, { "epoch": 0.81, "grad_norm": 1.4414652585983276, "learning_rate": 8.900740314495953e-07, "loss": 0.596, "step": 6345 }, { "epoch": 0.81, "grad_norm": 1.5321376323699951, "learning_rate": 8.888926384805158e-07, "loss": 0.5581, "step": 6346 }, { "epoch": 0.81, "grad_norm": 1.2531845569610596, "learning_rate": 8.877119535661189e-07, "loss": 0.5722, "step": 6347 }, { "epoch": 0.81, "grad_norm": 1.3081930875778198, "learning_rate": 8.865319769097513e-07, "loss": 0.5443, "step": 6348 }, { "epoch": 0.81, "grad_norm": 1.565206527709961, "learning_rate": 8.853527087146413e-07, "loss": 0.6366, "step": 6349 }, { "epoch": 0.81, "grad_norm": 1.6505012512207031, "learning_rate": 8.841741491838951e-07, "loss": 0.6175, "step": 6350 }, { "epoch": 0.81, "grad_norm": 1.3950464725494385, "learning_rate": 8.829962985204931e-07, "loss": 0.607, "step": 6351 }, { "epoch": 0.81, "grad_norm": 1.5542408227920532, "learning_rate": 8.818191569272977e-07, "loss": 0.6109, "step": 6352 }, { "epoch": 0.81, "grad_norm": 1.618653416633606, "learning_rate": 8.806427246070476e-07, "loss": 0.5526, "step": 6353 }, { "epoch": 0.81, "grad_norm": 1.473083734512329, "learning_rate": 8.794670017623602e-07, "loss": 0.6048, "step": 6354 }, { "epoch": 0.81, "grad_norm": 1.2885507345199585, "learning_rate": 8.782919885957276e-07, "loss": 0.6216, "step": 6355 }, { "epoch": 0.81, "grad_norm": 1.6841661930084229, "learning_rate": 8.771176853095226e-07, "loss": 0.6373, "step": 6356 }, { "epoch": 0.81, "grad_norm": 2.770217180252075, "learning_rate": 8.759440921059958e-07, "loss": 0.6398, "step": 6357 }, { "epoch": 0.81, "grad_norm": 1.275274395942688, "learning_rate": 8.747712091872751e-07, "loss": 0.71, "step": 6358 }, { "epoch": 0.81, "grad_norm": 2.0190229415893555, "learning_rate": 8.735990367553643e-07, "loss": 0.5194, "step": 6359 }, { "epoch": 0.81, "grad_norm": 1.600679636001587, "learning_rate": 8.72427575012148e-07, "loss": 0.5969, "step": 6360 }, { "epoch": 0.81, "grad_norm": 1.613568663597107, "learning_rate": 8.712568241593866e-07, "loss": 0.5324, "step": 6361 }, { "epoch": 0.82, "grad_norm": 1.3485932350158691, "learning_rate": 8.700867843987165e-07, "loss": 0.5907, "step": 6362 }, { "epoch": 0.82, "grad_norm": 1.446373462677002, "learning_rate": 8.689174559316544e-07, "loss": 0.5736, "step": 6363 }, { "epoch": 0.82, "grad_norm": 1.2776132822036743, "learning_rate": 8.677488389595923e-07, "loss": 0.4848, "step": 6364 }, { "epoch": 0.82, "grad_norm": 1.2803000211715698, "learning_rate": 8.665809336838032e-07, "loss": 0.5473, "step": 6365 }, { "epoch": 0.82, "grad_norm": 1.1983240842819214, "learning_rate": 8.654137403054324e-07, "loss": 0.5795, "step": 6366 }, { "epoch": 0.82, "grad_norm": 1.4812618494033813, "learning_rate": 8.642472590255063e-07, "loss": 0.6122, "step": 6367 }, { "epoch": 0.82, "grad_norm": 1.6410510540008545, "learning_rate": 8.630814900449269e-07, "loss": 0.6678, "step": 6368 }, { "epoch": 0.82, "grad_norm": 1.134711742401123, "learning_rate": 8.619164335644764e-07, "loss": 0.4751, "step": 6369 }, { "epoch": 0.82, "grad_norm": 1.3798121213912964, "learning_rate": 8.607520897848093e-07, "loss": 0.6079, "step": 6370 }, { "epoch": 0.82, "grad_norm": 1.5486456155776978, "learning_rate": 8.595884589064618e-07, "loss": 0.5501, "step": 6371 }, { "epoch": 0.82, "grad_norm": 1.4457107782363892, "learning_rate": 8.584255411298448e-07, "loss": 0.5237, "step": 6372 }, { "epoch": 0.82, "grad_norm": 1.3767578601837158, "learning_rate": 8.57263336655249e-07, "loss": 0.5954, "step": 6373 }, { "epoch": 0.82, "grad_norm": 1.325913429260254, "learning_rate": 8.561018456828379e-07, "loss": 0.5758, "step": 6374 }, { "epoch": 0.82, "grad_norm": 1.5465365648269653, "learning_rate": 8.549410684126547e-07, "loss": 0.5237, "step": 6375 }, { "epoch": 0.82, "grad_norm": 1.5502859354019165, "learning_rate": 8.537810050446238e-07, "loss": 0.5968, "step": 6376 }, { "epoch": 0.82, "grad_norm": 1.3833034038543701, "learning_rate": 8.526216557785383e-07, "loss": 0.4794, "step": 6377 }, { "epoch": 0.82, "grad_norm": 1.4041199684143066, "learning_rate": 8.51463020814074e-07, "loss": 0.5775, "step": 6378 }, { "epoch": 0.82, "grad_norm": 1.3076719045639038, "learning_rate": 8.503051003507823e-07, "loss": 0.5465, "step": 6379 }, { "epoch": 0.82, "grad_norm": 1.535282015800476, "learning_rate": 8.49147894588092e-07, "loss": 0.6229, "step": 6380 }, { "epoch": 0.82, "grad_norm": 1.4696766138076782, "learning_rate": 8.479914037253073e-07, "loss": 0.5713, "step": 6381 }, { "epoch": 0.82, "grad_norm": 1.4727602005004883, "learning_rate": 8.468356279616102e-07, "loss": 0.5585, "step": 6382 }, { "epoch": 0.82, "grad_norm": 1.4697345495224, "learning_rate": 8.456805674960594e-07, "loss": 0.5728, "step": 6383 }, { "epoch": 0.82, "grad_norm": 1.5066434144973755, "learning_rate": 8.44526222527593e-07, "loss": 0.6151, "step": 6384 }, { "epoch": 0.82, "grad_norm": 1.423194408416748, "learning_rate": 8.433725932550207e-07, "loss": 0.5928, "step": 6385 }, { "epoch": 0.82, "grad_norm": 1.6643799543380737, "learning_rate": 8.422196798770321e-07, "loss": 0.6388, "step": 6386 }, { "epoch": 0.82, "grad_norm": 1.3613812923431396, "learning_rate": 8.410674825921938e-07, "loss": 0.5786, "step": 6387 }, { "epoch": 0.82, "grad_norm": 1.1162885427474976, "learning_rate": 8.399160015989494e-07, "loss": 0.5944, "step": 6388 }, { "epoch": 0.82, "grad_norm": 1.5724189281463623, "learning_rate": 8.387652370956151e-07, "loss": 0.5654, "step": 6389 }, { "epoch": 0.82, "grad_norm": 1.359290361404419, "learning_rate": 8.37615189280389e-07, "loss": 0.5796, "step": 6390 }, { "epoch": 0.82, "grad_norm": 1.3778443336486816, "learning_rate": 8.36465858351343e-07, "loss": 0.5938, "step": 6391 }, { "epoch": 0.82, "grad_norm": 1.4014848470687866, "learning_rate": 8.353172445064268e-07, "loss": 0.5879, "step": 6392 }, { "epoch": 0.82, "grad_norm": 1.405988097190857, "learning_rate": 8.341693479434626e-07, "loss": 0.5998, "step": 6393 }, { "epoch": 0.82, "grad_norm": 1.4378615617752075, "learning_rate": 8.330221688601559e-07, "loss": 0.6279, "step": 6394 }, { "epoch": 0.82, "grad_norm": 1.794307827949524, "learning_rate": 8.318757074540845e-07, "loss": 0.6281, "step": 6395 }, { "epoch": 0.82, "grad_norm": 1.450239896774292, "learning_rate": 8.307299639227013e-07, "loss": 0.5631, "step": 6396 }, { "epoch": 0.82, "grad_norm": 1.587395191192627, "learning_rate": 8.295849384633381e-07, "loss": 0.5966, "step": 6397 }, { "epoch": 0.82, "grad_norm": 1.633143663406372, "learning_rate": 8.284406312732024e-07, "loss": 0.587, "step": 6398 }, { "epoch": 0.82, "grad_norm": 1.2260631322860718, "learning_rate": 8.272970425493793e-07, "loss": 0.5618, "step": 6399 }, { "epoch": 0.82, "grad_norm": 1.3946280479431152, "learning_rate": 8.261541724888256e-07, "loss": 0.5446, "step": 6400 }, { "epoch": 0.82, "grad_norm": 1.1478197574615479, "learning_rate": 8.250120212883794e-07, "loss": 0.5878, "step": 6401 }, { "epoch": 0.82, "grad_norm": 1.6904059648513794, "learning_rate": 8.23870589144754e-07, "loss": 0.6042, "step": 6402 }, { "epoch": 0.82, "grad_norm": 1.5287965536117554, "learning_rate": 8.227298762545354e-07, "loss": 0.6247, "step": 6403 }, { "epoch": 0.82, "grad_norm": 1.4237464666366577, "learning_rate": 8.215898828141894e-07, "loss": 0.5744, "step": 6404 }, { "epoch": 0.82, "grad_norm": 1.4170470237731934, "learning_rate": 8.204506090200565e-07, "loss": 0.5751, "step": 6405 }, { "epoch": 0.82, "grad_norm": 1.3677936792373657, "learning_rate": 8.193120550683553e-07, "loss": 0.6154, "step": 6406 }, { "epoch": 0.82, "grad_norm": 1.2820465564727783, "learning_rate": 8.181742211551757e-07, "loss": 0.5579, "step": 6407 }, { "epoch": 0.82, "grad_norm": 1.6385856866836548, "learning_rate": 8.170371074764872e-07, "loss": 0.6446, "step": 6408 }, { "epoch": 0.82, "grad_norm": 1.253929615020752, "learning_rate": 8.159007142281356e-07, "loss": 0.55, "step": 6409 }, { "epoch": 0.82, "grad_norm": 1.2222574949264526, "learning_rate": 8.147650416058406e-07, "loss": 0.5793, "step": 6410 }, { "epoch": 0.82, "grad_norm": 1.3179538249969482, "learning_rate": 8.136300898051996e-07, "loss": 0.616, "step": 6411 }, { "epoch": 0.82, "grad_norm": 1.3393512964248657, "learning_rate": 8.124958590216841e-07, "loss": 0.6454, "step": 6412 }, { "epoch": 0.82, "grad_norm": 1.482521414756775, "learning_rate": 8.11362349450644e-07, "loss": 0.5641, "step": 6413 }, { "epoch": 0.82, "grad_norm": 1.3834075927734375, "learning_rate": 8.102295612873007e-07, "loss": 0.5252, "step": 6414 }, { "epoch": 0.82, "grad_norm": 1.378448724746704, "learning_rate": 8.090974947267555e-07, "loss": 0.5853, "step": 6415 }, { "epoch": 0.82, "grad_norm": 1.5157331228256226, "learning_rate": 8.07966149963983e-07, "loss": 0.6003, "step": 6416 }, { "epoch": 0.82, "grad_norm": 1.1425423622131348, "learning_rate": 8.068355271938366e-07, "loss": 0.7038, "step": 6417 }, { "epoch": 0.82, "grad_norm": 1.4335917234420776, "learning_rate": 8.057056266110397e-07, "loss": 0.6066, "step": 6418 }, { "epoch": 0.82, "grad_norm": 1.3131961822509766, "learning_rate": 8.04576448410197e-07, "loss": 0.4857, "step": 6419 }, { "epoch": 0.82, "grad_norm": 1.3881715536117554, "learning_rate": 8.034479927857852e-07, "loss": 0.6013, "step": 6420 }, { "epoch": 0.82, "grad_norm": 1.620413064956665, "learning_rate": 8.023202599321605e-07, "loss": 0.6436, "step": 6421 }, { "epoch": 0.82, "grad_norm": 1.1148796081542969, "learning_rate": 8.011932500435482e-07, "loss": 0.6773, "step": 6422 }, { "epoch": 0.82, "grad_norm": 1.3660540580749512, "learning_rate": 8.000669633140551e-07, "loss": 0.6043, "step": 6423 }, { "epoch": 0.82, "grad_norm": 1.3749091625213623, "learning_rate": 7.989413999376605e-07, "loss": 0.5466, "step": 6424 }, { "epoch": 0.82, "grad_norm": 1.1317570209503174, "learning_rate": 7.978165601082211e-07, "loss": 0.4955, "step": 6425 }, { "epoch": 0.82, "grad_norm": 1.1026514768600464, "learning_rate": 7.966924440194657e-07, "loss": 0.5888, "step": 6426 }, { "epoch": 0.82, "grad_norm": 1.2896405458450317, "learning_rate": 7.95569051865e-07, "loss": 0.5944, "step": 6427 }, { "epoch": 0.82, "grad_norm": 1.2608617544174194, "learning_rate": 7.944463838383093e-07, "loss": 0.5025, "step": 6428 }, { "epoch": 0.82, "grad_norm": 1.1657538414001465, "learning_rate": 7.933244401327472e-07, "loss": 0.6009, "step": 6429 }, { "epoch": 0.82, "grad_norm": 2.1015689373016357, "learning_rate": 7.922032209415459e-07, "loss": 0.6016, "step": 6430 }, { "epoch": 0.82, "grad_norm": 1.2412078380584717, "learning_rate": 7.91082726457813e-07, "loss": 0.5762, "step": 6431 }, { "epoch": 0.82, "grad_norm": 1.2768174409866333, "learning_rate": 7.899629568745327e-07, "loss": 0.5765, "step": 6432 }, { "epoch": 0.82, "grad_norm": 1.256107211112976, "learning_rate": 7.888439123845599e-07, "loss": 0.6115, "step": 6433 }, { "epoch": 0.82, "grad_norm": 2.3772521018981934, "learning_rate": 7.877255931806277e-07, "loss": 0.546, "step": 6434 }, { "epoch": 0.82, "grad_norm": 1.5032480955123901, "learning_rate": 7.866079994553444e-07, "loss": 0.6249, "step": 6435 }, { "epoch": 0.82, "grad_norm": 1.3456975221633911, "learning_rate": 7.854911314011942e-07, "loss": 0.5095, "step": 6436 }, { "epoch": 0.82, "grad_norm": 1.1659880876541138, "learning_rate": 7.843749892105323e-07, "loss": 0.5498, "step": 6437 }, { "epoch": 0.82, "grad_norm": 1.2211744785308838, "learning_rate": 7.832595730755927e-07, "loss": 0.5963, "step": 6438 }, { "epoch": 0.82, "grad_norm": 1.3915050029754639, "learning_rate": 7.821448831884831e-07, "loss": 0.6141, "step": 6439 }, { "epoch": 0.83, "grad_norm": 2.694148302078247, "learning_rate": 7.810309197411875e-07, "loss": 0.6192, "step": 6440 }, { "epoch": 0.83, "grad_norm": 1.6715288162231445, "learning_rate": 7.799176829255612e-07, "loss": 0.6291, "step": 6441 }, { "epoch": 0.83, "grad_norm": 1.2998833656311035, "learning_rate": 7.788051729333373e-07, "loss": 0.6008, "step": 6442 }, { "epoch": 0.83, "grad_norm": 1.360581874847412, "learning_rate": 7.776933899561239e-07, "loss": 0.5631, "step": 6443 }, { "epoch": 0.83, "grad_norm": 1.3779425621032715, "learning_rate": 7.765823341854017e-07, "loss": 0.488, "step": 6444 }, { "epoch": 0.83, "grad_norm": 1.4948135614395142, "learning_rate": 7.754720058125293e-07, "loss": 0.6333, "step": 6445 }, { "epoch": 0.83, "grad_norm": 2.061363935470581, "learning_rate": 7.743624050287363e-07, "loss": 0.532, "step": 6446 }, { "epoch": 0.83, "grad_norm": 1.2560065984725952, "learning_rate": 7.732535320251316e-07, "loss": 0.5488, "step": 6447 }, { "epoch": 0.83, "grad_norm": 1.488149642944336, "learning_rate": 7.721453869926926e-07, "loss": 0.5659, "step": 6448 }, { "epoch": 0.83, "grad_norm": 1.2953224182128906, "learning_rate": 7.710379701222764e-07, "loss": 0.6267, "step": 6449 }, { "epoch": 0.83, "grad_norm": 2.0666239261627197, "learning_rate": 7.699312816046139e-07, "loss": 0.6492, "step": 6450 }, { "epoch": 0.83, "grad_norm": 1.348408579826355, "learning_rate": 7.688253216303082e-07, "loss": 0.5643, "step": 6451 }, { "epoch": 0.83, "grad_norm": 1.6481759548187256, "learning_rate": 7.677200903898386e-07, "loss": 0.6163, "step": 6452 }, { "epoch": 0.83, "grad_norm": 1.656936526298523, "learning_rate": 7.666155880735593e-07, "loss": 0.6407, "step": 6453 }, { "epoch": 0.83, "grad_norm": 1.1746383905410767, "learning_rate": 7.655118148716989e-07, "loss": 0.75, "step": 6454 }, { "epoch": 0.83, "grad_norm": 1.4573662281036377, "learning_rate": 7.644087709743586e-07, "loss": 0.5225, "step": 6455 }, { "epoch": 0.83, "grad_norm": 1.2147821187973022, "learning_rate": 7.633064565715159e-07, "loss": 0.7242, "step": 6456 }, { "epoch": 0.83, "grad_norm": 1.5926049947738647, "learning_rate": 7.622048718530218e-07, "loss": 0.5808, "step": 6457 }, { "epoch": 0.83, "grad_norm": 1.6897194385528564, "learning_rate": 7.611040170086032e-07, "loss": 0.6736, "step": 6458 }, { "epoch": 0.83, "grad_norm": 1.5272244215011597, "learning_rate": 7.60003892227858e-07, "loss": 0.6596, "step": 6459 }, { "epoch": 0.83, "grad_norm": 1.4822163581848145, "learning_rate": 7.589044977002607e-07, "loss": 0.677, "step": 6460 }, { "epoch": 0.83, "grad_norm": 4.653491497039795, "learning_rate": 7.578058336151611e-07, "loss": 0.5817, "step": 6461 }, { "epoch": 0.83, "grad_norm": 1.3933191299438477, "learning_rate": 7.5670790016178e-07, "loss": 0.6101, "step": 6462 }, { "epoch": 0.83, "grad_norm": 1.2626433372497559, "learning_rate": 7.556106975292155e-07, "loss": 0.5215, "step": 6463 }, { "epoch": 0.83, "grad_norm": 1.1545683145523071, "learning_rate": 7.545142259064376e-07, "loss": 0.5581, "step": 6464 }, { "epoch": 0.83, "grad_norm": 1.349684476852417, "learning_rate": 7.534184854822929e-07, "loss": 0.7176, "step": 6465 }, { "epoch": 0.83, "grad_norm": 1.2531545162200928, "learning_rate": 7.523234764454978e-07, "loss": 0.5055, "step": 6466 }, { "epoch": 0.83, "grad_norm": 1.6439186334609985, "learning_rate": 7.512291989846465e-07, "loss": 0.5796, "step": 6467 }, { "epoch": 0.83, "grad_norm": 1.1926203966140747, "learning_rate": 7.501356532882064e-07, "loss": 0.468, "step": 6468 }, { "epoch": 0.83, "grad_norm": 1.6254560947418213, "learning_rate": 7.490428395445198e-07, "loss": 0.6552, "step": 6469 }, { "epoch": 0.83, "grad_norm": 1.2992578744888306, "learning_rate": 7.479507579417989e-07, "loss": 0.648, "step": 6470 }, { "epoch": 0.83, "grad_norm": 1.4693169593811035, "learning_rate": 7.46859408668133e-07, "loss": 0.5684, "step": 6471 }, { "epoch": 0.83, "grad_norm": 1.5550191402435303, "learning_rate": 7.457687919114864e-07, "loss": 0.5953, "step": 6472 }, { "epoch": 0.83, "grad_norm": 1.1891635656356812, "learning_rate": 7.446789078596961e-07, "loss": 0.4812, "step": 6473 }, { "epoch": 0.83, "grad_norm": 1.6289045810699463, "learning_rate": 7.435897567004696e-07, "loss": 0.6572, "step": 6474 }, { "epoch": 0.83, "grad_norm": 1.2836540937423706, "learning_rate": 7.425013386213931e-07, "loss": 0.5618, "step": 6475 }, { "epoch": 0.83, "grad_norm": 2.0774881839752197, "learning_rate": 7.414136538099242e-07, "loss": 0.6091, "step": 6476 }, { "epoch": 0.83, "grad_norm": 1.2852836847305298, "learning_rate": 7.403267024533956e-07, "loss": 0.584, "step": 6477 }, { "epoch": 0.83, "grad_norm": 1.08048677444458, "learning_rate": 7.392404847390089e-07, "loss": 0.7044, "step": 6478 }, { "epoch": 0.83, "grad_norm": 1.3461830615997314, "learning_rate": 7.381550008538468e-07, "loss": 0.6609, "step": 6479 }, { "epoch": 0.83, "grad_norm": 1.3070403337478638, "learning_rate": 7.37070250984862e-07, "loss": 0.5704, "step": 6480 }, { "epoch": 0.83, "grad_norm": 1.1511784791946411, "learning_rate": 7.359862353188774e-07, "loss": 0.6785, "step": 6481 }, { "epoch": 0.83, "grad_norm": 1.5748064517974854, "learning_rate": 7.34902954042595e-07, "loss": 0.5357, "step": 6482 }, { "epoch": 0.83, "grad_norm": 1.5832569599151611, "learning_rate": 7.33820407342587e-07, "loss": 0.6216, "step": 6483 }, { "epoch": 0.83, "grad_norm": 1.5341449975967407, "learning_rate": 7.327385954053023e-07, "loss": 0.6461, "step": 6484 }, { "epoch": 0.83, "grad_norm": 1.2405881881713867, "learning_rate": 7.316575184170577e-07, "loss": 0.6008, "step": 6485 }, { "epoch": 0.83, "grad_norm": 1.6158428192138672, "learning_rate": 7.30577176564048e-07, "loss": 0.6164, "step": 6486 }, { "epoch": 0.83, "grad_norm": 1.4766799211502075, "learning_rate": 7.294975700323404e-07, "loss": 0.5562, "step": 6487 }, { "epoch": 0.83, "grad_norm": 1.7121096849441528, "learning_rate": 7.284186990078767e-07, "loss": 0.5626, "step": 6488 }, { "epoch": 0.83, "grad_norm": 0.9860489964485168, "learning_rate": 7.273405636764675e-07, "loss": 0.6356, "step": 6489 }, { "epoch": 0.83, "grad_norm": 1.6255037784576416, "learning_rate": 7.262631642238011e-07, "loss": 0.5731, "step": 6490 }, { "epoch": 0.83, "grad_norm": 1.4038243293762207, "learning_rate": 7.25186500835438e-07, "loss": 0.6349, "step": 6491 }, { "epoch": 0.83, "grad_norm": 1.1845976114273071, "learning_rate": 7.241105736968124e-07, "loss": 0.5589, "step": 6492 }, { "epoch": 0.83, "grad_norm": 1.3688479661941528, "learning_rate": 7.230353829932285e-07, "loss": 0.6446, "step": 6493 }, { "epoch": 0.83, "grad_norm": 1.30573570728302, "learning_rate": 7.219609289098672e-07, "loss": 0.6097, "step": 6494 }, { "epoch": 0.83, "grad_norm": 1.4633636474609375, "learning_rate": 7.208872116317822e-07, "loss": 0.633, "step": 6495 }, { "epoch": 0.83, "grad_norm": 1.5460313558578491, "learning_rate": 7.198142313438983e-07, "loss": 0.5651, "step": 6496 }, { "epoch": 0.83, "grad_norm": 1.4103299379348755, "learning_rate": 7.187419882310148e-07, "loss": 0.5557, "step": 6497 }, { "epoch": 0.83, "grad_norm": 1.33799147605896, "learning_rate": 7.176704824778052e-07, "loss": 0.5859, "step": 6498 }, { "epoch": 0.83, "grad_norm": 1.6135934591293335, "learning_rate": 7.165997142688124e-07, "loss": 0.5929, "step": 6499 }, { "epoch": 0.83, "grad_norm": 1.3209842443466187, "learning_rate": 7.155296837884557e-07, "loss": 0.6129, "step": 6500 }, { "epoch": 0.83, "grad_norm": 1.2033591270446777, "learning_rate": 7.144603912210257e-07, "loss": 0.6195, "step": 6501 }, { "epoch": 0.83, "grad_norm": 1.1654304265975952, "learning_rate": 7.133918367506876e-07, "loss": 0.5069, "step": 6502 }, { "epoch": 0.83, "grad_norm": 1.3715147972106934, "learning_rate": 7.123240205614756e-07, "loss": 0.5825, "step": 6503 }, { "epoch": 0.83, "grad_norm": 1.4160212278366089, "learning_rate": 7.112569428373012e-07, "loss": 0.5936, "step": 6504 }, { "epoch": 0.83, "grad_norm": 1.3100764751434326, "learning_rate": 7.101906037619466e-07, "loss": 0.5829, "step": 6505 }, { "epoch": 0.83, "grad_norm": 1.678920865058899, "learning_rate": 7.091250035190678e-07, "loss": 0.6412, "step": 6506 }, { "epoch": 0.83, "grad_norm": 1.4690093994140625, "learning_rate": 7.080601422921901e-07, "loss": 0.5738, "step": 6507 }, { "epoch": 0.83, "grad_norm": 1.3475110530853271, "learning_rate": 7.069960202647169e-07, "loss": 0.5587, "step": 6508 }, { "epoch": 0.83, "grad_norm": 1.3803815841674805, "learning_rate": 7.059326376199199e-07, "loss": 0.6277, "step": 6509 }, { "epoch": 0.83, "grad_norm": 1.3090627193450928, "learning_rate": 7.048699945409477e-07, "loss": 0.5846, "step": 6510 }, { "epoch": 0.83, "grad_norm": 1.225256323814392, "learning_rate": 7.038080912108152e-07, "loss": 0.5646, "step": 6511 }, { "epoch": 0.83, "grad_norm": 1.3957878351211548, "learning_rate": 7.027469278124155e-07, "loss": 0.5509, "step": 6512 }, { "epoch": 0.83, "grad_norm": 1.2957148551940918, "learning_rate": 7.016865045285143e-07, "loss": 0.6144, "step": 6513 }, { "epoch": 0.83, "grad_norm": 1.1870754957199097, "learning_rate": 7.00626821541745e-07, "loss": 0.6008, "step": 6514 }, { "epoch": 0.83, "grad_norm": 1.483576774597168, "learning_rate": 6.995678790346178e-07, "loss": 0.6274, "step": 6515 }, { "epoch": 0.83, "grad_norm": 1.2268551588058472, "learning_rate": 6.985096771895139e-07, "loss": 0.5665, "step": 6516 }, { "epoch": 0.83, "grad_norm": 1.424389362335205, "learning_rate": 6.974522161886882e-07, "loss": 0.5598, "step": 6517 }, { "epoch": 0.84, "grad_norm": 1.3461567163467407, "learning_rate": 6.963954962142644e-07, "loss": 0.6131, "step": 6518 }, { "epoch": 0.84, "grad_norm": 1.4819175004959106, "learning_rate": 6.953395174482425e-07, "loss": 0.6464, "step": 6519 }, { "epoch": 0.84, "grad_norm": 1.284936547279358, "learning_rate": 6.94284280072493e-07, "loss": 0.6178, "step": 6520 }, { "epoch": 0.84, "grad_norm": 1.185971736907959, "learning_rate": 6.932297842687607e-07, "loss": 0.6414, "step": 6521 }, { "epoch": 0.84, "grad_norm": 1.4137840270996094, "learning_rate": 6.921760302186587e-07, "loss": 0.6293, "step": 6522 }, { "epoch": 0.84, "grad_norm": 1.347339391708374, "learning_rate": 6.911230181036755e-07, "loss": 0.5987, "step": 6523 }, { "epoch": 0.84, "grad_norm": 1.255189299583435, "learning_rate": 6.90070748105171e-07, "loss": 0.6005, "step": 6524 }, { "epoch": 0.84, "grad_norm": 1.5423235893249512, "learning_rate": 6.890192204043789e-07, "loss": 0.6264, "step": 6525 }, { "epoch": 0.84, "grad_norm": 1.6151106357574463, "learning_rate": 6.879684351824012e-07, "loss": 0.7098, "step": 6526 }, { "epoch": 0.84, "grad_norm": 1.403106451034546, "learning_rate": 6.869183926202149e-07, "loss": 0.5491, "step": 6527 }, { "epoch": 0.84, "grad_norm": 1.432671308517456, "learning_rate": 6.858690928986689e-07, "loss": 0.6377, "step": 6528 }, { "epoch": 0.84, "grad_norm": 1.3803349733352661, "learning_rate": 6.84820536198485e-07, "loss": 0.5884, "step": 6529 }, { "epoch": 0.84, "grad_norm": 1.3720000982284546, "learning_rate": 6.837727227002522e-07, "loss": 0.6173, "step": 6530 }, { "epoch": 0.84, "grad_norm": 1.2312438488006592, "learning_rate": 6.827256525844384e-07, "loss": 0.5887, "step": 6531 }, { "epoch": 0.84, "grad_norm": 1.3636536598205566, "learning_rate": 6.816793260313798e-07, "loss": 0.5294, "step": 6532 }, { "epoch": 0.84, "grad_norm": 1.2737432718276978, "learning_rate": 6.806337432212834e-07, "loss": 0.5848, "step": 6533 }, { "epoch": 0.84, "grad_norm": 1.3652080297470093, "learning_rate": 6.795889043342302e-07, "loss": 0.6028, "step": 6534 }, { "epoch": 0.84, "grad_norm": 1.309290885925293, "learning_rate": 6.785448095501728e-07, "loss": 0.5539, "step": 6535 }, { "epoch": 0.84, "grad_norm": 1.4194649457931519, "learning_rate": 6.775014590489359e-07, "loss": 0.5934, "step": 6536 }, { "epoch": 0.84, "grad_norm": 1.6991386413574219, "learning_rate": 6.76458853010214e-07, "loss": 0.6732, "step": 6537 }, { "epoch": 0.84, "grad_norm": 1.4001078605651855, "learning_rate": 6.75416991613575e-07, "loss": 0.6728, "step": 6538 }, { "epoch": 0.84, "grad_norm": 1.0708200931549072, "learning_rate": 6.743758750384588e-07, "loss": 0.6719, "step": 6539 }, { "epoch": 0.84, "grad_norm": 2.4985439777374268, "learning_rate": 6.733355034641776e-07, "loss": 0.5891, "step": 6540 }, { "epoch": 0.84, "grad_norm": 1.2028065919876099, "learning_rate": 6.722958770699123e-07, "loss": 0.6056, "step": 6541 }, { "epoch": 0.84, "grad_norm": 1.2086281776428223, "learning_rate": 6.712569960347182e-07, "loss": 0.5668, "step": 6542 }, { "epoch": 0.84, "grad_norm": 1.2189396619796753, "learning_rate": 6.702188605375226e-07, "loss": 0.7119, "step": 6543 }, { "epoch": 0.84, "grad_norm": 1.2520568370819092, "learning_rate": 6.691814707571209e-07, "loss": 0.5847, "step": 6544 }, { "epoch": 0.84, "grad_norm": 1.4773744344711304, "learning_rate": 6.681448268721841e-07, "loss": 0.5806, "step": 6545 }, { "epoch": 0.84, "grad_norm": 1.519181728363037, "learning_rate": 6.671089290612526e-07, "loss": 0.5644, "step": 6546 }, { "epoch": 0.84, "grad_norm": 1.489691972732544, "learning_rate": 6.660737775027381e-07, "loss": 0.6373, "step": 6547 }, { "epoch": 0.84, "grad_norm": 1.2815091609954834, "learning_rate": 6.65039372374926e-07, "loss": 0.5611, "step": 6548 }, { "epoch": 0.84, "grad_norm": 1.2832820415496826, "learning_rate": 6.640057138559702e-07, "loss": 0.6027, "step": 6549 }, { "epoch": 0.84, "grad_norm": 1.3316545486450195, "learning_rate": 6.629728021238991e-07, "loss": 0.5918, "step": 6550 }, { "epoch": 0.84, "grad_norm": 1.3692222833633423, "learning_rate": 6.619406373566079e-07, "loss": 0.6008, "step": 6551 }, { "epoch": 0.84, "grad_norm": 1.561578392982483, "learning_rate": 6.609092197318678e-07, "loss": 0.5702, "step": 6552 }, { "epoch": 0.84, "grad_norm": 1.8879069089889526, "learning_rate": 6.598785494273197e-07, "loss": 0.5556, "step": 6553 }, { "epoch": 0.84, "grad_norm": 1.5902771949768066, "learning_rate": 6.588486266204758e-07, "loss": 0.6338, "step": 6554 }, { "epoch": 0.84, "grad_norm": 1.5381138324737549, "learning_rate": 6.578194514887176e-07, "loss": 0.5805, "step": 6555 }, { "epoch": 0.84, "grad_norm": 1.417212963104248, "learning_rate": 6.567910242093012e-07, "loss": 0.5621, "step": 6556 }, { "epoch": 0.84, "grad_norm": 1.422182321548462, "learning_rate": 6.557633449593515e-07, "loss": 0.516, "step": 6557 }, { "epoch": 0.84, "grad_norm": 1.2522732019424438, "learning_rate": 6.547364139158674e-07, "loss": 0.6348, "step": 6558 }, { "epoch": 0.84, "grad_norm": 1.4380911588668823, "learning_rate": 6.537102312557137e-07, "loss": 0.6165, "step": 6559 }, { "epoch": 0.84, "grad_norm": 1.3114643096923828, "learning_rate": 6.52684797155631e-07, "loss": 0.5736, "step": 6560 }, { "epoch": 0.84, "grad_norm": 1.367963433265686, "learning_rate": 6.516601117922295e-07, "loss": 0.5807, "step": 6561 }, { "epoch": 0.84, "grad_norm": 1.5235767364501953, "learning_rate": 6.506361753419916e-07, "loss": 0.6142, "step": 6562 }, { "epoch": 0.84, "grad_norm": 2.021899700164795, "learning_rate": 6.496129879812673e-07, "loss": 0.5628, "step": 6563 }, { "epoch": 0.84, "grad_norm": 1.5203709602355957, "learning_rate": 6.485905498862799e-07, "loss": 0.6086, "step": 6564 }, { "epoch": 0.84, "grad_norm": 1.330996036529541, "learning_rate": 6.475688612331265e-07, "loss": 0.5845, "step": 6565 }, { "epoch": 0.84, "grad_norm": 1.3727091550827026, "learning_rate": 6.465479221977694e-07, "loss": 0.596, "step": 6566 }, { "epoch": 0.84, "grad_norm": 1.5101768970489502, "learning_rate": 6.455277329560456e-07, "loss": 0.5664, "step": 6567 }, { "epoch": 0.84, "grad_norm": 1.3237074613571167, "learning_rate": 6.445082936836616e-07, "loss": 0.6286, "step": 6568 }, { "epoch": 0.84, "grad_norm": 1.626060962677002, "learning_rate": 6.434896045561967e-07, "loss": 0.5562, "step": 6569 }, { "epoch": 0.84, "grad_norm": 1.2659837007522583, "learning_rate": 6.424716657490965e-07, "loss": 0.5489, "step": 6570 }, { "epoch": 0.84, "grad_norm": 1.7719166278839111, "learning_rate": 6.414544774376819e-07, "loss": 0.6006, "step": 6571 }, { "epoch": 0.84, "grad_norm": 1.414229393005371, "learning_rate": 6.404380397971432e-07, "loss": 0.5606, "step": 6572 }, { "epoch": 0.84, "grad_norm": 1.48387610912323, "learning_rate": 6.394223530025418e-07, "loss": 0.5362, "step": 6573 }, { "epoch": 0.84, "grad_norm": 1.4343132972717285, "learning_rate": 6.384074172288068e-07, "loss": 0.6049, "step": 6574 }, { "epoch": 0.84, "grad_norm": 1.396812081336975, "learning_rate": 6.373932326507415e-07, "loss": 0.5969, "step": 6575 }, { "epoch": 0.84, "grad_norm": 1.5124541521072388, "learning_rate": 6.363797994430182e-07, "loss": 0.6075, "step": 6576 }, { "epoch": 0.84, "grad_norm": 1.2993794679641724, "learning_rate": 6.353671177801824e-07, "loss": 0.6173, "step": 6577 }, { "epoch": 0.84, "grad_norm": 1.5706703662872314, "learning_rate": 6.343551878366444e-07, "loss": 0.6263, "step": 6578 }, { "epoch": 0.84, "grad_norm": 1.3450409173965454, "learning_rate": 6.333440097866905e-07, "loss": 0.6767, "step": 6579 }, { "epoch": 0.84, "grad_norm": 1.1720281839370728, "learning_rate": 6.323335838044753e-07, "loss": 0.5835, "step": 6580 }, { "epoch": 0.84, "grad_norm": 1.2327227592468262, "learning_rate": 6.31323910064024e-07, "loss": 0.7767, "step": 6581 }, { "epoch": 0.84, "grad_norm": 1.5149428844451904, "learning_rate": 6.303149887392329e-07, "loss": 0.6144, "step": 6582 }, { "epoch": 0.84, "grad_norm": 1.06614089012146, "learning_rate": 6.293068200038677e-07, "loss": 0.6909, "step": 6583 }, { "epoch": 0.84, "grad_norm": 1.6291388273239136, "learning_rate": 6.28299404031566e-07, "loss": 0.6292, "step": 6584 }, { "epoch": 0.84, "grad_norm": 1.6113803386688232, "learning_rate": 6.272927409958323e-07, "loss": 0.6151, "step": 6585 }, { "epoch": 0.84, "grad_norm": 1.4304330348968506, "learning_rate": 6.262868310700459e-07, "loss": 0.6134, "step": 6586 }, { "epoch": 0.84, "grad_norm": 1.3153834342956543, "learning_rate": 6.252816744274542e-07, "loss": 0.6329, "step": 6587 }, { "epoch": 0.84, "grad_norm": 1.266677737236023, "learning_rate": 6.242772712411754e-07, "loss": 0.5337, "step": 6588 }, { "epoch": 0.84, "grad_norm": 0.8882852792739868, "learning_rate": 6.232736216841956e-07, "loss": 0.6046, "step": 6589 }, { "epoch": 0.84, "grad_norm": 1.5751484632492065, "learning_rate": 6.222707259293742e-07, "loss": 0.59, "step": 6590 }, { "epoch": 0.84, "grad_norm": 1.3150429725646973, "learning_rate": 6.212685841494392e-07, "loss": 0.6032, "step": 6591 }, { "epoch": 0.84, "grad_norm": 1.3497982025146484, "learning_rate": 6.202671965169909e-07, "loss": 0.6491, "step": 6592 }, { "epoch": 0.84, "grad_norm": 1.6363470554351807, "learning_rate": 6.192665632044959e-07, "loss": 0.6138, "step": 6593 }, { "epoch": 0.84, "grad_norm": 1.4757647514343262, "learning_rate": 6.182666843842933e-07, "loss": 0.6082, "step": 6594 }, { "epoch": 0.84, "grad_norm": 1.5103785991668701, "learning_rate": 6.172675602285933e-07, "loss": 0.6074, "step": 6595 }, { "epoch": 0.85, "grad_norm": 1.3814870119094849, "learning_rate": 6.162691909094726e-07, "loss": 0.6029, "step": 6596 }, { "epoch": 0.85, "grad_norm": 1.476153016090393, "learning_rate": 6.152715765988815e-07, "loss": 0.6853, "step": 6597 }, { "epoch": 0.85, "grad_norm": 1.8093366622924805, "learning_rate": 6.142747174686381e-07, "loss": 0.5474, "step": 6598 }, { "epoch": 0.85, "grad_norm": 1.3409085273742676, "learning_rate": 6.132786136904312e-07, "loss": 0.6649, "step": 6599 }, { "epoch": 0.85, "grad_norm": 2.276571750640869, "learning_rate": 6.122832654358196e-07, "loss": 0.5591, "step": 6600 }, { "epoch": 0.85, "grad_norm": 1.5459884405136108, "learning_rate": 6.112886728762324e-07, "loss": 0.6567, "step": 6601 }, { "epoch": 0.85, "grad_norm": 1.4320536851882935, "learning_rate": 6.10294836182968e-07, "loss": 0.6105, "step": 6602 }, { "epoch": 0.85, "grad_norm": 0.9960110783576965, "learning_rate": 6.093017555271935e-07, "loss": 0.6775, "step": 6603 }, { "epoch": 0.85, "grad_norm": 1.5592219829559326, "learning_rate": 6.08309431079947e-07, "loss": 0.6259, "step": 6604 }, { "epoch": 0.85, "grad_norm": 1.4093371629714966, "learning_rate": 6.073178630121363e-07, "loss": 0.6485, "step": 6605 }, { "epoch": 0.85, "grad_norm": 1.1359326839447021, "learning_rate": 6.063270514945402e-07, "loss": 0.5621, "step": 6606 }, { "epoch": 0.85, "grad_norm": 1.2226295471191406, "learning_rate": 6.05336996697804e-07, "loss": 0.5737, "step": 6607 }, { "epoch": 0.85, "grad_norm": 1.3089041709899902, "learning_rate": 6.043476987924452e-07, "loss": 0.5701, "step": 6608 }, { "epoch": 0.85, "grad_norm": 1.5044931173324585, "learning_rate": 6.033591579488501e-07, "loss": 0.7168, "step": 6609 }, { "epoch": 0.85, "grad_norm": 1.2217152118682861, "learning_rate": 6.023713743372761e-07, "loss": 0.5639, "step": 6610 }, { "epoch": 0.85, "grad_norm": 1.4304804801940918, "learning_rate": 6.01384348127847e-07, "loss": 0.6051, "step": 6611 }, { "epoch": 0.85, "grad_norm": 1.248270869255066, "learning_rate": 6.003980794905584e-07, "loss": 0.5676, "step": 6612 }, { "epoch": 0.85, "grad_norm": 1.317533016204834, "learning_rate": 5.994125685952757e-07, "loss": 0.5705, "step": 6613 }, { "epoch": 0.85, "grad_norm": 1.586898922920227, "learning_rate": 5.984278156117335e-07, "loss": 0.6188, "step": 6614 }, { "epoch": 0.85, "grad_norm": 1.4527963399887085, "learning_rate": 5.974438207095328e-07, "loss": 0.5467, "step": 6615 }, { "epoch": 0.85, "grad_norm": 1.3087095022201538, "learning_rate": 5.964605840581494e-07, "loss": 0.5782, "step": 6616 }, { "epoch": 0.85, "grad_norm": 1.2765275239944458, "learning_rate": 5.954781058269265e-07, "loss": 0.5261, "step": 6617 }, { "epoch": 0.85, "grad_norm": 1.2288613319396973, "learning_rate": 5.944963861850738e-07, "loss": 0.6082, "step": 6618 }, { "epoch": 0.85, "grad_norm": 1.3720436096191406, "learning_rate": 5.935154253016729e-07, "loss": 0.6148, "step": 6619 }, { "epoch": 0.85, "grad_norm": 1.081778883934021, "learning_rate": 5.925352233456749e-07, "loss": 0.5368, "step": 6620 }, { "epoch": 0.85, "grad_norm": 1.3894524574279785, "learning_rate": 5.915557804859013e-07, "loss": 0.5739, "step": 6621 }, { "epoch": 0.85, "grad_norm": 1.4773305654525757, "learning_rate": 5.905770968910379e-07, "loss": 0.6667, "step": 6622 }, { "epoch": 0.85, "grad_norm": 1.2038147449493408, "learning_rate": 5.895991727296447e-07, "loss": 0.5568, "step": 6623 }, { "epoch": 0.85, "grad_norm": 1.198391318321228, "learning_rate": 5.886220081701494e-07, "loss": 0.6375, "step": 6624 }, { "epoch": 0.85, "grad_norm": 1.1705589294433594, "learning_rate": 5.876456033808498e-07, "loss": 0.5212, "step": 6625 }, { "epoch": 0.85, "grad_norm": 1.425979733467102, "learning_rate": 5.86669958529909e-07, "loss": 0.6197, "step": 6626 }, { "epoch": 0.85, "grad_norm": 1.5084619522094727, "learning_rate": 5.85695073785364e-07, "loss": 0.6776, "step": 6627 }, { "epoch": 0.85, "grad_norm": 1.321138858795166, "learning_rate": 5.847209493151185e-07, "loss": 0.6305, "step": 6628 }, { "epoch": 0.85, "grad_norm": 1.3939682245254517, "learning_rate": 5.837475852869462e-07, "loss": 0.6287, "step": 6629 }, { "epoch": 0.85, "grad_norm": 6.342435836791992, "learning_rate": 5.82774981868488e-07, "loss": 0.5973, "step": 6630 }, { "epoch": 0.85, "grad_norm": 1.4819163084030151, "learning_rate": 5.818031392272555e-07, "loss": 0.6643, "step": 6631 }, { "epoch": 0.85, "grad_norm": 1.5554163455963135, "learning_rate": 5.808320575306292e-07, "loss": 0.6145, "step": 6632 }, { "epoch": 0.85, "grad_norm": 1.2572414875030518, "learning_rate": 5.798617369458581e-07, "loss": 0.6317, "step": 6633 }, { "epoch": 0.85, "grad_norm": 1.3168387413024902, "learning_rate": 5.788921776400597e-07, "loss": 0.6413, "step": 6634 }, { "epoch": 0.85, "grad_norm": 1.401464819908142, "learning_rate": 5.77923379780222e-07, "loss": 0.6708, "step": 6635 }, { "epoch": 0.85, "grad_norm": 1.3466784954071045, "learning_rate": 5.769553435332009e-07, "loss": 0.5765, "step": 6636 }, { "epoch": 0.85, "grad_norm": 1.2899616956710815, "learning_rate": 5.759880690657188e-07, "loss": 0.5885, "step": 6637 }, { "epoch": 0.85, "grad_norm": 1.3985425233840942, "learning_rate": 5.750215565443707e-07, "loss": 0.5156, "step": 6638 }, { "epoch": 0.85, "grad_norm": 1.468558669090271, "learning_rate": 5.740558061356183e-07, "loss": 0.6191, "step": 6639 }, { "epoch": 0.85, "grad_norm": 1.5268968343734741, "learning_rate": 5.730908180057937e-07, "loss": 0.5815, "step": 6640 }, { "epoch": 0.85, "grad_norm": 2.1540591716766357, "learning_rate": 5.721265923210944e-07, "loss": 0.578, "step": 6641 }, { "epoch": 0.85, "grad_norm": 1.5741490125656128, "learning_rate": 5.711631292475894e-07, "loss": 0.5626, "step": 6642 }, { "epoch": 0.85, "grad_norm": 2.359459638595581, "learning_rate": 5.702004289512175e-07, "loss": 0.6464, "step": 6643 }, { "epoch": 0.85, "grad_norm": 1.2629036903381348, "learning_rate": 5.692384915977811e-07, "loss": 0.6634, "step": 6644 }, { "epoch": 0.85, "grad_norm": 1.1385648250579834, "learning_rate": 5.682773173529565e-07, "loss": 0.6298, "step": 6645 }, { "epoch": 0.85, "grad_norm": 1.4421610832214355, "learning_rate": 5.673169063822853e-07, "loss": 0.5755, "step": 6646 }, { "epoch": 0.85, "grad_norm": 1.2397493124008179, "learning_rate": 5.663572588511806e-07, "loss": 0.5909, "step": 6647 }, { "epoch": 0.85, "grad_norm": 1.226677656173706, "learning_rate": 5.653983749249198e-07, "loss": 0.6041, "step": 6648 }, { "epoch": 0.85, "grad_norm": 1.2758342027664185, "learning_rate": 5.644402547686518e-07, "loss": 0.6877, "step": 6649 }, { "epoch": 0.85, "grad_norm": 1.6766910552978516, "learning_rate": 5.63482898547395e-07, "loss": 0.6041, "step": 6650 }, { "epoch": 0.85, "grad_norm": 1.5834779739379883, "learning_rate": 5.625263064260328e-07, "loss": 0.5752, "step": 6651 }, { "epoch": 0.85, "grad_norm": 2.2436914443969727, "learning_rate": 5.615704785693193e-07, "loss": 0.6544, "step": 6652 }, { "epoch": 0.85, "grad_norm": 1.5408368110656738, "learning_rate": 5.606154151418763e-07, "loss": 0.6088, "step": 6653 }, { "epoch": 0.85, "grad_norm": 1.2919232845306396, "learning_rate": 5.596611163081949e-07, "loss": 0.5976, "step": 6654 }, { "epoch": 0.85, "grad_norm": 1.246882438659668, "learning_rate": 5.587075822326326e-07, "loss": 0.5889, "step": 6655 }, { "epoch": 0.85, "grad_norm": 1.1017459630966187, "learning_rate": 5.577548130794164e-07, "loss": 0.5616, "step": 6656 }, { "epoch": 0.85, "grad_norm": 1.254428505897522, "learning_rate": 5.568028090126415e-07, "loss": 0.5466, "step": 6657 }, { "epoch": 0.85, "grad_norm": 1.4765057563781738, "learning_rate": 5.558515701962725e-07, "loss": 0.609, "step": 6658 }, { "epoch": 0.85, "grad_norm": 1.194210171699524, "learning_rate": 5.549010967941387e-07, "loss": 0.5604, "step": 6659 }, { "epoch": 0.85, "grad_norm": 1.3843317031860352, "learning_rate": 5.539513889699411e-07, "loss": 0.5851, "step": 6660 }, { "epoch": 0.85, "grad_norm": 1.2867571115493774, "learning_rate": 5.530024468872474e-07, "loss": 0.6045, "step": 6661 }, { "epoch": 0.85, "grad_norm": 1.4414231777191162, "learning_rate": 5.520542707094945e-07, "loss": 0.5784, "step": 6662 }, { "epoch": 0.85, "grad_norm": 1.4258451461791992, "learning_rate": 5.511068605999848e-07, "loss": 0.6573, "step": 6663 }, { "epoch": 0.85, "grad_norm": 1.4315848350524902, "learning_rate": 5.501602167218912e-07, "loss": 0.5564, "step": 6664 }, { "epoch": 0.85, "grad_norm": 1.1932504177093506, "learning_rate": 5.492143392382537e-07, "loss": 0.5155, "step": 6665 }, { "epoch": 0.85, "grad_norm": 1.22544527053833, "learning_rate": 5.482692283119817e-07, "loss": 0.5858, "step": 6666 }, { "epoch": 0.85, "grad_norm": 1.4700120687484741, "learning_rate": 5.473248841058487e-07, "loss": 0.6701, "step": 6667 }, { "epoch": 0.85, "grad_norm": 1.4346530437469482, "learning_rate": 5.463813067825008e-07, "loss": 0.647, "step": 6668 }, { "epoch": 0.85, "grad_norm": 2.5285699367523193, "learning_rate": 5.454384965044512e-07, "loss": 0.5553, "step": 6669 }, { "epoch": 0.85, "grad_norm": 5.045205593109131, "learning_rate": 5.444964534340768e-07, "loss": 0.5509, "step": 6670 }, { "epoch": 0.85, "grad_norm": 1.4717965126037598, "learning_rate": 5.435551777336273e-07, "loss": 0.6316, "step": 6671 }, { "epoch": 0.85, "grad_norm": 1.3239675760269165, "learning_rate": 5.426146695652173e-07, "loss": 0.5174, "step": 6672 }, { "epoch": 0.85, "grad_norm": 1.4008818864822388, "learning_rate": 5.416749290908324e-07, "loss": 0.5849, "step": 6673 }, { "epoch": 0.86, "grad_norm": 1.3032972812652588, "learning_rate": 5.407359564723202e-07, "loss": 0.6331, "step": 6674 }, { "epoch": 0.86, "grad_norm": 1.2396972179412842, "learning_rate": 5.397977518714026e-07, "loss": 0.5312, "step": 6675 }, { "epoch": 0.86, "grad_norm": 1.1935837268829346, "learning_rate": 5.388603154496647e-07, "loss": 0.5171, "step": 6676 }, { "epoch": 0.86, "grad_norm": 1.5318385362625122, "learning_rate": 5.379236473685623e-07, "loss": 0.567, "step": 6677 }, { "epoch": 0.86, "grad_norm": 1.2079846858978271, "learning_rate": 5.369877477894154e-07, "loss": 0.7553, "step": 6678 }, { "epoch": 0.86, "grad_norm": 1.4537231922149658, "learning_rate": 5.360526168734154e-07, "loss": 0.556, "step": 6679 }, { "epoch": 0.86, "grad_norm": 1.2296948432922363, "learning_rate": 5.351182547816186e-07, "loss": 0.5602, "step": 6680 }, { "epoch": 0.86, "grad_norm": 1.3176974058151245, "learning_rate": 5.341846616749513e-07, "loss": 0.6225, "step": 6681 }, { "epoch": 0.86, "grad_norm": 1.0236971378326416, "learning_rate": 5.332518377142043e-07, "loss": 0.6221, "step": 6682 }, { "epoch": 0.86, "grad_norm": 1.3097838163375854, "learning_rate": 5.32319783060038e-07, "loss": 0.4918, "step": 6683 }, { "epoch": 0.86, "grad_norm": 1.4100576639175415, "learning_rate": 5.3138849787298e-07, "loss": 0.6335, "step": 6684 }, { "epoch": 0.86, "grad_norm": 1.343997836112976, "learning_rate": 5.304579823134254e-07, "loss": 0.5864, "step": 6685 }, { "epoch": 0.86, "grad_norm": 1.2703300714492798, "learning_rate": 5.295282365416365e-07, "loss": 0.5433, "step": 6686 }, { "epoch": 0.86, "grad_norm": 1.460349440574646, "learning_rate": 5.285992607177432e-07, "loss": 0.6216, "step": 6687 }, { "epoch": 0.86, "grad_norm": 1.8459396362304688, "learning_rate": 5.276710550017433e-07, "loss": 0.6032, "step": 6688 }, { "epoch": 0.86, "grad_norm": 1.2343941926956177, "learning_rate": 5.267436195535003e-07, "loss": 0.7502, "step": 6689 }, { "epoch": 0.86, "grad_norm": 1.4660110473632812, "learning_rate": 5.258169545327462e-07, "loss": 0.561, "step": 6690 }, { "epoch": 0.86, "grad_norm": 1.122137188911438, "learning_rate": 5.248910600990814e-07, "loss": 0.5409, "step": 6691 }, { "epoch": 0.86, "grad_norm": 1.3979350328445435, "learning_rate": 5.239659364119703e-07, "loss": 0.6101, "step": 6692 }, { "epoch": 0.86, "grad_norm": 1.6155316829681396, "learning_rate": 5.230415836307485e-07, "loss": 0.6467, "step": 6693 }, { "epoch": 0.86, "grad_norm": 1.6388894319534302, "learning_rate": 5.22118001914616e-07, "loss": 0.6412, "step": 6694 }, { "epoch": 0.86, "grad_norm": 1.4391796588897705, "learning_rate": 5.21195191422642e-07, "loss": 0.5887, "step": 6695 }, { "epoch": 0.86, "grad_norm": 1.4248244762420654, "learning_rate": 5.202731523137605e-07, "loss": 0.6395, "step": 6696 }, { "epoch": 0.86, "grad_norm": 1.8664724826812744, "learning_rate": 5.193518847467749e-07, "loss": 0.5536, "step": 6697 }, { "epoch": 0.86, "grad_norm": 1.5709271430969238, "learning_rate": 5.184313888803544e-07, "loss": 0.6439, "step": 6698 }, { "epoch": 0.86, "grad_norm": 1.2865567207336426, "learning_rate": 5.175116648730366e-07, "loss": 0.6003, "step": 6699 }, { "epoch": 0.86, "grad_norm": 1.3031384944915771, "learning_rate": 5.165927128832238e-07, "loss": 0.6648, "step": 6700 }, { "epoch": 0.86, "grad_norm": 1.2907569408416748, "learning_rate": 5.156745330691871e-07, "loss": 0.7093, "step": 6701 }, { "epoch": 0.86, "grad_norm": 1.391937017440796, "learning_rate": 5.14757125589066e-07, "loss": 0.6502, "step": 6702 }, { "epoch": 0.86, "grad_norm": 1.4120694398880005, "learning_rate": 5.138404906008631e-07, "loss": 0.5444, "step": 6703 }, { "epoch": 0.86, "grad_norm": 1.370375156402588, "learning_rate": 5.129246282624511e-07, "loss": 0.5462, "step": 6704 }, { "epoch": 0.86, "grad_norm": 1.3214823007583618, "learning_rate": 5.120095387315688e-07, "loss": 0.6326, "step": 6705 }, { "epoch": 0.86, "grad_norm": 1.4014390707015991, "learning_rate": 5.110952221658228e-07, "loss": 0.5686, "step": 6706 }, { "epoch": 0.86, "grad_norm": 1.2138499021530151, "learning_rate": 5.101816787226832e-07, "loss": 0.5041, "step": 6707 }, { "epoch": 0.86, "grad_norm": 1.6902697086334229, "learning_rate": 5.092689085594904e-07, "loss": 0.5416, "step": 6708 }, { "epoch": 0.86, "grad_norm": 1.6033650636672974, "learning_rate": 5.083569118334508e-07, "loss": 0.6395, "step": 6709 }, { "epoch": 0.86, "grad_norm": 1.4746758937835693, "learning_rate": 5.074456887016382e-07, "loss": 0.5835, "step": 6710 }, { "epoch": 0.86, "grad_norm": 1.268338918685913, "learning_rate": 5.065352393209899e-07, "loss": 0.5185, "step": 6711 }, { "epoch": 0.86, "grad_norm": 2.275320291519165, "learning_rate": 5.056255638483137e-07, "loss": 0.6441, "step": 6712 }, { "epoch": 0.86, "grad_norm": 1.6120179891586304, "learning_rate": 5.047166624402822e-07, "loss": 0.582, "step": 6713 }, { "epoch": 0.86, "grad_norm": 1.3864774703979492, "learning_rate": 5.038085352534367e-07, "loss": 0.542, "step": 6714 }, { "epoch": 0.86, "grad_norm": 1.0285900831222534, "learning_rate": 5.029011824441821e-07, "loss": 0.5212, "step": 6715 }, { "epoch": 0.86, "grad_norm": 1.4503127336502075, "learning_rate": 5.019946041687911e-07, "loss": 0.5739, "step": 6716 }, { "epoch": 0.86, "grad_norm": 1.4805421829223633, "learning_rate": 5.010888005834047e-07, "loss": 0.558, "step": 6717 }, { "epoch": 0.86, "grad_norm": 1.2787694931030273, "learning_rate": 5.001837718440283e-07, "loss": 0.5401, "step": 6718 }, { "epoch": 0.86, "grad_norm": 1.7053654193878174, "learning_rate": 4.992795181065347e-07, "loss": 0.711, "step": 6719 }, { "epoch": 0.86, "grad_norm": 1.1280864477157593, "learning_rate": 4.98376039526664e-07, "loss": 0.5233, "step": 6720 }, { "epoch": 0.86, "grad_norm": 1.7056137323379517, "learning_rate": 4.974733362600225e-07, "loss": 0.6195, "step": 6721 }, { "epoch": 0.86, "grad_norm": 1.1686879396438599, "learning_rate": 4.965714084620804e-07, "loss": 0.6126, "step": 6722 }, { "epoch": 0.86, "grad_norm": 1.1632872819900513, "learning_rate": 4.956702562881783e-07, "loss": 0.5181, "step": 6723 }, { "epoch": 0.86, "grad_norm": 1.4708881378173828, "learning_rate": 4.947698798935196e-07, "loss": 0.6074, "step": 6724 }, { "epoch": 0.86, "grad_norm": 1.4640129804611206, "learning_rate": 4.938702794331785e-07, "loss": 0.5477, "step": 6725 }, { "epoch": 0.86, "grad_norm": 1.3392497301101685, "learning_rate": 4.929714550620901e-07, "loss": 0.5741, "step": 6726 }, { "epoch": 0.86, "grad_norm": 1.190356969833374, "learning_rate": 4.920734069350597e-07, "loss": 0.5884, "step": 6727 }, { "epoch": 0.86, "grad_norm": 1.3186179399490356, "learning_rate": 4.911761352067579e-07, "loss": 0.6421, "step": 6728 }, { "epoch": 0.86, "grad_norm": 1.3501472473144531, "learning_rate": 4.902796400317228e-07, "loss": 0.6491, "step": 6729 }, { "epoch": 0.86, "grad_norm": 1.5455275774002075, "learning_rate": 4.893839215643553e-07, "loss": 0.6143, "step": 6730 }, { "epoch": 0.86, "grad_norm": 1.5439045429229736, "learning_rate": 4.884889799589254e-07, "loss": 0.5792, "step": 6731 }, { "epoch": 0.86, "grad_norm": 1.2626060247421265, "learning_rate": 4.875948153695687e-07, "loss": 0.5075, "step": 6732 }, { "epoch": 0.86, "grad_norm": 1.0654497146606445, "learning_rate": 4.86701427950288e-07, "loss": 0.5002, "step": 6733 }, { "epoch": 0.86, "grad_norm": 1.3448207378387451, "learning_rate": 4.858088178549492e-07, "loss": 0.5399, "step": 6734 }, { "epoch": 0.86, "grad_norm": 1.4263006448745728, "learning_rate": 4.849169852372864e-07, "loss": 0.6053, "step": 6735 }, { "epoch": 0.86, "grad_norm": 1.585645079612732, "learning_rate": 4.84025930250902e-07, "loss": 0.6796, "step": 6736 }, { "epoch": 0.86, "grad_norm": 1.15276038646698, "learning_rate": 4.831356530492598e-07, "loss": 0.583, "step": 6737 }, { "epoch": 0.86, "grad_norm": 1.6426639556884766, "learning_rate": 4.822461537856927e-07, "loss": 0.546, "step": 6738 }, { "epoch": 0.86, "grad_norm": 1.0492527484893799, "learning_rate": 4.813574326133985e-07, "loss": 0.6961, "step": 6739 }, { "epoch": 0.86, "grad_norm": 1.360402226448059, "learning_rate": 4.804694896854434e-07, "loss": 0.6052, "step": 6740 }, { "epoch": 0.86, "grad_norm": 1.158557415008545, "learning_rate": 4.795823251547544e-07, "loss": 0.7179, "step": 6741 }, { "epoch": 0.86, "grad_norm": 3.3589587211608887, "learning_rate": 4.786959391741286e-07, "loss": 0.5791, "step": 6742 }, { "epoch": 0.86, "grad_norm": 1.7391221523284912, "learning_rate": 4.778103318962296e-07, "loss": 0.606, "step": 6743 }, { "epoch": 0.86, "grad_norm": 1.5412747859954834, "learning_rate": 4.769255034735831e-07, "loss": 0.6217, "step": 6744 }, { "epoch": 0.86, "grad_norm": 1.2919334173202515, "learning_rate": 4.760414540585839e-07, "loss": 0.6057, "step": 6745 }, { "epoch": 0.86, "grad_norm": 1.3662687540054321, "learning_rate": 4.7515818380349074e-07, "loss": 0.7293, "step": 6746 }, { "epoch": 0.86, "grad_norm": 1.417550802230835, "learning_rate": 4.7427569286043086e-07, "loss": 0.6307, "step": 6747 }, { "epoch": 0.86, "grad_norm": 1.5828347206115723, "learning_rate": 4.733939813813931e-07, "loss": 0.5875, "step": 6748 }, { "epoch": 0.86, "grad_norm": 1.5326488018035889, "learning_rate": 4.725130495182356e-07, "loss": 0.5814, "step": 6749 }, { "epoch": 0.86, "grad_norm": 1.4294366836547852, "learning_rate": 4.716328974226808e-07, "loss": 0.5564, "step": 6750 }, { "epoch": 0.86, "grad_norm": 1.306718111038208, "learning_rate": 4.707535252463175e-07, "loss": 0.5206, "step": 6751 }, { "epoch": 0.87, "grad_norm": 1.2117685079574585, "learning_rate": 4.6987493314059716e-07, "loss": 0.516, "step": 6752 }, { "epoch": 0.87, "grad_norm": 1.3194915056228638, "learning_rate": 4.689971212568428e-07, "loss": 0.6414, "step": 6753 }, { "epoch": 0.87, "grad_norm": 1.456396222114563, "learning_rate": 4.6812008974623845e-07, "loss": 0.6447, "step": 6754 }, { "epoch": 0.87, "grad_norm": 1.3069777488708496, "learning_rate": 4.672438387598344e-07, "loss": 0.6064, "step": 6755 }, { "epoch": 0.87, "grad_norm": 1.897487759590149, "learning_rate": 4.6636836844854706e-07, "loss": 0.5244, "step": 6756 }, { "epoch": 0.87, "grad_norm": 1.408294916152954, "learning_rate": 4.6549367896315923e-07, "loss": 0.5875, "step": 6757 }, { "epoch": 0.87, "grad_norm": 1.1629550457000732, "learning_rate": 4.646197704543187e-07, "loss": 0.7416, "step": 6758 }, { "epoch": 0.87, "grad_norm": 1.1940720081329346, "learning_rate": 4.6374664307253625e-07, "loss": 0.5178, "step": 6759 }, { "epoch": 0.87, "grad_norm": 1.6622729301452637, "learning_rate": 4.628742969681921e-07, "loss": 0.6646, "step": 6760 }, { "epoch": 0.87, "grad_norm": 2.3263518810272217, "learning_rate": 4.6200273229152994e-07, "loss": 0.5751, "step": 6761 }, { "epoch": 0.87, "grad_norm": 1.7037662267684937, "learning_rate": 4.611319491926597e-07, "loss": 0.6177, "step": 6762 }, { "epoch": 0.87, "grad_norm": 1.4251806735992432, "learning_rate": 4.602619478215542e-07, "loss": 0.7138, "step": 6763 }, { "epoch": 0.87, "grad_norm": 1.6694972515106201, "learning_rate": 4.593927283280547e-07, "loss": 0.6646, "step": 6764 }, { "epoch": 0.87, "grad_norm": 1.3548812866210938, "learning_rate": 4.5852429086186646e-07, "loss": 0.5704, "step": 6765 }, { "epoch": 0.87, "grad_norm": 1.6709963083267212, "learning_rate": 4.576566355725609e-07, "loss": 0.7045, "step": 6766 }, { "epoch": 0.87, "grad_norm": 1.4117552042007446, "learning_rate": 4.5678976260957243e-07, "loss": 0.5623, "step": 6767 }, { "epoch": 0.87, "grad_norm": 1.4504401683807373, "learning_rate": 4.5592367212220324e-07, "loss": 0.6227, "step": 6768 }, { "epoch": 0.87, "grad_norm": 1.266184687614441, "learning_rate": 4.5505836425961956e-07, "loss": 0.5448, "step": 6769 }, { "epoch": 0.87, "grad_norm": 1.2798473834991455, "learning_rate": 4.541938391708539e-07, "loss": 0.5534, "step": 6770 }, { "epoch": 0.87, "grad_norm": 1.5648812055587769, "learning_rate": 4.533300970048016e-07, "loss": 0.6557, "step": 6771 }, { "epoch": 0.87, "grad_norm": 1.3745602369308472, "learning_rate": 4.5246713791022633e-07, "loss": 0.6003, "step": 6772 }, { "epoch": 0.87, "grad_norm": 1.2130157947540283, "learning_rate": 4.516049620357549e-07, "loss": 0.7218, "step": 6773 }, { "epoch": 0.87, "grad_norm": 1.1825891733169556, "learning_rate": 4.507435695298784e-07, "loss": 0.5922, "step": 6774 }, { "epoch": 0.87, "grad_norm": 1.352432131767273, "learning_rate": 4.4988296054095494e-07, "loss": 0.5628, "step": 6775 }, { "epoch": 0.87, "grad_norm": 1.6166142225265503, "learning_rate": 4.4902313521720696e-07, "loss": 0.634, "step": 6776 }, { "epoch": 0.87, "grad_norm": 1.435806155204773, "learning_rate": 4.4816409370672277e-07, "loss": 0.5729, "step": 6777 }, { "epoch": 0.87, "grad_norm": 1.5084896087646484, "learning_rate": 4.473058361574534e-07, "loss": 0.6539, "step": 6778 }, { "epoch": 0.87, "grad_norm": 1.4006537199020386, "learning_rate": 4.4644836271721617e-07, "loss": 0.6189, "step": 6779 }, { "epoch": 0.87, "grad_norm": 1.4999135732650757, "learning_rate": 4.455916735336946e-07, "loss": 0.6958, "step": 6780 }, { "epoch": 0.87, "grad_norm": 1.1975650787353516, "learning_rate": 4.4473576875443626e-07, "loss": 0.6147, "step": 6781 }, { "epoch": 0.87, "grad_norm": 1.477052927017212, "learning_rate": 4.438806485268515e-07, "loss": 0.5882, "step": 6782 }, { "epoch": 0.87, "grad_norm": 1.4066824913024902, "learning_rate": 4.430263129982182e-07, "loss": 0.5717, "step": 6783 }, { "epoch": 0.87, "grad_norm": 1.330893635749817, "learning_rate": 4.421727623156796e-07, "loss": 0.6369, "step": 6784 }, { "epoch": 0.87, "grad_norm": 1.5608009099960327, "learning_rate": 4.41319996626241e-07, "loss": 0.5731, "step": 6785 }, { "epoch": 0.87, "grad_norm": 1.482822299003601, "learning_rate": 4.404680160767727e-07, "loss": 0.633, "step": 6786 }, { "epoch": 0.87, "grad_norm": 1.300209879875183, "learning_rate": 4.3961682081401393e-07, "loss": 0.615, "step": 6787 }, { "epoch": 0.87, "grad_norm": 1.7004002332687378, "learning_rate": 4.3876641098456574e-07, "loss": 0.5625, "step": 6788 }, { "epoch": 0.87, "grad_norm": 2.4036426544189453, "learning_rate": 4.379167867348916e-07, "loss": 0.6332, "step": 6789 }, { "epoch": 0.87, "grad_norm": 2.266597032546997, "learning_rate": 4.370679482113227e-07, "loss": 0.6741, "step": 6790 }, { "epoch": 0.87, "grad_norm": 1.4129186868667603, "learning_rate": 4.362198955600561e-07, "loss": 0.6334, "step": 6791 }, { "epoch": 0.87, "grad_norm": 1.2967772483825684, "learning_rate": 4.3537262892714926e-07, "loss": 0.7026, "step": 6792 }, { "epoch": 0.87, "grad_norm": 1.3166394233703613, "learning_rate": 4.345261484585273e-07, "loss": 0.5694, "step": 6793 }, { "epoch": 0.87, "grad_norm": 1.5502707958221436, "learning_rate": 4.3368045429997953e-07, "loss": 0.5651, "step": 6794 }, { "epoch": 0.87, "grad_norm": 1.5218241214752197, "learning_rate": 4.3283554659716066e-07, "loss": 0.6583, "step": 6795 }, { "epoch": 0.87, "grad_norm": 1.3967206478118896, "learning_rate": 4.3199142549558704e-07, "loss": 0.5456, "step": 6796 }, { "epoch": 0.87, "grad_norm": 1.2347934246063232, "learning_rate": 4.3114809114064183e-07, "loss": 0.5805, "step": 6797 }, { "epoch": 0.87, "grad_norm": 1.6478691101074219, "learning_rate": 4.303055436775727e-07, "loss": 0.5988, "step": 6798 }, { "epoch": 0.87, "grad_norm": 2.3431971073150635, "learning_rate": 4.2946378325149196e-07, "loss": 0.5429, "step": 6799 }, { "epoch": 0.87, "grad_norm": 1.5060168504714966, "learning_rate": 4.286228100073742e-07, "loss": 0.5957, "step": 6800 }, { "epoch": 0.87, "grad_norm": 1.3222039937973022, "learning_rate": 4.2778262409006064e-07, "loss": 0.5746, "step": 6801 }, { "epoch": 0.87, "grad_norm": 1.2762765884399414, "learning_rate": 4.2694322564425626e-07, "loss": 0.5936, "step": 6802 }, { "epoch": 0.87, "grad_norm": 1.3855468034744263, "learning_rate": 4.261046148145315e-07, "loss": 0.6254, "step": 6803 }, { "epoch": 0.87, "grad_norm": 1.342413306236267, "learning_rate": 4.2526679174531737e-07, "loss": 0.6038, "step": 6804 }, { "epoch": 0.87, "grad_norm": 1.30988609790802, "learning_rate": 4.244297565809136e-07, "loss": 0.5995, "step": 6805 }, { "epoch": 0.87, "grad_norm": 1.4815443754196167, "learning_rate": 4.235935094654836e-07, "loss": 0.6256, "step": 6806 }, { "epoch": 0.87, "grad_norm": 1.2580639123916626, "learning_rate": 4.2275805054305175e-07, "loss": 0.5472, "step": 6807 }, { "epoch": 0.87, "grad_norm": 1.2893187999725342, "learning_rate": 4.219233799575101e-07, "loss": 0.6253, "step": 6808 }, { "epoch": 0.87, "grad_norm": 1.2105703353881836, "learning_rate": 4.210894978526131e-07, "loss": 0.5719, "step": 6809 }, { "epoch": 0.87, "grad_norm": 1.4485310316085815, "learning_rate": 4.202564043719809e-07, "loss": 0.6083, "step": 6810 }, { "epoch": 0.87, "grad_norm": 1.28269624710083, "learning_rate": 4.194240996590959e-07, "loss": 0.5364, "step": 6811 }, { "epoch": 0.87, "grad_norm": 1.2909283638000488, "learning_rate": 4.185925838573057e-07, "loss": 0.6054, "step": 6812 }, { "epoch": 0.87, "grad_norm": 1.3592783212661743, "learning_rate": 4.1776185710982244e-07, "loss": 0.6188, "step": 6813 }, { "epoch": 0.87, "grad_norm": 1.3486402034759521, "learning_rate": 4.169319195597227e-07, "loss": 0.6326, "step": 6814 }, { "epoch": 0.87, "grad_norm": 1.3511604070663452, "learning_rate": 4.161027713499444e-07, "loss": 0.6333, "step": 6815 }, { "epoch": 0.87, "grad_norm": 1.4477179050445557, "learning_rate": 4.152744126232927e-07, "loss": 0.5499, "step": 6816 }, { "epoch": 0.87, "grad_norm": 1.2189664840698242, "learning_rate": 4.1444684352243525e-07, "loss": 0.5236, "step": 6817 }, { "epoch": 0.87, "grad_norm": 1.2309170961380005, "learning_rate": 4.136200641899052e-07, "loss": 0.5513, "step": 6818 }, { "epoch": 0.87, "grad_norm": 2.251579761505127, "learning_rate": 4.12794074768097e-07, "loss": 0.5862, "step": 6819 }, { "epoch": 0.87, "grad_norm": 1.47301185131073, "learning_rate": 4.119688753992707e-07, "loss": 0.5965, "step": 6820 }, { "epoch": 0.87, "grad_norm": 1.6270514726638794, "learning_rate": 4.111444662255498e-07, "loss": 0.5907, "step": 6821 }, { "epoch": 0.87, "grad_norm": 1.4491541385650635, "learning_rate": 4.103208473889231e-07, "loss": 0.519, "step": 6822 }, { "epoch": 0.87, "grad_norm": 1.2944505214691162, "learning_rate": 4.09498019031242e-07, "loss": 0.6209, "step": 6823 }, { "epoch": 0.87, "grad_norm": 1.4557186365127563, "learning_rate": 4.0867598129422146e-07, "loss": 0.6311, "step": 6824 }, { "epoch": 0.87, "grad_norm": 1.2103979587554932, "learning_rate": 4.0785473431944165e-07, "loss": 0.5814, "step": 6825 }, { "epoch": 0.87, "grad_norm": 1.428792953491211, "learning_rate": 4.070342782483444e-07, "loss": 0.5717, "step": 6826 }, { "epoch": 0.87, "grad_norm": 1.3763489723205566, "learning_rate": 4.062146132222372e-07, "loss": 0.551, "step": 6827 }, { "epoch": 0.87, "grad_norm": 1.3854894638061523, "learning_rate": 4.053957393822905e-07, "loss": 0.5466, "step": 6828 }, { "epoch": 0.87, "grad_norm": 1.4803504943847656, "learning_rate": 4.045776568695398e-07, "loss": 0.6583, "step": 6829 }, { "epoch": 0.88, "grad_norm": 1.1337047815322876, "learning_rate": 4.037603658248812e-07, "loss": 0.6015, "step": 6830 }, { "epoch": 0.88, "grad_norm": 1.2642403841018677, "learning_rate": 4.029438663890778e-07, "loss": 0.6461, "step": 6831 }, { "epoch": 0.88, "grad_norm": 1.5304993391036987, "learning_rate": 4.021281587027548e-07, "loss": 0.509, "step": 6832 }, { "epoch": 0.88, "grad_norm": 1.3939540386199951, "learning_rate": 4.0131324290640206e-07, "loss": 0.5702, "step": 6833 }, { "epoch": 0.88, "grad_norm": 1.4129104614257812, "learning_rate": 4.0049911914037067e-07, "loss": 0.6004, "step": 6834 }, { "epoch": 0.88, "grad_norm": 1.4573888778686523, "learning_rate": 3.9968578754487784e-07, "loss": 0.6195, "step": 6835 }, { "epoch": 0.88, "grad_norm": 1.4409502744674683, "learning_rate": 3.988732482600033e-07, "loss": 0.556, "step": 6836 }, { "epoch": 0.88, "grad_norm": 1.4219567775726318, "learning_rate": 3.9806150142569e-07, "loss": 0.6177, "step": 6837 }, { "epoch": 0.88, "grad_norm": 1.3022994995117188, "learning_rate": 3.972505471817445e-07, "loss": 0.5414, "step": 6838 }, { "epoch": 0.88, "grad_norm": 1.4800045490264893, "learning_rate": 3.9644038566783946e-07, "loss": 0.6664, "step": 6839 }, { "epoch": 0.88, "grad_norm": 1.3413270711898804, "learning_rate": 3.9563101702350616e-07, "loss": 0.5806, "step": 6840 }, { "epoch": 0.88, "grad_norm": 1.4288663864135742, "learning_rate": 3.9482244138814295e-07, "loss": 0.5744, "step": 6841 }, { "epoch": 0.88, "grad_norm": 1.5452122688293457, "learning_rate": 3.940146589010108e-07, "loss": 0.5657, "step": 6842 }, { "epoch": 0.88, "grad_norm": 1.9402995109558105, "learning_rate": 3.9320766970123383e-07, "loss": 0.5839, "step": 6843 }, { "epoch": 0.88, "grad_norm": 1.1517301797866821, "learning_rate": 3.924014739277987e-07, "loss": 0.5033, "step": 6844 }, { "epoch": 0.88, "grad_norm": 1.396001935005188, "learning_rate": 3.915960717195566e-07, "loss": 0.6478, "step": 6845 }, { "epoch": 0.88, "grad_norm": 1.4981609582901, "learning_rate": 3.907914632152215e-07, "loss": 0.602, "step": 6846 }, { "epoch": 0.88, "grad_norm": 1.3975633382797241, "learning_rate": 3.8998764855337266e-07, "loss": 0.5397, "step": 6847 }, { "epoch": 0.88, "grad_norm": 1.1856523752212524, "learning_rate": 3.8918462787244817e-07, "loss": 0.5272, "step": 6848 }, { "epoch": 0.88, "grad_norm": 1.2696727514266968, "learning_rate": 3.8838240131075343e-07, "loss": 0.5655, "step": 6849 }, { "epoch": 0.88, "grad_norm": 1.603298306465149, "learning_rate": 3.8758096900645524e-07, "loss": 0.522, "step": 6850 }, { "epoch": 0.88, "grad_norm": 1.3075599670410156, "learning_rate": 3.867803310975854e-07, "loss": 0.5532, "step": 6851 }, { "epoch": 0.88, "grad_norm": 1.4688489437103271, "learning_rate": 3.859804877220352e-07, "loss": 0.5474, "step": 6852 }, { "epoch": 0.88, "grad_norm": 1.316911220550537, "learning_rate": 3.851814390175623e-07, "loss": 0.6715, "step": 6853 }, { "epoch": 0.88, "grad_norm": 1.143972635269165, "learning_rate": 3.843831851217872e-07, "loss": 0.7013, "step": 6854 }, { "epoch": 0.88, "grad_norm": 1.1683162450790405, "learning_rate": 3.835857261721926e-07, "loss": 0.5927, "step": 6855 }, { "epoch": 0.88, "grad_norm": 1.6495614051818848, "learning_rate": 3.827890623061242e-07, "loss": 0.6061, "step": 6856 }, { "epoch": 0.88, "grad_norm": 1.4121828079223633, "learning_rate": 3.8199319366079177e-07, "loss": 0.5579, "step": 6857 }, { "epoch": 0.88, "grad_norm": 1.4523359537124634, "learning_rate": 3.811981203732684e-07, "loss": 0.5648, "step": 6858 }, { "epoch": 0.88, "grad_norm": 1.28249990940094, "learning_rate": 3.8040384258048677e-07, "loss": 0.6971, "step": 6859 }, { "epoch": 0.88, "grad_norm": 1.5575233697891235, "learning_rate": 3.7961036041924635e-07, "loss": 0.5897, "step": 6860 }, { "epoch": 0.88, "grad_norm": 1.302139163017273, "learning_rate": 3.788176740262089e-07, "loss": 0.6636, "step": 6861 }, { "epoch": 0.88, "grad_norm": 1.1842310428619385, "learning_rate": 3.7802578353789864e-07, "loss": 0.5368, "step": 6862 }, { "epoch": 0.88, "grad_norm": 1.4447256326675415, "learning_rate": 3.7723468909070136e-07, "loss": 0.5266, "step": 6863 }, { "epoch": 0.88, "grad_norm": 1.4478634595870972, "learning_rate": 3.764443908208676e-07, "loss": 0.5914, "step": 6864 }, { "epoch": 0.88, "grad_norm": 1.580125093460083, "learning_rate": 3.7565488886451004e-07, "loss": 0.5411, "step": 6865 }, { "epoch": 0.88, "grad_norm": 1.5011149644851685, "learning_rate": 3.748661833576056e-07, "loss": 0.6147, "step": 6866 }, { "epoch": 0.88, "grad_norm": 1.561859369277954, "learning_rate": 3.740782744359911e-07, "loss": 0.6672, "step": 6867 }, { "epoch": 0.88, "grad_norm": 1.3884916305541992, "learning_rate": 3.7329116223536797e-07, "loss": 0.6525, "step": 6868 }, { "epoch": 0.88, "grad_norm": 1.4261103868484497, "learning_rate": 3.7250484689130115e-07, "loss": 0.5723, "step": 6869 }, { "epoch": 0.88, "grad_norm": 1.4831106662750244, "learning_rate": 3.717193285392179e-07, "loss": 0.5859, "step": 6870 }, { "epoch": 0.88, "grad_norm": 1.6929248571395874, "learning_rate": 3.70934607314406e-07, "loss": 0.6201, "step": 6871 }, { "epoch": 0.88, "grad_norm": 1.2150589227676392, "learning_rate": 3.70150683352018e-07, "loss": 0.608, "step": 6872 }, { "epoch": 0.88, "grad_norm": 1.2132083177566528, "learning_rate": 3.693675567870714e-07, "loss": 0.5384, "step": 6873 }, { "epoch": 0.88, "grad_norm": 1.33145010471344, "learning_rate": 3.685852277544405e-07, "loss": 0.6047, "step": 6874 }, { "epoch": 0.88, "grad_norm": 1.2606712579727173, "learning_rate": 3.678036963888676e-07, "loss": 0.6038, "step": 6875 }, { "epoch": 0.88, "grad_norm": 1.2835811376571655, "learning_rate": 3.670229628249555e-07, "loss": 0.5235, "step": 6876 }, { "epoch": 0.88, "grad_norm": 1.5742946863174438, "learning_rate": 3.662430271971695e-07, "loss": 0.654, "step": 6877 }, { "epoch": 0.88, "grad_norm": 1.2673202753067017, "learning_rate": 3.6546388963983716e-07, "loss": 0.529, "step": 6878 }, { "epoch": 0.88, "grad_norm": 1.531229019165039, "learning_rate": 3.646855502871488e-07, "loss": 0.5374, "step": 6879 }, { "epoch": 0.88, "grad_norm": 1.3460601568222046, "learning_rate": 3.639080092731584e-07, "loss": 0.5097, "step": 6880 }, { "epoch": 0.88, "grad_norm": 1.3324980735778809, "learning_rate": 3.6313126673178213e-07, "loss": 0.547, "step": 6881 }, { "epoch": 0.88, "grad_norm": 1.4404997825622559, "learning_rate": 3.623553227967963e-07, "loss": 0.5873, "step": 6882 }, { "epoch": 0.88, "grad_norm": 1.44928777217865, "learning_rate": 3.6158017760184237e-07, "loss": 0.5764, "step": 6883 }, { "epoch": 0.88, "grad_norm": 1.1881428956985474, "learning_rate": 3.608058312804247e-07, "loss": 0.6388, "step": 6884 }, { "epoch": 0.88, "grad_norm": 1.5132038593292236, "learning_rate": 3.600322839659065e-07, "loss": 0.5741, "step": 6885 }, { "epoch": 0.88, "grad_norm": 2.413456439971924, "learning_rate": 3.592595357915163e-07, "loss": 0.6382, "step": 6886 }, { "epoch": 0.88, "grad_norm": 1.2886861562728882, "learning_rate": 3.584875868903448e-07, "loss": 0.5972, "step": 6887 }, { "epoch": 0.88, "grad_norm": 1.2081990242004395, "learning_rate": 3.577164373953446e-07, "loss": 0.5572, "step": 6888 }, { "epoch": 0.88, "grad_norm": 1.4857827425003052, "learning_rate": 3.569460874393288e-07, "loss": 0.5606, "step": 6889 }, { "epoch": 0.88, "grad_norm": 1.3529541492462158, "learning_rate": 3.561765371549769e-07, "loss": 0.5588, "step": 6890 }, { "epoch": 0.88, "grad_norm": 1.1445674896240234, "learning_rate": 3.554077866748279e-07, "loss": 0.5352, "step": 6891 }, { "epoch": 0.88, "grad_norm": 1.4179437160491943, "learning_rate": 3.5463983613128136e-07, "loss": 0.5433, "step": 6892 }, { "epoch": 0.88, "grad_norm": 1.1696901321411133, "learning_rate": 3.5387268565660324e-07, "loss": 0.5855, "step": 6893 }, { "epoch": 0.88, "grad_norm": 1.259205937385559, "learning_rate": 3.5310633538291894e-07, "loss": 0.5802, "step": 6894 }, { "epoch": 0.88, "grad_norm": 1.2908961772918701, "learning_rate": 3.523407854422173e-07, "loss": 0.6712, "step": 6895 }, { "epoch": 0.88, "grad_norm": 1.3714510202407837, "learning_rate": 3.5157603596634727e-07, "loss": 0.6665, "step": 6896 }, { "epoch": 0.88, "grad_norm": 1.2138036489486694, "learning_rate": 3.508120870870224e-07, "loss": 0.6244, "step": 6897 }, { "epoch": 0.88, "grad_norm": 1.4784094095230103, "learning_rate": 3.5004893893581746e-07, "loss": 0.6091, "step": 6898 }, { "epoch": 0.88, "grad_norm": 1.4526443481445312, "learning_rate": 3.4928659164416956e-07, "loss": 0.6336, "step": 6899 }, { "epoch": 0.88, "grad_norm": 1.4262062311172485, "learning_rate": 3.485250453433764e-07, "loss": 0.5957, "step": 6900 }, { "epoch": 0.88, "grad_norm": 1.7060856819152832, "learning_rate": 3.4776430016459917e-07, "loss": 0.5559, "step": 6901 }, { "epoch": 0.88, "grad_norm": 1.9785739183425903, "learning_rate": 3.4700435623886143e-07, "loss": 0.6409, "step": 6902 }, { "epoch": 0.88, "grad_norm": 1.693240761756897, "learning_rate": 3.462452136970479e-07, "loss": 0.6472, "step": 6903 }, { "epoch": 0.88, "grad_norm": 1.6329549551010132, "learning_rate": 3.4548687266990453e-07, "loss": 0.5792, "step": 6904 }, { "epoch": 0.88, "grad_norm": 1.6811611652374268, "learning_rate": 3.4472933328804134e-07, "loss": 0.5417, "step": 6905 }, { "epoch": 0.88, "grad_norm": 1.2126712799072266, "learning_rate": 3.439725956819284e-07, "loss": 0.5035, "step": 6906 }, { "epoch": 0.88, "grad_norm": 1.4418469667434692, "learning_rate": 3.4321665998189814e-07, "loss": 0.5954, "step": 6907 }, { "epoch": 0.89, "grad_norm": 1.128082275390625, "learning_rate": 3.424615263181458e-07, "loss": 0.5779, "step": 6908 }, { "epoch": 0.89, "grad_norm": 1.4310845136642456, "learning_rate": 3.417071948207273e-07, "loss": 0.505, "step": 6909 }, { "epoch": 0.89, "grad_norm": 1.5503251552581787, "learning_rate": 3.409536656195628e-07, "loss": 0.6519, "step": 6910 }, { "epoch": 0.89, "grad_norm": 1.8439539670944214, "learning_rate": 3.4020093884442885e-07, "loss": 0.5995, "step": 6911 }, { "epoch": 0.89, "grad_norm": 1.237624168395996, "learning_rate": 3.3944901462497014e-07, "loss": 0.4917, "step": 6912 }, { "epoch": 0.89, "grad_norm": 1.3327163457870483, "learning_rate": 3.3869789309068866e-07, "loss": 0.5526, "step": 6913 }, { "epoch": 0.89, "grad_norm": 1.702526330947876, "learning_rate": 3.3794757437095206e-07, "loss": 0.6228, "step": 6914 }, { "epoch": 0.89, "grad_norm": 1.312321424484253, "learning_rate": 3.3719805859498466e-07, "loss": 0.6137, "step": 6915 }, { "epoch": 0.89, "grad_norm": 1.5142812728881836, "learning_rate": 3.364493458918772e-07, "loss": 0.5638, "step": 6916 }, { "epoch": 0.89, "grad_norm": 1.370383858680725, "learning_rate": 3.3570143639057916e-07, "loss": 0.5413, "step": 6917 }, { "epoch": 0.89, "grad_norm": 1.4188882112503052, "learning_rate": 3.349543302199043e-07, "loss": 0.5455, "step": 6918 }, { "epoch": 0.89, "grad_norm": 1.3100755214691162, "learning_rate": 3.342080275085252e-07, "loss": 0.6094, "step": 6919 }, { "epoch": 0.89, "grad_norm": 1.6879796981811523, "learning_rate": 3.3346252838497683e-07, "loss": 0.6627, "step": 6920 }, { "epoch": 0.89, "grad_norm": 1.2003105878829956, "learning_rate": 3.327178329776576e-07, "loss": 0.6109, "step": 6921 }, { "epoch": 0.89, "grad_norm": 1.4838978052139282, "learning_rate": 3.31973941414826e-07, "loss": 0.5857, "step": 6922 }, { "epoch": 0.89, "grad_norm": 1.5608856678009033, "learning_rate": 3.312308538246006e-07, "loss": 0.6024, "step": 6923 }, { "epoch": 0.89, "grad_norm": 2.857037305831909, "learning_rate": 3.3048857033496473e-07, "loss": 0.5997, "step": 6924 }, { "epoch": 0.89, "grad_norm": 1.4345684051513672, "learning_rate": 3.2974709107376215e-07, "loss": 0.6384, "step": 6925 }, { "epoch": 0.89, "grad_norm": 1.6866978406906128, "learning_rate": 3.290064161686962e-07, "loss": 0.5539, "step": 6926 }, { "epoch": 0.89, "grad_norm": 1.3979156017303467, "learning_rate": 3.282665457473333e-07, "loss": 0.6002, "step": 6927 }, { "epoch": 0.89, "grad_norm": 1.108978033065796, "learning_rate": 3.2752747993710144e-07, "loss": 0.7341, "step": 6928 }, { "epoch": 0.89, "grad_norm": 1.3866137266159058, "learning_rate": 3.267892188652905e-07, "loss": 0.6452, "step": 6929 }, { "epoch": 0.89, "grad_norm": 1.1434195041656494, "learning_rate": 3.2605176265904925e-07, "loss": 0.6161, "step": 6930 }, { "epoch": 0.89, "grad_norm": 1.0713568925857544, "learning_rate": 3.253151114453901e-07, "loss": 0.6877, "step": 6931 }, { "epoch": 0.89, "grad_norm": 1.3612641096115112, "learning_rate": 3.245792653511876e-07, "loss": 0.5224, "step": 6932 }, { "epoch": 0.89, "grad_norm": 1.527736783027649, "learning_rate": 3.238442245031742e-07, "loss": 0.6014, "step": 6933 }, { "epoch": 0.89, "grad_norm": 1.1294968128204346, "learning_rate": 3.2310998902794653e-07, "loss": 0.7599, "step": 6934 }, { "epoch": 0.89, "grad_norm": 1.4158496856689453, "learning_rate": 3.223765590519623e-07, "loss": 0.5286, "step": 6935 }, { "epoch": 0.89, "grad_norm": 1.530761957168579, "learning_rate": 3.216439347015399e-07, "loss": 0.5673, "step": 6936 }, { "epoch": 0.89, "grad_norm": 1.185805320739746, "learning_rate": 3.209121161028583e-07, "loss": 0.5242, "step": 6937 }, { "epoch": 0.89, "grad_norm": 1.3359084129333496, "learning_rate": 3.201811033819585e-07, "loss": 0.5777, "step": 6938 }, { "epoch": 0.89, "grad_norm": 2.525627613067627, "learning_rate": 3.194508966647425e-07, "loss": 0.5719, "step": 6939 }, { "epoch": 0.89, "grad_norm": 1.422498345375061, "learning_rate": 3.1872149607697466e-07, "loss": 0.5689, "step": 6940 }, { "epoch": 0.89, "grad_norm": 1.4468578100204468, "learning_rate": 3.179929017442773e-07, "loss": 0.5871, "step": 6941 }, { "epoch": 0.89, "grad_norm": 1.309499979019165, "learning_rate": 3.1726511379213784e-07, "loss": 0.6147, "step": 6942 }, { "epoch": 0.89, "grad_norm": 2.29915189743042, "learning_rate": 3.1653813234590327e-07, "loss": 0.6435, "step": 6943 }, { "epoch": 0.89, "grad_norm": 1.3363467454910278, "learning_rate": 3.1581195753078e-07, "loss": 0.5975, "step": 6944 }, { "epoch": 0.89, "grad_norm": 1.5139838457107544, "learning_rate": 3.150865894718369e-07, "loss": 0.6698, "step": 6945 }, { "epoch": 0.89, "grad_norm": 1.194031834602356, "learning_rate": 3.143620282940046e-07, "loss": 0.5815, "step": 6946 }, { "epoch": 0.89, "grad_norm": 1.3560861349105835, "learning_rate": 3.136382741220745e-07, "loss": 0.4936, "step": 6947 }, { "epoch": 0.89, "grad_norm": 1.2448467016220093, "learning_rate": 3.1291532708069727e-07, "loss": 0.575, "step": 6948 }, { "epoch": 0.89, "grad_norm": 1.4894511699676514, "learning_rate": 3.121931872943862e-07, "loss": 0.6252, "step": 6949 }, { "epoch": 0.89, "grad_norm": 1.2867622375488281, "learning_rate": 3.1147185488751506e-07, "loss": 0.641, "step": 6950 }, { "epoch": 0.89, "grad_norm": 1.371701955795288, "learning_rate": 3.107513299843201e-07, "loss": 0.6279, "step": 6951 }, { "epoch": 0.89, "grad_norm": 1.264610767364502, "learning_rate": 3.100316127088954e-07, "loss": 0.6091, "step": 6952 }, { "epoch": 0.89, "grad_norm": 1.2578670978546143, "learning_rate": 3.093127031851978e-07, "loss": 0.5467, "step": 6953 }, { "epoch": 0.89, "grad_norm": 1.3644834756851196, "learning_rate": 3.0859460153704557e-07, "loss": 0.5687, "step": 6954 }, { "epoch": 0.89, "grad_norm": 1.1062878370285034, "learning_rate": 3.0787730788811807e-07, "loss": 0.5197, "step": 6955 }, { "epoch": 0.89, "grad_norm": 1.430008888244629, "learning_rate": 3.0716082236195213e-07, "loss": 0.5242, "step": 6956 }, { "epoch": 0.89, "grad_norm": 1.4670532941818237, "learning_rate": 3.064451450819489e-07, "loss": 0.5455, "step": 6957 }, { "epoch": 0.89, "grad_norm": 1.1277669668197632, "learning_rate": 3.057302761713693e-07, "loss": 0.7229, "step": 6958 }, { "epoch": 0.89, "grad_norm": 1.744103193283081, "learning_rate": 3.050162157533354e-07, "loss": 0.5868, "step": 6959 }, { "epoch": 0.89, "grad_norm": 1.5596626996994019, "learning_rate": 3.0430296395082883e-07, "loss": 0.643, "step": 6960 }, { "epoch": 0.89, "grad_norm": 1.3399211168289185, "learning_rate": 3.035905208866935e-07, "loss": 0.6114, "step": 6961 }, { "epoch": 0.89, "grad_norm": 2.7940516471862793, "learning_rate": 3.028788866836335e-07, "loss": 0.6472, "step": 6962 }, { "epoch": 0.89, "grad_norm": 1.4219404458999634, "learning_rate": 3.02168061464212e-07, "loss": 0.5366, "step": 6963 }, { "epoch": 0.89, "grad_norm": 1.567854881286621, "learning_rate": 3.0145804535085476e-07, "loss": 0.5771, "step": 6964 }, { "epoch": 0.89, "grad_norm": 1.4092823266983032, "learning_rate": 3.00748838465848e-07, "loss": 0.5056, "step": 6965 }, { "epoch": 0.89, "grad_norm": 1.33809494972229, "learning_rate": 3.0004044093133844e-07, "loss": 0.5536, "step": 6966 }, { "epoch": 0.89, "grad_norm": 1.3685873746871948, "learning_rate": 2.9933285286933177e-07, "loss": 0.6435, "step": 6967 }, { "epoch": 0.89, "grad_norm": 1.6785255670547485, "learning_rate": 2.9862607440169724e-07, "loss": 0.5749, "step": 6968 }, { "epoch": 0.89, "grad_norm": 1.203216314315796, "learning_rate": 2.979201056501618e-07, "loss": 0.5402, "step": 6969 }, { "epoch": 0.89, "grad_norm": 1.2036579847335815, "learning_rate": 2.972149467363161e-07, "loss": 0.5646, "step": 6970 }, { "epoch": 0.89, "grad_norm": 1.28584623336792, "learning_rate": 2.9651059778160685e-07, "loss": 0.6238, "step": 6971 }, { "epoch": 0.89, "grad_norm": 1.2839908599853516, "learning_rate": 2.958070589073453e-07, "loss": 0.6049, "step": 6972 }, { "epoch": 0.89, "grad_norm": 1.457728624343872, "learning_rate": 2.9510433023470174e-07, "loss": 0.5392, "step": 6973 }, { "epoch": 0.89, "grad_norm": 1.3485701084136963, "learning_rate": 2.9440241188470717e-07, "loss": 0.6339, "step": 6974 }, { "epoch": 0.89, "grad_norm": 1.3882330656051636, "learning_rate": 2.93701303978251e-07, "loss": 0.561, "step": 6975 }, { "epoch": 0.89, "grad_norm": 1.0906554460525513, "learning_rate": 2.930010066360872e-07, "loss": 0.5579, "step": 6976 }, { "epoch": 0.89, "grad_norm": 1.292925238609314, "learning_rate": 2.9230151997882703e-07, "loss": 0.5328, "step": 6977 }, { "epoch": 0.89, "grad_norm": 1.570992350578308, "learning_rate": 2.9160284412694195e-07, "loss": 0.5696, "step": 6978 }, { "epoch": 0.89, "grad_norm": 1.475880742073059, "learning_rate": 2.909049792007651e-07, "loss": 0.6416, "step": 6979 }, { "epoch": 0.89, "grad_norm": 1.4461641311645508, "learning_rate": 2.902079253204898e-07, "loss": 0.577, "step": 6980 }, { "epoch": 0.89, "grad_norm": 1.4062169790267944, "learning_rate": 2.8951168260617004e-07, "loss": 0.5309, "step": 6981 }, { "epoch": 0.89, "grad_norm": 1.2815173864364624, "learning_rate": 2.888162511777176e-07, "loss": 0.6191, "step": 6982 }, { "epoch": 0.89, "grad_norm": 1.3789962530136108, "learning_rate": 2.881216311549079e-07, "loss": 0.5828, "step": 6983 }, { "epoch": 0.89, "grad_norm": 1.1321500539779663, "learning_rate": 2.8742782265737514e-07, "loss": 0.7302, "step": 6984 }, { "epoch": 0.89, "grad_norm": 1.0742570161819458, "learning_rate": 2.8673482580461264e-07, "loss": 0.5523, "step": 6985 }, { "epoch": 0.9, "grad_norm": 1.1808911561965942, "learning_rate": 2.8604264071597607e-07, "loss": 0.5198, "step": 6986 }, { "epoch": 0.9, "grad_norm": 1.1899702548980713, "learning_rate": 2.8535126751067954e-07, "loss": 0.7027, "step": 6987 }, { "epoch": 0.9, "grad_norm": 1.4160841703414917, "learning_rate": 2.846607063077994e-07, "loss": 0.624, "step": 6988 }, { "epoch": 0.9, "grad_norm": 1.3612381219863892, "learning_rate": 2.8397095722626833e-07, "loss": 0.6166, "step": 6989 }, { "epoch": 0.9, "grad_norm": 1.779995083808899, "learning_rate": 2.832820203848835e-07, "loss": 0.6595, "step": 6990 }, { "epoch": 0.9, "grad_norm": 1.5072723627090454, "learning_rate": 2.8259389590230003e-07, "loss": 0.6803, "step": 6991 }, { "epoch": 0.9, "grad_norm": 1.4251368045806885, "learning_rate": 2.8190658389703304e-07, "loss": 0.5835, "step": 6992 }, { "epoch": 0.9, "grad_norm": 1.4748808145523071, "learning_rate": 2.8122008448745795e-07, "loss": 0.5532, "step": 6993 }, { "epoch": 0.9, "grad_norm": 1.4076902866363525, "learning_rate": 2.805343977918101e-07, "loss": 0.5721, "step": 6994 }, { "epoch": 0.9, "grad_norm": 1.2788536548614502, "learning_rate": 2.798495239281868e-07, "loss": 0.5865, "step": 6995 }, { "epoch": 0.9, "grad_norm": 1.204859972000122, "learning_rate": 2.7916546301454185e-07, "loss": 0.5728, "step": 6996 }, { "epoch": 0.9, "grad_norm": 1.7112232446670532, "learning_rate": 2.7848221516869114e-07, "loss": 0.6384, "step": 6997 }, { "epoch": 0.9, "grad_norm": 1.2741093635559082, "learning_rate": 2.7779978050830993e-07, "loss": 0.61, "step": 6998 }, { "epoch": 0.9, "grad_norm": 1.2119300365447998, "learning_rate": 2.771181591509353e-07, "loss": 0.6146, "step": 6999 }, { "epoch": 0.9, "grad_norm": 1.110952615737915, "learning_rate": 2.764373512139612e-07, "loss": 0.6863, "step": 7000 }, { "epoch": 0.9, "grad_norm": 1.2552884817123413, "learning_rate": 2.757573568146432e-07, "loss": 0.5632, "step": 7001 }, { "epoch": 0.9, "grad_norm": 1.9371869564056396, "learning_rate": 2.750781760700966e-07, "loss": 0.6369, "step": 7002 }, { "epoch": 0.9, "grad_norm": 3.263620138168335, "learning_rate": 2.7439980909729716e-07, "loss": 0.6031, "step": 7003 }, { "epoch": 0.9, "grad_norm": 1.5019482374191284, "learning_rate": 2.737222560130787e-07, "loss": 0.5737, "step": 7004 }, { "epoch": 0.9, "grad_norm": 1.525652289390564, "learning_rate": 2.7304551693413616e-07, "loss": 0.6343, "step": 7005 }, { "epoch": 0.9, "grad_norm": 1.3184609413146973, "learning_rate": 2.723695919770242e-07, "loss": 0.6735, "step": 7006 }, { "epoch": 0.9, "grad_norm": 1.2091474533081055, "learning_rate": 2.7169448125815846e-07, "loss": 0.5457, "step": 7007 }, { "epoch": 0.9, "grad_norm": 1.3816118240356445, "learning_rate": 2.7102018489381154e-07, "loss": 0.6264, "step": 7008 }, { "epoch": 0.9, "grad_norm": 1.2462316751480103, "learning_rate": 2.7034670300011614e-07, "loss": 0.6312, "step": 7009 }, { "epoch": 0.9, "grad_norm": 1.240803837776184, "learning_rate": 2.696740356930694e-07, "loss": 0.5673, "step": 7010 }, { "epoch": 0.9, "grad_norm": 1.3565901517868042, "learning_rate": 2.690021830885214e-07, "loss": 0.6344, "step": 7011 }, { "epoch": 0.9, "grad_norm": 1.1310712099075317, "learning_rate": 2.6833114530218694e-07, "loss": 0.5777, "step": 7012 }, { "epoch": 0.9, "grad_norm": 1.0522847175598145, "learning_rate": 2.676609224496374e-07, "loss": 0.6879, "step": 7013 }, { "epoch": 0.9, "grad_norm": 1.6499149799346924, "learning_rate": 2.6699151464630644e-07, "loss": 0.6255, "step": 7014 }, { "epoch": 0.9, "grad_norm": 1.2910633087158203, "learning_rate": 2.663229220074842e-07, "loss": 0.6169, "step": 7015 }, { "epoch": 0.9, "grad_norm": 1.4426332712173462, "learning_rate": 2.6565514464832354e-07, "loss": 0.6411, "step": 7016 }, { "epoch": 0.9, "grad_norm": 1.2526745796203613, "learning_rate": 2.6498818268383465e-07, "loss": 0.574, "step": 7017 }, { "epoch": 0.9, "grad_norm": 1.5019805431365967, "learning_rate": 2.6432203622888895e-07, "loss": 0.5775, "step": 7018 }, { "epoch": 0.9, "grad_norm": 1.2330554723739624, "learning_rate": 2.636567053982164e-07, "loss": 0.4841, "step": 7019 }, { "epoch": 0.9, "grad_norm": 1.3986729383468628, "learning_rate": 2.6299219030640586e-07, "loss": 0.5777, "step": 7020 }, { "epoch": 0.9, "grad_norm": 1.4264737367630005, "learning_rate": 2.6232849106790745e-07, "loss": 0.6921, "step": 7021 }, { "epoch": 0.9, "grad_norm": 1.2543922662734985, "learning_rate": 2.616656077970309e-07, "loss": 0.5413, "step": 7022 }, { "epoch": 0.9, "grad_norm": 1.8443100452423096, "learning_rate": 2.6100354060794196e-07, "loss": 0.6575, "step": 7023 }, { "epoch": 0.9, "grad_norm": 2.33084774017334, "learning_rate": 2.603422896146696e-07, "loss": 0.5558, "step": 7024 }, { "epoch": 0.9, "grad_norm": 1.9156298637390137, "learning_rate": 2.5968185493110034e-07, "loss": 0.581, "step": 7025 }, { "epoch": 0.9, "grad_norm": 1.377859354019165, "learning_rate": 2.590222366709816e-07, "loss": 0.6449, "step": 7026 }, { "epoch": 0.9, "grad_norm": 1.6291786432266235, "learning_rate": 2.58363434947918e-07, "loss": 0.5664, "step": 7027 }, { "epoch": 0.9, "grad_norm": 1.342464566230774, "learning_rate": 2.5770544987537616e-07, "loss": 0.6111, "step": 7028 }, { "epoch": 0.9, "grad_norm": 1.6001750230789185, "learning_rate": 2.570482815666797e-07, "loss": 0.5381, "step": 7029 }, { "epoch": 0.9, "grad_norm": 1.5080769062042236, "learning_rate": 2.563919301350126e-07, "loss": 0.6536, "step": 7030 }, { "epoch": 0.9, "grad_norm": 1.3722752332687378, "learning_rate": 2.557363956934178e-07, "loss": 0.543, "step": 7031 }, { "epoch": 0.9, "grad_norm": 1.3873956203460693, "learning_rate": 2.550816783547988e-07, "loss": 0.521, "step": 7032 }, { "epoch": 0.9, "grad_norm": 1.437137246131897, "learning_rate": 2.54427778231916e-07, "loss": 0.5755, "step": 7033 }, { "epoch": 0.9, "grad_norm": 1.7044881582260132, "learning_rate": 2.53774695437391e-07, "loss": 0.6265, "step": 7034 }, { "epoch": 0.9, "grad_norm": 1.2608447074890137, "learning_rate": 2.531224300837043e-07, "loss": 0.5938, "step": 7035 }, { "epoch": 0.9, "grad_norm": 1.528001308441162, "learning_rate": 2.5247098228319557e-07, "loss": 0.5902, "step": 7036 }, { "epoch": 0.9, "grad_norm": 1.4568138122558594, "learning_rate": 2.518203521480622e-07, "loss": 0.5409, "step": 7037 }, { "epoch": 0.9, "grad_norm": 1.466330647468567, "learning_rate": 2.51170539790363e-07, "loss": 0.5988, "step": 7038 }, { "epoch": 0.9, "grad_norm": 1.3123421669006348, "learning_rate": 2.50521545322015e-07, "loss": 0.6526, "step": 7039 }, { "epoch": 0.9, "grad_norm": 2.2476837635040283, "learning_rate": 2.498733688547944e-07, "loss": 0.6303, "step": 7040 }, { "epoch": 0.9, "grad_norm": 2.1309330463409424, "learning_rate": 2.492260105003358e-07, "loss": 0.606, "step": 7041 }, { "epoch": 0.9, "grad_norm": 1.2924765348434448, "learning_rate": 2.485794703701333e-07, "loss": 0.5738, "step": 7042 }, { "epoch": 0.9, "grad_norm": 1.3361096382141113, "learning_rate": 2.479337485755412e-07, "loss": 0.6349, "step": 7043 }, { "epoch": 0.9, "grad_norm": 1.2557562589645386, "learning_rate": 2.4728884522777106e-07, "loss": 0.6163, "step": 7044 }, { "epoch": 0.9, "grad_norm": 1.16624116897583, "learning_rate": 2.4664476043789523e-07, "loss": 0.7221, "step": 7045 }, { "epoch": 0.9, "grad_norm": 1.5108433961868286, "learning_rate": 2.460014943168443e-07, "loss": 0.6333, "step": 7046 }, { "epoch": 0.9, "grad_norm": 1.4388067722320557, "learning_rate": 2.4535904697540746e-07, "loss": 0.5811, "step": 7047 }, { "epoch": 0.9, "grad_norm": 1.2010633945465088, "learning_rate": 2.447174185242324e-07, "loss": 0.5747, "step": 7048 }, { "epoch": 0.9, "grad_norm": 2.1095924377441406, "learning_rate": 2.4407660907382727e-07, "loss": 0.5752, "step": 7049 }, { "epoch": 0.9, "grad_norm": 1.2455929517745972, "learning_rate": 2.434366187345588e-07, "loss": 0.5444, "step": 7050 }, { "epoch": 0.9, "grad_norm": 1.5187287330627441, "learning_rate": 2.4279744761665225e-07, "loss": 0.4958, "step": 7051 }, { "epoch": 0.9, "grad_norm": 1.457276463508606, "learning_rate": 2.4215909583019117e-07, "loss": 0.581, "step": 7052 }, { "epoch": 0.9, "grad_norm": 1.4139649868011475, "learning_rate": 2.4152156348511923e-07, "loss": 0.5623, "step": 7053 }, { "epoch": 0.9, "grad_norm": 1.4696898460388184, "learning_rate": 2.408848506912381e-07, "loss": 0.5736, "step": 7054 }, { "epoch": 0.9, "grad_norm": 1.5024607181549072, "learning_rate": 2.4024895755820956e-07, "loss": 0.6737, "step": 7055 }, { "epoch": 0.9, "grad_norm": 1.4502644538879395, "learning_rate": 2.3961388419555145e-07, "loss": 0.5557, "step": 7056 }, { "epoch": 0.9, "grad_norm": 1.718114972114563, "learning_rate": 2.389796307126441e-07, "loss": 0.5784, "step": 7057 }, { "epoch": 0.9, "grad_norm": 1.5058298110961914, "learning_rate": 2.3834619721872342e-07, "loss": 0.5671, "step": 7058 }, { "epoch": 0.9, "grad_norm": 1.4167766571044922, "learning_rate": 2.3771358382288666e-07, "loss": 0.5516, "step": 7059 }, { "epoch": 0.9, "grad_norm": 1.5127261877059937, "learning_rate": 2.370817906340872e-07, "loss": 0.6122, "step": 7060 }, { "epoch": 0.9, "grad_norm": 1.1658446788787842, "learning_rate": 2.3645081776113965e-07, "loss": 0.7223, "step": 7061 }, { "epoch": 0.9, "grad_norm": 1.3336877822875977, "learning_rate": 2.3582066531271708e-07, "loss": 0.5896, "step": 7062 }, { "epoch": 0.9, "grad_norm": 1.4371452331542969, "learning_rate": 2.3519133339734823e-07, "loss": 0.5797, "step": 7063 }, { "epoch": 0.91, "grad_norm": 1.455986738204956, "learning_rate": 2.3456282212342363e-07, "loss": 0.6198, "step": 7064 }, { "epoch": 0.91, "grad_norm": 1.5969009399414062, "learning_rate": 2.3393513159919223e-07, "loss": 0.6366, "step": 7065 }, { "epoch": 0.91, "grad_norm": 1.3942619562149048, "learning_rate": 2.3330826193276145e-07, "loss": 0.5663, "step": 7066 }, { "epoch": 0.91, "grad_norm": 2.943873405456543, "learning_rate": 2.3268221323209494e-07, "loss": 0.5417, "step": 7067 }, { "epoch": 0.91, "grad_norm": 1.2764878273010254, "learning_rate": 2.3205698560501755e-07, "loss": 0.5953, "step": 7068 }, { "epoch": 0.91, "grad_norm": 1.2757691144943237, "learning_rate": 2.314325791592126e-07, "loss": 0.5584, "step": 7069 }, { "epoch": 0.91, "grad_norm": 1.3400564193725586, "learning_rate": 2.3080899400222178e-07, "loss": 0.5815, "step": 7070 }, { "epoch": 0.91, "grad_norm": 1.1935240030288696, "learning_rate": 2.301862302414437e-07, "loss": 0.5787, "step": 7071 }, { "epoch": 0.91, "grad_norm": 1.1886448860168457, "learning_rate": 2.2956428798413755e-07, "loss": 0.5357, "step": 7072 }, { "epoch": 0.91, "grad_norm": 1.3621718883514404, "learning_rate": 2.2894316733741985e-07, "loss": 0.5345, "step": 7073 }, { "epoch": 0.91, "grad_norm": 1.316648006439209, "learning_rate": 2.2832286840826667e-07, "loss": 0.5749, "step": 7074 }, { "epoch": 0.91, "grad_norm": 1.4001599550247192, "learning_rate": 2.277033913035115e-07, "loss": 0.6113, "step": 7075 }, { "epoch": 0.91, "grad_norm": 2.2548327445983887, "learning_rate": 2.2708473612984617e-07, "loss": 0.5458, "step": 7076 }, { "epoch": 0.91, "grad_norm": 1.441633701324463, "learning_rate": 2.2646690299382212e-07, "loss": 0.5897, "step": 7077 }, { "epoch": 0.91, "grad_norm": 1.5713400840759277, "learning_rate": 2.258498920018476e-07, "loss": 0.5499, "step": 7078 }, { "epoch": 0.91, "grad_norm": 1.258521318435669, "learning_rate": 2.2523370326019145e-07, "loss": 0.5601, "step": 7079 }, { "epoch": 0.91, "grad_norm": 1.3642133474349976, "learning_rate": 2.246183368749799e-07, "loss": 0.5553, "step": 7080 }, { "epoch": 0.91, "grad_norm": 1.4287675619125366, "learning_rate": 2.24003792952196e-07, "loss": 0.6197, "step": 7081 }, { "epoch": 0.91, "grad_norm": 1.6514580249786377, "learning_rate": 2.233900715976828e-07, "loss": 0.5636, "step": 7082 }, { "epoch": 0.91, "grad_norm": 1.6768487691879272, "learning_rate": 2.2277717291714184e-07, "loss": 0.5855, "step": 7083 }, { "epoch": 0.91, "grad_norm": 1.3704938888549805, "learning_rate": 2.2216509701613265e-07, "loss": 0.6517, "step": 7084 }, { "epoch": 0.91, "grad_norm": 1.2699650526046753, "learning_rate": 2.2155384400007196e-07, "loss": 0.5112, "step": 7085 }, { "epoch": 0.91, "grad_norm": 1.198685884475708, "learning_rate": 2.2094341397423558e-07, "loss": 0.6796, "step": 7086 }, { "epoch": 0.91, "grad_norm": 1.5341168642044067, "learning_rate": 2.2033380704375829e-07, "loss": 0.5189, "step": 7087 }, { "epoch": 0.91, "grad_norm": 1.5040671825408936, "learning_rate": 2.1972502331363332e-07, "loss": 0.6402, "step": 7088 }, { "epoch": 0.91, "grad_norm": 1.403660774230957, "learning_rate": 2.191170628887096e-07, "loss": 0.5636, "step": 7089 }, { "epoch": 0.91, "grad_norm": 1.2295106649398804, "learning_rate": 2.1850992587369668e-07, "loss": 0.5676, "step": 7090 }, { "epoch": 0.91, "grad_norm": 1.3213706016540527, "learning_rate": 2.1790361237316204e-07, "loss": 0.5344, "step": 7091 }, { "epoch": 0.91, "grad_norm": 1.5314936637878418, "learning_rate": 2.1729812249153048e-07, "loss": 0.6096, "step": 7092 }, { "epoch": 0.91, "grad_norm": 1.497307300567627, "learning_rate": 2.1669345633308526e-07, "loss": 0.5312, "step": 7093 }, { "epoch": 0.91, "grad_norm": 1.2101197242736816, "learning_rate": 2.1608961400196747e-07, "loss": 0.6864, "step": 7094 }, { "epoch": 0.91, "grad_norm": 1.2792714834213257, "learning_rate": 2.1548659560217678e-07, "loss": 0.529, "step": 7095 }, { "epoch": 0.91, "grad_norm": 1.6750179529190063, "learning_rate": 2.148844012375717e-07, "loss": 0.6093, "step": 7096 }, { "epoch": 0.91, "grad_norm": 1.2162268161773682, "learning_rate": 2.1428303101186708e-07, "loss": 0.6036, "step": 7097 }, { "epoch": 0.91, "grad_norm": 1.2044456005096436, "learning_rate": 2.1368248502863676e-07, "loss": 0.5956, "step": 7098 }, { "epoch": 0.91, "grad_norm": 1.3525432348251343, "learning_rate": 2.1308276339131407e-07, "loss": 0.6101, "step": 7099 }, { "epoch": 0.91, "grad_norm": 1.3939049243927002, "learning_rate": 2.124838662031864e-07, "loss": 0.5844, "step": 7100 }, { "epoch": 0.91, "grad_norm": 2.0812456607818604, "learning_rate": 2.1188579356740346e-07, "loss": 0.5712, "step": 7101 }, { "epoch": 0.91, "grad_norm": 1.490818738937378, "learning_rate": 2.1128854558697009e-07, "loss": 0.597, "step": 7102 }, { "epoch": 0.91, "grad_norm": 1.2523460388183594, "learning_rate": 2.1069212236475177e-07, "loss": 0.7394, "step": 7103 }, { "epoch": 0.91, "grad_norm": 1.3945302963256836, "learning_rate": 2.1009652400346802e-07, "loss": 0.57, "step": 7104 }, { "epoch": 0.91, "grad_norm": 1.7118395566940308, "learning_rate": 2.0950175060569956e-07, "loss": 0.5877, "step": 7105 }, { "epoch": 0.91, "grad_norm": 1.2491495609283447, "learning_rate": 2.0890780227388385e-07, "loss": 0.5995, "step": 7106 }, { "epoch": 0.91, "grad_norm": 1.2308311462402344, "learning_rate": 2.08314679110318e-07, "loss": 0.6299, "step": 7107 }, { "epoch": 0.91, "grad_norm": 1.270413875579834, "learning_rate": 2.0772238121715248e-07, "loss": 0.5532, "step": 7108 }, { "epoch": 0.91, "grad_norm": 1.3391141891479492, "learning_rate": 2.0713090869640072e-07, "loss": 0.6113, "step": 7109 }, { "epoch": 0.91, "grad_norm": 1.320900559425354, "learning_rate": 2.0654026164993123e-07, "loss": 0.565, "step": 7110 }, { "epoch": 0.91, "grad_norm": 1.3125171661376953, "learning_rate": 2.0595044017947153e-07, "loss": 0.621, "step": 7111 }, { "epoch": 0.91, "grad_norm": 1.7887307405471802, "learning_rate": 2.053614443866042e-07, "loss": 0.6705, "step": 7112 }, { "epoch": 0.91, "grad_norm": 1.334878921508789, "learning_rate": 2.0477327437277427e-07, "loss": 0.6416, "step": 7113 }, { "epoch": 0.91, "grad_norm": 1.0813583135604858, "learning_rate": 2.041859302392818e-07, "loss": 0.5807, "step": 7114 }, { "epoch": 0.91, "grad_norm": 1.5186846256256104, "learning_rate": 2.0359941208728363e-07, "loss": 0.6429, "step": 7115 }, { "epoch": 0.91, "grad_norm": 1.855786919593811, "learning_rate": 2.0301372001779673e-07, "loss": 0.664, "step": 7116 }, { "epoch": 0.91, "grad_norm": 1.525266408920288, "learning_rate": 2.0242885413169376e-07, "loss": 0.618, "step": 7117 }, { "epoch": 0.91, "grad_norm": 1.3486268520355225, "learning_rate": 2.0184481452970694e-07, "loss": 0.5815, "step": 7118 }, { "epoch": 0.91, "grad_norm": 1.5850796699523926, "learning_rate": 2.012616013124241e-07, "loss": 0.5655, "step": 7119 }, { "epoch": 0.91, "grad_norm": 1.358275055885315, "learning_rate": 2.0067921458029272e-07, "loss": 0.5076, "step": 7120 }, { "epoch": 0.91, "grad_norm": 1.3294881582260132, "learning_rate": 2.000976544336164e-07, "loss": 0.6009, "step": 7121 }, { "epoch": 0.91, "grad_norm": 1.6353390216827393, "learning_rate": 1.9951692097255836e-07, "loss": 0.5333, "step": 7122 }, { "epoch": 0.91, "grad_norm": 1.490020990371704, "learning_rate": 1.989370142971364e-07, "loss": 0.6594, "step": 7123 }, { "epoch": 0.91, "grad_norm": 1.7284151315689087, "learning_rate": 1.983579345072284e-07, "loss": 0.5152, "step": 7124 }, { "epoch": 0.91, "grad_norm": 1.3690820932388306, "learning_rate": 1.9777968170257012e-07, "loss": 0.5566, "step": 7125 }, { "epoch": 0.91, "grad_norm": 1.5744019746780396, "learning_rate": 1.972022559827519e-07, "loss": 0.6513, "step": 7126 }, { "epoch": 0.91, "grad_norm": 2.656482696533203, "learning_rate": 1.9662565744722472e-07, "loss": 0.523, "step": 7127 }, { "epoch": 0.91, "grad_norm": 2.3750336170196533, "learning_rate": 1.9604988619529586e-07, "loss": 0.5436, "step": 7128 }, { "epoch": 0.91, "grad_norm": 1.3693758249282837, "learning_rate": 1.954749423261304e-07, "loss": 0.5961, "step": 7129 }, { "epoch": 0.91, "grad_norm": 1.8211826086044312, "learning_rate": 1.9490082593875026e-07, "loss": 0.6507, "step": 7130 }, { "epoch": 0.91, "grad_norm": 2.833078384399414, "learning_rate": 1.9432753713203524e-07, "loss": 0.5015, "step": 7131 }, { "epoch": 0.91, "grad_norm": 1.487642526626587, "learning_rate": 1.937550760047241e-07, "loss": 0.5934, "step": 7132 }, { "epoch": 0.91, "grad_norm": 1.3412609100341797, "learning_rate": 1.9318344265540967e-07, "loss": 0.564, "step": 7133 }, { "epoch": 0.91, "grad_norm": 1.496861219406128, "learning_rate": 1.926126371825454e-07, "loss": 0.5433, "step": 7134 }, { "epoch": 0.91, "grad_norm": 1.3020457029342651, "learning_rate": 1.9204265968444047e-07, "loss": 0.4675, "step": 7135 }, { "epoch": 0.91, "grad_norm": 1.3243365287780762, "learning_rate": 1.9147351025926242e-07, "loss": 0.583, "step": 7136 }, { "epoch": 0.91, "grad_norm": 1.173129677772522, "learning_rate": 1.9090518900503508e-07, "loss": 0.6188, "step": 7137 }, { "epoch": 0.91, "grad_norm": 1.4048100709915161, "learning_rate": 1.9033769601964013e-07, "loss": 0.5896, "step": 7138 }, { "epoch": 0.91, "grad_norm": 1.206529140472412, "learning_rate": 1.8977103140081664e-07, "loss": 0.5598, "step": 7139 }, { "epoch": 0.91, "grad_norm": 1.4039027690887451, "learning_rate": 1.8920519524616265e-07, "loss": 0.5779, "step": 7140 }, { "epoch": 0.91, "grad_norm": 1.485414743423462, "learning_rate": 1.8864018765312963e-07, "loss": 0.5744, "step": 7141 }, { "epoch": 0.91, "grad_norm": 1.6506009101867676, "learning_rate": 1.880760087190303e-07, "loss": 0.5637, "step": 7142 }, { "epoch": 0.92, "grad_norm": 1.588201642036438, "learning_rate": 1.8751265854103197e-07, "loss": 0.4825, "step": 7143 }, { "epoch": 0.92, "grad_norm": 1.8149200677871704, "learning_rate": 1.869501372161614e-07, "loss": 0.5513, "step": 7144 }, { "epoch": 0.92, "grad_norm": 2.2272982597351074, "learning_rate": 1.8638844484130058e-07, "loss": 0.5979, "step": 7145 }, { "epoch": 0.92, "grad_norm": 1.5402497053146362, "learning_rate": 1.858275815131888e-07, "loss": 0.5783, "step": 7146 }, { "epoch": 0.92, "grad_norm": 1.4437787532806396, "learning_rate": 1.85267547328426e-07, "loss": 0.5892, "step": 7147 }, { "epoch": 0.92, "grad_norm": 1.1562960147857666, "learning_rate": 1.8470834238346448e-07, "loss": 0.5223, "step": 7148 }, { "epoch": 0.92, "grad_norm": 1.4266095161437988, "learning_rate": 1.8414996677461605e-07, "loss": 0.6465, "step": 7149 }, { "epoch": 0.92, "grad_norm": 1.4627182483673096, "learning_rate": 1.8359242059805048e-07, "loss": 0.6431, "step": 7150 }, { "epoch": 0.92, "grad_norm": 1.4455598592758179, "learning_rate": 1.8303570394979375e-07, "loss": 0.6584, "step": 7151 }, { "epoch": 0.92, "grad_norm": 1.3495609760284424, "learning_rate": 1.8247981692572802e-07, "loss": 0.6074, "step": 7152 }, { "epoch": 0.92, "grad_norm": 1.1152602434158325, "learning_rate": 1.8192475962159395e-07, "loss": 0.4656, "step": 7153 }, { "epoch": 0.92, "grad_norm": 1.3456722497940063, "learning_rate": 1.8137053213298895e-07, "loss": 0.5437, "step": 7154 }, { "epoch": 0.92, "grad_norm": 1.8487489223480225, "learning_rate": 1.808171345553683e-07, "loss": 0.5297, "step": 7155 }, { "epoch": 0.92, "grad_norm": 1.3239003419876099, "learning_rate": 1.8026456698404192e-07, "loss": 0.5854, "step": 7156 }, { "epoch": 0.92, "grad_norm": 1.531930923461914, "learning_rate": 1.7971282951417923e-07, "loss": 0.5994, "step": 7157 }, { "epoch": 0.92, "grad_norm": 1.3221821784973145, "learning_rate": 1.7916192224080586e-07, "loss": 0.5434, "step": 7158 }, { "epoch": 0.92, "grad_norm": 1.7776752710342407, "learning_rate": 1.7861184525880425e-07, "loss": 0.6325, "step": 7159 }, { "epoch": 0.92, "grad_norm": 1.3057001829147339, "learning_rate": 1.7806259866291366e-07, "loss": 0.548, "step": 7160 }, { "epoch": 0.92, "grad_norm": 1.2270419597625732, "learning_rate": 1.775141825477311e-07, "loss": 0.5853, "step": 7161 }, { "epoch": 0.92, "grad_norm": 1.3529689311981201, "learning_rate": 1.7696659700770948e-07, "loss": 0.5753, "step": 7162 }, { "epoch": 0.92, "grad_norm": 1.1730376482009888, "learning_rate": 1.764198421371599e-07, "loss": 0.5227, "step": 7163 }, { "epoch": 0.92, "grad_norm": 1.4664965867996216, "learning_rate": 1.758739180302499e-07, "loss": 0.5583, "step": 7164 }, { "epoch": 0.92, "grad_norm": 1.7074847221374512, "learning_rate": 1.753288247810031e-07, "loss": 0.6546, "step": 7165 }, { "epoch": 0.92, "grad_norm": 1.511422872543335, "learning_rate": 1.7478456248330166e-07, "loss": 0.6486, "step": 7166 }, { "epoch": 0.92, "grad_norm": 1.5393157005310059, "learning_rate": 1.7424113123088282e-07, "loss": 0.6168, "step": 7167 }, { "epoch": 0.92, "grad_norm": 2.8663549423217773, "learning_rate": 1.736985311173417e-07, "loss": 0.6082, "step": 7168 }, { "epoch": 0.92, "grad_norm": 1.5243266820907593, "learning_rate": 1.7315676223613077e-07, "loss": 0.5553, "step": 7169 }, { "epoch": 0.92, "grad_norm": 1.2957146167755127, "learning_rate": 1.7261582468055872e-07, "loss": 0.5589, "step": 7170 }, { "epoch": 0.92, "grad_norm": 1.3966628313064575, "learning_rate": 1.720757185437899e-07, "loss": 0.5563, "step": 7171 }, { "epoch": 0.92, "grad_norm": 1.2726695537567139, "learning_rate": 1.7153644391884715e-07, "loss": 0.6512, "step": 7172 }, { "epoch": 0.92, "grad_norm": 1.4725221395492554, "learning_rate": 1.7099800089861006e-07, "loss": 0.5772, "step": 7173 }, { "epoch": 0.92, "grad_norm": 1.5598526000976562, "learning_rate": 1.7046038957581447e-07, "loss": 0.6274, "step": 7174 }, { "epoch": 0.92, "grad_norm": 2.330385684967041, "learning_rate": 1.6992361004305235e-07, "loss": 0.5829, "step": 7175 }, { "epoch": 0.92, "grad_norm": 1.331067681312561, "learning_rate": 1.693876623927737e-07, "loss": 0.5583, "step": 7176 }, { "epoch": 0.92, "grad_norm": 1.3554342985153198, "learning_rate": 1.6885254671728468e-07, "loss": 0.5674, "step": 7177 }, { "epoch": 0.92, "grad_norm": 1.254412055015564, "learning_rate": 1.683182631087471e-07, "loss": 0.6022, "step": 7178 }, { "epoch": 0.92, "grad_norm": 1.508400321006775, "learning_rate": 1.677848116591807e-07, "loss": 0.5477, "step": 7179 }, { "epoch": 0.92, "grad_norm": 1.2750962972640991, "learning_rate": 1.6725219246046254e-07, "loss": 0.5805, "step": 7180 }, { "epoch": 0.92, "grad_norm": 1.5220352411270142, "learning_rate": 1.6672040560432533e-07, "loss": 0.6261, "step": 7181 }, { "epoch": 0.92, "grad_norm": 1.256548523902893, "learning_rate": 1.661894511823575e-07, "loss": 0.5502, "step": 7182 }, { "epoch": 0.92, "grad_norm": 1.461651086807251, "learning_rate": 1.6565932928600593e-07, "loss": 0.4907, "step": 7183 }, { "epoch": 0.92, "grad_norm": 1.1890257596969604, "learning_rate": 1.6513004000657418e-07, "loss": 0.5885, "step": 7184 }, { "epoch": 0.92, "grad_norm": 1.4478956460952759, "learning_rate": 1.6460158343521993e-07, "loss": 0.5186, "step": 7185 }, { "epoch": 0.92, "grad_norm": 1.0976368188858032, "learning_rate": 1.640739596629598e-07, "loss": 0.7179, "step": 7186 }, { "epoch": 0.92, "grad_norm": 1.2835136651992798, "learning_rate": 1.6354716878066612e-07, "loss": 0.5308, "step": 7187 }, { "epoch": 0.92, "grad_norm": 1.4048012495040894, "learning_rate": 1.6302121087906854e-07, "loss": 0.6106, "step": 7188 }, { "epoch": 0.92, "grad_norm": 1.2641879320144653, "learning_rate": 1.6249608604875177e-07, "loss": 0.6223, "step": 7189 }, { "epoch": 0.92, "grad_norm": 1.7437182664871216, "learning_rate": 1.6197179438015798e-07, "loss": 0.5893, "step": 7190 }, { "epoch": 0.92, "grad_norm": 1.3747304677963257, "learning_rate": 1.6144833596358656e-07, "loss": 0.5148, "step": 7191 }, { "epoch": 0.92, "grad_norm": 1.4542993307113647, "learning_rate": 1.6092571088919205e-07, "loss": 0.5563, "step": 7192 }, { "epoch": 0.92, "grad_norm": 1.3954862356185913, "learning_rate": 1.6040391924698584e-07, "loss": 0.6165, "step": 7193 }, { "epoch": 0.92, "grad_norm": 1.301512598991394, "learning_rate": 1.5988296112683598e-07, "loss": 0.5522, "step": 7194 }, { "epoch": 0.92, "grad_norm": 1.3332188129425049, "learning_rate": 1.5936283661846686e-07, "loss": 0.5854, "step": 7195 }, { "epoch": 0.92, "grad_norm": 1.3937634229660034, "learning_rate": 1.588435458114601e-07, "loss": 0.6015, "step": 7196 }, { "epoch": 0.92, "grad_norm": 1.4809768199920654, "learning_rate": 1.5832508879525143e-07, "loss": 0.4986, "step": 7197 }, { "epoch": 0.92, "grad_norm": 1.265257477760315, "learning_rate": 1.5780746565913552e-07, "loss": 0.6384, "step": 7198 }, { "epoch": 0.92, "grad_norm": 1.1740734577178955, "learning_rate": 1.5729067649226327e-07, "loss": 0.5811, "step": 7199 }, { "epoch": 0.92, "grad_norm": 1.4921587705612183, "learning_rate": 1.5677472138364014e-07, "loss": 0.5796, "step": 7200 }, { "epoch": 0.92, "grad_norm": 1.2573281526565552, "learning_rate": 1.562596004221284e-07, "loss": 0.5726, "step": 7201 }, { "epoch": 0.92, "grad_norm": 1.4946744441986084, "learning_rate": 1.557453136964482e-07, "loss": 0.568, "step": 7202 }, { "epoch": 0.92, "grad_norm": 1.1617540121078491, "learning_rate": 1.5523186129517475e-07, "loss": 0.5592, "step": 7203 }, { "epoch": 0.92, "grad_norm": 1.2968569993972778, "learning_rate": 1.547192433067396e-07, "loss": 0.5903, "step": 7204 }, { "epoch": 0.92, "grad_norm": 1.3031530380249023, "learning_rate": 1.5420745981943042e-07, "loss": 0.5648, "step": 7205 }, { "epoch": 0.92, "grad_norm": 1.2497785091400146, "learning_rate": 1.5369651092139172e-07, "loss": 0.5518, "step": 7206 }, { "epoch": 0.92, "grad_norm": 1.333309531211853, "learning_rate": 1.531863967006253e-07, "loss": 0.5962, "step": 7207 }, { "epoch": 0.92, "grad_norm": 1.2219014167785645, "learning_rate": 1.526771172449859e-07, "loss": 0.6769, "step": 7208 }, { "epoch": 0.92, "grad_norm": 1.6595335006713867, "learning_rate": 1.5216867264218726e-07, "loss": 0.5918, "step": 7209 }, { "epoch": 0.92, "grad_norm": 1.4741884469985962, "learning_rate": 1.516610629797993e-07, "loss": 0.6679, "step": 7210 }, { "epoch": 0.92, "grad_norm": 1.4178804159164429, "learning_rate": 1.511542883452477e-07, "loss": 0.592, "step": 7211 }, { "epoch": 0.92, "grad_norm": 1.6573342084884644, "learning_rate": 1.5064834882581314e-07, "loss": 0.6468, "step": 7212 }, { "epoch": 0.92, "grad_norm": 1.2453925609588623, "learning_rate": 1.5014324450863316e-07, "loss": 0.5826, "step": 7213 }, { "epoch": 0.92, "grad_norm": 2.3448381423950195, "learning_rate": 1.496389754807026e-07, "loss": 0.6063, "step": 7214 }, { "epoch": 0.92, "grad_norm": 1.3461638689041138, "learning_rate": 1.4913554182887147e-07, "loss": 0.6598, "step": 7215 }, { "epoch": 0.92, "grad_norm": 1.6426374912261963, "learning_rate": 1.4863294363984536e-07, "loss": 0.6191, "step": 7216 }, { "epoch": 0.92, "grad_norm": 1.748055100440979, "learning_rate": 1.481311810001873e-07, "loss": 0.6522, "step": 7217 }, { "epoch": 0.92, "grad_norm": 1.689281702041626, "learning_rate": 1.4763025399631535e-07, "loss": 0.6215, "step": 7218 }, { "epoch": 0.92, "grad_norm": 1.246228575706482, "learning_rate": 1.4713016271450443e-07, "loss": 0.5989, "step": 7219 }, { "epoch": 0.92, "grad_norm": 2.082564353942871, "learning_rate": 1.466309072408839e-07, "loss": 0.6292, "step": 7220 }, { "epoch": 0.93, "grad_norm": 1.7110521793365479, "learning_rate": 1.4613248766144172e-07, "loss": 0.5804, "step": 7221 }, { "epoch": 0.93, "grad_norm": 1.2282060384750366, "learning_rate": 1.456349040620203e-07, "loss": 0.6041, "step": 7222 }, { "epoch": 0.93, "grad_norm": 1.646616816520691, "learning_rate": 1.4513815652831776e-07, "loss": 0.604, "step": 7223 }, { "epoch": 0.93, "grad_norm": 1.2719740867614746, "learning_rate": 1.4464224514588842e-07, "loss": 0.6009, "step": 7224 }, { "epoch": 0.93, "grad_norm": 1.63080632686615, "learning_rate": 1.4414717000014456e-07, "loss": 0.5949, "step": 7225 }, { "epoch": 0.93, "grad_norm": 1.7271970510482788, "learning_rate": 1.436529311763507e-07, "loss": 0.5815, "step": 7226 }, { "epoch": 0.93, "grad_norm": 1.3970266580581665, "learning_rate": 1.43159528759631e-07, "loss": 0.5571, "step": 7227 }, { "epoch": 0.93, "grad_norm": 1.2534281015396118, "learning_rate": 1.426669628349636e-07, "loss": 0.6054, "step": 7228 }, { "epoch": 0.93, "grad_norm": 1.373757004737854, "learning_rate": 1.4217523348718287e-07, "loss": 0.6324, "step": 7229 }, { "epoch": 0.93, "grad_norm": 1.6304547786712646, "learning_rate": 1.4168434080097937e-07, "loss": 0.6204, "step": 7230 }, { "epoch": 0.93, "grad_norm": 1.140513300895691, "learning_rate": 1.411942848608988e-07, "loss": 0.6375, "step": 7231 }, { "epoch": 0.93, "grad_norm": 1.1663187742233276, "learning_rate": 1.4070506575134367e-07, "loss": 0.5516, "step": 7232 }, { "epoch": 0.93, "grad_norm": 1.328594446182251, "learning_rate": 1.4021668355657215e-07, "loss": 0.5821, "step": 7233 }, { "epoch": 0.93, "grad_norm": 1.4575828313827515, "learning_rate": 1.3972913836069857e-07, "loss": 0.6476, "step": 7234 }, { "epoch": 0.93, "grad_norm": 1.5077722072601318, "learning_rate": 1.3924243024769135e-07, "loss": 0.5528, "step": 7235 }, { "epoch": 0.93, "grad_norm": 1.1910840272903442, "learning_rate": 1.387565593013779e-07, "loss": 0.5179, "step": 7236 }, { "epoch": 0.93, "grad_norm": 1.430280089378357, "learning_rate": 1.3827152560543843e-07, "loss": 0.6201, "step": 7237 }, { "epoch": 0.93, "grad_norm": 1.1076302528381348, "learning_rate": 1.377873292434101e-07, "loss": 0.6917, "step": 7238 }, { "epoch": 0.93, "grad_norm": 1.003353476524353, "learning_rate": 1.3730397029868615e-07, "loss": 0.559, "step": 7239 }, { "epoch": 0.93, "grad_norm": 1.5710481405258179, "learning_rate": 1.3682144885451555e-07, "loss": 0.5213, "step": 7240 }, { "epoch": 0.93, "grad_norm": 1.5206317901611328, "learning_rate": 1.3633976499400235e-07, "loss": 0.59, "step": 7241 }, { "epoch": 0.93, "grad_norm": 1.4319339990615845, "learning_rate": 1.3585891880010747e-07, "loss": 0.6111, "step": 7242 }, { "epoch": 0.93, "grad_norm": 1.376744031906128, "learning_rate": 1.3537891035564576e-07, "loss": 0.6414, "step": 7243 }, { "epoch": 0.93, "grad_norm": 1.5035916566848755, "learning_rate": 1.3489973974329053e-07, "loss": 0.732, "step": 7244 }, { "epoch": 0.93, "grad_norm": 1.226749062538147, "learning_rate": 1.3442140704556794e-07, "loss": 0.5514, "step": 7245 }, { "epoch": 0.93, "grad_norm": 1.0988937616348267, "learning_rate": 1.3394391234486104e-07, "loss": 0.5946, "step": 7246 }, { "epoch": 0.93, "grad_norm": 1.2332367897033691, "learning_rate": 1.3346725572340903e-07, "loss": 0.5686, "step": 7247 }, { "epoch": 0.93, "grad_norm": 1.4782696962356567, "learning_rate": 1.3299143726330676e-07, "loss": 0.6448, "step": 7248 }, { "epoch": 0.93, "grad_norm": 1.3674440383911133, "learning_rate": 1.3251645704650262e-07, "loss": 0.6015, "step": 7249 }, { "epoch": 0.93, "grad_norm": 1.3126945495605469, "learning_rate": 1.3204231515480447e-07, "loss": 0.6499, "step": 7250 }, { "epoch": 0.93, "grad_norm": 1.5984584093093872, "learning_rate": 1.3156901166987258e-07, "loss": 0.6032, "step": 7251 }, { "epoch": 0.93, "grad_norm": 1.3169101476669312, "learning_rate": 1.3109654667322337e-07, "loss": 0.5563, "step": 7252 }, { "epoch": 0.93, "grad_norm": 1.4062318801879883, "learning_rate": 1.306249202462301e-07, "loss": 0.6181, "step": 7253 }, { "epoch": 0.93, "grad_norm": 1.340091347694397, "learning_rate": 1.3015413247012054e-07, "loss": 0.592, "step": 7254 }, { "epoch": 0.93, "grad_norm": 1.2546271085739136, "learning_rate": 1.2968418342597876e-07, "loss": 0.6095, "step": 7255 }, { "epoch": 0.93, "grad_norm": 1.1093549728393555, "learning_rate": 1.2921507319474337e-07, "loss": 0.7472, "step": 7256 }, { "epoch": 0.93, "grad_norm": 1.3403831720352173, "learning_rate": 1.2874680185720857e-07, "loss": 0.484, "step": 7257 }, { "epoch": 0.93, "grad_norm": 1.4152884483337402, "learning_rate": 1.2827936949402598e-07, "loss": 0.6114, "step": 7258 }, { "epoch": 0.93, "grad_norm": 1.3531930446624756, "learning_rate": 1.2781277618570066e-07, "loss": 0.5642, "step": 7259 }, { "epoch": 0.93, "grad_norm": 1.5098471641540527, "learning_rate": 1.273470220125933e-07, "loss": 0.5642, "step": 7260 }, { "epoch": 0.93, "grad_norm": 1.4057996273040771, "learning_rate": 1.2688210705492088e-07, "loss": 0.6116, "step": 7261 }, { "epoch": 0.93, "grad_norm": 1.2736021280288696, "learning_rate": 1.26418031392756e-07, "loss": 0.6281, "step": 7262 }, { "epoch": 0.93, "grad_norm": 1.0702874660491943, "learning_rate": 1.2595479510602583e-07, "loss": 0.6903, "step": 7263 }, { "epoch": 0.93, "grad_norm": 1.287724494934082, "learning_rate": 1.2549239827451375e-07, "loss": 0.5868, "step": 7264 }, { "epoch": 0.93, "grad_norm": 1.1175434589385986, "learning_rate": 1.2503084097785777e-07, "loss": 0.6007, "step": 7265 }, { "epoch": 0.93, "grad_norm": 1.1822404861450195, "learning_rate": 1.2457012329555206e-07, "loss": 0.5906, "step": 7266 }, { "epoch": 0.93, "grad_norm": 1.4445996284484863, "learning_rate": 1.2411024530694537e-07, "loss": 0.6044, "step": 7267 }, { "epoch": 0.93, "grad_norm": 1.1158324480056763, "learning_rate": 1.2365120709124322e-07, "loss": 0.6941, "step": 7268 }, { "epoch": 0.93, "grad_norm": 1.6972533464431763, "learning_rate": 1.2319300872750518e-07, "loss": 0.6134, "step": 7269 }, { "epoch": 0.93, "grad_norm": 1.1953438520431519, "learning_rate": 1.2273565029464694e-07, "loss": 0.7026, "step": 7270 }, { "epoch": 0.93, "grad_norm": 1.5305699110031128, "learning_rate": 1.2227913187143835e-07, "loss": 0.547, "step": 7271 }, { "epoch": 0.93, "grad_norm": 1.451676607131958, "learning_rate": 1.2182345353650592e-07, "loss": 0.6202, "step": 7272 }, { "epoch": 0.93, "grad_norm": 1.5434238910675049, "learning_rate": 1.2136861536833134e-07, "loss": 0.6061, "step": 7273 }, { "epoch": 0.93, "grad_norm": 1.2015151977539062, "learning_rate": 1.209146174452508e-07, "loss": 0.4739, "step": 7274 }, { "epoch": 0.93, "grad_norm": 1.211741328239441, "learning_rate": 1.2046145984545565e-07, "loss": 0.6157, "step": 7275 }, { "epoch": 0.93, "grad_norm": 1.2866265773773193, "learning_rate": 1.2000914264699403e-07, "loss": 0.5505, "step": 7276 }, { "epoch": 0.93, "grad_norm": 1.437593698501587, "learning_rate": 1.1955766592776863e-07, "loss": 0.5961, "step": 7277 }, { "epoch": 0.93, "grad_norm": 1.3555943965911865, "learning_rate": 1.1910702976553612e-07, "loss": 0.642, "step": 7278 }, { "epoch": 0.93, "grad_norm": 1.1852178573608398, "learning_rate": 1.1865723423790999e-07, "loss": 0.5775, "step": 7279 }, { "epoch": 0.93, "grad_norm": 1.3030401468276978, "learning_rate": 1.1820827942235824e-07, "loss": 0.5631, "step": 7280 }, { "epoch": 0.93, "grad_norm": 1.204147219657898, "learning_rate": 1.177601653962046e-07, "loss": 0.733, "step": 7281 }, { "epoch": 0.93, "grad_norm": 1.1808563470840454, "learning_rate": 1.1731289223662679e-07, "loss": 0.5254, "step": 7282 }, { "epoch": 0.93, "grad_norm": 1.5205473899841309, "learning_rate": 1.1686646002065815e-07, "loss": 0.586, "step": 7283 }, { "epoch": 0.93, "grad_norm": 1.6285957098007202, "learning_rate": 1.1642086882519e-07, "loss": 0.6336, "step": 7284 }, { "epoch": 0.93, "grad_norm": 1.5329946279525757, "learning_rate": 1.1597611872696368e-07, "loss": 0.6124, "step": 7285 }, { "epoch": 0.93, "grad_norm": 1.4156322479248047, "learning_rate": 1.1553220980257962e-07, "loss": 0.597, "step": 7286 }, { "epoch": 0.93, "grad_norm": 1.3494125604629517, "learning_rate": 1.150891421284922e-07, "loss": 0.5883, "step": 7287 }, { "epoch": 0.93, "grad_norm": 1.2939672470092773, "learning_rate": 1.146469157810104e-07, "loss": 0.6399, "step": 7288 }, { "epoch": 0.93, "grad_norm": 3.1361777782440186, "learning_rate": 1.1420553083629882e-07, "loss": 0.6024, "step": 7289 }, { "epoch": 0.93, "grad_norm": 1.2731001377105713, "learning_rate": 1.1376498737037722e-07, "loss": 0.5837, "step": 7290 }, { "epoch": 0.93, "grad_norm": 1.1375939846038818, "learning_rate": 1.1332528545911992e-07, "loss": 0.6947, "step": 7291 }, { "epoch": 0.93, "grad_norm": 1.776005744934082, "learning_rate": 1.1288642517825688e-07, "loss": 0.5576, "step": 7292 }, { "epoch": 0.93, "grad_norm": 1.6414631605148315, "learning_rate": 1.1244840660337264e-07, "loss": 0.5971, "step": 7293 }, { "epoch": 0.93, "grad_norm": 1.1578058004379272, "learning_rate": 1.1201122980990742e-07, "loss": 0.7463, "step": 7294 }, { "epoch": 0.93, "grad_norm": 1.6849491596221924, "learning_rate": 1.1157489487315542e-07, "loss": 0.581, "step": 7295 }, { "epoch": 0.93, "grad_norm": 1.2874782085418701, "learning_rate": 1.1113940186826767e-07, "loss": 0.5576, "step": 7296 }, { "epoch": 0.93, "grad_norm": 1.0534873008728027, "learning_rate": 1.1070475087024745e-07, "loss": 0.6968, "step": 7297 }, { "epoch": 0.93, "grad_norm": 1.308283805847168, "learning_rate": 1.1027094195395548e-07, "loss": 0.5559, "step": 7298 }, { "epoch": 0.94, "grad_norm": 1.334275245666504, "learning_rate": 1.0983797519410588e-07, "loss": 0.6184, "step": 7299 }, { "epoch": 0.94, "grad_norm": 1.2813701629638672, "learning_rate": 1.0940585066526899e-07, "loss": 0.587, "step": 7300 }, { "epoch": 0.94, "grad_norm": 1.355509638786316, "learning_rate": 1.0897456844186971e-07, "loss": 0.6175, "step": 7301 }, { "epoch": 0.94, "grad_norm": 1.4116696119308472, "learning_rate": 1.0854412859818642e-07, "loss": 0.5073, "step": 7302 }, { "epoch": 0.94, "grad_norm": 1.340970516204834, "learning_rate": 1.0811453120835535e-07, "loss": 0.5772, "step": 7303 }, { "epoch": 0.94, "grad_norm": 1.5687137842178345, "learning_rate": 1.0768577634636457e-07, "loss": 0.5782, "step": 7304 }, { "epoch": 0.94, "grad_norm": 1.321331262588501, "learning_rate": 1.0725786408605887e-07, "loss": 0.5902, "step": 7305 }, { "epoch": 0.94, "grad_norm": 1.3391438722610474, "learning_rate": 1.068307945011371e-07, "loss": 0.6029, "step": 7306 }, { "epoch": 0.94, "grad_norm": 1.2736539840698242, "learning_rate": 1.0640456766515428e-07, "loss": 0.6101, "step": 7307 }, { "epoch": 0.94, "grad_norm": 1.5165783166885376, "learning_rate": 1.0597918365151838e-07, "loss": 0.5793, "step": 7308 }, { "epoch": 0.94, "grad_norm": 1.2305033206939697, "learning_rate": 1.0555464253349301e-07, "loss": 0.5675, "step": 7309 }, { "epoch": 0.94, "grad_norm": 1.2480313777923584, "learning_rate": 1.0513094438419747e-07, "loss": 0.518, "step": 7310 }, { "epoch": 0.94, "grad_norm": 1.5531625747680664, "learning_rate": 1.0470808927660503e-07, "loss": 0.6093, "step": 7311 }, { "epoch": 0.94, "grad_norm": 1.1417747735977173, "learning_rate": 1.0428607728354301e-07, "loss": 0.7112, "step": 7312 }, { "epoch": 0.94, "grad_norm": 1.267432689666748, "learning_rate": 1.0386490847769547e-07, "loss": 0.6231, "step": 7313 }, { "epoch": 0.94, "grad_norm": 1.4412930011749268, "learning_rate": 1.0344458293159998e-07, "loss": 0.6256, "step": 7314 }, { "epoch": 0.94, "grad_norm": 1.665556788444519, "learning_rate": 1.030251007176486e-07, "loss": 0.5757, "step": 7315 }, { "epoch": 0.94, "grad_norm": 1.5385949611663818, "learning_rate": 1.026064619080891e-07, "loss": 0.657, "step": 7316 }, { "epoch": 0.94, "grad_norm": 1.289075255393982, "learning_rate": 1.0218866657502268e-07, "loss": 0.6306, "step": 7317 }, { "epoch": 0.94, "grad_norm": 1.4588640928268433, "learning_rate": 1.0177171479040737e-07, "loss": 0.5182, "step": 7318 }, { "epoch": 0.94, "grad_norm": 1.3852373361587524, "learning_rate": 1.0135560662605404e-07, "loss": 0.5512, "step": 7319 }, { "epoch": 0.94, "grad_norm": 1.2439759969711304, "learning_rate": 1.0094034215362814e-07, "loss": 0.639, "step": 7320 }, { "epoch": 0.94, "grad_norm": 1.4485936164855957, "learning_rate": 1.0052592144465189e-07, "loss": 0.6265, "step": 7321 }, { "epoch": 0.94, "grad_norm": 1.1735234260559082, "learning_rate": 1.0011234457050045e-07, "loss": 0.7429, "step": 7322 }, { "epoch": 0.94, "grad_norm": 1.1867866516113281, "learning_rate": 9.969961160240294e-08, "loss": 0.563, "step": 7323 }, { "epoch": 0.94, "grad_norm": 1.3140125274658203, "learning_rate": 9.928772261144526e-08, "loss": 0.5963, "step": 7324 }, { "epoch": 0.94, "grad_norm": 1.2439604997634888, "learning_rate": 9.887667766856678e-08, "loss": 0.6702, "step": 7325 }, { "epoch": 0.94, "grad_norm": 1.1434909105300903, "learning_rate": 9.846647684456146e-08, "loss": 0.5945, "step": 7326 }, { "epoch": 0.94, "grad_norm": 1.1893295049667358, "learning_rate": 9.805712021007774e-08, "loss": 0.5411, "step": 7327 }, { "epoch": 0.94, "grad_norm": 1.2865269184112549, "learning_rate": 9.764860783561981e-08, "loss": 0.676, "step": 7328 }, { "epoch": 0.94, "grad_norm": 1.5296525955200195, "learning_rate": 9.724093979154526e-08, "loss": 0.5856, "step": 7329 }, { "epoch": 0.94, "grad_norm": 1.277660608291626, "learning_rate": 9.683411614806626e-08, "loss": 0.6107, "step": 7330 }, { "epoch": 0.94, "grad_norm": 1.141019344329834, "learning_rate": 9.642813697525011e-08, "loss": 0.5483, "step": 7331 }, { "epoch": 0.94, "grad_norm": 1.5347297191619873, "learning_rate": 9.602300234301865e-08, "loss": 0.6413, "step": 7332 }, { "epoch": 0.94, "grad_norm": 1.3887579441070557, "learning_rate": 9.561871232114827e-08, "loss": 0.5488, "step": 7333 }, { "epoch": 0.94, "grad_norm": 1.2093347311019897, "learning_rate": 9.521526697926887e-08, "loss": 0.6469, "step": 7334 }, { "epoch": 0.94, "grad_norm": 1.4027270078659058, "learning_rate": 9.481266638686592e-08, "loss": 0.5732, "step": 7335 }, { "epoch": 0.94, "grad_norm": 1.4226722717285156, "learning_rate": 9.441091061328067e-08, "loss": 0.5628, "step": 7336 }, { "epoch": 0.94, "grad_norm": 1.2715821266174316, "learning_rate": 9.400999972770553e-08, "loss": 0.5455, "step": 7337 }, { "epoch": 0.94, "grad_norm": 1.5981942415237427, "learning_rate": 9.360993379919026e-08, "loss": 0.5994, "step": 7338 }, { "epoch": 0.94, "grad_norm": 1.1296770572662354, "learning_rate": 9.321071289663753e-08, "loss": 0.6579, "step": 7339 }, { "epoch": 0.94, "grad_norm": 1.32581627368927, "learning_rate": 9.281233708880566e-08, "loss": 0.557, "step": 7340 }, { "epoch": 0.94, "grad_norm": 1.3479461669921875, "learning_rate": 9.241480644430644e-08, "loss": 0.7366, "step": 7341 }, { "epoch": 0.94, "grad_norm": 1.926008701324463, "learning_rate": 9.201812103160624e-08, "loss": 0.6514, "step": 7342 }, { "epoch": 0.94, "grad_norm": 1.4221993684768677, "learning_rate": 9.162228091902593e-08, "loss": 0.5706, "step": 7343 }, { "epoch": 0.94, "grad_norm": 1.6545454263687134, "learning_rate": 9.122728617474153e-08, "loss": 0.6052, "step": 7344 }, { "epoch": 0.94, "grad_norm": 1.3921095132827759, "learning_rate": 9.083313686678252e-08, "loss": 0.5529, "step": 7345 }, { "epoch": 0.94, "grad_norm": 1.1098374128341675, "learning_rate": 9.043983306303295e-08, "loss": 0.7025, "step": 7346 }, { "epoch": 0.94, "grad_norm": 1.4808984994888306, "learning_rate": 9.004737483123194e-08, "loss": 0.6095, "step": 7347 }, { "epoch": 0.94, "grad_norm": 1.2280536890029907, "learning_rate": 8.96557622389721e-08, "loss": 0.6116, "step": 7348 }, { "epoch": 0.94, "grad_norm": 1.2250851392745972, "learning_rate": 8.926499535370058e-08, "loss": 0.5939, "step": 7349 }, { "epoch": 0.94, "grad_norm": 1.9256665706634521, "learning_rate": 8.887507424271913e-08, "loss": 0.5694, "step": 7350 }, { "epoch": 0.94, "grad_norm": 1.2938241958618164, "learning_rate": 8.8485998973184e-08, "loss": 0.5579, "step": 7351 }, { "epoch": 0.94, "grad_norm": 1.3634623289108276, "learning_rate": 8.809776961210493e-08, "loss": 0.5262, "step": 7352 }, { "epoch": 0.94, "grad_norm": 1.161146879196167, "learning_rate": 8.771038622634731e-08, "loss": 0.5316, "step": 7353 }, { "epoch": 0.94, "grad_norm": 1.3339877128601074, "learning_rate": 8.732384888262945e-08, "loss": 0.5637, "step": 7354 }, { "epoch": 0.94, "grad_norm": 1.254198670387268, "learning_rate": 8.693815764752477e-08, "loss": 0.6377, "step": 7355 }, { "epoch": 0.94, "grad_norm": 1.2894302606582642, "learning_rate": 8.65533125874607e-08, "loss": 0.5951, "step": 7356 }, { "epoch": 0.94, "grad_norm": 1.4392236471176147, "learning_rate": 8.616931376871917e-08, "loss": 0.6323, "step": 7357 }, { "epoch": 0.94, "grad_norm": 1.222009539604187, "learning_rate": 8.578616125743567e-08, "loss": 0.6005, "step": 7358 }, { "epoch": 0.94, "grad_norm": 1.019447922706604, "learning_rate": 8.540385511960126e-08, "loss": 0.5459, "step": 7359 }, { "epoch": 0.94, "grad_norm": 1.4560258388519287, "learning_rate": 8.502239542105994e-08, "loss": 0.6215, "step": 7360 }, { "epoch": 0.94, "grad_norm": 1.296736240386963, "learning_rate": 8.464178222750974e-08, "loss": 0.5495, "step": 7361 }, { "epoch": 0.94, "grad_norm": 1.5001124143600464, "learning_rate": 8.426201560450431e-08, "loss": 0.6199, "step": 7362 }, { "epoch": 0.94, "grad_norm": 1.5553319454193115, "learning_rate": 8.388309561745134e-08, "loss": 0.5854, "step": 7363 }, { "epoch": 0.94, "grad_norm": 1.3460683822631836, "learning_rate": 8.350502233161084e-08, "loss": 0.5443, "step": 7364 }, { "epoch": 0.94, "grad_norm": 1.374433994293213, "learning_rate": 8.312779581209852e-08, "loss": 0.6094, "step": 7365 }, { "epoch": 0.94, "grad_norm": 1.3948500156402588, "learning_rate": 8.27514161238846e-08, "loss": 0.5573, "step": 7366 }, { "epoch": 0.94, "grad_norm": 1.369402289390564, "learning_rate": 8.237588333179225e-08, "loss": 0.4974, "step": 7367 }, { "epoch": 0.94, "grad_norm": 1.2202465534210205, "learning_rate": 8.200119750049918e-08, "loss": 0.5171, "step": 7368 }, { "epoch": 0.94, "grad_norm": 1.5176337957382202, "learning_rate": 8.162735869453764e-08, "loss": 0.6837, "step": 7369 }, { "epoch": 0.94, "grad_norm": 2.0145180225372314, "learning_rate": 8.125436697829503e-08, "loss": 0.5776, "step": 7370 }, { "epoch": 0.94, "grad_norm": 1.281848669052124, "learning_rate": 8.08822224160094e-08, "loss": 0.6065, "step": 7371 }, { "epoch": 0.94, "grad_norm": 1.6321043968200684, "learning_rate": 8.051092507177672e-08, "loss": 0.6207, "step": 7372 }, { "epoch": 0.94, "grad_norm": 1.5435349941253662, "learning_rate": 8.014047500954524e-08, "loss": 0.6096, "step": 7373 }, { "epoch": 0.94, "grad_norm": 1.468544840812683, "learning_rate": 7.97708722931162e-08, "loss": 0.6066, "step": 7374 }, { "epoch": 0.94, "grad_norm": 2.739684581756592, "learning_rate": 7.940211698614753e-08, "loss": 0.5939, "step": 7375 }, { "epoch": 0.94, "grad_norm": 1.334362506866455, "learning_rate": 7.903420915214899e-08, "loss": 0.4706, "step": 7376 }, { "epoch": 0.95, "grad_norm": 1.2078897953033447, "learning_rate": 7.866714885448601e-08, "loss": 0.5775, "step": 7377 }, { "epoch": 0.95, "grad_norm": 1.5452244281768799, "learning_rate": 7.830093615637635e-08, "loss": 0.6114, "step": 7378 }, { "epoch": 0.95, "grad_norm": 1.3688369989395142, "learning_rate": 7.793557112089345e-08, "loss": 0.5782, "step": 7379 }, { "epoch": 0.95, "grad_norm": 1.3603882789611816, "learning_rate": 7.757105381096363e-08, "loss": 0.6609, "step": 7380 }, { "epoch": 0.95, "grad_norm": 1.3486891984939575, "learning_rate": 7.72073842893678e-08, "loss": 0.5362, "step": 7381 }, { "epoch": 0.95, "grad_norm": 1.3633257150650024, "learning_rate": 7.684456261874085e-08, "loss": 0.637, "step": 7382 }, { "epoch": 0.95, "grad_norm": 1.2990694046020508, "learning_rate": 7.648258886157056e-08, "loss": 0.5077, "step": 7383 }, { "epoch": 0.95, "grad_norm": 1.217881202697754, "learning_rate": 7.612146308019985e-08, "loss": 0.5874, "step": 7384 }, { "epoch": 0.95, "grad_norm": 1.1617398262023926, "learning_rate": 7.57611853368262e-08, "loss": 0.5252, "step": 7385 }, { "epoch": 0.95, "grad_norm": 1.2338204383850098, "learning_rate": 7.540175569349883e-08, "loss": 0.5152, "step": 7386 }, { "epoch": 0.95, "grad_norm": 1.3661803007125854, "learning_rate": 7.504317421212326e-08, "loss": 0.5535, "step": 7387 }, { "epoch": 0.95, "grad_norm": 1.4716441631317139, "learning_rate": 7.468544095445784e-08, "loss": 0.6154, "step": 7388 }, { "epoch": 0.95, "grad_norm": 1.3351925611495972, "learning_rate": 7.432855598211386e-08, "loss": 0.6322, "step": 7389 }, { "epoch": 0.95, "grad_norm": 1.1938472986221313, "learning_rate": 7.397251935655825e-08, "loss": 0.5131, "step": 7390 }, { "epoch": 0.95, "grad_norm": 1.2754592895507812, "learning_rate": 7.36173311391114e-08, "loss": 0.5288, "step": 7391 }, { "epoch": 0.95, "grad_norm": 1.3025398254394531, "learning_rate": 7.326299139094662e-08, "loss": 0.6064, "step": 7392 }, { "epoch": 0.95, "grad_norm": 1.4515681266784668, "learning_rate": 7.290950017309173e-08, "loss": 0.5584, "step": 7393 }, { "epoch": 0.95, "grad_norm": 1.6512247323989868, "learning_rate": 7.255685754642805e-08, "loss": 0.5702, "step": 7394 }, { "epoch": 0.95, "grad_norm": 1.3409143686294556, "learning_rate": 7.220506357169255e-08, "loss": 0.5642, "step": 7395 }, { "epoch": 0.95, "grad_norm": 1.2338666915893555, "learning_rate": 7.185411830947342e-08, "loss": 0.5008, "step": 7396 }, { "epoch": 0.95, "grad_norm": 1.2696747779846191, "learning_rate": 7.150402182021399e-08, "loss": 0.5333, "step": 7397 }, { "epoch": 0.95, "grad_norm": 1.2724928855895996, "learning_rate": 7.115477416421101e-08, "loss": 0.6582, "step": 7398 }, { "epoch": 0.95, "grad_norm": 1.1926487684249878, "learning_rate": 7.080637540161583e-08, "loss": 0.6197, "step": 7399 }, { "epoch": 0.95, "grad_norm": 1.4144186973571777, "learning_rate": 7.045882559243322e-08, "loss": 0.6195, "step": 7400 }, { "epoch": 0.95, "grad_norm": 1.3399546146392822, "learning_rate": 7.011212479652141e-08, "loss": 0.5682, "step": 7401 }, { "epoch": 0.95, "grad_norm": 1.146112084388733, "learning_rate": 6.976627307359208e-08, "loss": 0.5884, "step": 7402 }, { "epoch": 0.95, "grad_norm": 1.5165796279907227, "learning_rate": 6.94212704832109e-08, "loss": 0.6208, "step": 7403 }, { "epoch": 0.95, "grad_norm": 1.4220664501190186, "learning_rate": 6.90771170847987e-08, "loss": 0.6322, "step": 7404 }, { "epoch": 0.95, "grad_norm": 1.4657684564590454, "learning_rate": 6.873381293762859e-08, "loss": 0.5636, "step": 7405 }, { "epoch": 0.95, "grad_norm": 1.468279480934143, "learning_rate": 6.839135810082664e-08, "loss": 0.5901, "step": 7406 }, { "epoch": 0.95, "grad_norm": 2.511237144470215, "learning_rate": 6.804975263337565e-08, "loss": 0.5578, "step": 7407 }, { "epoch": 0.95, "grad_norm": 1.1953728199005127, "learning_rate": 6.770899659410856e-08, "loss": 0.5636, "step": 7408 }, { "epoch": 0.95, "grad_norm": 1.134719967842102, "learning_rate": 6.736909004171399e-08, "loss": 0.6885, "step": 7409 }, { "epoch": 0.95, "grad_norm": 1.5010466575622559, "learning_rate": 6.703003303473454e-08, "loss": 0.5951, "step": 7410 }, { "epoch": 0.95, "grad_norm": 1.215299367904663, "learning_rate": 6.669182563156574e-08, "loss": 0.5923, "step": 7411 }, { "epoch": 0.95, "grad_norm": 1.380470633506775, "learning_rate": 6.635446789045651e-08, "loss": 0.6177, "step": 7412 }, { "epoch": 0.95, "grad_norm": 1.3253923654556274, "learning_rate": 6.601795986951044e-08, "loss": 0.6369, "step": 7413 }, { "epoch": 0.95, "grad_norm": 1.4012606143951416, "learning_rate": 6.568230162668332e-08, "loss": 0.6044, "step": 7414 }, { "epoch": 0.95, "grad_norm": 1.2084659337997437, "learning_rate": 6.534749321978617e-08, "loss": 0.7131, "step": 7415 }, { "epoch": 0.95, "grad_norm": 1.2429335117340088, "learning_rate": 6.501353470648286e-08, "loss": 0.5506, "step": 7416 }, { "epoch": 0.95, "grad_norm": 1.1838606595993042, "learning_rate": 6.46804261442907e-08, "loss": 0.581, "step": 7417 }, { "epoch": 0.95, "grad_norm": 1.5430114269256592, "learning_rate": 6.43481675905816e-08, "loss": 0.6428, "step": 7418 }, { "epoch": 0.95, "grad_norm": 1.4366458654403687, "learning_rate": 6.40167591025792e-08, "loss": 0.5344, "step": 7419 }, { "epoch": 0.95, "grad_norm": 1.7072436809539795, "learning_rate": 6.368620073736176e-08, "loss": 0.6056, "step": 7420 }, { "epoch": 0.95, "grad_norm": 1.645785927772522, "learning_rate": 6.335649255186315e-08, "loss": 0.6395, "step": 7421 }, { "epoch": 0.95, "grad_norm": 1.3543944358825684, "learning_rate": 6.302763460286687e-08, "loss": 0.5263, "step": 7422 }, { "epoch": 0.95, "grad_norm": 2.483001232147217, "learning_rate": 6.269962694701315e-08, "loss": 0.5472, "step": 7423 }, { "epoch": 0.95, "grad_norm": 1.2230137586593628, "learning_rate": 6.237246964079458e-08, "loss": 0.6058, "step": 7424 }, { "epoch": 0.95, "grad_norm": 1.5250579118728638, "learning_rate": 6.204616274055663e-08, "loss": 0.6588, "step": 7425 }, { "epoch": 0.95, "grad_norm": 1.269244909286499, "learning_rate": 6.172070630249993e-08, "loss": 0.5417, "step": 7426 }, { "epoch": 0.95, "grad_norm": 1.3151706457138062, "learning_rate": 6.139610038267685e-08, "loss": 0.548, "step": 7427 }, { "epoch": 0.95, "grad_norm": 1.407835602760315, "learning_rate": 6.107234503699488e-08, "loss": 0.6501, "step": 7428 }, { "epoch": 0.95, "grad_norm": 1.3132225275039673, "learning_rate": 6.074944032121388e-08, "loss": 0.4961, "step": 7429 }, { "epoch": 0.95, "grad_norm": 1.4748717546463013, "learning_rate": 6.042738629094769e-08, "loss": 0.5878, "step": 7430 }, { "epoch": 0.95, "grad_norm": 2.3013241291046143, "learning_rate": 6.010618300166415e-08, "loss": 0.5828, "step": 7431 }, { "epoch": 0.95, "grad_norm": 1.3468983173370361, "learning_rate": 5.978583050868292e-08, "loss": 0.6095, "step": 7432 }, { "epoch": 0.95, "grad_norm": 2.144441604614258, "learning_rate": 5.946632886717929e-08, "loss": 0.5951, "step": 7433 }, { "epoch": 0.95, "grad_norm": 1.5819791555404663, "learning_rate": 5.914767813217981e-08, "loss": 0.6179, "step": 7434 }, { "epoch": 0.95, "grad_norm": 1.079916000366211, "learning_rate": 5.882987835856668e-08, "loss": 0.6712, "step": 7435 }, { "epoch": 0.95, "grad_norm": 1.3036378622055054, "learning_rate": 5.851292960107391e-08, "loss": 0.5571, "step": 7436 }, { "epoch": 0.95, "grad_norm": 1.3435218334197998, "learning_rate": 5.8196831914289485e-08, "loss": 0.6113, "step": 7437 }, { "epoch": 0.95, "grad_norm": 1.429049015045166, "learning_rate": 5.788158535265431e-08, "loss": 0.5887, "step": 7438 }, { "epoch": 0.95, "grad_norm": 1.3666672706604004, "learning_rate": 5.75671899704644e-08, "loss": 0.6005, "step": 7439 }, { "epoch": 0.95, "grad_norm": 1.053040623664856, "learning_rate": 5.7253645821867546e-08, "loss": 0.4651, "step": 7440 }, { "epoch": 0.95, "grad_norm": 1.2168482542037964, "learning_rate": 5.694095296086444e-08, "loss": 0.5997, "step": 7441 }, { "epoch": 0.95, "grad_norm": 1.2379217147827148, "learning_rate": 5.662911144131145e-08, "loss": 0.6519, "step": 7442 }, { "epoch": 0.95, "grad_norm": 1.175512671470642, "learning_rate": 5.631812131691561e-08, "loss": 0.5748, "step": 7443 }, { "epoch": 0.95, "grad_norm": 1.4115569591522217, "learning_rate": 5.600798264123963e-08, "loss": 0.5639, "step": 7444 }, { "epoch": 0.95, "grad_norm": 1.4109700918197632, "learning_rate": 5.569869546769857e-08, "loss": 0.58, "step": 7445 }, { "epoch": 0.95, "grad_norm": 1.6279006004333496, "learning_rate": 5.5390259849559813e-08, "loss": 0.5886, "step": 7446 }, { "epoch": 0.95, "grad_norm": 1.9307910203933716, "learning_rate": 5.508267583994642e-08, "loss": 0.5608, "step": 7447 }, { "epoch": 0.95, "grad_norm": 1.3651392459869385, "learning_rate": 5.4775943491832684e-08, "loss": 0.6131, "step": 7448 }, { "epoch": 0.95, "grad_norm": 1.3257839679718018, "learning_rate": 5.4470062858047457e-08, "loss": 0.6027, "step": 7449 }, { "epoch": 0.95, "grad_norm": 1.4429939985275269, "learning_rate": 5.4165033991271934e-08, "loss": 0.5893, "step": 7450 }, { "epoch": 0.95, "grad_norm": 1.618682622909546, "learning_rate": 5.3860856944041303e-08, "loss": 0.5665, "step": 7451 }, { "epoch": 0.95, "grad_norm": 1.1508351564407349, "learning_rate": 5.3557531768744784e-08, "loss": 0.6654, "step": 7452 }, { "epoch": 0.95, "grad_norm": 1.1664789915084839, "learning_rate": 5.32550585176228e-08, "loss": 0.6164, "step": 7453 }, { "epoch": 0.95, "grad_norm": 1.3211183547973633, "learning_rate": 5.29534372427698e-08, "loss": 0.61, "step": 7454 }, { "epoch": 0.96, "grad_norm": 1.6111810207366943, "learning_rate": 5.2652667996135884e-08, "loss": 0.602, "step": 7455 }, { "epoch": 0.96, "grad_norm": 5.7178215980529785, "learning_rate": 5.2352750829521294e-08, "loss": 0.5855, "step": 7456 }, { "epoch": 0.96, "grad_norm": 1.387904167175293, "learning_rate": 5.205368579458025e-08, "loss": 0.6116, "step": 7457 }, { "epoch": 0.96, "grad_norm": 6.927826881408691, "learning_rate": 5.175547294282102e-08, "loss": 0.629, "step": 7458 }, { "epoch": 0.96, "grad_norm": 1.1902652978897095, "learning_rate": 5.145811232560527e-08, "loss": 0.5822, "step": 7459 }, { "epoch": 0.96, "grad_norm": 1.764687180519104, "learning_rate": 5.116160399414649e-08, "loss": 0.6164, "step": 7460 }, { "epoch": 0.96, "grad_norm": 1.2136625051498413, "learning_rate": 5.0865947999512166e-08, "loss": 0.5099, "step": 7461 }, { "epoch": 0.96, "grad_norm": 1.5262752771377563, "learning_rate": 5.057114439262378e-08, "loss": 0.5656, "step": 7462 }, { "epoch": 0.96, "grad_norm": 1.331688642501831, "learning_rate": 5.0277193224255175e-08, "loss": 0.5583, "step": 7463 }, { "epoch": 0.96, "grad_norm": 1.4355744123458862, "learning_rate": 4.9984094545033054e-08, "loss": 0.5641, "step": 7464 }, { "epoch": 0.96, "grad_norm": 1.4766184091567993, "learning_rate": 4.969184840543706e-08, "loss": 0.5645, "step": 7465 }, { "epoch": 0.96, "grad_norm": 1.319823980331421, "learning_rate": 4.940045485580247e-08, "loss": 0.5336, "step": 7466 }, { "epoch": 0.96, "grad_norm": 1.2531654834747314, "learning_rate": 4.9109913946314145e-08, "loss": 0.6307, "step": 7467 }, { "epoch": 0.96, "grad_norm": 1.4141623973846436, "learning_rate": 4.882022572701261e-08, "loss": 0.5306, "step": 7468 }, { "epoch": 0.96, "grad_norm": 1.1841273307800293, "learning_rate": 4.8531390247790745e-08, "loss": 0.5981, "step": 7469 }, { "epoch": 0.96, "grad_norm": 1.325745940208435, "learning_rate": 4.824340755839485e-08, "loss": 0.5935, "step": 7470 }, { "epoch": 0.96, "grad_norm": 1.4514849185943604, "learning_rate": 4.795627770842359e-08, "loss": 0.6294, "step": 7471 }, { "epoch": 0.96, "grad_norm": 1.2255972623825073, "learning_rate": 4.767000074732908e-08, "loss": 0.5396, "step": 7472 }, { "epoch": 0.96, "grad_norm": 1.6118378639221191, "learning_rate": 4.738457672441799e-08, "loss": 0.6553, "step": 7473 }, { "epoch": 0.96, "grad_norm": 1.5968915224075317, "learning_rate": 4.710000568884709e-08, "loss": 0.6712, "step": 7474 }, { "epoch": 0.96, "grad_norm": 1.486005425453186, "learning_rate": 4.681628768962887e-08, "loss": 0.6427, "step": 7475 }, { "epoch": 0.96, "grad_norm": 1.2320314645767212, "learning_rate": 4.653342277562811e-08, "loss": 0.7184, "step": 7476 }, { "epoch": 0.96, "grad_norm": 1.1806683540344238, "learning_rate": 4.625141099556252e-08, "loss": 0.7453, "step": 7477 }, { "epoch": 0.96, "grad_norm": 1.3961061239242554, "learning_rate": 4.597025239800212e-08, "loss": 0.5866, "step": 7478 }, { "epoch": 0.96, "grad_norm": 1.8109081983566284, "learning_rate": 4.5689947031371527e-08, "loss": 0.592, "step": 7479 }, { "epoch": 0.96, "grad_norm": 1.4340118169784546, "learning_rate": 4.541049494394767e-08, "loss": 0.5356, "step": 7480 }, { "epoch": 0.96, "grad_norm": 1.403652548789978, "learning_rate": 4.513189618385983e-08, "loss": 0.6765, "step": 7481 }, { "epoch": 0.96, "grad_norm": 1.5432109832763672, "learning_rate": 4.4854150799091836e-08, "loss": 0.4902, "step": 7482 }, { "epoch": 0.96, "grad_norm": 1.5097144842147827, "learning_rate": 4.457725883747932e-08, "loss": 0.6542, "step": 7483 }, { "epoch": 0.96, "grad_norm": 1.4052143096923828, "learning_rate": 4.4301220346710806e-08, "loss": 0.5848, "step": 7484 }, { "epoch": 0.96, "grad_norm": 1.4552314281463623, "learning_rate": 4.402603537432937e-08, "loss": 0.5903, "step": 7485 }, { "epoch": 0.96, "grad_norm": 1.6251884698867798, "learning_rate": 4.3751703967728765e-08, "loss": 0.5735, "step": 7486 }, { "epoch": 0.96, "grad_norm": 1.1430637836456299, "learning_rate": 4.347822617415787e-08, "loss": 0.718, "step": 7487 }, { "epoch": 0.96, "grad_norm": 1.0503935813903809, "learning_rate": 4.320560204071733e-08, "loss": 0.6921, "step": 7488 }, { "epoch": 0.96, "grad_norm": 1.4668529033660889, "learning_rate": 4.293383161436182e-08, "loss": 0.6366, "step": 7489 }, { "epoch": 0.96, "grad_norm": 1.753491759300232, "learning_rate": 4.2662914941897225e-08, "loss": 0.5208, "step": 7490 }, { "epoch": 0.96, "grad_norm": 1.3943778276443481, "learning_rate": 4.239285206998345e-08, "loss": 0.5593, "step": 7491 }, { "epoch": 0.96, "grad_norm": 1.3868759870529175, "learning_rate": 4.212364304513439e-08, "loss": 0.6141, "step": 7492 }, { "epoch": 0.96, "grad_norm": 1.3776869773864746, "learning_rate": 4.185528791371518e-08, "loss": 0.5684, "step": 7493 }, { "epoch": 0.96, "grad_norm": 1.3493459224700928, "learning_rate": 4.158778672194441e-08, "loss": 0.5887, "step": 7494 }, { "epoch": 0.96, "grad_norm": 1.200941801071167, "learning_rate": 4.1321139515894116e-08, "loss": 0.5237, "step": 7495 }, { "epoch": 0.96, "grad_norm": 1.162359595298767, "learning_rate": 4.105534634148867e-08, "loss": 0.5887, "step": 7496 }, { "epoch": 0.96, "grad_norm": 1.3749542236328125, "learning_rate": 4.0790407244505335e-08, "loss": 0.5458, "step": 7497 }, { "epoch": 0.96, "grad_norm": 1.2350449562072754, "learning_rate": 4.052632227057429e-08, "loss": 0.5914, "step": 7498 }, { "epoch": 0.96, "grad_norm": 1.3565806150436401, "learning_rate": 4.02630914651797e-08, "loss": 0.5873, "step": 7499 }, { "epoch": 0.96, "grad_norm": 1.3490086793899536, "learning_rate": 4.0000714873656976e-08, "loss": 0.588, "step": 7500 }, { "epoch": 0.96, "grad_norm": 1.4163496494293213, "learning_rate": 3.973919254119551e-08, "loss": 0.6271, "step": 7501 }, { "epoch": 0.96, "grad_norm": 1.4125710725784302, "learning_rate": 3.947852451283707e-08, "loss": 0.6185, "step": 7502 }, { "epoch": 0.96, "grad_norm": 1.3267148733139038, "learning_rate": 3.921871083347628e-08, "loss": 0.6035, "step": 7503 }, { "epoch": 0.96, "grad_norm": 1.564961314201355, "learning_rate": 3.8959751547861244e-08, "loss": 0.6344, "step": 7504 }, { "epoch": 0.96, "grad_norm": 1.7320315837860107, "learning_rate": 3.870164670059129e-08, "loss": 0.6125, "step": 7505 }, { "epoch": 0.96, "grad_norm": 1.794987440109253, "learning_rate": 3.844439633612085e-08, "loss": 0.5969, "step": 7506 }, { "epoch": 0.96, "grad_norm": 1.538055181503296, "learning_rate": 3.818800049875615e-08, "loss": 0.582, "step": 7507 }, { "epoch": 0.96, "grad_norm": 1.460828185081482, "learning_rate": 3.793245923265576e-08, "loss": 0.6452, "step": 7508 }, { "epoch": 0.96, "grad_norm": 1.2221049070358276, "learning_rate": 3.767777258183114e-08, "loss": 0.5363, "step": 7509 }, { "epoch": 0.96, "grad_norm": 1.714708685874939, "learning_rate": 3.74239405901472e-08, "loss": 0.5413, "step": 7510 }, { "epoch": 0.96, "grad_norm": 1.3533588647842407, "learning_rate": 3.717096330132175e-08, "loss": 0.6549, "step": 7511 }, { "epoch": 0.96, "grad_norm": 1.2847508192062378, "learning_rate": 3.691884075892438e-08, "loss": 0.5459, "step": 7512 }, { "epoch": 0.96, "grad_norm": 1.5215178728103638, "learning_rate": 3.666757300637813e-08, "loss": 0.6188, "step": 7513 }, { "epoch": 0.96, "grad_norm": 1.9925087690353394, "learning_rate": 3.6417160086959505e-08, "loss": 0.5738, "step": 7514 }, { "epoch": 0.96, "grad_norm": 1.384124755859375, "learning_rate": 3.616760204379621e-08, "loss": 0.6303, "step": 7515 }, { "epoch": 0.96, "grad_norm": 1.3988687992095947, "learning_rate": 3.5918898919869436e-08, "loss": 0.5765, "step": 7516 }, { "epoch": 0.96, "grad_norm": 1.3108985424041748, "learning_rate": 3.5671050758013806e-08, "loss": 0.5715, "step": 7517 }, { "epoch": 0.96, "grad_norm": 1.2928988933563232, "learning_rate": 3.542405760091627e-08, "loss": 0.6436, "step": 7518 }, { "epoch": 0.96, "grad_norm": 1.6511348485946655, "learning_rate": 3.5177919491115595e-08, "loss": 0.6539, "step": 7519 }, { "epoch": 0.96, "grad_norm": 1.209773063659668, "learning_rate": 3.493263647100453e-08, "loss": 0.5657, "step": 7520 }, { "epoch": 0.96, "grad_norm": 1.4642279148101807, "learning_rate": 3.468820858282818e-08, "loss": 0.5891, "step": 7521 }, { "epoch": 0.96, "grad_norm": 1.4938738346099854, "learning_rate": 3.444463586868452e-08, "loss": 0.5184, "step": 7522 }, { "epoch": 0.96, "grad_norm": 1.524194598197937, "learning_rate": 3.420191837052278e-08, "loss": 0.5887, "step": 7523 }, { "epoch": 0.96, "grad_norm": 1.2606900930404663, "learning_rate": 3.3960056130147855e-08, "loss": 0.5932, "step": 7524 }, { "epoch": 0.96, "grad_norm": 1.350730538368225, "learning_rate": 3.37190491892142e-08, "loss": 0.5891, "step": 7525 }, { "epoch": 0.96, "grad_norm": 1.3839001655578613, "learning_rate": 3.34788975892314e-08, "loss": 0.5905, "step": 7526 }, { "epoch": 0.96, "grad_norm": 1.6060885190963745, "learning_rate": 3.3239601371559685e-08, "loss": 0.5926, "step": 7527 }, { "epoch": 0.96, "grad_norm": 1.3335615396499634, "learning_rate": 3.3001160577413873e-08, "loss": 0.6283, "step": 7528 }, { "epoch": 0.96, "grad_norm": 1.4750850200653076, "learning_rate": 3.276357524786e-08, "loss": 0.5586, "step": 7529 }, { "epoch": 0.96, "grad_norm": 1.4482122659683228, "learning_rate": 3.252684542381701e-08, "loss": 0.589, "step": 7530 }, { "epoch": 0.96, "grad_norm": 1.4635100364685059, "learning_rate": 3.229097114605784e-08, "loss": 0.6155, "step": 7531 }, { "epoch": 0.96, "grad_norm": 1.282564640045166, "learning_rate": 3.20559524552061e-08, "loss": 0.6131, "step": 7532 }, { "epoch": 0.97, "grad_norm": 1.5637482404708862, "learning_rate": 3.182178939173941e-08, "loss": 0.6289, "step": 7533 }, { "epoch": 0.97, "grad_norm": 1.3659104108810425, "learning_rate": 3.1588481995987743e-08, "loss": 0.5984, "step": 7534 }, { "epoch": 0.97, "grad_norm": 1.998304843902588, "learning_rate": 3.1356030308132834e-08, "loss": 0.5942, "step": 7535 }, { "epoch": 0.97, "grad_norm": 0.9875431656837463, "learning_rate": 3.1124434368210444e-08, "loss": 0.6936, "step": 7536 }, { "epoch": 0.97, "grad_norm": 1.2541671991348267, "learning_rate": 3.089369421610866e-08, "loss": 0.6214, "step": 7537 }, { "epoch": 0.97, "grad_norm": 1.374961018562317, "learning_rate": 3.066380989156625e-08, "loss": 0.5787, "step": 7538 }, { "epoch": 0.97, "grad_norm": 1.4336551427841187, "learning_rate": 3.043478143417766e-08, "loss": 0.6287, "step": 7539 }, { "epoch": 0.97, "grad_norm": 1.1535704135894775, "learning_rate": 3.0206608883387444e-08, "loss": 0.7096, "step": 7540 }, { "epoch": 0.97, "grad_norm": 1.3666244745254517, "learning_rate": 2.997929227849416e-08, "loss": 0.5903, "step": 7541 }, { "epoch": 0.97, "grad_norm": 1.4516652822494507, "learning_rate": 2.9752831658648175e-08, "loss": 0.6077, "step": 7542 }, { "epoch": 0.97, "grad_norm": 1.3812227249145508, "learning_rate": 2.952722706285327e-08, "loss": 0.6462, "step": 7543 }, { "epoch": 0.97, "grad_norm": 1.4420405626296997, "learning_rate": 2.9302478529964484e-08, "loss": 0.6442, "step": 7544 }, { "epoch": 0.97, "grad_norm": 1.4087380170822144, "learning_rate": 2.907858609869085e-08, "loss": 0.5694, "step": 7545 }, { "epoch": 0.97, "grad_norm": 1.8472365140914917, "learning_rate": 2.8855549807592644e-08, "loss": 0.6193, "step": 7546 }, { "epoch": 0.97, "grad_norm": 1.2451483011245728, "learning_rate": 2.8633369695084125e-08, "loss": 0.5944, "step": 7547 }, { "epoch": 0.97, "grad_norm": 1.3169342279434204, "learning_rate": 2.84120457994308e-08, "loss": 0.5901, "step": 7548 }, { "epoch": 0.97, "grad_norm": 1.3715953826904297, "learning_rate": 2.8191578158751064e-08, "loss": 0.6093, "step": 7549 }, { "epoch": 0.97, "grad_norm": 1.386909008026123, "learning_rate": 2.7971966811016772e-08, "loss": 0.5776, "step": 7550 }, { "epoch": 0.97, "grad_norm": 1.138466715812683, "learning_rate": 2.775321179405044e-08, "loss": 0.59, "step": 7551 }, { "epoch": 0.97, "grad_norm": 2.033170223236084, "learning_rate": 2.7535313145528597e-08, "loss": 0.5274, "step": 7552 }, { "epoch": 0.97, "grad_norm": 1.6652302742004395, "learning_rate": 2.7318270902980116e-08, "loss": 0.5159, "step": 7553 }, { "epoch": 0.97, "grad_norm": 1.2595553398132324, "learning_rate": 2.7102085103786203e-08, "loss": 0.5573, "step": 7554 }, { "epoch": 0.97, "grad_norm": 1.474131464958191, "learning_rate": 2.688675578517985e-08, "loss": 0.6597, "step": 7555 }, { "epoch": 0.97, "grad_norm": 1.894659399986267, "learning_rate": 2.6672282984248066e-08, "loss": 0.5857, "step": 7556 }, { "epoch": 0.97, "grad_norm": 1.4702141284942627, "learning_rate": 2.6458666737927962e-08, "loss": 0.6389, "step": 7557 }, { "epoch": 0.97, "grad_norm": 1.2070083618164062, "learning_rate": 2.624590708301178e-08, "loss": 0.6223, "step": 7558 }, { "epoch": 0.97, "grad_norm": 1.6800892353057861, "learning_rate": 2.6034004056143535e-08, "loss": 0.5541, "step": 7559 }, { "epoch": 0.97, "grad_norm": 1.4756264686584473, "learning_rate": 2.582295769381793e-08, "loss": 0.5985, "step": 7560 }, { "epoch": 0.97, "grad_norm": 1.7296569347381592, "learning_rate": 2.5612768032383662e-08, "loss": 0.5712, "step": 7561 }, { "epoch": 0.97, "grad_norm": 1.2623050212860107, "learning_rate": 2.540343510804233e-08, "loss": 0.5563, "step": 7562 }, { "epoch": 0.97, "grad_norm": 1.2337028980255127, "learning_rate": 2.519495895684676e-08, "loss": 0.6952, "step": 7563 }, { "epoch": 0.97, "grad_norm": 1.9761078357696533, "learning_rate": 2.498733961470268e-08, "loss": 0.6188, "step": 7564 }, { "epoch": 0.97, "grad_norm": 1.36776864528656, "learning_rate": 2.47805771173687e-08, "loss": 0.5815, "step": 7565 }, { "epoch": 0.97, "grad_norm": 1.5808404684066772, "learning_rate": 2.4574671500455227e-08, "loss": 0.6603, "step": 7566 }, { "epoch": 0.97, "grad_norm": 1.1816710233688354, "learning_rate": 2.4369622799425008e-08, "loss": 0.6366, "step": 7567 }, { "epoch": 0.97, "grad_norm": 1.2168693542480469, "learning_rate": 2.416543104959368e-08, "loss": 0.593, "step": 7568 }, { "epoch": 0.97, "grad_norm": 1.2128366231918335, "learning_rate": 2.3962096286129778e-08, "loss": 0.6183, "step": 7569 }, { "epoch": 0.97, "grad_norm": 1.4425259828567505, "learning_rate": 2.375961854405251e-08, "loss": 0.6046, "step": 7570 }, { "epoch": 0.97, "grad_norm": 1.308990716934204, "learning_rate": 2.3557997858235647e-08, "loss": 0.5935, "step": 7571 }, { "epoch": 0.97, "grad_norm": 1.4226394891738892, "learning_rate": 2.3357234263403083e-08, "loss": 0.6166, "step": 7572 }, { "epoch": 0.97, "grad_norm": 1.415466070175171, "learning_rate": 2.3157327794133267e-08, "loss": 0.5839, "step": 7573 }, { "epoch": 0.97, "grad_norm": 1.2484937906265259, "learning_rate": 2.2958278484855323e-08, "loss": 0.5463, "step": 7574 }, { "epoch": 0.97, "grad_norm": 1.1190849542617798, "learning_rate": 2.2760086369851832e-08, "loss": 0.582, "step": 7575 }, { "epoch": 0.97, "grad_norm": 1.3897758722305298, "learning_rate": 2.2562751483257706e-08, "loss": 0.5186, "step": 7576 }, { "epoch": 0.97, "grad_norm": 1.5645219087600708, "learning_rate": 2.2366273859059095e-08, "loss": 0.5141, "step": 7577 }, { "epoch": 0.97, "grad_norm": 1.2559103965759277, "learning_rate": 2.21706535310956e-08, "loss": 0.5594, "step": 7578 }, { "epoch": 0.97, "grad_norm": 1.294405460357666, "learning_rate": 2.197589053305915e-08, "loss": 0.6049, "step": 7579 }, { "epoch": 0.97, "grad_norm": 1.4571852684020996, "learning_rate": 2.1781984898492926e-08, "loss": 0.6056, "step": 7580 }, { "epoch": 0.97, "grad_norm": 1.2196654081344604, "learning_rate": 2.158893666079409e-08, "loss": 0.5471, "step": 7581 }, { "epoch": 0.97, "grad_norm": 1.3833738565444946, "learning_rate": 2.1396745853211054e-08, "loss": 0.5686, "step": 7582 }, { "epoch": 0.97, "grad_norm": 1.3073983192443848, "learning_rate": 2.1205412508844557e-08, "loss": 0.662, "step": 7583 }, { "epoch": 0.97, "grad_norm": 1.2904523611068726, "learning_rate": 2.1014936660647688e-08, "loss": 0.6103, "step": 7584 }, { "epoch": 0.97, "grad_norm": 1.7544934749603271, "learning_rate": 2.0825318341426982e-08, "loss": 0.5528, "step": 7585 }, { "epoch": 0.97, "grad_norm": 1.464415192604065, "learning_rate": 2.063655758383909e-08, "loss": 0.6172, "step": 7586 }, { "epoch": 0.97, "grad_norm": 2.500410795211792, "learning_rate": 2.0448654420395232e-08, "loss": 0.6548, "step": 7587 }, { "epoch": 0.97, "grad_norm": 1.2668145895004272, "learning_rate": 2.0261608883457297e-08, "loss": 0.5142, "step": 7588 }, { "epoch": 0.97, "grad_norm": 1.5883921384811401, "learning_rate": 2.007542100524007e-08, "loss": 0.5204, "step": 7589 }, { "epoch": 0.97, "grad_norm": 1.5936336517333984, "learning_rate": 1.9890090817811237e-08, "loss": 0.5901, "step": 7590 }, { "epoch": 0.97, "grad_norm": 1.3055000305175781, "learning_rate": 1.970561835308915e-08, "loss": 0.6495, "step": 7591 }, { "epoch": 0.97, "grad_norm": 1.4675624370574951, "learning_rate": 1.952200364284673e-08, "loss": 0.6421, "step": 7592 }, { "epoch": 0.97, "grad_norm": 1.198490023612976, "learning_rate": 1.933924671870646e-08, "loss": 0.6123, "step": 7593 }, { "epoch": 0.97, "grad_norm": 1.3107367753982544, "learning_rate": 1.915734761214594e-08, "loss": 0.5849, "step": 7594 }, { "epoch": 0.97, "grad_norm": 1.3395153284072876, "learning_rate": 1.8976306354492323e-08, "loss": 0.6503, "step": 7595 }, { "epoch": 0.97, "grad_norm": 1.2745283842086792, "learning_rate": 1.8796122976927343e-08, "loss": 0.5744, "step": 7596 }, { "epoch": 0.97, "grad_norm": 1.3492122888565063, "learning_rate": 1.8616797510483396e-08, "loss": 0.6037, "step": 7597 }, { "epoch": 0.97, "grad_norm": 1.5924208164215088, "learning_rate": 1.8438329986045224e-08, "loss": 0.6054, "step": 7598 }, { "epoch": 0.97, "grad_norm": 1.8689661026000977, "learning_rate": 1.8260720434351008e-08, "loss": 0.6192, "step": 7599 }, { "epoch": 0.97, "grad_norm": 1.1037673950195312, "learning_rate": 1.8083968885990178e-08, "loss": 0.657, "step": 7600 }, { "epoch": 0.97, "grad_norm": 1.0584919452667236, "learning_rate": 1.7908075371404487e-08, "loss": 0.693, "step": 7601 }, { "epoch": 0.97, "grad_norm": 1.1882449388504028, "learning_rate": 1.7733039920887484e-08, "loss": 0.5358, "step": 7602 }, { "epoch": 0.97, "grad_norm": 1.2444685697555542, "learning_rate": 1.755886256458672e-08, "loss": 0.6336, "step": 7603 }, { "epoch": 0.97, "grad_norm": 1.3037827014923096, "learning_rate": 1.738554333249931e-08, "loss": 0.6433, "step": 7604 }, { "epoch": 0.97, "grad_norm": 1.2642031908035278, "learning_rate": 1.721308225447693e-08, "loss": 0.5269, "step": 7605 }, { "epoch": 0.97, "grad_norm": 1.2133139371871948, "learning_rate": 1.704147936022249e-08, "loss": 0.6345, "step": 7606 }, { "epoch": 0.97, "grad_norm": 1.2551854848861694, "learning_rate": 1.687073467929068e-08, "loss": 0.503, "step": 7607 }, { "epoch": 0.97, "grad_norm": 1.7916251420974731, "learning_rate": 1.670084824108853e-08, "loss": 0.5754, "step": 7608 }, { "epoch": 0.97, "grad_norm": 1.8876594305038452, "learning_rate": 1.6531820074876526e-08, "loss": 0.6057, "step": 7609 }, { "epoch": 0.97, "grad_norm": 1.3388862609863281, "learning_rate": 1.636365020976527e-08, "loss": 0.5536, "step": 7610 }, { "epoch": 0.98, "grad_norm": 1.126447081565857, "learning_rate": 1.619633867471937e-08, "loss": 0.5904, "step": 7611 }, { "epoch": 0.98, "grad_norm": 1.4953137636184692, "learning_rate": 1.6029885498554108e-08, "loss": 0.6016, "step": 7612 }, { "epoch": 0.98, "grad_norm": 1.5743541717529297, "learning_rate": 1.5864290709938778e-08, "loss": 0.6243, "step": 7613 }, { "epoch": 0.98, "grad_norm": 1.5609896183013916, "learning_rate": 1.5699554337392786e-08, "loss": 0.6461, "step": 7614 }, { "epoch": 0.98, "grad_norm": 1.3598272800445557, "learning_rate": 1.5535676409288435e-08, "loss": 0.6131, "step": 7615 }, { "epoch": 0.98, "grad_norm": 1.5744637250900269, "learning_rate": 1.5372656953851482e-08, "loss": 0.5648, "step": 7616 }, { "epoch": 0.98, "grad_norm": 2.973106622695923, "learning_rate": 1.521049599915725e-08, "loss": 0.6274, "step": 7617 }, { "epoch": 0.98, "grad_norm": 1.3239871263504028, "learning_rate": 1.5049193573136168e-08, "loss": 0.6198, "step": 7618 }, { "epoch": 0.98, "grad_norm": 1.3224722146987915, "learning_rate": 1.4888749703568239e-08, "loss": 0.521, "step": 7619 }, { "epoch": 0.98, "grad_norm": 1.16157865524292, "learning_rate": 1.4729164418087472e-08, "loss": 0.5791, "step": 7620 }, { "epoch": 0.98, "grad_norm": 1.2755320072174072, "learning_rate": 1.4570437744177989e-08, "loss": 0.5609, "step": 7621 }, { "epoch": 0.98, "grad_norm": 1.4971299171447754, "learning_rate": 1.441256970917848e-08, "loss": 0.5949, "step": 7622 }, { "epoch": 0.98, "grad_norm": 1.2407373189926147, "learning_rate": 1.4255560340277753e-08, "loss": 0.5842, "step": 7623 }, { "epoch": 0.98, "grad_norm": 1.3337568044662476, "learning_rate": 1.409940966451806e-08, "loss": 0.5259, "step": 7624 }, { "epoch": 0.98, "grad_norm": 1.6187931299209595, "learning_rate": 1.3944117708792338e-08, "loss": 0.4729, "step": 7625 }, { "epoch": 0.98, "grad_norm": 1.2470555305480957, "learning_rate": 1.3789684499846967e-08, "loss": 0.5533, "step": 7626 }, { "epoch": 0.98, "grad_norm": 1.2147272825241089, "learning_rate": 1.3636110064280117e-08, "loss": 0.5493, "step": 7627 }, { "epoch": 0.98, "grad_norm": 1.2246286869049072, "learning_rate": 1.348339442854174e-08, "loss": 0.5352, "step": 7628 }, { "epoch": 0.98, "grad_norm": 1.2009773254394531, "learning_rate": 1.3331537618934132e-08, "loss": 0.6325, "step": 7629 }, { "epoch": 0.98, "grad_norm": 1.299921989440918, "learning_rate": 1.318053966161137e-08, "loss": 0.4974, "step": 7630 }, { "epoch": 0.98, "grad_norm": 1.8579556941986084, "learning_rate": 1.3030400582579317e-08, "loss": 0.5998, "step": 7631 }, { "epoch": 0.98, "grad_norm": 1.709477424621582, "learning_rate": 1.2881120407696734e-08, "loss": 0.6101, "step": 7632 }, { "epoch": 0.98, "grad_norm": 1.3852404356002808, "learning_rate": 1.2732699162674721e-08, "loss": 0.537, "step": 7633 }, { "epoch": 0.98, "grad_norm": 1.1303093433380127, "learning_rate": 1.2585136873075055e-08, "loss": 0.6092, "step": 7634 }, { "epoch": 0.98, "grad_norm": 1.6485886573791504, "learning_rate": 1.2438433564312958e-08, "loss": 0.5874, "step": 7635 }, { "epoch": 0.98, "grad_norm": 1.3467371463775635, "learning_rate": 1.2292589261654332e-08, "loss": 0.5349, "step": 7636 }, { "epoch": 0.98, "grad_norm": 1.4503722190856934, "learning_rate": 1.214760399021797e-08, "loss": 0.6611, "step": 7637 }, { "epoch": 0.98, "grad_norm": 1.5155328512191772, "learning_rate": 1.200347777497557e-08, "loss": 0.489, "step": 7638 }, { "epoch": 0.98, "grad_norm": 1.5880452394485474, "learning_rate": 1.1860210640748936e-08, "loss": 0.6368, "step": 7639 }, { "epoch": 0.98, "grad_norm": 1.2064660787582397, "learning_rate": 1.1717802612213336e-08, "loss": 0.5961, "step": 7640 }, { "epoch": 0.98, "grad_norm": 1.406825304031372, "learning_rate": 1.1576253713895258e-08, "loss": 0.5817, "step": 7641 }, { "epoch": 0.98, "grad_norm": 2.6012203693389893, "learning_rate": 1.1435563970174645e-08, "loss": 0.594, "step": 7642 }, { "epoch": 0.98, "grad_norm": 1.1902316808700562, "learning_rate": 1.1295733405281006e-08, "loss": 0.6312, "step": 7643 }, { "epoch": 0.98, "grad_norm": 1.4329363107681274, "learning_rate": 1.1156762043298963e-08, "loss": 0.5915, "step": 7644 }, { "epoch": 0.98, "grad_norm": 1.167524814605713, "learning_rate": 1.1018649908162149e-08, "loss": 0.6717, "step": 7645 }, { "epoch": 0.98, "grad_norm": 1.4185863733291626, "learning_rate": 1.0881397023658202e-08, "loss": 0.6306, "step": 7646 }, { "epoch": 0.98, "grad_norm": 1.5840102434158325, "learning_rate": 1.0745003413425992e-08, "loss": 0.5838, "step": 7647 }, { "epoch": 0.98, "grad_norm": 1.3226847648620605, "learning_rate": 1.0609469100956171e-08, "loss": 0.5167, "step": 7648 }, { "epoch": 0.98, "grad_norm": 1.2246990203857422, "learning_rate": 1.0474794109592845e-08, "loss": 0.5422, "step": 7649 }, { "epoch": 0.98, "grad_norm": 1.3924264907836914, "learning_rate": 1.0340978462529682e-08, "loss": 0.6216, "step": 7650 }, { "epoch": 0.98, "grad_norm": 1.2300564050674438, "learning_rate": 1.0208022182814914e-08, "loss": 0.4941, "step": 7651 }, { "epoch": 0.98, "grad_norm": 1.2392950057983398, "learning_rate": 1.0075925293346888e-08, "loss": 0.5609, "step": 7652 }, { "epoch": 0.98, "grad_norm": 1.3816360235214233, "learning_rate": 9.944687816876296e-09, "loss": 0.592, "step": 7653 }, { "epoch": 0.98, "grad_norm": 1.271823763847351, "learning_rate": 9.814309776006725e-09, "loss": 0.5909, "step": 7654 }, { "epoch": 0.98, "grad_norm": 1.333675503730774, "learning_rate": 9.684791193193543e-09, "loss": 0.6167, "step": 7655 }, { "epoch": 0.98, "grad_norm": 1.711577296257019, "learning_rate": 9.556132090742797e-09, "loss": 0.5504, "step": 7656 }, { "epoch": 0.98, "grad_norm": 1.2807025909423828, "learning_rate": 9.42833249081343e-09, "loss": 0.6422, "step": 7657 }, { "epoch": 0.98, "grad_norm": 2.3574330806732178, "learning_rate": 9.301392415417276e-09, "loss": 0.5771, "step": 7658 }, { "epoch": 0.98, "grad_norm": 1.2664991617202759, "learning_rate": 9.175311886416294e-09, "loss": 0.5728, "step": 7659 }, { "epoch": 0.98, "grad_norm": 1.3596203327178955, "learning_rate": 9.05009092552478e-09, "loss": 0.5903, "step": 7660 }, { "epoch": 0.98, "grad_norm": 1.3723080158233643, "learning_rate": 8.925729554311036e-09, "loss": 0.638, "step": 7661 }, { "epoch": 0.98, "grad_norm": 1.689207673072815, "learning_rate": 8.80222779419293e-09, "loss": 0.6545, "step": 7662 }, { "epoch": 0.98, "grad_norm": 1.2211661338806152, "learning_rate": 8.679585666441226e-09, "loss": 0.594, "step": 7663 }, { "epoch": 0.98, "grad_norm": 1.2049944400787354, "learning_rate": 8.557803192178471e-09, "loss": 0.5486, "step": 7664 }, { "epoch": 0.98, "grad_norm": 1.2144725322723389, "learning_rate": 8.436880392379e-09, "loss": 0.6328, "step": 7665 }, { "epoch": 0.98, "grad_norm": 1.2572896480560303, "learning_rate": 8.316817287870039e-09, "loss": 0.6196, "step": 7666 }, { "epoch": 0.98, "grad_norm": 1.505215048789978, "learning_rate": 8.197613899329493e-09, "loss": 0.599, "step": 7667 }, { "epoch": 0.98, "grad_norm": 1.4084683656692505, "learning_rate": 8.079270247287607e-09, "loss": 0.5462, "step": 7668 }, { "epoch": 0.98, "grad_norm": 1.439065933227539, "learning_rate": 7.961786352126411e-09, "loss": 0.5704, "step": 7669 }, { "epoch": 0.98, "grad_norm": 1.5005910396575928, "learning_rate": 7.845162234081382e-09, "loss": 0.6232, "step": 7670 }, { "epoch": 0.98, "grad_norm": 1.1550309658050537, "learning_rate": 7.729397913237013e-09, "loss": 0.5539, "step": 7671 }, { "epoch": 0.98, "grad_norm": 1.173595666885376, "learning_rate": 7.614493409532909e-09, "loss": 0.5558, "step": 7672 }, { "epoch": 0.98, "grad_norm": 1.4196832180023193, "learning_rate": 7.500448742757682e-09, "loss": 0.566, "step": 7673 }, { "epoch": 0.98, "grad_norm": 1.2495452165603638, "learning_rate": 7.387263932554511e-09, "loss": 0.6306, "step": 7674 }, { "epoch": 0.98, "grad_norm": 1.6128349304199219, "learning_rate": 7.274938998415581e-09, "loss": 0.5693, "step": 7675 }, { "epoch": 0.98, "grad_norm": 2.603642702102661, "learning_rate": 7.163473959688194e-09, "loss": 0.5928, "step": 7676 }, { "epoch": 0.98, "grad_norm": 1.5382452011108398, "learning_rate": 7.052868835569215e-09, "loss": 0.5689, "step": 7677 }, { "epoch": 0.98, "grad_norm": 1.2661441564559937, "learning_rate": 6.943123645107852e-09, "loss": 0.5303, "step": 7678 }, { "epoch": 0.98, "grad_norm": 1.5901883840560913, "learning_rate": 6.8342384072056515e-09, "loss": 0.5756, "step": 7679 }, { "epoch": 0.98, "grad_norm": 1.394724726676941, "learning_rate": 6.7262131406165e-09, "loss": 0.6474, "step": 7680 }, { "epoch": 0.98, "grad_norm": 1.2976278066635132, "learning_rate": 6.6190478639444056e-09, "loss": 0.5856, "step": 7681 }, { "epoch": 0.98, "grad_norm": 1.3308930397033691, "learning_rate": 6.512742595647381e-09, "loss": 0.5754, "step": 7682 }, { "epoch": 0.98, "grad_norm": 1.4971197843551636, "learning_rate": 6.407297354034115e-09, "loss": 0.6073, "step": 7683 }, { "epoch": 0.98, "grad_norm": 1.3769445419311523, "learning_rate": 6.302712157265634e-09, "loss": 0.584, "step": 7684 }, { "epoch": 0.98, "grad_norm": 1.4821816682815552, "learning_rate": 6.198987023353642e-09, "loss": 0.585, "step": 7685 }, { "epoch": 0.98, "grad_norm": 1.0962272882461548, "learning_rate": 6.096121970163849e-09, "loss": 0.5703, "step": 7686 }, { "epoch": 0.98, "grad_norm": 1.3945462703704834, "learning_rate": 5.994117015412082e-09, "loss": 0.5628, "step": 7687 }, { "epoch": 0.98, "grad_norm": 1.610139012336731, "learning_rate": 5.892972176666512e-09, "loss": 0.6201, "step": 7688 }, { "epoch": 0.99, "grad_norm": 1.2614445686340332, "learning_rate": 5.792687471347646e-09, "loss": 0.6168, "step": 7689 }, { "epoch": 0.99, "grad_norm": 1.807807207107544, "learning_rate": 5.693262916726672e-09, "loss": 0.5201, "step": 7690 }, { "epoch": 0.99, "grad_norm": 1.3365129232406616, "learning_rate": 5.594698529928777e-09, "loss": 0.5822, "step": 7691 }, { "epoch": 0.99, "grad_norm": 1.4841192960739136, "learning_rate": 5.496994327928717e-09, "loss": 0.5317, "step": 7692 }, { "epoch": 0.99, "grad_norm": 1.2918846607208252, "learning_rate": 5.4001503275546984e-09, "loss": 0.651, "step": 7693 }, { "epoch": 0.99, "grad_norm": 1.43020761013031, "learning_rate": 5.304166545485046e-09, "loss": 0.6198, "step": 7694 }, { "epoch": 0.99, "grad_norm": 1.3098740577697754, "learning_rate": 5.209042998252645e-09, "loss": 0.5555, "step": 7695 }, { "epoch": 0.99, "grad_norm": 1.4694414138793945, "learning_rate": 5.114779702238837e-09, "loss": 0.6245, "step": 7696 }, { "epoch": 0.99, "grad_norm": 1.2974458932876587, "learning_rate": 5.021376673680078e-09, "loss": 0.6221, "step": 7697 }, { "epoch": 0.99, "grad_norm": 1.057411789894104, "learning_rate": 4.928833928661836e-09, "loss": 0.6633, "step": 7698 }, { "epoch": 0.99, "grad_norm": 1.225907325744629, "learning_rate": 4.837151483123581e-09, "loss": 0.5786, "step": 7699 }, { "epoch": 0.99, "grad_norm": 1.4693338871002197, "learning_rate": 4.746329352856016e-09, "loss": 0.5585, "step": 7700 }, { "epoch": 0.99, "grad_norm": 1.21501624584198, "learning_rate": 4.6563675534999635e-09, "loss": 0.5666, "step": 7701 }, { "epoch": 0.99, "grad_norm": 1.2666512727737427, "learning_rate": 4.567266100550805e-09, "loss": 0.5707, "step": 7702 }, { "epoch": 0.99, "grad_norm": 1.1822082996368408, "learning_rate": 4.4790250093545984e-09, "loss": 0.474, "step": 7703 }, { "epoch": 0.99, "grad_norm": 1.4030053615570068, "learning_rate": 4.3916442951080775e-09, "loss": 0.5502, "step": 7704 }, { "epoch": 0.99, "grad_norm": 1.361227035522461, "learning_rate": 4.305123972861424e-09, "loss": 0.5675, "step": 7705 }, { "epoch": 0.99, "grad_norm": 1.310667634010315, "learning_rate": 4.219464057516054e-09, "loss": 0.6493, "step": 7706 }, { "epoch": 0.99, "grad_norm": 1.2178518772125244, "learning_rate": 4.1346645638246084e-09, "loss": 0.5146, "step": 7707 }, { "epoch": 0.99, "grad_norm": 1.443044900894165, "learning_rate": 4.0507255063926275e-09, "loss": 0.6149, "step": 7708 }, { "epoch": 0.99, "grad_norm": 1.3189752101898193, "learning_rate": 3.96764689967688e-09, "loss": 0.5632, "step": 7709 }, { "epoch": 0.99, "grad_norm": 1.260591983795166, "learning_rate": 3.885428757985366e-09, "loss": 0.5505, "step": 7710 }, { "epoch": 0.99, "grad_norm": 1.4206947088241577, "learning_rate": 3.804071095479533e-09, "loss": 0.614, "step": 7711 }, { "epoch": 0.99, "grad_norm": 1.2580530643463135, "learning_rate": 3.7235739261703983e-09, "loss": 0.5879, "step": 7712 }, { "epoch": 0.99, "grad_norm": 1.525988221168518, "learning_rate": 3.643937263922981e-09, "loss": 0.6059, "step": 7713 }, { "epoch": 0.99, "grad_norm": 1.3162834644317627, "learning_rate": 3.5651611224524207e-09, "loss": 0.6687, "step": 7714 }, { "epoch": 0.99, "grad_norm": 1.2362546920776367, "learning_rate": 3.4872455153267536e-09, "loss": 0.6078, "step": 7715 }, { "epoch": 0.99, "grad_norm": 1.3531043529510498, "learning_rate": 3.41019045596469e-09, "loss": 0.5203, "step": 7716 }, { "epoch": 0.99, "grad_norm": 1.5174033641815186, "learning_rate": 3.3339959576378365e-09, "loss": 0.5899, "step": 7717 }, { "epoch": 0.99, "grad_norm": 1.8559000492095947, "learning_rate": 3.258662033469584e-09, "loss": 0.5911, "step": 7718 }, { "epoch": 0.99, "grad_norm": 1.32603919506073, "learning_rate": 3.1841886964339987e-09, "loss": 0.6394, "step": 7719 }, { "epoch": 0.99, "grad_norm": 1.5879665613174438, "learning_rate": 3.110575959358042e-09, "loss": 0.59, "step": 7720 }, { "epoch": 0.99, "grad_norm": 1.3852684497833252, "learning_rate": 3.037823834919906e-09, "loss": 0.5394, "step": 7721 }, { "epoch": 0.99, "grad_norm": 1.2205983400344849, "learning_rate": 2.9659323356490134e-09, "loss": 0.7311, "step": 7722 }, { "epoch": 0.99, "grad_norm": 1.312951922416687, "learning_rate": 2.8949014739282354e-09, "loss": 0.4707, "step": 7723 }, { "epoch": 0.99, "grad_norm": 1.5596814155578613, "learning_rate": 2.8247312619905653e-09, "loss": 0.606, "step": 7724 }, { "epoch": 0.99, "grad_norm": 1.2269487380981445, "learning_rate": 2.7554217119213354e-09, "loss": 0.5177, "step": 7725 }, { "epoch": 0.99, "grad_norm": 1.2579458951950073, "learning_rate": 2.6869728356582192e-09, "loss": 0.55, "step": 7726 }, { "epoch": 0.99, "grad_norm": 1.6828994750976562, "learning_rate": 2.6193846449901193e-09, "loss": 0.6079, "step": 7727 }, { "epoch": 0.99, "grad_norm": 1.452315330505371, "learning_rate": 2.5526571515571696e-09, "loss": 0.5791, "step": 7728 }, { "epoch": 0.99, "grad_norm": 1.2305513620376587, "learning_rate": 2.4867903668518433e-09, "loss": 0.5592, "step": 7729 }, { "epoch": 0.99, "grad_norm": 1.548684000968933, "learning_rate": 2.4217843022189546e-09, "loss": 0.4734, "step": 7730 }, { "epoch": 0.99, "grad_norm": 1.631661295890808, "learning_rate": 2.3576389688539924e-09, "loss": 0.5776, "step": 7731 }, { "epoch": 0.99, "grad_norm": 1.554509162902832, "learning_rate": 2.294354377804786e-09, "loss": 0.604, "step": 7732 }, { "epoch": 0.99, "grad_norm": 1.5531501770019531, "learning_rate": 2.2319305399715054e-09, "loss": 0.5192, "step": 7733 }, { "epoch": 0.99, "grad_norm": 1.3193473815917969, "learning_rate": 2.170367466103884e-09, "loss": 0.5963, "step": 7734 }, { "epoch": 0.99, "grad_norm": 1.141550898551941, "learning_rate": 2.1096651668062184e-09, "loss": 0.5749, "step": 7735 }, { "epoch": 0.99, "grad_norm": 1.7103443145751953, "learning_rate": 2.0498236525329228e-09, "loss": 0.5586, "step": 7736 }, { "epoch": 0.99, "grad_norm": 1.4387218952178955, "learning_rate": 1.990842933590198e-09, "loss": 0.5993, "step": 7737 }, { "epoch": 0.99, "grad_norm": 1.195109486579895, "learning_rate": 1.932723020136584e-09, "loss": 0.5556, "step": 7738 }, { "epoch": 0.99, "grad_norm": 1.2314635515213013, "learning_rate": 1.875463922181853e-09, "loss": 0.5436, "step": 7739 }, { "epoch": 0.99, "grad_norm": 1.4578646421432495, "learning_rate": 1.8190656495875591e-09, "loss": 0.6199, "step": 7740 }, { "epoch": 0.99, "grad_norm": 1.4149136543273926, "learning_rate": 1.7635282120676e-09, "loss": 0.5495, "step": 7741 }, { "epoch": 0.99, "grad_norm": 1.971264362335205, "learning_rate": 1.7088516191871018e-09, "loss": 0.6472, "step": 7742 }, { "epoch": 0.99, "grad_norm": 1.5405484437942505, "learning_rate": 1.655035880362421e-09, "loss": 0.5643, "step": 7743 }, { "epoch": 0.99, "grad_norm": 1.398697853088379, "learning_rate": 1.602081004863365e-09, "loss": 0.5151, "step": 7744 }, { "epoch": 0.99, "grad_norm": 1.603548288345337, "learning_rate": 1.5499870018093054e-09, "loss": 0.5618, "step": 7745 }, { "epoch": 0.99, "grad_norm": 2.4291934967041016, "learning_rate": 1.49875388017251e-09, "loss": 0.7038, "step": 7746 }, { "epoch": 0.99, "grad_norm": 1.4780473709106445, "learning_rate": 1.4483816487770309e-09, "loss": 0.6409, "step": 7747 }, { "epoch": 0.99, "grad_norm": 1.2052099704742432, "learning_rate": 1.398870316298151e-09, "loss": 0.5907, "step": 7748 }, { "epoch": 0.99, "grad_norm": 1.4878346920013428, "learning_rate": 1.3502198912640484e-09, "loss": 0.649, "step": 7749 }, { "epoch": 0.99, "grad_norm": 1.4148633480072021, "learning_rate": 1.3024303820530216e-09, "loss": 0.5942, "step": 7750 }, { "epoch": 0.99, "grad_norm": 1.2521181106567383, "learning_rate": 1.2555017968962635e-09, "loss": 0.5044, "step": 7751 }, { "epoch": 0.99, "grad_norm": 1.4545745849609375, "learning_rate": 1.209434143875643e-09, "loss": 0.5946, "step": 7752 }, { "epoch": 0.99, "grad_norm": 1.1492974758148193, "learning_rate": 1.1642274309259238e-09, "loss": 0.6968, "step": 7753 }, { "epoch": 0.99, "grad_norm": 1.2396249771118164, "learning_rate": 1.1198816658331003e-09, "loss": 0.4695, "step": 7754 }, { "epoch": 0.99, "grad_norm": 1.2736849784851074, "learning_rate": 1.0763968562343963e-09, "loss": 0.5964, "step": 7755 }, { "epoch": 0.99, "grad_norm": 3.6767706871032715, "learning_rate": 1.0337730096199316e-09, "loss": 0.567, "step": 7756 }, { "epoch": 0.99, "grad_norm": 1.1457759141921997, "learning_rate": 9.920101333293907e-10, "loss": 0.4591, "step": 7757 }, { "epoch": 0.99, "grad_norm": 1.395374059677124, "learning_rate": 9.511082345570189e-10, "loss": 0.5131, "step": 7758 }, { "epoch": 0.99, "grad_norm": 1.0349416732788086, "learning_rate": 9.110673203471809e-10, "loss": 0.7146, "step": 7759 }, { "epoch": 0.99, "grad_norm": 1.1731163263320923, "learning_rate": 8.718873975949171e-10, "loss": 0.6224, "step": 7760 }, { "epoch": 0.99, "grad_norm": 1.4639376401901245, "learning_rate": 8.335684730492733e-10, "loss": 0.6381, "step": 7761 }, { "epoch": 0.99, "grad_norm": 1.3412548303604126, "learning_rate": 7.9611055330997e-10, "loss": 0.5763, "step": 7762 }, { "epoch": 0.99, "grad_norm": 1.3731943368911743, "learning_rate": 7.595136448274032e-10, "loss": 0.5907, "step": 7763 }, { "epoch": 0.99, "grad_norm": 1.477068305015564, "learning_rate": 7.237777539059743e-10, "loss": 0.6016, "step": 7764 }, { "epoch": 0.99, "grad_norm": 1.240449070930481, "learning_rate": 6.889028866990943e-10, "loss": 0.5382, "step": 7765 }, { "epoch": 0.99, "grad_norm": 1.313112735748291, "learning_rate": 6.548890492141802e-10, "loss": 0.5249, "step": 7766 }, { "epoch": 1.0, "grad_norm": 1.2798494100570679, "learning_rate": 6.217362473093235e-10, "loss": 0.6216, "step": 7767 }, { "epoch": 1.0, "grad_norm": 2.7569220066070557, "learning_rate": 5.894444866938465e-10, "loss": 0.602, "step": 7768 }, { "epoch": 1.0, "grad_norm": 1.4768586158752441, "learning_rate": 5.580137729299661e-10, "loss": 0.6307, "step": 7769 }, { "epoch": 1.0, "grad_norm": 1.2594913244247437, "learning_rate": 5.274441114305751e-10, "loss": 0.562, "step": 7770 }, { "epoch": 1.0, "grad_norm": 1.1919431686401367, "learning_rate": 4.977355074609059e-10, "loss": 0.5843, "step": 7771 }, { "epoch": 1.0, "grad_norm": 1.5836091041564941, "learning_rate": 4.688879661379764e-10, "loss": 0.601, "step": 7772 }, { "epoch": 1.0, "grad_norm": 1.3357963562011719, "learning_rate": 4.409014924294797e-10, "loss": 0.5608, "step": 7773 }, { "epoch": 1.0, "grad_norm": 1.4678525924682617, "learning_rate": 4.1377609115600403e-10, "loss": 0.615, "step": 7774 }, { "epoch": 1.0, "grad_norm": 1.2490729093551636, "learning_rate": 3.8751176698936797e-10, "loss": 0.5467, "step": 7775 }, { "epoch": 1.0, "grad_norm": 1.4261091947555542, "learning_rate": 3.621085244526201e-10, "loss": 0.5542, "step": 7776 }, { "epoch": 1.0, "grad_norm": 1.5678194761276245, "learning_rate": 3.375663679211494e-10, "loss": 0.5603, "step": 7777 }, { "epoch": 1.0, "grad_norm": 1.3209941387176514, "learning_rate": 3.138853016221299e-10, "loss": 0.5321, "step": 7778 }, { "epoch": 1.0, "grad_norm": 1.2347135543823242, "learning_rate": 2.9106532963396605e-10, "loss": 0.7119, "step": 7779 }, { "epoch": 1.0, "grad_norm": 1.4404094219207764, "learning_rate": 2.6910645588684726e-10, "loss": 0.5572, "step": 7780 }, { "epoch": 1.0, "grad_norm": 1.1381596326828003, "learning_rate": 2.480086841633034e-10, "loss": 0.6961, "step": 7781 }, { "epoch": 1.0, "grad_norm": 1.4806857109069824, "learning_rate": 2.2777201809598415e-10, "loss": 0.6237, "step": 7782 }, { "epoch": 1.0, "grad_norm": 1.4877007007598877, "learning_rate": 2.0839646117098988e-10, "loss": 0.5864, "step": 7783 }, { "epoch": 1.0, "grad_norm": 1.6828969717025757, "learning_rate": 1.8988201672509587e-10, "loss": 0.6237, "step": 7784 }, { "epoch": 1.0, "grad_norm": 1.460888385772705, "learning_rate": 1.7222868794686264e-10, "loss": 0.5588, "step": 7785 }, { "epoch": 1.0, "grad_norm": 1.2123541831970215, "learning_rate": 1.5543647787719108e-10, "loss": 0.5466, "step": 7786 }, { "epoch": 1.0, "grad_norm": 1.8442035913467407, "learning_rate": 1.39505389407657e-10, "loss": 0.5387, "step": 7787 }, { "epoch": 1.0, "grad_norm": 1.310922384262085, "learning_rate": 1.2443542528273178e-10, "loss": 0.5123, "step": 7788 }, { "epoch": 1.0, "grad_norm": 1.1582748889923096, "learning_rate": 1.102265880975617e-10, "loss": 0.6051, "step": 7789 }, { "epoch": 1.0, "grad_norm": 1.2169848680496216, "learning_rate": 9.687888029907832e-11, "loss": 0.5885, "step": 7790 }, { "epoch": 1.0, "grad_norm": 1.2619993686676025, "learning_rate": 8.439230418655353e-11, "loss": 0.5656, "step": 7791 }, { "epoch": 1.0, "grad_norm": 1.3111398220062256, "learning_rate": 7.27668619099342e-11, "loss": 0.5407, "step": 7792 }, { "epoch": 1.0, "grad_norm": 1.3473933935165405, "learning_rate": 6.20025554726178e-11, "loss": 0.575, "step": 7793 }, { "epoch": 1.0, "grad_norm": 1.3455848693847656, "learning_rate": 5.2099386727566535e-11, "loss": 0.547, "step": 7794 }, { "epoch": 1.0, "grad_norm": 1.5748672485351562, "learning_rate": 4.305735738008299e-11, "loss": 0.5743, "step": 7795 }, { "epoch": 1.0, "grad_norm": 1.4658571481704712, "learning_rate": 3.4876468988920275e-11, "loss": 0.5293, "step": 7796 }, { "epoch": 1.0, "grad_norm": 1.108062505722046, "learning_rate": 2.7556722961841197e-11, "loss": 0.6937, "step": 7797 }, { "epoch": 1.0, "grad_norm": 1.1840686798095703, "learning_rate": 2.1098120559503998e-11, "loss": 0.476, "step": 7798 }, { "epoch": 1.0, "grad_norm": 1.324550747871399, "learning_rate": 1.550066289490726e-11, "loss": 0.5883, "step": 7799 }, { "epoch": 1.0, "grad_norm": 1.7365845441818237, "learning_rate": 1.0764350932279676e-11, "loss": 0.5685, "step": 7800 }, { "epoch": 1.0, "grad_norm": 1.3814128637313843, "learning_rate": 6.889185486524952e-12, "loss": 0.5706, "step": 7801 }, { "epoch": 1.0, "grad_norm": 1.1768162250518799, "learning_rate": 3.875167225442234e-12, "loss": 0.5444, "step": 7802 }, { "epoch": 1.0, "grad_norm": 1.977554440498352, "learning_rate": 1.7222966680607856e-12, "loss": 0.5997, "step": 7803 }, { "epoch": 1.0, "grad_norm": 1.1297414302825928, "learning_rate": 4.3057418575020993e-13, "loss": 0.6249, "step": 7804 }, { "epoch": 1.0, "grad_norm": 1.294604778289795, "learning_rate": 0.0, "loss": 0.5606, "step": 7805 }, { "epoch": 1.0, "step": 7805, "total_flos": 1.1237568065308721e+19, "train_loss": 0.6154153387490028, "train_runtime": 23859.4346, "train_samples_per_second": 41.876, "train_steps_per_second": 0.327 } ], "logging_steps": 1.0, "max_steps": 7805, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 1.1237568065308721e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }