{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 22095, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.525910839556461e-05, "grad_norm": 8.66013199834246, "learning_rate": 0.0, "loss": 0.8106, "step": 1 }, { "epoch": 9.051821679112921e-05, "grad_norm": 8.116963999167655, "learning_rate": 1.5082956259426848e-08, "loss": 0.8228, "step": 2 }, { "epoch": 0.00013577732518669383, "grad_norm": 8.805888630344679, "learning_rate": 3.0165912518853697e-08, "loss": 0.8379, "step": 3 }, { "epoch": 0.00018103643358225843, "grad_norm": 10.413908076660961, "learning_rate": 4.524886877828055e-08, "loss": 0.8215, "step": 4 }, { "epoch": 0.00022629554197782303, "grad_norm": 10.270466065239447, "learning_rate": 6.033182503770739e-08, "loss": 0.7408, "step": 5 }, { "epoch": 0.00027155465037338765, "grad_norm": 8.667172848979332, "learning_rate": 7.541478129713425e-08, "loss": 0.871, "step": 6 }, { "epoch": 0.00031681375876895223, "grad_norm": 7.875222542229044, "learning_rate": 9.04977375565611e-08, "loss": 0.826, "step": 7 }, { "epoch": 0.00036207286716451686, "grad_norm": 9.852921507702614, "learning_rate": 1.0558069381598795e-07, "loss": 0.7683, "step": 8 }, { "epoch": 0.0004073319755600815, "grad_norm": 2.931646823848608, "learning_rate": 1.2066365007541479e-07, "loss": 0.7524, "step": 9 }, { "epoch": 0.00045259108395564606, "grad_norm": 7.699231177320019, "learning_rate": 1.3574660633484163e-07, "loss": 0.8115, "step": 10 }, { "epoch": 0.0004978501923512107, "grad_norm": 8.48382482276139, "learning_rate": 1.508295625942685e-07, "loss": 0.8242, "step": 11 }, { "epoch": 0.0005431093007467753, "grad_norm": 3.0335535418133057, "learning_rate": 1.6591251885369535e-07, "loss": 0.7765, "step": 12 }, { "epoch": 0.0005883684091423399, "grad_norm": 7.977093909893501, "learning_rate": 1.809954751131222e-07, "loss": 0.8172, "step": 13 }, { "epoch": 0.0006336275175379045, "grad_norm": 8.497572549254802, "learning_rate": 1.9607843137254904e-07, "loss": 0.8067, "step": 14 }, { "epoch": 0.0006788866259334691, "grad_norm": 2.8940758258319446, "learning_rate": 2.111613876319759e-07, "loss": 0.7585, "step": 15 }, { "epoch": 0.0007241457343290337, "grad_norm": 2.855482184271788, "learning_rate": 2.2624434389140273e-07, "loss": 0.771, "step": 16 }, { "epoch": 0.0007694048427245983, "grad_norm": 8.986529342997084, "learning_rate": 2.4132730015082957e-07, "loss": 0.8223, "step": 17 }, { "epoch": 0.000814663951120163, "grad_norm": 6.395648167097882, "learning_rate": 2.564102564102564e-07, "loss": 0.7447, "step": 18 }, { "epoch": 0.0008599230595157276, "grad_norm": 4.9798889312227725, "learning_rate": 2.7149321266968326e-07, "loss": 0.829, "step": 19 }, { "epoch": 0.0009051821679112921, "grad_norm": 2.6740823934447224, "learning_rate": 2.865761689291101e-07, "loss": 0.7568, "step": 20 }, { "epoch": 0.0009504412763068567, "grad_norm": 4.052800002632703, "learning_rate": 3.01659125188537e-07, "loss": 0.8026, "step": 21 }, { "epoch": 0.0009957003847024214, "grad_norm": 2.9369933535648247, "learning_rate": 3.167420814479638e-07, "loss": 0.7492, "step": 22 }, { "epoch": 0.001040959493097986, "grad_norm": 4.247413851142735, "learning_rate": 3.318250377073907e-07, "loss": 0.7575, "step": 23 }, { "epoch": 0.0010862186014935506, "grad_norm": 3.879005288265461, "learning_rate": 3.4690799396681754e-07, "loss": 0.759, "step": 24 }, { "epoch": 0.0011314777098891152, "grad_norm": 3.6440819833985922, "learning_rate": 3.619909502262444e-07, "loss": 0.7482, "step": 25 }, { "epoch": 0.0011767368182846799, "grad_norm": 2.757922595973135, "learning_rate": 3.770739064856712e-07, "loss": 0.7626, "step": 26 }, { "epoch": 0.0012219959266802445, "grad_norm": 3.832157008102444, "learning_rate": 3.921568627450981e-07, "loss": 0.7556, "step": 27 }, { "epoch": 0.001267255035075809, "grad_norm": 3.372216966665436, "learning_rate": 4.072398190045249e-07, "loss": 0.7646, "step": 28 }, { "epoch": 0.0013125141434713735, "grad_norm": 3.13813749845113, "learning_rate": 4.223227752639518e-07, "loss": 0.7665, "step": 29 }, { "epoch": 0.0013577732518669382, "grad_norm": 2.8809864229440163, "learning_rate": 4.374057315233786e-07, "loss": 0.748, "step": 30 }, { "epoch": 0.0014030323602625028, "grad_norm": 2.8897137086592086, "learning_rate": 4.5248868778280546e-07, "loss": 0.7738, "step": 31 }, { "epoch": 0.0014482914686580674, "grad_norm": 2.942818955850594, "learning_rate": 4.675716440422323e-07, "loss": 0.738, "step": 32 }, { "epoch": 0.001493550577053632, "grad_norm": 2.6309860751703624, "learning_rate": 4.826546003016591e-07, "loss": 0.7678, "step": 33 }, { "epoch": 0.0015388096854491967, "grad_norm": 2.8062178485999776, "learning_rate": 4.977375565610859e-07, "loss": 0.7834, "step": 34 }, { "epoch": 0.0015840687938447613, "grad_norm": 2.8268426312674646, "learning_rate": 5.128205128205128e-07, "loss": 0.7633, "step": 35 }, { "epoch": 0.001629327902240326, "grad_norm": 2.4556470307247205, "learning_rate": 5.279034690799397e-07, "loss": 0.7394, "step": 36 }, { "epoch": 0.0016745870106358906, "grad_norm": 2.472058064042505, "learning_rate": 5.429864253393665e-07, "loss": 0.7233, "step": 37 }, { "epoch": 0.0017198461190314552, "grad_norm": 2.1758732925169255, "learning_rate": 5.580693815987934e-07, "loss": 0.7058, "step": 38 }, { "epoch": 0.0017651052274270196, "grad_norm": 2.5628467709923193, "learning_rate": 5.731523378582202e-07, "loss": 0.7554, "step": 39 }, { "epoch": 0.0018103643358225842, "grad_norm": 2.3023149629245503, "learning_rate": 5.882352941176471e-07, "loss": 0.7271, "step": 40 }, { "epoch": 0.0018556234442181488, "grad_norm": 2.0801763787360965, "learning_rate": 6.03318250377074e-07, "loss": 0.6804, "step": 41 }, { "epoch": 0.0019008825526137135, "grad_norm": 2.2400032166503094, "learning_rate": 6.184012066365008e-07, "loss": 0.7269, "step": 42 }, { "epoch": 0.001946141661009278, "grad_norm": 2.074448591273433, "learning_rate": 6.334841628959276e-07, "loss": 0.724, "step": 43 }, { "epoch": 0.0019914007694048427, "grad_norm": 2.052458166169399, "learning_rate": 6.485671191553546e-07, "loss": 0.7597, "step": 44 }, { "epoch": 0.002036659877800407, "grad_norm": 2.3000279668851826, "learning_rate": 6.636500754147814e-07, "loss": 0.775, "step": 45 }, { "epoch": 0.002081918986195972, "grad_norm": 2.040178744317726, "learning_rate": 6.787330316742082e-07, "loss": 0.6826, "step": 46 }, { "epoch": 0.0021271780945915364, "grad_norm": 2.169650593616676, "learning_rate": 6.938159879336351e-07, "loss": 0.7196, "step": 47 }, { "epoch": 0.0021724372029871012, "grad_norm": 2.243237967368775, "learning_rate": 7.088989441930619e-07, "loss": 0.6534, "step": 48 }, { "epoch": 0.0022176963113826656, "grad_norm": 1.805594330039211, "learning_rate": 7.239819004524888e-07, "loss": 0.7239, "step": 49 }, { "epoch": 0.0022629554197782305, "grad_norm": 1.95060511614219, "learning_rate": 7.390648567119156e-07, "loss": 0.7892, "step": 50 }, { "epoch": 0.002308214528173795, "grad_norm": 2.0050821311562967, "learning_rate": 7.541478129713424e-07, "loss": 0.7541, "step": 51 }, { "epoch": 0.0023534736365693597, "grad_norm": 1.591345649647068, "learning_rate": 7.692307692307694e-07, "loss": 0.653, "step": 52 }, { "epoch": 0.002398732744964924, "grad_norm": 1.5359586585503124, "learning_rate": 7.843137254901962e-07, "loss": 0.7579, "step": 53 }, { "epoch": 0.002443991853360489, "grad_norm": 1.4903440630350036, "learning_rate": 7.993966817496229e-07, "loss": 0.6981, "step": 54 }, { "epoch": 0.0024892509617560534, "grad_norm": 1.576261684717775, "learning_rate": 8.144796380090498e-07, "loss": 0.7096, "step": 55 }, { "epoch": 0.002534510070151618, "grad_norm": 1.4623041693607248, "learning_rate": 8.295625942684766e-07, "loss": 0.7056, "step": 56 }, { "epoch": 0.0025797691785471827, "grad_norm": 1.3299793426063247, "learning_rate": 8.446455505279036e-07, "loss": 0.7375, "step": 57 }, { "epoch": 0.002625028286942747, "grad_norm": 1.3229170043287524, "learning_rate": 8.597285067873304e-07, "loss": 0.6838, "step": 58 }, { "epoch": 0.002670287395338312, "grad_norm": 1.3144587271599693, "learning_rate": 8.748114630467572e-07, "loss": 0.7115, "step": 59 }, { "epoch": 0.0027155465037338763, "grad_norm": 1.6142307994153728, "learning_rate": 8.898944193061841e-07, "loss": 0.7371, "step": 60 }, { "epoch": 0.002760805612129441, "grad_norm": 1.244580858498938, "learning_rate": 9.049773755656109e-07, "loss": 0.6543, "step": 61 }, { "epoch": 0.0028060647205250056, "grad_norm": 1.2418783287357869, "learning_rate": 9.200603318250378e-07, "loss": 0.6459, "step": 62 }, { "epoch": 0.0028513238289205704, "grad_norm": 1.3734658575051808, "learning_rate": 9.351432880844646e-07, "loss": 0.7234, "step": 63 }, { "epoch": 0.002896582937316135, "grad_norm": 1.5154654997771089, "learning_rate": 9.502262443438914e-07, "loss": 0.7172, "step": 64 }, { "epoch": 0.0029418420457116997, "grad_norm": 1.5993349941418993, "learning_rate": 9.653092006033183e-07, "loss": 0.7091, "step": 65 }, { "epoch": 0.002987101154107264, "grad_norm": 1.2952398954480047, "learning_rate": 9.80392156862745e-07, "loss": 0.6245, "step": 66 }, { "epoch": 0.0030323602625028285, "grad_norm": 1.3771464561355673, "learning_rate": 9.954751131221719e-07, "loss": 0.6454, "step": 67 }, { "epoch": 0.0030776193708983933, "grad_norm": 1.412649273270314, "learning_rate": 1.0105580693815989e-06, "loss": 0.6814, "step": 68 }, { "epoch": 0.0031228784792939578, "grad_norm": 1.3193487969255955, "learning_rate": 1.0256410256410257e-06, "loss": 0.682, "step": 69 }, { "epoch": 0.0031681375876895226, "grad_norm": 1.273629024869017, "learning_rate": 1.0407239819004527e-06, "loss": 0.7166, "step": 70 }, { "epoch": 0.003213396696085087, "grad_norm": 1.1381038066518965, "learning_rate": 1.0558069381598795e-06, "loss": 0.7045, "step": 71 }, { "epoch": 0.003258655804480652, "grad_norm": 1.2047824209988878, "learning_rate": 1.0708898944193063e-06, "loss": 0.6771, "step": 72 }, { "epoch": 0.0033039149128762163, "grad_norm": 1.2391818859284285, "learning_rate": 1.085972850678733e-06, "loss": 0.6741, "step": 73 }, { "epoch": 0.003349174021271781, "grad_norm": 1.2521185180506431, "learning_rate": 1.1010558069381598e-06, "loss": 0.6855, "step": 74 }, { "epoch": 0.0033944331296673455, "grad_norm": 1.3882698005588492, "learning_rate": 1.1161387631975868e-06, "loss": 0.7001, "step": 75 }, { "epoch": 0.0034396922380629104, "grad_norm": 1.179314695521784, "learning_rate": 1.1312217194570136e-06, "loss": 0.6747, "step": 76 }, { "epoch": 0.0034849513464584748, "grad_norm": 1.5552928150597494, "learning_rate": 1.1463046757164404e-06, "loss": 0.6286, "step": 77 }, { "epoch": 0.003530210454854039, "grad_norm": 1.1734748345240251, "learning_rate": 1.1613876319758674e-06, "loss": 0.6742, "step": 78 }, { "epoch": 0.003575469563249604, "grad_norm": 1.131287052435935, "learning_rate": 1.1764705882352942e-06, "loss": 0.6426, "step": 79 }, { "epoch": 0.0036207286716451684, "grad_norm": 1.3295414806754717, "learning_rate": 1.1915535444947212e-06, "loss": 0.6618, "step": 80 }, { "epoch": 0.0036659877800407333, "grad_norm": 1.2939253815058376, "learning_rate": 1.206636500754148e-06, "loss": 0.6729, "step": 81 }, { "epoch": 0.0037112468884362977, "grad_norm": 1.059572927368594, "learning_rate": 1.2217194570135748e-06, "loss": 0.5791, "step": 82 }, { "epoch": 0.0037565059968318625, "grad_norm": 1.1326662378816292, "learning_rate": 1.2368024132730016e-06, "loss": 0.6478, "step": 83 }, { "epoch": 0.003801765105227427, "grad_norm": 1.4420484198243764, "learning_rate": 1.2518853695324284e-06, "loss": 0.7105, "step": 84 }, { "epoch": 0.003847024213622992, "grad_norm": 1.054691744896305, "learning_rate": 1.2669683257918552e-06, "loss": 0.6299, "step": 85 }, { "epoch": 0.003892283322018556, "grad_norm": 1.1882611476012201, "learning_rate": 1.282051282051282e-06, "loss": 0.6486, "step": 86 }, { "epoch": 0.003937542430414121, "grad_norm": 1.059033257046168, "learning_rate": 1.2971342383107092e-06, "loss": 0.6162, "step": 87 }, { "epoch": 0.0039828015388096855, "grad_norm": 1.4556419443337114, "learning_rate": 1.312217194570136e-06, "loss": 0.7638, "step": 88 }, { "epoch": 0.00402806064720525, "grad_norm": 1.1239534063715333, "learning_rate": 1.3273001508295628e-06, "loss": 0.6258, "step": 89 }, { "epoch": 0.004073319755600814, "grad_norm": 1.106624334983117, "learning_rate": 1.3423831070889896e-06, "loss": 0.6705, "step": 90 }, { "epoch": 0.004118578863996379, "grad_norm": 1.0594735066509668, "learning_rate": 1.3574660633484164e-06, "loss": 0.6365, "step": 91 }, { "epoch": 0.004163837972391944, "grad_norm": 1.1407310916855053, "learning_rate": 1.3725490196078434e-06, "loss": 0.6674, "step": 92 }, { "epoch": 0.004209097080787509, "grad_norm": 1.0868736007753048, "learning_rate": 1.3876319758672702e-06, "loss": 0.6586, "step": 93 }, { "epoch": 0.004254356189183073, "grad_norm": 1.0787095460749092, "learning_rate": 1.402714932126697e-06, "loss": 0.6344, "step": 94 }, { "epoch": 0.004299615297578638, "grad_norm": 1.1365671650774605, "learning_rate": 1.4177978883861237e-06, "loss": 0.6751, "step": 95 }, { "epoch": 0.0043448744059742025, "grad_norm": 1.0487165231214994, "learning_rate": 1.4328808446455505e-06, "loss": 0.657, "step": 96 }, { "epoch": 0.004390133514369767, "grad_norm": 0.9787033497294746, "learning_rate": 1.4479638009049775e-06, "loss": 0.6337, "step": 97 }, { "epoch": 0.004435392622765331, "grad_norm": 1.0254485226838135, "learning_rate": 1.4630467571644043e-06, "loss": 0.6081, "step": 98 }, { "epoch": 0.004480651731160896, "grad_norm": 1.3872002279212687, "learning_rate": 1.4781297134238311e-06, "loss": 0.5998, "step": 99 }, { "epoch": 0.004525910839556461, "grad_norm": 1.2984889703154123, "learning_rate": 1.493212669683258e-06, "loss": 0.6809, "step": 100 }, { "epoch": 0.004571169947952025, "grad_norm": 1.0452806515562352, "learning_rate": 1.5082956259426847e-06, "loss": 0.6208, "step": 101 }, { "epoch": 0.00461642905634759, "grad_norm": 1.0097601200328257, "learning_rate": 1.5233785822021115e-06, "loss": 0.6415, "step": 102 }, { "epoch": 0.004661688164743155, "grad_norm": 1.269723703523461, "learning_rate": 1.5384615384615387e-06, "loss": 0.7088, "step": 103 }, { "epoch": 0.0047069472731387195, "grad_norm": 1.2470497789460646, "learning_rate": 1.5535444947209655e-06, "loss": 0.7053, "step": 104 }, { "epoch": 0.0047522063815342835, "grad_norm": 0.9368939620497067, "learning_rate": 1.5686274509803923e-06, "loss": 0.6018, "step": 105 }, { "epoch": 0.004797465489929848, "grad_norm": 1.1604437387684339, "learning_rate": 1.583710407239819e-06, "loss": 0.6662, "step": 106 }, { "epoch": 0.004842724598325413, "grad_norm": 1.0358552080502414, "learning_rate": 1.5987933634992459e-06, "loss": 0.6463, "step": 107 }, { "epoch": 0.004887983706720978, "grad_norm": 1.0368493258936142, "learning_rate": 1.6138763197586729e-06, "loss": 0.6304, "step": 108 }, { "epoch": 0.004933242815116542, "grad_norm": 1.1302662456729546, "learning_rate": 1.6289592760180997e-06, "loss": 0.668, "step": 109 }, { "epoch": 0.004978501923512107, "grad_norm": 1.0351636351517919, "learning_rate": 1.6440422322775265e-06, "loss": 0.5568, "step": 110 }, { "epoch": 0.005023761031907672, "grad_norm": 1.0501568943457311, "learning_rate": 1.6591251885369533e-06, "loss": 0.6857, "step": 111 }, { "epoch": 0.005069020140303236, "grad_norm": 1.0207344968356886, "learning_rate": 1.67420814479638e-06, "loss": 0.6603, "step": 112 }, { "epoch": 0.0051142792486988005, "grad_norm": 1.0310491415955947, "learning_rate": 1.6892911010558073e-06, "loss": 0.6509, "step": 113 }, { "epoch": 0.005159538357094365, "grad_norm": 0.9639024601411313, "learning_rate": 1.704374057315234e-06, "loss": 0.7051, "step": 114 }, { "epoch": 0.00520479746548993, "grad_norm": 0.9586419580660428, "learning_rate": 1.7194570135746609e-06, "loss": 0.6142, "step": 115 }, { "epoch": 0.005250056573885494, "grad_norm": 1.04842344522062, "learning_rate": 1.7345399698340876e-06, "loss": 0.6466, "step": 116 }, { "epoch": 0.005295315682281059, "grad_norm": 0.9173299249262994, "learning_rate": 1.7496229260935144e-06, "loss": 0.5516, "step": 117 }, { "epoch": 0.005340574790676624, "grad_norm": 1.0831856068854298, "learning_rate": 1.7647058823529414e-06, "loss": 0.5931, "step": 118 }, { "epoch": 0.005385833899072189, "grad_norm": 0.8946723582329622, "learning_rate": 1.7797888386123682e-06, "loss": 0.577, "step": 119 }, { "epoch": 0.005431093007467753, "grad_norm": 1.1010913291353444, "learning_rate": 1.794871794871795e-06, "loss": 0.6203, "step": 120 }, { "epoch": 0.0054763521158633175, "grad_norm": 0.9085173100560293, "learning_rate": 1.8099547511312218e-06, "loss": 0.6101, "step": 121 }, { "epoch": 0.005521611224258882, "grad_norm": 0.9320802267784369, "learning_rate": 1.8250377073906486e-06, "loss": 0.5623, "step": 122 }, { "epoch": 0.005566870332654446, "grad_norm": 0.8195711642520936, "learning_rate": 1.8401206636500756e-06, "loss": 0.6649, "step": 123 }, { "epoch": 0.005612129441050011, "grad_norm": 1.010253752587403, "learning_rate": 1.8552036199095024e-06, "loss": 0.5649, "step": 124 }, { "epoch": 0.005657388549445576, "grad_norm": 0.991330010194571, "learning_rate": 1.8702865761689292e-06, "loss": 0.5982, "step": 125 }, { "epoch": 0.005702647657841141, "grad_norm": 1.1475476851088662, "learning_rate": 1.885369532428356e-06, "loss": 0.6453, "step": 126 }, { "epoch": 0.005747906766236705, "grad_norm": 0.9235417511336075, "learning_rate": 1.9004524886877828e-06, "loss": 0.5992, "step": 127 }, { "epoch": 0.00579316587463227, "grad_norm": 0.9847063346731131, "learning_rate": 1.91553544494721e-06, "loss": 0.6304, "step": 128 }, { "epoch": 0.0058384249830278345, "grad_norm": 1.0269890471208356, "learning_rate": 1.9306184012066366e-06, "loss": 0.6046, "step": 129 }, { "epoch": 0.005883684091423399, "grad_norm": 0.9258189317090748, "learning_rate": 1.9457013574660634e-06, "loss": 0.6118, "step": 130 }, { "epoch": 0.005928943199818963, "grad_norm": 0.7492225521686104, "learning_rate": 1.96078431372549e-06, "loss": 0.6506, "step": 131 }, { "epoch": 0.005974202308214528, "grad_norm": 0.7421506908255864, "learning_rate": 1.975867269984917e-06, "loss": 0.6702, "step": 132 }, { "epoch": 0.006019461416610093, "grad_norm": 0.9545777179324334, "learning_rate": 1.9909502262443437e-06, "loss": 0.5685, "step": 133 }, { "epoch": 0.006064720525005657, "grad_norm": 0.9622591564554519, "learning_rate": 2.006033182503771e-06, "loss": 0.5939, "step": 134 }, { "epoch": 0.006109979633401222, "grad_norm": 0.9109690493087242, "learning_rate": 2.0211161387631978e-06, "loss": 0.6169, "step": 135 }, { "epoch": 0.006155238741796787, "grad_norm": 0.6813272311762413, "learning_rate": 2.0361990950226245e-06, "loss": 0.6737, "step": 136 }, { "epoch": 0.0062004978501923515, "grad_norm": 0.6720777722130816, "learning_rate": 2.0512820512820513e-06, "loss": 0.6571, "step": 137 }, { "epoch": 0.0062457569585879155, "grad_norm": 0.9667588053275596, "learning_rate": 2.066365007541478e-06, "loss": 0.5855, "step": 138 }, { "epoch": 0.00629101606698348, "grad_norm": 0.9344038736230162, "learning_rate": 2.0814479638009053e-06, "loss": 0.6125, "step": 139 }, { "epoch": 0.006336275175379045, "grad_norm": 0.9465344342175216, "learning_rate": 2.096530920060332e-06, "loss": 0.5956, "step": 140 }, { "epoch": 0.00638153428377461, "grad_norm": 1.2591164988076586, "learning_rate": 2.111613876319759e-06, "loss": 0.6398, "step": 141 }, { "epoch": 0.006426793392170174, "grad_norm": 0.9677788926426463, "learning_rate": 2.1266968325791857e-06, "loss": 0.6047, "step": 142 }, { "epoch": 0.006472052500565739, "grad_norm": 0.9872448306229715, "learning_rate": 2.1417797888386125e-06, "loss": 0.6125, "step": 143 }, { "epoch": 0.006517311608961304, "grad_norm": 1.0035271309418503, "learning_rate": 2.1568627450980393e-06, "loss": 0.5664, "step": 144 }, { "epoch": 0.006562570717356868, "grad_norm": 0.9880506990276164, "learning_rate": 2.171945701357466e-06, "loss": 0.5886, "step": 145 }, { "epoch": 0.0066078298257524325, "grad_norm": 1.0103893385719878, "learning_rate": 2.187028657616893e-06, "loss": 0.6753, "step": 146 }, { "epoch": 0.006653088934147997, "grad_norm": 1.011094895629863, "learning_rate": 2.2021116138763197e-06, "loss": 0.6041, "step": 147 }, { "epoch": 0.006698348042543562, "grad_norm": 0.9464836309060873, "learning_rate": 2.2171945701357465e-06, "loss": 0.6077, "step": 148 }, { "epoch": 0.006743607150939126, "grad_norm": 0.9495819965291539, "learning_rate": 2.2322775263951737e-06, "loss": 0.5735, "step": 149 }, { "epoch": 0.006788866259334691, "grad_norm": 1.1858654132024509, "learning_rate": 2.2473604826546005e-06, "loss": 0.6186, "step": 150 }, { "epoch": 0.006834125367730256, "grad_norm": 0.9009709214733077, "learning_rate": 2.2624434389140273e-06, "loss": 0.5791, "step": 151 }, { "epoch": 0.006879384476125821, "grad_norm": 1.9798213247768532, "learning_rate": 2.277526395173454e-06, "loss": 0.5288, "step": 152 }, { "epoch": 0.006924643584521385, "grad_norm": 0.8768656065313846, "learning_rate": 2.292609351432881e-06, "loss": 0.5388, "step": 153 }, { "epoch": 0.0069699026929169496, "grad_norm": 0.9853972537936977, "learning_rate": 2.307692307692308e-06, "loss": 0.621, "step": 154 }, { "epoch": 0.007015161801312514, "grad_norm": 1.183982065728492, "learning_rate": 2.322775263951735e-06, "loss": 0.5722, "step": 155 }, { "epoch": 0.007060420909708078, "grad_norm": 0.9122814416114049, "learning_rate": 2.3378582202111617e-06, "loss": 0.5931, "step": 156 }, { "epoch": 0.007105680018103643, "grad_norm": 0.6050120092736706, "learning_rate": 2.3529411764705885e-06, "loss": 0.6359, "step": 157 }, { "epoch": 0.007150939126499208, "grad_norm": 0.8846810488876649, "learning_rate": 2.3680241327300152e-06, "loss": 0.5777, "step": 158 }, { "epoch": 0.007196198234894773, "grad_norm": 0.9791346336803588, "learning_rate": 2.3831070889894425e-06, "loss": 0.6196, "step": 159 }, { "epoch": 0.007241457343290337, "grad_norm": 1.0364431416834374, "learning_rate": 2.3981900452488693e-06, "loss": 0.5649, "step": 160 }, { "epoch": 0.007286716451685902, "grad_norm": 1.0907811411319832, "learning_rate": 2.413273001508296e-06, "loss": 0.5675, "step": 161 }, { "epoch": 0.007331975560081467, "grad_norm": 1.0702644046402912, "learning_rate": 2.428355957767723e-06, "loss": 0.5868, "step": 162 }, { "epoch": 0.007377234668477031, "grad_norm": 1.1961484830707276, "learning_rate": 2.4434389140271496e-06, "loss": 0.6126, "step": 163 }, { "epoch": 0.007422493776872595, "grad_norm": 0.9997853040755823, "learning_rate": 2.4585218702865764e-06, "loss": 0.5593, "step": 164 }, { "epoch": 0.00746775288526816, "grad_norm": 1.5306620336532337, "learning_rate": 2.4736048265460032e-06, "loss": 0.5741, "step": 165 }, { "epoch": 0.007513011993663725, "grad_norm": 1.0365389768096527, "learning_rate": 2.48868778280543e-06, "loss": 0.597, "step": 166 }, { "epoch": 0.007558271102059289, "grad_norm": 0.9300028853685578, "learning_rate": 2.503770739064857e-06, "loss": 0.6228, "step": 167 }, { "epoch": 0.007603530210454854, "grad_norm": 0.9730959927324633, "learning_rate": 2.5188536953242836e-06, "loss": 0.5888, "step": 168 }, { "epoch": 0.007648789318850419, "grad_norm": 0.7004727910376336, "learning_rate": 2.5339366515837104e-06, "loss": 0.6655, "step": 169 }, { "epoch": 0.007694048427245984, "grad_norm": 1.1770735730846689, "learning_rate": 2.549019607843137e-06, "loss": 0.61, "step": 170 }, { "epoch": 0.007739307535641548, "grad_norm": 0.9265069767412805, "learning_rate": 2.564102564102564e-06, "loss": 0.6124, "step": 171 }, { "epoch": 0.007784566644037112, "grad_norm": 1.0026834295632552, "learning_rate": 2.5791855203619916e-06, "loss": 0.5759, "step": 172 }, { "epoch": 0.007829825752432677, "grad_norm": 0.893051784847375, "learning_rate": 2.5942684766214184e-06, "loss": 0.6195, "step": 173 }, { "epoch": 0.007875084860828241, "grad_norm": 1.0294544753456212, "learning_rate": 2.609351432880845e-06, "loss": 0.5924, "step": 174 }, { "epoch": 0.007920343969223807, "grad_norm": 0.983431175208823, "learning_rate": 2.624434389140272e-06, "loss": 0.6401, "step": 175 }, { "epoch": 0.007965603077619371, "grad_norm": 0.9732249669160397, "learning_rate": 2.6395173453996988e-06, "loss": 0.5839, "step": 176 }, { "epoch": 0.008010862186014935, "grad_norm": 1.0460819509049557, "learning_rate": 2.6546003016591256e-06, "loss": 0.5342, "step": 177 }, { "epoch": 0.0080561212944105, "grad_norm": 0.6725530926649889, "learning_rate": 2.6696832579185524e-06, "loss": 0.6642, "step": 178 }, { "epoch": 0.008101380402806065, "grad_norm": 0.947656576932551, "learning_rate": 2.684766214177979e-06, "loss": 0.5543, "step": 179 }, { "epoch": 0.008146639511201629, "grad_norm": 0.890279138583737, "learning_rate": 2.699849170437406e-06, "loss": 0.5781, "step": 180 }, { "epoch": 0.008191898619597194, "grad_norm": 0.9491950792758711, "learning_rate": 2.7149321266968327e-06, "loss": 0.5896, "step": 181 }, { "epoch": 0.008237157727992758, "grad_norm": 0.9199486672240875, "learning_rate": 2.7300150829562595e-06, "loss": 0.5462, "step": 182 }, { "epoch": 0.008282416836388324, "grad_norm": 4.539286243882512, "learning_rate": 2.7450980392156867e-06, "loss": 0.5749, "step": 183 }, { "epoch": 0.008327675944783888, "grad_norm": 0.9266218360089107, "learning_rate": 2.7601809954751135e-06, "loss": 0.5577, "step": 184 }, { "epoch": 0.008372935053179452, "grad_norm": 0.9869037300499367, "learning_rate": 2.7752639517345403e-06, "loss": 0.5218, "step": 185 }, { "epoch": 0.008418194161575018, "grad_norm": 0.9711731377333721, "learning_rate": 2.790346907993967e-06, "loss": 0.5768, "step": 186 }, { "epoch": 0.008463453269970582, "grad_norm": 1.2170837862401673, "learning_rate": 2.805429864253394e-06, "loss": 0.587, "step": 187 }, { "epoch": 0.008508712378366146, "grad_norm": 0.8855600221161494, "learning_rate": 2.8205128205128207e-06, "loss": 0.5275, "step": 188 }, { "epoch": 0.008553971486761711, "grad_norm": 0.9718908666916015, "learning_rate": 2.8355957767722475e-06, "loss": 0.5574, "step": 189 }, { "epoch": 0.008599230595157275, "grad_norm": 0.9121332025706556, "learning_rate": 2.8506787330316743e-06, "loss": 0.5696, "step": 190 }, { "epoch": 0.00864448970355284, "grad_norm": 0.9606521465966855, "learning_rate": 2.865761689291101e-06, "loss": 0.5831, "step": 191 }, { "epoch": 0.008689748811948405, "grad_norm": 0.9248220902665218, "learning_rate": 2.880844645550528e-06, "loss": 0.6033, "step": 192 }, { "epoch": 0.008735007920343969, "grad_norm": 1.245130579446091, "learning_rate": 2.895927601809955e-06, "loss": 0.5711, "step": 193 }, { "epoch": 0.008780267028739535, "grad_norm": 0.6920799519601023, "learning_rate": 2.911010558069382e-06, "loss": 0.6706, "step": 194 }, { "epoch": 0.008825526137135099, "grad_norm": 0.9225517446215864, "learning_rate": 2.9260935143288087e-06, "loss": 0.5236, "step": 195 }, { "epoch": 0.008870785245530663, "grad_norm": 0.6084442686451063, "learning_rate": 2.9411764705882355e-06, "loss": 0.6724, "step": 196 }, { "epoch": 0.008916044353926228, "grad_norm": 1.0686412628431616, "learning_rate": 2.9562594268476623e-06, "loss": 0.5737, "step": 197 }, { "epoch": 0.008961303462321792, "grad_norm": 1.0296132076998055, "learning_rate": 2.971342383107089e-06, "loss": 0.5786, "step": 198 }, { "epoch": 0.009006562570717356, "grad_norm": 0.6266104875319409, "learning_rate": 2.986425339366516e-06, "loss": 0.6535, "step": 199 }, { "epoch": 0.009051821679112922, "grad_norm": 0.5458319005831402, "learning_rate": 3.0015082956259426e-06, "loss": 0.647, "step": 200 }, { "epoch": 0.009097080787508486, "grad_norm": 0.963218116755177, "learning_rate": 3.0165912518853694e-06, "loss": 0.5753, "step": 201 }, { "epoch": 0.00914233989590405, "grad_norm": 1.1478861970452678, "learning_rate": 3.0316742081447962e-06, "loss": 0.5516, "step": 202 }, { "epoch": 0.009187599004299616, "grad_norm": 0.8944848254240031, "learning_rate": 3.046757164404223e-06, "loss": 0.597, "step": 203 }, { "epoch": 0.00923285811269518, "grad_norm": 0.9869650281799124, "learning_rate": 3.0618401206636506e-06, "loss": 0.5382, "step": 204 }, { "epoch": 0.009278117221090745, "grad_norm": 1.130476760242102, "learning_rate": 3.0769230769230774e-06, "loss": 0.5331, "step": 205 }, { "epoch": 0.00932337632948631, "grad_norm": 0.6252836318965277, "learning_rate": 3.0920060331825042e-06, "loss": 0.618, "step": 206 }, { "epoch": 0.009368635437881873, "grad_norm": 0.9759516513557152, "learning_rate": 3.107088989441931e-06, "loss": 0.5858, "step": 207 }, { "epoch": 0.009413894546277439, "grad_norm": 0.8682518804136213, "learning_rate": 3.122171945701358e-06, "loss": 0.5768, "step": 208 }, { "epoch": 0.009459153654673003, "grad_norm": 1.236201277393762, "learning_rate": 3.1372549019607846e-06, "loss": 0.529, "step": 209 }, { "epoch": 0.009504412763068567, "grad_norm": 0.49697934177661585, "learning_rate": 3.1523378582202114e-06, "loss": 0.6492, "step": 210 }, { "epoch": 0.009549671871464133, "grad_norm": 0.8940926607531419, "learning_rate": 3.167420814479638e-06, "loss": 0.563, "step": 211 }, { "epoch": 0.009594930979859697, "grad_norm": 0.858566838909827, "learning_rate": 3.182503770739065e-06, "loss": 0.5136, "step": 212 }, { "epoch": 0.00964019008825526, "grad_norm": 0.4250007793137212, "learning_rate": 3.1975867269984918e-06, "loss": 0.6387, "step": 213 }, { "epoch": 0.009685449196650826, "grad_norm": 0.4830424869851058, "learning_rate": 3.212669683257919e-06, "loss": 0.6461, "step": 214 }, { "epoch": 0.00973070830504639, "grad_norm": 0.9492593456229038, "learning_rate": 3.2277526395173458e-06, "loss": 0.5798, "step": 215 }, { "epoch": 0.009775967413441956, "grad_norm": 0.9052606044295339, "learning_rate": 3.2428355957767726e-06, "loss": 0.5629, "step": 216 }, { "epoch": 0.00982122652183752, "grad_norm": 0.46529736780797787, "learning_rate": 3.2579185520361994e-06, "loss": 0.6497, "step": 217 }, { "epoch": 0.009866485630233084, "grad_norm": 0.44782130490666033, "learning_rate": 3.273001508295626e-06, "loss": 0.6619, "step": 218 }, { "epoch": 0.00991174473862865, "grad_norm": 0.8659706800572665, "learning_rate": 3.288084464555053e-06, "loss": 0.4668, "step": 219 }, { "epoch": 0.009957003847024214, "grad_norm": 0.8891708692268783, "learning_rate": 3.3031674208144797e-06, "loss": 0.517, "step": 220 }, { "epoch": 0.010002262955419778, "grad_norm": 0.44320004826346965, "learning_rate": 3.3182503770739065e-06, "loss": 0.6212, "step": 221 }, { "epoch": 0.010047522063815343, "grad_norm": 1.0023703249828462, "learning_rate": 3.3333333333333333e-06, "loss": 0.612, "step": 222 }, { "epoch": 0.010092781172210907, "grad_norm": 0.9391998913476327, "learning_rate": 3.34841628959276e-06, "loss": 0.5485, "step": 223 }, { "epoch": 0.010138040280606471, "grad_norm": 0.9215626336665612, "learning_rate": 3.3634992458521878e-06, "loss": 0.5324, "step": 224 }, { "epoch": 0.010183299389002037, "grad_norm": 0.9650536964226126, "learning_rate": 3.3785822021116145e-06, "loss": 0.5148, "step": 225 }, { "epoch": 0.010228558497397601, "grad_norm": 0.44025580492320604, "learning_rate": 3.3936651583710413e-06, "loss": 0.6352, "step": 226 }, { "epoch": 0.010273817605793167, "grad_norm": 0.9860234711060266, "learning_rate": 3.408748114630468e-06, "loss": 0.5221, "step": 227 }, { "epoch": 0.01031907671418873, "grad_norm": 0.8750523725384597, "learning_rate": 3.423831070889895e-06, "loss": 0.4942, "step": 228 }, { "epoch": 0.010364335822584295, "grad_norm": 0.40786929784799386, "learning_rate": 3.4389140271493217e-06, "loss": 0.6168, "step": 229 }, { "epoch": 0.01040959493097986, "grad_norm": 0.9581620031963619, "learning_rate": 3.4539969834087485e-06, "loss": 0.5732, "step": 230 }, { "epoch": 0.010454854039375424, "grad_norm": 1.078295738150313, "learning_rate": 3.4690799396681753e-06, "loss": 0.5713, "step": 231 }, { "epoch": 0.010500113147770988, "grad_norm": 1.005658714168155, "learning_rate": 3.484162895927602e-06, "loss": 0.5712, "step": 232 }, { "epoch": 0.010545372256166554, "grad_norm": 0.9349868398807674, "learning_rate": 3.499245852187029e-06, "loss": 0.5375, "step": 233 }, { "epoch": 0.010590631364562118, "grad_norm": 0.8593073232849099, "learning_rate": 3.5143288084464557e-06, "loss": 0.4573, "step": 234 }, { "epoch": 0.010635890472957682, "grad_norm": 0.46015794426951856, "learning_rate": 3.529411764705883e-06, "loss": 0.6272, "step": 235 }, { "epoch": 0.010681149581353248, "grad_norm": 1.0336342436633565, "learning_rate": 3.5444947209653097e-06, "loss": 0.5125, "step": 236 }, { "epoch": 0.010726408689748812, "grad_norm": 0.8832268250263099, "learning_rate": 3.5595776772247365e-06, "loss": 0.5745, "step": 237 }, { "epoch": 0.010771667798144377, "grad_norm": 0.9516494399278698, "learning_rate": 3.5746606334841633e-06, "loss": 0.5466, "step": 238 }, { "epoch": 0.010816926906539941, "grad_norm": 0.4369975511253576, "learning_rate": 3.58974358974359e-06, "loss": 0.6034, "step": 239 }, { "epoch": 0.010862186014935505, "grad_norm": 0.9401986167737724, "learning_rate": 3.604826546003017e-06, "loss": 0.5792, "step": 240 }, { "epoch": 0.010907445123331071, "grad_norm": 1.0006366096995205, "learning_rate": 3.6199095022624436e-06, "loss": 0.6009, "step": 241 }, { "epoch": 0.010952704231726635, "grad_norm": 0.9510760230842723, "learning_rate": 3.6349924585218704e-06, "loss": 0.5477, "step": 242 }, { "epoch": 0.010997963340122199, "grad_norm": 0.9288859948652094, "learning_rate": 3.6500754147812972e-06, "loss": 0.544, "step": 243 }, { "epoch": 0.011043222448517765, "grad_norm": 0.9863343501078657, "learning_rate": 3.665158371040724e-06, "loss": 0.5416, "step": 244 }, { "epoch": 0.011088481556913329, "grad_norm": 0.8605513996689019, "learning_rate": 3.6802413273001512e-06, "loss": 0.4932, "step": 245 }, { "epoch": 0.011133740665308893, "grad_norm": 0.916463140897627, "learning_rate": 3.695324283559578e-06, "loss": 0.6034, "step": 246 }, { "epoch": 0.011178999773704458, "grad_norm": 0.4956358437007129, "learning_rate": 3.710407239819005e-06, "loss": 0.6056, "step": 247 }, { "epoch": 0.011224258882100022, "grad_norm": 0.9163501523417295, "learning_rate": 3.7254901960784316e-06, "loss": 0.5617, "step": 248 }, { "epoch": 0.011269517990495588, "grad_norm": 0.9778185038718414, "learning_rate": 3.7405731523378584e-06, "loss": 0.546, "step": 249 }, { "epoch": 0.011314777098891152, "grad_norm": 0.9520944809364426, "learning_rate": 3.755656108597285e-06, "loss": 0.5811, "step": 250 }, { "epoch": 0.011360036207286716, "grad_norm": 0.8823450931650489, "learning_rate": 3.770739064856712e-06, "loss": 0.5386, "step": 251 }, { "epoch": 0.011405295315682282, "grad_norm": 0.9512201225676397, "learning_rate": 3.7858220211161388e-06, "loss": 0.5205, "step": 252 }, { "epoch": 0.011450554424077846, "grad_norm": 0.9448051386395275, "learning_rate": 3.8009049773755656e-06, "loss": 0.5838, "step": 253 }, { "epoch": 0.01149581353247341, "grad_norm": 0.8613594278239519, "learning_rate": 3.815987933634992e-06, "loss": 0.5709, "step": 254 }, { "epoch": 0.011541072640868975, "grad_norm": 0.9715804592335193, "learning_rate": 3.83107088989442e-06, "loss": 0.551, "step": 255 }, { "epoch": 0.01158633174926454, "grad_norm": 0.8766071855257321, "learning_rate": 3.846153846153847e-06, "loss": 0.5178, "step": 256 }, { "epoch": 0.011631590857660103, "grad_norm": 0.8675288371592743, "learning_rate": 3.861236802413273e-06, "loss": 0.5359, "step": 257 }, { "epoch": 0.011676849966055669, "grad_norm": 0.9401513608490044, "learning_rate": 3.8763197586727e-06, "loss": 0.531, "step": 258 }, { "epoch": 0.011722109074451233, "grad_norm": 0.8856111486307915, "learning_rate": 3.891402714932127e-06, "loss": 0.536, "step": 259 }, { "epoch": 0.011767368182846799, "grad_norm": 0.6697874287839527, "learning_rate": 3.906485671191554e-06, "loss": 0.617, "step": 260 }, { "epoch": 0.011812627291242363, "grad_norm": 0.9666300735640014, "learning_rate": 3.92156862745098e-06, "loss": 0.536, "step": 261 }, { "epoch": 0.011857886399637927, "grad_norm": 0.5422182479370271, "learning_rate": 3.9366515837104075e-06, "loss": 0.6328, "step": 262 }, { "epoch": 0.011903145508033492, "grad_norm": 0.4923913584350843, "learning_rate": 3.951734539969834e-06, "loss": 0.5888, "step": 263 }, { "epoch": 0.011948404616429056, "grad_norm": 0.43537169302489886, "learning_rate": 3.966817496229261e-06, "loss": 0.5906, "step": 264 }, { "epoch": 0.01199366372482462, "grad_norm": 0.495426622721109, "learning_rate": 3.9819004524886875e-06, "loss": 0.6193, "step": 265 }, { "epoch": 0.012038922833220186, "grad_norm": 1.0064974142968786, "learning_rate": 3.9969834087481156e-06, "loss": 0.5928, "step": 266 }, { "epoch": 0.01208418194161575, "grad_norm": 0.9836738785638621, "learning_rate": 4.012066365007542e-06, "loss": 0.4592, "step": 267 }, { "epoch": 0.012129441050011314, "grad_norm": 0.6441927181482473, "learning_rate": 4.027149321266969e-06, "loss": 0.5993, "step": 268 }, { "epoch": 0.01217470015840688, "grad_norm": 0.5193332967918078, "learning_rate": 4.0422322775263955e-06, "loss": 0.6138, "step": 269 }, { "epoch": 0.012219959266802444, "grad_norm": 0.9500292114748106, "learning_rate": 4.057315233785823e-06, "loss": 0.5578, "step": 270 }, { "epoch": 0.01226521837519801, "grad_norm": 0.4149215761090333, "learning_rate": 4.072398190045249e-06, "loss": 0.608, "step": 271 }, { "epoch": 0.012310477483593573, "grad_norm": 1.0754271378193447, "learning_rate": 4.087481146304676e-06, "loss": 0.5623, "step": 272 }, { "epoch": 0.012355736591989137, "grad_norm": 0.925293932357001, "learning_rate": 4.102564102564103e-06, "loss": 0.5349, "step": 273 }, { "epoch": 0.012400995700384703, "grad_norm": 1.133479898251426, "learning_rate": 4.11764705882353e-06, "loss": 0.5174, "step": 274 }, { "epoch": 0.012446254808780267, "grad_norm": 0.5209625544150582, "learning_rate": 4.132730015082956e-06, "loss": 0.6002, "step": 275 }, { "epoch": 0.012491513917175831, "grad_norm": 1.0776330878254095, "learning_rate": 4.1478129713423835e-06, "loss": 0.5646, "step": 276 }, { "epoch": 0.012536773025571397, "grad_norm": 1.1219212069602342, "learning_rate": 4.162895927601811e-06, "loss": 0.5618, "step": 277 }, { "epoch": 0.01258203213396696, "grad_norm": 0.9540197245213747, "learning_rate": 4.177978883861237e-06, "loss": 0.5335, "step": 278 }, { "epoch": 0.012627291242362525, "grad_norm": 1.0409140098530238, "learning_rate": 4.193061840120664e-06, "loss": 0.5274, "step": 279 }, { "epoch": 0.01267255035075809, "grad_norm": 0.9479380496867869, "learning_rate": 4.208144796380091e-06, "loss": 0.5062, "step": 280 }, { "epoch": 0.012717809459153654, "grad_norm": 0.8985186859100934, "learning_rate": 4.223227752639518e-06, "loss": 0.5108, "step": 281 }, { "epoch": 0.01276306856754922, "grad_norm": 0.8673565799474766, "learning_rate": 4.238310708898944e-06, "loss": 0.5079, "step": 282 }, { "epoch": 0.012808327675944784, "grad_norm": 0.9738525782248589, "learning_rate": 4.2533936651583714e-06, "loss": 0.5285, "step": 283 }, { "epoch": 0.012853586784340348, "grad_norm": 0.9445428406253431, "learning_rate": 4.268476621417798e-06, "loss": 0.5087, "step": 284 }, { "epoch": 0.012898845892735914, "grad_norm": 0.9482994825503861, "learning_rate": 4.283559577677225e-06, "loss": 0.5435, "step": 285 }, { "epoch": 0.012944105001131478, "grad_norm": 0.9428865202414309, "learning_rate": 4.298642533936652e-06, "loss": 0.4748, "step": 286 }, { "epoch": 0.012989364109527042, "grad_norm": 0.48671128565727295, "learning_rate": 4.313725490196079e-06, "loss": 0.6106, "step": 287 }, { "epoch": 0.013034623217922607, "grad_norm": 0.8757214278700259, "learning_rate": 4.328808446455506e-06, "loss": 0.5244, "step": 288 }, { "epoch": 0.013079882326318171, "grad_norm": 1.316171662310517, "learning_rate": 4.343891402714932e-06, "loss": 0.5239, "step": 289 }, { "epoch": 0.013125141434713735, "grad_norm": 0.9589527483195361, "learning_rate": 4.358974358974359e-06, "loss": 0.549, "step": 290 }, { "epoch": 0.013170400543109301, "grad_norm": 0.4200317179916816, "learning_rate": 4.374057315233786e-06, "loss": 0.6291, "step": 291 }, { "epoch": 0.013215659651504865, "grad_norm": 0.9318035726460884, "learning_rate": 4.389140271493213e-06, "loss": 0.558, "step": 292 }, { "epoch": 0.01326091875990043, "grad_norm": 1.0019818689732092, "learning_rate": 4.404223227752639e-06, "loss": 0.5025, "step": 293 }, { "epoch": 0.013306177868295995, "grad_norm": 1.113539719910521, "learning_rate": 4.419306184012067e-06, "loss": 0.5599, "step": 294 }, { "epoch": 0.013351436976691559, "grad_norm": 1.0569103377615405, "learning_rate": 4.434389140271493e-06, "loss": 0.5216, "step": 295 }, { "epoch": 0.013396696085087124, "grad_norm": 0.9664867476522317, "learning_rate": 4.44947209653092e-06, "loss": 0.4954, "step": 296 }, { "epoch": 0.013441955193482688, "grad_norm": 0.896450043739785, "learning_rate": 4.464555052790347e-06, "loss": 0.5027, "step": 297 }, { "epoch": 0.013487214301878252, "grad_norm": 0.9422311268833776, "learning_rate": 4.479638009049775e-06, "loss": 0.5497, "step": 298 }, { "epoch": 0.013532473410273818, "grad_norm": 1.035399348209044, "learning_rate": 4.494720965309201e-06, "loss": 0.56, "step": 299 }, { "epoch": 0.013577732518669382, "grad_norm": 0.5018088786562147, "learning_rate": 4.509803921568628e-06, "loss": 0.5795, "step": 300 }, { "epoch": 0.013622991627064946, "grad_norm": 0.9210544017751315, "learning_rate": 4.5248868778280546e-06, "loss": 0.5114, "step": 301 }, { "epoch": 0.013668250735460512, "grad_norm": 1.291852571021736, "learning_rate": 4.539969834087482e-06, "loss": 0.5583, "step": 302 }, { "epoch": 0.013713509843856076, "grad_norm": 1.0262904351383784, "learning_rate": 4.555052790346908e-06, "loss": 0.4886, "step": 303 }, { "epoch": 0.013758768952251641, "grad_norm": 0.9650289716611675, "learning_rate": 4.570135746606335e-06, "loss": 0.5207, "step": 304 }, { "epoch": 0.013804028060647205, "grad_norm": 0.48075664142150215, "learning_rate": 4.585218702865762e-06, "loss": 0.6426, "step": 305 }, { "epoch": 0.01384928716904277, "grad_norm": 0.8763321117534213, "learning_rate": 4.600301659125189e-06, "loss": 0.5104, "step": 306 }, { "epoch": 0.013894546277438335, "grad_norm": 0.941361972531803, "learning_rate": 4.615384615384616e-06, "loss": 0.5378, "step": 307 }, { "epoch": 0.013939805385833899, "grad_norm": 1.5059100405196593, "learning_rate": 4.6304675716440425e-06, "loss": 0.5498, "step": 308 }, { "epoch": 0.013985064494229463, "grad_norm": 0.917428534519775, "learning_rate": 4.64555052790347e-06, "loss": 0.5412, "step": 309 }, { "epoch": 0.014030323602625029, "grad_norm": 0.9499732405768552, "learning_rate": 4.660633484162896e-06, "loss": 0.5303, "step": 310 }, { "epoch": 0.014075582711020593, "grad_norm": 0.9636543513031476, "learning_rate": 4.675716440422323e-06, "loss": 0.552, "step": 311 }, { "epoch": 0.014120841819416157, "grad_norm": 0.9242787914214284, "learning_rate": 4.69079939668175e-06, "loss": 0.4908, "step": 312 }, { "epoch": 0.014166100927811722, "grad_norm": 1.081272531811085, "learning_rate": 4.705882352941177e-06, "loss": 0.5389, "step": 313 }, { "epoch": 0.014211360036207286, "grad_norm": 0.9253075444801346, "learning_rate": 4.720965309200603e-06, "loss": 0.5807, "step": 314 }, { "epoch": 0.014256619144602852, "grad_norm": 0.49351422143918683, "learning_rate": 4.7360482654600305e-06, "loss": 0.5956, "step": 315 }, { "epoch": 0.014301878252998416, "grad_norm": 0.9741852578327058, "learning_rate": 4.751131221719457e-06, "loss": 0.5276, "step": 316 }, { "epoch": 0.01434713736139398, "grad_norm": 0.42603570176596806, "learning_rate": 4.766214177978885e-06, "loss": 0.6086, "step": 317 }, { "epoch": 0.014392396469789546, "grad_norm": 0.9694041654859664, "learning_rate": 4.781297134238311e-06, "loss": 0.5729, "step": 318 }, { "epoch": 0.01443765557818511, "grad_norm": 0.8927176798511097, "learning_rate": 4.7963800904977385e-06, "loss": 0.5127, "step": 319 }, { "epoch": 0.014482914686580674, "grad_norm": 0.504617438746344, "learning_rate": 4.811463046757165e-06, "loss": 0.6003, "step": 320 }, { "epoch": 0.01452817379497624, "grad_norm": 0.4540099755279664, "learning_rate": 4.826546003016592e-06, "loss": 0.6335, "step": 321 }, { "epoch": 0.014573432903371803, "grad_norm": 1.1454920132721478, "learning_rate": 4.8416289592760185e-06, "loss": 0.5226, "step": 322 }, { "epoch": 0.014618692011767367, "grad_norm": 0.9128245799208095, "learning_rate": 4.856711915535446e-06, "loss": 0.523, "step": 323 }, { "epoch": 0.014663951120162933, "grad_norm": 0.42641660214287797, "learning_rate": 4.871794871794872e-06, "loss": 0.6013, "step": 324 }, { "epoch": 0.014709210228558497, "grad_norm": 0.9227995753110831, "learning_rate": 4.886877828054299e-06, "loss": 0.5273, "step": 325 }, { "epoch": 0.014754469336954063, "grad_norm": 0.859961581416437, "learning_rate": 4.901960784313726e-06, "loss": 0.51, "step": 326 }, { "epoch": 0.014799728445349627, "grad_norm": 0.9260793685617191, "learning_rate": 4.917043740573153e-06, "loss": 0.4988, "step": 327 }, { "epoch": 0.01484498755374519, "grad_norm": 0.9780194052175253, "learning_rate": 4.93212669683258e-06, "loss": 0.5041, "step": 328 }, { "epoch": 0.014890246662140756, "grad_norm": 0.5176024283585878, "learning_rate": 4.9472096530920064e-06, "loss": 0.5843, "step": 329 }, { "epoch": 0.01493550577053632, "grad_norm": 0.4840598729523819, "learning_rate": 4.962292609351434e-06, "loss": 0.594, "step": 330 }, { "epoch": 0.014980764878931884, "grad_norm": 0.38051714853312013, "learning_rate": 4.97737556561086e-06, "loss": 0.5979, "step": 331 }, { "epoch": 0.01502602398732745, "grad_norm": 0.9995763964660412, "learning_rate": 4.992458521870287e-06, "loss": 0.5146, "step": 332 }, { "epoch": 0.015071283095723014, "grad_norm": 0.974717356695917, "learning_rate": 5.007541478129714e-06, "loss": 0.5428, "step": 333 }, { "epoch": 0.015116542204118578, "grad_norm": 0.9470024372074591, "learning_rate": 5.022624434389141e-06, "loss": 0.5165, "step": 334 }, { "epoch": 0.015161801312514144, "grad_norm": 1.1126527648607103, "learning_rate": 5.037707390648567e-06, "loss": 0.5282, "step": 335 }, { "epoch": 0.015207060420909708, "grad_norm": 1.0448542636444378, "learning_rate": 5.052790346907994e-06, "loss": 0.5556, "step": 336 }, { "epoch": 0.015252319529305274, "grad_norm": 1.0030573569299337, "learning_rate": 5.067873303167421e-06, "loss": 0.4889, "step": 337 }, { "epoch": 0.015297578637700837, "grad_norm": 1.010629182095211, "learning_rate": 5.082956259426848e-06, "loss": 0.5208, "step": 338 }, { "epoch": 0.015342837746096401, "grad_norm": 0.8679356439233207, "learning_rate": 5.098039215686274e-06, "loss": 0.4962, "step": 339 }, { "epoch": 0.015388096854491967, "grad_norm": 0.9161320845529888, "learning_rate": 5.1131221719457016e-06, "loss": 0.5067, "step": 340 }, { "epoch": 0.015433355962887531, "grad_norm": 1.1114898863130425, "learning_rate": 5.128205128205128e-06, "loss": 0.5856, "step": 341 }, { "epoch": 0.015478615071283095, "grad_norm": 0.9330607826133401, "learning_rate": 5.143288084464555e-06, "loss": 0.5067, "step": 342 }, { "epoch": 0.01552387417967866, "grad_norm": 0.944407245141589, "learning_rate": 5.158371040723983e-06, "loss": 0.5558, "step": 343 }, { "epoch": 0.015569133288074225, "grad_norm": 1.1333977043998347, "learning_rate": 5.1734539969834096e-06, "loss": 0.5319, "step": 344 }, { "epoch": 0.015614392396469789, "grad_norm": 0.873397313220358, "learning_rate": 5.188536953242837e-06, "loss": 0.5263, "step": 345 }, { "epoch": 0.015659651504865355, "grad_norm": 0.763591213995416, "learning_rate": 5.203619909502263e-06, "loss": 0.6024, "step": 346 }, { "epoch": 0.01570491061326092, "grad_norm": 1.1260558397244373, "learning_rate": 5.21870286576169e-06, "loss": 0.5125, "step": 347 }, { "epoch": 0.015750169721656482, "grad_norm": 0.9889199273006318, "learning_rate": 5.233785822021117e-06, "loss": 0.5169, "step": 348 }, { "epoch": 0.015795428830052048, "grad_norm": 0.9146677322541702, "learning_rate": 5.248868778280544e-06, "loss": 0.4975, "step": 349 }, { "epoch": 0.015840687938447614, "grad_norm": 0.8626251002848274, "learning_rate": 5.26395173453997e-06, "loss": 0.4895, "step": 350 }, { "epoch": 0.015885947046843176, "grad_norm": 1.009332375677646, "learning_rate": 5.2790346907993975e-06, "loss": 0.4803, "step": 351 }, { "epoch": 0.015931206155238742, "grad_norm": 1.04132783036604, "learning_rate": 5.294117647058824e-06, "loss": 0.5219, "step": 352 }, { "epoch": 0.015976465263634308, "grad_norm": 0.8918609702573956, "learning_rate": 5.309200603318251e-06, "loss": 0.5251, "step": 353 }, { "epoch": 0.01602172437202987, "grad_norm": 0.9777210319694408, "learning_rate": 5.3242835595776775e-06, "loss": 0.5295, "step": 354 }, { "epoch": 0.016066983480425436, "grad_norm": 0.7434341418735497, "learning_rate": 5.339366515837105e-06, "loss": 0.5909, "step": 355 }, { "epoch": 0.016112242588821, "grad_norm": 1.1328565365265588, "learning_rate": 5.354449472096531e-06, "loss": 0.5164, "step": 356 }, { "epoch": 0.016157501697216563, "grad_norm": 0.9334067691411315, "learning_rate": 5.369532428355958e-06, "loss": 0.5455, "step": 357 }, { "epoch": 0.01620276080561213, "grad_norm": 0.8700250681219853, "learning_rate": 5.384615384615385e-06, "loss": 0.5212, "step": 358 }, { "epoch": 0.016248019914007695, "grad_norm": 0.8928502707857849, "learning_rate": 5.399698340874812e-06, "loss": 0.5332, "step": 359 }, { "epoch": 0.016293279022403257, "grad_norm": 1.0109170289938698, "learning_rate": 5.414781297134238e-06, "loss": 0.5707, "step": 360 }, { "epoch": 0.016338538130798823, "grad_norm": 0.47521107319293504, "learning_rate": 5.4298642533936655e-06, "loss": 0.5669, "step": 361 }, { "epoch": 0.01638379723919439, "grad_norm": 0.9306733378237048, "learning_rate": 5.444947209653092e-06, "loss": 0.4959, "step": 362 }, { "epoch": 0.01642905634758995, "grad_norm": 0.9194562783119116, "learning_rate": 5.460030165912519e-06, "loss": 0.5333, "step": 363 }, { "epoch": 0.016474315455985516, "grad_norm": 1.0013647007578086, "learning_rate": 5.475113122171946e-06, "loss": 0.4786, "step": 364 }, { "epoch": 0.016519574564381082, "grad_norm": 0.961270745112822, "learning_rate": 5.4901960784313735e-06, "loss": 0.5266, "step": 365 }, { "epoch": 0.016564833672776648, "grad_norm": 0.9891712982691216, "learning_rate": 5.505279034690801e-06, "loss": 0.5228, "step": 366 }, { "epoch": 0.01661009278117221, "grad_norm": 0.9361874821945951, "learning_rate": 5.520361990950227e-06, "loss": 0.532, "step": 367 }, { "epoch": 0.016655351889567776, "grad_norm": 1.01010060145753, "learning_rate": 5.535444947209654e-06, "loss": 0.5114, "step": 368 }, { "epoch": 0.01670061099796334, "grad_norm": 0.9379850817433849, "learning_rate": 5.550527903469081e-06, "loss": 0.5285, "step": 369 }, { "epoch": 0.016745870106358904, "grad_norm": 1.016504623607867, "learning_rate": 5.565610859728508e-06, "loss": 0.4726, "step": 370 }, { "epoch": 0.01679112921475447, "grad_norm": 1.082966973882081, "learning_rate": 5.580693815987934e-06, "loss": 0.5037, "step": 371 }, { "epoch": 0.016836388323150035, "grad_norm": 0.997881502882024, "learning_rate": 5.5957767722473614e-06, "loss": 0.5725, "step": 372 }, { "epoch": 0.016881647431545597, "grad_norm": 0.5965032348908443, "learning_rate": 5.610859728506788e-06, "loss": 0.592, "step": 373 }, { "epoch": 0.016926906539941163, "grad_norm": 0.9535705328720748, "learning_rate": 5.625942684766215e-06, "loss": 0.5252, "step": 374 }, { "epoch": 0.01697216564833673, "grad_norm": 0.9635724454974052, "learning_rate": 5.641025641025641e-06, "loss": 0.5908, "step": 375 }, { "epoch": 0.01701742475673229, "grad_norm": 0.920845074054737, "learning_rate": 5.656108597285069e-06, "loss": 0.573, "step": 376 }, { "epoch": 0.017062683865127857, "grad_norm": 0.8683200822495826, "learning_rate": 5.671191553544495e-06, "loss": 0.5179, "step": 377 }, { "epoch": 0.017107942973523423, "grad_norm": 0.9308637166603803, "learning_rate": 5.686274509803922e-06, "loss": 0.4988, "step": 378 }, { "epoch": 0.017153202081918985, "grad_norm": 0.5493833687064147, "learning_rate": 5.7013574660633486e-06, "loss": 0.6014, "step": 379 }, { "epoch": 0.01719846119031455, "grad_norm": 0.9211708064016626, "learning_rate": 5.716440422322776e-06, "loss": 0.5174, "step": 380 }, { "epoch": 0.017243720298710116, "grad_norm": 0.936329971148168, "learning_rate": 5.731523378582202e-06, "loss": 0.5162, "step": 381 }, { "epoch": 0.01728897940710568, "grad_norm": 1.064735933806856, "learning_rate": 5.746606334841629e-06, "loss": 0.4509, "step": 382 }, { "epoch": 0.017334238515501244, "grad_norm": 0.9441184309473762, "learning_rate": 5.761689291101056e-06, "loss": 0.531, "step": 383 }, { "epoch": 0.01737949762389681, "grad_norm": 0.891932270187907, "learning_rate": 5.776772247360483e-06, "loss": 0.5829, "step": 384 }, { "epoch": 0.017424756732292372, "grad_norm": 0.5207360378802349, "learning_rate": 5.79185520361991e-06, "loss": 0.5725, "step": 385 }, { "epoch": 0.017470015840687938, "grad_norm": 0.9828487126912975, "learning_rate": 5.806938159879337e-06, "loss": 0.5431, "step": 386 }, { "epoch": 0.017515274949083504, "grad_norm": 0.879686558754779, "learning_rate": 5.822021116138764e-06, "loss": 0.571, "step": 387 }, { "epoch": 0.01756053405747907, "grad_norm": 0.867256262719138, "learning_rate": 5.837104072398191e-06, "loss": 0.4815, "step": 388 }, { "epoch": 0.01760579316587463, "grad_norm": 0.9583720954401007, "learning_rate": 5.852187028657617e-06, "loss": 0.571, "step": 389 }, { "epoch": 0.017651052274270197, "grad_norm": 0.8976973382623153, "learning_rate": 5.8672699849170446e-06, "loss": 0.5015, "step": 390 }, { "epoch": 0.017696311382665763, "grad_norm": 0.8935749651867885, "learning_rate": 5.882352941176471e-06, "loss": 0.4882, "step": 391 }, { "epoch": 0.017741570491061325, "grad_norm": 0.8648779834677215, "learning_rate": 5.897435897435898e-06, "loss": 0.5131, "step": 392 }, { "epoch": 0.01778682959945689, "grad_norm": 0.9431042049088775, "learning_rate": 5.9125188536953245e-06, "loss": 0.4949, "step": 393 }, { "epoch": 0.017832088707852457, "grad_norm": 0.5114510065663831, "learning_rate": 5.927601809954752e-06, "loss": 0.5519, "step": 394 }, { "epoch": 0.01787734781624802, "grad_norm": 1.0818380636393499, "learning_rate": 5.942684766214178e-06, "loss": 0.4862, "step": 395 }, { "epoch": 0.017922606924643585, "grad_norm": 0.9493273137564501, "learning_rate": 5.957767722473605e-06, "loss": 0.5129, "step": 396 }, { "epoch": 0.01796786603303915, "grad_norm": 0.40185565651478844, "learning_rate": 5.972850678733032e-06, "loss": 0.5866, "step": 397 }, { "epoch": 0.018013125141434713, "grad_norm": 0.8638395089291984, "learning_rate": 5.987933634992459e-06, "loss": 0.4719, "step": 398 }, { "epoch": 0.018058384249830278, "grad_norm": 1.0007749499746714, "learning_rate": 6.003016591251885e-06, "loss": 0.5821, "step": 399 }, { "epoch": 0.018103643358225844, "grad_norm": 0.5148875748111391, "learning_rate": 6.0180995475113125e-06, "loss": 0.6172, "step": 400 }, { "epoch": 0.018148902466621406, "grad_norm": 0.4632624398180578, "learning_rate": 6.033182503770739e-06, "loss": 0.5892, "step": 401 }, { "epoch": 0.018194161575016972, "grad_norm": 0.9138373595599119, "learning_rate": 6.048265460030166e-06, "loss": 0.5006, "step": 402 }, { "epoch": 0.018239420683412538, "grad_norm": 1.002764279783093, "learning_rate": 6.0633484162895924e-06, "loss": 0.5384, "step": 403 }, { "epoch": 0.0182846797918081, "grad_norm": 0.9330664682506379, "learning_rate": 6.07843137254902e-06, "loss": 0.4955, "step": 404 }, { "epoch": 0.018329938900203666, "grad_norm": 0.8863526835691686, "learning_rate": 6.093514328808446e-06, "loss": 0.5018, "step": 405 }, { "epoch": 0.01837519800859923, "grad_norm": 0.8723865491435912, "learning_rate": 6.108597285067874e-06, "loss": 0.5304, "step": 406 }, { "epoch": 0.018420457116994794, "grad_norm": 0.7326950236339584, "learning_rate": 6.123680241327301e-06, "loss": 0.5871, "step": 407 }, { "epoch": 0.01846571622539036, "grad_norm": 1.1416361451930475, "learning_rate": 6.138763197586728e-06, "loss": 0.5121, "step": 408 }, { "epoch": 0.018510975333785925, "grad_norm": 0.9460814774142131, "learning_rate": 6.153846153846155e-06, "loss": 0.4931, "step": 409 }, { "epoch": 0.01855623444218149, "grad_norm": 0.43571051461036575, "learning_rate": 6.168929110105581e-06, "loss": 0.577, "step": 410 }, { "epoch": 0.018601493550577053, "grad_norm": 0.9005078431887299, "learning_rate": 6.1840120663650085e-06, "loss": 0.5226, "step": 411 }, { "epoch": 0.01864675265897262, "grad_norm": 1.0887276487488817, "learning_rate": 6.199095022624435e-06, "loss": 0.4509, "step": 412 }, { "epoch": 0.018692011767368184, "grad_norm": 0.8428854780274067, "learning_rate": 6.214177978883862e-06, "loss": 0.4858, "step": 413 }, { "epoch": 0.018737270875763747, "grad_norm": 0.9252658031535186, "learning_rate": 6.229260935143288e-06, "loss": 0.4748, "step": 414 }, { "epoch": 0.018782529984159312, "grad_norm": 0.9412040538128673, "learning_rate": 6.244343891402716e-06, "loss": 0.5015, "step": 415 }, { "epoch": 0.018827789092554878, "grad_norm": 0.9138434969166862, "learning_rate": 6.259426847662142e-06, "loss": 0.4821, "step": 416 }, { "epoch": 0.01887304820095044, "grad_norm": 0.8656063917318203, "learning_rate": 6.274509803921569e-06, "loss": 0.5217, "step": 417 }, { "epoch": 0.018918307309346006, "grad_norm": 0.94991968312089, "learning_rate": 6.2895927601809956e-06, "loss": 0.5392, "step": 418 }, { "epoch": 0.01896356641774157, "grad_norm": 0.9372279572345605, "learning_rate": 6.304675716440423e-06, "loss": 0.4799, "step": 419 }, { "epoch": 0.019008825526137134, "grad_norm": 0.9533178482186907, "learning_rate": 6.319758672699849e-06, "loss": 0.5858, "step": 420 }, { "epoch": 0.0190540846345327, "grad_norm": 0.9034610556331467, "learning_rate": 6.334841628959276e-06, "loss": 0.5204, "step": 421 }, { "epoch": 0.019099343742928265, "grad_norm": 0.9342840894396373, "learning_rate": 6.349924585218703e-06, "loss": 0.5085, "step": 422 }, { "epoch": 0.019144602851323828, "grad_norm": 0.8600603730023817, "learning_rate": 6.36500754147813e-06, "loss": 0.4955, "step": 423 }, { "epoch": 0.019189861959719393, "grad_norm": 0.6397248799825535, "learning_rate": 6.380090497737556e-06, "loss": 0.5843, "step": 424 }, { "epoch": 0.01923512106811496, "grad_norm": 1.0534317002502531, "learning_rate": 6.3951734539969835e-06, "loss": 0.512, "step": 425 }, { "epoch": 0.01928038017651052, "grad_norm": 0.9708662444314573, "learning_rate": 6.410256410256412e-06, "loss": 0.5013, "step": 426 }, { "epoch": 0.019325639284906087, "grad_norm": 0.6262308260974387, "learning_rate": 6.425339366515838e-06, "loss": 0.5656, "step": 427 }, { "epoch": 0.019370898393301653, "grad_norm": 1.104132946757567, "learning_rate": 6.440422322775265e-06, "loss": 0.5492, "step": 428 }, { "epoch": 0.019416157501697215, "grad_norm": 1.0780508879991042, "learning_rate": 6.4555052790346916e-06, "loss": 0.513, "step": 429 }, { "epoch": 0.01946141661009278, "grad_norm": 0.8014315963924707, "learning_rate": 6.470588235294119e-06, "loss": 0.5113, "step": 430 }, { "epoch": 0.019506675718488346, "grad_norm": 0.9294933497639883, "learning_rate": 6.485671191553545e-06, "loss": 0.5298, "step": 431 }, { "epoch": 0.019551934826883912, "grad_norm": 1.0134726540671148, "learning_rate": 6.500754147812972e-06, "loss": 0.4905, "step": 432 }, { "epoch": 0.019597193935279474, "grad_norm": 0.935200214361148, "learning_rate": 6.515837104072399e-06, "loss": 0.5372, "step": 433 }, { "epoch": 0.01964245304367504, "grad_norm": 0.9332971164790316, "learning_rate": 6.530920060331826e-06, "loss": 0.463, "step": 434 }, { "epoch": 0.019687712152070606, "grad_norm": 0.867110760375185, "learning_rate": 6.546003016591252e-06, "loss": 0.5862, "step": 435 }, { "epoch": 0.019732971260466168, "grad_norm": 0.8950935667741385, "learning_rate": 6.5610859728506795e-06, "loss": 0.5442, "step": 436 }, { "epoch": 0.019778230368861734, "grad_norm": 0.9721133660738163, "learning_rate": 6.576168929110106e-06, "loss": 0.4917, "step": 437 }, { "epoch": 0.0198234894772573, "grad_norm": 0.8960955325001373, "learning_rate": 6.591251885369533e-06, "loss": 0.49, "step": 438 }, { "epoch": 0.01986874858565286, "grad_norm": 0.9513126805609666, "learning_rate": 6.6063348416289595e-06, "loss": 0.5162, "step": 439 }, { "epoch": 0.019914007694048427, "grad_norm": 0.9823253412027514, "learning_rate": 6.621417797888387e-06, "loss": 0.5152, "step": 440 }, { "epoch": 0.019959266802443993, "grad_norm": 0.9045371589165514, "learning_rate": 6.636500754147813e-06, "loss": 0.527, "step": 441 }, { "epoch": 0.020004525910839555, "grad_norm": 0.8782531559250729, "learning_rate": 6.65158371040724e-06, "loss": 0.5669, "step": 442 }, { "epoch": 0.02004978501923512, "grad_norm": 0.8859706150934012, "learning_rate": 6.666666666666667e-06, "loss": 0.4585, "step": 443 }, { "epoch": 0.020095044127630687, "grad_norm": 0.6991315890774688, "learning_rate": 6.681749622926094e-06, "loss": 0.5682, "step": 444 }, { "epoch": 0.02014030323602625, "grad_norm": 0.9485402451951149, "learning_rate": 6.69683257918552e-06, "loss": 0.5208, "step": 445 }, { "epoch": 0.020185562344421815, "grad_norm": 0.9873924486992607, "learning_rate": 6.7119155354449474e-06, "loss": 0.5101, "step": 446 }, { "epoch": 0.02023082145281738, "grad_norm": 0.9276034420485871, "learning_rate": 6.7269984917043755e-06, "loss": 0.5268, "step": 447 }, { "epoch": 0.020276080561212943, "grad_norm": 0.9603097707029032, "learning_rate": 6.742081447963802e-06, "loss": 0.5419, "step": 448 }, { "epoch": 0.02032133966960851, "grad_norm": 0.9290487002204546, "learning_rate": 6.757164404223229e-06, "loss": 0.527, "step": 449 }, { "epoch": 0.020366598778004074, "grad_norm": 1.0113822614648986, "learning_rate": 6.7722473604826555e-06, "loss": 0.486, "step": 450 }, { "epoch": 0.020411857886399636, "grad_norm": 0.9560484644844321, "learning_rate": 6.787330316742083e-06, "loss": 0.5126, "step": 451 }, { "epoch": 0.020457116994795202, "grad_norm": 0.9513817484526905, "learning_rate": 6.802413273001509e-06, "loss": 0.5092, "step": 452 }, { "epoch": 0.020502376103190768, "grad_norm": 1.0341828220958489, "learning_rate": 6.817496229260936e-06, "loss": 0.4636, "step": 453 }, { "epoch": 0.020547635211586333, "grad_norm": 0.836876325711259, "learning_rate": 6.832579185520363e-06, "loss": 0.4935, "step": 454 }, { "epoch": 0.020592894319981896, "grad_norm": 0.7154257246692087, "learning_rate": 6.84766214177979e-06, "loss": 0.5816, "step": 455 }, { "epoch": 0.02063815342837746, "grad_norm": 0.8883803720828303, "learning_rate": 6.862745098039216e-06, "loss": 0.525, "step": 456 }, { "epoch": 0.020683412536773027, "grad_norm": 0.8812155648935334, "learning_rate": 6.8778280542986434e-06, "loss": 0.4803, "step": 457 }, { "epoch": 0.02072867164516859, "grad_norm": 0.8914599212352133, "learning_rate": 6.89291101055807e-06, "loss": 0.5146, "step": 458 }, { "epoch": 0.020773930753564155, "grad_norm": 0.9610108620275446, "learning_rate": 6.907993966817497e-06, "loss": 0.4794, "step": 459 }, { "epoch": 0.02081918986195972, "grad_norm": 0.9236182851861297, "learning_rate": 6.923076923076923e-06, "loss": 0.5321, "step": 460 }, { "epoch": 0.020864448970355283, "grad_norm": 0.7895517788163228, "learning_rate": 6.938159879336351e-06, "loss": 0.6144, "step": 461 }, { "epoch": 0.02090970807875085, "grad_norm": 1.4224341444472233, "learning_rate": 6.953242835595777e-06, "loss": 0.558, "step": 462 }, { "epoch": 0.020954967187146414, "grad_norm": 0.8534888875322985, "learning_rate": 6.968325791855204e-06, "loss": 0.5194, "step": 463 }, { "epoch": 0.021000226295541977, "grad_norm": 0.8878493254499618, "learning_rate": 6.9834087481146306e-06, "loss": 0.484, "step": 464 }, { "epoch": 0.021045485403937542, "grad_norm": 0.7689726276830536, "learning_rate": 6.998491704374058e-06, "loss": 0.4687, "step": 465 }, { "epoch": 0.021090744512333108, "grad_norm": 0.8431041350728392, "learning_rate": 7.013574660633484e-06, "loss": 0.4896, "step": 466 }, { "epoch": 0.02113600362072867, "grad_norm": 0.9131741584792719, "learning_rate": 7.028657616892911e-06, "loss": 0.5267, "step": 467 }, { "epoch": 0.021181262729124236, "grad_norm": 0.7000414706451139, "learning_rate": 7.0437405731523386e-06, "loss": 0.5891, "step": 468 }, { "epoch": 0.0212265218375198, "grad_norm": 0.5274632467798588, "learning_rate": 7.058823529411766e-06, "loss": 0.5661, "step": 469 }, { "epoch": 0.021271780945915364, "grad_norm": 1.0489074712596098, "learning_rate": 7.073906485671192e-06, "loss": 0.5283, "step": 470 }, { "epoch": 0.02131704005431093, "grad_norm": 0.8608740580398528, "learning_rate": 7.088989441930619e-06, "loss": 0.4743, "step": 471 }, { "epoch": 0.021362299162706495, "grad_norm": 0.8993120418263926, "learning_rate": 7.104072398190046e-06, "loss": 0.5414, "step": 472 }, { "epoch": 0.021407558271102058, "grad_norm": 0.8519655085775868, "learning_rate": 7.119155354449473e-06, "loss": 0.4785, "step": 473 }, { "epoch": 0.021452817379497623, "grad_norm": 0.9992364234055222, "learning_rate": 7.134238310708899e-06, "loss": 0.4768, "step": 474 }, { "epoch": 0.02149807648789319, "grad_norm": 0.8211878630216686, "learning_rate": 7.1493212669683265e-06, "loss": 0.512, "step": 475 }, { "epoch": 0.021543335596288755, "grad_norm": 0.8673840729651399, "learning_rate": 7.164404223227753e-06, "loss": 0.4972, "step": 476 }, { "epoch": 0.021588594704684317, "grad_norm": 0.9626081409011095, "learning_rate": 7.17948717948718e-06, "loss": 0.5009, "step": 477 }, { "epoch": 0.021633853813079883, "grad_norm": 0.9324671863783013, "learning_rate": 7.1945701357466065e-06, "loss": 0.5291, "step": 478 }, { "epoch": 0.02167911292147545, "grad_norm": 1.6119787378474646, "learning_rate": 7.209653092006034e-06, "loss": 0.5974, "step": 479 }, { "epoch": 0.02172437202987101, "grad_norm": 0.8936322895814439, "learning_rate": 7.22473604826546e-06, "loss": 0.4819, "step": 480 }, { "epoch": 0.021769631138266576, "grad_norm": 1.1833559492371089, "learning_rate": 7.239819004524887e-06, "loss": 0.5015, "step": 481 }, { "epoch": 0.021814890246662142, "grad_norm": 0.9542089624937165, "learning_rate": 7.2549019607843145e-06, "loss": 0.5349, "step": 482 }, { "epoch": 0.021860149355057704, "grad_norm": 0.8717232550801574, "learning_rate": 7.269984917043741e-06, "loss": 0.4842, "step": 483 }, { "epoch": 0.02190540846345327, "grad_norm": 0.8784660417369917, "learning_rate": 7.285067873303168e-06, "loss": 0.4522, "step": 484 }, { "epoch": 0.021950667571848836, "grad_norm": 0.8714297614485044, "learning_rate": 7.3001508295625945e-06, "loss": 0.486, "step": 485 }, { "epoch": 0.021995926680244398, "grad_norm": 1.0117867219550658, "learning_rate": 7.315233785822022e-06, "loss": 0.518, "step": 486 }, { "epoch": 0.022041185788639964, "grad_norm": 0.8551402658731775, "learning_rate": 7.330316742081448e-06, "loss": 0.5309, "step": 487 }, { "epoch": 0.02208644489703553, "grad_norm": 0.8520731544633671, "learning_rate": 7.345399698340876e-06, "loss": 0.4849, "step": 488 }, { "epoch": 0.02213170400543109, "grad_norm": 0.945220429406945, "learning_rate": 7.3604826546003025e-06, "loss": 0.5183, "step": 489 }, { "epoch": 0.022176963113826657, "grad_norm": 0.8965594690346885, "learning_rate": 7.37556561085973e-06, "loss": 0.5068, "step": 490 }, { "epoch": 0.022222222222222223, "grad_norm": 0.9383416027268775, "learning_rate": 7.390648567119156e-06, "loss": 0.5172, "step": 491 }, { "epoch": 0.022267481330617785, "grad_norm": 2.8137379093185415, "learning_rate": 7.405731523378583e-06, "loss": 0.6656, "step": 492 }, { "epoch": 0.02231274043901335, "grad_norm": 0.8911437812376426, "learning_rate": 7.42081447963801e-06, "loss": 0.517, "step": 493 }, { "epoch": 0.022357999547408917, "grad_norm": 0.8746627228365279, "learning_rate": 7.435897435897437e-06, "loss": 0.476, "step": 494 }, { "epoch": 0.02240325865580448, "grad_norm": 1.0210061302597564, "learning_rate": 7.450980392156863e-06, "loss": 0.4964, "step": 495 }, { "epoch": 0.022448517764200045, "grad_norm": 1.3930772745192121, "learning_rate": 7.4660633484162904e-06, "loss": 0.5769, "step": 496 }, { "epoch": 0.02249377687259561, "grad_norm": 0.9354966630918502, "learning_rate": 7.481146304675717e-06, "loss": 0.5273, "step": 497 }, { "epoch": 0.022539035980991176, "grad_norm": 1.0404930623959125, "learning_rate": 7.496229260935144e-06, "loss": 0.5322, "step": 498 }, { "epoch": 0.02258429508938674, "grad_norm": 0.9212989516000655, "learning_rate": 7.51131221719457e-06, "loss": 0.5197, "step": 499 }, { "epoch": 0.022629554197782304, "grad_norm": 1.1837040152817129, "learning_rate": 7.526395173453998e-06, "loss": 0.4601, "step": 500 }, { "epoch": 0.02267481330617787, "grad_norm": 0.9831508145374277, "learning_rate": 7.541478129713424e-06, "loss": 0.5082, "step": 501 }, { "epoch": 0.022720072414573432, "grad_norm": 1.7317500998832773, "learning_rate": 7.556561085972851e-06, "loss": 0.6088, "step": 502 }, { "epoch": 0.022765331522968998, "grad_norm": 1.4020978387603402, "learning_rate": 7.5716440422322776e-06, "loss": 0.5859, "step": 503 }, { "epoch": 0.022810590631364563, "grad_norm": 0.9968935655490997, "learning_rate": 7.586726998491705e-06, "loss": 0.4931, "step": 504 }, { "epoch": 0.022855849739760126, "grad_norm": 0.9737894931726774, "learning_rate": 7.601809954751131e-06, "loss": 0.5629, "step": 505 }, { "epoch": 0.02290110884815569, "grad_norm": 0.8123883604016281, "learning_rate": 7.616892911010558e-06, "loss": 0.5703, "step": 506 }, { "epoch": 0.022946367956551257, "grad_norm": 0.9854901638439508, "learning_rate": 7.631975867269985e-06, "loss": 0.4761, "step": 507 }, { "epoch": 0.02299162706494682, "grad_norm": 1.4197575971354461, "learning_rate": 7.647058823529411e-06, "loss": 0.5448, "step": 508 }, { "epoch": 0.023036886173342385, "grad_norm": 0.923807733562872, "learning_rate": 7.66214177978884e-06, "loss": 0.5432, "step": 509 }, { "epoch": 0.02308214528173795, "grad_norm": 0.9263412579857541, "learning_rate": 7.677224736048267e-06, "loss": 0.507, "step": 510 }, { "epoch": 0.023127404390133513, "grad_norm": 0.854932864691546, "learning_rate": 7.692307692307694e-06, "loss": 0.4931, "step": 511 }, { "epoch": 0.02317266349852908, "grad_norm": 0.9010694714875057, "learning_rate": 7.70739064856712e-06, "loss": 0.5055, "step": 512 }, { "epoch": 0.023217922606924644, "grad_norm": 0.9302825746028861, "learning_rate": 7.722473604826546e-06, "loss": 0.5165, "step": 513 }, { "epoch": 0.023263181715320207, "grad_norm": 0.815629604194157, "learning_rate": 7.737556561085974e-06, "loss": 0.5036, "step": 514 }, { "epoch": 0.023308440823715772, "grad_norm": 0.873286630615026, "learning_rate": 7.7526395173454e-06, "loss": 0.509, "step": 515 }, { "epoch": 0.023353699932111338, "grad_norm": 1.3120833057706391, "learning_rate": 7.767722473604827e-06, "loss": 0.5643, "step": 516 }, { "epoch": 0.0233989590405069, "grad_norm": 0.8369186657274733, "learning_rate": 7.782805429864253e-06, "loss": 0.5009, "step": 517 }, { "epoch": 0.023444218148902466, "grad_norm": 0.8544449557940702, "learning_rate": 7.797888386123682e-06, "loss": 0.4528, "step": 518 }, { "epoch": 0.023489477257298032, "grad_norm": 1.0406352425448877, "learning_rate": 7.812971342383108e-06, "loss": 0.5267, "step": 519 }, { "epoch": 0.023534736365693597, "grad_norm": 0.8706975403009801, "learning_rate": 7.828054298642534e-06, "loss": 0.5368, "step": 520 }, { "epoch": 0.02357999547408916, "grad_norm": 0.8513980861118772, "learning_rate": 7.84313725490196e-06, "loss": 0.476, "step": 521 }, { "epoch": 0.023625254582484725, "grad_norm": 0.8850946942100224, "learning_rate": 7.858220211161389e-06, "loss": 0.5099, "step": 522 }, { "epoch": 0.02367051369088029, "grad_norm": 0.9757505338538881, "learning_rate": 7.873303167420815e-06, "loss": 0.5034, "step": 523 }, { "epoch": 0.023715772799275853, "grad_norm": 0.8863131755025008, "learning_rate": 7.888386123680241e-06, "loss": 0.5835, "step": 524 }, { "epoch": 0.02376103190767142, "grad_norm": 0.7221129697816255, "learning_rate": 7.903469079939668e-06, "loss": 0.5908, "step": 525 }, { "epoch": 0.023806291016066985, "grad_norm": 1.1644628967771642, "learning_rate": 7.918552036199096e-06, "loss": 0.5293, "step": 526 }, { "epoch": 0.023851550124462547, "grad_norm": 0.9771835472342165, "learning_rate": 7.933634992458522e-06, "loss": 0.5114, "step": 527 }, { "epoch": 0.023896809232858113, "grad_norm": 0.84653531874801, "learning_rate": 7.948717948717949e-06, "loss": 0.5591, "step": 528 }, { "epoch": 0.02394206834125368, "grad_norm": 1.1156798418233669, "learning_rate": 7.963800904977375e-06, "loss": 0.4927, "step": 529 }, { "epoch": 0.02398732744964924, "grad_norm": 0.6547282981964502, "learning_rate": 7.978883861236803e-06, "loss": 0.5492, "step": 530 }, { "epoch": 0.024032586558044806, "grad_norm": 0.4978885832627992, "learning_rate": 7.993966817496231e-06, "loss": 0.553, "step": 531 }, { "epoch": 0.024077845666440372, "grad_norm": 0.9638003747027917, "learning_rate": 8.009049773755657e-06, "loss": 0.4927, "step": 532 }, { "epoch": 0.024123104774835934, "grad_norm": 0.9485547114845543, "learning_rate": 8.024132730015084e-06, "loss": 0.5389, "step": 533 }, { "epoch": 0.0241683638832315, "grad_norm": 0.970251325477242, "learning_rate": 8.03921568627451e-06, "loss": 0.5218, "step": 534 }, { "epoch": 0.024213622991627066, "grad_norm": 1.066770352433271, "learning_rate": 8.054298642533938e-06, "loss": 0.5245, "step": 535 }, { "epoch": 0.024258882100022628, "grad_norm": 1.2508405584119022, "learning_rate": 8.069381598793365e-06, "loss": 0.5851, "step": 536 }, { "epoch": 0.024304141208418194, "grad_norm": 0.8977303585116398, "learning_rate": 8.084464555052791e-06, "loss": 0.4804, "step": 537 }, { "epoch": 0.02434940031681376, "grad_norm": 0.8550682869491487, "learning_rate": 8.099547511312217e-06, "loss": 0.4385, "step": 538 }, { "epoch": 0.02439465942520932, "grad_norm": 0.5604442567533277, "learning_rate": 8.114630467571645e-06, "loss": 0.5873, "step": 539 }, { "epoch": 0.024439918533604887, "grad_norm": 1.0044342170560538, "learning_rate": 8.129713423831072e-06, "loss": 0.4735, "step": 540 }, { "epoch": 0.024485177642000453, "grad_norm": 0.6410626077944116, "learning_rate": 8.144796380090498e-06, "loss": 0.5873, "step": 541 }, { "epoch": 0.02453043675039602, "grad_norm": 0.9384899463609325, "learning_rate": 8.159879336349925e-06, "loss": 0.4954, "step": 542 }, { "epoch": 0.02457569585879158, "grad_norm": 0.9117160909143945, "learning_rate": 8.174962292609353e-06, "loss": 0.5302, "step": 543 }, { "epoch": 0.024620954967187147, "grad_norm": 0.9444687825633534, "learning_rate": 8.190045248868779e-06, "loss": 0.5089, "step": 544 }, { "epoch": 0.024666214075582713, "grad_norm": 0.8911153707648989, "learning_rate": 8.205128205128205e-06, "loss": 0.4722, "step": 545 }, { "epoch": 0.024711473183978275, "grad_norm": 0.8783990335001564, "learning_rate": 8.220211161387632e-06, "loss": 0.5297, "step": 546 }, { "epoch": 0.02475673229237384, "grad_norm": 0.9749831735866099, "learning_rate": 8.23529411764706e-06, "loss": 0.5089, "step": 547 }, { "epoch": 0.024801991400769406, "grad_norm": 0.9534394716345836, "learning_rate": 8.250377073906486e-06, "loss": 0.4903, "step": 548 }, { "epoch": 0.02484725050916497, "grad_norm": 0.986230968075295, "learning_rate": 8.265460030165913e-06, "loss": 0.5271, "step": 549 }, { "epoch": 0.024892509617560534, "grad_norm": 0.9445929420045507, "learning_rate": 8.280542986425339e-06, "loss": 0.5049, "step": 550 }, { "epoch": 0.0249377687259561, "grad_norm": 0.8445435072400197, "learning_rate": 8.295625942684767e-06, "loss": 0.4706, "step": 551 }, { "epoch": 0.024983027834351662, "grad_norm": 0.8146518680172017, "learning_rate": 8.310708898944195e-06, "loss": 0.495, "step": 552 }, { "epoch": 0.025028286942747228, "grad_norm": 1.2530851684753053, "learning_rate": 8.325791855203621e-06, "loss": 0.5775, "step": 553 }, { "epoch": 0.025073546051142794, "grad_norm": 0.8546866880086258, "learning_rate": 8.340874811463048e-06, "loss": 0.6055, "step": 554 }, { "epoch": 0.025118805159538356, "grad_norm": 1.1621279479943107, "learning_rate": 8.355957767722474e-06, "loss": 0.5157, "step": 555 }, { "epoch": 0.02516406426793392, "grad_norm": 0.9994237527753784, "learning_rate": 8.371040723981902e-06, "loss": 0.4294, "step": 556 }, { "epoch": 0.025209323376329487, "grad_norm": 0.8970428825907083, "learning_rate": 8.386123680241329e-06, "loss": 0.4898, "step": 557 }, { "epoch": 0.02525458248472505, "grad_norm": 1.1646020881114376, "learning_rate": 8.401206636500755e-06, "loss": 0.5023, "step": 558 }, { "epoch": 0.025299841593120615, "grad_norm": 1.0114259306681477, "learning_rate": 8.416289592760181e-06, "loss": 0.4947, "step": 559 }, { "epoch": 0.02534510070151618, "grad_norm": 1.0786177518368725, "learning_rate": 8.43137254901961e-06, "loss": 0.4837, "step": 560 }, { "epoch": 0.025390359809911743, "grad_norm": 1.1702295424811555, "learning_rate": 8.446455505279036e-06, "loss": 0.5464, "step": 561 }, { "epoch": 0.02543561891830731, "grad_norm": 0.9776929278671919, "learning_rate": 8.461538461538462e-06, "loss": 0.5263, "step": 562 }, { "epoch": 0.025480878026702874, "grad_norm": 2.158204647935258, "learning_rate": 8.476621417797888e-06, "loss": 0.5842, "step": 563 }, { "epoch": 0.02552613713509844, "grad_norm": 1.2324208092362534, "learning_rate": 8.491704374057317e-06, "loss": 0.5163, "step": 564 }, { "epoch": 0.025571396243494002, "grad_norm": 1.1914425382161538, "learning_rate": 8.506787330316743e-06, "loss": 0.5762, "step": 565 }, { "epoch": 0.025616655351889568, "grad_norm": 0.9005565179098027, "learning_rate": 8.52187028657617e-06, "loss": 0.4902, "step": 566 }, { "epoch": 0.025661914460285134, "grad_norm": 0.8924173594211934, "learning_rate": 8.536953242835596e-06, "loss": 0.5824, "step": 567 }, { "epoch": 0.025707173568680696, "grad_norm": 1.0033799904869471, "learning_rate": 8.552036199095024e-06, "loss": 0.5912, "step": 568 }, { "epoch": 0.025752432677076262, "grad_norm": 1.3324490342886244, "learning_rate": 8.56711915535445e-06, "loss": 0.5026, "step": 569 }, { "epoch": 0.025797691785471828, "grad_norm": 1.013502674657888, "learning_rate": 8.582202111613876e-06, "loss": 0.4814, "step": 570 }, { "epoch": 0.02584295089386739, "grad_norm": 1.0831743227926793, "learning_rate": 8.597285067873304e-06, "loss": 0.4891, "step": 571 }, { "epoch": 0.025888210002262955, "grad_norm": 1.2332349946569197, "learning_rate": 8.612368024132731e-06, "loss": 0.4562, "step": 572 }, { "epoch": 0.02593346911065852, "grad_norm": 0.9177188668689421, "learning_rate": 8.627450980392157e-06, "loss": 0.4895, "step": 573 }, { "epoch": 0.025978728219054083, "grad_norm": 1.1644222868364908, "learning_rate": 8.642533936651585e-06, "loss": 0.5196, "step": 574 }, { "epoch": 0.02602398732744965, "grad_norm": 3.131192676139937, "learning_rate": 8.657616892911012e-06, "loss": 0.6046, "step": 575 }, { "epoch": 0.026069246435845215, "grad_norm": 1.5353274640022994, "learning_rate": 8.672699849170438e-06, "loss": 0.541, "step": 576 }, { "epoch": 0.026114505544240777, "grad_norm": 1.0917678418917096, "learning_rate": 8.687782805429864e-06, "loss": 0.5182, "step": 577 }, { "epoch": 0.026159764652636343, "grad_norm": 1.5763631164913774, "learning_rate": 8.702865761689292e-06, "loss": 0.5366, "step": 578 }, { "epoch": 0.02620502376103191, "grad_norm": 1.0361965455336255, "learning_rate": 8.717948717948719e-06, "loss": 0.5253, "step": 579 }, { "epoch": 0.02625028286942747, "grad_norm": 1.083518580000751, "learning_rate": 8.733031674208145e-06, "loss": 0.5001, "step": 580 }, { "epoch": 0.026295541977823036, "grad_norm": 1.155423558488019, "learning_rate": 8.748114630467572e-06, "loss": 0.5259, "step": 581 }, { "epoch": 0.026340801086218602, "grad_norm": 1.0320656726255666, "learning_rate": 8.763197586727e-06, "loss": 0.5254, "step": 582 }, { "epoch": 0.026386060194614164, "grad_norm": 1.2339557419740619, "learning_rate": 8.778280542986426e-06, "loss": 0.5735, "step": 583 }, { "epoch": 0.02643131930300973, "grad_norm": 0.9770656904444553, "learning_rate": 8.793363499245852e-06, "loss": 0.5164, "step": 584 }, { "epoch": 0.026476578411405296, "grad_norm": 1.2688613297497584, "learning_rate": 8.808446455505279e-06, "loss": 0.487, "step": 585 }, { "epoch": 0.02652183751980086, "grad_norm": 1.034534302426547, "learning_rate": 8.823529411764707e-06, "loss": 0.5501, "step": 586 }, { "epoch": 0.026567096628196424, "grad_norm": 1.0523407452463565, "learning_rate": 8.838612368024133e-06, "loss": 0.5342, "step": 587 }, { "epoch": 0.02661235573659199, "grad_norm": 1.0254835996160458, "learning_rate": 8.85369532428356e-06, "loss": 0.484, "step": 588 }, { "epoch": 0.026657614844987555, "grad_norm": 0.9622678518511545, "learning_rate": 8.868778280542986e-06, "loss": 0.5375, "step": 589 }, { "epoch": 0.026702873953383117, "grad_norm": 0.9677691980588815, "learning_rate": 8.883861236802414e-06, "loss": 0.5067, "step": 590 }, { "epoch": 0.026748133061778683, "grad_norm": 0.9627736768818012, "learning_rate": 8.89894419306184e-06, "loss": 0.5187, "step": 591 }, { "epoch": 0.02679339217017425, "grad_norm": 0.9440199612227524, "learning_rate": 8.914027149321268e-06, "loss": 0.4758, "step": 592 }, { "epoch": 0.02683865127856981, "grad_norm": 0.93767786410755, "learning_rate": 8.929110105580695e-06, "loss": 0.5087, "step": 593 }, { "epoch": 0.026883910386965377, "grad_norm": 1.1964384917445936, "learning_rate": 8.944193061840121e-06, "loss": 0.5766, "step": 594 }, { "epoch": 0.026929169495360943, "grad_norm": 1.0117724924229, "learning_rate": 8.95927601809955e-06, "loss": 0.4784, "step": 595 }, { "epoch": 0.026974428603756505, "grad_norm": 0.9925820906569525, "learning_rate": 8.974358974358976e-06, "loss": 0.4617, "step": 596 }, { "epoch": 0.02701968771215207, "grad_norm": 0.6677243498813, "learning_rate": 8.989441930618402e-06, "loss": 0.5879, "step": 597 }, { "epoch": 0.027064946820547636, "grad_norm": 0.9912709074709852, "learning_rate": 9.004524886877828e-06, "loss": 0.5333, "step": 598 }, { "epoch": 0.0271102059289432, "grad_norm": 1.0272835433313774, "learning_rate": 9.019607843137256e-06, "loss": 0.521, "step": 599 }, { "epoch": 0.027155465037338764, "grad_norm": 0.683172105248878, "learning_rate": 9.034690799396683e-06, "loss": 0.5702, "step": 600 }, { "epoch": 0.02720072414573433, "grad_norm": 0.8123114594776241, "learning_rate": 9.049773755656109e-06, "loss": 0.4317, "step": 601 }, { "epoch": 0.027245983254129892, "grad_norm": 0.8680303400075743, "learning_rate": 9.064856711915535e-06, "loss": 0.4948, "step": 602 }, { "epoch": 0.027291242362525458, "grad_norm": 0.8525579255911381, "learning_rate": 9.079939668174964e-06, "loss": 0.5081, "step": 603 }, { "epoch": 0.027336501470921024, "grad_norm": 0.8102455407975535, "learning_rate": 9.09502262443439e-06, "loss": 0.474, "step": 604 }, { "epoch": 0.027381760579316586, "grad_norm": 0.8259249514304119, "learning_rate": 9.110105580693816e-06, "loss": 0.5591, "step": 605 }, { "epoch": 0.02742701968771215, "grad_norm": 0.8613389960609926, "learning_rate": 9.125188536953243e-06, "loss": 0.4717, "step": 606 }, { "epoch": 0.027472278796107717, "grad_norm": 0.6540097401182121, "learning_rate": 9.14027149321267e-06, "loss": 0.5509, "step": 607 }, { "epoch": 0.027517537904503283, "grad_norm": 0.9461405945810982, "learning_rate": 9.155354449472097e-06, "loss": 0.4877, "step": 608 }, { "epoch": 0.027562797012898845, "grad_norm": 0.9013544304168348, "learning_rate": 9.170437405731523e-06, "loss": 0.5194, "step": 609 }, { "epoch": 0.02760805612129441, "grad_norm": 1.1402918439029173, "learning_rate": 9.18552036199095e-06, "loss": 0.4829, "step": 610 }, { "epoch": 0.027653315229689977, "grad_norm": 0.9047977925822974, "learning_rate": 9.200603318250378e-06, "loss": 0.5196, "step": 611 }, { "epoch": 0.02769857433808554, "grad_norm": 0.9207106618040918, "learning_rate": 9.215686274509804e-06, "loss": 0.4846, "step": 612 }, { "epoch": 0.027743833446481105, "grad_norm": 0.8252106661434595, "learning_rate": 9.230769230769232e-06, "loss": 0.46, "step": 613 }, { "epoch": 0.02778909255487667, "grad_norm": 0.9010037438995498, "learning_rate": 9.245852187028659e-06, "loss": 0.5667, "step": 614 }, { "epoch": 0.027834351663272233, "grad_norm": 0.8647698992314747, "learning_rate": 9.260935143288085e-06, "loss": 0.4788, "step": 615 }, { "epoch": 0.027879610771667798, "grad_norm": 0.9697514771488839, "learning_rate": 9.276018099547513e-06, "loss": 0.5116, "step": 616 }, { "epoch": 0.027924869880063364, "grad_norm": 0.8975155881110004, "learning_rate": 9.29110105580694e-06, "loss": 0.5, "step": 617 }, { "epoch": 0.027970128988458926, "grad_norm": 0.9331083075291978, "learning_rate": 9.306184012066366e-06, "loss": 0.4704, "step": 618 }, { "epoch": 0.028015388096854492, "grad_norm": 0.8356134579308436, "learning_rate": 9.321266968325792e-06, "loss": 0.466, "step": 619 }, { "epoch": 0.028060647205250058, "grad_norm": 1.0477143205408235, "learning_rate": 9.33634992458522e-06, "loss": 0.4672, "step": 620 }, { "epoch": 0.02810590631364562, "grad_norm": 1.4144678930589356, "learning_rate": 9.351432880844647e-06, "loss": 0.5469, "step": 621 }, { "epoch": 0.028151165422041186, "grad_norm": 0.7897408114102736, "learning_rate": 9.366515837104073e-06, "loss": 0.5242, "step": 622 }, { "epoch": 0.02819642453043675, "grad_norm": 0.9966835571160938, "learning_rate": 9.3815987933635e-06, "loss": 0.5051, "step": 623 }, { "epoch": 0.028241683638832314, "grad_norm": 1.0012891418820922, "learning_rate": 9.396681749622927e-06, "loss": 0.4974, "step": 624 }, { "epoch": 0.02828694274722788, "grad_norm": 0.8643487579115816, "learning_rate": 9.411764705882354e-06, "loss": 0.478, "step": 625 }, { "epoch": 0.028332201855623445, "grad_norm": 1.0272295174288926, "learning_rate": 9.42684766214178e-06, "loss": 0.4871, "step": 626 }, { "epoch": 0.028377460964019007, "grad_norm": 0.9497133963033242, "learning_rate": 9.441930618401207e-06, "loss": 0.4988, "step": 627 }, { "epoch": 0.028422720072414573, "grad_norm": 0.885996701031616, "learning_rate": 9.457013574660635e-06, "loss": 0.5156, "step": 628 }, { "epoch": 0.02846797918081014, "grad_norm": 0.9061066748339962, "learning_rate": 9.472096530920061e-06, "loss": 0.4292, "step": 629 }, { "epoch": 0.028513238289205704, "grad_norm": 2.3887004974429993, "learning_rate": 9.487179487179487e-06, "loss": 0.5426, "step": 630 }, { "epoch": 0.028558497397601267, "grad_norm": 1.0015617083513035, "learning_rate": 9.502262443438914e-06, "loss": 0.5036, "step": 631 }, { "epoch": 0.028603756505996832, "grad_norm": 1.1310600352677773, "learning_rate": 9.517345399698342e-06, "loss": 0.519, "step": 632 }, { "epoch": 0.028649015614392398, "grad_norm": 1.0516032606117616, "learning_rate": 9.53242835595777e-06, "loss": 0.5245, "step": 633 }, { "epoch": 0.02869427472278796, "grad_norm": 0.9627150600636653, "learning_rate": 9.547511312217196e-06, "loss": 0.4858, "step": 634 }, { "epoch": 0.028739533831183526, "grad_norm": 0.9908734595681895, "learning_rate": 9.562594268476623e-06, "loss": 0.5066, "step": 635 }, { "epoch": 0.02878479293957909, "grad_norm": 0.8558945955157928, "learning_rate": 9.577677224736049e-06, "loss": 0.4889, "step": 636 }, { "epoch": 0.028830052047974654, "grad_norm": 1.1896758395015434, "learning_rate": 9.592760180995477e-06, "loss": 0.4829, "step": 637 }, { "epoch": 0.02887531115637022, "grad_norm": 1.0550264618476963, "learning_rate": 9.607843137254903e-06, "loss": 0.5054, "step": 638 }, { "epoch": 0.028920570264765785, "grad_norm": 0.9297556565489958, "learning_rate": 9.62292609351433e-06, "loss": 0.5131, "step": 639 }, { "epoch": 0.028965829373161348, "grad_norm": 1.8024189426984918, "learning_rate": 9.638009049773756e-06, "loss": 0.5572, "step": 640 }, { "epoch": 0.029011088481556913, "grad_norm": 1.031101602190451, "learning_rate": 9.653092006033184e-06, "loss": 0.4433, "step": 641 }, { "epoch": 0.02905634758995248, "grad_norm": 0.9243046569458107, "learning_rate": 9.66817496229261e-06, "loss": 0.5722, "step": 642 }, { "epoch": 0.02910160669834804, "grad_norm": 0.9572833331050026, "learning_rate": 9.683257918552037e-06, "loss": 0.5154, "step": 643 }, { "epoch": 0.029146865806743607, "grad_norm": 0.9545263635012465, "learning_rate": 9.698340874811463e-06, "loss": 0.5023, "step": 644 }, { "epoch": 0.029192124915139173, "grad_norm": 1.1494387901770513, "learning_rate": 9.713423831070891e-06, "loss": 0.4989, "step": 645 }, { "epoch": 0.029237384023534735, "grad_norm": 0.811442686170712, "learning_rate": 9.728506787330318e-06, "loss": 0.4753, "step": 646 }, { "epoch": 0.0292826431319303, "grad_norm": 0.9594851119019182, "learning_rate": 9.743589743589744e-06, "loss": 0.4897, "step": 647 }, { "epoch": 0.029327902240325866, "grad_norm": 0.9577158737270751, "learning_rate": 9.75867269984917e-06, "loss": 0.5069, "step": 648 }, { "epoch": 0.02937316134872143, "grad_norm": 0.9677059426724289, "learning_rate": 9.773755656108599e-06, "loss": 0.4707, "step": 649 }, { "epoch": 0.029418420457116994, "grad_norm": 0.9131491960385424, "learning_rate": 9.788838612368025e-06, "loss": 0.5233, "step": 650 }, { "epoch": 0.02946367956551256, "grad_norm": 0.8903706265312409, "learning_rate": 9.803921568627451e-06, "loss": 0.494, "step": 651 }, { "epoch": 0.029508938673908126, "grad_norm": 0.8897802621593002, "learning_rate": 9.819004524886878e-06, "loss": 0.5024, "step": 652 }, { "epoch": 0.029554197782303688, "grad_norm": 0.886489545618575, "learning_rate": 9.834087481146306e-06, "loss": 0.4947, "step": 653 }, { "epoch": 0.029599456890699254, "grad_norm": 1.142183417297171, "learning_rate": 9.849170437405732e-06, "loss": 0.5074, "step": 654 }, { "epoch": 0.02964471599909482, "grad_norm": 0.7920238350891496, "learning_rate": 9.86425339366516e-06, "loss": 0.4285, "step": 655 }, { "epoch": 0.02968997510749038, "grad_norm": 0.7817895238319269, "learning_rate": 9.879336349924586e-06, "loss": 0.4933, "step": 656 }, { "epoch": 0.029735234215885947, "grad_norm": 3.3052962392165166, "learning_rate": 9.894419306184013e-06, "loss": 0.6201, "step": 657 }, { "epoch": 0.029780493324281513, "grad_norm": 1.8967947995531824, "learning_rate": 9.90950226244344e-06, "loss": 0.5664, "step": 658 }, { "epoch": 0.029825752432677075, "grad_norm": 1.016181045275243, "learning_rate": 9.924585218702867e-06, "loss": 0.468, "step": 659 }, { "epoch": 0.02987101154107264, "grad_norm": 1.0906829653780257, "learning_rate": 9.939668174962294e-06, "loss": 0.5306, "step": 660 }, { "epoch": 0.029916270649468207, "grad_norm": 1.0567718662960948, "learning_rate": 9.95475113122172e-06, "loss": 0.4589, "step": 661 }, { "epoch": 0.02996152975786377, "grad_norm": 1.155731681459567, "learning_rate": 9.969834087481146e-06, "loss": 0.5076, "step": 662 }, { "epoch": 0.030006788866259335, "grad_norm": 0.9147348944501993, "learning_rate": 9.984917043740574e-06, "loss": 0.432, "step": 663 }, { "epoch": 0.0300520479746549, "grad_norm": 0.9209779765845878, "learning_rate": 1e-05, "loss": 0.4823, "step": 664 }, { "epoch": 0.030097307083050463, "grad_norm": 0.9271797626630768, "learning_rate": 9.999999946282679e-06, "loss": 0.5029, "step": 665 }, { "epoch": 0.03014256619144603, "grad_norm": 1.039138585186304, "learning_rate": 9.999999785130714e-06, "loss": 0.5299, "step": 666 }, { "epoch": 0.030187825299841594, "grad_norm": 0.969861000408359, "learning_rate": 9.999999516544111e-06, "loss": 0.4611, "step": 667 }, { "epoch": 0.030233084408237156, "grad_norm": 1.077682631894601, "learning_rate": 9.999999140522874e-06, "loss": 0.5032, "step": 668 }, { "epoch": 0.030278343516632722, "grad_norm": 5.948473688153386, "learning_rate": 9.999998657067014e-06, "loss": 0.7728, "step": 669 }, { "epoch": 0.030323602625028288, "grad_norm": 3.676232499482188, "learning_rate": 9.999998066176536e-06, "loss": 0.6763, "step": 670 }, { "epoch": 0.03036886173342385, "grad_norm": 1.2775316773570315, "learning_rate": 9.999997367851456e-06, "loss": 0.5174, "step": 671 }, { "epoch": 0.030414120841819416, "grad_norm": 1.6908085425895913, "learning_rate": 9.999996562091792e-06, "loss": 0.5926, "step": 672 }, { "epoch": 0.03045937995021498, "grad_norm": 1.2246977137958381, "learning_rate": 9.999995648897555e-06, "loss": 0.4605, "step": 673 }, { "epoch": 0.030504639058610547, "grad_norm": 1.3528227854220647, "learning_rate": 9.99999462826877e-06, "loss": 0.5713, "step": 674 }, { "epoch": 0.03054989816700611, "grad_norm": 3.362052766446197, "learning_rate": 9.999993500205456e-06, "loss": 0.651, "step": 675 }, { "epoch": 0.030595157275401675, "grad_norm": 0.958564212895757, "learning_rate": 9.999992264707636e-06, "loss": 0.5178, "step": 676 }, { "epoch": 0.03064041638379724, "grad_norm": 1.0395509750067198, "learning_rate": 9.999990921775341e-06, "loss": 0.5313, "step": 677 }, { "epoch": 0.030685675492192803, "grad_norm": 1.120139810370774, "learning_rate": 9.999989471408598e-06, "loss": 0.5664, "step": 678 }, { "epoch": 0.03073093460058837, "grad_norm": 1.032136947745328, "learning_rate": 9.999987913607437e-06, "loss": 0.4841, "step": 679 }, { "epoch": 0.030776193708983934, "grad_norm": 2.467769359435823, "learning_rate": 9.999986248371889e-06, "loss": 0.6585, "step": 680 }, { "epoch": 0.030821452817379497, "grad_norm": 1.0114156966604324, "learning_rate": 9.999984475701996e-06, "loss": 0.5235, "step": 681 }, { "epoch": 0.030866711925775062, "grad_norm": 2.1678025103559753, "learning_rate": 9.999982595597793e-06, "loss": 0.6423, "step": 682 }, { "epoch": 0.030911971034170628, "grad_norm": 1.0222023327367475, "learning_rate": 9.99998060805932e-06, "loss": 0.4334, "step": 683 }, { "epoch": 0.03095723014256619, "grad_norm": 1.0264965752501367, "learning_rate": 9.999978513086617e-06, "loss": 0.5546, "step": 684 }, { "epoch": 0.031002489250961756, "grad_norm": 1.2861062861613923, "learning_rate": 9.999976310679735e-06, "loss": 0.5974, "step": 685 }, { "epoch": 0.03104774835935732, "grad_norm": 0.9369415311783983, "learning_rate": 9.999974000838716e-06, "loss": 0.4734, "step": 686 }, { "epoch": 0.031093007467752884, "grad_norm": 0.9003760578237145, "learning_rate": 9.999971583563615e-06, "loss": 0.5228, "step": 687 }, { "epoch": 0.03113826657614845, "grad_norm": 0.79968319184062, "learning_rate": 9.99996905885448e-06, "loss": 0.5705, "step": 688 }, { "epoch": 0.031183525684544015, "grad_norm": 0.7660832635821488, "learning_rate": 9.999966426711364e-06, "loss": 0.5749, "step": 689 }, { "epoch": 0.031228784792939578, "grad_norm": 0.6958497061532944, "learning_rate": 9.99996368713433e-06, "loss": 0.5822, "step": 690 }, { "epoch": 0.03127404390133515, "grad_norm": 0.8745363662409338, "learning_rate": 9.999960840123428e-06, "loss": 0.6022, "step": 691 }, { "epoch": 0.03131930300973071, "grad_norm": 1.3696394690577367, "learning_rate": 9.999957885678725e-06, "loss": 0.4984, "step": 692 }, { "epoch": 0.03136456211812627, "grad_norm": 1.0403799882410576, "learning_rate": 9.999954823800287e-06, "loss": 0.5122, "step": 693 }, { "epoch": 0.03140982122652184, "grad_norm": 1.2900716363078422, "learning_rate": 9.99995165448817e-06, "loss": 0.509, "step": 694 }, { "epoch": 0.0314550803349174, "grad_norm": 1.0807483041647399, "learning_rate": 9.999948377742453e-06, "loss": 0.5161, "step": 695 }, { "epoch": 0.031500339443312965, "grad_norm": 1.0028670752550366, "learning_rate": 9.9999449935632e-06, "loss": 0.5443, "step": 696 }, { "epoch": 0.031545598551708534, "grad_norm": 0.7870478530335979, "learning_rate": 9.999941501950484e-06, "loss": 0.4537, "step": 697 }, { "epoch": 0.031590857660104096, "grad_norm": 1.0264232534437348, "learning_rate": 9.999937902904382e-06, "loss": 0.4937, "step": 698 }, { "epoch": 0.03163611676849966, "grad_norm": 1.0829604759779226, "learning_rate": 9.999934196424972e-06, "loss": 0.459, "step": 699 }, { "epoch": 0.03168137587689523, "grad_norm": 0.966309088508198, "learning_rate": 9.999930382512331e-06, "loss": 0.5105, "step": 700 }, { "epoch": 0.03172663498529079, "grad_norm": 1.0366245263100786, "learning_rate": 9.999926461166541e-06, "loss": 0.4671, "step": 701 }, { "epoch": 0.03177189409368635, "grad_norm": 1.2234134756053456, "learning_rate": 9.99992243238769e-06, "loss": 0.4912, "step": 702 }, { "epoch": 0.03181715320208192, "grad_norm": 0.9590494692266611, "learning_rate": 9.99991829617586e-06, "loss": 0.5133, "step": 703 }, { "epoch": 0.031862412310477484, "grad_norm": 1.3428036136373722, "learning_rate": 9.999914052531143e-06, "loss": 0.5341, "step": 704 }, { "epoch": 0.031907671418873046, "grad_norm": 1.4649979468611838, "learning_rate": 9.999909701453629e-06, "loss": 0.5954, "step": 705 }, { "epoch": 0.031952930527268615, "grad_norm": 1.4747032246725622, "learning_rate": 9.99990524294341e-06, "loss": 0.5009, "step": 706 }, { "epoch": 0.03199818963566418, "grad_norm": 0.8628686083875655, "learning_rate": 9.999900677000584e-06, "loss": 0.5205, "step": 707 }, { "epoch": 0.03204344874405974, "grad_norm": 0.7491389733083518, "learning_rate": 9.99989600362525e-06, "loss": 0.5734, "step": 708 }, { "epoch": 0.03208870785245531, "grad_norm": 0.5221698127186557, "learning_rate": 9.999891222817507e-06, "loss": 0.5635, "step": 709 }, { "epoch": 0.03213396696085087, "grad_norm": 1.759719976659955, "learning_rate": 9.999886334577456e-06, "loss": 0.5399, "step": 710 }, { "epoch": 0.03217922606924643, "grad_norm": 0.616650168587763, "learning_rate": 9.999881338905204e-06, "loss": 0.5543, "step": 711 }, { "epoch": 0.032224485177642, "grad_norm": 0.9961499565001543, "learning_rate": 9.999876235800859e-06, "loss": 0.4935, "step": 712 }, { "epoch": 0.032269744286037565, "grad_norm": 1.0072414232661828, "learning_rate": 9.999871025264528e-06, "loss": 0.4725, "step": 713 }, { "epoch": 0.03231500339443313, "grad_norm": 1.0472179363754566, "learning_rate": 9.999865707296326e-06, "loss": 0.5199, "step": 714 }, { "epoch": 0.032360262502828696, "grad_norm": 0.8728295602150761, "learning_rate": 9.999860281896366e-06, "loss": 0.5595, "step": 715 }, { "epoch": 0.03240552161122426, "grad_norm": 0.8090690179012936, "learning_rate": 9.999854749064764e-06, "loss": 0.5402, "step": 716 }, { "epoch": 0.03245078071961982, "grad_norm": 0.6068390919302309, "learning_rate": 9.999849108801637e-06, "loss": 0.5795, "step": 717 }, { "epoch": 0.03249603982801539, "grad_norm": 0.5094045186994052, "learning_rate": 9.999843361107111e-06, "loss": 0.5798, "step": 718 }, { "epoch": 0.03254129893641095, "grad_norm": 1.3118168143611104, "learning_rate": 9.999837505981308e-06, "loss": 0.5546, "step": 719 }, { "epoch": 0.032586558044806514, "grad_norm": 1.0749377351159282, "learning_rate": 9.99983154342435e-06, "loss": 0.5243, "step": 720 }, { "epoch": 0.03263181715320208, "grad_norm": 1.4815041547299679, "learning_rate": 9.99982547343637e-06, "loss": 0.4979, "step": 721 }, { "epoch": 0.032677076261597646, "grad_norm": 1.1919403721482398, "learning_rate": 9.999819296017496e-06, "loss": 0.4916, "step": 722 }, { "epoch": 0.03272233536999321, "grad_norm": 0.9116590492540936, "learning_rate": 9.999813011167861e-06, "loss": 0.4929, "step": 723 }, { "epoch": 0.03276759447838878, "grad_norm": 1.2629616711327594, "learning_rate": 9.9998066188876e-06, "loss": 0.5855, "step": 724 }, { "epoch": 0.03281285358678434, "grad_norm": 0.9248013179371635, "learning_rate": 9.99980011917685e-06, "loss": 0.5008, "step": 725 }, { "epoch": 0.0328581126951799, "grad_norm": 1.0678999576310209, "learning_rate": 9.999793512035751e-06, "loss": 0.4596, "step": 726 }, { "epoch": 0.03290337180357547, "grad_norm": 0.8909248268182276, "learning_rate": 9.999786797464446e-06, "loss": 0.5499, "step": 727 }, { "epoch": 0.03294863091197103, "grad_norm": 0.7216813737007891, "learning_rate": 9.999779975463079e-06, "loss": 0.5702, "step": 728 }, { "epoch": 0.0329938900203666, "grad_norm": 1.0018022162445315, "learning_rate": 9.999773046031795e-06, "loss": 0.5352, "step": 729 }, { "epoch": 0.033039149128762164, "grad_norm": 0.919647573300584, "learning_rate": 9.999766009170743e-06, "loss": 0.4565, "step": 730 }, { "epoch": 0.03308440823715773, "grad_norm": 0.8796753543539122, "learning_rate": 9.999758864880078e-06, "loss": 0.4837, "step": 731 }, { "epoch": 0.033129667345553296, "grad_norm": 0.849129211511892, "learning_rate": 9.999751613159947e-06, "loss": 0.4907, "step": 732 }, { "epoch": 0.03317492645394886, "grad_norm": 0.8562079088711478, "learning_rate": 9.99974425401051e-06, "loss": 0.5033, "step": 733 }, { "epoch": 0.03322018556234442, "grad_norm": 0.8722424149204497, "learning_rate": 9.999736787431927e-06, "loss": 0.4548, "step": 734 }, { "epoch": 0.03326544467073999, "grad_norm": 0.8075078284399309, "learning_rate": 9.999729213424355e-06, "loss": 0.4592, "step": 735 }, { "epoch": 0.03331070377913555, "grad_norm": 0.9474371118555418, "learning_rate": 9.999721531987958e-06, "loss": 0.4876, "step": 736 }, { "epoch": 0.033355962887531114, "grad_norm": 0.9740697650234126, "learning_rate": 9.999713743122898e-06, "loss": 0.5161, "step": 737 }, { "epoch": 0.03340122199592668, "grad_norm": 0.7762903958059998, "learning_rate": 9.999705846829348e-06, "loss": 0.4738, "step": 738 }, { "epoch": 0.033446481104322245, "grad_norm": 0.6913393805537109, "learning_rate": 9.999697843107475e-06, "loss": 0.5282, "step": 739 }, { "epoch": 0.03349174021271781, "grad_norm": 0.9631280627628708, "learning_rate": 9.99968973195745e-06, "loss": 0.4951, "step": 740 }, { "epoch": 0.03353699932111338, "grad_norm": 1.1401384416707492, "learning_rate": 9.999681513379447e-06, "loss": 0.496, "step": 741 }, { "epoch": 0.03358225842950894, "grad_norm": 0.8209194165070912, "learning_rate": 9.999673187373644e-06, "loss": 0.4793, "step": 742 }, { "epoch": 0.0336275175379045, "grad_norm": 0.43857969694489585, "learning_rate": 9.99966475394022e-06, "loss": 0.5692, "step": 743 }, { "epoch": 0.03367277664630007, "grad_norm": 1.0790565795223694, "learning_rate": 9.999656213079356e-06, "loss": 0.4542, "step": 744 }, { "epoch": 0.03371803575469563, "grad_norm": 0.8621474937716063, "learning_rate": 9.999647564791234e-06, "loss": 0.4916, "step": 745 }, { "epoch": 0.033763294863091195, "grad_norm": 0.8492549723137861, "learning_rate": 9.999638809076043e-06, "loss": 0.4985, "step": 746 }, { "epoch": 0.033808553971486764, "grad_norm": 1.0321269679631884, "learning_rate": 9.999629945933967e-06, "loss": 0.5065, "step": 747 }, { "epoch": 0.033853813079882326, "grad_norm": 1.0591850566806524, "learning_rate": 9.9996209753652e-06, "loss": 0.5104, "step": 748 }, { "epoch": 0.03389907218827789, "grad_norm": 0.9046110188284266, "learning_rate": 9.999611897369933e-06, "loss": 0.5013, "step": 749 }, { "epoch": 0.03394433129667346, "grad_norm": 0.8114624134750734, "learning_rate": 9.999602711948362e-06, "loss": 0.4607, "step": 750 }, { "epoch": 0.03398959040506902, "grad_norm": 0.4443264274419598, "learning_rate": 9.999593419100683e-06, "loss": 0.5433, "step": 751 }, { "epoch": 0.03403484951346458, "grad_norm": 0.43502422992640793, "learning_rate": 9.999584018827097e-06, "loss": 0.5734, "step": 752 }, { "epoch": 0.03408010862186015, "grad_norm": 1.2444799823008759, "learning_rate": 9.999574511127806e-06, "loss": 0.5093, "step": 753 }, { "epoch": 0.034125367730255714, "grad_norm": 1.0738710755456755, "learning_rate": 9.999564896003013e-06, "loss": 0.5308, "step": 754 }, { "epoch": 0.034170626838651276, "grad_norm": 0.8692108687564998, "learning_rate": 9.999555173452925e-06, "loss": 0.5181, "step": 755 }, { "epoch": 0.034215885947046845, "grad_norm": 0.9737398693974376, "learning_rate": 9.999545343477752e-06, "loss": 0.4986, "step": 756 }, { "epoch": 0.03426114505544241, "grad_norm": 0.9555525860038144, "learning_rate": 9.999535406077706e-06, "loss": 0.5009, "step": 757 }, { "epoch": 0.03430640416383797, "grad_norm": 0.8457483843791993, "learning_rate": 9.999525361252996e-06, "loss": 0.4735, "step": 758 }, { "epoch": 0.03435166327223354, "grad_norm": 0.9139228689176659, "learning_rate": 9.999515209003842e-06, "loss": 0.459, "step": 759 }, { "epoch": 0.0343969223806291, "grad_norm": 1.0397618730118945, "learning_rate": 9.99950494933046e-06, "loss": 0.477, "step": 760 }, { "epoch": 0.03444218148902466, "grad_norm": 0.6038324799341142, "learning_rate": 9.999494582233074e-06, "loss": 0.5614, "step": 761 }, { "epoch": 0.03448744059742023, "grad_norm": 0.8974012894329598, "learning_rate": 9.999484107711904e-06, "loss": 0.4982, "step": 762 }, { "epoch": 0.034532699705815795, "grad_norm": 1.0877263021222965, "learning_rate": 9.999473525767173e-06, "loss": 0.4917, "step": 763 }, { "epoch": 0.03457795881421136, "grad_norm": 0.7823596745969484, "learning_rate": 9.999462836399112e-06, "loss": 0.4959, "step": 764 }, { "epoch": 0.034623217922606926, "grad_norm": 0.8617310112728173, "learning_rate": 9.999452039607948e-06, "loss": 0.4362, "step": 765 }, { "epoch": 0.03466847703100249, "grad_norm": 0.8658866389167234, "learning_rate": 9.999441135393917e-06, "loss": 0.462, "step": 766 }, { "epoch": 0.03471373613939805, "grad_norm": 0.41883019477992395, "learning_rate": 9.99943012375725e-06, "loss": 0.5354, "step": 767 }, { "epoch": 0.03475899524779362, "grad_norm": 0.813158518464342, "learning_rate": 9.999419004698182e-06, "loss": 0.4495, "step": 768 }, { "epoch": 0.03480425435618918, "grad_norm": 0.8880549031334604, "learning_rate": 9.999407778216957e-06, "loss": 0.4945, "step": 769 }, { "epoch": 0.034849513464584744, "grad_norm": 0.9003853192572606, "learning_rate": 9.999396444313811e-06, "loss": 0.5068, "step": 770 }, { "epoch": 0.034894772572980313, "grad_norm": 0.8752619001869574, "learning_rate": 9.99938500298899e-06, "loss": 0.4964, "step": 771 }, { "epoch": 0.034940031681375876, "grad_norm": 0.8271100830472271, "learning_rate": 9.99937345424274e-06, "loss": 0.4564, "step": 772 }, { "epoch": 0.034985290789771445, "grad_norm": 0.7203352211017896, "learning_rate": 9.99936179807531e-06, "loss": 0.4577, "step": 773 }, { "epoch": 0.03503054989816701, "grad_norm": 0.8500740344609287, "learning_rate": 9.999350034486948e-06, "loss": 0.4508, "step": 774 }, { "epoch": 0.03507580900656257, "grad_norm": 0.39547626177355233, "learning_rate": 9.99933816347791e-06, "loss": 0.5338, "step": 775 }, { "epoch": 0.03512106811495814, "grad_norm": 0.9311817658265301, "learning_rate": 9.999326185048447e-06, "loss": 0.5512, "step": 776 }, { "epoch": 0.0351663272233537, "grad_norm": 0.8800233195349324, "learning_rate": 9.99931409919882e-06, "loss": 0.5399, "step": 777 }, { "epoch": 0.03521158633174926, "grad_norm": 0.3313894794355621, "learning_rate": 9.999301905929286e-06, "loss": 0.5335, "step": 778 }, { "epoch": 0.03525684544014483, "grad_norm": 0.8317066378641015, "learning_rate": 9.999289605240109e-06, "loss": 0.4468, "step": 779 }, { "epoch": 0.035302104548540394, "grad_norm": 0.7974651494986721, "learning_rate": 9.999277197131551e-06, "loss": 0.4734, "step": 780 }, { "epoch": 0.03534736365693596, "grad_norm": 0.7944228025196886, "learning_rate": 9.999264681603881e-06, "loss": 0.5195, "step": 781 }, { "epoch": 0.035392622765331526, "grad_norm": 0.8433968983805096, "learning_rate": 9.99925205865737e-06, "loss": 0.4342, "step": 782 }, { "epoch": 0.03543788187372709, "grad_norm": 0.8021298240348729, "learning_rate": 9.999239328292283e-06, "loss": 0.5107, "step": 783 }, { "epoch": 0.03548314098212265, "grad_norm": 0.9387637066650226, "learning_rate": 9.999226490508897e-06, "loss": 0.4739, "step": 784 }, { "epoch": 0.03552840009051822, "grad_norm": 0.4872050722136279, "learning_rate": 9.999213545307488e-06, "loss": 0.554, "step": 785 }, { "epoch": 0.03557365919891378, "grad_norm": 0.9012196342130966, "learning_rate": 9.999200492688334e-06, "loss": 0.5362, "step": 786 }, { "epoch": 0.035618918307309344, "grad_norm": 0.9506473252668751, "learning_rate": 9.999187332651716e-06, "loss": 0.4624, "step": 787 }, { "epoch": 0.03566417741570491, "grad_norm": 0.8901086070418615, "learning_rate": 9.999174065197916e-06, "loss": 0.4634, "step": 788 }, { "epoch": 0.035709436524100475, "grad_norm": 0.36537172901012394, "learning_rate": 9.999160690327218e-06, "loss": 0.5775, "step": 789 }, { "epoch": 0.03575469563249604, "grad_norm": 1.3411043185866383, "learning_rate": 9.999147208039912e-06, "loss": 0.4727, "step": 790 }, { "epoch": 0.03579995474089161, "grad_norm": 0.9409963730058601, "learning_rate": 9.999133618336285e-06, "loss": 0.523, "step": 791 }, { "epoch": 0.03584521384928717, "grad_norm": 0.377734894810001, "learning_rate": 9.99911992121663e-06, "loss": 0.5649, "step": 792 }, { "epoch": 0.03589047295768273, "grad_norm": 1.0227197057908568, "learning_rate": 9.999106116681243e-06, "loss": 0.5063, "step": 793 }, { "epoch": 0.0359357320660783, "grad_norm": 0.3577445076404578, "learning_rate": 9.999092204730418e-06, "loss": 0.5136, "step": 794 }, { "epoch": 0.03598099117447386, "grad_norm": 0.8851794694546333, "learning_rate": 9.999078185364455e-06, "loss": 0.4668, "step": 795 }, { "epoch": 0.036026250282869425, "grad_norm": 0.822351255754194, "learning_rate": 9.999064058583657e-06, "loss": 0.5141, "step": 796 }, { "epoch": 0.036071509391264994, "grad_norm": 0.8727820448841807, "learning_rate": 9.999049824388324e-06, "loss": 0.5556, "step": 797 }, { "epoch": 0.036116768499660556, "grad_norm": 0.9039671640308168, "learning_rate": 9.999035482778764e-06, "loss": 0.5346, "step": 798 }, { "epoch": 0.03616202760805612, "grad_norm": 0.8457605924012196, "learning_rate": 9.999021033755286e-06, "loss": 0.5, "step": 799 }, { "epoch": 0.03620728671645169, "grad_norm": 0.9032493102294806, "learning_rate": 9.999006477318197e-06, "loss": 0.4503, "step": 800 }, { "epoch": 0.03625254582484725, "grad_norm": 0.7868413613589126, "learning_rate": 9.998991813467814e-06, "loss": 0.4662, "step": 801 }, { "epoch": 0.03629780493324281, "grad_norm": 0.93526853556894, "learning_rate": 9.998977042204449e-06, "loss": 0.4836, "step": 802 }, { "epoch": 0.03634306404163838, "grad_norm": 0.8455875935885705, "learning_rate": 9.998962163528421e-06, "loss": 0.4806, "step": 803 }, { "epoch": 0.036388323150033944, "grad_norm": 0.8344451586941963, "learning_rate": 9.998947177440048e-06, "loss": 0.4291, "step": 804 }, { "epoch": 0.036433582258429506, "grad_norm": 0.8522693202810683, "learning_rate": 9.998932083939657e-06, "loss": 0.5004, "step": 805 }, { "epoch": 0.036478841366825075, "grad_norm": 0.905163368072537, "learning_rate": 9.998916883027565e-06, "loss": 0.4948, "step": 806 }, { "epoch": 0.03652410047522064, "grad_norm": 0.73147243210702, "learning_rate": 9.998901574704102e-06, "loss": 0.4602, "step": 807 }, { "epoch": 0.0365693595836162, "grad_norm": 0.4513364849291583, "learning_rate": 9.9988861589696e-06, "loss": 0.5818, "step": 808 }, { "epoch": 0.03661461869201177, "grad_norm": 0.40403113807498325, "learning_rate": 9.998870635824385e-06, "loss": 0.5678, "step": 809 }, { "epoch": 0.03665987780040733, "grad_norm": 1.07302313691336, "learning_rate": 9.998855005268794e-06, "loss": 0.495, "step": 810 }, { "epoch": 0.03670513690880289, "grad_norm": 0.3781018473275081, "learning_rate": 9.998839267303163e-06, "loss": 0.5311, "step": 811 }, { "epoch": 0.03675039601719846, "grad_norm": 0.8388566810721623, "learning_rate": 9.998823421927826e-06, "loss": 0.4647, "step": 812 }, { "epoch": 0.036795655125594025, "grad_norm": 0.83966169332251, "learning_rate": 9.998807469143129e-06, "loss": 0.4687, "step": 813 }, { "epoch": 0.03684091423398959, "grad_norm": 0.860897967280018, "learning_rate": 9.998791408949408e-06, "loss": 0.4837, "step": 814 }, { "epoch": 0.036886173342385156, "grad_norm": 0.8062004578592479, "learning_rate": 9.998775241347017e-06, "loss": 0.4928, "step": 815 }, { "epoch": 0.03693143245078072, "grad_norm": 0.7764292399554144, "learning_rate": 9.998758966336296e-06, "loss": 0.4549, "step": 816 }, { "epoch": 0.03697669155917629, "grad_norm": 0.7996994905115854, "learning_rate": 9.998742583917598e-06, "loss": 0.4537, "step": 817 }, { "epoch": 0.03702195066757185, "grad_norm": 0.8265343193477142, "learning_rate": 9.998726094091275e-06, "loss": 0.4563, "step": 818 }, { "epoch": 0.03706720977596741, "grad_norm": 0.82518409601697, "learning_rate": 9.99870949685768e-06, "loss": 0.5115, "step": 819 }, { "epoch": 0.03711246888436298, "grad_norm": 0.8645585275224266, "learning_rate": 9.99869279221717e-06, "loss": 0.4773, "step": 820 }, { "epoch": 0.037157727992758544, "grad_norm": 0.8706170286664132, "learning_rate": 9.998675980170106e-06, "loss": 0.4519, "step": 821 }, { "epoch": 0.037202987101154106, "grad_norm": 0.7940021232898752, "learning_rate": 9.998659060716844e-06, "loss": 0.4296, "step": 822 }, { "epoch": 0.037248246209549675, "grad_norm": 0.8167594306281625, "learning_rate": 9.998642033857753e-06, "loss": 0.4646, "step": 823 }, { "epoch": 0.03729350531794524, "grad_norm": 1.7764689686555373, "learning_rate": 9.998624899593197e-06, "loss": 0.475, "step": 824 }, { "epoch": 0.0373387644263408, "grad_norm": 0.8476519593212136, "learning_rate": 9.998607657923545e-06, "loss": 0.4667, "step": 825 }, { "epoch": 0.03738402353473637, "grad_norm": 0.6990683330559072, "learning_rate": 9.998590308849164e-06, "loss": 0.57, "step": 826 }, { "epoch": 0.03742928264313193, "grad_norm": 0.7878434383789473, "learning_rate": 9.998572852370432e-06, "loss": 0.4403, "step": 827 }, { "epoch": 0.03747454175152749, "grad_norm": 0.8452597507527643, "learning_rate": 9.998555288487719e-06, "loss": 0.4564, "step": 828 }, { "epoch": 0.03751980085992306, "grad_norm": 0.8291505593866499, "learning_rate": 9.998537617201405e-06, "loss": 0.4431, "step": 829 }, { "epoch": 0.037565059968318625, "grad_norm": 0.8739561479854944, "learning_rate": 9.998519838511872e-06, "loss": 0.5116, "step": 830 }, { "epoch": 0.03761031907671419, "grad_norm": 0.8685439340711074, "learning_rate": 9.998501952419496e-06, "loss": 0.4549, "step": 831 }, { "epoch": 0.037655578185109756, "grad_norm": 0.8176380913106567, "learning_rate": 9.998483958924666e-06, "loss": 0.4933, "step": 832 }, { "epoch": 0.03770083729350532, "grad_norm": 0.5896711368819945, "learning_rate": 9.998465858027769e-06, "loss": 0.5262, "step": 833 }, { "epoch": 0.03774609640190088, "grad_norm": 0.9204416208370488, "learning_rate": 9.99844764972919e-06, "loss": 0.4775, "step": 834 }, { "epoch": 0.03779135551029645, "grad_norm": 0.8974279098852376, "learning_rate": 9.998429334029323e-06, "loss": 0.438, "step": 835 }, { "epoch": 0.03783661461869201, "grad_norm": 0.4566914236668775, "learning_rate": 9.998410910928562e-06, "loss": 0.5537, "step": 836 }, { "epoch": 0.037881873727087574, "grad_norm": 0.9354308373334939, "learning_rate": 9.998392380427302e-06, "loss": 0.473, "step": 837 }, { "epoch": 0.03792713283548314, "grad_norm": 0.9415680530950489, "learning_rate": 9.998373742525941e-06, "loss": 0.4528, "step": 838 }, { "epoch": 0.037972391943878706, "grad_norm": 0.8566551875156379, "learning_rate": 9.998354997224879e-06, "loss": 0.4451, "step": 839 }, { "epoch": 0.03801765105227427, "grad_norm": 0.8394095318186099, "learning_rate": 9.998336144524521e-06, "loss": 0.488, "step": 840 }, { "epoch": 0.03806291016066984, "grad_norm": 0.8683511332108413, "learning_rate": 9.998317184425268e-06, "loss": 0.5012, "step": 841 }, { "epoch": 0.0381081692690654, "grad_norm": 0.8028785010939439, "learning_rate": 9.998298116927532e-06, "loss": 0.5156, "step": 842 }, { "epoch": 0.03815342837746096, "grad_norm": 0.9152126508013158, "learning_rate": 9.99827894203172e-06, "loss": 0.5676, "step": 843 }, { "epoch": 0.03819868748585653, "grad_norm": 0.9331450310430767, "learning_rate": 9.998259659738243e-06, "loss": 0.5213, "step": 844 }, { "epoch": 0.03824394659425209, "grad_norm": 0.8624765340420999, "learning_rate": 9.998240270047519e-06, "loss": 0.47, "step": 845 }, { "epoch": 0.038289205702647655, "grad_norm": 0.9051223838196641, "learning_rate": 9.998220772959962e-06, "loss": 0.5183, "step": 846 }, { "epoch": 0.038334464811043224, "grad_norm": 0.38497024559095944, "learning_rate": 9.998201168475991e-06, "loss": 0.563, "step": 847 }, { "epoch": 0.038379723919438787, "grad_norm": 1.0886459904950612, "learning_rate": 9.998181456596027e-06, "loss": 0.4387, "step": 848 }, { "epoch": 0.03842498302783435, "grad_norm": 0.906151568063953, "learning_rate": 9.998161637320495e-06, "loss": 0.5013, "step": 849 }, { "epoch": 0.03847024213622992, "grad_norm": 0.8703851030575666, "learning_rate": 9.998141710649822e-06, "loss": 0.4966, "step": 850 }, { "epoch": 0.03851550124462548, "grad_norm": 0.8775907858377049, "learning_rate": 9.998121676584432e-06, "loss": 0.4399, "step": 851 }, { "epoch": 0.03856076035302104, "grad_norm": 0.5793465331166096, "learning_rate": 9.998101535124758e-06, "loss": 0.5776, "step": 852 }, { "epoch": 0.03860601946141661, "grad_norm": 0.8748602831935688, "learning_rate": 9.998081286271234e-06, "loss": 0.5187, "step": 853 }, { "epoch": 0.038651278569812174, "grad_norm": 0.7891627206628827, "learning_rate": 9.99806093002429e-06, "loss": 0.5106, "step": 854 }, { "epoch": 0.038696537678207736, "grad_norm": 0.804830597119037, "learning_rate": 9.99804046638437e-06, "loss": 0.4622, "step": 855 }, { "epoch": 0.038741796786603305, "grad_norm": 0.8470820634732646, "learning_rate": 9.99801989535191e-06, "loss": 0.4602, "step": 856 }, { "epoch": 0.03878705589499887, "grad_norm": 0.7899585020497784, "learning_rate": 9.997999216927352e-06, "loss": 0.4819, "step": 857 }, { "epoch": 0.03883231500339443, "grad_norm": 0.7852368236713659, "learning_rate": 9.997978431111142e-06, "loss": 0.4689, "step": 858 }, { "epoch": 0.03887757411179, "grad_norm": 0.7943676416848869, "learning_rate": 9.997957537903727e-06, "loss": 0.4799, "step": 859 }, { "epoch": 0.03892283322018556, "grad_norm": 0.892529635684416, "learning_rate": 9.997936537305551e-06, "loss": 0.4807, "step": 860 }, { "epoch": 0.03896809232858113, "grad_norm": 0.45745492269747656, "learning_rate": 9.997915429317071e-06, "loss": 0.5707, "step": 861 }, { "epoch": 0.03901335143697669, "grad_norm": 0.8100899395043848, "learning_rate": 9.997894213938738e-06, "loss": 0.4792, "step": 862 }, { "epoch": 0.039058610545372255, "grad_norm": 0.8251870779862273, "learning_rate": 9.997872891171009e-06, "loss": 0.5256, "step": 863 }, { "epoch": 0.039103869653767824, "grad_norm": 0.9052449244530871, "learning_rate": 9.99785146101434e-06, "loss": 0.46, "step": 864 }, { "epoch": 0.039149128762163386, "grad_norm": 0.8233054083332147, "learning_rate": 9.997829923469194e-06, "loss": 0.5034, "step": 865 }, { "epoch": 0.03919438787055895, "grad_norm": 0.7663398755861442, "learning_rate": 9.997808278536032e-06, "loss": 0.4548, "step": 866 }, { "epoch": 0.03923964697895452, "grad_norm": 0.8238704523328892, "learning_rate": 9.99778652621532e-06, "loss": 0.51, "step": 867 }, { "epoch": 0.03928490608735008, "grad_norm": 0.4089041222135545, "learning_rate": 9.997764666507523e-06, "loss": 0.5214, "step": 868 }, { "epoch": 0.03933016519574564, "grad_norm": 0.8337966943875073, "learning_rate": 9.997742699413115e-06, "loss": 0.448, "step": 869 }, { "epoch": 0.03937542430414121, "grad_norm": 0.3718911507575922, "learning_rate": 9.997720624932566e-06, "loss": 0.5238, "step": 870 }, { "epoch": 0.039420683412536774, "grad_norm": 0.8330502280175536, "learning_rate": 9.99769844306635e-06, "loss": 0.492, "step": 871 }, { "epoch": 0.039465942520932336, "grad_norm": 0.8869907730560409, "learning_rate": 9.997676153814944e-06, "loss": 0.523, "step": 872 }, { "epoch": 0.039511201629327905, "grad_norm": 0.8564115335442091, "learning_rate": 9.997653757178824e-06, "loss": 0.5044, "step": 873 }, { "epoch": 0.03955646073772347, "grad_norm": 0.3409783012433021, "learning_rate": 9.997631253158477e-06, "loss": 0.5594, "step": 874 }, { "epoch": 0.03960171984611903, "grad_norm": 0.8857825685787568, "learning_rate": 9.997608641754381e-06, "loss": 0.477, "step": 875 }, { "epoch": 0.0396469789545146, "grad_norm": 0.40045123455066095, "learning_rate": 9.997585922967026e-06, "loss": 0.5545, "step": 876 }, { "epoch": 0.03969223806291016, "grad_norm": 0.3827260476019697, "learning_rate": 9.997563096796899e-06, "loss": 0.5172, "step": 877 }, { "epoch": 0.03973749717130572, "grad_norm": 0.34473254672516823, "learning_rate": 9.997540163244487e-06, "loss": 0.5662, "step": 878 }, { "epoch": 0.03978275627970129, "grad_norm": 0.3722334175738685, "learning_rate": 9.997517122310287e-06, "loss": 0.553, "step": 879 }, { "epoch": 0.039828015388096855, "grad_norm": 1.1596370622984065, "learning_rate": 9.997493973994793e-06, "loss": 0.4872, "step": 880 }, { "epoch": 0.03987327449649242, "grad_norm": 0.9422109365205645, "learning_rate": 9.997470718298503e-06, "loss": 0.5366, "step": 881 }, { "epoch": 0.039918533604887986, "grad_norm": 0.9376681347956652, "learning_rate": 9.997447355221915e-06, "loss": 0.5092, "step": 882 }, { "epoch": 0.03996379271328355, "grad_norm": 0.9194276554143219, "learning_rate": 9.997423884765532e-06, "loss": 0.5064, "step": 883 }, { "epoch": 0.04000905182167911, "grad_norm": 0.5705723711046609, "learning_rate": 9.99740030692986e-06, "loss": 0.5557, "step": 884 }, { "epoch": 0.04005431093007468, "grad_norm": 1.1046214239155359, "learning_rate": 9.9973766217154e-06, "loss": 0.4448, "step": 885 }, { "epoch": 0.04009957003847024, "grad_norm": 0.8280146005762715, "learning_rate": 9.997352829122667e-06, "loss": 0.4865, "step": 886 }, { "epoch": 0.040144829146865804, "grad_norm": 0.8686670011300119, "learning_rate": 9.99732892915217e-06, "loss": 0.4488, "step": 887 }, { "epoch": 0.04019008825526137, "grad_norm": 0.43280572475101964, "learning_rate": 9.99730492180442e-06, "loss": 0.5701, "step": 888 }, { "epoch": 0.040235347363656936, "grad_norm": 0.8997349614022049, "learning_rate": 9.997280807079938e-06, "loss": 0.4958, "step": 889 }, { "epoch": 0.0402806064720525, "grad_norm": 0.8726634607887849, "learning_rate": 9.997256584979239e-06, "loss": 0.4799, "step": 890 }, { "epoch": 0.04032586558044807, "grad_norm": 0.4248470639109258, "learning_rate": 9.997232255502842e-06, "loss": 0.5752, "step": 891 }, { "epoch": 0.04037112468884363, "grad_norm": 0.8720384694863267, "learning_rate": 9.997207818651273e-06, "loss": 0.5101, "step": 892 }, { "epoch": 0.04041638379723919, "grad_norm": 0.9066900693499089, "learning_rate": 9.997183274425058e-06, "loss": 0.4938, "step": 893 }, { "epoch": 0.04046164290563476, "grad_norm": 0.9292081807321683, "learning_rate": 9.997158622824719e-06, "loss": 0.4438, "step": 894 }, { "epoch": 0.04050690201403032, "grad_norm": 1.057943792521059, "learning_rate": 9.99713386385079e-06, "loss": 0.4396, "step": 895 }, { "epoch": 0.040552161122425885, "grad_norm": 0.44618197731567005, "learning_rate": 9.9971089975038e-06, "loss": 0.5388, "step": 896 }, { "epoch": 0.040597420230821454, "grad_norm": 1.0347025476143397, "learning_rate": 9.997084023784286e-06, "loss": 0.4858, "step": 897 }, { "epoch": 0.04064267933921702, "grad_norm": 0.9476775847258264, "learning_rate": 9.997058942692786e-06, "loss": 0.4632, "step": 898 }, { "epoch": 0.04068793844761258, "grad_norm": 0.7912189795373192, "learning_rate": 9.997033754229835e-06, "loss": 0.4905, "step": 899 }, { "epoch": 0.04073319755600815, "grad_norm": 0.9864123150456614, "learning_rate": 9.997008458395975e-06, "loss": 0.4605, "step": 900 }, { "epoch": 0.04077845666440371, "grad_norm": 0.40215490381501273, "learning_rate": 9.996983055191752e-06, "loss": 0.551, "step": 901 }, { "epoch": 0.04082371577279927, "grad_norm": 0.3822696497597169, "learning_rate": 9.99695754461771e-06, "loss": 0.5483, "step": 902 }, { "epoch": 0.04086897488119484, "grad_norm": 1.0110205632233296, "learning_rate": 9.996931926674396e-06, "loss": 0.4921, "step": 903 }, { "epoch": 0.040914233989590404, "grad_norm": 0.9296152330438265, "learning_rate": 9.996906201362361e-06, "loss": 0.5194, "step": 904 }, { "epoch": 0.04095949309798597, "grad_norm": 0.8565404745687755, "learning_rate": 9.99688036868216e-06, "loss": 0.4809, "step": 905 }, { "epoch": 0.041004752206381535, "grad_norm": 0.8590242523016869, "learning_rate": 9.996854428634348e-06, "loss": 0.4932, "step": 906 }, { "epoch": 0.0410500113147771, "grad_norm": 0.8493136418082664, "learning_rate": 9.996828381219479e-06, "loss": 0.4946, "step": 907 }, { "epoch": 0.04109527042317267, "grad_norm": 0.6124772508865317, "learning_rate": 9.996802226438117e-06, "loss": 0.5735, "step": 908 }, { "epoch": 0.04114052953156823, "grad_norm": 0.8954342427228704, "learning_rate": 9.996775964290819e-06, "loss": 0.4609, "step": 909 }, { "epoch": 0.04118578863996379, "grad_norm": 0.8279174198415556, "learning_rate": 9.996749594778153e-06, "loss": 0.4952, "step": 910 }, { "epoch": 0.04123104774835936, "grad_norm": 0.3864797621662077, "learning_rate": 9.996723117900684e-06, "loss": 0.5361, "step": 911 }, { "epoch": 0.04127630685675492, "grad_norm": 0.900106757577596, "learning_rate": 9.996696533658981e-06, "loss": 0.4824, "step": 912 }, { "epoch": 0.041321565965150485, "grad_norm": 0.8550971074550466, "learning_rate": 9.996669842053617e-06, "loss": 0.4586, "step": 913 }, { "epoch": 0.041366825073546054, "grad_norm": 0.9222917536345409, "learning_rate": 9.996643043085164e-06, "loss": 0.4704, "step": 914 }, { "epoch": 0.041412084181941616, "grad_norm": 0.8242120274667823, "learning_rate": 9.996616136754198e-06, "loss": 0.4926, "step": 915 }, { "epoch": 0.04145734329033718, "grad_norm": 0.7706995384843042, "learning_rate": 9.996589123061297e-06, "loss": 0.4383, "step": 916 }, { "epoch": 0.04150260239873275, "grad_norm": 0.7484393968818605, "learning_rate": 9.996562002007042e-06, "loss": 0.4811, "step": 917 }, { "epoch": 0.04154786150712831, "grad_norm": 0.5945477305149327, "learning_rate": 9.996534773592016e-06, "loss": 0.561, "step": 918 }, { "epoch": 0.04159312061552387, "grad_norm": 0.88348900798707, "learning_rate": 9.9965074378168e-06, "loss": 0.4753, "step": 919 }, { "epoch": 0.04163837972391944, "grad_norm": 0.843212052146494, "learning_rate": 9.996479994681989e-06, "loss": 0.4554, "step": 920 }, { "epoch": 0.041683638832315004, "grad_norm": 0.8267504174103804, "learning_rate": 9.996452444188166e-06, "loss": 0.446, "step": 921 }, { "epoch": 0.041728897940710566, "grad_norm": 0.39741826226838095, "learning_rate": 9.996424786335925e-06, "loss": 0.5399, "step": 922 }, { "epoch": 0.041774157049106135, "grad_norm": 0.3831494947571296, "learning_rate": 9.996397021125862e-06, "loss": 0.5416, "step": 923 }, { "epoch": 0.0418194161575017, "grad_norm": 0.8422075131373115, "learning_rate": 9.996369148558573e-06, "loss": 0.449, "step": 924 }, { "epoch": 0.04186467526589726, "grad_norm": 0.8486980902404835, "learning_rate": 9.996341168634653e-06, "loss": 0.4113, "step": 925 }, { "epoch": 0.04190993437429283, "grad_norm": 0.889356449999675, "learning_rate": 9.99631308135471e-06, "loss": 0.4738, "step": 926 }, { "epoch": 0.04195519348268839, "grad_norm": 0.8520583531149387, "learning_rate": 9.996284886719342e-06, "loss": 0.508, "step": 927 }, { "epoch": 0.04200045259108395, "grad_norm": 0.8429515410229458, "learning_rate": 9.996256584729157e-06, "loss": 0.4892, "step": 928 }, { "epoch": 0.04204571169947952, "grad_norm": 0.8478899080075085, "learning_rate": 9.996228175384764e-06, "loss": 0.5311, "step": 929 }, { "epoch": 0.042090970807875085, "grad_norm": 0.6460493207491624, "learning_rate": 9.996199658686769e-06, "loss": 0.561, "step": 930 }, { "epoch": 0.04213622991627065, "grad_norm": 0.7654622141397179, "learning_rate": 9.99617103463579e-06, "loss": 0.4714, "step": 931 }, { "epoch": 0.042181489024666216, "grad_norm": 1.0725569589615345, "learning_rate": 9.99614230323244e-06, "loss": 0.4912, "step": 932 }, { "epoch": 0.04222674813306178, "grad_norm": 0.7822955176560418, "learning_rate": 9.996113464477337e-06, "loss": 0.4458, "step": 933 }, { "epoch": 0.04227200724145734, "grad_norm": 0.803557154231793, "learning_rate": 9.996084518371101e-06, "loss": 0.5008, "step": 934 }, { "epoch": 0.04231726634985291, "grad_norm": 0.8629657209874251, "learning_rate": 9.996055464914351e-06, "loss": 0.4699, "step": 935 }, { "epoch": 0.04236252545824847, "grad_norm": 0.9756308146322324, "learning_rate": 9.996026304107713e-06, "loss": 0.4988, "step": 936 }, { "epoch": 0.042407784566644034, "grad_norm": 0.8561456799235027, "learning_rate": 9.995997035951816e-06, "loss": 0.4693, "step": 937 }, { "epoch": 0.0424530436750396, "grad_norm": 0.8924522605663066, "learning_rate": 9.995967660447285e-06, "loss": 0.4863, "step": 938 }, { "epoch": 0.042498302783435166, "grad_norm": 0.8418644621286018, "learning_rate": 9.995938177594753e-06, "loss": 0.4681, "step": 939 }, { "epoch": 0.04254356189183073, "grad_norm": 1.2365889696528232, "learning_rate": 9.995908587394854e-06, "loss": 0.469, "step": 940 }, { "epoch": 0.0425888210002263, "grad_norm": 0.6024320407477483, "learning_rate": 9.995878889848223e-06, "loss": 0.5168, "step": 941 }, { "epoch": 0.04263408010862186, "grad_norm": 0.9491137737305888, "learning_rate": 9.995849084955498e-06, "loss": 0.4466, "step": 942 }, { "epoch": 0.04267933921701742, "grad_norm": 0.851853575868263, "learning_rate": 9.99581917271732e-06, "loss": 0.4709, "step": 943 }, { "epoch": 0.04272459832541299, "grad_norm": 0.8028920300451445, "learning_rate": 9.995789153134333e-06, "loss": 0.4656, "step": 944 }, { "epoch": 0.04276985743380855, "grad_norm": 0.4079955958058016, "learning_rate": 9.995759026207179e-06, "loss": 0.5244, "step": 945 }, { "epoch": 0.042815116542204115, "grad_norm": 0.4484634107614797, "learning_rate": 9.995728791936505e-06, "loss": 0.5344, "step": 946 }, { "epoch": 0.042860375650599684, "grad_norm": 0.906131738275771, "learning_rate": 9.995698450322965e-06, "loss": 0.4561, "step": 947 }, { "epoch": 0.04290563475899525, "grad_norm": 1.0408129265137542, "learning_rate": 9.995668001367208e-06, "loss": 0.4295, "step": 948 }, { "epoch": 0.042950893867390816, "grad_norm": 0.8137121642370845, "learning_rate": 9.995637445069889e-06, "loss": 0.4805, "step": 949 }, { "epoch": 0.04299615297578638, "grad_norm": 0.8978674731733924, "learning_rate": 9.995606781431664e-06, "loss": 0.5397, "step": 950 }, { "epoch": 0.04304141208418194, "grad_norm": 0.8323695041137925, "learning_rate": 9.99557601045319e-06, "loss": 0.4466, "step": 951 }, { "epoch": 0.04308667119257751, "grad_norm": 0.8388981810246476, "learning_rate": 9.995545132135133e-06, "loss": 0.4916, "step": 952 }, { "epoch": 0.04313193030097307, "grad_norm": 0.5734877288235656, "learning_rate": 9.995514146478152e-06, "loss": 0.5394, "step": 953 }, { "epoch": 0.043177189409368634, "grad_norm": 0.9670137618853054, "learning_rate": 9.995483053482917e-06, "loss": 0.4966, "step": 954 }, { "epoch": 0.0432224485177642, "grad_norm": 0.8820830424535521, "learning_rate": 9.995451853150091e-06, "loss": 0.4859, "step": 955 }, { "epoch": 0.043267707626159765, "grad_norm": 0.7973854009399443, "learning_rate": 9.995420545480349e-06, "loss": 0.512, "step": 956 }, { "epoch": 0.04331296673455533, "grad_norm": 0.8844927712303589, "learning_rate": 9.99538913047436e-06, "loss": 0.4833, "step": 957 }, { "epoch": 0.0433582258429509, "grad_norm": 0.8569865302662765, "learning_rate": 9.9953576081328e-06, "loss": 0.4711, "step": 958 }, { "epoch": 0.04340348495134646, "grad_norm": 0.8153974165603668, "learning_rate": 9.995325978456349e-06, "loss": 0.439, "step": 959 }, { "epoch": 0.04344874405974202, "grad_norm": 0.8034190547420145, "learning_rate": 9.995294241445685e-06, "loss": 0.445, "step": 960 }, { "epoch": 0.04349400316813759, "grad_norm": 0.8022765956843442, "learning_rate": 9.995262397101489e-06, "loss": 0.4781, "step": 961 }, { "epoch": 0.04353926227653315, "grad_norm": 0.5816020147571523, "learning_rate": 9.995230445424446e-06, "loss": 0.544, "step": 962 }, { "epoch": 0.043584521384928715, "grad_norm": 0.9986172227674865, "learning_rate": 9.995198386415241e-06, "loss": 0.4766, "step": 963 }, { "epoch": 0.043629780493324284, "grad_norm": 0.791646512962987, "learning_rate": 9.995166220074566e-06, "loss": 0.4591, "step": 964 }, { "epoch": 0.043675039601719846, "grad_norm": 0.8885119876985477, "learning_rate": 9.995133946403111e-06, "loss": 0.4547, "step": 965 }, { "epoch": 0.04372029871011541, "grad_norm": 0.4175285658178469, "learning_rate": 9.995101565401566e-06, "loss": 0.5103, "step": 966 }, { "epoch": 0.04376555781851098, "grad_norm": 0.44806347188990125, "learning_rate": 9.995069077070632e-06, "loss": 0.5563, "step": 967 }, { "epoch": 0.04381081692690654, "grad_norm": 1.2349779804925443, "learning_rate": 9.995036481411005e-06, "loss": 0.492, "step": 968 }, { "epoch": 0.0438560760353021, "grad_norm": 0.9400022460617656, "learning_rate": 9.995003778423383e-06, "loss": 0.4622, "step": 969 }, { "epoch": 0.04390133514369767, "grad_norm": 0.42943118298688115, "learning_rate": 9.994970968108473e-06, "loss": 0.536, "step": 970 }, { "epoch": 0.043946594252093234, "grad_norm": 1.161378142208096, "learning_rate": 9.994938050466976e-06, "loss": 0.4773, "step": 971 }, { "epoch": 0.043991853360488796, "grad_norm": 1.1044222313320406, "learning_rate": 9.994905025499602e-06, "loss": 0.5134, "step": 972 }, { "epoch": 0.044037112468884365, "grad_norm": 0.8840976288998802, "learning_rate": 9.994871893207058e-06, "loss": 0.4961, "step": 973 }, { "epoch": 0.04408237157727993, "grad_norm": 0.9243901215383543, "learning_rate": 9.99483865359006e-06, "loss": 0.5218, "step": 974 }, { "epoch": 0.04412763068567549, "grad_norm": 0.924911837548586, "learning_rate": 9.99480530664932e-06, "loss": 0.464, "step": 975 }, { "epoch": 0.04417288979407106, "grad_norm": 0.875085936045096, "learning_rate": 9.994771852385552e-06, "loss": 0.4863, "step": 976 }, { "epoch": 0.04421814890246662, "grad_norm": 0.8127041858417101, "learning_rate": 9.994738290799479e-06, "loss": 0.4658, "step": 977 }, { "epoch": 0.04426340801086218, "grad_norm": 0.5493520559000986, "learning_rate": 9.99470462189182e-06, "loss": 0.5463, "step": 978 }, { "epoch": 0.04430866711925775, "grad_norm": 1.02408537343092, "learning_rate": 9.994670845663297e-06, "loss": 0.463, "step": 979 }, { "epoch": 0.044353926227653315, "grad_norm": 0.916174894899694, "learning_rate": 9.99463696211464e-06, "loss": 0.4892, "step": 980 }, { "epoch": 0.04439918533604888, "grad_norm": 0.8152376323392234, "learning_rate": 9.994602971246573e-06, "loss": 0.4441, "step": 981 }, { "epoch": 0.044444444444444446, "grad_norm": 0.8328042792025057, "learning_rate": 9.994568873059829e-06, "loss": 0.4403, "step": 982 }, { "epoch": 0.04448970355284001, "grad_norm": 2.0243382061600372, "learning_rate": 9.994534667555138e-06, "loss": 0.5063, "step": 983 }, { "epoch": 0.04453496266123557, "grad_norm": 0.8762795255672268, "learning_rate": 9.994500354733238e-06, "loss": 0.4542, "step": 984 }, { "epoch": 0.04458022176963114, "grad_norm": 0.4677123917860248, "learning_rate": 9.994465934594863e-06, "loss": 0.5622, "step": 985 }, { "epoch": 0.0446254808780267, "grad_norm": 1.0928191852342053, "learning_rate": 9.994431407140757e-06, "loss": 0.4326, "step": 986 }, { "epoch": 0.044670739986422264, "grad_norm": 0.8365114456102918, "learning_rate": 9.994396772371658e-06, "loss": 0.4831, "step": 987 }, { "epoch": 0.044715999094817833, "grad_norm": 0.40524873724140714, "learning_rate": 9.994362030288312e-06, "loss": 0.5172, "step": 988 }, { "epoch": 0.044761258203213396, "grad_norm": 0.7934455189770376, "learning_rate": 9.994327180891462e-06, "loss": 0.4512, "step": 989 }, { "epoch": 0.04480651731160896, "grad_norm": 0.43443711211319097, "learning_rate": 9.994292224181864e-06, "loss": 0.5455, "step": 990 }, { "epoch": 0.04485177642000453, "grad_norm": 1.078608052597576, "learning_rate": 9.994257160160263e-06, "loss": 0.4939, "step": 991 }, { "epoch": 0.04489703552840009, "grad_norm": 0.8074285900663641, "learning_rate": 9.994221988827415e-06, "loss": 0.4862, "step": 992 }, { "epoch": 0.04494229463679566, "grad_norm": 0.790319743842942, "learning_rate": 9.994186710184073e-06, "loss": 0.43, "step": 993 }, { "epoch": 0.04498755374519122, "grad_norm": 0.8873418220441408, "learning_rate": 9.994151324231e-06, "loss": 0.4658, "step": 994 }, { "epoch": 0.04503281285358678, "grad_norm": 0.8225925871404844, "learning_rate": 9.994115830968951e-06, "loss": 0.417, "step": 995 }, { "epoch": 0.04507807196198235, "grad_norm": 0.8076197995626312, "learning_rate": 9.994080230398693e-06, "loss": 0.474, "step": 996 }, { "epoch": 0.045123331070377914, "grad_norm": 0.8566188848933955, "learning_rate": 9.994044522520988e-06, "loss": 0.5055, "step": 997 }, { "epoch": 0.04516859017877348, "grad_norm": 0.7046398217059011, "learning_rate": 9.994008707336604e-06, "loss": 0.5656, "step": 998 }, { "epoch": 0.045213849287169046, "grad_norm": 1.267284206330529, "learning_rate": 9.99397278484631e-06, "loss": 0.5063, "step": 999 }, { "epoch": 0.04525910839556461, "grad_norm": 0.8217192392842306, "learning_rate": 9.993936755050881e-06, "loss": 0.5099, "step": 1000 }, { "epoch": 0.04530436750396017, "grad_norm": 0.866571381344001, "learning_rate": 9.993900617951087e-06, "loss": 0.4602, "step": 1001 }, { "epoch": 0.04534962661235574, "grad_norm": 0.8053116757932492, "learning_rate": 9.993864373547707e-06, "loss": 0.4631, "step": 1002 }, { "epoch": 0.0453948857207513, "grad_norm": 0.8906153508438541, "learning_rate": 9.993828021841518e-06, "loss": 0.4536, "step": 1003 }, { "epoch": 0.045440144829146864, "grad_norm": 0.7761743131090554, "learning_rate": 9.993791562833303e-06, "loss": 0.4591, "step": 1004 }, { "epoch": 0.04548540393754243, "grad_norm": 0.8438694699439588, "learning_rate": 9.993754996523846e-06, "loss": 0.5054, "step": 1005 }, { "epoch": 0.045530663045937995, "grad_norm": 0.9633033027437268, "learning_rate": 9.99371832291393e-06, "loss": 0.4456, "step": 1006 }, { "epoch": 0.04557592215433356, "grad_norm": 0.7624238288048312, "learning_rate": 9.993681542004343e-06, "loss": 0.5288, "step": 1007 }, { "epoch": 0.04562118126272913, "grad_norm": 1.0441430641968361, "learning_rate": 9.99364465379588e-06, "loss": 0.4678, "step": 1008 }, { "epoch": 0.04566644037112469, "grad_norm": 0.8104564043918575, "learning_rate": 9.993607658289325e-06, "loss": 0.428, "step": 1009 }, { "epoch": 0.04571169947952025, "grad_norm": 0.39650819228795653, "learning_rate": 9.993570555485484e-06, "loss": 0.5174, "step": 1010 }, { "epoch": 0.04575695858791582, "grad_norm": 1.0830099479547428, "learning_rate": 9.993533345385145e-06, "loss": 0.5068, "step": 1011 }, { "epoch": 0.04580221769631138, "grad_norm": 0.5169965065083604, "learning_rate": 9.993496027989112e-06, "loss": 0.5052, "step": 1012 }, { "epoch": 0.045847476804706945, "grad_norm": 0.4772035033679194, "learning_rate": 9.993458603298184e-06, "loss": 0.5631, "step": 1013 }, { "epoch": 0.045892735913102514, "grad_norm": 1.0771229663771391, "learning_rate": 9.993421071313168e-06, "loss": 0.4949, "step": 1014 }, { "epoch": 0.045937995021498076, "grad_norm": 0.9200033400407627, "learning_rate": 9.993383432034869e-06, "loss": 0.4857, "step": 1015 }, { "epoch": 0.04598325412989364, "grad_norm": 0.8675512644866923, "learning_rate": 9.993345685464097e-06, "loss": 0.4762, "step": 1016 }, { "epoch": 0.04602851323828921, "grad_norm": 0.8495883898640996, "learning_rate": 9.993307831601661e-06, "loss": 0.4652, "step": 1017 }, { "epoch": 0.04607377234668477, "grad_norm": 0.8723604577429355, "learning_rate": 9.993269870448375e-06, "loss": 0.4401, "step": 1018 }, { "epoch": 0.04611903145508033, "grad_norm": 0.695706230625535, "learning_rate": 9.993231802005056e-06, "loss": 0.4295, "step": 1019 }, { "epoch": 0.0461642905634759, "grad_norm": 0.8270086618965578, "learning_rate": 9.99319362627252e-06, "loss": 0.4856, "step": 1020 }, { "epoch": 0.046209549671871464, "grad_norm": 0.8445513154871833, "learning_rate": 9.993155343251592e-06, "loss": 0.476, "step": 1021 }, { "epoch": 0.046254808780267026, "grad_norm": 0.8926816239824479, "learning_rate": 9.993116952943087e-06, "loss": 0.5158, "step": 1022 }, { "epoch": 0.046300067888662595, "grad_norm": 1.0848942171687697, "learning_rate": 9.993078455347835e-06, "loss": 0.5605, "step": 1023 }, { "epoch": 0.04634532699705816, "grad_norm": 0.7114123741793746, "learning_rate": 9.993039850466664e-06, "loss": 0.523, "step": 1024 }, { "epoch": 0.04639058610545372, "grad_norm": 0.41718334147884434, "learning_rate": 9.9930011383004e-06, "loss": 0.5228, "step": 1025 }, { "epoch": 0.04643584521384929, "grad_norm": 1.0948263552696134, "learning_rate": 9.992962318849876e-06, "loss": 0.4418, "step": 1026 }, { "epoch": 0.04648110432224485, "grad_norm": 1.3184136429115096, "learning_rate": 9.992923392115927e-06, "loss": 0.5414, "step": 1027 }, { "epoch": 0.04652636343064041, "grad_norm": 0.988322664954066, "learning_rate": 9.992884358099389e-06, "loss": 0.4914, "step": 1028 }, { "epoch": 0.04657162253903598, "grad_norm": 1.03237806355917, "learning_rate": 9.9928452168011e-06, "loss": 0.5398, "step": 1029 }, { "epoch": 0.046616881647431545, "grad_norm": 1.0051916183489378, "learning_rate": 9.992805968221902e-06, "loss": 0.4913, "step": 1030 }, { "epoch": 0.04666214075582711, "grad_norm": 0.823719675186997, "learning_rate": 9.99276661236264e-06, "loss": 0.5054, "step": 1031 }, { "epoch": 0.046707399864222676, "grad_norm": 0.8540554077712061, "learning_rate": 9.992727149224155e-06, "loss": 0.4993, "step": 1032 }, { "epoch": 0.04675265897261824, "grad_norm": 0.9522023086516167, "learning_rate": 9.992687578807296e-06, "loss": 0.477, "step": 1033 }, { "epoch": 0.0467979180810138, "grad_norm": 1.1578859999622484, "learning_rate": 9.992647901112918e-06, "loss": 0.525, "step": 1034 }, { "epoch": 0.04684317718940937, "grad_norm": 0.7745434808777305, "learning_rate": 9.992608116141868e-06, "loss": 0.4579, "step": 1035 }, { "epoch": 0.04688843629780493, "grad_norm": 0.7848365984109472, "learning_rate": 9.992568223895007e-06, "loss": 0.4992, "step": 1036 }, { "epoch": 0.0469336954062005, "grad_norm": 1.4260545518131067, "learning_rate": 9.992528224373184e-06, "loss": 0.4865, "step": 1037 }, { "epoch": 0.046978954514596064, "grad_norm": 0.973652136914254, "learning_rate": 9.992488117577265e-06, "loss": 0.4634, "step": 1038 }, { "epoch": 0.047024213622991626, "grad_norm": 0.7644367544615681, "learning_rate": 9.99244790350811e-06, "loss": 0.4649, "step": 1039 }, { "epoch": 0.047069472731387195, "grad_norm": 0.8781215453997191, "learning_rate": 9.992407582166582e-06, "loss": 0.474, "step": 1040 }, { "epoch": 0.04711473183978276, "grad_norm": 0.8450792350739496, "learning_rate": 9.992367153553549e-06, "loss": 0.446, "step": 1041 }, { "epoch": 0.04715999094817832, "grad_norm": 0.9136083369013077, "learning_rate": 9.992326617669876e-06, "loss": 0.455, "step": 1042 }, { "epoch": 0.04720525005657389, "grad_norm": 2.3247242416841916, "learning_rate": 9.99228597451644e-06, "loss": 0.5615, "step": 1043 }, { "epoch": 0.04725050916496945, "grad_norm": 0.980824771184377, "learning_rate": 9.99224522409411e-06, "loss": 0.4526, "step": 1044 }, { "epoch": 0.04729576827336501, "grad_norm": 0.7777888346815359, "learning_rate": 9.992204366403761e-06, "loss": 0.5381, "step": 1045 }, { "epoch": 0.04734102738176058, "grad_norm": 0.568469961832735, "learning_rate": 9.992163401446274e-06, "loss": 0.5143, "step": 1046 }, { "epoch": 0.047386286490156145, "grad_norm": 1.0356275444400265, "learning_rate": 9.992122329222527e-06, "loss": 0.514, "step": 1047 }, { "epoch": 0.04743154559855171, "grad_norm": 0.9332493931025058, "learning_rate": 9.992081149733404e-06, "loss": 0.4338, "step": 1048 }, { "epoch": 0.047476804706947276, "grad_norm": 1.0333833578504823, "learning_rate": 9.99203986297979e-06, "loss": 0.5054, "step": 1049 }, { "epoch": 0.04752206381534284, "grad_norm": 0.8779286388663978, "learning_rate": 9.99199846896257e-06, "loss": 0.4828, "step": 1050 }, { "epoch": 0.0475673229237384, "grad_norm": 0.7946450784816697, "learning_rate": 9.991956967682635e-06, "loss": 0.488, "step": 1051 }, { "epoch": 0.04761258203213397, "grad_norm": 2.8390151308444898, "learning_rate": 9.991915359140876e-06, "loss": 0.5759, "step": 1052 }, { "epoch": 0.04765784114052953, "grad_norm": 0.795466215922787, "learning_rate": 9.991873643338187e-06, "loss": 0.4438, "step": 1053 }, { "epoch": 0.047703100248925094, "grad_norm": 0.89522116285953, "learning_rate": 9.991831820275466e-06, "loss": 0.509, "step": 1054 }, { "epoch": 0.04774835935732066, "grad_norm": 0.8460939918115581, "learning_rate": 9.99178988995361e-06, "loss": 0.5155, "step": 1055 }, { "epoch": 0.047793618465716226, "grad_norm": 0.9491912394916352, "learning_rate": 9.991747852373522e-06, "loss": 0.4917, "step": 1056 }, { "epoch": 0.04783887757411179, "grad_norm": 1.1064849521692737, "learning_rate": 9.9917057075361e-06, "loss": 0.4638, "step": 1057 }, { "epoch": 0.04788413668250736, "grad_norm": 0.8443091766555462, "learning_rate": 9.991663455442255e-06, "loss": 0.498, "step": 1058 }, { "epoch": 0.04792939579090292, "grad_norm": 1.4157963360282313, "learning_rate": 9.991621096092895e-06, "loss": 0.5387, "step": 1059 }, { "epoch": 0.04797465489929848, "grad_norm": 0.8466878016153724, "learning_rate": 9.991578629488926e-06, "loss": 0.4364, "step": 1060 }, { "epoch": 0.04801991400769405, "grad_norm": 0.907026124451798, "learning_rate": 9.991536055631263e-06, "loss": 0.4207, "step": 1061 }, { "epoch": 0.04806517311608961, "grad_norm": 1.3059124793214225, "learning_rate": 9.99149337452082e-06, "loss": 0.5143, "step": 1062 }, { "epoch": 0.048110432224485175, "grad_norm": 1.0645852262999596, "learning_rate": 9.991450586158515e-06, "loss": 0.4878, "step": 1063 }, { "epoch": 0.048155691332880744, "grad_norm": 0.8253282261334028, "learning_rate": 9.991407690545267e-06, "loss": 0.493, "step": 1064 }, { "epoch": 0.048200950441276307, "grad_norm": 0.841953712734116, "learning_rate": 9.991364687681998e-06, "loss": 0.4481, "step": 1065 }, { "epoch": 0.04824620954967187, "grad_norm": 1.8495652063684374, "learning_rate": 9.991321577569632e-06, "loss": 0.4993, "step": 1066 }, { "epoch": 0.04829146865806744, "grad_norm": 0.7895317716602368, "learning_rate": 9.991278360209094e-06, "loss": 0.4786, "step": 1067 }, { "epoch": 0.048336727766463, "grad_norm": 0.7317553707364586, "learning_rate": 9.991235035601314e-06, "loss": 0.4064, "step": 1068 }, { "epoch": 0.04838198687485856, "grad_norm": 4.832414373312705, "learning_rate": 9.991191603747223e-06, "loss": 0.4873, "step": 1069 }, { "epoch": 0.04842724598325413, "grad_norm": 0.8183438730813508, "learning_rate": 9.991148064647753e-06, "loss": 0.4324, "step": 1070 }, { "epoch": 0.048472505091649694, "grad_norm": 0.8433086571476347, "learning_rate": 9.99110441830384e-06, "loss": 0.4406, "step": 1071 }, { "epoch": 0.048517764200045256, "grad_norm": 0.8047137664912013, "learning_rate": 9.991060664716423e-06, "loss": 0.4656, "step": 1072 }, { "epoch": 0.048563023308440825, "grad_norm": 0.8109246254977853, "learning_rate": 9.991016803886441e-06, "loss": 0.4845, "step": 1073 }, { "epoch": 0.04860828241683639, "grad_norm": 0.88284711254973, "learning_rate": 9.990972835814836e-06, "loss": 0.4508, "step": 1074 }, { "epoch": 0.04865354152523195, "grad_norm": 0.9783194550591081, "learning_rate": 9.990928760502554e-06, "loss": 0.4699, "step": 1075 }, { "epoch": 0.04869880063362752, "grad_norm": 0.8693059736340535, "learning_rate": 9.990884577950542e-06, "loss": 0.5487, "step": 1076 }, { "epoch": 0.04874405974202308, "grad_norm": 0.8105918598711073, "learning_rate": 9.990840288159747e-06, "loss": 0.4735, "step": 1077 }, { "epoch": 0.04878931885041864, "grad_norm": 0.4456518130885961, "learning_rate": 9.990795891131125e-06, "loss": 0.564, "step": 1078 }, { "epoch": 0.04883457795881421, "grad_norm": 0.7670428008042319, "learning_rate": 9.990751386865624e-06, "loss": 0.4433, "step": 1079 }, { "epoch": 0.048879837067209775, "grad_norm": 0.917325505413355, "learning_rate": 9.990706775364204e-06, "loss": 0.4988, "step": 1080 }, { "epoch": 0.048925096175605344, "grad_norm": 0.8182023141846203, "learning_rate": 9.990662056627825e-06, "loss": 0.4192, "step": 1081 }, { "epoch": 0.048970355284000906, "grad_norm": 0.913312253395318, "learning_rate": 9.990617230657446e-06, "loss": 0.5304, "step": 1082 }, { "epoch": 0.04901561439239647, "grad_norm": 0.8520793636486157, "learning_rate": 9.990572297454031e-06, "loss": 0.5529, "step": 1083 }, { "epoch": 0.04906087350079204, "grad_norm": 0.7872118274791575, "learning_rate": 9.990527257018544e-06, "loss": 0.4579, "step": 1084 }, { "epoch": 0.0491061326091876, "grad_norm": 0.7971591462553888, "learning_rate": 9.990482109351951e-06, "loss": 0.4798, "step": 1085 }, { "epoch": 0.04915139171758316, "grad_norm": 0.5425751844160779, "learning_rate": 9.990436854455228e-06, "loss": 0.5453, "step": 1086 }, { "epoch": 0.04919665082597873, "grad_norm": 0.885036195458033, "learning_rate": 9.990391492329341e-06, "loss": 0.4667, "step": 1087 }, { "epoch": 0.049241909934374294, "grad_norm": 0.8812596963416286, "learning_rate": 9.99034602297527e-06, "loss": 0.4687, "step": 1088 }, { "epoch": 0.049287169042769856, "grad_norm": 0.7548044713322737, "learning_rate": 9.990300446393988e-06, "loss": 0.4574, "step": 1089 }, { "epoch": 0.049332428151165425, "grad_norm": 1.3872353232776937, "learning_rate": 9.990254762586477e-06, "loss": 0.4417, "step": 1090 }, { "epoch": 0.04937768725956099, "grad_norm": 0.6820528986305062, "learning_rate": 9.990208971553716e-06, "loss": 0.5387, "step": 1091 }, { "epoch": 0.04942294636795655, "grad_norm": 0.8846682032693766, "learning_rate": 9.990163073296692e-06, "loss": 0.4802, "step": 1092 }, { "epoch": 0.04946820547635212, "grad_norm": 0.4990089890256931, "learning_rate": 9.99011706781639e-06, "loss": 0.5377, "step": 1093 }, { "epoch": 0.04951346458474768, "grad_norm": 0.7752264950430418, "learning_rate": 9.990070955113798e-06, "loss": 0.4238, "step": 1094 }, { "epoch": 0.04955872369314324, "grad_norm": 0.8435283800432144, "learning_rate": 9.990024735189907e-06, "loss": 0.4477, "step": 1095 }, { "epoch": 0.04960398280153881, "grad_norm": 0.7803389134460659, "learning_rate": 9.989978408045709e-06, "loss": 0.4369, "step": 1096 }, { "epoch": 0.049649241909934375, "grad_norm": 0.7334389651169826, "learning_rate": 9.989931973682202e-06, "loss": 0.5141, "step": 1097 }, { "epoch": 0.04969450101832994, "grad_norm": 0.820447034179538, "learning_rate": 9.989885432100381e-06, "loss": 0.5037, "step": 1098 }, { "epoch": 0.049739760126725506, "grad_norm": 0.6248412960611177, "learning_rate": 9.989838783301248e-06, "loss": 0.5438, "step": 1099 }, { "epoch": 0.04978501923512107, "grad_norm": 0.8232431368916311, "learning_rate": 9.989792027285805e-06, "loss": 0.4732, "step": 1100 }, { "epoch": 0.04983027834351663, "grad_norm": 0.8808472161055362, "learning_rate": 9.989745164055056e-06, "loss": 0.4218, "step": 1101 }, { "epoch": 0.0498755374519122, "grad_norm": 0.8609138479748117, "learning_rate": 9.989698193610007e-06, "loss": 0.4684, "step": 1102 }, { "epoch": 0.04992079656030776, "grad_norm": 0.7324041656570668, "learning_rate": 9.98965111595167e-06, "loss": 0.4427, "step": 1103 }, { "epoch": 0.049966055668703324, "grad_norm": 0.7668886253685008, "learning_rate": 9.989603931081055e-06, "loss": 0.46, "step": 1104 }, { "epoch": 0.05001131477709889, "grad_norm": 0.8435203086437428, "learning_rate": 9.989556638999175e-06, "loss": 0.5022, "step": 1105 }, { "epoch": 0.050056573885494456, "grad_norm": 0.4261132058486116, "learning_rate": 9.989509239707047e-06, "loss": 0.5337, "step": 1106 }, { "epoch": 0.05010183299389002, "grad_norm": 0.8578128070222978, "learning_rate": 9.989461733205692e-06, "loss": 0.4439, "step": 1107 }, { "epoch": 0.05014709210228559, "grad_norm": 0.836209356153558, "learning_rate": 9.989414119496126e-06, "loss": 0.4679, "step": 1108 }, { "epoch": 0.05019235121068115, "grad_norm": 0.7941865579121077, "learning_rate": 9.989366398579375e-06, "loss": 0.4532, "step": 1109 }, { "epoch": 0.05023761031907671, "grad_norm": 0.35779933739851233, "learning_rate": 9.989318570456463e-06, "loss": 0.5278, "step": 1110 }, { "epoch": 0.05028286942747228, "grad_norm": 0.8641871319948795, "learning_rate": 9.989270635128418e-06, "loss": 0.4225, "step": 1111 }, { "epoch": 0.05032812853586784, "grad_norm": 0.5511830790669976, "learning_rate": 9.989222592596272e-06, "loss": 0.5678, "step": 1112 }, { "epoch": 0.050373387644263405, "grad_norm": 0.8417673671234892, "learning_rate": 9.989174442861056e-06, "loss": 0.4781, "step": 1113 }, { "epoch": 0.050418646752658974, "grad_norm": 0.8276051715311175, "learning_rate": 9.989126185923803e-06, "loss": 0.4283, "step": 1114 }, { "epoch": 0.05046390586105454, "grad_norm": 0.8593958832271233, "learning_rate": 9.989077821785552e-06, "loss": 0.4295, "step": 1115 }, { "epoch": 0.0505091649694501, "grad_norm": 0.7195755355913743, "learning_rate": 9.98902935044734e-06, "loss": 0.4601, "step": 1116 }, { "epoch": 0.05055442407784567, "grad_norm": 0.4017362161163235, "learning_rate": 9.988980771910213e-06, "loss": 0.5335, "step": 1117 }, { "epoch": 0.05059968318624123, "grad_norm": 0.8845074196050715, "learning_rate": 9.988932086175209e-06, "loss": 0.471, "step": 1118 }, { "epoch": 0.05064494229463679, "grad_norm": 0.761327368968983, "learning_rate": 9.988883293243378e-06, "loss": 0.4394, "step": 1119 }, { "epoch": 0.05069020140303236, "grad_norm": 0.7783332130399511, "learning_rate": 9.988834393115768e-06, "loss": 0.4287, "step": 1120 }, { "epoch": 0.050735460511427924, "grad_norm": 0.7309564495384006, "learning_rate": 9.988785385793427e-06, "loss": 0.4478, "step": 1121 }, { "epoch": 0.050780719619823486, "grad_norm": 0.8208787390443257, "learning_rate": 9.98873627127741e-06, "loss": 0.4197, "step": 1122 }, { "epoch": 0.050825978728219055, "grad_norm": 0.4006151609810976, "learning_rate": 9.988687049568772e-06, "loss": 0.5226, "step": 1123 }, { "epoch": 0.05087123783661462, "grad_norm": 0.36589360608826726, "learning_rate": 9.988637720668573e-06, "loss": 0.5386, "step": 1124 }, { "epoch": 0.05091649694501019, "grad_norm": 0.7734634813347808, "learning_rate": 9.98858828457787e-06, "loss": 0.4795, "step": 1125 }, { "epoch": 0.05096175605340575, "grad_norm": 0.8394436286581666, "learning_rate": 9.988538741297724e-06, "loss": 0.4619, "step": 1126 }, { "epoch": 0.05100701516180131, "grad_norm": 0.8843306463699511, "learning_rate": 9.988489090829204e-06, "loss": 0.4081, "step": 1127 }, { "epoch": 0.05105227427019688, "grad_norm": 0.7403667217778743, "learning_rate": 9.988439333173373e-06, "loss": 0.4804, "step": 1128 }, { "epoch": 0.05109753337859244, "grad_norm": 1.013403764971607, "learning_rate": 9.988389468331304e-06, "loss": 0.4803, "step": 1129 }, { "epoch": 0.051142792486988005, "grad_norm": 0.757680606016578, "learning_rate": 9.988339496304062e-06, "loss": 0.4575, "step": 1130 }, { "epoch": 0.051188051595383574, "grad_norm": 0.7396867410136777, "learning_rate": 9.988289417092729e-06, "loss": 0.4178, "step": 1131 }, { "epoch": 0.051233310703779136, "grad_norm": 0.8297804902848872, "learning_rate": 9.988239230698373e-06, "loss": 0.5086, "step": 1132 }, { "epoch": 0.0512785698121747, "grad_norm": 0.7912505856561427, "learning_rate": 9.988188937122078e-06, "loss": 0.4618, "step": 1133 }, { "epoch": 0.05132382892057027, "grad_norm": 0.9367111562458711, "learning_rate": 9.988138536364922e-06, "loss": 0.4813, "step": 1134 }, { "epoch": 0.05136908802896583, "grad_norm": 0.6628874626437402, "learning_rate": 9.988088028427992e-06, "loss": 0.5245, "step": 1135 }, { "epoch": 0.05141434713736139, "grad_norm": 1.7104204042871356, "learning_rate": 9.988037413312365e-06, "loss": 0.4558, "step": 1136 }, { "epoch": 0.05145960624575696, "grad_norm": 0.8066081014907509, "learning_rate": 9.987986691019136e-06, "loss": 0.4825, "step": 1137 }, { "epoch": 0.051504865354152524, "grad_norm": 0.806073020603354, "learning_rate": 9.987935861549393e-06, "loss": 0.4536, "step": 1138 }, { "epoch": 0.051550124462548086, "grad_norm": 0.7535675067601677, "learning_rate": 9.987884924904228e-06, "loss": 0.4765, "step": 1139 }, { "epoch": 0.051595383570943655, "grad_norm": 0.8168490660049519, "learning_rate": 9.987833881084734e-06, "loss": 0.4676, "step": 1140 }, { "epoch": 0.05164064267933922, "grad_norm": 0.849879574073609, "learning_rate": 9.987782730092009e-06, "loss": 0.485, "step": 1141 }, { "epoch": 0.05168590178773478, "grad_norm": 0.8437989117176353, "learning_rate": 9.987731471927152e-06, "loss": 0.4804, "step": 1142 }, { "epoch": 0.05173116089613035, "grad_norm": 1.1720073013495276, "learning_rate": 9.987680106591264e-06, "loss": 0.4653, "step": 1143 }, { "epoch": 0.05177642000452591, "grad_norm": 0.6529217354151006, "learning_rate": 9.98762863408545e-06, "loss": 0.5479, "step": 1144 }, { "epoch": 0.05182167911292147, "grad_norm": 0.8299860927045014, "learning_rate": 9.987577054410813e-06, "loss": 0.4758, "step": 1145 }, { "epoch": 0.05186693822131704, "grad_norm": 0.7964057437485828, "learning_rate": 9.987525367568464e-06, "loss": 0.4813, "step": 1146 }, { "epoch": 0.051912197329712605, "grad_norm": 0.7868668624743436, "learning_rate": 9.987473573559514e-06, "loss": 0.4751, "step": 1147 }, { "epoch": 0.05195745643810817, "grad_norm": 0.36553766406529065, "learning_rate": 9.987421672385073e-06, "loss": 0.5253, "step": 1148 }, { "epoch": 0.052002715546503736, "grad_norm": 0.8803059079274371, "learning_rate": 9.98736966404626e-06, "loss": 0.474, "step": 1149 }, { "epoch": 0.0520479746548993, "grad_norm": 0.7268443082420789, "learning_rate": 9.98731754854419e-06, "loss": 0.4543, "step": 1150 }, { "epoch": 0.05209323376329486, "grad_norm": 0.38910048527339913, "learning_rate": 9.987265325879983e-06, "loss": 0.5388, "step": 1151 }, { "epoch": 0.05213849287169043, "grad_norm": 0.37753851519302517, "learning_rate": 9.98721299605476e-06, "loss": 0.5392, "step": 1152 }, { "epoch": 0.05218375198008599, "grad_norm": 1.2202861369230587, "learning_rate": 9.987160559069649e-06, "loss": 0.4305, "step": 1153 }, { "epoch": 0.052229011088481554, "grad_norm": 0.8826481532913922, "learning_rate": 9.987108014925772e-06, "loss": 0.5202, "step": 1154 }, { "epoch": 0.05227427019687712, "grad_norm": 0.8066686419753973, "learning_rate": 9.987055363624263e-06, "loss": 0.4912, "step": 1155 }, { "epoch": 0.052319529305272686, "grad_norm": 0.9693526840801042, "learning_rate": 9.98700260516625e-06, "loss": 0.4606, "step": 1156 }, { "epoch": 0.05236478841366825, "grad_norm": 0.7720688308741186, "learning_rate": 9.986949739552867e-06, "loss": 0.4528, "step": 1157 }, { "epoch": 0.05241004752206382, "grad_norm": 0.6195630876054363, "learning_rate": 9.98689676678525e-06, "loss": 0.5449, "step": 1158 }, { "epoch": 0.05245530663045938, "grad_norm": 1.0037880322159685, "learning_rate": 9.986843686864538e-06, "loss": 0.4996, "step": 1159 }, { "epoch": 0.05250056573885494, "grad_norm": 0.8977655363937432, "learning_rate": 9.986790499791872e-06, "loss": 0.4897, "step": 1160 }, { "epoch": 0.05254582484725051, "grad_norm": 0.42159467768136355, "learning_rate": 9.986737205568393e-06, "loss": 0.5529, "step": 1161 }, { "epoch": 0.05259108395564607, "grad_norm": 0.7786886256636473, "learning_rate": 9.986683804195248e-06, "loss": 0.4662, "step": 1162 }, { "epoch": 0.052636343064041635, "grad_norm": 0.9184508577564199, "learning_rate": 9.98663029567358e-06, "loss": 0.5036, "step": 1163 }, { "epoch": 0.052681602172437204, "grad_norm": 0.445883140071055, "learning_rate": 9.986576680004546e-06, "loss": 0.5074, "step": 1164 }, { "epoch": 0.05272686128083277, "grad_norm": 0.7543499978541274, "learning_rate": 9.986522957189293e-06, "loss": 0.4596, "step": 1165 }, { "epoch": 0.05277212038922833, "grad_norm": 0.3332988296091536, "learning_rate": 9.986469127228977e-06, "loss": 0.5426, "step": 1166 }, { "epoch": 0.0528173794976239, "grad_norm": 0.29582601007727477, "learning_rate": 9.986415190124754e-06, "loss": 0.5026, "step": 1167 }, { "epoch": 0.05286263860601946, "grad_norm": 0.9204990477506604, "learning_rate": 9.986361145877783e-06, "loss": 0.442, "step": 1168 }, { "epoch": 0.05290789771441503, "grad_norm": 0.4160967570525296, "learning_rate": 9.986306994489226e-06, "loss": 0.5665, "step": 1169 }, { "epoch": 0.05295315682281059, "grad_norm": 0.8304239963080823, "learning_rate": 9.986252735960245e-06, "loss": 0.4393, "step": 1170 }, { "epoch": 0.052998415931206154, "grad_norm": 0.3926909854775528, "learning_rate": 9.986198370292007e-06, "loss": 0.5234, "step": 1171 }, { "epoch": 0.05304367503960172, "grad_norm": 0.8733750774138885, "learning_rate": 9.98614389748568e-06, "loss": 0.5123, "step": 1172 }, { "epoch": 0.053088934147997285, "grad_norm": 0.8887474864844891, "learning_rate": 9.986089317542434e-06, "loss": 0.5097, "step": 1173 }, { "epoch": 0.05313419325639285, "grad_norm": 0.7862722297713525, "learning_rate": 9.986034630463443e-06, "loss": 0.4245, "step": 1174 }, { "epoch": 0.05317945236478842, "grad_norm": 0.7915565801064068, "learning_rate": 9.985979836249882e-06, "loss": 0.4794, "step": 1175 }, { "epoch": 0.05322471147318398, "grad_norm": 0.8468539133747583, "learning_rate": 9.985924934902927e-06, "loss": 0.4669, "step": 1176 }, { "epoch": 0.05326997058157954, "grad_norm": 0.6026584322786569, "learning_rate": 9.985869926423757e-06, "loss": 0.5246, "step": 1177 }, { "epoch": 0.05331522968997511, "grad_norm": 0.49181922663684496, "learning_rate": 9.985814810813556e-06, "loss": 0.5579, "step": 1178 }, { "epoch": 0.05336048879837067, "grad_norm": 0.865929858522939, "learning_rate": 9.985759588073508e-06, "loss": 0.4746, "step": 1179 }, { "epoch": 0.053405747906766235, "grad_norm": 0.4006188616165955, "learning_rate": 9.985704258204798e-06, "loss": 0.5232, "step": 1180 }, { "epoch": 0.053451007015161804, "grad_norm": 0.44661749061090317, "learning_rate": 9.985648821208616e-06, "loss": 0.5318, "step": 1181 }, { "epoch": 0.053496266123557366, "grad_norm": 0.8150202842917483, "learning_rate": 9.985593277086155e-06, "loss": 0.472, "step": 1182 }, { "epoch": 0.05354152523195293, "grad_norm": 0.9321471164895254, "learning_rate": 9.985537625838603e-06, "loss": 0.4737, "step": 1183 }, { "epoch": 0.0535867843403485, "grad_norm": 0.4787161789655883, "learning_rate": 9.985481867467162e-06, "loss": 0.5248, "step": 1184 }, { "epoch": 0.05363204344874406, "grad_norm": 0.781597037845619, "learning_rate": 9.985426001973026e-06, "loss": 0.447, "step": 1185 }, { "epoch": 0.05367730255713962, "grad_norm": 0.7739348385383168, "learning_rate": 9.985370029357399e-06, "loss": 0.4877, "step": 1186 }, { "epoch": 0.05372256166553519, "grad_norm": 0.8249985271286937, "learning_rate": 9.98531394962148e-06, "loss": 0.4825, "step": 1187 }, { "epoch": 0.053767820773930754, "grad_norm": 0.7769368520306797, "learning_rate": 9.985257762766476e-06, "loss": 0.4585, "step": 1188 }, { "epoch": 0.053813079882326316, "grad_norm": 0.49925545381022285, "learning_rate": 9.985201468793593e-06, "loss": 0.5321, "step": 1189 }, { "epoch": 0.053858338990721885, "grad_norm": 0.3838928358822514, "learning_rate": 9.985145067704042e-06, "loss": 0.5298, "step": 1190 }, { "epoch": 0.05390359809911745, "grad_norm": 0.9635548790408048, "learning_rate": 9.985088559499032e-06, "loss": 0.4555, "step": 1191 }, { "epoch": 0.05394885720751301, "grad_norm": 0.8111640620370049, "learning_rate": 9.985031944179781e-06, "loss": 0.476, "step": 1192 }, { "epoch": 0.05399411631590858, "grad_norm": 0.8288410981511287, "learning_rate": 9.984975221747505e-06, "loss": 0.4952, "step": 1193 }, { "epoch": 0.05403937542430414, "grad_norm": 0.8598494946424741, "learning_rate": 9.984918392203421e-06, "loss": 0.4564, "step": 1194 }, { "epoch": 0.0540846345326997, "grad_norm": 0.7828458336222965, "learning_rate": 9.98486145554875e-06, "loss": 0.4415, "step": 1195 }, { "epoch": 0.05412989364109527, "grad_norm": 0.7851268025912339, "learning_rate": 9.984804411784717e-06, "loss": 0.4606, "step": 1196 }, { "epoch": 0.054175152749490835, "grad_norm": 0.7997777564795878, "learning_rate": 9.984747260912546e-06, "loss": 0.4906, "step": 1197 }, { "epoch": 0.0542204118578864, "grad_norm": 0.7557512579413929, "learning_rate": 9.984690002933465e-06, "loss": 0.4964, "step": 1198 }, { "epoch": 0.054265670966281966, "grad_norm": 1.010347993778274, "learning_rate": 9.984632637848708e-06, "loss": 0.5612, "step": 1199 }, { "epoch": 0.05431093007467753, "grad_norm": 0.5411492624911337, "learning_rate": 9.984575165659503e-06, "loss": 0.5502, "step": 1200 }, { "epoch": 0.05435618918307309, "grad_norm": 1.0723421207044346, "learning_rate": 9.984517586367088e-06, "loss": 0.4881, "step": 1201 }, { "epoch": 0.05440144829146866, "grad_norm": 1.024700727373943, "learning_rate": 9.984459899972696e-06, "loss": 0.4582, "step": 1202 }, { "epoch": 0.05444670739986422, "grad_norm": 0.8541022994896453, "learning_rate": 9.984402106477572e-06, "loss": 0.4682, "step": 1203 }, { "epoch": 0.054491966508259784, "grad_norm": 0.8690265217212084, "learning_rate": 9.984344205882954e-06, "loss": 0.4897, "step": 1204 }, { "epoch": 0.05453722561665535, "grad_norm": 1.574950294258745, "learning_rate": 9.984286198190087e-06, "loss": 0.5448, "step": 1205 }, { "epoch": 0.054582484725050916, "grad_norm": 0.9788112855639516, "learning_rate": 9.984228083400218e-06, "loss": 0.48, "step": 1206 }, { "epoch": 0.05462774383344648, "grad_norm": 0.8732371443748891, "learning_rate": 9.984169861514597e-06, "loss": 0.4679, "step": 1207 }, { "epoch": 0.05467300294184205, "grad_norm": 0.6303672878123504, "learning_rate": 9.98411153253447e-06, "loss": 0.5437, "step": 1208 }, { "epoch": 0.05471826205023761, "grad_norm": 0.9558686425270596, "learning_rate": 9.984053096461098e-06, "loss": 0.4852, "step": 1209 }, { "epoch": 0.05476352115863317, "grad_norm": 1.087269887409945, "learning_rate": 9.983994553295728e-06, "loss": 0.4598, "step": 1210 }, { "epoch": 0.05480878026702874, "grad_norm": 0.8303647766368976, "learning_rate": 9.983935903039625e-06, "loss": 0.4845, "step": 1211 }, { "epoch": 0.0548540393754243, "grad_norm": 0.8183369574706792, "learning_rate": 9.983877145694046e-06, "loss": 0.5078, "step": 1212 }, { "epoch": 0.05489929848381987, "grad_norm": 0.8522053359132349, "learning_rate": 9.983818281260253e-06, "loss": 0.4419, "step": 1213 }, { "epoch": 0.054944557592215434, "grad_norm": 0.9621572091092583, "learning_rate": 9.983759309739512e-06, "loss": 0.4218, "step": 1214 }, { "epoch": 0.054989816700611, "grad_norm": 0.9053413715699286, "learning_rate": 9.98370023113309e-06, "loss": 0.5094, "step": 1215 }, { "epoch": 0.055035075809006566, "grad_norm": 0.7835898344779059, "learning_rate": 9.983641045442256e-06, "loss": 0.4825, "step": 1216 }, { "epoch": 0.05508033491740213, "grad_norm": 0.9485400715813633, "learning_rate": 9.983581752668283e-06, "loss": 0.5178, "step": 1217 }, { "epoch": 0.05512559402579769, "grad_norm": 0.7566415295491471, "learning_rate": 9.983522352812443e-06, "loss": 0.4506, "step": 1218 }, { "epoch": 0.05517085313419326, "grad_norm": 0.7828487329955025, "learning_rate": 9.983462845876015e-06, "loss": 0.4485, "step": 1219 }, { "epoch": 0.05521611224258882, "grad_norm": 0.8264869845338306, "learning_rate": 9.983403231860273e-06, "loss": 0.4515, "step": 1220 }, { "epoch": 0.055261371350984384, "grad_norm": 1.5117493089257694, "learning_rate": 9.983343510766504e-06, "loss": 0.5786, "step": 1221 }, { "epoch": 0.05530663045937995, "grad_norm": 0.9014398579583309, "learning_rate": 9.983283682595986e-06, "loss": 0.4313, "step": 1222 }, { "epoch": 0.055351889567775515, "grad_norm": 0.614288103847374, "learning_rate": 9.983223747350008e-06, "loss": 0.5528, "step": 1223 }, { "epoch": 0.05539714867617108, "grad_norm": 0.9713214207122499, "learning_rate": 9.983163705029857e-06, "loss": 0.4304, "step": 1224 }, { "epoch": 0.05544240778456665, "grad_norm": 0.9031593512931938, "learning_rate": 9.983103555636821e-06, "loss": 0.4615, "step": 1225 }, { "epoch": 0.05548766689296221, "grad_norm": 0.9316822418803643, "learning_rate": 9.983043299172195e-06, "loss": 0.4663, "step": 1226 }, { "epoch": 0.05553292600135777, "grad_norm": 1.5925339118559703, "learning_rate": 9.982982935637272e-06, "loss": 0.5504, "step": 1227 }, { "epoch": 0.05557818510975334, "grad_norm": 0.8044607515862033, "learning_rate": 9.98292246503335e-06, "loss": 0.4547, "step": 1228 }, { "epoch": 0.0556234442181489, "grad_norm": 0.7912690572472879, "learning_rate": 9.982861887361728e-06, "loss": 0.4676, "step": 1229 }, { "epoch": 0.055668703326544465, "grad_norm": 0.9431332129547105, "learning_rate": 9.982801202623708e-06, "loss": 0.4764, "step": 1230 }, { "epoch": 0.055713962434940034, "grad_norm": 0.8081337093685754, "learning_rate": 9.982740410820595e-06, "loss": 0.4274, "step": 1231 }, { "epoch": 0.055759221543335596, "grad_norm": 0.763449935761009, "learning_rate": 9.98267951195369e-06, "loss": 0.406, "step": 1232 }, { "epoch": 0.05580448065173116, "grad_norm": 0.8422970501795058, "learning_rate": 9.982618506024309e-06, "loss": 0.451, "step": 1233 }, { "epoch": 0.05584973976012673, "grad_norm": 0.757141131265391, "learning_rate": 9.982557393033758e-06, "loss": 0.4609, "step": 1234 }, { "epoch": 0.05589499886852229, "grad_norm": 0.7797311152704947, "learning_rate": 9.98249617298335e-06, "loss": 0.4758, "step": 1235 }, { "epoch": 0.05594025797691785, "grad_norm": 0.7571561418356503, "learning_rate": 9.982434845874405e-06, "loss": 0.4443, "step": 1236 }, { "epoch": 0.05598551708531342, "grad_norm": 0.940424347691789, "learning_rate": 9.982373411708237e-06, "loss": 0.4379, "step": 1237 }, { "epoch": 0.056030776193708984, "grad_norm": 0.8378688150058623, "learning_rate": 9.982311870486166e-06, "loss": 0.5175, "step": 1238 }, { "epoch": 0.056076035302104546, "grad_norm": 0.7725216832344305, "learning_rate": 9.982250222209513e-06, "loss": 0.5485, "step": 1239 }, { "epoch": 0.056121294410500115, "grad_norm": 0.5805096150729553, "learning_rate": 9.982188466879607e-06, "loss": 0.5171, "step": 1240 }, { "epoch": 0.05616655351889568, "grad_norm": 0.857878552541859, "learning_rate": 9.98212660449777e-06, "loss": 0.432, "step": 1241 }, { "epoch": 0.05621181262729124, "grad_norm": 0.4158987046032542, "learning_rate": 9.982064635065336e-06, "loss": 0.5439, "step": 1242 }, { "epoch": 0.05625707173568681, "grad_norm": 0.4930079344799815, "learning_rate": 9.982002558583633e-06, "loss": 0.5426, "step": 1243 }, { "epoch": 0.05630233084408237, "grad_norm": 0.9266098650508937, "learning_rate": 9.981940375053996e-06, "loss": 0.4712, "step": 1244 }, { "epoch": 0.05634758995247793, "grad_norm": 0.8588623400330749, "learning_rate": 9.981878084477764e-06, "loss": 0.4478, "step": 1245 }, { "epoch": 0.0563928490608735, "grad_norm": 0.7961382205379587, "learning_rate": 9.981815686856268e-06, "loss": 0.4534, "step": 1246 }, { "epoch": 0.056438108169269065, "grad_norm": 0.8930930962002508, "learning_rate": 9.981753182190856e-06, "loss": 0.4689, "step": 1247 }, { "epoch": 0.05648336727766463, "grad_norm": 0.907959707580663, "learning_rate": 9.981690570482869e-06, "loss": 0.4321, "step": 1248 }, { "epoch": 0.056528626386060196, "grad_norm": 0.7678510229566248, "learning_rate": 9.981627851733651e-06, "loss": 0.5635, "step": 1249 }, { "epoch": 0.05657388549445576, "grad_norm": 0.8515278105378038, "learning_rate": 9.98156502594455e-06, "loss": 0.4135, "step": 1250 }, { "epoch": 0.05661914460285132, "grad_norm": 0.7744151104367206, "learning_rate": 9.981502093116917e-06, "loss": 0.4919, "step": 1251 }, { "epoch": 0.05666440371124689, "grad_norm": 0.7920093313730772, "learning_rate": 9.981439053252102e-06, "loss": 0.4288, "step": 1252 }, { "epoch": 0.05670966281964245, "grad_norm": 0.8486807298027813, "learning_rate": 9.981375906351463e-06, "loss": 0.4679, "step": 1253 }, { "epoch": 0.056754921928038014, "grad_norm": 1.0169113659412827, "learning_rate": 9.981312652416353e-06, "loss": 0.5354, "step": 1254 }, { "epoch": 0.056800181036433584, "grad_norm": 0.5822200648528999, "learning_rate": 9.981249291448134e-06, "loss": 0.5397, "step": 1255 }, { "epoch": 0.056845440144829146, "grad_norm": 0.8313948880129977, "learning_rate": 9.981185823448166e-06, "loss": 0.4753, "step": 1256 }, { "epoch": 0.056890699253224715, "grad_norm": 0.43699857233451633, "learning_rate": 9.981122248417815e-06, "loss": 0.5286, "step": 1257 }, { "epoch": 0.05693595836162028, "grad_norm": 0.3821736230004524, "learning_rate": 9.981058566358443e-06, "loss": 0.5527, "step": 1258 }, { "epoch": 0.05698121747001584, "grad_norm": 0.7856472855765193, "learning_rate": 9.98099477727142e-06, "loss": 0.4282, "step": 1259 }, { "epoch": 0.05702647657841141, "grad_norm": 0.4076765456528284, "learning_rate": 9.98093088115812e-06, "loss": 0.5309, "step": 1260 }, { "epoch": 0.05707173568680697, "grad_norm": 0.8071172893195849, "learning_rate": 9.980866878019911e-06, "loss": 0.4614, "step": 1261 }, { "epoch": 0.05711699479520253, "grad_norm": 0.8475654460872483, "learning_rate": 9.98080276785817e-06, "loss": 0.4701, "step": 1262 }, { "epoch": 0.0571622539035981, "grad_norm": 0.47535055779038554, "learning_rate": 9.980738550674277e-06, "loss": 0.5347, "step": 1263 }, { "epoch": 0.057207513011993665, "grad_norm": 0.7909484322536614, "learning_rate": 9.980674226469608e-06, "loss": 0.485, "step": 1264 }, { "epoch": 0.05725277212038923, "grad_norm": 0.8709597748130423, "learning_rate": 9.980609795245548e-06, "loss": 0.4867, "step": 1265 }, { "epoch": 0.057298031228784796, "grad_norm": 0.8005551517657652, "learning_rate": 9.980545257003481e-06, "loss": 0.4603, "step": 1266 }, { "epoch": 0.05734329033718036, "grad_norm": 0.7964388047236337, "learning_rate": 9.980480611744791e-06, "loss": 0.4336, "step": 1267 }, { "epoch": 0.05738854944557592, "grad_norm": 0.7983523114274277, "learning_rate": 9.980415859470872e-06, "loss": 0.4721, "step": 1268 }, { "epoch": 0.05743380855397149, "grad_norm": 0.829019588478564, "learning_rate": 9.980351000183108e-06, "loss": 0.4537, "step": 1269 }, { "epoch": 0.05747906766236705, "grad_norm": 0.8097302752292416, "learning_rate": 9.9802860338829e-06, "loss": 0.4967, "step": 1270 }, { "epoch": 0.057524326770762614, "grad_norm": 0.9222055660580988, "learning_rate": 9.98022096057164e-06, "loss": 0.4537, "step": 1271 }, { "epoch": 0.05756958587915818, "grad_norm": 0.8110170634260588, "learning_rate": 9.980155780250728e-06, "loss": 0.4337, "step": 1272 }, { "epoch": 0.057614844987553746, "grad_norm": 0.7978741699808711, "learning_rate": 9.980090492921563e-06, "loss": 0.4801, "step": 1273 }, { "epoch": 0.05766010409594931, "grad_norm": 0.8338140737252998, "learning_rate": 9.98002509858555e-06, "loss": 0.5115, "step": 1274 }, { "epoch": 0.05770536320434488, "grad_norm": 0.7310445282087151, "learning_rate": 9.979959597244089e-06, "loss": 0.4572, "step": 1275 }, { "epoch": 0.05775062231274044, "grad_norm": 0.7531542525602791, "learning_rate": 9.979893988898592e-06, "loss": 0.4782, "step": 1276 }, { "epoch": 0.057795881421136, "grad_norm": 0.7523160239405309, "learning_rate": 9.97982827355047e-06, "loss": 0.4425, "step": 1277 }, { "epoch": 0.05784114052953157, "grad_norm": 0.5112046197859239, "learning_rate": 9.979762451201132e-06, "loss": 0.5545, "step": 1278 }, { "epoch": 0.05788639963792713, "grad_norm": 0.8668024028019767, "learning_rate": 9.979696521851992e-06, "loss": 0.4949, "step": 1279 }, { "epoch": 0.057931658746322695, "grad_norm": 0.779857898757288, "learning_rate": 9.979630485504468e-06, "loss": 0.4369, "step": 1280 }, { "epoch": 0.057976917854718264, "grad_norm": 0.751721968005427, "learning_rate": 9.97956434215998e-06, "loss": 0.4641, "step": 1281 }, { "epoch": 0.058022176963113826, "grad_norm": 0.7025221316380706, "learning_rate": 9.979498091819946e-06, "loss": 0.429, "step": 1282 }, { "epoch": 0.05806743607150939, "grad_norm": 0.7194116567943578, "learning_rate": 9.979431734485794e-06, "loss": 0.4303, "step": 1283 }, { "epoch": 0.05811269517990496, "grad_norm": 0.7585130981008433, "learning_rate": 9.979365270158945e-06, "loss": 0.4604, "step": 1284 }, { "epoch": 0.05815795428830052, "grad_norm": 0.7978623772369193, "learning_rate": 9.979298698840829e-06, "loss": 0.4644, "step": 1285 }, { "epoch": 0.05820321339669608, "grad_norm": 0.7284784348084953, "learning_rate": 9.979232020532877e-06, "loss": 0.4468, "step": 1286 }, { "epoch": 0.05824847250509165, "grad_norm": 0.7619367485973766, "learning_rate": 9.979165235236523e-06, "loss": 0.4488, "step": 1287 }, { "epoch": 0.058293731613487214, "grad_norm": 0.6968847961889845, "learning_rate": 9.979098342953198e-06, "loss": 0.4614, "step": 1288 }, { "epoch": 0.058338990721882776, "grad_norm": 1.0272658772877736, "learning_rate": 9.979031343684344e-06, "loss": 0.4614, "step": 1289 }, { "epoch": 0.058384249830278345, "grad_norm": 0.5435257113968114, "learning_rate": 9.978964237431396e-06, "loss": 0.5455, "step": 1290 }, { "epoch": 0.05842950893867391, "grad_norm": 0.8353831810505, "learning_rate": 9.978897024195801e-06, "loss": 0.4309, "step": 1291 }, { "epoch": 0.05847476804706947, "grad_norm": 0.7290873281764264, "learning_rate": 9.978829703978999e-06, "loss": 0.4579, "step": 1292 }, { "epoch": 0.05852002715546504, "grad_norm": 0.3411679847978857, "learning_rate": 9.978762276782438e-06, "loss": 0.5133, "step": 1293 }, { "epoch": 0.0585652862638606, "grad_norm": 0.3718275823828351, "learning_rate": 9.978694742607566e-06, "loss": 0.5583, "step": 1294 }, { "epoch": 0.05861054537225616, "grad_norm": 0.9738128977028798, "learning_rate": 9.978627101455836e-06, "loss": 0.4861, "step": 1295 }, { "epoch": 0.05865580448065173, "grad_norm": 0.9236918922061743, "learning_rate": 9.9785593533287e-06, "loss": 0.5127, "step": 1296 }, { "epoch": 0.058701063589047295, "grad_norm": 0.6878931249632689, "learning_rate": 9.978491498227615e-06, "loss": 0.4449, "step": 1297 }, { "epoch": 0.05874632269744286, "grad_norm": 0.7557270130278592, "learning_rate": 9.978423536154036e-06, "loss": 0.4625, "step": 1298 }, { "epoch": 0.058791581805838426, "grad_norm": 1.0642306750672659, "learning_rate": 9.978355467109427e-06, "loss": 0.4336, "step": 1299 }, { "epoch": 0.05883684091423399, "grad_norm": 0.5117178183518919, "learning_rate": 9.978287291095248e-06, "loss": 0.5213, "step": 1300 }, { "epoch": 0.05888210002262955, "grad_norm": 0.794491939155005, "learning_rate": 9.978219008112965e-06, "loss": 0.4541, "step": 1301 }, { "epoch": 0.05892735913102512, "grad_norm": 0.8099921246940341, "learning_rate": 9.978150618164044e-06, "loss": 0.4702, "step": 1302 }, { "epoch": 0.05897261823942068, "grad_norm": 0.7695831619969989, "learning_rate": 9.978082121249957e-06, "loss": 0.4413, "step": 1303 }, { "epoch": 0.05901787734781625, "grad_norm": 0.8738306669138687, "learning_rate": 9.978013517372173e-06, "loss": 0.4808, "step": 1304 }, { "epoch": 0.059063136456211814, "grad_norm": 0.8250768119015344, "learning_rate": 9.977944806532169e-06, "loss": 0.5147, "step": 1305 }, { "epoch": 0.059108395564607376, "grad_norm": 0.49320793021794157, "learning_rate": 9.977875988731418e-06, "loss": 0.5311, "step": 1306 }, { "epoch": 0.059153654673002945, "grad_norm": 0.6911527640318451, "learning_rate": 9.977807063971401e-06, "loss": 0.411, "step": 1307 }, { "epoch": 0.05919891378139851, "grad_norm": 0.7413756021870055, "learning_rate": 9.977738032253598e-06, "loss": 0.4715, "step": 1308 }, { "epoch": 0.05924417288979407, "grad_norm": 0.8133220610132956, "learning_rate": 9.977668893579493e-06, "loss": 0.4609, "step": 1309 }, { "epoch": 0.05928943199818964, "grad_norm": 0.8196571698370698, "learning_rate": 9.977599647950572e-06, "loss": 0.4714, "step": 1310 }, { "epoch": 0.0593346911065852, "grad_norm": 0.8644105862836049, "learning_rate": 9.977530295368321e-06, "loss": 0.4819, "step": 1311 }, { "epoch": 0.05937995021498076, "grad_norm": 1.0647079301335312, "learning_rate": 9.977460835834231e-06, "loss": 0.4529, "step": 1312 }, { "epoch": 0.05942520932337633, "grad_norm": 0.8206674886176298, "learning_rate": 9.977391269349795e-06, "loss": 0.458, "step": 1313 }, { "epoch": 0.059470468431771895, "grad_norm": 0.5318759671503385, "learning_rate": 9.977321595916507e-06, "loss": 0.5106, "step": 1314 }, { "epoch": 0.05951572754016746, "grad_norm": 0.8727685781150271, "learning_rate": 9.977251815535867e-06, "loss": 0.4952, "step": 1315 }, { "epoch": 0.059560986648563026, "grad_norm": 0.744440840375188, "learning_rate": 9.97718192820937e-06, "loss": 0.4188, "step": 1316 }, { "epoch": 0.05960624575695859, "grad_norm": 1.0437273788657893, "learning_rate": 9.977111933938519e-06, "loss": 0.4429, "step": 1317 }, { "epoch": 0.05965150486535415, "grad_norm": 0.46107440696628565, "learning_rate": 9.97704183272482e-06, "loss": 0.525, "step": 1318 }, { "epoch": 0.05969676397374972, "grad_norm": 0.9797169729349386, "learning_rate": 9.976971624569776e-06, "loss": 0.4579, "step": 1319 }, { "epoch": 0.05974202308214528, "grad_norm": 0.8694097288243677, "learning_rate": 9.9769013094749e-06, "loss": 0.4914, "step": 1320 }, { "epoch": 0.059787282190540844, "grad_norm": 0.8524501643852321, "learning_rate": 9.976830887441699e-06, "loss": 0.4999, "step": 1321 }, { "epoch": 0.05983254129893641, "grad_norm": 0.8575124599427206, "learning_rate": 9.976760358471687e-06, "loss": 0.4622, "step": 1322 }, { "epoch": 0.059877800407331976, "grad_norm": 0.786340660897482, "learning_rate": 9.976689722566379e-06, "loss": 0.4204, "step": 1323 }, { "epoch": 0.05992305951572754, "grad_norm": 0.4433058625534793, "learning_rate": 9.976618979727295e-06, "loss": 0.5612, "step": 1324 }, { "epoch": 0.05996831862412311, "grad_norm": 0.8653521916140352, "learning_rate": 9.976548129955953e-06, "loss": 0.4716, "step": 1325 }, { "epoch": 0.06001357773251867, "grad_norm": 0.796213598131806, "learning_rate": 9.976477173253878e-06, "loss": 0.4307, "step": 1326 }, { "epoch": 0.06005883684091423, "grad_norm": 0.8431570163012182, "learning_rate": 9.97640610962259e-06, "loss": 0.4347, "step": 1327 }, { "epoch": 0.0601040959493098, "grad_norm": 0.7192342906246789, "learning_rate": 9.97633493906362e-06, "loss": 0.4251, "step": 1328 }, { "epoch": 0.06014935505770536, "grad_norm": 0.3889310353578457, "learning_rate": 9.976263661578495e-06, "loss": 0.5414, "step": 1329 }, { "epoch": 0.060194614166100925, "grad_norm": 0.8408450911647659, "learning_rate": 9.976192277168748e-06, "loss": 0.455, "step": 1330 }, { "epoch": 0.060239873274496494, "grad_norm": 0.7791459262815543, "learning_rate": 9.976120785835912e-06, "loss": 0.4486, "step": 1331 }, { "epoch": 0.06028513238289206, "grad_norm": 1.5368903877653917, "learning_rate": 9.976049187581523e-06, "loss": 0.4438, "step": 1332 }, { "epoch": 0.06033039149128762, "grad_norm": 0.8423946896063987, "learning_rate": 9.97597748240712e-06, "loss": 0.4697, "step": 1333 }, { "epoch": 0.06037565059968319, "grad_norm": 0.7581531607654294, "learning_rate": 9.975905670314243e-06, "loss": 0.4845, "step": 1334 }, { "epoch": 0.06042090970807875, "grad_norm": 0.8046542695421153, "learning_rate": 9.975833751304435e-06, "loss": 0.44, "step": 1335 }, { "epoch": 0.06046616881647431, "grad_norm": 0.4528822879915298, "learning_rate": 9.975761725379243e-06, "loss": 0.5409, "step": 1336 }, { "epoch": 0.06051142792486988, "grad_norm": 0.850037337299667, "learning_rate": 9.975689592540214e-06, "loss": 0.452, "step": 1337 }, { "epoch": 0.060556687033265444, "grad_norm": 0.99034556951951, "learning_rate": 9.975617352788897e-06, "loss": 0.4389, "step": 1338 }, { "epoch": 0.060601946141661006, "grad_norm": 1.0271152953869538, "learning_rate": 9.975545006126843e-06, "loss": 0.5071, "step": 1339 }, { "epoch": 0.060647205250056575, "grad_norm": 1.1420043625492036, "learning_rate": 9.975472552555609e-06, "loss": 0.4876, "step": 1340 }, { "epoch": 0.06069246435845214, "grad_norm": 0.6971624850612915, "learning_rate": 9.975399992076752e-06, "loss": 0.4107, "step": 1341 }, { "epoch": 0.0607377234668477, "grad_norm": 0.9569774396437504, "learning_rate": 9.975327324691828e-06, "loss": 0.4331, "step": 1342 }, { "epoch": 0.06078298257524327, "grad_norm": 0.7914603074563847, "learning_rate": 9.9752545504024e-06, "loss": 0.5065, "step": 1343 }, { "epoch": 0.06082824168363883, "grad_norm": 0.9088018964302117, "learning_rate": 9.975181669210034e-06, "loss": 0.4993, "step": 1344 }, { "epoch": 0.06087350079203439, "grad_norm": 1.1608321903492493, "learning_rate": 9.975108681116293e-06, "loss": 0.5226, "step": 1345 }, { "epoch": 0.06091875990042996, "grad_norm": 0.7616726835673043, "learning_rate": 9.975035586122746e-06, "loss": 0.4445, "step": 1346 }, { "epoch": 0.060964019008825525, "grad_norm": 0.4804652401570814, "learning_rate": 9.974962384230965e-06, "loss": 0.5431, "step": 1347 }, { "epoch": 0.061009278117221094, "grad_norm": 0.4173257563644276, "learning_rate": 9.97488907544252e-06, "loss": 0.5413, "step": 1348 }, { "epoch": 0.061054537225616656, "grad_norm": 0.962069270717757, "learning_rate": 9.97481565975899e-06, "loss": 0.4772, "step": 1349 }, { "epoch": 0.06109979633401222, "grad_norm": 0.36001383419618155, "learning_rate": 9.97474213718195e-06, "loss": 0.5404, "step": 1350 }, { "epoch": 0.06114505544240779, "grad_norm": 0.7685197910763001, "learning_rate": 9.974668507712979e-06, "loss": 0.4357, "step": 1351 }, { "epoch": 0.06119031455080335, "grad_norm": 0.7682770127489988, "learning_rate": 9.974594771353662e-06, "loss": 0.4166, "step": 1352 }, { "epoch": 0.06123557365919891, "grad_norm": 0.8399578495600116, "learning_rate": 9.97452092810558e-06, "loss": 0.4639, "step": 1353 }, { "epoch": 0.06128083276759448, "grad_norm": 0.7164554717054931, "learning_rate": 9.974446977970322e-06, "loss": 0.489, "step": 1354 }, { "epoch": 0.061326091875990044, "grad_norm": 0.8019348898901124, "learning_rate": 9.974372920949478e-06, "loss": 0.4225, "step": 1355 }, { "epoch": 0.061371350984385606, "grad_norm": 0.7457433655397221, "learning_rate": 9.974298757044636e-06, "loss": 0.4416, "step": 1356 }, { "epoch": 0.061416610092781175, "grad_norm": 0.8093746951320654, "learning_rate": 9.97422448625739e-06, "loss": 0.4496, "step": 1357 }, { "epoch": 0.06146186920117674, "grad_norm": 0.7702467986450054, "learning_rate": 9.974150108589338e-06, "loss": 0.4122, "step": 1358 }, { "epoch": 0.0615071283095723, "grad_norm": 0.74345550836823, "learning_rate": 9.974075624042076e-06, "loss": 0.5027, "step": 1359 }, { "epoch": 0.06155238741796787, "grad_norm": 0.7484354102674841, "learning_rate": 9.974001032617208e-06, "loss": 0.4435, "step": 1360 }, { "epoch": 0.06159764652636343, "grad_norm": 0.749996191394422, "learning_rate": 9.973926334316332e-06, "loss": 0.5201, "step": 1361 }, { "epoch": 0.06164290563475899, "grad_norm": 0.8440489977478883, "learning_rate": 9.973851529141056e-06, "loss": 0.4307, "step": 1362 }, { "epoch": 0.06168816474315456, "grad_norm": 0.8720090740465758, "learning_rate": 9.973776617092988e-06, "loss": 0.4501, "step": 1363 }, { "epoch": 0.061733423851550125, "grad_norm": 0.7660819683254514, "learning_rate": 9.973701598173736e-06, "loss": 0.5035, "step": 1364 }, { "epoch": 0.06177868295994569, "grad_norm": 0.8494876594535249, "learning_rate": 9.973626472384911e-06, "loss": 0.4673, "step": 1365 }, { "epoch": 0.061823942068341256, "grad_norm": 0.48390760598129084, "learning_rate": 9.973551239728129e-06, "loss": 0.5281, "step": 1366 }, { "epoch": 0.06186920117673682, "grad_norm": 0.4369140872787393, "learning_rate": 9.973475900205005e-06, "loss": 0.5271, "step": 1367 }, { "epoch": 0.06191446028513238, "grad_norm": 0.7571630206601964, "learning_rate": 9.97340045381716e-06, "loss": 0.457, "step": 1368 }, { "epoch": 0.06195971939352795, "grad_norm": 0.7646121483191878, "learning_rate": 9.973324900566214e-06, "loss": 0.4085, "step": 1369 }, { "epoch": 0.06200497850192351, "grad_norm": 1.3464464658815254, "learning_rate": 9.973249240453789e-06, "loss": 0.4159, "step": 1370 }, { "epoch": 0.062050237610319074, "grad_norm": 0.903357108813622, "learning_rate": 9.973173473481513e-06, "loss": 0.4295, "step": 1371 }, { "epoch": 0.06209549671871464, "grad_norm": 0.7900153899952508, "learning_rate": 9.973097599651013e-06, "loss": 0.4428, "step": 1372 }, { "epoch": 0.062140755827110206, "grad_norm": 0.9107180209952301, "learning_rate": 9.973021618963919e-06, "loss": 0.4394, "step": 1373 }, { "epoch": 0.06218601493550577, "grad_norm": 0.7416484939825573, "learning_rate": 9.972945531421863e-06, "loss": 0.4603, "step": 1374 }, { "epoch": 0.06223127404390134, "grad_norm": 1.465734093495972, "learning_rate": 9.972869337026482e-06, "loss": 0.4159, "step": 1375 }, { "epoch": 0.0622765331522969, "grad_norm": 0.780362708639645, "learning_rate": 9.972793035779412e-06, "loss": 0.4091, "step": 1376 }, { "epoch": 0.06232179226069246, "grad_norm": 0.808375725476795, "learning_rate": 9.972716627682292e-06, "loss": 0.494, "step": 1377 }, { "epoch": 0.06236705136908803, "grad_norm": 0.8677968454058532, "learning_rate": 9.972640112736764e-06, "loss": 0.5243, "step": 1378 }, { "epoch": 0.06241231047748359, "grad_norm": 0.7190882477891677, "learning_rate": 9.972563490944474e-06, "loss": 0.4548, "step": 1379 }, { "epoch": 0.062457569585879155, "grad_norm": 0.8145344529184204, "learning_rate": 9.972486762307064e-06, "loss": 0.4349, "step": 1380 }, { "epoch": 0.06250282869427472, "grad_norm": 0.82848896121603, "learning_rate": 9.972409926826188e-06, "loss": 0.419, "step": 1381 }, { "epoch": 0.0625480878026703, "grad_norm": 0.8915242371627252, "learning_rate": 9.972332984503493e-06, "loss": 0.5263, "step": 1382 }, { "epoch": 0.06259334691106586, "grad_norm": 0.8686588179710028, "learning_rate": 9.972255935340631e-06, "loss": 0.4617, "step": 1383 }, { "epoch": 0.06263860601946142, "grad_norm": 0.7461021561719915, "learning_rate": 9.972178779339264e-06, "loss": 0.4577, "step": 1384 }, { "epoch": 0.06268386512785698, "grad_norm": 0.767860526526951, "learning_rate": 9.972101516501043e-06, "loss": 0.468, "step": 1385 }, { "epoch": 0.06272912423625254, "grad_norm": 0.7755025230348713, "learning_rate": 9.972024146827633e-06, "loss": 0.4581, "step": 1386 }, { "epoch": 0.0627743833446481, "grad_norm": 0.7136222251907728, "learning_rate": 9.971946670320693e-06, "loss": 0.4258, "step": 1387 }, { "epoch": 0.06281964245304368, "grad_norm": 0.7962994471669949, "learning_rate": 9.971869086981892e-06, "loss": 0.418, "step": 1388 }, { "epoch": 0.06286490156143924, "grad_norm": 0.9369097066828213, "learning_rate": 9.971791396812891e-06, "loss": 0.5509, "step": 1389 }, { "epoch": 0.0629101606698348, "grad_norm": 0.85928628848663, "learning_rate": 9.971713599815364e-06, "loss": 0.4512, "step": 1390 }, { "epoch": 0.06295541977823037, "grad_norm": 0.4760552251345332, "learning_rate": 9.971635695990981e-06, "loss": 0.5308, "step": 1391 }, { "epoch": 0.06300067888662593, "grad_norm": 0.8016075510443832, "learning_rate": 9.971557685341415e-06, "loss": 0.5004, "step": 1392 }, { "epoch": 0.06304593799502149, "grad_norm": 0.8390371784654099, "learning_rate": 9.971479567868345e-06, "loss": 0.447, "step": 1393 }, { "epoch": 0.06309119710341707, "grad_norm": 0.8424654439840261, "learning_rate": 9.971401343573448e-06, "loss": 0.464, "step": 1394 }, { "epoch": 0.06313645621181263, "grad_norm": 0.7607383258531611, "learning_rate": 9.971323012458403e-06, "loss": 0.4349, "step": 1395 }, { "epoch": 0.06318171532020819, "grad_norm": 0.8127223782573194, "learning_rate": 9.971244574524897e-06, "loss": 0.4361, "step": 1396 }, { "epoch": 0.06322697442860375, "grad_norm": 1.0790888839572594, "learning_rate": 9.97116602977461e-06, "loss": 0.5568, "step": 1397 }, { "epoch": 0.06327223353699932, "grad_norm": 1.2668559111678692, "learning_rate": 9.971087378209235e-06, "loss": 0.4803, "step": 1398 }, { "epoch": 0.06331749264539488, "grad_norm": 0.755449835673747, "learning_rate": 9.97100861983046e-06, "loss": 0.4364, "step": 1399 }, { "epoch": 0.06336275175379046, "grad_norm": 0.8453741315660476, "learning_rate": 9.970929754639976e-06, "loss": 0.4613, "step": 1400 }, { "epoch": 0.06340801086218602, "grad_norm": 0.8970543727300306, "learning_rate": 9.970850782639478e-06, "loss": 0.4654, "step": 1401 }, { "epoch": 0.06345326997058158, "grad_norm": 0.9167840194035326, "learning_rate": 9.970771703830666e-06, "loss": 0.3953, "step": 1402 }, { "epoch": 0.06349852907897714, "grad_norm": 0.5676117809549915, "learning_rate": 9.970692518215236e-06, "loss": 0.5232, "step": 1403 }, { "epoch": 0.0635437881873727, "grad_norm": 0.8284466275722184, "learning_rate": 9.970613225794887e-06, "loss": 0.4419, "step": 1404 }, { "epoch": 0.06358904729576827, "grad_norm": 0.916990534856046, "learning_rate": 9.970533826571329e-06, "loss": 0.4603, "step": 1405 }, { "epoch": 0.06363430640416384, "grad_norm": 0.5007907202491658, "learning_rate": 9.970454320546264e-06, "loss": 0.512, "step": 1406 }, { "epoch": 0.0636795655125594, "grad_norm": 0.8791752963483216, "learning_rate": 9.9703747077214e-06, "loss": 0.4818, "step": 1407 }, { "epoch": 0.06372482462095497, "grad_norm": 0.8003276708620475, "learning_rate": 9.970294988098452e-06, "loss": 0.4426, "step": 1408 }, { "epoch": 0.06377008372935053, "grad_norm": 0.45111031550043196, "learning_rate": 9.970215161679126e-06, "loss": 0.5424, "step": 1409 }, { "epoch": 0.06381534283774609, "grad_norm": 0.7672189227879728, "learning_rate": 9.970135228465144e-06, "loss": 0.4591, "step": 1410 }, { "epoch": 0.06386060194614165, "grad_norm": 0.9335458891124578, "learning_rate": 9.970055188458219e-06, "loss": 0.4507, "step": 1411 }, { "epoch": 0.06390586105453723, "grad_norm": 0.7114136093026613, "learning_rate": 9.969975041660073e-06, "loss": 0.4377, "step": 1412 }, { "epoch": 0.06395112016293279, "grad_norm": 0.8747199646442678, "learning_rate": 9.969894788072427e-06, "loss": 0.3875, "step": 1413 }, { "epoch": 0.06399637927132835, "grad_norm": 0.829182408703576, "learning_rate": 9.969814427697007e-06, "loss": 0.4383, "step": 1414 }, { "epoch": 0.06404163837972392, "grad_norm": 0.7319960516188317, "learning_rate": 9.969733960535537e-06, "loss": 0.4635, "step": 1415 }, { "epoch": 0.06408689748811948, "grad_norm": 0.9267499056159455, "learning_rate": 9.969653386589749e-06, "loss": 0.5478, "step": 1416 }, { "epoch": 0.06413215659651506, "grad_norm": 0.4920810844031071, "learning_rate": 9.969572705861371e-06, "loss": 0.5244, "step": 1417 }, { "epoch": 0.06417741570491062, "grad_norm": 0.8843609262456265, "learning_rate": 9.96949191835214e-06, "loss": 0.4943, "step": 1418 }, { "epoch": 0.06422267481330618, "grad_norm": 0.7939118677093964, "learning_rate": 9.96941102406379e-06, "loss": 0.4671, "step": 1419 }, { "epoch": 0.06426793392170174, "grad_norm": 0.8074312906259381, "learning_rate": 9.969330022998057e-06, "loss": 0.4537, "step": 1420 }, { "epoch": 0.0643131930300973, "grad_norm": 0.7941660570380797, "learning_rate": 9.969248915156689e-06, "loss": 0.4675, "step": 1421 }, { "epoch": 0.06435845213849287, "grad_norm": 0.739298603734896, "learning_rate": 9.96916770054142e-06, "loss": 0.4102, "step": 1422 }, { "epoch": 0.06440371124688844, "grad_norm": 0.7447203329144491, "learning_rate": 9.969086379154e-06, "loss": 0.4473, "step": 1423 }, { "epoch": 0.064448970355284, "grad_norm": 1.1690352979094978, "learning_rate": 9.969004950996175e-06, "loss": 0.5181, "step": 1424 }, { "epoch": 0.06449422946367957, "grad_norm": 0.6911976096981919, "learning_rate": 9.968923416069694e-06, "loss": 0.4239, "step": 1425 }, { "epoch": 0.06453948857207513, "grad_norm": 0.5637242347474052, "learning_rate": 9.96884177437631e-06, "loss": 0.514, "step": 1426 }, { "epoch": 0.06458474768047069, "grad_norm": 0.7441386514999966, "learning_rate": 9.968760025917777e-06, "loss": 0.467, "step": 1427 }, { "epoch": 0.06463000678886625, "grad_norm": 0.784708982521003, "learning_rate": 9.968678170695851e-06, "loss": 0.4294, "step": 1428 }, { "epoch": 0.06467526589726183, "grad_norm": 0.6990928541859651, "learning_rate": 9.968596208712293e-06, "loss": 0.4495, "step": 1429 }, { "epoch": 0.06472052500565739, "grad_norm": 1.0658537709299807, "learning_rate": 9.968514139968862e-06, "loss": 0.4917, "step": 1430 }, { "epoch": 0.06476578411405295, "grad_norm": 0.7858687905006959, "learning_rate": 9.96843196446732e-06, "loss": 0.4447, "step": 1431 }, { "epoch": 0.06481104322244852, "grad_norm": 0.8138918788039744, "learning_rate": 9.968349682209434e-06, "loss": 0.4743, "step": 1432 }, { "epoch": 0.06485630233084408, "grad_norm": 0.7115302776794071, "learning_rate": 9.968267293196976e-06, "loss": 0.4507, "step": 1433 }, { "epoch": 0.06490156143923964, "grad_norm": 1.566811431079173, "learning_rate": 9.96818479743171e-06, "loss": 0.5437, "step": 1434 }, { "epoch": 0.06494682054763522, "grad_norm": 0.7712599008875365, "learning_rate": 9.968102194915411e-06, "loss": 0.4627, "step": 1435 }, { "epoch": 0.06499207965603078, "grad_norm": 0.8098764007806255, "learning_rate": 9.968019485649856e-06, "loss": 0.4519, "step": 1436 }, { "epoch": 0.06503733876442634, "grad_norm": 0.730530054382985, "learning_rate": 9.967936669636818e-06, "loss": 0.4679, "step": 1437 }, { "epoch": 0.0650825978728219, "grad_norm": 0.7298010549478803, "learning_rate": 9.96785374687808e-06, "loss": 0.3934, "step": 1438 }, { "epoch": 0.06512785698121747, "grad_norm": 0.7719987609607117, "learning_rate": 9.967770717375423e-06, "loss": 0.4488, "step": 1439 }, { "epoch": 0.06517311608961303, "grad_norm": 0.7576083931078323, "learning_rate": 9.967687581130632e-06, "loss": 0.4493, "step": 1440 }, { "epoch": 0.0652183751980086, "grad_norm": 0.7456916900585282, "learning_rate": 9.967604338145488e-06, "loss": 0.434, "step": 1441 }, { "epoch": 0.06526363430640417, "grad_norm": 0.7609509382040559, "learning_rate": 9.967520988421788e-06, "loss": 0.4175, "step": 1442 }, { "epoch": 0.06530889341479973, "grad_norm": 0.8318454322580573, "learning_rate": 9.967437531961316e-06, "loss": 0.465, "step": 1443 }, { "epoch": 0.06535415252319529, "grad_norm": 0.7591737815983305, "learning_rate": 9.967353968765868e-06, "loss": 0.4394, "step": 1444 }, { "epoch": 0.06539941163159085, "grad_norm": 0.7765098393214346, "learning_rate": 9.967270298837239e-06, "loss": 0.47, "step": 1445 }, { "epoch": 0.06544467073998642, "grad_norm": 0.7409110390658145, "learning_rate": 9.967186522177228e-06, "loss": 0.3877, "step": 1446 }, { "epoch": 0.06548992984838199, "grad_norm": 0.9312488082329916, "learning_rate": 9.967102638787634e-06, "loss": 0.4224, "step": 1447 }, { "epoch": 0.06553518895677755, "grad_norm": 0.749522516130679, "learning_rate": 9.96701864867026e-06, "loss": 0.4626, "step": 1448 }, { "epoch": 0.06558044806517312, "grad_norm": 0.8037830099599014, "learning_rate": 9.96693455182691e-06, "loss": 0.5151, "step": 1449 }, { "epoch": 0.06562570717356868, "grad_norm": 0.8896635354781706, "learning_rate": 9.96685034825939e-06, "loss": 0.5014, "step": 1450 }, { "epoch": 0.06567096628196424, "grad_norm": 0.9075654261092597, "learning_rate": 9.966766037969512e-06, "loss": 0.48, "step": 1451 }, { "epoch": 0.0657162253903598, "grad_norm": 0.779067559983051, "learning_rate": 9.966681620959085e-06, "loss": 0.4136, "step": 1452 }, { "epoch": 0.06576148449875538, "grad_norm": 0.8202536297853454, "learning_rate": 9.966597097229925e-06, "loss": 0.4761, "step": 1453 }, { "epoch": 0.06580674360715094, "grad_norm": 1.5381184164373207, "learning_rate": 9.966512466783846e-06, "loss": 0.468, "step": 1454 }, { "epoch": 0.0658520027155465, "grad_norm": 0.7781534864045977, "learning_rate": 9.966427729622668e-06, "loss": 0.4855, "step": 1455 }, { "epoch": 0.06589726182394207, "grad_norm": 0.7798522596941327, "learning_rate": 9.966342885748212e-06, "loss": 0.4623, "step": 1456 }, { "epoch": 0.06594252093233763, "grad_norm": 0.7384237234710249, "learning_rate": 9.9662579351623e-06, "loss": 0.483, "step": 1457 }, { "epoch": 0.0659877800407332, "grad_norm": 0.7826289098351965, "learning_rate": 9.966172877866757e-06, "loss": 0.4736, "step": 1458 }, { "epoch": 0.06603303914912877, "grad_norm": 1.0297726700330934, "learning_rate": 9.966087713863412e-06, "loss": 0.4721, "step": 1459 }, { "epoch": 0.06607829825752433, "grad_norm": 0.8561847199763449, "learning_rate": 9.966002443154095e-06, "loss": 0.4884, "step": 1460 }, { "epoch": 0.06612355736591989, "grad_norm": 2.3350403237905915, "learning_rate": 9.965917065740636e-06, "loss": 0.5438, "step": 1461 }, { "epoch": 0.06616881647431545, "grad_norm": 0.8562932774188359, "learning_rate": 9.965831581624872e-06, "loss": 0.473, "step": 1462 }, { "epoch": 0.06621407558271102, "grad_norm": 0.6826153820493718, "learning_rate": 9.965745990808638e-06, "loss": 0.547, "step": 1463 }, { "epoch": 0.06625933469110659, "grad_norm": 0.7976139884418457, "learning_rate": 9.965660293293773e-06, "loss": 0.4536, "step": 1464 }, { "epoch": 0.06630459379950215, "grad_norm": 0.9966975784442647, "learning_rate": 9.96557448908212e-06, "loss": 0.5063, "step": 1465 }, { "epoch": 0.06634985290789772, "grad_norm": 0.8040762001174595, "learning_rate": 9.965488578175522e-06, "loss": 0.4333, "step": 1466 }, { "epoch": 0.06639511201629328, "grad_norm": 0.8261180768400028, "learning_rate": 9.965402560575825e-06, "loss": 0.4308, "step": 1467 }, { "epoch": 0.06644037112468884, "grad_norm": 0.9011136586096863, "learning_rate": 9.965316436284877e-06, "loss": 0.4597, "step": 1468 }, { "epoch": 0.0664856302330844, "grad_norm": 1.111941679470362, "learning_rate": 9.965230205304528e-06, "loss": 0.4384, "step": 1469 }, { "epoch": 0.06653088934147998, "grad_norm": 0.7204158964166693, "learning_rate": 9.96514386763663e-06, "loss": 0.4624, "step": 1470 }, { "epoch": 0.06657614844987554, "grad_norm": 0.9149661678745, "learning_rate": 9.965057423283043e-06, "loss": 0.4667, "step": 1471 }, { "epoch": 0.0666214075582711, "grad_norm": 0.7932039742728114, "learning_rate": 9.964970872245618e-06, "loss": 0.4311, "step": 1472 }, { "epoch": 0.06666666666666667, "grad_norm": 1.89014538464223, "learning_rate": 9.96488421452622e-06, "loss": 0.5952, "step": 1473 }, { "epoch": 0.06671192577506223, "grad_norm": 0.8032671819143476, "learning_rate": 9.964797450126708e-06, "loss": 0.443, "step": 1474 }, { "epoch": 0.06675718488345779, "grad_norm": 0.7481664383546026, "learning_rate": 9.964710579048947e-06, "loss": 0.4828, "step": 1475 }, { "epoch": 0.06680244399185337, "grad_norm": 0.7386169349436833, "learning_rate": 9.964623601294802e-06, "loss": 0.4415, "step": 1476 }, { "epoch": 0.06684770310024893, "grad_norm": 0.779597142522534, "learning_rate": 9.964536516866146e-06, "loss": 0.4436, "step": 1477 }, { "epoch": 0.06689296220864449, "grad_norm": 0.6576504922840656, "learning_rate": 9.964449325764846e-06, "loss": 0.5342, "step": 1478 }, { "epoch": 0.06693822131704005, "grad_norm": 0.8003162853179788, "learning_rate": 9.964362027992777e-06, "loss": 0.4446, "step": 1479 }, { "epoch": 0.06698348042543562, "grad_norm": 0.7921636322644379, "learning_rate": 9.964274623551814e-06, "loss": 0.4222, "step": 1480 }, { "epoch": 0.06702873953383118, "grad_norm": 0.8123469531045502, "learning_rate": 9.964187112443839e-06, "loss": 0.4191, "step": 1481 }, { "epoch": 0.06707399864222675, "grad_norm": 0.820058490946606, "learning_rate": 9.964099494670727e-06, "loss": 0.459, "step": 1482 }, { "epoch": 0.06711925775062232, "grad_norm": 0.7893140272997659, "learning_rate": 9.964011770234364e-06, "loss": 0.4884, "step": 1483 }, { "epoch": 0.06716451685901788, "grad_norm": 1.0204350131333586, "learning_rate": 9.963923939136632e-06, "loss": 0.5305, "step": 1484 }, { "epoch": 0.06720977596741344, "grad_norm": 1.1363273251767987, "learning_rate": 9.963836001379423e-06, "loss": 0.47, "step": 1485 }, { "epoch": 0.067255035075809, "grad_norm": 0.8064235598914097, "learning_rate": 9.963747956964623e-06, "loss": 0.4702, "step": 1486 }, { "epoch": 0.06730029418420456, "grad_norm": 0.728234755597837, "learning_rate": 9.963659805894123e-06, "loss": 0.4592, "step": 1487 }, { "epoch": 0.06734555329260014, "grad_norm": 0.8039241658387191, "learning_rate": 9.96357154816982e-06, "loss": 0.4776, "step": 1488 }, { "epoch": 0.0673908124009957, "grad_norm": 0.7661464342835728, "learning_rate": 9.963483183793606e-06, "loss": 0.4504, "step": 1489 }, { "epoch": 0.06743607150939127, "grad_norm": 0.737037447449985, "learning_rate": 9.963394712767385e-06, "loss": 0.4531, "step": 1490 }, { "epoch": 0.06748133061778683, "grad_norm": 0.5540904667559855, "learning_rate": 9.963306135093054e-06, "loss": 0.5121, "step": 1491 }, { "epoch": 0.06752658972618239, "grad_norm": 0.8491171776657304, "learning_rate": 9.96321745077252e-06, "loss": 0.4768, "step": 1492 }, { "epoch": 0.06757184883457795, "grad_norm": 0.43270575186531257, "learning_rate": 9.963128659807684e-06, "loss": 0.5299, "step": 1493 }, { "epoch": 0.06761710794297353, "grad_norm": 0.8745420939942341, "learning_rate": 9.963039762200457e-06, "loss": 0.4687, "step": 1494 }, { "epoch": 0.06766236705136909, "grad_norm": 0.8373452601614464, "learning_rate": 9.96295075795275e-06, "loss": 0.459, "step": 1495 }, { "epoch": 0.06770762615976465, "grad_norm": 0.7698332952658042, "learning_rate": 9.962861647066472e-06, "loss": 0.5082, "step": 1496 }, { "epoch": 0.06775288526816022, "grad_norm": 0.8100270250771073, "learning_rate": 9.962772429543539e-06, "loss": 0.456, "step": 1497 }, { "epoch": 0.06779814437655578, "grad_norm": 0.7843116494944076, "learning_rate": 9.96268310538587e-06, "loss": 0.4587, "step": 1498 }, { "epoch": 0.06784340348495134, "grad_norm": 0.7879384228895121, "learning_rate": 9.962593674595382e-06, "loss": 0.438, "step": 1499 }, { "epoch": 0.06788866259334692, "grad_norm": 0.7403443187198006, "learning_rate": 9.962504137173997e-06, "loss": 0.4559, "step": 1500 }, { "epoch": 0.06793392170174248, "grad_norm": 0.7619760500772016, "learning_rate": 9.96241449312364e-06, "loss": 0.4454, "step": 1501 }, { "epoch": 0.06797918081013804, "grad_norm": 0.8274554589173836, "learning_rate": 9.962324742446237e-06, "loss": 0.4684, "step": 1502 }, { "epoch": 0.0680244399185336, "grad_norm": 0.8122743257002134, "learning_rate": 9.962234885143715e-06, "loss": 0.4558, "step": 1503 }, { "epoch": 0.06806969902692916, "grad_norm": 0.8125418203275906, "learning_rate": 9.962144921218005e-06, "loss": 0.5069, "step": 1504 }, { "epoch": 0.06811495813532474, "grad_norm": 1.197255811058083, "learning_rate": 9.962054850671042e-06, "loss": 0.5401, "step": 1505 }, { "epoch": 0.0681602172437203, "grad_norm": 0.7745585414790322, "learning_rate": 9.961964673504759e-06, "loss": 0.3907, "step": 1506 }, { "epoch": 0.06820547635211587, "grad_norm": 0.493849832077196, "learning_rate": 9.961874389721095e-06, "loss": 0.5421, "step": 1507 }, { "epoch": 0.06825073546051143, "grad_norm": 0.7940022723097947, "learning_rate": 9.96178399932199e-06, "loss": 0.4773, "step": 1508 }, { "epoch": 0.06829599456890699, "grad_norm": 0.8717583485304328, "learning_rate": 9.961693502309385e-06, "loss": 0.4817, "step": 1509 }, { "epoch": 0.06834125367730255, "grad_norm": 0.8235441414406076, "learning_rate": 9.961602898685225e-06, "loss": 0.4492, "step": 1510 }, { "epoch": 0.06838651278569813, "grad_norm": 1.0502485038616896, "learning_rate": 9.961512188451458e-06, "loss": 0.5486, "step": 1511 }, { "epoch": 0.06843177189409369, "grad_norm": 0.7696161022692278, "learning_rate": 9.961421371610034e-06, "loss": 0.4708, "step": 1512 }, { "epoch": 0.06847703100248925, "grad_norm": 0.7810384269518527, "learning_rate": 9.9613304481629e-06, "loss": 0.5473, "step": 1513 }, { "epoch": 0.06852229011088481, "grad_norm": 0.5406957384016672, "learning_rate": 9.961239418112013e-06, "loss": 0.5205, "step": 1514 }, { "epoch": 0.06856754921928038, "grad_norm": 0.9623025256050911, "learning_rate": 9.961148281459328e-06, "loss": 0.4817, "step": 1515 }, { "epoch": 0.06861280832767594, "grad_norm": 0.8031406777854115, "learning_rate": 9.961057038206804e-06, "loss": 0.4505, "step": 1516 }, { "epoch": 0.06865806743607152, "grad_norm": 0.8468517938114531, "learning_rate": 9.960965688356401e-06, "loss": 0.4735, "step": 1517 }, { "epoch": 0.06870332654446708, "grad_norm": 0.7971237582102288, "learning_rate": 9.960874231910081e-06, "loss": 0.4216, "step": 1518 }, { "epoch": 0.06874858565286264, "grad_norm": 1.1521788910843045, "learning_rate": 9.960782668869811e-06, "loss": 0.5635, "step": 1519 }, { "epoch": 0.0687938447612582, "grad_norm": 0.7942431019486376, "learning_rate": 9.960690999237555e-06, "loss": 0.489, "step": 1520 }, { "epoch": 0.06883910386965376, "grad_norm": 0.7702082381966745, "learning_rate": 9.960599223015287e-06, "loss": 0.43, "step": 1521 }, { "epoch": 0.06888436297804933, "grad_norm": 0.6724548920276606, "learning_rate": 9.960507340204977e-06, "loss": 0.4509, "step": 1522 }, { "epoch": 0.0689296220864449, "grad_norm": 0.8431250938808247, "learning_rate": 9.960415350808598e-06, "loss": 0.4686, "step": 1523 }, { "epoch": 0.06897488119484046, "grad_norm": 0.7809711877829256, "learning_rate": 9.960323254828129e-06, "loss": 0.4505, "step": 1524 }, { "epoch": 0.06902014030323603, "grad_norm": 0.6213389147642807, "learning_rate": 9.960231052265548e-06, "loss": 0.5229, "step": 1525 }, { "epoch": 0.06906539941163159, "grad_norm": 0.759452138984007, "learning_rate": 9.960138743122835e-06, "loss": 0.4357, "step": 1526 }, { "epoch": 0.06911065852002715, "grad_norm": 0.7715047876751977, "learning_rate": 9.960046327401975e-06, "loss": 0.4177, "step": 1527 }, { "epoch": 0.06915591762842271, "grad_norm": 0.7671577146844714, "learning_rate": 9.959953805104953e-06, "loss": 0.4023, "step": 1528 }, { "epoch": 0.06920117673681829, "grad_norm": 0.7603786601404778, "learning_rate": 9.959861176233756e-06, "loss": 0.4729, "step": 1529 }, { "epoch": 0.06924643584521385, "grad_norm": 1.2263879084410303, "learning_rate": 9.959768440790377e-06, "loss": 0.4791, "step": 1530 }, { "epoch": 0.06929169495360941, "grad_norm": 0.8042621884641189, "learning_rate": 9.959675598776805e-06, "loss": 0.4855, "step": 1531 }, { "epoch": 0.06933695406200498, "grad_norm": 0.7607417462032463, "learning_rate": 9.95958265019504e-06, "loss": 0.4193, "step": 1532 }, { "epoch": 0.06938221317040054, "grad_norm": 0.7396140867796288, "learning_rate": 9.959489595047074e-06, "loss": 0.4299, "step": 1533 }, { "epoch": 0.0694274722787961, "grad_norm": 0.8098941502403151, "learning_rate": 9.959396433334907e-06, "loss": 0.5049, "step": 1534 }, { "epoch": 0.06947273138719168, "grad_norm": 0.6644968450292126, "learning_rate": 9.959303165060546e-06, "loss": 0.5169, "step": 1535 }, { "epoch": 0.06951799049558724, "grad_norm": 0.8017479067575274, "learning_rate": 9.959209790225987e-06, "loss": 0.4529, "step": 1536 }, { "epoch": 0.0695632496039828, "grad_norm": 1.6091086506950214, "learning_rate": 9.959116308833244e-06, "loss": 0.4178, "step": 1537 }, { "epoch": 0.06960850871237836, "grad_norm": 0.7569888031961698, "learning_rate": 9.959022720884321e-06, "loss": 0.4357, "step": 1538 }, { "epoch": 0.06965376782077393, "grad_norm": 0.34109004209053556, "learning_rate": 9.95892902638123e-06, "loss": 0.5254, "step": 1539 }, { "epoch": 0.06969902692916949, "grad_norm": 0.829481890585957, "learning_rate": 9.958835225325984e-06, "loss": 0.4158, "step": 1540 }, { "epoch": 0.06974428603756506, "grad_norm": 0.7756169671760623, "learning_rate": 9.9587413177206e-06, "loss": 0.3823, "step": 1541 }, { "epoch": 0.06978954514596063, "grad_norm": 0.44633982647223575, "learning_rate": 9.958647303567094e-06, "loss": 0.5427, "step": 1542 }, { "epoch": 0.06983480425435619, "grad_norm": 0.883531560550747, "learning_rate": 9.958553182867488e-06, "loss": 0.4481, "step": 1543 }, { "epoch": 0.06988006336275175, "grad_norm": 0.4300749273865749, "learning_rate": 9.958458955623802e-06, "loss": 0.5135, "step": 1544 }, { "epoch": 0.06992532247114731, "grad_norm": 0.7996971504896987, "learning_rate": 9.958364621838062e-06, "loss": 0.4874, "step": 1545 }, { "epoch": 0.06997058157954289, "grad_norm": 0.7474858485034108, "learning_rate": 9.958270181512295e-06, "loss": 0.4484, "step": 1546 }, { "epoch": 0.07001584068793845, "grad_norm": 0.8790061160587301, "learning_rate": 9.95817563464853e-06, "loss": 0.4148, "step": 1547 }, { "epoch": 0.07006109979633401, "grad_norm": 0.49396382028010033, "learning_rate": 9.958080981248798e-06, "loss": 0.5323, "step": 1548 }, { "epoch": 0.07010635890472958, "grad_norm": 0.7998562590968145, "learning_rate": 9.957986221315134e-06, "loss": 0.4221, "step": 1549 }, { "epoch": 0.07015161801312514, "grad_norm": 0.8086282984928738, "learning_rate": 9.957891354849573e-06, "loss": 0.4593, "step": 1550 }, { "epoch": 0.0701968771215207, "grad_norm": 0.8162934805047718, "learning_rate": 9.957796381854152e-06, "loss": 0.4597, "step": 1551 }, { "epoch": 0.07024213622991628, "grad_norm": 0.7663579788681755, "learning_rate": 9.957701302330915e-06, "loss": 0.4515, "step": 1552 }, { "epoch": 0.07028739533831184, "grad_norm": 0.7919053756584956, "learning_rate": 9.957606116281905e-06, "loss": 0.4443, "step": 1553 }, { "epoch": 0.0703326544467074, "grad_norm": 0.8133292991518659, "learning_rate": 9.957510823709165e-06, "loss": 0.4557, "step": 1554 }, { "epoch": 0.07037791355510296, "grad_norm": 0.6882397202006979, "learning_rate": 9.957415424614742e-06, "loss": 0.5269, "step": 1555 }, { "epoch": 0.07042317266349853, "grad_norm": 0.8998616260792923, "learning_rate": 9.957319919000687e-06, "loss": 0.435, "step": 1556 }, { "epoch": 0.07046843177189409, "grad_norm": 0.7549419519105975, "learning_rate": 9.957224306869053e-06, "loss": 0.497, "step": 1557 }, { "epoch": 0.07051369088028966, "grad_norm": 0.7234189199464256, "learning_rate": 9.957128588221895e-06, "loss": 0.4492, "step": 1558 }, { "epoch": 0.07055894998868523, "grad_norm": 0.7526551273534683, "learning_rate": 9.957032763061264e-06, "loss": 0.3977, "step": 1559 }, { "epoch": 0.07060420909708079, "grad_norm": 0.7661108290816389, "learning_rate": 9.956936831389228e-06, "loss": 0.479, "step": 1560 }, { "epoch": 0.07064946820547635, "grad_norm": 0.7268298686703955, "learning_rate": 9.956840793207841e-06, "loss": 0.5161, "step": 1561 }, { "epoch": 0.07069472731387191, "grad_norm": 0.8351450729731379, "learning_rate": 9.95674464851917e-06, "loss": 0.4463, "step": 1562 }, { "epoch": 0.07073998642226748, "grad_norm": 0.7466578559219901, "learning_rate": 9.95664839732528e-06, "loss": 0.4756, "step": 1563 }, { "epoch": 0.07078524553066305, "grad_norm": 0.7005965800541605, "learning_rate": 9.956552039628237e-06, "loss": 0.4096, "step": 1564 }, { "epoch": 0.07083050463905861, "grad_norm": 0.7945484153877097, "learning_rate": 9.956455575430115e-06, "loss": 0.4333, "step": 1565 }, { "epoch": 0.07087576374745418, "grad_norm": 0.7964664261929095, "learning_rate": 9.956359004732986e-06, "loss": 0.4668, "step": 1566 }, { "epoch": 0.07092102285584974, "grad_norm": 0.4334662715457474, "learning_rate": 9.956262327538924e-06, "loss": 0.5346, "step": 1567 }, { "epoch": 0.0709662819642453, "grad_norm": 0.8466745017880247, "learning_rate": 9.956165543850007e-06, "loss": 0.4911, "step": 1568 }, { "epoch": 0.07101154107264086, "grad_norm": 0.33019104388466125, "learning_rate": 9.956068653668314e-06, "loss": 0.5066, "step": 1569 }, { "epoch": 0.07105680018103644, "grad_norm": 0.3623870468116331, "learning_rate": 9.955971656995927e-06, "loss": 0.5325, "step": 1570 }, { "epoch": 0.071102059289432, "grad_norm": 0.3741449779498846, "learning_rate": 9.955874553834928e-06, "loss": 0.5381, "step": 1571 }, { "epoch": 0.07114731839782756, "grad_norm": 0.3768306568499116, "learning_rate": 9.955777344187407e-06, "loss": 0.5692, "step": 1572 }, { "epoch": 0.07119257750622313, "grad_norm": 1.085101554006371, "learning_rate": 9.955680028055453e-06, "loss": 0.4282, "step": 1573 }, { "epoch": 0.07123783661461869, "grad_norm": 0.783608464689036, "learning_rate": 9.955582605441154e-06, "loss": 0.4335, "step": 1574 }, { "epoch": 0.07128309572301425, "grad_norm": 0.9016573804866266, "learning_rate": 9.955485076346605e-06, "loss": 0.4817, "step": 1575 }, { "epoch": 0.07132835483140983, "grad_norm": 0.8464255427197236, "learning_rate": 9.955387440773902e-06, "loss": 0.4541, "step": 1576 }, { "epoch": 0.07137361393980539, "grad_norm": 0.8352333785219662, "learning_rate": 9.955289698725141e-06, "loss": 0.5068, "step": 1577 }, { "epoch": 0.07141887304820095, "grad_norm": 0.7678664991367014, "learning_rate": 9.955191850202424e-06, "loss": 0.4476, "step": 1578 }, { "epoch": 0.07146413215659651, "grad_norm": 0.7432997228864974, "learning_rate": 9.955093895207853e-06, "loss": 0.4361, "step": 1579 }, { "epoch": 0.07150939126499208, "grad_norm": 1.043347006843631, "learning_rate": 9.954995833743532e-06, "loss": 0.466, "step": 1580 }, { "epoch": 0.07155465037338764, "grad_norm": 0.7511877513268755, "learning_rate": 9.95489766581157e-06, "loss": 0.4462, "step": 1581 }, { "epoch": 0.07159990948178321, "grad_norm": 0.7138314733618735, "learning_rate": 9.954799391414073e-06, "loss": 0.5603, "step": 1582 }, { "epoch": 0.07164516859017878, "grad_norm": 1.0080615313646797, "learning_rate": 9.954701010553156e-06, "loss": 0.45, "step": 1583 }, { "epoch": 0.07169042769857434, "grad_norm": 0.44949036623651845, "learning_rate": 9.95460252323093e-06, "loss": 0.5231, "step": 1584 }, { "epoch": 0.0717356868069699, "grad_norm": 0.814469135656858, "learning_rate": 9.954503929449513e-06, "loss": 0.4641, "step": 1585 }, { "epoch": 0.07178094591536546, "grad_norm": 0.780742667966943, "learning_rate": 9.954405229211025e-06, "loss": 0.4783, "step": 1586 }, { "epoch": 0.07182620502376103, "grad_norm": 0.9876274201894842, "learning_rate": 9.954306422517583e-06, "loss": 0.4102, "step": 1587 }, { "epoch": 0.0718714641321566, "grad_norm": 0.8347915917303684, "learning_rate": 9.954207509371313e-06, "loss": 0.5033, "step": 1588 }, { "epoch": 0.07191672324055216, "grad_norm": 0.7436863290744097, "learning_rate": 9.954108489774339e-06, "loss": 0.4545, "step": 1589 }, { "epoch": 0.07196198234894773, "grad_norm": 0.7512176662693054, "learning_rate": 9.95400936372879e-06, "loss": 0.483, "step": 1590 }, { "epoch": 0.07200724145734329, "grad_norm": 0.6968344530808637, "learning_rate": 9.953910131236793e-06, "loss": 0.5386, "step": 1591 }, { "epoch": 0.07205250056573885, "grad_norm": 0.9115392018944554, "learning_rate": 9.953810792300482e-06, "loss": 0.4793, "step": 1592 }, { "epoch": 0.07209775967413443, "grad_norm": 0.7593058631745393, "learning_rate": 9.953711346921994e-06, "loss": 0.4179, "step": 1593 }, { "epoch": 0.07214301878252999, "grad_norm": 0.8363184414562198, "learning_rate": 9.953611795103462e-06, "loss": 0.4427, "step": 1594 }, { "epoch": 0.07218827789092555, "grad_norm": 0.7961544165941885, "learning_rate": 9.953512136847026e-06, "loss": 0.4654, "step": 1595 }, { "epoch": 0.07223353699932111, "grad_norm": 0.8475644647503084, "learning_rate": 9.953412372154826e-06, "loss": 0.4491, "step": 1596 }, { "epoch": 0.07227879610771668, "grad_norm": 0.7591172312405083, "learning_rate": 9.95331250102901e-06, "loss": 0.4721, "step": 1597 }, { "epoch": 0.07232405521611224, "grad_norm": 1.2064906579866868, "learning_rate": 9.95321252347172e-06, "loss": 0.4261, "step": 1598 }, { "epoch": 0.07236931432450781, "grad_norm": 0.8037664893843064, "learning_rate": 9.953112439485107e-06, "loss": 0.4595, "step": 1599 }, { "epoch": 0.07241457343290338, "grad_norm": 0.6037274223376878, "learning_rate": 9.95301224907132e-06, "loss": 0.5278, "step": 1600 }, { "epoch": 0.07245983254129894, "grad_norm": 0.8423451104175681, "learning_rate": 9.95291195223251e-06, "loss": 0.468, "step": 1601 }, { "epoch": 0.0725050916496945, "grad_norm": 0.734562033088208, "learning_rate": 9.952811548970834e-06, "loss": 0.4573, "step": 1602 }, { "epoch": 0.07255035075809006, "grad_norm": 0.7980025152030702, "learning_rate": 9.952711039288451e-06, "loss": 0.4247, "step": 1603 }, { "epoch": 0.07259560986648562, "grad_norm": 0.8268544917981663, "learning_rate": 9.952610423187516e-06, "loss": 0.4378, "step": 1604 }, { "epoch": 0.0726408689748812, "grad_norm": 0.7288491743965827, "learning_rate": 9.952509700670197e-06, "loss": 0.4295, "step": 1605 }, { "epoch": 0.07268612808327676, "grad_norm": 0.574267631988978, "learning_rate": 9.952408871738652e-06, "loss": 0.5117, "step": 1606 }, { "epoch": 0.07273138719167233, "grad_norm": 0.810368491171752, "learning_rate": 9.952307936395054e-06, "loss": 0.4359, "step": 1607 }, { "epoch": 0.07277664630006789, "grad_norm": 0.7467048796188573, "learning_rate": 9.952206894641565e-06, "loss": 0.4529, "step": 1608 }, { "epoch": 0.07282190540846345, "grad_norm": 0.7670762293890041, "learning_rate": 9.952105746480361e-06, "loss": 0.4459, "step": 1609 }, { "epoch": 0.07286716451685901, "grad_norm": 0.8677749241903157, "learning_rate": 9.952004491913613e-06, "loss": 0.5131, "step": 1610 }, { "epoch": 0.07291242362525459, "grad_norm": 0.43473157187032946, "learning_rate": 9.9519031309435e-06, "loss": 0.5079, "step": 1611 }, { "epoch": 0.07295768273365015, "grad_norm": 0.7709482615496647, "learning_rate": 9.951801663572194e-06, "loss": 0.4676, "step": 1612 }, { "epoch": 0.07300294184204571, "grad_norm": 0.7734986634642428, "learning_rate": 9.951700089801879e-06, "loss": 0.4816, "step": 1613 }, { "epoch": 0.07304820095044127, "grad_norm": 0.712129054682761, "learning_rate": 9.951598409634738e-06, "loss": 0.4398, "step": 1614 }, { "epoch": 0.07309346005883684, "grad_norm": 0.6855724174862319, "learning_rate": 9.951496623072955e-06, "loss": 0.4412, "step": 1615 }, { "epoch": 0.0731387191672324, "grad_norm": 0.7685554643700105, "learning_rate": 9.951394730118717e-06, "loss": 0.4403, "step": 1616 }, { "epoch": 0.07318397827562798, "grad_norm": 0.4755960850993418, "learning_rate": 9.951292730774213e-06, "loss": 0.5291, "step": 1617 }, { "epoch": 0.07322923738402354, "grad_norm": 0.4204314232545453, "learning_rate": 9.951190625041634e-06, "loss": 0.5251, "step": 1618 }, { "epoch": 0.0732744964924191, "grad_norm": 0.7848766554034355, "learning_rate": 9.951088412923175e-06, "loss": 0.4715, "step": 1619 }, { "epoch": 0.07331975560081466, "grad_norm": 0.7101940377324135, "learning_rate": 9.950986094421033e-06, "loss": 0.471, "step": 1620 }, { "epoch": 0.07336501470921022, "grad_norm": 0.9173872355738407, "learning_rate": 9.950883669537405e-06, "loss": 0.4493, "step": 1621 }, { "epoch": 0.07341027381760579, "grad_norm": 0.8585729042659949, "learning_rate": 9.950781138274494e-06, "loss": 0.4719, "step": 1622 }, { "epoch": 0.07345553292600136, "grad_norm": 0.7234837833909473, "learning_rate": 9.950678500634501e-06, "loss": 0.5371, "step": 1623 }, { "epoch": 0.07350079203439693, "grad_norm": 0.7175744371928864, "learning_rate": 9.95057575661963e-06, "loss": 0.4383, "step": 1624 }, { "epoch": 0.07354605114279249, "grad_norm": 0.7903698818392364, "learning_rate": 9.950472906232091e-06, "loss": 0.4679, "step": 1625 }, { "epoch": 0.07359131025118805, "grad_norm": 0.3575880075219209, "learning_rate": 9.950369949474095e-06, "loss": 0.5099, "step": 1626 }, { "epoch": 0.07363656935958361, "grad_norm": 0.785889252127557, "learning_rate": 9.950266886347852e-06, "loss": 0.4453, "step": 1627 }, { "epoch": 0.07368182846797917, "grad_norm": 0.35653778542954306, "learning_rate": 9.950163716855578e-06, "loss": 0.5208, "step": 1628 }, { "epoch": 0.07372708757637475, "grad_norm": 1.5450850497688764, "learning_rate": 9.950060440999486e-06, "loss": 0.4549, "step": 1629 }, { "epoch": 0.07377234668477031, "grad_norm": 0.7693214263174912, "learning_rate": 9.949957058781802e-06, "loss": 0.4813, "step": 1630 }, { "epoch": 0.07381760579316587, "grad_norm": 0.8123776819315764, "learning_rate": 9.949853570204742e-06, "loss": 0.4426, "step": 1631 }, { "epoch": 0.07386286490156144, "grad_norm": 0.745047866822542, "learning_rate": 9.94974997527053e-06, "loss": 0.4226, "step": 1632 }, { "epoch": 0.073908124009957, "grad_norm": 1.1293663462337062, "learning_rate": 9.949646273981394e-06, "loss": 0.473, "step": 1633 }, { "epoch": 0.07395338311835258, "grad_norm": 0.725690333793923, "learning_rate": 9.949542466339561e-06, "loss": 0.4421, "step": 1634 }, { "epoch": 0.07399864222674814, "grad_norm": 0.7562534174587207, "learning_rate": 9.949438552347262e-06, "loss": 0.4429, "step": 1635 }, { "epoch": 0.0740439013351437, "grad_norm": 0.9048310824567884, "learning_rate": 9.94933453200673e-06, "loss": 0.4806, "step": 1636 }, { "epoch": 0.07408916044353926, "grad_norm": 0.7639953887241905, "learning_rate": 9.949230405320198e-06, "loss": 0.4829, "step": 1637 }, { "epoch": 0.07413441955193482, "grad_norm": 0.7243486351207513, "learning_rate": 9.949126172289905e-06, "loss": 0.4355, "step": 1638 }, { "epoch": 0.07417967866033039, "grad_norm": 0.6082662853357984, "learning_rate": 9.949021832918092e-06, "loss": 0.5237, "step": 1639 }, { "epoch": 0.07422493776872596, "grad_norm": 0.8794722206912542, "learning_rate": 9.948917387206999e-06, "loss": 0.4821, "step": 1640 }, { "epoch": 0.07427019687712152, "grad_norm": 0.8696598828377334, "learning_rate": 9.948812835158872e-06, "loss": 0.4723, "step": 1641 }, { "epoch": 0.07431545598551709, "grad_norm": 0.7322349900873856, "learning_rate": 9.948708176775954e-06, "loss": 0.4216, "step": 1642 }, { "epoch": 0.07436071509391265, "grad_norm": 0.8653735420761837, "learning_rate": 9.948603412060498e-06, "loss": 0.4641, "step": 1643 }, { "epoch": 0.07440597420230821, "grad_norm": 0.7424619254038732, "learning_rate": 9.948498541014752e-06, "loss": 0.4344, "step": 1644 }, { "epoch": 0.07445123331070377, "grad_norm": 0.7369603541667813, "learning_rate": 9.94839356364097e-06, "loss": 0.4083, "step": 1645 }, { "epoch": 0.07449649241909935, "grad_norm": 0.7140327682794468, "learning_rate": 9.94828847994141e-06, "loss": 0.4047, "step": 1646 }, { "epoch": 0.07454175152749491, "grad_norm": 0.7216918324659736, "learning_rate": 9.948183289918327e-06, "loss": 0.4515, "step": 1647 }, { "epoch": 0.07458701063589047, "grad_norm": 0.6294579395283374, "learning_rate": 9.948077993573983e-06, "loss": 0.5288, "step": 1648 }, { "epoch": 0.07463226974428604, "grad_norm": 0.7958923321672456, "learning_rate": 9.947972590910639e-06, "loss": 0.5042, "step": 1649 }, { "epoch": 0.0746775288526816, "grad_norm": 0.8417226582025827, "learning_rate": 9.94786708193056e-06, "loss": 0.4503, "step": 1650 }, { "epoch": 0.07472278796107716, "grad_norm": 0.3480153243530504, "learning_rate": 9.947761466636014e-06, "loss": 0.5113, "step": 1651 }, { "epoch": 0.07476804706947274, "grad_norm": 0.7706541931247691, "learning_rate": 9.94765574502927e-06, "loss": 0.4482, "step": 1652 }, { "epoch": 0.0748133061778683, "grad_norm": 0.7555172102442301, "learning_rate": 9.947549917112601e-06, "loss": 0.4164, "step": 1653 }, { "epoch": 0.07485856528626386, "grad_norm": 0.4662745249690845, "learning_rate": 9.947443982888279e-06, "loss": 0.5317, "step": 1654 }, { "epoch": 0.07490382439465942, "grad_norm": 0.838637572397322, "learning_rate": 9.947337942358579e-06, "loss": 0.4558, "step": 1655 }, { "epoch": 0.07494908350305499, "grad_norm": 0.816122648381445, "learning_rate": 9.947231795525782e-06, "loss": 0.4353, "step": 1656 }, { "epoch": 0.07499434261145055, "grad_norm": 0.7673786090994253, "learning_rate": 9.94712554239217e-06, "loss": 0.5044, "step": 1657 }, { "epoch": 0.07503960171984612, "grad_norm": 0.704259071940599, "learning_rate": 9.947019182960023e-06, "loss": 0.449, "step": 1658 }, { "epoch": 0.07508486082824169, "grad_norm": 0.7164377948782436, "learning_rate": 9.946912717231625e-06, "loss": 0.4738, "step": 1659 }, { "epoch": 0.07513011993663725, "grad_norm": 0.6995814898789535, "learning_rate": 9.94680614520927e-06, "loss": 0.4134, "step": 1660 }, { "epoch": 0.07517537904503281, "grad_norm": 0.857002781818847, "learning_rate": 9.94669946689524e-06, "loss": 0.4305, "step": 1661 }, { "epoch": 0.07522063815342837, "grad_norm": 0.7424988428382787, "learning_rate": 9.946592682291834e-06, "loss": 0.4683, "step": 1662 }, { "epoch": 0.07526589726182394, "grad_norm": 0.7630417153053298, "learning_rate": 9.94648579140134e-06, "loss": 0.4157, "step": 1663 }, { "epoch": 0.07531115637021951, "grad_norm": 0.7893442002173218, "learning_rate": 9.946378794226062e-06, "loss": 0.4425, "step": 1664 }, { "epoch": 0.07535641547861507, "grad_norm": 0.6564682733195059, "learning_rate": 9.946271690768295e-06, "loss": 0.4324, "step": 1665 }, { "epoch": 0.07540167458701064, "grad_norm": 0.8107525179394984, "learning_rate": 9.946164481030339e-06, "loss": 0.4887, "step": 1666 }, { "epoch": 0.0754469336954062, "grad_norm": 0.7832898959544311, "learning_rate": 9.9460571650145e-06, "loss": 0.4952, "step": 1667 }, { "epoch": 0.07549219280380176, "grad_norm": 0.7275954330682075, "learning_rate": 9.945949742723083e-06, "loss": 0.4314, "step": 1668 }, { "epoch": 0.07553745191219732, "grad_norm": 0.8272072509791674, "learning_rate": 9.945842214158397e-06, "loss": 0.4737, "step": 1669 }, { "epoch": 0.0755827110205929, "grad_norm": 0.7302759518481456, "learning_rate": 9.94573457932275e-06, "loss": 0.446, "step": 1670 }, { "epoch": 0.07562797012898846, "grad_norm": 0.7285007459322589, "learning_rate": 9.945626838218458e-06, "loss": 0.439, "step": 1671 }, { "epoch": 0.07567322923738402, "grad_norm": 0.600806103249455, "learning_rate": 9.945518990847835e-06, "loss": 0.5277, "step": 1672 }, { "epoch": 0.07571848834577959, "grad_norm": 0.7644840288674761, "learning_rate": 9.945411037213198e-06, "loss": 0.4852, "step": 1673 }, { "epoch": 0.07576374745417515, "grad_norm": 0.7606077392588975, "learning_rate": 9.945302977316864e-06, "loss": 0.4512, "step": 1674 }, { "epoch": 0.07580900656257071, "grad_norm": 0.717075980182358, "learning_rate": 9.94519481116116e-06, "loss": 0.4448, "step": 1675 }, { "epoch": 0.07585426567096629, "grad_norm": 0.8581548432084788, "learning_rate": 9.945086538748407e-06, "loss": 0.4802, "step": 1676 }, { "epoch": 0.07589952477936185, "grad_norm": 0.7421139594180161, "learning_rate": 9.944978160080932e-06, "loss": 0.4677, "step": 1677 }, { "epoch": 0.07594478388775741, "grad_norm": 0.7329069408677304, "learning_rate": 9.944869675161062e-06, "loss": 0.4558, "step": 1678 }, { "epoch": 0.07599004299615297, "grad_norm": 0.6783854819129181, "learning_rate": 9.944761083991131e-06, "loss": 0.4025, "step": 1679 }, { "epoch": 0.07603530210454854, "grad_norm": 0.7926734375394854, "learning_rate": 9.944652386573472e-06, "loss": 0.4869, "step": 1680 }, { "epoch": 0.07608056121294411, "grad_norm": 0.7747418613948901, "learning_rate": 9.944543582910417e-06, "loss": 0.4493, "step": 1681 }, { "epoch": 0.07612582032133967, "grad_norm": 0.6748968711002219, "learning_rate": 9.944434673004308e-06, "loss": 0.5339, "step": 1682 }, { "epoch": 0.07617107942973524, "grad_norm": 0.8246562125565327, "learning_rate": 9.944325656857485e-06, "loss": 0.4748, "step": 1683 }, { "epoch": 0.0762163385381308, "grad_norm": 0.7619113155947301, "learning_rate": 9.944216534472287e-06, "loss": 0.4123, "step": 1684 }, { "epoch": 0.07626159764652636, "grad_norm": 0.8584651446158137, "learning_rate": 9.94410730585106e-06, "loss": 0.4604, "step": 1685 }, { "epoch": 0.07630685675492192, "grad_norm": 0.8277957948886835, "learning_rate": 9.943997970996153e-06, "loss": 0.4572, "step": 1686 }, { "epoch": 0.0763521158633175, "grad_norm": 0.7801979933818924, "learning_rate": 9.943888529909916e-06, "loss": 0.492, "step": 1687 }, { "epoch": 0.07639737497171306, "grad_norm": 0.8391519761417111, "learning_rate": 9.943778982594695e-06, "loss": 0.449, "step": 1688 }, { "epoch": 0.07644263408010862, "grad_norm": 0.5006693997641047, "learning_rate": 9.943669329052848e-06, "loss": 0.5261, "step": 1689 }, { "epoch": 0.07648789318850419, "grad_norm": 0.8170731618555016, "learning_rate": 9.943559569286731e-06, "loss": 0.4434, "step": 1690 }, { "epoch": 0.07653315229689975, "grad_norm": 0.3748653606553218, "learning_rate": 9.943449703298703e-06, "loss": 0.5147, "step": 1691 }, { "epoch": 0.07657841140529531, "grad_norm": 0.7888227413039046, "learning_rate": 9.943339731091122e-06, "loss": 0.437, "step": 1692 }, { "epoch": 0.07662367051369089, "grad_norm": 0.38129147182439493, "learning_rate": 9.943229652666353e-06, "loss": 0.5146, "step": 1693 }, { "epoch": 0.07666892962208645, "grad_norm": 0.7875704407185139, "learning_rate": 9.94311946802676e-06, "loss": 0.4428, "step": 1694 }, { "epoch": 0.07671418873048201, "grad_norm": 1.7171840921170323, "learning_rate": 9.943009177174712e-06, "loss": 0.4613, "step": 1695 }, { "epoch": 0.07675944783887757, "grad_norm": 0.5894610471131189, "learning_rate": 9.942898780112578e-06, "loss": 0.54, "step": 1696 }, { "epoch": 0.07680470694727314, "grad_norm": 0.9081380234402502, "learning_rate": 9.94278827684273e-06, "loss": 0.4184, "step": 1697 }, { "epoch": 0.0768499660556687, "grad_norm": 0.7823344173089999, "learning_rate": 9.942677667367541e-06, "loss": 0.4672, "step": 1698 }, { "epoch": 0.07689522516406427, "grad_norm": 0.704900119854084, "learning_rate": 9.942566951689391e-06, "loss": 0.4232, "step": 1699 }, { "epoch": 0.07694048427245984, "grad_norm": 0.9097669547846146, "learning_rate": 9.942456129810658e-06, "loss": 0.4317, "step": 1700 }, { "epoch": 0.0769857433808554, "grad_norm": 0.7346865602242609, "learning_rate": 9.942345201733722e-06, "loss": 0.4731, "step": 1701 }, { "epoch": 0.07703100248925096, "grad_norm": 0.7587183898655953, "learning_rate": 9.942234167460966e-06, "loss": 0.4739, "step": 1702 }, { "epoch": 0.07707626159764652, "grad_norm": 0.8201545355377298, "learning_rate": 9.942123026994776e-06, "loss": 0.4157, "step": 1703 }, { "epoch": 0.07712152070604208, "grad_norm": 0.7966173846718374, "learning_rate": 9.942011780337542e-06, "loss": 0.4148, "step": 1704 }, { "epoch": 0.07716677981443766, "grad_norm": 0.7314677884036063, "learning_rate": 9.941900427491652e-06, "loss": 0.4087, "step": 1705 }, { "epoch": 0.07721203892283322, "grad_norm": 0.5109860171092886, "learning_rate": 9.941788968459502e-06, "loss": 0.5203, "step": 1706 }, { "epoch": 0.07725729803122879, "grad_norm": 0.8133081034298886, "learning_rate": 9.941677403243482e-06, "loss": 0.4036, "step": 1707 }, { "epoch": 0.07730255713962435, "grad_norm": 0.7755811147059083, "learning_rate": 9.941565731845993e-06, "loss": 0.4689, "step": 1708 }, { "epoch": 0.07734781624801991, "grad_norm": 0.7232739678213123, "learning_rate": 9.941453954269434e-06, "loss": 0.4253, "step": 1709 }, { "epoch": 0.07739307535641547, "grad_norm": 0.8649248462101308, "learning_rate": 9.941342070516205e-06, "loss": 0.4546, "step": 1710 }, { "epoch": 0.07743833446481105, "grad_norm": 0.8732318494015278, "learning_rate": 9.941230080588711e-06, "loss": 0.4374, "step": 1711 }, { "epoch": 0.07748359357320661, "grad_norm": 0.7775579716115757, "learning_rate": 9.941117984489358e-06, "loss": 0.44, "step": 1712 }, { "epoch": 0.07752885268160217, "grad_norm": 0.8352613535775968, "learning_rate": 9.941005782220557e-06, "loss": 0.4301, "step": 1713 }, { "epoch": 0.07757411178999774, "grad_norm": 1.600225175835997, "learning_rate": 9.940893473784714e-06, "loss": 0.4748, "step": 1714 }, { "epoch": 0.0776193708983933, "grad_norm": 0.7210361373387383, "learning_rate": 9.940781059184246e-06, "loss": 0.468, "step": 1715 }, { "epoch": 0.07766463000678886, "grad_norm": 0.7215114442344711, "learning_rate": 9.940668538421569e-06, "loss": 0.4502, "step": 1716 }, { "epoch": 0.07770988911518444, "grad_norm": 0.7494374543013032, "learning_rate": 9.940555911499098e-06, "loss": 0.4619, "step": 1717 }, { "epoch": 0.07775514822358, "grad_norm": 0.8060329308220311, "learning_rate": 9.940443178419255e-06, "loss": 0.4807, "step": 1718 }, { "epoch": 0.07780040733197556, "grad_norm": 0.7348165098234182, "learning_rate": 9.940330339184461e-06, "loss": 0.4063, "step": 1719 }, { "epoch": 0.07784566644037112, "grad_norm": 0.7759411143569876, "learning_rate": 9.94021739379714e-06, "loss": 0.4637, "step": 1720 }, { "epoch": 0.07789092554876668, "grad_norm": 0.6940545281776684, "learning_rate": 9.940104342259721e-06, "loss": 0.4375, "step": 1721 }, { "epoch": 0.07793618465716226, "grad_norm": 0.6427432439226576, "learning_rate": 9.939991184574632e-06, "loss": 0.5262, "step": 1722 }, { "epoch": 0.07798144376555782, "grad_norm": 0.7415240348599722, "learning_rate": 9.939877920744305e-06, "loss": 0.4516, "step": 1723 }, { "epoch": 0.07802670287395339, "grad_norm": 0.7042025867455868, "learning_rate": 9.939764550771172e-06, "loss": 0.4324, "step": 1724 }, { "epoch": 0.07807196198234895, "grad_norm": 0.7534567023710437, "learning_rate": 9.939651074657672e-06, "loss": 0.4533, "step": 1725 }, { "epoch": 0.07811722109074451, "grad_norm": 0.5824720249805632, "learning_rate": 9.939537492406239e-06, "loss": 0.5252, "step": 1726 }, { "epoch": 0.07816248019914007, "grad_norm": 0.7635615190018861, "learning_rate": 9.939423804019316e-06, "loss": 0.4304, "step": 1727 }, { "epoch": 0.07820773930753565, "grad_norm": 0.806469733151042, "learning_rate": 9.939310009499348e-06, "loss": 0.4557, "step": 1728 }, { "epoch": 0.07825299841593121, "grad_norm": 0.46661235464145895, "learning_rate": 9.939196108848777e-06, "loss": 0.5468, "step": 1729 }, { "epoch": 0.07829825752432677, "grad_norm": 0.7065872520486806, "learning_rate": 9.93908210207005e-06, "loss": 0.3917, "step": 1730 }, { "epoch": 0.07834351663272233, "grad_norm": 0.7716556106668118, "learning_rate": 9.93896798916562e-06, "loss": 0.4622, "step": 1731 }, { "epoch": 0.0783887757411179, "grad_norm": 0.7320673064635882, "learning_rate": 9.938853770137935e-06, "loss": 0.4564, "step": 1732 }, { "epoch": 0.07843403484951346, "grad_norm": 0.7696490103344769, "learning_rate": 9.938739444989452e-06, "loss": 0.453, "step": 1733 }, { "epoch": 0.07847929395790904, "grad_norm": 0.7618050527451413, "learning_rate": 9.938625013722625e-06, "loss": 0.4298, "step": 1734 }, { "epoch": 0.0785245530663046, "grad_norm": 0.7287327328684455, "learning_rate": 9.938510476339915e-06, "loss": 0.4492, "step": 1735 }, { "epoch": 0.07856981217470016, "grad_norm": 0.7355669694141859, "learning_rate": 9.938395832843784e-06, "loss": 0.4397, "step": 1736 }, { "epoch": 0.07861507128309572, "grad_norm": 0.5986083608600049, "learning_rate": 9.938281083236692e-06, "loss": 0.5109, "step": 1737 }, { "epoch": 0.07866033039149128, "grad_norm": 0.8267103372429406, "learning_rate": 9.938166227521106e-06, "loss": 0.464, "step": 1738 }, { "epoch": 0.07870558949988685, "grad_norm": 0.8498829525864762, "learning_rate": 9.938051265699495e-06, "loss": 0.4495, "step": 1739 }, { "epoch": 0.07875084860828242, "grad_norm": 0.7371846718545788, "learning_rate": 9.937936197774328e-06, "loss": 0.493, "step": 1740 }, { "epoch": 0.07879610771667798, "grad_norm": 0.7612591001481491, "learning_rate": 9.937821023748077e-06, "loss": 0.506, "step": 1741 }, { "epoch": 0.07884136682507355, "grad_norm": 0.7921542623351243, "learning_rate": 9.93770574362322e-06, "loss": 0.4469, "step": 1742 }, { "epoch": 0.07888662593346911, "grad_norm": 0.7471390352933903, "learning_rate": 9.937590357402229e-06, "loss": 0.4762, "step": 1743 }, { "epoch": 0.07893188504186467, "grad_norm": 0.6834612586390713, "learning_rate": 9.937474865087588e-06, "loss": 0.5168, "step": 1744 }, { "epoch": 0.07897714415026023, "grad_norm": 0.4588021154978975, "learning_rate": 9.937359266681774e-06, "loss": 0.5015, "step": 1745 }, { "epoch": 0.07902240325865581, "grad_norm": 0.8696667108506297, "learning_rate": 9.937243562187276e-06, "loss": 0.3916, "step": 1746 }, { "epoch": 0.07906766236705137, "grad_norm": 0.9082653531391771, "learning_rate": 9.937127751606577e-06, "loss": 0.4699, "step": 1747 }, { "epoch": 0.07911292147544693, "grad_norm": 0.5583039423714269, "learning_rate": 9.937011834942165e-06, "loss": 0.5257, "step": 1748 }, { "epoch": 0.0791581805838425, "grad_norm": 0.7838632052997255, "learning_rate": 9.936895812196531e-06, "loss": 0.4654, "step": 1749 }, { "epoch": 0.07920343969223806, "grad_norm": 0.8033674155474345, "learning_rate": 9.936779683372169e-06, "loss": 0.4551, "step": 1750 }, { "epoch": 0.07924869880063362, "grad_norm": 0.7960851566293581, "learning_rate": 9.936663448471573e-06, "loss": 0.4933, "step": 1751 }, { "epoch": 0.0792939579090292, "grad_norm": 0.75375389407784, "learning_rate": 9.936547107497243e-06, "loss": 0.4504, "step": 1752 }, { "epoch": 0.07933921701742476, "grad_norm": 0.8160771745066261, "learning_rate": 9.936430660451676e-06, "loss": 0.4346, "step": 1753 }, { "epoch": 0.07938447612582032, "grad_norm": 0.7558067831789662, "learning_rate": 9.936314107337375e-06, "loss": 0.4562, "step": 1754 }, { "epoch": 0.07942973523421588, "grad_norm": 0.7699610133117941, "learning_rate": 9.936197448156845e-06, "loss": 0.4346, "step": 1755 }, { "epoch": 0.07947499434261145, "grad_norm": 0.4979692975191816, "learning_rate": 9.936080682912594e-06, "loss": 0.54, "step": 1756 }, { "epoch": 0.07952025345100701, "grad_norm": 0.8028363930031575, "learning_rate": 9.935963811607127e-06, "loss": 0.4594, "step": 1757 }, { "epoch": 0.07956551255940258, "grad_norm": 0.7932475224690867, "learning_rate": 9.935846834242956e-06, "loss": 0.4588, "step": 1758 }, { "epoch": 0.07961077166779815, "grad_norm": 0.7325005955848916, "learning_rate": 9.935729750822598e-06, "loss": 0.4699, "step": 1759 }, { "epoch": 0.07965603077619371, "grad_norm": 0.3710574617410941, "learning_rate": 9.935612561348566e-06, "loss": 0.5169, "step": 1760 }, { "epoch": 0.07970128988458927, "grad_norm": 0.7945140363859222, "learning_rate": 9.935495265823379e-06, "loss": 0.4356, "step": 1761 }, { "epoch": 0.07974654899298483, "grad_norm": 1.1506262279272965, "learning_rate": 9.935377864249558e-06, "loss": 0.4829, "step": 1762 }, { "epoch": 0.0797918081013804, "grad_norm": 0.7278323845638668, "learning_rate": 9.935260356629623e-06, "loss": 0.4144, "step": 1763 }, { "epoch": 0.07983706720977597, "grad_norm": 0.7497010631773515, "learning_rate": 9.935142742966099e-06, "loss": 0.3977, "step": 1764 }, { "epoch": 0.07988232631817153, "grad_norm": 0.7404458281691546, "learning_rate": 9.935025023261518e-06, "loss": 0.4732, "step": 1765 }, { "epoch": 0.0799275854265671, "grad_norm": 0.46932200375510835, "learning_rate": 9.934907197518405e-06, "loss": 0.5196, "step": 1766 }, { "epoch": 0.07997284453496266, "grad_norm": 0.7566804198935556, "learning_rate": 9.934789265739291e-06, "loss": 0.4248, "step": 1767 }, { "epoch": 0.08001810364335822, "grad_norm": 0.36487050083608025, "learning_rate": 9.934671227926714e-06, "loss": 0.5084, "step": 1768 }, { "epoch": 0.0800633627517538, "grad_norm": 0.7996572718306318, "learning_rate": 9.934553084083205e-06, "loss": 0.4203, "step": 1769 }, { "epoch": 0.08010862186014936, "grad_norm": 0.7649702206192669, "learning_rate": 9.934434834211309e-06, "loss": 0.4812, "step": 1770 }, { "epoch": 0.08015388096854492, "grad_norm": 0.7872672685702111, "learning_rate": 9.93431647831356e-06, "loss": 0.4276, "step": 1771 }, { "epoch": 0.08019914007694048, "grad_norm": 0.7374095690002961, "learning_rate": 9.934198016392507e-06, "loss": 0.4045, "step": 1772 }, { "epoch": 0.08024439918533605, "grad_norm": 0.46296745243555587, "learning_rate": 9.934079448450692e-06, "loss": 0.5258, "step": 1773 }, { "epoch": 0.08028965829373161, "grad_norm": 0.9402761167337232, "learning_rate": 9.933960774490663e-06, "loss": 0.4316, "step": 1774 }, { "epoch": 0.08033491740212718, "grad_norm": 0.770679153259081, "learning_rate": 9.933841994514972e-06, "loss": 0.4937, "step": 1775 }, { "epoch": 0.08038017651052275, "grad_norm": 0.7540461540716024, "learning_rate": 9.933723108526168e-06, "loss": 0.456, "step": 1776 }, { "epoch": 0.08042543561891831, "grad_norm": 0.4096177953587281, "learning_rate": 9.933604116526807e-06, "loss": 0.5169, "step": 1777 }, { "epoch": 0.08047069472731387, "grad_norm": 0.846509551981744, "learning_rate": 9.933485018519448e-06, "loss": 0.4286, "step": 1778 }, { "epoch": 0.08051595383570943, "grad_norm": 0.7339440286381098, "learning_rate": 9.933365814506646e-06, "loss": 0.4304, "step": 1779 }, { "epoch": 0.080561212944105, "grad_norm": 0.7571291353344685, "learning_rate": 9.933246504490966e-06, "loss": 0.4615, "step": 1780 }, { "epoch": 0.08060647205250057, "grad_norm": 0.785352191096861, "learning_rate": 9.933127088474968e-06, "loss": 0.4042, "step": 1781 }, { "epoch": 0.08065173116089613, "grad_norm": 0.7196603766142141, "learning_rate": 9.93300756646122e-06, "loss": 0.4385, "step": 1782 }, { "epoch": 0.0806969902692917, "grad_norm": 0.790373245354847, "learning_rate": 9.932887938452292e-06, "loss": 0.4111, "step": 1783 }, { "epoch": 0.08074224937768726, "grad_norm": 0.7157238169714207, "learning_rate": 9.932768204450751e-06, "loss": 0.4257, "step": 1784 }, { "epoch": 0.08078750848608282, "grad_norm": 0.8043970204059999, "learning_rate": 9.932648364459172e-06, "loss": 0.4602, "step": 1785 }, { "epoch": 0.08083276759447838, "grad_norm": 0.8122044645865316, "learning_rate": 9.93252841848013e-06, "loss": 0.4547, "step": 1786 }, { "epoch": 0.08087802670287396, "grad_norm": 0.7857914577282751, "learning_rate": 9.932408366516198e-06, "loss": 0.4828, "step": 1787 }, { "epoch": 0.08092328581126952, "grad_norm": 0.7751491400233553, "learning_rate": 9.932288208569961e-06, "loss": 0.3924, "step": 1788 }, { "epoch": 0.08096854491966508, "grad_norm": 0.7582334131958042, "learning_rate": 9.932167944643998e-06, "loss": 0.4319, "step": 1789 }, { "epoch": 0.08101380402806065, "grad_norm": 0.7161260925348351, "learning_rate": 9.932047574740895e-06, "loss": 0.4755, "step": 1790 }, { "epoch": 0.08105906313645621, "grad_norm": 0.8180310284049336, "learning_rate": 9.931927098863237e-06, "loss": 0.4175, "step": 1791 }, { "epoch": 0.08110432224485177, "grad_norm": 0.6835845823674249, "learning_rate": 9.931806517013612e-06, "loss": 0.4034, "step": 1792 }, { "epoch": 0.08114958135324735, "grad_norm": 0.577581697777701, "learning_rate": 9.931685829194612e-06, "loss": 0.4926, "step": 1793 }, { "epoch": 0.08119484046164291, "grad_norm": 0.788125945271417, "learning_rate": 9.931565035408833e-06, "loss": 0.4507, "step": 1794 }, { "epoch": 0.08124009957003847, "grad_norm": 0.7967217507636467, "learning_rate": 9.931444135658864e-06, "loss": 0.4145, "step": 1795 }, { "epoch": 0.08128535867843403, "grad_norm": 0.7126756325676225, "learning_rate": 9.931323129947306e-06, "loss": 0.4061, "step": 1796 }, { "epoch": 0.0813306177868296, "grad_norm": 0.7425565928836916, "learning_rate": 9.931202018276761e-06, "loss": 0.4138, "step": 1797 }, { "epoch": 0.08137587689522516, "grad_norm": 0.7435477086974337, "learning_rate": 9.93108080064983e-06, "loss": 0.4607, "step": 1798 }, { "epoch": 0.08142113600362073, "grad_norm": 0.772240860997725, "learning_rate": 9.930959477069117e-06, "loss": 0.4587, "step": 1799 }, { "epoch": 0.0814663951120163, "grad_norm": 0.7435660703263862, "learning_rate": 9.930838047537228e-06, "loss": 0.4311, "step": 1800 }, { "epoch": 0.08151165422041186, "grad_norm": 0.6831940907334916, "learning_rate": 9.930716512056775e-06, "loss": 0.4285, "step": 1801 }, { "epoch": 0.08155691332880742, "grad_norm": 0.8106025492808707, "learning_rate": 9.930594870630365e-06, "loss": 0.4521, "step": 1802 }, { "epoch": 0.08160217243720298, "grad_norm": 0.752269782845294, "learning_rate": 9.930473123260618e-06, "loss": 0.5216, "step": 1803 }, { "epoch": 0.08164743154559854, "grad_norm": 0.7751702345109988, "learning_rate": 9.930351269950144e-06, "loss": 0.4725, "step": 1804 }, { "epoch": 0.08169269065399412, "grad_norm": 0.7742768817473613, "learning_rate": 9.930229310701563e-06, "loss": 0.424, "step": 1805 }, { "epoch": 0.08173794976238968, "grad_norm": 0.46471716913368016, "learning_rate": 9.930107245517498e-06, "loss": 0.55, "step": 1806 }, { "epoch": 0.08178320887078525, "grad_norm": 0.3865056561970093, "learning_rate": 9.929985074400569e-06, "loss": 0.5319, "step": 1807 }, { "epoch": 0.08182846797918081, "grad_norm": 0.9958802419213363, "learning_rate": 9.929862797353402e-06, "loss": 0.4585, "step": 1808 }, { "epoch": 0.08187372708757637, "grad_norm": 0.8995192761447293, "learning_rate": 9.929740414378625e-06, "loss": 0.4438, "step": 1809 }, { "epoch": 0.08191898619597195, "grad_norm": 0.5321022825681664, "learning_rate": 9.929617925478868e-06, "loss": 0.5199, "step": 1810 }, { "epoch": 0.08196424530436751, "grad_norm": 0.9978659299427117, "learning_rate": 9.92949533065676e-06, "loss": 0.4453, "step": 1811 }, { "epoch": 0.08200950441276307, "grad_norm": 0.9841211011383583, "learning_rate": 9.929372629914937e-06, "loss": 0.4665, "step": 1812 }, { "epoch": 0.08205476352115863, "grad_norm": 0.47226880302660784, "learning_rate": 9.929249823256037e-06, "loss": 0.4928, "step": 1813 }, { "epoch": 0.0821000226295542, "grad_norm": 0.44030382896499054, "learning_rate": 9.929126910682697e-06, "loss": 0.5312, "step": 1814 }, { "epoch": 0.08214528173794976, "grad_norm": 1.0380986223119235, "learning_rate": 9.929003892197558e-06, "loss": 0.437, "step": 1815 }, { "epoch": 0.08219054084634533, "grad_norm": 0.8010625789004381, "learning_rate": 9.928880767803264e-06, "loss": 0.4426, "step": 1816 }, { "epoch": 0.0822357999547409, "grad_norm": 0.8061411711841202, "learning_rate": 9.928757537502458e-06, "loss": 0.444, "step": 1817 }, { "epoch": 0.08228105906313646, "grad_norm": 0.9451107724245381, "learning_rate": 9.928634201297793e-06, "loss": 0.4575, "step": 1818 }, { "epoch": 0.08232631817153202, "grad_norm": 0.7274563128661418, "learning_rate": 9.928510759191914e-06, "loss": 0.4367, "step": 1819 }, { "epoch": 0.08237157727992758, "grad_norm": 0.8482490358684864, "learning_rate": 9.928387211187478e-06, "loss": 0.43, "step": 1820 }, { "epoch": 0.08241683638832314, "grad_norm": 0.9110378559090746, "learning_rate": 9.928263557287135e-06, "loss": 0.4179, "step": 1821 }, { "epoch": 0.08246209549671872, "grad_norm": 0.9439672447412728, "learning_rate": 9.928139797493545e-06, "loss": 0.4434, "step": 1822 }, { "epoch": 0.08250735460511428, "grad_norm": 0.6352867452932015, "learning_rate": 9.928015931809368e-06, "loss": 0.5682, "step": 1823 }, { "epoch": 0.08255261371350985, "grad_norm": 0.8239888115018645, "learning_rate": 9.927891960237261e-06, "loss": 0.4506, "step": 1824 }, { "epoch": 0.08259787282190541, "grad_norm": 0.42021768765074907, "learning_rate": 9.927767882779892e-06, "loss": 0.498, "step": 1825 }, { "epoch": 0.08264313193030097, "grad_norm": 1.2031228767214417, "learning_rate": 9.927643699439925e-06, "loss": 0.4705, "step": 1826 }, { "epoch": 0.08268839103869653, "grad_norm": 0.7864408720289386, "learning_rate": 9.92751941022003e-06, "loss": 0.4587, "step": 1827 }, { "epoch": 0.08273365014709211, "grad_norm": 0.8146428310929719, "learning_rate": 9.927395015122876e-06, "loss": 0.4978, "step": 1828 }, { "epoch": 0.08277890925548767, "grad_norm": 0.9403173929391236, "learning_rate": 9.927270514151137e-06, "loss": 0.4838, "step": 1829 }, { "epoch": 0.08282416836388323, "grad_norm": 0.7782652098882176, "learning_rate": 9.927145907307486e-06, "loss": 0.4569, "step": 1830 }, { "epoch": 0.0828694274722788, "grad_norm": 1.0164338183003059, "learning_rate": 9.927021194594604e-06, "loss": 0.5406, "step": 1831 }, { "epoch": 0.08291468658067436, "grad_norm": 0.7846927910640304, "learning_rate": 9.926896376015168e-06, "loss": 0.4387, "step": 1832 }, { "epoch": 0.08295994568906992, "grad_norm": 0.8219142743159832, "learning_rate": 9.926771451571862e-06, "loss": 0.5215, "step": 1833 }, { "epoch": 0.0830052047974655, "grad_norm": 0.6988546359400207, "learning_rate": 9.926646421267366e-06, "loss": 0.4201, "step": 1834 }, { "epoch": 0.08305046390586106, "grad_norm": 0.7129991767098324, "learning_rate": 9.926521285104371e-06, "loss": 0.4536, "step": 1835 }, { "epoch": 0.08309572301425662, "grad_norm": 0.7806065574483061, "learning_rate": 9.926396043085564e-06, "loss": 0.4813, "step": 1836 }, { "epoch": 0.08314098212265218, "grad_norm": 0.7805909948187303, "learning_rate": 9.926270695213638e-06, "loss": 0.4476, "step": 1837 }, { "epoch": 0.08318624123104774, "grad_norm": 0.5948129410350549, "learning_rate": 9.926145241491283e-06, "loss": 0.4907, "step": 1838 }, { "epoch": 0.0832315003394433, "grad_norm": 0.8754548731717574, "learning_rate": 9.926019681921196e-06, "loss": 0.4772, "step": 1839 }, { "epoch": 0.08327675944783888, "grad_norm": 0.9208126996590229, "learning_rate": 9.925894016506076e-06, "loss": 0.4527, "step": 1840 }, { "epoch": 0.08332201855623445, "grad_norm": 1.202719505119469, "learning_rate": 9.925768245248622e-06, "loss": 0.4071, "step": 1841 }, { "epoch": 0.08336727766463001, "grad_norm": 0.8513587887535525, "learning_rate": 9.925642368151536e-06, "loss": 0.4702, "step": 1842 }, { "epoch": 0.08341253677302557, "grad_norm": 0.8056976169739187, "learning_rate": 9.925516385217524e-06, "loss": 0.438, "step": 1843 }, { "epoch": 0.08345779588142113, "grad_norm": 0.9725598362627249, "learning_rate": 9.925390296449293e-06, "loss": 0.4457, "step": 1844 }, { "epoch": 0.0835030549898167, "grad_norm": 0.8121083302424683, "learning_rate": 9.925264101849552e-06, "loss": 0.429, "step": 1845 }, { "epoch": 0.08354831409821227, "grad_norm": 0.7744796561216578, "learning_rate": 9.925137801421011e-06, "loss": 0.5364, "step": 1846 }, { "epoch": 0.08359357320660783, "grad_norm": 0.800946010472054, "learning_rate": 9.925011395166387e-06, "loss": 0.3983, "step": 1847 }, { "epoch": 0.0836388323150034, "grad_norm": 0.7276221221900879, "learning_rate": 9.924884883088392e-06, "loss": 0.51, "step": 1848 }, { "epoch": 0.08368409142339896, "grad_norm": 0.4285744008499849, "learning_rate": 9.924758265189746e-06, "loss": 0.5154, "step": 1849 }, { "epoch": 0.08372935053179452, "grad_norm": 0.4265868429049617, "learning_rate": 9.924631541473174e-06, "loss": 0.5345, "step": 1850 }, { "epoch": 0.08377460964019008, "grad_norm": 0.7860118715571093, "learning_rate": 9.924504711941391e-06, "loss": 0.4562, "step": 1851 }, { "epoch": 0.08381986874858566, "grad_norm": 0.41053072199017704, "learning_rate": 9.924377776597128e-06, "loss": 0.5181, "step": 1852 }, { "epoch": 0.08386512785698122, "grad_norm": 0.41695682875868034, "learning_rate": 9.92425073544311e-06, "loss": 0.5228, "step": 1853 }, { "epoch": 0.08391038696537678, "grad_norm": 0.8054863739728471, "learning_rate": 9.924123588482068e-06, "loss": 0.4461, "step": 1854 }, { "epoch": 0.08395564607377234, "grad_norm": 0.7739457395814601, "learning_rate": 9.923996335716732e-06, "loss": 0.4478, "step": 1855 }, { "epoch": 0.0840009051821679, "grad_norm": 0.7623799852036963, "learning_rate": 9.92386897714984e-06, "loss": 0.4644, "step": 1856 }, { "epoch": 0.08404616429056348, "grad_norm": 0.7923643933942867, "learning_rate": 9.923741512784124e-06, "loss": 0.4439, "step": 1857 }, { "epoch": 0.08409142339895904, "grad_norm": 0.7400779069636888, "learning_rate": 9.923613942622326e-06, "loss": 0.4717, "step": 1858 }, { "epoch": 0.08413668250735461, "grad_norm": 0.7773476459849654, "learning_rate": 9.923486266667186e-06, "loss": 0.4649, "step": 1859 }, { "epoch": 0.08418194161575017, "grad_norm": 0.7908800432339458, "learning_rate": 9.923358484921447e-06, "loss": 0.4666, "step": 1860 }, { "epoch": 0.08422720072414573, "grad_norm": 0.7190776961384768, "learning_rate": 9.923230597387856e-06, "loss": 0.4084, "step": 1861 }, { "epoch": 0.0842724598325413, "grad_norm": 0.7009628564366115, "learning_rate": 9.92310260406916e-06, "loss": 0.4366, "step": 1862 }, { "epoch": 0.08431771894093687, "grad_norm": 0.7328454796007702, "learning_rate": 9.922974504968107e-06, "loss": 0.4222, "step": 1863 }, { "epoch": 0.08436297804933243, "grad_norm": 0.8678391023098952, "learning_rate": 9.922846300087454e-06, "loss": 0.4821, "step": 1864 }, { "epoch": 0.084408237157728, "grad_norm": 0.8433282876733529, "learning_rate": 9.922717989429954e-06, "loss": 0.4217, "step": 1865 }, { "epoch": 0.08445349626612356, "grad_norm": 0.7218042482897301, "learning_rate": 9.922589572998362e-06, "loss": 0.5084, "step": 1866 }, { "epoch": 0.08449875537451912, "grad_norm": 0.8602340789259252, "learning_rate": 9.922461050795438e-06, "loss": 0.4069, "step": 1867 }, { "epoch": 0.08454401448291468, "grad_norm": 0.8309640447672152, "learning_rate": 9.922332422823945e-06, "loss": 0.5391, "step": 1868 }, { "epoch": 0.08458927359131026, "grad_norm": 0.8035478084960879, "learning_rate": 9.922203689086647e-06, "loss": 0.4768, "step": 1869 }, { "epoch": 0.08463453269970582, "grad_norm": 0.4148660540710434, "learning_rate": 9.922074849586308e-06, "loss": 0.5419, "step": 1870 }, { "epoch": 0.08467979180810138, "grad_norm": 0.8219306255439327, "learning_rate": 9.921945904325697e-06, "loss": 0.43, "step": 1871 }, { "epoch": 0.08472505091649694, "grad_norm": 0.5312965844343916, "learning_rate": 9.921816853307587e-06, "loss": 0.5292, "step": 1872 }, { "epoch": 0.0847703100248925, "grad_norm": 0.7367383375649544, "learning_rate": 9.921687696534747e-06, "loss": 0.4661, "step": 1873 }, { "epoch": 0.08481556913328807, "grad_norm": 0.7950765321616564, "learning_rate": 9.921558434009955e-06, "loss": 0.4288, "step": 1874 }, { "epoch": 0.08486082824168364, "grad_norm": 0.7644887707411738, "learning_rate": 9.921429065735988e-06, "loss": 0.4307, "step": 1875 }, { "epoch": 0.0849060873500792, "grad_norm": 0.7419573945037351, "learning_rate": 9.921299591715624e-06, "loss": 0.4117, "step": 1876 }, { "epoch": 0.08495134645847477, "grad_norm": 0.7484051985152155, "learning_rate": 9.921170011951647e-06, "loss": 0.4789, "step": 1877 }, { "epoch": 0.08499660556687033, "grad_norm": 0.7464732006200073, "learning_rate": 9.921040326446843e-06, "loss": 0.4159, "step": 1878 }, { "epoch": 0.0850418646752659, "grad_norm": 0.7313580668980304, "learning_rate": 9.920910535203994e-06, "loss": 0.4521, "step": 1879 }, { "epoch": 0.08508712378366146, "grad_norm": 0.8321482272182403, "learning_rate": 9.92078063822589e-06, "loss": 0.4779, "step": 1880 }, { "epoch": 0.08513238289205703, "grad_norm": 0.6967085482083333, "learning_rate": 9.920650635515325e-06, "loss": 0.3927, "step": 1881 }, { "epoch": 0.0851776420004526, "grad_norm": 0.9794195771812104, "learning_rate": 9.92052052707509e-06, "loss": 0.5055, "step": 1882 }, { "epoch": 0.08522290110884816, "grad_norm": 0.6899236636306616, "learning_rate": 9.92039031290798e-06, "loss": 0.4186, "step": 1883 }, { "epoch": 0.08526816021724372, "grad_norm": 0.725913617196428, "learning_rate": 9.920259993016797e-06, "loss": 0.4871, "step": 1884 }, { "epoch": 0.08531341932563928, "grad_norm": 0.8332609166219846, "learning_rate": 9.920129567404335e-06, "loss": 0.4206, "step": 1885 }, { "epoch": 0.08535867843403484, "grad_norm": 0.7069291434494992, "learning_rate": 9.9199990360734e-06, "loss": 0.4109, "step": 1886 }, { "epoch": 0.08540393754243042, "grad_norm": 0.7991002911044793, "learning_rate": 9.919868399026797e-06, "loss": 0.3979, "step": 1887 }, { "epoch": 0.08544919665082598, "grad_norm": 0.8346063775026414, "learning_rate": 9.919737656267335e-06, "loss": 0.4882, "step": 1888 }, { "epoch": 0.08549445575922154, "grad_norm": 0.6406919207588929, "learning_rate": 9.919606807797817e-06, "loss": 0.522, "step": 1889 }, { "epoch": 0.0855397148676171, "grad_norm": 0.7523586431196658, "learning_rate": 9.919475853621058e-06, "loss": 0.4645, "step": 1890 }, { "epoch": 0.08558497397601267, "grad_norm": 0.6798200716411895, "learning_rate": 9.919344793739874e-06, "loss": 0.4176, "step": 1891 }, { "epoch": 0.08563023308440823, "grad_norm": 0.736139778725739, "learning_rate": 9.919213628157078e-06, "loss": 0.441, "step": 1892 }, { "epoch": 0.0856754921928038, "grad_norm": 0.6998787747239904, "learning_rate": 9.91908235687549e-06, "loss": 0.4291, "step": 1893 }, { "epoch": 0.08572075130119937, "grad_norm": 0.5684390273842443, "learning_rate": 9.918950979897928e-06, "loss": 0.5303, "step": 1894 }, { "epoch": 0.08576601040959493, "grad_norm": 0.7239694026683012, "learning_rate": 9.91881949722722e-06, "loss": 0.4609, "step": 1895 }, { "epoch": 0.0858112695179905, "grad_norm": 0.7182694255332468, "learning_rate": 9.918687908866185e-06, "loss": 0.4247, "step": 1896 }, { "epoch": 0.08585652862638606, "grad_norm": 0.7599687280674057, "learning_rate": 9.918556214817655e-06, "loss": 0.4404, "step": 1897 }, { "epoch": 0.08590178773478163, "grad_norm": 0.7937333688188135, "learning_rate": 9.918424415084458e-06, "loss": 0.4639, "step": 1898 }, { "epoch": 0.0859470468431772, "grad_norm": 0.8085633869903383, "learning_rate": 9.918292509669426e-06, "loss": 0.4284, "step": 1899 }, { "epoch": 0.08599230595157276, "grad_norm": 0.776740327788825, "learning_rate": 9.918160498575394e-06, "loss": 0.4654, "step": 1900 }, { "epoch": 0.08603756505996832, "grad_norm": 0.7632130970803997, "learning_rate": 9.918028381805196e-06, "loss": 0.422, "step": 1901 }, { "epoch": 0.08608282416836388, "grad_norm": 0.7443778623445391, "learning_rate": 9.917896159361674e-06, "loss": 0.4558, "step": 1902 }, { "epoch": 0.08612808327675944, "grad_norm": 0.7202079960261414, "learning_rate": 9.917763831247667e-06, "loss": 0.4543, "step": 1903 }, { "epoch": 0.08617334238515502, "grad_norm": 0.7044512276975228, "learning_rate": 9.91763139746602e-06, "loss": 0.5185, "step": 1904 }, { "epoch": 0.08621860149355058, "grad_norm": 0.5471247904879195, "learning_rate": 9.917498858019577e-06, "loss": 0.5281, "step": 1905 }, { "epoch": 0.08626386060194614, "grad_norm": 0.7310463517891044, "learning_rate": 9.917366212911187e-06, "loss": 0.3936, "step": 1906 }, { "epoch": 0.0863091197103417, "grad_norm": 0.7507045212274692, "learning_rate": 9.917233462143698e-06, "loss": 0.4251, "step": 1907 }, { "epoch": 0.08635437881873727, "grad_norm": 0.7295758228462808, "learning_rate": 9.917100605719968e-06, "loss": 0.4258, "step": 1908 }, { "epoch": 0.08639963792713283, "grad_norm": 0.7133428447258995, "learning_rate": 9.916967643642844e-06, "loss": 0.4553, "step": 1909 }, { "epoch": 0.0864448970355284, "grad_norm": 0.7734628826971062, "learning_rate": 9.916834575915186e-06, "loss": 0.417, "step": 1910 }, { "epoch": 0.08649015614392397, "grad_norm": 0.6985026626342046, "learning_rate": 9.916701402539857e-06, "loss": 0.4119, "step": 1911 }, { "epoch": 0.08653541525231953, "grad_norm": 1.1633875585458455, "learning_rate": 9.916568123519713e-06, "loss": 0.5522, "step": 1912 }, { "epoch": 0.08658067436071509, "grad_norm": 0.8129193812189842, "learning_rate": 9.916434738857621e-06, "loss": 0.4336, "step": 1913 }, { "epoch": 0.08662593346911066, "grad_norm": 0.7459738133804278, "learning_rate": 9.916301248556446e-06, "loss": 0.4838, "step": 1914 }, { "epoch": 0.08667119257750622, "grad_norm": 0.7379164873726534, "learning_rate": 9.916167652619058e-06, "loss": 0.4234, "step": 1915 }, { "epoch": 0.0867164516859018, "grad_norm": 0.8413281330999343, "learning_rate": 9.916033951048322e-06, "loss": 0.4448, "step": 1916 }, { "epoch": 0.08676171079429736, "grad_norm": 0.7615897239798799, "learning_rate": 9.915900143847119e-06, "loss": 0.3915, "step": 1917 }, { "epoch": 0.08680696990269292, "grad_norm": 0.7395346710437531, "learning_rate": 9.915766231018317e-06, "loss": 0.466, "step": 1918 }, { "epoch": 0.08685222901108848, "grad_norm": 0.7461409358070803, "learning_rate": 9.915632212564798e-06, "loss": 0.4975, "step": 1919 }, { "epoch": 0.08689748811948404, "grad_norm": 0.8443578605281495, "learning_rate": 9.91549808848944e-06, "loss": 0.5407, "step": 1920 }, { "epoch": 0.0869427472278796, "grad_norm": 0.8118802915223485, "learning_rate": 9.915363858795125e-06, "loss": 0.4272, "step": 1921 }, { "epoch": 0.08698800633627518, "grad_norm": 0.7345275937375815, "learning_rate": 9.915229523484736e-06, "loss": 0.4269, "step": 1922 }, { "epoch": 0.08703326544467074, "grad_norm": 0.7122964942038054, "learning_rate": 9.915095082561161e-06, "loss": 0.4409, "step": 1923 }, { "epoch": 0.0870785245530663, "grad_norm": 0.7396962298691463, "learning_rate": 9.914960536027289e-06, "loss": 0.4316, "step": 1924 }, { "epoch": 0.08712378366146187, "grad_norm": 0.7307867562497987, "learning_rate": 9.91482588388601e-06, "loss": 0.4782, "step": 1925 }, { "epoch": 0.08716904276985743, "grad_norm": 0.7484534005439776, "learning_rate": 9.914691126140216e-06, "loss": 0.4382, "step": 1926 }, { "epoch": 0.08721430187825299, "grad_norm": 0.825731600659895, "learning_rate": 9.914556262792805e-06, "loss": 0.5151, "step": 1927 }, { "epoch": 0.08725956098664857, "grad_norm": 0.7123185730894329, "learning_rate": 9.914421293846675e-06, "loss": 0.4662, "step": 1928 }, { "epoch": 0.08730482009504413, "grad_norm": 0.7781423627660821, "learning_rate": 9.914286219304724e-06, "loss": 0.4698, "step": 1929 }, { "epoch": 0.08735007920343969, "grad_norm": 0.6957365833355711, "learning_rate": 9.914151039169855e-06, "loss": 0.3905, "step": 1930 }, { "epoch": 0.08739533831183526, "grad_norm": 0.6685636570567208, "learning_rate": 9.914015753444973e-06, "loss": 0.4447, "step": 1931 }, { "epoch": 0.08744059742023082, "grad_norm": 1.0053401661334387, "learning_rate": 9.913880362132984e-06, "loss": 0.5551, "step": 1932 }, { "epoch": 0.08748585652862638, "grad_norm": 0.6118565640438647, "learning_rate": 9.913744865236798e-06, "loss": 0.5343, "step": 1933 }, { "epoch": 0.08753111563702196, "grad_norm": 1.0514009365642782, "learning_rate": 9.913609262759326e-06, "loss": 0.4909, "step": 1934 }, { "epoch": 0.08757637474541752, "grad_norm": 0.855627611367071, "learning_rate": 9.913473554703483e-06, "loss": 0.4809, "step": 1935 }, { "epoch": 0.08762163385381308, "grad_norm": 0.7468569420197058, "learning_rate": 9.913337741072183e-06, "loss": 0.4447, "step": 1936 }, { "epoch": 0.08766689296220864, "grad_norm": 0.7679281923297259, "learning_rate": 9.913201821868345e-06, "loss": 0.4235, "step": 1937 }, { "epoch": 0.0877121520706042, "grad_norm": 0.695156803389326, "learning_rate": 9.913065797094893e-06, "loss": 0.3993, "step": 1938 }, { "epoch": 0.08775741117899977, "grad_norm": 0.7702685215584218, "learning_rate": 9.912929666754741e-06, "loss": 0.422, "step": 1939 }, { "epoch": 0.08780267028739534, "grad_norm": 0.7598651277864084, "learning_rate": 9.912793430850822e-06, "loss": 0.4281, "step": 1940 }, { "epoch": 0.0878479293957909, "grad_norm": 0.6621538356949686, "learning_rate": 9.912657089386062e-06, "loss": 0.4467, "step": 1941 }, { "epoch": 0.08789318850418647, "grad_norm": 0.738213356518308, "learning_rate": 9.912520642363387e-06, "loss": 0.4615, "step": 1942 }, { "epoch": 0.08793844761258203, "grad_norm": 2.1635848608348716, "learning_rate": 9.912384089785731e-06, "loss": 0.6037, "step": 1943 }, { "epoch": 0.08798370672097759, "grad_norm": 1.0352199439365746, "learning_rate": 9.91224743165603e-06, "loss": 0.4801, "step": 1944 }, { "epoch": 0.08802896582937317, "grad_norm": 0.8709803651125759, "learning_rate": 9.912110667977218e-06, "loss": 0.5165, "step": 1945 }, { "epoch": 0.08807422493776873, "grad_norm": 0.8736736235706593, "learning_rate": 9.911973798752232e-06, "loss": 0.4637, "step": 1946 }, { "epoch": 0.08811948404616429, "grad_norm": 0.8532192502651776, "learning_rate": 9.911836823984018e-06, "loss": 0.4777, "step": 1947 }, { "epoch": 0.08816474315455985, "grad_norm": 0.7883996558296882, "learning_rate": 9.911699743675513e-06, "loss": 0.4497, "step": 1948 }, { "epoch": 0.08821000226295542, "grad_norm": 0.768457003127408, "learning_rate": 9.911562557829668e-06, "loss": 0.4458, "step": 1949 }, { "epoch": 0.08825526137135098, "grad_norm": 1.2044000998025475, "learning_rate": 9.911425266449428e-06, "loss": 0.521, "step": 1950 }, { "epoch": 0.08830052047974656, "grad_norm": 0.8025949214413347, "learning_rate": 9.911287869537744e-06, "loss": 0.4367, "step": 1951 }, { "epoch": 0.08834577958814212, "grad_norm": 0.8465178290443464, "learning_rate": 9.911150367097566e-06, "loss": 0.4638, "step": 1952 }, { "epoch": 0.08839103869653768, "grad_norm": 0.8250438554174135, "learning_rate": 9.911012759131852e-06, "loss": 0.438, "step": 1953 }, { "epoch": 0.08843629780493324, "grad_norm": 0.7497252987029995, "learning_rate": 9.910875045643555e-06, "loss": 0.4175, "step": 1954 }, { "epoch": 0.0884815569133288, "grad_norm": 0.7660876420547476, "learning_rate": 9.910737226635636e-06, "loss": 0.4404, "step": 1955 }, { "epoch": 0.08852681602172437, "grad_norm": 0.7384189585829709, "learning_rate": 9.910599302111057e-06, "loss": 0.4182, "step": 1956 }, { "epoch": 0.08857207513011994, "grad_norm": 0.7495072122782613, "learning_rate": 9.91046127207278e-06, "loss": 0.4084, "step": 1957 }, { "epoch": 0.0886173342385155, "grad_norm": 0.7627524376716129, "learning_rate": 9.910323136523773e-06, "loss": 0.4389, "step": 1958 }, { "epoch": 0.08866259334691107, "grad_norm": 0.7313100522135206, "learning_rate": 9.910184895467001e-06, "loss": 0.4454, "step": 1959 }, { "epoch": 0.08870785245530663, "grad_norm": 0.712201543133738, "learning_rate": 9.910046548905437e-06, "loss": 0.4231, "step": 1960 }, { "epoch": 0.08875311156370219, "grad_norm": 0.7388104424423865, "learning_rate": 9.909908096842053e-06, "loss": 0.4943, "step": 1961 }, { "epoch": 0.08879837067209775, "grad_norm": 0.7075371931646569, "learning_rate": 9.909769539279823e-06, "loss": 0.473, "step": 1962 }, { "epoch": 0.08884362978049333, "grad_norm": 0.6600754349658474, "learning_rate": 9.909630876221726e-06, "loss": 0.5189, "step": 1963 }, { "epoch": 0.08888888888888889, "grad_norm": 0.7682343599469311, "learning_rate": 9.909492107670737e-06, "loss": 0.4538, "step": 1964 }, { "epoch": 0.08893414799728445, "grad_norm": 0.7616573920258807, "learning_rate": 9.909353233629844e-06, "loss": 0.4521, "step": 1965 }, { "epoch": 0.08897940710568002, "grad_norm": 0.7655653986533902, "learning_rate": 9.909214254102027e-06, "loss": 0.5001, "step": 1966 }, { "epoch": 0.08902466621407558, "grad_norm": 0.7915882076561631, "learning_rate": 9.909075169090275e-06, "loss": 0.4719, "step": 1967 }, { "epoch": 0.08906992532247114, "grad_norm": 0.8313921499635212, "learning_rate": 9.90893597859757e-06, "loss": 0.4376, "step": 1968 }, { "epoch": 0.08911518443086672, "grad_norm": 0.7865427117270996, "learning_rate": 9.908796682626911e-06, "loss": 0.4728, "step": 1969 }, { "epoch": 0.08916044353926228, "grad_norm": 0.7169640304743307, "learning_rate": 9.908657281181289e-06, "loss": 0.4036, "step": 1970 }, { "epoch": 0.08920570264765784, "grad_norm": 0.7259107241970512, "learning_rate": 9.908517774263694e-06, "loss": 0.4504, "step": 1971 }, { "epoch": 0.0892509617560534, "grad_norm": 0.8039701538287216, "learning_rate": 9.90837816187713e-06, "loss": 0.4208, "step": 1972 }, { "epoch": 0.08929622086444897, "grad_norm": 0.8988560217541354, "learning_rate": 9.908238444024593e-06, "loss": 0.4544, "step": 1973 }, { "epoch": 0.08934147997284453, "grad_norm": 0.7390784236507443, "learning_rate": 9.908098620709088e-06, "loss": 0.459, "step": 1974 }, { "epoch": 0.0893867390812401, "grad_norm": 0.71640823902626, "learning_rate": 9.907958691933616e-06, "loss": 0.4309, "step": 1975 }, { "epoch": 0.08943199818963567, "grad_norm": 0.7413322149073592, "learning_rate": 9.907818657701185e-06, "loss": 0.4342, "step": 1976 }, { "epoch": 0.08947725729803123, "grad_norm": 0.7103835938842725, "learning_rate": 9.907678518014805e-06, "loss": 0.5497, "step": 1977 }, { "epoch": 0.08952251640642679, "grad_norm": 0.8569735720494202, "learning_rate": 9.907538272877487e-06, "loss": 0.4103, "step": 1978 }, { "epoch": 0.08956777551482235, "grad_norm": 0.4764015148465107, "learning_rate": 9.907397922292244e-06, "loss": 0.5308, "step": 1979 }, { "epoch": 0.08961303462321792, "grad_norm": 0.7423437895217264, "learning_rate": 9.90725746626209e-06, "loss": 0.4092, "step": 1980 }, { "epoch": 0.08965829373161349, "grad_norm": 0.7474565450120157, "learning_rate": 9.907116904790046e-06, "loss": 0.4369, "step": 1981 }, { "epoch": 0.08970355284000905, "grad_norm": 0.7462368731786069, "learning_rate": 9.90697623787913e-06, "loss": 0.4647, "step": 1982 }, { "epoch": 0.08974881194840462, "grad_norm": 0.7712449048788309, "learning_rate": 9.906835465532364e-06, "loss": 0.4692, "step": 1983 }, { "epoch": 0.08979407105680018, "grad_norm": 0.711234265516189, "learning_rate": 9.906694587752777e-06, "loss": 0.3805, "step": 1984 }, { "epoch": 0.08983933016519574, "grad_norm": 0.8239657790937962, "learning_rate": 9.906553604543392e-06, "loss": 0.3879, "step": 1985 }, { "epoch": 0.08988458927359132, "grad_norm": 0.6990732298441272, "learning_rate": 9.90641251590724e-06, "loss": 0.4371, "step": 1986 }, { "epoch": 0.08992984838198688, "grad_norm": 0.7636960030695994, "learning_rate": 9.906271321847349e-06, "loss": 0.4598, "step": 1987 }, { "epoch": 0.08997510749038244, "grad_norm": 0.7243944420146078, "learning_rate": 9.906130022366757e-06, "loss": 0.442, "step": 1988 }, { "epoch": 0.090020366598778, "grad_norm": 0.7077679135943856, "learning_rate": 9.905988617468501e-06, "loss": 0.4358, "step": 1989 }, { "epoch": 0.09006562570717357, "grad_norm": 0.69551264922042, "learning_rate": 9.905847107155615e-06, "loss": 0.3987, "step": 1990 }, { "epoch": 0.09011088481556913, "grad_norm": 0.7493605737496202, "learning_rate": 9.905705491431143e-06, "loss": 0.3991, "step": 1991 }, { "epoch": 0.0901561439239647, "grad_norm": 0.7035523261648925, "learning_rate": 9.905563770298126e-06, "loss": 0.4341, "step": 1992 }, { "epoch": 0.09020140303236027, "grad_norm": 0.6974086929552428, "learning_rate": 9.905421943759611e-06, "loss": 0.4504, "step": 1993 }, { "epoch": 0.09024666214075583, "grad_norm": 0.724066794803269, "learning_rate": 9.905280011818642e-06, "loss": 0.4629, "step": 1994 }, { "epoch": 0.09029192124915139, "grad_norm": 0.856557517052248, "learning_rate": 9.905137974478274e-06, "loss": 0.4473, "step": 1995 }, { "epoch": 0.09033718035754695, "grad_norm": 1.5435171830573482, "learning_rate": 9.904995831741553e-06, "loss": 0.5605, "step": 1996 }, { "epoch": 0.09038243946594252, "grad_norm": 0.8950493639427329, "learning_rate": 9.904853583611537e-06, "loss": 0.5319, "step": 1997 }, { "epoch": 0.09042769857433809, "grad_norm": 0.885813931552701, "learning_rate": 9.904711230091284e-06, "loss": 0.4585, "step": 1998 }, { "epoch": 0.09047295768273365, "grad_norm": 0.8122692652550383, "learning_rate": 9.904568771183848e-06, "loss": 0.405, "step": 1999 }, { "epoch": 0.09051821679112922, "grad_norm": 0.7032039282943026, "learning_rate": 9.904426206892292e-06, "loss": 0.4255, "step": 2000 }, { "epoch": 0.09056347589952478, "grad_norm": 0.730949878970527, "learning_rate": 9.90428353721968e-06, "loss": 0.4146, "step": 2001 }, { "epoch": 0.09060873500792034, "grad_norm": 1.8957984523607985, "learning_rate": 9.904140762169079e-06, "loss": 0.5715, "step": 2002 }, { "epoch": 0.0906539941163159, "grad_norm": 1.6179371382040924, "learning_rate": 9.903997881743552e-06, "loss": 0.5678, "step": 2003 }, { "epoch": 0.09069925322471148, "grad_norm": 0.8557369834319728, "learning_rate": 9.903854895946174e-06, "loss": 0.4557, "step": 2004 }, { "epoch": 0.09074451233310704, "grad_norm": 0.9088274857229913, "learning_rate": 9.903711804780015e-06, "loss": 0.4157, "step": 2005 }, { "epoch": 0.0907897714415026, "grad_norm": 0.8200194902087647, "learning_rate": 9.90356860824815e-06, "loss": 0.4635, "step": 2006 }, { "epoch": 0.09083503054989817, "grad_norm": 0.8415326148921095, "learning_rate": 9.903425306353656e-06, "loss": 0.48, "step": 2007 }, { "epoch": 0.09088028965829373, "grad_norm": 0.8056663597851978, "learning_rate": 9.90328189909961e-06, "loss": 0.4895, "step": 2008 }, { "epoch": 0.09092554876668929, "grad_norm": 0.8676749506416057, "learning_rate": 9.903138386489097e-06, "loss": 0.4835, "step": 2009 }, { "epoch": 0.09097080787508487, "grad_norm": 0.7626279262973016, "learning_rate": 9.902994768525199e-06, "loss": 0.4351, "step": 2010 }, { "epoch": 0.09101606698348043, "grad_norm": 0.9920892948015188, "learning_rate": 9.902851045211e-06, "loss": 0.4682, "step": 2011 }, { "epoch": 0.09106132609187599, "grad_norm": 0.8113461558852759, "learning_rate": 9.902707216549592e-06, "loss": 0.4427, "step": 2012 }, { "epoch": 0.09110658520027155, "grad_norm": 0.7395981957725416, "learning_rate": 9.902563282544061e-06, "loss": 0.4191, "step": 2013 }, { "epoch": 0.09115184430866712, "grad_norm": 0.7629633005452424, "learning_rate": 9.902419243197505e-06, "loss": 0.4024, "step": 2014 }, { "epoch": 0.09119710341706268, "grad_norm": 0.7563524409039875, "learning_rate": 9.902275098513015e-06, "loss": 0.4209, "step": 2015 }, { "epoch": 0.09124236252545825, "grad_norm": 0.7238738044857984, "learning_rate": 9.90213084849369e-06, "loss": 0.436, "step": 2016 }, { "epoch": 0.09128762163385382, "grad_norm": 1.764351025615634, "learning_rate": 9.901986493142629e-06, "loss": 0.5675, "step": 2017 }, { "epoch": 0.09133288074224938, "grad_norm": 1.4324836252716153, "learning_rate": 9.901842032462931e-06, "loss": 0.5895, "step": 2018 }, { "epoch": 0.09137813985064494, "grad_norm": 0.8962622205166951, "learning_rate": 9.901697466457706e-06, "loss": 0.4472, "step": 2019 }, { "epoch": 0.0914233989590405, "grad_norm": 0.8641869503878485, "learning_rate": 9.901552795130054e-06, "loss": 0.3986, "step": 2020 }, { "epoch": 0.09146865806743606, "grad_norm": 0.9679911613055479, "learning_rate": 9.901408018483087e-06, "loss": 0.4555, "step": 2021 }, { "epoch": 0.09151391717583164, "grad_norm": 1.7195222230010936, "learning_rate": 9.901263136519917e-06, "loss": 0.5488, "step": 2022 }, { "epoch": 0.0915591762842272, "grad_norm": 0.8581028478777686, "learning_rate": 9.901118149243653e-06, "loss": 0.4668, "step": 2023 }, { "epoch": 0.09160443539262277, "grad_norm": 0.7784153654002759, "learning_rate": 9.900973056657414e-06, "loss": 0.4235, "step": 2024 }, { "epoch": 0.09164969450101833, "grad_norm": 0.7236386410380111, "learning_rate": 9.900827858764315e-06, "loss": 0.4437, "step": 2025 }, { "epoch": 0.09169495360941389, "grad_norm": 0.8316898164519387, "learning_rate": 9.900682555567478e-06, "loss": 0.5253, "step": 2026 }, { "epoch": 0.09174021271780945, "grad_norm": 0.792182988032171, "learning_rate": 9.900537147070025e-06, "loss": 0.3917, "step": 2027 }, { "epoch": 0.09178547182620503, "grad_norm": 0.7853677656386151, "learning_rate": 9.900391633275079e-06, "loss": 0.4041, "step": 2028 }, { "epoch": 0.09183073093460059, "grad_norm": 0.9331508273972187, "learning_rate": 9.900246014185765e-06, "loss": 0.4462, "step": 2029 }, { "epoch": 0.09187599004299615, "grad_norm": 1.40698347576658, "learning_rate": 9.900100289805217e-06, "loss": 0.5497, "step": 2030 }, { "epoch": 0.09192124915139172, "grad_norm": 0.8484668466241169, "learning_rate": 9.899954460136563e-06, "loss": 0.3621, "step": 2031 }, { "epoch": 0.09196650825978728, "grad_norm": 0.7649499435717951, "learning_rate": 9.899808525182935e-06, "loss": 0.4725, "step": 2032 }, { "epoch": 0.09201176736818285, "grad_norm": 0.8057014650689612, "learning_rate": 9.899662484947473e-06, "loss": 0.5284, "step": 2033 }, { "epoch": 0.09205702647657842, "grad_norm": 1.1381958528632035, "learning_rate": 9.899516339433308e-06, "loss": 0.4134, "step": 2034 }, { "epoch": 0.09210228558497398, "grad_norm": 0.7743025263168356, "learning_rate": 9.899370088643589e-06, "loss": 0.455, "step": 2035 }, { "epoch": 0.09214754469336954, "grad_norm": 0.8291030740640418, "learning_rate": 9.899223732581452e-06, "loss": 0.4497, "step": 2036 }, { "epoch": 0.0921928038017651, "grad_norm": 0.6110725868672015, "learning_rate": 9.899077271250043e-06, "loss": 0.5065, "step": 2037 }, { "epoch": 0.09223806291016066, "grad_norm": 0.9969532555112366, "learning_rate": 9.898930704652512e-06, "loss": 0.4626, "step": 2038 }, { "epoch": 0.09228332201855624, "grad_norm": 0.7716306199234213, "learning_rate": 9.898784032792005e-06, "loss": 0.4295, "step": 2039 }, { "epoch": 0.0923285811269518, "grad_norm": 0.7635557014521751, "learning_rate": 9.898637255671674e-06, "loss": 0.4323, "step": 2040 }, { "epoch": 0.09237384023534737, "grad_norm": 0.832082874007693, "learning_rate": 9.898490373294673e-06, "loss": 0.4321, "step": 2041 }, { "epoch": 0.09241909934374293, "grad_norm": 0.9848162238601245, "learning_rate": 9.898343385664161e-06, "loss": 0.4305, "step": 2042 }, { "epoch": 0.09246435845213849, "grad_norm": 0.745330890800276, "learning_rate": 9.898196292783291e-06, "loss": 0.4903, "step": 2043 }, { "epoch": 0.09250961756053405, "grad_norm": 0.9633770165986147, "learning_rate": 9.898049094655229e-06, "loss": 0.4719, "step": 2044 }, { "epoch": 0.09255487666892963, "grad_norm": 0.8418196344795355, "learning_rate": 9.897901791283133e-06, "loss": 0.4433, "step": 2045 }, { "epoch": 0.09260013577732519, "grad_norm": 1.1470649580813783, "learning_rate": 9.897754382670171e-06, "loss": 0.55, "step": 2046 }, { "epoch": 0.09264539488572075, "grad_norm": 0.8762812853825404, "learning_rate": 9.897606868819508e-06, "loss": 0.4489, "step": 2047 }, { "epoch": 0.09269065399411631, "grad_norm": 0.8423179020239366, "learning_rate": 9.897459249734318e-06, "loss": 0.4265, "step": 2048 }, { "epoch": 0.09273591310251188, "grad_norm": 0.7325380667861581, "learning_rate": 9.89731152541777e-06, "loss": 0.4449, "step": 2049 }, { "epoch": 0.09278117221090744, "grad_norm": 0.7717625136781339, "learning_rate": 9.897163695873036e-06, "loss": 0.4298, "step": 2050 }, { "epoch": 0.09282643131930302, "grad_norm": 0.7557315397060594, "learning_rate": 9.897015761103298e-06, "loss": 0.4139, "step": 2051 }, { "epoch": 0.09287169042769858, "grad_norm": 0.7589363242557137, "learning_rate": 9.896867721111726e-06, "loss": 0.4668, "step": 2052 }, { "epoch": 0.09291694953609414, "grad_norm": 0.8135784534771674, "learning_rate": 9.89671957590151e-06, "loss": 0.4654, "step": 2053 }, { "epoch": 0.0929622086444897, "grad_norm": 0.770208676126074, "learning_rate": 9.89657132547583e-06, "loss": 0.4593, "step": 2054 }, { "epoch": 0.09300746775288526, "grad_norm": 0.7130610024807864, "learning_rate": 9.89642296983787e-06, "loss": 0.4724, "step": 2055 }, { "epoch": 0.09305272686128083, "grad_norm": 0.8139952994416343, "learning_rate": 9.896274508990818e-06, "loss": 0.4605, "step": 2056 }, { "epoch": 0.0930979859696764, "grad_norm": 0.7597358274853931, "learning_rate": 9.896125942937865e-06, "loss": 0.4247, "step": 2057 }, { "epoch": 0.09314324507807197, "grad_norm": 0.8903289322010103, "learning_rate": 9.895977271682203e-06, "loss": 0.4706, "step": 2058 }, { "epoch": 0.09318850418646753, "grad_norm": 0.8455330761971988, "learning_rate": 9.895828495227026e-06, "loss": 0.4398, "step": 2059 }, { "epoch": 0.09323376329486309, "grad_norm": 0.7911632261161675, "learning_rate": 9.89567961357553e-06, "loss": 0.5244, "step": 2060 }, { "epoch": 0.09327902240325865, "grad_norm": 0.7714784849728722, "learning_rate": 9.895530626730917e-06, "loss": 0.4424, "step": 2061 }, { "epoch": 0.09332428151165421, "grad_norm": 0.7595825000992268, "learning_rate": 9.895381534696385e-06, "loss": 0.4432, "step": 2062 }, { "epoch": 0.09336954062004979, "grad_norm": 0.7933673014458721, "learning_rate": 9.89523233747514e-06, "loss": 0.4948, "step": 2063 }, { "epoch": 0.09341479972844535, "grad_norm": 0.6566601566759311, "learning_rate": 9.895083035070386e-06, "loss": 0.5417, "step": 2064 }, { "epoch": 0.09346005883684091, "grad_norm": 0.7426330048072817, "learning_rate": 9.894933627485332e-06, "loss": 0.4582, "step": 2065 }, { "epoch": 0.09350531794523648, "grad_norm": 0.5430416280961405, "learning_rate": 9.894784114723186e-06, "loss": 0.5136, "step": 2066 }, { "epoch": 0.09355057705363204, "grad_norm": 0.8886218681236913, "learning_rate": 9.894634496787166e-06, "loss": 0.4623, "step": 2067 }, { "epoch": 0.0935958361620276, "grad_norm": 0.346019844103863, "learning_rate": 9.89448477368048e-06, "loss": 0.4935, "step": 2068 }, { "epoch": 0.09364109527042318, "grad_norm": 0.7603346261585872, "learning_rate": 9.89433494540635e-06, "loss": 0.4279, "step": 2069 }, { "epoch": 0.09368635437881874, "grad_norm": 0.44835072827611927, "learning_rate": 9.894185011967994e-06, "loss": 0.5457, "step": 2070 }, { "epoch": 0.0937316134872143, "grad_norm": 0.8254020666481041, "learning_rate": 9.894034973368633e-06, "loss": 0.4298, "step": 2071 }, { "epoch": 0.09377687259560986, "grad_norm": 0.7212908946016292, "learning_rate": 9.89388482961149e-06, "loss": 0.4348, "step": 2072 }, { "epoch": 0.09382213170400543, "grad_norm": 0.8305662270330699, "learning_rate": 9.893734580699796e-06, "loss": 0.4529, "step": 2073 }, { "epoch": 0.093867390812401, "grad_norm": 0.7498327315800464, "learning_rate": 9.893584226636773e-06, "loss": 0.3987, "step": 2074 }, { "epoch": 0.09391264992079656, "grad_norm": 0.7576967573230682, "learning_rate": 9.893433767425655e-06, "loss": 0.4056, "step": 2075 }, { "epoch": 0.09395790902919213, "grad_norm": 0.8421167565131027, "learning_rate": 9.893283203069675e-06, "loss": 0.4209, "step": 2076 }, { "epoch": 0.09400316813758769, "grad_norm": 0.9312987055219514, "learning_rate": 9.893132533572067e-06, "loss": 0.4547, "step": 2077 }, { "epoch": 0.09404842724598325, "grad_norm": 0.7287349796188367, "learning_rate": 9.892981758936069e-06, "loss": 0.419, "step": 2078 }, { "epoch": 0.09409368635437881, "grad_norm": 0.7886625124732145, "learning_rate": 9.89283087916492e-06, "loss": 0.446, "step": 2079 }, { "epoch": 0.09413894546277439, "grad_norm": 0.7342267624673294, "learning_rate": 9.892679894261865e-06, "loss": 0.4047, "step": 2080 }, { "epoch": 0.09418420457116995, "grad_norm": 0.7551290893052203, "learning_rate": 9.892528804230144e-06, "loss": 0.4536, "step": 2081 }, { "epoch": 0.09422946367956551, "grad_norm": 0.6795891080594817, "learning_rate": 9.892377609073006e-06, "loss": 0.4232, "step": 2082 }, { "epoch": 0.09427472278796108, "grad_norm": 1.0309268697491385, "learning_rate": 9.892226308793697e-06, "loss": 0.51, "step": 2083 }, { "epoch": 0.09431998189635664, "grad_norm": 0.999083959427165, "learning_rate": 9.892074903395472e-06, "loss": 0.4124, "step": 2084 }, { "epoch": 0.0943652410047522, "grad_norm": 0.6626173222054419, "learning_rate": 9.891923392881581e-06, "loss": 0.4113, "step": 2085 }, { "epoch": 0.09441050011314778, "grad_norm": 0.8008472966595057, "learning_rate": 9.89177177725528e-06, "loss": 0.4096, "step": 2086 }, { "epoch": 0.09445575922154334, "grad_norm": 0.7070000181672658, "learning_rate": 9.89162005651983e-06, "loss": 0.4698, "step": 2087 }, { "epoch": 0.0945010183299389, "grad_norm": 0.7102922590146776, "learning_rate": 9.891468230678487e-06, "loss": 0.3912, "step": 2088 }, { "epoch": 0.09454627743833446, "grad_norm": 0.7965351685251075, "learning_rate": 9.891316299734514e-06, "loss": 0.4661, "step": 2089 }, { "epoch": 0.09459153654673003, "grad_norm": 0.7133001147134137, "learning_rate": 9.891164263691178e-06, "loss": 0.4163, "step": 2090 }, { "epoch": 0.09463679565512559, "grad_norm": 0.6919466012287274, "learning_rate": 9.891012122551742e-06, "loss": 0.4251, "step": 2091 }, { "epoch": 0.09468205476352116, "grad_norm": 0.8172173083042604, "learning_rate": 9.890859876319479e-06, "loss": 0.4708, "step": 2092 }, { "epoch": 0.09472731387191673, "grad_norm": 0.7157802079281965, "learning_rate": 9.890707524997657e-06, "loss": 0.4183, "step": 2093 }, { "epoch": 0.09477257298031229, "grad_norm": 0.8012655492564951, "learning_rate": 9.890555068589552e-06, "loss": 0.4581, "step": 2094 }, { "epoch": 0.09481783208870785, "grad_norm": 0.6616316877473514, "learning_rate": 9.890402507098437e-06, "loss": 0.4321, "step": 2095 }, { "epoch": 0.09486309119710341, "grad_norm": 0.6796374624881355, "learning_rate": 9.890249840527593e-06, "loss": 0.4048, "step": 2096 }, { "epoch": 0.09490835030549898, "grad_norm": 0.6595102338230455, "learning_rate": 9.8900970688803e-06, "loss": 0.5378, "step": 2097 }, { "epoch": 0.09495360941389455, "grad_norm": 0.7408710873526465, "learning_rate": 9.88994419215984e-06, "loss": 0.4311, "step": 2098 }, { "epoch": 0.09499886852229011, "grad_norm": 0.5541647525789047, "learning_rate": 9.889791210369496e-06, "loss": 0.5408, "step": 2099 }, { "epoch": 0.09504412763068568, "grad_norm": 0.7911667363009298, "learning_rate": 9.889638123512557e-06, "loss": 0.4689, "step": 2100 }, { "epoch": 0.09508938673908124, "grad_norm": 0.42855301210537444, "learning_rate": 9.889484931592313e-06, "loss": 0.5257, "step": 2101 }, { "epoch": 0.0951346458474768, "grad_norm": 0.9473251263121182, "learning_rate": 9.889331634612053e-06, "loss": 0.4347, "step": 2102 }, { "epoch": 0.09517990495587236, "grad_norm": 0.4418629563484785, "learning_rate": 9.889178232575074e-06, "loss": 0.4935, "step": 2103 }, { "epoch": 0.09522516406426794, "grad_norm": 0.8028317808705686, "learning_rate": 9.889024725484672e-06, "loss": 0.4619, "step": 2104 }, { "epoch": 0.0952704231726635, "grad_norm": 0.7433539611390096, "learning_rate": 9.888871113344144e-06, "loss": 0.4343, "step": 2105 }, { "epoch": 0.09531568228105906, "grad_norm": 0.7133886009247582, "learning_rate": 9.888717396156788e-06, "loss": 0.4586, "step": 2106 }, { "epoch": 0.09536094138945463, "grad_norm": 0.6706564434461998, "learning_rate": 9.88856357392591e-06, "loss": 0.4374, "step": 2107 }, { "epoch": 0.09540620049785019, "grad_norm": 0.8111131097505304, "learning_rate": 9.888409646654818e-06, "loss": 0.4282, "step": 2108 }, { "epoch": 0.09545145960624575, "grad_norm": 0.7686171681775432, "learning_rate": 9.888255614346813e-06, "loss": 0.457, "step": 2109 }, { "epoch": 0.09549671871464133, "grad_norm": 0.7054175753477833, "learning_rate": 9.88810147700521e-06, "loss": 0.4647, "step": 2110 }, { "epoch": 0.09554197782303689, "grad_norm": 1.0515788174529657, "learning_rate": 9.887947234633318e-06, "loss": 0.4704, "step": 2111 }, { "epoch": 0.09558723693143245, "grad_norm": 0.7175508450358566, "learning_rate": 9.887792887234453e-06, "loss": 0.4552, "step": 2112 }, { "epoch": 0.09563249603982801, "grad_norm": 0.7729794430588015, "learning_rate": 9.88763843481193e-06, "loss": 0.4396, "step": 2113 }, { "epoch": 0.09567775514822358, "grad_norm": 0.706629623489225, "learning_rate": 9.887483877369068e-06, "loss": 0.4271, "step": 2114 }, { "epoch": 0.09572301425661914, "grad_norm": 0.7475391012848127, "learning_rate": 9.88732921490919e-06, "loss": 0.4409, "step": 2115 }, { "epoch": 0.09576827336501471, "grad_norm": 0.7293429645265976, "learning_rate": 9.887174447435615e-06, "loss": 0.4801, "step": 2116 }, { "epoch": 0.09581353247341028, "grad_norm": 0.6990341610953973, "learning_rate": 9.88701957495167e-06, "loss": 0.4306, "step": 2117 }, { "epoch": 0.09585879158180584, "grad_norm": 0.7544727476330307, "learning_rate": 9.886864597460686e-06, "loss": 0.4297, "step": 2118 }, { "epoch": 0.0959040506902014, "grad_norm": 0.7856611371489157, "learning_rate": 9.88670951496599e-06, "loss": 0.4923, "step": 2119 }, { "epoch": 0.09594930979859696, "grad_norm": 0.7044813533443429, "learning_rate": 9.886554327470917e-06, "loss": 0.4295, "step": 2120 }, { "epoch": 0.09599456890699254, "grad_norm": 0.872281271578073, "learning_rate": 9.886399034978798e-06, "loss": 0.452, "step": 2121 }, { "epoch": 0.0960398280153881, "grad_norm": 0.6719432262640053, "learning_rate": 9.886243637492969e-06, "loss": 0.4103, "step": 2122 }, { "epoch": 0.09608508712378366, "grad_norm": 0.7155813838531678, "learning_rate": 9.886088135016773e-06, "loss": 0.4032, "step": 2123 }, { "epoch": 0.09613034623217923, "grad_norm": 0.7312506486965615, "learning_rate": 9.88593252755355e-06, "loss": 0.4494, "step": 2124 }, { "epoch": 0.09617560534057479, "grad_norm": 1.817955979217866, "learning_rate": 9.885776815106643e-06, "loss": 0.5495, "step": 2125 }, { "epoch": 0.09622086444897035, "grad_norm": 0.7271498190042965, "learning_rate": 9.885620997679397e-06, "loss": 0.4755, "step": 2126 }, { "epoch": 0.09626612355736593, "grad_norm": 0.9057747660426797, "learning_rate": 9.88546507527516e-06, "loss": 0.4454, "step": 2127 }, { "epoch": 0.09631138266576149, "grad_norm": 0.7188105353417086, "learning_rate": 9.885309047897285e-06, "loss": 0.405, "step": 2128 }, { "epoch": 0.09635664177415705, "grad_norm": 0.824647330223287, "learning_rate": 9.88515291554912e-06, "loss": 0.4987, "step": 2129 }, { "epoch": 0.09640190088255261, "grad_norm": 0.8047057457137237, "learning_rate": 9.884996678234024e-06, "loss": 0.4606, "step": 2130 }, { "epoch": 0.09644715999094818, "grad_norm": 0.7541646312384799, "learning_rate": 9.884840335955354e-06, "loss": 0.4225, "step": 2131 }, { "epoch": 0.09649241909934374, "grad_norm": 0.7657084288201877, "learning_rate": 9.884683888716466e-06, "loss": 0.4381, "step": 2132 }, { "epoch": 0.09653767820773931, "grad_norm": 0.733840982809254, "learning_rate": 9.884527336520724e-06, "loss": 0.4376, "step": 2133 }, { "epoch": 0.09658293731613488, "grad_norm": 0.7450007075565378, "learning_rate": 9.88437067937149e-06, "loss": 0.4756, "step": 2134 }, { "epoch": 0.09662819642453044, "grad_norm": 0.8394335018999397, "learning_rate": 9.884213917272133e-06, "loss": 0.5089, "step": 2135 }, { "epoch": 0.096673455532926, "grad_norm": 0.7165101390264119, "learning_rate": 9.88405705022602e-06, "loss": 0.4506, "step": 2136 }, { "epoch": 0.09671871464132156, "grad_norm": 0.7136018139841266, "learning_rate": 9.883900078236519e-06, "loss": 0.3896, "step": 2137 }, { "epoch": 0.09676397374971712, "grad_norm": 1.0973366170490864, "learning_rate": 9.883743001307007e-06, "loss": 0.3826, "step": 2138 }, { "epoch": 0.0968092328581127, "grad_norm": 0.7498597906623329, "learning_rate": 9.883585819440854e-06, "loss": 0.4749, "step": 2139 }, { "epoch": 0.09685449196650826, "grad_norm": 0.7252975386637314, "learning_rate": 9.883428532641445e-06, "loss": 0.4289, "step": 2140 }, { "epoch": 0.09689975107490383, "grad_norm": 0.7609438996643995, "learning_rate": 9.883271140912153e-06, "loss": 0.45, "step": 2141 }, { "epoch": 0.09694501018329939, "grad_norm": 0.6732376326562703, "learning_rate": 9.88311364425636e-06, "loss": 0.4543, "step": 2142 }, { "epoch": 0.09699026929169495, "grad_norm": 0.6372499949868802, "learning_rate": 9.882956042677457e-06, "loss": 0.391, "step": 2143 }, { "epoch": 0.09703552840009051, "grad_norm": 0.7951653118971782, "learning_rate": 9.882798336178821e-06, "loss": 0.453, "step": 2144 }, { "epoch": 0.09708078750848609, "grad_norm": 0.7507677694816026, "learning_rate": 9.882640524763847e-06, "loss": 0.4693, "step": 2145 }, { "epoch": 0.09712604661688165, "grad_norm": 0.6870245167157061, "learning_rate": 9.882482608435924e-06, "loss": 0.3988, "step": 2146 }, { "epoch": 0.09717130572527721, "grad_norm": 0.7071117925139719, "learning_rate": 9.882324587198446e-06, "loss": 0.4471, "step": 2147 }, { "epoch": 0.09721656483367278, "grad_norm": 0.6773979773976042, "learning_rate": 9.882166461054806e-06, "loss": 0.4139, "step": 2148 }, { "epoch": 0.09726182394206834, "grad_norm": 0.6139954213727978, "learning_rate": 9.882008230008403e-06, "loss": 0.5461, "step": 2149 }, { "epoch": 0.0973070830504639, "grad_norm": 0.5598559951748219, "learning_rate": 9.881849894062639e-06, "loss": 0.5198, "step": 2150 }, { "epoch": 0.09735234215885948, "grad_norm": 0.7831826157949604, "learning_rate": 9.881691453220912e-06, "loss": 0.4562, "step": 2151 }, { "epoch": 0.09739760126725504, "grad_norm": 0.7644327354430474, "learning_rate": 9.88153290748663e-06, "loss": 0.4838, "step": 2152 }, { "epoch": 0.0974428603756506, "grad_norm": 0.824965801673457, "learning_rate": 9.8813742568632e-06, "loss": 0.4401, "step": 2153 }, { "epoch": 0.09748811948404616, "grad_norm": 0.7638742540229725, "learning_rate": 9.881215501354025e-06, "loss": 0.4399, "step": 2154 }, { "epoch": 0.09753337859244172, "grad_norm": 0.6250697994456945, "learning_rate": 9.881056640962524e-06, "loss": 0.5198, "step": 2155 }, { "epoch": 0.09757863770083729, "grad_norm": 0.7161150856416505, "learning_rate": 9.880897675692105e-06, "loss": 0.4157, "step": 2156 }, { "epoch": 0.09762389680923286, "grad_norm": 0.7066523082148944, "learning_rate": 9.880738605546186e-06, "loss": 0.3921, "step": 2157 }, { "epoch": 0.09766915591762843, "grad_norm": 0.6960762618579934, "learning_rate": 9.880579430528183e-06, "loss": 0.398, "step": 2158 }, { "epoch": 0.09771441502602399, "grad_norm": 0.6712418881697726, "learning_rate": 9.880420150641519e-06, "loss": 0.3952, "step": 2159 }, { "epoch": 0.09775967413441955, "grad_norm": 0.8044310928745596, "learning_rate": 9.880260765889615e-06, "loss": 0.4779, "step": 2160 }, { "epoch": 0.09780493324281511, "grad_norm": 0.7582313840845955, "learning_rate": 9.880101276275896e-06, "loss": 0.4259, "step": 2161 }, { "epoch": 0.09785019235121069, "grad_norm": 0.411187499329373, "learning_rate": 9.87994168180379e-06, "loss": 0.5103, "step": 2162 }, { "epoch": 0.09789545145960625, "grad_norm": 0.8352279353409846, "learning_rate": 9.879781982476722e-06, "loss": 0.5148, "step": 2163 }, { "epoch": 0.09794071056800181, "grad_norm": 0.6975707964456537, "learning_rate": 9.879622178298128e-06, "loss": 0.431, "step": 2164 }, { "epoch": 0.09798596967639737, "grad_norm": 0.7346753431411169, "learning_rate": 9.879462269271439e-06, "loss": 0.4202, "step": 2165 }, { "epoch": 0.09803122878479294, "grad_norm": 0.347524141797627, "learning_rate": 9.879302255400092e-06, "loss": 0.5198, "step": 2166 }, { "epoch": 0.0980764878931885, "grad_norm": 0.3586006925617358, "learning_rate": 9.879142136687524e-06, "loss": 0.5421, "step": 2167 }, { "epoch": 0.09812174700158408, "grad_norm": 0.9478318513259163, "learning_rate": 9.878981913137178e-06, "loss": 0.4372, "step": 2168 }, { "epoch": 0.09816700610997964, "grad_norm": 0.7835830213363405, "learning_rate": 9.878821584752495e-06, "loss": 0.3997, "step": 2169 }, { "epoch": 0.0982122652183752, "grad_norm": 0.7127149457361394, "learning_rate": 9.878661151536923e-06, "loss": 0.4473, "step": 2170 }, { "epoch": 0.09825752432677076, "grad_norm": 0.9555889260859909, "learning_rate": 9.878500613493904e-06, "loss": 0.4679, "step": 2171 }, { "epoch": 0.09830278343516632, "grad_norm": 0.741480683152549, "learning_rate": 9.87833997062689e-06, "loss": 0.4414, "step": 2172 }, { "epoch": 0.09834804254356189, "grad_norm": 0.7204742635936184, "learning_rate": 9.878179222939333e-06, "loss": 0.4415, "step": 2173 }, { "epoch": 0.09839330165195746, "grad_norm": 0.898233981729686, "learning_rate": 9.878018370434686e-06, "loss": 0.4517, "step": 2174 }, { "epoch": 0.09843856076035302, "grad_norm": 0.5640581299672587, "learning_rate": 9.877857413116408e-06, "loss": 0.5291, "step": 2175 }, { "epoch": 0.09848381986874859, "grad_norm": 0.8242186289537619, "learning_rate": 9.877696350987954e-06, "loss": 0.4235, "step": 2176 }, { "epoch": 0.09852907897714415, "grad_norm": 0.7710921868502916, "learning_rate": 9.877535184052786e-06, "loss": 0.4414, "step": 2177 }, { "epoch": 0.09857433808553971, "grad_norm": 0.6927559731602532, "learning_rate": 9.877373912314367e-06, "loss": 0.4113, "step": 2178 }, { "epoch": 0.09861959719393527, "grad_norm": 0.3404435555777899, "learning_rate": 9.877212535776161e-06, "loss": 0.5251, "step": 2179 }, { "epoch": 0.09866485630233085, "grad_norm": 0.8667350818901997, "learning_rate": 9.87705105444164e-06, "loss": 0.4138, "step": 2180 }, { "epoch": 0.09871011541072641, "grad_norm": 0.9045198118800067, "learning_rate": 9.876889468314268e-06, "loss": 0.4152, "step": 2181 }, { "epoch": 0.09875537451912197, "grad_norm": 0.6987822562694133, "learning_rate": 9.876727777397522e-06, "loss": 0.4166, "step": 2182 }, { "epoch": 0.09880063362751754, "grad_norm": 0.7422912359370816, "learning_rate": 9.876565981694871e-06, "loss": 0.4554, "step": 2183 }, { "epoch": 0.0988458927359131, "grad_norm": 0.8370568389817287, "learning_rate": 9.876404081209796e-06, "loss": 0.4713, "step": 2184 }, { "epoch": 0.09889115184430866, "grad_norm": 0.7619494348801985, "learning_rate": 9.876242075945774e-06, "loss": 0.4493, "step": 2185 }, { "epoch": 0.09893641095270424, "grad_norm": 0.7099278623393065, "learning_rate": 9.876079965906284e-06, "loss": 0.4599, "step": 2186 }, { "epoch": 0.0989816700610998, "grad_norm": 0.6536632383972745, "learning_rate": 9.875917751094814e-06, "loss": 0.3861, "step": 2187 }, { "epoch": 0.09902692916949536, "grad_norm": 0.7126200240781412, "learning_rate": 9.875755431514846e-06, "loss": 0.4468, "step": 2188 }, { "epoch": 0.09907218827789092, "grad_norm": 0.723300049733845, "learning_rate": 9.875593007169868e-06, "loss": 0.4283, "step": 2189 }, { "epoch": 0.09911744738628649, "grad_norm": 1.9414330444935057, "learning_rate": 9.87543047806337e-06, "loss": 0.5392, "step": 2190 }, { "epoch": 0.09916270649468205, "grad_norm": 0.7713129418149638, "learning_rate": 9.875267844198846e-06, "loss": 0.4594, "step": 2191 }, { "epoch": 0.09920796560307762, "grad_norm": 0.36286998565742357, "learning_rate": 9.875105105579789e-06, "loss": 0.5206, "step": 2192 }, { "epoch": 0.09925322471147319, "grad_norm": 0.7090515722591921, "learning_rate": 9.874942262209695e-06, "loss": 0.4469, "step": 2193 }, { "epoch": 0.09929848381986875, "grad_norm": 0.35163704632743675, "learning_rate": 9.874779314092065e-06, "loss": 0.513, "step": 2194 }, { "epoch": 0.09934374292826431, "grad_norm": 0.7991550466891403, "learning_rate": 9.874616261230398e-06, "loss": 0.4538, "step": 2195 }, { "epoch": 0.09938900203665987, "grad_norm": 0.7567345337215104, "learning_rate": 9.874453103628201e-06, "loss": 0.4492, "step": 2196 }, { "epoch": 0.09943426114505544, "grad_norm": 0.6729555115890912, "learning_rate": 9.874289841288976e-06, "loss": 0.419, "step": 2197 }, { "epoch": 0.09947952025345101, "grad_norm": 0.7729185223247828, "learning_rate": 9.874126474216234e-06, "loss": 0.4344, "step": 2198 }, { "epoch": 0.09952477936184657, "grad_norm": 0.7355415324216681, "learning_rate": 9.873963002413483e-06, "loss": 0.4201, "step": 2199 }, { "epoch": 0.09957003847024214, "grad_norm": 0.6982893543482479, "learning_rate": 9.873799425884235e-06, "loss": 0.4277, "step": 2200 }, { "epoch": 0.0996152975786377, "grad_norm": 0.773880454489427, "learning_rate": 9.873635744632008e-06, "loss": 0.4539, "step": 2201 }, { "epoch": 0.09966055668703326, "grad_norm": 0.8494593639519243, "learning_rate": 9.873471958660316e-06, "loss": 0.4192, "step": 2202 }, { "epoch": 0.09970581579542882, "grad_norm": 0.8580004122744093, "learning_rate": 9.873308067972679e-06, "loss": 0.5519, "step": 2203 }, { "epoch": 0.0997510749038244, "grad_norm": 0.7290001443780998, "learning_rate": 9.87314407257262e-06, "loss": 0.4467, "step": 2204 }, { "epoch": 0.09979633401221996, "grad_norm": 0.7988956837738996, "learning_rate": 9.87297997246366e-06, "loss": 0.4354, "step": 2205 }, { "epoch": 0.09984159312061552, "grad_norm": 0.7358440002456154, "learning_rate": 9.872815767649329e-06, "loss": 0.4403, "step": 2206 }, { "epoch": 0.09988685222901109, "grad_norm": 0.7204005245588492, "learning_rate": 9.87265145813315e-06, "loss": 0.4011, "step": 2207 }, { "epoch": 0.09993211133740665, "grad_norm": 0.9151800895514478, "learning_rate": 9.872487043918659e-06, "loss": 0.4647, "step": 2208 }, { "epoch": 0.09997737044580222, "grad_norm": 0.6684901673506206, "learning_rate": 9.872322525009383e-06, "loss": 0.415, "step": 2209 }, { "epoch": 0.10002262955419779, "grad_norm": 0.810784992270613, "learning_rate": 9.872157901408863e-06, "loss": 0.4549, "step": 2210 }, { "epoch": 0.10006788866259335, "grad_norm": 0.7561176597277325, "learning_rate": 9.871993173120633e-06, "loss": 0.4739, "step": 2211 }, { "epoch": 0.10011314777098891, "grad_norm": 0.7224618216199857, "learning_rate": 9.871828340148232e-06, "loss": 0.3496, "step": 2212 }, { "epoch": 0.10015840687938447, "grad_norm": 0.48863707696024233, "learning_rate": 9.871663402495202e-06, "loss": 0.537, "step": 2213 }, { "epoch": 0.10020366598778004, "grad_norm": 0.7221464520254421, "learning_rate": 9.87149836016509e-06, "loss": 0.43, "step": 2214 }, { "epoch": 0.10024892509617561, "grad_norm": 0.7771878759212629, "learning_rate": 9.871333213161438e-06, "loss": 0.436, "step": 2215 }, { "epoch": 0.10029418420457117, "grad_norm": 0.7560332076204587, "learning_rate": 9.871167961487798e-06, "loss": 0.4362, "step": 2216 }, { "epoch": 0.10033944331296674, "grad_norm": 0.8018401387575208, "learning_rate": 9.871002605147717e-06, "loss": 0.4112, "step": 2217 }, { "epoch": 0.1003847024213623, "grad_norm": 0.6839784259457237, "learning_rate": 9.870837144144752e-06, "loss": 0.4268, "step": 2218 }, { "epoch": 0.10042996152975786, "grad_norm": 0.6933617275224555, "learning_rate": 9.870671578482457e-06, "loss": 0.4214, "step": 2219 }, { "epoch": 0.10047522063815342, "grad_norm": 0.7206012184599776, "learning_rate": 9.870505908164386e-06, "loss": 0.4716, "step": 2220 }, { "epoch": 0.100520479746549, "grad_norm": 0.8075038548608849, "learning_rate": 9.870340133194103e-06, "loss": 0.4395, "step": 2221 }, { "epoch": 0.10056573885494456, "grad_norm": 0.7635458649375578, "learning_rate": 9.870174253575169e-06, "loss": 0.4831, "step": 2222 }, { "epoch": 0.10061099796334012, "grad_norm": 0.8634190586113031, "learning_rate": 9.870008269311148e-06, "loss": 0.437, "step": 2223 }, { "epoch": 0.10065625707173569, "grad_norm": 0.6343631893568048, "learning_rate": 9.869842180405607e-06, "loss": 0.4045, "step": 2224 }, { "epoch": 0.10070151618013125, "grad_norm": 0.7399088703990067, "learning_rate": 9.869675986862113e-06, "loss": 0.4848, "step": 2225 }, { "epoch": 0.10074677528852681, "grad_norm": 0.7663305947086005, "learning_rate": 9.869509688684238e-06, "loss": 0.4651, "step": 2226 }, { "epoch": 0.10079203439692239, "grad_norm": 0.7544720440825532, "learning_rate": 9.869343285875556e-06, "loss": 0.4519, "step": 2227 }, { "epoch": 0.10083729350531795, "grad_norm": 0.7707733603624612, "learning_rate": 9.869176778439641e-06, "loss": 0.4276, "step": 2228 }, { "epoch": 0.10088255261371351, "grad_norm": 0.8454366073314977, "learning_rate": 9.869010166380074e-06, "loss": 0.4214, "step": 2229 }, { "epoch": 0.10092781172210907, "grad_norm": 0.5461864636688677, "learning_rate": 9.868843449700429e-06, "loss": 0.5228, "step": 2230 }, { "epoch": 0.10097307083050464, "grad_norm": 0.7294155882889573, "learning_rate": 9.868676628404294e-06, "loss": 0.4261, "step": 2231 }, { "epoch": 0.1010183299389002, "grad_norm": 0.3168440817594752, "learning_rate": 9.86850970249525e-06, "loss": 0.5219, "step": 2232 }, { "epoch": 0.10106358904729577, "grad_norm": 0.7579059657604005, "learning_rate": 9.868342671976887e-06, "loss": 0.3979, "step": 2233 }, { "epoch": 0.10110884815569134, "grad_norm": 0.7292905542346751, "learning_rate": 9.86817553685279e-06, "loss": 0.4608, "step": 2234 }, { "epoch": 0.1011541072640869, "grad_norm": 0.8458997911415539, "learning_rate": 9.868008297126552e-06, "loss": 0.4371, "step": 2235 }, { "epoch": 0.10119936637248246, "grad_norm": 0.824759911636903, "learning_rate": 9.867840952801768e-06, "loss": 0.3859, "step": 2236 }, { "epoch": 0.10124462548087802, "grad_norm": 0.7304273464477413, "learning_rate": 9.867673503882031e-06, "loss": 0.44, "step": 2237 }, { "epoch": 0.10128988458927358, "grad_norm": 0.7799304327722856, "learning_rate": 9.867505950370942e-06, "loss": 0.4505, "step": 2238 }, { "epoch": 0.10133514369766916, "grad_norm": 0.7306543877797002, "learning_rate": 9.8673382922721e-06, "loss": 0.4801, "step": 2239 }, { "epoch": 0.10138040280606472, "grad_norm": 0.7435279790784834, "learning_rate": 9.867170529589106e-06, "loss": 0.5353, "step": 2240 }, { "epoch": 0.10142566191446029, "grad_norm": 0.6603844615030072, "learning_rate": 9.867002662325564e-06, "loss": 0.4005, "step": 2241 }, { "epoch": 0.10147092102285585, "grad_norm": 0.8630624848995856, "learning_rate": 9.866834690485083e-06, "loss": 0.4415, "step": 2242 }, { "epoch": 0.10151618013125141, "grad_norm": 0.9896646400129324, "learning_rate": 9.866666614071274e-06, "loss": 0.4243, "step": 2243 }, { "epoch": 0.10156143923964697, "grad_norm": 0.41220129274846495, "learning_rate": 9.866498433087745e-06, "loss": 0.5234, "step": 2244 }, { "epoch": 0.10160669834804255, "grad_norm": 0.8362621005931979, "learning_rate": 9.86633014753811e-06, "loss": 0.4474, "step": 2245 }, { "epoch": 0.10165195745643811, "grad_norm": 0.7503981593504316, "learning_rate": 9.866161757425988e-06, "loss": 0.4534, "step": 2246 }, { "epoch": 0.10169721656483367, "grad_norm": 0.41195840898795105, "learning_rate": 9.865993262754993e-06, "loss": 0.5063, "step": 2247 }, { "epoch": 0.10174247567322924, "grad_norm": 0.7265194341332623, "learning_rate": 9.86582466352875e-06, "loss": 0.433, "step": 2248 }, { "epoch": 0.1017877347816248, "grad_norm": 0.8409081526645884, "learning_rate": 9.865655959750877e-06, "loss": 0.4292, "step": 2249 }, { "epoch": 0.10183299389002037, "grad_norm": 3.0630792854097093, "learning_rate": 9.865487151425003e-06, "loss": 0.4367, "step": 2250 }, { "epoch": 0.10187825299841594, "grad_norm": 0.8861233445538413, "learning_rate": 9.865318238554754e-06, "loss": 0.4542, "step": 2251 }, { "epoch": 0.1019235121068115, "grad_norm": 0.7822807066601043, "learning_rate": 9.865149221143755e-06, "loss": 0.418, "step": 2252 }, { "epoch": 0.10196877121520706, "grad_norm": 0.7559943477806504, "learning_rate": 9.864980099195644e-06, "loss": 0.4496, "step": 2253 }, { "epoch": 0.10201403032360262, "grad_norm": 0.768806042496435, "learning_rate": 9.864810872714053e-06, "loss": 0.4134, "step": 2254 }, { "epoch": 0.10205928943199818, "grad_norm": 0.7047306300749351, "learning_rate": 9.864641541702616e-06, "loss": 0.4246, "step": 2255 }, { "epoch": 0.10210454854039376, "grad_norm": 0.7028731886223434, "learning_rate": 9.864472106164974e-06, "loss": 0.4247, "step": 2256 }, { "epoch": 0.10214980764878932, "grad_norm": 0.7153130576512241, "learning_rate": 9.864302566104764e-06, "loss": 0.4625, "step": 2257 }, { "epoch": 0.10219506675718489, "grad_norm": 1.048917465822911, "learning_rate": 9.864132921525633e-06, "loss": 0.4191, "step": 2258 }, { "epoch": 0.10224032586558045, "grad_norm": 0.7899022592166589, "learning_rate": 9.863963172431225e-06, "loss": 0.4477, "step": 2259 }, { "epoch": 0.10228558497397601, "grad_norm": 0.47204269426321754, "learning_rate": 9.863793318825186e-06, "loss": 0.5383, "step": 2260 }, { "epoch": 0.10233084408237157, "grad_norm": 0.6892225594080795, "learning_rate": 9.863623360711167e-06, "loss": 0.4722, "step": 2261 }, { "epoch": 0.10237610319076715, "grad_norm": 0.748046058634716, "learning_rate": 9.86345329809282e-06, "loss": 0.4466, "step": 2262 }, { "epoch": 0.10242136229916271, "grad_norm": 0.7130768776206144, "learning_rate": 9.863283130973799e-06, "loss": 0.4145, "step": 2263 }, { "epoch": 0.10246662140755827, "grad_norm": 0.8219802036337167, "learning_rate": 9.86311285935776e-06, "loss": 0.4318, "step": 2264 }, { "epoch": 0.10251188051595383, "grad_norm": 0.3198446179300901, "learning_rate": 9.86294248324836e-06, "loss": 0.5065, "step": 2265 }, { "epoch": 0.1025571396243494, "grad_norm": 0.756389577087066, "learning_rate": 9.862772002649261e-06, "loss": 0.4529, "step": 2266 }, { "epoch": 0.10260239873274496, "grad_norm": 0.7648477142665258, "learning_rate": 9.862601417564128e-06, "loss": 0.4489, "step": 2267 }, { "epoch": 0.10264765784114054, "grad_norm": 0.7699363576476402, "learning_rate": 9.862430727996627e-06, "loss": 0.4219, "step": 2268 }, { "epoch": 0.1026929169495361, "grad_norm": 0.7729899239840131, "learning_rate": 9.86225993395042e-06, "loss": 0.407, "step": 2269 }, { "epoch": 0.10273817605793166, "grad_norm": 0.7204098134653517, "learning_rate": 9.86208903542918e-06, "loss": 0.3968, "step": 2270 }, { "epoch": 0.10278343516632722, "grad_norm": 0.35254404610092344, "learning_rate": 9.861918032436582e-06, "loss": 0.5204, "step": 2271 }, { "epoch": 0.10282869427472278, "grad_norm": 0.7655693019851239, "learning_rate": 9.861746924976297e-06, "loss": 0.4407, "step": 2272 }, { "epoch": 0.10287395338311835, "grad_norm": 0.7358913387904376, "learning_rate": 9.861575713052e-06, "loss": 0.444, "step": 2273 }, { "epoch": 0.10291921249151392, "grad_norm": 0.7648849776920894, "learning_rate": 9.861404396667375e-06, "loss": 0.4331, "step": 2274 }, { "epoch": 0.10296447159990949, "grad_norm": 0.7137409472692013, "learning_rate": 9.861232975826098e-06, "loss": 0.4632, "step": 2275 }, { "epoch": 0.10300973070830505, "grad_norm": 0.8041533524954448, "learning_rate": 9.861061450531857e-06, "loss": 0.4217, "step": 2276 }, { "epoch": 0.10305498981670061, "grad_norm": 0.6937125693526789, "learning_rate": 9.860889820788333e-06, "loss": 0.3682, "step": 2277 }, { "epoch": 0.10310024892509617, "grad_norm": 0.3890112300684754, "learning_rate": 9.860718086599217e-06, "loss": 0.5088, "step": 2278 }, { "epoch": 0.10314550803349173, "grad_norm": 0.755158671146552, "learning_rate": 9.860546247968196e-06, "loss": 0.4567, "step": 2279 }, { "epoch": 0.10319076714188731, "grad_norm": 0.6964351594584257, "learning_rate": 9.860374304898966e-06, "loss": 0.3703, "step": 2280 }, { "epoch": 0.10323602625028287, "grad_norm": 0.6942034226642203, "learning_rate": 9.86020225739522e-06, "loss": 0.4203, "step": 2281 }, { "epoch": 0.10328128535867843, "grad_norm": 0.7493731249129036, "learning_rate": 9.860030105460655e-06, "loss": 0.4105, "step": 2282 }, { "epoch": 0.103326544467074, "grad_norm": 0.7875242177860882, "learning_rate": 9.859857849098967e-06, "loss": 0.3853, "step": 2283 }, { "epoch": 0.10337180357546956, "grad_norm": 0.6723055543092835, "learning_rate": 9.859685488313861e-06, "loss": 0.4332, "step": 2284 }, { "epoch": 0.10341706268386512, "grad_norm": 0.6560360537570137, "learning_rate": 9.859513023109037e-06, "loss": 0.4391, "step": 2285 }, { "epoch": 0.1034623217922607, "grad_norm": 0.6849114895958732, "learning_rate": 9.859340453488206e-06, "loss": 0.4383, "step": 2286 }, { "epoch": 0.10350758090065626, "grad_norm": 0.7783040393773684, "learning_rate": 9.859167779455072e-06, "loss": 0.4432, "step": 2287 }, { "epoch": 0.10355284000905182, "grad_norm": 0.724288770259416, "learning_rate": 9.858995001013347e-06, "loss": 0.4384, "step": 2288 }, { "epoch": 0.10359809911744738, "grad_norm": 0.7111758444461136, "learning_rate": 9.858822118166742e-06, "loss": 0.4456, "step": 2289 }, { "epoch": 0.10364335822584295, "grad_norm": 0.6932478030538325, "learning_rate": 9.85864913091897e-06, "loss": 0.4351, "step": 2290 }, { "epoch": 0.10368861733423851, "grad_norm": 0.7579749515859955, "learning_rate": 9.858476039273755e-06, "loss": 0.4304, "step": 2291 }, { "epoch": 0.10373387644263408, "grad_norm": 0.7060927633532289, "learning_rate": 9.85830284323481e-06, "loss": 0.4056, "step": 2292 }, { "epoch": 0.10377913555102965, "grad_norm": 0.7485879479542586, "learning_rate": 9.858129542805857e-06, "loss": 0.4651, "step": 2293 }, { "epoch": 0.10382439465942521, "grad_norm": 0.48112576797143786, "learning_rate": 9.857956137990621e-06, "loss": 0.5417, "step": 2294 }, { "epoch": 0.10386965376782077, "grad_norm": 0.7667656764960077, "learning_rate": 9.857782628792826e-06, "loss": 0.4429, "step": 2295 }, { "epoch": 0.10391491287621633, "grad_norm": 0.6954661751657719, "learning_rate": 9.857609015216205e-06, "loss": 0.436, "step": 2296 }, { "epoch": 0.10396017198461191, "grad_norm": 0.6967576041244012, "learning_rate": 9.857435297264484e-06, "loss": 0.4751, "step": 2297 }, { "epoch": 0.10400543109300747, "grad_norm": 0.834588518369712, "learning_rate": 9.857261474941397e-06, "loss": 0.4228, "step": 2298 }, { "epoch": 0.10405069020140303, "grad_norm": 0.6721588650367362, "learning_rate": 9.85708754825068e-06, "loss": 0.3802, "step": 2299 }, { "epoch": 0.1040959493097986, "grad_norm": 0.41026951413440077, "learning_rate": 9.856913517196065e-06, "loss": 0.5238, "step": 2300 }, { "epoch": 0.10414120841819416, "grad_norm": 0.7035267949243749, "learning_rate": 9.8567393817813e-06, "loss": 0.4336, "step": 2301 }, { "epoch": 0.10418646752658972, "grad_norm": 0.7585486917088173, "learning_rate": 9.85656514201012e-06, "loss": 0.4439, "step": 2302 }, { "epoch": 0.1042317266349853, "grad_norm": 0.6649764632725609, "learning_rate": 9.85639079788627e-06, "loss": 0.403, "step": 2303 }, { "epoch": 0.10427698574338086, "grad_norm": 0.7385706952114729, "learning_rate": 9.856216349413499e-06, "loss": 0.4479, "step": 2304 }, { "epoch": 0.10432224485177642, "grad_norm": 0.6905045514972701, "learning_rate": 9.856041796595553e-06, "loss": 0.3824, "step": 2305 }, { "epoch": 0.10436750396017198, "grad_norm": 0.6327526228856807, "learning_rate": 9.855867139436182e-06, "loss": 0.395, "step": 2306 }, { "epoch": 0.10441276306856755, "grad_norm": 0.7546079408741386, "learning_rate": 9.85569237793914e-06, "loss": 0.48, "step": 2307 }, { "epoch": 0.10445802217696311, "grad_norm": 0.737242030395461, "learning_rate": 9.855517512108182e-06, "loss": 0.4505, "step": 2308 }, { "epoch": 0.10450328128535868, "grad_norm": 0.6670828689286896, "learning_rate": 9.855342541947065e-06, "loss": 0.4319, "step": 2309 }, { "epoch": 0.10454854039375425, "grad_norm": 0.7111244403772158, "learning_rate": 9.855167467459548e-06, "loss": 0.4621, "step": 2310 }, { "epoch": 0.10459379950214981, "grad_norm": 0.7428775133067482, "learning_rate": 9.854992288649397e-06, "loss": 0.4183, "step": 2311 }, { "epoch": 0.10463905861054537, "grad_norm": 0.7683796554367252, "learning_rate": 9.85481700552037e-06, "loss": 0.4512, "step": 2312 }, { "epoch": 0.10468431771894093, "grad_norm": 0.6426490380469407, "learning_rate": 9.854641618076236e-06, "loss": 0.4567, "step": 2313 }, { "epoch": 0.1047295768273365, "grad_norm": 0.6711259744328443, "learning_rate": 9.854466126320763e-06, "loss": 0.401, "step": 2314 }, { "epoch": 0.10477483593573207, "grad_norm": 0.8455675550799173, "learning_rate": 9.854290530257723e-06, "loss": 0.4774, "step": 2315 }, { "epoch": 0.10482009504412763, "grad_norm": 0.7467569882797702, "learning_rate": 9.85411482989089e-06, "loss": 0.4238, "step": 2316 }, { "epoch": 0.1048653541525232, "grad_norm": 0.734320352470716, "learning_rate": 9.853939025224037e-06, "loss": 0.4381, "step": 2317 }, { "epoch": 0.10491061326091876, "grad_norm": 0.6341079183035991, "learning_rate": 9.853763116260941e-06, "loss": 0.4068, "step": 2318 }, { "epoch": 0.10495587236931432, "grad_norm": 0.7800650722788848, "learning_rate": 9.853587103005382e-06, "loss": 0.4489, "step": 2319 }, { "epoch": 0.10500113147770988, "grad_norm": 0.451047452424555, "learning_rate": 9.853410985461145e-06, "loss": 0.5282, "step": 2320 }, { "epoch": 0.10504639058610546, "grad_norm": 0.38852085304725875, "learning_rate": 9.85323476363201e-06, "loss": 0.5363, "step": 2321 }, { "epoch": 0.10509164969450102, "grad_norm": 0.31390542967558155, "learning_rate": 9.853058437521768e-06, "loss": 0.5195, "step": 2322 }, { "epoch": 0.10513690880289658, "grad_norm": 0.7989730108740146, "learning_rate": 9.852882007134202e-06, "loss": 0.434, "step": 2323 }, { "epoch": 0.10518216791129215, "grad_norm": 0.749871253521505, "learning_rate": 9.852705472473108e-06, "loss": 0.4473, "step": 2324 }, { "epoch": 0.10522742701968771, "grad_norm": 0.7727191908261867, "learning_rate": 9.852528833542278e-06, "loss": 0.4168, "step": 2325 }, { "epoch": 0.10527268612808327, "grad_norm": 0.7798115976467546, "learning_rate": 9.852352090345504e-06, "loss": 0.4311, "step": 2326 }, { "epoch": 0.10531794523647885, "grad_norm": 0.7268627450505267, "learning_rate": 9.85217524288659e-06, "loss": 0.4638, "step": 2327 }, { "epoch": 0.10536320434487441, "grad_norm": 0.8698990762933937, "learning_rate": 9.851998291169332e-06, "loss": 0.4049, "step": 2328 }, { "epoch": 0.10540846345326997, "grad_norm": 0.6206157334058049, "learning_rate": 9.85182123519753e-06, "loss": 0.5142, "step": 2329 }, { "epoch": 0.10545372256166553, "grad_norm": 0.7495598486543176, "learning_rate": 9.851644074974992e-06, "loss": 0.4425, "step": 2330 }, { "epoch": 0.1054989816700611, "grad_norm": 0.6881993019531567, "learning_rate": 9.851466810505523e-06, "loss": 0.4056, "step": 2331 }, { "epoch": 0.10554424077845666, "grad_norm": 0.7274463239961425, "learning_rate": 9.851289441792934e-06, "loss": 0.4254, "step": 2332 }, { "epoch": 0.10558949988685223, "grad_norm": 0.7768959308161464, "learning_rate": 9.851111968841033e-06, "loss": 0.4515, "step": 2333 }, { "epoch": 0.1056347589952478, "grad_norm": 0.7460630867250454, "learning_rate": 9.850934391653636e-06, "loss": 0.3977, "step": 2334 }, { "epoch": 0.10568001810364336, "grad_norm": 0.7049755514403179, "learning_rate": 9.850756710234557e-06, "loss": 0.3988, "step": 2335 }, { "epoch": 0.10572527721203892, "grad_norm": 0.8415832391023809, "learning_rate": 9.850578924587614e-06, "loss": 0.4394, "step": 2336 }, { "epoch": 0.10577053632043448, "grad_norm": 0.9337415198625878, "learning_rate": 9.850401034716629e-06, "loss": 0.3985, "step": 2337 }, { "epoch": 0.10581579542883006, "grad_norm": 0.7403823554645752, "learning_rate": 9.85022304062542e-06, "loss": 0.4113, "step": 2338 }, { "epoch": 0.10586105453722562, "grad_norm": 0.7597975447386219, "learning_rate": 9.850044942317814e-06, "loss": 0.4565, "step": 2339 }, { "epoch": 0.10590631364562118, "grad_norm": 0.7379521994471612, "learning_rate": 9.84986673979764e-06, "loss": 0.4617, "step": 2340 }, { "epoch": 0.10595157275401675, "grad_norm": 0.5879028793940111, "learning_rate": 9.849688433068724e-06, "loss": 0.4947, "step": 2341 }, { "epoch": 0.10599683186241231, "grad_norm": 0.8458052413227429, "learning_rate": 9.849510022134899e-06, "loss": 0.4749, "step": 2342 }, { "epoch": 0.10604209097080787, "grad_norm": 0.7548523560498813, "learning_rate": 9.849331506999996e-06, "loss": 0.4197, "step": 2343 }, { "epoch": 0.10608735007920345, "grad_norm": 0.7488304007811081, "learning_rate": 9.849152887667855e-06, "loss": 0.438, "step": 2344 }, { "epoch": 0.10613260918759901, "grad_norm": 0.7414720569112878, "learning_rate": 9.848974164142309e-06, "loss": 0.4229, "step": 2345 }, { "epoch": 0.10617786829599457, "grad_norm": 0.7423339816887292, "learning_rate": 9.848795336427202e-06, "loss": 0.4464, "step": 2346 }, { "epoch": 0.10622312740439013, "grad_norm": 0.78079522511492, "learning_rate": 9.848616404526374e-06, "loss": 0.4756, "step": 2347 }, { "epoch": 0.1062683865127857, "grad_norm": 0.8166039341128437, "learning_rate": 9.848437368443672e-06, "loss": 0.4547, "step": 2348 }, { "epoch": 0.10631364562118126, "grad_norm": 0.6081730989414673, "learning_rate": 9.848258228182943e-06, "loss": 0.5249, "step": 2349 }, { "epoch": 0.10635890472957683, "grad_norm": 0.7141425259010529, "learning_rate": 9.848078983748032e-06, "loss": 0.3982, "step": 2350 }, { "epoch": 0.1064041638379724, "grad_norm": 0.7046271198271215, "learning_rate": 9.847899635142797e-06, "loss": 0.4029, "step": 2351 }, { "epoch": 0.10644942294636796, "grad_norm": 0.72872473152322, "learning_rate": 9.847720182371086e-06, "loss": 0.4417, "step": 2352 }, { "epoch": 0.10649468205476352, "grad_norm": 0.7796132442433459, "learning_rate": 9.847540625436756e-06, "loss": 0.4342, "step": 2353 }, { "epoch": 0.10653994116315908, "grad_norm": 0.7273745717727493, "learning_rate": 9.847360964343667e-06, "loss": 0.4482, "step": 2354 }, { "epoch": 0.10658520027155464, "grad_norm": 0.4829803438786883, "learning_rate": 9.84718119909568e-06, "loss": 0.4964, "step": 2355 }, { "epoch": 0.10663045937995022, "grad_norm": 0.436561756230908, "learning_rate": 9.847001329696653e-06, "loss": 0.5238, "step": 2356 }, { "epoch": 0.10667571848834578, "grad_norm": 0.8452537247177253, "learning_rate": 9.846821356150455e-06, "loss": 0.3898, "step": 2357 }, { "epoch": 0.10672097759674135, "grad_norm": 0.3834464300732562, "learning_rate": 9.846641278460952e-06, "loss": 0.5405, "step": 2358 }, { "epoch": 0.10676623670513691, "grad_norm": 0.7367450263313308, "learning_rate": 9.846461096632014e-06, "loss": 0.4122, "step": 2359 }, { "epoch": 0.10681149581353247, "grad_norm": 0.40049087997983424, "learning_rate": 9.846280810667512e-06, "loss": 0.5298, "step": 2360 }, { "epoch": 0.10685675492192803, "grad_norm": 0.35495512975334464, "learning_rate": 9.846100420571319e-06, "loss": 0.4991, "step": 2361 }, { "epoch": 0.10690201403032361, "grad_norm": 0.8965978069317818, "learning_rate": 9.84591992634731e-06, "loss": 0.4215, "step": 2362 }, { "epoch": 0.10694727313871917, "grad_norm": 0.36426252834219985, "learning_rate": 9.845739327999366e-06, "loss": 0.5186, "step": 2363 }, { "epoch": 0.10699253224711473, "grad_norm": 0.7992452990273541, "learning_rate": 9.845558625531368e-06, "loss": 0.4627, "step": 2364 }, { "epoch": 0.1070377913555103, "grad_norm": 0.8018786875684687, "learning_rate": 9.845377818947194e-06, "loss": 0.4202, "step": 2365 }, { "epoch": 0.10708305046390586, "grad_norm": 0.747643050655193, "learning_rate": 9.845196908250737e-06, "loss": 0.4563, "step": 2366 }, { "epoch": 0.10712830957230142, "grad_norm": 0.43328647639779194, "learning_rate": 9.845015893445874e-06, "loss": 0.5062, "step": 2367 }, { "epoch": 0.107173568680697, "grad_norm": 0.40488746552796007, "learning_rate": 9.844834774536503e-06, "loss": 0.4825, "step": 2368 }, { "epoch": 0.10721882778909256, "grad_norm": 0.9608323793728127, "learning_rate": 9.84465355152651e-06, "loss": 0.4464, "step": 2369 }, { "epoch": 0.10726408689748812, "grad_norm": 0.4088980125981813, "learning_rate": 9.844472224419794e-06, "loss": 0.522, "step": 2370 }, { "epoch": 0.10730934600588368, "grad_norm": 0.7098724808096835, "learning_rate": 9.844290793220249e-06, "loss": 0.4272, "step": 2371 }, { "epoch": 0.10735460511427924, "grad_norm": 0.810840485749749, "learning_rate": 9.84410925793177e-06, "loss": 0.4372, "step": 2372 }, { "epoch": 0.1073998642226748, "grad_norm": 0.4924884016840848, "learning_rate": 9.843927618558262e-06, "loss": 0.5439, "step": 2373 }, { "epoch": 0.10744512333107038, "grad_norm": 0.8003323683533289, "learning_rate": 9.843745875103628e-06, "loss": 0.471, "step": 2374 }, { "epoch": 0.10749038243946595, "grad_norm": 0.7457532055280077, "learning_rate": 9.84356402757177e-06, "loss": 0.4476, "step": 2375 }, { "epoch": 0.10753564154786151, "grad_norm": 0.7647485411939247, "learning_rate": 9.843382075966596e-06, "loss": 0.4134, "step": 2376 }, { "epoch": 0.10758090065625707, "grad_norm": 0.8488959006361999, "learning_rate": 9.843200020292017e-06, "loss": 0.3952, "step": 2377 }, { "epoch": 0.10762615976465263, "grad_norm": 0.7277350914827775, "learning_rate": 9.843017860551946e-06, "loss": 0.4477, "step": 2378 }, { "epoch": 0.1076714188730482, "grad_norm": 0.7694876133740054, "learning_rate": 9.842835596750292e-06, "loss": 0.3839, "step": 2379 }, { "epoch": 0.10771667798144377, "grad_norm": 0.9111084221791587, "learning_rate": 9.842653228890979e-06, "loss": 0.4337, "step": 2380 }, { "epoch": 0.10776193708983933, "grad_norm": 0.6855619180024155, "learning_rate": 9.84247075697792e-06, "loss": 0.3778, "step": 2381 }, { "epoch": 0.1078071961982349, "grad_norm": 0.6863604389228337, "learning_rate": 9.842288181015035e-06, "loss": 0.3973, "step": 2382 }, { "epoch": 0.10785245530663046, "grad_norm": 0.6123620312305323, "learning_rate": 9.84210550100625e-06, "loss": 0.5356, "step": 2383 }, { "epoch": 0.10789771441502602, "grad_norm": 0.8038667464891291, "learning_rate": 9.841922716955488e-06, "loss": 0.4588, "step": 2384 }, { "epoch": 0.1079429735234216, "grad_norm": 0.8232987417202955, "learning_rate": 9.84173982886668e-06, "loss": 0.4849, "step": 2385 }, { "epoch": 0.10798823263181716, "grad_norm": 0.7063766129624343, "learning_rate": 9.841556836743752e-06, "loss": 0.4006, "step": 2386 }, { "epoch": 0.10803349174021272, "grad_norm": 0.8885217926078794, "learning_rate": 9.841373740590638e-06, "loss": 0.4433, "step": 2387 }, { "epoch": 0.10807875084860828, "grad_norm": 0.7139772742218194, "learning_rate": 9.84119054041127e-06, "loss": 0.4048, "step": 2388 }, { "epoch": 0.10812400995700384, "grad_norm": 0.7428126319567718, "learning_rate": 9.841007236209588e-06, "loss": 0.4283, "step": 2389 }, { "epoch": 0.1081692690653994, "grad_norm": 0.7118335211303995, "learning_rate": 9.840823827989526e-06, "loss": 0.438, "step": 2390 }, { "epoch": 0.10821452817379498, "grad_norm": 0.71790940397756, "learning_rate": 9.84064031575503e-06, "loss": 0.4164, "step": 2391 }, { "epoch": 0.10825978728219054, "grad_norm": 0.7169223945081842, "learning_rate": 9.840456699510038e-06, "loss": 0.4193, "step": 2392 }, { "epoch": 0.10830504639058611, "grad_norm": 0.5818471073518655, "learning_rate": 9.840272979258498e-06, "loss": 0.5264, "step": 2393 }, { "epoch": 0.10835030549898167, "grad_norm": 0.44257271729553715, "learning_rate": 9.84008915500436e-06, "loss": 0.5038, "step": 2394 }, { "epoch": 0.10839556460737723, "grad_norm": 0.9142415732457422, "learning_rate": 9.83990522675157e-06, "loss": 0.409, "step": 2395 }, { "epoch": 0.1084408237157728, "grad_norm": 0.857612046602221, "learning_rate": 9.83972119450408e-06, "loss": 0.4275, "step": 2396 }, { "epoch": 0.10848608282416837, "grad_norm": 0.8211196319920059, "learning_rate": 9.839537058265847e-06, "loss": 0.4086, "step": 2397 }, { "epoch": 0.10853134193256393, "grad_norm": 0.6625393546539192, "learning_rate": 9.839352818040825e-06, "loss": 0.5295, "step": 2398 }, { "epoch": 0.1085766010409595, "grad_norm": 0.8175415684719074, "learning_rate": 9.839168473832975e-06, "loss": 0.4349, "step": 2399 }, { "epoch": 0.10862186014935506, "grad_norm": 0.6971690315490437, "learning_rate": 9.838984025646257e-06, "loss": 0.398, "step": 2400 }, { "epoch": 0.10866711925775062, "grad_norm": 0.7333102031011268, "learning_rate": 9.838799473484633e-06, "loss": 0.4326, "step": 2401 }, { "epoch": 0.10871237836614618, "grad_norm": 0.7465017771426746, "learning_rate": 9.83861481735207e-06, "loss": 0.4563, "step": 2402 }, { "epoch": 0.10875763747454176, "grad_norm": 0.7089569072849802, "learning_rate": 9.838430057252537e-06, "loss": 0.4603, "step": 2403 }, { "epoch": 0.10880289658293732, "grad_norm": 0.9367161129821115, "learning_rate": 9.838245193189999e-06, "loss": 0.4472, "step": 2404 }, { "epoch": 0.10884815569133288, "grad_norm": 0.44700555725761754, "learning_rate": 9.838060225168432e-06, "loss": 0.5022, "step": 2405 }, { "epoch": 0.10889341479972844, "grad_norm": 0.7906653966767495, "learning_rate": 9.837875153191812e-06, "loss": 0.4389, "step": 2406 }, { "epoch": 0.108938673908124, "grad_norm": 0.7286294782716575, "learning_rate": 9.837689977264111e-06, "loss": 0.4223, "step": 2407 }, { "epoch": 0.10898393301651957, "grad_norm": 2.253138918510198, "learning_rate": 9.837504697389311e-06, "loss": 0.4281, "step": 2408 }, { "epoch": 0.10902919212491514, "grad_norm": 0.7661119819405651, "learning_rate": 9.837319313571394e-06, "loss": 0.4392, "step": 2409 }, { "epoch": 0.1090744512333107, "grad_norm": 0.7407852422162088, "learning_rate": 9.83713382581434e-06, "loss": 0.4698, "step": 2410 }, { "epoch": 0.10911971034170627, "grad_norm": 0.8474642676205407, "learning_rate": 9.836948234122136e-06, "loss": 0.4589, "step": 2411 }, { "epoch": 0.10916496945010183, "grad_norm": 0.7070961319248004, "learning_rate": 9.83676253849877e-06, "loss": 0.4159, "step": 2412 }, { "epoch": 0.1092102285584974, "grad_norm": 0.8516499038377042, "learning_rate": 9.836576738948234e-06, "loss": 0.5101, "step": 2413 }, { "epoch": 0.10925548766689296, "grad_norm": 0.7205091158697706, "learning_rate": 9.836390835474516e-06, "loss": 0.4545, "step": 2414 }, { "epoch": 0.10930074677528853, "grad_norm": 0.41820220247791706, "learning_rate": 9.836204828081612e-06, "loss": 0.489, "step": 2415 }, { "epoch": 0.1093460058836841, "grad_norm": 0.7746337605139181, "learning_rate": 9.836018716773522e-06, "loss": 0.4397, "step": 2416 }, { "epoch": 0.10939126499207966, "grad_norm": 0.7362030193141964, "learning_rate": 9.835832501554242e-06, "loss": 0.4459, "step": 2417 }, { "epoch": 0.10943652410047522, "grad_norm": 0.7433426511252804, "learning_rate": 9.835646182427773e-06, "loss": 0.4436, "step": 2418 }, { "epoch": 0.10948178320887078, "grad_norm": 0.8830194859078878, "learning_rate": 9.835459759398118e-06, "loss": 0.4438, "step": 2419 }, { "epoch": 0.10952704231726634, "grad_norm": 0.7277600436583608, "learning_rate": 9.835273232469285e-06, "loss": 0.4386, "step": 2420 }, { "epoch": 0.10957230142566192, "grad_norm": 0.3795240972724703, "learning_rate": 9.83508660164528e-06, "loss": 0.5036, "step": 2421 }, { "epoch": 0.10961756053405748, "grad_norm": 0.7872464533012712, "learning_rate": 9.834899866930116e-06, "loss": 0.4306, "step": 2422 }, { "epoch": 0.10966281964245304, "grad_norm": 0.7764837247171241, "learning_rate": 9.834713028327802e-06, "loss": 0.4909, "step": 2423 }, { "epoch": 0.1097080787508486, "grad_norm": 0.6853382068771207, "learning_rate": 9.834526085842352e-06, "loss": 0.3955, "step": 2424 }, { "epoch": 0.10975333785924417, "grad_norm": 0.7321908758615492, "learning_rate": 9.834339039477787e-06, "loss": 0.4379, "step": 2425 }, { "epoch": 0.10979859696763974, "grad_norm": 0.7753512763451639, "learning_rate": 9.834151889238121e-06, "loss": 0.4054, "step": 2426 }, { "epoch": 0.1098438560760353, "grad_norm": 0.3675674524715627, "learning_rate": 9.83396463512738e-06, "loss": 0.5238, "step": 2427 }, { "epoch": 0.10988911518443087, "grad_norm": 0.3506600433838427, "learning_rate": 9.833777277149585e-06, "loss": 0.504, "step": 2428 }, { "epoch": 0.10993437429282643, "grad_norm": 0.91527237285889, "learning_rate": 9.833589815308761e-06, "loss": 0.5326, "step": 2429 }, { "epoch": 0.109979633401222, "grad_norm": 0.8309215990408595, "learning_rate": 9.833402249608938e-06, "loss": 0.4735, "step": 2430 }, { "epoch": 0.11002489250961756, "grad_norm": 0.6832150109748468, "learning_rate": 9.833214580054145e-06, "loss": 0.4463, "step": 2431 }, { "epoch": 0.11007015161801313, "grad_norm": 0.8721708352761983, "learning_rate": 9.833026806648415e-06, "loss": 0.3863, "step": 2432 }, { "epoch": 0.1101154107264087, "grad_norm": 0.7530372258702653, "learning_rate": 9.832838929395782e-06, "loss": 0.4044, "step": 2433 }, { "epoch": 0.11016066983480426, "grad_norm": 0.4474166144373787, "learning_rate": 9.832650948300284e-06, "loss": 0.5151, "step": 2434 }, { "epoch": 0.11020592894319982, "grad_norm": 0.9044477916013893, "learning_rate": 9.832462863365959e-06, "loss": 0.4523, "step": 2435 }, { "epoch": 0.11025118805159538, "grad_norm": 0.8437894209564453, "learning_rate": 9.83227467459685e-06, "loss": 0.4247, "step": 2436 }, { "epoch": 0.11029644715999094, "grad_norm": 0.7387425628334219, "learning_rate": 9.832086381996997e-06, "loss": 0.3963, "step": 2437 }, { "epoch": 0.11034170626838652, "grad_norm": 0.7854114015827275, "learning_rate": 9.83189798557045e-06, "loss": 0.4649, "step": 2438 }, { "epoch": 0.11038696537678208, "grad_norm": 0.827093510582746, "learning_rate": 9.831709485321255e-06, "loss": 0.3967, "step": 2439 }, { "epoch": 0.11043222448517764, "grad_norm": 0.7078503797630626, "learning_rate": 9.831520881253462e-06, "loss": 0.4789, "step": 2440 }, { "epoch": 0.1104774835935732, "grad_norm": 0.6430101363692668, "learning_rate": 9.831332173371125e-06, "loss": 0.4161, "step": 2441 }, { "epoch": 0.11052274270196877, "grad_norm": 0.685885874237446, "learning_rate": 9.831143361678299e-06, "loss": 0.4383, "step": 2442 }, { "epoch": 0.11056800181036433, "grad_norm": 0.7105514565656513, "learning_rate": 9.830954446179037e-06, "loss": 0.432, "step": 2443 }, { "epoch": 0.1106132609187599, "grad_norm": 0.48220270099095647, "learning_rate": 9.830765426877404e-06, "loss": 0.5381, "step": 2444 }, { "epoch": 0.11065852002715547, "grad_norm": 0.6741979700234679, "learning_rate": 9.830576303777456e-06, "loss": 0.4586, "step": 2445 }, { "epoch": 0.11070377913555103, "grad_norm": 0.7119060669901887, "learning_rate": 9.83038707688326e-06, "loss": 0.4608, "step": 2446 }, { "epoch": 0.1107490382439466, "grad_norm": 0.8096309822720701, "learning_rate": 9.830197746198882e-06, "loss": 0.4664, "step": 2447 }, { "epoch": 0.11079429735234216, "grad_norm": 0.8196824978383348, "learning_rate": 9.83000831172839e-06, "loss": 0.4452, "step": 2448 }, { "epoch": 0.11083955646073772, "grad_norm": 0.7556365021830586, "learning_rate": 9.829818773475852e-06, "loss": 0.4527, "step": 2449 }, { "epoch": 0.1108848155691333, "grad_norm": 0.8006816242158974, "learning_rate": 9.829629131445342e-06, "loss": 0.4201, "step": 2450 }, { "epoch": 0.11093007467752886, "grad_norm": 0.7554350680724521, "learning_rate": 9.829439385640936e-06, "loss": 0.4824, "step": 2451 }, { "epoch": 0.11097533378592442, "grad_norm": 0.664479601846548, "learning_rate": 9.82924953606671e-06, "loss": 0.3956, "step": 2452 }, { "epoch": 0.11102059289431998, "grad_norm": 0.6857058477647321, "learning_rate": 9.829059582726743e-06, "loss": 0.4114, "step": 2453 }, { "epoch": 0.11106585200271554, "grad_norm": 0.7722700935916896, "learning_rate": 9.828869525625118e-06, "loss": 0.4213, "step": 2454 }, { "epoch": 0.1111111111111111, "grad_norm": 0.7138037254822772, "learning_rate": 9.828679364765917e-06, "loss": 0.4329, "step": 2455 }, { "epoch": 0.11115637021950668, "grad_norm": 0.779255191989751, "learning_rate": 9.828489100153224e-06, "loss": 0.4003, "step": 2456 }, { "epoch": 0.11120162932790224, "grad_norm": 0.9137826989883181, "learning_rate": 9.828298731791133e-06, "loss": 0.4325, "step": 2457 }, { "epoch": 0.1112468884362978, "grad_norm": 0.7227084991013153, "learning_rate": 9.82810825968373e-06, "loss": 0.4099, "step": 2458 }, { "epoch": 0.11129214754469337, "grad_norm": 0.7269166807445072, "learning_rate": 9.827917683835109e-06, "loss": 0.3982, "step": 2459 }, { "epoch": 0.11133740665308893, "grad_norm": 0.6259376766075867, "learning_rate": 9.827727004249366e-06, "loss": 0.5181, "step": 2460 }, { "epoch": 0.11138266576148449, "grad_norm": 0.7294144278342447, "learning_rate": 9.827536220930596e-06, "loss": 0.4308, "step": 2461 }, { "epoch": 0.11142792486988007, "grad_norm": 0.7849374307877185, "learning_rate": 9.827345333882898e-06, "loss": 0.4454, "step": 2462 }, { "epoch": 0.11147318397827563, "grad_norm": 0.8153581247572056, "learning_rate": 9.827154343110376e-06, "loss": 0.4257, "step": 2463 }, { "epoch": 0.11151844308667119, "grad_norm": 0.6611837576004533, "learning_rate": 9.826963248617133e-06, "loss": 0.4247, "step": 2464 }, { "epoch": 0.11156370219506676, "grad_norm": 0.7848171203104205, "learning_rate": 9.826772050407273e-06, "loss": 0.4558, "step": 2465 }, { "epoch": 0.11160896130346232, "grad_norm": 2.1110926208357843, "learning_rate": 9.826580748484908e-06, "loss": 0.3769, "step": 2466 }, { "epoch": 0.11165422041185788, "grad_norm": 0.8213480521010255, "learning_rate": 9.826389342854146e-06, "loss": 0.5348, "step": 2467 }, { "epoch": 0.11169947952025346, "grad_norm": 0.7099766095535562, "learning_rate": 9.8261978335191e-06, "loss": 0.4407, "step": 2468 }, { "epoch": 0.11174473862864902, "grad_norm": 0.7630263984746236, "learning_rate": 9.826006220483886e-06, "loss": 0.425, "step": 2469 }, { "epoch": 0.11178999773704458, "grad_norm": 0.6950195430682758, "learning_rate": 9.825814503752618e-06, "loss": 0.4686, "step": 2470 }, { "epoch": 0.11183525684544014, "grad_norm": 0.7677560822279192, "learning_rate": 9.825622683329419e-06, "loss": 0.4228, "step": 2471 }, { "epoch": 0.1118805159538357, "grad_norm": 0.7758707129168558, "learning_rate": 9.82543075921841e-06, "loss": 0.4136, "step": 2472 }, { "epoch": 0.11192577506223128, "grad_norm": 0.46941180467558136, "learning_rate": 9.825238731423713e-06, "loss": 0.501, "step": 2473 }, { "epoch": 0.11197103417062684, "grad_norm": 0.7360325669571167, "learning_rate": 9.825046599949455e-06, "loss": 0.4119, "step": 2474 }, { "epoch": 0.1120162932790224, "grad_norm": 0.7760259047685877, "learning_rate": 9.824854364799766e-06, "loss": 0.4081, "step": 2475 }, { "epoch": 0.11206155238741797, "grad_norm": 0.764283254991474, "learning_rate": 9.824662025978774e-06, "loss": 0.4486, "step": 2476 }, { "epoch": 0.11210681149581353, "grad_norm": 0.702833413633441, "learning_rate": 9.824469583490612e-06, "loss": 0.4288, "step": 2477 }, { "epoch": 0.11215207060420909, "grad_norm": 0.40113438821656633, "learning_rate": 9.824277037339419e-06, "loss": 0.5195, "step": 2478 }, { "epoch": 0.11219732971260467, "grad_norm": 0.38158292572294616, "learning_rate": 9.824084387529326e-06, "loss": 0.5168, "step": 2479 }, { "epoch": 0.11224258882100023, "grad_norm": 0.956080719114455, "learning_rate": 9.823891634064478e-06, "loss": 0.4531, "step": 2480 }, { "epoch": 0.11228784792939579, "grad_norm": 0.7235253068311136, "learning_rate": 9.823698776949011e-06, "loss": 0.4813, "step": 2481 }, { "epoch": 0.11233310703779135, "grad_norm": 0.7522682244822607, "learning_rate": 9.823505816187076e-06, "loss": 0.4315, "step": 2482 }, { "epoch": 0.11237836614618692, "grad_norm": 0.8176435014549138, "learning_rate": 9.823312751782812e-06, "loss": 0.43, "step": 2483 }, { "epoch": 0.11242362525458248, "grad_norm": 0.718478291493913, "learning_rate": 9.823119583740373e-06, "loss": 0.4744, "step": 2484 }, { "epoch": 0.11246888436297806, "grad_norm": 0.6502726205028461, "learning_rate": 9.822926312063905e-06, "loss": 0.4179, "step": 2485 }, { "epoch": 0.11251414347137362, "grad_norm": 0.7305782785378717, "learning_rate": 9.822732936757564e-06, "loss": 0.4687, "step": 2486 }, { "epoch": 0.11255940257976918, "grad_norm": 0.6004507126829316, "learning_rate": 9.822539457825505e-06, "loss": 0.531, "step": 2487 }, { "epoch": 0.11260466168816474, "grad_norm": 0.7823041938016477, "learning_rate": 9.822345875271884e-06, "loss": 0.4376, "step": 2488 }, { "epoch": 0.1126499207965603, "grad_norm": 0.7453882450020083, "learning_rate": 9.82215218910086e-06, "loss": 0.4314, "step": 2489 }, { "epoch": 0.11269517990495587, "grad_norm": 0.6997393893692536, "learning_rate": 9.821958399316595e-06, "loss": 0.3992, "step": 2490 }, { "epoch": 0.11274043901335144, "grad_norm": 0.8002430016187357, "learning_rate": 9.821764505923257e-06, "loss": 0.4732, "step": 2491 }, { "epoch": 0.112785698121747, "grad_norm": 0.7487575906480041, "learning_rate": 9.821570508925005e-06, "loss": 0.4273, "step": 2492 }, { "epoch": 0.11283095723014257, "grad_norm": 0.741383909506027, "learning_rate": 9.821376408326013e-06, "loss": 0.4093, "step": 2493 }, { "epoch": 0.11287621633853813, "grad_norm": 0.8072793420604332, "learning_rate": 9.821182204130448e-06, "loss": 0.4081, "step": 2494 }, { "epoch": 0.11292147544693369, "grad_norm": 0.4724509747117798, "learning_rate": 9.820987896342487e-06, "loss": 0.515, "step": 2495 }, { "epoch": 0.11296673455532925, "grad_norm": 0.758529766742308, "learning_rate": 9.8207934849663e-06, "loss": 0.4594, "step": 2496 }, { "epoch": 0.11301199366372483, "grad_norm": 0.7146459027236057, "learning_rate": 9.820598970006068e-06, "loss": 0.4138, "step": 2497 }, { "epoch": 0.11305725277212039, "grad_norm": 0.34575479257833636, "learning_rate": 9.82040435146597e-06, "loss": 0.5035, "step": 2498 }, { "epoch": 0.11310251188051595, "grad_norm": 0.3175300244710189, "learning_rate": 9.820209629350189e-06, "loss": 0.4976, "step": 2499 }, { "epoch": 0.11314777098891152, "grad_norm": 0.9579112274642932, "learning_rate": 9.820014803662905e-06, "loss": 0.4725, "step": 2500 }, { "epoch": 0.11319303009730708, "grad_norm": 0.42437979117341523, "learning_rate": 9.819819874408306e-06, "loss": 0.5118, "step": 2501 }, { "epoch": 0.11323828920570264, "grad_norm": 1.0258124340930102, "learning_rate": 9.81962484159058e-06, "loss": 0.4211, "step": 2502 }, { "epoch": 0.11328354831409822, "grad_norm": 0.7774769252865491, "learning_rate": 9.819429705213922e-06, "loss": 0.4202, "step": 2503 }, { "epoch": 0.11332880742249378, "grad_norm": 0.7262055087963113, "learning_rate": 9.819234465282518e-06, "loss": 0.4285, "step": 2504 }, { "epoch": 0.11337406653088934, "grad_norm": 0.8528024190792679, "learning_rate": 9.819039121800568e-06, "loss": 0.4395, "step": 2505 }, { "epoch": 0.1134193256392849, "grad_norm": 0.723040071029397, "learning_rate": 9.818843674772268e-06, "loss": 0.4621, "step": 2506 }, { "epoch": 0.11346458474768047, "grad_norm": 0.6986040317293818, "learning_rate": 9.818648124201817e-06, "loss": 0.4238, "step": 2507 }, { "epoch": 0.11350984385607603, "grad_norm": 0.8031078796455979, "learning_rate": 9.818452470093416e-06, "loss": 0.463, "step": 2508 }, { "epoch": 0.1135551029644716, "grad_norm": 0.7185941171883956, "learning_rate": 9.818256712451272e-06, "loss": 0.4244, "step": 2509 }, { "epoch": 0.11360036207286717, "grad_norm": 0.7064335790434774, "learning_rate": 9.81806085127959e-06, "loss": 0.422, "step": 2510 }, { "epoch": 0.11364562118126273, "grad_norm": 0.658313320908971, "learning_rate": 9.817864886582575e-06, "loss": 0.4074, "step": 2511 }, { "epoch": 0.11369088028965829, "grad_norm": 0.5048450850733868, "learning_rate": 9.817668818364441e-06, "loss": 0.5352, "step": 2512 }, { "epoch": 0.11373613939805385, "grad_norm": 0.8615294240399992, "learning_rate": 9.817472646629403e-06, "loss": 0.4123, "step": 2513 }, { "epoch": 0.11378139850644943, "grad_norm": 0.3868215144673393, "learning_rate": 9.817276371381671e-06, "loss": 0.4918, "step": 2514 }, { "epoch": 0.11382665761484499, "grad_norm": 0.72087500047661, "learning_rate": 9.817079992625467e-06, "loss": 0.4139, "step": 2515 }, { "epoch": 0.11387191672324055, "grad_norm": 0.3519348388728298, "learning_rate": 9.816883510365007e-06, "loss": 0.5143, "step": 2516 }, { "epoch": 0.11391717583163612, "grad_norm": 0.37694064143955736, "learning_rate": 9.816686924604515e-06, "loss": 0.5142, "step": 2517 }, { "epoch": 0.11396243494003168, "grad_norm": 0.9441190145175116, "learning_rate": 9.816490235348215e-06, "loss": 0.4321, "step": 2518 }, { "epoch": 0.11400769404842724, "grad_norm": 0.77127837366527, "learning_rate": 9.816293442600331e-06, "loss": 0.4461, "step": 2519 }, { "epoch": 0.11405295315682282, "grad_norm": 0.8364582358922202, "learning_rate": 9.816096546365094e-06, "loss": 0.3901, "step": 2520 }, { "epoch": 0.11409821226521838, "grad_norm": 0.500008347179003, "learning_rate": 9.815899546646734e-06, "loss": 0.5407, "step": 2521 }, { "epoch": 0.11414347137361394, "grad_norm": 0.8221660850937721, "learning_rate": 9.815702443449482e-06, "loss": 0.428, "step": 2522 }, { "epoch": 0.1141887304820095, "grad_norm": 0.869718534293267, "learning_rate": 9.815505236777576e-06, "loss": 0.4608, "step": 2523 }, { "epoch": 0.11423398959040507, "grad_norm": 0.38870622655972636, "learning_rate": 9.815307926635252e-06, "loss": 0.5186, "step": 2524 }, { "epoch": 0.11427924869880063, "grad_norm": 0.8050537795361368, "learning_rate": 9.815110513026749e-06, "loss": 0.3684, "step": 2525 }, { "epoch": 0.1143245078071962, "grad_norm": 0.9066359801731271, "learning_rate": 9.814912995956311e-06, "loss": 0.4784, "step": 2526 }, { "epoch": 0.11436976691559177, "grad_norm": 0.780943610306695, "learning_rate": 9.814715375428181e-06, "loss": 0.4326, "step": 2527 }, { "epoch": 0.11441502602398733, "grad_norm": 0.7916808022497258, "learning_rate": 9.814517651446603e-06, "loss": 0.4624, "step": 2528 }, { "epoch": 0.11446028513238289, "grad_norm": 0.803747106010139, "learning_rate": 9.814319824015827e-06, "loss": 0.4368, "step": 2529 }, { "epoch": 0.11450554424077845, "grad_norm": 0.764847693389444, "learning_rate": 9.814121893140105e-06, "loss": 0.4765, "step": 2530 }, { "epoch": 0.11455080334917402, "grad_norm": 0.6383253482048825, "learning_rate": 9.81392385882369e-06, "loss": 0.4087, "step": 2531 }, { "epoch": 0.11459606245756959, "grad_norm": 0.821489295199945, "learning_rate": 9.813725721070834e-06, "loss": 0.4072, "step": 2532 }, { "epoch": 0.11464132156596515, "grad_norm": 0.8707012364160911, "learning_rate": 9.813527479885799e-06, "loss": 0.4296, "step": 2533 }, { "epoch": 0.11468658067436072, "grad_norm": 1.3106924634509085, "learning_rate": 9.813329135272841e-06, "loss": 0.4377, "step": 2534 }, { "epoch": 0.11473183978275628, "grad_norm": 0.5267145937741655, "learning_rate": 9.813130687236222e-06, "loss": 0.5184, "step": 2535 }, { "epoch": 0.11477709889115184, "grad_norm": 0.7352462805393354, "learning_rate": 9.81293213578021e-06, "loss": 0.4098, "step": 2536 }, { "epoch": 0.1148223579995474, "grad_norm": 0.8631311864144396, "learning_rate": 9.812733480909065e-06, "loss": 0.4135, "step": 2537 }, { "epoch": 0.11486761710794298, "grad_norm": 0.4587714355663383, "learning_rate": 9.812534722627058e-06, "loss": 0.5288, "step": 2538 }, { "epoch": 0.11491287621633854, "grad_norm": 0.7901169569777019, "learning_rate": 9.812335860938462e-06, "loss": 0.4404, "step": 2539 }, { "epoch": 0.1149581353247341, "grad_norm": 0.361868647097338, "learning_rate": 9.812136895847548e-06, "loss": 0.5265, "step": 2540 }, { "epoch": 0.11500339443312967, "grad_norm": 0.7596945627994101, "learning_rate": 9.811937827358592e-06, "loss": 0.4571, "step": 2541 }, { "epoch": 0.11504865354152523, "grad_norm": 0.7906186891160457, "learning_rate": 9.81173865547587e-06, "loss": 0.4267, "step": 2542 }, { "epoch": 0.11509391264992079, "grad_norm": 0.4117514950499734, "learning_rate": 9.811539380203663e-06, "loss": 0.4849, "step": 2543 }, { "epoch": 0.11513917175831637, "grad_norm": 0.7657511338022506, "learning_rate": 9.811340001546252e-06, "loss": 0.4434, "step": 2544 }, { "epoch": 0.11518443086671193, "grad_norm": 0.7789309960897834, "learning_rate": 9.811140519507922e-06, "loss": 0.4494, "step": 2545 }, { "epoch": 0.11522968997510749, "grad_norm": 0.6871882671752764, "learning_rate": 9.810940934092958e-06, "loss": 0.4671, "step": 2546 }, { "epoch": 0.11527494908350305, "grad_norm": 0.7825805615576867, "learning_rate": 9.810741245305649e-06, "loss": 0.4138, "step": 2547 }, { "epoch": 0.11532020819189862, "grad_norm": 0.45342424459144637, "learning_rate": 9.810541453150286e-06, "loss": 0.4827, "step": 2548 }, { "epoch": 0.11536546730029418, "grad_norm": 0.7997696049758634, "learning_rate": 9.810341557631161e-06, "loss": 0.4261, "step": 2549 }, { "epoch": 0.11541072640868975, "grad_norm": 0.3354195057845609, "learning_rate": 9.81014155875257e-06, "loss": 0.5186, "step": 2550 }, { "epoch": 0.11545598551708532, "grad_norm": 0.3518734423957318, "learning_rate": 9.80994145651881e-06, "loss": 0.5159, "step": 2551 }, { "epoch": 0.11550124462548088, "grad_norm": 0.8518932818441349, "learning_rate": 9.809741250934182e-06, "loss": 0.4127, "step": 2552 }, { "epoch": 0.11554650373387644, "grad_norm": 0.8049676685657544, "learning_rate": 9.809540942002984e-06, "loss": 0.47, "step": 2553 }, { "epoch": 0.115591762842272, "grad_norm": 1.1333619096212444, "learning_rate": 9.809340529729523e-06, "loss": 0.4076, "step": 2554 }, { "epoch": 0.11563702195066757, "grad_norm": 0.4749602483194078, "learning_rate": 9.809140014118106e-06, "loss": 0.4991, "step": 2555 }, { "epoch": 0.11568228105906314, "grad_norm": 0.8929118199192368, "learning_rate": 9.80893939517304e-06, "loss": 0.4566, "step": 2556 }, { "epoch": 0.1157275401674587, "grad_norm": 1.0534167992870758, "learning_rate": 9.808738672898637e-06, "loss": 0.4217, "step": 2557 }, { "epoch": 0.11577279927585427, "grad_norm": 0.9090595000677438, "learning_rate": 9.808537847299206e-06, "loss": 0.43, "step": 2558 }, { "epoch": 0.11581805838424983, "grad_norm": 0.7642568276547604, "learning_rate": 9.808336918379068e-06, "loss": 0.4057, "step": 2559 }, { "epoch": 0.11586331749264539, "grad_norm": 0.752331667095118, "learning_rate": 9.808135886142536e-06, "loss": 0.4222, "step": 2560 }, { "epoch": 0.11590857660104097, "grad_norm": 0.7131841249575264, "learning_rate": 9.80793475059393e-06, "loss": 0.4255, "step": 2561 }, { "epoch": 0.11595383570943653, "grad_norm": 1.0519989552101878, "learning_rate": 9.807733511737574e-06, "loss": 0.4236, "step": 2562 }, { "epoch": 0.11599909481783209, "grad_norm": 0.5128610138385369, "learning_rate": 9.80753216957779e-06, "loss": 0.4961, "step": 2563 }, { "epoch": 0.11604435392622765, "grad_norm": 0.45318445672664937, "learning_rate": 9.807330724118906e-06, "loss": 0.5104, "step": 2564 }, { "epoch": 0.11608961303462322, "grad_norm": 0.9444688018175134, "learning_rate": 9.807129175365248e-06, "loss": 0.4216, "step": 2565 }, { "epoch": 0.11613487214301878, "grad_norm": 0.7334542392066108, "learning_rate": 9.806927523321148e-06, "loss": 0.4404, "step": 2566 }, { "epoch": 0.11618013125141435, "grad_norm": 0.667130718816056, "learning_rate": 9.806725767990938e-06, "loss": 0.3966, "step": 2567 }, { "epoch": 0.11622539035980992, "grad_norm": 0.7872579252818356, "learning_rate": 9.806523909378956e-06, "loss": 0.4384, "step": 2568 }, { "epoch": 0.11627064946820548, "grad_norm": 0.8402962544898573, "learning_rate": 9.806321947489537e-06, "loss": 0.4756, "step": 2569 }, { "epoch": 0.11631590857660104, "grad_norm": 0.8202872754958558, "learning_rate": 9.806119882327019e-06, "loss": 0.448, "step": 2570 }, { "epoch": 0.1163611676849966, "grad_norm": 0.7107492495403155, "learning_rate": 9.805917713895748e-06, "loss": 0.4321, "step": 2571 }, { "epoch": 0.11640642679339216, "grad_norm": 0.771093224734539, "learning_rate": 9.805715442200065e-06, "loss": 0.5144, "step": 2572 }, { "epoch": 0.11645168590178774, "grad_norm": 0.9386984016833877, "learning_rate": 9.805513067244316e-06, "loss": 0.4454, "step": 2573 }, { "epoch": 0.1164969450101833, "grad_norm": 0.8371963863890383, "learning_rate": 9.80531058903285e-06, "loss": 0.4083, "step": 2574 }, { "epoch": 0.11654220411857887, "grad_norm": 0.7628481457269319, "learning_rate": 9.805108007570019e-06, "loss": 0.4178, "step": 2575 }, { "epoch": 0.11658746322697443, "grad_norm": 0.7629279641165384, "learning_rate": 9.804905322860174e-06, "loss": 0.3906, "step": 2576 }, { "epoch": 0.11663272233536999, "grad_norm": 0.7876862625935663, "learning_rate": 9.80470253490767e-06, "loss": 0.4435, "step": 2577 }, { "epoch": 0.11667798144376555, "grad_norm": 0.8393703723647782, "learning_rate": 9.804499643716866e-06, "loss": 0.427, "step": 2578 }, { "epoch": 0.11672324055216113, "grad_norm": 0.8430332725251674, "learning_rate": 9.804296649292119e-06, "loss": 0.4125, "step": 2579 }, { "epoch": 0.11676849966055669, "grad_norm": 0.5868369641884476, "learning_rate": 9.804093551637794e-06, "loss": 0.5417, "step": 2580 }, { "epoch": 0.11681375876895225, "grad_norm": 0.7528081549316857, "learning_rate": 9.803890350758253e-06, "loss": 0.4395, "step": 2581 }, { "epoch": 0.11685901787734781, "grad_norm": 0.812847278681468, "learning_rate": 9.803687046657863e-06, "loss": 0.4565, "step": 2582 }, { "epoch": 0.11690427698574338, "grad_norm": 0.3822252791723251, "learning_rate": 9.80348363934099e-06, "loss": 0.5108, "step": 2583 }, { "epoch": 0.11694953609413894, "grad_norm": 0.81855950148487, "learning_rate": 9.803280128812009e-06, "loss": 0.4828, "step": 2584 }, { "epoch": 0.11699479520253452, "grad_norm": 0.38534699136789974, "learning_rate": 9.803076515075288e-06, "loss": 0.5013, "step": 2585 }, { "epoch": 0.11704005431093008, "grad_norm": 0.8995071789486414, "learning_rate": 9.802872798135205e-06, "loss": 0.3777, "step": 2586 }, { "epoch": 0.11708531341932564, "grad_norm": 0.6654776493366726, "learning_rate": 9.802668977996134e-06, "loss": 0.4325, "step": 2587 }, { "epoch": 0.1171305725277212, "grad_norm": 0.40645015656442596, "learning_rate": 9.80246505466246e-06, "loss": 0.5202, "step": 2588 }, { "epoch": 0.11717583163611676, "grad_norm": 0.4328728192860413, "learning_rate": 9.802261028138563e-06, "loss": 0.4921, "step": 2589 }, { "epoch": 0.11722109074451233, "grad_norm": 0.8520383668317281, "learning_rate": 9.802056898428823e-06, "loss": 0.4322, "step": 2590 }, { "epoch": 0.1172663498529079, "grad_norm": 0.8162455760932941, "learning_rate": 9.801852665537628e-06, "loss": 0.4772, "step": 2591 }, { "epoch": 0.11731160896130347, "grad_norm": 0.715639339968077, "learning_rate": 9.801648329469368e-06, "loss": 0.4096, "step": 2592 }, { "epoch": 0.11735686806969903, "grad_norm": 0.7674946740030191, "learning_rate": 9.801443890228433e-06, "loss": 0.4204, "step": 2593 }, { "epoch": 0.11740212717809459, "grad_norm": 0.8687384401260149, "learning_rate": 9.801239347819213e-06, "loss": 0.4088, "step": 2594 }, { "epoch": 0.11744738628649015, "grad_norm": 0.776904357959414, "learning_rate": 9.801034702246109e-06, "loss": 0.519, "step": 2595 }, { "epoch": 0.11749264539488571, "grad_norm": 0.7081209701798946, "learning_rate": 9.80082995351351e-06, "loss": 0.4869, "step": 2596 }, { "epoch": 0.11753790450328129, "grad_norm": 0.8995320362421774, "learning_rate": 9.800625101625823e-06, "loss": 0.4077, "step": 2597 }, { "epoch": 0.11758316361167685, "grad_norm": 0.8018640403584185, "learning_rate": 9.800420146587446e-06, "loss": 0.4211, "step": 2598 }, { "epoch": 0.11762842272007241, "grad_norm": 0.7980549659130014, "learning_rate": 9.800215088402785e-06, "loss": 0.4222, "step": 2599 }, { "epoch": 0.11767368182846798, "grad_norm": 0.4214767758524068, "learning_rate": 9.800009927076242e-06, "loss": 0.5301, "step": 2600 }, { "epoch": 0.11771894093686354, "grad_norm": 0.8670626633745894, "learning_rate": 9.79980466261223e-06, "loss": 0.4073, "step": 2601 }, { "epoch": 0.1177642000452591, "grad_norm": 0.7999794305874037, "learning_rate": 9.799599295015154e-06, "loss": 0.4164, "step": 2602 }, { "epoch": 0.11780945915365468, "grad_norm": 0.38042470528204886, "learning_rate": 9.799393824289432e-06, "loss": 0.4855, "step": 2603 }, { "epoch": 0.11785471826205024, "grad_norm": 0.7736755748456349, "learning_rate": 9.799188250439477e-06, "loss": 0.4406, "step": 2604 }, { "epoch": 0.1178999773704458, "grad_norm": 0.9131934705355554, "learning_rate": 9.798982573469706e-06, "loss": 0.4272, "step": 2605 }, { "epoch": 0.11794523647884136, "grad_norm": 0.7246504193592824, "learning_rate": 9.79877679338454e-06, "loss": 0.4511, "step": 2606 }, { "epoch": 0.11799049558723693, "grad_norm": 0.7085435596280111, "learning_rate": 9.798570910188396e-06, "loss": 0.4068, "step": 2607 }, { "epoch": 0.1180357546956325, "grad_norm": 0.7441094134118903, "learning_rate": 9.798364923885703e-06, "loss": 0.4549, "step": 2608 }, { "epoch": 0.11808101380402806, "grad_norm": 0.7898290377759148, "learning_rate": 9.798158834480883e-06, "loss": 0.4463, "step": 2609 }, { "epoch": 0.11812627291242363, "grad_norm": 0.6696170261673059, "learning_rate": 9.797952641978368e-06, "loss": 0.3952, "step": 2610 }, { "epoch": 0.11817153202081919, "grad_norm": 0.84637099325106, "learning_rate": 9.797746346382586e-06, "loss": 0.5207, "step": 2611 }, { "epoch": 0.11821679112921475, "grad_norm": 0.7675882546910711, "learning_rate": 9.797539947697969e-06, "loss": 0.4498, "step": 2612 }, { "epoch": 0.11826205023761031, "grad_norm": 0.761430885948382, "learning_rate": 9.797333445928954e-06, "loss": 0.4515, "step": 2613 }, { "epoch": 0.11830730934600589, "grad_norm": 0.7188207060892471, "learning_rate": 9.797126841079979e-06, "loss": 0.4042, "step": 2614 }, { "epoch": 0.11835256845440145, "grad_norm": 0.7496719771480668, "learning_rate": 9.796920133155479e-06, "loss": 0.4564, "step": 2615 }, { "epoch": 0.11839782756279701, "grad_norm": 1.0304731986930586, "learning_rate": 9.796713322159897e-06, "loss": 0.4532, "step": 2616 }, { "epoch": 0.11844308667119258, "grad_norm": 0.9970062290722393, "learning_rate": 9.796506408097679e-06, "loss": 0.4328, "step": 2617 }, { "epoch": 0.11848834577958814, "grad_norm": 0.7185759231957992, "learning_rate": 9.79629939097327e-06, "loss": 0.469, "step": 2618 }, { "epoch": 0.1185336048879837, "grad_norm": 0.6205298876025276, "learning_rate": 9.796092270791118e-06, "loss": 0.5458, "step": 2619 }, { "epoch": 0.11857886399637928, "grad_norm": 0.9922965858266375, "learning_rate": 9.795885047555673e-06, "loss": 0.4199, "step": 2620 }, { "epoch": 0.11862412310477484, "grad_norm": 0.7396088827374019, "learning_rate": 9.795677721271388e-06, "loss": 0.4193, "step": 2621 }, { "epoch": 0.1186693822131704, "grad_norm": 0.6860612044528022, "learning_rate": 9.795470291942717e-06, "loss": 0.4015, "step": 2622 }, { "epoch": 0.11871464132156596, "grad_norm": 0.9072366386992914, "learning_rate": 9.795262759574117e-06, "loss": 0.4456, "step": 2623 }, { "epoch": 0.11875990042996153, "grad_norm": 0.8086548745386506, "learning_rate": 9.795055124170047e-06, "loss": 0.4396, "step": 2624 }, { "epoch": 0.11880515953835709, "grad_norm": 0.8949857372498993, "learning_rate": 9.79484738573497e-06, "loss": 0.4149, "step": 2625 }, { "epoch": 0.11885041864675266, "grad_norm": 0.7416475609651193, "learning_rate": 9.794639544273352e-06, "loss": 0.4285, "step": 2626 }, { "epoch": 0.11889567775514823, "grad_norm": 0.7588753079782493, "learning_rate": 9.794431599789653e-06, "loss": 0.4561, "step": 2627 }, { "epoch": 0.11894093686354379, "grad_norm": 0.790029553819229, "learning_rate": 9.794223552288344e-06, "loss": 0.4243, "step": 2628 }, { "epoch": 0.11898619597193935, "grad_norm": 0.712822624737303, "learning_rate": 9.794015401773896e-06, "loss": 0.3943, "step": 2629 }, { "epoch": 0.11903145508033491, "grad_norm": 0.8630365063043336, "learning_rate": 9.79380714825078e-06, "loss": 0.5127, "step": 2630 }, { "epoch": 0.11907671418873048, "grad_norm": 0.9709964026202484, "learning_rate": 9.793598791723471e-06, "loss": 0.4413, "step": 2631 }, { "epoch": 0.11912197329712605, "grad_norm": 0.7388955468482264, "learning_rate": 9.793390332196448e-06, "loss": 0.4456, "step": 2632 }, { "epoch": 0.11916723240552161, "grad_norm": 0.7321239683277707, "learning_rate": 9.793181769674186e-06, "loss": 0.4058, "step": 2633 }, { "epoch": 0.11921249151391718, "grad_norm": 1.2012183009430955, "learning_rate": 9.792973104161172e-06, "loss": 0.4178, "step": 2634 }, { "epoch": 0.11925775062231274, "grad_norm": 1.0114217801817902, "learning_rate": 9.792764335661885e-06, "loss": 0.4096, "step": 2635 }, { "epoch": 0.1193030097307083, "grad_norm": 0.6756070247331848, "learning_rate": 9.792555464180813e-06, "loss": 0.5141, "step": 2636 }, { "epoch": 0.11934826883910386, "grad_norm": 0.5561437651032958, "learning_rate": 9.792346489722443e-06, "loss": 0.5274, "step": 2637 }, { "epoch": 0.11939352794749944, "grad_norm": 0.9687338809941708, "learning_rate": 9.792137412291265e-06, "loss": 0.4768, "step": 2638 }, { "epoch": 0.119438787055895, "grad_norm": 0.7698369750400488, "learning_rate": 9.791928231891771e-06, "loss": 0.4102, "step": 2639 }, { "epoch": 0.11948404616429056, "grad_norm": 0.7731517828178811, "learning_rate": 9.791718948528457e-06, "loss": 0.4375, "step": 2640 }, { "epoch": 0.11952930527268613, "grad_norm": 0.92130600172214, "learning_rate": 9.79150956220582e-06, "loss": 0.4676, "step": 2641 }, { "epoch": 0.11957456438108169, "grad_norm": 0.6881212305908873, "learning_rate": 9.79130007292836e-06, "loss": 0.5164, "step": 2642 }, { "epoch": 0.11961982348947725, "grad_norm": 0.7206591094332322, "learning_rate": 9.791090480700575e-06, "loss": 0.4386, "step": 2643 }, { "epoch": 0.11966508259787283, "grad_norm": 0.4618535399153771, "learning_rate": 9.790880785526971e-06, "loss": 0.5285, "step": 2644 }, { "epoch": 0.11971034170626839, "grad_norm": 0.746839294471897, "learning_rate": 9.790670987412052e-06, "loss": 0.4009, "step": 2645 }, { "epoch": 0.11975560081466395, "grad_norm": 0.7155483593476287, "learning_rate": 9.790461086360327e-06, "loss": 0.3695, "step": 2646 }, { "epoch": 0.11980085992305951, "grad_norm": 0.7112119393094498, "learning_rate": 9.790251082376308e-06, "loss": 0.4128, "step": 2647 }, { "epoch": 0.11984611903145508, "grad_norm": 0.7763810167513379, "learning_rate": 9.790040975464503e-06, "loss": 0.4237, "step": 2648 }, { "epoch": 0.11989137813985065, "grad_norm": 0.712552760136062, "learning_rate": 9.78983076562943e-06, "loss": 0.4416, "step": 2649 }, { "epoch": 0.11993663724824621, "grad_norm": 0.7682964438579254, "learning_rate": 9.789620452875605e-06, "loss": 0.4608, "step": 2650 }, { "epoch": 0.11998189635664178, "grad_norm": 0.87596034032508, "learning_rate": 9.789410037207546e-06, "loss": 0.5363, "step": 2651 }, { "epoch": 0.12002715546503734, "grad_norm": 0.7109720872914763, "learning_rate": 9.789199518629774e-06, "loss": 0.4195, "step": 2652 }, { "epoch": 0.1200724145734329, "grad_norm": 0.7260005596256348, "learning_rate": 9.788988897146814e-06, "loss": 0.446, "step": 2653 }, { "epoch": 0.12011767368182846, "grad_norm": 0.7833435501815497, "learning_rate": 9.788778172763191e-06, "loss": 0.4292, "step": 2654 }, { "epoch": 0.12016293279022404, "grad_norm": 0.3969348371479244, "learning_rate": 9.788567345483434e-06, "loss": 0.5069, "step": 2655 }, { "epoch": 0.1202081918986196, "grad_norm": 0.7136665781135838, "learning_rate": 9.78835641531207e-06, "loss": 0.4012, "step": 2656 }, { "epoch": 0.12025345100701516, "grad_norm": 0.715615027824156, "learning_rate": 9.788145382253633e-06, "loss": 0.4371, "step": 2657 }, { "epoch": 0.12029871011541073, "grad_norm": 0.507504666798424, "learning_rate": 9.787934246312657e-06, "loss": 0.5096, "step": 2658 }, { "epoch": 0.12034396922380629, "grad_norm": 0.4527911516398708, "learning_rate": 9.787723007493681e-06, "loss": 0.5422, "step": 2659 }, { "epoch": 0.12038922833220185, "grad_norm": 0.7400944870716484, "learning_rate": 9.787511665801242e-06, "loss": 0.3803, "step": 2660 }, { "epoch": 0.12043448744059743, "grad_norm": 0.6825565016927304, "learning_rate": 9.78730022123988e-06, "loss": 0.4075, "step": 2661 }, { "epoch": 0.12047974654899299, "grad_norm": 0.4016213987042365, "learning_rate": 9.787088673814137e-06, "loss": 0.491, "step": 2662 }, { "epoch": 0.12052500565738855, "grad_norm": 0.6816641941164435, "learning_rate": 9.786877023528564e-06, "loss": 0.3884, "step": 2663 }, { "epoch": 0.12057026476578411, "grad_norm": 0.49056929924742526, "learning_rate": 9.786665270387706e-06, "loss": 0.5096, "step": 2664 }, { "epoch": 0.12061552387417968, "grad_norm": 0.40521515766664484, "learning_rate": 9.78645341439611e-06, "loss": 0.4994, "step": 2665 }, { "epoch": 0.12066078298257524, "grad_norm": 0.8613575720890704, "learning_rate": 9.786241455558332e-06, "loss": 0.4777, "step": 2666 }, { "epoch": 0.12070604209097081, "grad_norm": 0.7503037569130684, "learning_rate": 9.786029393878925e-06, "loss": 0.4586, "step": 2667 }, { "epoch": 0.12075130119936638, "grad_norm": 0.6666433288490968, "learning_rate": 9.785817229362445e-06, "loss": 0.3881, "step": 2668 }, { "epoch": 0.12079656030776194, "grad_norm": 0.895421914416902, "learning_rate": 9.78560496201345e-06, "loss": 0.399, "step": 2669 }, { "epoch": 0.1208418194161575, "grad_norm": 0.7827897135331325, "learning_rate": 9.785392591836504e-06, "loss": 0.4544, "step": 2670 }, { "epoch": 0.12088707852455306, "grad_norm": 0.7347360025993976, "learning_rate": 9.785180118836169e-06, "loss": 0.455, "step": 2671 }, { "epoch": 0.12093233763294862, "grad_norm": 0.828298118763274, "learning_rate": 9.784967543017008e-06, "loss": 0.4535, "step": 2672 }, { "epoch": 0.1209775967413442, "grad_norm": 0.7050553279671258, "learning_rate": 9.784754864383593e-06, "loss": 0.3994, "step": 2673 }, { "epoch": 0.12102285584973976, "grad_norm": 0.7759845857752202, "learning_rate": 9.784542082940488e-06, "loss": 0.4549, "step": 2674 }, { "epoch": 0.12106811495813533, "grad_norm": 0.9078020537580649, "learning_rate": 9.784329198692269e-06, "loss": 0.4441, "step": 2675 }, { "epoch": 0.12111337406653089, "grad_norm": 0.7967655768014362, "learning_rate": 9.78411621164351e-06, "loss": 0.4718, "step": 2676 }, { "epoch": 0.12115863317492645, "grad_norm": 0.6867947079019111, "learning_rate": 9.783903121798787e-06, "loss": 0.436, "step": 2677 }, { "epoch": 0.12120389228332201, "grad_norm": 0.7372737057900989, "learning_rate": 9.783689929162679e-06, "loss": 0.4426, "step": 2678 }, { "epoch": 0.12124915139171759, "grad_norm": 0.6993752887877634, "learning_rate": 9.783476633739766e-06, "loss": 0.4679, "step": 2679 }, { "epoch": 0.12129441050011315, "grad_norm": 0.6940593830233261, "learning_rate": 9.783263235534632e-06, "loss": 0.4123, "step": 2680 }, { "epoch": 0.12133966960850871, "grad_norm": 0.7090396183152797, "learning_rate": 9.783049734551861e-06, "loss": 0.398, "step": 2681 }, { "epoch": 0.12138492871690428, "grad_norm": 0.6781535914506774, "learning_rate": 9.78283613079604e-06, "loss": 0.4313, "step": 2682 }, { "epoch": 0.12143018782529984, "grad_norm": 1.229670729375344, "learning_rate": 9.782622424271761e-06, "loss": 0.5487, "step": 2683 }, { "epoch": 0.1214754469336954, "grad_norm": 0.8239341407113727, "learning_rate": 9.782408614983616e-06, "loss": 0.3965, "step": 2684 }, { "epoch": 0.12152070604209098, "grad_norm": 0.7395028627363661, "learning_rate": 9.782194702936198e-06, "loss": 0.4073, "step": 2685 }, { "epoch": 0.12156596515048654, "grad_norm": 0.6849031343236605, "learning_rate": 9.781980688134102e-06, "loss": 0.4154, "step": 2686 }, { "epoch": 0.1216112242588821, "grad_norm": 1.2292072746561793, "learning_rate": 9.781766570581927e-06, "loss": 0.4413, "step": 2687 }, { "epoch": 0.12165648336727766, "grad_norm": 0.7494350410309273, "learning_rate": 9.781552350284275e-06, "loss": 0.4313, "step": 2688 }, { "epoch": 0.12170174247567322, "grad_norm": 0.7544210475047712, "learning_rate": 9.78133802724575e-06, "loss": 0.4419, "step": 2689 }, { "epoch": 0.12174700158406879, "grad_norm": 0.8333559848106804, "learning_rate": 9.781123601470953e-06, "loss": 0.4334, "step": 2690 }, { "epoch": 0.12179226069246436, "grad_norm": 1.3548197867903617, "learning_rate": 9.780909072964497e-06, "loss": 0.5363, "step": 2691 }, { "epoch": 0.12183751980085993, "grad_norm": 0.8618630036683134, "learning_rate": 9.780694441730987e-06, "loss": 0.5295, "step": 2692 }, { "epoch": 0.12188277890925549, "grad_norm": 0.45892450903463605, "learning_rate": 9.780479707775035e-06, "loss": 0.4854, "step": 2693 }, { "epoch": 0.12192803801765105, "grad_norm": 0.7846049894036222, "learning_rate": 9.780264871101256e-06, "loss": 0.438, "step": 2694 }, { "epoch": 0.12197329712604661, "grad_norm": 0.7605031550438223, "learning_rate": 9.78004993171427e-06, "loss": 0.4153, "step": 2695 }, { "epoch": 0.12201855623444219, "grad_norm": 0.7357793468390382, "learning_rate": 9.77983488961869e-06, "loss": 0.4399, "step": 2696 }, { "epoch": 0.12206381534283775, "grad_norm": 0.7580043201789151, "learning_rate": 9.779619744819136e-06, "loss": 0.4408, "step": 2697 }, { "epoch": 0.12210907445123331, "grad_norm": 2.103868880774699, "learning_rate": 9.779404497320236e-06, "loss": 0.5704, "step": 2698 }, { "epoch": 0.12215433355962887, "grad_norm": 0.7871003114405238, "learning_rate": 9.77918914712661e-06, "loss": 0.3799, "step": 2699 }, { "epoch": 0.12219959266802444, "grad_norm": 0.7841364790364305, "learning_rate": 9.778973694242888e-06, "loss": 0.474, "step": 2700 }, { "epoch": 0.12224485177642, "grad_norm": 0.8486597643275121, "learning_rate": 9.7787581386737e-06, "loss": 0.4336, "step": 2701 }, { "epoch": 0.12229011088481558, "grad_norm": 0.7196137791219471, "learning_rate": 9.778542480423677e-06, "loss": 0.4111, "step": 2702 }, { "epoch": 0.12233536999321114, "grad_norm": 0.7347694160748849, "learning_rate": 9.77832671949745e-06, "loss": 0.4042, "step": 2703 }, { "epoch": 0.1223806291016067, "grad_norm": 0.9636687858399887, "learning_rate": 9.778110855899659e-06, "loss": 0.5408, "step": 2704 }, { "epoch": 0.12242588821000226, "grad_norm": 0.8111963342960673, "learning_rate": 9.777894889634939e-06, "loss": 0.4039, "step": 2705 }, { "epoch": 0.12247114731839782, "grad_norm": 0.6695899517416207, "learning_rate": 9.777678820707932e-06, "loss": 0.5425, "step": 2706 }, { "epoch": 0.12251640642679339, "grad_norm": 0.7610453505809822, "learning_rate": 9.777462649123281e-06, "loss": 0.3837, "step": 2707 }, { "epoch": 0.12256166553518896, "grad_norm": 0.7326549088785459, "learning_rate": 9.777246374885631e-06, "loss": 0.3737, "step": 2708 }, { "epoch": 0.12260692464358453, "grad_norm": 0.6840927367574374, "learning_rate": 9.77702999799963e-06, "loss": 0.3914, "step": 2709 }, { "epoch": 0.12265218375198009, "grad_norm": 0.9382804953482135, "learning_rate": 9.776813518469924e-06, "loss": 0.5241, "step": 2710 }, { "epoch": 0.12269744286037565, "grad_norm": 0.8627904070576863, "learning_rate": 9.776596936301168e-06, "loss": 0.5221, "step": 2711 }, { "epoch": 0.12274270196877121, "grad_norm": 0.6784951736072385, "learning_rate": 9.776380251498013e-06, "loss": 0.4575, "step": 2712 }, { "epoch": 0.12278796107716677, "grad_norm": 0.6935729007977068, "learning_rate": 9.776163464065115e-06, "loss": 0.4268, "step": 2713 }, { "epoch": 0.12283322018556235, "grad_norm": 0.7215493944586941, "learning_rate": 9.775946574007133e-06, "loss": 0.4375, "step": 2714 }, { "epoch": 0.12287847929395791, "grad_norm": 0.791182864674943, "learning_rate": 9.775729581328728e-06, "loss": 0.4137, "step": 2715 }, { "epoch": 0.12292373840235347, "grad_norm": 0.69097511820002, "learning_rate": 9.775512486034564e-06, "loss": 0.4204, "step": 2716 }, { "epoch": 0.12296899751074904, "grad_norm": 0.7109609228395887, "learning_rate": 9.775295288129301e-06, "loss": 0.4299, "step": 2717 }, { "epoch": 0.1230142566191446, "grad_norm": 0.6922140625025477, "learning_rate": 9.775077987617609e-06, "loss": 0.4669, "step": 2718 }, { "epoch": 0.12305951572754016, "grad_norm": 0.7454249071139529, "learning_rate": 9.774860584504156e-06, "loss": 0.4484, "step": 2719 }, { "epoch": 0.12310477483593574, "grad_norm": 0.6929325920676569, "learning_rate": 9.774643078793616e-06, "loss": 0.3958, "step": 2720 }, { "epoch": 0.1231500339443313, "grad_norm": 0.6871179296450526, "learning_rate": 9.774425470490657e-06, "loss": 0.4551, "step": 2721 }, { "epoch": 0.12319529305272686, "grad_norm": 0.6682448994056809, "learning_rate": 9.774207759599961e-06, "loss": 0.446, "step": 2722 }, { "epoch": 0.12324055216112242, "grad_norm": 0.6988690432746627, "learning_rate": 9.773989946126202e-06, "loss": 0.4169, "step": 2723 }, { "epoch": 0.12328581126951799, "grad_norm": 0.6891060920685302, "learning_rate": 9.773772030074062e-06, "loss": 0.3707, "step": 2724 }, { "epoch": 0.12333107037791355, "grad_norm": 0.6759094580335833, "learning_rate": 9.773554011448221e-06, "loss": 0.4012, "step": 2725 }, { "epoch": 0.12337632948630912, "grad_norm": 0.6820553553816232, "learning_rate": 9.773335890253367e-06, "loss": 0.3912, "step": 2726 }, { "epoch": 0.12342158859470469, "grad_norm": 0.6928170927079416, "learning_rate": 9.773117666494183e-06, "loss": 0.4546, "step": 2727 }, { "epoch": 0.12346684770310025, "grad_norm": 0.7628843441717775, "learning_rate": 9.772899340175362e-06, "loss": 0.4215, "step": 2728 }, { "epoch": 0.12351210681149581, "grad_norm": 0.7729846245333306, "learning_rate": 9.772680911301592e-06, "loss": 0.4508, "step": 2729 }, { "epoch": 0.12355736591989137, "grad_norm": 1.8521663449637993, "learning_rate": 9.772462379877566e-06, "loss": 0.5501, "step": 2730 }, { "epoch": 0.12360262502828694, "grad_norm": 0.7956300766318737, "learning_rate": 9.772243745907983e-06, "loss": 0.4294, "step": 2731 }, { "epoch": 0.12364788413668251, "grad_norm": 0.7989248544289832, "learning_rate": 9.772025009397538e-06, "loss": 0.427, "step": 2732 }, { "epoch": 0.12369314324507807, "grad_norm": 0.6440191845101102, "learning_rate": 9.771806170350931e-06, "loss": 0.5273, "step": 2733 }, { "epoch": 0.12373840235347364, "grad_norm": 0.8104647497365935, "learning_rate": 9.771587228772866e-06, "loss": 0.3975, "step": 2734 }, { "epoch": 0.1237836614618692, "grad_norm": 0.7902673158685333, "learning_rate": 9.771368184668046e-06, "loss": 0.5167, "step": 2735 }, { "epoch": 0.12382892057026476, "grad_norm": 1.0776498695175314, "learning_rate": 9.771149038041177e-06, "loss": 0.4532, "step": 2736 }, { "epoch": 0.12387417967866034, "grad_norm": 0.7327511682649216, "learning_rate": 9.77092978889697e-06, "loss": 0.4391, "step": 2737 }, { "epoch": 0.1239194387870559, "grad_norm": 1.005928049604739, "learning_rate": 9.770710437240134e-06, "loss": 0.5269, "step": 2738 }, { "epoch": 0.12396469789545146, "grad_norm": 0.8360074267582877, "learning_rate": 9.770490983075383e-06, "loss": 0.457, "step": 2739 }, { "epoch": 0.12400995700384702, "grad_norm": 0.8991652457166528, "learning_rate": 9.770271426407432e-06, "loss": 0.4054, "step": 2740 }, { "epoch": 0.12405521611224259, "grad_norm": 0.7664564769263479, "learning_rate": 9.770051767241e-06, "loss": 0.3936, "step": 2741 }, { "epoch": 0.12410047522063815, "grad_norm": 0.77113558498915, "learning_rate": 9.769832005580804e-06, "loss": 0.5359, "step": 2742 }, { "epoch": 0.12414573432903372, "grad_norm": 0.8478107407330637, "learning_rate": 9.769612141431568e-06, "loss": 0.4158, "step": 2743 }, { "epoch": 0.12419099343742929, "grad_norm": 0.795928760884458, "learning_rate": 9.769392174798017e-06, "loss": 0.3909, "step": 2744 }, { "epoch": 0.12423625254582485, "grad_norm": 0.9940043746867019, "learning_rate": 9.769172105684875e-06, "loss": 0.4278, "step": 2745 }, { "epoch": 0.12428151165422041, "grad_norm": 0.7112341670412834, "learning_rate": 9.76895193409687e-06, "loss": 0.4406, "step": 2746 }, { "epoch": 0.12432677076261597, "grad_norm": 0.7573968135300648, "learning_rate": 9.768731660038737e-06, "loss": 0.4156, "step": 2747 }, { "epoch": 0.12437202987101154, "grad_norm": 1.113628090493707, "learning_rate": 9.768511283515207e-06, "loss": 0.4301, "step": 2748 }, { "epoch": 0.12441728897940711, "grad_norm": 0.6786265356978682, "learning_rate": 9.768290804531013e-06, "loss": 0.4271, "step": 2749 }, { "epoch": 0.12446254808780267, "grad_norm": 0.7577079928644646, "learning_rate": 9.768070223090896e-06, "loss": 0.5163, "step": 2750 }, { "epoch": 0.12450780719619824, "grad_norm": 0.5699591505059829, "learning_rate": 9.767849539199594e-06, "loss": 0.5325, "step": 2751 }, { "epoch": 0.1245530663045938, "grad_norm": 1.6891404275229212, "learning_rate": 9.767628752861848e-06, "loss": 0.4455, "step": 2752 }, { "epoch": 0.12459832541298936, "grad_norm": 0.7397212184485615, "learning_rate": 9.767407864082404e-06, "loss": 0.4223, "step": 2753 }, { "epoch": 0.12464358452138492, "grad_norm": 0.7178882499970353, "learning_rate": 9.767186872866004e-06, "loss": 0.3905, "step": 2754 }, { "epoch": 0.1246888436297805, "grad_norm": 0.7992228039565072, "learning_rate": 9.766965779217401e-06, "loss": 0.4232, "step": 2755 }, { "epoch": 0.12473410273817606, "grad_norm": 0.7976996216847373, "learning_rate": 9.766744583141345e-06, "loss": 0.4512, "step": 2756 }, { "epoch": 0.12477936184657162, "grad_norm": 0.6992639657248829, "learning_rate": 9.766523284642588e-06, "loss": 0.4165, "step": 2757 }, { "epoch": 0.12482462095496719, "grad_norm": 0.6921757743659276, "learning_rate": 9.766301883725884e-06, "loss": 0.4365, "step": 2758 }, { "epoch": 0.12486988006336275, "grad_norm": 0.7561530071180924, "learning_rate": 9.76608038039599e-06, "loss": 0.4301, "step": 2759 }, { "epoch": 0.12491513917175831, "grad_norm": 0.8004041588160753, "learning_rate": 9.765858774657669e-06, "loss": 0.4177, "step": 2760 }, { "epoch": 0.12496039828015389, "grad_norm": 1.0769918767267206, "learning_rate": 9.76563706651568e-06, "loss": 0.5095, "step": 2761 }, { "epoch": 0.12500565738854943, "grad_norm": 0.8997733531140661, "learning_rate": 9.765415255974784e-06, "loss": 0.5023, "step": 2762 }, { "epoch": 0.125050916496945, "grad_norm": 0.433269329782108, "learning_rate": 9.765193343039751e-06, "loss": 0.5207, "step": 2763 }, { "epoch": 0.1250961756053406, "grad_norm": 0.9853752511046728, "learning_rate": 9.76497132771535e-06, "loss": 0.4668, "step": 2764 }, { "epoch": 0.12514143471373615, "grad_norm": 0.8148443072198143, "learning_rate": 9.764749210006348e-06, "loss": 0.4301, "step": 2765 }, { "epoch": 0.1251866938221317, "grad_norm": 0.8802776513421972, "learning_rate": 9.76452698991752e-06, "loss": 0.4973, "step": 2766 }, { "epoch": 0.12523195293052727, "grad_norm": 0.855492498623336, "learning_rate": 9.76430466745364e-06, "loss": 0.4407, "step": 2767 }, { "epoch": 0.12527721203892284, "grad_norm": 0.8785273791521889, "learning_rate": 9.764082242619485e-06, "loss": 0.3817, "step": 2768 }, { "epoch": 0.1253224711473184, "grad_norm": 1.557105939671637, "learning_rate": 9.763859715419834e-06, "loss": 0.53, "step": 2769 }, { "epoch": 0.12536773025571396, "grad_norm": 1.066750154692408, "learning_rate": 9.76363708585947e-06, "loss": 0.4435, "step": 2770 }, { "epoch": 0.12541298936410952, "grad_norm": 1.162892927861453, "learning_rate": 9.763414353943175e-06, "loss": 0.5445, "step": 2771 }, { "epoch": 0.12545824847250509, "grad_norm": 0.8886752219994062, "learning_rate": 9.763191519675735e-06, "loss": 0.4512, "step": 2772 }, { "epoch": 0.12550350758090065, "grad_norm": 0.8803921244404787, "learning_rate": 9.762968583061938e-06, "loss": 0.458, "step": 2773 }, { "epoch": 0.1255487666892962, "grad_norm": 0.9265125219905003, "learning_rate": 9.762745544106576e-06, "loss": 0.4956, "step": 2774 }, { "epoch": 0.12559402579769177, "grad_norm": 1.0048813765796745, "learning_rate": 9.762522402814438e-06, "loss": 0.47, "step": 2775 }, { "epoch": 0.12563928490608736, "grad_norm": 1.0089599105936264, "learning_rate": 9.762299159190322e-06, "loss": 0.5454, "step": 2776 }, { "epoch": 0.12568454401448292, "grad_norm": 0.7490852698867788, "learning_rate": 9.762075813239022e-06, "loss": 0.4505, "step": 2777 }, { "epoch": 0.1257298031228785, "grad_norm": 0.7337323557104144, "learning_rate": 9.761852364965339e-06, "loss": 0.4108, "step": 2778 }, { "epoch": 0.12577506223127405, "grad_norm": 0.8230115914730641, "learning_rate": 9.761628814374074e-06, "loss": 0.4365, "step": 2779 }, { "epoch": 0.1258203213396696, "grad_norm": 0.7377174974714752, "learning_rate": 9.76140516147003e-06, "loss": 0.4069, "step": 2780 }, { "epoch": 0.12586558044806517, "grad_norm": 0.7771110398697952, "learning_rate": 9.761181406258012e-06, "loss": 0.4056, "step": 2781 }, { "epoch": 0.12591083955646074, "grad_norm": 0.8547150246842478, "learning_rate": 9.760957548742828e-06, "loss": 0.4479, "step": 2782 }, { "epoch": 0.1259560986648563, "grad_norm": 0.7135937835367194, "learning_rate": 9.760733588929289e-06, "loss": 0.3704, "step": 2783 }, { "epoch": 0.12600135777325186, "grad_norm": 0.733145252105959, "learning_rate": 9.760509526822206e-06, "loss": 0.4439, "step": 2784 }, { "epoch": 0.12604661688164742, "grad_norm": 0.7259614720917196, "learning_rate": 9.760285362426397e-06, "loss": 0.4677, "step": 2785 }, { "epoch": 0.12609187599004298, "grad_norm": 0.7895174836944507, "learning_rate": 9.760061095746671e-06, "loss": 0.422, "step": 2786 }, { "epoch": 0.12613713509843857, "grad_norm": 0.7242396310884901, "learning_rate": 9.759836726787855e-06, "loss": 0.4348, "step": 2787 }, { "epoch": 0.12618239420683414, "grad_norm": 0.7492140269556657, "learning_rate": 9.759612255554765e-06, "loss": 0.4439, "step": 2788 }, { "epoch": 0.1262276533152297, "grad_norm": 0.7253084120049096, "learning_rate": 9.759387682052226e-06, "loss": 0.4383, "step": 2789 }, { "epoch": 0.12627291242362526, "grad_norm": 1.1116143910529677, "learning_rate": 9.759163006285064e-06, "loss": 0.5338, "step": 2790 }, { "epoch": 0.12631817153202082, "grad_norm": 0.8685718392395363, "learning_rate": 9.758938228258103e-06, "loss": 0.4255, "step": 2791 }, { "epoch": 0.12636343064041639, "grad_norm": 0.7273351825438547, "learning_rate": 9.758713347976179e-06, "loss": 0.4382, "step": 2792 }, { "epoch": 0.12640868974881195, "grad_norm": 0.7179864020192804, "learning_rate": 9.758488365444117e-06, "loss": 0.4311, "step": 2793 }, { "epoch": 0.1264539488572075, "grad_norm": 0.6649989419467585, "learning_rate": 9.758263280666757e-06, "loss": 0.4111, "step": 2794 }, { "epoch": 0.12649920796560307, "grad_norm": 0.4415612740759909, "learning_rate": 9.758038093648931e-06, "loss": 0.5024, "step": 2795 }, { "epoch": 0.12654446707399863, "grad_norm": 0.8351532083948827, "learning_rate": 9.757812804395482e-06, "loss": 0.4213, "step": 2796 }, { "epoch": 0.1265897261823942, "grad_norm": 1.0093853854686414, "learning_rate": 9.757587412911247e-06, "loss": 0.4192, "step": 2797 }, { "epoch": 0.12663498529078976, "grad_norm": 0.6988444753162972, "learning_rate": 9.75736191920107e-06, "loss": 0.4206, "step": 2798 }, { "epoch": 0.12668024439918535, "grad_norm": 0.7719411662818325, "learning_rate": 9.757136323269798e-06, "loss": 0.4357, "step": 2799 }, { "epoch": 0.1267255035075809, "grad_norm": 0.8921782142026669, "learning_rate": 9.756910625122276e-06, "loss": 0.4749, "step": 2800 }, { "epoch": 0.12677076261597647, "grad_norm": 0.7741589039777517, "learning_rate": 9.756684824763354e-06, "loss": 0.4751, "step": 2801 }, { "epoch": 0.12681602172437204, "grad_norm": 0.7571345424322213, "learning_rate": 9.756458922197884e-06, "loss": 0.4489, "step": 2802 }, { "epoch": 0.1268612808327676, "grad_norm": 0.8639275063462156, "learning_rate": 9.756232917430719e-06, "loss": 0.5225, "step": 2803 }, { "epoch": 0.12690653994116316, "grad_norm": 0.8443327036951966, "learning_rate": 9.756006810466719e-06, "loss": 0.4155, "step": 2804 }, { "epoch": 0.12695179904955872, "grad_norm": 0.8229293203051627, "learning_rate": 9.755780601310738e-06, "loss": 0.4464, "step": 2805 }, { "epoch": 0.12699705815795428, "grad_norm": 0.8798994860945353, "learning_rate": 9.755554289967638e-06, "loss": 0.4316, "step": 2806 }, { "epoch": 0.12704231726634985, "grad_norm": 0.7797064513535364, "learning_rate": 9.755327876442282e-06, "loss": 0.4324, "step": 2807 }, { "epoch": 0.1270875763747454, "grad_norm": 0.46667805555480824, "learning_rate": 9.755101360739537e-06, "loss": 0.5179, "step": 2808 }, { "epoch": 0.12713283548314097, "grad_norm": 1.1732456802108047, "learning_rate": 9.754874742864264e-06, "loss": 0.4062, "step": 2809 }, { "epoch": 0.12717809459153653, "grad_norm": 0.7681936991538912, "learning_rate": 9.754648022821339e-06, "loss": 0.4489, "step": 2810 }, { "epoch": 0.12722335369993212, "grad_norm": 0.8233301252300935, "learning_rate": 9.754421200615629e-06, "loss": 0.4357, "step": 2811 }, { "epoch": 0.12726861280832769, "grad_norm": 1.5611820942128543, "learning_rate": 9.75419427625201e-06, "loss": 0.4545, "step": 2812 }, { "epoch": 0.12731387191672325, "grad_norm": 0.7366516657356496, "learning_rate": 9.753967249735359e-06, "loss": 0.4269, "step": 2813 }, { "epoch": 0.1273591310251188, "grad_norm": 1.197383763338776, "learning_rate": 9.753740121070552e-06, "loss": 0.4172, "step": 2814 }, { "epoch": 0.12740439013351437, "grad_norm": 0.5187181295570135, "learning_rate": 9.753512890262468e-06, "loss": 0.523, "step": 2815 }, { "epoch": 0.12744964924190993, "grad_norm": 0.7402400139361618, "learning_rate": 9.753285557315993e-06, "loss": 0.4342, "step": 2816 }, { "epoch": 0.1274949083503055, "grad_norm": 0.4179812876405542, "learning_rate": 9.75305812223601e-06, "loss": 0.5042, "step": 2817 }, { "epoch": 0.12754016745870106, "grad_norm": 0.956692225707494, "learning_rate": 9.752830585027406e-06, "loss": 0.4321, "step": 2818 }, { "epoch": 0.12758542656709662, "grad_norm": 0.7580316124124711, "learning_rate": 9.752602945695068e-06, "loss": 0.4106, "step": 2819 }, { "epoch": 0.12763068567549218, "grad_norm": 0.6779058714280451, "learning_rate": 9.75237520424389e-06, "loss": 0.4584, "step": 2820 }, { "epoch": 0.12767594478388775, "grad_norm": 0.7006360886542689, "learning_rate": 9.752147360678767e-06, "loss": 0.4538, "step": 2821 }, { "epoch": 0.1277212038922833, "grad_norm": 0.4225543051078938, "learning_rate": 9.75191941500459e-06, "loss": 0.5084, "step": 2822 }, { "epoch": 0.1277664630006789, "grad_norm": 0.9870591208561422, "learning_rate": 9.75169136722626e-06, "loss": 0.4234, "step": 2823 }, { "epoch": 0.12781172210907446, "grad_norm": 0.38302841608917704, "learning_rate": 9.751463217348675e-06, "loss": 0.497, "step": 2824 }, { "epoch": 0.12785698121747002, "grad_norm": 0.7176736840275659, "learning_rate": 9.75123496537674e-06, "loss": 0.4236, "step": 2825 }, { "epoch": 0.12790224032586558, "grad_norm": 0.34689025847761207, "learning_rate": 9.751006611315357e-06, "loss": 0.5039, "step": 2826 }, { "epoch": 0.12794749943426115, "grad_norm": 0.3500954668873423, "learning_rate": 9.750778155169434e-06, "loss": 0.4995, "step": 2827 }, { "epoch": 0.1279927585426567, "grad_norm": 1.0915034728521975, "learning_rate": 9.75054959694388e-06, "loss": 0.4577, "step": 2828 }, { "epoch": 0.12803801765105227, "grad_norm": 0.36993428839340675, "learning_rate": 9.750320936643604e-06, "loss": 0.5616, "step": 2829 }, { "epoch": 0.12808327675944783, "grad_norm": 1.1635716200014408, "learning_rate": 9.75009217427352e-06, "loss": 0.4061, "step": 2830 }, { "epoch": 0.1281285358678434, "grad_norm": 0.7709762211162258, "learning_rate": 9.749863309838545e-06, "loss": 0.4129, "step": 2831 }, { "epoch": 0.12817379497623896, "grad_norm": 0.8127009596810594, "learning_rate": 9.749634343343598e-06, "loss": 0.4606, "step": 2832 }, { "epoch": 0.12821905408463452, "grad_norm": 0.7069452105591859, "learning_rate": 9.749405274793592e-06, "loss": 0.432, "step": 2833 }, { "epoch": 0.1282643131930301, "grad_norm": 0.7539224217869867, "learning_rate": 9.749176104193456e-06, "loss": 0.4228, "step": 2834 }, { "epoch": 0.12830957230142567, "grad_norm": 0.682126292506584, "learning_rate": 9.748946831548111e-06, "loss": 0.3924, "step": 2835 }, { "epoch": 0.12835483140982124, "grad_norm": 0.8189363607131463, "learning_rate": 9.748717456862484e-06, "loss": 0.4298, "step": 2836 }, { "epoch": 0.1284000905182168, "grad_norm": 0.6526943554999189, "learning_rate": 9.748487980141503e-06, "loss": 0.4387, "step": 2837 }, { "epoch": 0.12844534962661236, "grad_norm": 0.7165030693701668, "learning_rate": 9.748258401390099e-06, "loss": 0.4605, "step": 2838 }, { "epoch": 0.12849060873500792, "grad_norm": 0.748171676264515, "learning_rate": 9.748028720613206e-06, "loss": 0.4589, "step": 2839 }, { "epoch": 0.12853586784340348, "grad_norm": 0.6958245051079487, "learning_rate": 9.747798937815756e-06, "loss": 0.4405, "step": 2840 }, { "epoch": 0.12858112695179905, "grad_norm": 0.7010544328742045, "learning_rate": 9.74756905300269e-06, "loss": 0.4166, "step": 2841 }, { "epoch": 0.1286263860601946, "grad_norm": 0.6929601001488586, "learning_rate": 9.747339066178947e-06, "loss": 0.3861, "step": 2842 }, { "epoch": 0.12867164516859017, "grad_norm": 0.7691628549541619, "learning_rate": 9.747108977349466e-06, "loss": 0.4512, "step": 2843 }, { "epoch": 0.12871690427698573, "grad_norm": 1.5952758417935748, "learning_rate": 9.746878786519195e-06, "loss": 0.4443, "step": 2844 }, { "epoch": 0.1287621633853813, "grad_norm": 0.7012408391747204, "learning_rate": 9.746648493693076e-06, "loss": 0.3726, "step": 2845 }, { "epoch": 0.12880742249377689, "grad_norm": 0.7661600137981615, "learning_rate": 9.74641809887606e-06, "loss": 0.38, "step": 2846 }, { "epoch": 0.12885268160217245, "grad_norm": 0.8000061170172621, "learning_rate": 9.746187602073097e-06, "loss": 0.3985, "step": 2847 }, { "epoch": 0.128897940710568, "grad_norm": 0.7000898176833337, "learning_rate": 9.745957003289138e-06, "loss": 0.4487, "step": 2848 }, { "epoch": 0.12894319981896357, "grad_norm": 0.717128506736437, "learning_rate": 9.745726302529139e-06, "loss": 0.4188, "step": 2849 }, { "epoch": 0.12898845892735913, "grad_norm": 0.7935962162575106, "learning_rate": 9.745495499798058e-06, "loss": 0.3798, "step": 2850 }, { "epoch": 0.1290337180357547, "grad_norm": 0.7923976032261633, "learning_rate": 9.745264595100854e-06, "loss": 0.4351, "step": 2851 }, { "epoch": 0.12907897714415026, "grad_norm": 0.747883990023528, "learning_rate": 9.745033588442487e-06, "loss": 0.4304, "step": 2852 }, { "epoch": 0.12912423625254582, "grad_norm": 0.7459299202346427, "learning_rate": 9.744802479827921e-06, "loss": 0.4162, "step": 2853 }, { "epoch": 0.12916949536094138, "grad_norm": 0.5887077338666191, "learning_rate": 9.744571269262122e-06, "loss": 0.5119, "step": 2854 }, { "epoch": 0.12921475446933695, "grad_norm": 0.6729072197590653, "learning_rate": 9.74433995675006e-06, "loss": 0.4359, "step": 2855 }, { "epoch": 0.1292600135777325, "grad_norm": 0.7777456505468983, "learning_rate": 9.744108542296702e-06, "loss": 0.4771, "step": 2856 }, { "epoch": 0.12930527268612807, "grad_norm": 0.7469741198993132, "learning_rate": 9.743877025907023e-06, "loss": 0.3983, "step": 2857 }, { "epoch": 0.12935053179452366, "grad_norm": 0.6858749117382782, "learning_rate": 9.743645407585994e-06, "loss": 0.3637, "step": 2858 }, { "epoch": 0.12939579090291922, "grad_norm": 0.7031646069476174, "learning_rate": 9.743413687338596e-06, "loss": 0.4184, "step": 2859 }, { "epoch": 0.12944105001131478, "grad_norm": 0.7255909443098081, "learning_rate": 9.743181865169806e-06, "loss": 0.4131, "step": 2860 }, { "epoch": 0.12948630911971035, "grad_norm": 0.7322265572281352, "learning_rate": 9.742949941084604e-06, "loss": 0.4202, "step": 2861 }, { "epoch": 0.1295315682281059, "grad_norm": 0.6886194155596765, "learning_rate": 9.742717915087978e-06, "loss": 0.4113, "step": 2862 }, { "epoch": 0.12957682733650147, "grad_norm": 0.7652278487384526, "learning_rate": 9.742485787184907e-06, "loss": 0.4339, "step": 2863 }, { "epoch": 0.12962208644489703, "grad_norm": 0.67801745151185, "learning_rate": 9.742253557380383e-06, "loss": 0.4259, "step": 2864 }, { "epoch": 0.1296673455532926, "grad_norm": 0.6355102242351469, "learning_rate": 9.742021225679394e-06, "loss": 0.4087, "step": 2865 }, { "epoch": 0.12971260466168816, "grad_norm": 0.7260698872298267, "learning_rate": 9.741788792086934e-06, "loss": 0.502, "step": 2866 }, { "epoch": 0.12975786377008372, "grad_norm": 1.0475091478809628, "learning_rate": 9.741556256607996e-06, "loss": 0.376, "step": 2867 }, { "epoch": 0.12980312287847928, "grad_norm": 0.7174565795305505, "learning_rate": 9.741323619247575e-06, "loss": 0.4064, "step": 2868 }, { "epoch": 0.12984838198687487, "grad_norm": 0.6816845441724139, "learning_rate": 9.741090880010674e-06, "loss": 0.4205, "step": 2869 }, { "epoch": 0.12989364109527043, "grad_norm": 0.7404934530715019, "learning_rate": 9.74085803890229e-06, "loss": 0.469, "step": 2870 }, { "epoch": 0.129938900203666, "grad_norm": 0.7107227611827823, "learning_rate": 9.740625095927428e-06, "loss": 0.4323, "step": 2871 }, { "epoch": 0.12998415931206156, "grad_norm": 0.6435610487654178, "learning_rate": 9.74039205109109e-06, "loss": 0.4506, "step": 2872 }, { "epoch": 0.13002941842045712, "grad_norm": 0.6905701954641099, "learning_rate": 9.740158904398286e-06, "loss": 0.4603, "step": 2873 }, { "epoch": 0.13007467752885268, "grad_norm": 0.6490855543128955, "learning_rate": 9.739925655854028e-06, "loss": 0.3842, "step": 2874 }, { "epoch": 0.13011993663724825, "grad_norm": 0.7628737285578185, "learning_rate": 9.739692305463324e-06, "loss": 0.4419, "step": 2875 }, { "epoch": 0.1301651957456438, "grad_norm": 0.7171768424557958, "learning_rate": 9.739458853231188e-06, "loss": 0.4759, "step": 2876 }, { "epoch": 0.13021045485403937, "grad_norm": 0.6877385757644017, "learning_rate": 9.739225299162638e-06, "loss": 0.4431, "step": 2877 }, { "epoch": 0.13025571396243493, "grad_norm": 0.7360380510178575, "learning_rate": 9.738991643262693e-06, "loss": 0.4724, "step": 2878 }, { "epoch": 0.1303009730708305, "grad_norm": 0.7086556215160046, "learning_rate": 9.738757885536371e-06, "loss": 0.4279, "step": 2879 }, { "epoch": 0.13034623217922606, "grad_norm": 0.7367770757564954, "learning_rate": 9.738524025988696e-06, "loss": 0.4422, "step": 2880 }, { "epoch": 0.13039149128762165, "grad_norm": 0.7325494044645873, "learning_rate": 9.738290064624694e-06, "loss": 0.4302, "step": 2881 }, { "epoch": 0.1304367503960172, "grad_norm": 0.7139844892563704, "learning_rate": 9.73805600144939e-06, "loss": 0.5225, "step": 2882 }, { "epoch": 0.13048200950441277, "grad_norm": 0.7369897055113472, "learning_rate": 9.737821836467816e-06, "loss": 0.3924, "step": 2883 }, { "epoch": 0.13052726861280833, "grad_norm": 0.7422154925009801, "learning_rate": 9.737587569685e-06, "loss": 0.4555, "step": 2884 }, { "epoch": 0.1305725277212039, "grad_norm": 0.6495721284288817, "learning_rate": 9.737353201105978e-06, "loss": 0.3824, "step": 2885 }, { "epoch": 0.13061778682959946, "grad_norm": 0.3972406270595581, "learning_rate": 9.737118730735786e-06, "loss": 0.5372, "step": 2886 }, { "epoch": 0.13066304593799502, "grad_norm": 0.3759637227326978, "learning_rate": 9.73688415857946e-06, "loss": 0.5196, "step": 2887 }, { "epoch": 0.13070830504639058, "grad_norm": 0.38323779671317704, "learning_rate": 9.736649484642044e-06, "loss": 0.5211, "step": 2888 }, { "epoch": 0.13075356415478614, "grad_norm": 0.3246091338161728, "learning_rate": 9.736414708928576e-06, "loss": 0.502, "step": 2889 }, { "epoch": 0.1307988232631817, "grad_norm": 0.3367150725113058, "learning_rate": 9.736179831444103e-06, "loss": 0.5038, "step": 2890 }, { "epoch": 0.13084408237157727, "grad_norm": 1.1456990720264577, "learning_rate": 9.735944852193673e-06, "loss": 0.4215, "step": 2891 }, { "epoch": 0.13088934147997283, "grad_norm": 0.7204268232990344, "learning_rate": 9.735709771182331e-06, "loss": 0.4528, "step": 2892 }, { "epoch": 0.13093460058836842, "grad_norm": 0.8307652317369043, "learning_rate": 9.735474588415132e-06, "loss": 0.4629, "step": 2893 }, { "epoch": 0.13097985969676398, "grad_norm": 1.1668789311608647, "learning_rate": 9.735239303897129e-06, "loss": 0.4201, "step": 2894 }, { "epoch": 0.13102511880515955, "grad_norm": 0.7384757066530285, "learning_rate": 9.735003917633376e-06, "loss": 0.4314, "step": 2895 }, { "epoch": 0.1310703779135551, "grad_norm": 0.6199463148996311, "learning_rate": 9.73476842962893e-06, "loss": 0.5127, "step": 2896 }, { "epoch": 0.13111563702195067, "grad_norm": 1.2150021320725093, "learning_rate": 9.734532839888853e-06, "loss": 0.4345, "step": 2897 }, { "epoch": 0.13116089613034623, "grad_norm": 0.9311562905072975, "learning_rate": 9.734297148418205e-06, "loss": 0.4413, "step": 2898 }, { "epoch": 0.1312061552387418, "grad_norm": 0.7231301939259253, "learning_rate": 9.734061355222054e-06, "loss": 0.4008, "step": 2899 }, { "epoch": 0.13125141434713736, "grad_norm": 0.8095404884163195, "learning_rate": 9.733825460305462e-06, "loss": 0.3956, "step": 2900 }, { "epoch": 0.13129667345553292, "grad_norm": 0.8873271958738239, "learning_rate": 9.7335894636735e-06, "loss": 0.4109, "step": 2901 }, { "epoch": 0.13134193256392848, "grad_norm": 0.8089331693710592, "learning_rate": 9.73335336533124e-06, "loss": 0.4431, "step": 2902 }, { "epoch": 0.13138719167232404, "grad_norm": 0.7109221137009005, "learning_rate": 9.733117165283753e-06, "loss": 0.4636, "step": 2903 }, { "epoch": 0.1314324507807196, "grad_norm": 0.5918246544216307, "learning_rate": 9.732880863536114e-06, "loss": 0.5099, "step": 2904 }, { "epoch": 0.1314777098891152, "grad_norm": 0.8772212127328848, "learning_rate": 9.732644460093402e-06, "loss": 0.4276, "step": 2905 }, { "epoch": 0.13152296899751076, "grad_norm": 0.8671021696476754, "learning_rate": 9.732407954960695e-06, "loss": 0.4327, "step": 2906 }, { "epoch": 0.13156822810590632, "grad_norm": 0.3731968179795825, "learning_rate": 9.732171348143076e-06, "loss": 0.5093, "step": 2907 }, { "epoch": 0.13161348721430188, "grad_norm": 1.5183166337584588, "learning_rate": 9.731934639645628e-06, "loss": 0.3628, "step": 2908 }, { "epoch": 0.13165874632269745, "grad_norm": 0.7402805392392205, "learning_rate": 9.731697829473438e-06, "loss": 0.4497, "step": 2909 }, { "epoch": 0.131704005431093, "grad_norm": 0.4875191976551136, "learning_rate": 9.731460917631594e-06, "loss": 0.5167, "step": 2910 }, { "epoch": 0.13174926453948857, "grad_norm": 0.8975361419222202, "learning_rate": 9.731223904125186e-06, "loss": 0.4501, "step": 2911 }, { "epoch": 0.13179452364788413, "grad_norm": 0.7226982167131014, "learning_rate": 9.730986788959308e-06, "loss": 0.4582, "step": 2912 }, { "epoch": 0.1318397827562797, "grad_norm": 0.7359214134878374, "learning_rate": 9.730749572139054e-06, "loss": 0.3826, "step": 2913 }, { "epoch": 0.13188504186467526, "grad_norm": 0.6907853883793874, "learning_rate": 9.730512253669523e-06, "loss": 0.4567, "step": 2914 }, { "epoch": 0.13193030097307082, "grad_norm": 0.7620674928519404, "learning_rate": 9.730274833555809e-06, "loss": 0.4673, "step": 2915 }, { "epoch": 0.1319755600814664, "grad_norm": 0.4297711923387903, "learning_rate": 9.730037311803017e-06, "loss": 0.4913, "step": 2916 }, { "epoch": 0.13202081918986197, "grad_norm": 0.3635336031936697, "learning_rate": 9.72979968841625e-06, "loss": 0.5311, "step": 2917 }, { "epoch": 0.13206607829825753, "grad_norm": 0.31921531659711294, "learning_rate": 9.729561963400616e-06, "loss": 0.5029, "step": 2918 }, { "epoch": 0.1321113374066531, "grad_norm": 0.8854230801627538, "learning_rate": 9.72932413676122e-06, "loss": 0.4328, "step": 2919 }, { "epoch": 0.13215659651504866, "grad_norm": 0.7914974220186524, "learning_rate": 9.729086208503174e-06, "loss": 0.4549, "step": 2920 }, { "epoch": 0.13220185562344422, "grad_norm": 0.6883760552004766, "learning_rate": 9.728848178631588e-06, "loss": 0.4069, "step": 2921 }, { "epoch": 0.13224711473183978, "grad_norm": 1.0327106749885167, "learning_rate": 9.72861004715158e-06, "loss": 0.402, "step": 2922 }, { "epoch": 0.13229237384023534, "grad_norm": 0.7813827826117568, "learning_rate": 9.728371814068265e-06, "loss": 0.3908, "step": 2923 }, { "epoch": 0.1323376329486309, "grad_norm": 0.6625608201070756, "learning_rate": 9.728133479386763e-06, "loss": 0.4126, "step": 2924 }, { "epoch": 0.13238289205702647, "grad_norm": 0.8204268002708216, "learning_rate": 9.727895043112192e-06, "loss": 0.3966, "step": 2925 }, { "epoch": 0.13242815116542203, "grad_norm": 0.7096880863141186, "learning_rate": 9.727656505249676e-06, "loss": 0.5155, "step": 2926 }, { "epoch": 0.1324734102738176, "grad_norm": 0.8209178150784526, "learning_rate": 9.727417865804343e-06, "loss": 0.4203, "step": 2927 }, { "epoch": 0.13251866938221318, "grad_norm": 1.320863933337403, "learning_rate": 9.72717912478132e-06, "loss": 0.4437, "step": 2928 }, { "epoch": 0.13256392849060875, "grad_norm": 0.6550034192890875, "learning_rate": 9.726940282185734e-06, "loss": 0.4092, "step": 2929 }, { "epoch": 0.1326091875990043, "grad_norm": 0.7084129783400551, "learning_rate": 9.726701338022722e-06, "loss": 0.4351, "step": 2930 }, { "epoch": 0.13265444670739987, "grad_norm": 0.693353211799939, "learning_rate": 9.726462292297411e-06, "loss": 0.3808, "step": 2931 }, { "epoch": 0.13269970581579543, "grad_norm": 0.7701332472199786, "learning_rate": 9.726223145014946e-06, "loss": 0.4215, "step": 2932 }, { "epoch": 0.132744964924191, "grad_norm": 0.7094866856303067, "learning_rate": 9.725983896180458e-06, "loss": 0.4252, "step": 2933 }, { "epoch": 0.13279022403258656, "grad_norm": 0.6232236818498672, "learning_rate": 9.725744545799093e-06, "loss": 0.5054, "step": 2934 }, { "epoch": 0.13283548314098212, "grad_norm": 0.9707367889864059, "learning_rate": 9.72550509387599e-06, "loss": 0.3881, "step": 2935 }, { "epoch": 0.13288074224937768, "grad_norm": 0.7360273696473658, "learning_rate": 9.725265540416296e-06, "loss": 0.4109, "step": 2936 }, { "epoch": 0.13292600135777324, "grad_norm": 0.7238757945473541, "learning_rate": 9.725025885425159e-06, "loss": 0.4553, "step": 2937 }, { "epoch": 0.1329712604661688, "grad_norm": 0.471024966139833, "learning_rate": 9.724786128907726e-06, "loss": 0.5154, "step": 2938 }, { "epoch": 0.13301651957456437, "grad_norm": 0.7205417842353122, "learning_rate": 9.724546270869152e-06, "loss": 0.4465, "step": 2939 }, { "epoch": 0.13306177868295996, "grad_norm": 0.40647518704260016, "learning_rate": 9.724306311314589e-06, "loss": 0.5377, "step": 2940 }, { "epoch": 0.13310703779135552, "grad_norm": 0.35852848738255716, "learning_rate": 9.724066250249192e-06, "loss": 0.5163, "step": 2941 }, { "epoch": 0.13315229689975108, "grad_norm": 0.9552604293041855, "learning_rate": 9.72382608767812e-06, "loss": 0.4381, "step": 2942 }, { "epoch": 0.13319755600814664, "grad_norm": 0.7275710516886414, "learning_rate": 9.723585823606533e-06, "loss": 0.4132, "step": 2943 }, { "epoch": 0.1332428151165422, "grad_norm": 0.5386892240679365, "learning_rate": 9.723345458039595e-06, "loss": 0.5094, "step": 2944 }, { "epoch": 0.13328807422493777, "grad_norm": 0.9176692907509812, "learning_rate": 9.723104990982469e-06, "loss": 0.4535, "step": 2945 }, { "epoch": 0.13333333333333333, "grad_norm": 0.7598831526931533, "learning_rate": 9.722864422440323e-06, "loss": 0.4384, "step": 2946 }, { "epoch": 0.1333785924417289, "grad_norm": 0.6886217263953099, "learning_rate": 9.722623752418327e-06, "loss": 0.4696, "step": 2947 }, { "epoch": 0.13342385155012446, "grad_norm": 0.7236203182922575, "learning_rate": 9.722382980921649e-06, "loss": 0.4127, "step": 2948 }, { "epoch": 0.13346911065852002, "grad_norm": 0.7866624324006298, "learning_rate": 9.722142107955465e-06, "loss": 0.4252, "step": 2949 }, { "epoch": 0.13351436976691558, "grad_norm": 0.7674053504076765, "learning_rate": 9.721901133524951e-06, "loss": 0.427, "step": 2950 }, { "epoch": 0.13355962887531114, "grad_norm": 0.7596085549378919, "learning_rate": 9.721660057635284e-06, "loss": 0.4098, "step": 2951 }, { "epoch": 0.13360488798370673, "grad_norm": 0.7883147091388137, "learning_rate": 9.721418880291642e-06, "loss": 0.3913, "step": 2952 }, { "epoch": 0.1336501470921023, "grad_norm": 0.7018500754706035, "learning_rate": 9.72117760149921e-06, "loss": 0.384, "step": 2953 }, { "epoch": 0.13369540620049786, "grad_norm": 0.6791992191941137, "learning_rate": 9.720936221263174e-06, "loss": 0.4403, "step": 2954 }, { "epoch": 0.13374066530889342, "grad_norm": 0.7247397511156864, "learning_rate": 9.720694739588714e-06, "loss": 0.4036, "step": 2955 }, { "epoch": 0.13378592441728898, "grad_norm": 0.7815257711538628, "learning_rate": 9.720453156481023e-06, "loss": 0.4059, "step": 2956 }, { "epoch": 0.13383118352568454, "grad_norm": 0.7438749116316435, "learning_rate": 9.720211471945293e-06, "loss": 0.4463, "step": 2957 }, { "epoch": 0.1338764426340801, "grad_norm": 0.7237329330053868, "learning_rate": 9.719969685986714e-06, "loss": 0.4146, "step": 2958 }, { "epoch": 0.13392170174247567, "grad_norm": 0.9573335901450759, "learning_rate": 9.719727798610483e-06, "loss": 0.442, "step": 2959 }, { "epoch": 0.13396696085087123, "grad_norm": 0.9656716511929738, "learning_rate": 9.719485809821799e-06, "loss": 0.4202, "step": 2960 }, { "epoch": 0.1340122199592668, "grad_norm": 0.8338597559468536, "learning_rate": 9.719243719625857e-06, "loss": 0.3783, "step": 2961 }, { "epoch": 0.13405747906766236, "grad_norm": 0.6863750264693458, "learning_rate": 9.719001528027863e-06, "loss": 0.5274, "step": 2962 }, { "epoch": 0.13410273817605795, "grad_norm": 0.7985367660393604, "learning_rate": 9.71875923503302e-06, "loss": 0.4137, "step": 2963 }, { "epoch": 0.1341479972844535, "grad_norm": 0.9118946431924335, "learning_rate": 9.718516840646533e-06, "loss": 0.4091, "step": 2964 }, { "epoch": 0.13419325639284907, "grad_norm": 0.7934119150445078, "learning_rate": 9.71827434487361e-06, "loss": 0.4263, "step": 2965 }, { "epoch": 0.13423851550124463, "grad_norm": 0.7735298280481725, "learning_rate": 9.718031747719465e-06, "loss": 0.4382, "step": 2966 }, { "epoch": 0.1342837746096402, "grad_norm": 0.9513330875037518, "learning_rate": 9.717789049189306e-06, "loss": 0.4435, "step": 2967 }, { "epoch": 0.13432903371803576, "grad_norm": 0.6909982648821564, "learning_rate": 9.71754624928835e-06, "loss": 0.4049, "step": 2968 }, { "epoch": 0.13437429282643132, "grad_norm": 0.8679915662733991, "learning_rate": 9.717303348021814e-06, "loss": 0.395, "step": 2969 }, { "epoch": 0.13441955193482688, "grad_norm": 0.8847599037454432, "learning_rate": 9.717060345394917e-06, "loss": 0.4044, "step": 2970 }, { "epoch": 0.13446481104322244, "grad_norm": 0.7118812380117908, "learning_rate": 9.716817241412882e-06, "loss": 0.3957, "step": 2971 }, { "epoch": 0.134510070151618, "grad_norm": 0.6306224960599156, "learning_rate": 9.71657403608093e-06, "loss": 0.5091, "step": 2972 }, { "epoch": 0.13455532926001357, "grad_norm": 0.7622301601596669, "learning_rate": 9.716330729404287e-06, "loss": 0.4114, "step": 2973 }, { "epoch": 0.13460058836840913, "grad_norm": 0.7484186601210657, "learning_rate": 9.716087321388184e-06, "loss": 0.3871, "step": 2974 }, { "epoch": 0.13464584747680472, "grad_norm": 0.7625419668353308, "learning_rate": 9.715843812037846e-06, "loss": 0.4587, "step": 2975 }, { "epoch": 0.13469110658520028, "grad_norm": 0.35951379933644734, "learning_rate": 9.71560020135851e-06, "loss": 0.4977, "step": 2976 }, { "epoch": 0.13473636569359584, "grad_norm": 0.3442069853028463, "learning_rate": 9.715356489355408e-06, "loss": 0.5211, "step": 2977 }, { "epoch": 0.1347816248019914, "grad_norm": 0.7327349225883127, "learning_rate": 9.715112676033777e-06, "loss": 0.3668, "step": 2978 }, { "epoch": 0.13482688391038697, "grad_norm": 0.6963321159595294, "learning_rate": 9.714868761398856e-06, "loss": 0.4397, "step": 2979 }, { "epoch": 0.13487214301878253, "grad_norm": 0.7512751684699605, "learning_rate": 9.714624745455885e-06, "loss": 0.4309, "step": 2980 }, { "epoch": 0.1349174021271781, "grad_norm": 0.6640385044151759, "learning_rate": 9.71438062821011e-06, "loss": 0.4231, "step": 2981 }, { "epoch": 0.13496266123557366, "grad_norm": 0.7347924559093464, "learning_rate": 9.714136409666773e-06, "loss": 0.3837, "step": 2982 }, { "epoch": 0.13500792034396922, "grad_norm": 0.7153839496940779, "learning_rate": 9.713892089831122e-06, "loss": 0.3813, "step": 2983 }, { "epoch": 0.13505317945236478, "grad_norm": 0.6314896443694339, "learning_rate": 9.71364766870841e-06, "loss": 0.5242, "step": 2984 }, { "epoch": 0.13509843856076034, "grad_norm": 0.7938527436789027, "learning_rate": 9.713403146303885e-06, "loss": 0.3912, "step": 2985 }, { "epoch": 0.1351436976691559, "grad_norm": 0.7578285470729604, "learning_rate": 9.713158522622804e-06, "loss": 0.4341, "step": 2986 }, { "epoch": 0.1351889567775515, "grad_norm": 0.7256290252232681, "learning_rate": 9.71291379767042e-06, "loss": 0.4769, "step": 2987 }, { "epoch": 0.13523421588594706, "grad_norm": 0.705979909773582, "learning_rate": 9.712668971451996e-06, "loss": 0.4145, "step": 2988 }, { "epoch": 0.13527947499434262, "grad_norm": 0.7603561663218684, "learning_rate": 9.712424043972786e-06, "loss": 0.4247, "step": 2989 }, { "epoch": 0.13532473410273818, "grad_norm": 0.7209131814522719, "learning_rate": 9.712179015238058e-06, "loss": 0.4629, "step": 2990 }, { "epoch": 0.13536999321113374, "grad_norm": 0.685193750110945, "learning_rate": 9.711933885253076e-06, "loss": 0.443, "step": 2991 }, { "epoch": 0.1354152523195293, "grad_norm": 0.6841066749635569, "learning_rate": 9.711688654023105e-06, "loss": 0.4122, "step": 2992 }, { "epoch": 0.13546051142792487, "grad_norm": 0.5869559059563596, "learning_rate": 9.711443321553415e-06, "loss": 0.4883, "step": 2993 }, { "epoch": 0.13550577053632043, "grad_norm": 0.7553318168260519, "learning_rate": 9.71119788784928e-06, "loss": 0.4483, "step": 2994 }, { "epoch": 0.135551029644716, "grad_norm": 0.7287364896534111, "learning_rate": 9.71095235291597e-06, "loss": 0.4336, "step": 2995 }, { "epoch": 0.13559628875311155, "grad_norm": 0.7214137149091241, "learning_rate": 9.710706716758765e-06, "loss": 0.4295, "step": 2996 }, { "epoch": 0.13564154786150712, "grad_norm": 0.7463034267148326, "learning_rate": 9.710460979382938e-06, "loss": 0.411, "step": 2997 }, { "epoch": 0.13568680696990268, "grad_norm": 0.7686412440469463, "learning_rate": 9.710215140793774e-06, "loss": 0.3998, "step": 2998 }, { "epoch": 0.13573206607829827, "grad_norm": 0.9299126331208909, "learning_rate": 9.709969200996551e-06, "loss": 0.3754, "step": 2999 }, { "epoch": 0.13577732518669383, "grad_norm": 0.7395976891382515, "learning_rate": 9.709723159996556e-06, "loss": 0.443, "step": 3000 }, { "epoch": 0.1358225842950894, "grad_norm": 0.4746397495528787, "learning_rate": 9.709477017799076e-06, "loss": 0.5077, "step": 3001 }, { "epoch": 0.13586784340348496, "grad_norm": 0.7794278734782368, "learning_rate": 9.709230774409397e-06, "loss": 0.4328, "step": 3002 }, { "epoch": 0.13591310251188052, "grad_norm": 0.649400693345909, "learning_rate": 9.708984429832815e-06, "loss": 0.3932, "step": 3003 }, { "epoch": 0.13595836162027608, "grad_norm": 0.7243280492242261, "learning_rate": 9.708737984074616e-06, "loss": 0.425, "step": 3004 }, { "epoch": 0.13600362072867164, "grad_norm": 0.7567664983915044, "learning_rate": 9.708491437140103e-06, "loss": 0.4414, "step": 3005 }, { "epoch": 0.1360488798370672, "grad_norm": 0.7045563573152411, "learning_rate": 9.708244789034568e-06, "loss": 0.438, "step": 3006 }, { "epoch": 0.13609413894546277, "grad_norm": 0.6520952845285576, "learning_rate": 9.707998039763315e-06, "loss": 0.4046, "step": 3007 }, { "epoch": 0.13613939805385833, "grad_norm": 0.521269484352832, "learning_rate": 9.707751189331642e-06, "loss": 0.5201, "step": 3008 }, { "epoch": 0.1361846571622539, "grad_norm": 0.7179054327234664, "learning_rate": 9.707504237744854e-06, "loss": 0.4759, "step": 3009 }, { "epoch": 0.13622991627064948, "grad_norm": 0.6624657232936156, "learning_rate": 9.707257185008259e-06, "loss": 0.4593, "step": 3010 }, { "epoch": 0.13627517537904504, "grad_norm": 0.9881809471220743, "learning_rate": 9.707010031127164e-06, "loss": 0.4301, "step": 3011 }, { "epoch": 0.1363204344874406, "grad_norm": 0.7301209156664, "learning_rate": 9.70676277610688e-06, "loss": 0.4417, "step": 3012 }, { "epoch": 0.13636569359583617, "grad_norm": 0.9904205272743063, "learning_rate": 9.70651541995272e-06, "loss": 0.4054, "step": 3013 }, { "epoch": 0.13641095270423173, "grad_norm": 0.470084247855341, "learning_rate": 9.706267962669999e-06, "loss": 0.5211, "step": 3014 }, { "epoch": 0.1364562118126273, "grad_norm": 0.7041348282498282, "learning_rate": 9.706020404264033e-06, "loss": 0.4135, "step": 3015 }, { "epoch": 0.13650147092102285, "grad_norm": 0.6653132246514796, "learning_rate": 9.705772744740142e-06, "loss": 0.4016, "step": 3016 }, { "epoch": 0.13654673002941842, "grad_norm": 0.6466986946363746, "learning_rate": 9.705524984103647e-06, "loss": 0.4183, "step": 3017 }, { "epoch": 0.13659198913781398, "grad_norm": 0.6928442879758144, "learning_rate": 9.705277122359871e-06, "loss": 0.4253, "step": 3018 }, { "epoch": 0.13663724824620954, "grad_norm": 0.6640542362916704, "learning_rate": 9.705029159514143e-06, "loss": 0.4113, "step": 3019 }, { "epoch": 0.1366825073546051, "grad_norm": 0.6814633476218547, "learning_rate": 9.704781095571788e-06, "loss": 0.4088, "step": 3020 }, { "epoch": 0.13672776646300067, "grad_norm": 0.706195642860411, "learning_rate": 9.704532930538137e-06, "loss": 0.3909, "step": 3021 }, { "epoch": 0.13677302557139626, "grad_norm": 0.8893079768742019, "learning_rate": 9.704284664418521e-06, "loss": 0.397, "step": 3022 }, { "epoch": 0.13681828467979182, "grad_norm": 0.4988443481670754, "learning_rate": 9.704036297218278e-06, "loss": 0.4932, "step": 3023 }, { "epoch": 0.13686354378818738, "grad_norm": 0.3946010923966299, "learning_rate": 9.70378782894274e-06, "loss": 0.5055, "step": 3024 }, { "epoch": 0.13690880289658294, "grad_norm": 0.7658283547259195, "learning_rate": 9.70353925959725e-06, "loss": 0.438, "step": 3025 }, { "epoch": 0.1369540620049785, "grad_norm": 0.7081223389869151, "learning_rate": 9.703290589187146e-06, "loss": 0.3792, "step": 3026 }, { "epoch": 0.13699932111337407, "grad_norm": 0.7429936786885113, "learning_rate": 9.703041817717773e-06, "loss": 0.4005, "step": 3027 }, { "epoch": 0.13704458022176963, "grad_norm": 0.9040521372243512, "learning_rate": 9.702792945194475e-06, "loss": 0.4689, "step": 3028 }, { "epoch": 0.1370898393301652, "grad_norm": 0.7365739004941558, "learning_rate": 9.7025439716226e-06, "loss": 0.4277, "step": 3029 }, { "epoch": 0.13713509843856075, "grad_norm": 0.6711554139649331, "learning_rate": 9.702294897007499e-06, "loss": 0.3984, "step": 3030 }, { "epoch": 0.13718035754695632, "grad_norm": 0.7033004233347675, "learning_rate": 9.702045721354521e-06, "loss": 0.5139, "step": 3031 }, { "epoch": 0.13722561665535188, "grad_norm": 0.9480372201213745, "learning_rate": 9.701796444669022e-06, "loss": 0.4476, "step": 3032 }, { "epoch": 0.13727087576374744, "grad_norm": 0.7158231088813505, "learning_rate": 9.701547066956359e-06, "loss": 0.4125, "step": 3033 }, { "epoch": 0.13731613487214303, "grad_norm": 0.7652456510075898, "learning_rate": 9.701297588221888e-06, "loss": 0.3784, "step": 3034 }, { "epoch": 0.1373613939805386, "grad_norm": 0.9602938075922123, "learning_rate": 9.701048008470972e-06, "loss": 0.4084, "step": 3035 }, { "epoch": 0.13740665308893416, "grad_norm": 0.7415008906843575, "learning_rate": 9.700798327708972e-06, "loss": 0.423, "step": 3036 }, { "epoch": 0.13745191219732972, "grad_norm": 0.6903867395699205, "learning_rate": 9.700548545941253e-06, "loss": 0.43, "step": 3037 }, { "epoch": 0.13749717130572528, "grad_norm": 0.6979826967977066, "learning_rate": 9.700298663173183e-06, "loss": 0.4955, "step": 3038 }, { "epoch": 0.13754243041412084, "grad_norm": 0.7044161732381952, "learning_rate": 9.70004867941013e-06, "loss": 0.421, "step": 3039 }, { "epoch": 0.1375876895225164, "grad_norm": 0.37286787245742103, "learning_rate": 9.699798594657464e-06, "loss": 0.5152, "step": 3040 }, { "epoch": 0.13763294863091197, "grad_norm": 0.9123876179707686, "learning_rate": 9.699548408920563e-06, "loss": 0.445, "step": 3041 }, { "epoch": 0.13767820773930753, "grad_norm": 0.6151008314597627, "learning_rate": 9.6992981222048e-06, "loss": 0.5323, "step": 3042 }, { "epoch": 0.1377234668477031, "grad_norm": 0.8045341700582606, "learning_rate": 9.699047734515554e-06, "loss": 0.4556, "step": 3043 }, { "epoch": 0.13776872595609865, "grad_norm": 0.8138702592007475, "learning_rate": 9.698797245858202e-06, "loss": 0.443, "step": 3044 }, { "epoch": 0.13781398506449424, "grad_norm": 0.5004662711445704, "learning_rate": 9.69854665623813e-06, "loss": 0.4995, "step": 3045 }, { "epoch": 0.1378592441728898, "grad_norm": 0.4342575012009011, "learning_rate": 9.698295965660721e-06, "loss": 0.5132, "step": 3046 }, { "epoch": 0.13790450328128537, "grad_norm": 0.8695634845187036, "learning_rate": 9.69804517413136e-06, "loss": 0.389, "step": 3047 }, { "epoch": 0.13794976238968093, "grad_norm": 0.7739147195274749, "learning_rate": 9.697794281655439e-06, "loss": 0.4401, "step": 3048 }, { "epoch": 0.1379950214980765, "grad_norm": 0.7119861534065948, "learning_rate": 9.697543288238345e-06, "loss": 0.3981, "step": 3049 }, { "epoch": 0.13804028060647205, "grad_norm": 0.6663963626978507, "learning_rate": 9.697292193885475e-06, "loss": 0.4072, "step": 3050 }, { "epoch": 0.13808553971486762, "grad_norm": 0.781265903071915, "learning_rate": 9.69704099860222e-06, "loss": 0.3964, "step": 3051 }, { "epoch": 0.13813079882326318, "grad_norm": 1.2176129605873554, "learning_rate": 9.696789702393982e-06, "loss": 0.4457, "step": 3052 }, { "epoch": 0.13817605793165874, "grad_norm": 0.9365927829037233, "learning_rate": 9.69653830526616e-06, "loss": 0.5298, "step": 3053 }, { "epoch": 0.1382213170400543, "grad_norm": 0.5873324580483902, "learning_rate": 9.696286807224151e-06, "loss": 0.4898, "step": 3054 }, { "epoch": 0.13826657614844987, "grad_norm": 0.7170843817829563, "learning_rate": 9.696035208273363e-06, "loss": 0.4818, "step": 3055 }, { "epoch": 0.13831183525684543, "grad_norm": 0.7445503826919057, "learning_rate": 9.6957835084192e-06, "loss": 0.4018, "step": 3056 }, { "epoch": 0.13835709436524102, "grad_norm": 0.6868032484108987, "learning_rate": 9.695531707667073e-06, "loss": 0.4229, "step": 3057 }, { "epoch": 0.13840235347363658, "grad_norm": 0.7126566217183055, "learning_rate": 9.695279806022391e-06, "loss": 0.4223, "step": 3058 }, { "epoch": 0.13844761258203214, "grad_norm": 1.6047999058425257, "learning_rate": 9.695027803490565e-06, "loss": 0.5042, "step": 3059 }, { "epoch": 0.1384928716904277, "grad_norm": 1.1171172770190672, "learning_rate": 9.694775700077013e-06, "loss": 0.5175, "step": 3060 }, { "epoch": 0.13853813079882327, "grad_norm": 0.9032744039406752, "learning_rate": 9.694523495787149e-06, "loss": 0.3975, "step": 3061 }, { "epoch": 0.13858338990721883, "grad_norm": 0.7173000975750978, "learning_rate": 9.694271190626393e-06, "loss": 0.4484, "step": 3062 }, { "epoch": 0.1386286490156144, "grad_norm": 1.0967957259191934, "learning_rate": 9.694018784600166e-06, "loss": 0.5279, "step": 3063 }, { "epoch": 0.13867390812400995, "grad_norm": 1.1085504083237756, "learning_rate": 9.693766277713893e-06, "loss": 0.5089, "step": 3064 }, { "epoch": 0.13871916723240552, "grad_norm": 1.0817725333092245, "learning_rate": 9.693513669972999e-06, "loss": 0.4073, "step": 3065 }, { "epoch": 0.13876442634080108, "grad_norm": 0.7458369430458078, "learning_rate": 9.69326096138291e-06, "loss": 0.382, "step": 3066 }, { "epoch": 0.13880968544919664, "grad_norm": 0.6444122363483363, "learning_rate": 9.693008151949058e-06, "loss": 0.531, "step": 3067 }, { "epoch": 0.1388549445575922, "grad_norm": 0.8259422450902474, "learning_rate": 9.692755241676874e-06, "loss": 0.4223, "step": 3068 }, { "epoch": 0.1389002036659878, "grad_norm": 0.8397951737371382, "learning_rate": 9.692502230571792e-06, "loss": 0.4282, "step": 3069 }, { "epoch": 0.13894546277438335, "grad_norm": 0.7945570794817268, "learning_rate": 9.69224911863925e-06, "loss": 0.3981, "step": 3070 }, { "epoch": 0.13899072188277892, "grad_norm": 0.7121357050394432, "learning_rate": 9.691995905884684e-06, "loss": 0.4371, "step": 3071 }, { "epoch": 0.13903598099117448, "grad_norm": 0.7504348353548871, "learning_rate": 9.691742592313537e-06, "loss": 0.4514, "step": 3072 }, { "epoch": 0.13908124009957004, "grad_norm": 0.8473511032416616, "learning_rate": 9.691489177931253e-06, "loss": 0.419, "step": 3073 }, { "epoch": 0.1391264992079656, "grad_norm": 0.7824499897745099, "learning_rate": 9.691235662743273e-06, "loss": 0.4036, "step": 3074 }, { "epoch": 0.13917175831636117, "grad_norm": 0.6989322866124807, "learning_rate": 9.690982046755048e-06, "loss": 0.4274, "step": 3075 }, { "epoch": 0.13921701742475673, "grad_norm": 0.6853412184971148, "learning_rate": 9.690728329972025e-06, "loss": 0.4167, "step": 3076 }, { "epoch": 0.1392622765331523, "grad_norm": 1.4691170124006308, "learning_rate": 9.690474512399658e-06, "loss": 0.5224, "step": 3077 }, { "epoch": 0.13930753564154785, "grad_norm": 0.8608378530218217, "learning_rate": 9.690220594043399e-06, "loss": 0.4327, "step": 3078 }, { "epoch": 0.13935279474994341, "grad_norm": 0.8612975549057106, "learning_rate": 9.689966574908704e-06, "loss": 0.4128, "step": 3079 }, { "epoch": 0.13939805385833898, "grad_norm": 1.1479647773489714, "learning_rate": 9.689712455001031e-06, "loss": 0.3769, "step": 3080 }, { "epoch": 0.13944331296673457, "grad_norm": 0.7404215174065047, "learning_rate": 9.689458234325842e-06, "loss": 0.4226, "step": 3081 }, { "epoch": 0.13948857207513013, "grad_norm": 0.7714942920613758, "learning_rate": 9.689203912888597e-06, "loss": 0.5254, "step": 3082 }, { "epoch": 0.1395338311835257, "grad_norm": 0.8908715997971788, "learning_rate": 9.688949490694762e-06, "loss": 0.3953, "step": 3083 }, { "epoch": 0.13957909029192125, "grad_norm": 0.8204552348832636, "learning_rate": 9.688694967749804e-06, "loss": 0.4468, "step": 3084 }, { "epoch": 0.13962434940031682, "grad_norm": 0.7103680809500149, "learning_rate": 9.68844034405919e-06, "loss": 0.4333, "step": 3085 }, { "epoch": 0.13966960850871238, "grad_norm": 0.6542833279183922, "learning_rate": 9.688185619628395e-06, "loss": 0.5296, "step": 3086 }, { "epoch": 0.13971486761710794, "grad_norm": 0.7607580376314098, "learning_rate": 9.687930794462887e-06, "loss": 0.4291, "step": 3087 }, { "epoch": 0.1397601267255035, "grad_norm": 0.731342029185371, "learning_rate": 9.687675868568145e-06, "loss": 0.426, "step": 3088 }, { "epoch": 0.13980538583389907, "grad_norm": 0.729612946517145, "learning_rate": 9.687420841949646e-06, "loss": 0.4535, "step": 3089 }, { "epoch": 0.13985064494229463, "grad_norm": 0.4080284721261495, "learning_rate": 9.68716571461287e-06, "loss": 0.5057, "step": 3090 }, { "epoch": 0.1398959040506902, "grad_norm": 0.6791576858736899, "learning_rate": 9.686910486563297e-06, "loss": 0.431, "step": 3091 }, { "epoch": 0.13994116315908578, "grad_norm": 0.6886144579501806, "learning_rate": 9.686655157806412e-06, "loss": 0.4299, "step": 3092 }, { "epoch": 0.13998642226748134, "grad_norm": 0.717296049557901, "learning_rate": 9.686399728347704e-06, "loss": 0.418, "step": 3093 }, { "epoch": 0.1400316813758769, "grad_norm": 0.4554757341055788, "learning_rate": 9.686144198192658e-06, "loss": 0.5322, "step": 3094 }, { "epoch": 0.14007694048427247, "grad_norm": 0.8239705370084248, "learning_rate": 9.685888567346765e-06, "loss": 0.4378, "step": 3095 }, { "epoch": 0.14012219959266803, "grad_norm": 0.7019855219895466, "learning_rate": 9.685632835815519e-06, "loss": 0.3947, "step": 3096 }, { "epoch": 0.1401674587010636, "grad_norm": 0.6641989821498935, "learning_rate": 9.685377003604412e-06, "loss": 0.4222, "step": 3097 }, { "epoch": 0.14021271780945915, "grad_norm": 0.7060878628909356, "learning_rate": 9.685121070718946e-06, "loss": 0.418, "step": 3098 }, { "epoch": 0.14025797691785472, "grad_norm": 0.7055236199159629, "learning_rate": 9.684865037164616e-06, "loss": 0.4459, "step": 3099 }, { "epoch": 0.14030323602625028, "grad_norm": 0.7136009838012457, "learning_rate": 9.684608902946926e-06, "loss": 0.3899, "step": 3100 }, { "epoch": 0.14034849513464584, "grad_norm": 0.7280440062086467, "learning_rate": 9.684352668071378e-06, "loss": 0.417, "step": 3101 }, { "epoch": 0.1403937542430414, "grad_norm": 0.7027541777851652, "learning_rate": 9.684096332543477e-06, "loss": 0.415, "step": 3102 }, { "epoch": 0.14043901335143696, "grad_norm": 0.686899495440445, "learning_rate": 9.683839896368732e-06, "loss": 0.4969, "step": 3103 }, { "epoch": 0.14048427245983255, "grad_norm": 0.8245957668931218, "learning_rate": 9.683583359552654e-06, "loss": 0.4609, "step": 3104 }, { "epoch": 0.14052953156822812, "grad_norm": 0.7556686305076334, "learning_rate": 9.683326722100753e-06, "loss": 0.4388, "step": 3105 }, { "epoch": 0.14057479067662368, "grad_norm": 0.5379909049373236, "learning_rate": 9.683069984018545e-06, "loss": 0.4911, "step": 3106 }, { "epoch": 0.14062004978501924, "grad_norm": 0.8009003920741268, "learning_rate": 9.682813145311547e-06, "loss": 0.4007, "step": 3107 }, { "epoch": 0.1406653088934148, "grad_norm": 0.7335679247958816, "learning_rate": 9.682556205985274e-06, "loss": 0.4309, "step": 3108 }, { "epoch": 0.14071056800181037, "grad_norm": 0.730711292524861, "learning_rate": 9.682299166045252e-06, "loss": 0.4242, "step": 3109 }, { "epoch": 0.14075582711020593, "grad_norm": 0.7004543383866226, "learning_rate": 9.682042025497001e-06, "loss": 0.3755, "step": 3110 }, { "epoch": 0.1408010862186015, "grad_norm": 1.519595889804459, "learning_rate": 9.681784784346047e-06, "loss": 0.4541, "step": 3111 }, { "epoch": 0.14084634532699705, "grad_norm": 0.661638334038799, "learning_rate": 9.681527442597916e-06, "loss": 0.3909, "step": 3112 }, { "epoch": 0.14089160443539261, "grad_norm": 0.6977427321641013, "learning_rate": 9.681270000258138e-06, "loss": 0.447, "step": 3113 }, { "epoch": 0.14093686354378818, "grad_norm": 0.7069246418810469, "learning_rate": 9.681012457332247e-06, "loss": 0.3864, "step": 3114 }, { "epoch": 0.14098212265218374, "grad_norm": 0.7266791455837109, "learning_rate": 9.680754813825774e-06, "loss": 0.4387, "step": 3115 }, { "epoch": 0.14102738176057933, "grad_norm": 0.7571786820538708, "learning_rate": 9.680497069744254e-06, "loss": 0.4425, "step": 3116 }, { "epoch": 0.1410726408689749, "grad_norm": 0.7102976525155017, "learning_rate": 9.68023922509323e-06, "loss": 0.439, "step": 3117 }, { "epoch": 0.14111789997737045, "grad_norm": 0.6973062459504656, "learning_rate": 9.67998127987824e-06, "loss": 0.4311, "step": 3118 }, { "epoch": 0.14116315908576602, "grad_norm": 0.6736480913668162, "learning_rate": 9.679723234104822e-06, "loss": 0.4194, "step": 3119 }, { "epoch": 0.14120841819416158, "grad_norm": 0.6712826154773708, "learning_rate": 9.679465087778526e-06, "loss": 0.3818, "step": 3120 }, { "epoch": 0.14125367730255714, "grad_norm": 0.7766965611884528, "learning_rate": 9.679206840904898e-06, "loss": 0.4403, "step": 3121 }, { "epoch": 0.1412989364109527, "grad_norm": 0.7349713706275844, "learning_rate": 9.678948493489485e-06, "loss": 0.4386, "step": 3122 }, { "epoch": 0.14134419551934826, "grad_norm": 0.6508673734652729, "learning_rate": 9.67869004553784e-06, "loss": 0.4208, "step": 3123 }, { "epoch": 0.14138945462774383, "grad_norm": 0.728103346866895, "learning_rate": 9.678431497055515e-06, "loss": 0.5208, "step": 3124 }, { "epoch": 0.1414347137361394, "grad_norm": 0.45853081090677567, "learning_rate": 9.678172848048067e-06, "loss": 0.5069, "step": 3125 }, { "epoch": 0.14147997284453495, "grad_norm": 0.804412565112675, "learning_rate": 9.677914098521051e-06, "loss": 0.3767, "step": 3126 }, { "epoch": 0.1415252319529305, "grad_norm": 0.6728901882341966, "learning_rate": 9.677655248480026e-06, "loss": 0.42, "step": 3127 }, { "epoch": 0.1415704910613261, "grad_norm": 0.7435907240723189, "learning_rate": 9.67739629793056e-06, "loss": 0.3903, "step": 3128 }, { "epoch": 0.14161575016972167, "grad_norm": 0.7145965926168072, "learning_rate": 9.677137246878212e-06, "loss": 0.4231, "step": 3129 }, { "epoch": 0.14166100927811723, "grad_norm": 0.6718267845844599, "learning_rate": 9.676878095328547e-06, "loss": 0.4187, "step": 3130 }, { "epoch": 0.1417062683865128, "grad_norm": 0.7073341669717274, "learning_rate": 9.67661884328714e-06, "loss": 0.4173, "step": 3131 }, { "epoch": 0.14175152749490835, "grad_norm": 0.7355489401608707, "learning_rate": 9.676359490759554e-06, "loss": 0.4201, "step": 3132 }, { "epoch": 0.14179678660330391, "grad_norm": 1.6407994213832153, "learning_rate": 9.676100037751366e-06, "loss": 0.5338, "step": 3133 }, { "epoch": 0.14184204571169948, "grad_norm": 0.7249211925149227, "learning_rate": 9.675840484268149e-06, "loss": 0.4435, "step": 3134 }, { "epoch": 0.14188730482009504, "grad_norm": 0.6720350118302825, "learning_rate": 9.675580830315481e-06, "loss": 0.5272, "step": 3135 }, { "epoch": 0.1419325639284906, "grad_norm": 0.7299979256540658, "learning_rate": 9.67532107589894e-06, "loss": 0.4302, "step": 3136 }, { "epoch": 0.14197782303688616, "grad_norm": 0.727738005887431, "learning_rate": 9.67506122102411e-06, "loss": 0.3973, "step": 3137 }, { "epoch": 0.14202308214528173, "grad_norm": 0.8311652702626384, "learning_rate": 9.674801265696572e-06, "loss": 0.423, "step": 3138 }, { "epoch": 0.14206834125367732, "grad_norm": 0.6585685880370094, "learning_rate": 9.674541209921913e-06, "loss": 0.3822, "step": 3139 }, { "epoch": 0.14211360036207288, "grad_norm": 0.7377137503636694, "learning_rate": 9.674281053705719e-06, "loss": 0.4129, "step": 3140 }, { "epoch": 0.14215885947046844, "grad_norm": 0.7171166966408776, "learning_rate": 9.67402079705358e-06, "loss": 0.4275, "step": 3141 }, { "epoch": 0.142204118578864, "grad_norm": 0.7429829000995922, "learning_rate": 9.673760439971091e-06, "loss": 0.4481, "step": 3142 }, { "epoch": 0.14224937768725956, "grad_norm": 0.7112414502918661, "learning_rate": 9.673499982463846e-06, "loss": 0.4229, "step": 3143 }, { "epoch": 0.14229463679565513, "grad_norm": 0.6799300213562083, "learning_rate": 9.673239424537437e-06, "loss": 0.4215, "step": 3144 }, { "epoch": 0.1423398959040507, "grad_norm": 2.4514558013874717, "learning_rate": 9.672978766197468e-06, "loss": 0.5697, "step": 3145 }, { "epoch": 0.14238515501244625, "grad_norm": 0.8073367634530231, "learning_rate": 9.672718007449535e-06, "loss": 0.3968, "step": 3146 }, { "epoch": 0.14243041412084181, "grad_norm": 0.7186657373222725, "learning_rate": 9.672457148299245e-06, "loss": 0.3786, "step": 3147 }, { "epoch": 0.14247567322923738, "grad_norm": 0.7936502277208761, "learning_rate": 9.672196188752201e-06, "loss": 0.5066, "step": 3148 }, { "epoch": 0.14252093233763294, "grad_norm": 0.7367266440609374, "learning_rate": 9.67193512881401e-06, "loss": 0.3956, "step": 3149 }, { "epoch": 0.1425661914460285, "grad_norm": 0.86974007930535, "learning_rate": 9.671673968490281e-06, "loss": 0.51, "step": 3150 }, { "epoch": 0.1426114505544241, "grad_norm": 0.7339821934903883, "learning_rate": 9.671412707786628e-06, "loss": 0.3905, "step": 3151 }, { "epoch": 0.14265670966281965, "grad_norm": 0.7741642555970408, "learning_rate": 9.67115134670866e-06, "loss": 0.4156, "step": 3152 }, { "epoch": 0.14270196877121522, "grad_norm": 0.6771007640808087, "learning_rate": 9.670889885262e-06, "loss": 0.4323, "step": 3153 }, { "epoch": 0.14274722787961078, "grad_norm": 1.4497561146456408, "learning_rate": 9.670628323452259e-06, "loss": 0.5543, "step": 3154 }, { "epoch": 0.14279248698800634, "grad_norm": 2.3808597659867896, "learning_rate": 9.670366661285061e-06, "loss": 0.4272, "step": 3155 }, { "epoch": 0.1428377460964019, "grad_norm": 0.6572621235218521, "learning_rate": 9.670104898766028e-06, "loss": 0.389, "step": 3156 }, { "epoch": 0.14288300520479746, "grad_norm": 0.6716930824211316, "learning_rate": 9.669843035900783e-06, "loss": 0.4043, "step": 3157 }, { "epoch": 0.14292826431319303, "grad_norm": 0.7747608617896153, "learning_rate": 9.669581072694954e-06, "loss": 0.4865, "step": 3158 }, { "epoch": 0.1429735234215886, "grad_norm": 0.7170303310627186, "learning_rate": 9.669319009154169e-06, "loss": 0.4336, "step": 3159 }, { "epoch": 0.14301878252998415, "grad_norm": 0.7309134726102187, "learning_rate": 9.66905684528406e-06, "loss": 0.4216, "step": 3160 }, { "epoch": 0.1430640416383797, "grad_norm": 0.7802958711128894, "learning_rate": 9.668794581090257e-06, "loss": 0.4163, "step": 3161 }, { "epoch": 0.14310930074677528, "grad_norm": 0.8650002567846453, "learning_rate": 9.6685322165784e-06, "loss": 0.4519, "step": 3162 }, { "epoch": 0.14315455985517087, "grad_norm": 0.6585549158195979, "learning_rate": 9.668269751754123e-06, "loss": 0.3696, "step": 3163 }, { "epoch": 0.14319981896356643, "grad_norm": 0.7047364200382954, "learning_rate": 9.668007186623068e-06, "loss": 0.3826, "step": 3164 }, { "epoch": 0.143245078071962, "grad_norm": 0.6752872962284534, "learning_rate": 9.667744521190873e-06, "loss": 0.4091, "step": 3165 }, { "epoch": 0.14329033718035755, "grad_norm": 0.7053220031969246, "learning_rate": 9.667481755463183e-06, "loss": 0.3918, "step": 3166 }, { "epoch": 0.14333559628875311, "grad_norm": 0.7531808398595325, "learning_rate": 9.66721888944565e-06, "loss": 0.4624, "step": 3167 }, { "epoch": 0.14338085539714868, "grad_norm": 0.7065600576391988, "learning_rate": 9.666955923143912e-06, "loss": 0.4121, "step": 3168 }, { "epoch": 0.14342611450554424, "grad_norm": 0.7669130699470585, "learning_rate": 9.666692856563628e-06, "loss": 0.4169, "step": 3169 }, { "epoch": 0.1434713736139398, "grad_norm": 1.1338249346765497, "learning_rate": 9.666429689710447e-06, "loss": 0.5071, "step": 3170 }, { "epoch": 0.14351663272233536, "grad_norm": 0.6905545961200613, "learning_rate": 9.666166422590024e-06, "loss": 0.4471, "step": 3171 }, { "epoch": 0.14356189183073093, "grad_norm": 0.6918574529763974, "learning_rate": 9.665903055208013e-06, "loss": 0.4686, "step": 3172 }, { "epoch": 0.1436071509391265, "grad_norm": 0.725100808369826, "learning_rate": 9.665639587570079e-06, "loss": 0.4353, "step": 3173 }, { "epoch": 0.14365241004752205, "grad_norm": 0.7448320533413242, "learning_rate": 9.665376019681876e-06, "loss": 0.4346, "step": 3174 }, { "epoch": 0.14369766915591764, "grad_norm": 0.7240294172659404, "learning_rate": 9.665112351549074e-06, "loss": 0.3947, "step": 3175 }, { "epoch": 0.1437429282643132, "grad_norm": 0.7927000090096381, "learning_rate": 9.664848583177335e-06, "loss": 0.4177, "step": 3176 }, { "epoch": 0.14378818737270876, "grad_norm": 0.7320172279898588, "learning_rate": 9.664584714572326e-06, "loss": 0.4181, "step": 3177 }, { "epoch": 0.14383344648110433, "grad_norm": 0.5698166164116315, "learning_rate": 9.664320745739717e-06, "loss": 0.5135, "step": 3178 }, { "epoch": 0.1438787055894999, "grad_norm": 0.7539947870246922, "learning_rate": 9.664056676685183e-06, "loss": 0.4467, "step": 3179 }, { "epoch": 0.14392396469789545, "grad_norm": 0.7213958856579106, "learning_rate": 9.663792507414393e-06, "loss": 0.4245, "step": 3180 }, { "epoch": 0.143969223806291, "grad_norm": 0.6726300749608423, "learning_rate": 9.663528237933027e-06, "loss": 0.4099, "step": 3181 }, { "epoch": 0.14401448291468658, "grad_norm": 0.4822560169478301, "learning_rate": 9.663263868246762e-06, "loss": 0.5279, "step": 3182 }, { "epoch": 0.14405974202308214, "grad_norm": 0.9085236049734731, "learning_rate": 9.662999398361278e-06, "loss": 0.4152, "step": 3183 }, { "epoch": 0.1441050011314777, "grad_norm": 0.7829003770567539, "learning_rate": 9.662734828282258e-06, "loss": 0.4515, "step": 3184 }, { "epoch": 0.14415026023987326, "grad_norm": 0.6575551419625164, "learning_rate": 9.66247015801539e-06, "loss": 0.4002, "step": 3185 }, { "epoch": 0.14419551934826885, "grad_norm": 0.7181233616587022, "learning_rate": 9.662205387566355e-06, "loss": 0.4125, "step": 3186 }, { "epoch": 0.14424077845666441, "grad_norm": 0.6990078657681448, "learning_rate": 9.661940516940846e-06, "loss": 0.41, "step": 3187 }, { "epoch": 0.14428603756505998, "grad_norm": 0.4046658946943539, "learning_rate": 9.661675546144553e-06, "loss": 0.4915, "step": 3188 }, { "epoch": 0.14433129667345554, "grad_norm": 0.7822597863670415, "learning_rate": 9.661410475183169e-06, "loss": 0.4238, "step": 3189 }, { "epoch": 0.1443765557818511, "grad_norm": 0.691259239024519, "learning_rate": 9.661145304062391e-06, "loss": 0.3847, "step": 3190 }, { "epoch": 0.14442181489024666, "grad_norm": 0.7603748825768698, "learning_rate": 9.660880032787917e-06, "loss": 0.4286, "step": 3191 }, { "epoch": 0.14446707399864223, "grad_norm": 0.7192307911925878, "learning_rate": 9.660614661365446e-06, "loss": 0.407, "step": 3192 }, { "epoch": 0.1445123331070378, "grad_norm": 0.7306554858820006, "learning_rate": 9.660349189800678e-06, "loss": 0.4582, "step": 3193 }, { "epoch": 0.14455759221543335, "grad_norm": 0.35736569073643326, "learning_rate": 9.660083618099321e-06, "loss": 0.5332, "step": 3194 }, { "epoch": 0.1446028513238289, "grad_norm": 0.7395746670458239, "learning_rate": 9.659817946267079e-06, "loss": 0.4104, "step": 3195 }, { "epoch": 0.14464811043222447, "grad_norm": 0.7604286179825007, "learning_rate": 9.65955217430966e-06, "loss": 0.4173, "step": 3196 }, { "epoch": 0.14469336954062004, "grad_norm": 0.34860121685691725, "learning_rate": 9.659286302232776e-06, "loss": 0.4974, "step": 3197 }, { "epoch": 0.14473862864901563, "grad_norm": 0.691904119059187, "learning_rate": 9.659020330042139e-06, "loss": 0.4527, "step": 3198 }, { "epoch": 0.1447838877574112, "grad_norm": 0.697285444271822, "learning_rate": 9.658754257743465e-06, "loss": 0.4309, "step": 3199 }, { "epoch": 0.14482914686580675, "grad_norm": 0.3251444549614559, "learning_rate": 9.65848808534247e-06, "loss": 0.5187, "step": 3200 }, { "epoch": 0.1448744059742023, "grad_norm": 0.6903935648469879, "learning_rate": 9.658221812844872e-06, "loss": 0.425, "step": 3201 }, { "epoch": 0.14491966508259788, "grad_norm": 0.6953138537555082, "learning_rate": 9.657955440256396e-06, "loss": 0.4405, "step": 3202 }, { "epoch": 0.14496492419099344, "grad_norm": 0.726536205730736, "learning_rate": 9.657688967582762e-06, "loss": 0.4221, "step": 3203 }, { "epoch": 0.145010183299389, "grad_norm": 0.7074917191163478, "learning_rate": 9.657422394829697e-06, "loss": 0.4541, "step": 3204 }, { "epoch": 0.14505544240778456, "grad_norm": 0.3587028380035552, "learning_rate": 9.65715572200293e-06, "loss": 0.5251, "step": 3205 }, { "epoch": 0.14510070151618013, "grad_norm": 0.5153669271065352, "learning_rate": 9.65688894910819e-06, "loss": 0.5109, "step": 3206 }, { "epoch": 0.1451459606245757, "grad_norm": 0.7331308798946778, "learning_rate": 9.656622076151208e-06, "loss": 0.4372, "step": 3207 }, { "epoch": 0.14519121973297125, "grad_norm": 0.731716823903968, "learning_rate": 9.65635510313772e-06, "loss": 0.437, "step": 3208 }, { "epoch": 0.1452364788413668, "grad_norm": 0.6501948418255162, "learning_rate": 9.656088030073462e-06, "loss": 0.3799, "step": 3209 }, { "epoch": 0.1452817379497624, "grad_norm": 0.6802252677586819, "learning_rate": 9.655820856964171e-06, "loss": 0.4147, "step": 3210 }, { "epoch": 0.14532699705815796, "grad_norm": 0.669078224193458, "learning_rate": 9.65555358381559e-06, "loss": 0.4373, "step": 3211 }, { "epoch": 0.14537225616655353, "grad_norm": 0.7840548343002391, "learning_rate": 9.65528621063346e-06, "loss": 0.3962, "step": 3212 }, { "epoch": 0.1454175152749491, "grad_norm": 0.6714606210120766, "learning_rate": 9.655018737423529e-06, "loss": 0.4113, "step": 3213 }, { "epoch": 0.14546277438334465, "grad_norm": 0.764637062360135, "learning_rate": 9.65475116419154e-06, "loss": 0.435, "step": 3214 }, { "epoch": 0.1455080334917402, "grad_norm": 0.6940196373497772, "learning_rate": 9.654483490943245e-06, "loss": 0.3845, "step": 3215 }, { "epoch": 0.14555329260013578, "grad_norm": 0.6979860285067682, "learning_rate": 9.654215717684397e-06, "loss": 0.3827, "step": 3216 }, { "epoch": 0.14559855170853134, "grad_norm": 0.6450860678926025, "learning_rate": 9.653947844420744e-06, "loss": 0.4263, "step": 3217 }, { "epoch": 0.1456438108169269, "grad_norm": 0.4631569753271721, "learning_rate": 9.653679871158048e-06, "loss": 0.4863, "step": 3218 }, { "epoch": 0.14568906992532246, "grad_norm": 0.6504102546655056, "learning_rate": 9.653411797902063e-06, "loss": 0.4481, "step": 3219 }, { "epoch": 0.14573432903371802, "grad_norm": 0.8385292768004025, "learning_rate": 9.65314362465855e-06, "loss": 0.4089, "step": 3220 }, { "epoch": 0.14577958814211361, "grad_norm": 0.3677785772048963, "learning_rate": 9.652875351433272e-06, "loss": 0.5351, "step": 3221 }, { "epoch": 0.14582484725050918, "grad_norm": 0.7357209315563944, "learning_rate": 9.652606978231994e-06, "loss": 0.4031, "step": 3222 }, { "epoch": 0.14587010635890474, "grad_norm": 0.9786863345150582, "learning_rate": 9.65233850506048e-06, "loss": 0.4362, "step": 3223 }, { "epoch": 0.1459153654673003, "grad_norm": 0.7184510643808878, "learning_rate": 9.6520699319245e-06, "loss": 0.4401, "step": 3224 }, { "epoch": 0.14596062457569586, "grad_norm": 0.7416426760575285, "learning_rate": 9.651801258829827e-06, "loss": 0.3851, "step": 3225 }, { "epoch": 0.14600588368409143, "grad_norm": 0.6576989128660552, "learning_rate": 9.651532485782231e-06, "loss": 0.4249, "step": 3226 }, { "epoch": 0.146051142792487, "grad_norm": 0.7296653820837178, "learning_rate": 9.651263612787487e-06, "loss": 0.379, "step": 3227 }, { "epoch": 0.14609640190088255, "grad_norm": 0.47286884291343695, "learning_rate": 9.650994639851375e-06, "loss": 0.4942, "step": 3228 }, { "epoch": 0.1461416610092781, "grad_norm": 0.41271948404533615, "learning_rate": 9.650725566979671e-06, "loss": 0.5121, "step": 3229 }, { "epoch": 0.14618692011767367, "grad_norm": 0.8261333798102216, "learning_rate": 9.650456394178157e-06, "loss": 0.442, "step": 3230 }, { "epoch": 0.14623217922606924, "grad_norm": 0.6917499919499427, "learning_rate": 9.65018712145262e-06, "loss": 0.426, "step": 3231 }, { "epoch": 0.1462774383344648, "grad_norm": 0.7170816180353979, "learning_rate": 9.649917748808844e-06, "loss": 0.4061, "step": 3232 }, { "epoch": 0.1463226974428604, "grad_norm": 0.6987359541063993, "learning_rate": 9.649648276252614e-06, "loss": 0.4426, "step": 3233 }, { "epoch": 0.14636795655125595, "grad_norm": 0.8070158387970268, "learning_rate": 9.649378703789724e-06, "loss": 0.4252, "step": 3234 }, { "epoch": 0.1464132156596515, "grad_norm": 0.6080123077930029, "learning_rate": 9.649109031425968e-06, "loss": 0.4893, "step": 3235 }, { "epoch": 0.14645847476804708, "grad_norm": 0.6925974737374918, "learning_rate": 9.648839259167135e-06, "loss": 0.4406, "step": 3236 }, { "epoch": 0.14650373387644264, "grad_norm": 0.7773373297030651, "learning_rate": 9.648569387019025e-06, "loss": 0.3996, "step": 3237 }, { "epoch": 0.1465489929848382, "grad_norm": 0.7564126557863732, "learning_rate": 9.648299414987434e-06, "loss": 0.4379, "step": 3238 }, { "epoch": 0.14659425209323376, "grad_norm": 0.4150638456805394, "learning_rate": 9.648029343078167e-06, "loss": 0.5252, "step": 3239 }, { "epoch": 0.14663951120162932, "grad_norm": 0.3622096334292757, "learning_rate": 9.647759171297024e-06, "loss": 0.5406, "step": 3240 }, { "epoch": 0.1466847703100249, "grad_norm": 0.6823350984312218, "learning_rate": 9.64748889964981e-06, "loss": 0.3972, "step": 3241 }, { "epoch": 0.14673002941842045, "grad_norm": 0.7071456722892714, "learning_rate": 9.647218528142333e-06, "loss": 0.4485, "step": 3242 }, { "epoch": 0.146775288526816, "grad_norm": 0.6869486414541307, "learning_rate": 9.646948056780403e-06, "loss": 0.4102, "step": 3243 }, { "epoch": 0.14682054763521157, "grad_norm": 0.6827200494044977, "learning_rate": 9.646677485569834e-06, "loss": 0.3975, "step": 3244 }, { "epoch": 0.14686580674360716, "grad_norm": 0.6883900751807166, "learning_rate": 9.646406814516434e-06, "loss": 0.4023, "step": 3245 }, { "epoch": 0.14691106585200273, "grad_norm": 0.5684018991219494, "learning_rate": 9.646136043626023e-06, "loss": 0.4943, "step": 3246 }, { "epoch": 0.1469563249603983, "grad_norm": 0.6624837027292081, "learning_rate": 9.645865172904418e-06, "loss": 0.4079, "step": 3247 }, { "epoch": 0.14700158406879385, "grad_norm": 0.6713274806582772, "learning_rate": 9.645594202357438e-06, "loss": 0.4176, "step": 3248 }, { "epoch": 0.1470468431771894, "grad_norm": 0.34889750817078513, "learning_rate": 9.645323131990908e-06, "loss": 0.4997, "step": 3249 }, { "epoch": 0.14709210228558497, "grad_norm": 0.7653840453006011, "learning_rate": 9.64505196181065e-06, "loss": 0.4072, "step": 3250 }, { "epoch": 0.14713736139398054, "grad_norm": 0.6156596928615148, "learning_rate": 9.644780691822491e-06, "loss": 0.3849, "step": 3251 }, { "epoch": 0.1471826205023761, "grad_norm": 0.7840515047666305, "learning_rate": 9.644509322032262e-06, "loss": 0.4479, "step": 3252 }, { "epoch": 0.14722787961077166, "grad_norm": 0.6974849515726319, "learning_rate": 9.644237852445792e-06, "loss": 0.3878, "step": 3253 }, { "epoch": 0.14727313871916722, "grad_norm": 0.5227456682682667, "learning_rate": 9.643966283068912e-06, "loss": 0.4938, "step": 3254 }, { "epoch": 0.14731839782756279, "grad_norm": 0.7538320838930355, "learning_rate": 9.643694613907461e-06, "loss": 0.4184, "step": 3255 }, { "epoch": 0.14736365693595835, "grad_norm": 0.6633093812499693, "learning_rate": 9.643422844967274e-06, "loss": 0.4429, "step": 3256 }, { "epoch": 0.14740891604435394, "grad_norm": 0.38889087430455327, "learning_rate": 9.643150976254192e-06, "loss": 0.5149, "step": 3257 }, { "epoch": 0.1474541751527495, "grad_norm": 0.6689124157664896, "learning_rate": 9.642879007774058e-06, "loss": 0.4621, "step": 3258 }, { "epoch": 0.14749943426114506, "grad_norm": 0.6469967659217317, "learning_rate": 9.64260693953271e-06, "loss": 0.3693, "step": 3259 }, { "epoch": 0.14754469336954062, "grad_norm": 0.7225777422722433, "learning_rate": 9.642334771536e-06, "loss": 0.461, "step": 3260 }, { "epoch": 0.1475899524779362, "grad_norm": 0.663154906716596, "learning_rate": 9.642062503789772e-06, "loss": 0.3842, "step": 3261 }, { "epoch": 0.14763521158633175, "grad_norm": 0.4600695882779264, "learning_rate": 9.641790136299877e-06, "loss": 0.5037, "step": 3262 }, { "epoch": 0.1476804706947273, "grad_norm": 0.7247436825298432, "learning_rate": 9.641517669072171e-06, "loss": 0.4413, "step": 3263 }, { "epoch": 0.14772572980312287, "grad_norm": 0.3658974333913344, "learning_rate": 9.641245102112503e-06, "loss": 0.5163, "step": 3264 }, { "epoch": 0.14777098891151844, "grad_norm": 0.679325315229181, "learning_rate": 9.640972435426734e-06, "loss": 0.4003, "step": 3265 }, { "epoch": 0.147816248019914, "grad_norm": 0.6799213139722677, "learning_rate": 9.640699669020721e-06, "loss": 0.4247, "step": 3266 }, { "epoch": 0.14786150712830956, "grad_norm": 0.7157540734235226, "learning_rate": 9.640426802900325e-06, "loss": 0.3924, "step": 3267 }, { "epoch": 0.14790676623670515, "grad_norm": 0.4449196343882221, "learning_rate": 9.640153837071407e-06, "loss": 0.5094, "step": 3268 }, { "epoch": 0.1479520253451007, "grad_norm": 0.38711013174344855, "learning_rate": 9.639880771539836e-06, "loss": 0.4969, "step": 3269 }, { "epoch": 0.14799728445349628, "grad_norm": 0.7658134774111022, "learning_rate": 9.639607606311477e-06, "loss": 0.4226, "step": 3270 }, { "epoch": 0.14804254356189184, "grad_norm": 0.3612525389646945, "learning_rate": 9.6393343413922e-06, "loss": 0.5179, "step": 3271 }, { "epoch": 0.1480878026702874, "grad_norm": 0.6473237930596188, "learning_rate": 9.639060976787878e-06, "loss": 0.3766, "step": 3272 }, { "epoch": 0.14813306177868296, "grad_norm": 0.7181412797772644, "learning_rate": 9.638787512504382e-06, "loss": 0.3788, "step": 3273 }, { "epoch": 0.14817832088707852, "grad_norm": 0.6567385369804504, "learning_rate": 9.63851394854759e-06, "loss": 0.3702, "step": 3274 }, { "epoch": 0.1482235799954741, "grad_norm": 0.7640669550629611, "learning_rate": 9.638240284923377e-06, "loss": 0.445, "step": 3275 }, { "epoch": 0.14826883910386965, "grad_norm": 0.6652970779894702, "learning_rate": 9.637966521637628e-06, "loss": 0.424, "step": 3276 }, { "epoch": 0.1483140982122652, "grad_norm": 0.6624015894203762, "learning_rate": 9.637692658696222e-06, "loss": 0.404, "step": 3277 }, { "epoch": 0.14835935732066077, "grad_norm": 0.6559214372418471, "learning_rate": 9.637418696105043e-06, "loss": 0.4228, "step": 3278 }, { "epoch": 0.14840461642905634, "grad_norm": 0.7299149369216319, "learning_rate": 9.63714463386998e-06, "loss": 0.4156, "step": 3279 }, { "epoch": 0.14844987553745193, "grad_norm": 0.4885908240246616, "learning_rate": 9.636870471996923e-06, "loss": 0.4854, "step": 3280 }, { "epoch": 0.1484951346458475, "grad_norm": 0.6781850874150899, "learning_rate": 9.63659621049176e-06, "loss": 0.386, "step": 3281 }, { "epoch": 0.14854039375424305, "grad_norm": 0.3620606036338175, "learning_rate": 9.636321849360382e-06, "loss": 0.512, "step": 3282 }, { "epoch": 0.1485856528626386, "grad_norm": 0.7136500943838553, "learning_rate": 9.63604738860869e-06, "loss": 0.3859, "step": 3283 }, { "epoch": 0.14863091197103417, "grad_norm": 0.7297957271328588, "learning_rate": 9.635772828242575e-06, "loss": 0.423, "step": 3284 }, { "epoch": 0.14867617107942974, "grad_norm": 0.3745810356349233, "learning_rate": 9.63549816826794e-06, "loss": 0.4979, "step": 3285 }, { "epoch": 0.1487214301878253, "grad_norm": 0.356023359325862, "learning_rate": 9.635223408690688e-06, "loss": 0.5067, "step": 3286 }, { "epoch": 0.14876668929622086, "grad_norm": 0.6842405407097039, "learning_rate": 9.63494854951672e-06, "loss": 0.416, "step": 3287 }, { "epoch": 0.14881194840461642, "grad_norm": 0.7189774734887591, "learning_rate": 9.634673590751944e-06, "loss": 0.4349, "step": 3288 }, { "epoch": 0.14885720751301199, "grad_norm": 0.7186667769161001, "learning_rate": 9.634398532402264e-06, "loss": 0.4225, "step": 3289 }, { "epoch": 0.14890246662140755, "grad_norm": 0.7200939913489343, "learning_rate": 9.634123374473596e-06, "loss": 0.3895, "step": 3290 }, { "epoch": 0.1489477257298031, "grad_norm": 0.6927317189414052, "learning_rate": 9.633848116971849e-06, "loss": 0.3925, "step": 3291 }, { "epoch": 0.1489929848381987, "grad_norm": 1.166576042660753, "learning_rate": 9.633572759902936e-06, "loss": 0.4118, "step": 3292 }, { "epoch": 0.14903824394659426, "grad_norm": 0.7492381420963506, "learning_rate": 9.633297303272777e-06, "loss": 0.4184, "step": 3293 }, { "epoch": 0.14908350305498982, "grad_norm": 0.7223441248858142, "learning_rate": 9.633021747087288e-06, "loss": 0.3735, "step": 3294 }, { "epoch": 0.1491287621633854, "grad_norm": 0.6917643370279334, "learning_rate": 9.632746091352393e-06, "loss": 0.4248, "step": 3295 }, { "epoch": 0.14917402127178095, "grad_norm": 0.7034104999065266, "learning_rate": 9.632470336074009e-06, "loss": 0.3917, "step": 3296 }, { "epoch": 0.1492192803801765, "grad_norm": 0.6831009361510235, "learning_rate": 9.632194481258069e-06, "loss": 0.4178, "step": 3297 }, { "epoch": 0.14926453948857207, "grad_norm": 0.5716097893116413, "learning_rate": 9.631918526910493e-06, "loss": 0.5524, "step": 3298 }, { "epoch": 0.14930979859696764, "grad_norm": 0.6941509909598592, "learning_rate": 9.631642473037216e-06, "loss": 0.3869, "step": 3299 }, { "epoch": 0.1493550577053632, "grad_norm": 0.7320256723128162, "learning_rate": 9.631366319644167e-06, "loss": 0.4733, "step": 3300 }, { "epoch": 0.14940031681375876, "grad_norm": 0.7026666178027788, "learning_rate": 9.631090066737278e-06, "loss": 0.4246, "step": 3301 }, { "epoch": 0.14944557592215432, "grad_norm": 1.0725104038960356, "learning_rate": 9.630813714322488e-06, "loss": 0.481, "step": 3302 }, { "epoch": 0.14949083503054988, "grad_norm": 0.6730109808516953, "learning_rate": 9.630537262405735e-06, "loss": 0.3949, "step": 3303 }, { "epoch": 0.14953609413894547, "grad_norm": 0.7169901020211172, "learning_rate": 9.630260710992956e-06, "loss": 0.4161, "step": 3304 }, { "epoch": 0.14958135324734104, "grad_norm": 0.6953568784611611, "learning_rate": 9.629984060090097e-06, "loss": 0.3782, "step": 3305 }, { "epoch": 0.1496266123557366, "grad_norm": 0.723483016156664, "learning_rate": 9.629707309703099e-06, "loss": 0.4203, "step": 3306 }, { "epoch": 0.14967187146413216, "grad_norm": 0.6857172507609567, "learning_rate": 9.629430459837909e-06, "loss": 0.3877, "step": 3307 }, { "epoch": 0.14971713057252772, "grad_norm": 0.700995997510525, "learning_rate": 9.629153510500478e-06, "loss": 0.4093, "step": 3308 }, { "epoch": 0.14976238968092329, "grad_norm": 0.6784060071160138, "learning_rate": 9.628876461696754e-06, "loss": 0.3871, "step": 3309 }, { "epoch": 0.14980764878931885, "grad_norm": 0.5473550274042821, "learning_rate": 9.628599313432694e-06, "loss": 0.5218, "step": 3310 }, { "epoch": 0.1498529078977144, "grad_norm": 0.42281332962853047, "learning_rate": 9.628322065714248e-06, "loss": 0.5336, "step": 3311 }, { "epoch": 0.14989816700610997, "grad_norm": 0.7061183170557235, "learning_rate": 9.628044718547379e-06, "loss": 0.4277, "step": 3312 }, { "epoch": 0.14994342611450553, "grad_norm": 0.7485224328430917, "learning_rate": 9.62776727193804e-06, "loss": 0.4106, "step": 3313 }, { "epoch": 0.1499886852229011, "grad_norm": 0.49668806668198767, "learning_rate": 9.627489725892195e-06, "loss": 0.5306, "step": 3314 }, { "epoch": 0.1500339443312967, "grad_norm": 0.5014177021586415, "learning_rate": 9.627212080415808e-06, "loss": 0.5247, "step": 3315 }, { "epoch": 0.15007920343969225, "grad_norm": 0.7862004631444977, "learning_rate": 9.626934335514847e-06, "loss": 0.4317, "step": 3316 }, { "epoch": 0.1501244625480878, "grad_norm": 0.7304331450450849, "learning_rate": 9.626656491195277e-06, "loss": 0.4345, "step": 3317 }, { "epoch": 0.15016972165648337, "grad_norm": 0.7099372085651383, "learning_rate": 9.626378547463067e-06, "loss": 0.4388, "step": 3318 }, { "epoch": 0.15021498076487894, "grad_norm": 0.6846526401291254, "learning_rate": 9.626100504324194e-06, "loss": 0.4032, "step": 3319 }, { "epoch": 0.1502602398732745, "grad_norm": 0.8179284569944197, "learning_rate": 9.625822361784626e-06, "loss": 0.4003, "step": 3320 }, { "epoch": 0.15030549898167006, "grad_norm": 0.7223286737584297, "learning_rate": 9.625544119850344e-06, "loss": 0.3899, "step": 3321 }, { "epoch": 0.15035075809006562, "grad_norm": 0.6603367788079666, "learning_rate": 9.625265778527325e-06, "loss": 0.3849, "step": 3322 }, { "epoch": 0.15039601719846118, "grad_norm": 0.7607035253944717, "learning_rate": 9.62498733782155e-06, "loss": 0.4401, "step": 3323 }, { "epoch": 0.15044127630685675, "grad_norm": 0.7541225861262272, "learning_rate": 9.624708797739002e-06, "loss": 0.4311, "step": 3324 }, { "epoch": 0.1504865354152523, "grad_norm": 0.7014753507801132, "learning_rate": 9.624430158285664e-06, "loss": 0.4407, "step": 3325 }, { "epoch": 0.15053179452364787, "grad_norm": 0.6942835461363718, "learning_rate": 9.624151419467527e-06, "loss": 0.3872, "step": 3326 }, { "epoch": 0.15057705363204346, "grad_norm": 0.7138036553160272, "learning_rate": 9.623872581290576e-06, "loss": 0.4314, "step": 3327 }, { "epoch": 0.15062231274043902, "grad_norm": 0.8268866607473049, "learning_rate": 9.623593643760805e-06, "loss": 0.5235, "step": 3328 }, { "epoch": 0.15066757184883459, "grad_norm": 0.7084738223029766, "learning_rate": 9.623314606884207e-06, "loss": 0.4128, "step": 3329 }, { "epoch": 0.15071283095723015, "grad_norm": 0.7136920204031075, "learning_rate": 9.623035470666778e-06, "loss": 0.443, "step": 3330 }, { "epoch": 0.1507580900656257, "grad_norm": 0.6631487881509494, "learning_rate": 9.622756235114515e-06, "loss": 0.423, "step": 3331 }, { "epoch": 0.15080334917402127, "grad_norm": 0.7237223774625545, "learning_rate": 9.622476900233417e-06, "loss": 0.4024, "step": 3332 }, { "epoch": 0.15084860828241684, "grad_norm": 0.7136308557874039, "learning_rate": 9.622197466029488e-06, "loss": 0.4373, "step": 3333 }, { "epoch": 0.1508938673908124, "grad_norm": 0.6607160453705881, "learning_rate": 9.621917932508733e-06, "loss": 0.5152, "step": 3334 }, { "epoch": 0.15093912649920796, "grad_norm": 0.6643946238580568, "learning_rate": 9.621638299677157e-06, "loss": 0.4072, "step": 3335 }, { "epoch": 0.15098438560760352, "grad_norm": 0.7875523683733319, "learning_rate": 9.621358567540766e-06, "loss": 0.4121, "step": 3336 }, { "epoch": 0.15102964471599908, "grad_norm": 0.6171204450407061, "learning_rate": 9.621078736105573e-06, "loss": 0.3895, "step": 3337 }, { "epoch": 0.15107490382439465, "grad_norm": 0.6658184861266957, "learning_rate": 9.620798805377592e-06, "loss": 0.3854, "step": 3338 }, { "epoch": 0.15112016293279024, "grad_norm": 0.404686142066214, "learning_rate": 9.620518775362835e-06, "loss": 0.4777, "step": 3339 }, { "epoch": 0.1511654220411858, "grad_norm": 0.7400084138254952, "learning_rate": 9.620238646067322e-06, "loss": 0.4257, "step": 3340 }, { "epoch": 0.15121068114958136, "grad_norm": 0.8422360462884343, "learning_rate": 9.619958417497069e-06, "loss": 0.4365, "step": 3341 }, { "epoch": 0.15125594025797692, "grad_norm": 0.9105430523186688, "learning_rate": 9.619678089658097e-06, "loss": 0.4329, "step": 3342 }, { "epoch": 0.15130119936637249, "grad_norm": 0.41325749692787855, "learning_rate": 9.619397662556434e-06, "loss": 0.5005, "step": 3343 }, { "epoch": 0.15134645847476805, "grad_norm": 0.8607971073288683, "learning_rate": 9.619117136198101e-06, "loss": 0.4102, "step": 3344 }, { "epoch": 0.1513917175831636, "grad_norm": 0.7294942692583787, "learning_rate": 9.61883651058913e-06, "loss": 0.4079, "step": 3345 }, { "epoch": 0.15143697669155917, "grad_norm": 0.682562825235258, "learning_rate": 9.618555785735546e-06, "loss": 0.4413, "step": 3346 }, { "epoch": 0.15148223579995473, "grad_norm": 0.6883105556066248, "learning_rate": 9.618274961643384e-06, "loss": 0.3879, "step": 3347 }, { "epoch": 0.1515274949083503, "grad_norm": 0.6450651111810509, "learning_rate": 9.617994038318675e-06, "loss": 0.4119, "step": 3348 }, { "epoch": 0.15157275401674586, "grad_norm": 0.7116436770471571, "learning_rate": 9.617713015767457e-06, "loss": 0.3703, "step": 3349 }, { "epoch": 0.15161801312514142, "grad_norm": 0.7377554108903471, "learning_rate": 9.617431893995771e-06, "loss": 0.3761, "step": 3350 }, { "epoch": 0.151663272233537, "grad_norm": 0.6646223645221694, "learning_rate": 9.617150673009654e-06, "loss": 0.4193, "step": 3351 }, { "epoch": 0.15170853134193257, "grad_norm": 0.45223626911500664, "learning_rate": 9.61686935281515e-06, "loss": 0.5071, "step": 3352 }, { "epoch": 0.15175379045032814, "grad_norm": 0.3798005018025008, "learning_rate": 9.616587933418302e-06, "loss": 0.5095, "step": 3353 }, { "epoch": 0.1517990495587237, "grad_norm": 0.789312374773116, "learning_rate": 9.616306414825158e-06, "loss": 0.4139, "step": 3354 }, { "epoch": 0.15184430866711926, "grad_norm": 0.6956206272305917, "learning_rate": 9.616024797041769e-06, "loss": 0.4142, "step": 3355 }, { "epoch": 0.15188956777551482, "grad_norm": 0.6648217865219, "learning_rate": 9.615743080074183e-06, "loss": 0.4318, "step": 3356 }, { "epoch": 0.15193482688391038, "grad_norm": 0.5935656105926574, "learning_rate": 9.615461263928454e-06, "loss": 0.5229, "step": 3357 }, { "epoch": 0.15198008599230595, "grad_norm": 0.7476404386102188, "learning_rate": 9.615179348610638e-06, "loss": 0.4273, "step": 3358 }, { "epoch": 0.1520253451007015, "grad_norm": 0.7061949866038572, "learning_rate": 9.614897334126791e-06, "loss": 0.4166, "step": 3359 }, { "epoch": 0.15207060420909707, "grad_norm": 0.40998979916945805, "learning_rate": 9.614615220482976e-06, "loss": 0.5397, "step": 3360 }, { "epoch": 0.15211586331749263, "grad_norm": 0.8891488917747642, "learning_rate": 9.614333007685253e-06, "loss": 0.4159, "step": 3361 }, { "epoch": 0.15216112242588822, "grad_norm": 0.7086492645793606, "learning_rate": 9.614050695739683e-06, "loss": 0.3637, "step": 3362 }, { "epoch": 0.15220638153428379, "grad_norm": 0.41334132030452503, "learning_rate": 9.613768284652336e-06, "loss": 0.4981, "step": 3363 }, { "epoch": 0.15225164064267935, "grad_norm": 0.7341079457582734, "learning_rate": 9.613485774429279e-06, "loss": 0.3899, "step": 3364 }, { "epoch": 0.1522968997510749, "grad_norm": 0.7868870876356426, "learning_rate": 9.61320316507658e-06, "loss": 0.4445, "step": 3365 }, { "epoch": 0.15234215885947047, "grad_norm": 0.7311498728838661, "learning_rate": 9.612920456600317e-06, "loss": 0.4189, "step": 3366 }, { "epoch": 0.15238741796786603, "grad_norm": 0.7843591853548378, "learning_rate": 9.612637649006557e-06, "loss": 0.3834, "step": 3367 }, { "epoch": 0.1524326770762616, "grad_norm": 0.7826692283832626, "learning_rate": 9.612354742301381e-06, "loss": 0.4292, "step": 3368 }, { "epoch": 0.15247793618465716, "grad_norm": 0.6647315439646727, "learning_rate": 9.61207173649087e-06, "loss": 0.3619, "step": 3369 }, { "epoch": 0.15252319529305272, "grad_norm": 0.6557323897622711, "learning_rate": 9.6117886315811e-06, "loss": 0.3907, "step": 3370 }, { "epoch": 0.15256845440144828, "grad_norm": 0.7221361141961096, "learning_rate": 9.611505427578159e-06, "loss": 0.4113, "step": 3371 }, { "epoch": 0.15261371350984385, "grad_norm": 0.7099770327135274, "learning_rate": 9.611222124488126e-06, "loss": 0.3876, "step": 3372 }, { "epoch": 0.1526589726182394, "grad_norm": 0.4333483735128783, "learning_rate": 9.610938722317095e-06, "loss": 0.4772, "step": 3373 }, { "epoch": 0.152704231726635, "grad_norm": 0.7950935256983794, "learning_rate": 9.61065522107115e-06, "loss": 0.4274, "step": 3374 }, { "epoch": 0.15274949083503056, "grad_norm": 0.3544158593732371, "learning_rate": 9.610371620756385e-06, "loss": 0.5014, "step": 3375 }, { "epoch": 0.15279474994342612, "grad_norm": 0.7494143798836961, "learning_rate": 9.610087921378895e-06, "loss": 0.4031, "step": 3376 }, { "epoch": 0.15284000905182168, "grad_norm": 0.32936531220215204, "learning_rate": 9.609804122944774e-06, "loss": 0.5054, "step": 3377 }, { "epoch": 0.15288526816021725, "grad_norm": 0.6970342828940654, "learning_rate": 9.60952022546012e-06, "loss": 0.3913, "step": 3378 }, { "epoch": 0.1529305272686128, "grad_norm": 0.3542374522374869, "learning_rate": 9.609236228931033e-06, "loss": 0.5125, "step": 3379 }, { "epoch": 0.15297578637700837, "grad_norm": 0.37112645861116433, "learning_rate": 9.608952133363616e-06, "loss": 0.5181, "step": 3380 }, { "epoch": 0.15302104548540393, "grad_norm": 0.7934526387588287, "learning_rate": 9.608667938763974e-06, "loss": 0.3955, "step": 3381 }, { "epoch": 0.1530663045937995, "grad_norm": 0.7638806885248461, "learning_rate": 9.60838364513821e-06, "loss": 0.3943, "step": 3382 }, { "epoch": 0.15311156370219506, "grad_norm": 0.6629446396210611, "learning_rate": 9.608099252492437e-06, "loss": 0.4013, "step": 3383 }, { "epoch": 0.15315682281059062, "grad_norm": 0.8507887664141085, "learning_rate": 9.607814760832764e-06, "loss": 0.3968, "step": 3384 }, { "epoch": 0.15320208191898618, "grad_norm": 0.4667875313224176, "learning_rate": 9.607530170165302e-06, "loss": 0.5105, "step": 3385 }, { "epoch": 0.15324734102738177, "grad_norm": 0.7202328958788586, "learning_rate": 9.607245480496168e-06, "loss": 0.4417, "step": 3386 }, { "epoch": 0.15329260013577733, "grad_norm": 0.7011725011282022, "learning_rate": 9.60696069183148e-06, "loss": 0.4114, "step": 3387 }, { "epoch": 0.1533378592441729, "grad_norm": 0.7338372700466135, "learning_rate": 9.606675804177355e-06, "loss": 0.4384, "step": 3388 }, { "epoch": 0.15338311835256846, "grad_norm": 0.6826376956547067, "learning_rate": 9.606390817539915e-06, "loss": 0.451, "step": 3389 }, { "epoch": 0.15342837746096402, "grad_norm": 0.6496624803686839, "learning_rate": 9.606105731925284e-06, "loss": 0.436, "step": 3390 }, { "epoch": 0.15347363656935958, "grad_norm": 0.7179188848683196, "learning_rate": 9.605820547339585e-06, "loss": 0.4005, "step": 3391 }, { "epoch": 0.15351889567775515, "grad_norm": 0.6822218854988548, "learning_rate": 9.605535263788952e-06, "loss": 0.4075, "step": 3392 }, { "epoch": 0.1535641547861507, "grad_norm": 0.3730441185268565, "learning_rate": 9.60524988127951e-06, "loss": 0.53, "step": 3393 }, { "epoch": 0.15360941389454627, "grad_norm": 0.6636620437666377, "learning_rate": 9.604964399817392e-06, "loss": 0.4018, "step": 3394 }, { "epoch": 0.15365467300294183, "grad_norm": 0.7268505905368877, "learning_rate": 9.60467881940873e-06, "loss": 0.4387, "step": 3395 }, { "epoch": 0.1536999321113374, "grad_norm": 0.6961618085585248, "learning_rate": 9.604393140059666e-06, "loss": 0.3756, "step": 3396 }, { "epoch": 0.15374519121973299, "grad_norm": 0.3179645190143338, "learning_rate": 9.604107361776331e-06, "loss": 0.4851, "step": 3397 }, { "epoch": 0.15379045032812855, "grad_norm": 0.662873080346535, "learning_rate": 9.603821484564873e-06, "loss": 0.3963, "step": 3398 }, { "epoch": 0.1538357094365241, "grad_norm": 0.6926916308137251, "learning_rate": 9.603535508431428e-06, "loss": 0.3792, "step": 3399 }, { "epoch": 0.15388096854491967, "grad_norm": 0.7226052047552491, "learning_rate": 9.603249433382145e-06, "loss": 0.4637, "step": 3400 }, { "epoch": 0.15392622765331523, "grad_norm": 0.7045172676686957, "learning_rate": 9.602963259423168e-06, "loss": 0.4246, "step": 3401 }, { "epoch": 0.1539714867617108, "grad_norm": 0.7305812318197507, "learning_rate": 9.602676986560649e-06, "loss": 0.4106, "step": 3402 }, { "epoch": 0.15401674587010636, "grad_norm": 0.6517290131883109, "learning_rate": 9.602390614800737e-06, "loss": 0.3772, "step": 3403 }, { "epoch": 0.15406200497850192, "grad_norm": 0.6246818631587848, "learning_rate": 9.602104144149587e-06, "loss": 0.4714, "step": 3404 }, { "epoch": 0.15410726408689748, "grad_norm": 0.6493350034733257, "learning_rate": 9.601817574613352e-06, "loss": 0.4017, "step": 3405 }, { "epoch": 0.15415252319529305, "grad_norm": 0.69897511909871, "learning_rate": 9.60153090619819e-06, "loss": 0.4399, "step": 3406 }, { "epoch": 0.1541977823036886, "grad_norm": 0.6498934389942151, "learning_rate": 9.601244138910262e-06, "loss": 0.3845, "step": 3407 }, { "epoch": 0.15424304141208417, "grad_norm": 0.7266953270385536, "learning_rate": 9.60095727275573e-06, "loss": 0.4543, "step": 3408 }, { "epoch": 0.15428830052047976, "grad_norm": 0.4488563542322566, "learning_rate": 9.600670307740755e-06, "loss": 0.5156, "step": 3409 }, { "epoch": 0.15433355962887532, "grad_norm": 0.965869209891706, "learning_rate": 9.600383243871508e-06, "loss": 0.3851, "step": 3410 }, { "epoch": 0.15437881873727088, "grad_norm": 0.7504707497764602, "learning_rate": 9.600096081154151e-06, "loss": 0.4307, "step": 3411 }, { "epoch": 0.15442407784566645, "grad_norm": 0.6613917357422878, "learning_rate": 9.59980881959486e-06, "loss": 0.3978, "step": 3412 }, { "epoch": 0.154469336954062, "grad_norm": 0.7520780460741235, "learning_rate": 9.599521459199803e-06, "loss": 0.4667, "step": 3413 }, { "epoch": 0.15451459606245757, "grad_norm": 0.8176268578974006, "learning_rate": 9.599233999975156e-06, "loss": 0.4274, "step": 3414 }, { "epoch": 0.15455985517085313, "grad_norm": 0.6840665310353456, "learning_rate": 9.598946441927097e-06, "loss": 0.4168, "step": 3415 }, { "epoch": 0.1546051142792487, "grad_norm": 0.7683252535016913, "learning_rate": 9.598658785061803e-06, "loss": 0.4372, "step": 3416 }, { "epoch": 0.15465037338764426, "grad_norm": 0.6823793322842577, "learning_rate": 9.598371029385455e-06, "loss": 0.3983, "step": 3417 }, { "epoch": 0.15469563249603982, "grad_norm": 0.44229167249114293, "learning_rate": 9.598083174904235e-06, "loss": 0.4959, "step": 3418 }, { "epoch": 0.15474089160443538, "grad_norm": 0.7338618242173148, "learning_rate": 9.597795221624334e-06, "loss": 0.4095, "step": 3419 }, { "epoch": 0.15478615071283094, "grad_norm": 0.32181814585633023, "learning_rate": 9.59750716955193e-06, "loss": 0.5028, "step": 3420 }, { "epoch": 0.15483140982122653, "grad_norm": 0.7202648707296, "learning_rate": 9.59721901869322e-06, "loss": 0.4154, "step": 3421 }, { "epoch": 0.1548766689296221, "grad_norm": 0.7033701337578779, "learning_rate": 9.596930769054391e-06, "loss": 0.4178, "step": 3422 }, { "epoch": 0.15492192803801766, "grad_norm": 0.9866248658193294, "learning_rate": 9.59664242064164e-06, "loss": 0.407, "step": 3423 }, { "epoch": 0.15496718714641322, "grad_norm": 0.7775425621832033, "learning_rate": 9.59635397346116e-06, "loss": 0.3774, "step": 3424 }, { "epoch": 0.15501244625480878, "grad_norm": 0.6732563516972015, "learning_rate": 9.596065427519149e-06, "loss": 0.4344, "step": 3425 }, { "epoch": 0.15505770536320435, "grad_norm": 0.691794745822921, "learning_rate": 9.595776782821807e-06, "loss": 0.421, "step": 3426 }, { "epoch": 0.1551029644715999, "grad_norm": 0.6687992237129932, "learning_rate": 9.595488039375338e-06, "loss": 0.4408, "step": 3427 }, { "epoch": 0.15514822357999547, "grad_norm": 0.7624800700828009, "learning_rate": 9.595199197185944e-06, "loss": 0.4267, "step": 3428 }, { "epoch": 0.15519348268839103, "grad_norm": 0.7863688543009814, "learning_rate": 9.594910256259834e-06, "loss": 0.4503, "step": 3429 }, { "epoch": 0.1552387417967866, "grad_norm": 0.6196129348860103, "learning_rate": 9.594621216603215e-06, "loss": 0.5111, "step": 3430 }, { "epoch": 0.15528400090518216, "grad_norm": 0.7464054819999745, "learning_rate": 9.594332078222296e-06, "loss": 0.3929, "step": 3431 }, { "epoch": 0.15532926001357772, "grad_norm": 0.660553397731443, "learning_rate": 9.594042841123291e-06, "loss": 0.4165, "step": 3432 }, { "epoch": 0.1553745191219733, "grad_norm": 0.6912744419593565, "learning_rate": 9.593753505312415e-06, "loss": 0.4312, "step": 3433 }, { "epoch": 0.15541977823036887, "grad_norm": 0.700543306891848, "learning_rate": 9.593464070795887e-06, "loss": 0.4068, "step": 3434 }, { "epoch": 0.15546503733876443, "grad_norm": 0.6372973202578978, "learning_rate": 9.593174537579921e-06, "loss": 0.3695, "step": 3435 }, { "epoch": 0.15551029644716, "grad_norm": 0.7077294435391664, "learning_rate": 9.592884905670742e-06, "loss": 0.3848, "step": 3436 }, { "epoch": 0.15555555555555556, "grad_norm": 2.966558214938584, "learning_rate": 9.592595175074573e-06, "loss": 0.3867, "step": 3437 }, { "epoch": 0.15560081466395112, "grad_norm": 0.6559110571739295, "learning_rate": 9.592305345797636e-06, "loss": 0.5055, "step": 3438 }, { "epoch": 0.15564607377234668, "grad_norm": 0.7035205428029994, "learning_rate": 9.592015417846166e-06, "loss": 0.4145, "step": 3439 }, { "epoch": 0.15569133288074224, "grad_norm": 0.7079837650347159, "learning_rate": 9.591725391226383e-06, "loss": 0.3844, "step": 3440 }, { "epoch": 0.1557365919891378, "grad_norm": 0.32253785644702726, "learning_rate": 9.591435265944527e-06, "loss": 0.4867, "step": 3441 }, { "epoch": 0.15578185109753337, "grad_norm": 0.8604594320238467, "learning_rate": 9.591145042006829e-06, "loss": 0.4029, "step": 3442 }, { "epoch": 0.15582711020592893, "grad_norm": 0.8566153255616973, "learning_rate": 9.590854719419522e-06, "loss": 0.4557, "step": 3443 }, { "epoch": 0.15587236931432452, "grad_norm": 0.6991748547257743, "learning_rate": 9.59056429818885e-06, "loss": 0.4254, "step": 3444 }, { "epoch": 0.15591762842272008, "grad_norm": 0.6833290597384973, "learning_rate": 9.590273778321048e-06, "loss": 0.3911, "step": 3445 }, { "epoch": 0.15596288753111565, "grad_norm": 0.7384819888520089, "learning_rate": 9.58998315982236e-06, "loss": 0.4306, "step": 3446 }, { "epoch": 0.1560081466395112, "grad_norm": 0.7408386652212549, "learning_rate": 9.589692442699033e-06, "loss": 0.4349, "step": 3447 }, { "epoch": 0.15605340574790677, "grad_norm": 0.7202578582855139, "learning_rate": 9.589401626957309e-06, "loss": 0.4633, "step": 3448 }, { "epoch": 0.15609866485630233, "grad_norm": 0.73345358302742, "learning_rate": 9.589110712603442e-06, "loss": 0.4691, "step": 3449 }, { "epoch": 0.1561439239646979, "grad_norm": 0.6367108198509374, "learning_rate": 9.588819699643677e-06, "loss": 0.536, "step": 3450 }, { "epoch": 0.15618918307309346, "grad_norm": 0.4522720428613994, "learning_rate": 9.588528588084272e-06, "loss": 0.5085, "step": 3451 }, { "epoch": 0.15623444218148902, "grad_norm": 0.7553918774782068, "learning_rate": 9.588237377931482e-06, "loss": 0.439, "step": 3452 }, { "epoch": 0.15627970128988458, "grad_norm": 0.7313305103265421, "learning_rate": 9.587946069191561e-06, "loss": 0.4457, "step": 3453 }, { "epoch": 0.15632496039828014, "grad_norm": 0.6840203653682762, "learning_rate": 9.58765466187077e-06, "loss": 0.4285, "step": 3454 }, { "epoch": 0.1563702195066757, "grad_norm": 0.7561467717478978, "learning_rate": 9.587363155975367e-06, "loss": 0.3913, "step": 3455 }, { "epoch": 0.1564154786150713, "grad_norm": 0.8080672045005288, "learning_rate": 9.587071551511621e-06, "loss": 0.4189, "step": 3456 }, { "epoch": 0.15646073772346686, "grad_norm": 0.7731559050053085, "learning_rate": 9.586779848485797e-06, "loss": 0.3409, "step": 3457 }, { "epoch": 0.15650599683186242, "grad_norm": 0.6965717472553806, "learning_rate": 9.58648804690416e-06, "loss": 0.4369, "step": 3458 }, { "epoch": 0.15655125594025798, "grad_norm": 0.8440351872536466, "learning_rate": 9.586196146772982e-06, "loss": 0.4422, "step": 3459 }, { "epoch": 0.15659651504865355, "grad_norm": 0.7661592744550679, "learning_rate": 9.585904148098532e-06, "loss": 0.4561, "step": 3460 }, { "epoch": 0.1566417741570491, "grad_norm": 1.3733560238638691, "learning_rate": 9.58561205088709e-06, "loss": 0.4997, "step": 3461 }, { "epoch": 0.15668703326544467, "grad_norm": 0.7280290741276498, "learning_rate": 9.585319855144926e-06, "loss": 0.4139, "step": 3462 }, { "epoch": 0.15673229237384023, "grad_norm": 0.7531301358903874, "learning_rate": 9.585027560878322e-06, "loss": 0.4493, "step": 3463 }, { "epoch": 0.1567775514822358, "grad_norm": 0.6604910087043134, "learning_rate": 9.584735168093557e-06, "loss": 0.4061, "step": 3464 }, { "epoch": 0.15682281059063136, "grad_norm": 0.7322442914769552, "learning_rate": 9.584442676796915e-06, "loss": 0.4581, "step": 3465 }, { "epoch": 0.15686806969902692, "grad_norm": 0.7073063990760776, "learning_rate": 9.584150086994678e-06, "loss": 0.4086, "step": 3466 }, { "epoch": 0.15691332880742248, "grad_norm": 0.7190082010411192, "learning_rate": 9.583857398693137e-06, "loss": 0.3896, "step": 3467 }, { "epoch": 0.15695858791581807, "grad_norm": 0.7584845386772335, "learning_rate": 9.583564611898577e-06, "loss": 0.4143, "step": 3468 }, { "epoch": 0.15700384702421363, "grad_norm": 0.7108815510865111, "learning_rate": 9.583271726617293e-06, "loss": 0.4264, "step": 3469 }, { "epoch": 0.1570491061326092, "grad_norm": 0.7277968624062817, "learning_rate": 9.582978742855575e-06, "loss": 0.4339, "step": 3470 }, { "epoch": 0.15709436524100476, "grad_norm": 0.7515655120521509, "learning_rate": 9.582685660619718e-06, "loss": 0.4065, "step": 3471 }, { "epoch": 0.15713962434940032, "grad_norm": 0.6949952504423228, "learning_rate": 9.582392479916023e-06, "loss": 0.4211, "step": 3472 }, { "epoch": 0.15718488345779588, "grad_norm": 0.7550078700494964, "learning_rate": 9.582099200750784e-06, "loss": 0.4322, "step": 3473 }, { "epoch": 0.15723014256619144, "grad_norm": 0.6658476068153182, "learning_rate": 9.58180582313031e-06, "loss": 0.3894, "step": 3474 }, { "epoch": 0.157275401674587, "grad_norm": 0.7200657052746892, "learning_rate": 9.581512347060899e-06, "loss": 0.4394, "step": 3475 }, { "epoch": 0.15732066078298257, "grad_norm": 0.6551082332498498, "learning_rate": 9.58121877254886e-06, "loss": 0.4125, "step": 3476 }, { "epoch": 0.15736591989137813, "grad_norm": 0.8696623317849659, "learning_rate": 9.580925099600497e-06, "loss": 0.4245, "step": 3477 }, { "epoch": 0.1574111789997737, "grad_norm": 1.3728801852345183, "learning_rate": 9.580631328222124e-06, "loss": 0.5327, "step": 3478 }, { "epoch": 0.15745643810816926, "grad_norm": 0.7613061786692783, "learning_rate": 9.580337458420052e-06, "loss": 0.3812, "step": 3479 }, { "epoch": 0.15750169721656485, "grad_norm": 0.6733684603028954, "learning_rate": 9.580043490200597e-06, "loss": 0.4271, "step": 3480 }, { "epoch": 0.1575469563249604, "grad_norm": 0.7118128617274064, "learning_rate": 9.579749423570072e-06, "loss": 0.3765, "step": 3481 }, { "epoch": 0.15759221543335597, "grad_norm": 0.8008041863113271, "learning_rate": 9.579455258534798e-06, "loss": 0.4328, "step": 3482 }, { "epoch": 0.15763747454175153, "grad_norm": 0.7067609703059613, "learning_rate": 9.579160995101095e-06, "loss": 0.4616, "step": 3483 }, { "epoch": 0.1576827336501471, "grad_norm": 0.6948951876700356, "learning_rate": 9.578866633275289e-06, "loss": 0.4095, "step": 3484 }, { "epoch": 0.15772799275854266, "grad_norm": 1.030356858024951, "learning_rate": 9.578572173063698e-06, "loss": 0.5231, "step": 3485 }, { "epoch": 0.15777325186693822, "grad_norm": 0.731783601918967, "learning_rate": 9.578277614472655e-06, "loss": 0.4501, "step": 3486 }, { "epoch": 0.15781851097533378, "grad_norm": 0.6664453441880271, "learning_rate": 9.577982957508488e-06, "loss": 0.3953, "step": 3487 }, { "epoch": 0.15786377008372934, "grad_norm": 0.6950749453384258, "learning_rate": 9.577688202177525e-06, "loss": 0.4274, "step": 3488 }, { "epoch": 0.1579090291921249, "grad_norm": 1.0097198471065796, "learning_rate": 9.577393348486104e-06, "loss": 0.4375, "step": 3489 }, { "epoch": 0.15795428830052047, "grad_norm": 0.7139937096631528, "learning_rate": 9.577098396440557e-06, "loss": 0.4217, "step": 3490 }, { "epoch": 0.15799954740891606, "grad_norm": 0.5875788879196998, "learning_rate": 9.576803346047223e-06, "loss": 0.5023, "step": 3491 }, { "epoch": 0.15804480651731162, "grad_norm": 0.6762842201383202, "learning_rate": 9.576508197312441e-06, "loss": 0.4239, "step": 3492 }, { "epoch": 0.15809006562570718, "grad_norm": 0.37114250178621616, "learning_rate": 9.576212950242554e-06, "loss": 0.4967, "step": 3493 }, { "epoch": 0.15813532473410274, "grad_norm": 0.6747086799824057, "learning_rate": 9.575917604843907e-06, "loss": 0.443, "step": 3494 }, { "epoch": 0.1581805838424983, "grad_norm": 0.6714700623457361, "learning_rate": 9.575622161122843e-06, "loss": 0.4158, "step": 3495 }, { "epoch": 0.15822584295089387, "grad_norm": 0.7040860890844103, "learning_rate": 9.575326619085713e-06, "loss": 0.4436, "step": 3496 }, { "epoch": 0.15827110205928943, "grad_norm": 0.8520999125563331, "learning_rate": 9.575030978738865e-06, "loss": 0.4327, "step": 3497 }, { "epoch": 0.158316361167685, "grad_norm": 0.742115121650574, "learning_rate": 9.574735240088652e-06, "loss": 0.3907, "step": 3498 }, { "epoch": 0.15836162027608056, "grad_norm": 0.6498680949772371, "learning_rate": 9.574439403141431e-06, "loss": 0.4895, "step": 3499 }, { "epoch": 0.15840687938447612, "grad_norm": 0.7487791434196598, "learning_rate": 9.574143467903554e-06, "loss": 0.3604, "step": 3500 }, { "epoch": 0.15845213849287168, "grad_norm": 0.7048832865090792, "learning_rate": 9.573847434381382e-06, "loss": 0.4797, "step": 3501 }, { "epoch": 0.15849739760126724, "grad_norm": 0.6976048296528936, "learning_rate": 9.573551302581279e-06, "loss": 0.4349, "step": 3502 }, { "epoch": 0.15854265670966283, "grad_norm": 0.7027842537035046, "learning_rate": 9.573255072509604e-06, "loss": 0.3949, "step": 3503 }, { "epoch": 0.1585879158180584, "grad_norm": 0.35742746636337064, "learning_rate": 9.572958744172722e-06, "loss": 0.5011, "step": 3504 }, { "epoch": 0.15863317492645396, "grad_norm": 0.6404956434831562, "learning_rate": 9.572662317577002e-06, "loss": 0.4505, "step": 3505 }, { "epoch": 0.15867843403484952, "grad_norm": 0.7493153738563227, "learning_rate": 9.572365792728812e-06, "loss": 0.4209, "step": 3506 }, { "epoch": 0.15872369314324508, "grad_norm": 0.6490770161540544, "learning_rate": 9.572069169634526e-06, "loss": 0.402, "step": 3507 }, { "epoch": 0.15876895225164064, "grad_norm": 0.48412819221920733, "learning_rate": 9.571772448300514e-06, "loss": 0.5375, "step": 3508 }, { "epoch": 0.1588142113600362, "grad_norm": 0.691642616032123, "learning_rate": 9.571475628733153e-06, "loss": 0.3852, "step": 3509 }, { "epoch": 0.15885947046843177, "grad_norm": 0.6706389167853233, "learning_rate": 9.571178710938823e-06, "loss": 0.421, "step": 3510 }, { "epoch": 0.15890472957682733, "grad_norm": 0.8253546949943985, "learning_rate": 9.570881694923899e-06, "loss": 0.4211, "step": 3511 }, { "epoch": 0.1589499886852229, "grad_norm": 0.6671583142880999, "learning_rate": 9.570584580694768e-06, "loss": 0.4265, "step": 3512 }, { "epoch": 0.15899524779361845, "grad_norm": 0.9602296597346538, "learning_rate": 9.570287368257811e-06, "loss": 0.4102, "step": 3513 }, { "epoch": 0.15904050690201402, "grad_norm": 0.7444960285872806, "learning_rate": 9.569990057619414e-06, "loss": 0.4406, "step": 3514 }, { "epoch": 0.1590857660104096, "grad_norm": 0.6599282051949944, "learning_rate": 9.569692648785967e-06, "loss": 0.4068, "step": 3515 }, { "epoch": 0.15913102511880517, "grad_norm": 0.7741719356633847, "learning_rate": 9.56939514176386e-06, "loss": 0.415, "step": 3516 }, { "epoch": 0.15917628422720073, "grad_norm": 0.7258449526309476, "learning_rate": 9.569097536559486e-06, "loss": 0.4139, "step": 3517 }, { "epoch": 0.1592215433355963, "grad_norm": 0.7239432327363277, "learning_rate": 9.568799833179238e-06, "loss": 0.4041, "step": 3518 }, { "epoch": 0.15926680244399186, "grad_norm": 0.56606812255434, "learning_rate": 9.568502031629513e-06, "loss": 0.5377, "step": 3519 }, { "epoch": 0.15931206155238742, "grad_norm": 0.6611254951118147, "learning_rate": 9.568204131916712e-06, "loss": 0.3549, "step": 3520 }, { "epoch": 0.15935732066078298, "grad_norm": 0.7134987779635908, "learning_rate": 9.567906134047233e-06, "loss": 0.413, "step": 3521 }, { "epoch": 0.15940257976917854, "grad_norm": 0.6931576955652552, "learning_rate": 9.567608038027481e-06, "loss": 0.4342, "step": 3522 }, { "epoch": 0.1594478388775741, "grad_norm": 0.7093232644238057, "learning_rate": 9.567309843863862e-06, "loss": 0.3926, "step": 3523 }, { "epoch": 0.15949309798596967, "grad_norm": 0.7141010617990805, "learning_rate": 9.56701155156278e-06, "loss": 0.476, "step": 3524 }, { "epoch": 0.15953835709436523, "grad_norm": 0.6637371625249113, "learning_rate": 9.566713161130646e-06, "loss": 0.4099, "step": 3525 }, { "epoch": 0.1595836162027608, "grad_norm": 0.7357520258636889, "learning_rate": 9.566414672573873e-06, "loss": 0.391, "step": 3526 }, { "epoch": 0.15962887531115638, "grad_norm": 0.7564330883531917, "learning_rate": 9.566116085898872e-06, "loss": 0.4638, "step": 3527 }, { "epoch": 0.15967413441955194, "grad_norm": 0.49859478913625926, "learning_rate": 9.565817401112061e-06, "loss": 0.5584, "step": 3528 }, { "epoch": 0.1597193935279475, "grad_norm": 0.3963057852996049, "learning_rate": 9.565518618219857e-06, "loss": 0.4982, "step": 3529 }, { "epoch": 0.15976465263634307, "grad_norm": 0.7049571051495215, "learning_rate": 9.56521973722868e-06, "loss": 0.4088, "step": 3530 }, { "epoch": 0.15980991174473863, "grad_norm": 0.6898393168730423, "learning_rate": 9.564920758144951e-06, "loss": 0.3681, "step": 3531 }, { "epoch": 0.1598551708531342, "grad_norm": 0.8196229163508992, "learning_rate": 9.564621680975095e-06, "loss": 0.3881, "step": 3532 }, { "epoch": 0.15990042996152976, "grad_norm": 0.7760658972671267, "learning_rate": 9.564322505725539e-06, "loss": 0.4025, "step": 3533 }, { "epoch": 0.15994568906992532, "grad_norm": 0.7553452613369879, "learning_rate": 9.56402323240271e-06, "loss": 0.4105, "step": 3534 }, { "epoch": 0.15999094817832088, "grad_norm": 0.7444146630985331, "learning_rate": 9.563723861013039e-06, "loss": 0.3949, "step": 3535 }, { "epoch": 0.16003620728671644, "grad_norm": 0.634783786367562, "learning_rate": 9.563424391562958e-06, "loss": 0.375, "step": 3536 }, { "epoch": 0.160081466395112, "grad_norm": 0.6960440889965599, "learning_rate": 9.563124824058905e-06, "loss": 0.4077, "step": 3537 }, { "epoch": 0.1601267255035076, "grad_norm": 0.6738085708575341, "learning_rate": 9.562825158507311e-06, "loss": 0.4072, "step": 3538 }, { "epoch": 0.16017198461190316, "grad_norm": 0.730674294084897, "learning_rate": 9.562525394914621e-06, "loss": 0.4379, "step": 3539 }, { "epoch": 0.16021724372029872, "grad_norm": 0.8099966068598785, "learning_rate": 9.562225533287271e-06, "loss": 0.4621, "step": 3540 }, { "epoch": 0.16026250282869428, "grad_norm": 0.8510329125183358, "learning_rate": 9.561925573631706e-06, "loss": 0.5253, "step": 3541 }, { "epoch": 0.16030776193708984, "grad_norm": 0.7692233573042879, "learning_rate": 9.561625515954372e-06, "loss": 0.3987, "step": 3542 }, { "epoch": 0.1603530210454854, "grad_norm": 0.7289333091370424, "learning_rate": 9.561325360261714e-06, "loss": 0.4236, "step": 3543 }, { "epoch": 0.16039828015388097, "grad_norm": 0.6737579331807066, "learning_rate": 9.561025106560184e-06, "loss": 0.39, "step": 3544 }, { "epoch": 0.16044353926227653, "grad_norm": 1.0999296662435172, "learning_rate": 9.560724754856234e-06, "loss": 0.5043, "step": 3545 }, { "epoch": 0.1604887983706721, "grad_norm": 0.8174191146577231, "learning_rate": 9.560424305156314e-06, "loss": 0.4529, "step": 3546 }, { "epoch": 0.16053405747906765, "grad_norm": 0.7134271369346344, "learning_rate": 9.560123757466885e-06, "loss": 0.4264, "step": 3547 }, { "epoch": 0.16057931658746322, "grad_norm": 0.7892566606627814, "learning_rate": 9.5598231117944e-06, "loss": 0.4666, "step": 3548 }, { "epoch": 0.16062457569585878, "grad_norm": 0.6618307576773829, "learning_rate": 9.559522368145319e-06, "loss": 0.4007, "step": 3549 }, { "epoch": 0.16066983480425437, "grad_norm": 0.5561735482323752, "learning_rate": 9.55922152652611e-06, "loss": 0.5258, "step": 3550 }, { "epoch": 0.16071509391264993, "grad_norm": 0.45960926418367365, "learning_rate": 9.55892058694323e-06, "loss": 0.5249, "step": 3551 }, { "epoch": 0.1607603530210455, "grad_norm": 1.039650118403853, "learning_rate": 9.558619549403148e-06, "loss": 0.3931, "step": 3552 }, { "epoch": 0.16080561212944106, "grad_norm": 0.7145501228608365, "learning_rate": 9.558318413912333e-06, "loss": 0.42, "step": 3553 }, { "epoch": 0.16085087123783662, "grad_norm": 0.7942813553983717, "learning_rate": 9.558017180477256e-06, "loss": 0.4074, "step": 3554 }, { "epoch": 0.16089613034623218, "grad_norm": 1.2037104862401944, "learning_rate": 9.557715849104388e-06, "loss": 0.3903, "step": 3555 }, { "epoch": 0.16094138945462774, "grad_norm": 0.6698408169445956, "learning_rate": 9.557414419800204e-06, "loss": 0.3849, "step": 3556 }, { "epoch": 0.1609866485630233, "grad_norm": 0.6827869819031588, "learning_rate": 9.557112892571182e-06, "loss": 0.4154, "step": 3557 }, { "epoch": 0.16103190767141887, "grad_norm": 0.7589222223962308, "learning_rate": 9.5568112674238e-06, "loss": 0.4312, "step": 3558 }, { "epoch": 0.16107716677981443, "grad_norm": 0.7177871268316959, "learning_rate": 9.556509544364538e-06, "loss": 0.4274, "step": 3559 }, { "epoch": 0.16112242588821, "grad_norm": 0.8385093075093688, "learning_rate": 9.556207723399882e-06, "loss": 0.4941, "step": 3560 }, { "epoch": 0.16116768499660555, "grad_norm": 0.7822093267945871, "learning_rate": 9.555905804536315e-06, "loss": 0.4128, "step": 3561 }, { "epoch": 0.16121294410500114, "grad_norm": 0.8118716712352124, "learning_rate": 9.555603787780321e-06, "loss": 0.4083, "step": 3562 }, { "epoch": 0.1612582032133967, "grad_norm": 0.709902451425543, "learning_rate": 9.555301673138397e-06, "loss": 0.4207, "step": 3563 }, { "epoch": 0.16130346232179227, "grad_norm": 0.4298801720977222, "learning_rate": 9.55499946061703e-06, "loss": 0.5039, "step": 3564 }, { "epoch": 0.16134872143018783, "grad_norm": 0.7351410622515675, "learning_rate": 9.554697150222713e-06, "loss": 0.4508, "step": 3565 }, { "epoch": 0.1613939805385834, "grad_norm": 0.4190010950162947, "learning_rate": 9.554394741961944e-06, "loss": 0.5276, "step": 3566 }, { "epoch": 0.16143923964697895, "grad_norm": 0.7005793374248309, "learning_rate": 9.554092235841219e-06, "loss": 0.4163, "step": 3567 }, { "epoch": 0.16148449875537452, "grad_norm": 0.7101329404583272, "learning_rate": 9.553789631867039e-06, "loss": 0.4009, "step": 3568 }, { "epoch": 0.16152975786377008, "grad_norm": 0.671422559631563, "learning_rate": 9.553486930045906e-06, "loss": 0.4007, "step": 3569 }, { "epoch": 0.16157501697216564, "grad_norm": 0.8008267532909126, "learning_rate": 9.553184130384324e-06, "loss": 0.4549, "step": 3570 }, { "epoch": 0.1616202760805612, "grad_norm": 0.7376105220646921, "learning_rate": 9.5528812328888e-06, "loss": 0.4455, "step": 3571 }, { "epoch": 0.16166553518895677, "grad_norm": 0.6704041835203152, "learning_rate": 9.552578237565839e-06, "loss": 0.4112, "step": 3572 }, { "epoch": 0.16171079429735236, "grad_norm": 0.6929611158994938, "learning_rate": 9.552275144421953e-06, "loss": 0.4417, "step": 3573 }, { "epoch": 0.16175605340574792, "grad_norm": 0.6915671452152358, "learning_rate": 9.551971953463659e-06, "loss": 0.4215, "step": 3574 }, { "epoch": 0.16180131251414348, "grad_norm": 1.136766335665436, "learning_rate": 9.551668664697467e-06, "loss": 0.5025, "step": 3575 }, { "epoch": 0.16184657162253904, "grad_norm": 0.6674529201333661, "learning_rate": 9.551365278129894e-06, "loss": 0.3585, "step": 3576 }, { "epoch": 0.1618918307309346, "grad_norm": 0.8954324093302272, "learning_rate": 9.55106179376746e-06, "loss": 0.4171, "step": 3577 }, { "epoch": 0.16193708983933017, "grad_norm": 0.78892785286471, "learning_rate": 9.550758211616684e-06, "loss": 0.4321, "step": 3578 }, { "epoch": 0.16198234894772573, "grad_norm": 0.6293828498610229, "learning_rate": 9.550454531684092e-06, "loss": 0.3647, "step": 3579 }, { "epoch": 0.1620276080561213, "grad_norm": 0.7028121697581299, "learning_rate": 9.550150753976209e-06, "loss": 0.4225, "step": 3580 }, { "epoch": 0.16207286716451685, "grad_norm": 0.6885948868606172, "learning_rate": 9.54984687849956e-06, "loss": 0.4268, "step": 3581 }, { "epoch": 0.16211812627291242, "grad_norm": 0.6863217671163234, "learning_rate": 9.549542905260674e-06, "loss": 0.4105, "step": 3582 }, { "epoch": 0.16216338538130798, "grad_norm": 0.7042879685578601, "learning_rate": 9.549238834266086e-06, "loss": 0.3946, "step": 3583 }, { "epoch": 0.16220864448970354, "grad_norm": 0.6589248712522245, "learning_rate": 9.548934665522325e-06, "loss": 0.3693, "step": 3584 }, { "epoch": 0.16225390359809913, "grad_norm": 0.7011875986352972, "learning_rate": 9.548630399035931e-06, "loss": 0.3809, "step": 3585 }, { "epoch": 0.1622991627064947, "grad_norm": 0.7806831482841735, "learning_rate": 9.54832603481344e-06, "loss": 0.4031, "step": 3586 }, { "epoch": 0.16234442181489026, "grad_norm": 0.7346212570439461, "learning_rate": 9.54802157286139e-06, "loss": 0.4171, "step": 3587 }, { "epoch": 0.16238968092328582, "grad_norm": 0.6962142519779313, "learning_rate": 9.547717013186326e-06, "loss": 0.4172, "step": 3588 }, { "epoch": 0.16243494003168138, "grad_norm": 0.6451915080977847, "learning_rate": 9.547412355794789e-06, "loss": 0.4122, "step": 3589 }, { "epoch": 0.16248019914007694, "grad_norm": 0.6863066177902849, "learning_rate": 9.547107600693328e-06, "loss": 0.4314, "step": 3590 }, { "epoch": 0.1625254582484725, "grad_norm": 0.6418986780368956, "learning_rate": 9.54680274788849e-06, "loss": 0.4208, "step": 3591 }, { "epoch": 0.16257071735686807, "grad_norm": 0.9359545702071064, "learning_rate": 9.546497797386824e-06, "loss": 0.552, "step": 3592 }, { "epoch": 0.16261597646526363, "grad_norm": 0.7327979098491275, "learning_rate": 9.546192749194885e-06, "loss": 0.4228, "step": 3593 }, { "epoch": 0.1626612355736592, "grad_norm": 0.8076526403856159, "learning_rate": 9.545887603319228e-06, "loss": 0.4029, "step": 3594 }, { "epoch": 0.16270649468205475, "grad_norm": 0.7663084724651145, "learning_rate": 9.545582359766405e-06, "loss": 0.3929, "step": 3595 }, { "epoch": 0.16275175379045032, "grad_norm": 0.6985073830450215, "learning_rate": 9.54527701854298e-06, "loss": 0.3769, "step": 3596 }, { "epoch": 0.1627970128988459, "grad_norm": 0.7097929957097808, "learning_rate": 9.544971579655512e-06, "loss": 0.4669, "step": 3597 }, { "epoch": 0.16284227200724147, "grad_norm": 0.6590480282146434, "learning_rate": 9.544666043110562e-06, "loss": 0.4041, "step": 3598 }, { "epoch": 0.16288753111563703, "grad_norm": 0.6845449427550792, "learning_rate": 9.544360408914696e-06, "loss": 0.4011, "step": 3599 }, { "epoch": 0.1629327902240326, "grad_norm": 0.7318456776258486, "learning_rate": 9.544054677074483e-06, "loss": 0.4056, "step": 3600 }, { "epoch": 0.16297804933242815, "grad_norm": 0.7210936459362473, "learning_rate": 9.543748847596491e-06, "loss": 0.4206, "step": 3601 }, { "epoch": 0.16302330844082372, "grad_norm": 0.7099193412366627, "learning_rate": 9.543442920487291e-06, "loss": 0.3923, "step": 3602 }, { "epoch": 0.16306856754921928, "grad_norm": 0.7374127538432773, "learning_rate": 9.543136895753458e-06, "loss": 0.4666, "step": 3603 }, { "epoch": 0.16311382665761484, "grad_norm": 0.6380376806590465, "learning_rate": 9.542830773401564e-06, "loss": 0.3662, "step": 3604 }, { "epoch": 0.1631590857660104, "grad_norm": 0.7137586537082149, "learning_rate": 9.54252455343819e-06, "loss": 0.4414, "step": 3605 }, { "epoch": 0.16320434487440597, "grad_norm": 0.9055787596951604, "learning_rate": 9.542218235869915e-06, "loss": 0.4986, "step": 3606 }, { "epoch": 0.16324960398280153, "grad_norm": 0.8021776941432398, "learning_rate": 9.54191182070332e-06, "loss": 0.4135, "step": 3607 }, { "epoch": 0.1632948630911971, "grad_norm": 0.5627123166001644, "learning_rate": 9.54160530794499e-06, "loss": 0.5387, "step": 3608 }, { "epoch": 0.16334012219959268, "grad_norm": 0.7214389595222919, "learning_rate": 9.541298697601508e-06, "loss": 0.4124, "step": 3609 }, { "epoch": 0.16338538130798824, "grad_norm": 0.7582642791339812, "learning_rate": 9.540991989679468e-06, "loss": 0.4447, "step": 3610 }, { "epoch": 0.1634306404163838, "grad_norm": 0.7113602407039508, "learning_rate": 9.540685184185455e-06, "loss": 0.4116, "step": 3611 }, { "epoch": 0.16347589952477937, "grad_norm": 0.7172389497873176, "learning_rate": 9.540378281126064e-06, "loss": 0.4665, "step": 3612 }, { "epoch": 0.16352115863317493, "grad_norm": 0.7156189181787637, "learning_rate": 9.540071280507887e-06, "loss": 0.4648, "step": 3613 }, { "epoch": 0.1635664177415705, "grad_norm": 0.7428520280649665, "learning_rate": 9.539764182337523e-06, "loss": 0.3967, "step": 3614 }, { "epoch": 0.16361167684996605, "grad_norm": 0.6564775779238287, "learning_rate": 9.539456986621568e-06, "loss": 0.4227, "step": 3615 }, { "epoch": 0.16365693595836162, "grad_norm": 0.5938727255118851, "learning_rate": 9.539149693366628e-06, "loss": 0.4373, "step": 3616 }, { "epoch": 0.16370219506675718, "grad_norm": 0.8369196659255341, "learning_rate": 9.538842302579299e-06, "loss": 0.3769, "step": 3617 }, { "epoch": 0.16374745417515274, "grad_norm": 1.165080626912052, "learning_rate": 9.538534814266187e-06, "loss": 0.5097, "step": 3618 }, { "epoch": 0.1637927132835483, "grad_norm": 0.8593480539402252, "learning_rate": 9.538227228433905e-06, "loss": 0.4292, "step": 3619 }, { "epoch": 0.1638379723919439, "grad_norm": 0.7962035527894968, "learning_rate": 9.537919545089057e-06, "loss": 0.3933, "step": 3620 }, { "epoch": 0.16388323150033945, "grad_norm": 0.6133916320357296, "learning_rate": 9.537611764238253e-06, "loss": 0.5197, "step": 3621 }, { "epoch": 0.16392849060873502, "grad_norm": 0.45833468804418676, "learning_rate": 9.53730388588811e-06, "loss": 0.4907, "step": 3622 }, { "epoch": 0.16397374971713058, "grad_norm": 0.6799233073262908, "learning_rate": 9.536995910045241e-06, "loss": 0.3854, "step": 3623 }, { "epoch": 0.16401900882552614, "grad_norm": 0.7132031385597974, "learning_rate": 9.536687836716265e-06, "loss": 0.4124, "step": 3624 }, { "epoch": 0.1640642679339217, "grad_norm": 1.1479222305246017, "learning_rate": 9.536379665907801e-06, "loss": 0.4351, "step": 3625 }, { "epoch": 0.16410952704231727, "grad_norm": 0.8723725440454708, "learning_rate": 9.53607139762647e-06, "loss": 0.5495, "step": 3626 }, { "epoch": 0.16415478615071283, "grad_norm": 0.7988608216779318, "learning_rate": 9.535763031878895e-06, "loss": 0.4412, "step": 3627 }, { "epoch": 0.1642000452591084, "grad_norm": 0.6804044503637056, "learning_rate": 9.535454568671705e-06, "loss": 0.4246, "step": 3628 }, { "epoch": 0.16424530436750395, "grad_norm": 0.7159799420828592, "learning_rate": 9.535146008011524e-06, "loss": 0.4234, "step": 3629 }, { "epoch": 0.16429056347589951, "grad_norm": 0.8517435555100842, "learning_rate": 9.534837349904986e-06, "loss": 0.3846, "step": 3630 }, { "epoch": 0.16433582258429508, "grad_norm": 0.7481983359140346, "learning_rate": 9.534528594358718e-06, "loss": 0.4253, "step": 3631 }, { "epoch": 0.16438108169269067, "grad_norm": 0.746705182354197, "learning_rate": 9.53421974137936e-06, "loss": 0.4291, "step": 3632 }, { "epoch": 0.16442634080108623, "grad_norm": 0.656268854264488, "learning_rate": 9.533910790973545e-06, "loss": 0.42, "step": 3633 }, { "epoch": 0.1644715999094818, "grad_norm": 0.715318601638388, "learning_rate": 9.533601743147911e-06, "loss": 0.454, "step": 3634 }, { "epoch": 0.16451685901787735, "grad_norm": 0.6353354533937493, "learning_rate": 9.533292597909101e-06, "loss": 0.4316, "step": 3635 }, { "epoch": 0.16456211812627292, "grad_norm": 0.6869695424669242, "learning_rate": 9.532983355263753e-06, "loss": 0.4179, "step": 3636 }, { "epoch": 0.16460737723466848, "grad_norm": 0.6966617156005382, "learning_rate": 9.532674015218519e-06, "loss": 0.3999, "step": 3637 }, { "epoch": 0.16465263634306404, "grad_norm": 0.738112774923969, "learning_rate": 9.532364577780039e-06, "loss": 0.5192, "step": 3638 }, { "epoch": 0.1646978954514596, "grad_norm": 0.7197831342194553, "learning_rate": 9.532055042954964e-06, "loss": 0.4315, "step": 3639 }, { "epoch": 0.16474315455985516, "grad_norm": 0.6691610827792658, "learning_rate": 9.531745410749946e-06, "loss": 0.432, "step": 3640 }, { "epoch": 0.16478841366825073, "grad_norm": 0.7336041070909316, "learning_rate": 9.531435681171637e-06, "loss": 0.4584, "step": 3641 }, { "epoch": 0.1648336727766463, "grad_norm": 0.6534578008595666, "learning_rate": 9.531125854226692e-06, "loss": 0.4358, "step": 3642 }, { "epoch": 0.16487893188504185, "grad_norm": 0.5270481010940409, "learning_rate": 9.53081592992177e-06, "loss": 0.5232, "step": 3643 }, { "epoch": 0.16492419099343744, "grad_norm": 0.832431322687446, "learning_rate": 9.530505908263528e-06, "loss": 0.3747, "step": 3644 }, { "epoch": 0.164969450101833, "grad_norm": 0.7072464593601516, "learning_rate": 9.53019578925863e-06, "loss": 0.4118, "step": 3645 }, { "epoch": 0.16501470921022857, "grad_norm": 0.6158071480238312, "learning_rate": 9.529885572913735e-06, "loss": 0.4015, "step": 3646 }, { "epoch": 0.16505996831862413, "grad_norm": 0.7465380445759865, "learning_rate": 9.529575259235514e-06, "loss": 0.4451, "step": 3647 }, { "epoch": 0.1651052274270197, "grad_norm": 0.684587534515466, "learning_rate": 9.52926484823063e-06, "loss": 0.3958, "step": 3648 }, { "epoch": 0.16515048653541525, "grad_norm": 0.636071964727268, "learning_rate": 9.528954339905759e-06, "loss": 0.404, "step": 3649 }, { "epoch": 0.16519574564381082, "grad_norm": 0.6693368657078425, "learning_rate": 9.528643734267564e-06, "loss": 0.4247, "step": 3650 }, { "epoch": 0.16524100475220638, "grad_norm": 0.8203539046854412, "learning_rate": 9.528333031322728e-06, "loss": 0.4013, "step": 3651 }, { "epoch": 0.16528626386060194, "grad_norm": 0.6957575535631207, "learning_rate": 9.528022231077921e-06, "loss": 0.4628, "step": 3652 }, { "epoch": 0.1653315229689975, "grad_norm": 0.6883990276108745, "learning_rate": 9.527711333539821e-06, "loss": 0.4181, "step": 3653 }, { "epoch": 0.16537678207739306, "grad_norm": 0.6621880635568219, "learning_rate": 9.527400338715112e-06, "loss": 0.4216, "step": 3654 }, { "epoch": 0.16542204118578863, "grad_norm": 0.5905985298350221, "learning_rate": 9.527089246610475e-06, "loss": 0.5367, "step": 3655 }, { "epoch": 0.16546730029418422, "grad_norm": 0.66555066514175, "learning_rate": 9.526778057232595e-06, "loss": 0.4306, "step": 3656 }, { "epoch": 0.16551255940257978, "grad_norm": 0.6431260588652664, "learning_rate": 9.526466770588156e-06, "loss": 0.3738, "step": 3657 }, { "epoch": 0.16555781851097534, "grad_norm": 0.7130443154083126, "learning_rate": 9.526155386683848e-06, "loss": 0.4557, "step": 3658 }, { "epoch": 0.1656030776193709, "grad_norm": 0.6961582015719266, "learning_rate": 9.525843905526361e-06, "loss": 0.397, "step": 3659 }, { "epoch": 0.16564833672776647, "grad_norm": 0.623855766689215, "learning_rate": 9.525532327122391e-06, "loss": 0.4043, "step": 3660 }, { "epoch": 0.16569359583616203, "grad_norm": 0.4348115038481914, "learning_rate": 9.525220651478628e-06, "loss": 0.505, "step": 3661 }, { "epoch": 0.1657388549445576, "grad_norm": 0.7124951405659162, "learning_rate": 9.524908878601773e-06, "loss": 0.4289, "step": 3662 }, { "epoch": 0.16578411405295315, "grad_norm": 0.3185914241502027, "learning_rate": 9.524597008498522e-06, "loss": 0.4992, "step": 3663 }, { "epoch": 0.16582937316134871, "grad_norm": 0.7086305378925704, "learning_rate": 9.524285041175578e-06, "loss": 0.4552, "step": 3664 }, { "epoch": 0.16587463226974428, "grad_norm": 0.8279029973754335, "learning_rate": 9.523972976639645e-06, "loss": 0.3899, "step": 3665 }, { "epoch": 0.16591989137813984, "grad_norm": 0.3352919478515927, "learning_rate": 9.523660814897426e-06, "loss": 0.4951, "step": 3666 }, { "epoch": 0.16596515048653543, "grad_norm": 0.727907832487834, "learning_rate": 9.52334855595563e-06, "loss": 0.4349, "step": 3667 }, { "epoch": 0.166010409594931, "grad_norm": 0.6549909987507452, "learning_rate": 9.523036199820964e-06, "loss": 0.377, "step": 3668 }, { "epoch": 0.16605566870332655, "grad_norm": 0.626733270750146, "learning_rate": 9.522723746500144e-06, "loss": 0.4197, "step": 3669 }, { "epoch": 0.16610092781172212, "grad_norm": 0.7155877450445944, "learning_rate": 9.522411195999879e-06, "loss": 0.4233, "step": 3670 }, { "epoch": 0.16614618692011768, "grad_norm": 0.6985169309325515, "learning_rate": 9.522098548326888e-06, "loss": 0.4286, "step": 3671 }, { "epoch": 0.16619144602851324, "grad_norm": 0.7018155538709772, "learning_rate": 9.521785803487888e-06, "loss": 0.4138, "step": 3672 }, { "epoch": 0.1662367051369088, "grad_norm": 0.6868291461365117, "learning_rate": 9.5214729614896e-06, "loss": 0.3932, "step": 3673 }, { "epoch": 0.16628196424530436, "grad_norm": 0.770746088836217, "learning_rate": 9.521160022338742e-06, "loss": 0.405, "step": 3674 }, { "epoch": 0.16632722335369993, "grad_norm": 0.7083686368080435, "learning_rate": 9.520846986042043e-06, "loss": 0.3715, "step": 3675 }, { "epoch": 0.1663724824620955, "grad_norm": 0.7256127301256134, "learning_rate": 9.520533852606226e-06, "loss": 0.4242, "step": 3676 }, { "epoch": 0.16641774157049105, "grad_norm": 0.7874565138391614, "learning_rate": 9.520220622038019e-06, "loss": 0.4022, "step": 3677 }, { "epoch": 0.1664630006788866, "grad_norm": 0.663211610255665, "learning_rate": 9.519907294344155e-06, "loss": 0.4053, "step": 3678 }, { "epoch": 0.1665082597872822, "grad_norm": 0.7512651019964488, "learning_rate": 9.519593869531366e-06, "loss": 0.4084, "step": 3679 }, { "epoch": 0.16655351889567777, "grad_norm": 0.6856705100857937, "learning_rate": 9.519280347606383e-06, "loss": 0.4363, "step": 3680 }, { "epoch": 0.16659877800407333, "grad_norm": 0.820504135650576, "learning_rate": 9.518966728575947e-06, "loss": 0.4354, "step": 3681 }, { "epoch": 0.1666440371124689, "grad_norm": 0.5359567082935988, "learning_rate": 9.518653012446794e-06, "loss": 0.4859, "step": 3682 }, { "epoch": 0.16668929622086445, "grad_norm": 0.42768864998571654, "learning_rate": 9.518339199225668e-06, "loss": 0.4734, "step": 3683 }, { "epoch": 0.16673455532926001, "grad_norm": 0.6977386931322633, "learning_rate": 9.518025288919307e-06, "loss": 0.4493, "step": 3684 }, { "epoch": 0.16677981443765558, "grad_norm": 0.7312921685664348, "learning_rate": 9.51771128153446e-06, "loss": 0.4551, "step": 3685 }, { "epoch": 0.16682507354605114, "grad_norm": 0.8006426810646926, "learning_rate": 9.517397177077874e-06, "loss": 0.3496, "step": 3686 }, { "epoch": 0.1668703326544467, "grad_norm": 0.6467847843733492, "learning_rate": 9.517082975556294e-06, "loss": 0.4105, "step": 3687 }, { "epoch": 0.16691559176284226, "grad_norm": 0.7434533319298859, "learning_rate": 9.516768676976476e-06, "loss": 0.3715, "step": 3688 }, { "epoch": 0.16696085087123783, "grad_norm": 0.7565063603778319, "learning_rate": 9.51645428134517e-06, "loss": 0.4101, "step": 3689 }, { "epoch": 0.1670061099796334, "grad_norm": 0.6583406605473527, "learning_rate": 9.516139788669133e-06, "loss": 0.4008, "step": 3690 }, { "epoch": 0.16705136908802898, "grad_norm": 0.9488362943271945, "learning_rate": 9.515825198955122e-06, "loss": 0.5161, "step": 3691 }, { "epoch": 0.16709662819642454, "grad_norm": 0.74207752543715, "learning_rate": 9.515510512209898e-06, "loss": 0.4167, "step": 3692 }, { "epoch": 0.1671418873048201, "grad_norm": 0.688311061847171, "learning_rate": 9.515195728440221e-06, "loss": 0.421, "step": 3693 }, { "epoch": 0.16718714641321566, "grad_norm": 0.7076735124909065, "learning_rate": 9.514880847652855e-06, "loss": 0.4215, "step": 3694 }, { "epoch": 0.16723240552161123, "grad_norm": 0.4649065122917939, "learning_rate": 9.514565869854566e-06, "loss": 0.5269, "step": 3695 }, { "epoch": 0.1672776646300068, "grad_norm": 0.37744810277788543, "learning_rate": 9.51425079505212e-06, "loss": 0.4916, "step": 3696 }, { "epoch": 0.16732292373840235, "grad_norm": 0.35435769341967316, "learning_rate": 9.513935623252292e-06, "loss": 0.4882, "step": 3697 }, { "epoch": 0.1673681828467979, "grad_norm": 0.8444424239920386, "learning_rate": 9.51362035446185e-06, "loss": 0.3775, "step": 3698 }, { "epoch": 0.16741344195519348, "grad_norm": 0.49136085657274564, "learning_rate": 9.513304988687568e-06, "loss": 0.5053, "step": 3699 }, { "epoch": 0.16745870106358904, "grad_norm": 0.48488017714940024, "learning_rate": 9.512989525936223e-06, "loss": 0.5036, "step": 3700 }, { "epoch": 0.1675039601719846, "grad_norm": 0.7566271474139192, "learning_rate": 9.512673966214597e-06, "loss": 0.4554, "step": 3701 }, { "epoch": 0.16754921928038016, "grad_norm": 0.8793616289280668, "learning_rate": 9.512358309529463e-06, "loss": 0.3939, "step": 3702 }, { "epoch": 0.16759447838877575, "grad_norm": 0.6267765901631913, "learning_rate": 9.51204255588761e-06, "loss": 0.3886, "step": 3703 }, { "epoch": 0.16763973749717132, "grad_norm": 0.7177662264806949, "learning_rate": 9.51172670529582e-06, "loss": 0.42, "step": 3704 }, { "epoch": 0.16768499660556688, "grad_norm": 0.7540731952293713, "learning_rate": 9.511410757760878e-06, "loss": 0.4347, "step": 3705 }, { "epoch": 0.16773025571396244, "grad_norm": 1.175057370829924, "learning_rate": 9.511094713289575e-06, "loss": 0.4344, "step": 3706 }, { "epoch": 0.167775514822358, "grad_norm": 0.6673807601229402, "learning_rate": 9.510778571888704e-06, "loss": 0.3923, "step": 3707 }, { "epoch": 0.16782077393075356, "grad_norm": 0.6639737685542729, "learning_rate": 9.510462333565052e-06, "loss": 0.4127, "step": 3708 }, { "epoch": 0.16786603303914913, "grad_norm": 0.6623937707899048, "learning_rate": 9.510145998325419e-06, "loss": 0.3768, "step": 3709 }, { "epoch": 0.1679112921475447, "grad_norm": 0.7189384687642437, "learning_rate": 9.509829566176601e-06, "loss": 0.4426, "step": 3710 }, { "epoch": 0.16795655125594025, "grad_norm": 0.7553610441482141, "learning_rate": 9.509513037125395e-06, "loss": 0.4116, "step": 3711 }, { "epoch": 0.1680018103643358, "grad_norm": 0.6739738444460805, "learning_rate": 9.509196411178605e-06, "loss": 0.4017, "step": 3712 }, { "epoch": 0.16804706947273138, "grad_norm": 0.8404052913139409, "learning_rate": 9.508879688343033e-06, "loss": 0.4114, "step": 3713 }, { "epoch": 0.16809232858112697, "grad_norm": 0.7005051307524329, "learning_rate": 9.508562868625484e-06, "loss": 0.4285, "step": 3714 }, { "epoch": 0.16813758768952253, "grad_norm": 0.6861061710619839, "learning_rate": 9.508245952032765e-06, "loss": 0.3782, "step": 3715 }, { "epoch": 0.1681828467979181, "grad_norm": 0.7290703589658392, "learning_rate": 9.507928938571689e-06, "loss": 0.4325, "step": 3716 }, { "epoch": 0.16822810590631365, "grad_norm": 0.6613589739454301, "learning_rate": 9.507611828249062e-06, "loss": 0.3857, "step": 3717 }, { "epoch": 0.16827336501470921, "grad_norm": 0.675217399005107, "learning_rate": 9.507294621071702e-06, "loss": 0.4095, "step": 3718 }, { "epoch": 0.16831862412310478, "grad_norm": 0.7494715356484618, "learning_rate": 9.506977317046424e-06, "loss": 0.4322, "step": 3719 }, { "epoch": 0.16836388323150034, "grad_norm": 0.6606116250864447, "learning_rate": 9.506659916180046e-06, "loss": 0.4673, "step": 3720 }, { "epoch": 0.1684091423398959, "grad_norm": 0.7163806350258065, "learning_rate": 9.506342418479388e-06, "loss": 0.4361, "step": 3721 }, { "epoch": 0.16845440144829146, "grad_norm": 0.626221536747177, "learning_rate": 9.50602482395127e-06, "loss": 0.4085, "step": 3722 }, { "epoch": 0.16849966055668703, "grad_norm": 0.7311666290265683, "learning_rate": 9.50570713260252e-06, "loss": 0.4388, "step": 3723 }, { "epoch": 0.1685449196650826, "grad_norm": 0.6904737034485492, "learning_rate": 9.50538934443996e-06, "loss": 0.3982, "step": 3724 }, { "epoch": 0.16859017877347815, "grad_norm": 0.6646357804123364, "learning_rate": 9.50507145947042e-06, "loss": 0.4169, "step": 3725 }, { "epoch": 0.16863543788187374, "grad_norm": 0.6716345872070512, "learning_rate": 9.504753477700731e-06, "loss": 0.4108, "step": 3726 }, { "epoch": 0.1686806969902693, "grad_norm": 0.6807055677303921, "learning_rate": 9.504435399137726e-06, "loss": 0.41, "step": 3727 }, { "epoch": 0.16872595609866486, "grad_norm": 0.7083670984942221, "learning_rate": 9.504117223788238e-06, "loss": 0.3943, "step": 3728 }, { "epoch": 0.16877121520706043, "grad_norm": 0.6568251151354582, "learning_rate": 9.503798951659104e-06, "loss": 0.4139, "step": 3729 }, { "epoch": 0.168816474315456, "grad_norm": 0.6810666065376982, "learning_rate": 9.503480582757163e-06, "loss": 0.4098, "step": 3730 }, { "epoch": 0.16886173342385155, "grad_norm": 0.987821729528041, "learning_rate": 9.503162117089256e-06, "loss": 0.518, "step": 3731 }, { "epoch": 0.1689069925322471, "grad_norm": 0.6956385502760074, "learning_rate": 9.502843554662225e-06, "loss": 0.4107, "step": 3732 }, { "epoch": 0.16895225164064268, "grad_norm": 0.7239870623317979, "learning_rate": 9.502524895482917e-06, "loss": 0.3878, "step": 3733 }, { "epoch": 0.16899751074903824, "grad_norm": 0.6494967848122583, "learning_rate": 9.502206139558175e-06, "loss": 0.4095, "step": 3734 }, { "epoch": 0.1690427698574338, "grad_norm": 0.7057674434549391, "learning_rate": 9.501887286894852e-06, "loss": 0.405, "step": 3735 }, { "epoch": 0.16908802896582936, "grad_norm": 0.7003729702583934, "learning_rate": 9.501568337499798e-06, "loss": 0.4078, "step": 3736 }, { "epoch": 0.16913328807422492, "grad_norm": 0.6829548547414105, "learning_rate": 9.501249291379865e-06, "loss": 0.393, "step": 3737 }, { "epoch": 0.16917854718262051, "grad_norm": 0.74769762989029, "learning_rate": 9.50093014854191e-06, "loss": 0.4413, "step": 3738 }, { "epoch": 0.16922380629101608, "grad_norm": 0.7410832274031145, "learning_rate": 9.500610908992788e-06, "loss": 0.4369, "step": 3739 }, { "epoch": 0.16926906539941164, "grad_norm": 0.9351541665911204, "learning_rate": 9.500291572739362e-06, "loss": 0.5246, "step": 3740 }, { "epoch": 0.1693143245078072, "grad_norm": 0.7186639929072621, "learning_rate": 9.49997213978849e-06, "loss": 0.5083, "step": 3741 }, { "epoch": 0.16935958361620276, "grad_norm": 0.7107391517242865, "learning_rate": 9.49965261014704e-06, "loss": 0.43, "step": 3742 }, { "epoch": 0.16940484272459833, "grad_norm": 0.6242561143083231, "learning_rate": 9.499332983821873e-06, "loss": 0.4075, "step": 3743 }, { "epoch": 0.1694501018329939, "grad_norm": 0.6358146911217676, "learning_rate": 9.49901326081986e-06, "loss": 0.4052, "step": 3744 }, { "epoch": 0.16949536094138945, "grad_norm": 0.7088530729448421, "learning_rate": 9.498693441147868e-06, "loss": 0.4071, "step": 3745 }, { "epoch": 0.169540620049785, "grad_norm": 1.0440527962553254, "learning_rate": 9.498373524812771e-06, "loss": 0.5321, "step": 3746 }, { "epoch": 0.16958587915818057, "grad_norm": 0.7200570581024353, "learning_rate": 9.498053511821445e-06, "loss": 0.3925, "step": 3747 }, { "epoch": 0.16963113826657614, "grad_norm": 0.6562807866803275, "learning_rate": 9.497733402180761e-06, "loss": 0.4265, "step": 3748 }, { "epoch": 0.16967639737497173, "grad_norm": 0.6703611503814182, "learning_rate": 9.497413195897601e-06, "loss": 0.4132, "step": 3749 }, { "epoch": 0.1697216564833673, "grad_norm": 0.6892556857897737, "learning_rate": 9.497092892978844e-06, "loss": 0.518, "step": 3750 }, { "epoch": 0.16976691559176285, "grad_norm": 0.6611965128166901, "learning_rate": 9.496772493431373e-06, "loss": 0.4001, "step": 3751 }, { "epoch": 0.1698121747001584, "grad_norm": 0.7793165941706258, "learning_rate": 9.496451997262071e-06, "loss": 0.4128, "step": 3752 }, { "epoch": 0.16985743380855398, "grad_norm": 0.47410875781920114, "learning_rate": 9.496131404477826e-06, "loss": 0.4902, "step": 3753 }, { "epoch": 0.16990269291694954, "grad_norm": 0.6863007768927061, "learning_rate": 9.495810715085526e-06, "loss": 0.393, "step": 3754 }, { "epoch": 0.1699479520253451, "grad_norm": 0.7225011596239164, "learning_rate": 9.495489929092062e-06, "loss": 0.3979, "step": 3755 }, { "epoch": 0.16999321113374066, "grad_norm": 0.7322895998589937, "learning_rate": 9.495169046504325e-06, "loss": 0.391, "step": 3756 }, { "epoch": 0.17003847024213622, "grad_norm": 0.6541370202035405, "learning_rate": 9.494848067329211e-06, "loss": 0.3956, "step": 3757 }, { "epoch": 0.1700837293505318, "grad_norm": 0.643158696186771, "learning_rate": 9.494526991573619e-06, "loss": 0.3263, "step": 3758 }, { "epoch": 0.17012898845892735, "grad_norm": 0.6596094479839292, "learning_rate": 9.494205819244444e-06, "loss": 0.4137, "step": 3759 }, { "epoch": 0.1701742475673229, "grad_norm": 0.6836657205327861, "learning_rate": 9.493884550348589e-06, "loss": 0.4406, "step": 3760 }, { "epoch": 0.1702195066757185, "grad_norm": 0.6867349047506497, "learning_rate": 9.493563184892958e-06, "loss": 0.4254, "step": 3761 }, { "epoch": 0.17026476578411406, "grad_norm": 0.6815726649750058, "learning_rate": 9.493241722884454e-06, "loss": 0.4609, "step": 3762 }, { "epoch": 0.17031002489250963, "grad_norm": 0.6885133337543684, "learning_rate": 9.492920164329985e-06, "loss": 0.3385, "step": 3763 }, { "epoch": 0.1703552840009052, "grad_norm": 1.0212362444151246, "learning_rate": 9.492598509236461e-06, "loss": 0.5036, "step": 3764 }, { "epoch": 0.17040054310930075, "grad_norm": 0.774041865443583, "learning_rate": 9.492276757610795e-06, "loss": 0.3948, "step": 3765 }, { "epoch": 0.1704458022176963, "grad_norm": 0.6134765089458971, "learning_rate": 9.491954909459895e-06, "loss": 0.4832, "step": 3766 }, { "epoch": 0.17049106132609188, "grad_norm": 0.36207745015444576, "learning_rate": 9.491632964790683e-06, "loss": 0.5457, "step": 3767 }, { "epoch": 0.17053632043448744, "grad_norm": 0.7729859301025481, "learning_rate": 9.491310923610071e-06, "loss": 0.419, "step": 3768 }, { "epoch": 0.170581579542883, "grad_norm": 0.6830124066237396, "learning_rate": 9.490988785924983e-06, "loss": 0.4085, "step": 3769 }, { "epoch": 0.17062683865127856, "grad_norm": 0.7352583131801282, "learning_rate": 9.490666551742338e-06, "loss": 0.4953, "step": 3770 }, { "epoch": 0.17067209775967412, "grad_norm": 0.6727503824450569, "learning_rate": 9.490344221069062e-06, "loss": 0.3845, "step": 3771 }, { "epoch": 0.1707173568680697, "grad_norm": 0.7477892470274518, "learning_rate": 9.490021793912079e-06, "loss": 0.4165, "step": 3772 }, { "epoch": 0.17076261597646528, "grad_norm": 0.7280098699277786, "learning_rate": 9.489699270278316e-06, "loss": 0.3909, "step": 3773 }, { "epoch": 0.17080787508486084, "grad_norm": 0.662921243277628, "learning_rate": 9.489376650174708e-06, "loss": 0.4372, "step": 3774 }, { "epoch": 0.1708531341932564, "grad_norm": 0.9470274488208026, "learning_rate": 9.489053933608182e-06, "loss": 0.5428, "step": 3775 }, { "epoch": 0.17089839330165196, "grad_norm": 0.6684584842570336, "learning_rate": 9.488731120585675e-06, "loss": 0.4145, "step": 3776 }, { "epoch": 0.17094365241004753, "grad_norm": 0.7104320385344154, "learning_rate": 9.488408211114121e-06, "loss": 0.3872, "step": 3777 }, { "epoch": 0.1709889115184431, "grad_norm": 0.6115588397093513, "learning_rate": 9.48808520520046e-06, "loss": 0.3681, "step": 3778 }, { "epoch": 0.17103417062683865, "grad_norm": 0.7587750055088698, "learning_rate": 9.487762102851631e-06, "loss": 0.4817, "step": 3779 }, { "epoch": 0.1710794297352342, "grad_norm": 0.6573454769461418, "learning_rate": 9.487438904074581e-06, "loss": 0.3939, "step": 3780 }, { "epoch": 0.17112468884362977, "grad_norm": 0.6807681638602197, "learning_rate": 9.48711560887625e-06, "loss": 0.3964, "step": 3781 }, { "epoch": 0.17116994795202534, "grad_norm": 0.6528819830811584, "learning_rate": 9.486792217263584e-06, "loss": 0.4169, "step": 3782 }, { "epoch": 0.1712152070604209, "grad_norm": 0.6456444671915634, "learning_rate": 9.486468729243533e-06, "loss": 0.3778, "step": 3783 }, { "epoch": 0.17126046616881646, "grad_norm": 0.685917141382002, "learning_rate": 9.48614514482305e-06, "loss": 0.4021, "step": 3784 }, { "epoch": 0.17130572527721205, "grad_norm": 0.7035389881359554, "learning_rate": 9.485821464009084e-06, "loss": 0.3866, "step": 3785 }, { "epoch": 0.1713509843856076, "grad_norm": 0.6293192536212908, "learning_rate": 9.485497686808594e-06, "loss": 0.5125, "step": 3786 }, { "epoch": 0.17139624349400318, "grad_norm": 0.6996237899447874, "learning_rate": 9.485173813228535e-06, "loss": 0.4342, "step": 3787 }, { "epoch": 0.17144150260239874, "grad_norm": 0.7228737991665823, "learning_rate": 9.484849843275863e-06, "loss": 0.4426, "step": 3788 }, { "epoch": 0.1714867617107943, "grad_norm": 0.6784925368979711, "learning_rate": 9.484525776957544e-06, "loss": 0.4401, "step": 3789 }, { "epoch": 0.17153202081918986, "grad_norm": 0.6955890630411466, "learning_rate": 9.484201614280539e-06, "loss": 0.4187, "step": 3790 }, { "epoch": 0.17157727992758542, "grad_norm": 0.720595862616324, "learning_rate": 9.483877355251814e-06, "loss": 0.3606, "step": 3791 }, { "epoch": 0.171622539035981, "grad_norm": 0.6916800542587127, "learning_rate": 9.483552999878335e-06, "loss": 0.4037, "step": 3792 }, { "epoch": 0.17166779814437655, "grad_norm": 0.694954500461271, "learning_rate": 9.483228548167075e-06, "loss": 0.4423, "step": 3793 }, { "epoch": 0.1717130572527721, "grad_norm": 0.3969421838470267, "learning_rate": 9.482904000124998e-06, "loss": 0.5163, "step": 3794 }, { "epoch": 0.17175831636116767, "grad_norm": 0.33452167181396353, "learning_rate": 9.482579355759085e-06, "loss": 0.5129, "step": 3795 }, { "epoch": 0.17180357546956326, "grad_norm": 0.6967667263051072, "learning_rate": 9.482254615076307e-06, "loss": 0.455, "step": 3796 }, { "epoch": 0.17184883457795883, "grad_norm": 0.3339040070467812, "learning_rate": 9.481929778083646e-06, "loss": 0.5311, "step": 3797 }, { "epoch": 0.1718940936863544, "grad_norm": 0.36808453193924556, "learning_rate": 9.481604844788078e-06, "loss": 0.5197, "step": 3798 }, { "epoch": 0.17193935279474995, "grad_norm": 0.35561700516571404, "learning_rate": 9.481279815196587e-06, "loss": 0.4952, "step": 3799 }, { "epoch": 0.1719846119031455, "grad_norm": 1.0893008139643745, "learning_rate": 9.480954689316155e-06, "loss": 0.4168, "step": 3800 }, { "epoch": 0.17202987101154107, "grad_norm": 0.4053512638244079, "learning_rate": 9.480629467153768e-06, "loss": 0.5244, "step": 3801 }, { "epoch": 0.17207513011993664, "grad_norm": 0.40523098581740213, "learning_rate": 9.480304148716418e-06, "loss": 0.4998, "step": 3802 }, { "epoch": 0.1721203892283322, "grad_norm": 0.8014392539434086, "learning_rate": 9.479978734011089e-06, "loss": 0.4028, "step": 3803 }, { "epoch": 0.17216564833672776, "grad_norm": 0.7256290606944289, "learning_rate": 9.479653223044776e-06, "loss": 0.4377, "step": 3804 }, { "epoch": 0.17221090744512332, "grad_norm": 0.6517262142803637, "learning_rate": 9.479327615824476e-06, "loss": 0.359, "step": 3805 }, { "epoch": 0.17225616655351889, "grad_norm": 0.7758678317801353, "learning_rate": 9.479001912357181e-06, "loss": 0.4047, "step": 3806 }, { "epoch": 0.17230142566191445, "grad_norm": 0.6734929141109003, "learning_rate": 9.478676112649892e-06, "loss": 0.4003, "step": 3807 }, { "epoch": 0.17234668477031004, "grad_norm": 0.6460118289975646, "learning_rate": 9.478350216709609e-06, "loss": 0.4209, "step": 3808 }, { "epoch": 0.1723919438787056, "grad_norm": 0.7400410273732939, "learning_rate": 9.478024224543332e-06, "loss": 0.4035, "step": 3809 }, { "epoch": 0.17243720298710116, "grad_norm": 0.6620153485178493, "learning_rate": 9.477698136158068e-06, "loss": 0.4006, "step": 3810 }, { "epoch": 0.17248246209549672, "grad_norm": 0.674849489646353, "learning_rate": 9.477371951560825e-06, "loss": 0.3948, "step": 3811 }, { "epoch": 0.1725277212038923, "grad_norm": 0.6830113481329078, "learning_rate": 9.477045670758609e-06, "loss": 0.3879, "step": 3812 }, { "epoch": 0.17257298031228785, "grad_norm": 0.6779094831298704, "learning_rate": 9.476719293758431e-06, "loss": 0.3828, "step": 3813 }, { "epoch": 0.1726182394206834, "grad_norm": 0.66722862120204, "learning_rate": 9.476392820567306e-06, "loss": 0.399, "step": 3814 }, { "epoch": 0.17266349852907897, "grad_norm": 0.6684781940195049, "learning_rate": 9.476066251192248e-06, "loss": 0.3661, "step": 3815 }, { "epoch": 0.17270875763747454, "grad_norm": 0.49516822571517094, "learning_rate": 9.475739585640272e-06, "loss": 0.4783, "step": 3816 }, { "epoch": 0.1727540167458701, "grad_norm": 0.624620814684528, "learning_rate": 9.475412823918398e-06, "loss": 0.3734, "step": 3817 }, { "epoch": 0.17279927585426566, "grad_norm": 0.3758624282498899, "learning_rate": 9.475085966033649e-06, "loss": 0.4856, "step": 3818 }, { "epoch": 0.17284453496266122, "grad_norm": 0.7495092258007775, "learning_rate": 9.474759011993045e-06, "loss": 0.4685, "step": 3819 }, { "epoch": 0.1728897940710568, "grad_norm": 0.7028451617869309, "learning_rate": 9.474431961803615e-06, "loss": 0.4345, "step": 3820 }, { "epoch": 0.17293505317945237, "grad_norm": 0.6838612365680022, "learning_rate": 9.474104815472382e-06, "loss": 0.3717, "step": 3821 }, { "epoch": 0.17298031228784794, "grad_norm": 0.6690480173586114, "learning_rate": 9.47377757300638e-06, "loss": 0.4126, "step": 3822 }, { "epoch": 0.1730255713962435, "grad_norm": 0.6459824004506101, "learning_rate": 9.473450234412638e-06, "loss": 0.3475, "step": 3823 }, { "epoch": 0.17307083050463906, "grad_norm": 0.6605687360932971, "learning_rate": 9.473122799698189e-06, "loss": 0.4021, "step": 3824 }, { "epoch": 0.17311608961303462, "grad_norm": 0.7336982972593437, "learning_rate": 9.472795268870068e-06, "loss": 0.3697, "step": 3825 }, { "epoch": 0.17316134872143019, "grad_norm": 0.6807569738448235, "learning_rate": 9.472467641935314e-06, "loss": 0.4196, "step": 3826 }, { "epoch": 0.17320660782982575, "grad_norm": 0.6598634075011642, "learning_rate": 9.472139918900969e-06, "loss": 0.5147, "step": 3827 }, { "epoch": 0.1732518669382213, "grad_norm": 0.6445631249211847, "learning_rate": 9.47181209977407e-06, "loss": 0.4068, "step": 3828 }, { "epoch": 0.17329712604661687, "grad_norm": 0.4750175521736982, "learning_rate": 9.471484184561664e-06, "loss": 0.5171, "step": 3829 }, { "epoch": 0.17334238515501244, "grad_norm": 0.3584003360435236, "learning_rate": 9.471156173270796e-06, "loss": 0.4917, "step": 3830 }, { "epoch": 0.173387644263408, "grad_norm": 0.7073047056713918, "learning_rate": 9.470828065908512e-06, "loss": 0.3914, "step": 3831 }, { "epoch": 0.1734329033718036, "grad_norm": 0.7026671316830553, "learning_rate": 9.470499862481867e-06, "loss": 0.4363, "step": 3832 }, { "epoch": 0.17347816248019915, "grad_norm": 0.6446855918509418, "learning_rate": 9.470171562997908e-06, "loss": 0.3664, "step": 3833 }, { "epoch": 0.1735234215885947, "grad_norm": 0.6653694374505091, "learning_rate": 9.469843167463692e-06, "loss": 0.3864, "step": 3834 }, { "epoch": 0.17356868069699027, "grad_norm": 0.6814010028261566, "learning_rate": 9.469514675886276e-06, "loss": 0.399, "step": 3835 }, { "epoch": 0.17361393980538584, "grad_norm": 0.7220147662032761, "learning_rate": 9.469186088272714e-06, "loss": 0.3474, "step": 3836 }, { "epoch": 0.1736591989137814, "grad_norm": 0.6498031930000514, "learning_rate": 9.468857404630069e-06, "loss": 0.4211, "step": 3837 }, { "epoch": 0.17370445802217696, "grad_norm": 0.6605078113760698, "learning_rate": 9.468528624965406e-06, "loss": 0.4029, "step": 3838 }, { "epoch": 0.17374971713057252, "grad_norm": 0.686696816542692, "learning_rate": 9.468199749285785e-06, "loss": 0.3802, "step": 3839 }, { "epoch": 0.17379497623896809, "grad_norm": 0.6838184982820498, "learning_rate": 9.467870777598274e-06, "loss": 0.4076, "step": 3840 }, { "epoch": 0.17384023534736365, "grad_norm": 1.603580102321151, "learning_rate": 9.467541709909942e-06, "loss": 0.5383, "step": 3841 }, { "epoch": 0.1738854944557592, "grad_norm": 0.7090022793113754, "learning_rate": 9.46721254622786e-06, "loss": 0.4162, "step": 3842 }, { "epoch": 0.1739307535641548, "grad_norm": 0.6742737656798072, "learning_rate": 9.466883286559102e-06, "loss": 0.4254, "step": 3843 }, { "epoch": 0.17397601267255036, "grad_norm": 0.7230508206818501, "learning_rate": 9.46655393091074e-06, "loss": 0.3856, "step": 3844 }, { "epoch": 0.17402127178094592, "grad_norm": 0.812199433596557, "learning_rate": 9.466224479289851e-06, "loss": 0.4029, "step": 3845 }, { "epoch": 0.1740665308893415, "grad_norm": 0.706257234178393, "learning_rate": 9.465894931703517e-06, "loss": 0.3809, "step": 3846 }, { "epoch": 0.17411178999773705, "grad_norm": 0.7939136340507305, "learning_rate": 9.465565288158815e-06, "loss": 0.3825, "step": 3847 }, { "epoch": 0.1741570491061326, "grad_norm": 0.7038949961513123, "learning_rate": 9.46523554866283e-06, "loss": 0.3804, "step": 3848 }, { "epoch": 0.17420230821452817, "grad_norm": 0.6547818451610412, "learning_rate": 9.464905713222648e-06, "loss": 0.3839, "step": 3849 }, { "epoch": 0.17424756732292374, "grad_norm": 0.7689604401981761, "learning_rate": 9.464575781845355e-06, "loss": 0.3985, "step": 3850 }, { "epoch": 0.1742928264313193, "grad_norm": 0.6932882122137568, "learning_rate": 9.46424575453804e-06, "loss": 0.437, "step": 3851 }, { "epoch": 0.17433808553971486, "grad_norm": 0.6653382774110156, "learning_rate": 9.463915631307795e-06, "loss": 0.3566, "step": 3852 }, { "epoch": 0.17438334464811042, "grad_norm": 0.7126119345383991, "learning_rate": 9.463585412161712e-06, "loss": 0.4094, "step": 3853 }, { "epoch": 0.17442860375650598, "grad_norm": 0.7299262826317278, "learning_rate": 9.463255097106888e-06, "loss": 0.4516, "step": 3854 }, { "epoch": 0.17447386286490157, "grad_norm": 0.6814962311461914, "learning_rate": 9.462924686150419e-06, "loss": 0.4126, "step": 3855 }, { "epoch": 0.17451912197329714, "grad_norm": 0.6083842476390211, "learning_rate": 9.462594179299408e-06, "loss": 0.3564, "step": 3856 }, { "epoch": 0.1745643810816927, "grad_norm": 0.6648655332817587, "learning_rate": 9.462263576560951e-06, "loss": 0.3934, "step": 3857 }, { "epoch": 0.17460964019008826, "grad_norm": 0.6540476263210379, "learning_rate": 9.461932877942154e-06, "loss": 0.4531, "step": 3858 }, { "epoch": 0.17465489929848382, "grad_norm": 0.6841250761641118, "learning_rate": 9.461602083450126e-06, "loss": 0.4127, "step": 3859 }, { "epoch": 0.17470015840687939, "grad_norm": 0.6339962178288016, "learning_rate": 9.461271193091971e-06, "loss": 0.4099, "step": 3860 }, { "epoch": 0.17474541751527495, "grad_norm": 0.7159915950097685, "learning_rate": 9.4609402068748e-06, "loss": 0.4309, "step": 3861 }, { "epoch": 0.1747906766236705, "grad_norm": 0.7058571671469768, "learning_rate": 9.460609124805724e-06, "loss": 0.4109, "step": 3862 }, { "epoch": 0.17483593573206607, "grad_norm": 0.6230601643244665, "learning_rate": 9.460277946891859e-06, "loss": 0.3917, "step": 3863 }, { "epoch": 0.17488119484046163, "grad_norm": 0.6702894866068349, "learning_rate": 9.459946673140317e-06, "loss": 0.3789, "step": 3864 }, { "epoch": 0.1749264539488572, "grad_norm": 0.6890944216862027, "learning_rate": 9.45961530355822e-06, "loss": 0.4154, "step": 3865 }, { "epoch": 0.17497171305725276, "grad_norm": 1.3068604598743, "learning_rate": 9.459283838152686e-06, "loss": 0.5401, "step": 3866 }, { "epoch": 0.17501697216564835, "grad_norm": 1.1368248257270164, "learning_rate": 9.45895227693084e-06, "loss": 0.518, "step": 3867 }, { "epoch": 0.1750622312740439, "grad_norm": 0.7575971192564259, "learning_rate": 9.458620619899803e-06, "loss": 0.4324, "step": 3868 }, { "epoch": 0.17510749038243947, "grad_norm": 0.7079932207926797, "learning_rate": 9.458288867066702e-06, "loss": 0.4319, "step": 3869 }, { "epoch": 0.17515274949083504, "grad_norm": 0.7920654492018414, "learning_rate": 9.457957018438668e-06, "loss": 0.5161, "step": 3870 }, { "epoch": 0.1751980085992306, "grad_norm": 0.7178653393777417, "learning_rate": 9.457625074022827e-06, "loss": 0.4424, "step": 3871 }, { "epoch": 0.17524326770762616, "grad_norm": 0.6877880291711488, "learning_rate": 9.457293033826314e-06, "loss": 0.4109, "step": 3872 }, { "epoch": 0.17528852681602172, "grad_norm": 0.6855375864869679, "learning_rate": 9.456960897856264e-06, "loss": 0.3978, "step": 3873 }, { "epoch": 0.17533378592441728, "grad_norm": 0.6933572812811226, "learning_rate": 9.456628666119812e-06, "loss": 0.4077, "step": 3874 }, { "epoch": 0.17537904503281285, "grad_norm": 1.5072854612067466, "learning_rate": 9.456296338624098e-06, "loss": 0.5352, "step": 3875 }, { "epoch": 0.1754243041412084, "grad_norm": 0.6863337744350352, "learning_rate": 9.455963915376262e-06, "loss": 0.4084, "step": 3876 }, { "epoch": 0.17546956324960397, "grad_norm": 0.8083693280407686, "learning_rate": 9.455631396383446e-06, "loss": 0.3907, "step": 3877 }, { "epoch": 0.17551482235799953, "grad_norm": 1.1066296499903405, "learning_rate": 9.455298781652797e-06, "loss": 0.5165, "step": 3878 }, { "epoch": 0.17556008146639512, "grad_norm": 0.8388078483952227, "learning_rate": 9.454966071191461e-06, "loss": 0.4957, "step": 3879 }, { "epoch": 0.17560534057479069, "grad_norm": 0.7459585566669298, "learning_rate": 9.454633265006585e-06, "loss": 0.4132, "step": 3880 }, { "epoch": 0.17565059968318625, "grad_norm": 0.7000355787238284, "learning_rate": 9.454300363105323e-06, "loss": 0.4443, "step": 3881 }, { "epoch": 0.1756958587915818, "grad_norm": 0.6348413063134372, "learning_rate": 9.453967365494824e-06, "loss": 0.4895, "step": 3882 }, { "epoch": 0.17574111789997737, "grad_norm": 0.7044484596842852, "learning_rate": 9.453634272182249e-06, "loss": 0.4248, "step": 3883 }, { "epoch": 0.17578637700837293, "grad_norm": 0.7131839130118517, "learning_rate": 9.45330108317475e-06, "loss": 0.4257, "step": 3884 }, { "epoch": 0.1758316361167685, "grad_norm": 0.6633999287098569, "learning_rate": 9.45296779847949e-06, "loss": 0.4329, "step": 3885 }, { "epoch": 0.17587689522516406, "grad_norm": 0.6678411587728268, "learning_rate": 9.452634418103626e-06, "loss": 0.4161, "step": 3886 }, { "epoch": 0.17592215433355962, "grad_norm": 0.6953785685795281, "learning_rate": 9.452300942054324e-06, "loss": 0.4185, "step": 3887 }, { "epoch": 0.17596741344195518, "grad_norm": 1.179121220652229, "learning_rate": 9.451967370338747e-06, "loss": 0.5453, "step": 3888 }, { "epoch": 0.17601267255035075, "grad_norm": 0.7069774255329982, "learning_rate": 9.451633702964067e-06, "loss": 0.4168, "step": 3889 }, { "epoch": 0.17605793165874634, "grad_norm": 0.7127050767647245, "learning_rate": 9.45129993993745e-06, "loss": 0.3923, "step": 3890 }, { "epoch": 0.1761031907671419, "grad_norm": 0.9695002630974516, "learning_rate": 9.450966081266069e-06, "loss": 0.4977, "step": 3891 }, { "epoch": 0.17614844987553746, "grad_norm": 0.8269460584043274, "learning_rate": 9.450632126957098e-06, "loss": 0.526, "step": 3892 }, { "epoch": 0.17619370898393302, "grad_norm": 0.5904239742667041, "learning_rate": 9.45029807701771e-06, "loss": 0.5192, "step": 3893 }, { "epoch": 0.17623896809232859, "grad_norm": 0.7518798449772747, "learning_rate": 9.449963931455084e-06, "loss": 0.3919, "step": 3894 }, { "epoch": 0.17628422720072415, "grad_norm": 0.7435713399453958, "learning_rate": 9.449629690276401e-06, "loss": 0.4009, "step": 3895 }, { "epoch": 0.1763294863091197, "grad_norm": 0.5816424605119339, "learning_rate": 9.44929535348884e-06, "loss": 0.3547, "step": 3896 }, { "epoch": 0.17637474541751527, "grad_norm": 0.821270546116312, "learning_rate": 9.44896092109959e-06, "loss": 0.367, "step": 3897 }, { "epoch": 0.17642000452591083, "grad_norm": 0.9931850336613028, "learning_rate": 9.448626393115833e-06, "loss": 0.4123, "step": 3898 }, { "epoch": 0.1764652636343064, "grad_norm": 0.6533003091582168, "learning_rate": 9.448291769544758e-06, "loss": 0.3862, "step": 3899 }, { "epoch": 0.17651052274270196, "grad_norm": 0.6816255014141562, "learning_rate": 9.447957050393552e-06, "loss": 0.4037, "step": 3900 }, { "epoch": 0.17655578185109752, "grad_norm": 0.7414261462013415, "learning_rate": 9.447622235669412e-06, "loss": 0.4018, "step": 3901 }, { "epoch": 0.1766010409594931, "grad_norm": 0.7064509154674706, "learning_rate": 9.44728732537953e-06, "loss": 0.3837, "step": 3902 }, { "epoch": 0.17664630006788867, "grad_norm": 0.6743859732965065, "learning_rate": 9.446952319531102e-06, "loss": 0.3788, "step": 3903 }, { "epoch": 0.17669155917628424, "grad_norm": 2.4314369942165257, "learning_rate": 9.446617218131326e-06, "loss": 0.5672, "step": 3904 }, { "epoch": 0.1767368182846798, "grad_norm": 0.7027033907421777, "learning_rate": 9.446282021187403e-06, "loss": 0.4037, "step": 3905 }, { "epoch": 0.17678207739307536, "grad_norm": 0.6764659734044738, "learning_rate": 9.445946728706535e-06, "loss": 0.4069, "step": 3906 }, { "epoch": 0.17682733650147092, "grad_norm": 1.2756689798521164, "learning_rate": 9.445611340695926e-06, "loss": 0.5385, "step": 3907 }, { "epoch": 0.17687259560986648, "grad_norm": 2.5327412146331674, "learning_rate": 9.445275857162784e-06, "loss": 0.3839, "step": 3908 }, { "epoch": 0.17691785471826205, "grad_norm": 0.6686919910303133, "learning_rate": 9.444940278114316e-06, "loss": 0.406, "step": 3909 }, { "epoch": 0.1769631138266576, "grad_norm": 0.8168236937312541, "learning_rate": 9.444604603557733e-06, "loss": 0.3656, "step": 3910 }, { "epoch": 0.17700837293505317, "grad_norm": 0.5818948523437871, "learning_rate": 9.444268833500247e-06, "loss": 0.5121, "step": 3911 }, { "epoch": 0.17705363204344873, "grad_norm": 0.6705505609317879, "learning_rate": 9.443932967949074e-06, "loss": 0.4032, "step": 3912 }, { "epoch": 0.1770988911518443, "grad_norm": 0.756973477548209, "learning_rate": 9.443597006911432e-06, "loss": 0.5175, "step": 3913 }, { "epoch": 0.17714415026023989, "grad_norm": 0.720833225924833, "learning_rate": 9.443260950394535e-06, "loss": 0.4031, "step": 3914 }, { "epoch": 0.17718940936863545, "grad_norm": 0.7253115963736714, "learning_rate": 9.442924798405605e-06, "loss": 0.3792, "step": 3915 }, { "epoch": 0.177234668477031, "grad_norm": 0.7770778836748186, "learning_rate": 9.44258855095187e-06, "loss": 0.5194, "step": 3916 }, { "epoch": 0.17727992758542657, "grad_norm": 0.7620354620331995, "learning_rate": 9.442252208040551e-06, "loss": 0.3729, "step": 3917 }, { "epoch": 0.17732518669382213, "grad_norm": 0.7620836046469401, "learning_rate": 9.441915769678874e-06, "loss": 0.4391, "step": 3918 }, { "epoch": 0.1773704458022177, "grad_norm": 0.6579862798048236, "learning_rate": 9.44157923587407e-06, "loss": 0.4095, "step": 3919 }, { "epoch": 0.17741570491061326, "grad_norm": 0.8267059990269631, "learning_rate": 9.441242606633369e-06, "loss": 0.4239, "step": 3920 }, { "epoch": 0.17746096401900882, "grad_norm": 0.7353518313708115, "learning_rate": 9.440905881964007e-06, "loss": 0.3951, "step": 3921 }, { "epoch": 0.17750622312740438, "grad_norm": 0.6659406803623616, "learning_rate": 9.440569061873213e-06, "loss": 0.3894, "step": 3922 }, { "epoch": 0.17755148223579995, "grad_norm": 0.7242839163193702, "learning_rate": 9.44023214636823e-06, "loss": 0.3994, "step": 3923 }, { "epoch": 0.1775967413441955, "grad_norm": 0.6445084964739258, "learning_rate": 9.439895135456297e-06, "loss": 0.535, "step": 3924 }, { "epoch": 0.17764200045259107, "grad_norm": 0.7747303062389351, "learning_rate": 9.43955802914465e-06, "loss": 0.4296, "step": 3925 }, { "epoch": 0.17768725956098666, "grad_norm": 0.6792800098985597, "learning_rate": 9.439220827440539e-06, "loss": 0.421, "step": 3926 }, { "epoch": 0.17773251866938222, "grad_norm": 0.7261120584253042, "learning_rate": 9.438883530351207e-06, "loss": 0.432, "step": 3927 }, { "epoch": 0.17777777777777778, "grad_norm": 0.39904041842961024, "learning_rate": 9.438546137883898e-06, "loss": 0.5249, "step": 3928 }, { "epoch": 0.17782303688617335, "grad_norm": 0.8107503680750474, "learning_rate": 9.438208650045866e-06, "loss": 0.3769, "step": 3929 }, { "epoch": 0.1778682959945689, "grad_norm": 0.7203559014405553, "learning_rate": 9.43787106684436e-06, "loss": 0.4014, "step": 3930 }, { "epoch": 0.17791355510296447, "grad_norm": 0.962397168004711, "learning_rate": 9.437533388286635e-06, "loss": 0.3732, "step": 3931 }, { "epoch": 0.17795881421136003, "grad_norm": 0.45413655885885273, "learning_rate": 9.437195614379947e-06, "loss": 0.5248, "step": 3932 }, { "epoch": 0.1780040733197556, "grad_norm": 0.817919637821282, "learning_rate": 9.436857745131553e-06, "loss": 0.3741, "step": 3933 }, { "epoch": 0.17804933242815116, "grad_norm": 0.682119639232124, "learning_rate": 9.436519780548712e-06, "loss": 0.4316, "step": 3934 }, { "epoch": 0.17809459153654672, "grad_norm": 0.6600377596146887, "learning_rate": 9.436181720638688e-06, "loss": 0.3644, "step": 3935 }, { "epoch": 0.17813985064494228, "grad_norm": 0.4001893548138264, "learning_rate": 9.435843565408742e-06, "loss": 0.4959, "step": 3936 }, { "epoch": 0.17818510975333787, "grad_norm": 0.8448226633705256, "learning_rate": 9.435505314866143e-06, "loss": 0.4095, "step": 3937 }, { "epoch": 0.17823036886173343, "grad_norm": 0.7752472854802783, "learning_rate": 9.435166969018158e-06, "loss": 0.4406, "step": 3938 }, { "epoch": 0.178275627970129, "grad_norm": 0.6748292726243518, "learning_rate": 9.434828527872052e-06, "loss": 0.422, "step": 3939 }, { "epoch": 0.17832088707852456, "grad_norm": 0.7678223720734237, "learning_rate": 9.434489991435106e-06, "loss": 0.4333, "step": 3940 }, { "epoch": 0.17836614618692012, "grad_norm": 0.8010333175192554, "learning_rate": 9.434151359714587e-06, "loss": 0.3947, "step": 3941 }, { "epoch": 0.17841140529531568, "grad_norm": 0.4352994652107656, "learning_rate": 9.433812632717776e-06, "loss": 0.5114, "step": 3942 }, { "epoch": 0.17845666440371125, "grad_norm": 0.7307693833726469, "learning_rate": 9.433473810451947e-06, "loss": 0.4461, "step": 3943 }, { "epoch": 0.1785019235121068, "grad_norm": 0.6569377227188848, "learning_rate": 9.433134892924383e-06, "loss": 0.3852, "step": 3944 }, { "epoch": 0.17854718262050237, "grad_norm": 0.7588640467049291, "learning_rate": 9.432795880142366e-06, "loss": 0.4646, "step": 3945 }, { "epoch": 0.17859244172889793, "grad_norm": 0.7047461181700752, "learning_rate": 9.432456772113179e-06, "loss": 0.3859, "step": 3946 }, { "epoch": 0.1786377008372935, "grad_norm": 0.6265443772082004, "learning_rate": 9.43211756884411e-06, "loss": 0.4089, "step": 3947 }, { "epoch": 0.17868295994568906, "grad_norm": 0.709027381706952, "learning_rate": 9.431778270342447e-06, "loss": 0.4275, "step": 3948 }, { "epoch": 0.17872821905408465, "grad_norm": 0.7120230598819207, "learning_rate": 9.431438876615478e-06, "loss": 0.4154, "step": 3949 }, { "epoch": 0.1787734781624802, "grad_norm": 0.6887782725922001, "learning_rate": 9.4310993876705e-06, "loss": 0.4286, "step": 3950 }, { "epoch": 0.17881873727087577, "grad_norm": 0.710693416079211, "learning_rate": 9.430759803514802e-06, "loss": 0.4382, "step": 3951 }, { "epoch": 0.17886399637927133, "grad_norm": 0.6188621557116822, "learning_rate": 9.430420124155687e-06, "loss": 0.3749, "step": 3952 }, { "epoch": 0.1789092554876669, "grad_norm": 0.6738178241809274, "learning_rate": 9.43008034960045e-06, "loss": 0.436, "step": 3953 }, { "epoch": 0.17895451459606246, "grad_norm": 0.6825365111947984, "learning_rate": 9.42974047985639e-06, "loss": 0.4149, "step": 3954 }, { "epoch": 0.17899977370445802, "grad_norm": 0.6605285605975528, "learning_rate": 9.429400514930815e-06, "loss": 0.4023, "step": 3955 }, { "epoch": 0.17904503281285358, "grad_norm": 0.7096638896075035, "learning_rate": 9.429060454831026e-06, "loss": 0.4472, "step": 3956 }, { "epoch": 0.17909029192124915, "grad_norm": 0.415451298677631, "learning_rate": 9.42872029956433e-06, "loss": 0.5404, "step": 3957 }, { "epoch": 0.1791355510296447, "grad_norm": 0.7667486909261592, "learning_rate": 9.428380049138038e-06, "loss": 0.4114, "step": 3958 }, { "epoch": 0.17918081013804027, "grad_norm": 0.6663120328599494, "learning_rate": 9.428039703559458e-06, "loss": 0.4269, "step": 3959 }, { "epoch": 0.17922606924643583, "grad_norm": 0.7142005965525264, "learning_rate": 9.427699262835904e-06, "loss": 0.4392, "step": 3960 }, { "epoch": 0.17927132835483142, "grad_norm": 0.8063258085038403, "learning_rate": 9.427358726974693e-06, "loss": 0.4144, "step": 3961 }, { "epoch": 0.17931658746322698, "grad_norm": 0.6875786825371241, "learning_rate": 9.42701809598314e-06, "loss": 0.3895, "step": 3962 }, { "epoch": 0.17936184657162255, "grad_norm": 0.38949801700994485, "learning_rate": 9.426677369868564e-06, "loss": 0.4787, "step": 3963 }, { "epoch": 0.1794071056800181, "grad_norm": 0.7008172907799324, "learning_rate": 9.426336548638287e-06, "loss": 0.3878, "step": 3964 }, { "epoch": 0.17945236478841367, "grad_norm": 0.6497017033257975, "learning_rate": 9.425995632299631e-06, "loss": 0.4116, "step": 3965 }, { "epoch": 0.17949762389680923, "grad_norm": 0.3271747753890301, "learning_rate": 9.425654620859923e-06, "loss": 0.5049, "step": 3966 }, { "epoch": 0.1795428830052048, "grad_norm": 0.7075592309298688, "learning_rate": 9.425313514326491e-06, "loss": 0.3762, "step": 3967 }, { "epoch": 0.17958814211360036, "grad_norm": 1.1093788268170846, "learning_rate": 9.424972312706663e-06, "loss": 0.4145, "step": 3968 }, { "epoch": 0.17963340122199592, "grad_norm": 0.69564273713801, "learning_rate": 9.424631016007768e-06, "loss": 0.4049, "step": 3969 }, { "epoch": 0.17967866033039148, "grad_norm": 0.6730428805768747, "learning_rate": 9.424289624237143e-06, "loss": 0.357, "step": 3970 }, { "epoch": 0.17972391943878704, "grad_norm": 0.753946831911797, "learning_rate": 9.423948137402123e-06, "loss": 0.4044, "step": 3971 }, { "epoch": 0.17976917854718263, "grad_norm": 0.6356412800721248, "learning_rate": 9.423606555510043e-06, "loss": 0.3898, "step": 3972 }, { "epoch": 0.1798144376555782, "grad_norm": 0.6725822799634429, "learning_rate": 9.423264878568246e-06, "loss": 0.3978, "step": 3973 }, { "epoch": 0.17985969676397376, "grad_norm": 0.7231602834064391, "learning_rate": 9.42292310658407e-06, "loss": 0.4201, "step": 3974 }, { "epoch": 0.17990495587236932, "grad_norm": 0.3432254737661334, "learning_rate": 9.422581239564861e-06, "loss": 0.5007, "step": 3975 }, { "epoch": 0.17995021498076488, "grad_norm": 0.7604507555915985, "learning_rate": 9.422239277517964e-06, "loss": 0.4049, "step": 3976 }, { "epoch": 0.17999547408916045, "grad_norm": 0.6473006192279481, "learning_rate": 9.421897220450728e-06, "loss": 0.3911, "step": 3977 }, { "epoch": 0.180040733197556, "grad_norm": 0.6406786856957288, "learning_rate": 9.4215550683705e-06, "loss": 0.3843, "step": 3978 }, { "epoch": 0.18008599230595157, "grad_norm": 0.7426731979212589, "learning_rate": 9.421212821284633e-06, "loss": 0.4037, "step": 3979 }, { "epoch": 0.18013125141434713, "grad_norm": 0.6165518680048144, "learning_rate": 9.420870479200483e-06, "loss": 0.4113, "step": 3980 }, { "epoch": 0.1801765105227427, "grad_norm": 0.6055004238082176, "learning_rate": 9.420528042125404e-06, "loss": 0.3845, "step": 3981 }, { "epoch": 0.18022176963113826, "grad_norm": 0.6829154371077608, "learning_rate": 9.420185510066753e-06, "loss": 0.4331, "step": 3982 }, { "epoch": 0.18026702873953382, "grad_norm": 0.701267887540177, "learning_rate": 9.41984288303189e-06, "loss": 0.4057, "step": 3983 }, { "epoch": 0.1803122878479294, "grad_norm": 0.7412461761260488, "learning_rate": 9.419500161028178e-06, "loss": 0.4022, "step": 3984 }, { "epoch": 0.18035754695632497, "grad_norm": 0.6478422974417011, "learning_rate": 9.419157344062984e-06, "loss": 0.3746, "step": 3985 }, { "epoch": 0.18040280606472053, "grad_norm": 0.6463703026899874, "learning_rate": 9.418814432143669e-06, "loss": 0.4228, "step": 3986 }, { "epoch": 0.1804480651731161, "grad_norm": 0.6316234750070395, "learning_rate": 9.418471425277603e-06, "loss": 0.4108, "step": 3987 }, { "epoch": 0.18049332428151166, "grad_norm": 0.7962127763617338, "learning_rate": 9.418128323472157e-06, "loss": 0.4254, "step": 3988 }, { "epoch": 0.18053858338990722, "grad_norm": 0.6952711404179807, "learning_rate": 9.417785126734701e-06, "loss": 0.4222, "step": 3989 }, { "epoch": 0.18058384249830278, "grad_norm": 0.7285155806695047, "learning_rate": 9.417441835072615e-06, "loss": 0.412, "step": 3990 }, { "epoch": 0.18062910160669834, "grad_norm": 0.6387050480157371, "learning_rate": 9.417098448493267e-06, "loss": 0.3712, "step": 3991 }, { "epoch": 0.1806743607150939, "grad_norm": 0.6248158896534401, "learning_rate": 9.41675496700404e-06, "loss": 0.3723, "step": 3992 }, { "epoch": 0.18071961982348947, "grad_norm": 0.6519533699295533, "learning_rate": 9.416411390612315e-06, "loss": 0.3747, "step": 3993 }, { "epoch": 0.18076487893188503, "grad_norm": 0.49737740981951944, "learning_rate": 9.416067719325472e-06, "loss": 0.5126, "step": 3994 }, { "epoch": 0.1808101380402806, "grad_norm": 0.6608173062896084, "learning_rate": 9.415723953150897e-06, "loss": 0.4085, "step": 3995 }, { "epoch": 0.18085539714867618, "grad_norm": 0.6267983817821665, "learning_rate": 9.415380092095976e-06, "loss": 0.3878, "step": 3996 }, { "epoch": 0.18090065625707175, "grad_norm": 0.7368257783544285, "learning_rate": 9.415036136168099e-06, "loss": 0.4488, "step": 3997 }, { "epoch": 0.1809459153654673, "grad_norm": 0.6199378468837977, "learning_rate": 9.414692085374654e-06, "loss": 0.4199, "step": 3998 }, { "epoch": 0.18099117447386287, "grad_norm": 0.5008602627173249, "learning_rate": 9.414347939723033e-06, "loss": 0.517, "step": 3999 }, { "epoch": 0.18103643358225843, "grad_norm": 0.7481791294382593, "learning_rate": 9.414003699220636e-06, "loss": 0.4177, "step": 4000 }, { "epoch": 0.181081692690654, "grad_norm": 0.6333229028807124, "learning_rate": 9.413659363874855e-06, "loss": 0.3521, "step": 4001 }, { "epoch": 0.18112695179904956, "grad_norm": 0.6906285797805105, "learning_rate": 9.413314933693088e-06, "loss": 0.3738, "step": 4002 }, { "epoch": 0.18117221090744512, "grad_norm": 0.6255253306601154, "learning_rate": 9.41297040868274e-06, "loss": 0.4365, "step": 4003 }, { "epoch": 0.18121747001584068, "grad_norm": 0.6497671002311871, "learning_rate": 9.412625788851208e-06, "loss": 0.382, "step": 4004 }, { "epoch": 0.18126272912423624, "grad_norm": 0.734449128993359, "learning_rate": 9.412281074205903e-06, "loss": 0.4666, "step": 4005 }, { "epoch": 0.1813079882326318, "grad_norm": 0.6881308403070234, "learning_rate": 9.41193626475423e-06, "loss": 0.4051, "step": 4006 }, { "epoch": 0.18135324734102737, "grad_norm": 0.4046944363293297, "learning_rate": 9.411591360503594e-06, "loss": 0.5014, "step": 4007 }, { "epoch": 0.18139850644942296, "grad_norm": 0.6909008023747709, "learning_rate": 9.41124636146141e-06, "loss": 0.3746, "step": 4008 }, { "epoch": 0.18144376555781852, "grad_norm": 0.7304086747129781, "learning_rate": 9.41090126763509e-06, "loss": 0.4352, "step": 4009 }, { "epoch": 0.18148902466621408, "grad_norm": 0.6541757729998198, "learning_rate": 9.410556079032049e-06, "loss": 0.4304, "step": 4010 }, { "epoch": 0.18153428377460964, "grad_norm": 0.6872384761920306, "learning_rate": 9.410210795659702e-06, "loss": 0.4321, "step": 4011 }, { "epoch": 0.1815795428830052, "grad_norm": 0.3936706884541984, "learning_rate": 9.409865417525473e-06, "loss": 0.4897, "step": 4012 }, { "epoch": 0.18162480199140077, "grad_norm": 0.6558205643441305, "learning_rate": 9.409519944636778e-06, "loss": 0.3871, "step": 4013 }, { "epoch": 0.18167006109979633, "grad_norm": 0.6706625783570828, "learning_rate": 9.409174377001043e-06, "loss": 0.3961, "step": 4014 }, { "epoch": 0.1817153202081919, "grad_norm": 0.6638586577515181, "learning_rate": 9.40882871462569e-06, "loss": 0.4043, "step": 4015 }, { "epoch": 0.18176057931658746, "grad_norm": 0.6821026362875763, "learning_rate": 9.408482957518152e-06, "loss": 0.4002, "step": 4016 }, { "epoch": 0.18180583842498302, "grad_norm": 0.6699639269128657, "learning_rate": 9.408137105685853e-06, "loss": 0.4185, "step": 4017 }, { "epoch": 0.18185109753337858, "grad_norm": 0.6801453596286959, "learning_rate": 9.407791159136226e-06, "loss": 0.4125, "step": 4018 }, { "epoch": 0.18189635664177417, "grad_norm": 0.7345921944079861, "learning_rate": 9.407445117876705e-06, "loss": 0.4349, "step": 4019 }, { "epoch": 0.18194161575016973, "grad_norm": 0.6975348399576882, "learning_rate": 9.407098981914726e-06, "loss": 0.4387, "step": 4020 }, { "epoch": 0.1819868748585653, "grad_norm": 0.653769033304466, "learning_rate": 9.406752751257724e-06, "loss": 0.4169, "step": 4021 }, { "epoch": 0.18203213396696086, "grad_norm": 0.6274532282943137, "learning_rate": 9.40640642591314e-06, "loss": 0.3745, "step": 4022 }, { "epoch": 0.18207739307535642, "grad_norm": 0.7483261826815685, "learning_rate": 9.406060005888414e-06, "loss": 0.4375, "step": 4023 }, { "epoch": 0.18212265218375198, "grad_norm": 0.6041073913009501, "learning_rate": 9.405713491190992e-06, "loss": 0.5221, "step": 4024 }, { "epoch": 0.18216791129214754, "grad_norm": 0.669360438322126, "learning_rate": 9.405366881828317e-06, "loss": 0.4088, "step": 4025 }, { "epoch": 0.1822131704005431, "grad_norm": 0.6639839913216719, "learning_rate": 9.40502017780784e-06, "loss": 0.4042, "step": 4026 }, { "epoch": 0.18225842950893867, "grad_norm": 0.7186389028518378, "learning_rate": 9.404673379137007e-06, "loss": 0.3955, "step": 4027 }, { "epoch": 0.18230368861733423, "grad_norm": 0.7107843378506711, "learning_rate": 9.40432648582327e-06, "loss": 0.4038, "step": 4028 }, { "epoch": 0.1823489477257298, "grad_norm": 0.38811158141052376, "learning_rate": 9.403979497874085e-06, "loss": 0.5146, "step": 4029 }, { "epoch": 0.18239420683412536, "grad_norm": 0.639213961994482, "learning_rate": 9.403632415296907e-06, "loss": 0.4025, "step": 4030 }, { "epoch": 0.18243946594252095, "grad_norm": 0.7539547800235694, "learning_rate": 9.403285238099192e-06, "loss": 0.4333, "step": 4031 }, { "epoch": 0.1824847250509165, "grad_norm": 0.6599444003297105, "learning_rate": 9.402937966288402e-06, "loss": 0.3992, "step": 4032 }, { "epoch": 0.18252998415931207, "grad_norm": 0.7890073548455298, "learning_rate": 9.402590599871994e-06, "loss": 0.3805, "step": 4033 }, { "epoch": 0.18257524326770763, "grad_norm": 0.6702074136415639, "learning_rate": 9.402243138857439e-06, "loss": 0.4398, "step": 4034 }, { "epoch": 0.1826205023761032, "grad_norm": 0.7131108641193631, "learning_rate": 9.401895583252198e-06, "loss": 0.4137, "step": 4035 }, { "epoch": 0.18266576148449876, "grad_norm": 0.6868999795018752, "learning_rate": 9.40154793306374e-06, "loss": 0.432, "step": 4036 }, { "epoch": 0.18271102059289432, "grad_norm": 1.1847528882430005, "learning_rate": 9.401200188299538e-06, "loss": 0.4423, "step": 4037 }, { "epoch": 0.18275627970128988, "grad_norm": 0.6741045662787118, "learning_rate": 9.40085234896706e-06, "loss": 0.4007, "step": 4038 }, { "epoch": 0.18280153880968544, "grad_norm": 0.7622032017541412, "learning_rate": 9.400504415073781e-06, "loss": 0.4238, "step": 4039 }, { "epoch": 0.182846797918081, "grad_norm": 0.6220923109649558, "learning_rate": 9.400156386627177e-06, "loss": 0.3761, "step": 4040 }, { "epoch": 0.18289205702647657, "grad_norm": 0.8052312403575104, "learning_rate": 9.399808263634725e-06, "loss": 0.4273, "step": 4041 }, { "epoch": 0.18293731613487213, "grad_norm": 0.684150835282518, "learning_rate": 9.399460046103908e-06, "loss": 0.4268, "step": 4042 }, { "epoch": 0.18298257524326772, "grad_norm": 0.4843734116041665, "learning_rate": 9.399111734042206e-06, "loss": 0.488, "step": 4043 }, { "epoch": 0.18302783435166328, "grad_norm": 0.7435265409800369, "learning_rate": 9.398763327457104e-06, "loss": 0.3692, "step": 4044 }, { "epoch": 0.18307309346005884, "grad_norm": 0.6868701918588811, "learning_rate": 9.398414826356088e-06, "loss": 0.3993, "step": 4045 }, { "epoch": 0.1831183525684544, "grad_norm": 0.9011937083746742, "learning_rate": 9.398066230746645e-06, "loss": 0.3478, "step": 4046 }, { "epoch": 0.18316361167684997, "grad_norm": 0.714107170767041, "learning_rate": 9.397717540636268e-06, "loss": 0.3849, "step": 4047 }, { "epoch": 0.18320887078524553, "grad_norm": 0.6470444070084024, "learning_rate": 9.397368756032445e-06, "loss": 0.3682, "step": 4048 }, { "epoch": 0.1832541298936411, "grad_norm": 0.4175690544272301, "learning_rate": 9.397019876942675e-06, "loss": 0.5435, "step": 4049 }, { "epoch": 0.18329938900203666, "grad_norm": 0.7044682525903221, "learning_rate": 9.396670903374452e-06, "loss": 0.4174, "step": 4050 }, { "epoch": 0.18334464811043222, "grad_norm": 0.7265792303979158, "learning_rate": 9.396321835335274e-06, "loss": 0.4232, "step": 4051 }, { "epoch": 0.18338990721882778, "grad_norm": 0.6683492011615667, "learning_rate": 9.395972672832642e-06, "loss": 0.4212, "step": 4052 }, { "epoch": 0.18343516632722334, "grad_norm": 0.8017287999678097, "learning_rate": 9.39562341587406e-06, "loss": 0.4166, "step": 4053 }, { "epoch": 0.1834804254356189, "grad_norm": 0.7425769217227569, "learning_rate": 9.39527406446703e-06, "loss": 0.4377, "step": 4054 }, { "epoch": 0.1835256845440145, "grad_norm": 0.32643590782763204, "learning_rate": 9.394924618619059e-06, "loss": 0.4994, "step": 4055 }, { "epoch": 0.18357094365241006, "grad_norm": 0.6471436602045438, "learning_rate": 9.394575078337657e-06, "loss": 0.4105, "step": 4056 }, { "epoch": 0.18361620276080562, "grad_norm": 0.4833246761409749, "learning_rate": 9.394225443630332e-06, "loss": 0.4927, "step": 4057 }, { "epoch": 0.18366146186920118, "grad_norm": 0.6848428054800386, "learning_rate": 9.393875714504598e-06, "loss": 0.3934, "step": 4058 }, { "epoch": 0.18370672097759674, "grad_norm": 0.655694304334304, "learning_rate": 9.393525890967971e-06, "loss": 0.4049, "step": 4059 }, { "epoch": 0.1837519800859923, "grad_norm": 0.3594581111319001, "learning_rate": 9.393175973027967e-06, "loss": 0.4795, "step": 4060 }, { "epoch": 0.18379723919438787, "grad_norm": 0.7635421681342424, "learning_rate": 9.392825960692103e-06, "loss": 0.3944, "step": 4061 }, { "epoch": 0.18384249830278343, "grad_norm": 0.6727442618433395, "learning_rate": 9.3924758539679e-06, "loss": 0.4417, "step": 4062 }, { "epoch": 0.183887757411179, "grad_norm": 0.6499968211176823, "learning_rate": 9.392125652862881e-06, "loss": 0.3782, "step": 4063 }, { "epoch": 0.18393301651957455, "grad_norm": 0.6338590652010262, "learning_rate": 9.391775357384571e-06, "loss": 0.4408, "step": 4064 }, { "epoch": 0.18397827562797012, "grad_norm": 0.6456796294022185, "learning_rate": 9.3914249675405e-06, "loss": 0.3867, "step": 4065 }, { "epoch": 0.1840235347363657, "grad_norm": 0.4560740638541108, "learning_rate": 9.39107448333819e-06, "loss": 0.5238, "step": 4066 }, { "epoch": 0.18406879384476127, "grad_norm": 0.3738339016612427, "learning_rate": 9.390723904785178e-06, "loss": 0.5169, "step": 4067 }, { "epoch": 0.18411405295315683, "grad_norm": 0.8041291215714814, "learning_rate": 9.390373231888991e-06, "loss": 0.4241, "step": 4068 }, { "epoch": 0.1841593120615524, "grad_norm": 0.6659286018838321, "learning_rate": 9.39002246465717e-06, "loss": 0.4115, "step": 4069 }, { "epoch": 0.18420457116994796, "grad_norm": 0.6382235578815533, "learning_rate": 9.389671603097248e-06, "loss": 0.4536, "step": 4070 }, { "epoch": 0.18424983027834352, "grad_norm": 0.7681460436470686, "learning_rate": 9.389320647216767e-06, "loss": 0.3774, "step": 4071 }, { "epoch": 0.18429508938673908, "grad_norm": 0.7688218133575312, "learning_rate": 9.388969597023265e-06, "loss": 0.399, "step": 4072 }, { "epoch": 0.18434034849513464, "grad_norm": 0.6452861606689477, "learning_rate": 9.388618452524285e-06, "loss": 0.3793, "step": 4073 }, { "epoch": 0.1843856076035302, "grad_norm": 0.703849544538944, "learning_rate": 9.388267213727373e-06, "loss": 0.4177, "step": 4074 }, { "epoch": 0.18443086671192577, "grad_norm": 0.6851811677788411, "learning_rate": 9.387915880640077e-06, "loss": 0.4469, "step": 4075 }, { "epoch": 0.18447612582032133, "grad_norm": 0.6926954407735016, "learning_rate": 9.387564453269945e-06, "loss": 0.4019, "step": 4076 }, { "epoch": 0.1845213849287169, "grad_norm": 0.6532267417161022, "learning_rate": 9.38721293162453e-06, "loss": 0.4304, "step": 4077 }, { "epoch": 0.18456664403711248, "grad_norm": 0.7015512523650694, "learning_rate": 9.386861315711382e-06, "loss": 0.5237, "step": 4078 }, { "epoch": 0.18461190314550804, "grad_norm": 0.5024324484532633, "learning_rate": 9.386509605538057e-06, "loss": 0.5247, "step": 4079 }, { "epoch": 0.1846571622539036, "grad_norm": 0.8009254479377098, "learning_rate": 9.386157801112112e-06, "loss": 0.4297, "step": 4080 }, { "epoch": 0.18470242136229917, "grad_norm": 0.7143862676068782, "learning_rate": 9.385805902441109e-06, "loss": 0.4324, "step": 4081 }, { "epoch": 0.18474768047069473, "grad_norm": 0.7113426206348091, "learning_rate": 9.385453909532606e-06, "loss": 0.3778, "step": 4082 }, { "epoch": 0.1847929395790903, "grad_norm": 0.7426225523157818, "learning_rate": 9.385101822394167e-06, "loss": 0.3976, "step": 4083 }, { "epoch": 0.18483819868748586, "grad_norm": 0.7347993942730485, "learning_rate": 9.384749641033358e-06, "loss": 0.3931, "step": 4084 }, { "epoch": 0.18488345779588142, "grad_norm": 0.7249770199361875, "learning_rate": 9.384397365457747e-06, "loss": 0.4235, "step": 4085 }, { "epoch": 0.18492871690427698, "grad_norm": 0.7628881619671343, "learning_rate": 9.3840449956749e-06, "loss": 0.3643, "step": 4086 }, { "epoch": 0.18497397601267254, "grad_norm": 0.6992424817628683, "learning_rate": 9.383692531692392e-06, "loss": 0.4394, "step": 4087 }, { "epoch": 0.1850192351210681, "grad_norm": 0.6819527112357563, "learning_rate": 9.383339973517796e-06, "loss": 0.4066, "step": 4088 }, { "epoch": 0.18506449422946367, "grad_norm": 0.7248904254009867, "learning_rate": 9.382987321158686e-06, "loss": 0.42, "step": 4089 }, { "epoch": 0.18510975333785926, "grad_norm": 1.0207624752501492, "learning_rate": 9.382634574622637e-06, "loss": 0.4014, "step": 4090 }, { "epoch": 0.18515501244625482, "grad_norm": 0.6421552433584352, "learning_rate": 9.382281733917235e-06, "loss": 0.4049, "step": 4091 }, { "epoch": 0.18520027155465038, "grad_norm": 0.6737615415897059, "learning_rate": 9.381928799050054e-06, "loss": 0.3951, "step": 4092 }, { "epoch": 0.18524553066304594, "grad_norm": 0.6744702758862204, "learning_rate": 9.381575770028684e-06, "loss": 0.3851, "step": 4093 }, { "epoch": 0.1852907897714415, "grad_norm": 1.5943439842998557, "learning_rate": 9.381222646860708e-06, "loss": 0.5285, "step": 4094 }, { "epoch": 0.18533604887983707, "grad_norm": 0.7502753234602005, "learning_rate": 9.380869429553712e-06, "loss": 0.4299, "step": 4095 }, { "epoch": 0.18538130798823263, "grad_norm": 0.6712999824888282, "learning_rate": 9.380516118115287e-06, "loss": 0.399, "step": 4096 }, { "epoch": 0.1854265670966282, "grad_norm": 0.6842794981481997, "learning_rate": 9.380162712553024e-06, "loss": 0.3814, "step": 4097 }, { "epoch": 0.18547182620502375, "grad_norm": 0.7185421908678674, "learning_rate": 9.379809212874517e-06, "loss": 0.3983, "step": 4098 }, { "epoch": 0.18551708531341932, "grad_norm": 0.6614546240803848, "learning_rate": 9.379455619087361e-06, "loss": 0.3495, "step": 4099 }, { "epoch": 0.18556234442181488, "grad_norm": 0.6541861454625707, "learning_rate": 9.379101931199154e-06, "loss": 0.4067, "step": 4100 }, { "epoch": 0.18560760353021044, "grad_norm": 1.0781073128655911, "learning_rate": 9.378748149217498e-06, "loss": 0.5088, "step": 4101 }, { "epoch": 0.18565286263860603, "grad_norm": 0.690712285204404, "learning_rate": 9.378394273149992e-06, "loss": 0.3794, "step": 4102 }, { "epoch": 0.1856981217470016, "grad_norm": 0.6801659428571151, "learning_rate": 9.37804030300424e-06, "loss": 0.3863, "step": 4103 }, { "epoch": 0.18574338085539716, "grad_norm": 0.6462483308953831, "learning_rate": 9.377686238787848e-06, "loss": 0.5028, "step": 4104 }, { "epoch": 0.18578863996379272, "grad_norm": 0.6980783750964051, "learning_rate": 9.377332080508423e-06, "loss": 0.3842, "step": 4105 }, { "epoch": 0.18583389907218828, "grad_norm": 0.6791753952826917, "learning_rate": 9.376977828173576e-06, "loss": 0.4278, "step": 4106 }, { "epoch": 0.18587915818058384, "grad_norm": 0.5805388440208535, "learning_rate": 9.376623481790918e-06, "loss": 0.4883, "step": 4107 }, { "epoch": 0.1859244172889794, "grad_norm": 0.6774348967610799, "learning_rate": 9.376269041368063e-06, "loss": 0.3824, "step": 4108 }, { "epoch": 0.18596967639737497, "grad_norm": 0.7046508039451562, "learning_rate": 9.375914506912628e-06, "loss": 0.4184, "step": 4109 }, { "epoch": 0.18601493550577053, "grad_norm": 0.6349773226277379, "learning_rate": 9.37555987843223e-06, "loss": 0.5237, "step": 4110 }, { "epoch": 0.1860601946141661, "grad_norm": 0.7162257440503814, "learning_rate": 9.375205155934488e-06, "loss": 0.3783, "step": 4111 }, { "epoch": 0.18610545372256165, "grad_norm": 0.7165412489731983, "learning_rate": 9.374850339427024e-06, "loss": 0.4266, "step": 4112 }, { "epoch": 0.18615071283095724, "grad_norm": 0.6954870860047865, "learning_rate": 9.374495428917463e-06, "loss": 0.3875, "step": 4113 }, { "epoch": 0.1861959719393528, "grad_norm": 0.6981956064505662, "learning_rate": 9.37414042441343e-06, "loss": 0.3425, "step": 4114 }, { "epoch": 0.18624123104774837, "grad_norm": 0.7453770808853691, "learning_rate": 9.373785325922556e-06, "loss": 0.4829, "step": 4115 }, { "epoch": 0.18628649015614393, "grad_norm": 0.6556482744053577, "learning_rate": 9.373430133452466e-06, "loss": 0.4009, "step": 4116 }, { "epoch": 0.1863317492645395, "grad_norm": 0.6973709463168327, "learning_rate": 9.373074847010795e-06, "loss": 0.4074, "step": 4117 }, { "epoch": 0.18637700837293505, "grad_norm": 0.7096935290207299, "learning_rate": 9.372719466605176e-06, "loss": 0.459, "step": 4118 }, { "epoch": 0.18642226748133062, "grad_norm": 0.626924754136172, "learning_rate": 9.372363992243245e-06, "loss": 0.3997, "step": 4119 }, { "epoch": 0.18646752658972618, "grad_norm": 0.691742254232117, "learning_rate": 9.37200842393264e-06, "loss": 0.4543, "step": 4120 }, { "epoch": 0.18651278569812174, "grad_norm": 0.7763991797792152, "learning_rate": 9.371652761681006e-06, "loss": 0.4108, "step": 4121 }, { "epoch": 0.1865580448065173, "grad_norm": 0.6590271573620654, "learning_rate": 9.371297005495976e-06, "loss": 0.3566, "step": 4122 }, { "epoch": 0.18660330391491287, "grad_norm": 0.7172524152091183, "learning_rate": 9.3709411553852e-06, "loss": 0.3836, "step": 4123 }, { "epoch": 0.18664856302330843, "grad_norm": 0.6864398739450693, "learning_rate": 9.370585211356323e-06, "loss": 0.3759, "step": 4124 }, { "epoch": 0.18669382213170402, "grad_norm": 0.645730820024419, "learning_rate": 9.370229173416994e-06, "loss": 0.378, "step": 4125 }, { "epoch": 0.18673908124009958, "grad_norm": 0.6075348044191042, "learning_rate": 9.36987304157486e-06, "loss": 0.5183, "step": 4126 }, { "epoch": 0.18678434034849514, "grad_norm": 0.78877271214592, "learning_rate": 9.369516815837579e-06, "loss": 0.3947, "step": 4127 }, { "epoch": 0.1868295994568907, "grad_norm": 0.7040157005782529, "learning_rate": 9.369160496212797e-06, "loss": 0.4193, "step": 4128 }, { "epoch": 0.18687485856528627, "grad_norm": 0.7747630187627644, "learning_rate": 9.368804082708178e-06, "loss": 0.4055, "step": 4129 }, { "epoch": 0.18692011767368183, "grad_norm": 0.6983420726951024, "learning_rate": 9.368447575331376e-06, "loss": 0.4087, "step": 4130 }, { "epoch": 0.1869653767820774, "grad_norm": 0.6971366034220329, "learning_rate": 9.368090974090053e-06, "loss": 0.3831, "step": 4131 }, { "epoch": 0.18701063589047295, "grad_norm": 0.6732722620717891, "learning_rate": 9.36773427899187e-06, "loss": 0.4186, "step": 4132 }, { "epoch": 0.18705589499886852, "grad_norm": 0.7045716634227874, "learning_rate": 9.367377490044491e-06, "loss": 0.4112, "step": 4133 }, { "epoch": 0.18710115410726408, "grad_norm": 0.7178724111065302, "learning_rate": 9.367020607255584e-06, "loss": 0.4044, "step": 4134 }, { "epoch": 0.18714641321565964, "grad_norm": 0.7229009173713574, "learning_rate": 9.366663630632817e-06, "loss": 0.4038, "step": 4135 }, { "epoch": 0.1871916723240552, "grad_norm": 0.6270133182764555, "learning_rate": 9.36630656018386e-06, "loss": 0.3759, "step": 4136 }, { "epoch": 0.1872369314324508, "grad_norm": 0.7126974681377124, "learning_rate": 9.365949395916383e-06, "loss": 0.3723, "step": 4137 }, { "epoch": 0.18728219054084635, "grad_norm": 0.7413695555778791, "learning_rate": 9.365592137838063e-06, "loss": 0.4281, "step": 4138 }, { "epoch": 0.18732744964924192, "grad_norm": 1.0872267695040065, "learning_rate": 9.365234785956575e-06, "loss": 0.4052, "step": 4139 }, { "epoch": 0.18737270875763748, "grad_norm": 0.6618657909759026, "learning_rate": 9.3648773402796e-06, "loss": 0.395, "step": 4140 }, { "epoch": 0.18741796786603304, "grad_norm": 0.756847238761708, "learning_rate": 9.364519800814818e-06, "loss": 0.3977, "step": 4141 }, { "epoch": 0.1874632269744286, "grad_norm": 0.7104506052937238, "learning_rate": 9.364162167569907e-06, "loss": 0.4612, "step": 4142 }, { "epoch": 0.18750848608282417, "grad_norm": 0.622825098297183, "learning_rate": 9.363804440552557e-06, "loss": 0.5194, "step": 4143 }, { "epoch": 0.18755374519121973, "grad_norm": 0.8854425327165626, "learning_rate": 9.363446619770452e-06, "loss": 0.3511, "step": 4144 }, { "epoch": 0.1875990042996153, "grad_norm": 0.7319115033311788, "learning_rate": 9.363088705231277e-06, "loss": 0.4249, "step": 4145 }, { "epoch": 0.18764426340801085, "grad_norm": 0.6450997911633352, "learning_rate": 9.36273069694273e-06, "loss": 0.3504, "step": 4146 }, { "epoch": 0.18768952251640642, "grad_norm": 0.46984863114291675, "learning_rate": 9.362372594912498e-06, "loss": 0.535, "step": 4147 }, { "epoch": 0.187734781624802, "grad_norm": 0.4128089269608341, "learning_rate": 9.362014399148275e-06, "loss": 0.5204, "step": 4148 }, { "epoch": 0.18778004073319757, "grad_norm": 0.8256883317191542, "learning_rate": 9.361656109657761e-06, "loss": 0.397, "step": 4149 }, { "epoch": 0.18782529984159313, "grad_norm": 0.808810125346021, "learning_rate": 9.361297726448656e-06, "loss": 0.4696, "step": 4150 }, { "epoch": 0.1878705589499887, "grad_norm": 0.3406958381061442, "learning_rate": 9.360939249528653e-06, "loss": 0.5191, "step": 4151 }, { "epoch": 0.18791581805838425, "grad_norm": 0.7482057961528514, "learning_rate": 9.360580678905462e-06, "loss": 0.3934, "step": 4152 }, { "epoch": 0.18796107716677982, "grad_norm": 0.3900188665176955, "learning_rate": 9.360222014586782e-06, "loss": 0.5091, "step": 4153 }, { "epoch": 0.18800633627517538, "grad_norm": 0.7373474726783783, "learning_rate": 9.359863256580326e-06, "loss": 0.4003, "step": 4154 }, { "epoch": 0.18805159538357094, "grad_norm": 0.7019847965214174, "learning_rate": 9.359504404893795e-06, "loss": 0.3899, "step": 4155 }, { "epoch": 0.1880968544919665, "grad_norm": 0.6653724063594627, "learning_rate": 9.359145459534906e-06, "loss": 0.38, "step": 4156 }, { "epoch": 0.18814211360036207, "grad_norm": 0.3733988868528731, "learning_rate": 9.35878642051137e-06, "loss": 0.5017, "step": 4157 }, { "epoch": 0.18818737270875763, "grad_norm": 0.8251591439561985, "learning_rate": 9.358427287830898e-06, "loss": 0.4396, "step": 4158 }, { "epoch": 0.1882326318171532, "grad_norm": 0.7482630429833076, "learning_rate": 9.358068061501211e-06, "loss": 0.4259, "step": 4159 }, { "epoch": 0.18827789092554878, "grad_norm": 0.6534549270921789, "learning_rate": 9.357708741530025e-06, "loss": 0.3608, "step": 4160 }, { "epoch": 0.18832315003394434, "grad_norm": 0.6465604289725814, "learning_rate": 9.357349327925063e-06, "loss": 0.3773, "step": 4161 }, { "epoch": 0.1883684091423399, "grad_norm": 0.7427467905232471, "learning_rate": 9.356989820694046e-06, "loss": 0.3589, "step": 4162 }, { "epoch": 0.18841366825073547, "grad_norm": 0.7248522411623955, "learning_rate": 9.3566302198447e-06, "loss": 0.4453, "step": 4163 }, { "epoch": 0.18845892735913103, "grad_norm": 0.6468436050912464, "learning_rate": 9.356270525384749e-06, "loss": 0.3759, "step": 4164 }, { "epoch": 0.1885041864675266, "grad_norm": 0.6606287578369291, "learning_rate": 9.355910737321927e-06, "loss": 0.3866, "step": 4165 }, { "epoch": 0.18854944557592215, "grad_norm": 0.874469386000399, "learning_rate": 9.35555085566396e-06, "loss": 0.426, "step": 4166 }, { "epoch": 0.18859470468431772, "grad_norm": 1.379091788268698, "learning_rate": 9.35519088041858e-06, "loss": 0.4193, "step": 4167 }, { "epoch": 0.18863996379271328, "grad_norm": 0.6644904529186746, "learning_rate": 9.354830811593527e-06, "loss": 0.3589, "step": 4168 }, { "epoch": 0.18868522290110884, "grad_norm": 0.6252804901163832, "learning_rate": 9.354470649196532e-06, "loss": 0.3771, "step": 4169 }, { "epoch": 0.1887304820095044, "grad_norm": 0.6371878142364097, "learning_rate": 9.354110393235339e-06, "loss": 0.3911, "step": 4170 }, { "epoch": 0.18877574111789996, "grad_norm": 0.7310086362067233, "learning_rate": 9.353750043717685e-06, "loss": 0.435, "step": 4171 }, { "epoch": 0.18882100022629555, "grad_norm": 0.6213844537992925, "learning_rate": 9.353389600651313e-06, "loss": 0.3899, "step": 4172 }, { "epoch": 0.18886625933469112, "grad_norm": 0.6700371913451663, "learning_rate": 9.35302906404397e-06, "loss": 0.4159, "step": 4173 }, { "epoch": 0.18891151844308668, "grad_norm": 0.7052752239619765, "learning_rate": 9.352668433903402e-06, "loss": 0.3734, "step": 4174 }, { "epoch": 0.18895677755148224, "grad_norm": 0.6566206805556328, "learning_rate": 9.352307710237358e-06, "loss": 0.4252, "step": 4175 }, { "epoch": 0.1890020366598778, "grad_norm": 0.7189694032411736, "learning_rate": 9.351946893053587e-06, "loss": 0.4134, "step": 4176 }, { "epoch": 0.18904729576827337, "grad_norm": 0.714903760039513, "learning_rate": 9.351585982359845e-06, "loss": 0.4327, "step": 4177 }, { "epoch": 0.18909255487666893, "grad_norm": 0.9170516480758911, "learning_rate": 9.351224978163885e-06, "loss": 0.3823, "step": 4178 }, { "epoch": 0.1891378139850645, "grad_norm": 0.6625988385123889, "learning_rate": 9.350863880473462e-06, "loss": 0.4025, "step": 4179 }, { "epoch": 0.18918307309346005, "grad_norm": 0.6808033178232126, "learning_rate": 9.350502689296337e-06, "loss": 0.4382, "step": 4180 }, { "epoch": 0.18922833220185561, "grad_norm": 0.6377075302123607, "learning_rate": 9.350141404640273e-06, "loss": 0.3599, "step": 4181 }, { "epoch": 0.18927359131025118, "grad_norm": 0.4455416067005901, "learning_rate": 9.34978002651303e-06, "loss": 0.5079, "step": 4182 }, { "epoch": 0.18931885041864674, "grad_norm": 0.9095683713471634, "learning_rate": 9.349418554922371e-06, "loss": 0.3626, "step": 4183 }, { "epoch": 0.18936410952704233, "grad_norm": 0.7259307961524236, "learning_rate": 9.349056989876068e-06, "loss": 0.3844, "step": 4184 }, { "epoch": 0.1894093686354379, "grad_norm": 0.3346786654337558, "learning_rate": 9.348695331381887e-06, "loss": 0.5089, "step": 4185 }, { "epoch": 0.18945462774383345, "grad_norm": 0.3344837591888438, "learning_rate": 9.3483335794476e-06, "loss": 0.4931, "step": 4186 }, { "epoch": 0.18949988685222902, "grad_norm": 0.8388219787853163, "learning_rate": 9.347971734080978e-06, "loss": 0.3988, "step": 4187 }, { "epoch": 0.18954514596062458, "grad_norm": 0.7570586941523583, "learning_rate": 9.347609795289798e-06, "loss": 0.413, "step": 4188 }, { "epoch": 0.18959040506902014, "grad_norm": 0.8178611564571792, "learning_rate": 9.347247763081834e-06, "loss": 0.3826, "step": 4189 }, { "epoch": 0.1896356641774157, "grad_norm": 0.628098793520388, "learning_rate": 9.346885637464871e-06, "loss": 0.5218, "step": 4190 }, { "epoch": 0.18968092328581126, "grad_norm": 0.7271969659510159, "learning_rate": 9.346523418446682e-06, "loss": 0.3973, "step": 4191 }, { "epoch": 0.18972618239420683, "grad_norm": 0.39111820880276893, "learning_rate": 9.346161106035056e-06, "loss": 0.5036, "step": 4192 }, { "epoch": 0.1897714415026024, "grad_norm": 1.1236366782758218, "learning_rate": 9.345798700237778e-06, "loss": 0.4423, "step": 4193 }, { "epoch": 0.18981670061099795, "grad_norm": 0.6378386630155638, "learning_rate": 9.34543620106263e-06, "loss": 0.3715, "step": 4194 }, { "epoch": 0.18986195971939354, "grad_norm": 0.7933074202178645, "learning_rate": 9.345073608517405e-06, "loss": 0.3934, "step": 4195 }, { "epoch": 0.1899072188277891, "grad_norm": 0.6613598448529215, "learning_rate": 9.344710922609893e-06, "loss": 0.3943, "step": 4196 }, { "epoch": 0.18995247793618467, "grad_norm": 0.7896961749002904, "learning_rate": 9.344348143347888e-06, "loss": 0.4128, "step": 4197 }, { "epoch": 0.18999773704458023, "grad_norm": 0.6664564006056659, "learning_rate": 9.343985270739184e-06, "loss": 0.3779, "step": 4198 }, { "epoch": 0.1900429961529758, "grad_norm": 0.6422339067166803, "learning_rate": 9.343622304791577e-06, "loss": 0.3793, "step": 4199 }, { "epoch": 0.19008825526137135, "grad_norm": 0.446876644874073, "learning_rate": 9.343259245512866e-06, "loss": 0.4923, "step": 4200 }, { "epoch": 0.19013351436976691, "grad_norm": 0.38858522128843226, "learning_rate": 9.342896092910857e-06, "loss": 0.5146, "step": 4201 }, { "epoch": 0.19017877347816248, "grad_norm": 0.7559868937443666, "learning_rate": 9.342532846993345e-06, "loss": 0.4266, "step": 4202 }, { "epoch": 0.19022403258655804, "grad_norm": 0.6361014808828712, "learning_rate": 9.342169507768143e-06, "loss": 0.4009, "step": 4203 }, { "epoch": 0.1902692916949536, "grad_norm": 0.6703834528070357, "learning_rate": 9.341806075243049e-06, "loss": 0.3937, "step": 4204 }, { "epoch": 0.19031455080334916, "grad_norm": 0.8034159210953544, "learning_rate": 9.341442549425882e-06, "loss": 0.4345, "step": 4205 }, { "epoch": 0.19035980991174473, "grad_norm": 0.70894004080529, "learning_rate": 9.341078930324446e-06, "loss": 0.4077, "step": 4206 }, { "epoch": 0.19040506902014032, "grad_norm": 0.6265618012794626, "learning_rate": 9.340715217946557e-06, "loss": 0.505, "step": 4207 }, { "epoch": 0.19045032812853588, "grad_norm": 0.6867976221003556, "learning_rate": 9.34035141230003e-06, "loss": 0.4117, "step": 4208 }, { "epoch": 0.19049558723693144, "grad_norm": 0.6815725037318351, "learning_rate": 9.339987513392681e-06, "loss": 0.3745, "step": 4209 }, { "epoch": 0.190540846345327, "grad_norm": 0.9252713915472213, "learning_rate": 9.33962352123233e-06, "loss": 0.4132, "step": 4210 }, { "epoch": 0.19058610545372257, "grad_norm": 0.35903187846953316, "learning_rate": 9.339259435826798e-06, "loss": 0.4936, "step": 4211 }, { "epoch": 0.19063136456211813, "grad_norm": 0.7351904438219221, "learning_rate": 9.338895257183907e-06, "loss": 0.3868, "step": 4212 }, { "epoch": 0.1906766236705137, "grad_norm": 0.666741204958213, "learning_rate": 9.338530985311483e-06, "loss": 0.427, "step": 4213 }, { "epoch": 0.19072188277890925, "grad_norm": 0.6864012420285249, "learning_rate": 9.338166620217353e-06, "loss": 0.3899, "step": 4214 }, { "epoch": 0.19076714188730481, "grad_norm": 0.6406318140484143, "learning_rate": 9.337802161909344e-06, "loss": 0.4032, "step": 4215 }, { "epoch": 0.19081240099570038, "grad_norm": 0.6779079194311202, "learning_rate": 9.337437610395292e-06, "loss": 0.4443, "step": 4216 }, { "epoch": 0.19085766010409594, "grad_norm": 0.8085914383393745, "learning_rate": 9.337072965683026e-06, "loss": 0.4001, "step": 4217 }, { "epoch": 0.1909029192124915, "grad_norm": 0.8418371319390886, "learning_rate": 9.336708227780382e-06, "loss": 0.4139, "step": 4218 }, { "epoch": 0.1909481783208871, "grad_norm": 0.5312634645258777, "learning_rate": 9.336343396695197e-06, "loss": 0.4937, "step": 4219 }, { "epoch": 0.19099343742928265, "grad_norm": 0.8147529416185956, "learning_rate": 9.335978472435311e-06, "loss": 0.3672, "step": 4220 }, { "epoch": 0.19103869653767822, "grad_norm": 0.36155580688435995, "learning_rate": 9.335613455008565e-06, "loss": 0.499, "step": 4221 }, { "epoch": 0.19108395564607378, "grad_norm": 0.8797910704411804, "learning_rate": 9.335248344422803e-06, "loss": 0.4269, "step": 4222 }, { "epoch": 0.19112921475446934, "grad_norm": 0.6906031892517221, "learning_rate": 9.334883140685867e-06, "loss": 0.4253, "step": 4223 }, { "epoch": 0.1911744738628649, "grad_norm": 0.7501075268027417, "learning_rate": 9.334517843805606e-06, "loss": 0.4356, "step": 4224 }, { "epoch": 0.19121973297126046, "grad_norm": 0.8377097360755146, "learning_rate": 9.334152453789868e-06, "loss": 0.4198, "step": 4225 }, { "epoch": 0.19126499207965603, "grad_norm": 0.6989885826530646, "learning_rate": 9.333786970646507e-06, "loss": 0.4341, "step": 4226 }, { "epoch": 0.1913102511880516, "grad_norm": 0.6613263318372491, "learning_rate": 9.333421394383374e-06, "loss": 0.3878, "step": 4227 }, { "epoch": 0.19135551029644715, "grad_norm": 0.5685047682663741, "learning_rate": 9.333055725008323e-06, "loss": 0.5035, "step": 4228 }, { "epoch": 0.1914007694048427, "grad_norm": 0.7589492207434562, "learning_rate": 9.332689962529213e-06, "loss": 0.4241, "step": 4229 }, { "epoch": 0.19144602851323828, "grad_norm": 0.4250628070188152, "learning_rate": 9.332324106953903e-06, "loss": 0.5119, "step": 4230 }, { "epoch": 0.19149128762163387, "grad_norm": 0.7021573180456467, "learning_rate": 9.331958158290253e-06, "loss": 0.4065, "step": 4231 }, { "epoch": 0.19153654673002943, "grad_norm": 0.382512596580531, "learning_rate": 9.331592116546128e-06, "loss": 0.4987, "step": 4232 }, { "epoch": 0.191581805838425, "grad_norm": 0.7330051980181821, "learning_rate": 9.33122598172939e-06, "loss": 0.4129, "step": 4233 }, { "epoch": 0.19162706494682055, "grad_norm": 0.689069453015296, "learning_rate": 9.33085975384791e-06, "loss": 0.4418, "step": 4234 }, { "epoch": 0.19167232405521611, "grad_norm": 0.6607414917334931, "learning_rate": 9.330493432909553e-06, "loss": 0.3966, "step": 4235 }, { "epoch": 0.19171758316361168, "grad_norm": 0.6062373121085459, "learning_rate": 9.330127018922195e-06, "loss": 0.3827, "step": 4236 }, { "epoch": 0.19176284227200724, "grad_norm": 0.6657482893388922, "learning_rate": 9.329760511893703e-06, "loss": 0.4258, "step": 4237 }, { "epoch": 0.1918081013804028, "grad_norm": 0.76336256720969, "learning_rate": 9.329393911831957e-06, "loss": 0.5121, "step": 4238 }, { "epoch": 0.19185336048879836, "grad_norm": 0.570098781814426, "learning_rate": 9.329027218744833e-06, "loss": 0.4937, "step": 4239 }, { "epoch": 0.19189861959719393, "grad_norm": 0.6741787772987584, "learning_rate": 9.328660432640211e-06, "loss": 0.4024, "step": 4240 }, { "epoch": 0.1919438787055895, "grad_norm": 0.6141085288648717, "learning_rate": 9.32829355352597e-06, "loss": 0.4197, "step": 4241 }, { "epoch": 0.19198913781398508, "grad_norm": 0.535364704619855, "learning_rate": 9.327926581409992e-06, "loss": 0.5256, "step": 4242 }, { "epoch": 0.19203439692238064, "grad_norm": 0.6452703911522668, "learning_rate": 9.327559516300164e-06, "loss": 0.4067, "step": 4243 }, { "epoch": 0.1920796560307762, "grad_norm": 0.6459242711060674, "learning_rate": 9.327192358204374e-06, "loss": 0.3749, "step": 4244 }, { "epoch": 0.19212491513917176, "grad_norm": 0.6934224689498821, "learning_rate": 9.32682510713051e-06, "loss": 0.397, "step": 4245 }, { "epoch": 0.19217017424756733, "grad_norm": 0.7035646271268499, "learning_rate": 9.326457763086463e-06, "loss": 0.3713, "step": 4246 }, { "epoch": 0.1922154333559629, "grad_norm": 0.6359014330987115, "learning_rate": 9.326090326080129e-06, "loss": 0.412, "step": 4247 }, { "epoch": 0.19226069246435845, "grad_norm": 0.8562684223334093, "learning_rate": 9.325722796119396e-06, "loss": 0.5247, "step": 4248 }, { "epoch": 0.192305951572754, "grad_norm": 0.668983090986784, "learning_rate": 9.325355173212169e-06, "loss": 0.3677, "step": 4249 }, { "epoch": 0.19235121068114958, "grad_norm": 0.6735918825748062, "learning_rate": 9.324987457366342e-06, "loss": 0.4335, "step": 4250 }, { "epoch": 0.19239646978954514, "grad_norm": 0.6111348754854273, "learning_rate": 9.324619648589818e-06, "loss": 0.3478, "step": 4251 }, { "epoch": 0.1924417288979407, "grad_norm": 0.6529173510325869, "learning_rate": 9.324251746890501e-06, "loss": 0.3914, "step": 4252 }, { "epoch": 0.19248698800633626, "grad_norm": 0.7176116943040441, "learning_rate": 9.323883752276294e-06, "loss": 0.4417, "step": 4253 }, { "epoch": 0.19253224711473185, "grad_norm": 0.8261099452942321, "learning_rate": 9.323515664755105e-06, "loss": 0.3822, "step": 4254 }, { "epoch": 0.19257750622312741, "grad_norm": 0.7602870493102301, "learning_rate": 9.323147484334843e-06, "loss": 0.4208, "step": 4255 }, { "epoch": 0.19262276533152298, "grad_norm": 0.6966871082756921, "learning_rate": 9.322779211023418e-06, "loss": 0.431, "step": 4256 }, { "epoch": 0.19266802443991854, "grad_norm": 0.6646502179431876, "learning_rate": 9.322410844828747e-06, "loss": 0.4047, "step": 4257 }, { "epoch": 0.1927132835483141, "grad_norm": 0.7620936660471155, "learning_rate": 9.322042385758738e-06, "loss": 0.4006, "step": 4258 }, { "epoch": 0.19275854265670966, "grad_norm": 0.5508551135263602, "learning_rate": 9.321673833821316e-06, "loss": 0.4854, "step": 4259 }, { "epoch": 0.19280380176510523, "grad_norm": 0.7260689341128518, "learning_rate": 9.321305189024395e-06, "loss": 0.4336, "step": 4260 }, { "epoch": 0.1928490608735008, "grad_norm": 0.6686325587042482, "learning_rate": 9.320936451375896e-06, "loss": 0.3674, "step": 4261 }, { "epoch": 0.19289431998189635, "grad_norm": 0.6248650776883086, "learning_rate": 9.320567620883746e-06, "loss": 0.3821, "step": 4262 }, { "epoch": 0.1929395790902919, "grad_norm": 0.708121072026063, "learning_rate": 9.320198697555866e-06, "loss": 0.407, "step": 4263 }, { "epoch": 0.19298483819868748, "grad_norm": 0.6231187055806207, "learning_rate": 9.319829681400185e-06, "loss": 0.3846, "step": 4264 }, { "epoch": 0.19303009730708304, "grad_norm": 0.43480382071748097, "learning_rate": 9.319460572424632e-06, "loss": 0.5126, "step": 4265 }, { "epoch": 0.19307535641547863, "grad_norm": 0.658036330731171, "learning_rate": 9.319091370637136e-06, "loss": 0.4143, "step": 4266 }, { "epoch": 0.1931206155238742, "grad_norm": 0.6675050586561079, "learning_rate": 9.318722076045632e-06, "loss": 0.3835, "step": 4267 }, { "epoch": 0.19316587463226975, "grad_norm": 0.6878269185310447, "learning_rate": 9.318352688658055e-06, "loss": 0.389, "step": 4268 }, { "epoch": 0.19321113374066531, "grad_norm": 0.6417992974753269, "learning_rate": 9.317983208482342e-06, "loss": 0.4138, "step": 4269 }, { "epoch": 0.19325639284906088, "grad_norm": 0.7400557604489254, "learning_rate": 9.317613635526431e-06, "loss": 0.4442, "step": 4270 }, { "epoch": 0.19330165195745644, "grad_norm": 0.6493969617728693, "learning_rate": 9.317243969798263e-06, "loss": 0.3417, "step": 4271 }, { "epoch": 0.193346911065852, "grad_norm": 0.7276072755450002, "learning_rate": 9.31687421130578e-06, "loss": 0.4165, "step": 4272 }, { "epoch": 0.19339217017424756, "grad_norm": 0.7372973315741538, "learning_rate": 9.31650436005693e-06, "loss": 0.4105, "step": 4273 }, { "epoch": 0.19343742928264313, "grad_norm": 0.7257643318303959, "learning_rate": 9.31613441605966e-06, "loss": 0.4357, "step": 4274 }, { "epoch": 0.1934826883910387, "grad_norm": 0.4424464950211227, "learning_rate": 9.315764379321916e-06, "loss": 0.4984, "step": 4275 }, { "epoch": 0.19352794749943425, "grad_norm": 0.6614602225854239, "learning_rate": 9.31539424985165e-06, "loss": 0.3993, "step": 4276 }, { "epoch": 0.1935732066078298, "grad_norm": 0.7541761185746563, "learning_rate": 9.315024027656815e-06, "loss": 0.3853, "step": 4277 }, { "epoch": 0.1936184657162254, "grad_norm": 0.660166374597361, "learning_rate": 9.314653712745368e-06, "loss": 0.3879, "step": 4278 }, { "epoch": 0.19366372482462096, "grad_norm": 0.6435859897251333, "learning_rate": 9.314283305125262e-06, "loss": 0.4038, "step": 4279 }, { "epoch": 0.19370898393301653, "grad_norm": 0.7788972612870466, "learning_rate": 9.313912804804459e-06, "loss": 0.4066, "step": 4280 }, { "epoch": 0.1937542430414121, "grad_norm": 0.6496745811186846, "learning_rate": 9.31354221179092e-06, "loss": 0.4215, "step": 4281 }, { "epoch": 0.19379950214980765, "grad_norm": 0.6848040494516953, "learning_rate": 9.313171526092606e-06, "loss": 0.422, "step": 4282 }, { "epoch": 0.1938447612582032, "grad_norm": 0.8014840865761476, "learning_rate": 9.312800747717484e-06, "loss": 0.3954, "step": 4283 }, { "epoch": 0.19389002036659878, "grad_norm": 0.6019739424529075, "learning_rate": 9.312429876673517e-06, "loss": 0.3774, "step": 4284 }, { "epoch": 0.19393527947499434, "grad_norm": 0.6340649489666178, "learning_rate": 9.312058912968679e-06, "loss": 0.3709, "step": 4285 }, { "epoch": 0.1939805385833899, "grad_norm": 0.6679937885689637, "learning_rate": 9.311687856610939e-06, "loss": 0.4114, "step": 4286 }, { "epoch": 0.19402579769178546, "grad_norm": 0.6207236883394622, "learning_rate": 9.311316707608267e-06, "loss": 0.3748, "step": 4287 }, { "epoch": 0.19407105680018102, "grad_norm": 0.580243437551749, "learning_rate": 9.31094546596864e-06, "loss": 0.3919, "step": 4288 }, { "epoch": 0.19411631590857661, "grad_norm": 0.7394004485489974, "learning_rate": 9.310574131700036e-06, "loss": 0.396, "step": 4289 }, { "epoch": 0.19416157501697218, "grad_norm": 0.7232307625440043, "learning_rate": 9.310202704810433e-06, "loss": 0.4577, "step": 4290 }, { "epoch": 0.19420683412536774, "grad_norm": 0.6854297919590081, "learning_rate": 9.309831185307812e-06, "loss": 0.381, "step": 4291 }, { "epoch": 0.1942520932337633, "grad_norm": 0.6196581479239693, "learning_rate": 9.309459573200154e-06, "loss": 0.4015, "step": 4292 }, { "epoch": 0.19429735234215886, "grad_norm": 0.6615913159694972, "learning_rate": 9.309087868495447e-06, "loss": 0.3935, "step": 4293 }, { "epoch": 0.19434261145055443, "grad_norm": 0.6393318696287117, "learning_rate": 9.308716071201676e-06, "loss": 0.4008, "step": 4294 }, { "epoch": 0.19438787055895, "grad_norm": 0.546204787393143, "learning_rate": 9.308344181326829e-06, "loss": 0.4989, "step": 4295 }, { "epoch": 0.19443312966734555, "grad_norm": 0.6960738962155074, "learning_rate": 9.307972198878897e-06, "loss": 0.3923, "step": 4296 }, { "epoch": 0.1944783887757411, "grad_norm": 0.33892437302645556, "learning_rate": 9.307600123865874e-06, "loss": 0.4821, "step": 4297 }, { "epoch": 0.19452364788413667, "grad_norm": 0.6390442836312251, "learning_rate": 9.307227956295754e-06, "loss": 0.3633, "step": 4298 }, { "epoch": 0.19456890699253224, "grad_norm": 0.6792186733996218, "learning_rate": 9.306855696176536e-06, "loss": 0.3651, "step": 4299 }, { "epoch": 0.1946141661009278, "grad_norm": 0.4289816986536164, "learning_rate": 9.306483343516212e-06, "loss": 0.4678, "step": 4300 }, { "epoch": 0.1946594252093234, "grad_norm": 0.7078556709325038, "learning_rate": 9.30611089832279e-06, "loss": 0.4272, "step": 4301 }, { "epoch": 0.19470468431771895, "grad_norm": 0.670063578620746, "learning_rate": 9.30573836060427e-06, "loss": 0.3604, "step": 4302 }, { "epoch": 0.1947499434261145, "grad_norm": 0.6125312638812249, "learning_rate": 9.305365730368658e-06, "loss": 0.343, "step": 4303 }, { "epoch": 0.19479520253451008, "grad_norm": 0.43965317617828314, "learning_rate": 9.304993007623958e-06, "loss": 0.5143, "step": 4304 }, { "epoch": 0.19484046164290564, "grad_norm": 0.6869474154118801, "learning_rate": 9.30462019237818e-06, "loss": 0.4482, "step": 4305 }, { "epoch": 0.1948857207513012, "grad_norm": 0.37417953819718464, "learning_rate": 9.304247284639335e-06, "loss": 0.5008, "step": 4306 }, { "epoch": 0.19493097985969676, "grad_norm": 0.6682325190494395, "learning_rate": 9.303874284415435e-06, "loss": 0.3748, "step": 4307 }, { "epoch": 0.19497623896809232, "grad_norm": 0.6462237565900745, "learning_rate": 9.303501191714494e-06, "loss": 0.4181, "step": 4308 }, { "epoch": 0.1950214980764879, "grad_norm": 0.644397907031416, "learning_rate": 9.303128006544531e-06, "loss": 0.3434, "step": 4309 }, { "epoch": 0.19506675718488345, "grad_norm": 0.6405159433598613, "learning_rate": 9.302754728913563e-06, "loss": 0.3615, "step": 4310 }, { "epoch": 0.195112016293279, "grad_norm": 0.7136582827059487, "learning_rate": 9.302381358829612e-06, "loss": 0.4064, "step": 4311 }, { "epoch": 0.19515727540167457, "grad_norm": 0.6703286149054923, "learning_rate": 9.302007896300697e-06, "loss": 0.4185, "step": 4312 }, { "epoch": 0.19520253451007016, "grad_norm": 0.67624507234566, "learning_rate": 9.301634341334846e-06, "loss": 0.3992, "step": 4313 }, { "epoch": 0.19524779361846573, "grad_norm": 0.6507390990718162, "learning_rate": 9.301260693940084e-06, "loss": 0.3774, "step": 4314 }, { "epoch": 0.1952930527268613, "grad_norm": 0.635224165327857, "learning_rate": 9.300886954124442e-06, "loss": 0.391, "step": 4315 }, { "epoch": 0.19533831183525685, "grad_norm": 0.820758878812254, "learning_rate": 9.300513121895946e-06, "loss": 0.4185, "step": 4316 }, { "epoch": 0.1953835709436524, "grad_norm": 0.6567914034471967, "learning_rate": 9.300139197262633e-06, "loss": 0.3871, "step": 4317 }, { "epoch": 0.19542883005204797, "grad_norm": 0.7745445471527284, "learning_rate": 9.299765180232534e-06, "loss": 0.4992, "step": 4318 }, { "epoch": 0.19547408916044354, "grad_norm": 0.5686358701093436, "learning_rate": 9.299391070813687e-06, "loss": 0.5268, "step": 4319 }, { "epoch": 0.1955193482688391, "grad_norm": 0.7189558651821492, "learning_rate": 9.29901686901413e-06, "loss": 0.3943, "step": 4320 }, { "epoch": 0.19556460737723466, "grad_norm": 0.6913062360595764, "learning_rate": 9.298642574841906e-06, "loss": 0.3723, "step": 4321 }, { "epoch": 0.19560986648563022, "grad_norm": 0.7396020547936617, "learning_rate": 9.298268188305054e-06, "loss": 0.373, "step": 4322 }, { "epoch": 0.19565512559402579, "grad_norm": 0.7529357435564967, "learning_rate": 9.29789370941162e-06, "loss": 0.4975, "step": 4323 }, { "epoch": 0.19570038470242138, "grad_norm": 0.7697627354205817, "learning_rate": 9.29751913816965e-06, "loss": 0.3901, "step": 4324 }, { "epoch": 0.19574564381081694, "grad_norm": 0.7005308781484924, "learning_rate": 9.297144474587193e-06, "loss": 0.4247, "step": 4325 }, { "epoch": 0.1957909029192125, "grad_norm": 0.6823939930425462, "learning_rate": 9.296769718672298e-06, "loss": 0.4269, "step": 4326 }, { "epoch": 0.19583616202760806, "grad_norm": 0.614401669857915, "learning_rate": 9.296394870433018e-06, "loss": 0.5073, "step": 4327 }, { "epoch": 0.19588142113600363, "grad_norm": 0.7837169926571875, "learning_rate": 9.29601992987741e-06, "loss": 0.4299, "step": 4328 }, { "epoch": 0.1959266802443992, "grad_norm": 0.7365197541628816, "learning_rate": 9.295644897013526e-06, "loss": 0.4003, "step": 4329 }, { "epoch": 0.19597193935279475, "grad_norm": 0.6291839507518706, "learning_rate": 9.295269771849426e-06, "loss": 0.4143, "step": 4330 }, { "epoch": 0.1960171984611903, "grad_norm": 0.7709732047355847, "learning_rate": 9.294894554393172e-06, "loss": 0.4568, "step": 4331 }, { "epoch": 0.19606245756958587, "grad_norm": 0.4735295845056872, "learning_rate": 9.294519244652825e-06, "loss": 0.5193, "step": 4332 }, { "epoch": 0.19610771667798144, "grad_norm": 0.7441985176769523, "learning_rate": 9.294143842636447e-06, "loss": 0.4162, "step": 4333 }, { "epoch": 0.196152975786377, "grad_norm": 0.6201583591024671, "learning_rate": 9.293768348352106e-06, "loss": 0.3498, "step": 4334 }, { "epoch": 0.19619823489477256, "grad_norm": 1.1910528546319035, "learning_rate": 9.293392761807873e-06, "loss": 0.3856, "step": 4335 }, { "epoch": 0.19624349400316815, "grad_norm": 0.7391466697999081, "learning_rate": 9.293017083011814e-06, "loss": 0.3971, "step": 4336 }, { "epoch": 0.1962887531115637, "grad_norm": 0.679637583536768, "learning_rate": 9.292641311972004e-06, "loss": 0.3835, "step": 4337 }, { "epoch": 0.19633401221995928, "grad_norm": 0.6965119342754589, "learning_rate": 9.292265448696515e-06, "loss": 0.4116, "step": 4338 }, { "epoch": 0.19637927132835484, "grad_norm": 0.6969239676501272, "learning_rate": 9.291889493193424e-06, "loss": 0.3934, "step": 4339 }, { "epoch": 0.1964245304367504, "grad_norm": 0.6554684903091974, "learning_rate": 9.29151344547081e-06, "loss": 0.4031, "step": 4340 }, { "epoch": 0.19646978954514596, "grad_norm": 0.6461420773694276, "learning_rate": 9.291137305536752e-06, "loss": 0.3987, "step": 4341 }, { "epoch": 0.19651504865354152, "grad_norm": 0.6893853761388993, "learning_rate": 9.290761073399333e-06, "loss": 0.3913, "step": 4342 }, { "epoch": 0.1965603077619371, "grad_norm": 0.6926547278011074, "learning_rate": 9.290384749066636e-06, "loss": 0.4473, "step": 4343 }, { "epoch": 0.19660556687033265, "grad_norm": 0.7730563566336075, "learning_rate": 9.290008332546749e-06, "loss": 0.3774, "step": 4344 }, { "epoch": 0.1966508259787282, "grad_norm": 0.761834350061357, "learning_rate": 9.289631823847758e-06, "loss": 0.3956, "step": 4345 }, { "epoch": 0.19669608508712377, "grad_norm": 0.6493671717411257, "learning_rate": 9.289255222977754e-06, "loss": 0.4139, "step": 4346 }, { "epoch": 0.19674134419551934, "grad_norm": 0.4625826341249837, "learning_rate": 9.288878529944827e-06, "loss": 0.4967, "step": 4347 }, { "epoch": 0.19678660330391493, "grad_norm": 0.7436204158165354, "learning_rate": 9.288501744757073e-06, "loss": 0.4225, "step": 4348 }, { "epoch": 0.1968318624123105, "grad_norm": 0.709135533044813, "learning_rate": 9.28812486742259e-06, "loss": 0.4244, "step": 4349 }, { "epoch": 0.19687712152070605, "grad_norm": 0.6641857934741487, "learning_rate": 9.287747897949471e-06, "loss": 0.415, "step": 4350 }, { "epoch": 0.1969223806291016, "grad_norm": 0.714834663860521, "learning_rate": 9.287370836345819e-06, "loss": 0.4266, "step": 4351 }, { "epoch": 0.19696763973749717, "grad_norm": 0.648691599092435, "learning_rate": 9.286993682619736e-06, "loss": 0.356, "step": 4352 }, { "epoch": 0.19701289884589274, "grad_norm": 0.6686053436740277, "learning_rate": 9.286616436779326e-06, "loss": 0.3934, "step": 4353 }, { "epoch": 0.1970581579542883, "grad_norm": 0.6551766852685967, "learning_rate": 9.286239098832693e-06, "loss": 0.4217, "step": 4354 }, { "epoch": 0.19710341706268386, "grad_norm": 0.6901906038206445, "learning_rate": 9.285861668787947e-06, "loss": 0.4014, "step": 4355 }, { "epoch": 0.19714867617107942, "grad_norm": 0.6851944238808891, "learning_rate": 9.285484146653195e-06, "loss": 0.429, "step": 4356 }, { "epoch": 0.19719393527947499, "grad_norm": 0.5409067984664292, "learning_rate": 9.285106532436552e-06, "loss": 0.5184, "step": 4357 }, { "epoch": 0.19723919438787055, "grad_norm": 0.4302281353429989, "learning_rate": 9.28472882614613e-06, "loss": 0.5058, "step": 4358 }, { "epoch": 0.1972844534962661, "grad_norm": 0.7081951904028086, "learning_rate": 9.284351027790044e-06, "loss": 0.3741, "step": 4359 }, { "epoch": 0.1973297126046617, "grad_norm": 0.6824997425810786, "learning_rate": 9.283973137376414e-06, "loss": 0.4113, "step": 4360 }, { "epoch": 0.19737497171305726, "grad_norm": 0.7060524282807865, "learning_rate": 9.283595154913358e-06, "loss": 0.3592, "step": 4361 }, { "epoch": 0.19742023082145282, "grad_norm": 0.7613516912992961, "learning_rate": 9.283217080409e-06, "loss": 0.4269, "step": 4362 }, { "epoch": 0.1974654899298484, "grad_norm": 0.7603432579957048, "learning_rate": 9.28283891387146e-06, "loss": 0.4193, "step": 4363 }, { "epoch": 0.19751074903824395, "grad_norm": 0.6901066095588091, "learning_rate": 9.282460655308864e-06, "loss": 0.4444, "step": 4364 }, { "epoch": 0.1975560081466395, "grad_norm": 0.584544309983049, "learning_rate": 9.282082304729343e-06, "loss": 0.3687, "step": 4365 }, { "epoch": 0.19760126725503507, "grad_norm": 0.7097341226175008, "learning_rate": 9.281703862141024e-06, "loss": 0.3928, "step": 4366 }, { "epoch": 0.19764652636343064, "grad_norm": 0.704967973293008, "learning_rate": 9.28132532755204e-06, "loss": 0.3784, "step": 4367 }, { "epoch": 0.1976917854718262, "grad_norm": 0.9059692746686824, "learning_rate": 9.280946700970524e-06, "loss": 0.4982, "step": 4368 }, { "epoch": 0.19773704458022176, "grad_norm": 0.6941175981179759, "learning_rate": 9.280567982404611e-06, "loss": 0.3921, "step": 4369 }, { "epoch": 0.19778230368861732, "grad_norm": 0.7020988815556046, "learning_rate": 9.280189171862439e-06, "loss": 0.361, "step": 4370 }, { "epoch": 0.1978275627970129, "grad_norm": 0.733757185054716, "learning_rate": 9.279810269352147e-06, "loss": 0.4315, "step": 4371 }, { "epoch": 0.19787282190540847, "grad_norm": 0.9030378119274879, "learning_rate": 9.279431274881876e-06, "loss": 0.3648, "step": 4372 }, { "epoch": 0.19791808101380404, "grad_norm": 0.6822015900189836, "learning_rate": 9.279052188459772e-06, "loss": 0.4028, "step": 4373 }, { "epoch": 0.1979633401221996, "grad_norm": 0.6302617832804666, "learning_rate": 9.278673010093977e-06, "loss": 0.5142, "step": 4374 }, { "epoch": 0.19800859923059516, "grad_norm": 0.496785804689605, "learning_rate": 9.278293739792642e-06, "loss": 0.5014, "step": 4375 }, { "epoch": 0.19805385833899072, "grad_norm": 0.7594395569593272, "learning_rate": 9.277914377563911e-06, "loss": 0.4004, "step": 4376 }, { "epoch": 0.19809911744738629, "grad_norm": 0.6961484073026286, "learning_rate": 9.277534923415941e-06, "loss": 0.3856, "step": 4377 }, { "epoch": 0.19814437655578185, "grad_norm": 0.752079978554707, "learning_rate": 9.277155377356881e-06, "loss": 0.4135, "step": 4378 }, { "epoch": 0.1981896356641774, "grad_norm": 0.7544369391544148, "learning_rate": 9.27677573939489e-06, "loss": 0.3979, "step": 4379 }, { "epoch": 0.19823489477257297, "grad_norm": 0.730897043381073, "learning_rate": 9.276396009538122e-06, "loss": 0.4056, "step": 4380 }, { "epoch": 0.19828015388096853, "grad_norm": 0.6920481467125367, "learning_rate": 9.276016187794739e-06, "loss": 0.4194, "step": 4381 }, { "epoch": 0.1983254129893641, "grad_norm": 0.6528896991545643, "learning_rate": 9.275636274172901e-06, "loss": 0.4046, "step": 4382 }, { "epoch": 0.1983706720977597, "grad_norm": 0.7660841197828047, "learning_rate": 9.27525626868077e-06, "loss": 0.3838, "step": 4383 }, { "epoch": 0.19841593120615525, "grad_norm": 0.7133729442628481, "learning_rate": 9.274876171326514e-06, "loss": 0.3808, "step": 4384 }, { "epoch": 0.1984611903145508, "grad_norm": 0.6404607940509356, "learning_rate": 9.274495982118297e-06, "loss": 0.3779, "step": 4385 }, { "epoch": 0.19850644942294637, "grad_norm": 0.7199080919720658, "learning_rate": 9.27411570106429e-06, "loss": 0.4167, "step": 4386 }, { "epoch": 0.19855170853134194, "grad_norm": 1.2733519021150366, "learning_rate": 9.273735328172664e-06, "loss": 0.5265, "step": 4387 }, { "epoch": 0.1985969676397375, "grad_norm": 0.7103108166886162, "learning_rate": 9.273354863451589e-06, "loss": 0.3537, "step": 4388 }, { "epoch": 0.19864222674813306, "grad_norm": 0.7246408967666235, "learning_rate": 9.272974306909246e-06, "loss": 0.3739, "step": 4389 }, { "epoch": 0.19868748585652862, "grad_norm": 0.6757107097574218, "learning_rate": 9.272593658553806e-06, "loss": 0.354, "step": 4390 }, { "epoch": 0.19873274496492419, "grad_norm": 0.7067224266353317, "learning_rate": 9.272212918393452e-06, "loss": 0.4137, "step": 4391 }, { "epoch": 0.19877800407331975, "grad_norm": 0.6155517958991198, "learning_rate": 9.271832086436364e-06, "loss": 0.3582, "step": 4392 }, { "epoch": 0.1988232631817153, "grad_norm": 0.6805250960188165, "learning_rate": 9.271451162690723e-06, "loss": 0.3961, "step": 4393 }, { "epoch": 0.19886852229011087, "grad_norm": 0.6844985043600663, "learning_rate": 9.271070147164715e-06, "loss": 0.375, "step": 4394 }, { "epoch": 0.19891378139850646, "grad_norm": 0.7043381339443586, "learning_rate": 9.270689039866528e-06, "loss": 0.4425, "step": 4395 }, { "epoch": 0.19895904050690202, "grad_norm": 0.6478549868387321, "learning_rate": 9.270307840804349e-06, "loss": 0.3539, "step": 4396 }, { "epoch": 0.1990042996152976, "grad_norm": 0.6498360959963834, "learning_rate": 9.26992654998637e-06, "loss": 0.3831, "step": 4397 }, { "epoch": 0.19904955872369315, "grad_norm": 0.674985630028175, "learning_rate": 9.269545167420786e-06, "loss": 0.4313, "step": 4398 }, { "epoch": 0.1990948178320887, "grad_norm": 0.6890540926953495, "learning_rate": 9.269163693115786e-06, "loss": 0.4219, "step": 4399 }, { "epoch": 0.19914007694048427, "grad_norm": 0.6898930046935953, "learning_rate": 9.268782127079571e-06, "loss": 0.3732, "step": 4400 }, { "epoch": 0.19918533604887984, "grad_norm": 0.6177411270268293, "learning_rate": 9.26840046932034e-06, "loss": 0.3636, "step": 4401 }, { "epoch": 0.1992305951572754, "grad_norm": 1.3825448268802494, "learning_rate": 9.26801871984629e-06, "loss": 0.5663, "step": 4402 }, { "epoch": 0.19927585426567096, "grad_norm": 0.9528669719885208, "learning_rate": 9.267636878665629e-06, "loss": 0.5335, "step": 4403 }, { "epoch": 0.19932111337406652, "grad_norm": 0.6940354386270224, "learning_rate": 9.267254945786556e-06, "loss": 0.3852, "step": 4404 }, { "epoch": 0.19936637248246208, "grad_norm": 0.7044042232798097, "learning_rate": 9.26687292121728e-06, "loss": 0.3684, "step": 4405 }, { "epoch": 0.19941163159085765, "grad_norm": 0.8272582311689283, "learning_rate": 9.26649080496601e-06, "loss": 0.4547, "step": 4406 }, { "epoch": 0.19945689069925324, "grad_norm": 0.8991881377986967, "learning_rate": 9.266108597040957e-06, "loss": 0.4182, "step": 4407 }, { "epoch": 0.1995021498076488, "grad_norm": 0.8684215744351997, "learning_rate": 9.265726297450332e-06, "loss": 0.4388, "step": 4408 }, { "epoch": 0.19954740891604436, "grad_norm": 0.671845166331204, "learning_rate": 9.265343906202351e-06, "loss": 0.376, "step": 4409 }, { "epoch": 0.19959266802443992, "grad_norm": 0.7218212880590468, "learning_rate": 9.264961423305229e-06, "loss": 0.4265, "step": 4410 }, { "epoch": 0.19963792713283549, "grad_norm": 0.7227038515063788, "learning_rate": 9.264578848767184e-06, "loss": 0.3955, "step": 4411 }, { "epoch": 0.19968318624123105, "grad_norm": 0.7151889788085047, "learning_rate": 9.264196182596438e-06, "loss": 0.4277, "step": 4412 }, { "epoch": 0.1997284453496266, "grad_norm": 0.8204253178429187, "learning_rate": 9.26381342480121e-06, "loss": 0.3914, "step": 4413 }, { "epoch": 0.19977370445802217, "grad_norm": 0.7712295837039834, "learning_rate": 9.26343057538973e-06, "loss": 0.3781, "step": 4414 }, { "epoch": 0.19981896356641773, "grad_norm": 0.7006062972466253, "learning_rate": 9.263047634370221e-06, "loss": 0.3883, "step": 4415 }, { "epoch": 0.1998642226748133, "grad_norm": 0.7042611085484666, "learning_rate": 9.26266460175091e-06, "loss": 0.4038, "step": 4416 }, { "epoch": 0.19990948178320886, "grad_norm": 0.7092910007919806, "learning_rate": 9.262281477540029e-06, "loss": 0.4182, "step": 4417 }, { "epoch": 0.19995474089160445, "grad_norm": 0.6352626827612116, "learning_rate": 9.26189826174581e-06, "loss": 0.4053, "step": 4418 }, { "epoch": 0.2, "grad_norm": 0.6908034332242652, "learning_rate": 9.261514954376487e-06, "loss": 0.4037, "step": 4419 }, { "epoch": 0.20004525910839557, "grad_norm": 0.6393519206087547, "learning_rate": 9.261131555440295e-06, "loss": 0.4285, "step": 4420 }, { "epoch": 0.20009051821679114, "grad_norm": 0.7543033606632701, "learning_rate": 9.260748064945473e-06, "loss": 0.3904, "step": 4421 }, { "epoch": 0.2001357773251867, "grad_norm": 0.8441036963952208, "learning_rate": 9.26036448290026e-06, "loss": 0.4251, "step": 4422 }, { "epoch": 0.20018103643358226, "grad_norm": 0.7412155904408332, "learning_rate": 9.259980809312901e-06, "loss": 0.3955, "step": 4423 }, { "epoch": 0.20022629554197782, "grad_norm": 0.7085212725378105, "learning_rate": 9.259597044191635e-06, "loss": 0.3939, "step": 4424 }, { "epoch": 0.20027155465037338, "grad_norm": 3.2353281721630096, "learning_rate": 9.259213187544714e-06, "loss": 0.6369, "step": 4425 }, { "epoch": 0.20031681375876895, "grad_norm": 1.81014429072996, "learning_rate": 9.25882923938038e-06, "loss": 0.5755, "step": 4426 }, { "epoch": 0.2003620728671645, "grad_norm": 0.9136144605035063, "learning_rate": 9.25844519970689e-06, "loss": 0.4165, "step": 4427 }, { "epoch": 0.20040733197556007, "grad_norm": 1.02823154637771, "learning_rate": 9.258061068532487e-06, "loss": 0.4422, "step": 4428 }, { "epoch": 0.20045259108395563, "grad_norm": 1.1601240780768014, "learning_rate": 9.257676845865431e-06, "loss": 0.4443, "step": 4429 }, { "epoch": 0.20049785019235122, "grad_norm": 2.571160437651429, "learning_rate": 9.257292531713977e-06, "loss": 0.5761, "step": 4430 }, { "epoch": 0.20054310930074679, "grad_norm": 0.8266987002197626, "learning_rate": 9.25690812608638e-06, "loss": 0.4099, "step": 4431 }, { "epoch": 0.20058836840914235, "grad_norm": 0.7510003168521713, "learning_rate": 9.256523628990903e-06, "loss": 0.3916, "step": 4432 }, { "epoch": 0.2006336275175379, "grad_norm": 0.7427283363249146, "learning_rate": 9.256139040435806e-06, "loss": 0.4012, "step": 4433 }, { "epoch": 0.20067888662593347, "grad_norm": 0.8078106575136649, "learning_rate": 9.255754360429353e-06, "loss": 0.3643, "step": 4434 }, { "epoch": 0.20072414573432903, "grad_norm": 0.8547619551004447, "learning_rate": 9.255369588979806e-06, "loss": 0.3855, "step": 4435 }, { "epoch": 0.2007694048427246, "grad_norm": 0.7763954730463974, "learning_rate": 9.25498472609544e-06, "loss": 0.3987, "step": 4436 }, { "epoch": 0.20081466395112016, "grad_norm": 0.8427741534604029, "learning_rate": 9.254599771784519e-06, "loss": 0.4286, "step": 4437 }, { "epoch": 0.20085992305951572, "grad_norm": 0.7836133705709749, "learning_rate": 9.254214726055314e-06, "loss": 0.3951, "step": 4438 }, { "epoch": 0.20090518216791128, "grad_norm": 0.7298023584383054, "learning_rate": 9.253829588916103e-06, "loss": 0.4004, "step": 4439 }, { "epoch": 0.20095044127630685, "grad_norm": 0.665970725982351, "learning_rate": 9.253444360375157e-06, "loss": 0.417, "step": 4440 }, { "epoch": 0.2009957003847024, "grad_norm": 1.531114314437859, "learning_rate": 9.253059040440757e-06, "loss": 0.6065, "step": 4441 }, { "epoch": 0.201040959493098, "grad_norm": 0.7515967029741009, "learning_rate": 9.25267362912118e-06, "loss": 0.4067, "step": 4442 }, { "epoch": 0.20108621860149356, "grad_norm": 0.6829143827997587, "learning_rate": 9.252288126424707e-06, "loss": 0.3985, "step": 4443 }, { "epoch": 0.20113147770988912, "grad_norm": 1.1010757231316262, "learning_rate": 9.251902532359622e-06, "loss": 0.5895, "step": 4444 }, { "epoch": 0.20117673681828468, "grad_norm": 0.7326225536696734, "learning_rate": 9.25151684693421e-06, "loss": 0.3961, "step": 4445 }, { "epoch": 0.20122199592668025, "grad_norm": 0.7448227710158162, "learning_rate": 9.251131070156761e-06, "loss": 0.4333, "step": 4446 }, { "epoch": 0.2012672550350758, "grad_norm": 0.734666576540432, "learning_rate": 9.250745202035558e-06, "loss": 0.4081, "step": 4447 }, { "epoch": 0.20131251414347137, "grad_norm": 0.6936139236742962, "learning_rate": 9.250359242578898e-06, "loss": 0.3982, "step": 4448 }, { "epoch": 0.20135777325186693, "grad_norm": 0.7448189175784459, "learning_rate": 9.249973191795072e-06, "loss": 0.3906, "step": 4449 }, { "epoch": 0.2014030323602625, "grad_norm": 0.7389504242537271, "learning_rate": 9.249587049692375e-06, "loss": 0.436, "step": 4450 }, { "epoch": 0.20144829146865806, "grad_norm": 0.8361161553792722, "learning_rate": 9.249200816279103e-06, "loss": 0.5595, "step": 4451 }, { "epoch": 0.20149355057705362, "grad_norm": 0.7152862153144932, "learning_rate": 9.248814491563555e-06, "loss": 0.3676, "step": 4452 }, { "epoch": 0.20153880968544918, "grad_norm": 0.6533846117754827, "learning_rate": 9.248428075554034e-06, "loss": 0.3462, "step": 4453 }, { "epoch": 0.20158406879384477, "grad_norm": 0.8677187836764106, "learning_rate": 9.248041568258843e-06, "loss": 0.4081, "step": 4454 }, { "epoch": 0.20162932790224034, "grad_norm": 0.6707462106808738, "learning_rate": 9.247654969686283e-06, "loss": 0.4266, "step": 4455 }, { "epoch": 0.2016745870106359, "grad_norm": 0.672407091659003, "learning_rate": 9.247268279844666e-06, "loss": 0.41, "step": 4456 }, { "epoch": 0.20171984611903146, "grad_norm": 0.6815229751795202, "learning_rate": 9.246881498742296e-06, "loss": 0.4167, "step": 4457 }, { "epoch": 0.20176510522742702, "grad_norm": 0.5479951855336056, "learning_rate": 9.246494626387487e-06, "loss": 0.5093, "step": 4458 }, { "epoch": 0.20181036433582258, "grad_norm": 0.5161973718916856, "learning_rate": 9.24610766278855e-06, "loss": 0.5098, "step": 4459 }, { "epoch": 0.20185562344421815, "grad_norm": 0.850925537761303, "learning_rate": 9.245720607953802e-06, "loss": 0.3833, "step": 4460 }, { "epoch": 0.2019008825526137, "grad_norm": 0.6542363235875374, "learning_rate": 9.245333461891555e-06, "loss": 0.3858, "step": 4461 }, { "epoch": 0.20194614166100927, "grad_norm": 0.6973756715606783, "learning_rate": 9.244946224610132e-06, "loss": 0.373, "step": 4462 }, { "epoch": 0.20199140076940483, "grad_norm": 0.8079341217942448, "learning_rate": 9.244558896117852e-06, "loss": 0.4349, "step": 4463 }, { "epoch": 0.2020366598778004, "grad_norm": 0.681137135301652, "learning_rate": 9.244171476423037e-06, "loss": 0.4003, "step": 4464 }, { "epoch": 0.20208191898619599, "grad_norm": 0.7047389289061264, "learning_rate": 9.243783965534012e-06, "loss": 0.4466, "step": 4465 }, { "epoch": 0.20212717809459155, "grad_norm": 0.7248341836247567, "learning_rate": 9.243396363459104e-06, "loss": 0.3839, "step": 4466 }, { "epoch": 0.2021724372029871, "grad_norm": 0.6816766720407302, "learning_rate": 9.24300867020664e-06, "loss": 0.4275, "step": 4467 }, { "epoch": 0.20221769631138267, "grad_norm": 0.6565503559461989, "learning_rate": 9.242620885784952e-06, "loss": 0.3764, "step": 4468 }, { "epoch": 0.20226295541977823, "grad_norm": 0.7332199448787484, "learning_rate": 9.242233010202371e-06, "loss": 0.3909, "step": 4469 }, { "epoch": 0.2023082145281738, "grad_norm": 0.6763839124000454, "learning_rate": 9.241845043467232e-06, "loss": 0.5072, "step": 4470 }, { "epoch": 0.20235347363656936, "grad_norm": 0.691110792809285, "learning_rate": 9.241456985587868e-06, "loss": 0.3975, "step": 4471 }, { "epoch": 0.20239873274496492, "grad_norm": 0.6932678792221445, "learning_rate": 9.241068836572623e-06, "loss": 0.4609, "step": 4472 }, { "epoch": 0.20244399185336048, "grad_norm": 0.44079329807617135, "learning_rate": 9.240680596429833e-06, "loss": 0.495, "step": 4473 }, { "epoch": 0.20248925096175605, "grad_norm": 0.655790544577791, "learning_rate": 9.240292265167843e-06, "loss": 0.4043, "step": 4474 }, { "epoch": 0.2025345100701516, "grad_norm": 0.7597126537185416, "learning_rate": 9.239903842794995e-06, "loss": 0.361, "step": 4475 }, { "epoch": 0.20257976917854717, "grad_norm": 0.6928027687811528, "learning_rate": 9.239515329319633e-06, "loss": 0.4059, "step": 4476 }, { "epoch": 0.20262502828694276, "grad_norm": 0.6265742356763842, "learning_rate": 9.23912672475011e-06, "loss": 0.3775, "step": 4477 }, { "epoch": 0.20267028739533832, "grad_norm": 0.6250366573033741, "learning_rate": 9.238738029094771e-06, "loss": 0.3929, "step": 4478 }, { "epoch": 0.20271554650373388, "grad_norm": 0.6239194232238193, "learning_rate": 9.238349242361971e-06, "loss": 0.3946, "step": 4479 }, { "epoch": 0.20276080561212945, "grad_norm": 0.49157297510570386, "learning_rate": 9.237960364560063e-06, "loss": 0.5188, "step": 4480 }, { "epoch": 0.202806064720525, "grad_norm": 0.7051363283191563, "learning_rate": 9.237571395697403e-06, "loss": 0.3835, "step": 4481 }, { "epoch": 0.20285132382892057, "grad_norm": 0.6660464023096989, "learning_rate": 9.237182335782347e-06, "loss": 0.4022, "step": 4482 }, { "epoch": 0.20289658293731613, "grad_norm": 0.6843714863796969, "learning_rate": 9.236793184823257e-06, "loss": 0.397, "step": 4483 }, { "epoch": 0.2029418420457117, "grad_norm": 0.6855307725350448, "learning_rate": 9.236403942828494e-06, "loss": 0.3815, "step": 4484 }, { "epoch": 0.20298710115410726, "grad_norm": 0.7079043018016902, "learning_rate": 9.236014609806421e-06, "loss": 0.3846, "step": 4485 }, { "epoch": 0.20303236026250282, "grad_norm": 0.6504384598720968, "learning_rate": 9.235625185765403e-06, "loss": 0.3855, "step": 4486 }, { "epoch": 0.20307761937089838, "grad_norm": 0.6534159175098676, "learning_rate": 9.235235670713808e-06, "loss": 0.3779, "step": 4487 }, { "epoch": 0.20312287847929394, "grad_norm": 0.41340276621194005, "learning_rate": 9.23484606466001e-06, "loss": 0.5256, "step": 4488 }, { "epoch": 0.20316813758768953, "grad_norm": 0.3317961618649806, "learning_rate": 9.234456367612373e-06, "loss": 0.5193, "step": 4489 }, { "epoch": 0.2032133966960851, "grad_norm": 0.7802740604366346, "learning_rate": 9.234066579579274e-06, "loss": 0.396, "step": 4490 }, { "epoch": 0.20325865580448066, "grad_norm": 0.3090938209094079, "learning_rate": 9.23367670056909e-06, "loss": 0.465, "step": 4491 }, { "epoch": 0.20330391491287622, "grad_norm": 0.7517899148301332, "learning_rate": 9.233286730590195e-06, "loss": 0.4396, "step": 4492 }, { "epoch": 0.20334917402127178, "grad_norm": 0.3312876729542182, "learning_rate": 9.23289666965097e-06, "loss": 0.5304, "step": 4493 }, { "epoch": 0.20339443312966735, "grad_norm": 0.7427229107966618, "learning_rate": 9.232506517759797e-06, "loss": 0.3931, "step": 4494 }, { "epoch": 0.2034396922380629, "grad_norm": 0.6789139552402076, "learning_rate": 9.232116274925056e-06, "loss": 0.4469, "step": 4495 }, { "epoch": 0.20348495134645847, "grad_norm": 0.7699554131664681, "learning_rate": 9.231725941155133e-06, "loss": 0.4291, "step": 4496 }, { "epoch": 0.20353021045485403, "grad_norm": 0.6974959924396135, "learning_rate": 9.231335516458419e-06, "loss": 0.3907, "step": 4497 }, { "epoch": 0.2035754695632496, "grad_norm": 0.6737647687437592, "learning_rate": 9.2309450008433e-06, "loss": 0.445, "step": 4498 }, { "epoch": 0.20362072867164516, "grad_norm": 0.4044279854670894, "learning_rate": 9.230554394318167e-06, "loss": 0.4961, "step": 4499 }, { "epoch": 0.20366598778004075, "grad_norm": 0.844669490129284, "learning_rate": 9.230163696891415e-06, "loss": 0.3862, "step": 4500 }, { "epoch": 0.2037112468884363, "grad_norm": 0.7059403482053921, "learning_rate": 9.229772908571435e-06, "loss": 0.3812, "step": 4501 }, { "epoch": 0.20375650599683187, "grad_norm": 0.6133336353944165, "learning_rate": 9.229382029366625e-06, "loss": 0.382, "step": 4502 }, { "epoch": 0.20380176510522743, "grad_norm": 0.6774167500015552, "learning_rate": 9.228991059285387e-06, "loss": 0.3902, "step": 4503 }, { "epoch": 0.203847024213623, "grad_norm": 0.32660098914735397, "learning_rate": 9.228599998336119e-06, "loss": 0.5226, "step": 4504 }, { "epoch": 0.20389228332201856, "grad_norm": 0.8621953064122951, "learning_rate": 9.228208846527222e-06, "loss": 0.4481, "step": 4505 }, { "epoch": 0.20393754243041412, "grad_norm": 0.31618272426187605, "learning_rate": 9.227817603867106e-06, "loss": 0.521, "step": 4506 }, { "epoch": 0.20398280153880968, "grad_norm": 0.6973370488241651, "learning_rate": 9.227426270364172e-06, "loss": 0.408, "step": 4507 }, { "epoch": 0.20402806064720524, "grad_norm": 0.7224976157253408, "learning_rate": 9.227034846026833e-06, "loss": 0.4008, "step": 4508 }, { "epoch": 0.2040733197556008, "grad_norm": 0.860177658259118, "learning_rate": 9.226643330863497e-06, "loss": 0.4123, "step": 4509 }, { "epoch": 0.20411857886399637, "grad_norm": 0.9113439419672582, "learning_rate": 9.226251724882576e-06, "loss": 0.4213, "step": 4510 }, { "epoch": 0.20416383797239193, "grad_norm": 0.6339986185921339, "learning_rate": 9.225860028092486e-06, "loss": 0.3931, "step": 4511 }, { "epoch": 0.20420909708078752, "grad_norm": 0.6893893577078529, "learning_rate": 9.225468240501643e-06, "loss": 0.3616, "step": 4512 }, { "epoch": 0.20425435618918308, "grad_norm": 0.7505527891331154, "learning_rate": 9.225076362118464e-06, "loss": 0.4093, "step": 4513 }, { "epoch": 0.20429961529757865, "grad_norm": 0.6566878964422919, "learning_rate": 9.22468439295137e-06, "loss": 0.3789, "step": 4514 }, { "epoch": 0.2043448744059742, "grad_norm": 0.4184513712004044, "learning_rate": 9.224292333008785e-06, "loss": 0.5064, "step": 4515 }, { "epoch": 0.20439013351436977, "grad_norm": 0.6435411751541094, "learning_rate": 9.223900182299132e-06, "loss": 0.3671, "step": 4516 }, { "epoch": 0.20443539262276533, "grad_norm": 0.7608819934526175, "learning_rate": 9.223507940830836e-06, "loss": 0.3805, "step": 4517 }, { "epoch": 0.2044806517311609, "grad_norm": 0.7234395427513599, "learning_rate": 9.223115608612325e-06, "loss": 0.3975, "step": 4518 }, { "epoch": 0.20452591083955646, "grad_norm": 0.6773361232917884, "learning_rate": 9.222723185652031e-06, "loss": 0.3955, "step": 4519 }, { "epoch": 0.20457116994795202, "grad_norm": 0.7176545433153804, "learning_rate": 9.222330671958385e-06, "loss": 0.3876, "step": 4520 }, { "epoch": 0.20461642905634758, "grad_norm": 0.6506277456232169, "learning_rate": 9.22193806753982e-06, "loss": 0.4109, "step": 4521 }, { "epoch": 0.20466168816474314, "grad_norm": 0.6114119716823015, "learning_rate": 9.221545372404774e-06, "loss": 0.3546, "step": 4522 }, { "epoch": 0.2047069472731387, "grad_norm": 0.6235199644185562, "learning_rate": 9.22115258656168e-06, "loss": 0.384, "step": 4523 }, { "epoch": 0.2047522063815343, "grad_norm": 0.7221677461499507, "learning_rate": 9.220759710018984e-06, "loss": 0.3897, "step": 4524 }, { "epoch": 0.20479746548992986, "grad_norm": 0.6620027763210206, "learning_rate": 9.220366742785126e-06, "loss": 0.4044, "step": 4525 }, { "epoch": 0.20484272459832542, "grad_norm": 0.6396484049106496, "learning_rate": 9.219973684868546e-06, "loss": 0.387, "step": 4526 }, { "epoch": 0.20488798370672098, "grad_norm": 0.6500113781338169, "learning_rate": 9.219580536277693e-06, "loss": 0.3711, "step": 4527 }, { "epoch": 0.20493324281511655, "grad_norm": 0.8404289788973495, "learning_rate": 9.219187297021015e-06, "loss": 0.3726, "step": 4528 }, { "epoch": 0.2049785019235121, "grad_norm": 0.6387752010088015, "learning_rate": 9.218793967106959e-06, "loss": 0.4157, "step": 4529 }, { "epoch": 0.20502376103190767, "grad_norm": 0.630614010902661, "learning_rate": 9.218400546543977e-06, "loss": 0.366, "step": 4530 }, { "epoch": 0.20506902014030323, "grad_norm": 0.6171563864543054, "learning_rate": 9.218007035340525e-06, "loss": 0.4018, "step": 4531 }, { "epoch": 0.2051142792486988, "grad_norm": 0.6403875839699732, "learning_rate": 9.217613433505056e-06, "loss": 0.415, "step": 4532 }, { "epoch": 0.20515953835709436, "grad_norm": 0.650257811916877, "learning_rate": 9.217219741046026e-06, "loss": 0.4413, "step": 4533 }, { "epoch": 0.20520479746548992, "grad_norm": 0.38375700790017325, "learning_rate": 9.216825957971898e-06, "loss": 0.4794, "step": 4534 }, { "epoch": 0.20525005657388548, "grad_norm": 0.6478140522618808, "learning_rate": 9.21643208429113e-06, "loss": 0.3696, "step": 4535 }, { "epoch": 0.20529531568228107, "grad_norm": 0.6876090924588174, "learning_rate": 9.216038120012187e-06, "loss": 0.4065, "step": 4536 }, { "epoch": 0.20534057479067663, "grad_norm": 0.7690946772442716, "learning_rate": 9.215644065143533e-06, "loss": 0.4241, "step": 4537 }, { "epoch": 0.2053858338990722, "grad_norm": 0.3022901550826388, "learning_rate": 9.215249919693634e-06, "loss": 0.4742, "step": 4538 }, { "epoch": 0.20543109300746776, "grad_norm": 0.7201790266563611, "learning_rate": 9.214855683670962e-06, "loss": 0.4444, "step": 4539 }, { "epoch": 0.20547635211586332, "grad_norm": 0.3224356216160571, "learning_rate": 9.214461357083986e-06, "loss": 0.5113, "step": 4540 }, { "epoch": 0.20552161122425888, "grad_norm": 0.6949901984886445, "learning_rate": 9.21406693994118e-06, "loss": 0.4504, "step": 4541 }, { "epoch": 0.20556687033265444, "grad_norm": 0.2908178295108323, "learning_rate": 9.213672432251016e-06, "loss": 0.5201, "step": 4542 }, { "epoch": 0.20561212944105, "grad_norm": 0.6020446632402668, "learning_rate": 9.213277834021975e-06, "loss": 0.3505, "step": 4543 }, { "epoch": 0.20565738854944557, "grad_norm": 0.6681811347078651, "learning_rate": 9.212883145262532e-06, "loss": 0.4143, "step": 4544 }, { "epoch": 0.20570264765784113, "grad_norm": 0.5824281917974231, "learning_rate": 9.212488365981169e-06, "loss": 0.3621, "step": 4545 }, { "epoch": 0.2057479067662367, "grad_norm": 0.6887703737480376, "learning_rate": 9.21209349618637e-06, "loss": 0.408, "step": 4546 }, { "epoch": 0.20579316587463228, "grad_norm": 0.6825994800894678, "learning_rate": 9.211698535886617e-06, "loss": 0.4061, "step": 4547 }, { "epoch": 0.20583842498302785, "grad_norm": 0.6205959778563953, "learning_rate": 9.211303485090396e-06, "loss": 0.3746, "step": 4548 }, { "epoch": 0.2058836840914234, "grad_norm": 0.3568690622234325, "learning_rate": 9.210908343806201e-06, "loss": 0.5068, "step": 4549 }, { "epoch": 0.20592894319981897, "grad_norm": 0.7072278527888455, "learning_rate": 9.210513112042516e-06, "loss": 0.4337, "step": 4550 }, { "epoch": 0.20597420230821453, "grad_norm": 0.2905103607140547, "learning_rate": 9.210117789807837e-06, "loss": 0.4811, "step": 4551 }, { "epoch": 0.2060194614166101, "grad_norm": 0.7029007626023053, "learning_rate": 9.209722377110657e-06, "loss": 0.4229, "step": 4552 }, { "epoch": 0.20606472052500566, "grad_norm": 0.292195895010017, "learning_rate": 9.20932687395947e-06, "loss": 0.5031, "step": 4553 }, { "epoch": 0.20610997963340122, "grad_norm": 0.6145259280329646, "learning_rate": 9.20893128036278e-06, "loss": 0.3548, "step": 4554 }, { "epoch": 0.20615523874179678, "grad_norm": 0.7336553661516277, "learning_rate": 9.208535596329082e-06, "loss": 0.4202, "step": 4555 }, { "epoch": 0.20620049785019234, "grad_norm": 0.35952433690955826, "learning_rate": 9.20813982186688e-06, "loss": 0.4885, "step": 4556 }, { "epoch": 0.2062457569585879, "grad_norm": 0.3178685454909754, "learning_rate": 9.207743956984676e-06, "loss": 0.4909, "step": 4557 }, { "epoch": 0.20629101606698347, "grad_norm": 0.7560864491204569, "learning_rate": 9.20734800169098e-06, "loss": 0.4113, "step": 4558 }, { "epoch": 0.20633627517537906, "grad_norm": 0.6815854069387833, "learning_rate": 9.206951955994294e-06, "loss": 0.4012, "step": 4559 }, { "epoch": 0.20638153428377462, "grad_norm": 0.6708607127704782, "learning_rate": 9.206555819903132e-06, "loss": 0.3886, "step": 4560 }, { "epoch": 0.20642679339217018, "grad_norm": 0.6822575874755423, "learning_rate": 9.206159593426005e-06, "loss": 0.3947, "step": 4561 }, { "epoch": 0.20647205250056574, "grad_norm": 0.6443314679032428, "learning_rate": 9.205763276571429e-06, "loss": 0.3646, "step": 4562 }, { "epoch": 0.2065173116089613, "grad_norm": 1.2434698621002092, "learning_rate": 9.205366869347915e-06, "loss": 0.4032, "step": 4563 }, { "epoch": 0.20656257071735687, "grad_norm": 0.5846776817337689, "learning_rate": 9.204970371763984e-06, "loss": 0.354, "step": 4564 }, { "epoch": 0.20660782982575243, "grad_norm": 0.6663693027142774, "learning_rate": 9.204573783828153e-06, "loss": 0.3752, "step": 4565 }, { "epoch": 0.206653088934148, "grad_norm": 0.6392985106616054, "learning_rate": 9.204177105548946e-06, "loss": 0.3578, "step": 4566 }, { "epoch": 0.20669834804254356, "grad_norm": 0.6708860689350385, "learning_rate": 9.203780336934885e-06, "loss": 0.378, "step": 4567 }, { "epoch": 0.20674360715093912, "grad_norm": 0.6624608867324993, "learning_rate": 9.203383477994495e-06, "loss": 0.3772, "step": 4568 }, { "epoch": 0.20678886625933468, "grad_norm": 0.443250526541554, "learning_rate": 9.202986528736302e-06, "loss": 0.4944, "step": 4569 }, { "epoch": 0.20683412536773024, "grad_norm": 0.7065139455060182, "learning_rate": 9.20258948916884e-06, "loss": 0.3936, "step": 4570 }, { "epoch": 0.20687938447612583, "grad_norm": 0.6886726217442054, "learning_rate": 9.202192359300635e-06, "loss": 0.385, "step": 4571 }, { "epoch": 0.2069246435845214, "grad_norm": 0.30440484021814024, "learning_rate": 9.201795139140224e-06, "loss": 0.4913, "step": 4572 }, { "epoch": 0.20696990269291696, "grad_norm": 0.6461373151052034, "learning_rate": 9.201397828696139e-06, "loss": 0.4234, "step": 4573 }, { "epoch": 0.20701516180131252, "grad_norm": 0.29313820021793546, "learning_rate": 9.201000427976917e-06, "loss": 0.4926, "step": 4574 }, { "epoch": 0.20706042090970808, "grad_norm": 0.7446593774804321, "learning_rate": 9.2006029369911e-06, "loss": 0.4272, "step": 4575 }, { "epoch": 0.20710568001810364, "grad_norm": 0.634239966775946, "learning_rate": 9.200205355747228e-06, "loss": 0.41, "step": 4576 }, { "epoch": 0.2071509391264992, "grad_norm": 0.6269753464584988, "learning_rate": 9.199807684253842e-06, "loss": 0.4049, "step": 4577 }, { "epoch": 0.20719619823489477, "grad_norm": 0.6626327081044174, "learning_rate": 9.199409922519487e-06, "loss": 0.4133, "step": 4578 }, { "epoch": 0.20724145734329033, "grad_norm": 0.6810410460327005, "learning_rate": 9.19901207055271e-06, "loss": 0.4049, "step": 4579 }, { "epoch": 0.2072867164516859, "grad_norm": 0.677169394674404, "learning_rate": 9.198614128362062e-06, "loss": 0.4485, "step": 4580 }, { "epoch": 0.20733197556008146, "grad_norm": 0.6882687805510503, "learning_rate": 9.19821609595609e-06, "loss": 0.399, "step": 4581 }, { "epoch": 0.20737723466847702, "grad_norm": 0.7153669943119992, "learning_rate": 9.197817973343347e-06, "loss": 0.3734, "step": 4582 }, { "epoch": 0.2074224937768726, "grad_norm": 0.659508194441196, "learning_rate": 9.197419760532389e-06, "loss": 0.4033, "step": 4583 }, { "epoch": 0.20746775288526817, "grad_norm": 0.6520433998599509, "learning_rate": 9.197021457531771e-06, "loss": 0.3872, "step": 4584 }, { "epoch": 0.20751301199366373, "grad_norm": 0.6629252034178089, "learning_rate": 9.196623064350054e-06, "loss": 0.3936, "step": 4585 }, { "epoch": 0.2075582711020593, "grad_norm": 0.8014334519954863, "learning_rate": 9.196224580995796e-06, "loss": 0.3918, "step": 4586 }, { "epoch": 0.20760353021045486, "grad_norm": 0.6381738015711796, "learning_rate": 9.19582600747756e-06, "loss": 0.3992, "step": 4587 }, { "epoch": 0.20764878931885042, "grad_norm": 0.6575473018865028, "learning_rate": 9.195427343803906e-06, "loss": 0.4203, "step": 4588 }, { "epoch": 0.20769404842724598, "grad_norm": 0.680756389919026, "learning_rate": 9.195028589983407e-06, "loss": 0.4196, "step": 4589 }, { "epoch": 0.20773930753564154, "grad_norm": 0.7390360892869532, "learning_rate": 9.194629746024627e-06, "loss": 0.3954, "step": 4590 }, { "epoch": 0.2077845666440371, "grad_norm": 0.6774172823682195, "learning_rate": 9.194230811936135e-06, "loss": 0.4134, "step": 4591 }, { "epoch": 0.20782982575243267, "grad_norm": 0.4994134842411283, "learning_rate": 9.193831787726507e-06, "loss": 0.518, "step": 4592 }, { "epoch": 0.20787508486082823, "grad_norm": 0.8287166767605357, "learning_rate": 9.193432673404312e-06, "loss": 0.3993, "step": 4593 }, { "epoch": 0.20792034396922382, "grad_norm": 0.751905987339554, "learning_rate": 9.19303346897813e-06, "loss": 0.41, "step": 4594 }, { "epoch": 0.20796560307761938, "grad_norm": 0.7410296417952482, "learning_rate": 9.192634174456536e-06, "loss": 0.3207, "step": 4595 }, { "epoch": 0.20801086218601494, "grad_norm": 0.6774001605097975, "learning_rate": 9.19223478984811e-06, "loss": 0.3675, "step": 4596 }, { "epoch": 0.2080561212944105, "grad_norm": 0.6846876993441788, "learning_rate": 9.191835315161432e-06, "loss": 0.4113, "step": 4597 }, { "epoch": 0.20810138040280607, "grad_norm": 0.6541964859436085, "learning_rate": 9.191435750405091e-06, "loss": 0.3714, "step": 4598 }, { "epoch": 0.20814663951120163, "grad_norm": 0.6553196982674202, "learning_rate": 9.191036095587667e-06, "loss": 0.4332, "step": 4599 }, { "epoch": 0.2081918986195972, "grad_norm": 0.69992746418274, "learning_rate": 9.190636350717747e-06, "loss": 0.3774, "step": 4600 }, { "epoch": 0.20823715772799276, "grad_norm": 0.685563902486613, "learning_rate": 9.190236515803926e-06, "loss": 0.3841, "step": 4601 }, { "epoch": 0.20828241683638832, "grad_norm": 0.7595242844336855, "learning_rate": 9.18983659085479e-06, "loss": 0.3941, "step": 4602 }, { "epoch": 0.20832767594478388, "grad_norm": 0.6384986197619993, "learning_rate": 9.189436575878933e-06, "loss": 0.3761, "step": 4603 }, { "epoch": 0.20837293505317944, "grad_norm": 0.7432963933397082, "learning_rate": 9.189036470884951e-06, "loss": 0.3927, "step": 4604 }, { "epoch": 0.208418194161575, "grad_norm": 0.6954626477052497, "learning_rate": 9.188636275881442e-06, "loss": 0.3661, "step": 4605 }, { "epoch": 0.2084634532699706, "grad_norm": 0.7741203657135156, "learning_rate": 9.188235990877004e-06, "loss": 0.4073, "step": 4606 }, { "epoch": 0.20850871237836616, "grad_norm": 0.4754471604353748, "learning_rate": 9.187835615880235e-06, "loss": 0.5383, "step": 4607 }, { "epoch": 0.20855397148676172, "grad_norm": 0.800689655749288, "learning_rate": 9.187435150899743e-06, "loss": 0.4326, "step": 4608 }, { "epoch": 0.20859923059515728, "grad_norm": 0.3767292595817881, "learning_rate": 9.187034595944131e-06, "loss": 0.4918, "step": 4609 }, { "epoch": 0.20864448970355284, "grad_norm": 0.6318101316229977, "learning_rate": 9.186633951022005e-06, "loss": 0.3991, "step": 4610 }, { "epoch": 0.2086897488119484, "grad_norm": 0.2964277166418596, "learning_rate": 9.186233216141972e-06, "loss": 0.4824, "step": 4611 }, { "epoch": 0.20873500792034397, "grad_norm": 0.32570830424870334, "learning_rate": 9.185832391312644e-06, "loss": 0.4822, "step": 4612 }, { "epoch": 0.20878026702873953, "grad_norm": 0.2974554326662702, "learning_rate": 9.185431476542635e-06, "loss": 0.4878, "step": 4613 }, { "epoch": 0.2088255261371351, "grad_norm": 0.7701402097321183, "learning_rate": 9.185030471840557e-06, "loss": 0.3986, "step": 4614 }, { "epoch": 0.20887078524553065, "grad_norm": 0.7491767662695776, "learning_rate": 9.184629377215028e-06, "loss": 0.4542, "step": 4615 }, { "epoch": 0.20891604435392622, "grad_norm": 0.7697111484129529, "learning_rate": 9.184228192674667e-06, "loss": 0.3708, "step": 4616 }, { "epoch": 0.20896130346232178, "grad_norm": 0.6412765775554661, "learning_rate": 9.183826918228092e-06, "loss": 0.3901, "step": 4617 }, { "epoch": 0.20900656257071737, "grad_norm": 0.719052047311315, "learning_rate": 9.183425553883925e-06, "loss": 0.3966, "step": 4618 }, { "epoch": 0.20905182167911293, "grad_norm": 0.6533657663372489, "learning_rate": 9.183024099650793e-06, "loss": 0.3682, "step": 4619 }, { "epoch": 0.2090970807875085, "grad_norm": 0.4063216099091764, "learning_rate": 9.18262255553732e-06, "loss": 0.5193, "step": 4620 }, { "epoch": 0.20914233989590406, "grad_norm": 0.716403564936248, "learning_rate": 9.182220921552132e-06, "loss": 0.4351, "step": 4621 }, { "epoch": 0.20918759900429962, "grad_norm": 0.6644833222370995, "learning_rate": 9.181819197703864e-06, "loss": 0.3863, "step": 4622 }, { "epoch": 0.20923285811269518, "grad_norm": 0.7599099253584791, "learning_rate": 9.181417384001143e-06, "loss": 0.3436, "step": 4623 }, { "epoch": 0.20927811722109074, "grad_norm": 0.6960306482533338, "learning_rate": 9.181015480452607e-06, "loss": 0.3701, "step": 4624 }, { "epoch": 0.2093233763294863, "grad_norm": 0.6677901326076892, "learning_rate": 9.180613487066888e-06, "loss": 0.4453, "step": 4625 }, { "epoch": 0.20936863543788187, "grad_norm": 0.7052036480574205, "learning_rate": 9.180211403852623e-06, "loss": 0.4158, "step": 4626 }, { "epoch": 0.20941389454627743, "grad_norm": 0.6767244474943633, "learning_rate": 9.179809230818458e-06, "loss": 0.3774, "step": 4627 }, { "epoch": 0.209459153654673, "grad_norm": 0.6896840117858403, "learning_rate": 9.179406967973025e-06, "loss": 0.423, "step": 4628 }, { "epoch": 0.20950441276306855, "grad_norm": 1.0927703091373575, "learning_rate": 9.179004615324976e-06, "loss": 0.3883, "step": 4629 }, { "epoch": 0.20954967187146414, "grad_norm": 0.3893396568527757, "learning_rate": 9.178602172882951e-06, "loss": 0.5055, "step": 4630 }, { "epoch": 0.2095949309798597, "grad_norm": 0.734134054975077, "learning_rate": 9.178199640655598e-06, "loss": 0.3971, "step": 4631 }, { "epoch": 0.20964019008825527, "grad_norm": 0.6996391227153452, "learning_rate": 9.177797018651568e-06, "loss": 0.4527, "step": 4632 }, { "epoch": 0.20968544919665083, "grad_norm": 0.6747446956371578, "learning_rate": 9.177394306879513e-06, "loss": 0.4148, "step": 4633 }, { "epoch": 0.2097307083050464, "grad_norm": 0.6681883466966753, "learning_rate": 9.176991505348082e-06, "loss": 0.4469, "step": 4634 }, { "epoch": 0.20977596741344195, "grad_norm": 0.6171146969041853, "learning_rate": 9.176588614065934e-06, "loss": 0.3342, "step": 4635 }, { "epoch": 0.20982122652183752, "grad_norm": 0.6505458312196792, "learning_rate": 9.17618563304172e-06, "loss": 0.414, "step": 4636 }, { "epoch": 0.20986648563023308, "grad_norm": 0.6281754410034931, "learning_rate": 9.175782562284108e-06, "loss": 0.409, "step": 4637 }, { "epoch": 0.20991174473862864, "grad_norm": 0.6888797123187864, "learning_rate": 9.175379401801752e-06, "loss": 0.4315, "step": 4638 }, { "epoch": 0.2099570038470242, "grad_norm": 0.7493278171281029, "learning_rate": 9.174976151603314e-06, "loss": 0.4113, "step": 4639 }, { "epoch": 0.21000226295541977, "grad_norm": 0.8294347486423541, "learning_rate": 9.174572811697464e-06, "loss": 0.3607, "step": 4640 }, { "epoch": 0.21004752206381536, "grad_norm": 0.7000019570919632, "learning_rate": 9.174169382092864e-06, "loss": 0.3661, "step": 4641 }, { "epoch": 0.21009278117221092, "grad_norm": 0.6980852810654846, "learning_rate": 9.173765862798185e-06, "loss": 0.4516, "step": 4642 }, { "epoch": 0.21013804028060648, "grad_norm": 0.7211418317715546, "learning_rate": 9.173362253822095e-06, "loss": 0.3795, "step": 4643 }, { "epoch": 0.21018329938900204, "grad_norm": 0.39470052963079344, "learning_rate": 9.172958555173268e-06, "loss": 0.4853, "step": 4644 }, { "epoch": 0.2102285584973976, "grad_norm": 0.33231990173857073, "learning_rate": 9.17255476686038e-06, "loss": 0.497, "step": 4645 }, { "epoch": 0.21027381760579317, "grad_norm": 0.6787839011727957, "learning_rate": 9.172150888892102e-06, "loss": 0.3966, "step": 4646 }, { "epoch": 0.21031907671418873, "grad_norm": 0.7294657149087491, "learning_rate": 9.171746921277116e-06, "loss": 0.4028, "step": 4647 }, { "epoch": 0.2103643358225843, "grad_norm": 0.7001236222785713, "learning_rate": 9.171342864024103e-06, "loss": 0.3603, "step": 4648 }, { "epoch": 0.21040959493097985, "grad_norm": 0.9831069171644347, "learning_rate": 9.17093871714174e-06, "loss": 0.4124, "step": 4649 }, { "epoch": 0.21045485403937542, "grad_norm": 0.6921596856617678, "learning_rate": 9.170534480638718e-06, "loss": 0.3952, "step": 4650 }, { "epoch": 0.21050011314777098, "grad_norm": 0.8188302904529361, "learning_rate": 9.170130154523715e-06, "loss": 0.3858, "step": 4651 }, { "epoch": 0.21054537225616654, "grad_norm": 0.6255134043307441, "learning_rate": 9.169725738805425e-06, "loss": 0.399, "step": 4652 }, { "epoch": 0.21059063136456213, "grad_norm": 0.6553509538956197, "learning_rate": 9.169321233492534e-06, "loss": 0.3949, "step": 4653 }, { "epoch": 0.2106358904729577, "grad_norm": 0.6746561449928478, "learning_rate": 9.168916638593736e-06, "loss": 0.3631, "step": 4654 }, { "epoch": 0.21068114958135326, "grad_norm": 0.6714508923031717, "learning_rate": 9.168511954117723e-06, "loss": 0.3969, "step": 4655 }, { "epoch": 0.21072640868974882, "grad_norm": 0.6108944498419144, "learning_rate": 9.16810718007319e-06, "loss": 0.4342, "step": 4656 }, { "epoch": 0.21077166779814438, "grad_norm": 0.6910327522428834, "learning_rate": 9.167702316468835e-06, "loss": 0.4071, "step": 4657 }, { "epoch": 0.21081692690653994, "grad_norm": 0.6512363183886198, "learning_rate": 9.167297363313357e-06, "loss": 0.4079, "step": 4658 }, { "epoch": 0.2108621860149355, "grad_norm": 0.6883357018712101, "learning_rate": 9.166892320615459e-06, "loss": 0.371, "step": 4659 }, { "epoch": 0.21090744512333107, "grad_norm": 0.6252312961373009, "learning_rate": 9.166487188383841e-06, "loss": 0.4051, "step": 4660 }, { "epoch": 0.21095270423172663, "grad_norm": 0.6589575854416991, "learning_rate": 9.166081966627211e-06, "loss": 0.4222, "step": 4661 }, { "epoch": 0.2109979633401222, "grad_norm": 0.6861814844204853, "learning_rate": 9.165676655354274e-06, "loss": 0.4831, "step": 4662 }, { "epoch": 0.21104322244851775, "grad_norm": 0.6707217311626233, "learning_rate": 9.16527125457374e-06, "loss": 0.3438, "step": 4663 }, { "epoch": 0.21108848155691332, "grad_norm": 0.7936046441370954, "learning_rate": 9.16486576429432e-06, "loss": 0.3499, "step": 4664 }, { "epoch": 0.2111337406653089, "grad_norm": 0.7234342589495827, "learning_rate": 9.164460184524726e-06, "loss": 0.3742, "step": 4665 }, { "epoch": 0.21117899977370447, "grad_norm": 0.6772999050647835, "learning_rate": 9.16405451527367e-06, "loss": 0.3686, "step": 4666 }, { "epoch": 0.21122425888210003, "grad_norm": 0.6520754696457214, "learning_rate": 9.163648756549875e-06, "loss": 0.4921, "step": 4667 }, { "epoch": 0.2112695179904956, "grad_norm": 0.7762013029665157, "learning_rate": 9.163242908362053e-06, "loss": 0.3969, "step": 4668 }, { "epoch": 0.21131477709889115, "grad_norm": 0.6829559452971228, "learning_rate": 9.16283697071893e-06, "loss": 0.4119, "step": 4669 }, { "epoch": 0.21136003620728672, "grad_norm": 0.6784654757287741, "learning_rate": 9.162430943629224e-06, "loss": 0.4044, "step": 4670 }, { "epoch": 0.21140529531568228, "grad_norm": 0.336867182270296, "learning_rate": 9.162024827101663e-06, "loss": 0.4802, "step": 4671 }, { "epoch": 0.21145055442407784, "grad_norm": 0.3422647615733906, "learning_rate": 9.161618621144967e-06, "loss": 0.5177, "step": 4672 }, { "epoch": 0.2114958135324734, "grad_norm": 1.1008475237506572, "learning_rate": 9.161212325767873e-06, "loss": 0.4127, "step": 4673 }, { "epoch": 0.21154107264086897, "grad_norm": 0.6685866425309338, "learning_rate": 9.160805940979104e-06, "loss": 0.38, "step": 4674 }, { "epoch": 0.21158633174926453, "grad_norm": 0.6905136705212983, "learning_rate": 9.160399466787392e-06, "loss": 0.4012, "step": 4675 }, { "epoch": 0.21163159085766012, "grad_norm": 0.9911104484428561, "learning_rate": 9.159992903201478e-06, "loss": 0.3776, "step": 4676 }, { "epoch": 0.21167684996605568, "grad_norm": 0.7258200921880575, "learning_rate": 9.15958625023009e-06, "loss": 0.4013, "step": 4677 }, { "epoch": 0.21172210907445124, "grad_norm": 0.40608231174106374, "learning_rate": 9.15917950788197e-06, "loss": 0.4963, "step": 4678 }, { "epoch": 0.2117673681828468, "grad_norm": 0.753776177198563, "learning_rate": 9.158772676165854e-06, "loss": 0.3785, "step": 4679 }, { "epoch": 0.21181262729124237, "grad_norm": 0.4500270088526661, "learning_rate": 9.158365755090488e-06, "loss": 0.5132, "step": 4680 }, { "epoch": 0.21185788639963793, "grad_norm": 1.1467954943239418, "learning_rate": 9.157958744664612e-06, "loss": 0.4071, "step": 4681 }, { "epoch": 0.2119031455080335, "grad_norm": 0.6675630555278697, "learning_rate": 9.157551644896974e-06, "loss": 0.4126, "step": 4682 }, { "epoch": 0.21194840461642905, "grad_norm": 0.29502147340878954, "learning_rate": 9.15714445579632e-06, "loss": 0.4719, "step": 4683 }, { "epoch": 0.21199366372482462, "grad_norm": 0.3252216219097457, "learning_rate": 9.156737177371399e-06, "loss": 0.5063, "step": 4684 }, { "epoch": 0.21203892283322018, "grad_norm": 1.8135010055214982, "learning_rate": 9.156329809630962e-06, "loss": 0.4284, "step": 4685 }, { "epoch": 0.21208418194161574, "grad_norm": 0.38930115148456806, "learning_rate": 9.155922352583763e-06, "loss": 0.4703, "step": 4686 }, { "epoch": 0.2121294410500113, "grad_norm": 1.280235310925464, "learning_rate": 9.155514806238557e-06, "loss": 0.3811, "step": 4687 }, { "epoch": 0.2121747001584069, "grad_norm": 0.31157176205214804, "learning_rate": 9.1551071706041e-06, "loss": 0.5066, "step": 4688 }, { "epoch": 0.21221995926680245, "grad_norm": 1.170161611611983, "learning_rate": 9.154699445689151e-06, "loss": 0.3751, "step": 4689 }, { "epoch": 0.21226521837519802, "grad_norm": 0.6990135258357858, "learning_rate": 9.154291631502471e-06, "loss": 0.4575, "step": 4690 }, { "epoch": 0.21231047748359358, "grad_norm": 0.7945124398062862, "learning_rate": 9.153883728052824e-06, "loss": 0.3965, "step": 4691 }, { "epoch": 0.21235573659198914, "grad_norm": 1.098263554852461, "learning_rate": 9.153475735348973e-06, "loss": 0.3956, "step": 4692 }, { "epoch": 0.2124009957003847, "grad_norm": 0.7792909497721552, "learning_rate": 9.153067653399684e-06, "loss": 0.4391, "step": 4693 }, { "epoch": 0.21244625480878027, "grad_norm": 0.7769896070381618, "learning_rate": 9.152659482213727e-06, "loss": 0.3678, "step": 4694 }, { "epoch": 0.21249151391717583, "grad_norm": 0.7248722333731626, "learning_rate": 9.152251221799871e-06, "loss": 0.3668, "step": 4695 }, { "epoch": 0.2125367730255714, "grad_norm": 0.7951512445368836, "learning_rate": 9.15184287216689e-06, "loss": 0.3748, "step": 4696 }, { "epoch": 0.21258203213396695, "grad_norm": 0.46089270965008194, "learning_rate": 9.151434433323556e-06, "loss": 0.5111, "step": 4697 }, { "epoch": 0.21262729124236251, "grad_norm": 0.7608710232314421, "learning_rate": 9.151025905278647e-06, "loss": 0.3889, "step": 4698 }, { "epoch": 0.21267255035075808, "grad_norm": 0.6912646554784957, "learning_rate": 9.15061728804094e-06, "loss": 0.3943, "step": 4699 }, { "epoch": 0.21271780945915367, "grad_norm": 0.6628035744904157, "learning_rate": 9.150208581619215e-06, "loss": 0.3742, "step": 4700 }, { "epoch": 0.21276306856754923, "grad_norm": 0.8216102198117472, "learning_rate": 9.149799786022256e-06, "loss": 0.3574, "step": 4701 }, { "epoch": 0.2128083276759448, "grad_norm": 0.7410885559035563, "learning_rate": 9.149390901258841e-06, "loss": 0.3828, "step": 4702 }, { "epoch": 0.21285358678434035, "grad_norm": 0.6536436140256802, "learning_rate": 9.14898192733776e-06, "loss": 0.3664, "step": 4703 }, { "epoch": 0.21289884589273592, "grad_norm": 0.6977789389528802, "learning_rate": 9.148572864267804e-06, "loss": 0.429, "step": 4704 }, { "epoch": 0.21294410500113148, "grad_norm": 0.7408643479307443, "learning_rate": 9.148163712057755e-06, "loss": 0.4213, "step": 4705 }, { "epoch": 0.21298936410952704, "grad_norm": 0.6933655118606328, "learning_rate": 9.147754470716407e-06, "loss": 0.3908, "step": 4706 }, { "epoch": 0.2130346232179226, "grad_norm": 0.6693690607323451, "learning_rate": 9.147345140252557e-06, "loss": 0.3323, "step": 4707 }, { "epoch": 0.21307988232631817, "grad_norm": 0.6021530915845484, "learning_rate": 9.146935720674996e-06, "loss": 0.3634, "step": 4708 }, { "epoch": 0.21312514143471373, "grad_norm": 0.37323411086081587, "learning_rate": 9.146526211992523e-06, "loss": 0.4996, "step": 4709 }, { "epoch": 0.2131704005431093, "grad_norm": 0.6934432204208978, "learning_rate": 9.146116614213938e-06, "loss": 0.3723, "step": 4710 }, { "epoch": 0.21321565965150485, "grad_norm": 0.684048900725944, "learning_rate": 9.14570692734804e-06, "loss": 0.4022, "step": 4711 }, { "epoch": 0.21326091875990044, "grad_norm": 0.6876883349474486, "learning_rate": 9.145297151403631e-06, "loss": 0.3528, "step": 4712 }, { "epoch": 0.213306177868296, "grad_norm": 0.6900448103049897, "learning_rate": 9.14488728638952e-06, "loss": 0.3581, "step": 4713 }, { "epoch": 0.21335143697669157, "grad_norm": 0.6654430713269235, "learning_rate": 9.144477332314509e-06, "loss": 0.3596, "step": 4714 }, { "epoch": 0.21339669608508713, "grad_norm": 0.7570070927296473, "learning_rate": 9.14406728918741e-06, "loss": 0.3447, "step": 4715 }, { "epoch": 0.2134419551934827, "grad_norm": 0.7467852955198542, "learning_rate": 9.143657157017034e-06, "loss": 0.4206, "step": 4716 }, { "epoch": 0.21348721430187825, "grad_norm": 0.6630878162500251, "learning_rate": 9.14324693581219e-06, "loss": 0.3856, "step": 4717 }, { "epoch": 0.21353247341027382, "grad_norm": 0.582274695283196, "learning_rate": 9.142836625581694e-06, "loss": 0.3687, "step": 4718 }, { "epoch": 0.21357773251866938, "grad_norm": 0.6545244065786855, "learning_rate": 9.142426226334365e-06, "loss": 0.3833, "step": 4719 }, { "epoch": 0.21362299162706494, "grad_norm": 0.6883635673957185, "learning_rate": 9.142015738079017e-06, "loss": 0.4312, "step": 4720 }, { "epoch": 0.2136682507354605, "grad_norm": 0.6751076178839724, "learning_rate": 9.141605160824473e-06, "loss": 0.3916, "step": 4721 }, { "epoch": 0.21371350984385606, "grad_norm": 0.5309631556880734, "learning_rate": 9.141194494579553e-06, "loss": 0.511, "step": 4722 }, { "epoch": 0.21375876895225165, "grad_norm": 0.7167569087582294, "learning_rate": 9.140783739353083e-06, "loss": 0.3938, "step": 4723 }, { "epoch": 0.21380402806064722, "grad_norm": 0.662087956027807, "learning_rate": 9.140372895153887e-06, "loss": 0.4021, "step": 4724 }, { "epoch": 0.21384928716904278, "grad_norm": 0.6237946620606332, "learning_rate": 9.139961961990796e-06, "loss": 0.4173, "step": 4725 }, { "epoch": 0.21389454627743834, "grad_norm": 0.6856754692559144, "learning_rate": 9.139550939872635e-06, "loss": 0.4065, "step": 4726 }, { "epoch": 0.2139398053858339, "grad_norm": 0.7084939389228818, "learning_rate": 9.139139828808238e-06, "loss": 0.4086, "step": 4727 }, { "epoch": 0.21398506449422947, "grad_norm": 0.6144425495088535, "learning_rate": 9.13872862880644e-06, "loss": 0.3686, "step": 4728 }, { "epoch": 0.21403032360262503, "grad_norm": 0.6861154793751022, "learning_rate": 9.138317339876073e-06, "loss": 0.3772, "step": 4729 }, { "epoch": 0.2140755827110206, "grad_norm": 0.5890343111376076, "learning_rate": 9.137905962025977e-06, "loss": 0.5363, "step": 4730 }, { "epoch": 0.21412084181941615, "grad_norm": 0.7217914460843785, "learning_rate": 9.13749449526499e-06, "loss": 0.4486, "step": 4731 }, { "epoch": 0.21416610092781171, "grad_norm": 0.6471609436769172, "learning_rate": 9.137082939601953e-06, "loss": 0.4231, "step": 4732 }, { "epoch": 0.21421136003620728, "grad_norm": 0.6431682681393758, "learning_rate": 9.136671295045713e-06, "loss": 0.4308, "step": 4733 }, { "epoch": 0.21425661914460284, "grad_norm": 0.6473156110408935, "learning_rate": 9.13625956160511e-06, "loss": 0.3905, "step": 4734 }, { "epoch": 0.21430187825299843, "grad_norm": 0.6914681606814352, "learning_rate": 9.135847739288991e-06, "loss": 0.4215, "step": 4735 }, { "epoch": 0.214347137361394, "grad_norm": 0.6284487613866135, "learning_rate": 9.135435828106208e-06, "loss": 0.3643, "step": 4736 }, { "epoch": 0.21439239646978955, "grad_norm": 0.6258062829449805, "learning_rate": 9.135023828065609e-06, "loss": 0.3776, "step": 4737 }, { "epoch": 0.21443765557818512, "grad_norm": 0.6517393869689241, "learning_rate": 9.13461173917605e-06, "loss": 0.3803, "step": 4738 }, { "epoch": 0.21448291468658068, "grad_norm": 0.6343432447683893, "learning_rate": 9.134199561446379e-06, "loss": 0.4224, "step": 4739 }, { "epoch": 0.21452817379497624, "grad_norm": 0.7060100945070159, "learning_rate": 9.13378729488546e-06, "loss": 0.3879, "step": 4740 }, { "epoch": 0.2145734329033718, "grad_norm": 0.6590280271404857, "learning_rate": 9.133374939502147e-06, "loss": 0.3826, "step": 4741 }, { "epoch": 0.21461869201176736, "grad_norm": 0.6401579046669076, "learning_rate": 9.132962495305302e-06, "loss": 0.373, "step": 4742 }, { "epoch": 0.21466395112016293, "grad_norm": 0.8626848325193524, "learning_rate": 9.132549962303786e-06, "loss": 0.4087, "step": 4743 }, { "epoch": 0.2147092102285585, "grad_norm": 0.6145110012458083, "learning_rate": 9.132137340506464e-06, "loss": 0.3839, "step": 4744 }, { "epoch": 0.21475446933695405, "grad_norm": 0.6269187719056039, "learning_rate": 9.131724629922199e-06, "loss": 0.3955, "step": 4745 }, { "epoch": 0.2147997284453496, "grad_norm": 0.6731846589000745, "learning_rate": 9.131311830559864e-06, "loss": 0.3995, "step": 4746 }, { "epoch": 0.2148449875537452, "grad_norm": 0.628819585985676, "learning_rate": 9.130898942428326e-06, "loss": 0.3406, "step": 4747 }, { "epoch": 0.21489024666214077, "grad_norm": 0.6643307393653383, "learning_rate": 9.130485965536455e-06, "loss": 0.4098, "step": 4748 }, { "epoch": 0.21493550577053633, "grad_norm": 0.6360776379996099, "learning_rate": 9.130072899893127e-06, "loss": 0.4016, "step": 4749 }, { "epoch": 0.2149807648789319, "grad_norm": 0.6744085126225986, "learning_rate": 9.129659745507219e-06, "loss": 0.405, "step": 4750 }, { "epoch": 0.21502602398732745, "grad_norm": 0.6673408539577744, "learning_rate": 9.129246502387602e-06, "loss": 0.3999, "step": 4751 }, { "epoch": 0.21507128309572301, "grad_norm": 0.7208579860734767, "learning_rate": 9.128833170543164e-06, "loss": 0.4242, "step": 4752 }, { "epoch": 0.21511654220411858, "grad_norm": 0.5792654836484264, "learning_rate": 9.12841974998278e-06, "loss": 0.3537, "step": 4753 }, { "epoch": 0.21516180131251414, "grad_norm": 0.6245147101677967, "learning_rate": 9.128006240715335e-06, "loss": 0.3769, "step": 4754 }, { "epoch": 0.2152070604209097, "grad_norm": 0.7417289049341894, "learning_rate": 9.127592642749714e-06, "loss": 0.4466, "step": 4755 }, { "epoch": 0.21525231952930526, "grad_norm": 0.7202075800051113, "learning_rate": 9.127178956094805e-06, "loss": 0.4151, "step": 4756 }, { "epoch": 0.21529757863770083, "grad_norm": 0.6338443835750316, "learning_rate": 9.126765180759495e-06, "loss": 0.4143, "step": 4757 }, { "epoch": 0.2153428377460964, "grad_norm": 0.7038352222725998, "learning_rate": 9.126351316752677e-06, "loss": 0.3949, "step": 4758 }, { "epoch": 0.21538809685449198, "grad_norm": 0.7621821057855113, "learning_rate": 9.125937364083241e-06, "loss": 0.383, "step": 4759 }, { "epoch": 0.21543335596288754, "grad_norm": 0.6183332780913333, "learning_rate": 9.125523322760084e-06, "loss": 0.3882, "step": 4760 }, { "epoch": 0.2154786150712831, "grad_norm": 0.7084547271369006, "learning_rate": 9.1251091927921e-06, "loss": 0.3748, "step": 4761 }, { "epoch": 0.21552387417967867, "grad_norm": 0.6274635436111439, "learning_rate": 9.124694974188188e-06, "loss": 0.4986, "step": 4762 }, { "epoch": 0.21556913328807423, "grad_norm": 0.3987272632360564, "learning_rate": 9.124280666957251e-06, "loss": 0.4994, "step": 4763 }, { "epoch": 0.2156143923964698, "grad_norm": 0.3045904274637565, "learning_rate": 9.123866271108188e-06, "loss": 0.4929, "step": 4764 }, { "epoch": 0.21565965150486535, "grad_norm": 0.7009826919682061, "learning_rate": 9.123451786649906e-06, "loss": 0.3927, "step": 4765 }, { "epoch": 0.21570491061326091, "grad_norm": 0.6765166384139596, "learning_rate": 9.123037213591308e-06, "loss": 0.4443, "step": 4766 }, { "epoch": 0.21575016972165648, "grad_norm": 0.6274547084962255, "learning_rate": 9.122622551941303e-06, "loss": 0.4134, "step": 4767 }, { "epoch": 0.21579542883005204, "grad_norm": 0.6800427324859651, "learning_rate": 9.122207801708802e-06, "loss": 0.4027, "step": 4768 }, { "epoch": 0.2158406879384476, "grad_norm": 0.6689189439956374, "learning_rate": 9.121792962902715e-06, "loss": 0.3804, "step": 4769 }, { "epoch": 0.2158859470468432, "grad_norm": 1.0224311576834435, "learning_rate": 9.121378035531957e-06, "loss": 0.3983, "step": 4770 }, { "epoch": 0.21593120615523875, "grad_norm": 0.6625989732933459, "learning_rate": 9.120963019605442e-06, "loss": 0.3779, "step": 4771 }, { "epoch": 0.21597646526363432, "grad_norm": 0.9989802934708129, "learning_rate": 9.12054791513209e-06, "loss": 0.5067, "step": 4772 }, { "epoch": 0.21602172437202988, "grad_norm": 0.6473775166086109, "learning_rate": 9.120132722120817e-06, "loss": 0.3762, "step": 4773 }, { "epoch": 0.21606698348042544, "grad_norm": 0.6851233182293817, "learning_rate": 9.119717440580547e-06, "loss": 0.4046, "step": 4774 }, { "epoch": 0.216112242588821, "grad_norm": 0.6633570528725057, "learning_rate": 9.1193020705202e-06, "loss": 0.3725, "step": 4775 }, { "epoch": 0.21615750169721656, "grad_norm": 0.38037237653395206, "learning_rate": 9.118886611948704e-06, "loss": 0.4849, "step": 4776 }, { "epoch": 0.21620276080561213, "grad_norm": 0.7112060405305283, "learning_rate": 9.118471064874985e-06, "loss": 0.3842, "step": 4777 }, { "epoch": 0.2162480199140077, "grad_norm": 0.5085666920474634, "learning_rate": 9.118055429307972e-06, "loss": 0.4991, "step": 4778 }, { "epoch": 0.21629327902240325, "grad_norm": 0.7313166012275039, "learning_rate": 9.117639705256595e-06, "loss": 0.4218, "step": 4779 }, { "epoch": 0.2163385381307988, "grad_norm": 0.5094894776139752, "learning_rate": 9.117223892729788e-06, "loss": 0.5184, "step": 4780 }, { "epoch": 0.21638379723919438, "grad_norm": 0.8138243525194827, "learning_rate": 9.116807991736483e-06, "loss": 0.4278, "step": 4781 }, { "epoch": 0.21642905634758997, "grad_norm": 0.7507256150215487, "learning_rate": 9.11639200228562e-06, "loss": 0.3727, "step": 4782 }, { "epoch": 0.21647431545598553, "grad_norm": 0.7920366485254184, "learning_rate": 9.115975924386133e-06, "loss": 0.4084, "step": 4783 }, { "epoch": 0.2165195745643811, "grad_norm": 0.6485474045722099, "learning_rate": 9.115559758046967e-06, "loss": 0.3937, "step": 4784 }, { "epoch": 0.21656483367277665, "grad_norm": 0.935641414641697, "learning_rate": 9.115143503277061e-06, "loss": 0.4281, "step": 4785 }, { "epoch": 0.21661009278117221, "grad_norm": 0.690370433047125, "learning_rate": 9.11472716008536e-06, "loss": 0.4214, "step": 4786 }, { "epoch": 0.21665535188956778, "grad_norm": 0.6883280151352387, "learning_rate": 9.114310728480809e-06, "loss": 0.4211, "step": 4787 }, { "epoch": 0.21670061099796334, "grad_norm": 0.6955993473765785, "learning_rate": 9.113894208472357e-06, "loss": 0.4069, "step": 4788 }, { "epoch": 0.2167458701063589, "grad_norm": 0.9616140275679443, "learning_rate": 9.113477600068954e-06, "loss": 0.3835, "step": 4789 }, { "epoch": 0.21679112921475446, "grad_norm": 0.6925944169611911, "learning_rate": 9.11306090327955e-06, "loss": 0.4239, "step": 4790 }, { "epoch": 0.21683638832315003, "grad_norm": 0.6841971425614416, "learning_rate": 9.112644118113098e-06, "loss": 0.4307, "step": 4791 }, { "epoch": 0.2168816474315456, "grad_norm": 0.6420255859979275, "learning_rate": 9.112227244578557e-06, "loss": 0.3921, "step": 4792 }, { "epoch": 0.21692690653994115, "grad_norm": 1.0857826233798953, "learning_rate": 9.111810282684883e-06, "loss": 0.4979, "step": 4793 }, { "epoch": 0.21697216564833674, "grad_norm": 0.7103692532900913, "learning_rate": 9.111393232441033e-06, "loss": 0.5308, "step": 4794 }, { "epoch": 0.2170174247567323, "grad_norm": 0.31048946305929215, "learning_rate": 9.11097609385597e-06, "loss": 0.515, "step": 4795 }, { "epoch": 0.21706268386512786, "grad_norm": 0.7646353820957875, "learning_rate": 9.110558866938657e-06, "loss": 0.3976, "step": 4796 }, { "epoch": 0.21710794297352343, "grad_norm": 0.847772148923698, "learning_rate": 9.110141551698058e-06, "loss": 0.4052, "step": 4797 }, { "epoch": 0.217153202081919, "grad_norm": 0.781170161994958, "learning_rate": 9.10972414814314e-06, "loss": 0.3784, "step": 4798 }, { "epoch": 0.21719846119031455, "grad_norm": 0.7387783443504868, "learning_rate": 9.109306656282873e-06, "loss": 0.4047, "step": 4799 }, { "epoch": 0.2172437202987101, "grad_norm": 0.7776877608382816, "learning_rate": 9.108889076126226e-06, "loss": 0.415, "step": 4800 }, { "epoch": 0.21728897940710568, "grad_norm": 1.8493601277062992, "learning_rate": 9.108471407682173e-06, "loss": 0.5213, "step": 4801 }, { "epoch": 0.21733423851550124, "grad_norm": 0.7543258013996105, "learning_rate": 9.108053650959687e-06, "loss": 0.401, "step": 4802 }, { "epoch": 0.2173794976238968, "grad_norm": 0.6386692161119694, "learning_rate": 9.107635805967746e-06, "loss": 0.4269, "step": 4803 }, { "epoch": 0.21742475673229236, "grad_norm": 0.7614681871635838, "learning_rate": 9.107217872715326e-06, "loss": 0.5302, "step": 4804 }, { "epoch": 0.21747001584068792, "grad_norm": 0.7008794665046963, "learning_rate": 9.10679985121141e-06, "loss": 0.4, "step": 4805 }, { "epoch": 0.21751527494908351, "grad_norm": 0.6508911059006347, "learning_rate": 9.106381741464976e-06, "loss": 0.5107, "step": 4806 }, { "epoch": 0.21756053405747908, "grad_norm": 0.7100459278013462, "learning_rate": 9.105963543485012e-06, "loss": 0.3736, "step": 4807 }, { "epoch": 0.21760579316587464, "grad_norm": 0.7069088764353862, "learning_rate": 9.105545257280502e-06, "loss": 0.4255, "step": 4808 }, { "epoch": 0.2176510522742702, "grad_norm": 0.7355101644306209, "learning_rate": 9.105126882860431e-06, "loss": 0.4482, "step": 4809 }, { "epoch": 0.21769631138266576, "grad_norm": 0.6537061814493169, "learning_rate": 9.104708420233794e-06, "loss": 0.3661, "step": 4810 }, { "epoch": 0.21774157049106133, "grad_norm": 0.7340349633672104, "learning_rate": 9.104289869409577e-06, "loss": 0.3673, "step": 4811 }, { "epoch": 0.2177868295994569, "grad_norm": 0.6970942224495815, "learning_rate": 9.103871230396778e-06, "loss": 0.3988, "step": 4812 }, { "epoch": 0.21783208870785245, "grad_norm": 0.6889946495799093, "learning_rate": 9.10345250320439e-06, "loss": 0.3794, "step": 4813 }, { "epoch": 0.217877347816248, "grad_norm": 1.2788229066156596, "learning_rate": 9.103033687841412e-06, "loss": 0.4945, "step": 4814 }, { "epoch": 0.21792260692464357, "grad_norm": 0.6827859617043349, "learning_rate": 9.10261478431684e-06, "loss": 0.3967, "step": 4815 }, { "epoch": 0.21796786603303914, "grad_norm": 0.6939430838461157, "learning_rate": 9.102195792639677e-06, "loss": 0.4055, "step": 4816 }, { "epoch": 0.21801312514143473, "grad_norm": 0.6831299091964597, "learning_rate": 9.101776712818924e-06, "loss": 0.4031, "step": 4817 }, { "epoch": 0.2180583842498303, "grad_norm": 0.6629483845316078, "learning_rate": 9.101357544863589e-06, "loss": 0.398, "step": 4818 }, { "epoch": 0.21810364335822585, "grad_norm": 0.7171445450976456, "learning_rate": 9.100938288782675e-06, "loss": 0.4025, "step": 4819 }, { "epoch": 0.2181489024666214, "grad_norm": 0.5089509713601021, "learning_rate": 9.100518944585194e-06, "loss": 0.4927, "step": 4820 }, { "epoch": 0.21819416157501698, "grad_norm": 0.7030141717004029, "learning_rate": 9.100099512280155e-06, "loss": 0.4026, "step": 4821 }, { "epoch": 0.21823942068341254, "grad_norm": 0.668643383248003, "learning_rate": 9.099679991876567e-06, "loss": 0.3793, "step": 4822 }, { "epoch": 0.2182846797918081, "grad_norm": 0.6782888733885742, "learning_rate": 9.09926038338345e-06, "loss": 0.3817, "step": 4823 }, { "epoch": 0.21832993890020366, "grad_norm": 0.6695192386556748, "learning_rate": 9.098840686809816e-06, "loss": 0.354, "step": 4824 }, { "epoch": 0.21837519800859923, "grad_norm": 0.6071035795399119, "learning_rate": 9.098420902164684e-06, "loss": 0.5142, "step": 4825 }, { "epoch": 0.2184204571169948, "grad_norm": 0.6713769264047075, "learning_rate": 9.098001029457074e-06, "loss": 0.4157, "step": 4826 }, { "epoch": 0.21846571622539035, "grad_norm": 0.6264425918657583, "learning_rate": 9.097581068696009e-06, "loss": 0.3699, "step": 4827 }, { "epoch": 0.2185109753337859, "grad_norm": 0.6349502047551863, "learning_rate": 9.09716101989051e-06, "loss": 0.4022, "step": 4828 }, { "epoch": 0.2185562344421815, "grad_norm": 0.6473338464170852, "learning_rate": 9.096740883049606e-06, "loss": 0.3828, "step": 4829 }, { "epoch": 0.21860149355057706, "grad_norm": 0.6804309986334778, "learning_rate": 9.096320658182323e-06, "loss": 0.3805, "step": 4830 }, { "epoch": 0.21864675265897263, "grad_norm": 0.4044106437016609, "learning_rate": 9.095900345297688e-06, "loss": 0.5005, "step": 4831 }, { "epoch": 0.2186920117673682, "grad_norm": 0.6281173047090177, "learning_rate": 9.095479944404735e-06, "loss": 0.3727, "step": 4832 }, { "epoch": 0.21873727087576375, "grad_norm": 0.6724742320266274, "learning_rate": 9.095059455512496e-06, "loss": 0.4261, "step": 4833 }, { "epoch": 0.2187825299841593, "grad_norm": 0.6443652166595981, "learning_rate": 9.094638878630007e-06, "loss": 0.3509, "step": 4834 }, { "epoch": 0.21882778909255488, "grad_norm": 1.0013061561516774, "learning_rate": 9.094218213766304e-06, "loss": 0.3848, "step": 4835 }, { "epoch": 0.21887304820095044, "grad_norm": 0.957768121562034, "learning_rate": 9.093797460930426e-06, "loss": 0.4139, "step": 4836 }, { "epoch": 0.218918307309346, "grad_norm": 0.7983684200974046, "learning_rate": 9.093376620131414e-06, "loss": 0.4067, "step": 4837 }, { "epoch": 0.21896356641774156, "grad_norm": 0.648737570449002, "learning_rate": 9.09295569137831e-06, "loss": 0.4392, "step": 4838 }, { "epoch": 0.21900882552613712, "grad_norm": 0.7519313642500751, "learning_rate": 9.092534674680158e-06, "loss": 0.3965, "step": 4839 }, { "epoch": 0.2190540846345327, "grad_norm": 0.6612125002526383, "learning_rate": 9.092113570046005e-06, "loss": 0.397, "step": 4840 }, { "epoch": 0.21909934374292828, "grad_norm": 0.6496470297316416, "learning_rate": 9.0916923774849e-06, "loss": 0.3922, "step": 4841 }, { "epoch": 0.21914460285132384, "grad_norm": 0.6049859360584547, "learning_rate": 9.091271097005894e-06, "loss": 0.3593, "step": 4842 }, { "epoch": 0.2191898619597194, "grad_norm": 0.66596102844228, "learning_rate": 9.090849728618034e-06, "loss": 0.3995, "step": 4843 }, { "epoch": 0.21923512106811496, "grad_norm": 0.658302161673842, "learning_rate": 9.090428272330381e-06, "loss": 0.397, "step": 4844 }, { "epoch": 0.21928038017651053, "grad_norm": 0.7329682214731195, "learning_rate": 9.090006728151986e-06, "loss": 0.4294, "step": 4845 }, { "epoch": 0.2193256392849061, "grad_norm": 0.7542852602708906, "learning_rate": 9.089585096091906e-06, "loss": 0.4163, "step": 4846 }, { "epoch": 0.21937089839330165, "grad_norm": 0.7274274023986838, "learning_rate": 9.089163376159205e-06, "loss": 0.4238, "step": 4847 }, { "epoch": 0.2194161575016972, "grad_norm": 0.6175911757542972, "learning_rate": 9.08874156836294e-06, "loss": 0.4075, "step": 4848 }, { "epoch": 0.21946141661009277, "grad_norm": 0.6682924426530112, "learning_rate": 9.088319672712179e-06, "loss": 0.4295, "step": 4849 }, { "epoch": 0.21950667571848834, "grad_norm": 1.1939628574369072, "learning_rate": 9.087897689215983e-06, "loss": 0.4059, "step": 4850 }, { "epoch": 0.2195519348268839, "grad_norm": 0.43468805882083733, "learning_rate": 9.087475617883419e-06, "loss": 0.5302, "step": 4851 }, { "epoch": 0.2195971939352795, "grad_norm": 0.6512979730787424, "learning_rate": 9.08705345872356e-06, "loss": 0.3914, "step": 4852 }, { "epoch": 0.21964245304367505, "grad_norm": 0.6743507359541027, "learning_rate": 9.086631211745474e-06, "loss": 0.3487, "step": 4853 }, { "epoch": 0.2196877121520706, "grad_norm": 0.35648851900559125, "learning_rate": 9.086208876958233e-06, "loss": 0.5257, "step": 4854 }, { "epoch": 0.21973297126046618, "grad_norm": 0.6652147485374682, "learning_rate": 9.085786454370915e-06, "loss": 0.3392, "step": 4855 }, { "epoch": 0.21977823036886174, "grad_norm": 0.7496692106275002, "learning_rate": 9.085363943992593e-06, "loss": 0.4144, "step": 4856 }, { "epoch": 0.2198234894772573, "grad_norm": 0.3034966816973855, "learning_rate": 9.084941345832348e-06, "loss": 0.4836, "step": 4857 }, { "epoch": 0.21986874858565286, "grad_norm": 0.6818992193702953, "learning_rate": 9.08451865989926e-06, "loss": 0.3607, "step": 4858 }, { "epoch": 0.21991400769404842, "grad_norm": 0.734131224447571, "learning_rate": 9.08409588620241e-06, "loss": 0.4213, "step": 4859 }, { "epoch": 0.219959266802444, "grad_norm": 0.7196822706743875, "learning_rate": 9.083673024750882e-06, "loss": 0.3966, "step": 4860 }, { "epoch": 0.22000452591083955, "grad_norm": 0.7125834370384616, "learning_rate": 9.083250075553765e-06, "loss": 0.3849, "step": 4861 }, { "epoch": 0.2200497850192351, "grad_norm": 0.7844549672485713, "learning_rate": 9.082827038620143e-06, "loss": 0.4505, "step": 4862 }, { "epoch": 0.22009504412763067, "grad_norm": 0.6848019492200854, "learning_rate": 9.082403913959109e-06, "loss": 0.3821, "step": 4863 }, { "epoch": 0.22014030323602626, "grad_norm": 0.34865430290147625, "learning_rate": 9.08198070157975e-06, "loss": 0.517, "step": 4864 }, { "epoch": 0.22018556234442183, "grad_norm": 0.5992614487673076, "learning_rate": 9.081557401491164e-06, "loss": 0.3561, "step": 4865 }, { "epoch": 0.2202308214528174, "grad_norm": 0.6710248014391402, "learning_rate": 9.081134013702447e-06, "loss": 0.3714, "step": 4866 }, { "epoch": 0.22027608056121295, "grad_norm": 0.6860968982296224, "learning_rate": 9.080710538222692e-06, "loss": 0.4516, "step": 4867 }, { "epoch": 0.2203213396696085, "grad_norm": 0.7244537982890571, "learning_rate": 9.080286975061e-06, "loss": 0.4373, "step": 4868 }, { "epoch": 0.22036659877800407, "grad_norm": 0.6788330842807545, "learning_rate": 9.079863324226473e-06, "loss": 0.4273, "step": 4869 }, { "epoch": 0.22041185788639964, "grad_norm": 0.6624675454264172, "learning_rate": 9.079439585728214e-06, "loss": 0.3791, "step": 4870 }, { "epoch": 0.2204571169947952, "grad_norm": 0.6730079736173539, "learning_rate": 9.079015759575327e-06, "loss": 0.3956, "step": 4871 }, { "epoch": 0.22050237610319076, "grad_norm": 0.699365885683548, "learning_rate": 9.078591845776921e-06, "loss": 0.3508, "step": 4872 }, { "epoch": 0.22054763521158632, "grad_norm": 0.6799562739889206, "learning_rate": 9.0781678443421e-06, "loss": 0.4171, "step": 4873 }, { "epoch": 0.22059289431998189, "grad_norm": 0.6385646491210312, "learning_rate": 9.077743755279977e-06, "loss": 0.3761, "step": 4874 }, { "epoch": 0.22063815342837745, "grad_norm": 0.6774749292746666, "learning_rate": 9.077319578599667e-06, "loss": 0.3867, "step": 4875 }, { "epoch": 0.22068341253677304, "grad_norm": 0.6287712017883627, "learning_rate": 9.076895314310282e-06, "loss": 0.3534, "step": 4876 }, { "epoch": 0.2207286716451686, "grad_norm": 0.6665113821873541, "learning_rate": 9.076470962420935e-06, "loss": 0.3711, "step": 4877 }, { "epoch": 0.22077393075356416, "grad_norm": 0.6834998561502025, "learning_rate": 9.076046522940749e-06, "loss": 0.3757, "step": 4878 }, { "epoch": 0.22081918986195972, "grad_norm": 0.7744574841169455, "learning_rate": 9.075621995878841e-06, "loss": 0.4163, "step": 4879 }, { "epoch": 0.2208644489703553, "grad_norm": 0.6814692976919767, "learning_rate": 9.075197381244333e-06, "loss": 0.407, "step": 4880 }, { "epoch": 0.22090970807875085, "grad_norm": 0.3696973213244952, "learning_rate": 9.074772679046351e-06, "loss": 0.5034, "step": 4881 }, { "epoch": 0.2209549671871464, "grad_norm": 0.6658175315998246, "learning_rate": 9.074347889294017e-06, "loss": 0.3985, "step": 4882 }, { "epoch": 0.22100022629554197, "grad_norm": 0.7359226606161277, "learning_rate": 9.073923011996462e-06, "loss": 0.424, "step": 4883 }, { "epoch": 0.22104548540393754, "grad_norm": 0.3223135860462851, "learning_rate": 9.073498047162813e-06, "loss": 0.496, "step": 4884 }, { "epoch": 0.2210907445123331, "grad_norm": 0.6504920645333246, "learning_rate": 9.073072994802202e-06, "loss": 0.4055, "step": 4885 }, { "epoch": 0.22113600362072866, "grad_norm": 0.7523501341301879, "learning_rate": 9.072647854923763e-06, "loss": 0.3905, "step": 4886 }, { "epoch": 0.22118126272912422, "grad_norm": 0.7486959836693489, "learning_rate": 9.072222627536627e-06, "loss": 0.3838, "step": 4887 }, { "epoch": 0.2212265218375198, "grad_norm": 0.6528488225448029, "learning_rate": 9.071797312649934e-06, "loss": 0.3548, "step": 4888 }, { "epoch": 0.22127178094591538, "grad_norm": 0.6901906657633865, "learning_rate": 9.071371910272823e-06, "loss": 0.3931, "step": 4889 }, { "epoch": 0.22131704005431094, "grad_norm": 0.7001577349443593, "learning_rate": 9.070946420414435e-06, "loss": 0.3866, "step": 4890 }, { "epoch": 0.2213622991627065, "grad_norm": 0.801922623369581, "learning_rate": 9.07052084308391e-06, "loss": 0.392, "step": 4891 }, { "epoch": 0.22140755827110206, "grad_norm": 0.7677849535633429, "learning_rate": 9.070095178290394e-06, "loss": 0.414, "step": 4892 }, { "epoch": 0.22145281737949762, "grad_norm": 0.6805848241897616, "learning_rate": 9.069669426043033e-06, "loss": 0.3772, "step": 4893 }, { "epoch": 0.2214980764878932, "grad_norm": 0.4350282351307276, "learning_rate": 9.069243586350976e-06, "loss": 0.5201, "step": 4894 }, { "epoch": 0.22154333559628875, "grad_norm": 0.7355164443478349, "learning_rate": 9.068817659223371e-06, "loss": 0.3992, "step": 4895 }, { "epoch": 0.2215885947046843, "grad_norm": 0.7023395182277847, "learning_rate": 9.068391644669371e-06, "loss": 0.406, "step": 4896 }, { "epoch": 0.22163385381307987, "grad_norm": 0.7063160229165169, "learning_rate": 9.067965542698129e-06, "loss": 0.41, "step": 4897 }, { "epoch": 0.22167911292147544, "grad_norm": 0.6950126592196099, "learning_rate": 9.067539353318804e-06, "loss": 0.4181, "step": 4898 }, { "epoch": 0.22172437202987103, "grad_norm": 0.6869503147100258, "learning_rate": 9.067113076540547e-06, "loss": 0.4187, "step": 4899 }, { "epoch": 0.2217696311382666, "grad_norm": 0.6871055919686695, "learning_rate": 9.066686712372524e-06, "loss": 0.3908, "step": 4900 }, { "epoch": 0.22181489024666215, "grad_norm": 0.6954917745019584, "learning_rate": 9.066260260823893e-06, "loss": 0.369, "step": 4901 }, { "epoch": 0.2218601493550577, "grad_norm": 0.6749196877672734, "learning_rate": 9.065833721903817e-06, "loss": 0.4043, "step": 4902 }, { "epoch": 0.22190540846345327, "grad_norm": 0.6561073305341862, "learning_rate": 9.065407095621462e-06, "loss": 0.4153, "step": 4903 }, { "epoch": 0.22195066757184884, "grad_norm": 0.6712022464959736, "learning_rate": 9.064980381985993e-06, "loss": 0.3959, "step": 4904 }, { "epoch": 0.2219959266802444, "grad_norm": 0.8511922636720965, "learning_rate": 9.064553581006583e-06, "loss": 0.3698, "step": 4905 }, { "epoch": 0.22204118578863996, "grad_norm": 0.6086633175646591, "learning_rate": 9.064126692692397e-06, "loss": 0.4295, "step": 4906 }, { "epoch": 0.22208644489703552, "grad_norm": 0.5905928955912909, "learning_rate": 9.063699717052612e-06, "loss": 0.373, "step": 4907 }, { "epoch": 0.22213170400543109, "grad_norm": 0.4037618393026883, "learning_rate": 9.0632726540964e-06, "loss": 0.5065, "step": 4908 }, { "epoch": 0.22217696311382665, "grad_norm": 0.7951152684635662, "learning_rate": 9.06284550383294e-06, "loss": 0.4814, "step": 4909 }, { "epoch": 0.2222222222222222, "grad_norm": 0.7227826564261085, "learning_rate": 9.062418266271406e-06, "loss": 0.3257, "step": 4910 }, { "epoch": 0.2222674813306178, "grad_norm": 0.6299762399568244, "learning_rate": 9.06199094142098e-06, "loss": 0.3548, "step": 4911 }, { "epoch": 0.22231274043901336, "grad_norm": 0.6391259383273397, "learning_rate": 9.061563529290845e-06, "loss": 0.3968, "step": 4912 }, { "epoch": 0.22235799954740892, "grad_norm": 0.6254815763024072, "learning_rate": 9.061136029890186e-06, "loss": 0.4148, "step": 4913 }, { "epoch": 0.2224032586558045, "grad_norm": 0.2979564299577659, "learning_rate": 9.060708443228184e-06, "loss": 0.512, "step": 4914 }, { "epoch": 0.22244851776420005, "grad_norm": 0.7084697271068684, "learning_rate": 9.060280769314028e-06, "loss": 0.4223, "step": 4915 }, { "epoch": 0.2224937768725956, "grad_norm": 0.6349471347363206, "learning_rate": 9.05985300815691e-06, "loss": 0.3844, "step": 4916 }, { "epoch": 0.22253903598099117, "grad_norm": 2.6816014194505997, "learning_rate": 9.05942515976602e-06, "loss": 0.3938, "step": 4917 }, { "epoch": 0.22258429508938674, "grad_norm": 0.6663388143533746, "learning_rate": 9.05899722415055e-06, "loss": 0.3803, "step": 4918 }, { "epoch": 0.2226295541977823, "grad_norm": 0.720519900506803, "learning_rate": 9.058569201319696e-06, "loss": 0.374, "step": 4919 }, { "epoch": 0.22267481330617786, "grad_norm": 0.6702356758327594, "learning_rate": 9.058141091282656e-06, "loss": 0.3908, "step": 4920 }, { "epoch": 0.22272007241457342, "grad_norm": 0.6841496388893396, "learning_rate": 9.057712894048627e-06, "loss": 0.3944, "step": 4921 }, { "epoch": 0.22276533152296898, "grad_norm": 0.6330740612123831, "learning_rate": 9.05728460962681e-06, "loss": 0.3823, "step": 4922 }, { "epoch": 0.22281059063136457, "grad_norm": 0.6340769606214909, "learning_rate": 9.056856238026408e-06, "loss": 0.4255, "step": 4923 }, { "epoch": 0.22285584973976014, "grad_norm": 0.6726079933198121, "learning_rate": 9.056427779256624e-06, "loss": 0.3963, "step": 4924 }, { "epoch": 0.2229011088481557, "grad_norm": 0.3608419911007155, "learning_rate": 9.055999233326667e-06, "loss": 0.5117, "step": 4925 }, { "epoch": 0.22294636795655126, "grad_norm": 1.0467144881169608, "learning_rate": 9.055570600245744e-06, "loss": 0.3622, "step": 4926 }, { "epoch": 0.22299162706494682, "grad_norm": 0.7188640558240326, "learning_rate": 9.055141880023062e-06, "loss": 0.3882, "step": 4927 }, { "epoch": 0.22303688617334239, "grad_norm": 0.7274003485447285, "learning_rate": 9.054713072667838e-06, "loss": 0.4102, "step": 4928 }, { "epoch": 0.22308214528173795, "grad_norm": 0.35090527102288355, "learning_rate": 9.054284178189281e-06, "loss": 0.488, "step": 4929 }, { "epoch": 0.2231274043901335, "grad_norm": 0.643082305355415, "learning_rate": 9.05385519659661e-06, "loss": 0.3465, "step": 4930 }, { "epoch": 0.22317266349852907, "grad_norm": 0.6488098190014425, "learning_rate": 9.05342612789904e-06, "loss": 0.4201, "step": 4931 }, { "epoch": 0.22321792260692463, "grad_norm": 0.6559485495898078, "learning_rate": 9.052996972105794e-06, "loss": 0.3536, "step": 4932 }, { "epoch": 0.2232631817153202, "grad_norm": 0.6305430257580593, "learning_rate": 9.052567729226089e-06, "loss": 0.3803, "step": 4933 }, { "epoch": 0.22330844082371576, "grad_norm": 0.693319002377718, "learning_rate": 9.052138399269153e-06, "loss": 0.404, "step": 4934 }, { "epoch": 0.22335369993211135, "grad_norm": 0.7031246161246214, "learning_rate": 9.051708982244205e-06, "loss": 0.4099, "step": 4935 }, { "epoch": 0.2233989590405069, "grad_norm": 0.3962595002461269, "learning_rate": 9.051279478160475e-06, "loss": 0.5132, "step": 4936 }, { "epoch": 0.22344421814890247, "grad_norm": 0.6743678198292562, "learning_rate": 9.050849887027192e-06, "loss": 0.3888, "step": 4937 }, { "epoch": 0.22348947725729804, "grad_norm": 0.7553552357137391, "learning_rate": 9.050420208853587e-06, "loss": 0.3712, "step": 4938 }, { "epoch": 0.2235347363656936, "grad_norm": 0.6468895793062895, "learning_rate": 9.04999044364889e-06, "loss": 0.3895, "step": 4939 }, { "epoch": 0.22357999547408916, "grad_norm": 0.6049019690618703, "learning_rate": 9.049560591422339e-06, "loss": 0.3454, "step": 4940 }, { "epoch": 0.22362525458248472, "grad_norm": 0.6421990664350431, "learning_rate": 9.049130652183167e-06, "loss": 0.3908, "step": 4941 }, { "epoch": 0.22367051369088028, "grad_norm": 0.6610407945684632, "learning_rate": 9.048700625940613e-06, "loss": 0.3668, "step": 4942 }, { "epoch": 0.22371577279927585, "grad_norm": 0.3797001342865075, "learning_rate": 9.048270512703917e-06, "loss": 0.5241, "step": 4943 }, { "epoch": 0.2237610319076714, "grad_norm": 0.3268726665548248, "learning_rate": 9.04784031248232e-06, "loss": 0.5158, "step": 4944 }, { "epoch": 0.22380629101606697, "grad_norm": 0.7192392891866869, "learning_rate": 9.04741002528507e-06, "loss": 0.4637, "step": 4945 }, { "epoch": 0.22385155012446256, "grad_norm": 0.6991235003684834, "learning_rate": 9.046979651121407e-06, "loss": 0.3877, "step": 4946 }, { "epoch": 0.22389680923285812, "grad_norm": 0.39804851670087743, "learning_rate": 9.04654919000058e-06, "loss": 0.5109, "step": 4947 }, { "epoch": 0.22394206834125369, "grad_norm": 0.37682713032256965, "learning_rate": 9.046118641931841e-06, "loss": 0.471, "step": 4948 }, { "epoch": 0.22398732744964925, "grad_norm": 0.7482524081049696, "learning_rate": 9.045688006924438e-06, "loss": 0.4281, "step": 4949 }, { "epoch": 0.2240325865580448, "grad_norm": 0.6714930277958379, "learning_rate": 9.045257284987625e-06, "loss": 0.3682, "step": 4950 }, { "epoch": 0.22407784566644037, "grad_norm": 0.668180324072035, "learning_rate": 9.044826476130657e-06, "loss": 0.3829, "step": 4951 }, { "epoch": 0.22412310477483594, "grad_norm": 0.6081873599786615, "learning_rate": 9.04439558036279e-06, "loss": 0.3952, "step": 4952 }, { "epoch": 0.2241683638832315, "grad_norm": 0.46132916924040257, "learning_rate": 9.043964597693285e-06, "loss": 0.5081, "step": 4953 }, { "epoch": 0.22421362299162706, "grad_norm": 0.7123279512728615, "learning_rate": 9.043533528131401e-06, "loss": 0.3934, "step": 4954 }, { "epoch": 0.22425888210002262, "grad_norm": 0.6788394561902364, "learning_rate": 9.0431023716864e-06, "loss": 0.412, "step": 4955 }, { "epoch": 0.22430414120841818, "grad_norm": 0.6516290357865095, "learning_rate": 9.042671128367545e-06, "loss": 0.4584, "step": 4956 }, { "epoch": 0.22434940031681375, "grad_norm": 0.694717466352686, "learning_rate": 9.042239798184104e-06, "loss": 0.3739, "step": 4957 }, { "epoch": 0.22439465942520934, "grad_norm": 0.6566955026673125, "learning_rate": 9.041808381145345e-06, "loss": 0.426, "step": 4958 }, { "epoch": 0.2244399185336049, "grad_norm": 0.7044272633799166, "learning_rate": 9.041376877260537e-06, "loss": 0.3974, "step": 4959 }, { "epoch": 0.22448517764200046, "grad_norm": 0.36168351486043804, "learning_rate": 9.040945286538954e-06, "loss": 0.483, "step": 4960 }, { "epoch": 0.22453043675039602, "grad_norm": 0.7206470073148731, "learning_rate": 9.040513608989865e-06, "loss": 0.4604, "step": 4961 }, { "epoch": 0.22457569585879159, "grad_norm": 0.6872488577391577, "learning_rate": 9.040081844622549e-06, "loss": 0.3785, "step": 4962 }, { "epoch": 0.22462095496718715, "grad_norm": 0.6441005996153664, "learning_rate": 9.039649993446282e-06, "loss": 0.3796, "step": 4963 }, { "epoch": 0.2246662140755827, "grad_norm": 0.6648495871965038, "learning_rate": 9.039218055470345e-06, "loss": 0.3969, "step": 4964 }, { "epoch": 0.22471147318397827, "grad_norm": 0.6206659315926869, "learning_rate": 9.038786030704015e-06, "loss": 0.4097, "step": 4965 }, { "epoch": 0.22475673229237383, "grad_norm": 0.572166293045714, "learning_rate": 9.038353919156579e-06, "loss": 0.3693, "step": 4966 }, { "epoch": 0.2248019914007694, "grad_norm": 0.688238215927942, "learning_rate": 9.03792172083732e-06, "loss": 0.393, "step": 4967 }, { "epoch": 0.22484725050916496, "grad_norm": 0.6426199823718562, "learning_rate": 9.037489435755525e-06, "loss": 0.385, "step": 4968 }, { "epoch": 0.22489250961756052, "grad_norm": 0.6863085194919177, "learning_rate": 9.037057063920482e-06, "loss": 0.4335, "step": 4969 }, { "epoch": 0.2249377687259561, "grad_norm": 0.3480317198597673, "learning_rate": 9.03662460534148e-06, "loss": 0.4737, "step": 4970 }, { "epoch": 0.22498302783435167, "grad_norm": 0.7441531712838492, "learning_rate": 9.036192060027815e-06, "loss": 0.4075, "step": 4971 }, { "epoch": 0.22502828694274724, "grad_norm": 0.6849480310304413, "learning_rate": 9.035759427988779e-06, "loss": 0.3916, "step": 4972 }, { "epoch": 0.2250735460511428, "grad_norm": 0.6509099795779941, "learning_rate": 9.035326709233666e-06, "loss": 0.3677, "step": 4973 }, { "epoch": 0.22511880515953836, "grad_norm": 0.6816022275651715, "learning_rate": 9.034893903771776e-06, "loss": 0.3982, "step": 4974 }, { "epoch": 0.22516406426793392, "grad_norm": 0.656449812683951, "learning_rate": 9.034461011612408e-06, "loss": 0.3834, "step": 4975 }, { "epoch": 0.22520932337632948, "grad_norm": 0.37343104904151614, "learning_rate": 9.034028032764866e-06, "loss": 0.5213, "step": 4976 }, { "epoch": 0.22525458248472505, "grad_norm": 0.6434372858287496, "learning_rate": 9.033594967238449e-06, "loss": 0.3551, "step": 4977 }, { "epoch": 0.2252998415931206, "grad_norm": 0.635442460720965, "learning_rate": 9.033161815042465e-06, "loss": 0.385, "step": 4978 }, { "epoch": 0.22534510070151617, "grad_norm": 0.31351963290651574, "learning_rate": 9.032728576186221e-06, "loss": 0.5183, "step": 4979 }, { "epoch": 0.22539035980991173, "grad_norm": 0.7047869382640914, "learning_rate": 9.032295250679024e-06, "loss": 0.4107, "step": 4980 }, { "epoch": 0.2254356189183073, "grad_norm": 0.802688035345281, "learning_rate": 9.031861838530187e-06, "loss": 0.3933, "step": 4981 }, { "epoch": 0.22548087802670289, "grad_norm": 0.6748141649156711, "learning_rate": 9.031428339749023e-06, "loss": 0.3806, "step": 4982 }, { "epoch": 0.22552613713509845, "grad_norm": 0.31771599393442523, "learning_rate": 9.030994754344845e-06, "loss": 0.4662, "step": 4983 }, { "epoch": 0.225571396243494, "grad_norm": 0.6717370322459778, "learning_rate": 9.03056108232697e-06, "loss": 0.3726, "step": 4984 }, { "epoch": 0.22561665535188957, "grad_norm": 0.3275985126479388, "learning_rate": 9.030127323704716e-06, "loss": 0.5291, "step": 4985 }, { "epoch": 0.22566191446028513, "grad_norm": 0.6814558422571404, "learning_rate": 9.029693478487403e-06, "loss": 0.4068, "step": 4986 }, { "epoch": 0.2257071735686807, "grad_norm": 0.7062955257138119, "learning_rate": 9.029259546684352e-06, "loss": 0.3794, "step": 4987 }, { "epoch": 0.22575243267707626, "grad_norm": 0.29081694014049086, "learning_rate": 9.028825528304892e-06, "loss": 0.4816, "step": 4988 }, { "epoch": 0.22579769178547182, "grad_norm": 0.7861575746631435, "learning_rate": 9.028391423358343e-06, "loss": 0.3616, "step": 4989 }, { "epoch": 0.22584295089386738, "grad_norm": 0.644040995539964, "learning_rate": 9.027957231854034e-06, "loss": 0.3572, "step": 4990 }, { "epoch": 0.22588821000226295, "grad_norm": 0.6394027967714493, "learning_rate": 9.027522953801296e-06, "loss": 0.3785, "step": 4991 }, { "epoch": 0.2259334691106585, "grad_norm": 0.662027116266261, "learning_rate": 9.027088589209458e-06, "loss": 0.3816, "step": 4992 }, { "epoch": 0.2259787282190541, "grad_norm": 0.6663392960488509, "learning_rate": 9.026654138087857e-06, "loss": 0.4121, "step": 4993 }, { "epoch": 0.22602398732744966, "grad_norm": 0.6182570656575896, "learning_rate": 9.026219600445824e-06, "loss": 0.3979, "step": 4994 }, { "epoch": 0.22606924643584522, "grad_norm": 0.6965154917042926, "learning_rate": 9.025784976292698e-06, "loss": 0.3589, "step": 4995 }, { "epoch": 0.22611450554424078, "grad_norm": 0.6926703656472599, "learning_rate": 9.025350265637816e-06, "loss": 0.379, "step": 4996 }, { "epoch": 0.22615976465263635, "grad_norm": 0.6645148067448459, "learning_rate": 9.02491546849052e-06, "loss": 0.3825, "step": 4997 }, { "epoch": 0.2262050237610319, "grad_norm": 0.6664176952365746, "learning_rate": 9.024480584860151e-06, "loss": 0.4086, "step": 4998 }, { "epoch": 0.22625028286942747, "grad_norm": 0.6365534795201367, "learning_rate": 9.024045614756056e-06, "loss": 0.405, "step": 4999 }, { "epoch": 0.22629554197782303, "grad_norm": 0.6339182513959327, "learning_rate": 9.02361055818758e-06, "loss": 0.389, "step": 5000 }, { "epoch": 0.2263408010862186, "grad_norm": 0.6353193403763745, "learning_rate": 9.02317541516407e-06, "loss": 0.3989, "step": 5001 }, { "epoch": 0.22638606019461416, "grad_norm": 0.6190937243125736, "learning_rate": 9.022740185694877e-06, "loss": 0.4203, "step": 5002 }, { "epoch": 0.22643131930300972, "grad_norm": 0.6636631009871302, "learning_rate": 9.022304869789352e-06, "loss": 0.3938, "step": 5003 }, { "epoch": 0.22647657841140528, "grad_norm": 0.7262606727914647, "learning_rate": 9.02186946745685e-06, "loss": 0.3902, "step": 5004 }, { "epoch": 0.22652183751980087, "grad_norm": 0.6552484435543318, "learning_rate": 9.021433978706724e-06, "loss": 0.3833, "step": 5005 }, { "epoch": 0.22656709662819643, "grad_norm": 0.6619672606846077, "learning_rate": 9.020998403548333e-06, "loss": 0.38, "step": 5006 }, { "epoch": 0.226612355736592, "grad_norm": 0.6452943746949554, "learning_rate": 9.020562741991035e-06, "loss": 0.3952, "step": 5007 }, { "epoch": 0.22665761484498756, "grad_norm": 0.7229966603330219, "learning_rate": 9.020126994044194e-06, "loss": 0.4396, "step": 5008 }, { "epoch": 0.22670287395338312, "grad_norm": 0.6631422008112223, "learning_rate": 9.01969115971717e-06, "loss": 0.4145, "step": 5009 }, { "epoch": 0.22674813306177868, "grad_norm": 0.6025960255581672, "learning_rate": 9.019255239019327e-06, "loss": 0.3982, "step": 5010 }, { "epoch": 0.22679339217017425, "grad_norm": 0.640514584070971, "learning_rate": 9.018819231960035e-06, "loss": 0.4105, "step": 5011 }, { "epoch": 0.2268386512785698, "grad_norm": 0.6926676693423613, "learning_rate": 9.01838313854866e-06, "loss": 0.4185, "step": 5012 }, { "epoch": 0.22688391038696537, "grad_norm": 0.646092239513566, "learning_rate": 9.017946958794572e-06, "loss": 0.3833, "step": 5013 }, { "epoch": 0.22692916949536093, "grad_norm": 0.43804579728241944, "learning_rate": 9.017510692707144e-06, "loss": 0.4814, "step": 5014 }, { "epoch": 0.2269744286037565, "grad_norm": 0.6746575132691192, "learning_rate": 9.01707434029575e-06, "loss": 0.4079, "step": 5015 }, { "epoch": 0.22701968771215206, "grad_norm": 0.6754684713002371, "learning_rate": 9.016637901569767e-06, "loss": 0.3984, "step": 5016 }, { "epoch": 0.22706494682054765, "grad_norm": 0.33722037950031575, "learning_rate": 9.01620137653857e-06, "loss": 0.5116, "step": 5017 }, { "epoch": 0.2271102059289432, "grad_norm": 0.6355902009523129, "learning_rate": 9.015764765211542e-06, "loss": 0.3737, "step": 5018 }, { "epoch": 0.22715546503733877, "grad_norm": 0.7013672992936948, "learning_rate": 9.015328067598064e-06, "loss": 0.455, "step": 5019 }, { "epoch": 0.22720072414573433, "grad_norm": 0.636650952638715, "learning_rate": 9.014891283707517e-06, "loss": 0.3807, "step": 5020 }, { "epoch": 0.2272459832541299, "grad_norm": 0.6591969812812456, "learning_rate": 9.014454413549285e-06, "loss": 0.4363, "step": 5021 }, { "epoch": 0.22729124236252546, "grad_norm": 0.6480320041512702, "learning_rate": 9.014017457132759e-06, "loss": 0.3622, "step": 5022 }, { "epoch": 0.22733650147092102, "grad_norm": 0.7046158006976412, "learning_rate": 9.013580414467324e-06, "loss": 0.395, "step": 5023 }, { "epoch": 0.22738176057931658, "grad_norm": 0.7412498466774871, "learning_rate": 9.013143285562375e-06, "loss": 0.4135, "step": 5024 }, { "epoch": 0.22742701968771215, "grad_norm": 0.6430333473293939, "learning_rate": 9.012706070427302e-06, "loss": 0.3934, "step": 5025 }, { "epoch": 0.2274722787961077, "grad_norm": 0.9565897525460444, "learning_rate": 9.012268769071499e-06, "loss": 0.3622, "step": 5026 }, { "epoch": 0.22751753790450327, "grad_norm": 0.7943779811391722, "learning_rate": 9.011831381504362e-06, "loss": 0.3784, "step": 5027 }, { "epoch": 0.22756279701289886, "grad_norm": 0.6853892780934475, "learning_rate": 9.011393907735291e-06, "loss": 0.3891, "step": 5028 }, { "epoch": 0.22760805612129442, "grad_norm": 0.5847454644149267, "learning_rate": 9.010956347773685e-06, "loss": 0.4946, "step": 5029 }, { "epoch": 0.22765331522968998, "grad_norm": 0.43884271089363497, "learning_rate": 9.010518701628946e-06, "loss": 0.5247, "step": 5030 }, { "epoch": 0.22769857433808555, "grad_norm": 0.7332460232839678, "learning_rate": 9.010080969310477e-06, "loss": 0.427, "step": 5031 }, { "epoch": 0.2277438334464811, "grad_norm": 0.666964101425838, "learning_rate": 9.009643150827683e-06, "loss": 0.3768, "step": 5032 }, { "epoch": 0.22778909255487667, "grad_norm": 0.6490034288028559, "learning_rate": 9.009205246189974e-06, "loss": 0.4198, "step": 5033 }, { "epoch": 0.22783435166327223, "grad_norm": 0.6593400548042422, "learning_rate": 9.008767255406757e-06, "loss": 0.3847, "step": 5034 }, { "epoch": 0.2278796107716678, "grad_norm": 0.6084761452473841, "learning_rate": 9.008329178487442e-06, "loss": 0.5144, "step": 5035 }, { "epoch": 0.22792486988006336, "grad_norm": 0.6718615799842731, "learning_rate": 9.007891015441447e-06, "loss": 0.3904, "step": 5036 }, { "epoch": 0.22797012898845892, "grad_norm": 0.6738115602196058, "learning_rate": 9.007452766278181e-06, "loss": 0.3686, "step": 5037 }, { "epoch": 0.22801538809685448, "grad_norm": 0.6424073143560038, "learning_rate": 9.007014431007064e-06, "loss": 0.4093, "step": 5038 }, { "epoch": 0.22806064720525004, "grad_norm": 0.7084602860135315, "learning_rate": 9.006576009637513e-06, "loss": 0.4291, "step": 5039 }, { "epoch": 0.22810590631364563, "grad_norm": 0.6434498431158302, "learning_rate": 9.00613750217895e-06, "loss": 0.3985, "step": 5040 }, { "epoch": 0.2281511654220412, "grad_norm": 0.40294794849350446, "learning_rate": 9.005698908640795e-06, "loss": 0.5242, "step": 5041 }, { "epoch": 0.22819642453043676, "grad_norm": 0.6032961522009523, "learning_rate": 9.005260229032471e-06, "loss": 0.3963, "step": 5042 }, { "epoch": 0.22824168363883232, "grad_norm": 0.3356321513424042, "learning_rate": 9.004821463363409e-06, "loss": 0.475, "step": 5043 }, { "epoch": 0.22828694274722788, "grad_norm": 0.5938021240632243, "learning_rate": 9.004382611643032e-06, "loss": 0.3356, "step": 5044 }, { "epoch": 0.22833220185562345, "grad_norm": 0.7223163154013754, "learning_rate": 9.003943673880771e-06, "loss": 0.4112, "step": 5045 }, { "epoch": 0.228377460964019, "grad_norm": 0.6531894900267514, "learning_rate": 9.00350465008606e-06, "loss": 0.3885, "step": 5046 }, { "epoch": 0.22842272007241457, "grad_norm": 0.3875028933102217, "learning_rate": 9.003065540268328e-06, "loss": 0.5096, "step": 5047 }, { "epoch": 0.22846797918081013, "grad_norm": 0.6800193885461148, "learning_rate": 9.00262634443701e-06, "loss": 0.393, "step": 5048 }, { "epoch": 0.2285132382892057, "grad_norm": 0.6674094552885022, "learning_rate": 9.002187062601548e-06, "loss": 0.3392, "step": 5049 }, { "epoch": 0.22855849739760126, "grad_norm": 0.6999580178301209, "learning_rate": 9.001747694771378e-06, "loss": 0.3492, "step": 5050 }, { "epoch": 0.22860375650599682, "grad_norm": 0.6571573244248741, "learning_rate": 9.00130824095594e-06, "loss": 0.3999, "step": 5051 }, { "epoch": 0.2286490156143924, "grad_norm": 0.3742644643532557, "learning_rate": 9.000868701164676e-06, "loss": 0.4805, "step": 5052 }, { "epoch": 0.22869427472278797, "grad_norm": 0.7135463099458239, "learning_rate": 9.00042907540703e-06, "loss": 0.4351, "step": 5053 }, { "epoch": 0.22873953383118353, "grad_norm": 0.6325496509989086, "learning_rate": 8.999989363692453e-06, "loss": 0.3836, "step": 5054 }, { "epoch": 0.2287847929395791, "grad_norm": 0.7088140449106423, "learning_rate": 8.999549566030389e-06, "loss": 0.4416, "step": 5055 }, { "epoch": 0.22883005204797466, "grad_norm": 0.6212518022240358, "learning_rate": 8.999109682430288e-06, "loss": 0.3555, "step": 5056 }, { "epoch": 0.22887531115637022, "grad_norm": 0.7263540895949513, "learning_rate": 8.9986697129016e-06, "loss": 0.3844, "step": 5057 }, { "epoch": 0.22892057026476578, "grad_norm": 0.6386208062962684, "learning_rate": 8.998229657453783e-06, "loss": 0.359, "step": 5058 }, { "epoch": 0.22896582937316134, "grad_norm": 0.7299702917808321, "learning_rate": 8.99778951609629e-06, "loss": 0.3813, "step": 5059 }, { "epoch": 0.2290110884815569, "grad_norm": 0.6187123146757414, "learning_rate": 8.997349288838579e-06, "loss": 0.3686, "step": 5060 }, { "epoch": 0.22905634758995247, "grad_norm": 0.6462565768504316, "learning_rate": 8.996908975690107e-06, "loss": 0.361, "step": 5061 }, { "epoch": 0.22910160669834803, "grad_norm": 0.43439600221257585, "learning_rate": 8.996468576660337e-06, "loss": 0.4917, "step": 5062 }, { "epoch": 0.2291468658067436, "grad_norm": 0.6981144837914366, "learning_rate": 8.996028091758733e-06, "loss": 0.3568, "step": 5063 }, { "epoch": 0.22919212491513918, "grad_norm": 0.665355672986008, "learning_rate": 8.995587520994757e-06, "loss": 0.3985, "step": 5064 }, { "epoch": 0.22923738402353475, "grad_norm": 0.6531402191226752, "learning_rate": 8.995146864377877e-06, "loss": 0.3997, "step": 5065 }, { "epoch": 0.2292826431319303, "grad_norm": 0.6692184657619775, "learning_rate": 8.994706121917562e-06, "loss": 0.3675, "step": 5066 }, { "epoch": 0.22932790224032587, "grad_norm": 0.748061770522374, "learning_rate": 8.99426529362328e-06, "loss": 0.3545, "step": 5067 }, { "epoch": 0.22937316134872143, "grad_norm": 0.6629816145696559, "learning_rate": 8.993824379504505e-06, "loss": 0.3949, "step": 5068 }, { "epoch": 0.229418420457117, "grad_norm": 0.36058234225704106, "learning_rate": 8.99338337957071e-06, "loss": 0.4942, "step": 5069 }, { "epoch": 0.22946367956551256, "grad_norm": 0.6403442029161902, "learning_rate": 8.99294229383137e-06, "loss": 0.4098, "step": 5070 }, { "epoch": 0.22950893867390812, "grad_norm": 0.30217042809740324, "learning_rate": 8.992501122295964e-06, "loss": 0.5023, "step": 5071 }, { "epoch": 0.22955419778230368, "grad_norm": 0.7060732314966579, "learning_rate": 8.992059864973972e-06, "loss": 0.392, "step": 5072 }, { "epoch": 0.22959945689069924, "grad_norm": 0.31473324252979823, "learning_rate": 8.991618521874874e-06, "loss": 0.5342, "step": 5073 }, { "epoch": 0.2296447159990948, "grad_norm": 0.7440077572148602, "learning_rate": 8.991177093008153e-06, "loss": 0.4034, "step": 5074 }, { "epoch": 0.2296899751074904, "grad_norm": 0.3344787423121618, "learning_rate": 8.990735578383295e-06, "loss": 0.5276, "step": 5075 }, { "epoch": 0.22973523421588596, "grad_norm": 0.7355397133446866, "learning_rate": 8.990293978009782e-06, "loss": 0.3846, "step": 5076 }, { "epoch": 0.22978049332428152, "grad_norm": 0.6111272041129948, "learning_rate": 8.98985229189711e-06, "loss": 0.3438, "step": 5077 }, { "epoch": 0.22982575243267708, "grad_norm": 0.6582935033341474, "learning_rate": 8.989410520054767e-06, "loss": 0.3601, "step": 5078 }, { "epoch": 0.22987101154107265, "grad_norm": 0.6543794262424822, "learning_rate": 8.988968662492243e-06, "loss": 0.4188, "step": 5079 }, { "epoch": 0.2299162706494682, "grad_norm": 0.3491993514780067, "learning_rate": 8.988526719219035e-06, "loss": 0.5115, "step": 5080 }, { "epoch": 0.22996152975786377, "grad_norm": 0.3329227948894098, "learning_rate": 8.988084690244636e-06, "loss": 0.5075, "step": 5081 }, { "epoch": 0.23000678886625933, "grad_norm": 0.3059273323766298, "learning_rate": 8.987642575578546e-06, "loss": 0.4829, "step": 5082 }, { "epoch": 0.2300520479746549, "grad_norm": 0.6749932682170735, "learning_rate": 8.987200375230262e-06, "loss": 0.4061, "step": 5083 }, { "epoch": 0.23009730708305046, "grad_norm": 0.667571209794923, "learning_rate": 8.986758089209292e-06, "loss": 0.4166, "step": 5084 }, { "epoch": 0.23014256619144602, "grad_norm": 0.6994861621747892, "learning_rate": 8.986315717525132e-06, "loss": 0.4102, "step": 5085 }, { "epoch": 0.23018782529984158, "grad_norm": 0.6364880522630176, "learning_rate": 8.98587326018729e-06, "loss": 0.4032, "step": 5086 }, { "epoch": 0.23023308440823717, "grad_norm": 0.40828602820192145, "learning_rate": 8.985430717205276e-06, "loss": 0.4834, "step": 5087 }, { "epoch": 0.23027834351663273, "grad_norm": 0.6472024336771361, "learning_rate": 8.984988088588594e-06, "loss": 0.4167, "step": 5088 }, { "epoch": 0.2303236026250283, "grad_norm": 0.6382904284079052, "learning_rate": 8.984545374346758e-06, "loss": 0.3806, "step": 5089 }, { "epoch": 0.23036886173342386, "grad_norm": 0.7405927409143391, "learning_rate": 8.98410257448928e-06, "loss": 0.4345, "step": 5090 }, { "epoch": 0.23041412084181942, "grad_norm": 0.6151881033617659, "learning_rate": 8.983659689025673e-06, "loss": 0.3811, "step": 5091 }, { "epoch": 0.23045937995021498, "grad_norm": 0.6586186202326227, "learning_rate": 8.983216717965453e-06, "loss": 0.3562, "step": 5092 }, { "epoch": 0.23050463905861054, "grad_norm": 0.6552286587750954, "learning_rate": 8.98277366131814e-06, "loss": 0.3732, "step": 5093 }, { "epoch": 0.2305498981670061, "grad_norm": 0.6744611940595147, "learning_rate": 8.982330519093255e-06, "loss": 0.3634, "step": 5094 }, { "epoch": 0.23059515727540167, "grad_norm": 0.6468474579989157, "learning_rate": 8.981887291300315e-06, "loss": 0.3916, "step": 5095 }, { "epoch": 0.23064041638379723, "grad_norm": 0.6413550963762416, "learning_rate": 8.981443977948848e-06, "loss": 0.4041, "step": 5096 }, { "epoch": 0.2306856754921928, "grad_norm": 0.3875886722541602, "learning_rate": 8.98100057904838e-06, "loss": 0.5196, "step": 5097 }, { "epoch": 0.23073093460058836, "grad_norm": 0.3240288028641131, "learning_rate": 8.980557094608433e-06, "loss": 0.4977, "step": 5098 }, { "epoch": 0.23077619370898395, "grad_norm": 0.6977386054479082, "learning_rate": 8.980113524638541e-06, "loss": 0.3847, "step": 5099 }, { "epoch": 0.2308214528173795, "grad_norm": 0.65294049746612, "learning_rate": 8.979669869148234e-06, "loss": 0.3908, "step": 5100 }, { "epoch": 0.23086671192577507, "grad_norm": 0.6586498592271675, "learning_rate": 8.979226128147043e-06, "loss": 0.3793, "step": 5101 }, { "epoch": 0.23091197103417063, "grad_norm": 0.624180963723518, "learning_rate": 8.978782301644503e-06, "loss": 0.4071, "step": 5102 }, { "epoch": 0.2309572301425662, "grad_norm": 0.3947402995617219, "learning_rate": 8.978338389650152e-06, "loss": 0.4873, "step": 5103 }, { "epoch": 0.23100248925096176, "grad_norm": 0.7039306333910293, "learning_rate": 8.977894392173527e-06, "loss": 0.4046, "step": 5104 }, { "epoch": 0.23104774835935732, "grad_norm": 0.6445887028036205, "learning_rate": 8.97745030922417e-06, "loss": 0.4122, "step": 5105 }, { "epoch": 0.23109300746775288, "grad_norm": 0.7006501371896489, "learning_rate": 8.977006140811621e-06, "loss": 0.3884, "step": 5106 }, { "epoch": 0.23113826657614844, "grad_norm": 0.6703032827813812, "learning_rate": 8.976561886945426e-06, "loss": 0.3607, "step": 5107 }, { "epoch": 0.231183525684544, "grad_norm": 0.7015094869729309, "learning_rate": 8.976117547635125e-06, "loss": 0.4294, "step": 5108 }, { "epoch": 0.23122878479293957, "grad_norm": 0.6317103668825891, "learning_rate": 8.975673122890273e-06, "loss": 0.4084, "step": 5109 }, { "epoch": 0.23127404390133513, "grad_norm": 0.6348862463002435, "learning_rate": 8.975228612720415e-06, "loss": 0.3692, "step": 5110 }, { "epoch": 0.23131930300973072, "grad_norm": 0.7192469572737816, "learning_rate": 8.974784017135104e-06, "loss": 0.4034, "step": 5111 }, { "epoch": 0.23136456211812628, "grad_norm": 0.70426941566466, "learning_rate": 8.974339336143892e-06, "loss": 0.3641, "step": 5112 }, { "epoch": 0.23140982122652184, "grad_norm": 0.7329488639684603, "learning_rate": 8.973894569756333e-06, "loss": 0.4074, "step": 5113 }, { "epoch": 0.2314550803349174, "grad_norm": 0.678743773114706, "learning_rate": 8.973449717981984e-06, "loss": 0.3797, "step": 5114 }, { "epoch": 0.23150033944331297, "grad_norm": 0.6801450151035214, "learning_rate": 8.973004780830405e-06, "loss": 0.4082, "step": 5115 }, { "epoch": 0.23154559855170853, "grad_norm": 0.8518430751724988, "learning_rate": 8.972559758311156e-06, "loss": 0.3891, "step": 5116 }, { "epoch": 0.2315908576601041, "grad_norm": 0.4006010921762384, "learning_rate": 8.972114650433798e-06, "loss": 0.5036, "step": 5117 }, { "epoch": 0.23163611676849966, "grad_norm": 0.33678025565893854, "learning_rate": 8.971669457207896e-06, "loss": 0.5028, "step": 5118 }, { "epoch": 0.23168137587689522, "grad_norm": 0.7913353825927536, "learning_rate": 8.971224178643015e-06, "loss": 0.3729, "step": 5119 }, { "epoch": 0.23172663498529078, "grad_norm": 0.7398032625351002, "learning_rate": 8.970778814748722e-06, "loss": 0.3754, "step": 5120 }, { "epoch": 0.23177189409368634, "grad_norm": 0.6682354188081827, "learning_rate": 8.97033336553459e-06, "loss": 0.396, "step": 5121 }, { "epoch": 0.23181715320208193, "grad_norm": 0.8144947277111944, "learning_rate": 8.969887831010185e-06, "loss": 0.4829, "step": 5122 }, { "epoch": 0.2318624123104775, "grad_norm": 0.7451888612956965, "learning_rate": 8.969442211185086e-06, "loss": 0.388, "step": 5123 }, { "epoch": 0.23190767141887306, "grad_norm": 0.7696054579132323, "learning_rate": 8.968996506068863e-06, "loss": 0.3659, "step": 5124 }, { "epoch": 0.23195293052726862, "grad_norm": 0.47226834278168806, "learning_rate": 8.968550715671096e-06, "loss": 0.5083, "step": 5125 }, { "epoch": 0.23199818963566418, "grad_norm": 0.8043182628259528, "learning_rate": 8.968104840001362e-06, "loss": 0.363, "step": 5126 }, { "epoch": 0.23204344874405974, "grad_norm": 0.8018927387330322, "learning_rate": 8.967658879069243e-06, "loss": 0.3646, "step": 5127 }, { "epoch": 0.2320887078524553, "grad_norm": 0.6455595712351934, "learning_rate": 8.96721283288432e-06, "loss": 0.3807, "step": 5128 }, { "epoch": 0.23213396696085087, "grad_norm": 0.6741490222011743, "learning_rate": 8.966766701456177e-06, "loss": 0.4122, "step": 5129 }, { "epoch": 0.23217922606924643, "grad_norm": 0.7687792391535636, "learning_rate": 8.9663204847944e-06, "loss": 0.4552, "step": 5130 }, { "epoch": 0.232224485177642, "grad_norm": 0.6669012351077497, "learning_rate": 8.965874182908578e-06, "loss": 0.3792, "step": 5131 }, { "epoch": 0.23226974428603755, "grad_norm": 0.3517548575118009, "learning_rate": 8.9654277958083e-06, "loss": 0.4974, "step": 5132 }, { "epoch": 0.23231500339443312, "grad_norm": 0.6987967216935925, "learning_rate": 8.96498132350316e-06, "loss": 0.3722, "step": 5133 }, { "epoch": 0.2323602625028287, "grad_norm": 2.2598695936336903, "learning_rate": 8.964534766002747e-06, "loss": 0.3949, "step": 5134 }, { "epoch": 0.23240552161122427, "grad_norm": 0.6806879214900802, "learning_rate": 8.964088123316657e-06, "loss": 0.4195, "step": 5135 }, { "epoch": 0.23245078071961983, "grad_norm": 0.6509213495136762, "learning_rate": 8.96364139545449e-06, "loss": 0.3989, "step": 5136 }, { "epoch": 0.2324960398280154, "grad_norm": 0.6993968892851343, "learning_rate": 8.96319458242584e-06, "loss": 0.398, "step": 5137 }, { "epoch": 0.23254129893641096, "grad_norm": 0.3881002933457079, "learning_rate": 8.962747684240313e-06, "loss": 0.5247, "step": 5138 }, { "epoch": 0.23258655804480652, "grad_norm": 0.7525172989755955, "learning_rate": 8.962300700907508e-06, "loss": 0.4302, "step": 5139 }, { "epoch": 0.23263181715320208, "grad_norm": 0.6624381151083798, "learning_rate": 8.96185363243703e-06, "loss": 0.3945, "step": 5140 }, { "epoch": 0.23267707626159764, "grad_norm": 0.6056783431125236, "learning_rate": 8.961406478838486e-06, "loss": 0.3918, "step": 5141 }, { "epoch": 0.2327223353699932, "grad_norm": 0.6902131262621901, "learning_rate": 8.960959240121483e-06, "loss": 0.4018, "step": 5142 }, { "epoch": 0.23276759447838877, "grad_norm": 0.7433333943110101, "learning_rate": 8.96051191629563e-06, "loss": 0.433, "step": 5143 }, { "epoch": 0.23281285358678433, "grad_norm": 0.6448248656475403, "learning_rate": 8.96006450737054e-06, "loss": 0.3489, "step": 5144 }, { "epoch": 0.2328581126951799, "grad_norm": 0.6608729931513597, "learning_rate": 8.959617013355829e-06, "loss": 0.4211, "step": 5145 }, { "epoch": 0.23290337180357548, "grad_norm": 0.714836914340452, "learning_rate": 8.959169434261106e-06, "loss": 0.404, "step": 5146 }, { "epoch": 0.23294863091197104, "grad_norm": 0.4241072094567102, "learning_rate": 8.958721770095993e-06, "loss": 0.4861, "step": 5147 }, { "epoch": 0.2329938900203666, "grad_norm": 0.7033691745901531, "learning_rate": 8.958274020870107e-06, "loss": 0.3767, "step": 5148 }, { "epoch": 0.23303914912876217, "grad_norm": 0.6697782259656342, "learning_rate": 8.95782618659307e-06, "loss": 0.3817, "step": 5149 }, { "epoch": 0.23308440823715773, "grad_norm": 0.684941030312248, "learning_rate": 8.957378267274502e-06, "loss": 0.3734, "step": 5150 }, { "epoch": 0.2331296673455533, "grad_norm": 0.6454346323086292, "learning_rate": 8.95693026292403e-06, "loss": 0.3462, "step": 5151 }, { "epoch": 0.23317492645394886, "grad_norm": 0.6743028983951925, "learning_rate": 8.956482173551281e-06, "loss": 0.3936, "step": 5152 }, { "epoch": 0.23322018556234442, "grad_norm": 0.35510312218255524, "learning_rate": 8.956033999165881e-06, "loss": 0.5015, "step": 5153 }, { "epoch": 0.23326544467073998, "grad_norm": 0.33903672826457865, "learning_rate": 8.95558573977746e-06, "loss": 0.4976, "step": 5154 }, { "epoch": 0.23331070377913554, "grad_norm": 0.6732348430980337, "learning_rate": 8.955137395395649e-06, "loss": 0.4227, "step": 5155 }, { "epoch": 0.2333559628875311, "grad_norm": 0.6513263774892051, "learning_rate": 8.954688966030083e-06, "loss": 0.3716, "step": 5156 }, { "epoch": 0.23340122199592667, "grad_norm": 0.6796153436099222, "learning_rate": 8.954240451690396e-06, "loss": 0.3805, "step": 5157 }, { "epoch": 0.23344648110432226, "grad_norm": 0.7066157020242287, "learning_rate": 8.953791852386229e-06, "loss": 0.4376, "step": 5158 }, { "epoch": 0.23349174021271782, "grad_norm": 0.4003704868791629, "learning_rate": 8.953343168127218e-06, "loss": 0.5046, "step": 5159 }, { "epoch": 0.23353699932111338, "grad_norm": 0.7553986056398494, "learning_rate": 8.952894398923003e-06, "loss": 0.3873, "step": 5160 }, { "epoch": 0.23358225842950894, "grad_norm": 0.3605697912438177, "learning_rate": 8.952445544783227e-06, "loss": 0.4796, "step": 5161 }, { "epoch": 0.2336275175379045, "grad_norm": 0.33431313155846065, "learning_rate": 8.951996605717537e-06, "loss": 0.5052, "step": 5162 }, { "epoch": 0.23367277664630007, "grad_norm": 0.6935848168831603, "learning_rate": 8.951547581735576e-06, "loss": 0.3645, "step": 5163 }, { "epoch": 0.23371803575469563, "grad_norm": 0.28657580245470116, "learning_rate": 8.951098472846994e-06, "loss": 0.5062, "step": 5164 }, { "epoch": 0.2337632948630912, "grad_norm": 0.6478693357949638, "learning_rate": 8.950649279061441e-06, "loss": 0.3496, "step": 5165 }, { "epoch": 0.23380855397148675, "grad_norm": 0.6352113123421139, "learning_rate": 8.950200000388569e-06, "loss": 0.4445, "step": 5166 }, { "epoch": 0.23385381307988232, "grad_norm": 0.6870906658498175, "learning_rate": 8.94975063683803e-06, "loss": 0.4252, "step": 5167 }, { "epoch": 0.23389907218827788, "grad_norm": 0.6279771064277256, "learning_rate": 8.949301188419481e-06, "loss": 0.353, "step": 5168 }, { "epoch": 0.23394433129667347, "grad_norm": 0.6866365440285913, "learning_rate": 8.948851655142579e-06, "loss": 0.3868, "step": 5169 }, { "epoch": 0.23398959040506903, "grad_norm": 0.634897252379839, "learning_rate": 8.948402037016984e-06, "loss": 0.4156, "step": 5170 }, { "epoch": 0.2340348495134646, "grad_norm": 0.6840741049902902, "learning_rate": 8.947952334052354e-06, "loss": 0.384, "step": 5171 }, { "epoch": 0.23408010862186016, "grad_norm": 0.6709720523253192, "learning_rate": 8.947502546258354e-06, "loss": 0.3184, "step": 5172 }, { "epoch": 0.23412536773025572, "grad_norm": 0.6546340406137499, "learning_rate": 8.947052673644649e-06, "loss": 0.3699, "step": 5173 }, { "epoch": 0.23417062683865128, "grad_norm": 1.1575843601066382, "learning_rate": 8.946602716220903e-06, "loss": 0.3687, "step": 5174 }, { "epoch": 0.23421588594704684, "grad_norm": 0.5592146591684121, "learning_rate": 8.946152673996786e-06, "loss": 0.5038, "step": 5175 }, { "epoch": 0.2342611450554424, "grad_norm": 0.438025934009356, "learning_rate": 8.94570254698197e-06, "loss": 0.4751, "step": 5176 }, { "epoch": 0.23430640416383797, "grad_norm": 0.3331041402234504, "learning_rate": 8.94525233518612e-06, "loss": 0.5063, "step": 5177 }, { "epoch": 0.23435166327223353, "grad_norm": 0.7216492408264652, "learning_rate": 8.944802038618919e-06, "loss": 0.3478, "step": 5178 }, { "epoch": 0.2343969223806291, "grad_norm": 0.6858494288023315, "learning_rate": 8.944351657290037e-06, "loss": 0.3644, "step": 5179 }, { "epoch": 0.23444218148902465, "grad_norm": 0.6451322581270235, "learning_rate": 8.94390119120915e-06, "loss": 0.3867, "step": 5180 }, { "epoch": 0.23448744059742024, "grad_norm": 0.7447336454417398, "learning_rate": 8.94345064038594e-06, "loss": 0.4206, "step": 5181 }, { "epoch": 0.2345326997058158, "grad_norm": 0.7668578669225723, "learning_rate": 8.943000004830087e-06, "loss": 0.514, "step": 5182 }, { "epoch": 0.23457795881421137, "grad_norm": 0.718206994552857, "learning_rate": 8.942549284551274e-06, "loss": 0.4438, "step": 5183 }, { "epoch": 0.23462321792260693, "grad_norm": 0.7203731751657185, "learning_rate": 8.942098479559185e-06, "loss": 0.4234, "step": 5184 }, { "epoch": 0.2346684770310025, "grad_norm": 0.6292088498449315, "learning_rate": 8.941647589863507e-06, "loss": 0.4002, "step": 5185 }, { "epoch": 0.23471373613939805, "grad_norm": 0.6600783332319208, "learning_rate": 8.941196615473929e-06, "loss": 0.4002, "step": 5186 }, { "epoch": 0.23475899524779362, "grad_norm": 0.7434646236905292, "learning_rate": 8.94074555640014e-06, "loss": 0.4058, "step": 5187 }, { "epoch": 0.23480425435618918, "grad_norm": 0.6471649173552345, "learning_rate": 8.940294412651831e-06, "loss": 0.3731, "step": 5188 }, { "epoch": 0.23484951346458474, "grad_norm": 0.7267012818438411, "learning_rate": 8.939843184238698e-06, "loss": 0.4365, "step": 5189 }, { "epoch": 0.2348947725729803, "grad_norm": 0.42194579918599756, "learning_rate": 8.939391871170435e-06, "loss": 0.4843, "step": 5190 }, { "epoch": 0.23494003168137587, "grad_norm": 0.6951856656124707, "learning_rate": 8.93894047345674e-06, "loss": 0.4126, "step": 5191 }, { "epoch": 0.23498529078977143, "grad_norm": 0.6568313845426516, "learning_rate": 8.93848899110731e-06, "loss": 0.391, "step": 5192 }, { "epoch": 0.23503054989816702, "grad_norm": 0.7239464873852787, "learning_rate": 8.93803742413185e-06, "loss": 0.3812, "step": 5193 }, { "epoch": 0.23507580900656258, "grad_norm": 0.3504256078341399, "learning_rate": 8.937585772540058e-06, "loss": 0.5347, "step": 5194 }, { "epoch": 0.23512106811495814, "grad_norm": 0.6827160249138898, "learning_rate": 8.937134036341643e-06, "loss": 0.3973, "step": 5195 }, { "epoch": 0.2351663272233537, "grad_norm": 0.6703648183205427, "learning_rate": 8.93668221554631e-06, "loss": 0.4111, "step": 5196 }, { "epoch": 0.23521158633174927, "grad_norm": 0.31976622494354495, "learning_rate": 8.936230310163765e-06, "loss": 0.5168, "step": 5197 }, { "epoch": 0.23525684544014483, "grad_norm": 0.6581618322094055, "learning_rate": 8.935778320203721e-06, "loss": 0.3728, "step": 5198 }, { "epoch": 0.2353021045485404, "grad_norm": 0.6170063595280307, "learning_rate": 8.935326245675887e-06, "loss": 0.3508, "step": 5199 }, { "epoch": 0.23534736365693595, "grad_norm": 0.33336389010807627, "learning_rate": 8.934874086589981e-06, "loss": 0.4836, "step": 5200 }, { "epoch": 0.23539262276533152, "grad_norm": 0.9567379336319621, "learning_rate": 8.934421842955715e-06, "loss": 0.376, "step": 5201 }, { "epoch": 0.23543788187372708, "grad_norm": 0.6638392906292916, "learning_rate": 8.933969514782808e-06, "loss": 0.35, "step": 5202 }, { "epoch": 0.23548314098212264, "grad_norm": 0.65336301854799, "learning_rate": 8.933517102080977e-06, "loss": 0.3838, "step": 5203 }, { "epoch": 0.2355284000905182, "grad_norm": 0.3227094570153876, "learning_rate": 8.933064604859945e-06, "loss": 0.4925, "step": 5204 }, { "epoch": 0.2355736591989138, "grad_norm": 0.31246372138751155, "learning_rate": 8.932612023129433e-06, "loss": 0.5056, "step": 5205 }, { "epoch": 0.23561891830730936, "grad_norm": 0.7701575727730373, "learning_rate": 8.932159356899169e-06, "loss": 0.3625, "step": 5206 }, { "epoch": 0.23566417741570492, "grad_norm": 0.2880103831704138, "learning_rate": 8.931706606178874e-06, "loss": 0.4982, "step": 5207 }, { "epoch": 0.23570943652410048, "grad_norm": 0.7208448019428256, "learning_rate": 8.931253770978281e-06, "loss": 0.3792, "step": 5208 }, { "epoch": 0.23575469563249604, "grad_norm": 0.6607662590967202, "learning_rate": 8.93080085130712e-06, "loss": 0.39, "step": 5209 }, { "epoch": 0.2357999547408916, "grad_norm": 0.6524379021573505, "learning_rate": 8.930347847175118e-06, "loss": 0.3755, "step": 5210 }, { "epoch": 0.23584521384928717, "grad_norm": 0.3524805216490473, "learning_rate": 8.929894758592016e-06, "loss": 0.4992, "step": 5211 }, { "epoch": 0.23589047295768273, "grad_norm": 0.6890393087617782, "learning_rate": 8.929441585567543e-06, "loss": 0.4145, "step": 5212 }, { "epoch": 0.2359357320660783, "grad_norm": 0.35273439977535703, "learning_rate": 8.928988328111437e-06, "loss": 0.4905, "step": 5213 }, { "epoch": 0.23598099117447385, "grad_norm": 0.6956924315211259, "learning_rate": 8.928534986233441e-06, "loss": 0.3921, "step": 5214 }, { "epoch": 0.23602625028286942, "grad_norm": 0.6630715659033836, "learning_rate": 8.928081559943293e-06, "loss": 0.3345, "step": 5215 }, { "epoch": 0.236071509391265, "grad_norm": 0.6177416326477722, "learning_rate": 8.927628049250736e-06, "loss": 0.36, "step": 5216 }, { "epoch": 0.23611676849966057, "grad_norm": 0.4239754000254461, "learning_rate": 8.927174454165518e-06, "loss": 0.4947, "step": 5217 }, { "epoch": 0.23616202760805613, "grad_norm": 0.7221304351259809, "learning_rate": 8.926720774697379e-06, "loss": 0.3808, "step": 5218 }, { "epoch": 0.2362072867164517, "grad_norm": 0.7340501642183168, "learning_rate": 8.926267010856072e-06, "loss": 0.4445, "step": 5219 }, { "epoch": 0.23625254582484725, "grad_norm": 0.6257379899081139, "learning_rate": 8.925813162651345e-06, "loss": 0.402, "step": 5220 }, { "epoch": 0.23629780493324282, "grad_norm": 0.6189493871758575, "learning_rate": 8.92535923009295e-06, "loss": 0.4159, "step": 5221 }, { "epoch": 0.23634306404163838, "grad_norm": 0.6375101034399829, "learning_rate": 8.924905213190641e-06, "loss": 0.3664, "step": 5222 }, { "epoch": 0.23638832315003394, "grad_norm": 0.659813540503722, "learning_rate": 8.924451111954173e-06, "loss": 0.4071, "step": 5223 }, { "epoch": 0.2364335822584295, "grad_norm": 0.6552180360974372, "learning_rate": 8.923996926393306e-06, "loss": 0.3749, "step": 5224 }, { "epoch": 0.23647884136682507, "grad_norm": 0.3810805951151939, "learning_rate": 8.923542656517795e-06, "loss": 0.5057, "step": 5225 }, { "epoch": 0.23652410047522063, "grad_norm": 0.6787402212616696, "learning_rate": 8.923088302337402e-06, "loss": 0.3789, "step": 5226 }, { "epoch": 0.2365693595836162, "grad_norm": 0.6387130171561558, "learning_rate": 8.922633863861891e-06, "loss": 0.4037, "step": 5227 }, { "epoch": 0.23661461869201178, "grad_norm": 0.3172792652610559, "learning_rate": 8.922179341101027e-06, "loss": 0.4942, "step": 5228 }, { "epoch": 0.23665987780040734, "grad_norm": 0.6719534269724785, "learning_rate": 8.921724734064573e-06, "loss": 0.3778, "step": 5229 }, { "epoch": 0.2367051369088029, "grad_norm": 0.7536957548197958, "learning_rate": 8.9212700427623e-06, "loss": 0.4318, "step": 5230 }, { "epoch": 0.23675039601719847, "grad_norm": 0.7039894799263119, "learning_rate": 8.920815267203977e-06, "loss": 0.3631, "step": 5231 }, { "epoch": 0.23679565512559403, "grad_norm": 0.6832250244855517, "learning_rate": 8.920360407399375e-06, "loss": 0.4254, "step": 5232 }, { "epoch": 0.2368409142339896, "grad_norm": 0.697516368031495, "learning_rate": 8.919905463358269e-06, "loss": 0.3406, "step": 5233 }, { "epoch": 0.23688617334238515, "grad_norm": 0.6938707188368235, "learning_rate": 8.919450435090433e-06, "loss": 0.4, "step": 5234 }, { "epoch": 0.23693143245078072, "grad_norm": 0.6964208971518877, "learning_rate": 8.918995322605646e-06, "loss": 0.4226, "step": 5235 }, { "epoch": 0.23697669155917628, "grad_norm": 0.41458513233775623, "learning_rate": 8.918540125913686e-06, "loss": 0.492, "step": 5236 }, { "epoch": 0.23702195066757184, "grad_norm": 0.6790128431432206, "learning_rate": 8.918084845024334e-06, "loss": 0.3861, "step": 5237 }, { "epoch": 0.2370672097759674, "grad_norm": 0.6650948718946313, "learning_rate": 8.917629479947369e-06, "loss": 0.3855, "step": 5238 }, { "epoch": 0.23711246888436296, "grad_norm": 0.32944873763671007, "learning_rate": 8.917174030692582e-06, "loss": 0.5137, "step": 5239 }, { "epoch": 0.23715772799275855, "grad_norm": 0.7527930233644158, "learning_rate": 8.916718497269755e-06, "loss": 0.4035, "step": 5240 }, { "epoch": 0.23720298710115412, "grad_norm": 0.7248777445942971, "learning_rate": 8.916262879688674e-06, "loss": 0.3802, "step": 5241 }, { "epoch": 0.23724824620954968, "grad_norm": 0.6412358156653725, "learning_rate": 8.915807177959133e-06, "loss": 0.3757, "step": 5242 }, { "epoch": 0.23729350531794524, "grad_norm": 0.387452448857917, "learning_rate": 8.915351392090925e-06, "loss": 0.4974, "step": 5243 }, { "epoch": 0.2373387644263408, "grad_norm": 0.7509110448440282, "learning_rate": 8.914895522093839e-06, "loss": 0.3416, "step": 5244 }, { "epoch": 0.23738402353473637, "grad_norm": 0.6916588684495765, "learning_rate": 8.91443956797767e-06, "loss": 0.3728, "step": 5245 }, { "epoch": 0.23742928264313193, "grad_norm": 0.6397813659500448, "learning_rate": 8.91398352975222e-06, "loss": 0.3884, "step": 5246 }, { "epoch": 0.2374745417515275, "grad_norm": 1.3669528665831592, "learning_rate": 8.913527407427282e-06, "loss": 0.3725, "step": 5247 }, { "epoch": 0.23751980085992305, "grad_norm": 0.714301416019482, "learning_rate": 8.91307120101266e-06, "loss": 0.3635, "step": 5248 }, { "epoch": 0.23756505996831861, "grad_norm": 0.6345896928698095, "learning_rate": 8.912614910518158e-06, "loss": 0.3996, "step": 5249 }, { "epoch": 0.23761031907671418, "grad_norm": 0.6514324533755185, "learning_rate": 8.912158535953576e-06, "loss": 0.3743, "step": 5250 }, { "epoch": 0.23765557818510977, "grad_norm": 0.6343997575674019, "learning_rate": 8.911702077328723e-06, "loss": 0.3851, "step": 5251 }, { "epoch": 0.23770083729350533, "grad_norm": 0.3922728341327845, "learning_rate": 8.911245534653409e-06, "loss": 0.5126, "step": 5252 }, { "epoch": 0.2377460964019009, "grad_norm": 1.0385948182811826, "learning_rate": 8.910788907937437e-06, "loss": 0.3682, "step": 5253 }, { "epoch": 0.23779135551029645, "grad_norm": 0.3969089405014628, "learning_rate": 8.910332197190623e-06, "loss": 0.5111, "step": 5254 }, { "epoch": 0.23783661461869202, "grad_norm": 0.7374498506816389, "learning_rate": 8.90987540242278e-06, "loss": 0.3867, "step": 5255 }, { "epoch": 0.23788187372708758, "grad_norm": 0.3126931159539168, "learning_rate": 8.909418523643724e-06, "loss": 0.4951, "step": 5256 }, { "epoch": 0.23792713283548314, "grad_norm": 0.6573278021852665, "learning_rate": 8.908961560863271e-06, "loss": 0.3913, "step": 5257 }, { "epoch": 0.2379723919438787, "grad_norm": 0.6754692628404956, "learning_rate": 8.908504514091239e-06, "loss": 0.4083, "step": 5258 }, { "epoch": 0.23801765105227427, "grad_norm": 0.7009135373593584, "learning_rate": 8.908047383337447e-06, "loss": 0.3772, "step": 5259 }, { "epoch": 0.23806291016066983, "grad_norm": 0.6307270990180422, "learning_rate": 8.907590168611724e-06, "loss": 0.4036, "step": 5260 }, { "epoch": 0.2381081692690654, "grad_norm": 0.3787399442226855, "learning_rate": 8.907132869923886e-06, "loss": 0.4853, "step": 5261 }, { "epoch": 0.23815342837746095, "grad_norm": 0.3359140168820819, "learning_rate": 8.906675487283764e-06, "loss": 0.4977, "step": 5262 }, { "epoch": 0.23819868748585654, "grad_norm": 0.7146020697874832, "learning_rate": 8.906218020701182e-06, "loss": 0.405, "step": 5263 }, { "epoch": 0.2382439465942521, "grad_norm": 0.6773098535047575, "learning_rate": 8.905760470185974e-06, "loss": 0.3726, "step": 5264 }, { "epoch": 0.23828920570264767, "grad_norm": 0.31021528458286485, "learning_rate": 8.90530283574797e-06, "loss": 0.4924, "step": 5265 }, { "epoch": 0.23833446481104323, "grad_norm": 0.6946956337630187, "learning_rate": 8.904845117397e-06, "loss": 0.3884, "step": 5266 }, { "epoch": 0.2383797239194388, "grad_norm": 0.6487113983731831, "learning_rate": 8.904387315142901e-06, "loss": 0.3681, "step": 5267 }, { "epoch": 0.23842498302783435, "grad_norm": 0.7148333414265486, "learning_rate": 8.903929428995512e-06, "loss": 0.3657, "step": 5268 }, { "epoch": 0.23847024213622992, "grad_norm": 0.6329638363612822, "learning_rate": 8.903471458964668e-06, "loss": 0.3237, "step": 5269 }, { "epoch": 0.23851550124462548, "grad_norm": 0.6737207547648599, "learning_rate": 8.903013405060212e-06, "loss": 0.3561, "step": 5270 }, { "epoch": 0.23856076035302104, "grad_norm": 0.7075110234059288, "learning_rate": 8.902555267291984e-06, "loss": 0.3589, "step": 5271 }, { "epoch": 0.2386060194614166, "grad_norm": 0.6371642429692355, "learning_rate": 8.90209704566983e-06, "loss": 0.3779, "step": 5272 }, { "epoch": 0.23865127856981216, "grad_norm": 0.6740952463066865, "learning_rate": 8.901638740203594e-06, "loss": 0.3782, "step": 5273 }, { "epoch": 0.23869653767820773, "grad_norm": 0.6595696256173088, "learning_rate": 8.901180350903125e-06, "loss": 0.3964, "step": 5274 }, { "epoch": 0.23874179678660332, "grad_norm": 0.6521331171437978, "learning_rate": 8.900721877778271e-06, "loss": 0.3956, "step": 5275 }, { "epoch": 0.23878705589499888, "grad_norm": 0.6925461727004567, "learning_rate": 8.900263320838886e-06, "loss": 0.357, "step": 5276 }, { "epoch": 0.23883231500339444, "grad_norm": 0.5489727830840708, "learning_rate": 8.899804680094818e-06, "loss": 0.525, "step": 5277 }, { "epoch": 0.23887757411179, "grad_norm": 0.6763946770947068, "learning_rate": 8.899345955555928e-06, "loss": 0.4357, "step": 5278 }, { "epoch": 0.23892283322018557, "grad_norm": 0.7201089468189605, "learning_rate": 8.898887147232066e-06, "loss": 0.429, "step": 5279 }, { "epoch": 0.23896809232858113, "grad_norm": 0.8181534036902893, "learning_rate": 8.898428255133098e-06, "loss": 0.3797, "step": 5280 }, { "epoch": 0.2390133514369767, "grad_norm": 0.38237795007697833, "learning_rate": 8.897969279268877e-06, "loss": 0.5104, "step": 5281 }, { "epoch": 0.23905861054537225, "grad_norm": 0.3744096064923189, "learning_rate": 8.897510219649268e-06, "loss": 0.4722, "step": 5282 }, { "epoch": 0.23910386965376781, "grad_norm": 0.6605872245547142, "learning_rate": 8.897051076284135e-06, "loss": 0.3431, "step": 5283 }, { "epoch": 0.23914912876216338, "grad_norm": 0.7269263863684868, "learning_rate": 8.896591849183343e-06, "loss": 0.3862, "step": 5284 }, { "epoch": 0.23919438787055894, "grad_norm": 0.3298538520006822, "learning_rate": 8.89613253835676e-06, "loss": 0.4864, "step": 5285 }, { "epoch": 0.2392396469789545, "grad_norm": 0.32610787321322837, "learning_rate": 8.895673143814254e-06, "loss": 0.481, "step": 5286 }, { "epoch": 0.2392849060873501, "grad_norm": 0.692626113129391, "learning_rate": 8.895213665565698e-06, "loss": 0.4213, "step": 5287 }, { "epoch": 0.23933016519574565, "grad_norm": 0.6756453499928873, "learning_rate": 8.894754103620963e-06, "loss": 0.3849, "step": 5288 }, { "epoch": 0.23937542430414122, "grad_norm": 0.6977591454885193, "learning_rate": 8.894294457989924e-06, "loss": 0.399, "step": 5289 }, { "epoch": 0.23942068341253678, "grad_norm": 0.7247753730009222, "learning_rate": 8.893834728682459e-06, "loss": 0.4267, "step": 5290 }, { "epoch": 0.23946594252093234, "grad_norm": 0.6429940559942522, "learning_rate": 8.893374915708443e-06, "loss": 0.3309, "step": 5291 }, { "epoch": 0.2395112016293279, "grad_norm": 0.6512035834490856, "learning_rate": 8.892915019077757e-06, "loss": 0.3736, "step": 5292 }, { "epoch": 0.23955646073772346, "grad_norm": 0.6223161302422051, "learning_rate": 8.892455038800286e-06, "loss": 0.3783, "step": 5293 }, { "epoch": 0.23960171984611903, "grad_norm": 0.6208666737007078, "learning_rate": 8.891994974885909e-06, "loss": 0.4299, "step": 5294 }, { "epoch": 0.2396469789545146, "grad_norm": 0.5913234393655277, "learning_rate": 8.891534827344514e-06, "loss": 0.5051, "step": 5295 }, { "epoch": 0.23969223806291015, "grad_norm": 0.6841295356040774, "learning_rate": 8.891074596185987e-06, "loss": 0.4066, "step": 5296 }, { "epoch": 0.2397374971713057, "grad_norm": 0.7106840922605392, "learning_rate": 8.890614281420218e-06, "loss": 0.4207, "step": 5297 }, { "epoch": 0.2397827562797013, "grad_norm": 0.6294873742101729, "learning_rate": 8.890153883057097e-06, "loss": 0.3652, "step": 5298 }, { "epoch": 0.23982801538809687, "grad_norm": 0.6989552091439057, "learning_rate": 8.889693401106516e-06, "loss": 0.3965, "step": 5299 }, { "epoch": 0.23987327449649243, "grad_norm": 0.6747147530686606, "learning_rate": 8.889232835578372e-06, "loss": 0.3384, "step": 5300 }, { "epoch": 0.239918533604888, "grad_norm": 0.6658941217429191, "learning_rate": 8.888772186482557e-06, "loss": 0.3815, "step": 5301 }, { "epoch": 0.23996379271328355, "grad_norm": 0.664905784458334, "learning_rate": 8.888311453828973e-06, "loss": 0.397, "step": 5302 }, { "epoch": 0.24000905182167911, "grad_norm": 0.7081115510513791, "learning_rate": 8.887850637627517e-06, "loss": 0.356, "step": 5303 }, { "epoch": 0.24005431093007468, "grad_norm": 0.7100845586062496, "learning_rate": 8.88738973788809e-06, "loss": 0.3899, "step": 5304 }, { "epoch": 0.24009957003847024, "grad_norm": 0.7060600319820818, "learning_rate": 8.8869287546206e-06, "loss": 0.4217, "step": 5305 }, { "epoch": 0.2401448291468658, "grad_norm": 0.6256192045316736, "learning_rate": 8.886467687834946e-06, "loss": 0.3582, "step": 5306 }, { "epoch": 0.24019008825526136, "grad_norm": 0.8902326378633588, "learning_rate": 8.88600653754104e-06, "loss": 0.3692, "step": 5307 }, { "epoch": 0.24023534736365693, "grad_norm": 0.6399337314616411, "learning_rate": 8.885545303748786e-06, "loss": 0.4017, "step": 5308 }, { "epoch": 0.2402806064720525, "grad_norm": 0.6870566412315947, "learning_rate": 8.8850839864681e-06, "loss": 0.3831, "step": 5309 }, { "epoch": 0.24032586558044808, "grad_norm": 0.6238546104441927, "learning_rate": 8.884622585708888e-06, "loss": 0.3703, "step": 5310 }, { "epoch": 0.24037112468884364, "grad_norm": 0.99261545468166, "learning_rate": 8.88416110148107e-06, "loss": 0.4248, "step": 5311 }, { "epoch": 0.2404163837972392, "grad_norm": 0.6688459208895308, "learning_rate": 8.883699533794558e-06, "loss": 0.4143, "step": 5312 }, { "epoch": 0.24046164290563476, "grad_norm": 0.649799512025244, "learning_rate": 8.883237882659271e-06, "loss": 0.3451, "step": 5313 }, { "epoch": 0.24050690201403033, "grad_norm": 0.6311487774639363, "learning_rate": 8.882776148085129e-06, "loss": 0.3589, "step": 5314 }, { "epoch": 0.2405521611224259, "grad_norm": 0.6287242122377914, "learning_rate": 8.882314330082051e-06, "loss": 0.3873, "step": 5315 }, { "epoch": 0.24059742023082145, "grad_norm": 0.6422213213204758, "learning_rate": 8.881852428659963e-06, "loss": 0.3887, "step": 5316 }, { "epoch": 0.240642679339217, "grad_norm": 0.7059140395496692, "learning_rate": 8.881390443828788e-06, "loss": 0.3831, "step": 5317 }, { "epoch": 0.24068793844761258, "grad_norm": 0.6526053796106218, "learning_rate": 8.880928375598453e-06, "loss": 0.4216, "step": 5318 }, { "epoch": 0.24073319755600814, "grad_norm": 0.6717680282483763, "learning_rate": 8.880466223978887e-06, "loss": 0.4068, "step": 5319 }, { "epoch": 0.2407784566644037, "grad_norm": 0.6617612520838584, "learning_rate": 8.880003988980019e-06, "loss": 0.3809, "step": 5320 }, { "epoch": 0.24082371577279926, "grad_norm": 0.6311219890329217, "learning_rate": 8.879541670611784e-06, "loss": 0.3892, "step": 5321 }, { "epoch": 0.24086897488119485, "grad_norm": 0.7371992835369472, "learning_rate": 8.879079268884113e-06, "loss": 0.369, "step": 5322 }, { "epoch": 0.24091423398959042, "grad_norm": 0.8539629154715503, "learning_rate": 8.878616783806939e-06, "loss": 0.3762, "step": 5323 }, { "epoch": 0.24095949309798598, "grad_norm": 0.6722749149455938, "learning_rate": 8.878154215390204e-06, "loss": 0.3884, "step": 5324 }, { "epoch": 0.24100475220638154, "grad_norm": 0.6719106851436162, "learning_rate": 8.877691563643848e-06, "loss": 0.3862, "step": 5325 }, { "epoch": 0.2410500113147771, "grad_norm": 0.6312868275883254, "learning_rate": 8.877228828577809e-06, "loss": 0.3606, "step": 5326 }, { "epoch": 0.24109527042317266, "grad_norm": 0.47262316317293573, "learning_rate": 8.876766010202029e-06, "loss": 0.5013, "step": 5327 }, { "epoch": 0.24114052953156823, "grad_norm": 0.7079327142025273, "learning_rate": 8.876303108526455e-06, "loss": 0.4296, "step": 5328 }, { "epoch": 0.2411857886399638, "grad_norm": 0.658648471499382, "learning_rate": 8.875840123561033e-06, "loss": 0.3946, "step": 5329 }, { "epoch": 0.24123104774835935, "grad_norm": 0.30867312660550816, "learning_rate": 8.875377055315709e-06, "loss": 0.4739, "step": 5330 }, { "epoch": 0.2412763068567549, "grad_norm": 0.6468211564058911, "learning_rate": 8.874913903800436e-06, "loss": 0.3642, "step": 5331 }, { "epoch": 0.24132156596515048, "grad_norm": 0.6775896448599275, "learning_rate": 8.874450669025161e-06, "loss": 0.415, "step": 5332 }, { "epoch": 0.24136682507354604, "grad_norm": 0.6495529703738707, "learning_rate": 8.873987350999843e-06, "loss": 0.3654, "step": 5333 }, { "epoch": 0.24141208418194163, "grad_norm": 0.4410663750016654, "learning_rate": 8.873523949734435e-06, "loss": 0.5283, "step": 5334 }, { "epoch": 0.2414573432903372, "grad_norm": 0.6581032625586402, "learning_rate": 8.873060465238894e-06, "loss": 0.3393, "step": 5335 }, { "epoch": 0.24150260239873275, "grad_norm": 0.6763820592845609, "learning_rate": 8.872596897523178e-06, "loss": 0.3877, "step": 5336 }, { "epoch": 0.24154786150712831, "grad_norm": 0.741517536605685, "learning_rate": 8.872133246597247e-06, "loss": 0.3722, "step": 5337 }, { "epoch": 0.24159312061552388, "grad_norm": 0.6843360209148616, "learning_rate": 8.871669512471068e-06, "loss": 0.3671, "step": 5338 }, { "epoch": 0.24163837972391944, "grad_norm": 0.6594839446020356, "learning_rate": 8.871205695154601e-06, "loss": 0.3787, "step": 5339 }, { "epoch": 0.241683638832315, "grad_norm": 0.37323150087019386, "learning_rate": 8.870741794657814e-06, "loss": 0.4727, "step": 5340 }, { "epoch": 0.24172889794071056, "grad_norm": 0.3654118177626498, "learning_rate": 8.870277810990671e-06, "loss": 0.5142, "step": 5341 }, { "epoch": 0.24177415704910613, "grad_norm": 0.6965027222944363, "learning_rate": 8.869813744163147e-06, "loss": 0.4065, "step": 5342 }, { "epoch": 0.2418194161575017, "grad_norm": 0.7002338027510239, "learning_rate": 8.86934959418521e-06, "loss": 0.3781, "step": 5343 }, { "epoch": 0.24186467526589725, "grad_norm": 0.6496249565713254, "learning_rate": 8.868885361066835e-06, "loss": 0.3628, "step": 5344 }, { "epoch": 0.24190993437429284, "grad_norm": 0.4174009112938387, "learning_rate": 8.868421044817994e-06, "loss": 0.4999, "step": 5345 }, { "epoch": 0.2419551934826884, "grad_norm": 0.7319192637518557, "learning_rate": 8.867956645448667e-06, "loss": 0.4531, "step": 5346 }, { "epoch": 0.24200045259108396, "grad_norm": 0.6793857881569461, "learning_rate": 8.86749216296883e-06, "loss": 0.3535, "step": 5347 }, { "epoch": 0.24204571169947953, "grad_norm": 0.3200373606682935, "learning_rate": 8.867027597388467e-06, "loss": 0.4857, "step": 5348 }, { "epoch": 0.2420909708078751, "grad_norm": 0.319293383103753, "learning_rate": 8.866562948717555e-06, "loss": 0.4753, "step": 5349 }, { "epoch": 0.24213622991627065, "grad_norm": 0.30674679923048015, "learning_rate": 8.866098216966081e-06, "loss": 0.5124, "step": 5350 }, { "epoch": 0.2421814890246662, "grad_norm": 0.30221657868269836, "learning_rate": 8.865633402144032e-06, "loss": 0.5116, "step": 5351 }, { "epoch": 0.24222674813306178, "grad_norm": 0.29530341560010703, "learning_rate": 8.865168504261392e-06, "loss": 0.4946, "step": 5352 }, { "epoch": 0.24227200724145734, "grad_norm": 0.7944404800721792, "learning_rate": 8.864703523328153e-06, "loss": 0.396, "step": 5353 }, { "epoch": 0.2423172663498529, "grad_norm": 0.33803840764758725, "learning_rate": 8.864238459354303e-06, "loss": 0.4723, "step": 5354 }, { "epoch": 0.24236252545824846, "grad_norm": 0.6518971223670713, "learning_rate": 8.863773312349838e-06, "loss": 0.3695, "step": 5355 }, { "epoch": 0.24240778456664402, "grad_norm": 0.3380621018492585, "learning_rate": 8.86330808232475e-06, "loss": 0.5039, "step": 5356 }, { "epoch": 0.24245304367503961, "grad_norm": 0.7447560157517059, "learning_rate": 8.862842769289037e-06, "loss": 0.3771, "step": 5357 }, { "epoch": 0.24249830278343518, "grad_norm": 0.7501764157418583, "learning_rate": 8.862377373252697e-06, "loss": 0.3699, "step": 5358 }, { "epoch": 0.24254356189183074, "grad_norm": 0.6265046919283168, "learning_rate": 8.86191189422573e-06, "loss": 0.4108, "step": 5359 }, { "epoch": 0.2425888210002263, "grad_norm": 0.6313737707557908, "learning_rate": 8.861446332218138e-06, "loss": 0.37, "step": 5360 }, { "epoch": 0.24263408010862186, "grad_norm": 0.7014318487147055, "learning_rate": 8.860980687239922e-06, "loss": 0.4077, "step": 5361 }, { "epoch": 0.24267933921701743, "grad_norm": 0.7094592637276969, "learning_rate": 8.86051495930109e-06, "loss": 0.3813, "step": 5362 }, { "epoch": 0.242724598325413, "grad_norm": 0.6135837069346777, "learning_rate": 8.860049148411649e-06, "loss": 0.3697, "step": 5363 }, { "epoch": 0.24276985743380855, "grad_norm": 0.6548772644362388, "learning_rate": 8.859583254581604e-06, "loss": 0.4045, "step": 5364 }, { "epoch": 0.2428151165422041, "grad_norm": 0.4681752414364971, "learning_rate": 8.859117277820972e-06, "loss": 0.5107, "step": 5365 }, { "epoch": 0.24286037565059967, "grad_norm": 0.7547141701141868, "learning_rate": 8.85865121813976e-06, "loss": 0.4012, "step": 5366 }, { "epoch": 0.24290563475899524, "grad_norm": 0.6690380482512771, "learning_rate": 8.858185075547987e-06, "loss": 0.4845, "step": 5367 }, { "epoch": 0.2429508938673908, "grad_norm": 0.6493813874743187, "learning_rate": 8.857718850055663e-06, "loss": 0.3698, "step": 5368 }, { "epoch": 0.2429961529757864, "grad_norm": 0.7556859630562037, "learning_rate": 8.857252541672812e-06, "loss": 0.429, "step": 5369 }, { "epoch": 0.24304141208418195, "grad_norm": 0.6888913998842668, "learning_rate": 8.856786150409448e-06, "loss": 0.3613, "step": 5370 }, { "epoch": 0.2430866711925775, "grad_norm": 0.648604761289055, "learning_rate": 8.856319676275595e-06, "loss": 0.3691, "step": 5371 }, { "epoch": 0.24313193030097308, "grad_norm": 0.5213434269140345, "learning_rate": 8.855853119281278e-06, "loss": 0.4927, "step": 5372 }, { "epoch": 0.24317718940936864, "grad_norm": 0.6643186693105698, "learning_rate": 8.855386479436518e-06, "loss": 0.4262, "step": 5373 }, { "epoch": 0.2432224485177642, "grad_norm": 0.6483221393724724, "learning_rate": 8.854919756751343e-06, "loss": 0.3478, "step": 5374 }, { "epoch": 0.24326770762615976, "grad_norm": 0.611895884992864, "learning_rate": 8.854452951235784e-06, "loss": 0.351, "step": 5375 }, { "epoch": 0.24331296673455532, "grad_norm": 0.6942629669349091, "learning_rate": 8.853986062899869e-06, "loss": 0.4008, "step": 5376 }, { "epoch": 0.2433582258429509, "grad_norm": 0.6256217162597607, "learning_rate": 8.853519091753629e-06, "loss": 0.3447, "step": 5377 }, { "epoch": 0.24340348495134645, "grad_norm": 0.6993569125923726, "learning_rate": 8.853052037807099e-06, "loss": 0.3695, "step": 5378 }, { "epoch": 0.243448744059742, "grad_norm": 0.6163882470276689, "learning_rate": 8.852584901070314e-06, "loss": 0.4004, "step": 5379 }, { "epoch": 0.24349400316813757, "grad_norm": 0.6280681657569366, "learning_rate": 8.852117681553312e-06, "loss": 0.3811, "step": 5380 }, { "epoch": 0.24353926227653316, "grad_norm": 0.44492594792209034, "learning_rate": 8.851650379266133e-06, "loss": 0.5072, "step": 5381 }, { "epoch": 0.24358452138492873, "grad_norm": 0.7206872723939073, "learning_rate": 8.851182994218815e-06, "loss": 0.3498, "step": 5382 }, { "epoch": 0.2436297804933243, "grad_norm": 0.6696865366923967, "learning_rate": 8.850715526421404e-06, "loss": 0.4066, "step": 5383 }, { "epoch": 0.24367503960171985, "grad_norm": 0.6626802638484196, "learning_rate": 8.850247975883942e-06, "loss": 0.3876, "step": 5384 }, { "epoch": 0.2437202987101154, "grad_norm": 0.6755263223808509, "learning_rate": 8.849780342616477e-06, "loss": 0.3734, "step": 5385 }, { "epoch": 0.24376555781851098, "grad_norm": 0.6319556903000817, "learning_rate": 8.849312626629055e-06, "loss": 0.3842, "step": 5386 }, { "epoch": 0.24381081692690654, "grad_norm": 0.6609492411166165, "learning_rate": 8.848844827931727e-06, "loss": 0.3761, "step": 5387 }, { "epoch": 0.2438560760353021, "grad_norm": 0.6241567642176671, "learning_rate": 8.848376946534545e-06, "loss": 0.3792, "step": 5388 }, { "epoch": 0.24390133514369766, "grad_norm": 0.7183087410306984, "learning_rate": 8.847908982447561e-06, "loss": 0.4036, "step": 5389 }, { "epoch": 0.24394659425209322, "grad_norm": 0.7218942848451008, "learning_rate": 8.847440935680833e-06, "loss": 0.3661, "step": 5390 }, { "epoch": 0.2439918533604888, "grad_norm": 0.6694338164281567, "learning_rate": 8.846972806244415e-06, "loss": 0.3447, "step": 5391 }, { "epoch": 0.24403711246888438, "grad_norm": 0.6908215455706206, "learning_rate": 8.846504594148366e-06, "loss": 0.3893, "step": 5392 }, { "epoch": 0.24408237157727994, "grad_norm": 0.6561908456944305, "learning_rate": 8.846036299402747e-06, "loss": 0.4021, "step": 5393 }, { "epoch": 0.2441276306856755, "grad_norm": 0.4597583708167697, "learning_rate": 8.84556792201762e-06, "loss": 0.5165, "step": 5394 }, { "epoch": 0.24417288979407106, "grad_norm": 0.6821535747642689, "learning_rate": 8.845099462003049e-06, "loss": 0.4006, "step": 5395 }, { "epoch": 0.24421814890246663, "grad_norm": 0.3192590633854796, "learning_rate": 8.844630919369099e-06, "loss": 0.4962, "step": 5396 }, { "epoch": 0.2442634080108622, "grad_norm": 0.7205417774604912, "learning_rate": 8.84416229412584e-06, "loss": 0.3918, "step": 5397 }, { "epoch": 0.24430866711925775, "grad_norm": 0.647692178832509, "learning_rate": 8.84369358628334e-06, "loss": 0.3968, "step": 5398 }, { "epoch": 0.2443539262276533, "grad_norm": 0.6842297749923265, "learning_rate": 8.843224795851668e-06, "loss": 0.4078, "step": 5399 }, { "epoch": 0.24439918533604887, "grad_norm": 0.7569759066971131, "learning_rate": 8.8427559228409e-06, "loss": 0.3973, "step": 5400 }, { "epoch": 0.24444444444444444, "grad_norm": 0.6414284504701626, "learning_rate": 8.842286967261109e-06, "loss": 0.3699, "step": 5401 }, { "epoch": 0.24448970355284, "grad_norm": 0.44428672016307075, "learning_rate": 8.841817929122373e-06, "loss": 0.4885, "step": 5402 }, { "epoch": 0.24453496266123556, "grad_norm": 0.7171617724470963, "learning_rate": 8.841348808434766e-06, "loss": 0.3672, "step": 5403 }, { "epoch": 0.24458022176963115, "grad_norm": 0.37253477989149236, "learning_rate": 8.840879605208374e-06, "loss": 0.4672, "step": 5404 }, { "epoch": 0.2446254808780267, "grad_norm": 0.7108694071388104, "learning_rate": 8.840410319453274e-06, "loss": 0.4388, "step": 5405 }, { "epoch": 0.24467073998642228, "grad_norm": 0.6828806879996595, "learning_rate": 8.839940951179552e-06, "loss": 0.3855, "step": 5406 }, { "epoch": 0.24471599909481784, "grad_norm": 0.3537449598703036, "learning_rate": 8.839471500397292e-06, "loss": 0.4777, "step": 5407 }, { "epoch": 0.2447612582032134, "grad_norm": 0.7020097644297246, "learning_rate": 8.83900196711658e-06, "loss": 0.3823, "step": 5408 }, { "epoch": 0.24480651731160896, "grad_norm": 0.6807381310828038, "learning_rate": 8.838532351347509e-06, "loss": 0.4012, "step": 5409 }, { "epoch": 0.24485177642000452, "grad_norm": 0.7073324268304318, "learning_rate": 8.838062653100165e-06, "loss": 0.4034, "step": 5410 }, { "epoch": 0.2448970355284001, "grad_norm": 0.7850841150864362, "learning_rate": 8.837592872384643e-06, "loss": 0.3713, "step": 5411 }, { "epoch": 0.24494229463679565, "grad_norm": 0.6864920531829904, "learning_rate": 8.837123009211038e-06, "loss": 0.4047, "step": 5412 }, { "epoch": 0.2449875537451912, "grad_norm": 0.48113930173789576, "learning_rate": 8.836653063589443e-06, "loss": 0.4991, "step": 5413 }, { "epoch": 0.24503281285358677, "grad_norm": 0.4411436163117178, "learning_rate": 8.836183035529954e-06, "loss": 0.4979, "step": 5414 }, { "epoch": 0.24507807196198234, "grad_norm": 0.7466742119661637, "learning_rate": 8.835712925042678e-06, "loss": 0.3974, "step": 5415 }, { "epoch": 0.24512333107037793, "grad_norm": 0.7179041559362122, "learning_rate": 8.83524273213771e-06, "loss": 0.3743, "step": 5416 }, { "epoch": 0.2451685901787735, "grad_norm": 0.6572146908802492, "learning_rate": 8.834772456825155e-06, "loss": 0.4023, "step": 5417 }, { "epoch": 0.24521384928716905, "grad_norm": 0.5038071350955399, "learning_rate": 8.834302099115118e-06, "loss": 0.5026, "step": 5418 }, { "epoch": 0.2452591083955646, "grad_norm": 0.8540148243050669, "learning_rate": 8.833831659017703e-06, "loss": 0.3645, "step": 5419 }, { "epoch": 0.24530436750396017, "grad_norm": 0.6457518436451026, "learning_rate": 8.833361136543021e-06, "loss": 0.3766, "step": 5420 }, { "epoch": 0.24534962661235574, "grad_norm": 0.7338945386325528, "learning_rate": 8.832890531701184e-06, "loss": 0.4062, "step": 5421 }, { "epoch": 0.2453948857207513, "grad_norm": 0.7518020196047538, "learning_rate": 8.832419844502298e-06, "loss": 0.4127, "step": 5422 }, { "epoch": 0.24544014482914686, "grad_norm": 0.6440486004965358, "learning_rate": 8.831949074956483e-06, "loss": 0.3488, "step": 5423 }, { "epoch": 0.24548540393754242, "grad_norm": 0.3900943962261443, "learning_rate": 8.831478223073848e-06, "loss": 0.4871, "step": 5424 }, { "epoch": 0.24553066304593799, "grad_norm": 0.36365756981959374, "learning_rate": 8.831007288864517e-06, "loss": 0.4751, "step": 5425 }, { "epoch": 0.24557592215433355, "grad_norm": 0.9342732459976021, "learning_rate": 8.830536272338602e-06, "loss": 0.3965, "step": 5426 }, { "epoch": 0.24562118126272914, "grad_norm": 0.7180496226403557, "learning_rate": 8.830065173506229e-06, "loss": 0.4072, "step": 5427 }, { "epoch": 0.2456664403711247, "grad_norm": 0.6947647126181522, "learning_rate": 8.829593992377518e-06, "loss": 0.3607, "step": 5428 }, { "epoch": 0.24571169947952026, "grad_norm": 0.4581266564149199, "learning_rate": 8.829122728962594e-06, "loss": 0.4792, "step": 5429 }, { "epoch": 0.24575695858791582, "grad_norm": 0.837846180542243, "learning_rate": 8.828651383271582e-06, "loss": 0.4022, "step": 5430 }, { "epoch": 0.2458022176963114, "grad_norm": 0.8475559381295793, "learning_rate": 8.828179955314612e-06, "loss": 0.3761, "step": 5431 }, { "epoch": 0.24584747680470695, "grad_norm": 0.679435079467592, "learning_rate": 8.827708445101813e-06, "loss": 0.3274, "step": 5432 }, { "epoch": 0.2458927359131025, "grad_norm": 0.7471578999986938, "learning_rate": 8.827236852643313e-06, "loss": 0.3658, "step": 5433 }, { "epoch": 0.24593799502149807, "grad_norm": 0.7937943646003452, "learning_rate": 8.826765177949248e-06, "loss": 0.3832, "step": 5434 }, { "epoch": 0.24598325412989364, "grad_norm": 0.7468722171561877, "learning_rate": 8.826293421029754e-06, "loss": 0.3874, "step": 5435 }, { "epoch": 0.2460285132382892, "grad_norm": 0.6400369840427028, "learning_rate": 8.825821581894964e-06, "loss": 0.4022, "step": 5436 }, { "epoch": 0.24607377234668476, "grad_norm": 0.40295831861440534, "learning_rate": 8.82534966055502e-06, "loss": 0.4951, "step": 5437 }, { "epoch": 0.24611903145508032, "grad_norm": 0.401916187918165, "learning_rate": 8.824877657020058e-06, "loss": 0.4945, "step": 5438 }, { "epoch": 0.2461642905634759, "grad_norm": 0.7838023003559151, "learning_rate": 8.824405571300225e-06, "loss": 0.3804, "step": 5439 }, { "epoch": 0.24620954967187147, "grad_norm": 0.9976284832408775, "learning_rate": 8.82393340340566e-06, "loss": 0.3876, "step": 5440 }, { "epoch": 0.24625480878026704, "grad_norm": 0.6993269738534612, "learning_rate": 8.823461153346512e-06, "loss": 0.3865, "step": 5441 }, { "epoch": 0.2463000678886626, "grad_norm": 0.6974540450082418, "learning_rate": 8.822988821132925e-06, "loss": 0.3761, "step": 5442 }, { "epoch": 0.24634532699705816, "grad_norm": 0.6574267966627634, "learning_rate": 8.822516406775051e-06, "loss": 0.3712, "step": 5443 }, { "epoch": 0.24639058610545372, "grad_norm": 0.7192051932707151, "learning_rate": 8.822043910283041e-06, "loss": 0.3694, "step": 5444 }, { "epoch": 0.24643584521384929, "grad_norm": 0.6941915904680251, "learning_rate": 8.821571331667043e-06, "loss": 0.356, "step": 5445 }, { "epoch": 0.24648110432224485, "grad_norm": 0.6784074255903936, "learning_rate": 8.821098670937215e-06, "loss": 0.4146, "step": 5446 }, { "epoch": 0.2465263634306404, "grad_norm": 0.6372613000327357, "learning_rate": 8.820625928103712e-06, "loss": 0.3657, "step": 5447 }, { "epoch": 0.24657162253903597, "grad_norm": 0.565621513237359, "learning_rate": 8.820153103176692e-06, "loss": 0.4783, "step": 5448 }, { "epoch": 0.24661688164743154, "grad_norm": 0.7421946650728314, "learning_rate": 8.819680196166315e-06, "loss": 0.405, "step": 5449 }, { "epoch": 0.2466621407558271, "grad_norm": 0.7230679706345529, "learning_rate": 8.819207207082741e-06, "loss": 0.3889, "step": 5450 }, { "epoch": 0.2467073998642227, "grad_norm": 0.6871674592758005, "learning_rate": 8.818734135936136e-06, "loss": 0.3559, "step": 5451 }, { "epoch": 0.24675265897261825, "grad_norm": 0.704175915763451, "learning_rate": 8.818260982736662e-06, "loss": 0.389, "step": 5452 }, { "epoch": 0.2467979180810138, "grad_norm": 0.6566117862980755, "learning_rate": 8.817787747494484e-06, "loss": 0.4332, "step": 5453 }, { "epoch": 0.24684317718940937, "grad_norm": 1.9586366774313648, "learning_rate": 8.817314430219775e-06, "loss": 0.3669, "step": 5454 }, { "epoch": 0.24688843629780494, "grad_norm": 0.6883090670651413, "learning_rate": 8.816841030922702e-06, "loss": 0.3951, "step": 5455 }, { "epoch": 0.2469336954062005, "grad_norm": 0.6684753360093686, "learning_rate": 8.816367549613439e-06, "loss": 0.3708, "step": 5456 }, { "epoch": 0.24697895451459606, "grad_norm": 0.6699780649262084, "learning_rate": 8.815893986302158e-06, "loss": 0.36, "step": 5457 }, { "epoch": 0.24702421362299162, "grad_norm": 0.6751583780687819, "learning_rate": 8.815420340999034e-06, "loss": 0.3692, "step": 5458 }, { "epoch": 0.24706947273138719, "grad_norm": 0.8641158746125323, "learning_rate": 8.814946613714244e-06, "loss": 0.3599, "step": 5459 }, { "epoch": 0.24711473183978275, "grad_norm": 0.6746092328932488, "learning_rate": 8.81447280445797e-06, "loss": 0.428, "step": 5460 }, { "epoch": 0.2471599909481783, "grad_norm": 0.8619884401255935, "learning_rate": 8.81399891324039e-06, "loss": 0.3568, "step": 5461 }, { "epoch": 0.24720525005657387, "grad_norm": 0.5537299656020676, "learning_rate": 8.813524940071687e-06, "loss": 0.5229, "step": 5462 }, { "epoch": 0.24725050916496946, "grad_norm": 0.7466826870291752, "learning_rate": 8.813050884962046e-06, "loss": 0.3674, "step": 5463 }, { "epoch": 0.24729576827336502, "grad_norm": 0.32719231858791137, "learning_rate": 8.812576747921653e-06, "loss": 0.4921, "step": 5464 }, { "epoch": 0.2473410273817606, "grad_norm": 0.7554589756926923, "learning_rate": 8.812102528960693e-06, "loss": 0.3852, "step": 5465 }, { "epoch": 0.24738628649015615, "grad_norm": 0.7777119738955086, "learning_rate": 8.81162822808936e-06, "loss": 0.4032, "step": 5466 }, { "epoch": 0.2474315455985517, "grad_norm": 0.5932440156262864, "learning_rate": 8.811153845317842e-06, "loss": 0.3426, "step": 5467 }, { "epoch": 0.24747680470694727, "grad_norm": 0.7747935373734771, "learning_rate": 8.810679380656331e-06, "loss": 0.3747, "step": 5468 }, { "epoch": 0.24752206381534284, "grad_norm": 0.834579211230089, "learning_rate": 8.810204834115026e-06, "loss": 0.3553, "step": 5469 }, { "epoch": 0.2475673229237384, "grad_norm": 0.7751947065367353, "learning_rate": 8.80973020570412e-06, "loss": 0.3669, "step": 5470 }, { "epoch": 0.24761258203213396, "grad_norm": 0.6760212197334325, "learning_rate": 8.809255495433814e-06, "loss": 0.3715, "step": 5471 }, { "epoch": 0.24765784114052952, "grad_norm": 0.7701164610513445, "learning_rate": 8.808780703314305e-06, "loss": 0.4749, "step": 5472 }, { "epoch": 0.24770310024892508, "grad_norm": 0.7626850010355772, "learning_rate": 8.808305829355797e-06, "loss": 0.3791, "step": 5473 }, { "epoch": 0.24774835935732067, "grad_norm": 0.7405751108830647, "learning_rate": 8.807830873568493e-06, "loss": 0.4131, "step": 5474 }, { "epoch": 0.24779361846571624, "grad_norm": 0.39037150438658097, "learning_rate": 8.8073558359626e-06, "loss": 0.4998, "step": 5475 }, { "epoch": 0.2478388775741118, "grad_norm": 0.6539788911826464, "learning_rate": 8.806880716548322e-06, "loss": 0.4044, "step": 5476 }, { "epoch": 0.24788413668250736, "grad_norm": 0.6535225496446154, "learning_rate": 8.80640551533587e-06, "loss": 0.3887, "step": 5477 }, { "epoch": 0.24792939579090292, "grad_norm": 0.4697297560970744, "learning_rate": 8.805930232335454e-06, "loss": 0.5045, "step": 5478 }, { "epoch": 0.24797465489929849, "grad_norm": 0.7089333794869764, "learning_rate": 8.805454867557284e-06, "loss": 0.3496, "step": 5479 }, { "epoch": 0.24801991400769405, "grad_norm": 0.6986426965189364, "learning_rate": 8.804979421011579e-06, "loss": 0.4134, "step": 5480 }, { "epoch": 0.2480651731160896, "grad_norm": 0.6432027825709875, "learning_rate": 8.804503892708552e-06, "loss": 0.3891, "step": 5481 }, { "epoch": 0.24811043222448517, "grad_norm": 0.6516825549121646, "learning_rate": 8.80402828265842e-06, "loss": 0.3936, "step": 5482 }, { "epoch": 0.24815569133288073, "grad_norm": 0.6905744064567494, "learning_rate": 8.803552590871406e-06, "loss": 0.3776, "step": 5483 }, { "epoch": 0.2482009504412763, "grad_norm": 0.770332771693627, "learning_rate": 8.803076817357725e-06, "loss": 0.4089, "step": 5484 }, { "epoch": 0.24824620954967186, "grad_norm": 0.7445899391534694, "learning_rate": 8.802600962127606e-06, "loss": 0.4047, "step": 5485 }, { "epoch": 0.24829146865806745, "grad_norm": 0.610507084099802, "learning_rate": 8.802125025191268e-06, "loss": 0.4052, "step": 5486 }, { "epoch": 0.248336727766463, "grad_norm": 0.6567358608756095, "learning_rate": 8.801649006558943e-06, "loss": 0.4266, "step": 5487 }, { "epoch": 0.24838198687485857, "grad_norm": 0.6596038921834942, "learning_rate": 8.801172906240857e-06, "loss": 0.4, "step": 5488 }, { "epoch": 0.24842724598325414, "grad_norm": 0.590396728859398, "learning_rate": 8.800696724247239e-06, "loss": 0.3684, "step": 5489 }, { "epoch": 0.2484725050916497, "grad_norm": 0.6386192813750432, "learning_rate": 8.800220460588321e-06, "loss": 0.3714, "step": 5490 }, { "epoch": 0.24851776420004526, "grad_norm": 0.6997118465528872, "learning_rate": 8.799744115274339e-06, "loss": 0.4147, "step": 5491 }, { "epoch": 0.24856302330844082, "grad_norm": 0.6565255682296418, "learning_rate": 8.799267688315523e-06, "loss": 0.3748, "step": 5492 }, { "epoch": 0.24860828241683638, "grad_norm": 0.6225869450633494, "learning_rate": 8.798791179722114e-06, "loss": 0.3724, "step": 5493 }, { "epoch": 0.24865354152523195, "grad_norm": 0.7199023735140465, "learning_rate": 8.798314589504348e-06, "loss": 0.3847, "step": 5494 }, { "epoch": 0.2486988006336275, "grad_norm": 0.6212669132572823, "learning_rate": 8.79783791767247e-06, "loss": 0.5017, "step": 5495 }, { "epoch": 0.24874405974202307, "grad_norm": 0.6840190455449334, "learning_rate": 8.797361164236717e-06, "loss": 0.3892, "step": 5496 }, { "epoch": 0.24878931885041863, "grad_norm": 0.673935866571196, "learning_rate": 8.796884329207337e-06, "loss": 0.3901, "step": 5497 }, { "epoch": 0.24883457795881422, "grad_norm": 0.6989693763930492, "learning_rate": 8.796407412594573e-06, "loss": 0.3867, "step": 5498 }, { "epoch": 0.24887983706720979, "grad_norm": 0.3414127366149348, "learning_rate": 8.795930414408676e-06, "loss": 0.4909, "step": 5499 }, { "epoch": 0.24892509617560535, "grad_norm": 0.3350419170199567, "learning_rate": 8.795453334659889e-06, "loss": 0.4988, "step": 5500 }, { "epoch": 0.2489703552840009, "grad_norm": 0.7526309166356293, "learning_rate": 8.79497617335847e-06, "loss": 0.3886, "step": 5501 }, { "epoch": 0.24901561439239647, "grad_norm": 0.6392150511382405, "learning_rate": 8.794498930514666e-06, "loss": 0.3453, "step": 5502 }, { "epoch": 0.24906087350079203, "grad_norm": 0.6385867732761876, "learning_rate": 8.794021606138734e-06, "loss": 0.3432, "step": 5503 }, { "epoch": 0.2491061326091876, "grad_norm": 0.6785794290773549, "learning_rate": 8.793544200240932e-06, "loss": 0.3906, "step": 5504 }, { "epoch": 0.24915139171758316, "grad_norm": 0.6534436391608166, "learning_rate": 8.793066712831515e-06, "loss": 0.3868, "step": 5505 }, { "epoch": 0.24919665082597872, "grad_norm": 0.6416942590392011, "learning_rate": 8.792589143920743e-06, "loss": 0.3912, "step": 5506 }, { "epoch": 0.24924190993437428, "grad_norm": 0.6457455294679602, "learning_rate": 8.792111493518878e-06, "loss": 0.3795, "step": 5507 }, { "epoch": 0.24928716904276985, "grad_norm": 0.6324241805057175, "learning_rate": 8.791633761636186e-06, "loss": 0.3617, "step": 5508 }, { "epoch": 0.2493324281511654, "grad_norm": 0.7451581373182293, "learning_rate": 8.791155948282927e-06, "loss": 0.4277, "step": 5509 }, { "epoch": 0.249377687259561, "grad_norm": 0.6652436681096543, "learning_rate": 8.790678053469372e-06, "loss": 0.4655, "step": 5510 }, { "epoch": 0.24942294636795656, "grad_norm": 0.6862925638345763, "learning_rate": 8.790200077205789e-06, "loss": 0.4131, "step": 5511 }, { "epoch": 0.24946820547635212, "grad_norm": 0.6767907349063768, "learning_rate": 8.789722019502444e-06, "loss": 0.415, "step": 5512 }, { "epoch": 0.24951346458474769, "grad_norm": 0.3808039754062271, "learning_rate": 8.789243880369613e-06, "loss": 0.4734, "step": 5513 }, { "epoch": 0.24955872369314325, "grad_norm": 0.6477071323204457, "learning_rate": 8.78876565981757e-06, "loss": 0.3865, "step": 5514 }, { "epoch": 0.2496039828015388, "grad_norm": 0.7227539653597835, "learning_rate": 8.788287357856588e-06, "loss": 0.3678, "step": 5515 }, { "epoch": 0.24964924190993437, "grad_norm": 0.3728619700056104, "learning_rate": 8.787808974496946e-06, "loss": 0.5286, "step": 5516 }, { "epoch": 0.24969450101832993, "grad_norm": 0.8103874044499024, "learning_rate": 8.787330509748924e-06, "loss": 0.3696, "step": 5517 }, { "epoch": 0.2497397601267255, "grad_norm": 0.6919952359581814, "learning_rate": 8.786851963622799e-06, "loss": 0.4202, "step": 5518 }, { "epoch": 0.24978501923512106, "grad_norm": 0.6528924734294044, "learning_rate": 8.786373336128858e-06, "loss": 0.399, "step": 5519 }, { "epoch": 0.24983027834351662, "grad_norm": 0.7227289627808617, "learning_rate": 8.78589462727738e-06, "loss": 0.3805, "step": 5520 }, { "epoch": 0.2498755374519122, "grad_norm": 0.6116874693101342, "learning_rate": 8.785415837078655e-06, "loss": 0.3814, "step": 5521 }, { "epoch": 0.24992079656030777, "grad_norm": 0.47326084506519833, "learning_rate": 8.78493696554297e-06, "loss": 0.5113, "step": 5522 }, { "epoch": 0.24996605566870334, "grad_norm": 0.6477654979674229, "learning_rate": 8.784458012680614e-06, "loss": 0.3818, "step": 5523 }, { "epoch": 0.25001131477709887, "grad_norm": 0.6210811401562978, "learning_rate": 8.783978978501879e-06, "loss": 0.379, "step": 5524 }, { "epoch": 0.25005657388549446, "grad_norm": 0.6830342901861352, "learning_rate": 8.783499863017057e-06, "loss": 0.3561, "step": 5525 }, { "epoch": 0.25010183299389, "grad_norm": 0.6731567670022601, "learning_rate": 8.783020666236443e-06, "loss": 0.4218, "step": 5526 }, { "epoch": 0.2501470921022856, "grad_norm": 0.65807887584648, "learning_rate": 8.782541388170334e-06, "loss": 0.3543, "step": 5527 }, { "epoch": 0.2501923512106812, "grad_norm": 0.8695506353899523, "learning_rate": 8.782062028829028e-06, "loss": 0.3845, "step": 5528 }, { "epoch": 0.2502376103190767, "grad_norm": 0.6392487323412227, "learning_rate": 8.781582588222823e-06, "loss": 0.3622, "step": 5529 }, { "epoch": 0.2502828694274723, "grad_norm": 0.5861804047598004, "learning_rate": 8.781103066362024e-06, "loss": 0.3361, "step": 5530 }, { "epoch": 0.25032812853586783, "grad_norm": 0.6529690974582265, "learning_rate": 8.780623463256932e-06, "loss": 0.353, "step": 5531 }, { "epoch": 0.2503733876442634, "grad_norm": 0.6388205223624392, "learning_rate": 8.780143778917853e-06, "loss": 0.3629, "step": 5532 }, { "epoch": 0.25041864675265896, "grad_norm": 0.5135197990140405, "learning_rate": 8.779664013355095e-06, "loss": 0.5079, "step": 5533 }, { "epoch": 0.25046390586105455, "grad_norm": 0.6759508366790495, "learning_rate": 8.779184166578965e-06, "loss": 0.3658, "step": 5534 }, { "epoch": 0.2505091649694501, "grad_norm": 0.67004678301159, "learning_rate": 8.778704238599775e-06, "loss": 0.3724, "step": 5535 }, { "epoch": 0.25055442407784567, "grad_norm": 0.7186727224626106, "learning_rate": 8.778224229427836e-06, "loss": 0.3948, "step": 5536 }, { "epoch": 0.2505996831862412, "grad_norm": 0.6462913809859987, "learning_rate": 8.777744139073461e-06, "loss": 0.397, "step": 5537 }, { "epoch": 0.2506449422946368, "grad_norm": 0.6317619249321907, "learning_rate": 8.777263967546969e-06, "loss": 0.4191, "step": 5538 }, { "epoch": 0.2506902014030324, "grad_norm": 0.6514663160067772, "learning_rate": 8.776783714858672e-06, "loss": 0.3998, "step": 5539 }, { "epoch": 0.2507354605114279, "grad_norm": 0.6606566810098499, "learning_rate": 8.776303381018895e-06, "loss": 0.3773, "step": 5540 }, { "epoch": 0.2507807196198235, "grad_norm": 0.696273850276573, "learning_rate": 8.775822966037956e-06, "loss": 0.3711, "step": 5541 }, { "epoch": 0.25082597872821905, "grad_norm": 0.6246857391708531, "learning_rate": 8.775342469926178e-06, "loss": 0.3843, "step": 5542 }, { "epoch": 0.25087123783661464, "grad_norm": 0.6057970609141705, "learning_rate": 8.774861892693886e-06, "loss": 0.3556, "step": 5543 }, { "epoch": 0.25091649694501017, "grad_norm": 0.6359022529488361, "learning_rate": 8.774381234351406e-06, "loss": 0.3758, "step": 5544 }, { "epoch": 0.25096175605340576, "grad_norm": 0.6749517080642095, "learning_rate": 8.773900494909065e-06, "loss": 0.3607, "step": 5545 }, { "epoch": 0.2510070151618013, "grad_norm": 0.45212094632264954, "learning_rate": 8.77341967437719e-06, "loss": 0.5107, "step": 5546 }, { "epoch": 0.2510522742701969, "grad_norm": 0.6890815621422842, "learning_rate": 8.77293877276612e-06, "loss": 0.4256, "step": 5547 }, { "epoch": 0.2510975333785924, "grad_norm": 0.6796933014357068, "learning_rate": 8.77245779008618e-06, "loss": 0.3561, "step": 5548 }, { "epoch": 0.251142792486988, "grad_norm": 0.7136438532760775, "learning_rate": 8.77197672634771e-06, "loss": 0.3897, "step": 5549 }, { "epoch": 0.25118805159538354, "grad_norm": 0.6531877591429376, "learning_rate": 8.771495581561043e-06, "loss": 0.4194, "step": 5550 }, { "epoch": 0.25123331070377913, "grad_norm": 0.3472284463790447, "learning_rate": 8.77101435573652e-06, "loss": 0.5091, "step": 5551 }, { "epoch": 0.2512785698121747, "grad_norm": 0.6351845727041716, "learning_rate": 8.770533048884483e-06, "loss": 0.3905, "step": 5552 }, { "epoch": 0.25132382892057026, "grad_norm": 0.6696146617160268, "learning_rate": 8.77005166101527e-06, "loss": 0.439, "step": 5553 }, { "epoch": 0.25136908802896585, "grad_norm": 0.6208498844865883, "learning_rate": 8.769570192139224e-06, "loss": 0.3722, "step": 5554 }, { "epoch": 0.2514143471373614, "grad_norm": 0.7578644508445018, "learning_rate": 8.76908864226669e-06, "loss": 0.3978, "step": 5555 }, { "epoch": 0.251459606245757, "grad_norm": 0.6320177265238178, "learning_rate": 8.768607011408021e-06, "loss": 0.3843, "step": 5556 }, { "epoch": 0.2515048653541525, "grad_norm": 0.5742711543775584, "learning_rate": 8.76812529957356e-06, "loss": 0.3584, "step": 5557 }, { "epoch": 0.2515501244625481, "grad_norm": 0.6223855477017578, "learning_rate": 8.76764350677366e-06, "loss": 0.3568, "step": 5558 }, { "epoch": 0.25159538357094363, "grad_norm": 0.5993825762871401, "learning_rate": 8.76716163301867e-06, "loss": 0.3652, "step": 5559 }, { "epoch": 0.2516406426793392, "grad_norm": 0.8458811737927945, "learning_rate": 8.76667967831895e-06, "loss": 0.3735, "step": 5560 }, { "epoch": 0.25168590178773476, "grad_norm": 0.40384503794198034, "learning_rate": 8.76619764268485e-06, "loss": 0.492, "step": 5561 }, { "epoch": 0.25173116089613035, "grad_norm": 0.6648333028328689, "learning_rate": 8.76571552612673e-06, "loss": 0.4102, "step": 5562 }, { "epoch": 0.25177642000452594, "grad_norm": 0.6475418274382717, "learning_rate": 8.765233328654949e-06, "loss": 0.3926, "step": 5563 }, { "epoch": 0.25182167911292147, "grad_norm": 0.6015760283334478, "learning_rate": 8.764751050279868e-06, "loss": 0.3489, "step": 5564 }, { "epoch": 0.25186693822131706, "grad_norm": 0.6921602856411545, "learning_rate": 8.764268691011851e-06, "loss": 0.4221, "step": 5565 }, { "epoch": 0.2519121973297126, "grad_norm": 0.7213082623671482, "learning_rate": 8.763786250861258e-06, "loss": 0.4028, "step": 5566 }, { "epoch": 0.2519574564381082, "grad_norm": 0.6666064268052584, "learning_rate": 8.76330372983846e-06, "loss": 0.3781, "step": 5567 }, { "epoch": 0.2520027155465037, "grad_norm": 0.6081436800667175, "learning_rate": 8.762821127953821e-06, "loss": 0.356, "step": 5568 }, { "epoch": 0.2520479746548993, "grad_norm": 0.6309678293578441, "learning_rate": 8.762338445217713e-06, "loss": 0.3625, "step": 5569 }, { "epoch": 0.25209323376329484, "grad_norm": 0.38533445695115726, "learning_rate": 8.761855681640508e-06, "loss": 0.4888, "step": 5570 }, { "epoch": 0.25213849287169043, "grad_norm": 0.6644202585973386, "learning_rate": 8.761372837232578e-06, "loss": 0.3547, "step": 5571 }, { "epoch": 0.25218375198008597, "grad_norm": 0.6490414722219586, "learning_rate": 8.760889912004297e-06, "loss": 0.3561, "step": 5572 }, { "epoch": 0.25222901108848156, "grad_norm": 0.721093897991297, "learning_rate": 8.760406905966045e-06, "loss": 0.3941, "step": 5573 }, { "epoch": 0.25227427019687715, "grad_norm": 0.7217691975495019, "learning_rate": 8.759923819128196e-06, "loss": 0.3836, "step": 5574 }, { "epoch": 0.2523195293052727, "grad_norm": 0.3325969229718621, "learning_rate": 8.759440651501131e-06, "loss": 0.5095, "step": 5575 }, { "epoch": 0.2523647884136683, "grad_norm": 0.7479140074598437, "learning_rate": 8.758957403095234e-06, "loss": 0.4595, "step": 5576 }, { "epoch": 0.2524100475220638, "grad_norm": 0.7376226797049706, "learning_rate": 8.758474073920887e-06, "loss": 0.3628, "step": 5577 }, { "epoch": 0.2524553066304594, "grad_norm": 0.32568130987292926, "learning_rate": 8.757990663988474e-06, "loss": 0.5044, "step": 5578 }, { "epoch": 0.25250056573885493, "grad_norm": 0.7091443937278222, "learning_rate": 8.757507173308385e-06, "loss": 0.3442, "step": 5579 }, { "epoch": 0.2525458248472505, "grad_norm": 0.6641702917856819, "learning_rate": 8.757023601891006e-06, "loss": 0.3829, "step": 5580 }, { "epoch": 0.25259108395564606, "grad_norm": 0.7033983142051135, "learning_rate": 8.756539949746729e-06, "loss": 0.3979, "step": 5581 }, { "epoch": 0.25263634306404165, "grad_norm": 0.32804607849724715, "learning_rate": 8.756056216885946e-06, "loss": 0.5033, "step": 5582 }, { "epoch": 0.2526816021724372, "grad_norm": 0.7125925445274838, "learning_rate": 8.755572403319052e-06, "loss": 0.3892, "step": 5583 }, { "epoch": 0.25272686128083277, "grad_norm": 0.6783287676002182, "learning_rate": 8.75508850905644e-06, "loss": 0.3807, "step": 5584 }, { "epoch": 0.2527721203892283, "grad_norm": 0.6702317252289326, "learning_rate": 8.754604534108509e-06, "loss": 0.3759, "step": 5585 }, { "epoch": 0.2528173794976239, "grad_norm": 0.6867608478804084, "learning_rate": 8.754120478485659e-06, "loss": 0.4367, "step": 5586 }, { "epoch": 0.2528626386060195, "grad_norm": 0.6694843817164187, "learning_rate": 8.753636342198289e-06, "loss": 0.3638, "step": 5587 }, { "epoch": 0.252907897714415, "grad_norm": 0.6358098373776894, "learning_rate": 8.753152125256801e-06, "loss": 0.3795, "step": 5588 }, { "epoch": 0.2529531568228106, "grad_norm": 0.6631921437034257, "learning_rate": 8.752667827671602e-06, "loss": 0.3908, "step": 5589 }, { "epoch": 0.25299841593120614, "grad_norm": 0.3710637208753156, "learning_rate": 8.752183449453098e-06, "loss": 0.4857, "step": 5590 }, { "epoch": 0.25304367503960173, "grad_norm": 0.7082325129752405, "learning_rate": 8.751698990611694e-06, "loss": 0.4029, "step": 5591 }, { "epoch": 0.25308893414799727, "grad_norm": 0.7012540179855176, "learning_rate": 8.751214451157802e-06, "loss": 0.3426, "step": 5592 }, { "epoch": 0.25313419325639286, "grad_norm": 0.30426007358344753, "learning_rate": 8.750729831101831e-06, "loss": 0.4895, "step": 5593 }, { "epoch": 0.2531794523647884, "grad_norm": 0.613238452659256, "learning_rate": 8.750245130454197e-06, "loss": 0.3505, "step": 5594 }, { "epoch": 0.253224711473184, "grad_norm": 0.7036978272830566, "learning_rate": 8.749760349225312e-06, "loss": 0.4148, "step": 5595 }, { "epoch": 0.2532699705815795, "grad_norm": 0.34027021169803484, "learning_rate": 8.749275487425595e-06, "loss": 0.5372, "step": 5596 }, { "epoch": 0.2533152296899751, "grad_norm": 0.6430519959314669, "learning_rate": 8.748790545065462e-06, "loss": 0.3648, "step": 5597 }, { "epoch": 0.2533604887983707, "grad_norm": 0.6481716182969333, "learning_rate": 8.748305522155333e-06, "loss": 0.3924, "step": 5598 }, { "epoch": 0.25340574790676623, "grad_norm": 0.7850438631381669, "learning_rate": 8.747820418705632e-06, "loss": 0.3477, "step": 5599 }, { "epoch": 0.2534510070151618, "grad_norm": 0.6475872459409273, "learning_rate": 8.74733523472678e-06, "loss": 0.3393, "step": 5600 }, { "epoch": 0.25349626612355736, "grad_norm": 0.6892199701583623, "learning_rate": 8.746849970229202e-06, "loss": 0.3912, "step": 5601 }, { "epoch": 0.25354152523195295, "grad_norm": 0.652841793709212, "learning_rate": 8.746364625223326e-06, "loss": 0.3977, "step": 5602 }, { "epoch": 0.2535867843403485, "grad_norm": 0.7133437435431227, "learning_rate": 8.74587919971958e-06, "loss": 0.4435, "step": 5603 }, { "epoch": 0.25363204344874407, "grad_norm": 0.6371564505576985, "learning_rate": 8.745393693728395e-06, "loss": 0.3763, "step": 5604 }, { "epoch": 0.2536773025571396, "grad_norm": 0.5867002877282491, "learning_rate": 8.744908107260204e-06, "loss": 0.3753, "step": 5605 }, { "epoch": 0.2537225616655352, "grad_norm": 0.6591111743754317, "learning_rate": 8.744422440325437e-06, "loss": 0.3785, "step": 5606 }, { "epoch": 0.25376782077393073, "grad_norm": 0.702145490185904, "learning_rate": 8.743936692934533e-06, "loss": 0.4149, "step": 5607 }, { "epoch": 0.2538130798823263, "grad_norm": 0.3819564845362773, "learning_rate": 8.743450865097929e-06, "loss": 0.5093, "step": 5608 }, { "epoch": 0.2538583389907219, "grad_norm": 0.6355224732404193, "learning_rate": 8.742964956826063e-06, "loss": 0.3885, "step": 5609 }, { "epoch": 0.25390359809911744, "grad_norm": 0.3119610838803885, "learning_rate": 8.742478968129375e-06, "loss": 0.4812, "step": 5610 }, { "epoch": 0.25394885720751303, "grad_norm": 0.6582709352358617, "learning_rate": 8.741992899018307e-06, "loss": 0.3808, "step": 5611 }, { "epoch": 0.25399411631590857, "grad_norm": 0.3003522544833689, "learning_rate": 8.741506749503306e-06, "loss": 0.4791, "step": 5612 }, { "epoch": 0.25403937542430416, "grad_norm": 0.6558557492776759, "learning_rate": 8.741020519594816e-06, "loss": 0.4024, "step": 5613 }, { "epoch": 0.2540846345326997, "grad_norm": 0.37483177884061825, "learning_rate": 8.740534209303285e-06, "loss": 0.5212, "step": 5614 }, { "epoch": 0.2541298936410953, "grad_norm": 0.3089870798161669, "learning_rate": 8.74004781863916e-06, "loss": 0.5019, "step": 5615 }, { "epoch": 0.2541751527494908, "grad_norm": 0.6220929272548339, "learning_rate": 8.739561347612894e-06, "loss": 0.3541, "step": 5616 }, { "epoch": 0.2542204118578864, "grad_norm": 0.6973460496890179, "learning_rate": 8.739074796234943e-06, "loss": 0.3822, "step": 5617 }, { "epoch": 0.25426567096628194, "grad_norm": 0.6709010629334192, "learning_rate": 8.738588164515755e-06, "loss": 0.3875, "step": 5618 }, { "epoch": 0.25431093007467753, "grad_norm": 0.6981970747897914, "learning_rate": 8.738101452465793e-06, "loss": 0.3827, "step": 5619 }, { "epoch": 0.25435618918307307, "grad_norm": 0.6752351421153933, "learning_rate": 8.737614660095507e-06, "loss": 0.3956, "step": 5620 }, { "epoch": 0.25440144829146866, "grad_norm": 0.5064682169153885, "learning_rate": 8.737127787415365e-06, "loss": 0.4983, "step": 5621 }, { "epoch": 0.25444670739986425, "grad_norm": 0.4210484452205898, "learning_rate": 8.736640834435824e-06, "loss": 0.5241, "step": 5622 }, { "epoch": 0.2544919665082598, "grad_norm": 0.9187963479616968, "learning_rate": 8.736153801167346e-06, "loss": 0.374, "step": 5623 }, { "epoch": 0.25453722561665537, "grad_norm": 0.6036483418713718, "learning_rate": 8.735666687620398e-06, "loss": 0.3771, "step": 5624 }, { "epoch": 0.2545824847250509, "grad_norm": 0.6672026035990911, "learning_rate": 8.735179493805446e-06, "loss": 0.3879, "step": 5625 }, { "epoch": 0.2546277438334465, "grad_norm": 0.6955777937848375, "learning_rate": 8.73469221973296e-06, "loss": 0.3721, "step": 5626 }, { "epoch": 0.25467300294184203, "grad_norm": 0.6532425045514988, "learning_rate": 8.734204865413407e-06, "loss": 0.365, "step": 5627 }, { "epoch": 0.2547182620502376, "grad_norm": 0.6804037652907705, "learning_rate": 8.73371743085726e-06, "loss": 0.394, "step": 5628 }, { "epoch": 0.25476352115863315, "grad_norm": 0.6691062206288424, "learning_rate": 8.733229916074995e-06, "loss": 0.4164, "step": 5629 }, { "epoch": 0.25480878026702874, "grad_norm": 0.6919494707655087, "learning_rate": 8.732742321077082e-06, "loss": 0.3914, "step": 5630 }, { "epoch": 0.2548540393754243, "grad_norm": 0.6994508346956777, "learning_rate": 8.732254645874002e-06, "loss": 0.5069, "step": 5631 }, { "epoch": 0.25489929848381987, "grad_norm": 0.48480809580622497, "learning_rate": 8.731766890476232e-06, "loss": 0.4924, "step": 5632 }, { "epoch": 0.25494455759221546, "grad_norm": 0.7333716987396993, "learning_rate": 8.731279054894254e-06, "loss": 0.3927, "step": 5633 }, { "epoch": 0.254989816700611, "grad_norm": 0.6965699173224933, "learning_rate": 8.730791139138546e-06, "loss": 0.3644, "step": 5634 }, { "epoch": 0.2550350758090066, "grad_norm": 0.8260440813243546, "learning_rate": 8.730303143219597e-06, "loss": 0.3773, "step": 5635 }, { "epoch": 0.2550803349174021, "grad_norm": 0.6660711629700041, "learning_rate": 8.729815067147888e-06, "loss": 0.383, "step": 5636 }, { "epoch": 0.2551255940257977, "grad_norm": 0.736406331574392, "learning_rate": 8.729326910933911e-06, "loss": 0.3716, "step": 5637 }, { "epoch": 0.25517085313419324, "grad_norm": 0.709712953761439, "learning_rate": 8.728838674588151e-06, "loss": 0.3965, "step": 5638 }, { "epoch": 0.25521611224258883, "grad_norm": 0.6271166899069629, "learning_rate": 8.728350358121101e-06, "loss": 0.3537, "step": 5639 }, { "epoch": 0.25526137135098437, "grad_norm": 0.6697285911368892, "learning_rate": 8.727861961543253e-06, "loss": 0.3755, "step": 5640 }, { "epoch": 0.25530663045937996, "grad_norm": 0.6809780054179385, "learning_rate": 8.7273734848651e-06, "loss": 0.4126, "step": 5641 }, { "epoch": 0.2553518895677755, "grad_norm": 0.7967430252315837, "learning_rate": 8.726884928097138e-06, "loss": 0.3632, "step": 5642 }, { "epoch": 0.2553971486761711, "grad_norm": 1.381867297092667, "learning_rate": 8.726396291249866e-06, "loss": 0.5181, "step": 5643 }, { "epoch": 0.2554424077845666, "grad_norm": 0.7415298912300912, "learning_rate": 8.725907574333783e-06, "loss": 0.3891, "step": 5644 }, { "epoch": 0.2554876668929622, "grad_norm": 0.7454140763091222, "learning_rate": 8.725418777359389e-06, "loss": 0.4035, "step": 5645 }, { "epoch": 0.2555329260013578, "grad_norm": 0.7945699973069527, "learning_rate": 8.724929900337186e-06, "loss": 0.3777, "step": 5646 }, { "epoch": 0.25557818510975333, "grad_norm": 0.7909815852970442, "learning_rate": 8.724440943277681e-06, "loss": 0.3635, "step": 5647 }, { "epoch": 0.2556234442181489, "grad_norm": 0.6718420734648589, "learning_rate": 8.723951906191377e-06, "loss": 0.4747, "step": 5648 }, { "epoch": 0.25566870332654446, "grad_norm": 0.7038507433457524, "learning_rate": 8.723462789088785e-06, "loss": 0.5177, "step": 5649 }, { "epoch": 0.25571396243494005, "grad_norm": 0.6407362587147867, "learning_rate": 8.722973591980414e-06, "loss": 0.3444, "step": 5650 }, { "epoch": 0.2557592215433356, "grad_norm": 0.7535583737045262, "learning_rate": 8.722484314876776e-06, "loss": 0.3938, "step": 5651 }, { "epoch": 0.25580448065173117, "grad_norm": 0.6430207864370134, "learning_rate": 8.72199495778838e-06, "loss": 0.345, "step": 5652 }, { "epoch": 0.2558497397601267, "grad_norm": 0.6251897210396002, "learning_rate": 8.721505520725745e-06, "loss": 0.3648, "step": 5653 }, { "epoch": 0.2558949988685223, "grad_norm": 0.6296987375458352, "learning_rate": 8.721016003699385e-06, "loss": 0.3697, "step": 5654 }, { "epoch": 0.25594025797691783, "grad_norm": 0.6776477921529509, "learning_rate": 8.72052640671982e-06, "loss": 0.3498, "step": 5655 }, { "epoch": 0.2559855170853134, "grad_norm": 0.5898537695546245, "learning_rate": 8.72003672979757e-06, "loss": 0.3576, "step": 5656 }, { "epoch": 0.256030776193709, "grad_norm": 0.6294189740361547, "learning_rate": 8.719546972943156e-06, "loss": 0.3831, "step": 5657 }, { "epoch": 0.25607603530210454, "grad_norm": 0.6665099049361219, "learning_rate": 8.719057136167099e-06, "loss": 0.3972, "step": 5658 }, { "epoch": 0.25612129441050013, "grad_norm": 0.660593059607384, "learning_rate": 8.71856721947993e-06, "loss": 0.3794, "step": 5659 }, { "epoch": 0.25616655351889567, "grad_norm": 0.7154169972792961, "learning_rate": 8.718077222892169e-06, "loss": 0.4129, "step": 5660 }, { "epoch": 0.25621181262729126, "grad_norm": 0.6645475490795022, "learning_rate": 8.717587146414348e-06, "loss": 0.4019, "step": 5661 }, { "epoch": 0.2562570717356868, "grad_norm": 0.6548301565215484, "learning_rate": 8.717096990056999e-06, "loss": 0.3973, "step": 5662 }, { "epoch": 0.2563023308440824, "grad_norm": 0.779726610983311, "learning_rate": 8.71660675383065e-06, "loss": 0.4008, "step": 5663 }, { "epoch": 0.2563475899524779, "grad_norm": 0.6523134947123276, "learning_rate": 8.716116437745836e-06, "loss": 0.4211, "step": 5664 }, { "epoch": 0.2563928490608735, "grad_norm": 0.6608811987550522, "learning_rate": 8.715626041813095e-06, "loss": 0.4106, "step": 5665 }, { "epoch": 0.25643810816926904, "grad_norm": 0.6898017659880898, "learning_rate": 8.71513556604296e-06, "loss": 0.382, "step": 5666 }, { "epoch": 0.25648336727766463, "grad_norm": 0.723160907659878, "learning_rate": 8.714645010445974e-06, "loss": 0.4066, "step": 5667 }, { "epoch": 0.2565286263860602, "grad_norm": 0.6429431624225951, "learning_rate": 8.714154375032675e-06, "loss": 0.4093, "step": 5668 }, { "epoch": 0.25657388549445576, "grad_norm": 1.2113190533946177, "learning_rate": 8.713663659813605e-06, "loss": 0.5187, "step": 5669 }, { "epoch": 0.25661914460285135, "grad_norm": 0.6354341373225793, "learning_rate": 8.713172864799309e-06, "loss": 0.371, "step": 5670 }, { "epoch": 0.2566644037112469, "grad_norm": 0.45960048313751684, "learning_rate": 8.712681990000332e-06, "loss": 0.5104, "step": 5671 }, { "epoch": 0.25670966281964247, "grad_norm": 0.6761932077538576, "learning_rate": 8.71219103542722e-06, "loss": 0.3442, "step": 5672 }, { "epoch": 0.256754921928038, "grad_norm": 0.5932332779997048, "learning_rate": 8.711700001090524e-06, "loss": 0.4999, "step": 5673 }, { "epoch": 0.2568001810364336, "grad_norm": 0.6854025592794466, "learning_rate": 8.711208887000797e-06, "loss": 0.4969, "step": 5674 }, { "epoch": 0.25684544014482913, "grad_norm": 0.7360965312204293, "learning_rate": 8.710717693168588e-06, "loss": 0.3926, "step": 5675 }, { "epoch": 0.2568906992532247, "grad_norm": 0.7146450211894528, "learning_rate": 8.710226419604453e-06, "loss": 0.405, "step": 5676 }, { "epoch": 0.25693595836162025, "grad_norm": 0.6804543109515563, "learning_rate": 8.709735066318946e-06, "loss": 0.4143, "step": 5677 }, { "epoch": 0.25698121747001584, "grad_norm": 0.6413194057286674, "learning_rate": 8.709243633322627e-06, "loss": 0.3447, "step": 5678 }, { "epoch": 0.2570264765784114, "grad_norm": 0.6783568892967364, "learning_rate": 8.708752120626054e-06, "loss": 0.3746, "step": 5679 }, { "epoch": 0.25707173568680697, "grad_norm": 0.6505544536816305, "learning_rate": 8.708260528239788e-06, "loss": 0.4018, "step": 5680 }, { "epoch": 0.25711699479520256, "grad_norm": 0.742783726637506, "learning_rate": 8.707768856174393e-06, "loss": 0.3693, "step": 5681 }, { "epoch": 0.2571622539035981, "grad_norm": 0.6665288886536155, "learning_rate": 8.707277104440432e-06, "loss": 0.3636, "step": 5682 }, { "epoch": 0.2572075130119937, "grad_norm": 0.6467823550649642, "learning_rate": 8.706785273048475e-06, "loss": 0.3495, "step": 5683 }, { "epoch": 0.2572527721203892, "grad_norm": 0.605064740924202, "learning_rate": 8.706293362009084e-06, "loss": 0.3749, "step": 5684 }, { "epoch": 0.2572980312287848, "grad_norm": 0.7717223661878522, "learning_rate": 8.705801371332832e-06, "loss": 0.3755, "step": 5685 }, { "epoch": 0.25734329033718034, "grad_norm": 0.6282435336830396, "learning_rate": 8.70530930103029e-06, "loss": 0.3562, "step": 5686 }, { "epoch": 0.25738854944557593, "grad_norm": 0.7577896145589404, "learning_rate": 8.704817151112033e-06, "loss": 0.3891, "step": 5687 }, { "epoch": 0.25743380855397147, "grad_norm": 0.7057241116379769, "learning_rate": 8.704324921588631e-06, "loss": 0.4212, "step": 5688 }, { "epoch": 0.25747906766236706, "grad_norm": 0.7065461234902365, "learning_rate": 8.703832612470665e-06, "loss": 0.3825, "step": 5689 }, { "epoch": 0.2575243267707626, "grad_norm": 0.7101016138777637, "learning_rate": 8.703340223768713e-06, "loss": 0.3642, "step": 5690 }, { "epoch": 0.2575695858791582, "grad_norm": 0.6020716234533079, "learning_rate": 8.70284775549335e-06, "loss": 0.3474, "step": 5691 }, { "epoch": 0.25761484498755377, "grad_norm": 0.7963449065128408, "learning_rate": 8.702355207655164e-06, "loss": 0.5115, "step": 5692 }, { "epoch": 0.2576601040959493, "grad_norm": 0.6741311830333333, "learning_rate": 8.701862580264735e-06, "loss": 0.3854, "step": 5693 }, { "epoch": 0.2577053632043449, "grad_norm": 0.45541735171368786, "learning_rate": 8.701369873332647e-06, "loss": 0.4829, "step": 5694 }, { "epoch": 0.25775062231274043, "grad_norm": 0.6808063988576847, "learning_rate": 8.70087708686949e-06, "loss": 0.4208, "step": 5695 }, { "epoch": 0.257795881421136, "grad_norm": 1.0002550438489555, "learning_rate": 8.700384220885852e-06, "loss": 0.3507, "step": 5696 }, { "epoch": 0.25784114052953155, "grad_norm": 0.6578641214454441, "learning_rate": 8.699891275392319e-06, "loss": 0.4122, "step": 5697 }, { "epoch": 0.25788639963792714, "grad_norm": 0.6321553880200829, "learning_rate": 8.699398250399486e-06, "loss": 0.3911, "step": 5698 }, { "epoch": 0.2579316587463227, "grad_norm": 0.6306184982690165, "learning_rate": 8.698905145917948e-06, "loss": 0.3443, "step": 5699 }, { "epoch": 0.25797691785471827, "grad_norm": 0.9831445650283764, "learning_rate": 8.6984119619583e-06, "loss": 0.4861, "step": 5700 }, { "epoch": 0.2580221769631138, "grad_norm": 0.6193016097001647, "learning_rate": 8.697918698531135e-06, "loss": 0.3793, "step": 5701 }, { "epoch": 0.2580674360715094, "grad_norm": 0.5713133255025882, "learning_rate": 8.697425355647055e-06, "loss": 0.4789, "step": 5702 }, { "epoch": 0.258112695179905, "grad_norm": 0.6176027961816791, "learning_rate": 8.696931933316661e-06, "loss": 0.3762, "step": 5703 }, { "epoch": 0.2581579542883005, "grad_norm": 0.7087957945916381, "learning_rate": 8.696438431550553e-06, "loss": 0.3791, "step": 5704 }, { "epoch": 0.2582032133966961, "grad_norm": 0.6353374786867481, "learning_rate": 8.695944850359337e-06, "loss": 0.3673, "step": 5705 }, { "epoch": 0.25824847250509164, "grad_norm": 0.6955272929655746, "learning_rate": 8.695451189753616e-06, "loss": 0.4057, "step": 5706 }, { "epoch": 0.25829373161348723, "grad_norm": 0.6275498827078999, "learning_rate": 8.694957449744e-06, "loss": 0.3476, "step": 5707 }, { "epoch": 0.25833899072188277, "grad_norm": 0.6950507771068638, "learning_rate": 8.694463630341094e-06, "loss": 0.3953, "step": 5708 }, { "epoch": 0.25838424983027836, "grad_norm": 0.7994838251226956, "learning_rate": 8.693969731555514e-06, "loss": 0.3723, "step": 5709 }, { "epoch": 0.2584295089386739, "grad_norm": 0.6387292929473521, "learning_rate": 8.693475753397869e-06, "loss": 0.373, "step": 5710 }, { "epoch": 0.2584747680470695, "grad_norm": 0.6301470128141162, "learning_rate": 8.692981695878772e-06, "loss": 0.385, "step": 5711 }, { "epoch": 0.258520027155465, "grad_norm": 0.5937941248355948, "learning_rate": 8.692487559008843e-06, "loss": 0.363, "step": 5712 }, { "epoch": 0.2585652862638606, "grad_norm": 1.192506158297031, "learning_rate": 8.691993342798698e-06, "loss": 0.5158, "step": 5713 }, { "epoch": 0.25861054537225614, "grad_norm": 0.6708971470570072, "learning_rate": 8.691499047258952e-06, "loss": 0.3817, "step": 5714 }, { "epoch": 0.25865580448065173, "grad_norm": 0.6282540225233022, "learning_rate": 8.69100467240023e-06, "loss": 0.3691, "step": 5715 }, { "epoch": 0.2587010635890473, "grad_norm": 0.6998716434175581, "learning_rate": 8.690510218233153e-06, "loss": 0.4013, "step": 5716 }, { "epoch": 0.25874632269744285, "grad_norm": 0.6701690489887676, "learning_rate": 8.690015684768347e-06, "loss": 0.3527, "step": 5717 }, { "epoch": 0.25879158180583844, "grad_norm": 0.6245332757292525, "learning_rate": 8.689521072016436e-06, "loss": 0.4015, "step": 5718 }, { "epoch": 0.258836840914234, "grad_norm": 0.6928148401172459, "learning_rate": 8.68902637998805e-06, "loss": 0.393, "step": 5719 }, { "epoch": 0.25888210002262957, "grad_norm": 0.6114044470861477, "learning_rate": 8.688531608693817e-06, "loss": 0.361, "step": 5720 }, { "epoch": 0.2589273591310251, "grad_norm": 0.6257657653929006, "learning_rate": 8.688036758144367e-06, "loss": 0.3705, "step": 5721 }, { "epoch": 0.2589726182394207, "grad_norm": 0.6996865989535014, "learning_rate": 8.687541828350334e-06, "loss": 0.5148, "step": 5722 }, { "epoch": 0.2590178773478162, "grad_norm": 0.7563925845604034, "learning_rate": 8.687046819322353e-06, "loss": 0.4001, "step": 5723 }, { "epoch": 0.2590631364562118, "grad_norm": 0.6674974207955788, "learning_rate": 8.68655173107106e-06, "loss": 0.412, "step": 5724 }, { "epoch": 0.25910839556460735, "grad_norm": 0.6705895639170104, "learning_rate": 8.686056563607093e-06, "loss": 0.3571, "step": 5725 }, { "epoch": 0.25915365467300294, "grad_norm": 0.7363012843803127, "learning_rate": 8.685561316941091e-06, "loss": 0.3844, "step": 5726 }, { "epoch": 0.25919891378139853, "grad_norm": 0.6487455970498142, "learning_rate": 8.685065991083695e-06, "loss": 0.3841, "step": 5727 }, { "epoch": 0.25924417288979407, "grad_norm": 0.6936695497920785, "learning_rate": 8.68457058604555e-06, "loss": 0.3753, "step": 5728 }, { "epoch": 0.25928943199818966, "grad_norm": 0.4468251185731199, "learning_rate": 8.684075101837298e-06, "loss": 0.4773, "step": 5729 }, { "epoch": 0.2593346911065852, "grad_norm": 0.6467029106064753, "learning_rate": 8.683579538469587e-06, "loss": 0.3659, "step": 5730 }, { "epoch": 0.2593799502149808, "grad_norm": 0.38998358102716185, "learning_rate": 8.683083895953066e-06, "loss": 0.4814, "step": 5731 }, { "epoch": 0.2594252093233763, "grad_norm": 0.7080368874591275, "learning_rate": 8.682588174298384e-06, "loss": 0.4224, "step": 5732 }, { "epoch": 0.2594704684317719, "grad_norm": 0.6507574628339656, "learning_rate": 8.68209237351619e-06, "loss": 0.3714, "step": 5733 }, { "epoch": 0.25951572754016744, "grad_norm": 0.6255841463576619, "learning_rate": 8.681596493617141e-06, "loss": 0.3706, "step": 5734 }, { "epoch": 0.25956098664856303, "grad_norm": 0.7205900485067189, "learning_rate": 8.681100534611891e-06, "loss": 0.4071, "step": 5735 }, { "epoch": 0.25960624575695856, "grad_norm": 0.6450927790544251, "learning_rate": 8.680604496511095e-06, "loss": 0.375, "step": 5736 }, { "epoch": 0.25965150486535415, "grad_norm": 0.6590018160806728, "learning_rate": 8.680108379325413e-06, "loss": 0.3737, "step": 5737 }, { "epoch": 0.25969676397374974, "grad_norm": 0.6418568583398381, "learning_rate": 8.679612183065506e-06, "loss": 0.3884, "step": 5738 }, { "epoch": 0.2597420230821453, "grad_norm": 0.6454949426628106, "learning_rate": 8.679115907742032e-06, "loss": 0.3564, "step": 5739 }, { "epoch": 0.25978728219054087, "grad_norm": 0.6739832666361274, "learning_rate": 8.67861955336566e-06, "loss": 0.3742, "step": 5740 }, { "epoch": 0.2598325412989364, "grad_norm": 0.6683636845230098, "learning_rate": 8.678123119947049e-06, "loss": 0.3584, "step": 5741 }, { "epoch": 0.259877800407332, "grad_norm": 0.6559644343457217, "learning_rate": 8.677626607496869e-06, "loss": 0.3648, "step": 5742 }, { "epoch": 0.25992305951572753, "grad_norm": 0.6809757711271472, "learning_rate": 8.677130016025788e-06, "loss": 0.3556, "step": 5743 }, { "epoch": 0.2599683186241231, "grad_norm": 0.6666856861528969, "learning_rate": 8.676633345544476e-06, "loss": 0.3886, "step": 5744 }, { "epoch": 0.26001357773251865, "grad_norm": 0.6393280406948814, "learning_rate": 8.676136596063607e-06, "loss": 0.3589, "step": 5745 }, { "epoch": 0.26005883684091424, "grad_norm": 0.678176855017373, "learning_rate": 8.675639767593851e-06, "loss": 0.3631, "step": 5746 }, { "epoch": 0.2601040959493098, "grad_norm": 0.6610446597238993, "learning_rate": 8.675142860145887e-06, "loss": 0.3838, "step": 5747 }, { "epoch": 0.26014935505770537, "grad_norm": 0.6162186768778396, "learning_rate": 8.67464587373039e-06, "loss": 0.3889, "step": 5748 }, { "epoch": 0.2601946141661009, "grad_norm": 0.6431669043412701, "learning_rate": 8.674148808358038e-06, "loss": 0.4023, "step": 5749 }, { "epoch": 0.2602398732744965, "grad_norm": 0.6334447577441437, "learning_rate": 8.673651664039513e-06, "loss": 0.352, "step": 5750 }, { "epoch": 0.2602851323828921, "grad_norm": 0.629109002626069, "learning_rate": 8.673154440785496e-06, "loss": 0.4121, "step": 5751 }, { "epoch": 0.2603303914912876, "grad_norm": 0.6882025240837115, "learning_rate": 8.672657138606672e-06, "loss": 0.403, "step": 5752 }, { "epoch": 0.2603756505996832, "grad_norm": 0.6620543132588419, "learning_rate": 8.672159757513726e-06, "loss": 0.369, "step": 5753 }, { "epoch": 0.26042090970807874, "grad_norm": 0.6516401962062613, "learning_rate": 8.671662297517344e-06, "loss": 0.4314, "step": 5754 }, { "epoch": 0.26046616881647433, "grad_norm": 0.6667136130475712, "learning_rate": 8.671164758628216e-06, "loss": 0.3493, "step": 5755 }, { "epoch": 0.26051142792486986, "grad_norm": 0.6643676696578114, "learning_rate": 8.670667140857034e-06, "loss": 0.3769, "step": 5756 }, { "epoch": 0.26055668703326545, "grad_norm": 0.632758644728174, "learning_rate": 8.670169444214487e-06, "loss": 0.3735, "step": 5757 }, { "epoch": 0.260601946141661, "grad_norm": 0.630402510364806, "learning_rate": 8.669671668711272e-06, "loss": 0.3773, "step": 5758 }, { "epoch": 0.2606472052500566, "grad_norm": 0.6562395073335897, "learning_rate": 8.669173814358082e-06, "loss": 0.3849, "step": 5759 }, { "epoch": 0.2606924643584521, "grad_norm": 0.6379547262212517, "learning_rate": 8.668675881165616e-06, "loss": 0.3595, "step": 5760 }, { "epoch": 0.2607377234668477, "grad_norm": 0.6794966206353229, "learning_rate": 8.668177869144574e-06, "loss": 0.3756, "step": 5761 }, { "epoch": 0.2607829825752433, "grad_norm": 0.6470626273490792, "learning_rate": 8.667679778305654e-06, "loss": 0.3591, "step": 5762 }, { "epoch": 0.26082824168363883, "grad_norm": 0.6400645187021914, "learning_rate": 8.66718160865956e-06, "loss": 0.3728, "step": 5763 }, { "epoch": 0.2608735007920344, "grad_norm": 0.7782560423349998, "learning_rate": 8.666683360216998e-06, "loss": 0.4782, "step": 5764 }, { "epoch": 0.26091875990042995, "grad_norm": 0.6331256763361964, "learning_rate": 8.66618503298867e-06, "loss": 0.3924, "step": 5765 }, { "epoch": 0.26096401900882554, "grad_norm": 0.6983330718539844, "learning_rate": 8.665686626985286e-06, "loss": 0.4098, "step": 5766 }, { "epoch": 0.2610092781172211, "grad_norm": 0.347891864990072, "learning_rate": 8.665188142217555e-06, "loss": 0.4693, "step": 5767 }, { "epoch": 0.26105453722561667, "grad_norm": 0.7149931938736016, "learning_rate": 8.664689578696188e-06, "loss": 0.3672, "step": 5768 }, { "epoch": 0.2610997963340122, "grad_norm": 0.6324010359903846, "learning_rate": 8.664190936431896e-06, "loss": 0.3695, "step": 5769 }, { "epoch": 0.2611450554424078, "grad_norm": 0.6574088753449782, "learning_rate": 8.663692215435396e-06, "loss": 0.3741, "step": 5770 }, { "epoch": 0.2611903145508033, "grad_norm": 0.6924097149046233, "learning_rate": 8.663193415717402e-06, "loss": 0.3801, "step": 5771 }, { "epoch": 0.2612355736591989, "grad_norm": 0.6212884502110326, "learning_rate": 8.662694537288632e-06, "loss": 0.5228, "step": 5772 }, { "epoch": 0.26128083276759445, "grad_norm": 0.7482647862220366, "learning_rate": 8.662195580159804e-06, "loss": 0.3978, "step": 5773 }, { "epoch": 0.26132609187599004, "grad_norm": 0.5149044333270514, "learning_rate": 8.661696544341642e-06, "loss": 0.4931, "step": 5774 }, { "epoch": 0.26137135098438563, "grad_norm": 0.653471749140414, "learning_rate": 8.661197429844868e-06, "loss": 0.3388, "step": 5775 }, { "epoch": 0.26141661009278117, "grad_norm": 0.6625056719664779, "learning_rate": 8.660698236680205e-06, "loss": 0.3696, "step": 5776 }, { "epoch": 0.26146186920117676, "grad_norm": 0.6471294672905499, "learning_rate": 8.66019896485838e-06, "loss": 0.4087, "step": 5777 }, { "epoch": 0.2615071283095723, "grad_norm": 0.7361480408869571, "learning_rate": 8.65969961439012e-06, "loss": 0.3858, "step": 5778 }, { "epoch": 0.2615523874179679, "grad_norm": 0.649115216250859, "learning_rate": 8.659200185286157e-06, "loss": 0.3477, "step": 5779 }, { "epoch": 0.2615976465263634, "grad_norm": 0.6561882849580633, "learning_rate": 8.658700677557217e-06, "loss": 0.4159, "step": 5780 }, { "epoch": 0.261642905634759, "grad_norm": 0.6753162434144271, "learning_rate": 8.658201091214038e-06, "loss": 0.4287, "step": 5781 }, { "epoch": 0.26168816474315454, "grad_norm": 0.6725502703479815, "learning_rate": 8.657701426267355e-06, "loss": 0.4304, "step": 5782 }, { "epoch": 0.26173342385155013, "grad_norm": 0.6815944476324566, "learning_rate": 8.657201682727898e-06, "loss": 0.3641, "step": 5783 }, { "epoch": 0.26177868295994566, "grad_norm": 0.5161456444441221, "learning_rate": 8.656701860606412e-06, "loss": 0.491, "step": 5784 }, { "epoch": 0.26182394206834125, "grad_norm": 0.6652180348295492, "learning_rate": 8.656201959913635e-06, "loss": 0.3502, "step": 5785 }, { "epoch": 0.26186920117673684, "grad_norm": 0.612731572351153, "learning_rate": 8.655701980660305e-06, "loss": 0.3635, "step": 5786 }, { "epoch": 0.2619144602851324, "grad_norm": 0.6252464211687456, "learning_rate": 8.655201922857166e-06, "loss": 0.357, "step": 5787 }, { "epoch": 0.26195971939352797, "grad_norm": 0.6850715997532323, "learning_rate": 8.654701786514965e-06, "loss": 0.3842, "step": 5788 }, { "epoch": 0.2620049785019235, "grad_norm": 0.6445923933478774, "learning_rate": 8.654201571644447e-06, "loss": 0.3718, "step": 5789 }, { "epoch": 0.2620502376103191, "grad_norm": 0.4093785123197519, "learning_rate": 8.653701278256362e-06, "loss": 0.4881, "step": 5790 }, { "epoch": 0.2620954967187146, "grad_norm": 0.6996517734015768, "learning_rate": 8.653200906361454e-06, "loss": 0.3419, "step": 5791 }, { "epoch": 0.2621407558271102, "grad_norm": 0.6759619747376816, "learning_rate": 8.652700455970483e-06, "loss": 0.3778, "step": 5792 }, { "epoch": 0.26218601493550575, "grad_norm": 0.6963571140515151, "learning_rate": 8.652199927094194e-06, "loss": 0.3885, "step": 5793 }, { "epoch": 0.26223127404390134, "grad_norm": 0.6076035876970179, "learning_rate": 8.651699319743348e-06, "loss": 0.3478, "step": 5794 }, { "epoch": 0.2622765331522969, "grad_norm": 0.6776206762542706, "learning_rate": 8.651198633928696e-06, "loss": 0.3873, "step": 5795 }, { "epoch": 0.26232179226069247, "grad_norm": 0.6661315977828, "learning_rate": 8.650697869661002e-06, "loss": 0.3991, "step": 5796 }, { "epoch": 0.26236705136908806, "grad_norm": 0.46358353209235925, "learning_rate": 8.650197026951022e-06, "loss": 0.4936, "step": 5797 }, { "epoch": 0.2624123104774836, "grad_norm": 0.6883901449463596, "learning_rate": 8.649696105809518e-06, "loss": 0.4183, "step": 5798 }, { "epoch": 0.2624575695858792, "grad_norm": 0.5777003885885577, "learning_rate": 8.649195106247256e-06, "loss": 0.378, "step": 5799 }, { "epoch": 0.2625028286942747, "grad_norm": 0.6298595377891067, "learning_rate": 8.648694028274998e-06, "loss": 0.417, "step": 5800 }, { "epoch": 0.2625480878026703, "grad_norm": 0.6152758639766999, "learning_rate": 8.64819287190351e-06, "loss": 0.4013, "step": 5801 }, { "epoch": 0.26259334691106584, "grad_norm": 0.3157038254738025, "learning_rate": 8.647691637143562e-06, "loss": 0.4892, "step": 5802 }, { "epoch": 0.26263860601946143, "grad_norm": 0.28796182851067176, "learning_rate": 8.647190324005925e-06, "loss": 0.488, "step": 5803 }, { "epoch": 0.26268386512785696, "grad_norm": 0.6537209460403814, "learning_rate": 8.646688932501369e-06, "loss": 0.3802, "step": 5804 }, { "epoch": 0.26272912423625255, "grad_norm": 0.6960618753773958, "learning_rate": 8.646187462640668e-06, "loss": 0.4316, "step": 5805 }, { "epoch": 0.2627743833446481, "grad_norm": 0.6628038199492589, "learning_rate": 8.645685914434596e-06, "loss": 0.4048, "step": 5806 }, { "epoch": 0.2628196424530437, "grad_norm": 0.701227250896986, "learning_rate": 8.64518428789393e-06, "loss": 0.3942, "step": 5807 }, { "epoch": 0.2628649015614392, "grad_norm": 0.6371205125596449, "learning_rate": 8.644682583029452e-06, "loss": 0.3832, "step": 5808 }, { "epoch": 0.2629101606698348, "grad_norm": 0.6289246242511318, "learning_rate": 8.644180799851936e-06, "loss": 0.3707, "step": 5809 }, { "epoch": 0.2629554197782304, "grad_norm": 0.6210970208658598, "learning_rate": 8.643678938372167e-06, "loss": 0.2983, "step": 5810 }, { "epoch": 0.2630006788866259, "grad_norm": 0.6778788158675642, "learning_rate": 8.643176998600931e-06, "loss": 0.3708, "step": 5811 }, { "epoch": 0.2630459379950215, "grad_norm": 0.4103471001964465, "learning_rate": 8.642674980549008e-06, "loss": 0.4846, "step": 5812 }, { "epoch": 0.26309119710341705, "grad_norm": 0.6314054288667231, "learning_rate": 8.642172884227187e-06, "loss": 0.3729, "step": 5813 }, { "epoch": 0.26313645621181264, "grad_norm": 0.6871376003891854, "learning_rate": 8.641670709646258e-06, "loss": 0.3556, "step": 5814 }, { "epoch": 0.2631817153202082, "grad_norm": 0.32402773719708344, "learning_rate": 8.64116845681701e-06, "loss": 0.4937, "step": 5815 }, { "epoch": 0.26322697442860377, "grad_norm": 0.6534754943829143, "learning_rate": 8.640666125750234e-06, "loss": 0.3971, "step": 5816 }, { "epoch": 0.2632722335369993, "grad_norm": 0.7017365191775312, "learning_rate": 8.640163716456726e-06, "loss": 0.4009, "step": 5817 }, { "epoch": 0.2633174926453949, "grad_norm": 0.30537086162876564, "learning_rate": 8.639661228947278e-06, "loss": 0.5117, "step": 5818 }, { "epoch": 0.2633627517537904, "grad_norm": 0.7499047532831177, "learning_rate": 8.63915866323269e-06, "loss": 0.4687, "step": 5819 }, { "epoch": 0.263408010862186, "grad_norm": 0.6707874277156979, "learning_rate": 8.638656019323758e-06, "loss": 0.3724, "step": 5820 }, { "epoch": 0.2634532699705816, "grad_norm": 0.3199935029667527, "learning_rate": 8.638153297231282e-06, "loss": 0.4871, "step": 5821 }, { "epoch": 0.26349852907897714, "grad_norm": 0.6652319046497731, "learning_rate": 8.637650496966069e-06, "loss": 0.35, "step": 5822 }, { "epoch": 0.26354378818737273, "grad_norm": 0.6489623223735191, "learning_rate": 8.637147618538918e-06, "loss": 0.4248, "step": 5823 }, { "epoch": 0.26358904729576826, "grad_norm": 0.6338461835366701, "learning_rate": 8.636644661960634e-06, "loss": 0.3737, "step": 5824 }, { "epoch": 0.26363430640416385, "grad_norm": 0.2895408176277085, "learning_rate": 8.636141627242025e-06, "loss": 0.4618, "step": 5825 }, { "epoch": 0.2636795655125594, "grad_norm": 0.6836727451279606, "learning_rate": 8.6356385143939e-06, "loss": 0.372, "step": 5826 }, { "epoch": 0.263724824620955, "grad_norm": 0.6853694082506848, "learning_rate": 8.635135323427072e-06, "loss": 0.4041, "step": 5827 }, { "epoch": 0.2637700837293505, "grad_norm": 0.5989493808594828, "learning_rate": 8.634632054352347e-06, "loss": 0.3827, "step": 5828 }, { "epoch": 0.2638153428377461, "grad_norm": 0.6143907957530291, "learning_rate": 8.634128707180544e-06, "loss": 0.3936, "step": 5829 }, { "epoch": 0.26386060194614164, "grad_norm": 0.6213073360715494, "learning_rate": 8.633625281922477e-06, "loss": 0.3784, "step": 5830 }, { "epoch": 0.2639058610545372, "grad_norm": 0.30410771789820507, "learning_rate": 8.63312177858896e-06, "loss": 0.4935, "step": 5831 }, { "epoch": 0.2639511201629328, "grad_norm": 0.6590420669823309, "learning_rate": 8.632618197190817e-06, "loss": 0.4466, "step": 5832 }, { "epoch": 0.26399637927132835, "grad_norm": 0.6639909004609018, "learning_rate": 8.632114537738865e-06, "loss": 0.3941, "step": 5833 }, { "epoch": 0.26404163837972394, "grad_norm": 0.629068927113239, "learning_rate": 8.631610800243926e-06, "loss": 0.4041, "step": 5834 }, { "epoch": 0.2640868974881195, "grad_norm": 0.6548830543697949, "learning_rate": 8.631106984716824e-06, "loss": 0.3643, "step": 5835 }, { "epoch": 0.26413215659651507, "grad_norm": 0.6267647987383094, "learning_rate": 8.630603091168385e-06, "loss": 0.3975, "step": 5836 }, { "epoch": 0.2641774157049106, "grad_norm": 0.3366458561699845, "learning_rate": 8.630099119609439e-06, "loss": 0.4905, "step": 5837 }, { "epoch": 0.2642226748133062, "grad_norm": 0.6485695922987192, "learning_rate": 8.62959507005081e-06, "loss": 0.3584, "step": 5838 }, { "epoch": 0.2642679339217017, "grad_norm": 0.29037235688732815, "learning_rate": 8.62909094250333e-06, "loss": 0.4906, "step": 5839 }, { "epoch": 0.2643131930300973, "grad_norm": 0.6792032775967267, "learning_rate": 8.62858673697783e-06, "loss": 0.3736, "step": 5840 }, { "epoch": 0.26435845213849285, "grad_norm": 0.3204503110222696, "learning_rate": 8.628082453485149e-06, "loss": 0.5212, "step": 5841 }, { "epoch": 0.26440371124688844, "grad_norm": 0.7177482170374119, "learning_rate": 8.627578092036117e-06, "loss": 0.3985, "step": 5842 }, { "epoch": 0.264448970355284, "grad_norm": 0.8012517579137678, "learning_rate": 8.627073652641573e-06, "loss": 0.3961, "step": 5843 }, { "epoch": 0.26449422946367956, "grad_norm": 0.6032837985123936, "learning_rate": 8.626569135312354e-06, "loss": 0.3912, "step": 5844 }, { "epoch": 0.26453948857207515, "grad_norm": 0.6511910379174815, "learning_rate": 8.626064540059305e-06, "loss": 0.3977, "step": 5845 }, { "epoch": 0.2645847476804707, "grad_norm": 0.6740604527846302, "learning_rate": 8.625559866893265e-06, "loss": 0.3606, "step": 5846 }, { "epoch": 0.2646300067888663, "grad_norm": 0.6388403733226904, "learning_rate": 8.625055115825078e-06, "loss": 0.3666, "step": 5847 }, { "epoch": 0.2646752658972618, "grad_norm": 0.6245931039340854, "learning_rate": 8.624550286865592e-06, "loss": 0.3362, "step": 5848 }, { "epoch": 0.2647205250056574, "grad_norm": 0.6749344299546478, "learning_rate": 8.62404538002565e-06, "loss": 0.4211, "step": 5849 }, { "epoch": 0.26476578411405294, "grad_norm": 0.6031113265169693, "learning_rate": 8.623540395316105e-06, "loss": 0.3206, "step": 5850 }, { "epoch": 0.26481104322244853, "grad_norm": 0.7118892079516408, "learning_rate": 8.623035332747804e-06, "loss": 0.4087, "step": 5851 }, { "epoch": 0.26485630233084406, "grad_norm": 0.7077213249212451, "learning_rate": 8.622530192331602e-06, "loss": 0.3765, "step": 5852 }, { "epoch": 0.26490156143923965, "grad_norm": 0.7198794438308793, "learning_rate": 8.622024974078354e-06, "loss": 0.3482, "step": 5853 }, { "epoch": 0.2649468205476352, "grad_norm": 0.778162946379442, "learning_rate": 8.62151967799891e-06, "loss": 0.3665, "step": 5854 }, { "epoch": 0.2649920796560308, "grad_norm": 0.533309837016235, "learning_rate": 8.621014304104131e-06, "loss": 0.4961, "step": 5855 }, { "epoch": 0.26503733876442637, "grad_norm": 0.7134140281191794, "learning_rate": 8.620508852404878e-06, "loss": 0.3803, "step": 5856 }, { "epoch": 0.2650825978728219, "grad_norm": 0.6597382720345764, "learning_rate": 8.620003322912008e-06, "loss": 0.4084, "step": 5857 }, { "epoch": 0.2651278569812175, "grad_norm": 0.6589962606418719, "learning_rate": 8.619497715636385e-06, "loss": 0.381, "step": 5858 }, { "epoch": 0.265173116089613, "grad_norm": 0.6724882253908735, "learning_rate": 8.618992030588872e-06, "loss": 0.4202, "step": 5859 }, { "epoch": 0.2652183751980086, "grad_norm": 0.708051466390435, "learning_rate": 8.618486267780334e-06, "loss": 0.3905, "step": 5860 }, { "epoch": 0.26526363430640415, "grad_norm": 0.6241104554817613, "learning_rate": 8.617980427221641e-06, "loss": 0.3767, "step": 5861 }, { "epoch": 0.26530889341479974, "grad_norm": 0.6367150687469564, "learning_rate": 8.617474508923662e-06, "loss": 0.3898, "step": 5862 }, { "epoch": 0.2653541525231953, "grad_norm": 0.6543179077247595, "learning_rate": 8.616968512897264e-06, "loss": 0.4204, "step": 5863 }, { "epoch": 0.26539941163159086, "grad_norm": 0.6934113699895663, "learning_rate": 8.61646243915332e-06, "loss": 0.4221, "step": 5864 }, { "epoch": 0.2654446707399864, "grad_norm": 0.4148202704133109, "learning_rate": 8.615956287702708e-06, "loss": 0.4663, "step": 5865 }, { "epoch": 0.265489929848382, "grad_norm": 0.35037573430565927, "learning_rate": 8.615450058556301e-06, "loss": 0.4917, "step": 5866 }, { "epoch": 0.2655351889567776, "grad_norm": 0.7073144750498311, "learning_rate": 8.614943751724973e-06, "loss": 0.4249, "step": 5867 }, { "epoch": 0.2655804480651731, "grad_norm": 0.7428661153855681, "learning_rate": 8.614437367219609e-06, "loss": 0.3834, "step": 5868 }, { "epoch": 0.2656257071735687, "grad_norm": 0.6768255011074285, "learning_rate": 8.613930905051087e-06, "loss": 0.3994, "step": 5869 }, { "epoch": 0.26567096628196424, "grad_norm": 0.7575444906896478, "learning_rate": 8.613424365230287e-06, "loss": 0.3717, "step": 5870 }, { "epoch": 0.26571622539035983, "grad_norm": 0.7548909935508007, "learning_rate": 8.612917747768097e-06, "loss": 0.3623, "step": 5871 }, { "epoch": 0.26576148449875536, "grad_norm": 0.6016701214142821, "learning_rate": 8.6124110526754e-06, "loss": 0.343, "step": 5872 }, { "epoch": 0.26580674360715095, "grad_norm": 0.679660680308673, "learning_rate": 8.611904279963085e-06, "loss": 0.3906, "step": 5873 }, { "epoch": 0.2658520027155465, "grad_norm": 0.6360930168064564, "learning_rate": 8.61139742964204e-06, "loss": 0.4084, "step": 5874 }, { "epoch": 0.2658972618239421, "grad_norm": 0.6615358533938903, "learning_rate": 8.610890501723155e-06, "loss": 0.4094, "step": 5875 }, { "epoch": 0.2659425209323376, "grad_norm": 0.7109395219743451, "learning_rate": 8.610383496217323e-06, "loss": 0.3736, "step": 5876 }, { "epoch": 0.2659877800407332, "grad_norm": 0.6837406970708032, "learning_rate": 8.609876413135439e-06, "loss": 0.4766, "step": 5877 }, { "epoch": 0.26603303914912874, "grad_norm": 0.7336459536199746, "learning_rate": 8.609369252488398e-06, "loss": 0.4239, "step": 5878 }, { "epoch": 0.2660782982575243, "grad_norm": 0.6437473058572298, "learning_rate": 8.608862014287095e-06, "loss": 0.3776, "step": 5879 }, { "epoch": 0.2661235573659199, "grad_norm": 0.32222665867547856, "learning_rate": 8.608354698542433e-06, "loss": 0.5048, "step": 5880 }, { "epoch": 0.26616881647431545, "grad_norm": 0.6907684265874435, "learning_rate": 8.607847305265312e-06, "loss": 0.3886, "step": 5881 }, { "epoch": 0.26621407558271104, "grad_norm": 0.648226465706051, "learning_rate": 8.607339834466632e-06, "loss": 0.3937, "step": 5882 }, { "epoch": 0.2662593346911066, "grad_norm": 0.6804105185275151, "learning_rate": 8.606832286157296e-06, "loss": 0.3769, "step": 5883 }, { "epoch": 0.26630459379950217, "grad_norm": 0.6633112138977015, "learning_rate": 8.606324660348214e-06, "loss": 0.3824, "step": 5884 }, { "epoch": 0.2663498529078977, "grad_norm": 0.7789029630758616, "learning_rate": 8.605816957050291e-06, "loss": 0.3202, "step": 5885 }, { "epoch": 0.2663951120162933, "grad_norm": 0.7116149857547136, "learning_rate": 8.605309176274434e-06, "loss": 0.3525, "step": 5886 }, { "epoch": 0.2664403711246888, "grad_norm": 0.7987998989658341, "learning_rate": 8.604801318031556e-06, "loss": 0.385, "step": 5887 }, { "epoch": 0.2664856302330844, "grad_norm": 0.6836694369333408, "learning_rate": 8.604293382332572e-06, "loss": 0.3733, "step": 5888 }, { "epoch": 0.26653088934147995, "grad_norm": 0.6679344237964411, "learning_rate": 8.60378536918839e-06, "loss": 0.3588, "step": 5889 }, { "epoch": 0.26657614844987554, "grad_norm": 0.6560760582437417, "learning_rate": 8.60327727860993e-06, "loss": 0.3992, "step": 5890 }, { "epoch": 0.26662140755827113, "grad_norm": 0.7049765744315053, "learning_rate": 8.602769110608107e-06, "loss": 0.3676, "step": 5891 }, { "epoch": 0.26666666666666666, "grad_norm": 0.6276088466610943, "learning_rate": 8.602260865193841e-06, "loss": 0.3812, "step": 5892 }, { "epoch": 0.26671192577506225, "grad_norm": 0.6116868774203824, "learning_rate": 8.601752542378052e-06, "loss": 0.3714, "step": 5893 }, { "epoch": 0.2667571848834578, "grad_norm": 0.6801239626111731, "learning_rate": 8.601244142171665e-06, "loss": 0.3809, "step": 5894 }, { "epoch": 0.2668024439918534, "grad_norm": 0.821498545744599, "learning_rate": 8.6007356645856e-06, "loss": 0.5027, "step": 5895 }, { "epoch": 0.2668477031002489, "grad_norm": 0.6657517193075106, "learning_rate": 8.600227109630785e-06, "loss": 0.3888, "step": 5896 }, { "epoch": 0.2668929622086445, "grad_norm": 0.4859325881457738, "learning_rate": 8.599718477318146e-06, "loss": 0.4869, "step": 5897 }, { "epoch": 0.26693822131704004, "grad_norm": 0.7389180211567423, "learning_rate": 8.599209767658613e-06, "loss": 0.3747, "step": 5898 }, { "epoch": 0.2669834804254356, "grad_norm": 0.6422154576545112, "learning_rate": 8.598700980663116e-06, "loss": 0.3458, "step": 5899 }, { "epoch": 0.26702873953383116, "grad_norm": 0.6237373016006862, "learning_rate": 8.598192116342587e-06, "loss": 0.38, "step": 5900 }, { "epoch": 0.26707399864222675, "grad_norm": 0.6284487296234135, "learning_rate": 8.597683174707961e-06, "loss": 0.3297, "step": 5901 }, { "epoch": 0.2671192577506223, "grad_norm": 0.8446235665355496, "learning_rate": 8.597174155770174e-06, "loss": 0.5076, "step": 5902 }, { "epoch": 0.2671645168590179, "grad_norm": 0.7014310270863416, "learning_rate": 8.596665059540161e-06, "loss": 0.4103, "step": 5903 }, { "epoch": 0.26720977596741347, "grad_norm": 0.698389379913561, "learning_rate": 8.596155886028863e-06, "loss": 0.4106, "step": 5904 }, { "epoch": 0.267255035075809, "grad_norm": 0.6508038142607682, "learning_rate": 8.59564663524722e-06, "loss": 0.3526, "step": 5905 }, { "epoch": 0.2673002941842046, "grad_norm": 0.6590762924057729, "learning_rate": 8.595137307206171e-06, "loss": 0.3979, "step": 5906 }, { "epoch": 0.2673455532926001, "grad_norm": 0.5143468902112852, "learning_rate": 8.594627901916667e-06, "loss": 0.486, "step": 5907 }, { "epoch": 0.2673908124009957, "grad_norm": 0.6668621637376194, "learning_rate": 8.594118419389648e-06, "loss": 0.3568, "step": 5908 }, { "epoch": 0.26743607150939125, "grad_norm": 0.6586502935608585, "learning_rate": 8.593608859636063e-06, "loss": 0.3406, "step": 5909 }, { "epoch": 0.26748133061778684, "grad_norm": 0.7020550504466091, "learning_rate": 8.593099222666859e-06, "loss": 0.4176, "step": 5910 }, { "epoch": 0.2675265897261824, "grad_norm": 0.33072277187840926, "learning_rate": 8.592589508492989e-06, "loss": 0.4711, "step": 5911 }, { "epoch": 0.26757184883457796, "grad_norm": 0.7552124560845879, "learning_rate": 8.592079717125403e-06, "loss": 0.3952, "step": 5912 }, { "epoch": 0.2676171079429735, "grad_norm": 0.4440559086132741, "learning_rate": 8.591569848575058e-06, "loss": 0.4977, "step": 5913 }, { "epoch": 0.2676623670513691, "grad_norm": 0.6809038852834273, "learning_rate": 8.591059902852907e-06, "loss": 0.4298, "step": 5914 }, { "epoch": 0.2677076261597647, "grad_norm": 0.6532468455961231, "learning_rate": 8.590549879969907e-06, "loss": 0.3721, "step": 5915 }, { "epoch": 0.2677528852681602, "grad_norm": 0.6528557835064469, "learning_rate": 8.590039779937019e-06, "loss": 0.3939, "step": 5916 }, { "epoch": 0.2677981443765558, "grad_norm": 0.6577332862162955, "learning_rate": 8.5895296027652e-06, "loss": 0.3531, "step": 5917 }, { "epoch": 0.26784340348495134, "grad_norm": 0.6707199520170976, "learning_rate": 8.589019348465416e-06, "loss": 0.377, "step": 5918 }, { "epoch": 0.2678886625933469, "grad_norm": 0.6661069685645835, "learning_rate": 8.588509017048629e-06, "loss": 0.385, "step": 5919 }, { "epoch": 0.26793392170174246, "grad_norm": 0.656977906811802, "learning_rate": 8.587998608525806e-06, "loss": 0.3797, "step": 5920 }, { "epoch": 0.26797918081013805, "grad_norm": 0.6457510799865884, "learning_rate": 8.58748812290791e-06, "loss": 0.3865, "step": 5921 }, { "epoch": 0.2680244399185336, "grad_norm": 0.6716714434913296, "learning_rate": 8.586977560205914e-06, "loss": 0.3889, "step": 5922 }, { "epoch": 0.2680696990269292, "grad_norm": 0.6985273498781074, "learning_rate": 8.586466920430785e-06, "loss": 0.4133, "step": 5923 }, { "epoch": 0.2681149581353247, "grad_norm": 0.6292758691095204, "learning_rate": 8.585956203593497e-06, "loss": 0.3627, "step": 5924 }, { "epoch": 0.2681602172437203, "grad_norm": 0.5394491971196063, "learning_rate": 8.585445409705026e-06, "loss": 0.5169, "step": 5925 }, { "epoch": 0.2682054763521159, "grad_norm": 0.6232604641935396, "learning_rate": 8.584934538776342e-06, "loss": 0.3383, "step": 5926 }, { "epoch": 0.2682507354605114, "grad_norm": 0.6635149021073501, "learning_rate": 8.584423590818427e-06, "loss": 0.3532, "step": 5927 }, { "epoch": 0.268295994568907, "grad_norm": 0.9092934282618736, "learning_rate": 8.583912565842258e-06, "loss": 0.3869, "step": 5928 }, { "epoch": 0.26834125367730255, "grad_norm": 0.6085165446418936, "learning_rate": 8.583401463858814e-06, "loss": 0.3775, "step": 5929 }, { "epoch": 0.26838651278569814, "grad_norm": 0.6810081434074116, "learning_rate": 8.582890284879077e-06, "loss": 0.3621, "step": 5930 }, { "epoch": 0.2684317718940937, "grad_norm": 0.6595823794825919, "learning_rate": 8.582379028914034e-06, "loss": 0.4089, "step": 5931 }, { "epoch": 0.26847703100248926, "grad_norm": 0.7243005246090832, "learning_rate": 8.581867695974667e-06, "loss": 0.4088, "step": 5932 }, { "epoch": 0.2685222901108848, "grad_norm": 0.6679784436223112, "learning_rate": 8.581356286071964e-06, "loss": 0.3949, "step": 5933 }, { "epoch": 0.2685675492192804, "grad_norm": 0.623973476384692, "learning_rate": 8.580844799216914e-06, "loss": 0.375, "step": 5934 }, { "epoch": 0.2686128083276759, "grad_norm": 0.7071319896024497, "learning_rate": 8.580333235420509e-06, "loss": 0.3691, "step": 5935 }, { "epoch": 0.2686580674360715, "grad_norm": 0.6381944332927595, "learning_rate": 8.579821594693736e-06, "loss": 0.345, "step": 5936 }, { "epoch": 0.26870332654446705, "grad_norm": 0.5723820412634509, "learning_rate": 8.579309877047593e-06, "loss": 0.5103, "step": 5937 }, { "epoch": 0.26874858565286264, "grad_norm": 0.6204819925409865, "learning_rate": 8.578798082493074e-06, "loss": 0.3309, "step": 5938 }, { "epoch": 0.2687938447612582, "grad_norm": 0.7162615258973855, "learning_rate": 8.578286211041173e-06, "loss": 0.3869, "step": 5939 }, { "epoch": 0.26883910386965376, "grad_norm": 0.6812068319674739, "learning_rate": 8.577774262702894e-06, "loss": 0.3934, "step": 5940 }, { "epoch": 0.26888436297804935, "grad_norm": 0.8801846873382326, "learning_rate": 8.577262237489234e-06, "loss": 0.4178, "step": 5941 }, { "epoch": 0.2689296220864449, "grad_norm": 0.7154494503050958, "learning_rate": 8.576750135411194e-06, "loss": 0.4163, "step": 5942 }, { "epoch": 0.2689748811948405, "grad_norm": 0.41689872782674725, "learning_rate": 8.57623795647978e-06, "loss": 0.5022, "step": 5943 }, { "epoch": 0.269020140303236, "grad_norm": 0.6760071606654183, "learning_rate": 8.575725700705995e-06, "loss": 0.3541, "step": 5944 }, { "epoch": 0.2690653994116316, "grad_norm": 0.5954749930016797, "learning_rate": 8.575213368100847e-06, "loss": 0.3645, "step": 5945 }, { "epoch": 0.26911065852002714, "grad_norm": 0.6367933833228567, "learning_rate": 8.574700958675345e-06, "loss": 0.333, "step": 5946 }, { "epoch": 0.2691559176284227, "grad_norm": 0.7950089422983774, "learning_rate": 8.574188472440497e-06, "loss": 0.3649, "step": 5947 }, { "epoch": 0.26920117673681826, "grad_norm": 0.7098041224453934, "learning_rate": 8.573675909407316e-06, "loss": 0.3815, "step": 5948 }, { "epoch": 0.26924643584521385, "grad_norm": 0.6194752583100062, "learning_rate": 8.573163269586818e-06, "loss": 0.3499, "step": 5949 }, { "epoch": 0.26929169495360944, "grad_norm": 0.613211866231783, "learning_rate": 8.572650552990012e-06, "loss": 0.3677, "step": 5950 }, { "epoch": 0.269336954062005, "grad_norm": 0.7044084698902522, "learning_rate": 8.572137759627919e-06, "loss": 0.3674, "step": 5951 }, { "epoch": 0.26938221317040056, "grad_norm": 0.6962715657146951, "learning_rate": 8.571624889511558e-06, "loss": 0.3938, "step": 5952 }, { "epoch": 0.2694274722787961, "grad_norm": 0.637278621803258, "learning_rate": 8.571111942651945e-06, "loss": 0.4078, "step": 5953 }, { "epoch": 0.2694727313871917, "grad_norm": 0.6978075288266782, "learning_rate": 8.570598919060108e-06, "loss": 0.3697, "step": 5954 }, { "epoch": 0.2695179904955872, "grad_norm": 0.6322435094797664, "learning_rate": 8.570085818747063e-06, "loss": 0.3471, "step": 5955 }, { "epoch": 0.2695632496039828, "grad_norm": 0.6994153624100978, "learning_rate": 8.56957264172384e-06, "loss": 0.3857, "step": 5956 }, { "epoch": 0.26960850871237835, "grad_norm": 0.6860845640803551, "learning_rate": 8.569059388001463e-06, "loss": 0.3976, "step": 5957 }, { "epoch": 0.26965376782077394, "grad_norm": 0.776993539496546, "learning_rate": 8.568546057590963e-06, "loss": 0.3963, "step": 5958 }, { "epoch": 0.26969902692916947, "grad_norm": 0.41075113977310934, "learning_rate": 8.568032650503366e-06, "loss": 0.5327, "step": 5959 }, { "epoch": 0.26974428603756506, "grad_norm": 0.6551096579982019, "learning_rate": 8.567519166749707e-06, "loss": 0.3683, "step": 5960 }, { "epoch": 0.26978954514596065, "grad_norm": 0.9991521072753345, "learning_rate": 8.567005606341019e-06, "loss": 0.3517, "step": 5961 }, { "epoch": 0.2698348042543562, "grad_norm": 0.6873912875023508, "learning_rate": 8.566491969288333e-06, "loss": 0.4392, "step": 5962 }, { "epoch": 0.2698800633627518, "grad_norm": 0.6267614200958163, "learning_rate": 8.565978255602692e-06, "loss": 0.3718, "step": 5963 }, { "epoch": 0.2699253224711473, "grad_norm": 0.755594663681423, "learning_rate": 8.565464465295128e-06, "loss": 0.442, "step": 5964 }, { "epoch": 0.2699705815795429, "grad_norm": 0.6180654968091859, "learning_rate": 8.564950598376683e-06, "loss": 0.3679, "step": 5965 }, { "epoch": 0.27001584068793844, "grad_norm": 0.809800571361141, "learning_rate": 8.5644366548584e-06, "loss": 0.4397, "step": 5966 }, { "epoch": 0.270061099796334, "grad_norm": 0.6092547414226015, "learning_rate": 8.563922634751318e-06, "loss": 0.3701, "step": 5967 }, { "epoch": 0.27010635890472956, "grad_norm": 0.6731909504435205, "learning_rate": 8.563408538066486e-06, "loss": 0.392, "step": 5968 }, { "epoch": 0.27015161801312515, "grad_norm": 0.6775432593263921, "learning_rate": 8.562894364814948e-06, "loss": 0.3732, "step": 5969 }, { "epoch": 0.2701968771215207, "grad_norm": 0.6727938342148715, "learning_rate": 8.562380115007753e-06, "loss": 0.376, "step": 5970 }, { "epoch": 0.2702421362299163, "grad_norm": 0.6533268203426031, "learning_rate": 8.561865788655951e-06, "loss": 0.3576, "step": 5971 }, { "epoch": 0.2702873953383118, "grad_norm": 0.6259228660706317, "learning_rate": 8.561351385770592e-06, "loss": 0.34, "step": 5972 }, { "epoch": 0.2703326544467074, "grad_norm": 0.7074757571539375, "learning_rate": 8.560836906362731e-06, "loss": 0.3913, "step": 5973 }, { "epoch": 0.270377913555103, "grad_norm": 0.7318314444482122, "learning_rate": 8.56032235044342e-06, "loss": 0.3668, "step": 5974 }, { "epoch": 0.2704231726634985, "grad_norm": 0.6757028464267878, "learning_rate": 8.559807718023715e-06, "loss": 0.3639, "step": 5975 }, { "epoch": 0.2704684317718941, "grad_norm": 0.6430596171748171, "learning_rate": 8.559293009114678e-06, "loss": 0.4166, "step": 5976 }, { "epoch": 0.27051369088028965, "grad_norm": 0.6852484751594174, "learning_rate": 8.558778223727363e-06, "loss": 0.372, "step": 5977 }, { "epoch": 0.27055894998868524, "grad_norm": 0.6088024575326358, "learning_rate": 8.558263361872836e-06, "loss": 0.4165, "step": 5978 }, { "epoch": 0.2706042090970808, "grad_norm": 0.6452025606498345, "learning_rate": 8.557748423562157e-06, "loss": 0.3354, "step": 5979 }, { "epoch": 0.27064946820547636, "grad_norm": 0.4629092068761617, "learning_rate": 8.55723340880639e-06, "loss": 0.4937, "step": 5980 }, { "epoch": 0.2706947273138719, "grad_norm": 0.650889937361659, "learning_rate": 8.556718317616603e-06, "loss": 0.364, "step": 5981 }, { "epoch": 0.2707399864222675, "grad_norm": 0.5910922166754542, "learning_rate": 8.556203150003863e-06, "loss": 0.3798, "step": 5982 }, { "epoch": 0.270785245530663, "grad_norm": 0.6773198923876816, "learning_rate": 8.55568790597924e-06, "loss": 0.4274, "step": 5983 }, { "epoch": 0.2708305046390586, "grad_norm": 0.6389749204912827, "learning_rate": 8.555172585553804e-06, "loss": 0.3604, "step": 5984 }, { "epoch": 0.2708757637474542, "grad_norm": 0.6088735184267581, "learning_rate": 8.55465718873863e-06, "loss": 0.4338, "step": 5985 }, { "epoch": 0.27092102285584974, "grad_norm": 0.6186470148336451, "learning_rate": 8.554141715544788e-06, "loss": 0.3711, "step": 5986 }, { "epoch": 0.2709662819642453, "grad_norm": 0.6727668532608004, "learning_rate": 8.553626165983355e-06, "loss": 0.3897, "step": 5987 }, { "epoch": 0.27101154107264086, "grad_norm": 0.616134976164528, "learning_rate": 8.553110540065412e-06, "loss": 0.3587, "step": 5988 }, { "epoch": 0.27105680018103645, "grad_norm": 0.42615089223040237, "learning_rate": 8.552594837802035e-06, "loss": 0.476, "step": 5989 }, { "epoch": 0.271102059289432, "grad_norm": 0.6261527251182383, "learning_rate": 8.552079059204306e-06, "loss": 0.3496, "step": 5990 }, { "epoch": 0.2711473183978276, "grad_norm": 1.1080483705521476, "learning_rate": 8.551563204283308e-06, "loss": 0.3852, "step": 5991 }, { "epoch": 0.2711925775062231, "grad_norm": 0.33901070493735247, "learning_rate": 8.551047273050126e-06, "loss": 0.4988, "step": 5992 }, { "epoch": 0.2712378366146187, "grad_norm": 0.6859078191706226, "learning_rate": 8.550531265515842e-06, "loss": 0.3912, "step": 5993 }, { "epoch": 0.27128309572301423, "grad_norm": 0.673173504588645, "learning_rate": 8.550015181691546e-06, "loss": 0.3891, "step": 5994 }, { "epoch": 0.2713283548314098, "grad_norm": 0.3069681794950536, "learning_rate": 8.549499021588328e-06, "loss": 0.4939, "step": 5995 }, { "epoch": 0.27137361393980536, "grad_norm": 0.6517678642102981, "learning_rate": 8.548982785217277e-06, "loss": 0.3418, "step": 5996 }, { "epoch": 0.27141887304820095, "grad_norm": 0.3253512792732969, "learning_rate": 8.548466472589485e-06, "loss": 0.5133, "step": 5997 }, { "epoch": 0.27146413215659654, "grad_norm": 0.6915729271927266, "learning_rate": 8.547950083716047e-06, "loss": 0.3375, "step": 5998 }, { "epoch": 0.2715093912649921, "grad_norm": 0.6576267492188453, "learning_rate": 8.547433618608059e-06, "loss": 0.3902, "step": 5999 }, { "epoch": 0.27155465037338766, "grad_norm": 0.6279246314213415, "learning_rate": 8.546917077276618e-06, "loss": 0.3917, "step": 6000 }, { "epoch": 0.2715999094817832, "grad_norm": 0.6221555956642787, "learning_rate": 8.54640045973282e-06, "loss": 0.401, "step": 6001 }, { "epoch": 0.2716451685901788, "grad_norm": 0.6306321739741833, "learning_rate": 8.54588376598777e-06, "loss": 0.3645, "step": 6002 }, { "epoch": 0.2716904276985743, "grad_norm": 0.6262000288703357, "learning_rate": 8.545366996052568e-06, "loss": 0.3908, "step": 6003 }, { "epoch": 0.2717356868069699, "grad_norm": 0.666632379653395, "learning_rate": 8.54485014993832e-06, "loss": 0.4311, "step": 6004 }, { "epoch": 0.27178094591536545, "grad_norm": 0.6618339991231594, "learning_rate": 8.544333227656126e-06, "loss": 0.3649, "step": 6005 }, { "epoch": 0.27182620502376104, "grad_norm": 0.6665292193157383, "learning_rate": 8.543816229217099e-06, "loss": 0.362, "step": 6006 }, { "epoch": 0.27187146413215657, "grad_norm": 0.6743160090461385, "learning_rate": 8.543299154632343e-06, "loss": 0.4236, "step": 6007 }, { "epoch": 0.27191672324055216, "grad_norm": 0.6499545296448126, "learning_rate": 8.542782003912973e-06, "loss": 0.3738, "step": 6008 }, { "epoch": 0.27196198234894775, "grad_norm": 0.6244358131955613, "learning_rate": 8.542264777070097e-06, "loss": 0.3653, "step": 6009 }, { "epoch": 0.2720072414573433, "grad_norm": 0.6584235185675267, "learning_rate": 8.54174747411483e-06, "loss": 0.3828, "step": 6010 }, { "epoch": 0.2720525005657389, "grad_norm": 0.6079854139604739, "learning_rate": 8.541230095058289e-06, "loss": 0.3765, "step": 6011 }, { "epoch": 0.2720977596741344, "grad_norm": 0.6601104823424143, "learning_rate": 8.540712639911588e-06, "loss": 0.3562, "step": 6012 }, { "epoch": 0.27214301878253, "grad_norm": 0.6408304883319524, "learning_rate": 8.540195108685846e-06, "loss": 0.3878, "step": 6013 }, { "epoch": 0.27218827789092553, "grad_norm": 0.6128296670508544, "learning_rate": 8.539677501392187e-06, "loss": 0.3665, "step": 6014 }, { "epoch": 0.2722335369993211, "grad_norm": 0.6746159565674552, "learning_rate": 8.539159818041727e-06, "loss": 0.4219, "step": 6015 }, { "epoch": 0.27227879610771666, "grad_norm": 0.6738103055188247, "learning_rate": 8.538642058645595e-06, "loss": 0.3991, "step": 6016 }, { "epoch": 0.27232405521611225, "grad_norm": 0.5836860257061833, "learning_rate": 8.538124223214909e-06, "loss": 0.3372, "step": 6017 }, { "epoch": 0.2723693143245078, "grad_norm": 0.49335442087411124, "learning_rate": 8.537606311760804e-06, "loss": 0.4964, "step": 6018 }, { "epoch": 0.2724145734329034, "grad_norm": 0.6693697621695167, "learning_rate": 8.537088324294403e-06, "loss": 0.3229, "step": 6019 }, { "epoch": 0.27245983254129896, "grad_norm": 0.6527074234229424, "learning_rate": 8.536570260826837e-06, "loss": 0.371, "step": 6020 }, { "epoch": 0.2725050916496945, "grad_norm": 0.6517147449836838, "learning_rate": 8.536052121369238e-06, "loss": 0.3695, "step": 6021 }, { "epoch": 0.2725503507580901, "grad_norm": 0.7243834845664229, "learning_rate": 8.535533905932739e-06, "loss": 0.381, "step": 6022 }, { "epoch": 0.2725956098664856, "grad_norm": 0.651690902641303, "learning_rate": 8.535015614528475e-06, "loss": 0.3708, "step": 6023 }, { "epoch": 0.2726408689748812, "grad_norm": 0.6353993471501393, "learning_rate": 8.534497247167581e-06, "loss": 0.3404, "step": 6024 }, { "epoch": 0.27268612808327675, "grad_norm": 0.6883967390953978, "learning_rate": 8.533978803861199e-06, "loss": 0.4128, "step": 6025 }, { "epoch": 0.27273138719167234, "grad_norm": 0.7210181559318217, "learning_rate": 8.533460284620464e-06, "loss": 0.3335, "step": 6026 }, { "epoch": 0.27277664630006787, "grad_norm": 0.6455031954425401, "learning_rate": 8.532941689456521e-06, "loss": 0.3843, "step": 6027 }, { "epoch": 0.27282190540846346, "grad_norm": 0.6338284754087283, "learning_rate": 8.532423018380511e-06, "loss": 0.3658, "step": 6028 }, { "epoch": 0.272867164516859, "grad_norm": 0.5224069999647922, "learning_rate": 8.53190427140358e-06, "loss": 0.5052, "step": 6029 }, { "epoch": 0.2729124236252546, "grad_norm": 0.6258685222511478, "learning_rate": 8.531385448536875e-06, "loss": 0.347, "step": 6030 }, { "epoch": 0.2729576827336501, "grad_norm": 0.6682531378852933, "learning_rate": 8.53086654979154e-06, "loss": 0.4118, "step": 6031 }, { "epoch": 0.2730029418420457, "grad_norm": 0.6144327014064551, "learning_rate": 8.530347575178728e-06, "loss": 0.3551, "step": 6032 }, { "epoch": 0.2730482009504413, "grad_norm": 0.6289075987124088, "learning_rate": 8.52982852470959e-06, "loss": 0.3418, "step": 6033 }, { "epoch": 0.27309346005883683, "grad_norm": 0.645673471979479, "learning_rate": 8.529309398395275e-06, "loss": 0.3926, "step": 6034 }, { "epoch": 0.2731387191672324, "grad_norm": 0.690713840540433, "learning_rate": 8.528790196246944e-06, "loss": 0.4096, "step": 6035 }, { "epoch": 0.27318397827562796, "grad_norm": 0.48055901693607467, "learning_rate": 8.528270918275749e-06, "loss": 0.4982, "step": 6036 }, { "epoch": 0.27322923738402355, "grad_norm": 0.6483839043555008, "learning_rate": 8.527751564492847e-06, "loss": 0.3755, "step": 6037 }, { "epoch": 0.2732744964924191, "grad_norm": 0.33294770241593863, "learning_rate": 8.527232134909398e-06, "loss": 0.4978, "step": 6038 }, { "epoch": 0.2733197556008147, "grad_norm": 0.6955806026366336, "learning_rate": 8.526712629536566e-06, "loss": 0.3873, "step": 6039 }, { "epoch": 0.2733650147092102, "grad_norm": 0.7046916383301215, "learning_rate": 8.52619304838551e-06, "loss": 0.3398, "step": 6040 }, { "epoch": 0.2734102738176058, "grad_norm": 0.6306151655838789, "learning_rate": 8.525673391467395e-06, "loss": 0.3614, "step": 6041 }, { "epoch": 0.27345553292600133, "grad_norm": 0.6994791751162374, "learning_rate": 8.525153658793386e-06, "loss": 0.4099, "step": 6042 }, { "epoch": 0.2735007920343969, "grad_norm": 0.6159324117192329, "learning_rate": 8.524633850374653e-06, "loss": 0.3621, "step": 6043 }, { "epoch": 0.2735460511427925, "grad_norm": 0.6079616116262638, "learning_rate": 8.524113966222363e-06, "loss": 0.3456, "step": 6044 }, { "epoch": 0.27359131025118805, "grad_norm": 0.6898106765207705, "learning_rate": 8.523594006347686e-06, "loss": 0.4221, "step": 6045 }, { "epoch": 0.27363656935958364, "grad_norm": 0.5991907495206867, "learning_rate": 8.523073970761799e-06, "loss": 0.3603, "step": 6046 }, { "epoch": 0.27368182846797917, "grad_norm": 0.6934617136625089, "learning_rate": 8.52255385947587e-06, "loss": 0.4052, "step": 6047 }, { "epoch": 0.27372708757637476, "grad_norm": 0.6112395249078179, "learning_rate": 8.52203367250108e-06, "loss": 0.372, "step": 6048 }, { "epoch": 0.2737723466847703, "grad_norm": 0.6502628865263396, "learning_rate": 8.521513409848601e-06, "loss": 0.3435, "step": 6049 }, { "epoch": 0.2738176057931659, "grad_norm": 0.6236769646363846, "learning_rate": 8.520993071529614e-06, "loss": 0.4928, "step": 6050 }, { "epoch": 0.2738628649015614, "grad_norm": 0.6919958170062448, "learning_rate": 8.520472657555301e-06, "loss": 0.3707, "step": 6051 }, { "epoch": 0.273908124009957, "grad_norm": 0.34575276990715864, "learning_rate": 8.519952167936842e-06, "loss": 0.4819, "step": 6052 }, { "epoch": 0.27395338311835254, "grad_norm": 0.6692288837623679, "learning_rate": 8.519431602685423e-06, "loss": 0.3679, "step": 6053 }, { "epoch": 0.27399864222674813, "grad_norm": 0.6513651971217073, "learning_rate": 8.518910961812229e-06, "loss": 0.3517, "step": 6054 }, { "epoch": 0.2740439013351437, "grad_norm": 0.6701945415254389, "learning_rate": 8.518390245328444e-06, "loss": 0.3163, "step": 6055 }, { "epoch": 0.27408916044353926, "grad_norm": 0.6611267699545874, "learning_rate": 8.517869453245257e-06, "loss": 0.3771, "step": 6056 }, { "epoch": 0.27413441955193485, "grad_norm": 0.6839169716811373, "learning_rate": 8.517348585573862e-06, "loss": 0.3549, "step": 6057 }, { "epoch": 0.2741796786603304, "grad_norm": 0.6835259767360924, "learning_rate": 8.516827642325447e-06, "loss": 0.3923, "step": 6058 }, { "epoch": 0.274224937768726, "grad_norm": 0.597517933827256, "learning_rate": 8.51630662351121e-06, "loss": 0.3652, "step": 6059 }, { "epoch": 0.2742701968771215, "grad_norm": 0.8775982858679074, "learning_rate": 8.515785529142339e-06, "loss": 0.5372, "step": 6060 }, { "epoch": 0.2743154559855171, "grad_norm": 0.686589275683136, "learning_rate": 8.515264359230038e-06, "loss": 0.3742, "step": 6061 }, { "epoch": 0.27436071509391263, "grad_norm": 0.647086052466487, "learning_rate": 8.514743113785501e-06, "loss": 0.3295, "step": 6062 }, { "epoch": 0.2744059742023082, "grad_norm": 0.640801493246443, "learning_rate": 8.51422179281993e-06, "loss": 0.3412, "step": 6063 }, { "epoch": 0.27445123331070376, "grad_norm": 0.6425145743473898, "learning_rate": 8.513700396344527e-06, "loss": 0.3725, "step": 6064 }, { "epoch": 0.27449649241909935, "grad_norm": 0.7084102432303667, "learning_rate": 8.51317892437049e-06, "loss": 0.3484, "step": 6065 }, { "epoch": 0.2745417515274949, "grad_norm": 0.6634924837539934, "learning_rate": 8.512657376909031e-06, "loss": 0.3933, "step": 6066 }, { "epoch": 0.27458701063589047, "grad_norm": 0.6744386287510311, "learning_rate": 8.512135753971353e-06, "loss": 0.3514, "step": 6067 }, { "epoch": 0.27463226974428606, "grad_norm": 0.6249090418967712, "learning_rate": 8.511614055568665e-06, "loss": 0.3501, "step": 6068 }, { "epoch": 0.2746775288526816, "grad_norm": 0.6350782641412113, "learning_rate": 8.511092281712174e-06, "loss": 0.4163, "step": 6069 }, { "epoch": 0.2747227879610772, "grad_norm": 0.6298461195689672, "learning_rate": 8.510570432413095e-06, "loss": 0.3859, "step": 6070 }, { "epoch": 0.2747680470694727, "grad_norm": 0.6447723188705166, "learning_rate": 8.510048507682637e-06, "loss": 0.3542, "step": 6071 }, { "epoch": 0.2748133061778683, "grad_norm": 0.6436348002464336, "learning_rate": 8.50952650753202e-06, "loss": 0.3221, "step": 6072 }, { "epoch": 0.27485856528626385, "grad_norm": 0.6204655375985614, "learning_rate": 8.509004431972455e-06, "loss": 0.3524, "step": 6073 }, { "epoch": 0.27490382439465944, "grad_norm": 0.6687700098555398, "learning_rate": 8.508482281015163e-06, "loss": 0.3919, "step": 6074 }, { "epoch": 0.27494908350305497, "grad_norm": 0.6433898373052561, "learning_rate": 8.50796005467136e-06, "loss": 0.4072, "step": 6075 }, { "epoch": 0.27499434261145056, "grad_norm": 0.6452541013286588, "learning_rate": 8.507437752952271e-06, "loss": 0.4101, "step": 6076 }, { "epoch": 0.2750396017198461, "grad_norm": 0.6893631663895471, "learning_rate": 8.506915375869118e-06, "loss": 0.4915, "step": 6077 }, { "epoch": 0.2750848608282417, "grad_norm": 0.692999676441218, "learning_rate": 8.506392923433124e-06, "loss": 0.3923, "step": 6078 }, { "epoch": 0.2751301199366373, "grad_norm": 0.6724569502775599, "learning_rate": 8.505870395655512e-06, "loss": 0.4131, "step": 6079 }, { "epoch": 0.2751753790450328, "grad_norm": 0.6804157172735723, "learning_rate": 8.505347792547516e-06, "loss": 0.3892, "step": 6080 }, { "epoch": 0.2752206381534284, "grad_norm": 0.4057416708339337, "learning_rate": 8.504825114120361e-06, "loss": 0.5007, "step": 6081 }, { "epoch": 0.27526589726182393, "grad_norm": 0.7245855816415157, "learning_rate": 8.504302360385276e-06, "loss": 0.3408, "step": 6082 }, { "epoch": 0.2753111563702195, "grad_norm": 0.6641071295508348, "learning_rate": 8.5037795313535e-06, "loss": 0.3677, "step": 6083 }, { "epoch": 0.27535641547861506, "grad_norm": 0.6976861804856807, "learning_rate": 8.50325662703626e-06, "loss": 0.399, "step": 6084 }, { "epoch": 0.27540167458701065, "grad_norm": 0.662407202013515, "learning_rate": 8.502733647444796e-06, "loss": 0.3714, "step": 6085 }, { "epoch": 0.2754469336954062, "grad_norm": 0.5031182360900243, "learning_rate": 8.502210592590344e-06, "loss": 0.4847, "step": 6086 }, { "epoch": 0.27549219280380177, "grad_norm": 0.8223407065276434, "learning_rate": 8.501687462484141e-06, "loss": 0.3875, "step": 6087 }, { "epoch": 0.2755374519121973, "grad_norm": 0.7308641110498155, "learning_rate": 8.501164257137431e-06, "loss": 0.4031, "step": 6088 }, { "epoch": 0.2755827110205929, "grad_norm": 0.6205462016558165, "learning_rate": 8.500640976561453e-06, "loss": 0.3834, "step": 6089 }, { "epoch": 0.2756279701289885, "grad_norm": 0.6532663048504899, "learning_rate": 8.500117620767452e-06, "loss": 0.3513, "step": 6090 }, { "epoch": 0.275673229237384, "grad_norm": 0.6343766244654432, "learning_rate": 8.499594189766674e-06, "loss": 0.4097, "step": 6091 }, { "epoch": 0.2757184883457796, "grad_norm": 0.6452984743606108, "learning_rate": 8.499070683570363e-06, "loss": 0.4342, "step": 6092 }, { "epoch": 0.27576374745417515, "grad_norm": 0.670940591277106, "learning_rate": 8.49854710218977e-06, "loss": 0.401, "step": 6093 }, { "epoch": 0.27580900656257074, "grad_norm": 0.6861574396517808, "learning_rate": 8.498023445636145e-06, "loss": 0.367, "step": 6094 }, { "epoch": 0.27585426567096627, "grad_norm": 0.6537017546764119, "learning_rate": 8.49749971392074e-06, "loss": 0.3854, "step": 6095 }, { "epoch": 0.27589952477936186, "grad_norm": 0.6706788741382547, "learning_rate": 8.496975907054808e-06, "loss": 0.3728, "step": 6096 }, { "epoch": 0.2759447838877574, "grad_norm": 0.6577647196836957, "learning_rate": 8.496452025049605e-06, "loss": 0.386, "step": 6097 }, { "epoch": 0.275990042996153, "grad_norm": 0.687328696884683, "learning_rate": 8.495928067916383e-06, "loss": 0.3405, "step": 6098 }, { "epoch": 0.2760353021045485, "grad_norm": 0.6355701599063429, "learning_rate": 8.495404035666409e-06, "loss": 0.4077, "step": 6099 }, { "epoch": 0.2760805612129441, "grad_norm": 0.6651890064834014, "learning_rate": 8.494879928310934e-06, "loss": 0.3735, "step": 6100 }, { "epoch": 0.27612582032133964, "grad_norm": 0.6690293513244945, "learning_rate": 8.494355745861223e-06, "loss": 0.4075, "step": 6101 }, { "epoch": 0.27617107942973523, "grad_norm": 0.6397491482824402, "learning_rate": 8.49383148832854e-06, "loss": 0.3712, "step": 6102 }, { "epoch": 0.2762163385381308, "grad_norm": 0.6254108917060476, "learning_rate": 8.493307155724147e-06, "loss": 0.3548, "step": 6103 }, { "epoch": 0.27626159764652636, "grad_norm": 0.6253800648922403, "learning_rate": 8.492782748059314e-06, "loss": 0.3484, "step": 6104 }, { "epoch": 0.27630685675492195, "grad_norm": 1.127881443978114, "learning_rate": 8.492258265345307e-06, "loss": 0.3745, "step": 6105 }, { "epoch": 0.2763521158633175, "grad_norm": 0.645476286737807, "learning_rate": 8.491733707593395e-06, "loss": 0.3739, "step": 6106 }, { "epoch": 0.2763973749717131, "grad_norm": 0.6708347456649015, "learning_rate": 8.49120907481485e-06, "loss": 0.3677, "step": 6107 }, { "epoch": 0.2764426340801086, "grad_norm": 0.6620974851891143, "learning_rate": 8.490684367020944e-06, "loss": 0.3921, "step": 6108 }, { "epoch": 0.2764878931885042, "grad_norm": 0.6977561862853785, "learning_rate": 8.490159584222952e-06, "loss": 0.4095, "step": 6109 }, { "epoch": 0.27653315229689973, "grad_norm": 0.5846728205583535, "learning_rate": 8.48963472643215e-06, "loss": 0.3874, "step": 6110 }, { "epoch": 0.2765784114052953, "grad_norm": 0.6304765120857315, "learning_rate": 8.489109793659815e-06, "loss": 0.3824, "step": 6111 }, { "epoch": 0.27662367051369086, "grad_norm": 0.69646468269515, "learning_rate": 8.488584785917226e-06, "loss": 0.4003, "step": 6112 }, { "epoch": 0.27666892962208645, "grad_norm": 0.669757027053765, "learning_rate": 8.488059703215666e-06, "loss": 0.3358, "step": 6113 }, { "epoch": 0.27671418873048204, "grad_norm": 0.6960057329359819, "learning_rate": 8.487534545566414e-06, "loss": 0.3456, "step": 6114 }, { "epoch": 0.27675944783887757, "grad_norm": 0.6425349594788039, "learning_rate": 8.487009312980756e-06, "loss": 0.4085, "step": 6115 }, { "epoch": 0.27680470694727316, "grad_norm": 0.7200906547318653, "learning_rate": 8.486484005469977e-06, "loss": 0.386, "step": 6116 }, { "epoch": 0.2768499660556687, "grad_norm": 0.5778607950964567, "learning_rate": 8.485958623045365e-06, "loss": 0.3665, "step": 6117 }, { "epoch": 0.2768952251640643, "grad_norm": 0.6721297193179439, "learning_rate": 8.48543316571821e-06, "loss": 0.3692, "step": 6118 }, { "epoch": 0.2769404842724598, "grad_norm": 0.7550680623075885, "learning_rate": 8.484907633499798e-06, "loss": 0.3829, "step": 6119 }, { "epoch": 0.2769857433808554, "grad_norm": 0.6393433200351009, "learning_rate": 8.484382026401428e-06, "loss": 0.3975, "step": 6120 }, { "epoch": 0.27703100248925094, "grad_norm": 0.6772103434747855, "learning_rate": 8.483856344434388e-06, "loss": 0.3644, "step": 6121 }, { "epoch": 0.27707626159764653, "grad_norm": 0.6192545725413289, "learning_rate": 8.483330587609975e-06, "loss": 0.3698, "step": 6122 }, { "epoch": 0.27712152070604207, "grad_norm": 0.6638295813572664, "learning_rate": 8.482804755939484e-06, "loss": 0.3637, "step": 6123 }, { "epoch": 0.27716677981443766, "grad_norm": 0.6374373395694628, "learning_rate": 8.482278849434218e-06, "loss": 0.4055, "step": 6124 }, { "epoch": 0.2772120389228332, "grad_norm": 0.7399956950117406, "learning_rate": 8.481752868105473e-06, "loss": 0.3655, "step": 6125 }, { "epoch": 0.2772572980312288, "grad_norm": 0.6252133276431657, "learning_rate": 8.481226811964552e-06, "loss": 0.367, "step": 6126 }, { "epoch": 0.2773025571396244, "grad_norm": 0.6040986055476337, "learning_rate": 8.48070068102276e-06, "loss": 0.399, "step": 6127 }, { "epoch": 0.2773478162480199, "grad_norm": 0.6680655356074402, "learning_rate": 8.480174475291401e-06, "loss": 0.386, "step": 6128 }, { "epoch": 0.2773930753564155, "grad_norm": 0.6679662578379792, "learning_rate": 8.47964819478178e-06, "loss": 0.3413, "step": 6129 }, { "epoch": 0.27743833446481103, "grad_norm": 0.6402503560072257, "learning_rate": 8.479121839505205e-06, "loss": 0.4141, "step": 6130 }, { "epoch": 0.2774835935732066, "grad_norm": 0.6748241095133092, "learning_rate": 8.478595409472988e-06, "loss": 0.363, "step": 6131 }, { "epoch": 0.27752885268160216, "grad_norm": 0.735162342819606, "learning_rate": 8.47806890469644e-06, "loss": 0.4045, "step": 6132 }, { "epoch": 0.27757411178999775, "grad_norm": 0.6468589030966592, "learning_rate": 8.477542325186873e-06, "loss": 0.3892, "step": 6133 }, { "epoch": 0.2776193708983933, "grad_norm": 0.6205833487739102, "learning_rate": 8.4770156709556e-06, "loss": 0.3388, "step": 6134 }, { "epoch": 0.27766463000678887, "grad_norm": 0.6116203070506987, "learning_rate": 8.476488942013941e-06, "loss": 0.3953, "step": 6135 }, { "epoch": 0.2777098891151844, "grad_norm": 0.41637885767207033, "learning_rate": 8.475962138373212e-06, "loss": 0.4961, "step": 6136 }, { "epoch": 0.27775514822358, "grad_norm": 0.6635986927809545, "learning_rate": 8.475435260044732e-06, "loss": 0.3943, "step": 6137 }, { "epoch": 0.2778004073319756, "grad_norm": 0.6525112217425668, "learning_rate": 8.474908307039822e-06, "loss": 0.3992, "step": 6138 }, { "epoch": 0.2778456664403711, "grad_norm": 0.6005514128270779, "learning_rate": 8.474381279369804e-06, "loss": 0.3926, "step": 6139 }, { "epoch": 0.2778909255487667, "grad_norm": 0.30909771445739137, "learning_rate": 8.473854177046004e-06, "loss": 0.479, "step": 6140 }, { "epoch": 0.27793618465716224, "grad_norm": 0.6494619158079863, "learning_rate": 8.473327000079748e-06, "loss": 0.3605, "step": 6141 }, { "epoch": 0.27798144376555783, "grad_norm": 0.3128065828485614, "learning_rate": 8.472799748482361e-06, "loss": 0.4826, "step": 6142 }, { "epoch": 0.27802670287395337, "grad_norm": 0.28853261982474304, "learning_rate": 8.472272422265172e-06, "loss": 0.4897, "step": 6143 }, { "epoch": 0.27807196198234896, "grad_norm": 0.6229091371671683, "learning_rate": 8.471745021439516e-06, "loss": 0.3932, "step": 6144 }, { "epoch": 0.2781172210907445, "grad_norm": 0.6575726509092661, "learning_rate": 8.47121754601672e-06, "loss": 0.4207, "step": 6145 }, { "epoch": 0.2781624801991401, "grad_norm": 0.7373976535519031, "learning_rate": 8.47068999600812e-06, "loss": 0.3768, "step": 6146 }, { "epoch": 0.2782077393075356, "grad_norm": 0.3628628811080446, "learning_rate": 8.470162371425052e-06, "loss": 0.4912, "step": 6147 }, { "epoch": 0.2782529984159312, "grad_norm": 0.6637909505966786, "learning_rate": 8.469634672278853e-06, "loss": 0.3541, "step": 6148 }, { "epoch": 0.2782982575243268, "grad_norm": 0.311668781451325, "learning_rate": 8.46910689858086e-06, "loss": 0.4806, "step": 6149 }, { "epoch": 0.27834351663272233, "grad_norm": 0.683883187993135, "learning_rate": 8.468579050342414e-06, "loss": 0.3922, "step": 6150 }, { "epoch": 0.2783887757411179, "grad_norm": 0.6586497915069213, "learning_rate": 8.468051127574858e-06, "loss": 0.4114, "step": 6151 }, { "epoch": 0.27843403484951346, "grad_norm": 0.6524905680374529, "learning_rate": 8.467523130289535e-06, "loss": 0.383, "step": 6152 }, { "epoch": 0.27847929395790905, "grad_norm": 0.6940460177411515, "learning_rate": 8.466995058497788e-06, "loss": 0.345, "step": 6153 }, { "epoch": 0.2785245530663046, "grad_norm": 0.4365224565945977, "learning_rate": 8.466466912210967e-06, "loss": 0.47, "step": 6154 }, { "epoch": 0.27856981217470017, "grad_norm": 0.38480078277144353, "learning_rate": 8.465938691440417e-06, "loss": 0.5013, "step": 6155 }, { "epoch": 0.2786150712830957, "grad_norm": 0.6727143832648759, "learning_rate": 8.46541039619749e-06, "loss": 0.3663, "step": 6156 }, { "epoch": 0.2786603303914913, "grad_norm": 0.3096778956946426, "learning_rate": 8.464882026493537e-06, "loss": 0.4784, "step": 6157 }, { "epoch": 0.27870558949988683, "grad_norm": 0.6339465451087098, "learning_rate": 8.464353582339911e-06, "loss": 0.3632, "step": 6158 }, { "epoch": 0.2787508486082824, "grad_norm": 0.3841815298069502, "learning_rate": 8.463825063747966e-06, "loss": 0.4866, "step": 6159 }, { "epoch": 0.27879610771667795, "grad_norm": 0.6775125423465649, "learning_rate": 8.463296470729058e-06, "loss": 0.3913, "step": 6160 }, { "epoch": 0.27884136682507354, "grad_norm": 0.35137476460460587, "learning_rate": 8.462767803294547e-06, "loss": 0.475, "step": 6161 }, { "epoch": 0.27888662593346913, "grad_norm": 0.6326437997259854, "learning_rate": 8.462239061455791e-06, "loss": 0.3446, "step": 6162 }, { "epoch": 0.27893188504186467, "grad_norm": 0.2746024871363939, "learning_rate": 8.461710245224149e-06, "loss": 0.4814, "step": 6163 }, { "epoch": 0.27897714415026026, "grad_norm": 0.6396593577582315, "learning_rate": 8.461181354610988e-06, "loss": 0.3749, "step": 6164 }, { "epoch": 0.2790224032586558, "grad_norm": 0.6563268547409656, "learning_rate": 8.460652389627668e-06, "loss": 0.4009, "step": 6165 }, { "epoch": 0.2790676623670514, "grad_norm": 0.4048207096311486, "learning_rate": 8.46012335028556e-06, "loss": 0.5349, "step": 6166 }, { "epoch": 0.2791129214754469, "grad_norm": 0.6814293888375414, "learning_rate": 8.459594236596024e-06, "loss": 0.3856, "step": 6167 }, { "epoch": 0.2791581805838425, "grad_norm": 0.7480971357179941, "learning_rate": 8.459065048570434e-06, "loss": 0.4051, "step": 6168 }, { "epoch": 0.27920343969223804, "grad_norm": 0.6274966551812439, "learning_rate": 8.45853578622016e-06, "loss": 0.3512, "step": 6169 }, { "epoch": 0.27924869880063363, "grad_norm": 0.6308842647074984, "learning_rate": 8.458006449556576e-06, "loss": 0.3576, "step": 6170 }, { "epoch": 0.27929395790902917, "grad_norm": 0.32163095555358034, "learning_rate": 8.457477038591054e-06, "loss": 0.4928, "step": 6171 }, { "epoch": 0.27933921701742476, "grad_norm": 0.6627694350400074, "learning_rate": 8.456947553334966e-06, "loss": 0.3557, "step": 6172 }, { "epoch": 0.27938447612582035, "grad_norm": 0.6565527403152172, "learning_rate": 8.456417993799695e-06, "loss": 0.4173, "step": 6173 }, { "epoch": 0.2794297352342159, "grad_norm": 0.2907819307107183, "learning_rate": 8.455888359996616e-06, "loss": 0.5071, "step": 6174 }, { "epoch": 0.27947499434261147, "grad_norm": 0.8058453445617328, "learning_rate": 8.455358651937111e-06, "loss": 0.3688, "step": 6175 }, { "epoch": 0.279520253451007, "grad_norm": 0.6762267533514817, "learning_rate": 8.45482886963256e-06, "loss": 0.3641, "step": 6176 }, { "epoch": 0.2795655125594026, "grad_norm": 0.6706516377549697, "learning_rate": 8.454299013094347e-06, "loss": 0.3531, "step": 6177 }, { "epoch": 0.27961077166779813, "grad_norm": 0.6571031966771673, "learning_rate": 8.453769082333858e-06, "loss": 0.3657, "step": 6178 }, { "epoch": 0.2796560307761937, "grad_norm": 0.682765081302516, "learning_rate": 8.453239077362478e-06, "loss": 0.3127, "step": 6179 }, { "epoch": 0.27970128988458925, "grad_norm": 0.3404651269271434, "learning_rate": 8.452708998191597e-06, "loss": 0.4783, "step": 6180 }, { "epoch": 0.27974654899298484, "grad_norm": 0.6757919228891912, "learning_rate": 8.452178844832603e-06, "loss": 0.3785, "step": 6181 }, { "epoch": 0.2797918081013804, "grad_norm": 0.8022516686986559, "learning_rate": 8.451648617296889e-06, "loss": 0.3378, "step": 6182 }, { "epoch": 0.27983706720977597, "grad_norm": 0.317920038886496, "learning_rate": 8.451118315595847e-06, "loss": 0.489, "step": 6183 }, { "epoch": 0.27988232631817156, "grad_norm": 0.6975404891441597, "learning_rate": 8.45058793974087e-06, "loss": 0.4417, "step": 6184 }, { "epoch": 0.2799275854265671, "grad_norm": 0.69970890978507, "learning_rate": 8.450057489743359e-06, "loss": 0.4497, "step": 6185 }, { "epoch": 0.2799728445349627, "grad_norm": 0.7103773384893085, "learning_rate": 8.449526965614708e-06, "loss": 0.3826, "step": 6186 }, { "epoch": 0.2800181036433582, "grad_norm": 0.6669420693822625, "learning_rate": 8.448996367366313e-06, "loss": 0.3461, "step": 6187 }, { "epoch": 0.2800633627517538, "grad_norm": 0.7245749365890447, "learning_rate": 8.448465695009583e-06, "loss": 0.3839, "step": 6188 }, { "epoch": 0.28010862186014934, "grad_norm": 0.6974101733284066, "learning_rate": 8.447934948555915e-06, "loss": 0.3906, "step": 6189 }, { "epoch": 0.28015388096854493, "grad_norm": 0.6700526606181927, "learning_rate": 8.447404128016715e-06, "loss": 0.338, "step": 6190 }, { "epoch": 0.28019914007694047, "grad_norm": 0.6485693661815121, "learning_rate": 8.446873233403388e-06, "loss": 0.4185, "step": 6191 }, { "epoch": 0.28024439918533606, "grad_norm": 0.6388113321491804, "learning_rate": 8.446342264727341e-06, "loss": 0.3867, "step": 6192 }, { "epoch": 0.2802896582937316, "grad_norm": 0.6613139018209925, "learning_rate": 8.445811221999983e-06, "loss": 0.4226, "step": 6193 }, { "epoch": 0.2803349174021272, "grad_norm": 0.514249841919849, "learning_rate": 8.445280105232724e-06, "loss": 0.4739, "step": 6194 }, { "epoch": 0.2803801765105227, "grad_norm": 0.6705199570351592, "learning_rate": 8.44474891443698e-06, "loss": 0.3916, "step": 6195 }, { "epoch": 0.2804254356189183, "grad_norm": 0.6287426122637085, "learning_rate": 8.44421764962416e-06, "loss": 0.3719, "step": 6196 }, { "epoch": 0.2804706947273139, "grad_norm": 0.6513169442120156, "learning_rate": 8.443686310805679e-06, "loss": 0.3873, "step": 6197 }, { "epoch": 0.28051595383570943, "grad_norm": 0.6885897435735384, "learning_rate": 8.443154897992958e-06, "loss": 0.3384, "step": 6198 }, { "epoch": 0.280561212944105, "grad_norm": 0.6483482609359074, "learning_rate": 8.442623411197412e-06, "loss": 0.413, "step": 6199 }, { "epoch": 0.28060647205250056, "grad_norm": 0.63645573302067, "learning_rate": 8.442091850430463e-06, "loss": 0.3639, "step": 6200 }, { "epoch": 0.28065173116089615, "grad_norm": 0.48556986376096983, "learning_rate": 8.441560215703531e-06, "loss": 0.5057, "step": 6201 }, { "epoch": 0.2806969902692917, "grad_norm": 0.6492821333841186, "learning_rate": 8.441028507028041e-06, "loss": 0.4118, "step": 6202 }, { "epoch": 0.28074224937768727, "grad_norm": 0.6545075018939425, "learning_rate": 8.440496724415415e-06, "loss": 0.3803, "step": 6203 }, { "epoch": 0.2807875084860828, "grad_norm": 0.6451701396785834, "learning_rate": 8.439964867877082e-06, "loss": 0.3693, "step": 6204 }, { "epoch": 0.2808327675944784, "grad_norm": 0.7229587230904689, "learning_rate": 8.439432937424468e-06, "loss": 0.3748, "step": 6205 }, { "epoch": 0.28087802670287393, "grad_norm": 0.6800637781863547, "learning_rate": 8.438900933069006e-06, "loss": 0.445, "step": 6206 }, { "epoch": 0.2809232858112695, "grad_norm": 0.6669011748349529, "learning_rate": 8.438368854822123e-06, "loss": 0.3841, "step": 6207 }, { "epoch": 0.2809685449196651, "grad_norm": 0.42745797731326585, "learning_rate": 8.437836702695253e-06, "loss": 0.4638, "step": 6208 }, { "epoch": 0.28101380402806064, "grad_norm": 0.33492718032317736, "learning_rate": 8.437304476699833e-06, "loss": 0.4973, "step": 6209 }, { "epoch": 0.28105906313645623, "grad_norm": 0.6915936016022975, "learning_rate": 8.436772176847295e-06, "loss": 0.3549, "step": 6210 }, { "epoch": 0.28110432224485177, "grad_norm": 0.6375801191470075, "learning_rate": 8.436239803149077e-06, "loss": 0.3504, "step": 6211 }, { "epoch": 0.28114958135324736, "grad_norm": 0.3721610081077562, "learning_rate": 8.43570735561662e-06, "loss": 0.4799, "step": 6212 }, { "epoch": 0.2811948404616429, "grad_norm": 0.7265660941567311, "learning_rate": 8.435174834261365e-06, "loss": 0.4056, "step": 6213 }, { "epoch": 0.2812400995700385, "grad_norm": 0.35685129525632925, "learning_rate": 8.434642239094752e-06, "loss": 0.469, "step": 6214 }, { "epoch": 0.281285358678434, "grad_norm": 0.7138835831888775, "learning_rate": 8.434109570128228e-06, "loss": 0.3291, "step": 6215 }, { "epoch": 0.2813306177868296, "grad_norm": 0.7130084286744158, "learning_rate": 8.433576827373234e-06, "loss": 0.3764, "step": 6216 }, { "epoch": 0.28137587689522514, "grad_norm": 0.3521422973869999, "learning_rate": 8.433044010841221e-06, "loss": 0.5127, "step": 6217 }, { "epoch": 0.28142113600362073, "grad_norm": 0.8142269849959118, "learning_rate": 8.432511120543633e-06, "loss": 0.4221, "step": 6218 }, { "epoch": 0.2814663951120163, "grad_norm": 0.652441030531594, "learning_rate": 8.431978156491927e-06, "loss": 0.3707, "step": 6219 }, { "epoch": 0.28151165422041186, "grad_norm": 0.7365615620283028, "learning_rate": 8.43144511869755e-06, "loss": 0.3895, "step": 6220 }, { "epoch": 0.28155691332880745, "grad_norm": 0.6056462961860908, "learning_rate": 8.430912007171957e-06, "loss": 0.3803, "step": 6221 }, { "epoch": 0.281602172437203, "grad_norm": 0.6091945279871832, "learning_rate": 8.430378821926599e-06, "loss": 0.4075, "step": 6222 }, { "epoch": 0.28164743154559857, "grad_norm": 0.3147920289891761, "learning_rate": 8.429845562972939e-06, "loss": 0.5117, "step": 6223 }, { "epoch": 0.2816926906539941, "grad_norm": 0.6431671482916711, "learning_rate": 8.429312230322431e-06, "loss": 0.3297, "step": 6224 }, { "epoch": 0.2817379497623897, "grad_norm": 0.2877762484850333, "learning_rate": 8.428778823986534e-06, "loss": 0.4789, "step": 6225 }, { "epoch": 0.28178320887078523, "grad_norm": 0.6581219748598858, "learning_rate": 8.42824534397671e-06, "loss": 0.3275, "step": 6226 }, { "epoch": 0.2818284679791808, "grad_norm": 0.6211291035800235, "learning_rate": 8.427711790304426e-06, "loss": 0.367, "step": 6227 }, { "epoch": 0.28187372708757635, "grad_norm": 0.7104653182176582, "learning_rate": 8.427178162981141e-06, "loss": 0.3692, "step": 6228 }, { "epoch": 0.28191898619597194, "grad_norm": 0.6858916103546259, "learning_rate": 8.426644462018323e-06, "loss": 0.4087, "step": 6229 }, { "epoch": 0.2819642453043675, "grad_norm": 0.630715733986845, "learning_rate": 8.42611068742744e-06, "loss": 0.3305, "step": 6230 }, { "epoch": 0.28200950441276307, "grad_norm": 0.6855694797106894, "learning_rate": 8.425576839219962e-06, "loss": 0.3584, "step": 6231 }, { "epoch": 0.28205476352115866, "grad_norm": 0.6616610678604273, "learning_rate": 8.425042917407358e-06, "loss": 0.3821, "step": 6232 }, { "epoch": 0.2821000226295542, "grad_norm": 0.6621237669737059, "learning_rate": 8.4245089220011e-06, "loss": 0.3835, "step": 6233 }, { "epoch": 0.2821452817379498, "grad_norm": 0.7034082290073037, "learning_rate": 8.423974853012663e-06, "loss": 0.386, "step": 6234 }, { "epoch": 0.2821905408463453, "grad_norm": 0.7146277465228836, "learning_rate": 8.423440710453524e-06, "loss": 0.3895, "step": 6235 }, { "epoch": 0.2822357999547409, "grad_norm": 0.6941430005620083, "learning_rate": 8.422906494335155e-06, "loss": 0.3778, "step": 6236 }, { "epoch": 0.28228105906313644, "grad_norm": 0.6174910742970482, "learning_rate": 8.42237220466904e-06, "loss": 0.349, "step": 6237 }, { "epoch": 0.28232631817153203, "grad_norm": 0.6327827603546239, "learning_rate": 8.421837841466657e-06, "loss": 0.3166, "step": 6238 }, { "epoch": 0.28237157727992757, "grad_norm": 0.7934117873176336, "learning_rate": 8.42130340473949e-06, "loss": 0.35, "step": 6239 }, { "epoch": 0.28241683638832316, "grad_norm": 0.6753158892052803, "learning_rate": 8.420768894499018e-06, "loss": 0.416, "step": 6240 }, { "epoch": 0.2824620954967187, "grad_norm": 0.6114585858457859, "learning_rate": 8.420234310756731e-06, "loss": 0.3795, "step": 6241 }, { "epoch": 0.2825073546051143, "grad_norm": 0.6503678006643415, "learning_rate": 8.419699653524112e-06, "loss": 0.3335, "step": 6242 }, { "epoch": 0.28255261371350987, "grad_norm": 0.6703813746328721, "learning_rate": 8.41916492281265e-06, "loss": 0.3887, "step": 6243 }, { "epoch": 0.2825978728219054, "grad_norm": 0.6616322211727921, "learning_rate": 8.418630118633835e-06, "loss": 0.3731, "step": 6244 }, { "epoch": 0.282643131930301, "grad_norm": 0.679884118274133, "learning_rate": 8.418095240999157e-06, "loss": 0.3616, "step": 6245 }, { "epoch": 0.28268839103869653, "grad_norm": 0.6420201373911756, "learning_rate": 8.417560289920112e-06, "loss": 0.3553, "step": 6246 }, { "epoch": 0.2827336501470921, "grad_norm": 0.6892038150466372, "learning_rate": 8.417025265408192e-06, "loss": 0.3675, "step": 6247 }, { "epoch": 0.28277890925548765, "grad_norm": 0.6629311105686149, "learning_rate": 8.416490167474894e-06, "loss": 0.433, "step": 6248 }, { "epoch": 0.28282416836388324, "grad_norm": 0.753104654935313, "learning_rate": 8.415954996131715e-06, "loss": 0.332, "step": 6249 }, { "epoch": 0.2828694274722788, "grad_norm": 0.4750248779522945, "learning_rate": 8.415419751390155e-06, "loss": 0.4924, "step": 6250 }, { "epoch": 0.28291468658067437, "grad_norm": 0.6649641453862327, "learning_rate": 8.414884433261712e-06, "loss": 0.4092, "step": 6251 }, { "epoch": 0.2829599456890699, "grad_norm": 0.7043728882105145, "learning_rate": 8.414349041757895e-06, "loss": 0.3647, "step": 6252 }, { "epoch": 0.2830052047974655, "grad_norm": 0.7906797670385073, "learning_rate": 8.4138135768902e-06, "loss": 0.3976, "step": 6253 }, { "epoch": 0.283050463905861, "grad_norm": 0.33427530855867776, "learning_rate": 8.413278038670137e-06, "loss": 0.4822, "step": 6254 }, { "epoch": 0.2830957230142566, "grad_norm": 0.6971907768403794, "learning_rate": 8.412742427109211e-06, "loss": 0.3494, "step": 6255 }, { "epoch": 0.2831409821226522, "grad_norm": 0.6501227087395217, "learning_rate": 8.41220674221893e-06, "loss": 0.3777, "step": 6256 }, { "epoch": 0.28318624123104774, "grad_norm": 0.711428274830069, "learning_rate": 8.41167098401081e-06, "loss": 0.3844, "step": 6257 }, { "epoch": 0.28323150033944333, "grad_norm": 0.6728050850151719, "learning_rate": 8.411135152496357e-06, "loss": 0.3792, "step": 6258 }, { "epoch": 0.28327675944783887, "grad_norm": 0.6833250711380504, "learning_rate": 8.410599247687085e-06, "loss": 0.3719, "step": 6259 }, { "epoch": 0.28332201855623446, "grad_norm": 0.6669147987584385, "learning_rate": 8.41006326959451e-06, "loss": 0.3496, "step": 6260 }, { "epoch": 0.28336727766463, "grad_norm": 0.6839235194108939, "learning_rate": 8.409527218230152e-06, "loss": 0.3865, "step": 6261 }, { "epoch": 0.2834125367730256, "grad_norm": 0.6959276699222522, "learning_rate": 8.408991093605524e-06, "loss": 0.3195, "step": 6262 }, { "epoch": 0.2834577958814211, "grad_norm": 0.6382203905796654, "learning_rate": 8.408454895732146e-06, "loss": 0.3427, "step": 6263 }, { "epoch": 0.2835030549898167, "grad_norm": 0.6391687745761867, "learning_rate": 8.40791862462154e-06, "loss": 0.3376, "step": 6264 }, { "epoch": 0.28354831409821224, "grad_norm": 0.6564862085170919, "learning_rate": 8.407382280285231e-06, "loss": 0.388, "step": 6265 }, { "epoch": 0.28359357320660783, "grad_norm": 0.6727667578398724, "learning_rate": 8.406845862734741e-06, "loss": 0.3607, "step": 6266 }, { "epoch": 0.2836388323150034, "grad_norm": 0.6915184834363046, "learning_rate": 8.406309371981597e-06, "loss": 0.3528, "step": 6267 }, { "epoch": 0.28368409142339895, "grad_norm": 0.6171206668536895, "learning_rate": 8.405772808037326e-06, "loss": 0.4959, "step": 6268 }, { "epoch": 0.28372935053179454, "grad_norm": 0.6642918766150323, "learning_rate": 8.405236170913458e-06, "loss": 0.3933, "step": 6269 }, { "epoch": 0.2837746096401901, "grad_norm": 0.44063677262443796, "learning_rate": 8.404699460621523e-06, "loss": 0.5106, "step": 6270 }, { "epoch": 0.28381986874858567, "grad_norm": 0.6525737155916631, "learning_rate": 8.404162677173052e-06, "loss": 0.3706, "step": 6271 }, { "epoch": 0.2838651278569812, "grad_norm": 0.6395490185811372, "learning_rate": 8.403625820579582e-06, "loss": 0.3579, "step": 6272 }, { "epoch": 0.2839103869653768, "grad_norm": 0.6688331079105485, "learning_rate": 8.403088890852646e-06, "loss": 0.3629, "step": 6273 }, { "epoch": 0.2839556460737723, "grad_norm": 0.6238772693835692, "learning_rate": 8.402551888003781e-06, "loss": 0.3402, "step": 6274 }, { "epoch": 0.2840009051821679, "grad_norm": 0.6460567022078382, "learning_rate": 8.402014812044525e-06, "loss": 0.4083, "step": 6275 }, { "epoch": 0.28404616429056345, "grad_norm": 0.6865228006752404, "learning_rate": 8.401477662986421e-06, "loss": 0.4873, "step": 6276 }, { "epoch": 0.28409142339895904, "grad_norm": 0.7456566077740172, "learning_rate": 8.400940440841008e-06, "loss": 0.3645, "step": 6277 }, { "epoch": 0.28413668250735463, "grad_norm": 0.6988846647845834, "learning_rate": 8.40040314561983e-06, "loss": 0.411, "step": 6278 }, { "epoch": 0.28418194161575017, "grad_norm": 0.9361045391555585, "learning_rate": 8.399865777334435e-06, "loss": 0.3513, "step": 6279 }, { "epoch": 0.28422720072414576, "grad_norm": 0.751607817915409, "learning_rate": 8.399328335996362e-06, "loss": 0.3572, "step": 6280 }, { "epoch": 0.2842724598325413, "grad_norm": 0.6749108112927008, "learning_rate": 8.398790821617166e-06, "loss": 0.3846, "step": 6281 }, { "epoch": 0.2843177189409369, "grad_norm": 0.7128668589052166, "learning_rate": 8.398253234208391e-06, "loss": 0.3705, "step": 6282 }, { "epoch": 0.2843629780493324, "grad_norm": 0.6235231061493672, "learning_rate": 8.397715573781596e-06, "loss": 0.3688, "step": 6283 }, { "epoch": 0.284408237157728, "grad_norm": 0.7008898344484392, "learning_rate": 8.397177840348323e-06, "loss": 0.3399, "step": 6284 }, { "epoch": 0.28445349626612354, "grad_norm": 0.43211295832271546, "learning_rate": 8.396640033920135e-06, "loss": 0.4725, "step": 6285 }, { "epoch": 0.28449875537451913, "grad_norm": 0.6773046329502455, "learning_rate": 8.396102154508584e-06, "loss": 0.3979, "step": 6286 }, { "epoch": 0.28454401448291466, "grad_norm": 0.6202231828662841, "learning_rate": 8.395564202125229e-06, "loss": 0.4042, "step": 6287 }, { "epoch": 0.28458927359131025, "grad_norm": 0.6785238184624034, "learning_rate": 8.395026176781627e-06, "loss": 0.3194, "step": 6288 }, { "epoch": 0.2846345326997058, "grad_norm": 0.663363406948093, "learning_rate": 8.394488078489339e-06, "loss": 0.3632, "step": 6289 }, { "epoch": 0.2846797918081014, "grad_norm": 0.656510124986166, "learning_rate": 8.393949907259927e-06, "loss": 0.3984, "step": 6290 }, { "epoch": 0.28472505091649697, "grad_norm": 0.6625844048352743, "learning_rate": 8.393411663104957e-06, "loss": 0.3573, "step": 6291 }, { "epoch": 0.2847703100248925, "grad_norm": 0.6682820561545953, "learning_rate": 8.392873346035992e-06, "loss": 0.3627, "step": 6292 }, { "epoch": 0.2848155691332881, "grad_norm": 0.6039770807957126, "learning_rate": 8.392334956064598e-06, "loss": 0.3614, "step": 6293 }, { "epoch": 0.28486082824168363, "grad_norm": 0.6601147390295486, "learning_rate": 8.391796493202346e-06, "loss": 0.3419, "step": 6294 }, { "epoch": 0.2849060873500792, "grad_norm": 0.6409892616716775, "learning_rate": 8.391257957460803e-06, "loss": 0.377, "step": 6295 }, { "epoch": 0.28495134645847475, "grad_norm": 0.6090673283201767, "learning_rate": 8.390719348851544e-06, "loss": 0.3615, "step": 6296 }, { "epoch": 0.28499660556687034, "grad_norm": 0.6125879242381763, "learning_rate": 8.390180667386138e-06, "loss": 0.3521, "step": 6297 }, { "epoch": 0.2850418646752659, "grad_norm": 0.7012367365291511, "learning_rate": 8.389641913076163e-06, "loss": 0.3305, "step": 6298 }, { "epoch": 0.28508712378366147, "grad_norm": 0.6452780407011524, "learning_rate": 8.389103085933192e-06, "loss": 0.3889, "step": 6299 }, { "epoch": 0.285132382892057, "grad_norm": 0.4827058990269452, "learning_rate": 8.388564185968805e-06, "loss": 0.5188, "step": 6300 }, { "epoch": 0.2851776420004526, "grad_norm": 0.6965575404225468, "learning_rate": 8.388025213194585e-06, "loss": 0.368, "step": 6301 }, { "epoch": 0.2852229011088482, "grad_norm": 0.7127863519099163, "learning_rate": 8.387486167622103e-06, "loss": 0.4024, "step": 6302 }, { "epoch": 0.2852681602172437, "grad_norm": 0.3250995844318269, "learning_rate": 8.38694704926295e-06, "loss": 0.5249, "step": 6303 }, { "epoch": 0.2853134193256393, "grad_norm": 0.6735640437482957, "learning_rate": 8.386407858128707e-06, "loss": 0.3605, "step": 6304 }, { "epoch": 0.28535867843403484, "grad_norm": 0.3165298941877711, "learning_rate": 8.385868594230958e-06, "loss": 0.5164, "step": 6305 }, { "epoch": 0.28540393754243043, "grad_norm": 0.6564690706710307, "learning_rate": 8.385329257581295e-06, "loss": 0.3368, "step": 6306 }, { "epoch": 0.28544919665082596, "grad_norm": 0.6489326868070335, "learning_rate": 8.3847898481913e-06, "loss": 0.3394, "step": 6307 }, { "epoch": 0.28549445575922155, "grad_norm": 0.40721945360601186, "learning_rate": 8.384250366072568e-06, "loss": 0.5119, "step": 6308 }, { "epoch": 0.2855397148676171, "grad_norm": 0.33191010690961714, "learning_rate": 8.38371081123669e-06, "loss": 0.4886, "step": 6309 }, { "epoch": 0.2855849739760127, "grad_norm": 0.7339684742337894, "learning_rate": 8.383171183695258e-06, "loss": 0.3459, "step": 6310 }, { "epoch": 0.2856302330844082, "grad_norm": 0.7711528963712407, "learning_rate": 8.382631483459869e-06, "loss": 0.4392, "step": 6311 }, { "epoch": 0.2856754921928038, "grad_norm": 0.6589848254857321, "learning_rate": 8.382091710542118e-06, "loss": 0.3706, "step": 6312 }, { "epoch": 0.2857207513011994, "grad_norm": 0.6757671235326732, "learning_rate": 8.381551864953603e-06, "loss": 0.3859, "step": 6313 }, { "epoch": 0.28576601040959493, "grad_norm": 0.449617795828226, "learning_rate": 8.381011946705926e-06, "loss": 0.5095, "step": 6314 }, { "epoch": 0.2858112695179905, "grad_norm": 0.7213903453427956, "learning_rate": 8.380471955810685e-06, "loss": 0.3896, "step": 6315 }, { "epoch": 0.28585652862638605, "grad_norm": 0.6514003171057873, "learning_rate": 8.379931892279483e-06, "loss": 0.3962, "step": 6316 }, { "epoch": 0.28590178773478164, "grad_norm": 0.7085896477424691, "learning_rate": 8.379391756123927e-06, "loss": 0.366, "step": 6317 }, { "epoch": 0.2859470468431772, "grad_norm": 0.6803649542757192, "learning_rate": 8.37885154735562e-06, "loss": 0.3723, "step": 6318 }, { "epoch": 0.28599230595157277, "grad_norm": 1.2716815933415464, "learning_rate": 8.37831126598617e-06, "loss": 0.373, "step": 6319 }, { "epoch": 0.2860375650599683, "grad_norm": 0.6554974108933356, "learning_rate": 8.377770912027187e-06, "loss": 0.4031, "step": 6320 }, { "epoch": 0.2860828241683639, "grad_norm": 0.7453757586179238, "learning_rate": 8.377230485490282e-06, "loss": 0.3675, "step": 6321 }, { "epoch": 0.2861280832767594, "grad_norm": 0.7501007963538981, "learning_rate": 8.376689986387066e-06, "loss": 0.3803, "step": 6322 }, { "epoch": 0.286173342385155, "grad_norm": 0.6447615198903361, "learning_rate": 8.376149414729154e-06, "loss": 0.3433, "step": 6323 }, { "epoch": 0.28621860149355055, "grad_norm": 0.7109482396314323, "learning_rate": 8.375608770528157e-06, "loss": 0.3758, "step": 6324 }, { "epoch": 0.28626386060194614, "grad_norm": 0.3596396620662271, "learning_rate": 8.375068053795697e-06, "loss": 0.4917, "step": 6325 }, { "epoch": 0.28630911971034173, "grad_norm": 0.3406801314012656, "learning_rate": 8.37452726454339e-06, "loss": 0.4738, "step": 6326 }, { "epoch": 0.28635437881873727, "grad_norm": 0.6914537614782104, "learning_rate": 8.373986402782857e-06, "loss": 0.3971, "step": 6327 }, { "epoch": 0.28639963792713286, "grad_norm": 0.6957523570372517, "learning_rate": 8.373445468525719e-06, "loss": 0.3774, "step": 6328 }, { "epoch": 0.2864448970355284, "grad_norm": 0.6690897788009853, "learning_rate": 8.372904461783596e-06, "loss": 0.3812, "step": 6329 }, { "epoch": 0.286490156143924, "grad_norm": 0.3872804952218078, "learning_rate": 8.372363382568116e-06, "loss": 0.5283, "step": 6330 }, { "epoch": 0.2865354152523195, "grad_norm": 0.7496446064848421, "learning_rate": 8.371822230890905e-06, "loss": 0.3606, "step": 6331 }, { "epoch": 0.2865806743607151, "grad_norm": 0.754423666023919, "learning_rate": 8.371281006763589e-06, "loss": 0.4128, "step": 6332 }, { "epoch": 0.28662593346911064, "grad_norm": 0.6736836893255465, "learning_rate": 8.3707397101978e-06, "loss": 0.3817, "step": 6333 }, { "epoch": 0.28667119257750623, "grad_norm": 0.6353507976696714, "learning_rate": 8.370198341205167e-06, "loss": 0.3441, "step": 6334 }, { "epoch": 0.28671645168590176, "grad_norm": 0.40956676546522564, "learning_rate": 8.36965689979732e-06, "loss": 0.5078, "step": 6335 }, { "epoch": 0.28676171079429735, "grad_norm": 0.740988659745703, "learning_rate": 8.369115385985897e-06, "loss": 0.3762, "step": 6336 }, { "epoch": 0.28680696990269294, "grad_norm": 0.7073140874733087, "learning_rate": 8.368573799782533e-06, "loss": 0.4027, "step": 6337 }, { "epoch": 0.2868522290110885, "grad_norm": 0.6412217710564962, "learning_rate": 8.368032141198864e-06, "loss": 0.3663, "step": 6338 }, { "epoch": 0.28689748811948407, "grad_norm": 0.6852482683651501, "learning_rate": 8.367490410246525e-06, "loss": 0.3905, "step": 6339 }, { "epoch": 0.2869427472278796, "grad_norm": 0.6830362986429956, "learning_rate": 8.366948606937161e-06, "loss": 0.3932, "step": 6340 }, { "epoch": 0.2869880063362752, "grad_norm": 0.6531315592813901, "learning_rate": 8.366406731282415e-06, "loss": 0.3571, "step": 6341 }, { "epoch": 0.2870332654446707, "grad_norm": 0.6840745659773954, "learning_rate": 8.365864783293925e-06, "loss": 0.3852, "step": 6342 }, { "epoch": 0.2870785245530663, "grad_norm": 0.7243044316163573, "learning_rate": 8.36532276298334e-06, "loss": 0.3673, "step": 6343 }, { "epoch": 0.28712378366146185, "grad_norm": 0.3596143601853733, "learning_rate": 8.364780670362302e-06, "loss": 0.4866, "step": 6344 }, { "epoch": 0.28716904276985744, "grad_norm": 0.3369326536107606, "learning_rate": 8.364238505442462e-06, "loss": 0.4754, "step": 6345 }, { "epoch": 0.287214301878253, "grad_norm": 0.7430363453770691, "learning_rate": 8.36369626823547e-06, "loss": 0.4005, "step": 6346 }, { "epoch": 0.28725956098664857, "grad_norm": 0.6433857629537567, "learning_rate": 8.363153958752976e-06, "loss": 0.382, "step": 6347 }, { "epoch": 0.2873048200950441, "grad_norm": 0.6350133465496026, "learning_rate": 8.362611577006632e-06, "loss": 0.3565, "step": 6348 }, { "epoch": 0.2873500792034397, "grad_norm": 0.6169981326762816, "learning_rate": 8.362069123008092e-06, "loss": 0.3779, "step": 6349 }, { "epoch": 0.2873953383118353, "grad_norm": 0.45227784655374936, "learning_rate": 8.361526596769013e-06, "loss": 0.4995, "step": 6350 }, { "epoch": 0.2874405974202308, "grad_norm": 0.9147592151032556, "learning_rate": 8.360983998301053e-06, "loss": 0.3873, "step": 6351 }, { "epoch": 0.2874858565286264, "grad_norm": 0.6547204163805698, "learning_rate": 8.360441327615868e-06, "loss": 0.3806, "step": 6352 }, { "epoch": 0.28753111563702194, "grad_norm": 0.7098358675605005, "learning_rate": 8.35989858472512e-06, "loss": 0.3651, "step": 6353 }, { "epoch": 0.28757637474541753, "grad_norm": 0.7335097713307398, "learning_rate": 8.359355769640472e-06, "loss": 0.3561, "step": 6354 }, { "epoch": 0.28762163385381306, "grad_norm": 0.7783977190478998, "learning_rate": 8.358812882373584e-06, "loss": 0.3531, "step": 6355 }, { "epoch": 0.28766689296220865, "grad_norm": 0.6820434180148108, "learning_rate": 8.358269922936121e-06, "loss": 0.3537, "step": 6356 }, { "epoch": 0.2877121520706042, "grad_norm": 0.6367159751994629, "learning_rate": 8.357726891339756e-06, "loss": 0.3764, "step": 6357 }, { "epoch": 0.2877574111789998, "grad_norm": 0.7413979855286208, "learning_rate": 8.357183787596151e-06, "loss": 0.3354, "step": 6358 }, { "epoch": 0.2878026702873953, "grad_norm": 0.7893411212943653, "learning_rate": 8.356640611716976e-06, "loss": 0.3576, "step": 6359 }, { "epoch": 0.2878479293957909, "grad_norm": 0.6973211159466143, "learning_rate": 8.356097363713904e-06, "loss": 0.3546, "step": 6360 }, { "epoch": 0.2878931885041865, "grad_norm": 0.6283123119027827, "learning_rate": 8.355554043598608e-06, "loss": 0.3581, "step": 6361 }, { "epoch": 0.287938447612582, "grad_norm": 0.3994029621696445, "learning_rate": 8.35501065138276e-06, "loss": 0.4921, "step": 6362 }, { "epoch": 0.2879837067209776, "grad_norm": 0.35484264290434336, "learning_rate": 8.354467187078037e-06, "loss": 0.5089, "step": 6363 }, { "epoch": 0.28802896582937315, "grad_norm": 0.8314561846376063, "learning_rate": 8.353923650696119e-06, "loss": 0.3948, "step": 6364 }, { "epoch": 0.28807422493776874, "grad_norm": 0.33738649343966226, "learning_rate": 8.35338004224868e-06, "loss": 0.5093, "step": 6365 }, { "epoch": 0.2881194840461643, "grad_norm": 0.6726203777236593, "learning_rate": 8.352836361747403e-06, "loss": 0.4174, "step": 6366 }, { "epoch": 0.28816474315455987, "grad_norm": 0.6518077292485198, "learning_rate": 8.352292609203973e-06, "loss": 0.3607, "step": 6367 }, { "epoch": 0.2882100022629554, "grad_norm": 0.6762160616346438, "learning_rate": 8.351748784630068e-06, "loss": 0.3921, "step": 6368 }, { "epoch": 0.288255261371351, "grad_norm": 0.6930960439817764, "learning_rate": 8.351204888037377e-06, "loss": 0.4079, "step": 6369 }, { "epoch": 0.2883005204797465, "grad_norm": 0.6448827108519407, "learning_rate": 8.350660919437585e-06, "loss": 0.32, "step": 6370 }, { "epoch": 0.2883457795881421, "grad_norm": 0.6114651259864918, "learning_rate": 8.350116878842379e-06, "loss": 0.3314, "step": 6371 }, { "epoch": 0.2883910386965377, "grad_norm": 0.6361346844899176, "learning_rate": 8.349572766263452e-06, "loss": 0.3434, "step": 6372 }, { "epoch": 0.28843629780493324, "grad_norm": 0.6258771935514704, "learning_rate": 8.349028581712493e-06, "loss": 0.3732, "step": 6373 }, { "epoch": 0.28848155691332883, "grad_norm": 0.6344175862930747, "learning_rate": 8.348484325201196e-06, "loss": 0.3555, "step": 6374 }, { "epoch": 0.28852681602172436, "grad_norm": 0.6512433095639879, "learning_rate": 8.347939996741255e-06, "loss": 0.3597, "step": 6375 }, { "epoch": 0.28857207513011995, "grad_norm": 0.6347448345003477, "learning_rate": 8.347395596344365e-06, "loss": 0.3672, "step": 6376 }, { "epoch": 0.2886173342385155, "grad_norm": 0.7352586053277483, "learning_rate": 8.346851124022226e-06, "loss": 0.3458, "step": 6377 }, { "epoch": 0.2886625933469111, "grad_norm": 0.6173551112383586, "learning_rate": 8.346306579786536e-06, "loss": 0.3825, "step": 6378 }, { "epoch": 0.2887078524553066, "grad_norm": 0.7178480662247867, "learning_rate": 8.345761963648993e-06, "loss": 0.3867, "step": 6379 }, { "epoch": 0.2887531115637022, "grad_norm": 0.6351496353698858, "learning_rate": 8.345217275621303e-06, "loss": 0.3606, "step": 6380 }, { "epoch": 0.28879837067209774, "grad_norm": 0.6624551672993362, "learning_rate": 8.344672515715165e-06, "loss": 0.3461, "step": 6381 }, { "epoch": 0.2888436297804933, "grad_norm": 0.650563289492609, "learning_rate": 8.344127683942289e-06, "loss": 0.4002, "step": 6382 }, { "epoch": 0.28888888888888886, "grad_norm": 0.6078861028555593, "learning_rate": 8.34358278031438e-06, "loss": 0.3572, "step": 6383 }, { "epoch": 0.28893414799728445, "grad_norm": 0.6120498255653158, "learning_rate": 8.343037804843143e-06, "loss": 0.3589, "step": 6384 }, { "epoch": 0.28897940710568004, "grad_norm": 0.6481854290974546, "learning_rate": 8.342492757540294e-06, "loss": 0.3579, "step": 6385 }, { "epoch": 0.2890246662140756, "grad_norm": 0.5468373204491022, "learning_rate": 8.34194763841754e-06, "loss": 0.4952, "step": 6386 }, { "epoch": 0.28906992532247117, "grad_norm": 0.7190267617607462, "learning_rate": 8.341402447486598e-06, "loss": 0.4117, "step": 6387 }, { "epoch": 0.2891151844308667, "grad_norm": 0.6336260112629661, "learning_rate": 8.340857184759178e-06, "loss": 0.3373, "step": 6388 }, { "epoch": 0.2891604435392623, "grad_norm": 0.6630374397436601, "learning_rate": 8.340311850246996e-06, "loss": 0.3619, "step": 6389 }, { "epoch": 0.2892057026476578, "grad_norm": 0.7371034281394628, "learning_rate": 8.339766443961772e-06, "loss": 0.3494, "step": 6390 }, { "epoch": 0.2892509617560534, "grad_norm": 0.6538041235259925, "learning_rate": 8.339220965915227e-06, "loss": 0.3795, "step": 6391 }, { "epoch": 0.28929622086444895, "grad_norm": 0.38119731417128494, "learning_rate": 8.338675416119076e-06, "loss": 0.4796, "step": 6392 }, { "epoch": 0.28934147997284454, "grad_norm": 0.726785165671723, "learning_rate": 8.338129794585047e-06, "loss": 0.3564, "step": 6393 }, { "epoch": 0.2893867390812401, "grad_norm": 0.6533301961414332, "learning_rate": 8.337584101324859e-06, "loss": 0.3779, "step": 6394 }, { "epoch": 0.28943199818963566, "grad_norm": 0.6282052343937943, "learning_rate": 8.337038336350238e-06, "loss": 0.3447, "step": 6395 }, { "epoch": 0.28947725729803125, "grad_norm": 0.6293806472535389, "learning_rate": 8.336492499672915e-06, "loss": 0.3495, "step": 6396 }, { "epoch": 0.2895225164064268, "grad_norm": 0.5981108898546699, "learning_rate": 8.335946591304614e-06, "loss": 0.3557, "step": 6397 }, { "epoch": 0.2895677755148224, "grad_norm": 0.6777571348756583, "learning_rate": 8.335400611257067e-06, "loss": 0.4098, "step": 6398 }, { "epoch": 0.2896130346232179, "grad_norm": 0.3942290080698544, "learning_rate": 8.334854559542004e-06, "loss": 0.5415, "step": 6399 }, { "epoch": 0.2896582937316135, "grad_norm": 0.6961096511585597, "learning_rate": 8.334308436171159e-06, "loss": 0.4219, "step": 6400 }, { "epoch": 0.28970355284000904, "grad_norm": 0.6754681406841953, "learning_rate": 8.333762241156268e-06, "loss": 0.3602, "step": 6401 }, { "epoch": 0.2897488119484046, "grad_norm": 0.7790640942022196, "learning_rate": 8.33321597450906e-06, "loss": 0.3734, "step": 6402 }, { "epoch": 0.28979407105680016, "grad_norm": 0.31179722449465863, "learning_rate": 8.332669636241284e-06, "loss": 0.4947, "step": 6403 }, { "epoch": 0.28983933016519575, "grad_norm": 0.32547128381188095, "learning_rate": 8.33212322636467e-06, "loss": 0.5166, "step": 6404 }, { "epoch": 0.2898845892735913, "grad_norm": 0.6859148528664963, "learning_rate": 8.331576744890963e-06, "loss": 0.4074, "step": 6405 }, { "epoch": 0.2899298483819869, "grad_norm": 0.7084680498510506, "learning_rate": 8.331030191831904e-06, "loss": 0.3552, "step": 6406 }, { "epoch": 0.28997510749038247, "grad_norm": 0.29201909746466553, "learning_rate": 8.330483567199234e-06, "loss": 0.4949, "step": 6407 }, { "epoch": 0.290020366598778, "grad_norm": 0.30213810850562034, "learning_rate": 8.329936871004703e-06, "loss": 0.4922, "step": 6408 }, { "epoch": 0.2900656257071736, "grad_norm": 0.6708127394690835, "learning_rate": 8.329390103260057e-06, "loss": 0.3748, "step": 6409 }, { "epoch": 0.2901108848155691, "grad_norm": 0.7416775574171741, "learning_rate": 8.32884326397704e-06, "loss": 0.3585, "step": 6410 }, { "epoch": 0.2901561439239647, "grad_norm": 0.5993879488501103, "learning_rate": 8.328296353167408e-06, "loss": 0.3609, "step": 6411 }, { "epoch": 0.29020140303236025, "grad_norm": 0.6371880564391219, "learning_rate": 8.327749370842909e-06, "loss": 0.3863, "step": 6412 }, { "epoch": 0.29024666214075584, "grad_norm": 0.618451111883719, "learning_rate": 8.327202317015295e-06, "loss": 0.353, "step": 6413 }, { "epoch": 0.2902919212491514, "grad_norm": 0.6249599044660648, "learning_rate": 8.326655191696322e-06, "loss": 0.4184, "step": 6414 }, { "epoch": 0.29033718035754696, "grad_norm": 0.34375589339921275, "learning_rate": 8.326107994897748e-06, "loss": 0.4931, "step": 6415 }, { "epoch": 0.2903824394659425, "grad_norm": 0.6602548060548115, "learning_rate": 8.325560726631325e-06, "loss": 0.3652, "step": 6416 }, { "epoch": 0.2904276985743381, "grad_norm": 0.33527913002483645, "learning_rate": 8.325013386908817e-06, "loss": 0.4795, "step": 6417 }, { "epoch": 0.2904729576827336, "grad_norm": 0.6547073505960216, "learning_rate": 8.324465975741986e-06, "loss": 0.3442, "step": 6418 }, { "epoch": 0.2905182167911292, "grad_norm": 0.6652996596938084, "learning_rate": 8.323918493142588e-06, "loss": 0.3898, "step": 6419 }, { "epoch": 0.2905634758995248, "grad_norm": 0.64028964625868, "learning_rate": 8.323370939122393e-06, "loss": 0.3706, "step": 6420 }, { "epoch": 0.29060873500792034, "grad_norm": 0.680854532340238, "learning_rate": 8.322823313693162e-06, "loss": 0.3617, "step": 6421 }, { "epoch": 0.29065399411631593, "grad_norm": 0.6684202593248905, "learning_rate": 8.322275616866663e-06, "loss": 0.4334, "step": 6422 }, { "epoch": 0.29069925322471146, "grad_norm": 0.5962055859753047, "learning_rate": 8.321727848654666e-06, "loss": 0.3495, "step": 6423 }, { "epoch": 0.29074451233310705, "grad_norm": 0.6619552340244278, "learning_rate": 8.321180009068937e-06, "loss": 0.4093, "step": 6424 }, { "epoch": 0.2907897714415026, "grad_norm": 0.7488039229952417, "learning_rate": 8.320632098121253e-06, "loss": 0.3796, "step": 6425 }, { "epoch": 0.2908350305498982, "grad_norm": 0.6722660190015518, "learning_rate": 8.320084115823382e-06, "loss": 0.3798, "step": 6426 }, { "epoch": 0.2908802896582937, "grad_norm": 0.6643333815944632, "learning_rate": 8.3195360621871e-06, "loss": 0.3934, "step": 6427 }, { "epoch": 0.2909255487666893, "grad_norm": 0.5847662005979445, "learning_rate": 8.318987937224183e-06, "loss": 0.3645, "step": 6428 }, { "epoch": 0.29097080787508484, "grad_norm": 0.7669998563625238, "learning_rate": 8.318439740946409e-06, "loss": 0.4105, "step": 6429 }, { "epoch": 0.2910160669834804, "grad_norm": 0.5310405439597425, "learning_rate": 8.317891473365558e-06, "loss": 0.4751, "step": 6430 }, { "epoch": 0.291061326091876, "grad_norm": 0.6261449752730862, "learning_rate": 8.317343134493408e-06, "loss": 0.332, "step": 6431 }, { "epoch": 0.29110658520027155, "grad_norm": 0.6987428389371922, "learning_rate": 8.316794724341743e-06, "loss": 0.3862, "step": 6432 }, { "epoch": 0.29115184430866714, "grad_norm": 0.6655812586635537, "learning_rate": 8.316246242922345e-06, "loss": 0.3647, "step": 6433 }, { "epoch": 0.2911971034170627, "grad_norm": 0.6048388635137149, "learning_rate": 8.315697690247002e-06, "loss": 0.3789, "step": 6434 }, { "epoch": 0.29124236252545826, "grad_norm": 0.41803567170471573, "learning_rate": 8.315149066327498e-06, "loss": 0.5103, "step": 6435 }, { "epoch": 0.2912876216338538, "grad_norm": 0.40434775094018255, "learning_rate": 8.314600371175623e-06, "loss": 0.5231, "step": 6436 }, { "epoch": 0.2913328807422494, "grad_norm": 0.6869531963848231, "learning_rate": 8.314051604803164e-06, "loss": 0.3729, "step": 6437 }, { "epoch": 0.2913781398506449, "grad_norm": 0.29570610897397354, "learning_rate": 8.313502767221916e-06, "loss": 0.4862, "step": 6438 }, { "epoch": 0.2914233989590405, "grad_norm": 0.31598816349430026, "learning_rate": 8.312953858443672e-06, "loss": 0.4805, "step": 6439 }, { "epoch": 0.29146865806743605, "grad_norm": 0.80407284756795, "learning_rate": 8.312404878480222e-06, "loss": 0.3466, "step": 6440 }, { "epoch": 0.29151391717583164, "grad_norm": 0.6493079758709581, "learning_rate": 8.311855827343364e-06, "loss": 0.4, "step": 6441 }, { "epoch": 0.29155917628422723, "grad_norm": 0.6575077859486658, "learning_rate": 8.311306705044898e-06, "loss": 0.4026, "step": 6442 }, { "epoch": 0.29160443539262276, "grad_norm": 0.6641962626839024, "learning_rate": 8.31075751159662e-06, "loss": 0.3636, "step": 6443 }, { "epoch": 0.29164969450101835, "grad_norm": 0.66221788099013, "learning_rate": 8.310208247010331e-06, "loss": 0.3959, "step": 6444 }, { "epoch": 0.2916949536094139, "grad_norm": 0.6939365180392555, "learning_rate": 8.309658911297833e-06, "loss": 0.3597, "step": 6445 }, { "epoch": 0.2917402127178095, "grad_norm": 0.4297312805988011, "learning_rate": 8.309109504470932e-06, "loss": 0.4985, "step": 6446 }, { "epoch": 0.291785471826205, "grad_norm": 0.6415743882962217, "learning_rate": 8.308560026541428e-06, "loss": 0.3852, "step": 6447 }, { "epoch": 0.2918307309346006, "grad_norm": 0.6544798763209472, "learning_rate": 8.30801047752113e-06, "loss": 0.3674, "step": 6448 }, { "epoch": 0.29187599004299614, "grad_norm": 0.6687801130362593, "learning_rate": 8.307460857421849e-06, "loss": 0.3697, "step": 6449 }, { "epoch": 0.2919212491513917, "grad_norm": 0.6429403857604996, "learning_rate": 8.306911166255392e-06, "loss": 0.3401, "step": 6450 }, { "epoch": 0.29196650825978726, "grad_norm": 0.5829187355129206, "learning_rate": 8.306361404033571e-06, "loss": 0.3139, "step": 6451 }, { "epoch": 0.29201176736818285, "grad_norm": 0.6256295801055626, "learning_rate": 8.305811570768196e-06, "loss": 0.3806, "step": 6452 }, { "epoch": 0.2920570264765784, "grad_norm": 0.656436203896976, "learning_rate": 8.305261666471085e-06, "loss": 0.3926, "step": 6453 }, { "epoch": 0.292102285584974, "grad_norm": 0.6134172488872648, "learning_rate": 8.304711691154052e-06, "loss": 0.3953, "step": 6454 }, { "epoch": 0.29214754469336957, "grad_norm": 0.38117255138407263, "learning_rate": 8.304161644828913e-06, "loss": 0.5043, "step": 6455 }, { "epoch": 0.2921928038017651, "grad_norm": 0.6713297462223095, "learning_rate": 8.30361152750749e-06, "loss": 0.4098, "step": 6456 }, { "epoch": 0.2922380629101607, "grad_norm": 0.6994613343471084, "learning_rate": 8.303061339201601e-06, "loss": 0.3802, "step": 6457 }, { "epoch": 0.2922833220185562, "grad_norm": 0.6373226817333996, "learning_rate": 8.302511079923068e-06, "loss": 0.3764, "step": 6458 }, { "epoch": 0.2923285811269518, "grad_norm": 0.31101672311753653, "learning_rate": 8.301960749683715e-06, "loss": 0.5137, "step": 6459 }, { "epoch": 0.29237384023534735, "grad_norm": 0.6661226652935572, "learning_rate": 8.301410348495366e-06, "loss": 0.3826, "step": 6460 }, { "epoch": 0.29241909934374294, "grad_norm": 0.681465527925491, "learning_rate": 8.300859876369849e-06, "loss": 0.3283, "step": 6461 }, { "epoch": 0.2924643584521385, "grad_norm": 0.6503583509986406, "learning_rate": 8.300309333318992e-06, "loss": 0.3807, "step": 6462 }, { "epoch": 0.29250961756053406, "grad_norm": 0.35637099222950214, "learning_rate": 8.299758719354621e-06, "loss": 0.5103, "step": 6463 }, { "epoch": 0.2925548766689296, "grad_norm": 0.9007426007639495, "learning_rate": 8.299208034488571e-06, "loss": 0.4173, "step": 6464 }, { "epoch": 0.2926001357773252, "grad_norm": 0.8978949257967975, "learning_rate": 8.298657278732673e-06, "loss": 0.4247, "step": 6465 }, { "epoch": 0.2926453948857208, "grad_norm": 0.7437470268845902, "learning_rate": 8.298106452098761e-06, "loss": 0.3561, "step": 6466 }, { "epoch": 0.2926906539941163, "grad_norm": 0.674750975286095, "learning_rate": 8.297555554598671e-06, "loss": 0.4265, "step": 6467 }, { "epoch": 0.2927359131025119, "grad_norm": 0.6389898615422173, "learning_rate": 8.29700458624424e-06, "loss": 0.3432, "step": 6468 }, { "epoch": 0.29278117221090744, "grad_norm": 0.6119289376751083, "learning_rate": 8.296453547047305e-06, "loss": 0.3565, "step": 6469 }, { "epoch": 0.292826431319303, "grad_norm": 0.6467003422732659, "learning_rate": 8.295902437019709e-06, "loss": 0.3678, "step": 6470 }, { "epoch": 0.29287169042769856, "grad_norm": 0.6275114622388187, "learning_rate": 8.295351256173292e-06, "loss": 0.3772, "step": 6471 }, { "epoch": 0.29291694953609415, "grad_norm": 0.6070339394501282, "learning_rate": 8.294800004519895e-06, "loss": 0.3339, "step": 6472 }, { "epoch": 0.2929622086444897, "grad_norm": 0.6790702159687559, "learning_rate": 8.294248682071369e-06, "loss": 0.3849, "step": 6473 }, { "epoch": 0.2930074677528853, "grad_norm": 0.6806848558424293, "learning_rate": 8.293697288839555e-06, "loss": 0.3665, "step": 6474 }, { "epoch": 0.2930527268612808, "grad_norm": 0.6489635136154929, "learning_rate": 8.293145824836302e-06, "loss": 0.392, "step": 6475 }, { "epoch": 0.2930979859696764, "grad_norm": 0.37044885389524723, "learning_rate": 8.29259429007346e-06, "loss": 0.5182, "step": 6476 }, { "epoch": 0.29314324507807193, "grad_norm": 0.655501620363715, "learning_rate": 8.292042684562878e-06, "loss": 0.357, "step": 6477 }, { "epoch": 0.2931885041864675, "grad_norm": 0.9507463591648303, "learning_rate": 8.291491008316409e-06, "loss": 0.3688, "step": 6478 }, { "epoch": 0.2932337632948631, "grad_norm": 0.32278269268452087, "learning_rate": 8.290939261345908e-06, "loss": 0.4761, "step": 6479 }, { "epoch": 0.29327902240325865, "grad_norm": 0.6977751345184396, "learning_rate": 8.29038744366323e-06, "loss": 0.3683, "step": 6480 }, { "epoch": 0.29332428151165424, "grad_norm": 0.30990544000558706, "learning_rate": 8.28983555528023e-06, "loss": 0.4853, "step": 6481 }, { "epoch": 0.2933695406200498, "grad_norm": 0.704930034295891, "learning_rate": 8.289283596208769e-06, "loss": 0.3804, "step": 6482 }, { "epoch": 0.29341479972844536, "grad_norm": 0.3095176020113561, "learning_rate": 8.288731566460706e-06, "loss": 0.4689, "step": 6483 }, { "epoch": 0.2934600588368409, "grad_norm": 0.6767804618120248, "learning_rate": 8.288179466047903e-06, "loss": 0.3578, "step": 6484 }, { "epoch": 0.2935053179452365, "grad_norm": 0.6605536627738009, "learning_rate": 8.28762729498222e-06, "loss": 0.385, "step": 6485 }, { "epoch": 0.293550577053632, "grad_norm": 0.6499909633330283, "learning_rate": 8.287075053275527e-06, "loss": 0.3542, "step": 6486 }, { "epoch": 0.2935958361620276, "grad_norm": 0.6419155300510001, "learning_rate": 8.286522740939682e-06, "loss": 0.3944, "step": 6487 }, { "epoch": 0.29364109527042315, "grad_norm": 0.41197650198755686, "learning_rate": 8.285970357986559e-06, "loss": 0.4885, "step": 6488 }, { "epoch": 0.29368635437881874, "grad_norm": 0.6783231634374803, "learning_rate": 8.285417904428025e-06, "loss": 0.3763, "step": 6489 }, { "epoch": 0.2937316134872143, "grad_norm": 0.3380209730570563, "learning_rate": 8.284865380275953e-06, "loss": 0.5039, "step": 6490 }, { "epoch": 0.29377687259560986, "grad_norm": 0.6434836225109783, "learning_rate": 8.28431278554221e-06, "loss": 0.3606, "step": 6491 }, { "epoch": 0.29382213170400545, "grad_norm": 0.6712871013442894, "learning_rate": 8.283760120238672e-06, "loss": 0.3692, "step": 6492 }, { "epoch": 0.293867390812401, "grad_norm": 0.7502628512683776, "learning_rate": 8.283207384377217e-06, "loss": 0.3614, "step": 6493 }, { "epoch": 0.2939126499207966, "grad_norm": 0.6574921802281626, "learning_rate": 8.282654577969715e-06, "loss": 0.3889, "step": 6494 }, { "epoch": 0.2939579090291921, "grad_norm": 0.6436678017455617, "learning_rate": 8.282101701028051e-06, "loss": 0.3759, "step": 6495 }, { "epoch": 0.2940031681375877, "grad_norm": 0.6331727763394492, "learning_rate": 8.281548753564101e-06, "loss": 0.3794, "step": 6496 }, { "epoch": 0.29404842724598323, "grad_norm": 3.064791132428098, "learning_rate": 8.280995735589748e-06, "loss": 0.3326, "step": 6497 }, { "epoch": 0.2940936863543788, "grad_norm": 0.7158885307535165, "learning_rate": 8.28044264711687e-06, "loss": 0.3618, "step": 6498 }, { "epoch": 0.29413894546277436, "grad_norm": 0.633887155405091, "learning_rate": 8.279889488157358e-06, "loss": 0.3625, "step": 6499 }, { "epoch": 0.29418420457116995, "grad_norm": 0.44873262101556294, "learning_rate": 8.279336258723092e-06, "loss": 0.4881, "step": 6500 }, { "epoch": 0.29422946367956554, "grad_norm": 0.42868033624798035, "learning_rate": 8.278782958825963e-06, "loss": 0.4763, "step": 6501 }, { "epoch": 0.2942747227879611, "grad_norm": 0.6926362327266504, "learning_rate": 8.278229588477857e-06, "loss": 0.3633, "step": 6502 }, { "epoch": 0.29431998189635666, "grad_norm": 0.7159833505501458, "learning_rate": 8.277676147690667e-06, "loss": 0.3704, "step": 6503 }, { "epoch": 0.2943652410047522, "grad_norm": 0.6048515450661027, "learning_rate": 8.277122636476284e-06, "loss": 0.361, "step": 6504 }, { "epoch": 0.2944105001131478, "grad_norm": 0.6926580589479077, "learning_rate": 8.276569054846598e-06, "loss": 0.4029, "step": 6505 }, { "epoch": 0.2944557592215433, "grad_norm": 0.7087743982571297, "learning_rate": 8.276015402813507e-06, "loss": 0.3709, "step": 6506 }, { "epoch": 0.2945010183299389, "grad_norm": 0.6699221368445163, "learning_rate": 8.275461680388907e-06, "loss": 0.3808, "step": 6507 }, { "epoch": 0.29454627743833445, "grad_norm": 0.6510226287700173, "learning_rate": 8.274907887584695e-06, "loss": 0.3394, "step": 6508 }, { "epoch": 0.29459153654673004, "grad_norm": 0.7599440272738529, "learning_rate": 8.274354024412771e-06, "loss": 0.506, "step": 6509 }, { "epoch": 0.29463679565512557, "grad_norm": 0.6533955878761483, "learning_rate": 8.273800090885033e-06, "loss": 0.3876, "step": 6510 }, { "epoch": 0.29468205476352116, "grad_norm": 0.6330339667687448, "learning_rate": 8.273246087013389e-06, "loss": 0.3375, "step": 6511 }, { "epoch": 0.2947273138719167, "grad_norm": 0.6825683608101373, "learning_rate": 8.27269201280974e-06, "loss": 0.3953, "step": 6512 }, { "epoch": 0.2947725729803123, "grad_norm": 0.6742266479272396, "learning_rate": 8.272137868285988e-06, "loss": 0.3741, "step": 6513 }, { "epoch": 0.2948178320887079, "grad_norm": 0.3509569724005049, "learning_rate": 8.271583653454046e-06, "loss": 0.4807, "step": 6514 }, { "epoch": 0.2948630911971034, "grad_norm": 0.6964791974984916, "learning_rate": 8.271029368325816e-06, "loss": 0.4001, "step": 6515 }, { "epoch": 0.294908350305499, "grad_norm": 0.7441171021360583, "learning_rate": 8.270475012913212e-06, "loss": 0.3423, "step": 6516 }, { "epoch": 0.29495360941389454, "grad_norm": 0.6282383938135476, "learning_rate": 8.269920587228145e-06, "loss": 0.3606, "step": 6517 }, { "epoch": 0.2949988685222901, "grad_norm": 0.6679807888966616, "learning_rate": 8.269366091282526e-06, "loss": 0.4197, "step": 6518 }, { "epoch": 0.29504412763068566, "grad_norm": 0.6600489477472857, "learning_rate": 8.268811525088273e-06, "loss": 0.3808, "step": 6519 }, { "epoch": 0.29508938673908125, "grad_norm": 0.5760869889668241, "learning_rate": 8.2682568886573e-06, "loss": 0.3092, "step": 6520 }, { "epoch": 0.2951346458474768, "grad_norm": 0.660281851940936, "learning_rate": 8.267702182001521e-06, "loss": 0.3618, "step": 6521 }, { "epoch": 0.2951799049558724, "grad_norm": 0.668073722385169, "learning_rate": 8.26714740513286e-06, "loss": 0.3764, "step": 6522 }, { "epoch": 0.2952251640642679, "grad_norm": 0.6880108887912482, "learning_rate": 8.266592558063235e-06, "loss": 0.3695, "step": 6523 }, { "epoch": 0.2952704231726635, "grad_norm": 0.3869646595533447, "learning_rate": 8.26603764080457e-06, "loss": 0.4912, "step": 6524 }, { "epoch": 0.2953156822810591, "grad_norm": 0.6738971146084021, "learning_rate": 8.265482653368786e-06, "loss": 0.3755, "step": 6525 }, { "epoch": 0.2953609413894546, "grad_norm": 0.9514622783496854, "learning_rate": 8.264927595767808e-06, "loss": 0.3406, "step": 6526 }, { "epoch": 0.2954062004978502, "grad_norm": 0.6555285194634614, "learning_rate": 8.264372468013566e-06, "loss": 0.3592, "step": 6527 }, { "epoch": 0.29545145960624575, "grad_norm": 0.6941521013634866, "learning_rate": 8.263817270117984e-06, "loss": 0.4001, "step": 6528 }, { "epoch": 0.29549671871464134, "grad_norm": 0.6522125610450555, "learning_rate": 8.263262002092992e-06, "loss": 0.3549, "step": 6529 }, { "epoch": 0.29554197782303687, "grad_norm": 0.7423122524992386, "learning_rate": 8.262706663950522e-06, "loss": 0.387, "step": 6530 }, { "epoch": 0.29558723693143246, "grad_norm": 0.6941356725705105, "learning_rate": 8.262151255702506e-06, "loss": 0.3873, "step": 6531 }, { "epoch": 0.295632496039828, "grad_norm": 0.40196026244367045, "learning_rate": 8.261595777360881e-06, "loss": 0.4844, "step": 6532 }, { "epoch": 0.2956777551482236, "grad_norm": 0.6558786177137559, "learning_rate": 8.261040228937578e-06, "loss": 0.3722, "step": 6533 }, { "epoch": 0.2957230142566191, "grad_norm": 0.6546132783831952, "learning_rate": 8.260484610444537e-06, "loss": 0.3947, "step": 6534 }, { "epoch": 0.2957682733650147, "grad_norm": 0.638845136439574, "learning_rate": 8.259928921893694e-06, "loss": 0.3635, "step": 6535 }, { "epoch": 0.2958135324734103, "grad_norm": 0.3215929274096865, "learning_rate": 8.259373163296992e-06, "loss": 0.4919, "step": 6536 }, { "epoch": 0.29585879158180584, "grad_norm": 0.6560801022010452, "learning_rate": 8.258817334666371e-06, "loss": 0.3585, "step": 6537 }, { "epoch": 0.2959040506902014, "grad_norm": 0.6445716599426061, "learning_rate": 8.258261436013774e-06, "loss": 0.3529, "step": 6538 }, { "epoch": 0.29594930979859696, "grad_norm": 0.617007318890825, "learning_rate": 8.257705467351144e-06, "loss": 0.3152, "step": 6539 }, { "epoch": 0.29599456890699255, "grad_norm": 0.6083874381432658, "learning_rate": 8.257149428690432e-06, "loss": 0.3922, "step": 6540 }, { "epoch": 0.2960398280153881, "grad_norm": 0.3776880342089537, "learning_rate": 8.256593320043582e-06, "loss": 0.487, "step": 6541 }, { "epoch": 0.2960850871237837, "grad_norm": 0.704918022131064, "learning_rate": 8.25603714142254e-06, "loss": 0.3876, "step": 6542 }, { "epoch": 0.2961303462321792, "grad_norm": 0.8063942549240285, "learning_rate": 8.255480892839262e-06, "loss": 0.3627, "step": 6543 }, { "epoch": 0.2961756053405748, "grad_norm": 0.5742802128032614, "learning_rate": 8.254924574305698e-06, "loss": 0.3473, "step": 6544 }, { "epoch": 0.29622086444897033, "grad_norm": 0.6964485588806103, "learning_rate": 8.254368185833803e-06, "loss": 0.3822, "step": 6545 }, { "epoch": 0.2962661235573659, "grad_norm": 0.6321601650487939, "learning_rate": 8.25381172743553e-06, "loss": 0.356, "step": 6546 }, { "epoch": 0.29631138266576146, "grad_norm": 1.0443208662838888, "learning_rate": 8.253255199122834e-06, "loss": 0.4008, "step": 6547 }, { "epoch": 0.29635664177415705, "grad_norm": 0.6582380851157981, "learning_rate": 8.252698600907678e-06, "loss": 0.3482, "step": 6548 }, { "epoch": 0.29640190088255264, "grad_norm": 0.6970926427572819, "learning_rate": 8.252141932802018e-06, "loss": 0.3846, "step": 6549 }, { "epoch": 0.2964471599909482, "grad_norm": 0.4938941833933624, "learning_rate": 8.251585194817816e-06, "loss": 0.5017, "step": 6550 }, { "epoch": 0.29649241909934376, "grad_norm": 0.6856811251346372, "learning_rate": 8.251028386967035e-06, "loss": 0.3657, "step": 6551 }, { "epoch": 0.2965376782077393, "grad_norm": 0.31996299943306283, "learning_rate": 8.25047150926164e-06, "loss": 0.4859, "step": 6552 }, { "epoch": 0.2965829373161349, "grad_norm": 0.6998424919733743, "learning_rate": 8.249914561713592e-06, "loss": 0.3839, "step": 6553 }, { "epoch": 0.2966281964245304, "grad_norm": 0.773242550385443, "learning_rate": 8.249357544334865e-06, "loss": 0.3736, "step": 6554 }, { "epoch": 0.296673455532926, "grad_norm": 0.6725820236276115, "learning_rate": 8.248800457137422e-06, "loss": 0.388, "step": 6555 }, { "epoch": 0.29671871464132155, "grad_norm": 0.6893640866870768, "learning_rate": 8.248243300133236e-06, "loss": 0.4145, "step": 6556 }, { "epoch": 0.29676397374971714, "grad_norm": 0.7584177846682806, "learning_rate": 8.247686073334277e-06, "loss": 0.3928, "step": 6557 }, { "epoch": 0.29680923285811267, "grad_norm": 0.6678868748025847, "learning_rate": 8.247128776752517e-06, "loss": 0.4747, "step": 6558 }, { "epoch": 0.29685449196650826, "grad_norm": 0.6610574223754907, "learning_rate": 8.246571410399935e-06, "loss": 0.3582, "step": 6559 }, { "epoch": 0.29689975107490385, "grad_norm": 0.6115168177738242, "learning_rate": 8.246013974288505e-06, "loss": 0.359, "step": 6560 }, { "epoch": 0.2969450101832994, "grad_norm": 0.33510638899390455, "learning_rate": 8.245456468430201e-06, "loss": 0.4911, "step": 6561 }, { "epoch": 0.296990269291695, "grad_norm": 0.6297514724178221, "learning_rate": 8.244898892837009e-06, "loss": 0.3633, "step": 6562 }, { "epoch": 0.2970355284000905, "grad_norm": 0.6217919692620721, "learning_rate": 8.244341247520903e-06, "loss": 0.3601, "step": 6563 }, { "epoch": 0.2970807875084861, "grad_norm": 0.4667666793389384, "learning_rate": 8.243783532493868e-06, "loss": 0.4649, "step": 6564 }, { "epoch": 0.29712604661688163, "grad_norm": 0.6434234825245986, "learning_rate": 8.243225747767888e-06, "loss": 0.3581, "step": 6565 }, { "epoch": 0.2971713057252772, "grad_norm": 0.6626520810564629, "learning_rate": 8.242667893354948e-06, "loss": 0.3694, "step": 6566 }, { "epoch": 0.29721656483367276, "grad_norm": 0.6603571749083783, "learning_rate": 8.242109969267033e-06, "loss": 0.3276, "step": 6567 }, { "epoch": 0.29726182394206835, "grad_norm": 0.6109779360936114, "learning_rate": 8.241551975516133e-06, "loss": 0.396, "step": 6568 }, { "epoch": 0.2973070830504639, "grad_norm": 0.651519606462089, "learning_rate": 8.240993912114236e-06, "loss": 0.3578, "step": 6569 }, { "epoch": 0.2973523421588595, "grad_norm": 0.6371769305932496, "learning_rate": 8.240435779073336e-06, "loss": 0.36, "step": 6570 }, { "epoch": 0.297397601267255, "grad_norm": 0.42362648372322165, "learning_rate": 8.23987757640542e-06, "loss": 0.4808, "step": 6571 }, { "epoch": 0.2974428603756506, "grad_norm": 0.6340758942769098, "learning_rate": 8.239319304122488e-06, "loss": 0.3618, "step": 6572 }, { "epoch": 0.2974881194840462, "grad_norm": 0.7264695251353803, "learning_rate": 8.238760962236532e-06, "loss": 0.3885, "step": 6573 }, { "epoch": 0.2975333785924417, "grad_norm": 0.6927123830509735, "learning_rate": 8.23820255075955e-06, "loss": 0.4151, "step": 6574 }, { "epoch": 0.2975786377008373, "grad_norm": 0.7849148012762388, "learning_rate": 8.23764406970354e-06, "loss": 0.3743, "step": 6575 }, { "epoch": 0.29762389680923285, "grad_norm": 0.654854279717437, "learning_rate": 8.237085519080503e-06, "loss": 0.375, "step": 6576 }, { "epoch": 0.29766915591762844, "grad_norm": 0.7159203992116845, "learning_rate": 8.236526898902439e-06, "loss": 0.388, "step": 6577 }, { "epoch": 0.29771441502602397, "grad_norm": 0.656283121300721, "learning_rate": 8.235968209181355e-06, "loss": 0.3411, "step": 6578 }, { "epoch": 0.29775967413441956, "grad_norm": 0.4616172562386764, "learning_rate": 8.23540944992925e-06, "loss": 0.4888, "step": 6579 }, { "epoch": 0.2978049332428151, "grad_norm": 0.664265125141684, "learning_rate": 8.234850621158135e-06, "loss": 0.3647, "step": 6580 }, { "epoch": 0.2978501923512107, "grad_norm": 0.6515710215820063, "learning_rate": 8.234291722880015e-06, "loss": 0.3735, "step": 6581 }, { "epoch": 0.2978954514596062, "grad_norm": 0.670688081563422, "learning_rate": 8.233732755106897e-06, "loss": 0.3846, "step": 6582 }, { "epoch": 0.2979407105680018, "grad_norm": 0.3096613537636291, "learning_rate": 8.233173717850796e-06, "loss": 0.4626, "step": 6583 }, { "epoch": 0.2979859696763974, "grad_norm": 0.670924612725373, "learning_rate": 8.232614611123719e-06, "loss": 0.3603, "step": 6584 }, { "epoch": 0.29803122878479293, "grad_norm": 0.7092134926955636, "learning_rate": 8.232055434937685e-06, "loss": 0.3543, "step": 6585 }, { "epoch": 0.2980764878931885, "grad_norm": 0.32778289159729157, "learning_rate": 8.231496189304704e-06, "loss": 0.4722, "step": 6586 }, { "epoch": 0.29812174700158406, "grad_norm": 0.701945960013496, "learning_rate": 8.230936874236797e-06, "loss": 0.4089, "step": 6587 }, { "epoch": 0.29816700610997965, "grad_norm": 0.6969133591371268, "learning_rate": 8.230377489745979e-06, "loss": 0.3806, "step": 6588 }, { "epoch": 0.2982122652183752, "grad_norm": 0.6535133332299422, "learning_rate": 8.229818035844269e-06, "loss": 0.4209, "step": 6589 }, { "epoch": 0.2982575243267708, "grad_norm": 0.6780849897711396, "learning_rate": 8.22925851254369e-06, "loss": 0.3675, "step": 6590 }, { "epoch": 0.2983027834351663, "grad_norm": 0.7267884367024268, "learning_rate": 8.228698919856264e-06, "loss": 0.3453, "step": 6591 }, { "epoch": 0.2983480425435619, "grad_norm": 0.39652816926380524, "learning_rate": 8.228139257794012e-06, "loss": 0.4841, "step": 6592 }, { "epoch": 0.29839330165195743, "grad_norm": 0.30304362311260974, "learning_rate": 8.227579526368965e-06, "loss": 0.4818, "step": 6593 }, { "epoch": 0.298438560760353, "grad_norm": 0.6851067942591184, "learning_rate": 8.227019725593144e-06, "loss": 0.3391, "step": 6594 }, { "epoch": 0.2984838198687486, "grad_norm": 0.34227352067508154, "learning_rate": 8.226459855478582e-06, "loss": 0.4676, "step": 6595 }, { "epoch": 0.29852907897714415, "grad_norm": 0.6581569083787847, "learning_rate": 8.225899916037305e-06, "loss": 0.403, "step": 6596 }, { "epoch": 0.29857433808553974, "grad_norm": 0.6284583434239028, "learning_rate": 8.22533990728135e-06, "loss": 0.401, "step": 6597 }, { "epoch": 0.29861959719393527, "grad_norm": 0.7456078983710108, "learning_rate": 8.224779829222742e-06, "loss": 0.3938, "step": 6598 }, { "epoch": 0.29866485630233086, "grad_norm": 0.6615636059791379, "learning_rate": 8.224219681873522e-06, "loss": 0.3549, "step": 6599 }, { "epoch": 0.2987101154107264, "grad_norm": 0.6578472104200105, "learning_rate": 8.223659465245723e-06, "loss": 0.3297, "step": 6600 }, { "epoch": 0.298755374519122, "grad_norm": 0.327107354885252, "learning_rate": 8.223099179351383e-06, "loss": 0.4747, "step": 6601 }, { "epoch": 0.2988006336275175, "grad_norm": 0.31622473719570926, "learning_rate": 8.22253882420254e-06, "loss": 0.4718, "step": 6602 }, { "epoch": 0.2988458927359131, "grad_norm": 0.6994810720919846, "learning_rate": 8.221978399811237e-06, "loss": 0.3992, "step": 6603 }, { "epoch": 0.29889115184430864, "grad_norm": 0.6706589677639165, "learning_rate": 8.22141790618951e-06, "loss": 0.3884, "step": 6604 }, { "epoch": 0.29893641095270423, "grad_norm": 0.7094897895214411, "learning_rate": 8.220857343349408e-06, "loss": 0.3376, "step": 6605 }, { "epoch": 0.29898167006109977, "grad_norm": 0.32556023080515356, "learning_rate": 8.220296711302976e-06, "loss": 0.5088, "step": 6606 }, { "epoch": 0.29902692916949536, "grad_norm": 0.7184867337191, "learning_rate": 8.219736010062255e-06, "loss": 0.3605, "step": 6607 }, { "epoch": 0.29907218827789095, "grad_norm": 0.3098575866940877, "learning_rate": 8.219175239639296e-06, "loss": 0.4804, "step": 6608 }, { "epoch": 0.2991174473862865, "grad_norm": 0.6400206664582679, "learning_rate": 8.21861440004615e-06, "loss": 0.3574, "step": 6609 }, { "epoch": 0.2991627064946821, "grad_norm": 0.3010626374463441, "learning_rate": 8.218053491294864e-06, "loss": 0.5005, "step": 6610 }, { "epoch": 0.2992079656030776, "grad_norm": 0.6855028942826946, "learning_rate": 8.217492513397493e-06, "loss": 0.3662, "step": 6611 }, { "epoch": 0.2992532247114732, "grad_norm": 0.7101719022351363, "learning_rate": 8.216931466366089e-06, "loss": 0.3976, "step": 6612 }, { "epoch": 0.29929848381986873, "grad_norm": 0.5527238979079008, "learning_rate": 8.216370350212709e-06, "loss": 0.3337, "step": 6613 }, { "epoch": 0.2993437429282643, "grad_norm": 0.6503365969415894, "learning_rate": 8.215809164949407e-06, "loss": 0.352, "step": 6614 }, { "epoch": 0.29938900203665986, "grad_norm": 0.6456987454478041, "learning_rate": 8.215247910588242e-06, "loss": 0.4055, "step": 6615 }, { "epoch": 0.29943426114505545, "grad_norm": 0.765375799768525, "learning_rate": 8.214686587141277e-06, "loss": 0.4042, "step": 6616 }, { "epoch": 0.299479520253451, "grad_norm": 0.3333814206779106, "learning_rate": 8.21412519462057e-06, "loss": 0.5028, "step": 6617 }, { "epoch": 0.29952477936184657, "grad_norm": 0.33084472661619535, "learning_rate": 8.213563733038182e-06, "loss": 0.4787, "step": 6618 }, { "epoch": 0.29957003847024216, "grad_norm": 0.6637174994749759, "learning_rate": 8.21300220240618e-06, "loss": 0.3875, "step": 6619 }, { "epoch": 0.2996152975786377, "grad_norm": 0.6912141263850212, "learning_rate": 8.212440602736628e-06, "loss": 0.3809, "step": 6620 }, { "epoch": 0.2996605566870333, "grad_norm": 0.6936592964069401, "learning_rate": 8.211878934041595e-06, "loss": 0.3658, "step": 6621 }, { "epoch": 0.2997058157954288, "grad_norm": 0.678807367422059, "learning_rate": 8.211317196333149e-06, "loss": 0.3622, "step": 6622 }, { "epoch": 0.2997510749038244, "grad_norm": 0.38560937624830255, "learning_rate": 8.210755389623356e-06, "loss": 0.46, "step": 6623 }, { "epoch": 0.29979633401221994, "grad_norm": 0.779311131617998, "learning_rate": 8.210193513924294e-06, "loss": 0.3718, "step": 6624 }, { "epoch": 0.29984159312061553, "grad_norm": 0.6420801291643528, "learning_rate": 8.209631569248031e-06, "loss": 0.3685, "step": 6625 }, { "epoch": 0.29988685222901107, "grad_norm": 0.6773260279806149, "learning_rate": 8.209069555606643e-06, "loss": 0.3472, "step": 6626 }, { "epoch": 0.29993211133740666, "grad_norm": 0.7706893310245987, "learning_rate": 8.208507473012207e-06, "loss": 0.4212, "step": 6627 }, { "epoch": 0.2999773704458022, "grad_norm": 0.638140601624294, "learning_rate": 8.2079453214768e-06, "loss": 0.3402, "step": 6628 }, { "epoch": 0.3000226295541978, "grad_norm": 0.5940795490485952, "learning_rate": 8.2073831010125e-06, "loss": 0.3185, "step": 6629 }, { "epoch": 0.3000678886625934, "grad_norm": 0.6635770683032601, "learning_rate": 8.206820811631387e-06, "loss": 0.3228, "step": 6630 }, { "epoch": 0.3001131477709889, "grad_norm": 0.6899477337576465, "learning_rate": 8.206258453345543e-06, "loss": 0.384, "step": 6631 }, { "epoch": 0.3001584068793845, "grad_norm": 0.6668924568154347, "learning_rate": 8.205696026167054e-06, "loss": 0.3475, "step": 6632 }, { "epoch": 0.30020366598778003, "grad_norm": 0.6413029895755245, "learning_rate": 8.205133530108003e-06, "loss": 0.393, "step": 6633 }, { "epoch": 0.3002489250961756, "grad_norm": 0.6730984902221498, "learning_rate": 8.204570965180476e-06, "loss": 0.3834, "step": 6634 }, { "epoch": 0.30029418420457116, "grad_norm": 0.371876236667934, "learning_rate": 8.204008331396562e-06, "loss": 0.5068, "step": 6635 }, { "epoch": 0.30033944331296675, "grad_norm": 0.6556362419726928, "learning_rate": 8.203445628768347e-06, "loss": 0.3631, "step": 6636 }, { "epoch": 0.3003847024213623, "grad_norm": 0.6359940940564239, "learning_rate": 8.202882857307926e-06, "loss": 0.3779, "step": 6637 }, { "epoch": 0.30042996152975787, "grad_norm": 0.6386188623759247, "learning_rate": 8.202320017027387e-06, "loss": 0.3984, "step": 6638 }, { "epoch": 0.3004752206381534, "grad_norm": 0.32621834677771844, "learning_rate": 8.201757107938829e-06, "loss": 0.5145, "step": 6639 }, { "epoch": 0.300520479746549, "grad_norm": 0.5928303322514519, "learning_rate": 8.201194130054342e-06, "loss": 0.3325, "step": 6640 }, { "epoch": 0.30056573885494453, "grad_norm": 0.3094692390285682, "learning_rate": 8.200631083386025e-06, "loss": 0.4907, "step": 6641 }, { "epoch": 0.3006109979633401, "grad_norm": 0.6144956649085465, "learning_rate": 8.200067967945977e-06, "loss": 0.3741, "step": 6642 }, { "epoch": 0.3006562570717357, "grad_norm": 0.695521701070394, "learning_rate": 8.199504783746297e-06, "loss": 0.3798, "step": 6643 }, { "epoch": 0.30070151618013125, "grad_norm": 0.7236023029614718, "learning_rate": 8.198941530799084e-06, "loss": 0.3339, "step": 6644 }, { "epoch": 0.30074677528852684, "grad_norm": 0.2931514319730529, "learning_rate": 8.198378209116444e-06, "loss": 0.4678, "step": 6645 }, { "epoch": 0.30079203439692237, "grad_norm": 0.3172098987084482, "learning_rate": 8.19781481871048e-06, "loss": 0.4783, "step": 6646 }, { "epoch": 0.30083729350531796, "grad_norm": 0.6300258938616354, "learning_rate": 8.197251359593294e-06, "loss": 0.3924, "step": 6647 }, { "epoch": 0.3008825526137135, "grad_norm": 0.6117147132966283, "learning_rate": 8.196687831776998e-06, "loss": 0.3308, "step": 6648 }, { "epoch": 0.3009278117221091, "grad_norm": 0.6846165713761543, "learning_rate": 8.196124235273698e-06, "loss": 0.4117, "step": 6649 }, { "epoch": 0.3009730708305046, "grad_norm": 0.6475285881169686, "learning_rate": 8.195560570095504e-06, "loss": 0.3732, "step": 6650 }, { "epoch": 0.3010183299389002, "grad_norm": 0.6562117371282999, "learning_rate": 8.194996836254527e-06, "loss": 0.3692, "step": 6651 }, { "epoch": 0.30106358904729574, "grad_norm": 0.3879940700042671, "learning_rate": 8.194433033762882e-06, "loss": 0.4973, "step": 6652 }, { "epoch": 0.30110884815569133, "grad_norm": 0.3532760568138731, "learning_rate": 8.193869162632682e-06, "loss": 0.4715, "step": 6653 }, { "epoch": 0.3011541072640869, "grad_norm": 0.33387439482702236, "learning_rate": 8.193305222876043e-06, "loss": 0.5229, "step": 6654 }, { "epoch": 0.30119936637248246, "grad_norm": 0.6660220599660495, "learning_rate": 8.19274121450508e-06, "loss": 0.3942, "step": 6655 }, { "epoch": 0.30124462548087805, "grad_norm": 0.8598578165148109, "learning_rate": 8.192177137531916e-06, "loss": 0.3359, "step": 6656 }, { "epoch": 0.3012898845892736, "grad_norm": 0.4416717118687196, "learning_rate": 8.19161299196867e-06, "loss": 0.4614, "step": 6657 }, { "epoch": 0.30133514369766917, "grad_norm": 0.8191984846210192, "learning_rate": 8.191048777827462e-06, "loss": 0.3596, "step": 6658 }, { "epoch": 0.3013804028060647, "grad_norm": 0.7090058012522736, "learning_rate": 8.190484495120416e-06, "loss": 0.3474, "step": 6659 }, { "epoch": 0.3014256619144603, "grad_norm": 0.6600972546014798, "learning_rate": 8.189920143859658e-06, "loss": 0.356, "step": 6660 }, { "epoch": 0.30147092102285583, "grad_norm": 0.758035548634254, "learning_rate": 8.189355724057313e-06, "loss": 0.3822, "step": 6661 }, { "epoch": 0.3015161801312514, "grad_norm": 0.6585617124518213, "learning_rate": 8.188791235725509e-06, "loss": 0.39, "step": 6662 }, { "epoch": 0.30156143923964696, "grad_norm": 0.6584263455818427, "learning_rate": 8.188226678876374e-06, "loss": 0.3525, "step": 6663 }, { "epoch": 0.30160669834804255, "grad_norm": 0.6284333579476523, "learning_rate": 8.187662053522039e-06, "loss": 0.364, "step": 6664 }, { "epoch": 0.30165195745643814, "grad_norm": 0.6941954463209911, "learning_rate": 8.187097359674638e-06, "loss": 0.3824, "step": 6665 }, { "epoch": 0.30169721656483367, "grad_norm": 0.6193892417113361, "learning_rate": 8.186532597346304e-06, "loss": 0.3797, "step": 6666 }, { "epoch": 0.30174247567322926, "grad_norm": 0.661626608730419, "learning_rate": 8.18596776654917e-06, "loss": 0.3267, "step": 6667 }, { "epoch": 0.3017877347816248, "grad_norm": 0.6513566152865766, "learning_rate": 8.185402867295373e-06, "loss": 0.3986, "step": 6668 }, { "epoch": 0.3018329938900204, "grad_norm": 0.46167306904485933, "learning_rate": 8.184837899597054e-06, "loss": 0.4989, "step": 6669 }, { "epoch": 0.3018782529984159, "grad_norm": 0.6812849953088046, "learning_rate": 8.184272863466348e-06, "loss": 0.3587, "step": 6670 }, { "epoch": 0.3019235121068115, "grad_norm": 0.6372181036074359, "learning_rate": 8.183707758915398e-06, "loss": 0.3866, "step": 6671 }, { "epoch": 0.30196877121520704, "grad_norm": 0.6663877518321638, "learning_rate": 8.183142585956347e-06, "loss": 0.382, "step": 6672 }, { "epoch": 0.30201403032360263, "grad_norm": 0.6383630283033406, "learning_rate": 8.182577344601337e-06, "loss": 0.3833, "step": 6673 }, { "epoch": 0.30205928943199817, "grad_norm": 0.36237644474402525, "learning_rate": 8.182012034862514e-06, "loss": 0.4855, "step": 6674 }, { "epoch": 0.30210454854039376, "grad_norm": 0.7009030759694248, "learning_rate": 8.181446656752027e-06, "loss": 0.4167, "step": 6675 }, { "epoch": 0.3021498076487893, "grad_norm": 0.3188822744200764, "learning_rate": 8.18088121028202e-06, "loss": 0.4902, "step": 6676 }, { "epoch": 0.3021950667571849, "grad_norm": 0.7033049015038353, "learning_rate": 8.18031569546465e-06, "loss": 0.395, "step": 6677 }, { "epoch": 0.3022403258655805, "grad_norm": 0.2996947562744208, "learning_rate": 8.179750112312058e-06, "loss": 0.4924, "step": 6678 }, { "epoch": 0.302285584973976, "grad_norm": 0.7107215020779526, "learning_rate": 8.179184460836404e-06, "loss": 0.4296, "step": 6679 }, { "epoch": 0.3023308440823716, "grad_norm": 0.36882862048673637, "learning_rate": 8.178618741049841e-06, "loss": 0.4754, "step": 6680 }, { "epoch": 0.30237610319076713, "grad_norm": 0.35130336150032226, "learning_rate": 8.178052952964523e-06, "loss": 0.4703, "step": 6681 }, { "epoch": 0.3024213622991627, "grad_norm": 0.7141293551304086, "learning_rate": 8.177487096592607e-06, "loss": 0.3988, "step": 6682 }, { "epoch": 0.30246662140755826, "grad_norm": 0.7030862967298622, "learning_rate": 8.176921171946252e-06, "loss": 0.3606, "step": 6683 }, { "epoch": 0.30251188051595385, "grad_norm": 0.6769924190293563, "learning_rate": 8.176355179037619e-06, "loss": 0.4181, "step": 6684 }, { "epoch": 0.3025571396243494, "grad_norm": 0.6831775720861758, "learning_rate": 8.17578911787887e-06, "loss": 0.4089, "step": 6685 }, { "epoch": 0.30260239873274497, "grad_norm": 0.6606531283871524, "learning_rate": 8.175222988482163e-06, "loss": 0.3295, "step": 6686 }, { "epoch": 0.3026476578411405, "grad_norm": 0.6762522492476349, "learning_rate": 8.174656790859668e-06, "loss": 0.3567, "step": 6687 }, { "epoch": 0.3026929169495361, "grad_norm": 0.6795353803369522, "learning_rate": 8.17409052502355e-06, "loss": 0.4107, "step": 6688 }, { "epoch": 0.3027381760579317, "grad_norm": 0.538399507273518, "learning_rate": 8.173524190985973e-06, "loss": 0.4827, "step": 6689 }, { "epoch": 0.3027834351663272, "grad_norm": 0.40540498512979073, "learning_rate": 8.172957788759109e-06, "loss": 0.4884, "step": 6690 }, { "epoch": 0.3028286942747228, "grad_norm": 0.7645143001189432, "learning_rate": 8.172391318355126e-06, "loss": 0.3521, "step": 6691 }, { "epoch": 0.30287395338311834, "grad_norm": 0.7116150900076627, "learning_rate": 8.171824779786198e-06, "loss": 0.383, "step": 6692 }, { "epoch": 0.30291921249151393, "grad_norm": 0.6885266895160488, "learning_rate": 8.171258173064497e-06, "loss": 0.3792, "step": 6693 }, { "epoch": 0.30296447159990947, "grad_norm": 0.6154377255141567, "learning_rate": 8.170691498202196e-06, "loss": 0.3836, "step": 6694 }, { "epoch": 0.30300973070830506, "grad_norm": 0.6892015186835359, "learning_rate": 8.170124755211475e-06, "loss": 0.4875, "step": 6695 }, { "epoch": 0.3030549898167006, "grad_norm": 0.5078678332810831, "learning_rate": 8.16955794410451e-06, "loss": 0.518, "step": 6696 }, { "epoch": 0.3031002489250962, "grad_norm": 0.6996318079313077, "learning_rate": 8.168991064893476e-06, "loss": 0.4067, "step": 6697 }, { "epoch": 0.3031455080334917, "grad_norm": 0.6701047881657107, "learning_rate": 8.168424117590559e-06, "loss": 0.364, "step": 6698 }, { "epoch": 0.3031907671418873, "grad_norm": 0.6267231223583077, "learning_rate": 8.167857102207936e-06, "loss": 0.3914, "step": 6699 }, { "epoch": 0.30323602625028284, "grad_norm": 0.6488152116391062, "learning_rate": 8.167290018757797e-06, "loss": 0.3677, "step": 6700 }, { "epoch": 0.30328128535867843, "grad_norm": 0.657644421376092, "learning_rate": 8.166722867252321e-06, "loss": 0.3588, "step": 6701 }, { "epoch": 0.303326544467074, "grad_norm": 0.6977592438122392, "learning_rate": 8.166155647703698e-06, "loss": 0.3919, "step": 6702 }, { "epoch": 0.30337180357546956, "grad_norm": 1.0770726070383594, "learning_rate": 8.165588360124112e-06, "loss": 0.5246, "step": 6703 }, { "epoch": 0.30341706268386515, "grad_norm": 0.6771285868253755, "learning_rate": 8.165021004525758e-06, "loss": 0.3728, "step": 6704 }, { "epoch": 0.3034623217922607, "grad_norm": 0.6544659774166018, "learning_rate": 8.164453580920819e-06, "loss": 0.3463, "step": 6705 }, { "epoch": 0.30350758090065627, "grad_norm": 0.6545512816480353, "learning_rate": 8.163886089321493e-06, "loss": 0.4144, "step": 6706 }, { "epoch": 0.3035528400090518, "grad_norm": 0.7171692882504428, "learning_rate": 8.163318529739971e-06, "loss": 0.349, "step": 6707 }, { "epoch": 0.3035980991174474, "grad_norm": 0.5006054067208444, "learning_rate": 8.162750902188452e-06, "loss": 0.4737, "step": 6708 }, { "epoch": 0.30364335822584293, "grad_norm": 0.6516365427753947, "learning_rate": 8.162183206679129e-06, "loss": 0.3905, "step": 6709 }, { "epoch": 0.3036886173342385, "grad_norm": 0.6767068672930011, "learning_rate": 8.1616154432242e-06, "loss": 0.346, "step": 6710 }, { "epoch": 0.30373387644263405, "grad_norm": 0.6397724806662128, "learning_rate": 8.161047611835866e-06, "loss": 0.3751, "step": 6711 }, { "epoch": 0.30377913555102964, "grad_norm": 0.8168005161205404, "learning_rate": 8.160479712526326e-06, "loss": 0.3977, "step": 6712 }, { "epoch": 0.30382439465942523, "grad_norm": 0.7369886205778866, "learning_rate": 8.159911745307785e-06, "loss": 0.4237, "step": 6713 }, { "epoch": 0.30386965376782077, "grad_norm": 0.6740165118906998, "learning_rate": 8.159343710192445e-06, "loss": 0.389, "step": 6714 }, { "epoch": 0.30391491287621636, "grad_norm": 0.5087143101780832, "learning_rate": 8.158775607192511e-06, "loss": 0.4844, "step": 6715 }, { "epoch": 0.3039601719846119, "grad_norm": 0.6533814867752841, "learning_rate": 8.158207436320192e-06, "loss": 0.3281, "step": 6716 }, { "epoch": 0.3040054310930075, "grad_norm": 0.6203679406879485, "learning_rate": 8.157639197587694e-06, "loss": 0.3611, "step": 6717 }, { "epoch": 0.304050690201403, "grad_norm": 0.6193842426608959, "learning_rate": 8.157070891007227e-06, "loss": 0.357, "step": 6718 }, { "epoch": 0.3040959493097986, "grad_norm": 0.6689713449805593, "learning_rate": 8.156502516591005e-06, "loss": 0.4038, "step": 6719 }, { "epoch": 0.30414120841819414, "grad_norm": 0.6649185774901039, "learning_rate": 8.155934074351236e-06, "loss": 0.3695, "step": 6720 }, { "epoch": 0.30418646752658973, "grad_norm": 0.7085354849748461, "learning_rate": 8.155365564300137e-06, "loss": 0.3875, "step": 6721 }, { "epoch": 0.30423172663498527, "grad_norm": 0.7011208137126376, "learning_rate": 8.154796986449925e-06, "loss": 0.3678, "step": 6722 }, { "epoch": 0.30427698574338086, "grad_norm": 0.6183098806515898, "learning_rate": 8.154228340812812e-06, "loss": 0.3564, "step": 6723 }, { "epoch": 0.30432224485177645, "grad_norm": 0.6849206059636519, "learning_rate": 8.15365962740102e-06, "loss": 0.4572, "step": 6724 }, { "epoch": 0.304367503960172, "grad_norm": 0.667232617976701, "learning_rate": 8.15309084622677e-06, "loss": 0.3435, "step": 6725 }, { "epoch": 0.30441276306856757, "grad_norm": 0.5710042272132126, "learning_rate": 8.15252199730228e-06, "loss": 0.486, "step": 6726 }, { "epoch": 0.3044580221769631, "grad_norm": 0.6301191405750226, "learning_rate": 8.151953080639777e-06, "loss": 0.4118, "step": 6727 }, { "epoch": 0.3045032812853587, "grad_norm": 0.6708578885522815, "learning_rate": 8.15138409625148e-06, "loss": 0.3569, "step": 6728 }, { "epoch": 0.30454854039375423, "grad_norm": 0.6846127030532159, "learning_rate": 8.15081504414962e-06, "loss": 0.3496, "step": 6729 }, { "epoch": 0.3045937995021498, "grad_norm": 0.6200505350083688, "learning_rate": 8.15024592434642e-06, "loss": 0.3675, "step": 6730 }, { "epoch": 0.30463905861054535, "grad_norm": 0.5283697356772571, "learning_rate": 8.14967673685411e-06, "loss": 0.5014, "step": 6731 }, { "epoch": 0.30468431771894094, "grad_norm": 0.9863932733707426, "learning_rate": 8.149107481684922e-06, "loss": 0.3767, "step": 6732 }, { "epoch": 0.3047295768273365, "grad_norm": 0.6713443061113822, "learning_rate": 8.148538158851084e-06, "loss": 0.3789, "step": 6733 }, { "epoch": 0.30477483593573207, "grad_norm": 0.5988285870373714, "learning_rate": 8.147968768364833e-06, "loss": 0.3227, "step": 6734 }, { "epoch": 0.3048200950441276, "grad_norm": 0.6679839451442557, "learning_rate": 8.1473993102384e-06, "loss": 0.3709, "step": 6735 }, { "epoch": 0.3048653541525232, "grad_norm": 0.719002401863315, "learning_rate": 8.146829784484024e-06, "loss": 0.3692, "step": 6736 }, { "epoch": 0.3049106132609188, "grad_norm": 0.7384804649111679, "learning_rate": 8.146260191113937e-06, "loss": 0.408, "step": 6737 }, { "epoch": 0.3049558723693143, "grad_norm": 0.5806880149958714, "learning_rate": 8.145690530140385e-06, "loss": 0.3467, "step": 6738 }, { "epoch": 0.3050011314777099, "grad_norm": 0.7988457184655746, "learning_rate": 8.145120801575603e-06, "loss": 0.3329, "step": 6739 }, { "epoch": 0.30504639058610544, "grad_norm": 0.45441104152888795, "learning_rate": 8.144551005431835e-06, "loss": 0.5055, "step": 6740 }, { "epoch": 0.30509164969450103, "grad_norm": 0.3480895237820813, "learning_rate": 8.143981141721324e-06, "loss": 0.5051, "step": 6741 }, { "epoch": 0.30513690880289657, "grad_norm": 0.6786100980441568, "learning_rate": 8.143411210456314e-06, "loss": 0.3572, "step": 6742 }, { "epoch": 0.30518216791129216, "grad_norm": 0.3247065808236292, "learning_rate": 8.142841211649052e-06, "loss": 0.4946, "step": 6743 }, { "epoch": 0.3052274270196877, "grad_norm": 0.7602586822007469, "learning_rate": 8.142271145311784e-06, "loss": 0.3885, "step": 6744 }, { "epoch": 0.3052726861280833, "grad_norm": 0.6827903832763981, "learning_rate": 8.141701011456759e-06, "loss": 0.4101, "step": 6745 }, { "epoch": 0.3053179452364788, "grad_norm": 0.6811187790476072, "learning_rate": 8.14113081009623e-06, "loss": 0.3494, "step": 6746 }, { "epoch": 0.3053632043448744, "grad_norm": 0.7048085893836293, "learning_rate": 8.140560541242446e-06, "loss": 0.3774, "step": 6747 }, { "epoch": 0.30540846345327, "grad_norm": 0.6491314330558875, "learning_rate": 8.139990204907662e-06, "loss": 0.3666, "step": 6748 }, { "epoch": 0.30545372256166553, "grad_norm": 0.4712696305154235, "learning_rate": 8.139419801104133e-06, "loss": 0.5054, "step": 6749 }, { "epoch": 0.3054989816700611, "grad_norm": 0.6124385047064537, "learning_rate": 8.138849329844115e-06, "loss": 0.3466, "step": 6750 }, { "epoch": 0.30554424077845665, "grad_norm": 0.6026356203624581, "learning_rate": 8.138278791139863e-06, "loss": 0.3485, "step": 6751 }, { "epoch": 0.30558949988685224, "grad_norm": 0.6271212950544153, "learning_rate": 8.13770818500364e-06, "loss": 0.3676, "step": 6752 }, { "epoch": 0.3056347589952478, "grad_norm": 0.6357535646778157, "learning_rate": 8.137137511447702e-06, "loss": 0.3909, "step": 6753 }, { "epoch": 0.30568001810364337, "grad_norm": 0.37425033364294313, "learning_rate": 8.136566770484316e-06, "loss": 0.489, "step": 6754 }, { "epoch": 0.3057252772120389, "grad_norm": 0.3013803252222591, "learning_rate": 8.135995962125744e-06, "loss": 0.4781, "step": 6755 }, { "epoch": 0.3057705363204345, "grad_norm": 0.6793897256341992, "learning_rate": 8.135425086384249e-06, "loss": 0.3599, "step": 6756 }, { "epoch": 0.30581579542883003, "grad_norm": 0.5991893062045853, "learning_rate": 8.1348541432721e-06, "loss": 0.3353, "step": 6757 }, { "epoch": 0.3058610545372256, "grad_norm": 0.6641871745828792, "learning_rate": 8.134283132801562e-06, "loss": 0.3843, "step": 6758 }, { "epoch": 0.3059063136456212, "grad_norm": 0.615570207173398, "learning_rate": 8.133712054984906e-06, "loss": 0.3622, "step": 6759 }, { "epoch": 0.30595157275401674, "grad_norm": 0.44958620298115815, "learning_rate": 8.133140909834402e-06, "loss": 0.5002, "step": 6760 }, { "epoch": 0.30599683186241233, "grad_norm": 0.738740954931246, "learning_rate": 8.132569697362323e-06, "loss": 0.3919, "step": 6761 }, { "epoch": 0.30604209097080787, "grad_norm": 0.3629080785737851, "learning_rate": 8.131998417580942e-06, "loss": 0.5022, "step": 6762 }, { "epoch": 0.30608735007920346, "grad_norm": 0.6430334797680526, "learning_rate": 8.131427070502535e-06, "loss": 0.3578, "step": 6763 }, { "epoch": 0.306132609187599, "grad_norm": 0.6998391775286003, "learning_rate": 8.130855656139375e-06, "loss": 0.3389, "step": 6764 }, { "epoch": 0.3061778682959946, "grad_norm": 0.7018326564652131, "learning_rate": 8.130284174503746e-06, "loss": 0.3888, "step": 6765 }, { "epoch": 0.3062231274043901, "grad_norm": 0.6413065879688464, "learning_rate": 8.129712625607924e-06, "loss": 0.351, "step": 6766 }, { "epoch": 0.3062683865127857, "grad_norm": 0.6484535268441277, "learning_rate": 8.129141009464187e-06, "loss": 0.3305, "step": 6767 }, { "epoch": 0.30631364562118124, "grad_norm": 0.6539147847237137, "learning_rate": 8.128569326084824e-06, "loss": 0.3459, "step": 6768 }, { "epoch": 0.30635890472957683, "grad_norm": 0.6790155149435549, "learning_rate": 8.127997575482112e-06, "loss": 0.4109, "step": 6769 }, { "epoch": 0.30640416383797237, "grad_norm": 0.6114908532644882, "learning_rate": 8.127425757668338e-06, "loss": 0.3752, "step": 6770 }, { "epoch": 0.30644942294636796, "grad_norm": 0.6098158272610537, "learning_rate": 8.12685387265579e-06, "loss": 0.4123, "step": 6771 }, { "epoch": 0.30649468205476355, "grad_norm": 0.6409207978303622, "learning_rate": 8.126281920456758e-06, "loss": 0.4011, "step": 6772 }, { "epoch": 0.3065399411631591, "grad_norm": 0.7401398533726881, "learning_rate": 8.12570990108353e-06, "loss": 0.4185, "step": 6773 }, { "epoch": 0.30658520027155467, "grad_norm": 0.6356749159664686, "learning_rate": 8.125137814548394e-06, "loss": 0.356, "step": 6774 }, { "epoch": 0.3066304593799502, "grad_norm": 0.6529145959466799, "learning_rate": 8.124565660863643e-06, "loss": 0.3638, "step": 6775 }, { "epoch": 0.3066757184883458, "grad_norm": 0.6608892458641886, "learning_rate": 8.123993440041576e-06, "loss": 0.3563, "step": 6776 }, { "epoch": 0.30672097759674133, "grad_norm": 0.5521789874530064, "learning_rate": 8.123421152094481e-06, "loss": 0.5073, "step": 6777 }, { "epoch": 0.3067662367051369, "grad_norm": 0.6905730622680517, "learning_rate": 8.12284879703466e-06, "loss": 0.3584, "step": 6778 }, { "epoch": 0.30681149581353245, "grad_norm": 0.6080164401091869, "learning_rate": 8.12227637487441e-06, "loss": 0.3673, "step": 6779 }, { "epoch": 0.30685675492192804, "grad_norm": 0.6349719454277032, "learning_rate": 8.121703885626029e-06, "loss": 0.3932, "step": 6780 }, { "epoch": 0.3069020140303236, "grad_norm": 0.6416433540100211, "learning_rate": 8.12113132930182e-06, "loss": 0.3911, "step": 6781 }, { "epoch": 0.30694727313871917, "grad_norm": 0.3495737986624057, "learning_rate": 8.120558705914083e-06, "loss": 0.4932, "step": 6782 }, { "epoch": 0.30699253224711476, "grad_norm": 0.5923057827572341, "learning_rate": 8.119986015475126e-06, "loss": 0.3361, "step": 6783 }, { "epoch": 0.3070377913555103, "grad_norm": 0.707659390930438, "learning_rate": 8.11941325799725e-06, "loss": 0.3557, "step": 6784 }, { "epoch": 0.3070830504639059, "grad_norm": 0.7240388985619817, "learning_rate": 8.118840433492764e-06, "loss": 0.3898, "step": 6785 }, { "epoch": 0.3071283095723014, "grad_norm": 0.6417285146209729, "learning_rate": 8.118267541973975e-06, "loss": 0.3738, "step": 6786 }, { "epoch": 0.307173568680697, "grad_norm": 0.3553061010073323, "learning_rate": 8.117694583453195e-06, "loss": 0.4988, "step": 6787 }, { "epoch": 0.30721882778909254, "grad_norm": 0.6635298379528045, "learning_rate": 8.117121557942733e-06, "loss": 0.3736, "step": 6788 }, { "epoch": 0.30726408689748813, "grad_norm": 0.312431761561904, "learning_rate": 8.116548465454902e-06, "loss": 0.4785, "step": 6789 }, { "epoch": 0.30730934600588367, "grad_norm": 0.741654489937296, "learning_rate": 8.115975306002018e-06, "loss": 0.3714, "step": 6790 }, { "epoch": 0.30735460511427926, "grad_norm": 0.6059806753467073, "learning_rate": 8.115402079596392e-06, "loss": 0.3504, "step": 6791 }, { "epoch": 0.3073998642226748, "grad_norm": 0.6754900935493422, "learning_rate": 8.114828786250345e-06, "loss": 0.3692, "step": 6792 }, { "epoch": 0.3074451233310704, "grad_norm": 0.379407826248663, "learning_rate": 8.114255425976193e-06, "loss": 0.4703, "step": 6793 }, { "epoch": 0.30749038243946597, "grad_norm": 0.6066107353561802, "learning_rate": 8.113681998786257e-06, "loss": 0.3734, "step": 6794 }, { "epoch": 0.3075356415478615, "grad_norm": 0.6140946118613567, "learning_rate": 8.113108504692858e-06, "loss": 0.3494, "step": 6795 }, { "epoch": 0.3075809006562571, "grad_norm": 0.6478485310329225, "learning_rate": 8.11253494370832e-06, "loss": 0.4195, "step": 6796 }, { "epoch": 0.30762615976465263, "grad_norm": 0.6222099995071896, "learning_rate": 8.111961315844964e-06, "loss": 0.3788, "step": 6797 }, { "epoch": 0.3076714188730482, "grad_norm": 0.32387049358651526, "learning_rate": 8.111387621115116e-06, "loss": 0.4689, "step": 6798 }, { "epoch": 0.30771667798144375, "grad_norm": 0.7076263821704921, "learning_rate": 8.110813859531104e-06, "loss": 0.3445, "step": 6799 }, { "epoch": 0.30776193708983934, "grad_norm": 0.6373148724099768, "learning_rate": 8.110240031105257e-06, "loss": 0.3686, "step": 6800 }, { "epoch": 0.3078071961982349, "grad_norm": 0.7596219343419965, "learning_rate": 8.109666135849905e-06, "loss": 0.3462, "step": 6801 }, { "epoch": 0.30785245530663047, "grad_norm": 0.6406151701818559, "learning_rate": 8.109092173777376e-06, "loss": 0.3602, "step": 6802 }, { "epoch": 0.307897714415026, "grad_norm": 0.6168811765740875, "learning_rate": 8.108518144900007e-06, "loss": 0.3321, "step": 6803 }, { "epoch": 0.3079429735234216, "grad_norm": 0.6369882372627966, "learning_rate": 8.10794404923013e-06, "loss": 0.3178, "step": 6804 }, { "epoch": 0.3079882326318171, "grad_norm": 0.3820706526356615, "learning_rate": 8.107369886780082e-06, "loss": 0.4625, "step": 6805 }, { "epoch": 0.3080334917402127, "grad_norm": 0.6049111052281383, "learning_rate": 8.106795657562197e-06, "loss": 0.331, "step": 6806 }, { "epoch": 0.3080787508486083, "grad_norm": 0.3170775102658955, "learning_rate": 8.106221361588814e-06, "loss": 0.5085, "step": 6807 }, { "epoch": 0.30812400995700384, "grad_norm": 0.6396101195957737, "learning_rate": 8.105646998872275e-06, "loss": 0.3939, "step": 6808 }, { "epoch": 0.30816926906539943, "grad_norm": 0.6137977068626062, "learning_rate": 8.10507256942492e-06, "loss": 0.3392, "step": 6809 }, { "epoch": 0.30821452817379497, "grad_norm": 0.6733315999273725, "learning_rate": 8.104498073259093e-06, "loss": 0.338, "step": 6810 }, { "epoch": 0.30825978728219056, "grad_norm": 0.6554373555685616, "learning_rate": 8.103923510387138e-06, "loss": 0.3999, "step": 6811 }, { "epoch": 0.3083050463905861, "grad_norm": 0.6453238347422188, "learning_rate": 8.103348880821399e-06, "loss": 0.3468, "step": 6812 }, { "epoch": 0.3083503054989817, "grad_norm": 0.4037572749428178, "learning_rate": 8.10277418457422e-06, "loss": 0.5147, "step": 6813 }, { "epoch": 0.3083955646073772, "grad_norm": 0.3979089005469083, "learning_rate": 8.102199421657957e-06, "loss": 0.509, "step": 6814 }, { "epoch": 0.3084408237157728, "grad_norm": 0.6515789874392688, "learning_rate": 8.101624592084956e-06, "loss": 0.3963, "step": 6815 }, { "epoch": 0.30848608282416834, "grad_norm": 0.7141706623863724, "learning_rate": 8.101049695867566e-06, "loss": 0.3619, "step": 6816 }, { "epoch": 0.30853134193256393, "grad_norm": 0.6568144635043696, "learning_rate": 8.100474733018145e-06, "loss": 0.3569, "step": 6817 }, { "epoch": 0.3085766010409595, "grad_norm": 0.687926850848241, "learning_rate": 8.099899703549043e-06, "loss": 0.3557, "step": 6818 }, { "epoch": 0.30862186014935505, "grad_norm": 0.7585055645699293, "learning_rate": 8.099324607472619e-06, "loss": 0.3915, "step": 6819 }, { "epoch": 0.30866711925775064, "grad_norm": 0.6493471634676304, "learning_rate": 8.098749444801226e-06, "loss": 0.3558, "step": 6820 }, { "epoch": 0.3087123783661462, "grad_norm": 0.5066541089205197, "learning_rate": 8.098174215547224e-06, "loss": 0.4819, "step": 6821 }, { "epoch": 0.30875763747454177, "grad_norm": 0.6462727041437436, "learning_rate": 8.097598919722975e-06, "loss": 0.3227, "step": 6822 }, { "epoch": 0.3088028965829373, "grad_norm": 0.636904943838202, "learning_rate": 8.097023557340837e-06, "loss": 0.3285, "step": 6823 }, { "epoch": 0.3088481556913329, "grad_norm": 0.3604508863174562, "learning_rate": 8.096448128413177e-06, "loss": 0.494, "step": 6824 }, { "epoch": 0.3088934147997284, "grad_norm": 0.3033044941525956, "learning_rate": 8.095872632952354e-06, "loss": 0.5074, "step": 6825 }, { "epoch": 0.308938673908124, "grad_norm": 0.6798141188672809, "learning_rate": 8.095297070970738e-06, "loss": 0.4, "step": 6826 }, { "epoch": 0.30898393301651955, "grad_norm": 0.6561679938459225, "learning_rate": 8.094721442480696e-06, "loss": 0.3725, "step": 6827 }, { "epoch": 0.30902919212491514, "grad_norm": 0.4073413244614808, "learning_rate": 8.094145747494591e-06, "loss": 0.4766, "step": 6828 }, { "epoch": 0.3090744512333107, "grad_norm": 0.7145571037258346, "learning_rate": 8.093569986024798e-06, "loss": 0.3615, "step": 6829 }, { "epoch": 0.30911971034170627, "grad_norm": 0.42268248652396634, "learning_rate": 8.092994158083689e-06, "loss": 0.5162, "step": 6830 }, { "epoch": 0.30916496945010186, "grad_norm": 0.6628200350098077, "learning_rate": 8.092418263683635e-06, "loss": 0.4114, "step": 6831 }, { "epoch": 0.3092102285584974, "grad_norm": 0.6047039574344523, "learning_rate": 8.091842302837009e-06, "loss": 0.3592, "step": 6832 }, { "epoch": 0.309255487666893, "grad_norm": 0.6438819114591449, "learning_rate": 8.091266275556188e-06, "loss": 0.4155, "step": 6833 }, { "epoch": 0.3093007467752885, "grad_norm": 0.6437391796856079, "learning_rate": 8.090690181853548e-06, "loss": 0.3785, "step": 6834 }, { "epoch": 0.3093460058836841, "grad_norm": 0.6030900916072861, "learning_rate": 8.09011402174147e-06, "loss": 0.3633, "step": 6835 }, { "epoch": 0.30939126499207964, "grad_norm": 0.7355170778499127, "learning_rate": 8.089537795232331e-06, "loss": 0.3587, "step": 6836 }, { "epoch": 0.30943652410047523, "grad_norm": 0.6698188931352116, "learning_rate": 8.088961502338514e-06, "loss": 0.3725, "step": 6837 }, { "epoch": 0.30948178320887076, "grad_norm": 0.7400131968976658, "learning_rate": 8.088385143072402e-06, "loss": 0.3586, "step": 6838 }, { "epoch": 0.30952704231726635, "grad_norm": 0.6718189379267882, "learning_rate": 8.087808717446377e-06, "loss": 0.3694, "step": 6839 }, { "epoch": 0.3095723014256619, "grad_norm": 0.6473017686964793, "learning_rate": 8.087232225472827e-06, "loss": 0.3561, "step": 6840 }, { "epoch": 0.3096175605340575, "grad_norm": 1.0674858541583925, "learning_rate": 8.086655667164137e-06, "loss": 0.3661, "step": 6841 }, { "epoch": 0.30966281964245307, "grad_norm": 0.5969628411282236, "learning_rate": 8.086079042532699e-06, "loss": 0.3864, "step": 6842 }, { "epoch": 0.3097080787508486, "grad_norm": 0.5847828086110796, "learning_rate": 8.0855023515909e-06, "loss": 0.3402, "step": 6843 }, { "epoch": 0.3097533378592442, "grad_norm": 0.6408510947564642, "learning_rate": 8.08492559435113e-06, "loss": 0.3885, "step": 6844 }, { "epoch": 0.30979859696763973, "grad_norm": 0.6446133618228684, "learning_rate": 8.084348770825785e-06, "loss": 0.4956, "step": 6845 }, { "epoch": 0.3098438560760353, "grad_norm": 0.6549341012350445, "learning_rate": 8.083771881027259e-06, "loss": 0.4082, "step": 6846 }, { "epoch": 0.30988911518443085, "grad_norm": 0.7068487841425369, "learning_rate": 8.083194924967943e-06, "loss": 0.4099, "step": 6847 }, { "epoch": 0.30993437429282644, "grad_norm": 0.7007918864113356, "learning_rate": 8.08261790266024e-06, "loss": 0.3243, "step": 6848 }, { "epoch": 0.309979633401222, "grad_norm": 0.629604268051431, "learning_rate": 8.082040814116545e-06, "loss": 0.3626, "step": 6849 }, { "epoch": 0.31002489250961757, "grad_norm": 0.6278545282083574, "learning_rate": 8.081463659349258e-06, "loss": 0.3375, "step": 6850 }, { "epoch": 0.3100701516180131, "grad_norm": 0.4734338413743656, "learning_rate": 8.080886438370781e-06, "loss": 0.5128, "step": 6851 }, { "epoch": 0.3101154107264087, "grad_norm": 0.4099808770615001, "learning_rate": 8.080309151193517e-06, "loss": 0.4826, "step": 6852 }, { "epoch": 0.3101606698348043, "grad_norm": 0.6989913052154422, "learning_rate": 8.07973179782987e-06, "loss": 0.3824, "step": 6853 }, { "epoch": 0.3102059289431998, "grad_norm": 0.6248720257681796, "learning_rate": 8.079154378292246e-06, "loss": 0.3345, "step": 6854 }, { "epoch": 0.3102511880515954, "grad_norm": 0.6139729843377981, "learning_rate": 8.07857689259305e-06, "loss": 0.363, "step": 6855 }, { "epoch": 0.31029644715999094, "grad_norm": 0.6465785975930611, "learning_rate": 8.077999340744694e-06, "loss": 0.3383, "step": 6856 }, { "epoch": 0.31034170626838653, "grad_norm": 0.654275213477745, "learning_rate": 8.077421722759584e-06, "loss": 0.3197, "step": 6857 }, { "epoch": 0.31038696537678206, "grad_norm": 0.7020318044378301, "learning_rate": 8.076844038650133e-06, "loss": 0.3761, "step": 6858 }, { "epoch": 0.31043222448517765, "grad_norm": 0.6495770931717897, "learning_rate": 8.076266288428753e-06, "loss": 0.3691, "step": 6859 }, { "epoch": 0.3104774835935732, "grad_norm": 0.8143884827910683, "learning_rate": 8.075688472107859e-06, "loss": 0.3725, "step": 6860 }, { "epoch": 0.3105227427019688, "grad_norm": 0.8094018848341005, "learning_rate": 8.075110589699866e-06, "loss": 0.5127, "step": 6861 }, { "epoch": 0.3105680018103643, "grad_norm": 0.6774066803171471, "learning_rate": 8.07453264121719e-06, "loss": 0.4156, "step": 6862 }, { "epoch": 0.3106132609187599, "grad_norm": 0.6939323935945736, "learning_rate": 8.07395462667225e-06, "loss": 0.3314, "step": 6863 }, { "epoch": 0.31065852002715544, "grad_norm": 0.6902658938875413, "learning_rate": 8.073376546077468e-06, "loss": 0.3835, "step": 6864 }, { "epoch": 0.31070377913555103, "grad_norm": 0.34823630616883156, "learning_rate": 8.07279839944526e-06, "loss": 0.4922, "step": 6865 }, { "epoch": 0.3107490382439466, "grad_norm": 0.7711677796759714, "learning_rate": 8.072220186788056e-06, "loss": 0.3656, "step": 6866 }, { "epoch": 0.31079429735234215, "grad_norm": 0.6111540013077865, "learning_rate": 8.071641908118273e-06, "loss": 0.3609, "step": 6867 }, { "epoch": 0.31083955646073774, "grad_norm": 0.4677566680030358, "learning_rate": 8.071063563448341e-06, "loss": 0.512, "step": 6868 }, { "epoch": 0.3108848155691333, "grad_norm": 0.6404382698128002, "learning_rate": 8.070485152790684e-06, "loss": 0.3427, "step": 6869 }, { "epoch": 0.31093007467752887, "grad_norm": 0.3686604494675354, "learning_rate": 8.06990667615773e-06, "loss": 0.4937, "step": 6870 }, { "epoch": 0.3109753337859244, "grad_norm": 0.6986737941715713, "learning_rate": 8.069328133561911e-06, "loss": 0.3958, "step": 6871 }, { "epoch": 0.31102059289432, "grad_norm": 0.3162165790029801, "learning_rate": 8.068749525015658e-06, "loss": 0.4915, "step": 6872 }, { "epoch": 0.3110658520027155, "grad_norm": 0.8038232490379009, "learning_rate": 8.068170850531401e-06, "loss": 0.3809, "step": 6873 }, { "epoch": 0.3111111111111111, "grad_norm": 0.6650645932764687, "learning_rate": 8.067592110121576e-06, "loss": 0.3954, "step": 6874 }, { "epoch": 0.31115637021950665, "grad_norm": 0.6527969870447125, "learning_rate": 8.06701330379862e-06, "loss": 0.3501, "step": 6875 }, { "epoch": 0.31120162932790224, "grad_norm": 0.5320663380971439, "learning_rate": 8.066434431574965e-06, "loss": 0.4975, "step": 6876 }, { "epoch": 0.31124688843629783, "grad_norm": 0.7166134358312011, "learning_rate": 8.065855493463055e-06, "loss": 0.3703, "step": 6877 }, { "epoch": 0.31129214754469337, "grad_norm": 0.6843795237430064, "learning_rate": 8.065276489475324e-06, "loss": 0.3682, "step": 6878 }, { "epoch": 0.31133740665308896, "grad_norm": 0.628539631506995, "learning_rate": 8.064697419624216e-06, "loss": 0.3842, "step": 6879 }, { "epoch": 0.3113826657614845, "grad_norm": 0.63461679915827, "learning_rate": 8.064118283922173e-06, "loss": 0.3472, "step": 6880 }, { "epoch": 0.3114279248698801, "grad_norm": 0.5858346718837931, "learning_rate": 8.06353908238164e-06, "loss": 0.3982, "step": 6881 }, { "epoch": 0.3114731839782756, "grad_norm": 0.3290559586116341, "learning_rate": 8.06295981501506e-06, "loss": 0.4678, "step": 6882 }, { "epoch": 0.3115184430866712, "grad_norm": 0.6397350366827538, "learning_rate": 8.062380481834881e-06, "loss": 0.3743, "step": 6883 }, { "epoch": 0.31156370219506674, "grad_norm": 0.6291797265410776, "learning_rate": 8.061801082853548e-06, "loss": 0.3446, "step": 6884 }, { "epoch": 0.31160896130346233, "grad_norm": 0.6852533920605484, "learning_rate": 8.061221618083519e-06, "loss": 0.3873, "step": 6885 }, { "epoch": 0.31165422041185786, "grad_norm": 0.7471568005288752, "learning_rate": 8.060642087537233e-06, "loss": 0.3438, "step": 6886 }, { "epoch": 0.31169947952025345, "grad_norm": 0.5769928531316513, "learning_rate": 8.060062491227154e-06, "loss": 0.3268, "step": 6887 }, { "epoch": 0.31174473862864904, "grad_norm": 0.39457544398209493, "learning_rate": 8.059482829165728e-06, "loss": 0.4775, "step": 6888 }, { "epoch": 0.3117899977370446, "grad_norm": 0.6477673888723783, "learning_rate": 8.058903101365412e-06, "loss": 0.3274, "step": 6889 }, { "epoch": 0.31183525684544017, "grad_norm": 0.6592795719191157, "learning_rate": 8.058323307838665e-06, "loss": 0.3386, "step": 6890 }, { "epoch": 0.3118805159538357, "grad_norm": 0.7579655825665975, "learning_rate": 8.05774344859794e-06, "loss": 0.3585, "step": 6891 }, { "epoch": 0.3119257750622313, "grad_norm": 0.6553535146501523, "learning_rate": 8.057163523655702e-06, "loss": 0.3319, "step": 6892 }, { "epoch": 0.3119710341706268, "grad_norm": 0.722228702460221, "learning_rate": 8.056583533024408e-06, "loss": 0.4117, "step": 6893 }, { "epoch": 0.3120162932790224, "grad_norm": 0.62822723167227, "learning_rate": 8.056003476716521e-06, "loss": 0.3519, "step": 6894 }, { "epoch": 0.31206155238741795, "grad_norm": 0.34302529356673794, "learning_rate": 8.055423354744507e-06, "loss": 0.4896, "step": 6895 }, { "epoch": 0.31210681149581354, "grad_norm": 0.8038315586479057, "learning_rate": 8.054843167120827e-06, "loss": 0.3516, "step": 6896 }, { "epoch": 0.3121520706042091, "grad_norm": 0.7075627538296707, "learning_rate": 8.054262913857951e-06, "loss": 0.3913, "step": 6897 }, { "epoch": 0.31219732971260467, "grad_norm": 0.6365784262746258, "learning_rate": 8.053682594968346e-06, "loss": 0.3721, "step": 6898 }, { "epoch": 0.3122425888210002, "grad_norm": 0.6020970936461443, "learning_rate": 8.053102210464478e-06, "loss": 0.3466, "step": 6899 }, { "epoch": 0.3122878479293958, "grad_norm": 0.6240839936563485, "learning_rate": 8.052521760358822e-06, "loss": 0.3847, "step": 6900 }, { "epoch": 0.3123331070377914, "grad_norm": 0.30425922617942247, "learning_rate": 8.05194124466385e-06, "loss": 0.4701, "step": 6901 }, { "epoch": 0.3123783661461869, "grad_norm": 0.6497985170793312, "learning_rate": 8.051360663392031e-06, "loss": 0.3752, "step": 6902 }, { "epoch": 0.3124236252545825, "grad_norm": 0.6580678715176217, "learning_rate": 8.050780016555846e-06, "loss": 0.4101, "step": 6903 }, { "epoch": 0.31246888436297804, "grad_norm": 0.2985922174224097, "learning_rate": 8.050199304167766e-06, "loss": 0.4843, "step": 6904 }, { "epoch": 0.31251414347137363, "grad_norm": 0.6295805144586434, "learning_rate": 8.04961852624027e-06, "loss": 0.3404, "step": 6905 }, { "epoch": 0.31255940257976916, "grad_norm": 0.6372763963072887, "learning_rate": 8.04903768278584e-06, "loss": 0.352, "step": 6906 }, { "epoch": 0.31260466168816475, "grad_norm": 0.6481452415926576, "learning_rate": 8.048456773816955e-06, "loss": 0.3757, "step": 6907 }, { "epoch": 0.3126499207965603, "grad_norm": 0.7300271667682606, "learning_rate": 8.047875799346096e-06, "loss": 0.3824, "step": 6908 }, { "epoch": 0.3126951799049559, "grad_norm": 0.6156677324812093, "learning_rate": 8.047294759385746e-06, "loss": 0.3255, "step": 6909 }, { "epoch": 0.3127404390133514, "grad_norm": 0.67457907378465, "learning_rate": 8.046713653948393e-06, "loss": 0.3683, "step": 6910 }, { "epoch": 0.312785698121747, "grad_norm": 0.6480105249444751, "learning_rate": 8.046132483046518e-06, "loss": 0.3724, "step": 6911 }, { "epoch": 0.3128309572301426, "grad_norm": 0.6655706607509766, "learning_rate": 8.045551246692612e-06, "loss": 0.3749, "step": 6912 }, { "epoch": 0.3128762163385381, "grad_norm": 0.696785351305484, "learning_rate": 8.044969944899165e-06, "loss": 0.4009, "step": 6913 }, { "epoch": 0.3129214754469337, "grad_norm": 0.7094320509766985, "learning_rate": 8.044388577678666e-06, "loss": 0.4151, "step": 6914 }, { "epoch": 0.31296673455532925, "grad_norm": 0.8859580873747761, "learning_rate": 8.043807145043604e-06, "loss": 0.332, "step": 6915 }, { "epoch": 0.31301199366372484, "grad_norm": 0.7092949835152653, "learning_rate": 8.043225647006475e-06, "loss": 0.3363, "step": 6916 }, { "epoch": 0.3130572527721204, "grad_norm": 0.4288553491654917, "learning_rate": 8.042644083579775e-06, "loss": 0.5173, "step": 6917 }, { "epoch": 0.31310251188051597, "grad_norm": 0.6636966437293738, "learning_rate": 8.042062454775999e-06, "loss": 0.3481, "step": 6918 }, { "epoch": 0.3131477709889115, "grad_norm": 0.700343789222212, "learning_rate": 8.041480760607642e-06, "loss": 0.3481, "step": 6919 }, { "epoch": 0.3131930300973071, "grad_norm": 0.6413586661482176, "learning_rate": 8.040899001087206e-06, "loss": 0.4055, "step": 6920 }, { "epoch": 0.3132382892057026, "grad_norm": 0.7054811245614201, "learning_rate": 8.04031717622719e-06, "loss": 0.3947, "step": 6921 }, { "epoch": 0.3132835483140982, "grad_norm": 0.6748577078036545, "learning_rate": 8.039735286040095e-06, "loss": 0.3392, "step": 6922 }, { "epoch": 0.31332880742249375, "grad_norm": 0.6208383147855693, "learning_rate": 8.039153330538423e-06, "loss": 0.3515, "step": 6923 }, { "epoch": 0.31337406653088934, "grad_norm": 0.6376714796607698, "learning_rate": 8.038571309734682e-06, "loss": 0.3985, "step": 6924 }, { "epoch": 0.31341932563928493, "grad_norm": 0.6995289970919804, "learning_rate": 8.037989223641375e-06, "loss": 0.3348, "step": 6925 }, { "epoch": 0.31346458474768046, "grad_norm": 0.6844964516030871, "learning_rate": 8.03740707227101e-06, "loss": 0.3474, "step": 6926 }, { "epoch": 0.31350984385607605, "grad_norm": 0.6413951552444123, "learning_rate": 8.036824855636096e-06, "loss": 0.3519, "step": 6927 }, { "epoch": 0.3135551029644716, "grad_norm": 0.4902546346685851, "learning_rate": 8.036242573749142e-06, "loss": 0.4797, "step": 6928 }, { "epoch": 0.3136003620728672, "grad_norm": 0.6548546213508429, "learning_rate": 8.035660226622661e-06, "loss": 0.3783, "step": 6929 }, { "epoch": 0.3136456211812627, "grad_norm": 0.6675440569763577, "learning_rate": 8.035077814269165e-06, "loss": 0.3656, "step": 6930 }, { "epoch": 0.3136908802896583, "grad_norm": 0.6934360166198267, "learning_rate": 8.034495336701169e-06, "loss": 0.3737, "step": 6931 }, { "epoch": 0.31373613939805384, "grad_norm": 0.6674105315224877, "learning_rate": 8.033912793931187e-06, "loss": 0.3313, "step": 6932 }, { "epoch": 0.3137813985064494, "grad_norm": 0.6779911169606194, "learning_rate": 8.033330185971737e-06, "loss": 0.3777, "step": 6933 }, { "epoch": 0.31382665761484496, "grad_norm": 0.3547796842907606, "learning_rate": 8.032747512835338e-06, "loss": 0.5, "step": 6934 }, { "epoch": 0.31387191672324055, "grad_norm": 0.616740468431483, "learning_rate": 8.03216477453451e-06, "loss": 0.3504, "step": 6935 }, { "epoch": 0.31391717583163614, "grad_norm": 0.636731133727304, "learning_rate": 8.03158197108177e-06, "loss": 0.4083, "step": 6936 }, { "epoch": 0.3139624349400317, "grad_norm": 0.6420405558207202, "learning_rate": 8.030999102489649e-06, "loss": 0.376, "step": 6937 }, { "epoch": 0.31400769404842727, "grad_norm": 0.6260731233892689, "learning_rate": 8.030416168770663e-06, "loss": 0.3452, "step": 6938 }, { "epoch": 0.3140529531568228, "grad_norm": 0.6135576943507863, "learning_rate": 8.029833169937343e-06, "loss": 0.359, "step": 6939 }, { "epoch": 0.3140982122652184, "grad_norm": 0.7184287456717344, "learning_rate": 8.029250106002212e-06, "loss": 0.3575, "step": 6940 }, { "epoch": 0.3141434713736139, "grad_norm": 0.6092310549635934, "learning_rate": 8.0286669769778e-06, "loss": 0.3642, "step": 6941 }, { "epoch": 0.3141887304820095, "grad_norm": 0.6550276525946266, "learning_rate": 8.028083782876636e-06, "loss": 0.3899, "step": 6942 }, { "epoch": 0.31423398959040505, "grad_norm": 0.6539315006997505, "learning_rate": 8.027500523711253e-06, "loss": 0.3467, "step": 6943 }, { "epoch": 0.31427924869880064, "grad_norm": 0.6699656463779345, "learning_rate": 8.026917199494181e-06, "loss": 0.3618, "step": 6944 }, { "epoch": 0.3143245078071962, "grad_norm": 0.36221957851989256, "learning_rate": 8.026333810237956e-06, "loss": 0.4938, "step": 6945 }, { "epoch": 0.31436976691559176, "grad_norm": 0.7061374105862012, "learning_rate": 8.025750355955112e-06, "loss": 0.3669, "step": 6946 }, { "epoch": 0.31441502602398735, "grad_norm": 0.6008363417200151, "learning_rate": 8.025166836658185e-06, "loss": 0.3171, "step": 6947 }, { "epoch": 0.3144602851323829, "grad_norm": 0.6153450850043569, "learning_rate": 8.024583252359714e-06, "loss": 0.3622, "step": 6948 }, { "epoch": 0.3145055442407785, "grad_norm": 0.6484699123239448, "learning_rate": 8.023999603072236e-06, "loss": 0.3492, "step": 6949 }, { "epoch": 0.314550803349174, "grad_norm": 0.6947514250003248, "learning_rate": 8.023415888808297e-06, "loss": 0.3531, "step": 6950 }, { "epoch": 0.3145960624575696, "grad_norm": 0.38000138347421075, "learning_rate": 8.022832109580437e-06, "loss": 0.4838, "step": 6951 }, { "epoch": 0.31464132156596514, "grad_norm": 0.6763144529594775, "learning_rate": 8.022248265401196e-06, "loss": 0.3564, "step": 6952 }, { "epoch": 0.3146865806743607, "grad_norm": 0.6601351368993845, "learning_rate": 8.021664356283123e-06, "loss": 0.3684, "step": 6953 }, { "epoch": 0.31473183978275626, "grad_norm": 0.7040570624830684, "learning_rate": 8.021080382238763e-06, "loss": 0.3753, "step": 6954 }, { "epoch": 0.31477709889115185, "grad_norm": 0.2880395263967866, "learning_rate": 8.020496343280664e-06, "loss": 0.4805, "step": 6955 }, { "epoch": 0.3148223579995474, "grad_norm": 0.3021069983303145, "learning_rate": 8.019912239421376e-06, "loss": 0.4913, "step": 6956 }, { "epoch": 0.314867617107943, "grad_norm": 0.8756622334383337, "learning_rate": 8.019328070673449e-06, "loss": 0.3898, "step": 6957 }, { "epoch": 0.3149128762163385, "grad_norm": 0.6654109828266398, "learning_rate": 8.018743837049433e-06, "loss": 0.3788, "step": 6958 }, { "epoch": 0.3149581353247341, "grad_norm": 0.7690824808772482, "learning_rate": 8.018159538561888e-06, "loss": 0.3458, "step": 6959 }, { "epoch": 0.3150033944331297, "grad_norm": 0.790969564695417, "learning_rate": 8.01757517522336e-06, "loss": 0.3943, "step": 6960 }, { "epoch": 0.3150486535415252, "grad_norm": 0.6888825820126708, "learning_rate": 8.01699074704641e-06, "loss": 0.4222, "step": 6961 }, { "epoch": 0.3150939126499208, "grad_norm": 0.9953360923545216, "learning_rate": 8.016406254043595e-06, "loss": 0.3557, "step": 6962 }, { "epoch": 0.31513917175831635, "grad_norm": 0.7163182122643703, "learning_rate": 8.015821696227475e-06, "loss": 0.3606, "step": 6963 }, { "epoch": 0.31518443086671194, "grad_norm": 0.6413564293083378, "learning_rate": 8.015237073610607e-06, "loss": 0.3787, "step": 6964 }, { "epoch": 0.3152296899751075, "grad_norm": 0.6576486928547289, "learning_rate": 8.014652386205557e-06, "loss": 0.4123, "step": 6965 }, { "epoch": 0.31527494908350306, "grad_norm": 0.6047092559852032, "learning_rate": 8.014067634024884e-06, "loss": 0.3701, "step": 6966 }, { "epoch": 0.3153202081918986, "grad_norm": 0.6571742648341652, "learning_rate": 8.013482817081157e-06, "loss": 0.3856, "step": 6967 }, { "epoch": 0.3153654673002942, "grad_norm": 0.6707972533911652, "learning_rate": 8.012897935386938e-06, "loss": 0.3592, "step": 6968 }, { "epoch": 0.3154107264086897, "grad_norm": 0.6202903980423257, "learning_rate": 8.012312988954795e-06, "loss": 0.3869, "step": 6969 }, { "epoch": 0.3154559855170853, "grad_norm": 0.4156628400616371, "learning_rate": 8.0117279777973e-06, "loss": 0.4932, "step": 6970 }, { "epoch": 0.3155012446254809, "grad_norm": 0.660814934434108, "learning_rate": 8.011142901927018e-06, "loss": 0.3479, "step": 6971 }, { "epoch": 0.31554650373387644, "grad_norm": 0.6830413012597334, "learning_rate": 8.010557761356523e-06, "loss": 0.4147, "step": 6972 }, { "epoch": 0.31559176284227203, "grad_norm": 0.30869750524274253, "learning_rate": 8.009972556098388e-06, "loss": 0.4674, "step": 6973 }, { "epoch": 0.31563702195066756, "grad_norm": 0.3013701412214667, "learning_rate": 8.009387286165188e-06, "loss": 0.501, "step": 6974 }, { "epoch": 0.31568228105906315, "grad_norm": 0.7170094699174588, "learning_rate": 8.008801951569501e-06, "loss": 0.3552, "step": 6975 }, { "epoch": 0.3157275401674587, "grad_norm": 0.7032893337212256, "learning_rate": 8.008216552323896e-06, "loss": 0.3914, "step": 6976 }, { "epoch": 0.3157727992758543, "grad_norm": 0.6187411787630647, "learning_rate": 8.007631088440959e-06, "loss": 0.3883, "step": 6977 }, { "epoch": 0.3158180583842498, "grad_norm": 0.6929869219292941, "learning_rate": 8.007045559933265e-06, "loss": 0.3521, "step": 6978 }, { "epoch": 0.3158633174926454, "grad_norm": 0.41659022905236215, "learning_rate": 8.006459966813399e-06, "loss": 0.518, "step": 6979 }, { "epoch": 0.31590857660104094, "grad_norm": 0.6864769287896622, "learning_rate": 8.005874309093942e-06, "loss": 0.3382, "step": 6980 }, { "epoch": 0.3159538357094365, "grad_norm": 0.8409165007357672, "learning_rate": 8.005288586787477e-06, "loss": 0.3954, "step": 6981 }, { "epoch": 0.3159990948178321, "grad_norm": 0.6951795654243249, "learning_rate": 8.00470279990659e-06, "loss": 0.4228, "step": 6982 }, { "epoch": 0.31604435392622765, "grad_norm": 0.3213139159543221, "learning_rate": 8.00411694846387e-06, "loss": 0.4808, "step": 6983 }, { "epoch": 0.31608961303462324, "grad_norm": 0.7266194383038589, "learning_rate": 8.003531032471901e-06, "loss": 0.3913, "step": 6984 }, { "epoch": 0.3161348721430188, "grad_norm": 0.9611211426506843, "learning_rate": 8.002945051943276e-06, "loss": 0.3398, "step": 6985 }, { "epoch": 0.31618013125141436, "grad_norm": 0.6052295188548436, "learning_rate": 8.002359006890585e-06, "loss": 0.3788, "step": 6986 }, { "epoch": 0.3162253903598099, "grad_norm": 0.7664924172248415, "learning_rate": 8.001772897326418e-06, "loss": 0.3707, "step": 6987 }, { "epoch": 0.3162706494682055, "grad_norm": 0.8040540740269324, "learning_rate": 8.001186723263374e-06, "loss": 0.3593, "step": 6988 }, { "epoch": 0.316315908576601, "grad_norm": 0.6847053006400641, "learning_rate": 8.000600484714043e-06, "loss": 0.4082, "step": 6989 }, { "epoch": 0.3163611676849966, "grad_norm": 0.692378851693665, "learning_rate": 8.000014181691023e-06, "loss": 0.3716, "step": 6990 }, { "epoch": 0.31640642679339215, "grad_norm": 0.6925287113795098, "learning_rate": 7.999427814206911e-06, "loss": 0.3648, "step": 6991 }, { "epoch": 0.31645168590178774, "grad_norm": 0.6704389383276854, "learning_rate": 7.99884138227431e-06, "loss": 0.3365, "step": 6992 }, { "epoch": 0.3164969450101833, "grad_norm": 0.6874645585931815, "learning_rate": 7.998254885905817e-06, "loss": 0.3452, "step": 6993 }, { "epoch": 0.31654220411857886, "grad_norm": 0.6286062996410474, "learning_rate": 7.997668325114033e-06, "loss": 0.3476, "step": 6994 }, { "epoch": 0.31658746322697445, "grad_norm": 0.6251766923475587, "learning_rate": 7.997081699911566e-06, "loss": 0.36, "step": 6995 }, { "epoch": 0.31663272233537, "grad_norm": 0.4019324945117982, "learning_rate": 7.996495010311017e-06, "loss": 0.4625, "step": 6996 }, { "epoch": 0.3166779814437656, "grad_norm": 0.7655651498493894, "learning_rate": 7.995908256324992e-06, "loss": 0.3416, "step": 6997 }, { "epoch": 0.3167232405521611, "grad_norm": 0.6785538399318121, "learning_rate": 7.995321437966102e-06, "loss": 0.4238, "step": 6998 }, { "epoch": 0.3167684996605567, "grad_norm": 0.6357356159438607, "learning_rate": 7.99473455524695e-06, "loss": 0.39, "step": 6999 }, { "epoch": 0.31681375876895224, "grad_norm": 0.33904987132475095, "learning_rate": 7.994147608180153e-06, "loss": 0.4999, "step": 7000 }, { "epoch": 0.3168590178773478, "grad_norm": 0.7621136973532631, "learning_rate": 7.993560596778321e-06, "loss": 0.3879, "step": 7001 }, { "epoch": 0.31690427698574336, "grad_norm": 0.7132686937912542, "learning_rate": 7.992973521054063e-06, "loss": 0.3871, "step": 7002 }, { "epoch": 0.31694953609413895, "grad_norm": 0.6690670498675246, "learning_rate": 7.992386381019999e-06, "loss": 0.3945, "step": 7003 }, { "epoch": 0.3169947952025345, "grad_norm": 0.7285347897571842, "learning_rate": 7.99179917668874e-06, "loss": 0.3553, "step": 7004 }, { "epoch": 0.3170400543109301, "grad_norm": 0.6793097106943258, "learning_rate": 7.991211908072905e-06, "loss": 0.3883, "step": 7005 }, { "epoch": 0.31708531341932567, "grad_norm": 0.6018551606250381, "learning_rate": 7.990624575185116e-06, "loss": 0.3684, "step": 7006 }, { "epoch": 0.3171305725277212, "grad_norm": 0.6756261068762829, "learning_rate": 7.990037178037987e-06, "loss": 0.4062, "step": 7007 }, { "epoch": 0.3171758316361168, "grad_norm": 0.6133815435263604, "learning_rate": 7.989449716644142e-06, "loss": 0.3658, "step": 7008 }, { "epoch": 0.3172210907445123, "grad_norm": 0.619225307675842, "learning_rate": 7.988862191016204e-06, "loss": 0.362, "step": 7009 }, { "epoch": 0.3172663498529079, "grad_norm": 0.6478285255297425, "learning_rate": 7.9882746011668e-06, "loss": 0.3724, "step": 7010 }, { "epoch": 0.31731160896130345, "grad_norm": 0.6368209266259487, "learning_rate": 7.98768694710855e-06, "loss": 0.3836, "step": 7011 }, { "epoch": 0.31735686806969904, "grad_norm": 0.4413932798745625, "learning_rate": 7.987099228854083e-06, "loss": 0.4922, "step": 7012 }, { "epoch": 0.3174021271780946, "grad_norm": 0.6229862211243823, "learning_rate": 7.986511446416029e-06, "loss": 0.3324, "step": 7013 }, { "epoch": 0.31744738628649016, "grad_norm": 0.6977332581462345, "learning_rate": 7.985923599807017e-06, "loss": 0.4385, "step": 7014 }, { "epoch": 0.3174926453948857, "grad_norm": 0.7046571732350977, "learning_rate": 7.985335689039675e-06, "loss": 0.3629, "step": 7015 }, { "epoch": 0.3175379045032813, "grad_norm": 0.6477763537029326, "learning_rate": 7.984747714126639e-06, "loss": 0.3785, "step": 7016 }, { "epoch": 0.3175831636116769, "grad_norm": 0.7144266335689072, "learning_rate": 7.984159675080543e-06, "loss": 0.3906, "step": 7017 }, { "epoch": 0.3176284227200724, "grad_norm": 0.6258472946470179, "learning_rate": 7.98357157191402e-06, "loss": 0.3575, "step": 7018 }, { "epoch": 0.317673681828468, "grad_norm": 0.6200884932537759, "learning_rate": 7.982983404639707e-06, "loss": 0.3471, "step": 7019 }, { "epoch": 0.31771894093686354, "grad_norm": 0.6650517614200443, "learning_rate": 7.98239517327024e-06, "loss": 0.3554, "step": 7020 }, { "epoch": 0.3177642000452591, "grad_norm": 0.6530860017670242, "learning_rate": 7.981806877818265e-06, "loss": 0.3711, "step": 7021 }, { "epoch": 0.31780945915365466, "grad_norm": 0.8740355444512873, "learning_rate": 7.981218518296414e-06, "loss": 0.3659, "step": 7022 }, { "epoch": 0.31785471826205025, "grad_norm": 0.6671952504181827, "learning_rate": 7.980630094717337e-06, "loss": 0.3792, "step": 7023 }, { "epoch": 0.3178999773704458, "grad_norm": 0.6477284106255007, "learning_rate": 7.98004160709367e-06, "loss": 0.3446, "step": 7024 }, { "epoch": 0.3179452364788414, "grad_norm": 0.6192963333013082, "learning_rate": 7.979453055438063e-06, "loss": 0.3551, "step": 7025 }, { "epoch": 0.3179904955872369, "grad_norm": 0.6196762931669262, "learning_rate": 7.97886443976316e-06, "loss": 0.328, "step": 7026 }, { "epoch": 0.3180357546956325, "grad_norm": 0.48428443466322474, "learning_rate": 7.978275760081611e-06, "loss": 0.5026, "step": 7027 }, { "epoch": 0.31808101380402803, "grad_norm": 0.661978297863087, "learning_rate": 7.97768701640606e-06, "loss": 0.4117, "step": 7028 }, { "epoch": 0.3181262729124236, "grad_norm": 1.7101319009447666, "learning_rate": 7.977098208749162e-06, "loss": 0.3473, "step": 7029 }, { "epoch": 0.3181715320208192, "grad_norm": 0.6697938911628487, "learning_rate": 7.976509337123567e-06, "loss": 0.4033, "step": 7030 }, { "epoch": 0.31821679112921475, "grad_norm": 0.6372431024914199, "learning_rate": 7.975920401541927e-06, "loss": 0.3733, "step": 7031 }, { "epoch": 0.31826205023761034, "grad_norm": 0.6087178277991327, "learning_rate": 7.975331402016898e-06, "loss": 0.363, "step": 7032 }, { "epoch": 0.3183073093460059, "grad_norm": 0.6942216985962842, "learning_rate": 7.974742338561134e-06, "loss": 0.3627, "step": 7033 }, { "epoch": 0.31835256845440146, "grad_norm": 0.391526321631339, "learning_rate": 7.974153211187296e-06, "loss": 0.5146, "step": 7034 }, { "epoch": 0.318397827562797, "grad_norm": 0.3479777571847785, "learning_rate": 7.973564019908038e-06, "loss": 0.4979, "step": 7035 }, { "epoch": 0.3184430866711926, "grad_norm": 1.4253779934524895, "learning_rate": 7.972974764736023e-06, "loss": 0.3807, "step": 7036 }, { "epoch": 0.3184883457795881, "grad_norm": 0.6227052935548067, "learning_rate": 7.97238544568391e-06, "loss": 0.3789, "step": 7037 }, { "epoch": 0.3185336048879837, "grad_norm": 0.6147570914994451, "learning_rate": 7.971796062764363e-06, "loss": 0.3501, "step": 7038 }, { "epoch": 0.31857886399637925, "grad_norm": 0.6806938315694051, "learning_rate": 7.971206615990046e-06, "loss": 0.3667, "step": 7039 }, { "epoch": 0.31862412310477484, "grad_norm": 0.6507251787693218, "learning_rate": 7.970617105373624e-06, "loss": 0.441, "step": 7040 }, { "epoch": 0.3186693822131704, "grad_norm": 0.6400026468720813, "learning_rate": 7.970027530927765e-06, "loss": 0.3509, "step": 7041 }, { "epoch": 0.31871464132156596, "grad_norm": 0.9682135791562172, "learning_rate": 7.969437892665134e-06, "loss": 0.335, "step": 7042 }, { "epoch": 0.31875990042996155, "grad_norm": 0.6946780013933663, "learning_rate": 7.968848190598404e-06, "loss": 0.4637, "step": 7043 }, { "epoch": 0.3188051595383571, "grad_norm": 0.6146416446661337, "learning_rate": 7.968258424740245e-06, "loss": 0.3855, "step": 7044 }, { "epoch": 0.3188504186467527, "grad_norm": 0.6630762453867052, "learning_rate": 7.967668595103328e-06, "loss": 0.3648, "step": 7045 }, { "epoch": 0.3188956777551482, "grad_norm": 0.6409902766059519, "learning_rate": 7.967078701700329e-06, "loss": 0.3675, "step": 7046 }, { "epoch": 0.3189409368635438, "grad_norm": 0.3593074982929306, "learning_rate": 7.966488744543919e-06, "loss": 0.4827, "step": 7047 }, { "epoch": 0.31898619597193933, "grad_norm": 0.40176551814356487, "learning_rate": 7.965898723646777e-06, "loss": 0.5214, "step": 7048 }, { "epoch": 0.3190314550803349, "grad_norm": 0.45032727794835453, "learning_rate": 7.965308639021581e-06, "loss": 0.4759, "step": 7049 }, { "epoch": 0.31907671418873046, "grad_norm": 0.7198669844930446, "learning_rate": 7.964718490681009e-06, "loss": 0.3569, "step": 7050 }, { "epoch": 0.31912197329712605, "grad_norm": 0.6546251899527719, "learning_rate": 7.964128278637745e-06, "loss": 0.3607, "step": 7051 }, { "epoch": 0.3191672324055216, "grad_norm": 0.8092815635678249, "learning_rate": 7.963538002904464e-06, "loss": 0.3728, "step": 7052 }, { "epoch": 0.3192124915139172, "grad_norm": 1.5350541693416326, "learning_rate": 7.962947663493855e-06, "loss": 0.3441, "step": 7053 }, { "epoch": 0.31925775062231276, "grad_norm": 0.6106851124656958, "learning_rate": 7.9623572604186e-06, "loss": 0.3581, "step": 7054 }, { "epoch": 0.3193030097307083, "grad_norm": 0.6489555394182779, "learning_rate": 7.961766793691387e-06, "loss": 0.3446, "step": 7055 }, { "epoch": 0.3193482688391039, "grad_norm": 0.6293509170922542, "learning_rate": 7.961176263324902e-06, "loss": 0.3544, "step": 7056 }, { "epoch": 0.3193935279474994, "grad_norm": 0.6794823179154822, "learning_rate": 7.960585669331832e-06, "loss": 0.376, "step": 7057 }, { "epoch": 0.319438787055895, "grad_norm": 0.628733216417228, "learning_rate": 7.959995011724869e-06, "loss": 0.3947, "step": 7058 }, { "epoch": 0.31948404616429055, "grad_norm": 0.7145812322342616, "learning_rate": 7.959404290516705e-06, "loss": 0.4822, "step": 7059 }, { "epoch": 0.31952930527268614, "grad_norm": 0.6517928178132028, "learning_rate": 7.958813505720031e-06, "loss": 0.408, "step": 7060 }, { "epoch": 0.31957456438108167, "grad_norm": 0.643493560485578, "learning_rate": 7.958222657347543e-06, "loss": 0.3909, "step": 7061 }, { "epoch": 0.31961982348947726, "grad_norm": 0.6968346039393635, "learning_rate": 7.957631745411936e-06, "loss": 0.3946, "step": 7062 }, { "epoch": 0.3196650825978728, "grad_norm": 0.6186660710362943, "learning_rate": 7.957040769925906e-06, "loss": 0.3484, "step": 7063 }, { "epoch": 0.3197103417062684, "grad_norm": 0.6076213582496474, "learning_rate": 7.95644973090215e-06, "loss": 0.3651, "step": 7064 }, { "epoch": 0.319755600814664, "grad_norm": 0.5507873432743083, "learning_rate": 7.955858628353372e-06, "loss": 0.4934, "step": 7065 }, { "epoch": 0.3198008599230595, "grad_norm": 0.6447783016497618, "learning_rate": 7.95526746229227e-06, "loss": 0.3774, "step": 7066 }, { "epoch": 0.3198461190314551, "grad_norm": 0.6033695502218002, "learning_rate": 7.954676232731545e-06, "loss": 0.3423, "step": 7067 }, { "epoch": 0.31989137813985064, "grad_norm": 0.7129234065266957, "learning_rate": 7.954084939683901e-06, "loss": 0.3626, "step": 7068 }, { "epoch": 0.3199366372482462, "grad_norm": 0.6305288052748258, "learning_rate": 7.953493583162047e-06, "loss": 0.346, "step": 7069 }, { "epoch": 0.31998189635664176, "grad_norm": 0.3498155729821522, "learning_rate": 7.952902163178687e-06, "loss": 0.4731, "step": 7070 }, { "epoch": 0.32002715546503735, "grad_norm": 0.6789249555155012, "learning_rate": 7.952310679746528e-06, "loss": 0.3761, "step": 7071 }, { "epoch": 0.3200724145734329, "grad_norm": 0.6980391317468145, "learning_rate": 7.951719132878279e-06, "loss": 0.3881, "step": 7072 }, { "epoch": 0.3201176736818285, "grad_norm": 0.9088700610711827, "learning_rate": 7.951127522586653e-06, "loss": 0.3882, "step": 7073 }, { "epoch": 0.320162932790224, "grad_norm": 0.6659807627108673, "learning_rate": 7.95053584888436e-06, "loss": 0.3502, "step": 7074 }, { "epoch": 0.3202081918986196, "grad_norm": 0.755708696082257, "learning_rate": 7.94994411178411e-06, "loss": 0.4077, "step": 7075 }, { "epoch": 0.3202534510070152, "grad_norm": 0.6560835029799718, "learning_rate": 7.949352311298626e-06, "loss": 0.3779, "step": 7076 }, { "epoch": 0.3202987101154107, "grad_norm": 0.6869943892018556, "learning_rate": 7.948760447440617e-06, "loss": 0.362, "step": 7077 }, { "epoch": 0.3203439692238063, "grad_norm": 0.3807530113056217, "learning_rate": 7.948168520222802e-06, "loss": 0.4803, "step": 7078 }, { "epoch": 0.32038922833220185, "grad_norm": 0.6263532501824917, "learning_rate": 7.9475765296579e-06, "loss": 0.3371, "step": 7079 }, { "epoch": 0.32043448744059744, "grad_norm": 0.6798061248166258, "learning_rate": 7.946984475758633e-06, "loss": 0.3876, "step": 7080 }, { "epoch": 0.32047974654899297, "grad_norm": 0.6812449344143824, "learning_rate": 7.946392358537719e-06, "loss": 0.372, "step": 7081 }, { "epoch": 0.32052500565738856, "grad_norm": 0.634619025478144, "learning_rate": 7.945800178007883e-06, "loss": 0.3567, "step": 7082 }, { "epoch": 0.3205702647657841, "grad_norm": 0.6872688162762708, "learning_rate": 7.945207934181849e-06, "loss": 0.3509, "step": 7083 }, { "epoch": 0.3206155238741797, "grad_norm": 0.6278473270352956, "learning_rate": 7.944615627072341e-06, "loss": 0.3606, "step": 7084 }, { "epoch": 0.3206607829825752, "grad_norm": 0.6898920436870292, "learning_rate": 7.944023256692086e-06, "loss": 0.3734, "step": 7085 }, { "epoch": 0.3207060420909708, "grad_norm": 0.6505918738584352, "learning_rate": 7.943430823053815e-06, "loss": 0.3845, "step": 7086 }, { "epoch": 0.32075130119936635, "grad_norm": 0.7038738512820012, "learning_rate": 7.942838326170255e-06, "loss": 0.3345, "step": 7087 }, { "epoch": 0.32079656030776194, "grad_norm": 0.6051699178090763, "learning_rate": 7.942245766054137e-06, "loss": 0.3571, "step": 7088 }, { "epoch": 0.3208418194161575, "grad_norm": 0.4150689109819772, "learning_rate": 7.941653142718194e-06, "loss": 0.4855, "step": 7089 }, { "epoch": 0.32088707852455306, "grad_norm": 0.6268038080046793, "learning_rate": 7.94106045617516e-06, "loss": 0.3285, "step": 7090 }, { "epoch": 0.32093233763294865, "grad_norm": 0.6397684099671085, "learning_rate": 7.94046770643777e-06, "loss": 0.3662, "step": 7091 }, { "epoch": 0.3209775967413442, "grad_norm": 0.6908477440660391, "learning_rate": 7.93987489351876e-06, "loss": 0.3667, "step": 7092 }, { "epoch": 0.3210228558497398, "grad_norm": 0.6745483725451942, "learning_rate": 7.939282017430867e-06, "loss": 0.3978, "step": 7093 }, { "epoch": 0.3210681149581353, "grad_norm": 0.6529001360028059, "learning_rate": 7.93868907818683e-06, "loss": 0.3489, "step": 7094 }, { "epoch": 0.3211133740665309, "grad_norm": 0.6145650846844853, "learning_rate": 7.938096075799391e-06, "loss": 0.4018, "step": 7095 }, { "epoch": 0.32115863317492643, "grad_norm": 0.4122410958350045, "learning_rate": 7.93750301028129e-06, "loss": 0.487, "step": 7096 }, { "epoch": 0.321203892283322, "grad_norm": 0.6595309437944411, "learning_rate": 7.936909881645275e-06, "loss": 0.3653, "step": 7097 }, { "epoch": 0.32124915139171756, "grad_norm": 0.6300548129151529, "learning_rate": 7.936316689904083e-06, "loss": 0.3788, "step": 7098 }, { "epoch": 0.32129441050011315, "grad_norm": 0.6692355502644558, "learning_rate": 7.935723435070464e-06, "loss": 0.3666, "step": 7099 }, { "epoch": 0.32133966960850874, "grad_norm": 0.6494119925557855, "learning_rate": 7.935130117157166e-06, "loss": 0.3704, "step": 7100 }, { "epoch": 0.3213849287169043, "grad_norm": 0.34216565367593454, "learning_rate": 7.934536736176934e-06, "loss": 0.5311, "step": 7101 }, { "epoch": 0.32143018782529986, "grad_norm": 1.0052287214302138, "learning_rate": 7.933943292142524e-06, "loss": 0.3802, "step": 7102 }, { "epoch": 0.3214754469336954, "grad_norm": 0.6499179876977065, "learning_rate": 7.93334978506668e-06, "loss": 0.344, "step": 7103 }, { "epoch": 0.321520706042091, "grad_norm": 0.6181493818110998, "learning_rate": 7.93275621496216e-06, "loss": 0.3724, "step": 7104 }, { "epoch": 0.3215659651504865, "grad_norm": 0.6791885332119639, "learning_rate": 7.932162581841715e-06, "loss": 0.348, "step": 7105 }, { "epoch": 0.3216112242588821, "grad_norm": 0.4324667087116202, "learning_rate": 7.931568885718104e-06, "loss": 0.5062, "step": 7106 }, { "epoch": 0.32165648336727765, "grad_norm": 0.5369739435044163, "learning_rate": 7.930975126604079e-06, "loss": 0.512, "step": 7107 }, { "epoch": 0.32170174247567324, "grad_norm": 0.30855384882874476, "learning_rate": 7.930381304512401e-06, "loss": 0.5037, "step": 7108 }, { "epoch": 0.32174700158406877, "grad_norm": 0.6766398168097438, "learning_rate": 7.92978741945583e-06, "loss": 0.442, "step": 7109 }, { "epoch": 0.32179226069246436, "grad_norm": 0.6718820626808718, "learning_rate": 7.929193471447123e-06, "loss": 0.3785, "step": 7110 }, { "epoch": 0.32183751980085995, "grad_norm": 0.62498935257029, "learning_rate": 7.928599460499046e-06, "loss": 0.3497, "step": 7111 }, { "epoch": 0.3218827789092555, "grad_norm": 0.7246586779569112, "learning_rate": 7.92800538662436e-06, "loss": 0.3715, "step": 7112 }, { "epoch": 0.3219280380176511, "grad_norm": 0.6591067393074789, "learning_rate": 7.927411249835832e-06, "loss": 0.3734, "step": 7113 }, { "epoch": 0.3219732971260466, "grad_norm": 0.7603779807342957, "learning_rate": 7.926817050146227e-06, "loss": 0.3286, "step": 7114 }, { "epoch": 0.3220185562344422, "grad_norm": 0.657032741453226, "learning_rate": 7.926222787568314e-06, "loss": 0.3599, "step": 7115 }, { "epoch": 0.32206381534283773, "grad_norm": 0.8247053846685484, "learning_rate": 7.925628462114858e-06, "loss": 0.5117, "step": 7116 }, { "epoch": 0.3221090744512333, "grad_norm": 0.8964379263543338, "learning_rate": 7.925034073798632e-06, "loss": 0.3377, "step": 7117 }, { "epoch": 0.32215433355962886, "grad_norm": 0.6875665098075191, "learning_rate": 7.92443962263241e-06, "loss": 0.3311, "step": 7118 }, { "epoch": 0.32219959266802445, "grad_norm": 0.9490803870507291, "learning_rate": 7.92384510862896e-06, "loss": 0.3867, "step": 7119 }, { "epoch": 0.32224485177642, "grad_norm": 0.7349779813006002, "learning_rate": 7.92325053180106e-06, "loss": 0.3784, "step": 7120 }, { "epoch": 0.3222901108848156, "grad_norm": 0.7006969086062932, "learning_rate": 7.922655892161482e-06, "loss": 0.3666, "step": 7121 }, { "epoch": 0.3223353699932111, "grad_norm": 0.6555049359630561, "learning_rate": 7.922061189723007e-06, "loss": 0.3581, "step": 7122 }, { "epoch": 0.3223806291016067, "grad_norm": 0.6740198833872045, "learning_rate": 7.921466424498409e-06, "loss": 0.3809, "step": 7123 }, { "epoch": 0.3224258882100023, "grad_norm": 0.6650014243978319, "learning_rate": 7.920871596500473e-06, "loss": 0.3679, "step": 7124 }, { "epoch": 0.3224711473183978, "grad_norm": 0.6504397000732004, "learning_rate": 7.920276705741975e-06, "loss": 0.3366, "step": 7125 }, { "epoch": 0.3225164064267934, "grad_norm": 0.49837067139854274, "learning_rate": 7.919681752235701e-06, "loss": 0.4496, "step": 7126 }, { "epoch": 0.32256166553518895, "grad_norm": 0.6165258872006424, "learning_rate": 7.919086735994433e-06, "loss": 0.3166, "step": 7127 }, { "epoch": 0.32260692464358454, "grad_norm": 0.6527582636116545, "learning_rate": 7.918491657030956e-06, "loss": 0.3648, "step": 7128 }, { "epoch": 0.32265218375198007, "grad_norm": 0.6597269628107116, "learning_rate": 7.917896515358057e-06, "loss": 0.3332, "step": 7129 }, { "epoch": 0.32269744286037566, "grad_norm": 0.6542637026119181, "learning_rate": 7.917301310988525e-06, "loss": 0.3536, "step": 7130 }, { "epoch": 0.3227427019687712, "grad_norm": 0.6347125789718905, "learning_rate": 7.916706043935145e-06, "loss": 0.365, "step": 7131 }, { "epoch": 0.3227879610771668, "grad_norm": 0.7115640181010907, "learning_rate": 7.916110714210711e-06, "loss": 0.3735, "step": 7132 }, { "epoch": 0.3228332201855623, "grad_norm": 0.3234316538967396, "learning_rate": 7.915515321828014e-06, "loss": 0.4915, "step": 7133 }, { "epoch": 0.3228784792939579, "grad_norm": 0.687035292169822, "learning_rate": 7.914919866799847e-06, "loss": 0.3634, "step": 7134 }, { "epoch": 0.3229237384023535, "grad_norm": 0.3137342255904164, "learning_rate": 7.914324349139006e-06, "loss": 0.4805, "step": 7135 }, { "epoch": 0.32296899751074903, "grad_norm": 0.27487283487033065, "learning_rate": 7.913728768858283e-06, "loss": 0.4696, "step": 7136 }, { "epoch": 0.3230142566191446, "grad_norm": 0.6152486208784101, "learning_rate": 7.91313312597048e-06, "loss": 0.3231, "step": 7137 }, { "epoch": 0.32305951572754016, "grad_norm": 0.6482665421392981, "learning_rate": 7.91253742048839e-06, "loss": 0.3707, "step": 7138 }, { "epoch": 0.32310477483593575, "grad_norm": 0.6755287950541693, "learning_rate": 7.911941652424819e-06, "loss": 0.3887, "step": 7139 }, { "epoch": 0.3231500339443313, "grad_norm": 0.7476984596163645, "learning_rate": 7.911345821792565e-06, "loss": 0.3947, "step": 7140 }, { "epoch": 0.3231952930527269, "grad_norm": 0.7050035119659278, "learning_rate": 7.910749928604429e-06, "loss": 0.398, "step": 7141 }, { "epoch": 0.3232405521611224, "grad_norm": 0.6985845003611664, "learning_rate": 7.910153972873218e-06, "loss": 0.3849, "step": 7142 }, { "epoch": 0.323285811269518, "grad_norm": 0.7188017184074661, "learning_rate": 7.909557954611736e-06, "loss": 0.3831, "step": 7143 }, { "epoch": 0.32333107037791353, "grad_norm": 0.6187212364272202, "learning_rate": 7.908961873832788e-06, "loss": 0.3489, "step": 7144 }, { "epoch": 0.3233763294863091, "grad_norm": 0.5689497713714161, "learning_rate": 7.908365730549183e-06, "loss": 0.4918, "step": 7145 }, { "epoch": 0.3234215885947047, "grad_norm": 0.6522720842697802, "learning_rate": 7.907769524773734e-06, "loss": 0.352, "step": 7146 }, { "epoch": 0.32346684770310025, "grad_norm": 0.68249189867944, "learning_rate": 7.907173256519246e-06, "loss": 0.3962, "step": 7147 }, { "epoch": 0.32351210681149584, "grad_norm": 0.6918661181189343, "learning_rate": 7.906576925798535e-06, "loss": 0.3562, "step": 7148 }, { "epoch": 0.32355736591989137, "grad_norm": 0.6470440870797444, "learning_rate": 7.905980532624411e-06, "loss": 0.3726, "step": 7149 }, { "epoch": 0.32360262502828696, "grad_norm": 0.6204544691347952, "learning_rate": 7.905384077009693e-06, "loss": 0.3307, "step": 7150 }, { "epoch": 0.3236478841366825, "grad_norm": 0.654141371005639, "learning_rate": 7.904787558967193e-06, "loss": 0.3837, "step": 7151 }, { "epoch": 0.3236931432450781, "grad_norm": 0.6214250147231662, "learning_rate": 7.904190978509729e-06, "loss": 0.364, "step": 7152 }, { "epoch": 0.3237384023534736, "grad_norm": 0.6605746453137314, "learning_rate": 7.90359433565012e-06, "loss": 0.3793, "step": 7153 }, { "epoch": 0.3237836614618692, "grad_norm": 0.6374246098103137, "learning_rate": 7.902997630401188e-06, "loss": 0.3635, "step": 7154 }, { "epoch": 0.32382892057026474, "grad_norm": 0.7547998705704276, "learning_rate": 7.902400862775752e-06, "loss": 0.384, "step": 7155 }, { "epoch": 0.32387417967866033, "grad_norm": 0.6713274567187839, "learning_rate": 7.901804032786637e-06, "loss": 0.3638, "step": 7156 }, { "epoch": 0.32391943878705587, "grad_norm": 0.6410889902728407, "learning_rate": 7.901207140446662e-06, "loss": 0.3773, "step": 7157 }, { "epoch": 0.32396469789545146, "grad_norm": 0.6597367749925079, "learning_rate": 7.90061018576866e-06, "loss": 0.3783, "step": 7158 }, { "epoch": 0.32400995700384705, "grad_norm": 0.7166227954783297, "learning_rate": 7.900013168765453e-06, "loss": 0.3571, "step": 7159 }, { "epoch": 0.3240552161122426, "grad_norm": 0.8334546986425982, "learning_rate": 7.899416089449867e-06, "loss": 0.3568, "step": 7160 }, { "epoch": 0.3241004752206382, "grad_norm": 0.6549313556490581, "learning_rate": 7.898818947834737e-06, "loss": 0.3913, "step": 7161 }, { "epoch": 0.3241457343290337, "grad_norm": 0.6616731785929209, "learning_rate": 7.898221743932887e-06, "loss": 0.3639, "step": 7162 }, { "epoch": 0.3241909934374293, "grad_norm": 0.6307360694773526, "learning_rate": 7.897624477757156e-06, "loss": 0.3985, "step": 7163 }, { "epoch": 0.32423625254582483, "grad_norm": 0.6211038754157479, "learning_rate": 7.897027149320375e-06, "loss": 0.3666, "step": 7164 }, { "epoch": 0.3242815116542204, "grad_norm": 0.6638297280567542, "learning_rate": 7.896429758635375e-06, "loss": 0.365, "step": 7165 }, { "epoch": 0.32432677076261596, "grad_norm": 0.6532817268016007, "learning_rate": 7.895832305715e-06, "loss": 0.3813, "step": 7166 }, { "epoch": 0.32437202987101155, "grad_norm": 0.6701885349272433, "learning_rate": 7.895234790572077e-06, "loss": 0.3704, "step": 7167 }, { "epoch": 0.3244172889794071, "grad_norm": 0.6442400135179758, "learning_rate": 7.894637213219454e-06, "loss": 0.378, "step": 7168 }, { "epoch": 0.32446254808780267, "grad_norm": 0.667231431672473, "learning_rate": 7.894039573669968e-06, "loss": 0.3926, "step": 7169 }, { "epoch": 0.32450780719619826, "grad_norm": 0.6344096908061077, "learning_rate": 7.893441871936456e-06, "loss": 0.3954, "step": 7170 }, { "epoch": 0.3245530663045938, "grad_norm": 0.6725657035402189, "learning_rate": 7.892844108031768e-06, "loss": 0.3807, "step": 7171 }, { "epoch": 0.3245983254129894, "grad_norm": 0.7062785122126237, "learning_rate": 7.892246281968745e-06, "loss": 0.4932, "step": 7172 }, { "epoch": 0.3246435845213849, "grad_norm": 0.6774231556142725, "learning_rate": 7.891648393760232e-06, "loss": 0.3447, "step": 7173 }, { "epoch": 0.3246888436297805, "grad_norm": 0.8538387827323394, "learning_rate": 7.891050443419074e-06, "loss": 0.3446, "step": 7174 }, { "epoch": 0.32473410273817604, "grad_norm": 0.6583879453482827, "learning_rate": 7.890452430958123e-06, "loss": 0.4065, "step": 7175 }, { "epoch": 0.32477936184657163, "grad_norm": 0.681043978728226, "learning_rate": 7.889854356390225e-06, "loss": 0.4438, "step": 7176 }, { "epoch": 0.32482462095496717, "grad_norm": 0.8220002508654024, "learning_rate": 7.889256219728235e-06, "loss": 0.3427, "step": 7177 }, { "epoch": 0.32486988006336276, "grad_norm": 0.6731314383680075, "learning_rate": 7.888658020985e-06, "loss": 0.3257, "step": 7178 }, { "epoch": 0.3249151391717583, "grad_norm": 0.6885006191925244, "learning_rate": 7.888059760173377e-06, "loss": 0.3643, "step": 7179 }, { "epoch": 0.3249603982801539, "grad_norm": 0.6801934899357927, "learning_rate": 7.887461437306221e-06, "loss": 0.3772, "step": 7180 }, { "epoch": 0.3250056573885494, "grad_norm": 0.7150719356238756, "learning_rate": 7.886863052396384e-06, "loss": 0.4863, "step": 7181 }, { "epoch": 0.325050916496945, "grad_norm": 0.626612080010428, "learning_rate": 7.886264605456727e-06, "loss": 0.3638, "step": 7182 }, { "epoch": 0.3250961756053406, "grad_norm": 0.6263977091282102, "learning_rate": 7.88566609650011e-06, "loss": 0.3396, "step": 7183 }, { "epoch": 0.32514143471373613, "grad_norm": 0.6505262702471224, "learning_rate": 7.88506752553939e-06, "loss": 0.3751, "step": 7184 }, { "epoch": 0.3251866938221317, "grad_norm": 0.35956124288248265, "learning_rate": 7.88446889258743e-06, "loss": 0.4744, "step": 7185 }, { "epoch": 0.32523195293052726, "grad_norm": 0.6126826293591199, "learning_rate": 7.883870197657094e-06, "loss": 0.3577, "step": 7186 }, { "epoch": 0.32527721203892285, "grad_norm": 0.6808837015295135, "learning_rate": 7.883271440761241e-06, "loss": 0.3959, "step": 7187 }, { "epoch": 0.3253224711473184, "grad_norm": 0.726997546831792, "learning_rate": 7.882672621912742e-06, "loss": 0.3182, "step": 7188 }, { "epoch": 0.32536773025571397, "grad_norm": 0.5954746959339828, "learning_rate": 7.882073741124464e-06, "loss": 0.3575, "step": 7189 }, { "epoch": 0.3254129893641095, "grad_norm": 0.6277763190542311, "learning_rate": 7.88147479840927e-06, "loss": 0.3882, "step": 7190 }, { "epoch": 0.3254582484725051, "grad_norm": 0.6181567794663823, "learning_rate": 7.880875793780031e-06, "loss": 0.3792, "step": 7191 }, { "epoch": 0.32550350758090063, "grad_norm": 0.6307714979444174, "learning_rate": 7.880276727249623e-06, "loss": 0.4817, "step": 7192 }, { "epoch": 0.3255487666892962, "grad_norm": 0.747922164182171, "learning_rate": 7.879677598830913e-06, "loss": 0.3931, "step": 7193 }, { "epoch": 0.3255940257976918, "grad_norm": 0.6431937919879824, "learning_rate": 7.879078408536774e-06, "loss": 0.3458, "step": 7194 }, { "epoch": 0.32563928490608735, "grad_norm": 0.3977026758300841, "learning_rate": 7.878479156380085e-06, "loss": 0.4914, "step": 7195 }, { "epoch": 0.32568454401448294, "grad_norm": 0.682964630211364, "learning_rate": 7.877879842373718e-06, "loss": 0.3708, "step": 7196 }, { "epoch": 0.32572980312287847, "grad_norm": 0.6231524576095671, "learning_rate": 7.877280466530552e-06, "loss": 0.3418, "step": 7197 }, { "epoch": 0.32577506223127406, "grad_norm": 0.6748437842992963, "learning_rate": 7.876681028863464e-06, "loss": 0.3746, "step": 7198 }, { "epoch": 0.3258203213396696, "grad_norm": 0.6550830959993491, "learning_rate": 7.876081529385338e-06, "loss": 0.3588, "step": 7199 }, { "epoch": 0.3258655804480652, "grad_norm": 0.6477789521912153, "learning_rate": 7.875481968109052e-06, "loss": 0.3705, "step": 7200 }, { "epoch": 0.3259108395564607, "grad_norm": 0.6372087242778248, "learning_rate": 7.874882345047491e-06, "loss": 0.3785, "step": 7201 }, { "epoch": 0.3259560986648563, "grad_norm": 0.655494204670402, "learning_rate": 7.874282660213537e-06, "loss": 0.3595, "step": 7202 }, { "epoch": 0.32600135777325184, "grad_norm": 0.6214300206617079, "learning_rate": 7.873682913620077e-06, "loss": 0.3709, "step": 7203 }, { "epoch": 0.32604661688164743, "grad_norm": 0.7395486618414999, "learning_rate": 7.873083105279996e-06, "loss": 0.4838, "step": 7204 }, { "epoch": 0.326091875990043, "grad_norm": 0.6643510155786785, "learning_rate": 7.872483235206184e-06, "loss": 0.34, "step": 7205 }, { "epoch": 0.32613713509843856, "grad_norm": 0.7851872919090189, "learning_rate": 7.87188330341153e-06, "loss": 0.3432, "step": 7206 }, { "epoch": 0.32618239420683415, "grad_norm": 0.6122471370986199, "learning_rate": 7.871283309908922e-06, "loss": 0.3881, "step": 7207 }, { "epoch": 0.3262276533152297, "grad_norm": 0.654839829130589, "learning_rate": 7.870683254711255e-06, "loss": 0.4115, "step": 7208 }, { "epoch": 0.32627291242362527, "grad_norm": 0.632937065166646, "learning_rate": 7.870083137831423e-06, "loss": 0.3534, "step": 7209 }, { "epoch": 0.3263181715320208, "grad_norm": 0.3925737232393038, "learning_rate": 7.869482959282318e-06, "loss": 0.4862, "step": 7210 }, { "epoch": 0.3263634306404164, "grad_norm": 0.6745670011729434, "learning_rate": 7.868882719076838e-06, "loss": 0.3442, "step": 7211 }, { "epoch": 0.32640868974881193, "grad_norm": 0.6684057044920896, "learning_rate": 7.868282417227877e-06, "loss": 0.3888, "step": 7212 }, { "epoch": 0.3264539488572075, "grad_norm": 0.6623891104338104, "learning_rate": 7.867682053748338e-06, "loss": 0.4021, "step": 7213 }, { "epoch": 0.32649920796560306, "grad_norm": 0.5684645700583332, "learning_rate": 7.86708162865112e-06, "loss": 0.3948, "step": 7214 }, { "epoch": 0.32654446707399865, "grad_norm": 0.38853354414543634, "learning_rate": 7.866481141949123e-06, "loss": 0.4923, "step": 7215 }, { "epoch": 0.3265897261823942, "grad_norm": 0.6899635765060905, "learning_rate": 7.86588059365525e-06, "loss": 0.4039, "step": 7216 }, { "epoch": 0.32663498529078977, "grad_norm": 0.637953863348367, "learning_rate": 7.865279983782402e-06, "loss": 0.403, "step": 7217 }, { "epoch": 0.32668024439918536, "grad_norm": 0.6447741093066474, "learning_rate": 7.864679312343491e-06, "loss": 0.419, "step": 7218 }, { "epoch": 0.3267255035075809, "grad_norm": 0.3132402057909027, "learning_rate": 7.864078579351418e-06, "loss": 0.4769, "step": 7219 }, { "epoch": 0.3267707626159765, "grad_norm": 0.6410307435099741, "learning_rate": 7.863477784819091e-06, "loss": 0.3385, "step": 7220 }, { "epoch": 0.326816021724372, "grad_norm": 0.8084080593940757, "learning_rate": 7.862876928759424e-06, "loss": 0.3662, "step": 7221 }, { "epoch": 0.3268612808327676, "grad_norm": 0.6242179182343427, "learning_rate": 7.862276011185323e-06, "loss": 0.3368, "step": 7222 }, { "epoch": 0.32690653994116314, "grad_norm": 0.6506736147066804, "learning_rate": 7.8616750321097e-06, "loss": 0.3591, "step": 7223 }, { "epoch": 0.32695179904955873, "grad_norm": 0.3743456101413249, "learning_rate": 7.861073991545472e-06, "loss": 0.5008, "step": 7224 }, { "epoch": 0.32699705815795427, "grad_norm": 0.8322622971337075, "learning_rate": 7.86047288950555e-06, "loss": 0.3881, "step": 7225 }, { "epoch": 0.32704231726634986, "grad_norm": 0.42578988571435983, "learning_rate": 7.859871726002852e-06, "loss": 0.493, "step": 7226 }, { "epoch": 0.3270875763747454, "grad_norm": 0.6491635693186513, "learning_rate": 7.859270501050292e-06, "loss": 0.3765, "step": 7227 }, { "epoch": 0.327132835483141, "grad_norm": 0.2764549467599382, "learning_rate": 7.858669214660792e-06, "loss": 0.4823, "step": 7228 }, { "epoch": 0.3271780945915366, "grad_norm": 0.7046311928792779, "learning_rate": 7.85806786684727e-06, "loss": 0.351, "step": 7229 }, { "epoch": 0.3272233536999321, "grad_norm": 0.3321521003907507, "learning_rate": 7.857466457622647e-06, "loss": 0.4904, "step": 7230 }, { "epoch": 0.3272686128083277, "grad_norm": 0.6566847794699036, "learning_rate": 7.856864986999845e-06, "loss": 0.3917, "step": 7231 }, { "epoch": 0.32731387191672323, "grad_norm": 0.629014744960846, "learning_rate": 7.856263454991791e-06, "loss": 0.3318, "step": 7232 }, { "epoch": 0.3273591310251188, "grad_norm": 0.6085911810930542, "learning_rate": 7.855661861611406e-06, "loss": 0.3438, "step": 7233 }, { "epoch": 0.32740439013351436, "grad_norm": 0.6578764127823246, "learning_rate": 7.855060206871618e-06, "loss": 0.419, "step": 7234 }, { "epoch": 0.32744964924190995, "grad_norm": 0.6536238269200988, "learning_rate": 7.854458490785354e-06, "loss": 0.3811, "step": 7235 }, { "epoch": 0.3274949083503055, "grad_norm": 0.381547503313166, "learning_rate": 7.853856713365547e-06, "loss": 0.4837, "step": 7236 }, { "epoch": 0.32754016745870107, "grad_norm": 0.7208941435848779, "learning_rate": 7.853254874625122e-06, "loss": 0.3384, "step": 7237 }, { "epoch": 0.3275854265670966, "grad_norm": 0.6808342916905882, "learning_rate": 7.852652974577012e-06, "loss": 0.3551, "step": 7238 }, { "epoch": 0.3276306856754922, "grad_norm": 0.7001374804992242, "learning_rate": 7.852051013234153e-06, "loss": 0.375, "step": 7239 }, { "epoch": 0.3276759447838878, "grad_norm": 0.650543073012321, "learning_rate": 7.851448990609476e-06, "loss": 0.3435, "step": 7240 }, { "epoch": 0.3277212038922833, "grad_norm": 0.34818232252070125, "learning_rate": 7.850846906715917e-06, "loss": 0.4859, "step": 7241 }, { "epoch": 0.3277664630006789, "grad_norm": 0.6569965955754113, "learning_rate": 7.850244761566415e-06, "loss": 0.35, "step": 7242 }, { "epoch": 0.32781172210907444, "grad_norm": 0.6249546782859293, "learning_rate": 7.849642555173907e-06, "loss": 0.3492, "step": 7243 }, { "epoch": 0.32785698121747003, "grad_norm": 0.7960260771660157, "learning_rate": 7.849040287551331e-06, "loss": 0.3906, "step": 7244 }, { "epoch": 0.32790224032586557, "grad_norm": 0.6504881285244849, "learning_rate": 7.848437958711631e-06, "loss": 0.3752, "step": 7245 }, { "epoch": 0.32794749943426116, "grad_norm": 0.6562907636624832, "learning_rate": 7.847835568667746e-06, "loss": 0.341, "step": 7246 }, { "epoch": 0.3279927585426567, "grad_norm": 0.663970148382978, "learning_rate": 7.847233117432623e-06, "loss": 0.4125, "step": 7247 }, { "epoch": 0.3280380176510523, "grad_norm": 0.611816963228363, "learning_rate": 7.846630605019204e-06, "loss": 0.3623, "step": 7248 }, { "epoch": 0.3280832767594478, "grad_norm": 0.8418055709857473, "learning_rate": 7.846028031440436e-06, "loss": 0.3735, "step": 7249 }, { "epoch": 0.3281285358678434, "grad_norm": 0.4046248697144449, "learning_rate": 7.845425396709266e-06, "loss": 0.4738, "step": 7250 }, { "epoch": 0.32817379497623894, "grad_norm": 0.6705514103940412, "learning_rate": 7.844822700838644e-06, "loss": 0.3467, "step": 7251 }, { "epoch": 0.32821905408463453, "grad_norm": 0.663416672149569, "learning_rate": 7.84421994384152e-06, "loss": 0.388, "step": 7252 }, { "epoch": 0.3282643131930301, "grad_norm": 0.651365532004636, "learning_rate": 7.843617125730842e-06, "loss": 0.3884, "step": 7253 }, { "epoch": 0.32830957230142566, "grad_norm": 0.6076682619566085, "learning_rate": 7.843014246519569e-06, "loss": 0.3548, "step": 7254 }, { "epoch": 0.32835483140982125, "grad_norm": 0.6729372693949679, "learning_rate": 7.84241130622065e-06, "loss": 0.3634, "step": 7255 }, { "epoch": 0.3284000905182168, "grad_norm": 0.6264003233718723, "learning_rate": 7.841808304847041e-06, "loss": 0.3481, "step": 7256 }, { "epoch": 0.32844534962661237, "grad_norm": 0.8129037846541436, "learning_rate": 7.841205242411701e-06, "loss": 0.3632, "step": 7257 }, { "epoch": 0.3284906087350079, "grad_norm": 0.37059103009216027, "learning_rate": 7.840602118927584e-06, "loss": 0.4537, "step": 7258 }, { "epoch": 0.3285358678434035, "grad_norm": 0.6352263729186042, "learning_rate": 7.839998934407652e-06, "loss": 0.3527, "step": 7259 }, { "epoch": 0.32858112695179903, "grad_norm": 0.6370304748624506, "learning_rate": 7.839395688864868e-06, "loss": 0.3522, "step": 7260 }, { "epoch": 0.3286263860601946, "grad_norm": 0.33410480912428436, "learning_rate": 7.83879238231219e-06, "loss": 0.4857, "step": 7261 }, { "epoch": 0.32867164516859015, "grad_norm": 0.39715243089542934, "learning_rate": 7.838189014762582e-06, "loss": 0.5234, "step": 7262 }, { "epoch": 0.32871690427698574, "grad_norm": 0.6725589605603078, "learning_rate": 7.83758558622901e-06, "loss": 0.3386, "step": 7263 }, { "epoch": 0.32876216338538133, "grad_norm": 0.7081106837258482, "learning_rate": 7.836982096724438e-06, "loss": 0.3806, "step": 7264 }, { "epoch": 0.32880742249377687, "grad_norm": 0.6134248938641275, "learning_rate": 7.836378546261834e-06, "loss": 0.3394, "step": 7265 }, { "epoch": 0.32885268160217246, "grad_norm": 0.6459265887108824, "learning_rate": 7.835774934854166e-06, "loss": 0.3418, "step": 7266 }, { "epoch": 0.328897940710568, "grad_norm": 0.6242275569284673, "learning_rate": 7.835171262514402e-06, "loss": 0.3263, "step": 7267 }, { "epoch": 0.3289431998189636, "grad_norm": 0.6192643139391333, "learning_rate": 7.834567529255519e-06, "loss": 0.3335, "step": 7268 }, { "epoch": 0.3289884589273591, "grad_norm": 0.7266708113236864, "learning_rate": 7.833963735090484e-06, "loss": 0.3673, "step": 7269 }, { "epoch": 0.3290337180357547, "grad_norm": 0.42644557495266, "learning_rate": 7.833359880032272e-06, "loss": 0.4821, "step": 7270 }, { "epoch": 0.32907897714415024, "grad_norm": 0.37821061706041686, "learning_rate": 7.832755964093859e-06, "loss": 0.51, "step": 7271 }, { "epoch": 0.32912423625254583, "grad_norm": 0.3157916385785731, "learning_rate": 7.832151987288219e-06, "loss": 0.5135, "step": 7272 }, { "epoch": 0.32916949536094137, "grad_norm": 0.7308948326064201, "learning_rate": 7.83154794962833e-06, "loss": 0.3712, "step": 7273 }, { "epoch": 0.32921475446933696, "grad_norm": 0.6851166754739852, "learning_rate": 7.830943851127175e-06, "loss": 0.3813, "step": 7274 }, { "epoch": 0.3292600135777325, "grad_norm": 0.6385881894664626, "learning_rate": 7.830339691797727e-06, "loss": 0.3423, "step": 7275 }, { "epoch": 0.3293052726861281, "grad_norm": 0.9217747573027575, "learning_rate": 7.829735471652978e-06, "loss": 0.3602, "step": 7276 }, { "epoch": 0.32935053179452367, "grad_norm": 0.6708483049494212, "learning_rate": 7.8291311907059e-06, "loss": 0.371, "step": 7277 }, { "epoch": 0.3293957909029192, "grad_norm": 0.7331380798086038, "learning_rate": 7.828526848969482e-06, "loss": 0.4031, "step": 7278 }, { "epoch": 0.3294410500113148, "grad_norm": 0.7548536600065007, "learning_rate": 7.827922446456711e-06, "loss": 0.3531, "step": 7279 }, { "epoch": 0.32948630911971033, "grad_norm": 0.6395017247716274, "learning_rate": 7.827317983180571e-06, "loss": 0.376, "step": 7280 }, { "epoch": 0.3295315682281059, "grad_norm": 0.688170077140715, "learning_rate": 7.826713459154051e-06, "loss": 0.3621, "step": 7281 }, { "epoch": 0.32957682733650145, "grad_norm": 0.6798231401394088, "learning_rate": 7.826108874390141e-06, "loss": 0.3872, "step": 7282 }, { "epoch": 0.32962208644489704, "grad_norm": 0.7378493099029014, "learning_rate": 7.82550422890183e-06, "loss": 0.358, "step": 7283 }, { "epoch": 0.3296673455532926, "grad_norm": 0.6859895137902425, "learning_rate": 7.824899522702112e-06, "loss": 0.3741, "step": 7284 }, { "epoch": 0.32971260466168817, "grad_norm": 0.658983030067839, "learning_rate": 7.824294755803978e-06, "loss": 0.3697, "step": 7285 }, { "epoch": 0.3297578637700837, "grad_norm": 0.8659072243183273, "learning_rate": 7.823689928220424e-06, "loss": 0.3796, "step": 7286 }, { "epoch": 0.3298031228784793, "grad_norm": 0.8688001597626949, "learning_rate": 7.823085039964446e-06, "loss": 0.509, "step": 7287 }, { "epoch": 0.3298483819868749, "grad_norm": 0.6597518755134283, "learning_rate": 7.82248009104904e-06, "loss": 0.4867, "step": 7288 }, { "epoch": 0.3298936410952704, "grad_norm": 0.3427715840543005, "learning_rate": 7.821875081487208e-06, "loss": 0.496, "step": 7289 }, { "epoch": 0.329938900203666, "grad_norm": 0.6649470951982919, "learning_rate": 7.821270011291946e-06, "loss": 0.3074, "step": 7290 }, { "epoch": 0.32998415931206154, "grad_norm": 0.7713963361882481, "learning_rate": 7.820664880476257e-06, "loss": 0.3383, "step": 7291 }, { "epoch": 0.33002941842045713, "grad_norm": 0.6840185544238141, "learning_rate": 7.820059689053142e-06, "loss": 0.3775, "step": 7292 }, { "epoch": 0.33007467752885267, "grad_norm": 0.7067990947071694, "learning_rate": 7.819454437035605e-06, "loss": 0.3748, "step": 7293 }, { "epoch": 0.33011993663724826, "grad_norm": 0.6848570477143894, "learning_rate": 7.818849124436651e-06, "loss": 0.3773, "step": 7294 }, { "epoch": 0.3301651957456438, "grad_norm": 0.6522391481043741, "learning_rate": 7.818243751269288e-06, "loss": 0.4016, "step": 7295 }, { "epoch": 0.3302104548540394, "grad_norm": 0.6509650533075849, "learning_rate": 7.817638317546521e-06, "loss": 0.3688, "step": 7296 }, { "epoch": 0.3302557139624349, "grad_norm": 0.785981076357508, "learning_rate": 7.817032823281362e-06, "loss": 0.3385, "step": 7297 }, { "epoch": 0.3303009730708305, "grad_norm": 0.7189844060702856, "learning_rate": 7.816427268486819e-06, "loss": 0.3952, "step": 7298 }, { "epoch": 0.3303462321792261, "grad_norm": 0.6982142130746813, "learning_rate": 7.815821653175903e-06, "loss": 0.3857, "step": 7299 }, { "epoch": 0.33039149128762163, "grad_norm": 0.6773609322645711, "learning_rate": 7.815215977361628e-06, "loss": 0.3254, "step": 7300 }, { "epoch": 0.3304367503960172, "grad_norm": 0.7337843174717779, "learning_rate": 7.814610241057009e-06, "loss": 0.3923, "step": 7301 }, { "epoch": 0.33048200950441275, "grad_norm": 0.617776076989984, "learning_rate": 7.814004444275058e-06, "loss": 0.3394, "step": 7302 }, { "epoch": 0.33052726861280834, "grad_norm": 0.6361374966992775, "learning_rate": 7.813398587028798e-06, "loss": 0.3687, "step": 7303 }, { "epoch": 0.3305725277212039, "grad_norm": 0.6348279118574923, "learning_rate": 7.81279266933124e-06, "loss": 0.3844, "step": 7304 }, { "epoch": 0.33061778682959947, "grad_norm": 2.8535305256216423, "learning_rate": 7.812186691195407e-06, "loss": 0.5487, "step": 7305 }, { "epoch": 0.330663045937995, "grad_norm": 0.7068173549230775, "learning_rate": 7.811580652634319e-06, "loss": 0.379, "step": 7306 }, { "epoch": 0.3307083050463906, "grad_norm": 0.690644774893463, "learning_rate": 7.810974553660998e-06, "loss": 0.375, "step": 7307 }, { "epoch": 0.33075356415478613, "grad_norm": 0.663678126303351, "learning_rate": 7.810368394288468e-06, "loss": 0.3685, "step": 7308 }, { "epoch": 0.3307988232631817, "grad_norm": 0.740229603627771, "learning_rate": 7.809762174529752e-06, "loss": 0.3729, "step": 7309 }, { "epoch": 0.33084408237157725, "grad_norm": 0.7410494585371324, "learning_rate": 7.809155894397876e-06, "loss": 0.3873, "step": 7310 }, { "epoch": 0.33088934147997284, "grad_norm": 0.6234515829771916, "learning_rate": 7.808549553905867e-06, "loss": 0.348, "step": 7311 }, { "epoch": 0.33093460058836843, "grad_norm": 0.6860869376405904, "learning_rate": 7.807943153066754e-06, "loss": 0.3357, "step": 7312 }, { "epoch": 0.33097985969676397, "grad_norm": 1.2962262711218264, "learning_rate": 7.807336691893568e-06, "loss": 0.5247, "step": 7313 }, { "epoch": 0.33102511880515956, "grad_norm": 0.6934296670610857, "learning_rate": 7.806730170399337e-06, "loss": 0.3651, "step": 7314 }, { "epoch": 0.3310703779135551, "grad_norm": 0.7025851521873115, "learning_rate": 7.806123588597094e-06, "loss": 0.3592, "step": 7315 }, { "epoch": 0.3311156370219507, "grad_norm": 0.768101167716597, "learning_rate": 7.805516946499876e-06, "loss": 0.3298, "step": 7316 }, { "epoch": 0.3311608961303462, "grad_norm": 0.6351060711723828, "learning_rate": 7.804910244120714e-06, "loss": 0.3296, "step": 7317 }, { "epoch": 0.3312061552387418, "grad_norm": 0.6978681798720107, "learning_rate": 7.804303481472645e-06, "loss": 0.3914, "step": 7318 }, { "epoch": 0.33125141434713734, "grad_norm": 0.6061560182864592, "learning_rate": 7.80369665856871e-06, "loss": 0.3256, "step": 7319 }, { "epoch": 0.33129667345553293, "grad_norm": 0.6335131386121605, "learning_rate": 7.80308977542194e-06, "loss": 0.3703, "step": 7320 }, { "epoch": 0.33134193256392847, "grad_norm": 1.3965455827885276, "learning_rate": 7.802482832045383e-06, "loss": 0.3467, "step": 7321 }, { "epoch": 0.33138719167232406, "grad_norm": 0.6540973659645941, "learning_rate": 7.801875828452077e-06, "loss": 0.4098, "step": 7322 }, { "epoch": 0.33143245078071965, "grad_norm": 0.5751699292719178, "learning_rate": 7.801268764655063e-06, "loss": 0.3413, "step": 7323 }, { "epoch": 0.3314777098891152, "grad_norm": 0.6726602987568291, "learning_rate": 7.800661640667388e-06, "loss": 0.3027, "step": 7324 }, { "epoch": 0.33152296899751077, "grad_norm": 0.6510373564214473, "learning_rate": 7.800054456502096e-06, "loss": 0.3328, "step": 7325 }, { "epoch": 0.3315682281059063, "grad_norm": 0.6380091514174852, "learning_rate": 7.799447212172233e-06, "loss": 0.3574, "step": 7326 }, { "epoch": 0.3316134872143019, "grad_norm": 0.6531228281965532, "learning_rate": 7.798839907690847e-06, "loss": 0.3581, "step": 7327 }, { "epoch": 0.33165874632269743, "grad_norm": 0.9721212229654579, "learning_rate": 7.798232543070987e-06, "loss": 0.4774, "step": 7328 }, { "epoch": 0.331704005431093, "grad_norm": 0.6227626915657207, "learning_rate": 7.797625118325705e-06, "loss": 0.3739, "step": 7329 }, { "epoch": 0.33174926453948855, "grad_norm": 0.7719234444001082, "learning_rate": 7.797017633468052e-06, "loss": 0.4823, "step": 7330 }, { "epoch": 0.33179452364788414, "grad_norm": 0.627263679418447, "learning_rate": 7.796410088511078e-06, "loss": 0.3712, "step": 7331 }, { "epoch": 0.3318397827562797, "grad_norm": 0.3508190839711614, "learning_rate": 7.79580248346784e-06, "loss": 0.4973, "step": 7332 }, { "epoch": 0.33188504186467527, "grad_norm": 0.6231007923758607, "learning_rate": 7.795194818351395e-06, "loss": 0.363, "step": 7333 }, { "epoch": 0.33193030097307086, "grad_norm": 0.6697970573735087, "learning_rate": 7.794587093174797e-06, "loss": 0.4091, "step": 7334 }, { "epoch": 0.3319755600814664, "grad_norm": 0.6362010756444667, "learning_rate": 7.793979307951108e-06, "loss": 0.3471, "step": 7335 }, { "epoch": 0.332020819189862, "grad_norm": 0.6596318056401064, "learning_rate": 7.79337146269338e-06, "loss": 0.3645, "step": 7336 }, { "epoch": 0.3320660782982575, "grad_norm": 0.9210714304746968, "learning_rate": 7.792763557414683e-06, "loss": 0.3606, "step": 7337 }, { "epoch": 0.3321113374066531, "grad_norm": 0.5987292893054342, "learning_rate": 7.792155592128072e-06, "loss": 0.3364, "step": 7338 }, { "epoch": 0.33215659651504864, "grad_norm": 0.6215979190882727, "learning_rate": 7.791547566846612e-06, "loss": 0.3604, "step": 7339 }, { "epoch": 0.33220185562344423, "grad_norm": 0.646836740101282, "learning_rate": 7.79093948158337e-06, "loss": 0.3662, "step": 7340 }, { "epoch": 0.33224711473183977, "grad_norm": 0.6432445331083202, "learning_rate": 7.790331336351408e-06, "loss": 0.3607, "step": 7341 }, { "epoch": 0.33229237384023536, "grad_norm": 0.8022329252854027, "learning_rate": 7.7897231311638e-06, "loss": 0.4262, "step": 7342 }, { "epoch": 0.3323376329486309, "grad_norm": 0.6805560295429158, "learning_rate": 7.789114866033607e-06, "loss": 0.3422, "step": 7343 }, { "epoch": 0.3323828920570265, "grad_norm": 0.7509513883759419, "learning_rate": 7.788506540973902e-06, "loss": 0.4154, "step": 7344 }, { "epoch": 0.332428151165422, "grad_norm": 1.1071314317950283, "learning_rate": 7.787898155997755e-06, "loss": 0.5069, "step": 7345 }, { "epoch": 0.3324734102738176, "grad_norm": 0.992944270441858, "learning_rate": 7.787289711118238e-06, "loss": 0.5124, "step": 7346 }, { "epoch": 0.3325186693822132, "grad_norm": 0.7760811321913402, "learning_rate": 7.786681206348428e-06, "loss": 0.3206, "step": 7347 }, { "epoch": 0.33256392849060873, "grad_norm": 0.5570280244602357, "learning_rate": 7.786072641701397e-06, "loss": 0.502, "step": 7348 }, { "epoch": 0.3326091875990043, "grad_norm": 0.43038194913860595, "learning_rate": 7.78546401719022e-06, "loss": 0.5162, "step": 7349 }, { "epoch": 0.33265444670739985, "grad_norm": 0.7139064690910768, "learning_rate": 7.784855332827979e-06, "loss": 0.3599, "step": 7350 }, { "epoch": 0.33269970581579544, "grad_norm": 0.6840052243027095, "learning_rate": 7.784246588627747e-06, "loss": 0.346, "step": 7351 }, { "epoch": 0.332744964924191, "grad_norm": 0.7906548268620531, "learning_rate": 7.783637784602608e-06, "loss": 0.3728, "step": 7352 }, { "epoch": 0.33279022403258657, "grad_norm": 0.9588818268057699, "learning_rate": 7.783028920765644e-06, "loss": 0.5166, "step": 7353 }, { "epoch": 0.3328354831409821, "grad_norm": 0.628264054881028, "learning_rate": 7.782419997129934e-06, "loss": 0.3538, "step": 7354 }, { "epoch": 0.3328807422493777, "grad_norm": 0.7094367104820966, "learning_rate": 7.781811013708565e-06, "loss": 0.3691, "step": 7355 }, { "epoch": 0.3329260013577732, "grad_norm": 0.9712871371707572, "learning_rate": 7.78120197051462e-06, "loss": 0.5178, "step": 7356 }, { "epoch": 0.3329712604661688, "grad_norm": 0.7065811609441587, "learning_rate": 7.780592867561187e-06, "loss": 0.3436, "step": 7357 }, { "epoch": 0.3330165195745644, "grad_norm": 0.8080745881239523, "learning_rate": 7.779983704861354e-06, "loss": 0.5165, "step": 7358 }, { "epoch": 0.33306177868295994, "grad_norm": 0.5864163976829028, "learning_rate": 7.779374482428206e-06, "loss": 0.4997, "step": 7359 }, { "epoch": 0.33310703779135553, "grad_norm": 0.6685332325376612, "learning_rate": 7.77876520027484e-06, "loss": 0.3599, "step": 7360 }, { "epoch": 0.33315229689975107, "grad_norm": 0.7441894827625023, "learning_rate": 7.778155858414342e-06, "loss": 0.3582, "step": 7361 }, { "epoch": 0.33319755600814666, "grad_norm": 0.733170379006083, "learning_rate": 7.777546456859808e-06, "loss": 0.3976, "step": 7362 }, { "epoch": 0.3332428151165422, "grad_norm": 0.621295126537377, "learning_rate": 7.77693699562433e-06, "loss": 0.3647, "step": 7363 }, { "epoch": 0.3332880742249378, "grad_norm": 0.6711556936745743, "learning_rate": 7.776327474721009e-06, "loss": 0.5287, "step": 7364 }, { "epoch": 0.3333333333333333, "grad_norm": 0.722177606091501, "learning_rate": 7.775717894162933e-06, "loss": 0.3633, "step": 7365 }, { "epoch": 0.3333785924417289, "grad_norm": 0.7038042521755242, "learning_rate": 7.775108253963207e-06, "loss": 0.365, "step": 7366 }, { "epoch": 0.33342385155012444, "grad_norm": 0.7542576892215929, "learning_rate": 7.774498554134925e-06, "loss": 0.4828, "step": 7367 }, { "epoch": 0.33346911065852003, "grad_norm": 0.622741190715478, "learning_rate": 7.773888794691192e-06, "loss": 0.4903, "step": 7368 }, { "epoch": 0.3335143697669156, "grad_norm": 0.6829381274063849, "learning_rate": 7.773278975645109e-06, "loss": 0.3455, "step": 7369 }, { "epoch": 0.33355962887531115, "grad_norm": 0.7782974536058487, "learning_rate": 7.772669097009777e-06, "loss": 0.36, "step": 7370 }, { "epoch": 0.33360488798370674, "grad_norm": 0.5081855472190925, "learning_rate": 7.772059158798302e-06, "loss": 0.4576, "step": 7371 }, { "epoch": 0.3336501470921023, "grad_norm": 0.6705304178050254, "learning_rate": 7.77144916102379e-06, "loss": 0.3765, "step": 7372 }, { "epoch": 0.33369540620049787, "grad_norm": 0.38144921018083416, "learning_rate": 7.770839103699345e-06, "loss": 0.4927, "step": 7373 }, { "epoch": 0.3337406653088934, "grad_norm": 0.7062153479980101, "learning_rate": 7.77022898683808e-06, "loss": 0.3313, "step": 7374 }, { "epoch": 0.333785924417289, "grad_norm": 0.4372021191401388, "learning_rate": 7.769618810453101e-06, "loss": 0.4999, "step": 7375 }, { "epoch": 0.3338311835256845, "grad_norm": 0.6507493359356833, "learning_rate": 7.769008574557522e-06, "loss": 0.3894, "step": 7376 }, { "epoch": 0.3338764426340801, "grad_norm": 0.7817304510978234, "learning_rate": 7.76839827916445e-06, "loss": 0.3949, "step": 7377 }, { "epoch": 0.33392170174247565, "grad_norm": 0.6754684107822001, "learning_rate": 7.767787924287005e-06, "loss": 0.3897, "step": 7378 }, { "epoch": 0.33396696085087124, "grad_norm": 0.68264505263259, "learning_rate": 7.767177509938294e-06, "loss": 0.4152, "step": 7379 }, { "epoch": 0.3340122199592668, "grad_norm": 0.607110426394516, "learning_rate": 7.76656703613144e-06, "loss": 0.3981, "step": 7380 }, { "epoch": 0.33405747906766237, "grad_norm": 0.6315941771087339, "learning_rate": 7.765956502879557e-06, "loss": 0.351, "step": 7381 }, { "epoch": 0.33410273817605796, "grad_norm": 0.622596703964894, "learning_rate": 7.765345910195764e-06, "loss": 0.3744, "step": 7382 }, { "epoch": 0.3341479972844535, "grad_norm": 0.6443042347847058, "learning_rate": 7.76473525809318e-06, "loss": 0.3394, "step": 7383 }, { "epoch": 0.3341932563928491, "grad_norm": 0.6177848452709123, "learning_rate": 7.764124546584926e-06, "loss": 0.3718, "step": 7384 }, { "epoch": 0.3342385155012446, "grad_norm": 0.6317024650528028, "learning_rate": 7.763513775684125e-06, "loss": 0.3415, "step": 7385 }, { "epoch": 0.3342837746096402, "grad_norm": 0.6268294522319873, "learning_rate": 7.7629029454039e-06, "loss": 0.3432, "step": 7386 }, { "epoch": 0.33432903371803574, "grad_norm": 0.6961497243147552, "learning_rate": 7.762292055757379e-06, "loss": 0.4405, "step": 7387 }, { "epoch": 0.33437429282643133, "grad_norm": 0.6416482361622061, "learning_rate": 7.761681106757682e-06, "loss": 0.3533, "step": 7388 }, { "epoch": 0.33441955193482686, "grad_norm": 0.6484749481198567, "learning_rate": 7.761070098417943e-06, "loss": 0.3225, "step": 7389 }, { "epoch": 0.33446481104322245, "grad_norm": 0.7573052274699215, "learning_rate": 7.760459030751285e-06, "loss": 0.4899, "step": 7390 }, { "epoch": 0.334510070151618, "grad_norm": 0.6484070916262837, "learning_rate": 7.759847903770841e-06, "loss": 0.3776, "step": 7391 }, { "epoch": 0.3345553292600136, "grad_norm": 0.6403106131889722, "learning_rate": 7.759236717489743e-06, "loss": 0.3534, "step": 7392 }, { "epoch": 0.33460058836840917, "grad_norm": 0.7201737675332623, "learning_rate": 7.75862547192112e-06, "loss": 0.4111, "step": 7393 }, { "epoch": 0.3346458474768047, "grad_norm": 0.3878614133578035, "learning_rate": 7.75801416707811e-06, "loss": 0.4685, "step": 7394 }, { "epoch": 0.3346911065852003, "grad_norm": 0.5983085649938688, "learning_rate": 7.757402802973846e-06, "loss": 0.3261, "step": 7395 }, { "epoch": 0.3347363656935958, "grad_norm": 0.7292220648834236, "learning_rate": 7.756791379621461e-06, "loss": 0.3354, "step": 7396 }, { "epoch": 0.3347816248019914, "grad_norm": 0.6933013859235289, "learning_rate": 7.756179897034101e-06, "loss": 0.3502, "step": 7397 }, { "epoch": 0.33482688391038695, "grad_norm": 0.7088971477959854, "learning_rate": 7.7555683552249e-06, "loss": 0.3665, "step": 7398 }, { "epoch": 0.33487214301878254, "grad_norm": 0.6655689664230773, "learning_rate": 7.754956754206995e-06, "loss": 0.4009, "step": 7399 }, { "epoch": 0.3349174021271781, "grad_norm": 0.6449599873272122, "learning_rate": 7.754345093993531e-06, "loss": 0.4111, "step": 7400 }, { "epoch": 0.33496266123557367, "grad_norm": 0.642456407875221, "learning_rate": 7.753733374597651e-06, "loss": 0.2901, "step": 7401 }, { "epoch": 0.3350079203439692, "grad_norm": 0.47876243681554803, "learning_rate": 7.7531215960325e-06, "loss": 0.4831, "step": 7402 }, { "epoch": 0.3350531794523648, "grad_norm": 0.6578598929289875, "learning_rate": 7.75250975831122e-06, "loss": 0.395, "step": 7403 }, { "epoch": 0.3350984385607603, "grad_norm": 0.43312235044318054, "learning_rate": 7.751897861446957e-06, "loss": 0.4638, "step": 7404 }, { "epoch": 0.3351436976691559, "grad_norm": 0.5912177865190626, "learning_rate": 7.751285905452863e-06, "loss": 0.3715, "step": 7405 }, { "epoch": 0.3351889567775515, "grad_norm": 0.6617835213947177, "learning_rate": 7.750673890342087e-06, "loss": 0.3882, "step": 7406 }, { "epoch": 0.33523421588594704, "grad_norm": 0.33686268188490154, "learning_rate": 7.750061816127773e-06, "loss": 0.4945, "step": 7407 }, { "epoch": 0.33527947499434263, "grad_norm": 0.6242490984195372, "learning_rate": 7.749449682823077e-06, "loss": 0.3567, "step": 7408 }, { "epoch": 0.33532473410273816, "grad_norm": 0.3702368617253743, "learning_rate": 7.748837490441154e-06, "loss": 0.4814, "step": 7409 }, { "epoch": 0.33536999321113375, "grad_norm": 0.31983652680776037, "learning_rate": 7.748225238995155e-06, "loss": 0.4851, "step": 7410 }, { "epoch": 0.3354152523195293, "grad_norm": 0.6678022578758226, "learning_rate": 7.747612928498236e-06, "loss": 0.3703, "step": 7411 }, { "epoch": 0.3354605114279249, "grad_norm": 0.6487321452560142, "learning_rate": 7.747000558963553e-06, "loss": 0.3133, "step": 7412 }, { "epoch": 0.3355057705363204, "grad_norm": 0.37027873488301116, "learning_rate": 7.746388130404266e-06, "loss": 0.5095, "step": 7413 }, { "epoch": 0.335551029644716, "grad_norm": 0.5749342746992915, "learning_rate": 7.745775642833532e-06, "loss": 0.3537, "step": 7414 }, { "epoch": 0.33559628875311154, "grad_norm": 0.3615744542074436, "learning_rate": 7.745163096264512e-06, "loss": 0.5016, "step": 7415 }, { "epoch": 0.33564154786150713, "grad_norm": 0.6853723855206779, "learning_rate": 7.74455049071037e-06, "loss": 0.3701, "step": 7416 }, { "epoch": 0.3356868069699027, "grad_norm": 0.6218699873168575, "learning_rate": 7.743937826184266e-06, "loss": 0.329, "step": 7417 }, { "epoch": 0.33573206607829825, "grad_norm": 0.6405323808791121, "learning_rate": 7.743325102699366e-06, "loss": 0.3976, "step": 7418 }, { "epoch": 0.33577732518669384, "grad_norm": 0.7927461115027449, "learning_rate": 7.742712320268835e-06, "loss": 0.3702, "step": 7419 }, { "epoch": 0.3358225842950894, "grad_norm": 0.37805600475702256, "learning_rate": 7.742099478905837e-06, "loss": 0.5159, "step": 7420 }, { "epoch": 0.33586784340348497, "grad_norm": 0.6019854929627273, "learning_rate": 7.741486578623546e-06, "loss": 0.3676, "step": 7421 }, { "epoch": 0.3359131025118805, "grad_norm": 0.5992008219954912, "learning_rate": 7.740873619435127e-06, "loss": 0.3441, "step": 7422 }, { "epoch": 0.3359583616202761, "grad_norm": 0.6385439615181806, "learning_rate": 7.740260601353755e-06, "loss": 0.3698, "step": 7423 }, { "epoch": 0.3360036207286716, "grad_norm": 0.8383219944781505, "learning_rate": 7.739647524392595e-06, "loss": 0.337, "step": 7424 }, { "epoch": 0.3360488798370672, "grad_norm": 0.7062114631525629, "learning_rate": 7.739034388564826e-06, "loss": 0.3804, "step": 7425 }, { "epoch": 0.33609413894546275, "grad_norm": 1.0240473141234805, "learning_rate": 7.738421193883618e-06, "loss": 0.385, "step": 7426 }, { "epoch": 0.33613939805385834, "grad_norm": 0.4036271018286075, "learning_rate": 7.737807940362153e-06, "loss": 0.4713, "step": 7427 }, { "epoch": 0.33618465716225393, "grad_norm": 0.6935362243615151, "learning_rate": 7.7371946280136e-06, "loss": 0.3415, "step": 7428 }, { "epoch": 0.33622991627064946, "grad_norm": 0.6765723463844222, "learning_rate": 7.736581256851143e-06, "loss": 0.3547, "step": 7429 }, { "epoch": 0.33627517537904505, "grad_norm": 0.29491475836160913, "learning_rate": 7.735967826887957e-06, "loss": 0.507, "step": 7430 }, { "epoch": 0.3363204344874406, "grad_norm": 0.6213753045889875, "learning_rate": 7.73535433813723e-06, "loss": 0.3598, "step": 7431 }, { "epoch": 0.3363656935958362, "grad_norm": 0.6048305024715104, "learning_rate": 7.734740790612137e-06, "loss": 0.3531, "step": 7432 }, { "epoch": 0.3364109527042317, "grad_norm": 0.6111012611969582, "learning_rate": 7.734127184325862e-06, "loss": 0.3652, "step": 7433 }, { "epoch": 0.3364562118126273, "grad_norm": 0.7314719236909824, "learning_rate": 7.73351351929159e-06, "loss": 0.3873, "step": 7434 }, { "epoch": 0.33650147092102284, "grad_norm": 0.6751215172117923, "learning_rate": 7.732899795522511e-06, "loss": 0.3584, "step": 7435 }, { "epoch": 0.33654673002941843, "grad_norm": 0.6526724286809542, "learning_rate": 7.732286013031807e-06, "loss": 0.3316, "step": 7436 }, { "epoch": 0.33659198913781396, "grad_norm": 0.8181862564068937, "learning_rate": 7.73167217183267e-06, "loss": 0.3682, "step": 7437 }, { "epoch": 0.33663724824620955, "grad_norm": 0.6340294789997006, "learning_rate": 7.731058271938286e-06, "loss": 0.3887, "step": 7438 }, { "epoch": 0.3366825073546051, "grad_norm": 0.6380738026004611, "learning_rate": 7.73044431336185e-06, "loss": 0.3757, "step": 7439 }, { "epoch": 0.3367277664630007, "grad_norm": 0.7063121664201003, "learning_rate": 7.729830296116549e-06, "loss": 0.3932, "step": 7440 }, { "epoch": 0.33677302557139627, "grad_norm": 0.6163861356537637, "learning_rate": 7.729216220215579e-06, "loss": 0.3733, "step": 7441 }, { "epoch": 0.3368182846797918, "grad_norm": 0.6808044512307527, "learning_rate": 7.728602085672136e-06, "loss": 0.3617, "step": 7442 }, { "epoch": 0.3368635437881874, "grad_norm": 0.6416138091184128, "learning_rate": 7.727987892499413e-06, "loss": 0.301, "step": 7443 }, { "epoch": 0.3369088028965829, "grad_norm": 0.6412030801412819, "learning_rate": 7.72737364071061e-06, "loss": 0.3737, "step": 7444 }, { "epoch": 0.3369540620049785, "grad_norm": 0.6282639242310271, "learning_rate": 7.726759330318922e-06, "loss": 0.3606, "step": 7445 }, { "epoch": 0.33699932111337405, "grad_norm": 0.7902263070049047, "learning_rate": 7.726144961337552e-06, "loss": 0.3427, "step": 7446 }, { "epoch": 0.33704458022176964, "grad_norm": 1.2046021596760628, "learning_rate": 7.7255305337797e-06, "loss": 0.3538, "step": 7447 }, { "epoch": 0.3370898393301652, "grad_norm": 0.67347299103925, "learning_rate": 7.724916047658568e-06, "loss": 0.3803, "step": 7448 }, { "epoch": 0.33713509843856077, "grad_norm": 0.6249721615380065, "learning_rate": 7.724301502987357e-06, "loss": 0.3623, "step": 7449 }, { "epoch": 0.3371803575469563, "grad_norm": 0.6553766994577328, "learning_rate": 7.723686899779277e-06, "loss": 0.3445, "step": 7450 }, { "epoch": 0.3372256166553519, "grad_norm": 0.6312510750549889, "learning_rate": 7.723072238047526e-06, "loss": 0.3678, "step": 7451 }, { "epoch": 0.3372708757637475, "grad_norm": 0.40615804589458515, "learning_rate": 7.72245751780532e-06, "loss": 0.4807, "step": 7452 }, { "epoch": 0.337316134872143, "grad_norm": 0.6384877707589975, "learning_rate": 7.721842739065862e-06, "loss": 0.3547, "step": 7453 }, { "epoch": 0.3373613939805386, "grad_norm": 0.6979226985911737, "learning_rate": 7.721227901842363e-06, "loss": 0.3621, "step": 7454 }, { "epoch": 0.33740665308893414, "grad_norm": 0.6487294010425022, "learning_rate": 7.720613006148034e-06, "loss": 0.3578, "step": 7455 }, { "epoch": 0.33745191219732973, "grad_norm": 0.8479401579050782, "learning_rate": 7.719998051996087e-06, "loss": 0.3972, "step": 7456 }, { "epoch": 0.33749717130572526, "grad_norm": 0.3283701852873334, "learning_rate": 7.719383039399735e-06, "loss": 0.4766, "step": 7457 }, { "epoch": 0.33754243041412085, "grad_norm": 0.6778409513847219, "learning_rate": 7.718767968372193e-06, "loss": 0.3381, "step": 7458 }, { "epoch": 0.3375876895225164, "grad_norm": 0.6111463890412212, "learning_rate": 7.71815283892668e-06, "loss": 0.3235, "step": 7459 }, { "epoch": 0.337632948630912, "grad_norm": 0.3121534940673971, "learning_rate": 7.71753765107641e-06, "loss": 0.4745, "step": 7460 }, { "epoch": 0.3376782077393075, "grad_norm": 0.6458181090619489, "learning_rate": 7.716922404834602e-06, "loss": 0.3494, "step": 7461 }, { "epoch": 0.3377234668477031, "grad_norm": 0.6411197840593588, "learning_rate": 7.716307100214472e-06, "loss": 0.416, "step": 7462 }, { "epoch": 0.3377687259560987, "grad_norm": 0.6632890569738612, "learning_rate": 7.715691737229249e-06, "loss": 0.3312, "step": 7463 }, { "epoch": 0.3378139850644942, "grad_norm": 0.2935415701154594, "learning_rate": 7.715076315892152e-06, "loss": 0.4926, "step": 7464 }, { "epoch": 0.3378592441728898, "grad_norm": 0.6186396883713703, "learning_rate": 7.714460836216402e-06, "loss": 0.3823, "step": 7465 }, { "epoch": 0.33790450328128535, "grad_norm": 0.6027909574603085, "learning_rate": 7.713845298215226e-06, "loss": 0.3537, "step": 7466 }, { "epoch": 0.33794976238968094, "grad_norm": 0.6197701183196876, "learning_rate": 7.713229701901848e-06, "loss": 0.3794, "step": 7467 }, { "epoch": 0.3379950214980765, "grad_norm": 0.2862160805249473, "learning_rate": 7.712614047289498e-06, "loss": 0.4657, "step": 7468 }, { "epoch": 0.33804028060647207, "grad_norm": 0.6806505904540876, "learning_rate": 7.711998334391404e-06, "loss": 0.3869, "step": 7469 }, { "epoch": 0.3380855397148676, "grad_norm": 0.7033828216041671, "learning_rate": 7.711382563220793e-06, "loss": 0.3814, "step": 7470 }, { "epoch": 0.3381307988232632, "grad_norm": 0.6488784757057378, "learning_rate": 7.7107667337909e-06, "loss": 0.3832, "step": 7471 }, { "epoch": 0.3381760579316587, "grad_norm": 0.3317792728759328, "learning_rate": 7.710150846114954e-06, "loss": 0.4736, "step": 7472 }, { "epoch": 0.3382213170400543, "grad_norm": 0.6621196774829878, "learning_rate": 7.70953490020619e-06, "loss": 0.3642, "step": 7473 }, { "epoch": 0.33826657614844985, "grad_norm": 0.6740884911039556, "learning_rate": 7.708918896077843e-06, "loss": 0.4162, "step": 7474 }, { "epoch": 0.33831183525684544, "grad_norm": 0.6174594802456844, "learning_rate": 7.708302833743149e-06, "loss": 0.3285, "step": 7475 }, { "epoch": 0.33835709436524103, "grad_norm": 0.6464458466179519, "learning_rate": 7.707686713215346e-06, "loss": 0.3641, "step": 7476 }, { "epoch": 0.33840235347363656, "grad_norm": 0.6499025032731706, "learning_rate": 7.70707053450767e-06, "loss": 0.3353, "step": 7477 }, { "epoch": 0.33844761258203215, "grad_norm": 0.5949856358113641, "learning_rate": 7.706454297633363e-06, "loss": 0.38, "step": 7478 }, { "epoch": 0.3384928716904277, "grad_norm": 0.6759432774690013, "learning_rate": 7.705838002605665e-06, "loss": 0.3213, "step": 7479 }, { "epoch": 0.3385381307988233, "grad_norm": 0.29385555492762644, "learning_rate": 7.705221649437819e-06, "loss": 0.4835, "step": 7480 }, { "epoch": 0.3385833899072188, "grad_norm": 0.3342604546667831, "learning_rate": 7.704605238143069e-06, "loss": 0.5036, "step": 7481 }, { "epoch": 0.3386286490156144, "grad_norm": 0.647801597038109, "learning_rate": 7.703988768734658e-06, "loss": 0.41, "step": 7482 }, { "epoch": 0.33867390812400994, "grad_norm": 0.63915331270805, "learning_rate": 7.703372241225832e-06, "loss": 0.2949, "step": 7483 }, { "epoch": 0.3387191672324055, "grad_norm": 0.6251380338112288, "learning_rate": 7.702755655629841e-06, "loss": 0.3707, "step": 7484 }, { "epoch": 0.33876442634080106, "grad_norm": 0.7472768475392628, "learning_rate": 7.702139011959933e-06, "loss": 0.3788, "step": 7485 }, { "epoch": 0.33880968544919665, "grad_norm": 0.6220251227286063, "learning_rate": 7.701522310229353e-06, "loss": 0.3689, "step": 7486 }, { "epoch": 0.33885494455759224, "grad_norm": 0.31252483447081636, "learning_rate": 7.700905550451359e-06, "loss": 0.5129, "step": 7487 }, { "epoch": 0.3389002036659878, "grad_norm": 0.2934067222093203, "learning_rate": 7.700288732639198e-06, "loss": 0.4867, "step": 7488 }, { "epoch": 0.33894546277438337, "grad_norm": 0.6546002810607425, "learning_rate": 7.699671856806126e-06, "loss": 0.3321, "step": 7489 }, { "epoch": 0.3389907218827789, "grad_norm": 0.6325439862884268, "learning_rate": 7.699054922965398e-06, "loss": 0.3346, "step": 7490 }, { "epoch": 0.3390359809911745, "grad_norm": 0.740205050820994, "learning_rate": 7.698437931130266e-06, "loss": 0.3741, "step": 7491 }, { "epoch": 0.33908124009957, "grad_norm": 0.6601280735750225, "learning_rate": 7.697820881313994e-06, "loss": 0.3616, "step": 7492 }, { "epoch": 0.3391264992079656, "grad_norm": 0.6613533939553481, "learning_rate": 7.697203773529835e-06, "loss": 0.3569, "step": 7493 }, { "epoch": 0.33917175831636115, "grad_norm": 0.6112239789987094, "learning_rate": 7.696586607791053e-06, "loss": 0.3738, "step": 7494 }, { "epoch": 0.33921701742475674, "grad_norm": 0.7287828073782087, "learning_rate": 7.695969384110906e-06, "loss": 0.3303, "step": 7495 }, { "epoch": 0.3392622765331523, "grad_norm": 0.6877461486725416, "learning_rate": 7.695352102502655e-06, "loss": 0.3679, "step": 7496 }, { "epoch": 0.33930753564154786, "grad_norm": 0.7528212080107087, "learning_rate": 7.694734762979566e-06, "loss": 0.3722, "step": 7497 }, { "epoch": 0.33935279474994345, "grad_norm": 0.6911967207059251, "learning_rate": 7.694117365554905e-06, "loss": 0.3523, "step": 7498 }, { "epoch": 0.339398053858339, "grad_norm": 0.6781742116766306, "learning_rate": 7.693499910241935e-06, "loss": 0.3753, "step": 7499 }, { "epoch": 0.3394433129667346, "grad_norm": 0.6668863342842705, "learning_rate": 7.692882397053924e-06, "loss": 0.3711, "step": 7500 }, { "epoch": 0.3394885720751301, "grad_norm": 0.6889227130686282, "learning_rate": 7.69226482600414e-06, "loss": 0.3786, "step": 7501 }, { "epoch": 0.3395338311835257, "grad_norm": 0.6258982774738167, "learning_rate": 7.691647197105857e-06, "loss": 0.3838, "step": 7502 }, { "epoch": 0.33957909029192124, "grad_norm": 0.60363916132125, "learning_rate": 7.69102951037234e-06, "loss": 0.3668, "step": 7503 }, { "epoch": 0.3396243494003168, "grad_norm": 0.45105570128430217, "learning_rate": 7.690411765816864e-06, "loss": 0.4735, "step": 7504 }, { "epoch": 0.33966960850871236, "grad_norm": 0.6294237191403765, "learning_rate": 7.689793963452703e-06, "loss": 0.3415, "step": 7505 }, { "epoch": 0.33971486761710795, "grad_norm": 0.6526197354715212, "learning_rate": 7.68917610329313e-06, "loss": 0.3648, "step": 7506 }, { "epoch": 0.3397601267255035, "grad_norm": 0.6589135998127787, "learning_rate": 7.68855818535142e-06, "loss": 0.3441, "step": 7507 }, { "epoch": 0.3398053858338991, "grad_norm": 0.6712897142276448, "learning_rate": 7.687940209640853e-06, "loss": 0.3835, "step": 7508 }, { "epoch": 0.3398506449422946, "grad_norm": 0.3313425590280107, "learning_rate": 7.687322176174708e-06, "loss": 0.5078, "step": 7509 }, { "epoch": 0.3398959040506902, "grad_norm": 0.6378473326228518, "learning_rate": 7.686704084966263e-06, "loss": 0.3913, "step": 7510 }, { "epoch": 0.3399411631590858, "grad_norm": 0.6571514913747152, "learning_rate": 7.686085936028798e-06, "loss": 0.3348, "step": 7511 }, { "epoch": 0.3399864222674813, "grad_norm": 0.6324628293351644, "learning_rate": 7.685467729375596e-06, "loss": 0.3366, "step": 7512 }, { "epoch": 0.3400316813758769, "grad_norm": 0.6064251790572599, "learning_rate": 7.684849465019938e-06, "loss": 0.3518, "step": 7513 }, { "epoch": 0.34007694048427245, "grad_norm": 0.6791002915888196, "learning_rate": 7.684231142975113e-06, "loss": 0.3573, "step": 7514 }, { "epoch": 0.34012219959266804, "grad_norm": 0.6485858044842474, "learning_rate": 7.683612763254404e-06, "loss": 0.3393, "step": 7515 }, { "epoch": 0.3401674587010636, "grad_norm": 0.34229835613429566, "learning_rate": 7.682994325871098e-06, "loss": 0.5176, "step": 7516 }, { "epoch": 0.34021271780945916, "grad_norm": 0.5985761286493197, "learning_rate": 7.682375830838487e-06, "loss": 0.3647, "step": 7517 }, { "epoch": 0.3402579769178547, "grad_norm": 0.6356574267140701, "learning_rate": 7.681757278169854e-06, "loss": 0.3234, "step": 7518 }, { "epoch": 0.3403032360262503, "grad_norm": 0.685838642936561, "learning_rate": 7.681138667878497e-06, "loss": 0.3603, "step": 7519 }, { "epoch": 0.3403484951346458, "grad_norm": 0.6511830826513854, "learning_rate": 7.680519999977703e-06, "loss": 0.3723, "step": 7520 }, { "epoch": 0.3403937542430414, "grad_norm": 0.31154658968417265, "learning_rate": 7.679901274480766e-06, "loss": 0.4982, "step": 7521 }, { "epoch": 0.340439013351437, "grad_norm": 0.6537163331357495, "learning_rate": 7.67928249140098e-06, "loss": 0.4237, "step": 7522 }, { "epoch": 0.34048427245983254, "grad_norm": 0.6386803915716467, "learning_rate": 7.678663650751648e-06, "loss": 0.3303, "step": 7523 }, { "epoch": 0.3405295315682281, "grad_norm": 0.6519793736472048, "learning_rate": 7.678044752546056e-06, "loss": 0.366, "step": 7524 }, { "epoch": 0.34057479067662366, "grad_norm": 0.590842503948439, "learning_rate": 7.677425796797509e-06, "loss": 0.3097, "step": 7525 }, { "epoch": 0.34062004978501925, "grad_norm": 0.6407967582943332, "learning_rate": 7.676806783519304e-06, "loss": 0.3608, "step": 7526 }, { "epoch": 0.3406653088934148, "grad_norm": 0.7371405089237638, "learning_rate": 7.676187712724742e-06, "loss": 0.3703, "step": 7527 }, { "epoch": 0.3407105680018104, "grad_norm": 0.6499977417250453, "learning_rate": 7.675568584427125e-06, "loss": 0.3762, "step": 7528 }, { "epoch": 0.3407558271102059, "grad_norm": 0.6536935173708011, "learning_rate": 7.674949398639759e-06, "loss": 0.3582, "step": 7529 }, { "epoch": 0.3408010862186015, "grad_norm": 0.6722326617995539, "learning_rate": 7.674330155375942e-06, "loss": 0.3582, "step": 7530 }, { "epoch": 0.34084634532699704, "grad_norm": 0.6492041265689165, "learning_rate": 7.673710854648988e-06, "loss": 0.3397, "step": 7531 }, { "epoch": 0.3408916044353926, "grad_norm": 0.6033954319869382, "learning_rate": 7.673091496472195e-06, "loss": 0.3484, "step": 7532 }, { "epoch": 0.34093686354378816, "grad_norm": 0.38637999773221254, "learning_rate": 7.67247208085888e-06, "loss": 0.4913, "step": 7533 }, { "epoch": 0.34098212265218375, "grad_norm": 0.7052793212540459, "learning_rate": 7.671852607822346e-06, "loss": 0.3513, "step": 7534 }, { "epoch": 0.34102738176057934, "grad_norm": 0.6174511378249521, "learning_rate": 7.671233077375903e-06, "loss": 0.3736, "step": 7535 }, { "epoch": 0.3410726408689749, "grad_norm": 0.6239886045962482, "learning_rate": 7.670613489532868e-06, "loss": 0.3414, "step": 7536 }, { "epoch": 0.34111789997737046, "grad_norm": 0.8548263136871884, "learning_rate": 7.66999384430655e-06, "loss": 0.3477, "step": 7537 }, { "epoch": 0.341163159085766, "grad_norm": 0.6002015590991432, "learning_rate": 7.669374141710266e-06, "loss": 0.3479, "step": 7538 }, { "epoch": 0.3412084181941616, "grad_norm": 0.6336840470348314, "learning_rate": 7.668754381757329e-06, "loss": 0.3686, "step": 7539 }, { "epoch": 0.3412536773025571, "grad_norm": 0.6427120609580672, "learning_rate": 7.668134564461057e-06, "loss": 0.3474, "step": 7540 }, { "epoch": 0.3412989364109527, "grad_norm": 0.8789691593560188, "learning_rate": 7.667514689834766e-06, "loss": 0.3708, "step": 7541 }, { "epoch": 0.34134419551934825, "grad_norm": 0.6235062003928623, "learning_rate": 7.666894757891779e-06, "loss": 0.3479, "step": 7542 }, { "epoch": 0.34138945462774384, "grad_norm": 0.6322898144122554, "learning_rate": 7.666274768645413e-06, "loss": 0.3542, "step": 7543 }, { "epoch": 0.3414347137361394, "grad_norm": 0.6630673959545894, "learning_rate": 7.665654722108994e-06, "loss": 0.3685, "step": 7544 }, { "epoch": 0.34147997284453496, "grad_norm": 0.6387799928039584, "learning_rate": 7.665034618295838e-06, "loss": 0.3474, "step": 7545 }, { "epoch": 0.34152523195293055, "grad_norm": 0.5943943062030385, "learning_rate": 7.664414457219277e-06, "loss": 0.3239, "step": 7546 }, { "epoch": 0.3415704910613261, "grad_norm": 0.6699691702156413, "learning_rate": 7.66379423889263e-06, "loss": 0.3928, "step": 7547 }, { "epoch": 0.3416157501697217, "grad_norm": 0.7179907829644857, "learning_rate": 7.663173963329227e-06, "loss": 0.3197, "step": 7548 }, { "epoch": 0.3416610092781172, "grad_norm": 0.6417787563246365, "learning_rate": 7.662553630542393e-06, "loss": 0.3439, "step": 7549 }, { "epoch": 0.3417062683865128, "grad_norm": 0.6565039262127776, "learning_rate": 7.661933240545464e-06, "loss": 0.3279, "step": 7550 }, { "epoch": 0.34175152749490834, "grad_norm": 0.6660501472957977, "learning_rate": 7.661312793351758e-06, "loss": 0.388, "step": 7551 }, { "epoch": 0.3417967866033039, "grad_norm": 0.6796749549491682, "learning_rate": 7.660692288974618e-06, "loss": 0.3604, "step": 7552 }, { "epoch": 0.34184204571169946, "grad_norm": 0.5855195869802828, "learning_rate": 7.660071727427372e-06, "loss": 0.3145, "step": 7553 }, { "epoch": 0.34188730482009505, "grad_norm": 0.6448670926148414, "learning_rate": 7.659451108723353e-06, "loss": 0.3663, "step": 7554 }, { "epoch": 0.3419325639284906, "grad_norm": 0.6840365201027633, "learning_rate": 7.658830432875899e-06, "loss": 0.373, "step": 7555 }, { "epoch": 0.3419778230368862, "grad_norm": 0.6320095353665366, "learning_rate": 7.658209699898344e-06, "loss": 0.3696, "step": 7556 }, { "epoch": 0.34202308214528176, "grad_norm": 0.6769106274619351, "learning_rate": 7.657588909804028e-06, "loss": 0.4076, "step": 7557 }, { "epoch": 0.3420683412536773, "grad_norm": 0.6830176902563303, "learning_rate": 7.656968062606288e-06, "loss": 0.3546, "step": 7558 }, { "epoch": 0.3421136003620729, "grad_norm": 0.6386112845278985, "learning_rate": 7.656347158318462e-06, "loss": 0.3308, "step": 7559 }, { "epoch": 0.3421588594704684, "grad_norm": 0.6494201950324293, "learning_rate": 7.655726196953898e-06, "loss": 0.36, "step": 7560 }, { "epoch": 0.342204118578864, "grad_norm": 0.6433055486507153, "learning_rate": 7.655105178525932e-06, "loss": 0.3533, "step": 7561 }, { "epoch": 0.34224937768725955, "grad_norm": 0.5024231652031483, "learning_rate": 7.65448410304791e-06, "loss": 0.4686, "step": 7562 }, { "epoch": 0.34229463679565514, "grad_norm": 0.6118199949238902, "learning_rate": 7.653862970533179e-06, "loss": 0.3816, "step": 7563 }, { "epoch": 0.3423398959040507, "grad_norm": 0.6683841803842258, "learning_rate": 7.653241780995083e-06, "loss": 0.3528, "step": 7564 }, { "epoch": 0.34238515501244626, "grad_norm": 0.32221683070115276, "learning_rate": 7.652620534446968e-06, "loss": 0.4729, "step": 7565 }, { "epoch": 0.3424304141208418, "grad_norm": 0.6251898531891275, "learning_rate": 7.651999230902186e-06, "loss": 0.3703, "step": 7566 }, { "epoch": 0.3424756732292374, "grad_norm": 0.7174094885394138, "learning_rate": 7.651377870374087e-06, "loss": 0.3363, "step": 7567 }, { "epoch": 0.3425209323376329, "grad_norm": 0.5838536870265789, "learning_rate": 7.650756452876019e-06, "loss": 0.3548, "step": 7568 }, { "epoch": 0.3425661914460285, "grad_norm": 0.4020356162026709, "learning_rate": 7.650134978421335e-06, "loss": 0.472, "step": 7569 }, { "epoch": 0.3426114505544241, "grad_norm": 0.6858008302933762, "learning_rate": 7.64951344702339e-06, "loss": 0.3974, "step": 7570 }, { "epoch": 0.34265670966281964, "grad_norm": 0.6300589867991939, "learning_rate": 7.648891858695542e-06, "loss": 0.3443, "step": 7571 }, { "epoch": 0.3427019687712152, "grad_norm": 0.5986157076384225, "learning_rate": 7.64827021345114e-06, "loss": 0.3277, "step": 7572 }, { "epoch": 0.34274722787961076, "grad_norm": 0.3073571483471491, "learning_rate": 7.647648511303545e-06, "loss": 0.494, "step": 7573 }, { "epoch": 0.34279248698800635, "grad_norm": 0.6350792333445885, "learning_rate": 7.647026752266114e-06, "loss": 0.3809, "step": 7574 }, { "epoch": 0.3428377460964019, "grad_norm": 0.666039449246502, "learning_rate": 7.64640493635221e-06, "loss": 0.3917, "step": 7575 }, { "epoch": 0.3428830052047975, "grad_norm": 0.735552361903718, "learning_rate": 7.64578306357519e-06, "loss": 0.3641, "step": 7576 }, { "epoch": 0.342928264313193, "grad_norm": 0.7251610813098117, "learning_rate": 7.64516113394842e-06, "loss": 0.3148, "step": 7577 }, { "epoch": 0.3429735234215886, "grad_norm": 0.6705458498226945, "learning_rate": 7.64453914748526e-06, "loss": 0.3425, "step": 7578 }, { "epoch": 0.34301878252998413, "grad_norm": 0.6576445664417572, "learning_rate": 7.643917104199076e-06, "loss": 0.366, "step": 7579 }, { "epoch": 0.3430640416383797, "grad_norm": 0.35501970819457634, "learning_rate": 7.643295004103232e-06, "loss": 0.4816, "step": 7580 }, { "epoch": 0.3431093007467753, "grad_norm": 0.763870455320076, "learning_rate": 7.6426728472111e-06, "loss": 0.3533, "step": 7581 }, { "epoch": 0.34315455985517085, "grad_norm": 0.6554494379318401, "learning_rate": 7.642050633536042e-06, "loss": 0.3617, "step": 7582 }, { "epoch": 0.34319981896356644, "grad_norm": 0.6627115772554796, "learning_rate": 7.641428363091431e-06, "loss": 0.3789, "step": 7583 }, { "epoch": 0.343245078071962, "grad_norm": 0.6152424957501429, "learning_rate": 7.640806035890637e-06, "loss": 0.3426, "step": 7584 }, { "epoch": 0.34329033718035756, "grad_norm": 0.6534631216532331, "learning_rate": 7.640183651947033e-06, "loss": 0.3383, "step": 7585 }, { "epoch": 0.3433355962887531, "grad_norm": 0.7446778963304378, "learning_rate": 7.639561211273989e-06, "loss": 0.3749, "step": 7586 }, { "epoch": 0.3433808553971487, "grad_norm": 0.31481139433953165, "learning_rate": 7.638938713884883e-06, "loss": 0.4847, "step": 7587 }, { "epoch": 0.3434261145055442, "grad_norm": 0.5922040233371281, "learning_rate": 7.638316159793089e-06, "loss": 0.3569, "step": 7588 }, { "epoch": 0.3434713736139398, "grad_norm": 0.6249165881142379, "learning_rate": 7.637693549011983e-06, "loss": 0.3517, "step": 7589 }, { "epoch": 0.34351663272233535, "grad_norm": 0.3221901775760477, "learning_rate": 7.637070881554944e-06, "loss": 0.4812, "step": 7590 }, { "epoch": 0.34356189183073094, "grad_norm": 0.3070442368662165, "learning_rate": 7.63644815743535e-06, "loss": 0.4885, "step": 7591 }, { "epoch": 0.3436071509391265, "grad_norm": 0.6727977151365402, "learning_rate": 7.635825376666584e-06, "loss": 0.3514, "step": 7592 }, { "epoch": 0.34365241004752206, "grad_norm": 0.6778367970179924, "learning_rate": 7.635202539262025e-06, "loss": 0.3718, "step": 7593 }, { "epoch": 0.34369766915591765, "grad_norm": 0.6923427020612646, "learning_rate": 7.634579645235056e-06, "loss": 0.3678, "step": 7594 }, { "epoch": 0.3437429282643132, "grad_norm": 0.6398153691505553, "learning_rate": 7.633956694599063e-06, "loss": 0.3305, "step": 7595 }, { "epoch": 0.3437881873727088, "grad_norm": 0.658762949535757, "learning_rate": 7.63333368736743e-06, "loss": 0.4038, "step": 7596 }, { "epoch": 0.3438334464811043, "grad_norm": 0.36541076844789505, "learning_rate": 7.632710623553543e-06, "loss": 0.4857, "step": 7597 }, { "epoch": 0.3438787055894999, "grad_norm": 0.6564004935929247, "learning_rate": 7.632087503170793e-06, "loss": 0.3249, "step": 7598 }, { "epoch": 0.34392396469789543, "grad_norm": 0.6711467037755176, "learning_rate": 7.631464326232562e-06, "loss": 0.3469, "step": 7599 }, { "epoch": 0.343969223806291, "grad_norm": 0.645995639153286, "learning_rate": 7.630841092752248e-06, "loss": 0.3429, "step": 7600 }, { "epoch": 0.34401448291468656, "grad_norm": 0.6707935052593224, "learning_rate": 7.630217802743238e-06, "loss": 0.3921, "step": 7601 }, { "epoch": 0.34405974202308215, "grad_norm": 0.611645996050189, "learning_rate": 7.629594456218926e-06, "loss": 0.3883, "step": 7602 }, { "epoch": 0.3441050011314777, "grad_norm": 0.7375476165970931, "learning_rate": 7.628971053192705e-06, "loss": 0.3792, "step": 7603 }, { "epoch": 0.3441502602398733, "grad_norm": 0.9643445777519964, "learning_rate": 7.628347593677969e-06, "loss": 0.377, "step": 7604 }, { "epoch": 0.34419551934826886, "grad_norm": 0.42861613394280557, "learning_rate": 7.6277240776881175e-06, "loss": 0.4879, "step": 7605 }, { "epoch": 0.3442407784566644, "grad_norm": 0.6210216944863378, "learning_rate": 7.6271005052365465e-06, "loss": 0.3676, "step": 7606 }, { "epoch": 0.34428603756506, "grad_norm": 0.636479508470854, "learning_rate": 7.6264768763366525e-06, "loss": 0.3545, "step": 7607 }, { "epoch": 0.3443312966734555, "grad_norm": 0.6229405722005883, "learning_rate": 7.6258531910018375e-06, "loss": 0.3568, "step": 7608 }, { "epoch": 0.3443765557818511, "grad_norm": 0.5915074305794326, "learning_rate": 7.625229449245501e-06, "loss": 0.323, "step": 7609 }, { "epoch": 0.34442181489024665, "grad_norm": 0.5860717764241135, "learning_rate": 7.624605651081049e-06, "loss": 0.3412, "step": 7610 }, { "epoch": 0.34446707399864224, "grad_norm": 0.7414526006320594, "learning_rate": 7.62398179652188e-06, "loss": 0.3505, "step": 7611 }, { "epoch": 0.34451233310703777, "grad_norm": 0.6203614209578381, "learning_rate": 7.623357885581403e-06, "loss": 0.3978, "step": 7612 }, { "epoch": 0.34455759221543336, "grad_norm": 0.7438136046629493, "learning_rate": 7.622733918273021e-06, "loss": 0.365, "step": 7613 }, { "epoch": 0.3446028513238289, "grad_norm": 0.6351053778164822, "learning_rate": 7.6221098946101415e-06, "loss": 0.3576, "step": 7614 }, { "epoch": 0.3446481104322245, "grad_norm": 0.6493836667947682, "learning_rate": 7.621485814606175e-06, "loss": 0.3144, "step": 7615 }, { "epoch": 0.3446933695406201, "grad_norm": 0.4247953999482638, "learning_rate": 7.62086167827453e-06, "loss": 0.4993, "step": 7616 }, { "epoch": 0.3447386286490156, "grad_norm": 0.6601210723159385, "learning_rate": 7.620237485628614e-06, "loss": 0.3643, "step": 7617 }, { "epoch": 0.3447838877574112, "grad_norm": 0.618361473474604, "learning_rate": 7.619613236681845e-06, "loss": 0.3511, "step": 7618 }, { "epoch": 0.34482914686580673, "grad_norm": 0.6427019523563446, "learning_rate": 7.618988931447633e-06, "loss": 0.3331, "step": 7619 }, { "epoch": 0.3448744059742023, "grad_norm": 0.6811436509905098, "learning_rate": 7.61836456993939e-06, "loss": 0.3763, "step": 7620 }, { "epoch": 0.34491966508259786, "grad_norm": 0.752962159483992, "learning_rate": 7.617740152170536e-06, "loss": 0.3439, "step": 7621 }, { "epoch": 0.34496492419099345, "grad_norm": 0.320214585605932, "learning_rate": 7.617115678154485e-06, "loss": 0.4688, "step": 7622 }, { "epoch": 0.345010183299389, "grad_norm": 0.6349661881515777, "learning_rate": 7.616491147904657e-06, "loss": 0.3607, "step": 7623 }, { "epoch": 0.3450554424077846, "grad_norm": 0.6861805691638369, "learning_rate": 7.615866561434468e-06, "loss": 0.3703, "step": 7624 }, { "epoch": 0.3451007015161801, "grad_norm": 0.6198289549983903, "learning_rate": 7.615241918757343e-06, "loss": 0.3272, "step": 7625 }, { "epoch": 0.3451459606245757, "grad_norm": 0.2929009723502456, "learning_rate": 7.614617219886699e-06, "loss": 0.4649, "step": 7626 }, { "epoch": 0.34519121973297123, "grad_norm": 0.6858863221642914, "learning_rate": 7.613992464835964e-06, "loss": 0.3646, "step": 7627 }, { "epoch": 0.3452364788413668, "grad_norm": 0.6786937831026911, "learning_rate": 7.613367653618558e-06, "loss": 0.3563, "step": 7628 }, { "epoch": 0.3452817379497624, "grad_norm": 0.7177119428531356, "learning_rate": 7.612742786247906e-06, "loss": 0.3916, "step": 7629 }, { "epoch": 0.34532699705815795, "grad_norm": 0.603190059176882, "learning_rate": 7.612117862737437e-06, "loss": 0.3232, "step": 7630 }, { "epoch": 0.34537225616655354, "grad_norm": 0.5944795316400319, "learning_rate": 7.611492883100579e-06, "loss": 0.3464, "step": 7631 }, { "epoch": 0.34541751527494907, "grad_norm": 0.6558781026577434, "learning_rate": 7.610867847350758e-06, "loss": 0.3884, "step": 7632 }, { "epoch": 0.34546277438334466, "grad_norm": 0.637930146367971, "learning_rate": 7.610242755501404e-06, "loss": 0.3355, "step": 7633 }, { "epoch": 0.3455080334917402, "grad_norm": 0.5539742893944569, "learning_rate": 7.6096176075659535e-06, "loss": 0.341, "step": 7634 }, { "epoch": 0.3455532926001358, "grad_norm": 0.6214660355339781, "learning_rate": 7.608992403557833e-06, "loss": 0.3933, "step": 7635 }, { "epoch": 0.3455985517085313, "grad_norm": 0.6163769966528868, "learning_rate": 7.60836714349048e-06, "loss": 0.3566, "step": 7636 }, { "epoch": 0.3456438108169269, "grad_norm": 0.6619757585034614, "learning_rate": 7.607741827377329e-06, "loss": 0.3892, "step": 7637 }, { "epoch": 0.34568906992532245, "grad_norm": 0.6139244345551975, "learning_rate": 7.607116455231811e-06, "loss": 0.3253, "step": 7638 }, { "epoch": 0.34573432903371804, "grad_norm": 1.0639117936927491, "learning_rate": 7.606491027067372e-06, "loss": 0.3575, "step": 7639 }, { "epoch": 0.3457795881421136, "grad_norm": 0.71432928918362, "learning_rate": 7.605865542897443e-06, "loss": 0.3811, "step": 7640 }, { "epoch": 0.34582484725050916, "grad_norm": 0.59747298091808, "learning_rate": 7.605240002735469e-06, "loss": 0.3792, "step": 7641 }, { "epoch": 0.34587010635890475, "grad_norm": 0.6090730545711344, "learning_rate": 7.604614406594888e-06, "loss": 0.3769, "step": 7642 }, { "epoch": 0.3459153654673003, "grad_norm": 0.6527221123705976, "learning_rate": 7.603988754489142e-06, "loss": 0.3977, "step": 7643 }, { "epoch": 0.3459606245756959, "grad_norm": 0.6269383326587715, "learning_rate": 7.603363046431676e-06, "loss": 0.3345, "step": 7644 }, { "epoch": 0.3460058836840914, "grad_norm": 0.6320898437814025, "learning_rate": 7.6027372824359336e-06, "loss": 0.383, "step": 7645 }, { "epoch": 0.346051142792487, "grad_norm": 0.6182188002592361, "learning_rate": 7.60211146251536e-06, "loss": 0.369, "step": 7646 }, { "epoch": 0.34609640190088253, "grad_norm": 0.6648400989717609, "learning_rate": 7.601485586683404e-06, "loss": 0.3778, "step": 7647 }, { "epoch": 0.3461416610092781, "grad_norm": 0.640924490371323, "learning_rate": 7.600859654953513e-06, "loss": 0.3981, "step": 7648 }, { "epoch": 0.34618692011767366, "grad_norm": 0.657510092475443, "learning_rate": 7.600233667339134e-06, "loss": 0.3515, "step": 7649 }, { "epoch": 0.34623217922606925, "grad_norm": 0.6443967712541337, "learning_rate": 7.599607623853722e-06, "loss": 0.3721, "step": 7650 }, { "epoch": 0.34627743833446484, "grad_norm": 0.3703216977234791, "learning_rate": 7.5989815245107235e-06, "loss": 0.4724, "step": 7651 }, { "epoch": 0.34632269744286037, "grad_norm": 0.31900130505503843, "learning_rate": 7.5983553693235955e-06, "loss": 0.4883, "step": 7652 }, { "epoch": 0.34636795655125596, "grad_norm": 0.668104250745774, "learning_rate": 7.597729158305791e-06, "loss": 0.366, "step": 7653 }, { "epoch": 0.3464132156596515, "grad_norm": 0.6319615814150348, "learning_rate": 7.597102891470766e-06, "loss": 0.3459, "step": 7654 }, { "epoch": 0.3464584747680471, "grad_norm": 0.62178518968914, "learning_rate": 7.596476568831974e-06, "loss": 0.3496, "step": 7655 }, { "epoch": 0.3465037338764426, "grad_norm": 0.7020410645187021, "learning_rate": 7.595850190402877e-06, "loss": 0.3431, "step": 7656 }, { "epoch": 0.3465489929848382, "grad_norm": 0.6481777084808226, "learning_rate": 7.595223756196931e-06, "loss": 0.3603, "step": 7657 }, { "epoch": 0.34659425209323375, "grad_norm": 0.6725199795510061, "learning_rate": 7.594597266227599e-06, "loss": 0.3261, "step": 7658 }, { "epoch": 0.34663951120162934, "grad_norm": 0.6695539890315392, "learning_rate": 7.593970720508337e-06, "loss": 0.3911, "step": 7659 }, { "epoch": 0.34668477031002487, "grad_norm": 0.6354345747973272, "learning_rate": 7.5933441190526146e-06, "loss": 0.329, "step": 7660 }, { "epoch": 0.34673002941842046, "grad_norm": 0.6397382074660813, "learning_rate": 7.59271746187389e-06, "loss": 0.3688, "step": 7661 }, { "epoch": 0.346775288526816, "grad_norm": 0.6931351805945015, "learning_rate": 7.59209074898563e-06, "loss": 0.4006, "step": 7662 }, { "epoch": 0.3468205476352116, "grad_norm": 0.6902782157655498, "learning_rate": 7.591463980401302e-06, "loss": 0.3619, "step": 7663 }, { "epoch": 0.3468658067436072, "grad_norm": 0.6431808265068502, "learning_rate": 7.59083715613437e-06, "loss": 0.3402, "step": 7664 }, { "epoch": 0.3469110658520027, "grad_norm": 0.6268809460813694, "learning_rate": 7.590210276198305e-06, "loss": 0.3691, "step": 7665 }, { "epoch": 0.3469563249603983, "grad_norm": 0.5677407303711991, "learning_rate": 7.589583340606579e-06, "loss": 0.486, "step": 7666 }, { "epoch": 0.34700158406879383, "grad_norm": 0.6379834997740906, "learning_rate": 7.588956349372657e-06, "loss": 0.3523, "step": 7667 }, { "epoch": 0.3470468431771894, "grad_norm": 0.654715150775096, "learning_rate": 7.588329302510017e-06, "loss": 0.3662, "step": 7668 }, { "epoch": 0.34709210228558496, "grad_norm": 0.6607985524070082, "learning_rate": 7.5877022000321285e-06, "loss": 0.3515, "step": 7669 }, { "epoch": 0.34713736139398055, "grad_norm": 1.0173624912637775, "learning_rate": 7.5870750419524675e-06, "loss": 0.3463, "step": 7670 }, { "epoch": 0.3471826205023761, "grad_norm": 0.6332869161692951, "learning_rate": 7.586447828284509e-06, "loss": 0.3976, "step": 7671 }, { "epoch": 0.3472278796107717, "grad_norm": 0.64736306872132, "learning_rate": 7.58582055904173e-06, "loss": 0.3406, "step": 7672 }, { "epoch": 0.3472731387191672, "grad_norm": 0.6573305867265749, "learning_rate": 7.585193234237611e-06, "loss": 0.4192, "step": 7673 }, { "epoch": 0.3473183978275628, "grad_norm": 0.635163449801587, "learning_rate": 7.584565853885627e-06, "loss": 0.3202, "step": 7674 }, { "epoch": 0.3473636569359584, "grad_norm": 0.6196122097101256, "learning_rate": 7.583938417999261e-06, "loss": 0.3622, "step": 7675 }, { "epoch": 0.3474089160443539, "grad_norm": 0.6688456245347942, "learning_rate": 7.5833109265919955e-06, "loss": 0.3226, "step": 7676 }, { "epoch": 0.3474541751527495, "grad_norm": 0.6659623665378279, "learning_rate": 7.5826833796773115e-06, "loss": 0.3486, "step": 7677 }, { "epoch": 0.34749943426114505, "grad_norm": 0.6305891165742237, "learning_rate": 7.582055777268693e-06, "loss": 0.3452, "step": 7678 }, { "epoch": 0.34754469336954064, "grad_norm": 0.6338385838715717, "learning_rate": 7.581428119379628e-06, "loss": 0.3282, "step": 7679 }, { "epoch": 0.34758995247793617, "grad_norm": 0.6191310672713078, "learning_rate": 7.5808004060235995e-06, "loss": 0.3418, "step": 7680 }, { "epoch": 0.34763521158633176, "grad_norm": 0.48046456954070565, "learning_rate": 7.580172637214098e-06, "loss": 0.4705, "step": 7681 }, { "epoch": 0.3476804706947273, "grad_norm": 0.6075556600854566, "learning_rate": 7.57954481296461e-06, "loss": 0.347, "step": 7682 }, { "epoch": 0.3477257298031229, "grad_norm": 0.7261897682432522, "learning_rate": 7.5789169332886255e-06, "loss": 0.3466, "step": 7683 }, { "epoch": 0.3477709889115184, "grad_norm": 0.743757384841053, "learning_rate": 7.578288998199638e-06, "loss": 0.3595, "step": 7684 }, { "epoch": 0.347816248019914, "grad_norm": 0.6174087207637071, "learning_rate": 7.5776610077111375e-06, "loss": 0.3439, "step": 7685 }, { "epoch": 0.3478615071283096, "grad_norm": 0.7420086401146211, "learning_rate": 7.577032961836619e-06, "loss": 0.3457, "step": 7686 }, { "epoch": 0.34790676623670513, "grad_norm": 0.6720111974672807, "learning_rate": 7.576404860589579e-06, "loss": 0.3644, "step": 7687 }, { "epoch": 0.3479520253451007, "grad_norm": 0.39524473437736063, "learning_rate": 7.575776703983508e-06, "loss": 0.5039, "step": 7688 }, { "epoch": 0.34799728445349626, "grad_norm": 0.6664363625429858, "learning_rate": 7.575148492031908e-06, "loss": 0.3552, "step": 7689 }, { "epoch": 0.34804254356189185, "grad_norm": 0.7055820427991333, "learning_rate": 7.574520224748276e-06, "loss": 0.4008, "step": 7690 }, { "epoch": 0.3480878026702874, "grad_norm": 0.8020328548076989, "learning_rate": 7.573891902146111e-06, "loss": 0.3893, "step": 7691 }, { "epoch": 0.348133061778683, "grad_norm": 0.3070808301664086, "learning_rate": 7.573263524238914e-06, "loss": 0.4878, "step": 7692 }, { "epoch": 0.3481783208870785, "grad_norm": 0.653831880465432, "learning_rate": 7.572635091040188e-06, "loss": 0.3793, "step": 7693 }, { "epoch": 0.3482235799954741, "grad_norm": 0.5919767279825535, "learning_rate": 7.572006602563434e-06, "loss": 0.3251, "step": 7694 }, { "epoch": 0.34826883910386963, "grad_norm": 0.6968740648600568, "learning_rate": 7.571378058822159e-06, "loss": 0.3583, "step": 7695 }, { "epoch": 0.3483140982122652, "grad_norm": 0.6407502883602284, "learning_rate": 7.570749459829865e-06, "loss": 0.4116, "step": 7696 }, { "epoch": 0.34835935732066076, "grad_norm": 0.32059540456082625, "learning_rate": 7.570120805600063e-06, "loss": 0.4956, "step": 7697 }, { "epoch": 0.34840461642905635, "grad_norm": 0.3229611926125327, "learning_rate": 7.569492096146256e-06, "loss": 0.5189, "step": 7698 }, { "epoch": 0.34844987553745194, "grad_norm": 1.1869411507300307, "learning_rate": 7.568863331481957e-06, "loss": 0.3237, "step": 7699 }, { "epoch": 0.34849513464584747, "grad_norm": 0.650355888585891, "learning_rate": 7.568234511620674e-06, "loss": 0.3359, "step": 7700 }, { "epoch": 0.34854039375424306, "grad_norm": 0.32701937546089865, "learning_rate": 7.567605636575919e-06, "loss": 0.4972, "step": 7701 }, { "epoch": 0.3485856528626386, "grad_norm": 0.6480347956638689, "learning_rate": 7.566976706361204e-06, "loss": 0.3598, "step": 7702 }, { "epoch": 0.3486309119710342, "grad_norm": 0.670768338920183, "learning_rate": 7.566347720990044e-06, "loss": 0.3799, "step": 7703 }, { "epoch": 0.3486761710794297, "grad_norm": 0.614859753339743, "learning_rate": 7.565718680475953e-06, "loss": 0.339, "step": 7704 }, { "epoch": 0.3487214301878253, "grad_norm": 0.8436747082401596, "learning_rate": 7.565089584832448e-06, "loss": 0.3403, "step": 7705 }, { "epoch": 0.34876668929622084, "grad_norm": 0.6671309781608522, "learning_rate": 7.564460434073047e-06, "loss": 0.3566, "step": 7706 }, { "epoch": 0.34881194840461643, "grad_norm": 0.7655776253067961, "learning_rate": 7.563831228211266e-06, "loss": 0.3943, "step": 7707 }, { "epoch": 0.34885720751301197, "grad_norm": 0.6443313925396289, "learning_rate": 7.563201967260627e-06, "loss": 0.3185, "step": 7708 }, { "epoch": 0.34890246662140756, "grad_norm": 0.6727203672910881, "learning_rate": 7.562572651234649e-06, "loss": 0.3712, "step": 7709 }, { "epoch": 0.34894772572980315, "grad_norm": 0.6965609685868654, "learning_rate": 7.561943280146856e-06, "loss": 0.3689, "step": 7710 }, { "epoch": 0.3489929848381987, "grad_norm": 0.6877882531696877, "learning_rate": 7.56131385401077e-06, "loss": 0.354, "step": 7711 }, { "epoch": 0.3490382439465943, "grad_norm": 0.6516043811205681, "learning_rate": 7.560684372839915e-06, "loss": 0.3654, "step": 7712 }, { "epoch": 0.3490835030549898, "grad_norm": 0.6131498119539656, "learning_rate": 7.560054836647819e-06, "loss": 0.3211, "step": 7713 }, { "epoch": 0.3491287621633854, "grad_norm": 0.5903973295467017, "learning_rate": 7.559425245448006e-06, "loss": 0.3683, "step": 7714 }, { "epoch": 0.34917402127178093, "grad_norm": 0.7211704582858878, "learning_rate": 7.558795599254005e-06, "loss": 0.3272, "step": 7715 }, { "epoch": 0.3492192803801765, "grad_norm": 0.6511390209664222, "learning_rate": 7.558165898079346e-06, "loss": 0.3561, "step": 7716 }, { "epoch": 0.34926453948857206, "grad_norm": 0.6037263667339409, "learning_rate": 7.5575361419375585e-06, "loss": 0.3496, "step": 7717 }, { "epoch": 0.34930979859696765, "grad_norm": 0.6542931110891881, "learning_rate": 7.556906330842174e-06, "loss": 0.3612, "step": 7718 }, { "epoch": 0.3493550577053632, "grad_norm": 0.6544269421961227, "learning_rate": 7.556276464806725e-06, "loss": 0.3447, "step": 7719 }, { "epoch": 0.34940031681375877, "grad_norm": 0.6179559855338046, "learning_rate": 7.555646543844747e-06, "loss": 0.352, "step": 7720 }, { "epoch": 0.34944557592215436, "grad_norm": 0.3694286494252386, "learning_rate": 7.555016567969773e-06, "loss": 0.4923, "step": 7721 }, { "epoch": 0.3494908350305499, "grad_norm": 0.3571334190934288, "learning_rate": 7.554386537195339e-06, "loss": 0.4915, "step": 7722 }, { "epoch": 0.3495360941389455, "grad_norm": 0.639870037309397, "learning_rate": 7.553756451534984e-06, "loss": 0.3545, "step": 7723 }, { "epoch": 0.349581353247341, "grad_norm": 0.6321033788590497, "learning_rate": 7.553126311002248e-06, "loss": 0.3035, "step": 7724 }, { "epoch": 0.3496266123557366, "grad_norm": 0.725958941617707, "learning_rate": 7.552496115610668e-06, "loss": 0.3517, "step": 7725 }, { "epoch": 0.34967187146413214, "grad_norm": 0.35637223581074784, "learning_rate": 7.5518658653737844e-06, "loss": 0.5128, "step": 7726 }, { "epoch": 0.34971713057252773, "grad_norm": 0.6564009265796242, "learning_rate": 7.551235560305142e-06, "loss": 0.368, "step": 7727 }, { "epoch": 0.34976238968092327, "grad_norm": 0.6910617140704731, "learning_rate": 7.550605200418283e-06, "loss": 0.3633, "step": 7728 }, { "epoch": 0.34980764878931886, "grad_norm": 0.7162155865789955, "learning_rate": 7.549974785726753e-06, "loss": 0.4214, "step": 7729 }, { "epoch": 0.3498529078977144, "grad_norm": 0.6295258356979103, "learning_rate": 7.549344316244094e-06, "loss": 0.3669, "step": 7730 }, { "epoch": 0.34989816700611, "grad_norm": 0.8504213878378942, "learning_rate": 7.548713791983857e-06, "loss": 0.3781, "step": 7731 }, { "epoch": 0.3499434261145055, "grad_norm": 0.6899801762438271, "learning_rate": 7.548083212959588e-06, "loss": 0.3408, "step": 7732 }, { "epoch": 0.3499886852229011, "grad_norm": 0.6768212134505426, "learning_rate": 7.547452579184836e-06, "loss": 0.3435, "step": 7733 }, { "epoch": 0.3500339443312967, "grad_norm": 0.6615478961791598, "learning_rate": 7.546821890673153e-06, "loss": 0.3902, "step": 7734 }, { "epoch": 0.35007920343969223, "grad_norm": 0.6393455474883397, "learning_rate": 7.546191147438089e-06, "loss": 0.3873, "step": 7735 }, { "epoch": 0.3501244625480878, "grad_norm": 0.6611638513107325, "learning_rate": 7.545560349493197e-06, "loss": 0.3244, "step": 7736 }, { "epoch": 0.35016972165648336, "grad_norm": 0.6593511958016057, "learning_rate": 7.544929496852033e-06, "loss": 0.37, "step": 7737 }, { "epoch": 0.35021498076487895, "grad_norm": 0.9726619848240113, "learning_rate": 7.544298589528148e-06, "loss": 0.346, "step": 7738 }, { "epoch": 0.3502602398732745, "grad_norm": 0.521846771020006, "learning_rate": 7.5436676275351e-06, "loss": 0.4948, "step": 7739 }, { "epoch": 0.35030549898167007, "grad_norm": 0.6224165394621645, "learning_rate": 7.54303661088645e-06, "loss": 0.3474, "step": 7740 }, { "epoch": 0.3503507580900656, "grad_norm": 0.6603027913044086, "learning_rate": 7.542405539595752e-06, "loss": 0.3299, "step": 7741 }, { "epoch": 0.3503960171984612, "grad_norm": 0.6366304885536189, "learning_rate": 7.541774413676566e-06, "loss": 0.3338, "step": 7742 }, { "epoch": 0.35044127630685673, "grad_norm": 0.623869279581973, "learning_rate": 7.541143233142456e-06, "loss": 0.3731, "step": 7743 }, { "epoch": 0.3504865354152523, "grad_norm": 0.6733659163360821, "learning_rate": 7.540511998006982e-06, "loss": 0.3644, "step": 7744 }, { "epoch": 0.3505317945236479, "grad_norm": 0.8206727756834944, "learning_rate": 7.539880708283709e-06, "loss": 0.3503, "step": 7745 }, { "epoch": 0.35057705363204344, "grad_norm": 0.5872189014995534, "learning_rate": 7.539249363986196e-06, "loss": 0.3412, "step": 7746 }, { "epoch": 0.35062231274043903, "grad_norm": 0.7261999016080413, "learning_rate": 7.538617965128018e-06, "loss": 0.3779, "step": 7747 }, { "epoch": 0.35066757184883457, "grad_norm": 0.6220194173842661, "learning_rate": 7.537986511722732e-06, "loss": 0.3461, "step": 7748 }, { "epoch": 0.35071283095723016, "grad_norm": 0.6140523177540189, "learning_rate": 7.537355003783915e-06, "loss": 0.3505, "step": 7749 }, { "epoch": 0.3507580900656257, "grad_norm": 0.4861428500111197, "learning_rate": 7.53672344132513e-06, "loss": 0.497, "step": 7750 }, { "epoch": 0.3508033491740213, "grad_norm": 0.752805323197179, "learning_rate": 7.53609182435995e-06, "loss": 0.3759, "step": 7751 }, { "epoch": 0.3508486082824168, "grad_norm": 0.6070374149228687, "learning_rate": 7.535460152901945e-06, "loss": 0.3968, "step": 7752 }, { "epoch": 0.3508938673908124, "grad_norm": 0.6517684478502604, "learning_rate": 7.534828426964687e-06, "loss": 0.3702, "step": 7753 }, { "epoch": 0.35093912649920794, "grad_norm": 0.633839696078463, "learning_rate": 7.534196646561754e-06, "loss": 0.3961, "step": 7754 }, { "epoch": 0.35098438560760353, "grad_norm": 0.34285147538363264, "learning_rate": 7.533564811706715e-06, "loss": 0.5144, "step": 7755 }, { "epoch": 0.35102964471599907, "grad_norm": 0.5787212050900984, "learning_rate": 7.532932922413152e-06, "loss": 0.3658, "step": 7756 }, { "epoch": 0.35107490382439466, "grad_norm": 0.6087775891664018, "learning_rate": 7.532300978694639e-06, "loss": 0.3936, "step": 7757 }, { "epoch": 0.35112016293279025, "grad_norm": 0.3209188592804357, "learning_rate": 7.531668980564757e-06, "loss": 0.4857, "step": 7758 }, { "epoch": 0.3511654220411858, "grad_norm": 0.6416155319043709, "learning_rate": 7.531036928037081e-06, "loss": 0.3587, "step": 7759 }, { "epoch": 0.35121068114958137, "grad_norm": 0.29564737935085245, "learning_rate": 7.530404821125197e-06, "loss": 0.4763, "step": 7760 }, { "epoch": 0.3512559402579769, "grad_norm": 0.6204602551572096, "learning_rate": 7.529772659842685e-06, "loss": 0.3247, "step": 7761 }, { "epoch": 0.3513011993663725, "grad_norm": 0.7110343797456145, "learning_rate": 7.529140444203127e-06, "loss": 0.3382, "step": 7762 }, { "epoch": 0.35134645847476803, "grad_norm": 0.5993280775274655, "learning_rate": 7.5285081742201085e-06, "loss": 0.3661, "step": 7763 }, { "epoch": 0.3513917175831636, "grad_norm": 0.6200060790127362, "learning_rate": 7.527875849907216e-06, "loss": 0.3864, "step": 7764 }, { "epoch": 0.35143697669155916, "grad_norm": 0.5925761739408619, "learning_rate": 7.527243471278034e-06, "loss": 0.3635, "step": 7765 }, { "epoch": 0.35148223579995475, "grad_norm": 0.7199944515738541, "learning_rate": 7.526611038346153e-06, "loss": 0.3621, "step": 7766 }, { "epoch": 0.3515274949083503, "grad_norm": 0.6114599544378431, "learning_rate": 7.5259785511251595e-06, "loss": 0.3113, "step": 7767 }, { "epoch": 0.35157275401674587, "grad_norm": 0.3694562932467032, "learning_rate": 7.525346009628647e-06, "loss": 0.4663, "step": 7768 }, { "epoch": 0.35161801312514146, "grad_norm": 0.6326146960746679, "learning_rate": 7.524713413870201e-06, "loss": 0.3314, "step": 7769 }, { "epoch": 0.351663272233537, "grad_norm": 0.6443492767754994, "learning_rate": 7.524080763863422e-06, "loss": 0.3561, "step": 7770 }, { "epoch": 0.3517085313419326, "grad_norm": 0.6459826223012621, "learning_rate": 7.5234480596218965e-06, "loss": 0.341, "step": 7771 }, { "epoch": 0.3517537904503281, "grad_norm": 0.6059566286637352, "learning_rate": 7.522815301159223e-06, "loss": 0.3372, "step": 7772 }, { "epoch": 0.3517990495587237, "grad_norm": 0.629434666765441, "learning_rate": 7.522182488488999e-06, "loss": 0.3587, "step": 7773 }, { "epoch": 0.35184430866711924, "grad_norm": 0.6345649812348073, "learning_rate": 7.5215496216248175e-06, "loss": 0.3562, "step": 7774 }, { "epoch": 0.35188956777551483, "grad_norm": 0.34756610246884706, "learning_rate": 7.520916700580279e-06, "loss": 0.4792, "step": 7775 }, { "epoch": 0.35193482688391037, "grad_norm": 0.6288100576988347, "learning_rate": 7.5202837253689845e-06, "loss": 0.3657, "step": 7776 }, { "epoch": 0.35198008599230596, "grad_norm": 0.6067182425724056, "learning_rate": 7.51965069600453e-06, "loss": 0.3395, "step": 7777 }, { "epoch": 0.3520253451007015, "grad_norm": 0.6321301031695498, "learning_rate": 7.519017612500524e-06, "loss": 0.3289, "step": 7778 }, { "epoch": 0.3520706042090971, "grad_norm": 0.6059871782734803, "learning_rate": 7.5183844748705645e-06, "loss": 0.3107, "step": 7779 }, { "epoch": 0.35211586331749267, "grad_norm": 0.6111636119938987, "learning_rate": 7.517751283128258e-06, "loss": 0.3416, "step": 7780 }, { "epoch": 0.3521611224258882, "grad_norm": 0.6488860545994534, "learning_rate": 7.517118037287207e-06, "loss": 0.3623, "step": 7781 }, { "epoch": 0.3522063815342838, "grad_norm": 1.131189321473581, "learning_rate": 7.516484737361023e-06, "loss": 0.358, "step": 7782 }, { "epoch": 0.35225164064267933, "grad_norm": 1.2056760182043709, "learning_rate": 7.515851383363309e-06, "loss": 0.3434, "step": 7783 }, { "epoch": 0.3522968997510749, "grad_norm": 0.6553290062431891, "learning_rate": 7.515217975307677e-06, "loss": 0.3283, "step": 7784 }, { "epoch": 0.35234215885947046, "grad_norm": 0.6224771498224234, "learning_rate": 7.514584513207734e-06, "loss": 0.3355, "step": 7785 }, { "epoch": 0.35238741796786605, "grad_norm": 0.40038324245671597, "learning_rate": 7.513950997077094e-06, "loss": 0.5041, "step": 7786 }, { "epoch": 0.3524326770762616, "grad_norm": 0.626659752232271, "learning_rate": 7.513317426929369e-06, "loss": 0.3412, "step": 7787 }, { "epoch": 0.35247793618465717, "grad_norm": 0.3044550549445809, "learning_rate": 7.512683802778169e-06, "loss": 0.5069, "step": 7788 }, { "epoch": 0.3525231952930527, "grad_norm": 0.719800344359679, "learning_rate": 7.512050124637114e-06, "loss": 0.3139, "step": 7789 }, { "epoch": 0.3525684544014483, "grad_norm": 0.3408857775887603, "learning_rate": 7.511416392519815e-06, "loss": 0.4754, "step": 7790 }, { "epoch": 0.35261371350984383, "grad_norm": 0.6812537517058064, "learning_rate": 7.51078260643989e-06, "loss": 0.3829, "step": 7791 }, { "epoch": 0.3526589726182394, "grad_norm": 0.703618849968868, "learning_rate": 7.5101487664109605e-06, "loss": 0.36, "step": 7792 }, { "epoch": 0.352704231726635, "grad_norm": 0.7611903498969986, "learning_rate": 7.509514872446642e-06, "loss": 0.3385, "step": 7793 }, { "epoch": 0.35274949083503054, "grad_norm": 0.3892495760709264, "learning_rate": 7.5088809245605555e-06, "loss": 0.4742, "step": 7794 }, { "epoch": 0.35279474994342613, "grad_norm": 0.3789789286921561, "learning_rate": 7.508246922766326e-06, "loss": 0.4939, "step": 7795 }, { "epoch": 0.35284000905182167, "grad_norm": 0.6595053522544354, "learning_rate": 7.507612867077571e-06, "loss": 0.3883, "step": 7796 }, { "epoch": 0.35288526816021726, "grad_norm": 0.2995636894582247, "learning_rate": 7.506978757507919e-06, "loss": 0.4853, "step": 7797 }, { "epoch": 0.3529305272686128, "grad_norm": 0.6309037212275371, "learning_rate": 7.506344594070991e-06, "loss": 0.3525, "step": 7798 }, { "epoch": 0.3529757863770084, "grad_norm": 0.6936880134731889, "learning_rate": 7.5057103767804175e-06, "loss": 0.3816, "step": 7799 }, { "epoch": 0.3530210454854039, "grad_norm": 0.630733616335528, "learning_rate": 7.505076105649822e-06, "loss": 0.3118, "step": 7800 }, { "epoch": 0.3530663045937995, "grad_norm": 0.6846633350976611, "learning_rate": 7.504441780692836e-06, "loss": 0.4105, "step": 7801 }, { "epoch": 0.35311156370219504, "grad_norm": 0.6526336163178629, "learning_rate": 7.5038074019230865e-06, "loss": 0.4859, "step": 7802 }, { "epoch": 0.35315682281059063, "grad_norm": 0.6354983121815554, "learning_rate": 7.503172969354206e-06, "loss": 0.3362, "step": 7803 }, { "epoch": 0.3532020819189862, "grad_norm": 0.616840523677705, "learning_rate": 7.502538482999829e-06, "loss": 0.3745, "step": 7804 }, { "epoch": 0.35324734102738176, "grad_norm": 0.6277516775555112, "learning_rate": 7.501903942873584e-06, "loss": 0.3786, "step": 7805 }, { "epoch": 0.35329260013577735, "grad_norm": 0.6373393336769787, "learning_rate": 7.5012693489891065e-06, "loss": 0.3646, "step": 7806 }, { "epoch": 0.3533378592441729, "grad_norm": 0.6169337950282993, "learning_rate": 7.500634701360034e-06, "loss": 0.332, "step": 7807 }, { "epoch": 0.35338311835256847, "grad_norm": 0.3384868150450684, "learning_rate": 7.500000000000001e-06, "loss": 0.5114, "step": 7808 }, { "epoch": 0.353428377460964, "grad_norm": 0.6187640351076604, "learning_rate": 7.499365244922646e-06, "loss": 0.3781, "step": 7809 }, { "epoch": 0.3534736365693596, "grad_norm": 0.6555446380655447, "learning_rate": 7.498730436141609e-06, "loss": 0.3588, "step": 7810 }, { "epoch": 0.35351889567775513, "grad_norm": 0.6239720714131426, "learning_rate": 7.498095573670528e-06, "loss": 0.3885, "step": 7811 }, { "epoch": 0.3535641547861507, "grad_norm": 0.6362622564026761, "learning_rate": 7.497460657523047e-06, "loss": 0.391, "step": 7812 }, { "epoch": 0.35360941389454625, "grad_norm": 0.7405890819555525, "learning_rate": 7.496825687712805e-06, "loss": 0.3378, "step": 7813 }, { "epoch": 0.35365467300294184, "grad_norm": 0.6959052316973275, "learning_rate": 7.496190664253449e-06, "loss": 0.3531, "step": 7814 }, { "epoch": 0.35369993211133743, "grad_norm": 0.6077571764611213, "learning_rate": 7.495555587158622e-06, "loss": 0.3421, "step": 7815 }, { "epoch": 0.35374519121973297, "grad_norm": 0.6469514940955886, "learning_rate": 7.49492045644197e-06, "loss": 0.3424, "step": 7816 }, { "epoch": 0.35379045032812856, "grad_norm": 0.3811569747548171, "learning_rate": 7.494285272117139e-06, "loss": 0.4962, "step": 7817 }, { "epoch": 0.3538357094365241, "grad_norm": 0.3295220610717146, "learning_rate": 7.493650034197779e-06, "loss": 0.5015, "step": 7818 }, { "epoch": 0.3538809685449197, "grad_norm": 0.6595865740341526, "learning_rate": 7.493014742697537e-06, "loss": 0.3612, "step": 7819 }, { "epoch": 0.3539262276533152, "grad_norm": 0.2732336585442226, "learning_rate": 7.4923793976300665e-06, "loss": 0.4877, "step": 7820 }, { "epoch": 0.3539714867617108, "grad_norm": 0.797829661466962, "learning_rate": 7.4917439990090165e-06, "loss": 0.3558, "step": 7821 }, { "epoch": 0.35401674587010634, "grad_norm": 0.6327197220334714, "learning_rate": 7.491108546848041e-06, "loss": 0.3307, "step": 7822 }, { "epoch": 0.35406200497850193, "grad_norm": 0.4456977492630711, "learning_rate": 7.490473041160794e-06, "loss": 0.4877, "step": 7823 }, { "epoch": 0.35410726408689747, "grad_norm": 0.37679343862054404, "learning_rate": 7.489837481960931e-06, "loss": 0.512, "step": 7824 }, { "epoch": 0.35415252319529306, "grad_norm": 0.688862193477084, "learning_rate": 7.489201869262106e-06, "loss": 0.3437, "step": 7825 }, { "epoch": 0.3541977823036886, "grad_norm": 0.612011506777598, "learning_rate": 7.48856620307798e-06, "loss": 0.3613, "step": 7826 }, { "epoch": 0.3542430414120842, "grad_norm": 0.6547800401174696, "learning_rate": 7.487930483422206e-06, "loss": 0.3514, "step": 7827 }, { "epoch": 0.35428830052047977, "grad_norm": 0.7017144490010115, "learning_rate": 7.4872947103084495e-06, "loss": 0.3471, "step": 7828 }, { "epoch": 0.3543335596288753, "grad_norm": 0.6039843313631512, "learning_rate": 7.4866588837503686e-06, "loss": 0.3258, "step": 7829 }, { "epoch": 0.3543788187372709, "grad_norm": 0.6856001322498695, "learning_rate": 7.486023003761625e-06, "loss": 0.3539, "step": 7830 }, { "epoch": 0.35442407784566643, "grad_norm": 0.6239377737887924, "learning_rate": 7.48538707035588e-06, "loss": 0.3605, "step": 7831 }, { "epoch": 0.354469336954062, "grad_norm": 0.7392468682747669, "learning_rate": 7.484751083546804e-06, "loss": 0.3635, "step": 7832 }, { "epoch": 0.35451459606245755, "grad_norm": 0.6336965745586026, "learning_rate": 7.484115043348056e-06, "loss": 0.349, "step": 7833 }, { "epoch": 0.35455985517085314, "grad_norm": 0.6547582868007384, "learning_rate": 7.4834789497733065e-06, "loss": 0.4627, "step": 7834 }, { "epoch": 0.3546051142792487, "grad_norm": 0.4528387555975036, "learning_rate": 7.482842802836221e-06, "loss": 0.4812, "step": 7835 }, { "epoch": 0.35465037338764427, "grad_norm": 0.6591548404559565, "learning_rate": 7.482206602550469e-06, "loss": 0.4041, "step": 7836 }, { "epoch": 0.3546956324960398, "grad_norm": 0.8262674444288512, "learning_rate": 7.481570348929722e-06, "loss": 0.3721, "step": 7837 }, { "epoch": 0.3547408916044354, "grad_norm": 0.7090929955067138, "learning_rate": 7.480934041987649e-06, "loss": 0.3699, "step": 7838 }, { "epoch": 0.354786150712831, "grad_norm": 1.111779718586593, "learning_rate": 7.480297681737922e-06, "loss": 0.3902, "step": 7839 }, { "epoch": 0.3548314098212265, "grad_norm": 0.6493248511249113, "learning_rate": 7.479661268194217e-06, "loss": 0.3197, "step": 7840 }, { "epoch": 0.3548766689296221, "grad_norm": 0.6947138559490463, "learning_rate": 7.479024801370206e-06, "loss": 0.3554, "step": 7841 }, { "epoch": 0.35492192803801764, "grad_norm": 0.750127064883699, "learning_rate": 7.478388281279566e-06, "loss": 0.3553, "step": 7842 }, { "epoch": 0.35496718714641323, "grad_norm": 0.6068588275400576, "learning_rate": 7.477751707935974e-06, "loss": 0.3219, "step": 7843 }, { "epoch": 0.35501244625480877, "grad_norm": 0.6667280699696232, "learning_rate": 7.477115081353107e-06, "loss": 0.3476, "step": 7844 }, { "epoch": 0.35505770536320436, "grad_norm": 0.6720148038383784, "learning_rate": 7.476478401544647e-06, "loss": 0.3364, "step": 7845 }, { "epoch": 0.3551029644715999, "grad_norm": 0.6749944921167566, "learning_rate": 7.475841668524268e-06, "loss": 0.3578, "step": 7846 }, { "epoch": 0.3551482235799955, "grad_norm": 0.6546624428916041, "learning_rate": 7.475204882305659e-06, "loss": 0.3407, "step": 7847 }, { "epoch": 0.355193482688391, "grad_norm": 0.6187047516268036, "learning_rate": 7.474568042902497e-06, "loss": 0.3473, "step": 7848 }, { "epoch": 0.3552387417967866, "grad_norm": 0.6449786289711059, "learning_rate": 7.4739311503284695e-06, "loss": 0.3457, "step": 7849 }, { "epoch": 0.35528400090518214, "grad_norm": 0.6612723807417603, "learning_rate": 7.473294204597259e-06, "loss": 0.3385, "step": 7850 }, { "epoch": 0.35532926001357773, "grad_norm": 0.6023672840038461, "learning_rate": 7.472657205722551e-06, "loss": 0.3679, "step": 7851 }, { "epoch": 0.3553745191219733, "grad_norm": 0.682869276494719, "learning_rate": 7.472020153718036e-06, "loss": 0.3502, "step": 7852 }, { "epoch": 0.35541977823036885, "grad_norm": 0.658596181793742, "learning_rate": 7.471383048597399e-06, "loss": 0.3598, "step": 7853 }, { "epoch": 0.35546503733876444, "grad_norm": 1.4779270416964452, "learning_rate": 7.47074589037433e-06, "loss": 0.502, "step": 7854 }, { "epoch": 0.35551029644716, "grad_norm": 0.6974218981775188, "learning_rate": 7.470108679062521e-06, "loss": 0.3551, "step": 7855 }, { "epoch": 0.35555555555555557, "grad_norm": 0.7470662270954693, "learning_rate": 7.469471414675662e-06, "loss": 0.3752, "step": 7856 }, { "epoch": 0.3556008146639511, "grad_norm": 0.698352138931862, "learning_rate": 7.468834097227448e-06, "loss": 0.3742, "step": 7857 }, { "epoch": 0.3556460737723467, "grad_norm": 0.70489735180748, "learning_rate": 7.4681967267315715e-06, "loss": 0.3402, "step": 7858 }, { "epoch": 0.35569133288074223, "grad_norm": 0.6708085387779145, "learning_rate": 7.4675593032017266e-06, "loss": 0.3232, "step": 7859 }, { "epoch": 0.3557365919891378, "grad_norm": 0.7004600236705987, "learning_rate": 7.466921826651612e-06, "loss": 0.3686, "step": 7860 }, { "epoch": 0.35578185109753335, "grad_norm": 0.6822899888012113, "learning_rate": 7.466284297094922e-06, "loss": 0.3684, "step": 7861 }, { "epoch": 0.35582711020592894, "grad_norm": 0.6431997953727273, "learning_rate": 7.46564671454536e-06, "loss": 0.3443, "step": 7862 }, { "epoch": 0.35587236931432453, "grad_norm": 0.6400738731668444, "learning_rate": 7.46500907901662e-06, "loss": 0.3652, "step": 7863 }, { "epoch": 0.35591762842272007, "grad_norm": 0.6476788329341486, "learning_rate": 7.4643713905224065e-06, "loss": 0.3405, "step": 7864 }, { "epoch": 0.35596288753111566, "grad_norm": 0.9223979899731618, "learning_rate": 7.463733649076421e-06, "loss": 0.4326, "step": 7865 }, { "epoch": 0.3560081466395112, "grad_norm": 0.6300366430679043, "learning_rate": 7.4630958546923674e-06, "loss": 0.3519, "step": 7866 }, { "epoch": 0.3560534057479068, "grad_norm": 1.2353016132302856, "learning_rate": 7.462458007383946e-06, "loss": 0.4952, "step": 7867 }, { "epoch": 0.3560986648563023, "grad_norm": 0.5951207777646207, "learning_rate": 7.461820107164867e-06, "loss": 0.3544, "step": 7868 }, { "epoch": 0.3561439239646979, "grad_norm": 1.5186979849658055, "learning_rate": 7.461182154048832e-06, "loss": 0.3552, "step": 7869 }, { "epoch": 0.35618918307309344, "grad_norm": 0.4881240184949437, "learning_rate": 7.460544148049555e-06, "loss": 0.492, "step": 7870 }, { "epoch": 0.35623444218148903, "grad_norm": 0.8171773576837302, "learning_rate": 7.45990608918074e-06, "loss": 0.3741, "step": 7871 }, { "epoch": 0.35627970128988456, "grad_norm": 0.7048751897975343, "learning_rate": 7.459267977456097e-06, "loss": 0.3668, "step": 7872 }, { "epoch": 0.35632496039828015, "grad_norm": 0.68364472816167, "learning_rate": 7.45862981288934e-06, "loss": 0.3963, "step": 7873 }, { "epoch": 0.35637021950667574, "grad_norm": 0.6353143543372414, "learning_rate": 7.457991595494178e-06, "loss": 0.3283, "step": 7874 }, { "epoch": 0.3564154786150713, "grad_norm": 0.7565528040727152, "learning_rate": 7.457353325284327e-06, "loss": 0.4874, "step": 7875 }, { "epoch": 0.35646073772346687, "grad_norm": 0.6582202067423059, "learning_rate": 7.4567150022735e-06, "loss": 0.381, "step": 7876 }, { "epoch": 0.3565059968318624, "grad_norm": 0.6378632328892668, "learning_rate": 7.45607662647541e-06, "loss": 0.3426, "step": 7877 }, { "epoch": 0.356551255940258, "grad_norm": 0.4406770262620411, "learning_rate": 7.45543819790378e-06, "loss": 0.476, "step": 7878 }, { "epoch": 0.35659651504865353, "grad_norm": 0.36194061351320284, "learning_rate": 7.454799716572324e-06, "loss": 0.496, "step": 7879 }, { "epoch": 0.3566417741570491, "grad_norm": 0.6696797075276905, "learning_rate": 7.45416118249476e-06, "loss": 0.3941, "step": 7880 }, { "epoch": 0.35668703326544465, "grad_norm": 0.4478173564882491, "learning_rate": 7.4535225956848115e-06, "loss": 0.4982, "step": 7881 }, { "epoch": 0.35673229237384024, "grad_norm": 0.6653358361375153, "learning_rate": 7.452883956156197e-06, "loss": 0.3999, "step": 7882 }, { "epoch": 0.3567775514822358, "grad_norm": 0.6890302334830477, "learning_rate": 7.452245263922638e-06, "loss": 0.3903, "step": 7883 }, { "epoch": 0.35682281059063137, "grad_norm": 0.6117361768377864, "learning_rate": 7.4516065189978625e-06, "loss": 0.3289, "step": 7884 }, { "epoch": 0.3568680696990269, "grad_norm": 0.6131998172217197, "learning_rate": 7.45096772139559e-06, "loss": 0.5051, "step": 7885 }, { "epoch": 0.3569133288074225, "grad_norm": 0.6483207055632356, "learning_rate": 7.450328871129551e-06, "loss": 0.4946, "step": 7886 }, { "epoch": 0.3569585879158181, "grad_norm": 0.7360880947446763, "learning_rate": 7.4496899682134684e-06, "loss": 0.3814, "step": 7887 }, { "epoch": 0.3570038470242136, "grad_norm": 0.4969670850818326, "learning_rate": 7.449051012661073e-06, "loss": 0.5084, "step": 7888 }, { "epoch": 0.3570491061326092, "grad_norm": 0.654029005722767, "learning_rate": 7.4484120044860915e-06, "loss": 0.3032, "step": 7889 }, { "epoch": 0.35709436524100474, "grad_norm": 0.7367063069317463, "learning_rate": 7.447772943702258e-06, "loss": 0.3829, "step": 7890 }, { "epoch": 0.35713962434940033, "grad_norm": 0.6245920443685149, "learning_rate": 7.4471338303233e-06, "loss": 0.3148, "step": 7891 }, { "epoch": 0.35718488345779587, "grad_norm": 0.6574094557932094, "learning_rate": 7.4464946643629535e-06, "loss": 0.3702, "step": 7892 }, { "epoch": 0.35723014256619146, "grad_norm": 0.5573694083411818, "learning_rate": 7.4458554458349485e-06, "loss": 0.344, "step": 7893 }, { "epoch": 0.357275401674587, "grad_norm": 0.6893382885904867, "learning_rate": 7.445216174753022e-06, "loss": 0.3853, "step": 7894 }, { "epoch": 0.3573206607829826, "grad_norm": 0.6443073771654781, "learning_rate": 7.444576851130911e-06, "loss": 0.3627, "step": 7895 }, { "epoch": 0.3573659198913781, "grad_norm": 0.659029696855723, "learning_rate": 7.443937474982351e-06, "loss": 0.3524, "step": 7896 }, { "epoch": 0.3574111789997737, "grad_norm": 0.6751787259300283, "learning_rate": 7.443298046321082e-06, "loss": 0.3294, "step": 7897 }, { "epoch": 0.3574564381081693, "grad_norm": 0.6531740656256767, "learning_rate": 7.442658565160838e-06, "loss": 0.3308, "step": 7898 }, { "epoch": 0.35750169721656483, "grad_norm": 0.7154670512286024, "learning_rate": 7.442019031515368e-06, "loss": 0.2847, "step": 7899 }, { "epoch": 0.3575469563249604, "grad_norm": 1.021117071184924, "learning_rate": 7.4413794453984065e-06, "loss": 0.4848, "step": 7900 }, { "epoch": 0.35759221543335595, "grad_norm": 0.880368866321953, "learning_rate": 7.4407398068237e-06, "loss": 0.4649, "step": 7901 }, { "epoch": 0.35763747454175154, "grad_norm": 0.6463014724398792, "learning_rate": 7.440100115804991e-06, "loss": 0.3355, "step": 7902 }, { "epoch": 0.3576827336501471, "grad_norm": 0.6454359331546151, "learning_rate": 7.439460372356025e-06, "loss": 0.3782, "step": 7903 }, { "epoch": 0.35772799275854267, "grad_norm": 0.6732821923908577, "learning_rate": 7.438820576490546e-06, "loss": 0.3673, "step": 7904 }, { "epoch": 0.3577732518669382, "grad_norm": 0.6885551892394901, "learning_rate": 7.438180728222306e-06, "loss": 0.3884, "step": 7905 }, { "epoch": 0.3578185109753338, "grad_norm": 1.0716877491064174, "learning_rate": 7.4375408275650475e-06, "loss": 0.4863, "step": 7906 }, { "epoch": 0.3578637700837293, "grad_norm": 0.6553793841227316, "learning_rate": 7.436900874532526e-06, "loss": 0.3715, "step": 7907 }, { "epoch": 0.3579090291921249, "grad_norm": 0.8814940863816225, "learning_rate": 7.436260869138486e-06, "loss": 0.5012, "step": 7908 }, { "epoch": 0.3579542883005205, "grad_norm": 0.6642419514151628, "learning_rate": 7.435620811396684e-06, "loss": 0.3533, "step": 7909 }, { "epoch": 0.35799954740891604, "grad_norm": 0.661127123804159, "learning_rate": 7.434980701320871e-06, "loss": 0.3352, "step": 7910 }, { "epoch": 0.35804480651731163, "grad_norm": 0.68747966185606, "learning_rate": 7.4343405389248e-06, "loss": 0.401, "step": 7911 }, { "epoch": 0.35809006562570717, "grad_norm": 0.6979271552290265, "learning_rate": 7.43370032422223e-06, "loss": 0.3478, "step": 7912 }, { "epoch": 0.35813532473410276, "grad_norm": 0.7081415200993405, "learning_rate": 7.433060057226913e-06, "loss": 0.3571, "step": 7913 }, { "epoch": 0.3581805838424983, "grad_norm": 0.6564714199182704, "learning_rate": 7.432419737952607e-06, "loss": 0.3477, "step": 7914 }, { "epoch": 0.3582258429508939, "grad_norm": 0.6726460485709904, "learning_rate": 7.431779366413073e-06, "loss": 0.3791, "step": 7915 }, { "epoch": 0.3582711020592894, "grad_norm": 1.088140205794908, "learning_rate": 7.431138942622069e-06, "loss": 0.5026, "step": 7916 }, { "epoch": 0.358316361167685, "grad_norm": 0.8023510194742443, "learning_rate": 7.430498466593355e-06, "loss": 0.5146, "step": 7917 }, { "epoch": 0.35836162027608054, "grad_norm": 0.6175256937878455, "learning_rate": 7.429857938340693e-06, "loss": 0.32, "step": 7918 }, { "epoch": 0.35840687938447613, "grad_norm": 0.6853174040711785, "learning_rate": 7.429217357877848e-06, "loss": 0.366, "step": 7919 }, { "epoch": 0.35845213849287166, "grad_norm": 0.5864777621038076, "learning_rate": 7.4285767252185824e-06, "loss": 0.4921, "step": 7920 }, { "epoch": 0.35849739760126725, "grad_norm": 0.7552732461077418, "learning_rate": 7.427936040376662e-06, "loss": 0.4547, "step": 7921 }, { "epoch": 0.35854265670966284, "grad_norm": 0.6815214328335388, "learning_rate": 7.427295303365851e-06, "loss": 0.3188, "step": 7922 }, { "epoch": 0.3585879158180584, "grad_norm": 0.6302372503519815, "learning_rate": 7.426654514199921e-06, "loss": 0.3368, "step": 7923 }, { "epoch": 0.35863317492645397, "grad_norm": 0.7116580236180317, "learning_rate": 7.426013672892639e-06, "loss": 0.379, "step": 7924 }, { "epoch": 0.3586784340348495, "grad_norm": 0.6324604196524789, "learning_rate": 7.425372779457771e-06, "loss": 0.3561, "step": 7925 }, { "epoch": 0.3587236931432451, "grad_norm": 0.9011199431306274, "learning_rate": 7.424731833909094e-06, "loss": 0.3985, "step": 7926 }, { "epoch": 0.3587689522516406, "grad_norm": 0.6233800357442847, "learning_rate": 7.4240908362603745e-06, "loss": 0.4937, "step": 7927 }, { "epoch": 0.3588142113600362, "grad_norm": 0.6580323715547287, "learning_rate": 7.423449786525391e-06, "loss": 0.3669, "step": 7928 }, { "epoch": 0.35885947046843175, "grad_norm": 0.6788612202436244, "learning_rate": 7.422808684717913e-06, "loss": 0.3993, "step": 7929 }, { "epoch": 0.35890472957682734, "grad_norm": 0.45794257757974255, "learning_rate": 7.422167530851716e-06, "loss": 0.4668, "step": 7930 }, { "epoch": 0.3589499886852229, "grad_norm": 0.7067797996645679, "learning_rate": 7.42152632494058e-06, "loss": 0.4117, "step": 7931 }, { "epoch": 0.35899524779361847, "grad_norm": 0.7374060859848149, "learning_rate": 7.42088506699828e-06, "loss": 0.4242, "step": 7932 }, { "epoch": 0.35904050690201406, "grad_norm": 0.6940096071197507, "learning_rate": 7.420243757038593e-06, "loss": 0.3541, "step": 7933 }, { "epoch": 0.3590857660104096, "grad_norm": 0.46216186246289315, "learning_rate": 7.419602395075304e-06, "loss": 0.479, "step": 7934 }, { "epoch": 0.3591310251188052, "grad_norm": 0.7182676372748836, "learning_rate": 7.418960981122188e-06, "loss": 0.3263, "step": 7935 }, { "epoch": 0.3591762842272007, "grad_norm": 0.6259753858694471, "learning_rate": 7.418319515193032e-06, "loss": 0.3706, "step": 7936 }, { "epoch": 0.3592215433355963, "grad_norm": 0.8776219507096461, "learning_rate": 7.4176779973016156e-06, "loss": 0.3708, "step": 7937 }, { "epoch": 0.35926680244399184, "grad_norm": 0.41646167675109397, "learning_rate": 7.417036427461726e-06, "loss": 0.4882, "step": 7938 }, { "epoch": 0.35931206155238743, "grad_norm": 0.7252858911467089, "learning_rate": 7.416394805687145e-06, "loss": 0.3602, "step": 7939 }, { "epoch": 0.35935732066078296, "grad_norm": 0.3726637247353118, "learning_rate": 7.415753131991661e-06, "loss": 0.4785, "step": 7940 }, { "epoch": 0.35940257976917855, "grad_norm": 0.740712850578873, "learning_rate": 7.415111406389063e-06, "loss": 0.3669, "step": 7941 }, { "epoch": 0.3594478388775741, "grad_norm": 0.6646504393206524, "learning_rate": 7.414469628893137e-06, "loss": 0.3762, "step": 7942 }, { "epoch": 0.3594930979859697, "grad_norm": 0.6356744766828022, "learning_rate": 7.413827799517674e-06, "loss": 0.3475, "step": 7943 }, { "epoch": 0.35953835709436527, "grad_norm": 0.607166637688898, "learning_rate": 7.413185918276467e-06, "loss": 0.3348, "step": 7944 }, { "epoch": 0.3595836162027608, "grad_norm": 0.7131483598239857, "learning_rate": 7.412543985183306e-06, "loss": 0.4069, "step": 7945 }, { "epoch": 0.3596288753111564, "grad_norm": 0.6299425375662295, "learning_rate": 7.411902000251983e-06, "loss": 0.3566, "step": 7946 }, { "epoch": 0.3596741344195519, "grad_norm": 0.5049556350096941, "learning_rate": 7.411259963496294e-06, "loss": 0.4626, "step": 7947 }, { "epoch": 0.3597193935279475, "grad_norm": 0.6506714341238237, "learning_rate": 7.410617874930034e-06, "loss": 0.3619, "step": 7948 }, { "epoch": 0.35976465263634305, "grad_norm": 0.639220736506604, "learning_rate": 7.409975734566998e-06, "loss": 0.323, "step": 7949 }, { "epoch": 0.35980991174473864, "grad_norm": 0.7206397484804806, "learning_rate": 7.4093335424209875e-06, "loss": 0.3704, "step": 7950 }, { "epoch": 0.3598551708531342, "grad_norm": 0.37597732159073716, "learning_rate": 7.4086912985057976e-06, "loss": 0.4924, "step": 7951 }, { "epoch": 0.35990042996152977, "grad_norm": 0.6455469760980388, "learning_rate": 7.40804900283523e-06, "loss": 0.3768, "step": 7952 }, { "epoch": 0.3599456890699253, "grad_norm": 0.6301467889728702, "learning_rate": 7.407406655423086e-06, "loss": 0.3262, "step": 7953 }, { "epoch": 0.3599909481783209, "grad_norm": 0.6135304956212745, "learning_rate": 7.4067642562831656e-06, "loss": 0.3356, "step": 7954 }, { "epoch": 0.3600362072867164, "grad_norm": 0.6199600583542145, "learning_rate": 7.406121805429274e-06, "loss": 0.3211, "step": 7955 }, { "epoch": 0.360081466395112, "grad_norm": 0.39504063291210206, "learning_rate": 7.405479302875212e-06, "loss": 0.4783, "step": 7956 }, { "epoch": 0.3601267255035076, "grad_norm": 0.6760457687153064, "learning_rate": 7.404836748634791e-06, "loss": 0.389, "step": 7957 }, { "epoch": 0.36017198461190314, "grad_norm": 0.6950822303958237, "learning_rate": 7.404194142721812e-06, "loss": 0.3426, "step": 7958 }, { "epoch": 0.36021724372029873, "grad_norm": 0.7108334017246865, "learning_rate": 7.403551485150086e-06, "loss": 0.345, "step": 7959 }, { "epoch": 0.36026250282869426, "grad_norm": 0.5868546699602407, "learning_rate": 7.402908775933419e-06, "loss": 0.3175, "step": 7960 }, { "epoch": 0.36030776193708985, "grad_norm": 0.34926700370057423, "learning_rate": 7.402266015085624e-06, "loss": 0.4923, "step": 7961 }, { "epoch": 0.3603530210454854, "grad_norm": 0.31677625195627207, "learning_rate": 7.401623202620509e-06, "loss": 0.4733, "step": 7962 }, { "epoch": 0.360398280153881, "grad_norm": 0.6652658900532104, "learning_rate": 7.40098033855189e-06, "loss": 0.3187, "step": 7963 }, { "epoch": 0.3604435392622765, "grad_norm": 0.3583699712794321, "learning_rate": 7.4003374228935746e-06, "loss": 0.5076, "step": 7964 }, { "epoch": 0.3604887983706721, "grad_norm": 0.6172428639056611, "learning_rate": 7.399694455659382e-06, "loss": 0.3531, "step": 7965 }, { "epoch": 0.36053405747906764, "grad_norm": 0.30345325260551814, "learning_rate": 7.399051436863125e-06, "loss": 0.4982, "step": 7966 }, { "epoch": 0.36057931658746323, "grad_norm": 0.7233199446249501, "learning_rate": 7.39840836651862e-06, "loss": 0.3837, "step": 7967 }, { "epoch": 0.3606245756958588, "grad_norm": 0.6763101826128397, "learning_rate": 7.3977652446396855e-06, "loss": 0.3595, "step": 7968 }, { "epoch": 0.36066983480425435, "grad_norm": 0.6170249152749456, "learning_rate": 7.397122071240141e-06, "loss": 0.3527, "step": 7969 }, { "epoch": 0.36071509391264994, "grad_norm": 0.5710552406604417, "learning_rate": 7.396478846333805e-06, "loss": 0.3615, "step": 7970 }, { "epoch": 0.3607603530210455, "grad_norm": 0.6389271121653773, "learning_rate": 7.395835569934498e-06, "loss": 0.3775, "step": 7971 }, { "epoch": 0.36080561212944107, "grad_norm": 0.6142088904895509, "learning_rate": 7.395192242056044e-06, "loss": 0.3772, "step": 7972 }, { "epoch": 0.3608508712378366, "grad_norm": 0.6090371660988895, "learning_rate": 7.394548862712264e-06, "loss": 0.3425, "step": 7973 }, { "epoch": 0.3608961303462322, "grad_norm": 0.35945651666517714, "learning_rate": 7.393905431916985e-06, "loss": 0.5164, "step": 7974 }, { "epoch": 0.3609413894546277, "grad_norm": 0.6718626629578323, "learning_rate": 7.393261949684027e-06, "loss": 0.3691, "step": 7975 }, { "epoch": 0.3609866485630233, "grad_norm": 0.32284190264535156, "learning_rate": 7.392618416027224e-06, "loss": 0.4721, "step": 7976 }, { "epoch": 0.36103190767141885, "grad_norm": 0.6600892140380253, "learning_rate": 7.3919748309603965e-06, "loss": 0.3766, "step": 7977 }, { "epoch": 0.36107716677981444, "grad_norm": 0.6506458136837409, "learning_rate": 7.391331194497379e-06, "loss": 0.352, "step": 7978 }, { "epoch": 0.36112242588821, "grad_norm": 0.6494080072148609, "learning_rate": 7.3906875066519964e-06, "loss": 0.3817, "step": 7979 }, { "epoch": 0.36116768499660556, "grad_norm": 0.6758242870450416, "learning_rate": 7.390043767438083e-06, "loss": 0.31, "step": 7980 }, { "epoch": 0.36121294410500115, "grad_norm": 0.6400939611532723, "learning_rate": 7.389399976869469e-06, "loss": 0.3563, "step": 7981 }, { "epoch": 0.3612582032133967, "grad_norm": 0.673003801643574, "learning_rate": 7.388756134959989e-06, "loss": 0.318, "step": 7982 }, { "epoch": 0.3613034623217923, "grad_norm": 0.609327398367172, "learning_rate": 7.388112241723475e-06, "loss": 0.3677, "step": 7983 }, { "epoch": 0.3613487214301878, "grad_norm": 0.6815401431992644, "learning_rate": 7.387468297173764e-06, "loss": 0.3236, "step": 7984 }, { "epoch": 0.3613939805385834, "grad_norm": 0.6689289187919621, "learning_rate": 7.386824301324691e-06, "loss": 0.4191, "step": 7985 }, { "epoch": 0.36143923964697894, "grad_norm": 0.6081946398697645, "learning_rate": 7.386180254190095e-06, "loss": 0.351, "step": 7986 }, { "epoch": 0.36148449875537453, "grad_norm": 0.7500202226223562, "learning_rate": 7.3855361557838145e-06, "loss": 0.3843, "step": 7987 }, { "epoch": 0.36152975786377006, "grad_norm": 0.6559071916954051, "learning_rate": 7.384892006119687e-06, "loss": 0.3757, "step": 7988 }, { "epoch": 0.36157501697216565, "grad_norm": 0.4872198828882111, "learning_rate": 7.384247805211556e-06, "loss": 0.4832, "step": 7989 }, { "epoch": 0.3616202760805612, "grad_norm": 0.3916520443560128, "learning_rate": 7.383603553073262e-06, "loss": 0.4856, "step": 7990 }, { "epoch": 0.3616655351889568, "grad_norm": 0.5976092297300865, "learning_rate": 7.382959249718648e-06, "loss": 0.3566, "step": 7991 }, { "epoch": 0.36171079429735237, "grad_norm": 0.67388547399459, "learning_rate": 7.3823148951615605e-06, "loss": 0.3494, "step": 7992 }, { "epoch": 0.3617560534057479, "grad_norm": 0.6173366485190657, "learning_rate": 7.38167048941584e-06, "loss": 0.3578, "step": 7993 }, { "epoch": 0.3618013125141435, "grad_norm": 0.5931781342085648, "learning_rate": 7.381026032495338e-06, "loss": 0.348, "step": 7994 }, { "epoch": 0.361846571622539, "grad_norm": 0.6809323889524895, "learning_rate": 7.3803815244138976e-06, "loss": 0.4096, "step": 7995 }, { "epoch": 0.3618918307309346, "grad_norm": 0.6209283680560373, "learning_rate": 7.379736965185369e-06, "loss": 0.3388, "step": 7996 }, { "epoch": 0.36193708983933015, "grad_norm": 0.6650202358554252, "learning_rate": 7.379092354823602e-06, "loss": 0.3515, "step": 7997 }, { "epoch": 0.36198234894772574, "grad_norm": 0.7646399480977066, "learning_rate": 7.378447693342447e-06, "loss": 0.4754, "step": 7998 }, { "epoch": 0.3620276080561213, "grad_norm": 0.6603043887038389, "learning_rate": 7.377802980755756e-06, "loss": 0.3863, "step": 7999 }, { "epoch": 0.36207286716451687, "grad_norm": 0.42204413689316006, "learning_rate": 7.377158217077381e-06, "loss": 0.5074, "step": 8000 }, { "epoch": 0.3621181262729124, "grad_norm": 0.32598493421669134, "learning_rate": 7.3765134023211785e-06, "loss": 0.499, "step": 8001 }, { "epoch": 0.362163385381308, "grad_norm": 0.6719469428185199, "learning_rate": 7.375868536501001e-06, "loss": 0.325, "step": 8002 }, { "epoch": 0.3622086444897036, "grad_norm": 0.6800561272443149, "learning_rate": 7.3752236196307045e-06, "loss": 0.3806, "step": 8003 }, { "epoch": 0.3622539035980991, "grad_norm": 0.6730755539353572, "learning_rate": 7.374578651724149e-06, "loss": 0.3323, "step": 8004 }, { "epoch": 0.3622991627064947, "grad_norm": 0.6339768447450999, "learning_rate": 7.373933632795192e-06, "loss": 0.354, "step": 8005 }, { "epoch": 0.36234442181489024, "grad_norm": 0.651900578435686, "learning_rate": 7.37328856285769e-06, "loss": 0.3718, "step": 8006 }, { "epoch": 0.36238968092328583, "grad_norm": 0.6331718571481463, "learning_rate": 7.372643441925508e-06, "loss": 0.3649, "step": 8007 }, { "epoch": 0.36243494003168136, "grad_norm": 0.6150645079124384, "learning_rate": 7.371998270012504e-06, "loss": 0.3652, "step": 8008 }, { "epoch": 0.36248019914007695, "grad_norm": 0.6634436052319046, "learning_rate": 7.371353047132542e-06, "loss": 0.3629, "step": 8009 }, { "epoch": 0.3625254582484725, "grad_norm": 1.0406447821867784, "learning_rate": 7.370707773299486e-06, "loss": 0.5121, "step": 8010 }, { "epoch": 0.3625707173568681, "grad_norm": 0.6452982035033098, "learning_rate": 7.370062448527202e-06, "loss": 0.3304, "step": 8011 }, { "epoch": 0.3626159764652636, "grad_norm": 0.7508094008376939, "learning_rate": 7.369417072829555e-06, "loss": 0.4023, "step": 8012 }, { "epoch": 0.3626612355736592, "grad_norm": 0.6473029618057764, "learning_rate": 7.368771646220412e-06, "loss": 0.3824, "step": 8013 }, { "epoch": 0.36270649468205474, "grad_norm": 0.694529569892738, "learning_rate": 7.36812616871364e-06, "loss": 0.3445, "step": 8014 }, { "epoch": 0.3627517537904503, "grad_norm": 0.6285840846990028, "learning_rate": 7.367480640323113e-06, "loss": 0.3497, "step": 8015 }, { "epoch": 0.3627970128988459, "grad_norm": 0.4083667963474437, "learning_rate": 7.366835061062696e-06, "loss": 0.4819, "step": 8016 }, { "epoch": 0.36284227200724145, "grad_norm": 0.39779569020588457, "learning_rate": 7.366189430946262e-06, "loss": 0.4857, "step": 8017 }, { "epoch": 0.36288753111563704, "grad_norm": 0.33896363384718664, "learning_rate": 7.365543749987685e-06, "loss": 0.4672, "step": 8018 }, { "epoch": 0.3629327902240326, "grad_norm": 0.3043889489229819, "learning_rate": 7.364898018200839e-06, "loss": 0.4865, "step": 8019 }, { "epoch": 0.36297804933242817, "grad_norm": 0.2978844693860657, "learning_rate": 7.364252235599596e-06, "loss": 0.4573, "step": 8020 }, { "epoch": 0.3630233084408237, "grad_norm": 0.841995713596338, "learning_rate": 7.363606402197836e-06, "loss": 0.3927, "step": 8021 }, { "epoch": 0.3630685675492193, "grad_norm": 0.7853050599559405, "learning_rate": 7.362960518009432e-06, "loss": 0.3717, "step": 8022 }, { "epoch": 0.3631138266576148, "grad_norm": 0.7351000709156122, "learning_rate": 7.362314583048265e-06, "loss": 0.3284, "step": 8023 }, { "epoch": 0.3631590857660104, "grad_norm": 0.8623144464403313, "learning_rate": 7.361668597328212e-06, "loss": 0.3766, "step": 8024 }, { "epoch": 0.36320434487440595, "grad_norm": 0.6196968848506009, "learning_rate": 7.361022560863154e-06, "loss": 0.4829, "step": 8025 }, { "epoch": 0.36324960398280154, "grad_norm": 0.7176131906312808, "learning_rate": 7.360376473666973e-06, "loss": 0.3669, "step": 8026 }, { "epoch": 0.36329486309119713, "grad_norm": 0.6777973099375537, "learning_rate": 7.359730335753551e-06, "loss": 0.3583, "step": 8027 }, { "epoch": 0.36334012219959266, "grad_norm": 0.6718870397866715, "learning_rate": 7.35908414713677e-06, "loss": 0.3671, "step": 8028 }, { "epoch": 0.36338538130798825, "grad_norm": 0.5794901646044115, "learning_rate": 7.358437907830518e-06, "loss": 0.3311, "step": 8029 }, { "epoch": 0.3634306404163838, "grad_norm": 0.368659357946139, "learning_rate": 7.3577916178486775e-06, "loss": 0.4644, "step": 8030 }, { "epoch": 0.3634758995247794, "grad_norm": 0.6997437666807766, "learning_rate": 7.357145277205138e-06, "loss": 0.3567, "step": 8031 }, { "epoch": 0.3635211586331749, "grad_norm": 0.3570708240657923, "learning_rate": 7.356498885913784e-06, "loss": 0.4997, "step": 8032 }, { "epoch": 0.3635664177415705, "grad_norm": 0.8237636191487573, "learning_rate": 7.3558524439885075e-06, "loss": 0.4016, "step": 8033 }, { "epoch": 0.36361167684996604, "grad_norm": 0.6271151725782911, "learning_rate": 7.3552059514431985e-06, "loss": 0.3163, "step": 8034 }, { "epoch": 0.3636569359583616, "grad_norm": 0.6676304941879442, "learning_rate": 7.3545594082917435e-06, "loss": 0.4084, "step": 8035 }, { "epoch": 0.36370219506675716, "grad_norm": 0.7293712480186316, "learning_rate": 7.353912814548042e-06, "loss": 0.4009, "step": 8036 }, { "epoch": 0.36374745417515275, "grad_norm": 0.7012141766795378, "learning_rate": 7.353266170225982e-06, "loss": 0.3365, "step": 8037 }, { "epoch": 0.36379271328354834, "grad_norm": 0.7480133971533515, "learning_rate": 7.35261947533946e-06, "loss": 0.3778, "step": 8038 }, { "epoch": 0.3638379723919439, "grad_norm": 0.5479419567796888, "learning_rate": 7.35197272990237e-06, "loss": 0.505, "step": 8039 }, { "epoch": 0.36388323150033947, "grad_norm": 0.6619275487363858, "learning_rate": 7.35132593392861e-06, "loss": 0.3614, "step": 8040 }, { "epoch": 0.363928490608735, "grad_norm": 0.4062441097479375, "learning_rate": 7.350679087432078e-06, "loss": 0.4791, "step": 8041 }, { "epoch": 0.3639737497171306, "grad_norm": 0.29776999395359294, "learning_rate": 7.3500321904266725e-06, "loss": 0.5068, "step": 8042 }, { "epoch": 0.3640190088255261, "grad_norm": 0.8457303726828063, "learning_rate": 7.349385242926291e-06, "loss": 0.3744, "step": 8043 }, { "epoch": 0.3640642679339217, "grad_norm": 1.0320355475306249, "learning_rate": 7.348738244944837e-06, "loss": 0.3642, "step": 8044 }, { "epoch": 0.36410952704231725, "grad_norm": 0.4901552884921019, "learning_rate": 7.348091196496212e-06, "loss": 0.4909, "step": 8045 }, { "epoch": 0.36415478615071284, "grad_norm": 0.749738817323516, "learning_rate": 7.3474440975943185e-06, "loss": 0.3305, "step": 8046 }, { "epoch": 0.3642000452591084, "grad_norm": 0.6980541542956954, "learning_rate": 7.346796948253061e-06, "loss": 0.3835, "step": 8047 }, { "epoch": 0.36424530436750396, "grad_norm": 0.6926662082886041, "learning_rate": 7.346149748486345e-06, "loss": 0.3878, "step": 8048 }, { "epoch": 0.3642905634758995, "grad_norm": 0.6511728917753308, "learning_rate": 7.345502498308076e-06, "loss": 0.3591, "step": 8049 }, { "epoch": 0.3643358225842951, "grad_norm": 0.63481958151217, "learning_rate": 7.3448551977321615e-06, "loss": 0.3413, "step": 8050 }, { "epoch": 0.3643810816926907, "grad_norm": 0.4470564181613816, "learning_rate": 7.344207846772511e-06, "loss": 0.4872, "step": 8051 }, { "epoch": 0.3644263408010862, "grad_norm": 0.61445838152631, "learning_rate": 7.3435604454430345e-06, "loss": 0.3575, "step": 8052 }, { "epoch": 0.3644715999094818, "grad_norm": 0.34039730000214496, "learning_rate": 7.34291299375764e-06, "loss": 0.4941, "step": 8053 }, { "epoch": 0.36451685901787734, "grad_norm": 0.5926980612134997, "learning_rate": 7.342265491730243e-06, "loss": 0.3466, "step": 8054 }, { "epoch": 0.3645621181262729, "grad_norm": 0.2837414537247516, "learning_rate": 7.341617939374753e-06, "loss": 0.4955, "step": 8055 }, { "epoch": 0.36460737723466846, "grad_norm": 0.7716956234209372, "learning_rate": 7.340970336705084e-06, "loss": 0.36, "step": 8056 }, { "epoch": 0.36465263634306405, "grad_norm": 0.6427150881432653, "learning_rate": 7.340322683735155e-06, "loss": 0.3592, "step": 8057 }, { "epoch": 0.3646978954514596, "grad_norm": 0.3857297404798561, "learning_rate": 7.339674980478878e-06, "loss": 0.4854, "step": 8058 }, { "epoch": 0.3647431545598552, "grad_norm": 0.601291663477463, "learning_rate": 7.339027226950171e-06, "loss": 0.3543, "step": 8059 }, { "epoch": 0.3647884136682507, "grad_norm": 0.7298123559205156, "learning_rate": 7.338379423162953e-06, "loss": 0.3713, "step": 8060 }, { "epoch": 0.3648336727766463, "grad_norm": 0.6565806633034736, "learning_rate": 7.337731569131143e-06, "loss": 0.3452, "step": 8061 }, { "epoch": 0.3648789318850419, "grad_norm": 0.3229528406928706, "learning_rate": 7.3370836648686616e-06, "loss": 0.4727, "step": 8062 }, { "epoch": 0.3649241909934374, "grad_norm": 0.6929570188379901, "learning_rate": 7.33643571038943e-06, "loss": 0.348, "step": 8063 }, { "epoch": 0.364969450101833, "grad_norm": 0.6625662022938806, "learning_rate": 7.33578770570737e-06, "loss": 0.3899, "step": 8064 }, { "epoch": 0.36501470921022855, "grad_norm": 0.7017583007840354, "learning_rate": 7.335139650836407e-06, "loss": 0.4089, "step": 8065 }, { "epoch": 0.36505996831862414, "grad_norm": 0.662474726091609, "learning_rate": 7.3344915457904655e-06, "loss": 0.3396, "step": 8066 }, { "epoch": 0.3651052274270197, "grad_norm": 0.601157599169405, "learning_rate": 7.3338433905834685e-06, "loss": 0.3234, "step": 8067 }, { "epoch": 0.36515048653541526, "grad_norm": 0.6897480223541927, "learning_rate": 7.333195185229346e-06, "loss": 0.3668, "step": 8068 }, { "epoch": 0.3651957456438108, "grad_norm": 0.5994620628157344, "learning_rate": 7.3325469297420246e-06, "loss": 0.3894, "step": 8069 }, { "epoch": 0.3652410047522064, "grad_norm": 0.6392091647460895, "learning_rate": 7.331898624135434e-06, "loss": 0.3513, "step": 8070 }, { "epoch": 0.3652862638606019, "grad_norm": 0.3701285705436311, "learning_rate": 7.331250268423505e-06, "loss": 0.4707, "step": 8071 }, { "epoch": 0.3653315229689975, "grad_norm": 0.5766710018646587, "learning_rate": 7.330601862620164e-06, "loss": 0.34, "step": 8072 }, { "epoch": 0.3653767820773931, "grad_norm": 0.6618459831944085, "learning_rate": 7.3299534067393495e-06, "loss": 0.3681, "step": 8073 }, { "epoch": 0.36542204118578864, "grad_norm": 0.8322945398975357, "learning_rate": 7.329304900794991e-06, "loss": 0.3479, "step": 8074 }, { "epoch": 0.3654673002941842, "grad_norm": 0.8158659005341743, "learning_rate": 7.328656344801025e-06, "loss": 0.3681, "step": 8075 }, { "epoch": 0.36551255940257976, "grad_norm": 0.8539346306396185, "learning_rate": 7.328007738771385e-06, "loss": 0.362, "step": 8076 }, { "epoch": 0.36555781851097535, "grad_norm": 0.6563458590862646, "learning_rate": 7.32735908272001e-06, "loss": 0.3609, "step": 8077 }, { "epoch": 0.3656030776193709, "grad_norm": 0.3209877402422395, "learning_rate": 7.326710376660836e-06, "loss": 0.502, "step": 8078 }, { "epoch": 0.3656483367277665, "grad_norm": 0.675428159697903, "learning_rate": 7.326061620607801e-06, "loss": 0.3865, "step": 8079 }, { "epoch": 0.365693595836162, "grad_norm": 0.6703471467189661, "learning_rate": 7.325412814574847e-06, "loss": 0.3579, "step": 8080 }, { "epoch": 0.3657388549445576, "grad_norm": 0.602632944758576, "learning_rate": 7.324763958575913e-06, "loss": 0.3544, "step": 8081 }, { "epoch": 0.36578411405295314, "grad_norm": 0.31864481918066306, "learning_rate": 7.324115052624941e-06, "loss": 0.5226, "step": 8082 }, { "epoch": 0.3658293731613487, "grad_norm": 0.6424378043768086, "learning_rate": 7.323466096735875e-06, "loss": 0.3867, "step": 8083 }, { "epoch": 0.36587463226974426, "grad_norm": 0.6450470744304372, "learning_rate": 7.322817090922659e-06, "loss": 0.359, "step": 8084 }, { "epoch": 0.36591989137813985, "grad_norm": 0.7814567957687872, "learning_rate": 7.322168035199237e-06, "loss": 0.3518, "step": 8085 }, { "epoch": 0.36596515048653544, "grad_norm": 0.6107592968368141, "learning_rate": 7.3215189295795565e-06, "loss": 0.3286, "step": 8086 }, { "epoch": 0.366010409594931, "grad_norm": 0.6116002376209456, "learning_rate": 7.320869774077564e-06, "loss": 0.3235, "step": 8087 }, { "epoch": 0.36605566870332656, "grad_norm": 0.3665436413370661, "learning_rate": 7.320220568707207e-06, "loss": 0.4684, "step": 8088 }, { "epoch": 0.3661009278117221, "grad_norm": 0.6819908460809249, "learning_rate": 7.319571313482437e-06, "loss": 0.3676, "step": 8089 }, { "epoch": 0.3661461869201177, "grad_norm": 0.6976100390540673, "learning_rate": 7.318922008417203e-06, "loss": 0.346, "step": 8090 }, { "epoch": 0.3661914460285132, "grad_norm": 0.32398500017315196, "learning_rate": 7.318272653525457e-06, "loss": 0.4916, "step": 8091 }, { "epoch": 0.3662367051369088, "grad_norm": 0.6873578858329197, "learning_rate": 7.317623248821153e-06, "loss": 0.3403, "step": 8092 }, { "epoch": 0.36628196424530435, "grad_norm": 0.6234197882322604, "learning_rate": 7.316973794318242e-06, "loss": 0.3249, "step": 8093 }, { "epoch": 0.36632722335369994, "grad_norm": 0.40565248593002673, "learning_rate": 7.316324290030682e-06, "loss": 0.4955, "step": 8094 }, { "epoch": 0.3663724824620955, "grad_norm": 0.7118511193486391, "learning_rate": 7.315674735972426e-06, "loss": 0.3717, "step": 8095 }, { "epoch": 0.36641774157049106, "grad_norm": 0.6902695636733798, "learning_rate": 7.315025132157432e-06, "loss": 0.386, "step": 8096 }, { "epoch": 0.36646300067888665, "grad_norm": 0.3724437368013699, "learning_rate": 7.314375478599657e-06, "loss": 0.4877, "step": 8097 }, { "epoch": 0.3665082597872822, "grad_norm": 0.6294176595203217, "learning_rate": 7.313725775313061e-06, "loss": 0.3668, "step": 8098 }, { "epoch": 0.3665535188956778, "grad_norm": 0.6185721391255002, "learning_rate": 7.313076022311605e-06, "loss": 0.3367, "step": 8099 }, { "epoch": 0.3665987780040733, "grad_norm": 0.2993327159764945, "learning_rate": 7.31242621960925e-06, "loss": 0.4597, "step": 8100 }, { "epoch": 0.3666440371124689, "grad_norm": 0.2986381957076049, "learning_rate": 7.311776367219956e-06, "loss": 0.4973, "step": 8101 }, { "epoch": 0.36668929622086444, "grad_norm": 0.6804572154313598, "learning_rate": 7.3111264651576895e-06, "loss": 0.3448, "step": 8102 }, { "epoch": 0.36673455532926, "grad_norm": 0.8765735545656241, "learning_rate": 7.310476513436412e-06, "loss": 0.3784, "step": 8103 }, { "epoch": 0.36677981443765556, "grad_norm": 0.2952493580258756, "learning_rate": 7.3098265120700915e-06, "loss": 0.4805, "step": 8104 }, { "epoch": 0.36682507354605115, "grad_norm": 0.6513747567476289, "learning_rate": 7.3091764610726935e-06, "loss": 0.332, "step": 8105 }, { "epoch": 0.3668703326544467, "grad_norm": 0.8286346174779458, "learning_rate": 7.308526360458185e-06, "loss": 0.3715, "step": 8106 }, { "epoch": 0.3669155917628423, "grad_norm": 0.6412950037424189, "learning_rate": 7.307876210240534e-06, "loss": 0.3814, "step": 8107 }, { "epoch": 0.3669608508712378, "grad_norm": 0.741223434168854, "learning_rate": 7.3072260104337124e-06, "loss": 0.3169, "step": 8108 }, { "epoch": 0.3670061099796334, "grad_norm": 0.6052335633310483, "learning_rate": 7.3065757610516895e-06, "loss": 0.3636, "step": 8109 }, { "epoch": 0.367051369088029, "grad_norm": 0.635252692031835, "learning_rate": 7.305925462108439e-06, "loss": 0.3614, "step": 8110 }, { "epoch": 0.3670966281964245, "grad_norm": 0.634580241462948, "learning_rate": 7.30527511361793e-06, "loss": 0.3766, "step": 8111 }, { "epoch": 0.3671418873048201, "grad_norm": 0.39177715832680615, "learning_rate": 7.30462471559414e-06, "loss": 0.5032, "step": 8112 }, { "epoch": 0.36718714641321565, "grad_norm": 0.6742823922848321, "learning_rate": 7.303974268051044e-06, "loss": 0.3195, "step": 8113 }, { "epoch": 0.36723240552161124, "grad_norm": 0.6544761152892868, "learning_rate": 7.303323771002615e-06, "loss": 0.3331, "step": 8114 }, { "epoch": 0.3672776646300068, "grad_norm": 0.6472868999827004, "learning_rate": 7.302673224462835e-06, "loss": 0.3105, "step": 8115 }, { "epoch": 0.36732292373840236, "grad_norm": 0.30055019896322244, "learning_rate": 7.302022628445678e-06, "loss": 0.4879, "step": 8116 }, { "epoch": 0.3673681828467979, "grad_norm": 0.6749211147111039, "learning_rate": 7.301371982965125e-06, "loss": 0.3763, "step": 8117 }, { "epoch": 0.3674134419551935, "grad_norm": 0.6467312875354413, "learning_rate": 7.3007212880351565e-06, "loss": 0.333, "step": 8118 }, { "epoch": 0.367458701063589, "grad_norm": 0.6935178709365164, "learning_rate": 7.3000705436697525e-06, "loss": 0.3862, "step": 8119 }, { "epoch": 0.3675039601719846, "grad_norm": 0.6099660368039522, "learning_rate": 7.2994197498828975e-06, "loss": 0.3278, "step": 8120 }, { "epoch": 0.3675492192803802, "grad_norm": 0.6233582404645013, "learning_rate": 7.298768906688576e-06, "loss": 0.3747, "step": 8121 }, { "epoch": 0.36759447838877574, "grad_norm": 0.6454675108502524, "learning_rate": 7.298118014100766e-06, "loss": 0.3304, "step": 8122 }, { "epoch": 0.3676397374971713, "grad_norm": 0.6187331614448284, "learning_rate": 7.297467072133463e-06, "loss": 0.3657, "step": 8123 }, { "epoch": 0.36768499660556686, "grad_norm": 0.6252633414228883, "learning_rate": 7.296816080800646e-06, "loss": 0.3546, "step": 8124 }, { "epoch": 0.36773025571396245, "grad_norm": 0.6552389003007436, "learning_rate": 7.296165040116308e-06, "loss": 0.3447, "step": 8125 }, { "epoch": 0.367775514822358, "grad_norm": 0.6660391799782329, "learning_rate": 7.295513950094433e-06, "loss": 0.3703, "step": 8126 }, { "epoch": 0.3678207739307536, "grad_norm": 0.5997289999195763, "learning_rate": 7.294862810749014e-06, "loss": 0.3484, "step": 8127 }, { "epoch": 0.3678660330391491, "grad_norm": 0.6706146748720235, "learning_rate": 7.2942116220940406e-06, "loss": 0.3371, "step": 8128 }, { "epoch": 0.3679112921475447, "grad_norm": 0.6006983753253756, "learning_rate": 7.293560384143506e-06, "loss": 0.3094, "step": 8129 }, { "epoch": 0.36795655125594023, "grad_norm": 0.631994678606701, "learning_rate": 7.292909096911403e-06, "loss": 0.3352, "step": 8130 }, { "epoch": 0.3680018103643358, "grad_norm": 0.7523108426437606, "learning_rate": 7.292257760411726e-06, "loss": 0.37, "step": 8131 }, { "epoch": 0.3680470694727314, "grad_norm": 0.6309566047038871, "learning_rate": 7.29160637465847e-06, "loss": 0.3563, "step": 8132 }, { "epoch": 0.36809232858112695, "grad_norm": 0.37387998050983035, "learning_rate": 7.290954939665632e-06, "loss": 0.4873, "step": 8133 }, { "epoch": 0.36813758768952254, "grad_norm": 0.6851452506465928, "learning_rate": 7.290303455447208e-06, "loss": 0.3671, "step": 8134 }, { "epoch": 0.3681828467979181, "grad_norm": 0.7434686822496943, "learning_rate": 7.289651922017195e-06, "loss": 0.373, "step": 8135 }, { "epoch": 0.36822810590631366, "grad_norm": 0.5914979977178154, "learning_rate": 7.289000339389596e-06, "loss": 0.337, "step": 8136 }, { "epoch": 0.3682733650147092, "grad_norm": 0.6321514469503917, "learning_rate": 7.288348707578409e-06, "loss": 0.363, "step": 8137 }, { "epoch": 0.3683186241231048, "grad_norm": 0.6629606564514017, "learning_rate": 7.2876970265976365e-06, "loss": 0.3691, "step": 8138 }, { "epoch": 0.3683638832315003, "grad_norm": 0.6713936840695812, "learning_rate": 7.287045296461281e-06, "loss": 0.3425, "step": 8139 }, { "epoch": 0.3684091423398959, "grad_norm": 0.6120971669362468, "learning_rate": 7.2863935171833465e-06, "loss": 0.3648, "step": 8140 }, { "epoch": 0.36845440144829145, "grad_norm": 0.6659355828139842, "learning_rate": 7.285741688777838e-06, "loss": 0.3662, "step": 8141 }, { "epoch": 0.36849966055668704, "grad_norm": 0.4104427222421775, "learning_rate": 7.285089811258761e-06, "loss": 0.502, "step": 8142 }, { "epoch": 0.36854491966508257, "grad_norm": 0.6590165373916503, "learning_rate": 7.28443788464012e-06, "loss": 0.33, "step": 8143 }, { "epoch": 0.36859017877347816, "grad_norm": 0.6633311656110775, "learning_rate": 7.283785908935927e-06, "loss": 0.3849, "step": 8144 }, { "epoch": 0.36863543788187375, "grad_norm": 0.6063659937084452, "learning_rate": 7.283133884160187e-06, "loss": 0.316, "step": 8145 }, { "epoch": 0.3686806969902693, "grad_norm": 0.5980474521604764, "learning_rate": 7.282481810326915e-06, "loss": 0.3796, "step": 8146 }, { "epoch": 0.3687259560986649, "grad_norm": 0.5990355897144266, "learning_rate": 7.281829687450117e-06, "loss": 0.3323, "step": 8147 }, { "epoch": 0.3687712152070604, "grad_norm": 0.6745356562340838, "learning_rate": 7.281177515543807e-06, "loss": 0.387, "step": 8148 }, { "epoch": 0.368816474315456, "grad_norm": 0.6158116775354936, "learning_rate": 7.280525294621999e-06, "loss": 0.3219, "step": 8149 }, { "epoch": 0.36886173342385153, "grad_norm": 0.6545144180352372, "learning_rate": 7.2798730246987056e-06, "loss": 0.3637, "step": 8150 }, { "epoch": 0.3689069925322471, "grad_norm": 0.4375279106063995, "learning_rate": 7.279220705787943e-06, "loss": 0.471, "step": 8151 }, { "epoch": 0.36895225164064266, "grad_norm": 0.38159927496792784, "learning_rate": 7.278568337903729e-06, "loss": 0.4683, "step": 8152 }, { "epoch": 0.36899751074903825, "grad_norm": 0.6542186893818985, "learning_rate": 7.2779159210600765e-06, "loss": 0.3343, "step": 8153 }, { "epoch": 0.3690427698574338, "grad_norm": 0.29272977656676263, "learning_rate": 7.277263455271011e-06, "loss": 0.4718, "step": 8154 }, { "epoch": 0.3690880289658294, "grad_norm": 0.6501785373959793, "learning_rate": 7.2766109405505445e-06, "loss": 0.3524, "step": 8155 }, { "epoch": 0.36913328807422496, "grad_norm": 0.6013988426375781, "learning_rate": 7.275958376912703e-06, "loss": 0.334, "step": 8156 }, { "epoch": 0.3691785471826205, "grad_norm": 0.620811403055435, "learning_rate": 7.275305764371505e-06, "loss": 0.3269, "step": 8157 }, { "epoch": 0.3692238062910161, "grad_norm": 0.6162970042756091, "learning_rate": 7.274653102940974e-06, "loss": 0.3129, "step": 8158 }, { "epoch": 0.3692690653994116, "grad_norm": 0.6723016335543197, "learning_rate": 7.274000392635134e-06, "loss": 0.3956, "step": 8159 }, { "epoch": 0.3693143245078072, "grad_norm": 0.6754868476325461, "learning_rate": 7.273347633468011e-06, "loss": 0.3703, "step": 8160 }, { "epoch": 0.36935958361620275, "grad_norm": 0.6573254874606067, "learning_rate": 7.272694825453628e-06, "loss": 0.3852, "step": 8161 }, { "epoch": 0.36940484272459834, "grad_norm": 0.7382278556803853, "learning_rate": 7.272041968606014e-06, "loss": 0.381, "step": 8162 }, { "epoch": 0.36945010183299387, "grad_norm": 0.6845167040529891, "learning_rate": 7.271389062939196e-06, "loss": 0.3502, "step": 8163 }, { "epoch": 0.36949536094138946, "grad_norm": 0.6551708802117328, "learning_rate": 7.270736108467202e-06, "loss": 0.3503, "step": 8164 }, { "epoch": 0.369540620049785, "grad_norm": 0.6211818518982469, "learning_rate": 7.2700831052040656e-06, "loss": 0.3727, "step": 8165 }, { "epoch": 0.3695858791581806, "grad_norm": 0.6544819832232823, "learning_rate": 7.269430053163813e-06, "loss": 0.3592, "step": 8166 }, { "epoch": 0.3696311382665762, "grad_norm": 0.6687579884282855, "learning_rate": 7.268776952360479e-06, "loss": 0.3526, "step": 8167 }, { "epoch": 0.3696763973749717, "grad_norm": 0.6643304048573219, "learning_rate": 7.268123802808097e-06, "loss": 0.3713, "step": 8168 }, { "epoch": 0.3697216564833673, "grad_norm": 0.6462119618190477, "learning_rate": 7.2674706045207e-06, "loss": 0.3163, "step": 8169 }, { "epoch": 0.36976691559176283, "grad_norm": 0.6632924357449026, "learning_rate": 7.2668173575123234e-06, "loss": 0.382, "step": 8170 }, { "epoch": 0.3698121747001584, "grad_norm": 0.7171910384022454, "learning_rate": 7.2661640617970054e-06, "loss": 0.4129, "step": 8171 }, { "epoch": 0.36985743380855396, "grad_norm": 0.6723190617891283, "learning_rate": 7.26551071738878e-06, "loss": 0.3729, "step": 8172 }, { "epoch": 0.36990269291694955, "grad_norm": 0.6634564049143907, "learning_rate": 7.264857324301688e-06, "loss": 0.4029, "step": 8173 }, { "epoch": 0.3699479520253451, "grad_norm": 0.7272365330599319, "learning_rate": 7.264203882549766e-06, "loss": 0.4852, "step": 8174 }, { "epoch": 0.3699932111337407, "grad_norm": 0.6412200563909634, "learning_rate": 7.26355039214706e-06, "loss": 0.3433, "step": 8175 }, { "epoch": 0.3700384702421362, "grad_norm": 0.3903053709652611, "learning_rate": 7.262896853107606e-06, "loss": 0.4617, "step": 8176 }, { "epoch": 0.3700837293505318, "grad_norm": 0.6341750980091954, "learning_rate": 7.262243265445449e-06, "loss": 0.3362, "step": 8177 }, { "epoch": 0.37012898845892733, "grad_norm": 0.6452881582463291, "learning_rate": 7.261589629174632e-06, "loss": 0.3275, "step": 8178 }, { "epoch": 0.3701742475673229, "grad_norm": 0.6057918110267597, "learning_rate": 7.260935944309201e-06, "loss": 0.3315, "step": 8179 }, { "epoch": 0.3702195066757185, "grad_norm": 0.6226973497002455, "learning_rate": 7.260282210863199e-06, "loss": 0.3658, "step": 8180 }, { "epoch": 0.37026476578411405, "grad_norm": 0.6717952130988063, "learning_rate": 7.2596284288506745e-06, "loss": 0.3743, "step": 8181 }, { "epoch": 0.37031002489250964, "grad_norm": 0.7272312458191902, "learning_rate": 7.258974598285674e-06, "loss": 0.4841, "step": 8182 }, { "epoch": 0.37035528400090517, "grad_norm": 0.6751799745883079, "learning_rate": 7.25832071918225e-06, "loss": 0.3406, "step": 8183 }, { "epoch": 0.37040054310930076, "grad_norm": 0.6424543775896905, "learning_rate": 7.257666791554448e-06, "loss": 0.3633, "step": 8184 }, { "epoch": 0.3704458022176963, "grad_norm": 0.653516097898005, "learning_rate": 7.25701281541632e-06, "loss": 0.325, "step": 8185 }, { "epoch": 0.3704910613260919, "grad_norm": 0.5794649639415843, "learning_rate": 7.2563587907819185e-06, "loss": 0.3244, "step": 8186 }, { "epoch": 0.3705363204344874, "grad_norm": 0.7157715260093064, "learning_rate": 7.255704717665298e-06, "loss": 0.4209, "step": 8187 }, { "epoch": 0.370581579542883, "grad_norm": 0.5884653553265714, "learning_rate": 7.25505059608051e-06, "loss": 0.3363, "step": 8188 }, { "epoch": 0.37062683865127855, "grad_norm": 0.6741184230110532, "learning_rate": 7.25439642604161e-06, "loss": 0.3378, "step": 8189 }, { "epoch": 0.37067209775967414, "grad_norm": 0.9005729167867248, "learning_rate": 7.253742207562655e-06, "loss": 0.3587, "step": 8190 }, { "epoch": 0.3707173568680697, "grad_norm": 0.7030014372266986, "learning_rate": 7.253087940657702e-06, "loss": 0.3348, "step": 8191 }, { "epoch": 0.37076261597646526, "grad_norm": 0.38392013327619495, "learning_rate": 7.252433625340811e-06, "loss": 0.5037, "step": 8192 }, { "epoch": 0.37080787508486085, "grad_norm": 0.6781324653726816, "learning_rate": 7.251779261626035e-06, "loss": 0.3572, "step": 8193 }, { "epoch": 0.3708531341932564, "grad_norm": 0.6761517906134753, "learning_rate": 7.251124849527442e-06, "loss": 0.3056, "step": 8194 }, { "epoch": 0.370898393301652, "grad_norm": 0.6428503014211182, "learning_rate": 7.250470389059088e-06, "loss": 0.3996, "step": 8195 }, { "epoch": 0.3709436524100475, "grad_norm": 0.6354730026376507, "learning_rate": 7.2498158802350385e-06, "loss": 0.3845, "step": 8196 }, { "epoch": 0.3709889115184431, "grad_norm": 0.6703399462653616, "learning_rate": 7.249161323069355e-06, "loss": 0.3168, "step": 8197 }, { "epoch": 0.37103417062683863, "grad_norm": 0.7792293419436107, "learning_rate": 7.248506717576102e-06, "loss": 0.3511, "step": 8198 }, { "epoch": 0.3710794297352342, "grad_norm": 0.6482685165776548, "learning_rate": 7.247852063769345e-06, "loss": 0.4104, "step": 8199 }, { "epoch": 0.37112468884362976, "grad_norm": 0.6355574305326335, "learning_rate": 7.247197361663152e-06, "loss": 0.3241, "step": 8200 }, { "epoch": 0.37116994795202535, "grad_norm": 0.6312939670347133, "learning_rate": 7.246542611271587e-06, "loss": 0.3608, "step": 8201 }, { "epoch": 0.3712152070604209, "grad_norm": 0.6416849686675478, "learning_rate": 7.245887812608725e-06, "loss": 0.425, "step": 8202 }, { "epoch": 0.37126046616881647, "grad_norm": 0.690208012943487, "learning_rate": 7.245232965688629e-06, "loss": 0.3636, "step": 8203 }, { "epoch": 0.37130572527721206, "grad_norm": 0.8421190596902749, "learning_rate": 7.244578070525373e-06, "loss": 0.3582, "step": 8204 }, { "epoch": 0.3713509843856076, "grad_norm": 0.6219765356087398, "learning_rate": 7.243923127133028e-06, "loss": 0.3884, "step": 8205 }, { "epoch": 0.3713962434940032, "grad_norm": 0.8775162184518147, "learning_rate": 7.243268135525666e-06, "loss": 0.3319, "step": 8206 }, { "epoch": 0.3714415026023987, "grad_norm": 0.3858290275054255, "learning_rate": 7.242613095717361e-06, "loss": 0.5203, "step": 8207 }, { "epoch": 0.3714867617107943, "grad_norm": 0.6141720874361678, "learning_rate": 7.2419580077221906e-06, "loss": 0.3723, "step": 8208 }, { "epoch": 0.37153202081918985, "grad_norm": 0.6414007757700619, "learning_rate": 7.241302871554226e-06, "loss": 0.3723, "step": 8209 }, { "epoch": 0.37157727992758544, "grad_norm": 0.6300823776488501, "learning_rate": 7.240647687227547e-06, "loss": 0.377, "step": 8210 }, { "epoch": 0.37162253903598097, "grad_norm": 0.7237074014093474, "learning_rate": 7.23999245475623e-06, "loss": 0.4208, "step": 8211 }, { "epoch": 0.37166779814437656, "grad_norm": 0.7325475420895977, "learning_rate": 7.239337174154357e-06, "loss": 0.3537, "step": 8212 }, { "epoch": 0.3717130572527721, "grad_norm": 0.3137186371943764, "learning_rate": 7.238681845436004e-06, "loss": 0.4822, "step": 8213 }, { "epoch": 0.3717583163611677, "grad_norm": 0.6413702247460596, "learning_rate": 7.238026468615255e-06, "loss": 0.3426, "step": 8214 }, { "epoch": 0.3718035754695633, "grad_norm": 0.670904099564202, "learning_rate": 7.23737104370619e-06, "loss": 0.3609, "step": 8215 }, { "epoch": 0.3718488345779588, "grad_norm": 0.6297418857803184, "learning_rate": 7.236715570722892e-06, "loss": 0.3477, "step": 8216 }, { "epoch": 0.3718940936863544, "grad_norm": 0.5637446500275521, "learning_rate": 7.236060049679446e-06, "loss": 0.308, "step": 8217 }, { "epoch": 0.37193935279474993, "grad_norm": 0.7496022031912113, "learning_rate": 7.2354044805899385e-06, "loss": 0.3905, "step": 8218 }, { "epoch": 0.3719846119031455, "grad_norm": 0.6394406128286074, "learning_rate": 7.234748863468453e-06, "loss": 0.3696, "step": 8219 }, { "epoch": 0.37202987101154106, "grad_norm": 0.5950512693336792, "learning_rate": 7.234093198329078e-06, "loss": 0.351, "step": 8220 }, { "epoch": 0.37207513011993665, "grad_norm": 0.3256096310149524, "learning_rate": 7.233437485185904e-06, "loss": 0.483, "step": 8221 }, { "epoch": 0.3721203892283322, "grad_norm": 0.28917444198334696, "learning_rate": 7.232781724053014e-06, "loss": 0.4734, "step": 8222 }, { "epoch": 0.3721656483367278, "grad_norm": 0.5907937577751626, "learning_rate": 7.232125914944506e-06, "loss": 0.3499, "step": 8223 }, { "epoch": 0.3722109074451233, "grad_norm": 0.2959842299878078, "learning_rate": 7.2314700578744635e-06, "loss": 0.503, "step": 8224 }, { "epoch": 0.3722561665535189, "grad_norm": 0.6258794371939906, "learning_rate": 7.230814152856986e-06, "loss": 0.3641, "step": 8225 }, { "epoch": 0.3723014256619145, "grad_norm": 0.6253935976814171, "learning_rate": 7.230158199906163e-06, "loss": 0.3553, "step": 8226 }, { "epoch": 0.37234668477031, "grad_norm": 0.3048760301862293, "learning_rate": 7.2295021990360896e-06, "loss": 0.4685, "step": 8227 }, { "epoch": 0.3723919438787056, "grad_norm": 0.6456964042297493, "learning_rate": 7.228846150260861e-06, "loss": 0.3783, "step": 8228 }, { "epoch": 0.37243720298710115, "grad_norm": 0.6633579512268112, "learning_rate": 7.228190053594575e-06, "loss": 0.3743, "step": 8229 }, { "epoch": 0.37248246209549674, "grad_norm": 0.6322349637412092, "learning_rate": 7.227533909051327e-06, "loss": 0.3664, "step": 8230 }, { "epoch": 0.37252772120389227, "grad_norm": 0.6594001063414584, "learning_rate": 7.2268777166452175e-06, "loss": 0.3535, "step": 8231 }, { "epoch": 0.37257298031228786, "grad_norm": 0.6914287196399008, "learning_rate": 7.226221476390344e-06, "loss": 0.3593, "step": 8232 }, { "epoch": 0.3726182394206834, "grad_norm": 0.6762918894508828, "learning_rate": 7.22556518830081e-06, "loss": 0.3559, "step": 8233 }, { "epoch": 0.372663498529079, "grad_norm": 0.6233630699017264, "learning_rate": 7.224908852390714e-06, "loss": 0.3231, "step": 8234 }, { "epoch": 0.3727087576374745, "grad_norm": 0.32035678978033805, "learning_rate": 7.224252468674161e-06, "loss": 0.4999, "step": 8235 }, { "epoch": 0.3727540167458701, "grad_norm": 0.32533747372396, "learning_rate": 7.223596037165252e-06, "loss": 0.4856, "step": 8236 }, { "epoch": 0.37279927585426564, "grad_norm": 0.7078979169891529, "learning_rate": 7.2229395578780955e-06, "loss": 0.3852, "step": 8237 }, { "epoch": 0.37284453496266123, "grad_norm": 0.7705429790577362, "learning_rate": 7.222283030826795e-06, "loss": 0.3419, "step": 8238 }, { "epoch": 0.3728897940710568, "grad_norm": 0.5864709746736614, "learning_rate": 7.221626456025456e-06, "loss": 0.3375, "step": 8239 }, { "epoch": 0.37293505317945236, "grad_norm": 0.6161853610664281, "learning_rate": 7.220969833488188e-06, "loss": 0.3456, "step": 8240 }, { "epoch": 0.37298031228784795, "grad_norm": 0.6767120084765911, "learning_rate": 7.2203131632291e-06, "loss": 0.3624, "step": 8241 }, { "epoch": 0.3730255713962435, "grad_norm": 0.673082152049448, "learning_rate": 7.2196564452623015e-06, "loss": 0.3455, "step": 8242 }, { "epoch": 0.3730708305046391, "grad_norm": 0.6447050864070746, "learning_rate": 7.218999679601903e-06, "loss": 0.3499, "step": 8243 }, { "epoch": 0.3731160896130346, "grad_norm": 0.6341690830643045, "learning_rate": 7.2183428662620155e-06, "loss": 0.3635, "step": 8244 }, { "epoch": 0.3731613487214302, "grad_norm": 0.6284278281076813, "learning_rate": 7.217686005256755e-06, "loss": 0.3529, "step": 8245 }, { "epoch": 0.37320660782982573, "grad_norm": 0.6416020521885557, "learning_rate": 7.217029096600231e-06, "loss": 0.3424, "step": 8246 }, { "epoch": 0.3732518669382213, "grad_norm": 0.5983683465068236, "learning_rate": 7.216372140306563e-06, "loss": 0.3332, "step": 8247 }, { "epoch": 0.37329712604661686, "grad_norm": 0.6559567018480142, "learning_rate": 7.215715136389862e-06, "loss": 0.356, "step": 8248 }, { "epoch": 0.37334238515501245, "grad_norm": 0.6773549771240448, "learning_rate": 7.21505808486425e-06, "loss": 0.3941, "step": 8249 }, { "epoch": 0.37338764426340804, "grad_norm": 0.6604796043184018, "learning_rate": 7.2144009857438436e-06, "loss": 0.3708, "step": 8250 }, { "epoch": 0.37343290337180357, "grad_norm": 0.6423405444166955, "learning_rate": 7.213743839042757e-06, "loss": 0.3573, "step": 8251 }, { "epoch": 0.37347816248019916, "grad_norm": 0.6249343983434719, "learning_rate": 7.213086644775118e-06, "loss": 0.3797, "step": 8252 }, { "epoch": 0.3735234215885947, "grad_norm": 0.4517351308111027, "learning_rate": 7.212429402955043e-06, "loss": 0.4988, "step": 8253 }, { "epoch": 0.3735686806969903, "grad_norm": 0.622628015585213, "learning_rate": 7.211772113596656e-06, "loss": 0.3425, "step": 8254 }, { "epoch": 0.3736139398053858, "grad_norm": 0.333877141476755, "learning_rate": 7.211114776714077e-06, "loss": 0.4689, "step": 8255 }, { "epoch": 0.3736591989137814, "grad_norm": 0.6688247811686822, "learning_rate": 7.210457392321434e-06, "loss": 0.332, "step": 8256 }, { "epoch": 0.37370445802217694, "grad_norm": 0.5636180479977778, "learning_rate": 7.209799960432851e-06, "loss": 0.3083, "step": 8257 }, { "epoch": 0.37374971713057253, "grad_norm": 0.5817093516022838, "learning_rate": 7.209142481062452e-06, "loss": 0.3139, "step": 8258 }, { "epoch": 0.37379497623896807, "grad_norm": 0.35989511000783425, "learning_rate": 7.208484954224366e-06, "loss": 0.4904, "step": 8259 }, { "epoch": 0.37384023534736366, "grad_norm": 0.3762427932277179, "learning_rate": 7.207827379932724e-06, "loss": 0.4874, "step": 8260 }, { "epoch": 0.37388549445575925, "grad_norm": 0.3321377843546181, "learning_rate": 7.207169758201649e-06, "loss": 0.5235, "step": 8261 }, { "epoch": 0.3739307535641548, "grad_norm": 0.8598727794773364, "learning_rate": 7.206512089045277e-06, "loss": 0.3389, "step": 8262 }, { "epoch": 0.3739760126725504, "grad_norm": 0.6917048591356012, "learning_rate": 7.205854372477735e-06, "loss": 0.3633, "step": 8263 }, { "epoch": 0.3740212717809459, "grad_norm": 0.6357038317196191, "learning_rate": 7.2051966085131584e-06, "loss": 0.3523, "step": 8264 }, { "epoch": 0.3740665308893415, "grad_norm": 0.7597069027593606, "learning_rate": 7.20453879716568e-06, "loss": 0.2921, "step": 8265 }, { "epoch": 0.37411178999773703, "grad_norm": 0.7653677810020674, "learning_rate": 7.203880938449432e-06, "loss": 0.3254, "step": 8266 }, { "epoch": 0.3741570491061326, "grad_norm": 0.7374295563959377, "learning_rate": 7.203223032378552e-06, "loss": 0.3984, "step": 8267 }, { "epoch": 0.37420230821452816, "grad_norm": 0.6887589709014885, "learning_rate": 7.202565078967176e-06, "loss": 0.3443, "step": 8268 }, { "epoch": 0.37424756732292375, "grad_norm": 0.8163425475345482, "learning_rate": 7.201907078229442e-06, "loss": 0.3364, "step": 8269 }, { "epoch": 0.3742928264313193, "grad_norm": 0.6668493386525876, "learning_rate": 7.201249030179487e-06, "loss": 0.3452, "step": 8270 }, { "epoch": 0.37433808553971487, "grad_norm": 0.6931544278501053, "learning_rate": 7.200590934831451e-06, "loss": 0.3949, "step": 8271 }, { "epoch": 0.3743833446481104, "grad_norm": 0.6957182852097032, "learning_rate": 7.1999327921994735e-06, "loss": 0.3592, "step": 8272 }, { "epoch": 0.374428603756506, "grad_norm": 0.8084175042085993, "learning_rate": 7.199274602297698e-06, "loss": 0.2903, "step": 8273 }, { "epoch": 0.3744738628649016, "grad_norm": 0.5942923954714772, "learning_rate": 7.198616365140264e-06, "loss": 0.3224, "step": 8274 }, { "epoch": 0.3745191219732971, "grad_norm": 0.6934474912796738, "learning_rate": 7.197958080741319e-06, "loss": 0.3889, "step": 8275 }, { "epoch": 0.3745643810816927, "grad_norm": 0.6741172782103588, "learning_rate": 7.1972997491150046e-06, "loss": 0.3787, "step": 8276 }, { "epoch": 0.37460964019008824, "grad_norm": 0.7482604130933004, "learning_rate": 7.196641370275467e-06, "loss": 0.4912, "step": 8277 }, { "epoch": 0.37465489929848383, "grad_norm": 0.6585374578882484, "learning_rate": 7.195982944236853e-06, "loss": 0.3698, "step": 8278 }, { "epoch": 0.37470015840687937, "grad_norm": 0.6341317832916422, "learning_rate": 7.195324471013309e-06, "loss": 0.3301, "step": 8279 }, { "epoch": 0.37474541751527496, "grad_norm": 0.6603462506256079, "learning_rate": 7.194665950618986e-06, "loss": 0.3892, "step": 8280 }, { "epoch": 0.3747906766236705, "grad_norm": 0.6211242207857619, "learning_rate": 7.194007383068031e-06, "loss": 0.3192, "step": 8281 }, { "epoch": 0.3748359357320661, "grad_norm": 0.7203308404997607, "learning_rate": 7.193348768374595e-06, "loss": 0.3717, "step": 8282 }, { "epoch": 0.3748811948404616, "grad_norm": 0.6878629374944645, "learning_rate": 7.192690106552833e-06, "loss": 0.3376, "step": 8283 }, { "epoch": 0.3749264539488572, "grad_norm": 0.6750065790075197, "learning_rate": 7.1920313976168935e-06, "loss": 0.3733, "step": 8284 }, { "epoch": 0.3749717130572528, "grad_norm": 0.723135413194556, "learning_rate": 7.191372641580931e-06, "loss": 0.3331, "step": 8285 }, { "epoch": 0.37501697216564833, "grad_norm": 0.5944902240791072, "learning_rate": 7.190713838459101e-06, "loss": 0.2828, "step": 8286 }, { "epoch": 0.3750622312740439, "grad_norm": 0.6726537174612722, "learning_rate": 7.190054988265559e-06, "loss": 0.3305, "step": 8287 }, { "epoch": 0.37510749038243946, "grad_norm": 0.6223775834738695, "learning_rate": 7.189396091014462e-06, "loss": 0.3667, "step": 8288 }, { "epoch": 0.37515274949083505, "grad_norm": 0.6676977292485492, "learning_rate": 7.188737146719967e-06, "loss": 0.3757, "step": 8289 }, { "epoch": 0.3751980085992306, "grad_norm": 0.6564201500650513, "learning_rate": 7.188078155396232e-06, "loss": 0.331, "step": 8290 }, { "epoch": 0.37524326770762617, "grad_norm": 0.6770082220237673, "learning_rate": 7.187419117057419e-06, "loss": 0.3548, "step": 8291 }, { "epoch": 0.3752885268160217, "grad_norm": 0.6494451263198261, "learning_rate": 7.1867600317176875e-06, "loss": 0.3185, "step": 8292 }, { "epoch": 0.3753337859244173, "grad_norm": 0.7351655528808541, "learning_rate": 7.186100899391198e-06, "loss": 0.5227, "step": 8293 }, { "epoch": 0.37537904503281283, "grad_norm": 0.6711745935920342, "learning_rate": 7.185441720092114e-06, "loss": 0.3454, "step": 8294 }, { "epoch": 0.3754243041412084, "grad_norm": 0.6135920701861901, "learning_rate": 7.1847824938346e-06, "loss": 0.3194, "step": 8295 }, { "epoch": 0.375469563249604, "grad_norm": 0.6005901565788068, "learning_rate": 7.18412322063282e-06, "loss": 0.391, "step": 8296 }, { "epoch": 0.37551482235799954, "grad_norm": 0.6292338562077902, "learning_rate": 7.183463900500941e-06, "loss": 0.3364, "step": 8297 }, { "epoch": 0.37556008146639513, "grad_norm": 0.34141101180389294, "learning_rate": 7.182804533453127e-06, "loss": 0.5301, "step": 8298 }, { "epoch": 0.37560534057479067, "grad_norm": 0.6197088953157461, "learning_rate": 7.182145119503549e-06, "loss": 0.3473, "step": 8299 }, { "epoch": 0.37565059968318626, "grad_norm": 0.33265449349879456, "learning_rate": 7.181485658666375e-06, "loss": 0.4761, "step": 8300 }, { "epoch": 0.3756958587915818, "grad_norm": 0.640304362055324, "learning_rate": 7.180826150955772e-06, "loss": 0.3073, "step": 8301 }, { "epoch": 0.3757411178999774, "grad_norm": 0.6860724167861825, "learning_rate": 7.180166596385915e-06, "loss": 0.4338, "step": 8302 }, { "epoch": 0.3757863770083729, "grad_norm": 0.6019944334202502, "learning_rate": 7.179506994970972e-06, "loss": 0.3085, "step": 8303 }, { "epoch": 0.3758316361167685, "grad_norm": 0.656435666847941, "learning_rate": 7.178847346725119e-06, "loss": 0.3716, "step": 8304 }, { "epoch": 0.37587689522516404, "grad_norm": 0.6424257873205942, "learning_rate": 7.178187651662527e-06, "loss": 0.3585, "step": 8305 }, { "epoch": 0.37592215433355963, "grad_norm": 0.6525988216561274, "learning_rate": 7.177527909797373e-06, "loss": 0.4092, "step": 8306 }, { "epoch": 0.37596741344195517, "grad_norm": 0.679056074665069, "learning_rate": 7.176868121143831e-06, "loss": 0.3302, "step": 8307 }, { "epoch": 0.37601267255035076, "grad_norm": 0.6405564739339455, "learning_rate": 7.176208285716079e-06, "loss": 0.3311, "step": 8308 }, { "epoch": 0.37605793165874635, "grad_norm": 0.7141348763998681, "learning_rate": 7.175548403528295e-06, "loss": 0.399, "step": 8309 }, { "epoch": 0.3761031907671419, "grad_norm": 0.7062128340100978, "learning_rate": 7.174888474594659e-06, "loss": 0.3938, "step": 8310 }, { "epoch": 0.37614844987553747, "grad_norm": 0.6758179480303619, "learning_rate": 7.174228498929347e-06, "loss": 0.3702, "step": 8311 }, { "epoch": 0.376193708983933, "grad_norm": 0.5505683467660143, "learning_rate": 7.1735684765465444e-06, "loss": 0.4848, "step": 8312 }, { "epoch": 0.3762389680923286, "grad_norm": 0.45578075324415335, "learning_rate": 7.172908407460429e-06, "loss": 0.4858, "step": 8313 }, { "epoch": 0.37628422720072413, "grad_norm": 0.7382639843039384, "learning_rate": 7.172248291685187e-06, "loss": 0.316, "step": 8314 }, { "epoch": 0.3763294863091197, "grad_norm": 0.8101613462731673, "learning_rate": 7.171588129234999e-06, "loss": 0.3486, "step": 8315 }, { "epoch": 0.37637474541751526, "grad_norm": 0.6506764101308028, "learning_rate": 7.170927920124052e-06, "loss": 0.3294, "step": 8316 }, { "epoch": 0.37642000452591085, "grad_norm": 0.5881378075645671, "learning_rate": 7.1702676643665325e-06, "loss": 0.3211, "step": 8317 }, { "epoch": 0.3764652636343064, "grad_norm": 0.6630934221929646, "learning_rate": 7.169607361976627e-06, "loss": 0.3051, "step": 8318 }, { "epoch": 0.37651052274270197, "grad_norm": 0.6771901923424336, "learning_rate": 7.16894701296852e-06, "loss": 0.3842, "step": 8319 }, { "epoch": 0.37655578185109756, "grad_norm": 0.6014323203917278, "learning_rate": 7.168286617356406e-06, "loss": 0.3436, "step": 8320 }, { "epoch": 0.3766010409594931, "grad_norm": 0.6818623234030072, "learning_rate": 7.1676261751544696e-06, "loss": 0.3776, "step": 8321 }, { "epoch": 0.3766463000678887, "grad_norm": 0.6291103487682812, "learning_rate": 7.1669656863769055e-06, "loss": 0.3312, "step": 8322 }, { "epoch": 0.3766915591762842, "grad_norm": 0.5881389565856336, "learning_rate": 7.166305151037905e-06, "loss": 0.3592, "step": 8323 }, { "epoch": 0.3767368182846798, "grad_norm": 0.6840081359541433, "learning_rate": 7.165644569151658e-06, "loss": 0.3734, "step": 8324 }, { "epoch": 0.37678207739307534, "grad_norm": 0.6440224446417455, "learning_rate": 7.1649839407323606e-06, "loss": 0.3562, "step": 8325 }, { "epoch": 0.37682733650147093, "grad_norm": 0.6498735609301556, "learning_rate": 7.164323265794209e-06, "loss": 0.363, "step": 8326 }, { "epoch": 0.37687259560986647, "grad_norm": 0.641845120340163, "learning_rate": 7.163662544351396e-06, "loss": 0.3671, "step": 8327 }, { "epoch": 0.37691785471826206, "grad_norm": 0.6982652323012907, "learning_rate": 7.163001776418121e-06, "loss": 0.3521, "step": 8328 }, { "epoch": 0.3769631138266576, "grad_norm": 0.6243317055112345, "learning_rate": 7.162340962008581e-06, "loss": 0.3521, "step": 8329 }, { "epoch": 0.3770083729350532, "grad_norm": 0.6649935914439344, "learning_rate": 7.1616801011369755e-06, "loss": 0.3369, "step": 8330 }, { "epoch": 0.3770536320434487, "grad_norm": 0.6427306330278062, "learning_rate": 7.161019193817503e-06, "loss": 0.3221, "step": 8331 }, { "epoch": 0.3770988911518443, "grad_norm": 0.6021879131102966, "learning_rate": 7.1603582400643646e-06, "loss": 0.3212, "step": 8332 }, { "epoch": 0.3771441502602399, "grad_norm": 1.1681307049239376, "learning_rate": 7.159697239891764e-06, "loss": 0.4842, "step": 8333 }, { "epoch": 0.37718940936863543, "grad_norm": 0.7385597893203592, "learning_rate": 7.159036193313902e-06, "loss": 0.3408, "step": 8334 }, { "epoch": 0.377234668477031, "grad_norm": 0.6559267542328838, "learning_rate": 7.158375100344983e-06, "loss": 0.3593, "step": 8335 }, { "epoch": 0.37727992758542656, "grad_norm": 0.6265308322487304, "learning_rate": 7.157713960999212e-06, "loss": 0.3517, "step": 8336 }, { "epoch": 0.37732518669382215, "grad_norm": 0.6300808220892394, "learning_rate": 7.157052775290795e-06, "loss": 0.3205, "step": 8337 }, { "epoch": 0.3773704458022177, "grad_norm": 0.7314504650540607, "learning_rate": 7.156391543233938e-06, "loss": 0.3674, "step": 8338 }, { "epoch": 0.37741570491061327, "grad_norm": 0.625857506921556, "learning_rate": 7.155730264842852e-06, "loss": 0.3975, "step": 8339 }, { "epoch": 0.3774609640190088, "grad_norm": 0.6983912978077121, "learning_rate": 7.155068940131741e-06, "loss": 0.3646, "step": 8340 }, { "epoch": 0.3775062231274044, "grad_norm": 0.6330685645971086, "learning_rate": 7.154407569114818e-06, "loss": 0.3419, "step": 8341 }, { "epoch": 0.37755148223579993, "grad_norm": 0.6612187254544507, "learning_rate": 7.153746151806293e-06, "loss": 0.4922, "step": 8342 }, { "epoch": 0.3775967413441955, "grad_norm": 0.616178122020675, "learning_rate": 7.153084688220379e-06, "loss": 0.3582, "step": 8343 }, { "epoch": 0.3776420004525911, "grad_norm": 0.5975510050770201, "learning_rate": 7.152423178371286e-06, "loss": 0.343, "step": 8344 }, { "epoch": 0.37768725956098664, "grad_norm": 0.48499907097437206, "learning_rate": 7.15176162227323e-06, "loss": 0.4942, "step": 8345 }, { "epoch": 0.37773251866938223, "grad_norm": 0.6269278584054762, "learning_rate": 7.151100019940427e-06, "loss": 0.3072, "step": 8346 }, { "epoch": 0.37777777777777777, "grad_norm": 1.0530027938874988, "learning_rate": 7.1504383713870895e-06, "loss": 0.3565, "step": 8347 }, { "epoch": 0.37782303688617336, "grad_norm": 0.6296408615092338, "learning_rate": 7.149776676627436e-06, "loss": 0.3581, "step": 8348 }, { "epoch": 0.3778682959945689, "grad_norm": 0.38352804422665704, "learning_rate": 7.149114935675685e-06, "loss": 0.4654, "step": 8349 }, { "epoch": 0.3779135551029645, "grad_norm": 0.3512682890162911, "learning_rate": 7.148453148546055e-06, "loss": 0.4577, "step": 8350 }, { "epoch": 0.37795881421136, "grad_norm": 0.6552021817776984, "learning_rate": 7.1477913152527635e-06, "loss": 0.3564, "step": 8351 }, { "epoch": 0.3780040733197556, "grad_norm": 0.6176790702307078, "learning_rate": 7.1471294358100344e-06, "loss": 0.3423, "step": 8352 }, { "epoch": 0.37804933242815114, "grad_norm": 0.765039383704981, "learning_rate": 7.146467510232088e-06, "loss": 0.3545, "step": 8353 }, { "epoch": 0.37809459153654673, "grad_norm": 0.6006632425414946, "learning_rate": 7.145805538533146e-06, "loss": 0.3008, "step": 8354 }, { "epoch": 0.3781398506449423, "grad_norm": 0.6770490817071343, "learning_rate": 7.145143520727434e-06, "loss": 0.3485, "step": 8355 }, { "epoch": 0.37818510975333786, "grad_norm": 0.7613669358884392, "learning_rate": 7.144481456829178e-06, "loss": 0.3462, "step": 8356 }, { "epoch": 0.37823036886173345, "grad_norm": 0.6814959868650572, "learning_rate": 7.1438193468525986e-06, "loss": 0.3768, "step": 8357 }, { "epoch": 0.378275627970129, "grad_norm": 0.5110195752010532, "learning_rate": 7.143157190811927e-06, "loss": 0.4857, "step": 8358 }, { "epoch": 0.37832088707852457, "grad_norm": 0.6838577856291563, "learning_rate": 7.14249498872139e-06, "loss": 0.3829, "step": 8359 }, { "epoch": 0.3783661461869201, "grad_norm": 0.746673282701903, "learning_rate": 7.141832740595217e-06, "loss": 0.3886, "step": 8360 }, { "epoch": 0.3784114052953157, "grad_norm": 0.4042609860628919, "learning_rate": 7.141170446447634e-06, "loss": 0.4868, "step": 8361 }, { "epoch": 0.37845666440371123, "grad_norm": 0.6604856741415678, "learning_rate": 7.140508106292876e-06, "loss": 0.3452, "step": 8362 }, { "epoch": 0.3785019235121068, "grad_norm": 0.6152179001971164, "learning_rate": 7.139845720145172e-06, "loss": 0.3489, "step": 8363 }, { "epoch": 0.37854718262050235, "grad_norm": 0.6226880833935763, "learning_rate": 7.139183288018756e-06, "loss": 0.3523, "step": 8364 }, { "epoch": 0.37859244172889794, "grad_norm": 0.747242633729006, "learning_rate": 7.13852080992786e-06, "loss": 0.324, "step": 8365 }, { "epoch": 0.3786377008372935, "grad_norm": 0.6172830431009755, "learning_rate": 7.137858285886721e-06, "loss": 0.3626, "step": 8366 }, { "epoch": 0.37868295994568907, "grad_norm": 0.6480631555125602, "learning_rate": 7.137195715909573e-06, "loss": 0.4062, "step": 8367 }, { "epoch": 0.37872821905408466, "grad_norm": 0.6270206468414993, "learning_rate": 7.136533100010654e-06, "loss": 0.3615, "step": 8368 }, { "epoch": 0.3787734781624802, "grad_norm": 0.5960545309428391, "learning_rate": 7.135870438204198e-06, "loss": 0.3309, "step": 8369 }, { "epoch": 0.3788187372708758, "grad_norm": 0.6132416090139773, "learning_rate": 7.1352077305044485e-06, "loss": 0.368, "step": 8370 }, { "epoch": 0.3788639963792713, "grad_norm": 0.5923204264613755, "learning_rate": 7.1345449769256416e-06, "loss": 0.3448, "step": 8371 }, { "epoch": 0.3789092554876669, "grad_norm": 0.522084772545119, "learning_rate": 7.133882177482019e-06, "loss": 0.4879, "step": 8372 }, { "epoch": 0.37895451459606244, "grad_norm": 0.4472915138713644, "learning_rate": 7.133219332187823e-06, "loss": 0.4873, "step": 8373 }, { "epoch": 0.37899977370445803, "grad_norm": 0.5607250276819523, "learning_rate": 7.132556441057294e-06, "loss": 0.2654, "step": 8374 }, { "epoch": 0.37904503281285357, "grad_norm": 0.6821875143652115, "learning_rate": 7.131893504104677e-06, "loss": 0.3619, "step": 8375 }, { "epoch": 0.37909029192124916, "grad_norm": 0.6041315305261291, "learning_rate": 7.131230521344217e-06, "loss": 0.353, "step": 8376 }, { "epoch": 0.3791355510296447, "grad_norm": 0.6264230011816776, "learning_rate": 7.130567492790157e-06, "loss": 0.3891, "step": 8377 }, { "epoch": 0.3791808101380403, "grad_norm": 0.6333311023261349, "learning_rate": 7.129904418456745e-06, "loss": 0.3123, "step": 8378 }, { "epoch": 0.37922606924643587, "grad_norm": 0.699733653598983, "learning_rate": 7.129241298358231e-06, "loss": 0.3426, "step": 8379 }, { "epoch": 0.3792713283548314, "grad_norm": 0.6604529388686061, "learning_rate": 7.128578132508859e-06, "loss": 0.3427, "step": 8380 }, { "epoch": 0.379316587463227, "grad_norm": 0.6022601095193064, "learning_rate": 7.127914920922883e-06, "loss": 0.3482, "step": 8381 }, { "epoch": 0.37936184657162253, "grad_norm": 0.7434609203247454, "learning_rate": 7.127251663614547e-06, "loss": 0.3629, "step": 8382 }, { "epoch": 0.3794071056800181, "grad_norm": 0.6220340421846623, "learning_rate": 7.126588360598109e-06, "loss": 0.3559, "step": 8383 }, { "epoch": 0.37945236478841365, "grad_norm": 0.6740928356810203, "learning_rate": 7.125925011887818e-06, "loss": 0.335, "step": 8384 }, { "epoch": 0.37949762389680924, "grad_norm": 0.7475687257974962, "learning_rate": 7.125261617497926e-06, "loss": 0.3686, "step": 8385 }, { "epoch": 0.3795428830052048, "grad_norm": 0.8291350729963307, "learning_rate": 7.12459817744269e-06, "loss": 0.4895, "step": 8386 }, { "epoch": 0.37958814211360037, "grad_norm": 0.632240955498442, "learning_rate": 7.123934691736365e-06, "loss": 0.3739, "step": 8387 }, { "epoch": 0.3796334012219959, "grad_norm": 0.6842845487643655, "learning_rate": 7.123271160393206e-06, "loss": 0.3869, "step": 8388 }, { "epoch": 0.3796786603303915, "grad_norm": 0.6127478816563668, "learning_rate": 7.122607583427472e-06, "loss": 0.3238, "step": 8389 }, { "epoch": 0.3797239194387871, "grad_norm": 0.6564050899226402, "learning_rate": 7.121943960853418e-06, "loss": 0.3773, "step": 8390 }, { "epoch": 0.3797691785471826, "grad_norm": 0.6262183560049303, "learning_rate": 7.121280292685307e-06, "loss": 0.3562, "step": 8391 }, { "epoch": 0.3798144376555782, "grad_norm": 0.636568518471423, "learning_rate": 7.120616578937397e-06, "loss": 0.3793, "step": 8392 }, { "epoch": 0.37985969676397374, "grad_norm": 0.6503707596443266, "learning_rate": 7.1199528196239495e-06, "loss": 0.3582, "step": 8393 }, { "epoch": 0.37990495587236933, "grad_norm": 0.648733837093838, "learning_rate": 7.119289014759228e-06, "loss": 0.3641, "step": 8394 }, { "epoch": 0.37995021498076487, "grad_norm": 0.6033527463965486, "learning_rate": 7.118625164357493e-06, "loss": 0.3614, "step": 8395 }, { "epoch": 0.37999547408916046, "grad_norm": 0.6515459622908713, "learning_rate": 7.117961268433012e-06, "loss": 0.387, "step": 8396 }, { "epoch": 0.380040733197556, "grad_norm": 0.4871667020278753, "learning_rate": 7.117297327000046e-06, "loss": 0.4947, "step": 8397 }, { "epoch": 0.3800859923059516, "grad_norm": 0.6267015745529267, "learning_rate": 7.116633340072863e-06, "loss": 0.3506, "step": 8398 }, { "epoch": 0.3801312514143471, "grad_norm": 0.6194526502875497, "learning_rate": 7.115969307665733e-06, "loss": 0.3351, "step": 8399 }, { "epoch": 0.3801765105227427, "grad_norm": 0.6102449278498785, "learning_rate": 7.115305229792918e-06, "loss": 0.3439, "step": 8400 }, { "epoch": 0.38022176963113824, "grad_norm": 0.7654057374127621, "learning_rate": 7.114641106468692e-06, "loss": 0.3679, "step": 8401 }, { "epoch": 0.38026702873953383, "grad_norm": 0.6773434947716912, "learning_rate": 7.113976937707324e-06, "loss": 0.3582, "step": 8402 }, { "epoch": 0.3803122878479294, "grad_norm": 0.31799243148880396, "learning_rate": 7.1133127235230825e-06, "loss": 0.4777, "step": 8403 }, { "epoch": 0.38035754695632495, "grad_norm": 0.6289377071621696, "learning_rate": 7.1126484639302425e-06, "loss": 0.3615, "step": 8404 }, { "epoch": 0.38040280606472054, "grad_norm": 0.6278500233355753, "learning_rate": 7.111984158943075e-06, "loss": 0.3255, "step": 8405 }, { "epoch": 0.3804480651731161, "grad_norm": 0.6312913692634605, "learning_rate": 7.1113198085758535e-06, "loss": 0.3665, "step": 8406 }, { "epoch": 0.38049332428151167, "grad_norm": 0.6265823361581598, "learning_rate": 7.110655412842855e-06, "loss": 0.3658, "step": 8407 }, { "epoch": 0.3805385833899072, "grad_norm": 0.6374687650436551, "learning_rate": 7.109990971758354e-06, "loss": 0.3447, "step": 8408 }, { "epoch": 0.3805838424983028, "grad_norm": 0.6293191892231478, "learning_rate": 7.109326485336626e-06, "loss": 0.337, "step": 8409 }, { "epoch": 0.38062910160669833, "grad_norm": 0.7138126845674831, "learning_rate": 7.108661953591953e-06, "loss": 0.3398, "step": 8410 }, { "epoch": 0.3806743607150939, "grad_norm": 0.7703350926583172, "learning_rate": 7.107997376538606e-06, "loss": 0.3442, "step": 8411 }, { "epoch": 0.38071961982348945, "grad_norm": 0.658339137124922, "learning_rate": 7.107332754190874e-06, "loss": 0.3825, "step": 8412 }, { "epoch": 0.38076487893188504, "grad_norm": 0.6314047841513415, "learning_rate": 7.1066680865630335e-06, "loss": 0.4002, "step": 8413 }, { "epoch": 0.38081013804028063, "grad_norm": 0.6477014404457618, "learning_rate": 7.106003373669363e-06, "loss": 0.3591, "step": 8414 }, { "epoch": 0.38085539714867617, "grad_norm": 0.6041802367550553, "learning_rate": 7.10533861552415e-06, "loss": 0.33, "step": 8415 }, { "epoch": 0.38090065625707176, "grad_norm": 0.6430900504898354, "learning_rate": 7.104673812141676e-06, "loss": 0.3893, "step": 8416 }, { "epoch": 0.3809459153654673, "grad_norm": 0.6314476922290938, "learning_rate": 7.104008963536224e-06, "loss": 0.3551, "step": 8417 }, { "epoch": 0.3809911744738629, "grad_norm": 0.7428431343765651, "learning_rate": 7.1033440697220845e-06, "loss": 0.3344, "step": 8418 }, { "epoch": 0.3810364335822584, "grad_norm": 0.6049014863415836, "learning_rate": 7.102679130713538e-06, "loss": 0.354, "step": 8419 }, { "epoch": 0.381081692690654, "grad_norm": 0.6490441683398328, "learning_rate": 7.102014146524877e-06, "loss": 0.3793, "step": 8420 }, { "epoch": 0.38112695179904954, "grad_norm": 0.6654902860757159, "learning_rate": 7.101349117170386e-06, "loss": 0.3522, "step": 8421 }, { "epoch": 0.38117221090744513, "grad_norm": 0.6555822112102216, "learning_rate": 7.1006840426643576e-06, "loss": 0.3493, "step": 8422 }, { "epoch": 0.38121747001584066, "grad_norm": 0.6365048153101841, "learning_rate": 7.10001892302108e-06, "loss": 0.3384, "step": 8423 }, { "epoch": 0.38126272912423625, "grad_norm": 0.6887085895296732, "learning_rate": 7.099353758254846e-06, "loss": 0.3476, "step": 8424 }, { "epoch": 0.38130798823263184, "grad_norm": 0.6080229886754357, "learning_rate": 7.0986885483799475e-06, "loss": 0.3124, "step": 8425 }, { "epoch": 0.3813532473410274, "grad_norm": 0.647329971750059, "learning_rate": 7.098023293410677e-06, "loss": 0.3001, "step": 8426 }, { "epoch": 0.38139850644942297, "grad_norm": 0.6508641604197013, "learning_rate": 7.09735799336133e-06, "loss": 0.3491, "step": 8427 }, { "epoch": 0.3814437655578185, "grad_norm": 0.6191855403182719, "learning_rate": 7.096692648246203e-06, "loss": 0.327, "step": 8428 }, { "epoch": 0.3814890246662141, "grad_norm": 1.0524445853788542, "learning_rate": 7.096027258079587e-06, "loss": 0.3401, "step": 8429 }, { "epoch": 0.38153428377460963, "grad_norm": 0.6245006137232687, "learning_rate": 7.095361822875786e-06, "loss": 0.3223, "step": 8430 }, { "epoch": 0.3815795428830052, "grad_norm": 0.6383255613126531, "learning_rate": 7.094696342649092e-06, "loss": 0.3883, "step": 8431 }, { "epoch": 0.38162480199140075, "grad_norm": 0.6874734938881214, "learning_rate": 7.094030817413808e-06, "loss": 0.3414, "step": 8432 }, { "epoch": 0.38167006109979634, "grad_norm": 0.5845736916154386, "learning_rate": 7.093365247184234e-06, "loss": 0.3289, "step": 8433 }, { "epoch": 0.3817153202081919, "grad_norm": 0.5131578573043148, "learning_rate": 7.09269963197467e-06, "loss": 0.4684, "step": 8434 }, { "epoch": 0.38176057931658747, "grad_norm": 0.6426976817130162, "learning_rate": 7.092033971799417e-06, "loss": 0.4068, "step": 8435 }, { "epoch": 0.381805838424983, "grad_norm": 0.3146884550883189, "learning_rate": 7.09136826667278e-06, "loss": 0.4816, "step": 8436 }, { "epoch": 0.3818510975333786, "grad_norm": 0.8773610470531031, "learning_rate": 7.0907025166090615e-06, "loss": 0.3525, "step": 8437 }, { "epoch": 0.3818963566417742, "grad_norm": 0.6356233415643519, "learning_rate": 7.090036721622567e-06, "loss": 0.3579, "step": 8438 }, { "epoch": 0.3819416157501697, "grad_norm": 0.7375475936847615, "learning_rate": 7.089370881727604e-06, "loss": 0.3432, "step": 8439 }, { "epoch": 0.3819868748585653, "grad_norm": 0.6364433797110283, "learning_rate": 7.0887049969384756e-06, "loss": 0.3353, "step": 8440 }, { "epoch": 0.38203213396696084, "grad_norm": 0.5537782812413454, "learning_rate": 7.088039067269493e-06, "loss": 0.2949, "step": 8441 }, { "epoch": 0.38207739307535643, "grad_norm": 0.6125051256367956, "learning_rate": 7.087373092734964e-06, "loss": 0.355, "step": 8442 }, { "epoch": 0.38212265218375197, "grad_norm": 0.5627303676401337, "learning_rate": 7.086707073349197e-06, "loss": 0.4963, "step": 8443 }, { "epoch": 0.38216791129214756, "grad_norm": 0.6606337857741633, "learning_rate": 7.086041009126504e-06, "loss": 0.3288, "step": 8444 }, { "epoch": 0.3822131704005431, "grad_norm": 0.6542767650447026, "learning_rate": 7.0853749000811965e-06, "loss": 0.317, "step": 8445 }, { "epoch": 0.3822584295089387, "grad_norm": 0.40963340986258956, "learning_rate": 7.084708746227589e-06, "loss": 0.4834, "step": 8446 }, { "epoch": 0.3823036886173342, "grad_norm": 0.5833579676406702, "learning_rate": 7.084042547579992e-06, "loss": 0.3572, "step": 8447 }, { "epoch": 0.3823489477257298, "grad_norm": 0.5936420943321252, "learning_rate": 7.08337630415272e-06, "loss": 0.3346, "step": 8448 }, { "epoch": 0.3823942068341254, "grad_norm": 0.6331350642510168, "learning_rate": 7.082710015960091e-06, "loss": 0.36, "step": 8449 }, { "epoch": 0.38243946594252093, "grad_norm": 0.6776388853726919, "learning_rate": 7.08204368301642e-06, "loss": 0.3742, "step": 8450 }, { "epoch": 0.3824847250509165, "grad_norm": 0.6536913237318658, "learning_rate": 7.081377305336025e-06, "loss": 0.3375, "step": 8451 }, { "epoch": 0.38252998415931205, "grad_norm": 0.6744660832075964, "learning_rate": 7.080710882933225e-06, "loss": 0.3457, "step": 8452 }, { "epoch": 0.38257524326770764, "grad_norm": 0.620253089835594, "learning_rate": 7.080044415822337e-06, "loss": 0.3678, "step": 8453 }, { "epoch": 0.3826205023761032, "grad_norm": 0.6134172858330584, "learning_rate": 7.079377904017683e-06, "loss": 0.3096, "step": 8454 }, { "epoch": 0.38266576148449877, "grad_norm": 0.6199238081498524, "learning_rate": 7.078711347533585e-06, "loss": 0.3486, "step": 8455 }, { "epoch": 0.3827110205928943, "grad_norm": 0.6391699024069745, "learning_rate": 7.078044746384365e-06, "loss": 0.3599, "step": 8456 }, { "epoch": 0.3827562797012899, "grad_norm": 0.6803022292367258, "learning_rate": 7.077378100584344e-06, "loss": 0.3583, "step": 8457 }, { "epoch": 0.3828015388096854, "grad_norm": 0.7097666551654821, "learning_rate": 7.076711410147849e-06, "loss": 0.5134, "step": 8458 }, { "epoch": 0.382846797918081, "grad_norm": 0.6725298205711432, "learning_rate": 7.076044675089203e-06, "loss": 0.3769, "step": 8459 }, { "epoch": 0.38289205702647655, "grad_norm": 0.7210712947175153, "learning_rate": 7.075377895422735e-06, "loss": 0.3512, "step": 8460 }, { "epoch": 0.38293731613487214, "grad_norm": 0.5975178708495634, "learning_rate": 7.074711071162768e-06, "loss": 0.3757, "step": 8461 }, { "epoch": 0.38298257524326773, "grad_norm": 0.5915727400449279, "learning_rate": 7.074044202323632e-06, "loss": 0.3438, "step": 8462 }, { "epoch": 0.38302783435166327, "grad_norm": 0.6853253841017686, "learning_rate": 7.073377288919657e-06, "loss": 0.3433, "step": 8463 }, { "epoch": 0.38307309346005886, "grad_norm": 2.0075612522721986, "learning_rate": 7.072710330965171e-06, "loss": 0.3868, "step": 8464 }, { "epoch": 0.3831183525684544, "grad_norm": 0.649754434745739, "learning_rate": 7.072043328474507e-06, "loss": 0.3257, "step": 8465 }, { "epoch": 0.38316361167685, "grad_norm": 0.5866531935730779, "learning_rate": 7.071376281461994e-06, "loss": 0.3428, "step": 8466 }, { "epoch": 0.3832088707852455, "grad_norm": 0.6168824482768775, "learning_rate": 7.0707091899419685e-06, "loss": 0.3747, "step": 8467 }, { "epoch": 0.3832541298936411, "grad_norm": 0.6362663525465235, "learning_rate": 7.070042053928763e-06, "loss": 0.3468, "step": 8468 }, { "epoch": 0.38329938900203664, "grad_norm": 0.6595243836995929, "learning_rate": 7.0693748734367076e-06, "loss": 0.3614, "step": 8469 }, { "epoch": 0.38334464811043223, "grad_norm": 0.6817648534737756, "learning_rate": 7.068707648480145e-06, "loss": 0.3327, "step": 8470 }, { "epoch": 0.38338990721882776, "grad_norm": 0.6195886664641986, "learning_rate": 7.068040379073406e-06, "loss": 0.3607, "step": 8471 }, { "epoch": 0.38343516632722335, "grad_norm": 0.6799828912841049, "learning_rate": 7.067373065230834e-06, "loss": 0.3758, "step": 8472 }, { "epoch": 0.38348042543561894, "grad_norm": 0.6652330351884505, "learning_rate": 7.0667057069667625e-06, "loss": 0.3508, "step": 8473 }, { "epoch": 0.3835256845440145, "grad_norm": 0.704107025489184, "learning_rate": 7.066038304295533e-06, "loss": 0.3389, "step": 8474 }, { "epoch": 0.38357094365241007, "grad_norm": 0.5711102874307746, "learning_rate": 7.065370857231484e-06, "loss": 0.3261, "step": 8475 }, { "epoch": 0.3836162027608056, "grad_norm": 0.7373487417259906, "learning_rate": 7.064703365788961e-06, "loss": 0.372, "step": 8476 }, { "epoch": 0.3836614618692012, "grad_norm": 0.616650070917948, "learning_rate": 7.064035829982302e-06, "loss": 0.2955, "step": 8477 }, { "epoch": 0.3837067209775967, "grad_norm": 1.5530480359895966, "learning_rate": 7.063368249825855e-06, "loss": 0.3735, "step": 8478 }, { "epoch": 0.3837519800859923, "grad_norm": 0.5846369271162104, "learning_rate": 7.062700625333958e-06, "loss": 0.4856, "step": 8479 }, { "epoch": 0.38379723919438785, "grad_norm": 0.5626364168272818, "learning_rate": 7.0620329565209625e-06, "loss": 0.4911, "step": 8480 }, { "epoch": 0.38384249830278344, "grad_norm": 0.6561269380631167, "learning_rate": 7.06136524340121e-06, "loss": 0.3438, "step": 8481 }, { "epoch": 0.383887757411179, "grad_norm": 0.7824785353303435, "learning_rate": 7.06069748598905e-06, "loss": 0.3875, "step": 8482 }, { "epoch": 0.38393301651957457, "grad_norm": 0.6290979318250164, "learning_rate": 7.0600296842988305e-06, "loss": 0.3339, "step": 8483 }, { "epoch": 0.38397827562797016, "grad_norm": 0.6420260562777924, "learning_rate": 7.0593618383448995e-06, "loss": 0.3701, "step": 8484 }, { "epoch": 0.3840235347363657, "grad_norm": 0.6896202946580808, "learning_rate": 7.0586939481416065e-06, "loss": 0.3389, "step": 8485 }, { "epoch": 0.3840687938447613, "grad_norm": 0.6372493711656572, "learning_rate": 7.058026013703304e-06, "loss": 0.3515, "step": 8486 }, { "epoch": 0.3841140529531568, "grad_norm": 0.7460403703740545, "learning_rate": 7.057358035044344e-06, "loss": 0.4759, "step": 8487 }, { "epoch": 0.3841593120615524, "grad_norm": 0.6708392710946648, "learning_rate": 7.0566900121790775e-06, "loss": 0.3474, "step": 8488 }, { "epoch": 0.38420457116994794, "grad_norm": 0.6826571122233077, "learning_rate": 7.05602194512186e-06, "loss": 0.3688, "step": 8489 }, { "epoch": 0.38424983027834353, "grad_norm": 0.6349015181977148, "learning_rate": 7.055353833887045e-06, "loss": 0.3285, "step": 8490 }, { "epoch": 0.38429508938673906, "grad_norm": 0.6544983112078551, "learning_rate": 7.054685678488991e-06, "loss": 0.3661, "step": 8491 }, { "epoch": 0.38434034849513465, "grad_norm": 0.6218008745167531, "learning_rate": 7.054017478942048e-06, "loss": 0.3361, "step": 8492 }, { "epoch": 0.3843856076035302, "grad_norm": 0.6182357777520696, "learning_rate": 7.05334923526058e-06, "loss": 0.3383, "step": 8493 }, { "epoch": 0.3844308667119258, "grad_norm": 0.37265872321485444, "learning_rate": 7.052680947458944e-06, "loss": 0.4587, "step": 8494 }, { "epoch": 0.3844761258203213, "grad_norm": 0.7152783685098152, "learning_rate": 7.052012615551498e-06, "loss": 0.3411, "step": 8495 }, { "epoch": 0.3845213849287169, "grad_norm": 0.6712838532770757, "learning_rate": 7.051344239552603e-06, "loss": 0.3328, "step": 8496 }, { "epoch": 0.3845666440371125, "grad_norm": 0.6847543279933849, "learning_rate": 7.050675819476623e-06, "loss": 0.3337, "step": 8497 }, { "epoch": 0.384611903145508, "grad_norm": 0.6356767753615933, "learning_rate": 7.0500073553379136e-06, "loss": 0.3469, "step": 8498 }, { "epoch": 0.3846571622539036, "grad_norm": 0.6343150294252268, "learning_rate": 7.049338847150845e-06, "loss": 0.364, "step": 8499 }, { "epoch": 0.38470242136229915, "grad_norm": 0.6221401451155776, "learning_rate": 7.048670294929777e-06, "loss": 0.3565, "step": 8500 }, { "epoch": 0.38474768047069474, "grad_norm": 0.38006128196027994, "learning_rate": 7.0480016986890775e-06, "loss": 0.4921, "step": 8501 }, { "epoch": 0.3847929395790903, "grad_norm": 0.6303947949330196, "learning_rate": 7.047333058443111e-06, "loss": 0.3808, "step": 8502 }, { "epoch": 0.38483819868748587, "grad_norm": 0.3335813258860991, "learning_rate": 7.046664374206246e-06, "loss": 0.4623, "step": 8503 }, { "epoch": 0.3848834577958814, "grad_norm": 0.6438951118125865, "learning_rate": 7.045995645992848e-06, "loss": 0.3664, "step": 8504 }, { "epoch": 0.384928716904277, "grad_norm": 1.1040704150941836, "learning_rate": 7.045326873817289e-06, "loss": 0.3756, "step": 8505 }, { "epoch": 0.3849739760126725, "grad_norm": 0.7376731985285496, "learning_rate": 7.0446580576939346e-06, "loss": 0.3627, "step": 8506 }, { "epoch": 0.3850192351210681, "grad_norm": 0.31769986927945976, "learning_rate": 7.043989197637161e-06, "loss": 0.5044, "step": 8507 }, { "epoch": 0.3850644942294637, "grad_norm": 0.6709946462444102, "learning_rate": 7.043320293661335e-06, "loss": 0.3845, "step": 8508 }, { "epoch": 0.38510975333785924, "grad_norm": 0.6800794170963768, "learning_rate": 7.0426513457808334e-06, "loss": 0.3091, "step": 8509 }, { "epoch": 0.38515501244625483, "grad_norm": 0.6348048129635555, "learning_rate": 7.041982354010026e-06, "loss": 0.3342, "step": 8510 }, { "epoch": 0.38520027155465036, "grad_norm": 0.3253351493544958, "learning_rate": 7.041313318363291e-06, "loss": 0.4947, "step": 8511 }, { "epoch": 0.38524553066304595, "grad_norm": 0.6502405066779691, "learning_rate": 7.0406442388550016e-06, "loss": 0.3766, "step": 8512 }, { "epoch": 0.3852907897714415, "grad_norm": 0.7237918307112422, "learning_rate": 7.039975115499534e-06, "loss": 0.3657, "step": 8513 }, { "epoch": 0.3853360488798371, "grad_norm": 0.2887575351537154, "learning_rate": 7.039305948311268e-06, "loss": 0.5045, "step": 8514 }, { "epoch": 0.3853813079882326, "grad_norm": 0.2819195233385298, "learning_rate": 7.038636737304578e-06, "loss": 0.4686, "step": 8515 }, { "epoch": 0.3854265670966282, "grad_norm": 0.6932569182724204, "learning_rate": 7.037967482493848e-06, "loss": 0.2888, "step": 8516 }, { "epoch": 0.38547182620502374, "grad_norm": 0.6763109238832349, "learning_rate": 7.037298183893455e-06, "loss": 0.3678, "step": 8517 }, { "epoch": 0.3855170853134193, "grad_norm": 0.6190293228159512, "learning_rate": 7.036628841517783e-06, "loss": 0.3418, "step": 8518 }, { "epoch": 0.3855623444218149, "grad_norm": 0.6782949993433552, "learning_rate": 7.03595945538121e-06, "loss": 0.2971, "step": 8519 }, { "epoch": 0.38560760353021045, "grad_norm": 0.73709177330135, "learning_rate": 7.035290025498121e-06, "loss": 0.3748, "step": 8520 }, { "epoch": 0.38565286263860604, "grad_norm": 0.34987588010434106, "learning_rate": 7.0346205518829015e-06, "loss": 0.5048, "step": 8521 }, { "epoch": 0.3856981217470016, "grad_norm": 0.6244611959658506, "learning_rate": 7.033951034549935e-06, "loss": 0.3662, "step": 8522 }, { "epoch": 0.38574338085539717, "grad_norm": 0.30869497121445927, "learning_rate": 7.033281473513608e-06, "loss": 0.4954, "step": 8523 }, { "epoch": 0.3857886399637927, "grad_norm": 0.6578803599813655, "learning_rate": 7.032611868788306e-06, "loss": 0.3213, "step": 8524 }, { "epoch": 0.3858338990721883, "grad_norm": 0.6887830147701085, "learning_rate": 7.031942220388418e-06, "loss": 0.3556, "step": 8525 }, { "epoch": 0.3858791581805838, "grad_norm": 0.6383950468224833, "learning_rate": 7.031272528328332e-06, "loss": 0.3319, "step": 8526 }, { "epoch": 0.3859244172889794, "grad_norm": 0.6276590559652488, "learning_rate": 7.030602792622439e-06, "loss": 0.3399, "step": 8527 }, { "epoch": 0.38596967639737495, "grad_norm": 0.6297693881364205, "learning_rate": 7.029933013285127e-06, "loss": 0.3898, "step": 8528 }, { "epoch": 0.38601493550577054, "grad_norm": 0.6446800167295498, "learning_rate": 7.0292631903307895e-06, "loss": 0.3021, "step": 8529 }, { "epoch": 0.3860601946141661, "grad_norm": 0.6276385632566854, "learning_rate": 7.028593323773819e-06, "loss": 0.3825, "step": 8530 }, { "epoch": 0.38610545372256166, "grad_norm": 0.8475822563749026, "learning_rate": 7.027923413628608e-06, "loss": 0.3399, "step": 8531 }, { "epoch": 0.38615071283095725, "grad_norm": 0.5886263362806824, "learning_rate": 7.027253459909551e-06, "loss": 0.3282, "step": 8532 }, { "epoch": 0.3861959719393528, "grad_norm": 0.5911191875338679, "learning_rate": 7.026583462631044e-06, "loss": 0.3683, "step": 8533 }, { "epoch": 0.3862412310477484, "grad_norm": 0.5994274018430233, "learning_rate": 7.025913421807482e-06, "loss": 0.3613, "step": 8534 }, { "epoch": 0.3862864901561439, "grad_norm": 0.6328861964249055, "learning_rate": 7.025243337453263e-06, "loss": 0.296, "step": 8535 }, { "epoch": 0.3863317492645395, "grad_norm": 0.7749002298447163, "learning_rate": 7.024573209582783e-06, "loss": 0.3296, "step": 8536 }, { "epoch": 0.38637700837293504, "grad_norm": 0.6125646428390575, "learning_rate": 7.0239030382104445e-06, "loss": 0.3091, "step": 8537 }, { "epoch": 0.38642226748133063, "grad_norm": 0.6023846423567603, "learning_rate": 7.023232823350646e-06, "loss": 0.345, "step": 8538 }, { "epoch": 0.38646752658972616, "grad_norm": 0.6524052798822022, "learning_rate": 7.022562565017788e-06, "loss": 0.351, "step": 8539 }, { "epoch": 0.38651278569812175, "grad_norm": 0.6046955830979769, "learning_rate": 7.021892263226271e-06, "loss": 0.3365, "step": 8540 }, { "epoch": 0.3865580448065173, "grad_norm": 0.46937747458957113, "learning_rate": 7.0212219179904996e-06, "loss": 0.5064, "step": 8541 }, { "epoch": 0.3866033039149129, "grad_norm": 0.583346339177339, "learning_rate": 7.020551529324877e-06, "loss": 0.3157, "step": 8542 }, { "epoch": 0.38664856302330847, "grad_norm": 0.637042671091487, "learning_rate": 7.019881097243808e-06, "loss": 0.3187, "step": 8543 }, { "epoch": 0.386693822131704, "grad_norm": 0.6792643299062936, "learning_rate": 7.019210621761698e-06, "loss": 0.3934, "step": 8544 }, { "epoch": 0.3867390812400996, "grad_norm": 0.6082188060902372, "learning_rate": 7.018540102892952e-06, "loss": 0.3355, "step": 8545 }, { "epoch": 0.3867843403484951, "grad_norm": 0.6264804930140524, "learning_rate": 7.017869540651979e-06, "loss": 0.3702, "step": 8546 }, { "epoch": 0.3868295994568907, "grad_norm": 0.2948388318861362, "learning_rate": 7.017198935053189e-06, "loss": 0.4738, "step": 8547 }, { "epoch": 0.38687485856528625, "grad_norm": 0.6400740222345551, "learning_rate": 7.016528286110986e-06, "loss": 0.3663, "step": 8548 }, { "epoch": 0.38692011767368184, "grad_norm": 0.6173682417693278, "learning_rate": 7.0158575938397856e-06, "loss": 0.3538, "step": 8549 }, { "epoch": 0.3869653767820774, "grad_norm": 0.6417881388009231, "learning_rate": 7.015186858253995e-06, "loss": 0.377, "step": 8550 }, { "epoch": 0.38701063589047296, "grad_norm": 0.7749490309522027, "learning_rate": 7.01451607936803e-06, "loss": 0.3569, "step": 8551 }, { "epoch": 0.3870558949988685, "grad_norm": 0.6523873906515983, "learning_rate": 7.013845257196301e-06, "loss": 0.3697, "step": 8552 }, { "epoch": 0.3871011541072641, "grad_norm": 0.6536056364088692, "learning_rate": 7.013174391753222e-06, "loss": 0.3814, "step": 8553 }, { "epoch": 0.3871464132156596, "grad_norm": 0.6198002088630558, "learning_rate": 7.012503483053209e-06, "loss": 0.3477, "step": 8554 }, { "epoch": 0.3871916723240552, "grad_norm": 0.6534747588713392, "learning_rate": 7.0118325311106774e-06, "loss": 0.3823, "step": 8555 }, { "epoch": 0.3872369314324508, "grad_norm": 0.5953562968043516, "learning_rate": 7.011161535940042e-06, "loss": 0.3591, "step": 8556 }, { "epoch": 0.38728219054084634, "grad_norm": 0.7085087214260861, "learning_rate": 7.0104904975557245e-06, "loss": 0.4196, "step": 8557 }, { "epoch": 0.38732744964924193, "grad_norm": 0.6173288924149105, "learning_rate": 7.009819415972136e-06, "loss": 0.3636, "step": 8558 }, { "epoch": 0.38737270875763746, "grad_norm": 0.39121363517546165, "learning_rate": 7.009148291203707e-06, "loss": 0.4874, "step": 8559 }, { "epoch": 0.38741796786603305, "grad_norm": 0.35145612881645605, "learning_rate": 7.008477123264849e-06, "loss": 0.4868, "step": 8560 }, { "epoch": 0.3874632269744286, "grad_norm": 0.6610092433572923, "learning_rate": 7.007805912169985e-06, "loss": 0.3342, "step": 8561 }, { "epoch": 0.3875084860828242, "grad_norm": 0.2939613276299066, "learning_rate": 7.00713465793354e-06, "loss": 0.4758, "step": 8562 }, { "epoch": 0.3875537451912197, "grad_norm": 0.7630658549598067, "learning_rate": 7.006463360569935e-06, "loss": 0.3631, "step": 8563 }, { "epoch": 0.3875990042996153, "grad_norm": 0.6235937228967168, "learning_rate": 7.005792020093596e-06, "loss": 0.343, "step": 8564 }, { "epoch": 0.38764426340801084, "grad_norm": 0.5722417308890443, "learning_rate": 7.005120636518945e-06, "loss": 0.2882, "step": 8565 }, { "epoch": 0.3876895225164064, "grad_norm": 0.7099392150866518, "learning_rate": 7.004449209860411e-06, "loss": 0.3704, "step": 8566 }, { "epoch": 0.387734781624802, "grad_norm": 0.6039098044963439, "learning_rate": 7.003777740132419e-06, "loss": 0.3677, "step": 8567 }, { "epoch": 0.38778004073319755, "grad_norm": 0.4846309482443427, "learning_rate": 7.003106227349399e-06, "loss": 0.4731, "step": 8568 }, { "epoch": 0.38782529984159314, "grad_norm": 0.6206185445623791, "learning_rate": 7.002434671525776e-06, "loss": 0.316, "step": 8569 }, { "epoch": 0.3878705589499887, "grad_norm": 0.6252646740494667, "learning_rate": 7.001763072675984e-06, "loss": 0.3469, "step": 8570 }, { "epoch": 0.38791581805838427, "grad_norm": 0.5977188730549259, "learning_rate": 7.0010914308144495e-06, "loss": 0.3514, "step": 8571 }, { "epoch": 0.3879610771667798, "grad_norm": 0.7207959710427151, "learning_rate": 7.000419745955608e-06, "loss": 0.3466, "step": 8572 }, { "epoch": 0.3880063362751754, "grad_norm": 0.3444553025472076, "learning_rate": 6.999748018113889e-06, "loss": 0.518, "step": 8573 }, { "epoch": 0.3880515953835709, "grad_norm": 0.5920163103863829, "learning_rate": 6.999076247303727e-06, "loss": 0.3185, "step": 8574 }, { "epoch": 0.3880968544919665, "grad_norm": 0.6407433001736258, "learning_rate": 6.998404433539556e-06, "loss": 0.3491, "step": 8575 }, { "epoch": 0.38814211360036205, "grad_norm": 0.6195459294965006, "learning_rate": 6.997732576835812e-06, "loss": 0.3723, "step": 8576 }, { "epoch": 0.38818737270875764, "grad_norm": 0.29890055951620087, "learning_rate": 6.997060677206928e-06, "loss": 0.494, "step": 8577 }, { "epoch": 0.38823263181715323, "grad_norm": 0.6119850633665948, "learning_rate": 6.996388734667347e-06, "loss": 0.3426, "step": 8578 }, { "epoch": 0.38827789092554876, "grad_norm": 0.7260265378995115, "learning_rate": 6.995716749231501e-06, "loss": 0.3454, "step": 8579 }, { "epoch": 0.38832315003394435, "grad_norm": 0.5750007638619133, "learning_rate": 6.995044720913834e-06, "loss": 0.3197, "step": 8580 }, { "epoch": 0.3883684091423399, "grad_norm": 0.3534386399426898, "learning_rate": 6.994372649728781e-06, "loss": 0.5086, "step": 8581 }, { "epoch": 0.3884136682507355, "grad_norm": 0.7465663173943224, "learning_rate": 6.993700535690786e-06, "loss": 0.377, "step": 8582 }, { "epoch": 0.388458927359131, "grad_norm": 0.6635549051184639, "learning_rate": 6.993028378814288e-06, "loss": 0.3151, "step": 8583 }, { "epoch": 0.3885041864675266, "grad_norm": 0.6300073254729769, "learning_rate": 6.992356179113735e-06, "loss": 0.367, "step": 8584 }, { "epoch": 0.38854944557592214, "grad_norm": 0.6398951088006489, "learning_rate": 6.991683936603562e-06, "loss": 0.3958, "step": 8585 }, { "epoch": 0.3885947046843177, "grad_norm": 0.6160243481480446, "learning_rate": 6.991011651298223e-06, "loss": 0.376, "step": 8586 }, { "epoch": 0.38863996379271326, "grad_norm": 0.6365641272355594, "learning_rate": 6.990339323212154e-06, "loss": 0.2957, "step": 8587 }, { "epoch": 0.38868522290110885, "grad_norm": 0.640848693473412, "learning_rate": 6.989666952359809e-06, "loss": 0.3649, "step": 8588 }, { "epoch": 0.3887304820095044, "grad_norm": 0.7008769461942187, "learning_rate": 6.988994538755631e-06, "loss": 0.3437, "step": 8589 }, { "epoch": 0.3887757411179, "grad_norm": 0.601194932850971, "learning_rate": 6.988322082414069e-06, "loss": 0.3158, "step": 8590 }, { "epoch": 0.38882100022629557, "grad_norm": 0.6295313129212234, "learning_rate": 6.987649583349572e-06, "loss": 0.3819, "step": 8591 }, { "epoch": 0.3888662593346911, "grad_norm": 0.674097830312063, "learning_rate": 6.98697704157659e-06, "loss": 0.3393, "step": 8592 }, { "epoch": 0.3889115184430867, "grad_norm": 0.6002185493604161, "learning_rate": 6.986304457109574e-06, "loss": 0.3284, "step": 8593 }, { "epoch": 0.3889567775514822, "grad_norm": 0.669798338747915, "learning_rate": 6.9856318299629755e-06, "loss": 0.337, "step": 8594 }, { "epoch": 0.3890020366598778, "grad_norm": 0.6334498651193496, "learning_rate": 6.984959160151248e-06, "loss": 0.3352, "step": 8595 }, { "epoch": 0.38904729576827335, "grad_norm": 0.610674143561136, "learning_rate": 6.984286447688844e-06, "loss": 0.3368, "step": 8596 }, { "epoch": 0.38909255487666894, "grad_norm": 0.34689978420777917, "learning_rate": 6.983613692590219e-06, "loss": 0.4979, "step": 8597 }, { "epoch": 0.3891378139850645, "grad_norm": 0.6095161885854828, "learning_rate": 6.9829408948698274e-06, "loss": 0.3903, "step": 8598 }, { "epoch": 0.38918307309346006, "grad_norm": 0.3022837257767364, "learning_rate": 6.982268054542127e-06, "loss": 0.4885, "step": 8599 }, { "epoch": 0.3892283322018556, "grad_norm": 0.6723165210046169, "learning_rate": 6.981595171621572e-06, "loss": 0.344, "step": 8600 }, { "epoch": 0.3892735913102512, "grad_norm": 0.7296296760079936, "learning_rate": 6.980922246122626e-06, "loss": 0.367, "step": 8601 }, { "epoch": 0.3893188504186468, "grad_norm": 0.6500560720045101, "learning_rate": 6.980249278059742e-06, "loss": 0.337, "step": 8602 }, { "epoch": 0.3893641095270423, "grad_norm": 0.6182250761658985, "learning_rate": 6.979576267447385e-06, "loss": 0.3465, "step": 8603 }, { "epoch": 0.3894093686354379, "grad_norm": 0.6968747034851669, "learning_rate": 6.9789032143000125e-06, "loss": 0.3402, "step": 8604 }, { "epoch": 0.38945462774383344, "grad_norm": 0.6643017799130763, "learning_rate": 6.978230118632088e-06, "loss": 0.3708, "step": 8605 }, { "epoch": 0.389499886852229, "grad_norm": 0.6227026827677045, "learning_rate": 6.977556980458073e-06, "loss": 0.3538, "step": 8606 }, { "epoch": 0.38954514596062456, "grad_norm": 0.6742507578789183, "learning_rate": 6.976883799792434e-06, "loss": 0.3822, "step": 8607 }, { "epoch": 0.38959040506902015, "grad_norm": 0.6814209280655347, "learning_rate": 6.9762105766496315e-06, "loss": 0.3246, "step": 8608 }, { "epoch": 0.3896356641774157, "grad_norm": 0.647736370344086, "learning_rate": 6.975537311044136e-06, "loss": 0.334, "step": 8609 }, { "epoch": 0.3896809232858113, "grad_norm": 0.6228079659433321, "learning_rate": 6.974864002990409e-06, "loss": 0.3683, "step": 8610 }, { "epoch": 0.3897261823942068, "grad_norm": 0.6380735975515563, "learning_rate": 6.97419065250292e-06, "loss": 0.3641, "step": 8611 }, { "epoch": 0.3897714415026024, "grad_norm": 1.1646045621050385, "learning_rate": 6.973517259596138e-06, "loss": 0.3522, "step": 8612 }, { "epoch": 0.389816700610998, "grad_norm": 0.5993642665906557, "learning_rate": 6.9728438242845295e-06, "loss": 0.3384, "step": 8613 }, { "epoch": 0.3898619597193935, "grad_norm": 0.4615567787347811, "learning_rate": 6.972170346582568e-06, "loss": 0.4898, "step": 8614 }, { "epoch": 0.3899072188277891, "grad_norm": 0.6679530107934812, "learning_rate": 6.9714968265047234e-06, "loss": 0.3456, "step": 8615 }, { "epoch": 0.38995247793618465, "grad_norm": 0.6438760491238555, "learning_rate": 6.9708232640654646e-06, "loss": 0.3786, "step": 8616 }, { "epoch": 0.38999773704458024, "grad_norm": 0.6502734654050656, "learning_rate": 6.9701496592792695e-06, "loss": 0.3107, "step": 8617 }, { "epoch": 0.3900429961529758, "grad_norm": 0.655581936435849, "learning_rate": 6.969476012160607e-06, "loss": 0.3594, "step": 8618 }, { "epoch": 0.39008825526137136, "grad_norm": 0.5840458237076638, "learning_rate": 6.9688023227239555e-06, "loss": 0.3011, "step": 8619 }, { "epoch": 0.3901335143697669, "grad_norm": 0.3520744592844673, "learning_rate": 6.968128590983787e-06, "loss": 0.4994, "step": 8620 }, { "epoch": 0.3901787734781625, "grad_norm": 0.6239584092422918, "learning_rate": 6.967454816954581e-06, "loss": 0.3088, "step": 8621 }, { "epoch": 0.390224032586558, "grad_norm": 0.5966980508763927, "learning_rate": 6.966781000650813e-06, "loss": 0.3539, "step": 8622 }, { "epoch": 0.3902692916949536, "grad_norm": 0.6395866873179475, "learning_rate": 6.966107142086962e-06, "loss": 0.3503, "step": 8623 }, { "epoch": 0.39031455080334915, "grad_norm": 0.9190596121602028, "learning_rate": 6.965433241277506e-06, "loss": 0.4116, "step": 8624 }, { "epoch": 0.39035980991174474, "grad_norm": 0.6402288106541163, "learning_rate": 6.964759298236927e-06, "loss": 0.3258, "step": 8625 }, { "epoch": 0.3904050690201403, "grad_norm": 0.6504348325632919, "learning_rate": 6.964085312979706e-06, "loss": 0.3668, "step": 8626 }, { "epoch": 0.39045032812853586, "grad_norm": 0.6216577096624377, "learning_rate": 6.963411285520322e-06, "loss": 0.312, "step": 8627 }, { "epoch": 0.39049558723693145, "grad_norm": 0.6559684801547805, "learning_rate": 6.962737215873261e-06, "loss": 0.3615, "step": 8628 }, { "epoch": 0.390540846345327, "grad_norm": 0.6719182999512758, "learning_rate": 6.962063104053003e-06, "loss": 0.3883, "step": 8629 }, { "epoch": 0.3905861054537226, "grad_norm": 0.6345286639652378, "learning_rate": 6.961388950074038e-06, "loss": 0.3304, "step": 8630 }, { "epoch": 0.3906313645621181, "grad_norm": 0.7763447214288505, "learning_rate": 6.960714753950847e-06, "loss": 0.3543, "step": 8631 }, { "epoch": 0.3906766236705137, "grad_norm": 0.6207165825510894, "learning_rate": 6.960040515697918e-06, "loss": 0.3274, "step": 8632 }, { "epoch": 0.39072188277890924, "grad_norm": 0.6863537745023689, "learning_rate": 6.9593662353297375e-06, "loss": 0.3376, "step": 8633 }, { "epoch": 0.3907671418873048, "grad_norm": 0.6378501990610004, "learning_rate": 6.958691912860794e-06, "loss": 0.3066, "step": 8634 }, { "epoch": 0.39081240099570036, "grad_norm": 0.618393802299301, "learning_rate": 6.958017548305578e-06, "loss": 0.3535, "step": 8635 }, { "epoch": 0.39085766010409595, "grad_norm": 0.6521097916175539, "learning_rate": 6.95734314167858e-06, "loss": 0.3512, "step": 8636 }, { "epoch": 0.39090291921249154, "grad_norm": 0.35284015932868484, "learning_rate": 6.956668692994286e-06, "loss": 0.4716, "step": 8637 }, { "epoch": 0.3909481783208871, "grad_norm": 0.6396524024828847, "learning_rate": 6.955994202267193e-06, "loss": 0.3446, "step": 8638 }, { "epoch": 0.39099343742928266, "grad_norm": 0.6206237459503144, "learning_rate": 6.955319669511793e-06, "loss": 0.321, "step": 8639 }, { "epoch": 0.3910386965376782, "grad_norm": 0.29714977861285985, "learning_rate": 6.954645094742577e-06, "loss": 0.4887, "step": 8640 }, { "epoch": 0.3910839556460738, "grad_norm": 0.6168756752989212, "learning_rate": 6.9539704779740415e-06, "loss": 0.3238, "step": 8641 }, { "epoch": 0.3911292147544693, "grad_norm": 0.593369640280946, "learning_rate": 6.953295819220681e-06, "loss": 0.3272, "step": 8642 }, { "epoch": 0.3911744738628649, "grad_norm": 0.287094237240238, "learning_rate": 6.952621118496994e-06, "loss": 0.4801, "step": 8643 }, { "epoch": 0.39121973297126045, "grad_norm": 0.6246467794851839, "learning_rate": 6.9519463758174745e-06, "loss": 0.3513, "step": 8644 }, { "epoch": 0.39126499207965604, "grad_norm": 0.6384595000969184, "learning_rate": 6.951271591196623e-06, "loss": 0.3522, "step": 8645 }, { "epoch": 0.39131025118805157, "grad_norm": 0.6290461760291466, "learning_rate": 6.950596764648938e-06, "loss": 0.3511, "step": 8646 }, { "epoch": 0.39135551029644716, "grad_norm": 0.6447982036114861, "learning_rate": 6.9499218961889205e-06, "loss": 0.3602, "step": 8647 }, { "epoch": 0.39140076940484275, "grad_norm": 0.6259492232560252, "learning_rate": 6.949246985831069e-06, "loss": 0.3224, "step": 8648 }, { "epoch": 0.3914460285132383, "grad_norm": 0.644176305316592, "learning_rate": 6.948572033589887e-06, "loss": 0.2964, "step": 8649 }, { "epoch": 0.3914912876216339, "grad_norm": 0.6377018378945989, "learning_rate": 6.9478970394798755e-06, "loss": 0.2948, "step": 8650 }, { "epoch": 0.3915365467300294, "grad_norm": 0.34507467046136586, "learning_rate": 6.9472220035155394e-06, "loss": 0.5053, "step": 8651 }, { "epoch": 0.391581805838425, "grad_norm": 0.32562521184348303, "learning_rate": 6.9465469257113825e-06, "loss": 0.4863, "step": 8652 }, { "epoch": 0.39162706494682054, "grad_norm": 0.7729315088269995, "learning_rate": 6.945871806081911e-06, "loss": 0.3197, "step": 8653 }, { "epoch": 0.3916723240552161, "grad_norm": 0.6479846752585515, "learning_rate": 6.945196644641631e-06, "loss": 0.3031, "step": 8654 }, { "epoch": 0.39171758316361166, "grad_norm": 0.6411110092285045, "learning_rate": 6.944521441405049e-06, "loss": 0.3529, "step": 8655 }, { "epoch": 0.39176284227200725, "grad_norm": 0.7137929377143929, "learning_rate": 6.943846196386673e-06, "loss": 0.3444, "step": 8656 }, { "epoch": 0.3918081013804028, "grad_norm": 0.6422030826957724, "learning_rate": 6.943170909601013e-06, "loss": 0.3802, "step": 8657 }, { "epoch": 0.3918533604887984, "grad_norm": 0.6520353535812056, "learning_rate": 6.942495581062578e-06, "loss": 0.3379, "step": 8658 }, { "epoch": 0.3918986195971939, "grad_norm": 0.6514855160159058, "learning_rate": 6.94182021078588e-06, "loss": 0.3506, "step": 8659 }, { "epoch": 0.3919438787055895, "grad_norm": 0.6215040971747843, "learning_rate": 6.941144798785429e-06, "loss": 0.3467, "step": 8660 }, { "epoch": 0.3919891378139851, "grad_norm": 0.6040186673839965, "learning_rate": 6.9404693450757366e-06, "loss": 0.3085, "step": 8661 }, { "epoch": 0.3920343969223806, "grad_norm": 0.6247800042275801, "learning_rate": 6.939793849671318e-06, "loss": 0.3384, "step": 8662 }, { "epoch": 0.3920796560307762, "grad_norm": 0.6908979460847018, "learning_rate": 6.939118312586688e-06, "loss": 0.3421, "step": 8663 }, { "epoch": 0.39212491513917175, "grad_norm": 0.610891920990332, "learning_rate": 6.938442733836361e-06, "loss": 0.3246, "step": 8664 }, { "epoch": 0.39217017424756734, "grad_norm": 0.6193622434028262, "learning_rate": 6.9377671134348535e-06, "loss": 0.317, "step": 8665 }, { "epoch": 0.3922154333559629, "grad_norm": 0.6122163861863653, "learning_rate": 6.93709145139668e-06, "loss": 0.3232, "step": 8666 }, { "epoch": 0.39226069246435846, "grad_norm": 0.6713130434464446, "learning_rate": 6.936415747736363e-06, "loss": 0.273, "step": 8667 }, { "epoch": 0.392305951572754, "grad_norm": 0.5804329365473563, "learning_rate": 6.935740002468417e-06, "loss": 0.3356, "step": 8668 }, { "epoch": 0.3923512106811496, "grad_norm": 0.6704525479055023, "learning_rate": 6.935064215607364e-06, "loss": 0.3619, "step": 8669 }, { "epoch": 0.3923964697895451, "grad_norm": 0.5214643061521009, "learning_rate": 6.934388387167726e-06, "loss": 0.4794, "step": 8670 }, { "epoch": 0.3924417288979407, "grad_norm": 0.7548003104336839, "learning_rate": 6.933712517164019e-06, "loss": 0.3655, "step": 8671 }, { "epoch": 0.3924869880063363, "grad_norm": 0.6111333579007964, "learning_rate": 6.933036605610773e-06, "loss": 0.2902, "step": 8672 }, { "epoch": 0.39253224711473184, "grad_norm": 0.6062223905435875, "learning_rate": 6.932360652522504e-06, "loss": 0.3162, "step": 8673 }, { "epoch": 0.3925775062231274, "grad_norm": 0.6587197514595092, "learning_rate": 6.93168465791374e-06, "loss": 0.3598, "step": 8674 }, { "epoch": 0.39262276533152296, "grad_norm": 0.6525325821139949, "learning_rate": 6.931008621799007e-06, "loss": 0.3691, "step": 8675 }, { "epoch": 0.39266802443991855, "grad_norm": 0.6237709662578704, "learning_rate": 6.930332544192829e-06, "loss": 0.3762, "step": 8676 }, { "epoch": 0.3927132835483141, "grad_norm": 0.6245083049472832, "learning_rate": 6.929656425109731e-06, "loss": 0.3514, "step": 8677 }, { "epoch": 0.3927585426567097, "grad_norm": 0.6767809573399547, "learning_rate": 6.9289802645642455e-06, "loss": 0.3382, "step": 8678 }, { "epoch": 0.3928038017651052, "grad_norm": 0.41616801093824357, "learning_rate": 6.928304062570897e-06, "loss": 0.4813, "step": 8679 }, { "epoch": 0.3928490608735008, "grad_norm": 0.7707353902927744, "learning_rate": 6.927627819144217e-06, "loss": 0.3297, "step": 8680 }, { "epoch": 0.39289431998189633, "grad_norm": 0.6385764521466519, "learning_rate": 6.926951534298736e-06, "loss": 0.3728, "step": 8681 }, { "epoch": 0.3929395790902919, "grad_norm": 0.6644952441279728, "learning_rate": 6.926275208048984e-06, "loss": 0.3528, "step": 8682 }, { "epoch": 0.39298483819868746, "grad_norm": 0.6429728839288491, "learning_rate": 6.925598840409493e-06, "loss": 0.3732, "step": 8683 }, { "epoch": 0.39303009730708305, "grad_norm": 0.6111124585554257, "learning_rate": 6.924922431394798e-06, "loss": 0.349, "step": 8684 }, { "epoch": 0.39307535641547864, "grad_norm": 0.2904134706836162, "learning_rate": 6.924245981019432e-06, "loss": 0.4543, "step": 8685 }, { "epoch": 0.3931206155238742, "grad_norm": 0.6445296325024885, "learning_rate": 6.92356948929793e-06, "loss": 0.365, "step": 8686 }, { "epoch": 0.39316587463226976, "grad_norm": 0.622289117555611, "learning_rate": 6.922892956244827e-06, "loss": 0.3587, "step": 8687 }, { "epoch": 0.3932111337406653, "grad_norm": 0.2850696554299266, "learning_rate": 6.92221638187466e-06, "loss": 0.5007, "step": 8688 }, { "epoch": 0.3932563928490609, "grad_norm": 0.6122601998647502, "learning_rate": 6.921539766201967e-06, "loss": 0.3236, "step": 8689 }, { "epoch": 0.3933016519574564, "grad_norm": 0.709577675183887, "learning_rate": 6.920863109241285e-06, "loss": 0.3563, "step": 8690 }, { "epoch": 0.393346911065852, "grad_norm": 0.34339414934632356, "learning_rate": 6.920186411007155e-06, "loss": 0.4917, "step": 8691 }, { "epoch": 0.39339217017424755, "grad_norm": 0.7456381059814225, "learning_rate": 6.919509671514116e-06, "loss": 0.3629, "step": 8692 }, { "epoch": 0.39343742928264314, "grad_norm": 0.6745920091564857, "learning_rate": 6.91883289077671e-06, "loss": 0.3314, "step": 8693 }, { "epoch": 0.39348268839103867, "grad_norm": 0.6477926625245785, "learning_rate": 6.918156068809479e-06, "loss": 0.384, "step": 8694 }, { "epoch": 0.39352794749943426, "grad_norm": 0.2827842762719348, "learning_rate": 6.917479205626965e-06, "loss": 0.4691, "step": 8695 }, { "epoch": 0.39357320660782985, "grad_norm": 0.2858251259802762, "learning_rate": 6.916802301243711e-06, "loss": 0.4701, "step": 8696 }, { "epoch": 0.3936184657162254, "grad_norm": 0.6481176264184711, "learning_rate": 6.916125355674264e-06, "loss": 0.3364, "step": 8697 }, { "epoch": 0.393663724824621, "grad_norm": 0.6563203748286447, "learning_rate": 6.915448368933166e-06, "loss": 0.3543, "step": 8698 }, { "epoch": 0.3937089839330165, "grad_norm": 0.6412880506103583, "learning_rate": 6.914771341034967e-06, "loss": 0.3515, "step": 8699 }, { "epoch": 0.3937542430414121, "grad_norm": 0.29259987008716926, "learning_rate": 6.914094271994211e-06, "loss": 0.4955, "step": 8700 }, { "epoch": 0.39379950214980763, "grad_norm": 0.6285536609147793, "learning_rate": 6.913417161825449e-06, "loss": 0.3612, "step": 8701 }, { "epoch": 0.3938447612582032, "grad_norm": 0.2906222931727884, "learning_rate": 6.912740010543229e-06, "loss": 0.4956, "step": 8702 }, { "epoch": 0.39389002036659876, "grad_norm": 0.7385466443098395, "learning_rate": 6.912062818162101e-06, "loss": 0.3475, "step": 8703 }, { "epoch": 0.39393527947499435, "grad_norm": 0.2980880050919162, "learning_rate": 6.911385584696615e-06, "loss": 0.4687, "step": 8704 }, { "epoch": 0.3939805385833899, "grad_norm": 0.3011430205086542, "learning_rate": 6.910708310161323e-06, "loss": 0.4903, "step": 8705 }, { "epoch": 0.3940257976917855, "grad_norm": 0.6786333339665385, "learning_rate": 6.910030994570778e-06, "loss": 0.3609, "step": 8706 }, { "epoch": 0.39407105680018106, "grad_norm": 0.6605605300378226, "learning_rate": 6.909353637939533e-06, "loss": 0.3728, "step": 8707 }, { "epoch": 0.3941163159085766, "grad_norm": 0.3013435270774591, "learning_rate": 6.908676240282141e-06, "loss": 0.498, "step": 8708 }, { "epoch": 0.3941615750169722, "grad_norm": 0.6301341462442973, "learning_rate": 6.907998801613162e-06, "loss": 0.3521, "step": 8709 }, { "epoch": 0.3942068341253677, "grad_norm": 0.28087605899378787, "learning_rate": 6.907321321947146e-06, "loss": 0.4472, "step": 8710 }, { "epoch": 0.3942520932337633, "grad_norm": 0.7817288977807025, "learning_rate": 6.906643801298654e-06, "loss": 0.3586, "step": 8711 }, { "epoch": 0.39429735234215885, "grad_norm": 1.1246420663438081, "learning_rate": 6.9059662396822415e-06, "loss": 0.3506, "step": 8712 }, { "epoch": 0.39434261145055444, "grad_norm": 0.6710933590669182, "learning_rate": 6.905288637112468e-06, "loss": 0.3839, "step": 8713 }, { "epoch": 0.39438787055894997, "grad_norm": 0.7314548660479658, "learning_rate": 6.904610993603894e-06, "loss": 0.3217, "step": 8714 }, { "epoch": 0.39443312966734556, "grad_norm": 0.3041015740060619, "learning_rate": 6.90393330917108e-06, "loss": 0.497, "step": 8715 }, { "epoch": 0.3944783887757411, "grad_norm": 0.6279941312234719, "learning_rate": 6.903255583828585e-06, "loss": 0.3684, "step": 8716 }, { "epoch": 0.3945236478841367, "grad_norm": 0.7164162219907889, "learning_rate": 6.902577817590975e-06, "loss": 0.3792, "step": 8717 }, { "epoch": 0.3945689069925322, "grad_norm": 0.6107098021250627, "learning_rate": 6.901900010472811e-06, "loss": 0.3403, "step": 8718 }, { "epoch": 0.3946141661009278, "grad_norm": 0.5809393957416973, "learning_rate": 6.901222162488655e-06, "loss": 0.35, "step": 8719 }, { "epoch": 0.3946594252093234, "grad_norm": 0.6329370924311692, "learning_rate": 6.9005442736530745e-06, "loss": 0.3235, "step": 8720 }, { "epoch": 0.39470468431771893, "grad_norm": 0.6260093623120035, "learning_rate": 6.899866343980635e-06, "loss": 0.3836, "step": 8721 }, { "epoch": 0.3947499434261145, "grad_norm": 0.6883826862977819, "learning_rate": 6.899188373485903e-06, "loss": 0.3625, "step": 8722 }, { "epoch": 0.39479520253451006, "grad_norm": 0.5849484026828342, "learning_rate": 6.8985103621834455e-06, "loss": 0.3133, "step": 8723 }, { "epoch": 0.39484046164290565, "grad_norm": 0.623484541700811, "learning_rate": 6.8978323100878305e-06, "loss": 0.3496, "step": 8724 }, { "epoch": 0.3948857207513012, "grad_norm": 0.5962752422609053, "learning_rate": 6.897154217213629e-06, "loss": 0.3508, "step": 8725 }, { "epoch": 0.3949309798596968, "grad_norm": 0.6707375328358476, "learning_rate": 6.8964760835754095e-06, "loss": 0.3668, "step": 8726 }, { "epoch": 0.3949762389680923, "grad_norm": 0.32723708194093143, "learning_rate": 6.895797909187745e-06, "loss": 0.5035, "step": 8727 }, { "epoch": 0.3950214980764879, "grad_norm": 0.5904918431761856, "learning_rate": 6.8951196940652045e-06, "loss": 0.3286, "step": 8728 }, { "epoch": 0.39506675718488343, "grad_norm": 0.6513250106633817, "learning_rate": 6.894441438222362e-06, "loss": 0.3425, "step": 8729 }, { "epoch": 0.395112016293279, "grad_norm": 0.7455422173357793, "learning_rate": 6.89376314167379e-06, "loss": 0.3482, "step": 8730 }, { "epoch": 0.3951572754016746, "grad_norm": 0.6774642915248136, "learning_rate": 6.893084804434067e-06, "loss": 0.3727, "step": 8731 }, { "epoch": 0.39520253451007015, "grad_norm": 0.6096825832240897, "learning_rate": 6.892406426517764e-06, "loss": 0.3227, "step": 8732 }, { "epoch": 0.39524779361846574, "grad_norm": 0.666446310795665, "learning_rate": 6.8917280079394596e-06, "loss": 0.3622, "step": 8733 }, { "epoch": 0.39529305272686127, "grad_norm": 0.6059322710118061, "learning_rate": 6.891049548713731e-06, "loss": 0.3496, "step": 8734 }, { "epoch": 0.39533831183525686, "grad_norm": 0.3275420726550203, "learning_rate": 6.8903710488551544e-06, "loss": 0.4777, "step": 8735 }, { "epoch": 0.3953835709436524, "grad_norm": 0.6373966786981582, "learning_rate": 6.889692508378312e-06, "loss": 0.382, "step": 8736 }, { "epoch": 0.395428830052048, "grad_norm": 0.657915152352585, "learning_rate": 6.889013927297778e-06, "loss": 0.3628, "step": 8737 }, { "epoch": 0.3954740891604435, "grad_norm": 0.43447516800031616, "learning_rate": 6.888335305628138e-06, "loss": 0.4972, "step": 8738 }, { "epoch": 0.3955193482688391, "grad_norm": 0.294283044657983, "learning_rate": 6.887656643383972e-06, "loss": 0.4785, "step": 8739 }, { "epoch": 0.39556460737723464, "grad_norm": 0.701929657079196, "learning_rate": 6.886977940579862e-06, "loss": 0.3414, "step": 8740 }, { "epoch": 0.39560986648563023, "grad_norm": 0.6942881894781736, "learning_rate": 6.886299197230391e-06, "loss": 0.3436, "step": 8741 }, { "epoch": 0.3956551255940258, "grad_norm": 0.5906622336773468, "learning_rate": 6.885620413350145e-06, "loss": 0.3261, "step": 8742 }, { "epoch": 0.39570038470242136, "grad_norm": 0.732763832599632, "learning_rate": 6.884941588953706e-06, "loss": 0.3508, "step": 8743 }, { "epoch": 0.39574564381081695, "grad_norm": 0.7552325739031256, "learning_rate": 6.884262724055663e-06, "loss": 0.3474, "step": 8744 }, { "epoch": 0.3957909029192125, "grad_norm": 0.6542164691291017, "learning_rate": 6.8835838186705985e-06, "loss": 0.3221, "step": 8745 }, { "epoch": 0.3958361620276081, "grad_norm": 0.6364917758168952, "learning_rate": 6.8829048728131056e-06, "loss": 0.3626, "step": 8746 }, { "epoch": 0.3958814211360036, "grad_norm": 0.6307626027957765, "learning_rate": 6.882225886497768e-06, "loss": 0.3339, "step": 8747 }, { "epoch": 0.3959266802443992, "grad_norm": 0.6846426846072403, "learning_rate": 6.8815468597391785e-06, "loss": 0.3451, "step": 8748 }, { "epoch": 0.39597193935279473, "grad_norm": 0.6193145573313631, "learning_rate": 6.880867792551924e-06, "loss": 0.3539, "step": 8749 }, { "epoch": 0.3960171984611903, "grad_norm": 0.6411114657727086, "learning_rate": 6.880188684950599e-06, "loss": 0.3727, "step": 8750 }, { "epoch": 0.39606245756958586, "grad_norm": 0.676463974354279, "learning_rate": 6.879509536949792e-06, "loss": 0.3605, "step": 8751 }, { "epoch": 0.39610771667798145, "grad_norm": 0.6642196025191914, "learning_rate": 6.878830348564098e-06, "loss": 0.3631, "step": 8752 }, { "epoch": 0.396152975786377, "grad_norm": 0.6307656132854766, "learning_rate": 6.878151119808111e-06, "loss": 0.3443, "step": 8753 }, { "epoch": 0.39619823489477257, "grad_norm": 0.6247080085850564, "learning_rate": 6.8774718506964245e-06, "loss": 0.3222, "step": 8754 }, { "epoch": 0.39624349400316816, "grad_norm": 0.9383560582951295, "learning_rate": 6.876792541243633e-06, "loss": 0.3564, "step": 8755 }, { "epoch": 0.3962887531115637, "grad_norm": 0.6281135643962057, "learning_rate": 6.876113191464336e-06, "loss": 0.364, "step": 8756 }, { "epoch": 0.3963340122199593, "grad_norm": 0.7782318894620474, "learning_rate": 6.875433801373128e-06, "loss": 0.289, "step": 8757 }, { "epoch": 0.3963792713283548, "grad_norm": 0.4686477527301178, "learning_rate": 6.8747543709846064e-06, "loss": 0.4773, "step": 8758 }, { "epoch": 0.3964245304367504, "grad_norm": 0.5931616218567658, "learning_rate": 6.8740749003133725e-06, "loss": 0.3568, "step": 8759 }, { "epoch": 0.39646978954514595, "grad_norm": 0.6620622851903066, "learning_rate": 6.873395389374024e-06, "loss": 0.3537, "step": 8760 }, { "epoch": 0.39651504865354154, "grad_norm": 0.6560883533650138, "learning_rate": 6.872715838181161e-06, "loss": 0.3823, "step": 8761 }, { "epoch": 0.39656030776193707, "grad_norm": 0.6712679049437771, "learning_rate": 6.872036246749387e-06, "loss": 0.3656, "step": 8762 }, { "epoch": 0.39660556687033266, "grad_norm": 0.576014190977509, "learning_rate": 6.871356615093306e-06, "loss": 0.3273, "step": 8763 }, { "epoch": 0.3966508259787282, "grad_norm": 0.6717922727634008, "learning_rate": 6.870676943227516e-06, "loss": 0.3706, "step": 8764 }, { "epoch": 0.3966960850871238, "grad_norm": 0.40196289686369996, "learning_rate": 6.869997231166625e-06, "loss": 0.4814, "step": 8765 }, { "epoch": 0.3967413441955194, "grad_norm": 0.39845981511037476, "learning_rate": 6.869317478925236e-06, "loss": 0.4755, "step": 8766 }, { "epoch": 0.3967866033039149, "grad_norm": 0.7263105208145871, "learning_rate": 6.8686376865179576e-06, "loss": 0.3497, "step": 8767 }, { "epoch": 0.3968318624123105, "grad_norm": 0.6656152094364597, "learning_rate": 6.867957853959392e-06, "loss": 0.3601, "step": 8768 }, { "epoch": 0.39687712152070603, "grad_norm": 0.6096887779698981, "learning_rate": 6.86727798126415e-06, "loss": 0.3859, "step": 8769 }, { "epoch": 0.3969223806291016, "grad_norm": 0.7120773328838061, "learning_rate": 6.866598068446839e-06, "loss": 0.3555, "step": 8770 }, { "epoch": 0.39696763973749716, "grad_norm": 0.9733028374256942, "learning_rate": 6.8659181155220674e-06, "loss": 0.3321, "step": 8771 }, { "epoch": 0.39701289884589275, "grad_norm": 0.7205067740179895, "learning_rate": 6.865238122504449e-06, "loss": 0.3416, "step": 8772 }, { "epoch": 0.3970581579542883, "grad_norm": 0.7190878853947535, "learning_rate": 6.86455808940859e-06, "loss": 0.3867, "step": 8773 }, { "epoch": 0.39710341706268387, "grad_norm": 0.5962105622474269, "learning_rate": 6.863878016249103e-06, "loss": 0.3064, "step": 8774 }, { "epoch": 0.3971486761710794, "grad_norm": 0.4949437489129078, "learning_rate": 6.8631979030406045e-06, "loss": 0.4991, "step": 8775 }, { "epoch": 0.397193935279475, "grad_norm": 0.7032902018583524, "learning_rate": 6.862517749797703e-06, "loss": 0.3329, "step": 8776 }, { "epoch": 0.3972391943878706, "grad_norm": 0.6427673222676692, "learning_rate": 6.861837556535018e-06, "loss": 0.3734, "step": 8777 }, { "epoch": 0.3972844534962661, "grad_norm": 0.6180141133170143, "learning_rate": 6.86115732326716e-06, "loss": 0.3366, "step": 8778 }, { "epoch": 0.3973297126046617, "grad_norm": 0.6308046882524578, "learning_rate": 6.860477050008749e-06, "loss": 0.3522, "step": 8779 }, { "epoch": 0.39737497171305725, "grad_norm": 0.6888985871886418, "learning_rate": 6.859796736774399e-06, "loss": 0.4036, "step": 8780 }, { "epoch": 0.39742023082145284, "grad_norm": 0.6252466017722336, "learning_rate": 6.859116383578729e-06, "loss": 0.3372, "step": 8781 }, { "epoch": 0.39746548992984837, "grad_norm": 0.6339694577973134, "learning_rate": 6.858435990436357e-06, "loss": 0.3682, "step": 8782 }, { "epoch": 0.39751074903824396, "grad_norm": 0.6378813574811948, "learning_rate": 6.857755557361904e-06, "loss": 0.3422, "step": 8783 }, { "epoch": 0.3975560081466395, "grad_norm": 0.6418165335242025, "learning_rate": 6.8570750843699906e-06, "loss": 0.3562, "step": 8784 }, { "epoch": 0.3976012672550351, "grad_norm": 0.6998441305303985, "learning_rate": 6.856394571475236e-06, "loss": 0.3942, "step": 8785 }, { "epoch": 0.3976465263634306, "grad_norm": 0.6763851297883828, "learning_rate": 6.855714018692266e-06, "loss": 0.3044, "step": 8786 }, { "epoch": 0.3976917854718262, "grad_norm": 0.3356185758187047, "learning_rate": 6.855033426035698e-06, "loss": 0.4853, "step": 8787 }, { "epoch": 0.39773704458022174, "grad_norm": 0.6597605889712057, "learning_rate": 6.854352793520161e-06, "loss": 0.3482, "step": 8788 }, { "epoch": 0.39778230368861733, "grad_norm": 0.30797573717135995, "learning_rate": 6.853672121160277e-06, "loss": 0.5004, "step": 8789 }, { "epoch": 0.3978275627970129, "grad_norm": 0.29981839225811097, "learning_rate": 6.852991408970673e-06, "loss": 0.4789, "step": 8790 }, { "epoch": 0.39787282190540846, "grad_norm": 0.7316998984685005, "learning_rate": 6.852310656965973e-06, "loss": 0.3341, "step": 8791 }, { "epoch": 0.39791808101380405, "grad_norm": 0.6594598143900187, "learning_rate": 6.8516298651608075e-06, "loss": 0.3401, "step": 8792 }, { "epoch": 0.3979633401221996, "grad_norm": 0.647731276075123, "learning_rate": 6.850949033569802e-06, "loss": 0.2948, "step": 8793 }, { "epoch": 0.3980085992305952, "grad_norm": 0.6491077344661909, "learning_rate": 6.850268162207587e-06, "loss": 0.3411, "step": 8794 }, { "epoch": 0.3980538583389907, "grad_norm": 0.6955332825784432, "learning_rate": 6.84958725108879e-06, "loss": 0.3782, "step": 8795 }, { "epoch": 0.3980991174473863, "grad_norm": 0.6033407972613656, "learning_rate": 6.848906300228047e-06, "loss": 0.3427, "step": 8796 }, { "epoch": 0.39814437655578183, "grad_norm": 0.6390814926999342, "learning_rate": 6.8482253096399835e-06, "loss": 0.3563, "step": 8797 }, { "epoch": 0.3981896356641774, "grad_norm": 0.8164864194155153, "learning_rate": 6.847544279339235e-06, "loss": 0.4011, "step": 8798 }, { "epoch": 0.39823489477257296, "grad_norm": 0.6229443416891212, "learning_rate": 6.8468632093404356e-06, "loss": 0.3459, "step": 8799 }, { "epoch": 0.39828015388096855, "grad_norm": 0.6137351192779077, "learning_rate": 6.846182099658216e-06, "loss": 0.3232, "step": 8800 }, { "epoch": 0.39832541298936414, "grad_norm": 0.6480828136217117, "learning_rate": 6.845500950307215e-06, "loss": 0.3585, "step": 8801 }, { "epoch": 0.39837067209775967, "grad_norm": 0.6361253861236457, "learning_rate": 6.8448197613020664e-06, "loss": 0.3287, "step": 8802 }, { "epoch": 0.39841593120615526, "grad_norm": 0.6415046705485342, "learning_rate": 6.844138532657405e-06, "loss": 0.3557, "step": 8803 }, { "epoch": 0.3984611903145508, "grad_norm": 0.6278134895515164, "learning_rate": 6.843457264387874e-06, "loss": 0.3274, "step": 8804 }, { "epoch": 0.3985064494229464, "grad_norm": 0.6363884007409067, "learning_rate": 6.842775956508104e-06, "loss": 0.3383, "step": 8805 }, { "epoch": 0.3985517085313419, "grad_norm": 0.837352345152312, "learning_rate": 6.8420946090327416e-06, "loss": 0.335, "step": 8806 }, { "epoch": 0.3985969676397375, "grad_norm": 0.5971161813211178, "learning_rate": 6.841413221976422e-06, "loss": 0.3698, "step": 8807 }, { "epoch": 0.39864222674813304, "grad_norm": 0.7140643777217771, "learning_rate": 6.840731795353788e-06, "loss": 0.3433, "step": 8808 }, { "epoch": 0.39868748585652863, "grad_norm": 0.523852046512976, "learning_rate": 6.840050329179481e-06, "loss": 0.4989, "step": 8809 }, { "epoch": 0.39873274496492417, "grad_norm": 0.4224525675611614, "learning_rate": 6.839368823468144e-06, "loss": 0.4669, "step": 8810 }, { "epoch": 0.39877800407331976, "grad_norm": 0.6361397108087635, "learning_rate": 6.838687278234419e-06, "loss": 0.3543, "step": 8811 }, { "epoch": 0.3988232631817153, "grad_norm": 0.3237349798154476, "learning_rate": 6.838005693492953e-06, "loss": 0.4774, "step": 8812 }, { "epoch": 0.3988685222901109, "grad_norm": 0.6200239783378564, "learning_rate": 6.837324069258389e-06, "loss": 0.3643, "step": 8813 }, { "epoch": 0.3989137813985065, "grad_norm": 0.742821653188746, "learning_rate": 6.836642405545374e-06, "loss": 0.3307, "step": 8814 }, { "epoch": 0.398959040506902, "grad_norm": 0.5872253694957739, "learning_rate": 6.8359607023685544e-06, "loss": 0.3176, "step": 8815 }, { "epoch": 0.3990042996152976, "grad_norm": 0.6693686789127838, "learning_rate": 6.835278959742577e-06, "loss": 0.3363, "step": 8816 }, { "epoch": 0.39904955872369313, "grad_norm": 0.5871827507512425, "learning_rate": 6.8345971776820944e-06, "loss": 0.3102, "step": 8817 }, { "epoch": 0.3990948178320887, "grad_norm": 0.6081941918857661, "learning_rate": 6.833915356201749e-06, "loss": 0.4883, "step": 8818 }, { "epoch": 0.39914007694048426, "grad_norm": 0.542069814873775, "learning_rate": 6.833233495316198e-06, "loss": 0.5231, "step": 8819 }, { "epoch": 0.39918533604887985, "grad_norm": 0.6396501003092583, "learning_rate": 6.832551595040089e-06, "loss": 0.3471, "step": 8820 }, { "epoch": 0.3992305951572754, "grad_norm": 0.6680543486040922, "learning_rate": 6.8318696553880736e-06, "loss": 0.3832, "step": 8821 }, { "epoch": 0.39927585426567097, "grad_norm": 0.651301525618315, "learning_rate": 6.831187676374807e-06, "loss": 0.3086, "step": 8822 }, { "epoch": 0.3993211133740665, "grad_norm": 0.6727077273084098, "learning_rate": 6.83050565801494e-06, "loss": 0.3631, "step": 8823 }, { "epoch": 0.3993663724824621, "grad_norm": 0.6163866932954916, "learning_rate": 6.8298236003231264e-06, "loss": 0.3353, "step": 8824 }, { "epoch": 0.3994116315908577, "grad_norm": 0.6197188225438527, "learning_rate": 6.829141503314027e-06, "loss": 0.3233, "step": 8825 }, { "epoch": 0.3994568906992532, "grad_norm": 0.6424342060833129, "learning_rate": 6.8284593670022925e-06, "loss": 0.3059, "step": 8826 }, { "epoch": 0.3995021498076488, "grad_norm": 0.6480051962242951, "learning_rate": 6.827777191402584e-06, "loss": 0.3934, "step": 8827 }, { "epoch": 0.39954740891604434, "grad_norm": 0.9945980509456495, "learning_rate": 6.827094976529555e-06, "loss": 0.5059, "step": 8828 }, { "epoch": 0.39959266802443993, "grad_norm": 0.6656078511701508, "learning_rate": 6.826412722397867e-06, "loss": 0.3237, "step": 8829 }, { "epoch": 0.39963792713283547, "grad_norm": 0.6621313999952261, "learning_rate": 6.8257304290221794e-06, "loss": 0.3523, "step": 8830 }, { "epoch": 0.39968318624123106, "grad_norm": 0.6759962831435172, "learning_rate": 6.8250480964171526e-06, "loss": 0.3349, "step": 8831 }, { "epoch": 0.3997284453496266, "grad_norm": 0.6632127212233728, "learning_rate": 6.824365724597446e-06, "loss": 0.3805, "step": 8832 }, { "epoch": 0.3997737044580222, "grad_norm": 0.33370362668382353, "learning_rate": 6.823683313577725e-06, "loss": 0.4721, "step": 8833 }, { "epoch": 0.3998189635664177, "grad_norm": 0.5915639289253449, "learning_rate": 6.823000863372649e-06, "loss": 0.3339, "step": 8834 }, { "epoch": 0.3998642226748133, "grad_norm": 0.6474460312332149, "learning_rate": 6.822318373996884e-06, "loss": 0.3628, "step": 8835 }, { "epoch": 0.3999094817832089, "grad_norm": 0.3924738120952629, "learning_rate": 6.8216358454650935e-06, "loss": 0.4712, "step": 8836 }, { "epoch": 0.39995474089160443, "grad_norm": 0.6035930899859268, "learning_rate": 6.820953277791944e-06, "loss": 0.313, "step": 8837 }, { "epoch": 0.4, "grad_norm": 0.6635310443361517, "learning_rate": 6.8202706709921e-06, "loss": 0.3697, "step": 8838 }, { "epoch": 0.40004525910839556, "grad_norm": 0.6681270122548083, "learning_rate": 6.81958802508023e-06, "loss": 0.3649, "step": 8839 }, { "epoch": 0.40009051821679115, "grad_norm": 0.5962241863603737, "learning_rate": 6.818905340071004e-06, "loss": 0.3629, "step": 8840 }, { "epoch": 0.4001357773251867, "grad_norm": 0.6215104277591916, "learning_rate": 6.818222615979087e-06, "loss": 0.328, "step": 8841 }, { "epoch": 0.40018103643358227, "grad_norm": 0.6175036142464018, "learning_rate": 6.817539852819149e-06, "loss": 0.3285, "step": 8842 }, { "epoch": 0.4002262955419778, "grad_norm": 0.705849429599764, "learning_rate": 6.816857050605864e-06, "loss": 0.3486, "step": 8843 }, { "epoch": 0.4002715546503734, "grad_norm": 0.6307225714621325, "learning_rate": 6.8161742093539005e-06, "loss": 0.3474, "step": 8844 }, { "epoch": 0.40031681375876893, "grad_norm": 0.6726724488003374, "learning_rate": 6.81549132907793e-06, "loss": 0.354, "step": 8845 }, { "epoch": 0.4003620728671645, "grad_norm": 0.48933163712064615, "learning_rate": 6.814808409792628e-06, "loss": 0.5051, "step": 8846 }, { "epoch": 0.40040733197556005, "grad_norm": 0.6639468435423697, "learning_rate": 6.814125451512666e-06, "loss": 0.3468, "step": 8847 }, { "epoch": 0.40045259108395564, "grad_norm": 0.6380662284908794, "learning_rate": 6.8134424542527215e-06, "loss": 0.3581, "step": 8848 }, { "epoch": 0.40049785019235123, "grad_norm": 0.6132429048830895, "learning_rate": 6.812759418027466e-06, "loss": 0.3805, "step": 8849 }, { "epoch": 0.40054310930074677, "grad_norm": 0.6307231742366833, "learning_rate": 6.812076342851579e-06, "loss": 0.3224, "step": 8850 }, { "epoch": 0.40058836840914236, "grad_norm": 0.6191078091758072, "learning_rate": 6.811393228739737e-06, "loss": 0.3307, "step": 8851 }, { "epoch": 0.4006336275175379, "grad_norm": 0.32682970215003815, "learning_rate": 6.810710075706618e-06, "loss": 0.4633, "step": 8852 }, { "epoch": 0.4006788866259335, "grad_norm": 0.6713896907402072, "learning_rate": 6.8100268837669e-06, "loss": 0.3689, "step": 8853 }, { "epoch": 0.400724145734329, "grad_norm": 0.66965729091601, "learning_rate": 6.809343652935263e-06, "loss": 0.3655, "step": 8854 }, { "epoch": 0.4007694048427246, "grad_norm": 0.3073650613234617, "learning_rate": 6.808660383226388e-06, "loss": 0.4726, "step": 8855 }, { "epoch": 0.40081466395112014, "grad_norm": 0.3061154699957953, "learning_rate": 6.807977074654957e-06, "loss": 0.4807, "step": 8856 }, { "epoch": 0.40085992305951573, "grad_norm": 0.6343734983995349, "learning_rate": 6.807293727235651e-06, "loss": 0.3332, "step": 8857 }, { "epoch": 0.40090518216791127, "grad_norm": 0.6222786895743901, "learning_rate": 6.806610340983154e-06, "loss": 0.3331, "step": 8858 }, { "epoch": 0.40095044127630686, "grad_norm": 0.5848984802642063, "learning_rate": 6.8059269159121484e-06, "loss": 0.3793, "step": 8859 }, { "epoch": 0.40099570038470245, "grad_norm": 0.6518113755263946, "learning_rate": 6.8052434520373204e-06, "loss": 0.3325, "step": 8860 }, { "epoch": 0.401040959493098, "grad_norm": 0.6355972797900781, "learning_rate": 6.804559949373355e-06, "loss": 0.348, "step": 8861 }, { "epoch": 0.40108621860149357, "grad_norm": 0.6869756142935315, "learning_rate": 6.803876407934939e-06, "loss": 0.3771, "step": 8862 }, { "epoch": 0.4011314777098891, "grad_norm": 0.3713660385866207, "learning_rate": 6.803192827736758e-06, "loss": 0.4636, "step": 8863 }, { "epoch": 0.4011767368182847, "grad_norm": 0.649725866030626, "learning_rate": 6.802509208793502e-06, "loss": 0.314, "step": 8864 }, { "epoch": 0.40122199592668023, "grad_norm": 0.6290007862830288, "learning_rate": 6.80182555111986e-06, "loss": 0.3146, "step": 8865 }, { "epoch": 0.4012672550350758, "grad_norm": 0.6416736202002891, "learning_rate": 6.80114185473052e-06, "loss": 0.3255, "step": 8866 }, { "epoch": 0.40131251414347135, "grad_norm": 0.646691722276954, "learning_rate": 6.800458119640172e-06, "loss": 0.3269, "step": 8867 }, { "epoch": 0.40135777325186694, "grad_norm": 0.601916942813255, "learning_rate": 6.79977434586351e-06, "loss": 0.3505, "step": 8868 }, { "epoch": 0.4014030323602625, "grad_norm": 0.6052031851252415, "learning_rate": 6.799090533415225e-06, "loss": 0.3107, "step": 8869 }, { "epoch": 0.40144829146865807, "grad_norm": 0.6420105058443195, "learning_rate": 6.798406682310009e-06, "loss": 0.3625, "step": 8870 }, { "epoch": 0.40149355057705366, "grad_norm": 0.6665437907809222, "learning_rate": 6.797722792562558e-06, "loss": 0.3505, "step": 8871 }, { "epoch": 0.4015388096854492, "grad_norm": 0.6927302844427357, "learning_rate": 6.797038864187564e-06, "loss": 0.3484, "step": 8872 }, { "epoch": 0.4015840687938448, "grad_norm": 0.30249294964986345, "learning_rate": 6.796354897199726e-06, "loss": 0.4747, "step": 8873 }, { "epoch": 0.4016293279022403, "grad_norm": 0.6182420381926265, "learning_rate": 6.795670891613737e-06, "loss": 0.3636, "step": 8874 }, { "epoch": 0.4016745870106359, "grad_norm": 0.6310058170885068, "learning_rate": 6.794986847444296e-06, "loss": 0.3258, "step": 8875 }, { "epoch": 0.40171984611903144, "grad_norm": 0.6888547574317977, "learning_rate": 6.7943027647061e-06, "loss": 0.3617, "step": 8876 }, { "epoch": 0.40176510522742703, "grad_norm": 0.5791059330086225, "learning_rate": 6.793618643413848e-06, "loss": 0.3105, "step": 8877 }, { "epoch": 0.40181036433582257, "grad_norm": 0.6450799542232147, "learning_rate": 6.792934483582242e-06, "loss": 0.4149, "step": 8878 }, { "epoch": 0.40185562344421816, "grad_norm": 0.3069299614041707, "learning_rate": 6.792250285225978e-06, "loss": 0.4816, "step": 8879 }, { "epoch": 0.4019008825526137, "grad_norm": 0.6801140072717393, "learning_rate": 6.791566048359761e-06, "loss": 0.3549, "step": 8880 }, { "epoch": 0.4019461416610093, "grad_norm": 0.6316838717075581, "learning_rate": 6.7908817729982936e-06, "loss": 0.3048, "step": 8881 }, { "epoch": 0.4019914007694048, "grad_norm": 0.6214297573389774, "learning_rate": 6.790197459156275e-06, "loss": 0.3788, "step": 8882 }, { "epoch": 0.4020366598778004, "grad_norm": 0.6234239462090553, "learning_rate": 6.789513106848412e-06, "loss": 0.3267, "step": 8883 }, { "epoch": 0.402081918986196, "grad_norm": 0.29946524483125314, "learning_rate": 6.788828716089407e-06, "loss": 0.4826, "step": 8884 }, { "epoch": 0.40212717809459153, "grad_norm": 0.612541727714678, "learning_rate": 6.78814428689397e-06, "loss": 0.3385, "step": 8885 }, { "epoch": 0.4021724372029871, "grad_norm": 0.5987562227498454, "learning_rate": 6.787459819276802e-06, "loss": 0.3526, "step": 8886 }, { "epoch": 0.40221769631138266, "grad_norm": 0.6365973311112314, "learning_rate": 6.786775313252611e-06, "loss": 0.3332, "step": 8887 }, { "epoch": 0.40226295541977825, "grad_norm": 0.5884045771732902, "learning_rate": 6.7860907688361084e-06, "loss": 0.3377, "step": 8888 }, { "epoch": 0.4023082145281738, "grad_norm": 0.6385536028054486, "learning_rate": 6.785406186042e-06, "loss": 0.3605, "step": 8889 }, { "epoch": 0.40235347363656937, "grad_norm": 0.5846199958871605, "learning_rate": 6.7847215648849964e-06, "loss": 0.3168, "step": 8890 }, { "epoch": 0.4023987327449649, "grad_norm": 0.6891479504414567, "learning_rate": 6.784036905379807e-06, "loss": 0.383, "step": 8891 }, { "epoch": 0.4024439918533605, "grad_norm": 0.6574568805801104, "learning_rate": 6.783352207541144e-06, "loss": 0.3155, "step": 8892 }, { "epoch": 0.40248925096175603, "grad_norm": 0.34285635218528, "learning_rate": 6.782667471383719e-06, "loss": 0.4899, "step": 8893 }, { "epoch": 0.4025345100701516, "grad_norm": 0.5997713023380722, "learning_rate": 6.7819826969222465e-06, "loss": 0.3361, "step": 8894 }, { "epoch": 0.4025797691785472, "grad_norm": 0.6014580200566753, "learning_rate": 6.781297884171436e-06, "loss": 0.3324, "step": 8895 }, { "epoch": 0.40262502828694274, "grad_norm": 0.7053990750107237, "learning_rate": 6.780613033146008e-06, "loss": 0.3372, "step": 8896 }, { "epoch": 0.40267028739533833, "grad_norm": 0.6005162280243345, "learning_rate": 6.779928143860672e-06, "loss": 0.3809, "step": 8897 }, { "epoch": 0.40271554650373387, "grad_norm": 0.6332156859771381, "learning_rate": 6.779243216330149e-06, "loss": 0.3489, "step": 8898 }, { "epoch": 0.40276080561212946, "grad_norm": 0.5848052680601269, "learning_rate": 6.7785582505691525e-06, "loss": 0.3228, "step": 8899 }, { "epoch": 0.402806064720525, "grad_norm": 0.3297464573181845, "learning_rate": 6.777873246592403e-06, "loss": 0.4915, "step": 8900 }, { "epoch": 0.4028513238289206, "grad_norm": 0.6603865664890434, "learning_rate": 6.777188204414615e-06, "loss": 0.3845, "step": 8901 }, { "epoch": 0.4028965829373161, "grad_norm": 0.6331108359086557, "learning_rate": 6.776503124050514e-06, "loss": 0.3845, "step": 8902 }, { "epoch": 0.4029418420457117, "grad_norm": 0.6393073819687393, "learning_rate": 6.775818005514815e-06, "loss": 0.3873, "step": 8903 }, { "epoch": 0.40298710115410724, "grad_norm": 0.2893071946967361, "learning_rate": 6.7751328488222414e-06, "loss": 0.4827, "step": 8904 }, { "epoch": 0.40303236026250283, "grad_norm": 0.6510819344466393, "learning_rate": 6.774447653987515e-06, "loss": 0.3215, "step": 8905 }, { "epoch": 0.40307761937089837, "grad_norm": 0.7675879619243432, "learning_rate": 6.773762421025359e-06, "loss": 0.3793, "step": 8906 }, { "epoch": 0.40312287847929396, "grad_norm": 0.29502548677594626, "learning_rate": 6.773077149950494e-06, "loss": 0.4829, "step": 8907 }, { "epoch": 0.40316813758768955, "grad_norm": 0.6446546984632263, "learning_rate": 6.772391840777648e-06, "loss": 0.3047, "step": 8908 }, { "epoch": 0.4032133966960851, "grad_norm": 0.7347921913071014, "learning_rate": 6.771706493521546e-06, "loss": 0.3229, "step": 8909 }, { "epoch": 0.40325865580448067, "grad_norm": 0.6707052281228602, "learning_rate": 6.771021108196912e-06, "loss": 0.3785, "step": 8910 }, { "epoch": 0.4033039149128762, "grad_norm": 0.6264078396574432, "learning_rate": 6.770335684818472e-06, "loss": 0.3774, "step": 8911 }, { "epoch": 0.4033491740212718, "grad_norm": 0.2943032704082629, "learning_rate": 6.7696502234009576e-06, "loss": 0.4636, "step": 8912 }, { "epoch": 0.40339443312966733, "grad_norm": 0.6016668486483328, "learning_rate": 6.768964723959093e-06, "loss": 0.3458, "step": 8913 }, { "epoch": 0.4034396922380629, "grad_norm": 0.8107831458714004, "learning_rate": 6.768279186507611e-06, "loss": 0.3216, "step": 8914 }, { "epoch": 0.40348495134645845, "grad_norm": 0.640569774825471, "learning_rate": 6.7675936110612405e-06, "loss": 0.3378, "step": 8915 }, { "epoch": 0.40353021045485404, "grad_norm": 0.5799696440458336, "learning_rate": 6.766907997634711e-06, "loss": 0.3228, "step": 8916 }, { "epoch": 0.4035754695632496, "grad_norm": 0.6273281359680126, "learning_rate": 6.766222346242755e-06, "loss": 0.3377, "step": 8917 }, { "epoch": 0.40362072867164517, "grad_norm": 0.8178978298391293, "learning_rate": 6.765536656900105e-06, "loss": 0.3666, "step": 8918 }, { "epoch": 0.40366598778004076, "grad_norm": 0.318327709742364, "learning_rate": 6.764850929621496e-06, "loss": 0.4742, "step": 8919 }, { "epoch": 0.4037112468884363, "grad_norm": 0.5859553890996915, "learning_rate": 6.764165164421661e-06, "loss": 0.3087, "step": 8920 }, { "epoch": 0.4037565059968319, "grad_norm": 0.6387102179408355, "learning_rate": 6.763479361315334e-06, "loss": 0.3398, "step": 8921 }, { "epoch": 0.4038017651052274, "grad_norm": 0.5772994972957782, "learning_rate": 6.762793520317251e-06, "loss": 0.331, "step": 8922 }, { "epoch": 0.403847024213623, "grad_norm": 0.5899030881067611, "learning_rate": 6.7621076414421505e-06, "loss": 0.3443, "step": 8923 }, { "epoch": 0.40389228332201854, "grad_norm": 0.669877351584218, "learning_rate": 6.761421724704768e-06, "loss": 0.3731, "step": 8924 }, { "epoch": 0.40393754243041413, "grad_norm": 0.6487265557529385, "learning_rate": 6.760735770119843e-06, "loss": 0.3242, "step": 8925 }, { "epoch": 0.40398280153880967, "grad_norm": 0.6209192812599255, "learning_rate": 6.7600497777021125e-06, "loss": 0.311, "step": 8926 }, { "epoch": 0.40402806064720526, "grad_norm": 0.6228468323866984, "learning_rate": 6.7593637474663195e-06, "loss": 0.2858, "step": 8927 }, { "epoch": 0.4040733197556008, "grad_norm": 0.6184542504966413, "learning_rate": 6.758677679427204e-06, "loss": 0.369, "step": 8928 }, { "epoch": 0.4041185788639964, "grad_norm": 0.3459435067429982, "learning_rate": 6.757991573599504e-06, "loss": 0.4819, "step": 8929 }, { "epoch": 0.40416383797239197, "grad_norm": 0.6301670896170039, "learning_rate": 6.7573054299979655e-06, "loss": 0.3685, "step": 8930 }, { "epoch": 0.4042090970807875, "grad_norm": 0.6889789268158945, "learning_rate": 6.756619248637331e-06, "loss": 0.3526, "step": 8931 }, { "epoch": 0.4042543561891831, "grad_norm": 0.2939816782313439, "learning_rate": 6.755933029532342e-06, "loss": 0.4792, "step": 8932 }, { "epoch": 0.40429961529757863, "grad_norm": 0.8951588454851417, "learning_rate": 6.755246772697748e-06, "loss": 0.3159, "step": 8933 }, { "epoch": 0.4043448744059742, "grad_norm": 0.7378167619676134, "learning_rate": 6.754560478148289e-06, "loss": 0.3452, "step": 8934 }, { "epoch": 0.40439013351436975, "grad_norm": 0.6214978287086871, "learning_rate": 6.753874145898716e-06, "loss": 0.3129, "step": 8935 }, { "epoch": 0.40443539262276534, "grad_norm": 0.6569656789319982, "learning_rate": 6.753187775963773e-06, "loss": 0.3781, "step": 8936 }, { "epoch": 0.4044806517311609, "grad_norm": 0.32579131965062763, "learning_rate": 6.752501368358209e-06, "loss": 0.4666, "step": 8937 }, { "epoch": 0.40452591083955647, "grad_norm": 0.6825464587480135, "learning_rate": 6.751814923096773e-06, "loss": 0.368, "step": 8938 }, { "epoch": 0.404571169947952, "grad_norm": 0.6420022976480055, "learning_rate": 6.751128440194216e-06, "loss": 0.3341, "step": 8939 }, { "epoch": 0.4046164290563476, "grad_norm": 0.6656853771714591, "learning_rate": 6.750441919665286e-06, "loss": 0.3565, "step": 8940 }, { "epoch": 0.4046616881647431, "grad_norm": 0.6230241276435788, "learning_rate": 6.7497553615247355e-06, "loss": 0.3591, "step": 8941 }, { "epoch": 0.4047069472731387, "grad_norm": 0.29656768184411453, "learning_rate": 6.749068765787316e-06, "loss": 0.4804, "step": 8942 }, { "epoch": 0.4047522063815343, "grad_norm": 0.6631787295713072, "learning_rate": 6.748382132467781e-06, "loss": 0.3531, "step": 8943 }, { "epoch": 0.40479746548992984, "grad_norm": 0.616176087322102, "learning_rate": 6.7476954615808835e-06, "loss": 0.3612, "step": 8944 }, { "epoch": 0.40484272459832543, "grad_norm": 0.6365449640829076, "learning_rate": 6.747008753141377e-06, "loss": 0.3362, "step": 8945 }, { "epoch": 0.40488798370672097, "grad_norm": 0.646535504292674, "learning_rate": 6.74632200716402e-06, "loss": 0.3492, "step": 8946 }, { "epoch": 0.40493324281511656, "grad_norm": 0.6643896596439289, "learning_rate": 6.745635223663565e-06, "loss": 0.3331, "step": 8947 }, { "epoch": 0.4049785019235121, "grad_norm": 0.6066751912169945, "learning_rate": 6.7449484026547705e-06, "loss": 0.3664, "step": 8948 }, { "epoch": 0.4050237610319077, "grad_norm": 0.6417521544879048, "learning_rate": 6.744261544152395e-06, "loss": 0.3496, "step": 8949 }, { "epoch": 0.4050690201403032, "grad_norm": 0.6291110101036786, "learning_rate": 6.743574648171195e-06, "loss": 0.2984, "step": 8950 }, { "epoch": 0.4051142792486988, "grad_norm": 0.3148204187264297, "learning_rate": 6.7428877147259305e-06, "loss": 0.4766, "step": 8951 }, { "epoch": 0.40515953835709434, "grad_norm": 0.30458788376917484, "learning_rate": 6.742200743831364e-06, "loss": 0.4854, "step": 8952 }, { "epoch": 0.40520479746548993, "grad_norm": 0.6258531165395406, "learning_rate": 6.741513735502252e-06, "loss": 0.3461, "step": 8953 }, { "epoch": 0.4052500565738855, "grad_norm": 0.6165429605530012, "learning_rate": 6.740826689753359e-06, "loss": 0.3262, "step": 8954 }, { "epoch": 0.40529531568228105, "grad_norm": 0.6414593000502342, "learning_rate": 6.740139606599448e-06, "loss": 0.2975, "step": 8955 }, { "epoch": 0.40534057479067664, "grad_norm": 0.27098158133221006, "learning_rate": 6.73945248605528e-06, "loss": 0.493, "step": 8956 }, { "epoch": 0.4053858338990722, "grad_norm": 0.6298077983608283, "learning_rate": 6.738765328135621e-06, "loss": 0.3464, "step": 8957 }, { "epoch": 0.40543109300746777, "grad_norm": 0.6444828431474185, "learning_rate": 6.7380781328552346e-06, "loss": 0.3491, "step": 8958 }, { "epoch": 0.4054763521158633, "grad_norm": 0.2905457425379055, "learning_rate": 6.737390900228888e-06, "loss": 0.4622, "step": 8959 }, { "epoch": 0.4055216112242589, "grad_norm": 0.694225612326219, "learning_rate": 6.736703630271347e-06, "loss": 0.3944, "step": 8960 }, { "epoch": 0.40556687033265443, "grad_norm": 0.6095087960379569, "learning_rate": 6.736016322997379e-06, "loss": 0.3612, "step": 8961 }, { "epoch": 0.40561212944105, "grad_norm": 0.7793100472578198, "learning_rate": 6.7353289784217525e-06, "loss": 0.328, "step": 8962 }, { "epoch": 0.40565738854944555, "grad_norm": 0.5994659358964869, "learning_rate": 6.734641596559234e-06, "loss": 0.3221, "step": 8963 }, { "epoch": 0.40570264765784114, "grad_norm": 0.7179033980482736, "learning_rate": 6.733954177424598e-06, "loss": 0.3511, "step": 8964 }, { "epoch": 0.40574790676623673, "grad_norm": 0.6262554659821052, "learning_rate": 6.733266721032609e-06, "loss": 0.3766, "step": 8965 }, { "epoch": 0.40579316587463227, "grad_norm": 0.6368406550673319, "learning_rate": 6.732579227398043e-06, "loss": 0.3574, "step": 8966 }, { "epoch": 0.40583842498302786, "grad_norm": 0.6136063256982253, "learning_rate": 6.731891696535671e-06, "loss": 0.3499, "step": 8967 }, { "epoch": 0.4058836840914234, "grad_norm": 0.33916521123273174, "learning_rate": 6.731204128460265e-06, "loss": 0.4881, "step": 8968 }, { "epoch": 0.405928943199819, "grad_norm": 0.6510734811700759, "learning_rate": 6.730516523186599e-06, "loss": 0.3713, "step": 8969 }, { "epoch": 0.4059742023082145, "grad_norm": 0.6554821074225733, "learning_rate": 6.729828880729448e-06, "loss": 0.3469, "step": 8970 }, { "epoch": 0.4060194614166101, "grad_norm": 0.29512133094170623, "learning_rate": 6.7291412011035866e-06, "loss": 0.4643, "step": 8971 }, { "epoch": 0.40606472052500564, "grad_norm": 0.2946429814813781, "learning_rate": 6.728453484323791e-06, "loss": 0.452, "step": 8972 }, { "epoch": 0.40610997963340123, "grad_norm": 0.7983353679920422, "learning_rate": 6.727765730404841e-06, "loss": 0.3698, "step": 8973 }, { "epoch": 0.40615523874179676, "grad_norm": 0.6758477176684954, "learning_rate": 6.7270779393615095e-06, "loss": 0.3641, "step": 8974 }, { "epoch": 0.40620049785019235, "grad_norm": 0.589628555008071, "learning_rate": 6.726390111208579e-06, "loss": 0.2701, "step": 8975 }, { "epoch": 0.4062457569585879, "grad_norm": 0.6707201662154986, "learning_rate": 6.725702245960827e-06, "loss": 0.2902, "step": 8976 }, { "epoch": 0.4062910160669835, "grad_norm": 0.6308128620949878, "learning_rate": 6.725014343633033e-06, "loss": 0.3468, "step": 8977 }, { "epoch": 0.40633627517537907, "grad_norm": 0.6225704778634961, "learning_rate": 6.7243264042399795e-06, "loss": 0.2778, "step": 8978 }, { "epoch": 0.4063815342837746, "grad_norm": 0.6543361066301392, "learning_rate": 6.7236384277964465e-06, "loss": 0.349, "step": 8979 }, { "epoch": 0.4064267933921702, "grad_norm": 0.6259696940928321, "learning_rate": 6.722950414317218e-06, "loss": 0.3477, "step": 8980 }, { "epoch": 0.40647205250056573, "grad_norm": 0.66971951479067, "learning_rate": 6.722262363817077e-06, "loss": 0.3543, "step": 8981 }, { "epoch": 0.4065173116089613, "grad_norm": 0.3774940043813065, "learning_rate": 6.721574276310807e-06, "loss": 0.4749, "step": 8982 }, { "epoch": 0.40656257071735685, "grad_norm": 0.706138680641507, "learning_rate": 6.720886151813194e-06, "loss": 0.3383, "step": 8983 }, { "epoch": 0.40660782982575244, "grad_norm": 0.6063299043643507, "learning_rate": 6.720197990339022e-06, "loss": 0.41, "step": 8984 }, { "epoch": 0.406653088934148, "grad_norm": 0.6917969396568446, "learning_rate": 6.719509791903078e-06, "loss": 0.3464, "step": 8985 }, { "epoch": 0.40669834804254357, "grad_norm": 0.5925119104848456, "learning_rate": 6.718821556520151e-06, "loss": 0.3329, "step": 8986 }, { "epoch": 0.4067436071509391, "grad_norm": 0.7172368950413045, "learning_rate": 6.718133284205026e-06, "loss": 0.3235, "step": 8987 }, { "epoch": 0.4067888662593347, "grad_norm": 0.35530670970656764, "learning_rate": 6.717444974972495e-06, "loss": 0.482, "step": 8988 }, { "epoch": 0.4068341253677303, "grad_norm": 0.6179833011689086, "learning_rate": 6.716756628837345e-06, "loss": 0.3467, "step": 8989 }, { "epoch": 0.4068793844761258, "grad_norm": 0.6400629806758316, "learning_rate": 6.716068245814369e-06, "loss": 0.3698, "step": 8990 }, { "epoch": 0.4069246435845214, "grad_norm": 0.5903144512944397, "learning_rate": 6.715379825918357e-06, "loss": 0.3034, "step": 8991 }, { "epoch": 0.40696990269291694, "grad_norm": 0.5593568955536351, "learning_rate": 6.714691369164099e-06, "loss": 0.3332, "step": 8992 }, { "epoch": 0.40701516180131253, "grad_norm": 0.6698171715141726, "learning_rate": 6.714002875566392e-06, "loss": 0.3597, "step": 8993 }, { "epoch": 0.40706042090970807, "grad_norm": 0.4575285327706009, "learning_rate": 6.713314345140025e-06, "loss": 0.4941, "step": 8994 }, { "epoch": 0.40710568001810366, "grad_norm": 0.29855367789502585, "learning_rate": 6.712625777899797e-06, "loss": 0.4714, "step": 8995 }, { "epoch": 0.4071509391264992, "grad_norm": 0.6371976573051277, "learning_rate": 6.7119371738605e-06, "loss": 0.3387, "step": 8996 }, { "epoch": 0.4071961982348948, "grad_norm": 0.5841837035168561, "learning_rate": 6.711248533036931e-06, "loss": 0.3558, "step": 8997 }, { "epoch": 0.4072414573432903, "grad_norm": 0.5992793271836137, "learning_rate": 6.710559855443885e-06, "loss": 0.3371, "step": 8998 }, { "epoch": 0.4072867164516859, "grad_norm": 0.6635349205549409, "learning_rate": 6.709871141096164e-06, "loss": 0.3865, "step": 8999 }, { "epoch": 0.4073319755600815, "grad_norm": 0.5703140692706825, "learning_rate": 6.709182390008563e-06, "loss": 0.3479, "step": 9000 }, { "epoch": 0.40737723466847703, "grad_norm": 0.3243766357473611, "learning_rate": 6.70849360219588e-06, "loss": 0.494, "step": 9001 }, { "epoch": 0.4074224937768726, "grad_norm": 0.6563326984838999, "learning_rate": 6.70780477767292e-06, "loss": 0.3719, "step": 9002 }, { "epoch": 0.40746775288526815, "grad_norm": 0.6227235866029053, "learning_rate": 6.7071159164544775e-06, "loss": 0.3754, "step": 9003 }, { "epoch": 0.40751301199366374, "grad_norm": 0.6609459209436451, "learning_rate": 6.706427018555359e-06, "loss": 0.3696, "step": 9004 }, { "epoch": 0.4075582711020593, "grad_norm": 0.5852792648844204, "learning_rate": 6.705738083990363e-06, "loss": 0.2979, "step": 9005 }, { "epoch": 0.40760353021045487, "grad_norm": 0.30829767078260123, "learning_rate": 6.705049112774295e-06, "loss": 0.5131, "step": 9006 }, { "epoch": 0.4076487893188504, "grad_norm": 0.6404114897481298, "learning_rate": 6.704360104921959e-06, "loss": 0.3456, "step": 9007 }, { "epoch": 0.407694048427246, "grad_norm": 0.6660237164001803, "learning_rate": 6.703671060448158e-06, "loss": 0.3363, "step": 9008 }, { "epoch": 0.4077393075356415, "grad_norm": 0.2834674880371996, "learning_rate": 6.702981979367699e-06, "loss": 0.4687, "step": 9009 }, { "epoch": 0.4077845666440371, "grad_norm": 0.5858283436495668, "learning_rate": 6.7022928616953865e-06, "loss": 0.3304, "step": 9010 }, { "epoch": 0.40782982575243265, "grad_norm": 0.5975401010177234, "learning_rate": 6.701603707446029e-06, "loss": 0.3362, "step": 9011 }, { "epoch": 0.40787508486082824, "grad_norm": 0.6278500455622565, "learning_rate": 6.7009145166344355e-06, "loss": 0.3549, "step": 9012 }, { "epoch": 0.40792034396922383, "grad_norm": 0.5997105715074218, "learning_rate": 6.700225289275411e-06, "loss": 0.3401, "step": 9013 }, { "epoch": 0.40796560307761937, "grad_norm": 0.6259148881571266, "learning_rate": 6.699536025383768e-06, "loss": 0.3463, "step": 9014 }, { "epoch": 0.40801086218601496, "grad_norm": 0.6228649006571026, "learning_rate": 6.698846724974315e-06, "loss": 0.335, "step": 9015 }, { "epoch": 0.4080561212944105, "grad_norm": 0.30266594936736707, "learning_rate": 6.6981573880618636e-06, "loss": 0.4701, "step": 9016 }, { "epoch": 0.4081013804028061, "grad_norm": 0.2953912308079833, "learning_rate": 6.697468014661226e-06, "loss": 0.465, "step": 9017 }, { "epoch": 0.4081466395112016, "grad_norm": 0.6024204480029517, "learning_rate": 6.696778604787213e-06, "loss": 0.3318, "step": 9018 }, { "epoch": 0.4081918986195972, "grad_norm": 0.6727613447472707, "learning_rate": 6.69608915845464e-06, "loss": 0.3546, "step": 9019 }, { "epoch": 0.40823715772799274, "grad_norm": 0.6725825317612492, "learning_rate": 6.69539967567832e-06, "loss": 0.3267, "step": 9020 }, { "epoch": 0.40828241683638833, "grad_norm": 0.6251203031313752, "learning_rate": 6.694710156473067e-06, "loss": 0.3287, "step": 9021 }, { "epoch": 0.40832767594478386, "grad_norm": 0.5948194157228681, "learning_rate": 6.694020600853699e-06, "loss": 0.3493, "step": 9022 }, { "epoch": 0.40837293505317945, "grad_norm": 0.6262507575282605, "learning_rate": 6.69333100883503e-06, "loss": 0.3588, "step": 9023 }, { "epoch": 0.40841819416157504, "grad_norm": 0.35450308304949074, "learning_rate": 6.692641380431879e-06, "loss": 0.4705, "step": 9024 }, { "epoch": 0.4084634532699706, "grad_norm": 0.607371939659254, "learning_rate": 6.691951715659063e-06, "loss": 0.3682, "step": 9025 }, { "epoch": 0.40850871237836617, "grad_norm": 0.3412133730192881, "learning_rate": 6.691262014531401e-06, "loss": 0.4836, "step": 9026 }, { "epoch": 0.4085539714867617, "grad_norm": 0.6366134224531387, "learning_rate": 6.690572277063711e-06, "loss": 0.3402, "step": 9027 }, { "epoch": 0.4085992305951573, "grad_norm": 0.2912418177907017, "learning_rate": 6.689882503270818e-06, "loss": 0.5028, "step": 9028 }, { "epoch": 0.4086444897035528, "grad_norm": 0.3163477138902858, "learning_rate": 6.689192693167539e-06, "loss": 0.4761, "step": 9029 }, { "epoch": 0.4086897488119484, "grad_norm": 0.28719296847973913, "learning_rate": 6.688502846768697e-06, "loss": 0.473, "step": 9030 }, { "epoch": 0.40873500792034395, "grad_norm": 0.6244954493296053, "learning_rate": 6.6878129640891135e-06, "loss": 0.3251, "step": 9031 }, { "epoch": 0.40878026702873954, "grad_norm": 0.6516279469831836, "learning_rate": 6.687123045143613e-06, "loss": 0.3849, "step": 9032 }, { "epoch": 0.4088255261371351, "grad_norm": 0.5998224028305761, "learning_rate": 6.686433089947022e-06, "loss": 0.2979, "step": 9033 }, { "epoch": 0.40887078524553067, "grad_norm": 0.3103766703711543, "learning_rate": 6.685743098514161e-06, "loss": 0.4565, "step": 9034 }, { "epoch": 0.4089160443539262, "grad_norm": 0.6139132238441113, "learning_rate": 6.685053070859861e-06, "loss": 0.3101, "step": 9035 }, { "epoch": 0.4089613034623218, "grad_norm": 0.6160606143251894, "learning_rate": 6.684363006998944e-06, "loss": 0.3024, "step": 9036 }, { "epoch": 0.4090065625707174, "grad_norm": 0.30752656365509423, "learning_rate": 6.683672906946239e-06, "loss": 0.4851, "step": 9037 }, { "epoch": 0.4090518216791129, "grad_norm": 0.6513482944547165, "learning_rate": 6.682982770716575e-06, "loss": 0.369, "step": 9038 }, { "epoch": 0.4090970807875085, "grad_norm": 0.7791012897019023, "learning_rate": 6.682292598324779e-06, "loss": 0.3203, "step": 9039 }, { "epoch": 0.40914233989590404, "grad_norm": 0.6894452263955075, "learning_rate": 6.681602389785683e-06, "loss": 0.3487, "step": 9040 }, { "epoch": 0.40918759900429963, "grad_norm": 0.6752996121282719, "learning_rate": 6.680912145114116e-06, "loss": 0.3672, "step": 9041 }, { "epoch": 0.40923285811269516, "grad_norm": 0.6197637473794768, "learning_rate": 6.680221864324908e-06, "loss": 0.3466, "step": 9042 }, { "epoch": 0.40927811722109075, "grad_norm": 0.6856247512459379, "learning_rate": 6.679531547432896e-06, "loss": 0.3436, "step": 9043 }, { "epoch": 0.4093233763294863, "grad_norm": 0.6567848141481791, "learning_rate": 6.6788411944529064e-06, "loss": 0.3553, "step": 9044 }, { "epoch": 0.4093686354378819, "grad_norm": 0.7058855753044633, "learning_rate": 6.678150805399777e-06, "loss": 0.3257, "step": 9045 }, { "epoch": 0.4094138945462774, "grad_norm": 0.588293854134537, "learning_rate": 6.67746038028834e-06, "loss": 0.3346, "step": 9046 }, { "epoch": 0.409459153654673, "grad_norm": 0.6538508362128201, "learning_rate": 6.676769919133431e-06, "loss": 0.3763, "step": 9047 }, { "epoch": 0.4095044127630686, "grad_norm": 0.4019729609796205, "learning_rate": 6.6760794219498874e-06, "loss": 0.4864, "step": 9048 }, { "epoch": 0.4095496718714641, "grad_norm": 0.65655875129064, "learning_rate": 6.675388888752544e-06, "loss": 0.3817, "step": 9049 }, { "epoch": 0.4095949309798597, "grad_norm": 0.6169079869153397, "learning_rate": 6.674698319556239e-06, "loss": 0.3708, "step": 9050 }, { "epoch": 0.40964019008825525, "grad_norm": 0.6588170636410992, "learning_rate": 6.674007714375812e-06, "loss": 0.3638, "step": 9051 }, { "epoch": 0.40968544919665084, "grad_norm": 0.6373846978424119, "learning_rate": 6.673317073226097e-06, "loss": 0.3221, "step": 9052 }, { "epoch": 0.4097307083050464, "grad_norm": 0.6560858708014767, "learning_rate": 6.672626396121942e-06, "loss": 0.3713, "step": 9053 }, { "epoch": 0.40977596741344197, "grad_norm": 0.6087462390017654, "learning_rate": 6.671935683078179e-06, "loss": 0.3882, "step": 9054 }, { "epoch": 0.4098212265218375, "grad_norm": 0.705119345393188, "learning_rate": 6.6712449341096555e-06, "loss": 0.3496, "step": 9055 }, { "epoch": 0.4098664856302331, "grad_norm": 0.6344883167152161, "learning_rate": 6.67055414923121e-06, "loss": 0.3491, "step": 9056 }, { "epoch": 0.4099117447386286, "grad_norm": 0.6210838758555896, "learning_rate": 6.669863328457686e-06, "loss": 0.3344, "step": 9057 }, { "epoch": 0.4099570038470242, "grad_norm": 0.3234130847164188, "learning_rate": 6.6691724718039285e-06, "loss": 0.4915, "step": 9058 }, { "epoch": 0.4100022629554198, "grad_norm": 0.6170558332327496, "learning_rate": 6.668481579284781e-06, "loss": 0.3855, "step": 9059 }, { "epoch": 0.41004752206381534, "grad_norm": 0.6614143628085448, "learning_rate": 6.667790650915089e-06, "loss": 0.3475, "step": 9060 }, { "epoch": 0.41009278117221093, "grad_norm": 0.642262702067851, "learning_rate": 6.667099686709697e-06, "loss": 0.3502, "step": 9061 }, { "epoch": 0.41013804028060646, "grad_norm": 0.6701479057169873, "learning_rate": 6.666408686683455e-06, "loss": 0.3367, "step": 9062 }, { "epoch": 0.41018329938900205, "grad_norm": 0.5870143826263454, "learning_rate": 6.665717650851205e-06, "loss": 0.298, "step": 9063 }, { "epoch": 0.4102285584973976, "grad_norm": 0.6748879525929066, "learning_rate": 6.665026579227802e-06, "loss": 0.3733, "step": 9064 }, { "epoch": 0.4102738176057932, "grad_norm": 0.6586658160921205, "learning_rate": 6.66433547182809e-06, "loss": 0.3482, "step": 9065 }, { "epoch": 0.4103190767141887, "grad_norm": 0.7078301453617645, "learning_rate": 6.663644328666921e-06, "loss": 0.3387, "step": 9066 }, { "epoch": 0.4103643358225843, "grad_norm": 0.6306545682547016, "learning_rate": 6.662953149759144e-06, "loss": 0.332, "step": 9067 }, { "epoch": 0.41040959493097984, "grad_norm": 0.6647220500569193, "learning_rate": 6.6622619351196115e-06, "loss": 0.3714, "step": 9068 }, { "epoch": 0.4104548540393754, "grad_norm": 0.6229576833143935, "learning_rate": 6.661570684763175e-06, "loss": 0.3428, "step": 9069 }, { "epoch": 0.41050011314777096, "grad_norm": 0.6352150216178791, "learning_rate": 6.660879398704689e-06, "loss": 0.3541, "step": 9070 }, { "epoch": 0.41054537225616655, "grad_norm": 0.6452785895743032, "learning_rate": 6.660188076959004e-06, "loss": 0.3341, "step": 9071 }, { "epoch": 0.41059063136456214, "grad_norm": 0.6412904402714849, "learning_rate": 6.659496719540976e-06, "loss": 0.3371, "step": 9072 }, { "epoch": 0.4106358904729577, "grad_norm": 0.6635652999800646, "learning_rate": 6.658805326465462e-06, "loss": 0.3943, "step": 9073 }, { "epoch": 0.41068114958135327, "grad_norm": 0.31807832099944305, "learning_rate": 6.658113897747315e-06, "loss": 0.4658, "step": 9074 }, { "epoch": 0.4107264086897488, "grad_norm": 0.6611497775536742, "learning_rate": 6.657422433401392e-06, "loss": 0.306, "step": 9075 }, { "epoch": 0.4107716677981444, "grad_norm": 0.6214667092573483, "learning_rate": 6.656730933442552e-06, "loss": 0.354, "step": 9076 }, { "epoch": 0.4108169269065399, "grad_norm": 0.6319617771618785, "learning_rate": 6.656039397885653e-06, "loss": 0.3566, "step": 9077 }, { "epoch": 0.4108621860149355, "grad_norm": 0.6248363665958656, "learning_rate": 6.6553478267455526e-06, "loss": 0.3404, "step": 9078 }, { "epoch": 0.41090744512333105, "grad_norm": 0.6214439842268216, "learning_rate": 6.654656220037112e-06, "loss": 0.3264, "step": 9079 }, { "epoch": 0.41095270423172664, "grad_norm": 0.6171175050479654, "learning_rate": 6.653964577775192e-06, "loss": 0.3447, "step": 9080 }, { "epoch": 0.4109979633401222, "grad_norm": 0.3451405243141214, "learning_rate": 6.653272899974652e-06, "loss": 0.4523, "step": 9081 }, { "epoch": 0.41104322244851776, "grad_norm": 0.6359812714302097, "learning_rate": 6.652581186650355e-06, "loss": 0.3703, "step": 9082 }, { "epoch": 0.41108848155691335, "grad_norm": 0.624058737568414, "learning_rate": 6.651889437817165e-06, "loss": 0.3389, "step": 9083 }, { "epoch": 0.4111337406653089, "grad_norm": 0.574939779007028, "learning_rate": 6.6511976534899414e-06, "loss": 0.2987, "step": 9084 }, { "epoch": 0.4111789997737045, "grad_norm": 0.6465268244781667, "learning_rate": 6.650505833683555e-06, "loss": 0.352, "step": 9085 }, { "epoch": 0.4112242588821, "grad_norm": 0.6275208315388503, "learning_rate": 6.649813978412866e-06, "loss": 0.3659, "step": 9086 }, { "epoch": 0.4112695179904956, "grad_norm": 0.6279978110455504, "learning_rate": 6.6491220876927406e-06, "loss": 0.3388, "step": 9087 }, { "epoch": 0.41131477709889114, "grad_norm": 0.36609485100695516, "learning_rate": 6.648430161538047e-06, "loss": 0.4842, "step": 9088 }, { "epoch": 0.41136003620728673, "grad_norm": 0.6285456556219239, "learning_rate": 6.6477381999636525e-06, "loss": 0.3161, "step": 9089 }, { "epoch": 0.41140529531568226, "grad_norm": 0.6172940666972289, "learning_rate": 6.647046202984424e-06, "loss": 0.3104, "step": 9090 }, { "epoch": 0.41145055442407785, "grad_norm": 0.266702196880152, "learning_rate": 6.646354170615232e-06, "loss": 0.4603, "step": 9091 }, { "epoch": 0.4114958135324734, "grad_norm": 0.6172185599650262, "learning_rate": 6.645662102870944e-06, "loss": 0.3367, "step": 9092 }, { "epoch": 0.411541072640869, "grad_norm": 0.6231255908509027, "learning_rate": 6.644969999766434e-06, "loss": 0.3369, "step": 9093 }, { "epoch": 0.41158633174926457, "grad_norm": 0.6711560277546211, "learning_rate": 6.644277861316569e-06, "loss": 0.2951, "step": 9094 }, { "epoch": 0.4116315908576601, "grad_norm": 0.7050844540839191, "learning_rate": 6.643585687536224e-06, "loss": 0.3233, "step": 9095 }, { "epoch": 0.4116768499660557, "grad_norm": 0.33534558413590837, "learning_rate": 6.642893478440269e-06, "loss": 0.4817, "step": 9096 }, { "epoch": 0.4117221090744512, "grad_norm": 0.6289465132386788, "learning_rate": 6.6422012340435796e-06, "loss": 0.3155, "step": 9097 }, { "epoch": 0.4117673681828468, "grad_norm": 0.6164854466612605, "learning_rate": 6.641508954361029e-06, "loss": 0.3155, "step": 9098 }, { "epoch": 0.41181262729124235, "grad_norm": 0.9583668980218552, "learning_rate": 6.640816639407494e-06, "loss": 0.3495, "step": 9099 }, { "epoch": 0.41185788639963794, "grad_norm": 0.6494624743247183, "learning_rate": 6.640124289197845e-06, "loss": 0.3603, "step": 9100 }, { "epoch": 0.4119031455080335, "grad_norm": 0.3167337648889493, "learning_rate": 6.639431903746967e-06, "loss": 0.495, "step": 9101 }, { "epoch": 0.41194840461642906, "grad_norm": 0.6147842236862651, "learning_rate": 6.638739483069729e-06, "loss": 0.4093, "step": 9102 }, { "epoch": 0.4119936637248246, "grad_norm": 0.6248505173002057, "learning_rate": 6.6380470271810146e-06, "loss": 0.3575, "step": 9103 }, { "epoch": 0.4120389228332202, "grad_norm": 0.6509884609769292, "learning_rate": 6.637354536095699e-06, "loss": 0.3657, "step": 9104 }, { "epoch": 0.4120841819416157, "grad_norm": 0.31781228423799274, "learning_rate": 6.636662009828665e-06, "loss": 0.5212, "step": 9105 }, { "epoch": 0.4121294410500113, "grad_norm": 0.2928137845304512, "learning_rate": 6.635969448394789e-06, "loss": 0.4782, "step": 9106 }, { "epoch": 0.4121747001584069, "grad_norm": 0.6442762188793938, "learning_rate": 6.635276851808955e-06, "loss": 0.3403, "step": 9107 }, { "epoch": 0.41221995926680244, "grad_norm": 0.6580738386452342, "learning_rate": 6.634584220086043e-06, "loss": 0.3659, "step": 9108 }, { "epoch": 0.41226521837519803, "grad_norm": 0.6581215553798003, "learning_rate": 6.633891553240938e-06, "loss": 0.3583, "step": 9109 }, { "epoch": 0.41231047748359356, "grad_norm": 0.6474338516977602, "learning_rate": 6.63319885128852e-06, "loss": 0.3061, "step": 9110 }, { "epoch": 0.41235573659198915, "grad_norm": 0.6093922450524109, "learning_rate": 6.632506114243676e-06, "loss": 0.339, "step": 9111 }, { "epoch": 0.4124009957003847, "grad_norm": 0.6672804842611628, "learning_rate": 6.631813342121289e-06, "loss": 0.3515, "step": 9112 }, { "epoch": 0.4124462548087803, "grad_norm": 0.6295667090545991, "learning_rate": 6.631120534936244e-06, "loss": 0.341, "step": 9113 }, { "epoch": 0.4124915139171758, "grad_norm": 0.6006838717938614, "learning_rate": 6.6304276927034305e-06, "loss": 0.312, "step": 9114 }, { "epoch": 0.4125367730255714, "grad_norm": 0.7046041718813307, "learning_rate": 6.629734815437731e-06, "loss": 0.3529, "step": 9115 }, { "epoch": 0.41258203213396694, "grad_norm": 0.6305552420053111, "learning_rate": 6.629041903154038e-06, "loss": 0.3505, "step": 9116 }, { "epoch": 0.4126272912423625, "grad_norm": 0.6647842074571177, "learning_rate": 6.628348955867237e-06, "loss": 0.342, "step": 9117 }, { "epoch": 0.4126725503507581, "grad_norm": 0.7025522272858689, "learning_rate": 6.627655973592216e-06, "loss": 0.3299, "step": 9118 }, { "epoch": 0.41271780945915365, "grad_norm": 0.4316424212966116, "learning_rate": 6.626962956343868e-06, "loss": 0.4953, "step": 9119 }, { "epoch": 0.41276306856754924, "grad_norm": 0.6497356997555592, "learning_rate": 6.626269904137086e-06, "loss": 0.3277, "step": 9120 }, { "epoch": 0.4128083276759448, "grad_norm": 0.6221328793676323, "learning_rate": 6.625576816986754e-06, "loss": 0.3319, "step": 9121 }, { "epoch": 0.41285358678434037, "grad_norm": 0.6172935644265816, "learning_rate": 6.624883694907772e-06, "loss": 0.3294, "step": 9122 }, { "epoch": 0.4128988458927359, "grad_norm": 0.6632952225923628, "learning_rate": 6.624190537915028e-06, "loss": 0.3276, "step": 9123 }, { "epoch": 0.4129441050011315, "grad_norm": 0.6007638726533377, "learning_rate": 6.6234973460234184e-06, "loss": 0.3668, "step": 9124 }, { "epoch": 0.412989364109527, "grad_norm": 0.6670716039159356, "learning_rate": 6.6228041192478365e-06, "loss": 0.3211, "step": 9125 }, { "epoch": 0.4130346232179226, "grad_norm": 0.608979806865586, "learning_rate": 6.622110857603179e-06, "loss": 0.3393, "step": 9126 }, { "epoch": 0.41307988232631815, "grad_norm": 0.6191617627583516, "learning_rate": 6.6214175611043395e-06, "loss": 0.3359, "step": 9127 }, { "epoch": 0.41312514143471374, "grad_norm": 0.7211852075791401, "learning_rate": 6.620724229766219e-06, "loss": 0.3426, "step": 9128 }, { "epoch": 0.4131704005431093, "grad_norm": 0.3787113096246672, "learning_rate": 6.62003086360371e-06, "loss": 0.4728, "step": 9129 }, { "epoch": 0.41321565965150486, "grad_norm": 0.7311943998256455, "learning_rate": 6.6193374626317155e-06, "loss": 0.3416, "step": 9130 }, { "epoch": 0.41326091875990045, "grad_norm": 0.3177175282849726, "learning_rate": 6.61864402686513e-06, "loss": 0.4925, "step": 9131 }, { "epoch": 0.413306177868296, "grad_norm": 0.2790578219806817, "learning_rate": 6.617950556318858e-06, "loss": 0.4782, "step": 9132 }, { "epoch": 0.4133514369766916, "grad_norm": 0.7027309997642883, "learning_rate": 6.617257051007796e-06, "loss": 0.3752, "step": 9133 }, { "epoch": 0.4133966960850871, "grad_norm": 0.6393780180131584, "learning_rate": 6.616563510946848e-06, "loss": 0.3496, "step": 9134 }, { "epoch": 0.4134419551934827, "grad_norm": 0.593594767525673, "learning_rate": 6.615869936150914e-06, "loss": 0.3649, "step": 9135 }, { "epoch": 0.41348721430187824, "grad_norm": 0.610098381858523, "learning_rate": 6.6151763266348975e-06, "loss": 0.3348, "step": 9136 }, { "epoch": 0.4135324734102738, "grad_norm": 0.6090898027798755, "learning_rate": 6.614482682413703e-06, "loss": 0.3531, "step": 9137 }, { "epoch": 0.41357773251866936, "grad_norm": 0.6430454610132569, "learning_rate": 6.613789003502236e-06, "loss": 0.3304, "step": 9138 }, { "epoch": 0.41362299162706495, "grad_norm": 0.6193389709370466, "learning_rate": 6.6130952899153966e-06, "loss": 0.3471, "step": 9139 }, { "epoch": 0.4136682507354605, "grad_norm": 0.6107043710984218, "learning_rate": 6.6124015416680955e-06, "loss": 0.3211, "step": 9140 }, { "epoch": 0.4137135098438561, "grad_norm": 0.6248234817107684, "learning_rate": 6.611707758775238e-06, "loss": 0.3417, "step": 9141 }, { "epoch": 0.41375876895225167, "grad_norm": 0.6257364165127264, "learning_rate": 6.611013941251728e-06, "loss": 0.3247, "step": 9142 }, { "epoch": 0.4138040280606472, "grad_norm": 0.6544488321986621, "learning_rate": 6.61032008911248e-06, "loss": 0.3538, "step": 9143 }, { "epoch": 0.4138492871690428, "grad_norm": 0.5954957989123458, "learning_rate": 6.609626202372396e-06, "loss": 0.3456, "step": 9144 }, { "epoch": 0.4138945462774383, "grad_norm": 0.49113248105346125, "learning_rate": 6.6089322810463895e-06, "loss": 0.4729, "step": 9145 }, { "epoch": 0.4139398053858339, "grad_norm": 0.8460178834179111, "learning_rate": 6.60823832514937e-06, "loss": 0.3611, "step": 9146 }, { "epoch": 0.41398506449422945, "grad_norm": 0.6319870355690369, "learning_rate": 6.6075443346962475e-06, "loss": 0.3432, "step": 9147 }, { "epoch": 0.41403032360262504, "grad_norm": 0.6076256702515779, "learning_rate": 6.606850309701936e-06, "loss": 0.3163, "step": 9148 }, { "epoch": 0.4140755827110206, "grad_norm": 0.696995594690972, "learning_rate": 6.606156250181346e-06, "loss": 0.3641, "step": 9149 }, { "epoch": 0.41412084181941616, "grad_norm": 0.3384968033801821, "learning_rate": 6.6054621561493896e-06, "loss": 0.4977, "step": 9150 }, { "epoch": 0.4141661009278117, "grad_norm": 0.6415829878760637, "learning_rate": 6.604768027620984e-06, "loss": 0.3514, "step": 9151 }, { "epoch": 0.4142113600362073, "grad_norm": 0.5922317216981312, "learning_rate": 6.60407386461104e-06, "loss": 0.3517, "step": 9152 }, { "epoch": 0.4142566191446029, "grad_norm": 0.33405495227931115, "learning_rate": 6.603379667134478e-06, "loss": 0.4755, "step": 9153 }, { "epoch": 0.4143018782529984, "grad_norm": 0.6350463815012574, "learning_rate": 6.602685435206209e-06, "loss": 0.3481, "step": 9154 }, { "epoch": 0.414347137361394, "grad_norm": 0.6168553855298148, "learning_rate": 6.6019911688411535e-06, "loss": 0.3343, "step": 9155 }, { "epoch": 0.41439239646978954, "grad_norm": 0.7275595796187577, "learning_rate": 6.601296868054227e-06, "loss": 0.3139, "step": 9156 }, { "epoch": 0.4144376555781851, "grad_norm": 0.3305580939019304, "learning_rate": 6.600602532860349e-06, "loss": 0.4838, "step": 9157 }, { "epoch": 0.41448291468658066, "grad_norm": 0.5836929686129411, "learning_rate": 6.599908163274439e-06, "loss": 0.3148, "step": 9158 }, { "epoch": 0.41452817379497625, "grad_norm": 0.5992491283122371, "learning_rate": 6.599213759311416e-06, "loss": 0.3648, "step": 9159 }, { "epoch": 0.4145734329033718, "grad_norm": 0.6499845187153163, "learning_rate": 6.598519320986201e-06, "loss": 0.384, "step": 9160 }, { "epoch": 0.4146186920117674, "grad_norm": 0.6498925304476317, "learning_rate": 6.5978248483137165e-06, "loss": 0.3766, "step": 9161 }, { "epoch": 0.4146639511201629, "grad_norm": 0.6344836570957235, "learning_rate": 6.597130341308881e-06, "loss": 0.3851, "step": 9162 }, { "epoch": 0.4147092102285585, "grad_norm": 0.31294675135178723, "learning_rate": 6.5964357999866214e-06, "loss": 0.4889, "step": 9163 }, { "epoch": 0.41475446933695403, "grad_norm": 0.6378095099327816, "learning_rate": 6.595741224361858e-06, "loss": 0.3172, "step": 9164 }, { "epoch": 0.4147997284453496, "grad_norm": 0.6152584122949394, "learning_rate": 6.595046614449518e-06, "loss": 0.3677, "step": 9165 }, { "epoch": 0.4148449875537452, "grad_norm": 0.2874984063973487, "learning_rate": 6.594351970264525e-06, "loss": 0.4763, "step": 9166 }, { "epoch": 0.41489024666214075, "grad_norm": 0.816731893554995, "learning_rate": 6.593657291821804e-06, "loss": 0.3501, "step": 9167 }, { "epoch": 0.41493550577053634, "grad_norm": 0.30283741282069704, "learning_rate": 6.592962579136283e-06, "loss": 0.5067, "step": 9168 }, { "epoch": 0.4149807648789319, "grad_norm": 0.6416453864713597, "learning_rate": 6.592267832222888e-06, "loss": 0.3793, "step": 9169 }, { "epoch": 0.41502602398732746, "grad_norm": 0.6468314186517801, "learning_rate": 6.591573051096549e-06, "loss": 0.3798, "step": 9170 }, { "epoch": 0.415071283095723, "grad_norm": 0.6383963610937056, "learning_rate": 6.5908782357721914e-06, "loss": 0.3458, "step": 9171 }, { "epoch": 0.4151165422041186, "grad_norm": 0.6506341832672858, "learning_rate": 6.590183386264748e-06, "loss": 0.3717, "step": 9172 }, { "epoch": 0.4151618013125141, "grad_norm": 0.7739364454783211, "learning_rate": 6.5894885025891455e-06, "loss": 0.3672, "step": 9173 }, { "epoch": 0.4152070604209097, "grad_norm": 0.6417639748068534, "learning_rate": 6.5887935847603204e-06, "loss": 0.2971, "step": 9174 }, { "epoch": 0.41525231952930525, "grad_norm": 0.5913598035198431, "learning_rate": 6.588098632793197e-06, "loss": 0.3405, "step": 9175 }, { "epoch": 0.41529757863770084, "grad_norm": 0.6306761921126739, "learning_rate": 6.5874036467027135e-06, "loss": 0.3168, "step": 9176 }, { "epoch": 0.4153428377460964, "grad_norm": 0.6407055478791495, "learning_rate": 6.5867086265038005e-06, "loss": 0.3436, "step": 9177 }, { "epoch": 0.41538809685449196, "grad_norm": 0.7739259144316215, "learning_rate": 6.586013572211394e-06, "loss": 0.3196, "step": 9178 }, { "epoch": 0.41543335596288755, "grad_norm": 0.3843724146831492, "learning_rate": 6.585318483840424e-06, "loss": 0.4897, "step": 9179 }, { "epoch": 0.4154786150712831, "grad_norm": 0.6610425816406361, "learning_rate": 6.58462336140583e-06, "loss": 0.3899, "step": 9180 }, { "epoch": 0.4155238741796787, "grad_norm": 0.28457472167734965, "learning_rate": 6.583928204922546e-06, "loss": 0.4788, "step": 9181 }, { "epoch": 0.4155691332880742, "grad_norm": 0.62001978773301, "learning_rate": 6.5832330144055116e-06, "loss": 0.3485, "step": 9182 }, { "epoch": 0.4156143923964698, "grad_norm": 0.6396613108916812, "learning_rate": 6.58253778986966e-06, "loss": 0.3399, "step": 9183 }, { "epoch": 0.41565965150486534, "grad_norm": 0.6411496367755108, "learning_rate": 6.5818425313299325e-06, "loss": 0.3741, "step": 9184 }, { "epoch": 0.4157049106132609, "grad_norm": 0.6754167372367614, "learning_rate": 6.581147238801268e-06, "loss": 0.3171, "step": 9185 }, { "epoch": 0.41575016972165646, "grad_norm": 0.6254106383490038, "learning_rate": 6.5804519122986045e-06, "loss": 0.3366, "step": 9186 }, { "epoch": 0.41579542883005205, "grad_norm": 0.6739536124384865, "learning_rate": 6.5797565518368835e-06, "loss": 0.3561, "step": 9187 }, { "epoch": 0.41584068793844764, "grad_norm": 0.7134668711042326, "learning_rate": 6.579061157431046e-06, "loss": 0.3764, "step": 9188 }, { "epoch": 0.4158859470468432, "grad_norm": 0.630690784551514, "learning_rate": 6.578365729096034e-06, "loss": 0.3168, "step": 9189 }, { "epoch": 0.41593120615523876, "grad_norm": 0.631189148468012, "learning_rate": 6.57767026684679e-06, "loss": 0.3313, "step": 9190 }, { "epoch": 0.4159764652636343, "grad_norm": 0.6650612871222172, "learning_rate": 6.576974770698259e-06, "loss": 0.3645, "step": 9191 }, { "epoch": 0.4160217243720299, "grad_norm": 0.6459197229536661, "learning_rate": 6.576279240665381e-06, "loss": 0.3724, "step": 9192 }, { "epoch": 0.4160669834804254, "grad_norm": 0.583243594845825, "learning_rate": 6.575583676763105e-06, "loss": 0.3587, "step": 9193 }, { "epoch": 0.416112242588821, "grad_norm": 0.6114416733865374, "learning_rate": 6.574888079006374e-06, "loss": 0.349, "step": 9194 }, { "epoch": 0.41615750169721655, "grad_norm": 0.6310044548859889, "learning_rate": 6.574192447410136e-06, "loss": 0.3292, "step": 9195 }, { "epoch": 0.41620276080561214, "grad_norm": 0.6890581030769432, "learning_rate": 6.573496781989336e-06, "loss": 0.2981, "step": 9196 }, { "epoch": 0.41624801991400767, "grad_norm": 0.6077206380083984, "learning_rate": 6.572801082758923e-06, "loss": 0.3722, "step": 9197 }, { "epoch": 0.41629327902240326, "grad_norm": 0.5266079459598753, "learning_rate": 6.5721053497338464e-06, "loss": 0.5054, "step": 9198 }, { "epoch": 0.4163385381307988, "grad_norm": 0.6082102498170532, "learning_rate": 6.571409582929053e-06, "loss": 0.3137, "step": 9199 }, { "epoch": 0.4163837972391944, "grad_norm": 0.6345993046401832, "learning_rate": 6.570713782359493e-06, "loss": 0.3673, "step": 9200 }, { "epoch": 0.41642905634759, "grad_norm": 0.6354457264023634, "learning_rate": 6.57001794804012e-06, "loss": 0.3619, "step": 9201 }, { "epoch": 0.4164743154559855, "grad_norm": 0.7378579848599901, "learning_rate": 6.569322079985881e-06, "loss": 0.3376, "step": 9202 }, { "epoch": 0.4165195745643811, "grad_norm": 0.6618244842604736, "learning_rate": 6.568626178211732e-06, "loss": 0.3416, "step": 9203 }, { "epoch": 0.41656483367277664, "grad_norm": 0.6361598077473152, "learning_rate": 6.567930242732624e-06, "loss": 0.3417, "step": 9204 }, { "epoch": 0.4166100927811722, "grad_norm": 0.6876223638053927, "learning_rate": 6.5672342735635095e-06, "loss": 0.3632, "step": 9205 }, { "epoch": 0.41665535188956776, "grad_norm": 0.3977714891906502, "learning_rate": 6.566538270719345e-06, "loss": 0.4684, "step": 9206 }, { "epoch": 0.41670061099796335, "grad_norm": 0.31331673993273423, "learning_rate": 6.565842234215085e-06, "loss": 0.4653, "step": 9207 }, { "epoch": 0.4167458701063589, "grad_norm": 0.7117005769691125, "learning_rate": 6.5651461640656825e-06, "loss": 0.3515, "step": 9208 }, { "epoch": 0.4167911292147545, "grad_norm": 0.6359816588793948, "learning_rate": 6.564450060286098e-06, "loss": 0.3227, "step": 9209 }, { "epoch": 0.41683638832315, "grad_norm": 1.259017491550393, "learning_rate": 6.563753922891284e-06, "loss": 0.3229, "step": 9210 }, { "epoch": 0.4168816474315456, "grad_norm": 0.4617368788687044, "learning_rate": 6.563057751896204e-06, "loss": 0.4819, "step": 9211 }, { "epoch": 0.4169269065399412, "grad_norm": 0.6432718249738655, "learning_rate": 6.562361547315811e-06, "loss": 0.331, "step": 9212 }, { "epoch": 0.4169721656483367, "grad_norm": 0.6933318021267154, "learning_rate": 6.561665309165067e-06, "loss": 0.3203, "step": 9213 }, { "epoch": 0.4170174247567323, "grad_norm": 0.31445652021371545, "learning_rate": 6.560969037458933e-06, "loss": 0.4685, "step": 9214 }, { "epoch": 0.41706268386512785, "grad_norm": 0.6828889036220073, "learning_rate": 6.5602727322123675e-06, "loss": 0.3184, "step": 9215 }, { "epoch": 0.41710794297352344, "grad_norm": 0.6778087903452624, "learning_rate": 6.5595763934403335e-06, "loss": 0.3864, "step": 9216 }, { "epoch": 0.417153202081919, "grad_norm": 0.68361582002995, "learning_rate": 6.5588800211577915e-06, "loss": 0.3403, "step": 9217 }, { "epoch": 0.41719846119031456, "grad_norm": 0.6222438144782241, "learning_rate": 6.558183615379708e-06, "loss": 0.3495, "step": 9218 }, { "epoch": 0.4172437202987101, "grad_norm": 0.6665841861318481, "learning_rate": 6.557487176121042e-06, "loss": 0.3167, "step": 9219 }, { "epoch": 0.4172889794071057, "grad_norm": 0.6534720487634615, "learning_rate": 6.5567907033967616e-06, "loss": 0.403, "step": 9220 }, { "epoch": 0.4173342385155012, "grad_norm": 0.4525851786579055, "learning_rate": 6.556094197221828e-06, "loss": 0.4636, "step": 9221 }, { "epoch": 0.4173794976238968, "grad_norm": 0.35344989041189595, "learning_rate": 6.5553976576112124e-06, "loss": 0.4762, "step": 9222 }, { "epoch": 0.4174247567322924, "grad_norm": 0.6190618498454409, "learning_rate": 6.554701084579876e-06, "loss": 0.2829, "step": 9223 }, { "epoch": 0.41747001584068794, "grad_norm": 0.6814680694155807, "learning_rate": 6.554004478142789e-06, "loss": 0.3461, "step": 9224 }, { "epoch": 0.4175152749490835, "grad_norm": 0.67215300592011, "learning_rate": 6.553307838314919e-06, "loss": 0.3507, "step": 9225 }, { "epoch": 0.41756053405747906, "grad_norm": 0.6642982648054744, "learning_rate": 6.552611165111233e-06, "loss": 0.3373, "step": 9226 }, { "epoch": 0.41760579316587465, "grad_norm": 0.5308400151907808, "learning_rate": 6.551914458546702e-06, "loss": 0.4814, "step": 9227 }, { "epoch": 0.4176510522742702, "grad_norm": 0.6098024845830202, "learning_rate": 6.5512177186362956e-06, "loss": 0.3097, "step": 9228 }, { "epoch": 0.4176963113826658, "grad_norm": 0.3971927840088478, "learning_rate": 6.5505209453949844e-06, "loss": 0.4894, "step": 9229 }, { "epoch": 0.4177415704910613, "grad_norm": 0.6215690645807083, "learning_rate": 6.5498241388377415e-06, "loss": 0.304, "step": 9230 }, { "epoch": 0.4177868295994569, "grad_norm": 0.6501272460909904, "learning_rate": 6.549127298979535e-06, "loss": 0.3561, "step": 9231 }, { "epoch": 0.41783208870785243, "grad_norm": 1.047121293479635, "learning_rate": 6.5484304258353435e-06, "loss": 0.3503, "step": 9232 }, { "epoch": 0.417877347816248, "grad_norm": 0.6911381850116922, "learning_rate": 6.547733519420136e-06, "loss": 0.3353, "step": 9233 }, { "epoch": 0.41792260692464356, "grad_norm": 0.6769210876746538, "learning_rate": 6.54703657974889e-06, "loss": 0.3237, "step": 9234 }, { "epoch": 0.41796786603303915, "grad_norm": 0.6153345028518419, "learning_rate": 6.546339606836578e-06, "loss": 0.3453, "step": 9235 }, { "epoch": 0.41801312514143474, "grad_norm": 0.6601778241017583, "learning_rate": 6.545642600698179e-06, "loss": 0.358, "step": 9236 }, { "epoch": 0.4180583842498303, "grad_norm": 0.6643447241178514, "learning_rate": 6.544945561348665e-06, "loss": 0.3401, "step": 9237 }, { "epoch": 0.41810364335822586, "grad_norm": 0.6757724192115454, "learning_rate": 6.544248488803017e-06, "loss": 0.3554, "step": 9238 }, { "epoch": 0.4181489024666214, "grad_norm": 0.611764580109547, "learning_rate": 6.5435513830762125e-06, "loss": 0.4217, "step": 9239 }, { "epoch": 0.418194161575017, "grad_norm": 0.5760886846843914, "learning_rate": 6.542854244183229e-06, "loss": 0.3357, "step": 9240 }, { "epoch": 0.4182394206834125, "grad_norm": 0.6448729001164268, "learning_rate": 6.542157072139046e-06, "loss": 0.3631, "step": 9241 }, { "epoch": 0.4182846797918081, "grad_norm": 0.6129535632789035, "learning_rate": 6.541459866958644e-06, "loss": 0.3061, "step": 9242 }, { "epoch": 0.41832993890020365, "grad_norm": 0.617909176415105, "learning_rate": 6.540762628657003e-06, "loss": 0.3362, "step": 9243 }, { "epoch": 0.41837519800859924, "grad_norm": 0.6611050290972347, "learning_rate": 6.5400653572491055e-06, "loss": 0.3623, "step": 9244 }, { "epoch": 0.41842045711699477, "grad_norm": 0.5944838215802871, "learning_rate": 6.539368052749935e-06, "loss": 0.3086, "step": 9245 }, { "epoch": 0.41846571622539036, "grad_norm": 0.6339489370126075, "learning_rate": 6.538670715174471e-06, "loss": 0.3581, "step": 9246 }, { "epoch": 0.41851097533378595, "grad_norm": 0.600420415879879, "learning_rate": 6.537973344537699e-06, "loss": 0.3388, "step": 9247 }, { "epoch": 0.4185562344421815, "grad_norm": 0.6029103116209852, "learning_rate": 6.537275940854604e-06, "loss": 0.3529, "step": 9248 }, { "epoch": 0.4186014935505771, "grad_norm": 0.6447280673198621, "learning_rate": 6.536578504140172e-06, "loss": 0.4934, "step": 9249 }, { "epoch": 0.4186467526589726, "grad_norm": 0.6194822194770873, "learning_rate": 6.535881034409384e-06, "loss": 0.3363, "step": 9250 }, { "epoch": 0.4186920117673682, "grad_norm": 0.6502620940642008, "learning_rate": 6.535183531677232e-06, "loss": 0.3372, "step": 9251 }, { "epoch": 0.41873727087576373, "grad_norm": 0.6700261335411145, "learning_rate": 6.534485995958699e-06, "loss": 0.3933, "step": 9252 }, { "epoch": 0.4187825299841593, "grad_norm": 0.349521227256, "learning_rate": 6.533788427268777e-06, "loss": 0.492, "step": 9253 }, { "epoch": 0.41882778909255486, "grad_norm": 0.6771202574472491, "learning_rate": 6.533090825622451e-06, "loss": 0.3542, "step": 9254 }, { "epoch": 0.41887304820095045, "grad_norm": 0.6251717433479416, "learning_rate": 6.532393191034711e-06, "loss": 0.3166, "step": 9255 }, { "epoch": 0.418918307309346, "grad_norm": 0.6941485466388639, "learning_rate": 6.53169552352055e-06, "loss": 0.3458, "step": 9256 }, { "epoch": 0.4189635664177416, "grad_norm": 0.6536270290477265, "learning_rate": 6.530997823094956e-06, "loss": 0.3605, "step": 9257 }, { "epoch": 0.4190088255261371, "grad_norm": 0.6279317153068226, "learning_rate": 6.530300089772918e-06, "loss": 0.35, "step": 9258 }, { "epoch": 0.4190540846345327, "grad_norm": 0.6270067678352912, "learning_rate": 6.529602323569435e-06, "loss": 0.3297, "step": 9259 }, { "epoch": 0.4190993437429283, "grad_norm": 0.6002916694129556, "learning_rate": 6.528904524499492e-06, "loss": 0.3209, "step": 9260 }, { "epoch": 0.4191446028513238, "grad_norm": 0.5958028898914056, "learning_rate": 6.5282066925780896e-06, "loss": 0.3243, "step": 9261 }, { "epoch": 0.4191898619597194, "grad_norm": 0.41948313179471364, "learning_rate": 6.527508827820217e-06, "loss": 0.5249, "step": 9262 }, { "epoch": 0.41923512106811495, "grad_norm": 0.6650697757035404, "learning_rate": 6.526810930240872e-06, "loss": 0.3436, "step": 9263 }, { "epoch": 0.41928038017651054, "grad_norm": 0.5925651770758924, "learning_rate": 6.526112999855049e-06, "loss": 0.3139, "step": 9264 }, { "epoch": 0.41932563928490607, "grad_norm": 0.6675276809606014, "learning_rate": 6.525415036677745e-06, "loss": 0.3447, "step": 9265 }, { "epoch": 0.41937089839330166, "grad_norm": 0.6131476470944407, "learning_rate": 6.524717040723956e-06, "loss": 0.3409, "step": 9266 }, { "epoch": 0.4194161575016972, "grad_norm": 0.6353899230609578, "learning_rate": 6.524019012008681e-06, "loss": 0.3838, "step": 9267 }, { "epoch": 0.4194614166100928, "grad_norm": 0.3245527559211798, "learning_rate": 6.523320950546919e-06, "loss": 0.4586, "step": 9268 }, { "epoch": 0.4195066757184883, "grad_norm": 0.705392549434997, "learning_rate": 6.522622856353667e-06, "loss": 0.366, "step": 9269 }, { "epoch": 0.4195519348268839, "grad_norm": 0.3172452665983041, "learning_rate": 6.521924729443928e-06, "loss": 0.5068, "step": 9270 }, { "epoch": 0.4195971939352795, "grad_norm": 0.6478833118357851, "learning_rate": 6.521226569832699e-06, "loss": 0.3742, "step": 9271 }, { "epoch": 0.41964245304367503, "grad_norm": 0.6690294381143386, "learning_rate": 6.520528377534984e-06, "loss": 0.3379, "step": 9272 }, { "epoch": 0.4196877121520706, "grad_norm": 0.5924226382675255, "learning_rate": 6.519830152565784e-06, "loss": 0.3108, "step": 9273 }, { "epoch": 0.41973297126046616, "grad_norm": 0.8549559792432717, "learning_rate": 6.5191318949401005e-06, "loss": 0.3508, "step": 9274 }, { "epoch": 0.41977823036886175, "grad_norm": 0.6788708282638739, "learning_rate": 6.51843360467294e-06, "loss": 0.3518, "step": 9275 }, { "epoch": 0.4198234894772573, "grad_norm": 0.6608690918320347, "learning_rate": 6.517735281779304e-06, "loss": 0.3614, "step": 9276 }, { "epoch": 0.4198687485856529, "grad_norm": 0.644622363537874, "learning_rate": 6.517036926274198e-06, "loss": 0.3568, "step": 9277 }, { "epoch": 0.4199140076940484, "grad_norm": 0.672102533674108, "learning_rate": 6.51633853817263e-06, "loss": 0.3761, "step": 9278 }, { "epoch": 0.419959266802444, "grad_norm": 0.6160561545018294, "learning_rate": 6.5156401174896e-06, "loss": 0.317, "step": 9279 }, { "epoch": 0.42000452591083953, "grad_norm": 0.6409995525830237, "learning_rate": 6.514941664240122e-06, "loss": 0.3363, "step": 9280 }, { "epoch": 0.4200497850192351, "grad_norm": 0.6213300786894572, "learning_rate": 6.5142431784391976e-06, "loss": 0.2734, "step": 9281 }, { "epoch": 0.4200950441276307, "grad_norm": 0.6648642571051393, "learning_rate": 6.513544660101841e-06, "loss": 0.3613, "step": 9282 }, { "epoch": 0.42014030323602625, "grad_norm": 0.4573516667736759, "learning_rate": 6.512846109243056e-06, "loss": 0.4955, "step": 9283 }, { "epoch": 0.42018556234442184, "grad_norm": 0.6287135334931294, "learning_rate": 6.512147525877856e-06, "loss": 0.3704, "step": 9284 }, { "epoch": 0.42023082145281737, "grad_norm": 0.6254628698269683, "learning_rate": 6.5114489100212485e-06, "loss": 0.3798, "step": 9285 }, { "epoch": 0.42027608056121296, "grad_norm": 0.6000869513094843, "learning_rate": 6.510750261688246e-06, "loss": 0.3456, "step": 9286 }, { "epoch": 0.4203213396696085, "grad_norm": 0.6583406363645683, "learning_rate": 6.510051580893861e-06, "loss": 0.3511, "step": 9287 }, { "epoch": 0.4203665987780041, "grad_norm": 0.30321244352810744, "learning_rate": 6.509352867653106e-06, "loss": 0.479, "step": 9288 }, { "epoch": 0.4204118578863996, "grad_norm": 0.6540125283971915, "learning_rate": 6.508654121980992e-06, "loss": 0.3494, "step": 9289 }, { "epoch": 0.4204571169947952, "grad_norm": 0.6378658016227312, "learning_rate": 6.507955343892536e-06, "loss": 0.3639, "step": 9290 }, { "epoch": 0.42050237610319074, "grad_norm": 0.6512258485466873, "learning_rate": 6.507256533402749e-06, "loss": 0.3373, "step": 9291 }, { "epoch": 0.42054763521158633, "grad_norm": 0.6278434316788836, "learning_rate": 6.506557690526649e-06, "loss": 0.3492, "step": 9292 }, { "epoch": 0.42059289431998187, "grad_norm": 0.6065740516331564, "learning_rate": 6.5058588152792516e-06, "loss": 0.365, "step": 9293 }, { "epoch": 0.42063815342837746, "grad_norm": 0.6239073368527323, "learning_rate": 6.5051599076755735e-06, "loss": 0.3135, "step": 9294 }, { "epoch": 0.42068341253677305, "grad_norm": 0.6390514081870159, "learning_rate": 6.50446096773063e-06, "loss": 0.3243, "step": 9295 }, { "epoch": 0.4207286716451686, "grad_norm": 0.6657578724741773, "learning_rate": 6.503761995459443e-06, "loss": 0.304, "step": 9296 }, { "epoch": 0.4207739307535642, "grad_norm": 0.608600570311915, "learning_rate": 6.503062990877028e-06, "loss": 0.2998, "step": 9297 }, { "epoch": 0.4208191898619597, "grad_norm": 0.5960942254073808, "learning_rate": 6.502363953998406e-06, "loss": 0.3535, "step": 9298 }, { "epoch": 0.4208644489703553, "grad_norm": 0.5951803109195455, "learning_rate": 6.501664884838597e-06, "loss": 0.3158, "step": 9299 }, { "epoch": 0.42090970807875083, "grad_norm": 0.3982193169565782, "learning_rate": 6.500965783412621e-06, "loss": 0.4864, "step": 9300 }, { "epoch": 0.4209549671871464, "grad_norm": 0.6354665260421296, "learning_rate": 6.5002666497355015e-06, "loss": 0.3853, "step": 9301 }, { "epoch": 0.42100022629554196, "grad_norm": 0.7922319996822186, "learning_rate": 6.4995674838222575e-06, "loss": 0.3432, "step": 9302 }, { "epoch": 0.42104548540393755, "grad_norm": 0.720572508906418, "learning_rate": 6.498868285687916e-06, "loss": 0.3436, "step": 9303 }, { "epoch": 0.4210907445123331, "grad_norm": 1.0918756163190484, "learning_rate": 6.498169055347498e-06, "loss": 0.3878, "step": 9304 }, { "epoch": 0.42113600362072867, "grad_norm": 0.6265199496335899, "learning_rate": 6.497469792816027e-06, "loss": 0.3567, "step": 9305 }, { "epoch": 0.42118126272912426, "grad_norm": 0.6569177766236907, "learning_rate": 6.49677049810853e-06, "loss": 0.3204, "step": 9306 }, { "epoch": 0.4212265218375198, "grad_norm": 0.5878224560887056, "learning_rate": 6.4960711712400314e-06, "loss": 0.345, "step": 9307 }, { "epoch": 0.4212717809459154, "grad_norm": 0.6379857860705639, "learning_rate": 6.4953718122255584e-06, "loss": 0.3928, "step": 9308 }, { "epoch": 0.4213170400543109, "grad_norm": 0.3598080210324236, "learning_rate": 6.494672421080139e-06, "loss": 0.464, "step": 9309 }, { "epoch": 0.4213622991627065, "grad_norm": 0.33433311563012347, "learning_rate": 6.493972997818798e-06, "loss": 0.501, "step": 9310 }, { "epoch": 0.42140755827110205, "grad_norm": 0.6860341505203552, "learning_rate": 6.493273542456567e-06, "loss": 0.3916, "step": 9311 }, { "epoch": 0.42145281737949764, "grad_norm": 0.7154400037378822, "learning_rate": 6.492574055008474e-06, "loss": 0.3437, "step": 9312 }, { "epoch": 0.42149807648789317, "grad_norm": 0.6669624091081103, "learning_rate": 6.491874535489547e-06, "loss": 0.351, "step": 9313 }, { "epoch": 0.42154333559628876, "grad_norm": 0.4255153708413403, "learning_rate": 6.4911749839148195e-06, "loss": 0.492, "step": 9314 }, { "epoch": 0.4215885947046843, "grad_norm": 0.6582232973062351, "learning_rate": 6.490475400299321e-06, "loss": 0.3438, "step": 9315 }, { "epoch": 0.4216338538130799, "grad_norm": 0.35842591242246086, "learning_rate": 6.489775784658083e-06, "loss": 0.4968, "step": 9316 }, { "epoch": 0.4216791129214755, "grad_norm": 0.6233884916153226, "learning_rate": 6.489076137006141e-06, "loss": 0.3256, "step": 9317 }, { "epoch": 0.421724372029871, "grad_norm": 0.626987885497307, "learning_rate": 6.488376457358525e-06, "loss": 0.3612, "step": 9318 }, { "epoch": 0.4217696311382666, "grad_norm": 0.7825176726361815, "learning_rate": 6.487676745730271e-06, "loss": 0.3438, "step": 9319 }, { "epoch": 0.42181489024666213, "grad_norm": 0.7636418718424111, "learning_rate": 6.4869770021364105e-06, "loss": 0.3372, "step": 9320 }, { "epoch": 0.4218601493550577, "grad_norm": 0.6968989386721643, "learning_rate": 6.486277226591982e-06, "loss": 0.3473, "step": 9321 }, { "epoch": 0.42190540846345326, "grad_norm": 0.41793635105649984, "learning_rate": 6.4855774191120215e-06, "loss": 0.4952, "step": 9322 }, { "epoch": 0.42195066757184885, "grad_norm": 0.6678593798332022, "learning_rate": 6.484877579711565e-06, "loss": 0.3457, "step": 9323 }, { "epoch": 0.4219959266802444, "grad_norm": 0.634953767633606, "learning_rate": 6.484177708405649e-06, "loss": 0.34, "step": 9324 }, { "epoch": 0.42204118578863997, "grad_norm": 0.3158100492797705, "learning_rate": 6.4834778052093125e-06, "loss": 0.482, "step": 9325 }, { "epoch": 0.4220864448970355, "grad_norm": 0.5584665390209338, "learning_rate": 6.482777870137594e-06, "loss": 0.329, "step": 9326 }, { "epoch": 0.4221317040054311, "grad_norm": 0.5845534166457408, "learning_rate": 6.4820779032055335e-06, "loss": 0.3376, "step": 9327 }, { "epoch": 0.42217696311382663, "grad_norm": 0.6833774180728347, "learning_rate": 6.481377904428171e-06, "loss": 0.341, "step": 9328 }, { "epoch": 0.4222222222222222, "grad_norm": 0.6828282315122735, "learning_rate": 6.4806778738205455e-06, "loss": 0.3291, "step": 9329 }, { "epoch": 0.4222674813306178, "grad_norm": 0.613631604631963, "learning_rate": 6.479977811397702e-06, "loss": 0.319, "step": 9330 }, { "epoch": 0.42231274043901335, "grad_norm": 0.6997442427758623, "learning_rate": 6.479277717174679e-06, "loss": 0.3601, "step": 9331 }, { "epoch": 0.42235799954740894, "grad_norm": 0.6189173652158538, "learning_rate": 6.478577591166523e-06, "loss": 0.3411, "step": 9332 }, { "epoch": 0.42240325865580447, "grad_norm": 0.5729779897205061, "learning_rate": 6.477877433388274e-06, "loss": 0.2974, "step": 9333 }, { "epoch": 0.42244851776420006, "grad_norm": 0.3622437132156431, "learning_rate": 6.477177243854978e-06, "loss": 0.4943, "step": 9334 }, { "epoch": 0.4224937768725956, "grad_norm": 0.32321408167241117, "learning_rate": 6.476477022581681e-06, "loss": 0.483, "step": 9335 }, { "epoch": 0.4225390359809912, "grad_norm": 0.6281626093772569, "learning_rate": 6.475776769583426e-06, "loss": 0.326, "step": 9336 }, { "epoch": 0.4225842950893867, "grad_norm": 0.6403466744410745, "learning_rate": 6.475076484875262e-06, "loss": 0.3697, "step": 9337 }, { "epoch": 0.4226295541977823, "grad_norm": 0.6502452275171857, "learning_rate": 6.4743761684722354e-06, "loss": 0.3769, "step": 9338 }, { "epoch": 0.42267481330617784, "grad_norm": 0.5923763294515152, "learning_rate": 6.4736758203893915e-06, "loss": 0.339, "step": 9339 }, { "epoch": 0.42272007241457343, "grad_norm": 0.6721740786041263, "learning_rate": 6.472975440641781e-06, "loss": 0.3564, "step": 9340 }, { "epoch": 0.422765331522969, "grad_norm": 0.6428716930000375, "learning_rate": 6.472275029244452e-06, "loss": 0.3491, "step": 9341 }, { "epoch": 0.42281059063136456, "grad_norm": 0.4617371834957663, "learning_rate": 6.471574586212454e-06, "loss": 0.4759, "step": 9342 }, { "epoch": 0.42285584973976015, "grad_norm": 0.39874311204630764, "learning_rate": 6.470874111560837e-06, "loss": 0.4809, "step": 9343 }, { "epoch": 0.4229011088481557, "grad_norm": 0.3015463366870361, "learning_rate": 6.470173605304655e-06, "loss": 0.4753, "step": 9344 }, { "epoch": 0.4229463679565513, "grad_norm": 0.6940333918436483, "learning_rate": 6.469473067458956e-06, "loss": 0.3577, "step": 9345 }, { "epoch": 0.4229916270649468, "grad_norm": 0.3984963380341798, "learning_rate": 6.468772498038795e-06, "loss": 0.4999, "step": 9346 }, { "epoch": 0.4230368861733424, "grad_norm": 0.6775492573141172, "learning_rate": 6.468071897059222e-06, "loss": 0.3329, "step": 9347 }, { "epoch": 0.42308214528173793, "grad_norm": 0.7675894133760427, "learning_rate": 6.467371264535295e-06, "loss": 0.339, "step": 9348 }, { "epoch": 0.4231274043901335, "grad_norm": 0.658077152410635, "learning_rate": 6.466670600482065e-06, "loss": 0.4015, "step": 9349 }, { "epoch": 0.42317266349852906, "grad_norm": 0.6308682728934399, "learning_rate": 6.465969904914589e-06, "loss": 0.3805, "step": 9350 }, { "epoch": 0.42321792260692465, "grad_norm": 0.5976569284225053, "learning_rate": 6.4652691778479215e-06, "loss": 0.3121, "step": 9351 }, { "epoch": 0.42326318171532024, "grad_norm": 0.7100140072873342, "learning_rate": 6.4645684192971195e-06, "loss": 0.3197, "step": 9352 }, { "epoch": 0.42330844082371577, "grad_norm": 0.6518239354126296, "learning_rate": 6.463867629277241e-06, "loss": 0.4765, "step": 9353 }, { "epoch": 0.42335369993211136, "grad_norm": 0.6269798987646074, "learning_rate": 6.463166807803342e-06, "loss": 0.3341, "step": 9354 }, { "epoch": 0.4233989590405069, "grad_norm": 0.7638670907037846, "learning_rate": 6.462465954890482e-06, "loss": 0.338, "step": 9355 }, { "epoch": 0.4234442181489025, "grad_norm": 0.6502171356431934, "learning_rate": 6.46176507055372e-06, "loss": 0.3279, "step": 9356 }, { "epoch": 0.423489477257298, "grad_norm": 0.33969432269480376, "learning_rate": 6.461064154808118e-06, "loss": 0.4782, "step": 9357 }, { "epoch": 0.4235347363656936, "grad_norm": 0.7200506948589684, "learning_rate": 6.460363207668734e-06, "loss": 0.3244, "step": 9358 }, { "epoch": 0.42357999547408914, "grad_norm": 0.59014599078168, "learning_rate": 6.45966222915063e-06, "loss": 0.2856, "step": 9359 }, { "epoch": 0.42362525458248473, "grad_norm": 0.7047584502504611, "learning_rate": 6.4589612192688656e-06, "loss": 0.3552, "step": 9360 }, { "epoch": 0.42367051369088027, "grad_norm": 0.6219126607206661, "learning_rate": 6.458260178038508e-06, "loss": 0.3337, "step": 9361 }, { "epoch": 0.42371577279927586, "grad_norm": 0.6525766470892861, "learning_rate": 6.457559105474617e-06, "loss": 0.3628, "step": 9362 }, { "epoch": 0.4237610319076714, "grad_norm": 0.49286730889390995, "learning_rate": 6.456858001592257e-06, "loss": 0.5076, "step": 9363 }, { "epoch": 0.423806291016067, "grad_norm": 0.43228591511802883, "learning_rate": 6.456156866406493e-06, "loss": 0.5279, "step": 9364 }, { "epoch": 0.4238515501244626, "grad_norm": 0.7548313358238873, "learning_rate": 6.45545569993239e-06, "loss": 0.3571, "step": 9365 }, { "epoch": 0.4238968092328581, "grad_norm": 0.6385538040486504, "learning_rate": 6.454754502185015e-06, "loss": 0.2961, "step": 9366 }, { "epoch": 0.4239420683412537, "grad_norm": 0.7150213127800817, "learning_rate": 6.454053273179435e-06, "loss": 0.3793, "step": 9367 }, { "epoch": 0.42398732744964923, "grad_norm": 0.42403833084216347, "learning_rate": 6.453352012930713e-06, "loss": 0.4684, "step": 9368 }, { "epoch": 0.4240325865580448, "grad_norm": 0.8437666534491314, "learning_rate": 6.452650721453921e-06, "loss": 0.4078, "step": 9369 }, { "epoch": 0.42407784566644036, "grad_norm": 0.44738550576229486, "learning_rate": 6.451949398764127e-06, "loss": 0.5022, "step": 9370 }, { "epoch": 0.42412310477483595, "grad_norm": 0.6883373927932057, "learning_rate": 6.451248044876399e-06, "loss": 0.3763, "step": 9371 }, { "epoch": 0.4241683638832315, "grad_norm": 0.6465158845984057, "learning_rate": 6.450546659805807e-06, "loss": 0.3658, "step": 9372 }, { "epoch": 0.42421362299162707, "grad_norm": 0.5976605595425212, "learning_rate": 6.449845243567424e-06, "loss": 0.3262, "step": 9373 }, { "epoch": 0.4242588821000226, "grad_norm": 0.6650479113683798, "learning_rate": 6.449143796176318e-06, "loss": 0.3406, "step": 9374 }, { "epoch": 0.4243041412084182, "grad_norm": 0.6812372296874605, "learning_rate": 6.448442317647563e-06, "loss": 0.3514, "step": 9375 }, { "epoch": 0.4243494003168138, "grad_norm": 0.5904246462988874, "learning_rate": 6.447740807996232e-06, "loss": 0.3203, "step": 9376 }, { "epoch": 0.4243946594252093, "grad_norm": 0.6512807613106717, "learning_rate": 6.447039267237397e-06, "loss": 0.3109, "step": 9377 }, { "epoch": 0.4244399185336049, "grad_norm": 0.7020948553285277, "learning_rate": 6.446337695386132e-06, "loss": 0.3841, "step": 9378 }, { "epoch": 0.42448517764200044, "grad_norm": 0.6360695297590829, "learning_rate": 6.445636092457512e-06, "loss": 0.3559, "step": 9379 }, { "epoch": 0.42453043675039603, "grad_norm": 0.6904837117783633, "learning_rate": 6.444934458466614e-06, "loss": 0.303, "step": 9380 }, { "epoch": 0.42457569585879157, "grad_norm": 0.7228940949355944, "learning_rate": 6.444232793428511e-06, "loss": 0.3771, "step": 9381 }, { "epoch": 0.42462095496718716, "grad_norm": 0.6104398617473498, "learning_rate": 6.4435310973582795e-06, "loss": 0.3762, "step": 9382 }, { "epoch": 0.4246662140755827, "grad_norm": 0.579187174280872, "learning_rate": 6.442829370271e-06, "loss": 0.3394, "step": 9383 }, { "epoch": 0.4247114731839783, "grad_norm": 0.7785916025725149, "learning_rate": 6.442127612181747e-06, "loss": 0.3552, "step": 9384 }, { "epoch": 0.4247567322923738, "grad_norm": 0.6105297157822821, "learning_rate": 6.441425823105603e-06, "loss": 0.3298, "step": 9385 }, { "epoch": 0.4248019914007694, "grad_norm": 0.6995238406533221, "learning_rate": 6.440724003057643e-06, "loss": 0.3274, "step": 9386 }, { "epoch": 0.42484725050916494, "grad_norm": 0.6808658548787668, "learning_rate": 6.440022152052951e-06, "loss": 0.3037, "step": 9387 }, { "epoch": 0.42489250961756053, "grad_norm": 0.6164624205567748, "learning_rate": 6.4393202701066046e-06, "loss": 0.3627, "step": 9388 }, { "epoch": 0.4249377687259561, "grad_norm": 0.5725999624006355, "learning_rate": 6.4386183572336854e-06, "loss": 0.4658, "step": 9389 }, { "epoch": 0.42498302783435166, "grad_norm": 0.6049117002644915, "learning_rate": 6.437916413449278e-06, "loss": 0.3545, "step": 9390 }, { "epoch": 0.42502828694274725, "grad_norm": 0.6477856887287778, "learning_rate": 6.437214438768462e-06, "loss": 0.3626, "step": 9391 }, { "epoch": 0.4250735460511428, "grad_norm": 0.6093961114293333, "learning_rate": 6.436512433206321e-06, "loss": 0.3455, "step": 9392 }, { "epoch": 0.42511880515953837, "grad_norm": 0.6660332275889961, "learning_rate": 6.435810396777941e-06, "loss": 0.3519, "step": 9393 }, { "epoch": 0.4251640642679339, "grad_norm": 0.6843726330753063, "learning_rate": 6.435108329498404e-06, "loss": 0.3201, "step": 9394 }, { "epoch": 0.4252093233763295, "grad_norm": 0.6189098497854676, "learning_rate": 6.434406231382797e-06, "loss": 0.3305, "step": 9395 }, { "epoch": 0.42525458248472503, "grad_norm": 0.6627815625904901, "learning_rate": 6.433704102446207e-06, "loss": 0.3814, "step": 9396 }, { "epoch": 0.4252998415931206, "grad_norm": 0.6338700019878458, "learning_rate": 6.433001942703717e-06, "loss": 0.3447, "step": 9397 }, { "epoch": 0.42534510070151615, "grad_norm": 0.4237312118309754, "learning_rate": 6.432299752170419e-06, "loss": 0.4892, "step": 9398 }, { "epoch": 0.42539035980991174, "grad_norm": 0.37646514307108286, "learning_rate": 6.431597530861396e-06, "loss": 0.5054, "step": 9399 }, { "epoch": 0.42543561891830733, "grad_norm": 0.7563459022781303, "learning_rate": 6.430895278791739e-06, "loss": 0.3495, "step": 9400 }, { "epoch": 0.42548087802670287, "grad_norm": 0.6547677278910567, "learning_rate": 6.4301929959765375e-06, "loss": 0.3455, "step": 9401 }, { "epoch": 0.42552613713509846, "grad_norm": 0.6676633441951189, "learning_rate": 6.429490682430881e-06, "loss": 0.3177, "step": 9402 }, { "epoch": 0.425571396243494, "grad_norm": 0.36912778358643694, "learning_rate": 6.42878833816986e-06, "loss": 0.4467, "step": 9403 }, { "epoch": 0.4256166553518896, "grad_norm": 0.7687649341155516, "learning_rate": 6.428085963208567e-06, "loss": 0.3217, "step": 9404 }, { "epoch": 0.4256619144602851, "grad_norm": 0.6299817488297312, "learning_rate": 6.427383557562091e-06, "loss": 0.3548, "step": 9405 }, { "epoch": 0.4257071735686807, "grad_norm": 0.624422589718829, "learning_rate": 6.426681121245527e-06, "loss": 0.3691, "step": 9406 }, { "epoch": 0.42575243267707624, "grad_norm": 0.6652405335093032, "learning_rate": 6.4259786542739676e-06, "loss": 0.3522, "step": 9407 }, { "epoch": 0.42579769178547183, "grad_norm": 0.6581907622201878, "learning_rate": 6.425276156662506e-06, "loss": 0.4053, "step": 9408 }, { "epoch": 0.42584295089386737, "grad_norm": 0.6736635664062993, "learning_rate": 6.424573628426239e-06, "loss": 0.3439, "step": 9409 }, { "epoch": 0.42588821000226296, "grad_norm": 0.35440063532291827, "learning_rate": 6.423871069580256e-06, "loss": 0.4577, "step": 9410 }, { "epoch": 0.42593346911065855, "grad_norm": 0.6730631122694064, "learning_rate": 6.423168480139661e-06, "loss": 0.3462, "step": 9411 }, { "epoch": 0.4259787282190541, "grad_norm": 0.2975473247448006, "learning_rate": 6.4224658601195445e-06, "loss": 0.4678, "step": 9412 }, { "epoch": 0.42602398732744967, "grad_norm": 0.7888191764002732, "learning_rate": 6.4217632095350046e-06, "loss": 0.3254, "step": 9413 }, { "epoch": 0.4260692464358452, "grad_norm": 0.6764281596247608, "learning_rate": 6.421060528401141e-06, "loss": 0.3372, "step": 9414 }, { "epoch": 0.4261145055442408, "grad_norm": 0.6446197101185022, "learning_rate": 6.42035781673305e-06, "loss": 0.3741, "step": 9415 }, { "epoch": 0.42615976465263633, "grad_norm": 0.6554610625181327, "learning_rate": 6.419655074545833e-06, "loss": 0.3316, "step": 9416 }, { "epoch": 0.4262050237610319, "grad_norm": 0.7530874118558856, "learning_rate": 6.41895230185459e-06, "loss": 0.3527, "step": 9417 }, { "epoch": 0.42625028286942745, "grad_norm": 0.670199306873179, "learning_rate": 6.418249498674417e-06, "loss": 0.3683, "step": 9418 }, { "epoch": 0.42629554197782304, "grad_norm": 0.36679226234773377, "learning_rate": 6.41754666502042e-06, "loss": 0.4751, "step": 9419 }, { "epoch": 0.4263408010862186, "grad_norm": 0.6648165630423366, "learning_rate": 6.416843800907698e-06, "loss": 0.3224, "step": 9420 }, { "epoch": 0.42638606019461417, "grad_norm": 0.34829911958308424, "learning_rate": 6.416140906351355e-06, "loss": 0.4722, "step": 9421 }, { "epoch": 0.4264313193030097, "grad_norm": 0.6661627189350696, "learning_rate": 6.4154379813664926e-06, "loss": 0.3925, "step": 9422 }, { "epoch": 0.4264765784114053, "grad_norm": 0.6552253402331372, "learning_rate": 6.4147350259682155e-06, "loss": 0.3515, "step": 9423 }, { "epoch": 0.4265218375198009, "grad_norm": 0.6309157437493499, "learning_rate": 6.414032040171627e-06, "loss": 0.3435, "step": 9424 }, { "epoch": 0.4265670966281964, "grad_norm": 0.6562280887615455, "learning_rate": 6.413329023991834e-06, "loss": 0.3759, "step": 9425 }, { "epoch": 0.426612355736592, "grad_norm": 0.6730454798509388, "learning_rate": 6.412625977443939e-06, "loss": 0.3487, "step": 9426 }, { "epoch": 0.42665761484498754, "grad_norm": 0.3568611834228988, "learning_rate": 6.411922900543053e-06, "loss": 0.5055, "step": 9427 }, { "epoch": 0.42670287395338313, "grad_norm": 0.3593917177276333, "learning_rate": 6.411219793304278e-06, "loss": 0.4881, "step": 9428 }, { "epoch": 0.42674813306177867, "grad_norm": 0.6544861807828374, "learning_rate": 6.410516655742725e-06, "loss": 0.3341, "step": 9429 }, { "epoch": 0.42679339217017426, "grad_norm": 0.655777602680432, "learning_rate": 6.4098134878735005e-06, "loss": 0.3393, "step": 9430 }, { "epoch": 0.4268386512785698, "grad_norm": 0.6226018999883778, "learning_rate": 6.409110289711715e-06, "loss": 0.2939, "step": 9431 }, { "epoch": 0.4268839103869654, "grad_norm": 0.7018429768891283, "learning_rate": 6.4084070612724765e-06, "loss": 0.364, "step": 9432 }, { "epoch": 0.4269291694953609, "grad_norm": 0.34061795632785796, "learning_rate": 6.407703802570896e-06, "loss": 0.4937, "step": 9433 }, { "epoch": 0.4269744286037565, "grad_norm": 0.7167933010917638, "learning_rate": 6.407000513622083e-06, "loss": 0.3767, "step": 9434 }, { "epoch": 0.4270196877121521, "grad_norm": 0.643637606655535, "learning_rate": 6.4062971944411514e-06, "loss": 0.3876, "step": 9435 }, { "epoch": 0.42706494682054763, "grad_norm": 0.6931332491271285, "learning_rate": 6.405593845043212e-06, "loss": 0.3753, "step": 9436 }, { "epoch": 0.4271102059289432, "grad_norm": 0.3162355029302319, "learning_rate": 6.4048904654433785e-06, "loss": 0.4618, "step": 9437 }, { "epoch": 0.42715546503733876, "grad_norm": 0.7194911038746254, "learning_rate": 6.4041870556567645e-06, "loss": 0.347, "step": 9438 }, { "epoch": 0.42720072414573435, "grad_norm": 0.6319541792320363, "learning_rate": 6.4034836156984805e-06, "loss": 0.3973, "step": 9439 }, { "epoch": 0.4272459832541299, "grad_norm": 0.6732680070654008, "learning_rate": 6.4027801455836466e-06, "loss": 0.3761, "step": 9440 }, { "epoch": 0.42729124236252547, "grad_norm": 0.5858062417249877, "learning_rate": 6.402076645327374e-06, "loss": 0.3256, "step": 9441 }, { "epoch": 0.427336501470921, "grad_norm": 0.6912276975662498, "learning_rate": 6.401373114944781e-06, "loss": 0.3255, "step": 9442 }, { "epoch": 0.4273817605793166, "grad_norm": 0.5969928258655933, "learning_rate": 6.400669554450985e-06, "loss": 0.3305, "step": 9443 }, { "epoch": 0.42742701968771213, "grad_norm": 0.3981362523108663, "learning_rate": 6.3999659638611e-06, "loss": 0.4703, "step": 9444 }, { "epoch": 0.4274722787961077, "grad_norm": 0.6625477549277766, "learning_rate": 6.399262343190247e-06, "loss": 0.3542, "step": 9445 }, { "epoch": 0.4275175379045033, "grad_norm": 1.0213337138421563, "learning_rate": 6.398558692453545e-06, "loss": 0.3116, "step": 9446 }, { "epoch": 0.42756279701289884, "grad_norm": 0.6018532829667804, "learning_rate": 6.397855011666109e-06, "loss": 0.3276, "step": 9447 }, { "epoch": 0.42760805612129443, "grad_norm": 0.6584790078653553, "learning_rate": 6.397151300843065e-06, "loss": 0.3211, "step": 9448 }, { "epoch": 0.42765331522968997, "grad_norm": 1.0077965139681038, "learning_rate": 6.396447559999528e-06, "loss": 0.3514, "step": 9449 }, { "epoch": 0.42769857433808556, "grad_norm": 0.6114458956301414, "learning_rate": 6.3957437891506236e-06, "loss": 0.3492, "step": 9450 }, { "epoch": 0.4277438334464811, "grad_norm": 0.3438870869900401, "learning_rate": 6.395039988311472e-06, "loss": 0.4868, "step": 9451 }, { "epoch": 0.4277890925548767, "grad_norm": 0.6191567022098997, "learning_rate": 6.394336157497195e-06, "loss": 0.3622, "step": 9452 }, { "epoch": 0.4278343516632722, "grad_norm": 0.654114259431335, "learning_rate": 6.393632296722916e-06, "loss": 0.3345, "step": 9453 }, { "epoch": 0.4278796107716678, "grad_norm": 0.631728787366428, "learning_rate": 6.39292840600376e-06, "loss": 0.3388, "step": 9454 }, { "epoch": 0.42792486988006334, "grad_norm": 0.6681836940210151, "learning_rate": 6.39222448535485e-06, "loss": 0.3387, "step": 9455 }, { "epoch": 0.42797012898845893, "grad_norm": 0.6476944324988394, "learning_rate": 6.3915205347913124e-06, "loss": 0.3091, "step": 9456 }, { "epoch": 0.42801538809685447, "grad_norm": 0.5720201070992996, "learning_rate": 6.3908165543282706e-06, "loss": 0.3336, "step": 9457 }, { "epoch": 0.42806064720525006, "grad_norm": 0.5846592923118611, "learning_rate": 6.390112543980854e-06, "loss": 0.2851, "step": 9458 }, { "epoch": 0.42810590631364565, "grad_norm": 0.6430948603061865, "learning_rate": 6.389408503764188e-06, "loss": 0.3803, "step": 9459 }, { "epoch": 0.4281511654220412, "grad_norm": 0.6632950837946977, "learning_rate": 6.3887044336934005e-06, "loss": 0.3493, "step": 9460 }, { "epoch": 0.42819642453043677, "grad_norm": 0.6500691181724101, "learning_rate": 6.38800033378362e-06, "loss": 0.3627, "step": 9461 }, { "epoch": 0.4282416836388323, "grad_norm": 0.6254439914297663, "learning_rate": 6.387296204049975e-06, "loss": 0.3479, "step": 9462 }, { "epoch": 0.4282869427472279, "grad_norm": 0.6662471119654548, "learning_rate": 6.386592044507595e-06, "loss": 0.3588, "step": 9463 }, { "epoch": 0.42833220185562343, "grad_norm": 0.6723755867111733, "learning_rate": 6.385887855171611e-06, "loss": 0.3142, "step": 9464 }, { "epoch": 0.428377460964019, "grad_norm": 0.6204698827060886, "learning_rate": 6.3851836360571525e-06, "loss": 0.3371, "step": 9465 }, { "epoch": 0.42842272007241455, "grad_norm": 0.7133235517253447, "learning_rate": 6.384479387179353e-06, "loss": 0.3384, "step": 9466 }, { "epoch": 0.42846797918081014, "grad_norm": 0.37723556554414667, "learning_rate": 6.383775108553344e-06, "loss": 0.4872, "step": 9467 }, { "epoch": 0.4285132382892057, "grad_norm": 0.7365255621440524, "learning_rate": 6.383070800194257e-06, "loss": 0.372, "step": 9468 }, { "epoch": 0.42855849739760127, "grad_norm": 0.5662704539748532, "learning_rate": 6.382366462117227e-06, "loss": 0.3212, "step": 9469 }, { "epoch": 0.42860375650599686, "grad_norm": 0.6703587861944948, "learning_rate": 6.381662094337385e-06, "loss": 0.3255, "step": 9470 }, { "epoch": 0.4286490156143924, "grad_norm": 0.6570286513757565, "learning_rate": 6.380957696869872e-06, "loss": 0.3454, "step": 9471 }, { "epoch": 0.428694274722788, "grad_norm": 0.653602329680775, "learning_rate": 6.380253269729816e-06, "loss": 0.3366, "step": 9472 }, { "epoch": 0.4287395338311835, "grad_norm": 0.6247528482411291, "learning_rate": 6.379548812932358e-06, "loss": 0.3405, "step": 9473 }, { "epoch": 0.4287847929395791, "grad_norm": 0.6564823764144673, "learning_rate": 6.3788443264926325e-06, "loss": 0.3396, "step": 9474 }, { "epoch": 0.42883005204797464, "grad_norm": 0.6338050514675024, "learning_rate": 6.378139810425777e-06, "loss": 0.3855, "step": 9475 }, { "epoch": 0.42887531115637023, "grad_norm": 0.6320833873764625, "learning_rate": 6.37743526474693e-06, "loss": 0.343, "step": 9476 }, { "epoch": 0.42892057026476577, "grad_norm": 0.6692924545788045, "learning_rate": 6.37673068947123e-06, "loss": 0.3721, "step": 9477 }, { "epoch": 0.42896582937316136, "grad_norm": 0.6481553878042078, "learning_rate": 6.376026084613813e-06, "loss": 0.3149, "step": 9478 }, { "epoch": 0.4290110884815569, "grad_norm": 0.6127879848183435, "learning_rate": 6.375321450189826e-06, "loss": 0.3226, "step": 9479 }, { "epoch": 0.4290563475899525, "grad_norm": 0.6519182770593378, "learning_rate": 6.374616786214402e-06, "loss": 0.3491, "step": 9480 }, { "epoch": 0.429101606698348, "grad_norm": 0.6238691493270582, "learning_rate": 6.373912092702686e-06, "loss": 0.339, "step": 9481 }, { "epoch": 0.4291468658067436, "grad_norm": 0.6332219367972534, "learning_rate": 6.3732073696698194e-06, "loss": 0.312, "step": 9482 }, { "epoch": 0.4291921249151392, "grad_norm": 0.3263672533628197, "learning_rate": 6.372502617130942e-06, "loss": 0.4572, "step": 9483 }, { "epoch": 0.42923738402353473, "grad_norm": 0.5997874965565261, "learning_rate": 6.371797835101201e-06, "loss": 0.352, "step": 9484 }, { "epoch": 0.4292826431319303, "grad_norm": 0.31386368068126863, "learning_rate": 6.371093023595736e-06, "loss": 0.4785, "step": 9485 }, { "epoch": 0.42932790224032585, "grad_norm": 0.5963847286606663, "learning_rate": 6.370388182629693e-06, "loss": 0.3197, "step": 9486 }, { "epoch": 0.42937316134872144, "grad_norm": 0.6467820210257044, "learning_rate": 6.3696833122182175e-06, "loss": 0.3258, "step": 9487 }, { "epoch": 0.429418420457117, "grad_norm": 0.601051314895126, "learning_rate": 6.368978412376456e-06, "loss": 0.3583, "step": 9488 }, { "epoch": 0.42946367956551257, "grad_norm": 0.5897040599319083, "learning_rate": 6.3682734831195495e-06, "loss": 0.3357, "step": 9489 }, { "epoch": 0.4295089386739081, "grad_norm": 0.5789772461006665, "learning_rate": 6.367568524462651e-06, "loss": 0.2893, "step": 9490 }, { "epoch": 0.4295541977823037, "grad_norm": 0.6720690671844922, "learning_rate": 6.366863536420903e-06, "loss": 0.3493, "step": 9491 }, { "epoch": 0.4295994568906992, "grad_norm": 0.9106057929009909, "learning_rate": 6.3661585190094555e-06, "loss": 0.3756, "step": 9492 }, { "epoch": 0.4296447159990948, "grad_norm": 0.6571383440927613, "learning_rate": 6.365453472243458e-06, "loss": 0.3645, "step": 9493 }, { "epoch": 0.4296899751074904, "grad_norm": 0.6064326061612422, "learning_rate": 6.36474839613806e-06, "loss": 0.3029, "step": 9494 }, { "epoch": 0.42973523421588594, "grad_norm": 0.691674373407346, "learning_rate": 6.364043290708409e-06, "loss": 0.3526, "step": 9495 }, { "epoch": 0.42978049332428153, "grad_norm": 0.6137817520380174, "learning_rate": 6.363338155969658e-06, "loss": 0.31, "step": 9496 }, { "epoch": 0.42982575243267707, "grad_norm": 0.6353400075810914, "learning_rate": 6.362632991936956e-06, "loss": 0.3724, "step": 9497 }, { "epoch": 0.42987101154107266, "grad_norm": 0.6286136029257747, "learning_rate": 6.361927798625458e-06, "loss": 0.3388, "step": 9498 }, { "epoch": 0.4299162706494682, "grad_norm": 0.6081121318544058, "learning_rate": 6.361222576050312e-06, "loss": 0.3398, "step": 9499 }, { "epoch": 0.4299615297578638, "grad_norm": 0.6939297880857666, "learning_rate": 6.360517324226676e-06, "loss": 0.3425, "step": 9500 }, { "epoch": 0.4300067888662593, "grad_norm": 0.6198614495621046, "learning_rate": 6.3598120431697e-06, "loss": 0.3216, "step": 9501 }, { "epoch": 0.4300520479746549, "grad_norm": 0.6307838569841074, "learning_rate": 6.35910673289454e-06, "loss": 0.3396, "step": 9502 }, { "epoch": 0.43009730708305044, "grad_norm": 0.6183345620328679, "learning_rate": 6.358401393416349e-06, "loss": 0.3387, "step": 9503 }, { "epoch": 0.43014256619144603, "grad_norm": 0.4830405915588279, "learning_rate": 6.357696024750286e-06, "loss": 0.4909, "step": 9504 }, { "epoch": 0.4301878252998416, "grad_norm": 0.6336024479610478, "learning_rate": 6.356990626911503e-06, "loss": 0.3208, "step": 9505 }, { "epoch": 0.43023308440823715, "grad_norm": 0.6690021288856202, "learning_rate": 6.356285199915162e-06, "loss": 0.364, "step": 9506 }, { "epoch": 0.43027834351663274, "grad_norm": 0.6478736001609527, "learning_rate": 6.355579743776415e-06, "loss": 0.3436, "step": 9507 }, { "epoch": 0.4303236026250283, "grad_norm": 0.7058901153166096, "learning_rate": 6.354874258510425e-06, "loss": 0.3633, "step": 9508 }, { "epoch": 0.43036886173342387, "grad_norm": 0.646687768947303, "learning_rate": 6.3541687441323466e-06, "loss": 0.3173, "step": 9509 }, { "epoch": 0.4304141208418194, "grad_norm": 0.6384243764796345, "learning_rate": 6.353463200657341e-06, "loss": 0.3546, "step": 9510 }, { "epoch": 0.430459379950215, "grad_norm": 0.5782346792836871, "learning_rate": 6.352757628100569e-06, "loss": 0.3214, "step": 9511 }, { "epoch": 0.4305046390586105, "grad_norm": 0.6410978720387365, "learning_rate": 6.352052026477189e-06, "loss": 0.3601, "step": 9512 }, { "epoch": 0.4305498981670061, "grad_norm": 0.6338795047306764, "learning_rate": 6.351346395802365e-06, "loss": 0.2959, "step": 9513 }, { "epoch": 0.43059515727540165, "grad_norm": 0.6219979703602239, "learning_rate": 6.350640736091256e-06, "loss": 0.3601, "step": 9514 }, { "epoch": 0.43064041638379724, "grad_norm": 0.603935883727528, "learning_rate": 6.349935047359026e-06, "loss": 0.3235, "step": 9515 }, { "epoch": 0.4306856754921928, "grad_norm": 0.6304081216699563, "learning_rate": 6.349229329620839e-06, "loss": 0.3183, "step": 9516 }, { "epoch": 0.43073093460058837, "grad_norm": 0.6260900712445192, "learning_rate": 6.348523582891857e-06, "loss": 0.3703, "step": 9517 }, { "epoch": 0.43077619370898396, "grad_norm": 0.7608811482008814, "learning_rate": 6.347817807187242e-06, "loss": 0.3287, "step": 9518 }, { "epoch": 0.4308214528173795, "grad_norm": 0.6650272533873631, "learning_rate": 6.347112002522167e-06, "loss": 0.32, "step": 9519 }, { "epoch": 0.4308667119257751, "grad_norm": 0.6347777593589885, "learning_rate": 6.346406168911787e-06, "loss": 0.3253, "step": 9520 }, { "epoch": 0.4309119710341706, "grad_norm": 0.6417946074599703, "learning_rate": 6.3457003063712775e-06, "loss": 0.3733, "step": 9521 }, { "epoch": 0.4309572301425662, "grad_norm": 0.7687534975018414, "learning_rate": 6.344994414915801e-06, "loss": 0.322, "step": 9522 }, { "epoch": 0.43100248925096174, "grad_norm": 0.7554017704102255, "learning_rate": 6.3442884945605244e-06, "loss": 0.3365, "step": 9523 }, { "epoch": 0.43104774835935733, "grad_norm": 0.6077760535588105, "learning_rate": 6.343582545320617e-06, "loss": 0.3264, "step": 9524 }, { "epoch": 0.43109300746775286, "grad_norm": 0.7121399654195334, "learning_rate": 6.342876567211247e-06, "loss": 0.3541, "step": 9525 }, { "epoch": 0.43113826657614845, "grad_norm": 0.5372455333369079, "learning_rate": 6.3421705602475835e-06, "loss": 0.4888, "step": 9526 }, { "epoch": 0.431183525684544, "grad_norm": 0.4297497874863686, "learning_rate": 6.341464524444798e-06, "loss": 0.5041, "step": 9527 }, { "epoch": 0.4312287847929396, "grad_norm": 0.592031368327786, "learning_rate": 6.340758459818058e-06, "loss": 0.3267, "step": 9528 }, { "epoch": 0.43127404390133517, "grad_norm": 0.6250388602448749, "learning_rate": 6.340052366382539e-06, "loss": 0.3383, "step": 9529 }, { "epoch": 0.4313193030097307, "grad_norm": 0.6089617843367738, "learning_rate": 6.339346244153408e-06, "loss": 0.3561, "step": 9530 }, { "epoch": 0.4313645621181263, "grad_norm": 0.6268140034238477, "learning_rate": 6.3386400931458415e-06, "loss": 0.3427, "step": 9531 }, { "epoch": 0.43140982122652183, "grad_norm": 0.6129149633673354, "learning_rate": 6.33793391337501e-06, "loss": 0.3338, "step": 9532 }, { "epoch": 0.4314550803349174, "grad_norm": 0.6096958155296873, "learning_rate": 6.337227704856088e-06, "loss": 0.3824, "step": 9533 }, { "epoch": 0.43150033944331295, "grad_norm": 0.7077925181477205, "learning_rate": 6.336521467604248e-06, "loss": 0.4856, "step": 9534 }, { "epoch": 0.43154559855170854, "grad_norm": 0.698918442674996, "learning_rate": 6.33581520163467e-06, "loss": 0.3558, "step": 9535 }, { "epoch": 0.4315908576601041, "grad_norm": 0.6263258929760982, "learning_rate": 6.335108906962523e-06, "loss": 0.3547, "step": 9536 }, { "epoch": 0.43163611676849967, "grad_norm": 0.6214809292754883, "learning_rate": 6.334402583602988e-06, "loss": 0.3516, "step": 9537 }, { "epoch": 0.4316813758768952, "grad_norm": 0.6941423915887114, "learning_rate": 6.333696231571238e-06, "loss": 0.3184, "step": 9538 }, { "epoch": 0.4317266349852908, "grad_norm": 0.805609294731211, "learning_rate": 6.332989850882453e-06, "loss": 0.3666, "step": 9539 }, { "epoch": 0.4317718940936864, "grad_norm": 0.6618733159358412, "learning_rate": 6.33228344155181e-06, "loss": 0.3617, "step": 9540 }, { "epoch": 0.4318171532020819, "grad_norm": 0.34824166171139703, "learning_rate": 6.331577003594487e-06, "loss": 0.4706, "step": 9541 }, { "epoch": 0.4318624123104775, "grad_norm": 0.7031531376323451, "learning_rate": 6.330870537025664e-06, "loss": 0.3587, "step": 9542 }, { "epoch": 0.43190767141887304, "grad_norm": 0.629939383500528, "learning_rate": 6.3301640418605205e-06, "loss": 0.3356, "step": 9543 }, { "epoch": 0.43195293052726863, "grad_norm": 0.7383560966237717, "learning_rate": 6.329457518114237e-06, "loss": 0.3183, "step": 9544 }, { "epoch": 0.43199818963566416, "grad_norm": 0.6529920903315911, "learning_rate": 6.3287509658019955e-06, "loss": 0.3452, "step": 9545 }, { "epoch": 0.43204344874405975, "grad_norm": 0.6080741176068959, "learning_rate": 6.328044384938977e-06, "loss": 0.3728, "step": 9546 }, { "epoch": 0.4320887078524553, "grad_norm": 0.6343854373387572, "learning_rate": 6.327337775540362e-06, "loss": 0.3624, "step": 9547 }, { "epoch": 0.4321339669608509, "grad_norm": 0.6243543241577083, "learning_rate": 6.326631137621336e-06, "loss": 0.3479, "step": 9548 }, { "epoch": 0.4321792260692464, "grad_norm": 0.6233388741859107, "learning_rate": 6.32592447119708e-06, "loss": 0.3652, "step": 9549 }, { "epoch": 0.432224485177642, "grad_norm": 0.6586508655334762, "learning_rate": 6.32521777628278e-06, "loss": 0.3699, "step": 9550 }, { "epoch": 0.43226974428603754, "grad_norm": 0.5813175230678054, "learning_rate": 6.324511052893621e-06, "loss": 0.2788, "step": 9551 }, { "epoch": 0.43231500339443313, "grad_norm": 0.6953927867608366, "learning_rate": 6.323804301044787e-06, "loss": 0.3374, "step": 9552 }, { "epoch": 0.4323602625028287, "grad_norm": 0.5648584961368391, "learning_rate": 6.323097520751463e-06, "loss": 0.3052, "step": 9553 }, { "epoch": 0.43240552161122425, "grad_norm": 0.4741493747837489, "learning_rate": 6.322390712028839e-06, "loss": 0.5073, "step": 9554 }, { "epoch": 0.43245078071961984, "grad_norm": 0.6282792193419438, "learning_rate": 6.321683874892097e-06, "loss": 0.377, "step": 9555 }, { "epoch": 0.4324960398280154, "grad_norm": 0.6002379549916733, "learning_rate": 6.3209770093564315e-06, "loss": 0.317, "step": 9556 }, { "epoch": 0.43254129893641097, "grad_norm": 0.6804592964508102, "learning_rate": 6.320270115437024e-06, "loss": 0.3424, "step": 9557 }, { "epoch": 0.4325865580448065, "grad_norm": 0.6396321643958531, "learning_rate": 6.319563193149069e-06, "loss": 0.3416, "step": 9558 }, { "epoch": 0.4326318171532021, "grad_norm": 0.6131211335825673, "learning_rate": 6.318856242507751e-06, "loss": 0.3518, "step": 9559 }, { "epoch": 0.4326770762615976, "grad_norm": 0.6539758858362998, "learning_rate": 6.318149263528266e-06, "loss": 0.3267, "step": 9560 }, { "epoch": 0.4327223353699932, "grad_norm": 0.6427223122736058, "learning_rate": 6.3174422562258e-06, "loss": 0.3866, "step": 9561 }, { "epoch": 0.43276759447838875, "grad_norm": 0.620529057754623, "learning_rate": 6.316735220615546e-06, "loss": 0.3473, "step": 9562 }, { "epoch": 0.43281285358678434, "grad_norm": 0.5354036421937424, "learning_rate": 6.316028156712697e-06, "loss": 0.3013, "step": 9563 }, { "epoch": 0.43285811269517993, "grad_norm": 0.38415502626453674, "learning_rate": 6.315321064532444e-06, "loss": 0.4936, "step": 9564 }, { "epoch": 0.43290337180357547, "grad_norm": 0.6673503544936131, "learning_rate": 6.31461394408998e-06, "loss": 0.3507, "step": 9565 }, { "epoch": 0.43294863091197106, "grad_norm": 0.6086413226302155, "learning_rate": 6.313906795400503e-06, "loss": 0.3543, "step": 9566 }, { "epoch": 0.4329938900203666, "grad_norm": 0.613894175157072, "learning_rate": 6.313199618479202e-06, "loss": 0.3341, "step": 9567 }, { "epoch": 0.4330391491287622, "grad_norm": 0.27093995914272184, "learning_rate": 6.312492413341274e-06, "loss": 0.476, "step": 9568 }, { "epoch": 0.4330844082371577, "grad_norm": 0.6441830810283002, "learning_rate": 6.311785180001917e-06, "loss": 0.3675, "step": 9569 }, { "epoch": 0.4331296673455533, "grad_norm": 0.2881058366572796, "learning_rate": 6.311077918476324e-06, "loss": 0.5011, "step": 9570 }, { "epoch": 0.43317492645394884, "grad_norm": 0.3016593831565181, "learning_rate": 6.3103706287796925e-06, "loss": 0.4897, "step": 9571 }, { "epoch": 0.43322018556234443, "grad_norm": 0.6379247355962797, "learning_rate": 6.309663310927222e-06, "loss": 0.3611, "step": 9572 }, { "epoch": 0.43326544467073996, "grad_norm": 0.6403026737285819, "learning_rate": 6.30895596493411e-06, "loss": 0.3237, "step": 9573 }, { "epoch": 0.43331070377913555, "grad_norm": 0.29694733852073946, "learning_rate": 6.308248590815552e-06, "loss": 0.4911, "step": 9574 }, { "epoch": 0.43335596288753114, "grad_norm": 0.6408292536123505, "learning_rate": 6.3075411885867525e-06, "loss": 0.3418, "step": 9575 }, { "epoch": 0.4334012219959267, "grad_norm": 0.6038428892113239, "learning_rate": 6.306833758262906e-06, "loss": 0.3343, "step": 9576 }, { "epoch": 0.43344648110432227, "grad_norm": 0.6628742329075414, "learning_rate": 6.306126299859218e-06, "loss": 0.3618, "step": 9577 }, { "epoch": 0.4334917402127178, "grad_norm": 0.3045993291323623, "learning_rate": 6.305418813390885e-06, "loss": 0.5013, "step": 9578 }, { "epoch": 0.4335369993211134, "grad_norm": 0.6780581289844321, "learning_rate": 6.304711298873113e-06, "loss": 0.3365, "step": 9579 }, { "epoch": 0.4335822584295089, "grad_norm": 0.6211410836718435, "learning_rate": 6.304003756321101e-06, "loss": 0.3188, "step": 9580 }, { "epoch": 0.4336275175379045, "grad_norm": 0.7669868311980905, "learning_rate": 6.303296185750054e-06, "loss": 0.3625, "step": 9581 }, { "epoch": 0.43367277664630005, "grad_norm": 0.7573626626127393, "learning_rate": 6.302588587175175e-06, "loss": 0.3708, "step": 9582 }, { "epoch": 0.43371803575469564, "grad_norm": 0.6855161490216711, "learning_rate": 6.301880960611668e-06, "loss": 0.3345, "step": 9583 }, { "epoch": 0.4337632948630912, "grad_norm": 0.6437440624954825, "learning_rate": 6.301173306074735e-06, "loss": 0.323, "step": 9584 }, { "epoch": 0.43380855397148677, "grad_norm": 0.6232157340832585, "learning_rate": 6.300465623579587e-06, "loss": 0.3519, "step": 9585 }, { "epoch": 0.4338538130798823, "grad_norm": 0.6634296907842349, "learning_rate": 6.299757913141424e-06, "loss": 0.3498, "step": 9586 }, { "epoch": 0.4338990721882779, "grad_norm": 1.2642722267366815, "learning_rate": 6.299050174775458e-06, "loss": 0.3335, "step": 9587 }, { "epoch": 0.4339443312966735, "grad_norm": 0.651210690916979, "learning_rate": 6.298342408496892e-06, "loss": 0.3338, "step": 9588 }, { "epoch": 0.433989590405069, "grad_norm": 0.8266010855192732, "learning_rate": 6.297634614320937e-06, "loss": 0.3365, "step": 9589 }, { "epoch": 0.4340348495134646, "grad_norm": 0.777980841927406, "learning_rate": 6.2969267922627975e-06, "loss": 0.3787, "step": 9590 }, { "epoch": 0.43408010862186014, "grad_norm": 0.6771848978264605, "learning_rate": 6.296218942337685e-06, "loss": 0.3893, "step": 9591 }, { "epoch": 0.43412536773025573, "grad_norm": 0.6163189995087976, "learning_rate": 6.295511064560808e-06, "loss": 0.3225, "step": 9592 }, { "epoch": 0.43417062683865126, "grad_norm": 0.657764399625542, "learning_rate": 6.294803158947378e-06, "loss": 0.3155, "step": 9593 }, { "epoch": 0.43421588594704685, "grad_norm": 0.6305694061235845, "learning_rate": 6.294095225512604e-06, "loss": 0.3458, "step": 9594 }, { "epoch": 0.4342611450554424, "grad_norm": 0.35629379362146724, "learning_rate": 6.293387264271699e-06, "loss": 0.4632, "step": 9595 }, { "epoch": 0.434306404163838, "grad_norm": 0.6351505528430289, "learning_rate": 6.292679275239875e-06, "loss": 0.3321, "step": 9596 }, { "epoch": 0.4343516632722335, "grad_norm": 0.6206739807132818, "learning_rate": 6.29197125843234e-06, "loss": 0.2933, "step": 9597 }, { "epoch": 0.4343969223806291, "grad_norm": 0.6170676077633372, "learning_rate": 6.291263213864314e-06, "loss": 0.335, "step": 9598 }, { "epoch": 0.4344421814890247, "grad_norm": 0.29918808007546976, "learning_rate": 6.290555141551006e-06, "loss": 0.5105, "step": 9599 }, { "epoch": 0.4344874405974202, "grad_norm": 0.6466778800542244, "learning_rate": 6.289847041507632e-06, "loss": 0.318, "step": 9600 }, { "epoch": 0.4345326997058158, "grad_norm": 0.6644688745721504, "learning_rate": 6.289138913749406e-06, "loss": 0.3263, "step": 9601 }, { "epoch": 0.43457795881421135, "grad_norm": 0.5956829179386198, "learning_rate": 6.2884307582915434e-06, "loss": 0.3247, "step": 9602 }, { "epoch": 0.43462321792260694, "grad_norm": 0.6384688707836155, "learning_rate": 6.287722575149262e-06, "loss": 0.3776, "step": 9603 }, { "epoch": 0.4346684770310025, "grad_norm": 0.6210441993230379, "learning_rate": 6.287014364337778e-06, "loss": 0.3438, "step": 9604 }, { "epoch": 0.43471373613939807, "grad_norm": 0.6515747518311703, "learning_rate": 6.286306125872307e-06, "loss": 0.3044, "step": 9605 }, { "epoch": 0.4347589952477936, "grad_norm": 0.6065693284092167, "learning_rate": 6.285597859768069e-06, "loss": 0.3433, "step": 9606 }, { "epoch": 0.4348042543561892, "grad_norm": 0.649773066821743, "learning_rate": 6.28488956604028e-06, "loss": 0.3329, "step": 9607 }, { "epoch": 0.4348495134645847, "grad_norm": 0.6402533582637937, "learning_rate": 6.284181244704161e-06, "loss": 0.3311, "step": 9608 }, { "epoch": 0.4348947725729803, "grad_norm": 0.34486747340777774, "learning_rate": 6.2834728957749315e-06, "loss": 0.4732, "step": 9609 }, { "epoch": 0.43494003168137585, "grad_norm": 0.6517980038717447, "learning_rate": 6.2827645192678114e-06, "loss": 0.352, "step": 9610 }, { "epoch": 0.43498529078977144, "grad_norm": 0.6361557329684554, "learning_rate": 6.282056115198021e-06, "loss": 0.3426, "step": 9611 }, { "epoch": 0.43503054989816703, "grad_norm": 0.29829048121652124, "learning_rate": 6.2813476835807814e-06, "loss": 0.4682, "step": 9612 }, { "epoch": 0.43507580900656256, "grad_norm": 0.6764980210107786, "learning_rate": 6.280639224431317e-06, "loss": 0.3527, "step": 9613 }, { "epoch": 0.43512106811495815, "grad_norm": 0.6549779058980503, "learning_rate": 6.27993073776485e-06, "loss": 0.3193, "step": 9614 }, { "epoch": 0.4351663272233537, "grad_norm": 0.6323811410845982, "learning_rate": 6.279222223596599e-06, "loss": 0.3335, "step": 9615 }, { "epoch": 0.4352115863317493, "grad_norm": 0.5938579736378444, "learning_rate": 6.278513681941793e-06, "loss": 0.3349, "step": 9616 }, { "epoch": 0.4352568454401448, "grad_norm": 0.2802435542448092, "learning_rate": 6.277805112815656e-06, "loss": 0.4687, "step": 9617 }, { "epoch": 0.4353021045485404, "grad_norm": 0.6716376377922707, "learning_rate": 6.277096516233409e-06, "loss": 0.3541, "step": 9618 }, { "epoch": 0.43534736365693594, "grad_norm": 0.29189397078435236, "learning_rate": 6.276387892210281e-06, "loss": 0.4743, "step": 9619 }, { "epoch": 0.4353926227653315, "grad_norm": 0.7074551039812558, "learning_rate": 6.275679240761499e-06, "loss": 0.3602, "step": 9620 }, { "epoch": 0.43543788187372706, "grad_norm": 0.5986286546331557, "learning_rate": 6.274970561902286e-06, "loss": 0.319, "step": 9621 }, { "epoch": 0.43548314098212265, "grad_norm": 0.2924588447120755, "learning_rate": 6.274261855647872e-06, "loss": 0.4556, "step": 9622 }, { "epoch": 0.43552840009051824, "grad_norm": 0.6350275796798999, "learning_rate": 6.273553122013485e-06, "loss": 0.3189, "step": 9623 }, { "epoch": 0.4355736591989138, "grad_norm": 0.6569204309990117, "learning_rate": 6.272844361014352e-06, "loss": 0.3469, "step": 9624 }, { "epoch": 0.43561891830730937, "grad_norm": 0.28160925038059104, "learning_rate": 6.272135572665704e-06, "loss": 0.475, "step": 9625 }, { "epoch": 0.4356641774157049, "grad_norm": 0.6936511520119449, "learning_rate": 6.271426756982768e-06, "loss": 0.3391, "step": 9626 }, { "epoch": 0.4357094365241005, "grad_norm": 0.6334270977412247, "learning_rate": 6.270717913980777e-06, "loss": 0.3363, "step": 9627 }, { "epoch": 0.435754695632496, "grad_norm": 0.7076211424429047, "learning_rate": 6.270009043674959e-06, "loss": 0.3338, "step": 9628 }, { "epoch": 0.4357999547408916, "grad_norm": 0.2949010307384991, "learning_rate": 6.26930014608055e-06, "loss": 0.4749, "step": 9629 }, { "epoch": 0.43584521384928715, "grad_norm": 0.2991584601582421, "learning_rate": 6.268591221212779e-06, "loss": 0.5113, "step": 9630 }, { "epoch": 0.43589047295768274, "grad_norm": 0.28826042168950156, "learning_rate": 6.2678822690868765e-06, "loss": 0.4618, "step": 9631 }, { "epoch": 0.4359357320660783, "grad_norm": 0.27923258726620354, "learning_rate": 6.267173289718079e-06, "loss": 0.4611, "step": 9632 }, { "epoch": 0.43598099117447386, "grad_norm": 0.6535515640602259, "learning_rate": 6.2664642831216206e-06, "loss": 0.3311, "step": 9633 }, { "epoch": 0.43602625028286945, "grad_norm": 0.658058957760891, "learning_rate": 6.265755249312733e-06, "loss": 0.3343, "step": 9634 }, { "epoch": 0.436071509391265, "grad_norm": 0.7420584561566588, "learning_rate": 6.2650461883066534e-06, "loss": 0.2928, "step": 9635 }, { "epoch": 0.4361167684996606, "grad_norm": 0.679580769956985, "learning_rate": 6.264337100118615e-06, "loss": 0.3421, "step": 9636 }, { "epoch": 0.4361620276080561, "grad_norm": 0.6600756328928645, "learning_rate": 6.263627984763858e-06, "loss": 0.4015, "step": 9637 }, { "epoch": 0.4362072867164517, "grad_norm": 0.6217618393021392, "learning_rate": 6.262918842257615e-06, "loss": 0.3845, "step": 9638 }, { "epoch": 0.43625254582484724, "grad_norm": 0.42654541926319295, "learning_rate": 6.262209672615125e-06, "loss": 0.4553, "step": 9639 }, { "epoch": 0.4362978049332428, "grad_norm": 0.5991685886740397, "learning_rate": 6.261500475851625e-06, "loss": 0.3663, "step": 9640 }, { "epoch": 0.43634306404163836, "grad_norm": 0.6526326165159332, "learning_rate": 6.260791251982354e-06, "loss": 0.3534, "step": 9641 }, { "epoch": 0.43638832315003395, "grad_norm": 0.33023414793782335, "learning_rate": 6.260082001022553e-06, "loss": 0.4822, "step": 9642 }, { "epoch": 0.4364335822584295, "grad_norm": 0.614008987239421, "learning_rate": 6.259372722987459e-06, "loss": 0.3539, "step": 9643 }, { "epoch": 0.4364788413668251, "grad_norm": 0.6193780654351428, "learning_rate": 6.2586634178923124e-06, "loss": 0.3327, "step": 9644 }, { "epoch": 0.4365241004752206, "grad_norm": 0.678929067511659, "learning_rate": 6.257954085752356e-06, "loss": 0.3385, "step": 9645 }, { "epoch": 0.4365693595836162, "grad_norm": 0.6455644243579621, "learning_rate": 6.257244726582829e-06, "loss": 0.3944, "step": 9646 }, { "epoch": 0.4366146186920118, "grad_norm": 0.6332816139581993, "learning_rate": 6.256535340398974e-06, "loss": 0.3395, "step": 9647 }, { "epoch": 0.4366598778004073, "grad_norm": 0.6241992920208398, "learning_rate": 6.255825927216032e-06, "loss": 0.3107, "step": 9648 }, { "epoch": 0.4367051369088029, "grad_norm": 0.6499774891475161, "learning_rate": 6.2551164870492506e-06, "loss": 0.3049, "step": 9649 }, { "epoch": 0.43675039601719845, "grad_norm": 0.5852926834843499, "learning_rate": 6.25440701991387e-06, "loss": 0.3219, "step": 9650 }, { "epoch": 0.43679565512559404, "grad_norm": 0.682648927831333, "learning_rate": 6.253697525825134e-06, "loss": 0.3546, "step": 9651 }, { "epoch": 0.4368409142339896, "grad_norm": 0.6755898462967815, "learning_rate": 6.25298800479829e-06, "loss": 0.3562, "step": 9652 }, { "epoch": 0.43688617334238516, "grad_norm": 0.6553521064520352, "learning_rate": 6.252278456848581e-06, "loss": 0.342, "step": 9653 }, { "epoch": 0.4369314324507807, "grad_norm": 0.6414939440118836, "learning_rate": 6.251568881991256e-06, "loss": 0.3483, "step": 9654 }, { "epoch": 0.4369766915591763, "grad_norm": 0.6554899417647392, "learning_rate": 6.250859280241557e-06, "loss": 0.3432, "step": 9655 }, { "epoch": 0.4370219506675718, "grad_norm": 0.39347989973742065, "learning_rate": 6.250149651614735e-06, "loss": 0.4453, "step": 9656 }, { "epoch": 0.4370672097759674, "grad_norm": 0.616710306543043, "learning_rate": 6.249439996126036e-06, "loss": 0.33, "step": 9657 }, { "epoch": 0.437112468884363, "grad_norm": 0.6916743798248338, "learning_rate": 6.24873031379071e-06, "loss": 0.3539, "step": 9658 }, { "epoch": 0.43715772799275854, "grad_norm": 0.7770154644576485, "learning_rate": 6.248020604624004e-06, "loss": 0.3506, "step": 9659 }, { "epoch": 0.43720298710115413, "grad_norm": 0.6583445912764695, "learning_rate": 6.247310868641168e-06, "loss": 0.3122, "step": 9660 }, { "epoch": 0.43724824620954966, "grad_norm": 0.8566787002605626, "learning_rate": 6.246601105857453e-06, "loss": 0.328, "step": 9661 }, { "epoch": 0.43729350531794525, "grad_norm": 0.5977562191253866, "learning_rate": 6.245891316288108e-06, "loss": 0.3277, "step": 9662 }, { "epoch": 0.4373387644263408, "grad_norm": 0.5231617692788478, "learning_rate": 6.245181499948385e-06, "loss": 0.4739, "step": 9663 }, { "epoch": 0.4373840235347364, "grad_norm": 0.6619926024483229, "learning_rate": 6.244471656853538e-06, "loss": 0.3472, "step": 9664 }, { "epoch": 0.4374292826431319, "grad_norm": 0.6386867229353993, "learning_rate": 6.243761787018814e-06, "loss": 0.3355, "step": 9665 }, { "epoch": 0.4374745417515275, "grad_norm": 0.6153655896855599, "learning_rate": 6.2430518904594715e-06, "loss": 0.3314, "step": 9666 }, { "epoch": 0.43751980085992304, "grad_norm": 0.6500289470819034, "learning_rate": 6.24234196719076e-06, "loss": 0.3335, "step": 9667 }, { "epoch": 0.4375650599683186, "grad_norm": 0.6105016241224432, "learning_rate": 6.241632017227937e-06, "loss": 0.3209, "step": 9668 }, { "epoch": 0.4376103190767142, "grad_norm": 0.33115584574894136, "learning_rate": 6.240922040586254e-06, "loss": 0.4953, "step": 9669 }, { "epoch": 0.43765557818510975, "grad_norm": 0.33989481089576024, "learning_rate": 6.240212037280967e-06, "loss": 0.4787, "step": 9670 }, { "epoch": 0.43770083729350534, "grad_norm": 0.6816818191990349, "learning_rate": 6.239502007327334e-06, "loss": 0.3285, "step": 9671 }, { "epoch": 0.4377460964019009, "grad_norm": 0.6037031811269876, "learning_rate": 6.2387919507406085e-06, "loss": 0.308, "step": 9672 }, { "epoch": 0.43779135551029646, "grad_norm": 0.6195194185977877, "learning_rate": 6.238081867536049e-06, "loss": 0.3443, "step": 9673 }, { "epoch": 0.437836614618692, "grad_norm": 0.5802615024132223, "learning_rate": 6.237371757728914e-06, "loss": 0.3311, "step": 9674 }, { "epoch": 0.4378818737270876, "grad_norm": 0.6486256013867389, "learning_rate": 6.236661621334458e-06, "loss": 0.332, "step": 9675 }, { "epoch": 0.4379271328354831, "grad_norm": 0.5983038373745534, "learning_rate": 6.235951458367943e-06, "loss": 0.3169, "step": 9676 }, { "epoch": 0.4379723919438787, "grad_norm": 0.3263293080204479, "learning_rate": 6.235241268844626e-06, "loss": 0.4952, "step": 9677 }, { "epoch": 0.43801765105227425, "grad_norm": 0.6615016165723331, "learning_rate": 6.234531052779769e-06, "loss": 0.3285, "step": 9678 }, { "epoch": 0.43806291016066984, "grad_norm": 0.2958982133149087, "learning_rate": 6.233820810188631e-06, "loss": 0.4683, "step": 9679 }, { "epoch": 0.4381081692690654, "grad_norm": 0.597163484202998, "learning_rate": 6.233110541086473e-06, "loss": 0.349, "step": 9680 }, { "epoch": 0.43815342837746096, "grad_norm": 0.6319411261177034, "learning_rate": 6.2324002454885565e-06, "loss": 0.3328, "step": 9681 }, { "epoch": 0.43819868748585655, "grad_norm": 0.28556124909790315, "learning_rate": 6.231689923410144e-06, "loss": 0.4843, "step": 9682 }, { "epoch": 0.4382439465942521, "grad_norm": 0.6600743868567222, "learning_rate": 6.230979574866498e-06, "loss": 0.3118, "step": 9683 }, { "epoch": 0.4382892057026477, "grad_norm": 0.7072446001623893, "learning_rate": 6.230269199872881e-06, "loss": 0.3368, "step": 9684 }, { "epoch": 0.4383344648110432, "grad_norm": 0.6703396770777446, "learning_rate": 6.22955879844456e-06, "loss": 0.3548, "step": 9685 }, { "epoch": 0.4383797239194388, "grad_norm": 0.39158306035806945, "learning_rate": 6.228848370596793e-06, "loss": 0.4889, "step": 9686 }, { "epoch": 0.43842498302783434, "grad_norm": 0.6576588402412944, "learning_rate": 6.228137916344852e-06, "loss": 0.3602, "step": 9687 }, { "epoch": 0.4384702421362299, "grad_norm": 0.638629956214423, "learning_rate": 6.227427435703997e-06, "loss": 0.3318, "step": 9688 }, { "epoch": 0.43851550124462546, "grad_norm": 0.633776454109039, "learning_rate": 6.2267169286894954e-06, "loss": 0.3348, "step": 9689 }, { "epoch": 0.43856076035302105, "grad_norm": 0.3955281000545189, "learning_rate": 6.2260063953166165e-06, "loss": 0.4711, "step": 9690 }, { "epoch": 0.4386060194614166, "grad_norm": 0.2967833814586568, "learning_rate": 6.225295835600624e-06, "loss": 0.5086, "step": 9691 }, { "epoch": 0.4386512785698122, "grad_norm": 0.6881500244083311, "learning_rate": 6.2245852495567885e-06, "loss": 0.3629, "step": 9692 }, { "epoch": 0.43869653767820777, "grad_norm": 0.2702582686060595, "learning_rate": 6.2238746372003775e-06, "loss": 0.4664, "step": 9693 }, { "epoch": 0.4387417967866033, "grad_norm": 0.2826358246260504, "learning_rate": 6.223163998546657e-06, "loss": 0.4739, "step": 9694 }, { "epoch": 0.4387870558949989, "grad_norm": 0.6637887438021715, "learning_rate": 6.2224533336109015e-06, "loss": 0.3642, "step": 9695 }, { "epoch": 0.4388323150033944, "grad_norm": 0.612207286684882, "learning_rate": 6.221742642408377e-06, "loss": 0.3628, "step": 9696 }, { "epoch": 0.43887757411179, "grad_norm": 0.6165039348405416, "learning_rate": 6.221031924954356e-06, "loss": 0.3216, "step": 9697 }, { "epoch": 0.43892283322018555, "grad_norm": 0.696379443461046, "learning_rate": 6.220321181264108e-06, "loss": 0.3381, "step": 9698 }, { "epoch": 0.43896809232858114, "grad_norm": 0.5904740059119723, "learning_rate": 6.2196104113529064e-06, "loss": 0.3482, "step": 9699 }, { "epoch": 0.4390133514369767, "grad_norm": 0.7721270978948145, "learning_rate": 6.218899615236022e-06, "loss": 0.3034, "step": 9700 }, { "epoch": 0.43905861054537226, "grad_norm": 0.7227083202010125, "learning_rate": 6.21818879292873e-06, "loss": 0.3402, "step": 9701 }, { "epoch": 0.4391038696537678, "grad_norm": 0.6098500040445197, "learning_rate": 6.217477944446301e-06, "loss": 0.2952, "step": 9702 }, { "epoch": 0.4391491287621634, "grad_norm": 0.6829152157415639, "learning_rate": 6.216767069804011e-06, "loss": 0.357, "step": 9703 }, { "epoch": 0.439194387870559, "grad_norm": 0.7364773799141779, "learning_rate": 6.216056169017133e-06, "loss": 0.375, "step": 9704 }, { "epoch": 0.4392396469789545, "grad_norm": 0.6548002280902314, "learning_rate": 6.215345242100942e-06, "loss": 0.3248, "step": 9705 }, { "epoch": 0.4392849060873501, "grad_norm": 0.6254654159755485, "learning_rate": 6.214634289070717e-06, "loss": 0.3308, "step": 9706 }, { "epoch": 0.43933016519574564, "grad_norm": 0.634866839571993, "learning_rate": 6.213923309941728e-06, "loss": 0.3062, "step": 9707 }, { "epoch": 0.4393754243041412, "grad_norm": 0.40657094828930934, "learning_rate": 6.213212304729259e-06, "loss": 0.5051, "step": 9708 }, { "epoch": 0.43942068341253676, "grad_norm": 0.6105111080738309, "learning_rate": 6.212501273448581e-06, "loss": 0.3353, "step": 9709 }, { "epoch": 0.43946594252093235, "grad_norm": 0.662434514457218, "learning_rate": 6.211790216114976e-06, "loss": 0.3619, "step": 9710 }, { "epoch": 0.4395112016293279, "grad_norm": 0.6727588402946885, "learning_rate": 6.21107913274372e-06, "loss": 0.3231, "step": 9711 }, { "epoch": 0.4395564607377235, "grad_norm": 0.6425714546278731, "learning_rate": 6.210368023350094e-06, "loss": 0.3426, "step": 9712 }, { "epoch": 0.439601719846119, "grad_norm": 0.61234101605595, "learning_rate": 6.209656887949376e-06, "loss": 0.3355, "step": 9713 }, { "epoch": 0.4396469789545146, "grad_norm": 0.634668015159857, "learning_rate": 6.208945726556848e-06, "loss": 0.3608, "step": 9714 }, { "epoch": 0.43969223806291013, "grad_norm": 0.29388674268361786, "learning_rate": 6.2082345391877865e-06, "loss": 0.4741, "step": 9715 }, { "epoch": 0.4397374971713057, "grad_norm": 0.6535017456665534, "learning_rate": 6.207523325857479e-06, "loss": 0.3334, "step": 9716 }, { "epoch": 0.4397827562797013, "grad_norm": 0.6852149829343164, "learning_rate": 6.206812086581201e-06, "loss": 0.3798, "step": 9717 }, { "epoch": 0.43982801538809685, "grad_norm": 0.6729182496858611, "learning_rate": 6.206100821374238e-06, "loss": 0.3105, "step": 9718 }, { "epoch": 0.43987327449649244, "grad_norm": 0.6320681877620283, "learning_rate": 6.205389530251873e-06, "loss": 0.3502, "step": 9719 }, { "epoch": 0.439918533604888, "grad_norm": 0.6497975622661094, "learning_rate": 6.204678213229389e-06, "loss": 0.3513, "step": 9720 }, { "epoch": 0.43996379271328356, "grad_norm": 0.6672429135341902, "learning_rate": 6.203966870322071e-06, "loss": 0.3292, "step": 9721 }, { "epoch": 0.4400090518216791, "grad_norm": 0.31104056891057263, "learning_rate": 6.2032555015452036e-06, "loss": 0.4851, "step": 9722 }, { "epoch": 0.4400543109300747, "grad_norm": 0.6751221659802483, "learning_rate": 6.202544106914068e-06, "loss": 0.3552, "step": 9723 }, { "epoch": 0.4400995700384702, "grad_norm": 0.2951432521217396, "learning_rate": 6.201832686443955e-06, "loss": 0.4623, "step": 9724 }, { "epoch": 0.4401448291468658, "grad_norm": 0.6698341616356065, "learning_rate": 6.201121240150147e-06, "loss": 0.3288, "step": 9725 }, { "epoch": 0.44019008825526135, "grad_norm": 0.6358296298524535, "learning_rate": 6.200409768047935e-06, "loss": 0.32, "step": 9726 }, { "epoch": 0.44023534736365694, "grad_norm": 0.6169335416900048, "learning_rate": 6.199698270152602e-06, "loss": 0.333, "step": 9727 }, { "epoch": 0.4402806064720525, "grad_norm": 0.6492481963366252, "learning_rate": 6.198986746479439e-06, "loss": 0.3533, "step": 9728 }, { "epoch": 0.44032586558044806, "grad_norm": 0.6320510462358143, "learning_rate": 6.198275197043732e-06, "loss": 0.3324, "step": 9729 }, { "epoch": 0.44037112468884365, "grad_norm": 0.6742375310083387, "learning_rate": 6.197563621860771e-06, "loss": 0.3547, "step": 9730 }, { "epoch": 0.4404163837972392, "grad_norm": 0.6290118966322753, "learning_rate": 6.196852020945846e-06, "loss": 0.362, "step": 9731 }, { "epoch": 0.4404616429056348, "grad_norm": 0.6904537020386137, "learning_rate": 6.196140394314247e-06, "loss": 0.3841, "step": 9732 }, { "epoch": 0.4405069020140303, "grad_norm": 0.581190785112767, "learning_rate": 6.195428741981266e-06, "loss": 0.3469, "step": 9733 }, { "epoch": 0.4405521611224259, "grad_norm": 0.6780527641923901, "learning_rate": 6.194717063962191e-06, "loss": 0.3413, "step": 9734 }, { "epoch": 0.44059742023082143, "grad_norm": 0.32026258131864827, "learning_rate": 6.194005360272317e-06, "loss": 0.474, "step": 9735 }, { "epoch": 0.440642679339217, "grad_norm": 0.7847063668187274, "learning_rate": 6.193293630926933e-06, "loss": 0.3376, "step": 9736 }, { "epoch": 0.44068793844761256, "grad_norm": 0.6635025468514317, "learning_rate": 6.192581875941336e-06, "loss": 0.3821, "step": 9737 }, { "epoch": 0.44073319755600815, "grad_norm": 0.649767023666101, "learning_rate": 6.191870095330817e-06, "loss": 0.3396, "step": 9738 }, { "epoch": 0.4407784566644037, "grad_norm": 0.6212240638480885, "learning_rate": 6.191158289110669e-06, "loss": 0.3332, "step": 9739 }, { "epoch": 0.4408237157727993, "grad_norm": 0.6820651677451741, "learning_rate": 6.1904464572961874e-06, "loss": 0.3473, "step": 9740 }, { "epoch": 0.44086897488119486, "grad_norm": 0.6214271892311886, "learning_rate": 6.1897345999026695e-06, "loss": 0.3568, "step": 9741 }, { "epoch": 0.4409142339895904, "grad_norm": 0.6027011619205818, "learning_rate": 6.1890227169454075e-06, "loss": 0.3334, "step": 9742 }, { "epoch": 0.440959493097986, "grad_norm": 0.6328640836910864, "learning_rate": 6.188310808439701e-06, "loss": 0.3459, "step": 9743 }, { "epoch": 0.4410047522063815, "grad_norm": 0.6179245657168015, "learning_rate": 6.187598874400842e-06, "loss": 0.3734, "step": 9744 }, { "epoch": 0.4410500113147771, "grad_norm": 0.7759051603585791, "learning_rate": 6.1868869148441325e-06, "loss": 0.342, "step": 9745 }, { "epoch": 0.44109527042317265, "grad_norm": 0.606907011740406, "learning_rate": 6.1861749297848685e-06, "loss": 0.3105, "step": 9746 }, { "epoch": 0.44114052953156824, "grad_norm": 0.6237538836904465, "learning_rate": 6.185462919238348e-06, "loss": 0.3557, "step": 9747 }, { "epoch": 0.44118578863996377, "grad_norm": 0.3348760559457101, "learning_rate": 6.184750883219869e-06, "loss": 0.4823, "step": 9748 }, { "epoch": 0.44123104774835936, "grad_norm": 0.6390519401669638, "learning_rate": 6.184038821744733e-06, "loss": 0.3532, "step": 9749 }, { "epoch": 0.4412763068567549, "grad_norm": 0.657523255701622, "learning_rate": 6.18332673482824e-06, "loss": 0.2972, "step": 9750 }, { "epoch": 0.4413215659651505, "grad_norm": 0.6160734718098636, "learning_rate": 6.18261462248569e-06, "loss": 0.3295, "step": 9751 }, { "epoch": 0.4413668250735461, "grad_norm": 0.2923393424606555, "learning_rate": 6.181902484732381e-06, "loss": 0.4782, "step": 9752 }, { "epoch": 0.4414120841819416, "grad_norm": 0.6990437189078308, "learning_rate": 6.181190321583621e-06, "loss": 0.3, "step": 9753 }, { "epoch": 0.4414573432903372, "grad_norm": 0.6699717353002903, "learning_rate": 6.180478133054707e-06, "loss": 0.3256, "step": 9754 }, { "epoch": 0.44150260239873274, "grad_norm": 0.6438335030507906, "learning_rate": 6.179765919160945e-06, "loss": 0.2922, "step": 9755 }, { "epoch": 0.4415478615071283, "grad_norm": 0.6392233623323376, "learning_rate": 6.179053679917635e-06, "loss": 0.349, "step": 9756 }, { "epoch": 0.44159312061552386, "grad_norm": 0.7026479472661328, "learning_rate": 6.1783414153400835e-06, "loss": 0.3244, "step": 9757 }, { "epoch": 0.44163837972391945, "grad_norm": 0.6389687826947396, "learning_rate": 6.177629125443594e-06, "loss": 0.3285, "step": 9758 }, { "epoch": 0.441683638832315, "grad_norm": 0.6566514186922977, "learning_rate": 6.176916810243471e-06, "loss": 0.354, "step": 9759 }, { "epoch": 0.4417288979407106, "grad_norm": 0.43650505652041166, "learning_rate": 6.176204469755021e-06, "loss": 0.4714, "step": 9760 }, { "epoch": 0.4417741570491061, "grad_norm": 0.7489027344347635, "learning_rate": 6.175492103993548e-06, "loss": 0.3237, "step": 9761 }, { "epoch": 0.4418194161575017, "grad_norm": 0.7357275635937931, "learning_rate": 6.1747797129743605e-06, "loss": 0.3311, "step": 9762 }, { "epoch": 0.4418646752658973, "grad_norm": 0.31040027336716486, "learning_rate": 6.174067296712765e-06, "loss": 0.4697, "step": 9763 }, { "epoch": 0.4419099343742928, "grad_norm": 0.2952355102718732, "learning_rate": 6.173354855224071e-06, "loss": 0.5069, "step": 9764 }, { "epoch": 0.4419551934826884, "grad_norm": 0.6238306699955422, "learning_rate": 6.1726423885235816e-06, "loss": 0.2843, "step": 9765 }, { "epoch": 0.44200045259108395, "grad_norm": 0.6155755513933405, "learning_rate": 6.1719298966266114e-06, "loss": 0.3352, "step": 9766 }, { "epoch": 0.44204571169947954, "grad_norm": 0.5998973251292518, "learning_rate": 6.1712173795484665e-06, "loss": 0.3683, "step": 9767 }, { "epoch": 0.44209097080787507, "grad_norm": 0.6325835388706952, "learning_rate": 6.170504837304458e-06, "loss": 0.3218, "step": 9768 }, { "epoch": 0.44213622991627066, "grad_norm": 0.6759271954619623, "learning_rate": 6.169792269909893e-06, "loss": 0.2931, "step": 9769 }, { "epoch": 0.4421814890246662, "grad_norm": 0.7109614631678387, "learning_rate": 6.169079677380086e-06, "loss": 0.3695, "step": 9770 }, { "epoch": 0.4422267481330618, "grad_norm": 0.6078522568876246, "learning_rate": 6.168367059730348e-06, "loss": 0.3557, "step": 9771 }, { "epoch": 0.4422720072414573, "grad_norm": 0.6527485169924666, "learning_rate": 6.167654416975991e-06, "loss": 0.3428, "step": 9772 }, { "epoch": 0.4423172663498529, "grad_norm": 0.5826122835066895, "learning_rate": 6.166941749132325e-06, "loss": 0.3384, "step": 9773 }, { "epoch": 0.44236252545824845, "grad_norm": 0.5247511503156523, "learning_rate": 6.166229056214665e-06, "loss": 0.4777, "step": 9774 }, { "epoch": 0.44240778456664404, "grad_norm": 0.6002895767581452, "learning_rate": 6.165516338238324e-06, "loss": 0.3281, "step": 9775 }, { "epoch": 0.4424530436750396, "grad_norm": 0.6204952381918223, "learning_rate": 6.164803595218618e-06, "loss": 0.3679, "step": 9776 }, { "epoch": 0.44249830278343516, "grad_norm": 0.5812317480029556, "learning_rate": 6.16409082717086e-06, "loss": 0.288, "step": 9777 }, { "epoch": 0.44254356189183075, "grad_norm": 0.3027247180783634, "learning_rate": 6.163378034110364e-06, "loss": 0.4954, "step": 9778 }, { "epoch": 0.4425888210002263, "grad_norm": 0.5976073792363202, "learning_rate": 6.162665216052448e-06, "loss": 0.3415, "step": 9779 }, { "epoch": 0.4426340801086219, "grad_norm": 0.6213027447734806, "learning_rate": 6.161952373012427e-06, "loss": 0.3267, "step": 9780 }, { "epoch": 0.4426793392170174, "grad_norm": 0.6648348362802524, "learning_rate": 6.161239505005618e-06, "loss": 0.3131, "step": 9781 }, { "epoch": 0.442724598325413, "grad_norm": 0.6201774829422037, "learning_rate": 6.160526612047339e-06, "loss": 0.3248, "step": 9782 }, { "epoch": 0.44276985743380853, "grad_norm": 0.5839515753747767, "learning_rate": 6.159813694152907e-06, "loss": 0.3755, "step": 9783 }, { "epoch": 0.4428151165422041, "grad_norm": 0.6353528762384202, "learning_rate": 6.1591007513376425e-06, "loss": 0.36, "step": 9784 }, { "epoch": 0.44286037565059966, "grad_norm": 0.406392908771862, "learning_rate": 6.1583877836168615e-06, "loss": 0.4675, "step": 9785 }, { "epoch": 0.44290563475899525, "grad_norm": 0.7766715682857842, "learning_rate": 6.157674791005884e-06, "loss": 0.3431, "step": 9786 }, { "epoch": 0.44295089386739084, "grad_norm": 0.6466693556842529, "learning_rate": 6.1569617735200314e-06, "loss": 0.3392, "step": 9787 }, { "epoch": 0.4429961529757864, "grad_norm": 0.6287656784939121, "learning_rate": 6.156248731174623e-06, "loss": 0.3699, "step": 9788 }, { "epoch": 0.44304141208418196, "grad_norm": 0.6648703914194296, "learning_rate": 6.155535663984982e-06, "loss": 0.3456, "step": 9789 }, { "epoch": 0.4430866711925775, "grad_norm": 0.6262050918375093, "learning_rate": 6.154822571966428e-06, "loss": 0.3244, "step": 9790 }, { "epoch": 0.4431319303009731, "grad_norm": 0.5743181017272367, "learning_rate": 6.154109455134283e-06, "loss": 0.3541, "step": 9791 }, { "epoch": 0.4431771894093686, "grad_norm": 0.6474090918108056, "learning_rate": 6.15339631350387e-06, "loss": 0.3589, "step": 9792 }, { "epoch": 0.4432224485177642, "grad_norm": 0.2949861466946327, "learning_rate": 6.152683147090514e-06, "loss": 0.4708, "step": 9793 }, { "epoch": 0.44326770762615975, "grad_norm": 0.3234561818756022, "learning_rate": 6.151969955909536e-06, "loss": 0.4968, "step": 9794 }, { "epoch": 0.44331296673455534, "grad_norm": 0.6340148275150607, "learning_rate": 6.151256739976264e-06, "loss": 0.4069, "step": 9795 }, { "epoch": 0.44335822584295087, "grad_norm": 0.6197326211786413, "learning_rate": 6.150543499306016e-06, "loss": 0.3435, "step": 9796 }, { "epoch": 0.44340348495134646, "grad_norm": 0.5949256976740551, "learning_rate": 6.149830233914127e-06, "loss": 0.3038, "step": 9797 }, { "epoch": 0.44344874405974205, "grad_norm": 0.6661137701878944, "learning_rate": 6.149116943815915e-06, "loss": 0.3539, "step": 9798 }, { "epoch": 0.4434940031681376, "grad_norm": 0.6438231289467011, "learning_rate": 6.148403629026709e-06, "loss": 0.3425, "step": 9799 }, { "epoch": 0.4435392622765332, "grad_norm": 0.585216600771846, "learning_rate": 6.147690289561836e-06, "loss": 0.3322, "step": 9800 }, { "epoch": 0.4435845213849287, "grad_norm": 0.3512680335430706, "learning_rate": 6.146976925436625e-06, "loss": 0.4644, "step": 9801 }, { "epoch": 0.4436297804933243, "grad_norm": 0.6362038853976834, "learning_rate": 6.146263536666401e-06, "loss": 0.2967, "step": 9802 }, { "epoch": 0.44367503960171983, "grad_norm": 0.33783993682468194, "learning_rate": 6.145550123266496e-06, "loss": 0.4783, "step": 9803 }, { "epoch": 0.4437202987101154, "grad_norm": 0.3090639197217425, "learning_rate": 6.1448366852522346e-06, "loss": 0.4825, "step": 9804 }, { "epoch": 0.44376555781851096, "grad_norm": 0.583863872091518, "learning_rate": 6.144123222638952e-06, "loss": 0.3328, "step": 9805 }, { "epoch": 0.44381081692690655, "grad_norm": 0.6303943405691291, "learning_rate": 6.143409735441972e-06, "loss": 0.3285, "step": 9806 }, { "epoch": 0.4438560760353021, "grad_norm": 0.6284380418815342, "learning_rate": 6.1426962236766294e-06, "loss": 0.3571, "step": 9807 }, { "epoch": 0.4439013351436977, "grad_norm": 0.6136933151196827, "learning_rate": 6.141982687358255e-06, "loss": 0.3188, "step": 9808 }, { "epoch": 0.4439465942520932, "grad_norm": 0.6394782780168193, "learning_rate": 6.14126912650218e-06, "loss": 0.346, "step": 9809 }, { "epoch": 0.4439918533604888, "grad_norm": 0.3835124101312275, "learning_rate": 6.140555541123737e-06, "loss": 0.494, "step": 9810 }, { "epoch": 0.4440371124688844, "grad_norm": 0.6657903827089986, "learning_rate": 6.1398419312382575e-06, "loss": 0.3775, "step": 9811 }, { "epoch": 0.4440823715772799, "grad_norm": 0.6194909844891778, "learning_rate": 6.139128296861076e-06, "loss": 0.3312, "step": 9812 }, { "epoch": 0.4441276306856755, "grad_norm": 0.6453686197003785, "learning_rate": 6.138414638007526e-06, "loss": 0.3239, "step": 9813 }, { "epoch": 0.44417288979407105, "grad_norm": 0.6254271915732517, "learning_rate": 6.137700954692944e-06, "loss": 0.3028, "step": 9814 }, { "epoch": 0.44421814890246664, "grad_norm": 0.3586269129410446, "learning_rate": 6.136987246932658e-06, "loss": 0.4782, "step": 9815 }, { "epoch": 0.44426340801086217, "grad_norm": 0.6018532392814256, "learning_rate": 6.136273514742013e-06, "loss": 0.3272, "step": 9816 }, { "epoch": 0.44430866711925776, "grad_norm": 0.6227910130275708, "learning_rate": 6.135559758136337e-06, "loss": 0.3353, "step": 9817 }, { "epoch": 0.4443539262276533, "grad_norm": 0.6510502346855318, "learning_rate": 6.13484597713097e-06, "loss": 0.3647, "step": 9818 }, { "epoch": 0.4443991853360489, "grad_norm": 0.5901502996575975, "learning_rate": 6.134132171741247e-06, "loss": 0.3081, "step": 9819 }, { "epoch": 0.4444444444444444, "grad_norm": 0.6095995854076501, "learning_rate": 6.133418341982509e-06, "loss": 0.369, "step": 9820 }, { "epoch": 0.44448970355284, "grad_norm": 0.30644324654506866, "learning_rate": 6.132704487870091e-06, "loss": 0.4773, "step": 9821 }, { "epoch": 0.4445349626612356, "grad_norm": 0.5812312860977836, "learning_rate": 6.131990609419334e-06, "loss": 0.343, "step": 9822 }, { "epoch": 0.44458022176963113, "grad_norm": 0.6686231801887372, "learning_rate": 6.131276706645572e-06, "loss": 0.3077, "step": 9823 }, { "epoch": 0.4446254808780267, "grad_norm": 0.5833261560869663, "learning_rate": 6.130562779564151e-06, "loss": 0.3347, "step": 9824 }, { "epoch": 0.44467073998642226, "grad_norm": 0.7014683950261718, "learning_rate": 6.129848828190405e-06, "loss": 0.3275, "step": 9825 }, { "epoch": 0.44471599909481785, "grad_norm": 0.6994318413430156, "learning_rate": 6.129134852539682e-06, "loss": 0.3752, "step": 9826 }, { "epoch": 0.4447612582032134, "grad_norm": 0.6038864527395649, "learning_rate": 6.128420852627316e-06, "loss": 0.3189, "step": 9827 }, { "epoch": 0.444806517311609, "grad_norm": 0.6531328968033617, "learning_rate": 6.127706828468653e-06, "loss": 0.3217, "step": 9828 }, { "epoch": 0.4448517764200045, "grad_norm": 0.6903825925649602, "learning_rate": 6.126992780079032e-06, "loss": 0.3516, "step": 9829 }, { "epoch": 0.4448970355284001, "grad_norm": 0.6003467816513118, "learning_rate": 6.1262787074738e-06, "loss": 0.3222, "step": 9830 }, { "epoch": 0.44494229463679563, "grad_norm": 0.5740307584921875, "learning_rate": 6.125564610668294e-06, "loss": 0.3023, "step": 9831 }, { "epoch": 0.4449875537451912, "grad_norm": 0.33965468133552557, "learning_rate": 6.124850489677865e-06, "loss": 0.4725, "step": 9832 }, { "epoch": 0.44503281285358676, "grad_norm": 0.2948191948780353, "learning_rate": 6.1241363445178515e-06, "loss": 0.4507, "step": 9833 }, { "epoch": 0.44507807196198235, "grad_norm": 0.6054233565022529, "learning_rate": 6.1234221752036015e-06, "loss": 0.3551, "step": 9834 }, { "epoch": 0.44512333107037794, "grad_norm": 0.5959582579867868, "learning_rate": 6.122707981750458e-06, "loss": 0.3295, "step": 9835 }, { "epoch": 0.44516859017877347, "grad_norm": 0.6347395989261895, "learning_rate": 6.12199376417377e-06, "loss": 0.342, "step": 9836 }, { "epoch": 0.44521384928716906, "grad_norm": 0.6944384128632148, "learning_rate": 6.121279522488881e-06, "loss": 0.399, "step": 9837 }, { "epoch": 0.4452591083955646, "grad_norm": 1.8592269415278064, "learning_rate": 6.120565256711138e-06, "loss": 0.3245, "step": 9838 }, { "epoch": 0.4453043675039602, "grad_norm": 0.5513116467297371, "learning_rate": 6.11985096685589e-06, "loss": 0.4517, "step": 9839 }, { "epoch": 0.4453496266123557, "grad_norm": 0.5414042070571672, "learning_rate": 6.1191366529384845e-06, "loss": 0.4674, "step": 9840 }, { "epoch": 0.4453948857207513, "grad_norm": 0.6531904168417739, "learning_rate": 6.118422314974269e-06, "loss": 0.3548, "step": 9841 }, { "epoch": 0.44544014482914684, "grad_norm": 0.7155330778152321, "learning_rate": 6.117707952978593e-06, "loss": 0.3676, "step": 9842 }, { "epoch": 0.44548540393754243, "grad_norm": 0.820859464855041, "learning_rate": 6.116993566966807e-06, "loss": 0.3315, "step": 9843 }, { "epoch": 0.44553066304593797, "grad_norm": 0.33759141723286856, "learning_rate": 6.1162791569542576e-06, "loss": 0.4529, "step": 9844 }, { "epoch": 0.44557592215433356, "grad_norm": 0.6551364903781752, "learning_rate": 6.1155647229562994e-06, "loss": 0.3414, "step": 9845 }, { "epoch": 0.44562118126272915, "grad_norm": 0.7403036860239424, "learning_rate": 6.1148502649882805e-06, "loss": 0.3875, "step": 9846 }, { "epoch": 0.4456664403711247, "grad_norm": 0.7837013726439063, "learning_rate": 6.114135783065553e-06, "loss": 0.3745, "step": 9847 }, { "epoch": 0.4457116994795203, "grad_norm": 0.6477213100740911, "learning_rate": 6.113421277203471e-06, "loss": 0.407, "step": 9848 }, { "epoch": 0.4457569585879158, "grad_norm": 0.614216462619982, "learning_rate": 6.112706747417384e-06, "loss": 0.3495, "step": 9849 }, { "epoch": 0.4458022176963114, "grad_norm": 0.325186835765987, "learning_rate": 6.111992193722647e-06, "loss": 0.4765, "step": 9850 }, { "epoch": 0.44584747680470693, "grad_norm": 0.71102109767876, "learning_rate": 6.111277616134613e-06, "loss": 0.3754, "step": 9851 }, { "epoch": 0.4458927359131025, "grad_norm": 0.6230716349605854, "learning_rate": 6.1105630146686345e-06, "loss": 0.3431, "step": 9852 }, { "epoch": 0.44593799502149806, "grad_norm": 0.6124749160422528, "learning_rate": 6.109848389340071e-06, "loss": 0.2981, "step": 9853 }, { "epoch": 0.44598325412989365, "grad_norm": 0.6262606678189672, "learning_rate": 6.109133740164271e-06, "loss": 0.3268, "step": 9854 }, { "epoch": 0.4460285132382892, "grad_norm": 0.6227322143402623, "learning_rate": 6.108419067156595e-06, "loss": 0.3142, "step": 9855 }, { "epoch": 0.44607377234668477, "grad_norm": 0.651331240442703, "learning_rate": 6.1077043703323964e-06, "loss": 0.3388, "step": 9856 }, { "epoch": 0.44611903145508036, "grad_norm": 0.6447017477504052, "learning_rate": 6.106989649707034e-06, "loss": 0.3347, "step": 9857 }, { "epoch": 0.4461642905634759, "grad_norm": 1.0241416300529436, "learning_rate": 6.106274905295864e-06, "loss": 0.3285, "step": 9858 }, { "epoch": 0.4462095496718715, "grad_norm": 0.3596739071402451, "learning_rate": 6.105560137114244e-06, "loss": 0.4933, "step": 9859 }, { "epoch": 0.446254808780267, "grad_norm": 0.34139558368903, "learning_rate": 6.1048453451775305e-06, "loss": 0.4793, "step": 9860 }, { "epoch": 0.4463000678886626, "grad_norm": 0.6831938419121877, "learning_rate": 6.104130529501086e-06, "loss": 0.3348, "step": 9861 }, { "epoch": 0.44634532699705814, "grad_norm": 0.6882346739189739, "learning_rate": 6.103415690100265e-06, "loss": 0.3799, "step": 9862 }, { "epoch": 0.44639058610545373, "grad_norm": 0.3176810590962061, "learning_rate": 6.102700826990432e-06, "loss": 0.459, "step": 9863 }, { "epoch": 0.44643584521384927, "grad_norm": 0.6221193294243147, "learning_rate": 6.101985940186943e-06, "loss": 0.3489, "step": 9864 }, { "epoch": 0.44648110432224486, "grad_norm": 0.6180950453337573, "learning_rate": 6.101271029705163e-06, "loss": 0.3321, "step": 9865 }, { "epoch": 0.4465263634306404, "grad_norm": 0.6067883624713181, "learning_rate": 6.100556095560448e-06, "loss": 0.3362, "step": 9866 }, { "epoch": 0.446571622539036, "grad_norm": 0.6901054349850831, "learning_rate": 6.099841137768164e-06, "loss": 0.3529, "step": 9867 }, { "epoch": 0.4466168816474315, "grad_norm": 0.7772309288047123, "learning_rate": 6.099126156343672e-06, "loss": 0.3405, "step": 9868 }, { "epoch": 0.4466621407558271, "grad_norm": 0.6338037509155247, "learning_rate": 6.098411151302335e-06, "loss": 0.3095, "step": 9869 }, { "epoch": 0.4467073998642227, "grad_norm": 0.6478361223504291, "learning_rate": 6.097696122659515e-06, "loss": 0.365, "step": 9870 }, { "epoch": 0.44675265897261823, "grad_norm": 0.3435502929420577, "learning_rate": 6.096981070430577e-06, "loss": 0.5074, "step": 9871 }, { "epoch": 0.4467979180810138, "grad_norm": 0.5684308557539682, "learning_rate": 6.096265994630886e-06, "loss": 0.3604, "step": 9872 }, { "epoch": 0.44684317718940936, "grad_norm": 0.6546593740824457, "learning_rate": 6.095550895275803e-06, "loss": 0.3511, "step": 9873 }, { "epoch": 0.44688843629780495, "grad_norm": 0.6351718584179072, "learning_rate": 6.094835772380699e-06, "loss": 0.3389, "step": 9874 }, { "epoch": 0.4469336954062005, "grad_norm": 0.6333198074375345, "learning_rate": 6.094120625960934e-06, "loss": 0.3783, "step": 9875 }, { "epoch": 0.44697895451459607, "grad_norm": 0.28208508094965357, "learning_rate": 6.09340545603188e-06, "loss": 0.4683, "step": 9876 }, { "epoch": 0.4470242136229916, "grad_norm": 0.27034783904859405, "learning_rate": 6.092690262608899e-06, "loss": 0.463, "step": 9877 }, { "epoch": 0.4470694727313872, "grad_norm": 0.6585479271201585, "learning_rate": 6.091975045707361e-06, "loss": 0.3384, "step": 9878 }, { "epoch": 0.44711473183978273, "grad_norm": 0.3012915983187069, "learning_rate": 6.091259805342632e-06, "loss": 0.4755, "step": 9879 }, { "epoch": 0.4471599909481783, "grad_norm": 0.30539675156875673, "learning_rate": 6.0905445415300835e-06, "loss": 0.4655, "step": 9880 }, { "epoch": 0.4472052500565739, "grad_norm": 0.5853264373929327, "learning_rate": 6.089829254285079e-06, "loss": 0.3302, "step": 9881 }, { "epoch": 0.44725050916496945, "grad_norm": 0.27688497076430446, "learning_rate": 6.089113943622994e-06, "loss": 0.4778, "step": 9882 }, { "epoch": 0.44729576827336504, "grad_norm": 0.6386649390187925, "learning_rate": 6.088398609559193e-06, "loss": 0.3475, "step": 9883 }, { "epoch": 0.44734102738176057, "grad_norm": 0.67766272206694, "learning_rate": 6.08768325210905e-06, "loss": 0.3465, "step": 9884 }, { "epoch": 0.44738628649015616, "grad_norm": 0.5990159683409747, "learning_rate": 6.086967871287934e-06, "loss": 0.3466, "step": 9885 }, { "epoch": 0.4474315455985517, "grad_norm": 0.617226860730486, "learning_rate": 6.086252467111216e-06, "loss": 0.3293, "step": 9886 }, { "epoch": 0.4474768047069473, "grad_norm": 0.32740738856566126, "learning_rate": 6.0855370395942705e-06, "loss": 0.4634, "step": 9887 }, { "epoch": 0.4475220638153428, "grad_norm": 0.33128541122915056, "learning_rate": 6.0848215887524665e-06, "loss": 0.4757, "step": 9888 }, { "epoch": 0.4475673229237384, "grad_norm": 0.6868043705289181, "learning_rate": 6.084106114601178e-06, "loss": 0.3073, "step": 9889 }, { "epoch": 0.44761258203213394, "grad_norm": 0.6463951687803111, "learning_rate": 6.08339061715578e-06, "loss": 0.3492, "step": 9890 }, { "epoch": 0.44765784114052953, "grad_norm": 0.5876814576312829, "learning_rate": 6.082675096431645e-06, "loss": 0.2924, "step": 9891 }, { "epoch": 0.4477031002489251, "grad_norm": 0.6016861692698359, "learning_rate": 6.081959552444147e-06, "loss": 0.3545, "step": 9892 }, { "epoch": 0.44774835935732066, "grad_norm": 0.607095977406509, "learning_rate": 6.081243985208662e-06, "loss": 0.348, "step": 9893 }, { "epoch": 0.44779361846571625, "grad_norm": 0.6987791654284069, "learning_rate": 6.0805283947405625e-06, "loss": 0.3336, "step": 9894 }, { "epoch": 0.4478388775741118, "grad_norm": 0.33277908746130674, "learning_rate": 6.079812781055228e-06, "loss": 0.4934, "step": 9895 }, { "epoch": 0.44788413668250737, "grad_norm": 0.7554432633335122, "learning_rate": 6.0790971441680325e-06, "loss": 0.3706, "step": 9896 }, { "epoch": 0.4479293957909029, "grad_norm": 0.6070321705846707, "learning_rate": 6.078381484094353e-06, "loss": 0.3155, "step": 9897 }, { "epoch": 0.4479746548992985, "grad_norm": 0.2850215988806868, "learning_rate": 6.077665800849568e-06, "loss": 0.4661, "step": 9898 }, { "epoch": 0.44801991400769403, "grad_norm": 0.27157890571522925, "learning_rate": 6.076950094449055e-06, "loss": 0.4477, "step": 9899 }, { "epoch": 0.4480651731160896, "grad_norm": 0.637999409504055, "learning_rate": 6.076234364908192e-06, "loss": 0.3479, "step": 9900 }, { "epoch": 0.44811043222448516, "grad_norm": 0.7043538513070119, "learning_rate": 6.07551861224236e-06, "loss": 0.3614, "step": 9901 }, { "epoch": 0.44815569133288075, "grad_norm": 0.6120136354672181, "learning_rate": 6.074802836466932e-06, "loss": 0.3262, "step": 9902 }, { "epoch": 0.4482009504412763, "grad_norm": 0.6531903882646292, "learning_rate": 6.074087037597296e-06, "loss": 0.3485, "step": 9903 }, { "epoch": 0.44824620954967187, "grad_norm": 0.35229527558395135, "learning_rate": 6.073371215648824e-06, "loss": 0.4707, "step": 9904 }, { "epoch": 0.44829146865806746, "grad_norm": 0.6613695716254754, "learning_rate": 6.072655370636905e-06, "loss": 0.3817, "step": 9905 }, { "epoch": 0.448336727766463, "grad_norm": 0.6818675839553595, "learning_rate": 6.071939502576916e-06, "loss": 0.3511, "step": 9906 }, { "epoch": 0.4483819868748586, "grad_norm": 0.6641385200315042, "learning_rate": 6.071223611484238e-06, "loss": 0.3688, "step": 9907 }, { "epoch": 0.4484272459832541, "grad_norm": 0.6913005755156559, "learning_rate": 6.070507697374255e-06, "loss": 0.3579, "step": 9908 }, { "epoch": 0.4484725050916497, "grad_norm": 0.6397984850762364, "learning_rate": 6.06979176026235e-06, "loss": 0.3429, "step": 9909 }, { "epoch": 0.44851776420004524, "grad_norm": 0.5996746250995699, "learning_rate": 6.069075800163905e-06, "loss": 0.3481, "step": 9910 }, { "epoch": 0.44856302330844083, "grad_norm": 0.6204333710521447, "learning_rate": 6.068359817094305e-06, "loss": 0.3566, "step": 9911 }, { "epoch": 0.44860828241683637, "grad_norm": 0.68970625469837, "learning_rate": 6.067643811068933e-06, "loss": 0.3729, "step": 9912 }, { "epoch": 0.44865354152523196, "grad_norm": 0.3392200840779211, "learning_rate": 6.066927782103176e-06, "loss": 0.5064, "step": 9913 }, { "epoch": 0.4486988006336275, "grad_norm": 0.30088581321615016, "learning_rate": 6.066211730212416e-06, "loss": 0.5039, "step": 9914 }, { "epoch": 0.4487440597420231, "grad_norm": 0.28778244578525586, "learning_rate": 6.0654956554120415e-06, "loss": 0.4639, "step": 9915 }, { "epoch": 0.4487893188504187, "grad_norm": 0.6963911431712173, "learning_rate": 6.064779557717437e-06, "loss": 0.339, "step": 9916 }, { "epoch": 0.4488345779588142, "grad_norm": 0.6489724625248927, "learning_rate": 6.064063437143991e-06, "loss": 0.3543, "step": 9917 }, { "epoch": 0.4488798370672098, "grad_norm": 0.6240808389505063, "learning_rate": 6.063347293707089e-06, "loss": 0.3149, "step": 9918 }, { "epoch": 0.44892509617560533, "grad_norm": 0.622898892449197, "learning_rate": 6.06263112742212e-06, "loss": 0.3497, "step": 9919 }, { "epoch": 0.4489703552840009, "grad_norm": 0.6727813964875871, "learning_rate": 6.06191493830447e-06, "loss": 0.357, "step": 9920 }, { "epoch": 0.44901561439239646, "grad_norm": 0.6661519934507191, "learning_rate": 6.061198726369531e-06, "loss": 0.3959, "step": 9921 }, { "epoch": 0.44906087350079205, "grad_norm": 0.4142201447101892, "learning_rate": 6.060482491632692e-06, "loss": 0.4497, "step": 9922 }, { "epoch": 0.4491061326091876, "grad_norm": 0.393005672498883, "learning_rate": 6.0597662341093385e-06, "loss": 0.4966, "step": 9923 }, { "epoch": 0.44915139171758317, "grad_norm": 0.6824675162900966, "learning_rate": 6.059049953814866e-06, "loss": 0.3212, "step": 9924 }, { "epoch": 0.4491966508259787, "grad_norm": 0.6373071535052531, "learning_rate": 6.058333650764661e-06, "loss": 0.374, "step": 9925 }, { "epoch": 0.4492419099343743, "grad_norm": 0.3104536489191795, "learning_rate": 6.057617324974117e-06, "loss": 0.5098, "step": 9926 }, { "epoch": 0.4492871690427699, "grad_norm": 0.3229491305240693, "learning_rate": 6.056900976458624e-06, "loss": 0.4813, "step": 9927 }, { "epoch": 0.4493324281511654, "grad_norm": 0.30806458088232463, "learning_rate": 6.056184605233576e-06, "loss": 0.4662, "step": 9928 }, { "epoch": 0.449377687259561, "grad_norm": 0.28375591384829485, "learning_rate": 6.0554682113143634e-06, "loss": 0.473, "step": 9929 }, { "epoch": 0.44942294636795654, "grad_norm": 0.7300146494047709, "learning_rate": 6.054751794716383e-06, "loss": 0.3145, "step": 9930 }, { "epoch": 0.44946820547635213, "grad_norm": 0.6897056093316484, "learning_rate": 6.054035355455023e-06, "loss": 0.3321, "step": 9931 }, { "epoch": 0.44951346458474767, "grad_norm": 0.36312836939211174, "learning_rate": 6.053318893545683e-06, "loss": 0.47, "step": 9932 }, { "epoch": 0.44955872369314326, "grad_norm": 0.6457343812310226, "learning_rate": 6.052602409003752e-06, "loss": 0.336, "step": 9933 }, { "epoch": 0.4496039828015388, "grad_norm": 0.34105938676114667, "learning_rate": 6.051885901844631e-06, "loss": 0.4805, "step": 9934 }, { "epoch": 0.4496492419099344, "grad_norm": 0.675213898440973, "learning_rate": 6.0511693720837115e-06, "loss": 0.3401, "step": 9935 }, { "epoch": 0.4496945010183299, "grad_norm": 0.6437818053252721, "learning_rate": 6.05045281973639e-06, "loss": 0.3298, "step": 9936 }, { "epoch": 0.4497397601267255, "grad_norm": 0.28992887781932325, "learning_rate": 6.049736244818064e-06, "loss": 0.4943, "step": 9937 }, { "epoch": 0.44978501923512104, "grad_norm": 0.6648857665223082, "learning_rate": 6.049019647344131e-06, "loss": 0.3324, "step": 9938 }, { "epoch": 0.44983027834351663, "grad_norm": 0.5901260207845679, "learning_rate": 6.048303027329987e-06, "loss": 0.3156, "step": 9939 }, { "epoch": 0.4498755374519122, "grad_norm": 0.6111094970963682, "learning_rate": 6.047586384791031e-06, "loss": 0.3203, "step": 9940 }, { "epoch": 0.44992079656030776, "grad_norm": 0.6613587091807451, "learning_rate": 6.0468697197426595e-06, "loss": 0.3222, "step": 9941 }, { "epoch": 0.44996605566870335, "grad_norm": 0.6615717557607383, "learning_rate": 6.046153032200275e-06, "loss": 0.3667, "step": 9942 }, { "epoch": 0.4500113147770989, "grad_norm": 0.6185763797191234, "learning_rate": 6.045436322179274e-06, "loss": 0.2944, "step": 9943 }, { "epoch": 0.45005657388549447, "grad_norm": 0.5794541395057855, "learning_rate": 6.044719589695056e-06, "loss": 0.3209, "step": 9944 }, { "epoch": 0.45010183299389, "grad_norm": 0.6453103902286667, "learning_rate": 6.044002834763023e-06, "loss": 0.3123, "step": 9945 }, { "epoch": 0.4501470921022856, "grad_norm": 0.3764726932362269, "learning_rate": 6.043286057398576e-06, "loss": 0.4973, "step": 9946 }, { "epoch": 0.45019235121068113, "grad_norm": 0.5987607755953244, "learning_rate": 6.042569257617117e-06, "loss": 0.2867, "step": 9947 }, { "epoch": 0.4502376103190767, "grad_norm": 0.6875835265469559, "learning_rate": 6.041852435434044e-06, "loss": 0.3281, "step": 9948 }, { "epoch": 0.45028286942747225, "grad_norm": 0.31049192884635557, "learning_rate": 6.041135590864764e-06, "loss": 0.4826, "step": 9949 }, { "epoch": 0.45032812853586784, "grad_norm": 0.6758650923785007, "learning_rate": 6.040418723924677e-06, "loss": 0.3443, "step": 9950 }, { "epoch": 0.45037338764426343, "grad_norm": 0.28328861483255147, "learning_rate": 6.039701834629189e-06, "loss": 0.452, "step": 9951 }, { "epoch": 0.45041864675265897, "grad_norm": 0.6284360416631046, "learning_rate": 6.0389849229936995e-06, "loss": 0.3517, "step": 9952 }, { "epoch": 0.45046390586105456, "grad_norm": 0.654522012389242, "learning_rate": 6.038267989033616e-06, "loss": 0.3349, "step": 9953 }, { "epoch": 0.4505091649694501, "grad_norm": 0.6509169037023864, "learning_rate": 6.03755103276434e-06, "loss": 0.36, "step": 9954 }, { "epoch": 0.4505544240778457, "grad_norm": 0.613634808440209, "learning_rate": 6.036834054201283e-06, "loss": 0.3259, "step": 9955 }, { "epoch": 0.4505996831862412, "grad_norm": 0.6591701614555701, "learning_rate": 6.036117053359844e-06, "loss": 0.3353, "step": 9956 }, { "epoch": 0.4506449422946368, "grad_norm": 0.6706779719525753, "learning_rate": 6.035400030255431e-06, "loss": 0.3627, "step": 9957 }, { "epoch": 0.45069020140303234, "grad_norm": 0.5676917733584951, "learning_rate": 6.034682984903453e-06, "loss": 0.3374, "step": 9958 }, { "epoch": 0.45073546051142793, "grad_norm": 0.6145185773045998, "learning_rate": 6.0339659173193146e-06, "loss": 0.3101, "step": 9959 }, { "epoch": 0.45078071961982347, "grad_norm": 0.6095296510631012, "learning_rate": 6.033248827518424e-06, "loss": 0.3428, "step": 9960 }, { "epoch": 0.45082597872821906, "grad_norm": 0.9770970068781155, "learning_rate": 6.032531715516191e-06, "loss": 0.3494, "step": 9961 }, { "epoch": 0.4508712378366146, "grad_norm": 0.6038847607884669, "learning_rate": 6.03181458132802e-06, "loss": 0.3174, "step": 9962 }, { "epoch": 0.4509164969450102, "grad_norm": 0.6458052103580482, "learning_rate": 6.031097424969326e-06, "loss": 0.3378, "step": 9963 }, { "epoch": 0.45096175605340577, "grad_norm": 0.6262492290191575, "learning_rate": 6.030380246455513e-06, "loss": 0.3476, "step": 9964 }, { "epoch": 0.4510070151618013, "grad_norm": 0.6787994342237069, "learning_rate": 6.0296630458019925e-06, "loss": 0.3662, "step": 9965 }, { "epoch": 0.4510522742701969, "grad_norm": 0.47006450266382405, "learning_rate": 6.028945823024176e-06, "loss": 0.4631, "step": 9966 }, { "epoch": 0.45109753337859243, "grad_norm": 0.6496704316985095, "learning_rate": 6.0282285781374746e-06, "loss": 0.3742, "step": 9967 }, { "epoch": 0.451142792486988, "grad_norm": 0.3344543499700861, "learning_rate": 6.027511311157298e-06, "loss": 0.4825, "step": 9968 }, { "epoch": 0.45118805159538355, "grad_norm": 0.6464649385526088, "learning_rate": 6.026794022099061e-06, "loss": 0.3752, "step": 9969 }, { "epoch": 0.45123331070377914, "grad_norm": 0.6656227654498554, "learning_rate": 6.026076710978172e-06, "loss": 0.34, "step": 9970 }, { "epoch": 0.4512785698121747, "grad_norm": 0.3244651461172815, "learning_rate": 6.0253593778100475e-06, "loss": 0.4541, "step": 9971 }, { "epoch": 0.45132382892057027, "grad_norm": 0.631254794194664, "learning_rate": 6.0246420226100976e-06, "loss": 0.3385, "step": 9972 }, { "epoch": 0.4513690880289658, "grad_norm": 0.6291399810343311, "learning_rate": 6.023924645393739e-06, "loss": 0.3506, "step": 9973 }, { "epoch": 0.4514143471373614, "grad_norm": 0.6180885336147697, "learning_rate": 6.023207246176383e-06, "loss": 0.3311, "step": 9974 }, { "epoch": 0.451459606245757, "grad_norm": 0.6788040399146853, "learning_rate": 6.0224898249734466e-06, "loss": 0.3407, "step": 9975 }, { "epoch": 0.4515048653541525, "grad_norm": 0.7289625570092265, "learning_rate": 6.021772381800344e-06, "loss": 0.3521, "step": 9976 }, { "epoch": 0.4515501244625481, "grad_norm": 0.6269926510587083, "learning_rate": 6.021054916672491e-06, "loss": 0.3814, "step": 9977 }, { "epoch": 0.45159538357094364, "grad_norm": 0.6911981045719757, "learning_rate": 6.020337429605304e-06, "loss": 0.3716, "step": 9978 }, { "epoch": 0.45164064267933923, "grad_norm": 0.6034806566182572, "learning_rate": 6.019619920614199e-06, "loss": 0.2915, "step": 9979 }, { "epoch": 0.45168590178773477, "grad_norm": 0.62852179758487, "learning_rate": 6.0189023897145944e-06, "loss": 0.3452, "step": 9980 }, { "epoch": 0.45173116089613036, "grad_norm": 0.647496318788268, "learning_rate": 6.0181848369219055e-06, "loss": 0.3315, "step": 9981 }, { "epoch": 0.4517764200045259, "grad_norm": 0.6219216916623668, "learning_rate": 6.017467262251553e-06, "loss": 0.3492, "step": 9982 }, { "epoch": 0.4518216791129215, "grad_norm": 0.5964205467527914, "learning_rate": 6.016749665718953e-06, "loss": 0.284, "step": 9983 }, { "epoch": 0.451866938221317, "grad_norm": 0.6214559322845244, "learning_rate": 6.016032047339526e-06, "loss": 0.3743, "step": 9984 }, { "epoch": 0.4519121973297126, "grad_norm": 0.5930174872567172, "learning_rate": 6.01531440712869e-06, "loss": 0.3482, "step": 9985 }, { "epoch": 0.4519574564381082, "grad_norm": 0.6644054531840476, "learning_rate": 6.014596745101866e-06, "loss": 0.3815, "step": 9986 }, { "epoch": 0.45200271554650373, "grad_norm": 0.6062908315932274, "learning_rate": 6.0138790612744746e-06, "loss": 0.3439, "step": 9987 }, { "epoch": 0.4520479746548993, "grad_norm": 0.6729276946757002, "learning_rate": 6.013161355661935e-06, "loss": 0.3537, "step": 9988 }, { "epoch": 0.45209323376329485, "grad_norm": 0.7996378503409862, "learning_rate": 6.01244362827967e-06, "loss": 0.3689, "step": 9989 }, { "epoch": 0.45213849287169044, "grad_norm": 0.6228845676127899, "learning_rate": 6.011725879143102e-06, "loss": 0.3175, "step": 9990 }, { "epoch": 0.452183751980086, "grad_norm": 0.6135257766245439, "learning_rate": 6.01100810826765e-06, "loss": 0.3412, "step": 9991 }, { "epoch": 0.45222901108848157, "grad_norm": 0.6441029814830849, "learning_rate": 6.0102903156687406e-06, "loss": 0.328, "step": 9992 }, { "epoch": 0.4522742701968771, "grad_norm": 0.6892365356820823, "learning_rate": 6.009572501361794e-06, "loss": 0.3718, "step": 9993 }, { "epoch": 0.4523195293052727, "grad_norm": 0.6175618766242349, "learning_rate": 6.008854665362236e-06, "loss": 0.346, "step": 9994 }, { "epoch": 0.45236478841366823, "grad_norm": 0.6940736989878789, "learning_rate": 6.00813680768549e-06, "loss": 0.3737, "step": 9995 }, { "epoch": 0.4524100475220638, "grad_norm": 0.6413699942659871, "learning_rate": 6.007418928346979e-06, "loss": 0.3341, "step": 9996 }, { "epoch": 0.45245530663045935, "grad_norm": 0.46958215559153266, "learning_rate": 6.0067010273621295e-06, "loss": 0.5034, "step": 9997 }, { "epoch": 0.45250056573885494, "grad_norm": 0.6344552552649131, "learning_rate": 6.005983104746367e-06, "loss": 0.3008, "step": 9998 }, { "epoch": 0.45254582484725053, "grad_norm": 0.6182104080609797, "learning_rate": 6.005265160515117e-06, "loss": 0.3428, "step": 9999 }, { "epoch": 0.45259108395564607, "grad_norm": 0.6217252706723825, "learning_rate": 6.004547194683806e-06, "loss": 0.3569, "step": 10000 }, { "epoch": 0.45263634306404166, "grad_norm": 0.6887992737610831, "learning_rate": 6.003829207267863e-06, "loss": 0.3729, "step": 10001 }, { "epoch": 0.4526816021724372, "grad_norm": 0.6410925267438082, "learning_rate": 6.00311119828271e-06, "loss": 0.3274, "step": 10002 }, { "epoch": 0.4527268612808328, "grad_norm": 0.6721644066136669, "learning_rate": 6.002393167743782e-06, "loss": 0.359, "step": 10003 }, { "epoch": 0.4527721203892283, "grad_norm": 0.6646472727003765, "learning_rate": 6.001675115666501e-06, "loss": 0.3413, "step": 10004 }, { "epoch": 0.4528173794976239, "grad_norm": 0.6296735501659243, "learning_rate": 6.000957042066299e-06, "loss": 0.3476, "step": 10005 }, { "epoch": 0.45286263860601944, "grad_norm": 0.6067665919217092, "learning_rate": 6.0002389469586035e-06, "loss": 0.3066, "step": 10006 }, { "epoch": 0.45290789771441503, "grad_norm": 0.39061303340342207, "learning_rate": 5.999520830358845e-06, "loss": 0.493, "step": 10007 }, { "epoch": 0.45295315682281057, "grad_norm": 0.7134542623683294, "learning_rate": 5.998802692282454e-06, "loss": 0.3357, "step": 10008 }, { "epoch": 0.45299841593120616, "grad_norm": 0.7105305038574864, "learning_rate": 5.998084532744861e-06, "loss": 0.3566, "step": 10009 }, { "epoch": 0.45304367503960175, "grad_norm": 0.6754264853847363, "learning_rate": 5.997366351761497e-06, "loss": 0.3439, "step": 10010 }, { "epoch": 0.4530889341479973, "grad_norm": 0.6172224892039797, "learning_rate": 5.996648149347794e-06, "loss": 0.3478, "step": 10011 }, { "epoch": 0.45313419325639287, "grad_norm": 0.6040767266365203, "learning_rate": 5.995929925519181e-06, "loss": 0.359, "step": 10012 }, { "epoch": 0.4531794523647884, "grad_norm": 0.6136311395265972, "learning_rate": 5.9952116802910945e-06, "loss": 0.3322, "step": 10013 }, { "epoch": 0.453224711473184, "grad_norm": 0.6227029274185454, "learning_rate": 5.994493413678964e-06, "loss": 0.3624, "step": 10014 }, { "epoch": 0.45326997058157953, "grad_norm": 0.3313337340020232, "learning_rate": 5.993775125698226e-06, "loss": 0.4818, "step": 10015 }, { "epoch": 0.4533152296899751, "grad_norm": 0.5690430086005838, "learning_rate": 5.993056816364312e-06, "loss": 0.3081, "step": 10016 }, { "epoch": 0.45336048879837065, "grad_norm": 0.6301972050072376, "learning_rate": 5.992338485692657e-06, "loss": 0.4077, "step": 10017 }, { "epoch": 0.45340574790676624, "grad_norm": 0.28937216440665076, "learning_rate": 5.991620133698694e-06, "loss": 0.4723, "step": 10018 }, { "epoch": 0.4534510070151618, "grad_norm": 0.4170841645776839, "learning_rate": 5.990901760397863e-06, "loss": 0.4923, "step": 10019 }, { "epoch": 0.45349626612355737, "grad_norm": 0.626958759180911, "learning_rate": 5.990183365805594e-06, "loss": 0.3438, "step": 10020 }, { "epoch": 0.45354152523195296, "grad_norm": 0.7145696132172378, "learning_rate": 5.989464949937328e-06, "loss": 0.3539, "step": 10021 }, { "epoch": 0.4535867843403485, "grad_norm": 0.28916337801422026, "learning_rate": 5.988746512808497e-06, "loss": 0.4736, "step": 10022 }, { "epoch": 0.4536320434487441, "grad_norm": 0.8029091456258143, "learning_rate": 5.988028054434542e-06, "loss": 0.3466, "step": 10023 }, { "epoch": 0.4536773025571396, "grad_norm": 0.660620246553375, "learning_rate": 5.987309574830897e-06, "loss": 0.3498, "step": 10024 }, { "epoch": 0.4537225616655352, "grad_norm": 0.6306767920116505, "learning_rate": 5.986591074013002e-06, "loss": 0.3371, "step": 10025 }, { "epoch": 0.45376782077393074, "grad_norm": 0.6301389145434231, "learning_rate": 5.985872551996294e-06, "loss": 0.3572, "step": 10026 }, { "epoch": 0.45381307988232633, "grad_norm": 0.653586722565106, "learning_rate": 5.9851540087962134e-06, "loss": 0.3909, "step": 10027 }, { "epoch": 0.45385833899072187, "grad_norm": 0.7102396128575281, "learning_rate": 5.984435444428199e-06, "loss": 0.3169, "step": 10028 }, { "epoch": 0.45390359809911746, "grad_norm": 0.31410983897179384, "learning_rate": 5.9837168589076915e-06, "loss": 0.4836, "step": 10029 }, { "epoch": 0.453948857207513, "grad_norm": 0.6940641462750593, "learning_rate": 5.982998252250127e-06, "loss": 0.3427, "step": 10030 }, { "epoch": 0.4539941163159086, "grad_norm": 0.829965545364354, "learning_rate": 5.982279624470951e-06, "loss": 0.3049, "step": 10031 }, { "epoch": 0.4540393754243041, "grad_norm": 0.32688203077652456, "learning_rate": 5.981560975585604e-06, "loss": 0.4807, "step": 10032 }, { "epoch": 0.4540846345326997, "grad_norm": 0.6398806724095104, "learning_rate": 5.980842305609524e-06, "loss": 0.3039, "step": 10033 }, { "epoch": 0.4541298936410953, "grad_norm": 3.293531905029241, "learning_rate": 5.9801236145581575e-06, "loss": 0.369, "step": 10034 }, { "epoch": 0.45417515274949083, "grad_norm": 0.5979734329060953, "learning_rate": 5.979404902446944e-06, "loss": 0.3643, "step": 10035 }, { "epoch": 0.4542204118578864, "grad_norm": 0.6247016742930394, "learning_rate": 5.978686169291325e-06, "loss": 0.3453, "step": 10036 }, { "epoch": 0.45426567096628195, "grad_norm": 0.8567027597175304, "learning_rate": 5.977967415106748e-06, "loss": 0.3212, "step": 10037 }, { "epoch": 0.45431093007467754, "grad_norm": 0.6229950767660342, "learning_rate": 5.977248639908655e-06, "loss": 0.3437, "step": 10038 }, { "epoch": 0.4543561891830731, "grad_norm": 0.3089428583891721, "learning_rate": 5.976529843712489e-06, "loss": 0.4906, "step": 10039 }, { "epoch": 0.45440144829146867, "grad_norm": 0.6260978746591849, "learning_rate": 5.975811026533698e-06, "loss": 0.3332, "step": 10040 }, { "epoch": 0.4544467073998642, "grad_norm": 0.838058394573743, "learning_rate": 5.975092188387722e-06, "loss": 0.3336, "step": 10041 }, { "epoch": 0.4544919665082598, "grad_norm": 0.7191431229815619, "learning_rate": 5.974373329290012e-06, "loss": 0.3577, "step": 10042 }, { "epoch": 0.4545372256166553, "grad_norm": 1.3640864595801048, "learning_rate": 5.97365444925601e-06, "loss": 0.314, "step": 10043 }, { "epoch": 0.4545824847250509, "grad_norm": 0.6553645603471241, "learning_rate": 5.972935548301165e-06, "loss": 0.3784, "step": 10044 }, { "epoch": 0.4546277438334465, "grad_norm": 0.6919586894797458, "learning_rate": 5.972216626440923e-06, "loss": 0.3119, "step": 10045 }, { "epoch": 0.45467300294184204, "grad_norm": 0.6058196014582589, "learning_rate": 5.971497683690732e-06, "loss": 0.3095, "step": 10046 }, { "epoch": 0.45471826205023763, "grad_norm": 0.5859664888367128, "learning_rate": 5.970778720066039e-06, "loss": 0.3469, "step": 10047 }, { "epoch": 0.45476352115863317, "grad_norm": 0.6679169505616824, "learning_rate": 5.970059735582295e-06, "loss": 0.3582, "step": 10048 }, { "epoch": 0.45480878026702876, "grad_norm": 0.6748470447738677, "learning_rate": 5.969340730254943e-06, "loss": 0.3352, "step": 10049 }, { "epoch": 0.4548540393754243, "grad_norm": 0.6253274352764578, "learning_rate": 5.96862170409944e-06, "loss": 0.3526, "step": 10050 }, { "epoch": 0.4548992984838199, "grad_norm": 0.3514815605156929, "learning_rate": 5.967902657131228e-06, "loss": 0.5057, "step": 10051 }, { "epoch": 0.4549445575922154, "grad_norm": 0.6386680369414963, "learning_rate": 5.967183589365761e-06, "loss": 0.3268, "step": 10052 }, { "epoch": 0.454989816700611, "grad_norm": 0.6423595813623103, "learning_rate": 5.96646450081849e-06, "loss": 0.3392, "step": 10053 }, { "epoch": 0.45503507580900654, "grad_norm": 0.5911899645875737, "learning_rate": 5.965745391504866e-06, "loss": 0.3584, "step": 10054 }, { "epoch": 0.45508033491740213, "grad_norm": 0.6756603866248126, "learning_rate": 5.965026261440338e-06, "loss": 0.3638, "step": 10055 }, { "epoch": 0.4551255940257977, "grad_norm": 0.5730749821872515, "learning_rate": 5.964307110640359e-06, "loss": 0.2995, "step": 10056 }, { "epoch": 0.45517085313419325, "grad_norm": 0.647423316803012, "learning_rate": 5.963587939120383e-06, "loss": 0.3, "step": 10057 }, { "epoch": 0.45521611224258884, "grad_norm": 0.43464516641582995, "learning_rate": 5.962868746895863e-06, "loss": 0.5188, "step": 10058 }, { "epoch": 0.4552613713509844, "grad_norm": 0.6385870718981455, "learning_rate": 5.962149533982249e-06, "loss": 0.3265, "step": 10059 }, { "epoch": 0.45530663045937997, "grad_norm": 0.586098357918665, "learning_rate": 5.961430300394996e-06, "loss": 0.3227, "step": 10060 }, { "epoch": 0.4553518895677755, "grad_norm": 0.6571832827328082, "learning_rate": 5.960711046149561e-06, "loss": 0.324, "step": 10061 }, { "epoch": 0.4553971486761711, "grad_norm": 0.6963240291123793, "learning_rate": 5.959991771261393e-06, "loss": 0.3186, "step": 10062 }, { "epoch": 0.4554424077845666, "grad_norm": 0.675978472481428, "learning_rate": 5.959272475745953e-06, "loss": 0.3183, "step": 10063 }, { "epoch": 0.4554876668929622, "grad_norm": 0.6799292075558153, "learning_rate": 5.958553159618693e-06, "loss": 0.3561, "step": 10064 }, { "epoch": 0.45553292600135775, "grad_norm": 0.6488099183480327, "learning_rate": 5.957833822895069e-06, "loss": 0.327, "step": 10065 }, { "epoch": 0.45557818510975334, "grad_norm": 0.6392367712514024, "learning_rate": 5.957114465590537e-06, "loss": 0.3028, "step": 10066 }, { "epoch": 0.4556234442181489, "grad_norm": 0.650807133681978, "learning_rate": 5.9563950877205564e-06, "loss": 0.3145, "step": 10067 }, { "epoch": 0.45566870332654447, "grad_norm": 0.35525035348306405, "learning_rate": 5.955675689300583e-06, "loss": 0.4869, "step": 10068 }, { "epoch": 0.45571396243494006, "grad_norm": 0.3131661760499863, "learning_rate": 5.954956270346074e-06, "loss": 0.4686, "step": 10069 }, { "epoch": 0.4557592215433356, "grad_norm": 0.28290393614239373, "learning_rate": 5.954236830872486e-06, "loss": 0.4722, "step": 10070 }, { "epoch": 0.4558044806517312, "grad_norm": 0.6734508988009212, "learning_rate": 5.953517370895281e-06, "loss": 0.3424, "step": 10071 }, { "epoch": 0.4558497397601267, "grad_norm": 0.6159152425039114, "learning_rate": 5.9527978904299156e-06, "loss": 0.3573, "step": 10072 }, { "epoch": 0.4558949988685223, "grad_norm": 0.40209035644627683, "learning_rate": 5.952078389491849e-06, "loss": 0.4681, "step": 10073 }, { "epoch": 0.45594025797691784, "grad_norm": 0.3838006111097636, "learning_rate": 5.951358868096543e-06, "loss": 0.4741, "step": 10074 }, { "epoch": 0.45598551708531343, "grad_norm": 0.5806324459925772, "learning_rate": 5.950639326259456e-06, "loss": 0.3061, "step": 10075 }, { "epoch": 0.45603077619370896, "grad_norm": 0.2915926212204432, "learning_rate": 5.949919763996049e-06, "loss": 0.4833, "step": 10076 }, { "epoch": 0.45607603530210455, "grad_norm": 0.6698235355513487, "learning_rate": 5.949200181321785e-06, "loss": 0.2768, "step": 10077 }, { "epoch": 0.4561212944105001, "grad_norm": 0.661875473907528, "learning_rate": 5.948480578252124e-06, "loss": 0.3365, "step": 10078 }, { "epoch": 0.4561665535188957, "grad_norm": 0.7092775293804304, "learning_rate": 5.9477609548025295e-06, "loss": 0.334, "step": 10079 }, { "epoch": 0.45621181262729127, "grad_norm": 0.6381995964901996, "learning_rate": 5.9470413109884605e-06, "loss": 0.3144, "step": 10080 }, { "epoch": 0.4562570717356868, "grad_norm": 0.7031847456684301, "learning_rate": 5.946321646825385e-06, "loss": 0.3292, "step": 10081 }, { "epoch": 0.4563023308440824, "grad_norm": 0.6255334778708324, "learning_rate": 5.945601962328762e-06, "loss": 0.3261, "step": 10082 }, { "epoch": 0.45634758995247793, "grad_norm": 0.680176425307315, "learning_rate": 5.9448822575140575e-06, "loss": 0.3444, "step": 10083 }, { "epoch": 0.4563928490608735, "grad_norm": 0.5395328200815813, "learning_rate": 5.944162532396735e-06, "loss": 0.4819, "step": 10084 }, { "epoch": 0.45643810816926905, "grad_norm": 0.3849787355614753, "learning_rate": 5.94344278699226e-06, "loss": 0.5027, "step": 10085 }, { "epoch": 0.45648336727766464, "grad_norm": 0.6424042333189114, "learning_rate": 5.942723021316096e-06, "loss": 0.3238, "step": 10086 }, { "epoch": 0.4565286263860602, "grad_norm": 0.31396481615912764, "learning_rate": 5.94200323538371e-06, "loss": 0.4707, "step": 10087 }, { "epoch": 0.45657388549445577, "grad_norm": 0.6379754659076351, "learning_rate": 5.941283429210568e-06, "loss": 0.3282, "step": 10088 }, { "epoch": 0.4566191446028513, "grad_norm": 0.80606623399743, "learning_rate": 5.940563602812136e-06, "loss": 0.3359, "step": 10089 }, { "epoch": 0.4566644037112469, "grad_norm": 0.5049334906739511, "learning_rate": 5.939843756203881e-06, "loss": 0.5133, "step": 10090 }, { "epoch": 0.4567096628196424, "grad_norm": 0.4405229130785534, "learning_rate": 5.939123889401269e-06, "loss": 0.4887, "step": 10091 }, { "epoch": 0.456754921928038, "grad_norm": 0.6133949372009695, "learning_rate": 5.9384040024197706e-06, "loss": 0.3418, "step": 10092 }, { "epoch": 0.4568001810364336, "grad_norm": 0.6528224932137077, "learning_rate": 5.937684095274852e-06, "loss": 0.2896, "step": 10093 }, { "epoch": 0.45684544014482914, "grad_norm": 0.6773183075316381, "learning_rate": 5.9369641679819825e-06, "loss": 0.3126, "step": 10094 }, { "epoch": 0.45689069925322473, "grad_norm": 0.6173430889617483, "learning_rate": 5.936244220556629e-06, "loss": 0.2843, "step": 10095 }, { "epoch": 0.45693595836162026, "grad_norm": 0.6545161701377518, "learning_rate": 5.935524253014263e-06, "loss": 0.3498, "step": 10096 }, { "epoch": 0.45698121747001585, "grad_norm": 0.6926825859793615, "learning_rate": 5.934804265370355e-06, "loss": 0.3623, "step": 10097 }, { "epoch": 0.4570264765784114, "grad_norm": 0.6709432458048451, "learning_rate": 5.934084257640374e-06, "loss": 0.3769, "step": 10098 }, { "epoch": 0.457071735686807, "grad_norm": 0.7025990623404197, "learning_rate": 5.933364229839791e-06, "loss": 0.4754, "step": 10099 }, { "epoch": 0.4571169947952025, "grad_norm": 0.6522116942364439, "learning_rate": 5.9326441819840785e-06, "loss": 0.3262, "step": 10100 }, { "epoch": 0.4571622539035981, "grad_norm": 0.7262529789467604, "learning_rate": 5.931924114088704e-06, "loss": 0.3285, "step": 10101 }, { "epoch": 0.45720751301199364, "grad_norm": 0.6095318022692825, "learning_rate": 5.931204026169146e-06, "loss": 0.2971, "step": 10102 }, { "epoch": 0.45725277212038923, "grad_norm": 0.6440862160573638, "learning_rate": 5.930483918240871e-06, "loss": 0.3, "step": 10103 }, { "epoch": 0.4572980312287848, "grad_norm": 0.3511639562008092, "learning_rate": 5.929763790319355e-06, "loss": 0.4633, "step": 10104 }, { "epoch": 0.45734329033718035, "grad_norm": 0.6597290674517585, "learning_rate": 5.929043642420072e-06, "loss": 0.3472, "step": 10105 }, { "epoch": 0.45738854944557594, "grad_norm": 0.6926020451172388, "learning_rate": 5.928323474558492e-06, "loss": 0.3625, "step": 10106 }, { "epoch": 0.4574338085539715, "grad_norm": 0.6236702207782863, "learning_rate": 5.9276032867500935e-06, "loss": 0.3357, "step": 10107 }, { "epoch": 0.45747906766236707, "grad_norm": 0.6585100389290004, "learning_rate": 5.926883079010348e-06, "loss": 0.3405, "step": 10108 }, { "epoch": 0.4575243267707626, "grad_norm": 0.6536198954693773, "learning_rate": 5.926162851354733e-06, "loss": 0.3067, "step": 10109 }, { "epoch": 0.4575695858791582, "grad_norm": 0.6709630553993108, "learning_rate": 5.925442603798721e-06, "loss": 0.3512, "step": 10110 }, { "epoch": 0.4576148449875537, "grad_norm": 0.6357049472328576, "learning_rate": 5.924722336357793e-06, "loss": 0.3776, "step": 10111 }, { "epoch": 0.4576601040959493, "grad_norm": 0.6401419572145257, "learning_rate": 5.924002049047419e-06, "loss": 0.3589, "step": 10112 }, { "epoch": 0.45770536320434485, "grad_norm": 0.6412345227285043, "learning_rate": 5.92328174188308e-06, "loss": 0.3536, "step": 10113 }, { "epoch": 0.45775062231274044, "grad_norm": 0.35092114987906203, "learning_rate": 5.922561414880253e-06, "loss": 0.4642, "step": 10114 }, { "epoch": 0.45779588142113603, "grad_norm": 0.3163893146299063, "learning_rate": 5.9218410680544135e-06, "loss": 0.467, "step": 10115 }, { "epoch": 0.45784114052953157, "grad_norm": 0.6642074094805996, "learning_rate": 5.92112070142104e-06, "loss": 0.3683, "step": 10116 }, { "epoch": 0.45788639963792716, "grad_norm": 0.6728589957006714, "learning_rate": 5.920400314995612e-06, "loss": 0.3682, "step": 10117 }, { "epoch": 0.4579316587463227, "grad_norm": 0.6188594622706161, "learning_rate": 5.919679908793609e-06, "loss": 0.353, "step": 10118 }, { "epoch": 0.4579769178547183, "grad_norm": 0.6152141100731404, "learning_rate": 5.91895948283051e-06, "loss": 0.3518, "step": 10119 }, { "epoch": 0.4580221769631138, "grad_norm": 1.3605112239088757, "learning_rate": 5.918239037121791e-06, "loss": 0.3551, "step": 10120 }, { "epoch": 0.4580674360715094, "grad_norm": 0.6493685868114359, "learning_rate": 5.917518571682938e-06, "loss": 0.3611, "step": 10121 }, { "epoch": 0.45811269517990494, "grad_norm": 0.6374143410362819, "learning_rate": 5.9167980865294285e-06, "loss": 0.3527, "step": 10122 }, { "epoch": 0.45815795428830053, "grad_norm": 0.6177615252904042, "learning_rate": 5.916077581676743e-06, "loss": 0.3147, "step": 10123 }, { "epoch": 0.45820321339669606, "grad_norm": 0.6017872906281839, "learning_rate": 5.915357057140364e-06, "loss": 0.33, "step": 10124 }, { "epoch": 0.45824847250509165, "grad_norm": 2.520985279055733, "learning_rate": 5.914636512935773e-06, "loss": 0.2914, "step": 10125 }, { "epoch": 0.4582937316134872, "grad_norm": 0.6258472550396663, "learning_rate": 5.913915949078453e-06, "loss": 0.3159, "step": 10126 }, { "epoch": 0.4583389907218828, "grad_norm": 0.6200775670641495, "learning_rate": 5.913195365583886e-06, "loss": 0.3225, "step": 10127 }, { "epoch": 0.45838424983027837, "grad_norm": 0.6198459344095034, "learning_rate": 5.912474762467554e-06, "loss": 0.3223, "step": 10128 }, { "epoch": 0.4584295089386739, "grad_norm": 0.663417256125671, "learning_rate": 5.911754139744944e-06, "loss": 0.3457, "step": 10129 }, { "epoch": 0.4584747680470695, "grad_norm": 0.6357687156435802, "learning_rate": 5.911033497431535e-06, "loss": 0.3483, "step": 10130 }, { "epoch": 0.458520027155465, "grad_norm": 0.6808329048633953, "learning_rate": 5.910312835542818e-06, "loss": 0.3519, "step": 10131 }, { "epoch": 0.4585652862638606, "grad_norm": 0.6184081392584136, "learning_rate": 5.909592154094272e-06, "loss": 0.3417, "step": 10132 }, { "epoch": 0.45861054537225615, "grad_norm": 0.8011839566403799, "learning_rate": 5.908871453101382e-06, "loss": 0.3706, "step": 10133 }, { "epoch": 0.45865580448065174, "grad_norm": 0.6182329034996796, "learning_rate": 5.908150732579638e-06, "loss": 0.3616, "step": 10134 }, { "epoch": 0.4587010635890473, "grad_norm": 0.5550657593697929, "learning_rate": 5.907429992544524e-06, "loss": 0.4797, "step": 10135 }, { "epoch": 0.45874632269744287, "grad_norm": 0.6363540464766801, "learning_rate": 5.906709233011526e-06, "loss": 0.2912, "step": 10136 }, { "epoch": 0.4587915818058384, "grad_norm": 0.6775860869864486, "learning_rate": 5.905988453996132e-06, "loss": 0.3403, "step": 10137 }, { "epoch": 0.458836840914234, "grad_norm": 0.677654388601567, "learning_rate": 5.905267655513828e-06, "loss": 0.3386, "step": 10138 }, { "epoch": 0.4588821000226296, "grad_norm": 0.6242789450230746, "learning_rate": 5.904546837580102e-06, "loss": 0.3217, "step": 10139 }, { "epoch": 0.4589273591310251, "grad_norm": 0.3399240313310149, "learning_rate": 5.903826000210444e-06, "loss": 0.5025, "step": 10140 }, { "epoch": 0.4589726182394207, "grad_norm": 0.7333455939094149, "learning_rate": 5.903105143420339e-06, "loss": 0.3618, "step": 10141 }, { "epoch": 0.45901787734781624, "grad_norm": 0.32216027105417944, "learning_rate": 5.9023842672252805e-06, "loss": 0.4924, "step": 10142 }, { "epoch": 0.45906313645621183, "grad_norm": 0.3183411706250615, "learning_rate": 5.901663371640754e-06, "loss": 0.4918, "step": 10143 }, { "epoch": 0.45910839556460736, "grad_norm": 0.6769035244721872, "learning_rate": 5.9009424566822515e-06, "loss": 0.3371, "step": 10144 }, { "epoch": 0.45915365467300295, "grad_norm": 0.6855526785778393, "learning_rate": 5.900221522365262e-06, "loss": 0.3708, "step": 10145 }, { "epoch": 0.4591989137813985, "grad_norm": 0.30930897707540267, "learning_rate": 5.899500568705279e-06, "loss": 0.5104, "step": 10146 }, { "epoch": 0.4592441728897941, "grad_norm": 0.6335489140165369, "learning_rate": 5.898779595717788e-06, "loss": 0.3168, "step": 10147 }, { "epoch": 0.4592894319981896, "grad_norm": 0.6643824401837953, "learning_rate": 5.898058603418287e-06, "loss": 0.3341, "step": 10148 }, { "epoch": 0.4593346911065852, "grad_norm": 0.6753211229313297, "learning_rate": 5.897337591822262e-06, "loss": 0.3671, "step": 10149 }, { "epoch": 0.4593799502149808, "grad_norm": 0.6164101604781804, "learning_rate": 5.896616560945211e-06, "loss": 0.337, "step": 10150 }, { "epoch": 0.4594252093233763, "grad_norm": 0.35054630893544075, "learning_rate": 5.89589551080262e-06, "loss": 0.4682, "step": 10151 }, { "epoch": 0.4594704684317719, "grad_norm": 0.6409338224630452, "learning_rate": 5.89517444140999e-06, "loss": 0.358, "step": 10152 }, { "epoch": 0.45951572754016745, "grad_norm": 0.6369081943211347, "learning_rate": 5.8944533527828095e-06, "loss": 0.3228, "step": 10153 }, { "epoch": 0.45956098664856304, "grad_norm": 0.6745637529016413, "learning_rate": 5.893732244936572e-06, "loss": 0.321, "step": 10154 }, { "epoch": 0.4596062457569586, "grad_norm": 0.33008809593587707, "learning_rate": 5.893011117886775e-06, "loss": 0.4715, "step": 10155 }, { "epoch": 0.45965150486535417, "grad_norm": 0.7145102204647745, "learning_rate": 5.892289971648912e-06, "loss": 0.3288, "step": 10156 }, { "epoch": 0.4596967639737497, "grad_norm": 0.6899872862577504, "learning_rate": 5.8915688062384755e-06, "loss": 0.3219, "step": 10157 }, { "epoch": 0.4597420230821453, "grad_norm": 0.5941872763439713, "learning_rate": 5.890847621670966e-06, "loss": 0.358, "step": 10158 }, { "epoch": 0.4597872821905408, "grad_norm": 0.6521882399774729, "learning_rate": 5.8901264179618755e-06, "loss": 0.3593, "step": 10159 }, { "epoch": 0.4598325412989364, "grad_norm": 0.41761082565214547, "learning_rate": 5.889405195126704e-06, "loss": 0.4555, "step": 10160 }, { "epoch": 0.45987780040733195, "grad_norm": 0.5930093261474315, "learning_rate": 5.8886839531809455e-06, "loss": 0.3178, "step": 10161 }, { "epoch": 0.45992305951572754, "grad_norm": 0.6120930429914294, "learning_rate": 5.8879626921400975e-06, "loss": 0.3226, "step": 10162 }, { "epoch": 0.45996831862412313, "grad_norm": 0.30768656389224336, "learning_rate": 5.88724141201966e-06, "loss": 0.4654, "step": 10163 }, { "epoch": 0.46001357773251866, "grad_norm": 0.29084001888272587, "learning_rate": 5.886520112835128e-06, "loss": 0.4662, "step": 10164 }, { "epoch": 0.46005883684091425, "grad_norm": 0.28239681672321787, "learning_rate": 5.8857987946020025e-06, "loss": 0.4972, "step": 10165 }, { "epoch": 0.4601040959493098, "grad_norm": 0.6171878946319844, "learning_rate": 5.8850774573357804e-06, "loss": 0.3313, "step": 10166 }, { "epoch": 0.4601493550577054, "grad_norm": 0.6448552071058545, "learning_rate": 5.884356101051962e-06, "loss": 0.3886, "step": 10167 }, { "epoch": 0.4601946141661009, "grad_norm": 0.6837662949759267, "learning_rate": 5.8836347257660485e-06, "loss": 0.3452, "step": 10168 }, { "epoch": 0.4602398732744965, "grad_norm": 0.6473672470153388, "learning_rate": 5.882913331493538e-06, "loss": 0.3138, "step": 10169 }, { "epoch": 0.46028513238289204, "grad_norm": 0.601651739774847, "learning_rate": 5.882191918249931e-06, "loss": 0.3047, "step": 10170 }, { "epoch": 0.4603303914912876, "grad_norm": 0.35601805678176673, "learning_rate": 5.881470486050731e-06, "loss": 0.4877, "step": 10171 }, { "epoch": 0.46037565059968316, "grad_norm": 0.6413091667459374, "learning_rate": 5.880749034911435e-06, "loss": 0.3386, "step": 10172 }, { "epoch": 0.46042090970807875, "grad_norm": 0.5842090741092955, "learning_rate": 5.880027564847549e-06, "loss": 0.3215, "step": 10173 }, { "epoch": 0.46046616881647434, "grad_norm": 0.7689309377557272, "learning_rate": 5.879306075874572e-06, "loss": 0.3427, "step": 10174 }, { "epoch": 0.4605114279248699, "grad_norm": 0.3315562112584059, "learning_rate": 5.8785845680080085e-06, "loss": 0.4716, "step": 10175 }, { "epoch": 0.46055668703326547, "grad_norm": 0.6272824790835605, "learning_rate": 5.877863041263362e-06, "loss": 0.3191, "step": 10176 }, { "epoch": 0.460601946141661, "grad_norm": 0.6484198612109718, "learning_rate": 5.877141495656136e-06, "loss": 0.3509, "step": 10177 }, { "epoch": 0.4606472052500566, "grad_norm": 0.5869832697863312, "learning_rate": 5.876419931201829e-06, "loss": 0.2981, "step": 10178 }, { "epoch": 0.4606924643584521, "grad_norm": 0.6531080746471695, "learning_rate": 5.875698347915954e-06, "loss": 0.3253, "step": 10179 }, { "epoch": 0.4607377234668477, "grad_norm": 0.6298903489672832, "learning_rate": 5.8749767458140075e-06, "loss": 0.3144, "step": 10180 }, { "epoch": 0.46078298257524325, "grad_norm": 0.5973823819739114, "learning_rate": 5.8742551249115e-06, "loss": 0.328, "step": 10181 }, { "epoch": 0.46082824168363884, "grad_norm": 0.39421276793144844, "learning_rate": 5.873533485223934e-06, "loss": 0.4677, "step": 10182 }, { "epoch": 0.4608735007920344, "grad_norm": 0.33173025820647933, "learning_rate": 5.872811826766817e-06, "loss": 0.4668, "step": 10183 }, { "epoch": 0.46091875990042996, "grad_norm": 0.2915282231759761, "learning_rate": 5.872090149555653e-06, "loss": 0.4817, "step": 10184 }, { "epoch": 0.4609640190088255, "grad_norm": 1.2720611030473974, "learning_rate": 5.871368453605951e-06, "loss": 0.3945, "step": 10185 }, { "epoch": 0.4610092781172211, "grad_norm": 0.429056553504757, "learning_rate": 5.870646738933218e-06, "loss": 0.4937, "step": 10186 }, { "epoch": 0.4610545372256167, "grad_norm": 0.4424597400168609, "learning_rate": 5.869925005552959e-06, "loss": 0.4667, "step": 10187 }, { "epoch": 0.4610997963340122, "grad_norm": 0.6422267533943191, "learning_rate": 5.869203253480684e-06, "loss": 0.334, "step": 10188 }, { "epoch": 0.4611450554424078, "grad_norm": 0.624955624943747, "learning_rate": 5.868481482731903e-06, "loss": 0.3131, "step": 10189 }, { "epoch": 0.46119031455080334, "grad_norm": 0.6120821241352987, "learning_rate": 5.867759693322119e-06, "loss": 0.3318, "step": 10190 }, { "epoch": 0.4612355736591989, "grad_norm": 0.30119929184566724, "learning_rate": 5.867037885266845e-06, "loss": 0.5031, "step": 10191 }, { "epoch": 0.46128083276759446, "grad_norm": 0.6428062728626699, "learning_rate": 5.86631605858159e-06, "loss": 0.3349, "step": 10192 }, { "epoch": 0.46132609187599005, "grad_norm": 0.28165607627203954, "learning_rate": 5.865594213281864e-06, "loss": 0.4707, "step": 10193 }, { "epoch": 0.4613713509843856, "grad_norm": 0.6336775479151228, "learning_rate": 5.864872349383177e-06, "loss": 0.356, "step": 10194 }, { "epoch": 0.4614166100927812, "grad_norm": 0.2913933081534654, "learning_rate": 5.864150466901038e-06, "loss": 0.4849, "step": 10195 }, { "epoch": 0.4614618692011767, "grad_norm": 0.598970252669126, "learning_rate": 5.863428565850961e-06, "loss": 0.3303, "step": 10196 }, { "epoch": 0.4615071283095723, "grad_norm": 0.608507335922516, "learning_rate": 5.862706646248455e-06, "loss": 0.3021, "step": 10197 }, { "epoch": 0.4615523874179679, "grad_norm": 0.6200924088031894, "learning_rate": 5.861984708109035e-06, "loss": 0.3543, "step": 10198 }, { "epoch": 0.4615976465263634, "grad_norm": 0.5752767246422879, "learning_rate": 5.861262751448208e-06, "loss": 0.3112, "step": 10199 }, { "epoch": 0.461642905634759, "grad_norm": 0.6281904087924641, "learning_rate": 5.860540776281492e-06, "loss": 0.3551, "step": 10200 }, { "epoch": 0.46168816474315455, "grad_norm": 0.6328412769197119, "learning_rate": 5.859818782624395e-06, "loss": 0.341, "step": 10201 }, { "epoch": 0.46173342385155014, "grad_norm": 0.6876693828114777, "learning_rate": 5.8590967704924365e-06, "loss": 0.3139, "step": 10202 }, { "epoch": 0.4617786829599457, "grad_norm": 0.31990046259109833, "learning_rate": 5.858374739901125e-06, "loss": 0.4687, "step": 10203 }, { "epoch": 0.46182394206834126, "grad_norm": 0.6150479228587775, "learning_rate": 5.857652690865976e-06, "loss": 0.3444, "step": 10204 }, { "epoch": 0.4618692011767368, "grad_norm": 0.2747063505928363, "learning_rate": 5.856930623402506e-06, "loss": 0.4619, "step": 10205 }, { "epoch": 0.4619144602851324, "grad_norm": 0.6457097815772058, "learning_rate": 5.856208537526229e-06, "loss": 0.3926, "step": 10206 }, { "epoch": 0.4619597193935279, "grad_norm": 0.6171431621945438, "learning_rate": 5.855486433252658e-06, "loss": 0.3503, "step": 10207 }, { "epoch": 0.4620049785019235, "grad_norm": 0.6364217330608023, "learning_rate": 5.854764310597314e-06, "loss": 0.3435, "step": 10208 }, { "epoch": 0.4620502376103191, "grad_norm": 0.6389061843500499, "learning_rate": 5.8540421695757064e-06, "loss": 0.3563, "step": 10209 }, { "epoch": 0.46209549671871464, "grad_norm": 0.6891372948551845, "learning_rate": 5.85332001020336e-06, "loss": 0.3703, "step": 10210 }, { "epoch": 0.46214075582711023, "grad_norm": 0.6416000376271216, "learning_rate": 5.852597832495785e-06, "loss": 0.3664, "step": 10211 }, { "epoch": 0.46218601493550576, "grad_norm": 0.6537246700366226, "learning_rate": 5.851875636468501e-06, "loss": 0.3413, "step": 10212 }, { "epoch": 0.46223127404390135, "grad_norm": 0.3856112248677831, "learning_rate": 5.851153422137026e-06, "loss": 0.4696, "step": 10213 }, { "epoch": 0.4622765331522969, "grad_norm": 0.6338681957306734, "learning_rate": 5.850431189516878e-06, "loss": 0.377, "step": 10214 }, { "epoch": 0.4623217922606925, "grad_norm": 0.5774558126226176, "learning_rate": 5.849708938623575e-06, "loss": 0.3399, "step": 10215 }, { "epoch": 0.462367051369088, "grad_norm": 0.6104011755295417, "learning_rate": 5.848986669472637e-06, "loss": 0.3139, "step": 10216 }, { "epoch": 0.4624123104774836, "grad_norm": 1.0495709555357173, "learning_rate": 5.848264382079584e-06, "loss": 0.3121, "step": 10217 }, { "epoch": 0.46245756958587914, "grad_norm": 0.7581803532503051, "learning_rate": 5.847542076459933e-06, "loss": 0.3383, "step": 10218 }, { "epoch": 0.4625028286942747, "grad_norm": 0.830956095614911, "learning_rate": 5.846819752629208e-06, "loss": 0.2785, "step": 10219 }, { "epoch": 0.46254808780267026, "grad_norm": 0.3575301386080226, "learning_rate": 5.846097410602925e-06, "loss": 0.4788, "step": 10220 }, { "epoch": 0.46259334691106585, "grad_norm": 0.6153185689546572, "learning_rate": 5.84537505039661e-06, "loss": 0.3295, "step": 10221 }, { "epoch": 0.46263860601946144, "grad_norm": 0.6523731086184149, "learning_rate": 5.844652672025779e-06, "loss": 0.3411, "step": 10222 }, { "epoch": 0.462683865127857, "grad_norm": 0.3059244969304898, "learning_rate": 5.843930275505958e-06, "loss": 0.465, "step": 10223 }, { "epoch": 0.46272912423625256, "grad_norm": 0.6114200072668081, "learning_rate": 5.843207860852667e-06, "loss": 0.3175, "step": 10224 }, { "epoch": 0.4627743833446481, "grad_norm": 0.6347751736428424, "learning_rate": 5.842485428081428e-06, "loss": 0.3491, "step": 10225 }, { "epoch": 0.4628196424530437, "grad_norm": 0.2788723118165968, "learning_rate": 5.841762977207764e-06, "loss": 0.4776, "step": 10226 }, { "epoch": 0.4628649015614392, "grad_norm": 0.2938092295138972, "learning_rate": 5.841040508247201e-06, "loss": 0.4959, "step": 10227 }, { "epoch": 0.4629101606698348, "grad_norm": 0.2843870448715732, "learning_rate": 5.840318021215259e-06, "loss": 0.4595, "step": 10228 }, { "epoch": 0.46295541977823035, "grad_norm": 0.778637112356431, "learning_rate": 5.839595516127464e-06, "loss": 0.3508, "step": 10229 }, { "epoch": 0.46300067888662594, "grad_norm": 0.6174599810697912, "learning_rate": 5.838872992999339e-06, "loss": 0.3166, "step": 10230 }, { "epoch": 0.4630459379950215, "grad_norm": 0.7235202774561535, "learning_rate": 5.8381504518464114e-06, "loss": 0.3472, "step": 10231 }, { "epoch": 0.46309119710341706, "grad_norm": 0.6361329502755545, "learning_rate": 5.837427892684205e-06, "loss": 0.3, "step": 10232 }, { "epoch": 0.46313645621181265, "grad_norm": 0.6060017986409006, "learning_rate": 5.836705315528244e-06, "loss": 0.333, "step": 10233 }, { "epoch": 0.4631817153202082, "grad_norm": 0.6487855691336682, "learning_rate": 5.8359827203940555e-06, "loss": 0.3558, "step": 10234 }, { "epoch": 0.4632269744286038, "grad_norm": 0.6297909598410385, "learning_rate": 5.835260107297167e-06, "loss": 0.3884, "step": 10235 }, { "epoch": 0.4632722335369993, "grad_norm": 0.40330348336805316, "learning_rate": 5.834537476253102e-06, "loss": 0.4537, "step": 10236 }, { "epoch": 0.4633174926453949, "grad_norm": 0.609177172025508, "learning_rate": 5.833814827277391e-06, "loss": 0.3219, "step": 10237 }, { "epoch": 0.46336275175379044, "grad_norm": 0.747417185536518, "learning_rate": 5.83309216038556e-06, "loss": 0.3323, "step": 10238 }, { "epoch": 0.463408010862186, "grad_norm": 0.2943224458498767, "learning_rate": 5.832369475593138e-06, "loss": 0.4879, "step": 10239 }, { "epoch": 0.46345326997058156, "grad_norm": 0.6302865670246146, "learning_rate": 5.831646772915651e-06, "loss": 0.3296, "step": 10240 }, { "epoch": 0.46349852907897715, "grad_norm": 0.33260995177021796, "learning_rate": 5.8309240523686295e-06, "loss": 0.4706, "step": 10241 }, { "epoch": 0.4635437881873727, "grad_norm": 0.6227230630505952, "learning_rate": 5.830201313967603e-06, "loss": 0.3561, "step": 10242 }, { "epoch": 0.4635890472957683, "grad_norm": 0.5977630892372184, "learning_rate": 5.829478557728098e-06, "loss": 0.2956, "step": 10243 }, { "epoch": 0.46363430640416387, "grad_norm": 0.3112423505124785, "learning_rate": 5.828755783665649e-06, "loss": 0.4798, "step": 10244 }, { "epoch": 0.4636795655125594, "grad_norm": 0.7156177789007848, "learning_rate": 5.828032991795781e-06, "loss": 0.3492, "step": 10245 }, { "epoch": 0.463724824620955, "grad_norm": 0.6311863336708481, "learning_rate": 5.827310182134029e-06, "loss": 0.3904, "step": 10246 }, { "epoch": 0.4637700837293505, "grad_norm": 0.6156550034528948, "learning_rate": 5.8265873546959205e-06, "loss": 0.2968, "step": 10247 }, { "epoch": 0.4638153428377461, "grad_norm": 0.6030965572245929, "learning_rate": 5.825864509496991e-06, "loss": 0.3233, "step": 10248 }, { "epoch": 0.46386060194614165, "grad_norm": 0.6954897934939829, "learning_rate": 5.825141646552767e-06, "loss": 0.3911, "step": 10249 }, { "epoch": 0.46390586105453724, "grad_norm": 0.6408441042598859, "learning_rate": 5.8244187658787855e-06, "loss": 0.3344, "step": 10250 }, { "epoch": 0.4639511201629328, "grad_norm": 0.6074890088107604, "learning_rate": 5.8236958674905746e-06, "loss": 0.3723, "step": 10251 }, { "epoch": 0.46399637927132836, "grad_norm": 0.6334622958231805, "learning_rate": 5.82297295140367e-06, "loss": 0.3434, "step": 10252 }, { "epoch": 0.4640416383797239, "grad_norm": 0.5730384902807221, "learning_rate": 5.822250017633605e-06, "loss": 0.3199, "step": 10253 }, { "epoch": 0.4640868974881195, "grad_norm": 0.6189461609429648, "learning_rate": 5.821527066195911e-06, "loss": 0.3377, "step": 10254 }, { "epoch": 0.464132156596515, "grad_norm": 0.6371926272056898, "learning_rate": 5.820804097106125e-06, "loss": 0.3073, "step": 10255 }, { "epoch": 0.4641774157049106, "grad_norm": 0.36073886007825423, "learning_rate": 5.82008111037978e-06, "loss": 0.489, "step": 10256 }, { "epoch": 0.4642226748133062, "grad_norm": 0.6265469379681541, "learning_rate": 5.819358106032409e-06, "loss": 0.3613, "step": 10257 }, { "epoch": 0.46426793392170174, "grad_norm": 0.6752147741747988, "learning_rate": 5.81863508407955e-06, "loss": 0.3313, "step": 10258 }, { "epoch": 0.4643131930300973, "grad_norm": 0.6333988101454193, "learning_rate": 5.817912044536735e-06, "loss": 0.3466, "step": 10259 }, { "epoch": 0.46435845213849286, "grad_norm": 0.7462015858385178, "learning_rate": 5.8171889874195066e-06, "loss": 0.3298, "step": 10260 }, { "epoch": 0.46440371124688845, "grad_norm": 0.6957970518827933, "learning_rate": 5.8164659127433935e-06, "loss": 0.3552, "step": 10261 }, { "epoch": 0.464448970355284, "grad_norm": 0.6435510261970632, "learning_rate": 5.815742820523936e-06, "loss": 0.3092, "step": 10262 }, { "epoch": 0.4644942294636796, "grad_norm": 0.3325824347052253, "learning_rate": 5.815019710776671e-06, "loss": 0.4796, "step": 10263 }, { "epoch": 0.4645394885720751, "grad_norm": 0.7540769034192851, "learning_rate": 5.814296583517135e-06, "loss": 0.3059, "step": 10264 }, { "epoch": 0.4645847476804707, "grad_norm": 0.623352839941175, "learning_rate": 5.813573438760867e-06, "loss": 0.365, "step": 10265 }, { "epoch": 0.46463000678886623, "grad_norm": 0.626453682575965, "learning_rate": 5.812850276523405e-06, "loss": 0.3726, "step": 10266 }, { "epoch": 0.4646752658972618, "grad_norm": 0.29882319766048127, "learning_rate": 5.812127096820285e-06, "loss": 0.4693, "step": 10267 }, { "epoch": 0.4647205250056574, "grad_norm": 0.5995073751713392, "learning_rate": 5.811403899667049e-06, "loss": 0.329, "step": 10268 }, { "epoch": 0.46476578411405295, "grad_norm": 0.5822564001987135, "learning_rate": 5.810680685079236e-06, "loss": 0.2995, "step": 10269 }, { "epoch": 0.46481104322244854, "grad_norm": 0.6120648327385629, "learning_rate": 5.809957453072385e-06, "loss": 0.327, "step": 10270 }, { "epoch": 0.4648563023308441, "grad_norm": 0.5772405836572486, "learning_rate": 5.809234203662034e-06, "loss": 0.3188, "step": 10271 }, { "epoch": 0.46490156143923966, "grad_norm": 0.6548826917793775, "learning_rate": 5.808510936863727e-06, "loss": 0.3568, "step": 10272 }, { "epoch": 0.4649468205476352, "grad_norm": 0.6218802174741334, "learning_rate": 5.807787652693002e-06, "loss": 0.3347, "step": 10273 }, { "epoch": 0.4649920796560308, "grad_norm": 0.3925733151859355, "learning_rate": 5.8070643511654025e-06, "loss": 0.4809, "step": 10274 }, { "epoch": 0.4650373387644263, "grad_norm": 0.6150511972804596, "learning_rate": 5.806341032296468e-06, "loss": 0.313, "step": 10275 }, { "epoch": 0.4650825978728219, "grad_norm": 0.6160171370967106, "learning_rate": 5.805617696101742e-06, "loss": 0.3701, "step": 10276 }, { "epoch": 0.46512785698121745, "grad_norm": 0.5990757050539252, "learning_rate": 5.804894342596766e-06, "loss": 0.3755, "step": 10277 }, { "epoch": 0.46517311608961304, "grad_norm": 0.5648945834265401, "learning_rate": 5.804170971797081e-06, "loss": 0.2972, "step": 10278 }, { "epoch": 0.4652183751980086, "grad_norm": 0.6026134317216376, "learning_rate": 5.803447583718234e-06, "loss": 0.368, "step": 10279 }, { "epoch": 0.46526363430640416, "grad_norm": 0.5500918012924714, "learning_rate": 5.802724178375762e-06, "loss": 0.3353, "step": 10280 }, { "epoch": 0.46530889341479975, "grad_norm": 0.6222704125021451, "learning_rate": 5.802000755785217e-06, "loss": 0.2981, "step": 10281 }, { "epoch": 0.4653541525231953, "grad_norm": 0.3463249038069997, "learning_rate": 5.801277315962139e-06, "loss": 0.49, "step": 10282 }, { "epoch": 0.4653994116315909, "grad_norm": 0.6643901305295368, "learning_rate": 5.80055385892207e-06, "loss": 0.3667, "step": 10283 }, { "epoch": 0.4654446707399864, "grad_norm": 0.6112542275471315, "learning_rate": 5.799830384680558e-06, "loss": 0.3516, "step": 10284 }, { "epoch": 0.465489929848382, "grad_norm": 0.6482440782183497, "learning_rate": 5.799106893253148e-06, "loss": 0.3737, "step": 10285 }, { "epoch": 0.46553518895677753, "grad_norm": 0.5779577042672598, "learning_rate": 5.798383384655384e-06, "loss": 0.3306, "step": 10286 }, { "epoch": 0.4655804480651731, "grad_norm": 0.2977800126206191, "learning_rate": 5.7976598589028154e-06, "loss": 0.4888, "step": 10287 }, { "epoch": 0.46562570717356866, "grad_norm": 0.29912546170849535, "learning_rate": 5.796936316010984e-06, "loss": 0.4909, "step": 10288 }, { "epoch": 0.46567096628196425, "grad_norm": 0.5550307674716896, "learning_rate": 5.796212755995439e-06, "loss": 0.3606, "step": 10289 }, { "epoch": 0.4657162253903598, "grad_norm": 0.6016564912884554, "learning_rate": 5.795489178871728e-06, "loss": 0.32, "step": 10290 }, { "epoch": 0.4657614844987554, "grad_norm": 0.6441104882214519, "learning_rate": 5.794765584655397e-06, "loss": 0.3154, "step": 10291 }, { "epoch": 0.46580674360715096, "grad_norm": 0.6519116239052571, "learning_rate": 5.794041973361996e-06, "loss": 0.3459, "step": 10292 }, { "epoch": 0.4658520027155465, "grad_norm": 0.6734882207659202, "learning_rate": 5.793318345007071e-06, "loss": 0.3281, "step": 10293 }, { "epoch": 0.4658972618239421, "grad_norm": 0.657001665342723, "learning_rate": 5.7925946996061696e-06, "loss": 0.303, "step": 10294 }, { "epoch": 0.4659425209323376, "grad_norm": 0.784940555608557, "learning_rate": 5.791871037174844e-06, "loss": 0.3287, "step": 10295 }, { "epoch": 0.4659877800407332, "grad_norm": 1.2622160018753232, "learning_rate": 5.7911473577286415e-06, "loss": 0.3177, "step": 10296 }, { "epoch": 0.46603303914912875, "grad_norm": 0.7490174556923634, "learning_rate": 5.790423661283112e-06, "loss": 0.3239, "step": 10297 }, { "epoch": 0.46607829825752434, "grad_norm": 0.676624989660703, "learning_rate": 5.789699947853807e-06, "loss": 0.3614, "step": 10298 }, { "epoch": 0.46612355736591987, "grad_norm": 0.652027534931178, "learning_rate": 5.788976217456275e-06, "loss": 0.3225, "step": 10299 }, { "epoch": 0.46616881647431546, "grad_norm": 0.5830828483232589, "learning_rate": 5.788252470106066e-06, "loss": 0.3683, "step": 10300 }, { "epoch": 0.466214075582711, "grad_norm": 0.5600393977412688, "learning_rate": 5.787528705818732e-06, "loss": 0.3223, "step": 10301 }, { "epoch": 0.4662593346911066, "grad_norm": 0.6438021708441337, "learning_rate": 5.786804924609827e-06, "loss": 0.3012, "step": 10302 }, { "epoch": 0.4663045937995022, "grad_norm": 0.6132114991086414, "learning_rate": 5.786081126494899e-06, "loss": 0.3149, "step": 10303 }, { "epoch": 0.4663498529078977, "grad_norm": 0.6359022819021914, "learning_rate": 5.785357311489502e-06, "loss": 0.3078, "step": 10304 }, { "epoch": 0.4663951120162933, "grad_norm": 0.652813458711096, "learning_rate": 5.784633479609188e-06, "loss": 0.3595, "step": 10305 }, { "epoch": 0.46644037112468884, "grad_norm": 0.6192102296622036, "learning_rate": 5.783909630869513e-06, "loss": 0.3429, "step": 10306 }, { "epoch": 0.4664856302330844, "grad_norm": 0.5587784962323307, "learning_rate": 5.7831857652860234e-06, "loss": 0.28, "step": 10307 }, { "epoch": 0.46653088934147996, "grad_norm": 0.4213056610694686, "learning_rate": 5.782461882874281e-06, "loss": 0.4761, "step": 10308 }, { "epoch": 0.46657614844987555, "grad_norm": 0.3523271291831369, "learning_rate": 5.781737983649833e-06, "loss": 0.4666, "step": 10309 }, { "epoch": 0.4666214075582711, "grad_norm": 0.6272547002980707, "learning_rate": 5.781014067628239e-06, "loss": 0.3487, "step": 10310 }, { "epoch": 0.4666666666666667, "grad_norm": 0.6324788202023445, "learning_rate": 5.78029013482505e-06, "loss": 0.3387, "step": 10311 }, { "epoch": 0.4667119257750622, "grad_norm": 0.6166975355394055, "learning_rate": 5.779566185255823e-06, "loss": 0.373, "step": 10312 }, { "epoch": 0.4667571848834578, "grad_norm": 0.646302923312705, "learning_rate": 5.778842218936113e-06, "loss": 0.3311, "step": 10313 }, { "epoch": 0.46680244399185333, "grad_norm": 0.6577866467883527, "learning_rate": 5.778118235881475e-06, "loss": 0.3905, "step": 10314 }, { "epoch": 0.4668477031002489, "grad_norm": 0.645231514488921, "learning_rate": 5.777394236107465e-06, "loss": 0.3354, "step": 10315 }, { "epoch": 0.4668929622086445, "grad_norm": 0.6187727317439589, "learning_rate": 5.776670219629643e-06, "loss": 0.3309, "step": 10316 }, { "epoch": 0.46693822131704005, "grad_norm": 0.6915513013065251, "learning_rate": 5.775946186463561e-06, "loss": 0.3635, "step": 10317 }, { "epoch": 0.46698348042543564, "grad_norm": 0.6626864260683646, "learning_rate": 5.775222136624781e-06, "loss": 0.3722, "step": 10318 }, { "epoch": 0.46702873953383117, "grad_norm": 0.6645297758465186, "learning_rate": 5.774498070128857e-06, "loss": 0.3243, "step": 10319 }, { "epoch": 0.46707399864222676, "grad_norm": 0.6766053400804806, "learning_rate": 5.773773986991348e-06, "loss": 0.2781, "step": 10320 }, { "epoch": 0.4671192577506223, "grad_norm": 0.6428071642509061, "learning_rate": 5.773049887227813e-06, "loss": 0.3932, "step": 10321 }, { "epoch": 0.4671645168590179, "grad_norm": 0.6598168419273106, "learning_rate": 5.772325770853809e-06, "loss": 0.3251, "step": 10322 }, { "epoch": 0.4672097759674134, "grad_norm": 0.6502389888661539, "learning_rate": 5.771601637884897e-06, "loss": 0.3507, "step": 10323 }, { "epoch": 0.467255035075809, "grad_norm": 0.7030154069910686, "learning_rate": 5.770877488336636e-06, "loss": 0.2911, "step": 10324 }, { "epoch": 0.46730029418420455, "grad_norm": 0.6618412096184401, "learning_rate": 5.770153322224584e-06, "loss": 0.3697, "step": 10325 }, { "epoch": 0.46734555329260014, "grad_norm": 0.592769704678834, "learning_rate": 5.769429139564303e-06, "loss": 0.2987, "step": 10326 }, { "epoch": 0.4673908124009957, "grad_norm": 0.6440489207513519, "learning_rate": 5.7687049403713545e-06, "loss": 0.3575, "step": 10327 }, { "epoch": 0.46743607150939126, "grad_norm": 0.794780089394584, "learning_rate": 5.767980724661295e-06, "loss": 0.3235, "step": 10328 }, { "epoch": 0.46748133061778685, "grad_norm": 0.6140401917762476, "learning_rate": 5.767256492449691e-06, "loss": 0.3428, "step": 10329 }, { "epoch": 0.4675265897261824, "grad_norm": 0.6446457210170683, "learning_rate": 5.7665322437521e-06, "loss": 0.3425, "step": 10330 }, { "epoch": 0.467571848834578, "grad_norm": 0.6975506143904119, "learning_rate": 5.765807978584086e-06, "loss": 0.3229, "step": 10331 }, { "epoch": 0.4676171079429735, "grad_norm": 0.6558090303313163, "learning_rate": 5.76508369696121e-06, "loss": 0.4884, "step": 10332 }, { "epoch": 0.4676623670513691, "grad_norm": 0.6493883687712777, "learning_rate": 5.764359398899035e-06, "loss": 0.3582, "step": 10333 }, { "epoch": 0.46770762615976463, "grad_norm": 0.6830461604852215, "learning_rate": 5.763635084413124e-06, "loss": 0.3433, "step": 10334 }, { "epoch": 0.4677528852681602, "grad_norm": 0.5862373864261052, "learning_rate": 5.762910753519041e-06, "loss": 0.2892, "step": 10335 }, { "epoch": 0.46779814437655576, "grad_norm": 0.6203705565094673, "learning_rate": 5.7621864062323484e-06, "loss": 0.284, "step": 10336 }, { "epoch": 0.46784340348495135, "grad_norm": 0.6226866995996769, "learning_rate": 5.7614620425686115e-06, "loss": 0.3411, "step": 10337 }, { "epoch": 0.46788866259334694, "grad_norm": 0.6302494720705131, "learning_rate": 5.760737662543393e-06, "loss": 0.3842, "step": 10338 }, { "epoch": 0.4679339217017425, "grad_norm": 0.6905149999540106, "learning_rate": 5.760013266172261e-06, "loss": 0.3205, "step": 10339 }, { "epoch": 0.46797918081013806, "grad_norm": 0.6035717777445082, "learning_rate": 5.759288853470776e-06, "loss": 0.3267, "step": 10340 }, { "epoch": 0.4680244399185336, "grad_norm": 0.6758276913963119, "learning_rate": 5.758564424454505e-06, "loss": 0.3656, "step": 10341 }, { "epoch": 0.4680696990269292, "grad_norm": 0.6773124521999553, "learning_rate": 5.757839979139015e-06, "loss": 0.3276, "step": 10342 }, { "epoch": 0.4681149581353247, "grad_norm": 0.39690207641958924, "learning_rate": 5.757115517539871e-06, "loss": 0.4777, "step": 10343 }, { "epoch": 0.4681602172437203, "grad_norm": 0.6325568396327806, "learning_rate": 5.7563910396726406e-06, "loss": 0.3494, "step": 10344 }, { "epoch": 0.46820547635211585, "grad_norm": 0.6558991024020747, "learning_rate": 5.7556665455528905e-06, "loss": 0.3444, "step": 10345 }, { "epoch": 0.46825073546051144, "grad_norm": 0.5942159698676818, "learning_rate": 5.7549420351961845e-06, "loss": 0.3216, "step": 10346 }, { "epoch": 0.46829599456890697, "grad_norm": 0.7086765666045822, "learning_rate": 5.754217508618096e-06, "loss": 0.3273, "step": 10347 }, { "epoch": 0.46834125367730256, "grad_norm": 1.5854606445523287, "learning_rate": 5.7534929658341875e-06, "loss": 0.3766, "step": 10348 }, { "epoch": 0.4683865127856981, "grad_norm": 0.6060422652921184, "learning_rate": 5.75276840686003e-06, "loss": 0.3445, "step": 10349 }, { "epoch": 0.4684317718940937, "grad_norm": 0.6780583191381162, "learning_rate": 5.752043831711191e-06, "loss": 0.368, "step": 10350 }, { "epoch": 0.4684770310024893, "grad_norm": 0.6071328346358127, "learning_rate": 5.75131924040324e-06, "loss": 0.3302, "step": 10351 }, { "epoch": 0.4685222901108848, "grad_norm": 0.6544464830019272, "learning_rate": 5.750594632951746e-06, "loss": 0.3249, "step": 10352 }, { "epoch": 0.4685675492192804, "grad_norm": 0.6721548509646648, "learning_rate": 5.749870009372279e-06, "loss": 0.3449, "step": 10353 }, { "epoch": 0.46861280832767593, "grad_norm": 0.6522678147244234, "learning_rate": 5.7491453696804075e-06, "loss": 0.3217, "step": 10354 }, { "epoch": 0.4686580674360715, "grad_norm": 0.600514796449989, "learning_rate": 5.7484207138917046e-06, "loss": 0.3414, "step": 10355 }, { "epoch": 0.46870332654446706, "grad_norm": 0.34556402129044617, "learning_rate": 5.747696042021737e-06, "loss": 0.4632, "step": 10356 }, { "epoch": 0.46874858565286265, "grad_norm": 0.6345826108971352, "learning_rate": 5.746971354086079e-06, "loss": 0.3596, "step": 10357 }, { "epoch": 0.4687938447612582, "grad_norm": 0.30393717099708784, "learning_rate": 5.746246650100302e-06, "loss": 0.4986, "step": 10358 }, { "epoch": 0.4688391038696538, "grad_norm": 0.8897658153634812, "learning_rate": 5.745521930079974e-06, "loss": 0.3326, "step": 10359 }, { "epoch": 0.4688843629780493, "grad_norm": 0.5911021530626996, "learning_rate": 5.744797194040672e-06, "loss": 0.3094, "step": 10360 }, { "epoch": 0.4689296220864449, "grad_norm": 0.666037617818459, "learning_rate": 5.744072441997964e-06, "loss": 0.3444, "step": 10361 }, { "epoch": 0.4689748811948405, "grad_norm": 0.30775239245639197, "learning_rate": 5.743347673967425e-06, "loss": 0.4974, "step": 10362 }, { "epoch": 0.469020140303236, "grad_norm": 0.668637519512142, "learning_rate": 5.742622889964628e-06, "loss": 0.3626, "step": 10363 }, { "epoch": 0.4690653994116316, "grad_norm": 0.6288653334835292, "learning_rate": 5.7418980900051445e-06, "loss": 0.3252, "step": 10364 }, { "epoch": 0.46911065852002715, "grad_norm": 0.6235682340663499, "learning_rate": 5.74117327410455e-06, "loss": 0.3328, "step": 10365 }, { "epoch": 0.46915591762842274, "grad_norm": 0.6477660006050718, "learning_rate": 5.740448442278419e-06, "loss": 0.3471, "step": 10366 }, { "epoch": 0.46920117673681827, "grad_norm": 0.3137744726867989, "learning_rate": 5.739723594542323e-06, "loss": 0.4813, "step": 10367 }, { "epoch": 0.46924643584521386, "grad_norm": 0.652174300270186, "learning_rate": 5.738998730911842e-06, "loss": 0.3605, "step": 10368 }, { "epoch": 0.4692916949536094, "grad_norm": 0.6532414271831297, "learning_rate": 5.738273851402547e-06, "loss": 0.3369, "step": 10369 }, { "epoch": 0.469336954062005, "grad_norm": 0.673193705744866, "learning_rate": 5.737548956030014e-06, "loss": 0.3022, "step": 10370 }, { "epoch": 0.4693822131704005, "grad_norm": 0.6975430135053898, "learning_rate": 5.736824044809818e-06, "loss": 0.3666, "step": 10371 }, { "epoch": 0.4694274722787961, "grad_norm": 0.6213955186406469, "learning_rate": 5.736099117757536e-06, "loss": 0.3067, "step": 10372 }, { "epoch": 0.4694727313871917, "grad_norm": 0.33070487315086977, "learning_rate": 5.735374174888747e-06, "loss": 0.4613, "step": 10373 }, { "epoch": 0.46951799049558723, "grad_norm": 0.5927061983360309, "learning_rate": 5.734649216219025e-06, "loss": 0.3415, "step": 10374 }, { "epoch": 0.4695632496039828, "grad_norm": 0.3027285314363435, "learning_rate": 5.733924241763946e-06, "loss": 0.4421, "step": 10375 }, { "epoch": 0.46960850871237836, "grad_norm": 0.6054536612662688, "learning_rate": 5.733199251539091e-06, "loss": 0.363, "step": 10376 }, { "epoch": 0.46965376782077395, "grad_norm": 0.7190888725596375, "learning_rate": 5.732474245560035e-06, "loss": 0.3624, "step": 10377 }, { "epoch": 0.4696990269291695, "grad_norm": 0.6658433291423098, "learning_rate": 5.7317492238423565e-06, "loss": 0.3174, "step": 10378 }, { "epoch": 0.4697442860375651, "grad_norm": 0.6978929635468228, "learning_rate": 5.731024186401636e-06, "loss": 0.3492, "step": 10379 }, { "epoch": 0.4697895451459606, "grad_norm": 0.5995286401107972, "learning_rate": 5.730299133253449e-06, "loss": 0.3135, "step": 10380 }, { "epoch": 0.4698348042543562, "grad_norm": 0.6186094867021583, "learning_rate": 5.729574064413378e-06, "loss": 0.326, "step": 10381 }, { "epoch": 0.46988006336275173, "grad_norm": 0.5917704318434182, "learning_rate": 5.728848979897001e-06, "loss": 0.2979, "step": 10382 }, { "epoch": 0.4699253224711473, "grad_norm": 0.6118993093230494, "learning_rate": 5.728123879719898e-06, "loss": 0.3497, "step": 10383 }, { "epoch": 0.46997058157954286, "grad_norm": 0.6497195551446878, "learning_rate": 5.727398763897648e-06, "loss": 0.3151, "step": 10384 }, { "epoch": 0.47001584068793845, "grad_norm": 0.6050964668972018, "learning_rate": 5.726673632445834e-06, "loss": 0.3738, "step": 10385 }, { "epoch": 0.47006109979633404, "grad_norm": 0.6806531900254521, "learning_rate": 5.725948485380034e-06, "loss": 0.3026, "step": 10386 }, { "epoch": 0.47010635890472957, "grad_norm": 0.663190369071432, "learning_rate": 5.725223322715833e-06, "loss": 0.4091, "step": 10387 }, { "epoch": 0.47015161801312516, "grad_norm": 0.38758399729658655, "learning_rate": 5.724498144468807e-06, "loss": 0.4785, "step": 10388 }, { "epoch": 0.4701968771215207, "grad_norm": 0.6718509573949357, "learning_rate": 5.7237729506545435e-06, "loss": 0.3497, "step": 10389 }, { "epoch": 0.4702421362299163, "grad_norm": 0.699050141291865, "learning_rate": 5.723047741288621e-06, "loss": 0.3236, "step": 10390 }, { "epoch": 0.4702873953383118, "grad_norm": 0.3215874781013927, "learning_rate": 5.722322516386623e-06, "loss": 0.4859, "step": 10391 }, { "epoch": 0.4703326544467074, "grad_norm": 0.8302704908623061, "learning_rate": 5.7215972759641335e-06, "loss": 0.3606, "step": 10392 }, { "epoch": 0.47037791355510294, "grad_norm": 0.6579121221890892, "learning_rate": 5.720872020036734e-06, "loss": 0.3094, "step": 10393 }, { "epoch": 0.47042317266349853, "grad_norm": 0.6251053099740931, "learning_rate": 5.720146748620009e-06, "loss": 0.3317, "step": 10394 }, { "epoch": 0.47046843177189407, "grad_norm": 0.4553160396122376, "learning_rate": 5.719421461729544e-06, "loss": 0.4976, "step": 10395 }, { "epoch": 0.47051369088028966, "grad_norm": 0.6385209815002308, "learning_rate": 5.718696159380918e-06, "loss": 0.3031, "step": 10396 }, { "epoch": 0.47055894998868525, "grad_norm": 0.6550005906490711, "learning_rate": 5.717970841589722e-06, "loss": 0.3575, "step": 10397 }, { "epoch": 0.4706042090970808, "grad_norm": 0.6144433391942784, "learning_rate": 5.717245508371535e-06, "loss": 0.3571, "step": 10398 }, { "epoch": 0.4706494682054764, "grad_norm": 0.56431820576879, "learning_rate": 5.716520159741946e-06, "loss": 0.3345, "step": 10399 }, { "epoch": 0.4706947273138719, "grad_norm": 0.31773722371787644, "learning_rate": 5.715794795716539e-06, "loss": 0.4817, "step": 10400 }, { "epoch": 0.4707399864222675, "grad_norm": 0.6638658663750014, "learning_rate": 5.7150694163109015e-06, "loss": 0.3792, "step": 10401 }, { "epoch": 0.47078524553066303, "grad_norm": 0.6109695735079277, "learning_rate": 5.714344021540616e-06, "loss": 0.3311, "step": 10402 }, { "epoch": 0.4708305046390586, "grad_norm": 0.634634568023838, "learning_rate": 5.713618611421273e-06, "loss": 0.3193, "step": 10403 }, { "epoch": 0.47087576374745416, "grad_norm": 0.2813397261233842, "learning_rate": 5.712893185968458e-06, "loss": 0.4807, "step": 10404 }, { "epoch": 0.47092102285584975, "grad_norm": 0.6239281195599348, "learning_rate": 5.712167745197757e-06, "loss": 0.3455, "step": 10405 }, { "epoch": 0.4709662819642453, "grad_norm": 0.6635538212106871, "learning_rate": 5.71144228912476e-06, "loss": 0.3336, "step": 10406 }, { "epoch": 0.47101154107264087, "grad_norm": 0.7367756633644262, "learning_rate": 5.710716817765052e-06, "loss": 0.3805, "step": 10407 }, { "epoch": 0.4710568001810364, "grad_norm": 0.30067415828969046, "learning_rate": 5.709991331134224e-06, "loss": 0.4866, "step": 10408 }, { "epoch": 0.471102059289432, "grad_norm": 0.6064990682433967, "learning_rate": 5.709265829247861e-06, "loss": 0.2583, "step": 10409 }, { "epoch": 0.4711473183978276, "grad_norm": 0.5816228931837361, "learning_rate": 5.7085403121215545e-06, "loss": 0.3187, "step": 10410 }, { "epoch": 0.4711925775062231, "grad_norm": 0.6846169928806831, "learning_rate": 5.707814779770892e-06, "loss": 0.3383, "step": 10411 }, { "epoch": 0.4712378366146187, "grad_norm": 0.2864393917728087, "learning_rate": 5.707089232211463e-06, "loss": 0.4872, "step": 10412 }, { "epoch": 0.47128309572301424, "grad_norm": 0.7883095576233838, "learning_rate": 5.70636366945886e-06, "loss": 0.3431, "step": 10413 }, { "epoch": 0.47132835483140983, "grad_norm": 0.671171558514905, "learning_rate": 5.70563809152867e-06, "loss": 0.3319, "step": 10414 }, { "epoch": 0.47137361393980537, "grad_norm": 0.570402605761979, "learning_rate": 5.704912498436486e-06, "loss": 0.3253, "step": 10415 }, { "epoch": 0.47141887304820096, "grad_norm": 0.6494215852181807, "learning_rate": 5.704186890197897e-06, "loss": 0.3538, "step": 10416 }, { "epoch": 0.4714641321565965, "grad_norm": 0.6309491842680105, "learning_rate": 5.703461266828493e-06, "loss": 0.3157, "step": 10417 }, { "epoch": 0.4715093912649921, "grad_norm": 0.6158447888774433, "learning_rate": 5.702735628343869e-06, "loss": 0.3297, "step": 10418 }, { "epoch": 0.4715546503733876, "grad_norm": 0.6330071116585992, "learning_rate": 5.702009974759612e-06, "loss": 0.3274, "step": 10419 }, { "epoch": 0.4715999094817832, "grad_norm": 0.6302672089025184, "learning_rate": 5.701284306091319e-06, "loss": 0.329, "step": 10420 }, { "epoch": 0.4716451685901788, "grad_norm": 0.698076458574524, "learning_rate": 5.700558622354579e-06, "loss": 0.3319, "step": 10421 }, { "epoch": 0.47169042769857433, "grad_norm": 0.6162874189440831, "learning_rate": 5.699832923564986e-06, "loss": 0.3318, "step": 10422 }, { "epoch": 0.4717356868069699, "grad_norm": 0.6526134943107602, "learning_rate": 5.699107209738133e-06, "loss": 0.3253, "step": 10423 }, { "epoch": 0.47178094591536546, "grad_norm": 0.680267091801362, "learning_rate": 5.698381480889614e-06, "loss": 0.4051, "step": 10424 }, { "epoch": 0.47182620502376105, "grad_norm": 0.30455858467410707, "learning_rate": 5.697655737035019e-06, "loss": 0.4714, "step": 10425 }, { "epoch": 0.4718714641321566, "grad_norm": 0.6318519040245598, "learning_rate": 5.6969299781899486e-06, "loss": 0.33, "step": 10426 }, { "epoch": 0.47191672324055217, "grad_norm": 0.5964106428025935, "learning_rate": 5.696204204369991e-06, "loss": 0.3563, "step": 10427 }, { "epoch": 0.4719619823489477, "grad_norm": 0.6215879264225173, "learning_rate": 5.695478415590745e-06, "loss": 0.3742, "step": 10428 }, { "epoch": 0.4720072414573433, "grad_norm": 0.6929168753091881, "learning_rate": 5.6947526118678024e-06, "loss": 0.3318, "step": 10429 }, { "epoch": 0.47205250056573883, "grad_norm": 0.3668308493260659, "learning_rate": 5.69402679321676e-06, "loss": 0.4975, "step": 10430 }, { "epoch": 0.4720977596741344, "grad_norm": 0.6125920894562785, "learning_rate": 5.693300959653214e-06, "loss": 0.3537, "step": 10431 }, { "epoch": 0.47214301878253, "grad_norm": 0.6896258816056711, "learning_rate": 5.69257511119276e-06, "loss": 0.346, "step": 10432 }, { "epoch": 0.47218827789092555, "grad_norm": 0.29626764972290515, "learning_rate": 5.691849247850993e-06, "loss": 0.4737, "step": 10433 }, { "epoch": 0.47223353699932114, "grad_norm": 0.6294195346534802, "learning_rate": 5.691123369643511e-06, "loss": 0.2943, "step": 10434 }, { "epoch": 0.47227879610771667, "grad_norm": 0.6396559300429897, "learning_rate": 5.690397476585909e-06, "loss": 0.3178, "step": 10435 }, { "epoch": 0.47232405521611226, "grad_norm": 0.6148819531156378, "learning_rate": 5.689671568693788e-06, "loss": 0.3441, "step": 10436 }, { "epoch": 0.4723693143245078, "grad_norm": 0.6441787478142561, "learning_rate": 5.688945645982743e-06, "loss": 0.3151, "step": 10437 }, { "epoch": 0.4724145734329034, "grad_norm": 0.6019671802140769, "learning_rate": 5.68821970846837e-06, "loss": 0.3661, "step": 10438 }, { "epoch": 0.4724598325412989, "grad_norm": 0.4033827542244026, "learning_rate": 5.687493756166272e-06, "loss": 0.4804, "step": 10439 }, { "epoch": 0.4725050916496945, "grad_norm": 0.6473518747090413, "learning_rate": 5.686767789092041e-06, "loss": 0.3211, "step": 10440 }, { "epoch": 0.47255035075809004, "grad_norm": 0.3296715058419835, "learning_rate": 5.6860418072612826e-06, "loss": 0.4899, "step": 10441 }, { "epoch": 0.47259560986648563, "grad_norm": 0.270224074842136, "learning_rate": 5.6853158106895915e-06, "loss": 0.4893, "step": 10442 }, { "epoch": 0.47264086897488117, "grad_norm": 0.657635254692437, "learning_rate": 5.684589799392568e-06, "loss": 0.3276, "step": 10443 }, { "epoch": 0.47268612808327676, "grad_norm": 0.6170343378678285, "learning_rate": 5.683863773385813e-06, "loss": 0.3139, "step": 10444 }, { "epoch": 0.47273138719167235, "grad_norm": 0.6288497648807237, "learning_rate": 5.683137732684926e-06, "loss": 0.2783, "step": 10445 }, { "epoch": 0.4727766463000679, "grad_norm": 0.42879652640310395, "learning_rate": 5.682411677305506e-06, "loss": 0.4675, "step": 10446 }, { "epoch": 0.47282190540846347, "grad_norm": 0.666824200507803, "learning_rate": 5.681685607263156e-06, "loss": 0.3025, "step": 10447 }, { "epoch": 0.472867164516859, "grad_norm": 0.6456795722225278, "learning_rate": 5.680959522573476e-06, "loss": 0.3202, "step": 10448 }, { "epoch": 0.4729124236252546, "grad_norm": 0.6255694791465786, "learning_rate": 5.680233423252066e-06, "loss": 0.358, "step": 10449 }, { "epoch": 0.47295768273365013, "grad_norm": 0.6549822291733464, "learning_rate": 5.67950730931453e-06, "loss": 0.298, "step": 10450 }, { "epoch": 0.4730029418420457, "grad_norm": 0.3193477388383573, "learning_rate": 5.678781180776469e-06, "loss": 0.4755, "step": 10451 }, { "epoch": 0.47304820095044126, "grad_norm": 0.6423122948962887, "learning_rate": 5.678055037653485e-06, "loss": 0.3221, "step": 10452 }, { "epoch": 0.47309346005883685, "grad_norm": 0.5848128098939198, "learning_rate": 5.677328879961182e-06, "loss": 0.3089, "step": 10453 }, { "epoch": 0.4731387191672324, "grad_norm": 0.617186467546983, "learning_rate": 5.676602707715159e-06, "loss": 0.3378, "step": 10454 }, { "epoch": 0.47318397827562797, "grad_norm": 0.3529437506726879, "learning_rate": 5.675876520931023e-06, "loss": 0.4806, "step": 10455 }, { "epoch": 0.47322923738402356, "grad_norm": 0.6428562390362067, "learning_rate": 5.675150319624375e-06, "loss": 0.3185, "step": 10456 }, { "epoch": 0.4732744964924191, "grad_norm": 0.637439086908186, "learning_rate": 5.674424103810822e-06, "loss": 0.3537, "step": 10457 }, { "epoch": 0.4733197556008147, "grad_norm": 0.6085350660379343, "learning_rate": 5.6736978735059665e-06, "loss": 0.3302, "step": 10458 }, { "epoch": 0.4733650147092102, "grad_norm": 0.6298430509677303, "learning_rate": 5.672971628725412e-06, "loss": 0.3168, "step": 10459 }, { "epoch": 0.4734102738176058, "grad_norm": 0.6593200109445425, "learning_rate": 5.672245369484765e-06, "loss": 0.3543, "step": 10460 }, { "epoch": 0.47345553292600134, "grad_norm": 0.6021724432032531, "learning_rate": 5.671519095799629e-06, "loss": 0.3174, "step": 10461 }, { "epoch": 0.47350079203439693, "grad_norm": 0.38837544330242485, "learning_rate": 5.67079280768561e-06, "loss": 0.4881, "step": 10462 }, { "epoch": 0.47354605114279247, "grad_norm": 0.635804739256016, "learning_rate": 5.670066505158314e-06, "loss": 0.3139, "step": 10463 }, { "epoch": 0.47359131025118806, "grad_norm": 0.6266395671735902, "learning_rate": 5.6693401882333455e-06, "loss": 0.3154, "step": 10464 }, { "epoch": 0.4736365693595836, "grad_norm": 0.6620216762722568, "learning_rate": 5.668613856926312e-06, "loss": 0.3305, "step": 10465 }, { "epoch": 0.4736818284679792, "grad_norm": 1.1799266727472142, "learning_rate": 5.667887511252823e-06, "loss": 0.3249, "step": 10466 }, { "epoch": 0.4737270875763748, "grad_norm": 0.6204130806355646, "learning_rate": 5.667161151228481e-06, "loss": 0.3139, "step": 10467 }, { "epoch": 0.4737723466847703, "grad_norm": 0.6310237088814322, "learning_rate": 5.666434776868895e-06, "loss": 0.322, "step": 10468 }, { "epoch": 0.4738176057931659, "grad_norm": 0.6381134611880385, "learning_rate": 5.665708388189672e-06, "loss": 0.3484, "step": 10469 }, { "epoch": 0.47386286490156143, "grad_norm": 0.5889469271310414, "learning_rate": 5.664981985206421e-06, "loss": 0.2972, "step": 10470 }, { "epoch": 0.473908124009957, "grad_norm": 0.711647404359138, "learning_rate": 5.664255567934749e-06, "loss": 0.3813, "step": 10471 }, { "epoch": 0.47395338311835256, "grad_norm": 0.636677445282444, "learning_rate": 5.663529136390264e-06, "loss": 0.3075, "step": 10472 }, { "epoch": 0.47399864222674815, "grad_norm": 0.6477771180653775, "learning_rate": 5.662802690588578e-06, "loss": 0.3174, "step": 10473 }, { "epoch": 0.4740439013351437, "grad_norm": 0.6224392332441187, "learning_rate": 5.662076230545297e-06, "loss": 0.3416, "step": 10474 }, { "epoch": 0.47408916044353927, "grad_norm": 0.6727880059139598, "learning_rate": 5.66134975627603e-06, "loss": 0.3335, "step": 10475 }, { "epoch": 0.4741344195519348, "grad_norm": 0.6891150407798442, "learning_rate": 5.660623267796389e-06, "loss": 0.3144, "step": 10476 }, { "epoch": 0.4741796786603304, "grad_norm": 0.6435883187225452, "learning_rate": 5.659896765121982e-06, "loss": 0.3266, "step": 10477 }, { "epoch": 0.47422493776872593, "grad_norm": 0.608530341011818, "learning_rate": 5.659170248268422e-06, "loss": 0.3437, "step": 10478 }, { "epoch": 0.4742701968771215, "grad_norm": 0.7542906131639765, "learning_rate": 5.658443717251316e-06, "loss": 0.3254, "step": 10479 }, { "epoch": 0.4743154559855171, "grad_norm": 0.6917075448860384, "learning_rate": 5.657717172086278e-06, "loss": 0.3608, "step": 10480 }, { "epoch": 0.47436071509391264, "grad_norm": 0.5847839847757341, "learning_rate": 5.656990612788918e-06, "loss": 0.3296, "step": 10481 }, { "epoch": 0.47440597420230823, "grad_norm": 0.5982518729555611, "learning_rate": 5.656264039374846e-06, "loss": 0.3042, "step": 10482 }, { "epoch": 0.47445123331070377, "grad_norm": 0.5898479533174593, "learning_rate": 5.6555374518596765e-06, "loss": 0.2949, "step": 10483 }, { "epoch": 0.47449649241909936, "grad_norm": 0.6297858839636593, "learning_rate": 5.654810850259021e-06, "loss": 0.3386, "step": 10484 }, { "epoch": 0.4745417515274949, "grad_norm": 0.6019311826617614, "learning_rate": 5.65408423458849e-06, "loss": 0.3221, "step": 10485 }, { "epoch": 0.4745870106358905, "grad_norm": 0.6191628764874518, "learning_rate": 5.653357604863698e-06, "loss": 0.318, "step": 10486 }, { "epoch": 0.474632269744286, "grad_norm": 0.3862071876986101, "learning_rate": 5.65263096110026e-06, "loss": 0.4458, "step": 10487 }, { "epoch": 0.4746775288526816, "grad_norm": 0.32403615101505817, "learning_rate": 5.651904303313784e-06, "loss": 0.4774, "step": 10488 }, { "epoch": 0.47472278796107714, "grad_norm": 0.6090597584329148, "learning_rate": 5.6511776315198886e-06, "loss": 0.336, "step": 10489 }, { "epoch": 0.47476804706947273, "grad_norm": 0.6378578217907716, "learning_rate": 5.650450945734185e-06, "loss": 0.3159, "step": 10490 }, { "epoch": 0.4748133061778683, "grad_norm": 0.7852386464362128, "learning_rate": 5.649724245972288e-06, "loss": 0.3137, "step": 10491 }, { "epoch": 0.47485856528626386, "grad_norm": 0.7013033187362679, "learning_rate": 5.6489975322498124e-06, "loss": 0.3738, "step": 10492 }, { "epoch": 0.47490382439465945, "grad_norm": 0.6168953929319065, "learning_rate": 5.6482708045823734e-06, "loss": 0.3624, "step": 10493 }, { "epoch": 0.474949083503055, "grad_norm": 0.6178354670293249, "learning_rate": 5.647544062985586e-06, "loss": 0.3126, "step": 10494 }, { "epoch": 0.47499434261145057, "grad_norm": 0.6318537399545678, "learning_rate": 5.646817307475066e-06, "loss": 0.31, "step": 10495 }, { "epoch": 0.4750396017198461, "grad_norm": 0.644458175134238, "learning_rate": 5.646090538066426e-06, "loss": 0.2877, "step": 10496 }, { "epoch": 0.4750848608282417, "grad_norm": 0.642582769804473, "learning_rate": 5.645363754775288e-06, "loss": 0.3776, "step": 10497 }, { "epoch": 0.47513011993663723, "grad_norm": 0.7798488205437791, "learning_rate": 5.644636957617264e-06, "loss": 0.3314, "step": 10498 }, { "epoch": 0.4751753790450328, "grad_norm": 0.4828585852711643, "learning_rate": 5.643910146607972e-06, "loss": 0.5135, "step": 10499 }, { "epoch": 0.47522063815342835, "grad_norm": 0.40002404946581677, "learning_rate": 5.643183321763027e-06, "loss": 0.4605, "step": 10500 }, { "epoch": 0.47526589726182394, "grad_norm": 0.6665771337235158, "learning_rate": 5.642456483098049e-06, "loss": 0.3191, "step": 10501 }, { "epoch": 0.47531115637021953, "grad_norm": 0.6147364813893643, "learning_rate": 5.641729630628654e-06, "loss": 0.2882, "step": 10502 }, { "epoch": 0.47535641547861507, "grad_norm": 0.6099779425362699, "learning_rate": 5.641002764370461e-06, "loss": 0.2919, "step": 10503 }, { "epoch": 0.47540167458701066, "grad_norm": 0.43910766007800495, "learning_rate": 5.6402758843390844e-06, "loss": 0.4744, "step": 10504 }, { "epoch": 0.4754469336954062, "grad_norm": 0.6479746229972967, "learning_rate": 5.63954899055015e-06, "loss": 0.3509, "step": 10505 }, { "epoch": 0.4754921928038018, "grad_norm": 0.6720997314016489, "learning_rate": 5.638822083019267e-06, "loss": 0.318, "step": 10506 }, { "epoch": 0.4755374519121973, "grad_norm": 0.644017282983267, "learning_rate": 5.638095161762064e-06, "loss": 0.3371, "step": 10507 }, { "epoch": 0.4755827110205929, "grad_norm": 0.6956189399848878, "learning_rate": 5.637368226794153e-06, "loss": 0.3441, "step": 10508 }, { "epoch": 0.47562797012898844, "grad_norm": 0.7176511786720804, "learning_rate": 5.6366412781311575e-06, "loss": 0.2998, "step": 10509 }, { "epoch": 0.47567322923738403, "grad_norm": 0.6742388327178932, "learning_rate": 5.635914315788695e-06, "loss": 0.3256, "step": 10510 }, { "epoch": 0.47571848834577957, "grad_norm": 0.3869309011976393, "learning_rate": 5.635187339782389e-06, "loss": 0.4666, "step": 10511 }, { "epoch": 0.47576374745417516, "grad_norm": 0.6723727392135527, "learning_rate": 5.634460350127855e-06, "loss": 0.3445, "step": 10512 }, { "epoch": 0.4758090065625707, "grad_norm": 0.7130082870729761, "learning_rate": 5.633733346840719e-06, "loss": 0.3382, "step": 10513 }, { "epoch": 0.4758542656709663, "grad_norm": 0.6353001641146917, "learning_rate": 5.633006329936599e-06, "loss": 0.3402, "step": 10514 }, { "epoch": 0.47589952477936187, "grad_norm": 0.6832213982688691, "learning_rate": 5.632279299431117e-06, "loss": 0.3894, "step": 10515 }, { "epoch": 0.4759447838877574, "grad_norm": 0.6601553798734152, "learning_rate": 5.631552255339896e-06, "loss": 0.3305, "step": 10516 }, { "epoch": 0.475990042996153, "grad_norm": 0.5967921800961582, "learning_rate": 5.630825197678556e-06, "loss": 0.3038, "step": 10517 }, { "epoch": 0.47603530210454853, "grad_norm": 0.6301548350456041, "learning_rate": 5.630098126462719e-06, "loss": 0.3315, "step": 10518 }, { "epoch": 0.4760805612129441, "grad_norm": 0.32232962909444457, "learning_rate": 5.629371041708009e-06, "loss": 0.4891, "step": 10519 }, { "epoch": 0.47612582032133965, "grad_norm": 0.6719022010093874, "learning_rate": 5.6286439434300476e-06, "loss": 0.3032, "step": 10520 }, { "epoch": 0.47617107942973524, "grad_norm": 0.3066219988017249, "learning_rate": 5.627916831644459e-06, "loss": 0.4913, "step": 10521 }, { "epoch": 0.4762163385381308, "grad_norm": 0.7207359042047378, "learning_rate": 5.627189706366866e-06, "loss": 0.3651, "step": 10522 }, { "epoch": 0.47626159764652637, "grad_norm": 0.37989185098569295, "learning_rate": 5.626462567612892e-06, "loss": 0.4776, "step": 10523 }, { "epoch": 0.4763068567549219, "grad_norm": 0.647918593072506, "learning_rate": 5.625735415398164e-06, "loss": 0.3261, "step": 10524 }, { "epoch": 0.4763521158633175, "grad_norm": 0.6128445967722367, "learning_rate": 5.625008249738301e-06, "loss": 0.3424, "step": 10525 }, { "epoch": 0.4763973749717131, "grad_norm": 0.6504907307873667, "learning_rate": 5.624281070648933e-06, "loss": 0.2983, "step": 10526 }, { "epoch": 0.4764426340801086, "grad_norm": 0.6773751990955243, "learning_rate": 5.623553878145679e-06, "loss": 0.326, "step": 10527 }, { "epoch": 0.4764878931885042, "grad_norm": 0.6813088755616269, "learning_rate": 5.622826672244169e-06, "loss": 0.3685, "step": 10528 }, { "epoch": 0.47653315229689974, "grad_norm": 0.599714585578754, "learning_rate": 5.622099452960027e-06, "loss": 0.3429, "step": 10529 }, { "epoch": 0.47657841140529533, "grad_norm": 0.6266975657425706, "learning_rate": 5.621372220308877e-06, "loss": 0.3029, "step": 10530 }, { "epoch": 0.47662367051369087, "grad_norm": 0.638104136678306, "learning_rate": 5.620644974306347e-06, "loss": 0.3379, "step": 10531 }, { "epoch": 0.47666892962208646, "grad_norm": 0.5813658371373994, "learning_rate": 5.619917714968064e-06, "loss": 0.3223, "step": 10532 }, { "epoch": 0.476714188730482, "grad_norm": 0.6488863979765176, "learning_rate": 5.619190442309651e-06, "loss": 0.3755, "step": 10533 }, { "epoch": 0.4767594478388776, "grad_norm": 0.6424566258027432, "learning_rate": 5.61846315634674e-06, "loss": 0.3545, "step": 10534 }, { "epoch": 0.4768047069472731, "grad_norm": 0.6801460304007487, "learning_rate": 5.617735857094951e-06, "loss": 0.3029, "step": 10535 }, { "epoch": 0.4768499660556687, "grad_norm": 0.5846304119121462, "learning_rate": 5.61700854456992e-06, "loss": 0.2941, "step": 10536 }, { "epoch": 0.47689522516406424, "grad_norm": 0.6947565676732286, "learning_rate": 5.616281218787268e-06, "loss": 0.3463, "step": 10537 }, { "epoch": 0.47694048427245983, "grad_norm": 0.588970540066978, "learning_rate": 5.6155538797626254e-06, "loss": 0.2924, "step": 10538 }, { "epoch": 0.4769857433808554, "grad_norm": 0.5913635705662535, "learning_rate": 5.614826527511621e-06, "loss": 0.3402, "step": 10539 }, { "epoch": 0.47703100248925095, "grad_norm": 0.6070944121687863, "learning_rate": 5.614099162049883e-06, "loss": 0.3182, "step": 10540 }, { "epoch": 0.47707626159764654, "grad_norm": 0.6484577073713257, "learning_rate": 5.613371783393039e-06, "loss": 0.3275, "step": 10541 }, { "epoch": 0.4771215207060421, "grad_norm": 0.4262153304553133, "learning_rate": 5.612644391556721e-06, "loss": 0.4919, "step": 10542 }, { "epoch": 0.47716677981443767, "grad_norm": 0.3873556240946461, "learning_rate": 5.611916986556555e-06, "loss": 0.4972, "step": 10543 }, { "epoch": 0.4772120389228332, "grad_norm": 0.6849575773347174, "learning_rate": 5.611189568408173e-06, "loss": 0.3236, "step": 10544 }, { "epoch": 0.4772572980312288, "grad_norm": 0.5726982431800233, "learning_rate": 5.610462137127205e-06, "loss": 0.2837, "step": 10545 }, { "epoch": 0.47730255713962433, "grad_norm": 0.4824256200216908, "learning_rate": 5.609734692729278e-06, "loss": 0.5073, "step": 10546 }, { "epoch": 0.4773478162480199, "grad_norm": 0.6105993805076046, "learning_rate": 5.609007235230029e-06, "loss": 0.3515, "step": 10547 }, { "epoch": 0.47739307535641545, "grad_norm": 0.5849218911471216, "learning_rate": 5.60827976464508e-06, "loss": 0.3263, "step": 10548 }, { "epoch": 0.47743833446481104, "grad_norm": 0.6010479395747507, "learning_rate": 5.607552280990071e-06, "loss": 0.3844, "step": 10549 }, { "epoch": 0.47748359357320663, "grad_norm": 0.5929995553550435, "learning_rate": 5.606824784280629e-06, "loss": 0.3451, "step": 10550 }, { "epoch": 0.47752885268160217, "grad_norm": 0.5829228500969121, "learning_rate": 5.606097274532385e-06, "loss": 0.3029, "step": 10551 }, { "epoch": 0.47757411178999776, "grad_norm": 0.6339342525556836, "learning_rate": 5.6053697517609725e-06, "loss": 0.3454, "step": 10552 }, { "epoch": 0.4776193708983933, "grad_norm": 0.6276833410613624, "learning_rate": 5.604642215982025e-06, "loss": 0.2935, "step": 10553 }, { "epoch": 0.4776646300067889, "grad_norm": 0.6209626862316334, "learning_rate": 5.60391466721117e-06, "loss": 0.3491, "step": 10554 }, { "epoch": 0.4777098891151844, "grad_norm": 0.6916101322205742, "learning_rate": 5.603187105464045e-06, "loss": 0.3242, "step": 10555 }, { "epoch": 0.47775514822358, "grad_norm": 0.5816352573986318, "learning_rate": 5.6024595307562815e-06, "loss": 0.3542, "step": 10556 }, { "epoch": 0.47780040733197554, "grad_norm": 0.47474496295445967, "learning_rate": 5.601731943103515e-06, "loss": 0.4782, "step": 10557 }, { "epoch": 0.47784566644037113, "grad_norm": 0.41798351738900213, "learning_rate": 5.601004342521374e-06, "loss": 0.4784, "step": 10558 }, { "epoch": 0.47789092554876667, "grad_norm": 0.33629964526700756, "learning_rate": 5.6002767290254975e-06, "loss": 0.4703, "step": 10559 }, { "epoch": 0.47793618465716226, "grad_norm": 0.5380229410130636, "learning_rate": 5.599549102631516e-06, "loss": 0.284, "step": 10560 }, { "epoch": 0.47798144376555785, "grad_norm": 0.8051297804005851, "learning_rate": 5.598821463355069e-06, "loss": 0.3106, "step": 10561 }, { "epoch": 0.4780267028739534, "grad_norm": 0.5170642766936449, "learning_rate": 5.598093811211785e-06, "loss": 0.4833, "step": 10562 }, { "epoch": 0.47807196198234897, "grad_norm": 0.5963256229323173, "learning_rate": 5.597366146217303e-06, "loss": 0.2974, "step": 10563 }, { "epoch": 0.4781172210907445, "grad_norm": 0.6599750262075692, "learning_rate": 5.596638468387255e-06, "loss": 0.3371, "step": 10564 }, { "epoch": 0.4781624801991401, "grad_norm": 0.9292747092655834, "learning_rate": 5.595910777737281e-06, "loss": 0.297, "step": 10565 }, { "epoch": 0.47820773930753563, "grad_norm": 0.5795709443727703, "learning_rate": 5.5951830742830145e-06, "loss": 0.482, "step": 10566 }, { "epoch": 0.4782529984159312, "grad_norm": 0.6727811870569353, "learning_rate": 5.594455358040091e-06, "loss": 0.3363, "step": 10567 }, { "epoch": 0.47829825752432675, "grad_norm": 0.657959114215397, "learning_rate": 5.5937276290241486e-06, "loss": 0.3043, "step": 10568 }, { "epoch": 0.47834351663272234, "grad_norm": 0.6521324485995212, "learning_rate": 5.5929998872508215e-06, "loss": 0.36, "step": 10569 }, { "epoch": 0.4783887757411179, "grad_norm": 0.5943329694166126, "learning_rate": 5.592272132735749e-06, "loss": 0.34, "step": 10570 }, { "epoch": 0.47843403484951347, "grad_norm": 0.6612431740464342, "learning_rate": 5.591544365494567e-06, "loss": 0.3467, "step": 10571 }, { "epoch": 0.478479293957909, "grad_norm": 0.6059079878953904, "learning_rate": 5.590816585542913e-06, "loss": 0.2876, "step": 10572 }, { "epoch": 0.4785245530663046, "grad_norm": 0.4460620888325759, "learning_rate": 5.590088792896427e-06, "loss": 0.4569, "step": 10573 }, { "epoch": 0.4785698121747002, "grad_norm": 0.6267320935882589, "learning_rate": 5.589360987570745e-06, "loss": 0.3438, "step": 10574 }, { "epoch": 0.4786150712830957, "grad_norm": 0.6944370493131874, "learning_rate": 5.588633169581502e-06, "loss": 0.3631, "step": 10575 }, { "epoch": 0.4786603303914913, "grad_norm": 0.5718505842229085, "learning_rate": 5.5879053389443435e-06, "loss": 0.3306, "step": 10576 }, { "epoch": 0.47870558949988684, "grad_norm": 0.654911543789487, "learning_rate": 5.587177495674902e-06, "loss": 0.3318, "step": 10577 }, { "epoch": 0.47875084860828243, "grad_norm": 0.38311412169455544, "learning_rate": 5.586449639788822e-06, "loss": 0.459, "step": 10578 }, { "epoch": 0.47879610771667797, "grad_norm": 0.296067994927196, "learning_rate": 5.5857217713017394e-06, "loss": 0.4551, "step": 10579 }, { "epoch": 0.47884136682507356, "grad_norm": 0.6393993738142472, "learning_rate": 5.584993890229296e-06, "loss": 0.3378, "step": 10580 }, { "epoch": 0.4788866259334691, "grad_norm": 0.6430280142761382, "learning_rate": 5.584265996587129e-06, "loss": 0.3513, "step": 10581 }, { "epoch": 0.4789318850418647, "grad_norm": 0.6254186892649977, "learning_rate": 5.583538090390882e-06, "loss": 0.3143, "step": 10582 }, { "epoch": 0.4789771441502602, "grad_norm": 0.6131120850240496, "learning_rate": 5.582810171656191e-06, "loss": 0.3176, "step": 10583 }, { "epoch": 0.4790224032586558, "grad_norm": 0.6319043952102459, "learning_rate": 5.582082240398702e-06, "loss": 0.2955, "step": 10584 }, { "epoch": 0.4790676623670514, "grad_norm": 0.6220473199791497, "learning_rate": 5.5813542966340514e-06, "loss": 0.3094, "step": 10585 }, { "epoch": 0.47911292147544693, "grad_norm": 0.6886628697806193, "learning_rate": 5.580626340377884e-06, "loss": 0.347, "step": 10586 }, { "epoch": 0.4791581805838425, "grad_norm": 0.6260532435901625, "learning_rate": 5.579898371645839e-06, "loss": 0.3447, "step": 10587 }, { "epoch": 0.47920343969223805, "grad_norm": 0.6086583527185936, "learning_rate": 5.5791703904535584e-06, "loss": 0.3037, "step": 10588 }, { "epoch": 0.47924869880063364, "grad_norm": 0.41963576368214195, "learning_rate": 5.578442396816685e-06, "loss": 0.4868, "step": 10589 }, { "epoch": 0.4792939579090292, "grad_norm": 0.8103420874521906, "learning_rate": 5.577714390750862e-06, "loss": 0.3631, "step": 10590 }, { "epoch": 0.47933921701742477, "grad_norm": 0.8064511619876602, "learning_rate": 5.576986372271731e-06, "loss": 0.3607, "step": 10591 }, { "epoch": 0.4793844761258203, "grad_norm": 0.31885495383354473, "learning_rate": 5.576258341394936e-06, "loss": 0.4842, "step": 10592 }, { "epoch": 0.4794297352342159, "grad_norm": 0.6218266829354264, "learning_rate": 5.575530298136116e-06, "loss": 0.3272, "step": 10593 }, { "epoch": 0.4794749943426114, "grad_norm": 0.704607964592815, "learning_rate": 5.574802242510921e-06, "loss": 0.3527, "step": 10594 }, { "epoch": 0.479520253451007, "grad_norm": 0.6220004783827727, "learning_rate": 5.574074174534989e-06, "loss": 0.3301, "step": 10595 }, { "epoch": 0.4795655125594026, "grad_norm": 0.3650889133524078, "learning_rate": 5.573346094223966e-06, "loss": 0.5055, "step": 10596 }, { "epoch": 0.47961077166779814, "grad_norm": 0.35017989979027714, "learning_rate": 5.5726180015934976e-06, "loss": 0.4825, "step": 10597 }, { "epoch": 0.47965603077619373, "grad_norm": 0.5869712702415046, "learning_rate": 5.571889896659225e-06, "loss": 0.2814, "step": 10598 }, { "epoch": 0.47970128988458927, "grad_norm": 0.814425945966293, "learning_rate": 5.571161779436797e-06, "loss": 0.3136, "step": 10599 }, { "epoch": 0.47974654899298486, "grad_norm": 0.6462944916736755, "learning_rate": 5.570433649941855e-06, "loss": 0.3181, "step": 10600 }, { "epoch": 0.4797918081013804, "grad_norm": 0.293109634291855, "learning_rate": 5.5697055081900465e-06, "loss": 0.508, "step": 10601 }, { "epoch": 0.479837067209776, "grad_norm": 0.5914173537503168, "learning_rate": 5.568977354197016e-06, "loss": 0.3507, "step": 10602 }, { "epoch": 0.4798823263181715, "grad_norm": 0.6150955968191683, "learning_rate": 5.568249187978412e-06, "loss": 0.3485, "step": 10603 }, { "epoch": 0.4799275854265671, "grad_norm": 0.32318681448665276, "learning_rate": 5.567521009549874e-06, "loss": 0.4621, "step": 10604 }, { "epoch": 0.47997284453496264, "grad_norm": 0.7618492618904363, "learning_rate": 5.566792818927056e-06, "loss": 0.3452, "step": 10605 }, { "epoch": 0.48001810364335823, "grad_norm": 0.6691405458194113, "learning_rate": 5.566064616125599e-06, "loss": 0.3312, "step": 10606 }, { "epoch": 0.48006336275175376, "grad_norm": 0.6048012011493709, "learning_rate": 5.565336401161153e-06, "loss": 0.3358, "step": 10607 }, { "epoch": 0.48010862186014935, "grad_norm": 0.3210420083699887, "learning_rate": 5.564608174049364e-06, "loss": 0.4653, "step": 10608 }, { "epoch": 0.48015388096854494, "grad_norm": 0.6075408372209316, "learning_rate": 5.5638799348058795e-06, "loss": 0.3502, "step": 10609 }, { "epoch": 0.4801991400769405, "grad_norm": 0.30198056087813385, "learning_rate": 5.563151683446346e-06, "loss": 0.4962, "step": 10610 }, { "epoch": 0.48024439918533607, "grad_norm": 0.6459826113080686, "learning_rate": 5.562423419986415e-06, "loss": 0.3455, "step": 10611 }, { "epoch": 0.4802896582937316, "grad_norm": 0.26951371419713627, "learning_rate": 5.561695144441729e-06, "loss": 0.457, "step": 10612 }, { "epoch": 0.4803349174021272, "grad_norm": 0.624369197658816, "learning_rate": 5.5609668568279415e-06, "loss": 0.3443, "step": 10613 }, { "epoch": 0.4803801765105227, "grad_norm": 0.6414411152346636, "learning_rate": 5.560238557160698e-06, "loss": 0.301, "step": 10614 }, { "epoch": 0.4804254356189183, "grad_norm": 0.7575620000309918, "learning_rate": 5.559510245455649e-06, "loss": 0.3197, "step": 10615 }, { "epoch": 0.48047069472731385, "grad_norm": 0.6573322938311212, "learning_rate": 5.558781921728443e-06, "loss": 0.3071, "step": 10616 }, { "epoch": 0.48051595383570944, "grad_norm": 0.7094660698496261, "learning_rate": 5.558053585994729e-06, "loss": 0.3711, "step": 10617 }, { "epoch": 0.480561212944105, "grad_norm": 0.6309626577717088, "learning_rate": 5.557325238270158e-06, "loss": 0.3501, "step": 10618 }, { "epoch": 0.48060647205250057, "grad_norm": 0.5516977033728775, "learning_rate": 5.5565968785703795e-06, "loss": 0.4785, "step": 10619 }, { "epoch": 0.48065173116089616, "grad_norm": 0.6460332565278315, "learning_rate": 5.5558685069110444e-06, "loss": 0.3558, "step": 10620 }, { "epoch": 0.4806969902692917, "grad_norm": 0.6748996044819889, "learning_rate": 5.5551401233078e-06, "loss": 0.3051, "step": 10621 }, { "epoch": 0.4807422493776873, "grad_norm": 0.6158170111840895, "learning_rate": 5.554411727776301e-06, "loss": 0.3576, "step": 10622 }, { "epoch": 0.4807875084860828, "grad_norm": 0.6420214315777303, "learning_rate": 5.553683320332196e-06, "loss": 0.3463, "step": 10623 }, { "epoch": 0.4808327675944784, "grad_norm": 0.6657582922286736, "learning_rate": 5.552954900991139e-06, "loss": 0.3237, "step": 10624 }, { "epoch": 0.48087802670287394, "grad_norm": 0.2967738912280458, "learning_rate": 5.552226469768777e-06, "loss": 0.5012, "step": 10625 }, { "epoch": 0.48092328581126953, "grad_norm": 0.6290390226298818, "learning_rate": 5.551498026680766e-06, "loss": 0.3351, "step": 10626 }, { "epoch": 0.48096854491966506, "grad_norm": 0.6340394446650136, "learning_rate": 5.550769571742755e-06, "loss": 0.3395, "step": 10627 }, { "epoch": 0.48101380402806065, "grad_norm": 0.6845926201588363, "learning_rate": 5.550041104970398e-06, "loss": 0.3375, "step": 10628 }, { "epoch": 0.4810590631364562, "grad_norm": 0.637658919649331, "learning_rate": 5.5493126263793465e-06, "loss": 0.3517, "step": 10629 }, { "epoch": 0.4811043222448518, "grad_norm": 0.6112584722416579, "learning_rate": 5.548584135985253e-06, "loss": 0.3642, "step": 10630 }, { "epoch": 0.48114958135324737, "grad_norm": 0.29810893204105327, "learning_rate": 5.547855633803773e-06, "loss": 0.464, "step": 10631 }, { "epoch": 0.4811948404616429, "grad_norm": 0.2780123534377946, "learning_rate": 5.547127119850557e-06, "loss": 0.4612, "step": 10632 }, { "epoch": 0.4812400995700385, "grad_norm": 0.7624268380123517, "learning_rate": 5.546398594141259e-06, "loss": 0.319, "step": 10633 }, { "epoch": 0.481285358678434, "grad_norm": 0.6333384073958104, "learning_rate": 5.545670056691535e-06, "loss": 0.3073, "step": 10634 }, { "epoch": 0.4813306177868296, "grad_norm": 0.6160571792514077, "learning_rate": 5.544941507517036e-06, "loss": 0.324, "step": 10635 }, { "epoch": 0.48137587689522515, "grad_norm": 0.8222033027521287, "learning_rate": 5.544212946633418e-06, "loss": 0.3533, "step": 10636 }, { "epoch": 0.48142113600362074, "grad_norm": 0.3127573927438028, "learning_rate": 5.543484374056336e-06, "loss": 0.4877, "step": 10637 }, { "epoch": 0.4814663951120163, "grad_norm": 0.30309197025768586, "learning_rate": 5.542755789801442e-06, "loss": 0.4931, "step": 10638 }, { "epoch": 0.48151165422041187, "grad_norm": 0.7781188479398874, "learning_rate": 5.542027193884395e-06, "loss": 0.3357, "step": 10639 }, { "epoch": 0.4815569133288074, "grad_norm": 0.6058750001614668, "learning_rate": 5.541298586320848e-06, "loss": 0.3053, "step": 10640 }, { "epoch": 0.481602172437203, "grad_norm": 0.7037355994647424, "learning_rate": 5.540569967126457e-06, "loss": 0.3496, "step": 10641 }, { "epoch": 0.4816474315455985, "grad_norm": 0.2824906190415698, "learning_rate": 5.539841336316878e-06, "loss": 0.4922, "step": 10642 }, { "epoch": 0.4816926906539941, "grad_norm": 0.6428244763588142, "learning_rate": 5.539112693907765e-06, "loss": 0.3252, "step": 10643 }, { "epoch": 0.4817379497623897, "grad_norm": 0.8841811706079792, "learning_rate": 5.538384039914777e-06, "loss": 0.3226, "step": 10644 }, { "epoch": 0.48178320887078524, "grad_norm": 0.687525201512995, "learning_rate": 5.53765537435357e-06, "loss": 0.2941, "step": 10645 }, { "epoch": 0.48182846797918083, "grad_norm": 0.27305283829515287, "learning_rate": 5.536926697239799e-06, "loss": 0.4843, "step": 10646 }, { "epoch": 0.48187372708757636, "grad_norm": 0.2857701176133048, "learning_rate": 5.536198008589123e-06, "loss": 0.4813, "step": 10647 }, { "epoch": 0.48191898619597195, "grad_norm": 0.6667747799625429, "learning_rate": 5.535469308417198e-06, "loss": 0.3229, "step": 10648 }, { "epoch": 0.4819642453043675, "grad_norm": 0.2865961501721208, "learning_rate": 5.5347405967396825e-06, "loss": 0.4841, "step": 10649 }, { "epoch": 0.4820095044127631, "grad_norm": 0.6549681438750801, "learning_rate": 5.534011873572235e-06, "loss": 0.3061, "step": 10650 }, { "epoch": 0.4820547635211586, "grad_norm": 0.3055832816720461, "learning_rate": 5.533283138930511e-06, "loss": 0.4667, "step": 10651 }, { "epoch": 0.4821000226295542, "grad_norm": 0.288024145984902, "learning_rate": 5.532554392830171e-06, "loss": 0.4667, "step": 10652 }, { "epoch": 0.48214528173794974, "grad_norm": 0.6439717321302626, "learning_rate": 5.531825635286872e-06, "loss": 0.2995, "step": 10653 }, { "epoch": 0.48219054084634533, "grad_norm": 0.7070447804383497, "learning_rate": 5.531096866316273e-06, "loss": 0.3659, "step": 10654 }, { "epoch": 0.4822357999547409, "grad_norm": 0.6090117333768033, "learning_rate": 5.530368085934036e-06, "loss": 0.3284, "step": 10655 }, { "epoch": 0.48228105906313645, "grad_norm": 0.649044910192023, "learning_rate": 5.529639294155815e-06, "loss": 0.3244, "step": 10656 }, { "epoch": 0.48232631817153204, "grad_norm": 0.6588154605789093, "learning_rate": 5.528910490997275e-06, "loss": 0.3237, "step": 10657 }, { "epoch": 0.4823715772799276, "grad_norm": 0.6840278031937022, "learning_rate": 5.528181676474071e-06, "loss": 0.3157, "step": 10658 }, { "epoch": 0.48241683638832317, "grad_norm": 0.7609837095962596, "learning_rate": 5.527452850601864e-06, "loss": 0.3481, "step": 10659 }, { "epoch": 0.4824620954967187, "grad_norm": 0.766232069555319, "learning_rate": 5.526724013396317e-06, "loss": 0.3152, "step": 10660 }, { "epoch": 0.4825073546051143, "grad_norm": 0.870320948169904, "learning_rate": 5.5259951648730885e-06, "loss": 0.2826, "step": 10661 }, { "epoch": 0.4825526137135098, "grad_norm": 0.7347925460790161, "learning_rate": 5.525266305047838e-06, "loss": 0.3707, "step": 10662 }, { "epoch": 0.4825978728219054, "grad_norm": 0.6507573047099565, "learning_rate": 5.52453743393623e-06, "loss": 0.3407, "step": 10663 }, { "epoch": 0.48264313193030095, "grad_norm": 0.6773965666426126, "learning_rate": 5.523808551553922e-06, "loss": 0.3185, "step": 10664 }, { "epoch": 0.48268839103869654, "grad_norm": 0.726493244938831, "learning_rate": 5.523079657916578e-06, "loss": 0.3734, "step": 10665 }, { "epoch": 0.4827336501470921, "grad_norm": 0.7505689732391188, "learning_rate": 5.522350753039858e-06, "loss": 0.3267, "step": 10666 }, { "epoch": 0.48277890925548766, "grad_norm": 0.6373008470479439, "learning_rate": 5.521621836939424e-06, "loss": 0.3462, "step": 10667 }, { "epoch": 0.48282416836388325, "grad_norm": 0.656718497793086, "learning_rate": 5.520892909630939e-06, "loss": 0.347, "step": 10668 }, { "epoch": 0.4828694274722788, "grad_norm": 0.7163210793168534, "learning_rate": 5.520163971130066e-06, "loss": 0.3255, "step": 10669 }, { "epoch": 0.4829146865806744, "grad_norm": 0.8900367229797369, "learning_rate": 5.519435021452466e-06, "loss": 0.3682, "step": 10670 }, { "epoch": 0.4829599456890699, "grad_norm": 0.6803573374057947, "learning_rate": 5.518706060613805e-06, "loss": 0.3681, "step": 10671 }, { "epoch": 0.4830052047974655, "grad_norm": 0.41030123308996586, "learning_rate": 5.5179770886297405e-06, "loss": 0.483, "step": 10672 }, { "epoch": 0.48305046390586104, "grad_norm": 0.6613579069249995, "learning_rate": 5.517248105515941e-06, "loss": 0.3629, "step": 10673 }, { "epoch": 0.48309572301425663, "grad_norm": 0.7609906339983002, "learning_rate": 5.5165191112880674e-06, "loss": 0.313, "step": 10674 }, { "epoch": 0.48314098212265216, "grad_norm": 0.6434155323688646, "learning_rate": 5.515790105961785e-06, "loss": 0.3746, "step": 10675 }, { "epoch": 0.48318624123104775, "grad_norm": 0.6547562892095287, "learning_rate": 5.515061089552758e-06, "loss": 0.3438, "step": 10676 }, { "epoch": 0.4832315003394433, "grad_norm": 0.6847596676297534, "learning_rate": 5.514332062076649e-06, "loss": 0.362, "step": 10677 }, { "epoch": 0.4832767594478389, "grad_norm": 0.6315597904582344, "learning_rate": 5.513603023549124e-06, "loss": 0.3041, "step": 10678 }, { "epoch": 0.48332201855623447, "grad_norm": 0.9328268878726889, "learning_rate": 5.512873973985847e-06, "loss": 0.3337, "step": 10679 }, { "epoch": 0.48336727766463, "grad_norm": 0.6916736855340677, "learning_rate": 5.512144913402485e-06, "loss": 0.3443, "step": 10680 }, { "epoch": 0.4834125367730256, "grad_norm": 0.6105004747411769, "learning_rate": 5.5114158418147005e-06, "loss": 0.3758, "step": 10681 }, { "epoch": 0.4834577958814211, "grad_norm": 0.6197122850557252, "learning_rate": 5.51068675923816e-06, "loss": 0.348, "step": 10682 }, { "epoch": 0.4835030549898167, "grad_norm": 0.6173874073638086, "learning_rate": 5.50995766568853e-06, "loss": 0.296, "step": 10683 }, { "epoch": 0.48354831409821225, "grad_norm": 0.7139824073742472, "learning_rate": 5.509228561181476e-06, "loss": 0.3266, "step": 10684 }, { "epoch": 0.48359357320660784, "grad_norm": 0.6004396779219929, "learning_rate": 5.508499445732664e-06, "loss": 0.3495, "step": 10685 }, { "epoch": 0.4836388323150034, "grad_norm": 0.3708540694305002, "learning_rate": 5.507770319357762e-06, "loss": 0.4929, "step": 10686 }, { "epoch": 0.48368409142339897, "grad_norm": 0.6407768627436697, "learning_rate": 5.507041182072434e-06, "loss": 0.3306, "step": 10687 }, { "epoch": 0.4837293505317945, "grad_norm": 0.6358955180217345, "learning_rate": 5.506312033892348e-06, "loss": 0.3176, "step": 10688 }, { "epoch": 0.4837746096401901, "grad_norm": 0.6569460673575273, "learning_rate": 5.505582874833172e-06, "loss": 0.3283, "step": 10689 }, { "epoch": 0.4838198687485857, "grad_norm": 0.644320307937128, "learning_rate": 5.5048537049105725e-06, "loss": 0.3068, "step": 10690 }, { "epoch": 0.4838651278569812, "grad_norm": 0.7781673045915627, "learning_rate": 5.504124524140218e-06, "loss": 0.3419, "step": 10691 }, { "epoch": 0.4839103869653768, "grad_norm": 0.6220391667566061, "learning_rate": 5.503395332537775e-06, "loss": 0.3007, "step": 10692 }, { "epoch": 0.48395564607377234, "grad_norm": 0.6090721522407699, "learning_rate": 5.502666130118912e-06, "loss": 0.3194, "step": 10693 }, { "epoch": 0.48400090518216793, "grad_norm": 0.6498239850687649, "learning_rate": 5.501936916899299e-06, "loss": 0.3303, "step": 10694 }, { "epoch": 0.48404616429056346, "grad_norm": 0.5225679995389199, "learning_rate": 5.5012076928946035e-06, "loss": 0.504, "step": 10695 }, { "epoch": 0.48409142339895905, "grad_norm": 0.6396807582547848, "learning_rate": 5.500478458120493e-06, "loss": 0.3156, "step": 10696 }, { "epoch": 0.4841366825073546, "grad_norm": 0.6745967780966945, "learning_rate": 5.499749212592638e-06, "loss": 0.3393, "step": 10697 }, { "epoch": 0.4841819416157502, "grad_norm": 0.5948531132657466, "learning_rate": 5.499019956326707e-06, "loss": 0.3375, "step": 10698 }, { "epoch": 0.4842272007241457, "grad_norm": 0.7187557241178281, "learning_rate": 5.498290689338369e-06, "loss": 0.365, "step": 10699 }, { "epoch": 0.4842724598325413, "grad_norm": 0.6229183218355234, "learning_rate": 5.497561411643295e-06, "loss": 0.3192, "step": 10700 }, { "epoch": 0.48431771894093684, "grad_norm": 0.65019446915603, "learning_rate": 5.496832123257154e-06, "loss": 0.3165, "step": 10701 }, { "epoch": 0.4843629780493324, "grad_norm": 0.6392369740491995, "learning_rate": 5.496102824195618e-06, "loss": 0.3165, "step": 10702 }, { "epoch": 0.484408237157728, "grad_norm": 0.6310505782519518, "learning_rate": 5.495373514474356e-06, "loss": 0.3322, "step": 10703 }, { "epoch": 0.48445349626612355, "grad_norm": 0.3597350218284494, "learning_rate": 5.494644194109037e-06, "loss": 0.4691, "step": 10704 }, { "epoch": 0.48449875537451914, "grad_norm": 0.7423058383703874, "learning_rate": 5.493914863115334e-06, "loss": 0.3297, "step": 10705 }, { "epoch": 0.4845440144829147, "grad_norm": 0.6461765373067897, "learning_rate": 5.493185521508918e-06, "loss": 0.3338, "step": 10706 }, { "epoch": 0.48458927359131027, "grad_norm": 0.6258937227965276, "learning_rate": 5.492456169305459e-06, "loss": 0.3471, "step": 10707 }, { "epoch": 0.4846345326997058, "grad_norm": 0.6575981123752621, "learning_rate": 5.49172680652063e-06, "loss": 0.2936, "step": 10708 }, { "epoch": 0.4846797918081014, "grad_norm": 0.6326251364558374, "learning_rate": 5.490997433170102e-06, "loss": 0.3563, "step": 10709 }, { "epoch": 0.4847250509164969, "grad_norm": 0.621611401740867, "learning_rate": 5.490268049269547e-06, "loss": 0.3364, "step": 10710 }, { "epoch": 0.4847703100248925, "grad_norm": 0.6495453676007767, "learning_rate": 5.489538654834638e-06, "loss": 0.3161, "step": 10711 }, { "epoch": 0.48481556913328805, "grad_norm": 0.7567639136682123, "learning_rate": 5.488809249881046e-06, "loss": 0.3478, "step": 10712 }, { "epoch": 0.48486082824168364, "grad_norm": 0.6084504260324419, "learning_rate": 5.488079834424446e-06, "loss": 0.3391, "step": 10713 }, { "epoch": 0.48490608735007923, "grad_norm": 0.6199904202505113, "learning_rate": 5.487350408480507e-06, "loss": 0.3693, "step": 10714 }, { "epoch": 0.48495134645847476, "grad_norm": 0.694471474371993, "learning_rate": 5.486620972064907e-06, "loss": 0.3143, "step": 10715 }, { "epoch": 0.48499660556687035, "grad_norm": 0.6589954660090999, "learning_rate": 5.485891525193316e-06, "loss": 0.3475, "step": 10716 }, { "epoch": 0.4850418646752659, "grad_norm": 0.7121177870279708, "learning_rate": 5.485162067881407e-06, "loss": 0.3641, "step": 10717 }, { "epoch": 0.4850871237836615, "grad_norm": 0.6038698862707603, "learning_rate": 5.484432600144857e-06, "loss": 0.2968, "step": 10718 }, { "epoch": 0.485132382892057, "grad_norm": 0.3526127088415516, "learning_rate": 5.483703121999337e-06, "loss": 0.4761, "step": 10719 }, { "epoch": 0.4851776420004526, "grad_norm": 0.6175444607040002, "learning_rate": 5.482973633460524e-06, "loss": 0.3395, "step": 10720 }, { "epoch": 0.48522290110884814, "grad_norm": 0.6735534842917275, "learning_rate": 5.48224413454409e-06, "loss": 0.3059, "step": 10721 }, { "epoch": 0.4852681602172437, "grad_norm": 0.6806734732368638, "learning_rate": 5.481514625265709e-06, "loss": 0.3289, "step": 10722 }, { "epoch": 0.48531341932563926, "grad_norm": 0.6232723889284854, "learning_rate": 5.480785105641061e-06, "loss": 0.3057, "step": 10723 }, { "epoch": 0.48535867843403485, "grad_norm": 0.3112196698409816, "learning_rate": 5.480055575685815e-06, "loss": 0.4743, "step": 10724 }, { "epoch": 0.48540393754243044, "grad_norm": 0.6491875086620946, "learning_rate": 5.479326035415651e-06, "loss": 0.3451, "step": 10725 }, { "epoch": 0.485449196650826, "grad_norm": 0.5653696631158297, "learning_rate": 5.47859648484624e-06, "loss": 0.3236, "step": 10726 }, { "epoch": 0.48549445575922157, "grad_norm": 0.6438329748116453, "learning_rate": 5.477866923993262e-06, "loss": 0.3604, "step": 10727 }, { "epoch": 0.4855397148676171, "grad_norm": 0.6606980186635552, "learning_rate": 5.477137352872393e-06, "loss": 0.3349, "step": 10728 }, { "epoch": 0.4855849739760127, "grad_norm": 0.6504920369442497, "learning_rate": 5.476407771499305e-06, "loss": 0.3571, "step": 10729 }, { "epoch": 0.4856302330844082, "grad_norm": 0.6187465532547302, "learning_rate": 5.475678179889678e-06, "loss": 0.3178, "step": 10730 }, { "epoch": 0.4856754921928038, "grad_norm": 0.618073859499965, "learning_rate": 5.474948578059188e-06, "loss": 0.3314, "step": 10731 }, { "epoch": 0.48572075130119935, "grad_norm": 0.60108084486261, "learning_rate": 5.474218966023512e-06, "loss": 0.2919, "step": 10732 }, { "epoch": 0.48576601040959494, "grad_norm": 0.6285127287027362, "learning_rate": 5.473489343798327e-06, "loss": 0.3514, "step": 10733 }, { "epoch": 0.4858112695179905, "grad_norm": 0.5864677323792765, "learning_rate": 5.472759711399311e-06, "loss": 0.302, "step": 10734 }, { "epoch": 0.48585652862638606, "grad_norm": 0.6108680980516022, "learning_rate": 5.472030068842139e-06, "loss": 0.3118, "step": 10735 }, { "epoch": 0.4859017877347816, "grad_norm": 0.6878846950476892, "learning_rate": 5.471300416142492e-06, "loss": 0.3982, "step": 10736 }, { "epoch": 0.4859470468431772, "grad_norm": 0.5736098866508658, "learning_rate": 5.470570753316046e-06, "loss": 0.3123, "step": 10737 }, { "epoch": 0.4859923059515728, "grad_norm": 0.39332779899616394, "learning_rate": 5.469841080378479e-06, "loss": 0.486, "step": 10738 }, { "epoch": 0.4860375650599683, "grad_norm": 0.6134887660443886, "learning_rate": 5.469111397345471e-06, "loss": 0.2917, "step": 10739 }, { "epoch": 0.4860828241683639, "grad_norm": 0.616320225429893, "learning_rate": 5.468381704232699e-06, "loss": 0.3249, "step": 10740 }, { "epoch": 0.48612808327675944, "grad_norm": 0.30036757827431215, "learning_rate": 5.467652001055844e-06, "loss": 0.4679, "step": 10741 }, { "epoch": 0.486173342385155, "grad_norm": 0.627407516553, "learning_rate": 5.466922287830584e-06, "loss": 0.314, "step": 10742 }, { "epoch": 0.48621860149355056, "grad_norm": 0.6642704155718921, "learning_rate": 5.466192564572597e-06, "loss": 0.36, "step": 10743 }, { "epoch": 0.48626386060194615, "grad_norm": 0.6556948496626056, "learning_rate": 5.465462831297564e-06, "loss": 0.3316, "step": 10744 }, { "epoch": 0.4863091197103417, "grad_norm": 0.3150740101579165, "learning_rate": 5.464733088021165e-06, "loss": 0.4845, "step": 10745 }, { "epoch": 0.4863543788187373, "grad_norm": 0.3502713140017343, "learning_rate": 5.464003334759077e-06, "loss": 0.4992, "step": 10746 }, { "epoch": 0.4863996379271328, "grad_norm": 0.675623605474163, "learning_rate": 5.463273571526985e-06, "loss": 0.324, "step": 10747 }, { "epoch": 0.4864448970355284, "grad_norm": 0.6686506686394733, "learning_rate": 5.462543798340565e-06, "loss": 0.3361, "step": 10748 }, { "epoch": 0.486490156143924, "grad_norm": 0.6719577676699698, "learning_rate": 5.4618140152155e-06, "loss": 0.3557, "step": 10749 }, { "epoch": 0.4865354152523195, "grad_norm": 0.625166400770162, "learning_rate": 5.461084222167471e-06, "loss": 0.3191, "step": 10750 }, { "epoch": 0.4865806743607151, "grad_norm": 0.6150470753407132, "learning_rate": 5.460354419212156e-06, "loss": 0.3423, "step": 10751 }, { "epoch": 0.48662593346911065, "grad_norm": 0.6496271771479472, "learning_rate": 5.4596246063652405e-06, "loss": 0.3481, "step": 10752 }, { "epoch": 0.48667119257750624, "grad_norm": 0.35633811231314305, "learning_rate": 5.458894783642402e-06, "loss": 0.4824, "step": 10753 }, { "epoch": 0.4867164516859018, "grad_norm": 0.6279048917219101, "learning_rate": 5.458164951059326e-06, "loss": 0.3248, "step": 10754 }, { "epoch": 0.48676171079429736, "grad_norm": 0.8146324246424269, "learning_rate": 5.457435108631691e-06, "loss": 0.3321, "step": 10755 }, { "epoch": 0.4868069699026929, "grad_norm": 0.6477127892981791, "learning_rate": 5.456705256375181e-06, "loss": 0.2981, "step": 10756 }, { "epoch": 0.4868522290110885, "grad_norm": 0.6579613350271492, "learning_rate": 5.455975394305477e-06, "loss": 0.3271, "step": 10757 }, { "epoch": 0.486897488119484, "grad_norm": 0.5833393576827078, "learning_rate": 5.455245522438263e-06, "loss": 0.3102, "step": 10758 }, { "epoch": 0.4869427472278796, "grad_norm": 0.6247727229746702, "learning_rate": 5.4545156407892204e-06, "loss": 0.3344, "step": 10759 }, { "epoch": 0.48698800633627515, "grad_norm": 0.689951121413877, "learning_rate": 5.453785749374033e-06, "loss": 0.2856, "step": 10760 }, { "epoch": 0.48703326544467074, "grad_norm": 0.64134476580352, "learning_rate": 5.453055848208383e-06, "loss": 0.348, "step": 10761 }, { "epoch": 0.4870785245530663, "grad_norm": 0.6136023603777939, "learning_rate": 5.452325937307955e-06, "loss": 0.3274, "step": 10762 }, { "epoch": 0.48712378366146186, "grad_norm": 0.3744441172659562, "learning_rate": 5.4515960166884315e-06, "loss": 0.4868, "step": 10763 }, { "epoch": 0.48716904276985745, "grad_norm": 0.6212186162849846, "learning_rate": 5.450866086365496e-06, "loss": 0.3281, "step": 10764 }, { "epoch": 0.487214301878253, "grad_norm": 0.2842456058853913, "learning_rate": 5.450136146354834e-06, "loss": 0.4618, "step": 10765 }, { "epoch": 0.4872595609866486, "grad_norm": 0.6214680943609159, "learning_rate": 5.449406196672129e-06, "loss": 0.3403, "step": 10766 }, { "epoch": 0.4873048200950441, "grad_norm": 0.6344077106540689, "learning_rate": 5.448676237333064e-06, "loss": 0.3273, "step": 10767 }, { "epoch": 0.4873500792034397, "grad_norm": 0.7736892506859431, "learning_rate": 5.447946268353324e-06, "loss": 0.3384, "step": 10768 }, { "epoch": 0.48739533831183524, "grad_norm": 0.6034017418949362, "learning_rate": 5.447216289748596e-06, "loss": 0.296, "step": 10769 }, { "epoch": 0.4874405974202308, "grad_norm": 0.6784861654016594, "learning_rate": 5.446486301534564e-06, "loss": 0.3702, "step": 10770 }, { "epoch": 0.48748585652862636, "grad_norm": 0.6188537207264507, "learning_rate": 5.445756303726913e-06, "loss": 0.2995, "step": 10771 }, { "epoch": 0.48753111563702195, "grad_norm": 0.7374548514598774, "learning_rate": 5.445026296341325e-06, "loss": 0.3689, "step": 10772 }, { "epoch": 0.48757637474541754, "grad_norm": 0.5612933828257421, "learning_rate": 5.44429627939349e-06, "loss": 0.3288, "step": 10773 }, { "epoch": 0.4876216338538131, "grad_norm": 0.5757263653169219, "learning_rate": 5.443566252899093e-06, "loss": 0.3163, "step": 10774 }, { "epoch": 0.48766689296220866, "grad_norm": 0.6066722995925015, "learning_rate": 5.442836216873819e-06, "loss": 0.3361, "step": 10775 }, { "epoch": 0.4877121520706042, "grad_norm": 0.6216811471150734, "learning_rate": 5.442106171333355e-06, "loss": 0.3517, "step": 10776 }, { "epoch": 0.4877574111789998, "grad_norm": 0.6070444961735875, "learning_rate": 5.441376116293388e-06, "loss": 0.3058, "step": 10777 }, { "epoch": 0.4878026702873953, "grad_norm": 0.4419608326387728, "learning_rate": 5.4406460517696035e-06, "loss": 0.4778, "step": 10778 }, { "epoch": 0.4878479293957909, "grad_norm": 0.6615232986768225, "learning_rate": 5.439915977777689e-06, "loss": 0.297, "step": 10779 }, { "epoch": 0.48789318850418645, "grad_norm": 0.6232505080950455, "learning_rate": 5.43918589433333e-06, "loss": 0.287, "step": 10780 }, { "epoch": 0.48793844761258204, "grad_norm": 0.6065556060776699, "learning_rate": 5.438455801452216e-06, "loss": 0.3024, "step": 10781 }, { "epoch": 0.4879837067209776, "grad_norm": 0.2888300571280535, "learning_rate": 5.437725699150031e-06, "loss": 0.4714, "step": 10782 }, { "epoch": 0.48802896582937316, "grad_norm": 0.6129089740283804, "learning_rate": 5.43699558744247e-06, "loss": 0.3645, "step": 10783 }, { "epoch": 0.48807422493776875, "grad_norm": 0.6321064368855184, "learning_rate": 5.4362654663452115e-06, "loss": 0.3244, "step": 10784 }, { "epoch": 0.4881194840461643, "grad_norm": 0.6236913706818845, "learning_rate": 5.435535335873951e-06, "loss": 0.3237, "step": 10785 }, { "epoch": 0.4881647431545599, "grad_norm": 0.6794406833180068, "learning_rate": 5.434805196044372e-06, "loss": 0.3217, "step": 10786 }, { "epoch": 0.4882100022629554, "grad_norm": 0.6015193977905489, "learning_rate": 5.434075046872165e-06, "loss": 0.3482, "step": 10787 }, { "epoch": 0.488255261371351, "grad_norm": 0.7173251165967416, "learning_rate": 5.4333448883730176e-06, "loss": 0.3236, "step": 10788 }, { "epoch": 0.48830052047974654, "grad_norm": 0.5847772835682842, "learning_rate": 5.432614720562621e-06, "loss": 0.3191, "step": 10789 }, { "epoch": 0.4883457795881421, "grad_norm": 0.564475806463531, "learning_rate": 5.431884543456662e-06, "loss": 0.3342, "step": 10790 }, { "epoch": 0.48839103869653766, "grad_norm": 0.3581390264447621, "learning_rate": 5.43115435707083e-06, "loss": 0.4891, "step": 10791 }, { "epoch": 0.48843629780493325, "grad_norm": 0.31725716739829074, "learning_rate": 5.430424161420817e-06, "loss": 0.4979, "step": 10792 }, { "epoch": 0.4884815569133288, "grad_norm": 0.6605082182680612, "learning_rate": 5.429693956522308e-06, "loss": 0.3351, "step": 10793 }, { "epoch": 0.4885268160217244, "grad_norm": 0.6187867157877088, "learning_rate": 5.428963742390998e-06, "loss": 0.276, "step": 10794 }, { "epoch": 0.4885720751301199, "grad_norm": 0.6339357376584758, "learning_rate": 5.428233519042574e-06, "loss": 0.3166, "step": 10795 }, { "epoch": 0.4886173342385155, "grad_norm": 0.6775069897284436, "learning_rate": 5.427503286492727e-06, "loss": 0.3244, "step": 10796 }, { "epoch": 0.4886625933469111, "grad_norm": 0.5769499742725703, "learning_rate": 5.426773044757146e-06, "loss": 0.3392, "step": 10797 }, { "epoch": 0.4887078524553066, "grad_norm": 0.6189106653236469, "learning_rate": 5.426042793851525e-06, "loss": 0.3598, "step": 10798 }, { "epoch": 0.4887531115637022, "grad_norm": 0.4653135340607896, "learning_rate": 5.4253125337915514e-06, "loss": 0.4796, "step": 10799 }, { "epoch": 0.48879837067209775, "grad_norm": 0.5879197568638728, "learning_rate": 5.424582264592919e-06, "loss": 0.325, "step": 10800 }, { "epoch": 0.48884362978049334, "grad_norm": 0.6698195305676378, "learning_rate": 5.423851986271316e-06, "loss": 0.3432, "step": 10801 }, { "epoch": 0.4888888888888889, "grad_norm": 0.5785312045865859, "learning_rate": 5.423121698842437e-06, "loss": 0.3195, "step": 10802 }, { "epoch": 0.48893414799728446, "grad_norm": 0.6450008538019762, "learning_rate": 5.422391402321971e-06, "loss": 0.317, "step": 10803 }, { "epoch": 0.48897940710568, "grad_norm": 0.6335218164038602, "learning_rate": 5.421661096725612e-06, "loss": 0.3354, "step": 10804 }, { "epoch": 0.4890246662140756, "grad_norm": 0.5882818683954184, "learning_rate": 5.42093078206905e-06, "loss": 0.3516, "step": 10805 }, { "epoch": 0.4890699253224711, "grad_norm": 0.6605788221478196, "learning_rate": 5.42020045836798e-06, "loss": 0.3559, "step": 10806 }, { "epoch": 0.4891151844308667, "grad_norm": 0.6831599714525608, "learning_rate": 5.419470125638091e-06, "loss": 0.3454, "step": 10807 }, { "epoch": 0.4891604435392623, "grad_norm": 0.820649625790173, "learning_rate": 5.418739783895079e-06, "loss": 0.3969, "step": 10808 }, { "epoch": 0.48920570264765784, "grad_norm": 0.5944023432998939, "learning_rate": 5.418009433154633e-06, "loss": 0.3134, "step": 10809 }, { "epoch": 0.4892509617560534, "grad_norm": 0.6119284376862187, "learning_rate": 5.41727907343245e-06, "loss": 0.3646, "step": 10810 }, { "epoch": 0.48929622086444896, "grad_norm": 0.33465584922221014, "learning_rate": 5.41654870474422e-06, "loss": 0.5179, "step": 10811 }, { "epoch": 0.48934147997284455, "grad_norm": 0.6614144868552617, "learning_rate": 5.4158183271056385e-06, "loss": 0.3378, "step": 10812 }, { "epoch": 0.4893867390812401, "grad_norm": 0.2797843285100049, "learning_rate": 5.415087940532398e-06, "loss": 0.4535, "step": 10813 }, { "epoch": 0.4894319981896357, "grad_norm": 0.6000761307553717, "learning_rate": 5.414357545040193e-06, "loss": 0.3209, "step": 10814 }, { "epoch": 0.4894772572980312, "grad_norm": 0.6367329656813222, "learning_rate": 5.413627140644716e-06, "loss": 0.3127, "step": 10815 }, { "epoch": 0.4895225164064268, "grad_norm": 0.6852619243332301, "learning_rate": 5.412896727361663e-06, "loss": 0.3298, "step": 10816 }, { "epoch": 0.48956777551482233, "grad_norm": 0.6331187290160614, "learning_rate": 5.4121663052067265e-06, "loss": 0.2659, "step": 10817 }, { "epoch": 0.4896130346232179, "grad_norm": 0.6319135130209138, "learning_rate": 5.411435874195602e-06, "loss": 0.3313, "step": 10818 }, { "epoch": 0.4896582937316135, "grad_norm": 0.6181685095469309, "learning_rate": 5.410705434343985e-06, "loss": 0.3094, "step": 10819 }, { "epoch": 0.48970355284000905, "grad_norm": 0.5959127812339418, "learning_rate": 5.409974985667569e-06, "loss": 0.2987, "step": 10820 }, { "epoch": 0.48974881194840464, "grad_norm": 0.6046669266642816, "learning_rate": 5.409244528182051e-06, "loss": 0.3174, "step": 10821 }, { "epoch": 0.4897940710568002, "grad_norm": 0.6079813229979458, "learning_rate": 5.408514061903123e-06, "loss": 0.32, "step": 10822 }, { "epoch": 0.48983933016519576, "grad_norm": 0.6102670213077448, "learning_rate": 5.407783586846484e-06, "loss": 0.3443, "step": 10823 }, { "epoch": 0.4898845892735913, "grad_norm": 0.6393248736433033, "learning_rate": 5.407053103027826e-06, "loss": 0.3698, "step": 10824 }, { "epoch": 0.4899298483819869, "grad_norm": 0.36085845063400557, "learning_rate": 5.40632261046285e-06, "loss": 0.4866, "step": 10825 }, { "epoch": 0.4899751074903824, "grad_norm": 0.6483175296284592, "learning_rate": 5.405592109167247e-06, "loss": 0.3598, "step": 10826 }, { "epoch": 0.490020366598778, "grad_norm": 0.6434207102297304, "learning_rate": 5.404861599156715e-06, "loss": 0.2913, "step": 10827 }, { "epoch": 0.49006562570717355, "grad_norm": 0.6106852104150455, "learning_rate": 5.404131080446952e-06, "loss": 0.328, "step": 10828 }, { "epoch": 0.49011088481556914, "grad_norm": 0.5672917334952595, "learning_rate": 5.403400553053654e-06, "loss": 0.3257, "step": 10829 }, { "epoch": 0.49015614392396467, "grad_norm": 0.5955844527154185, "learning_rate": 5.402670016992514e-06, "loss": 0.3027, "step": 10830 }, { "epoch": 0.49020140303236026, "grad_norm": 0.6209346722321435, "learning_rate": 5.401939472279235e-06, "loss": 0.3474, "step": 10831 }, { "epoch": 0.49024666214075585, "grad_norm": 0.5779865260605683, "learning_rate": 5.401208918929509e-06, "loss": 0.2958, "step": 10832 }, { "epoch": 0.4902919212491514, "grad_norm": 0.6041288922310544, "learning_rate": 5.400478356959037e-06, "loss": 0.3225, "step": 10833 }, { "epoch": 0.490337180357547, "grad_norm": 0.5982865724226629, "learning_rate": 5.399747786383515e-06, "loss": 0.3329, "step": 10834 }, { "epoch": 0.4903824394659425, "grad_norm": 0.6113716925853488, "learning_rate": 5.39901720721864e-06, "loss": 0.3717, "step": 10835 }, { "epoch": 0.4904276985743381, "grad_norm": 0.6375109781082008, "learning_rate": 5.398286619480111e-06, "loss": 0.3493, "step": 10836 }, { "epoch": 0.49047295768273363, "grad_norm": 0.6280684294342922, "learning_rate": 5.397556023183627e-06, "loss": 0.3364, "step": 10837 }, { "epoch": 0.4905182167911292, "grad_norm": 0.632665793054112, "learning_rate": 5.396825418344883e-06, "loss": 0.3083, "step": 10838 }, { "epoch": 0.49056347589952476, "grad_norm": 0.35018151725738517, "learning_rate": 5.39609480497958e-06, "loss": 0.4969, "step": 10839 }, { "epoch": 0.49060873500792035, "grad_norm": 0.3135080903253166, "learning_rate": 5.395364183103418e-06, "loss": 0.4889, "step": 10840 }, { "epoch": 0.4906539941163159, "grad_norm": 0.6470675002316936, "learning_rate": 5.394633552732091e-06, "loss": 0.3122, "step": 10841 }, { "epoch": 0.4906992532247115, "grad_norm": 0.6641179250395611, "learning_rate": 5.393902913881304e-06, "loss": 0.3127, "step": 10842 }, { "epoch": 0.49074451233310706, "grad_norm": 0.6837994695623595, "learning_rate": 5.393172266566751e-06, "loss": 0.2972, "step": 10843 }, { "epoch": 0.4907897714415026, "grad_norm": 0.57976310001045, "learning_rate": 5.392441610804135e-06, "loss": 0.3528, "step": 10844 }, { "epoch": 0.4908350305498982, "grad_norm": 0.6808006276626665, "learning_rate": 5.391710946609152e-06, "loss": 0.3338, "step": 10845 }, { "epoch": 0.4908802896582937, "grad_norm": 0.6162543213535839, "learning_rate": 5.390980273997507e-06, "loss": 0.3185, "step": 10846 }, { "epoch": 0.4909255487666893, "grad_norm": 0.4064457148287276, "learning_rate": 5.390249592984894e-06, "loss": 0.4859, "step": 10847 }, { "epoch": 0.49097080787508485, "grad_norm": 0.6968576972450587, "learning_rate": 5.389518903587016e-06, "loss": 0.3389, "step": 10848 }, { "epoch": 0.49101606698348044, "grad_norm": 0.5918707805528982, "learning_rate": 5.388788205819575e-06, "loss": 0.3141, "step": 10849 }, { "epoch": 0.49106132609187597, "grad_norm": 0.6700538372595888, "learning_rate": 5.38805749969827e-06, "loss": 0.3083, "step": 10850 }, { "epoch": 0.49110658520027156, "grad_norm": 0.5692254658928143, "learning_rate": 5.387326785238798e-06, "loss": 0.3105, "step": 10851 }, { "epoch": 0.4911518443086671, "grad_norm": 0.7663508797023766, "learning_rate": 5.386596062456865e-06, "loss": 0.3289, "step": 10852 }, { "epoch": 0.4911971034170627, "grad_norm": 0.7226289709380682, "learning_rate": 5.385865331368169e-06, "loss": 0.329, "step": 10853 }, { "epoch": 0.4912423625254583, "grad_norm": 0.6834416027758144, "learning_rate": 5.385134591988412e-06, "loss": 0.3443, "step": 10854 }, { "epoch": 0.4912876216338538, "grad_norm": 0.6554737185398497, "learning_rate": 5.384403844333297e-06, "loss": 0.3413, "step": 10855 }, { "epoch": 0.4913328807422494, "grad_norm": 0.6095854602314272, "learning_rate": 5.383673088418523e-06, "loss": 0.3498, "step": 10856 }, { "epoch": 0.49137813985064493, "grad_norm": 0.6254987626059457, "learning_rate": 5.382942324259792e-06, "loss": 0.3505, "step": 10857 }, { "epoch": 0.4914233989590405, "grad_norm": 0.600840135233815, "learning_rate": 5.382211551872808e-06, "loss": 0.3281, "step": 10858 }, { "epoch": 0.49146865806743606, "grad_norm": 0.7062759844203941, "learning_rate": 5.38148077127327e-06, "loss": 0.3398, "step": 10859 }, { "epoch": 0.49151391717583165, "grad_norm": 0.6182077333196346, "learning_rate": 5.380749982476884e-06, "loss": 0.3508, "step": 10860 }, { "epoch": 0.4915591762842272, "grad_norm": 0.6250150814729192, "learning_rate": 5.380019185499348e-06, "loss": 0.3335, "step": 10861 }, { "epoch": 0.4916044353926228, "grad_norm": 0.644831247436958, "learning_rate": 5.379288380356369e-06, "loss": 0.3034, "step": 10862 }, { "epoch": 0.4916496945010183, "grad_norm": 0.6896682788376114, "learning_rate": 5.378557567063646e-06, "loss": 0.3236, "step": 10863 }, { "epoch": 0.4916949536094139, "grad_norm": 0.6116663195157774, "learning_rate": 5.3778267456368836e-06, "loss": 0.3136, "step": 10864 }, { "epoch": 0.49174021271780943, "grad_norm": 0.686230452116269, "learning_rate": 5.377095916091786e-06, "loss": 0.4097, "step": 10865 }, { "epoch": 0.491785471826205, "grad_norm": 0.6313581308698523, "learning_rate": 5.376365078444053e-06, "loss": 0.3125, "step": 10866 }, { "epoch": 0.4918307309346006, "grad_norm": 0.6151971347833678, "learning_rate": 5.375634232709392e-06, "loss": 0.3094, "step": 10867 }, { "epoch": 0.49187599004299615, "grad_norm": 0.6493310508228657, "learning_rate": 5.374903378903506e-06, "loss": 0.3611, "step": 10868 }, { "epoch": 0.49192124915139174, "grad_norm": 0.624810363256607, "learning_rate": 5.374172517042095e-06, "loss": 0.3345, "step": 10869 }, { "epoch": 0.49196650825978727, "grad_norm": 0.620486668426, "learning_rate": 5.373441647140868e-06, "loss": 0.3351, "step": 10870 }, { "epoch": 0.49201176736818286, "grad_norm": 0.6993162835016654, "learning_rate": 5.372710769215528e-06, "loss": 0.3263, "step": 10871 }, { "epoch": 0.4920570264765784, "grad_norm": 0.6195139398422571, "learning_rate": 5.371979883281775e-06, "loss": 0.3029, "step": 10872 }, { "epoch": 0.492102285584974, "grad_norm": 0.6278648577458785, "learning_rate": 5.37124898935532e-06, "loss": 0.3384, "step": 10873 }, { "epoch": 0.4921475446933695, "grad_norm": 0.3736919014627429, "learning_rate": 5.370518087451861e-06, "loss": 0.4994, "step": 10874 }, { "epoch": 0.4921928038017651, "grad_norm": 0.31140376218161614, "learning_rate": 5.36978717758711e-06, "loss": 0.467, "step": 10875 }, { "epoch": 0.49223806291016065, "grad_norm": 0.6264908346834731, "learning_rate": 5.369056259776766e-06, "loss": 0.3101, "step": 10876 }, { "epoch": 0.49228332201855624, "grad_norm": 0.6107193434356737, "learning_rate": 5.368325334036537e-06, "loss": 0.3226, "step": 10877 }, { "epoch": 0.4923285811269518, "grad_norm": 0.6140612466191963, "learning_rate": 5.367594400382128e-06, "loss": 0.3546, "step": 10878 }, { "epoch": 0.49237384023534736, "grad_norm": 0.6609013483376353, "learning_rate": 5.366863458829245e-06, "loss": 0.3197, "step": 10879 }, { "epoch": 0.49241909934374295, "grad_norm": 0.6760753743411584, "learning_rate": 5.36613250939359e-06, "loss": 0.3521, "step": 10880 }, { "epoch": 0.4924643584521385, "grad_norm": 0.6654052260248408, "learning_rate": 5.365401552090876e-06, "loss": 0.3776, "step": 10881 }, { "epoch": 0.4925096175605341, "grad_norm": 0.6076148237516632, "learning_rate": 5.364670586936801e-06, "loss": 0.3614, "step": 10882 }, { "epoch": 0.4925548766689296, "grad_norm": 0.6324147862809746, "learning_rate": 5.363939613947078e-06, "loss": 0.3794, "step": 10883 }, { "epoch": 0.4926001357773252, "grad_norm": 0.674557660483406, "learning_rate": 5.363208633137409e-06, "loss": 0.3159, "step": 10884 }, { "epoch": 0.49264539488572073, "grad_norm": 0.6074914429601798, "learning_rate": 5.3624776445235025e-06, "loss": 0.3391, "step": 10885 }, { "epoch": 0.4926906539941163, "grad_norm": 0.6394413452281954, "learning_rate": 5.361746648121064e-06, "loss": 0.3425, "step": 10886 }, { "epoch": 0.49273591310251186, "grad_norm": 0.5797041229041443, "learning_rate": 5.361015643945803e-06, "loss": 0.3405, "step": 10887 }, { "epoch": 0.49278117221090745, "grad_norm": 0.6373249682044135, "learning_rate": 5.3602846320134216e-06, "loss": 0.3199, "step": 10888 }, { "epoch": 0.492826431319303, "grad_norm": 0.6562131440044063, "learning_rate": 5.359553612339633e-06, "loss": 0.2873, "step": 10889 }, { "epoch": 0.49287169042769857, "grad_norm": 0.7090555888596984, "learning_rate": 5.358822584940139e-06, "loss": 0.3358, "step": 10890 }, { "epoch": 0.49291694953609416, "grad_norm": 0.605339486263966, "learning_rate": 5.358091549830651e-06, "loss": 0.336, "step": 10891 }, { "epoch": 0.4929622086444897, "grad_norm": 0.6203741456837942, "learning_rate": 5.357360507026875e-06, "loss": 0.3124, "step": 10892 }, { "epoch": 0.4930074677528853, "grad_norm": 0.685886739956919, "learning_rate": 5.35662945654452e-06, "loss": 0.3535, "step": 10893 }, { "epoch": 0.4930527268612808, "grad_norm": 0.5849598914637093, "learning_rate": 5.3558983983992915e-06, "loss": 0.4987, "step": 10894 }, { "epoch": 0.4930979859696764, "grad_norm": 0.6099707630801916, "learning_rate": 5.355167332606901e-06, "loss": 0.2888, "step": 10895 }, { "epoch": 0.49314324507807195, "grad_norm": 0.387638619022389, "learning_rate": 5.354436259183054e-06, "loss": 0.4624, "step": 10896 }, { "epoch": 0.49318850418646754, "grad_norm": 0.6293204916290952, "learning_rate": 5.353705178143462e-06, "loss": 0.3194, "step": 10897 }, { "epoch": 0.49323376329486307, "grad_norm": 0.6166390666247227, "learning_rate": 5.352974089503832e-06, "loss": 0.3186, "step": 10898 }, { "epoch": 0.49327902240325866, "grad_norm": 0.6305222876486003, "learning_rate": 5.352242993279871e-06, "loss": 0.3787, "step": 10899 }, { "epoch": 0.4933242815116542, "grad_norm": 0.6176998912923989, "learning_rate": 5.351511889487293e-06, "loss": 0.3472, "step": 10900 }, { "epoch": 0.4933695406200498, "grad_norm": 0.6752658070048627, "learning_rate": 5.350780778141801e-06, "loss": 0.3142, "step": 10901 }, { "epoch": 0.4934147997284454, "grad_norm": 0.7026151470603141, "learning_rate": 5.35004965925911e-06, "loss": 0.4815, "step": 10902 }, { "epoch": 0.4934600588368409, "grad_norm": 0.6056281896817226, "learning_rate": 5.349318532854924e-06, "loss": 0.3572, "step": 10903 }, { "epoch": 0.4935053179452365, "grad_norm": 0.6059450088427075, "learning_rate": 5.348587398944959e-06, "loss": 0.3166, "step": 10904 }, { "epoch": 0.49355057705363203, "grad_norm": 0.6160544560726442, "learning_rate": 5.347856257544919e-06, "loss": 0.2925, "step": 10905 }, { "epoch": 0.4935958361620276, "grad_norm": 0.6557893318809236, "learning_rate": 5.347125108670516e-06, "loss": 0.3096, "step": 10906 }, { "epoch": 0.49364109527042316, "grad_norm": 0.6031112497162145, "learning_rate": 5.3463939523374616e-06, "loss": 0.3395, "step": 10907 }, { "epoch": 0.49368635437881875, "grad_norm": 0.7063980157611425, "learning_rate": 5.345662788561466e-06, "loss": 0.362, "step": 10908 }, { "epoch": 0.4937316134872143, "grad_norm": 0.6090917761194881, "learning_rate": 5.344931617358237e-06, "loss": 0.3011, "step": 10909 }, { "epoch": 0.4937768725956099, "grad_norm": 0.6698751151563621, "learning_rate": 5.344200438743489e-06, "loss": 0.3522, "step": 10910 }, { "epoch": 0.4938221317040054, "grad_norm": 0.3498846920345231, "learning_rate": 5.343469252732928e-06, "loss": 0.4968, "step": 10911 }, { "epoch": 0.493867390812401, "grad_norm": 0.6144344328879905, "learning_rate": 5.34273805934227e-06, "loss": 0.3478, "step": 10912 }, { "epoch": 0.4939126499207966, "grad_norm": 0.6833336709587083, "learning_rate": 5.342006858587222e-06, "loss": 0.3395, "step": 10913 }, { "epoch": 0.4939579090291921, "grad_norm": 0.6541159079724937, "learning_rate": 5.341275650483497e-06, "loss": 0.3197, "step": 10914 }, { "epoch": 0.4940031681375877, "grad_norm": 0.5724005590293713, "learning_rate": 5.340544435046807e-06, "loss": 0.3453, "step": 10915 }, { "epoch": 0.49404842724598325, "grad_norm": 0.6626481102515759, "learning_rate": 5.3398132122928635e-06, "loss": 0.3212, "step": 10916 }, { "epoch": 0.49409368635437884, "grad_norm": 0.6123494656814092, "learning_rate": 5.339081982237377e-06, "loss": 0.3028, "step": 10917 }, { "epoch": 0.49413894546277437, "grad_norm": 0.3694123866230547, "learning_rate": 5.3383507448960605e-06, "loss": 0.4727, "step": 10918 }, { "epoch": 0.49418420457116996, "grad_norm": 0.38805313791080737, "learning_rate": 5.3376195002846255e-06, "loss": 0.4782, "step": 10919 }, { "epoch": 0.4942294636795655, "grad_norm": 0.27574780421912554, "learning_rate": 5.336888248418784e-06, "loss": 0.4617, "step": 10920 }, { "epoch": 0.4942747227879611, "grad_norm": 0.5884279939932177, "learning_rate": 5.3361569893142505e-06, "loss": 0.3636, "step": 10921 }, { "epoch": 0.4943199818963566, "grad_norm": 0.6648088950131924, "learning_rate": 5.335425722986735e-06, "loss": 0.3125, "step": 10922 }, { "epoch": 0.4943652410047522, "grad_norm": 0.6967573887842057, "learning_rate": 5.334694449451949e-06, "loss": 0.3286, "step": 10923 }, { "epoch": 0.49441050011314774, "grad_norm": 0.5998366409788873, "learning_rate": 5.3339631687256085e-06, "loss": 0.3723, "step": 10924 }, { "epoch": 0.49445575922154333, "grad_norm": 0.6340096059926653, "learning_rate": 5.333231880823425e-06, "loss": 0.308, "step": 10925 }, { "epoch": 0.4945010183299389, "grad_norm": 0.6382985585954123, "learning_rate": 5.3325005857611126e-06, "loss": 0.3365, "step": 10926 }, { "epoch": 0.49454627743833446, "grad_norm": 0.6236564404757622, "learning_rate": 5.331769283554382e-06, "loss": 0.3088, "step": 10927 }, { "epoch": 0.49459153654673005, "grad_norm": 0.6272800491529169, "learning_rate": 5.33103797421895e-06, "loss": 0.3432, "step": 10928 }, { "epoch": 0.4946367956551256, "grad_norm": 0.6036608440507453, "learning_rate": 5.33030665777053e-06, "loss": 0.292, "step": 10929 }, { "epoch": 0.4946820547635212, "grad_norm": 0.6449185476085723, "learning_rate": 5.329575334224832e-06, "loss": 0.3284, "step": 10930 }, { "epoch": 0.4947273138719167, "grad_norm": 0.6568644023628296, "learning_rate": 5.328844003597573e-06, "loss": 0.3207, "step": 10931 }, { "epoch": 0.4947725729803123, "grad_norm": 0.6664172500534838, "learning_rate": 5.328112665904465e-06, "loss": 0.3314, "step": 10932 }, { "epoch": 0.49481783208870783, "grad_norm": 0.7000066487436837, "learning_rate": 5.3273813211612254e-06, "loss": 0.4498, "step": 10933 }, { "epoch": 0.4948630911971034, "grad_norm": 0.6227058982525127, "learning_rate": 5.3266499693835664e-06, "loss": 0.3427, "step": 10934 }, { "epoch": 0.49490835030549896, "grad_norm": 0.6609441288231116, "learning_rate": 5.325918610587202e-06, "loss": 0.3172, "step": 10935 }, { "epoch": 0.49495360941389455, "grad_norm": 0.6541893702664565, "learning_rate": 5.325187244787848e-06, "loss": 0.3608, "step": 10936 }, { "epoch": 0.49499886852229014, "grad_norm": 0.6666335731827807, "learning_rate": 5.324455872001221e-06, "loss": 0.299, "step": 10937 }, { "epoch": 0.49504412763068567, "grad_norm": 0.3023990661780046, "learning_rate": 5.32372449224303e-06, "loss": 0.4673, "step": 10938 }, { "epoch": 0.49508938673908126, "grad_norm": 0.6952874310170034, "learning_rate": 5.322993105528996e-06, "loss": 0.307, "step": 10939 }, { "epoch": 0.4951346458474768, "grad_norm": 0.654364686935079, "learning_rate": 5.322261711874831e-06, "loss": 0.3121, "step": 10940 }, { "epoch": 0.4951799049558724, "grad_norm": 0.6533502738870345, "learning_rate": 5.321530311296253e-06, "loss": 0.346, "step": 10941 }, { "epoch": 0.4952251640642679, "grad_norm": 0.36164641617759674, "learning_rate": 5.320798903808976e-06, "loss": 0.4691, "step": 10942 }, { "epoch": 0.4952704231726635, "grad_norm": 0.6153895176913012, "learning_rate": 5.320067489428715e-06, "loss": 0.3039, "step": 10943 }, { "epoch": 0.49531568228105904, "grad_norm": 0.6072482821425039, "learning_rate": 5.319336068171187e-06, "loss": 0.3082, "step": 10944 }, { "epoch": 0.49536094138945463, "grad_norm": 0.6703237557809038, "learning_rate": 5.318604640052107e-06, "loss": 0.3422, "step": 10945 }, { "epoch": 0.49540620049785017, "grad_norm": 0.617543960875071, "learning_rate": 5.317873205087193e-06, "loss": 0.3292, "step": 10946 }, { "epoch": 0.49545145960624576, "grad_norm": 0.6251190285600072, "learning_rate": 5.31714176329216e-06, "loss": 0.3505, "step": 10947 }, { "epoch": 0.49549671871464135, "grad_norm": 0.6543858933536746, "learning_rate": 5.3164103146827225e-06, "loss": 0.3389, "step": 10948 }, { "epoch": 0.4955419778230369, "grad_norm": 0.7297640462728502, "learning_rate": 5.315678859274601e-06, "loss": 0.3592, "step": 10949 }, { "epoch": 0.4955872369314325, "grad_norm": 0.6665550483127983, "learning_rate": 5.314947397083512e-06, "loss": 0.3158, "step": 10950 }, { "epoch": 0.495632496039828, "grad_norm": 0.6365177479194217, "learning_rate": 5.314215928125167e-06, "loss": 0.3262, "step": 10951 }, { "epoch": 0.4956777551482236, "grad_norm": 0.6694352069351751, "learning_rate": 5.313484452415289e-06, "loss": 0.3406, "step": 10952 }, { "epoch": 0.49572301425661913, "grad_norm": 0.667497336946359, "learning_rate": 5.312752969969592e-06, "loss": 0.3397, "step": 10953 }, { "epoch": 0.4957682733650147, "grad_norm": 0.6467036143338789, "learning_rate": 5.3120214808037954e-06, "loss": 0.3268, "step": 10954 }, { "epoch": 0.49581353247341026, "grad_norm": 0.6394702341304671, "learning_rate": 5.311289984933615e-06, "loss": 0.3347, "step": 10955 }, { "epoch": 0.49585879158180585, "grad_norm": 0.6664012078186998, "learning_rate": 5.310558482374768e-06, "loss": 0.3179, "step": 10956 }, { "epoch": 0.4959040506902014, "grad_norm": 0.6043814803870569, "learning_rate": 5.309826973142974e-06, "loss": 0.3369, "step": 10957 }, { "epoch": 0.49594930979859697, "grad_norm": 0.37418229367821654, "learning_rate": 5.30909545725395e-06, "loss": 0.4913, "step": 10958 }, { "epoch": 0.4959945689069925, "grad_norm": 0.3446965224369728, "learning_rate": 5.308363934723412e-06, "loss": 0.4493, "step": 10959 }, { "epoch": 0.4960398280153881, "grad_norm": 0.29210342646383686, "learning_rate": 5.307632405567084e-06, "loss": 0.4693, "step": 10960 }, { "epoch": 0.4960850871237837, "grad_norm": 0.6848138089237998, "learning_rate": 5.306900869800676e-06, "loss": 0.3238, "step": 10961 }, { "epoch": 0.4961303462321792, "grad_norm": 0.6610513471135908, "learning_rate": 5.306169327439914e-06, "loss": 0.3727, "step": 10962 }, { "epoch": 0.4961756053405748, "grad_norm": 0.6254059324403175, "learning_rate": 5.3054377785005114e-06, "loss": 0.3011, "step": 10963 }, { "epoch": 0.49622086444897034, "grad_norm": 0.6963258697924865, "learning_rate": 5.30470622299819e-06, "loss": 0.3804, "step": 10964 }, { "epoch": 0.49626612355736593, "grad_norm": 0.6253787977907792, "learning_rate": 5.303974660948669e-06, "loss": 0.2995, "step": 10965 }, { "epoch": 0.49631138266576147, "grad_norm": 0.5194836757059452, "learning_rate": 5.3032430923676635e-06, "loss": 0.4814, "step": 10966 }, { "epoch": 0.49635664177415706, "grad_norm": 0.7144946249067363, "learning_rate": 5.302511517270897e-06, "loss": 0.3655, "step": 10967 }, { "epoch": 0.4964019008825526, "grad_norm": 0.6770453635481531, "learning_rate": 5.301779935674087e-06, "loss": 0.3143, "step": 10968 }, { "epoch": 0.4964471599909482, "grad_norm": 0.6699003525392917, "learning_rate": 5.301048347592952e-06, "loss": 0.3671, "step": 10969 }, { "epoch": 0.4964924190993437, "grad_norm": 0.7024479803516988, "learning_rate": 5.300316753043214e-06, "loss": 0.3028, "step": 10970 }, { "epoch": 0.4965376782077393, "grad_norm": 0.6159114504436758, "learning_rate": 5.299585152040592e-06, "loss": 0.3316, "step": 10971 }, { "epoch": 0.4965829373161349, "grad_norm": 0.6961387616550401, "learning_rate": 5.298853544600802e-06, "loss": 0.3352, "step": 10972 }, { "epoch": 0.49662819642453043, "grad_norm": 0.628516422333494, "learning_rate": 5.298121930739571e-06, "loss": 0.3507, "step": 10973 }, { "epoch": 0.496673455532926, "grad_norm": 0.6162577726628683, "learning_rate": 5.297390310472612e-06, "loss": 0.3468, "step": 10974 }, { "epoch": 0.49671871464132156, "grad_norm": 0.6160097811332337, "learning_rate": 5.29665868381565e-06, "loss": 0.3158, "step": 10975 }, { "epoch": 0.49676397374971715, "grad_norm": 0.3708650924765746, "learning_rate": 5.295927050784404e-06, "loss": 0.4435, "step": 10976 }, { "epoch": 0.4968092328581127, "grad_norm": 0.34981254449603477, "learning_rate": 5.295195411394595e-06, "loss": 0.4923, "step": 10977 }, { "epoch": 0.49685449196650827, "grad_norm": 0.6413983531235229, "learning_rate": 5.2944637656619415e-06, "loss": 0.3009, "step": 10978 }, { "epoch": 0.4968997510749038, "grad_norm": 0.2988611839833011, "learning_rate": 5.293732113602169e-06, "loss": 0.4772, "step": 10979 }, { "epoch": 0.4969450101832994, "grad_norm": 0.30784643760060076, "learning_rate": 5.293000455230992e-06, "loss": 0.457, "step": 10980 }, { "epoch": 0.49699026929169493, "grad_norm": 0.5847235092637628, "learning_rate": 5.292268790564138e-06, "loss": 0.3285, "step": 10981 }, { "epoch": 0.4970355284000905, "grad_norm": 0.33329237159104297, "learning_rate": 5.291537119617322e-06, "loss": 0.467, "step": 10982 }, { "epoch": 0.4970807875084861, "grad_norm": 0.7333630104412473, "learning_rate": 5.290805442406273e-06, "loss": 0.3166, "step": 10983 }, { "epoch": 0.49712604661688164, "grad_norm": 0.651647334533816, "learning_rate": 5.290073758946705e-06, "loss": 0.3379, "step": 10984 }, { "epoch": 0.49717130572527723, "grad_norm": 0.6267299723598055, "learning_rate": 5.289342069254345e-06, "loss": 0.3138, "step": 10985 }, { "epoch": 0.49721656483367277, "grad_norm": 0.348045624189992, "learning_rate": 5.288610373344911e-06, "loss": 0.5256, "step": 10986 }, { "epoch": 0.49726182394206836, "grad_norm": 0.642822543396966, "learning_rate": 5.287878671234127e-06, "loss": 0.3337, "step": 10987 }, { "epoch": 0.4973070830504639, "grad_norm": 0.6261370129498127, "learning_rate": 5.287146962937715e-06, "loss": 0.3539, "step": 10988 }, { "epoch": 0.4973523421588595, "grad_norm": 0.31546532431943075, "learning_rate": 5.286415248471397e-06, "loss": 0.4861, "step": 10989 }, { "epoch": 0.497397601267255, "grad_norm": 0.8233396237313507, "learning_rate": 5.285683527850892e-06, "loss": 0.3291, "step": 10990 }, { "epoch": 0.4974428603756506, "grad_norm": 0.6464844367231789, "learning_rate": 5.284951801091929e-06, "loss": 0.3274, "step": 10991 }, { "epoch": 0.49748811948404614, "grad_norm": 0.6104061184013967, "learning_rate": 5.284220068210225e-06, "loss": 0.3618, "step": 10992 }, { "epoch": 0.49753337859244173, "grad_norm": 0.6293890350828011, "learning_rate": 5.283488329221506e-06, "loss": 0.3895, "step": 10993 }, { "epoch": 0.49757863770083727, "grad_norm": 0.6475500629074776, "learning_rate": 5.2827565841414915e-06, "loss": 0.3484, "step": 10994 }, { "epoch": 0.49762389680923286, "grad_norm": 0.35003481742079523, "learning_rate": 5.282024832985908e-06, "loss": 0.4795, "step": 10995 }, { "epoch": 0.49766915591762845, "grad_norm": 0.6124094693385026, "learning_rate": 5.281293075770476e-06, "loss": 0.3034, "step": 10996 }, { "epoch": 0.497714415026024, "grad_norm": 0.6289972302871515, "learning_rate": 5.280561312510921e-06, "loss": 0.3426, "step": 10997 }, { "epoch": 0.49775967413441957, "grad_norm": 0.6095955157067299, "learning_rate": 5.279829543222963e-06, "loss": 0.326, "step": 10998 }, { "epoch": 0.4978049332428151, "grad_norm": 0.660604168797498, "learning_rate": 5.27909776792233e-06, "loss": 0.3287, "step": 10999 }, { "epoch": 0.4978501923512107, "grad_norm": 0.6395277156110963, "learning_rate": 5.278365986624743e-06, "loss": 0.3563, "step": 11000 }, { "epoch": 0.49789545145960623, "grad_norm": 0.6254936495960255, "learning_rate": 5.277634199345924e-06, "loss": 0.3451, "step": 11001 }, { "epoch": 0.4979407105680018, "grad_norm": 0.2960462330487585, "learning_rate": 5.2769024061016e-06, "loss": 0.4898, "step": 11002 }, { "epoch": 0.49798596967639736, "grad_norm": 0.6469695831600601, "learning_rate": 5.276170606907492e-06, "loss": 0.3291, "step": 11003 }, { "epoch": 0.49803122878479295, "grad_norm": 0.2650054780274163, "learning_rate": 5.275438801779328e-06, "loss": 0.4513, "step": 11004 }, { "epoch": 0.4980764878931885, "grad_norm": 0.5869666126213625, "learning_rate": 5.27470699073283e-06, "loss": 0.3008, "step": 11005 }, { "epoch": 0.49812174700158407, "grad_norm": 0.2852893613309126, "learning_rate": 5.273975173783721e-06, "loss": 0.4997, "step": 11006 }, { "epoch": 0.49816700610997966, "grad_norm": 0.5978448571071595, "learning_rate": 5.273243350947728e-06, "loss": 0.3148, "step": 11007 }, { "epoch": 0.4982122652183752, "grad_norm": 0.2849176493083047, "learning_rate": 5.272511522240574e-06, "loss": 0.4952, "step": 11008 }, { "epoch": 0.4982575243267708, "grad_norm": 0.586350458099298, "learning_rate": 5.271779687677984e-06, "loss": 0.3416, "step": 11009 }, { "epoch": 0.4983027834351663, "grad_norm": 0.6386831237999933, "learning_rate": 5.271047847275685e-06, "loss": 0.3097, "step": 11010 }, { "epoch": 0.4983480425435619, "grad_norm": 0.6339958723931352, "learning_rate": 5.270316001049398e-06, "loss": 0.3842, "step": 11011 }, { "epoch": 0.49839330165195744, "grad_norm": 0.29838913407663087, "learning_rate": 5.269584149014852e-06, "loss": 0.4807, "step": 11012 }, { "epoch": 0.49843856076035303, "grad_norm": 0.6826512568699339, "learning_rate": 5.268852291187771e-06, "loss": 0.3287, "step": 11013 }, { "epoch": 0.49848381986874857, "grad_norm": 0.5931840879498935, "learning_rate": 5.2681204275838785e-06, "loss": 0.3152, "step": 11014 }, { "epoch": 0.49852907897714416, "grad_norm": 0.6657488830808198, "learning_rate": 5.267388558218902e-06, "loss": 0.3263, "step": 11015 }, { "epoch": 0.4985743380855397, "grad_norm": 0.6057858608335768, "learning_rate": 5.266656683108566e-06, "loss": 0.3041, "step": 11016 }, { "epoch": 0.4986195971939353, "grad_norm": 0.6745789119505499, "learning_rate": 5.265924802268598e-06, "loss": 0.3572, "step": 11017 }, { "epoch": 0.4986648563023308, "grad_norm": 0.5747209305871905, "learning_rate": 5.265192915714723e-06, "loss": 0.3098, "step": 11018 }, { "epoch": 0.4987101154107264, "grad_norm": 0.6218352907198297, "learning_rate": 5.2644610234626646e-06, "loss": 0.2639, "step": 11019 }, { "epoch": 0.498755374519122, "grad_norm": 0.3348861378427615, "learning_rate": 5.2637291255281545e-06, "loss": 0.4646, "step": 11020 }, { "epoch": 0.49880063362751753, "grad_norm": 0.6534352481784338, "learning_rate": 5.262997221926912e-06, "loss": 0.304, "step": 11021 }, { "epoch": 0.4988458927359131, "grad_norm": 0.6371580418242253, "learning_rate": 5.262265312674669e-06, "loss": 0.3518, "step": 11022 }, { "epoch": 0.49889115184430866, "grad_norm": 0.29895083134067113, "learning_rate": 5.261533397787149e-06, "loss": 0.4731, "step": 11023 }, { "epoch": 0.49893641095270425, "grad_norm": 0.6893328823430755, "learning_rate": 5.26080147728008e-06, "loss": 0.3198, "step": 11024 }, { "epoch": 0.4989816700610998, "grad_norm": 0.6131173186560834, "learning_rate": 5.260069551169187e-06, "loss": 0.3078, "step": 11025 }, { "epoch": 0.49902692916949537, "grad_norm": 0.6330615061810914, "learning_rate": 5.2593376194702e-06, "loss": 0.3216, "step": 11026 }, { "epoch": 0.4990721882778909, "grad_norm": 0.6459216134848859, "learning_rate": 5.258605682198842e-06, "loss": 0.3118, "step": 11027 }, { "epoch": 0.4991174473862865, "grad_norm": 0.6205966481412192, "learning_rate": 5.2578737393708435e-06, "loss": 0.3307, "step": 11028 }, { "epoch": 0.49916270649468203, "grad_norm": 0.7227561225552619, "learning_rate": 5.257141791001931e-06, "loss": 0.3494, "step": 11029 }, { "epoch": 0.4992079656030776, "grad_norm": 0.5829741951280177, "learning_rate": 5.256409837107828e-06, "loss": 0.3502, "step": 11030 }, { "epoch": 0.4992532247114732, "grad_norm": 0.3573351925053009, "learning_rate": 5.255677877704269e-06, "loss": 0.5144, "step": 11031 }, { "epoch": 0.49929848381986874, "grad_norm": 0.4306011684363518, "learning_rate": 5.254945912806977e-06, "loss": 0.4578, "step": 11032 }, { "epoch": 0.49934374292826433, "grad_norm": 0.5924615467188293, "learning_rate": 5.254213942431679e-06, "loss": 0.3542, "step": 11033 }, { "epoch": 0.49938900203665987, "grad_norm": 0.6294281367848047, "learning_rate": 5.253481966594104e-06, "loss": 0.3409, "step": 11034 }, { "epoch": 0.49943426114505546, "grad_norm": 0.624064714320788, "learning_rate": 5.25274998530998e-06, "loss": 0.3123, "step": 11035 }, { "epoch": 0.499479520253451, "grad_norm": 0.7475311399192298, "learning_rate": 5.252017998595036e-06, "loss": 0.2917, "step": 11036 }, { "epoch": 0.4995247793618466, "grad_norm": 0.6201713220823594, "learning_rate": 5.2512860064649985e-06, "loss": 0.3123, "step": 11037 }, { "epoch": 0.4995700384702421, "grad_norm": 0.6376182014202468, "learning_rate": 5.250554008935596e-06, "loss": 0.3383, "step": 11038 }, { "epoch": 0.4996152975786377, "grad_norm": 0.6267144751580137, "learning_rate": 5.24982200602256e-06, "loss": 0.2996, "step": 11039 }, { "epoch": 0.49966055668703324, "grad_norm": 0.6305389310730376, "learning_rate": 5.249089997741613e-06, "loss": 0.3086, "step": 11040 }, { "epoch": 0.49970581579542883, "grad_norm": 0.6201450886917379, "learning_rate": 5.248357984108489e-06, "loss": 0.2934, "step": 11041 }, { "epoch": 0.4997510749038244, "grad_norm": 0.6182082306856194, "learning_rate": 5.247625965138915e-06, "loss": 0.3555, "step": 11042 }, { "epoch": 0.49979633401221996, "grad_norm": 0.6015189328325169, "learning_rate": 5.246893940848619e-06, "loss": 0.35, "step": 11043 }, { "epoch": 0.49984159312061555, "grad_norm": 0.5770996555177424, "learning_rate": 5.24616191125333e-06, "loss": 0.3129, "step": 11044 }, { "epoch": 0.4998868522290111, "grad_norm": 0.6121094238797752, "learning_rate": 5.245429876368777e-06, "loss": 0.3085, "step": 11045 }, { "epoch": 0.49993211133740667, "grad_norm": 0.6523235892928385, "learning_rate": 5.244697836210691e-06, "loss": 0.373, "step": 11046 }, { "epoch": 0.4999773704458022, "grad_norm": 0.6345743591337244, "learning_rate": 5.2439657907948005e-06, "loss": 0.3459, "step": 11047 }, { "epoch": 0.5000226295541977, "grad_norm": 0.6277061830033281, "learning_rate": 5.243233740136833e-06, "loss": 0.3521, "step": 11048 }, { "epoch": 0.5000678886625933, "grad_norm": 0.584450845482722, "learning_rate": 5.24250168425252e-06, "loss": 0.3166, "step": 11049 }, { "epoch": 0.5001131477709889, "grad_norm": 0.6260654492263931, "learning_rate": 5.241769623157591e-06, "loss": 0.3242, "step": 11050 }, { "epoch": 0.5001584068793845, "grad_norm": 0.6012131043558517, "learning_rate": 5.241037556867775e-06, "loss": 0.3088, "step": 11051 }, { "epoch": 0.50020366598778, "grad_norm": 0.7609827413186744, "learning_rate": 5.2403054853988025e-06, "loss": 0.3373, "step": 11052 }, { "epoch": 0.5002489250961756, "grad_norm": 0.6481397861005885, "learning_rate": 5.239573408766402e-06, "loss": 0.3348, "step": 11053 }, { "epoch": 0.5002941842045712, "grad_norm": 0.6230503792736052, "learning_rate": 5.2388413269863046e-06, "loss": 0.3084, "step": 11054 }, { "epoch": 0.5003394433129668, "grad_norm": 0.6175391721691664, "learning_rate": 5.238109240074242e-06, "loss": 0.3683, "step": 11055 }, { "epoch": 0.5003847024213623, "grad_norm": 0.6082765218977666, "learning_rate": 5.237377148045942e-06, "loss": 0.3168, "step": 11056 }, { "epoch": 0.5004299615297578, "grad_norm": 0.6186954600485816, "learning_rate": 5.236645050917137e-06, "loss": 0.322, "step": 11057 }, { "epoch": 0.5004752206381534, "grad_norm": 0.4754584167898786, "learning_rate": 5.235912948703557e-06, "loss": 0.5052, "step": 11058 }, { "epoch": 0.500520479746549, "grad_norm": 0.5948232513771863, "learning_rate": 5.235180841420932e-06, "loss": 0.3247, "step": 11059 }, { "epoch": 0.5005657388549446, "grad_norm": 0.3340250369796509, "learning_rate": 5.234448729084993e-06, "loss": 0.4682, "step": 11060 }, { "epoch": 0.5006109979633401, "grad_norm": 0.6822242617146005, "learning_rate": 5.233716611711469e-06, "loss": 0.3387, "step": 11061 }, { "epoch": 0.5006562570717357, "grad_norm": 0.6499428614474025, "learning_rate": 5.232984489316095e-06, "loss": 0.3623, "step": 11062 }, { "epoch": 0.5007015161801313, "grad_norm": 0.5985211579875924, "learning_rate": 5.2322523619146e-06, "loss": 0.2922, "step": 11063 }, { "epoch": 0.5007467752885268, "grad_norm": 0.6165914090170149, "learning_rate": 5.2315202295227144e-06, "loss": 0.3392, "step": 11064 }, { "epoch": 0.5007920343969223, "grad_norm": 0.5905261648096668, "learning_rate": 5.2307880921561695e-06, "loss": 0.3121, "step": 11065 }, { "epoch": 0.5008372935053179, "grad_norm": 0.6898713814030945, "learning_rate": 5.230055949830698e-06, "loss": 0.3261, "step": 11066 }, { "epoch": 0.5008825526137135, "grad_norm": 0.6222355567404058, "learning_rate": 5.229323802562031e-06, "loss": 0.353, "step": 11067 }, { "epoch": 0.5009278117221091, "grad_norm": 0.5075586203431988, "learning_rate": 5.2285916503659e-06, "loss": 0.4744, "step": 11068 }, { "epoch": 0.5009730708305047, "grad_norm": 0.6291194527210394, "learning_rate": 5.227859493258035e-06, "loss": 0.3207, "step": 11069 }, { "epoch": 0.5010183299389002, "grad_norm": 0.6418232667533473, "learning_rate": 5.227127331254171e-06, "loss": 0.3069, "step": 11070 }, { "epoch": 0.5010635890472958, "grad_norm": 0.6018724740067458, "learning_rate": 5.226395164370038e-06, "loss": 0.3068, "step": 11071 }, { "epoch": 0.5011088481556913, "grad_norm": 0.3252219454503633, "learning_rate": 5.225662992621367e-06, "loss": 0.454, "step": 11072 }, { "epoch": 0.5011541072640869, "grad_norm": 0.7285264576358847, "learning_rate": 5.224930816023892e-06, "loss": 0.35, "step": 11073 }, { "epoch": 0.5011993663724824, "grad_norm": 0.6205010962097538, "learning_rate": 5.224198634593344e-06, "loss": 0.3039, "step": 11074 }, { "epoch": 0.501244625480878, "grad_norm": 0.594041615008846, "learning_rate": 5.223466448345457e-06, "loss": 0.3091, "step": 11075 }, { "epoch": 0.5012898845892736, "grad_norm": 0.6321829785744144, "learning_rate": 5.222734257295963e-06, "loss": 0.3225, "step": 11076 }, { "epoch": 0.5013351436976692, "grad_norm": 0.6118644687548812, "learning_rate": 5.222002061460592e-06, "loss": 0.2942, "step": 11077 }, { "epoch": 0.5013804028060648, "grad_norm": 0.7053326924588188, "learning_rate": 5.22126986085508e-06, "loss": 0.3398, "step": 11078 }, { "epoch": 0.5014256619144603, "grad_norm": 0.6527266573297478, "learning_rate": 5.220537655495156e-06, "loss": 0.3353, "step": 11079 }, { "epoch": 0.5014709210228558, "grad_norm": 0.6638030771401946, "learning_rate": 5.219805445396558e-06, "loss": 0.362, "step": 11080 }, { "epoch": 0.5015161801312514, "grad_norm": 0.620148977612418, "learning_rate": 5.219073230575014e-06, "loss": 0.3098, "step": 11081 }, { "epoch": 0.501561439239647, "grad_norm": 0.6372724691585553, "learning_rate": 5.218341011046259e-06, "loss": 0.319, "step": 11082 }, { "epoch": 0.5016066983480425, "grad_norm": 0.6373736598102927, "learning_rate": 5.217608786826028e-06, "loss": 0.3637, "step": 11083 }, { "epoch": 0.5016519574564381, "grad_norm": 0.605732810497086, "learning_rate": 5.216876557930052e-06, "loss": 0.3337, "step": 11084 }, { "epoch": 0.5016972165648337, "grad_norm": 0.6222119929539311, "learning_rate": 5.216144324374064e-06, "loss": 0.3213, "step": 11085 }, { "epoch": 0.5017424756732293, "grad_norm": 0.6386771222508453, "learning_rate": 5.215412086173798e-06, "loss": 0.3204, "step": 11086 }, { "epoch": 0.5017877347816248, "grad_norm": 0.6350812082463306, "learning_rate": 5.214679843344989e-06, "loss": 0.3172, "step": 11087 }, { "epoch": 0.5018329938900203, "grad_norm": 0.5767092330974389, "learning_rate": 5.213947595903369e-06, "loss": 0.477, "step": 11088 }, { "epoch": 0.5018782529984159, "grad_norm": 0.6352459911259863, "learning_rate": 5.213215343864674e-06, "loss": 0.3516, "step": 11089 }, { "epoch": 0.5019235121068115, "grad_norm": 2.2769096522106915, "learning_rate": 5.212483087244633e-06, "loss": 0.3, "step": 11090 }, { "epoch": 0.5019687712152071, "grad_norm": 0.6258735223163712, "learning_rate": 5.211750826058986e-06, "loss": 0.3866, "step": 11091 }, { "epoch": 0.5020140303236026, "grad_norm": 0.3613147591509796, "learning_rate": 5.211018560323462e-06, "loss": 0.4903, "step": 11092 }, { "epoch": 0.5020592894319982, "grad_norm": 0.36778750299336693, "learning_rate": 5.2102862900537975e-06, "loss": 0.4728, "step": 11093 }, { "epoch": 0.5021045485403938, "grad_norm": 0.6867057430629306, "learning_rate": 5.209554015265727e-06, "loss": 0.3507, "step": 11094 }, { "epoch": 0.5021498076487894, "grad_norm": 0.668992589318168, "learning_rate": 5.208821735974984e-06, "loss": 0.3298, "step": 11095 }, { "epoch": 0.5021950667571848, "grad_norm": 0.6752741459092744, "learning_rate": 5.208089452197302e-06, "loss": 0.3499, "step": 11096 }, { "epoch": 0.5022403258655804, "grad_norm": 0.6438409777835392, "learning_rate": 5.20735716394842e-06, "loss": 0.3207, "step": 11097 }, { "epoch": 0.502285584973976, "grad_norm": 0.6102737159370142, "learning_rate": 5.206624871244066e-06, "loss": 0.3713, "step": 11098 }, { "epoch": 0.5023308440823716, "grad_norm": 0.433094389848305, "learning_rate": 5.205892574099981e-06, "loss": 0.4836, "step": 11099 }, { "epoch": 0.5023761031907671, "grad_norm": 0.6388238597155772, "learning_rate": 5.205160272531895e-06, "loss": 0.3217, "step": 11100 }, { "epoch": 0.5024213622991627, "grad_norm": 0.6274527566329003, "learning_rate": 5.204427966555545e-06, "loss": 0.3346, "step": 11101 }, { "epoch": 0.5024666214075583, "grad_norm": 0.6025656163217872, "learning_rate": 5.203695656186667e-06, "loss": 0.3253, "step": 11102 }, { "epoch": 0.5025118805159539, "grad_norm": 0.6551152806125176, "learning_rate": 5.202963341440994e-06, "loss": 0.3418, "step": 11103 }, { "epoch": 0.5025571396243494, "grad_norm": 0.816953226291965, "learning_rate": 5.202231022334262e-06, "loss": 0.3047, "step": 11104 }, { "epoch": 0.5026023987327449, "grad_norm": 0.6103218202483534, "learning_rate": 5.201498698882207e-06, "loss": 0.3312, "step": 11105 }, { "epoch": 0.5026476578411405, "grad_norm": 0.6104779818808732, "learning_rate": 5.200766371100564e-06, "loss": 0.3227, "step": 11106 }, { "epoch": 0.5026929169495361, "grad_norm": 0.33092996473441333, "learning_rate": 5.200034039005068e-06, "loss": 0.4819, "step": 11107 }, { "epoch": 0.5027381760579317, "grad_norm": 0.5760559752047034, "learning_rate": 5.199301702611454e-06, "loss": 0.3256, "step": 11108 }, { "epoch": 0.5027834351663272, "grad_norm": 0.7355190355427469, "learning_rate": 5.1985693619354604e-06, "loss": 0.3257, "step": 11109 }, { "epoch": 0.5028286942747228, "grad_norm": 0.5909027528222714, "learning_rate": 5.197837016992819e-06, "loss": 0.3348, "step": 11110 }, { "epoch": 0.5028739533831184, "grad_norm": 0.7936770031677421, "learning_rate": 5.1971046677992695e-06, "loss": 0.3381, "step": 11111 }, { "epoch": 0.502919212491514, "grad_norm": 0.6005895346477476, "learning_rate": 5.196372314370545e-06, "loss": 0.3098, "step": 11112 }, { "epoch": 0.5029644715999095, "grad_norm": 0.6393324770340331, "learning_rate": 5.195639956722382e-06, "loss": 0.3555, "step": 11113 }, { "epoch": 0.503009730708305, "grad_norm": 0.7363714170001564, "learning_rate": 5.194907594870519e-06, "loss": 0.3672, "step": 11114 }, { "epoch": 0.5030549898167006, "grad_norm": 0.6282518354837773, "learning_rate": 5.194175228830689e-06, "loss": 0.3095, "step": 11115 }, { "epoch": 0.5031002489250962, "grad_norm": 0.6451065493325112, "learning_rate": 5.19344285861863e-06, "loss": 0.3435, "step": 11116 }, { "epoch": 0.5031455080334918, "grad_norm": 0.6009264106991573, "learning_rate": 5.192710484250078e-06, "loss": 0.327, "step": 11117 }, { "epoch": 0.5031907671418873, "grad_norm": 0.36986139771972265, "learning_rate": 5.19197810574077e-06, "loss": 0.4864, "step": 11118 }, { "epoch": 0.5032360262502829, "grad_norm": 0.6092739699754802, "learning_rate": 5.191245723106442e-06, "loss": 0.2666, "step": 11119 }, { "epoch": 0.5032812853586784, "grad_norm": 0.6406215574886052, "learning_rate": 5.1905133363628314e-06, "loss": 0.3426, "step": 11120 }, { "epoch": 0.503326544467074, "grad_norm": 0.6530649316219899, "learning_rate": 5.189780945525673e-06, "loss": 0.3212, "step": 11121 }, { "epoch": 0.5033718035754695, "grad_norm": 0.2882004458790363, "learning_rate": 5.189048550610706e-06, "loss": 0.4628, "step": 11122 }, { "epoch": 0.5034170626838651, "grad_norm": 0.6282642020762652, "learning_rate": 5.188316151633665e-06, "loss": 0.3331, "step": 11123 }, { "epoch": 0.5034623217922607, "grad_norm": 0.6610170936830095, "learning_rate": 5.187583748610289e-06, "loss": 0.3781, "step": 11124 }, { "epoch": 0.5035075809006563, "grad_norm": 0.6332532439068955, "learning_rate": 5.186851341556315e-06, "loss": 0.3464, "step": 11125 }, { "epoch": 0.5035528400090519, "grad_norm": 0.6949540478270081, "learning_rate": 5.186118930487479e-06, "loss": 0.293, "step": 11126 }, { "epoch": 0.5035980991174474, "grad_norm": 0.67081655894998, "learning_rate": 5.185386515419518e-06, "loss": 0.3259, "step": 11127 }, { "epoch": 0.5036433582258429, "grad_norm": 0.3210088732142528, "learning_rate": 5.184654096368172e-06, "loss": 0.4637, "step": 11128 }, { "epoch": 0.5036886173342385, "grad_norm": 0.5915652837478227, "learning_rate": 5.183921673349174e-06, "loss": 0.3081, "step": 11129 }, { "epoch": 0.5037338764426341, "grad_norm": 0.6268015361227933, "learning_rate": 5.183189246378266e-06, "loss": 0.3451, "step": 11130 }, { "epoch": 0.5037791355510296, "grad_norm": 0.7243887671807692, "learning_rate": 5.182456815471184e-06, "loss": 0.3106, "step": 11131 }, { "epoch": 0.5038243946594252, "grad_norm": 0.6726835956341942, "learning_rate": 5.181724380643664e-06, "loss": 0.3437, "step": 11132 }, { "epoch": 0.5038696537678208, "grad_norm": 0.6491434431452936, "learning_rate": 5.180991941911446e-06, "loss": 0.2941, "step": 11133 }, { "epoch": 0.5039149128762164, "grad_norm": 0.6497408706120921, "learning_rate": 5.180259499290268e-06, "loss": 0.3248, "step": 11134 }, { "epoch": 0.5039601719846118, "grad_norm": 0.657340082524694, "learning_rate": 5.179527052795865e-06, "loss": 0.2914, "step": 11135 }, { "epoch": 0.5040054310930074, "grad_norm": 0.6404778084269498, "learning_rate": 5.178794602443978e-06, "loss": 0.3023, "step": 11136 }, { "epoch": 0.504050690201403, "grad_norm": 0.7219616940330175, "learning_rate": 5.178062148250343e-06, "loss": 0.3541, "step": 11137 }, { "epoch": 0.5040959493097986, "grad_norm": 0.4489197089113747, "learning_rate": 5.177329690230702e-06, "loss": 0.4824, "step": 11138 }, { "epoch": 0.5041412084181942, "grad_norm": 0.619375488651365, "learning_rate": 5.176597228400789e-06, "loss": 0.3333, "step": 11139 }, { "epoch": 0.5041864675265897, "grad_norm": 0.31081728274069303, "learning_rate": 5.175864762776343e-06, "loss": 0.4816, "step": 11140 }, { "epoch": 0.5042317266349853, "grad_norm": 0.6297193844551721, "learning_rate": 5.175132293373105e-06, "loss": 0.347, "step": 11141 }, { "epoch": 0.5042769857433809, "grad_norm": 0.6650090584331564, "learning_rate": 5.174399820206811e-06, "loss": 0.3354, "step": 11142 }, { "epoch": 0.5043222448517765, "grad_norm": 0.6580033293915457, "learning_rate": 5.1736673432932e-06, "loss": 0.3263, "step": 11143 }, { "epoch": 0.5043675039601719, "grad_norm": 0.3533605652723998, "learning_rate": 5.172934862648012e-06, "loss": 0.4612, "step": 11144 }, { "epoch": 0.5044127630685675, "grad_norm": 0.3096719747489321, "learning_rate": 5.172202378286986e-06, "loss": 0.4858, "step": 11145 }, { "epoch": 0.5044580221769631, "grad_norm": 0.976650936575463, "learning_rate": 5.171469890225857e-06, "loss": 0.3483, "step": 11146 }, { "epoch": 0.5045032812853587, "grad_norm": 0.6514853901406809, "learning_rate": 5.17073739848037e-06, "loss": 0.354, "step": 11147 }, { "epoch": 0.5045485403937543, "grad_norm": 0.6230159049535104, "learning_rate": 5.170004903066258e-06, "loss": 0.3374, "step": 11148 }, { "epoch": 0.5045937995021498, "grad_norm": 0.5977594402004174, "learning_rate": 5.169272403999265e-06, "loss": 0.3315, "step": 11149 }, { "epoch": 0.5046390586105454, "grad_norm": 0.6652306304159178, "learning_rate": 5.1685399012951244e-06, "loss": 0.3276, "step": 11150 }, { "epoch": 0.504684317718941, "grad_norm": 0.6319080666281376, "learning_rate": 5.167807394969583e-06, "loss": 0.3413, "step": 11151 }, { "epoch": 0.5047295768273365, "grad_norm": 0.6793929270264313, "learning_rate": 5.1670748850383734e-06, "loss": 0.3306, "step": 11152 }, { "epoch": 0.504774835935732, "grad_norm": 0.6600474914651242, "learning_rate": 5.166342371517239e-06, "loss": 0.3292, "step": 11153 }, { "epoch": 0.5048200950441276, "grad_norm": 0.5960777358642412, "learning_rate": 5.165609854421917e-06, "loss": 0.3273, "step": 11154 }, { "epoch": 0.5048653541525232, "grad_norm": 0.6414202022312718, "learning_rate": 5.164877333768149e-06, "loss": 0.3084, "step": 11155 }, { "epoch": 0.5049106132609188, "grad_norm": 0.5750570450752185, "learning_rate": 5.1641448095716715e-06, "loss": 0.3788, "step": 11156 }, { "epoch": 0.5049558723693143, "grad_norm": 0.5541977367870765, "learning_rate": 5.163412281848229e-06, "loss": 0.2903, "step": 11157 }, { "epoch": 0.5050011314777099, "grad_norm": 0.6017463923431956, "learning_rate": 5.162679750613555e-06, "loss": 0.3191, "step": 11158 }, { "epoch": 0.5050463905861055, "grad_norm": 0.6164484835509431, "learning_rate": 5.1619472158833964e-06, "loss": 0.2896, "step": 11159 }, { "epoch": 0.505091649694501, "grad_norm": 0.5741582036164451, "learning_rate": 5.161214677673487e-06, "loss": 0.3643, "step": 11160 }, { "epoch": 0.5051369088028966, "grad_norm": 0.6408902888090979, "learning_rate": 5.16048213599957e-06, "loss": 0.326, "step": 11161 }, { "epoch": 0.5051821679112921, "grad_norm": 0.3830985888230755, "learning_rate": 5.159749590877384e-06, "loss": 0.471, "step": 11162 }, { "epoch": 0.5052274270196877, "grad_norm": 0.35226583525158767, "learning_rate": 5.159017042322671e-06, "loss": 0.477, "step": 11163 }, { "epoch": 0.5052726861280833, "grad_norm": 0.6303737577199218, "learning_rate": 5.158284490351169e-06, "loss": 0.2958, "step": 11164 }, { "epoch": 0.5053179452364789, "grad_norm": 0.6127947670780783, "learning_rate": 5.157551934978622e-06, "loss": 0.3251, "step": 11165 }, { "epoch": 0.5053632043448744, "grad_norm": 0.6064243651942174, "learning_rate": 5.156819376220765e-06, "loss": 0.3087, "step": 11166 }, { "epoch": 0.50540846345327, "grad_norm": 1.068945333103363, "learning_rate": 5.1560868140933425e-06, "loss": 0.3313, "step": 11167 }, { "epoch": 0.5054537225616655, "grad_norm": 0.41989461112524973, "learning_rate": 5.155354248612095e-06, "loss": 0.4639, "step": 11168 }, { "epoch": 0.5054989816700611, "grad_norm": 0.7009044558515606, "learning_rate": 5.1546216797927594e-06, "loss": 0.3175, "step": 11169 }, { "epoch": 0.5055442407784566, "grad_norm": 0.6599507990452416, "learning_rate": 5.1538891076510815e-06, "loss": 0.353, "step": 11170 }, { "epoch": 0.5055894998868522, "grad_norm": 0.34121769850879935, "learning_rate": 5.153156532202795e-06, "loss": 0.4709, "step": 11171 }, { "epoch": 0.5056347589952478, "grad_norm": 0.6500256984129645, "learning_rate": 5.152423953463649e-06, "loss": 0.3466, "step": 11172 }, { "epoch": 0.5056800181036434, "grad_norm": 0.6725159093189567, "learning_rate": 5.151691371449378e-06, "loss": 0.3298, "step": 11173 }, { "epoch": 0.505725277212039, "grad_norm": 0.5950271952157588, "learning_rate": 5.150958786175727e-06, "loss": 0.2905, "step": 11174 }, { "epoch": 0.5057705363204344, "grad_norm": 0.6326018823306971, "learning_rate": 5.1502261976584354e-06, "loss": 0.282, "step": 11175 }, { "epoch": 0.50581579542883, "grad_norm": 0.589511282902164, "learning_rate": 5.149493605913244e-06, "loss": 0.3124, "step": 11176 }, { "epoch": 0.5058610545372256, "grad_norm": 0.9978463493192055, "learning_rate": 5.148761010955893e-06, "loss": 0.3334, "step": 11177 }, { "epoch": 0.5059063136456212, "grad_norm": 0.6701595925463175, "learning_rate": 5.1480284128021265e-06, "loss": 0.3836, "step": 11178 }, { "epoch": 0.5059515727540167, "grad_norm": 0.6096570947381155, "learning_rate": 5.147295811467681e-06, "loss": 0.3671, "step": 11179 }, { "epoch": 0.5059968318624123, "grad_norm": 0.6534848644153554, "learning_rate": 5.146563206968303e-06, "loss": 0.3427, "step": 11180 }, { "epoch": 0.5060420909708079, "grad_norm": 0.6840519353875196, "learning_rate": 5.1458305993197326e-06, "loss": 0.3127, "step": 11181 }, { "epoch": 0.5060873500792035, "grad_norm": 0.6609155547256175, "learning_rate": 5.145097988537709e-06, "loss": 0.3149, "step": 11182 }, { "epoch": 0.5061326091875991, "grad_norm": 0.6332826976514082, "learning_rate": 5.144365374637976e-06, "loss": 0.3082, "step": 11183 }, { "epoch": 0.5061778682959945, "grad_norm": 0.6580709619987227, "learning_rate": 5.143632757636275e-06, "loss": 0.3321, "step": 11184 }, { "epoch": 0.5062231274043901, "grad_norm": 0.6328645701871171, "learning_rate": 5.142900137548346e-06, "loss": 0.2912, "step": 11185 }, { "epoch": 0.5062683865127857, "grad_norm": 0.6329476213869506, "learning_rate": 5.142167514389933e-06, "loss": 0.3509, "step": 11186 }, { "epoch": 0.5063136456211813, "grad_norm": 0.6852797503715907, "learning_rate": 5.141434888176775e-06, "loss": 0.3406, "step": 11187 }, { "epoch": 0.5063589047295768, "grad_norm": 0.46909894318034845, "learning_rate": 5.140702258924618e-06, "loss": 0.487, "step": 11188 }, { "epoch": 0.5064041638379724, "grad_norm": 0.3779597483581766, "learning_rate": 5.1399696266491996e-06, "loss": 0.4912, "step": 11189 }, { "epoch": 0.506449422946368, "grad_norm": 0.2965653746703262, "learning_rate": 5.1392369913662646e-06, "loss": 0.486, "step": 11190 }, { "epoch": 0.5064946820547636, "grad_norm": 0.6462902080209708, "learning_rate": 5.138504353091555e-06, "loss": 0.3288, "step": 11191 }, { "epoch": 0.506539941163159, "grad_norm": 0.6200576019698926, "learning_rate": 5.137771711840811e-06, "loss": 0.317, "step": 11192 }, { "epoch": 0.5065852002715546, "grad_norm": 0.6191220053106656, "learning_rate": 5.137039067629776e-06, "loss": 0.3249, "step": 11193 }, { "epoch": 0.5066304593799502, "grad_norm": 0.6109383276969177, "learning_rate": 5.136306420474193e-06, "loss": 0.3895, "step": 11194 }, { "epoch": 0.5066757184883458, "grad_norm": 0.540868332291682, "learning_rate": 5.135573770389804e-06, "loss": 0.4816, "step": 11195 }, { "epoch": 0.5067209775967414, "grad_norm": 0.5663607782035478, "learning_rate": 5.134841117392349e-06, "loss": 0.3157, "step": 11196 }, { "epoch": 0.5067662367051369, "grad_norm": 0.6249994867232201, "learning_rate": 5.134108461497576e-06, "loss": 0.3748, "step": 11197 }, { "epoch": 0.5068114958135325, "grad_norm": 0.644905525457812, "learning_rate": 5.133375802721221e-06, "loss": 0.3007, "step": 11198 }, { "epoch": 0.506856754921928, "grad_norm": 0.5906218504091207, "learning_rate": 5.132643141079031e-06, "loss": 0.3555, "step": 11199 }, { "epoch": 0.5069020140303236, "grad_norm": 0.5924636723678586, "learning_rate": 5.131910476586747e-06, "loss": 0.3488, "step": 11200 }, { "epoch": 0.5069472731387191, "grad_norm": 0.617360493101454, "learning_rate": 5.131177809260113e-06, "loss": 0.2914, "step": 11201 }, { "epoch": 0.5069925322471147, "grad_norm": 0.37941447725663624, "learning_rate": 5.130445139114869e-06, "loss": 0.5056, "step": 11202 }, { "epoch": 0.5070377913555103, "grad_norm": 0.6042484854768109, "learning_rate": 5.129712466166761e-06, "loss": 0.3294, "step": 11203 }, { "epoch": 0.5070830504639059, "grad_norm": 0.6631656173398461, "learning_rate": 5.1289797904315295e-06, "loss": 0.3125, "step": 11204 }, { "epoch": 0.5071283095723014, "grad_norm": 0.6901252842609684, "learning_rate": 5.12824711192492e-06, "loss": 0.2989, "step": 11205 }, { "epoch": 0.507173568680697, "grad_norm": 0.3076522049578289, "learning_rate": 5.127514430662671e-06, "loss": 0.4765, "step": 11206 }, { "epoch": 0.5072188277890926, "grad_norm": 0.28162741026972016, "learning_rate": 5.126781746660532e-06, "loss": 0.4618, "step": 11207 }, { "epoch": 0.5072640868974881, "grad_norm": 0.7051014395322401, "learning_rate": 5.126049059934239e-06, "loss": 0.3878, "step": 11208 }, { "epoch": 0.5073093460058837, "grad_norm": 0.659795061917355, "learning_rate": 5.1253163704995425e-06, "loss": 0.3245, "step": 11209 }, { "epoch": 0.5073546051142792, "grad_norm": 0.6166099899942831, "learning_rate": 5.124583678372179e-06, "loss": 0.3236, "step": 11210 }, { "epoch": 0.5073998642226748, "grad_norm": 0.6118398319259457, "learning_rate": 5.1238509835678966e-06, "loss": 0.3747, "step": 11211 }, { "epoch": 0.5074451233310704, "grad_norm": 0.645756325863196, "learning_rate": 5.1231182861024365e-06, "loss": 0.3546, "step": 11212 }, { "epoch": 0.507490382439466, "grad_norm": 1.0032332349126258, "learning_rate": 5.122385585991543e-06, "loss": 0.3146, "step": 11213 }, { "epoch": 0.5075356415478615, "grad_norm": 0.6702414915122858, "learning_rate": 5.121652883250958e-06, "loss": 0.3278, "step": 11214 }, { "epoch": 0.507580900656257, "grad_norm": 0.6723140979649809, "learning_rate": 5.120920177896427e-06, "loss": 0.3213, "step": 11215 }, { "epoch": 0.5076261597646526, "grad_norm": 0.5790786377335796, "learning_rate": 5.120187469943693e-06, "loss": 0.3214, "step": 11216 }, { "epoch": 0.5076714188730482, "grad_norm": 0.6766593472851177, "learning_rate": 5.1194547594085e-06, "loss": 0.3392, "step": 11217 }, { "epoch": 0.5077166779814438, "grad_norm": 0.6590885592070846, "learning_rate": 5.11872204630659e-06, "loss": 0.3047, "step": 11218 }, { "epoch": 0.5077619370898393, "grad_norm": 0.5935583909536788, "learning_rate": 5.117989330653708e-06, "loss": 0.3255, "step": 11219 }, { "epoch": 0.5078071961982349, "grad_norm": 0.6477657735336387, "learning_rate": 5.117256612465598e-06, "loss": 0.3217, "step": 11220 }, { "epoch": 0.5078524553066305, "grad_norm": 0.6513408752224927, "learning_rate": 5.116523891758002e-06, "loss": 0.3173, "step": 11221 }, { "epoch": 0.5078977144150261, "grad_norm": 0.6476151623312782, "learning_rate": 5.115791168546667e-06, "loss": 0.3308, "step": 11222 }, { "epoch": 0.5079429735234215, "grad_norm": 0.6360775434592607, "learning_rate": 5.115058442847335e-06, "loss": 0.3149, "step": 11223 }, { "epoch": 0.5079882326318171, "grad_norm": 0.6270942302739652, "learning_rate": 5.1143257146757495e-06, "loss": 0.31, "step": 11224 }, { "epoch": 0.5080334917402127, "grad_norm": 0.9616494452634846, "learning_rate": 5.113592984047657e-06, "loss": 0.3273, "step": 11225 }, { "epoch": 0.5080787508486083, "grad_norm": 0.6000044934131067, "learning_rate": 5.1128602509788e-06, "loss": 0.3141, "step": 11226 }, { "epoch": 0.5081240099570038, "grad_norm": 0.605687051745947, "learning_rate": 5.112127515484923e-06, "loss": 0.2911, "step": 11227 }, { "epoch": 0.5081692690653994, "grad_norm": 0.6413182962936163, "learning_rate": 5.111394777581769e-06, "loss": 0.3262, "step": 11228 }, { "epoch": 0.508214528173795, "grad_norm": 0.6508238650826035, "learning_rate": 5.110662037285084e-06, "loss": 0.3477, "step": 11229 }, { "epoch": 0.5082597872821906, "grad_norm": 0.584862532364185, "learning_rate": 5.109929294610611e-06, "loss": 0.3572, "step": 11230 }, { "epoch": 0.5083050463905862, "grad_norm": 0.6690967334167736, "learning_rate": 5.109196549574097e-06, "loss": 0.327, "step": 11231 }, { "epoch": 0.5083503054989816, "grad_norm": 0.6375743641978658, "learning_rate": 5.108463802191282e-06, "loss": 0.303, "step": 11232 }, { "epoch": 0.5083955646073772, "grad_norm": 0.6475998504990564, "learning_rate": 5.1077310524779144e-06, "loss": 0.3497, "step": 11233 }, { "epoch": 0.5084408237157728, "grad_norm": 0.7126058888744391, "learning_rate": 5.106998300449738e-06, "loss": 0.3137, "step": 11234 }, { "epoch": 0.5084860828241684, "grad_norm": 0.6367622694735621, "learning_rate": 5.106265546122495e-06, "loss": 0.3389, "step": 11235 }, { "epoch": 0.5085313419325639, "grad_norm": 0.6321256652100014, "learning_rate": 5.105532789511935e-06, "loss": 0.3334, "step": 11236 }, { "epoch": 0.5085766010409595, "grad_norm": 0.5958640465462083, "learning_rate": 5.104800030633795e-06, "loss": 0.3278, "step": 11237 }, { "epoch": 0.5086218601493551, "grad_norm": 0.5115932366401839, "learning_rate": 5.104067269503828e-06, "loss": 0.4554, "step": 11238 }, { "epoch": 0.5086671192577507, "grad_norm": 0.60649625395521, "learning_rate": 5.103334506137773e-06, "loss": 0.3469, "step": 11239 }, { "epoch": 0.5087123783661461, "grad_norm": 0.6774631407983585, "learning_rate": 5.102601740551376e-06, "loss": 0.3477, "step": 11240 }, { "epoch": 0.5087576374745417, "grad_norm": 1.0746385590831669, "learning_rate": 5.101868972760384e-06, "loss": 0.3248, "step": 11241 }, { "epoch": 0.5088028965829373, "grad_norm": 0.3245666411781549, "learning_rate": 5.101136202780541e-06, "loss": 0.4738, "step": 11242 }, { "epoch": 0.5088481556913329, "grad_norm": 0.5872003958509445, "learning_rate": 5.100403430627591e-06, "loss": 0.3212, "step": 11243 }, { "epoch": 0.5088934147997285, "grad_norm": 0.315392723450212, "learning_rate": 5.099670656317279e-06, "loss": 0.5003, "step": 11244 }, { "epoch": 0.508938673908124, "grad_norm": 0.6502171119613663, "learning_rate": 5.098937879865352e-06, "loss": 0.3016, "step": 11245 }, { "epoch": 0.5089839330165196, "grad_norm": 0.6229935855299926, "learning_rate": 5.098205101287554e-06, "loss": 0.3329, "step": 11246 }, { "epoch": 0.5090291921249152, "grad_norm": 0.6350228907401154, "learning_rate": 5.09747232059963e-06, "loss": 0.3536, "step": 11247 }, { "epoch": 0.5090744512333107, "grad_norm": 0.5996675389445115, "learning_rate": 5.096739537817324e-06, "loss": 0.3211, "step": 11248 }, { "epoch": 0.5091197103417062, "grad_norm": 0.28875990227643683, "learning_rate": 5.096006752956383e-06, "loss": 0.4433, "step": 11249 }, { "epoch": 0.5091649694501018, "grad_norm": 0.6452691574848929, "learning_rate": 5.09527396603255e-06, "loss": 0.3585, "step": 11250 }, { "epoch": 0.5092102285584974, "grad_norm": 0.6289696919418744, "learning_rate": 5.094541177061575e-06, "loss": 0.3045, "step": 11251 }, { "epoch": 0.509255487666893, "grad_norm": 0.6431979381717381, "learning_rate": 5.093808386059199e-06, "loss": 0.3139, "step": 11252 }, { "epoch": 0.5093007467752886, "grad_norm": 0.3145776248691224, "learning_rate": 5.093075593041169e-06, "loss": 0.4865, "step": 11253 }, { "epoch": 0.5093460058836841, "grad_norm": 0.6125843389798492, "learning_rate": 5.092342798023231e-06, "loss": 0.2992, "step": 11254 }, { "epoch": 0.5093912649920797, "grad_norm": 0.6316823394050614, "learning_rate": 5.09161000102113e-06, "loss": 0.3389, "step": 11255 }, { "epoch": 0.5094365241004752, "grad_norm": 0.6598121599800412, "learning_rate": 5.09087720205061e-06, "loss": 0.3666, "step": 11256 }, { "epoch": 0.5094817832088708, "grad_norm": 0.28874403727307535, "learning_rate": 5.09014440112742e-06, "loss": 0.4977, "step": 11257 }, { "epoch": 0.5095270423172663, "grad_norm": 0.583549177944878, "learning_rate": 5.089411598267301e-06, "loss": 0.308, "step": 11258 }, { "epoch": 0.5095723014256619, "grad_norm": 0.6404139081606993, "learning_rate": 5.0886787934860035e-06, "loss": 0.3034, "step": 11259 }, { "epoch": 0.5096175605340575, "grad_norm": 0.7182314766797886, "learning_rate": 5.087945986799271e-06, "loss": 0.3312, "step": 11260 }, { "epoch": 0.5096628196424531, "grad_norm": 0.575133981644903, "learning_rate": 5.087213178222849e-06, "loss": 0.3477, "step": 11261 }, { "epoch": 0.5097080787508486, "grad_norm": 0.6096373866619839, "learning_rate": 5.086480367772483e-06, "loss": 0.3013, "step": 11262 }, { "epoch": 0.5097533378592441, "grad_norm": 0.6335065629864226, "learning_rate": 5.085747555463921e-06, "loss": 0.3047, "step": 11263 }, { "epoch": 0.5097985969676397, "grad_norm": 0.7209290500060698, "learning_rate": 5.0850147413129054e-06, "loss": 0.34, "step": 11264 }, { "epoch": 0.5098438560760353, "grad_norm": 0.5751831416002953, "learning_rate": 5.084281925335186e-06, "loss": 0.3312, "step": 11265 }, { "epoch": 0.5098891151844309, "grad_norm": 0.5940786723235196, "learning_rate": 5.083549107546505e-06, "loss": 0.33, "step": 11266 }, { "epoch": 0.5099343742928264, "grad_norm": 0.5952467247815935, "learning_rate": 5.082816287962612e-06, "loss": 0.3349, "step": 11267 }, { "epoch": 0.509979633401222, "grad_norm": 0.6749626366271329, "learning_rate": 5.08208346659925e-06, "loss": 0.3127, "step": 11268 }, { "epoch": 0.5100248925096176, "grad_norm": 0.6412464993754627, "learning_rate": 5.0813506434721675e-06, "loss": 0.3224, "step": 11269 }, { "epoch": 0.5100701516180132, "grad_norm": 0.6567906439007134, "learning_rate": 5.080617818597109e-06, "loss": 0.333, "step": 11270 }, { "epoch": 0.5101154107264086, "grad_norm": 0.6648483420021117, "learning_rate": 5.07988499198982e-06, "loss": 0.305, "step": 11271 }, { "epoch": 0.5101606698348042, "grad_norm": 0.5562962246107327, "learning_rate": 5.07915216366605e-06, "loss": 0.3293, "step": 11272 }, { "epoch": 0.5102059289431998, "grad_norm": 0.6539207385618393, "learning_rate": 5.078419333641542e-06, "loss": 0.3272, "step": 11273 }, { "epoch": 0.5102511880515954, "grad_norm": 0.5975765423928, "learning_rate": 5.0776865019320435e-06, "loss": 0.3564, "step": 11274 }, { "epoch": 0.5102964471599909, "grad_norm": 0.6711899726697937, "learning_rate": 5.0769536685533005e-06, "loss": 0.3093, "step": 11275 }, { "epoch": 0.5103417062683865, "grad_norm": 0.6561532773126665, "learning_rate": 5.07622083352106e-06, "loss": 0.3185, "step": 11276 }, { "epoch": 0.5103869653767821, "grad_norm": 0.6482268957086683, "learning_rate": 5.075487996851067e-06, "loss": 0.3302, "step": 11277 }, { "epoch": 0.5104322244851777, "grad_norm": 0.6693747821770387, "learning_rate": 5.074755158559071e-06, "loss": 0.3065, "step": 11278 }, { "epoch": 0.5104774835935733, "grad_norm": 0.3465167500866455, "learning_rate": 5.074022318660813e-06, "loss": 0.4807, "step": 11279 }, { "epoch": 0.5105227427019687, "grad_norm": 0.6136755901790223, "learning_rate": 5.073289477172045e-06, "loss": 0.321, "step": 11280 }, { "epoch": 0.5105680018103643, "grad_norm": 0.6950315561088651, "learning_rate": 5.072556634108511e-06, "loss": 0.3116, "step": 11281 }, { "epoch": 0.5106132609187599, "grad_norm": 0.629696999529083, "learning_rate": 5.0718237894859564e-06, "loss": 0.3314, "step": 11282 }, { "epoch": 0.5106585200271555, "grad_norm": 0.32162850665106396, "learning_rate": 5.0710909433201305e-06, "loss": 0.4862, "step": 11283 }, { "epoch": 0.510703779135551, "grad_norm": 0.6296288455006523, "learning_rate": 5.07035809562678e-06, "loss": 0.3205, "step": 11284 }, { "epoch": 0.5107490382439466, "grad_norm": 0.6243151764913002, "learning_rate": 5.069625246421646e-06, "loss": 0.328, "step": 11285 }, { "epoch": 0.5107942973523422, "grad_norm": 0.6881553488129036, "learning_rate": 5.068892395720482e-06, "loss": 0.3147, "step": 11286 }, { "epoch": 0.5108395564607378, "grad_norm": 0.6794158457913292, "learning_rate": 5.068159543539031e-06, "loss": 0.3508, "step": 11287 }, { "epoch": 0.5108848155691332, "grad_norm": 0.679924159439684, "learning_rate": 5.067426689893043e-06, "loss": 0.3202, "step": 11288 }, { "epoch": 0.5109300746775288, "grad_norm": 0.5821612922899742, "learning_rate": 5.0666938347982595e-06, "loss": 0.3471, "step": 11289 }, { "epoch": 0.5109753337859244, "grad_norm": 0.29824764368092205, "learning_rate": 5.065960978270432e-06, "loss": 0.471, "step": 11290 }, { "epoch": 0.51102059289432, "grad_norm": 0.8062790804593769, "learning_rate": 5.065228120325305e-06, "loss": 0.3381, "step": 11291 }, { "epoch": 0.5110658520027156, "grad_norm": 0.8877515178860867, "learning_rate": 5.064495260978627e-06, "loss": 0.3005, "step": 11292 }, { "epoch": 0.5111111111111111, "grad_norm": 0.3518551067373952, "learning_rate": 5.063762400246142e-06, "loss": 0.4749, "step": 11293 }, { "epoch": 0.5111563702195067, "grad_norm": 0.5545536333545275, "learning_rate": 5.0630295381436024e-06, "loss": 0.3357, "step": 11294 }, { "epoch": 0.5112016293279023, "grad_norm": 0.6323065540402106, "learning_rate": 5.0622966746867474e-06, "loss": 0.3143, "step": 11295 }, { "epoch": 0.5112468884362978, "grad_norm": 0.6076090835310118, "learning_rate": 5.061563809891331e-06, "loss": 0.3411, "step": 11296 }, { "epoch": 0.5112921475446933, "grad_norm": 0.653226132656761, "learning_rate": 5.060830943773096e-06, "loss": 0.342, "step": 11297 }, { "epoch": 0.5113374066530889, "grad_norm": 0.6571716045609006, "learning_rate": 5.060098076347793e-06, "loss": 0.3033, "step": 11298 }, { "epoch": 0.5113826657614845, "grad_norm": 0.7121177366376688, "learning_rate": 5.059365207631164e-06, "loss": 0.3646, "step": 11299 }, { "epoch": 0.5114279248698801, "grad_norm": 0.6071299612800266, "learning_rate": 5.05863233763896e-06, "loss": 0.316, "step": 11300 }, { "epoch": 0.5114731839782757, "grad_norm": 0.6936881305190062, "learning_rate": 5.057899466386927e-06, "loss": 0.3396, "step": 11301 }, { "epoch": 0.5115184430866712, "grad_norm": 0.6665339230126445, "learning_rate": 5.057166593890813e-06, "loss": 0.3067, "step": 11302 }, { "epoch": 0.5115637021950667, "grad_norm": 0.6278926273373675, "learning_rate": 5.056433720166365e-06, "loss": 0.3372, "step": 11303 }, { "epoch": 0.5116089613034623, "grad_norm": 0.7060565458924746, "learning_rate": 5.0557008452293275e-06, "loss": 0.3061, "step": 11304 }, { "epoch": 0.5116542204118579, "grad_norm": 0.5859283629350893, "learning_rate": 5.054967969095453e-06, "loss": 0.2859, "step": 11305 }, { "epoch": 0.5116994795202534, "grad_norm": 0.624192608388278, "learning_rate": 5.054235091780483e-06, "loss": 0.3336, "step": 11306 }, { "epoch": 0.511744738628649, "grad_norm": 0.6051449201263233, "learning_rate": 5.0535022133001684e-06, "loss": 0.3499, "step": 11307 }, { "epoch": 0.5117899977370446, "grad_norm": 0.33856949948383025, "learning_rate": 5.052769333670255e-06, "loss": 0.4626, "step": 11308 }, { "epoch": 0.5118352568454402, "grad_norm": 0.3127064522869994, "learning_rate": 5.052036452906493e-06, "loss": 0.4817, "step": 11309 }, { "epoch": 0.5118805159538357, "grad_norm": 0.692213222529521, "learning_rate": 5.051303571024625e-06, "loss": 0.2916, "step": 11310 }, { "epoch": 0.5119257750622312, "grad_norm": 0.6157695962818099, "learning_rate": 5.050570688040402e-06, "loss": 0.3441, "step": 11311 }, { "epoch": 0.5119710341706268, "grad_norm": 0.703509672456279, "learning_rate": 5.0498378039695685e-06, "loss": 0.3353, "step": 11312 }, { "epoch": 0.5120162932790224, "grad_norm": 0.3098488163330391, "learning_rate": 5.0491049188278755e-06, "loss": 0.4974, "step": 11313 }, { "epoch": 0.512061552387418, "grad_norm": 0.6039855223359988, "learning_rate": 5.048372032631067e-06, "loss": 0.2952, "step": 11314 }, { "epoch": 0.5121068114958135, "grad_norm": 0.6613158443050209, "learning_rate": 5.047639145394895e-06, "loss": 0.314, "step": 11315 }, { "epoch": 0.5121520706042091, "grad_norm": 0.6614902158701181, "learning_rate": 5.0469062571351e-06, "loss": 0.3231, "step": 11316 }, { "epoch": 0.5121973297126047, "grad_norm": 0.9304745086510386, "learning_rate": 5.046173367867438e-06, "loss": 0.3198, "step": 11317 }, { "epoch": 0.5122425888210003, "grad_norm": 0.5757091533057657, "learning_rate": 5.045440477607649e-06, "loss": 0.2927, "step": 11318 }, { "epoch": 0.5122878479293957, "grad_norm": 0.6358379134113163, "learning_rate": 5.0447075863714845e-06, "loss": 0.3387, "step": 11319 }, { "epoch": 0.5123331070377913, "grad_norm": 0.6245896771537696, "learning_rate": 5.0439746941746914e-06, "loss": 0.3569, "step": 11320 }, { "epoch": 0.5123783661461869, "grad_norm": 0.5986751300931925, "learning_rate": 5.043241801033016e-06, "loss": 0.3068, "step": 11321 }, { "epoch": 0.5124236252545825, "grad_norm": 0.6944406862173468, "learning_rate": 5.0425089069622094e-06, "loss": 0.3433, "step": 11322 }, { "epoch": 0.512468884362978, "grad_norm": 0.6517562624227294, "learning_rate": 5.041776011978016e-06, "loss": 0.3522, "step": 11323 }, { "epoch": 0.5125141434713736, "grad_norm": 0.6062106577965188, "learning_rate": 5.041043116096184e-06, "loss": 0.3264, "step": 11324 }, { "epoch": 0.5125594025797692, "grad_norm": 0.6044463216389774, "learning_rate": 5.040310219332462e-06, "loss": 0.282, "step": 11325 }, { "epoch": 0.5126046616881648, "grad_norm": 0.6393686203566962, "learning_rate": 5.039577321702597e-06, "loss": 0.3062, "step": 11326 }, { "epoch": 0.5126499207965604, "grad_norm": 0.6003692003449826, "learning_rate": 5.038844423222337e-06, "loss": 0.2901, "step": 11327 }, { "epoch": 0.5126951799049558, "grad_norm": 0.588872996754063, "learning_rate": 5.038111523907429e-06, "loss": 0.2987, "step": 11328 }, { "epoch": 0.5127404390133514, "grad_norm": 0.4081869555533952, "learning_rate": 5.037378623773622e-06, "loss": 0.4625, "step": 11329 }, { "epoch": 0.512785698121747, "grad_norm": 0.5975544563016666, "learning_rate": 5.0366457228366625e-06, "loss": 0.3555, "step": 11330 }, { "epoch": 0.5128309572301426, "grad_norm": 0.6545036025728562, "learning_rate": 5.0359128211123e-06, "loss": 0.3226, "step": 11331 }, { "epoch": 0.5128762163385381, "grad_norm": 0.6590359641589497, "learning_rate": 5.03517991861628e-06, "loss": 0.3669, "step": 11332 }, { "epoch": 0.5129214754469337, "grad_norm": 0.6099890479128197, "learning_rate": 5.0344470153643525e-06, "loss": 0.3039, "step": 11333 }, { "epoch": 0.5129667345553293, "grad_norm": 0.28940934663786255, "learning_rate": 5.033714111372264e-06, "loss": 0.4761, "step": 11334 }, { "epoch": 0.5130119936637249, "grad_norm": 0.27877054420668457, "learning_rate": 5.0329812066557625e-06, "loss": 0.4659, "step": 11335 }, { "epoch": 0.5130572527721204, "grad_norm": 0.6698297968837902, "learning_rate": 5.032248301230598e-06, "loss": 0.3442, "step": 11336 }, { "epoch": 0.5131025118805159, "grad_norm": 0.6981877078120617, "learning_rate": 5.031515395112514e-06, "loss": 0.3132, "step": 11337 }, { "epoch": 0.5131477709889115, "grad_norm": 0.29108841447013006, "learning_rate": 5.030782488317264e-06, "loss": 0.4765, "step": 11338 }, { "epoch": 0.5131930300973071, "grad_norm": 0.6216874067123067, "learning_rate": 5.0300495808605905e-06, "loss": 0.3438, "step": 11339 }, { "epoch": 0.5132382892057027, "grad_norm": 0.6140580587652936, "learning_rate": 5.029316672758244e-06, "loss": 0.295, "step": 11340 }, { "epoch": 0.5132835483140982, "grad_norm": 0.6596484104592925, "learning_rate": 5.028583764025973e-06, "loss": 0.3516, "step": 11341 }, { "epoch": 0.5133288074224938, "grad_norm": 0.3130909373251786, "learning_rate": 5.027850854679525e-06, "loss": 0.5116, "step": 11342 }, { "epoch": 0.5133740665308894, "grad_norm": 0.2835968055163799, "learning_rate": 5.0271179447346465e-06, "loss": 0.4425, "step": 11343 }, { "epoch": 0.5134193256392849, "grad_norm": 0.7193538124059337, "learning_rate": 5.026385034207087e-06, "loss": 0.3356, "step": 11344 }, { "epoch": 0.5134645847476804, "grad_norm": 0.5762159401245821, "learning_rate": 5.0256521231125945e-06, "loss": 0.3215, "step": 11345 }, { "epoch": 0.513509843856076, "grad_norm": 0.27371331037951185, "learning_rate": 5.024919211466916e-06, "loss": 0.457, "step": 11346 }, { "epoch": 0.5135551029644716, "grad_norm": 0.6497819866175213, "learning_rate": 5.024186299285801e-06, "loss": 0.3389, "step": 11347 }, { "epoch": 0.5136003620728672, "grad_norm": 0.5756216408873701, "learning_rate": 5.023453386584997e-06, "loss": 0.3606, "step": 11348 }, { "epoch": 0.5136456211812628, "grad_norm": 0.581163075070484, "learning_rate": 5.02272047338025e-06, "loss": 0.3028, "step": 11349 }, { "epoch": 0.5136908802896583, "grad_norm": 0.5916833302080331, "learning_rate": 5.021987559687311e-06, "loss": 0.3246, "step": 11350 }, { "epoch": 0.5137361393980538, "grad_norm": 0.6405202372136433, "learning_rate": 5.021254645521927e-06, "loss": 0.3248, "step": 11351 }, { "epoch": 0.5137813985064494, "grad_norm": 0.5926543425677399, "learning_rate": 5.020521730899846e-06, "loss": 0.2805, "step": 11352 }, { "epoch": 0.513826657614845, "grad_norm": 0.676497412921749, "learning_rate": 5.019788815836816e-06, "loss": 0.3524, "step": 11353 }, { "epoch": 0.5138719167232405, "grad_norm": 0.6304341978323126, "learning_rate": 5.019055900348584e-06, "loss": 0.4115, "step": 11354 }, { "epoch": 0.5139171758316361, "grad_norm": 0.5920342969027814, "learning_rate": 5.018322984450902e-06, "loss": 0.3192, "step": 11355 }, { "epoch": 0.5139624349400317, "grad_norm": 0.6092487652218684, "learning_rate": 5.0175900681595116e-06, "loss": 0.3172, "step": 11356 }, { "epoch": 0.5140076940484273, "grad_norm": 0.6443659421278713, "learning_rate": 5.016857151490167e-06, "loss": 0.352, "step": 11357 }, { "epoch": 0.5140529531568228, "grad_norm": 0.6284718857153448, "learning_rate": 5.016124234458612e-06, "loss": 0.3265, "step": 11358 }, { "epoch": 0.5140982122652183, "grad_norm": 0.3664084196581059, "learning_rate": 5.0153913170806e-06, "loss": 0.4694, "step": 11359 }, { "epoch": 0.5141434713736139, "grad_norm": 0.6508373755946546, "learning_rate": 5.0146583993718746e-06, "loss": 0.3564, "step": 11360 }, { "epoch": 0.5141887304820095, "grad_norm": 0.7032696127025374, "learning_rate": 5.013925481348184e-06, "loss": 0.3143, "step": 11361 }, { "epoch": 0.5142339895904051, "grad_norm": 0.6514752461003346, "learning_rate": 5.013192563025279e-06, "loss": 0.335, "step": 11362 }, { "epoch": 0.5142792486988006, "grad_norm": 0.6165901346699098, "learning_rate": 5.012459644418905e-06, "loss": 0.3743, "step": 11363 }, { "epoch": 0.5143245078071962, "grad_norm": 0.6550129626238077, "learning_rate": 5.0117267255448125e-06, "loss": 0.3444, "step": 11364 }, { "epoch": 0.5143697669155918, "grad_norm": 0.6607140941538844, "learning_rate": 5.010993806418749e-06, "loss": 0.3336, "step": 11365 }, { "epoch": 0.5144150260239874, "grad_norm": 0.6011220664787138, "learning_rate": 5.010260887056461e-06, "loss": 0.3137, "step": 11366 }, { "epoch": 0.5144602851323828, "grad_norm": 0.7794734661688585, "learning_rate": 5.0095279674736985e-06, "loss": 0.2907, "step": 11367 }, { "epoch": 0.5145055442407784, "grad_norm": 0.6761020044403644, "learning_rate": 5.00879504768621e-06, "loss": 0.359, "step": 11368 }, { "epoch": 0.514550803349174, "grad_norm": 0.3171163849441446, "learning_rate": 5.0080621277097415e-06, "loss": 0.4823, "step": 11369 }, { "epoch": 0.5145960624575696, "grad_norm": 0.6345630972622137, "learning_rate": 5.007329207560045e-06, "loss": 0.3481, "step": 11370 }, { "epoch": 0.5146413215659652, "grad_norm": 0.6281598512262935, "learning_rate": 5.006596287252864e-06, "loss": 0.3116, "step": 11371 }, { "epoch": 0.5146865806743607, "grad_norm": 0.5969350370380021, "learning_rate": 5.005863366803949e-06, "loss": 0.3117, "step": 11372 }, { "epoch": 0.5147318397827563, "grad_norm": 0.3021692952870953, "learning_rate": 5.005130446229051e-06, "loss": 0.4937, "step": 11373 }, { "epoch": 0.5147770988911519, "grad_norm": 0.6542062893315373, "learning_rate": 5.004397525543912e-06, "loss": 0.3018, "step": 11374 }, { "epoch": 0.5148223579995475, "grad_norm": 0.6304255438734256, "learning_rate": 5.003664604764287e-06, "loss": 0.3452, "step": 11375 }, { "epoch": 0.5148676171079429, "grad_norm": 0.6261344215797569, "learning_rate": 5.0029316839059185e-06, "loss": 0.3184, "step": 11376 }, { "epoch": 0.5149128762163385, "grad_norm": 0.6652000712960027, "learning_rate": 5.002198762984558e-06, "loss": 0.3365, "step": 11377 }, { "epoch": 0.5149581353247341, "grad_norm": 0.6554706239445697, "learning_rate": 5.001465842015952e-06, "loss": 0.3421, "step": 11378 }, { "epoch": 0.5150033944331297, "grad_norm": 0.6191293900481116, "learning_rate": 5.00073292101585e-06, "loss": 0.3314, "step": 11379 }, { "epoch": 0.5150486535415252, "grad_norm": 0.6021614928686941, "learning_rate": 5e-06, "loss": 0.3071, "step": 11380 }, { "epoch": 0.5150939126499208, "grad_norm": 0.6004614620135654, "learning_rate": 4.999267078984151e-06, "loss": 0.3469, "step": 11381 }, { "epoch": 0.5151391717583164, "grad_norm": 0.6382424917561594, "learning_rate": 4.9985341579840505e-06, "loss": 0.2796, "step": 11382 }, { "epoch": 0.515184430866712, "grad_norm": 0.3659738748625842, "learning_rate": 4.997801237015443e-06, "loss": 0.5161, "step": 11383 }, { "epoch": 0.5152296899751075, "grad_norm": 0.3201373508939831, "learning_rate": 4.997068316094082e-06, "loss": 0.4659, "step": 11384 }, { "epoch": 0.515274949083503, "grad_norm": 0.6206303491788823, "learning_rate": 4.996335395235715e-06, "loss": 0.3298, "step": 11385 }, { "epoch": 0.5153202081918986, "grad_norm": 0.27895766028848795, "learning_rate": 4.9956024744560895e-06, "loss": 0.4794, "step": 11386 }, { "epoch": 0.5153654673002942, "grad_norm": 0.679592436620248, "learning_rate": 4.994869553770951e-06, "loss": 0.3421, "step": 11387 }, { "epoch": 0.5154107264086898, "grad_norm": 0.28995181663669395, "learning_rate": 4.99413663319605e-06, "loss": 0.4615, "step": 11388 }, { "epoch": 0.5154559855170853, "grad_norm": 0.6250008664608733, "learning_rate": 4.9934037127471375e-06, "loss": 0.3421, "step": 11389 }, { "epoch": 0.5155012446254809, "grad_norm": 0.6482942267558705, "learning_rate": 4.992670792439958e-06, "loss": 0.3616, "step": 11390 }, { "epoch": 0.5155465037338764, "grad_norm": 0.6523165173209333, "learning_rate": 4.9919378722902585e-06, "loss": 0.366, "step": 11391 }, { "epoch": 0.515591762842272, "grad_norm": 0.6858262611847109, "learning_rate": 4.991204952313793e-06, "loss": 0.3269, "step": 11392 }, { "epoch": 0.5156370219506675, "grad_norm": 0.6718800704422592, "learning_rate": 4.990472032526302e-06, "loss": 0.3338, "step": 11393 }, { "epoch": 0.5156822810590631, "grad_norm": 0.6642519794348746, "learning_rate": 4.98973911294354e-06, "loss": 0.3295, "step": 11394 }, { "epoch": 0.5157275401674587, "grad_norm": 0.9539961494847039, "learning_rate": 4.989006193581254e-06, "loss": 0.3274, "step": 11395 }, { "epoch": 0.5157727992758543, "grad_norm": 0.7514131009659355, "learning_rate": 4.98827327445519e-06, "loss": 0.3493, "step": 11396 }, { "epoch": 0.5158180583842499, "grad_norm": 0.5885314984600968, "learning_rate": 4.987540355581095e-06, "loss": 0.303, "step": 11397 }, { "epoch": 0.5158633174926454, "grad_norm": 0.6082433375446966, "learning_rate": 4.986807436974723e-06, "loss": 0.3593, "step": 11398 }, { "epoch": 0.515908576601041, "grad_norm": 0.6451130905415389, "learning_rate": 4.986074518651817e-06, "loss": 0.3339, "step": 11399 }, { "epoch": 0.5159538357094365, "grad_norm": 0.5772202526007606, "learning_rate": 4.985341600628127e-06, "loss": 0.3196, "step": 11400 }, { "epoch": 0.5159990948178321, "grad_norm": 0.5913749203713625, "learning_rate": 4.984608682919402e-06, "loss": 0.3233, "step": 11401 }, { "epoch": 0.5160443539262276, "grad_norm": 0.3501031557695471, "learning_rate": 4.983875765541389e-06, "loss": 0.4645, "step": 11402 }, { "epoch": 0.5160896130346232, "grad_norm": 0.6909893434416505, "learning_rate": 4.9831428485098336e-06, "loss": 0.2862, "step": 11403 }, { "epoch": 0.5161348721430188, "grad_norm": 0.6190287034727685, "learning_rate": 4.982409931840489e-06, "loss": 0.2947, "step": 11404 }, { "epoch": 0.5161801312514144, "grad_norm": 0.608323485565074, "learning_rate": 4.981677015549101e-06, "loss": 0.3384, "step": 11405 }, { "epoch": 0.51622539035981, "grad_norm": 0.2996223595119399, "learning_rate": 4.9809440996514175e-06, "loss": 0.4672, "step": 11406 }, { "epoch": 0.5162706494682054, "grad_norm": 0.6549238199747524, "learning_rate": 4.980211184163185e-06, "loss": 0.4021, "step": 11407 }, { "epoch": 0.516315908576601, "grad_norm": 0.545305893167641, "learning_rate": 4.979478269100156e-06, "loss": 0.301, "step": 11408 }, { "epoch": 0.5163611676849966, "grad_norm": 0.5946659894475338, "learning_rate": 4.978745354478074e-06, "loss": 0.3317, "step": 11409 }, { "epoch": 0.5164064267933922, "grad_norm": 0.34696223925319947, "learning_rate": 4.97801244031269e-06, "loss": 0.5026, "step": 11410 }, { "epoch": 0.5164516859017877, "grad_norm": 0.5886503604054116, "learning_rate": 4.977279526619752e-06, "loss": 0.3588, "step": 11411 }, { "epoch": 0.5164969450101833, "grad_norm": 0.6532625865775712, "learning_rate": 4.976546613415005e-06, "loss": 0.3335, "step": 11412 }, { "epoch": 0.5165422041185789, "grad_norm": 0.5755714762624607, "learning_rate": 4.9758137007141996e-06, "loss": 0.3029, "step": 11413 }, { "epoch": 0.5165874632269745, "grad_norm": 0.6314543801653296, "learning_rate": 4.975080788533086e-06, "loss": 0.2644, "step": 11414 }, { "epoch": 0.5166327223353699, "grad_norm": 0.6535819873033479, "learning_rate": 4.974347876887408e-06, "loss": 0.3214, "step": 11415 }, { "epoch": 0.5166779814437655, "grad_norm": 0.6748569895089654, "learning_rate": 4.9736149657929136e-06, "loss": 0.3325, "step": 11416 }, { "epoch": 0.5167232405521611, "grad_norm": 0.5890510812062223, "learning_rate": 4.972882055265354e-06, "loss": 0.2784, "step": 11417 }, { "epoch": 0.5167684996605567, "grad_norm": 0.669667646454698, "learning_rate": 4.9721491453204775e-06, "loss": 0.3645, "step": 11418 }, { "epoch": 0.5168137587689523, "grad_norm": 0.6469370141276698, "learning_rate": 4.971416235974029e-06, "loss": 0.356, "step": 11419 }, { "epoch": 0.5168590178773478, "grad_norm": 0.28727861727393994, "learning_rate": 4.970683327241756e-06, "loss": 0.4433, "step": 11420 }, { "epoch": 0.5169042769857434, "grad_norm": 0.5721570053047997, "learning_rate": 4.969950419139412e-06, "loss": 0.3127, "step": 11421 }, { "epoch": 0.516949536094139, "grad_norm": 0.6452400902352549, "learning_rate": 4.969217511682738e-06, "loss": 0.2943, "step": 11422 }, { "epoch": 0.5169947952025346, "grad_norm": 0.6123018177339106, "learning_rate": 4.968484604887486e-06, "loss": 0.3301, "step": 11423 }, { "epoch": 0.51704005431093, "grad_norm": 0.6869000275438506, "learning_rate": 4.967751698769404e-06, "loss": 0.3226, "step": 11424 }, { "epoch": 0.5170853134193256, "grad_norm": 0.33237830928716483, "learning_rate": 4.967018793344238e-06, "loss": 0.4798, "step": 11425 }, { "epoch": 0.5171305725277212, "grad_norm": 0.6681644421207563, "learning_rate": 4.966285888627737e-06, "loss": 0.3272, "step": 11426 }, { "epoch": 0.5171758316361168, "grad_norm": 0.35537307589594674, "learning_rate": 4.965552984635649e-06, "loss": 0.4678, "step": 11427 }, { "epoch": 0.5172210907445123, "grad_norm": 0.5954443500581541, "learning_rate": 4.964820081383721e-06, "loss": 0.3503, "step": 11428 }, { "epoch": 0.5172663498529079, "grad_norm": 1.2948284935381316, "learning_rate": 4.964087178887702e-06, "loss": 0.3322, "step": 11429 }, { "epoch": 0.5173116089613035, "grad_norm": 0.6466216348525649, "learning_rate": 4.9633542771633374e-06, "loss": 0.3744, "step": 11430 }, { "epoch": 0.517356868069699, "grad_norm": 0.28981058998753617, "learning_rate": 4.96262137622638e-06, "loss": 0.4401, "step": 11431 }, { "epoch": 0.5174021271780946, "grad_norm": 0.6355869243233664, "learning_rate": 4.961888476092572e-06, "loss": 0.3396, "step": 11432 }, { "epoch": 0.5174473862864901, "grad_norm": 0.669479187452769, "learning_rate": 4.961155576777665e-06, "loss": 0.3626, "step": 11433 }, { "epoch": 0.5174926453948857, "grad_norm": 0.2818465609687323, "learning_rate": 4.960422678297405e-06, "loss": 0.4604, "step": 11434 }, { "epoch": 0.5175379045032813, "grad_norm": 0.643082002501434, "learning_rate": 4.959689780667541e-06, "loss": 0.3208, "step": 11435 }, { "epoch": 0.5175831636116769, "grad_norm": 0.6454671244405962, "learning_rate": 4.958956883903816e-06, "loss": 0.3216, "step": 11436 }, { "epoch": 0.5176284227200724, "grad_norm": 0.6115329967832458, "learning_rate": 4.958223988021986e-06, "loss": 0.2993, "step": 11437 }, { "epoch": 0.517673681828468, "grad_norm": 0.6792218491410067, "learning_rate": 4.957491093037792e-06, "loss": 0.3235, "step": 11438 }, { "epoch": 0.5177189409368635, "grad_norm": 0.6469072805006693, "learning_rate": 4.9567581989669846e-06, "loss": 0.3097, "step": 11439 }, { "epoch": 0.5177642000452591, "grad_norm": 0.5611885638407211, "learning_rate": 4.956025305825311e-06, "loss": 0.332, "step": 11440 }, { "epoch": 0.5178094591536547, "grad_norm": 0.646053881191501, "learning_rate": 4.955292413628517e-06, "loss": 0.3216, "step": 11441 }, { "epoch": 0.5178547182620502, "grad_norm": 0.6438452394996295, "learning_rate": 4.954559522392353e-06, "loss": 0.3362, "step": 11442 }, { "epoch": 0.5178999773704458, "grad_norm": 0.5463017390785394, "learning_rate": 4.953826632132565e-06, "loss": 0.3116, "step": 11443 }, { "epoch": 0.5179452364788414, "grad_norm": 0.6006738174475235, "learning_rate": 4.953093742864901e-06, "loss": 0.3027, "step": 11444 }, { "epoch": 0.517990495587237, "grad_norm": 0.6338506106940345, "learning_rate": 4.952360854605107e-06, "loss": 0.3432, "step": 11445 }, { "epoch": 0.5180357546956325, "grad_norm": 0.30322297477801935, "learning_rate": 4.9516279673689325e-06, "loss": 0.4782, "step": 11446 }, { "epoch": 0.518081013804028, "grad_norm": 0.2954691434464442, "learning_rate": 4.950895081172126e-06, "loss": 0.4573, "step": 11447 }, { "epoch": 0.5181262729124236, "grad_norm": 0.60676611158701, "learning_rate": 4.950162196030432e-06, "loss": 0.3375, "step": 11448 }, { "epoch": 0.5181715320208192, "grad_norm": 0.6813301058139706, "learning_rate": 4.949429311959599e-06, "loss": 0.3341, "step": 11449 }, { "epoch": 0.5182167911292147, "grad_norm": 0.6132195451458272, "learning_rate": 4.948696428975378e-06, "loss": 0.3329, "step": 11450 }, { "epoch": 0.5182620502376103, "grad_norm": 0.3179752179157839, "learning_rate": 4.94796354709351e-06, "loss": 0.4948, "step": 11451 }, { "epoch": 0.5183073093460059, "grad_norm": 0.6149764342268045, "learning_rate": 4.947230666329746e-06, "loss": 0.307, "step": 11452 }, { "epoch": 0.5183525684544015, "grad_norm": 0.6185752635572527, "learning_rate": 4.946497786699834e-06, "loss": 0.3167, "step": 11453 }, { "epoch": 0.5183978275627971, "grad_norm": 0.6235007636911877, "learning_rate": 4.945764908219518e-06, "loss": 0.3347, "step": 11454 }, { "epoch": 0.5184430866711925, "grad_norm": 0.5803562587200566, "learning_rate": 4.945032030904549e-06, "loss": 0.3323, "step": 11455 }, { "epoch": 0.5184883457795881, "grad_norm": 0.30145818078884856, "learning_rate": 4.944299154770673e-06, "loss": 0.4791, "step": 11456 }, { "epoch": 0.5185336048879837, "grad_norm": 0.5993805071540821, "learning_rate": 4.943566279833637e-06, "loss": 0.3044, "step": 11457 }, { "epoch": 0.5185788639963793, "grad_norm": 0.7045733996670277, "learning_rate": 4.942833406109188e-06, "loss": 0.334, "step": 11458 }, { "epoch": 0.5186241231047748, "grad_norm": 0.7412287485525683, "learning_rate": 4.942100533613073e-06, "loss": 0.3033, "step": 11459 }, { "epoch": 0.5186693822131704, "grad_norm": 0.6682742478151386, "learning_rate": 4.9413676623610415e-06, "loss": 0.3015, "step": 11460 }, { "epoch": 0.518714641321566, "grad_norm": 0.6134757832433286, "learning_rate": 4.940634792368838e-06, "loss": 0.3295, "step": 11461 }, { "epoch": 0.5187599004299616, "grad_norm": 0.5984181963560531, "learning_rate": 4.93990192365221e-06, "loss": 0.3094, "step": 11462 }, { "epoch": 0.518805159538357, "grad_norm": 0.2826570298747315, "learning_rate": 4.939169056226905e-06, "loss": 0.4776, "step": 11463 }, { "epoch": 0.5188504186467526, "grad_norm": 0.6281200564906452, "learning_rate": 4.93843619010867e-06, "loss": 0.3114, "step": 11464 }, { "epoch": 0.5188956777551482, "grad_norm": 0.5984727108969583, "learning_rate": 4.9377033253132525e-06, "loss": 0.3136, "step": 11465 }, { "epoch": 0.5189409368635438, "grad_norm": 0.2765346528439434, "learning_rate": 4.936970461856401e-06, "loss": 0.4897, "step": 11466 }, { "epoch": 0.5189861959719394, "grad_norm": 0.5694173173865967, "learning_rate": 4.9362375997538585e-06, "loss": 0.2882, "step": 11467 }, { "epoch": 0.5190314550803349, "grad_norm": 0.6698294625395641, "learning_rate": 4.935504739021373e-06, "loss": 0.33, "step": 11468 }, { "epoch": 0.5190767141887305, "grad_norm": 0.6551708984371104, "learning_rate": 4.934771879674697e-06, "loss": 0.3066, "step": 11469 }, { "epoch": 0.5191219732971261, "grad_norm": 0.6106802527555026, "learning_rate": 4.9340390217295695e-06, "loss": 0.3236, "step": 11470 }, { "epoch": 0.5191672324055217, "grad_norm": 0.6727567886294621, "learning_rate": 4.933306165201741e-06, "loss": 0.359, "step": 11471 }, { "epoch": 0.5192124915139171, "grad_norm": 0.6107305453605221, "learning_rate": 4.93257331010696e-06, "loss": 0.3012, "step": 11472 }, { "epoch": 0.5192577506223127, "grad_norm": 0.6547948104154359, "learning_rate": 4.93184045646097e-06, "loss": 0.2876, "step": 11473 }, { "epoch": 0.5193030097307083, "grad_norm": 0.7767733500126526, "learning_rate": 4.9311076042795185e-06, "loss": 0.401, "step": 11474 }, { "epoch": 0.5193482688391039, "grad_norm": 0.6586598931865223, "learning_rate": 4.9303747535783546e-06, "loss": 0.3949, "step": 11475 }, { "epoch": 0.5193935279474995, "grad_norm": 0.6023598627621121, "learning_rate": 4.929641904373224e-06, "loss": 0.3286, "step": 11476 }, { "epoch": 0.519438787055895, "grad_norm": 0.6771772438763664, "learning_rate": 4.928909056679871e-06, "loss": 0.3372, "step": 11477 }, { "epoch": 0.5194840461642906, "grad_norm": 0.6669304323314843, "learning_rate": 4.9281762105140435e-06, "loss": 0.3104, "step": 11478 }, { "epoch": 0.5195293052726861, "grad_norm": 0.5836886978307488, "learning_rate": 4.927443365891491e-06, "loss": 0.3041, "step": 11479 }, { "epoch": 0.5195745643810817, "grad_norm": 0.2876301646577038, "learning_rate": 4.926710522827956e-06, "loss": 0.4568, "step": 11480 }, { "epoch": 0.5196198234894772, "grad_norm": 0.6165277494595637, "learning_rate": 4.925977681339187e-06, "loss": 0.3551, "step": 11481 }, { "epoch": 0.5196650825978728, "grad_norm": 0.27093643070250045, "learning_rate": 4.925244841440932e-06, "loss": 0.4685, "step": 11482 }, { "epoch": 0.5197103417062684, "grad_norm": 0.6405904609560221, "learning_rate": 4.924512003148934e-06, "loss": 0.3018, "step": 11483 }, { "epoch": 0.519755600814664, "grad_norm": 0.9864860414307957, "learning_rate": 4.923779166478941e-06, "loss": 0.3549, "step": 11484 }, { "epoch": 0.5198008599230595, "grad_norm": 0.6053341823481457, "learning_rate": 4.923046331446701e-06, "loss": 0.3307, "step": 11485 }, { "epoch": 0.5198461190314551, "grad_norm": 0.687499422803569, "learning_rate": 4.922313498067957e-06, "loss": 0.4052, "step": 11486 }, { "epoch": 0.5198913781398506, "grad_norm": 0.30206540097624307, "learning_rate": 4.921580666358459e-06, "loss": 0.5052, "step": 11487 }, { "epoch": 0.5199366372482462, "grad_norm": 0.637064127625573, "learning_rate": 4.92084783633395e-06, "loss": 0.2951, "step": 11488 }, { "epoch": 0.5199818963566418, "grad_norm": 0.6493410177667419, "learning_rate": 4.92011500801018e-06, "loss": 0.3268, "step": 11489 }, { "epoch": 0.5200271554650373, "grad_norm": 0.2834604949324538, "learning_rate": 4.919382181402892e-06, "loss": 0.4844, "step": 11490 }, { "epoch": 0.5200724145734329, "grad_norm": 1.102465837368673, "learning_rate": 4.918649356527833e-06, "loss": 0.4206, "step": 11491 }, { "epoch": 0.5201176736818285, "grad_norm": 0.32046127818036063, "learning_rate": 4.917916533400751e-06, "loss": 0.4756, "step": 11492 }, { "epoch": 0.5201629327902241, "grad_norm": 0.7355754043795567, "learning_rate": 4.917183712037389e-06, "loss": 0.3912, "step": 11493 }, { "epoch": 0.5202081918986196, "grad_norm": 0.638127357125275, "learning_rate": 4.916450892453495e-06, "loss": 0.3091, "step": 11494 }, { "epoch": 0.5202534510070151, "grad_norm": 0.6315955219941163, "learning_rate": 4.915718074664816e-06, "loss": 0.3264, "step": 11495 }, { "epoch": 0.5202987101154107, "grad_norm": 0.7076413970554138, "learning_rate": 4.914985258687096e-06, "loss": 0.2954, "step": 11496 }, { "epoch": 0.5203439692238063, "grad_norm": 0.27708411098994, "learning_rate": 4.91425244453608e-06, "loss": 0.4464, "step": 11497 }, { "epoch": 0.5203892283322018, "grad_norm": 0.31059135604941385, "learning_rate": 4.9135196322275195e-06, "loss": 0.4828, "step": 11498 }, { "epoch": 0.5204344874405974, "grad_norm": 0.6448738124679796, "learning_rate": 4.912786821777152e-06, "loss": 0.3207, "step": 11499 }, { "epoch": 0.520479746548993, "grad_norm": 0.6242971855514119, "learning_rate": 4.912054013200731e-06, "loss": 0.3071, "step": 11500 }, { "epoch": 0.5205250056573886, "grad_norm": 0.5925596570301539, "learning_rate": 4.911321206513996e-06, "loss": 0.32, "step": 11501 }, { "epoch": 0.5205702647657842, "grad_norm": 0.5976703369179355, "learning_rate": 4.9105884017327e-06, "loss": 0.3359, "step": 11502 }, { "epoch": 0.5206155238741796, "grad_norm": 0.6275887504151985, "learning_rate": 4.9098555988725814e-06, "loss": 0.3594, "step": 11503 }, { "epoch": 0.5206607829825752, "grad_norm": 0.6165560172448781, "learning_rate": 4.909122797949391e-06, "loss": 0.3609, "step": 11504 }, { "epoch": 0.5207060420909708, "grad_norm": 0.5923730829115844, "learning_rate": 4.908389998978872e-06, "loss": 0.2873, "step": 11505 }, { "epoch": 0.5207513011993664, "grad_norm": 0.625363493919042, "learning_rate": 4.90765720197677e-06, "loss": 0.3318, "step": 11506 }, { "epoch": 0.5207965603077619, "grad_norm": 0.6421525850337274, "learning_rate": 4.9069244069588305e-06, "loss": 0.3188, "step": 11507 }, { "epoch": 0.5208418194161575, "grad_norm": 0.6784976499251623, "learning_rate": 4.906191613940802e-06, "loss": 0.2584, "step": 11508 }, { "epoch": 0.5208870785245531, "grad_norm": 0.5921940899188074, "learning_rate": 4.905458822938426e-06, "loss": 0.3429, "step": 11509 }, { "epoch": 0.5209323376329487, "grad_norm": 0.3467998770991752, "learning_rate": 4.904726033967449e-06, "loss": 0.4917, "step": 11510 }, { "epoch": 0.5209775967413443, "grad_norm": 0.6696798074266498, "learning_rate": 4.903993247043619e-06, "loss": 0.2988, "step": 11511 }, { "epoch": 0.5210228558497397, "grad_norm": 0.6357144820286588, "learning_rate": 4.903260462182679e-06, "loss": 0.3349, "step": 11512 }, { "epoch": 0.5210681149581353, "grad_norm": 0.2981795596572952, "learning_rate": 4.9025276794003715e-06, "loss": 0.4921, "step": 11513 }, { "epoch": 0.5211133740665309, "grad_norm": 0.2738371351530118, "learning_rate": 4.901794898712448e-06, "loss": 0.4528, "step": 11514 }, { "epoch": 0.5211586331749265, "grad_norm": 0.6725567010375184, "learning_rate": 4.901062120134649e-06, "loss": 0.3219, "step": 11515 }, { "epoch": 0.521203892283322, "grad_norm": 0.652312178444958, "learning_rate": 4.900329343682722e-06, "loss": 0.3185, "step": 11516 }, { "epoch": 0.5212491513917176, "grad_norm": 0.30183426080952397, "learning_rate": 4.899596569372409e-06, "loss": 0.5009, "step": 11517 }, { "epoch": 0.5212944105001132, "grad_norm": 0.6150731244812301, "learning_rate": 4.898863797219461e-06, "loss": 0.3348, "step": 11518 }, { "epoch": 0.5213396696085087, "grad_norm": 0.6168255764535435, "learning_rate": 4.898131027239617e-06, "loss": 0.3102, "step": 11519 }, { "epoch": 0.5213849287169042, "grad_norm": 0.6182232484985482, "learning_rate": 4.897398259448625e-06, "loss": 0.3573, "step": 11520 }, { "epoch": 0.5214301878252998, "grad_norm": 0.6503476877876679, "learning_rate": 4.89666549386223e-06, "loss": 0.3264, "step": 11521 }, { "epoch": 0.5214754469336954, "grad_norm": 0.6180965256332858, "learning_rate": 4.895932730496174e-06, "loss": 0.3065, "step": 11522 }, { "epoch": 0.521520706042091, "grad_norm": 0.6210216574725677, "learning_rate": 4.895199969366206e-06, "loss": 0.3076, "step": 11523 }, { "epoch": 0.5215659651504866, "grad_norm": 0.35488493465928805, "learning_rate": 4.894467210488069e-06, "loss": 0.498, "step": 11524 }, { "epoch": 0.5216112242588821, "grad_norm": 0.5825552011467424, "learning_rate": 4.893734453877506e-06, "loss": 0.3155, "step": 11525 }, { "epoch": 0.5216564833672777, "grad_norm": 0.723602606465192, "learning_rate": 4.893001699550263e-06, "loss": 0.325, "step": 11526 }, { "epoch": 0.5217017424756732, "grad_norm": 0.6108827405279276, "learning_rate": 4.892268947522088e-06, "loss": 0.3568, "step": 11527 }, { "epoch": 0.5217470015840688, "grad_norm": 0.6114150637030855, "learning_rate": 4.891536197808719e-06, "loss": 0.2834, "step": 11528 }, { "epoch": 0.5217922606924643, "grad_norm": 0.6360114618316366, "learning_rate": 4.890803450425905e-06, "loss": 0.3451, "step": 11529 }, { "epoch": 0.5218375198008599, "grad_norm": 0.6545738871505882, "learning_rate": 4.890070705389388e-06, "loss": 0.3402, "step": 11530 }, { "epoch": 0.5218827789092555, "grad_norm": 0.6206975055805826, "learning_rate": 4.889337962714918e-06, "loss": 0.337, "step": 11531 }, { "epoch": 0.5219280380176511, "grad_norm": 0.7917530209205013, "learning_rate": 4.888605222418232e-06, "loss": 0.3216, "step": 11532 }, { "epoch": 0.5219732971260466, "grad_norm": 1.048978913657367, "learning_rate": 4.887872484515078e-06, "loss": 0.2912, "step": 11533 }, { "epoch": 0.5220185562344422, "grad_norm": 0.6780769881876405, "learning_rate": 4.8871397490212015e-06, "loss": 0.3535, "step": 11534 }, { "epoch": 0.5220638153428377, "grad_norm": 0.33566560310405, "learning_rate": 4.886407015952344e-06, "loss": 0.4988, "step": 11535 }, { "epoch": 0.5221090744512333, "grad_norm": 0.33959867580729625, "learning_rate": 4.8856742853242504e-06, "loss": 0.4726, "step": 11536 }, { "epoch": 0.5221543335596289, "grad_norm": 0.6735425814731475, "learning_rate": 4.884941557152666e-06, "loss": 0.2987, "step": 11537 }, { "epoch": 0.5221995926680244, "grad_norm": 0.654398883844253, "learning_rate": 4.884208831453335e-06, "loss": 0.2959, "step": 11538 }, { "epoch": 0.52224485177642, "grad_norm": 0.28491726359378344, "learning_rate": 4.883476108241999e-06, "loss": 0.4815, "step": 11539 }, { "epoch": 0.5222901108848156, "grad_norm": 0.632877892939454, "learning_rate": 4.882743387534406e-06, "loss": 0.383, "step": 11540 }, { "epoch": 0.5223353699932112, "grad_norm": 0.678673092334952, "learning_rate": 4.882010669346294e-06, "loss": 0.3215, "step": 11541 }, { "epoch": 0.5223806291016067, "grad_norm": 0.5418262072495581, "learning_rate": 4.881277953693412e-06, "loss": 0.2891, "step": 11542 }, { "epoch": 0.5224258882100022, "grad_norm": 0.6492432998704616, "learning_rate": 4.8805452405915025e-06, "loss": 0.3584, "step": 11543 }, { "epoch": 0.5224711473183978, "grad_norm": 0.6027133643001226, "learning_rate": 4.879812530056309e-06, "loss": 0.3324, "step": 11544 }, { "epoch": 0.5225164064267934, "grad_norm": 0.37705179015063844, "learning_rate": 4.879079822103575e-06, "loss": 0.4705, "step": 11545 }, { "epoch": 0.5225616655351889, "grad_norm": 0.6388994946240203, "learning_rate": 4.878347116749042e-06, "loss": 0.2968, "step": 11546 }, { "epoch": 0.5226069246435845, "grad_norm": 1.0835052752581886, "learning_rate": 4.877614414008459e-06, "loss": 0.2954, "step": 11547 }, { "epoch": 0.5226521837519801, "grad_norm": 0.6052171996998544, "learning_rate": 4.876881713897565e-06, "loss": 0.3191, "step": 11548 }, { "epoch": 0.5226974428603757, "grad_norm": 0.2959331868748914, "learning_rate": 4.876149016432104e-06, "loss": 0.4955, "step": 11549 }, { "epoch": 0.5227427019687713, "grad_norm": 0.6744346120219048, "learning_rate": 4.875416321627823e-06, "loss": 0.3521, "step": 11550 }, { "epoch": 0.5227879610771667, "grad_norm": 0.6485308645284827, "learning_rate": 4.87468362950046e-06, "loss": 0.3312, "step": 11551 }, { "epoch": 0.5228332201855623, "grad_norm": 0.6479885264159125, "learning_rate": 4.873950940065762e-06, "loss": 0.327, "step": 11552 }, { "epoch": 0.5228784792939579, "grad_norm": 0.6287323547050803, "learning_rate": 4.8732182533394716e-06, "loss": 0.3253, "step": 11553 }, { "epoch": 0.5229237384023535, "grad_norm": 0.5957143013642198, "learning_rate": 4.87248556933733e-06, "loss": 0.3297, "step": 11554 }, { "epoch": 0.522968997510749, "grad_norm": 0.7821061377676696, "learning_rate": 4.871752888075082e-06, "loss": 0.3196, "step": 11555 }, { "epoch": 0.5230142566191446, "grad_norm": 0.5824779569512576, "learning_rate": 4.871020209568473e-06, "loss": 0.2934, "step": 11556 }, { "epoch": 0.5230595157275402, "grad_norm": 0.6703735640538666, "learning_rate": 4.870287533833241e-06, "loss": 0.3042, "step": 11557 }, { "epoch": 0.5231047748359358, "grad_norm": 0.6302364020589345, "learning_rate": 4.8695548608851326e-06, "loss": 0.3039, "step": 11558 }, { "epoch": 0.5231500339443313, "grad_norm": 0.3537950301088894, "learning_rate": 4.868822190739888e-06, "loss": 0.4762, "step": 11559 }, { "epoch": 0.5231952930527268, "grad_norm": 0.32810713616438986, "learning_rate": 4.868089523413255e-06, "loss": 0.4747, "step": 11560 }, { "epoch": 0.5232405521611224, "grad_norm": 0.6764435138048115, "learning_rate": 4.86735685892097e-06, "loss": 0.3298, "step": 11561 }, { "epoch": 0.523285811269518, "grad_norm": 0.665599259745116, "learning_rate": 4.8666241972787794e-06, "loss": 0.2964, "step": 11562 }, { "epoch": 0.5233310703779136, "grad_norm": 0.29847661924202107, "learning_rate": 4.865891538502427e-06, "loss": 0.4723, "step": 11563 }, { "epoch": 0.5233763294863091, "grad_norm": 0.7256024754638591, "learning_rate": 4.8651588826076514e-06, "loss": 0.3278, "step": 11564 }, { "epoch": 0.5234215885947047, "grad_norm": 0.633130441389736, "learning_rate": 4.864426229610197e-06, "loss": 0.3186, "step": 11565 }, { "epoch": 0.5234668477031003, "grad_norm": 0.7023084421439088, "learning_rate": 4.863693579525809e-06, "loss": 0.2952, "step": 11566 }, { "epoch": 0.5235121068114958, "grad_norm": 0.32772249762550243, "learning_rate": 4.862960932370225e-06, "loss": 0.4613, "step": 11567 }, { "epoch": 0.5235573659198913, "grad_norm": 0.481162743463051, "learning_rate": 4.862228288159191e-06, "loss": 0.4735, "step": 11568 }, { "epoch": 0.5236026250282869, "grad_norm": 0.5770663986524796, "learning_rate": 4.861495646908448e-06, "loss": 0.3308, "step": 11569 }, { "epoch": 0.5236478841366825, "grad_norm": 0.7454149959163855, "learning_rate": 4.860763008633736e-06, "loss": 0.3551, "step": 11570 }, { "epoch": 0.5236931432450781, "grad_norm": 1.1015416066071197, "learning_rate": 4.860030373350801e-06, "loss": 0.3473, "step": 11571 }, { "epoch": 0.5237384023534737, "grad_norm": 0.6270488120483275, "learning_rate": 4.859297741075384e-06, "loss": 0.3224, "step": 11572 }, { "epoch": 0.5237836614618692, "grad_norm": 0.6364875464470632, "learning_rate": 4.858565111823226e-06, "loss": 0.3071, "step": 11573 }, { "epoch": 0.5238289205702648, "grad_norm": 0.6294535199966994, "learning_rate": 4.857832485610068e-06, "loss": 0.3626, "step": 11574 }, { "epoch": 0.5238741796786603, "grad_norm": 0.6204239645520809, "learning_rate": 4.857099862451654e-06, "loss": 0.3054, "step": 11575 }, { "epoch": 0.5239194387870559, "grad_norm": 0.6299344433260511, "learning_rate": 4.856367242363727e-06, "loss": 0.3272, "step": 11576 }, { "epoch": 0.5239646978954514, "grad_norm": 0.7757501561103395, "learning_rate": 4.8556346253620256e-06, "loss": 0.3414, "step": 11577 }, { "epoch": 0.524009957003847, "grad_norm": 0.6331417772382716, "learning_rate": 4.854902011462291e-06, "loss": 0.3632, "step": 11578 }, { "epoch": 0.5240552161122426, "grad_norm": 0.5749494648969943, "learning_rate": 4.85416940068027e-06, "loss": 0.289, "step": 11579 }, { "epoch": 0.5241004752206382, "grad_norm": 0.6052192024100661, "learning_rate": 4.853436793031698e-06, "loss": 0.2715, "step": 11580 }, { "epoch": 0.5241457343290337, "grad_norm": 0.6163087970541375, "learning_rate": 4.852704188532319e-06, "loss": 0.3065, "step": 11581 }, { "epoch": 0.5241909934374293, "grad_norm": 0.650723415979576, "learning_rate": 4.851971587197877e-06, "loss": 0.3127, "step": 11582 }, { "epoch": 0.5242362525458248, "grad_norm": 0.694249073684704, "learning_rate": 4.8512389890441085e-06, "loss": 0.3626, "step": 11583 }, { "epoch": 0.5242815116542204, "grad_norm": 0.6401599007480614, "learning_rate": 4.850506394086758e-06, "loss": 0.2904, "step": 11584 }, { "epoch": 0.524326770762616, "grad_norm": 0.6155923503742697, "learning_rate": 4.849773802341567e-06, "loss": 0.3397, "step": 11585 }, { "epoch": 0.5243720298710115, "grad_norm": 0.5400596329824081, "learning_rate": 4.849041213824274e-06, "loss": 0.4777, "step": 11586 }, { "epoch": 0.5244172889794071, "grad_norm": 0.6594264393188617, "learning_rate": 4.8483086285506224e-06, "loss": 0.3501, "step": 11587 }, { "epoch": 0.5244625480878027, "grad_norm": 0.5901626896745396, "learning_rate": 4.847576046536351e-06, "loss": 0.2842, "step": 11588 }, { "epoch": 0.5245078071961983, "grad_norm": 0.313815295453429, "learning_rate": 4.8468434677972055e-06, "loss": 0.467, "step": 11589 }, { "epoch": 0.5245530663045938, "grad_norm": 0.6058793660821811, "learning_rate": 4.846110892348921e-06, "loss": 0.3314, "step": 11590 }, { "epoch": 0.5245983254129893, "grad_norm": 0.6104563403943248, "learning_rate": 4.845378320207241e-06, "loss": 0.3176, "step": 11591 }, { "epoch": 0.5246435845213849, "grad_norm": 0.6523091636477124, "learning_rate": 4.844645751387908e-06, "loss": 0.2999, "step": 11592 }, { "epoch": 0.5246888436297805, "grad_norm": 0.7426514955082142, "learning_rate": 4.843913185906658e-06, "loss": 0.3172, "step": 11593 }, { "epoch": 0.5247341027381761, "grad_norm": 0.7036324622303272, "learning_rate": 4.843180623779235e-06, "loss": 0.3273, "step": 11594 }, { "epoch": 0.5247793618465716, "grad_norm": 0.5821863957649411, "learning_rate": 4.84244806502138e-06, "loss": 0.311, "step": 11595 }, { "epoch": 0.5248246209549672, "grad_norm": 0.5758372192065354, "learning_rate": 4.8417155096488315e-06, "loss": 0.3352, "step": 11596 }, { "epoch": 0.5248698800633628, "grad_norm": 0.5169097065158664, "learning_rate": 4.84098295767733e-06, "loss": 0.4698, "step": 11597 }, { "epoch": 0.5249151391717584, "grad_norm": 0.6203385669439295, "learning_rate": 4.840250409122617e-06, "loss": 0.3466, "step": 11598 }, { "epoch": 0.5249603982801538, "grad_norm": 0.6227472133526765, "learning_rate": 4.8395178640004316e-06, "loss": 0.2755, "step": 11599 }, { "epoch": 0.5250056573885494, "grad_norm": 0.6781019728826658, "learning_rate": 4.838785322326514e-06, "loss": 0.3559, "step": 11600 }, { "epoch": 0.525050916496945, "grad_norm": 0.6091503037903379, "learning_rate": 4.838052784116606e-06, "loss": 0.3216, "step": 11601 }, { "epoch": 0.5250961756053406, "grad_norm": 0.6534442212691152, "learning_rate": 4.837320249386446e-06, "loss": 0.3209, "step": 11602 }, { "epoch": 0.5251414347137361, "grad_norm": 0.629883307804516, "learning_rate": 4.836587718151773e-06, "loss": 0.3384, "step": 11603 }, { "epoch": 0.5251866938221317, "grad_norm": 0.6153519783719406, "learning_rate": 4.8358551904283285e-06, "loss": 0.34, "step": 11604 }, { "epoch": 0.5252319529305273, "grad_norm": 0.6418449757511626, "learning_rate": 4.835122666231854e-06, "loss": 0.343, "step": 11605 }, { "epoch": 0.5252772120389229, "grad_norm": 0.6674292870560887, "learning_rate": 4.834390145578085e-06, "loss": 0.3172, "step": 11606 }, { "epoch": 0.5253224711473184, "grad_norm": 0.6841831017464953, "learning_rate": 4.833657628482762e-06, "loss": 0.33, "step": 11607 }, { "epoch": 0.5253677302557139, "grad_norm": 0.656399605022193, "learning_rate": 4.832925114961629e-06, "loss": 0.3138, "step": 11608 }, { "epoch": 0.5254129893641095, "grad_norm": 0.6375523783307073, "learning_rate": 4.832192605030419e-06, "loss": 0.3187, "step": 11609 }, { "epoch": 0.5254582484725051, "grad_norm": 0.6306405944594975, "learning_rate": 4.8314600987048755e-06, "loss": 0.3179, "step": 11610 }, { "epoch": 0.5255035075809007, "grad_norm": 0.6388363788630063, "learning_rate": 4.8307275960007385e-06, "loss": 0.3457, "step": 11611 }, { "epoch": 0.5255487666892962, "grad_norm": 0.6314349162576515, "learning_rate": 4.829995096933744e-06, "loss": 0.3226, "step": 11612 }, { "epoch": 0.5255940257976918, "grad_norm": 0.6298986320385729, "learning_rate": 4.829262601519632e-06, "loss": 0.3318, "step": 11613 }, { "epoch": 0.5256392849060874, "grad_norm": 0.5812589402658017, "learning_rate": 4.828530109774143e-06, "loss": 0.3065, "step": 11614 }, { "epoch": 0.525684544014483, "grad_norm": 0.6130135799885693, "learning_rate": 4.827797621713017e-06, "loss": 0.3103, "step": 11615 }, { "epoch": 0.5257298031228784, "grad_norm": 0.6913402064094225, "learning_rate": 4.827065137351989e-06, "loss": 0.3412, "step": 11616 }, { "epoch": 0.525775062231274, "grad_norm": 0.609221764111646, "learning_rate": 4.8263326567068e-06, "loss": 0.3162, "step": 11617 }, { "epoch": 0.5258203213396696, "grad_norm": 0.6355749663590441, "learning_rate": 4.82560017979319e-06, "loss": 0.3289, "step": 11618 }, { "epoch": 0.5258655804480652, "grad_norm": 0.5889558380160951, "learning_rate": 4.824867706626896e-06, "loss": 0.3576, "step": 11619 }, { "epoch": 0.5259108395564608, "grad_norm": 0.6284121917912118, "learning_rate": 4.824135237223657e-06, "loss": 0.3272, "step": 11620 }, { "epoch": 0.5259560986648563, "grad_norm": 0.7364661098326682, "learning_rate": 4.823402771599213e-06, "loss": 0.3105, "step": 11621 }, { "epoch": 0.5260013577732519, "grad_norm": 0.6478860127947099, "learning_rate": 4.8226703097693e-06, "loss": 0.3555, "step": 11622 }, { "epoch": 0.5260466168816474, "grad_norm": 0.6213982022709907, "learning_rate": 4.821937851749656e-06, "loss": 0.3183, "step": 11623 }, { "epoch": 0.526091875990043, "grad_norm": 0.6582634639981945, "learning_rate": 4.8212053975560234e-06, "loss": 0.3429, "step": 11624 }, { "epoch": 0.5261371350984385, "grad_norm": 0.8059204720277006, "learning_rate": 4.820472947204136e-06, "loss": 0.3219, "step": 11625 }, { "epoch": 0.5261823942068341, "grad_norm": 0.6953029754941127, "learning_rate": 4.8197405007097346e-06, "loss": 0.305, "step": 11626 }, { "epoch": 0.5262276533152297, "grad_norm": 0.7495626317789454, "learning_rate": 4.819008058088557e-06, "loss": 0.3335, "step": 11627 }, { "epoch": 0.5262729124236253, "grad_norm": 1.065609757684985, "learning_rate": 4.8182756193563365e-06, "loss": 0.3214, "step": 11628 }, { "epoch": 0.5263181715320209, "grad_norm": 0.6059383855298436, "learning_rate": 4.817543184528817e-06, "loss": 0.3208, "step": 11629 }, { "epoch": 0.5263634306404164, "grad_norm": 0.6914288972480178, "learning_rate": 4.816810753621735e-06, "loss": 0.2865, "step": 11630 }, { "epoch": 0.5264086897488119, "grad_norm": 0.7088506050035354, "learning_rate": 4.816078326650827e-06, "loss": 0.3535, "step": 11631 }, { "epoch": 0.5264539488572075, "grad_norm": 0.6231122942346944, "learning_rate": 4.8153459036318295e-06, "loss": 0.3199, "step": 11632 }, { "epoch": 0.5264992079656031, "grad_norm": 0.7525461765440211, "learning_rate": 4.8146134845804825e-06, "loss": 0.3321, "step": 11633 }, { "epoch": 0.5265444670739986, "grad_norm": 0.6610384539648507, "learning_rate": 4.813881069512523e-06, "loss": 0.3682, "step": 11634 }, { "epoch": 0.5265897261823942, "grad_norm": 0.6021964475738, "learning_rate": 4.813148658443687e-06, "loss": 0.3105, "step": 11635 }, { "epoch": 0.5266349852907898, "grad_norm": 0.7096372561872358, "learning_rate": 4.812416251389711e-06, "loss": 0.3171, "step": 11636 }, { "epoch": 0.5266802443991854, "grad_norm": 0.6117060167385769, "learning_rate": 4.811683848366337e-06, "loss": 0.2988, "step": 11637 }, { "epoch": 0.5267255035075809, "grad_norm": 0.6371498511510651, "learning_rate": 4.810951449389296e-06, "loss": 0.3235, "step": 11638 }, { "epoch": 0.5267707626159764, "grad_norm": 0.42872180050104086, "learning_rate": 4.810219054474328e-06, "loss": 0.5039, "step": 11639 }, { "epoch": 0.526816021724372, "grad_norm": 0.6127103887238807, "learning_rate": 4.809486663637171e-06, "loss": 0.2954, "step": 11640 }, { "epoch": 0.5268612808327676, "grad_norm": 0.6174635844891634, "learning_rate": 4.808754276893561e-06, "loss": 0.3038, "step": 11641 }, { "epoch": 0.5269065399411632, "grad_norm": 0.6389246437029213, "learning_rate": 4.808021894259231e-06, "loss": 0.2961, "step": 11642 }, { "epoch": 0.5269517990495587, "grad_norm": 0.33784446396240825, "learning_rate": 4.807289515749922e-06, "loss": 0.4631, "step": 11643 }, { "epoch": 0.5269970581579543, "grad_norm": 0.6487534965345723, "learning_rate": 4.806557141381372e-06, "loss": 0.3594, "step": 11644 }, { "epoch": 0.5270423172663499, "grad_norm": 0.6231353459848805, "learning_rate": 4.8058247711693125e-06, "loss": 0.3189, "step": 11645 }, { "epoch": 0.5270875763747455, "grad_norm": 0.6998728150085539, "learning_rate": 4.805092405129482e-06, "loss": 0.3131, "step": 11646 }, { "epoch": 0.5271328354831409, "grad_norm": 0.6695260173459374, "learning_rate": 4.8043600432776186e-06, "loss": 0.3058, "step": 11647 }, { "epoch": 0.5271780945915365, "grad_norm": 0.6494845805375873, "learning_rate": 4.803627685629456e-06, "loss": 0.352, "step": 11648 }, { "epoch": 0.5272233536999321, "grad_norm": 0.593318239997085, "learning_rate": 4.802895332200732e-06, "loss": 0.3103, "step": 11649 }, { "epoch": 0.5272686128083277, "grad_norm": 0.6348791995672993, "learning_rate": 4.8021629830071824e-06, "loss": 0.3239, "step": 11650 }, { "epoch": 0.5273138719167232, "grad_norm": 0.6810575830189948, "learning_rate": 4.801430638064541e-06, "loss": 0.361, "step": 11651 }, { "epoch": 0.5273591310251188, "grad_norm": 0.3611467775995699, "learning_rate": 4.800698297388546e-06, "loss": 0.4956, "step": 11652 }, { "epoch": 0.5274043901335144, "grad_norm": 0.7888433588801044, "learning_rate": 4.799965960994934e-06, "loss": 0.3363, "step": 11653 }, { "epoch": 0.52744964924191, "grad_norm": 0.6675214604942749, "learning_rate": 4.799233628899438e-06, "loss": 0.3621, "step": 11654 }, { "epoch": 0.5274949083503055, "grad_norm": 0.6828697170062485, "learning_rate": 4.798501301117795e-06, "loss": 0.2869, "step": 11655 }, { "epoch": 0.527540167458701, "grad_norm": 0.8840805701229352, "learning_rate": 4.79776897766574e-06, "loss": 0.342, "step": 11656 }, { "epoch": 0.5275854265670966, "grad_norm": 0.6454186370060752, "learning_rate": 4.797036658559008e-06, "loss": 0.2812, "step": 11657 }, { "epoch": 0.5276306856754922, "grad_norm": 0.7100634649960617, "learning_rate": 4.796304343813334e-06, "loss": 0.3479, "step": 11658 }, { "epoch": 0.5276759447838878, "grad_norm": 0.6197573068724485, "learning_rate": 4.795572033444456e-06, "loss": 0.3524, "step": 11659 }, { "epoch": 0.5277212038922833, "grad_norm": 0.9175159401326038, "learning_rate": 4.794839727468107e-06, "loss": 0.3208, "step": 11660 }, { "epoch": 0.5277664630006789, "grad_norm": 0.6546605773311394, "learning_rate": 4.7941074259000205e-06, "loss": 0.359, "step": 11661 }, { "epoch": 0.5278117221090745, "grad_norm": 0.6532201480708522, "learning_rate": 4.793375128755934e-06, "loss": 0.3707, "step": 11662 }, { "epoch": 0.52785698121747, "grad_norm": 0.6959393775248613, "learning_rate": 4.792642836051582e-06, "loss": 0.3373, "step": 11663 }, { "epoch": 0.5279022403258656, "grad_norm": 0.6183465447029041, "learning_rate": 4.7919105478026985e-06, "loss": 0.3366, "step": 11664 }, { "epoch": 0.5279474994342611, "grad_norm": 0.6034941995919838, "learning_rate": 4.791178264025017e-06, "loss": 0.305, "step": 11665 }, { "epoch": 0.5279927585426567, "grad_norm": 0.6695252246471096, "learning_rate": 4.790445984734276e-06, "loss": 0.3577, "step": 11666 }, { "epoch": 0.5280380176510523, "grad_norm": 0.6469582967562887, "learning_rate": 4.789713709946204e-06, "loss": 0.3265, "step": 11667 }, { "epoch": 0.5280832767594479, "grad_norm": 0.614241262918158, "learning_rate": 4.78898143967654e-06, "loss": 0.3115, "step": 11668 }, { "epoch": 0.5281285358678434, "grad_norm": 0.6096260352455624, "learning_rate": 4.788249173941018e-06, "loss": 0.2803, "step": 11669 }, { "epoch": 0.528173794976239, "grad_norm": 0.7147214143476651, "learning_rate": 4.787516912755369e-06, "loss": 0.3376, "step": 11670 }, { "epoch": 0.5282190540846345, "grad_norm": 0.5721691495643542, "learning_rate": 4.786784656135328e-06, "loss": 0.3161, "step": 11671 }, { "epoch": 0.5282643131930301, "grad_norm": 0.6560440703019803, "learning_rate": 4.7860524040966316e-06, "loss": 0.2999, "step": 11672 }, { "epoch": 0.5283095723014256, "grad_norm": 0.35524932920415997, "learning_rate": 4.785320156655013e-06, "loss": 0.4573, "step": 11673 }, { "epoch": 0.5283548314098212, "grad_norm": 0.5887683799085369, "learning_rate": 4.784587913826203e-06, "loss": 0.3017, "step": 11674 }, { "epoch": 0.5284000905182168, "grad_norm": 0.5629299722363656, "learning_rate": 4.7838556756259365e-06, "loss": 0.3053, "step": 11675 }, { "epoch": 0.5284453496266124, "grad_norm": 0.30189560182789005, "learning_rate": 4.78312344206995e-06, "loss": 0.4844, "step": 11676 }, { "epoch": 0.528490608735008, "grad_norm": 0.6264257087723134, "learning_rate": 4.782391213173973e-06, "loss": 0.3099, "step": 11677 }, { "epoch": 0.5285358678434035, "grad_norm": 0.6104495057889123, "learning_rate": 4.7816589889537415e-06, "loss": 0.2973, "step": 11678 }, { "epoch": 0.528581126951799, "grad_norm": 0.6157904204618331, "learning_rate": 4.780926769424988e-06, "loss": 0.3305, "step": 11679 }, { "epoch": 0.5286263860601946, "grad_norm": 0.3013608723061759, "learning_rate": 4.780194554603444e-06, "loss": 0.4591, "step": 11680 }, { "epoch": 0.5286716451685902, "grad_norm": 0.6707113511900932, "learning_rate": 4.779462344504845e-06, "loss": 0.3438, "step": 11681 }, { "epoch": 0.5287169042769857, "grad_norm": 0.6069204030793611, "learning_rate": 4.778730139144923e-06, "loss": 0.3302, "step": 11682 }, { "epoch": 0.5287621633853813, "grad_norm": 0.30036751368928316, "learning_rate": 4.777997938539411e-06, "loss": 0.4674, "step": 11683 }, { "epoch": 0.5288074224937769, "grad_norm": 0.6392490111304531, "learning_rate": 4.777265742704039e-06, "loss": 0.3414, "step": 11684 }, { "epoch": 0.5288526816021725, "grad_norm": 0.6088450841981451, "learning_rate": 4.776533551654543e-06, "loss": 0.293, "step": 11685 }, { "epoch": 0.528897940710568, "grad_norm": 0.6248569779027412, "learning_rate": 4.775801365406657e-06, "loss": 0.3402, "step": 11686 }, { "epoch": 0.5289431998189635, "grad_norm": 0.6331192626517268, "learning_rate": 4.77506918397611e-06, "loss": 0.3631, "step": 11687 }, { "epoch": 0.5289884589273591, "grad_norm": 0.5615598344171098, "learning_rate": 4.774337007378633e-06, "loss": 0.2863, "step": 11688 }, { "epoch": 0.5290337180357547, "grad_norm": 0.6345617408032932, "learning_rate": 4.773604835629965e-06, "loss": 0.379, "step": 11689 }, { "epoch": 0.5290789771441503, "grad_norm": 0.6236578233133567, "learning_rate": 4.77287266874583e-06, "loss": 0.3255, "step": 11690 }, { "epoch": 0.5291242362525458, "grad_norm": 0.6598727164174507, "learning_rate": 4.772140506741966e-06, "loss": 0.3424, "step": 11691 }, { "epoch": 0.5291694953609414, "grad_norm": 0.33072579076807246, "learning_rate": 4.771408349634103e-06, "loss": 0.4797, "step": 11692 }, { "epoch": 0.529214754469337, "grad_norm": 0.3321147784678341, "learning_rate": 4.770676197437971e-06, "loss": 0.4725, "step": 11693 }, { "epoch": 0.5292600135777326, "grad_norm": 0.6858860519788593, "learning_rate": 4.769944050169303e-06, "loss": 0.3207, "step": 11694 }, { "epoch": 0.529305272686128, "grad_norm": 0.6174437330371626, "learning_rate": 4.769211907843833e-06, "loss": 0.3649, "step": 11695 }, { "epoch": 0.5293505317945236, "grad_norm": 0.6631526550644542, "learning_rate": 4.768479770477287e-06, "loss": 0.32, "step": 11696 }, { "epoch": 0.5293957909029192, "grad_norm": 0.6050793631720137, "learning_rate": 4.767747638085402e-06, "loss": 0.3154, "step": 11697 }, { "epoch": 0.5294410500113148, "grad_norm": 0.6513012927299898, "learning_rate": 4.767015510683906e-06, "loss": 0.3788, "step": 11698 }, { "epoch": 0.5294863091197104, "grad_norm": 0.7069803491491897, "learning_rate": 4.766283388288532e-06, "loss": 0.3555, "step": 11699 }, { "epoch": 0.5295315682281059, "grad_norm": 0.6905703982268144, "learning_rate": 4.765551270915008e-06, "loss": 0.3533, "step": 11700 }, { "epoch": 0.5295768273365015, "grad_norm": 0.6164310287508942, "learning_rate": 4.764819158579069e-06, "loss": 0.2813, "step": 11701 }, { "epoch": 0.5296220864448971, "grad_norm": 0.6334263773508509, "learning_rate": 4.764087051296445e-06, "loss": 0.3171, "step": 11702 }, { "epoch": 0.5296673455532926, "grad_norm": 0.4756404789746504, "learning_rate": 4.763354949082864e-06, "loss": 0.4844, "step": 11703 }, { "epoch": 0.5297126046616881, "grad_norm": 0.8110841726189192, "learning_rate": 4.762622851954058e-06, "loss": 0.3588, "step": 11704 }, { "epoch": 0.5297578637700837, "grad_norm": 0.6715038490678876, "learning_rate": 4.761890759925759e-06, "loss": 0.3587, "step": 11705 }, { "epoch": 0.5298031228784793, "grad_norm": 0.29191115580705324, "learning_rate": 4.761158673013696e-06, "loss": 0.4792, "step": 11706 }, { "epoch": 0.5298483819868749, "grad_norm": 0.67648546466302, "learning_rate": 4.7604265912336e-06, "loss": 0.342, "step": 11707 }, { "epoch": 0.5298936410952704, "grad_norm": 0.3268432712087116, "learning_rate": 4.759694514601201e-06, "loss": 0.4784, "step": 11708 }, { "epoch": 0.529938900203666, "grad_norm": 0.6779791178983064, "learning_rate": 4.758962443132227e-06, "loss": 0.304, "step": 11709 }, { "epoch": 0.5299841593120616, "grad_norm": 0.6503922808669877, "learning_rate": 4.75823037684241e-06, "loss": 0.3333, "step": 11710 }, { "epoch": 0.5300294184204571, "grad_norm": 0.6275606411625194, "learning_rate": 4.757498315747482e-06, "loss": 0.3482, "step": 11711 }, { "epoch": 0.5300746775288527, "grad_norm": 0.6126662598486359, "learning_rate": 4.756766259863169e-06, "loss": 0.3182, "step": 11712 }, { "epoch": 0.5301199366372482, "grad_norm": 0.604527940770792, "learning_rate": 4.756034209205201e-06, "loss": 0.3135, "step": 11713 }, { "epoch": 0.5301651957456438, "grad_norm": 0.6128021844596666, "learning_rate": 4.75530216378931e-06, "loss": 0.2854, "step": 11714 }, { "epoch": 0.5302104548540394, "grad_norm": 0.6237930727497758, "learning_rate": 4.754570123631224e-06, "loss": 0.313, "step": 11715 }, { "epoch": 0.530255713962435, "grad_norm": 0.41318223344137056, "learning_rate": 4.753838088746672e-06, "loss": 0.468, "step": 11716 }, { "epoch": 0.5303009730708305, "grad_norm": 0.6638863894739259, "learning_rate": 4.753106059151382e-06, "loss": 0.3176, "step": 11717 }, { "epoch": 0.530346232179226, "grad_norm": 0.32642953381378825, "learning_rate": 4.752374034861088e-06, "loss": 0.4704, "step": 11718 }, { "epoch": 0.5303914912876216, "grad_norm": 0.621866477687391, "learning_rate": 4.7516420158915115e-06, "loss": 0.3375, "step": 11719 }, { "epoch": 0.5304367503960172, "grad_norm": 0.6712285445401825, "learning_rate": 4.750910002258387e-06, "loss": 0.3308, "step": 11720 }, { "epoch": 0.5304820095044127, "grad_norm": 0.7017424575427466, "learning_rate": 4.750177993977442e-06, "loss": 0.293, "step": 11721 }, { "epoch": 0.5305272686128083, "grad_norm": 0.7136507819466259, "learning_rate": 4.7494459910644044e-06, "loss": 0.3111, "step": 11722 }, { "epoch": 0.5305725277212039, "grad_norm": 0.6721779070751936, "learning_rate": 4.7487139935350015e-06, "loss": 0.3338, "step": 11723 }, { "epoch": 0.5306177868295995, "grad_norm": 0.6295776044002247, "learning_rate": 4.747982001404965e-06, "loss": 0.2902, "step": 11724 }, { "epoch": 0.5306630459379951, "grad_norm": 0.64717952938952, "learning_rate": 4.7472500146900206e-06, "loss": 0.3294, "step": 11725 }, { "epoch": 0.5307083050463905, "grad_norm": 0.6384251289927297, "learning_rate": 4.746518033405897e-06, "loss": 0.3328, "step": 11726 }, { "epoch": 0.5307535641547861, "grad_norm": 0.6299181902472535, "learning_rate": 4.745786057568324e-06, "loss": 0.3558, "step": 11727 }, { "epoch": 0.5307988232631817, "grad_norm": 0.6467058997655225, "learning_rate": 4.745054087193025e-06, "loss": 0.4017, "step": 11728 }, { "epoch": 0.5308440823715773, "grad_norm": 0.6051062425231508, "learning_rate": 4.744322122295732e-06, "loss": 0.3431, "step": 11729 }, { "epoch": 0.5308893414799728, "grad_norm": 0.4436395210006455, "learning_rate": 4.743590162892171e-06, "loss": 0.503, "step": 11730 }, { "epoch": 0.5309346005883684, "grad_norm": 0.7358701096074631, "learning_rate": 4.742858208998072e-06, "loss": 0.3252, "step": 11731 }, { "epoch": 0.530979859696764, "grad_norm": 0.5952948832084214, "learning_rate": 4.742126260629158e-06, "loss": 0.284, "step": 11732 }, { "epoch": 0.5310251188051596, "grad_norm": 0.6126645832337427, "learning_rate": 4.741394317801158e-06, "loss": 0.336, "step": 11733 }, { "epoch": 0.5310703779135552, "grad_norm": 0.5700785120288356, "learning_rate": 4.740662380529802e-06, "loss": 0.3016, "step": 11734 }, { "epoch": 0.5311156370219506, "grad_norm": 0.6028915201145363, "learning_rate": 4.739930448830814e-06, "loss": 0.2852, "step": 11735 }, { "epoch": 0.5311608961303462, "grad_norm": 0.29438188912144864, "learning_rate": 4.739198522719922e-06, "loss": 0.4607, "step": 11736 }, { "epoch": 0.5312061552387418, "grad_norm": 0.6169329863522592, "learning_rate": 4.738466602212854e-06, "loss": 0.3175, "step": 11737 }, { "epoch": 0.5312514143471374, "grad_norm": 0.6408398342033451, "learning_rate": 4.737734687325332e-06, "loss": 0.3394, "step": 11738 }, { "epoch": 0.5312966734555329, "grad_norm": 0.2797451455880777, "learning_rate": 4.737002778073089e-06, "loss": 0.4713, "step": 11739 }, { "epoch": 0.5313419325639285, "grad_norm": 0.8173420620683368, "learning_rate": 4.736270874471849e-06, "loss": 0.3008, "step": 11740 }, { "epoch": 0.5313871916723241, "grad_norm": 0.6721444700649927, "learning_rate": 4.735538976537336e-06, "loss": 0.3178, "step": 11741 }, { "epoch": 0.5314324507807197, "grad_norm": 0.2953942616732436, "learning_rate": 4.734807084285278e-06, "loss": 0.4832, "step": 11742 }, { "epoch": 0.5314777098891151, "grad_norm": 0.6173867128773916, "learning_rate": 4.734075197731403e-06, "loss": 0.2878, "step": 11743 }, { "epoch": 0.5315229689975107, "grad_norm": 0.6106232963497055, "learning_rate": 4.733343316891435e-06, "loss": 0.3174, "step": 11744 }, { "epoch": 0.5315682281059063, "grad_norm": 0.6509230072417117, "learning_rate": 4.7326114417811e-06, "loss": 0.3542, "step": 11745 }, { "epoch": 0.5316134872143019, "grad_norm": 0.627189292175373, "learning_rate": 4.7318795724161214e-06, "loss": 0.2748, "step": 11746 }, { "epoch": 0.5316587463226975, "grad_norm": 0.7136115075309686, "learning_rate": 4.731147708812232e-06, "loss": 0.3616, "step": 11747 }, { "epoch": 0.531704005431093, "grad_norm": 0.6629747069512445, "learning_rate": 4.730415850985149e-06, "loss": 0.3701, "step": 11748 }, { "epoch": 0.5317492645394886, "grad_norm": 0.6906180915021461, "learning_rate": 4.729683998950602e-06, "loss": 0.3288, "step": 11749 }, { "epoch": 0.5317945236478842, "grad_norm": 0.5979704596044886, "learning_rate": 4.728952152724317e-06, "loss": 0.3461, "step": 11750 }, { "epoch": 0.5318397827562797, "grad_norm": 0.6733068733347735, "learning_rate": 4.728220312322017e-06, "loss": 0.3514, "step": 11751 }, { "epoch": 0.5318850418646752, "grad_norm": 0.6288131092820356, "learning_rate": 4.7274884777594265e-06, "loss": 0.3166, "step": 11752 }, { "epoch": 0.5319303009730708, "grad_norm": 0.34938270335009647, "learning_rate": 4.726756649052274e-06, "loss": 0.4755, "step": 11753 }, { "epoch": 0.5319755600814664, "grad_norm": 0.8193599994439758, "learning_rate": 4.726024826216281e-06, "loss": 0.2964, "step": 11754 }, { "epoch": 0.532020819189862, "grad_norm": 0.6055021071486388, "learning_rate": 4.725293009267173e-06, "loss": 0.2954, "step": 11755 }, { "epoch": 0.5320660782982575, "grad_norm": 0.31638532661399055, "learning_rate": 4.724561198220672e-06, "loss": 0.4802, "step": 11756 }, { "epoch": 0.5321113374066531, "grad_norm": 0.6145143406936915, "learning_rate": 4.7238293930925085e-06, "loss": 0.3484, "step": 11757 }, { "epoch": 0.5321565965150487, "grad_norm": 1.0571107759072538, "learning_rate": 4.723097593898402e-06, "loss": 0.3007, "step": 11758 }, { "epoch": 0.5322018556234442, "grad_norm": 0.6978453898443266, "learning_rate": 4.7223658006540775e-06, "loss": 0.3161, "step": 11759 }, { "epoch": 0.5322471147318398, "grad_norm": 0.7433654020085223, "learning_rate": 4.7216340133752604e-06, "loss": 0.3334, "step": 11760 }, { "epoch": 0.5322923738402353, "grad_norm": 0.30745122387587526, "learning_rate": 4.720902232077671e-06, "loss": 0.4618, "step": 11761 }, { "epoch": 0.5323376329486309, "grad_norm": 1.7468362884445348, "learning_rate": 4.720170456777036e-06, "loss": 0.2957, "step": 11762 }, { "epoch": 0.5323828920570265, "grad_norm": 0.5927317587947376, "learning_rate": 4.719438687489081e-06, "loss": 0.3068, "step": 11763 }, { "epoch": 0.5324281511654221, "grad_norm": 0.773194991169054, "learning_rate": 4.718706924229525e-06, "loss": 0.3427, "step": 11764 }, { "epoch": 0.5324734102738176, "grad_norm": 0.6088699127754879, "learning_rate": 4.7179751670140936e-06, "loss": 0.303, "step": 11765 }, { "epoch": 0.5325186693822132, "grad_norm": 0.6149492255144813, "learning_rate": 4.717243415858511e-06, "loss": 0.3374, "step": 11766 }, { "epoch": 0.5325639284906087, "grad_norm": 0.6561851623753537, "learning_rate": 4.716511670778496e-06, "loss": 0.3539, "step": 11767 }, { "epoch": 0.5326091875990043, "grad_norm": 0.6043910413103322, "learning_rate": 4.715779931789776e-06, "loss": 0.3041, "step": 11768 }, { "epoch": 0.5326544467073998, "grad_norm": 0.6451846146810193, "learning_rate": 4.715048198908074e-06, "loss": 0.2939, "step": 11769 }, { "epoch": 0.5326997058157954, "grad_norm": 0.3657427690358148, "learning_rate": 4.7143164721491095e-06, "loss": 0.509, "step": 11770 }, { "epoch": 0.532744964924191, "grad_norm": 0.3300517167423059, "learning_rate": 4.713584751528605e-06, "loss": 0.4882, "step": 11771 }, { "epoch": 0.5327902240325866, "grad_norm": 0.8410660502421503, "learning_rate": 4.712853037062286e-06, "loss": 0.3101, "step": 11772 }, { "epoch": 0.5328354831409822, "grad_norm": 0.3151118849340022, "learning_rate": 4.712121328765875e-06, "loss": 0.4687, "step": 11773 }, { "epoch": 0.5328807422493776, "grad_norm": 0.6545005536550695, "learning_rate": 4.71138962665509e-06, "loss": 0.3459, "step": 11774 }, { "epoch": 0.5329260013577732, "grad_norm": 0.30887497244294765, "learning_rate": 4.710657930745656e-06, "loss": 0.481, "step": 11775 }, { "epoch": 0.5329712604661688, "grad_norm": 0.6612389703396073, "learning_rate": 4.709926241053296e-06, "loss": 0.3226, "step": 11776 }, { "epoch": 0.5330165195745644, "grad_norm": 0.6661687889059671, "learning_rate": 4.709194557593729e-06, "loss": 0.3174, "step": 11777 }, { "epoch": 0.5330617786829599, "grad_norm": 0.6595961119315379, "learning_rate": 4.708462880382677e-06, "loss": 0.3221, "step": 11778 }, { "epoch": 0.5331070377913555, "grad_norm": 0.7566412686848811, "learning_rate": 4.707731209435864e-06, "loss": 0.2952, "step": 11779 }, { "epoch": 0.5331522968997511, "grad_norm": 0.6319506838900698, "learning_rate": 4.706999544769009e-06, "loss": 0.3253, "step": 11780 }, { "epoch": 0.5331975560081467, "grad_norm": 0.5638834312022738, "learning_rate": 4.706267886397833e-06, "loss": 0.3122, "step": 11781 }, { "epoch": 0.5332428151165423, "grad_norm": 0.570363300428756, "learning_rate": 4.705536234338059e-06, "loss": 0.3171, "step": 11782 }, { "epoch": 0.5332880742249377, "grad_norm": 0.6580123748014653, "learning_rate": 4.704804588605407e-06, "loss": 0.32, "step": 11783 }, { "epoch": 0.5333333333333333, "grad_norm": 1.0069993479241546, "learning_rate": 4.704072949215598e-06, "loss": 0.3385, "step": 11784 }, { "epoch": 0.5333785924417289, "grad_norm": 0.43109927661137915, "learning_rate": 4.703341316184351e-06, "loss": 0.4799, "step": 11785 }, { "epoch": 0.5334238515501245, "grad_norm": 0.6811949904047957, "learning_rate": 4.702609689527389e-06, "loss": 0.3446, "step": 11786 }, { "epoch": 0.53346911065852, "grad_norm": 0.6131719461888363, "learning_rate": 4.701878069260432e-06, "loss": 0.386, "step": 11787 }, { "epoch": 0.5335143697669156, "grad_norm": 0.6740418379070753, "learning_rate": 4.701146455399198e-06, "loss": 0.3326, "step": 11788 }, { "epoch": 0.5335596288753112, "grad_norm": 0.6461944062048777, "learning_rate": 4.7004148479594114e-06, "loss": 0.3025, "step": 11789 }, { "epoch": 0.5336048879837068, "grad_norm": 0.6409431886352649, "learning_rate": 4.699683246956787e-06, "loss": 0.3169, "step": 11790 }, { "epoch": 0.5336501470921022, "grad_norm": 0.7155276422903248, "learning_rate": 4.698951652407048e-06, "loss": 0.3526, "step": 11791 }, { "epoch": 0.5336954062004978, "grad_norm": 0.6465783751741991, "learning_rate": 4.698220064325915e-06, "loss": 0.3639, "step": 11792 }, { "epoch": 0.5337406653088934, "grad_norm": 0.6097472048185627, "learning_rate": 4.697488482729105e-06, "loss": 0.3421, "step": 11793 }, { "epoch": 0.533785924417289, "grad_norm": 0.6409515693192006, "learning_rate": 4.696756907632336e-06, "loss": 0.3602, "step": 11794 }, { "epoch": 0.5338311835256846, "grad_norm": 0.599249241356723, "learning_rate": 4.6960253390513346e-06, "loss": 0.3309, "step": 11795 }, { "epoch": 0.5338764426340801, "grad_norm": 0.6196382148914346, "learning_rate": 4.6952937770018105e-06, "loss": 0.3502, "step": 11796 }, { "epoch": 0.5339217017424757, "grad_norm": 0.6316758562418318, "learning_rate": 4.694562221499489e-06, "loss": 0.2808, "step": 11797 }, { "epoch": 0.5339669608508713, "grad_norm": 0.6741241563752027, "learning_rate": 4.693830672560089e-06, "loss": 0.3623, "step": 11798 }, { "epoch": 0.5340122199592668, "grad_norm": 0.587340740877701, "learning_rate": 4.6930991301993255e-06, "loss": 0.3471, "step": 11799 }, { "epoch": 0.5340574790676623, "grad_norm": 0.32964651554241226, "learning_rate": 4.692367594432919e-06, "loss": 0.4839, "step": 11800 }, { "epoch": 0.5341027381760579, "grad_norm": 0.6854707444673713, "learning_rate": 4.6916360652765876e-06, "loss": 0.3331, "step": 11801 }, { "epoch": 0.5341479972844535, "grad_norm": 0.7373786452883535, "learning_rate": 4.690904542746052e-06, "loss": 0.3483, "step": 11802 }, { "epoch": 0.5341932563928491, "grad_norm": 0.6466194356635377, "learning_rate": 4.690173026857028e-06, "loss": 0.3411, "step": 11803 }, { "epoch": 0.5342385155012446, "grad_norm": 0.6102879667747352, "learning_rate": 4.689441517625232e-06, "loss": 0.3298, "step": 11804 }, { "epoch": 0.5342837746096402, "grad_norm": 0.30553155491046163, "learning_rate": 4.688710015066388e-06, "loss": 0.4822, "step": 11805 }, { "epoch": 0.5343290337180358, "grad_norm": 0.650611281210423, "learning_rate": 4.687978519196205e-06, "loss": 0.2957, "step": 11806 }, { "epoch": 0.5343742928264313, "grad_norm": 0.5941169919812131, "learning_rate": 4.687247030030409e-06, "loss": 0.3534, "step": 11807 }, { "epoch": 0.5344195519348269, "grad_norm": 0.6493684457101689, "learning_rate": 4.686515547584713e-06, "loss": 0.29, "step": 11808 }, { "epoch": 0.5344648110432224, "grad_norm": 0.6147351712741508, "learning_rate": 4.685784071874835e-06, "loss": 0.3088, "step": 11809 }, { "epoch": 0.534510070151618, "grad_norm": 0.6363524473472438, "learning_rate": 4.68505260291649e-06, "loss": 0.3271, "step": 11810 }, { "epoch": 0.5345553292600136, "grad_norm": 0.618546025225323, "learning_rate": 4.6843211407254e-06, "loss": 0.3188, "step": 11811 }, { "epoch": 0.5346005883684092, "grad_norm": 0.6070095148382589, "learning_rate": 4.683589685317278e-06, "loss": 0.3589, "step": 11812 }, { "epoch": 0.5346458474768047, "grad_norm": 0.619057250383705, "learning_rate": 4.682858236707842e-06, "loss": 0.3006, "step": 11813 }, { "epoch": 0.5346911065852002, "grad_norm": 0.6694414833050002, "learning_rate": 4.682126794912808e-06, "loss": 0.2924, "step": 11814 }, { "epoch": 0.5347363656935958, "grad_norm": 0.6538425008294642, "learning_rate": 4.681395359947894e-06, "loss": 0.3283, "step": 11815 }, { "epoch": 0.5347816248019914, "grad_norm": 0.618884000810644, "learning_rate": 4.680663931828815e-06, "loss": 0.3126, "step": 11816 }, { "epoch": 0.534826883910387, "grad_norm": 0.6622986593486823, "learning_rate": 4.679932510571286e-06, "loss": 0.3021, "step": 11817 }, { "epoch": 0.5348721430187825, "grad_norm": 0.6046152483107878, "learning_rate": 4.679201096191027e-06, "loss": 0.3415, "step": 11818 }, { "epoch": 0.5349174021271781, "grad_norm": 0.6457583281268079, "learning_rate": 4.6784696887037475e-06, "loss": 0.2878, "step": 11819 }, { "epoch": 0.5349626612355737, "grad_norm": 0.7731153771800899, "learning_rate": 4.6777382881251695e-06, "loss": 0.3254, "step": 11820 }, { "epoch": 0.5350079203439693, "grad_norm": 0.5818533268500036, "learning_rate": 4.677006894471006e-06, "loss": 0.3089, "step": 11821 }, { "epoch": 0.5350531794523647, "grad_norm": 0.6437935399271492, "learning_rate": 4.676275507756972e-06, "loss": 0.3404, "step": 11822 }, { "epoch": 0.5350984385607603, "grad_norm": 0.6062609675532306, "learning_rate": 4.6755441279987815e-06, "loss": 0.3071, "step": 11823 }, { "epoch": 0.5351436976691559, "grad_norm": 0.6875800441401139, "learning_rate": 4.674812755212154e-06, "loss": 0.3151, "step": 11824 }, { "epoch": 0.5351889567775515, "grad_norm": 0.6476206078752045, "learning_rate": 4.674081389412799e-06, "loss": 0.3227, "step": 11825 }, { "epoch": 0.535234215885947, "grad_norm": 0.7150470473074529, "learning_rate": 4.673350030616435e-06, "loss": 0.3612, "step": 11826 }, { "epoch": 0.5352794749943426, "grad_norm": 0.8702744022584356, "learning_rate": 4.6726186788387745e-06, "loss": 0.3064, "step": 11827 }, { "epoch": 0.5353247341027382, "grad_norm": 0.6838788147840302, "learning_rate": 4.671887334095537e-06, "loss": 0.3063, "step": 11828 }, { "epoch": 0.5353699932111338, "grad_norm": 0.3631132216259713, "learning_rate": 4.671155996402429e-06, "loss": 0.4708, "step": 11829 }, { "epoch": 0.5354152523195294, "grad_norm": 0.7416558030623343, "learning_rate": 4.670424665775169e-06, "loss": 0.2972, "step": 11830 }, { "epoch": 0.5354605114279248, "grad_norm": 0.3263187846566589, "learning_rate": 4.669693342229473e-06, "loss": 0.4574, "step": 11831 }, { "epoch": 0.5355057705363204, "grad_norm": 0.6714728109702466, "learning_rate": 4.668962025781051e-06, "loss": 0.3016, "step": 11832 }, { "epoch": 0.535551029644716, "grad_norm": 0.6091662070395258, "learning_rate": 4.668230716445618e-06, "loss": 0.308, "step": 11833 }, { "epoch": 0.5355962887531116, "grad_norm": 0.6675165747541576, "learning_rate": 4.66749941423889e-06, "loss": 0.365, "step": 11834 }, { "epoch": 0.5356415478615071, "grad_norm": 0.3165058569241368, "learning_rate": 4.666768119176576e-06, "loss": 0.4817, "step": 11835 }, { "epoch": 0.5356868069699027, "grad_norm": 0.6903447640884273, "learning_rate": 4.666036831274392e-06, "loss": 0.3129, "step": 11836 }, { "epoch": 0.5357320660782983, "grad_norm": 0.37165014809763863, "learning_rate": 4.665305550548053e-06, "loss": 0.4939, "step": 11837 }, { "epoch": 0.5357773251866939, "grad_norm": 0.6023690777695794, "learning_rate": 4.664574277013267e-06, "loss": 0.2911, "step": 11838 }, { "epoch": 0.5358225842950893, "grad_norm": 0.6341556166653007, "learning_rate": 4.663843010685751e-06, "loss": 0.3533, "step": 11839 }, { "epoch": 0.5358678434034849, "grad_norm": 0.3014725055190625, "learning_rate": 4.663111751581217e-06, "loss": 0.4711, "step": 11840 }, { "epoch": 0.5359131025118805, "grad_norm": 0.2846751634981725, "learning_rate": 4.662380499715376e-06, "loss": 0.4784, "step": 11841 }, { "epoch": 0.5359583616202761, "grad_norm": 0.6283147441083387, "learning_rate": 4.661649255103941e-06, "loss": 0.3562, "step": 11842 }, { "epoch": 0.5360036207286717, "grad_norm": 0.7101453282416704, "learning_rate": 4.660918017762624e-06, "loss": 0.4033, "step": 11843 }, { "epoch": 0.5360488798370672, "grad_norm": 0.6798369669668829, "learning_rate": 4.660186787707137e-06, "loss": 0.3172, "step": 11844 }, { "epoch": 0.5360941389454628, "grad_norm": 0.6234796803913034, "learning_rate": 4.6594555649531935e-06, "loss": 0.3335, "step": 11845 }, { "epoch": 0.5361393980538584, "grad_norm": 0.6276398422477145, "learning_rate": 4.658724349516504e-06, "loss": 0.3402, "step": 11846 }, { "epoch": 0.5361846571622539, "grad_norm": 0.6032229180107805, "learning_rate": 4.657993141412781e-06, "loss": 0.2826, "step": 11847 }, { "epoch": 0.5362299162706494, "grad_norm": 0.6492236595026835, "learning_rate": 4.657261940657732e-06, "loss": 0.3014, "step": 11848 }, { "epoch": 0.536275175379045, "grad_norm": 0.6576568803938141, "learning_rate": 4.656530747267073e-06, "loss": 0.3241, "step": 11849 }, { "epoch": 0.5363204344874406, "grad_norm": 0.6720134070854751, "learning_rate": 4.6557995612565146e-06, "loss": 0.2732, "step": 11850 }, { "epoch": 0.5363656935958362, "grad_norm": 0.6301081903669309, "learning_rate": 4.655068382641764e-06, "loss": 0.324, "step": 11851 }, { "epoch": 0.5364109527042318, "grad_norm": 0.649505180454233, "learning_rate": 4.654337211438535e-06, "loss": 0.3196, "step": 11852 }, { "epoch": 0.5364562118126273, "grad_norm": 0.9158481667574583, "learning_rate": 4.653606047662541e-06, "loss": 0.3554, "step": 11853 }, { "epoch": 0.5365014709210228, "grad_norm": 0.5772610368338478, "learning_rate": 4.652874891329484e-06, "loss": 0.3233, "step": 11854 }, { "epoch": 0.5365467300294184, "grad_norm": 0.6187458467750405, "learning_rate": 4.652143742455082e-06, "loss": 0.3304, "step": 11855 }, { "epoch": 0.536591989137814, "grad_norm": 0.6887980125160978, "learning_rate": 4.651412601055042e-06, "loss": 0.3128, "step": 11856 }, { "epoch": 0.5366372482462095, "grad_norm": 0.5715217572713842, "learning_rate": 4.650681467145077e-06, "loss": 0.2979, "step": 11857 }, { "epoch": 0.5366825073546051, "grad_norm": 0.47967059834371656, "learning_rate": 4.649950340740892e-06, "loss": 0.5086, "step": 11858 }, { "epoch": 0.5367277664630007, "grad_norm": 0.6855366105109115, "learning_rate": 4.649219221858199e-06, "loss": 0.3593, "step": 11859 }, { "epoch": 0.5367730255713963, "grad_norm": 0.5939512965029445, "learning_rate": 4.64848811051271e-06, "loss": 0.2789, "step": 11860 }, { "epoch": 0.5368182846797918, "grad_norm": 0.7928225031359858, "learning_rate": 4.6477570067201295e-06, "loss": 0.33, "step": 11861 }, { "epoch": 0.5368635437881873, "grad_norm": 0.6282339858757565, "learning_rate": 4.647025910496169e-06, "loss": 0.3168, "step": 11862 }, { "epoch": 0.5369088028965829, "grad_norm": 0.6349383292936635, "learning_rate": 4.646294821856539e-06, "loss": 0.3152, "step": 11863 }, { "epoch": 0.5369540620049785, "grad_norm": 0.636330307450891, "learning_rate": 4.6455637408169466e-06, "loss": 0.3244, "step": 11864 }, { "epoch": 0.5369993211133741, "grad_norm": 0.3136639017064418, "learning_rate": 4.6448326673931e-06, "loss": 0.5009, "step": 11865 }, { "epoch": 0.5370445802217696, "grad_norm": 0.26578378504096073, "learning_rate": 4.644101601600711e-06, "loss": 0.4579, "step": 11866 }, { "epoch": 0.5370898393301652, "grad_norm": 0.6772093834795655, "learning_rate": 4.6433705434554825e-06, "loss": 0.3301, "step": 11867 }, { "epoch": 0.5371350984385608, "grad_norm": 0.6869899521420297, "learning_rate": 4.6426394929731264e-06, "loss": 0.3938, "step": 11868 }, { "epoch": 0.5371803575469564, "grad_norm": 0.6032550183160156, "learning_rate": 4.641908450169351e-06, "loss": 0.287, "step": 11869 }, { "epoch": 0.5372256166553518, "grad_norm": 0.2729589981531489, "learning_rate": 4.641177415059863e-06, "loss": 0.4619, "step": 11870 }, { "epoch": 0.5372708757637474, "grad_norm": 0.2930555677217789, "learning_rate": 4.640446387660369e-06, "loss": 0.4725, "step": 11871 }, { "epoch": 0.537316134872143, "grad_norm": 0.6367420254060255, "learning_rate": 4.639715367986578e-06, "loss": 0.3643, "step": 11872 }, { "epoch": 0.5373613939805386, "grad_norm": 0.2707564173906194, "learning_rate": 4.6389843560541995e-06, "loss": 0.4915, "step": 11873 }, { "epoch": 0.5374066530889341, "grad_norm": 0.7680386608088853, "learning_rate": 4.638253351878937e-06, "loss": 0.3589, "step": 11874 }, { "epoch": 0.5374519121973297, "grad_norm": 0.6348233087753784, "learning_rate": 4.637522355476499e-06, "loss": 0.3109, "step": 11875 }, { "epoch": 0.5374971713057253, "grad_norm": 0.7222645882308814, "learning_rate": 4.636791366862593e-06, "loss": 0.2914, "step": 11876 }, { "epoch": 0.5375424304141209, "grad_norm": 0.5781883641735861, "learning_rate": 4.636060386052924e-06, "loss": 0.3326, "step": 11877 }, { "epoch": 0.5375876895225165, "grad_norm": 0.593240330734267, "learning_rate": 4.635329413063199e-06, "loss": 0.2968, "step": 11878 }, { "epoch": 0.5376329486309119, "grad_norm": 0.846105349465719, "learning_rate": 4.634598447909127e-06, "loss": 0.2873, "step": 11879 }, { "epoch": 0.5376782077393075, "grad_norm": 0.9134058698420537, "learning_rate": 4.633867490606411e-06, "loss": 0.3117, "step": 11880 }, { "epoch": 0.5377234668477031, "grad_norm": 0.6051682716152033, "learning_rate": 4.633136541170757e-06, "loss": 0.3077, "step": 11881 }, { "epoch": 0.5377687259560987, "grad_norm": 0.6438228832444766, "learning_rate": 4.632405599617875e-06, "loss": 0.3233, "step": 11882 }, { "epoch": 0.5378139850644942, "grad_norm": 0.6111067636628326, "learning_rate": 4.631674665963464e-06, "loss": 0.3048, "step": 11883 }, { "epoch": 0.5378592441728898, "grad_norm": 0.5708830172022095, "learning_rate": 4.630943740223235e-06, "loss": 0.3436, "step": 11884 }, { "epoch": 0.5379045032812854, "grad_norm": 0.385893873408589, "learning_rate": 4.630212822412891e-06, "loss": 0.4633, "step": 11885 }, { "epoch": 0.537949762389681, "grad_norm": 0.6496981603736385, "learning_rate": 4.62948191254814e-06, "loss": 0.3244, "step": 11886 }, { "epoch": 0.5379950214980765, "grad_norm": 0.5687704852137276, "learning_rate": 4.6287510106446814e-06, "loss": 0.3126, "step": 11887 }, { "epoch": 0.538040280606472, "grad_norm": 0.6763024486567406, "learning_rate": 4.628020116718225e-06, "loss": 0.3365, "step": 11888 }, { "epoch": 0.5380855397148676, "grad_norm": 0.3024211002191299, "learning_rate": 4.627289230784474e-06, "loss": 0.4631, "step": 11889 }, { "epoch": 0.5381307988232632, "grad_norm": 0.6528307867202187, "learning_rate": 4.626558352859133e-06, "loss": 0.2705, "step": 11890 }, { "epoch": 0.5381760579316588, "grad_norm": 0.6384263104660005, "learning_rate": 4.625827482957904e-06, "loss": 0.3328, "step": 11891 }, { "epoch": 0.5382213170400543, "grad_norm": 0.6278570776518351, "learning_rate": 4.625096621096497e-06, "loss": 0.3629, "step": 11892 }, { "epoch": 0.5382665761484499, "grad_norm": 0.6506955725953594, "learning_rate": 4.624365767290609e-06, "loss": 0.3031, "step": 11893 }, { "epoch": 0.5383118352568455, "grad_norm": 0.6058536493638982, "learning_rate": 4.6236349215559476e-06, "loss": 0.3065, "step": 11894 }, { "epoch": 0.538357094365241, "grad_norm": 0.6477269762566259, "learning_rate": 4.6229040839082174e-06, "loss": 0.3567, "step": 11895 }, { "epoch": 0.5384023534736365, "grad_norm": 0.7315641361074862, "learning_rate": 4.622173254363117e-06, "loss": 0.358, "step": 11896 }, { "epoch": 0.5384476125820321, "grad_norm": 0.6390613193796425, "learning_rate": 4.621442432936355e-06, "loss": 0.3315, "step": 11897 }, { "epoch": 0.5384928716904277, "grad_norm": 0.6932636661113306, "learning_rate": 4.620711619643633e-06, "loss": 0.3121, "step": 11898 }, { "epoch": 0.5385381307988233, "grad_norm": 0.6466938654027299, "learning_rate": 4.619980814500654e-06, "loss": 0.3536, "step": 11899 }, { "epoch": 0.5385833899072189, "grad_norm": 0.3754239818716435, "learning_rate": 4.619250017523118e-06, "loss": 0.5017, "step": 11900 }, { "epoch": 0.5386286490156144, "grad_norm": 0.6201964027603627, "learning_rate": 4.61851922872673e-06, "loss": 0.3208, "step": 11901 }, { "epoch": 0.53867390812401, "grad_norm": 0.6683817172821697, "learning_rate": 4.617788448127194e-06, "loss": 0.3018, "step": 11902 }, { "epoch": 0.5387191672324055, "grad_norm": 0.6207113423464251, "learning_rate": 4.6170576757402095e-06, "loss": 0.3449, "step": 11903 }, { "epoch": 0.5387644263408011, "grad_norm": 0.6061645330534171, "learning_rate": 4.616326911581478e-06, "loss": 0.3246, "step": 11904 }, { "epoch": 0.5388096854491966, "grad_norm": 0.6102535429940648, "learning_rate": 4.6155961556667064e-06, "loss": 0.2933, "step": 11905 }, { "epoch": 0.5388549445575922, "grad_norm": 0.6458502246912481, "learning_rate": 4.614865408011589e-06, "loss": 0.3462, "step": 11906 }, { "epoch": 0.5389002036659878, "grad_norm": 0.7296757629234377, "learning_rate": 4.614134668631832e-06, "loss": 0.3193, "step": 11907 }, { "epoch": 0.5389454627743834, "grad_norm": 0.6260925635905281, "learning_rate": 4.613403937543138e-06, "loss": 0.3622, "step": 11908 }, { "epoch": 0.5389907218827789, "grad_norm": 0.3092383604250637, "learning_rate": 4.612673214761204e-06, "loss": 0.4524, "step": 11909 }, { "epoch": 0.5390359809911744, "grad_norm": 0.641101217881589, "learning_rate": 4.611942500301733e-06, "loss": 0.3545, "step": 11910 }, { "epoch": 0.53908124009957, "grad_norm": 0.729212585377849, "learning_rate": 4.611211794180427e-06, "loss": 0.3894, "step": 11911 }, { "epoch": 0.5391264992079656, "grad_norm": 0.6985165644437046, "learning_rate": 4.610481096412985e-06, "loss": 0.3575, "step": 11912 }, { "epoch": 0.5391717583163612, "grad_norm": 0.5623032596839105, "learning_rate": 4.609750407015107e-06, "loss": 0.3114, "step": 11913 }, { "epoch": 0.5392170174247567, "grad_norm": 0.6227257576443729, "learning_rate": 4.609019726002494e-06, "loss": 0.3008, "step": 11914 }, { "epoch": 0.5392622765331523, "grad_norm": 0.6173945881059006, "learning_rate": 4.608289053390849e-06, "loss": 0.3494, "step": 11915 }, { "epoch": 0.5393075356415479, "grad_norm": 0.3187630793576256, "learning_rate": 4.6075583891958665e-06, "loss": 0.5019, "step": 11916 }, { "epoch": 0.5393527947499435, "grad_norm": 0.6328780046178049, "learning_rate": 4.606827733433249e-06, "loss": 0.3434, "step": 11917 }, { "epoch": 0.5393980538583389, "grad_norm": 0.2991991417478379, "learning_rate": 4.606097086118699e-06, "loss": 0.46, "step": 11918 }, { "epoch": 0.5394433129667345, "grad_norm": 0.5951216847187082, "learning_rate": 4.60536644726791e-06, "loss": 0.3301, "step": 11919 }, { "epoch": 0.5394885720751301, "grad_norm": 0.6331957282164389, "learning_rate": 4.604635816896583e-06, "loss": 0.2981, "step": 11920 }, { "epoch": 0.5395338311835257, "grad_norm": 0.28609010619166414, "learning_rate": 4.6039051950204215e-06, "loss": 0.4919, "step": 11921 }, { "epoch": 0.5395790902919213, "grad_norm": 0.5983856542995213, "learning_rate": 4.603174581655118e-06, "loss": 0.3241, "step": 11922 }, { "epoch": 0.5396243494003168, "grad_norm": 0.65502210479594, "learning_rate": 4.602443976816375e-06, "loss": 0.3307, "step": 11923 }, { "epoch": 0.5396696085087124, "grad_norm": 0.6122185417503756, "learning_rate": 4.601713380519891e-06, "loss": 0.3455, "step": 11924 }, { "epoch": 0.539714867617108, "grad_norm": 0.6313224879589558, "learning_rate": 4.600982792781361e-06, "loss": 0.3278, "step": 11925 }, { "epoch": 0.5397601267255036, "grad_norm": 0.608001776525165, "learning_rate": 4.600252213616486e-06, "loss": 0.3315, "step": 11926 }, { "epoch": 0.539805385833899, "grad_norm": 0.6289475431513395, "learning_rate": 4.599521643040964e-06, "loss": 0.3284, "step": 11927 }, { "epoch": 0.5398506449422946, "grad_norm": 0.6749502573548707, "learning_rate": 4.598791081070493e-06, "loss": 0.3224, "step": 11928 }, { "epoch": 0.5398959040506902, "grad_norm": 0.5380611898001285, "learning_rate": 4.598060527720766e-06, "loss": 0.2764, "step": 11929 }, { "epoch": 0.5399411631590858, "grad_norm": 0.6538404966143314, "learning_rate": 4.597329983007486e-06, "loss": 0.329, "step": 11930 }, { "epoch": 0.5399864222674813, "grad_norm": 0.6662952534304654, "learning_rate": 4.5965994469463485e-06, "loss": 0.3138, "step": 11931 }, { "epoch": 0.5400316813758769, "grad_norm": 0.59477988751589, "learning_rate": 4.595868919553049e-06, "loss": 0.3088, "step": 11932 }, { "epoch": 0.5400769404842725, "grad_norm": 0.5960129169306702, "learning_rate": 4.595138400843285e-06, "loss": 0.3148, "step": 11933 }, { "epoch": 0.540122199592668, "grad_norm": 0.8784161525700802, "learning_rate": 4.594407890832755e-06, "loss": 0.3107, "step": 11934 }, { "epoch": 0.5401674587010636, "grad_norm": 0.7629956582515477, "learning_rate": 4.5936773895371525e-06, "loss": 0.3674, "step": 11935 }, { "epoch": 0.5402127178094591, "grad_norm": 0.38093627317634576, "learning_rate": 4.592946896972174e-06, "loss": 0.4688, "step": 11936 }, { "epoch": 0.5402579769178547, "grad_norm": 0.5874686472601677, "learning_rate": 4.592216413153519e-06, "loss": 0.2927, "step": 11937 }, { "epoch": 0.5403032360262503, "grad_norm": 0.6845177792070586, "learning_rate": 4.591485938096879e-06, "loss": 0.2726, "step": 11938 }, { "epoch": 0.5403484951346459, "grad_norm": 0.6942333260757441, "learning_rate": 4.590755471817951e-06, "loss": 0.2617, "step": 11939 }, { "epoch": 0.5403937542430414, "grad_norm": 0.5685941422602182, "learning_rate": 4.590025014332431e-06, "loss": 0.3057, "step": 11940 }, { "epoch": 0.540439013351437, "grad_norm": 0.6192698308865857, "learning_rate": 4.589294565656017e-06, "loss": 0.3292, "step": 11941 }, { "epoch": 0.5404842724598325, "grad_norm": 0.7505932995096428, "learning_rate": 4.5885641258044e-06, "loss": 0.3679, "step": 11942 }, { "epoch": 0.5405295315682281, "grad_norm": 0.5872677963439269, "learning_rate": 4.587833694793274e-06, "loss": 0.3437, "step": 11943 }, { "epoch": 0.5405747906766236, "grad_norm": 0.29940284734855005, "learning_rate": 4.587103272638339e-06, "loss": 0.4798, "step": 11944 }, { "epoch": 0.5406200497850192, "grad_norm": 0.7274460135206671, "learning_rate": 4.586372859355285e-06, "loss": 0.3247, "step": 11945 }, { "epoch": 0.5406653088934148, "grad_norm": 0.6549271468756779, "learning_rate": 4.585642454959809e-06, "loss": 0.3083, "step": 11946 }, { "epoch": 0.5407105680018104, "grad_norm": 0.6872198529055045, "learning_rate": 4.584912059467604e-06, "loss": 0.2975, "step": 11947 }, { "epoch": 0.540755827110206, "grad_norm": 0.7216276634194774, "learning_rate": 4.584181672894362e-06, "loss": 0.3594, "step": 11948 }, { "epoch": 0.5408010862186015, "grad_norm": 0.6088900714811756, "learning_rate": 4.5834512952557805e-06, "loss": 0.3129, "step": 11949 }, { "epoch": 0.540846345326997, "grad_norm": 0.5911821508409031, "learning_rate": 4.582720926567552e-06, "loss": 0.3398, "step": 11950 }, { "epoch": 0.5408916044353926, "grad_norm": 0.6734692864936836, "learning_rate": 4.581990566845368e-06, "loss": 0.3065, "step": 11951 }, { "epoch": 0.5409368635437882, "grad_norm": 0.6074726667239979, "learning_rate": 4.581260216104923e-06, "loss": 0.3369, "step": 11952 }, { "epoch": 0.5409821226521837, "grad_norm": 0.779060196209588, "learning_rate": 4.580529874361911e-06, "loss": 0.3316, "step": 11953 }, { "epoch": 0.5410273817605793, "grad_norm": 0.6229799956881542, "learning_rate": 4.579799541632022e-06, "loss": 0.3096, "step": 11954 }, { "epoch": 0.5410726408689749, "grad_norm": 0.30134750706287144, "learning_rate": 4.5790692179309506e-06, "loss": 0.4898, "step": 11955 }, { "epoch": 0.5411178999773705, "grad_norm": 0.6001279768603515, "learning_rate": 4.578338903274389e-06, "loss": 0.2994, "step": 11956 }, { "epoch": 0.5411631590857661, "grad_norm": 0.6335982180014519, "learning_rate": 4.577608597678031e-06, "loss": 0.3237, "step": 11957 }, { "epoch": 0.5412084181941615, "grad_norm": 0.6388397554978568, "learning_rate": 4.576878301157564e-06, "loss": 0.3291, "step": 11958 }, { "epoch": 0.5412536773025571, "grad_norm": 0.6780612805093535, "learning_rate": 4.576148013728685e-06, "loss": 0.2893, "step": 11959 }, { "epoch": 0.5412989364109527, "grad_norm": 0.6035805490342991, "learning_rate": 4.575417735407084e-06, "loss": 0.288, "step": 11960 }, { "epoch": 0.5413441955193483, "grad_norm": 0.5938415113037409, "learning_rate": 4.57468746620845e-06, "loss": 0.3083, "step": 11961 }, { "epoch": 0.5413894546277438, "grad_norm": 0.6037836247611696, "learning_rate": 4.573957206148476e-06, "loss": 0.2967, "step": 11962 }, { "epoch": 0.5414347137361394, "grad_norm": 0.5917513817545543, "learning_rate": 4.573226955242856e-06, "loss": 0.3599, "step": 11963 }, { "epoch": 0.541479972844535, "grad_norm": 0.6220484958209552, "learning_rate": 4.5724967135072746e-06, "loss": 0.3089, "step": 11964 }, { "epoch": 0.5415252319529306, "grad_norm": 0.6425750847613974, "learning_rate": 4.571766480957427e-06, "loss": 0.3187, "step": 11965 }, { "epoch": 0.541570491061326, "grad_norm": 0.6090489388183843, "learning_rate": 4.571036257609004e-06, "loss": 0.3204, "step": 11966 }, { "epoch": 0.5416157501697216, "grad_norm": 0.31832057329905533, "learning_rate": 4.570306043477693e-06, "loss": 0.4816, "step": 11967 }, { "epoch": 0.5416610092781172, "grad_norm": 0.6710364857290778, "learning_rate": 4.569575838579184e-06, "loss": 0.3218, "step": 11968 }, { "epoch": 0.5417062683865128, "grad_norm": 0.6389355538423641, "learning_rate": 4.56884564292917e-06, "loss": 0.3415, "step": 11969 }, { "epoch": 0.5417515274949084, "grad_norm": 0.682930932654138, "learning_rate": 4.568115456543339e-06, "loss": 0.3246, "step": 11970 }, { "epoch": 0.5417967866033039, "grad_norm": 0.2845147003828224, "learning_rate": 4.567385279437381e-06, "loss": 0.4868, "step": 11971 }, { "epoch": 0.5418420457116995, "grad_norm": 0.2978005161473394, "learning_rate": 4.566655111626982e-06, "loss": 0.4951, "step": 11972 }, { "epoch": 0.5418873048200951, "grad_norm": 0.688595153123291, "learning_rate": 4.565924953127837e-06, "loss": 0.362, "step": 11973 }, { "epoch": 0.5419325639284907, "grad_norm": 0.28024177680746104, "learning_rate": 4.56519480395563e-06, "loss": 0.4786, "step": 11974 }, { "epoch": 0.5419778230368861, "grad_norm": 0.6663854522634133, "learning_rate": 4.564464664126052e-06, "loss": 0.3376, "step": 11975 }, { "epoch": 0.5420230821452817, "grad_norm": 0.6908051407952255, "learning_rate": 4.56373453365479e-06, "loss": 0.3519, "step": 11976 }, { "epoch": 0.5420683412536773, "grad_norm": 0.658050688240133, "learning_rate": 4.563004412557532e-06, "loss": 0.3238, "step": 11977 }, { "epoch": 0.5421136003620729, "grad_norm": 0.6400947024626424, "learning_rate": 4.562274300849968e-06, "loss": 0.3674, "step": 11978 }, { "epoch": 0.5421588594704684, "grad_norm": 0.611671600869507, "learning_rate": 4.561544198547786e-06, "loss": 0.3258, "step": 11979 }, { "epoch": 0.542204118578864, "grad_norm": 0.6596228395551971, "learning_rate": 4.560814105666672e-06, "loss": 0.317, "step": 11980 }, { "epoch": 0.5422493776872596, "grad_norm": 0.3664205560652866, "learning_rate": 4.560084022222313e-06, "loss": 0.4869, "step": 11981 }, { "epoch": 0.5422946367956551, "grad_norm": 0.655928615674818, "learning_rate": 4.559353948230399e-06, "loss": 0.3114, "step": 11982 }, { "epoch": 0.5423398959040507, "grad_norm": 0.591209692657875, "learning_rate": 4.558623883706613e-06, "loss": 0.2704, "step": 11983 }, { "epoch": 0.5423851550124462, "grad_norm": 0.6378830156479883, "learning_rate": 4.5578938286666455e-06, "loss": 0.3081, "step": 11984 }, { "epoch": 0.5424304141208418, "grad_norm": 0.28458597921919504, "learning_rate": 4.557163783126181e-06, "loss": 0.4662, "step": 11985 }, { "epoch": 0.5424756732292374, "grad_norm": 0.29464568064879143, "learning_rate": 4.556433747100909e-06, "loss": 0.4925, "step": 11986 }, { "epoch": 0.542520932337633, "grad_norm": 0.2928511770969385, "learning_rate": 4.5557037206065105e-06, "loss": 0.4858, "step": 11987 }, { "epoch": 0.5425661914460285, "grad_norm": 0.6863478044764233, "learning_rate": 4.554973703658676e-06, "loss": 0.3027, "step": 11988 }, { "epoch": 0.5426114505544241, "grad_norm": 0.3077387580483022, "learning_rate": 4.554243696273091e-06, "loss": 0.4724, "step": 11989 }, { "epoch": 0.5426567096628196, "grad_norm": 0.5953582888405915, "learning_rate": 4.553513698465438e-06, "loss": 0.3115, "step": 11990 }, { "epoch": 0.5427019687712152, "grad_norm": 0.6285184049048317, "learning_rate": 4.552783710251404e-06, "loss": 0.3038, "step": 11991 }, { "epoch": 0.5427472278796107, "grad_norm": 0.5900146272819862, "learning_rate": 4.5520537316466775e-06, "loss": 0.3, "step": 11992 }, { "epoch": 0.5427924869880063, "grad_norm": 0.5857511473636708, "learning_rate": 4.551323762666937e-06, "loss": 0.3211, "step": 11993 }, { "epoch": 0.5428377460964019, "grad_norm": 0.35763795383161173, "learning_rate": 4.550593803327873e-06, "loss": 0.4651, "step": 11994 }, { "epoch": 0.5428830052047975, "grad_norm": 0.6251418565575303, "learning_rate": 4.5498638536451675e-06, "loss": 0.3648, "step": 11995 }, { "epoch": 0.5429282643131931, "grad_norm": 0.6511923733741692, "learning_rate": 4.5491339136345055e-06, "loss": 0.3317, "step": 11996 }, { "epoch": 0.5429735234215886, "grad_norm": 0.6871015698177254, "learning_rate": 4.548403983311569e-06, "loss": 0.3394, "step": 11997 }, { "epoch": 0.5430187825299841, "grad_norm": 0.3014881726501736, "learning_rate": 4.547674062692046e-06, "loss": 0.505, "step": 11998 }, { "epoch": 0.5430640416383797, "grad_norm": 0.28402470065143265, "learning_rate": 4.546944151791618e-06, "loss": 0.4723, "step": 11999 }, { "epoch": 0.5431093007467753, "grad_norm": 0.5945012448408755, "learning_rate": 4.546214250625969e-06, "loss": 0.3266, "step": 12000 }, { "epoch": 0.5431545598551708, "grad_norm": 0.6330897895361995, "learning_rate": 4.54548435921078e-06, "loss": 0.3621, "step": 12001 }, { "epoch": 0.5431998189635664, "grad_norm": 0.3276877083329625, "learning_rate": 4.544754477561739e-06, "loss": 0.474, "step": 12002 }, { "epoch": 0.543245078071962, "grad_norm": 0.6783486670274087, "learning_rate": 4.544024605694524e-06, "loss": 0.3111, "step": 12003 }, { "epoch": 0.5432903371803576, "grad_norm": 0.6288752587564099, "learning_rate": 4.54329474362482e-06, "loss": 0.3129, "step": 12004 }, { "epoch": 0.5433355962887532, "grad_norm": 0.6338755715175465, "learning_rate": 4.542564891368311e-06, "loss": 0.2944, "step": 12005 }, { "epoch": 0.5433808553971486, "grad_norm": 0.7239986062645524, "learning_rate": 4.541835048940675e-06, "loss": 0.3262, "step": 12006 }, { "epoch": 0.5434261145055442, "grad_norm": 0.6078227665196527, "learning_rate": 4.5411052163575986e-06, "loss": 0.3215, "step": 12007 }, { "epoch": 0.5434713736139398, "grad_norm": 0.6332652552296952, "learning_rate": 4.540375393634762e-06, "loss": 0.343, "step": 12008 }, { "epoch": 0.5435166327223354, "grad_norm": 0.29713938921676397, "learning_rate": 4.539645580787845e-06, "loss": 0.4671, "step": 12009 }, { "epoch": 0.5435618918307309, "grad_norm": 0.30752786135562743, "learning_rate": 4.538915777832531e-06, "loss": 0.4816, "step": 12010 }, { "epoch": 0.5436071509391265, "grad_norm": 0.30628064591608223, "learning_rate": 4.538185984784501e-06, "loss": 0.4879, "step": 12011 }, { "epoch": 0.5436524100475221, "grad_norm": 0.6034792349851319, "learning_rate": 4.537456201659437e-06, "loss": 0.3286, "step": 12012 }, { "epoch": 0.5436976691559177, "grad_norm": 0.6460022175329486, "learning_rate": 4.536726428473017e-06, "loss": 0.3018, "step": 12013 }, { "epoch": 0.5437429282643131, "grad_norm": 0.6330256849552708, "learning_rate": 4.535996665240923e-06, "loss": 0.3531, "step": 12014 }, { "epoch": 0.5437881873727087, "grad_norm": 1.0554318969867602, "learning_rate": 4.535266911978838e-06, "loss": 0.3037, "step": 12015 }, { "epoch": 0.5438334464811043, "grad_norm": 1.0406055103027143, "learning_rate": 4.534537168702437e-06, "loss": 0.3666, "step": 12016 }, { "epoch": 0.5438787055894999, "grad_norm": 0.6586450980212927, "learning_rate": 4.533807435427404e-06, "loss": 0.318, "step": 12017 }, { "epoch": 0.5439239646978955, "grad_norm": 0.6321733642129694, "learning_rate": 4.533077712169418e-06, "loss": 0.3146, "step": 12018 }, { "epoch": 0.543969223806291, "grad_norm": 0.5850842470544956, "learning_rate": 4.532347998944158e-06, "loss": 0.2886, "step": 12019 }, { "epoch": 0.5440144829146866, "grad_norm": 0.356016927067565, "learning_rate": 4.531618295767301e-06, "loss": 0.4809, "step": 12020 }, { "epoch": 0.5440597420230822, "grad_norm": 0.30494498859019264, "learning_rate": 4.53088860265453e-06, "loss": 0.4538, "step": 12021 }, { "epoch": 0.5441050011314778, "grad_norm": 0.27791137503165086, "learning_rate": 4.5301589196215214e-06, "loss": 0.4459, "step": 12022 }, { "epoch": 0.5441502602398732, "grad_norm": 0.7059627342048436, "learning_rate": 4.529429246683956e-06, "loss": 0.3609, "step": 12023 }, { "epoch": 0.5441955193482688, "grad_norm": 0.6517913855189322, "learning_rate": 4.52869958385751e-06, "loss": 0.333, "step": 12024 }, { "epoch": 0.5442407784566644, "grad_norm": 0.6495883155524725, "learning_rate": 4.527969931157863e-06, "loss": 0.3143, "step": 12025 }, { "epoch": 0.54428603756506, "grad_norm": 0.633776785633181, "learning_rate": 4.5272402886006904e-06, "loss": 0.3674, "step": 12026 }, { "epoch": 0.5443312966734555, "grad_norm": 0.3472350589231652, "learning_rate": 4.526510656201673e-06, "loss": 0.4945, "step": 12027 }, { "epoch": 0.5443765557818511, "grad_norm": 0.682612710667583, "learning_rate": 4.525781033976489e-06, "loss": 0.3017, "step": 12028 }, { "epoch": 0.5444218148902467, "grad_norm": 0.7569396141176389, "learning_rate": 4.525051421940813e-06, "loss": 0.3625, "step": 12029 }, { "epoch": 0.5444670739986422, "grad_norm": 0.7007330874287369, "learning_rate": 4.524321820110322e-06, "loss": 0.3546, "step": 12030 }, { "epoch": 0.5445123331070378, "grad_norm": 0.624317023029584, "learning_rate": 4.523592228500696e-06, "loss": 0.3169, "step": 12031 }, { "epoch": 0.5445575922154333, "grad_norm": 0.6360197728590014, "learning_rate": 4.522862647127609e-06, "loss": 0.2608, "step": 12032 }, { "epoch": 0.5446028513238289, "grad_norm": 0.622608362495375, "learning_rate": 4.5221330760067386e-06, "loss": 0.2985, "step": 12033 }, { "epoch": 0.5446481104322245, "grad_norm": 0.6777283117689842, "learning_rate": 4.521403515153762e-06, "loss": 0.2987, "step": 12034 }, { "epoch": 0.5446933695406201, "grad_norm": 0.6558982457517104, "learning_rate": 4.520673964584351e-06, "loss": 0.312, "step": 12035 }, { "epoch": 0.5447386286490156, "grad_norm": 0.5762145784272449, "learning_rate": 4.519944424314186e-06, "loss": 0.273, "step": 12036 }, { "epoch": 0.5447838877574112, "grad_norm": 0.7052212045652171, "learning_rate": 4.519214894358942e-06, "loss": 0.3186, "step": 12037 }, { "epoch": 0.5448291468658067, "grad_norm": 0.6014994041697003, "learning_rate": 4.5184853747342926e-06, "loss": 0.3102, "step": 12038 }, { "epoch": 0.5448744059742023, "grad_norm": 0.3189094508896656, "learning_rate": 4.517755865455912e-06, "loss": 0.4806, "step": 12039 }, { "epoch": 0.5449196650825979, "grad_norm": 0.6859969141969133, "learning_rate": 4.517026366539477e-06, "loss": 0.3474, "step": 12040 }, { "epoch": 0.5449649241909934, "grad_norm": 0.6115840685105562, "learning_rate": 4.516296878000664e-06, "loss": 0.323, "step": 12041 }, { "epoch": 0.545010183299389, "grad_norm": 0.6546584864922931, "learning_rate": 4.515567399855145e-06, "loss": 0.3155, "step": 12042 }, { "epoch": 0.5450554424077846, "grad_norm": 0.600731391425279, "learning_rate": 4.514837932118593e-06, "loss": 0.3146, "step": 12043 }, { "epoch": 0.5451007015161802, "grad_norm": 0.5894286013671077, "learning_rate": 4.514108474806687e-06, "loss": 0.295, "step": 12044 }, { "epoch": 0.5451459606245757, "grad_norm": 0.7288286128601018, "learning_rate": 4.513379027935094e-06, "loss": 0.3349, "step": 12045 }, { "epoch": 0.5451912197329712, "grad_norm": 0.5718145121685675, "learning_rate": 4.5126495915194936e-06, "loss": 0.3451, "step": 12046 }, { "epoch": 0.5452364788413668, "grad_norm": 0.6346709284490044, "learning_rate": 4.5119201655755565e-06, "loss": 0.3341, "step": 12047 }, { "epoch": 0.5452817379497624, "grad_norm": 0.6521556040336637, "learning_rate": 4.511190750118955e-06, "loss": 0.3268, "step": 12048 }, { "epoch": 0.5453269970581579, "grad_norm": 0.3027426900568299, "learning_rate": 4.510461345165362e-06, "loss": 0.4741, "step": 12049 }, { "epoch": 0.5453722561665535, "grad_norm": 0.649681449175025, "learning_rate": 4.509731950730454e-06, "loss": 0.3516, "step": 12050 }, { "epoch": 0.5454175152749491, "grad_norm": 0.7190082909611529, "learning_rate": 4.509002566829899e-06, "loss": 0.3321, "step": 12051 }, { "epoch": 0.5454627743833447, "grad_norm": 0.30540548048002153, "learning_rate": 4.508273193479371e-06, "loss": 0.4746, "step": 12052 }, { "epoch": 0.5455080334917403, "grad_norm": 0.6481182817233865, "learning_rate": 4.507543830694543e-06, "loss": 0.3647, "step": 12053 }, { "epoch": 0.5455532926001357, "grad_norm": 0.6334004009278218, "learning_rate": 4.506814478491084e-06, "loss": 0.308, "step": 12054 }, { "epoch": 0.5455985517085313, "grad_norm": 0.6768165181731143, "learning_rate": 4.506085136884667e-06, "loss": 0.3341, "step": 12055 }, { "epoch": 0.5456438108169269, "grad_norm": 0.6072803233190442, "learning_rate": 4.505355805890964e-06, "loss": 0.3679, "step": 12056 }, { "epoch": 0.5456890699253225, "grad_norm": 0.6449685267219651, "learning_rate": 4.504626485525647e-06, "loss": 0.2941, "step": 12057 }, { "epoch": 0.545734329033718, "grad_norm": 0.3259735884598887, "learning_rate": 4.503897175804383e-06, "loss": 0.4699, "step": 12058 }, { "epoch": 0.5457795881421136, "grad_norm": 0.622368199343704, "learning_rate": 4.503167876742846e-06, "loss": 0.3379, "step": 12059 }, { "epoch": 0.5458248472505092, "grad_norm": 0.6202468893832425, "learning_rate": 4.502438588356707e-06, "loss": 0.3012, "step": 12060 }, { "epoch": 0.5458701063589048, "grad_norm": 0.6416966220784046, "learning_rate": 4.501709310661632e-06, "loss": 0.297, "step": 12061 }, { "epoch": 0.5459153654673002, "grad_norm": 0.4889086177698067, "learning_rate": 4.500980043673295e-06, "loss": 0.5102, "step": 12062 }, { "epoch": 0.5459606245756958, "grad_norm": 0.25808571648322653, "learning_rate": 4.5002507874073655e-06, "loss": 0.4713, "step": 12063 }, { "epoch": 0.5460058836840914, "grad_norm": 0.6337181113011422, "learning_rate": 4.499521541879508e-06, "loss": 0.3067, "step": 12064 }, { "epoch": 0.546051142792487, "grad_norm": 0.5806705729297652, "learning_rate": 4.498792307105398e-06, "loss": 0.3187, "step": 12065 }, { "epoch": 0.5460964019008826, "grad_norm": 0.6180438362672532, "learning_rate": 4.498063083100703e-06, "loss": 0.3391, "step": 12066 }, { "epoch": 0.5461416610092781, "grad_norm": 0.6576832425490626, "learning_rate": 4.497333869881089e-06, "loss": 0.3072, "step": 12067 }, { "epoch": 0.5461869201176737, "grad_norm": 0.32216628821352994, "learning_rate": 4.496604667462225e-06, "loss": 0.4592, "step": 12068 }, { "epoch": 0.5462321792260693, "grad_norm": 0.7037977697870069, "learning_rate": 4.495875475859783e-06, "loss": 0.3309, "step": 12069 }, { "epoch": 0.5462774383344648, "grad_norm": 0.6512723214925447, "learning_rate": 4.495146295089428e-06, "loss": 0.3421, "step": 12070 }, { "epoch": 0.5463226974428603, "grad_norm": 0.6249409724190828, "learning_rate": 4.49441712516683e-06, "loss": 0.3383, "step": 12071 }, { "epoch": 0.5463679565512559, "grad_norm": 0.5988598636905825, "learning_rate": 4.493687966107652e-06, "loss": 0.3084, "step": 12072 }, { "epoch": 0.5464132156596515, "grad_norm": 0.666932683877474, "learning_rate": 4.492958817927569e-06, "loss": 0.3239, "step": 12073 }, { "epoch": 0.5464584747680471, "grad_norm": 0.7087470576939989, "learning_rate": 4.492229680642239e-06, "loss": 0.3071, "step": 12074 }, { "epoch": 0.5465037338764427, "grad_norm": 0.5911853754264186, "learning_rate": 4.4915005542673365e-06, "loss": 0.3713, "step": 12075 }, { "epoch": 0.5465489929848382, "grad_norm": 0.6432801875991805, "learning_rate": 4.490771438818525e-06, "loss": 0.3105, "step": 12076 }, { "epoch": 0.5465942520932338, "grad_norm": 0.6452758095884515, "learning_rate": 4.490042334311472e-06, "loss": 0.3419, "step": 12077 }, { "epoch": 0.5466395112016293, "grad_norm": 0.6089284177796053, "learning_rate": 4.48931324076184e-06, "loss": 0.269, "step": 12078 }, { "epoch": 0.5466847703100249, "grad_norm": 0.6470458332930936, "learning_rate": 4.488584158185301e-06, "loss": 0.3078, "step": 12079 }, { "epoch": 0.5467300294184204, "grad_norm": 0.6071068578697375, "learning_rate": 4.487855086597517e-06, "loss": 0.2899, "step": 12080 }, { "epoch": 0.546775288526816, "grad_norm": 0.6715335365000962, "learning_rate": 4.487126026014154e-06, "loss": 0.3827, "step": 12081 }, { "epoch": 0.5468205476352116, "grad_norm": 0.6113273788313882, "learning_rate": 4.486396976450876e-06, "loss": 0.3398, "step": 12082 }, { "epoch": 0.5468658067436072, "grad_norm": 0.6478775139279586, "learning_rate": 4.485667937923352e-06, "loss": 0.2983, "step": 12083 }, { "epoch": 0.5469110658520027, "grad_norm": 0.34625857371418256, "learning_rate": 4.4849389104472435e-06, "loss": 0.4905, "step": 12084 }, { "epoch": 0.5469563249603983, "grad_norm": 0.6405866789560921, "learning_rate": 4.4842098940382155e-06, "loss": 0.3285, "step": 12085 }, { "epoch": 0.5470015840687938, "grad_norm": 0.5824949965901939, "learning_rate": 4.483480888711935e-06, "loss": 0.3227, "step": 12086 }, { "epoch": 0.5470468431771894, "grad_norm": 0.6195648132210937, "learning_rate": 4.4827518944840606e-06, "loss": 0.3531, "step": 12087 }, { "epoch": 0.547092102285585, "grad_norm": 0.6462744084113154, "learning_rate": 4.48202291137026e-06, "loss": 0.3271, "step": 12088 }, { "epoch": 0.5471373613939805, "grad_norm": 0.6415272922809488, "learning_rate": 4.481293939386198e-06, "loss": 0.3474, "step": 12089 }, { "epoch": 0.5471826205023761, "grad_norm": 0.7149404102600072, "learning_rate": 4.480564978547535e-06, "loss": 0.3007, "step": 12090 }, { "epoch": 0.5472278796107717, "grad_norm": 0.7008732021330117, "learning_rate": 4.479836028869935e-06, "loss": 0.3111, "step": 12091 }, { "epoch": 0.5472731387191673, "grad_norm": 0.583088917034706, "learning_rate": 4.479107090369063e-06, "loss": 0.3306, "step": 12092 }, { "epoch": 0.5473183978275628, "grad_norm": 0.6070470620347299, "learning_rate": 4.478378163060577e-06, "loss": 0.2929, "step": 12093 }, { "epoch": 0.5473636569359583, "grad_norm": 0.7040018121624776, "learning_rate": 4.477649246960144e-06, "loss": 0.292, "step": 12094 }, { "epoch": 0.5474089160443539, "grad_norm": 0.6153340024288626, "learning_rate": 4.476920342083425e-06, "loss": 0.293, "step": 12095 }, { "epoch": 0.5474541751527495, "grad_norm": 0.6445238387160752, "learning_rate": 4.47619144844608e-06, "loss": 0.3593, "step": 12096 }, { "epoch": 0.547499434261145, "grad_norm": 0.5952893306035464, "learning_rate": 4.475462566063771e-06, "loss": 0.3116, "step": 12097 }, { "epoch": 0.5475446933695406, "grad_norm": 0.5764937417401543, "learning_rate": 4.474733694952162e-06, "loss": 0.3125, "step": 12098 }, { "epoch": 0.5475899524779362, "grad_norm": 0.5910636591909206, "learning_rate": 4.474004835126913e-06, "loss": 0.3149, "step": 12099 }, { "epoch": 0.5476352115863318, "grad_norm": 0.6165891092203315, "learning_rate": 4.4732759866036846e-06, "loss": 0.3249, "step": 12100 }, { "epoch": 0.5476804706947274, "grad_norm": 0.6241000733082706, "learning_rate": 4.472547149398136e-06, "loss": 0.3324, "step": 12101 }, { "epoch": 0.5477257298031228, "grad_norm": 0.6730294073739986, "learning_rate": 4.471818323525932e-06, "loss": 0.284, "step": 12102 }, { "epoch": 0.5477709889115184, "grad_norm": 0.6396666882471538, "learning_rate": 4.471089509002728e-06, "loss": 0.301, "step": 12103 }, { "epoch": 0.547816248019914, "grad_norm": 0.33717396214770234, "learning_rate": 4.470360705844186e-06, "loss": 0.4623, "step": 12104 }, { "epoch": 0.5478615071283096, "grad_norm": 0.5946426926801431, "learning_rate": 4.469631914065967e-06, "loss": 0.2796, "step": 12105 }, { "epoch": 0.5479067662367051, "grad_norm": 0.3031311469575686, "learning_rate": 4.468903133683728e-06, "loss": 0.4492, "step": 12106 }, { "epoch": 0.5479520253451007, "grad_norm": 0.5894908589290491, "learning_rate": 4.4681743647131285e-06, "loss": 0.3431, "step": 12107 }, { "epoch": 0.5479972844534963, "grad_norm": 0.6107167467803513, "learning_rate": 4.4674456071698315e-06, "loss": 0.3392, "step": 12108 }, { "epoch": 0.5480425435618919, "grad_norm": 0.7012730961307557, "learning_rate": 4.466716861069491e-06, "loss": 0.3126, "step": 12109 }, { "epoch": 0.5480878026702874, "grad_norm": 0.29570784932734145, "learning_rate": 4.465988126427767e-06, "loss": 0.4605, "step": 12110 }, { "epoch": 0.5481330617786829, "grad_norm": 0.3330322126985907, "learning_rate": 4.4652594032603174e-06, "loss": 0.4735, "step": 12111 }, { "epoch": 0.5481783208870785, "grad_norm": 0.6404707219224894, "learning_rate": 4.4645306915828025e-06, "loss": 0.3188, "step": 12112 }, { "epoch": 0.5482235799954741, "grad_norm": 0.6781019519562312, "learning_rate": 4.463801991410878e-06, "loss": 0.2984, "step": 12113 }, { "epoch": 0.5482688391038697, "grad_norm": 0.6193509541981514, "learning_rate": 4.463073302760202e-06, "loss": 0.3858, "step": 12114 }, { "epoch": 0.5483140982122652, "grad_norm": 0.6195827035062909, "learning_rate": 4.462344625646433e-06, "loss": 0.3567, "step": 12115 }, { "epoch": 0.5483593573206608, "grad_norm": 0.29883589279706985, "learning_rate": 4.461615960085224e-06, "loss": 0.4641, "step": 12116 }, { "epoch": 0.5484046164290564, "grad_norm": 0.6455256843550773, "learning_rate": 4.460887306092236e-06, "loss": 0.3359, "step": 12117 }, { "epoch": 0.548449875537452, "grad_norm": 0.6054056715448799, "learning_rate": 4.460158663683125e-06, "loss": 0.3094, "step": 12118 }, { "epoch": 0.5484951346458474, "grad_norm": 0.641684405299334, "learning_rate": 4.459430032873545e-06, "loss": 0.3059, "step": 12119 }, { "epoch": 0.548540393754243, "grad_norm": 0.6473034497692319, "learning_rate": 4.458701413679152e-06, "loss": 0.3443, "step": 12120 }, { "epoch": 0.5485856528626386, "grad_norm": 0.578951718971156, "learning_rate": 4.457972806115607e-06, "loss": 0.2921, "step": 12121 }, { "epoch": 0.5486309119710342, "grad_norm": 0.66634270501242, "learning_rate": 4.4572442101985584e-06, "loss": 0.311, "step": 12122 }, { "epoch": 0.5486761710794298, "grad_norm": 0.2952537233831481, "learning_rate": 4.456515625943666e-06, "loss": 0.4561, "step": 12123 }, { "epoch": 0.5487214301878253, "grad_norm": 0.6761764609665392, "learning_rate": 4.455787053366583e-06, "loss": 0.3031, "step": 12124 }, { "epoch": 0.5487666892962209, "grad_norm": 0.6259223256492764, "learning_rate": 4.455058492482966e-06, "loss": 0.331, "step": 12125 }, { "epoch": 0.5488119484046164, "grad_norm": 0.27580866668673604, "learning_rate": 4.454329943308466e-06, "loss": 0.458, "step": 12126 }, { "epoch": 0.548857207513012, "grad_norm": 0.693568649449279, "learning_rate": 4.453601405858741e-06, "loss": 0.3242, "step": 12127 }, { "epoch": 0.5489024666214075, "grad_norm": 0.29380132883853927, "learning_rate": 4.4528728801494455e-06, "loss": 0.501, "step": 12128 }, { "epoch": 0.5489477257298031, "grad_norm": 0.6456773713755553, "learning_rate": 4.452144366196229e-06, "loss": 0.297, "step": 12129 }, { "epoch": 0.5489929848381987, "grad_norm": 0.6466053776304937, "learning_rate": 4.451415864014747e-06, "loss": 0.3567, "step": 12130 }, { "epoch": 0.5490382439465943, "grad_norm": 0.6261122547635842, "learning_rate": 4.450687373620656e-06, "loss": 0.2942, "step": 12131 }, { "epoch": 0.5490835030549898, "grad_norm": 0.6552351948762685, "learning_rate": 4.449958895029604e-06, "loss": 0.3534, "step": 12132 }, { "epoch": 0.5491287621633854, "grad_norm": 0.28897300839931045, "learning_rate": 4.449230428257247e-06, "loss": 0.4713, "step": 12133 }, { "epoch": 0.5491740212717809, "grad_norm": 0.6544743726214356, "learning_rate": 4.448501973319237e-06, "loss": 0.3511, "step": 12134 }, { "epoch": 0.5492192803801765, "grad_norm": 0.6922051336111326, "learning_rate": 4.447773530231225e-06, "loss": 0.3179, "step": 12135 }, { "epoch": 0.5492645394885721, "grad_norm": 0.7139447536673934, "learning_rate": 4.447045099008863e-06, "loss": 0.3338, "step": 12136 }, { "epoch": 0.5493097985969676, "grad_norm": 0.6248896889526092, "learning_rate": 4.446316679667805e-06, "loss": 0.3448, "step": 12137 }, { "epoch": 0.5493550577053632, "grad_norm": 0.6580828432429198, "learning_rate": 4.445588272223701e-06, "loss": 0.3224, "step": 12138 }, { "epoch": 0.5494003168137588, "grad_norm": 0.6255330996021665, "learning_rate": 4.4448598766922005e-06, "loss": 0.3264, "step": 12139 }, { "epoch": 0.5494455759221544, "grad_norm": 0.655788026482158, "learning_rate": 4.444131493088956e-06, "loss": 0.3109, "step": 12140 }, { "epoch": 0.5494908350305499, "grad_norm": 0.33868090853487126, "learning_rate": 4.443403121429621e-06, "loss": 0.4838, "step": 12141 }, { "epoch": 0.5495360941389454, "grad_norm": 0.6138962693984958, "learning_rate": 4.442674761729843e-06, "loss": 0.2962, "step": 12142 }, { "epoch": 0.549581353247341, "grad_norm": 0.6139537752754014, "learning_rate": 4.441946414005272e-06, "loss": 0.3167, "step": 12143 }, { "epoch": 0.5496266123557366, "grad_norm": 0.5965832383615192, "learning_rate": 4.44121807827156e-06, "loss": 0.2908, "step": 12144 }, { "epoch": 0.5496718714641322, "grad_norm": 0.6017878823620227, "learning_rate": 4.4404897545443525e-06, "loss": 0.2979, "step": 12145 }, { "epoch": 0.5497171305725277, "grad_norm": 0.6192189887678029, "learning_rate": 4.439761442839303e-06, "loss": 0.2946, "step": 12146 }, { "epoch": 0.5497623896809233, "grad_norm": 0.6770136591794844, "learning_rate": 4.439033143172061e-06, "loss": 0.3149, "step": 12147 }, { "epoch": 0.5498076487893189, "grad_norm": 0.6328717534830032, "learning_rate": 4.4383048555582725e-06, "loss": 0.2928, "step": 12148 }, { "epoch": 0.5498529078977145, "grad_norm": 0.6284658393596169, "learning_rate": 4.437576580013587e-06, "loss": 0.3373, "step": 12149 }, { "epoch": 0.5498981670061099, "grad_norm": 0.6863147237521107, "learning_rate": 4.436848316553655e-06, "loss": 0.352, "step": 12150 }, { "epoch": 0.5499434261145055, "grad_norm": 0.6818944988875426, "learning_rate": 4.436120065194121e-06, "loss": 0.3245, "step": 12151 }, { "epoch": 0.5499886852229011, "grad_norm": 0.3969587015992509, "learning_rate": 4.435391825950637e-06, "loss": 0.4963, "step": 12152 }, { "epoch": 0.5500339443312967, "grad_norm": 0.6994559752644615, "learning_rate": 4.434663598838847e-06, "loss": 0.2811, "step": 12153 }, { "epoch": 0.5500792034396922, "grad_norm": 0.5782646329030336, "learning_rate": 4.4339353838744024e-06, "loss": 0.2816, "step": 12154 }, { "epoch": 0.5501244625480878, "grad_norm": 0.6333818254219354, "learning_rate": 4.433207181072945e-06, "loss": 0.3159, "step": 12155 }, { "epoch": 0.5501697216564834, "grad_norm": 0.6194011645499826, "learning_rate": 4.432478990450126e-06, "loss": 0.3255, "step": 12156 }, { "epoch": 0.550214980764879, "grad_norm": 0.680557849521606, "learning_rate": 4.431750812021591e-06, "loss": 0.2947, "step": 12157 }, { "epoch": 0.5502602398732745, "grad_norm": 0.6530503515211541, "learning_rate": 4.431022645802985e-06, "loss": 0.3302, "step": 12158 }, { "epoch": 0.55030549898167, "grad_norm": 0.6781429736989625, "learning_rate": 4.430294491809954e-06, "loss": 0.3135, "step": 12159 }, { "epoch": 0.5503507580900656, "grad_norm": 0.6310181148735358, "learning_rate": 4.429566350058146e-06, "loss": 0.3066, "step": 12160 }, { "epoch": 0.5503960171984612, "grad_norm": 0.6180237070682488, "learning_rate": 4.428838220563205e-06, "loss": 0.3766, "step": 12161 }, { "epoch": 0.5504412763068568, "grad_norm": 0.6238922783779193, "learning_rate": 4.428110103340776e-06, "loss": 0.2911, "step": 12162 }, { "epoch": 0.5504865354152523, "grad_norm": 0.627010311697926, "learning_rate": 4.427381998406506e-06, "loss": 0.3012, "step": 12163 }, { "epoch": 0.5505317945236479, "grad_norm": 0.3007742329014419, "learning_rate": 4.426653905776035e-06, "loss": 0.458, "step": 12164 }, { "epoch": 0.5505770536320435, "grad_norm": 0.2872082070907317, "learning_rate": 4.425925825465013e-06, "loss": 0.4722, "step": 12165 }, { "epoch": 0.550622312740439, "grad_norm": 0.6197356908953288, "learning_rate": 4.425197757489082e-06, "loss": 0.3196, "step": 12166 }, { "epoch": 0.5506675718488345, "grad_norm": 0.6987742922291102, "learning_rate": 4.4244697018638845e-06, "loss": 0.3273, "step": 12167 }, { "epoch": 0.5507128309572301, "grad_norm": 0.6004808697850207, "learning_rate": 4.423741658605066e-06, "loss": 0.2972, "step": 12168 }, { "epoch": 0.5507580900656257, "grad_norm": 0.6460250550153468, "learning_rate": 4.423013627728269e-06, "loss": 0.3355, "step": 12169 }, { "epoch": 0.5508033491740213, "grad_norm": 0.6257553030413675, "learning_rate": 4.422285609249139e-06, "loss": 0.3276, "step": 12170 }, { "epoch": 0.5508486082824169, "grad_norm": 0.6327753745132836, "learning_rate": 4.4215576031833155e-06, "loss": 0.3384, "step": 12171 }, { "epoch": 0.5508938673908124, "grad_norm": 0.31764744050962074, "learning_rate": 4.420829609546442e-06, "loss": 0.4806, "step": 12172 }, { "epoch": 0.550939126499208, "grad_norm": 0.6285208469040747, "learning_rate": 4.420101628354164e-06, "loss": 0.3133, "step": 12173 }, { "epoch": 0.5509843856076035, "grad_norm": 0.5957967636997421, "learning_rate": 4.419373659622117e-06, "loss": 0.3227, "step": 12174 }, { "epoch": 0.5510296447159991, "grad_norm": 0.6882976139091561, "learning_rate": 4.418645703365949e-06, "loss": 0.3315, "step": 12175 }, { "epoch": 0.5510749038243946, "grad_norm": 0.7370220979424742, "learning_rate": 4.4179177596013005e-06, "loss": 0.3295, "step": 12176 }, { "epoch": 0.5511201629327902, "grad_norm": 0.30805795099873884, "learning_rate": 4.4171898283438104e-06, "loss": 0.4769, "step": 12177 }, { "epoch": 0.5511654220411858, "grad_norm": 0.6003256657053226, "learning_rate": 4.416461909609119e-06, "loss": 0.3468, "step": 12178 }, { "epoch": 0.5512106811495814, "grad_norm": 0.6286414600373732, "learning_rate": 4.415734003412873e-06, "loss": 0.2972, "step": 12179 }, { "epoch": 0.551255940257977, "grad_norm": 0.6598533584898012, "learning_rate": 4.415006109770706e-06, "loss": 0.3631, "step": 12180 }, { "epoch": 0.5513011993663725, "grad_norm": 0.2659973937123042, "learning_rate": 4.414278228698261e-06, "loss": 0.4777, "step": 12181 }, { "epoch": 0.551346458474768, "grad_norm": 2.6385411080370003, "learning_rate": 4.413550360211177e-06, "loss": 0.3176, "step": 12182 }, { "epoch": 0.5513917175831636, "grad_norm": 0.6190630777781309, "learning_rate": 4.412822504325099e-06, "loss": 0.3038, "step": 12183 }, { "epoch": 0.5514369766915592, "grad_norm": 0.6594277253803893, "learning_rate": 4.412094661055658e-06, "loss": 0.3428, "step": 12184 }, { "epoch": 0.5514822357999547, "grad_norm": 0.6978013596534337, "learning_rate": 4.411366830418498e-06, "loss": 0.3288, "step": 12185 }, { "epoch": 0.5515274949083503, "grad_norm": 0.6448886516709758, "learning_rate": 4.410639012429259e-06, "loss": 0.3011, "step": 12186 }, { "epoch": 0.5515727540167459, "grad_norm": 0.6144338416530524, "learning_rate": 4.409911207103576e-06, "loss": 0.3257, "step": 12187 }, { "epoch": 0.5516180131251415, "grad_norm": 0.6649078543814887, "learning_rate": 4.409183414457086e-06, "loss": 0.3444, "step": 12188 }, { "epoch": 0.551663272233537, "grad_norm": 0.6712539994119634, "learning_rate": 4.408455634505435e-06, "loss": 0.3016, "step": 12189 }, { "epoch": 0.5517085313419325, "grad_norm": 0.5761806251973871, "learning_rate": 4.407727867264253e-06, "loss": 0.3021, "step": 12190 }, { "epoch": 0.5517537904503281, "grad_norm": 0.31372664794374205, "learning_rate": 4.407000112749179e-06, "loss": 0.474, "step": 12191 }, { "epoch": 0.5517990495587237, "grad_norm": 0.7033017381026658, "learning_rate": 4.406272370975854e-06, "loss": 0.3507, "step": 12192 }, { "epoch": 0.5518443086671193, "grad_norm": 0.8187659702589716, "learning_rate": 4.40554464195991e-06, "loss": 0.3767, "step": 12193 }, { "epoch": 0.5518895677755148, "grad_norm": 0.7055026439228711, "learning_rate": 4.404816925716987e-06, "loss": 0.332, "step": 12194 }, { "epoch": 0.5519348268839104, "grad_norm": 0.30227345454402327, "learning_rate": 4.404089222262721e-06, "loss": 0.4674, "step": 12195 }, { "epoch": 0.551980085992306, "grad_norm": 0.6038295544645176, "learning_rate": 4.4033615316127466e-06, "loss": 0.2831, "step": 12196 }, { "epoch": 0.5520253451007016, "grad_norm": 0.6109965842943273, "learning_rate": 4.402633853782699e-06, "loss": 0.312, "step": 12197 }, { "epoch": 0.552070604209097, "grad_norm": 0.6117511805063619, "learning_rate": 4.401906188788216e-06, "loss": 0.2787, "step": 12198 }, { "epoch": 0.5521158633174926, "grad_norm": 0.6615939112863005, "learning_rate": 4.401178536644934e-06, "loss": 0.2971, "step": 12199 }, { "epoch": 0.5521611224258882, "grad_norm": 0.7905339578895, "learning_rate": 4.4004508973684844e-06, "loss": 0.3102, "step": 12200 }, { "epoch": 0.5522063815342838, "grad_norm": 0.6298379390373061, "learning_rate": 4.399723270974503e-06, "loss": 0.2908, "step": 12201 }, { "epoch": 0.5522516406426793, "grad_norm": 1.0137912656423052, "learning_rate": 4.398995657478628e-06, "loss": 0.3327, "step": 12202 }, { "epoch": 0.5522968997510749, "grad_norm": 0.642412628400293, "learning_rate": 4.398268056896488e-06, "loss": 0.3398, "step": 12203 }, { "epoch": 0.5523421588594705, "grad_norm": 0.6056875907816118, "learning_rate": 4.397540469243719e-06, "loss": 0.2802, "step": 12204 }, { "epoch": 0.5523874179678661, "grad_norm": 0.623085870979715, "learning_rate": 4.396812894535957e-06, "loss": 0.3177, "step": 12205 }, { "epoch": 0.5524326770762616, "grad_norm": 0.6998316003144387, "learning_rate": 4.396085332788832e-06, "loss": 0.3342, "step": 12206 }, { "epoch": 0.5524779361846571, "grad_norm": 0.692349791540142, "learning_rate": 4.395357784017977e-06, "loss": 0.2944, "step": 12207 }, { "epoch": 0.5525231952930527, "grad_norm": 0.5964649889517802, "learning_rate": 4.394630248239029e-06, "loss": 0.2867, "step": 12208 }, { "epoch": 0.5525684544014483, "grad_norm": 0.36281497979131894, "learning_rate": 4.393902725467616e-06, "loss": 0.491, "step": 12209 }, { "epoch": 0.5526137135098439, "grad_norm": 0.6925980131373757, "learning_rate": 4.3931752157193725e-06, "loss": 0.3744, "step": 12210 }, { "epoch": 0.5526589726182394, "grad_norm": 0.5824848786527309, "learning_rate": 4.3924477190099286e-06, "loss": 0.3029, "step": 12211 }, { "epoch": 0.552704231726635, "grad_norm": 0.6192125272134775, "learning_rate": 4.391720235354921e-06, "loss": 0.3234, "step": 12212 }, { "epoch": 0.5527494908350306, "grad_norm": 0.6198155454511541, "learning_rate": 4.390992764769974e-06, "loss": 0.3189, "step": 12213 }, { "epoch": 0.5527947499434261, "grad_norm": 0.6843133179389448, "learning_rate": 4.390265307270722e-06, "loss": 0.3609, "step": 12214 }, { "epoch": 0.5528400090518216, "grad_norm": 0.6429985571485314, "learning_rate": 4.389537862872798e-06, "loss": 0.3014, "step": 12215 }, { "epoch": 0.5528852681602172, "grad_norm": 0.6993967541463431, "learning_rate": 4.388810431591829e-06, "loss": 0.2934, "step": 12216 }, { "epoch": 0.5529305272686128, "grad_norm": 0.5852124802333049, "learning_rate": 4.388083013443445e-06, "loss": 0.3322, "step": 12217 }, { "epoch": 0.5529757863770084, "grad_norm": 0.6098756681057482, "learning_rate": 4.387355608443281e-06, "loss": 0.3433, "step": 12218 }, { "epoch": 0.553021045485404, "grad_norm": 0.5960790982391869, "learning_rate": 4.386628216606962e-06, "loss": 0.3231, "step": 12219 }, { "epoch": 0.5530663045937995, "grad_norm": 0.5563541033680087, "learning_rate": 4.385900837950119e-06, "loss": 0.2942, "step": 12220 }, { "epoch": 0.553111563702195, "grad_norm": 0.6190208956368316, "learning_rate": 4.385173472488382e-06, "loss": 0.2754, "step": 12221 }, { "epoch": 0.5531568228105906, "grad_norm": 0.6148730927860822, "learning_rate": 4.384446120237375e-06, "loss": 0.2881, "step": 12222 }, { "epoch": 0.5532020819189862, "grad_norm": 0.6017200305214885, "learning_rate": 4.3837187812127335e-06, "loss": 0.2961, "step": 12223 }, { "epoch": 0.5532473410273817, "grad_norm": 0.6199969656357945, "learning_rate": 4.382991455430082e-06, "loss": 0.3047, "step": 12224 }, { "epoch": 0.5532926001357773, "grad_norm": 0.6884288422848379, "learning_rate": 4.38226414290505e-06, "loss": 0.3139, "step": 12225 }, { "epoch": 0.5533378592441729, "grad_norm": 0.574458671702428, "learning_rate": 4.381536843653262e-06, "loss": 0.2918, "step": 12226 }, { "epoch": 0.5533831183525685, "grad_norm": 0.6292783706079346, "learning_rate": 4.380809557690349e-06, "loss": 0.3011, "step": 12227 }, { "epoch": 0.5534283774609641, "grad_norm": 0.3549734484788748, "learning_rate": 4.380082285031938e-06, "loss": 0.4775, "step": 12228 }, { "epoch": 0.5534736365693596, "grad_norm": 0.3279537547182673, "learning_rate": 4.379355025693654e-06, "loss": 0.4714, "step": 12229 }, { "epoch": 0.5535188956777551, "grad_norm": 0.6157546861625464, "learning_rate": 4.378627779691123e-06, "loss": 0.309, "step": 12230 }, { "epoch": 0.5535641547861507, "grad_norm": 0.6530262504499489, "learning_rate": 4.377900547039976e-06, "loss": 0.2888, "step": 12231 }, { "epoch": 0.5536094138945463, "grad_norm": 0.6055986541476903, "learning_rate": 4.377173327755832e-06, "loss": 0.3317, "step": 12232 }, { "epoch": 0.5536546730029418, "grad_norm": 0.6338147844885054, "learning_rate": 4.376446121854322e-06, "loss": 0.3026, "step": 12233 }, { "epoch": 0.5536999321113374, "grad_norm": 0.6080275418862028, "learning_rate": 4.3757189293510696e-06, "loss": 0.357, "step": 12234 }, { "epoch": 0.553745191219733, "grad_norm": 0.5928751762361252, "learning_rate": 4.3749917502617e-06, "loss": 0.3525, "step": 12235 }, { "epoch": 0.5537904503281286, "grad_norm": 0.698941683211696, "learning_rate": 4.374264584601837e-06, "loss": 0.3303, "step": 12236 }, { "epoch": 0.553835709436524, "grad_norm": 0.6663961230327661, "learning_rate": 4.3735374323871084e-06, "loss": 0.3464, "step": 12237 }, { "epoch": 0.5538809685449196, "grad_norm": 0.6377326823601144, "learning_rate": 4.372810293633135e-06, "loss": 0.2846, "step": 12238 }, { "epoch": 0.5539262276533152, "grad_norm": 0.5987760173833161, "learning_rate": 4.372083168355543e-06, "loss": 0.2871, "step": 12239 }, { "epoch": 0.5539714867617108, "grad_norm": 0.582142971841127, "learning_rate": 4.371356056569953e-06, "loss": 0.3175, "step": 12240 }, { "epoch": 0.5540167458701064, "grad_norm": 0.6951289154150705, "learning_rate": 4.370628958291993e-06, "loss": 0.2885, "step": 12241 }, { "epoch": 0.5540620049785019, "grad_norm": 0.691357756254213, "learning_rate": 4.369901873537283e-06, "loss": 0.3195, "step": 12242 }, { "epoch": 0.5541072640868975, "grad_norm": 0.6281731913361734, "learning_rate": 4.369174802321447e-06, "loss": 0.3092, "step": 12243 }, { "epoch": 0.5541525231952931, "grad_norm": 0.7427270951811668, "learning_rate": 4.368447744660107e-06, "loss": 0.3712, "step": 12244 }, { "epoch": 0.5541977823036887, "grad_norm": 0.40689852775381286, "learning_rate": 4.367720700568885e-06, "loss": 0.487, "step": 12245 }, { "epoch": 0.5542430414120841, "grad_norm": 0.7117771895581163, "learning_rate": 4.366993670063402e-06, "loss": 0.2849, "step": 12246 }, { "epoch": 0.5542883005204797, "grad_norm": 0.6206158885079222, "learning_rate": 4.366266653159283e-06, "loss": 0.2953, "step": 12247 }, { "epoch": 0.5543335596288753, "grad_norm": 0.6255779764365972, "learning_rate": 4.365539649872146e-06, "loss": 0.3379, "step": 12248 }, { "epoch": 0.5543788187372709, "grad_norm": 0.6808302225014655, "learning_rate": 4.364812660217614e-06, "loss": 0.3351, "step": 12249 }, { "epoch": 0.5544240778456664, "grad_norm": 0.6666912810231732, "learning_rate": 4.364085684211307e-06, "loss": 0.3615, "step": 12250 }, { "epoch": 0.554469336954062, "grad_norm": 0.6315922128061487, "learning_rate": 4.363358721868844e-06, "loss": 0.3224, "step": 12251 }, { "epoch": 0.5545145960624576, "grad_norm": 0.6674302959256082, "learning_rate": 4.362631773205848e-06, "loss": 0.354, "step": 12252 }, { "epoch": 0.5545598551708532, "grad_norm": 0.6301333820021424, "learning_rate": 4.361904838237938e-06, "loss": 0.3141, "step": 12253 }, { "epoch": 0.5546051142792487, "grad_norm": 0.6182147024876224, "learning_rate": 4.3611779169807335e-06, "loss": 0.3001, "step": 12254 }, { "epoch": 0.5546503733876442, "grad_norm": 0.3287120848947762, "learning_rate": 4.360451009449852e-06, "loss": 0.5037, "step": 12255 }, { "epoch": 0.5546956324960398, "grad_norm": 0.5805587844522027, "learning_rate": 4.359724115660915e-06, "loss": 0.3193, "step": 12256 }, { "epoch": 0.5547408916044354, "grad_norm": 0.5753460065480046, "learning_rate": 4.3589972356295415e-06, "loss": 0.2737, "step": 12257 }, { "epoch": 0.554786150712831, "grad_norm": 0.27834192808116914, "learning_rate": 4.3582703693713475e-06, "loss": 0.4724, "step": 12258 }, { "epoch": 0.5548314098212265, "grad_norm": 0.5975988206476961, "learning_rate": 4.357543516901951e-06, "loss": 0.3138, "step": 12259 }, { "epoch": 0.5548766689296221, "grad_norm": 0.66133209006719, "learning_rate": 4.356816678236975e-06, "loss": 0.3372, "step": 12260 }, { "epoch": 0.5549219280380177, "grad_norm": 0.5894943538014618, "learning_rate": 4.35608985339203e-06, "loss": 0.3399, "step": 12261 }, { "epoch": 0.5549671871464132, "grad_norm": 0.5781218574299265, "learning_rate": 4.355363042382737e-06, "loss": 0.3074, "step": 12262 }, { "epoch": 0.5550124462548088, "grad_norm": 0.6363791850426851, "learning_rate": 4.3546362452247135e-06, "loss": 0.3386, "step": 12263 }, { "epoch": 0.5550577053632043, "grad_norm": 0.6129669070303816, "learning_rate": 4.3539094619335746e-06, "loss": 0.3461, "step": 12264 }, { "epoch": 0.5551029644715999, "grad_norm": 0.5914749739899136, "learning_rate": 4.3531826925249355e-06, "loss": 0.2904, "step": 12265 }, { "epoch": 0.5551482235799955, "grad_norm": 0.6209594996658195, "learning_rate": 4.352455937014414e-06, "loss": 0.3281, "step": 12266 }, { "epoch": 0.5551934826883911, "grad_norm": 0.6419748858543516, "learning_rate": 4.351729195417627e-06, "loss": 0.3305, "step": 12267 }, { "epoch": 0.5552387417967866, "grad_norm": 0.6433578417970277, "learning_rate": 4.351002467750189e-06, "loss": 0.3338, "step": 12268 }, { "epoch": 0.5552840009051822, "grad_norm": 0.6542378892446439, "learning_rate": 4.350275754027713e-06, "loss": 0.3219, "step": 12269 }, { "epoch": 0.5553292600135777, "grad_norm": 0.6360447076266809, "learning_rate": 4.349549054265817e-06, "loss": 0.3169, "step": 12270 }, { "epoch": 0.5553745191219733, "grad_norm": 0.5615509706467567, "learning_rate": 4.348822368480113e-06, "loss": 0.3268, "step": 12271 }, { "epoch": 0.5554197782303688, "grad_norm": 0.3815671050635444, "learning_rate": 4.348095696686217e-06, "loss": 0.4908, "step": 12272 }, { "epoch": 0.5554650373387644, "grad_norm": 0.6375360091018338, "learning_rate": 4.347369038899744e-06, "loss": 0.2881, "step": 12273 }, { "epoch": 0.55551029644716, "grad_norm": 0.5722597760505286, "learning_rate": 4.346642395136303e-06, "loss": 0.31, "step": 12274 }, { "epoch": 0.5555555555555556, "grad_norm": 0.633575672379051, "learning_rate": 4.345915765411511e-06, "loss": 0.337, "step": 12275 }, { "epoch": 0.5556008146639512, "grad_norm": 0.6112490004503777, "learning_rate": 4.345189149740982e-06, "loss": 0.3364, "step": 12276 }, { "epoch": 0.5556460737723466, "grad_norm": 0.5802223407166853, "learning_rate": 4.344462548140325e-06, "loss": 0.2906, "step": 12277 }, { "epoch": 0.5556913328807422, "grad_norm": 0.5860357053241917, "learning_rate": 4.343735960625156e-06, "loss": 0.3308, "step": 12278 }, { "epoch": 0.5557365919891378, "grad_norm": 0.3729925473292314, "learning_rate": 4.343009387211086e-06, "loss": 0.4983, "step": 12279 }, { "epoch": 0.5557818510975334, "grad_norm": 0.5933383937464417, "learning_rate": 4.3422828279137245e-06, "loss": 0.3084, "step": 12280 }, { "epoch": 0.5558271102059289, "grad_norm": 0.8911220942079052, "learning_rate": 4.341556282748685e-06, "loss": 0.349, "step": 12281 }, { "epoch": 0.5558723693143245, "grad_norm": 0.6579449341515171, "learning_rate": 4.34082975173158e-06, "loss": 0.289, "step": 12282 }, { "epoch": 0.5559176284227201, "grad_norm": 0.64297167758931, "learning_rate": 4.34010323487802e-06, "loss": 0.3467, "step": 12283 }, { "epoch": 0.5559628875311157, "grad_norm": 0.6358433470685065, "learning_rate": 4.3393767322036125e-06, "loss": 0.3273, "step": 12284 }, { "epoch": 0.5560081466395111, "grad_norm": 0.5989624689504114, "learning_rate": 4.338650243723971e-06, "loss": 0.2801, "step": 12285 }, { "epoch": 0.5560534057479067, "grad_norm": 0.6361718365706649, "learning_rate": 4.337923769454706e-06, "loss": 0.383, "step": 12286 }, { "epoch": 0.5560986648563023, "grad_norm": 0.5717929766259564, "learning_rate": 4.337197309411424e-06, "loss": 0.3281, "step": 12287 }, { "epoch": 0.5561439239646979, "grad_norm": 0.31954508426963363, "learning_rate": 4.336470863609736e-06, "loss": 0.4839, "step": 12288 }, { "epoch": 0.5561891830730935, "grad_norm": 0.7010522726012361, "learning_rate": 4.335744432065254e-06, "loss": 0.3484, "step": 12289 }, { "epoch": 0.556234442181489, "grad_norm": 0.6413260103451215, "learning_rate": 4.33501801479358e-06, "loss": 0.3233, "step": 12290 }, { "epoch": 0.5562797012898846, "grad_norm": 0.6260547273063286, "learning_rate": 4.334291611810329e-06, "loss": 0.26, "step": 12291 }, { "epoch": 0.5563249603982802, "grad_norm": 0.5989004568803357, "learning_rate": 4.333565223131107e-06, "loss": 0.3036, "step": 12292 }, { "epoch": 0.5563702195066758, "grad_norm": 0.6397236265829002, "learning_rate": 4.332838848771521e-06, "loss": 0.2733, "step": 12293 }, { "epoch": 0.5564154786150712, "grad_norm": 0.6092452917225999, "learning_rate": 4.332112488747178e-06, "loss": 0.3241, "step": 12294 }, { "epoch": 0.5564607377234668, "grad_norm": 0.6211793412226144, "learning_rate": 4.331386143073687e-06, "loss": 0.3309, "step": 12295 }, { "epoch": 0.5565059968318624, "grad_norm": 0.2849571156764159, "learning_rate": 4.330659811766655e-06, "loss": 0.4651, "step": 12296 }, { "epoch": 0.556551255940258, "grad_norm": 0.6398917116737811, "learning_rate": 4.329933494841689e-06, "loss": 0.3299, "step": 12297 }, { "epoch": 0.5565965150486536, "grad_norm": 0.6752901353544469, "learning_rate": 4.3292071923143905e-06, "loss": 0.3257, "step": 12298 }, { "epoch": 0.5566417741570491, "grad_norm": 0.7044712818301637, "learning_rate": 4.328480904200373e-06, "loss": 0.3306, "step": 12299 }, { "epoch": 0.5566870332654447, "grad_norm": 0.6004103904041961, "learning_rate": 4.327754630515236e-06, "loss": 0.3237, "step": 12300 }, { "epoch": 0.5567322923738403, "grad_norm": 0.6051455344287193, "learning_rate": 4.3270283712745885e-06, "loss": 0.2621, "step": 12301 }, { "epoch": 0.5567775514822358, "grad_norm": 0.6325282147607654, "learning_rate": 4.326302126494035e-06, "loss": 0.3687, "step": 12302 }, { "epoch": 0.5568228105906313, "grad_norm": 0.2778695822780776, "learning_rate": 4.325575896189178e-06, "loss": 0.4804, "step": 12303 }, { "epoch": 0.5568680696990269, "grad_norm": 0.6901227211736138, "learning_rate": 4.324849680375625e-06, "loss": 0.3322, "step": 12304 }, { "epoch": 0.5569133288074225, "grad_norm": 0.6373310885407559, "learning_rate": 4.324123479068979e-06, "loss": 0.3157, "step": 12305 }, { "epoch": 0.5569585879158181, "grad_norm": 0.626480672047507, "learning_rate": 4.3233972922848435e-06, "loss": 0.3418, "step": 12306 }, { "epoch": 0.5570038470242136, "grad_norm": 0.6081506214931938, "learning_rate": 4.32267112003882e-06, "loss": 0.3671, "step": 12307 }, { "epoch": 0.5570491061326092, "grad_norm": 0.6349245263217774, "learning_rate": 4.321944962346517e-06, "loss": 0.3599, "step": 12308 }, { "epoch": 0.5570943652410048, "grad_norm": 0.6028597309271571, "learning_rate": 4.321218819223533e-06, "loss": 0.279, "step": 12309 }, { "epoch": 0.5571396243494003, "grad_norm": 0.6558489076897699, "learning_rate": 4.320492690685471e-06, "loss": 0.347, "step": 12310 }, { "epoch": 0.5571848834577959, "grad_norm": 0.631884906983182, "learning_rate": 4.319766576747934e-06, "loss": 0.29, "step": 12311 }, { "epoch": 0.5572301425661914, "grad_norm": 0.5963251037440932, "learning_rate": 4.319040477426527e-06, "loss": 0.2948, "step": 12312 }, { "epoch": 0.557275401674587, "grad_norm": 0.3178597251797919, "learning_rate": 4.318314392736845e-06, "loss": 0.4583, "step": 12313 }, { "epoch": 0.5573206607829826, "grad_norm": 0.5772619349251756, "learning_rate": 4.317588322694495e-06, "loss": 0.3433, "step": 12314 }, { "epoch": 0.5573659198913782, "grad_norm": 0.29671798934162635, "learning_rate": 4.3168622673150765e-06, "loss": 0.4762, "step": 12315 }, { "epoch": 0.5574111789997737, "grad_norm": 0.7101392642545054, "learning_rate": 4.3161362266141895e-06, "loss": 0.3671, "step": 12316 }, { "epoch": 0.5574564381081692, "grad_norm": 0.5306534955505768, "learning_rate": 4.315410200607433e-06, "loss": 0.2922, "step": 12317 }, { "epoch": 0.5575016972165648, "grad_norm": 0.6101963309651014, "learning_rate": 4.314684189310412e-06, "loss": 0.3151, "step": 12318 }, { "epoch": 0.5575469563249604, "grad_norm": 0.6203140579627333, "learning_rate": 4.31395819273872e-06, "loss": 0.3394, "step": 12319 }, { "epoch": 0.5575922154333559, "grad_norm": 0.6078518079041008, "learning_rate": 4.313232210907959e-06, "loss": 0.3391, "step": 12320 }, { "epoch": 0.5576374745417515, "grad_norm": 0.3124391598086161, "learning_rate": 4.312506243833732e-06, "loss": 0.4811, "step": 12321 }, { "epoch": 0.5576827336501471, "grad_norm": 0.31236298450559097, "learning_rate": 4.311780291531632e-06, "loss": 0.4722, "step": 12322 }, { "epoch": 0.5577279927585427, "grad_norm": 0.2680955973781369, "learning_rate": 4.311054354017259e-06, "loss": 0.4677, "step": 12323 }, { "epoch": 0.5577732518669383, "grad_norm": 0.6576465320357912, "learning_rate": 4.310328431306213e-06, "loss": 0.3133, "step": 12324 }, { "epoch": 0.5578185109753337, "grad_norm": 0.6337519434030606, "learning_rate": 4.309602523414092e-06, "loss": 0.2899, "step": 12325 }, { "epoch": 0.5578637700837293, "grad_norm": 0.6146377689137756, "learning_rate": 4.308876630356491e-06, "loss": 0.3153, "step": 12326 }, { "epoch": 0.5579090291921249, "grad_norm": 0.6836536803399664, "learning_rate": 4.308150752149007e-06, "loss": 0.293, "step": 12327 }, { "epoch": 0.5579542883005205, "grad_norm": 0.6250698035311567, "learning_rate": 4.307424888807242e-06, "loss": 0.3027, "step": 12328 }, { "epoch": 0.557999547408916, "grad_norm": 0.7400318887479171, "learning_rate": 4.306699040346788e-06, "loss": 0.3148, "step": 12329 }, { "epoch": 0.5580448065173116, "grad_norm": 0.5955651260196211, "learning_rate": 4.305973206783241e-06, "loss": 0.2921, "step": 12330 }, { "epoch": 0.5580900656257072, "grad_norm": 0.6247834393219636, "learning_rate": 4.3052473881322e-06, "loss": 0.2934, "step": 12331 }, { "epoch": 0.5581353247341028, "grad_norm": 1.2878975281808585, "learning_rate": 4.304521584409257e-06, "loss": 0.2905, "step": 12332 }, { "epoch": 0.5581805838424984, "grad_norm": 0.6963145804773342, "learning_rate": 4.30379579563001e-06, "loss": 0.3457, "step": 12333 }, { "epoch": 0.5582258429508938, "grad_norm": 0.6439684461235223, "learning_rate": 4.303070021810053e-06, "loss": 0.3477, "step": 12334 }, { "epoch": 0.5582711020592894, "grad_norm": 0.37819610638538015, "learning_rate": 4.3023442629649816e-06, "loss": 0.4662, "step": 12335 }, { "epoch": 0.558316361167685, "grad_norm": 0.6274598277275636, "learning_rate": 4.3016185191103874e-06, "loss": 0.3422, "step": 12336 }, { "epoch": 0.5583616202760806, "grad_norm": 0.6752885359439768, "learning_rate": 4.300892790261867e-06, "loss": 0.3476, "step": 12337 }, { "epoch": 0.5584068793844761, "grad_norm": 0.6359580258984199, "learning_rate": 4.300167076435015e-06, "loss": 0.3122, "step": 12338 }, { "epoch": 0.5584521384928717, "grad_norm": 0.6038278153379546, "learning_rate": 4.2994413776454225e-06, "loss": 0.3011, "step": 12339 }, { "epoch": 0.5584973976012673, "grad_norm": 0.6408495486894391, "learning_rate": 4.298715693908682e-06, "loss": 0.3211, "step": 12340 }, { "epoch": 0.5585426567096629, "grad_norm": 0.6594204681087569, "learning_rate": 4.2979900252403895e-06, "loss": 0.2956, "step": 12341 }, { "epoch": 0.5585879158180583, "grad_norm": 0.8247731950449819, "learning_rate": 4.297264371656133e-06, "loss": 0.2894, "step": 12342 }, { "epoch": 0.5586331749264539, "grad_norm": 0.6941235288007112, "learning_rate": 4.296538733171507e-06, "loss": 0.3281, "step": 12343 }, { "epoch": 0.5586784340348495, "grad_norm": 0.6599637003374355, "learning_rate": 4.295813109802106e-06, "loss": 0.2901, "step": 12344 }, { "epoch": 0.5587236931432451, "grad_norm": 0.29949819318411175, "learning_rate": 4.295087501563516e-06, "loss": 0.4677, "step": 12345 }, { "epoch": 0.5587689522516407, "grad_norm": 0.6169216710706968, "learning_rate": 4.294361908471329e-06, "loss": 0.2863, "step": 12346 }, { "epoch": 0.5588142113600362, "grad_norm": 0.27206602819963843, "learning_rate": 4.293636330541141e-06, "loss": 0.4689, "step": 12347 }, { "epoch": 0.5588594704684318, "grad_norm": 0.7291025964436351, "learning_rate": 4.2929107677885375e-06, "loss": 0.3434, "step": 12348 }, { "epoch": 0.5589047295768274, "grad_norm": 0.5961323828297462, "learning_rate": 4.29218522022911e-06, "loss": 0.3006, "step": 12349 }, { "epoch": 0.5589499886852229, "grad_norm": 0.6081864008471584, "learning_rate": 4.291459687878449e-06, "loss": 0.3232, "step": 12350 }, { "epoch": 0.5589952477936184, "grad_norm": 0.8359323755964424, "learning_rate": 4.29073417075214e-06, "loss": 0.3078, "step": 12351 }, { "epoch": 0.559040506902014, "grad_norm": 0.7135344530168528, "learning_rate": 4.290008668865778e-06, "loss": 0.3308, "step": 12352 }, { "epoch": 0.5590857660104096, "grad_norm": 0.6491425822680523, "learning_rate": 4.289283182234948e-06, "loss": 0.2947, "step": 12353 }, { "epoch": 0.5591310251188052, "grad_norm": 0.6778886453996668, "learning_rate": 4.288557710875242e-06, "loss": 0.3435, "step": 12354 }, { "epoch": 0.5591762842272007, "grad_norm": 0.6056368355836651, "learning_rate": 4.287832254802244e-06, "loss": 0.3024, "step": 12355 }, { "epoch": 0.5592215433355963, "grad_norm": 0.5971191283513166, "learning_rate": 4.287106814031542e-06, "loss": 0.2883, "step": 12356 }, { "epoch": 0.5592668024439919, "grad_norm": 0.6408366890253473, "learning_rate": 4.286381388578728e-06, "loss": 0.3402, "step": 12357 }, { "epoch": 0.5593120615523874, "grad_norm": 0.614472336181477, "learning_rate": 4.285655978459385e-06, "loss": 0.3338, "step": 12358 }, { "epoch": 0.559357320660783, "grad_norm": 0.645675843719086, "learning_rate": 4.2849305836891e-06, "loss": 0.3243, "step": 12359 }, { "epoch": 0.5594025797691785, "grad_norm": 0.6961527371204147, "learning_rate": 4.284205204283463e-06, "loss": 0.3116, "step": 12360 }, { "epoch": 0.5594478388775741, "grad_norm": 0.37595319417062534, "learning_rate": 4.283479840258055e-06, "loss": 0.4632, "step": 12361 }, { "epoch": 0.5594930979859697, "grad_norm": 0.6705977459084392, "learning_rate": 4.2827544916284655e-06, "loss": 0.3192, "step": 12362 }, { "epoch": 0.5595383570943653, "grad_norm": 0.6716578120239428, "learning_rate": 4.2820291584102815e-06, "loss": 0.3134, "step": 12363 }, { "epoch": 0.5595836162027608, "grad_norm": 0.6678896667477587, "learning_rate": 4.281303840619083e-06, "loss": 0.2788, "step": 12364 }, { "epoch": 0.5596288753111563, "grad_norm": 0.6762181244429644, "learning_rate": 4.280578538270458e-06, "loss": 0.3372, "step": 12365 }, { "epoch": 0.5596741344195519, "grad_norm": 0.30584590139664086, "learning_rate": 4.27985325137999e-06, "loss": 0.4595, "step": 12366 }, { "epoch": 0.5597193935279475, "grad_norm": 0.6381422081849868, "learning_rate": 4.279127979963266e-06, "loss": 0.3272, "step": 12367 }, { "epoch": 0.5597646526363431, "grad_norm": 0.7344560788607514, "learning_rate": 4.278402724035868e-06, "loss": 0.3583, "step": 12368 }, { "epoch": 0.5598099117447386, "grad_norm": 0.6260833716254307, "learning_rate": 4.277677483613377e-06, "loss": 0.3134, "step": 12369 }, { "epoch": 0.5598551708531342, "grad_norm": 0.7521739392430035, "learning_rate": 4.276952258711381e-06, "loss": 0.3312, "step": 12370 }, { "epoch": 0.5599004299615298, "grad_norm": 0.599559188266718, "learning_rate": 4.276227049345458e-06, "loss": 0.2971, "step": 12371 }, { "epoch": 0.5599456890699254, "grad_norm": 1.1519106873598228, "learning_rate": 4.2755018555311935e-06, "loss": 0.326, "step": 12372 }, { "epoch": 0.5599909481783208, "grad_norm": 1.1096769841546963, "learning_rate": 4.2747766772841695e-06, "loss": 0.3232, "step": 12373 }, { "epoch": 0.5600362072867164, "grad_norm": 0.628112357109904, "learning_rate": 4.2740515146199675e-06, "loss": 0.3061, "step": 12374 }, { "epoch": 0.560081466395112, "grad_norm": 0.6317378580340778, "learning_rate": 4.273326367554167e-06, "loss": 0.3176, "step": 12375 }, { "epoch": 0.5601267255035076, "grad_norm": 0.6372257992411345, "learning_rate": 4.272601236102353e-06, "loss": 0.3638, "step": 12376 }, { "epoch": 0.5601719846119031, "grad_norm": 0.32587176034152876, "learning_rate": 4.271876120280104e-06, "loss": 0.4667, "step": 12377 }, { "epoch": 0.5602172437202987, "grad_norm": 0.6291645665666001, "learning_rate": 4.2711510201030005e-06, "loss": 0.3214, "step": 12378 }, { "epoch": 0.5602625028286943, "grad_norm": 0.6139529937097798, "learning_rate": 4.270425935586624e-06, "loss": 0.2826, "step": 12379 }, { "epoch": 0.5603077619370899, "grad_norm": 0.7866626209421049, "learning_rate": 4.2697008667465515e-06, "loss": 0.3625, "step": 12380 }, { "epoch": 0.5603530210454855, "grad_norm": 0.6575234379812429, "learning_rate": 4.268975813598366e-06, "loss": 0.3586, "step": 12381 }, { "epoch": 0.5603982801538809, "grad_norm": 0.6294270608967876, "learning_rate": 4.268250776157644e-06, "loss": 0.3114, "step": 12382 }, { "epoch": 0.5604435392622765, "grad_norm": 0.6302772266526723, "learning_rate": 4.267525754439967e-06, "loss": 0.3194, "step": 12383 }, { "epoch": 0.5604887983706721, "grad_norm": 0.6089326567380705, "learning_rate": 4.2668007484609106e-06, "loss": 0.3264, "step": 12384 }, { "epoch": 0.5605340574790677, "grad_norm": 0.5959077220631366, "learning_rate": 4.266075758236055e-06, "loss": 0.2781, "step": 12385 }, { "epoch": 0.5605793165874632, "grad_norm": 0.6175674183428848, "learning_rate": 4.265350783780977e-06, "loss": 0.2962, "step": 12386 }, { "epoch": 0.5606245756958588, "grad_norm": 0.544309122649723, "learning_rate": 4.264625825111255e-06, "loss": 0.2856, "step": 12387 }, { "epoch": 0.5606698348042544, "grad_norm": 0.6570475858920106, "learning_rate": 4.2639008822424644e-06, "loss": 0.2918, "step": 12388 }, { "epoch": 0.56071509391265, "grad_norm": 0.5978688068020497, "learning_rate": 4.2631759551901845e-06, "loss": 0.2824, "step": 12389 }, { "epoch": 0.5607603530210454, "grad_norm": 0.689402321038741, "learning_rate": 4.262451043969988e-06, "loss": 0.3136, "step": 12390 }, { "epoch": 0.560805612129441, "grad_norm": 0.3426201622095635, "learning_rate": 4.2617261485974545e-06, "loss": 0.4657, "step": 12391 }, { "epoch": 0.5608508712378366, "grad_norm": 0.6249086540005816, "learning_rate": 4.261001269088161e-06, "loss": 0.3229, "step": 12392 }, { "epoch": 0.5608961303462322, "grad_norm": 0.6209909771694748, "learning_rate": 4.260276405457678e-06, "loss": 0.2768, "step": 12393 }, { "epoch": 0.5609413894546278, "grad_norm": 0.7335251266441353, "learning_rate": 4.259551557721582e-06, "loss": 0.3645, "step": 12394 }, { "epoch": 0.5609866485630233, "grad_norm": 0.6449609075121965, "learning_rate": 4.25882672589545e-06, "loss": 0.3363, "step": 12395 }, { "epoch": 0.5610319076714189, "grad_norm": 0.9284828798571106, "learning_rate": 4.258101909994857e-06, "loss": 0.2566, "step": 12396 }, { "epoch": 0.5610771667798145, "grad_norm": 0.8104997105300392, "learning_rate": 4.257377110035374e-06, "loss": 0.3242, "step": 12397 }, { "epoch": 0.56112242588821, "grad_norm": 0.5587361776941873, "learning_rate": 4.2566523260325755e-06, "loss": 0.294, "step": 12398 }, { "epoch": 0.5611676849966055, "grad_norm": 0.8574056365982415, "learning_rate": 4.255927558002038e-06, "loss": 0.317, "step": 12399 }, { "epoch": 0.5612129441050011, "grad_norm": 0.6338371685512539, "learning_rate": 4.2552028059593294e-06, "loss": 0.2914, "step": 12400 }, { "epoch": 0.5612582032133967, "grad_norm": 0.6167283818031284, "learning_rate": 4.2544780699200265e-06, "loss": 0.32, "step": 12401 }, { "epoch": 0.5613034623217923, "grad_norm": 0.5741515398590784, "learning_rate": 4.2537533498997005e-06, "loss": 0.3176, "step": 12402 }, { "epoch": 0.5613487214301879, "grad_norm": 0.6307609620005522, "learning_rate": 4.253028645913922e-06, "loss": 0.3117, "step": 12403 }, { "epoch": 0.5613939805385834, "grad_norm": 0.3266962933317707, "learning_rate": 4.252303957978263e-06, "loss": 0.4565, "step": 12404 }, { "epoch": 0.561439239646979, "grad_norm": 0.9995670900758218, "learning_rate": 4.251579286108297e-06, "loss": 0.4909, "step": 12405 }, { "epoch": 0.5614844987553745, "grad_norm": 0.8186606301570003, "learning_rate": 4.250854630319593e-06, "loss": 0.3343, "step": 12406 }, { "epoch": 0.5615297578637701, "grad_norm": 0.6546198346426051, "learning_rate": 4.2501299906277225e-06, "loss": 0.3144, "step": 12407 }, { "epoch": 0.5615750169721656, "grad_norm": 0.661691131812228, "learning_rate": 4.249405367048254e-06, "loss": 0.3132, "step": 12408 }, { "epoch": 0.5616202760805612, "grad_norm": 0.6055607608378684, "learning_rate": 4.248680759596761e-06, "loss": 0.3229, "step": 12409 }, { "epoch": 0.5616655351889568, "grad_norm": 0.33701630670332994, "learning_rate": 4.24795616828881e-06, "loss": 0.4666, "step": 12410 }, { "epoch": 0.5617107942973524, "grad_norm": 0.5820937489912922, "learning_rate": 4.247231593139971e-06, "loss": 0.3406, "step": 12411 }, { "epoch": 0.5617560534057479, "grad_norm": 0.6162890639808882, "learning_rate": 4.246507034165815e-06, "loss": 0.2933, "step": 12412 }, { "epoch": 0.5618013125141434, "grad_norm": 0.3381713375818819, "learning_rate": 4.245782491381905e-06, "loss": 0.4581, "step": 12413 }, { "epoch": 0.561846571622539, "grad_norm": 0.6219759160425906, "learning_rate": 4.245057964803815e-06, "loss": 0.252, "step": 12414 }, { "epoch": 0.5618918307309346, "grad_norm": 0.6374850105878566, "learning_rate": 4.244333454447112e-06, "loss": 0.2949, "step": 12415 }, { "epoch": 0.5619370898393302, "grad_norm": 0.2702414442270348, "learning_rate": 4.243608960327361e-06, "loss": 0.4669, "step": 12416 }, { "epoch": 0.5619823489477257, "grad_norm": 0.2660859641437605, "learning_rate": 4.242884482460129e-06, "loss": 0.4573, "step": 12417 }, { "epoch": 0.5620276080561213, "grad_norm": 0.3057110766984944, "learning_rate": 4.242160020860988e-06, "loss": 0.4678, "step": 12418 }, { "epoch": 0.5620728671645169, "grad_norm": 0.6044778611836339, "learning_rate": 4.241435575545496e-06, "loss": 0.3115, "step": 12419 }, { "epoch": 0.5621181262729125, "grad_norm": 0.6265272349973912, "learning_rate": 4.2407111465292265e-06, "loss": 0.331, "step": 12420 }, { "epoch": 0.562163385381308, "grad_norm": 0.2717995782592673, "learning_rate": 4.239986733827742e-06, "loss": 0.4752, "step": 12421 }, { "epoch": 0.5622086444897035, "grad_norm": 0.586424601557017, "learning_rate": 4.239262337456609e-06, "loss": 0.3366, "step": 12422 }, { "epoch": 0.5622539035980991, "grad_norm": 0.30407532265471443, "learning_rate": 4.238537957431389e-06, "loss": 0.4845, "step": 12423 }, { "epoch": 0.5622991627064947, "grad_norm": 0.28007290194860684, "learning_rate": 4.2378135937676515e-06, "loss": 0.467, "step": 12424 }, { "epoch": 0.5623444218148902, "grad_norm": 0.6812849363923967, "learning_rate": 4.23708924648096e-06, "loss": 0.3326, "step": 12425 }, { "epoch": 0.5623896809232858, "grad_norm": 0.6931167864204915, "learning_rate": 4.236364915586877e-06, "loss": 0.3533, "step": 12426 }, { "epoch": 0.5624349400316814, "grad_norm": 0.616717908059227, "learning_rate": 4.2356406011009654e-06, "loss": 0.3113, "step": 12427 }, { "epoch": 0.562480199140077, "grad_norm": 0.6588738954730928, "learning_rate": 4.234916303038793e-06, "loss": 0.3134, "step": 12428 }, { "epoch": 0.5625254582484726, "grad_norm": 0.6045150831513721, "learning_rate": 4.234192021415916e-06, "loss": 0.3124, "step": 12429 }, { "epoch": 0.562570717356868, "grad_norm": 0.6866858585460003, "learning_rate": 4.233467756247901e-06, "loss": 0.3182, "step": 12430 }, { "epoch": 0.5626159764652636, "grad_norm": 0.7354770339037802, "learning_rate": 4.232743507550311e-06, "loss": 0.3195, "step": 12431 }, { "epoch": 0.5626612355736592, "grad_norm": 0.5548344294691757, "learning_rate": 4.232019275338706e-06, "loss": 0.2722, "step": 12432 }, { "epoch": 0.5627064946820548, "grad_norm": 0.6686252850257488, "learning_rate": 4.231295059628647e-06, "loss": 0.3577, "step": 12433 }, { "epoch": 0.5627517537904503, "grad_norm": 0.6753077640957464, "learning_rate": 4.230570860435698e-06, "loss": 0.3318, "step": 12434 }, { "epoch": 0.5627970128988459, "grad_norm": 0.37310213050581825, "learning_rate": 4.2298466777754175e-06, "loss": 0.4465, "step": 12435 }, { "epoch": 0.5628422720072415, "grad_norm": 0.5783839882048719, "learning_rate": 4.2291225116633665e-06, "loss": 0.3255, "step": 12436 }, { "epoch": 0.562887531115637, "grad_norm": 0.6434521887200615, "learning_rate": 4.228398362115103e-06, "loss": 0.3002, "step": 12437 }, { "epoch": 0.5629327902240326, "grad_norm": 0.3290184769604561, "learning_rate": 4.227674229146193e-06, "loss": 0.4682, "step": 12438 }, { "epoch": 0.5629780493324281, "grad_norm": 0.27603873378163185, "learning_rate": 4.226950112772189e-06, "loss": 0.4763, "step": 12439 }, { "epoch": 0.5630233084408237, "grad_norm": 0.6221420328633019, "learning_rate": 4.226226013008654e-06, "loss": 0.2841, "step": 12440 }, { "epoch": 0.5630685675492193, "grad_norm": 0.5667063179695143, "learning_rate": 4.225501929871146e-06, "loss": 0.2746, "step": 12441 }, { "epoch": 0.5631138266576149, "grad_norm": 0.33589286969250104, "learning_rate": 4.22477786337522e-06, "loss": 0.4623, "step": 12442 }, { "epoch": 0.5631590857660104, "grad_norm": 0.6587862966320408, "learning_rate": 4.224053813536439e-06, "loss": 0.3203, "step": 12443 }, { "epoch": 0.563204344874406, "grad_norm": 0.6754237354060931, "learning_rate": 4.223329780370359e-06, "loss": 0.2977, "step": 12444 }, { "epoch": 0.5632496039828015, "grad_norm": 0.663692464836983, "learning_rate": 4.222605763892535e-06, "loss": 0.3681, "step": 12445 }, { "epoch": 0.5632948630911971, "grad_norm": 0.33810651602626374, "learning_rate": 4.221881764118526e-06, "loss": 0.4469, "step": 12446 }, { "epoch": 0.5633401221995926, "grad_norm": 0.6857029521910759, "learning_rate": 4.22115778106389e-06, "loss": 0.2916, "step": 12447 }, { "epoch": 0.5633853813079882, "grad_norm": 0.6359154186799292, "learning_rate": 4.220433814744179e-06, "loss": 0.3617, "step": 12448 }, { "epoch": 0.5634306404163838, "grad_norm": 0.29578040102195857, "learning_rate": 4.219709865174951e-06, "loss": 0.4948, "step": 12449 }, { "epoch": 0.5634758995247794, "grad_norm": 0.6621635262913025, "learning_rate": 4.218985932371764e-06, "loss": 0.3066, "step": 12450 }, { "epoch": 0.563521158633175, "grad_norm": 0.5902543749544588, "learning_rate": 4.218262016350169e-06, "loss": 0.2919, "step": 12451 }, { "epoch": 0.5635664177415705, "grad_norm": 0.8094348894797204, "learning_rate": 4.21753811712572e-06, "loss": 0.3323, "step": 12452 }, { "epoch": 0.563611676849966, "grad_norm": 0.6149692212115275, "learning_rate": 4.2168142347139765e-06, "loss": 0.3319, "step": 12453 }, { "epoch": 0.5636569359583616, "grad_norm": 0.7002479435493842, "learning_rate": 4.21609036913049e-06, "loss": 0.2577, "step": 12454 }, { "epoch": 0.5637021950667572, "grad_norm": 0.6264439152159017, "learning_rate": 4.2153665203908125e-06, "loss": 0.2823, "step": 12455 }, { "epoch": 0.5637474541751527, "grad_norm": 0.6442835838787396, "learning_rate": 4.214642688510498e-06, "loss": 0.3785, "step": 12456 }, { "epoch": 0.5637927132835483, "grad_norm": 0.5754049656613867, "learning_rate": 4.213918873505103e-06, "loss": 0.3356, "step": 12457 }, { "epoch": 0.5638379723919439, "grad_norm": 0.6206389488426615, "learning_rate": 4.213195075390175e-06, "loss": 0.2787, "step": 12458 }, { "epoch": 0.5638832315003395, "grad_norm": 0.6025820884307962, "learning_rate": 4.212471294181269e-06, "loss": 0.3161, "step": 12459 }, { "epoch": 0.563928490608735, "grad_norm": 0.6599164073600439, "learning_rate": 4.211747529893936e-06, "loss": 0.3361, "step": 12460 }, { "epoch": 0.5639737497171305, "grad_norm": 0.4259348750726592, "learning_rate": 4.2110237825437275e-06, "loss": 0.4476, "step": 12461 }, { "epoch": 0.5640190088255261, "grad_norm": 0.6265420256719098, "learning_rate": 4.210300052146194e-06, "loss": 0.3217, "step": 12462 }, { "epoch": 0.5640642679339217, "grad_norm": 0.6012447982413059, "learning_rate": 4.2095763387168895e-06, "loss": 0.3401, "step": 12463 }, { "epoch": 0.5641095270423173, "grad_norm": 0.6426222003181654, "learning_rate": 4.208852642271359e-06, "loss": 0.2803, "step": 12464 }, { "epoch": 0.5641547861507128, "grad_norm": 2.3473227350320647, "learning_rate": 4.208128962825157e-06, "loss": 0.2666, "step": 12465 }, { "epoch": 0.5642000452591084, "grad_norm": 0.6419177385904847, "learning_rate": 4.2074053003938296e-06, "loss": 0.3239, "step": 12466 }, { "epoch": 0.564245304367504, "grad_norm": 0.640389380164466, "learning_rate": 4.2066816549929315e-06, "loss": 0.2964, "step": 12467 }, { "epoch": 0.5642905634758996, "grad_norm": 0.5871885642771933, "learning_rate": 4.205958026638006e-06, "loss": 0.3068, "step": 12468 }, { "epoch": 0.564335822584295, "grad_norm": 0.36427456424647253, "learning_rate": 4.2052344153446035e-06, "loss": 0.4818, "step": 12469 }, { "epoch": 0.5643810816926906, "grad_norm": 0.3353419505539499, "learning_rate": 4.204510821128274e-06, "loss": 0.5034, "step": 12470 }, { "epoch": 0.5644263408010862, "grad_norm": 0.629458993351915, "learning_rate": 4.2037872440045615e-06, "loss": 0.2994, "step": 12471 }, { "epoch": 0.5644715999094818, "grad_norm": 0.7561757437539882, "learning_rate": 4.203063683989017e-06, "loss": 0.3031, "step": 12472 }, { "epoch": 0.5645168590178773, "grad_norm": 0.6085330300726697, "learning_rate": 4.202340141097188e-06, "loss": 0.2994, "step": 12473 }, { "epoch": 0.5645621181262729, "grad_norm": 0.5898340763903122, "learning_rate": 4.2016166153446174e-06, "loss": 0.3275, "step": 12474 }, { "epoch": 0.5646073772346685, "grad_norm": 0.3621647513055581, "learning_rate": 4.200893106746853e-06, "loss": 0.5033, "step": 12475 }, { "epoch": 0.5646526363430641, "grad_norm": 0.6416906160818326, "learning_rate": 4.2001696153194445e-06, "loss": 0.3175, "step": 12476 }, { "epoch": 0.5646978954514597, "grad_norm": 0.6275430528310015, "learning_rate": 4.199446141077932e-06, "loss": 0.3422, "step": 12477 }, { "epoch": 0.5647431545598551, "grad_norm": 0.66606714845945, "learning_rate": 4.198722684037864e-06, "loss": 0.2949, "step": 12478 }, { "epoch": 0.5647884136682507, "grad_norm": 0.6124342570104664, "learning_rate": 4.197999244214783e-06, "loss": 0.3036, "step": 12479 }, { "epoch": 0.5648336727766463, "grad_norm": 0.8367497963384407, "learning_rate": 4.197275821624239e-06, "loss": 0.3299, "step": 12480 }, { "epoch": 0.5648789318850419, "grad_norm": 0.3321969996370139, "learning_rate": 4.196552416281768e-06, "loss": 0.482, "step": 12481 }, { "epoch": 0.5649241909934374, "grad_norm": 0.5723278264837929, "learning_rate": 4.19582902820292e-06, "loss": 0.3089, "step": 12482 }, { "epoch": 0.564969450101833, "grad_norm": 0.6631719342411454, "learning_rate": 4.195105657403236e-06, "loss": 0.3272, "step": 12483 }, { "epoch": 0.5650147092102286, "grad_norm": 0.2974743436661958, "learning_rate": 4.19438230389826e-06, "loss": 0.4605, "step": 12484 }, { "epoch": 0.5650599683186242, "grad_norm": 2.4532523574610137, "learning_rate": 4.193658967703532e-06, "loss": 0.2911, "step": 12485 }, { "epoch": 0.5651052274270197, "grad_norm": 0.6637453590331593, "learning_rate": 4.192935648834599e-06, "loss": 0.3498, "step": 12486 }, { "epoch": 0.5651504865354152, "grad_norm": 0.2940291784258796, "learning_rate": 4.192212347306999e-06, "loss": 0.4973, "step": 12487 }, { "epoch": 0.5651957456438108, "grad_norm": 0.5755812778287472, "learning_rate": 4.191489063136274e-06, "loss": 0.3016, "step": 12488 }, { "epoch": 0.5652410047522064, "grad_norm": 0.6427057193532089, "learning_rate": 4.190765796337968e-06, "loss": 0.3179, "step": 12489 }, { "epoch": 0.565286263860602, "grad_norm": 0.6591412378938449, "learning_rate": 4.190042546927618e-06, "loss": 0.3303, "step": 12490 }, { "epoch": 0.5653315229689975, "grad_norm": 0.32428203390183824, "learning_rate": 4.189319314920766e-06, "loss": 0.4698, "step": 12491 }, { "epoch": 0.5653767820773931, "grad_norm": 0.6346047768285362, "learning_rate": 4.188596100332953e-06, "loss": 0.3052, "step": 12492 }, { "epoch": 0.5654220411857886, "grad_norm": 0.5467035608516998, "learning_rate": 4.1878729031797165e-06, "loss": 0.4703, "step": 12493 }, { "epoch": 0.5654673002941842, "grad_norm": 0.6546999103912576, "learning_rate": 4.187149723476597e-06, "loss": 0.3133, "step": 12494 }, { "epoch": 0.5655125594025797, "grad_norm": 0.6745645431237851, "learning_rate": 4.186426561239134e-06, "loss": 0.3387, "step": 12495 }, { "epoch": 0.5655578185109753, "grad_norm": 0.6720264368361668, "learning_rate": 4.185703416482867e-06, "loss": 0.2982, "step": 12496 }, { "epoch": 0.5656030776193709, "grad_norm": 0.5966460736841118, "learning_rate": 4.184980289223331e-06, "loss": 0.3212, "step": 12497 }, { "epoch": 0.5656483367277665, "grad_norm": 0.5941486030441155, "learning_rate": 4.184257179476065e-06, "loss": 0.4905, "step": 12498 }, { "epoch": 0.5656935958361621, "grad_norm": 0.6087554097990314, "learning_rate": 4.183534087256609e-06, "loss": 0.283, "step": 12499 }, { "epoch": 0.5657388549445576, "grad_norm": 0.6923120727230525, "learning_rate": 4.182811012580495e-06, "loss": 0.3618, "step": 12500 }, { "epoch": 0.5657841140529531, "grad_norm": 0.6157486806061441, "learning_rate": 4.182087955463264e-06, "loss": 0.3, "step": 12501 }, { "epoch": 0.5658293731613487, "grad_norm": 0.5838262062234353, "learning_rate": 4.181364915920453e-06, "loss": 0.316, "step": 12502 }, { "epoch": 0.5658746322697443, "grad_norm": 0.7193124084515455, "learning_rate": 4.180641893967593e-06, "loss": 0.3127, "step": 12503 }, { "epoch": 0.5659198913781398, "grad_norm": 0.655315477496645, "learning_rate": 4.179918889620221e-06, "loss": 0.3122, "step": 12504 }, { "epoch": 0.5659651504865354, "grad_norm": 0.645451805585538, "learning_rate": 4.179195902893878e-06, "loss": 0.3616, "step": 12505 }, { "epoch": 0.566010409594931, "grad_norm": 0.6778970618048709, "learning_rate": 4.17847293380409e-06, "loss": 0.2893, "step": 12506 }, { "epoch": 0.5660556687033266, "grad_norm": 0.5948215804243057, "learning_rate": 4.177749982366397e-06, "loss": 0.3205, "step": 12507 }, { "epoch": 0.566100927811722, "grad_norm": 0.6467076040360541, "learning_rate": 4.17702704859633e-06, "loss": 0.3061, "step": 12508 }, { "epoch": 0.5661461869201176, "grad_norm": 0.5704107704943451, "learning_rate": 4.176304132509428e-06, "loss": 0.3304, "step": 12509 }, { "epoch": 0.5661914460285132, "grad_norm": 0.6390073058823326, "learning_rate": 4.175581234121216e-06, "loss": 0.3336, "step": 12510 }, { "epoch": 0.5662367051369088, "grad_norm": 0.6960136454544098, "learning_rate": 4.174858353447234e-06, "loss": 0.311, "step": 12511 }, { "epoch": 0.5662819642453044, "grad_norm": 0.6104350462181477, "learning_rate": 4.1741354905030115e-06, "loss": 0.3133, "step": 12512 }, { "epoch": 0.5663272233536999, "grad_norm": 0.2917322371481019, "learning_rate": 4.17341264530408e-06, "loss": 0.4926, "step": 12513 }, { "epoch": 0.5663724824620955, "grad_norm": 0.7144016324699435, "learning_rate": 4.1726898178659714e-06, "loss": 0.3293, "step": 12514 }, { "epoch": 0.5664177415704911, "grad_norm": 0.7909838933014116, "learning_rate": 4.1719670082042194e-06, "loss": 0.2988, "step": 12515 }, { "epoch": 0.5664630006788867, "grad_norm": 0.5857732475977157, "learning_rate": 4.171244216334353e-06, "loss": 0.2753, "step": 12516 }, { "epoch": 0.5665082597872821, "grad_norm": 0.7735522212373134, "learning_rate": 4.1705214422719024e-06, "loss": 0.2952, "step": 12517 }, { "epoch": 0.5665535188956777, "grad_norm": 0.5934958361037524, "learning_rate": 4.1697986860324e-06, "loss": 0.2912, "step": 12518 }, { "epoch": 0.5665987780040733, "grad_norm": 0.5350188760957395, "learning_rate": 4.169075947631371e-06, "loss": 0.2762, "step": 12519 }, { "epoch": 0.5666440371124689, "grad_norm": 0.86593002964982, "learning_rate": 4.1683532270843505e-06, "loss": 0.3131, "step": 12520 }, { "epoch": 0.5666892962208645, "grad_norm": 0.8766304309226985, "learning_rate": 4.1676305244068645e-06, "loss": 0.2886, "step": 12521 }, { "epoch": 0.56673455532926, "grad_norm": 0.6122471541271316, "learning_rate": 4.166907839614442e-06, "loss": 0.2739, "step": 12522 }, { "epoch": 0.5667798144376556, "grad_norm": 0.6411446012105898, "learning_rate": 4.16618517272261e-06, "loss": 0.3459, "step": 12523 }, { "epoch": 0.5668250735460512, "grad_norm": 0.6081462210575242, "learning_rate": 4.165462523746899e-06, "loss": 0.3422, "step": 12524 }, { "epoch": 0.5668703326544468, "grad_norm": 0.5875629432466725, "learning_rate": 4.164739892702836e-06, "loss": 0.2996, "step": 12525 }, { "epoch": 0.5669155917628422, "grad_norm": 0.3287040692234207, "learning_rate": 4.164017279605946e-06, "loss": 0.4936, "step": 12526 }, { "epoch": 0.5669608508712378, "grad_norm": 0.6252197324600116, "learning_rate": 4.163294684471757e-06, "loss": 0.3036, "step": 12527 }, { "epoch": 0.5670061099796334, "grad_norm": 0.301561350521929, "learning_rate": 4.162572107315798e-06, "loss": 0.475, "step": 12528 }, { "epoch": 0.567051369088029, "grad_norm": 0.6671373326093292, "learning_rate": 4.161849548153589e-06, "loss": 0.2777, "step": 12529 }, { "epoch": 0.5670966281964245, "grad_norm": 0.6620647807278043, "learning_rate": 4.161127007000662e-06, "loss": 0.324, "step": 12530 }, { "epoch": 0.5671418873048201, "grad_norm": 0.6390365926440836, "learning_rate": 4.160404483872538e-06, "loss": 0.3413, "step": 12531 }, { "epoch": 0.5671871464132157, "grad_norm": 0.6515037937373498, "learning_rate": 4.159681978784743e-06, "loss": 0.3346, "step": 12532 }, { "epoch": 0.5672324055216112, "grad_norm": 0.70976713212594, "learning_rate": 4.1589594917528006e-06, "loss": 0.2838, "step": 12533 }, { "epoch": 0.5672776646300068, "grad_norm": 0.6151749329986993, "learning_rate": 4.158237022792237e-06, "loss": 0.3601, "step": 12534 }, { "epoch": 0.5673229237384023, "grad_norm": 0.5838365082606324, "learning_rate": 4.157514571918574e-06, "loss": 0.3204, "step": 12535 }, { "epoch": 0.5673681828467979, "grad_norm": 0.6441399562644541, "learning_rate": 4.156792139147336e-06, "loss": 0.3476, "step": 12536 }, { "epoch": 0.5674134419551935, "grad_norm": 0.5794311590688679, "learning_rate": 4.156069724494043e-06, "loss": 0.3006, "step": 12537 }, { "epoch": 0.5674587010635891, "grad_norm": 0.6359896469226998, "learning_rate": 4.155347327974223e-06, "loss": 0.3577, "step": 12538 }, { "epoch": 0.5675039601719846, "grad_norm": 0.9274568004702104, "learning_rate": 4.154624949603391e-06, "loss": 0.3332, "step": 12539 }, { "epoch": 0.5675492192803802, "grad_norm": 0.6045661204640542, "learning_rate": 4.153902589397075e-06, "loss": 0.3121, "step": 12540 }, { "epoch": 0.5675944783887757, "grad_norm": 0.5885564599166756, "learning_rate": 4.153180247370794e-06, "loss": 0.2814, "step": 12541 }, { "epoch": 0.5676397374971713, "grad_norm": 0.6470512546656807, "learning_rate": 4.152457923540068e-06, "loss": 0.3642, "step": 12542 }, { "epoch": 0.5676849966055668, "grad_norm": 0.6405301017985353, "learning_rate": 4.151735617920417e-06, "loss": 0.3252, "step": 12543 }, { "epoch": 0.5677302557139624, "grad_norm": 0.44958018256330395, "learning_rate": 4.151013330527364e-06, "loss": 0.5038, "step": 12544 }, { "epoch": 0.567775514822358, "grad_norm": 0.6490291798847689, "learning_rate": 4.150291061376426e-06, "loss": 0.2978, "step": 12545 }, { "epoch": 0.5678207739307536, "grad_norm": 0.7109462139773439, "learning_rate": 4.149568810483124e-06, "loss": 0.3462, "step": 12546 }, { "epoch": 0.5678660330391492, "grad_norm": 0.6038537660170973, "learning_rate": 4.148846577862977e-06, "loss": 0.3077, "step": 12547 }, { "epoch": 0.5679112921475447, "grad_norm": 0.628410020280818, "learning_rate": 4.148124363531501e-06, "loss": 0.324, "step": 12548 }, { "epoch": 0.5679565512559402, "grad_norm": 0.6446328523395594, "learning_rate": 4.147402167504218e-06, "loss": 0.3144, "step": 12549 }, { "epoch": 0.5680018103643358, "grad_norm": 0.5789304471638873, "learning_rate": 4.146679989796643e-06, "loss": 0.3268, "step": 12550 }, { "epoch": 0.5680470694727314, "grad_norm": 0.2817358866934816, "learning_rate": 4.145957830424294e-06, "loss": 0.486, "step": 12551 }, { "epoch": 0.5680923285811269, "grad_norm": 0.6622630583860681, "learning_rate": 4.145235689402688e-06, "loss": 0.3289, "step": 12552 }, { "epoch": 0.5681375876895225, "grad_norm": 0.3054074301809164, "learning_rate": 4.144513566747342e-06, "loss": 0.4641, "step": 12553 }, { "epoch": 0.5681828467979181, "grad_norm": 0.6051067622157187, "learning_rate": 4.143791462473774e-06, "loss": 0.2911, "step": 12554 }, { "epoch": 0.5682281059063137, "grad_norm": 0.27926213530322874, "learning_rate": 4.143069376597496e-06, "loss": 0.4848, "step": 12555 }, { "epoch": 0.5682733650147093, "grad_norm": 0.6382366524036927, "learning_rate": 4.142347309134024e-06, "loss": 0.3453, "step": 12556 }, { "epoch": 0.5683186241231047, "grad_norm": 0.6275300895206359, "learning_rate": 4.141625260098878e-06, "loss": 0.2998, "step": 12557 }, { "epoch": 0.5683638832315003, "grad_norm": 0.5958461166824709, "learning_rate": 4.140903229507566e-06, "loss": 0.2994, "step": 12558 }, { "epoch": 0.5684091423398959, "grad_norm": 0.6190751677743475, "learning_rate": 4.1401812173756055e-06, "loss": 0.2883, "step": 12559 }, { "epoch": 0.5684544014482915, "grad_norm": 0.7678343018745986, "learning_rate": 4.139459223718511e-06, "loss": 0.2983, "step": 12560 }, { "epoch": 0.568499660556687, "grad_norm": 0.3230824047445227, "learning_rate": 4.138737248551793e-06, "loss": 0.4877, "step": 12561 }, { "epoch": 0.5685449196650826, "grad_norm": 0.6874374038803182, "learning_rate": 4.1380152918909665e-06, "loss": 0.295, "step": 12562 }, { "epoch": 0.5685901787734782, "grad_norm": 0.6884284518616037, "learning_rate": 4.137293353751546e-06, "loss": 0.3186, "step": 12563 }, { "epoch": 0.5686354378818738, "grad_norm": 0.5863243128631148, "learning_rate": 4.13657143414904e-06, "loss": 0.3564, "step": 12564 }, { "epoch": 0.5686806969902692, "grad_norm": 0.627551766252621, "learning_rate": 4.1358495330989625e-06, "loss": 0.294, "step": 12565 }, { "epoch": 0.5687259560986648, "grad_norm": 0.27480827574499894, "learning_rate": 4.1351276506168235e-06, "loss": 0.4816, "step": 12566 }, { "epoch": 0.5687712152070604, "grad_norm": 0.6537997783814505, "learning_rate": 4.134405786718138e-06, "loss": 0.3285, "step": 12567 }, { "epoch": 0.568816474315456, "grad_norm": 0.6606229899697229, "learning_rate": 4.133683941418411e-06, "loss": 0.3106, "step": 12568 }, { "epoch": 0.5688617334238516, "grad_norm": 0.6383340830097654, "learning_rate": 4.132962114733156e-06, "loss": 0.3025, "step": 12569 }, { "epoch": 0.5689069925322471, "grad_norm": 0.6340031462442014, "learning_rate": 4.132240306677883e-06, "loss": 0.2986, "step": 12570 }, { "epoch": 0.5689522516406427, "grad_norm": 0.7355684662490959, "learning_rate": 4.1315185172681e-06, "loss": 0.3112, "step": 12571 }, { "epoch": 0.5689975107490383, "grad_norm": 0.6237544027549345, "learning_rate": 4.130796746519316e-06, "loss": 0.3361, "step": 12572 }, { "epoch": 0.5690427698574338, "grad_norm": 0.628615082490157, "learning_rate": 4.130074994447042e-06, "loss": 0.3462, "step": 12573 }, { "epoch": 0.5690880289658293, "grad_norm": 0.3102732059637665, "learning_rate": 4.129353261066784e-06, "loss": 0.4807, "step": 12574 }, { "epoch": 0.5691332880742249, "grad_norm": 0.5653306933787046, "learning_rate": 4.12863154639405e-06, "loss": 0.287, "step": 12575 }, { "epoch": 0.5691785471826205, "grad_norm": 0.6455550118701715, "learning_rate": 4.127909850444349e-06, "loss": 0.2981, "step": 12576 }, { "epoch": 0.5692238062910161, "grad_norm": 0.6443727050940745, "learning_rate": 4.127188173233185e-06, "loss": 0.3185, "step": 12577 }, { "epoch": 0.5692690653994116, "grad_norm": 0.6621726582166643, "learning_rate": 4.126466514776067e-06, "loss": 0.3418, "step": 12578 }, { "epoch": 0.5693143245078072, "grad_norm": 0.6077376379773018, "learning_rate": 4.125744875088502e-06, "loss": 0.3396, "step": 12579 }, { "epoch": 0.5693595836162028, "grad_norm": 0.28837605872185323, "learning_rate": 4.125023254185995e-06, "loss": 0.4544, "step": 12580 }, { "epoch": 0.5694048427245983, "grad_norm": 0.5750559154445537, "learning_rate": 4.124301652084049e-06, "loss": 0.3333, "step": 12581 }, { "epoch": 0.5694501018329939, "grad_norm": 0.6627604804141155, "learning_rate": 4.123580068798171e-06, "loss": 0.2996, "step": 12582 }, { "epoch": 0.5694953609413894, "grad_norm": 0.7667761592962654, "learning_rate": 4.122858504343868e-06, "loss": 0.3424, "step": 12583 }, { "epoch": 0.569540620049785, "grad_norm": 0.7305511014596607, "learning_rate": 4.1221369587366395e-06, "loss": 0.3573, "step": 12584 }, { "epoch": 0.5695858791581806, "grad_norm": 0.6430158353794494, "learning_rate": 4.121415431991991e-06, "loss": 0.3094, "step": 12585 }, { "epoch": 0.5696311382665762, "grad_norm": 0.7005562455378558, "learning_rate": 4.12069392412543e-06, "loss": 0.3755, "step": 12586 }, { "epoch": 0.5696763973749717, "grad_norm": 0.5827775448153438, "learning_rate": 4.119972435152453e-06, "loss": 0.3079, "step": 12587 }, { "epoch": 0.5697216564833673, "grad_norm": 0.7272577668064656, "learning_rate": 4.119250965088566e-06, "loss": 0.367, "step": 12588 }, { "epoch": 0.5697669155917628, "grad_norm": 0.6866287625304002, "learning_rate": 4.118529513949272e-06, "loss": 0.32, "step": 12589 }, { "epoch": 0.5698121747001584, "grad_norm": 0.6199082633100993, "learning_rate": 4.11780808175007e-06, "loss": 0.3211, "step": 12590 }, { "epoch": 0.569857433808554, "grad_norm": 0.3132223313660452, "learning_rate": 4.1170866685064625e-06, "loss": 0.4791, "step": 12591 }, { "epoch": 0.5699026929169495, "grad_norm": 0.6191028735269731, "learning_rate": 4.116365274233952e-06, "loss": 0.3395, "step": 12592 }, { "epoch": 0.5699479520253451, "grad_norm": 0.3177343220045825, "learning_rate": 4.115643898948039e-06, "loss": 0.4736, "step": 12593 }, { "epoch": 0.5699932111337407, "grad_norm": 0.662381067071977, "learning_rate": 4.114922542664221e-06, "loss": 0.3509, "step": 12594 }, { "epoch": 0.5700384702421363, "grad_norm": 0.6767235362430225, "learning_rate": 4.114201205397998e-06, "loss": 0.3642, "step": 12595 }, { "epoch": 0.5700837293505318, "grad_norm": 0.6522887209274827, "learning_rate": 4.113479887164873e-06, "loss": 0.3442, "step": 12596 }, { "epoch": 0.5701289884589273, "grad_norm": 0.6629478414681544, "learning_rate": 4.112758587980342e-06, "loss": 0.3241, "step": 12597 }, { "epoch": 0.5701742475673229, "grad_norm": 0.5835930500962448, "learning_rate": 4.112037307859903e-06, "loss": 0.3091, "step": 12598 }, { "epoch": 0.5702195066757185, "grad_norm": 0.6259458536357286, "learning_rate": 4.111316046819057e-06, "loss": 0.3226, "step": 12599 }, { "epoch": 0.570264765784114, "grad_norm": 0.6550185899599036, "learning_rate": 4.110594804873297e-06, "loss": 0.2821, "step": 12600 }, { "epoch": 0.5703100248925096, "grad_norm": 0.6371420170637637, "learning_rate": 4.1098735820381244e-06, "loss": 0.3065, "step": 12601 }, { "epoch": 0.5703552840009052, "grad_norm": 0.7330960754239171, "learning_rate": 4.109152378329036e-06, "loss": 0.3473, "step": 12602 }, { "epoch": 0.5704005431093008, "grad_norm": 0.662731391985183, "learning_rate": 4.108431193761525e-06, "loss": 0.3019, "step": 12603 }, { "epoch": 0.5704458022176964, "grad_norm": 0.5955175335506525, "learning_rate": 4.107710028351089e-06, "loss": 0.3158, "step": 12604 }, { "epoch": 0.5704910613260918, "grad_norm": 0.5912044747365863, "learning_rate": 4.106988882113228e-06, "loss": 0.3779, "step": 12605 }, { "epoch": 0.5705363204344874, "grad_norm": 0.7742260903363052, "learning_rate": 4.106267755063429e-06, "loss": 0.3616, "step": 12606 }, { "epoch": 0.570581579542883, "grad_norm": 0.6187984756118377, "learning_rate": 4.105546647217192e-06, "loss": 0.3191, "step": 12607 }, { "epoch": 0.5706268386512786, "grad_norm": 0.6393811844148454, "learning_rate": 4.104825558590011e-06, "loss": 0.2965, "step": 12608 }, { "epoch": 0.5706720977596741, "grad_norm": 0.7597578452839173, "learning_rate": 4.104104489197381e-06, "loss": 0.3399, "step": 12609 }, { "epoch": 0.5707173568680697, "grad_norm": 0.5876493739667094, "learning_rate": 4.1033834390547905e-06, "loss": 0.2856, "step": 12610 }, { "epoch": 0.5707626159764653, "grad_norm": 0.6053375291576475, "learning_rate": 4.102662408177738e-06, "loss": 0.3119, "step": 12611 }, { "epoch": 0.5708078750848609, "grad_norm": 0.6556433596751344, "learning_rate": 4.1019413965817154e-06, "loss": 0.3265, "step": 12612 }, { "epoch": 0.5708531341932563, "grad_norm": 0.4186174147926591, "learning_rate": 4.101220404282213e-06, "loss": 0.4635, "step": 12613 }, { "epoch": 0.5708983933016519, "grad_norm": 0.3760300246390405, "learning_rate": 4.100499431294722e-06, "loss": 0.4867, "step": 12614 }, { "epoch": 0.5709436524100475, "grad_norm": 0.6868071595945288, "learning_rate": 4.099778477634739e-06, "loss": 0.3504, "step": 12615 }, { "epoch": 0.5709889115184431, "grad_norm": 0.6522276286304554, "learning_rate": 4.099057543317749e-06, "loss": 0.2872, "step": 12616 }, { "epoch": 0.5710341706268387, "grad_norm": 0.611826672633969, "learning_rate": 4.098336628359247e-06, "loss": 0.3311, "step": 12617 }, { "epoch": 0.5710794297352342, "grad_norm": 0.5757073653381102, "learning_rate": 4.097615732774722e-06, "loss": 0.3015, "step": 12618 }, { "epoch": 0.5711246888436298, "grad_norm": 0.6543449155667355, "learning_rate": 4.096894856579662e-06, "loss": 0.3205, "step": 12619 }, { "epoch": 0.5711699479520254, "grad_norm": 0.6499685402810728, "learning_rate": 4.096173999789558e-06, "loss": 0.3432, "step": 12620 }, { "epoch": 0.571215207060421, "grad_norm": 0.6168195605009977, "learning_rate": 4.095453162419898e-06, "loss": 0.286, "step": 12621 }, { "epoch": 0.5712604661688164, "grad_norm": 0.6798988113354032, "learning_rate": 4.094732344486174e-06, "loss": 0.2918, "step": 12622 }, { "epoch": 0.571305725277212, "grad_norm": 0.6054918063775061, "learning_rate": 4.0940115460038695e-06, "loss": 0.3076, "step": 12623 }, { "epoch": 0.5713509843856076, "grad_norm": 0.6838615401522805, "learning_rate": 4.093290766988474e-06, "loss": 0.3144, "step": 12624 }, { "epoch": 0.5713962434940032, "grad_norm": 0.6405000541011384, "learning_rate": 4.092570007455477e-06, "loss": 0.3133, "step": 12625 }, { "epoch": 0.5714415026023988, "grad_norm": 0.6336666469050273, "learning_rate": 4.0918492674203634e-06, "loss": 0.3394, "step": 12626 }, { "epoch": 0.5714867617107943, "grad_norm": 0.6549913219931577, "learning_rate": 4.091128546898619e-06, "loss": 0.2751, "step": 12627 }, { "epoch": 0.5715320208191899, "grad_norm": 0.6524068949012286, "learning_rate": 4.090407845905732e-06, "loss": 0.3164, "step": 12628 }, { "epoch": 0.5715772799275854, "grad_norm": 0.6281430874277494, "learning_rate": 4.089687164457184e-06, "loss": 0.3177, "step": 12629 }, { "epoch": 0.571622539035981, "grad_norm": 0.6065271029200794, "learning_rate": 4.088966502568465e-06, "loss": 0.2746, "step": 12630 }, { "epoch": 0.5716677981443765, "grad_norm": 0.5427963923567827, "learning_rate": 4.0882458602550586e-06, "loss": 0.4791, "step": 12631 }, { "epoch": 0.5717130572527721, "grad_norm": 0.6819911887506003, "learning_rate": 4.087525237532447e-06, "loss": 0.3448, "step": 12632 }, { "epoch": 0.5717583163611677, "grad_norm": 0.655494482755223, "learning_rate": 4.086804634416115e-06, "loss": 0.3352, "step": 12633 }, { "epoch": 0.5718035754695633, "grad_norm": 0.6626172355189169, "learning_rate": 4.08608405092155e-06, "loss": 0.3025, "step": 12634 }, { "epoch": 0.5718488345779588, "grad_norm": 0.6266465106621113, "learning_rate": 4.085363487064228e-06, "loss": 0.3052, "step": 12635 }, { "epoch": 0.5718940936863544, "grad_norm": 0.6048553709958095, "learning_rate": 4.084642942859638e-06, "loss": 0.287, "step": 12636 }, { "epoch": 0.5719393527947499, "grad_norm": 0.6391854257326505, "learning_rate": 4.083922418323257e-06, "loss": 0.3331, "step": 12637 }, { "epoch": 0.5719846119031455, "grad_norm": 0.6385617384850278, "learning_rate": 4.083201913470574e-06, "loss": 0.2752, "step": 12638 }, { "epoch": 0.5720298710115411, "grad_norm": 0.6224515114391435, "learning_rate": 4.082481428317063e-06, "loss": 0.2886, "step": 12639 }, { "epoch": 0.5720751301199366, "grad_norm": 0.31548837794382356, "learning_rate": 4.081760962878209e-06, "loss": 0.4714, "step": 12640 }, { "epoch": 0.5721203892283322, "grad_norm": 0.6290456500635456, "learning_rate": 4.081040517169493e-06, "loss": 0.3219, "step": 12641 }, { "epoch": 0.5721656483367278, "grad_norm": 0.6123838167027538, "learning_rate": 4.080320091206392e-06, "loss": 0.3539, "step": 12642 }, { "epoch": 0.5722109074451234, "grad_norm": 0.7026155094801178, "learning_rate": 4.079599685004388e-06, "loss": 0.2924, "step": 12643 }, { "epoch": 0.5722561665535189, "grad_norm": 0.6244310496732987, "learning_rate": 4.078879298578961e-06, "loss": 0.311, "step": 12644 }, { "epoch": 0.5723014256619144, "grad_norm": 0.3315480626927533, "learning_rate": 4.078158931945588e-06, "loss": 0.4704, "step": 12645 }, { "epoch": 0.57234668477031, "grad_norm": 0.28464159746166984, "learning_rate": 4.077438585119748e-06, "loss": 0.485, "step": 12646 }, { "epoch": 0.5723919438787056, "grad_norm": 0.6443006669304278, "learning_rate": 4.076718258116922e-06, "loss": 0.3401, "step": 12647 }, { "epoch": 0.5724372029871011, "grad_norm": 0.3044131293795776, "learning_rate": 4.0759979509525826e-06, "loss": 0.4848, "step": 12648 }, { "epoch": 0.5724824620954967, "grad_norm": 0.630328672719413, "learning_rate": 4.075277663642208e-06, "loss": 0.3524, "step": 12649 }, { "epoch": 0.5725277212038923, "grad_norm": 0.6414834546317076, "learning_rate": 4.074557396201279e-06, "loss": 0.317, "step": 12650 }, { "epoch": 0.5725729803122879, "grad_norm": 0.5825287557417055, "learning_rate": 4.073837148645269e-06, "loss": 0.2713, "step": 12651 }, { "epoch": 0.5726182394206835, "grad_norm": 0.5882568662766171, "learning_rate": 4.073116920989653e-06, "loss": 0.2846, "step": 12652 }, { "epoch": 0.5726634985290789, "grad_norm": 0.3948042084638724, "learning_rate": 4.072396713249907e-06, "loss": 0.4546, "step": 12653 }, { "epoch": 0.5727087576374745, "grad_norm": 0.6078405818525381, "learning_rate": 4.071676525441509e-06, "loss": 0.3553, "step": 12654 }, { "epoch": 0.5727540167458701, "grad_norm": 0.7194445472363838, "learning_rate": 4.07095635757993e-06, "loss": 0.308, "step": 12655 }, { "epoch": 0.5727992758542657, "grad_norm": 0.3168071964095616, "learning_rate": 4.070236209680646e-06, "loss": 0.4775, "step": 12656 }, { "epoch": 0.5728445349626612, "grad_norm": 0.6062929801973241, "learning_rate": 4.069516081759131e-06, "loss": 0.2968, "step": 12657 }, { "epoch": 0.5728897940710568, "grad_norm": 0.6174877429131435, "learning_rate": 4.068795973830856e-06, "loss": 0.3192, "step": 12658 }, { "epoch": 0.5729350531794524, "grad_norm": 0.614924387600183, "learning_rate": 4.068075885911295e-06, "loss": 0.3188, "step": 12659 }, { "epoch": 0.572980312287848, "grad_norm": 0.7104822249231385, "learning_rate": 4.067355818015925e-06, "loss": 0.3344, "step": 12660 }, { "epoch": 0.5730255713962435, "grad_norm": 0.7277396852893528, "learning_rate": 4.0666357701602105e-06, "loss": 0.2727, "step": 12661 }, { "epoch": 0.573070830504639, "grad_norm": 0.5984528852078446, "learning_rate": 4.0659157423596265e-06, "loss": 0.3058, "step": 12662 }, { "epoch": 0.5731160896130346, "grad_norm": 0.5822935757880945, "learning_rate": 4.065195734629646e-06, "loss": 0.3135, "step": 12663 }, { "epoch": 0.5731613487214302, "grad_norm": 0.7260328271628357, "learning_rate": 4.064475746985738e-06, "loss": 0.3307, "step": 12664 }, { "epoch": 0.5732066078298258, "grad_norm": 0.6039575429945476, "learning_rate": 4.063755779443372e-06, "loss": 0.3185, "step": 12665 }, { "epoch": 0.5732518669382213, "grad_norm": 0.6447887050870809, "learning_rate": 4.063035832018018e-06, "loss": 0.3056, "step": 12666 }, { "epoch": 0.5732971260466169, "grad_norm": 0.636991093146034, "learning_rate": 4.06231590472515e-06, "loss": 0.3142, "step": 12667 }, { "epoch": 0.5733423851550125, "grad_norm": 0.6648637483903794, "learning_rate": 4.06159599758023e-06, "loss": 0.3491, "step": 12668 }, { "epoch": 0.573387644263408, "grad_norm": 0.3602955931210812, "learning_rate": 4.060876110598731e-06, "loss": 0.4871, "step": 12669 }, { "epoch": 0.5734329033718035, "grad_norm": 0.5996667931514984, "learning_rate": 4.0601562437961215e-06, "loss": 0.3092, "step": 12670 }, { "epoch": 0.5734781624801991, "grad_norm": 0.591590722739638, "learning_rate": 4.059436397187866e-06, "loss": 0.3432, "step": 12671 }, { "epoch": 0.5735234215885947, "grad_norm": 0.42572110447408806, "learning_rate": 4.0587165707894326e-06, "loss": 0.4563, "step": 12672 }, { "epoch": 0.5735686806969903, "grad_norm": 0.6530519024731599, "learning_rate": 4.0579967646162915e-06, "loss": 0.3394, "step": 12673 }, { "epoch": 0.5736139398053859, "grad_norm": 0.29068368801514916, "learning_rate": 4.057276978683906e-06, "loss": 0.4838, "step": 12674 }, { "epoch": 0.5736591989137814, "grad_norm": 0.6793392061769342, "learning_rate": 4.056557213007743e-06, "loss": 0.2792, "step": 12675 }, { "epoch": 0.573704458022177, "grad_norm": 0.6032629284496717, "learning_rate": 4.055837467603268e-06, "loss": 0.3201, "step": 12676 }, { "epoch": 0.5737497171305725, "grad_norm": 0.6227563593182059, "learning_rate": 4.055117742485944e-06, "loss": 0.3243, "step": 12677 }, { "epoch": 0.5737949762389681, "grad_norm": 0.6140441774350899, "learning_rate": 4.05439803767124e-06, "loss": 0.3283, "step": 12678 }, { "epoch": 0.5738402353473636, "grad_norm": 0.6493695835053662, "learning_rate": 4.053678353174616e-06, "loss": 0.3038, "step": 12679 }, { "epoch": 0.5738854944557592, "grad_norm": 0.674659064650729, "learning_rate": 4.05295868901154e-06, "loss": 0.2927, "step": 12680 }, { "epoch": 0.5739307535641548, "grad_norm": 0.3355983023504748, "learning_rate": 4.052239045197472e-06, "loss": 0.4449, "step": 12681 }, { "epoch": 0.5739760126725504, "grad_norm": 0.6013667689517775, "learning_rate": 4.051519421747876e-06, "loss": 0.2783, "step": 12682 }, { "epoch": 0.5740212717809459, "grad_norm": 0.6710995569903319, "learning_rate": 4.050799818678216e-06, "loss": 0.3296, "step": 12683 }, { "epoch": 0.5740665308893415, "grad_norm": 0.6503624710261977, "learning_rate": 4.050080236003952e-06, "loss": 0.3149, "step": 12684 }, { "epoch": 0.574111789997737, "grad_norm": 0.29002163267180253, "learning_rate": 4.049360673740545e-06, "loss": 0.4615, "step": 12685 }, { "epoch": 0.5741570491061326, "grad_norm": 0.6306557530330095, "learning_rate": 4.04864113190346e-06, "loss": 0.357, "step": 12686 }, { "epoch": 0.5742023082145282, "grad_norm": 0.2837338449412986, "learning_rate": 4.047921610508152e-06, "loss": 0.4879, "step": 12687 }, { "epoch": 0.5742475673229237, "grad_norm": 0.6031911633754032, "learning_rate": 4.047202109570086e-06, "loss": 0.2996, "step": 12688 }, { "epoch": 0.5742928264313193, "grad_norm": 0.7921926294430719, "learning_rate": 4.046482629104722e-06, "loss": 0.3194, "step": 12689 }, { "epoch": 0.5743380855397149, "grad_norm": 0.6333884473496525, "learning_rate": 4.045763169127516e-06, "loss": 0.3046, "step": 12690 }, { "epoch": 0.5743833446481105, "grad_norm": 0.2641626700740687, "learning_rate": 4.045043729653927e-06, "loss": 0.4862, "step": 12691 }, { "epoch": 0.574428603756506, "grad_norm": 0.6023775403806684, "learning_rate": 4.044324310699418e-06, "loss": 0.3217, "step": 12692 }, { "epoch": 0.5744738628649015, "grad_norm": 0.6682577862608416, "learning_rate": 4.043604912279444e-06, "loss": 0.3261, "step": 12693 }, { "epoch": 0.5745191219732971, "grad_norm": 0.6677978891771651, "learning_rate": 4.0428855344094635e-06, "loss": 0.3143, "step": 12694 }, { "epoch": 0.5745643810816927, "grad_norm": 0.5927057380531942, "learning_rate": 4.042166177104932e-06, "loss": 0.3123, "step": 12695 }, { "epoch": 0.5746096401900882, "grad_norm": 0.6226581838813324, "learning_rate": 4.041446840381309e-06, "loss": 0.3114, "step": 12696 }, { "epoch": 0.5746548992984838, "grad_norm": 0.6481231373661983, "learning_rate": 4.040727524254048e-06, "loss": 0.3143, "step": 12697 }, { "epoch": 0.5747001584068794, "grad_norm": 0.6117393568444289, "learning_rate": 4.040008228738607e-06, "loss": 0.3287, "step": 12698 }, { "epoch": 0.574745417515275, "grad_norm": 0.6422235183124816, "learning_rate": 4.039288953850442e-06, "loss": 0.3346, "step": 12699 }, { "epoch": 0.5747906766236706, "grad_norm": 0.6381619210328913, "learning_rate": 4.038569699605005e-06, "loss": 0.2951, "step": 12700 }, { "epoch": 0.574835935732066, "grad_norm": 0.6660502247869946, "learning_rate": 4.037850466017752e-06, "loss": 0.3187, "step": 12701 }, { "epoch": 0.5748811948404616, "grad_norm": 0.6603447028048267, "learning_rate": 4.03713125310414e-06, "loss": 0.3356, "step": 12702 }, { "epoch": 0.5749264539488572, "grad_norm": 0.5892533315864354, "learning_rate": 4.036412060879618e-06, "loss": 0.3152, "step": 12703 }, { "epoch": 0.5749717130572528, "grad_norm": 0.6621800751010352, "learning_rate": 4.035692889359642e-06, "loss": 0.346, "step": 12704 }, { "epoch": 0.5750169721656483, "grad_norm": 0.6373051961133144, "learning_rate": 4.034973738559664e-06, "loss": 0.3314, "step": 12705 }, { "epoch": 0.5750622312740439, "grad_norm": 0.6217914074195996, "learning_rate": 4.034254608495136e-06, "loss": 0.325, "step": 12706 }, { "epoch": 0.5751074903824395, "grad_norm": 0.6660384422033511, "learning_rate": 4.03353549918151e-06, "loss": 0.3181, "step": 12707 }, { "epoch": 0.5751527494908351, "grad_norm": 0.6239159549463238, "learning_rate": 4.032816410634239e-06, "loss": 0.3084, "step": 12708 }, { "epoch": 0.5751980085992306, "grad_norm": 0.37417081939858304, "learning_rate": 4.032097342868774e-06, "loss": 0.4744, "step": 12709 }, { "epoch": 0.5752432677076261, "grad_norm": 0.6254837344720027, "learning_rate": 4.031378295900562e-06, "loss": 0.2774, "step": 12710 }, { "epoch": 0.5752885268160217, "grad_norm": 0.7112272484871678, "learning_rate": 4.030659269745057e-06, "loss": 0.3565, "step": 12711 }, { "epoch": 0.5753337859244173, "grad_norm": 0.5642715316156801, "learning_rate": 4.029940264417708e-06, "loss": 0.3082, "step": 12712 }, { "epoch": 0.5753790450328129, "grad_norm": 0.918628603221368, "learning_rate": 4.0292212799339615e-06, "loss": 0.3061, "step": 12713 }, { "epoch": 0.5754243041412084, "grad_norm": 0.6357959836387956, "learning_rate": 4.028502316309268e-06, "loss": 0.3714, "step": 12714 }, { "epoch": 0.575469563249604, "grad_norm": 0.2789898995520453, "learning_rate": 4.0277833735590785e-06, "loss": 0.4591, "step": 12715 }, { "epoch": 0.5755148223579996, "grad_norm": 0.6074162663412183, "learning_rate": 4.027064451698836e-06, "loss": 0.3302, "step": 12716 }, { "epoch": 0.5755600814663951, "grad_norm": 0.6881224733338933, "learning_rate": 4.026345550743991e-06, "loss": 0.2989, "step": 12717 }, { "epoch": 0.5756053405747906, "grad_norm": 0.6765735227591337, "learning_rate": 4.02562667070999e-06, "loss": 0.3411, "step": 12718 }, { "epoch": 0.5756505996831862, "grad_norm": 0.28841045085364697, "learning_rate": 4.024907811612279e-06, "loss": 0.4767, "step": 12719 }, { "epoch": 0.5756958587915818, "grad_norm": 0.6353278290575984, "learning_rate": 4.024188973466304e-06, "loss": 0.3265, "step": 12720 }, { "epoch": 0.5757411178999774, "grad_norm": 0.26528212661842693, "learning_rate": 4.023470156287511e-06, "loss": 0.4648, "step": 12721 }, { "epoch": 0.575786377008373, "grad_norm": 0.7287600120998405, "learning_rate": 4.022751360091347e-06, "loss": 0.3239, "step": 12722 }, { "epoch": 0.5758316361167685, "grad_norm": 0.7113149968104155, "learning_rate": 4.022032584893253e-06, "loss": 0.308, "step": 12723 }, { "epoch": 0.575876895225164, "grad_norm": 0.581184386254656, "learning_rate": 4.021313830708675e-06, "loss": 0.3078, "step": 12724 }, { "epoch": 0.5759221543335596, "grad_norm": 0.2746754520286916, "learning_rate": 4.0205950975530596e-06, "loss": 0.4655, "step": 12725 }, { "epoch": 0.5759674134419552, "grad_norm": 0.5908541244857715, "learning_rate": 4.019876385441844e-06, "loss": 0.272, "step": 12726 }, { "epoch": 0.5760126725503507, "grad_norm": 0.2643314381459348, "learning_rate": 4.019157694390477e-06, "loss": 0.4895, "step": 12727 }, { "epoch": 0.5760579316587463, "grad_norm": 0.650686614819478, "learning_rate": 4.018439024414399e-06, "loss": 0.3155, "step": 12728 }, { "epoch": 0.5761031907671419, "grad_norm": 0.6030405524006986, "learning_rate": 4.0177203755290496e-06, "loss": 0.3354, "step": 12729 }, { "epoch": 0.5761484498755375, "grad_norm": 0.6695106687422865, "learning_rate": 4.017001747749873e-06, "loss": 0.3462, "step": 12730 }, { "epoch": 0.576193708983933, "grad_norm": 0.5929157274011767, "learning_rate": 4.016283141092311e-06, "loss": 0.2662, "step": 12731 }, { "epoch": 0.5762389680923286, "grad_norm": 0.669110331844809, "learning_rate": 4.015564555571802e-06, "loss": 0.3397, "step": 12732 }, { "epoch": 0.5762842272007241, "grad_norm": 0.6443987887172116, "learning_rate": 4.014845991203787e-06, "loss": 0.3736, "step": 12733 }, { "epoch": 0.5763294863091197, "grad_norm": 0.5731768790903239, "learning_rate": 4.0141274480037065e-06, "loss": 0.3354, "step": 12734 }, { "epoch": 0.5763747454175153, "grad_norm": 0.6561540012298605, "learning_rate": 4.0134089259870005e-06, "loss": 0.3162, "step": 12735 }, { "epoch": 0.5764200045259108, "grad_norm": 0.6221184811404515, "learning_rate": 4.012690425169104e-06, "loss": 0.2947, "step": 12736 }, { "epoch": 0.5764652636343064, "grad_norm": 0.32495705128941155, "learning_rate": 4.011971945565461e-06, "loss": 0.459, "step": 12737 }, { "epoch": 0.576510522742702, "grad_norm": 0.2992483251964859, "learning_rate": 4.011253487191505e-06, "loss": 0.4768, "step": 12738 }, { "epoch": 0.5765557818510976, "grad_norm": 0.2760061214392678, "learning_rate": 4.0105350500626735e-06, "loss": 0.4937, "step": 12739 }, { "epoch": 0.576601040959493, "grad_norm": 0.3164653998547672, "learning_rate": 4.009816634194405e-06, "loss": 0.476, "step": 12740 }, { "epoch": 0.5766463000678886, "grad_norm": 0.6132393217954801, "learning_rate": 4.009098239602139e-06, "loss": 0.2865, "step": 12741 }, { "epoch": 0.5766915591762842, "grad_norm": 0.6912522265863241, "learning_rate": 4.008379866301307e-06, "loss": 0.3452, "step": 12742 }, { "epoch": 0.5767368182846798, "grad_norm": 0.688102334677657, "learning_rate": 4.007661514307344e-06, "loss": 0.3405, "step": 12743 }, { "epoch": 0.5767820773930754, "grad_norm": 0.5962739726272968, "learning_rate": 4.006943183635691e-06, "loss": 0.2986, "step": 12744 }, { "epoch": 0.5768273365014709, "grad_norm": 0.6144464910937142, "learning_rate": 4.006224874301776e-06, "loss": 0.3043, "step": 12745 }, { "epoch": 0.5768725956098665, "grad_norm": 0.34017827111284143, "learning_rate": 4.0055065863210365e-06, "loss": 0.4581, "step": 12746 }, { "epoch": 0.5769178547182621, "grad_norm": 0.6265824965179838, "learning_rate": 4.004788319708908e-06, "loss": 0.3251, "step": 12747 }, { "epoch": 0.5769631138266577, "grad_norm": 0.7435574824893756, "learning_rate": 4.004070074480821e-06, "loss": 0.3459, "step": 12748 }, { "epoch": 0.5770083729350531, "grad_norm": 0.31812262154286036, "learning_rate": 4.003351850652208e-06, "loss": 0.4624, "step": 12749 }, { "epoch": 0.5770536320434487, "grad_norm": 0.7157534177543335, "learning_rate": 4.002633648238504e-06, "loss": 0.3289, "step": 12750 }, { "epoch": 0.5770988911518443, "grad_norm": 0.5694676418437684, "learning_rate": 4.00191546725514e-06, "loss": 0.2835, "step": 12751 }, { "epoch": 0.5771441502602399, "grad_norm": 0.6275407189140688, "learning_rate": 4.001197307717547e-06, "loss": 0.3197, "step": 12752 }, { "epoch": 0.5771894093686354, "grad_norm": 0.6405241813082845, "learning_rate": 4.000479169641155e-06, "loss": 0.2917, "step": 12753 }, { "epoch": 0.577234668477031, "grad_norm": 0.30821742585046996, "learning_rate": 3.999761053041398e-06, "loss": 0.485, "step": 12754 }, { "epoch": 0.5772799275854266, "grad_norm": 0.5630410441773159, "learning_rate": 3.999042957933703e-06, "loss": 0.2894, "step": 12755 }, { "epoch": 0.5773251866938222, "grad_norm": 0.6669897014151894, "learning_rate": 3.9983248843335e-06, "loss": 0.3302, "step": 12756 }, { "epoch": 0.5773704458022177, "grad_norm": 0.7217996992980149, "learning_rate": 3.997606832256221e-06, "loss": 0.3323, "step": 12757 }, { "epoch": 0.5774157049106132, "grad_norm": 0.7234170943206248, "learning_rate": 3.9968888017172905e-06, "loss": 0.3612, "step": 12758 }, { "epoch": 0.5774609640190088, "grad_norm": 0.6280443839476121, "learning_rate": 3.996170792732139e-06, "loss": 0.3308, "step": 12759 }, { "epoch": 0.5775062231274044, "grad_norm": 0.6410420830552802, "learning_rate": 3.995452805316195e-06, "loss": 0.3524, "step": 12760 }, { "epoch": 0.5775514822358, "grad_norm": 0.6175844970036328, "learning_rate": 3.994734839484884e-06, "loss": 0.3039, "step": 12761 }, { "epoch": 0.5775967413441955, "grad_norm": 0.6249105358043234, "learning_rate": 3.994016895253635e-06, "loss": 0.3257, "step": 12762 }, { "epoch": 0.5776420004525911, "grad_norm": 0.6208124682102306, "learning_rate": 3.9932989726378705e-06, "loss": 0.349, "step": 12763 }, { "epoch": 0.5776872595609867, "grad_norm": 0.6246253614348443, "learning_rate": 3.992581071653023e-06, "loss": 0.3263, "step": 12764 }, { "epoch": 0.5777325186693822, "grad_norm": 0.35917458120348167, "learning_rate": 3.991863192314512e-06, "loss": 0.4966, "step": 12765 }, { "epoch": 0.5777777777777777, "grad_norm": 0.5729151681896888, "learning_rate": 3.991145334637765e-06, "loss": 0.3089, "step": 12766 }, { "epoch": 0.5778230368861733, "grad_norm": 0.6572528307452108, "learning_rate": 3.990427498638208e-06, "loss": 0.2974, "step": 12767 }, { "epoch": 0.5778682959945689, "grad_norm": 0.8428472380687402, "learning_rate": 3.98970968433126e-06, "loss": 0.264, "step": 12768 }, { "epoch": 0.5779135551029645, "grad_norm": 0.6736949965479246, "learning_rate": 3.98899189173235e-06, "loss": 0.3117, "step": 12769 }, { "epoch": 0.5779588142113601, "grad_norm": 0.6581459249727647, "learning_rate": 3.988274120856901e-06, "loss": 0.3536, "step": 12770 }, { "epoch": 0.5780040733197556, "grad_norm": 0.28315764861161824, "learning_rate": 3.987556371720331e-06, "loss": 0.482, "step": 12771 }, { "epoch": 0.5780493324281512, "grad_norm": 0.28487596963205375, "learning_rate": 3.986838644338066e-06, "loss": 0.4758, "step": 12772 }, { "epoch": 0.5780945915365467, "grad_norm": 0.2829952282231626, "learning_rate": 3.986120938725529e-06, "loss": 0.4713, "step": 12773 }, { "epoch": 0.5781398506449423, "grad_norm": 0.621195865422692, "learning_rate": 3.9854032548981354e-06, "loss": 0.313, "step": 12774 }, { "epoch": 0.5781851097533378, "grad_norm": 0.611176870304872, "learning_rate": 3.984685592871311e-06, "loss": 0.3479, "step": 12775 }, { "epoch": 0.5782303688617334, "grad_norm": 0.6438316659957177, "learning_rate": 3.983967952660477e-06, "loss": 0.3335, "step": 12776 }, { "epoch": 0.578275627970129, "grad_norm": 0.29361965736641604, "learning_rate": 3.983250334281049e-06, "loss": 0.4994, "step": 12777 }, { "epoch": 0.5783208870785246, "grad_norm": 0.7576573224162063, "learning_rate": 3.982532737748448e-06, "loss": 0.303, "step": 12778 }, { "epoch": 0.5783661461869202, "grad_norm": 0.5893669556988376, "learning_rate": 3.9818151630780945e-06, "loss": 0.2755, "step": 12779 }, { "epoch": 0.5784114052953157, "grad_norm": 0.6434397060345102, "learning_rate": 3.981097610285407e-06, "loss": 0.2798, "step": 12780 }, { "epoch": 0.5784566644037112, "grad_norm": 0.6010668767369041, "learning_rate": 3.980380079385802e-06, "loss": 0.3416, "step": 12781 }, { "epoch": 0.5785019235121068, "grad_norm": 0.6356394138357916, "learning_rate": 3.979662570394696e-06, "loss": 0.3412, "step": 12782 }, { "epoch": 0.5785471826205024, "grad_norm": 0.5965268149709056, "learning_rate": 3.97894508332751e-06, "loss": 0.3152, "step": 12783 }, { "epoch": 0.5785924417288979, "grad_norm": 0.6397490509031601, "learning_rate": 3.978227618199657e-06, "loss": 0.3035, "step": 12784 }, { "epoch": 0.5786377008372935, "grad_norm": 0.7026293236806275, "learning_rate": 3.977510175026555e-06, "loss": 0.3658, "step": 12785 }, { "epoch": 0.5786829599456891, "grad_norm": 0.647604276207456, "learning_rate": 3.976792753823619e-06, "loss": 0.3371, "step": 12786 }, { "epoch": 0.5787282190540847, "grad_norm": 1.547699319518536, "learning_rate": 3.976075354606263e-06, "loss": 0.2776, "step": 12787 }, { "epoch": 0.5787734781624801, "grad_norm": 0.5939460075902443, "learning_rate": 3.975357977389903e-06, "loss": 0.3261, "step": 12788 }, { "epoch": 0.5788187372708757, "grad_norm": 0.6238851371558091, "learning_rate": 3.974640622189955e-06, "loss": 0.2983, "step": 12789 }, { "epoch": 0.5788639963792713, "grad_norm": 0.6823620323587973, "learning_rate": 3.973923289021829e-06, "loss": 0.3346, "step": 12790 }, { "epoch": 0.5789092554876669, "grad_norm": 0.6663588433491106, "learning_rate": 3.97320597790094e-06, "loss": 0.3021, "step": 12791 }, { "epoch": 0.5789545145960625, "grad_norm": 0.7458479916275186, "learning_rate": 3.972488688842701e-06, "loss": 0.3425, "step": 12792 }, { "epoch": 0.578999773704458, "grad_norm": 0.6802867122290038, "learning_rate": 3.971771421862527e-06, "loss": 0.3797, "step": 12793 }, { "epoch": 0.5790450328128536, "grad_norm": 0.6712055248027057, "learning_rate": 3.971054176975825e-06, "loss": 0.3342, "step": 12794 }, { "epoch": 0.5790902919212492, "grad_norm": 0.7196814387097701, "learning_rate": 3.970336954198008e-06, "loss": 0.3158, "step": 12795 }, { "epoch": 0.5791355510296448, "grad_norm": 0.6351901479408457, "learning_rate": 3.969619753544491e-06, "loss": 0.2616, "step": 12796 }, { "epoch": 0.5791808101380402, "grad_norm": 0.655342732680527, "learning_rate": 3.968902575030676e-06, "loss": 0.3328, "step": 12797 }, { "epoch": 0.5792260692464358, "grad_norm": 0.770406743505147, "learning_rate": 3.968185418671981e-06, "loss": 0.3492, "step": 12798 }, { "epoch": 0.5792713283548314, "grad_norm": 0.3541400721543433, "learning_rate": 3.967468284483812e-06, "loss": 0.468, "step": 12799 }, { "epoch": 0.579316587463227, "grad_norm": 0.3190653247968186, "learning_rate": 3.966751172481577e-06, "loss": 0.4574, "step": 12800 }, { "epoch": 0.5793618465716225, "grad_norm": 0.6770977156420795, "learning_rate": 3.966034082680686e-06, "loss": 0.2742, "step": 12801 }, { "epoch": 0.5794071056800181, "grad_norm": 0.28384315591545206, "learning_rate": 3.9653170150965494e-06, "loss": 0.4658, "step": 12802 }, { "epoch": 0.5794523647884137, "grad_norm": 0.6288247634755264, "learning_rate": 3.96459996974457e-06, "loss": 0.3507, "step": 12803 }, { "epoch": 0.5794976238968093, "grad_norm": 0.6554554918173014, "learning_rate": 3.963882946640158e-06, "loss": 0.3339, "step": 12804 }, { "epoch": 0.5795428830052048, "grad_norm": 0.6005424677853866, "learning_rate": 3.963165945798718e-06, "loss": 0.3093, "step": 12805 }, { "epoch": 0.5795881421136003, "grad_norm": 0.6177179456099513, "learning_rate": 3.9624489672356605e-06, "loss": 0.3165, "step": 12806 }, { "epoch": 0.5796334012219959, "grad_norm": 0.623730570206981, "learning_rate": 3.961732010966385e-06, "loss": 0.3249, "step": 12807 }, { "epoch": 0.5796786603303915, "grad_norm": 0.5890721069191882, "learning_rate": 3.961015077006301e-06, "loss": 0.3376, "step": 12808 }, { "epoch": 0.5797239194387871, "grad_norm": 0.5774276192360328, "learning_rate": 3.960298165370814e-06, "loss": 0.2701, "step": 12809 }, { "epoch": 0.5797691785471826, "grad_norm": 0.6945883497703088, "learning_rate": 3.959581276075324e-06, "loss": 0.3298, "step": 12810 }, { "epoch": 0.5798144376555782, "grad_norm": 0.5855370075421497, "learning_rate": 3.958864409135236e-06, "loss": 0.3123, "step": 12811 }, { "epoch": 0.5798596967639738, "grad_norm": 0.6266850936855771, "learning_rate": 3.9581475645659565e-06, "loss": 0.3212, "step": 12812 }, { "epoch": 0.5799049558723693, "grad_norm": 0.6205072222388495, "learning_rate": 3.957430742382885e-06, "loss": 0.2849, "step": 12813 }, { "epoch": 0.5799502149807649, "grad_norm": 0.6942275103026728, "learning_rate": 3.956713942601425e-06, "loss": 0.2991, "step": 12814 }, { "epoch": 0.5799954740891604, "grad_norm": 0.4025010941475067, "learning_rate": 3.955997165236979e-06, "loss": 0.4859, "step": 12815 }, { "epoch": 0.580040733197556, "grad_norm": 0.6681984766651937, "learning_rate": 3.955280410304945e-06, "loss": 0.3484, "step": 12816 }, { "epoch": 0.5800859923059516, "grad_norm": 0.6516308948976135, "learning_rate": 3.954563677820729e-06, "loss": 0.3124, "step": 12817 }, { "epoch": 0.5801312514143472, "grad_norm": 0.31077237574827565, "learning_rate": 3.953846967799728e-06, "loss": 0.4557, "step": 12818 }, { "epoch": 0.5801765105227427, "grad_norm": 0.2932590904326027, "learning_rate": 3.953130280257342e-06, "loss": 0.4849, "step": 12819 }, { "epoch": 0.5802217696311383, "grad_norm": 0.6527721169835451, "learning_rate": 3.95241361520897e-06, "loss": 0.2986, "step": 12820 }, { "epoch": 0.5802670287395338, "grad_norm": 0.6206449598878321, "learning_rate": 3.9516969726700135e-06, "loss": 0.2966, "step": 12821 }, { "epoch": 0.5803122878479294, "grad_norm": 0.6256445807776387, "learning_rate": 3.950980352655871e-06, "loss": 0.3191, "step": 12822 }, { "epoch": 0.5803575469563249, "grad_norm": 0.32108889209316294, "learning_rate": 3.950263755181937e-06, "loss": 0.459, "step": 12823 }, { "epoch": 0.5804028060647205, "grad_norm": 0.6527273268980939, "learning_rate": 3.94954718026361e-06, "loss": 0.3327, "step": 12824 }, { "epoch": 0.5804480651731161, "grad_norm": 0.6044083467629512, "learning_rate": 3.948830627916291e-06, "loss": 0.3324, "step": 12825 }, { "epoch": 0.5804933242815117, "grad_norm": 0.4489799411707293, "learning_rate": 3.94811409815537e-06, "loss": 0.4796, "step": 12826 }, { "epoch": 0.5805385833899073, "grad_norm": 0.6309711058364544, "learning_rate": 3.9473975909962484e-06, "loss": 0.3054, "step": 12827 }, { "epoch": 0.5805838424983027, "grad_norm": 0.33958261127758466, "learning_rate": 3.946681106454319e-06, "loss": 0.4489, "step": 12828 }, { "epoch": 0.5806291016066983, "grad_norm": 0.6527249012356833, "learning_rate": 3.9459646445449785e-06, "loss": 0.318, "step": 12829 }, { "epoch": 0.5806743607150939, "grad_norm": 0.5899912082073602, "learning_rate": 3.945248205283618e-06, "loss": 0.2796, "step": 12830 }, { "epoch": 0.5807196198234895, "grad_norm": 0.6648150974363372, "learning_rate": 3.944531788685637e-06, "loss": 0.3251, "step": 12831 }, { "epoch": 0.580764878931885, "grad_norm": 0.33089472321988717, "learning_rate": 3.943815394766426e-06, "loss": 0.4743, "step": 12832 }, { "epoch": 0.5808101380402806, "grad_norm": 0.5800850848571503, "learning_rate": 3.943099023541377e-06, "loss": 0.2991, "step": 12833 }, { "epoch": 0.5808553971486762, "grad_norm": 0.33315107133424154, "learning_rate": 3.942382675025883e-06, "loss": 0.4845, "step": 12834 }, { "epoch": 0.5809006562570718, "grad_norm": 0.598203060435861, "learning_rate": 3.941666349235341e-06, "loss": 0.3057, "step": 12835 }, { "epoch": 0.5809459153654672, "grad_norm": 0.6589915175799442, "learning_rate": 3.9409500461851355e-06, "loss": 0.3694, "step": 12836 }, { "epoch": 0.5809911744738628, "grad_norm": 0.7464556060783643, "learning_rate": 3.9402337658906615e-06, "loss": 0.3176, "step": 12837 }, { "epoch": 0.5810364335822584, "grad_norm": 0.38661431178489153, "learning_rate": 3.93951750836731e-06, "loss": 0.4881, "step": 12838 }, { "epoch": 0.581081692690654, "grad_norm": 0.6059876360330815, "learning_rate": 3.93880127363047e-06, "loss": 0.3421, "step": 12839 }, { "epoch": 0.5811269517990496, "grad_norm": 0.3021531130840247, "learning_rate": 3.938085061695529e-06, "loss": 0.489, "step": 12840 }, { "epoch": 0.5811722109074451, "grad_norm": 0.27781242396514616, "learning_rate": 3.937368872577882e-06, "loss": 0.4935, "step": 12841 }, { "epoch": 0.5812174700158407, "grad_norm": 0.44269264539934433, "learning_rate": 3.9366527062929126e-06, "loss": 0.4707, "step": 12842 }, { "epoch": 0.5812627291242363, "grad_norm": 0.6714753451841458, "learning_rate": 3.935936562856011e-06, "loss": 0.3245, "step": 12843 }, { "epoch": 0.5813079882326319, "grad_norm": 0.5905751013141842, "learning_rate": 3.935220442282565e-06, "loss": 0.3028, "step": 12844 }, { "epoch": 0.5813532473410273, "grad_norm": 0.5738321032246363, "learning_rate": 3.93450434458796e-06, "loss": 0.3371, "step": 12845 }, { "epoch": 0.5813985064494229, "grad_norm": 0.6601455637263614, "learning_rate": 3.933788269787585e-06, "loss": 0.3246, "step": 12846 }, { "epoch": 0.5814437655578185, "grad_norm": 0.5894179831252712, "learning_rate": 3.9330722178968275e-06, "loss": 0.2964, "step": 12847 }, { "epoch": 0.5814890246662141, "grad_norm": 0.6887398419556391, "learning_rate": 3.932356188931069e-06, "loss": 0.2994, "step": 12848 }, { "epoch": 0.5815342837746097, "grad_norm": 0.5871548930199265, "learning_rate": 3.931640182905696e-06, "loss": 0.3156, "step": 12849 }, { "epoch": 0.5815795428830052, "grad_norm": 0.37197516532945846, "learning_rate": 3.930924199836096e-06, "loss": 0.4892, "step": 12850 }, { "epoch": 0.5816248019914008, "grad_norm": 0.572034524958866, "learning_rate": 3.930208239737651e-06, "loss": 0.3032, "step": 12851 }, { "epoch": 0.5816700610997964, "grad_norm": 1.2407798640954382, "learning_rate": 3.929492302625746e-06, "loss": 0.285, "step": 12852 }, { "epoch": 0.5817153202081919, "grad_norm": 0.6219601274131884, "learning_rate": 3.9287763885157625e-06, "loss": 0.3773, "step": 12853 }, { "epoch": 0.5817605793165874, "grad_norm": 0.3062327908374255, "learning_rate": 3.928060497423087e-06, "loss": 0.5083, "step": 12854 }, { "epoch": 0.581805838424983, "grad_norm": 0.6209752052130844, "learning_rate": 3.9273446293630956e-06, "loss": 0.3089, "step": 12855 }, { "epoch": 0.5818510975333786, "grad_norm": 0.6137163222857616, "learning_rate": 3.926628784351175e-06, "loss": 0.3641, "step": 12856 }, { "epoch": 0.5818963566417742, "grad_norm": 0.27066161094848945, "learning_rate": 3.925912962402707e-06, "loss": 0.4472, "step": 12857 }, { "epoch": 0.5819416157501697, "grad_norm": 0.2924520462926132, "learning_rate": 3.925197163533069e-06, "loss": 0.4903, "step": 12858 }, { "epoch": 0.5819868748585653, "grad_norm": 0.5878082240172025, "learning_rate": 3.924481387757642e-06, "loss": 0.2943, "step": 12859 }, { "epoch": 0.5820321339669609, "grad_norm": 0.28326281618130894, "learning_rate": 3.9237656350918095e-06, "loss": 0.4897, "step": 12860 }, { "epoch": 0.5820773930753564, "grad_norm": 0.655266197542611, "learning_rate": 3.9230499055509454e-06, "loss": 0.2985, "step": 12861 }, { "epoch": 0.582122652183752, "grad_norm": 0.6480727118851123, "learning_rate": 3.922334199150433e-06, "loss": 0.2734, "step": 12862 }, { "epoch": 0.5821679112921475, "grad_norm": 0.6601708874608262, "learning_rate": 3.921618515905647e-06, "loss": 0.3413, "step": 12863 }, { "epoch": 0.5822131704005431, "grad_norm": 0.895556337999452, "learning_rate": 3.920902855831969e-06, "loss": 0.2701, "step": 12864 }, { "epoch": 0.5822584295089387, "grad_norm": 0.6330166938868974, "learning_rate": 3.920187218944774e-06, "loss": 0.318, "step": 12865 }, { "epoch": 0.5823036886173343, "grad_norm": 0.6601310669210659, "learning_rate": 3.919471605259438e-06, "loss": 0.2955, "step": 12866 }, { "epoch": 0.5823489477257298, "grad_norm": 0.30908864221676563, "learning_rate": 3.918756014791341e-06, "loss": 0.4634, "step": 12867 }, { "epoch": 0.5823942068341253, "grad_norm": 0.5976452518286259, "learning_rate": 3.9180404475558555e-06, "loss": 0.3537, "step": 12868 }, { "epoch": 0.5824394659425209, "grad_norm": 0.5953767557162067, "learning_rate": 3.917324903568356e-06, "loss": 0.3309, "step": 12869 }, { "epoch": 0.5824847250509165, "grad_norm": 0.6054511531383094, "learning_rate": 3.916609382844221e-06, "loss": 0.3396, "step": 12870 }, { "epoch": 0.582529984159312, "grad_norm": 0.2822725519781029, "learning_rate": 3.915893885398823e-06, "loss": 0.4907, "step": 12871 }, { "epoch": 0.5825752432677076, "grad_norm": 0.6860614582918673, "learning_rate": 3.915178411247535e-06, "loss": 0.2795, "step": 12872 }, { "epoch": 0.5826205023761032, "grad_norm": 0.6228482677721467, "learning_rate": 3.914462960405733e-06, "loss": 0.3569, "step": 12873 }, { "epoch": 0.5826657614844988, "grad_norm": 0.6258446491336329, "learning_rate": 3.913747532888784e-06, "loss": 0.3243, "step": 12874 }, { "epoch": 0.5827110205928944, "grad_norm": 0.6210560291668852, "learning_rate": 3.913032128712068e-06, "loss": 0.3357, "step": 12875 }, { "epoch": 0.5827562797012898, "grad_norm": 0.6804612819642828, "learning_rate": 3.912316747890951e-06, "loss": 0.307, "step": 12876 }, { "epoch": 0.5828015388096854, "grad_norm": 0.2943025412568137, "learning_rate": 3.911601390440809e-06, "loss": 0.4564, "step": 12877 }, { "epoch": 0.582846797918081, "grad_norm": 0.6165984329019698, "learning_rate": 3.910886056377008e-06, "loss": 0.3168, "step": 12878 }, { "epoch": 0.5828920570264766, "grad_norm": 0.6612618568342109, "learning_rate": 3.9101707457149216e-06, "loss": 0.3258, "step": 12879 }, { "epoch": 0.5829373161348721, "grad_norm": 0.6777851209953912, "learning_rate": 3.90945545846992e-06, "loss": 0.292, "step": 12880 }, { "epoch": 0.5829825752432677, "grad_norm": 0.6028725455461451, "learning_rate": 3.908740194657369e-06, "loss": 0.3352, "step": 12881 }, { "epoch": 0.5830278343516633, "grad_norm": 0.6581191427684837, "learning_rate": 3.90802495429264e-06, "loss": 0.3178, "step": 12882 }, { "epoch": 0.5830730934600589, "grad_norm": 0.29210513084469514, "learning_rate": 3.907309737391104e-06, "loss": 0.483, "step": 12883 }, { "epoch": 0.5831183525684545, "grad_norm": 0.6627943810879815, "learning_rate": 3.906594543968122e-06, "loss": 0.3101, "step": 12884 }, { "epoch": 0.5831636116768499, "grad_norm": 0.7161743237685878, "learning_rate": 3.905879374039066e-06, "loss": 0.3404, "step": 12885 }, { "epoch": 0.5832088707852455, "grad_norm": 0.6400180015090664, "learning_rate": 3.905164227619303e-06, "loss": 0.3235, "step": 12886 }, { "epoch": 0.5832541298936411, "grad_norm": 0.266768614980608, "learning_rate": 3.904449104724198e-06, "loss": 0.4719, "step": 12887 }, { "epoch": 0.5832993890020367, "grad_norm": 0.6990613540113876, "learning_rate": 3.903734005369115e-06, "loss": 0.3302, "step": 12888 }, { "epoch": 0.5833446481104322, "grad_norm": 0.5934812759285606, "learning_rate": 3.903018929569424e-06, "loss": 0.3261, "step": 12889 }, { "epoch": 0.5833899072188278, "grad_norm": 0.6164190942026797, "learning_rate": 3.902303877340486e-06, "loss": 0.3165, "step": 12890 }, { "epoch": 0.5834351663272234, "grad_norm": 0.7572763337769141, "learning_rate": 3.9015888486976666e-06, "loss": 0.3159, "step": 12891 }, { "epoch": 0.583480425435619, "grad_norm": 0.31349991781540076, "learning_rate": 3.900873843656328e-06, "loss": 0.4659, "step": 12892 }, { "epoch": 0.5835256845440144, "grad_norm": 0.6034155507082287, "learning_rate": 3.900158862231837e-06, "loss": 0.3018, "step": 12893 }, { "epoch": 0.58357094365241, "grad_norm": 0.27460524405924486, "learning_rate": 3.899443904439553e-06, "loss": 0.4543, "step": 12894 }, { "epoch": 0.5836162027608056, "grad_norm": 0.5654824623125475, "learning_rate": 3.89872897029484e-06, "loss": 0.3148, "step": 12895 }, { "epoch": 0.5836614618692012, "grad_norm": 0.6280443259816898, "learning_rate": 3.8980140598130585e-06, "loss": 0.3318, "step": 12896 }, { "epoch": 0.5837067209775968, "grad_norm": 0.6714072933785529, "learning_rate": 3.89729917300957e-06, "loss": 0.3363, "step": 12897 }, { "epoch": 0.5837519800859923, "grad_norm": 2.789699193483099, "learning_rate": 3.896584309899736e-06, "loss": 0.2579, "step": 12898 }, { "epoch": 0.5837972391943879, "grad_norm": 0.6012598147773093, "learning_rate": 3.895869470498917e-06, "loss": 0.2702, "step": 12899 }, { "epoch": 0.5838424983027835, "grad_norm": 0.6348915755057556, "learning_rate": 3.895154654822471e-06, "loss": 0.3081, "step": 12900 }, { "epoch": 0.583887757411179, "grad_norm": 0.5786267882485973, "learning_rate": 3.894439862885758e-06, "loss": 0.2654, "step": 12901 }, { "epoch": 0.5839330165195745, "grad_norm": 0.5707728278791753, "learning_rate": 3.89372509470414e-06, "loss": 0.2948, "step": 12902 }, { "epoch": 0.5839782756279701, "grad_norm": 0.2980866435477156, "learning_rate": 3.893010350292967e-06, "loss": 0.4875, "step": 12903 }, { "epoch": 0.5840235347363657, "grad_norm": 0.272228408390971, "learning_rate": 3.892295629667604e-06, "loss": 0.4832, "step": 12904 }, { "epoch": 0.5840687938447613, "grad_norm": 0.6108062446836691, "learning_rate": 3.891580932843406e-06, "loss": 0.2819, "step": 12905 }, { "epoch": 0.5841140529531568, "grad_norm": 0.7200864095677929, "learning_rate": 3.890866259835731e-06, "loss": 0.3065, "step": 12906 }, { "epoch": 0.5841593120615524, "grad_norm": 0.6191814565649314, "learning_rate": 3.890151610659931e-06, "loss": 0.3127, "step": 12907 }, { "epoch": 0.584204571169948, "grad_norm": 0.6345450883334448, "learning_rate": 3.8894369853313654e-06, "loss": 0.3008, "step": 12908 }, { "epoch": 0.5842498302783435, "grad_norm": 0.6054599253457945, "learning_rate": 3.888722383865389e-06, "loss": 0.3201, "step": 12909 }, { "epoch": 0.5842950893867391, "grad_norm": 0.6019468470687604, "learning_rate": 3.888007806277355e-06, "loss": 0.2845, "step": 12910 }, { "epoch": 0.5843403484951346, "grad_norm": 0.6457389851076811, "learning_rate": 3.887293252582616e-06, "loss": 0.282, "step": 12911 }, { "epoch": 0.5843856076035302, "grad_norm": 0.666848565280595, "learning_rate": 3.886578722796532e-06, "loss": 0.3555, "step": 12912 }, { "epoch": 0.5844308667119258, "grad_norm": 0.7169589320217262, "learning_rate": 3.885864216934448e-06, "loss": 0.2958, "step": 12913 }, { "epoch": 0.5844761258203214, "grad_norm": 0.7171063512367375, "learning_rate": 3.88514973501172e-06, "loss": 0.302, "step": 12914 }, { "epoch": 0.5845213849287169, "grad_norm": 0.35810858882672103, "learning_rate": 3.884435277043703e-06, "loss": 0.4629, "step": 12915 }, { "epoch": 0.5845666440371124, "grad_norm": 0.3523811905750544, "learning_rate": 3.883720843045744e-06, "loss": 0.4891, "step": 12916 }, { "epoch": 0.584611903145508, "grad_norm": 0.6418237068424264, "learning_rate": 3.883006433033194e-06, "loss": 0.3221, "step": 12917 }, { "epoch": 0.5846571622539036, "grad_norm": 0.6018725955131646, "learning_rate": 3.882292047021407e-06, "loss": 0.3118, "step": 12918 }, { "epoch": 0.5847024213622991, "grad_norm": 0.6411292128169545, "learning_rate": 3.8815776850257325e-06, "loss": 0.2973, "step": 12919 }, { "epoch": 0.5847476804706947, "grad_norm": 0.6211094756032005, "learning_rate": 3.880863347061516e-06, "loss": 0.329, "step": 12920 }, { "epoch": 0.5847929395790903, "grad_norm": 0.620245134177958, "learning_rate": 3.88014903314411e-06, "loss": 0.3365, "step": 12921 }, { "epoch": 0.5848381986874859, "grad_norm": 0.6868312250884809, "learning_rate": 3.879434743288863e-06, "loss": 0.322, "step": 12922 }, { "epoch": 0.5848834577958815, "grad_norm": 0.6462710705303839, "learning_rate": 3.87872047751112e-06, "loss": 0.303, "step": 12923 }, { "epoch": 0.584928716904277, "grad_norm": 0.6655929839190928, "learning_rate": 3.878006235826231e-06, "loss": 0.2956, "step": 12924 }, { "epoch": 0.5849739760126725, "grad_norm": 0.6818995713564633, "learning_rate": 3.877292018249543e-06, "loss": 0.3132, "step": 12925 }, { "epoch": 0.5850192351210681, "grad_norm": 0.38136703826778795, "learning_rate": 3.8765778247964e-06, "loss": 0.4698, "step": 12926 }, { "epoch": 0.5850644942294637, "grad_norm": 0.3412002803460328, "learning_rate": 3.875863655482149e-06, "loss": 0.4696, "step": 12927 }, { "epoch": 0.5851097533378592, "grad_norm": 0.5931125850967812, "learning_rate": 3.875149510322137e-06, "loss": 0.343, "step": 12928 }, { "epoch": 0.5851550124462548, "grad_norm": 0.6466884623426743, "learning_rate": 3.8744353893317075e-06, "loss": 0.3706, "step": 12929 }, { "epoch": 0.5852002715546504, "grad_norm": 0.6434405477840357, "learning_rate": 3.873721292526202e-06, "loss": 0.2481, "step": 12930 }, { "epoch": 0.585245530663046, "grad_norm": 0.7373483580330585, "learning_rate": 3.8730072199209705e-06, "loss": 0.2998, "step": 12931 }, { "epoch": 0.5852907897714416, "grad_norm": 0.9587743096576387, "learning_rate": 3.87229317153135e-06, "loss": 0.3174, "step": 12932 }, { "epoch": 0.585336048879837, "grad_norm": 0.6359927738078779, "learning_rate": 3.871579147372685e-06, "loss": 0.3463, "step": 12933 }, { "epoch": 0.5853813079882326, "grad_norm": 0.34809681752623584, "learning_rate": 3.870865147460319e-06, "loss": 0.4735, "step": 12934 }, { "epoch": 0.5854265670966282, "grad_norm": 0.6740694498628458, "learning_rate": 3.870151171809596e-06, "loss": 0.3279, "step": 12935 }, { "epoch": 0.5854718262050238, "grad_norm": 0.6666175522244245, "learning_rate": 3.869437220435851e-06, "loss": 0.3176, "step": 12936 }, { "epoch": 0.5855170853134193, "grad_norm": 0.31852509391543643, "learning_rate": 3.868723293354429e-06, "loss": 0.474, "step": 12937 }, { "epoch": 0.5855623444218149, "grad_norm": 0.6416405830095192, "learning_rate": 3.8680093905806695e-06, "loss": 0.3437, "step": 12938 }, { "epoch": 0.5856076035302105, "grad_norm": 0.608559978948162, "learning_rate": 3.86729551212991e-06, "loss": 0.2698, "step": 12939 }, { "epoch": 0.585652862638606, "grad_norm": 0.6460179817947529, "learning_rate": 3.866581658017492e-06, "loss": 0.3077, "step": 12940 }, { "epoch": 0.5856981217470015, "grad_norm": 0.6021138283075794, "learning_rate": 3.865867828258754e-06, "loss": 0.3642, "step": 12941 }, { "epoch": 0.5857433808553971, "grad_norm": 0.3031943483218063, "learning_rate": 3.865154022869032e-06, "loss": 0.5104, "step": 12942 }, { "epoch": 0.5857886399637927, "grad_norm": 0.32523217420743983, "learning_rate": 3.864440241863665e-06, "loss": 0.4606, "step": 12943 }, { "epoch": 0.5858338990721883, "grad_norm": 0.6740941885338018, "learning_rate": 3.86372648525799e-06, "loss": 0.337, "step": 12944 }, { "epoch": 0.5858791581805839, "grad_norm": 0.7082381442153999, "learning_rate": 3.863012753067343e-06, "loss": 0.307, "step": 12945 }, { "epoch": 0.5859244172889794, "grad_norm": 0.3069995352713124, "learning_rate": 3.862299045307058e-06, "loss": 0.4816, "step": 12946 }, { "epoch": 0.585969676397375, "grad_norm": 0.6359988599338807, "learning_rate": 3.861585361992474e-06, "loss": 0.3188, "step": 12947 }, { "epoch": 0.5860149355057706, "grad_norm": 0.6044620031315169, "learning_rate": 3.860871703138925e-06, "loss": 0.2627, "step": 12948 }, { "epoch": 0.5860601946141661, "grad_norm": 0.28995056119402646, "learning_rate": 3.860158068761743e-06, "loss": 0.4874, "step": 12949 }, { "epoch": 0.5861054537225616, "grad_norm": 0.6203997084408709, "learning_rate": 3.859444458876264e-06, "loss": 0.3463, "step": 12950 }, { "epoch": 0.5861507128309572, "grad_norm": 0.5766424550001963, "learning_rate": 3.85873087349782e-06, "loss": 0.2999, "step": 12951 }, { "epoch": 0.5861959719393528, "grad_norm": 0.6454390235375199, "learning_rate": 3.8580173126417455e-06, "loss": 0.3003, "step": 12952 }, { "epoch": 0.5862412310477484, "grad_norm": 0.6459639638157862, "learning_rate": 3.857303776323371e-06, "loss": 0.3141, "step": 12953 }, { "epoch": 0.5862864901561439, "grad_norm": 0.674173137586515, "learning_rate": 3.85659026455803e-06, "loss": 0.3233, "step": 12954 }, { "epoch": 0.5863317492645395, "grad_norm": 0.6182253537794538, "learning_rate": 3.855876777361051e-06, "loss": 0.3477, "step": 12955 }, { "epoch": 0.586377008372935, "grad_norm": 0.30382783121496, "learning_rate": 3.855163314747765e-06, "loss": 0.4931, "step": 12956 }, { "epoch": 0.5864222674813306, "grad_norm": 0.6468381071406778, "learning_rate": 3.854449876733507e-06, "loss": 0.3316, "step": 12957 }, { "epoch": 0.5864675265897262, "grad_norm": 0.6063408189941445, "learning_rate": 3.8537364633336e-06, "loss": 0.3195, "step": 12958 }, { "epoch": 0.5865127856981217, "grad_norm": 0.5842726588714356, "learning_rate": 3.853023074563376e-06, "loss": 0.2697, "step": 12959 }, { "epoch": 0.5865580448065173, "grad_norm": 0.5781391825709464, "learning_rate": 3.852309710438165e-06, "loss": 0.3199, "step": 12960 }, { "epoch": 0.5866033039149129, "grad_norm": 0.6755210754771175, "learning_rate": 3.851596370973292e-06, "loss": 0.333, "step": 12961 }, { "epoch": 0.5866485630233085, "grad_norm": 0.6235477776992845, "learning_rate": 3.850883056184087e-06, "loss": 0.3779, "step": 12962 }, { "epoch": 0.586693822131704, "grad_norm": 0.5812073917967391, "learning_rate": 3.850169766085874e-06, "loss": 0.2543, "step": 12963 }, { "epoch": 0.5867390812400995, "grad_norm": 0.6138726206796495, "learning_rate": 3.849456500693985e-06, "loss": 0.3029, "step": 12964 }, { "epoch": 0.5867843403484951, "grad_norm": 0.6533874774901375, "learning_rate": 3.848743260023739e-06, "loss": 0.2815, "step": 12965 }, { "epoch": 0.5868295994568907, "grad_norm": 1.647690739910162, "learning_rate": 3.848030044090464e-06, "loss": 0.3062, "step": 12966 }, { "epoch": 0.5868748585652863, "grad_norm": 0.751645176452105, "learning_rate": 3.847316852909488e-06, "loss": 0.2889, "step": 12967 }, { "epoch": 0.5869201176736818, "grad_norm": 0.6473090293190665, "learning_rate": 3.8466036864961315e-06, "loss": 0.3171, "step": 12968 }, { "epoch": 0.5869653767820774, "grad_norm": 0.6242772006515882, "learning_rate": 3.845890544865718e-06, "loss": 0.3188, "step": 12969 }, { "epoch": 0.587010635890473, "grad_norm": 0.5576862503082809, "learning_rate": 3.845177428033574e-06, "loss": 0.3178, "step": 12970 }, { "epoch": 0.5870558949988686, "grad_norm": 0.6225941302204959, "learning_rate": 3.84446433601502e-06, "loss": 0.3154, "step": 12971 }, { "epoch": 0.587101154107264, "grad_norm": 0.7071677053660017, "learning_rate": 3.843751268825378e-06, "loss": 0.355, "step": 12972 }, { "epoch": 0.5871464132156596, "grad_norm": 0.6348094595951862, "learning_rate": 3.843038226479971e-06, "loss": 0.2623, "step": 12973 }, { "epoch": 0.5871916723240552, "grad_norm": 0.6802360931954542, "learning_rate": 3.842325208994117e-06, "loss": 0.318, "step": 12974 }, { "epoch": 0.5872369314324508, "grad_norm": 0.3432497848458775, "learning_rate": 3.84161221638314e-06, "loss": 0.474, "step": 12975 }, { "epoch": 0.5872821905408463, "grad_norm": 0.3179869040291361, "learning_rate": 3.840899248662358e-06, "loss": 0.4543, "step": 12976 }, { "epoch": 0.5873274496492419, "grad_norm": 0.30280561510928294, "learning_rate": 3.840186305847094e-06, "loss": 0.4646, "step": 12977 }, { "epoch": 0.5873727087576375, "grad_norm": 0.588527362911705, "learning_rate": 3.839473387952662e-06, "loss": 0.2744, "step": 12978 }, { "epoch": 0.5874179678660331, "grad_norm": 0.6391647492314123, "learning_rate": 3.8387604949943816e-06, "loss": 0.3408, "step": 12979 }, { "epoch": 0.5874632269744287, "grad_norm": 0.6678100391164818, "learning_rate": 3.8380476269875745e-06, "loss": 0.3547, "step": 12980 }, { "epoch": 0.5875084860828241, "grad_norm": 0.6755665912320579, "learning_rate": 3.837334783947553e-06, "loss": 0.3443, "step": 12981 }, { "epoch": 0.5875537451912197, "grad_norm": 0.6287999602413568, "learning_rate": 3.836621965889637e-06, "loss": 0.2746, "step": 12982 }, { "epoch": 0.5875990042996153, "grad_norm": 1.2565627186637056, "learning_rate": 3.8359091728291426e-06, "loss": 0.3053, "step": 12983 }, { "epoch": 0.5876442634080109, "grad_norm": 0.6786037263914724, "learning_rate": 3.835196404781383e-06, "loss": 0.3133, "step": 12984 }, { "epoch": 0.5876895225164064, "grad_norm": 0.6310320695975805, "learning_rate": 3.834483661761676e-06, "loss": 0.3334, "step": 12985 }, { "epoch": 0.587734781624802, "grad_norm": 0.6925419971381628, "learning_rate": 3.8337709437853365e-06, "loss": 0.3574, "step": 12986 }, { "epoch": 0.5877800407331976, "grad_norm": 0.4543710896067492, "learning_rate": 3.833058250867677e-06, "loss": 0.4888, "step": 12987 }, { "epoch": 0.5878252998415932, "grad_norm": 0.6252709175360854, "learning_rate": 3.83234558302401e-06, "loss": 0.3251, "step": 12988 }, { "epoch": 0.5878705589499886, "grad_norm": 0.35321906291413185, "learning_rate": 3.8316329402696524e-06, "loss": 0.4555, "step": 12989 }, { "epoch": 0.5879158180583842, "grad_norm": 0.8580523387406522, "learning_rate": 3.8309203226199145e-06, "loss": 0.354, "step": 12990 }, { "epoch": 0.5879610771667798, "grad_norm": 0.648674379215099, "learning_rate": 3.830207730090108e-06, "loss": 0.3289, "step": 12991 }, { "epoch": 0.5880063362751754, "grad_norm": 0.6421814724828729, "learning_rate": 3.829495162695543e-06, "loss": 0.3033, "step": 12992 }, { "epoch": 0.588051595383571, "grad_norm": 0.6463585445701963, "learning_rate": 3.828782620451535e-06, "loss": 0.3507, "step": 12993 }, { "epoch": 0.5880968544919665, "grad_norm": 0.5912068879481209, "learning_rate": 3.828070103373389e-06, "loss": 0.3406, "step": 12994 }, { "epoch": 0.5881421136003621, "grad_norm": 0.6465651509972019, "learning_rate": 3.8273576114764176e-06, "loss": 0.2994, "step": 12995 }, { "epoch": 0.5881873727087576, "grad_norm": 0.6432592889234297, "learning_rate": 3.8266451447759315e-06, "loss": 0.2897, "step": 12996 }, { "epoch": 0.5882326318171532, "grad_norm": 0.640912875516796, "learning_rate": 3.825932703287236e-06, "loss": 0.3524, "step": 12997 }, { "epoch": 0.5882778909255487, "grad_norm": 0.441436437662703, "learning_rate": 3.8252202870256395e-06, "loss": 0.4835, "step": 12998 }, { "epoch": 0.5883231500339443, "grad_norm": 0.39010100191764097, "learning_rate": 3.824507896006454e-06, "loss": 0.4494, "step": 12999 }, { "epoch": 0.5883684091423399, "grad_norm": 0.6478115035019769, "learning_rate": 3.823795530244982e-06, "loss": 0.2936, "step": 13000 }, { "epoch": 0.5884136682507355, "grad_norm": 0.5908031580738624, "learning_rate": 3.823083189756531e-06, "loss": 0.3461, "step": 13001 }, { "epoch": 0.5884589273591311, "grad_norm": 0.6325161225022814, "learning_rate": 3.822370874556408e-06, "loss": 0.3281, "step": 13002 }, { "epoch": 0.5885041864675266, "grad_norm": 0.6765632817653675, "learning_rate": 3.821658584659918e-06, "loss": 0.3767, "step": 13003 }, { "epoch": 0.5885494455759221, "grad_norm": 0.3610735045231856, "learning_rate": 3.820946320082366e-06, "loss": 0.4932, "step": 13004 }, { "epoch": 0.5885947046843177, "grad_norm": 0.7040019211616559, "learning_rate": 3.820234080839057e-06, "loss": 0.3134, "step": 13005 }, { "epoch": 0.5886399637927133, "grad_norm": 0.6711299751775798, "learning_rate": 3.819521866945295e-06, "loss": 0.3553, "step": 13006 }, { "epoch": 0.5886852229011088, "grad_norm": 0.5931720760249749, "learning_rate": 3.81880967841638e-06, "loss": 0.3059, "step": 13007 }, { "epoch": 0.5887304820095044, "grad_norm": 0.36097307950893653, "learning_rate": 3.818097515267618e-06, "loss": 0.4693, "step": 13008 }, { "epoch": 0.5887757411179, "grad_norm": 0.772885550432615, "learning_rate": 3.817385377514312e-06, "loss": 0.2972, "step": 13009 }, { "epoch": 0.5888210002262956, "grad_norm": 0.6133863749155195, "learning_rate": 3.816673265171762e-06, "loss": 0.3071, "step": 13010 }, { "epoch": 0.5888662593346911, "grad_norm": 0.6605504314425381, "learning_rate": 3.815961178255267e-06, "loss": 0.3196, "step": 13011 }, { "epoch": 0.5889115184430866, "grad_norm": 0.5994473366414182, "learning_rate": 3.815249116780133e-06, "loss": 0.297, "step": 13012 }, { "epoch": 0.5889567775514822, "grad_norm": 0.31147458714389, "learning_rate": 3.8145370807616545e-06, "loss": 0.4696, "step": 13013 }, { "epoch": 0.5890020366598778, "grad_norm": 0.27633811696298244, "learning_rate": 3.8138250702151336e-06, "loss": 0.4779, "step": 13014 }, { "epoch": 0.5890472957682734, "grad_norm": 0.6898232002811877, "learning_rate": 3.8131130851558696e-06, "loss": 0.3148, "step": 13015 }, { "epoch": 0.5890925548766689, "grad_norm": 0.6506935341637908, "learning_rate": 3.81240112559916e-06, "loss": 0.3316, "step": 13016 }, { "epoch": 0.5891378139850645, "grad_norm": 0.6508169215201881, "learning_rate": 3.811689191560301e-06, "loss": 0.2769, "step": 13017 }, { "epoch": 0.5891830730934601, "grad_norm": 0.659463492837663, "learning_rate": 3.8109772830545933e-06, "loss": 0.2914, "step": 13018 }, { "epoch": 0.5892283322018557, "grad_norm": 0.6711464673967386, "learning_rate": 3.8102654000973326e-06, "loss": 0.3229, "step": 13019 }, { "epoch": 0.5892735913102511, "grad_norm": 0.6483412067025784, "learning_rate": 3.8095535427038134e-06, "loss": 0.3426, "step": 13020 }, { "epoch": 0.5893188504186467, "grad_norm": 0.5998729560447539, "learning_rate": 3.808841710889332e-06, "loss": 0.2988, "step": 13021 }, { "epoch": 0.5893641095270423, "grad_norm": 0.6400881001941805, "learning_rate": 3.808129904669186e-06, "loss": 0.3056, "step": 13022 }, { "epoch": 0.5894093686354379, "grad_norm": 0.7228179397581488, "learning_rate": 3.807418124058665e-06, "loss": 0.2728, "step": 13023 }, { "epoch": 0.5894546277438334, "grad_norm": 0.646531452310446, "learning_rate": 3.8067063690730672e-06, "loss": 0.318, "step": 13024 }, { "epoch": 0.589499886852229, "grad_norm": 0.6952376992340696, "learning_rate": 3.8059946397276854e-06, "loss": 0.3524, "step": 13025 }, { "epoch": 0.5895451459606246, "grad_norm": 0.4455158133737101, "learning_rate": 3.805282936037811e-06, "loss": 0.5017, "step": 13026 }, { "epoch": 0.5895904050690202, "grad_norm": 0.3768748833292996, "learning_rate": 3.8045712580187356e-06, "loss": 0.4685, "step": 13027 }, { "epoch": 0.5896356641774158, "grad_norm": 0.2977798946241966, "learning_rate": 3.803859605685754e-06, "loss": 0.4487, "step": 13028 }, { "epoch": 0.5896809232858112, "grad_norm": 0.7536806509562007, "learning_rate": 3.803147979054155e-06, "loss": 0.3178, "step": 13029 }, { "epoch": 0.5897261823942068, "grad_norm": 0.34640164397571077, "learning_rate": 3.8024363781392304e-06, "loss": 0.4742, "step": 13030 }, { "epoch": 0.5897714415026024, "grad_norm": 0.6354442806532212, "learning_rate": 3.8017248029562713e-06, "loss": 0.2978, "step": 13031 }, { "epoch": 0.589816700610998, "grad_norm": 0.6308339653666732, "learning_rate": 3.8010132535205634e-06, "loss": 0.3283, "step": 13032 }, { "epoch": 0.5898619597193935, "grad_norm": 0.43622553607956044, "learning_rate": 3.8003017298474e-06, "loss": 0.4627, "step": 13033 }, { "epoch": 0.5899072188277891, "grad_norm": 0.5934010485802537, "learning_rate": 3.7995902319520674e-06, "loss": 0.2918, "step": 13034 }, { "epoch": 0.5899524779361847, "grad_norm": 0.6677173047396981, "learning_rate": 3.7988787598498543e-06, "loss": 0.2955, "step": 13035 }, { "epoch": 0.5899977370445803, "grad_norm": 0.38851060414369165, "learning_rate": 3.7981673135560464e-06, "loss": 0.5038, "step": 13036 }, { "epoch": 0.5900429961529758, "grad_norm": 0.657627358931422, "learning_rate": 3.797455893085933e-06, "loss": 0.3106, "step": 13037 }, { "epoch": 0.5900882552613713, "grad_norm": 0.6333835119681968, "learning_rate": 3.7967444984548e-06, "loss": 0.3411, "step": 13038 }, { "epoch": 0.5901335143697669, "grad_norm": 0.5793072590899354, "learning_rate": 3.796033129677931e-06, "loss": 0.3323, "step": 13039 }, { "epoch": 0.5901787734781625, "grad_norm": 0.6453468287225503, "learning_rate": 3.7953217867706106e-06, "loss": 0.3203, "step": 13040 }, { "epoch": 0.5902240325865581, "grad_norm": 0.6391441798115033, "learning_rate": 3.794610469748129e-06, "loss": 0.2964, "step": 13041 }, { "epoch": 0.5902692916949536, "grad_norm": 0.621375675956063, "learning_rate": 3.793899178625763e-06, "loss": 0.3161, "step": 13042 }, { "epoch": 0.5903145508033492, "grad_norm": 0.6274388766689246, "learning_rate": 3.7931879134188002e-06, "loss": 0.2955, "step": 13043 }, { "epoch": 0.5903598099117447, "grad_norm": 0.6190550008897789, "learning_rate": 3.7924766741425247e-06, "loss": 0.3132, "step": 13044 }, { "epoch": 0.5904050690201403, "grad_norm": 0.616290704979202, "learning_rate": 3.791765460812215e-06, "loss": 0.3291, "step": 13045 }, { "epoch": 0.5904503281285358, "grad_norm": 0.6969178418384471, "learning_rate": 3.7910542734431537e-06, "loss": 0.3059, "step": 13046 }, { "epoch": 0.5904955872369314, "grad_norm": 0.6856688562365002, "learning_rate": 3.7903431120506247e-06, "loss": 0.2953, "step": 13047 }, { "epoch": 0.590540846345327, "grad_norm": 0.4143437058883918, "learning_rate": 3.7896319766499073e-06, "loss": 0.4756, "step": 13048 }, { "epoch": 0.5905861054537226, "grad_norm": 0.6607130200060778, "learning_rate": 3.788920867256281e-06, "loss": 0.2771, "step": 13049 }, { "epoch": 0.5906313645621182, "grad_norm": 0.5988983320355563, "learning_rate": 3.788209783885024e-06, "loss": 0.3414, "step": 13050 }, { "epoch": 0.5906766236705137, "grad_norm": 0.6226333656842887, "learning_rate": 3.7874987265514197e-06, "loss": 0.2825, "step": 13051 }, { "epoch": 0.5907218827789092, "grad_norm": 0.6485822725381329, "learning_rate": 3.786787695270743e-06, "loss": 0.2941, "step": 13052 }, { "epoch": 0.5907671418873048, "grad_norm": 0.6526722775186214, "learning_rate": 3.7860766900582716e-06, "loss": 0.3147, "step": 13053 }, { "epoch": 0.5908124009957004, "grad_norm": 0.2947319369351773, "learning_rate": 3.785365710929286e-06, "loss": 0.4555, "step": 13054 }, { "epoch": 0.5908576601040959, "grad_norm": 0.6537057936992485, "learning_rate": 3.784654757899059e-06, "loss": 0.3248, "step": 13055 }, { "epoch": 0.5909029192124915, "grad_norm": 0.6367926569809269, "learning_rate": 3.783943830982868e-06, "loss": 0.3591, "step": 13056 }, { "epoch": 0.5909481783208871, "grad_norm": 0.6043301757986551, "learning_rate": 3.7832329301959914e-06, "loss": 0.2841, "step": 13057 }, { "epoch": 0.5909934374292827, "grad_norm": 0.6727643037951684, "learning_rate": 3.7825220555537006e-06, "loss": 0.3573, "step": 13058 }, { "epoch": 0.5910386965376782, "grad_norm": 0.590524442701709, "learning_rate": 3.781811207071272e-06, "loss": 0.3215, "step": 13059 }, { "epoch": 0.5910839556460737, "grad_norm": 0.6354132649387176, "learning_rate": 3.781100384763978e-06, "loss": 0.3183, "step": 13060 }, { "epoch": 0.5911292147544693, "grad_norm": 0.6075769280663038, "learning_rate": 3.7803895886470952e-06, "loss": 0.2922, "step": 13061 }, { "epoch": 0.5911744738628649, "grad_norm": 0.592386100884859, "learning_rate": 3.7796788187358934e-06, "loss": 0.3243, "step": 13062 }, { "epoch": 0.5912197329712605, "grad_norm": 0.3625514389963376, "learning_rate": 3.778968075045646e-06, "loss": 0.4747, "step": 13063 }, { "epoch": 0.591264992079656, "grad_norm": 0.6902941165882858, "learning_rate": 3.7782573575916255e-06, "loss": 0.3174, "step": 13064 }, { "epoch": 0.5913102511880516, "grad_norm": 0.6402809000377081, "learning_rate": 3.7775466663890997e-06, "loss": 0.3421, "step": 13065 }, { "epoch": 0.5913555102964472, "grad_norm": 0.30137486000398755, "learning_rate": 3.7768360014533427e-06, "loss": 0.4765, "step": 13066 }, { "epoch": 0.5914007694048428, "grad_norm": 0.5848318681138999, "learning_rate": 3.7761253627996245e-06, "loss": 0.3262, "step": 13067 }, { "epoch": 0.5914460285132382, "grad_norm": 0.6320255325803715, "learning_rate": 3.7754147504432128e-06, "loss": 0.2678, "step": 13068 }, { "epoch": 0.5914912876216338, "grad_norm": 0.6638000769349245, "learning_rate": 3.7747041643993755e-06, "loss": 0.3337, "step": 13069 }, { "epoch": 0.5915365467300294, "grad_norm": 0.6680733135489192, "learning_rate": 3.7739936046833856e-06, "loss": 0.3151, "step": 13070 }, { "epoch": 0.591581805838425, "grad_norm": 0.6145310273808362, "learning_rate": 3.773283071310505e-06, "loss": 0.2493, "step": 13071 }, { "epoch": 0.5916270649468206, "grad_norm": 0.5646272596608798, "learning_rate": 3.7725725642960047e-06, "loss": 0.3029, "step": 13072 }, { "epoch": 0.5916723240552161, "grad_norm": 0.5684147601436456, "learning_rate": 3.7718620836551512e-06, "loss": 0.3276, "step": 13073 }, { "epoch": 0.5917175831636117, "grad_norm": 0.6525397914151486, "learning_rate": 3.7711516294032086e-06, "loss": 0.3275, "step": 13074 }, { "epoch": 0.5917628422720073, "grad_norm": 0.6030261523641939, "learning_rate": 3.770441201555442e-06, "loss": 0.3035, "step": 13075 }, { "epoch": 0.5918081013804029, "grad_norm": 0.658476768697074, "learning_rate": 3.769730800127119e-06, "loss": 0.3092, "step": 13076 }, { "epoch": 0.5918533604887983, "grad_norm": 0.752410239829877, "learning_rate": 3.769020425133503e-06, "loss": 0.3108, "step": 13077 }, { "epoch": 0.5918986195971939, "grad_norm": 0.8037543463094624, "learning_rate": 3.7683100765898573e-06, "loss": 0.3165, "step": 13078 }, { "epoch": 0.5919438787055895, "grad_norm": 0.7767337488831968, "learning_rate": 3.7675997545114435e-06, "loss": 0.3113, "step": 13079 }, { "epoch": 0.5919891378139851, "grad_norm": 0.6177110144688015, "learning_rate": 3.7668894589135284e-06, "loss": 0.3304, "step": 13080 }, { "epoch": 0.5920343969223806, "grad_norm": 0.6972389230091802, "learning_rate": 3.76617918981137e-06, "loss": 0.3054, "step": 13081 }, { "epoch": 0.5920796560307762, "grad_norm": 0.594758697082108, "learning_rate": 3.7654689472202323e-06, "loss": 0.3142, "step": 13082 }, { "epoch": 0.5921249151391718, "grad_norm": 0.6565473637710484, "learning_rate": 3.7647587311553758e-06, "loss": 0.3388, "step": 13083 }, { "epoch": 0.5921701742475673, "grad_norm": 0.3632395504641611, "learning_rate": 3.7640485416320586e-06, "loss": 0.4423, "step": 13084 }, { "epoch": 0.5922154333559629, "grad_norm": 0.6618035706270688, "learning_rate": 3.763338378665543e-06, "loss": 0.2714, "step": 13085 }, { "epoch": 0.5922606924643584, "grad_norm": 0.9727610599227895, "learning_rate": 3.762628242271089e-06, "loss": 0.315, "step": 13086 }, { "epoch": 0.592305951572754, "grad_norm": 0.6265858065970411, "learning_rate": 3.7619181324639526e-06, "loss": 0.2623, "step": 13087 }, { "epoch": 0.5923512106811496, "grad_norm": 0.658143947875074, "learning_rate": 3.761208049259393e-06, "loss": 0.3221, "step": 13088 }, { "epoch": 0.5923964697895452, "grad_norm": 0.2794168498838751, "learning_rate": 3.760497992672667e-06, "loss": 0.4734, "step": 13089 }, { "epoch": 0.5924417288979407, "grad_norm": 0.29591340671507, "learning_rate": 3.7597879627190337e-06, "loss": 0.4726, "step": 13090 }, { "epoch": 0.5924869880063363, "grad_norm": 0.2957711298433703, "learning_rate": 3.7590779594137476e-06, "loss": 0.4824, "step": 13091 }, { "epoch": 0.5925322471147318, "grad_norm": 0.29424958786733124, "learning_rate": 3.758367982772065e-06, "loss": 0.51, "step": 13092 }, { "epoch": 0.5925775062231274, "grad_norm": 0.6516448084403705, "learning_rate": 3.7576580328092416e-06, "loss": 0.322, "step": 13093 }, { "epoch": 0.5926227653315229, "grad_norm": 0.6188868863340756, "learning_rate": 3.7569481095405297e-06, "loss": 0.3076, "step": 13094 }, { "epoch": 0.5926680244399185, "grad_norm": 0.643871355009258, "learning_rate": 3.7562382129811863e-06, "loss": 0.3304, "step": 13095 }, { "epoch": 0.5927132835483141, "grad_norm": 0.613491725606984, "learning_rate": 3.755528343146465e-06, "loss": 0.3142, "step": 13096 }, { "epoch": 0.5927585426567097, "grad_norm": 0.5718218562017449, "learning_rate": 3.7548185000516163e-06, "loss": 0.3098, "step": 13097 }, { "epoch": 0.5928038017651053, "grad_norm": 0.32929129744003455, "learning_rate": 3.7541086837118923e-06, "loss": 0.4664, "step": 13098 }, { "epoch": 0.5928490608735008, "grad_norm": 0.7171791450318256, "learning_rate": 3.7533988941425497e-06, "loss": 0.3681, "step": 13099 }, { "epoch": 0.5928943199818963, "grad_norm": 0.7508003949385689, "learning_rate": 3.7526891313588334e-06, "loss": 0.2816, "step": 13100 }, { "epoch": 0.5929395790902919, "grad_norm": 0.6463903925300547, "learning_rate": 3.7519793953759976e-06, "loss": 0.342, "step": 13101 }, { "epoch": 0.5929848381986875, "grad_norm": 0.6034850507917721, "learning_rate": 3.7512696862092924e-06, "loss": 0.3186, "step": 13102 }, { "epoch": 0.593030097307083, "grad_norm": 0.323643335339662, "learning_rate": 3.750560003873965e-06, "loss": 0.4677, "step": 13103 }, { "epoch": 0.5930753564154786, "grad_norm": 0.5797391881618859, "learning_rate": 3.7498503483852655e-06, "loss": 0.3061, "step": 13104 }, { "epoch": 0.5931206155238742, "grad_norm": 0.6533239166183596, "learning_rate": 3.749140719758444e-06, "loss": 0.2891, "step": 13105 }, { "epoch": 0.5931658746322698, "grad_norm": 0.28435596156172427, "learning_rate": 3.748431118008747e-06, "loss": 0.4748, "step": 13106 }, { "epoch": 0.5932111337406654, "grad_norm": 0.6077688964257816, "learning_rate": 3.7477215431514203e-06, "loss": 0.3061, "step": 13107 }, { "epoch": 0.5932563928490608, "grad_norm": 0.8999032205253109, "learning_rate": 3.74701199520171e-06, "loss": 0.3221, "step": 13108 }, { "epoch": 0.5933016519574564, "grad_norm": 0.34773585807513885, "learning_rate": 3.7463024741748665e-06, "loss": 0.4725, "step": 13109 }, { "epoch": 0.593346911065852, "grad_norm": 0.6586318888584578, "learning_rate": 3.745592980086132e-06, "loss": 0.3044, "step": 13110 }, { "epoch": 0.5933921701742476, "grad_norm": 0.26404863142989515, "learning_rate": 3.744883512950751e-06, "loss": 0.4663, "step": 13111 }, { "epoch": 0.5934374292826431, "grad_norm": 0.6557620068116499, "learning_rate": 3.7441740727839693e-06, "loss": 0.3381, "step": 13112 }, { "epoch": 0.5934826883910387, "grad_norm": 0.72527557830192, "learning_rate": 3.7434646596010284e-06, "loss": 0.3182, "step": 13113 }, { "epoch": 0.5935279474994343, "grad_norm": 0.6732815876045888, "learning_rate": 3.742755273417173e-06, "loss": 0.3369, "step": 13114 }, { "epoch": 0.5935732066078299, "grad_norm": 0.6230884754824806, "learning_rate": 3.742045914247647e-06, "loss": 0.3187, "step": 13115 }, { "epoch": 0.5936184657162253, "grad_norm": 0.6512486014953773, "learning_rate": 3.7413365821076897e-06, "loss": 0.3313, "step": 13116 }, { "epoch": 0.5936637248246209, "grad_norm": 0.6264029046891488, "learning_rate": 3.740627277012542e-06, "loss": 0.2908, "step": 13117 }, { "epoch": 0.5937089839330165, "grad_norm": 0.8811085700085609, "learning_rate": 3.7399179989774483e-06, "loss": 0.2991, "step": 13118 }, { "epoch": 0.5937542430414121, "grad_norm": 0.6397706143848809, "learning_rate": 3.739208748017647e-06, "loss": 0.3208, "step": 13119 }, { "epoch": 0.5937995021498077, "grad_norm": 0.6302304728471242, "learning_rate": 3.7384995241483767e-06, "loss": 0.3121, "step": 13120 }, { "epoch": 0.5938447612582032, "grad_norm": 0.6752519831545459, "learning_rate": 3.737790327384876e-06, "loss": 0.3062, "step": 13121 }, { "epoch": 0.5938900203665988, "grad_norm": 0.6138243669102184, "learning_rate": 3.7370811577423883e-06, "loss": 0.3445, "step": 13122 }, { "epoch": 0.5939352794749944, "grad_norm": 0.6637397709643503, "learning_rate": 3.7363720152361436e-06, "loss": 0.3125, "step": 13123 }, { "epoch": 0.59398053858339, "grad_norm": 0.5809437749619315, "learning_rate": 3.735662899881385e-06, "loss": 0.2955, "step": 13124 }, { "epoch": 0.5940257976917854, "grad_norm": 0.6544272698464104, "learning_rate": 3.734953811693349e-06, "loss": 0.3139, "step": 13125 }, { "epoch": 0.594071056800181, "grad_norm": 1.2643189481680515, "learning_rate": 3.7342447506872686e-06, "loss": 0.3094, "step": 13126 }, { "epoch": 0.5941163159085766, "grad_norm": 0.6409913588796115, "learning_rate": 3.7335357168783802e-06, "loss": 0.2845, "step": 13127 }, { "epoch": 0.5941615750169722, "grad_norm": 0.7650230050867153, "learning_rate": 3.732826710281923e-06, "loss": 0.3311, "step": 13128 }, { "epoch": 0.5942068341253677, "grad_norm": 0.6220906941303066, "learning_rate": 3.7321177309131248e-06, "loss": 0.2978, "step": 13129 }, { "epoch": 0.5942520932337633, "grad_norm": 0.6174783888824636, "learning_rate": 3.7314087787872234e-06, "loss": 0.3028, "step": 13130 }, { "epoch": 0.5942973523421589, "grad_norm": 0.5795536682728678, "learning_rate": 3.73069985391945e-06, "loss": 0.2791, "step": 13131 }, { "epoch": 0.5943426114505544, "grad_norm": 0.5777705970662388, "learning_rate": 3.7299909563250414e-06, "loss": 0.2955, "step": 13132 }, { "epoch": 0.59438787055895, "grad_norm": 0.3486003160911572, "learning_rate": 3.7292820860192235e-06, "loss": 0.4909, "step": 13133 }, { "epoch": 0.5944331296673455, "grad_norm": 0.6233231953624803, "learning_rate": 3.7285732430172327e-06, "loss": 0.2864, "step": 13134 }, { "epoch": 0.5944783887757411, "grad_norm": 0.6304934226609963, "learning_rate": 3.7278644273342982e-06, "loss": 0.3719, "step": 13135 }, { "epoch": 0.5945236478841367, "grad_norm": 0.30752683520412194, "learning_rate": 3.7271556389856493e-06, "loss": 0.4767, "step": 13136 }, { "epoch": 0.5945689069925323, "grad_norm": 0.6703948879065504, "learning_rate": 3.726446877986516e-06, "loss": 0.3436, "step": 13137 }, { "epoch": 0.5946141661009278, "grad_norm": 0.29394166362208135, "learning_rate": 3.725738144352129e-06, "loss": 0.4707, "step": 13138 }, { "epoch": 0.5946594252093234, "grad_norm": 0.6364131387899856, "learning_rate": 3.725029438097715e-06, "loss": 0.3421, "step": 13139 }, { "epoch": 0.594704684317719, "grad_norm": 0.3072016357761098, "learning_rate": 3.7243207592385034e-06, "loss": 0.4662, "step": 13140 }, { "epoch": 0.5947499434261145, "grad_norm": 1.065624468877734, "learning_rate": 3.7236121077897208e-06, "loss": 0.3032, "step": 13141 }, { "epoch": 0.59479520253451, "grad_norm": 0.6361755726188255, "learning_rate": 3.7229034837665923e-06, "loss": 0.329, "step": 13142 }, { "epoch": 0.5948404616429056, "grad_norm": 0.6633173196046636, "learning_rate": 3.722194887184346e-06, "loss": 0.323, "step": 13143 }, { "epoch": 0.5948857207513012, "grad_norm": 0.39576080171503325, "learning_rate": 3.7214863180582085e-06, "loss": 0.4561, "step": 13144 }, { "epoch": 0.5949309798596968, "grad_norm": 1.0278169141323898, "learning_rate": 3.7207777764034027e-06, "loss": 0.2974, "step": 13145 }, { "epoch": 0.5949762389680924, "grad_norm": 0.6570809526656647, "learning_rate": 3.720069262235152e-06, "loss": 0.3285, "step": 13146 }, { "epoch": 0.5950214980764879, "grad_norm": 0.6269859847151987, "learning_rate": 3.7193607755686836e-06, "loss": 0.3014, "step": 13147 }, { "epoch": 0.5950667571848834, "grad_norm": 0.657802326899779, "learning_rate": 3.718652316419219e-06, "loss": 0.3585, "step": 13148 }, { "epoch": 0.595112016293279, "grad_norm": 0.5563274901862401, "learning_rate": 3.7179438848019805e-06, "loss": 0.3145, "step": 13149 }, { "epoch": 0.5951572754016746, "grad_norm": 0.3098645111637703, "learning_rate": 3.7172354807321894e-06, "loss": 0.4512, "step": 13150 }, { "epoch": 0.5952025345100701, "grad_norm": 0.3088313873530433, "learning_rate": 3.7165271042250706e-06, "loss": 0.4798, "step": 13151 }, { "epoch": 0.5952477936184657, "grad_norm": 0.6343676769371877, "learning_rate": 3.7158187552958403e-06, "loss": 0.2714, "step": 13152 }, { "epoch": 0.5952930527268613, "grad_norm": 0.6710306240092611, "learning_rate": 3.7151104339597212e-06, "loss": 0.2857, "step": 13153 }, { "epoch": 0.5953383118352569, "grad_norm": 0.27054839637529265, "learning_rate": 3.7144021402319334e-06, "loss": 0.4489, "step": 13154 }, { "epoch": 0.5953835709436525, "grad_norm": 0.6816851121029694, "learning_rate": 3.713693874127695e-06, "loss": 0.2968, "step": 13155 }, { "epoch": 0.5954288300520479, "grad_norm": 0.6553860649813443, "learning_rate": 3.712985635662223e-06, "loss": 0.3018, "step": 13156 }, { "epoch": 0.5954740891604435, "grad_norm": 0.6782676736421867, "learning_rate": 3.7122774248507386e-06, "loss": 0.304, "step": 13157 }, { "epoch": 0.5955193482688391, "grad_norm": 0.2797039203100852, "learning_rate": 3.7115692417084574e-06, "loss": 0.4485, "step": 13158 }, { "epoch": 0.5955646073772347, "grad_norm": 0.6062200687096395, "learning_rate": 3.7108610862505955e-06, "loss": 0.291, "step": 13159 }, { "epoch": 0.5956098664856302, "grad_norm": 0.5928153101844938, "learning_rate": 3.710152958492369e-06, "loss": 0.3194, "step": 13160 }, { "epoch": 0.5956551255940258, "grad_norm": 0.6381956679356376, "learning_rate": 3.7094448584489955e-06, "loss": 0.3465, "step": 13161 }, { "epoch": 0.5957003847024214, "grad_norm": 0.6051622048176114, "learning_rate": 3.708736786135687e-06, "loss": 0.2943, "step": 13162 }, { "epoch": 0.595745643810817, "grad_norm": 0.7120118076247374, "learning_rate": 3.70802874156766e-06, "loss": 0.2923, "step": 13163 }, { "epoch": 0.5957909029192124, "grad_norm": 0.3312549098297892, "learning_rate": 3.7073207247601285e-06, "loss": 0.4615, "step": 13164 }, { "epoch": 0.595836162027608, "grad_norm": 0.29657193989846825, "learning_rate": 3.7066127357283026e-06, "loss": 0.4679, "step": 13165 }, { "epoch": 0.5958814211360036, "grad_norm": 0.5841555972131169, "learning_rate": 3.705904774487396e-06, "loss": 0.3242, "step": 13166 }, { "epoch": 0.5959266802443992, "grad_norm": 0.28481066257332377, "learning_rate": 3.7051968410526236e-06, "loss": 0.4993, "step": 13167 }, { "epoch": 0.5959719393527948, "grad_norm": 0.6477331403565503, "learning_rate": 3.7044889354391934e-06, "loss": 0.3452, "step": 13168 }, { "epoch": 0.5960171984611903, "grad_norm": 0.6739577213503118, "learning_rate": 3.703781057662317e-06, "loss": 0.2652, "step": 13169 }, { "epoch": 0.5960624575695859, "grad_norm": 0.6052741040937766, "learning_rate": 3.703073207737205e-06, "loss": 0.3061, "step": 13170 }, { "epoch": 0.5961077166779815, "grad_norm": 0.5757953144746631, "learning_rate": 3.7023653856790655e-06, "loss": 0.2465, "step": 13171 }, { "epoch": 0.596152975786377, "grad_norm": 0.6197451940727606, "learning_rate": 3.7016575915031084e-06, "loss": 0.3131, "step": 13172 }, { "epoch": 0.5961982348947725, "grad_norm": 0.6442250708090359, "learning_rate": 3.700949825224544e-06, "loss": 0.3386, "step": 13173 }, { "epoch": 0.5962434940031681, "grad_norm": 0.6828610388809717, "learning_rate": 3.700242086858577e-06, "loss": 0.3504, "step": 13174 }, { "epoch": 0.5962887531115637, "grad_norm": 0.6245904625773707, "learning_rate": 3.6995343764204143e-06, "loss": 0.3109, "step": 13175 }, { "epoch": 0.5963340122199593, "grad_norm": 0.29816155391876076, "learning_rate": 3.6988266939252647e-06, "loss": 0.4575, "step": 13176 }, { "epoch": 0.5963792713283548, "grad_norm": 0.6163325957250416, "learning_rate": 3.698119039388335e-06, "loss": 0.3007, "step": 13177 }, { "epoch": 0.5964245304367504, "grad_norm": 0.5790691504496822, "learning_rate": 3.6974114128248274e-06, "loss": 0.2607, "step": 13178 }, { "epoch": 0.596469789545146, "grad_norm": 0.7598184284764053, "learning_rate": 3.696703814249947e-06, "loss": 0.3196, "step": 13179 }, { "epoch": 0.5965150486535415, "grad_norm": 0.2825882685277589, "learning_rate": 3.695996243678901e-06, "loss": 0.4382, "step": 13180 }, { "epoch": 0.5965603077619371, "grad_norm": 0.6359353171429746, "learning_rate": 3.6952887011268885e-06, "loss": 0.3261, "step": 13181 }, { "epoch": 0.5966055668703326, "grad_norm": 0.6116551042236428, "learning_rate": 3.6945811866091153e-06, "loss": 0.3117, "step": 13182 }, { "epoch": 0.5966508259787282, "grad_norm": 0.7821988598175718, "learning_rate": 3.6938737001407847e-06, "loss": 0.2932, "step": 13183 }, { "epoch": 0.5966960850871238, "grad_norm": 0.621388684245905, "learning_rate": 3.6931662417370956e-06, "loss": 0.3058, "step": 13184 }, { "epoch": 0.5967413441955194, "grad_norm": 0.5882159349528706, "learning_rate": 3.692458811413249e-06, "loss": 0.2793, "step": 13185 }, { "epoch": 0.5967866033039149, "grad_norm": 0.5827495491993245, "learning_rate": 3.6917514091844497e-06, "loss": 0.3099, "step": 13186 }, { "epoch": 0.5968318624123105, "grad_norm": 0.6505631041125741, "learning_rate": 3.691044035065893e-06, "loss": 0.3516, "step": 13187 }, { "epoch": 0.596877121520706, "grad_norm": 0.6166883268007635, "learning_rate": 3.6903366890727792e-06, "loss": 0.298, "step": 13188 }, { "epoch": 0.5969223806291016, "grad_norm": 0.6128453820552738, "learning_rate": 3.6896293712203075e-06, "loss": 0.2935, "step": 13189 }, { "epoch": 0.5969676397374972, "grad_norm": 0.6593897806724689, "learning_rate": 3.6889220815236776e-06, "loss": 0.3052, "step": 13190 }, { "epoch": 0.5970128988458927, "grad_norm": 0.6467674494601752, "learning_rate": 3.688214819998085e-06, "loss": 0.2967, "step": 13191 }, { "epoch": 0.5970581579542883, "grad_norm": 0.9026493624411818, "learning_rate": 3.687507586658726e-06, "loss": 0.3223, "step": 13192 }, { "epoch": 0.5971034170626839, "grad_norm": 0.35026014317474924, "learning_rate": 3.6868003815208003e-06, "loss": 0.4837, "step": 13193 }, { "epoch": 0.5971486761710795, "grad_norm": 0.6215915010677973, "learning_rate": 3.686093204599499e-06, "loss": 0.3092, "step": 13194 }, { "epoch": 0.597193935279475, "grad_norm": 0.6232982673485767, "learning_rate": 3.68538605591002e-06, "loss": 0.2798, "step": 13195 }, { "epoch": 0.5972391943878705, "grad_norm": 0.6129914831636445, "learning_rate": 3.6846789354675584e-06, "loss": 0.3021, "step": 13196 }, { "epoch": 0.5972844534962661, "grad_norm": 0.6216919708011899, "learning_rate": 3.683971843287305e-06, "loss": 0.2759, "step": 13197 }, { "epoch": 0.5973297126046617, "grad_norm": 0.6521665296020159, "learning_rate": 3.6832647793844557e-06, "loss": 0.2936, "step": 13198 }, { "epoch": 0.5973749717130572, "grad_norm": 0.2537563545628852, "learning_rate": 3.6825577437742028e-06, "loss": 0.479, "step": 13199 }, { "epoch": 0.5974202308214528, "grad_norm": 0.5903977732511272, "learning_rate": 3.681850736471736e-06, "loss": 0.2845, "step": 13200 }, { "epoch": 0.5974654899298484, "grad_norm": 0.639447725295969, "learning_rate": 3.6811437574922494e-06, "loss": 0.326, "step": 13201 }, { "epoch": 0.597510749038244, "grad_norm": 0.6507427678934059, "learning_rate": 3.680436806850933e-06, "loss": 0.36, "step": 13202 }, { "epoch": 0.5975560081466396, "grad_norm": 0.6147854529236003, "learning_rate": 3.6797298845629776e-06, "loss": 0.3502, "step": 13203 }, { "epoch": 0.597601267255035, "grad_norm": 0.6453083421397205, "learning_rate": 3.6790229906435706e-06, "loss": 0.3185, "step": 13204 }, { "epoch": 0.5976465263634306, "grad_norm": 0.2641602153652307, "learning_rate": 3.6783161251079026e-06, "loss": 0.4633, "step": 13205 }, { "epoch": 0.5976917854718262, "grad_norm": 0.6993737385876693, "learning_rate": 3.677609287971163e-06, "loss": 0.3133, "step": 13206 }, { "epoch": 0.5977370445802218, "grad_norm": 0.6268767172580938, "learning_rate": 3.676902479248538e-06, "loss": 0.3749, "step": 13207 }, { "epoch": 0.5977823036886173, "grad_norm": 0.27262484330790376, "learning_rate": 3.6761956989552138e-06, "loss": 0.4625, "step": 13208 }, { "epoch": 0.5978275627970129, "grad_norm": 0.8495143981164707, "learning_rate": 3.6754889471063814e-06, "loss": 0.2837, "step": 13209 }, { "epoch": 0.5978728219054085, "grad_norm": 0.33892822582460136, "learning_rate": 3.6747822237172204e-06, "loss": 0.4842, "step": 13210 }, { "epoch": 0.5979180810138041, "grad_norm": 0.6494562517896424, "learning_rate": 3.6740755288029206e-06, "loss": 0.3122, "step": 13211 }, { "epoch": 0.5979633401221995, "grad_norm": 0.6489746897269669, "learning_rate": 3.6733688623786667e-06, "loss": 0.2886, "step": 13212 }, { "epoch": 0.5980085992305951, "grad_norm": 0.27422493756120675, "learning_rate": 3.67266222445964e-06, "loss": 0.4729, "step": 13213 }, { "epoch": 0.5980538583389907, "grad_norm": 0.5409356168569753, "learning_rate": 3.6719556150610243e-06, "loss": 0.2931, "step": 13214 }, { "epoch": 0.5980991174473863, "grad_norm": 0.6618889061564469, "learning_rate": 3.6712490341980057e-06, "loss": 0.306, "step": 13215 }, { "epoch": 0.5981443765557819, "grad_norm": 0.27558100516395273, "learning_rate": 3.6705424818857636e-06, "loss": 0.4711, "step": 13216 }, { "epoch": 0.5981896356641774, "grad_norm": 0.26551145333875725, "learning_rate": 3.6698359581394803e-06, "loss": 0.4569, "step": 13217 }, { "epoch": 0.598234894772573, "grad_norm": 0.6406255430648299, "learning_rate": 3.669129462974337e-06, "loss": 0.309, "step": 13218 }, { "epoch": 0.5982801538809686, "grad_norm": 0.6131297048044994, "learning_rate": 3.668422996405515e-06, "loss": 0.3046, "step": 13219 }, { "epoch": 0.5983254129893641, "grad_norm": 0.7123061004163291, "learning_rate": 3.667716558448192e-06, "loss": 0.3683, "step": 13220 }, { "epoch": 0.5983706720977596, "grad_norm": 0.4038625913971144, "learning_rate": 3.667010149117549e-06, "loss": 0.4673, "step": 13221 }, { "epoch": 0.5984159312061552, "grad_norm": 0.635090127562506, "learning_rate": 3.666303768428765e-06, "loss": 0.2671, "step": 13222 }, { "epoch": 0.5984611903145508, "grad_norm": 0.8239338249393553, "learning_rate": 3.665597416397014e-06, "loss": 0.3068, "step": 13223 }, { "epoch": 0.5985064494229464, "grad_norm": 0.6299322564454998, "learning_rate": 3.6648910930374783e-06, "loss": 0.3097, "step": 13224 }, { "epoch": 0.598551708531342, "grad_norm": 0.28881756200221953, "learning_rate": 3.6641847983653326e-06, "loss": 0.4702, "step": 13225 }, { "epoch": 0.5985969676397375, "grad_norm": 0.5770655353848592, "learning_rate": 3.6634785323957522e-06, "loss": 0.3299, "step": 13226 }, { "epoch": 0.598642226748133, "grad_norm": 0.5883840900394046, "learning_rate": 3.6627722951439125e-06, "loss": 0.2902, "step": 13227 }, { "epoch": 0.5986874858565286, "grad_norm": 0.6533817489703067, "learning_rate": 3.6620660866249922e-06, "loss": 0.2909, "step": 13228 }, { "epoch": 0.5987327449649242, "grad_norm": 0.6342454950558009, "learning_rate": 3.66135990685416e-06, "loss": 0.3526, "step": 13229 }, { "epoch": 0.5987780040733197, "grad_norm": 0.6688804477128425, "learning_rate": 3.6606537558465925e-06, "loss": 0.3083, "step": 13230 }, { "epoch": 0.5988232631817153, "grad_norm": 0.6621555789786193, "learning_rate": 3.6599476336174622e-06, "loss": 0.3435, "step": 13231 }, { "epoch": 0.5988685222901109, "grad_norm": 0.614671513931057, "learning_rate": 3.659241540181943e-06, "loss": 0.3667, "step": 13232 }, { "epoch": 0.5989137813985065, "grad_norm": 0.5872663880952672, "learning_rate": 3.6585354755552032e-06, "loss": 0.3283, "step": 13233 }, { "epoch": 0.598959040506902, "grad_norm": 0.6158303410100913, "learning_rate": 3.6578294397524174e-06, "loss": 0.3528, "step": 13234 }, { "epoch": 0.5990042996152976, "grad_norm": 0.6203668634959106, "learning_rate": 3.657123432788755e-06, "loss": 0.3133, "step": 13235 }, { "epoch": 0.5990495587236931, "grad_norm": 0.6394831186748366, "learning_rate": 3.656417454679385e-06, "loss": 0.3182, "step": 13236 }, { "epoch": 0.5990948178320887, "grad_norm": 0.30639042230507724, "learning_rate": 3.6557115054394764e-06, "loss": 0.4717, "step": 13237 }, { "epoch": 0.5991400769404843, "grad_norm": 0.29415688444257554, "learning_rate": 3.655005585084202e-06, "loss": 0.4776, "step": 13238 }, { "epoch": 0.5991853360488798, "grad_norm": 0.8168793453305554, "learning_rate": 3.6542996936287233e-06, "loss": 0.3387, "step": 13239 }, { "epoch": 0.5992305951572754, "grad_norm": 0.6441091079639482, "learning_rate": 3.6535938310882124e-06, "loss": 0.3146, "step": 13240 }, { "epoch": 0.599275854265671, "grad_norm": 0.6282743875406009, "learning_rate": 3.6528879974778365e-06, "loss": 0.3021, "step": 13241 }, { "epoch": 0.5993211133740666, "grad_norm": 0.6778664796167693, "learning_rate": 3.6521821928127588e-06, "loss": 0.3358, "step": 13242 }, { "epoch": 0.599366372482462, "grad_norm": 0.27633099760908875, "learning_rate": 3.6514764171081454e-06, "loss": 0.4482, "step": 13243 }, { "epoch": 0.5994116315908576, "grad_norm": 0.6270164059202556, "learning_rate": 3.6507706703791624e-06, "loss": 0.2861, "step": 13244 }, { "epoch": 0.5994568906992532, "grad_norm": 0.5866021898261772, "learning_rate": 3.650064952640976e-06, "loss": 0.3334, "step": 13245 }, { "epoch": 0.5995021498076488, "grad_norm": 0.627212468249608, "learning_rate": 3.649359263908746e-06, "loss": 0.2756, "step": 13246 }, { "epoch": 0.5995474089160443, "grad_norm": 0.6358470020069243, "learning_rate": 3.6486536041976362e-06, "loss": 0.2926, "step": 13247 }, { "epoch": 0.5995926680244399, "grad_norm": 0.6168356613463569, "learning_rate": 3.6479479735228117e-06, "loss": 0.2641, "step": 13248 }, { "epoch": 0.5996379271328355, "grad_norm": 0.3012527809032249, "learning_rate": 3.6472423718994326e-06, "loss": 0.4825, "step": 13249 }, { "epoch": 0.5996831862412311, "grad_norm": 0.6151714677223123, "learning_rate": 3.6465367993426603e-06, "loss": 0.3112, "step": 13250 }, { "epoch": 0.5997284453496267, "grad_norm": 0.6661811042765926, "learning_rate": 3.6458312558676555e-06, "loss": 0.2875, "step": 13251 }, { "epoch": 0.5997737044580221, "grad_norm": 0.6218647748077784, "learning_rate": 3.6451257414895767e-06, "loss": 0.3149, "step": 13252 }, { "epoch": 0.5998189635664177, "grad_norm": 0.5646580107053886, "learning_rate": 3.6444202562235854e-06, "loss": 0.3269, "step": 13253 }, { "epoch": 0.5998642226748133, "grad_norm": 0.6234519410493783, "learning_rate": 3.6437148000848404e-06, "loss": 0.3071, "step": 13254 }, { "epoch": 0.5999094817832089, "grad_norm": 0.6258900034740645, "learning_rate": 3.6430093730884973e-06, "loss": 0.3069, "step": 13255 }, { "epoch": 0.5999547408916044, "grad_norm": 0.30236203249057025, "learning_rate": 3.6423039752497146e-06, "loss": 0.4885, "step": 13256 }, { "epoch": 0.6, "grad_norm": 0.8056969703403314, "learning_rate": 3.641598606583653e-06, "loss": 0.3121, "step": 13257 }, { "epoch": 0.6000452591083956, "grad_norm": 0.623021541836843, "learning_rate": 3.640893267105462e-06, "loss": 0.2839, "step": 13258 }, { "epoch": 0.6000905182167912, "grad_norm": 0.654267609693474, "learning_rate": 3.6401879568303013e-06, "loss": 0.2896, "step": 13259 }, { "epoch": 0.6001357773251867, "grad_norm": 0.7279223216549868, "learning_rate": 3.639482675773324e-06, "loss": 0.2611, "step": 13260 }, { "epoch": 0.6001810364335822, "grad_norm": 0.7553416821907419, "learning_rate": 3.6387774239496893e-06, "loss": 0.3153, "step": 13261 }, { "epoch": 0.6002262955419778, "grad_norm": 0.6802481482404504, "learning_rate": 3.6380722013745434e-06, "loss": 0.3622, "step": 13262 }, { "epoch": 0.6002715546503734, "grad_norm": 0.6082365759513287, "learning_rate": 3.637367008063044e-06, "loss": 0.3007, "step": 13263 }, { "epoch": 0.600316813758769, "grad_norm": 0.5913283821044162, "learning_rate": 3.6366618440303436e-06, "loss": 0.2381, "step": 13264 }, { "epoch": 0.6003620728671645, "grad_norm": 0.7278794020979359, "learning_rate": 3.6359567092915928e-06, "loss": 0.3526, "step": 13265 }, { "epoch": 0.6004073319755601, "grad_norm": 0.785940979506602, "learning_rate": 3.635251603861941e-06, "loss": 0.3553, "step": 13266 }, { "epoch": 0.6004525910839557, "grad_norm": 0.6008767130405239, "learning_rate": 3.6345465277565427e-06, "loss": 0.3146, "step": 13267 }, { "epoch": 0.6004978501923512, "grad_norm": 0.967970368322063, "learning_rate": 3.6338414809905453e-06, "loss": 0.3412, "step": 13268 }, { "epoch": 0.6005431093007467, "grad_norm": 0.6307483799109639, "learning_rate": 3.633136463579099e-06, "loss": 0.287, "step": 13269 }, { "epoch": 0.6005883684091423, "grad_norm": 0.600574822035914, "learning_rate": 3.6324314755373523e-06, "loss": 0.3575, "step": 13270 }, { "epoch": 0.6006336275175379, "grad_norm": 0.29013622581752974, "learning_rate": 3.6317265168804526e-06, "loss": 0.4642, "step": 13271 }, { "epoch": 0.6006788866259335, "grad_norm": 0.6462805277268697, "learning_rate": 3.631021587623547e-06, "loss": 0.3476, "step": 13272 }, { "epoch": 0.6007241457343291, "grad_norm": 0.6454512431625602, "learning_rate": 3.630316687781783e-06, "loss": 0.33, "step": 13273 }, { "epoch": 0.6007694048427246, "grad_norm": 0.6181042514441442, "learning_rate": 3.6296118173703075e-06, "loss": 0.3078, "step": 13274 }, { "epoch": 0.6008146639511202, "grad_norm": 0.6651883769771438, "learning_rate": 3.628906976404265e-06, "loss": 0.3061, "step": 13275 }, { "epoch": 0.6008599230595157, "grad_norm": 0.6182735386264937, "learning_rate": 3.6282021648988e-06, "loss": 0.3268, "step": 13276 }, { "epoch": 0.6009051821679113, "grad_norm": 0.7468317605762024, "learning_rate": 3.6274973828690584e-06, "loss": 0.3354, "step": 13277 }, { "epoch": 0.6009504412763068, "grad_norm": 0.6376219336618065, "learning_rate": 3.6267926303301827e-06, "loss": 0.3057, "step": 13278 }, { "epoch": 0.6009957003847024, "grad_norm": 0.5961662944166821, "learning_rate": 3.6260879072973155e-06, "loss": 0.3278, "step": 13279 }, { "epoch": 0.601040959493098, "grad_norm": 0.590256658116722, "learning_rate": 3.6253832137856e-06, "loss": 0.3189, "step": 13280 }, { "epoch": 0.6010862186014936, "grad_norm": 0.6627626694142456, "learning_rate": 3.6246785498101754e-06, "loss": 0.3412, "step": 13281 }, { "epoch": 0.6011314777098891, "grad_norm": 0.7990817075726195, "learning_rate": 3.6239739153861863e-06, "loss": 0.3421, "step": 13282 }, { "epoch": 0.6011767368182847, "grad_norm": 0.6661198485629423, "learning_rate": 3.623269310528773e-06, "loss": 0.2819, "step": 13283 }, { "epoch": 0.6012219959266802, "grad_norm": 0.3169460283991881, "learning_rate": 3.622564735253072e-06, "loss": 0.5014, "step": 13284 }, { "epoch": 0.6012672550350758, "grad_norm": 0.6367798706528928, "learning_rate": 3.6218601895742234e-06, "loss": 0.3107, "step": 13285 }, { "epoch": 0.6013125141434714, "grad_norm": 0.6489505146493503, "learning_rate": 3.6211556735073704e-06, "loss": 0.3321, "step": 13286 }, { "epoch": 0.6013577732518669, "grad_norm": 0.30221069429949665, "learning_rate": 3.620451187067644e-06, "loss": 0.4626, "step": 13287 }, { "epoch": 0.6014030323602625, "grad_norm": 0.6338490368445091, "learning_rate": 3.619746730270185e-06, "loss": 0.2908, "step": 13288 }, { "epoch": 0.6014482914686581, "grad_norm": 0.6348180019316083, "learning_rate": 3.619042303130129e-06, "loss": 0.3412, "step": 13289 }, { "epoch": 0.6014935505770537, "grad_norm": 0.2727124763310069, "learning_rate": 3.618337905662616e-06, "loss": 0.4547, "step": 13290 }, { "epoch": 0.6015388096854491, "grad_norm": 0.6508143398730285, "learning_rate": 3.6176335378827747e-06, "loss": 0.3109, "step": 13291 }, { "epoch": 0.6015840687938447, "grad_norm": 0.8292016353801743, "learning_rate": 3.616929199805744e-06, "loss": 0.4804, "step": 13292 }, { "epoch": 0.6016293279022403, "grad_norm": 0.6238778648347993, "learning_rate": 3.616224891446658e-06, "loss": 0.2883, "step": 13293 }, { "epoch": 0.6016745870106359, "grad_norm": 0.6587510313119597, "learning_rate": 3.615520612820649e-06, "loss": 0.354, "step": 13294 }, { "epoch": 0.6017198461190315, "grad_norm": 0.6311257520628122, "learning_rate": 3.6148163639428475e-06, "loss": 0.3445, "step": 13295 }, { "epoch": 0.601765105227427, "grad_norm": 0.6216525729643326, "learning_rate": 3.6141121448283904e-06, "loss": 0.3234, "step": 13296 }, { "epoch": 0.6018103643358226, "grad_norm": 0.7357679569482551, "learning_rate": 3.6134079554924062e-06, "loss": 0.3193, "step": 13297 }, { "epoch": 0.6018556234442182, "grad_norm": 0.3342103094602393, "learning_rate": 3.6127037959500267e-06, "loss": 0.4836, "step": 13298 }, { "epoch": 0.6019008825526138, "grad_norm": 0.6043645496196589, "learning_rate": 3.6119996662163824e-06, "loss": 0.3104, "step": 13299 }, { "epoch": 0.6019461416610092, "grad_norm": 0.32463802624158095, "learning_rate": 3.6112955663066008e-06, "loss": 0.4925, "step": 13300 }, { "epoch": 0.6019914007694048, "grad_norm": 0.6827651046445397, "learning_rate": 3.610591496235813e-06, "loss": 0.3298, "step": 13301 }, { "epoch": 0.6020366598778004, "grad_norm": 0.6881451561163642, "learning_rate": 3.6098874560191465e-06, "loss": 0.3104, "step": 13302 }, { "epoch": 0.602081918986196, "grad_norm": 0.2740287878315835, "learning_rate": 3.609183445671731e-06, "loss": 0.4674, "step": 13303 }, { "epoch": 0.6021271780945915, "grad_norm": 0.6265971913851386, "learning_rate": 3.6084794652086892e-06, "loss": 0.2934, "step": 13304 }, { "epoch": 0.6021724372029871, "grad_norm": 0.27654597975083506, "learning_rate": 3.607775514645151e-06, "loss": 0.4862, "step": 13305 }, { "epoch": 0.6022176963113827, "grad_norm": 0.6190244001689355, "learning_rate": 3.607071593996242e-06, "loss": 0.3091, "step": 13306 }, { "epoch": 0.6022629554197783, "grad_norm": 0.6018265444032312, "learning_rate": 3.606367703277085e-06, "loss": 0.3189, "step": 13307 }, { "epoch": 0.6023082145281738, "grad_norm": 0.6326827208686144, "learning_rate": 3.6056638425028068e-06, "loss": 0.2785, "step": 13308 }, { "epoch": 0.6023534736365693, "grad_norm": 1.0066003674078408, "learning_rate": 3.6049600116885307e-06, "loss": 0.3024, "step": 13309 }, { "epoch": 0.6023987327449649, "grad_norm": 0.3089674950974842, "learning_rate": 3.6042562108493772e-06, "loss": 0.4698, "step": 13310 }, { "epoch": 0.6024439918533605, "grad_norm": 0.6178521392338734, "learning_rate": 3.603552440000472e-06, "loss": 0.2933, "step": 13311 }, { "epoch": 0.6024892509617561, "grad_norm": 0.6770493087166252, "learning_rate": 3.6028486991569376e-06, "loss": 0.3329, "step": 13312 }, { "epoch": 0.6025345100701516, "grad_norm": 0.6601070659367079, "learning_rate": 3.6021449883338923e-06, "loss": 0.3046, "step": 13313 }, { "epoch": 0.6025797691785472, "grad_norm": 0.6320469520340588, "learning_rate": 3.6014413075464573e-06, "loss": 0.3644, "step": 13314 }, { "epoch": 0.6026250282869428, "grad_norm": 0.6056737659973819, "learning_rate": 3.600737656809754e-06, "loss": 0.2946, "step": 13315 }, { "epoch": 0.6026702873953383, "grad_norm": 0.5849266824543456, "learning_rate": 3.600034036138902e-06, "loss": 0.2944, "step": 13316 }, { "epoch": 0.6027155465037338, "grad_norm": 0.30346221504831905, "learning_rate": 3.5993304455490173e-06, "loss": 0.4546, "step": 13317 }, { "epoch": 0.6027608056121294, "grad_norm": 0.6478262489689017, "learning_rate": 3.598626885055219e-06, "loss": 0.3263, "step": 13318 }, { "epoch": 0.602806064720525, "grad_norm": 0.6031657905281659, "learning_rate": 3.597923354672628e-06, "loss": 0.2907, "step": 13319 }, { "epoch": 0.6028513238289206, "grad_norm": 0.6356314317536731, "learning_rate": 3.597219854416355e-06, "loss": 0.3628, "step": 13320 }, { "epoch": 0.6028965829373162, "grad_norm": 0.2929946520224333, "learning_rate": 3.59651638430152e-06, "loss": 0.4826, "step": 13321 }, { "epoch": 0.6029418420457117, "grad_norm": 0.2812105966859869, "learning_rate": 3.595812944343239e-06, "loss": 0.4851, "step": 13322 }, { "epoch": 0.6029871011541073, "grad_norm": 0.6210192622739172, "learning_rate": 3.5951095345566232e-06, "loss": 0.3125, "step": 13323 }, { "epoch": 0.6030323602625028, "grad_norm": 0.6331813274922533, "learning_rate": 3.5944061549567876e-06, "loss": 0.2914, "step": 13324 }, { "epoch": 0.6030776193708984, "grad_norm": 0.6672025406942507, "learning_rate": 3.59370280555885e-06, "loss": 0.2978, "step": 13325 }, { "epoch": 0.6031228784792939, "grad_norm": 0.2833025363843245, "learning_rate": 3.592999486377918e-06, "loss": 0.4624, "step": 13326 }, { "epoch": 0.6031681375876895, "grad_norm": 0.6844554810860389, "learning_rate": 3.592296197429106e-06, "loss": 0.3058, "step": 13327 }, { "epoch": 0.6032133966960851, "grad_norm": 0.2904387392592274, "learning_rate": 3.591592938727526e-06, "loss": 0.5005, "step": 13328 }, { "epoch": 0.6032586558044807, "grad_norm": 0.6018361904817027, "learning_rate": 3.5908897102882868e-06, "loss": 0.2839, "step": 13329 }, { "epoch": 0.6033039149128763, "grad_norm": 0.2586940970741543, "learning_rate": 3.5901865121265e-06, "loss": 0.4737, "step": 13330 }, { "epoch": 0.6033491740212718, "grad_norm": 0.27398799440128146, "learning_rate": 3.5894833442572763e-06, "loss": 0.4526, "step": 13331 }, { "epoch": 0.6033944331296673, "grad_norm": 0.622998206410414, "learning_rate": 3.588780206695724e-06, "loss": 0.3301, "step": 13332 }, { "epoch": 0.6034396922380629, "grad_norm": 0.6808638843999142, "learning_rate": 3.5880770994569485e-06, "loss": 0.313, "step": 13333 }, { "epoch": 0.6034849513464585, "grad_norm": 0.6318067758791139, "learning_rate": 3.587374022556061e-06, "loss": 0.2803, "step": 13334 }, { "epoch": 0.603530210454854, "grad_norm": 0.6496949493400798, "learning_rate": 3.5866709760081684e-06, "loss": 0.3366, "step": 13335 }, { "epoch": 0.6035754695632496, "grad_norm": 0.6469781095008104, "learning_rate": 3.585967959828375e-06, "loss": 0.33, "step": 13336 }, { "epoch": 0.6036207286716452, "grad_norm": 0.6061078384711173, "learning_rate": 3.5852649740317858e-06, "loss": 0.3142, "step": 13337 }, { "epoch": 0.6036659877800408, "grad_norm": 0.6063965955355926, "learning_rate": 3.58456201863351e-06, "loss": 0.3582, "step": 13338 }, { "epoch": 0.6037112468884362, "grad_norm": 0.6221814436406296, "learning_rate": 3.5838590936486467e-06, "loss": 0.2983, "step": 13339 }, { "epoch": 0.6037565059968318, "grad_norm": 0.6344451011991756, "learning_rate": 3.583156199092303e-06, "loss": 0.3154, "step": 13340 }, { "epoch": 0.6038017651052274, "grad_norm": 0.5959380289884578, "learning_rate": 3.582453334979582e-06, "loss": 0.3203, "step": 13341 }, { "epoch": 0.603847024213623, "grad_norm": 0.6284294962048357, "learning_rate": 3.5817505013255847e-06, "loss": 0.316, "step": 13342 }, { "epoch": 0.6038922833220186, "grad_norm": 0.6991259807827724, "learning_rate": 3.581047698145412e-06, "loss": 0.3196, "step": 13343 }, { "epoch": 0.6039375424304141, "grad_norm": 0.6036458234344553, "learning_rate": 3.580344925454167e-06, "loss": 0.2832, "step": 13344 }, { "epoch": 0.6039828015388097, "grad_norm": 0.6094023062667671, "learning_rate": 3.5796421832669503e-06, "loss": 0.313, "step": 13345 }, { "epoch": 0.6040280606472053, "grad_norm": 0.6116601881054674, "learning_rate": 3.5789394715988602e-06, "loss": 0.358, "step": 13346 }, { "epoch": 0.6040733197556009, "grad_norm": 0.33519227315021916, "learning_rate": 3.578236790464995e-06, "loss": 0.4588, "step": 13347 }, { "epoch": 0.6041185788639963, "grad_norm": 1.018762962468953, "learning_rate": 3.5775341398804585e-06, "loss": 0.3414, "step": 13348 }, { "epoch": 0.6041638379723919, "grad_norm": 0.6323299652027151, "learning_rate": 3.576831519860341e-06, "loss": 0.3244, "step": 13349 }, { "epoch": 0.6042090970807875, "grad_norm": 0.6310873421400982, "learning_rate": 3.576128930419744e-06, "loss": 0.2797, "step": 13350 }, { "epoch": 0.6042543561891831, "grad_norm": 0.2841091768586345, "learning_rate": 3.575426371573764e-06, "loss": 0.4488, "step": 13351 }, { "epoch": 0.6042996152975786, "grad_norm": 0.6497447780283677, "learning_rate": 3.5747238433374952e-06, "loss": 0.2842, "step": 13352 }, { "epoch": 0.6043448744059742, "grad_norm": 0.6442050756032109, "learning_rate": 3.5740213457260333e-06, "loss": 0.3004, "step": 13353 }, { "epoch": 0.6043901335143698, "grad_norm": 0.6022216934016352, "learning_rate": 3.573318878754475e-06, "loss": 0.2854, "step": 13354 }, { "epoch": 0.6044353926227654, "grad_norm": 0.6266066483654207, "learning_rate": 3.5726164424379106e-06, "loss": 0.3581, "step": 13355 }, { "epoch": 0.604480651731161, "grad_norm": 0.2971858300753773, "learning_rate": 3.571914036791435e-06, "loss": 0.4978, "step": 13356 }, { "epoch": 0.6045259108395564, "grad_norm": 0.6109185868269074, "learning_rate": 3.571211661830142e-06, "loss": 0.316, "step": 13357 }, { "epoch": 0.604571169947952, "grad_norm": 0.6521278981812285, "learning_rate": 3.5705093175691195e-06, "loss": 0.2798, "step": 13358 }, { "epoch": 0.6046164290563476, "grad_norm": 0.6130984616975004, "learning_rate": 3.5698070040234633e-06, "loss": 0.3216, "step": 13359 }, { "epoch": 0.6046616881647432, "grad_norm": 0.6458711888618369, "learning_rate": 3.569104721208262e-06, "loss": 0.3053, "step": 13360 }, { "epoch": 0.6047069472731387, "grad_norm": 0.585594980145394, "learning_rate": 3.5684024691386067e-06, "loss": 0.3134, "step": 13361 }, { "epoch": 0.6047522063815343, "grad_norm": 0.6469463908589563, "learning_rate": 3.567700247829583e-06, "loss": 0.3302, "step": 13362 }, { "epoch": 0.6047974654899299, "grad_norm": 0.6154644667480161, "learning_rate": 3.5669980572962836e-06, "loss": 0.2925, "step": 13363 }, { "epoch": 0.6048427245983254, "grad_norm": 0.6104621738919574, "learning_rate": 3.5662958975537955e-06, "loss": 0.2968, "step": 13364 }, { "epoch": 0.604887983706721, "grad_norm": 0.6360070530683783, "learning_rate": 3.5655937686172037e-06, "loss": 0.3088, "step": 13365 }, { "epoch": 0.6049332428151165, "grad_norm": 0.5661342734041364, "learning_rate": 3.5648916705015964e-06, "loss": 0.3785, "step": 13366 }, { "epoch": 0.6049785019235121, "grad_norm": 0.5702960130273277, "learning_rate": 3.5641896032220626e-06, "loss": 0.3104, "step": 13367 }, { "epoch": 0.6050237610319077, "grad_norm": 0.6131253656760342, "learning_rate": 3.5634875667936803e-06, "loss": 0.3048, "step": 13368 }, { "epoch": 0.6050690201403033, "grad_norm": 0.6174377700558944, "learning_rate": 3.56278556123154e-06, "loss": 0.3033, "step": 13369 }, { "epoch": 0.6051142792486988, "grad_norm": 0.6471682654144832, "learning_rate": 3.562083586550725e-06, "loss": 0.3133, "step": 13370 }, { "epoch": 0.6051595383570944, "grad_norm": 0.36123504228980124, "learning_rate": 3.5613816427663162e-06, "loss": 0.4485, "step": 13371 }, { "epoch": 0.6052047974654899, "grad_norm": 0.6044077284848175, "learning_rate": 3.5606797298933967e-06, "loss": 0.2823, "step": 13372 }, { "epoch": 0.6052500565738855, "grad_norm": 0.6937280255969518, "learning_rate": 3.5599778479470498e-06, "loss": 0.295, "step": 13373 }, { "epoch": 0.605295315682281, "grad_norm": 0.6140668102863763, "learning_rate": 3.5592759969423573e-06, "loss": 0.2912, "step": 13374 }, { "epoch": 0.6053405747906766, "grad_norm": 0.6584099771068982, "learning_rate": 3.5585741768943982e-06, "loss": 0.3286, "step": 13375 }, { "epoch": 0.6053858338990722, "grad_norm": 0.613803210549905, "learning_rate": 3.5578723878182518e-06, "loss": 0.3716, "step": 13376 }, { "epoch": 0.6054310930074678, "grad_norm": 0.7060930148845469, "learning_rate": 3.557170629729001e-06, "loss": 0.3128, "step": 13377 }, { "epoch": 0.6054763521158634, "grad_norm": 0.6792705093090348, "learning_rate": 3.556468902641721e-06, "loss": 0.3283, "step": 13378 }, { "epoch": 0.6055216112242588, "grad_norm": 0.8428630717752048, "learning_rate": 3.555767206571491e-06, "loss": 0.3075, "step": 13379 }, { "epoch": 0.6055668703326544, "grad_norm": 0.5752766645066538, "learning_rate": 3.555065541533389e-06, "loss": 0.3179, "step": 13380 }, { "epoch": 0.60561212944105, "grad_norm": 0.3290549372923084, "learning_rate": 3.5543639075424897e-06, "loss": 0.4824, "step": 13381 }, { "epoch": 0.6056573885494456, "grad_norm": 0.6225255968894837, "learning_rate": 3.5536623046138685e-06, "loss": 0.3344, "step": 13382 }, { "epoch": 0.6057026476578411, "grad_norm": 0.5772962530922928, "learning_rate": 3.552960732762605e-06, "loss": 0.2937, "step": 13383 }, { "epoch": 0.6057479067662367, "grad_norm": 0.28994310594926126, "learning_rate": 3.5522591920037698e-06, "loss": 0.4464, "step": 13384 }, { "epoch": 0.6057931658746323, "grad_norm": 0.7948689428509049, "learning_rate": 3.5515576823524377e-06, "loss": 0.3062, "step": 13385 }, { "epoch": 0.6058384249830279, "grad_norm": 0.6432547158435971, "learning_rate": 3.5508562038236817e-06, "loss": 0.3117, "step": 13386 }, { "epoch": 0.6058836840914233, "grad_norm": 0.713155852051197, "learning_rate": 3.5501547564325777e-06, "loss": 0.3015, "step": 13387 }, { "epoch": 0.6059289431998189, "grad_norm": 0.5769908367074335, "learning_rate": 3.549453340194194e-06, "loss": 0.3255, "step": 13388 }, { "epoch": 0.6059742023082145, "grad_norm": 0.5942258901578376, "learning_rate": 3.5487519551236025e-06, "loss": 0.3209, "step": 13389 }, { "epoch": 0.6060194614166101, "grad_norm": 0.29522903900146297, "learning_rate": 3.548050601235876e-06, "loss": 0.4456, "step": 13390 }, { "epoch": 0.6060647205250057, "grad_norm": 0.599766040405492, "learning_rate": 3.54734927854608e-06, "loss": 0.3175, "step": 13391 }, { "epoch": 0.6061099796334012, "grad_norm": 0.6533666125281052, "learning_rate": 3.5466479870692883e-06, "loss": 0.3344, "step": 13392 }, { "epoch": 0.6061552387417968, "grad_norm": 0.3529903275260208, "learning_rate": 3.5459467268205683e-06, "loss": 0.4758, "step": 13393 }, { "epoch": 0.6062004978501924, "grad_norm": 0.3486681973081686, "learning_rate": 3.5452454978149864e-06, "loss": 0.4692, "step": 13394 }, { "epoch": 0.606245756958588, "grad_norm": 0.6403340211996071, "learning_rate": 3.5445443000676096e-06, "loss": 0.3866, "step": 13395 }, { "epoch": 0.6062910160669834, "grad_norm": 0.6369882531039904, "learning_rate": 3.543843133593509e-06, "loss": 0.2986, "step": 13396 }, { "epoch": 0.606336275175379, "grad_norm": 0.6608157083343831, "learning_rate": 3.5431419984077444e-06, "loss": 0.3006, "step": 13397 }, { "epoch": 0.6063815342837746, "grad_norm": 0.6016818949787632, "learning_rate": 3.542440894525384e-06, "loss": 0.3066, "step": 13398 }, { "epoch": 0.6064267933921702, "grad_norm": 0.6261206840159689, "learning_rate": 3.541739821961494e-06, "loss": 0.3113, "step": 13399 }, { "epoch": 0.6064720525005657, "grad_norm": 0.3348048032808948, "learning_rate": 3.5410387807311353e-06, "loss": 0.4883, "step": 13400 }, { "epoch": 0.6065173116089613, "grad_norm": 0.6042801979473782, "learning_rate": 3.5403377708493714e-06, "loss": 0.3508, "step": 13401 }, { "epoch": 0.6065625707173569, "grad_norm": 0.30132407700194175, "learning_rate": 3.539636792331267e-06, "loss": 0.4753, "step": 13402 }, { "epoch": 0.6066078298257525, "grad_norm": 0.645422373017156, "learning_rate": 3.538935845191884e-06, "loss": 0.3028, "step": 13403 }, { "epoch": 0.606653088934148, "grad_norm": 0.6487120512379727, "learning_rate": 3.5382349294462803e-06, "loss": 0.3165, "step": 13404 }, { "epoch": 0.6066983480425435, "grad_norm": 0.6076635547412993, "learning_rate": 3.5375340451095186e-06, "loss": 0.3097, "step": 13405 }, { "epoch": 0.6067436071509391, "grad_norm": 0.6358029989813431, "learning_rate": 3.53683319219666e-06, "loss": 0.2996, "step": 13406 }, { "epoch": 0.6067888662593347, "grad_norm": 0.2926919659262302, "learning_rate": 3.536132370722761e-06, "loss": 0.5306, "step": 13407 }, { "epoch": 0.6068341253677303, "grad_norm": 0.2741018832720857, "learning_rate": 3.5354315807028826e-06, "loss": 0.4834, "step": 13408 }, { "epoch": 0.6068793844761258, "grad_norm": 0.6091839935589203, "learning_rate": 3.5347308221520814e-06, "loss": 0.2883, "step": 13409 }, { "epoch": 0.6069246435845214, "grad_norm": 0.6006493393550284, "learning_rate": 3.5340300950854135e-06, "loss": 0.2632, "step": 13410 }, { "epoch": 0.606969902692917, "grad_norm": 0.5984574567811332, "learning_rate": 3.5333293995179362e-06, "loss": 0.281, "step": 13411 }, { "epoch": 0.6070151618013125, "grad_norm": 0.5765612346379179, "learning_rate": 3.5326287354647077e-06, "loss": 0.3004, "step": 13412 }, { "epoch": 0.6070604209097081, "grad_norm": 0.5907166678593426, "learning_rate": 3.5319281029407793e-06, "loss": 0.3161, "step": 13413 }, { "epoch": 0.6071056800181036, "grad_norm": 0.6486670915591076, "learning_rate": 3.5312275019612065e-06, "loss": 0.3119, "step": 13414 }, { "epoch": 0.6071509391264992, "grad_norm": 0.2855136328850759, "learning_rate": 3.530526932541045e-06, "loss": 0.4739, "step": 13415 }, { "epoch": 0.6071961982348948, "grad_norm": 0.28702072041527865, "learning_rate": 3.529826394695347e-06, "loss": 0.4689, "step": 13416 }, { "epoch": 0.6072414573432904, "grad_norm": 1.0163123512406775, "learning_rate": 3.529125888439164e-06, "loss": 0.2641, "step": 13417 }, { "epoch": 0.6072867164516859, "grad_norm": 0.3417303228950468, "learning_rate": 3.5284254137875472e-06, "loss": 0.4725, "step": 13418 }, { "epoch": 0.6073319755600814, "grad_norm": 0.6336573660997611, "learning_rate": 3.5277249707555507e-06, "loss": 0.3404, "step": 13419 }, { "epoch": 0.607377234668477, "grad_norm": 0.27356633073337205, "learning_rate": 3.527024559358221e-06, "loss": 0.4588, "step": 13420 }, { "epoch": 0.6074224937768726, "grad_norm": 2.292413323718286, "learning_rate": 3.5263241796106097e-06, "loss": 0.3182, "step": 13421 }, { "epoch": 0.6074677528852681, "grad_norm": 0.6645893638135631, "learning_rate": 3.525623831527767e-06, "loss": 0.294, "step": 13422 }, { "epoch": 0.6075130119936637, "grad_norm": 0.6186388894118936, "learning_rate": 3.5249235151247398e-06, "loss": 0.3234, "step": 13423 }, { "epoch": 0.6075582711020593, "grad_norm": 0.633921444031, "learning_rate": 3.5242232304165736e-06, "loss": 0.3798, "step": 13424 }, { "epoch": 0.6076035302104549, "grad_norm": 0.6255478093227299, "learning_rate": 3.5235229774183217e-06, "loss": 0.3376, "step": 13425 }, { "epoch": 0.6076487893188505, "grad_norm": 0.6235753491898307, "learning_rate": 3.522822756145022e-06, "loss": 0.298, "step": 13426 }, { "epoch": 0.607694048427246, "grad_norm": 0.6644382198804875, "learning_rate": 3.5221225666117272e-06, "loss": 0.3151, "step": 13427 }, { "epoch": 0.6077393075356415, "grad_norm": 0.6755344353780085, "learning_rate": 3.52142240883348e-06, "loss": 0.2818, "step": 13428 }, { "epoch": 0.6077845666440371, "grad_norm": 0.6135967249537492, "learning_rate": 3.520722282825323e-06, "loss": 0.319, "step": 13429 }, { "epoch": 0.6078298257524327, "grad_norm": 0.6180751455692161, "learning_rate": 3.520022188602299e-06, "loss": 0.314, "step": 13430 }, { "epoch": 0.6078750848608282, "grad_norm": 0.6419498870555718, "learning_rate": 3.519322126179455e-06, "loss": 0.3285, "step": 13431 }, { "epoch": 0.6079203439692238, "grad_norm": 0.34575186377873107, "learning_rate": 3.518622095571831e-06, "loss": 0.4893, "step": 13432 }, { "epoch": 0.6079656030776194, "grad_norm": 0.6326462699830524, "learning_rate": 3.517922096794468e-06, "loss": 0.3333, "step": 13433 }, { "epoch": 0.608010862186015, "grad_norm": 0.6098311111428941, "learning_rate": 3.5172221298624067e-06, "loss": 0.3326, "step": 13434 }, { "epoch": 0.6080561212944104, "grad_norm": 0.2651308468352901, "learning_rate": 3.516522194790689e-06, "loss": 0.4515, "step": 13435 }, { "epoch": 0.608101380402806, "grad_norm": 0.27729441427110796, "learning_rate": 3.5158222915943524e-06, "loss": 0.456, "step": 13436 }, { "epoch": 0.6081466395112016, "grad_norm": 0.6091409290588269, "learning_rate": 3.5151224202884364e-06, "loss": 0.3203, "step": 13437 }, { "epoch": 0.6081918986195972, "grad_norm": 0.2652121404589406, "learning_rate": 3.5144225808879806e-06, "loss": 0.4623, "step": 13438 }, { "epoch": 0.6082371577279928, "grad_norm": 0.6086578023770722, "learning_rate": 3.513722773408018e-06, "loss": 0.2576, "step": 13439 }, { "epoch": 0.6082824168363883, "grad_norm": 0.7042375101393927, "learning_rate": 3.51302299786359e-06, "loss": 0.3113, "step": 13440 }, { "epoch": 0.6083276759447839, "grad_norm": 0.6347651648407661, "learning_rate": 3.512323254269732e-06, "loss": 0.3279, "step": 13441 }, { "epoch": 0.6083729350531795, "grad_norm": 0.6096456656656934, "learning_rate": 3.5116235426414767e-06, "loss": 0.2872, "step": 13442 }, { "epoch": 0.608418194161575, "grad_norm": 0.6177301460681318, "learning_rate": 3.51092386299386e-06, "loss": 0.2631, "step": 13443 }, { "epoch": 0.6084634532699705, "grad_norm": 0.6562325905779547, "learning_rate": 3.5102242153419164e-06, "loss": 0.3088, "step": 13444 }, { "epoch": 0.6085087123783661, "grad_norm": 0.3103744888178165, "learning_rate": 3.50952459970068e-06, "loss": 0.4631, "step": 13445 }, { "epoch": 0.6085539714867617, "grad_norm": 0.6626718082982688, "learning_rate": 3.5088250160851817e-06, "loss": 0.3588, "step": 13446 }, { "epoch": 0.6085992305951573, "grad_norm": 0.6568108014435411, "learning_rate": 3.5081254645104525e-06, "loss": 0.3407, "step": 13447 }, { "epoch": 0.6086444897035529, "grad_norm": 0.27975503482648345, "learning_rate": 3.507425944991529e-06, "loss": 0.4779, "step": 13448 }, { "epoch": 0.6086897488119484, "grad_norm": 0.6176299986228942, "learning_rate": 3.506726457543434e-06, "loss": 0.3334, "step": 13449 }, { "epoch": 0.608735007920344, "grad_norm": 0.701602134769069, "learning_rate": 3.5060270021812027e-06, "loss": 0.2849, "step": 13450 }, { "epoch": 0.6087802670287396, "grad_norm": 0.28129388479860296, "learning_rate": 3.5053275789198634e-06, "loss": 0.4584, "step": 13451 }, { "epoch": 0.6088255261371351, "grad_norm": 0.7571593717790948, "learning_rate": 3.5046281877744424e-06, "loss": 0.3203, "step": 13452 }, { "epoch": 0.6088707852455306, "grad_norm": 0.6840506216804946, "learning_rate": 3.503928828759969e-06, "loss": 0.2742, "step": 13453 }, { "epoch": 0.6089160443539262, "grad_norm": 0.6572465584720832, "learning_rate": 3.503229501891472e-06, "loss": 0.3317, "step": 13454 }, { "epoch": 0.6089613034623218, "grad_norm": 0.6389350839603637, "learning_rate": 3.5025302071839746e-06, "loss": 0.3169, "step": 13455 }, { "epoch": 0.6090065625707174, "grad_norm": 0.5981348804029151, "learning_rate": 3.501830944652504e-06, "loss": 0.3028, "step": 13456 }, { "epoch": 0.6090518216791129, "grad_norm": 0.26849945214336984, "learning_rate": 3.5011317143120845e-06, "loss": 0.4633, "step": 13457 }, { "epoch": 0.6090970807875085, "grad_norm": 0.7653660823165149, "learning_rate": 3.5004325161777437e-06, "loss": 0.3132, "step": 13458 }, { "epoch": 0.609142339895904, "grad_norm": 0.6098125186287409, "learning_rate": 3.4997333502644994e-06, "loss": 0.3269, "step": 13459 }, { "epoch": 0.6091875990042996, "grad_norm": 0.5955342692604938, "learning_rate": 3.499034216587379e-06, "loss": 0.3019, "step": 13460 }, { "epoch": 0.6092328581126952, "grad_norm": 0.28986142679869115, "learning_rate": 3.4983351151614043e-06, "loss": 0.4498, "step": 13461 }, { "epoch": 0.6092781172210907, "grad_norm": 0.6228379080550507, "learning_rate": 3.4976360460015953e-06, "loss": 0.304, "step": 13462 }, { "epoch": 0.6093233763294863, "grad_norm": 0.6021659146296512, "learning_rate": 3.496937009122972e-06, "loss": 0.3011, "step": 13463 }, { "epoch": 0.6093686354378819, "grad_norm": 1.0426986399623595, "learning_rate": 3.4962380045405585e-06, "loss": 0.3529, "step": 13464 }, { "epoch": 0.6094138945462775, "grad_norm": 0.7277310854494984, "learning_rate": 3.4955390322693704e-06, "loss": 0.3446, "step": 13465 }, { "epoch": 0.609459153654673, "grad_norm": 0.6511462469867378, "learning_rate": 3.4948400923244286e-06, "loss": 0.2836, "step": 13466 }, { "epoch": 0.6095044127630685, "grad_norm": 0.6410223484258175, "learning_rate": 3.4941411847207505e-06, "loss": 0.3008, "step": 13467 }, { "epoch": 0.6095496718714641, "grad_norm": 0.6357391756745651, "learning_rate": 3.4934423094733516e-06, "loss": 0.3549, "step": 13468 }, { "epoch": 0.6095949309798597, "grad_norm": 0.8127095590421107, "learning_rate": 3.492743466597252e-06, "loss": 0.3126, "step": 13469 }, { "epoch": 0.6096401900882552, "grad_norm": 0.6054784960538481, "learning_rate": 3.4920446561074673e-06, "loss": 0.3239, "step": 13470 }, { "epoch": 0.6096854491966508, "grad_norm": 0.6112478540856312, "learning_rate": 3.49134587801901e-06, "loss": 0.3085, "step": 13471 }, { "epoch": 0.6097307083050464, "grad_norm": 0.655204505443951, "learning_rate": 3.4906471323468955e-06, "loss": 0.2751, "step": 13472 }, { "epoch": 0.609775967413442, "grad_norm": 0.6170945340887124, "learning_rate": 3.4899484191061394e-06, "loss": 0.2883, "step": 13473 }, { "epoch": 0.6098212265218376, "grad_norm": 0.3074866732977803, "learning_rate": 3.4892497383117553e-06, "loss": 0.4773, "step": 13474 }, { "epoch": 0.609866485630233, "grad_norm": 0.6455642516141017, "learning_rate": 3.488551089978753e-06, "loss": 0.3611, "step": 13475 }, { "epoch": 0.6099117447386286, "grad_norm": 0.6488672160053081, "learning_rate": 3.487852474122145e-06, "loss": 0.3316, "step": 13476 }, { "epoch": 0.6099570038470242, "grad_norm": 0.5809931280865803, "learning_rate": 3.487153890756946e-06, "loss": 0.3207, "step": 13477 }, { "epoch": 0.6100022629554198, "grad_norm": 0.6430702923168584, "learning_rate": 3.4864553398981606e-06, "loss": 0.3279, "step": 13478 }, { "epoch": 0.6100475220638153, "grad_norm": 0.6790153937287134, "learning_rate": 3.4857568215608024e-06, "loss": 0.2798, "step": 13479 }, { "epoch": 0.6100927811722109, "grad_norm": 0.5692761648336583, "learning_rate": 3.4850583357598805e-06, "loss": 0.2876, "step": 13480 }, { "epoch": 0.6101380402806065, "grad_norm": 2.1181254113615373, "learning_rate": 3.4843598825104013e-06, "loss": 0.2788, "step": 13481 }, { "epoch": 0.6101832993890021, "grad_norm": 0.6191110811683334, "learning_rate": 3.483661461827372e-06, "loss": 0.3434, "step": 13482 }, { "epoch": 0.6102285584973977, "grad_norm": 0.6159076449318271, "learning_rate": 3.482963073725803e-06, "loss": 0.3268, "step": 13483 }, { "epoch": 0.6102738176057931, "grad_norm": 0.29597245858900834, "learning_rate": 3.482264718220697e-06, "loss": 0.457, "step": 13484 }, { "epoch": 0.6103190767141887, "grad_norm": 0.6516268351726187, "learning_rate": 3.481566395327062e-06, "loss": 0.2979, "step": 13485 }, { "epoch": 0.6103643358225843, "grad_norm": 0.5816022423269239, "learning_rate": 3.480868105059899e-06, "loss": 0.3135, "step": 13486 }, { "epoch": 0.6104095949309799, "grad_norm": 0.5999535442164554, "learning_rate": 3.4801698474342176e-06, "loss": 0.2927, "step": 13487 }, { "epoch": 0.6104548540393754, "grad_norm": 0.7022729924679137, "learning_rate": 3.479471622465017e-06, "loss": 0.316, "step": 13488 }, { "epoch": 0.610500113147771, "grad_norm": 0.6212432678891955, "learning_rate": 3.478773430167302e-06, "loss": 0.3041, "step": 13489 }, { "epoch": 0.6105453722561666, "grad_norm": 0.6285515117991046, "learning_rate": 3.478075270556075e-06, "loss": 0.2661, "step": 13490 }, { "epoch": 0.6105906313645622, "grad_norm": 0.26767567328925884, "learning_rate": 3.4773771436463346e-06, "loss": 0.4593, "step": 13491 }, { "epoch": 0.6106358904729576, "grad_norm": 0.6986766791328546, "learning_rate": 3.4766790494530824e-06, "loss": 0.3232, "step": 13492 }, { "epoch": 0.6106811495813532, "grad_norm": 0.6301950786058406, "learning_rate": 3.47598098799132e-06, "loss": 0.3202, "step": 13493 }, { "epoch": 0.6107264086897488, "grad_norm": 0.6723265814968592, "learning_rate": 3.475282959276045e-06, "loss": 0.3065, "step": 13494 }, { "epoch": 0.6107716677981444, "grad_norm": 0.7080890105682609, "learning_rate": 3.4745849633222566e-06, "loss": 0.3619, "step": 13495 }, { "epoch": 0.61081692690654, "grad_norm": 0.2851715591834395, "learning_rate": 3.4738870001449533e-06, "loss": 0.4856, "step": 13496 }, { "epoch": 0.6108621860149355, "grad_norm": 0.6202568147808254, "learning_rate": 3.4731890697591297e-06, "loss": 0.3401, "step": 13497 }, { "epoch": 0.6109074451233311, "grad_norm": 0.6091828279663052, "learning_rate": 3.472491172179784e-06, "loss": 0.2923, "step": 13498 }, { "epoch": 0.6109527042317267, "grad_norm": 0.6171615737326103, "learning_rate": 3.471793307421913e-06, "loss": 0.324, "step": 13499 }, { "epoch": 0.6109979633401222, "grad_norm": 0.2717845790814612, "learning_rate": 3.4710954755005087e-06, "loss": 0.4887, "step": 13500 }, { "epoch": 0.6110432224485177, "grad_norm": 0.2640603581855504, "learning_rate": 3.470397676430567e-06, "loss": 0.48, "step": 13501 }, { "epoch": 0.6110884815569133, "grad_norm": 0.6644426756295154, "learning_rate": 3.469699910227082e-06, "loss": 0.3468, "step": 13502 }, { "epoch": 0.6111337406653089, "grad_norm": 0.25364988424384577, "learning_rate": 3.4690021769050462e-06, "loss": 0.467, "step": 13503 }, { "epoch": 0.6111789997737045, "grad_norm": 0.6486677182552096, "learning_rate": 3.4683044764794516e-06, "loss": 0.2898, "step": 13504 }, { "epoch": 0.6112242588821, "grad_norm": 0.26813038137037226, "learning_rate": 3.4676068089652883e-06, "loss": 0.4651, "step": 13505 }, { "epoch": 0.6112695179904956, "grad_norm": 0.671097206847934, "learning_rate": 3.466909174377551e-06, "loss": 0.3387, "step": 13506 }, { "epoch": 0.6113147770988911, "grad_norm": 0.6126280949793477, "learning_rate": 3.466211572731224e-06, "loss": 0.2821, "step": 13507 }, { "epoch": 0.6113600362072867, "grad_norm": 0.5705294614602041, "learning_rate": 3.465514004041301e-06, "loss": 0.3244, "step": 13508 }, { "epoch": 0.6114052953156823, "grad_norm": 0.5805375561356275, "learning_rate": 3.4648164683227702e-06, "loss": 0.3043, "step": 13509 }, { "epoch": 0.6114505544240778, "grad_norm": 0.6295482157076951, "learning_rate": 3.464118965590617e-06, "loss": 0.2985, "step": 13510 }, { "epoch": 0.6114958135324734, "grad_norm": 0.3022925327555956, "learning_rate": 3.46342149585983e-06, "loss": 0.4726, "step": 13511 }, { "epoch": 0.611541072640869, "grad_norm": 0.27754398433310995, "learning_rate": 3.462724059145397e-06, "loss": 0.4703, "step": 13512 }, { "epoch": 0.6115863317492646, "grad_norm": 0.6260772278890104, "learning_rate": 3.4620266554623016e-06, "loss": 0.3484, "step": 13513 }, { "epoch": 0.6116315908576601, "grad_norm": 0.32133429856181145, "learning_rate": 3.4613292848255307e-06, "loss": 0.4927, "step": 13514 }, { "epoch": 0.6116768499660556, "grad_norm": 0.2931450993845942, "learning_rate": 3.460631947250066e-06, "loss": 0.4865, "step": 13515 }, { "epoch": 0.6117221090744512, "grad_norm": 0.6189908854062645, "learning_rate": 3.459934642750895e-06, "loss": 0.326, "step": 13516 }, { "epoch": 0.6117673681828468, "grad_norm": 0.6752200443544591, "learning_rate": 3.4592373713429984e-06, "loss": 0.2923, "step": 13517 }, { "epoch": 0.6118126272912424, "grad_norm": 0.5882537092542902, "learning_rate": 3.4585401330413574e-06, "loss": 0.3386, "step": 13518 }, { "epoch": 0.6118578863996379, "grad_norm": 0.936107982255461, "learning_rate": 3.4578429278609566e-06, "loss": 0.299, "step": 13519 }, { "epoch": 0.6119031455080335, "grad_norm": 0.3321527606429436, "learning_rate": 3.4571457558167727e-06, "loss": 0.4628, "step": 13520 }, { "epoch": 0.6119484046164291, "grad_norm": 0.7063148875935, "learning_rate": 3.4564486169237888e-06, "loss": 0.3115, "step": 13521 }, { "epoch": 0.6119936637248247, "grad_norm": 0.6443386328125476, "learning_rate": 3.4557515111969843e-06, "loss": 0.3187, "step": 13522 }, { "epoch": 0.6120389228332201, "grad_norm": 0.6155674561221551, "learning_rate": 3.4550544386513364e-06, "loss": 0.3011, "step": 13523 }, { "epoch": 0.6120841819416157, "grad_norm": 0.6732227821436269, "learning_rate": 3.4543573993018225e-06, "loss": 0.3257, "step": 13524 }, { "epoch": 0.6121294410500113, "grad_norm": 0.2799539037248683, "learning_rate": 3.453660393163424e-06, "loss": 0.4452, "step": 13525 }, { "epoch": 0.6121747001584069, "grad_norm": 0.5963331242645898, "learning_rate": 3.452963420251112e-06, "loss": 0.3111, "step": 13526 }, { "epoch": 0.6122199592668024, "grad_norm": 0.5948844068033212, "learning_rate": 3.4522664805798643e-06, "loss": 0.3555, "step": 13527 }, { "epoch": 0.612265218375198, "grad_norm": 0.5394219952798536, "learning_rate": 3.451569574164658e-06, "loss": 0.3022, "step": 13528 }, { "epoch": 0.6123104774835936, "grad_norm": 0.631473684109706, "learning_rate": 3.4508727010204663e-06, "loss": 0.2994, "step": 13529 }, { "epoch": 0.6123557365919892, "grad_norm": 0.6854496656754867, "learning_rate": 3.4501758611622606e-06, "loss": 0.3709, "step": 13530 }, { "epoch": 0.6124009957003848, "grad_norm": 0.6196837073329371, "learning_rate": 3.449479054605016e-06, "loss": 0.3225, "step": 13531 }, { "epoch": 0.6124462548087802, "grad_norm": 0.2770645560779661, "learning_rate": 3.448782281363706e-06, "loss": 0.4733, "step": 13532 }, { "epoch": 0.6124915139171758, "grad_norm": 0.5787548892775997, "learning_rate": 3.4480855414533e-06, "loss": 0.2879, "step": 13533 }, { "epoch": 0.6125367730255714, "grad_norm": 0.5992493454284118, "learning_rate": 3.4473888348887673e-06, "loss": 0.292, "step": 13534 }, { "epoch": 0.612582032133967, "grad_norm": 0.6407299956446944, "learning_rate": 3.4466921616850847e-06, "loss": 0.3218, "step": 13535 }, { "epoch": 0.6126272912423625, "grad_norm": 0.6333286620983509, "learning_rate": 3.445995521857213e-06, "loss": 0.3027, "step": 13536 }, { "epoch": 0.6126725503507581, "grad_norm": 0.662291156417707, "learning_rate": 3.4452989154201256e-06, "loss": 0.3287, "step": 13537 }, { "epoch": 0.6127178094591537, "grad_norm": 0.7020675293337494, "learning_rate": 3.4446023423887905e-06, "loss": 0.3112, "step": 13538 }, { "epoch": 0.6127630685675493, "grad_norm": 0.8046855346438109, "learning_rate": 3.443905802778173e-06, "loss": 0.3326, "step": 13539 }, { "epoch": 0.6128083276759447, "grad_norm": 0.6657469550052136, "learning_rate": 3.4432092966032397e-06, "loss": 0.3219, "step": 13540 }, { "epoch": 0.6128535867843403, "grad_norm": 0.3111574269257986, "learning_rate": 3.4425128238789594e-06, "loss": 0.4805, "step": 13541 }, { "epoch": 0.6128988458927359, "grad_norm": 0.7803599969425479, "learning_rate": 3.4418163846202945e-06, "loss": 0.3149, "step": 13542 }, { "epoch": 0.6129441050011315, "grad_norm": 0.6984892738731218, "learning_rate": 3.4411199788422093e-06, "loss": 0.2993, "step": 13543 }, { "epoch": 0.6129893641095271, "grad_norm": 0.3298082981291895, "learning_rate": 3.4404236065596673e-06, "loss": 0.4713, "step": 13544 }, { "epoch": 0.6130346232179226, "grad_norm": 0.641591555289859, "learning_rate": 3.439727267787634e-06, "loss": 0.3007, "step": 13545 }, { "epoch": 0.6130798823263182, "grad_norm": 0.30014997903508966, "learning_rate": 3.439030962541069e-06, "loss": 0.4702, "step": 13546 }, { "epoch": 0.6131251414347137, "grad_norm": 0.7192659293934158, "learning_rate": 3.438334690834934e-06, "loss": 0.2835, "step": 13547 }, { "epoch": 0.6131704005431093, "grad_norm": 0.27457620479106704, "learning_rate": 3.4376384526841918e-06, "loss": 0.4918, "step": 13548 }, { "epoch": 0.6132156596515048, "grad_norm": 0.6625082498934292, "learning_rate": 3.4369422481037984e-06, "loss": 0.3029, "step": 13549 }, { "epoch": 0.6132609187599004, "grad_norm": 0.6179382329542554, "learning_rate": 3.4362460771087162e-06, "loss": 0.3097, "step": 13550 }, { "epoch": 0.613306177868296, "grad_norm": 0.690846385935376, "learning_rate": 3.4355499397139047e-06, "loss": 0.3403, "step": 13551 }, { "epoch": 0.6133514369766916, "grad_norm": 0.6691610028396701, "learning_rate": 3.4348538359343187e-06, "loss": 0.308, "step": 13552 }, { "epoch": 0.6133966960850872, "grad_norm": 0.6484163841141876, "learning_rate": 3.4341577657849163e-06, "loss": 0.2863, "step": 13553 }, { "epoch": 0.6134419551934827, "grad_norm": 0.6710404290800567, "learning_rate": 3.433461729280657e-06, "loss": 0.3299, "step": 13554 }, { "epoch": 0.6134872143018782, "grad_norm": 0.662797078639147, "learning_rate": 3.4327657264364913e-06, "loss": 0.3115, "step": 13555 }, { "epoch": 0.6135324734102738, "grad_norm": 0.331292948132087, "learning_rate": 3.4320697572673774e-06, "loss": 0.4806, "step": 13556 }, { "epoch": 0.6135777325186694, "grad_norm": 0.6338532740399662, "learning_rate": 3.4313738217882676e-06, "loss": 0.3057, "step": 13557 }, { "epoch": 0.6136229916270649, "grad_norm": 0.28146032818850647, "learning_rate": 3.4306779200141204e-06, "loss": 0.4512, "step": 13558 }, { "epoch": 0.6136682507354605, "grad_norm": 0.28191321203490505, "learning_rate": 3.4299820519598814e-06, "loss": 0.4511, "step": 13559 }, { "epoch": 0.6137135098438561, "grad_norm": 0.7828211842898387, "learning_rate": 3.4292862176405075e-06, "loss": 0.3293, "step": 13560 }, { "epoch": 0.6137587689522517, "grad_norm": 0.6837348984558683, "learning_rate": 3.4285904170709495e-06, "loss": 0.3475, "step": 13561 }, { "epoch": 0.6138040280606472, "grad_norm": 0.6027187377328831, "learning_rate": 3.427894650266156e-06, "loss": 0.3363, "step": 13562 }, { "epoch": 0.6138492871690427, "grad_norm": 0.6826603664435904, "learning_rate": 3.4271989172410768e-06, "loss": 0.3355, "step": 13563 }, { "epoch": 0.6138945462774383, "grad_norm": 0.6563556306672855, "learning_rate": 3.4265032180106656e-06, "loss": 0.3017, "step": 13564 }, { "epoch": 0.6139398053858339, "grad_norm": 0.6249553420140989, "learning_rate": 3.425807552589866e-06, "loss": 0.3064, "step": 13565 }, { "epoch": 0.6139850644942295, "grad_norm": 0.6720765103225914, "learning_rate": 3.425111920993627e-06, "loss": 0.3024, "step": 13566 }, { "epoch": 0.614030323602625, "grad_norm": 0.6304178675344458, "learning_rate": 3.424416323236897e-06, "loss": 0.3224, "step": 13567 }, { "epoch": 0.6140755827110206, "grad_norm": 0.6751915719397409, "learning_rate": 3.4237207593346207e-06, "loss": 0.3275, "step": 13568 }, { "epoch": 0.6141208418194162, "grad_norm": 0.6877550526977185, "learning_rate": 3.423025229301743e-06, "loss": 0.3224, "step": 13569 }, { "epoch": 0.6141661009278118, "grad_norm": 0.65181587448465, "learning_rate": 3.42232973315321e-06, "loss": 0.3017, "step": 13570 }, { "epoch": 0.6142113600362072, "grad_norm": 0.39223110812417794, "learning_rate": 3.4216342709039675e-06, "loss": 0.4635, "step": 13571 }, { "epoch": 0.6142566191446028, "grad_norm": 0.33188089678483657, "learning_rate": 3.4209388425689556e-06, "loss": 0.4698, "step": 13572 }, { "epoch": 0.6143018782529984, "grad_norm": 0.6084418154167035, "learning_rate": 3.420243448163117e-06, "loss": 0.2885, "step": 13573 }, { "epoch": 0.614347137361394, "grad_norm": 0.5984272589446169, "learning_rate": 3.4195480877013976e-06, "loss": 0.329, "step": 13574 }, { "epoch": 0.6143923964697895, "grad_norm": 0.5804821195101811, "learning_rate": 3.4188527611987343e-06, "loss": 0.3345, "step": 13575 }, { "epoch": 0.6144376555781851, "grad_norm": 0.31348491520577604, "learning_rate": 3.4181574686700687e-06, "loss": 0.4794, "step": 13576 }, { "epoch": 0.6144829146865807, "grad_norm": 0.6456037759163459, "learning_rate": 3.417462210130342e-06, "loss": 0.3458, "step": 13577 }, { "epoch": 0.6145281737949763, "grad_norm": 0.6486113186177295, "learning_rate": 3.4167669855944905e-06, "loss": 0.3037, "step": 13578 }, { "epoch": 0.6145734329033719, "grad_norm": 0.6203561762672279, "learning_rate": 3.416071795077455e-06, "loss": 0.3292, "step": 13579 }, { "epoch": 0.6146186920117673, "grad_norm": 0.6063378791570977, "learning_rate": 3.415376638594172e-06, "loss": 0.3388, "step": 13580 }, { "epoch": 0.6146639511201629, "grad_norm": 0.6807597534346058, "learning_rate": 3.414681516159578e-06, "loss": 0.2982, "step": 13581 }, { "epoch": 0.6147092102285585, "grad_norm": 1.0380220289205693, "learning_rate": 3.4139864277886083e-06, "loss": 0.3341, "step": 13582 }, { "epoch": 0.6147544693369541, "grad_norm": 0.3318394658340282, "learning_rate": 3.413291373496202e-06, "loss": 0.4856, "step": 13583 }, { "epoch": 0.6147997284453496, "grad_norm": 0.6834381237273527, "learning_rate": 3.4125963532972878e-06, "loss": 0.3554, "step": 13584 }, { "epoch": 0.6148449875537452, "grad_norm": 0.661222968496695, "learning_rate": 3.4119013672068034e-06, "loss": 0.3444, "step": 13585 }, { "epoch": 0.6148902466621408, "grad_norm": 0.6150340736437391, "learning_rate": 3.411206415239681e-06, "loss": 0.2932, "step": 13586 }, { "epoch": 0.6149355057705364, "grad_norm": 0.5982665467514687, "learning_rate": 3.4105114974108553e-06, "loss": 0.3131, "step": 13587 }, { "epoch": 0.6149807648789319, "grad_norm": 0.6062719537081716, "learning_rate": 3.4098166137352534e-06, "loss": 0.3097, "step": 13588 }, { "epoch": 0.6150260239873274, "grad_norm": 0.5801537581376082, "learning_rate": 3.409121764227809e-06, "loss": 0.3013, "step": 13589 }, { "epoch": 0.615071283095723, "grad_norm": 0.6342492363047709, "learning_rate": 3.408426948903453e-06, "loss": 0.3208, "step": 13590 }, { "epoch": 0.6151165422041186, "grad_norm": 0.618670713571157, "learning_rate": 3.4077321677771137e-06, "loss": 0.2913, "step": 13591 }, { "epoch": 0.6151618013125142, "grad_norm": 0.6074155228706901, "learning_rate": 3.4070374208637173e-06, "loss": 0.3249, "step": 13592 }, { "epoch": 0.6152070604209097, "grad_norm": 0.6094925796063878, "learning_rate": 3.4063427081781973e-06, "loss": 0.2985, "step": 13593 }, { "epoch": 0.6152523195293053, "grad_norm": 0.6737245100747968, "learning_rate": 3.4056480297354767e-06, "loss": 0.336, "step": 13594 }, { "epoch": 0.6152975786377008, "grad_norm": 0.3467582044192305, "learning_rate": 3.4049533855504835e-06, "loss": 0.486, "step": 13595 }, { "epoch": 0.6153428377460964, "grad_norm": 0.6309695090828856, "learning_rate": 3.404258775638144e-06, "loss": 0.321, "step": 13596 }, { "epoch": 0.6153880968544919, "grad_norm": 0.5930313711457792, "learning_rate": 3.4035642000133806e-06, "loss": 0.2694, "step": 13597 }, { "epoch": 0.6154333559628875, "grad_norm": 0.5859657562645595, "learning_rate": 3.4028696586911203e-06, "loss": 0.3151, "step": 13598 }, { "epoch": 0.6154786150712831, "grad_norm": 0.6732209596085635, "learning_rate": 3.4021751516862856e-06, "loss": 0.2716, "step": 13599 }, { "epoch": 0.6155238741796787, "grad_norm": 0.3069295366174163, "learning_rate": 3.401480679013801e-06, "loss": 0.4739, "step": 13600 }, { "epoch": 0.6155691332880743, "grad_norm": 0.7672622217379496, "learning_rate": 3.4007862406885863e-06, "loss": 0.2899, "step": 13601 }, { "epoch": 0.6156143923964698, "grad_norm": 0.6369540019262062, "learning_rate": 3.400091836725562e-06, "loss": 0.3028, "step": 13602 }, { "epoch": 0.6156596515048653, "grad_norm": 0.5712373454878116, "learning_rate": 3.3993974671396523e-06, "loss": 0.2963, "step": 13603 }, { "epoch": 0.6157049106132609, "grad_norm": 0.30381427668797734, "learning_rate": 3.3987031319457747e-06, "loss": 0.4829, "step": 13604 }, { "epoch": 0.6157501697216565, "grad_norm": 0.8279645261570301, "learning_rate": 3.398008831158849e-06, "loss": 0.3064, "step": 13605 }, { "epoch": 0.615795428830052, "grad_norm": 0.6295116827920167, "learning_rate": 3.3973145647937935e-06, "loss": 0.3673, "step": 13606 }, { "epoch": 0.6158406879384476, "grad_norm": 0.6245798726321056, "learning_rate": 3.3966203328655244e-06, "loss": 0.2947, "step": 13607 }, { "epoch": 0.6158859470468432, "grad_norm": 0.5876837841079967, "learning_rate": 3.3959261353889605e-06, "loss": 0.3124, "step": 13608 }, { "epoch": 0.6159312061552388, "grad_norm": 0.6718860068837423, "learning_rate": 3.395231972379019e-06, "loss": 0.3449, "step": 13609 }, { "epoch": 0.6159764652636343, "grad_norm": 0.327732997048033, "learning_rate": 3.3945378438506125e-06, "loss": 0.4554, "step": 13610 }, { "epoch": 0.6160217243720298, "grad_norm": 0.6726952775245555, "learning_rate": 3.393843749818656e-06, "loss": 0.2966, "step": 13611 }, { "epoch": 0.6160669834804254, "grad_norm": 0.6939323728620533, "learning_rate": 3.393149690298067e-06, "loss": 0.2989, "step": 13612 }, { "epoch": 0.616112242588821, "grad_norm": 0.7009716702689694, "learning_rate": 3.3924556653037533e-06, "loss": 0.3446, "step": 13613 }, { "epoch": 0.6161575016972166, "grad_norm": 0.6311392257799732, "learning_rate": 3.391761674850631e-06, "loss": 0.3226, "step": 13614 }, { "epoch": 0.6162027608056121, "grad_norm": 0.6330456115405911, "learning_rate": 3.39106771895361e-06, "loss": 0.2801, "step": 13615 }, { "epoch": 0.6162480199140077, "grad_norm": 0.5927001608548688, "learning_rate": 3.3903737976276064e-06, "loss": 0.3049, "step": 13616 }, { "epoch": 0.6162932790224033, "grad_norm": 0.6139805996534979, "learning_rate": 3.389679910887522e-06, "loss": 0.3208, "step": 13617 }, { "epoch": 0.6163385381307989, "grad_norm": 0.7454624269731916, "learning_rate": 3.3889860587482716e-06, "loss": 0.2736, "step": 13618 }, { "epoch": 0.6163837972391943, "grad_norm": 0.5839197225597309, "learning_rate": 3.3882922412247644e-06, "loss": 0.3121, "step": 13619 }, { "epoch": 0.6164290563475899, "grad_norm": 0.32865271198484763, "learning_rate": 3.387598458331906e-06, "loss": 0.4687, "step": 13620 }, { "epoch": 0.6164743154559855, "grad_norm": 0.6346643957182696, "learning_rate": 3.386904710084603e-06, "loss": 0.3113, "step": 13621 }, { "epoch": 0.6165195745643811, "grad_norm": 0.5804358646950067, "learning_rate": 3.3862109964977665e-06, "loss": 0.2919, "step": 13622 }, { "epoch": 0.6165648336727766, "grad_norm": 0.6657660824615055, "learning_rate": 3.3855173175862976e-06, "loss": 0.3376, "step": 13623 }, { "epoch": 0.6166100927811722, "grad_norm": 0.6726518102362474, "learning_rate": 3.3848236733651034e-06, "loss": 0.3341, "step": 13624 }, { "epoch": 0.6166553518895678, "grad_norm": 0.6619560109542502, "learning_rate": 3.3841300638490885e-06, "loss": 0.2905, "step": 13625 }, { "epoch": 0.6167006109979634, "grad_norm": 0.4164067724363868, "learning_rate": 3.383436489053154e-06, "loss": 0.4705, "step": 13626 }, { "epoch": 0.616745870106359, "grad_norm": 0.630389271345759, "learning_rate": 3.3827429489922053e-06, "loss": 0.3422, "step": 13627 }, { "epoch": 0.6167911292147544, "grad_norm": 0.59583288899598, "learning_rate": 3.3820494436811435e-06, "loss": 0.2897, "step": 13628 }, { "epoch": 0.61683638832315, "grad_norm": 0.5847224930970383, "learning_rate": 3.3813559731348716e-06, "loss": 0.2902, "step": 13629 }, { "epoch": 0.6168816474315456, "grad_norm": 0.6896455432590854, "learning_rate": 3.380662537368286e-06, "loss": 0.3259, "step": 13630 }, { "epoch": 0.6169269065399412, "grad_norm": 0.5948295445386829, "learning_rate": 3.3799691363962904e-06, "loss": 0.2756, "step": 13631 }, { "epoch": 0.6169721656483367, "grad_norm": 0.2790831913582214, "learning_rate": 3.379275770233783e-06, "loss": 0.4591, "step": 13632 }, { "epoch": 0.6170174247567323, "grad_norm": 0.698686932276024, "learning_rate": 3.3785824388956613e-06, "loss": 0.32, "step": 13633 }, { "epoch": 0.6170626838651279, "grad_norm": 0.7019383457076268, "learning_rate": 3.377889142396822e-06, "loss": 0.3113, "step": 13634 }, { "epoch": 0.6171079429735234, "grad_norm": 0.2976661593405612, "learning_rate": 3.3771958807521656e-06, "loss": 0.4626, "step": 13635 }, { "epoch": 0.617153202081919, "grad_norm": 0.3217511310855456, "learning_rate": 3.3765026539765832e-06, "loss": 0.464, "step": 13636 }, { "epoch": 0.6171984611903145, "grad_norm": 0.623664016439237, "learning_rate": 3.3758094620849737e-06, "loss": 0.3252, "step": 13637 }, { "epoch": 0.6172437202987101, "grad_norm": 0.5967522282609519, "learning_rate": 3.3751163050922307e-06, "loss": 0.3349, "step": 13638 }, { "epoch": 0.6172889794071057, "grad_norm": 0.2962581492991003, "learning_rate": 3.3744231830132473e-06, "loss": 0.4745, "step": 13639 }, { "epoch": 0.6173342385155013, "grad_norm": 0.6495079733390845, "learning_rate": 3.373730095862916e-06, "loss": 0.3707, "step": 13640 }, { "epoch": 0.6173794976238968, "grad_norm": 0.6280898772506784, "learning_rate": 3.3730370436561316e-06, "loss": 0.295, "step": 13641 }, { "epoch": 0.6174247567322924, "grad_norm": 0.604642646016628, "learning_rate": 3.372344026407785e-06, "loss": 0.3142, "step": 13642 }, { "epoch": 0.617470015840688, "grad_norm": 0.6327627082802671, "learning_rate": 3.3716510441327653e-06, "loss": 0.3517, "step": 13643 }, { "epoch": 0.6175152749490835, "grad_norm": 0.6490284830665943, "learning_rate": 3.3709580968459628e-06, "loss": 0.2857, "step": 13644 }, { "epoch": 0.617560534057479, "grad_norm": 0.6287983386684702, "learning_rate": 3.3702651845622703e-06, "loss": 0.3027, "step": 13645 }, { "epoch": 0.6176057931658746, "grad_norm": 0.6207686356181382, "learning_rate": 3.3695723072965707e-06, "loss": 0.3167, "step": 13646 }, { "epoch": 0.6176510522742702, "grad_norm": 0.5933952431953913, "learning_rate": 3.3688794650637557e-06, "loss": 0.2965, "step": 13647 }, { "epoch": 0.6176963113826658, "grad_norm": 0.6157345800940398, "learning_rate": 3.3681866578787124e-06, "loss": 0.3241, "step": 13648 }, { "epoch": 0.6177415704910614, "grad_norm": 0.5934092362653312, "learning_rate": 3.3674938857563256e-06, "loss": 0.3288, "step": 13649 }, { "epoch": 0.6177868295994569, "grad_norm": 0.6173483257897173, "learning_rate": 3.3668011487114798e-06, "loss": 0.2881, "step": 13650 }, { "epoch": 0.6178320887078524, "grad_norm": 0.5965277040554047, "learning_rate": 3.3661084467590637e-06, "loss": 0.2957, "step": 13651 }, { "epoch": 0.617877347816248, "grad_norm": 0.6436909221396913, "learning_rate": 3.3654157799139576e-06, "loss": 0.3309, "step": 13652 }, { "epoch": 0.6179226069246436, "grad_norm": 0.6945090155316245, "learning_rate": 3.3647231481910464e-06, "loss": 0.2989, "step": 13653 }, { "epoch": 0.6179678660330391, "grad_norm": 0.8550100311569186, "learning_rate": 3.364030551605213e-06, "loss": 0.3137, "step": 13654 }, { "epoch": 0.6180131251414347, "grad_norm": 0.6315291386273997, "learning_rate": 3.363337990171337e-06, "loss": 0.3127, "step": 13655 }, { "epoch": 0.6180583842498303, "grad_norm": 0.6369925272081775, "learning_rate": 3.3626454639043018e-06, "loss": 0.3174, "step": 13656 }, { "epoch": 0.6181036433582259, "grad_norm": 0.6373841023921314, "learning_rate": 3.361952972818987e-06, "loss": 0.321, "step": 13657 }, { "epoch": 0.6181489024666214, "grad_norm": 0.6862637267107752, "learning_rate": 3.3612605169302724e-06, "loss": 0.3059, "step": 13658 }, { "epoch": 0.6181941615750169, "grad_norm": 0.3469364433723692, "learning_rate": 3.360568096253035e-06, "loss": 0.4704, "step": 13659 }, { "epoch": 0.6182394206834125, "grad_norm": 0.33049897548744755, "learning_rate": 3.3598757108021546e-06, "loss": 0.4795, "step": 13660 }, { "epoch": 0.6182846797918081, "grad_norm": 0.3221609743944461, "learning_rate": 3.359183360592509e-06, "loss": 0.4743, "step": 13661 }, { "epoch": 0.6183299389002037, "grad_norm": 0.6382213127197119, "learning_rate": 3.3584910456389726e-06, "loss": 0.2902, "step": 13662 }, { "epoch": 0.6183751980085992, "grad_norm": 0.5931313383666015, "learning_rate": 3.357798765956421e-06, "loss": 0.2928, "step": 13663 }, { "epoch": 0.6184204571169948, "grad_norm": 0.6169420726404996, "learning_rate": 3.357106521559733e-06, "loss": 0.2874, "step": 13664 }, { "epoch": 0.6184657162253904, "grad_norm": 0.6228570680089183, "learning_rate": 3.356414312463778e-06, "loss": 0.2929, "step": 13665 }, { "epoch": 0.618510975333786, "grad_norm": 0.4429317158088941, "learning_rate": 3.3557221386834323e-06, "loss": 0.4908, "step": 13666 }, { "epoch": 0.6185562344421814, "grad_norm": 0.339620418569209, "learning_rate": 3.3550300002335685e-06, "loss": 0.4649, "step": 13667 }, { "epoch": 0.618601493550577, "grad_norm": 0.6310993574186863, "learning_rate": 3.354337897129057e-06, "loss": 0.3275, "step": 13668 }, { "epoch": 0.6186467526589726, "grad_norm": 0.6359004375019375, "learning_rate": 3.3536458293847686e-06, "loss": 0.2751, "step": 13669 }, { "epoch": 0.6186920117673682, "grad_norm": 0.6743555355132227, "learning_rate": 3.3529537970155756e-06, "loss": 0.3195, "step": 13670 }, { "epoch": 0.6187372708757638, "grad_norm": 0.31263598762639505, "learning_rate": 3.3522618000363487e-06, "loss": 0.5041, "step": 13671 }, { "epoch": 0.6187825299841593, "grad_norm": 0.6122582855146629, "learning_rate": 3.3515698384619543e-06, "loss": 0.3067, "step": 13672 }, { "epoch": 0.6188277890925549, "grad_norm": 0.6522907156600588, "learning_rate": 3.35087791230726e-06, "loss": 0.3204, "step": 13673 }, { "epoch": 0.6188730482009505, "grad_norm": 0.2912571970881983, "learning_rate": 3.3501860215871363e-06, "loss": 0.4722, "step": 13674 }, { "epoch": 0.618918307309346, "grad_norm": 0.6105368967235455, "learning_rate": 3.3494941663164465e-06, "loss": 0.2829, "step": 13675 }, { "epoch": 0.6189635664177415, "grad_norm": 0.2997500457463555, "learning_rate": 3.348802346510058e-06, "loss": 0.4621, "step": 13676 }, { "epoch": 0.6190088255261371, "grad_norm": 0.6123374708352166, "learning_rate": 3.348110562182838e-06, "loss": 0.2748, "step": 13677 }, { "epoch": 0.6190540846345327, "grad_norm": 0.6084595710799209, "learning_rate": 3.3474188133496466e-06, "loss": 0.3066, "step": 13678 }, { "epoch": 0.6190993437429283, "grad_norm": 0.5729332026915518, "learning_rate": 3.346727100025349e-06, "loss": 0.2863, "step": 13679 }, { "epoch": 0.6191446028513238, "grad_norm": 0.6368878796261367, "learning_rate": 3.34603542222481e-06, "loss": 0.3138, "step": 13680 }, { "epoch": 0.6191898619597194, "grad_norm": 0.5823494292580653, "learning_rate": 3.3453437799628885e-06, "loss": 0.3202, "step": 13681 }, { "epoch": 0.619235121068115, "grad_norm": 0.6531662241634709, "learning_rate": 3.344652173254448e-06, "loss": 0.3633, "step": 13682 }, { "epoch": 0.6192803801765105, "grad_norm": 0.30126341048357147, "learning_rate": 3.343960602114349e-06, "loss": 0.4781, "step": 13683 }, { "epoch": 0.6193256392849061, "grad_norm": 0.6433215004630558, "learning_rate": 3.3432690665574485e-06, "loss": 0.2774, "step": 13684 }, { "epoch": 0.6193708983933016, "grad_norm": 0.29760997546260387, "learning_rate": 3.3425775665986093e-06, "loss": 0.4561, "step": 13685 }, { "epoch": 0.6194161575016972, "grad_norm": 0.6400974170920216, "learning_rate": 3.341886102252687e-06, "loss": 0.2772, "step": 13686 }, { "epoch": 0.6194614166100928, "grad_norm": 0.6834423109711578, "learning_rate": 3.3411946735345412e-06, "loss": 0.3839, "step": 13687 }, { "epoch": 0.6195066757184884, "grad_norm": 0.6234401108842078, "learning_rate": 3.340503280459024e-06, "loss": 0.2889, "step": 13688 }, { "epoch": 0.6195519348268839, "grad_norm": 0.2790971693531577, "learning_rate": 3.3398119230409976e-06, "loss": 0.4897, "step": 13689 }, { "epoch": 0.6195971939352795, "grad_norm": 0.6686486132169249, "learning_rate": 3.339120601295314e-06, "loss": 0.3546, "step": 13690 }, { "epoch": 0.619642453043675, "grad_norm": 0.6426563328291927, "learning_rate": 3.3384293152368264e-06, "loss": 0.322, "step": 13691 }, { "epoch": 0.6196877121520706, "grad_norm": 0.6026238741279107, "learning_rate": 3.3377380648803894e-06, "loss": 0.2905, "step": 13692 }, { "epoch": 0.6197329712604661, "grad_norm": 0.6161566790579164, "learning_rate": 3.3370468502408584e-06, "loss": 0.3009, "step": 13693 }, { "epoch": 0.6197782303688617, "grad_norm": 0.6770974820021, "learning_rate": 3.3363556713330806e-06, "loss": 0.3089, "step": 13694 }, { "epoch": 0.6198234894772573, "grad_norm": 0.5855209014504454, "learning_rate": 3.3356645281719114e-06, "loss": 0.3018, "step": 13695 }, { "epoch": 0.6198687485856529, "grad_norm": 0.594537096198144, "learning_rate": 3.3349734207722e-06, "loss": 0.4007, "step": 13696 }, { "epoch": 0.6199140076940485, "grad_norm": 0.575094066503019, "learning_rate": 3.334282349148795e-06, "loss": 0.2955, "step": 13697 }, { "epoch": 0.619959266802444, "grad_norm": 0.6381673155404338, "learning_rate": 3.3335913133165467e-06, "loss": 0.3129, "step": 13698 }, { "epoch": 0.6200045259108395, "grad_norm": 0.310952462490026, "learning_rate": 3.332900313290303e-06, "loss": 0.4803, "step": 13699 }, { "epoch": 0.6200497850192351, "grad_norm": 0.6186094329233779, "learning_rate": 3.332209349084913e-06, "loss": 0.28, "step": 13700 }, { "epoch": 0.6200950441276307, "grad_norm": 0.6440602505732157, "learning_rate": 3.3315184207152208e-06, "loss": 0.3011, "step": 13701 }, { "epoch": 0.6201403032360262, "grad_norm": 0.29370766930924735, "learning_rate": 3.330827528196072e-06, "loss": 0.4786, "step": 13702 }, { "epoch": 0.6201855623444218, "grad_norm": 0.5860643236849766, "learning_rate": 3.330136671542315e-06, "loss": 0.3187, "step": 13703 }, { "epoch": 0.6202308214528174, "grad_norm": 0.6053177624397245, "learning_rate": 3.329445850768792e-06, "loss": 0.3108, "step": 13704 }, { "epoch": 0.620276080561213, "grad_norm": 0.6287777257029172, "learning_rate": 3.3287550658903466e-06, "loss": 0.3194, "step": 13705 }, { "epoch": 0.6203213396696086, "grad_norm": 0.6483596673339858, "learning_rate": 3.328064316921823e-06, "loss": 0.2893, "step": 13706 }, { "epoch": 0.620366598778004, "grad_norm": 0.640477653434398, "learning_rate": 3.3273736038780604e-06, "loss": 0.3129, "step": 13707 }, { "epoch": 0.6204118578863996, "grad_norm": 0.6681407434538018, "learning_rate": 3.3266829267739026e-06, "loss": 0.3462, "step": 13708 }, { "epoch": 0.6204571169947952, "grad_norm": 0.29857528906145336, "learning_rate": 3.325992285624191e-06, "loss": 0.4843, "step": 13709 }, { "epoch": 0.6205023761031908, "grad_norm": 0.6447706460657421, "learning_rate": 3.325301680443762e-06, "loss": 0.3636, "step": 13710 }, { "epoch": 0.6205476352115863, "grad_norm": 0.6682698026092924, "learning_rate": 3.3246111112474578e-06, "loss": 0.3289, "step": 13711 }, { "epoch": 0.6205928943199819, "grad_norm": 0.6591637616635906, "learning_rate": 3.3239205780501134e-06, "loss": 0.3497, "step": 13712 }, { "epoch": 0.6206381534283775, "grad_norm": 0.6754149109862705, "learning_rate": 3.3232300808665703e-06, "loss": 0.2837, "step": 13713 }, { "epoch": 0.6206834125367731, "grad_norm": 0.6193464763113535, "learning_rate": 3.3225396197116616e-06, "loss": 0.3218, "step": 13714 }, { "epoch": 0.6207286716451685, "grad_norm": 0.6162007209511707, "learning_rate": 3.321849194600225e-06, "loss": 0.328, "step": 13715 }, { "epoch": 0.6207739307535641, "grad_norm": 0.6253423668804438, "learning_rate": 3.321158805547096e-06, "loss": 0.2941, "step": 13716 }, { "epoch": 0.6208191898619597, "grad_norm": 0.6661802237947486, "learning_rate": 3.320468452567106e-06, "loss": 0.3408, "step": 13717 }, { "epoch": 0.6208644489703553, "grad_norm": 0.6118279427263077, "learning_rate": 3.319778135675092e-06, "loss": 0.3066, "step": 13718 }, { "epoch": 0.6209097080787509, "grad_norm": 0.6382050988995399, "learning_rate": 3.3190878548858862e-06, "loss": 0.3464, "step": 13719 }, { "epoch": 0.6209549671871464, "grad_norm": 0.6587494163288371, "learning_rate": 3.318397610214319e-06, "loss": 0.2996, "step": 13720 }, { "epoch": 0.621000226295542, "grad_norm": 0.7209291907841151, "learning_rate": 3.317707401675221e-06, "loss": 0.2783, "step": 13721 }, { "epoch": 0.6210454854039376, "grad_norm": 0.5451826261203058, "learning_rate": 3.317017229283428e-06, "loss": 0.2755, "step": 13722 }, { "epoch": 0.6210907445123331, "grad_norm": 0.6646853793016391, "learning_rate": 3.3163270930537623e-06, "loss": 0.3281, "step": 13723 }, { "epoch": 0.6211360036207286, "grad_norm": 0.7129622897575655, "learning_rate": 3.3156369930010574e-06, "loss": 0.2968, "step": 13724 }, { "epoch": 0.6211812627291242, "grad_norm": 0.6001944567292407, "learning_rate": 3.3149469291401413e-06, "loss": 0.3496, "step": 13725 }, { "epoch": 0.6212265218375198, "grad_norm": 0.31675565312106924, "learning_rate": 3.3142569014858395e-06, "loss": 0.4998, "step": 13726 }, { "epoch": 0.6212717809459154, "grad_norm": 0.5826162656953073, "learning_rate": 3.313566910052979e-06, "loss": 0.2726, "step": 13727 }, { "epoch": 0.6213170400543109, "grad_norm": 0.6090598312717707, "learning_rate": 3.3128769548563864e-06, "loss": 0.3287, "step": 13728 }, { "epoch": 0.6213622991627065, "grad_norm": 0.6458666374249951, "learning_rate": 3.312187035910888e-06, "loss": 0.3374, "step": 13729 }, { "epoch": 0.6214075582711021, "grad_norm": 0.583242363800884, "learning_rate": 3.3114971532313058e-06, "loss": 0.3267, "step": 13730 }, { "epoch": 0.6214528173794976, "grad_norm": 0.6149879962998592, "learning_rate": 3.310807306832462e-06, "loss": 0.2917, "step": 13731 }, { "epoch": 0.6214980764878932, "grad_norm": 0.622620677794301, "learning_rate": 3.310117496729184e-06, "loss": 0.3231, "step": 13732 }, { "epoch": 0.6215433355962887, "grad_norm": 0.6379068039069801, "learning_rate": 3.309427722936289e-06, "loss": 0.3134, "step": 13733 }, { "epoch": 0.6215885947046843, "grad_norm": 0.6829768054297026, "learning_rate": 3.308737985468601e-06, "loss": 0.3594, "step": 13734 }, { "epoch": 0.6216338538130799, "grad_norm": 0.6276077247995031, "learning_rate": 3.3080482843409402e-06, "loss": 0.3395, "step": 13735 }, { "epoch": 0.6216791129214755, "grad_norm": 0.5825313407520454, "learning_rate": 3.307358619568123e-06, "loss": 0.2992, "step": 13736 }, { "epoch": 0.621724372029871, "grad_norm": 0.6154254435580296, "learning_rate": 3.3066689911649714e-06, "loss": 0.3046, "step": 13737 }, { "epoch": 0.6217696311382666, "grad_norm": 0.30001794513072544, "learning_rate": 3.305979399146304e-06, "loss": 0.4916, "step": 13738 }, { "epoch": 0.6218148902466621, "grad_norm": 0.29097572170197145, "learning_rate": 3.305289843526935e-06, "loss": 0.4988, "step": 13739 }, { "epoch": 0.6218601493550577, "grad_norm": 0.6322721344768839, "learning_rate": 3.304600324321682e-06, "loss": 0.3225, "step": 13740 }, { "epoch": 0.6219054084634533, "grad_norm": 0.6406997502751267, "learning_rate": 3.3039108415453614e-06, "loss": 0.2876, "step": 13741 }, { "epoch": 0.6219506675718488, "grad_norm": 0.6260918992215052, "learning_rate": 3.303221395212789e-06, "loss": 0.3173, "step": 13742 }, { "epoch": 0.6219959266802444, "grad_norm": 0.2673962411768744, "learning_rate": 3.302531985338776e-06, "loss": 0.4688, "step": 13743 }, { "epoch": 0.62204118578864, "grad_norm": 0.6301571532531297, "learning_rate": 3.3018426119381364e-06, "loss": 0.3163, "step": 13744 }, { "epoch": 0.6220864448970356, "grad_norm": 0.28869214485696515, "learning_rate": 3.3011532750256874e-06, "loss": 0.4791, "step": 13745 }, { "epoch": 0.622131704005431, "grad_norm": 0.2968194690831498, "learning_rate": 3.300463974616234e-06, "loss": 0.5083, "step": 13746 }, { "epoch": 0.6221769631138266, "grad_norm": 0.2750714673096283, "learning_rate": 3.2997747107245898e-06, "loss": 0.4796, "step": 13747 }, { "epoch": 0.6222222222222222, "grad_norm": 0.6431241474261704, "learning_rate": 3.2990854833655674e-06, "loss": 0.3564, "step": 13748 }, { "epoch": 0.6222674813306178, "grad_norm": 0.3403552256461183, "learning_rate": 3.298396292553972e-06, "loss": 0.5099, "step": 13749 }, { "epoch": 0.6223127404390133, "grad_norm": 0.5901200168171835, "learning_rate": 3.2977071383046134e-06, "loss": 0.3121, "step": 13750 }, { "epoch": 0.6223579995474089, "grad_norm": 0.5680916309917373, "learning_rate": 3.297018020632304e-06, "loss": 0.3034, "step": 13751 }, { "epoch": 0.6224032586558045, "grad_norm": 0.2810511524918745, "learning_rate": 3.2963289395518434e-06, "loss": 0.4828, "step": 13752 }, { "epoch": 0.6224485177642001, "grad_norm": 0.6593233991957315, "learning_rate": 3.295639895078042e-06, "loss": 0.3447, "step": 13753 }, { "epoch": 0.6224937768725957, "grad_norm": 0.6151045949479557, "learning_rate": 3.294950887225707e-06, "loss": 0.2901, "step": 13754 }, { "epoch": 0.6225390359809911, "grad_norm": 0.2867113598124026, "learning_rate": 3.294261916009639e-06, "loss": 0.478, "step": 13755 }, { "epoch": 0.6225842950893867, "grad_norm": 0.6284771216660319, "learning_rate": 3.2935729814446426e-06, "loss": 0.3057, "step": 13756 }, { "epoch": 0.6226295541977823, "grad_norm": 0.6213561568312568, "learning_rate": 3.2928840835455233e-06, "loss": 0.3069, "step": 13757 }, { "epoch": 0.6226748133061779, "grad_norm": 0.5948747707066405, "learning_rate": 3.2921952223270824e-06, "loss": 0.3111, "step": 13758 }, { "epoch": 0.6227200724145734, "grad_norm": 0.6322336320261707, "learning_rate": 3.2915063978041205e-06, "loss": 0.2965, "step": 13759 }, { "epoch": 0.622765331522969, "grad_norm": 0.6610317478714497, "learning_rate": 3.290817609991438e-06, "loss": 0.3032, "step": 13760 }, { "epoch": 0.6228105906313646, "grad_norm": 0.6406700715494363, "learning_rate": 3.290128858903837e-06, "loss": 0.3142, "step": 13761 }, { "epoch": 0.6228558497397602, "grad_norm": 0.5882719121004318, "learning_rate": 3.2894401445561154e-06, "loss": 0.2807, "step": 13762 }, { "epoch": 0.6229011088481556, "grad_norm": 0.6216161164157775, "learning_rate": 3.2887514669630706e-06, "loss": 0.2932, "step": 13763 }, { "epoch": 0.6229463679565512, "grad_norm": 0.6124949720716838, "learning_rate": 3.2880628261395033e-06, "loss": 0.3113, "step": 13764 }, { "epoch": 0.6229916270649468, "grad_norm": 0.5685852505759508, "learning_rate": 3.287374222100205e-06, "loss": 0.3131, "step": 13765 }, { "epoch": 0.6230368861733424, "grad_norm": 0.5748963189048516, "learning_rate": 3.2866856548599757e-06, "loss": 0.2906, "step": 13766 }, { "epoch": 0.623082145281738, "grad_norm": 0.6265152952502434, "learning_rate": 3.2859971244336107e-06, "loss": 0.313, "step": 13767 }, { "epoch": 0.6231274043901335, "grad_norm": 0.6210138216125896, "learning_rate": 3.285308630835903e-06, "loss": 0.326, "step": 13768 }, { "epoch": 0.6231726634985291, "grad_norm": 0.6093822225680078, "learning_rate": 3.2846201740816446e-06, "loss": 0.278, "step": 13769 }, { "epoch": 0.6232179226069247, "grad_norm": 0.6318730141083124, "learning_rate": 3.2839317541856317e-06, "loss": 0.3086, "step": 13770 }, { "epoch": 0.6232631817153202, "grad_norm": 0.6329790145727102, "learning_rate": 3.2832433711626562e-06, "loss": 0.3258, "step": 13771 }, { "epoch": 0.6233084408237157, "grad_norm": 0.5543442506257924, "learning_rate": 3.282555025027507e-06, "loss": 0.3155, "step": 13772 }, { "epoch": 0.6233536999321113, "grad_norm": 0.6117032449228192, "learning_rate": 3.2818667157949742e-06, "loss": 0.3133, "step": 13773 }, { "epoch": 0.6233989590405069, "grad_norm": 0.6131769415951153, "learning_rate": 3.281178443479852e-06, "loss": 0.314, "step": 13774 }, { "epoch": 0.6234442181489025, "grad_norm": 0.754090728019248, "learning_rate": 3.2804902080969233e-06, "loss": 0.3104, "step": 13775 }, { "epoch": 0.6234894772572981, "grad_norm": 0.4622437492790766, "learning_rate": 3.2798020096609795e-06, "loss": 0.4743, "step": 13776 }, { "epoch": 0.6235347363656936, "grad_norm": 0.5918545279223919, "learning_rate": 3.2791138481868084e-06, "loss": 0.3231, "step": 13777 }, { "epoch": 0.6235799954740892, "grad_norm": 0.6049844320425433, "learning_rate": 3.2784257236891948e-06, "loss": 0.3062, "step": 13778 }, { "epoch": 0.6236252545824847, "grad_norm": 0.6220890451444148, "learning_rate": 3.2777376361829237e-06, "loss": 0.2907, "step": 13779 }, { "epoch": 0.6236705136908803, "grad_norm": 0.5917243166323288, "learning_rate": 3.2770495856827834e-06, "loss": 0.3416, "step": 13780 }, { "epoch": 0.6237157727992758, "grad_norm": 0.3768603010876834, "learning_rate": 3.2763615722035548e-06, "loss": 0.4654, "step": 13781 }, { "epoch": 0.6237610319076714, "grad_norm": 1.1432024816430995, "learning_rate": 3.275673595760022e-06, "loss": 0.3292, "step": 13782 }, { "epoch": 0.623806291016067, "grad_norm": 0.6519811601739103, "learning_rate": 3.274985656366967e-06, "loss": 0.3377, "step": 13783 }, { "epoch": 0.6238515501244626, "grad_norm": 0.6088029234300733, "learning_rate": 3.2742977540391747e-06, "loss": 0.284, "step": 13784 }, { "epoch": 0.6238968092328581, "grad_norm": 0.6060950905983736, "learning_rate": 3.273609888791422e-06, "loss": 0.3033, "step": 13785 }, { "epoch": 0.6239420683412537, "grad_norm": 1.1703658968985153, "learning_rate": 3.2729220606384905e-06, "loss": 0.2962, "step": 13786 }, { "epoch": 0.6239873274496492, "grad_norm": 0.3148542397514561, "learning_rate": 3.2722342695951612e-06, "loss": 0.4635, "step": 13787 }, { "epoch": 0.6240325865580448, "grad_norm": 0.5937671301737225, "learning_rate": 3.2715465156762095e-06, "loss": 0.3219, "step": 13788 }, { "epoch": 0.6240778456664404, "grad_norm": 0.6898405827371933, "learning_rate": 3.2708587988964134e-06, "loss": 0.3268, "step": 13789 }, { "epoch": 0.6241231047748359, "grad_norm": 0.6329187007223473, "learning_rate": 3.270171119270554e-06, "loss": 0.3403, "step": 13790 }, { "epoch": 0.6241683638832315, "grad_norm": 0.6249047775111826, "learning_rate": 3.269483476813403e-06, "loss": 0.3126, "step": 13791 }, { "epoch": 0.6242136229916271, "grad_norm": 0.287356452626235, "learning_rate": 3.2687958715397373e-06, "loss": 0.4665, "step": 13792 }, { "epoch": 0.6242588821000227, "grad_norm": 0.30557783476155476, "learning_rate": 3.2681083034643323e-06, "loss": 0.4719, "step": 13793 }, { "epoch": 0.6243041412084182, "grad_norm": 0.6428013543053296, "learning_rate": 3.2674207726019586e-06, "loss": 0.3011, "step": 13794 }, { "epoch": 0.6243494003168137, "grad_norm": 0.6440078992859094, "learning_rate": 3.2667332789673923e-06, "loss": 0.3334, "step": 13795 }, { "epoch": 0.6243946594252093, "grad_norm": 0.6103591359768376, "learning_rate": 3.2660458225754053e-06, "loss": 0.291, "step": 13796 }, { "epoch": 0.6244399185336049, "grad_norm": 0.5711343505430136, "learning_rate": 3.2653584034407677e-06, "loss": 0.3366, "step": 13797 }, { "epoch": 0.6244851776420004, "grad_norm": 0.6753677755634805, "learning_rate": 3.264671021578249e-06, "loss": 0.2947, "step": 13798 }, { "epoch": 0.624530436750396, "grad_norm": 0.647574021925651, "learning_rate": 3.2639836770026215e-06, "loss": 0.3245, "step": 13799 }, { "epoch": 0.6245756958587916, "grad_norm": 0.6507967950709432, "learning_rate": 3.2632963697286546e-06, "loss": 0.3375, "step": 13800 }, { "epoch": 0.6246209549671872, "grad_norm": 0.7578310633233802, "learning_rate": 3.262609099771113e-06, "loss": 0.3414, "step": 13801 }, { "epoch": 0.6246662140755828, "grad_norm": 0.7524822672827859, "learning_rate": 3.261921867144765e-06, "loss": 0.317, "step": 13802 }, { "epoch": 0.6247114731839782, "grad_norm": 0.6484206522561464, "learning_rate": 3.2612346718643818e-06, "loss": 0.3302, "step": 13803 }, { "epoch": 0.6247567322923738, "grad_norm": 0.6192293017632162, "learning_rate": 3.2605475139447207e-06, "loss": 0.2862, "step": 13804 }, { "epoch": 0.6248019914007694, "grad_norm": 0.6352264745867553, "learning_rate": 3.2598603934005535e-06, "loss": 0.3266, "step": 13805 }, { "epoch": 0.624847250509165, "grad_norm": 0.6771239055173687, "learning_rate": 3.259173310246643e-06, "loss": 0.3212, "step": 13806 }, { "epoch": 0.6248925096175605, "grad_norm": 0.6036924888487291, "learning_rate": 3.25848626449775e-06, "loss": 0.3014, "step": 13807 }, { "epoch": 0.6249377687259561, "grad_norm": 0.5917879949587903, "learning_rate": 3.2577992561686377e-06, "loss": 0.3092, "step": 13808 }, { "epoch": 0.6249830278343517, "grad_norm": 0.6359248283898935, "learning_rate": 3.2571122852740703e-06, "loss": 0.3168, "step": 13809 }, { "epoch": 0.6250282869427473, "grad_norm": 0.6815532910505805, "learning_rate": 3.256425351828807e-06, "loss": 0.3212, "step": 13810 }, { "epoch": 0.6250735460511428, "grad_norm": 0.6523054381738229, "learning_rate": 3.2557384558476067e-06, "loss": 0.3324, "step": 13811 }, { "epoch": 0.6251188051595383, "grad_norm": 0.36420794672238715, "learning_rate": 3.2550515973452295e-06, "loss": 0.4923, "step": 13812 }, { "epoch": 0.6251640642679339, "grad_norm": 0.6536888411766718, "learning_rate": 3.2543647763364362e-06, "loss": 0.3078, "step": 13813 }, { "epoch": 0.6252093233763295, "grad_norm": 0.6278223232996571, "learning_rate": 3.2536779928359818e-06, "loss": 0.2874, "step": 13814 }, { "epoch": 0.6252545824847251, "grad_norm": 0.6031600414981914, "learning_rate": 3.252991246858623e-06, "loss": 0.3187, "step": 13815 }, { "epoch": 0.6252998415931206, "grad_norm": 0.8775349594361912, "learning_rate": 3.2523045384191186e-06, "loss": 0.3119, "step": 13816 }, { "epoch": 0.6253451007015162, "grad_norm": 0.6574777145007359, "learning_rate": 3.25161786753222e-06, "loss": 0.3461, "step": 13817 }, { "epoch": 0.6253903598099118, "grad_norm": 0.6385496332359393, "learning_rate": 3.2509312342126846e-06, "loss": 0.3365, "step": 13818 }, { "epoch": 0.6254356189183073, "grad_norm": 0.5592285709201028, "learning_rate": 3.250244638475266e-06, "loss": 0.319, "step": 13819 }, { "epoch": 0.6254808780267028, "grad_norm": 0.3097990064128394, "learning_rate": 3.249558080334716e-06, "loss": 0.48, "step": 13820 }, { "epoch": 0.6255261371350984, "grad_norm": 0.27962557719584863, "learning_rate": 3.2488715598057856e-06, "loss": 0.4853, "step": 13821 }, { "epoch": 0.625571396243494, "grad_norm": 0.9387839473662496, "learning_rate": 3.2481850769032287e-06, "loss": 0.3368, "step": 13822 }, { "epoch": 0.6256166553518896, "grad_norm": 0.6884232226794563, "learning_rate": 3.2474986316417923e-06, "loss": 0.317, "step": 13823 }, { "epoch": 0.6256619144602852, "grad_norm": 0.2886797804451184, "learning_rate": 3.2468122240362287e-06, "loss": 0.4574, "step": 13824 }, { "epoch": 0.6257071735686807, "grad_norm": 0.6355643364928295, "learning_rate": 3.246125854101287e-06, "loss": 0.3144, "step": 13825 }, { "epoch": 0.6257524326770763, "grad_norm": 0.6015479752126067, "learning_rate": 3.2454395218517132e-06, "loss": 0.3257, "step": 13826 }, { "epoch": 0.6257976917854718, "grad_norm": 0.6398553600797272, "learning_rate": 3.2447532273022536e-06, "loss": 0.2851, "step": 13827 }, { "epoch": 0.6258429508938674, "grad_norm": 0.6120003353559114, "learning_rate": 3.244066970467658e-06, "loss": 0.2759, "step": 13828 }, { "epoch": 0.6258882100022629, "grad_norm": 0.6475841047237043, "learning_rate": 3.2433807513626714e-06, "loss": 0.2943, "step": 13829 }, { "epoch": 0.6259334691106585, "grad_norm": 0.6480570558579203, "learning_rate": 3.242694570002036e-06, "loss": 0.321, "step": 13830 }, { "epoch": 0.6259787282190541, "grad_norm": 0.6847686675984392, "learning_rate": 3.2420084264004966e-06, "loss": 0.297, "step": 13831 }, { "epoch": 0.6260239873274497, "grad_norm": 0.6184729580512613, "learning_rate": 3.2413223205727995e-06, "loss": 0.3233, "step": 13832 }, { "epoch": 0.6260692464358452, "grad_norm": 0.63232383158112, "learning_rate": 3.240636252533681e-06, "loss": 0.285, "step": 13833 }, { "epoch": 0.6261145055442408, "grad_norm": 1.6564727417893674, "learning_rate": 3.2399502222978875e-06, "loss": 0.328, "step": 13834 }, { "epoch": 0.6261597646526363, "grad_norm": 0.6573502586519927, "learning_rate": 3.239264229880159e-06, "loss": 0.2903, "step": 13835 }, { "epoch": 0.6262050237610319, "grad_norm": 0.6437835906829803, "learning_rate": 3.2385782752952336e-06, "loss": 0.2902, "step": 13836 }, { "epoch": 0.6262502828694275, "grad_norm": 0.6287223521759137, "learning_rate": 3.2378923585578504e-06, "loss": 0.3426, "step": 13837 }, { "epoch": 0.626295541977823, "grad_norm": 0.6936936100572169, "learning_rate": 3.237206479682751e-06, "loss": 0.3348, "step": 13838 }, { "epoch": 0.6263408010862186, "grad_norm": 0.6140669836485039, "learning_rate": 3.236520638684668e-06, "loss": 0.3061, "step": 13839 }, { "epoch": 0.6263860601946142, "grad_norm": 0.6145631143983877, "learning_rate": 3.235834835578341e-06, "loss": 0.3029, "step": 13840 }, { "epoch": 0.6264313193030098, "grad_norm": 0.6446262287679247, "learning_rate": 3.235149070378504e-06, "loss": 0.3551, "step": 13841 }, { "epoch": 0.6264765784114052, "grad_norm": 0.41859033443519317, "learning_rate": 3.2344633430998955e-06, "loss": 0.4625, "step": 13842 }, { "epoch": 0.6265218375198008, "grad_norm": 0.6986814785352065, "learning_rate": 3.233777653757246e-06, "loss": 0.2925, "step": 13843 }, { "epoch": 0.6265670966281964, "grad_norm": 0.6146056230805496, "learning_rate": 3.2330920023652906e-06, "loss": 0.2838, "step": 13844 }, { "epoch": 0.626612355736592, "grad_norm": 0.6248541826618237, "learning_rate": 3.2324063889387624e-06, "loss": 0.2855, "step": 13845 }, { "epoch": 0.6266576148449875, "grad_norm": 0.29127147352835037, "learning_rate": 3.2317208134923895e-06, "loss": 0.4494, "step": 13846 }, { "epoch": 0.6267028739533831, "grad_norm": 0.5426443301129628, "learning_rate": 3.2310352760409067e-06, "loss": 0.4794, "step": 13847 }, { "epoch": 0.6267481330617787, "grad_norm": 0.256103861392261, "learning_rate": 3.2303497765990445e-06, "loss": 0.4425, "step": 13848 }, { "epoch": 0.6267933921701743, "grad_norm": 0.6424238921109408, "learning_rate": 3.229664315181529e-06, "loss": 0.3382, "step": 13849 }, { "epoch": 0.6268386512785699, "grad_norm": 0.27190227539063644, "learning_rate": 3.2289788918030894e-06, "loss": 0.4543, "step": 13850 }, { "epoch": 0.6268839103869653, "grad_norm": 0.6729867835394304, "learning_rate": 3.228293506478457e-06, "loss": 0.3145, "step": 13851 }, { "epoch": 0.6269291694953609, "grad_norm": 0.6188356839840186, "learning_rate": 3.227608159222353e-06, "loss": 0.3033, "step": 13852 }, { "epoch": 0.6269744286037565, "grad_norm": 0.5950567912039953, "learning_rate": 3.2269228500495066e-06, "loss": 0.2733, "step": 13853 }, { "epoch": 0.6270196877121521, "grad_norm": 0.6284604076040228, "learning_rate": 3.2262375789746426e-06, "loss": 0.2744, "step": 13854 }, { "epoch": 0.6270649468205476, "grad_norm": 0.726216221839223, "learning_rate": 3.225552346012487e-06, "loss": 0.3253, "step": 13855 }, { "epoch": 0.6271102059289432, "grad_norm": 0.3194964820786272, "learning_rate": 3.22486715117776e-06, "loss": 0.4537, "step": 13856 }, { "epoch": 0.6271554650373388, "grad_norm": 0.7452714533400248, "learning_rate": 3.224181994485186e-06, "loss": 0.2999, "step": 13857 }, { "epoch": 0.6272007241457344, "grad_norm": 0.5865218640010116, "learning_rate": 3.2234968759494883e-06, "loss": 0.2673, "step": 13858 }, { "epoch": 0.62724598325413, "grad_norm": 0.625521131109761, "learning_rate": 3.2228117955853853e-06, "loss": 0.2728, "step": 13859 }, { "epoch": 0.6272912423625254, "grad_norm": 0.6312711646093929, "learning_rate": 3.2221267534075986e-06, "loss": 0.3171, "step": 13860 }, { "epoch": 0.627336501470921, "grad_norm": 0.6587997158261353, "learning_rate": 3.221441749430849e-06, "loss": 0.3308, "step": 13861 }, { "epoch": 0.6273817605793166, "grad_norm": 0.3269749147811726, "learning_rate": 3.220756783669852e-06, "loss": 0.4494, "step": 13862 }, { "epoch": 0.6274270196877122, "grad_norm": 0.6300477574521686, "learning_rate": 3.2200718561393283e-06, "loss": 0.3255, "step": 13863 }, { "epoch": 0.6274722787961077, "grad_norm": 0.7105608912769671, "learning_rate": 3.2193869668539947e-06, "loss": 0.3018, "step": 13864 }, { "epoch": 0.6275175379045033, "grad_norm": 0.6095324079681443, "learning_rate": 3.2187021158285646e-06, "loss": 0.3009, "step": 13865 }, { "epoch": 0.6275627970128989, "grad_norm": 0.585902388220184, "learning_rate": 3.2180173030777552e-06, "loss": 0.3255, "step": 13866 }, { "epoch": 0.6276080561212944, "grad_norm": 0.6738114149519168, "learning_rate": 3.2173325286162825e-06, "loss": 0.324, "step": 13867 }, { "epoch": 0.6276533152296899, "grad_norm": 0.6705044356390757, "learning_rate": 3.216647792458858e-06, "loss": 0.3191, "step": 13868 }, { "epoch": 0.6276985743380855, "grad_norm": 0.6308960620317975, "learning_rate": 3.215963094620195e-06, "loss": 0.3322, "step": 13869 }, { "epoch": 0.6277438334464811, "grad_norm": 0.6366572702729194, "learning_rate": 3.215278435115005e-06, "loss": 0.3152, "step": 13870 }, { "epoch": 0.6277890925548767, "grad_norm": 0.6480624211144015, "learning_rate": 3.2145938139580015e-06, "loss": 0.2958, "step": 13871 }, { "epoch": 0.6278343516632723, "grad_norm": 0.6289197414985261, "learning_rate": 3.2139092311638932e-06, "loss": 0.3296, "step": 13872 }, { "epoch": 0.6278796107716678, "grad_norm": 0.5775930571137048, "learning_rate": 3.2132246867473892e-06, "loss": 0.3154, "step": 13873 }, { "epoch": 0.6279248698800634, "grad_norm": 0.6611953191837328, "learning_rate": 3.2125401807232008e-06, "loss": 0.3109, "step": 13874 }, { "epoch": 0.6279701289884589, "grad_norm": 0.6291090272339493, "learning_rate": 3.2118557131060323e-06, "loss": 0.3177, "step": 13875 }, { "epoch": 0.6280153880968545, "grad_norm": 0.6368641840116436, "learning_rate": 3.211171283910593e-06, "loss": 0.3307, "step": 13876 }, { "epoch": 0.62806064720525, "grad_norm": 0.2998450724408988, "learning_rate": 3.21048689315159e-06, "loss": 0.4719, "step": 13877 }, { "epoch": 0.6281059063136456, "grad_norm": 0.3263249370296473, "learning_rate": 3.209802540843727e-06, "loss": 0.4801, "step": 13878 }, { "epoch": 0.6281511654220412, "grad_norm": 0.6287527403460311, "learning_rate": 3.2091182270017073e-06, "loss": 0.3182, "step": 13879 }, { "epoch": 0.6281964245304368, "grad_norm": 0.6882364507965004, "learning_rate": 3.208433951640241e-06, "loss": 0.2956, "step": 13880 }, { "epoch": 0.6282416836388323, "grad_norm": 0.6499708052175333, "learning_rate": 3.207749714774023e-06, "loss": 0.2979, "step": 13881 }, { "epoch": 0.6282869427472279, "grad_norm": 0.27344950178467453, "learning_rate": 3.20706551641776e-06, "loss": 0.4612, "step": 13882 }, { "epoch": 0.6283322018556234, "grad_norm": 0.6161048425175149, "learning_rate": 3.206381356586151e-06, "loss": 0.3316, "step": 13883 }, { "epoch": 0.628377460964019, "grad_norm": 0.5950934825653854, "learning_rate": 3.205697235293902e-06, "loss": 0.3029, "step": 13884 }, { "epoch": 0.6284227200724146, "grad_norm": 0.5932586492807993, "learning_rate": 3.205013152555705e-06, "loss": 0.3429, "step": 13885 }, { "epoch": 0.6284679791808101, "grad_norm": 0.666508282891372, "learning_rate": 3.2043291083862636e-06, "loss": 0.311, "step": 13886 }, { "epoch": 0.6285132382892057, "grad_norm": 0.5829000266122752, "learning_rate": 3.203645102800276e-06, "loss": 0.2759, "step": 13887 }, { "epoch": 0.6285584973976013, "grad_norm": 0.6251673579480885, "learning_rate": 3.202961135812437e-06, "loss": 0.3163, "step": 13888 }, { "epoch": 0.6286037565059969, "grad_norm": 0.6107779886231521, "learning_rate": 3.2022772074374424e-06, "loss": 0.3294, "step": 13889 }, { "epoch": 0.6286490156143923, "grad_norm": 0.6090162862844448, "learning_rate": 3.2015933176899915e-06, "loss": 0.3132, "step": 13890 }, { "epoch": 0.6286942747227879, "grad_norm": 0.6906544995735248, "learning_rate": 3.2009094665847763e-06, "loss": 0.3301, "step": 13891 }, { "epoch": 0.6287395338311835, "grad_norm": 0.6114037897338339, "learning_rate": 3.200225654136491e-06, "loss": 0.3585, "step": 13892 }, { "epoch": 0.6287847929395791, "grad_norm": 0.6393766558008107, "learning_rate": 3.19954188035983e-06, "loss": 0.3176, "step": 13893 }, { "epoch": 0.6288300520479747, "grad_norm": 0.7565660542118592, "learning_rate": 3.1988581452694815e-06, "loss": 0.3214, "step": 13894 }, { "epoch": 0.6288753111563702, "grad_norm": 0.624572649998261, "learning_rate": 3.1981744488801416e-06, "loss": 0.3344, "step": 13895 }, { "epoch": 0.6289205702647658, "grad_norm": 0.33261469621894185, "learning_rate": 3.1974907912064986e-06, "loss": 0.4628, "step": 13896 }, { "epoch": 0.6289658293731614, "grad_norm": 0.7448596327261254, "learning_rate": 3.1968071722632432e-06, "loss": 0.3199, "step": 13897 }, { "epoch": 0.629011088481557, "grad_norm": 0.5851809149186197, "learning_rate": 3.196123592065063e-06, "loss": 0.3175, "step": 13898 }, { "epoch": 0.6290563475899524, "grad_norm": 0.6151664808855503, "learning_rate": 3.1954400506266453e-06, "loss": 0.3231, "step": 13899 }, { "epoch": 0.629101606698348, "grad_norm": 0.61384899017604, "learning_rate": 3.194756547962681e-06, "loss": 0.3243, "step": 13900 }, { "epoch": 0.6291468658067436, "grad_norm": 0.6155705262342234, "learning_rate": 3.1940730840878532e-06, "loss": 0.2846, "step": 13901 }, { "epoch": 0.6291921249151392, "grad_norm": 0.6030867830465274, "learning_rate": 3.193389659016848e-06, "loss": 0.323, "step": 13902 }, { "epoch": 0.6292373840235347, "grad_norm": 0.5851968191163726, "learning_rate": 3.192706272764351e-06, "loss": 0.2796, "step": 13903 }, { "epoch": 0.6292826431319303, "grad_norm": 0.6389463259250212, "learning_rate": 3.192022925345044e-06, "loss": 0.33, "step": 13904 }, { "epoch": 0.6293279022403259, "grad_norm": 0.6699153846748191, "learning_rate": 3.191339616773612e-06, "loss": 0.3088, "step": 13905 }, { "epoch": 0.6293731613487215, "grad_norm": 0.6763927720203098, "learning_rate": 3.190656347064739e-06, "loss": 0.3375, "step": 13906 }, { "epoch": 0.629418420457117, "grad_norm": 0.6410174157777444, "learning_rate": 3.189973116233103e-06, "loss": 0.3117, "step": 13907 }, { "epoch": 0.6294636795655125, "grad_norm": 0.613551811374985, "learning_rate": 3.1892899242933834e-06, "loss": 0.3496, "step": 13908 }, { "epoch": 0.6295089386739081, "grad_norm": 0.606707201595439, "learning_rate": 3.1886067712602656e-06, "loss": 0.3513, "step": 13909 }, { "epoch": 0.6295541977823037, "grad_norm": 0.6494404568011635, "learning_rate": 3.1879236571484224e-06, "loss": 0.3551, "step": 13910 }, { "epoch": 0.6295994568906993, "grad_norm": 0.5912183003606637, "learning_rate": 3.1872405819725356e-06, "loss": 0.2697, "step": 13911 }, { "epoch": 0.6296447159990948, "grad_norm": 0.6521680921138964, "learning_rate": 3.1865575457472797e-06, "loss": 0.2802, "step": 13912 }, { "epoch": 0.6296899751074904, "grad_norm": 0.5875091213842563, "learning_rate": 3.1858745484873356e-06, "loss": 0.2909, "step": 13913 }, { "epoch": 0.629735234215886, "grad_norm": 0.6159229710750475, "learning_rate": 3.1851915902073734e-06, "loss": 0.3577, "step": 13914 }, { "epoch": 0.6297804933242815, "grad_norm": 0.6312091121112716, "learning_rate": 3.184508670922071e-06, "loss": 0.3101, "step": 13915 }, { "epoch": 0.629825752432677, "grad_norm": 0.6328397534853961, "learning_rate": 3.1838257906461016e-06, "loss": 0.3303, "step": 13916 }, { "epoch": 0.6298710115410726, "grad_norm": 0.6220419513305495, "learning_rate": 3.183142949394138e-06, "loss": 0.2836, "step": 13917 }, { "epoch": 0.6299162706494682, "grad_norm": 0.6483485814812089, "learning_rate": 3.1824601471808504e-06, "loss": 0.3122, "step": 13918 }, { "epoch": 0.6299615297578638, "grad_norm": 0.6153026674344495, "learning_rate": 3.181777384020915e-06, "loss": 0.3569, "step": 13919 }, { "epoch": 0.6300067888662594, "grad_norm": 1.1590618909167922, "learning_rate": 3.1810946599289983e-06, "loss": 0.3436, "step": 13920 }, { "epoch": 0.6300520479746549, "grad_norm": 0.6600727506975453, "learning_rate": 3.1804119749197703e-06, "loss": 0.3378, "step": 13921 }, { "epoch": 0.6300973070830505, "grad_norm": 0.570883949381704, "learning_rate": 3.179729329007902e-06, "loss": 0.2901, "step": 13922 }, { "epoch": 0.630142566191446, "grad_norm": 0.6916262353759627, "learning_rate": 3.179046722208058e-06, "loss": 0.3714, "step": 13923 }, { "epoch": 0.6301878252998416, "grad_norm": 0.580028197703851, "learning_rate": 3.1783641545349074e-06, "loss": 0.3072, "step": 13924 }, { "epoch": 0.6302330844082371, "grad_norm": 0.2973455929085098, "learning_rate": 3.1776816260031172e-06, "loss": 0.4392, "step": 13925 }, { "epoch": 0.6302783435166327, "grad_norm": 0.6103130810803539, "learning_rate": 3.1769991366273533e-06, "loss": 0.2949, "step": 13926 }, { "epoch": 0.6303236026250283, "grad_norm": 0.6174746681694337, "learning_rate": 3.1763166864222766e-06, "loss": 0.3511, "step": 13927 }, { "epoch": 0.6303688617334239, "grad_norm": 0.6284170932260028, "learning_rate": 3.175634275402555e-06, "loss": 0.309, "step": 13928 }, { "epoch": 0.6304141208418195, "grad_norm": 0.6609520162027985, "learning_rate": 3.1749519035828495e-06, "loss": 0.3246, "step": 13929 }, { "epoch": 0.630459379950215, "grad_norm": 0.7255600667588301, "learning_rate": 3.1742695709778222e-06, "loss": 0.325, "step": 13930 }, { "epoch": 0.6305046390586105, "grad_norm": 0.5971870426282323, "learning_rate": 3.1735872776021344e-06, "loss": 0.2919, "step": 13931 }, { "epoch": 0.6305498981670061, "grad_norm": 0.6410748277342618, "learning_rate": 3.1729050234704474e-06, "loss": 0.3205, "step": 13932 }, { "epoch": 0.6305951572754017, "grad_norm": 0.3077461694511978, "learning_rate": 3.1722228085974183e-06, "loss": 0.4587, "step": 13933 }, { "epoch": 0.6306404163837972, "grad_norm": 0.2873055720141124, "learning_rate": 3.1715406329977083e-06, "loss": 0.4757, "step": 13934 }, { "epoch": 0.6306856754921928, "grad_norm": 0.6291234455503854, "learning_rate": 3.1708584966859745e-06, "loss": 0.2686, "step": 13935 }, { "epoch": 0.6307309346005884, "grad_norm": 0.2676697398113157, "learning_rate": 3.1701763996768744e-06, "loss": 0.4715, "step": 13936 }, { "epoch": 0.630776193708984, "grad_norm": 0.2841129202382441, "learning_rate": 3.1694943419850616e-06, "loss": 0.4649, "step": 13937 }, { "epoch": 0.6308214528173794, "grad_norm": 0.5972951884939902, "learning_rate": 3.1688123236251967e-06, "loss": 0.2485, "step": 13938 }, { "epoch": 0.630866711925775, "grad_norm": 0.6976197308832803, "learning_rate": 3.1681303446119277e-06, "loss": 0.3125, "step": 13939 }, { "epoch": 0.6309119710341706, "grad_norm": 0.6712376562482888, "learning_rate": 3.167448404959913e-06, "loss": 0.3349, "step": 13940 }, { "epoch": 0.6309572301425662, "grad_norm": 0.581681353721388, "learning_rate": 3.166766504683802e-06, "loss": 0.3106, "step": 13941 }, { "epoch": 0.6310024892509618, "grad_norm": 0.3447944047961648, "learning_rate": 3.166084643798252e-06, "loss": 0.494, "step": 13942 }, { "epoch": 0.6310477483593573, "grad_norm": 0.6397469634467333, "learning_rate": 3.165402822317908e-06, "loss": 0.3649, "step": 13943 }, { "epoch": 0.6310930074677529, "grad_norm": 1.0218459586067286, "learning_rate": 3.1647210402574223e-06, "loss": 0.2909, "step": 13944 }, { "epoch": 0.6311382665761485, "grad_norm": 0.5746655024858726, "learning_rate": 3.1640392976314472e-06, "loss": 0.3024, "step": 13945 }, { "epoch": 0.6311835256845441, "grad_norm": 0.6052059717797788, "learning_rate": 3.1633575944546273e-06, "loss": 0.2883, "step": 13946 }, { "epoch": 0.6312287847929395, "grad_norm": 0.6117422105639195, "learning_rate": 3.162675930741611e-06, "loss": 0.302, "step": 13947 }, { "epoch": 0.6312740439013351, "grad_norm": 0.2782623106961924, "learning_rate": 3.161994306507048e-06, "loss": 0.457, "step": 13948 }, { "epoch": 0.6313193030097307, "grad_norm": 0.6013180195685133, "learning_rate": 3.1613127217655814e-06, "loss": 0.309, "step": 13949 }, { "epoch": 0.6313645621181263, "grad_norm": 0.6245021539144215, "learning_rate": 3.160631176531858e-06, "loss": 0.2948, "step": 13950 }, { "epoch": 0.6314098212265218, "grad_norm": 0.6155792584363897, "learning_rate": 3.1599496708205212e-06, "loss": 0.2789, "step": 13951 }, { "epoch": 0.6314550803349174, "grad_norm": 0.5400955697561185, "learning_rate": 3.159268204646213e-06, "loss": 0.4629, "step": 13952 }, { "epoch": 0.631500339443313, "grad_norm": 0.6008466199890503, "learning_rate": 3.158586778023579e-06, "loss": 0.2852, "step": 13953 }, { "epoch": 0.6315455985517086, "grad_norm": 0.6247523900052501, "learning_rate": 3.1579053909672597e-06, "loss": 0.2784, "step": 13954 }, { "epoch": 0.6315908576601041, "grad_norm": 0.624934184912694, "learning_rate": 3.1572240434918975e-06, "loss": 0.3079, "step": 13955 }, { "epoch": 0.6316361167684996, "grad_norm": 0.6361115372253076, "learning_rate": 3.156542735612128e-06, "loss": 0.3273, "step": 13956 }, { "epoch": 0.6316813758768952, "grad_norm": 0.2770654450453919, "learning_rate": 3.1558614673425946e-06, "loss": 0.4658, "step": 13957 }, { "epoch": 0.6317266349852908, "grad_norm": 0.26875663521487186, "learning_rate": 3.1551802386979356e-06, "loss": 0.4614, "step": 13958 }, { "epoch": 0.6317718940936864, "grad_norm": 0.8911871688377059, "learning_rate": 3.1544990496927864e-06, "loss": 0.2884, "step": 13959 }, { "epoch": 0.6318171532020819, "grad_norm": 0.6482525152061249, "learning_rate": 3.1538179003417836e-06, "loss": 0.3352, "step": 13960 }, { "epoch": 0.6318624123104775, "grad_norm": 0.6406084975429104, "learning_rate": 3.1531367906595665e-06, "loss": 0.3274, "step": 13961 }, { "epoch": 0.631907671418873, "grad_norm": 0.6568044044364002, "learning_rate": 3.1524557206607655e-06, "loss": 0.3397, "step": 13962 }, { "epoch": 0.6319529305272686, "grad_norm": 0.6302651681053412, "learning_rate": 3.1517746903600173e-06, "loss": 0.3109, "step": 13963 }, { "epoch": 0.6319981896356642, "grad_norm": 0.27192785144332376, "learning_rate": 3.1510936997719557e-06, "loss": 0.4441, "step": 13964 }, { "epoch": 0.6320434487440597, "grad_norm": 0.7875442893777145, "learning_rate": 3.1504127489112105e-06, "loss": 0.3169, "step": 13965 }, { "epoch": 0.6320887078524553, "grad_norm": 0.2886598711250437, "learning_rate": 3.149731837792414e-06, "loss": 0.4735, "step": 13966 }, { "epoch": 0.6321339669608509, "grad_norm": 0.5776455071697102, "learning_rate": 3.149050966430199e-06, "loss": 0.3278, "step": 13967 }, { "epoch": 0.6321792260692465, "grad_norm": 0.6388254430484523, "learning_rate": 3.148370134839195e-06, "loss": 0.3332, "step": 13968 }, { "epoch": 0.632224485177642, "grad_norm": 0.6266131600997259, "learning_rate": 3.1476893430340282e-06, "loss": 0.326, "step": 13969 }, { "epoch": 0.6322697442860375, "grad_norm": 0.6778133379330554, "learning_rate": 3.147008591029328e-06, "loss": 0.3019, "step": 13970 }, { "epoch": 0.6323150033944331, "grad_norm": 0.5863638792175061, "learning_rate": 3.1463278788397256e-06, "loss": 0.3309, "step": 13971 }, { "epoch": 0.6323602625028287, "grad_norm": 0.6172203148817229, "learning_rate": 3.1456472064798403e-06, "loss": 0.2714, "step": 13972 }, { "epoch": 0.6324055216112242, "grad_norm": 0.2877354389046555, "learning_rate": 3.144966573964302e-06, "loss": 0.472, "step": 13973 }, { "epoch": 0.6324507807196198, "grad_norm": 0.26938771291979924, "learning_rate": 3.1442859813077364e-06, "loss": 0.4476, "step": 13974 }, { "epoch": 0.6324960398280154, "grad_norm": 0.6809283680606592, "learning_rate": 3.1436054285247645e-06, "loss": 0.3145, "step": 13975 }, { "epoch": 0.632541298936411, "grad_norm": 0.6095048093758612, "learning_rate": 3.1429249156300094e-06, "loss": 0.3005, "step": 13976 }, { "epoch": 0.6325865580448066, "grad_norm": 0.5992126505470327, "learning_rate": 3.1422444426380964e-06, "loss": 0.3093, "step": 13977 }, { "epoch": 0.632631817153202, "grad_norm": 0.5853842538818939, "learning_rate": 3.1415640095636436e-06, "loss": 0.2985, "step": 13978 }, { "epoch": 0.6326770762615976, "grad_norm": 0.5870067931295567, "learning_rate": 3.1408836164212724e-06, "loss": 0.3389, "step": 13979 }, { "epoch": 0.6327223353699932, "grad_norm": 0.588258900006323, "learning_rate": 3.140203263225604e-06, "loss": 0.2815, "step": 13980 }, { "epoch": 0.6327675944783888, "grad_norm": 0.644122902292735, "learning_rate": 3.139522949991253e-06, "loss": 0.2697, "step": 13981 }, { "epoch": 0.6328128535867843, "grad_norm": 0.7828996185510263, "learning_rate": 3.1388426767328408e-06, "loss": 0.26, "step": 13982 }, { "epoch": 0.6328581126951799, "grad_norm": 0.6284030766321109, "learning_rate": 3.138162443464983e-06, "loss": 0.3584, "step": 13983 }, { "epoch": 0.6329033718035755, "grad_norm": 0.6178985167885237, "learning_rate": 3.137482250202298e-06, "loss": 0.3107, "step": 13984 }, { "epoch": 0.6329486309119711, "grad_norm": 0.6510107780176263, "learning_rate": 3.1368020969593967e-06, "loss": 0.3128, "step": 13985 }, { "epoch": 0.6329938900203665, "grad_norm": 0.7311425717620409, "learning_rate": 3.136121983750897e-06, "loss": 0.3402, "step": 13986 }, { "epoch": 0.6330391491287621, "grad_norm": 0.6064276109335814, "learning_rate": 3.1354419105914127e-06, "loss": 0.3011, "step": 13987 }, { "epoch": 0.6330844082371577, "grad_norm": 0.6193044912933445, "learning_rate": 3.1347618774955534e-06, "loss": 0.3333, "step": 13988 }, { "epoch": 0.6331296673455533, "grad_norm": 0.6514901654955743, "learning_rate": 3.134081884477932e-06, "loss": 0.304, "step": 13989 }, { "epoch": 0.6331749264539489, "grad_norm": 0.7694051174167692, "learning_rate": 3.133401931553163e-06, "loss": 0.3437, "step": 13990 }, { "epoch": 0.6332201855623444, "grad_norm": 0.6834306358109133, "learning_rate": 3.1327220187358515e-06, "loss": 0.2673, "step": 13991 }, { "epoch": 0.63326544467074, "grad_norm": 0.6285507208508668, "learning_rate": 3.1320421460406093e-06, "loss": 0.3151, "step": 13992 }, { "epoch": 0.6333107037791356, "grad_norm": 0.29874642143798624, "learning_rate": 3.1313623134820454e-06, "loss": 0.4871, "step": 13993 }, { "epoch": 0.6333559628875312, "grad_norm": 0.6455014552179699, "learning_rate": 3.1306825210747654e-06, "loss": 0.3333, "step": 13994 }, { "epoch": 0.6334012219959266, "grad_norm": 0.6898874640279824, "learning_rate": 3.130002768833376e-06, "loss": 0.3577, "step": 13995 }, { "epoch": 0.6334464811043222, "grad_norm": 0.6025726694594395, "learning_rate": 3.1293230567724843e-06, "loss": 0.3018, "step": 13996 }, { "epoch": 0.6334917402127178, "grad_norm": 0.6278449707753837, "learning_rate": 3.1286433849066965e-06, "loss": 0.3003, "step": 13997 }, { "epoch": 0.6335369993211134, "grad_norm": 0.6014718230979809, "learning_rate": 3.1279637532506134e-06, "loss": 0.2732, "step": 13998 }, { "epoch": 0.633582258429509, "grad_norm": 0.2878032134378391, "learning_rate": 3.1272841618188388e-06, "loss": 0.478, "step": 13999 }, { "epoch": 0.6336275175379045, "grad_norm": 0.38175351117616113, "learning_rate": 3.1266046106259784e-06, "loss": 0.4708, "step": 14000 }, { "epoch": 0.6336727766463001, "grad_norm": 0.6449141988543913, "learning_rate": 3.1259250996866296e-06, "loss": 0.3235, "step": 14001 }, { "epoch": 0.6337180357546957, "grad_norm": 0.7625685410368898, "learning_rate": 3.1252456290153952e-06, "loss": 0.2579, "step": 14002 }, { "epoch": 0.6337632948630912, "grad_norm": 0.6300075430673482, "learning_rate": 3.124566198626875e-06, "loss": 0.2912, "step": 14003 }, { "epoch": 0.6338085539714867, "grad_norm": 0.26417259947224064, "learning_rate": 3.1238868085356656e-06, "loss": 0.4758, "step": 14004 }, { "epoch": 0.6338538130798823, "grad_norm": 0.5877373818985486, "learning_rate": 3.1232074587563667e-06, "loss": 0.3039, "step": 14005 }, { "epoch": 0.6338990721882779, "grad_norm": 0.6570464396043647, "learning_rate": 3.1225281493035776e-06, "loss": 0.3232, "step": 14006 }, { "epoch": 0.6339443312966735, "grad_norm": 0.6492915172805375, "learning_rate": 3.12184888019189e-06, "loss": 0.2726, "step": 14007 }, { "epoch": 0.633989590405069, "grad_norm": 0.6045436003490696, "learning_rate": 3.121169651435903e-06, "loss": 0.319, "step": 14008 }, { "epoch": 0.6340348495134646, "grad_norm": 0.6003389466902463, "learning_rate": 3.12049046305021e-06, "loss": 0.3086, "step": 14009 }, { "epoch": 0.6340801086218602, "grad_norm": 0.6248635843347558, "learning_rate": 3.1198113150494026e-06, "loss": 0.3019, "step": 14010 }, { "epoch": 0.6341253677302557, "grad_norm": 0.6040295162437355, "learning_rate": 3.1191322074480766e-06, "loss": 0.278, "step": 14011 }, { "epoch": 0.6341706268386513, "grad_norm": 0.5699431480642985, "learning_rate": 3.118453140260823e-06, "loss": 0.2921, "step": 14012 }, { "epoch": 0.6342158859470468, "grad_norm": 0.6460627176672942, "learning_rate": 3.1177741135022334e-06, "loss": 0.3305, "step": 14013 }, { "epoch": 0.6342611450554424, "grad_norm": 0.6028203619114058, "learning_rate": 3.1170951271868953e-06, "loss": 0.318, "step": 14014 }, { "epoch": 0.634306404163838, "grad_norm": 0.6705489517107884, "learning_rate": 3.1164161813294014e-06, "loss": 0.3098, "step": 14015 }, { "epoch": 0.6343516632722336, "grad_norm": 0.6932377918562581, "learning_rate": 3.1157372759443396e-06, "loss": 0.2868, "step": 14016 }, { "epoch": 0.6343969223806291, "grad_norm": 0.5981430645831732, "learning_rate": 3.1150584110462955e-06, "loss": 0.2307, "step": 14017 }, { "epoch": 0.6344421814890246, "grad_norm": 0.6158121063213936, "learning_rate": 3.114379586649856e-06, "loss": 0.3063, "step": 14018 }, { "epoch": 0.6344874405974202, "grad_norm": 0.6415675114557254, "learning_rate": 3.1137008027696113e-06, "loss": 0.3035, "step": 14019 }, { "epoch": 0.6345326997058158, "grad_norm": 0.6342444158352719, "learning_rate": 3.1130220594201395e-06, "loss": 0.2814, "step": 14020 }, { "epoch": 0.6345779588142113, "grad_norm": 0.6398484780157805, "learning_rate": 3.1123433566160293e-06, "loss": 0.368, "step": 14021 }, { "epoch": 0.6346232179226069, "grad_norm": 0.6312930733263915, "learning_rate": 3.1116646943718642e-06, "loss": 0.351, "step": 14022 }, { "epoch": 0.6346684770310025, "grad_norm": 0.606712973490306, "learning_rate": 3.110986072702224e-06, "loss": 0.2892, "step": 14023 }, { "epoch": 0.6347137361393981, "grad_norm": 0.6732129537847016, "learning_rate": 3.1103074916216903e-06, "loss": 0.3021, "step": 14024 }, { "epoch": 0.6347589952477937, "grad_norm": 0.5789095832572656, "learning_rate": 3.1096289511448464e-06, "loss": 0.32, "step": 14025 }, { "epoch": 0.6348042543561891, "grad_norm": 0.5983393100348733, "learning_rate": 3.108950451286271e-06, "loss": 0.329, "step": 14026 }, { "epoch": 0.6348495134645847, "grad_norm": 0.3569180718168922, "learning_rate": 3.1082719920605413e-06, "loss": 0.4729, "step": 14027 }, { "epoch": 0.6348947725729803, "grad_norm": 0.6096092009852317, "learning_rate": 3.107593573482236e-06, "loss": 0.2957, "step": 14028 }, { "epoch": 0.6349400316813759, "grad_norm": 0.5723108604017576, "learning_rate": 3.106915195565935e-06, "loss": 0.3194, "step": 14029 }, { "epoch": 0.6349852907897714, "grad_norm": 0.29577567478017497, "learning_rate": 3.1062368583262103e-06, "loss": 0.4703, "step": 14030 }, { "epoch": 0.635030549898167, "grad_norm": 0.6064904924551691, "learning_rate": 3.1055585617776397e-06, "loss": 0.3194, "step": 14031 }, { "epoch": 0.6350758090065626, "grad_norm": 0.2820866429678342, "learning_rate": 3.104880305934799e-06, "loss": 0.4598, "step": 14032 }, { "epoch": 0.6351210681149582, "grad_norm": 0.7375791329775107, "learning_rate": 3.104202090812257e-06, "loss": 0.2889, "step": 14033 }, { "epoch": 0.6351663272233538, "grad_norm": 0.6573603364676751, "learning_rate": 3.1035239164245913e-06, "loss": 0.2739, "step": 14034 }, { "epoch": 0.6352115863317492, "grad_norm": 0.6032982927863696, "learning_rate": 3.1028457827863723e-06, "loss": 0.2995, "step": 14035 }, { "epoch": 0.6352568454401448, "grad_norm": 0.28255894706465395, "learning_rate": 3.1021676899121703e-06, "loss": 0.4676, "step": 14036 }, { "epoch": 0.6353021045485404, "grad_norm": 0.6202280264903082, "learning_rate": 3.101489637816555e-06, "loss": 0.2935, "step": 14037 }, { "epoch": 0.635347363656936, "grad_norm": 1.0338433813847048, "learning_rate": 3.1008116265140974e-06, "loss": 0.2959, "step": 14038 }, { "epoch": 0.6353926227653315, "grad_norm": 0.6175401370396773, "learning_rate": 3.100133656019366e-06, "loss": 0.2817, "step": 14039 }, { "epoch": 0.6354378818737271, "grad_norm": 0.28522691456631716, "learning_rate": 3.0994557263469267e-06, "loss": 0.4735, "step": 14040 }, { "epoch": 0.6354831409821227, "grad_norm": 0.5962872012390195, "learning_rate": 3.0987778375113464e-06, "loss": 0.286, "step": 14041 }, { "epoch": 0.6355284000905183, "grad_norm": 0.28706614162773536, "learning_rate": 3.0980999895271923e-06, "loss": 0.4837, "step": 14042 }, { "epoch": 0.6355736591989137, "grad_norm": 0.6489585933566904, "learning_rate": 3.0974221824090263e-06, "loss": 0.3296, "step": 14043 }, { "epoch": 0.6356189183073093, "grad_norm": 0.5938771744151462, "learning_rate": 3.096744416171415e-06, "loss": 0.31, "step": 14044 }, { "epoch": 0.6356641774157049, "grad_norm": 0.295895117425673, "learning_rate": 3.0960666908289217e-06, "loss": 0.4782, "step": 14045 }, { "epoch": 0.6357094365241005, "grad_norm": 0.7077041445755813, "learning_rate": 3.095389006396107e-06, "loss": 0.3508, "step": 14046 }, { "epoch": 0.6357546956324961, "grad_norm": 0.6472413942804193, "learning_rate": 3.0947113628875327e-06, "loss": 0.3049, "step": 14047 }, { "epoch": 0.6357999547408916, "grad_norm": 0.6015905003099128, "learning_rate": 3.094033760317761e-06, "loss": 0.2803, "step": 14048 }, { "epoch": 0.6358452138492872, "grad_norm": 0.6162874944280102, "learning_rate": 3.0933561987013484e-06, "loss": 0.3106, "step": 14049 }, { "epoch": 0.6358904729576828, "grad_norm": 0.5711546807757878, "learning_rate": 3.092678678052855e-06, "loss": 0.3219, "step": 14050 }, { "epoch": 0.6359357320660783, "grad_norm": 0.6153270001605267, "learning_rate": 3.0920011983868413e-06, "loss": 0.3423, "step": 14051 }, { "epoch": 0.6359809911744738, "grad_norm": 0.29725827368371055, "learning_rate": 3.0913237597178603e-06, "loss": 0.4676, "step": 14052 }, { "epoch": 0.6360262502828694, "grad_norm": 0.5803809341027548, "learning_rate": 3.0906463620604688e-06, "loss": 0.3056, "step": 14053 }, { "epoch": 0.636071509391265, "grad_norm": 0.5935044897307553, "learning_rate": 3.089969005429223e-06, "loss": 0.2788, "step": 14054 }, { "epoch": 0.6361167684996606, "grad_norm": 0.7871930290907165, "learning_rate": 3.089291689838679e-06, "loss": 0.3317, "step": 14055 }, { "epoch": 0.6361620276080561, "grad_norm": 0.5927085282311848, "learning_rate": 3.088614415303387e-06, "loss": 0.2705, "step": 14056 }, { "epoch": 0.6362072867164517, "grad_norm": 0.5601225069918039, "learning_rate": 3.0879371818379e-06, "loss": 0.2861, "step": 14057 }, { "epoch": 0.6362525458248472, "grad_norm": 0.6310018963259992, "learning_rate": 3.0872599894567723e-06, "loss": 0.2878, "step": 14058 }, { "epoch": 0.6362978049332428, "grad_norm": 0.6657166888741248, "learning_rate": 3.0865828381745515e-06, "loss": 0.3544, "step": 14059 }, { "epoch": 0.6363430640416384, "grad_norm": 0.6768852705295367, "learning_rate": 3.08590572800579e-06, "loss": 0.3219, "step": 14060 }, { "epoch": 0.6363883231500339, "grad_norm": 0.30506889420259875, "learning_rate": 3.085228658965036e-06, "loss": 0.4713, "step": 14061 }, { "epoch": 0.6364335822584295, "grad_norm": 0.5846094679846036, "learning_rate": 3.0845516310668348e-06, "loss": 0.3135, "step": 14062 }, { "epoch": 0.6364788413668251, "grad_norm": 0.5972984714076979, "learning_rate": 3.0838746443257385e-06, "loss": 0.3101, "step": 14063 }, { "epoch": 0.6365241004752207, "grad_norm": 0.6280799877614217, "learning_rate": 3.0831976987562906e-06, "loss": 0.3254, "step": 14064 }, { "epoch": 0.6365693595836162, "grad_norm": 0.7375942101160714, "learning_rate": 3.0825207943730375e-06, "loss": 0.2823, "step": 14065 }, { "epoch": 0.6366146186920117, "grad_norm": 0.6223337925347012, "learning_rate": 3.081843931190522e-06, "loss": 0.2988, "step": 14066 }, { "epoch": 0.6366598778004073, "grad_norm": 0.5629945588989438, "learning_rate": 3.0811671092232896e-06, "loss": 0.2809, "step": 14067 }, { "epoch": 0.6367051369088029, "grad_norm": 0.5995456341261228, "learning_rate": 3.0804903284858844e-06, "loss": 0.3316, "step": 14068 }, { "epoch": 0.6367503960171985, "grad_norm": 0.6028071031391948, "learning_rate": 3.079813588992846e-06, "loss": 0.3203, "step": 14069 }, { "epoch": 0.636795655125594, "grad_norm": 0.6773538683823238, "learning_rate": 3.079136890758715e-06, "loss": 0.3213, "step": 14070 }, { "epoch": 0.6368409142339896, "grad_norm": 0.30538410810758704, "learning_rate": 3.078460233798036e-06, "loss": 0.4667, "step": 14071 }, { "epoch": 0.6368861733423852, "grad_norm": 0.5734688367277772, "learning_rate": 3.077783618125341e-06, "loss": 0.306, "step": 14072 }, { "epoch": 0.6369314324507808, "grad_norm": 0.647864019317746, "learning_rate": 3.0771070437551743e-06, "loss": 0.2836, "step": 14073 }, { "epoch": 0.6369766915591762, "grad_norm": 0.6234597688592606, "learning_rate": 3.076430510702072e-06, "loss": 0.3498, "step": 14074 }, { "epoch": 0.6370219506675718, "grad_norm": 0.5875273985255427, "learning_rate": 3.0757540189805695e-06, "loss": 0.2771, "step": 14075 }, { "epoch": 0.6370672097759674, "grad_norm": 0.5434031054147186, "learning_rate": 3.0750775686052024e-06, "loss": 0.2913, "step": 14076 }, { "epoch": 0.637112468884363, "grad_norm": 0.6519079447295152, "learning_rate": 3.0744011595905084e-06, "loss": 0.3241, "step": 14077 }, { "epoch": 0.6371577279927585, "grad_norm": 0.6046272817777023, "learning_rate": 3.0737247919510182e-06, "loss": 0.316, "step": 14078 }, { "epoch": 0.6372029871011541, "grad_norm": 0.5697051594331598, "learning_rate": 3.073048465701266e-06, "loss": 0.2697, "step": 14079 }, { "epoch": 0.6372482462095497, "grad_norm": 0.6877517308747793, "learning_rate": 3.0723721808557857e-06, "loss": 0.3651, "step": 14080 }, { "epoch": 0.6372935053179453, "grad_norm": 0.7527290715522306, "learning_rate": 3.0716959374291053e-06, "loss": 0.3316, "step": 14081 }, { "epoch": 0.6373387644263409, "grad_norm": 0.6233015972507545, "learning_rate": 3.071019735435756e-06, "loss": 0.3241, "step": 14082 }, { "epoch": 0.6373840235347363, "grad_norm": 0.6072325059612153, "learning_rate": 3.0703435748902693e-06, "loss": 0.3037, "step": 14083 }, { "epoch": 0.6374292826431319, "grad_norm": 0.6523962092443791, "learning_rate": 3.069667455807174e-06, "loss": 0.3355, "step": 14084 }, { "epoch": 0.6374745417515275, "grad_norm": 0.6225076419749288, "learning_rate": 3.068991378200995e-06, "loss": 0.2885, "step": 14085 }, { "epoch": 0.6375198008599231, "grad_norm": 0.6509686915667399, "learning_rate": 3.06831534208626e-06, "loss": 0.3127, "step": 14086 }, { "epoch": 0.6375650599683186, "grad_norm": 0.5662595934973575, "learning_rate": 3.0676393474774972e-06, "loss": 0.2725, "step": 14087 }, { "epoch": 0.6376103190767142, "grad_norm": 0.29967031884905504, "learning_rate": 3.0669633943892294e-06, "loss": 0.4708, "step": 14088 }, { "epoch": 0.6376555781851098, "grad_norm": 0.5931619888481149, "learning_rate": 3.066287482835981e-06, "loss": 0.313, "step": 14089 }, { "epoch": 0.6377008372935054, "grad_norm": 0.6794717002108048, "learning_rate": 3.0656116128322773e-06, "loss": 0.3233, "step": 14090 }, { "epoch": 0.6377460964019008, "grad_norm": 0.6069205047274574, "learning_rate": 3.0649357843926365e-06, "loss": 0.2784, "step": 14091 }, { "epoch": 0.6377913555102964, "grad_norm": 0.5762227454928642, "learning_rate": 3.0642599975315836e-06, "loss": 0.2856, "step": 14092 }, { "epoch": 0.637836614618692, "grad_norm": 0.8781504213385238, "learning_rate": 3.0635842522636392e-06, "loss": 0.2966, "step": 14093 }, { "epoch": 0.6378818737270876, "grad_norm": 0.5917374303485107, "learning_rate": 3.0629085486033217e-06, "loss": 0.3098, "step": 14094 }, { "epoch": 0.6379271328354832, "grad_norm": 0.6064820929771447, "learning_rate": 3.0622328865651486e-06, "loss": 0.2716, "step": 14095 }, { "epoch": 0.6379723919438787, "grad_norm": 0.621100849610007, "learning_rate": 3.06155726616364e-06, "loss": 0.3389, "step": 14096 }, { "epoch": 0.6380176510522743, "grad_norm": 0.6248818555128179, "learning_rate": 3.0608816874133135e-06, "loss": 0.2949, "step": 14097 }, { "epoch": 0.6380629101606698, "grad_norm": 0.6350564177824697, "learning_rate": 3.0602061503286827e-06, "loss": 0.2765, "step": 14098 }, { "epoch": 0.6381081692690654, "grad_norm": 0.6025741385211456, "learning_rate": 3.0595306549242643e-06, "loss": 0.2896, "step": 14099 }, { "epoch": 0.6381534283774609, "grad_norm": 0.7198585599741678, "learning_rate": 3.0588552012145743e-06, "loss": 0.359, "step": 14100 }, { "epoch": 0.6381986874858565, "grad_norm": 0.2987848378295094, "learning_rate": 3.058179789214122e-06, "loss": 0.4593, "step": 14101 }, { "epoch": 0.6382439465942521, "grad_norm": 0.610061918764689, "learning_rate": 3.0575044189374225e-06, "loss": 0.3294, "step": 14102 }, { "epoch": 0.6382892057026477, "grad_norm": 0.5692385277557372, "learning_rate": 3.0568290903989885e-06, "loss": 0.3345, "step": 14103 }, { "epoch": 0.6383344648110432, "grad_norm": 0.6333484754121524, "learning_rate": 3.0561538036133275e-06, "loss": 0.2944, "step": 14104 }, { "epoch": 0.6383797239194388, "grad_norm": 0.5669055540397957, "learning_rate": 3.0554785585949514e-06, "loss": 0.2976, "step": 14105 }, { "epoch": 0.6384249830278343, "grad_norm": 0.6554879825600077, "learning_rate": 3.0548033553583707e-06, "loss": 0.312, "step": 14106 }, { "epoch": 0.6384702421362299, "grad_norm": 0.5691376303708063, "learning_rate": 3.05412819391809e-06, "loss": 0.2654, "step": 14107 }, { "epoch": 0.6385155012446255, "grad_norm": 0.28976649932335347, "learning_rate": 3.0534530742886187e-06, "loss": 0.4849, "step": 14108 }, { "epoch": 0.638560760353021, "grad_norm": 0.6500680482864272, "learning_rate": 3.052777996484462e-06, "loss": 0.3153, "step": 14109 }, { "epoch": 0.6386060194614166, "grad_norm": 0.6022213605742276, "learning_rate": 3.052102960520126e-06, "loss": 0.313, "step": 14110 }, { "epoch": 0.6386512785698122, "grad_norm": 0.26796570792864116, "learning_rate": 3.0514279664101153e-06, "loss": 0.4745, "step": 14111 }, { "epoch": 0.6386965376782078, "grad_norm": 0.5572222325190271, "learning_rate": 3.0507530141689324e-06, "loss": 0.2933, "step": 14112 }, { "epoch": 0.6387417967866033, "grad_norm": 0.6644756722110994, "learning_rate": 3.050078103811082e-06, "loss": 0.3544, "step": 14113 }, { "epoch": 0.6387870558949988, "grad_norm": 0.5716566187790919, "learning_rate": 3.0494032353510634e-06, "loss": 0.3171, "step": 14114 }, { "epoch": 0.6388323150033944, "grad_norm": 0.6707308766695779, "learning_rate": 3.0487284088033776e-06, "loss": 0.3356, "step": 14115 }, { "epoch": 0.63887757411179, "grad_norm": 0.27757105153482103, "learning_rate": 3.0480536241825263e-06, "loss": 0.4822, "step": 14116 }, { "epoch": 0.6389228332201856, "grad_norm": 0.7721032612446138, "learning_rate": 3.047378881503008e-06, "loss": 0.3391, "step": 14117 }, { "epoch": 0.6389680923285811, "grad_norm": 0.2996677332075844, "learning_rate": 3.0467041807793198e-06, "loss": 0.4815, "step": 14118 }, { "epoch": 0.6390133514369767, "grad_norm": 0.633219965755037, "learning_rate": 3.046029522025961e-06, "loss": 0.3202, "step": 14119 }, { "epoch": 0.6390586105453723, "grad_norm": 0.7161682752114296, "learning_rate": 3.045354905257425e-06, "loss": 0.2799, "step": 14120 }, { "epoch": 0.6391038696537679, "grad_norm": 0.5959397462784317, "learning_rate": 3.044680330488209e-06, "loss": 0.3324, "step": 14121 }, { "epoch": 0.6391491287621633, "grad_norm": 0.629765307972555, "learning_rate": 3.0440057977328086e-06, "loss": 0.324, "step": 14122 }, { "epoch": 0.6391943878705589, "grad_norm": 0.6622370050010291, "learning_rate": 3.0433313070057157e-06, "loss": 0.3315, "step": 14123 }, { "epoch": 0.6392396469789545, "grad_norm": 0.2798673125588423, "learning_rate": 3.0426568583214224e-06, "loss": 0.4666, "step": 14124 }, { "epoch": 0.6392849060873501, "grad_norm": 0.6499396878375385, "learning_rate": 3.041982451694422e-06, "loss": 0.3218, "step": 14125 }, { "epoch": 0.6393301651957456, "grad_norm": 0.5736196118655933, "learning_rate": 3.0413080871392063e-06, "loss": 0.3067, "step": 14126 }, { "epoch": 0.6393754243041412, "grad_norm": 0.6391268496601686, "learning_rate": 3.0406337646702638e-06, "loss": 0.3067, "step": 14127 }, { "epoch": 0.6394206834125368, "grad_norm": 1.020993104950133, "learning_rate": 3.039959484302083e-06, "loss": 0.3353, "step": 14128 }, { "epoch": 0.6394659425209324, "grad_norm": 0.6174232971913675, "learning_rate": 3.039285246049155e-06, "loss": 0.2749, "step": 14129 }, { "epoch": 0.639511201629328, "grad_norm": 0.6704099198505801, "learning_rate": 3.0386110499259635e-06, "loss": 0.3148, "step": 14130 }, { "epoch": 0.6395564607377234, "grad_norm": 0.6262980876605436, "learning_rate": 3.0379368959469967e-06, "loss": 0.2975, "step": 14131 }, { "epoch": 0.639601719846119, "grad_norm": 0.6167601797243967, "learning_rate": 3.0372627841267418e-06, "loss": 0.2947, "step": 14132 }, { "epoch": 0.6396469789545146, "grad_norm": 0.6249766585899085, "learning_rate": 3.0365887144796796e-06, "loss": 0.3014, "step": 14133 }, { "epoch": 0.6396922380629102, "grad_norm": 0.5942506404857038, "learning_rate": 3.0359146870202954e-06, "loss": 0.3278, "step": 14134 }, { "epoch": 0.6397374971713057, "grad_norm": 0.5785357614177752, "learning_rate": 3.035240701763074e-06, "loss": 0.2704, "step": 14135 }, { "epoch": 0.6397827562797013, "grad_norm": 0.5573281982727035, "learning_rate": 3.0345667587224946e-06, "loss": 0.2888, "step": 14136 }, { "epoch": 0.6398280153880969, "grad_norm": 0.6203447435262367, "learning_rate": 3.03389285791304e-06, "loss": 0.2976, "step": 14137 }, { "epoch": 0.6398732744964925, "grad_norm": 0.5662763294622898, "learning_rate": 3.0332189993491877e-06, "loss": 0.3066, "step": 14138 }, { "epoch": 0.6399185336048879, "grad_norm": 0.6295862196549189, "learning_rate": 3.0325451830454207e-06, "loss": 0.3161, "step": 14139 }, { "epoch": 0.6399637927132835, "grad_norm": 0.6594638994579315, "learning_rate": 3.031871409016214e-06, "loss": 0.3402, "step": 14140 }, { "epoch": 0.6400090518216791, "grad_norm": 0.6413837854225898, "learning_rate": 3.0311976772760466e-06, "loss": 0.321, "step": 14141 }, { "epoch": 0.6400543109300747, "grad_norm": 0.636807890867043, "learning_rate": 3.0305239878393947e-06, "loss": 0.3008, "step": 14142 }, { "epoch": 0.6400995700384703, "grad_norm": 0.6562468927418301, "learning_rate": 3.0298503407207317e-06, "loss": 0.2723, "step": 14143 }, { "epoch": 0.6401448291468658, "grad_norm": 0.31525091302289265, "learning_rate": 3.029176735934536e-06, "loss": 0.4841, "step": 14144 }, { "epoch": 0.6401900882552614, "grad_norm": 0.5849769289677633, "learning_rate": 3.028503173495279e-06, "loss": 0.2699, "step": 14145 }, { "epoch": 0.640235347363657, "grad_norm": 0.6111170056322482, "learning_rate": 3.0278296534174334e-06, "loss": 0.2883, "step": 14146 }, { "epoch": 0.6402806064720525, "grad_norm": 0.700306479916621, "learning_rate": 3.0271561757154705e-06, "loss": 0.3315, "step": 14147 }, { "epoch": 0.640325865580448, "grad_norm": 0.6045621286992134, "learning_rate": 3.0264827404038655e-06, "loss": 0.3392, "step": 14148 }, { "epoch": 0.6403711246888436, "grad_norm": 0.613524506215485, "learning_rate": 3.0258093474970817e-06, "loss": 0.3427, "step": 14149 }, { "epoch": 0.6404163837972392, "grad_norm": 0.6087397259443168, "learning_rate": 3.0251359970095927e-06, "loss": 0.3591, "step": 14150 }, { "epoch": 0.6404616429056348, "grad_norm": 0.7792919719497309, "learning_rate": 3.024462688955867e-06, "loss": 0.3239, "step": 14151 }, { "epoch": 0.6405069020140304, "grad_norm": 0.6220333276322899, "learning_rate": 3.0237894233503697e-06, "loss": 0.316, "step": 14152 }, { "epoch": 0.6405521611224259, "grad_norm": 0.31641963417420904, "learning_rate": 3.0231162002075678e-06, "loss": 0.4724, "step": 14153 }, { "epoch": 0.6405974202308214, "grad_norm": 0.6263995623866292, "learning_rate": 3.0224430195419274e-06, "loss": 0.2947, "step": 14154 }, { "epoch": 0.640642679339217, "grad_norm": 0.6354504950160101, "learning_rate": 3.021769881367914e-06, "loss": 0.3623, "step": 14155 }, { "epoch": 0.6406879384476126, "grad_norm": 0.6123559216921669, "learning_rate": 3.0210967856999896e-06, "loss": 0.2683, "step": 14156 }, { "epoch": 0.6407331975560081, "grad_norm": 0.6038707224113978, "learning_rate": 3.0204237325526166e-06, "loss": 0.315, "step": 14157 }, { "epoch": 0.6407784566644037, "grad_norm": 0.2684083420193887, "learning_rate": 3.01975072194026e-06, "loss": 0.4695, "step": 14158 }, { "epoch": 0.6408237157727993, "grad_norm": 0.791181978079724, "learning_rate": 3.0190777538773763e-06, "loss": 0.2578, "step": 14159 }, { "epoch": 0.6408689748811949, "grad_norm": 0.5665999544527709, "learning_rate": 3.0184048283784284e-06, "loss": 0.2635, "step": 14160 }, { "epoch": 0.6409142339895904, "grad_norm": 0.6494721148390382, "learning_rate": 3.0177319454578756e-06, "loss": 0.3019, "step": 14161 }, { "epoch": 0.6409594930979859, "grad_norm": 0.6559197261771749, "learning_rate": 3.0170591051301746e-06, "loss": 0.3231, "step": 14162 }, { "epoch": 0.6410047522063815, "grad_norm": 0.29533722371071636, "learning_rate": 3.0163863074097823e-06, "loss": 0.4769, "step": 14163 }, { "epoch": 0.6410500113147771, "grad_norm": 0.7705795692488842, "learning_rate": 3.0157135523111574e-06, "loss": 0.2935, "step": 14164 }, { "epoch": 0.6410952704231727, "grad_norm": 0.8557306437058548, "learning_rate": 3.0150408398487536e-06, "loss": 0.3477, "step": 14165 }, { "epoch": 0.6411405295315682, "grad_norm": 0.6391934999181982, "learning_rate": 3.0143681700370253e-06, "loss": 0.3246, "step": 14166 }, { "epoch": 0.6411857886399638, "grad_norm": 0.27606869359092295, "learning_rate": 3.013695542890426e-06, "loss": 0.4821, "step": 14167 }, { "epoch": 0.6412310477483594, "grad_norm": 0.7056753474223514, "learning_rate": 3.0130229584234117e-06, "loss": 0.2753, "step": 14168 }, { "epoch": 0.641276306856755, "grad_norm": 0.6001043987094551, "learning_rate": 3.0123504166504293e-06, "loss": 0.313, "step": 14169 }, { "epoch": 0.6413215659651504, "grad_norm": 0.6753891660126754, "learning_rate": 3.0116779175859322e-06, "loss": 0.3387, "step": 14170 }, { "epoch": 0.641366825073546, "grad_norm": 0.6610764557175421, "learning_rate": 3.011005461244372e-06, "loss": 0.3401, "step": 14171 }, { "epoch": 0.6414120841819416, "grad_norm": 0.6515044167069902, "learning_rate": 3.010333047640192e-06, "loss": 0.3199, "step": 14172 }, { "epoch": 0.6414573432903372, "grad_norm": 0.6496929131103447, "learning_rate": 3.009660676787846e-06, "loss": 0.3511, "step": 14173 }, { "epoch": 0.6415026023987327, "grad_norm": 0.3124187274189695, "learning_rate": 3.0089883487017803e-06, "loss": 0.4819, "step": 14174 }, { "epoch": 0.6415478615071283, "grad_norm": 0.6471645470233871, "learning_rate": 3.0083160633964385e-06, "loss": 0.3403, "step": 14175 }, { "epoch": 0.6415931206155239, "grad_norm": 0.30259448985732956, "learning_rate": 3.007643820886267e-06, "loss": 0.4696, "step": 14176 }, { "epoch": 0.6416383797239195, "grad_norm": 0.6346368867233724, "learning_rate": 3.0069716211857137e-06, "loss": 0.2916, "step": 14177 }, { "epoch": 0.641683638832315, "grad_norm": 0.8301621225683237, "learning_rate": 3.006299464309216e-06, "loss": 0.2947, "step": 14178 }, { "epoch": 0.6417288979407105, "grad_norm": 0.6255210650197129, "learning_rate": 3.0056273502712203e-06, "loss": 0.329, "step": 14179 }, { "epoch": 0.6417741570491061, "grad_norm": 0.5838680131848357, "learning_rate": 3.004955279086167e-06, "loss": 0.3265, "step": 14180 }, { "epoch": 0.6418194161575017, "grad_norm": 0.31067328632448243, "learning_rate": 3.0042832507685005e-06, "loss": 0.4702, "step": 14181 }, { "epoch": 0.6418646752658973, "grad_norm": 0.6004743983019462, "learning_rate": 3.0036112653326544e-06, "loss": 0.3028, "step": 14182 }, { "epoch": 0.6419099343742928, "grad_norm": 0.551667150339416, "learning_rate": 3.0029393227930712e-06, "loss": 0.2985, "step": 14183 }, { "epoch": 0.6419551934826884, "grad_norm": 0.2957063245086027, "learning_rate": 3.0022674231641903e-06, "loss": 0.4687, "step": 14184 }, { "epoch": 0.642000452591084, "grad_norm": 0.624071202751652, "learning_rate": 3.001595566460446e-06, "loss": 0.3463, "step": 14185 }, { "epoch": 0.6420457116994795, "grad_norm": 0.6038896558156434, "learning_rate": 3.0009237526962735e-06, "loss": 0.3186, "step": 14186 }, { "epoch": 0.6420909708078751, "grad_norm": 0.5568839175147954, "learning_rate": 3.0002519818861126e-06, "loss": 0.3022, "step": 14187 }, { "epoch": 0.6421362299162706, "grad_norm": 0.6025428150532423, "learning_rate": 2.999580254044393e-06, "loss": 0.3807, "step": 14188 }, { "epoch": 0.6421814890246662, "grad_norm": 0.5803168353423076, "learning_rate": 2.9989085691855513e-06, "loss": 0.2849, "step": 14189 }, { "epoch": 0.6422267481330618, "grad_norm": 0.2851186215276721, "learning_rate": 2.9982369273240186e-06, "loss": 0.4689, "step": 14190 }, { "epoch": 0.6422720072414574, "grad_norm": 0.5989723155273211, "learning_rate": 2.9975653284742257e-06, "loss": 0.3065, "step": 14191 }, { "epoch": 0.6423172663498529, "grad_norm": 0.26671375041777434, "learning_rate": 2.996893772650602e-06, "loss": 0.4629, "step": 14192 }, { "epoch": 0.6423625254582485, "grad_norm": 0.6129745855779944, "learning_rate": 2.996222259867582e-06, "loss": 0.3266, "step": 14193 }, { "epoch": 0.642407784566644, "grad_norm": 0.5675197444532076, "learning_rate": 2.9955507901395908e-06, "loss": 0.3478, "step": 14194 }, { "epoch": 0.6424530436750396, "grad_norm": 0.28583493374778624, "learning_rate": 2.994879363481056e-06, "loss": 0.4871, "step": 14195 }, { "epoch": 0.6424983027834351, "grad_norm": 0.60330637142904, "learning_rate": 2.994207979906405e-06, "loss": 0.3435, "step": 14196 }, { "epoch": 0.6425435618918307, "grad_norm": 0.6031206315634002, "learning_rate": 2.993536639430066e-06, "loss": 0.3244, "step": 14197 }, { "epoch": 0.6425888210002263, "grad_norm": 0.6196682127212395, "learning_rate": 2.992865342066461e-06, "loss": 0.2869, "step": 14198 }, { "epoch": 0.6426340801086219, "grad_norm": 0.2655158263326608, "learning_rate": 2.992194087830016e-06, "loss": 0.4426, "step": 14199 }, { "epoch": 0.6426793392170175, "grad_norm": 0.2960483703553002, "learning_rate": 2.991522876735154e-06, "loss": 0.4621, "step": 14200 }, { "epoch": 0.642724598325413, "grad_norm": 0.6015521179132051, "learning_rate": 2.990851708796295e-06, "loss": 0.302, "step": 14201 }, { "epoch": 0.6427698574338085, "grad_norm": 0.5963876428881998, "learning_rate": 2.990180584027863e-06, "loss": 0.3565, "step": 14202 }, { "epoch": 0.6428151165422041, "grad_norm": 0.2691629477578797, "learning_rate": 2.989509502444279e-06, "loss": 0.4862, "step": 14203 }, { "epoch": 0.6428603756505997, "grad_norm": 0.7691217666066809, "learning_rate": 2.98883846405996e-06, "loss": 0.34, "step": 14204 }, { "epoch": 0.6429056347589952, "grad_norm": 0.28863810537045187, "learning_rate": 2.988167468889324e-06, "loss": 0.4938, "step": 14205 }, { "epoch": 0.6429508938673908, "grad_norm": 0.662039036583885, "learning_rate": 2.9874965169467934e-06, "loss": 0.2781, "step": 14206 }, { "epoch": 0.6429961529757864, "grad_norm": 0.6085765306963504, "learning_rate": 2.986825608246779e-06, "loss": 0.2576, "step": 14207 }, { "epoch": 0.643041412084182, "grad_norm": 0.6022563110175009, "learning_rate": 2.9861547428037003e-06, "loss": 0.3167, "step": 14208 }, { "epoch": 0.6430866711925775, "grad_norm": 0.5465923287275317, "learning_rate": 2.9854839206319697e-06, "loss": 0.2836, "step": 14209 }, { "epoch": 0.643131930300973, "grad_norm": 0.28547996312858637, "learning_rate": 2.984813141746006e-06, "loss": 0.4954, "step": 14210 }, { "epoch": 0.6431771894093686, "grad_norm": 0.2592260567682769, "learning_rate": 2.9841424061602153e-06, "loss": 0.4616, "step": 14211 }, { "epoch": 0.6432224485177642, "grad_norm": 0.5807378407030216, "learning_rate": 2.9834717138890145e-06, "loss": 0.275, "step": 14212 }, { "epoch": 0.6432677076261598, "grad_norm": 0.5648631166659934, "learning_rate": 2.9828010649468144e-06, "loss": 0.284, "step": 14213 }, { "epoch": 0.6433129667345553, "grad_norm": 0.5817751136785265, "learning_rate": 2.982130459348022e-06, "loss": 0.3002, "step": 14214 }, { "epoch": 0.6433582258429509, "grad_norm": 0.6588689318366263, "learning_rate": 2.9814598971070487e-06, "loss": 0.3609, "step": 14215 }, { "epoch": 0.6434034849513465, "grad_norm": 0.6150071152737294, "learning_rate": 2.980789378238305e-06, "loss": 0.3373, "step": 14216 }, { "epoch": 0.6434487440597421, "grad_norm": 0.6178255211284963, "learning_rate": 2.980118902756194e-06, "loss": 0.3161, "step": 14217 }, { "epoch": 0.6434940031681375, "grad_norm": 0.6240835861392467, "learning_rate": 2.9794484706751243e-06, "loss": 0.3345, "step": 14218 }, { "epoch": 0.6435392622765331, "grad_norm": 0.6383863142520622, "learning_rate": 2.9787780820095025e-06, "loss": 0.2888, "step": 14219 }, { "epoch": 0.6435845213849287, "grad_norm": 0.6012313492309478, "learning_rate": 2.97810773677373e-06, "loss": 0.2687, "step": 14220 }, { "epoch": 0.6436297804933243, "grad_norm": 0.6116343534165997, "learning_rate": 2.977437434982214e-06, "loss": 0.2902, "step": 14221 }, { "epoch": 0.6436750396017199, "grad_norm": 0.5824607060502492, "learning_rate": 2.976767176649356e-06, "loss": 0.2817, "step": 14222 }, { "epoch": 0.6437202987101154, "grad_norm": 0.6720034791116571, "learning_rate": 2.9760969617895567e-06, "loss": 0.3218, "step": 14223 }, { "epoch": 0.643765557818511, "grad_norm": 0.30218453334348905, "learning_rate": 2.975426790417218e-06, "loss": 0.4402, "step": 14224 }, { "epoch": 0.6438108169269066, "grad_norm": 0.618944995342216, "learning_rate": 2.974756662546738e-06, "loss": 0.3273, "step": 14225 }, { "epoch": 0.6438560760353021, "grad_norm": 0.27838162273474976, "learning_rate": 2.97408657819252e-06, "loss": 0.4503, "step": 14226 }, { "epoch": 0.6439013351436976, "grad_norm": 0.6088025550095602, "learning_rate": 2.9734165373689577e-06, "loss": 0.2749, "step": 14227 }, { "epoch": 0.6439465942520932, "grad_norm": 0.6119993938372056, "learning_rate": 2.97274654009045e-06, "loss": 0.3192, "step": 14228 }, { "epoch": 0.6439918533604888, "grad_norm": 0.6109517436523327, "learning_rate": 2.972076586371394e-06, "loss": 0.3238, "step": 14229 }, { "epoch": 0.6440371124688844, "grad_norm": 0.6097190740331956, "learning_rate": 2.9714066762261825e-06, "loss": 0.3165, "step": 14230 }, { "epoch": 0.6440823715772799, "grad_norm": 0.6508118809135099, "learning_rate": 2.9707368096692113e-06, "loss": 0.3748, "step": 14231 }, { "epoch": 0.6441276306856755, "grad_norm": 0.5701642957641045, "learning_rate": 2.9700669867148747e-06, "loss": 0.2921, "step": 14232 }, { "epoch": 0.6441728897940711, "grad_norm": 0.5970835905768958, "learning_rate": 2.9693972073775633e-06, "loss": 0.2968, "step": 14233 }, { "epoch": 0.6442181489024666, "grad_norm": 0.2789643423704904, "learning_rate": 2.9687274716716686e-06, "loss": 0.4623, "step": 14234 }, { "epoch": 0.6442634080108622, "grad_norm": 0.6103176597058867, "learning_rate": 2.968057779611585e-06, "loss": 0.2662, "step": 14235 }, { "epoch": 0.6443086671192577, "grad_norm": 0.5945299475385736, "learning_rate": 2.967388131211696e-06, "loss": 0.291, "step": 14236 }, { "epoch": 0.6443539262276533, "grad_norm": 0.6105465442782668, "learning_rate": 2.966718526486394e-06, "loss": 0.3026, "step": 14237 }, { "epoch": 0.6443991853360489, "grad_norm": 0.7439019656806879, "learning_rate": 2.966048965450065e-06, "loss": 0.3068, "step": 14238 }, { "epoch": 0.6444444444444445, "grad_norm": 0.6234767585672157, "learning_rate": 2.9653794481171006e-06, "loss": 0.3215, "step": 14239 }, { "epoch": 0.64448970355284, "grad_norm": 0.6511768410489892, "learning_rate": 2.9647099745018794e-06, "loss": 0.355, "step": 14240 }, { "epoch": 0.6445349626612356, "grad_norm": 0.5577217240585588, "learning_rate": 2.9640405446187915e-06, "loss": 0.2834, "step": 14241 }, { "epoch": 0.6445802217696311, "grad_norm": 0.6636043203449777, "learning_rate": 2.96337115848222e-06, "loss": 0.2845, "step": 14242 }, { "epoch": 0.6446254808780267, "grad_norm": 0.5674048522001951, "learning_rate": 2.9627018161065456e-06, "loss": 0.2784, "step": 14243 }, { "epoch": 0.6446707399864222, "grad_norm": 0.6702110182418993, "learning_rate": 2.962032517506152e-06, "loss": 0.3145, "step": 14244 }, { "epoch": 0.6447159990948178, "grad_norm": 0.6525622517016991, "learning_rate": 2.9613632626954226e-06, "loss": 0.3155, "step": 14245 }, { "epoch": 0.6447612582032134, "grad_norm": 0.7183003943697874, "learning_rate": 2.960694051688734e-06, "loss": 0.2913, "step": 14246 }, { "epoch": 0.644806517311609, "grad_norm": 0.6211610036592395, "learning_rate": 2.960024884500467e-06, "loss": 0.2935, "step": 14247 }, { "epoch": 0.6448517764200046, "grad_norm": 0.2738736290787737, "learning_rate": 2.959355761145001e-06, "loss": 0.4627, "step": 14248 }, { "epoch": 0.6448970355284, "grad_norm": 0.640655623750578, "learning_rate": 2.9586866816367104e-06, "loss": 0.303, "step": 14249 }, { "epoch": 0.6449422946367956, "grad_norm": 0.6000781467813328, "learning_rate": 2.9580176459899747e-06, "loss": 0.3239, "step": 14250 }, { "epoch": 0.6449875537451912, "grad_norm": 0.26561936790025625, "learning_rate": 2.9573486542191682e-06, "loss": 0.4631, "step": 14251 }, { "epoch": 0.6450328128535868, "grad_norm": 0.9501142452737259, "learning_rate": 2.9566797063386665e-06, "loss": 0.2882, "step": 14252 }, { "epoch": 0.6450780719619823, "grad_norm": 0.6272983545080569, "learning_rate": 2.9560108023628403e-06, "loss": 0.3373, "step": 14253 }, { "epoch": 0.6451233310703779, "grad_norm": 0.6390746721660203, "learning_rate": 2.955341942306066e-06, "loss": 0.3377, "step": 14254 }, { "epoch": 0.6451685901787735, "grad_norm": 0.6762475747292197, "learning_rate": 2.9546731261827135e-06, "loss": 0.2609, "step": 14255 }, { "epoch": 0.6452138492871691, "grad_norm": 0.6354579445660217, "learning_rate": 2.9540043540071535e-06, "loss": 0.3167, "step": 14256 }, { "epoch": 0.6452591083955647, "grad_norm": 0.2658740095150606, "learning_rate": 2.953335625793755e-06, "loss": 0.4601, "step": 14257 }, { "epoch": 0.6453043675039601, "grad_norm": 0.6089096635417536, "learning_rate": 2.952666941556891e-06, "loss": 0.3051, "step": 14258 }, { "epoch": 0.6453496266123557, "grad_norm": 0.6345912634128601, "learning_rate": 2.9519983013109233e-06, "loss": 0.3246, "step": 14259 }, { "epoch": 0.6453948857207513, "grad_norm": 0.6944907784932608, "learning_rate": 2.9513297050702238e-06, "loss": 0.3671, "step": 14260 }, { "epoch": 0.6454401448291469, "grad_norm": 0.5834899490919657, "learning_rate": 2.9506611528491574e-06, "loss": 0.3174, "step": 14261 }, { "epoch": 0.6454854039375424, "grad_norm": 0.6705937698241765, "learning_rate": 2.949992644662088e-06, "loss": 0.3125, "step": 14262 }, { "epoch": 0.645530663045938, "grad_norm": 0.5996420799570138, "learning_rate": 2.9493241805233795e-06, "loss": 0.3252, "step": 14263 }, { "epoch": 0.6455759221543336, "grad_norm": 0.26899489308008584, "learning_rate": 2.9486557604473993e-06, "loss": 0.4378, "step": 14264 }, { "epoch": 0.6456211812627292, "grad_norm": 0.7507072498117943, "learning_rate": 2.947987384448503e-06, "loss": 0.3032, "step": 14265 }, { "epoch": 0.6456664403711246, "grad_norm": 0.6252518968544177, "learning_rate": 2.9473190525410573e-06, "loss": 0.3055, "step": 14266 }, { "epoch": 0.6457116994795202, "grad_norm": 0.6217862203131317, "learning_rate": 2.9466507647394193e-06, "loss": 0.3365, "step": 14267 }, { "epoch": 0.6457569585879158, "grad_norm": 0.6136071409273623, "learning_rate": 2.9459825210579534e-06, "loss": 0.2922, "step": 14268 }, { "epoch": 0.6458022176963114, "grad_norm": 0.6190482145623376, "learning_rate": 2.9453143215110113e-06, "loss": 0.2907, "step": 14269 }, { "epoch": 0.645847476804707, "grad_norm": 0.6836419932946133, "learning_rate": 2.9446461661129553e-06, "loss": 0.2979, "step": 14270 }, { "epoch": 0.6458927359131025, "grad_norm": 0.6098661425702997, "learning_rate": 2.9439780548781414e-06, "loss": 0.2856, "step": 14271 }, { "epoch": 0.6459379950214981, "grad_norm": 0.6647327199400282, "learning_rate": 2.9433099878209238e-06, "loss": 0.3018, "step": 14272 }, { "epoch": 0.6459832541298937, "grad_norm": 0.40327286634865583, "learning_rate": 2.9426419649556566e-06, "loss": 0.4411, "step": 14273 }, { "epoch": 0.6460285132382892, "grad_norm": 0.6334619344167035, "learning_rate": 2.941973986296697e-06, "loss": 0.3332, "step": 14274 }, { "epoch": 0.6460737723466847, "grad_norm": 0.6411940748488537, "learning_rate": 2.9413060518583948e-06, "loss": 0.2829, "step": 14275 }, { "epoch": 0.6461190314550803, "grad_norm": 0.5965215268418078, "learning_rate": 2.9406381616551026e-06, "loss": 0.2762, "step": 14276 }, { "epoch": 0.6461642905634759, "grad_norm": 0.6653536857292324, "learning_rate": 2.939970315701173e-06, "loss": 0.3011, "step": 14277 }, { "epoch": 0.6462095496718715, "grad_norm": 0.2752597094240355, "learning_rate": 2.939302514010951e-06, "loss": 0.4659, "step": 14278 }, { "epoch": 0.646254808780267, "grad_norm": 0.6272241354713876, "learning_rate": 2.9386347565987917e-06, "loss": 0.3371, "step": 14279 }, { "epoch": 0.6463000678886626, "grad_norm": 0.64838854296624, "learning_rate": 2.937967043479039e-06, "loss": 0.3134, "step": 14280 }, { "epoch": 0.6463453269970582, "grad_norm": 0.628894173057687, "learning_rate": 2.937299374666044e-06, "loss": 0.3058, "step": 14281 }, { "epoch": 0.6463905861054537, "grad_norm": 0.6724655897300117, "learning_rate": 2.936631750174147e-06, "loss": 0.3215, "step": 14282 }, { "epoch": 0.6464358452138493, "grad_norm": 0.5889457910829546, "learning_rate": 2.9359641700176977e-06, "loss": 0.2922, "step": 14283 }, { "epoch": 0.6464811043222448, "grad_norm": 0.7019494698064456, "learning_rate": 2.935296634211041e-06, "loss": 0.2842, "step": 14284 }, { "epoch": 0.6465263634306404, "grad_norm": 0.30655246499787175, "learning_rate": 2.934629142768517e-06, "loss": 0.469, "step": 14285 }, { "epoch": 0.646571622539036, "grad_norm": 0.6274803319498268, "learning_rate": 2.9339616957044683e-06, "loss": 0.285, "step": 14286 }, { "epoch": 0.6466168816474316, "grad_norm": 0.5830271213214427, "learning_rate": 2.9332942930332404e-06, "loss": 0.3083, "step": 14287 }, { "epoch": 0.6466621407558271, "grad_norm": 0.5883275567298492, "learning_rate": 2.9326269347691675e-06, "loss": 0.2746, "step": 14288 }, { "epoch": 0.6467073998642227, "grad_norm": 0.640035380891334, "learning_rate": 2.931959620926594e-06, "loss": 0.3313, "step": 14289 }, { "epoch": 0.6467526589726182, "grad_norm": 0.6397116725595818, "learning_rate": 2.9312923515198577e-06, "loss": 0.2949, "step": 14290 }, { "epoch": 0.6467979180810138, "grad_norm": 0.8665629168087222, "learning_rate": 2.9306251265632932e-06, "loss": 0.2905, "step": 14291 }, { "epoch": 0.6468431771894094, "grad_norm": 0.6764450931946282, "learning_rate": 2.929957946071239e-06, "loss": 0.3104, "step": 14292 }, { "epoch": 0.6468884362978049, "grad_norm": 0.5662457813127475, "learning_rate": 2.929290810058032e-06, "loss": 0.2939, "step": 14293 }, { "epoch": 0.6469336954062005, "grad_norm": 0.6583252374043029, "learning_rate": 2.928623718538006e-06, "loss": 0.3027, "step": 14294 }, { "epoch": 0.6469789545145961, "grad_norm": 0.6466550381257198, "learning_rate": 2.9279566715254944e-06, "loss": 0.3758, "step": 14295 }, { "epoch": 0.6470242136229917, "grad_norm": 0.7036033115749474, "learning_rate": 2.9272896690348283e-06, "loss": 0.3053, "step": 14296 }, { "epoch": 0.6470694727313872, "grad_norm": 0.3131935441913814, "learning_rate": 2.926622711080345e-06, "loss": 0.4613, "step": 14297 }, { "epoch": 0.6471147318397827, "grad_norm": 0.6368356188862032, "learning_rate": 2.9259557976763686e-06, "loss": 0.2803, "step": 14298 }, { "epoch": 0.6471599909481783, "grad_norm": 0.629431616289445, "learning_rate": 2.9252889288372335e-06, "loss": 0.3066, "step": 14299 }, { "epoch": 0.6472052500565739, "grad_norm": 0.3006838383038752, "learning_rate": 2.9246221045772683e-06, "loss": 0.444, "step": 14300 }, { "epoch": 0.6472505091649694, "grad_norm": 1.038397028608156, "learning_rate": 2.9239553249107985e-06, "loss": 0.3333, "step": 14301 }, { "epoch": 0.647295768273365, "grad_norm": 0.613277700934174, "learning_rate": 2.9232885898521516e-06, "loss": 0.3064, "step": 14302 }, { "epoch": 0.6473410273817606, "grad_norm": 0.6310216092316894, "learning_rate": 2.9226218994156574e-06, "loss": 0.3362, "step": 14303 }, { "epoch": 0.6473862864901562, "grad_norm": 0.6578581401449011, "learning_rate": 2.921955253615637e-06, "loss": 0.2963, "step": 14304 }, { "epoch": 0.6474315455985518, "grad_norm": 0.6242737955561755, "learning_rate": 2.9212886524664164e-06, "loss": 0.3143, "step": 14305 }, { "epoch": 0.6474768047069472, "grad_norm": 0.28960963861197336, "learning_rate": 2.9206220959823183e-06, "loss": 0.4529, "step": 14306 }, { "epoch": 0.6475220638153428, "grad_norm": 0.3077247899182825, "learning_rate": 2.9199555841776637e-06, "loss": 0.4931, "step": 14307 }, { "epoch": 0.6475673229237384, "grad_norm": 0.2732012975551414, "learning_rate": 2.919289117066777e-06, "loss": 0.4699, "step": 14308 }, { "epoch": 0.647612582032134, "grad_norm": 0.6653553447247057, "learning_rate": 2.918622694663975e-06, "loss": 0.3195, "step": 14309 }, { "epoch": 0.6476578411405295, "grad_norm": 0.6963381989202885, "learning_rate": 2.9179563169835808e-06, "loss": 0.3181, "step": 14310 }, { "epoch": 0.6477031002489251, "grad_norm": 0.642825853602677, "learning_rate": 2.9172899840399106e-06, "loss": 0.3637, "step": 14311 }, { "epoch": 0.6477483593573207, "grad_norm": 0.2854289444108715, "learning_rate": 2.9166236958472805e-06, "loss": 0.4687, "step": 14312 }, { "epoch": 0.6477936184657163, "grad_norm": 0.7019237006351507, "learning_rate": 2.9159574524200105e-06, "loss": 0.2866, "step": 14313 }, { "epoch": 0.6478388775741117, "grad_norm": 0.6214376307211359, "learning_rate": 2.915291253772412e-06, "loss": 0.3105, "step": 14314 }, { "epoch": 0.6478841366825073, "grad_norm": 0.6254524080494197, "learning_rate": 2.9146250999188043e-06, "loss": 0.3209, "step": 14315 }, { "epoch": 0.6479293957909029, "grad_norm": 0.57960723110441, "learning_rate": 2.9139589908734977e-06, "loss": 0.3289, "step": 14316 }, { "epoch": 0.6479746548992985, "grad_norm": 0.8486329736280915, "learning_rate": 2.9132929266508043e-06, "loss": 0.2834, "step": 14317 }, { "epoch": 0.6480199140076941, "grad_norm": 0.5990172383692677, "learning_rate": 2.912626907265037e-06, "loss": 0.3208, "step": 14318 }, { "epoch": 0.6480651731160896, "grad_norm": 0.31174741569254, "learning_rate": 2.91196093273051e-06, "loss": 0.4633, "step": 14319 }, { "epoch": 0.6481104322244852, "grad_norm": 0.5803472112943121, "learning_rate": 2.911295003061526e-06, "loss": 0.2711, "step": 14320 }, { "epoch": 0.6481556913328808, "grad_norm": 0.2820889514644517, "learning_rate": 2.910629118272398e-06, "loss": 0.4638, "step": 14321 }, { "epoch": 0.6482009504412763, "grad_norm": 0.6109816105803477, "learning_rate": 2.9099632783774325e-06, "loss": 0.3118, "step": 14322 }, { "epoch": 0.6482462095496718, "grad_norm": 0.6306317742637505, "learning_rate": 2.909297483390941e-06, "loss": 0.2832, "step": 14323 }, { "epoch": 0.6482914686580674, "grad_norm": 0.6263910342435784, "learning_rate": 2.9086317333272218e-06, "loss": 0.3358, "step": 14324 }, { "epoch": 0.648336727766463, "grad_norm": 0.9421587567712512, "learning_rate": 2.9079660282005833e-06, "loss": 0.2469, "step": 14325 }, { "epoch": 0.6483819868748586, "grad_norm": 0.6924369118078506, "learning_rate": 2.907300368025332e-06, "loss": 0.2902, "step": 14326 }, { "epoch": 0.6484272459832541, "grad_norm": 0.6439189029583597, "learning_rate": 2.906634752815768e-06, "loss": 0.3156, "step": 14327 }, { "epoch": 0.6484725050916497, "grad_norm": 0.6731846534259659, "learning_rate": 2.9059691825861926e-06, "loss": 0.294, "step": 14328 }, { "epoch": 0.6485177642000453, "grad_norm": 0.3488924832775132, "learning_rate": 2.9053036573509096e-06, "loss": 0.4662, "step": 14329 }, { "epoch": 0.6485630233084408, "grad_norm": 0.3336693302384131, "learning_rate": 2.904638177124216e-06, "loss": 0.4322, "step": 14330 }, { "epoch": 0.6486082824168364, "grad_norm": 0.6285069632218658, "learning_rate": 2.9039727419204146e-06, "loss": 0.2927, "step": 14331 }, { "epoch": 0.6486535415252319, "grad_norm": 0.6141502723029155, "learning_rate": 2.9033073517538008e-06, "loss": 0.2927, "step": 14332 }, { "epoch": 0.6486988006336275, "grad_norm": 0.6560939059690654, "learning_rate": 2.9026420066386705e-06, "loss": 0.3329, "step": 14333 }, { "epoch": 0.6487440597420231, "grad_norm": 0.6558394371129095, "learning_rate": 2.9019767065893227e-06, "loss": 0.2866, "step": 14334 }, { "epoch": 0.6487893188504187, "grad_norm": 0.6485885431882231, "learning_rate": 2.9013114516200537e-06, "loss": 0.2918, "step": 14335 }, { "epoch": 0.6488345779588142, "grad_norm": 0.6142572990614329, "learning_rate": 2.900646241745156e-06, "loss": 0.3494, "step": 14336 }, { "epoch": 0.6488798370672098, "grad_norm": 0.6310608157762924, "learning_rate": 2.8999810769789204e-06, "loss": 0.3, "step": 14337 }, { "epoch": 0.6489250961756053, "grad_norm": 0.6451028508599663, "learning_rate": 2.899315957335642e-06, "loss": 0.2935, "step": 14338 }, { "epoch": 0.6489703552840009, "grad_norm": 0.38797774658659645, "learning_rate": 2.8986508828296144e-06, "loss": 0.4867, "step": 14339 }, { "epoch": 0.6490156143923965, "grad_norm": 0.7113345408661017, "learning_rate": 2.897985853475125e-06, "loss": 0.2877, "step": 14340 }, { "epoch": 0.649060873500792, "grad_norm": 0.5809836155986944, "learning_rate": 2.8973208692864623e-06, "loss": 0.2924, "step": 14341 }, { "epoch": 0.6491061326091876, "grad_norm": 0.8393594686255713, "learning_rate": 2.896655930277918e-06, "loss": 0.3202, "step": 14342 }, { "epoch": 0.6491513917175832, "grad_norm": 0.6353389822032426, "learning_rate": 2.8959910364637755e-06, "loss": 0.2833, "step": 14343 }, { "epoch": 0.6491966508259788, "grad_norm": 0.6497489518809756, "learning_rate": 2.8953261878583263e-06, "loss": 0.3251, "step": 14344 }, { "epoch": 0.6492419099343743, "grad_norm": 0.6217565026702535, "learning_rate": 2.8946613844758526e-06, "loss": 0.2892, "step": 14345 }, { "epoch": 0.6492871690427698, "grad_norm": 0.682100611004778, "learning_rate": 2.893996626330638e-06, "loss": 0.3525, "step": 14346 }, { "epoch": 0.6493324281511654, "grad_norm": 0.6481556737222688, "learning_rate": 2.8933319134369677e-06, "loss": 0.3273, "step": 14347 }, { "epoch": 0.649377687259561, "grad_norm": 0.5924432907301626, "learning_rate": 2.8926672458091265e-06, "loss": 0.3111, "step": 14348 }, { "epoch": 0.6494229463679565, "grad_norm": 0.35012226221446846, "learning_rate": 2.892002623461394e-06, "loss": 0.4781, "step": 14349 }, { "epoch": 0.6494682054763521, "grad_norm": 0.7342121078319623, "learning_rate": 2.8913380464080487e-06, "loss": 0.311, "step": 14350 }, { "epoch": 0.6495134645847477, "grad_norm": 0.6000927905765256, "learning_rate": 2.890673514663373e-06, "loss": 0.3, "step": 14351 }, { "epoch": 0.6495587236931433, "grad_norm": 1.7539158633690095, "learning_rate": 2.890009028241647e-06, "loss": 0.2875, "step": 14352 }, { "epoch": 0.6496039828015389, "grad_norm": 0.6715777521885723, "learning_rate": 2.8893445871571463e-06, "loss": 0.3334, "step": 14353 }, { "epoch": 0.6496492419099343, "grad_norm": 0.6676157791442822, "learning_rate": 2.8886801914241465e-06, "loss": 0.2926, "step": 14354 }, { "epoch": 0.6496945010183299, "grad_norm": 0.6263183832848004, "learning_rate": 2.8880158410569264e-06, "loss": 0.3457, "step": 14355 }, { "epoch": 0.6497397601267255, "grad_norm": 0.675025958023004, "learning_rate": 2.88735153606976e-06, "loss": 0.2787, "step": 14356 }, { "epoch": 0.6497850192351211, "grad_norm": 0.30587303186604464, "learning_rate": 2.8866872764769183e-06, "loss": 0.4732, "step": 14357 }, { "epoch": 0.6498302783435166, "grad_norm": 0.6729939352178069, "learning_rate": 2.8860230622926787e-06, "loss": 0.3298, "step": 14358 }, { "epoch": 0.6498755374519122, "grad_norm": 0.596371834667015, "learning_rate": 2.885358893531308e-06, "loss": 0.3025, "step": 14359 }, { "epoch": 0.6499207965603078, "grad_norm": 0.8082889237237667, "learning_rate": 2.884694770207083e-06, "loss": 0.33, "step": 14360 }, { "epoch": 0.6499660556687034, "grad_norm": 0.5788032355268777, "learning_rate": 2.8840306923342707e-06, "loss": 0.3092, "step": 14361 }, { "epoch": 0.6500113147770988, "grad_norm": 0.599923225178742, "learning_rate": 2.883366659927138e-06, "loss": 0.3058, "step": 14362 }, { "epoch": 0.6500565738854944, "grad_norm": 0.6499818118745128, "learning_rate": 2.8827026729999553e-06, "loss": 0.3054, "step": 14363 }, { "epoch": 0.65010183299389, "grad_norm": 0.2773497872558239, "learning_rate": 2.882038731566991e-06, "loss": 0.4747, "step": 14364 }, { "epoch": 0.6501470921022856, "grad_norm": 0.6571747074412797, "learning_rate": 2.881374835642509e-06, "loss": 0.2914, "step": 14365 }, { "epoch": 0.6501923512106812, "grad_norm": 0.7022076125813193, "learning_rate": 2.880710985240774e-06, "loss": 0.3449, "step": 14366 }, { "epoch": 0.6502376103190767, "grad_norm": 0.6016931902558902, "learning_rate": 2.8800471803760504e-06, "loss": 0.3139, "step": 14367 }, { "epoch": 0.6502828694274723, "grad_norm": 0.5979079268440192, "learning_rate": 2.8793834210626036e-06, "loss": 0.2877, "step": 14368 }, { "epoch": 0.6503281285358679, "grad_norm": 0.5774696254930354, "learning_rate": 2.878719707314695e-06, "loss": 0.312, "step": 14369 }, { "epoch": 0.6503733876442634, "grad_norm": 0.6664975643935593, "learning_rate": 2.8780560391465828e-06, "loss": 0.2982, "step": 14370 }, { "epoch": 0.6504186467526589, "grad_norm": 0.6311890521309541, "learning_rate": 2.877392416572531e-06, "loss": 0.338, "step": 14371 }, { "epoch": 0.6504639058610545, "grad_norm": 0.6102114402960955, "learning_rate": 2.876728839606795e-06, "loss": 0.2884, "step": 14372 }, { "epoch": 0.6505091649694501, "grad_norm": 0.6672385468388162, "learning_rate": 2.876065308263637e-06, "loss": 0.2693, "step": 14373 }, { "epoch": 0.6505544240778457, "grad_norm": 0.6171736644178366, "learning_rate": 2.875401822557312e-06, "loss": 0.2921, "step": 14374 }, { "epoch": 0.6505996831862413, "grad_norm": 0.6134210902409978, "learning_rate": 2.8747383825020753e-06, "loss": 0.3198, "step": 14375 }, { "epoch": 0.6506449422946368, "grad_norm": 0.6485596815035234, "learning_rate": 2.874074988112183e-06, "loss": 0.3354, "step": 14376 }, { "epoch": 0.6506902014030324, "grad_norm": 0.6538269871511858, "learning_rate": 2.873411639401893e-06, "loss": 0.2816, "step": 14377 }, { "epoch": 0.6507354605114279, "grad_norm": 0.6694802347127378, "learning_rate": 2.8727483363854547e-06, "loss": 0.3235, "step": 14378 }, { "epoch": 0.6507807196198235, "grad_norm": 0.619858970538202, "learning_rate": 2.872085079077119e-06, "loss": 0.3212, "step": 14379 }, { "epoch": 0.650825978728219, "grad_norm": 0.2800973148325299, "learning_rate": 2.8714218674911397e-06, "loss": 0.4462, "step": 14380 }, { "epoch": 0.6508712378366146, "grad_norm": 0.61955442849468, "learning_rate": 2.8707587016417695e-06, "loss": 0.3097, "step": 14381 }, { "epoch": 0.6509164969450102, "grad_norm": 0.5888297171014283, "learning_rate": 2.870095581543255e-06, "loss": 0.3206, "step": 14382 }, { "epoch": 0.6509617560534058, "grad_norm": 0.5962007118260905, "learning_rate": 2.8694325072098434e-06, "loss": 0.3221, "step": 14383 }, { "epoch": 0.6510070151618013, "grad_norm": 0.5353932920407563, "learning_rate": 2.868769478655785e-06, "loss": 0.2852, "step": 14384 }, { "epoch": 0.6510522742701969, "grad_norm": 0.6399218831839524, "learning_rate": 2.868106495895323e-06, "loss": 0.2848, "step": 14385 }, { "epoch": 0.6510975333785924, "grad_norm": 0.6031502416346952, "learning_rate": 2.8674435589427075e-06, "loss": 0.312, "step": 14386 }, { "epoch": 0.651142792486988, "grad_norm": 0.6202017345451638, "learning_rate": 2.86678066781218e-06, "loss": 0.3196, "step": 14387 }, { "epoch": 0.6511880515953836, "grad_norm": 0.6092919726405652, "learning_rate": 2.866117822517982e-06, "loss": 0.2961, "step": 14388 }, { "epoch": 0.6512333107037791, "grad_norm": 0.7608931850326427, "learning_rate": 2.8654550230743605e-06, "loss": 0.2981, "step": 14389 }, { "epoch": 0.6512785698121747, "grad_norm": 0.7392695622420434, "learning_rate": 2.8647922694955544e-06, "loss": 0.3183, "step": 14390 }, { "epoch": 0.6513238289205703, "grad_norm": 0.704571196624388, "learning_rate": 2.8641295617958033e-06, "loss": 0.322, "step": 14391 }, { "epoch": 0.6513690880289659, "grad_norm": 0.282888891059829, "learning_rate": 2.8634668999893477e-06, "loss": 0.4592, "step": 14392 }, { "epoch": 0.6514143471373613, "grad_norm": 0.2980961398662729, "learning_rate": 2.862804284090428e-06, "loss": 0.4833, "step": 14393 }, { "epoch": 0.6514596062457569, "grad_norm": 1.271083687124575, "learning_rate": 2.8621417141132813e-06, "loss": 0.2558, "step": 14394 }, { "epoch": 0.6515048653541525, "grad_norm": 0.43418125969404187, "learning_rate": 2.8614791900721407e-06, "loss": 0.4753, "step": 14395 }, { "epoch": 0.6515501244625481, "grad_norm": 0.5849742285716373, "learning_rate": 2.860816711981245e-06, "loss": 0.3046, "step": 14396 }, { "epoch": 0.6515953835709436, "grad_norm": 0.6102146219858934, "learning_rate": 2.8601542798548295e-06, "loss": 0.2849, "step": 14397 }, { "epoch": 0.6516406426793392, "grad_norm": 0.2841653912168513, "learning_rate": 2.8594918937071264e-06, "loss": 0.4796, "step": 14398 }, { "epoch": 0.6516859017877348, "grad_norm": 0.6324213646858495, "learning_rate": 2.8588295535523667e-06, "loss": 0.3134, "step": 14399 }, { "epoch": 0.6517311608961304, "grad_norm": 0.6129634366352824, "learning_rate": 2.858167259404786e-06, "loss": 0.3675, "step": 14400 }, { "epoch": 0.651776420004526, "grad_norm": 0.28533615643166216, "learning_rate": 2.85750501127861e-06, "loss": 0.4834, "step": 14401 }, { "epoch": 0.6518216791129214, "grad_norm": 0.699339220550563, "learning_rate": 2.856842809188074e-06, "loss": 0.3116, "step": 14402 }, { "epoch": 0.651866938221317, "grad_norm": 0.6195956920362005, "learning_rate": 2.8561806531474035e-06, "loss": 0.2572, "step": 14403 }, { "epoch": 0.6519121973297126, "grad_norm": 0.5413467945492126, "learning_rate": 2.855518543170824e-06, "loss": 0.2432, "step": 14404 }, { "epoch": 0.6519574564381082, "grad_norm": 0.600232623190571, "learning_rate": 2.8548564792725652e-06, "loss": 0.3013, "step": 14405 }, { "epoch": 0.6520027155465037, "grad_norm": 0.2723010923448121, "learning_rate": 2.8541944614668548e-06, "loss": 0.4688, "step": 14406 }, { "epoch": 0.6520479746548993, "grad_norm": 0.6251866065262676, "learning_rate": 2.8535324897679153e-06, "loss": 0.3261, "step": 14407 }, { "epoch": 0.6520932337632949, "grad_norm": 0.6008352127614647, "learning_rate": 2.852870564189967e-06, "loss": 0.3214, "step": 14408 }, { "epoch": 0.6521384928716905, "grad_norm": 0.7247530449061969, "learning_rate": 2.8522086847472365e-06, "loss": 0.2951, "step": 14409 }, { "epoch": 0.652183751980086, "grad_norm": 0.5944063153735807, "learning_rate": 2.851546851453947e-06, "loss": 0.2495, "step": 14410 }, { "epoch": 0.6522290110884815, "grad_norm": 0.6242000460070781, "learning_rate": 2.8508850643243168e-06, "loss": 0.2665, "step": 14411 }, { "epoch": 0.6522742701968771, "grad_norm": 0.6220752224649899, "learning_rate": 2.8502233233725647e-06, "loss": 0.3021, "step": 14412 }, { "epoch": 0.6523195293052727, "grad_norm": 0.6201982821878654, "learning_rate": 2.8495616286129125e-06, "loss": 0.2813, "step": 14413 }, { "epoch": 0.6523647884136683, "grad_norm": 0.2602797614505162, "learning_rate": 2.848899980059574e-06, "loss": 0.4717, "step": 14414 }, { "epoch": 0.6524100475220638, "grad_norm": 0.5564924797920047, "learning_rate": 2.8482383777267707e-06, "loss": 0.3093, "step": 14415 }, { "epoch": 0.6524553066304594, "grad_norm": 0.5661725215802511, "learning_rate": 2.847576821628716e-06, "loss": 0.2676, "step": 14416 }, { "epoch": 0.652500565738855, "grad_norm": 0.605395525670404, "learning_rate": 2.8469153117796226e-06, "loss": 0.2867, "step": 14417 }, { "epoch": 0.6525458248472505, "grad_norm": 0.5962137534020132, "learning_rate": 2.8462538481937067e-06, "loss": 0.2992, "step": 14418 }, { "epoch": 0.652591083955646, "grad_norm": 0.27038535192728275, "learning_rate": 2.8455924308851843e-06, "loss": 0.465, "step": 14419 }, { "epoch": 0.6526363430640416, "grad_norm": 0.6366746385318536, "learning_rate": 2.844931059868261e-06, "loss": 0.3161, "step": 14420 }, { "epoch": 0.6526816021724372, "grad_norm": 0.6479535834948442, "learning_rate": 2.8442697351571496e-06, "loss": 0.3016, "step": 14421 }, { "epoch": 0.6527268612808328, "grad_norm": 0.7228641491464504, "learning_rate": 2.8436084567660604e-06, "loss": 0.3387, "step": 14422 }, { "epoch": 0.6527721203892284, "grad_norm": 0.6542739679281547, "learning_rate": 2.8429472247092077e-06, "loss": 0.3561, "step": 14423 }, { "epoch": 0.6528173794976239, "grad_norm": 0.7079313320065974, "learning_rate": 2.8422860390007896e-06, "loss": 0.3671, "step": 14424 }, { "epoch": 0.6528626386060195, "grad_norm": 0.6326576281591828, "learning_rate": 2.8416248996550176e-06, "loss": 0.2878, "step": 14425 }, { "epoch": 0.652907897714415, "grad_norm": 0.2668548599523566, "learning_rate": 2.8409638066860994e-06, "loss": 0.459, "step": 14426 }, { "epoch": 0.6529531568228106, "grad_norm": 0.6751405890400213, "learning_rate": 2.8403027601082385e-06, "loss": 0.3585, "step": 14427 }, { "epoch": 0.6529984159312061, "grad_norm": 0.2812480485423244, "learning_rate": 2.8396417599356363e-06, "loss": 0.4647, "step": 14428 }, { "epoch": 0.6530436750396017, "grad_norm": 0.6065274845901797, "learning_rate": 2.838980806182499e-06, "loss": 0.3163, "step": 14429 }, { "epoch": 0.6530889341479973, "grad_norm": 0.6540976748783749, "learning_rate": 2.8383198988630257e-06, "loss": 0.2846, "step": 14430 }, { "epoch": 0.6531341932563929, "grad_norm": 0.5858231834276961, "learning_rate": 2.83765903799142e-06, "loss": 0.3243, "step": 14431 }, { "epoch": 0.6531794523647884, "grad_norm": 0.6227041477821702, "learning_rate": 2.8369982235818817e-06, "loss": 0.3672, "step": 14432 }, { "epoch": 0.653224711473184, "grad_norm": 1.1696453604041717, "learning_rate": 2.836337455648605e-06, "loss": 0.2545, "step": 14433 }, { "epoch": 0.6532699705815795, "grad_norm": 0.6102567853373334, "learning_rate": 2.835676734205792e-06, "loss": 0.3471, "step": 14434 }, { "epoch": 0.6533152296899751, "grad_norm": 0.617072658570023, "learning_rate": 2.8350160592676407e-06, "loss": 0.3261, "step": 14435 }, { "epoch": 0.6533604887983707, "grad_norm": 0.640343205295688, "learning_rate": 2.8343554308483444e-06, "loss": 0.3148, "step": 14436 }, { "epoch": 0.6534057479067662, "grad_norm": 0.5843377197812158, "learning_rate": 2.8336948489620973e-06, "loss": 0.3159, "step": 14437 }, { "epoch": 0.6534510070151618, "grad_norm": 0.6244745426145994, "learning_rate": 2.833034313623095e-06, "loss": 0.2863, "step": 14438 }, { "epoch": 0.6534962661235574, "grad_norm": 0.27814110294909966, "learning_rate": 2.8323738248455313e-06, "loss": 0.481, "step": 14439 }, { "epoch": 0.653541525231953, "grad_norm": 0.6083305305579919, "learning_rate": 2.8317133826435968e-06, "loss": 0.2858, "step": 14440 }, { "epoch": 0.6535867843403484, "grad_norm": 0.6288104167954419, "learning_rate": 2.8310529870314805e-06, "loss": 0.3011, "step": 14441 }, { "epoch": 0.653632043448744, "grad_norm": 0.6484817765709348, "learning_rate": 2.830392638023376e-06, "loss": 0.2908, "step": 14442 }, { "epoch": 0.6536773025571396, "grad_norm": 0.6555705986626023, "learning_rate": 2.8297323356334683e-06, "loss": 0.3, "step": 14443 }, { "epoch": 0.6537225616655352, "grad_norm": 0.6426833166344065, "learning_rate": 2.829072079875949e-06, "loss": 0.3228, "step": 14444 }, { "epoch": 0.6537678207739308, "grad_norm": 0.6407112303345188, "learning_rate": 2.8284118707650033e-06, "loss": 0.3043, "step": 14445 }, { "epoch": 0.6538130798823263, "grad_norm": 0.5983310459118458, "learning_rate": 2.8277517083148155e-06, "loss": 0.3129, "step": 14446 }, { "epoch": 0.6538583389907219, "grad_norm": 0.6640789563914657, "learning_rate": 2.8270915925395714e-06, "loss": 0.3132, "step": 14447 }, { "epoch": 0.6539035980991175, "grad_norm": 0.6133590414182064, "learning_rate": 2.8264315234534594e-06, "loss": 0.2707, "step": 14448 }, { "epoch": 0.6539488572075131, "grad_norm": 0.6340453316708997, "learning_rate": 2.8257715010706544e-06, "loss": 0.3189, "step": 14449 }, { "epoch": 0.6539941163159085, "grad_norm": 0.27372905646296936, "learning_rate": 2.8251115254053426e-06, "loss": 0.4638, "step": 14450 }, { "epoch": 0.6540393754243041, "grad_norm": 0.5895567568955655, "learning_rate": 2.824451596471704e-06, "loss": 0.3543, "step": 14451 }, { "epoch": 0.6540846345326997, "grad_norm": 0.6503694689675438, "learning_rate": 2.823791714283923e-06, "loss": 0.3237, "step": 14452 }, { "epoch": 0.6541298936410953, "grad_norm": 0.5918394190940748, "learning_rate": 2.8231318788561702e-06, "loss": 0.3103, "step": 14453 }, { "epoch": 0.6541751527494908, "grad_norm": 0.6474765844945153, "learning_rate": 2.8224720902026283e-06, "loss": 0.2933, "step": 14454 }, { "epoch": 0.6542204118578864, "grad_norm": 0.5915692141412208, "learning_rate": 2.821812348337475e-06, "loss": 0.3107, "step": 14455 }, { "epoch": 0.654265670966282, "grad_norm": 0.2727139188700424, "learning_rate": 2.821152653274884e-06, "loss": 0.4804, "step": 14456 }, { "epoch": 0.6543109300746776, "grad_norm": 0.6330423123605577, "learning_rate": 2.820493005029029e-06, "loss": 0.3106, "step": 14457 }, { "epoch": 0.6543561891830731, "grad_norm": 0.26587214402432924, "learning_rate": 2.8198334036140873e-06, "loss": 0.467, "step": 14458 }, { "epoch": 0.6544014482914686, "grad_norm": 0.612997939811741, "learning_rate": 2.819173849044229e-06, "loss": 0.3307, "step": 14459 }, { "epoch": 0.6544467073998642, "grad_norm": 0.6129959000307879, "learning_rate": 2.8185143413336272e-06, "loss": 0.2693, "step": 14460 }, { "epoch": 0.6544919665082598, "grad_norm": 0.6254208228940998, "learning_rate": 2.8178548804964536e-06, "loss": 0.3163, "step": 14461 }, { "epoch": 0.6545372256166554, "grad_norm": 0.9923324801370447, "learning_rate": 2.817195466546874e-06, "loss": 0.3117, "step": 14462 }, { "epoch": 0.6545824847250509, "grad_norm": 0.3034442698917206, "learning_rate": 2.8165360994990598e-06, "loss": 0.4408, "step": 14463 }, { "epoch": 0.6546277438334465, "grad_norm": 0.6557756043198308, "learning_rate": 2.815876779367181e-06, "loss": 0.3154, "step": 14464 }, { "epoch": 0.654673002941842, "grad_norm": 0.6200793549760715, "learning_rate": 2.8152175061654017e-06, "loss": 0.308, "step": 14465 }, { "epoch": 0.6547182620502376, "grad_norm": 0.541491605820331, "learning_rate": 2.8145582799078873e-06, "loss": 0.2597, "step": 14466 }, { "epoch": 0.6547635211586331, "grad_norm": 0.6239722423207902, "learning_rate": 2.8138991006088024e-06, "loss": 0.3003, "step": 14467 }, { "epoch": 0.6548087802670287, "grad_norm": 0.6460007049209576, "learning_rate": 2.813239968282314e-06, "loss": 0.3497, "step": 14468 }, { "epoch": 0.6548540393754243, "grad_norm": 0.2721666372049071, "learning_rate": 2.812580882942583e-06, "loss": 0.4872, "step": 14469 }, { "epoch": 0.6548992984838199, "grad_norm": 0.673252288948146, "learning_rate": 2.811921844603768e-06, "loss": 0.3248, "step": 14470 }, { "epoch": 0.6549445575922155, "grad_norm": 0.2721882793861817, "learning_rate": 2.8112628532800345e-06, "loss": 0.4691, "step": 14471 }, { "epoch": 0.654989816700611, "grad_norm": 0.2922565844858254, "learning_rate": 2.8106039089855385e-06, "loss": 0.4552, "step": 14472 }, { "epoch": 0.6550350758090066, "grad_norm": 0.735896164955326, "learning_rate": 2.809945011734442e-06, "loss": 0.2839, "step": 14473 }, { "epoch": 0.6550803349174021, "grad_norm": 0.6297271221726509, "learning_rate": 2.8092861615409004e-06, "loss": 0.3003, "step": 14474 }, { "epoch": 0.6551255940257977, "grad_norm": 0.6394901774218247, "learning_rate": 2.8086273584190704e-06, "loss": 0.3047, "step": 14475 }, { "epoch": 0.6551708531341932, "grad_norm": 0.6439208339140995, "learning_rate": 2.807968602383107e-06, "loss": 0.3283, "step": 14476 }, { "epoch": 0.6552161122425888, "grad_norm": 0.5642350700947738, "learning_rate": 2.8073098934471703e-06, "loss": 0.3227, "step": 14477 }, { "epoch": 0.6552613713509844, "grad_norm": 0.6685770730535802, "learning_rate": 2.806651231625406e-06, "loss": 0.2996, "step": 14478 }, { "epoch": 0.65530663045938, "grad_norm": 0.6824815601127436, "learning_rate": 2.8059926169319694e-06, "loss": 0.2653, "step": 14479 }, { "epoch": 0.6553518895677756, "grad_norm": 0.30267560974840607, "learning_rate": 2.8053340493810143e-06, "loss": 0.4775, "step": 14480 }, { "epoch": 0.655397148676171, "grad_norm": 0.671206729091289, "learning_rate": 2.804675528986693e-06, "loss": 0.3277, "step": 14481 }, { "epoch": 0.6554424077845666, "grad_norm": 0.5600886177378184, "learning_rate": 2.804017055763149e-06, "loss": 0.3015, "step": 14482 }, { "epoch": 0.6554876668929622, "grad_norm": 0.6033961960418963, "learning_rate": 2.8033586297245336e-06, "loss": 0.3511, "step": 14483 }, { "epoch": 0.6555329260013578, "grad_norm": 0.5938406866774515, "learning_rate": 2.8027002508849967e-06, "loss": 0.3397, "step": 14484 }, { "epoch": 0.6555781851097533, "grad_norm": 0.6218836501499261, "learning_rate": 2.8020419192586836e-06, "loss": 0.2954, "step": 14485 }, { "epoch": 0.6556234442181489, "grad_norm": 0.6338006210940533, "learning_rate": 2.801383634859737e-06, "loss": 0.3092, "step": 14486 }, { "epoch": 0.6556687033265445, "grad_norm": 0.5836146648909326, "learning_rate": 2.8007253977023045e-06, "loss": 0.2932, "step": 14487 }, { "epoch": 0.6557139624349401, "grad_norm": 0.735478218662718, "learning_rate": 2.8000672078005277e-06, "loss": 0.3117, "step": 14488 }, { "epoch": 0.6557592215433355, "grad_norm": 0.5864591443386192, "learning_rate": 2.799409065168551e-06, "loss": 0.2882, "step": 14489 }, { "epoch": 0.6558044806517311, "grad_norm": 0.6412764443291205, "learning_rate": 2.7987509698205163e-06, "loss": 0.3331, "step": 14490 }, { "epoch": 0.6558497397601267, "grad_norm": 0.6238632744185769, "learning_rate": 2.79809292177056e-06, "loss": 0.3269, "step": 14491 }, { "epoch": 0.6558949988685223, "grad_norm": 0.6110432933489035, "learning_rate": 2.7974349210328234e-06, "loss": 0.3298, "step": 14492 }, { "epoch": 0.6559402579769179, "grad_norm": 0.6878994627966224, "learning_rate": 2.7967769676214486e-06, "loss": 0.3514, "step": 14493 }, { "epoch": 0.6559855170853134, "grad_norm": 0.6380556906955277, "learning_rate": 2.7961190615505695e-06, "loss": 0.3398, "step": 14494 }, { "epoch": 0.656030776193709, "grad_norm": 0.6014487945413114, "learning_rate": 2.7954612028343218e-06, "loss": 0.2999, "step": 14495 }, { "epoch": 0.6560760353021046, "grad_norm": 0.678185293870894, "learning_rate": 2.7948033914868415e-06, "loss": 0.3078, "step": 14496 }, { "epoch": 0.6561212944105002, "grad_norm": 0.577372925066938, "learning_rate": 2.7941456275222658e-06, "loss": 0.2989, "step": 14497 }, { "epoch": 0.6561665535188956, "grad_norm": 0.5706492885380627, "learning_rate": 2.793487910954726e-06, "loss": 0.3397, "step": 14498 }, { "epoch": 0.6562118126272912, "grad_norm": 0.6273409052262731, "learning_rate": 2.7928302417983524e-06, "loss": 0.2975, "step": 14499 }, { "epoch": 0.6562570717356868, "grad_norm": 0.5952325031578662, "learning_rate": 2.7921726200672793e-06, "loss": 0.2611, "step": 14500 }, { "epoch": 0.6563023308440824, "grad_norm": 0.6492498565560609, "learning_rate": 2.791515045775634e-06, "loss": 0.2996, "step": 14501 }, { "epoch": 0.6563475899524779, "grad_norm": 0.2819515413997284, "learning_rate": 2.79085751893755e-06, "loss": 0.4532, "step": 14502 }, { "epoch": 0.6563928490608735, "grad_norm": 0.6220399803635098, "learning_rate": 2.7902000395671523e-06, "loss": 0.3138, "step": 14503 }, { "epoch": 0.6564381081692691, "grad_norm": 0.625456312620898, "learning_rate": 2.7895426076785676e-06, "loss": 0.3258, "step": 14504 }, { "epoch": 0.6564833672776647, "grad_norm": 0.28459561254149424, "learning_rate": 2.788885223285923e-06, "loss": 0.4833, "step": 14505 }, { "epoch": 0.6565286263860602, "grad_norm": 1.8729040089538573, "learning_rate": 2.7882278864033465e-06, "loss": 0.2799, "step": 14506 }, { "epoch": 0.6565738854944557, "grad_norm": 0.6141294598649133, "learning_rate": 2.787570597044959e-06, "loss": 0.3439, "step": 14507 }, { "epoch": 0.6566191446028513, "grad_norm": 0.6304622492167625, "learning_rate": 2.786913355224883e-06, "loss": 0.2964, "step": 14508 }, { "epoch": 0.6566644037112469, "grad_norm": 0.6245041485695481, "learning_rate": 2.7862561609572414e-06, "loss": 0.321, "step": 14509 }, { "epoch": 0.6567096628196425, "grad_norm": 0.6741403524459103, "learning_rate": 2.7855990142561606e-06, "loss": 0.3239, "step": 14510 }, { "epoch": 0.656754921928038, "grad_norm": 0.5674773642493651, "learning_rate": 2.7849419151357513e-06, "loss": 0.3053, "step": 14511 }, { "epoch": 0.6568001810364336, "grad_norm": 0.28760962113666705, "learning_rate": 2.784284863610138e-06, "loss": 0.4902, "step": 14512 }, { "epoch": 0.6568454401448292, "grad_norm": 0.6315742915634163, "learning_rate": 2.7836278596934395e-06, "loss": 0.3242, "step": 14513 }, { "epoch": 0.6568906992532247, "grad_norm": 0.7371476507524212, "learning_rate": 2.782970903399771e-06, "loss": 0.2842, "step": 14514 }, { "epoch": 0.6569359583616203, "grad_norm": 0.7316752146725508, "learning_rate": 2.782313994743247e-06, "loss": 0.2426, "step": 14515 }, { "epoch": 0.6569812174700158, "grad_norm": 0.6650642757049506, "learning_rate": 2.781657133737986e-06, "loss": 0.299, "step": 14516 }, { "epoch": 0.6570264765784114, "grad_norm": 0.5989291909812126, "learning_rate": 2.7810003203980983e-06, "loss": 0.2837, "step": 14517 }, { "epoch": 0.657071735686807, "grad_norm": 0.6512623588879672, "learning_rate": 2.7803435547377006e-06, "loss": 0.3266, "step": 14518 }, { "epoch": 0.6571169947952026, "grad_norm": 0.2594526263361003, "learning_rate": 2.779686836770903e-06, "loss": 0.4504, "step": 14519 }, { "epoch": 0.6571622539035981, "grad_norm": 0.3290550773122507, "learning_rate": 2.7790301665118137e-06, "loss": 0.4825, "step": 14520 }, { "epoch": 0.6572075130119936, "grad_norm": 0.6135740857375592, "learning_rate": 2.7783735439745447e-06, "loss": 0.2996, "step": 14521 }, { "epoch": 0.6572527721203892, "grad_norm": 0.6624231097263641, "learning_rate": 2.7777169691732074e-06, "loss": 0.295, "step": 14522 }, { "epoch": 0.6572980312287848, "grad_norm": 0.6008466558370399, "learning_rate": 2.777060442121907e-06, "loss": 0.302, "step": 14523 }, { "epoch": 0.6573432903371803, "grad_norm": 0.6499752987880355, "learning_rate": 2.7764039628347484e-06, "loss": 0.3174, "step": 14524 }, { "epoch": 0.6573885494455759, "grad_norm": 0.5845108030775217, "learning_rate": 2.7757475313258397e-06, "loss": 0.3008, "step": 14525 }, { "epoch": 0.6574338085539715, "grad_norm": 0.6367031103099358, "learning_rate": 2.775091147609287e-06, "loss": 0.3072, "step": 14526 }, { "epoch": 0.6574790676623671, "grad_norm": 0.29519705624979603, "learning_rate": 2.7744348116991925e-06, "loss": 0.4913, "step": 14527 }, { "epoch": 0.6575243267707627, "grad_norm": 0.9076162605324154, "learning_rate": 2.7737785236096563e-06, "loss": 0.3021, "step": 14528 }, { "epoch": 0.6575695858791581, "grad_norm": 0.7232551838129118, "learning_rate": 2.7731222833547842e-06, "loss": 0.3272, "step": 14529 }, { "epoch": 0.6576148449875537, "grad_norm": 0.665210978002039, "learning_rate": 2.7724660909486732e-06, "loss": 0.3128, "step": 14530 }, { "epoch": 0.6576601040959493, "grad_norm": 0.6183998088090225, "learning_rate": 2.771809946405427e-06, "loss": 0.2759, "step": 14531 }, { "epoch": 0.6577053632043449, "grad_norm": 0.6421908889548431, "learning_rate": 2.771153849739141e-06, "loss": 0.2959, "step": 14532 }, { "epoch": 0.6577506223127404, "grad_norm": 0.6180099869793233, "learning_rate": 2.7704978009639117e-06, "loss": 0.3348, "step": 14533 }, { "epoch": 0.657795881421136, "grad_norm": 0.6045601813827167, "learning_rate": 2.7698418000938374e-06, "loss": 0.3214, "step": 14534 }, { "epoch": 0.6578411405295316, "grad_norm": 0.6655538988058615, "learning_rate": 2.7691858471430157e-06, "loss": 0.3022, "step": 14535 }, { "epoch": 0.6578863996379272, "grad_norm": 0.6071011266492075, "learning_rate": 2.7685299421255373e-06, "loss": 0.2832, "step": 14536 }, { "epoch": 0.6579316587463226, "grad_norm": 0.6744160545583696, "learning_rate": 2.7678740850554965e-06, "loss": 0.3455, "step": 14537 }, { "epoch": 0.6579769178547182, "grad_norm": 0.5592789821618848, "learning_rate": 2.7672182759469857e-06, "loss": 0.2837, "step": 14538 }, { "epoch": 0.6580221769631138, "grad_norm": 0.5920929266921543, "learning_rate": 2.7665625148141e-06, "loss": 0.2734, "step": 14539 }, { "epoch": 0.6580674360715094, "grad_norm": 0.5622962505052304, "learning_rate": 2.7659068016709234e-06, "loss": 0.2813, "step": 14540 }, { "epoch": 0.658112695179905, "grad_norm": 0.7655350616626647, "learning_rate": 2.7652511365315473e-06, "loss": 0.3048, "step": 14541 }, { "epoch": 0.6581579542883005, "grad_norm": 0.6174261956501719, "learning_rate": 2.764595519410063e-06, "loss": 0.2995, "step": 14542 }, { "epoch": 0.6582032133966961, "grad_norm": 0.6266561514215079, "learning_rate": 2.763939950320556e-06, "loss": 0.3058, "step": 14543 }, { "epoch": 0.6582484725050917, "grad_norm": 0.28002556277807183, "learning_rate": 2.7632844292771094e-06, "loss": 0.4655, "step": 14544 }, { "epoch": 0.6582937316134873, "grad_norm": 0.6485738125198387, "learning_rate": 2.762628956293813e-06, "loss": 0.3146, "step": 14545 }, { "epoch": 0.6583389907218827, "grad_norm": 0.7066485212378323, "learning_rate": 2.7619735313847467e-06, "loss": 0.3156, "step": 14546 }, { "epoch": 0.6583842498302783, "grad_norm": 0.2891774709824707, "learning_rate": 2.761318154563998e-06, "loss": 0.4787, "step": 14547 }, { "epoch": 0.6584295089386739, "grad_norm": 0.6208970598323403, "learning_rate": 2.7606628258456457e-06, "loss": 0.283, "step": 14548 }, { "epoch": 0.6584747680470695, "grad_norm": 0.5981822572032913, "learning_rate": 2.760007545243771e-06, "loss": 0.2955, "step": 14549 }, { "epoch": 0.658520027155465, "grad_norm": 0.629617347306717, "learning_rate": 2.759352312772454e-06, "loss": 0.3219, "step": 14550 }, { "epoch": 0.6585652862638606, "grad_norm": 0.6254752051909233, "learning_rate": 2.7586971284457753e-06, "loss": 0.2668, "step": 14551 }, { "epoch": 0.6586105453722562, "grad_norm": 0.6137413177807575, "learning_rate": 2.7580419922778124e-06, "loss": 0.3469, "step": 14552 }, { "epoch": 0.6586558044806518, "grad_norm": 0.714623058271599, "learning_rate": 2.7573869042826396e-06, "loss": 0.3049, "step": 14553 }, { "epoch": 0.6587010635890473, "grad_norm": 0.5973950768771438, "learning_rate": 2.7567318644743344e-06, "loss": 0.305, "step": 14554 }, { "epoch": 0.6587463226974428, "grad_norm": 0.5703781422935155, "learning_rate": 2.756076872866974e-06, "loss": 0.3131, "step": 14555 }, { "epoch": 0.6587915818058384, "grad_norm": 0.6563924959666125, "learning_rate": 2.755421929474629e-06, "loss": 0.3368, "step": 14556 }, { "epoch": 0.658836840914234, "grad_norm": 0.314269215921467, "learning_rate": 2.7547670343113718e-06, "loss": 0.4752, "step": 14557 }, { "epoch": 0.6588821000226296, "grad_norm": 0.267282794090128, "learning_rate": 2.7541121873912774e-06, "loss": 0.436, "step": 14558 }, { "epoch": 0.6589273591310251, "grad_norm": 0.638997590884908, "learning_rate": 2.7534573887284123e-06, "loss": 0.3465, "step": 14559 }, { "epoch": 0.6589726182394207, "grad_norm": 0.8136242615153971, "learning_rate": 2.75280263833685e-06, "loss": 0.3123, "step": 14560 }, { "epoch": 0.6590178773478162, "grad_norm": 0.5918204058084633, "learning_rate": 2.7521479362306574e-06, "loss": 0.333, "step": 14561 }, { "epoch": 0.6590631364562118, "grad_norm": 0.6079493814701858, "learning_rate": 2.7514932824239e-06, "loss": 0.3035, "step": 14562 }, { "epoch": 0.6591083955646074, "grad_norm": 0.6118825012765209, "learning_rate": 2.7508386769306462e-06, "loss": 0.3094, "step": 14563 }, { "epoch": 0.6591536546730029, "grad_norm": 0.7444506793187203, "learning_rate": 2.7501841197649627e-06, "loss": 0.2915, "step": 14564 }, { "epoch": 0.6591989137813985, "grad_norm": 0.6609723903965469, "learning_rate": 2.7495296109409136e-06, "loss": 0.3388, "step": 14565 }, { "epoch": 0.6592441728897941, "grad_norm": 0.5776648608785567, "learning_rate": 2.7488751504725587e-06, "loss": 0.3072, "step": 14566 }, { "epoch": 0.6592894319981897, "grad_norm": 0.2930781439292153, "learning_rate": 2.7482207383739636e-06, "loss": 0.4886, "step": 14567 }, { "epoch": 0.6593346911065852, "grad_norm": 0.6247275615918767, "learning_rate": 2.7475663746591906e-06, "loss": 0.3029, "step": 14568 }, { "epoch": 0.6593799502149807, "grad_norm": 0.6498235375519393, "learning_rate": 2.746912059342299e-06, "loss": 0.3377, "step": 14569 }, { "epoch": 0.6594252093233763, "grad_norm": 0.29449251536630294, "learning_rate": 2.7462577924373448e-06, "loss": 0.4664, "step": 14570 }, { "epoch": 0.6594704684317719, "grad_norm": 0.572784277483015, "learning_rate": 2.745603573958391e-06, "loss": 0.2717, "step": 14571 }, { "epoch": 0.6595157275401674, "grad_norm": 0.728629363071293, "learning_rate": 2.74494940391949e-06, "loss": 0.3143, "step": 14572 }, { "epoch": 0.659560986648563, "grad_norm": 0.6563748386276432, "learning_rate": 2.7442952823347035e-06, "loss": 0.3188, "step": 14573 }, { "epoch": 0.6596062457569586, "grad_norm": 0.28586724962853566, "learning_rate": 2.743641209218083e-06, "loss": 0.4824, "step": 14574 }, { "epoch": 0.6596515048653542, "grad_norm": 0.6000616689940764, "learning_rate": 2.742987184583681e-06, "loss": 0.2562, "step": 14575 }, { "epoch": 0.6596967639737498, "grad_norm": 0.6180064457315072, "learning_rate": 2.7423332084455543e-06, "loss": 0.3191, "step": 14576 }, { "epoch": 0.6597420230821452, "grad_norm": 0.5672784538288332, "learning_rate": 2.7416792808177516e-06, "loss": 0.32, "step": 14577 }, { "epoch": 0.6597872821905408, "grad_norm": 0.6002952662222252, "learning_rate": 2.741025401714327e-06, "loss": 0.3216, "step": 14578 }, { "epoch": 0.6598325412989364, "grad_norm": 0.7181766236652201, "learning_rate": 2.7403715711493264e-06, "loss": 0.3128, "step": 14579 }, { "epoch": 0.659877800407332, "grad_norm": 0.6649504513424246, "learning_rate": 2.7397177891368033e-06, "loss": 0.3064, "step": 14580 }, { "epoch": 0.6599230595157275, "grad_norm": 0.5940222820479465, "learning_rate": 2.7390640556908023e-06, "loss": 0.2912, "step": 14581 }, { "epoch": 0.6599683186241231, "grad_norm": 0.28298728873634355, "learning_rate": 2.7384103708253697e-06, "loss": 0.4343, "step": 14582 }, { "epoch": 0.6600135777325187, "grad_norm": 0.6560700005915628, "learning_rate": 2.7377567345545514e-06, "loss": 0.2864, "step": 14583 }, { "epoch": 0.6600588368409143, "grad_norm": 0.6090951121414581, "learning_rate": 2.737103146892395e-06, "loss": 0.357, "step": 14584 }, { "epoch": 0.6601040959493097, "grad_norm": 0.6028122135930079, "learning_rate": 2.7364496078529425e-06, "loss": 0.317, "step": 14585 }, { "epoch": 0.6601493550577053, "grad_norm": 0.5996556745656106, "learning_rate": 2.7357961174502335e-06, "loss": 0.2849, "step": 14586 }, { "epoch": 0.6601946141661009, "grad_norm": 0.5778079712939874, "learning_rate": 2.7351426756983145e-06, "loss": 0.2917, "step": 14587 }, { "epoch": 0.6602398732744965, "grad_norm": 0.6911837816561222, "learning_rate": 2.734489282611221e-06, "loss": 0.4023, "step": 14588 }, { "epoch": 0.6602851323828921, "grad_norm": 0.2729175501123961, "learning_rate": 2.733835938202997e-06, "loss": 0.4774, "step": 14589 }, { "epoch": 0.6603303914912876, "grad_norm": 0.5936874447342413, "learning_rate": 2.7331826424876782e-06, "loss": 0.2917, "step": 14590 }, { "epoch": 0.6603756505996832, "grad_norm": 0.6315346424071278, "learning_rate": 2.7325293954793013e-06, "loss": 0.3421, "step": 14591 }, { "epoch": 0.6604209097080788, "grad_norm": 0.302728605590915, "learning_rate": 2.7318761971919034e-06, "loss": 0.4755, "step": 14592 }, { "epoch": 0.6604661688164744, "grad_norm": 0.6244775173123975, "learning_rate": 2.731223047639522e-06, "loss": 0.2912, "step": 14593 }, { "epoch": 0.6605114279248698, "grad_norm": 0.60301323677301, "learning_rate": 2.730569946836189e-06, "loss": 0.329, "step": 14594 }, { "epoch": 0.6605566870332654, "grad_norm": 0.7322230548298987, "learning_rate": 2.7299168947959365e-06, "loss": 0.3207, "step": 14595 }, { "epoch": 0.660601946141661, "grad_norm": 0.6134897267742255, "learning_rate": 2.7292638915327975e-06, "loss": 0.3251, "step": 14596 }, { "epoch": 0.6606472052500566, "grad_norm": 0.5731490592855168, "learning_rate": 2.728610937060805e-06, "loss": 0.2786, "step": 14597 }, { "epoch": 0.6606924643584522, "grad_norm": 0.6774614477652757, "learning_rate": 2.727958031393988e-06, "loss": 0.3816, "step": 14598 }, { "epoch": 0.6607377234668477, "grad_norm": 0.6043544377177085, "learning_rate": 2.727305174546372e-06, "loss": 0.2787, "step": 14599 }, { "epoch": 0.6607829825752433, "grad_norm": 0.30317006861177326, "learning_rate": 2.7266523665319904e-06, "loss": 0.4921, "step": 14600 }, { "epoch": 0.6608282416836389, "grad_norm": 0.5828626242746316, "learning_rate": 2.725999607364865e-06, "loss": 0.2868, "step": 14601 }, { "epoch": 0.6608735007920344, "grad_norm": 0.6287623493610928, "learning_rate": 2.725346897059027e-06, "loss": 0.3035, "step": 14602 }, { "epoch": 0.6609187599004299, "grad_norm": 0.6667618633210299, "learning_rate": 2.724694235628498e-06, "loss": 0.3314, "step": 14603 }, { "epoch": 0.6609640190088255, "grad_norm": 0.6039172989770963, "learning_rate": 2.724041623087299e-06, "loss": 0.2995, "step": 14604 }, { "epoch": 0.6610092781172211, "grad_norm": 0.6335276592368326, "learning_rate": 2.723389059449455e-06, "loss": 0.3158, "step": 14605 }, { "epoch": 0.6610545372256167, "grad_norm": 0.6461064971619715, "learning_rate": 2.722736544728991e-06, "loss": 0.3307, "step": 14606 }, { "epoch": 0.6610997963340122, "grad_norm": 0.5979661875521738, "learning_rate": 2.7220840789399243e-06, "loss": 0.3228, "step": 14607 }, { "epoch": 0.6611450554424078, "grad_norm": 0.650606256072474, "learning_rate": 2.7214316620962727e-06, "loss": 0.2936, "step": 14608 }, { "epoch": 0.6611903145508033, "grad_norm": 0.6703756569986283, "learning_rate": 2.720779294212059e-06, "loss": 0.3279, "step": 14609 }, { "epoch": 0.6612355736591989, "grad_norm": 0.7022940751613966, "learning_rate": 2.720126975301297e-06, "loss": 0.3444, "step": 14610 }, { "epoch": 0.6612808327675945, "grad_norm": 0.27846024025290605, "learning_rate": 2.7194747053780037e-06, "loss": 0.4496, "step": 14611 }, { "epoch": 0.66132609187599, "grad_norm": 0.6336076704471227, "learning_rate": 2.718822484456194e-06, "loss": 0.3225, "step": 14612 }, { "epoch": 0.6613713509843856, "grad_norm": 0.6162067561278477, "learning_rate": 2.718170312549885e-06, "loss": 0.2723, "step": 14613 }, { "epoch": 0.6614166100927812, "grad_norm": 0.5828372659832474, "learning_rate": 2.717518189673088e-06, "loss": 0.334, "step": 14614 }, { "epoch": 0.6614618692011768, "grad_norm": 0.6171211495647955, "learning_rate": 2.716866115839813e-06, "loss": 0.2896, "step": 14615 }, { "epoch": 0.6615071283095723, "grad_norm": 0.6540331289151489, "learning_rate": 2.716214091064075e-06, "loss": 0.2921, "step": 14616 }, { "epoch": 0.6615523874179678, "grad_norm": 0.5574949221745757, "learning_rate": 2.71556211535988e-06, "loss": 0.2761, "step": 14617 }, { "epoch": 0.6615976465263634, "grad_norm": 0.6485906746506321, "learning_rate": 2.714910188741241e-06, "loss": 0.3316, "step": 14618 }, { "epoch": 0.661642905634759, "grad_norm": 0.27752831386377874, "learning_rate": 2.714258311222162e-06, "loss": 0.4668, "step": 14619 }, { "epoch": 0.6616881647431545, "grad_norm": 0.2789879546078319, "learning_rate": 2.7136064828166543e-06, "loss": 0.475, "step": 14620 }, { "epoch": 0.6617334238515501, "grad_norm": 0.7431375370864788, "learning_rate": 2.7129547035387187e-06, "loss": 0.3271, "step": 14621 }, { "epoch": 0.6617786829599457, "grad_norm": 0.6110256512004637, "learning_rate": 2.7123029734023643e-06, "loss": 0.3225, "step": 14622 }, { "epoch": 0.6618239420683413, "grad_norm": 0.5927904023046602, "learning_rate": 2.711651292421593e-06, "loss": 0.2673, "step": 14623 }, { "epoch": 0.6618692011767369, "grad_norm": 0.6309109945227651, "learning_rate": 2.7109996606104054e-06, "loss": 0.3119, "step": 14624 }, { "epoch": 0.6619144602851323, "grad_norm": 0.27083279929004345, "learning_rate": 2.710348077982805e-06, "loss": 0.4362, "step": 14625 }, { "epoch": 0.6619597193935279, "grad_norm": 0.6326640005584911, "learning_rate": 2.7096965445527947e-06, "loss": 0.2773, "step": 14626 }, { "epoch": 0.6620049785019235, "grad_norm": 0.26875623419781375, "learning_rate": 2.7090450603343703e-06, "loss": 0.4829, "step": 14627 }, { "epoch": 0.6620502376103191, "grad_norm": 0.5908815928580272, "learning_rate": 2.70839362534153e-06, "loss": 0.3308, "step": 14628 }, { "epoch": 0.6620954967187146, "grad_norm": 0.591809248196907, "learning_rate": 2.7077422395882745e-06, "loss": 0.2937, "step": 14629 }, { "epoch": 0.6621407558271102, "grad_norm": 0.25957882080678385, "learning_rate": 2.7070909030885967e-06, "loss": 0.4638, "step": 14630 }, { "epoch": 0.6621860149355058, "grad_norm": 0.5857590862321028, "learning_rate": 2.706439615856495e-06, "loss": 0.3007, "step": 14631 }, { "epoch": 0.6622312740439014, "grad_norm": 0.6156546064522539, "learning_rate": 2.705788377905961e-06, "loss": 0.2595, "step": 14632 }, { "epoch": 0.662276533152297, "grad_norm": 0.27266227794839076, "learning_rate": 2.705137189250988e-06, "loss": 0.4496, "step": 14633 }, { "epoch": 0.6623217922606924, "grad_norm": 0.7641074447659439, "learning_rate": 2.7044860499055682e-06, "loss": 0.3283, "step": 14634 }, { "epoch": 0.662367051369088, "grad_norm": 0.26066804349701916, "learning_rate": 2.7038349598836944e-06, "loss": 0.4556, "step": 14635 }, { "epoch": 0.6624123104774836, "grad_norm": 0.5907273354900149, "learning_rate": 2.703183919199356e-06, "loss": 0.2849, "step": 14636 }, { "epoch": 0.6624575695858792, "grad_norm": 0.631603567997779, "learning_rate": 2.702532927866538e-06, "loss": 0.2869, "step": 14637 }, { "epoch": 0.6625028286942747, "grad_norm": 0.61739783806948, "learning_rate": 2.7018819858992323e-06, "loss": 0.2978, "step": 14638 }, { "epoch": 0.6625480878026703, "grad_norm": 0.2919577399403899, "learning_rate": 2.7012310933114283e-06, "loss": 0.5085, "step": 14639 }, { "epoch": 0.6625933469110659, "grad_norm": 0.6630811709636194, "learning_rate": 2.7005802501171037e-06, "loss": 0.2701, "step": 14640 }, { "epoch": 0.6626386060194615, "grad_norm": 0.6682876965040512, "learning_rate": 2.6999294563302474e-06, "loss": 0.3429, "step": 14641 }, { "epoch": 0.6626838651278569, "grad_norm": 0.6592272286289782, "learning_rate": 2.6992787119648456e-06, "loss": 0.3079, "step": 14642 }, { "epoch": 0.6627291242362525, "grad_norm": 0.6500740029003796, "learning_rate": 2.698628017034877e-06, "loss": 0.2948, "step": 14643 }, { "epoch": 0.6627743833446481, "grad_norm": 0.3547010827896515, "learning_rate": 2.6979773715543234e-06, "loss": 0.4846, "step": 14644 }, { "epoch": 0.6628196424530437, "grad_norm": 0.8248138219132292, "learning_rate": 2.697326775537167e-06, "loss": 0.3246, "step": 14645 }, { "epoch": 0.6628649015614393, "grad_norm": 0.6486771166763444, "learning_rate": 2.696676228997385e-06, "loss": 0.2961, "step": 14646 }, { "epoch": 0.6629101606698348, "grad_norm": 0.5916296106868336, "learning_rate": 2.696025731948958e-06, "loss": 0.2985, "step": 14647 }, { "epoch": 0.6629554197782304, "grad_norm": 0.5843377254274962, "learning_rate": 2.69537528440586e-06, "loss": 0.2885, "step": 14648 }, { "epoch": 0.663000678886626, "grad_norm": 0.6058458147622786, "learning_rate": 2.6947248863820712e-06, "loss": 0.3126, "step": 14649 }, { "epoch": 0.6630459379950215, "grad_norm": 0.7042748783059469, "learning_rate": 2.6940745378915623e-06, "loss": 0.324, "step": 14650 }, { "epoch": 0.663091197103417, "grad_norm": 0.7634106275562195, "learning_rate": 2.6934242389483118e-06, "loss": 0.3391, "step": 14651 }, { "epoch": 0.6631364562118126, "grad_norm": 0.6186184423505748, "learning_rate": 2.6927739895662897e-06, "loss": 0.3108, "step": 14652 }, { "epoch": 0.6631817153202082, "grad_norm": 0.28352438648346373, "learning_rate": 2.692123789759467e-06, "loss": 0.4476, "step": 14653 }, { "epoch": 0.6632269744286038, "grad_norm": 0.6628832546732235, "learning_rate": 2.6914736395418162e-06, "loss": 0.3498, "step": 14654 }, { "epoch": 0.6632722335369993, "grad_norm": 0.6349727134291555, "learning_rate": 2.6908235389273086e-06, "loss": 0.2811, "step": 14655 }, { "epoch": 0.6633174926453949, "grad_norm": 0.5571570233509476, "learning_rate": 2.69017348792991e-06, "loss": 0.3177, "step": 14656 }, { "epoch": 0.6633627517537904, "grad_norm": 0.587265023201979, "learning_rate": 2.6895234865635883e-06, "loss": 0.33, "step": 14657 }, { "epoch": 0.663408010862186, "grad_norm": 0.6182820774276857, "learning_rate": 2.688873534842312e-06, "loss": 0.3366, "step": 14658 }, { "epoch": 0.6634532699705816, "grad_norm": 0.7652304431041801, "learning_rate": 2.688223632780044e-06, "loss": 0.2685, "step": 14659 }, { "epoch": 0.6634985290789771, "grad_norm": 0.6361782325815011, "learning_rate": 2.687573780390752e-06, "loss": 0.2825, "step": 14660 }, { "epoch": 0.6635437881873727, "grad_norm": 0.5523403925336813, "learning_rate": 2.686923977688397e-06, "loss": 0.2845, "step": 14661 }, { "epoch": 0.6635890472957683, "grad_norm": 0.5916077285971831, "learning_rate": 2.68627422468694e-06, "loss": 0.2967, "step": 14662 }, { "epoch": 0.6636343064041639, "grad_norm": 0.6534276462669631, "learning_rate": 2.685624521400344e-06, "loss": 0.3592, "step": 14663 }, { "epoch": 0.6636795655125594, "grad_norm": 0.5755201570950208, "learning_rate": 2.68497486784257e-06, "loss": 0.3006, "step": 14664 }, { "epoch": 0.663724824620955, "grad_norm": 0.6065498661857095, "learning_rate": 2.684325264027577e-06, "loss": 0.3319, "step": 14665 }, { "epoch": 0.6637700837293505, "grad_norm": 0.7403589657336263, "learning_rate": 2.68367570996932e-06, "loss": 0.2887, "step": 14666 }, { "epoch": 0.6638153428377461, "grad_norm": 0.9069088530531536, "learning_rate": 2.6830262056817574e-06, "loss": 0.2969, "step": 14667 }, { "epoch": 0.6638606019461417, "grad_norm": 0.37938075410330124, "learning_rate": 2.68237675117885e-06, "loss": 0.4687, "step": 14668 }, { "epoch": 0.6639058610545372, "grad_norm": 0.36833643317394965, "learning_rate": 2.6817273464745443e-06, "loss": 0.4758, "step": 14669 }, { "epoch": 0.6639511201629328, "grad_norm": 0.6321562537827897, "learning_rate": 2.681077991582797e-06, "loss": 0.3122, "step": 14670 }, { "epoch": 0.6639963792713284, "grad_norm": 0.6201058925027416, "learning_rate": 2.6804286865175645e-06, "loss": 0.3505, "step": 14671 }, { "epoch": 0.664041638379724, "grad_norm": 0.7259966491374608, "learning_rate": 2.679779431292795e-06, "loss": 0.3678, "step": 14672 }, { "epoch": 0.6640868974881194, "grad_norm": 0.5941258634548173, "learning_rate": 2.6791302259224385e-06, "loss": 0.285, "step": 14673 }, { "epoch": 0.664132156596515, "grad_norm": 0.6272193636667374, "learning_rate": 2.678481070420446e-06, "loss": 0.3586, "step": 14674 }, { "epoch": 0.6641774157049106, "grad_norm": 0.5736449544025902, "learning_rate": 2.6778319648007645e-06, "loss": 0.3205, "step": 14675 }, { "epoch": 0.6642226748133062, "grad_norm": 0.7449192943128308, "learning_rate": 2.677182909077343e-06, "loss": 0.3317, "step": 14676 }, { "epoch": 0.6642679339217017, "grad_norm": 0.6651381927164225, "learning_rate": 2.6765339032641256e-06, "loss": 0.295, "step": 14677 }, { "epoch": 0.6643131930300973, "grad_norm": 0.6301906237222805, "learning_rate": 2.6758849473750605e-06, "loss": 0.3309, "step": 14678 }, { "epoch": 0.6643584521384929, "grad_norm": 0.685974008499579, "learning_rate": 2.6752360414240874e-06, "loss": 0.3705, "step": 14679 }, { "epoch": 0.6644037112468885, "grad_norm": 0.7277368945564235, "learning_rate": 2.674587185425155e-06, "loss": 0.3604, "step": 14680 }, { "epoch": 0.664448970355284, "grad_norm": 0.668819004807301, "learning_rate": 2.6739383793922007e-06, "loss": 0.336, "step": 14681 }, { "epoch": 0.6644942294636795, "grad_norm": 0.7308638274786853, "learning_rate": 2.673289623339165e-06, "loss": 0.3389, "step": 14682 }, { "epoch": 0.6645394885720751, "grad_norm": 0.5964153150842396, "learning_rate": 2.67264091727999e-06, "loss": 0.3391, "step": 14683 }, { "epoch": 0.6645847476804707, "grad_norm": 0.6417431207623295, "learning_rate": 2.6719922612286152e-06, "loss": 0.3304, "step": 14684 }, { "epoch": 0.6646300067888663, "grad_norm": 0.5823163333153432, "learning_rate": 2.6713436551989767e-06, "loss": 0.2997, "step": 14685 }, { "epoch": 0.6646752658972618, "grad_norm": 0.6141979768525521, "learning_rate": 2.6706950992050097e-06, "loss": 0.273, "step": 14686 }, { "epoch": 0.6647205250056574, "grad_norm": 0.6255508535122599, "learning_rate": 2.670046593260652e-06, "loss": 0.2546, "step": 14687 }, { "epoch": 0.664765784114053, "grad_norm": 0.5823902037514233, "learning_rate": 2.669398137379837e-06, "loss": 0.2532, "step": 14688 }, { "epoch": 0.6648110432224485, "grad_norm": 0.6167108534057749, "learning_rate": 2.6687497315764987e-06, "loss": 0.2779, "step": 14689 }, { "epoch": 0.664856302330844, "grad_norm": 0.7864013107862138, "learning_rate": 2.668101375864567e-06, "loss": 0.2896, "step": 14690 }, { "epoch": 0.6649015614392396, "grad_norm": 0.7672884746572013, "learning_rate": 2.667453070257977e-06, "loss": 0.3622, "step": 14691 }, { "epoch": 0.6649468205476352, "grad_norm": 0.6104806892085198, "learning_rate": 2.666804814770654e-06, "loss": 0.3145, "step": 14692 }, { "epoch": 0.6649920796560308, "grad_norm": 0.6513842948804288, "learning_rate": 2.6661566094165327e-06, "loss": 0.3222, "step": 14693 }, { "epoch": 0.6650373387644264, "grad_norm": 0.6239399644369519, "learning_rate": 2.665508454209538e-06, "loss": 0.3105, "step": 14694 }, { "epoch": 0.6650825978728219, "grad_norm": 0.6439218172036969, "learning_rate": 2.664860349163594e-06, "loss": 0.3106, "step": 14695 }, { "epoch": 0.6651278569812175, "grad_norm": 0.6535200562376338, "learning_rate": 2.6642122942926297e-06, "loss": 0.3386, "step": 14696 }, { "epoch": 0.665173116089613, "grad_norm": 0.6673961251279874, "learning_rate": 2.663564289610573e-06, "loss": 0.2991, "step": 14697 }, { "epoch": 0.6652183751980086, "grad_norm": 0.6856669351238862, "learning_rate": 2.66291633513134e-06, "loss": 0.3338, "step": 14698 }, { "epoch": 0.6652636343064041, "grad_norm": 0.6393130628186594, "learning_rate": 2.6622684308688575e-06, "loss": 0.292, "step": 14699 }, { "epoch": 0.6653088934147997, "grad_norm": 0.6061214167728383, "learning_rate": 2.6616205768370483e-06, "loss": 0.3133, "step": 14700 }, { "epoch": 0.6653541525231953, "grad_norm": 0.6467006444397589, "learning_rate": 2.660972773049831e-06, "loss": 0.3005, "step": 14701 }, { "epoch": 0.6653994116315909, "grad_norm": 0.6369249651369691, "learning_rate": 2.6603250195211235e-06, "loss": 0.3107, "step": 14702 }, { "epoch": 0.6654446707399865, "grad_norm": 0.70653484849908, "learning_rate": 2.659677316264847e-06, "loss": 0.3086, "step": 14703 }, { "epoch": 0.665489929848382, "grad_norm": 0.6565328879057835, "learning_rate": 2.6590296632949157e-06, "loss": 0.2752, "step": 14704 }, { "epoch": 0.6655351889567775, "grad_norm": 0.35181142116798614, "learning_rate": 2.658382060625249e-06, "loss": 0.4851, "step": 14705 }, { "epoch": 0.6655804480651731, "grad_norm": 0.630447726301422, "learning_rate": 2.657734508269758e-06, "loss": 0.3556, "step": 14706 }, { "epoch": 0.6656257071735687, "grad_norm": 0.3048831867135513, "learning_rate": 2.6570870062423616e-06, "loss": 0.4654, "step": 14707 }, { "epoch": 0.6656709662819642, "grad_norm": 0.7057071837671293, "learning_rate": 2.6564395545569667e-06, "loss": 0.3463, "step": 14708 }, { "epoch": 0.6657162253903598, "grad_norm": 0.5558048212112113, "learning_rate": 2.65579215322749e-06, "loss": 0.3569, "step": 14709 }, { "epoch": 0.6657614844987554, "grad_norm": 0.6253333082889162, "learning_rate": 2.6551448022678406e-06, "loss": 0.3089, "step": 14710 }, { "epoch": 0.665806743607151, "grad_norm": 0.2712330226795423, "learning_rate": 2.6544975016919263e-06, "loss": 0.461, "step": 14711 }, { "epoch": 0.6658520027155465, "grad_norm": 0.6592651233475737, "learning_rate": 2.653850251513656e-06, "loss": 0.3745, "step": 14712 }, { "epoch": 0.665897261823942, "grad_norm": 0.28287393808975175, "learning_rate": 2.6532030517469408e-06, "loss": 0.4944, "step": 14713 }, { "epoch": 0.6659425209323376, "grad_norm": 0.655168003309896, "learning_rate": 2.652555902405684e-06, "loss": 0.3081, "step": 14714 }, { "epoch": 0.6659877800407332, "grad_norm": 0.6746233358903633, "learning_rate": 2.651908803503789e-06, "loss": 0.3165, "step": 14715 }, { "epoch": 0.6660330391491288, "grad_norm": 0.6332929557042597, "learning_rate": 2.651261755055165e-06, "loss": 0.2922, "step": 14716 }, { "epoch": 0.6660782982575243, "grad_norm": 0.6089090989543154, "learning_rate": 2.6506147570737094e-06, "loss": 0.3304, "step": 14717 }, { "epoch": 0.6661235573659199, "grad_norm": 0.6286355370023886, "learning_rate": 2.64996780957333e-06, "loss": 0.3131, "step": 14718 }, { "epoch": 0.6661688164743155, "grad_norm": 0.7411944904056007, "learning_rate": 2.649320912567922e-06, "loss": 0.3271, "step": 14719 }, { "epoch": 0.6662140755827111, "grad_norm": 0.6004250448542339, "learning_rate": 2.6486740660713904e-06, "loss": 0.3336, "step": 14720 }, { "epoch": 0.6662593346911065, "grad_norm": 0.6536229703714422, "learning_rate": 2.64802727009763e-06, "loss": 0.3026, "step": 14721 }, { "epoch": 0.6663045937995021, "grad_norm": 0.6600307770153987, "learning_rate": 2.6473805246605416e-06, "loss": 0.3047, "step": 14722 }, { "epoch": 0.6663498529078977, "grad_norm": 0.6713346700277442, "learning_rate": 2.64673382977402e-06, "loss": 0.3154, "step": 14723 }, { "epoch": 0.6663951120162933, "grad_norm": 0.6903849972604962, "learning_rate": 2.6460871854519594e-06, "loss": 0.2783, "step": 14724 }, { "epoch": 0.6664403711246888, "grad_norm": 0.581681796442678, "learning_rate": 2.6454405917082556e-06, "loss": 0.3169, "step": 14725 }, { "epoch": 0.6664856302330844, "grad_norm": 0.6353521186320087, "learning_rate": 2.6447940485568057e-06, "loss": 0.2909, "step": 14726 }, { "epoch": 0.66653088934148, "grad_norm": 0.6398498279203021, "learning_rate": 2.6441475560114938e-06, "loss": 0.3011, "step": 14727 }, { "epoch": 0.6665761484498756, "grad_norm": 0.6425698529167011, "learning_rate": 2.6435011140862167e-06, "loss": 0.3179, "step": 14728 }, { "epoch": 0.6666214075582712, "grad_norm": 0.3115862301345666, "learning_rate": 2.642854722794864e-06, "loss": 0.4648, "step": 14729 }, { "epoch": 0.6666666666666666, "grad_norm": 0.5943414758115273, "learning_rate": 2.6422083821513246e-06, "loss": 0.2429, "step": 14730 }, { "epoch": 0.6667119257750622, "grad_norm": 0.5934254282346557, "learning_rate": 2.6415620921694836e-06, "loss": 0.3025, "step": 14731 }, { "epoch": 0.6667571848834578, "grad_norm": 0.5653671869192441, "learning_rate": 2.6409158528632315e-06, "loss": 0.2915, "step": 14732 }, { "epoch": 0.6668024439918534, "grad_norm": 0.7070394724449415, "learning_rate": 2.640269664246451e-06, "loss": 0.3329, "step": 14733 }, { "epoch": 0.6668477031002489, "grad_norm": 0.6389565087574908, "learning_rate": 2.6396235263330293e-06, "loss": 0.3314, "step": 14734 }, { "epoch": 0.6668929622086445, "grad_norm": 0.6245585495817344, "learning_rate": 2.638977439136847e-06, "loss": 0.3354, "step": 14735 }, { "epoch": 0.6669382213170401, "grad_norm": 0.28605449137789823, "learning_rate": 2.6383314026717903e-06, "loss": 0.4906, "step": 14736 }, { "epoch": 0.6669834804254356, "grad_norm": 0.6119075839120809, "learning_rate": 2.637685416951736e-06, "loss": 0.2988, "step": 14737 }, { "epoch": 0.6670287395338312, "grad_norm": 0.3082055751433085, "learning_rate": 2.6370394819905698e-06, "loss": 0.4716, "step": 14738 }, { "epoch": 0.6670739986422267, "grad_norm": 0.6307907284535162, "learning_rate": 2.636393597802167e-06, "loss": 0.3388, "step": 14739 }, { "epoch": 0.6671192577506223, "grad_norm": 0.6078912974944578, "learning_rate": 2.635747764400405e-06, "loss": 0.2721, "step": 14740 }, { "epoch": 0.6671645168590179, "grad_norm": 0.5925676623298787, "learning_rate": 2.635101981799162e-06, "loss": 0.3125, "step": 14741 }, { "epoch": 0.6672097759674135, "grad_norm": 0.5533639263255467, "learning_rate": 2.634456250012316e-06, "loss": 0.2931, "step": 14742 }, { "epoch": 0.667255035075809, "grad_norm": 0.29181393082657847, "learning_rate": 2.6338105690537402e-06, "loss": 0.4449, "step": 14743 }, { "epoch": 0.6673002941842046, "grad_norm": 0.3115948826921394, "learning_rate": 2.633164938937306e-06, "loss": 0.512, "step": 14744 }, { "epoch": 0.6673455532926001, "grad_norm": 0.28064957458736633, "learning_rate": 2.6325193596768905e-06, "loss": 0.4772, "step": 14745 }, { "epoch": 0.6673908124009957, "grad_norm": 0.7221727407308424, "learning_rate": 2.63187383128636e-06, "loss": 0.2879, "step": 14746 }, { "epoch": 0.6674360715093912, "grad_norm": 0.6131893416410314, "learning_rate": 2.6312283537795902e-06, "loss": 0.2931, "step": 14747 }, { "epoch": 0.6674813306177868, "grad_norm": 0.6204251169985142, "learning_rate": 2.630582927170446e-06, "loss": 0.3419, "step": 14748 }, { "epoch": 0.6675265897261824, "grad_norm": 0.6119163871445997, "learning_rate": 2.6299375514727998e-06, "loss": 0.3098, "step": 14749 }, { "epoch": 0.667571848834578, "grad_norm": 0.5971973040823447, "learning_rate": 2.629292226700514e-06, "loss": 0.3163, "step": 14750 }, { "epoch": 0.6676171079429736, "grad_norm": 0.3046693253865641, "learning_rate": 2.6286469528674598e-06, "loss": 0.4745, "step": 14751 }, { "epoch": 0.667662367051369, "grad_norm": 0.5983885615746021, "learning_rate": 2.6280017299874984e-06, "loss": 0.2506, "step": 14752 }, { "epoch": 0.6677076261597646, "grad_norm": 0.35135599929684164, "learning_rate": 2.6273565580744942e-06, "loss": 0.4827, "step": 14753 }, { "epoch": 0.6677528852681602, "grad_norm": 0.6258238245008194, "learning_rate": 2.6267114371423097e-06, "loss": 0.2888, "step": 14754 }, { "epoch": 0.6677981443765558, "grad_norm": 0.6635996374843116, "learning_rate": 2.6260663672048094e-06, "loss": 0.2864, "step": 14755 }, { "epoch": 0.6678434034849513, "grad_norm": 0.6056793405065296, "learning_rate": 2.6254213482758518e-06, "loss": 0.2592, "step": 14756 }, { "epoch": 0.6678886625933469, "grad_norm": 0.6187894917066431, "learning_rate": 2.624776380369295e-06, "loss": 0.2924, "step": 14757 }, { "epoch": 0.6679339217017425, "grad_norm": 0.6193570671741726, "learning_rate": 2.6241314634990005e-06, "loss": 0.3077, "step": 14758 }, { "epoch": 0.6679791808101381, "grad_norm": 0.7064378066940625, "learning_rate": 2.6234865976788236e-06, "loss": 0.3027, "step": 14759 }, { "epoch": 0.6680244399185336, "grad_norm": 0.7637733039531868, "learning_rate": 2.6228417829226195e-06, "loss": 0.2954, "step": 14760 }, { "epoch": 0.6680696990269291, "grad_norm": 0.6751147642440454, "learning_rate": 2.622197019244245e-06, "loss": 0.2501, "step": 14761 }, { "epoch": 0.6681149581353247, "grad_norm": 0.622694810711134, "learning_rate": 2.6215523066575542e-06, "loss": 0.3243, "step": 14762 }, { "epoch": 0.6681602172437203, "grad_norm": 0.7096323697849127, "learning_rate": 2.6209076451764004e-06, "loss": 0.313, "step": 14763 }, { "epoch": 0.6682054763521159, "grad_norm": 0.575045414444866, "learning_rate": 2.6202630348146323e-06, "loss": 0.3191, "step": 14764 }, { "epoch": 0.6682507354605114, "grad_norm": 0.3627578595993997, "learning_rate": 2.6196184755861054e-06, "loss": 0.4933, "step": 14765 }, { "epoch": 0.668295994568907, "grad_norm": 0.6621699630985428, "learning_rate": 2.618973967504664e-06, "loss": 0.3007, "step": 14766 }, { "epoch": 0.6683412536773026, "grad_norm": 0.6963598038335564, "learning_rate": 2.618329510584161e-06, "loss": 0.2904, "step": 14767 }, { "epoch": 0.6683865127856982, "grad_norm": 0.6530712366145673, "learning_rate": 2.617685104838443e-06, "loss": 0.2846, "step": 14768 }, { "epoch": 0.6684317718940936, "grad_norm": 0.551809083567866, "learning_rate": 2.617040750281352e-06, "loss": 0.2612, "step": 14769 }, { "epoch": 0.6684770310024892, "grad_norm": 0.5585593830608696, "learning_rate": 2.616396446926738e-06, "loss": 0.3178, "step": 14770 }, { "epoch": 0.6685222901108848, "grad_norm": 0.6878872213137671, "learning_rate": 2.615752194788445e-06, "loss": 0.3394, "step": 14771 }, { "epoch": 0.6685675492192804, "grad_norm": 0.6736646003467953, "learning_rate": 2.615107993880315e-06, "loss": 0.3154, "step": 14772 }, { "epoch": 0.6686128083276759, "grad_norm": 0.577778634752951, "learning_rate": 2.614463844216187e-06, "loss": 0.2896, "step": 14773 }, { "epoch": 0.6686580674360715, "grad_norm": 0.5888540087169923, "learning_rate": 2.613819745809907e-06, "loss": 0.2919, "step": 14774 }, { "epoch": 0.6687033265444671, "grad_norm": 0.3142271462539952, "learning_rate": 2.6131756986753097e-06, "loss": 0.4709, "step": 14775 }, { "epoch": 0.6687485856528627, "grad_norm": 0.2816735923390981, "learning_rate": 2.6125317028262383e-06, "loss": 0.4692, "step": 14776 }, { "epoch": 0.6687938447612582, "grad_norm": 0.5990464043433165, "learning_rate": 2.6118877582765255e-06, "loss": 0.3335, "step": 14777 }, { "epoch": 0.6688391038696537, "grad_norm": 0.6297307102369214, "learning_rate": 2.611243865040013e-06, "loss": 0.3245, "step": 14778 }, { "epoch": 0.6688843629780493, "grad_norm": 0.6290230209495843, "learning_rate": 2.6106000231305306e-06, "loss": 0.3014, "step": 14779 }, { "epoch": 0.6689296220864449, "grad_norm": 0.8288293083591458, "learning_rate": 2.6099562325619175e-06, "loss": 0.294, "step": 14780 }, { "epoch": 0.6689748811948405, "grad_norm": 0.6020139200786987, "learning_rate": 2.6093124933480052e-06, "loss": 0.2819, "step": 14781 }, { "epoch": 0.669020140303236, "grad_norm": 0.27612277737532254, "learning_rate": 2.608668805502622e-06, "loss": 0.4852, "step": 14782 }, { "epoch": 0.6690653994116316, "grad_norm": 0.6359528732903538, "learning_rate": 2.6080251690396026e-06, "loss": 0.2924, "step": 14783 }, { "epoch": 0.6691106585200272, "grad_norm": 0.2736896880795578, "learning_rate": 2.607381583972777e-06, "loss": 0.4634, "step": 14784 }, { "epoch": 0.6691559176284227, "grad_norm": 0.9012747453630279, "learning_rate": 2.6067380503159735e-06, "loss": 0.3164, "step": 14785 }, { "epoch": 0.6692011767368183, "grad_norm": 0.6229379860976303, "learning_rate": 2.606094568083017e-06, "loss": 0.3233, "step": 14786 }, { "epoch": 0.6692464358452138, "grad_norm": 0.6330120866522541, "learning_rate": 2.605451137287738e-06, "loss": 0.3263, "step": 14787 }, { "epoch": 0.6692916949536094, "grad_norm": 0.6707506830477721, "learning_rate": 2.604807757943957e-06, "loss": 0.3047, "step": 14788 }, { "epoch": 0.669336954062005, "grad_norm": 0.301561285535464, "learning_rate": 2.6041644300655035e-06, "loss": 0.4905, "step": 14789 }, { "epoch": 0.6693822131704006, "grad_norm": 0.5884118503855394, "learning_rate": 2.6035211536661966e-06, "loss": 0.3398, "step": 14790 }, { "epoch": 0.6694274722787961, "grad_norm": 0.5863125088203237, "learning_rate": 2.6028779287598606e-06, "loss": 0.2668, "step": 14791 }, { "epoch": 0.6694727313871917, "grad_norm": 0.6488902625627246, "learning_rate": 2.6022347553603145e-06, "loss": 0.3191, "step": 14792 }, { "epoch": 0.6695179904955872, "grad_norm": 0.27688118481367496, "learning_rate": 2.6015916334813818e-06, "loss": 0.4875, "step": 14793 }, { "epoch": 0.6695632496039828, "grad_norm": 0.6130194968066307, "learning_rate": 2.600948563136878e-06, "loss": 0.2703, "step": 14794 }, { "epoch": 0.6696085087123783, "grad_norm": 0.5475132725839402, "learning_rate": 2.60030554434062e-06, "loss": 0.2429, "step": 14795 }, { "epoch": 0.6696537678207739, "grad_norm": 0.609963564618995, "learning_rate": 2.599662577106427e-06, "loss": 0.2978, "step": 14796 }, { "epoch": 0.6696990269291695, "grad_norm": 0.5612618651319895, "learning_rate": 2.5990196614481135e-06, "loss": 0.2818, "step": 14797 }, { "epoch": 0.6697442860375651, "grad_norm": 0.6252181494789513, "learning_rate": 2.5983767973794915e-06, "loss": 0.3169, "step": 14798 }, { "epoch": 0.6697895451459607, "grad_norm": 0.6255870501678163, "learning_rate": 2.597733984914377e-06, "loss": 0.3206, "step": 14799 }, { "epoch": 0.6698348042543562, "grad_norm": 0.2880044890391729, "learning_rate": 2.5970912240665815e-06, "loss": 0.4794, "step": 14800 }, { "epoch": 0.6698800633627517, "grad_norm": 0.7775456388447965, "learning_rate": 2.5964485148499165e-06, "loss": 0.2681, "step": 14801 }, { "epoch": 0.6699253224711473, "grad_norm": 0.8829594166813511, "learning_rate": 2.595805857278189e-06, "loss": 0.2694, "step": 14802 }, { "epoch": 0.6699705815795429, "grad_norm": 0.28253780830597774, "learning_rate": 2.5951632513652113e-06, "loss": 0.4689, "step": 14803 }, { "epoch": 0.6700158406879384, "grad_norm": 0.6075988341196107, "learning_rate": 2.594520697124788e-06, "loss": 0.3272, "step": 14804 }, { "epoch": 0.670061099796334, "grad_norm": 0.6036323472055383, "learning_rate": 2.5938781945707293e-06, "loss": 0.3277, "step": 14805 }, { "epoch": 0.6701063589047296, "grad_norm": 0.5779008475528601, "learning_rate": 2.5932357437168353e-06, "loss": 0.3211, "step": 14806 }, { "epoch": 0.6701516180131252, "grad_norm": 0.6180057462429976, "learning_rate": 2.592593344576916e-06, "loss": 0.3489, "step": 14807 }, { "epoch": 0.6701968771215207, "grad_norm": 0.2792284287855959, "learning_rate": 2.59195099716477e-06, "loss": 0.4964, "step": 14808 }, { "epoch": 0.6702421362299162, "grad_norm": 0.2840852019818451, "learning_rate": 2.591308701494203e-06, "loss": 0.4862, "step": 14809 }, { "epoch": 0.6702873953383118, "grad_norm": 0.6465522456857574, "learning_rate": 2.590666457579014e-06, "loss": 0.3202, "step": 14810 }, { "epoch": 0.6703326544467074, "grad_norm": 0.2608260318289237, "learning_rate": 2.590024265433002e-06, "loss": 0.4471, "step": 14811 }, { "epoch": 0.670377913555103, "grad_norm": 0.6305923595751279, "learning_rate": 2.589382125069967e-06, "loss": 0.3108, "step": 14812 }, { "epoch": 0.6704231726634985, "grad_norm": 1.0727394971790696, "learning_rate": 2.5887400365037075e-06, "loss": 0.33, "step": 14813 }, { "epoch": 0.6704684317718941, "grad_norm": 0.8948768742495324, "learning_rate": 2.5880979997480193e-06, "loss": 0.3479, "step": 14814 }, { "epoch": 0.6705136908802897, "grad_norm": 0.6213365279867468, "learning_rate": 2.5874560148166953e-06, "loss": 0.3509, "step": 14815 }, { "epoch": 0.6705589499886853, "grad_norm": 0.2882151533209925, "learning_rate": 2.5868140817235344e-06, "loss": 0.4971, "step": 14816 }, { "epoch": 0.6706042090970807, "grad_norm": 0.6089141964028402, "learning_rate": 2.5861722004823254e-06, "loss": 0.2446, "step": 14817 }, { "epoch": 0.6706494682054763, "grad_norm": 0.6515023495329259, "learning_rate": 2.585530371106864e-06, "loss": 0.3552, "step": 14818 }, { "epoch": 0.6706947273138719, "grad_norm": 0.26172932451240744, "learning_rate": 2.5848885936109382e-06, "loss": 0.4446, "step": 14819 }, { "epoch": 0.6707399864222675, "grad_norm": 0.6545152496550372, "learning_rate": 2.58424686800834e-06, "loss": 0.3255, "step": 14820 }, { "epoch": 0.6707852455306631, "grad_norm": 0.6269818687343297, "learning_rate": 2.583605194312856e-06, "loss": 0.3356, "step": 14821 }, { "epoch": 0.6708305046390586, "grad_norm": 0.7356448283387264, "learning_rate": 2.5829635725382764e-06, "loss": 0.2754, "step": 14822 }, { "epoch": 0.6708757637474542, "grad_norm": 0.6411225363370119, "learning_rate": 2.5823220026983865e-06, "loss": 0.3226, "step": 14823 }, { "epoch": 0.6709210228558498, "grad_norm": 0.6612202553526849, "learning_rate": 2.5816804848069693e-06, "loss": 0.3262, "step": 14824 }, { "epoch": 0.6709662819642453, "grad_norm": 0.29496916689818664, "learning_rate": 2.581039018877811e-06, "loss": 0.4557, "step": 14825 }, { "epoch": 0.6710115410726408, "grad_norm": 0.5952504723621468, "learning_rate": 2.580397604924699e-06, "loss": 0.3078, "step": 14826 }, { "epoch": 0.6710568001810364, "grad_norm": 0.6380343137276523, "learning_rate": 2.5797562429614075e-06, "loss": 0.3059, "step": 14827 }, { "epoch": 0.671102059289432, "grad_norm": 0.6199335184293526, "learning_rate": 2.579114933001722e-06, "loss": 0.2787, "step": 14828 }, { "epoch": 0.6711473183978276, "grad_norm": 1.0874228926103768, "learning_rate": 2.5784736750594218e-06, "loss": 0.2945, "step": 14829 }, { "epoch": 0.6711925775062231, "grad_norm": 0.6125779118419148, "learning_rate": 2.577832469148286e-06, "loss": 0.3024, "step": 14830 }, { "epoch": 0.6712378366146187, "grad_norm": 0.7788844335689945, "learning_rate": 2.5771913152820895e-06, "loss": 0.2795, "step": 14831 }, { "epoch": 0.6712830957230143, "grad_norm": 0.6314433647143567, "learning_rate": 2.57655021347461e-06, "loss": 0.3204, "step": 14832 }, { "epoch": 0.6713283548314098, "grad_norm": 0.6141001508362471, "learning_rate": 2.5759091637396254e-06, "loss": 0.287, "step": 14833 }, { "epoch": 0.6713736139398054, "grad_norm": 0.6273161713818961, "learning_rate": 2.575268166090908e-06, "loss": 0.2765, "step": 14834 }, { "epoch": 0.6714188730482009, "grad_norm": 0.6215596863860496, "learning_rate": 2.5746272205422285e-06, "loss": 0.3148, "step": 14835 }, { "epoch": 0.6714641321565965, "grad_norm": 0.6978944273632913, "learning_rate": 2.5739863271073634e-06, "loss": 0.3477, "step": 14836 }, { "epoch": 0.6715093912649921, "grad_norm": 0.6040212388799695, "learning_rate": 2.5733454858000795e-06, "loss": 0.2893, "step": 14837 }, { "epoch": 0.6715546503733877, "grad_norm": 0.6080583118847751, "learning_rate": 2.5727046966341495e-06, "loss": 0.2562, "step": 14838 }, { "epoch": 0.6715999094817832, "grad_norm": 0.5755508530350106, "learning_rate": 2.572063959623341e-06, "loss": 0.2728, "step": 14839 }, { "epoch": 0.6716451685901788, "grad_norm": 0.6491815330242983, "learning_rate": 2.5714232747814192e-06, "loss": 0.3187, "step": 14840 }, { "epoch": 0.6716904276985743, "grad_norm": 0.5995387960306222, "learning_rate": 2.5707826421221527e-06, "loss": 0.3104, "step": 14841 }, { "epoch": 0.6717356868069699, "grad_norm": 0.9448323782002909, "learning_rate": 2.5701420616593078e-06, "loss": 0.3251, "step": 14842 }, { "epoch": 0.6717809459153654, "grad_norm": 0.6522763308065314, "learning_rate": 2.5695015334066475e-06, "loss": 0.2946, "step": 14843 }, { "epoch": 0.671826205023761, "grad_norm": 0.5770132512230949, "learning_rate": 2.5688610573779327e-06, "loss": 0.3095, "step": 14844 }, { "epoch": 0.6718714641321566, "grad_norm": 0.29970308751832503, "learning_rate": 2.568220633586929e-06, "loss": 0.4789, "step": 14845 }, { "epoch": 0.6719167232405522, "grad_norm": 0.30084471159795745, "learning_rate": 2.567580262047393e-06, "loss": 0.4513, "step": 14846 }, { "epoch": 0.6719619823489478, "grad_norm": 0.6722694384592166, "learning_rate": 2.566939942773089e-06, "loss": 0.3568, "step": 14847 }, { "epoch": 0.6720072414573433, "grad_norm": 0.6594993876155704, "learning_rate": 2.5662996757777716e-06, "loss": 0.2789, "step": 14848 }, { "epoch": 0.6720525005657388, "grad_norm": 0.9012927279021385, "learning_rate": 2.5656594610752005e-06, "loss": 0.2859, "step": 14849 }, { "epoch": 0.6720977596741344, "grad_norm": 0.8042398329352978, "learning_rate": 2.5650192986791293e-06, "loss": 0.3334, "step": 14850 }, { "epoch": 0.67214301878253, "grad_norm": 0.5570366902178083, "learning_rate": 2.5643791886033177e-06, "loss": 0.3362, "step": 14851 }, { "epoch": 0.6721882778909255, "grad_norm": 0.6377163244803467, "learning_rate": 2.5637391308615155e-06, "loss": 0.2737, "step": 14852 }, { "epoch": 0.6722335369993211, "grad_norm": 0.3154837118667572, "learning_rate": 2.5630991254674764e-06, "loss": 0.4655, "step": 14853 }, { "epoch": 0.6722787961077167, "grad_norm": 0.3066986197362134, "learning_rate": 2.562459172434952e-06, "loss": 0.5063, "step": 14854 }, { "epoch": 0.6723240552161123, "grad_norm": 0.2595637218282012, "learning_rate": 2.561819271777698e-06, "loss": 0.4417, "step": 14855 }, { "epoch": 0.6723693143245079, "grad_norm": 0.27092358982107523, "learning_rate": 2.5611794235094545e-06, "loss": 0.465, "step": 14856 }, { "epoch": 0.6724145734329033, "grad_norm": 0.6557612974703131, "learning_rate": 2.5605396276439764e-06, "loss": 0.2826, "step": 14857 }, { "epoch": 0.6724598325412989, "grad_norm": 0.5703719087363823, "learning_rate": 2.5598998841950105e-06, "loss": 0.2981, "step": 14858 }, { "epoch": 0.6725050916496945, "grad_norm": 0.28514248737355297, "learning_rate": 2.5592601931763024e-06, "loss": 0.466, "step": 14859 }, { "epoch": 0.6725503507580901, "grad_norm": 0.2741781805340848, "learning_rate": 2.558620554601594e-06, "loss": 0.4683, "step": 14860 }, { "epoch": 0.6725956098664856, "grad_norm": 0.6923206061416766, "learning_rate": 2.5579809684846323e-06, "loss": 0.2875, "step": 14861 }, { "epoch": 0.6726408689748812, "grad_norm": 0.26899865757289887, "learning_rate": 2.5573414348391613e-06, "loss": 0.4465, "step": 14862 }, { "epoch": 0.6726861280832768, "grad_norm": 0.2803036034475733, "learning_rate": 2.5567019536789204e-06, "loss": 0.4642, "step": 14863 }, { "epoch": 0.6727313871916724, "grad_norm": 0.2722707798580064, "learning_rate": 2.5560625250176495e-06, "loss": 0.478, "step": 14864 }, { "epoch": 0.6727766463000678, "grad_norm": 0.5717873551481465, "learning_rate": 2.5554231488690908e-06, "loss": 0.2754, "step": 14865 }, { "epoch": 0.6728219054084634, "grad_norm": 0.7573567287495568, "learning_rate": 2.554783825246978e-06, "loss": 0.2943, "step": 14866 }, { "epoch": 0.672867164516859, "grad_norm": 0.310794806996194, "learning_rate": 2.5541445541650536e-06, "loss": 0.4947, "step": 14867 }, { "epoch": 0.6729124236252546, "grad_norm": 0.5633981967848383, "learning_rate": 2.55350533563705e-06, "loss": 0.3025, "step": 14868 }, { "epoch": 0.6729576827336502, "grad_norm": 0.6291189797545162, "learning_rate": 2.552866169676701e-06, "loss": 0.3401, "step": 14869 }, { "epoch": 0.6730029418420457, "grad_norm": 0.5796014872041827, "learning_rate": 2.5522270562977424e-06, "loss": 0.3003, "step": 14870 }, { "epoch": 0.6730482009504413, "grad_norm": 0.5882744559978718, "learning_rate": 2.551587995513909e-06, "loss": 0.3058, "step": 14871 }, { "epoch": 0.6730934600588369, "grad_norm": 0.5832979545729652, "learning_rate": 2.550948987338929e-06, "loss": 0.302, "step": 14872 }, { "epoch": 0.6731387191672324, "grad_norm": 0.6154236592404919, "learning_rate": 2.5503100317865324e-06, "loss": 0.3275, "step": 14873 }, { "epoch": 0.6731839782756279, "grad_norm": 0.6174955768741429, "learning_rate": 2.549671128870452e-06, "loss": 0.3201, "step": 14874 }, { "epoch": 0.6732292373840235, "grad_norm": 0.6488605190840145, "learning_rate": 2.549032278604411e-06, "loss": 0.269, "step": 14875 }, { "epoch": 0.6732744964924191, "grad_norm": 0.649150724749603, "learning_rate": 2.54839348100214e-06, "loss": 0.326, "step": 14876 }, { "epoch": 0.6733197556008147, "grad_norm": 0.684597473992833, "learning_rate": 2.5477547360773626e-06, "loss": 0.2756, "step": 14877 }, { "epoch": 0.6733650147092102, "grad_norm": 0.6649204591550062, "learning_rate": 2.5471160438438058e-06, "loss": 0.2814, "step": 14878 }, { "epoch": 0.6734102738176058, "grad_norm": 0.6315462929374838, "learning_rate": 2.5464774043151897e-06, "loss": 0.2824, "step": 14879 }, { "epoch": 0.6734555329260014, "grad_norm": 0.6376100767180388, "learning_rate": 2.5458388175052407e-06, "loss": 0.3113, "step": 14880 }, { "epoch": 0.673500792034397, "grad_norm": 0.3157602884109542, "learning_rate": 2.5452002834276784e-06, "loss": 0.4864, "step": 14881 }, { "epoch": 0.6735460511427925, "grad_norm": 0.3159061388350569, "learning_rate": 2.5445618020962203e-06, "loss": 0.4724, "step": 14882 }, { "epoch": 0.673591310251188, "grad_norm": 0.6535128707460278, "learning_rate": 2.543923373524588e-06, "loss": 0.2934, "step": 14883 }, { "epoch": 0.6736365693595836, "grad_norm": 0.6120965324245942, "learning_rate": 2.543284997726504e-06, "loss": 0.3202, "step": 14884 }, { "epoch": 0.6736818284679792, "grad_norm": 0.6501927712827466, "learning_rate": 2.542646674715675e-06, "loss": 0.3191, "step": 14885 }, { "epoch": 0.6737270875763748, "grad_norm": 0.6882156235163461, "learning_rate": 2.5420084045058226e-06, "loss": 0.2935, "step": 14886 }, { "epoch": 0.6737723466847703, "grad_norm": 0.6085103868276374, "learning_rate": 2.5413701871106618e-06, "loss": 0.3092, "step": 14887 }, { "epoch": 0.6738176057931659, "grad_norm": 0.32797325748302764, "learning_rate": 2.540732022543905e-06, "loss": 0.4571, "step": 14888 }, { "epoch": 0.6738628649015614, "grad_norm": 0.5767157669986146, "learning_rate": 2.5400939108192615e-06, "loss": 0.259, "step": 14889 }, { "epoch": 0.673908124009957, "grad_norm": 0.6497609474966436, "learning_rate": 2.539455851950445e-06, "loss": 0.32, "step": 14890 }, { "epoch": 0.6739533831183526, "grad_norm": 0.6661441669420962, "learning_rate": 2.5388178459511676e-06, "loss": 0.3382, "step": 14891 }, { "epoch": 0.6739986422267481, "grad_norm": 0.6545120640034809, "learning_rate": 2.5381798928351355e-06, "loss": 0.3246, "step": 14892 }, { "epoch": 0.6740439013351437, "grad_norm": 0.7178892402201152, "learning_rate": 2.537541992616055e-06, "loss": 0.3176, "step": 14893 }, { "epoch": 0.6740891604435393, "grad_norm": 0.5834186467443114, "learning_rate": 2.5369041453076355e-06, "loss": 0.3053, "step": 14894 }, { "epoch": 0.6741344195519349, "grad_norm": 0.637241992326166, "learning_rate": 2.5362663509235796e-06, "loss": 0.316, "step": 14895 }, { "epoch": 0.6741796786603304, "grad_norm": 0.6823378248252496, "learning_rate": 2.5356286094775943e-06, "loss": 0.2712, "step": 14896 }, { "epoch": 0.6742249377687259, "grad_norm": 0.6170334175678358, "learning_rate": 2.5349909209833823e-06, "loss": 0.2777, "step": 14897 }, { "epoch": 0.6742701968771215, "grad_norm": 0.30346031839056486, "learning_rate": 2.5343532854546425e-06, "loss": 0.4658, "step": 14898 }, { "epoch": 0.6743154559855171, "grad_norm": 0.5985682336587592, "learning_rate": 2.533715702905078e-06, "loss": 0.3181, "step": 14899 }, { "epoch": 0.6743607150939126, "grad_norm": 0.6128606460509117, "learning_rate": 2.53307817334839e-06, "loss": 0.3288, "step": 14900 }, { "epoch": 0.6744059742023082, "grad_norm": 0.2968114827100891, "learning_rate": 2.5324406967982764e-06, "loss": 0.4709, "step": 14901 }, { "epoch": 0.6744512333107038, "grad_norm": 0.38491508192950186, "learning_rate": 2.5318032732684306e-06, "loss": 0.4621, "step": 14902 }, { "epoch": 0.6744964924190994, "grad_norm": 0.6463875398224902, "learning_rate": 2.5311659027725523e-06, "loss": 0.3006, "step": 14903 }, { "epoch": 0.674541751527495, "grad_norm": 0.7427266610588816, "learning_rate": 2.530528585324339e-06, "loss": 0.2863, "step": 14904 }, { "epoch": 0.6745870106358904, "grad_norm": 0.6320123767632743, "learning_rate": 2.529891320937481e-06, "loss": 0.2886, "step": 14905 }, { "epoch": 0.674632269744286, "grad_norm": 0.282299480515388, "learning_rate": 2.5292541096256706e-06, "loss": 0.4613, "step": 14906 }, { "epoch": 0.6746775288526816, "grad_norm": 0.5895777919573687, "learning_rate": 2.528616951402603e-06, "loss": 0.3129, "step": 14907 }, { "epoch": 0.6747227879610772, "grad_norm": 0.5958128659036863, "learning_rate": 2.5279798462819647e-06, "loss": 0.3194, "step": 14908 }, { "epoch": 0.6747680470694727, "grad_norm": 0.6247947020635181, "learning_rate": 2.52734279427745e-06, "loss": 0.3085, "step": 14909 }, { "epoch": 0.6748133061778683, "grad_norm": 0.6390878216793594, "learning_rate": 2.5267057954027437e-06, "loss": 0.3221, "step": 14910 }, { "epoch": 0.6748585652862639, "grad_norm": 0.6288651134205863, "learning_rate": 2.5260688496715318e-06, "loss": 0.2886, "step": 14911 }, { "epoch": 0.6749038243946595, "grad_norm": 0.6489868403838829, "learning_rate": 2.5254319570975026e-06, "loss": 0.3176, "step": 14912 }, { "epoch": 0.6749490835030549, "grad_norm": 0.6122035663289863, "learning_rate": 2.524795117694344e-06, "loss": 0.3026, "step": 14913 }, { "epoch": 0.6749943426114505, "grad_norm": 0.277427821508405, "learning_rate": 2.5241583314757327e-06, "loss": 0.4706, "step": 14914 }, { "epoch": 0.6750396017198461, "grad_norm": 0.6597618750541544, "learning_rate": 2.523521598455355e-06, "loss": 0.3289, "step": 14915 }, { "epoch": 0.6750848608282417, "grad_norm": 0.5947017395599469, "learning_rate": 2.522884918646894e-06, "loss": 0.3565, "step": 14916 }, { "epoch": 0.6751301199366373, "grad_norm": 0.5862091108400317, "learning_rate": 2.5222482920640285e-06, "loss": 0.2782, "step": 14917 }, { "epoch": 0.6751753790450328, "grad_norm": 0.2960613611617704, "learning_rate": 2.5216117187204346e-06, "loss": 0.4796, "step": 14918 }, { "epoch": 0.6752206381534284, "grad_norm": 0.2674426621288423, "learning_rate": 2.520975198629794e-06, "loss": 0.4825, "step": 14919 }, { "epoch": 0.675265897261824, "grad_norm": 0.6306769854950436, "learning_rate": 2.520338731805785e-06, "loss": 0.2978, "step": 14920 }, { "epoch": 0.6753111563702195, "grad_norm": 0.26766885441575, "learning_rate": 2.5197023182620795e-06, "loss": 0.4577, "step": 14921 }, { "epoch": 0.675356415478615, "grad_norm": 0.6303183429655842, "learning_rate": 2.5190659580123524e-06, "loss": 0.3197, "step": 14922 }, { "epoch": 0.6754016745870106, "grad_norm": 0.624135549266601, "learning_rate": 2.51842965107028e-06, "loss": 0.2715, "step": 14923 }, { "epoch": 0.6754469336954062, "grad_norm": 0.6602309754641182, "learning_rate": 2.517793397449531e-06, "loss": 0.3257, "step": 14924 }, { "epoch": 0.6754921928038018, "grad_norm": 0.43063048420359085, "learning_rate": 2.5171571971637805e-06, "loss": 0.4771, "step": 14925 }, { "epoch": 0.6755374519121974, "grad_norm": 0.6352771016933549, "learning_rate": 2.5165210502266964e-06, "loss": 0.2835, "step": 14926 }, { "epoch": 0.6755827110205929, "grad_norm": 0.7075697434121344, "learning_rate": 2.515884956651945e-06, "loss": 0.3086, "step": 14927 }, { "epoch": 0.6756279701289885, "grad_norm": 0.6011383335968612, "learning_rate": 2.515248916453197e-06, "loss": 0.2733, "step": 14928 }, { "epoch": 0.675673229237384, "grad_norm": 0.5922423977368473, "learning_rate": 2.51461292964412e-06, "loss": 0.2945, "step": 14929 }, { "epoch": 0.6757184883457796, "grad_norm": 1.2989889900263096, "learning_rate": 2.5139769962383788e-06, "loss": 0.2941, "step": 14930 }, { "epoch": 0.6757637474541751, "grad_norm": 0.5764395400902315, "learning_rate": 2.5133411162496335e-06, "loss": 0.2791, "step": 14931 }, { "epoch": 0.6758090065625707, "grad_norm": 0.6428344078064796, "learning_rate": 2.512705289691551e-06, "loss": 0.3288, "step": 14932 }, { "epoch": 0.6758542656709663, "grad_norm": 0.6114776529509702, "learning_rate": 2.5120695165777946e-06, "loss": 0.2842, "step": 14933 }, { "epoch": 0.6758995247793619, "grad_norm": 0.28320462128242, "learning_rate": 2.5114337969220233e-06, "loss": 0.4543, "step": 14934 }, { "epoch": 0.6759447838877574, "grad_norm": 0.6086254527757627, "learning_rate": 2.510798130737895e-06, "loss": 0.3156, "step": 14935 }, { "epoch": 0.675990042996153, "grad_norm": 0.6228585294371244, "learning_rate": 2.510162518039071e-06, "loss": 0.3279, "step": 14936 }, { "epoch": 0.6760353021045485, "grad_norm": 0.62743672614483, "learning_rate": 2.5095269588392055e-06, "loss": 0.3093, "step": 14937 }, { "epoch": 0.6760805612129441, "grad_norm": 0.637625832238023, "learning_rate": 2.50889145315196e-06, "loss": 0.3188, "step": 14938 }, { "epoch": 0.6761258203213397, "grad_norm": 0.7949598540965647, "learning_rate": 2.508256000990985e-06, "loss": 0.2864, "step": 14939 }, { "epoch": 0.6761710794297352, "grad_norm": 0.606394208948491, "learning_rate": 2.5076206023699344e-06, "loss": 0.2753, "step": 14940 }, { "epoch": 0.6762163385381308, "grad_norm": 0.5686953213430728, "learning_rate": 2.5069852573024624e-06, "loss": 0.3026, "step": 14941 }, { "epoch": 0.6762615976465264, "grad_norm": 0.6984108573696061, "learning_rate": 2.5063499658022227e-06, "loss": 0.3118, "step": 14942 }, { "epoch": 0.676306856754922, "grad_norm": 0.6034097239048911, "learning_rate": 2.505714727882863e-06, "loss": 0.2925, "step": 14943 }, { "epoch": 0.6763521158633174, "grad_norm": 0.6563126248198178, "learning_rate": 2.505079543558031e-06, "loss": 0.3273, "step": 14944 }, { "epoch": 0.676397374971713, "grad_norm": 0.2716205123914153, "learning_rate": 2.504444412841378e-06, "loss": 0.4755, "step": 14945 }, { "epoch": 0.6764426340801086, "grad_norm": 0.628172470558325, "learning_rate": 2.503809335746553e-06, "loss": 0.3471, "step": 14946 }, { "epoch": 0.6764878931885042, "grad_norm": 0.6236765157508242, "learning_rate": 2.5031743122871954e-06, "loss": 0.2969, "step": 14947 }, { "epoch": 0.6765331522968997, "grad_norm": 0.7013063988857922, "learning_rate": 2.502539342476953e-06, "loss": 0.3084, "step": 14948 }, { "epoch": 0.6765784114052953, "grad_norm": 0.630089781026667, "learning_rate": 2.5019044263294724e-06, "loss": 0.2788, "step": 14949 }, { "epoch": 0.6766236705136909, "grad_norm": 0.5768425223257683, "learning_rate": 2.5012695638583933e-06, "loss": 0.2592, "step": 14950 }, { "epoch": 0.6766689296220865, "grad_norm": 0.669027420317218, "learning_rate": 2.5006347550773547e-06, "loss": 0.2956, "step": 14951 }, { "epoch": 0.6767141887304821, "grad_norm": 0.3014969538495637, "learning_rate": 2.5000000000000015e-06, "loss": 0.4678, "step": 14952 }, { "epoch": 0.6767594478388775, "grad_norm": 0.28886597729116914, "learning_rate": 2.4993652986399675e-06, "loss": 0.4667, "step": 14953 }, { "epoch": 0.6768047069472731, "grad_norm": 0.5987131742441169, "learning_rate": 2.4987306510108956e-06, "loss": 0.3097, "step": 14954 }, { "epoch": 0.6768499660556687, "grad_norm": 0.2877713827912253, "learning_rate": 2.4980960571264195e-06, "loss": 0.491, "step": 14955 }, { "epoch": 0.6768952251640643, "grad_norm": 0.7496311178592051, "learning_rate": 2.497461517000173e-06, "loss": 0.2975, "step": 14956 }, { "epoch": 0.6769404842724598, "grad_norm": 0.3042963939527462, "learning_rate": 2.496827030645793e-06, "loss": 0.4762, "step": 14957 }, { "epoch": 0.6769857433808554, "grad_norm": 0.6794376426236794, "learning_rate": 2.4961925980769144e-06, "loss": 0.2719, "step": 14958 }, { "epoch": 0.677031002489251, "grad_norm": 0.682983746007749, "learning_rate": 2.4955582193071664e-06, "loss": 0.318, "step": 14959 }, { "epoch": 0.6770762615976466, "grad_norm": 0.5837459735848904, "learning_rate": 2.494923894350179e-06, "loss": 0.3331, "step": 14960 }, { "epoch": 0.6771215207060421, "grad_norm": 0.6577176249026865, "learning_rate": 2.494289623219583e-06, "loss": 0.3305, "step": 14961 }, { "epoch": 0.6771667798144376, "grad_norm": 0.3025775812822773, "learning_rate": 2.4936554059290095e-06, "loss": 0.4543, "step": 14962 }, { "epoch": 0.6772120389228332, "grad_norm": 0.6238240104432574, "learning_rate": 2.4930212424920837e-06, "loss": 0.3092, "step": 14963 }, { "epoch": 0.6772572980312288, "grad_norm": 0.5594267201562955, "learning_rate": 2.49238713292243e-06, "loss": 0.2957, "step": 14964 }, { "epoch": 0.6773025571396244, "grad_norm": 0.6158262707830824, "learning_rate": 2.491753077233676e-06, "loss": 0.3335, "step": 14965 }, { "epoch": 0.6773478162480199, "grad_norm": 0.6131637951233143, "learning_rate": 2.4911190754394445e-06, "loss": 0.3059, "step": 14966 }, { "epoch": 0.6773930753564155, "grad_norm": 0.5816517091849465, "learning_rate": 2.49048512755336e-06, "loss": 0.2906, "step": 14967 }, { "epoch": 0.677438334464811, "grad_norm": 0.6345875687869158, "learning_rate": 2.4898512335890425e-06, "loss": 0.3485, "step": 14968 }, { "epoch": 0.6774835935732066, "grad_norm": 0.6298797929591657, "learning_rate": 2.4892173935601112e-06, "loss": 0.3308, "step": 14969 }, { "epoch": 0.6775288526816021, "grad_norm": 0.6800828759636052, "learning_rate": 2.488583607480186e-06, "loss": 0.3261, "step": 14970 }, { "epoch": 0.6775741117899977, "grad_norm": 0.6050569028834316, "learning_rate": 2.4879498753628885e-06, "loss": 0.3023, "step": 14971 }, { "epoch": 0.6776193708983933, "grad_norm": 0.6514948521987356, "learning_rate": 2.487316197221833e-06, "loss": 0.3389, "step": 14972 }, { "epoch": 0.6776646300067889, "grad_norm": 0.6305047865132518, "learning_rate": 2.486682573070633e-06, "loss": 0.2832, "step": 14973 }, { "epoch": 0.6777098891151845, "grad_norm": 0.642479019505682, "learning_rate": 2.4860490029229056e-06, "loss": 0.3041, "step": 14974 }, { "epoch": 0.67775514822358, "grad_norm": 0.2966655752974817, "learning_rate": 2.485415486792266e-06, "loss": 0.4724, "step": 14975 }, { "epoch": 0.6778004073319756, "grad_norm": 0.654410954345375, "learning_rate": 2.4847820246923244e-06, "loss": 0.3296, "step": 14976 }, { "epoch": 0.6778456664403711, "grad_norm": 0.7127968738118841, "learning_rate": 2.4841486166366908e-06, "loss": 0.2785, "step": 14977 }, { "epoch": 0.6778909255487667, "grad_norm": 0.6122340196596452, "learning_rate": 2.483515262638978e-06, "loss": 0.2813, "step": 14978 }, { "epoch": 0.6779361846571622, "grad_norm": 0.6249973534876662, "learning_rate": 2.482881962712794e-06, "loss": 0.2824, "step": 14979 }, { "epoch": 0.6779814437655578, "grad_norm": 0.6260218678357067, "learning_rate": 2.4822487168717437e-06, "loss": 0.2977, "step": 14980 }, { "epoch": 0.6780267028739534, "grad_norm": 0.6507315239786348, "learning_rate": 2.481615525129437e-06, "loss": 0.2551, "step": 14981 }, { "epoch": 0.678071961982349, "grad_norm": 0.5892910468708393, "learning_rate": 2.480982387499477e-06, "loss": 0.3069, "step": 14982 }, { "epoch": 0.6781172210907445, "grad_norm": 1.8074788988332977, "learning_rate": 2.480349303995471e-06, "loss": 0.3318, "step": 14983 }, { "epoch": 0.67816248019914, "grad_norm": 0.5737467735703359, "learning_rate": 2.4797162746310193e-06, "loss": 0.2766, "step": 14984 }, { "epoch": 0.6782077393075356, "grad_norm": 0.3156381897305302, "learning_rate": 2.479083299419723e-06, "loss": 0.4643, "step": 14985 }, { "epoch": 0.6782529984159312, "grad_norm": 0.7418544758899195, "learning_rate": 2.4784503783751834e-06, "loss": 0.3306, "step": 14986 }, { "epoch": 0.6782982575243268, "grad_norm": 0.5487600317511814, "learning_rate": 2.477817511511003e-06, "loss": 0.2876, "step": 14987 }, { "epoch": 0.6783435166327223, "grad_norm": 0.2603885575345069, "learning_rate": 2.477184698840779e-06, "loss": 0.4652, "step": 14988 }, { "epoch": 0.6783887757411179, "grad_norm": 0.6194122431720369, "learning_rate": 2.4765519403781048e-06, "loss": 0.3801, "step": 14989 }, { "epoch": 0.6784340348495135, "grad_norm": 0.27762558728478176, "learning_rate": 2.475919236136579e-06, "loss": 0.4808, "step": 14990 }, { "epoch": 0.6784792939579091, "grad_norm": 0.31156010455713856, "learning_rate": 2.4752865861297994e-06, "loss": 0.4834, "step": 14991 }, { "epoch": 0.6785245530663045, "grad_norm": 0.6896153422630451, "learning_rate": 2.474653990371356e-06, "loss": 0.3135, "step": 14992 }, { "epoch": 0.6785698121747001, "grad_norm": 0.2979982913756971, "learning_rate": 2.474021448874841e-06, "loss": 0.4519, "step": 14993 }, { "epoch": 0.6786150712830957, "grad_norm": 0.6265256108940277, "learning_rate": 2.4733889616538493e-06, "loss": 0.307, "step": 14994 }, { "epoch": 0.6786603303914913, "grad_norm": 0.292726559882182, "learning_rate": 2.472756528721966e-06, "loss": 0.4919, "step": 14995 }, { "epoch": 0.6787055894998869, "grad_norm": 0.29246799357364633, "learning_rate": 2.4721241500927863e-06, "loss": 0.4561, "step": 14996 }, { "epoch": 0.6787508486082824, "grad_norm": 0.6261086562541617, "learning_rate": 2.4714918257798936e-06, "loss": 0.2792, "step": 14997 }, { "epoch": 0.678796107716678, "grad_norm": 0.6299828393508288, "learning_rate": 2.470859555796875e-06, "loss": 0.3236, "step": 14998 }, { "epoch": 0.6788413668250736, "grad_norm": 0.6333138654289642, "learning_rate": 2.470227340157316e-06, "loss": 0.254, "step": 14999 }, { "epoch": 0.6788866259334692, "grad_norm": 0.6989859571225095, "learning_rate": 2.4695951788748047e-06, "loss": 0.291, "step": 15000 }, { "epoch": 0.6789318850418646, "grad_norm": 0.6215116559144774, "learning_rate": 2.4689630719629206e-06, "loss": 0.2595, "step": 15001 }, { "epoch": 0.6789771441502602, "grad_norm": 0.5904808625350783, "learning_rate": 2.468331019435245e-06, "loss": 0.2968, "step": 15002 }, { "epoch": 0.6790224032586558, "grad_norm": 0.575496997947114, "learning_rate": 2.4676990213053603e-06, "loss": 0.2764, "step": 15003 }, { "epoch": 0.6790676623670514, "grad_norm": 0.31230031547698267, "learning_rate": 2.467067077586848e-06, "loss": 0.463, "step": 15004 }, { "epoch": 0.6791129214754469, "grad_norm": 0.7635158903039766, "learning_rate": 2.466435188293286e-06, "loss": 0.2692, "step": 15005 }, { "epoch": 0.6791581805838425, "grad_norm": 1.3168042258773374, "learning_rate": 2.4658033534382476e-06, "loss": 0.3226, "step": 15006 }, { "epoch": 0.6792034396922381, "grad_norm": 0.5862584713732727, "learning_rate": 2.465171573035314e-06, "loss": 0.2941, "step": 15007 }, { "epoch": 0.6792486988006337, "grad_norm": 0.5971792872764682, "learning_rate": 2.4645398470980564e-06, "loss": 0.2765, "step": 15008 }, { "epoch": 0.6792939579090292, "grad_norm": 0.6321924431964658, "learning_rate": 2.463908175640052e-06, "loss": 0.2697, "step": 15009 }, { "epoch": 0.6793392170174247, "grad_norm": 0.643688802392928, "learning_rate": 2.463276558674872e-06, "loss": 0.2953, "step": 15010 }, { "epoch": 0.6793844761258203, "grad_norm": 0.9119352682482706, "learning_rate": 2.462644996216086e-06, "loss": 0.2685, "step": 15011 }, { "epoch": 0.6794297352342159, "grad_norm": 0.925526246168888, "learning_rate": 2.4620134882772683e-06, "loss": 0.2431, "step": 15012 }, { "epoch": 0.6794749943426115, "grad_norm": 0.32426128200660725, "learning_rate": 2.461382034871986e-06, "loss": 0.4957, "step": 15013 }, { "epoch": 0.679520253451007, "grad_norm": 0.6005725209893311, "learning_rate": 2.4607506360138044e-06, "loss": 0.3386, "step": 15014 }, { "epoch": 0.6795655125594026, "grad_norm": 0.5949138426379929, "learning_rate": 2.460119291716293e-06, "loss": 0.3316, "step": 15015 }, { "epoch": 0.6796107716677982, "grad_norm": 0.6225411278253692, "learning_rate": 2.4594880019930194e-06, "loss": 0.3183, "step": 15016 }, { "epoch": 0.6796560307761937, "grad_norm": 0.6325554520778243, "learning_rate": 2.4588567668575463e-06, "loss": 0.3276, "step": 15017 }, { "epoch": 0.6797012898845892, "grad_norm": 0.6760739235968206, "learning_rate": 2.458225586323435e-06, "loss": 0.3176, "step": 15018 }, { "epoch": 0.6797465489929848, "grad_norm": 0.5645343967429904, "learning_rate": 2.457594460404249e-06, "loss": 0.3023, "step": 15019 }, { "epoch": 0.6797918081013804, "grad_norm": 0.6337538050357513, "learning_rate": 2.456963389113552e-06, "loss": 0.2849, "step": 15020 }, { "epoch": 0.679837067209776, "grad_norm": 0.2906828686861893, "learning_rate": 2.4563323724649006e-06, "loss": 0.4676, "step": 15021 }, { "epoch": 0.6798823263181716, "grad_norm": 0.5591360322721972, "learning_rate": 2.4557014104718536e-06, "loss": 0.3091, "step": 15022 }, { "epoch": 0.6799275854265671, "grad_norm": 0.6327486766443987, "learning_rate": 2.4550705031479697e-06, "loss": 0.3012, "step": 15023 }, { "epoch": 0.6799728445349627, "grad_norm": 0.6229533038465822, "learning_rate": 2.4544396505068037e-06, "loss": 0.297, "step": 15024 }, { "epoch": 0.6800181036433582, "grad_norm": 0.7074763849772877, "learning_rate": 2.4538088525619124e-06, "loss": 0.3375, "step": 15025 }, { "epoch": 0.6800633627517538, "grad_norm": 0.641807895663935, "learning_rate": 2.453178109326849e-06, "loss": 0.3504, "step": 15026 }, { "epoch": 0.6801086218601493, "grad_norm": 0.6876442627979192, "learning_rate": 2.452547420815165e-06, "loss": 0.3808, "step": 15027 }, { "epoch": 0.6801538809685449, "grad_norm": 0.6633112177907754, "learning_rate": 2.4519167870404126e-06, "loss": 0.2731, "step": 15028 }, { "epoch": 0.6801991400769405, "grad_norm": 0.3050394725195347, "learning_rate": 2.451286208016144e-06, "loss": 0.4741, "step": 15029 }, { "epoch": 0.6802443991853361, "grad_norm": 0.3037036497796297, "learning_rate": 2.4506556837559074e-06, "loss": 0.4441, "step": 15030 }, { "epoch": 0.6802896582937316, "grad_norm": 0.2735878686555206, "learning_rate": 2.450025214273249e-06, "loss": 0.4828, "step": 15031 }, { "epoch": 0.6803349174021271, "grad_norm": 0.6187834499050971, "learning_rate": 2.4493947995817165e-06, "loss": 0.3022, "step": 15032 }, { "epoch": 0.6803801765105227, "grad_norm": 0.5779258372319492, "learning_rate": 2.4487644396948584e-06, "loss": 0.2988, "step": 15033 }, { "epoch": 0.6804254356189183, "grad_norm": 0.6103992843675301, "learning_rate": 2.448134134626217e-06, "loss": 0.2988, "step": 15034 }, { "epoch": 0.6804706947273139, "grad_norm": 0.6238415603043627, "learning_rate": 2.4475038843893327e-06, "loss": 0.2891, "step": 15035 }, { "epoch": 0.6805159538357094, "grad_norm": 0.6368674523002104, "learning_rate": 2.4468736889977536e-06, "loss": 0.3173, "step": 15036 }, { "epoch": 0.680561212944105, "grad_norm": 0.29612709938276416, "learning_rate": 2.4462435484650156e-06, "loss": 0.468, "step": 15037 }, { "epoch": 0.6806064720525006, "grad_norm": 0.6270179992072349, "learning_rate": 2.4456134628046617e-06, "loss": 0.3427, "step": 15038 }, { "epoch": 0.6806517311608962, "grad_norm": 0.6686654576794504, "learning_rate": 2.4449834320302297e-06, "loss": 0.2887, "step": 15039 }, { "epoch": 0.6806969902692916, "grad_norm": 0.3245521884159804, "learning_rate": 2.4443534561552543e-06, "loss": 0.4802, "step": 15040 }, { "epoch": 0.6807422493776872, "grad_norm": 0.6097813124642308, "learning_rate": 2.4437235351932746e-06, "loss": 0.3216, "step": 15041 }, { "epoch": 0.6807875084860828, "grad_norm": 0.5942174037417574, "learning_rate": 2.4430936691578287e-06, "loss": 0.3034, "step": 15042 }, { "epoch": 0.6808327675944784, "grad_norm": 0.6237027666111084, "learning_rate": 2.442463858062444e-06, "loss": 0.3415, "step": 15043 }, { "epoch": 0.680878026702874, "grad_norm": 0.6188913554656105, "learning_rate": 2.441834101920655e-06, "loss": 0.3145, "step": 15044 }, { "epoch": 0.6809232858112695, "grad_norm": 0.6309016051437135, "learning_rate": 2.4412044007459945e-06, "loss": 0.291, "step": 15045 }, { "epoch": 0.6809685449196651, "grad_norm": 0.26462806540351497, "learning_rate": 2.4405747545519966e-06, "loss": 0.468, "step": 15046 }, { "epoch": 0.6810138040280607, "grad_norm": 0.2661694906424091, "learning_rate": 2.4399451633521825e-06, "loss": 0.4632, "step": 15047 }, { "epoch": 0.6810590631364563, "grad_norm": 0.6641368818274791, "learning_rate": 2.4393156271600847e-06, "loss": 0.3349, "step": 15048 }, { "epoch": 0.6811043222448517, "grad_norm": 0.5960234629586615, "learning_rate": 2.4386861459892312e-06, "loss": 0.2653, "step": 15049 }, { "epoch": 0.6811495813532473, "grad_norm": 0.62521763613905, "learning_rate": 2.4380567198531462e-06, "loss": 0.3426, "step": 15050 }, { "epoch": 0.6811948404616429, "grad_norm": 0.2717915731520093, "learning_rate": 2.4374273487653517e-06, "loss": 0.4493, "step": 15051 }, { "epoch": 0.6812400995700385, "grad_norm": 0.5922236013507167, "learning_rate": 2.4367980327393752e-06, "loss": 0.3155, "step": 15052 }, { "epoch": 0.681285358678434, "grad_norm": 0.6491962951976494, "learning_rate": 2.4361687717887346e-06, "loss": 0.3052, "step": 15053 }, { "epoch": 0.6813306177868296, "grad_norm": 0.6055216662373926, "learning_rate": 2.435539565926955e-06, "loss": 0.2897, "step": 15054 }, { "epoch": 0.6813758768952252, "grad_norm": 0.62030495638086, "learning_rate": 2.434910415167554e-06, "loss": 0.2912, "step": 15055 }, { "epoch": 0.6814211360036208, "grad_norm": 0.5848255587798645, "learning_rate": 2.4342813195240477e-06, "loss": 0.2882, "step": 15056 }, { "epoch": 0.6814663951120163, "grad_norm": 1.0867145517086927, "learning_rate": 2.4336522790099563e-06, "loss": 0.2777, "step": 15057 }, { "epoch": 0.6815116542204118, "grad_norm": 0.616768628832872, "learning_rate": 2.4330232936387975e-06, "loss": 0.292, "step": 15058 }, { "epoch": 0.6815569133288074, "grad_norm": 0.6299643341596347, "learning_rate": 2.4323943634240838e-06, "loss": 0.2836, "step": 15059 }, { "epoch": 0.681602172437203, "grad_norm": 0.6101124308071156, "learning_rate": 2.431765488379328e-06, "loss": 0.3279, "step": 15060 }, { "epoch": 0.6816474315455986, "grad_norm": 0.29891723094065137, "learning_rate": 2.4311366685180436e-06, "loss": 0.4191, "step": 15061 }, { "epoch": 0.6816926906539941, "grad_norm": 0.31111970321204335, "learning_rate": 2.430507903853745e-06, "loss": 0.4834, "step": 15062 }, { "epoch": 0.6817379497623897, "grad_norm": 0.5747182195248783, "learning_rate": 2.42987919439994e-06, "loss": 0.2647, "step": 15063 }, { "epoch": 0.6817832088707853, "grad_norm": 0.6957102220606519, "learning_rate": 2.429250540170135e-06, "loss": 0.2548, "step": 15064 }, { "epoch": 0.6818284679791808, "grad_norm": 0.5184383773596717, "learning_rate": 2.428621941177843e-06, "loss": 0.2803, "step": 15065 }, { "epoch": 0.6818737270875763, "grad_norm": 0.649242266265955, "learning_rate": 2.4279933974365662e-06, "loss": 0.3289, "step": 15066 }, { "epoch": 0.6819189861959719, "grad_norm": 0.6386315571308373, "learning_rate": 2.4273649089598133e-06, "loss": 0.2894, "step": 15067 }, { "epoch": 0.6819642453043675, "grad_norm": 0.6135005713400414, "learning_rate": 2.4267364757610878e-06, "loss": 0.2738, "step": 15068 }, { "epoch": 0.6820095044127631, "grad_norm": 0.6213338889164568, "learning_rate": 2.4261080978538897e-06, "loss": 0.3396, "step": 15069 }, { "epoch": 0.6820547635211587, "grad_norm": 0.5964228446691353, "learning_rate": 2.425479775251724e-06, "loss": 0.2587, "step": 15070 }, { "epoch": 0.6821000226295542, "grad_norm": 0.5767022668088903, "learning_rate": 2.4248515079680945e-06, "loss": 0.3102, "step": 15071 }, { "epoch": 0.6821452817379497, "grad_norm": 0.6267563690965127, "learning_rate": 2.4242232960164937e-06, "loss": 0.2785, "step": 15072 }, { "epoch": 0.6821905408463453, "grad_norm": 0.6050276519979759, "learning_rate": 2.423595139410423e-06, "loss": 0.2771, "step": 15073 }, { "epoch": 0.6822357999547409, "grad_norm": 0.5967407347994533, "learning_rate": 2.4229670381633804e-06, "loss": 0.2796, "step": 15074 }, { "epoch": 0.6822810590631364, "grad_norm": 0.5878917183116571, "learning_rate": 2.4223389922888646e-06, "loss": 0.3188, "step": 15075 }, { "epoch": 0.682326318171532, "grad_norm": 0.5176462985315065, "learning_rate": 2.4217110018003636e-06, "loss": 0.3001, "step": 15076 }, { "epoch": 0.6823715772799276, "grad_norm": 0.6936683358693684, "learning_rate": 2.4210830667113745e-06, "loss": 0.2778, "step": 15077 }, { "epoch": 0.6824168363883232, "grad_norm": 0.6189248083875724, "learning_rate": 2.4204551870353917e-06, "loss": 0.3505, "step": 15078 }, { "epoch": 0.6824620954967188, "grad_norm": 0.7311188825467586, "learning_rate": 2.4198273627859043e-06, "loss": 0.3475, "step": 15079 }, { "epoch": 0.6825073546051142, "grad_norm": 0.6536316463000047, "learning_rate": 2.419199593976401e-06, "loss": 0.2967, "step": 15080 }, { "epoch": 0.6825526137135098, "grad_norm": 0.6168381295450212, "learning_rate": 2.4185718806203738e-06, "loss": 0.2991, "step": 15081 }, { "epoch": 0.6825978728219054, "grad_norm": 0.5989253129012235, "learning_rate": 2.4179442227313065e-06, "loss": 0.2927, "step": 15082 }, { "epoch": 0.682643131930301, "grad_norm": 0.6387303771408718, "learning_rate": 2.41731662032269e-06, "loss": 0.3425, "step": 15083 }, { "epoch": 0.6826883910386965, "grad_norm": 0.3572420858535034, "learning_rate": 2.4166890734080066e-06, "loss": 0.4635, "step": 15084 }, { "epoch": 0.6827336501470921, "grad_norm": 0.3106526202462484, "learning_rate": 2.41606158200074e-06, "loss": 0.4804, "step": 15085 }, { "epoch": 0.6827789092554877, "grad_norm": 0.6606184577508364, "learning_rate": 2.4154341461143734e-06, "loss": 0.2942, "step": 15086 }, { "epoch": 0.6828241683638833, "grad_norm": 0.6548534213992573, "learning_rate": 2.4148067657623907e-06, "loss": 0.3086, "step": 15087 }, { "epoch": 0.6828694274722787, "grad_norm": 0.6151721435049017, "learning_rate": 2.4141794409582713e-06, "loss": 0.2829, "step": 15088 }, { "epoch": 0.6829146865806743, "grad_norm": 0.70234226813624, "learning_rate": 2.413552171715492e-06, "loss": 0.3002, "step": 15089 }, { "epoch": 0.6829599456890699, "grad_norm": 0.5181581808026793, "learning_rate": 2.412924958047533e-06, "loss": 0.3023, "step": 15090 }, { "epoch": 0.6830052047974655, "grad_norm": 0.655670861689387, "learning_rate": 2.4122977999678727e-06, "loss": 0.3185, "step": 15091 }, { "epoch": 0.6830504639058611, "grad_norm": 0.5946982122245101, "learning_rate": 2.4116706974899857e-06, "loss": 0.3472, "step": 15092 }, { "epoch": 0.6830957230142566, "grad_norm": 0.7419455436996496, "learning_rate": 2.411043650627343e-06, "loss": 0.3462, "step": 15093 }, { "epoch": 0.6831409821226522, "grad_norm": 0.33988968281405163, "learning_rate": 2.4104166593934237e-06, "loss": 0.4753, "step": 15094 }, { "epoch": 0.6831862412310478, "grad_norm": 0.643007225023249, "learning_rate": 2.409789723801695e-06, "loss": 0.3085, "step": 15095 }, { "epoch": 0.6832315003394434, "grad_norm": 0.7106285604036804, "learning_rate": 2.409162843865632e-06, "loss": 0.2933, "step": 15096 }, { "epoch": 0.6832767594478388, "grad_norm": 0.6235195273871946, "learning_rate": 2.4085360195987017e-06, "loss": 0.2589, "step": 15097 }, { "epoch": 0.6833220185562344, "grad_norm": 0.5841690154702869, "learning_rate": 2.4079092510143712e-06, "loss": 0.3071, "step": 15098 }, { "epoch": 0.68336727766463, "grad_norm": 0.5636313308066683, "learning_rate": 2.407282538126111e-06, "loss": 0.2403, "step": 15099 }, { "epoch": 0.6834125367730256, "grad_norm": 0.6407811380404763, "learning_rate": 2.4066558809473896e-06, "loss": 0.2854, "step": 15100 }, { "epoch": 0.6834577958814211, "grad_norm": 0.6759491624865307, "learning_rate": 2.406029279491664e-06, "loss": 0.3146, "step": 15101 }, { "epoch": 0.6835030549898167, "grad_norm": 0.6259445406121261, "learning_rate": 2.405402733772403e-06, "loss": 0.3136, "step": 15102 }, { "epoch": 0.6835483140982123, "grad_norm": 0.6051948786321726, "learning_rate": 2.404776243803068e-06, "loss": 0.2622, "step": 15103 }, { "epoch": 0.6835935732066079, "grad_norm": 0.5876119935476364, "learning_rate": 2.4041498095971253e-06, "loss": 0.2993, "step": 15104 }, { "epoch": 0.6836388323150034, "grad_norm": 0.5800757727860213, "learning_rate": 2.4035234311680267e-06, "loss": 0.3371, "step": 15105 }, { "epoch": 0.6836840914233989, "grad_norm": 0.6108119594459906, "learning_rate": 2.402897108529235e-06, "loss": 0.2724, "step": 15106 }, { "epoch": 0.6837293505317945, "grad_norm": 0.6520504016494019, "learning_rate": 2.40227084169421e-06, "loss": 0.3746, "step": 15107 }, { "epoch": 0.6837746096401901, "grad_norm": 0.7391233436868181, "learning_rate": 2.401644630676406e-06, "loss": 0.3151, "step": 15108 }, { "epoch": 0.6838198687485857, "grad_norm": 0.5998437164037514, "learning_rate": 2.4010184754892773e-06, "loss": 0.2969, "step": 15109 }, { "epoch": 0.6838651278569812, "grad_norm": 0.612633443932278, "learning_rate": 2.400392376146281e-06, "loss": 0.3113, "step": 15110 }, { "epoch": 0.6839103869653768, "grad_norm": 0.6737683247307297, "learning_rate": 2.3997663326608663e-06, "loss": 0.3705, "step": 15111 }, { "epoch": 0.6839556460737723, "grad_norm": 0.5984500289040087, "learning_rate": 2.3991403450464896e-06, "loss": 0.2848, "step": 15112 }, { "epoch": 0.6840009051821679, "grad_norm": 0.685521554319672, "learning_rate": 2.398514413316598e-06, "loss": 0.3334, "step": 15113 }, { "epoch": 0.6840461642905635, "grad_norm": 0.6089728266060771, "learning_rate": 2.397888537484641e-06, "loss": 0.3064, "step": 15114 }, { "epoch": 0.684091423398959, "grad_norm": 0.5548942735671424, "learning_rate": 2.397262717564067e-06, "loss": 0.3223, "step": 15115 }, { "epoch": 0.6841366825073546, "grad_norm": 0.6383884701744859, "learning_rate": 2.3966369535683254e-06, "loss": 0.3467, "step": 15116 }, { "epoch": 0.6841819416157502, "grad_norm": 0.5859354192074316, "learning_rate": 2.3960112455108604e-06, "loss": 0.2759, "step": 15117 }, { "epoch": 0.6842272007241458, "grad_norm": 0.6497286961086137, "learning_rate": 2.3953855934051135e-06, "loss": 0.2808, "step": 15118 }, { "epoch": 0.6842724598325413, "grad_norm": 0.6832885488840652, "learning_rate": 2.3947599972645313e-06, "loss": 0.3897, "step": 15119 }, { "epoch": 0.6843177189409368, "grad_norm": 0.5990897995013262, "learning_rate": 2.3941344571025575e-06, "loss": 0.2846, "step": 15120 }, { "epoch": 0.6843629780493324, "grad_norm": 0.6285928770397525, "learning_rate": 2.3935089729326307e-06, "loss": 0.295, "step": 15121 }, { "epoch": 0.684408237157728, "grad_norm": 0.637133212579721, "learning_rate": 2.3928835447681886e-06, "loss": 0.3387, "step": 15122 }, { "epoch": 0.6844534962661235, "grad_norm": 0.5783934039329125, "learning_rate": 2.392258172622674e-06, "loss": 0.2953, "step": 15123 }, { "epoch": 0.6844987553745191, "grad_norm": 0.5715727546399437, "learning_rate": 2.391632856509521e-06, "loss": 0.2819, "step": 15124 }, { "epoch": 0.6845440144829147, "grad_norm": 0.6303769605021657, "learning_rate": 2.3910075964421682e-06, "loss": 0.3124, "step": 15125 }, { "epoch": 0.6845892735913103, "grad_norm": 0.6382169114047389, "learning_rate": 2.390382392434049e-06, "loss": 0.3138, "step": 15126 }, { "epoch": 0.6846345326997059, "grad_norm": 1.884026468527679, "learning_rate": 2.389757244498596e-06, "loss": 0.3024, "step": 15127 }, { "epoch": 0.6846797918081013, "grad_norm": 0.6009302379190262, "learning_rate": 2.389132152649243e-06, "loss": 0.304, "step": 15128 }, { "epoch": 0.6847250509164969, "grad_norm": 0.2907404985498884, "learning_rate": 2.3885071168994245e-06, "loss": 0.4527, "step": 15129 }, { "epoch": 0.6847703100248925, "grad_norm": 0.6375852995803484, "learning_rate": 2.3878821372625645e-06, "loss": 0.3325, "step": 15130 }, { "epoch": 0.6848155691332881, "grad_norm": 0.6255821180704887, "learning_rate": 2.3872572137520942e-06, "loss": 0.2996, "step": 15131 }, { "epoch": 0.6848608282416836, "grad_norm": 0.6084577807492569, "learning_rate": 2.3866323463814426e-06, "loss": 0.3524, "step": 15132 }, { "epoch": 0.6849060873500792, "grad_norm": 0.6140593026995164, "learning_rate": 2.386007535164039e-06, "loss": 0.2679, "step": 15133 }, { "epoch": 0.6849513464584748, "grad_norm": 0.37943621133603733, "learning_rate": 2.3853827801133015e-06, "loss": 0.4455, "step": 15134 }, { "epoch": 0.6849966055668704, "grad_norm": 0.6303980113130074, "learning_rate": 2.384758081242658e-06, "loss": 0.32, "step": 15135 }, { "epoch": 0.6850418646752658, "grad_norm": 0.6468168375303229, "learning_rate": 2.384133438565533e-06, "loss": 0.3279, "step": 15136 }, { "epoch": 0.6850871237836614, "grad_norm": 0.6264960194763265, "learning_rate": 2.383508852095346e-06, "loss": 0.3224, "step": 15137 }, { "epoch": 0.685132382892057, "grad_norm": 0.6333601588370803, "learning_rate": 2.382884321845516e-06, "loss": 0.3099, "step": 15138 }, { "epoch": 0.6851776420004526, "grad_norm": 0.6282786241901707, "learning_rate": 2.382259847829467e-06, "loss": 0.3354, "step": 15139 }, { "epoch": 0.6852229011088482, "grad_norm": 0.2730465512823068, "learning_rate": 2.381635430060611e-06, "loss": 0.4853, "step": 15140 }, { "epoch": 0.6852681602172437, "grad_norm": 0.5941869212174515, "learning_rate": 2.38101106855237e-06, "loss": 0.2762, "step": 15141 }, { "epoch": 0.6853134193256393, "grad_norm": 0.28845968455259635, "learning_rate": 2.3803867633181575e-06, "loss": 0.4624, "step": 15142 }, { "epoch": 0.6853586784340349, "grad_norm": 0.6890545300013281, "learning_rate": 2.3797625143713865e-06, "loss": 0.2775, "step": 15143 }, { "epoch": 0.6854039375424305, "grad_norm": 0.6849086964199705, "learning_rate": 2.3791383217254717e-06, "loss": 0.3612, "step": 15144 }, { "epoch": 0.6854491966508259, "grad_norm": 0.6329796736475822, "learning_rate": 2.3785141853938266e-06, "loss": 0.2985, "step": 15145 }, { "epoch": 0.6854944557592215, "grad_norm": 0.2658507084788963, "learning_rate": 2.37789010538986e-06, "loss": 0.4552, "step": 15146 }, { "epoch": 0.6855397148676171, "grad_norm": 0.6106735785669123, "learning_rate": 2.3772660817269806e-06, "loss": 0.2875, "step": 15147 }, { "epoch": 0.6855849739760127, "grad_norm": 0.5513527402067496, "learning_rate": 2.3766421144185977e-06, "loss": 0.2948, "step": 15148 }, { "epoch": 0.6856302330844083, "grad_norm": 0.5290145509732991, "learning_rate": 2.3760182034781203e-06, "loss": 0.2645, "step": 15149 }, { "epoch": 0.6856754921928038, "grad_norm": 0.6402660303302956, "learning_rate": 2.3753943489189537e-06, "loss": 0.3056, "step": 15150 }, { "epoch": 0.6857207513011994, "grad_norm": 0.6428218167659006, "learning_rate": 2.3747705507544986e-06, "loss": 0.3194, "step": 15151 }, { "epoch": 0.685766010409595, "grad_norm": 0.6706866565556091, "learning_rate": 2.3741468089981646e-06, "loss": 0.2862, "step": 15152 }, { "epoch": 0.6858112695179905, "grad_norm": 0.5990018968682368, "learning_rate": 2.3735231236633483e-06, "loss": 0.2883, "step": 15153 }, { "epoch": 0.685856528626386, "grad_norm": 0.6217163752330904, "learning_rate": 2.372899494763456e-06, "loss": 0.293, "step": 15154 }, { "epoch": 0.6859017877347816, "grad_norm": 0.28048717025355663, "learning_rate": 2.3722759223118846e-06, "loss": 0.4643, "step": 15155 }, { "epoch": 0.6859470468431772, "grad_norm": 0.6270455720998772, "learning_rate": 2.371652406322031e-06, "loss": 0.2873, "step": 15156 }, { "epoch": 0.6859923059515728, "grad_norm": 0.5866299850272513, "learning_rate": 2.3710289468072957e-06, "loss": 0.3119, "step": 15157 }, { "epoch": 0.6860375650599683, "grad_norm": 0.27914664793966554, "learning_rate": 2.3704055437810754e-06, "loss": 0.4836, "step": 15158 }, { "epoch": 0.6860828241683639, "grad_norm": 0.5762223082408546, "learning_rate": 2.3697821972567635e-06, "loss": 0.2848, "step": 15159 }, { "epoch": 0.6861280832767594, "grad_norm": 0.29689041409226063, "learning_rate": 2.3691589072477527e-06, "loss": 0.4544, "step": 15160 }, { "epoch": 0.686173342385155, "grad_norm": 0.616291195054509, "learning_rate": 2.3685356737674364e-06, "loss": 0.3022, "step": 15161 }, { "epoch": 0.6862186014935506, "grad_norm": 0.6195029821454514, "learning_rate": 2.367912496829211e-06, "loss": 0.2788, "step": 15162 }, { "epoch": 0.6862638606019461, "grad_norm": 0.2606837427560347, "learning_rate": 2.367289376446458e-06, "loss": 0.4686, "step": 15163 }, { "epoch": 0.6863091197103417, "grad_norm": 0.6483111444061642, "learning_rate": 2.3666663126325705e-06, "loss": 0.2994, "step": 15164 }, { "epoch": 0.6863543788187373, "grad_norm": 0.5808106655048166, "learning_rate": 2.3660433054009385e-06, "loss": 0.2767, "step": 15165 }, { "epoch": 0.6863996379271329, "grad_norm": 0.5582665769123862, "learning_rate": 2.3654203547649463e-06, "loss": 0.2759, "step": 15166 }, { "epoch": 0.6864448970355284, "grad_norm": 0.6517516887106195, "learning_rate": 2.364797460737977e-06, "loss": 0.2635, "step": 15167 }, { "epoch": 0.686490156143924, "grad_norm": 0.6199077902775992, "learning_rate": 2.364174623333419e-06, "loss": 0.2802, "step": 15168 }, { "epoch": 0.6865354152523195, "grad_norm": 0.6461909522002824, "learning_rate": 2.363551842564651e-06, "loss": 0.3534, "step": 15169 }, { "epoch": 0.6865806743607151, "grad_norm": 0.6299888083758769, "learning_rate": 2.362929118445059e-06, "loss": 0.3022, "step": 15170 }, { "epoch": 0.6866259334691106, "grad_norm": 0.7662971676827985, "learning_rate": 2.36230645098802e-06, "loss": 0.3375, "step": 15171 }, { "epoch": 0.6866711925775062, "grad_norm": 0.28553350487559287, "learning_rate": 2.3616838402069132e-06, "loss": 0.4623, "step": 15172 }, { "epoch": 0.6867164516859018, "grad_norm": 0.28039720222164843, "learning_rate": 2.361061286115118e-06, "loss": 0.4512, "step": 15173 }, { "epoch": 0.6867617107942974, "grad_norm": 0.6293996693057095, "learning_rate": 2.3604387887260122e-06, "loss": 0.2728, "step": 15174 }, { "epoch": 0.686806969902693, "grad_norm": 0.620450588469678, "learning_rate": 2.35981634805297e-06, "loss": 0.2687, "step": 15175 }, { "epoch": 0.6868522290110884, "grad_norm": 0.2611191221662981, "learning_rate": 2.359193964109364e-06, "loss": 0.4641, "step": 15176 }, { "epoch": 0.686897488119484, "grad_norm": 0.2633681409227305, "learning_rate": 2.3585716369085692e-06, "loss": 0.4613, "step": 15177 }, { "epoch": 0.6869427472278796, "grad_norm": 0.2854475002845172, "learning_rate": 2.35794936646396e-06, "loss": 0.4759, "step": 15178 }, { "epoch": 0.6869880063362752, "grad_norm": 0.9651744734923887, "learning_rate": 2.357327152788903e-06, "loss": 0.3001, "step": 15179 }, { "epoch": 0.6870332654446707, "grad_norm": 0.5976494048168159, "learning_rate": 2.356704995896768e-06, "loss": 0.277, "step": 15180 }, { "epoch": 0.6870785245530663, "grad_norm": 0.637045182518737, "learning_rate": 2.3560828958009265e-06, "loss": 0.3234, "step": 15181 }, { "epoch": 0.6871237836614619, "grad_norm": 0.6255287785575562, "learning_rate": 2.355460852514741e-06, "loss": 0.3296, "step": 15182 }, { "epoch": 0.6871690427698575, "grad_norm": 0.6239150838054454, "learning_rate": 2.354838866051582e-06, "loss": 0.3245, "step": 15183 }, { "epoch": 0.687214301878253, "grad_norm": 0.6051523620230199, "learning_rate": 2.354216936424812e-06, "loss": 0.2777, "step": 15184 }, { "epoch": 0.6872595609866485, "grad_norm": 0.6090172591445984, "learning_rate": 2.3535950636477915e-06, "loss": 0.2943, "step": 15185 }, { "epoch": 0.6873048200950441, "grad_norm": 0.627645662677417, "learning_rate": 2.3529732477338857e-06, "loss": 0.2999, "step": 15186 }, { "epoch": 0.6873500792034397, "grad_norm": 0.2991361603487357, "learning_rate": 2.352351488696457e-06, "loss": 0.4835, "step": 15187 }, { "epoch": 0.6873953383118353, "grad_norm": 0.636926639668503, "learning_rate": 2.351729786548863e-06, "loss": 0.3004, "step": 15188 }, { "epoch": 0.6874405974202308, "grad_norm": 0.5933253379819048, "learning_rate": 2.3511081413044605e-06, "loss": 0.3057, "step": 15189 }, { "epoch": 0.6874858565286264, "grad_norm": 0.6585305490498075, "learning_rate": 2.3504865529766084e-06, "loss": 0.3628, "step": 15190 }, { "epoch": 0.687531115637022, "grad_norm": 0.791935913806427, "learning_rate": 2.3498650215786656e-06, "loss": 0.2863, "step": 15191 }, { "epoch": 0.6875763747454176, "grad_norm": 0.3022278662815868, "learning_rate": 2.349243547123983e-06, "loss": 0.4629, "step": 15192 }, { "epoch": 0.687621633853813, "grad_norm": 0.6842440528203508, "learning_rate": 2.348622129625914e-06, "loss": 0.3112, "step": 15193 }, { "epoch": 0.6876668929622086, "grad_norm": 0.598615904191372, "learning_rate": 2.3480007690978153e-06, "loss": 0.2748, "step": 15194 }, { "epoch": 0.6877121520706042, "grad_norm": 0.6166353577834562, "learning_rate": 2.3473794655530317e-06, "loss": 0.2794, "step": 15195 }, { "epoch": 0.6877574111789998, "grad_norm": 0.6474296155650149, "learning_rate": 2.3467582190049194e-06, "loss": 0.3015, "step": 15196 }, { "epoch": 0.6878026702873954, "grad_norm": 0.6586067699851095, "learning_rate": 2.3461370294668234e-06, "loss": 0.3272, "step": 15197 }, { "epoch": 0.6878479293957909, "grad_norm": 0.6412806959305043, "learning_rate": 2.3455158969520908e-06, "loss": 0.3416, "step": 15198 }, { "epoch": 0.6878931885041865, "grad_norm": 0.29779863556989944, "learning_rate": 2.3448948214740703e-06, "loss": 0.4603, "step": 15199 }, { "epoch": 0.687938447612582, "grad_norm": 0.7199625021107681, "learning_rate": 2.3442738030461054e-06, "loss": 0.3547, "step": 15200 }, { "epoch": 0.6879837067209776, "grad_norm": 0.6317413566544562, "learning_rate": 2.3436528416815384e-06, "loss": 0.3109, "step": 15201 }, { "epoch": 0.6880289658293731, "grad_norm": 0.5831209937004782, "learning_rate": 2.343031937393714e-06, "loss": 0.2895, "step": 15202 }, { "epoch": 0.6880742249377687, "grad_norm": 0.5753754117741635, "learning_rate": 2.342411090195974e-06, "loss": 0.2532, "step": 15203 }, { "epoch": 0.6881194840461643, "grad_norm": 0.6494344248438111, "learning_rate": 2.341790300101658e-06, "loss": 0.3236, "step": 15204 }, { "epoch": 0.6881647431545599, "grad_norm": 0.6113703342258673, "learning_rate": 2.3411695671241026e-06, "loss": 0.2958, "step": 15205 }, { "epoch": 0.6882100022629554, "grad_norm": 0.6102866066688664, "learning_rate": 2.3405488912766468e-06, "loss": 0.324, "step": 15206 }, { "epoch": 0.688255261371351, "grad_norm": 0.5710834246103191, "learning_rate": 2.3399282725726297e-06, "loss": 0.3133, "step": 15207 }, { "epoch": 0.6883005204797465, "grad_norm": 0.5744056006601107, "learning_rate": 2.3393077110253838e-06, "loss": 0.2984, "step": 15208 }, { "epoch": 0.6883457795881421, "grad_norm": 0.6411009454649048, "learning_rate": 2.338687206648242e-06, "loss": 0.3326, "step": 15209 }, { "epoch": 0.6883910386965377, "grad_norm": 0.6492584420661419, "learning_rate": 2.3380667594545402e-06, "loss": 0.2735, "step": 15210 }, { "epoch": 0.6884362978049332, "grad_norm": 0.6666730619678819, "learning_rate": 2.337446369457607e-06, "loss": 0.3121, "step": 15211 }, { "epoch": 0.6884815569133288, "grad_norm": 0.6411538049352037, "learning_rate": 2.3368260366707745e-06, "loss": 0.3143, "step": 15212 }, { "epoch": 0.6885268160217244, "grad_norm": 0.5730742986343003, "learning_rate": 2.3362057611073722e-06, "loss": 0.3211, "step": 15213 }, { "epoch": 0.68857207513012, "grad_norm": 0.6724012403402507, "learning_rate": 2.3355855427807247e-06, "loss": 0.4833, "step": 15214 }, { "epoch": 0.6886173342385155, "grad_norm": 0.8056592749483618, "learning_rate": 2.3349653817041607e-06, "loss": 0.2862, "step": 15215 }, { "epoch": 0.688662593346911, "grad_norm": 0.6759884149762039, "learning_rate": 2.3343452778910076e-06, "loss": 0.3437, "step": 15216 }, { "epoch": 0.6887078524553066, "grad_norm": 0.7560353506301231, "learning_rate": 2.333725231354588e-06, "loss": 0.3249, "step": 15217 }, { "epoch": 0.6887531115637022, "grad_norm": 0.3039208009113988, "learning_rate": 2.333105242108222e-06, "loss": 0.4799, "step": 15218 }, { "epoch": 0.6887983706720978, "grad_norm": 0.29755263599505566, "learning_rate": 2.332485310165233e-06, "loss": 0.466, "step": 15219 }, { "epoch": 0.6888436297804933, "grad_norm": 0.5981605057766334, "learning_rate": 2.3318654355389448e-06, "loss": 0.3045, "step": 15220 }, { "epoch": 0.6888888888888889, "grad_norm": 0.7221055998904644, "learning_rate": 2.3312456182426736e-06, "loss": 0.3102, "step": 15221 }, { "epoch": 0.6889341479972845, "grad_norm": 0.5825786882913035, "learning_rate": 2.330625858289736e-06, "loss": 0.3337, "step": 15222 }, { "epoch": 0.6889794071056801, "grad_norm": 0.6336510446811916, "learning_rate": 2.330006155693451e-06, "loss": 0.3228, "step": 15223 }, { "epoch": 0.6890246662140755, "grad_norm": 0.32607904113700137, "learning_rate": 2.3293865104671324e-06, "loss": 0.4962, "step": 15224 }, { "epoch": 0.6890699253224711, "grad_norm": 0.7399849731248741, "learning_rate": 2.328766922624098e-06, "loss": 0.3154, "step": 15225 }, { "epoch": 0.6891151844308667, "grad_norm": 0.6466307348063, "learning_rate": 2.3281473921776577e-06, "loss": 0.2894, "step": 15226 }, { "epoch": 0.6891604435392623, "grad_norm": 0.6474918515256418, "learning_rate": 2.327527919141122e-06, "loss": 0.3082, "step": 15227 }, { "epoch": 0.6892057026476578, "grad_norm": 0.6907658263724318, "learning_rate": 2.3269085035278037e-06, "loss": 0.3248, "step": 15228 }, { "epoch": 0.6892509617560534, "grad_norm": 0.668855494816506, "learning_rate": 2.326289145351014e-06, "loss": 0.302, "step": 15229 }, { "epoch": 0.689296220864449, "grad_norm": 0.6269732722978408, "learning_rate": 2.325669844624058e-06, "loss": 0.3008, "step": 15230 }, { "epoch": 0.6893414799728446, "grad_norm": 0.665790307558888, "learning_rate": 2.3250506013602425e-06, "loss": 0.2953, "step": 15231 }, { "epoch": 0.6893867390812402, "grad_norm": 0.6336689756404082, "learning_rate": 2.3244314155728758e-06, "loss": 0.3252, "step": 15232 }, { "epoch": 0.6894319981896356, "grad_norm": 0.6333133047342612, "learning_rate": 2.3238122872752606e-06, "loss": 0.2881, "step": 15233 }, { "epoch": 0.6894772572980312, "grad_norm": 0.6604219335095796, "learning_rate": 2.323193216480698e-06, "loss": 0.2952, "step": 15234 }, { "epoch": 0.6895225164064268, "grad_norm": 0.2641142503850825, "learning_rate": 2.3225742032024923e-06, "loss": 0.467, "step": 15235 }, { "epoch": 0.6895677755148224, "grad_norm": 0.6703900163351237, "learning_rate": 2.3219552474539452e-06, "loss": 0.2812, "step": 15236 }, { "epoch": 0.6896130346232179, "grad_norm": 0.6644438053059106, "learning_rate": 2.3213363492483553e-06, "loss": 0.291, "step": 15237 }, { "epoch": 0.6896582937316135, "grad_norm": 0.6069432377435615, "learning_rate": 2.3207175085990184e-06, "loss": 0.2678, "step": 15238 }, { "epoch": 0.6897035528400091, "grad_norm": 0.5916808380907982, "learning_rate": 2.3200987255192354e-06, "loss": 0.3135, "step": 15239 }, { "epoch": 0.6897488119484046, "grad_norm": 0.3107890007219854, "learning_rate": 2.3194800000222984e-06, "loss": 0.4828, "step": 15240 }, { "epoch": 0.6897940710568001, "grad_norm": 0.6601154188421439, "learning_rate": 2.3188613321215046e-06, "loss": 0.28, "step": 15241 }, { "epoch": 0.6898393301651957, "grad_norm": 0.6937457942077409, "learning_rate": 2.3182427218301473e-06, "loss": 0.2971, "step": 15242 }, { "epoch": 0.6898845892735913, "grad_norm": 0.5748328417560244, "learning_rate": 2.317624169161515e-06, "loss": 0.3301, "step": 15243 }, { "epoch": 0.6899298483819869, "grad_norm": 0.5830720790065382, "learning_rate": 2.3170056741289015e-06, "loss": 0.307, "step": 15244 }, { "epoch": 0.6899751074903825, "grad_norm": 0.6566537590448583, "learning_rate": 2.3163872367455976e-06, "loss": 0.2767, "step": 15245 }, { "epoch": 0.690020366598778, "grad_norm": 0.6135232962091322, "learning_rate": 2.31576885702489e-06, "loss": 0.262, "step": 15246 }, { "epoch": 0.6900656257071736, "grad_norm": 0.3635384431340751, "learning_rate": 2.3151505349800635e-06, "loss": 0.4986, "step": 15247 }, { "epoch": 0.6901108848155691, "grad_norm": 0.3197510057064505, "learning_rate": 2.314532270624406e-06, "loss": 0.4831, "step": 15248 }, { "epoch": 0.6901561439239647, "grad_norm": 0.3014046711126635, "learning_rate": 2.3139140639712045e-06, "loss": 0.4696, "step": 15249 }, { "epoch": 0.6902014030323602, "grad_norm": 0.6362004212788522, "learning_rate": 2.31329591503374e-06, "loss": 0.3149, "step": 15250 }, { "epoch": 0.6902466621407558, "grad_norm": 0.25358928563314215, "learning_rate": 2.312677823825292e-06, "loss": 0.4488, "step": 15251 }, { "epoch": 0.6902919212491514, "grad_norm": 0.26585388827939566, "learning_rate": 2.312059790359147e-06, "loss": 0.4831, "step": 15252 }, { "epoch": 0.690337180357547, "grad_norm": 0.6304065372531822, "learning_rate": 2.3114418146485793e-06, "loss": 0.3269, "step": 15253 }, { "epoch": 0.6903824394659425, "grad_norm": 0.6465249995769476, "learning_rate": 2.310823896706872e-06, "loss": 0.282, "step": 15254 }, { "epoch": 0.6904276985743381, "grad_norm": 0.6462067809944219, "learning_rate": 2.3102060365473e-06, "loss": 0.3379, "step": 15255 }, { "epoch": 0.6904729576827336, "grad_norm": 0.27030553396271884, "learning_rate": 2.309588234183137e-06, "loss": 0.4506, "step": 15256 }, { "epoch": 0.6905182167911292, "grad_norm": 0.6383121590737316, "learning_rate": 2.3089704896276597e-06, "loss": 0.3188, "step": 15257 }, { "epoch": 0.6905634758995248, "grad_norm": 0.37044181031209134, "learning_rate": 2.3083528028941444e-06, "loss": 0.4898, "step": 15258 }, { "epoch": 0.6906087350079203, "grad_norm": 0.6686546014233246, "learning_rate": 2.30773517399586e-06, "loss": 0.3093, "step": 15259 }, { "epoch": 0.6906539941163159, "grad_norm": 0.5594686195477221, "learning_rate": 2.307117602946076e-06, "loss": 0.3016, "step": 15260 }, { "epoch": 0.6906992532247115, "grad_norm": 0.6476786146664553, "learning_rate": 2.306500089758065e-06, "loss": 0.283, "step": 15261 }, { "epoch": 0.6907445123331071, "grad_norm": 0.6184613319727662, "learning_rate": 2.3058826344450973e-06, "loss": 0.2755, "step": 15262 }, { "epoch": 0.6907897714415026, "grad_norm": 0.602580473203544, "learning_rate": 2.3052652370204344e-06, "loss": 0.267, "step": 15263 }, { "epoch": 0.6908350305498981, "grad_norm": 0.8129775661719012, "learning_rate": 2.304647897497345e-06, "loss": 0.308, "step": 15264 }, { "epoch": 0.6908802896582937, "grad_norm": 0.5952174745127535, "learning_rate": 2.3040306158890963e-06, "loss": 0.3276, "step": 15265 }, { "epoch": 0.6909255487666893, "grad_norm": 0.6401445111905103, "learning_rate": 2.3034133922089496e-06, "loss": 0.303, "step": 15266 }, { "epoch": 0.6909708078750849, "grad_norm": 0.6491990782272895, "learning_rate": 2.3027962264701654e-06, "loss": 0.3166, "step": 15267 }, { "epoch": 0.6910160669834804, "grad_norm": 0.6406130372218273, "learning_rate": 2.3021791186860078e-06, "loss": 0.3303, "step": 15268 }, { "epoch": 0.691061326091876, "grad_norm": 0.27208154366542836, "learning_rate": 2.3015620688697336e-06, "loss": 0.4585, "step": 15269 }, { "epoch": 0.6911065852002716, "grad_norm": 0.6524243568444674, "learning_rate": 2.300945077034605e-06, "loss": 0.2978, "step": 15270 }, { "epoch": 0.6911518443086672, "grad_norm": 0.6690897724061226, "learning_rate": 2.300328143193875e-06, "loss": 0.32, "step": 15271 }, { "epoch": 0.6911971034170626, "grad_norm": 0.608535062697799, "learning_rate": 2.2997112673608035e-06, "loss": 0.2847, "step": 15272 }, { "epoch": 0.6912423625254582, "grad_norm": 0.5510052701718511, "learning_rate": 2.299094449548642e-06, "loss": 0.3089, "step": 15273 }, { "epoch": 0.6912876216338538, "grad_norm": 0.62296168685443, "learning_rate": 2.298477689770648e-06, "loss": 0.3045, "step": 15274 }, { "epoch": 0.6913328807422494, "grad_norm": 0.6005738358702823, "learning_rate": 2.2978609880400706e-06, "loss": 0.3012, "step": 15275 }, { "epoch": 0.6913781398506449, "grad_norm": 0.6278202168048158, "learning_rate": 2.29724434437016e-06, "loss": 0.3577, "step": 15276 }, { "epoch": 0.6914233989590405, "grad_norm": 0.6911484776265677, "learning_rate": 2.296627758774167e-06, "loss": 0.2752, "step": 15277 }, { "epoch": 0.6914686580674361, "grad_norm": 0.2865799014938107, "learning_rate": 2.296011231265343e-06, "loss": 0.4458, "step": 15278 }, { "epoch": 0.6915139171758317, "grad_norm": 0.7099050953246633, "learning_rate": 2.2953947618569335e-06, "loss": 0.3224, "step": 15279 }, { "epoch": 0.6915591762842273, "grad_norm": 0.5985945647670636, "learning_rate": 2.2947783505621813e-06, "loss": 0.286, "step": 15280 }, { "epoch": 0.6916044353926227, "grad_norm": 0.6077100325384621, "learning_rate": 2.2941619973943363e-06, "loss": 0.3016, "step": 15281 }, { "epoch": 0.6916496945010183, "grad_norm": 0.7309766071163295, "learning_rate": 2.2935457023666375e-06, "loss": 0.3239, "step": 15282 }, { "epoch": 0.6916949536094139, "grad_norm": 0.5335062188242945, "learning_rate": 2.2929294654923313e-06, "loss": 0.2771, "step": 15283 }, { "epoch": 0.6917402127178095, "grad_norm": 0.6075102502850656, "learning_rate": 2.2923132867846564e-06, "loss": 0.298, "step": 15284 }, { "epoch": 0.691785471826205, "grad_norm": 0.4341561501175963, "learning_rate": 2.2916971662568514e-06, "loss": 0.4757, "step": 15285 }, { "epoch": 0.6918307309346006, "grad_norm": 0.6455224544042446, "learning_rate": 2.2910811039221564e-06, "loss": 0.3227, "step": 15286 }, { "epoch": 0.6918759900429962, "grad_norm": 0.28667936098983365, "learning_rate": 2.2904650997938105e-06, "loss": 0.4699, "step": 15287 }, { "epoch": 0.6919212491513917, "grad_norm": 0.28509191413429114, "learning_rate": 2.2898491538850478e-06, "loss": 0.4993, "step": 15288 }, { "epoch": 0.6919665082597872, "grad_norm": 0.6057133211093666, "learning_rate": 2.2892332662091017e-06, "loss": 0.2808, "step": 15289 }, { "epoch": 0.6920117673681828, "grad_norm": 0.5920501028162446, "learning_rate": 2.288617436779207e-06, "loss": 0.2905, "step": 15290 }, { "epoch": 0.6920570264765784, "grad_norm": 0.26799070501983296, "learning_rate": 2.2880016656085995e-06, "loss": 0.4701, "step": 15291 }, { "epoch": 0.692102285584974, "grad_norm": 0.30276461529003007, "learning_rate": 2.2873859527105037e-06, "loss": 0.4876, "step": 15292 }, { "epoch": 0.6921475446933696, "grad_norm": 0.5982779481081313, "learning_rate": 2.286770298098153e-06, "loss": 0.3028, "step": 15293 }, { "epoch": 0.6921928038017651, "grad_norm": 0.6289253461990337, "learning_rate": 2.286154701784776e-06, "loss": 0.3119, "step": 15294 }, { "epoch": 0.6922380629101607, "grad_norm": 0.6455845115964702, "learning_rate": 2.2855391637836006e-06, "loss": 0.3064, "step": 15295 }, { "epoch": 0.6922833220185562, "grad_norm": 0.6567945353620316, "learning_rate": 2.2849236841078496e-06, "loss": 0.2721, "step": 15296 }, { "epoch": 0.6923285811269518, "grad_norm": 0.5886357694478286, "learning_rate": 2.2843082627707517e-06, "loss": 0.2861, "step": 15297 }, { "epoch": 0.6923738402353473, "grad_norm": 0.7481216016016371, "learning_rate": 2.2836928997855274e-06, "loss": 0.3164, "step": 15298 }, { "epoch": 0.6924190993437429, "grad_norm": 0.6880285776070417, "learning_rate": 2.2830775951654018e-06, "loss": 0.3323, "step": 15299 }, { "epoch": 0.6924643584521385, "grad_norm": 0.5801076690074016, "learning_rate": 2.282462348923592e-06, "loss": 0.3055, "step": 15300 }, { "epoch": 0.6925096175605341, "grad_norm": 0.5879289946383263, "learning_rate": 2.281847161073322e-06, "loss": 0.283, "step": 15301 }, { "epoch": 0.6925548766689297, "grad_norm": 0.6179725409027124, "learning_rate": 2.2812320316278065e-06, "loss": 0.3125, "step": 15302 }, { "epoch": 0.6926001357773252, "grad_norm": 0.6156506331013871, "learning_rate": 2.2806169606002663e-06, "loss": 0.279, "step": 15303 }, { "epoch": 0.6926453948857207, "grad_norm": 1.2120338640195765, "learning_rate": 2.280001948003916e-06, "loss": 0.2796, "step": 15304 }, { "epoch": 0.6926906539941163, "grad_norm": 0.6522398121819206, "learning_rate": 2.279386993851968e-06, "loss": 0.3045, "step": 15305 }, { "epoch": 0.6927359131025119, "grad_norm": 0.6099418151117931, "learning_rate": 2.278772098157638e-06, "loss": 0.3168, "step": 15306 }, { "epoch": 0.6927811722109074, "grad_norm": 0.6312106783246817, "learning_rate": 2.2781572609341397e-06, "loss": 0.3295, "step": 15307 }, { "epoch": 0.692826431319303, "grad_norm": 0.6366029474353617, "learning_rate": 2.2775424821946824e-06, "loss": 0.3372, "step": 15308 }, { "epoch": 0.6928716904276986, "grad_norm": 0.6116300351638355, "learning_rate": 2.2769277619524737e-06, "loss": 0.2665, "step": 15309 }, { "epoch": 0.6929169495360942, "grad_norm": 0.5517062110299067, "learning_rate": 2.276313100220726e-06, "loss": 0.3119, "step": 15310 }, { "epoch": 0.6929622086444897, "grad_norm": 0.6117086962300063, "learning_rate": 2.275698497012643e-06, "loss": 0.3067, "step": 15311 }, { "epoch": 0.6930074677528852, "grad_norm": 0.6382350178222626, "learning_rate": 2.275083952341434e-06, "loss": 0.2745, "step": 15312 }, { "epoch": 0.6930527268612808, "grad_norm": 0.6304432415131873, "learning_rate": 2.2744694662203022e-06, "loss": 0.3727, "step": 15313 }, { "epoch": 0.6930979859696764, "grad_norm": 0.7568616098636195, "learning_rate": 2.273855038662448e-06, "loss": 0.2959, "step": 15314 }, { "epoch": 0.693143245078072, "grad_norm": 0.5968925730201458, "learning_rate": 2.2732406696810773e-06, "loss": 0.3061, "step": 15315 }, { "epoch": 0.6931885041864675, "grad_norm": 0.6491887672754614, "learning_rate": 2.2726263592893914e-06, "loss": 0.2965, "step": 15316 }, { "epoch": 0.6932337632948631, "grad_norm": 0.5861213051717736, "learning_rate": 2.2720121075005884e-06, "loss": 0.3298, "step": 15317 }, { "epoch": 0.6932790224032587, "grad_norm": 0.639189288756921, "learning_rate": 2.271397914327865e-06, "loss": 0.281, "step": 15318 }, { "epoch": 0.6933242815116543, "grad_norm": 0.5877308010010714, "learning_rate": 2.2707837797844208e-06, "loss": 0.2718, "step": 15319 }, { "epoch": 0.6933695406200497, "grad_norm": 0.32706793670548634, "learning_rate": 2.2701697038834543e-06, "loss": 0.4743, "step": 15320 }, { "epoch": 0.6934147997284453, "grad_norm": 0.5812374384104216, "learning_rate": 2.269555686638153e-06, "loss": 0.3242, "step": 15321 }, { "epoch": 0.6934600588368409, "grad_norm": 0.8841772008999619, "learning_rate": 2.268941728061714e-06, "loss": 0.3378, "step": 15322 }, { "epoch": 0.6935053179452365, "grad_norm": 0.6341474894444818, "learning_rate": 2.2683278281673315e-06, "loss": 0.2963, "step": 15323 }, { "epoch": 0.693550577053632, "grad_norm": 0.5919393409921639, "learning_rate": 2.2677139869681943e-06, "loss": 0.3238, "step": 15324 }, { "epoch": 0.6935958361620276, "grad_norm": 0.5823503974483093, "learning_rate": 2.2671002044774896e-06, "loss": 0.2791, "step": 15325 }, { "epoch": 0.6936410952704232, "grad_norm": 0.6825871993082148, "learning_rate": 2.266486480708411e-06, "loss": 0.3359, "step": 15326 }, { "epoch": 0.6936863543788188, "grad_norm": 0.6033434672171372, "learning_rate": 2.26587281567414e-06, "loss": 0.3062, "step": 15327 }, { "epoch": 0.6937316134872143, "grad_norm": 0.5871056512885556, "learning_rate": 2.265259209387867e-06, "loss": 0.2796, "step": 15328 }, { "epoch": 0.6937768725956098, "grad_norm": 0.2792552271003976, "learning_rate": 2.2646456618627723e-06, "loss": 0.4542, "step": 15329 }, { "epoch": 0.6938221317040054, "grad_norm": 0.6157049851124522, "learning_rate": 2.2640321731120434e-06, "loss": 0.3141, "step": 15330 }, { "epoch": 0.693867390812401, "grad_norm": 0.6200462231323365, "learning_rate": 2.2634187431488585e-06, "loss": 0.2843, "step": 15331 }, { "epoch": 0.6939126499207966, "grad_norm": 0.6690563909113196, "learning_rate": 2.262805371986402e-06, "loss": 0.3096, "step": 15332 }, { "epoch": 0.6939579090291921, "grad_norm": 0.5942516202410125, "learning_rate": 2.2621920596378503e-06, "loss": 0.3136, "step": 15333 }, { "epoch": 0.6940031681375877, "grad_norm": 0.6065599068867679, "learning_rate": 2.2615788061163824e-06, "loss": 0.2676, "step": 15334 }, { "epoch": 0.6940484272459833, "grad_norm": 0.6547981697132818, "learning_rate": 2.2609656114351745e-06, "loss": 0.2857, "step": 15335 }, { "epoch": 0.6940936863543788, "grad_norm": 0.6390839950958643, "learning_rate": 2.2603524756074057e-06, "loss": 0.36, "step": 15336 }, { "epoch": 0.6941389454627744, "grad_norm": 0.6215447818598099, "learning_rate": 2.2597393986462477e-06, "loss": 0.3148, "step": 15337 }, { "epoch": 0.6941842045711699, "grad_norm": 0.6120277468101288, "learning_rate": 2.2591263805648724e-06, "loss": 0.2996, "step": 15338 }, { "epoch": 0.6942294636795655, "grad_norm": 0.2806594600194946, "learning_rate": 2.258513421376455e-06, "loss": 0.4759, "step": 15339 }, { "epoch": 0.6942747227879611, "grad_norm": 0.6209858582212454, "learning_rate": 2.2579005210941622e-06, "loss": 0.3083, "step": 15340 }, { "epoch": 0.6943199818963567, "grad_norm": 0.6114612468150938, "learning_rate": 2.2572876797311676e-06, "loss": 0.3039, "step": 15341 }, { "epoch": 0.6943652410047522, "grad_norm": 0.29673365820543085, "learning_rate": 2.256674897300635e-06, "loss": 0.4914, "step": 15342 }, { "epoch": 0.6944105001131478, "grad_norm": 0.5557606997037232, "learning_rate": 2.2560621738157357e-06, "loss": 0.2851, "step": 15343 }, { "epoch": 0.6944557592215433, "grad_norm": 0.5788703839311166, "learning_rate": 2.2554495092896306e-06, "loss": 0.3078, "step": 15344 }, { "epoch": 0.6945010183299389, "grad_norm": 0.6749206715587992, "learning_rate": 2.254836903735488e-06, "loss": 0.3457, "step": 15345 }, { "epoch": 0.6945462774383344, "grad_norm": 0.6495157356241859, "learning_rate": 2.25422435716647e-06, "loss": 0.3052, "step": 15346 }, { "epoch": 0.69459153654673, "grad_norm": 0.6655681762682285, "learning_rate": 2.2536118695957353e-06, "loss": 0.3935, "step": 15347 }, { "epoch": 0.6946367956551256, "grad_norm": 0.6001421976661787, "learning_rate": 2.252999441036447e-06, "loss": 0.263, "step": 15348 }, { "epoch": 0.6946820547635212, "grad_norm": 0.616823676348002, "learning_rate": 2.252387071501767e-06, "loss": 0.3224, "step": 15349 }, { "epoch": 0.6947273138719168, "grad_norm": 0.6300197767755465, "learning_rate": 2.2517747610048467e-06, "loss": 0.3126, "step": 15350 }, { "epoch": 0.6947725729803123, "grad_norm": 0.6020949362258067, "learning_rate": 2.2511625095588465e-06, "loss": 0.2893, "step": 15351 }, { "epoch": 0.6948178320887078, "grad_norm": 0.8110079469776496, "learning_rate": 2.2505503171769233e-06, "loss": 0.2813, "step": 15352 }, { "epoch": 0.6948630911971034, "grad_norm": 0.6305017685210226, "learning_rate": 2.2499381838722296e-06, "loss": 0.3671, "step": 15353 }, { "epoch": 0.694908350305499, "grad_norm": 0.6055438844247768, "learning_rate": 2.2493261096579163e-06, "loss": 0.3377, "step": 15354 }, { "epoch": 0.6949536094138945, "grad_norm": 0.8846887760337592, "learning_rate": 2.2487140945471382e-06, "loss": 0.3407, "step": 15355 }, { "epoch": 0.6949988685222901, "grad_norm": 0.6130218136946555, "learning_rate": 2.2481021385530427e-06, "loss": 0.3077, "step": 15356 }, { "epoch": 0.6950441276306857, "grad_norm": 0.30618512419935756, "learning_rate": 2.2474902416887824e-06, "loss": 0.4541, "step": 15357 }, { "epoch": 0.6950893867390813, "grad_norm": 0.6349363257772199, "learning_rate": 2.246878403967501e-06, "loss": 0.331, "step": 15358 }, { "epoch": 0.6951346458474768, "grad_norm": 0.5705475206325165, "learning_rate": 2.2462666254023495e-06, "loss": 0.3305, "step": 15359 }, { "epoch": 0.6951799049558723, "grad_norm": 0.2784925673579084, "learning_rate": 2.2456549060064684e-06, "loss": 0.4916, "step": 15360 }, { "epoch": 0.6952251640642679, "grad_norm": 0.6430028863711853, "learning_rate": 2.245043245793006e-06, "loss": 0.3054, "step": 15361 }, { "epoch": 0.6952704231726635, "grad_norm": 0.5952655056608498, "learning_rate": 2.2444316447751034e-06, "loss": 0.3257, "step": 15362 }, { "epoch": 0.6953156822810591, "grad_norm": 0.6920910924343269, "learning_rate": 2.2438201029658995e-06, "loss": 0.3128, "step": 15363 }, { "epoch": 0.6953609413894546, "grad_norm": 0.5834875442704955, "learning_rate": 2.243208620378537e-06, "loss": 0.3147, "step": 15364 }, { "epoch": 0.6954062004978502, "grad_norm": 0.6003270635637863, "learning_rate": 2.2425971970261558e-06, "loss": 0.2993, "step": 15365 }, { "epoch": 0.6954514596062458, "grad_norm": 0.6027439237743176, "learning_rate": 2.2419858329218926e-06, "loss": 0.2991, "step": 15366 }, { "epoch": 0.6954967187146414, "grad_norm": 0.6248205418356746, "learning_rate": 2.2413745280788806e-06, "loss": 0.3246, "step": 15367 }, { "epoch": 0.6955419778230368, "grad_norm": 0.6615877596406995, "learning_rate": 2.2407632825102605e-06, "loss": 0.2491, "step": 15368 }, { "epoch": 0.6955872369314324, "grad_norm": 0.63773715869281, "learning_rate": 2.24015209622916e-06, "loss": 0.2661, "step": 15369 }, { "epoch": 0.695632496039828, "grad_norm": 0.5830994928670054, "learning_rate": 2.2395409692487174e-06, "loss": 0.2991, "step": 15370 }, { "epoch": 0.6956777551482236, "grad_norm": 0.2917371253576962, "learning_rate": 2.2389299015820592e-06, "loss": 0.4761, "step": 15371 }, { "epoch": 0.6957230142566192, "grad_norm": 0.64376924689846, "learning_rate": 2.2383188932423192e-06, "loss": 0.2921, "step": 15372 }, { "epoch": 0.6957682733650147, "grad_norm": 0.5848437032666506, "learning_rate": 2.237707944242623e-06, "loss": 0.3158, "step": 15373 }, { "epoch": 0.6958135324734103, "grad_norm": 0.5974157437942595, "learning_rate": 2.2370970545961005e-06, "loss": 0.3715, "step": 15374 }, { "epoch": 0.6958587915818059, "grad_norm": 0.29087052445388767, "learning_rate": 2.236486224315877e-06, "loss": 0.469, "step": 15375 }, { "epoch": 0.6959040506902014, "grad_norm": 0.605095854819963, "learning_rate": 2.2358754534150752e-06, "loss": 0.2879, "step": 15376 }, { "epoch": 0.6959493097985969, "grad_norm": 0.6365435641897976, "learning_rate": 2.2352647419068207e-06, "loss": 0.3192, "step": 15377 }, { "epoch": 0.6959945689069925, "grad_norm": 0.6548937754424179, "learning_rate": 2.2346540898042372e-06, "loss": 0.2828, "step": 15378 }, { "epoch": 0.6960398280153881, "grad_norm": 0.5831388735421803, "learning_rate": 2.2340434971204445e-06, "loss": 0.3149, "step": 15379 }, { "epoch": 0.6960850871237837, "grad_norm": 0.6870834276561678, "learning_rate": 2.2334329638685598e-06, "loss": 0.3101, "step": 15380 }, { "epoch": 0.6961303462321792, "grad_norm": 0.5934653026487168, "learning_rate": 2.2328224900617064e-06, "loss": 0.3087, "step": 15381 }, { "epoch": 0.6961756053405748, "grad_norm": 0.6883932566836034, "learning_rate": 2.2322120757129983e-06, "loss": 0.2841, "step": 15382 }, { "epoch": 0.6962208644489704, "grad_norm": 0.7244763280980958, "learning_rate": 2.2316017208355504e-06, "loss": 0.3141, "step": 15383 }, { "epoch": 0.696266123557366, "grad_norm": 0.6129160752040061, "learning_rate": 2.2309914254424807e-06, "loss": 0.2932, "step": 15384 }, { "epoch": 0.6963113826657615, "grad_norm": 0.26808067124315665, "learning_rate": 2.2303811895468996e-06, "loss": 0.4697, "step": 15385 }, { "epoch": 0.696356641774157, "grad_norm": 0.6290085659815462, "learning_rate": 2.2297710131619214e-06, "loss": 0.3376, "step": 15386 }, { "epoch": 0.6964019008825526, "grad_norm": 0.6568816515106904, "learning_rate": 2.229160896300655e-06, "loss": 0.3676, "step": 15387 }, { "epoch": 0.6964471599909482, "grad_norm": 0.610442344706643, "learning_rate": 2.228550838976213e-06, "loss": 0.2938, "step": 15388 }, { "epoch": 0.6964924190993438, "grad_norm": 0.2898365808540318, "learning_rate": 2.227940841201699e-06, "loss": 0.4745, "step": 15389 }, { "epoch": 0.6965376782077393, "grad_norm": 0.2822728999278438, "learning_rate": 2.227330902990225e-06, "loss": 0.4687, "step": 15390 }, { "epoch": 0.6965829373161349, "grad_norm": 0.5926488550372246, "learning_rate": 2.2267210243548943e-06, "loss": 0.2607, "step": 15391 }, { "epoch": 0.6966281964245304, "grad_norm": 0.6007425928292506, "learning_rate": 2.226111205308809e-06, "loss": 0.3012, "step": 15392 }, { "epoch": 0.696673455532926, "grad_norm": 0.2771887358013033, "learning_rate": 2.225501445865075e-06, "loss": 0.4933, "step": 15393 }, { "epoch": 0.6967187146413215, "grad_norm": 0.6469024784582776, "learning_rate": 2.224891746036795e-06, "loss": 0.2889, "step": 15394 }, { "epoch": 0.6967639737497171, "grad_norm": 0.2972666825183385, "learning_rate": 2.224282105837069e-06, "loss": 0.493, "step": 15395 }, { "epoch": 0.6968092328581127, "grad_norm": 0.6249787087006309, "learning_rate": 2.2236725252789933e-06, "loss": 0.2966, "step": 15396 }, { "epoch": 0.6968544919665083, "grad_norm": 0.28522173700529135, "learning_rate": 2.22306300437567e-06, "loss": 0.468, "step": 15397 }, { "epoch": 0.6968997510749039, "grad_norm": 0.6156039440420061, "learning_rate": 2.222453543140192e-06, "loss": 0.3143, "step": 15398 }, { "epoch": 0.6969450101832994, "grad_norm": 0.5954022875600979, "learning_rate": 2.221844141585659e-06, "loss": 0.3107, "step": 15399 }, { "epoch": 0.6969902692916949, "grad_norm": 0.6166093898007311, "learning_rate": 2.221234799725161e-06, "loss": 0.2811, "step": 15400 }, { "epoch": 0.6970355284000905, "grad_norm": 0.6630235280814255, "learning_rate": 2.220625517571795e-06, "loss": 0.3344, "step": 15401 }, { "epoch": 0.6970807875084861, "grad_norm": 0.6450303140316446, "learning_rate": 2.2200162951386477e-06, "loss": 0.3113, "step": 15402 }, { "epoch": 0.6971260466168816, "grad_norm": 0.6059302954765791, "learning_rate": 2.219407132438815e-06, "loss": 0.3176, "step": 15403 }, { "epoch": 0.6971713057252772, "grad_norm": 0.6265849775857281, "learning_rate": 2.2187980294853827e-06, "loss": 0.2914, "step": 15404 }, { "epoch": 0.6972165648336728, "grad_norm": 0.6272461141911604, "learning_rate": 2.2181889862914368e-06, "loss": 0.2895, "step": 15405 }, { "epoch": 0.6972618239420684, "grad_norm": 0.6329130486899887, "learning_rate": 2.217580002870066e-06, "loss": 0.309, "step": 15406 }, { "epoch": 0.697307083050464, "grad_norm": 0.6510575440133307, "learning_rate": 2.2169710792343574e-06, "loss": 0.3516, "step": 15407 }, { "epoch": 0.6973523421588594, "grad_norm": 0.6025879494225266, "learning_rate": 2.216362215397393e-06, "loss": 0.2705, "step": 15408 }, { "epoch": 0.697397601267255, "grad_norm": 0.6229692611246024, "learning_rate": 2.2157534113722533e-06, "loss": 0.3211, "step": 15409 }, { "epoch": 0.6974428603756506, "grad_norm": 0.6336128765611528, "learning_rate": 2.215144667172023e-06, "loss": 0.3557, "step": 15410 }, { "epoch": 0.6974881194840462, "grad_norm": 0.6089725873892945, "learning_rate": 2.21453598280978e-06, "loss": 0.3174, "step": 15411 }, { "epoch": 0.6975333785924417, "grad_norm": 0.6146045841194961, "learning_rate": 2.213927358298605e-06, "loss": 0.3422, "step": 15412 }, { "epoch": 0.6975786377008373, "grad_norm": 0.6473314379938416, "learning_rate": 2.213318793651573e-06, "loss": 0.2705, "step": 15413 }, { "epoch": 0.6976238968092329, "grad_norm": 0.5548174179736901, "learning_rate": 2.2127102888817626e-06, "loss": 0.2832, "step": 15414 }, { "epoch": 0.6976691559176285, "grad_norm": 0.6272400617526707, "learning_rate": 2.2121018440022458e-06, "loss": 0.3175, "step": 15415 }, { "epoch": 0.6977144150260239, "grad_norm": 0.2872778648761435, "learning_rate": 2.2114934590261e-06, "loss": 0.4648, "step": 15416 }, { "epoch": 0.6977596741344195, "grad_norm": 0.6501873735094622, "learning_rate": 2.2108851339663956e-06, "loss": 0.3486, "step": 15417 }, { "epoch": 0.6978049332428151, "grad_norm": 0.5714597352968046, "learning_rate": 2.210276868836202e-06, "loss": 0.2585, "step": 15418 }, { "epoch": 0.6978501923512107, "grad_norm": 0.6699368490383701, "learning_rate": 2.209668663648592e-06, "loss": 0.2976, "step": 15419 }, { "epoch": 0.6978954514596063, "grad_norm": 0.5839244450998203, "learning_rate": 2.2090605184166325e-06, "loss": 0.3516, "step": 15420 }, { "epoch": 0.6979407105680018, "grad_norm": 0.6601736114108441, "learning_rate": 2.208452433153389e-06, "loss": 0.2602, "step": 15421 }, { "epoch": 0.6979859696763974, "grad_norm": 1.2962557190320656, "learning_rate": 2.207844407871929e-06, "loss": 0.2763, "step": 15422 }, { "epoch": 0.698031228784793, "grad_norm": 0.3335447019667144, "learning_rate": 2.2072364425853193e-06, "loss": 0.475, "step": 15423 }, { "epoch": 0.6980764878931885, "grad_norm": 0.6271978005182927, "learning_rate": 2.206628537306621e-06, "loss": 0.3245, "step": 15424 }, { "epoch": 0.698121747001584, "grad_norm": 0.5811086521395183, "learning_rate": 2.206020692048895e-06, "loss": 0.2691, "step": 15425 }, { "epoch": 0.6981670061099796, "grad_norm": 0.5722795124330649, "learning_rate": 2.2054129068252037e-06, "loss": 0.2459, "step": 15426 }, { "epoch": 0.6982122652183752, "grad_norm": 0.6096023565887093, "learning_rate": 2.2048051816486054e-06, "loss": 0.2928, "step": 15427 }, { "epoch": 0.6982575243267708, "grad_norm": 0.294609066488518, "learning_rate": 2.2041975165321606e-06, "loss": 0.4718, "step": 15428 }, { "epoch": 0.6983027834351663, "grad_norm": 0.2535641837168702, "learning_rate": 2.2035899114889226e-06, "loss": 0.4586, "step": 15429 }, { "epoch": 0.6983480425435619, "grad_norm": 0.5771085270607175, "learning_rate": 2.2029823665319504e-06, "loss": 0.272, "step": 15430 }, { "epoch": 0.6983933016519575, "grad_norm": 0.6769738222692228, "learning_rate": 2.2023748816742955e-06, "loss": 0.3222, "step": 15431 }, { "epoch": 0.698438560760353, "grad_norm": 0.6033882521564669, "learning_rate": 2.201767456929014e-06, "loss": 0.2761, "step": 15432 }, { "epoch": 0.6984838198687486, "grad_norm": 0.5714842277367986, "learning_rate": 2.2011600923091554e-06, "loss": 0.3192, "step": 15433 }, { "epoch": 0.6985290789771441, "grad_norm": 0.6509570167193155, "learning_rate": 2.200552787827768e-06, "loss": 0.3089, "step": 15434 }, { "epoch": 0.6985743380855397, "grad_norm": 0.6189100093640094, "learning_rate": 2.1999455434979046e-06, "loss": 0.2903, "step": 15435 }, { "epoch": 0.6986195971939353, "grad_norm": 0.6344851280611853, "learning_rate": 2.1993383593326127e-06, "loss": 0.3026, "step": 15436 }, { "epoch": 0.6986648563023309, "grad_norm": 0.27780582832440787, "learning_rate": 2.1987312353449386e-06, "loss": 0.4913, "step": 15437 }, { "epoch": 0.6987101154107264, "grad_norm": 0.28615854234003973, "learning_rate": 2.1981241715479247e-06, "loss": 0.4925, "step": 15438 }, { "epoch": 0.698755374519122, "grad_norm": 0.5812592354372973, "learning_rate": 2.1975171679546187e-06, "loss": 0.2965, "step": 15439 }, { "epoch": 0.6988006336275175, "grad_norm": 0.6557422880930702, "learning_rate": 2.1969102245780592e-06, "loss": 0.3421, "step": 15440 }, { "epoch": 0.6988458927359131, "grad_norm": 0.6649927896003363, "learning_rate": 2.196303341431293e-06, "loss": 0.269, "step": 15441 }, { "epoch": 0.6988911518443087, "grad_norm": 0.5648141574982591, "learning_rate": 2.1956965185273545e-06, "loss": 0.3044, "step": 15442 }, { "epoch": 0.6989364109527042, "grad_norm": 0.6455390831857626, "learning_rate": 2.1950897558792873e-06, "loss": 0.2672, "step": 15443 }, { "epoch": 0.6989816700610998, "grad_norm": 0.65669426469483, "learning_rate": 2.1944830535001244e-06, "loss": 0.3157, "step": 15444 }, { "epoch": 0.6990269291694954, "grad_norm": 0.6604232285727459, "learning_rate": 2.193876411402906e-06, "loss": 0.2951, "step": 15445 }, { "epoch": 0.699072188277891, "grad_norm": 0.6278571713730251, "learning_rate": 2.193269829600665e-06, "loss": 0.3403, "step": 15446 }, { "epoch": 0.6991174473862865, "grad_norm": 0.2877594596193233, "learning_rate": 2.1926633081064336e-06, "loss": 0.4824, "step": 15447 }, { "epoch": 0.699162706494682, "grad_norm": 0.626805222348979, "learning_rate": 2.1920568469332458e-06, "loss": 0.2951, "step": 15448 }, { "epoch": 0.6992079656030776, "grad_norm": 0.6295785599758981, "learning_rate": 2.191450446094136e-06, "loss": 0.3224, "step": 15449 }, { "epoch": 0.6992532247114732, "grad_norm": 0.9393405038196936, "learning_rate": 2.190844105602127e-06, "loss": 0.283, "step": 15450 }, { "epoch": 0.6992984838198687, "grad_norm": 0.33121434047560755, "learning_rate": 2.19023782547025e-06, "loss": 0.4909, "step": 15451 }, { "epoch": 0.6993437429282643, "grad_norm": 0.3220401310269192, "learning_rate": 2.1896316057115343e-06, "loss": 0.4659, "step": 15452 }, { "epoch": 0.6993890020366599, "grad_norm": 0.5645794389594799, "learning_rate": 2.189025446339004e-06, "loss": 0.266, "step": 15453 }, { "epoch": 0.6994342611450555, "grad_norm": 0.6312858299620019, "learning_rate": 2.1884193473656824e-06, "loss": 0.2731, "step": 15454 }, { "epoch": 0.6994795202534511, "grad_norm": 0.6166425710591585, "learning_rate": 2.187813308804595e-06, "loss": 0.3496, "step": 15455 }, { "epoch": 0.6995247793618465, "grad_norm": 0.6094685675555125, "learning_rate": 2.1872073306687614e-06, "loss": 0.3373, "step": 15456 }, { "epoch": 0.6995700384702421, "grad_norm": 0.6105705990820928, "learning_rate": 2.186601412971205e-06, "loss": 0.3242, "step": 15457 }, { "epoch": 0.6996152975786377, "grad_norm": 0.2643205914938593, "learning_rate": 2.185995555724942e-06, "loss": 0.4515, "step": 15458 }, { "epoch": 0.6996605566870333, "grad_norm": 0.29166666670093994, "learning_rate": 2.1853897589429935e-06, "loss": 0.4682, "step": 15459 }, { "epoch": 0.6997058157954288, "grad_norm": 0.6264830100800826, "learning_rate": 2.184784022638373e-06, "loss": 0.3121, "step": 15460 }, { "epoch": 0.6997510749038244, "grad_norm": 0.25817546316852996, "learning_rate": 2.184178346824099e-06, "loss": 0.4483, "step": 15461 }, { "epoch": 0.69979633401222, "grad_norm": 0.2978882566016645, "learning_rate": 2.1835727315131842e-06, "loss": 0.4954, "step": 15462 }, { "epoch": 0.6998415931206156, "grad_norm": 0.5901124855544712, "learning_rate": 2.18296717671864e-06, "loss": 0.3194, "step": 15463 }, { "epoch": 0.699886852229011, "grad_norm": 0.28291400965652297, "learning_rate": 2.1823616824534788e-06, "loss": 0.4612, "step": 15464 }, { "epoch": 0.6999321113374066, "grad_norm": 0.6461112710167152, "learning_rate": 2.181756248730714e-06, "loss": 0.2949, "step": 15465 }, { "epoch": 0.6999773704458022, "grad_norm": 0.5991463451115281, "learning_rate": 2.1811508755633508e-06, "loss": 0.3271, "step": 15466 }, { "epoch": 0.7000226295541978, "grad_norm": 0.6766384769010018, "learning_rate": 2.1805455629643966e-06, "loss": 0.255, "step": 15467 }, { "epoch": 0.7000678886625934, "grad_norm": 0.6061024812032464, "learning_rate": 2.179940310946861e-06, "loss": 0.2896, "step": 15468 }, { "epoch": 0.7001131477709889, "grad_norm": 0.6709462130279129, "learning_rate": 2.179335119523745e-06, "loss": 0.2962, "step": 15469 }, { "epoch": 0.7001584068793845, "grad_norm": 0.581660104951167, "learning_rate": 2.178729988708056e-06, "loss": 0.2924, "step": 15470 }, { "epoch": 0.70020366598778, "grad_norm": 0.6539112408764716, "learning_rate": 2.178124918512793e-06, "loss": 0.2999, "step": 15471 }, { "epoch": 0.7002489250961756, "grad_norm": 0.6104589647548964, "learning_rate": 2.17751990895096e-06, "loss": 0.2721, "step": 15472 }, { "epoch": 0.7002941842045711, "grad_norm": 0.6344018666873753, "learning_rate": 2.1769149600355545e-06, "loss": 0.2883, "step": 15473 }, { "epoch": 0.7003394433129667, "grad_norm": 0.8703735890057297, "learning_rate": 2.176310071779577e-06, "loss": 0.2954, "step": 15474 }, { "epoch": 0.7003847024213623, "grad_norm": 0.6420105953615923, "learning_rate": 2.1757052441960248e-06, "loss": 0.2981, "step": 15475 }, { "epoch": 0.7004299615297579, "grad_norm": 0.5941777634412325, "learning_rate": 2.17510047729789e-06, "loss": 0.2762, "step": 15476 }, { "epoch": 0.7004752206381534, "grad_norm": 0.3041590428798745, "learning_rate": 2.174495771098171e-06, "loss": 0.4802, "step": 15477 }, { "epoch": 0.700520479746549, "grad_norm": 0.3065766008871142, "learning_rate": 2.173891125609863e-06, "loss": 0.4989, "step": 15478 }, { "epoch": 0.7005657388549446, "grad_norm": 0.6260164462093338, "learning_rate": 2.1732865408459508e-06, "loss": 0.3296, "step": 15479 }, { "epoch": 0.7006109979633401, "grad_norm": 0.2776070640653133, "learning_rate": 2.17268201681943e-06, "loss": 0.4473, "step": 15480 }, { "epoch": 0.7006562570717357, "grad_norm": 0.2922294395288578, "learning_rate": 2.172077553543291e-06, "loss": 0.4879, "step": 15481 }, { "epoch": 0.7007015161801312, "grad_norm": 0.6019312043691404, "learning_rate": 2.17147315103052e-06, "loss": 0.2829, "step": 15482 }, { "epoch": 0.7007467752885268, "grad_norm": 0.5946946099216283, "learning_rate": 2.1708688092941018e-06, "loss": 0.3021, "step": 15483 }, { "epoch": 0.7007920343969224, "grad_norm": 0.6213603603657362, "learning_rate": 2.1702645283470238e-06, "loss": 0.3011, "step": 15484 }, { "epoch": 0.700837293505318, "grad_norm": 0.6604124753553283, "learning_rate": 2.169660308202272e-06, "loss": 0.3158, "step": 15485 }, { "epoch": 0.7008825526137135, "grad_norm": 0.6684013995648608, "learning_rate": 2.169056148872828e-06, "loss": 0.2895, "step": 15486 }, { "epoch": 0.700927811722109, "grad_norm": 0.7528060434075988, "learning_rate": 2.1684520503716704e-06, "loss": 0.2915, "step": 15487 }, { "epoch": 0.7009730708305046, "grad_norm": 0.31181609176172187, "learning_rate": 2.167848012711784e-06, "loss": 0.4931, "step": 15488 }, { "epoch": 0.7010183299389002, "grad_norm": 0.5794370482748491, "learning_rate": 2.1672440359061435e-06, "loss": 0.3021, "step": 15489 }, { "epoch": 0.7010635890472958, "grad_norm": 0.6148117834325464, "learning_rate": 2.16664011996773e-06, "loss": 0.3133, "step": 15490 }, { "epoch": 0.7011088481556913, "grad_norm": 0.7677040014486417, "learning_rate": 2.166036264909519e-06, "loss": 0.3024, "step": 15491 }, { "epoch": 0.7011541072640869, "grad_norm": 0.6786446701334466, "learning_rate": 2.165432470744483e-06, "loss": 0.2839, "step": 15492 }, { "epoch": 0.7011993663724825, "grad_norm": 0.8929527677804295, "learning_rate": 2.164828737485597e-06, "loss": 0.3138, "step": 15493 }, { "epoch": 0.7012446254808781, "grad_norm": 0.6217136165511883, "learning_rate": 2.164225065145836e-06, "loss": 0.3477, "step": 15494 }, { "epoch": 0.7012898845892735, "grad_norm": 0.6117178180181198, "learning_rate": 2.163621453738168e-06, "loss": 0.3093, "step": 15495 }, { "epoch": 0.7013351436976691, "grad_norm": 0.6523515263755088, "learning_rate": 2.1630179032755632e-06, "loss": 0.2741, "step": 15496 }, { "epoch": 0.7013804028060647, "grad_norm": 0.5729455223348102, "learning_rate": 2.1624144137709917e-06, "loss": 0.3055, "step": 15497 }, { "epoch": 0.7014256619144603, "grad_norm": 0.6059087224660069, "learning_rate": 2.161810985237418e-06, "loss": 0.3363, "step": 15498 }, { "epoch": 0.7014709210228558, "grad_norm": 0.7130322049678949, "learning_rate": 2.1612076176878112e-06, "loss": 0.3404, "step": 15499 }, { "epoch": 0.7015161801312514, "grad_norm": 0.5802648354575368, "learning_rate": 2.1606043111351316e-06, "loss": 0.2398, "step": 15500 }, { "epoch": 0.701561439239647, "grad_norm": 0.7169971190891021, "learning_rate": 2.160001065592347e-06, "loss": 0.2982, "step": 15501 }, { "epoch": 0.7016066983480426, "grad_norm": 0.696332156180076, "learning_rate": 2.1593978810724152e-06, "loss": 0.3027, "step": 15502 }, { "epoch": 0.7016519574564382, "grad_norm": 0.6148654487309713, "learning_rate": 2.158794757588301e-06, "loss": 0.2887, "step": 15503 }, { "epoch": 0.7016972165648336, "grad_norm": 0.3234783939738736, "learning_rate": 2.1581916951529606e-06, "loss": 0.479, "step": 15504 }, { "epoch": 0.7017424756732292, "grad_norm": 0.5654991950174835, "learning_rate": 2.1575886937793515e-06, "loss": 0.3061, "step": 15505 }, { "epoch": 0.7017877347816248, "grad_norm": 0.6185639792057311, "learning_rate": 2.1569857534804317e-06, "loss": 0.2862, "step": 15506 }, { "epoch": 0.7018329938900204, "grad_norm": 0.5872142467018157, "learning_rate": 2.1563828742691597e-06, "loss": 0.2907, "step": 15507 }, { "epoch": 0.7018782529984159, "grad_norm": 0.6251172930466563, "learning_rate": 2.1557800561584822e-06, "loss": 0.2905, "step": 15508 }, { "epoch": 0.7019235121068115, "grad_norm": 0.6491345308673889, "learning_rate": 2.155177299161357e-06, "loss": 0.3231, "step": 15509 }, { "epoch": 0.7019687712152071, "grad_norm": 0.6886456035695668, "learning_rate": 2.154574603290735e-06, "loss": 0.3149, "step": 15510 }, { "epoch": 0.7020140303236027, "grad_norm": 0.6153866555546205, "learning_rate": 2.1539719685595665e-06, "loss": 0.3017, "step": 15511 }, { "epoch": 0.7020592894319981, "grad_norm": 0.6256755519011062, "learning_rate": 2.153369394980798e-06, "loss": 0.3578, "step": 15512 }, { "epoch": 0.7021045485403937, "grad_norm": 0.6207121681626878, "learning_rate": 2.1527668825673777e-06, "loss": 0.3478, "step": 15513 }, { "epoch": 0.7021498076487893, "grad_norm": 0.634240642136747, "learning_rate": 2.1521644313322543e-06, "loss": 0.3382, "step": 15514 }, { "epoch": 0.7021950667571849, "grad_norm": 0.6182497417181321, "learning_rate": 2.151562041288371e-06, "loss": 0.3213, "step": 15515 }, { "epoch": 0.7022403258655805, "grad_norm": 0.6069266273627872, "learning_rate": 2.1509597124486693e-06, "loss": 0.2757, "step": 15516 }, { "epoch": 0.702285584973976, "grad_norm": 0.568023784096722, "learning_rate": 2.150357444826095e-06, "loss": 0.2847, "step": 15517 }, { "epoch": 0.7023308440823716, "grad_norm": 0.617320839569645, "learning_rate": 2.1497552384335858e-06, "loss": 0.3185, "step": 15518 }, { "epoch": 0.7023761031907672, "grad_norm": 0.6363743882498056, "learning_rate": 2.1491530932840835e-06, "loss": 0.3063, "step": 15519 }, { "epoch": 0.7024213622991627, "grad_norm": 0.6209437669725376, "learning_rate": 2.1485510093905264e-06, "loss": 0.3117, "step": 15520 }, { "epoch": 0.7024666214075582, "grad_norm": 0.6466039291351509, "learning_rate": 2.147948986765849e-06, "loss": 0.3298, "step": 15521 }, { "epoch": 0.7025118805159538, "grad_norm": 0.2965267650217191, "learning_rate": 2.147347025422988e-06, "loss": 0.5006, "step": 15522 }, { "epoch": 0.7025571396243494, "grad_norm": 0.6130321389581915, "learning_rate": 2.1467451253748797e-06, "loss": 0.2877, "step": 15523 }, { "epoch": 0.702602398732745, "grad_norm": 0.6827378240286927, "learning_rate": 2.1461432866344554e-06, "loss": 0.2878, "step": 15524 }, { "epoch": 0.7026476578411406, "grad_norm": 0.5873439345678003, "learning_rate": 2.145541509214646e-06, "loss": 0.311, "step": 15525 }, { "epoch": 0.7026929169495361, "grad_norm": 0.6193308607911314, "learning_rate": 2.1449397931283838e-06, "loss": 0.3204, "step": 15526 }, { "epoch": 0.7027381760579317, "grad_norm": 0.6274669568283813, "learning_rate": 2.1443381383885954e-06, "loss": 0.3135, "step": 15527 }, { "epoch": 0.7027834351663272, "grad_norm": 0.5998583644959756, "learning_rate": 2.1437365450082114e-06, "loss": 0.2914, "step": 15528 }, { "epoch": 0.7028286942747228, "grad_norm": 0.6134254482027904, "learning_rate": 2.1431350130001556e-06, "loss": 0.2954, "step": 15529 }, { "epoch": 0.7028739533831183, "grad_norm": 0.28329977300242243, "learning_rate": 2.142533542377355e-06, "loss": 0.4972, "step": 15530 }, { "epoch": 0.7029192124915139, "grad_norm": 0.6315642879116592, "learning_rate": 2.1419321331527317e-06, "loss": 0.3163, "step": 15531 }, { "epoch": 0.7029644715999095, "grad_norm": 0.6288753910599849, "learning_rate": 2.14133078533921e-06, "loss": 0.3264, "step": 15532 }, { "epoch": 0.7030097307083051, "grad_norm": 0.6167214538896445, "learning_rate": 2.14072949894971e-06, "loss": 0.3117, "step": 15533 }, { "epoch": 0.7030549898167006, "grad_norm": 0.5920196063320219, "learning_rate": 2.14012827399715e-06, "loss": 0.2891, "step": 15534 }, { "epoch": 0.7031002489250961, "grad_norm": 0.5728835353448037, "learning_rate": 2.13952711049445e-06, "loss": 0.2853, "step": 15535 }, { "epoch": 0.7031455080334917, "grad_norm": 0.6498149796461337, "learning_rate": 2.1389260084545305e-06, "loss": 0.3132, "step": 15536 }, { "epoch": 0.7031907671418873, "grad_norm": 0.60638016947381, "learning_rate": 2.1383249678903006e-06, "loss": 0.3054, "step": 15537 }, { "epoch": 0.7032360262502829, "grad_norm": 0.2768250776447379, "learning_rate": 2.1377239888146785e-06, "loss": 0.4889, "step": 15538 }, { "epoch": 0.7032812853586784, "grad_norm": 0.647992038999156, "learning_rate": 2.1371230712405783e-06, "loss": 0.3623, "step": 15539 }, { "epoch": 0.703326544467074, "grad_norm": 0.7498658647713098, "learning_rate": 2.1365222151809106e-06, "loss": 0.3067, "step": 15540 }, { "epoch": 0.7033718035754696, "grad_norm": 0.5874224222939307, "learning_rate": 2.1359214206485845e-06, "loss": 0.2784, "step": 15541 }, { "epoch": 0.7034170626838652, "grad_norm": 0.6189769199261504, "learning_rate": 2.135320687656511e-06, "loss": 0.3168, "step": 15542 }, { "epoch": 0.7034623217922606, "grad_norm": 0.6490191871652556, "learning_rate": 2.1347200162175984e-06, "loss": 0.3384, "step": 15543 }, { "epoch": 0.7035075809006562, "grad_norm": 0.6092680751785646, "learning_rate": 2.1341194063447533e-06, "loss": 0.335, "step": 15544 }, { "epoch": 0.7035528400090518, "grad_norm": 0.6465144265028875, "learning_rate": 2.133518858050879e-06, "loss": 0.3049, "step": 15545 }, { "epoch": 0.7035980991174474, "grad_norm": 0.42971251058278587, "learning_rate": 2.132918371348882e-06, "loss": 0.4656, "step": 15546 }, { "epoch": 0.7036433582258429, "grad_norm": 0.5763015261511008, "learning_rate": 2.132317946251662e-06, "loss": 0.3116, "step": 15547 }, { "epoch": 0.7036886173342385, "grad_norm": 0.7014396526280077, "learning_rate": 2.1317175827721238e-06, "loss": 0.2937, "step": 15548 }, { "epoch": 0.7037338764426341, "grad_norm": 0.6105683067560459, "learning_rate": 2.131117280923165e-06, "loss": 0.3095, "step": 15549 }, { "epoch": 0.7037791355510297, "grad_norm": 0.5806980703941906, "learning_rate": 2.1305170407176836e-06, "loss": 0.3185, "step": 15550 }, { "epoch": 0.7038243946594253, "grad_norm": 0.3146629447531045, "learning_rate": 2.1299168621685775e-06, "loss": 0.4511, "step": 15551 }, { "epoch": 0.7038696537678207, "grad_norm": 0.5856947087391554, "learning_rate": 2.1293167452887452e-06, "loss": 0.3012, "step": 15552 }, { "epoch": 0.7039149128762163, "grad_norm": 0.69512091931565, "learning_rate": 2.1287166900910796e-06, "loss": 0.295, "step": 15553 }, { "epoch": 0.7039601719846119, "grad_norm": 0.5998239511118214, "learning_rate": 2.1281166965884715e-06, "loss": 0.3547, "step": 15554 }, { "epoch": 0.7040054310930075, "grad_norm": 0.7292774262951063, "learning_rate": 2.1275167647938153e-06, "loss": 0.3277, "step": 15555 }, { "epoch": 0.704050690201403, "grad_norm": 0.610099937979236, "learning_rate": 2.1269168947200043e-06, "loss": 0.2779, "step": 15556 }, { "epoch": 0.7040959493097986, "grad_norm": 0.7000545008934641, "learning_rate": 2.126317086379925e-06, "loss": 0.3535, "step": 15557 }, { "epoch": 0.7041412084181942, "grad_norm": 0.26531741359570854, "learning_rate": 2.1257173397864635e-06, "loss": 0.4646, "step": 15558 }, { "epoch": 0.7041864675265898, "grad_norm": 0.6500995480765627, "learning_rate": 2.1251176549525102e-06, "loss": 0.3249, "step": 15559 }, { "epoch": 0.7042317266349853, "grad_norm": 0.5911001356915474, "learning_rate": 2.1245180318909482e-06, "loss": 0.3052, "step": 15560 }, { "epoch": 0.7042769857433808, "grad_norm": 0.6448318552907646, "learning_rate": 2.123918470614663e-06, "loss": 0.3311, "step": 15561 }, { "epoch": 0.7043222448517764, "grad_norm": 0.5916943962480607, "learning_rate": 2.1233189711365374e-06, "loss": 0.2825, "step": 15562 }, { "epoch": 0.704367503960172, "grad_norm": 0.5972384349849474, "learning_rate": 2.12271953346945e-06, "loss": 0.3075, "step": 15563 }, { "epoch": 0.7044127630685676, "grad_norm": 0.7624894797514498, "learning_rate": 2.1221201576262828e-06, "loss": 0.3396, "step": 15564 }, { "epoch": 0.7044580221769631, "grad_norm": 0.5869299786308838, "learning_rate": 2.121520843619917e-06, "loss": 0.294, "step": 15565 }, { "epoch": 0.7045032812853587, "grad_norm": 0.5884655200661071, "learning_rate": 2.1209215914632275e-06, "loss": 0.2832, "step": 15566 }, { "epoch": 0.7045485403937543, "grad_norm": 0.27608380802360893, "learning_rate": 2.120322401169088e-06, "loss": 0.4624, "step": 15567 }, { "epoch": 0.7045937995021498, "grad_norm": 0.6114286440045849, "learning_rate": 2.119723272750379e-06, "loss": 0.2897, "step": 15568 }, { "epoch": 0.7046390586105453, "grad_norm": 0.6040466508722457, "learning_rate": 2.1191242062199695e-06, "loss": 0.295, "step": 15569 }, { "epoch": 0.7046843177189409, "grad_norm": 0.7752228122059384, "learning_rate": 2.118525201590732e-06, "loss": 0.2781, "step": 15570 }, { "epoch": 0.7047295768273365, "grad_norm": 0.6012127086414305, "learning_rate": 2.117926258875538e-06, "loss": 0.3222, "step": 15571 }, { "epoch": 0.7047748359357321, "grad_norm": 0.28231026926098374, "learning_rate": 2.1173273780872584e-06, "loss": 0.4625, "step": 15572 }, { "epoch": 0.7048200950441277, "grad_norm": 0.6151721387865137, "learning_rate": 2.11672855923876e-06, "loss": 0.3222, "step": 15573 }, { "epoch": 0.7048653541525232, "grad_norm": 1.0305312559650226, "learning_rate": 2.1161298023429076e-06, "loss": 0.2739, "step": 15574 }, { "epoch": 0.7049106132609188, "grad_norm": 0.615711634922465, "learning_rate": 2.1155311074125713e-06, "loss": 0.2865, "step": 15575 }, { "epoch": 0.7049558723693143, "grad_norm": 0.6168852774937957, "learning_rate": 2.1149324744606103e-06, "loss": 0.2861, "step": 15576 }, { "epoch": 0.7050011314777099, "grad_norm": 0.6119238649528917, "learning_rate": 2.114333903499891e-06, "loss": 0.3326, "step": 15577 }, { "epoch": 0.7050463905861054, "grad_norm": 0.5819637721539274, "learning_rate": 2.1137353945432743e-06, "loss": 0.3255, "step": 15578 }, { "epoch": 0.705091649694501, "grad_norm": 0.6676049772112951, "learning_rate": 2.1131369476036173e-06, "loss": 0.3066, "step": 15579 }, { "epoch": 0.7051369088028966, "grad_norm": 0.2675541874957772, "learning_rate": 2.1125385626937806e-06, "loss": 0.4758, "step": 15580 }, { "epoch": 0.7051821679112922, "grad_norm": 0.6290051475284912, "learning_rate": 2.111940239826624e-06, "loss": 0.2666, "step": 15581 }, { "epoch": 0.7052274270196877, "grad_norm": 0.5945427288601011, "learning_rate": 2.1113419790150017e-06, "loss": 0.3403, "step": 15582 }, { "epoch": 0.7052726861280832, "grad_norm": 0.2695891017928327, "learning_rate": 2.1107437802717667e-06, "loss": 0.4439, "step": 15583 }, { "epoch": 0.7053179452364788, "grad_norm": 0.2815561171450684, "learning_rate": 2.1101456436097744e-06, "loss": 0.4736, "step": 15584 }, { "epoch": 0.7053632043448744, "grad_norm": 0.6002777685049623, "learning_rate": 2.109547569041878e-06, "loss": 0.3111, "step": 15585 }, { "epoch": 0.70540846345327, "grad_norm": 0.6074427839267478, "learning_rate": 2.1089495565809274e-06, "loss": 0.2802, "step": 15586 }, { "epoch": 0.7054537225616655, "grad_norm": 0.2618171024604309, "learning_rate": 2.10835160623977e-06, "loss": 0.4811, "step": 15587 }, { "epoch": 0.7054989816700611, "grad_norm": 0.6403282477430244, "learning_rate": 2.1077537180312568e-06, "loss": 0.2957, "step": 15588 }, { "epoch": 0.7055442407784567, "grad_norm": 0.6931161483217249, "learning_rate": 2.107155891968232e-06, "loss": 0.2813, "step": 15589 }, { "epoch": 0.7055894998868523, "grad_norm": 0.5854855835134954, "learning_rate": 2.106558128063544e-06, "loss": 0.2764, "step": 15590 }, { "epoch": 0.7056347589952477, "grad_norm": 0.9400059804734506, "learning_rate": 2.1059604263300354e-06, "loss": 0.2293, "step": 15591 }, { "epoch": 0.7056800181036433, "grad_norm": 0.31758819314786835, "learning_rate": 2.105362786780547e-06, "loss": 0.448, "step": 15592 }, { "epoch": 0.7057252772120389, "grad_norm": 0.59255163939248, "learning_rate": 2.104765209427922e-06, "loss": 0.2823, "step": 15593 }, { "epoch": 0.7057705363204345, "grad_norm": 0.6202984057488541, "learning_rate": 2.1041676942850025e-06, "loss": 0.2659, "step": 15594 }, { "epoch": 0.7058157954288301, "grad_norm": 0.6665940338013063, "learning_rate": 2.1035702413646257e-06, "loss": 0.3219, "step": 15595 }, { "epoch": 0.7058610545372256, "grad_norm": 0.2709211805243884, "learning_rate": 2.1029728506796266e-06, "loss": 0.462, "step": 15596 }, { "epoch": 0.7059063136456212, "grad_norm": 0.6551666991759286, "learning_rate": 2.1023755222428434e-06, "loss": 0.3088, "step": 15597 }, { "epoch": 0.7059515727540168, "grad_norm": 0.6156762722283183, "learning_rate": 2.1017782560671124e-06, "loss": 0.3216, "step": 15598 }, { "epoch": 0.7059968318624124, "grad_norm": 0.4609435977100646, "learning_rate": 2.101181052165266e-06, "loss": 0.4734, "step": 15599 }, { "epoch": 0.7060420909708078, "grad_norm": 0.6868399069144914, "learning_rate": 2.1005839105501336e-06, "loss": 0.3084, "step": 15600 }, { "epoch": 0.7060873500792034, "grad_norm": 0.6540307934496004, "learning_rate": 2.09998683123455e-06, "loss": 0.3307, "step": 15601 }, { "epoch": 0.706132609187599, "grad_norm": 0.5804650262670074, "learning_rate": 2.0993898142313428e-06, "loss": 0.2532, "step": 15602 }, { "epoch": 0.7061778682959946, "grad_norm": 0.7045137113564987, "learning_rate": 2.098792859553338e-06, "loss": 0.3121, "step": 15603 }, { "epoch": 0.7062231274043901, "grad_norm": 0.5985304281397166, "learning_rate": 2.0981959672133663e-06, "loss": 0.2671, "step": 15604 }, { "epoch": 0.7062683865127857, "grad_norm": 0.6932785443502347, "learning_rate": 2.0975991372242488e-06, "loss": 0.3095, "step": 15605 }, { "epoch": 0.7063136456211813, "grad_norm": 0.25596252597725394, "learning_rate": 2.097002369598814e-06, "loss": 0.4578, "step": 15606 }, { "epoch": 0.7063589047295769, "grad_norm": 0.6023667287025952, "learning_rate": 2.096405664349882e-06, "loss": 0.3176, "step": 15607 }, { "epoch": 0.7064041638379724, "grad_norm": 0.6669780970769399, "learning_rate": 2.095809021490273e-06, "loss": 0.3647, "step": 15608 }, { "epoch": 0.7064494229463679, "grad_norm": 0.27753170005461786, "learning_rate": 2.0952124410328085e-06, "loss": 0.4931, "step": 15609 }, { "epoch": 0.7064946820547635, "grad_norm": 0.5896931641117975, "learning_rate": 2.094615922990309e-06, "loss": 0.294, "step": 15610 }, { "epoch": 0.7065399411631591, "grad_norm": 0.28245895056221954, "learning_rate": 2.0940194673755903e-06, "loss": 0.4577, "step": 15611 }, { "epoch": 0.7065852002715547, "grad_norm": 0.5821755169282244, "learning_rate": 2.0934230742014666e-06, "loss": 0.2881, "step": 15612 }, { "epoch": 0.7066304593799502, "grad_norm": 0.6095114697705112, "learning_rate": 2.0928267434807537e-06, "loss": 0.3145, "step": 15613 }, { "epoch": 0.7066757184883458, "grad_norm": 0.5966172519027911, "learning_rate": 2.0922304752262672e-06, "loss": 0.3091, "step": 15614 }, { "epoch": 0.7067209775967414, "grad_norm": 0.5854903933345119, "learning_rate": 2.0916342694508177e-06, "loss": 0.2981, "step": 15615 }, { "epoch": 0.7067662367051369, "grad_norm": 0.6476592475962757, "learning_rate": 2.0910381261672136e-06, "loss": 0.303, "step": 15616 }, { "epoch": 0.7068114958135324, "grad_norm": 0.6178026593378579, "learning_rate": 2.0904420453882675e-06, "loss": 0.3085, "step": 15617 }, { "epoch": 0.706856754921928, "grad_norm": 0.6411289753975102, "learning_rate": 2.089846027126784e-06, "loss": 0.3193, "step": 15618 }, { "epoch": 0.7069020140303236, "grad_norm": 0.5891169438856212, "learning_rate": 2.089250071395573e-06, "loss": 0.2949, "step": 15619 }, { "epoch": 0.7069472731387192, "grad_norm": 0.2940271428331134, "learning_rate": 2.088654178207439e-06, "loss": 0.4793, "step": 15620 }, { "epoch": 0.7069925322471148, "grad_norm": 0.3054544662909863, "learning_rate": 2.088058347575183e-06, "loss": 0.4955, "step": 15621 }, { "epoch": 0.7070377913555103, "grad_norm": 0.6651374294296094, "learning_rate": 2.08746257951161e-06, "loss": 0.3594, "step": 15622 }, { "epoch": 0.7070830504639058, "grad_norm": 0.7210944144731016, "learning_rate": 2.0868668740295223e-06, "loss": 0.3299, "step": 15623 }, { "epoch": 0.7071283095723014, "grad_norm": 0.5831154354291413, "learning_rate": 2.086271231141719e-06, "loss": 0.3078, "step": 15624 }, { "epoch": 0.707173568680697, "grad_norm": 0.623507882297109, "learning_rate": 2.085675650860996e-06, "loss": 0.3114, "step": 15625 }, { "epoch": 0.7072188277890925, "grad_norm": 0.6069100613245061, "learning_rate": 2.0850801332001526e-06, "loss": 0.3274, "step": 15626 }, { "epoch": 0.7072640868974881, "grad_norm": 0.2759847739560776, "learning_rate": 2.0844846781719865e-06, "loss": 0.463, "step": 15627 }, { "epoch": 0.7073093460058837, "grad_norm": 0.6771389749984982, "learning_rate": 2.0838892857892908e-06, "loss": 0.2949, "step": 15628 }, { "epoch": 0.7073546051142793, "grad_norm": 0.5745668071147314, "learning_rate": 2.0832939560648557e-06, "loss": 0.2532, "step": 15629 }, { "epoch": 0.7073998642226749, "grad_norm": 0.7284067278697884, "learning_rate": 2.0826986890114775e-06, "loss": 0.3186, "step": 15630 }, { "epoch": 0.7074451233310703, "grad_norm": 0.6221647222093457, "learning_rate": 2.082103484641943e-06, "loss": 0.3153, "step": 15631 }, { "epoch": 0.7074903824394659, "grad_norm": 0.6522605474785989, "learning_rate": 2.0815083429690445e-06, "loss": 0.3593, "step": 15632 }, { "epoch": 0.7075356415478615, "grad_norm": 0.6066986056777576, "learning_rate": 2.0809132640055685e-06, "loss": 0.2701, "step": 15633 }, { "epoch": 0.7075809006562571, "grad_norm": 0.5604974164989961, "learning_rate": 2.080318247764299e-06, "loss": 0.2929, "step": 15634 }, { "epoch": 0.7076261597646526, "grad_norm": 0.5917143842030397, "learning_rate": 2.0797232942580238e-06, "loss": 0.2509, "step": 15635 }, { "epoch": 0.7076714188730482, "grad_norm": 0.6229136734813554, "learning_rate": 2.0791284034995296e-06, "loss": 0.3029, "step": 15636 }, { "epoch": 0.7077166779814438, "grad_norm": 0.734163918066207, "learning_rate": 2.0785335755015913e-06, "loss": 0.3006, "step": 15637 }, { "epoch": 0.7077619370898394, "grad_norm": 0.4989460637768117, "learning_rate": 2.077938810276994e-06, "loss": 0.4719, "step": 15638 }, { "epoch": 0.7078071961982348, "grad_norm": 0.6273674589339869, "learning_rate": 2.0773441078385194e-06, "loss": 0.3453, "step": 15639 }, { "epoch": 0.7078524553066304, "grad_norm": 0.6310211789960349, "learning_rate": 2.076749468198943e-06, "loss": 0.3269, "step": 15640 }, { "epoch": 0.707897714415026, "grad_norm": 0.6198212516711802, "learning_rate": 2.076154891371041e-06, "loss": 0.2853, "step": 15641 }, { "epoch": 0.7079429735234216, "grad_norm": 0.6392808620470593, "learning_rate": 2.0755603773675905e-06, "loss": 0.3448, "step": 15642 }, { "epoch": 0.7079882326318172, "grad_norm": 0.278762441922395, "learning_rate": 2.0749659262013676e-06, "loss": 0.4709, "step": 15643 }, { "epoch": 0.7080334917402127, "grad_norm": 0.2769252676380643, "learning_rate": 2.074371537885143e-06, "loss": 0.4739, "step": 15644 }, { "epoch": 0.7080787508486083, "grad_norm": 0.5528948171061779, "learning_rate": 2.0737772124316872e-06, "loss": 0.2756, "step": 15645 }, { "epoch": 0.7081240099570039, "grad_norm": 0.6144812745521786, "learning_rate": 2.0731829498537743e-06, "loss": 0.2993, "step": 15646 }, { "epoch": 0.7081692690653995, "grad_norm": 0.6165657421144701, "learning_rate": 2.072588750164168e-06, "loss": 0.3328, "step": 15647 }, { "epoch": 0.7082145281737949, "grad_norm": 0.617449276444202, "learning_rate": 2.071994613375641e-06, "loss": 0.3008, "step": 15648 }, { "epoch": 0.7082597872821905, "grad_norm": 0.6203225191777019, "learning_rate": 2.0714005395009566e-06, "loss": 0.3041, "step": 15649 }, { "epoch": 0.7083050463905861, "grad_norm": 0.671178645171008, "learning_rate": 2.0708065285528784e-06, "loss": 0.3275, "step": 15650 }, { "epoch": 0.7083503054989817, "grad_norm": 0.29431578101845934, "learning_rate": 2.070212580544172e-06, "loss": 0.4803, "step": 15651 }, { "epoch": 0.7083955646073772, "grad_norm": 0.666418692678892, "learning_rate": 2.0696186954876002e-06, "loss": 0.2571, "step": 15652 }, { "epoch": 0.7084408237157728, "grad_norm": 0.6247469527950302, "learning_rate": 2.0690248733959235e-06, "loss": 0.2972, "step": 15653 }, { "epoch": 0.7084860828241684, "grad_norm": 0.623897382602735, "learning_rate": 2.068431114281898e-06, "loss": 0.3069, "step": 15654 }, { "epoch": 0.708531341932564, "grad_norm": 0.5754801717126035, "learning_rate": 2.0678374181582845e-06, "loss": 0.2809, "step": 15655 }, { "epoch": 0.7085766010409595, "grad_norm": 0.6202131802305537, "learning_rate": 2.0672437850378414e-06, "loss": 0.2959, "step": 15656 }, { "epoch": 0.708621860149355, "grad_norm": 0.6962650105072428, "learning_rate": 2.0666502149333215e-06, "loss": 0.3044, "step": 15657 }, { "epoch": 0.7086671192577506, "grad_norm": 0.5738380545478389, "learning_rate": 2.066056707857478e-06, "loss": 0.2375, "step": 15658 }, { "epoch": 0.7087123783661462, "grad_norm": 0.6153974552831404, "learning_rate": 2.0654632638230664e-06, "loss": 0.3182, "step": 15659 }, { "epoch": 0.7087576374745418, "grad_norm": 0.583783207426821, "learning_rate": 2.064869882842835e-06, "loss": 0.3539, "step": 15660 }, { "epoch": 0.7088028965829373, "grad_norm": 0.5867962921817134, "learning_rate": 2.064276564929537e-06, "loss": 0.3274, "step": 15661 }, { "epoch": 0.7088481556913329, "grad_norm": 0.6622066610058321, "learning_rate": 2.0636833100959198e-06, "loss": 0.2961, "step": 15662 }, { "epoch": 0.7088934147997284, "grad_norm": 0.68034769054094, "learning_rate": 2.0630901183547274e-06, "loss": 0.3433, "step": 15663 }, { "epoch": 0.708938673908124, "grad_norm": 0.6299499173817064, "learning_rate": 2.0624969897187084e-06, "loss": 0.2824, "step": 15664 }, { "epoch": 0.7089839330165196, "grad_norm": 0.6134710115056257, "learning_rate": 2.0619039242006117e-06, "loss": 0.2903, "step": 15665 }, { "epoch": 0.7090291921249151, "grad_norm": 1.1167269850382753, "learning_rate": 2.0613109218131717e-06, "loss": 0.2736, "step": 15666 }, { "epoch": 0.7090744512333107, "grad_norm": 0.5868181423205776, "learning_rate": 2.0607179825691344e-06, "loss": 0.2655, "step": 15667 }, { "epoch": 0.7091197103417063, "grad_norm": 0.6804760253555822, "learning_rate": 2.0601251064812407e-06, "loss": 0.2824, "step": 15668 }, { "epoch": 0.7091649694501019, "grad_norm": 0.6415825775100403, "learning_rate": 2.0595322935622326e-06, "loss": 0.3331, "step": 15669 }, { "epoch": 0.7092102285584974, "grad_norm": 0.9332292806580279, "learning_rate": 2.058939543824841e-06, "loss": 0.2952, "step": 15670 }, { "epoch": 0.709255487666893, "grad_norm": 0.6043048873822964, "learning_rate": 2.058346857281806e-06, "loss": 0.3002, "step": 15671 }, { "epoch": 0.7093007467752885, "grad_norm": 0.6590343921393856, "learning_rate": 2.0577542339458647e-06, "loss": 0.3137, "step": 15672 }, { "epoch": 0.7093460058836841, "grad_norm": 0.29385629964947496, "learning_rate": 2.0571616738297473e-06, "loss": 0.4802, "step": 15673 }, { "epoch": 0.7093912649920796, "grad_norm": 0.6251899858791947, "learning_rate": 2.0565691769461865e-06, "loss": 0.3158, "step": 15674 }, { "epoch": 0.7094365241004752, "grad_norm": 0.5689479115374646, "learning_rate": 2.0559767433079154e-06, "loss": 0.3026, "step": 15675 }, { "epoch": 0.7094817832088708, "grad_norm": 0.5731850317292073, "learning_rate": 2.0553843729276606e-06, "loss": 0.2944, "step": 15676 }, { "epoch": 0.7095270423172664, "grad_norm": 0.6013672663083953, "learning_rate": 2.0547920658181535e-06, "loss": 0.309, "step": 15677 }, { "epoch": 0.709572301425662, "grad_norm": 0.5966705722753385, "learning_rate": 2.0541998219921194e-06, "loss": 0.2835, "step": 15678 }, { "epoch": 0.7096175605340574, "grad_norm": 0.6065622374492098, "learning_rate": 2.0536076414622824e-06, "loss": 0.2486, "step": 15679 }, { "epoch": 0.709662819642453, "grad_norm": 0.6093719632820457, "learning_rate": 2.0530155242413676e-06, "loss": 0.3338, "step": 15680 }, { "epoch": 0.7097080787508486, "grad_norm": 0.7299538787359219, "learning_rate": 2.0524234703421003e-06, "loss": 0.2503, "step": 15681 }, { "epoch": 0.7097533378592442, "grad_norm": 0.6195740147006733, "learning_rate": 2.0518314797771993e-06, "loss": 0.3176, "step": 15682 }, { "epoch": 0.7097985969676397, "grad_norm": 0.27778375446200076, "learning_rate": 2.0512395525593842e-06, "loss": 0.4632, "step": 15683 }, { "epoch": 0.7098438560760353, "grad_norm": 0.31267117915988685, "learning_rate": 2.050647688701374e-06, "loss": 0.4892, "step": 15684 }, { "epoch": 0.7098891151844309, "grad_norm": 0.5625030089547218, "learning_rate": 2.050055888215889e-06, "loss": 0.3681, "step": 15685 }, { "epoch": 0.7099343742928265, "grad_norm": 0.5762043814346973, "learning_rate": 2.0494641511156426e-06, "loss": 0.2921, "step": 15686 }, { "epoch": 0.7099796334012219, "grad_norm": 0.677688149072792, "learning_rate": 2.048872477413348e-06, "loss": 0.3002, "step": 15687 }, { "epoch": 0.7100248925096175, "grad_norm": 0.2675888295042279, "learning_rate": 2.048280867121722e-06, "loss": 0.4714, "step": 15688 }, { "epoch": 0.7100701516180131, "grad_norm": 0.610043033065155, "learning_rate": 2.0476893202534726e-06, "loss": 0.2863, "step": 15689 }, { "epoch": 0.7101154107264087, "grad_norm": 0.6019687341898033, "learning_rate": 2.0470978368213145e-06, "loss": 0.2699, "step": 15690 }, { "epoch": 0.7101606698348043, "grad_norm": 0.6557212029775732, "learning_rate": 2.0465064168379547e-06, "loss": 0.3047, "step": 15691 }, { "epoch": 0.7102059289431998, "grad_norm": 0.5681907011018104, "learning_rate": 2.0459150603160993e-06, "loss": 0.3123, "step": 15692 }, { "epoch": 0.7102511880515954, "grad_norm": 0.6937559347912634, "learning_rate": 2.045323767268456e-06, "loss": 0.3232, "step": 15693 }, { "epoch": 0.710296447159991, "grad_norm": 0.7626075879462596, "learning_rate": 2.0447325377077344e-06, "loss": 0.3281, "step": 15694 }, { "epoch": 0.7103417062683866, "grad_norm": 0.6171676917923269, "learning_rate": 2.0441413716466308e-06, "loss": 0.3237, "step": 15695 }, { "epoch": 0.710386965376782, "grad_norm": 0.5615951852382349, "learning_rate": 2.0435502690978502e-06, "loss": 0.2728, "step": 15696 }, { "epoch": 0.7104322244851776, "grad_norm": 0.6073481684244535, "learning_rate": 2.0429592300740945e-06, "loss": 0.272, "step": 15697 }, { "epoch": 0.7104774835935732, "grad_norm": 0.5990557902507999, "learning_rate": 2.042368254588067e-06, "loss": 0.2885, "step": 15698 }, { "epoch": 0.7105227427019688, "grad_norm": 0.5806910031766905, "learning_rate": 2.0417773426524583e-06, "loss": 0.2918, "step": 15699 }, { "epoch": 0.7105680018103643, "grad_norm": 0.6167601568081476, "learning_rate": 2.0411864942799685e-06, "loss": 0.3183, "step": 15700 }, { "epoch": 0.7106132609187599, "grad_norm": 0.2933210594741521, "learning_rate": 2.0405957094832962e-06, "loss": 0.4806, "step": 15701 }, { "epoch": 0.7106585200271555, "grad_norm": 0.6009201878774669, "learning_rate": 2.0400049882751327e-06, "loss": 0.2852, "step": 15702 }, { "epoch": 0.710703779135551, "grad_norm": 0.5691961115059543, "learning_rate": 2.0394143306681692e-06, "loss": 0.3211, "step": 15703 }, { "epoch": 0.7107490382439466, "grad_norm": 0.6433385542199535, "learning_rate": 2.0388237366751005e-06, "loss": 0.313, "step": 15704 }, { "epoch": 0.7107942973523421, "grad_norm": 0.6882102875117379, "learning_rate": 2.038233206308614e-06, "loss": 0.312, "step": 15705 }, { "epoch": 0.7108395564607377, "grad_norm": 0.6308695404554812, "learning_rate": 2.037642739581401e-06, "loss": 0.2823, "step": 15706 }, { "epoch": 0.7108848155691333, "grad_norm": 0.6075481401338595, "learning_rate": 2.0370523365061473e-06, "loss": 0.2824, "step": 15707 }, { "epoch": 0.7109300746775289, "grad_norm": 0.6553470517522016, "learning_rate": 2.0364619970955373e-06, "loss": 0.3136, "step": 15708 }, { "epoch": 0.7109753337859244, "grad_norm": 0.6008238589049834, "learning_rate": 2.035871721362257e-06, "loss": 0.3161, "step": 15709 }, { "epoch": 0.71102059289432, "grad_norm": 0.6521871065838722, "learning_rate": 2.0352815093189913e-06, "loss": 0.2807, "step": 15710 }, { "epoch": 0.7110658520027155, "grad_norm": 1.0025184314929678, "learning_rate": 2.0346913609784215e-06, "loss": 0.2905, "step": 15711 }, { "epoch": 0.7111111111111111, "grad_norm": 0.7522931147926903, "learning_rate": 2.0341012763532243e-06, "loss": 0.3228, "step": 15712 }, { "epoch": 0.7111563702195067, "grad_norm": 0.6231508704120658, "learning_rate": 2.033511255456082e-06, "loss": 0.3222, "step": 15713 }, { "epoch": 0.7112016293279022, "grad_norm": 0.6309365311711861, "learning_rate": 2.032921298299674e-06, "loss": 0.2979, "step": 15714 }, { "epoch": 0.7112468884362978, "grad_norm": 0.624392999154907, "learning_rate": 2.0323314048966737e-06, "loss": 0.2789, "step": 15715 }, { "epoch": 0.7112921475446934, "grad_norm": 0.579559880412466, "learning_rate": 2.031741575259756e-06, "loss": 0.3034, "step": 15716 }, { "epoch": 0.711337406653089, "grad_norm": 0.5873497811127301, "learning_rate": 2.031151809401597e-06, "loss": 0.3132, "step": 15717 }, { "epoch": 0.7113826657614845, "grad_norm": 0.6213809208434222, "learning_rate": 2.030562107334866e-06, "loss": 0.3197, "step": 15718 }, { "epoch": 0.71142792486988, "grad_norm": 0.2944410583813787, "learning_rate": 2.0299724690722367e-06, "loss": 0.4771, "step": 15719 }, { "epoch": 0.7114731839782756, "grad_norm": 0.6200291281083014, "learning_rate": 2.029382894626378e-06, "loss": 0.2686, "step": 15720 }, { "epoch": 0.7115184430866712, "grad_norm": 0.6471888141535328, "learning_rate": 2.028793384009955e-06, "loss": 0.2802, "step": 15721 }, { "epoch": 0.7115637021950667, "grad_norm": 0.5992679305582995, "learning_rate": 2.028203937235637e-06, "loss": 0.3292, "step": 15722 }, { "epoch": 0.7116089613034623, "grad_norm": 0.6208222626265416, "learning_rate": 2.0276145543160923e-06, "loss": 0.3402, "step": 15723 }, { "epoch": 0.7116542204118579, "grad_norm": 0.5213881130170567, "learning_rate": 2.027025235263979e-06, "loss": 0.2572, "step": 15724 }, { "epoch": 0.7116994795202535, "grad_norm": 0.6372870238486473, "learning_rate": 2.0264359800919626e-06, "loss": 0.3361, "step": 15725 }, { "epoch": 0.7117447386286491, "grad_norm": 0.5644816426982812, "learning_rate": 2.0258467888127036e-06, "loss": 0.289, "step": 15726 }, { "epoch": 0.7117899977370445, "grad_norm": 0.6133526414307099, "learning_rate": 2.0252576614388668e-06, "loss": 0.3023, "step": 15727 }, { "epoch": 0.7118352568454401, "grad_norm": 0.635948868871442, "learning_rate": 2.024668597983103e-06, "loss": 0.3226, "step": 15728 }, { "epoch": 0.7118805159538357, "grad_norm": 0.6566520390761876, "learning_rate": 2.0240795984580734e-06, "loss": 0.315, "step": 15729 }, { "epoch": 0.7119257750622313, "grad_norm": 0.6218129037138104, "learning_rate": 2.023490662876435e-06, "loss": 0.2813, "step": 15730 }, { "epoch": 0.7119710341706268, "grad_norm": 0.6490607702973659, "learning_rate": 2.0229017912508403e-06, "loss": 0.3203, "step": 15731 }, { "epoch": 0.7120162932790224, "grad_norm": 0.6511603840546198, "learning_rate": 2.022312983593941e-06, "loss": 0.3047, "step": 15732 }, { "epoch": 0.712061552387418, "grad_norm": 0.6033531094960397, "learning_rate": 2.021724239918392e-06, "loss": 0.2785, "step": 15733 }, { "epoch": 0.7121068114958136, "grad_norm": 0.60674720041964, "learning_rate": 2.0211355602368404e-06, "loss": 0.3141, "step": 15734 }, { "epoch": 0.712152070604209, "grad_norm": 0.9254902853327204, "learning_rate": 2.0205469445619386e-06, "loss": 0.3585, "step": 15735 }, { "epoch": 0.7121973297126046, "grad_norm": 0.5968363646845488, "learning_rate": 2.019958392906332e-06, "loss": 0.3517, "step": 15736 }, { "epoch": 0.7122425888210002, "grad_norm": 0.6486568418701398, "learning_rate": 2.0193699052826656e-06, "loss": 0.3078, "step": 15737 }, { "epoch": 0.7122878479293958, "grad_norm": 0.6678475248324102, "learning_rate": 2.0187814817035855e-06, "loss": 0.2922, "step": 15738 }, { "epoch": 0.7123331070377914, "grad_norm": 0.7596394560633785, "learning_rate": 2.018193122181737e-06, "loss": 0.2679, "step": 15739 }, { "epoch": 0.7123783661461869, "grad_norm": 0.5864552075821262, "learning_rate": 2.0176048267297603e-06, "loss": 0.3269, "step": 15740 }, { "epoch": 0.7124236252545825, "grad_norm": 1.2647697192982412, "learning_rate": 2.0170165953602944e-06, "loss": 0.4661, "step": 15741 }, { "epoch": 0.7124688843629781, "grad_norm": 0.6058089185740559, "learning_rate": 2.0164284280859803e-06, "loss": 0.3029, "step": 15742 }, { "epoch": 0.7125141434713737, "grad_norm": 0.6800456990515935, "learning_rate": 2.015840324919458e-06, "loss": 0.3031, "step": 15743 }, { "epoch": 0.7125594025797691, "grad_norm": 0.6160556600016963, "learning_rate": 2.0152522858733613e-06, "loss": 0.2906, "step": 15744 }, { "epoch": 0.7126046616881647, "grad_norm": 0.6190587645711407, "learning_rate": 2.0146643109603247e-06, "loss": 0.315, "step": 15745 }, { "epoch": 0.7126499207965603, "grad_norm": 0.630589774663842, "learning_rate": 2.0140764001929853e-06, "loss": 0.3072, "step": 15746 }, { "epoch": 0.7126951799049559, "grad_norm": 0.27009444682988265, "learning_rate": 2.0134885535839714e-06, "loss": 0.4883, "step": 15747 }, { "epoch": 0.7127404390133515, "grad_norm": 0.5805411200121087, "learning_rate": 2.012900771145918e-06, "loss": 0.2955, "step": 15748 }, { "epoch": 0.712785698121747, "grad_norm": 0.6036597094340551, "learning_rate": 2.012313052891453e-06, "loss": 0.2941, "step": 15749 }, { "epoch": 0.7128309572301426, "grad_norm": 0.29160376342527783, "learning_rate": 2.0117253988332023e-06, "loss": 0.4879, "step": 15750 }, { "epoch": 0.7128762163385381, "grad_norm": 0.287614861835318, "learning_rate": 2.0111378089837958e-06, "loss": 0.4641, "step": 15751 }, { "epoch": 0.7129214754469337, "grad_norm": 0.6776791947833849, "learning_rate": 2.010550283355861e-06, "loss": 0.3494, "step": 15752 }, { "epoch": 0.7129667345553292, "grad_norm": 0.6812298987323966, "learning_rate": 2.009962821962016e-06, "loss": 0.2833, "step": 15753 }, { "epoch": 0.7130119936637248, "grad_norm": 0.6090574363970508, "learning_rate": 2.009375424814886e-06, "loss": 0.2879, "step": 15754 }, { "epoch": 0.7130572527721204, "grad_norm": 0.8689524193398631, "learning_rate": 2.0087880919270943e-06, "loss": 0.3274, "step": 15755 }, { "epoch": 0.713102511880516, "grad_norm": 0.5948898312977335, "learning_rate": 2.008200823311263e-06, "loss": 0.2807, "step": 15756 }, { "epoch": 0.7131477709889115, "grad_norm": 0.6558636463613668, "learning_rate": 2.0076136189800033e-06, "loss": 0.3436, "step": 15757 }, { "epoch": 0.7131930300973071, "grad_norm": 0.851091129043898, "learning_rate": 2.0070264789459365e-06, "loss": 0.2648, "step": 15758 }, { "epoch": 0.7132382892057026, "grad_norm": 0.5796179561464531, "learning_rate": 2.0064394032216807e-06, "loss": 0.299, "step": 15759 }, { "epoch": 0.7132835483140982, "grad_norm": 0.5994608703240536, "learning_rate": 2.0058523918198473e-06, "loss": 0.3189, "step": 15760 }, { "epoch": 0.7133288074224938, "grad_norm": 0.5952886279712135, "learning_rate": 2.0052654447530497e-06, "loss": 0.3101, "step": 15761 }, { "epoch": 0.7133740665308893, "grad_norm": 0.5411759893169642, "learning_rate": 2.004678562033901e-06, "loss": 0.2733, "step": 15762 }, { "epoch": 0.7134193256392849, "grad_norm": 0.6305517774581135, "learning_rate": 2.004091743675009e-06, "loss": 0.2811, "step": 15763 }, { "epoch": 0.7134645847476805, "grad_norm": 0.622821270648253, "learning_rate": 2.0035049896889857e-06, "loss": 0.2973, "step": 15764 }, { "epoch": 0.7135098438560761, "grad_norm": 0.6579762622647377, "learning_rate": 2.0029183000884372e-06, "loss": 0.3082, "step": 15765 }, { "epoch": 0.7135551029644716, "grad_norm": 0.5941769016688686, "learning_rate": 2.0023316748859683e-06, "loss": 0.289, "step": 15766 }, { "epoch": 0.7136003620728671, "grad_norm": 0.6074341860587897, "learning_rate": 2.0017451140941848e-06, "loss": 0.299, "step": 15767 }, { "epoch": 0.7136456211812627, "grad_norm": 0.5944365416986104, "learning_rate": 2.001158617725692e-06, "loss": 0.2788, "step": 15768 }, { "epoch": 0.7136908802896583, "grad_norm": 0.6734313146626568, "learning_rate": 2.0005721857930902e-06, "loss": 0.2867, "step": 15769 }, { "epoch": 0.7137361393980538, "grad_norm": 0.6120330385710467, "learning_rate": 1.999985818308979e-06, "loss": 0.2697, "step": 15770 }, { "epoch": 0.7137813985064494, "grad_norm": 0.6424849521515572, "learning_rate": 1.9993995152859574e-06, "loss": 0.2846, "step": 15771 }, { "epoch": 0.713826657614845, "grad_norm": 0.592428537896021, "learning_rate": 1.9988132767366274e-06, "loss": 0.3511, "step": 15772 }, { "epoch": 0.7138719167232406, "grad_norm": 0.6274610076103312, "learning_rate": 1.9982271026735822e-06, "loss": 0.293, "step": 15773 }, { "epoch": 0.7139171758316362, "grad_norm": 0.6506363639159314, "learning_rate": 1.997640993109416e-06, "loss": 0.3029, "step": 15774 }, { "epoch": 0.7139624349400316, "grad_norm": 0.6224530901843206, "learning_rate": 1.9970549480567253e-06, "loss": 0.3086, "step": 15775 }, { "epoch": 0.7140076940484272, "grad_norm": 0.5956593042207174, "learning_rate": 1.9964689675280993e-06, "loss": 0.2699, "step": 15776 }, { "epoch": 0.7140529531568228, "grad_norm": 0.308828138671443, "learning_rate": 1.9958830515361323e-06, "loss": 0.4754, "step": 15777 }, { "epoch": 0.7140982122652184, "grad_norm": 0.6760202791887676, "learning_rate": 1.995297200093412e-06, "loss": 0.2883, "step": 15778 }, { "epoch": 0.7141434713736139, "grad_norm": 0.6687713757079061, "learning_rate": 1.9947114132125243e-06, "loss": 0.335, "step": 15779 }, { "epoch": 0.7141887304820095, "grad_norm": 0.6688648010934541, "learning_rate": 1.994125690906059e-06, "loss": 0.2982, "step": 15780 }, { "epoch": 0.7142339895904051, "grad_norm": 0.6302850774865625, "learning_rate": 1.993540033186602e-06, "loss": 0.263, "step": 15781 }, { "epoch": 0.7142792486988007, "grad_norm": 0.28680344474268504, "learning_rate": 1.9929544400667366e-06, "loss": 0.4811, "step": 15782 }, { "epoch": 0.7143245078071963, "grad_norm": 0.6167589499171111, "learning_rate": 1.9923689115590428e-06, "loss": 0.3014, "step": 15783 }, { "epoch": 0.7143697669155917, "grad_norm": 0.5816952636059396, "learning_rate": 1.9917834476761037e-06, "loss": 0.3096, "step": 15784 }, { "epoch": 0.7144150260239873, "grad_norm": 0.6636120045872163, "learning_rate": 1.9911980484305017e-06, "loss": 0.3257, "step": 15785 }, { "epoch": 0.7144602851323829, "grad_norm": 0.6286370620860274, "learning_rate": 1.9906127138348123e-06, "loss": 0.2956, "step": 15786 }, { "epoch": 0.7145055442407785, "grad_norm": 0.6780622703223456, "learning_rate": 1.9900274439016116e-06, "loss": 0.2681, "step": 15787 }, { "epoch": 0.714550803349174, "grad_norm": 0.7062114812695381, "learning_rate": 1.989442238643478e-06, "loss": 0.3265, "step": 15788 }, { "epoch": 0.7145960624575696, "grad_norm": 0.5841561044833753, "learning_rate": 1.9888570980729847e-06, "loss": 0.2714, "step": 15789 }, { "epoch": 0.7146413215659652, "grad_norm": 0.7220036200359434, "learning_rate": 1.9882720222027026e-06, "loss": 0.3009, "step": 15790 }, { "epoch": 0.7146865806743607, "grad_norm": 0.7006671521470127, "learning_rate": 1.9876870110452066e-06, "loss": 0.2785, "step": 15791 }, { "epoch": 0.7147318397827562, "grad_norm": 0.5872154904076039, "learning_rate": 1.9871020646130633e-06, "loss": 0.2899, "step": 15792 }, { "epoch": 0.7147770988911518, "grad_norm": 0.596507452041044, "learning_rate": 1.9865171829188455e-06, "loss": 0.2957, "step": 15793 }, { "epoch": 0.7148223579995474, "grad_norm": 0.6760547872145583, "learning_rate": 1.9859323659751178e-06, "loss": 0.3272, "step": 15794 }, { "epoch": 0.714867617107943, "grad_norm": 0.6936074449471855, "learning_rate": 1.985347613794445e-06, "loss": 0.3072, "step": 15795 }, { "epoch": 0.7149128762163386, "grad_norm": 0.6141029292821792, "learning_rate": 1.984762926389393e-06, "loss": 0.3119, "step": 15796 }, { "epoch": 0.7149581353247341, "grad_norm": 0.28622782989550266, "learning_rate": 1.9841783037725264e-06, "loss": 0.4784, "step": 15797 }, { "epoch": 0.7150033944331297, "grad_norm": 0.603727115110764, "learning_rate": 1.9835937459564065e-06, "loss": 0.333, "step": 15798 }, { "epoch": 0.7150486535415252, "grad_norm": 0.2584585191662679, "learning_rate": 1.983009252953591e-06, "loss": 0.4622, "step": 15799 }, { "epoch": 0.7150939126499208, "grad_norm": 0.6295600557995713, "learning_rate": 1.9824248247766404e-06, "loss": 0.2999, "step": 15800 }, { "epoch": 0.7151391717583163, "grad_norm": 0.6054018087673412, "learning_rate": 1.981840461438114e-06, "loss": 0.2997, "step": 15801 }, { "epoch": 0.7151844308667119, "grad_norm": 0.6474448280391323, "learning_rate": 1.9812561629505666e-06, "loss": 0.336, "step": 15802 }, { "epoch": 0.7152296899751075, "grad_norm": 0.6423659371952307, "learning_rate": 1.980671929326551e-06, "loss": 0.3294, "step": 15803 }, { "epoch": 0.7152749490835031, "grad_norm": 0.29430593806193195, "learning_rate": 1.980087760578625e-06, "loss": 0.4523, "step": 15804 }, { "epoch": 0.7153202081918986, "grad_norm": 0.5765988062290655, "learning_rate": 1.979503656719336e-06, "loss": 0.2993, "step": 15805 }, { "epoch": 0.7153654673002942, "grad_norm": 0.5732845095529945, "learning_rate": 1.9789196177612384e-06, "loss": 0.2718, "step": 15806 }, { "epoch": 0.7154107264086897, "grad_norm": 0.6072815854683996, "learning_rate": 1.97833564371688e-06, "loss": 0.3432, "step": 15807 }, { "epoch": 0.7154559855170853, "grad_norm": 0.599200295872143, "learning_rate": 1.9777517345988057e-06, "loss": 0.3245, "step": 15808 }, { "epoch": 0.7155012446254809, "grad_norm": 0.6179029784376359, "learning_rate": 1.977167890419565e-06, "loss": 0.2977, "step": 15809 }, { "epoch": 0.7155465037338764, "grad_norm": 0.619875528901657, "learning_rate": 1.976584111191704e-06, "loss": 0.2772, "step": 15810 }, { "epoch": 0.715591762842272, "grad_norm": 0.5717346985299682, "learning_rate": 1.976000396927765e-06, "loss": 0.2653, "step": 15811 }, { "epoch": 0.7156370219506676, "grad_norm": 0.6167885903070414, "learning_rate": 1.975416747640288e-06, "loss": 0.3106, "step": 15812 }, { "epoch": 0.7156822810590632, "grad_norm": 0.6860304566379826, "learning_rate": 1.974833163341816e-06, "loss": 0.3206, "step": 15813 }, { "epoch": 0.7157275401674587, "grad_norm": 0.558697239360316, "learning_rate": 1.9742496440448895e-06, "loss": 0.2664, "step": 15814 }, { "epoch": 0.7157727992758542, "grad_norm": 0.6422467254804713, "learning_rate": 1.973666189762046e-06, "loss": 0.3243, "step": 15815 }, { "epoch": 0.7158180583842498, "grad_norm": 0.6272481981865834, "learning_rate": 1.973082800505819e-06, "loss": 0.2976, "step": 15816 }, { "epoch": 0.7158633174926454, "grad_norm": 0.6170692813442996, "learning_rate": 1.9724994762887484e-06, "loss": 0.2906, "step": 15817 }, { "epoch": 0.715908576601041, "grad_norm": 0.3043379788722677, "learning_rate": 1.9719162171233636e-06, "loss": 0.4571, "step": 15818 }, { "epoch": 0.7159538357094365, "grad_norm": 0.7171550794067272, "learning_rate": 1.9713330230222013e-06, "loss": 0.2886, "step": 15819 }, { "epoch": 0.7159990948178321, "grad_norm": 1.193219478002798, "learning_rate": 1.9707498939977905e-06, "loss": 0.3184, "step": 15820 }, { "epoch": 0.7160443539262277, "grad_norm": 0.5752556708479493, "learning_rate": 1.970166830062659e-06, "loss": 0.2735, "step": 15821 }, { "epoch": 0.7160896130346233, "grad_norm": 0.6125335200732254, "learning_rate": 1.969583831229338e-06, "loss": 0.3245, "step": 15822 }, { "epoch": 0.7161348721430187, "grad_norm": 0.5971442106455194, "learning_rate": 1.969000897510354e-06, "loss": 0.3523, "step": 15823 }, { "epoch": 0.7161801312514143, "grad_norm": 0.26319467873710606, "learning_rate": 1.9684180289182297e-06, "loss": 0.4491, "step": 15824 }, { "epoch": 0.7162253903598099, "grad_norm": 0.6962210165352554, "learning_rate": 1.9678352254654914e-06, "loss": 0.2929, "step": 15825 }, { "epoch": 0.7162706494682055, "grad_norm": 0.597435716964487, "learning_rate": 1.967252487164663e-06, "loss": 0.3234, "step": 15826 }, { "epoch": 0.716315908576601, "grad_norm": 0.5774198683188823, "learning_rate": 1.9666698140282648e-06, "loss": 0.3108, "step": 15827 }, { "epoch": 0.7163611676849966, "grad_norm": 0.6242275328564052, "learning_rate": 1.966087206068814e-06, "loss": 0.3164, "step": 15828 }, { "epoch": 0.7164064267933922, "grad_norm": 0.7541080933452219, "learning_rate": 1.9655046632988313e-06, "loss": 0.2784, "step": 15829 }, { "epoch": 0.7164516859017878, "grad_norm": 0.6206540518782321, "learning_rate": 1.964922185730835e-06, "loss": 0.2849, "step": 15830 }, { "epoch": 0.7164969450101834, "grad_norm": 0.6295019724223111, "learning_rate": 1.96433977337734e-06, "loss": 0.2744, "step": 15831 }, { "epoch": 0.7165422041185788, "grad_norm": 0.5879735551319749, "learning_rate": 1.963757426250858e-06, "loss": 0.2941, "step": 15832 }, { "epoch": 0.7165874632269744, "grad_norm": 0.29055395224746566, "learning_rate": 1.9631751443639054e-06, "loss": 0.4766, "step": 15833 }, { "epoch": 0.71663272233537, "grad_norm": 0.7155505457515415, "learning_rate": 1.96259292772899e-06, "loss": 0.3128, "step": 15834 }, { "epoch": 0.7166779814437656, "grad_norm": 0.7041649501282407, "learning_rate": 1.9620107763586267e-06, "loss": 0.3233, "step": 15835 }, { "epoch": 0.7167232405521611, "grad_norm": 0.2991827612162243, "learning_rate": 1.96142869026532e-06, "loss": 0.4652, "step": 15836 }, { "epoch": 0.7167684996605567, "grad_norm": 0.5923121808846988, "learning_rate": 1.960846669461578e-06, "loss": 0.2683, "step": 15837 }, { "epoch": 0.7168137587689523, "grad_norm": 0.630301288602417, "learning_rate": 1.9602647139599063e-06, "loss": 0.2829, "step": 15838 }, { "epoch": 0.7168590178773478, "grad_norm": 0.6268779453538741, "learning_rate": 1.959682823772812e-06, "loss": 0.3598, "step": 15839 }, { "epoch": 0.7169042769857433, "grad_norm": 0.7842283739627655, "learning_rate": 1.9591009989127958e-06, "loss": 0.3044, "step": 15840 }, { "epoch": 0.7169495360941389, "grad_norm": 0.27938510457142823, "learning_rate": 1.9585192393923583e-06, "loss": 0.481, "step": 15841 }, { "epoch": 0.7169947952025345, "grad_norm": 0.6064302788309044, "learning_rate": 1.9579375452240013e-06, "loss": 0.2783, "step": 15842 }, { "epoch": 0.7170400543109301, "grad_norm": 0.6281747951378225, "learning_rate": 1.9573559164202248e-06, "loss": 0.3212, "step": 15843 }, { "epoch": 0.7170853134193257, "grad_norm": 0.6213000653987445, "learning_rate": 1.956774352993526e-06, "loss": 0.2711, "step": 15844 }, { "epoch": 0.7171305725277212, "grad_norm": 0.6296797112903717, "learning_rate": 1.956192854956397e-06, "loss": 0.3124, "step": 15845 }, { "epoch": 0.7171758316361168, "grad_norm": 0.5755777054173816, "learning_rate": 1.955611422321337e-06, "loss": 0.2927, "step": 15846 }, { "epoch": 0.7172210907445123, "grad_norm": 0.6481445542956137, "learning_rate": 1.9550300551008357e-06, "loss": 0.3052, "step": 15847 }, { "epoch": 0.7172663498529079, "grad_norm": 0.2622176815018156, "learning_rate": 1.9544487533073887e-06, "loss": 0.4604, "step": 15848 }, { "epoch": 0.7173116089613034, "grad_norm": 0.6096963530780615, "learning_rate": 1.9538675169534838e-06, "loss": 0.3055, "step": 15849 }, { "epoch": 0.717356868069699, "grad_norm": 0.5801968716904514, "learning_rate": 1.9532863460516095e-06, "loss": 0.3121, "step": 15850 }, { "epoch": 0.7174021271780946, "grad_norm": 0.6445991323541399, "learning_rate": 1.9527052406142534e-06, "loss": 0.3132, "step": 15851 }, { "epoch": 0.7174473862864902, "grad_norm": 0.6252343576574004, "learning_rate": 1.9521242006539065e-06, "loss": 0.3084, "step": 15852 }, { "epoch": 0.7174926453948858, "grad_norm": 0.7262094242239051, "learning_rate": 1.9515432261830465e-06, "loss": 0.2998, "step": 15853 }, { "epoch": 0.7175379045032813, "grad_norm": 0.6183870478720913, "learning_rate": 1.9509623172141596e-06, "loss": 0.2937, "step": 15854 }, { "epoch": 0.7175831636116768, "grad_norm": 0.6979481403695195, "learning_rate": 1.9503814737597297e-06, "loss": 0.2938, "step": 15855 }, { "epoch": 0.7176284227200724, "grad_norm": 0.7083826073473445, "learning_rate": 1.949800695832236e-06, "loss": 0.2918, "step": 15856 }, { "epoch": 0.717673681828468, "grad_norm": 0.6447679929100643, "learning_rate": 1.949219983444156e-06, "loss": 0.3071, "step": 15857 }, { "epoch": 0.7177189409368635, "grad_norm": 0.6497871890283972, "learning_rate": 1.9486393366079687e-06, "loss": 0.313, "step": 15858 }, { "epoch": 0.7177642000452591, "grad_norm": 0.682234150983536, "learning_rate": 1.948058755336152e-06, "loss": 0.2814, "step": 15859 }, { "epoch": 0.7178094591536547, "grad_norm": 0.6759019114753635, "learning_rate": 1.947478239641179e-06, "loss": 0.3152, "step": 15860 }, { "epoch": 0.7178547182620503, "grad_norm": 0.6525915599032759, "learning_rate": 1.9468977895355225e-06, "loss": 0.3431, "step": 15861 }, { "epoch": 0.7178999773704458, "grad_norm": 0.660095016570737, "learning_rate": 1.946317405031657e-06, "loss": 0.2991, "step": 15862 }, { "epoch": 0.7179452364788413, "grad_norm": 0.6324364409123052, "learning_rate": 1.94573708614205e-06, "loss": 0.2844, "step": 15863 }, { "epoch": 0.7179904955872369, "grad_norm": 0.6335492193236768, "learning_rate": 1.945156832879174e-06, "loss": 0.2878, "step": 15864 }, { "epoch": 0.7180357546956325, "grad_norm": 0.6585628540801011, "learning_rate": 1.944576645255496e-06, "loss": 0.3329, "step": 15865 }, { "epoch": 0.7180810138040281, "grad_norm": 0.3185024687658992, "learning_rate": 1.94399652328348e-06, "loss": 0.4802, "step": 15866 }, { "epoch": 0.7181262729124236, "grad_norm": 0.5985997255672704, "learning_rate": 1.9434164669755928e-06, "loss": 0.315, "step": 15867 }, { "epoch": 0.7181715320208192, "grad_norm": 0.6134883826608056, "learning_rate": 1.9428364763443e-06, "loss": 0.3393, "step": 15868 }, { "epoch": 0.7182167911292148, "grad_norm": 0.6659292946147676, "learning_rate": 1.942256551402062e-06, "loss": 0.3076, "step": 15869 }, { "epoch": 0.7182620502376104, "grad_norm": 0.7063404873269847, "learning_rate": 1.9416766921613375e-06, "loss": 0.3074, "step": 15870 }, { "epoch": 0.7183073093460058, "grad_norm": 0.8006880516701447, "learning_rate": 1.941096898634588e-06, "loss": 0.3203, "step": 15871 }, { "epoch": 0.7183525684544014, "grad_norm": 0.6808668128914034, "learning_rate": 1.9405171708342734e-06, "loss": 0.3336, "step": 15872 }, { "epoch": 0.718397827562797, "grad_norm": 0.6046078305349775, "learning_rate": 1.9399375087728485e-06, "loss": 0.2706, "step": 15873 }, { "epoch": 0.7184430866711926, "grad_norm": 0.28548787155681826, "learning_rate": 1.939357912462766e-06, "loss": 0.4806, "step": 15874 }, { "epoch": 0.7184883457795881, "grad_norm": 0.5994086971826666, "learning_rate": 1.938778381916484e-06, "loss": 0.2869, "step": 15875 }, { "epoch": 0.7185336048879837, "grad_norm": 0.6159373405288157, "learning_rate": 1.938198917146451e-06, "loss": 0.3169, "step": 15876 }, { "epoch": 0.7185788639963793, "grad_norm": 0.6221266268026915, "learning_rate": 1.937619518165121e-06, "loss": 0.3013, "step": 15877 }, { "epoch": 0.7186241231047749, "grad_norm": 0.6939394072074498, "learning_rate": 1.937040184984943e-06, "loss": 0.2803, "step": 15878 }, { "epoch": 0.7186693822131704, "grad_norm": 0.6745653728912595, "learning_rate": 1.936460917618362e-06, "loss": 0.2946, "step": 15879 }, { "epoch": 0.7187146413215659, "grad_norm": 0.6351055310352166, "learning_rate": 1.9358817160778272e-06, "loss": 0.3579, "step": 15880 }, { "epoch": 0.7187599004299615, "grad_norm": 0.2773305345910911, "learning_rate": 1.935302580375785e-06, "loss": 0.4696, "step": 15881 }, { "epoch": 0.7188051595383571, "grad_norm": 0.28853986338486576, "learning_rate": 1.9347235105246783e-06, "loss": 0.4589, "step": 15882 }, { "epoch": 0.7188504186467527, "grad_norm": 0.6801749109228717, "learning_rate": 1.934144506536946e-06, "loss": 0.3329, "step": 15883 }, { "epoch": 0.7188956777551482, "grad_norm": 0.6160835970036759, "learning_rate": 1.9335655684250335e-06, "loss": 0.3068, "step": 15884 }, { "epoch": 0.7189409368635438, "grad_norm": 0.6493036262924194, "learning_rate": 1.9329866962013825e-06, "loss": 0.3009, "step": 15885 }, { "epoch": 0.7189861959719394, "grad_norm": 0.6042260802325438, "learning_rate": 1.9324078898784245e-06, "loss": 0.2646, "step": 15886 }, { "epoch": 0.719031455080335, "grad_norm": 0.6283814205090501, "learning_rate": 1.9318291494685986e-06, "loss": 0.317, "step": 15887 }, { "epoch": 0.7190767141887305, "grad_norm": 0.27051702087871426, "learning_rate": 1.9312504749843435e-06, "loss": 0.4805, "step": 15888 }, { "epoch": 0.719121973297126, "grad_norm": 0.6903824750745414, "learning_rate": 1.9306718664380907e-06, "loss": 0.3057, "step": 15889 }, { "epoch": 0.7191672324055216, "grad_norm": 0.5720509988148191, "learning_rate": 1.930093323842271e-06, "loss": 0.2872, "step": 15890 }, { "epoch": 0.7192124915139172, "grad_norm": 0.6374653633908585, "learning_rate": 1.929514847209319e-06, "loss": 0.3056, "step": 15891 }, { "epoch": 0.7192577506223128, "grad_norm": 0.641860689442778, "learning_rate": 1.928936436551661e-06, "loss": 0.309, "step": 15892 }, { "epoch": 0.7193030097307083, "grad_norm": 0.33271062511421506, "learning_rate": 1.9283580918817284e-06, "loss": 0.4849, "step": 15893 }, { "epoch": 0.7193482688391039, "grad_norm": 0.5551185896909835, "learning_rate": 1.927779813211947e-06, "loss": 0.3101, "step": 15894 }, { "epoch": 0.7193935279474994, "grad_norm": 0.6580061730478527, "learning_rate": 1.92720160055474e-06, "loss": 0.2824, "step": 15895 }, { "epoch": 0.719438787055895, "grad_norm": 0.603955540037126, "learning_rate": 1.926623453922533e-06, "loss": 0.3185, "step": 15896 }, { "epoch": 0.7194840461642905, "grad_norm": 0.5711212663065297, "learning_rate": 1.9260453733277505e-06, "loss": 0.2553, "step": 15897 }, { "epoch": 0.7195293052726861, "grad_norm": 0.6889052549277278, "learning_rate": 1.925467358782812e-06, "loss": 0.3484, "step": 15898 }, { "epoch": 0.7195745643810817, "grad_norm": 0.2783497457159708, "learning_rate": 1.924889410300136e-06, "loss": 0.4684, "step": 15899 }, { "epoch": 0.7196198234894773, "grad_norm": 0.25821795988046475, "learning_rate": 1.9243115278921416e-06, "loss": 0.4618, "step": 15900 }, { "epoch": 0.7196650825978729, "grad_norm": 0.611423173817838, "learning_rate": 1.923733711571248e-06, "loss": 0.297, "step": 15901 }, { "epoch": 0.7197103417062684, "grad_norm": 0.5840177696674929, "learning_rate": 1.923155961349869e-06, "loss": 0.2693, "step": 15902 }, { "epoch": 0.7197556008146639, "grad_norm": 0.265000607498201, "learning_rate": 1.9225782772404166e-06, "loss": 0.4751, "step": 15903 }, { "epoch": 0.7198008599230595, "grad_norm": 0.64824568095004, "learning_rate": 1.9220006592553075e-06, "loss": 0.3015, "step": 15904 }, { "epoch": 0.7198461190314551, "grad_norm": 0.31206766498139427, "learning_rate": 1.921423107406949e-06, "loss": 0.4871, "step": 15905 }, { "epoch": 0.7198913781398506, "grad_norm": 0.6588422898892048, "learning_rate": 1.920845621707755e-06, "loss": 0.271, "step": 15906 }, { "epoch": 0.7199366372482462, "grad_norm": 0.5959150073512096, "learning_rate": 1.920268202170131e-06, "loss": 0.3178, "step": 15907 }, { "epoch": 0.7199818963566418, "grad_norm": 0.5610244219911166, "learning_rate": 1.9196908488064832e-06, "loss": 0.2759, "step": 15908 }, { "epoch": 0.7200271554650374, "grad_norm": 0.6057227446694428, "learning_rate": 1.9191135616292184e-06, "loss": 0.2924, "step": 15909 }, { "epoch": 0.7200724145734329, "grad_norm": 0.25465578536998, "learning_rate": 1.918536340650743e-06, "loss": 0.4753, "step": 15910 }, { "epoch": 0.7201176736818284, "grad_norm": 0.6162224428012438, "learning_rate": 1.9179591858834572e-06, "loss": 0.2771, "step": 15911 }, { "epoch": 0.720162932790224, "grad_norm": 0.6236076685705199, "learning_rate": 1.9173820973397617e-06, "loss": 0.2924, "step": 15912 }, { "epoch": 0.7202081918986196, "grad_norm": 0.6610076407431865, "learning_rate": 1.916805075032057e-06, "loss": 0.2964, "step": 15913 }, { "epoch": 0.7202534510070152, "grad_norm": 0.8042526061411998, "learning_rate": 1.9162281189727455e-06, "loss": 0.2768, "step": 15914 }, { "epoch": 0.7202987101154107, "grad_norm": 0.6371717003262385, "learning_rate": 1.915651229174217e-06, "loss": 0.316, "step": 15915 }, { "epoch": 0.7203439692238063, "grad_norm": 0.630859574891062, "learning_rate": 1.9150744056488708e-06, "loss": 0.3211, "step": 15916 }, { "epoch": 0.7203892283322019, "grad_norm": 0.692485725729248, "learning_rate": 1.9144976484091025e-06, "loss": 0.3136, "step": 15917 }, { "epoch": 0.7204344874405975, "grad_norm": 0.5700622958020366, "learning_rate": 1.913920957467304e-06, "loss": 0.2925, "step": 15918 }, { "epoch": 0.7204797465489929, "grad_norm": 0.5472061376592867, "learning_rate": 1.913344332835864e-06, "loss": 0.2976, "step": 15919 }, { "epoch": 0.7205250056573885, "grad_norm": 0.7371868821920786, "learning_rate": 1.9127677745271754e-06, "loss": 0.275, "step": 15920 }, { "epoch": 0.7205702647657841, "grad_norm": 0.9859973646941241, "learning_rate": 1.912191282553624e-06, "loss": 0.2919, "step": 15921 }, { "epoch": 0.7206155238741797, "grad_norm": 0.3129194570815802, "learning_rate": 1.911614856927601e-06, "loss": 0.4891, "step": 15922 }, { "epoch": 0.7206607829825753, "grad_norm": 0.7252536527570467, "learning_rate": 1.911038497661487e-06, "loss": 0.3079, "step": 15923 }, { "epoch": 0.7207060420909708, "grad_norm": 0.6356784735211609, "learning_rate": 1.910462204767671e-06, "loss": 0.3388, "step": 15924 }, { "epoch": 0.7207513011993664, "grad_norm": 0.8678659339354213, "learning_rate": 1.9098859782585313e-06, "loss": 0.3187, "step": 15925 }, { "epoch": 0.720796560307762, "grad_norm": 0.615283773322787, "learning_rate": 1.909309818146453e-06, "loss": 0.2709, "step": 15926 }, { "epoch": 0.7208418194161575, "grad_norm": 0.2776919225343176, "learning_rate": 1.9087337244438147e-06, "loss": 0.4684, "step": 15927 }, { "epoch": 0.720887078524553, "grad_norm": 0.6257009861394178, "learning_rate": 1.908157697162993e-06, "loss": 0.3262, "step": 15928 }, { "epoch": 0.7209323376329486, "grad_norm": 0.2599198789267447, "learning_rate": 1.9075817363163655e-06, "loss": 0.4589, "step": 15929 }, { "epoch": 0.7209775967413442, "grad_norm": 0.6840195491615668, "learning_rate": 1.9070058419163118e-06, "loss": 0.2479, "step": 15930 }, { "epoch": 0.7210228558497398, "grad_norm": 0.6094399970096488, "learning_rate": 1.9064300139752024e-06, "loss": 0.3021, "step": 15931 }, { "epoch": 0.7210681149581353, "grad_norm": 0.25754235454591273, "learning_rate": 1.9058542525054096e-06, "loss": 0.466, "step": 15932 }, { "epoch": 0.7211133740665309, "grad_norm": 0.6092321613812091, "learning_rate": 1.9052785575193072e-06, "loss": 0.3106, "step": 15933 }, { "epoch": 0.7211586331749265, "grad_norm": 0.6228942019247349, "learning_rate": 1.9047029290292623e-06, "loss": 0.2786, "step": 15934 }, { "epoch": 0.721203892283322, "grad_norm": 0.6296667281726287, "learning_rate": 1.9041273670476468e-06, "loss": 0.3071, "step": 15935 }, { "epoch": 0.7212491513917176, "grad_norm": 0.6188099089168433, "learning_rate": 1.9035518715868262e-06, "loss": 0.2921, "step": 15936 }, { "epoch": 0.7212944105001131, "grad_norm": 0.7346168096084281, "learning_rate": 1.9029764426591641e-06, "loss": 0.3299, "step": 15937 }, { "epoch": 0.7213396696085087, "grad_norm": 0.6566646675703187, "learning_rate": 1.902401080277026e-06, "loss": 0.2967, "step": 15938 }, { "epoch": 0.7213849287169043, "grad_norm": 0.6763430559004452, "learning_rate": 1.901825784452777e-06, "loss": 0.3422, "step": 15939 }, { "epoch": 0.7214301878252999, "grad_norm": 0.5829262688664122, "learning_rate": 1.9012505551987764e-06, "loss": 0.3176, "step": 15940 }, { "epoch": 0.7214754469336954, "grad_norm": 0.6464037491427786, "learning_rate": 1.900675392527383e-06, "loss": 0.355, "step": 15941 }, { "epoch": 0.721520706042091, "grad_norm": 0.6426142657790407, "learning_rate": 1.9001002964509564e-06, "loss": 0.3419, "step": 15942 }, { "epoch": 0.7215659651504865, "grad_norm": 0.3018870085983446, "learning_rate": 1.8995252669818577e-06, "loss": 0.4996, "step": 15943 }, { "epoch": 0.7216112242588821, "grad_norm": 0.6567595177042254, "learning_rate": 1.8989503041324341e-06, "loss": 0.2671, "step": 15944 }, { "epoch": 0.7216564833672776, "grad_norm": 1.1565325384593736, "learning_rate": 1.8983754079150452e-06, "loss": 0.3004, "step": 15945 }, { "epoch": 0.7217017424756732, "grad_norm": 0.6058733951545143, "learning_rate": 1.8978005783420444e-06, "loss": 0.2855, "step": 15946 }, { "epoch": 0.7217470015840688, "grad_norm": 0.6461791664712543, "learning_rate": 1.8972258154257816e-06, "loss": 0.3139, "step": 15947 }, { "epoch": 0.7217922606924644, "grad_norm": 0.6570032567439259, "learning_rate": 1.8966511191786047e-06, "loss": 0.2739, "step": 15948 }, { "epoch": 0.72183751980086, "grad_norm": 0.5894999645744051, "learning_rate": 1.896076489612866e-06, "loss": 0.2943, "step": 15949 }, { "epoch": 0.7218827789092555, "grad_norm": 0.5993238627006856, "learning_rate": 1.895501926740908e-06, "loss": 0.2833, "step": 15950 }, { "epoch": 0.721928038017651, "grad_norm": 0.6305668222337719, "learning_rate": 1.8949274305750814e-06, "loss": 0.2601, "step": 15951 }, { "epoch": 0.7219732971260466, "grad_norm": 0.617752552570206, "learning_rate": 1.8943530011277261e-06, "loss": 0.2872, "step": 15952 }, { "epoch": 0.7220185562344422, "grad_norm": 0.626608894837736, "learning_rate": 1.893778638411188e-06, "loss": 0.3434, "step": 15953 }, { "epoch": 0.7220638153428377, "grad_norm": 0.733790000471714, "learning_rate": 1.8932043424378049e-06, "loss": 0.3371, "step": 15954 }, { "epoch": 0.7221090744512333, "grad_norm": 0.294562872158215, "learning_rate": 1.892630113219921e-06, "loss": 0.4619, "step": 15955 }, { "epoch": 0.7221543335596289, "grad_norm": 0.6323404370467263, "learning_rate": 1.8920559507698722e-06, "loss": 0.3145, "step": 15956 }, { "epoch": 0.7221995926680245, "grad_norm": 0.6202866373164587, "learning_rate": 1.891481855099994e-06, "loss": 0.2851, "step": 15957 }, { "epoch": 0.72224485177642, "grad_norm": 0.6657534635544718, "learning_rate": 1.8909078262226237e-06, "loss": 0.3197, "step": 15958 }, { "epoch": 0.7222901108848155, "grad_norm": 0.5681651570767629, "learning_rate": 1.8903338641500967e-06, "loss": 0.3175, "step": 15959 }, { "epoch": 0.7223353699932111, "grad_norm": 0.2866490398022034, "learning_rate": 1.889759968894745e-06, "loss": 0.4748, "step": 15960 }, { "epoch": 0.7223806291016067, "grad_norm": 0.6383201904904624, "learning_rate": 1.889186140468897e-06, "loss": 0.3252, "step": 15961 }, { "epoch": 0.7224258882100023, "grad_norm": 0.5736735821452259, "learning_rate": 1.8886123788848864e-06, "loss": 0.2795, "step": 15962 }, { "epoch": 0.7224711473183978, "grad_norm": 0.6218336748590672, "learning_rate": 1.8880386841550385e-06, "loss": 0.2914, "step": 15963 }, { "epoch": 0.7225164064267934, "grad_norm": 0.6702019724144967, "learning_rate": 1.887465056291683e-06, "loss": 0.327, "step": 15964 }, { "epoch": 0.722561665535189, "grad_norm": 1.4128354899466162, "learning_rate": 1.8868914953071444e-06, "loss": 0.3026, "step": 15965 }, { "epoch": 0.7226069246435846, "grad_norm": 0.619592327990379, "learning_rate": 1.886318001213744e-06, "loss": 0.2937, "step": 15966 }, { "epoch": 0.72265218375198, "grad_norm": 0.6439866145775998, "learning_rate": 1.8857445740238073e-06, "loss": 0.3358, "step": 15967 }, { "epoch": 0.7226974428603756, "grad_norm": 0.6418633396022916, "learning_rate": 1.8851712137496564e-06, "loss": 0.3288, "step": 15968 }, { "epoch": 0.7227427019687712, "grad_norm": 0.5714028641956019, "learning_rate": 1.8845979204036101e-06, "loss": 0.2891, "step": 15969 }, { "epoch": 0.7227879610771668, "grad_norm": 0.5926099561173371, "learning_rate": 1.8840246939979846e-06, "loss": 0.3185, "step": 15970 }, { "epoch": 0.7228332201855624, "grad_norm": 0.5924911218056633, "learning_rate": 1.8834515345450977e-06, "loss": 0.2876, "step": 15971 }, { "epoch": 0.7228784792939579, "grad_norm": 0.6234909875094362, "learning_rate": 1.88287844205727e-06, "loss": 0.3069, "step": 15972 }, { "epoch": 0.7229237384023535, "grad_norm": 0.2745665548547292, "learning_rate": 1.882305416546807e-06, "loss": 0.4633, "step": 15973 }, { "epoch": 0.7229689975107491, "grad_norm": 0.6551699035958258, "learning_rate": 1.8817324580260254e-06, "loss": 0.3584, "step": 15974 }, { "epoch": 0.7230142566191446, "grad_norm": 0.27671999389208507, "learning_rate": 1.881159566507238e-06, "loss": 0.4772, "step": 15975 }, { "epoch": 0.7230595157275401, "grad_norm": 0.9831127283266593, "learning_rate": 1.8805867420027529e-06, "loss": 0.2879, "step": 15976 }, { "epoch": 0.7231047748359357, "grad_norm": 0.6164403804911328, "learning_rate": 1.880013984524876e-06, "loss": 0.3036, "step": 15977 }, { "epoch": 0.7231500339443313, "grad_norm": 0.2706023179382082, "learning_rate": 1.8794412940859186e-06, "loss": 0.4742, "step": 15978 }, { "epoch": 0.7231952930527269, "grad_norm": 0.813015049690279, "learning_rate": 1.8788686706981813e-06, "loss": 0.294, "step": 15979 }, { "epoch": 0.7232405521611224, "grad_norm": 0.617435460194587, "learning_rate": 1.8782961143739724e-06, "loss": 0.3002, "step": 15980 }, { "epoch": 0.723285811269518, "grad_norm": 0.6091669825529811, "learning_rate": 1.877723625125591e-06, "loss": 0.3329, "step": 15981 }, { "epoch": 0.7233310703779136, "grad_norm": 0.5936151512194228, "learning_rate": 1.877151202965341e-06, "loss": 0.2673, "step": 15982 }, { "epoch": 0.7233763294863091, "grad_norm": 0.26325091324160144, "learning_rate": 1.876578847905519e-06, "loss": 0.5012, "step": 15983 }, { "epoch": 0.7234215885947047, "grad_norm": 0.6278205249319658, "learning_rate": 1.8760065599584266e-06, "loss": 0.3388, "step": 15984 }, { "epoch": 0.7234668477031002, "grad_norm": 0.6902459735771315, "learning_rate": 1.8754343391363584e-06, "loss": 0.3194, "step": 15985 }, { "epoch": 0.7235121068114958, "grad_norm": 0.5960812307850535, "learning_rate": 1.874862185451608e-06, "loss": 0.3023, "step": 15986 }, { "epoch": 0.7235573659198914, "grad_norm": 0.6296054589179261, "learning_rate": 1.8742900989164713e-06, "loss": 0.2792, "step": 15987 }, { "epoch": 0.723602625028287, "grad_norm": 0.9689884059413011, "learning_rate": 1.8737180795432425e-06, "loss": 0.3203, "step": 15988 }, { "epoch": 0.7236478841366825, "grad_norm": 0.6232037537471353, "learning_rate": 1.8731461273442097e-06, "loss": 0.3106, "step": 15989 }, { "epoch": 0.723693143245078, "grad_norm": 0.6087724636557703, "learning_rate": 1.8725742423316623e-06, "loss": 0.2677, "step": 15990 }, { "epoch": 0.7237384023534736, "grad_norm": 0.6207701067127781, "learning_rate": 1.872002424517891e-06, "loss": 0.3131, "step": 15991 }, { "epoch": 0.7237836614618692, "grad_norm": 0.6289892487218232, "learning_rate": 1.8714306739151782e-06, "loss": 0.2783, "step": 15992 }, { "epoch": 0.7238289205702647, "grad_norm": 0.6020901857016302, "learning_rate": 1.8708589905358138e-06, "loss": 0.2943, "step": 15993 }, { "epoch": 0.7238741796786603, "grad_norm": 0.6692875016527511, "learning_rate": 1.8702873743920774e-06, "loss": 0.3065, "step": 15994 }, { "epoch": 0.7239194387870559, "grad_norm": 0.7282381530703047, "learning_rate": 1.869715825496255e-06, "loss": 0.3056, "step": 15995 }, { "epoch": 0.7239646978954515, "grad_norm": 0.29601027516313794, "learning_rate": 1.8691443438606239e-06, "loss": 0.4801, "step": 15996 }, { "epoch": 0.7240099570038471, "grad_norm": 0.6945105051331881, "learning_rate": 1.8685729294974668e-06, "loss": 0.3043, "step": 15997 }, { "epoch": 0.7240552161122426, "grad_norm": 0.618491726853251, "learning_rate": 1.86800158241906e-06, "loss": 0.3075, "step": 15998 }, { "epoch": 0.7241004752206381, "grad_norm": 0.6025137434539652, "learning_rate": 1.8674303026376783e-06, "loss": 0.2814, "step": 15999 }, { "epoch": 0.7241457343290337, "grad_norm": 0.59573508665197, "learning_rate": 1.866859090165598e-06, "loss": 0.251, "step": 16000 }, { "epoch": 0.7241909934374293, "grad_norm": 0.7315932204963735, "learning_rate": 1.8662879450150956e-06, "loss": 0.329, "step": 16001 }, { "epoch": 0.7242362525458248, "grad_norm": 0.6784928963482864, "learning_rate": 1.8657168671984404e-06, "loss": 0.4074, "step": 16002 }, { "epoch": 0.7242815116542204, "grad_norm": 0.6790960623785016, "learning_rate": 1.8651458567279018e-06, "loss": 0.2876, "step": 16003 }, { "epoch": 0.724326770762616, "grad_norm": 0.2862915989405643, "learning_rate": 1.8645749136157526e-06, "loss": 0.4789, "step": 16004 }, { "epoch": 0.7243720298710116, "grad_norm": 0.6123843036858047, "learning_rate": 1.8640040378742585e-06, "loss": 0.285, "step": 16005 }, { "epoch": 0.7244172889794072, "grad_norm": 0.6121730728790465, "learning_rate": 1.8634332295156848e-06, "loss": 0.3159, "step": 16006 }, { "epoch": 0.7244625480878026, "grad_norm": 0.5802479016860167, "learning_rate": 1.8628624885522994e-06, "loss": 0.2564, "step": 16007 }, { "epoch": 0.7245078071961982, "grad_norm": 0.26500677475920026, "learning_rate": 1.8622918149963626e-06, "loss": 0.4929, "step": 16008 }, { "epoch": 0.7245530663045938, "grad_norm": 0.6523572564394401, "learning_rate": 1.8617212088601395e-06, "loss": 0.3143, "step": 16009 }, { "epoch": 0.7245983254129894, "grad_norm": 0.6026168399191, "learning_rate": 1.8611506701558874e-06, "loss": 0.2882, "step": 16010 }, { "epoch": 0.7246435845213849, "grad_norm": 0.6382442474781357, "learning_rate": 1.8605801988958688e-06, "loss": 0.3491, "step": 16011 }, { "epoch": 0.7246888436297805, "grad_norm": 0.26119721971741644, "learning_rate": 1.8600097950923379e-06, "loss": 0.4603, "step": 16012 }, { "epoch": 0.7247341027381761, "grad_norm": 0.6042488032958495, "learning_rate": 1.8594394587575548e-06, "loss": 0.3025, "step": 16013 }, { "epoch": 0.7247793618465717, "grad_norm": 0.5995169075539502, "learning_rate": 1.858869189903772e-06, "loss": 0.2698, "step": 16014 }, { "epoch": 0.7248246209549671, "grad_norm": 0.5959838329113539, "learning_rate": 1.8582989885432412e-06, "loss": 0.313, "step": 16015 }, { "epoch": 0.7248698800633627, "grad_norm": 0.6811871875801533, "learning_rate": 1.8577288546882167e-06, "loss": 0.3019, "step": 16016 }, { "epoch": 0.7249151391717583, "grad_norm": 0.598166448273423, "learning_rate": 1.8571587883509495e-06, "loss": 0.2613, "step": 16017 }, { "epoch": 0.7249603982801539, "grad_norm": 0.6119582544966328, "learning_rate": 1.8565887895436874e-06, "loss": 0.2858, "step": 16018 }, { "epoch": 0.7250056573885495, "grad_norm": 0.6892323236298378, "learning_rate": 1.856018858278677e-06, "loss": 0.3108, "step": 16019 }, { "epoch": 0.725050916496945, "grad_norm": 0.5374278359807803, "learning_rate": 1.8554489945681663e-06, "loss": 0.2651, "step": 16020 }, { "epoch": 0.7250961756053406, "grad_norm": 0.6362418049415203, "learning_rate": 1.8548791984243975e-06, "loss": 0.2739, "step": 16021 }, { "epoch": 0.7251414347137362, "grad_norm": 0.6069602912118967, "learning_rate": 1.854309469859617e-06, "loss": 0.2955, "step": 16022 }, { "epoch": 0.7251866938221317, "grad_norm": 0.7050148269586889, "learning_rate": 1.853739808886063e-06, "loss": 0.3109, "step": 16023 }, { "epoch": 0.7252319529305272, "grad_norm": 1.9981279830274465, "learning_rate": 1.8531702155159792e-06, "loss": 0.3129, "step": 16024 }, { "epoch": 0.7252772120389228, "grad_norm": 0.7003608172407041, "learning_rate": 1.8526006897616011e-06, "loss": 0.3675, "step": 16025 }, { "epoch": 0.7253224711473184, "grad_norm": 0.6301314495559056, "learning_rate": 1.8520312316351692e-06, "loss": 0.272, "step": 16026 }, { "epoch": 0.725367730255714, "grad_norm": 0.6483469398299987, "learning_rate": 1.8514618411489176e-06, "loss": 0.2866, "step": 16027 }, { "epoch": 0.7254129893641095, "grad_norm": 0.6696378094528813, "learning_rate": 1.85089251831508e-06, "loss": 0.3393, "step": 16028 }, { "epoch": 0.7254582484725051, "grad_norm": 0.5718861514539335, "learning_rate": 1.85032326314589e-06, "loss": 0.2847, "step": 16029 }, { "epoch": 0.7255035075809007, "grad_norm": 0.6306886641099659, "learning_rate": 1.8497540756535814e-06, "loss": 0.3192, "step": 16030 }, { "epoch": 0.7255487666892962, "grad_norm": 0.6723825164117041, "learning_rate": 1.8491849558503827e-06, "loss": 0.2775, "step": 16031 }, { "epoch": 0.7255940257976918, "grad_norm": 0.589946999511413, "learning_rate": 1.8486159037485202e-06, "loss": 0.3138, "step": 16032 }, { "epoch": 0.7256392849060873, "grad_norm": 0.6564239072718545, "learning_rate": 1.848046919360225e-06, "loss": 0.2836, "step": 16033 }, { "epoch": 0.7256845440144829, "grad_norm": 0.30486315624182836, "learning_rate": 1.8474780026977196e-06, "loss": 0.4662, "step": 16034 }, { "epoch": 0.7257298031228785, "grad_norm": 0.28477761396096907, "learning_rate": 1.8469091537732315e-06, "loss": 0.4488, "step": 16035 }, { "epoch": 0.7257750622312741, "grad_norm": 0.6452670364264114, "learning_rate": 1.846340372598981e-06, "loss": 0.2711, "step": 16036 }, { "epoch": 0.7258203213396696, "grad_norm": 0.6356012564902356, "learning_rate": 1.8457716591871887e-06, "loss": 0.2993, "step": 16037 }, { "epoch": 0.7258655804480652, "grad_norm": 0.7041584132129752, "learning_rate": 1.8452030135500765e-06, "loss": 0.3009, "step": 16038 }, { "epoch": 0.7259108395564607, "grad_norm": 0.606881481776386, "learning_rate": 1.8446344356998635e-06, "loss": 0.2898, "step": 16039 }, { "epoch": 0.7259560986648563, "grad_norm": 0.6058541708204863, "learning_rate": 1.8440659256487658e-06, "loss": 0.2707, "step": 16040 }, { "epoch": 0.7260013577732519, "grad_norm": 0.6760264236093753, "learning_rate": 1.843497483408997e-06, "loss": 0.2898, "step": 16041 }, { "epoch": 0.7260466168816474, "grad_norm": 0.28486960915330306, "learning_rate": 1.8429291089927742e-06, "loss": 0.4867, "step": 16042 }, { "epoch": 0.726091875990043, "grad_norm": 0.6270785078332332, "learning_rate": 1.8423608024123086e-06, "loss": 0.3212, "step": 16043 }, { "epoch": 0.7261371350984386, "grad_norm": 0.560317095729978, "learning_rate": 1.8417925636798101e-06, "loss": 0.2915, "step": 16044 }, { "epoch": 0.7261823942068342, "grad_norm": 0.9136672675320224, "learning_rate": 1.8412243928074897e-06, "loss": 0.3181, "step": 16045 }, { "epoch": 0.7262276533152296, "grad_norm": 0.6204588826654109, "learning_rate": 1.840656289807557e-06, "loss": 0.2882, "step": 16046 }, { "epoch": 0.7262729124236252, "grad_norm": 0.620459957704693, "learning_rate": 1.8400882546922177e-06, "loss": 0.306, "step": 16047 }, { "epoch": 0.7263181715320208, "grad_norm": 0.6024461681337429, "learning_rate": 1.8395202874736752e-06, "loss": 0.3312, "step": 16048 }, { "epoch": 0.7263634306404164, "grad_norm": 0.5795091380255545, "learning_rate": 1.8389523881641363e-06, "loss": 0.299, "step": 16049 }, { "epoch": 0.7264086897488119, "grad_norm": 0.3210511536301892, "learning_rate": 1.8383845567758008e-06, "loss": 0.5161, "step": 16050 }, { "epoch": 0.7264539488572075, "grad_norm": 0.5599891840744048, "learning_rate": 1.8378167933208729e-06, "loss": 0.2658, "step": 16051 }, { "epoch": 0.7264992079656031, "grad_norm": 0.6129156924252082, "learning_rate": 1.837249097811548e-06, "loss": 0.2641, "step": 16052 }, { "epoch": 0.7265444670739987, "grad_norm": 0.7396036670714075, "learning_rate": 1.8366814702600288e-06, "loss": 0.3307, "step": 16053 }, { "epoch": 0.7265897261823943, "grad_norm": 0.30804189245711033, "learning_rate": 1.836113910678507e-06, "loss": 0.4751, "step": 16054 }, { "epoch": 0.7266349852907897, "grad_norm": 0.6450496615316457, "learning_rate": 1.835546419079182e-06, "loss": 0.3435, "step": 16055 }, { "epoch": 0.7266802443991853, "grad_norm": 0.2796091456412559, "learning_rate": 1.8349789954742459e-06, "loss": 0.4613, "step": 16056 }, { "epoch": 0.7267255035075809, "grad_norm": 0.64890080958883, "learning_rate": 1.8344116398758888e-06, "loss": 0.3306, "step": 16057 }, { "epoch": 0.7267707626159765, "grad_norm": 0.575461620410372, "learning_rate": 1.8338443522963028e-06, "loss": 0.3081, "step": 16058 }, { "epoch": 0.726816021724372, "grad_norm": 0.2883379797548653, "learning_rate": 1.8332771327476795e-06, "loss": 0.4966, "step": 16059 }, { "epoch": 0.7268612808327676, "grad_norm": 0.5764107107006161, "learning_rate": 1.832709981242205e-06, "loss": 0.3059, "step": 16060 }, { "epoch": 0.7269065399411632, "grad_norm": 0.6613462650278219, "learning_rate": 1.8321428977920635e-06, "loss": 0.3088, "step": 16061 }, { "epoch": 0.7269517990495588, "grad_norm": 0.5971963076408902, "learning_rate": 1.8315758824094432e-06, "loss": 0.2816, "step": 16062 }, { "epoch": 0.7269970581579542, "grad_norm": 0.6458655637611862, "learning_rate": 1.8310089351065246e-06, "loss": 0.3588, "step": 16063 }, { "epoch": 0.7270423172663498, "grad_norm": 0.6029614831205884, "learning_rate": 1.8304420558954933e-06, "loss": 0.2893, "step": 16064 }, { "epoch": 0.7270875763747454, "grad_norm": 0.6523860978116632, "learning_rate": 1.8298752447885254e-06, "loss": 0.3295, "step": 16065 }, { "epoch": 0.727132835483141, "grad_norm": 0.6085824327664999, "learning_rate": 1.829308501797804e-06, "loss": 0.3268, "step": 16066 }, { "epoch": 0.7271780945915366, "grad_norm": 0.5839321712913418, "learning_rate": 1.8287418269355035e-06, "loss": 0.3163, "step": 16067 }, { "epoch": 0.7272233536999321, "grad_norm": 0.5934730562217898, "learning_rate": 1.8281752202138032e-06, "loss": 0.2845, "step": 16068 }, { "epoch": 0.7272686128083277, "grad_norm": 0.5862163177911442, "learning_rate": 1.8276086816448751e-06, "loss": 0.3397, "step": 16069 }, { "epoch": 0.7273138719167233, "grad_norm": 0.6138676841077318, "learning_rate": 1.8270422112408919e-06, "loss": 0.3131, "step": 16070 }, { "epoch": 0.7273591310251188, "grad_norm": 0.6599671578492269, "learning_rate": 1.8264758090140267e-06, "loss": 0.2898, "step": 16071 }, { "epoch": 0.7274043901335143, "grad_norm": 0.6756821861159864, "learning_rate": 1.8259094749764532e-06, "loss": 0.2862, "step": 16072 }, { "epoch": 0.7274496492419099, "grad_norm": 0.5798011713799885, "learning_rate": 1.8253432091403329e-06, "loss": 0.2819, "step": 16073 }, { "epoch": 0.7274949083503055, "grad_norm": 0.5905087514687498, "learning_rate": 1.824777011517837e-06, "loss": 0.3195, "step": 16074 }, { "epoch": 0.7275401674587011, "grad_norm": 0.6026175779356564, "learning_rate": 1.8242108821211324e-06, "loss": 0.259, "step": 16075 }, { "epoch": 0.7275854265670967, "grad_norm": 0.2949314560163198, "learning_rate": 1.8236448209623825e-06, "loss": 0.4743, "step": 16076 }, { "epoch": 0.7276306856754922, "grad_norm": 0.28458791237527803, "learning_rate": 1.8230788280537487e-06, "loss": 0.4569, "step": 16077 }, { "epoch": 0.7276759447838878, "grad_norm": 0.5643217470901566, "learning_rate": 1.8225129034073951e-06, "loss": 0.2971, "step": 16078 }, { "epoch": 0.7277212038922833, "grad_norm": 0.9389791664087129, "learning_rate": 1.8219470470354784e-06, "loss": 0.3062, "step": 16079 }, { "epoch": 0.7277664630006789, "grad_norm": 0.6886383299953388, "learning_rate": 1.8213812589501611e-06, "loss": 0.2948, "step": 16080 }, { "epoch": 0.7278117221090744, "grad_norm": 0.5850857135648151, "learning_rate": 1.8208155391635963e-06, "loss": 0.2632, "step": 16081 }, { "epoch": 0.72785698121747, "grad_norm": 0.6042575374580038, "learning_rate": 1.8202498876879432e-06, "loss": 0.3238, "step": 16082 }, { "epoch": 0.7279022403258656, "grad_norm": 0.6928404697901881, "learning_rate": 1.8196843045353519e-06, "loss": 0.2896, "step": 16083 }, { "epoch": 0.7279474994342612, "grad_norm": 0.5341234808814107, "learning_rate": 1.8191187897179796e-06, "loss": 0.2536, "step": 16084 }, { "epoch": 0.7279927585426567, "grad_norm": 0.7459069229430887, "learning_rate": 1.8185533432479751e-06, "loss": 0.3156, "step": 16085 }, { "epoch": 0.7280380176510522, "grad_norm": 0.6091769615205567, "learning_rate": 1.8179879651374866e-06, "loss": 0.3109, "step": 16086 }, { "epoch": 0.7280832767594478, "grad_norm": 0.7380141827131348, "learning_rate": 1.8174226553986635e-06, "loss": 0.3079, "step": 16087 }, { "epoch": 0.7281285358678434, "grad_norm": 0.714612464179002, "learning_rate": 1.816857414043655e-06, "loss": 0.3053, "step": 16088 }, { "epoch": 0.728173794976239, "grad_norm": 0.2860785773149628, "learning_rate": 1.8162922410846046e-06, "loss": 0.4745, "step": 16089 }, { "epoch": 0.7282190540846345, "grad_norm": 0.6039689558987191, "learning_rate": 1.8157271365336536e-06, "loss": 0.303, "step": 16090 }, { "epoch": 0.7282643131930301, "grad_norm": 0.6083549302228266, "learning_rate": 1.815162100402949e-06, "loss": 0.2985, "step": 16091 }, { "epoch": 0.7283095723014257, "grad_norm": 0.2660101982454626, "learning_rate": 1.8145971327046274e-06, "loss": 0.465, "step": 16092 }, { "epoch": 0.7283548314098213, "grad_norm": 0.6313514296218542, "learning_rate": 1.814032233450832e-06, "loss": 0.2818, "step": 16093 }, { "epoch": 0.7284000905182167, "grad_norm": 0.6567990816362825, "learning_rate": 1.8134674026536968e-06, "loss": 0.3437, "step": 16094 }, { "epoch": 0.7284453496266123, "grad_norm": 0.5475313855383052, "learning_rate": 1.8129026403253624e-06, "loss": 0.2579, "step": 16095 }, { "epoch": 0.7284906087350079, "grad_norm": 0.5950986141428766, "learning_rate": 1.8123379464779606e-06, "loss": 0.3002, "step": 16096 }, { "epoch": 0.7285358678434035, "grad_norm": 0.27592219794883777, "learning_rate": 1.8117733211236277e-06, "loss": 0.4713, "step": 16097 }, { "epoch": 0.728581126951799, "grad_norm": 0.6526013033225897, "learning_rate": 1.811208764274494e-06, "loss": 0.3383, "step": 16098 }, { "epoch": 0.7286263860601946, "grad_norm": 0.3223904043897807, "learning_rate": 1.8106442759426884e-06, "loss": 0.4811, "step": 16099 }, { "epoch": 0.7286716451685902, "grad_norm": 0.5550383719292121, "learning_rate": 1.8100798561403426e-06, "loss": 0.2741, "step": 16100 }, { "epoch": 0.7287169042769858, "grad_norm": 0.6392507722711632, "learning_rate": 1.8095155048795865e-06, "loss": 0.2984, "step": 16101 }, { "epoch": 0.7287621633853814, "grad_norm": 0.2879523619249691, "learning_rate": 1.8089512221725402e-06, "loss": 0.4739, "step": 16102 }, { "epoch": 0.7288074224937768, "grad_norm": 0.6498409838546698, "learning_rate": 1.8083870080313315e-06, "loss": 0.2789, "step": 16103 }, { "epoch": 0.7288526816021724, "grad_norm": 0.6382210882940718, "learning_rate": 1.8078228624680854e-06, "loss": 0.3114, "step": 16104 }, { "epoch": 0.728897940710568, "grad_norm": 0.6468102133095937, "learning_rate": 1.807258785494922e-06, "loss": 0.277, "step": 16105 }, { "epoch": 0.7289431998189636, "grad_norm": 0.6125188426779778, "learning_rate": 1.8066947771239597e-06, "loss": 0.2999, "step": 16106 }, { "epoch": 0.7289884589273591, "grad_norm": 0.6198896839628587, "learning_rate": 1.8061308373673208e-06, "loss": 0.2838, "step": 16107 }, { "epoch": 0.7290337180357547, "grad_norm": 0.28230401094755525, "learning_rate": 1.8055669662371194e-06, "loss": 0.492, "step": 16108 }, { "epoch": 0.7290789771441503, "grad_norm": 0.6089025119666203, "learning_rate": 1.8050031637454746e-06, "loss": 0.286, "step": 16109 }, { "epoch": 0.7291242362525459, "grad_norm": 0.6294693049743567, "learning_rate": 1.8044394299044976e-06, "loss": 0.2955, "step": 16110 }, { "epoch": 0.7291694953609414, "grad_norm": 0.7226863549811137, "learning_rate": 1.8038757647263045e-06, "loss": 0.2899, "step": 16111 }, { "epoch": 0.7292147544693369, "grad_norm": 0.31058843361683514, "learning_rate": 1.803312168223003e-06, "loss": 0.4737, "step": 16112 }, { "epoch": 0.7292600135777325, "grad_norm": 0.6455846866744789, "learning_rate": 1.8027486404067075e-06, "loss": 0.3407, "step": 16113 }, { "epoch": 0.7293052726861281, "grad_norm": 0.5926280243786574, "learning_rate": 1.8021851812895235e-06, "loss": 0.2939, "step": 16114 }, { "epoch": 0.7293505317945237, "grad_norm": 0.6670643403778769, "learning_rate": 1.8016217908835575e-06, "loss": 0.269, "step": 16115 }, { "epoch": 0.7293957909029192, "grad_norm": 0.5872139058201896, "learning_rate": 1.8010584692009158e-06, "loss": 0.316, "step": 16116 }, { "epoch": 0.7294410500113148, "grad_norm": 0.6360687494862899, "learning_rate": 1.8004952162537043e-06, "loss": 0.3087, "step": 16117 }, { "epoch": 0.7294863091197104, "grad_norm": 0.27153954274586384, "learning_rate": 1.7999320320540242e-06, "loss": 0.4696, "step": 16118 }, { "epoch": 0.7295315682281059, "grad_norm": 0.7052951096372572, "learning_rate": 1.799368916613975e-06, "loss": 0.3101, "step": 16119 }, { "epoch": 0.7295768273365014, "grad_norm": 0.7958306372804045, "learning_rate": 1.7988058699456596e-06, "loss": 0.3768, "step": 16120 }, { "epoch": 0.729622086444897, "grad_norm": 0.5730055696897941, "learning_rate": 1.7982428920611722e-06, "loss": 0.2838, "step": 16121 }, { "epoch": 0.7296673455532926, "grad_norm": 0.5679127683681903, "learning_rate": 1.7976799829726138e-06, "loss": 0.2892, "step": 16122 }, { "epoch": 0.7297126046616882, "grad_norm": 0.6729493635220757, "learning_rate": 1.7971171426920753e-06, "loss": 0.2859, "step": 16123 }, { "epoch": 0.7297578637700838, "grad_norm": 0.6069928623620314, "learning_rate": 1.796554371231654e-06, "loss": 0.3034, "step": 16124 }, { "epoch": 0.7298031228784793, "grad_norm": 0.5807258567124326, "learning_rate": 1.7959916686034395e-06, "loss": 0.2854, "step": 16125 }, { "epoch": 0.7298483819868749, "grad_norm": 0.8648799347898458, "learning_rate": 1.7954290348195248e-06, "loss": 0.257, "step": 16126 }, { "epoch": 0.7298936410952704, "grad_norm": 0.620623586958228, "learning_rate": 1.7948664698919987e-06, "loss": 0.3324, "step": 16127 }, { "epoch": 0.729938900203666, "grad_norm": 0.6149741888961061, "learning_rate": 1.794303973832946e-06, "loss": 0.2806, "step": 16128 }, { "epoch": 0.7299841593120615, "grad_norm": 0.627170473824644, "learning_rate": 1.7937415466544556e-06, "loss": 0.3367, "step": 16129 }, { "epoch": 0.7300294184204571, "grad_norm": 0.6527986768715455, "learning_rate": 1.7931791883686155e-06, "loss": 0.2592, "step": 16130 }, { "epoch": 0.7300746775288527, "grad_norm": 0.6122081032264333, "learning_rate": 1.7926168989875027e-06, "loss": 0.3038, "step": 16131 }, { "epoch": 0.7301199366372483, "grad_norm": 0.8831182658752256, "learning_rate": 1.7920546785232013e-06, "loss": 0.3087, "step": 16132 }, { "epoch": 0.7301651957456438, "grad_norm": 1.3518827731634866, "learning_rate": 1.7914925269877947e-06, "loss": 0.2888, "step": 16133 }, { "epoch": 0.7302104548540393, "grad_norm": 0.6225243673416847, "learning_rate": 1.790930444393359e-06, "loss": 0.2892, "step": 16134 }, { "epoch": 0.7302557139624349, "grad_norm": 0.8839277255860258, "learning_rate": 1.790368430751971e-06, "loss": 0.284, "step": 16135 }, { "epoch": 0.7303009730708305, "grad_norm": 0.6256720305902387, "learning_rate": 1.789806486075707e-06, "loss": 0.2914, "step": 16136 }, { "epoch": 0.7303462321792261, "grad_norm": 0.929858129804423, "learning_rate": 1.7892446103766448e-06, "loss": 0.2669, "step": 16137 }, { "epoch": 0.7303914912876216, "grad_norm": 0.613979034701072, "learning_rate": 1.7886828036668541e-06, "loss": 0.2845, "step": 16138 }, { "epoch": 0.7304367503960172, "grad_norm": 0.7011755943506092, "learning_rate": 1.7881210659584059e-06, "loss": 0.3454, "step": 16139 }, { "epoch": 0.7304820095044128, "grad_norm": 0.28666942994097444, "learning_rate": 1.787559397263373e-06, "loss": 0.4831, "step": 16140 }, { "epoch": 0.7305272686128084, "grad_norm": 0.5808081092214722, "learning_rate": 1.7869977975938207e-06, "loss": 0.3135, "step": 16141 }, { "epoch": 0.7305725277212038, "grad_norm": 0.6514388854146006, "learning_rate": 1.7864362669618197e-06, "loss": 0.2843, "step": 16142 }, { "epoch": 0.7306177868295994, "grad_norm": 0.27221879864624515, "learning_rate": 1.7858748053794334e-06, "loss": 0.45, "step": 16143 }, { "epoch": 0.730663045937995, "grad_norm": 0.6348420393974701, "learning_rate": 1.7853134128587246e-06, "loss": 0.2486, "step": 16144 }, { "epoch": 0.7307083050463906, "grad_norm": 0.6297590527294329, "learning_rate": 1.7847520894117571e-06, "loss": 0.2917, "step": 16145 }, { "epoch": 0.7307535641547862, "grad_norm": 0.6166517237990424, "learning_rate": 1.7841908350505938e-06, "loss": 0.247, "step": 16146 }, { "epoch": 0.7307988232631817, "grad_norm": 0.6476230729976626, "learning_rate": 1.7836296497872934e-06, "loss": 0.3184, "step": 16147 }, { "epoch": 0.7308440823715773, "grad_norm": 0.6107349964845618, "learning_rate": 1.7830685336339114e-06, "loss": 0.3087, "step": 16148 }, { "epoch": 0.7308893414799729, "grad_norm": 0.6243457657114342, "learning_rate": 1.7825074866025089e-06, "loss": 0.3058, "step": 16149 }, { "epoch": 0.7309346005883685, "grad_norm": 0.609189981925863, "learning_rate": 1.7819465087051363e-06, "loss": 0.2918, "step": 16150 }, { "epoch": 0.7309798596967639, "grad_norm": 0.7824741621821414, "learning_rate": 1.7813855999538516e-06, "loss": 0.2945, "step": 16151 }, { "epoch": 0.7310251188051595, "grad_norm": 0.6048509705744186, "learning_rate": 1.7808247603607037e-06, "loss": 0.3111, "step": 16152 }, { "epoch": 0.7310703779135551, "grad_norm": 0.6128880269982051, "learning_rate": 1.780263989937746e-06, "loss": 0.3529, "step": 16153 }, { "epoch": 0.7311156370219507, "grad_norm": 0.2896886702152505, "learning_rate": 1.7797032886970255e-06, "loss": 0.4761, "step": 16154 }, { "epoch": 0.7311608961303462, "grad_norm": 0.5650019294174237, "learning_rate": 1.779142656650592e-06, "loss": 0.2465, "step": 16155 }, { "epoch": 0.7312061552387418, "grad_norm": 0.6835006757504737, "learning_rate": 1.7785820938104908e-06, "loss": 0.2917, "step": 16156 }, { "epoch": 0.7312514143471374, "grad_norm": 0.6490444548301334, "learning_rate": 1.778021600188765e-06, "loss": 0.2823, "step": 16157 }, { "epoch": 0.731296673455533, "grad_norm": 0.5865118446140698, "learning_rate": 1.7774611757974597e-06, "loss": 0.3177, "step": 16158 }, { "epoch": 0.7313419325639285, "grad_norm": 0.5935785923704926, "learning_rate": 1.7769008206486198e-06, "loss": 0.3035, "step": 16159 }, { "epoch": 0.731387191672324, "grad_norm": 0.6843858349341361, "learning_rate": 1.7763405347542783e-06, "loss": 0.2603, "step": 16160 }, { "epoch": 0.7314324507807196, "grad_norm": 0.6532083072580005, "learning_rate": 1.7757803181264787e-06, "loss": 0.3195, "step": 16161 }, { "epoch": 0.7314777098891152, "grad_norm": 0.6233717325091536, "learning_rate": 1.7752201707772593e-06, "loss": 0.2925, "step": 16162 }, { "epoch": 0.7315229689975108, "grad_norm": 1.1668439049554575, "learning_rate": 1.7746600927186537e-06, "loss": 0.3321, "step": 16163 }, { "epoch": 0.7315682281059063, "grad_norm": 0.5655046089812515, "learning_rate": 1.7741000839626954e-06, "loss": 0.289, "step": 16164 }, { "epoch": 0.7316134872143019, "grad_norm": 0.5983977680445043, "learning_rate": 1.773540144521419e-06, "loss": 0.2901, "step": 16165 }, { "epoch": 0.7316587463226975, "grad_norm": 0.5956927504390058, "learning_rate": 1.7729802744068568e-06, "loss": 0.3212, "step": 16166 }, { "epoch": 0.731704005431093, "grad_norm": 0.6098619134893986, "learning_rate": 1.772420473631038e-06, "loss": 0.2913, "step": 16167 }, { "epoch": 0.7317492645394885, "grad_norm": 0.5879445773626746, "learning_rate": 1.771860742205988e-06, "loss": 0.3132, "step": 16168 }, { "epoch": 0.7317945236478841, "grad_norm": 0.5972963244648661, "learning_rate": 1.7713010801437385e-06, "loss": 0.3341, "step": 16169 }, { "epoch": 0.7318397827562797, "grad_norm": 0.6732092232271325, "learning_rate": 1.7707414874563105e-06, "loss": 0.3067, "step": 16170 }, { "epoch": 0.7318850418646753, "grad_norm": 0.5417037764108821, "learning_rate": 1.7701819641557321e-06, "loss": 0.2702, "step": 16171 }, { "epoch": 0.7319303009730709, "grad_norm": 0.31958280675188766, "learning_rate": 1.7696225102540238e-06, "loss": 0.482, "step": 16172 }, { "epoch": 0.7319755600814664, "grad_norm": 0.5808850557217945, "learning_rate": 1.769063125763204e-06, "loss": 0.2677, "step": 16173 }, { "epoch": 0.732020819189862, "grad_norm": 0.32794153591687286, "learning_rate": 1.7685038106952952e-06, "loss": 0.4763, "step": 16174 }, { "epoch": 0.7320660782982575, "grad_norm": 0.27041347084527767, "learning_rate": 1.7679445650623162e-06, "loss": 0.463, "step": 16175 }, { "epoch": 0.7321113374066531, "grad_norm": 0.6774948881437475, "learning_rate": 1.767385388876282e-06, "loss": 0.2688, "step": 16176 }, { "epoch": 0.7321565965150486, "grad_norm": 0.6307995198758111, "learning_rate": 1.7668262821492061e-06, "loss": 0.3656, "step": 16177 }, { "epoch": 0.7322018556234442, "grad_norm": 0.645933753279425, "learning_rate": 1.7662672448931045e-06, "loss": 0.2587, "step": 16178 }, { "epoch": 0.7322471147318398, "grad_norm": 0.6571171710246331, "learning_rate": 1.7657082771199875e-06, "loss": 0.3034, "step": 16179 }, { "epoch": 0.7322923738402354, "grad_norm": 0.27135675583410107, "learning_rate": 1.7651493788418671e-06, "loss": 0.4493, "step": 16180 }, { "epoch": 0.7323376329486309, "grad_norm": 0.6301878641061803, "learning_rate": 1.76459055007075e-06, "loss": 0.2961, "step": 16181 }, { "epoch": 0.7323828920570264, "grad_norm": 0.5330125153875653, "learning_rate": 1.7640317908186466e-06, "loss": 0.2704, "step": 16182 }, { "epoch": 0.732428151165422, "grad_norm": 0.6052744126769056, "learning_rate": 1.7634731010975603e-06, "loss": 0.3365, "step": 16183 }, { "epoch": 0.7324734102738176, "grad_norm": 0.7142932836569383, "learning_rate": 1.7629144809194982e-06, "loss": 0.2946, "step": 16184 }, { "epoch": 0.7325186693822132, "grad_norm": 0.637528726658327, "learning_rate": 1.762355930296462e-06, "loss": 0.3449, "step": 16185 }, { "epoch": 0.7325639284906087, "grad_norm": 0.6385320594102418, "learning_rate": 1.7617974492404517e-06, "loss": 0.3179, "step": 16186 }, { "epoch": 0.7326091875990043, "grad_norm": 0.3100847434791019, "learning_rate": 1.7612390377634685e-06, "loss": 0.487, "step": 16187 }, { "epoch": 0.7326544467073999, "grad_norm": 0.6732555146506458, "learning_rate": 1.7606806958775135e-06, "loss": 0.3023, "step": 16188 }, { "epoch": 0.7326997058157955, "grad_norm": 1.0851260304440182, "learning_rate": 1.7601224235945814e-06, "loss": 0.2677, "step": 16189 }, { "epoch": 0.732744964924191, "grad_norm": 0.5790100028298947, "learning_rate": 1.7595642209266656e-06, "loss": 0.358, "step": 16190 }, { "epoch": 0.7327902240325865, "grad_norm": 0.5527251623525239, "learning_rate": 1.7590060878857646e-06, "loss": 0.2689, "step": 16191 }, { "epoch": 0.7328354831409821, "grad_norm": 0.31178955263418023, "learning_rate": 1.7584480244838687e-06, "loss": 0.4709, "step": 16192 }, { "epoch": 0.7328807422493777, "grad_norm": 0.645658883416608, "learning_rate": 1.7578900307329677e-06, "loss": 0.3121, "step": 16193 }, { "epoch": 0.7329260013577733, "grad_norm": 0.6602342215994514, "learning_rate": 1.7573321066450521e-06, "loss": 0.32, "step": 16194 }, { "epoch": 0.7329712604661688, "grad_norm": 0.28188324345028737, "learning_rate": 1.7567742522321125e-06, "loss": 0.4472, "step": 16195 }, { "epoch": 0.7330165195745644, "grad_norm": 0.5970984254245914, "learning_rate": 1.7562164675061332e-06, "loss": 0.3067, "step": 16196 }, { "epoch": 0.73306177868296, "grad_norm": 0.5885991002760158, "learning_rate": 1.755658752479098e-06, "loss": 0.3314, "step": 16197 }, { "epoch": 0.7331070377913556, "grad_norm": 0.27455054719754596, "learning_rate": 1.7551011071629937e-06, "loss": 0.4673, "step": 16198 }, { "epoch": 0.733152296899751, "grad_norm": 0.564345589903344, "learning_rate": 1.7545435315697984e-06, "loss": 0.2751, "step": 16199 }, { "epoch": 0.7331975560081466, "grad_norm": 0.5819176536099305, "learning_rate": 1.7539860257114972e-06, "loss": 0.2864, "step": 16200 }, { "epoch": 0.7332428151165422, "grad_norm": 0.6644071129619711, "learning_rate": 1.7534285896000668e-06, "loss": 0.344, "step": 16201 }, { "epoch": 0.7332880742249378, "grad_norm": 0.9543976113779502, "learning_rate": 1.7528712232474832e-06, "loss": 0.302, "step": 16202 }, { "epoch": 0.7333333333333333, "grad_norm": 0.6011566578844129, "learning_rate": 1.7523139266657241e-06, "loss": 0.2844, "step": 16203 }, { "epoch": 0.7333785924417289, "grad_norm": 0.6951890360401429, "learning_rate": 1.7517566998667661e-06, "loss": 0.2943, "step": 16204 }, { "epoch": 0.7334238515501245, "grad_norm": 0.2817462091680761, "learning_rate": 1.7511995428625805e-06, "loss": 0.4641, "step": 16205 }, { "epoch": 0.73346911065852, "grad_norm": 0.28614243110327414, "learning_rate": 1.7506424556651368e-06, "loss": 0.4649, "step": 16206 }, { "epoch": 0.7335143697669156, "grad_norm": 0.2573959389343955, "learning_rate": 1.7500854382864073e-06, "loss": 0.4686, "step": 16207 }, { "epoch": 0.7335596288753111, "grad_norm": 0.6144018276945281, "learning_rate": 1.749528490738362e-06, "loss": 0.3207, "step": 16208 }, { "epoch": 0.7336048879837067, "grad_norm": 0.5710859253017799, "learning_rate": 1.7489716130329665e-06, "loss": 0.2789, "step": 16209 }, { "epoch": 0.7336501470921023, "grad_norm": 0.6540083390363107, "learning_rate": 1.7484148051821842e-06, "loss": 0.2682, "step": 16210 }, { "epoch": 0.7336954062004979, "grad_norm": 0.6419187298562828, "learning_rate": 1.7478580671979834e-06, "loss": 0.307, "step": 16211 }, { "epoch": 0.7337406653088934, "grad_norm": 0.552366230549885, "learning_rate": 1.7473013990923226e-06, "loss": 0.2631, "step": 16212 }, { "epoch": 0.733785924417289, "grad_norm": 0.6651943539308403, "learning_rate": 1.7467448008771664e-06, "loss": 0.2808, "step": 16213 }, { "epoch": 0.7338311835256845, "grad_norm": 0.6485848113037442, "learning_rate": 1.746188272564473e-06, "loss": 0.3085, "step": 16214 }, { "epoch": 0.7338764426340801, "grad_norm": 0.6131481732266474, "learning_rate": 1.7456318141661987e-06, "loss": 0.2773, "step": 16215 }, { "epoch": 0.7339217017424756, "grad_norm": 0.5674989271994176, "learning_rate": 1.7450754256943014e-06, "loss": 0.318, "step": 16216 }, { "epoch": 0.7339669608508712, "grad_norm": 0.2680277654815759, "learning_rate": 1.7445191071607386e-06, "loss": 0.4632, "step": 16217 }, { "epoch": 0.7340122199592668, "grad_norm": 0.5726163088083188, "learning_rate": 1.7439628585774614e-06, "loss": 0.3311, "step": 16218 }, { "epoch": 0.7340574790676624, "grad_norm": 0.5971453091407026, "learning_rate": 1.7434066799564204e-06, "loss": 0.2926, "step": 16219 }, { "epoch": 0.734102738176058, "grad_norm": 0.5955186786844018, "learning_rate": 1.74285057130957e-06, "loss": 0.2949, "step": 16220 }, { "epoch": 0.7341479972844535, "grad_norm": 0.5848454136822139, "learning_rate": 1.7422945326488555e-06, "loss": 0.2825, "step": 16221 }, { "epoch": 0.734193256392849, "grad_norm": 0.28955425891968584, "learning_rate": 1.7417385639862278e-06, "loss": 0.4347, "step": 16222 }, { "epoch": 0.7342385155012446, "grad_norm": 0.5873018904151689, "learning_rate": 1.7411826653336294e-06, "loss": 0.2977, "step": 16223 }, { "epoch": 0.7342837746096402, "grad_norm": 0.7384682886218776, "learning_rate": 1.7406268367030094e-06, "loss": 0.2365, "step": 16224 }, { "epoch": 0.7343290337180357, "grad_norm": 0.5949250394015495, "learning_rate": 1.7400710781063073e-06, "loss": 0.312, "step": 16225 }, { "epoch": 0.7343742928264313, "grad_norm": 0.6304591743920028, "learning_rate": 1.7395153895554646e-06, "loss": 0.354, "step": 16226 }, { "epoch": 0.7344195519348269, "grad_norm": 0.2623909581641417, "learning_rate": 1.7389597710624234e-06, "loss": 0.4543, "step": 16227 }, { "epoch": 0.7344648110432225, "grad_norm": 0.5992424361294328, "learning_rate": 1.73840422263912e-06, "loss": 0.2858, "step": 16228 }, { "epoch": 0.7345100701516181, "grad_norm": 0.6152299382781466, "learning_rate": 1.7378487442974946e-06, "loss": 0.3059, "step": 16229 }, { "epoch": 0.7345553292600135, "grad_norm": 0.6201435136046975, "learning_rate": 1.7372933360494803e-06, "loss": 0.2761, "step": 16230 }, { "epoch": 0.7346005883684091, "grad_norm": 0.5975736264648502, "learning_rate": 1.7367379979070098e-06, "loss": 0.2514, "step": 16231 }, { "epoch": 0.7346458474768047, "grad_norm": 0.6407013057308736, "learning_rate": 1.7361827298820177e-06, "loss": 0.2821, "step": 16232 }, { "epoch": 0.7346911065852003, "grad_norm": 0.5952618697201847, "learning_rate": 1.7356275319864363e-06, "loss": 0.2691, "step": 16233 }, { "epoch": 0.7347363656935958, "grad_norm": 0.7453342568149233, "learning_rate": 1.735072404232193e-06, "loss": 0.2683, "step": 16234 }, { "epoch": 0.7347816248019914, "grad_norm": 0.3013045199708562, "learning_rate": 1.7345173466312154e-06, "loss": 0.467, "step": 16235 }, { "epoch": 0.734826883910387, "grad_norm": 0.6086860644721339, "learning_rate": 1.7339623591954302e-06, "loss": 0.3008, "step": 16236 }, { "epoch": 0.7348721430187826, "grad_norm": 0.2812574722109745, "learning_rate": 1.7334074419367653e-06, "loss": 0.4836, "step": 16237 }, { "epoch": 0.734917402127178, "grad_norm": 0.6199272597355754, "learning_rate": 1.7328525948671415e-06, "loss": 0.33, "step": 16238 }, { "epoch": 0.7349626612355736, "grad_norm": 0.666636524721789, "learning_rate": 1.7322978179984794e-06, "loss": 0.3248, "step": 16239 }, { "epoch": 0.7350079203439692, "grad_norm": 0.5992083829293151, "learning_rate": 1.731743111342703e-06, "loss": 0.2746, "step": 16240 }, { "epoch": 0.7350531794523648, "grad_norm": 0.6130098920143602, "learning_rate": 1.731188474911728e-06, "loss": 0.315, "step": 16241 }, { "epoch": 0.7350984385607604, "grad_norm": 0.6134321032151977, "learning_rate": 1.7306339087174746e-06, "loss": 0.3267, "step": 16242 }, { "epoch": 0.7351436976691559, "grad_norm": 0.6233762873878859, "learning_rate": 1.7300794127718573e-06, "loss": 0.3262, "step": 16243 }, { "epoch": 0.7351889567775515, "grad_norm": 0.6017139780144443, "learning_rate": 1.7295249870867898e-06, "loss": 0.2899, "step": 16244 }, { "epoch": 0.7352342158859471, "grad_norm": 0.6050681339465913, "learning_rate": 1.728970631674185e-06, "loss": 0.2636, "step": 16245 }, { "epoch": 0.7352794749943427, "grad_norm": 0.24791910992335017, "learning_rate": 1.7284163465459568e-06, "loss": 0.4481, "step": 16246 }, { "epoch": 0.7353247341027381, "grad_norm": 0.24852137458762985, "learning_rate": 1.7278621317140138e-06, "loss": 0.4439, "step": 16247 }, { "epoch": 0.7353699932111337, "grad_norm": 0.6095744267748847, "learning_rate": 1.727307987190262e-06, "loss": 0.2865, "step": 16248 }, { "epoch": 0.7354152523195293, "grad_norm": 0.6017360995143893, "learning_rate": 1.7267539129866107e-06, "loss": 0.2863, "step": 16249 }, { "epoch": 0.7354605114279249, "grad_norm": 0.6757623548504654, "learning_rate": 1.7261999091149662e-06, "loss": 0.3323, "step": 16250 }, { "epoch": 0.7355057705363204, "grad_norm": 0.5997681786802973, "learning_rate": 1.7256459755872306e-06, "loss": 0.2743, "step": 16251 }, { "epoch": 0.735551029644716, "grad_norm": 0.6532107189741281, "learning_rate": 1.7250921124153057e-06, "loss": 0.3206, "step": 16252 }, { "epoch": 0.7355962887531116, "grad_norm": 0.3912425490997418, "learning_rate": 1.7245383196110944e-06, "loss": 0.4662, "step": 16253 }, { "epoch": 0.7356415478615072, "grad_norm": 0.6560478513281396, "learning_rate": 1.7239845971864932e-06, "loss": 0.2901, "step": 16254 }, { "epoch": 0.7356868069699027, "grad_norm": 0.6498356637789432, "learning_rate": 1.7234309451534032e-06, "loss": 0.2927, "step": 16255 }, { "epoch": 0.7357320660782982, "grad_norm": 0.27313198293460317, "learning_rate": 1.7228773635237183e-06, "loss": 0.4435, "step": 16256 }, { "epoch": 0.7357773251866938, "grad_norm": 0.6455232477977734, "learning_rate": 1.7223238523093334e-06, "loss": 0.3067, "step": 16257 }, { "epoch": 0.7358225842950894, "grad_norm": 0.2594817494985136, "learning_rate": 1.7217704115221417e-06, "loss": 0.4774, "step": 16258 }, { "epoch": 0.735867843403485, "grad_norm": 0.6079411337663703, "learning_rate": 1.7212170411740386e-06, "loss": 0.2811, "step": 16259 }, { "epoch": 0.7359131025118805, "grad_norm": 0.6617218118446594, "learning_rate": 1.7206637412769084e-06, "loss": 0.2982, "step": 16260 }, { "epoch": 0.7359583616202761, "grad_norm": 0.6300539343632511, "learning_rate": 1.7201105118426425e-06, "loss": 0.3082, "step": 16261 }, { "epoch": 0.7360036207286716, "grad_norm": 0.7164263661192096, "learning_rate": 1.71955735288313e-06, "loss": 0.2977, "step": 16262 }, { "epoch": 0.7360488798370672, "grad_norm": 0.6183776534457194, "learning_rate": 1.719004264410255e-06, "loss": 0.2782, "step": 16263 }, { "epoch": 0.7360941389454628, "grad_norm": 0.6105854472134217, "learning_rate": 1.7184512464358998e-06, "loss": 0.2975, "step": 16264 }, { "epoch": 0.7361393980538583, "grad_norm": 0.27064198134083034, "learning_rate": 1.717898298971949e-06, "loss": 0.4445, "step": 16265 }, { "epoch": 0.7361846571622539, "grad_norm": 0.6112261479619383, "learning_rate": 1.717345422030285e-06, "loss": 0.3312, "step": 16266 }, { "epoch": 0.7362299162706495, "grad_norm": 0.5993561678079502, "learning_rate": 1.7167926156227854e-06, "loss": 0.2999, "step": 16267 }, { "epoch": 0.7362751753790451, "grad_norm": 0.2625199804522994, "learning_rate": 1.7162398797613284e-06, "loss": 0.4422, "step": 16268 }, { "epoch": 0.7363204344874406, "grad_norm": 0.6146855148656558, "learning_rate": 1.7156872144577918e-06, "loss": 0.3353, "step": 16269 }, { "epoch": 0.7363656935958361, "grad_norm": 0.5542848996872033, "learning_rate": 1.7151346197240486e-06, "loss": 0.2922, "step": 16270 }, { "epoch": 0.7364109527042317, "grad_norm": 0.2699452576598286, "learning_rate": 1.7145820955719755e-06, "loss": 0.4661, "step": 16271 }, { "epoch": 0.7364562118126273, "grad_norm": 0.6130827450264918, "learning_rate": 1.7140296420134428e-06, "loss": 0.2892, "step": 16272 }, { "epoch": 0.7365014709210228, "grad_norm": 0.2513562381783873, "learning_rate": 1.7134772590603193e-06, "loss": 0.459, "step": 16273 }, { "epoch": 0.7365467300294184, "grad_norm": 0.5855918035070893, "learning_rate": 1.7129249467244758e-06, "loss": 0.281, "step": 16274 }, { "epoch": 0.736591989137814, "grad_norm": 0.6899883521801347, "learning_rate": 1.7123727050177808e-06, "loss": 0.3348, "step": 16275 }, { "epoch": 0.7366372482462096, "grad_norm": 0.6239946463347905, "learning_rate": 1.7118205339520999e-06, "loss": 0.3188, "step": 16276 }, { "epoch": 0.7366825073546052, "grad_norm": 0.27240169811472975, "learning_rate": 1.7112684335392948e-06, "loss": 0.4644, "step": 16277 }, { "epoch": 0.7367277664630006, "grad_norm": 0.6059417313635661, "learning_rate": 1.7107164037912305e-06, "loss": 0.3083, "step": 16278 }, { "epoch": 0.7367730255713962, "grad_norm": 0.6361738820039944, "learning_rate": 1.7101644447197702e-06, "loss": 0.3065, "step": 16279 }, { "epoch": 0.7368182846797918, "grad_norm": 0.6360595017871099, "learning_rate": 1.7096125563367722e-06, "loss": 0.2608, "step": 16280 }, { "epoch": 0.7368635437881874, "grad_norm": 0.6382952432671731, "learning_rate": 1.709060738654093e-06, "loss": 0.3589, "step": 16281 }, { "epoch": 0.7369088028965829, "grad_norm": 0.6057322972207433, "learning_rate": 1.7085089916835924e-06, "loss": 0.2809, "step": 16282 }, { "epoch": 0.7369540620049785, "grad_norm": 0.2639576838682394, "learning_rate": 1.7079573154371233e-06, "loss": 0.4449, "step": 16283 }, { "epoch": 0.7369993211133741, "grad_norm": 0.259451887808222, "learning_rate": 1.7074057099265422e-06, "loss": 0.4703, "step": 16284 }, { "epoch": 0.7370445802217697, "grad_norm": 0.6382920149509974, "learning_rate": 1.7068541751637001e-06, "loss": 0.3055, "step": 16285 }, { "epoch": 0.7370898393301651, "grad_norm": 0.6298061371883908, "learning_rate": 1.7063027111604457e-06, "loss": 0.3105, "step": 16286 }, { "epoch": 0.7371350984385607, "grad_norm": 0.5901058915221602, "learning_rate": 1.7057513179286305e-06, "loss": 0.2964, "step": 16287 }, { "epoch": 0.7371803575469563, "grad_norm": 0.6692352016470466, "learning_rate": 1.7051999954801058e-06, "loss": 0.3239, "step": 16288 }, { "epoch": 0.7372256166553519, "grad_norm": 0.5637542321209367, "learning_rate": 1.7046487438267101e-06, "loss": 0.2878, "step": 16289 }, { "epoch": 0.7372708757637475, "grad_norm": 0.6137312238539702, "learning_rate": 1.704097562980292e-06, "loss": 0.3092, "step": 16290 }, { "epoch": 0.737316134872143, "grad_norm": 0.6104309860060394, "learning_rate": 1.7035464529526963e-06, "loss": 0.2703, "step": 16291 }, { "epoch": 0.7373613939805386, "grad_norm": 0.2693494755988427, "learning_rate": 1.702995413755763e-06, "loss": 0.4814, "step": 16292 }, { "epoch": 0.7374066530889342, "grad_norm": 0.6943479505825775, "learning_rate": 1.7024444454013305e-06, "loss": 0.2872, "step": 16293 }, { "epoch": 0.7374519121973298, "grad_norm": 0.5915976030744456, "learning_rate": 1.7018935479012394e-06, "loss": 0.2772, "step": 16294 }, { "epoch": 0.7374971713057252, "grad_norm": 0.6959850127902959, "learning_rate": 1.7013427212673285e-06, "loss": 0.262, "step": 16295 }, { "epoch": 0.7375424304141208, "grad_norm": 0.2816393378161216, "learning_rate": 1.7007919655114314e-06, "loss": 0.4919, "step": 16296 }, { "epoch": 0.7375876895225164, "grad_norm": 0.28639396589527855, "learning_rate": 1.7002412806453799e-06, "loss": 0.4763, "step": 16297 }, { "epoch": 0.737632948630912, "grad_norm": 0.610734186823621, "learning_rate": 1.6996906666810116e-06, "loss": 0.2929, "step": 16298 }, { "epoch": 0.7376782077393076, "grad_norm": 0.650120453481213, "learning_rate": 1.699140123630152e-06, "loss": 0.2951, "step": 16299 }, { "epoch": 0.7377234668477031, "grad_norm": 0.27258678959918653, "learning_rate": 1.6985896515046357e-06, "loss": 0.4629, "step": 16300 }, { "epoch": 0.7377687259560987, "grad_norm": 0.6129395204170821, "learning_rate": 1.698039250316288e-06, "loss": 0.2871, "step": 16301 }, { "epoch": 0.7378139850644942, "grad_norm": 0.5771135278717086, "learning_rate": 1.697488920076934e-06, "loss": 0.2952, "step": 16302 }, { "epoch": 0.7378592441728898, "grad_norm": 0.6090117336444172, "learning_rate": 1.6969386607984e-06, "loss": 0.3341, "step": 16303 }, { "epoch": 0.7379045032812853, "grad_norm": 0.6698203949048338, "learning_rate": 1.6963884724925116e-06, "loss": 0.3434, "step": 16304 }, { "epoch": 0.7379497623896809, "grad_norm": 0.5727007389889912, "learning_rate": 1.6958383551710888e-06, "loss": 0.2917, "step": 16305 }, { "epoch": 0.7379950214980765, "grad_norm": 0.6287283582218592, "learning_rate": 1.6952883088459498e-06, "loss": 0.2911, "step": 16306 }, { "epoch": 0.7380402806064721, "grad_norm": 0.6010885159430064, "learning_rate": 1.6947383335289152e-06, "loss": 0.3169, "step": 16307 }, { "epoch": 0.7380855397148676, "grad_norm": 0.5694010188110677, "learning_rate": 1.6941884292318044e-06, "loss": 0.339, "step": 16308 }, { "epoch": 0.7381307988232632, "grad_norm": 0.6348745967305999, "learning_rate": 1.6936385959664315e-06, "loss": 0.2999, "step": 16309 }, { "epoch": 0.7381760579316587, "grad_norm": 0.609076028085305, "learning_rate": 1.6930888337446082e-06, "loss": 0.3054, "step": 16310 }, { "epoch": 0.7382213170400543, "grad_norm": 0.6158342661208451, "learning_rate": 1.6925391425781519e-06, "loss": 0.2698, "step": 16311 }, { "epoch": 0.7382665761484499, "grad_norm": 0.6013018213101735, "learning_rate": 1.691989522478869e-06, "loss": 0.2937, "step": 16312 }, { "epoch": 0.7383118352568454, "grad_norm": 0.5464993799583493, "learning_rate": 1.6914399734585735e-06, "loss": 0.298, "step": 16313 }, { "epoch": 0.738357094365241, "grad_norm": 0.5666297376449007, "learning_rate": 1.690890495529071e-06, "loss": 0.3046, "step": 16314 }, { "epoch": 0.7384023534736366, "grad_norm": 0.26592206593075024, "learning_rate": 1.6903410887021676e-06, "loss": 0.4429, "step": 16315 }, { "epoch": 0.7384476125820322, "grad_norm": 0.6022086141416868, "learning_rate": 1.6897917529896691e-06, "loss": 0.2991, "step": 16316 }, { "epoch": 0.7384928716904277, "grad_norm": 0.5292050781665859, "learning_rate": 1.6892424884033825e-06, "loss": 0.2886, "step": 16317 }, { "epoch": 0.7385381307988232, "grad_norm": 0.31019800261565705, "learning_rate": 1.6886932949551032e-06, "loss": 0.4588, "step": 16318 }, { "epoch": 0.7385833899072188, "grad_norm": 0.5384154654615427, "learning_rate": 1.6881441726566355e-06, "loss": 0.2842, "step": 16319 }, { "epoch": 0.7386286490156144, "grad_norm": 0.25599225115318863, "learning_rate": 1.6875951215197779e-06, "loss": 0.4539, "step": 16320 }, { "epoch": 0.7386739081240099, "grad_norm": 0.6232868132447051, "learning_rate": 1.6870461415563311e-06, "loss": 0.2836, "step": 16321 }, { "epoch": 0.7387191672324055, "grad_norm": 0.5879990085252694, "learning_rate": 1.6864972327780842e-06, "loss": 0.273, "step": 16322 }, { "epoch": 0.7387644263408011, "grad_norm": 0.6348301993005881, "learning_rate": 1.6859483951968353e-06, "loss": 0.2917, "step": 16323 }, { "epoch": 0.7388096854491967, "grad_norm": 0.6169572468167954, "learning_rate": 1.6853996288243785e-06, "loss": 0.2771, "step": 16324 }, { "epoch": 0.7388549445575923, "grad_norm": 0.5949852175498666, "learning_rate": 1.6848509336725039e-06, "loss": 0.2983, "step": 16325 }, { "epoch": 0.7389002036659877, "grad_norm": 0.5970094415996532, "learning_rate": 1.6843023097529993e-06, "loss": 0.2544, "step": 16326 }, { "epoch": 0.7389454627743833, "grad_norm": 0.2664461704857231, "learning_rate": 1.6837537570776563e-06, "loss": 0.4461, "step": 16327 }, { "epoch": 0.7389907218827789, "grad_norm": 0.6167237480226018, "learning_rate": 1.6832052756582583e-06, "loss": 0.3353, "step": 16328 }, { "epoch": 0.7390359809911745, "grad_norm": 0.6651092051598921, "learning_rate": 1.682656865506594e-06, "loss": 0.3018, "step": 16329 }, { "epoch": 0.73908124009957, "grad_norm": 0.5786650897321024, "learning_rate": 1.682108526634445e-06, "loss": 0.2956, "step": 16330 }, { "epoch": 0.7391264992079656, "grad_norm": 0.7224670401316967, "learning_rate": 1.6815602590535923e-06, "loss": 0.3015, "step": 16331 }, { "epoch": 0.7391717583163612, "grad_norm": 0.590046763536563, "learning_rate": 1.6810120627758176e-06, "loss": 0.3501, "step": 16332 }, { "epoch": 0.7392170174247568, "grad_norm": 0.6788892856512094, "learning_rate": 1.6804639378129017e-06, "loss": 0.2926, "step": 16333 }, { "epoch": 0.7392622765331524, "grad_norm": 0.5922790130333325, "learning_rate": 1.6799158841766206e-06, "loss": 0.2937, "step": 16334 }, { "epoch": 0.7393075356415478, "grad_norm": 0.5851526631177592, "learning_rate": 1.679367901878749e-06, "loss": 0.2815, "step": 16335 }, { "epoch": 0.7393527947499434, "grad_norm": 0.6265131905703484, "learning_rate": 1.6788199909310626e-06, "loss": 0.3148, "step": 16336 }, { "epoch": 0.739398053858339, "grad_norm": 0.5723849009706529, "learning_rate": 1.6782721513453353e-06, "loss": 0.309, "step": 16337 }, { "epoch": 0.7394433129667346, "grad_norm": 0.6314188202897429, "learning_rate": 1.6777243831333383e-06, "loss": 0.3172, "step": 16338 }, { "epoch": 0.7394885720751301, "grad_norm": 0.5981352108047535, "learning_rate": 1.6771766863068389e-06, "loss": 0.2789, "step": 16339 }, { "epoch": 0.7395338311835257, "grad_norm": 1.6972236802070124, "learning_rate": 1.6766290608776093e-06, "loss": 0.4823, "step": 16340 }, { "epoch": 0.7395790902919213, "grad_norm": 0.6993751371813014, "learning_rate": 1.6760815068574116e-06, "loss": 0.3213, "step": 16341 }, { "epoch": 0.7396243494003168, "grad_norm": 0.31186821878598653, "learning_rate": 1.6755340242580158e-06, "loss": 0.4798, "step": 16342 }, { "epoch": 0.7396696085087123, "grad_norm": 0.647280432904068, "learning_rate": 1.674986613091184e-06, "loss": 0.3498, "step": 16343 }, { "epoch": 0.7397148676171079, "grad_norm": 0.9307783766001247, "learning_rate": 1.6744392733686754e-06, "loss": 0.2766, "step": 16344 }, { "epoch": 0.7397601267255035, "grad_norm": 0.3135811131714723, "learning_rate": 1.673892005102254e-06, "loss": 0.4809, "step": 16345 }, { "epoch": 0.7398053858338991, "grad_norm": 0.7435748301186856, "learning_rate": 1.6733448083036806e-06, "loss": 0.2704, "step": 16346 }, { "epoch": 0.7398506449422947, "grad_norm": 0.28471121810758254, "learning_rate": 1.6727976829847075e-06, "loss": 0.4669, "step": 16347 }, { "epoch": 0.7398959040506902, "grad_norm": 0.590816121161494, "learning_rate": 1.6722506291570929e-06, "loss": 0.2602, "step": 16348 }, { "epoch": 0.7399411631590858, "grad_norm": 0.5922176862394455, "learning_rate": 1.671703646832592e-06, "loss": 0.269, "step": 16349 }, { "epoch": 0.7399864222674813, "grad_norm": 0.633108164986941, "learning_rate": 1.6711567360229613e-06, "loss": 0.2886, "step": 16350 }, { "epoch": 0.7400316813758769, "grad_norm": 0.25987291362248266, "learning_rate": 1.6706098967399454e-06, "loss": 0.4559, "step": 16351 }, { "epoch": 0.7400769404842724, "grad_norm": 0.672889853871863, "learning_rate": 1.6700631289952967e-06, "loss": 0.2582, "step": 16352 }, { "epoch": 0.740122199592668, "grad_norm": 0.6149642362793232, "learning_rate": 1.6695164328007663e-06, "loss": 0.307, "step": 16353 }, { "epoch": 0.7401674587010636, "grad_norm": 0.637888793193678, "learning_rate": 1.6689698081680988e-06, "loss": 0.298, "step": 16354 }, { "epoch": 0.7402127178094592, "grad_norm": 0.629594169843332, "learning_rate": 1.6684232551090385e-06, "loss": 0.3063, "step": 16355 }, { "epoch": 0.7402579769178547, "grad_norm": 0.5993736230807224, "learning_rate": 1.6678767736353313e-06, "loss": 0.2921, "step": 16356 }, { "epoch": 0.7403032360262503, "grad_norm": 0.6533709936406462, "learning_rate": 1.6673303637587169e-06, "loss": 0.2944, "step": 16357 }, { "epoch": 0.7403484951346458, "grad_norm": 0.6279231132277909, "learning_rate": 1.6667840254909395e-06, "loss": 0.2746, "step": 16358 }, { "epoch": 0.7403937542430414, "grad_norm": 0.6895537865960341, "learning_rate": 1.6662377588437356e-06, "loss": 0.3198, "step": 16359 }, { "epoch": 0.740439013351437, "grad_norm": 0.5613014329500297, "learning_rate": 1.6656915638288423e-06, "loss": 0.2509, "step": 16360 }, { "epoch": 0.7404842724598325, "grad_norm": 0.5244703265777766, "learning_rate": 1.6651454404579965e-06, "loss": 0.2772, "step": 16361 }, { "epoch": 0.7405295315682281, "grad_norm": 0.6434737155875516, "learning_rate": 1.6645993887429345e-06, "loss": 0.2939, "step": 16362 }, { "epoch": 0.7405747906766237, "grad_norm": 0.27442691120208346, "learning_rate": 1.664053408695388e-06, "loss": 0.4551, "step": 16363 }, { "epoch": 0.7406200497850193, "grad_norm": 0.6432461688130403, "learning_rate": 1.6635075003270861e-06, "loss": 0.3343, "step": 16364 }, { "epoch": 0.7406653088934148, "grad_norm": 0.6585033459543134, "learning_rate": 1.6629616636497615e-06, "loss": 0.306, "step": 16365 }, { "epoch": 0.7407105680018103, "grad_norm": 0.28741188531833256, "learning_rate": 1.6624158986751427e-06, "loss": 0.4308, "step": 16366 }, { "epoch": 0.7407558271102059, "grad_norm": 0.6272621824628382, "learning_rate": 1.661870205414956e-06, "loss": 0.2924, "step": 16367 }, { "epoch": 0.7408010862186015, "grad_norm": 0.6357340295514764, "learning_rate": 1.6613245838809244e-06, "loss": 0.3293, "step": 16368 }, { "epoch": 0.7408463453269971, "grad_norm": 0.586649855741785, "learning_rate": 1.6607790340847757e-06, "loss": 0.2847, "step": 16369 }, { "epoch": 0.7408916044353926, "grad_norm": 0.6519309610805597, "learning_rate": 1.6602335560382276e-06, "loss": 0.3355, "step": 16370 }, { "epoch": 0.7409368635437882, "grad_norm": 0.27314375144628994, "learning_rate": 1.6596881497530054e-06, "loss": 0.4759, "step": 16371 }, { "epoch": 0.7409821226521838, "grad_norm": 0.6334847302860123, "learning_rate": 1.6591428152408256e-06, "loss": 0.3212, "step": 16372 }, { "epoch": 0.7410273817605794, "grad_norm": 0.6054026655609173, "learning_rate": 1.6585975525134041e-06, "loss": 0.2976, "step": 16373 }, { "epoch": 0.7410726408689748, "grad_norm": 0.6606486067332609, "learning_rate": 1.658052361582459e-06, "loss": 0.3441, "step": 16374 }, { "epoch": 0.7411178999773704, "grad_norm": 0.7073574769713497, "learning_rate": 1.6575072424597083e-06, "loss": 0.3342, "step": 16375 }, { "epoch": 0.741163159085766, "grad_norm": 0.6335308387328569, "learning_rate": 1.6569621951568575e-06, "loss": 0.2961, "step": 16376 }, { "epoch": 0.7412084181941616, "grad_norm": 0.5610231362990775, "learning_rate": 1.6564172196856222e-06, "loss": 0.2878, "step": 16377 }, { "epoch": 0.7412536773025571, "grad_norm": 0.5774696662726435, "learning_rate": 1.6558723160577118e-06, "loss": 0.2611, "step": 16378 }, { "epoch": 0.7412989364109527, "grad_norm": 0.6121664560415999, "learning_rate": 1.655327484284837e-06, "loss": 0.3021, "step": 16379 }, { "epoch": 0.7413441955193483, "grad_norm": 0.6178927095424993, "learning_rate": 1.6547827243787002e-06, "loss": 0.2902, "step": 16380 }, { "epoch": 0.7413894546277439, "grad_norm": 0.28287751021161356, "learning_rate": 1.654238036351008e-06, "loss": 0.4395, "step": 16381 }, { "epoch": 0.7414347137361395, "grad_norm": 0.598779619002262, "learning_rate": 1.6536934202134663e-06, "loss": 0.3038, "step": 16382 }, { "epoch": 0.7414799728445349, "grad_norm": 0.5969691615599192, "learning_rate": 1.6531488759777753e-06, "loss": 0.2914, "step": 16383 }, { "epoch": 0.7415252319529305, "grad_norm": 0.667321425427489, "learning_rate": 1.6526044036556349e-06, "loss": 0.2809, "step": 16384 }, { "epoch": 0.7415704910613261, "grad_norm": 0.2795974705144027, "learning_rate": 1.6520600032587464e-06, "loss": 0.4745, "step": 16385 }, { "epoch": 0.7416157501697217, "grad_norm": 0.7550919144344835, "learning_rate": 1.6515156747988043e-06, "loss": 0.2814, "step": 16386 }, { "epoch": 0.7416610092781172, "grad_norm": 0.6126708197070092, "learning_rate": 1.650971418287508e-06, "loss": 0.2722, "step": 16387 }, { "epoch": 0.7417062683865128, "grad_norm": 0.6412752477864133, "learning_rate": 1.6504272337365501e-06, "loss": 0.3413, "step": 16388 }, { "epoch": 0.7417515274949084, "grad_norm": 0.2807606489818299, "learning_rate": 1.6498831211576222e-06, "loss": 0.5012, "step": 16389 }, { "epoch": 0.741796786603304, "grad_norm": 0.27978365250473713, "learning_rate": 1.6493390805624165e-06, "loss": 0.4903, "step": 16390 }, { "epoch": 0.7418420457116994, "grad_norm": 0.5874638050564923, "learning_rate": 1.648795111962625e-06, "loss": 0.3449, "step": 16391 }, { "epoch": 0.741887304820095, "grad_norm": 0.24334727858073832, "learning_rate": 1.6482512153699344e-06, "loss": 0.4518, "step": 16392 }, { "epoch": 0.7419325639284906, "grad_norm": 0.6706376008021124, "learning_rate": 1.647707390796029e-06, "loss": 0.3001, "step": 16393 }, { "epoch": 0.7419778230368862, "grad_norm": 0.581696236394703, "learning_rate": 1.6471636382525963e-06, "loss": 0.3018, "step": 16394 }, { "epoch": 0.7420230821452818, "grad_norm": 0.6585204969144176, "learning_rate": 1.6466199577513209e-06, "loss": 0.2851, "step": 16395 }, { "epoch": 0.7420683412536773, "grad_norm": 0.2582143407608148, "learning_rate": 1.646076349303884e-06, "loss": 0.4776, "step": 16396 }, { "epoch": 0.7421136003620729, "grad_norm": 0.5990806798374867, "learning_rate": 1.6455328129219634e-06, "loss": 0.2621, "step": 16397 }, { "epoch": 0.7421588594704684, "grad_norm": 0.5858539733747705, "learning_rate": 1.6449893486172418e-06, "loss": 0.3084, "step": 16398 }, { "epoch": 0.742204118578864, "grad_norm": 0.6423462532100619, "learning_rate": 1.6444459564013938e-06, "loss": 0.3453, "step": 16399 }, { "epoch": 0.7422493776872595, "grad_norm": 0.6999659736527946, "learning_rate": 1.6439026362860977e-06, "loss": 0.3212, "step": 16400 }, { "epoch": 0.7422946367956551, "grad_norm": 0.6350926523497435, "learning_rate": 1.6433593882830262e-06, "loss": 0.2644, "step": 16401 }, { "epoch": 0.7423398959040507, "grad_norm": 0.6700380593625176, "learning_rate": 1.642816212403851e-06, "loss": 0.2781, "step": 16402 }, { "epoch": 0.7423851550124463, "grad_norm": 0.5725905632375857, "learning_rate": 1.642273108660245e-06, "loss": 0.3064, "step": 16403 }, { "epoch": 0.7424304141208418, "grad_norm": 0.30964321741467477, "learning_rate": 1.6417300770638784e-06, "loss": 0.4693, "step": 16404 }, { "epoch": 0.7424756732292374, "grad_norm": 0.6382625480112772, "learning_rate": 1.6411871176264188e-06, "loss": 0.3223, "step": 16405 }, { "epoch": 0.7425209323376329, "grad_norm": 0.7372807946732123, "learning_rate": 1.6406442303595305e-06, "loss": 0.2886, "step": 16406 }, { "epoch": 0.7425661914460285, "grad_norm": 0.6382685159478572, "learning_rate": 1.6401014152748801e-06, "loss": 0.249, "step": 16407 }, { "epoch": 0.7426114505544241, "grad_norm": 0.5808086404162707, "learning_rate": 1.6395586723841328e-06, "loss": 0.3053, "step": 16408 }, { "epoch": 0.7426567096628196, "grad_norm": 0.30327634396753367, "learning_rate": 1.6390160016989487e-06, "loss": 0.5011, "step": 16409 }, { "epoch": 0.7427019687712152, "grad_norm": 0.5749949314930741, "learning_rate": 1.6384734032309868e-06, "loss": 0.2838, "step": 16410 }, { "epoch": 0.7427472278796108, "grad_norm": 0.8120256752960093, "learning_rate": 1.6379308769919084e-06, "loss": 0.2751, "step": 16411 }, { "epoch": 0.7427924869880064, "grad_norm": 0.2966374383743913, "learning_rate": 1.63738842299337e-06, "loss": 0.4333, "step": 16412 }, { "epoch": 0.7428377460964019, "grad_norm": 0.817417467052321, "learning_rate": 1.6368460412470255e-06, "loss": 0.2531, "step": 16413 }, { "epoch": 0.7428830052047974, "grad_norm": 0.6579780976381642, "learning_rate": 1.636303731764532e-06, "loss": 0.2897, "step": 16414 }, { "epoch": 0.742928264313193, "grad_norm": 0.532435367077427, "learning_rate": 1.635761494557539e-06, "loss": 0.2566, "step": 16415 }, { "epoch": 0.7429735234215886, "grad_norm": 0.573111185664909, "learning_rate": 1.6352193296377006e-06, "loss": 0.3194, "step": 16416 }, { "epoch": 0.7430187825299842, "grad_norm": 0.6146707134736058, "learning_rate": 1.6346772370166646e-06, "loss": 0.2936, "step": 16417 }, { "epoch": 0.7430640416383797, "grad_norm": 0.6074305357766798, "learning_rate": 1.634135216706077e-06, "loss": 0.266, "step": 16418 }, { "epoch": 0.7431093007467753, "grad_norm": 0.5672575122908394, "learning_rate": 1.6335932687175865e-06, "loss": 0.3097, "step": 16419 }, { "epoch": 0.7431545598551709, "grad_norm": 0.27175533404918323, "learning_rate": 1.6330513930628389e-06, "loss": 0.4583, "step": 16420 }, { "epoch": 0.7431998189635665, "grad_norm": 0.27517315541514614, "learning_rate": 1.6325095897534765e-06, "loss": 0.4705, "step": 16421 }, { "epoch": 0.7432450780719619, "grad_norm": 0.6187333387559553, "learning_rate": 1.6319678588011385e-06, "loss": 0.3075, "step": 16422 }, { "epoch": 0.7432903371803575, "grad_norm": 0.5779202963904356, "learning_rate": 1.6314262002174674e-06, "loss": 0.2521, "step": 16423 }, { "epoch": 0.7433355962887531, "grad_norm": 0.7523310961274872, "learning_rate": 1.6308846140141027e-06, "loss": 0.2957, "step": 16424 }, { "epoch": 0.7433808553971487, "grad_norm": 0.5847693172280757, "learning_rate": 1.630343100202681e-06, "loss": 0.2476, "step": 16425 }, { "epoch": 0.7434261145055442, "grad_norm": 0.6238932100164696, "learning_rate": 1.6298016587948345e-06, "loss": 0.3456, "step": 16426 }, { "epoch": 0.7434713736139398, "grad_norm": 0.5644616877249133, "learning_rate": 1.6292602898022015e-06, "loss": 0.2797, "step": 16427 }, { "epoch": 0.7435166327223354, "grad_norm": 0.6019491578971776, "learning_rate": 1.6287189932364106e-06, "loss": 0.287, "step": 16428 }, { "epoch": 0.743561891830731, "grad_norm": 0.5979609372657156, "learning_rate": 1.6281777691090966e-06, "loss": 0.3096, "step": 16429 }, { "epoch": 0.7436071509391265, "grad_norm": 0.5681120904040521, "learning_rate": 1.6276366174318865e-06, "loss": 0.2774, "step": 16430 }, { "epoch": 0.743652410047522, "grad_norm": 0.6291721807245989, "learning_rate": 1.627095538216406e-06, "loss": 0.2919, "step": 16431 }, { "epoch": 0.7436976691559176, "grad_norm": 0.6375260230038594, "learning_rate": 1.6265545314742838e-06, "loss": 0.2693, "step": 16432 }, { "epoch": 0.7437429282643132, "grad_norm": 0.6516953368882744, "learning_rate": 1.6260135972171448e-06, "loss": 0.3493, "step": 16433 }, { "epoch": 0.7437881873727088, "grad_norm": 0.6326681888223729, "learning_rate": 1.625472735456612e-06, "loss": 0.2763, "step": 16434 }, { "epoch": 0.7438334464811043, "grad_norm": 0.6176613716364594, "learning_rate": 1.6249319462043039e-06, "loss": 0.2825, "step": 16435 }, { "epoch": 0.7438787055894999, "grad_norm": 0.6120209281474143, "learning_rate": 1.6243912294718428e-06, "loss": 0.2937, "step": 16436 }, { "epoch": 0.7439239646978955, "grad_norm": 0.5795229600915072, "learning_rate": 1.6238505852708481e-06, "loss": 0.28, "step": 16437 }, { "epoch": 0.743969223806291, "grad_norm": 0.617044457533017, "learning_rate": 1.623310013612936e-06, "loss": 0.3315, "step": 16438 }, { "epoch": 0.7440144829146865, "grad_norm": 0.6151415354863083, "learning_rate": 1.622769514509719e-06, "loss": 0.2776, "step": 16439 }, { "epoch": 0.7440597420230821, "grad_norm": 0.6251360148196247, "learning_rate": 1.6222290879728142e-06, "loss": 0.2804, "step": 16440 }, { "epoch": 0.7441050011314777, "grad_norm": 0.6917015257469193, "learning_rate": 1.6216887340138304e-06, "loss": 0.2605, "step": 16441 }, { "epoch": 0.7441502602398733, "grad_norm": 0.616749212638954, "learning_rate": 1.621148452644382e-06, "loss": 0.3526, "step": 16442 }, { "epoch": 0.7441955193482689, "grad_norm": 0.2708142105961081, "learning_rate": 1.6206082438760762e-06, "loss": 0.4795, "step": 16443 }, { "epoch": 0.7442407784566644, "grad_norm": 0.5801112566416491, "learning_rate": 1.6200681077205182e-06, "loss": 0.2995, "step": 16444 }, { "epoch": 0.74428603756506, "grad_norm": 0.5712979072266995, "learning_rate": 1.619528044189318e-06, "loss": 0.2847, "step": 16445 }, { "epoch": 0.7443312966734555, "grad_norm": 0.29290842989103355, "learning_rate": 1.6189880532940772e-06, "loss": 0.4951, "step": 16446 }, { "epoch": 0.7443765557818511, "grad_norm": 0.2558401143062855, "learning_rate": 1.6184481350463976e-06, "loss": 0.455, "step": 16447 }, { "epoch": 0.7444218148902466, "grad_norm": 0.7829357141843611, "learning_rate": 1.6179082894578824e-06, "loss": 0.3241, "step": 16448 }, { "epoch": 0.7444670739986422, "grad_norm": 0.6261295834024594, "learning_rate": 1.617368516540132e-06, "loss": 0.2901, "step": 16449 }, { "epoch": 0.7445123331070378, "grad_norm": 0.26736031489859946, "learning_rate": 1.6168288163047434e-06, "loss": 0.4512, "step": 16450 }, { "epoch": 0.7445575922154334, "grad_norm": 0.6085865074985845, "learning_rate": 1.6162891887633114e-06, "loss": 0.2953, "step": 16451 }, { "epoch": 0.744602851323829, "grad_norm": 0.5997255472378802, "learning_rate": 1.615749633927432e-06, "loss": 0.2951, "step": 16452 }, { "epoch": 0.7446481104322245, "grad_norm": 0.6290365174173874, "learning_rate": 1.615210151808701e-06, "loss": 0.2781, "step": 16453 }, { "epoch": 0.74469336954062, "grad_norm": 0.6093195173585996, "learning_rate": 1.6146707424187086e-06, "loss": 0.2618, "step": 16454 }, { "epoch": 0.7447386286490156, "grad_norm": 0.6563387860435451, "learning_rate": 1.6141314057690426e-06, "loss": 0.3281, "step": 16455 }, { "epoch": 0.7447838877574112, "grad_norm": 0.5755936718531313, "learning_rate": 1.6135921418712959e-06, "loss": 0.2966, "step": 16456 }, { "epoch": 0.7448291468658067, "grad_norm": 0.26943398549730296, "learning_rate": 1.6130529507370513e-06, "loss": 0.4748, "step": 16457 }, { "epoch": 0.7448744059742023, "grad_norm": 0.6445979510026212, "learning_rate": 1.6125138323778983e-06, "loss": 0.3227, "step": 16458 }, { "epoch": 0.7449196650825979, "grad_norm": 0.6014424196667216, "learning_rate": 1.6119747868054193e-06, "loss": 0.3094, "step": 16459 }, { "epoch": 0.7449649241909935, "grad_norm": 0.5965822463164119, "learning_rate": 1.6114358140311948e-06, "loss": 0.2948, "step": 16460 }, { "epoch": 0.745010183299389, "grad_norm": 0.27711896806631753, "learning_rate": 1.610896914066808e-06, "loss": 0.4665, "step": 16461 }, { "epoch": 0.7450554424077845, "grad_norm": 0.5903313173991954, "learning_rate": 1.6103580869238388e-06, "loss": 0.2882, "step": 16462 }, { "epoch": 0.7451007015161801, "grad_norm": 0.5993926220789347, "learning_rate": 1.609819332613864e-06, "loss": 0.2436, "step": 16463 }, { "epoch": 0.7451459606245757, "grad_norm": 0.6501241995643003, "learning_rate": 1.6092806511484576e-06, "loss": 0.2978, "step": 16464 }, { "epoch": 0.7451912197329713, "grad_norm": 0.6725764205319074, "learning_rate": 1.6087420425391964e-06, "loss": 0.3198, "step": 16465 }, { "epoch": 0.7452364788413668, "grad_norm": 0.6499926682600154, "learning_rate": 1.6082035067976553e-06, "loss": 0.2942, "step": 16466 }, { "epoch": 0.7452817379497624, "grad_norm": 0.7833476590078484, "learning_rate": 1.6076650439354035e-06, "loss": 0.3156, "step": 16467 }, { "epoch": 0.745326997058158, "grad_norm": 0.6036145375496808, "learning_rate": 1.6071266539640095e-06, "loss": 0.3058, "step": 16468 }, { "epoch": 0.7453722561665536, "grad_norm": 0.6395409785445747, "learning_rate": 1.6065883368950447e-06, "loss": 0.3243, "step": 16469 }, { "epoch": 0.745417515274949, "grad_norm": 0.607747457083173, "learning_rate": 1.606050092740073e-06, "loss": 0.3002, "step": 16470 }, { "epoch": 0.7454627743833446, "grad_norm": 0.6562502783928543, "learning_rate": 1.6055119215106629e-06, "loss": 0.2663, "step": 16471 }, { "epoch": 0.7455080334917402, "grad_norm": 0.6645214066337757, "learning_rate": 1.604973823218376e-06, "loss": 0.3166, "step": 16472 }, { "epoch": 0.7455532926001358, "grad_norm": 0.26599225456602565, "learning_rate": 1.6044357978747733e-06, "loss": 0.4609, "step": 16473 }, { "epoch": 0.7455985517085313, "grad_norm": 0.6025592209553091, "learning_rate": 1.603897845491416e-06, "loss": 0.2608, "step": 16474 }, { "epoch": 0.7456438108169269, "grad_norm": 0.596383754180155, "learning_rate": 1.6033599660798676e-06, "loss": 0.2616, "step": 16475 }, { "epoch": 0.7456890699253225, "grad_norm": 0.6375212264875807, "learning_rate": 1.6028221596516779e-06, "loss": 0.3297, "step": 16476 }, { "epoch": 0.7457343290337181, "grad_norm": 0.5940876054246925, "learning_rate": 1.6022844262184061e-06, "loss": 0.2497, "step": 16477 }, { "epoch": 0.7457795881421136, "grad_norm": 0.6218890417536425, "learning_rate": 1.6017467657916075e-06, "loss": 0.276, "step": 16478 }, { "epoch": 0.7458248472505091, "grad_norm": 0.6154177745187074, "learning_rate": 1.6012091783828365e-06, "loss": 0.3192, "step": 16479 }, { "epoch": 0.7458701063589047, "grad_norm": 0.5586980717106319, "learning_rate": 1.600671664003639e-06, "loss": 0.3104, "step": 16480 }, { "epoch": 0.7459153654673003, "grad_norm": 0.6361626035580967, "learning_rate": 1.600134222665567e-06, "loss": 0.2703, "step": 16481 }, { "epoch": 0.7459606245756959, "grad_norm": 0.6284177067454095, "learning_rate": 1.59959685438017e-06, "loss": 0.3597, "step": 16482 }, { "epoch": 0.7460058836840914, "grad_norm": 0.7516775332368926, "learning_rate": 1.599059559158993e-06, "loss": 0.3308, "step": 16483 }, { "epoch": 0.746051142792487, "grad_norm": 0.5991664336958192, "learning_rate": 1.5985223370135795e-06, "loss": 0.2719, "step": 16484 }, { "epoch": 0.7460964019008826, "grad_norm": 0.5970637698076308, "learning_rate": 1.5979851879554758e-06, "loss": 0.2858, "step": 16485 }, { "epoch": 0.7461416610092781, "grad_norm": 0.5938806151950049, "learning_rate": 1.5974481119962203e-06, "loss": 0.3179, "step": 16486 }, { "epoch": 0.7461869201176737, "grad_norm": 0.6206591591584846, "learning_rate": 1.596911109147356e-06, "loss": 0.3068, "step": 16487 }, { "epoch": 0.7462321792260692, "grad_norm": 0.5958474881442908, "learning_rate": 1.5963741794204207e-06, "loss": 0.3092, "step": 16488 }, { "epoch": 0.7462774383344648, "grad_norm": 0.6318170853705819, "learning_rate": 1.595837322826949e-06, "loss": 0.2846, "step": 16489 }, { "epoch": 0.7463226974428604, "grad_norm": 0.5792372877921103, "learning_rate": 1.5953005393784782e-06, "loss": 0.2862, "step": 16490 }, { "epoch": 0.746367956551256, "grad_norm": 0.6654866867136477, "learning_rate": 1.5947638290865436e-06, "loss": 0.2985, "step": 16491 }, { "epoch": 0.7464132156596515, "grad_norm": 0.6909000728983873, "learning_rate": 1.5942271919626762e-06, "loss": 0.2709, "step": 16492 }, { "epoch": 0.746458474768047, "grad_norm": 0.62095556360136, "learning_rate": 1.5936906280184045e-06, "loss": 0.287, "step": 16493 }, { "epoch": 0.7465037338764426, "grad_norm": 0.6299401063232833, "learning_rate": 1.5931541372652592e-06, "loss": 0.3418, "step": 16494 }, { "epoch": 0.7465489929848382, "grad_norm": 0.3067155177006923, "learning_rate": 1.5926177197147702e-06, "loss": 0.4905, "step": 16495 }, { "epoch": 0.7465942520932337, "grad_norm": 0.6525451405318027, "learning_rate": 1.5920813753784614e-06, "loss": 0.322, "step": 16496 }, { "epoch": 0.7466395112016293, "grad_norm": 0.6263061006818194, "learning_rate": 1.5915451042678558e-06, "loss": 0.2977, "step": 16497 }, { "epoch": 0.7466847703100249, "grad_norm": 0.2618271933779362, "learning_rate": 1.591008906394479e-06, "loss": 0.4502, "step": 16498 }, { "epoch": 0.7467300294184205, "grad_norm": 0.5847023317077064, "learning_rate": 1.5904727817698495e-06, "loss": 0.2815, "step": 16499 }, { "epoch": 0.7467752885268161, "grad_norm": 0.7444545205585212, "learning_rate": 1.5899367304054898e-06, "loss": 0.3401, "step": 16500 }, { "epoch": 0.7468205476352116, "grad_norm": 0.627059815960014, "learning_rate": 1.5894007523129162e-06, "loss": 0.2957, "step": 16501 }, { "epoch": 0.7468658067436071, "grad_norm": 0.650345947630085, "learning_rate": 1.5888648475036445e-06, "loss": 0.3021, "step": 16502 }, { "epoch": 0.7469110658520027, "grad_norm": 2.417059302876411, "learning_rate": 1.5883290159891907e-06, "loss": 0.3422, "step": 16503 }, { "epoch": 0.7469563249603983, "grad_norm": 0.6539258850224872, "learning_rate": 1.5877932577810712e-06, "loss": 0.3189, "step": 16504 }, { "epoch": 0.7470015840687938, "grad_norm": 0.6039525924981889, "learning_rate": 1.5872575728907914e-06, "loss": 0.2911, "step": 16505 }, { "epoch": 0.7470468431771894, "grad_norm": 0.5788256798294671, "learning_rate": 1.586721961329865e-06, "loss": 0.3224, "step": 16506 }, { "epoch": 0.747092102285585, "grad_norm": 0.7105780907921888, "learning_rate": 1.5861864231098006e-06, "loss": 0.3178, "step": 16507 }, { "epoch": 0.7471373613939806, "grad_norm": 0.7063708263989005, "learning_rate": 1.5856509582421086e-06, "loss": 0.3351, "step": 16508 }, { "epoch": 0.747182620502376, "grad_norm": 0.6510998279016662, "learning_rate": 1.585115566738288e-06, "loss": 0.369, "step": 16509 }, { "epoch": 0.7472278796107716, "grad_norm": 0.277973618515596, "learning_rate": 1.5845802486098461e-06, "loss": 0.4612, "step": 16510 }, { "epoch": 0.7472731387191672, "grad_norm": 0.642679572704552, "learning_rate": 1.584045003868286e-06, "loss": 0.3621, "step": 16511 }, { "epoch": 0.7473183978275628, "grad_norm": 0.6128790270686905, "learning_rate": 1.5835098325251075e-06, "loss": 0.2871, "step": 16512 }, { "epoch": 0.7473636569359584, "grad_norm": 0.5868380908965802, "learning_rate": 1.5829747345918083e-06, "loss": 0.312, "step": 16513 }, { "epoch": 0.7474089160443539, "grad_norm": 0.5961565717006754, "learning_rate": 1.5824397100798893e-06, "loss": 0.2839, "step": 16514 }, { "epoch": 0.7474541751527495, "grad_norm": 0.5754325439828039, "learning_rate": 1.5819047590008429e-06, "loss": 0.3001, "step": 16515 }, { "epoch": 0.7474994342611451, "grad_norm": 0.7193337762494555, "learning_rate": 1.5813698813661672e-06, "loss": 0.3086, "step": 16516 }, { "epoch": 0.7475446933695407, "grad_norm": 0.6679149437502885, "learning_rate": 1.5808350771873527e-06, "loss": 0.3474, "step": 16517 }, { "epoch": 0.7475899524779361, "grad_norm": 0.5970275979081476, "learning_rate": 1.58030034647589e-06, "loss": 0.3043, "step": 16518 }, { "epoch": 0.7476352115863317, "grad_norm": 0.5749082872452106, "learning_rate": 1.57976568924327e-06, "loss": 0.268, "step": 16519 }, { "epoch": 0.7476804706947273, "grad_norm": 0.6548085932399869, "learning_rate": 1.5792311055009824e-06, "loss": 0.2956, "step": 16520 }, { "epoch": 0.7477257298031229, "grad_norm": 0.623026618245212, "learning_rate": 1.578696595260512e-06, "loss": 0.2868, "step": 16521 }, { "epoch": 0.7477709889115185, "grad_norm": 0.5916390234323907, "learning_rate": 1.578162158533343e-06, "loss": 0.2832, "step": 16522 }, { "epoch": 0.747816248019914, "grad_norm": 0.6014859725372366, "learning_rate": 1.57762779533096e-06, "loss": 0.2819, "step": 16523 }, { "epoch": 0.7478615071283096, "grad_norm": 0.7435152017578733, "learning_rate": 1.5770935056648456e-06, "loss": 0.2898, "step": 16524 }, { "epoch": 0.7479067662367052, "grad_norm": 0.6084117400456398, "learning_rate": 1.5765592895464793e-06, "loss": 0.3335, "step": 16525 }, { "epoch": 0.7479520253451007, "grad_norm": 0.6107025095627568, "learning_rate": 1.5760251469873378e-06, "loss": 0.3038, "step": 16526 }, { "epoch": 0.7479972844534962, "grad_norm": 0.25637382721289265, "learning_rate": 1.5754910779989018e-06, "loss": 0.4819, "step": 16527 }, { "epoch": 0.7480425435618918, "grad_norm": 0.6661645367906474, "learning_rate": 1.5749570825926437e-06, "loss": 0.3256, "step": 16528 }, { "epoch": 0.7480878026702874, "grad_norm": 0.5931956952900801, "learning_rate": 1.5744231607800397e-06, "loss": 0.3045, "step": 16529 }, { "epoch": 0.748133061778683, "grad_norm": 0.6488485046622263, "learning_rate": 1.5738893125725613e-06, "loss": 0.2963, "step": 16530 }, { "epoch": 0.7481783208870785, "grad_norm": 0.27494362095273467, "learning_rate": 1.5733555379816773e-06, "loss": 0.4647, "step": 16531 }, { "epoch": 0.7482235799954741, "grad_norm": 0.6009411757917738, "learning_rate": 1.572821837018859e-06, "loss": 0.2794, "step": 16532 }, { "epoch": 0.7482688391038697, "grad_norm": 0.311812204997518, "learning_rate": 1.5722882096955748e-06, "loss": 0.4698, "step": 16533 }, { "epoch": 0.7483140982122652, "grad_norm": 0.5832169580997869, "learning_rate": 1.5717546560232904e-06, "loss": 0.2724, "step": 16534 }, { "epoch": 0.7483593573206608, "grad_norm": 0.5453719989550989, "learning_rate": 1.5712211760134672e-06, "loss": 0.2928, "step": 16535 }, { "epoch": 0.7484046164290563, "grad_norm": 0.6266500321574185, "learning_rate": 1.5706877696775703e-06, "loss": 0.2585, "step": 16536 }, { "epoch": 0.7484498755374519, "grad_norm": 0.6208100270151411, "learning_rate": 1.5701544370270638e-06, "loss": 0.2831, "step": 16537 }, { "epoch": 0.7484951346458475, "grad_norm": 0.2689173965747566, "learning_rate": 1.5696211780734017e-06, "loss": 0.4833, "step": 16538 }, { "epoch": 0.7485403937542431, "grad_norm": 0.6334801681171852, "learning_rate": 1.569087992828045e-06, "loss": 0.2706, "step": 16539 }, { "epoch": 0.7485856528626386, "grad_norm": 0.28267318715463907, "learning_rate": 1.5685548813024516e-06, "loss": 0.4996, "step": 16540 }, { "epoch": 0.7486309119710342, "grad_norm": 0.5544036599356525, "learning_rate": 1.5680218435080747e-06, "loss": 0.3117, "step": 16541 }, { "epoch": 0.7486761710794297, "grad_norm": 0.5937306440955723, "learning_rate": 1.5674888794563663e-06, "loss": 0.2904, "step": 16542 }, { "epoch": 0.7487214301878253, "grad_norm": 0.5710840182569125, "learning_rate": 1.566955989158781e-06, "loss": 0.2802, "step": 16543 }, { "epoch": 0.7487666892962208, "grad_norm": 0.6156900161661162, "learning_rate": 1.5664231726267664e-06, "loss": 0.2877, "step": 16544 }, { "epoch": 0.7488119484046164, "grad_norm": 1.9108896254674193, "learning_rate": 1.5658904298717742e-06, "loss": 0.3083, "step": 16545 }, { "epoch": 0.748857207513012, "grad_norm": 0.6082886537469275, "learning_rate": 1.5653577609052495e-06, "loss": 0.2686, "step": 16546 }, { "epoch": 0.7489024666214076, "grad_norm": 0.5797560507570885, "learning_rate": 1.5648251657386366e-06, "loss": 0.2715, "step": 16547 }, { "epoch": 0.7489477257298032, "grad_norm": 0.6528100426591587, "learning_rate": 1.56429264438338e-06, "loss": 0.3317, "step": 16548 }, { "epoch": 0.7489929848381986, "grad_norm": 0.6272924481051566, "learning_rate": 1.5637601968509242e-06, "loss": 0.325, "step": 16549 }, { "epoch": 0.7490382439465942, "grad_norm": 0.6446741475507854, "learning_rate": 1.5632278231527081e-06, "loss": 0.2886, "step": 16550 }, { "epoch": 0.7490835030549898, "grad_norm": 0.7003736284823379, "learning_rate": 1.5626955233001695e-06, "loss": 0.2752, "step": 16551 }, { "epoch": 0.7491287621633854, "grad_norm": 0.2720333118535126, "learning_rate": 1.5621632973047468e-06, "loss": 0.4716, "step": 16552 }, { "epoch": 0.7491740212717809, "grad_norm": 0.6303202358609897, "learning_rate": 1.5616311451778782e-06, "loss": 0.3199, "step": 16553 }, { "epoch": 0.7492192803801765, "grad_norm": 0.6152455382833465, "learning_rate": 1.5610990669309961e-06, "loss": 0.3115, "step": 16554 }, { "epoch": 0.7492645394885721, "grad_norm": 0.27440801803982967, "learning_rate": 1.560567062575532e-06, "loss": 0.4601, "step": 16555 }, { "epoch": 0.7493097985969677, "grad_norm": 0.6169645054117495, "learning_rate": 1.5600351321229196e-06, "loss": 0.2785, "step": 16556 }, { "epoch": 0.7493550577053633, "grad_norm": 0.6401479659716349, "learning_rate": 1.5595032755845857e-06, "loss": 0.2773, "step": 16557 }, { "epoch": 0.7494003168137587, "grad_norm": 0.6396815110074804, "learning_rate": 1.5589714929719614e-06, "loss": 0.3114, "step": 16558 }, { "epoch": 0.7494455759221543, "grad_norm": 0.5781596757272333, "learning_rate": 1.558439784296471e-06, "loss": 0.3343, "step": 16559 }, { "epoch": 0.7494908350305499, "grad_norm": 0.5997425540319526, "learning_rate": 1.5579081495695381e-06, "loss": 0.2718, "step": 16560 }, { "epoch": 0.7495360941389455, "grad_norm": 0.6039137344409807, "learning_rate": 1.5573765888025877e-06, "loss": 0.3065, "step": 16561 }, { "epoch": 0.749581353247341, "grad_norm": 0.5809519922124259, "learning_rate": 1.556845102007043e-06, "loss": 0.2898, "step": 16562 }, { "epoch": 0.7496266123557366, "grad_norm": 0.6782488781990339, "learning_rate": 1.556313689194322e-06, "loss": 0.3315, "step": 16563 }, { "epoch": 0.7496718714641322, "grad_norm": 0.6110668027970159, "learning_rate": 1.5557823503758418e-06, "loss": 0.3036, "step": 16564 }, { "epoch": 0.7497171305725278, "grad_norm": 0.6452243181347994, "learning_rate": 1.555251085563021e-06, "loss": 0.2489, "step": 16565 }, { "epoch": 0.7497623896809232, "grad_norm": 0.3017016378995432, "learning_rate": 1.5547198947672777e-06, "loss": 0.473, "step": 16566 }, { "epoch": 0.7498076487893188, "grad_norm": 0.26271291805718217, "learning_rate": 1.5541887780000187e-06, "loss": 0.4459, "step": 16567 }, { "epoch": 0.7498529078977144, "grad_norm": 0.5799561207828952, "learning_rate": 1.5536577352726607e-06, "loss": 0.3114, "step": 16568 }, { "epoch": 0.74989816700611, "grad_norm": 0.2854953151376636, "learning_rate": 1.5531267665966143e-06, "loss": 0.4627, "step": 16569 }, { "epoch": 0.7499434261145056, "grad_norm": 0.5779329050400861, "learning_rate": 1.5525958719832879e-06, "loss": 0.2798, "step": 16570 }, { "epoch": 0.7499886852229011, "grad_norm": 0.6120806007444967, "learning_rate": 1.5520650514440866e-06, "loss": 0.2901, "step": 16571 }, { "epoch": 0.7500339443312967, "grad_norm": 0.6358208723438779, "learning_rate": 1.5515343049904191e-06, "loss": 0.3147, "step": 16572 }, { "epoch": 0.7500792034396923, "grad_norm": 0.6218208817316578, "learning_rate": 1.5510036326336868e-06, "loss": 0.3193, "step": 16573 }, { "epoch": 0.7501244625480878, "grad_norm": 0.6407424514409737, "learning_rate": 1.5504730343852952e-06, "loss": 0.3067, "step": 16574 }, { "epoch": 0.7501697216564833, "grad_norm": 0.27408098422727273, "learning_rate": 1.5499425102566423e-06, "loss": 0.4935, "step": 16575 }, { "epoch": 0.7502149807648789, "grad_norm": 0.5948893953322856, "learning_rate": 1.5494120602591305e-06, "loss": 0.2392, "step": 16576 }, { "epoch": 0.7502602398732745, "grad_norm": 0.2785590734503924, "learning_rate": 1.5488816844041537e-06, "loss": 0.4424, "step": 16577 }, { "epoch": 0.7503054989816701, "grad_norm": 0.2694455060170366, "learning_rate": 1.5483513827031122e-06, "loss": 0.4678, "step": 16578 }, { "epoch": 0.7503507580900656, "grad_norm": 0.611838194162662, "learning_rate": 1.547821155167399e-06, "loss": 0.2833, "step": 16579 }, { "epoch": 0.7503960171984612, "grad_norm": 0.6544908570921042, "learning_rate": 1.5472910018084043e-06, "loss": 0.3155, "step": 16580 }, { "epoch": 0.7504412763068568, "grad_norm": 0.6395240466759102, "learning_rate": 1.546760922637522e-06, "loss": 0.312, "step": 16581 }, { "epoch": 0.7504865354152523, "grad_norm": 0.600114842632944, "learning_rate": 1.5462309176661433e-06, "loss": 0.3063, "step": 16582 }, { "epoch": 0.7505317945236479, "grad_norm": 0.25831747528691457, "learning_rate": 1.5457009869056545e-06, "loss": 0.4876, "step": 16583 }, { "epoch": 0.7505770536320434, "grad_norm": 0.6005233821159032, "learning_rate": 1.5451711303674411e-06, "loss": 0.2753, "step": 16584 }, { "epoch": 0.750622312740439, "grad_norm": 0.6294182622576255, "learning_rate": 1.5446413480628908e-06, "loss": 0.3225, "step": 16585 }, { "epoch": 0.7506675718488346, "grad_norm": 0.6474888224795824, "learning_rate": 1.5441116400033846e-06, "loss": 0.3342, "step": 16586 }, { "epoch": 0.7507128309572302, "grad_norm": 0.738721455533006, "learning_rate": 1.543582006200306e-06, "loss": 0.2893, "step": 16587 }, { "epoch": 0.7507580900656257, "grad_norm": 0.5786158399123434, "learning_rate": 1.5430524466650354e-06, "loss": 0.2991, "step": 16588 }, { "epoch": 0.7508033491740213, "grad_norm": 0.596568758570493, "learning_rate": 1.5425229614089482e-06, "loss": 0.3466, "step": 16589 }, { "epoch": 0.7508486082824168, "grad_norm": 0.2613221091049353, "learning_rate": 1.5419935504434242e-06, "loss": 0.4615, "step": 16590 }, { "epoch": 0.7508938673908124, "grad_norm": 0.377820860939105, "learning_rate": 1.5414642137798396e-06, "loss": 0.4633, "step": 16591 }, { "epoch": 0.750939126499208, "grad_norm": 0.5871596830036833, "learning_rate": 1.5409349514295674e-06, "loss": 0.2742, "step": 16592 }, { "epoch": 0.7509843856076035, "grad_norm": 0.27193806045907737, "learning_rate": 1.540405763403977e-06, "loss": 0.4966, "step": 16593 }, { "epoch": 0.7510296447159991, "grad_norm": 0.5923123067358512, "learning_rate": 1.5398766497144424e-06, "loss": 0.2994, "step": 16594 }, { "epoch": 0.7510749038243947, "grad_norm": 0.6217953147336326, "learning_rate": 1.5393476103723342e-06, "loss": 0.3565, "step": 16595 }, { "epoch": 0.7511201629327903, "grad_norm": 0.6456203570155561, "learning_rate": 1.5388186453890142e-06, "loss": 0.27, "step": 16596 }, { "epoch": 0.7511654220411857, "grad_norm": 0.5788306938991927, "learning_rate": 1.5382897547758513e-06, "loss": 0.283, "step": 16597 }, { "epoch": 0.7512106811495813, "grad_norm": 0.2657948767174147, "learning_rate": 1.5377609385442116e-06, "loss": 0.4624, "step": 16598 }, { "epoch": 0.7512559402579769, "grad_norm": 0.6724230533015774, "learning_rate": 1.5372321967054554e-06, "loss": 0.3256, "step": 16599 }, { "epoch": 0.7513011993663725, "grad_norm": 0.6915387200679524, "learning_rate": 1.5367035292709432e-06, "loss": 0.3744, "step": 16600 }, { "epoch": 0.751346458474768, "grad_norm": 0.2619534121431575, "learning_rate": 1.5361749362520363e-06, "loss": 0.4691, "step": 16601 }, { "epoch": 0.7513917175831636, "grad_norm": 0.6145861080376424, "learning_rate": 1.5356464176600905e-06, "loss": 0.2918, "step": 16602 }, { "epoch": 0.7514369766915592, "grad_norm": 0.6530194354447144, "learning_rate": 1.5351179735064647e-06, "loss": 0.2846, "step": 16603 }, { "epoch": 0.7514822357999548, "grad_norm": 0.6849799194550324, "learning_rate": 1.534589603802511e-06, "loss": 0.2863, "step": 16604 }, { "epoch": 0.7515274949083504, "grad_norm": 0.6193998756496838, "learning_rate": 1.5340613085595846e-06, "loss": 0.3173, "step": 16605 }, { "epoch": 0.7515727540167458, "grad_norm": 0.616145686650051, "learning_rate": 1.5335330877890341e-06, "loss": 0.3018, "step": 16606 }, { "epoch": 0.7516180131251414, "grad_norm": 0.6140848804103951, "learning_rate": 1.533004941502213e-06, "loss": 0.3318, "step": 16607 }, { "epoch": 0.751663272233537, "grad_norm": 0.6719963364193847, "learning_rate": 1.5324768697104681e-06, "loss": 0.3211, "step": 16608 }, { "epoch": 0.7517085313419326, "grad_norm": 0.6257389026054484, "learning_rate": 1.5319488724251436e-06, "loss": 0.3511, "step": 16609 }, { "epoch": 0.7517537904503281, "grad_norm": 0.6387686337466676, "learning_rate": 1.5314209496575861e-06, "loss": 0.3059, "step": 16610 }, { "epoch": 0.7517990495587237, "grad_norm": 0.7750573942287288, "learning_rate": 1.5308931014191414e-06, "loss": 0.2943, "step": 16611 }, { "epoch": 0.7518443086671193, "grad_norm": 0.6321905375975513, "learning_rate": 1.5303653277211493e-06, "loss": 0.2929, "step": 16612 }, { "epoch": 0.7518895677755149, "grad_norm": 0.6112993269416406, "learning_rate": 1.5298376285749489e-06, "loss": 0.3131, "step": 16613 }, { "epoch": 0.7519348268839103, "grad_norm": 0.6284143210745861, "learning_rate": 1.5293100039918812e-06, "loss": 0.3148, "step": 16614 }, { "epoch": 0.7519800859923059, "grad_norm": 0.5935461720492932, "learning_rate": 1.5287824539832808e-06, "loss": 0.3209, "step": 16615 }, { "epoch": 0.7520253451007015, "grad_norm": 0.6039730603185649, "learning_rate": 1.5282549785604861e-06, "loss": 0.3211, "step": 16616 }, { "epoch": 0.7520706042090971, "grad_norm": 0.30584223112382614, "learning_rate": 1.5277275777348294e-06, "loss": 0.4479, "step": 16617 }, { "epoch": 0.7521158633174927, "grad_norm": 0.5795522532688792, "learning_rate": 1.5272002515176404e-06, "loss": 0.2825, "step": 16618 }, { "epoch": 0.7521611224258882, "grad_norm": 0.5665869525523641, "learning_rate": 1.526672999920253e-06, "loss": 0.2639, "step": 16619 }, { "epoch": 0.7522063815342838, "grad_norm": 0.5847304350030971, "learning_rate": 1.5261458229539966e-06, "loss": 0.3083, "step": 16620 }, { "epoch": 0.7522516406426794, "grad_norm": 0.27271189698999354, "learning_rate": 1.525618720630197e-06, "loss": 0.4373, "step": 16621 }, { "epoch": 0.7522968997510749, "grad_norm": 0.2711721727141647, "learning_rate": 1.525091692960179e-06, "loss": 0.4454, "step": 16622 }, { "epoch": 0.7523421588594704, "grad_norm": 0.6319203562525041, "learning_rate": 1.5245647399552682e-06, "loss": 0.3096, "step": 16623 }, { "epoch": 0.752387417967866, "grad_norm": 0.5365429777613504, "learning_rate": 1.5240378616267887e-06, "loss": 0.3046, "step": 16624 }, { "epoch": 0.7524326770762616, "grad_norm": 0.5917964105538425, "learning_rate": 1.5235110579860602e-06, "loss": 0.3068, "step": 16625 }, { "epoch": 0.7524779361846572, "grad_norm": 0.6203010656794513, "learning_rate": 1.5229843290443996e-06, "loss": 0.2773, "step": 16626 }, { "epoch": 0.7525231952930528, "grad_norm": 0.6404234014576459, "learning_rate": 1.5224576748131292e-06, "loss": 0.3062, "step": 16627 }, { "epoch": 0.7525684544014483, "grad_norm": 0.5958364086183551, "learning_rate": 1.521931095303561e-06, "loss": 0.3381, "step": 16628 }, { "epoch": 0.7526137135098439, "grad_norm": 0.6663594972658775, "learning_rate": 1.521404590527013e-06, "loss": 0.3265, "step": 16629 }, { "epoch": 0.7526589726182394, "grad_norm": 0.6487574124977519, "learning_rate": 1.520878160494797e-06, "loss": 0.2932, "step": 16630 }, { "epoch": 0.752704231726635, "grad_norm": 0.5654488136672179, "learning_rate": 1.520351805218222e-06, "loss": 0.3152, "step": 16631 }, { "epoch": 0.7527494908350305, "grad_norm": 0.6411086584970878, "learning_rate": 1.5198255247086018e-06, "loss": 0.2959, "step": 16632 }, { "epoch": 0.7527947499434261, "grad_norm": 0.6128062221240184, "learning_rate": 1.5192993189772408e-06, "loss": 0.3323, "step": 16633 }, { "epoch": 0.7528400090518217, "grad_norm": 0.617088053959374, "learning_rate": 1.5187731880354489e-06, "loss": 0.3243, "step": 16634 }, { "epoch": 0.7528852681602173, "grad_norm": 0.7494997082124705, "learning_rate": 1.5182471318945275e-06, "loss": 0.3403, "step": 16635 }, { "epoch": 0.7529305272686128, "grad_norm": 0.6297086295505582, "learning_rate": 1.517721150565784e-06, "loss": 0.2955, "step": 16636 }, { "epoch": 0.7529757863770083, "grad_norm": 0.6379063471545178, "learning_rate": 1.5171952440605175e-06, "loss": 0.3205, "step": 16637 }, { "epoch": 0.7530210454854039, "grad_norm": 0.5753119543744168, "learning_rate": 1.5166694123900271e-06, "loss": 0.2886, "step": 16638 }, { "epoch": 0.7530663045937995, "grad_norm": 0.657485752082828, "learning_rate": 1.5161436555656129e-06, "loss": 0.3017, "step": 16639 }, { "epoch": 0.7531115637021951, "grad_norm": 0.6017532634783226, "learning_rate": 1.5156179735985732e-06, "loss": 0.3474, "step": 16640 }, { "epoch": 0.7531568228105906, "grad_norm": 0.6199333396182094, "learning_rate": 1.5150923665002021e-06, "loss": 0.2847, "step": 16641 }, { "epoch": 0.7532020819189862, "grad_norm": 0.6428709789975856, "learning_rate": 1.514566834281791e-06, "loss": 0.2791, "step": 16642 }, { "epoch": 0.7532473410273818, "grad_norm": 0.6080445266538872, "learning_rate": 1.5140413769546353e-06, "loss": 0.2884, "step": 16643 }, { "epoch": 0.7532926001357774, "grad_norm": 0.6450629264084641, "learning_rate": 1.5135159945300232e-06, "loss": 0.2953, "step": 16644 }, { "epoch": 0.7533378592441728, "grad_norm": 0.7004210719059599, "learning_rate": 1.5129906870192456e-06, "loss": 0.2754, "step": 16645 }, { "epoch": 0.7533831183525684, "grad_norm": 0.6067616219779681, "learning_rate": 1.512465454433587e-06, "loss": 0.2697, "step": 16646 }, { "epoch": 0.753428377460964, "grad_norm": 0.593391764471863, "learning_rate": 1.5119402967843361e-06, "loss": 0.2953, "step": 16647 }, { "epoch": 0.7534736365693596, "grad_norm": 0.26635515223260847, "learning_rate": 1.5114152140827744e-06, "loss": 0.4766, "step": 16648 }, { "epoch": 0.7535188956777551, "grad_norm": 0.5983568795117736, "learning_rate": 1.5108902063401865e-06, "loss": 0.2911, "step": 16649 }, { "epoch": 0.7535641547861507, "grad_norm": 0.6481268868427346, "learning_rate": 1.5103652735678525e-06, "loss": 0.3202, "step": 16650 }, { "epoch": 0.7536094138945463, "grad_norm": 0.5963856117452525, "learning_rate": 1.509840415777049e-06, "loss": 0.2971, "step": 16651 }, { "epoch": 0.7536546730029419, "grad_norm": 0.6869703319652534, "learning_rate": 1.5093156329790564e-06, "loss": 0.2684, "step": 16652 }, { "epoch": 0.7536999321113375, "grad_norm": 0.5996001305416143, "learning_rate": 1.5087909251851513e-06, "loss": 0.2859, "step": 16653 }, { "epoch": 0.7537451912197329, "grad_norm": 0.6645363319026046, "learning_rate": 1.5082662924066067e-06, "loss": 0.353, "step": 16654 }, { "epoch": 0.7537904503281285, "grad_norm": 0.6327652610190093, "learning_rate": 1.5077417346546942e-06, "loss": 0.3123, "step": 16655 }, { "epoch": 0.7538357094365241, "grad_norm": 0.6546052835923984, "learning_rate": 1.5072172519406874e-06, "loss": 0.3011, "step": 16656 }, { "epoch": 0.7538809685449197, "grad_norm": 0.26338153529601854, "learning_rate": 1.5066928442758528e-06, "loss": 0.4763, "step": 16657 }, { "epoch": 0.7539262276533152, "grad_norm": 0.2735645155703924, "learning_rate": 1.506168511671462e-06, "loss": 0.4702, "step": 16658 }, { "epoch": 0.7539714867617108, "grad_norm": 0.6256308592453239, "learning_rate": 1.5056442541387794e-06, "loss": 0.3219, "step": 16659 }, { "epoch": 0.7540167458701064, "grad_norm": 0.6069178482632577, "learning_rate": 1.5051200716890686e-06, "loss": 0.3154, "step": 16660 }, { "epoch": 0.754062004978502, "grad_norm": 0.5788701325657072, "learning_rate": 1.5045959643335928e-06, "loss": 0.326, "step": 16661 }, { "epoch": 0.7541072640868974, "grad_norm": 0.6112234127041374, "learning_rate": 1.5040719320836167e-06, "loss": 0.3219, "step": 16662 }, { "epoch": 0.754152523195293, "grad_norm": 0.5851015832556286, "learning_rate": 1.5035479749503973e-06, "loss": 0.2864, "step": 16663 }, { "epoch": 0.7541977823036886, "grad_norm": 0.8292918564880299, "learning_rate": 1.5030240929451922e-06, "loss": 0.3121, "step": 16664 }, { "epoch": 0.7542430414120842, "grad_norm": 1.198695686104617, "learning_rate": 1.5025002860792609e-06, "loss": 0.2842, "step": 16665 }, { "epoch": 0.7542883005204798, "grad_norm": 0.6179582469616123, "learning_rate": 1.5019765543638564e-06, "loss": 0.2948, "step": 16666 }, { "epoch": 0.7543335596288753, "grad_norm": 0.6735381253035728, "learning_rate": 1.5014528978102311e-06, "loss": 0.2801, "step": 16667 }, { "epoch": 0.7543788187372709, "grad_norm": 0.6356498941144694, "learning_rate": 1.500929316429638e-06, "loss": 0.2803, "step": 16668 }, { "epoch": 0.7544240778456665, "grad_norm": 0.6436793787689117, "learning_rate": 1.5004058102333285e-06, "loss": 0.3282, "step": 16669 }, { "epoch": 0.754469336954062, "grad_norm": 0.763312169832447, "learning_rate": 1.49988237923255e-06, "loss": 0.3088, "step": 16670 }, { "epoch": 0.7545145960624575, "grad_norm": 0.838672560377792, "learning_rate": 1.499359023438548e-06, "loss": 0.3191, "step": 16671 }, { "epoch": 0.7545598551708531, "grad_norm": 0.5915128483790769, "learning_rate": 1.4988357428625711e-06, "loss": 0.2931, "step": 16672 }, { "epoch": 0.7546051142792487, "grad_norm": 0.7560524050943002, "learning_rate": 1.4983125375158591e-06, "loss": 0.321, "step": 16673 }, { "epoch": 0.7546503733876443, "grad_norm": 0.6354801252230485, "learning_rate": 1.4977894074096576e-06, "loss": 0.2892, "step": 16674 }, { "epoch": 0.7546956324960399, "grad_norm": 0.5789119692914134, "learning_rate": 1.497266352555204e-06, "loss": 0.2678, "step": 16675 }, { "epoch": 0.7547408916044354, "grad_norm": 0.298435257825619, "learning_rate": 1.4967433729637403e-06, "loss": 0.4841, "step": 16676 }, { "epoch": 0.754786150712831, "grad_norm": 0.5903516195540437, "learning_rate": 1.4962204686465003e-06, "loss": 0.286, "step": 16677 }, { "epoch": 0.7548314098212265, "grad_norm": 0.6343168652793674, "learning_rate": 1.4956976396147233e-06, "loss": 0.3419, "step": 16678 }, { "epoch": 0.7548766689296221, "grad_norm": 0.2934288256714517, "learning_rate": 1.4951748858796411e-06, "loss": 0.4855, "step": 16679 }, { "epoch": 0.7549219280380176, "grad_norm": 0.635717850760982, "learning_rate": 1.494652207452485e-06, "loss": 0.2848, "step": 16680 }, { "epoch": 0.7549671871464132, "grad_norm": 0.5702053407110187, "learning_rate": 1.4941296043444869e-06, "loss": 0.3075, "step": 16681 }, { "epoch": 0.7550124462548088, "grad_norm": 0.6190446810455607, "learning_rate": 1.493607076566878e-06, "loss": 0.2881, "step": 16682 }, { "epoch": 0.7550577053632044, "grad_norm": 0.26498951155860173, "learning_rate": 1.4930846241308838e-06, "loss": 0.4557, "step": 16683 }, { "epoch": 0.7551029644715999, "grad_norm": 0.592932363094059, "learning_rate": 1.4925622470477291e-06, "loss": 0.2828, "step": 16684 }, { "epoch": 0.7551482235799954, "grad_norm": 0.6565257146532069, "learning_rate": 1.4920399453286405e-06, "loss": 0.2936, "step": 16685 }, { "epoch": 0.755193482688391, "grad_norm": 0.6645887871402962, "learning_rate": 1.4915177189848384e-06, "loss": 0.2476, "step": 16686 }, { "epoch": 0.7552387417967866, "grad_norm": 0.6285093421542468, "learning_rate": 1.4909955680275462e-06, "loss": 0.2285, "step": 16687 }, { "epoch": 0.7552840009051822, "grad_norm": 0.2759830785118571, "learning_rate": 1.4904734924679825e-06, "loss": 0.4875, "step": 16688 }, { "epoch": 0.7553292600135777, "grad_norm": 0.6342964851974112, "learning_rate": 1.489951492317363e-06, "loss": 0.3002, "step": 16689 }, { "epoch": 0.7553745191219733, "grad_norm": 0.6198259499774071, "learning_rate": 1.4894295675869058e-06, "loss": 0.3391, "step": 16690 }, { "epoch": 0.7554197782303689, "grad_norm": 0.57067587269311, "learning_rate": 1.488907718287827e-06, "loss": 0.2754, "step": 16691 }, { "epoch": 0.7554650373387645, "grad_norm": 0.600458579427856, "learning_rate": 1.4883859444313376e-06, "loss": 0.3354, "step": 16692 }, { "epoch": 0.75551029644716, "grad_norm": 0.6040714705322814, "learning_rate": 1.4878642460286474e-06, "loss": 0.3409, "step": 16693 }, { "epoch": 0.7555555555555555, "grad_norm": 0.6057206746979718, "learning_rate": 1.4873426230909682e-06, "loss": 0.3009, "step": 16694 }, { "epoch": 0.7556008146639511, "grad_norm": 0.645707146707286, "learning_rate": 1.4868210756295109e-06, "loss": 0.338, "step": 16695 }, { "epoch": 0.7556460737723467, "grad_norm": 0.6667021532346543, "learning_rate": 1.4862996036554756e-06, "loss": 0.3085, "step": 16696 }, { "epoch": 0.7556913328807422, "grad_norm": 0.2810025522918448, "learning_rate": 1.4857782071800697e-06, "loss": 0.4542, "step": 16697 }, { "epoch": 0.7557365919891378, "grad_norm": 0.6814649766957752, "learning_rate": 1.4852568862144995e-06, "loss": 0.3069, "step": 16698 }, { "epoch": 0.7557818510975334, "grad_norm": 0.5990733271348277, "learning_rate": 1.4847356407699632e-06, "loss": 0.3161, "step": 16699 }, { "epoch": 0.755827110205929, "grad_norm": 0.6273544717931, "learning_rate": 1.4842144708576606e-06, "loss": 0.3418, "step": 16700 }, { "epoch": 0.7558723693143246, "grad_norm": 0.6326039496529865, "learning_rate": 1.4836933764887928e-06, "loss": 0.2986, "step": 16701 }, { "epoch": 0.75591762842272, "grad_norm": 0.26567037266128657, "learning_rate": 1.4831723576745531e-06, "loss": 0.4501, "step": 16702 }, { "epoch": 0.7559628875311156, "grad_norm": 0.6678320036059954, "learning_rate": 1.48265141442614e-06, "loss": 0.2983, "step": 16703 }, { "epoch": 0.7560081466395112, "grad_norm": 0.6033148157639469, "learning_rate": 1.4821305467547436e-06, "loss": 0.2782, "step": 16704 }, { "epoch": 0.7560534057479068, "grad_norm": 0.6145787945428695, "learning_rate": 1.481609754671559e-06, "loss": 0.3172, "step": 16705 }, { "epoch": 0.7560986648563023, "grad_norm": 0.6623739706543618, "learning_rate": 1.4810890381877736e-06, "loss": 0.275, "step": 16706 }, { "epoch": 0.7561439239646979, "grad_norm": 0.594661372064247, "learning_rate": 1.4805683973145784e-06, "loss": 0.33, "step": 16707 }, { "epoch": 0.7561891830730935, "grad_norm": 0.2872788418078114, "learning_rate": 1.4800478320631595e-06, "loss": 0.4667, "step": 16708 }, { "epoch": 0.756234442181489, "grad_norm": 0.664942448101899, "learning_rate": 1.4795273424446998e-06, "loss": 0.3092, "step": 16709 }, { "epoch": 0.7562797012898846, "grad_norm": 0.5697394913447789, "learning_rate": 1.4790069284703863e-06, "loss": 0.2566, "step": 16710 }, { "epoch": 0.7563249603982801, "grad_norm": 0.6307175705577794, "learning_rate": 1.4784865901514005e-06, "loss": 0.3119, "step": 16711 }, { "epoch": 0.7563702195066757, "grad_norm": 0.6019116928534433, "learning_rate": 1.4779663274989232e-06, "loss": 0.3139, "step": 16712 }, { "epoch": 0.7564154786150713, "grad_norm": 0.6226910834635931, "learning_rate": 1.4774461405241303e-06, "loss": 0.3392, "step": 16713 }, { "epoch": 0.7564607377234669, "grad_norm": 0.6134479971109972, "learning_rate": 1.4769260292382031e-06, "loss": 0.2916, "step": 16714 }, { "epoch": 0.7565059968318624, "grad_norm": 0.6155452094594851, "learning_rate": 1.4764059936523134e-06, "loss": 0.2951, "step": 16715 }, { "epoch": 0.756551255940258, "grad_norm": 0.2636065545551082, "learning_rate": 1.4758860337776387e-06, "loss": 0.4449, "step": 16716 }, { "epoch": 0.7565965150486536, "grad_norm": 0.2850426046437727, "learning_rate": 1.475366149625348e-06, "loss": 0.4538, "step": 16717 }, { "epoch": 0.7566417741570491, "grad_norm": 0.6652238532497509, "learning_rate": 1.474846341206615e-06, "loss": 0.3183, "step": 16718 }, { "epoch": 0.7566870332654446, "grad_norm": 0.7590976861643215, "learning_rate": 1.4743266085326062e-06, "loss": 0.3575, "step": 16719 }, { "epoch": 0.7567322923738402, "grad_norm": 0.5542928044334143, "learning_rate": 1.473806951614492e-06, "loss": 0.3177, "step": 16720 }, { "epoch": 0.7567775514822358, "grad_norm": 0.646674522922809, "learning_rate": 1.4732873704634366e-06, "loss": 0.291, "step": 16721 }, { "epoch": 0.7568228105906314, "grad_norm": 0.6264009080590869, "learning_rate": 1.472767865090602e-06, "loss": 0.3181, "step": 16722 }, { "epoch": 0.756868069699027, "grad_norm": 0.279109290941747, "learning_rate": 1.472248435507153e-06, "loss": 0.4683, "step": 16723 }, { "epoch": 0.7569133288074225, "grad_norm": 0.6090954445924818, "learning_rate": 1.4717290817242542e-06, "loss": 0.3097, "step": 16724 }, { "epoch": 0.756958587915818, "grad_norm": 0.73037171924201, "learning_rate": 1.4712098037530575e-06, "loss": 0.3042, "step": 16725 }, { "epoch": 0.7570038470242136, "grad_norm": 0.650877793870771, "learning_rate": 1.4706906016047246e-06, "loss": 0.3073, "step": 16726 }, { "epoch": 0.7570491061326092, "grad_norm": 0.7641152232774284, "learning_rate": 1.4701714752904123e-06, "loss": 0.3051, "step": 16727 }, { "epoch": 0.7570943652410047, "grad_norm": 0.6338668362810962, "learning_rate": 1.4696524248212746e-06, "loss": 0.2979, "step": 16728 }, { "epoch": 0.7571396243494003, "grad_norm": 0.26435423960215104, "learning_rate": 1.4691334502084614e-06, "loss": 0.4609, "step": 16729 }, { "epoch": 0.7571848834577959, "grad_norm": 0.27102785628931125, "learning_rate": 1.4686145514631284e-06, "loss": 0.4625, "step": 16730 }, { "epoch": 0.7572301425661915, "grad_norm": 0.6085883837552643, "learning_rate": 1.4680957285964208e-06, "loss": 0.2541, "step": 16731 }, { "epoch": 0.757275401674587, "grad_norm": 0.5566789634371905, "learning_rate": 1.4675769816194902e-06, "loss": 0.2708, "step": 16732 }, { "epoch": 0.7573206607829825, "grad_norm": 0.6355008962027197, "learning_rate": 1.46705831054348e-06, "loss": 0.2918, "step": 16733 }, { "epoch": 0.7573659198913781, "grad_norm": 0.25688814557401, "learning_rate": 1.4665397153795375e-06, "loss": 0.4723, "step": 16734 }, { "epoch": 0.7574111789997737, "grad_norm": 0.5969059123585657, "learning_rate": 1.4660211961388027e-06, "loss": 0.3298, "step": 16735 }, { "epoch": 0.7574564381081693, "grad_norm": 0.6764990779117397, "learning_rate": 1.46550275283242e-06, "loss": 0.2872, "step": 16736 }, { "epoch": 0.7575016972165648, "grad_norm": 0.5972428709774328, "learning_rate": 1.464984385471528e-06, "loss": 0.307, "step": 16737 }, { "epoch": 0.7575469563249604, "grad_norm": 0.6033021842878759, "learning_rate": 1.4644660940672628e-06, "loss": 0.3265, "step": 16738 }, { "epoch": 0.757592215433356, "grad_norm": 0.6668498063128243, "learning_rate": 1.4639478786307627e-06, "loss": 0.3038, "step": 16739 }, { "epoch": 0.7576374745417516, "grad_norm": 1.843639898452091, "learning_rate": 1.4634297391731645e-06, "loss": 0.3057, "step": 16740 }, { "epoch": 0.757682733650147, "grad_norm": 0.6062164374997798, "learning_rate": 1.4629116757055989e-06, "loss": 0.2588, "step": 16741 }, { "epoch": 0.7577279927585426, "grad_norm": 0.6141414599099798, "learning_rate": 1.462393688239197e-06, "loss": 0.2775, "step": 16742 }, { "epoch": 0.7577732518669382, "grad_norm": 0.6209748362392395, "learning_rate": 1.461875776785091e-06, "loss": 0.3779, "step": 16743 }, { "epoch": 0.7578185109753338, "grad_norm": 0.5652933419403928, "learning_rate": 1.4613579413544065e-06, "loss": 0.2686, "step": 16744 }, { "epoch": 0.7578637700837294, "grad_norm": 0.6753961791194718, "learning_rate": 1.4608401819582734e-06, "loss": 0.3364, "step": 16745 }, { "epoch": 0.7579090291921249, "grad_norm": 0.594061372333826, "learning_rate": 1.460322498607814e-06, "loss": 0.3186, "step": 16746 }, { "epoch": 0.7579542883005205, "grad_norm": 0.2855580345783155, "learning_rate": 1.4598048913141538e-06, "loss": 0.4631, "step": 16747 }, { "epoch": 0.7579995474089161, "grad_norm": 0.6084118565420273, "learning_rate": 1.4592873600884123e-06, "loss": 0.3163, "step": 16748 }, { "epoch": 0.7580448065173117, "grad_norm": 0.624547778812951, "learning_rate": 1.458769904941712e-06, "loss": 0.322, "step": 16749 }, { "epoch": 0.7580900656257071, "grad_norm": 0.5736452180550441, "learning_rate": 1.458252525885171e-06, "loss": 0.3053, "step": 16750 }, { "epoch": 0.7581353247341027, "grad_norm": 0.6451197460634303, "learning_rate": 1.4577352229299036e-06, "loss": 0.3067, "step": 16751 }, { "epoch": 0.7581805838424983, "grad_norm": 0.5835805495229613, "learning_rate": 1.4572179960870276e-06, "loss": 0.2996, "step": 16752 }, { "epoch": 0.7582258429508939, "grad_norm": 0.644994455896996, "learning_rate": 1.4567008453676584e-06, "loss": 0.304, "step": 16753 }, { "epoch": 0.7582711020592894, "grad_norm": 0.6242410509122905, "learning_rate": 1.456183770782903e-06, "loss": 0.2737, "step": 16754 }, { "epoch": 0.758316361167685, "grad_norm": 0.5967340225522353, "learning_rate": 1.4556667723438745e-06, "loss": 0.2896, "step": 16755 }, { "epoch": 0.7583616202760806, "grad_norm": 0.27541432600904664, "learning_rate": 1.4551498500616823e-06, "loss": 0.4955, "step": 16756 }, { "epoch": 0.7584068793844762, "grad_norm": 0.7689975087348239, "learning_rate": 1.4546330039474332e-06, "loss": 0.2772, "step": 16757 }, { "epoch": 0.7584521384928717, "grad_norm": 0.6404359902753533, "learning_rate": 1.4541162340122305e-06, "loss": 0.2655, "step": 16758 }, { "epoch": 0.7584973976012672, "grad_norm": 0.6518976716703694, "learning_rate": 1.453599540267181e-06, "loss": 0.3307, "step": 16759 }, { "epoch": 0.7585426567096628, "grad_norm": 0.6298797588350113, "learning_rate": 1.453082922723384e-06, "loss": 0.268, "step": 16760 }, { "epoch": 0.7585879158180584, "grad_norm": 0.6043313496398718, "learning_rate": 1.4525663813919433e-06, "loss": 0.3191, "step": 16761 }, { "epoch": 0.758633174926454, "grad_norm": 0.7437923777491514, "learning_rate": 1.452049916283954e-06, "loss": 0.3195, "step": 16762 }, { "epoch": 0.7586784340348495, "grad_norm": 0.6746630879217013, "learning_rate": 1.4515335274105168e-06, "loss": 0.3604, "step": 16763 }, { "epoch": 0.7587236931432451, "grad_norm": 0.5701015759486506, "learning_rate": 1.4510172147827244e-06, "loss": 0.2821, "step": 16764 }, { "epoch": 0.7587689522516406, "grad_norm": 0.6900711667667638, "learning_rate": 1.4505009784116735e-06, "loss": 0.2907, "step": 16765 }, { "epoch": 0.7588142113600362, "grad_norm": 0.5760290714206665, "learning_rate": 1.4499848183084558e-06, "loss": 0.2687, "step": 16766 }, { "epoch": 0.7588594704684317, "grad_norm": 0.5925292721443706, "learning_rate": 1.449468734484159e-06, "loss": 0.3341, "step": 16767 }, { "epoch": 0.7589047295768273, "grad_norm": 0.6251576508171806, "learning_rate": 1.4489527269498749e-06, "loss": 0.2754, "step": 16768 }, { "epoch": 0.7589499886852229, "grad_norm": 0.5910949194326818, "learning_rate": 1.4484367957166923e-06, "loss": 0.3144, "step": 16769 }, { "epoch": 0.7589952477936185, "grad_norm": 0.6602577060771069, "learning_rate": 1.4479209407956946e-06, "loss": 0.2814, "step": 16770 }, { "epoch": 0.7590405069020141, "grad_norm": 0.6725839382128941, "learning_rate": 1.4474051621979651e-06, "loss": 0.306, "step": 16771 }, { "epoch": 0.7590857660104096, "grad_norm": 0.6187387367824468, "learning_rate": 1.4468894599345895e-06, "loss": 0.2708, "step": 16772 }, { "epoch": 0.7591310251188051, "grad_norm": 0.2720563841107742, "learning_rate": 1.446373834016645e-06, "loss": 0.4706, "step": 16773 }, { "epoch": 0.7591762842272007, "grad_norm": 0.6388025149213199, "learning_rate": 1.4458582844552144e-06, "loss": 0.3396, "step": 16774 }, { "epoch": 0.7592215433355963, "grad_norm": 0.6594795049049732, "learning_rate": 1.4453428112613716e-06, "loss": 0.2842, "step": 16775 }, { "epoch": 0.7592668024439918, "grad_norm": 0.6183436482999873, "learning_rate": 1.4448274144461965e-06, "loss": 0.2865, "step": 16776 }, { "epoch": 0.7593120615523874, "grad_norm": 0.7109544276087211, "learning_rate": 1.44431209402076e-06, "loss": 0.263, "step": 16777 }, { "epoch": 0.759357320660783, "grad_norm": 0.6297736322139782, "learning_rate": 1.4437968499961374e-06, "loss": 0.2731, "step": 16778 }, { "epoch": 0.7594025797691786, "grad_norm": 0.6384738341119297, "learning_rate": 1.4432816823833983e-06, "loss": 0.3162, "step": 16779 }, { "epoch": 0.7594478388775742, "grad_norm": 0.582218562638432, "learning_rate": 1.4427665911936106e-06, "loss": 0.2916, "step": 16780 }, { "epoch": 0.7594930979859696, "grad_norm": 0.6127931682849966, "learning_rate": 1.4422515764378443e-06, "loss": 0.294, "step": 16781 }, { "epoch": 0.7595383570943652, "grad_norm": 0.6399210423841506, "learning_rate": 1.4417366381271674e-06, "loss": 0.286, "step": 16782 }, { "epoch": 0.7595836162027608, "grad_norm": 0.620176767003677, "learning_rate": 1.4412217762726388e-06, "loss": 0.2846, "step": 16783 }, { "epoch": 0.7596288753111564, "grad_norm": 0.6019342956194589, "learning_rate": 1.4407069908853243e-06, "loss": 0.2601, "step": 16784 }, { "epoch": 0.7596741344195519, "grad_norm": 0.2856625396067131, "learning_rate": 1.4401922819762864e-06, "loss": 0.4776, "step": 16785 }, { "epoch": 0.7597193935279475, "grad_norm": 0.27648760043520637, "learning_rate": 1.4396776495565833e-06, "loss": 0.4881, "step": 16786 }, { "epoch": 0.7597646526363431, "grad_norm": 0.625835672946518, "learning_rate": 1.4391630936372714e-06, "loss": 0.3046, "step": 16787 }, { "epoch": 0.7598099117447387, "grad_norm": 0.6146365451550019, "learning_rate": 1.4386486142294081e-06, "loss": 0.2524, "step": 16788 }, { "epoch": 0.7598551708531341, "grad_norm": 0.6884840712238033, "learning_rate": 1.43813421134405e-06, "loss": 0.2684, "step": 16789 }, { "epoch": 0.7599004299615297, "grad_norm": 0.7172089130658871, "learning_rate": 1.4376198849922484e-06, "loss": 0.2825, "step": 16790 }, { "epoch": 0.7599456890699253, "grad_norm": 0.6264937069460348, "learning_rate": 1.4371056351850525e-06, "loss": 0.3419, "step": 16791 }, { "epoch": 0.7599909481783209, "grad_norm": 0.6564587162794594, "learning_rate": 1.4365914619335158e-06, "loss": 0.2876, "step": 16792 }, { "epoch": 0.7600362072867165, "grad_norm": 0.658685267212285, "learning_rate": 1.4360773652486826e-06, "loss": 0.3208, "step": 16793 }, { "epoch": 0.760081466395112, "grad_norm": 0.598486391096725, "learning_rate": 1.435563345141603e-06, "loss": 0.3154, "step": 16794 }, { "epoch": 0.7601267255035076, "grad_norm": 0.6100147880854968, "learning_rate": 1.4350494016233197e-06, "loss": 0.2963, "step": 16795 }, { "epoch": 0.7601719846119032, "grad_norm": 0.6434704416028074, "learning_rate": 1.4345355347048739e-06, "loss": 0.2693, "step": 16796 }, { "epoch": 0.7602172437202988, "grad_norm": 0.5765541262298857, "learning_rate": 1.4340217443973093e-06, "loss": 0.282, "step": 16797 }, { "epoch": 0.7602625028286942, "grad_norm": 0.6419387726754483, "learning_rate": 1.4335080307116667e-06, "loss": 0.3015, "step": 16798 }, { "epoch": 0.7603077619370898, "grad_norm": 0.5943876804016321, "learning_rate": 1.432994393658983e-06, "loss": 0.2685, "step": 16799 }, { "epoch": 0.7603530210454854, "grad_norm": 0.6019177857531949, "learning_rate": 1.4324808332502932e-06, "loss": 0.2883, "step": 16800 }, { "epoch": 0.760398280153881, "grad_norm": 0.27134340303500054, "learning_rate": 1.4319673494966345e-06, "loss": 0.4577, "step": 16801 }, { "epoch": 0.7604435392622765, "grad_norm": 0.7610053429037491, "learning_rate": 1.431453942409038e-06, "loss": 0.2485, "step": 16802 }, { "epoch": 0.7604887983706721, "grad_norm": 0.6087080165987451, "learning_rate": 1.430940611998538e-06, "loss": 0.28, "step": 16803 }, { "epoch": 0.7605340574790677, "grad_norm": 0.5778916428969291, "learning_rate": 1.4304273582761607e-06, "loss": 0.2827, "step": 16804 }, { "epoch": 0.7605793165874632, "grad_norm": 0.6189611531085527, "learning_rate": 1.4299141812529382e-06, "loss": 0.3085, "step": 16805 }, { "epoch": 0.7606245756958588, "grad_norm": 0.6054645325134527, "learning_rate": 1.429401080939894e-06, "loss": 0.2818, "step": 16806 }, { "epoch": 0.7606698348042543, "grad_norm": 0.7938087806773069, "learning_rate": 1.4288880573480551e-06, "loss": 0.3107, "step": 16807 }, { "epoch": 0.7607150939126499, "grad_norm": 0.7210383208925405, "learning_rate": 1.4283751104884446e-06, "loss": 0.309, "step": 16808 }, { "epoch": 0.7607603530210455, "grad_norm": 0.28603461792807805, "learning_rate": 1.4278622403720816e-06, "loss": 0.4726, "step": 16809 }, { "epoch": 0.7608056121294411, "grad_norm": 0.6318045524562781, "learning_rate": 1.4273494470099886e-06, "loss": 0.3327, "step": 16810 }, { "epoch": 0.7608508712378366, "grad_norm": 0.7677521171288333, "learning_rate": 1.4268367304131847e-06, "loss": 0.2926, "step": 16811 }, { "epoch": 0.7608961303462322, "grad_norm": 0.5626308426564788, "learning_rate": 1.426324090592685e-06, "loss": 0.2787, "step": 16812 }, { "epoch": 0.7609413894546277, "grad_norm": 0.2539087849679761, "learning_rate": 1.4258115275595036e-06, "loss": 0.456, "step": 16813 }, { "epoch": 0.7609866485630233, "grad_norm": 0.6223413384815674, "learning_rate": 1.425299041324657e-06, "loss": 0.3336, "step": 16814 }, { "epoch": 0.7610319076714189, "grad_norm": 0.6122816121017155, "learning_rate": 1.424786631899155e-06, "loss": 0.3442, "step": 16815 }, { "epoch": 0.7610771667798144, "grad_norm": 0.6532982871938595, "learning_rate": 1.424274299294006e-06, "loss": 0.3565, "step": 16816 }, { "epoch": 0.76112242588821, "grad_norm": 0.563622553510085, "learning_rate": 1.423762043520221e-06, "loss": 0.2918, "step": 16817 }, { "epoch": 0.7611676849966056, "grad_norm": 1.1159220788524196, "learning_rate": 1.4232498645888071e-06, "loss": 0.316, "step": 16818 }, { "epoch": 0.7612129441050012, "grad_norm": 0.5783143028320339, "learning_rate": 1.4227377625107686e-06, "loss": 0.3085, "step": 16819 }, { "epoch": 0.7612582032133967, "grad_norm": 0.24991075881259164, "learning_rate": 1.4222257372971072e-06, "loss": 0.4805, "step": 16820 }, { "epoch": 0.7613034623217922, "grad_norm": 0.6570138851141044, "learning_rate": 1.4217137889588279e-06, "loss": 0.3054, "step": 16821 }, { "epoch": 0.7613487214301878, "grad_norm": 0.6034558054078771, "learning_rate": 1.421201917506928e-06, "loss": 0.2591, "step": 16822 }, { "epoch": 0.7613939805385834, "grad_norm": 0.5642207860718541, "learning_rate": 1.4206901229524089e-06, "loss": 0.2863, "step": 16823 }, { "epoch": 0.7614392396469789, "grad_norm": 0.6288858995612324, "learning_rate": 1.4201784053062662e-06, "loss": 0.2803, "step": 16824 }, { "epoch": 0.7614844987553745, "grad_norm": 0.6032890415679383, "learning_rate": 1.4196667645794932e-06, "loss": 0.2858, "step": 16825 }, { "epoch": 0.7615297578637701, "grad_norm": 0.577659202381288, "learning_rate": 1.4191552007830856e-06, "loss": 0.2414, "step": 16826 }, { "epoch": 0.7615750169721657, "grad_norm": 0.6491047599613663, "learning_rate": 1.4186437139280363e-06, "loss": 0.3162, "step": 16827 }, { "epoch": 0.7616202760805613, "grad_norm": 0.624204633497584, "learning_rate": 1.4181323040253346e-06, "loss": 0.2882, "step": 16828 }, { "epoch": 0.7616655351889567, "grad_norm": 0.6492984485836424, "learning_rate": 1.4176209710859672e-06, "loss": 0.2684, "step": 16829 }, { "epoch": 0.7617107942973523, "grad_norm": 0.9304262523062551, "learning_rate": 1.417109715120924e-06, "loss": 0.265, "step": 16830 }, { "epoch": 0.7617560534057479, "grad_norm": 0.6239550151772859, "learning_rate": 1.4165985361411878e-06, "loss": 0.3371, "step": 16831 }, { "epoch": 0.7618013125141435, "grad_norm": 0.6818527097603084, "learning_rate": 1.4160874341577447e-06, "loss": 0.3229, "step": 16832 }, { "epoch": 0.761846571622539, "grad_norm": 0.6515337635420715, "learning_rate": 1.4155764091815737e-06, "loss": 0.2793, "step": 16833 }, { "epoch": 0.7618918307309346, "grad_norm": 0.6006328439560533, "learning_rate": 1.4150654612236592e-06, "loss": 0.3002, "step": 16834 }, { "epoch": 0.7619370898393302, "grad_norm": 0.2696520101700211, "learning_rate": 1.4145545902949758e-06, "loss": 0.4464, "step": 16835 }, { "epoch": 0.7619823489477258, "grad_norm": 0.5866097715554073, "learning_rate": 1.4140437964065034e-06, "loss": 0.2875, "step": 16836 }, { "epoch": 0.7620276080561212, "grad_norm": 0.6464923450865269, "learning_rate": 1.413533079569217e-06, "loss": 0.3314, "step": 16837 }, { "epoch": 0.7620728671645168, "grad_norm": 0.6175031665946599, "learning_rate": 1.4130224397940883e-06, "loss": 0.2522, "step": 16838 }, { "epoch": 0.7621181262729124, "grad_norm": 0.6334144130604863, "learning_rate": 1.4125118770920903e-06, "loss": 0.2754, "step": 16839 }, { "epoch": 0.762163385381308, "grad_norm": 0.2774705809975444, "learning_rate": 1.412001391474196e-06, "loss": 0.4639, "step": 16840 }, { "epoch": 0.7622086444897036, "grad_norm": 0.7367744811054106, "learning_rate": 1.4114909829513718e-06, "loss": 0.2797, "step": 16841 }, { "epoch": 0.7622539035980991, "grad_norm": 0.6474503842151763, "learning_rate": 1.4109806515345836e-06, "loss": 0.2997, "step": 16842 }, { "epoch": 0.7622991627064947, "grad_norm": 0.60544937199488, "learning_rate": 1.4104703972348e-06, "loss": 0.2746, "step": 16843 }, { "epoch": 0.7623444218148903, "grad_norm": 0.5857315723739743, "learning_rate": 1.4099602200629813e-06, "loss": 0.2633, "step": 16844 }, { "epoch": 0.7623896809232859, "grad_norm": 0.6437930937488767, "learning_rate": 1.4094501200300937e-06, "loss": 0.334, "step": 16845 }, { "epoch": 0.7624349400316813, "grad_norm": 0.6409464336461744, "learning_rate": 1.4089400971470935e-06, "loss": 0.2714, "step": 16846 }, { "epoch": 0.7624801991400769, "grad_norm": 0.6142536853103293, "learning_rate": 1.4084301514249432e-06, "loss": 0.247, "step": 16847 }, { "epoch": 0.7625254582484725, "grad_norm": 0.7107969655346827, "learning_rate": 1.407920282874598e-06, "loss": 0.3, "step": 16848 }, { "epoch": 0.7625707173568681, "grad_norm": 0.7072927653548671, "learning_rate": 1.4074104915070124e-06, "loss": 0.3144, "step": 16849 }, { "epoch": 0.7626159764652637, "grad_norm": 0.7667570326124314, "learning_rate": 1.4069007773331433e-06, "loss": 0.3365, "step": 16850 }, { "epoch": 0.7626612355736592, "grad_norm": 0.6173278493372766, "learning_rate": 1.4063911403639392e-06, "loss": 0.3386, "step": 16851 }, { "epoch": 0.7627064946820548, "grad_norm": 0.28676641994733726, "learning_rate": 1.4058815806103542e-06, "loss": 0.4656, "step": 16852 }, { "epoch": 0.7627517537904503, "grad_norm": 0.5823508981388617, "learning_rate": 1.4053720980833357e-06, "loss": 0.2728, "step": 16853 }, { "epoch": 0.7627970128988459, "grad_norm": 0.6438356399181908, "learning_rate": 1.4048626927938292e-06, "loss": 0.3565, "step": 16854 }, { "epoch": 0.7628422720072414, "grad_norm": 0.5933897311378838, "learning_rate": 1.4043533647527813e-06, "loss": 0.2896, "step": 16855 }, { "epoch": 0.762887531115637, "grad_norm": 0.5942110961471518, "learning_rate": 1.4038441139711384e-06, "loss": 0.3016, "step": 16856 }, { "epoch": 0.7629327902240326, "grad_norm": 0.5985865760232018, "learning_rate": 1.4033349404598407e-06, "loss": 0.2777, "step": 16857 }, { "epoch": 0.7629780493324282, "grad_norm": 0.6502313785103132, "learning_rate": 1.402825844229827e-06, "loss": 0.326, "step": 16858 }, { "epoch": 0.7630233084408237, "grad_norm": 0.6101212744528433, "learning_rate": 1.4023168252920384e-06, "loss": 0.267, "step": 16859 }, { "epoch": 0.7630685675492193, "grad_norm": 0.6054417882461244, "learning_rate": 1.4018078836574134e-06, "loss": 0.2968, "step": 16860 }, { "epoch": 0.7631138266576148, "grad_norm": 0.2915904665319847, "learning_rate": 1.401299019336886e-06, "loss": 0.4793, "step": 16861 }, { "epoch": 0.7631590857660104, "grad_norm": 0.5737394256906462, "learning_rate": 1.400790232341388e-06, "loss": 0.2823, "step": 16862 }, { "epoch": 0.763204344874406, "grad_norm": 0.6102924639830412, "learning_rate": 1.4002815226818557e-06, "loss": 0.2596, "step": 16863 }, { "epoch": 0.7632496039828015, "grad_norm": 0.6441089802380752, "learning_rate": 1.3997728903692164e-06, "loss": 0.2871, "step": 16864 }, { "epoch": 0.7632948630911971, "grad_norm": 0.653370143391403, "learning_rate": 1.3992643354144013e-06, "loss": 0.2683, "step": 16865 }, { "epoch": 0.7633401221995927, "grad_norm": 0.8143105617909899, "learning_rate": 1.3987558578283378e-06, "loss": 0.2918, "step": 16866 }, { "epoch": 0.7633853813079883, "grad_norm": 0.6276857224416678, "learning_rate": 1.3982474576219485e-06, "loss": 0.2695, "step": 16867 }, { "epoch": 0.7634306404163838, "grad_norm": 0.6342212940275045, "learning_rate": 1.3977391348061592e-06, "loss": 0.2982, "step": 16868 }, { "epoch": 0.7634758995247793, "grad_norm": 0.6223834130747216, "learning_rate": 1.397230889391894e-06, "loss": 0.2849, "step": 16869 }, { "epoch": 0.7635211586331749, "grad_norm": 0.5577136042915022, "learning_rate": 1.3967227213900725e-06, "loss": 0.2476, "step": 16870 }, { "epoch": 0.7635664177415705, "grad_norm": 0.7179559302744081, "learning_rate": 1.3962146308116109e-06, "loss": 0.2798, "step": 16871 }, { "epoch": 0.763611676849966, "grad_norm": 0.30340564592253993, "learning_rate": 1.3957066176674306e-06, "loss": 0.4514, "step": 16872 }, { "epoch": 0.7636569359583616, "grad_norm": 0.6339792291596237, "learning_rate": 1.3951986819684432e-06, "loss": 0.3246, "step": 16873 }, { "epoch": 0.7637021950667572, "grad_norm": 0.5892116521049253, "learning_rate": 1.3946908237255668e-06, "loss": 0.3045, "step": 16874 }, { "epoch": 0.7637474541751528, "grad_norm": 0.6261217936668318, "learning_rate": 1.3941830429497105e-06, "loss": 0.2898, "step": 16875 }, { "epoch": 0.7637927132835484, "grad_norm": 1.5401022245595641, "learning_rate": 1.3936753396517877e-06, "loss": 0.2698, "step": 16876 }, { "epoch": 0.7638379723919438, "grad_norm": 0.8747644893541968, "learning_rate": 1.3931677138427035e-06, "loss": 0.317, "step": 16877 }, { "epoch": 0.7638832315003394, "grad_norm": 0.6173016787995003, "learning_rate": 1.39266016553337e-06, "loss": 0.3333, "step": 16878 }, { "epoch": 0.763928490608735, "grad_norm": 0.6037204276293361, "learning_rate": 1.3921526947346902e-06, "loss": 0.2761, "step": 16879 }, { "epoch": 0.7639737497171306, "grad_norm": 0.6201939894233246, "learning_rate": 1.3916453014575664e-06, "loss": 0.2864, "step": 16880 }, { "epoch": 0.7640190088255261, "grad_norm": 0.6081279482261308, "learning_rate": 1.3911379857129037e-06, "loss": 0.2896, "step": 16881 }, { "epoch": 0.7640642679339217, "grad_norm": 0.5885304306737921, "learning_rate": 1.3906307475116044e-06, "loss": 0.3191, "step": 16882 }, { "epoch": 0.7641095270423173, "grad_norm": 0.5702277237570906, "learning_rate": 1.390123586864562e-06, "loss": 0.3063, "step": 16883 }, { "epoch": 0.7641547861507129, "grad_norm": 0.6062661374592121, "learning_rate": 1.389616503782677e-06, "loss": 0.3084, "step": 16884 }, { "epoch": 0.7642000452591083, "grad_norm": 0.5808660659853312, "learning_rate": 1.389109498276846e-06, "loss": 0.2715, "step": 16885 }, { "epoch": 0.7642453043675039, "grad_norm": 0.6005673175985065, "learning_rate": 1.388602570357962e-06, "loss": 0.3076, "step": 16886 }, { "epoch": 0.7642905634758995, "grad_norm": 0.6122167898907246, "learning_rate": 1.388095720036916e-06, "loss": 0.2916, "step": 16887 }, { "epoch": 0.7643358225842951, "grad_norm": 0.2734031758798873, "learning_rate": 1.3875889473245996e-06, "loss": 0.4631, "step": 16888 }, { "epoch": 0.7643810816926907, "grad_norm": 0.6061707662293109, "learning_rate": 1.3870822522319039e-06, "loss": 0.3079, "step": 16889 }, { "epoch": 0.7644263408010862, "grad_norm": 0.6162602129563666, "learning_rate": 1.386575634769714e-06, "loss": 0.2763, "step": 16890 }, { "epoch": 0.7644715999094818, "grad_norm": 0.3073469359186612, "learning_rate": 1.3860690949489141e-06, "loss": 0.4764, "step": 16891 }, { "epoch": 0.7645168590178774, "grad_norm": 0.5780747748601645, "learning_rate": 1.3855626327803923e-06, "loss": 0.2817, "step": 16892 }, { "epoch": 0.764562118126273, "grad_norm": 0.26548664320126003, "learning_rate": 1.385056248275027e-06, "loss": 0.4591, "step": 16893 }, { "epoch": 0.7646073772346684, "grad_norm": 0.6425476158887657, "learning_rate": 1.3845499414437013e-06, "loss": 0.2995, "step": 16894 }, { "epoch": 0.764652636343064, "grad_norm": 0.2543732806748501, "learning_rate": 1.384043712297294e-06, "loss": 0.4742, "step": 16895 }, { "epoch": 0.7646978954514596, "grad_norm": 0.27888961787907973, "learning_rate": 1.38353756084668e-06, "loss": 0.4554, "step": 16896 }, { "epoch": 0.7647431545598552, "grad_norm": 0.6126119132895413, "learning_rate": 1.3830314871027367e-06, "loss": 0.322, "step": 16897 }, { "epoch": 0.7647884136682508, "grad_norm": 0.6800039236791499, "learning_rate": 1.3825254910763396e-06, "loss": 0.2992, "step": 16898 }, { "epoch": 0.7648336727766463, "grad_norm": 0.6701975701935918, "learning_rate": 1.3820195727783597e-06, "loss": 0.3103, "step": 16899 }, { "epoch": 0.7648789318850419, "grad_norm": 0.2552845699044317, "learning_rate": 1.3815137322196654e-06, "loss": 0.4648, "step": 16900 }, { "epoch": 0.7649241909934374, "grad_norm": 0.665911728128072, "learning_rate": 1.3810079694111295e-06, "loss": 0.2839, "step": 16901 }, { "epoch": 0.764969450101833, "grad_norm": 0.7525189007952864, "learning_rate": 1.3805022843636162e-06, "loss": 0.3057, "step": 16902 }, { "epoch": 0.7650147092102285, "grad_norm": 0.2652382252083221, "learning_rate": 1.3799966770879936e-06, "loss": 0.455, "step": 16903 }, { "epoch": 0.7650599683186241, "grad_norm": 0.6639683051494911, "learning_rate": 1.3794911475951229e-06, "loss": 0.3246, "step": 16904 }, { "epoch": 0.7651052274270197, "grad_norm": 0.6142888855102578, "learning_rate": 1.3789856958958692e-06, "loss": 0.2624, "step": 16905 }, { "epoch": 0.7651504865354153, "grad_norm": 0.6536807988841115, "learning_rate": 1.3784803220010906e-06, "loss": 0.299, "step": 16906 }, { "epoch": 0.7651957456438108, "grad_norm": 0.6210388750058032, "learning_rate": 1.3779750259216484e-06, "loss": 0.2996, "step": 16907 }, { "epoch": 0.7652410047522064, "grad_norm": 0.24744556272978027, "learning_rate": 1.377469807668399e-06, "loss": 0.4469, "step": 16908 }, { "epoch": 0.765286263860602, "grad_norm": 0.5780801130034201, "learning_rate": 1.3769646672521964e-06, "loss": 0.2728, "step": 16909 }, { "epoch": 0.7653315229689975, "grad_norm": 0.2845392317576298, "learning_rate": 1.3764596046838951e-06, "loss": 0.5005, "step": 16910 }, { "epoch": 0.7653767820773931, "grad_norm": 0.6439653211486774, "learning_rate": 1.3759546199743518e-06, "loss": 0.2606, "step": 16911 }, { "epoch": 0.7654220411857886, "grad_norm": 0.6052871614791017, "learning_rate": 1.3754497131344097e-06, "loss": 0.2423, "step": 16912 }, { "epoch": 0.7654673002941842, "grad_norm": 0.6858402042648036, "learning_rate": 1.3749448841749213e-06, "loss": 0.289, "step": 16913 }, { "epoch": 0.7655125594025798, "grad_norm": 0.6380469222281904, "learning_rate": 1.3744401331067358e-06, "loss": 0.321, "step": 16914 }, { "epoch": 0.7655578185109754, "grad_norm": 0.6318465111490466, "learning_rate": 1.3739354599406969e-06, "loss": 0.2968, "step": 16915 }, { "epoch": 0.7656030776193709, "grad_norm": 0.27806466123117685, "learning_rate": 1.373430864687646e-06, "loss": 0.4759, "step": 16916 }, { "epoch": 0.7656483367277664, "grad_norm": 0.559100606385692, "learning_rate": 1.3729263473584281e-06, "loss": 0.2649, "step": 16917 }, { "epoch": 0.765693595836162, "grad_norm": 0.5835671908916155, "learning_rate": 1.372421907963885e-06, "loss": 0.2895, "step": 16918 }, { "epoch": 0.7657388549445576, "grad_norm": 0.5678136911278053, "learning_rate": 1.3719175465148538e-06, "loss": 0.3081, "step": 16919 }, { "epoch": 0.7657841140529531, "grad_norm": 0.6582775454018125, "learning_rate": 1.3714132630221699e-06, "loss": 0.3243, "step": 16920 }, { "epoch": 0.7658293731613487, "grad_norm": 0.6572142174436094, "learning_rate": 1.3709090574966726e-06, "loss": 0.288, "step": 16921 }, { "epoch": 0.7658746322697443, "grad_norm": 0.5759000279508841, "learning_rate": 1.3704049299491923e-06, "loss": 0.2615, "step": 16922 }, { "epoch": 0.7659198913781399, "grad_norm": 0.9477769156689875, "learning_rate": 1.3699008803905633e-06, "loss": 0.2983, "step": 16923 }, { "epoch": 0.7659651504865355, "grad_norm": 0.28476611007507796, "learning_rate": 1.369396908831616e-06, "loss": 0.4754, "step": 16924 }, { "epoch": 0.7660104095949309, "grad_norm": 0.5858814659736541, "learning_rate": 1.368893015283177e-06, "loss": 0.3237, "step": 16925 }, { "epoch": 0.7660556687033265, "grad_norm": 0.2752287362306891, "learning_rate": 1.368389199756075e-06, "loss": 0.4561, "step": 16926 }, { "epoch": 0.7661009278117221, "grad_norm": 0.8509358285265931, "learning_rate": 1.3678854622611371e-06, "loss": 0.2647, "step": 16927 }, { "epoch": 0.7661461869201177, "grad_norm": 0.5731027176137438, "learning_rate": 1.367381802809185e-06, "loss": 0.2603, "step": 16928 }, { "epoch": 0.7661914460285132, "grad_norm": 0.6125266417761187, "learning_rate": 1.3668782214110404e-06, "loss": 0.3007, "step": 16929 }, { "epoch": 0.7662367051369088, "grad_norm": 0.6363985154525227, "learning_rate": 1.3663747180775238e-06, "loss": 0.3023, "step": 16930 }, { "epoch": 0.7662819642453044, "grad_norm": 0.5990897495045766, "learning_rate": 1.3658712928194567e-06, "loss": 0.2762, "step": 16931 }, { "epoch": 0.7663272233537, "grad_norm": 0.5934237035423845, "learning_rate": 1.3653679456476536e-06, "loss": 0.3044, "step": 16932 }, { "epoch": 0.7663724824620956, "grad_norm": 0.7283000284782619, "learning_rate": 1.3648646765729295e-06, "loss": 0.334, "step": 16933 }, { "epoch": 0.766417741570491, "grad_norm": 0.609175602263638, "learning_rate": 1.3643614856061005e-06, "loss": 0.2992, "step": 16934 }, { "epoch": 0.7664630006788866, "grad_norm": 0.2539648191964789, "learning_rate": 1.3638583727579752e-06, "loss": 0.4318, "step": 16935 }, { "epoch": 0.7665082597872822, "grad_norm": 0.6069518858266917, "learning_rate": 1.3633553380393677e-06, "loss": 0.3037, "step": 16936 }, { "epoch": 0.7665535188956778, "grad_norm": 0.5627562186944901, "learning_rate": 1.362852381461085e-06, "loss": 0.3155, "step": 16937 }, { "epoch": 0.7665987780040733, "grad_norm": 0.26406261763895233, "learning_rate": 1.3623495030339323e-06, "loss": 0.4702, "step": 16938 }, { "epoch": 0.7666440371124689, "grad_norm": 0.7349242877482646, "learning_rate": 1.3618467027687165e-06, "loss": 0.315, "step": 16939 }, { "epoch": 0.7666892962208645, "grad_norm": 0.6108496294096064, "learning_rate": 1.3613439806762447e-06, "loss": 0.2913, "step": 16940 }, { "epoch": 0.76673455532926, "grad_norm": 0.2741832860992178, "learning_rate": 1.3608413367673123e-06, "loss": 0.4662, "step": 16941 }, { "epoch": 0.7667798144376555, "grad_norm": 0.6248896759210308, "learning_rate": 1.3603387710527228e-06, "loss": 0.2901, "step": 16942 }, { "epoch": 0.7668250735460511, "grad_norm": 0.2945483709669181, "learning_rate": 1.359836283543276e-06, "loss": 0.4719, "step": 16943 }, { "epoch": 0.7668703326544467, "grad_norm": 0.6619790012059299, "learning_rate": 1.3593338742497675e-06, "loss": 0.2947, "step": 16944 }, { "epoch": 0.7669155917628423, "grad_norm": 0.6236601589678317, "learning_rate": 1.3588315431829913e-06, "loss": 0.2854, "step": 16945 }, { "epoch": 0.7669608508712379, "grad_norm": 0.7724796575015, "learning_rate": 1.3583292903537427e-06, "loss": 0.2866, "step": 16946 }, { "epoch": 0.7670061099796334, "grad_norm": 1.0154723266293544, "learning_rate": 1.357827115772814e-06, "loss": 0.2853, "step": 16947 }, { "epoch": 0.767051369088029, "grad_norm": 0.7038822773492349, "learning_rate": 1.3573250194509946e-06, "loss": 0.3171, "step": 16948 }, { "epoch": 0.7670966281964245, "grad_norm": 0.5762294358474576, "learning_rate": 1.3568230013990713e-06, "loss": 0.265, "step": 16949 }, { "epoch": 0.7671418873048201, "grad_norm": 0.5917370820067537, "learning_rate": 1.3563210616278345e-06, "loss": 0.2564, "step": 16950 }, { "epoch": 0.7671871464132156, "grad_norm": 0.277251790379558, "learning_rate": 1.3558192001480652e-06, "loss": 0.4499, "step": 16951 }, { "epoch": 0.7672324055216112, "grad_norm": 0.26052497713959355, "learning_rate": 1.3553174169705507e-06, "loss": 0.4531, "step": 16952 }, { "epoch": 0.7672776646300068, "grad_norm": 0.6220368259883828, "learning_rate": 1.3548157121060718e-06, "loss": 0.2812, "step": 16953 }, { "epoch": 0.7673229237384024, "grad_norm": 0.5294918508547679, "learning_rate": 1.3543140855654058e-06, "loss": 0.257, "step": 16954 }, { "epoch": 0.7673681828467979, "grad_norm": 0.2807578134342027, "learning_rate": 1.3538125373593335e-06, "loss": 0.4716, "step": 16955 }, { "epoch": 0.7674134419551935, "grad_norm": 0.5845163546613862, "learning_rate": 1.3533110674986327e-06, "loss": 0.2835, "step": 16956 }, { "epoch": 0.767458701063589, "grad_norm": 0.5872242276041237, "learning_rate": 1.3528096759940768e-06, "loss": 0.3021, "step": 16957 }, { "epoch": 0.7675039601719846, "grad_norm": 0.6504178575741542, "learning_rate": 1.3523083628564388e-06, "loss": 0.291, "step": 16958 }, { "epoch": 0.7675492192803802, "grad_norm": 0.6391989446126154, "learning_rate": 1.3518071280964901e-06, "loss": 0.2885, "step": 16959 }, { "epoch": 0.7675944783887757, "grad_norm": 0.2884959672495194, "learning_rate": 1.3513059717250037e-06, "loss": 0.4722, "step": 16960 }, { "epoch": 0.7676397374971713, "grad_norm": 0.569592458656659, "learning_rate": 1.3508048937527458e-06, "loss": 0.3074, "step": 16961 }, { "epoch": 0.7676849966055669, "grad_norm": 0.6032703298178237, "learning_rate": 1.3503038941904818e-06, "loss": 0.3544, "step": 16962 }, { "epoch": 0.7677302557139625, "grad_norm": 0.5832410428674883, "learning_rate": 1.3498029730489793e-06, "loss": 0.3116, "step": 16963 }, { "epoch": 0.767775514822358, "grad_norm": 0.6220872943704416, "learning_rate": 1.3493021303389985e-06, "loss": 0.2951, "step": 16964 }, { "epoch": 0.7678207739307535, "grad_norm": 0.89794621665398, "learning_rate": 1.348801366071304e-06, "loss": 0.3117, "step": 16965 }, { "epoch": 0.7678660330391491, "grad_norm": 0.6600777353123349, "learning_rate": 1.3483006802566546e-06, "loss": 0.311, "step": 16966 }, { "epoch": 0.7679112921475447, "grad_norm": 0.6373080824566424, "learning_rate": 1.3478000729058065e-06, "loss": 0.3076, "step": 16967 }, { "epoch": 0.7679565512559403, "grad_norm": 0.2797095518192231, "learning_rate": 1.3472995440295183e-06, "loss": 0.47, "step": 16968 }, { "epoch": 0.7680018103643358, "grad_norm": 0.6633517189792275, "learning_rate": 1.3467990936385478e-06, "loss": 0.2976, "step": 16969 }, { "epoch": 0.7680470694727314, "grad_norm": 0.6534031456071727, "learning_rate": 1.3462987217436412e-06, "loss": 0.3041, "step": 16970 }, { "epoch": 0.768092328581127, "grad_norm": 0.6053512025354313, "learning_rate": 1.3457984283555536e-06, "loss": 0.3258, "step": 16971 }, { "epoch": 0.7681375876895226, "grad_norm": 0.5653073402346999, "learning_rate": 1.345298213485035e-06, "loss": 0.3381, "step": 16972 }, { "epoch": 0.768182846797918, "grad_norm": 0.5950416465028642, "learning_rate": 1.344798077142836e-06, "loss": 0.2806, "step": 16973 }, { "epoch": 0.7682281059063136, "grad_norm": 0.6699224940747642, "learning_rate": 1.3442980193396976e-06, "loss": 0.2696, "step": 16974 }, { "epoch": 0.7682733650147092, "grad_norm": 0.5930106756206643, "learning_rate": 1.3437980400863671e-06, "loss": 0.3147, "step": 16975 }, { "epoch": 0.7683186241231048, "grad_norm": 0.6205814856898128, "learning_rate": 1.3432981393935885e-06, "loss": 0.3014, "step": 16976 }, { "epoch": 0.7683638832315003, "grad_norm": 0.6479243058495224, "learning_rate": 1.3427983172721026e-06, "loss": 0.3099, "step": 16977 }, { "epoch": 0.7684091423398959, "grad_norm": 0.6434276617349629, "learning_rate": 1.3422985737326471e-06, "loss": 0.2738, "step": 16978 }, { "epoch": 0.7684544014482915, "grad_norm": 0.6253202570490182, "learning_rate": 1.3417989087859628e-06, "loss": 0.2903, "step": 16979 }, { "epoch": 0.7684996605566871, "grad_norm": 0.6205806187921971, "learning_rate": 1.3412993224427834e-06, "loss": 0.2788, "step": 16980 }, { "epoch": 0.7685449196650826, "grad_norm": 0.28114508109508995, "learning_rate": 1.3407998147138462e-06, "loss": 0.4619, "step": 16981 }, { "epoch": 0.7685901787734781, "grad_norm": 0.6156844052315181, "learning_rate": 1.3403003856098823e-06, "loss": 0.2637, "step": 16982 }, { "epoch": 0.7686354378818737, "grad_norm": 0.5597348557261431, "learning_rate": 1.339801035141622e-06, "loss": 0.3111, "step": 16983 }, { "epoch": 0.7686806969902693, "grad_norm": 0.2478413630444913, "learning_rate": 1.3393017633197958e-06, "loss": 0.4477, "step": 16984 }, { "epoch": 0.7687259560986649, "grad_norm": 0.6558781901080365, "learning_rate": 1.3388025701551339e-06, "loss": 0.2989, "step": 16985 }, { "epoch": 0.7687712152070604, "grad_norm": 0.6440251359091363, "learning_rate": 1.3383034556583596e-06, "loss": 0.2987, "step": 16986 }, { "epoch": 0.768816474315456, "grad_norm": 0.646678875565266, "learning_rate": 1.3378044198401963e-06, "loss": 0.3121, "step": 16987 }, { "epoch": 0.7688617334238516, "grad_norm": 0.596718918220146, "learning_rate": 1.337305462711369e-06, "loss": 0.2762, "step": 16988 }, { "epoch": 0.7689069925322471, "grad_norm": 0.6003635530165137, "learning_rate": 1.3368065842825994e-06, "loss": 0.2831, "step": 16989 }, { "epoch": 0.7689522516406426, "grad_norm": 0.25469878483264125, "learning_rate": 1.3363077845646056e-06, "loss": 0.4325, "step": 16990 }, { "epoch": 0.7689975107490382, "grad_norm": 0.5622020283876651, "learning_rate": 1.3358090635681043e-06, "loss": 0.244, "step": 16991 }, { "epoch": 0.7690427698574338, "grad_norm": 0.6387750462435204, "learning_rate": 1.335310421303813e-06, "loss": 0.2878, "step": 16992 }, { "epoch": 0.7690880289658294, "grad_norm": 0.7494803556084741, "learning_rate": 1.3348118577824448e-06, "loss": 0.2549, "step": 16993 }, { "epoch": 0.769133288074225, "grad_norm": 0.2880711269974684, "learning_rate": 1.3343133730147144e-06, "loss": 0.4734, "step": 16994 }, { "epoch": 0.7691785471826205, "grad_norm": 0.7187617476015365, "learning_rate": 1.3338149670113314e-06, "loss": 0.2833, "step": 16995 }, { "epoch": 0.769223806291016, "grad_norm": 0.6144067884129695, "learning_rate": 1.3333166397830033e-06, "loss": 0.2975, "step": 16996 }, { "epoch": 0.7692690653994116, "grad_norm": 0.6488198429125233, "learning_rate": 1.3328183913404396e-06, "loss": 0.2951, "step": 16997 }, { "epoch": 0.7693143245078072, "grad_norm": 0.6572369985639023, "learning_rate": 1.3323202216943488e-06, "loss": 0.2879, "step": 16998 }, { "epoch": 0.7693595836162027, "grad_norm": 0.5623155598704372, "learning_rate": 1.3318221308554287e-06, "loss": 0.297, "step": 16999 }, { "epoch": 0.7694048427245983, "grad_norm": 0.590359266967176, "learning_rate": 1.3313241188343845e-06, "loss": 0.282, "step": 17000 }, { "epoch": 0.7694501018329939, "grad_norm": 0.6139718057304095, "learning_rate": 1.330826185641918e-06, "loss": 0.2862, "step": 17001 }, { "epoch": 0.7694953609413895, "grad_norm": 0.6207758725197622, "learning_rate": 1.330328331288731e-06, "loss": 0.2734, "step": 17002 }, { "epoch": 0.7695406200497851, "grad_norm": 0.5562841534231531, "learning_rate": 1.3298305557855146e-06, "loss": 0.2743, "step": 17003 }, { "epoch": 0.7695858791581806, "grad_norm": 0.6279572767987115, "learning_rate": 1.329332859142967e-06, "loss": 0.3076, "step": 17004 }, { "epoch": 0.7696311382665761, "grad_norm": 0.5915825930214385, "learning_rate": 1.3288352413717847e-06, "loss": 0.3034, "step": 17005 }, { "epoch": 0.7696763973749717, "grad_norm": 0.5942031851275636, "learning_rate": 1.3283377024826576e-06, "loss": 0.3188, "step": 17006 }, { "epoch": 0.7697216564833673, "grad_norm": 0.276941718944032, "learning_rate": 1.3278402424862758e-06, "loss": 0.4766, "step": 17007 }, { "epoch": 0.7697669155917628, "grad_norm": 0.6329108071302986, "learning_rate": 1.3273428613933298e-06, "loss": 0.2926, "step": 17008 }, { "epoch": 0.7698121747001584, "grad_norm": 0.5793166555741998, "learning_rate": 1.3268455592145047e-06, "loss": 0.3074, "step": 17009 }, { "epoch": 0.769857433808554, "grad_norm": 0.25748776118581745, "learning_rate": 1.3263483359604884e-06, "loss": 0.4525, "step": 17010 }, { "epoch": 0.7699026929169496, "grad_norm": 0.5655961201333015, "learning_rate": 1.3258511916419641e-06, "loss": 0.3125, "step": 17011 }, { "epoch": 0.769947952025345, "grad_norm": 0.5823148605766695, "learning_rate": 1.3253541262696117e-06, "loss": 0.2969, "step": 17012 }, { "epoch": 0.7699932111337406, "grad_norm": 0.6444920412097761, "learning_rate": 1.3248571398541138e-06, "loss": 0.2965, "step": 17013 }, { "epoch": 0.7700384702421362, "grad_norm": 0.2839524981208011, "learning_rate": 1.3243602324061495e-06, "loss": 0.4673, "step": 17014 }, { "epoch": 0.7700837293505318, "grad_norm": 0.6402337676488344, "learning_rate": 1.3238634039363952e-06, "loss": 0.3205, "step": 17015 }, { "epoch": 0.7701289884589274, "grad_norm": 0.6372879682518151, "learning_rate": 1.3233666544555246e-06, "loss": 0.3168, "step": 17016 }, { "epoch": 0.7701742475673229, "grad_norm": 0.6019808709883003, "learning_rate": 1.3228699839742125e-06, "loss": 0.3091, "step": 17017 }, { "epoch": 0.7702195066757185, "grad_norm": 0.7257516660275176, "learning_rate": 1.3223733925031324e-06, "loss": 0.3478, "step": 17018 }, { "epoch": 0.7702647657841141, "grad_norm": 0.6080211785313477, "learning_rate": 1.321876880052953e-06, "loss": 0.2837, "step": 17019 }, { "epoch": 0.7703100248925097, "grad_norm": 0.2720508001144198, "learning_rate": 1.321380446634342e-06, "loss": 0.4872, "step": 17020 }, { "epoch": 0.7703552840009051, "grad_norm": 0.5971448911188646, "learning_rate": 1.3208840922579686e-06, "loss": 0.3264, "step": 17021 }, { "epoch": 0.7704005431093007, "grad_norm": 0.6620853507211729, "learning_rate": 1.3203878169344948e-06, "loss": 0.3739, "step": 17022 }, { "epoch": 0.7704458022176963, "grad_norm": 0.6750399270932413, "learning_rate": 1.3198916206745871e-06, "loss": 0.2813, "step": 17023 }, { "epoch": 0.7704910613260919, "grad_norm": 0.6677213392981335, "learning_rate": 1.3193955034889056e-06, "loss": 0.304, "step": 17024 }, { "epoch": 0.7705363204344874, "grad_norm": 0.26104906765845687, "learning_rate": 1.31889946538811e-06, "loss": 0.4523, "step": 17025 }, { "epoch": 0.770581579542883, "grad_norm": 0.6332788248079336, "learning_rate": 1.3184035063828586e-06, "loss": 0.3088, "step": 17026 }, { "epoch": 0.7706268386512786, "grad_norm": 0.5982813398175602, "learning_rate": 1.3179076264838102e-06, "loss": 0.2801, "step": 17027 }, { "epoch": 0.7706720977596742, "grad_norm": 0.6142886930313358, "learning_rate": 1.3174118257016182e-06, "loss": 0.3024, "step": 17028 }, { "epoch": 0.7707173568680697, "grad_norm": 0.5813833619991972, "learning_rate": 1.3169161040469347e-06, "loss": 0.2989, "step": 17029 }, { "epoch": 0.7707626159764652, "grad_norm": 0.5807463456591303, "learning_rate": 1.316420461530412e-06, "loss": 0.2915, "step": 17030 }, { "epoch": 0.7708078750848608, "grad_norm": 0.6038158844407124, "learning_rate": 1.3159248981627026e-06, "loss": 0.3041, "step": 17031 }, { "epoch": 0.7708531341932564, "grad_norm": 0.6905869638067237, "learning_rate": 1.3154294139544516e-06, "loss": 0.321, "step": 17032 }, { "epoch": 0.770898393301652, "grad_norm": 0.587729403920332, "learning_rate": 1.3149340089163048e-06, "loss": 0.3035, "step": 17033 }, { "epoch": 0.7709436524100475, "grad_norm": 0.5984624721440018, "learning_rate": 1.3144386830589102e-06, "loss": 0.2999, "step": 17034 }, { "epoch": 0.7709889115184431, "grad_norm": 0.5938178505981546, "learning_rate": 1.3139434363929088e-06, "loss": 0.3004, "step": 17035 }, { "epoch": 0.7710341706268387, "grad_norm": 0.6211449043197764, "learning_rate": 1.3134482689289408e-06, "loss": 0.2901, "step": 17036 }, { "epoch": 0.7710794297352342, "grad_norm": 0.6013448927897931, "learning_rate": 1.312953180677648e-06, "loss": 0.3086, "step": 17037 }, { "epoch": 0.7711246888436298, "grad_norm": 0.5856091578020052, "learning_rate": 1.3124581716496666e-06, "loss": 0.2874, "step": 17038 }, { "epoch": 0.7711699479520253, "grad_norm": 0.606121887689741, "learning_rate": 1.3119632418556344e-06, "loss": 0.3335, "step": 17039 }, { "epoch": 0.7712152070604209, "grad_norm": 0.610090598311767, "learning_rate": 1.311468391306186e-06, "loss": 0.2584, "step": 17040 }, { "epoch": 0.7712604661688165, "grad_norm": 0.6136849850789233, "learning_rate": 1.3109736200119517e-06, "loss": 0.3297, "step": 17041 }, { "epoch": 0.7713057252772121, "grad_norm": 0.6267240457586536, "learning_rate": 1.310478927983564e-06, "loss": 0.2766, "step": 17042 }, { "epoch": 0.7713509843856076, "grad_norm": 0.6977661479268513, "learning_rate": 1.3099843152316543e-06, "loss": 0.293, "step": 17043 }, { "epoch": 0.7713962434940032, "grad_norm": 0.2869785079670655, "learning_rate": 1.309489781766849e-06, "loss": 0.4703, "step": 17044 }, { "epoch": 0.7714415026023987, "grad_norm": 0.6219557466552592, "learning_rate": 1.308995327599772e-06, "loss": 0.2609, "step": 17045 }, { "epoch": 0.7714867617107943, "grad_norm": 0.2754491484448643, "learning_rate": 1.3085009527410491e-06, "loss": 0.486, "step": 17046 }, { "epoch": 0.7715320208191898, "grad_norm": 0.6285882729758558, "learning_rate": 1.3080066572013045e-06, "loss": 0.3062, "step": 17047 }, { "epoch": 0.7715772799275854, "grad_norm": 0.6209444308060075, "learning_rate": 1.3075124409911584e-06, "loss": 0.3307, "step": 17048 }, { "epoch": 0.771622539035981, "grad_norm": 0.608458141857825, "learning_rate": 1.3070183041212276e-06, "loss": 0.3477, "step": 17049 }, { "epoch": 0.7716677981443766, "grad_norm": 0.5640585393302556, "learning_rate": 1.3065242466021328e-06, "loss": 0.2742, "step": 17050 }, { "epoch": 0.7717130572527722, "grad_norm": 0.6017392265311926, "learning_rate": 1.3060302684444864e-06, "loss": 0.2725, "step": 17051 }, { "epoch": 0.7717583163611677, "grad_norm": 0.6761743285746282, "learning_rate": 1.3055363696589062e-06, "loss": 0.3202, "step": 17052 }, { "epoch": 0.7718035754695632, "grad_norm": 0.3254544022283287, "learning_rate": 1.3050425502560028e-06, "loss": 0.4524, "step": 17053 }, { "epoch": 0.7718488345779588, "grad_norm": 0.6596226760850494, "learning_rate": 1.3045488102463856e-06, "loss": 0.3541, "step": 17054 }, { "epoch": 0.7718940936863544, "grad_norm": 0.6610524550640676, "learning_rate": 1.304055149640664e-06, "loss": 0.3441, "step": 17055 }, { "epoch": 0.7719393527947499, "grad_norm": 0.613461336777729, "learning_rate": 1.303561568449448e-06, "loss": 0.3042, "step": 17056 }, { "epoch": 0.7719846119031455, "grad_norm": 0.6389856450153001, "learning_rate": 1.3030680666833411e-06, "loss": 0.3254, "step": 17057 }, { "epoch": 0.7720298710115411, "grad_norm": 0.5882841282729346, "learning_rate": 1.3025746443529459e-06, "loss": 0.2951, "step": 17058 }, { "epoch": 0.7720751301199367, "grad_norm": 0.5547272609744122, "learning_rate": 1.302081301468865e-06, "loss": 0.2637, "step": 17059 }, { "epoch": 0.7721203892283321, "grad_norm": 0.5719637599550884, "learning_rate": 1.3015880380417017e-06, "loss": 0.2698, "step": 17060 }, { "epoch": 0.7721656483367277, "grad_norm": 0.6128984698011726, "learning_rate": 1.3010948540820528e-06, "loss": 0.3142, "step": 17061 }, { "epoch": 0.7722109074451233, "grad_norm": 0.7081009445258358, "learning_rate": 1.3006017496005135e-06, "loss": 0.3351, "step": 17062 }, { "epoch": 0.7722561665535189, "grad_norm": 0.280580376477097, "learning_rate": 1.3001087246076821e-06, "loss": 0.4538, "step": 17063 }, { "epoch": 0.7723014256619145, "grad_norm": 0.7330898678125246, "learning_rate": 1.29961577911415e-06, "loss": 0.3027, "step": 17064 }, { "epoch": 0.77234668477031, "grad_norm": 0.5807060894420104, "learning_rate": 1.2991229131305106e-06, "loss": 0.2733, "step": 17065 }, { "epoch": 0.7723919438787056, "grad_norm": 0.27381713925430146, "learning_rate": 1.298630126667354e-06, "loss": 0.4627, "step": 17066 }, { "epoch": 0.7724372029871012, "grad_norm": 0.70382379688019, "learning_rate": 1.2981374197352663e-06, "loss": 0.2926, "step": 17067 }, { "epoch": 0.7724824620954968, "grad_norm": 0.5869086110499759, "learning_rate": 1.2976447923448376e-06, "loss": 0.278, "step": 17068 }, { "epoch": 0.7725277212038922, "grad_norm": 0.26054148489340934, "learning_rate": 1.2971522445066515e-06, "loss": 0.4378, "step": 17069 }, { "epoch": 0.7725729803122878, "grad_norm": 0.7147306754379935, "learning_rate": 1.29665977623129e-06, "loss": 0.3465, "step": 17070 }, { "epoch": 0.7726182394206834, "grad_norm": 0.6196791744507667, "learning_rate": 1.2961673875293352e-06, "loss": 0.287, "step": 17071 }, { "epoch": 0.772663498529079, "grad_norm": 0.7222106900360292, "learning_rate": 1.2956750784113698e-06, "loss": 0.3465, "step": 17072 }, { "epoch": 0.7727087576374746, "grad_norm": 0.6263213043582183, "learning_rate": 1.2951828488879702e-06, "loss": 0.3168, "step": 17073 }, { "epoch": 0.7727540167458701, "grad_norm": 0.601989846349521, "learning_rate": 1.2946906989697106e-06, "loss": 0.3167, "step": 17074 }, { "epoch": 0.7727992758542657, "grad_norm": 0.6061256602703506, "learning_rate": 1.2941986286671682e-06, "loss": 0.3222, "step": 17075 }, { "epoch": 0.7728445349626613, "grad_norm": 0.6098812345161176, "learning_rate": 1.2937066379909174e-06, "loss": 0.3075, "step": 17076 }, { "epoch": 0.7728897940710568, "grad_norm": 0.577058215520092, "learning_rate": 1.2932147269515278e-06, "loss": 0.2862, "step": 17077 }, { "epoch": 0.7729350531794523, "grad_norm": 0.7320423453045078, "learning_rate": 1.2927228955595678e-06, "loss": 0.2623, "step": 17078 }, { "epoch": 0.7729803122878479, "grad_norm": 0.634616556189686, "learning_rate": 1.292231143825608e-06, "loss": 0.2689, "step": 17079 }, { "epoch": 0.7730255713962435, "grad_norm": 0.6614808934275564, "learning_rate": 1.2917394717602123e-06, "loss": 0.2991, "step": 17080 }, { "epoch": 0.7730708305046391, "grad_norm": 0.6504693596242206, "learning_rate": 1.2912478793739474e-06, "loss": 0.2944, "step": 17081 }, { "epoch": 0.7731160896130346, "grad_norm": 0.6111587590059084, "learning_rate": 1.2907563666773753e-06, "loss": 0.2904, "step": 17082 }, { "epoch": 0.7731613487214302, "grad_norm": 0.6120474107293098, "learning_rate": 1.2902649336810553e-06, "loss": 0.3246, "step": 17083 }, { "epoch": 0.7732066078298258, "grad_norm": 0.6841406326328182, "learning_rate": 1.289773580395548e-06, "loss": 0.2907, "step": 17084 }, { "epoch": 0.7732518669382213, "grad_norm": 0.6543032317655777, "learning_rate": 1.289282306831413e-06, "loss": 0.3093, "step": 17085 }, { "epoch": 0.7732971260466169, "grad_norm": 0.6332293830460849, "learning_rate": 1.2887911129992047e-06, "loss": 0.2928, "step": 17086 }, { "epoch": 0.7733423851550124, "grad_norm": 0.6293863668690138, "learning_rate": 1.2882999989094758e-06, "loss": 0.2799, "step": 17087 }, { "epoch": 0.773387644263408, "grad_norm": 0.6591978501859447, "learning_rate": 1.2878089645727803e-06, "loss": 0.3173, "step": 17088 }, { "epoch": 0.7734329033718036, "grad_norm": 0.6410972269223547, "learning_rate": 1.2873180099996701e-06, "loss": 0.3301, "step": 17089 }, { "epoch": 0.7734781624801992, "grad_norm": 0.9874183972194387, "learning_rate": 1.2868271352006938e-06, "loss": 0.2957, "step": 17090 }, { "epoch": 0.7735234215885947, "grad_norm": 0.6636071316751236, "learning_rate": 1.2863363401863966e-06, "loss": 0.2871, "step": 17091 }, { "epoch": 0.7735686806969903, "grad_norm": 0.5968374672452968, "learning_rate": 1.2858456249673268e-06, "loss": 0.2966, "step": 17092 }, { "epoch": 0.7736139398053858, "grad_norm": 0.6203246444497553, "learning_rate": 1.2853549895540268e-06, "loss": 0.3091, "step": 17093 }, { "epoch": 0.7736591989137814, "grad_norm": 0.5971734328529894, "learning_rate": 1.2848644339570403e-06, "loss": 0.3399, "step": 17094 }, { "epoch": 0.7737044580221769, "grad_norm": 0.7651834237145628, "learning_rate": 1.2843739581869068e-06, "loss": 0.3061, "step": 17095 }, { "epoch": 0.7737497171305725, "grad_norm": 0.63633869631367, "learning_rate": 1.283883562254164e-06, "loss": 0.2688, "step": 17096 }, { "epoch": 0.7737949762389681, "grad_norm": 0.6051754070221738, "learning_rate": 1.2833932461693504e-06, "loss": 0.2692, "step": 17097 }, { "epoch": 0.7738402353473637, "grad_norm": 0.592598034098679, "learning_rate": 1.282903009943004e-06, "loss": 0.3354, "step": 17098 }, { "epoch": 0.7738854944557593, "grad_norm": 0.624401314189695, "learning_rate": 1.282412853585653e-06, "loss": 0.3162, "step": 17099 }, { "epoch": 0.7739307535641547, "grad_norm": 0.7322262936851486, "learning_rate": 1.2819227771078318e-06, "loss": 0.2703, "step": 17100 }, { "epoch": 0.7739760126725503, "grad_norm": 0.6266047926837723, "learning_rate": 1.281432780520071e-06, "loss": 0.2636, "step": 17101 }, { "epoch": 0.7740212717809459, "grad_norm": 0.6831300671044548, "learning_rate": 1.280942863832902e-06, "loss": 0.2694, "step": 17102 }, { "epoch": 0.7740665308893415, "grad_norm": 0.799933249191529, "learning_rate": 1.280453027056846e-06, "loss": 0.2572, "step": 17103 }, { "epoch": 0.774111789997737, "grad_norm": 0.28749663474539183, "learning_rate": 1.2799632702024307e-06, "loss": 0.4234, "step": 17104 }, { "epoch": 0.7741570491061326, "grad_norm": 0.6069052169608677, "learning_rate": 1.2794735932801805e-06, "loss": 0.2805, "step": 17105 }, { "epoch": 0.7742023082145282, "grad_norm": 0.28689733822411023, "learning_rate": 1.2789839963006161e-06, "loss": 0.4686, "step": 17106 }, { "epoch": 0.7742475673229238, "grad_norm": 0.5960346646546882, "learning_rate": 1.278494479274256e-06, "loss": 0.3029, "step": 17107 }, { "epoch": 0.7742928264313192, "grad_norm": 0.6323977456256337, "learning_rate": 1.2780050422116214e-06, "loss": 0.3479, "step": 17108 }, { "epoch": 0.7743380855397148, "grad_norm": 0.6001880736032134, "learning_rate": 1.2775156851232262e-06, "loss": 0.2833, "step": 17109 }, { "epoch": 0.7743833446481104, "grad_norm": 0.6834787514837493, "learning_rate": 1.277026408019587e-06, "loss": 0.2696, "step": 17110 }, { "epoch": 0.774428603756506, "grad_norm": 0.5895092584163092, "learning_rate": 1.276537210911216e-06, "loss": 0.2604, "step": 17111 }, { "epoch": 0.7744738628649016, "grad_norm": 0.5966485169919734, "learning_rate": 1.2760480938086234e-06, "loss": 0.3136, "step": 17112 }, { "epoch": 0.7745191219732971, "grad_norm": 0.6310195151175784, "learning_rate": 1.2755590567223203e-06, "loss": 0.2904, "step": 17113 }, { "epoch": 0.7745643810816927, "grad_norm": 0.6517472902360024, "learning_rate": 1.275070099662815e-06, "loss": 0.3208, "step": 17114 }, { "epoch": 0.7746096401900883, "grad_norm": 0.6227530110658003, "learning_rate": 1.274581222640614e-06, "loss": 0.3065, "step": 17115 }, { "epoch": 0.7746548992984839, "grad_norm": 0.6298855001744631, "learning_rate": 1.2740924256662185e-06, "loss": 0.2588, "step": 17116 }, { "epoch": 0.7747001584068793, "grad_norm": 0.610395970451911, "learning_rate": 1.2736037087501342e-06, "loss": 0.2841, "step": 17117 }, { "epoch": 0.7747454175152749, "grad_norm": 0.6716630549579216, "learning_rate": 1.2731150719028622e-06, "loss": 0.2988, "step": 17118 }, { "epoch": 0.7747906766236705, "grad_norm": 0.6214868094767717, "learning_rate": 1.2726265151349015e-06, "loss": 0.3546, "step": 17119 }, { "epoch": 0.7748359357320661, "grad_norm": 0.5840971481190741, "learning_rate": 1.2721380384567477e-06, "loss": 0.3065, "step": 17120 }, { "epoch": 0.7748811948404617, "grad_norm": 0.5838822957571945, "learning_rate": 1.2716496418788998e-06, "loss": 0.2748, "step": 17121 }, { "epoch": 0.7749264539488572, "grad_norm": 0.6504767470495529, "learning_rate": 1.2711613254118482e-06, "loss": 0.3055, "step": 17122 }, { "epoch": 0.7749717130572528, "grad_norm": 0.5934252736975144, "learning_rate": 1.2706730890660896e-06, "loss": 0.291, "step": 17123 }, { "epoch": 0.7750169721656484, "grad_norm": 0.6088252559614474, "learning_rate": 1.2701849328521127e-06, "loss": 0.3175, "step": 17124 }, { "epoch": 0.775062231274044, "grad_norm": 0.6474140755106753, "learning_rate": 1.2696968567804042e-06, "loss": 0.3136, "step": 17125 }, { "epoch": 0.7751074903824394, "grad_norm": 0.5741135972312951, "learning_rate": 1.269208860861454e-06, "loss": 0.2631, "step": 17126 }, { "epoch": 0.775152749490835, "grad_norm": 0.7014835529371155, "learning_rate": 1.2687209451057498e-06, "loss": 0.3057, "step": 17127 }, { "epoch": 0.7751980085992306, "grad_norm": 0.6135637429616104, "learning_rate": 1.26823310952377e-06, "loss": 0.3084, "step": 17128 }, { "epoch": 0.7752432677076262, "grad_norm": 0.7016143908046977, "learning_rate": 1.2677453541259993e-06, "loss": 0.3007, "step": 17129 }, { "epoch": 0.7752885268160217, "grad_norm": 0.29424487408614314, "learning_rate": 1.2672576789229186e-06, "loss": 0.4599, "step": 17130 }, { "epoch": 0.7753337859244173, "grad_norm": 0.6311842885262616, "learning_rate": 1.2667700839250086e-06, "loss": 0.2971, "step": 17131 }, { "epoch": 0.7753790450328129, "grad_norm": 0.5907732570227133, "learning_rate": 1.266282569142741e-06, "loss": 0.2783, "step": 17132 }, { "epoch": 0.7754243041412084, "grad_norm": 0.6576672305095088, "learning_rate": 1.2657951345865938e-06, "loss": 0.2876, "step": 17133 }, { "epoch": 0.775469563249604, "grad_norm": 0.8345840354859797, "learning_rate": 1.2653077802670416e-06, "loss": 0.3081, "step": 17134 }, { "epoch": 0.7755148223579995, "grad_norm": 0.6008978189685654, "learning_rate": 1.264820506194555e-06, "loss": 0.3252, "step": 17135 }, { "epoch": 0.7755600814663951, "grad_norm": 0.2687867028456543, "learning_rate": 1.2643333123796025e-06, "loss": 0.4631, "step": 17136 }, { "epoch": 0.7756053405747907, "grad_norm": 0.6555826987070684, "learning_rate": 1.2638461988326556e-06, "loss": 0.2904, "step": 17137 }, { "epoch": 0.7756505996831863, "grad_norm": 0.609080407444969, "learning_rate": 1.263359165564178e-06, "loss": 0.2812, "step": 17138 }, { "epoch": 0.7756958587915818, "grad_norm": 1.2277354180594564, "learning_rate": 1.2628722125846365e-06, "loss": 0.3462, "step": 17139 }, { "epoch": 0.7757411178999774, "grad_norm": 0.5927932744795882, "learning_rate": 1.2623853399044938e-06, "loss": 0.2616, "step": 17140 }, { "epoch": 0.7757863770083729, "grad_norm": 0.5984460545486344, "learning_rate": 1.2618985475342093e-06, "loss": 0.2858, "step": 17141 }, { "epoch": 0.7758316361167685, "grad_norm": 0.59031256302671, "learning_rate": 1.2614118354842447e-06, "loss": 0.2662, "step": 17142 }, { "epoch": 0.775876895225164, "grad_norm": 0.7025175929736768, "learning_rate": 1.2609252037650587e-06, "loss": 0.3013, "step": 17143 }, { "epoch": 0.7759221543335596, "grad_norm": 0.643701224946021, "learning_rate": 1.2604386523871064e-06, "loss": 0.2853, "step": 17144 }, { "epoch": 0.7759674134419552, "grad_norm": 0.5989556928627308, "learning_rate": 1.2599521813608412e-06, "loss": 0.2928, "step": 17145 }, { "epoch": 0.7760126725503508, "grad_norm": 0.2845253593714727, "learning_rate": 1.2594657906967161e-06, "loss": 0.4847, "step": 17146 }, { "epoch": 0.7760579316587464, "grad_norm": 0.6871974893336779, "learning_rate": 1.2589794804051852e-06, "loss": 0.3145, "step": 17147 }, { "epoch": 0.7761031907671418, "grad_norm": 0.5784810860714322, "learning_rate": 1.2584932504966952e-06, "loss": 0.3174, "step": 17148 }, { "epoch": 0.7761484498755374, "grad_norm": 0.6567586672835883, "learning_rate": 1.258007100981693e-06, "loss": 0.3097, "step": 17149 }, { "epoch": 0.776193708983933, "grad_norm": 0.6514163489561462, "learning_rate": 1.2575210318706266e-06, "loss": 0.2711, "step": 17150 }, { "epoch": 0.7762389680923286, "grad_norm": 0.6198600650648628, "learning_rate": 1.2570350431739382e-06, "loss": 0.3026, "step": 17151 }, { "epoch": 0.7762842272007241, "grad_norm": 0.6841175367625959, "learning_rate": 1.256549134902072e-06, "loss": 0.2564, "step": 17152 }, { "epoch": 0.7763294863091197, "grad_norm": 0.6208186416223886, "learning_rate": 1.2560633070654677e-06, "loss": 0.3106, "step": 17153 }, { "epoch": 0.7763747454175153, "grad_norm": 0.592246840762376, "learning_rate": 1.2555775596745628e-06, "loss": 0.2898, "step": 17154 }, { "epoch": 0.7764200045259109, "grad_norm": 0.6202578708073626, "learning_rate": 1.2550918927397965e-06, "loss": 0.2798, "step": 17155 }, { "epoch": 0.7764652636343065, "grad_norm": 0.2673820938547034, "learning_rate": 1.2546063062716069e-06, "loss": 0.4716, "step": 17156 }, { "epoch": 0.7765105227427019, "grad_norm": 0.2625151895796911, "learning_rate": 1.2541208002804211e-06, "loss": 0.4611, "step": 17157 }, { "epoch": 0.7765557818510975, "grad_norm": 0.3058334961048853, "learning_rate": 1.253635374776675e-06, "loss": 0.4789, "step": 17158 }, { "epoch": 0.7766010409594931, "grad_norm": 0.6107794870209484, "learning_rate": 1.2531500297707987e-06, "loss": 0.3129, "step": 17159 }, { "epoch": 0.7766463000678887, "grad_norm": 0.5760173758350476, "learning_rate": 1.2526647652732233e-06, "loss": 0.2955, "step": 17160 }, { "epoch": 0.7766915591762842, "grad_norm": 0.6996473534148662, "learning_rate": 1.2521795812943704e-06, "loss": 0.3089, "step": 17161 }, { "epoch": 0.7767368182846798, "grad_norm": 0.6170363776126002, "learning_rate": 1.2516944778446676e-06, "loss": 0.3142, "step": 17162 }, { "epoch": 0.7767820773930754, "grad_norm": 0.2843270946939196, "learning_rate": 1.2512094549345399e-06, "loss": 0.4601, "step": 17163 }, { "epoch": 0.776827336501471, "grad_norm": 0.6367604001768401, "learning_rate": 1.2507245125744077e-06, "loss": 0.3181, "step": 17164 }, { "epoch": 0.7768725956098664, "grad_norm": 0.5898394959517429, "learning_rate": 1.2502396507746889e-06, "loss": 0.2979, "step": 17165 }, { "epoch": 0.776917854718262, "grad_norm": 0.6324469738057149, "learning_rate": 1.2497548695458051e-06, "loss": 0.3047, "step": 17166 }, { "epoch": 0.7769631138266576, "grad_norm": 0.5924126772432989, "learning_rate": 1.24927016889817e-06, "loss": 0.29, "step": 17167 }, { "epoch": 0.7770083729350532, "grad_norm": 0.274682558773357, "learning_rate": 1.2487855488422007e-06, "loss": 0.4564, "step": 17168 }, { "epoch": 0.7770536320434488, "grad_norm": 0.5996590732612687, "learning_rate": 1.2483010093883086e-06, "loss": 0.2866, "step": 17169 }, { "epoch": 0.7770988911518443, "grad_norm": 0.6573037277905804, "learning_rate": 1.2478165505469042e-06, "loss": 0.2807, "step": 17170 }, { "epoch": 0.7771441502602399, "grad_norm": 0.6040269001549651, "learning_rate": 1.2473321723283982e-06, "loss": 0.2917, "step": 17171 }, { "epoch": 0.7771894093686355, "grad_norm": 0.263686879267683, "learning_rate": 1.2468478747432e-06, "loss": 0.4632, "step": 17172 }, { "epoch": 0.777234668477031, "grad_norm": 0.6994408235588686, "learning_rate": 1.2463636578017142e-06, "loss": 0.3008, "step": 17173 }, { "epoch": 0.7772799275854265, "grad_norm": 0.6136295462490068, "learning_rate": 1.2458795215143431e-06, "loss": 0.2719, "step": 17174 }, { "epoch": 0.7773251866938221, "grad_norm": 0.6700928782014203, "learning_rate": 1.2453954658914913e-06, "loss": 0.33, "step": 17175 }, { "epoch": 0.7773704458022177, "grad_norm": 0.6453116410368338, "learning_rate": 1.2449114909435611e-06, "loss": 0.2892, "step": 17176 }, { "epoch": 0.7774157049106133, "grad_norm": 0.28101865286112065, "learning_rate": 1.24442759668095e-06, "loss": 0.4958, "step": 17177 }, { "epoch": 0.7774609640190088, "grad_norm": 0.25113335487181443, "learning_rate": 1.2439437831140538e-06, "loss": 0.4634, "step": 17178 }, { "epoch": 0.7775062231274044, "grad_norm": 0.662903364481204, "learning_rate": 1.2434600502532717e-06, "loss": 0.2786, "step": 17179 }, { "epoch": 0.7775514822358, "grad_norm": 0.7470827691874483, "learning_rate": 1.2429763981089938e-06, "loss": 0.2952, "step": 17180 }, { "epoch": 0.7775967413441955, "grad_norm": 0.3091443192580121, "learning_rate": 1.2424928266916164e-06, "loss": 0.4501, "step": 17181 }, { "epoch": 0.7776420004525911, "grad_norm": 0.5766299726257766, "learning_rate": 1.2420093360115276e-06, "loss": 0.2978, "step": 17182 }, { "epoch": 0.7776872595609866, "grad_norm": 0.2973232597197437, "learning_rate": 1.2415259260791147e-06, "loss": 0.4589, "step": 17183 }, { "epoch": 0.7777325186693822, "grad_norm": 0.6270181736822528, "learning_rate": 1.2410425969047667e-06, "loss": 0.2664, "step": 17184 }, { "epoch": 0.7777777777777778, "grad_norm": 0.26161742101697577, "learning_rate": 1.2405593484988697e-06, "loss": 0.4854, "step": 17185 }, { "epoch": 0.7778230368861734, "grad_norm": 0.2728915388621859, "learning_rate": 1.2400761808718065e-06, "loss": 0.4808, "step": 17186 }, { "epoch": 0.7778682959945689, "grad_norm": 0.6215441103367519, "learning_rate": 1.2395930940339562e-06, "loss": 0.3026, "step": 17187 }, { "epoch": 0.7779135551029644, "grad_norm": 0.5801009208199909, "learning_rate": 1.2391100879957018e-06, "loss": 0.2923, "step": 17188 }, { "epoch": 0.77795881421136, "grad_norm": 0.6763289072826297, "learning_rate": 1.2386271627674234e-06, "loss": 0.3494, "step": 17189 }, { "epoch": 0.7780040733197556, "grad_norm": 0.28140635961391025, "learning_rate": 1.2381443183594927e-06, "loss": 0.4727, "step": 17190 }, { "epoch": 0.7780493324281512, "grad_norm": 0.670661369162288, "learning_rate": 1.2376615547822867e-06, "loss": 0.2701, "step": 17191 }, { "epoch": 0.7780945915365467, "grad_norm": 0.8439057856638054, "learning_rate": 1.2371788720461802e-06, "loss": 0.3252, "step": 17192 }, { "epoch": 0.7781398506449423, "grad_norm": 1.1001434994464, "learning_rate": 1.2366962701615431e-06, "loss": 0.229, "step": 17193 }, { "epoch": 0.7781851097533379, "grad_norm": 0.5700448027566635, "learning_rate": 1.2362137491387433e-06, "loss": 0.2615, "step": 17194 }, { "epoch": 0.7782303688617335, "grad_norm": 0.6436858853915678, "learning_rate": 1.2357313089881524e-06, "loss": 0.2852, "step": 17195 }, { "epoch": 0.778275627970129, "grad_norm": 0.6936044601347898, "learning_rate": 1.235248949720133e-06, "loss": 0.3068, "step": 17196 }, { "epoch": 0.7783208870785245, "grad_norm": 0.6631834829042466, "learning_rate": 1.2347666713450524e-06, "loss": 0.3152, "step": 17197 }, { "epoch": 0.7783661461869201, "grad_norm": 0.6094239192280482, "learning_rate": 1.2342844738732724e-06, "loss": 0.3193, "step": 17198 }, { "epoch": 0.7784114052953157, "grad_norm": 0.6329941486039516, "learning_rate": 1.2338023573151514e-06, "loss": 0.3004, "step": 17199 }, { "epoch": 0.7784566644037112, "grad_norm": 0.6459641698773567, "learning_rate": 1.2333203216810514e-06, "loss": 0.2855, "step": 17200 }, { "epoch": 0.7785019235121068, "grad_norm": 0.615623943776746, "learning_rate": 1.2328383669813304e-06, "loss": 0.3281, "step": 17201 }, { "epoch": 0.7785471826205024, "grad_norm": 1.4465718273959651, "learning_rate": 1.2323564932263428e-06, "loss": 0.2979, "step": 17202 }, { "epoch": 0.778592441728898, "grad_norm": 0.6687764650944367, "learning_rate": 1.2318747004264414e-06, "loss": 0.2778, "step": 17203 }, { "epoch": 0.7786377008372936, "grad_norm": 0.6070489391281674, "learning_rate": 1.2313929885919796e-06, "loss": 0.2385, "step": 17204 }, { "epoch": 0.778682959945689, "grad_norm": 0.2771610215799396, "learning_rate": 1.2309113577333098e-06, "loss": 0.4846, "step": 17205 }, { "epoch": 0.7787282190540846, "grad_norm": 0.6130620834328264, "learning_rate": 1.230429807860779e-06, "loss": 0.2521, "step": 17206 }, { "epoch": 0.7787734781624802, "grad_norm": 0.6405205580826229, "learning_rate": 1.2299483389847328e-06, "loss": 0.3027, "step": 17207 }, { "epoch": 0.7788187372708758, "grad_norm": 0.5343316133104158, "learning_rate": 1.2294669511155193e-06, "loss": 0.2639, "step": 17208 }, { "epoch": 0.7788639963792713, "grad_norm": 0.6271906841197386, "learning_rate": 1.2289856442634796e-06, "loss": 0.2932, "step": 17209 }, { "epoch": 0.7789092554876669, "grad_norm": 0.6349351163411207, "learning_rate": 1.2285044184389578e-06, "loss": 0.291, "step": 17210 }, { "epoch": 0.7789545145960625, "grad_norm": 0.6329715545668408, "learning_rate": 1.2280232736522928e-06, "loss": 0.3204, "step": 17211 }, { "epoch": 0.778999773704458, "grad_norm": 0.2753888188223901, "learning_rate": 1.2275422099138213e-06, "loss": 0.4897, "step": 17212 }, { "epoch": 0.7790450328128535, "grad_norm": 0.6631362683205497, "learning_rate": 1.2270612272338816e-06, "loss": 0.345, "step": 17213 }, { "epoch": 0.7790902919212491, "grad_norm": 0.6205185522767497, "learning_rate": 1.2265803256228103e-06, "loss": 0.3039, "step": 17214 }, { "epoch": 0.7791355510296447, "grad_norm": 0.607641422679698, "learning_rate": 1.226099505090938e-06, "loss": 0.2761, "step": 17215 }, { "epoch": 0.7791808101380403, "grad_norm": 0.2766637285628353, "learning_rate": 1.2256187656485957e-06, "loss": 0.4918, "step": 17216 }, { "epoch": 0.7792260692464359, "grad_norm": 0.6734382404378367, "learning_rate": 1.2251381073061137e-06, "loss": 0.2883, "step": 17217 }, { "epoch": 0.7792713283548314, "grad_norm": 0.5843294729100404, "learning_rate": 1.2246575300738234e-06, "loss": 0.293, "step": 17218 }, { "epoch": 0.779316587463227, "grad_norm": 0.29133487415654197, "learning_rate": 1.2241770339620446e-06, "loss": 0.4569, "step": 17219 }, { "epoch": 0.7793618465716226, "grad_norm": 0.6515322371384752, "learning_rate": 1.2236966189811045e-06, "loss": 0.3347, "step": 17220 }, { "epoch": 0.7794071056800181, "grad_norm": 0.6285603746441605, "learning_rate": 1.2232162851413282e-06, "loss": 0.3264, "step": 17221 }, { "epoch": 0.7794523647884136, "grad_norm": 0.614769755240032, "learning_rate": 1.2227360324530335e-06, "loss": 0.3157, "step": 17222 }, { "epoch": 0.7794976238968092, "grad_norm": 0.5887134813600734, "learning_rate": 1.2222558609265394e-06, "loss": 0.3023, "step": 17223 }, { "epoch": 0.7795428830052048, "grad_norm": 0.6053064454054246, "learning_rate": 1.2217757705721662e-06, "loss": 0.2957, "step": 17224 }, { "epoch": 0.7795881421136004, "grad_norm": 0.6885769936709684, "learning_rate": 1.2212957614002263e-06, "loss": 0.2738, "step": 17225 }, { "epoch": 0.779633401221996, "grad_norm": 0.5805273307300288, "learning_rate": 1.2208158334210363e-06, "loss": 0.2744, "step": 17226 }, { "epoch": 0.7796786603303915, "grad_norm": 0.6466902830249935, "learning_rate": 1.2203359866449073e-06, "loss": 0.2786, "step": 17227 }, { "epoch": 0.779723919438787, "grad_norm": 0.645014514513907, "learning_rate": 1.2198562210821474e-06, "loss": 0.3269, "step": 17228 }, { "epoch": 0.7797691785471826, "grad_norm": 0.6807809877605372, "learning_rate": 1.2193765367430683e-06, "loss": 0.2917, "step": 17229 }, { "epoch": 0.7798144376555782, "grad_norm": 0.2872760545323733, "learning_rate": 1.2188969336379775e-06, "loss": 0.4527, "step": 17230 }, { "epoch": 0.7798596967639737, "grad_norm": 0.5980345291753337, "learning_rate": 1.2184174117771786e-06, "loss": 0.322, "step": 17231 }, { "epoch": 0.7799049558723693, "grad_norm": 0.6540469404760676, "learning_rate": 1.2179379711709738e-06, "loss": 0.2945, "step": 17232 }, { "epoch": 0.7799502149807649, "grad_norm": 0.6760927496791808, "learning_rate": 1.2174586118296665e-06, "loss": 0.3381, "step": 17233 }, { "epoch": 0.7799954740891605, "grad_norm": 0.9163450526486921, "learning_rate": 1.2169793337635577e-06, "loss": 0.2636, "step": 17234 }, { "epoch": 0.780040733197556, "grad_norm": 0.607433315490523, "learning_rate": 1.2165001369829442e-06, "loss": 0.2601, "step": 17235 }, { "epoch": 0.7800859923059515, "grad_norm": 0.6189073530057309, "learning_rate": 1.2160210214981217e-06, "loss": 0.295, "step": 17236 }, { "epoch": 0.7801312514143471, "grad_norm": 0.6835413411402755, "learning_rate": 1.215541987319387e-06, "loss": 0.329, "step": 17237 }, { "epoch": 0.7801765105227427, "grad_norm": 0.566895279509252, "learning_rate": 1.2150630344570301e-06, "loss": 0.2517, "step": 17238 }, { "epoch": 0.7802217696311383, "grad_norm": 0.6177703234752498, "learning_rate": 1.2145841629213462e-06, "loss": 0.3313, "step": 17239 }, { "epoch": 0.7802670287395338, "grad_norm": 0.6494082399900752, "learning_rate": 1.2141053727226222e-06, "loss": 0.2414, "step": 17240 }, { "epoch": 0.7803122878479294, "grad_norm": 0.6395650775518928, "learning_rate": 1.2136266638711452e-06, "loss": 0.2737, "step": 17241 }, { "epoch": 0.780357546956325, "grad_norm": 0.6178599277507637, "learning_rate": 1.2131480363772018e-06, "loss": 0.3063, "step": 17242 }, { "epoch": 0.7804028060647206, "grad_norm": 0.6273313869636293, "learning_rate": 1.2126694902510783e-06, "loss": 0.3056, "step": 17243 }, { "epoch": 0.780448065173116, "grad_norm": 1.1095679829939256, "learning_rate": 1.2121910255030556e-06, "loss": 0.2808, "step": 17244 }, { "epoch": 0.7804933242815116, "grad_norm": 0.5816461982748863, "learning_rate": 1.2117126421434127e-06, "loss": 0.2703, "step": 17245 }, { "epoch": 0.7805385833899072, "grad_norm": 0.6286741517081604, "learning_rate": 1.2112343401824306e-06, "loss": 0.3267, "step": 17246 }, { "epoch": 0.7805838424983028, "grad_norm": 0.6109071921166503, "learning_rate": 1.2107561196303874e-06, "loss": 0.3199, "step": 17247 }, { "epoch": 0.7806291016066983, "grad_norm": 0.680967226262785, "learning_rate": 1.2102779804975574e-06, "loss": 0.321, "step": 17248 }, { "epoch": 0.7806743607150939, "grad_norm": 0.6266455172496049, "learning_rate": 1.209799922794213e-06, "loss": 0.3042, "step": 17249 }, { "epoch": 0.7807196198234895, "grad_norm": 0.2809296741128752, "learning_rate": 1.2093219465306289e-06, "loss": 0.4413, "step": 17250 }, { "epoch": 0.7807648789318851, "grad_norm": 0.6475967871995113, "learning_rate": 1.2088440517170729e-06, "loss": 0.3074, "step": 17251 }, { "epoch": 0.7808101380402807, "grad_norm": 0.5713740728066894, "learning_rate": 1.2083662383638156e-06, "loss": 0.2676, "step": 17252 }, { "epoch": 0.7808553971486761, "grad_norm": 0.5782149452659897, "learning_rate": 1.207888506481123e-06, "loss": 0.28, "step": 17253 }, { "epoch": 0.7809006562570717, "grad_norm": 0.2789851147253102, "learning_rate": 1.2074108560792586e-06, "loss": 0.4498, "step": 17254 }, { "epoch": 0.7809459153654673, "grad_norm": 0.6078179929177168, "learning_rate": 1.2069332871684875e-06, "loss": 0.2967, "step": 17255 }, { "epoch": 0.7809911744738629, "grad_norm": 0.5688470930722148, "learning_rate": 1.2064557997590697e-06, "loss": 0.2738, "step": 17256 }, { "epoch": 0.7810364335822584, "grad_norm": 0.5808970913291032, "learning_rate": 1.2059783938612674e-06, "loss": 0.2825, "step": 17257 }, { "epoch": 0.781081692690654, "grad_norm": 0.60264982018552, "learning_rate": 1.2055010694853347e-06, "loss": 0.2928, "step": 17258 }, { "epoch": 0.7811269517990496, "grad_norm": 0.630800348885263, "learning_rate": 1.2050238266415325e-06, "loss": 0.3223, "step": 17259 }, { "epoch": 0.7811722109074452, "grad_norm": 0.6324553607490628, "learning_rate": 1.2045466653401122e-06, "loss": 0.2849, "step": 17260 }, { "epoch": 0.7812174700158407, "grad_norm": 0.6422699549315195, "learning_rate": 1.204069585591326e-06, "loss": 0.2833, "step": 17261 }, { "epoch": 0.7812627291242362, "grad_norm": 0.25655613677725086, "learning_rate": 1.203592587405426e-06, "loss": 0.437, "step": 17262 }, { "epoch": 0.7813079882326318, "grad_norm": 0.5916781393995728, "learning_rate": 1.2031156707926632e-06, "loss": 0.3006, "step": 17263 }, { "epoch": 0.7813532473410274, "grad_norm": 0.5927202272252555, "learning_rate": 1.2026388357632835e-06, "loss": 0.2649, "step": 17264 }, { "epoch": 0.781398506449423, "grad_norm": 0.6134259430364656, "learning_rate": 1.202162082327531e-06, "loss": 0.2859, "step": 17265 }, { "epoch": 0.7814437655578185, "grad_norm": 0.598847689996696, "learning_rate": 1.2016854104956522e-06, "loss": 0.289, "step": 17266 }, { "epoch": 0.7814890246662141, "grad_norm": 0.6069934691634431, "learning_rate": 1.201208820277887e-06, "loss": 0.2622, "step": 17267 }, { "epoch": 0.7815342837746097, "grad_norm": 0.6079465311713511, "learning_rate": 1.2007323116844789e-06, "loss": 0.2764, "step": 17268 }, { "epoch": 0.7815795428830052, "grad_norm": 0.5959024824792365, "learning_rate": 1.2002558847256652e-06, "loss": 0.2586, "step": 17269 }, { "epoch": 0.7816248019914007, "grad_norm": 0.6086991031251796, "learning_rate": 1.1997795394116802e-06, "loss": 0.3159, "step": 17270 }, { "epoch": 0.7816700610997963, "grad_norm": 0.6123399712260394, "learning_rate": 1.1993032757527618e-06, "loss": 0.3057, "step": 17271 }, { "epoch": 0.7817153202081919, "grad_norm": 0.29205414473219377, "learning_rate": 1.1988270937591446e-06, "loss": 0.4743, "step": 17272 }, { "epoch": 0.7817605793165875, "grad_norm": 0.5817875667486778, "learning_rate": 1.1983509934410586e-06, "loss": 0.2693, "step": 17273 }, { "epoch": 0.7818058384249831, "grad_norm": 0.6029411095590721, "learning_rate": 1.1978749748087325e-06, "loss": 0.2755, "step": 17274 }, { "epoch": 0.7818510975333786, "grad_norm": 0.25139198220891606, "learning_rate": 1.1973990378723954e-06, "loss": 0.4454, "step": 17275 }, { "epoch": 0.7818963566417741, "grad_norm": 1.0187806068919762, "learning_rate": 1.1969231826422762e-06, "loss": 0.2676, "step": 17276 }, { "epoch": 0.7819416157501697, "grad_norm": 0.5490177076170151, "learning_rate": 1.1964474091285976e-06, "loss": 0.2568, "step": 17277 }, { "epoch": 0.7819868748585653, "grad_norm": 0.2790758147332501, "learning_rate": 1.1959717173415807e-06, "loss": 0.4767, "step": 17278 }, { "epoch": 0.7820321339669608, "grad_norm": 0.27360207391679175, "learning_rate": 1.19549610729145e-06, "loss": 0.4783, "step": 17279 }, { "epoch": 0.7820773930753564, "grad_norm": 0.6240491325841333, "learning_rate": 1.1950205789884217e-06, "loss": 0.2921, "step": 17280 }, { "epoch": 0.782122652183752, "grad_norm": 0.5984853106488534, "learning_rate": 1.1945451324427166e-06, "loss": 0.2819, "step": 17281 }, { "epoch": 0.7821679112921476, "grad_norm": 0.5494188922806639, "learning_rate": 1.194069767664549e-06, "loss": 0.2744, "step": 17282 }, { "epoch": 0.7822131704005431, "grad_norm": 0.5946047717941394, "learning_rate": 1.1935944846641318e-06, "loss": 0.2958, "step": 17283 }, { "epoch": 0.7822584295089386, "grad_norm": 0.6009843995686562, "learning_rate": 1.1931192834516787e-06, "loss": 0.298, "step": 17284 }, { "epoch": 0.7823036886173342, "grad_norm": 0.5980249673659658, "learning_rate": 1.1926441640374015e-06, "loss": 0.2896, "step": 17285 }, { "epoch": 0.7823489477257298, "grad_norm": 1.0807125893259095, "learning_rate": 1.1921691264315078e-06, "loss": 0.2637, "step": 17286 }, { "epoch": 0.7823942068341254, "grad_norm": 0.6771671739260511, "learning_rate": 1.191694170644203e-06, "loss": 0.304, "step": 17287 }, { "epoch": 0.7824394659425209, "grad_norm": 0.6605418925307608, "learning_rate": 1.191219296685696e-06, "loss": 0.3074, "step": 17288 }, { "epoch": 0.7824847250509165, "grad_norm": 0.2795546897923235, "learning_rate": 1.1907445045661885e-06, "loss": 0.5121, "step": 17289 }, { "epoch": 0.7825299841593121, "grad_norm": 0.2621604055121253, "learning_rate": 1.1902697942958806e-06, "loss": 0.4615, "step": 17290 }, { "epoch": 0.7825752432677077, "grad_norm": 0.5772421413586936, "learning_rate": 1.189795165884975e-06, "loss": 0.2757, "step": 17291 }, { "epoch": 0.7826205023761031, "grad_norm": 0.7082060921982903, "learning_rate": 1.1893206193436696e-06, "loss": 0.299, "step": 17292 }, { "epoch": 0.7826657614844987, "grad_norm": 0.2718666477866807, "learning_rate": 1.188846154682161e-06, "loss": 0.467, "step": 17293 }, { "epoch": 0.7827110205928943, "grad_norm": 0.5893728826172965, "learning_rate": 1.1883717719106419e-06, "loss": 0.3106, "step": 17294 }, { "epoch": 0.7827562797012899, "grad_norm": 0.773709814014488, "learning_rate": 1.1878974710393082e-06, "loss": 0.2885, "step": 17295 }, { "epoch": 0.7828015388096855, "grad_norm": 0.6151307002494818, "learning_rate": 1.1874232520783486e-06, "loss": 0.2649, "step": 17296 }, { "epoch": 0.782846797918081, "grad_norm": 0.6510029108292334, "learning_rate": 1.1869491150379553e-06, "loss": 0.2729, "step": 17297 }, { "epoch": 0.7828920570264766, "grad_norm": 0.6532810746624844, "learning_rate": 1.1864750599283132e-06, "loss": 0.2947, "step": 17298 }, { "epoch": 0.7829373161348722, "grad_norm": 0.6156139091235419, "learning_rate": 1.1860010867596112e-06, "loss": 0.3081, "step": 17299 }, { "epoch": 0.7829825752432678, "grad_norm": 0.636874452229096, "learning_rate": 1.1855271955420306e-06, "loss": 0.3007, "step": 17300 }, { "epoch": 0.7830278343516632, "grad_norm": 0.6506900226776479, "learning_rate": 1.1850533862857567e-06, "loss": 0.2929, "step": 17301 }, { "epoch": 0.7830730934600588, "grad_norm": 0.645473079213239, "learning_rate": 1.1845796590009684e-06, "loss": 0.3279, "step": 17302 }, { "epoch": 0.7831183525684544, "grad_norm": 0.6372074891495952, "learning_rate": 1.1841060136978443e-06, "loss": 0.2731, "step": 17303 }, { "epoch": 0.78316361167685, "grad_norm": 0.560182761230861, "learning_rate": 1.183632450386562e-06, "loss": 0.2679, "step": 17304 }, { "epoch": 0.7832088707852455, "grad_norm": 0.6097224018444345, "learning_rate": 1.1831589690772988e-06, "loss": 0.2748, "step": 17305 }, { "epoch": 0.7832541298936411, "grad_norm": 0.2578450905562074, "learning_rate": 1.1826855697802264e-06, "loss": 0.4553, "step": 17306 }, { "epoch": 0.7832993890020367, "grad_norm": 0.6094006990832728, "learning_rate": 1.1822122525055163e-06, "loss": 0.3061, "step": 17307 }, { "epoch": 0.7833446481104323, "grad_norm": 0.6032761051944457, "learning_rate": 1.1817390172633402e-06, "loss": 0.2967, "step": 17308 }, { "epoch": 0.7833899072188278, "grad_norm": 0.6550332039805137, "learning_rate": 1.1812658640638653e-06, "loss": 0.3423, "step": 17309 }, { "epoch": 0.7834351663272233, "grad_norm": 0.6853703330648068, "learning_rate": 1.180792792917259e-06, "loss": 0.2907, "step": 17310 }, { "epoch": 0.7834804254356189, "grad_norm": 0.6279731879623388, "learning_rate": 1.1803198038336866e-06, "loss": 0.2822, "step": 17311 }, { "epoch": 0.7835256845440145, "grad_norm": 0.601357385134243, "learning_rate": 1.1798468968233084e-06, "loss": 0.3088, "step": 17312 }, { "epoch": 0.7835709436524101, "grad_norm": 0.27019008623071317, "learning_rate": 1.179374071896288e-06, "loss": 0.4758, "step": 17313 }, { "epoch": 0.7836162027608056, "grad_norm": 0.5989830670091586, "learning_rate": 1.178901329062786e-06, "loss": 0.265, "step": 17314 }, { "epoch": 0.7836614618692012, "grad_norm": 0.6033427541616095, "learning_rate": 1.1784286683329587e-06, "loss": 0.2784, "step": 17315 }, { "epoch": 0.7837067209775967, "grad_norm": 0.261811394535706, "learning_rate": 1.1779560897169611e-06, "loss": 0.4445, "step": 17316 }, { "epoch": 0.7837519800859923, "grad_norm": 0.5609501858635176, "learning_rate": 1.1774835932249485e-06, "loss": 0.2336, "step": 17317 }, { "epoch": 0.7837972391943878, "grad_norm": 0.6105280956973805, "learning_rate": 1.1770111788670763e-06, "loss": 0.2644, "step": 17318 }, { "epoch": 0.7838424983027834, "grad_norm": 0.6030722422612644, "learning_rate": 1.1765388466534895e-06, "loss": 0.2569, "step": 17319 }, { "epoch": 0.783887757411179, "grad_norm": 0.6215687069796155, "learning_rate": 1.1760665965943402e-06, "loss": 0.3065, "step": 17320 }, { "epoch": 0.7839330165195746, "grad_norm": 0.612128101161067, "learning_rate": 1.1755944286997766e-06, "loss": 0.2529, "step": 17321 }, { "epoch": 0.7839782756279702, "grad_norm": 0.5852209019344554, "learning_rate": 1.175122342979943e-06, "loss": 0.2851, "step": 17322 }, { "epoch": 0.7840235347363657, "grad_norm": 0.2576637423810731, "learning_rate": 1.174650339444982e-06, "loss": 0.4541, "step": 17323 }, { "epoch": 0.7840687938447612, "grad_norm": 0.5993989103121361, "learning_rate": 1.1741784181050376e-06, "loss": 0.2818, "step": 17324 }, { "epoch": 0.7841140529531568, "grad_norm": 0.6645192315616409, "learning_rate": 1.1737065789702473e-06, "loss": 0.2974, "step": 17325 }, { "epoch": 0.7841593120615524, "grad_norm": 0.6193089282975309, "learning_rate": 1.1732348220507529e-06, "loss": 0.2586, "step": 17326 }, { "epoch": 0.7842045711699479, "grad_norm": 0.2807318580083904, "learning_rate": 1.1727631473566875e-06, "loss": 0.4753, "step": 17327 }, { "epoch": 0.7842498302783435, "grad_norm": 0.28674929824535034, "learning_rate": 1.1722915548981896e-06, "loss": 0.4746, "step": 17328 }, { "epoch": 0.7842950893867391, "grad_norm": 0.23822296288756592, "learning_rate": 1.1718200446853877e-06, "loss": 0.4742, "step": 17329 }, { "epoch": 0.7843403484951347, "grad_norm": 0.6216948625679023, "learning_rate": 1.1713486167284183e-06, "loss": 0.2915, "step": 17330 }, { "epoch": 0.7843856076035302, "grad_norm": 0.2684113634588021, "learning_rate": 1.1708772710374078e-06, "loss": 0.4857, "step": 17331 }, { "epoch": 0.7844308667119257, "grad_norm": 0.2603748463730575, "learning_rate": 1.1704060076224827e-06, "loss": 0.4521, "step": 17332 }, { "epoch": 0.7844761258203213, "grad_norm": 0.6779421800191525, "learning_rate": 1.169934826493771e-06, "loss": 0.2767, "step": 17333 }, { "epoch": 0.7845213849287169, "grad_norm": 0.7134792482230873, "learning_rate": 1.1694637276613985e-06, "loss": 0.3272, "step": 17334 }, { "epoch": 0.7845666440371125, "grad_norm": 0.6114288019983596, "learning_rate": 1.168992711135486e-06, "loss": 0.2796, "step": 17335 }, { "epoch": 0.784611903145508, "grad_norm": 0.6899832877009546, "learning_rate": 1.1685217769261519e-06, "loss": 0.2972, "step": 17336 }, { "epoch": 0.7846571622539036, "grad_norm": 0.5679586106893408, "learning_rate": 1.1680509250435195e-06, "loss": 0.2816, "step": 17337 }, { "epoch": 0.7847024213622992, "grad_norm": 0.6080534926128125, "learning_rate": 1.1675801554977017e-06, "loss": 0.3232, "step": 17338 }, { "epoch": 0.7847476804706948, "grad_norm": 0.5998955037981734, "learning_rate": 1.1671094682988182e-06, "loss": 0.2831, "step": 17339 }, { "epoch": 0.7847929395790902, "grad_norm": 0.5728524362999736, "learning_rate": 1.1666388634569798e-06, "loss": 0.2993, "step": 17340 }, { "epoch": 0.7848381986874858, "grad_norm": 0.672290396497668, "learning_rate": 1.1661683409822976e-06, "loss": 0.3322, "step": 17341 }, { "epoch": 0.7848834577958814, "grad_norm": 0.650360992582781, "learning_rate": 1.1656979008848834e-06, "loss": 0.2962, "step": 17342 }, { "epoch": 0.784928716904277, "grad_norm": 0.5724326349339708, "learning_rate": 1.1652275431748462e-06, "loss": 0.2957, "step": 17343 }, { "epoch": 0.7849739760126726, "grad_norm": 1.5135642866782946, "learning_rate": 1.164757267862292e-06, "loss": 0.2927, "step": 17344 }, { "epoch": 0.7850192351210681, "grad_norm": 0.6320185461540719, "learning_rate": 1.1642870749573231e-06, "loss": 0.26, "step": 17345 }, { "epoch": 0.7850644942294637, "grad_norm": 0.6058476301879185, "learning_rate": 1.1638169644700447e-06, "loss": 0.3077, "step": 17346 }, { "epoch": 0.7851097533378593, "grad_norm": 0.3120894027493385, "learning_rate": 1.1633469364105604e-06, "loss": 0.4534, "step": 17347 }, { "epoch": 0.7851550124462549, "grad_norm": 0.5984657291673785, "learning_rate": 1.1628769907889643e-06, "loss": 0.3339, "step": 17348 }, { "epoch": 0.7852002715546503, "grad_norm": 0.6255476437590982, "learning_rate": 1.162407127615357e-06, "loss": 0.2648, "step": 17349 }, { "epoch": 0.7852455306630459, "grad_norm": 0.6902499963136173, "learning_rate": 1.1619373468998357e-06, "loss": 0.3146, "step": 17350 }, { "epoch": 0.7852907897714415, "grad_norm": 0.2817907502818888, "learning_rate": 1.1614676486524927e-06, "loss": 0.4615, "step": 17351 }, { "epoch": 0.7853360488798371, "grad_norm": 0.6043644113354228, "learning_rate": 1.1609980328834196e-06, "loss": 0.2971, "step": 17352 }, { "epoch": 0.7853813079882326, "grad_norm": 0.5668065899841754, "learning_rate": 1.16052849960271e-06, "loss": 0.2657, "step": 17353 }, { "epoch": 0.7854265670966282, "grad_norm": 0.26723005660984916, "learning_rate": 1.1600590488204495e-06, "loss": 0.4568, "step": 17354 }, { "epoch": 0.7854718262050238, "grad_norm": 0.5662361819226162, "learning_rate": 1.159589680546727e-06, "loss": 0.2744, "step": 17355 }, { "epoch": 0.7855170853134193, "grad_norm": 0.26301807410907907, "learning_rate": 1.159120394791627e-06, "loss": 0.4581, "step": 17356 }, { "epoch": 0.7855623444218149, "grad_norm": 0.26376801379454134, "learning_rate": 1.1586511915652343e-06, "loss": 0.4793, "step": 17357 }, { "epoch": 0.7856076035302104, "grad_norm": 0.6552962676442267, "learning_rate": 1.1581820708776282e-06, "loss": 0.3042, "step": 17358 }, { "epoch": 0.785652862638606, "grad_norm": 0.749149067682797, "learning_rate": 1.1577130327388918e-06, "loss": 0.2622, "step": 17359 }, { "epoch": 0.7856981217470016, "grad_norm": 0.5841610216480839, "learning_rate": 1.1572440771591014e-06, "loss": 0.3008, "step": 17360 }, { "epoch": 0.7857433808553972, "grad_norm": 0.6299373407052793, "learning_rate": 1.1567752041483328e-06, "loss": 0.3048, "step": 17361 }, { "epoch": 0.7857886399637927, "grad_norm": 0.7158363775054724, "learning_rate": 1.1563064137166607e-06, "loss": 0.2657, "step": 17362 }, { "epoch": 0.7858338990721883, "grad_norm": 0.27321909472327843, "learning_rate": 1.1558377058741605e-06, "loss": 0.4914, "step": 17363 }, { "epoch": 0.7858791581805838, "grad_norm": 0.27974658066057784, "learning_rate": 1.1553690806309015e-06, "loss": 0.4868, "step": 17364 }, { "epoch": 0.7859244172889794, "grad_norm": 0.26357198631483486, "learning_rate": 1.154900537996952e-06, "loss": 0.4575, "step": 17365 }, { "epoch": 0.7859696763973749, "grad_norm": 0.8363162554904776, "learning_rate": 1.154432077982382e-06, "loss": 0.2967, "step": 17366 }, { "epoch": 0.7860149355057705, "grad_norm": 0.5646840841086878, "learning_rate": 1.1539637005972543e-06, "loss": 0.2601, "step": 17367 }, { "epoch": 0.7860601946141661, "grad_norm": 0.5884686071005956, "learning_rate": 1.1534954058516357e-06, "loss": 0.2893, "step": 17368 }, { "epoch": 0.7861054537225617, "grad_norm": 0.6341286607495344, "learning_rate": 1.1530271937555859e-06, "loss": 0.3212, "step": 17369 }, { "epoch": 0.7861507128309573, "grad_norm": 0.653173806007168, "learning_rate": 1.152559064319168e-06, "loss": 0.2998, "step": 17370 }, { "epoch": 0.7861959719393528, "grad_norm": 0.28527858241156007, "learning_rate": 1.152091017552438e-06, "loss": 0.4712, "step": 17371 }, { "epoch": 0.7862412310477483, "grad_norm": 0.7963908930873177, "learning_rate": 1.1516230534654554e-06, "loss": 0.3352, "step": 17372 }, { "epoch": 0.7862864901561439, "grad_norm": 0.6102186650838729, "learning_rate": 1.151155172068274e-06, "loss": 0.2846, "step": 17373 }, { "epoch": 0.7863317492645395, "grad_norm": 0.27518292768602326, "learning_rate": 1.1506873733709457e-06, "loss": 0.4701, "step": 17374 }, { "epoch": 0.786377008372935, "grad_norm": 0.6106521447355924, "learning_rate": 1.1502196573835239e-06, "loss": 0.2709, "step": 17375 }, { "epoch": 0.7864222674813306, "grad_norm": 0.6243633846315203, "learning_rate": 1.1497520241160603e-06, "loss": 0.2618, "step": 17376 }, { "epoch": 0.7864675265897262, "grad_norm": 0.9340048826982438, "learning_rate": 1.1492844735785979e-06, "loss": 0.3185, "step": 17377 }, { "epoch": 0.7865127856981218, "grad_norm": 0.6010856148363889, "learning_rate": 1.1488170057811853e-06, "loss": 0.3282, "step": 17378 }, { "epoch": 0.7865580448065174, "grad_norm": 0.650174914655158, "learning_rate": 1.148349620733869e-06, "loss": 0.2786, "step": 17379 }, { "epoch": 0.7866033039149128, "grad_norm": 0.6267531884417445, "learning_rate": 1.1478823184466897e-06, "loss": 0.2876, "step": 17380 }, { "epoch": 0.7866485630233084, "grad_norm": 0.651212363619707, "learning_rate": 1.1474150989296872e-06, "loss": 0.2996, "step": 17381 }, { "epoch": 0.786693822131704, "grad_norm": 0.3385550548730938, "learning_rate": 1.1469479621929036e-06, "loss": 0.4627, "step": 17382 }, { "epoch": 0.7867390812400996, "grad_norm": 1.1355741833386386, "learning_rate": 1.146480908246373e-06, "loss": 0.293, "step": 17383 }, { "epoch": 0.7867843403484951, "grad_norm": 0.5953511777939547, "learning_rate": 1.1460139371001339e-06, "loss": 0.3296, "step": 17384 }, { "epoch": 0.7868295994568907, "grad_norm": 0.28265491438938334, "learning_rate": 1.1455470487642167e-06, "loss": 0.4861, "step": 17385 }, { "epoch": 0.7868748585652863, "grad_norm": 0.6026682293599088, "learning_rate": 1.1450802432486574e-06, "loss": 0.2921, "step": 17386 }, { "epoch": 0.7869201176736819, "grad_norm": 0.6458133985405689, "learning_rate": 1.1446135205634829e-06, "loss": 0.2722, "step": 17387 }, { "epoch": 0.7869653767820773, "grad_norm": 0.6222737944884161, "learning_rate": 1.144146880718724e-06, "loss": 0.2813, "step": 17388 }, { "epoch": 0.7870106358904729, "grad_norm": 0.6055298101392338, "learning_rate": 1.1436803237244065e-06, "loss": 0.3041, "step": 17389 }, { "epoch": 0.7870558949988685, "grad_norm": 0.6225200984515917, "learning_rate": 1.1432138495905531e-06, "loss": 0.3363, "step": 17390 }, { "epoch": 0.7871011541072641, "grad_norm": 0.630228041297407, "learning_rate": 1.1427474583271896e-06, "loss": 0.2508, "step": 17391 }, { "epoch": 0.7871464132156597, "grad_norm": 0.8153673652545924, "learning_rate": 1.1422811499443375e-06, "loss": 0.32, "step": 17392 }, { "epoch": 0.7871916723240552, "grad_norm": 0.6295439380761974, "learning_rate": 1.1418149244520155e-06, "loss": 0.3029, "step": 17393 }, { "epoch": 0.7872369314324508, "grad_norm": 0.6664844860170986, "learning_rate": 1.1413487818602397e-06, "loss": 0.3676, "step": 17394 }, { "epoch": 0.7872821905408464, "grad_norm": 0.6075032187161125, "learning_rate": 1.1408827221790297e-06, "loss": 0.2806, "step": 17395 }, { "epoch": 0.787327449649242, "grad_norm": 0.6965414154938917, "learning_rate": 1.1404167454183957e-06, "loss": 0.3159, "step": 17396 }, { "epoch": 0.7873727087576374, "grad_norm": 0.6003957067092349, "learning_rate": 1.1399508515883533e-06, "loss": 0.272, "step": 17397 }, { "epoch": 0.787417967866033, "grad_norm": 0.28195165676181216, "learning_rate": 1.1394850406989106e-06, "loss": 0.4759, "step": 17398 }, { "epoch": 0.7874632269744286, "grad_norm": 1.5650898904360315, "learning_rate": 1.139019312760079e-06, "loss": 0.2927, "step": 17399 }, { "epoch": 0.7875084860828242, "grad_norm": 0.34002607274237956, "learning_rate": 1.1385536677818632e-06, "loss": 0.429, "step": 17400 }, { "epoch": 0.7875537451912197, "grad_norm": 0.6491468227013686, "learning_rate": 1.138088105774271e-06, "loss": 0.2949, "step": 17401 }, { "epoch": 0.7875990042996153, "grad_norm": 0.6195093709848404, "learning_rate": 1.137622626747304e-06, "loss": 0.2673, "step": 17402 }, { "epoch": 0.7876442634080109, "grad_norm": 0.5960717594699569, "learning_rate": 1.1371572307109634e-06, "loss": 0.3477, "step": 17403 }, { "epoch": 0.7876895225164064, "grad_norm": 0.5759243199990242, "learning_rate": 1.13669191767525e-06, "loss": 0.2689, "step": 17404 }, { "epoch": 0.787734781624802, "grad_norm": 0.6149926025008491, "learning_rate": 1.1362266876501649e-06, "loss": 0.2394, "step": 17405 }, { "epoch": 0.7877800407331975, "grad_norm": 0.262255617987434, "learning_rate": 1.1357615406456985e-06, "loss": 0.4622, "step": 17406 }, { "epoch": 0.7878252998415931, "grad_norm": 0.6810866592082769, "learning_rate": 1.1352964766718488e-06, "loss": 0.3056, "step": 17407 }, { "epoch": 0.7878705589499887, "grad_norm": 0.8782494286183904, "learning_rate": 1.1348314957386093e-06, "loss": 0.2777, "step": 17408 }, { "epoch": 0.7879158180583843, "grad_norm": 0.5896235487727614, "learning_rate": 1.1343665978559704e-06, "loss": 0.2746, "step": 17409 }, { "epoch": 0.7879610771667798, "grad_norm": 0.6212552351887514, "learning_rate": 1.1339017830339195e-06, "loss": 0.2693, "step": 17410 }, { "epoch": 0.7880063362751754, "grad_norm": 0.6423392372429871, "learning_rate": 1.1334370512824466e-06, "loss": 0.2959, "step": 17411 }, { "epoch": 0.788051595383571, "grad_norm": 0.3022873126019532, "learning_rate": 1.1329724026115345e-06, "loss": 0.4557, "step": 17412 }, { "epoch": 0.7880968544919665, "grad_norm": 0.27724362715051737, "learning_rate": 1.132507837031171e-06, "loss": 0.4582, "step": 17413 }, { "epoch": 0.7881421136003621, "grad_norm": 0.5863026360362641, "learning_rate": 1.1320433545513342e-06, "loss": 0.3247, "step": 17414 }, { "epoch": 0.7881873727087576, "grad_norm": 0.6152221198174394, "learning_rate": 1.1315789551820078e-06, "loss": 0.2866, "step": 17415 }, { "epoch": 0.7882326318171532, "grad_norm": 0.6148723884806709, "learning_rate": 1.1311146389331667e-06, "loss": 0.3318, "step": 17416 }, { "epoch": 0.7882778909255488, "grad_norm": 0.5900675655471992, "learning_rate": 1.1306504058147915e-06, "loss": 0.3178, "step": 17417 }, { "epoch": 0.7883231500339444, "grad_norm": 0.6071007835312443, "learning_rate": 1.1301862558368554e-06, "loss": 0.3106, "step": 17418 }, { "epoch": 0.7883684091423399, "grad_norm": 0.24889869987412316, "learning_rate": 1.1297221890093302e-06, "loss": 0.463, "step": 17419 }, { "epoch": 0.7884136682507354, "grad_norm": 0.6513774161982812, "learning_rate": 1.129258205342188e-06, "loss": 0.319, "step": 17420 }, { "epoch": 0.788458927359131, "grad_norm": 0.6863154751764883, "learning_rate": 1.1287943048454003e-06, "loss": 0.314, "step": 17421 }, { "epoch": 0.7885041864675266, "grad_norm": 0.6930197690759812, "learning_rate": 1.1283304875289335e-06, "loss": 0.2905, "step": 17422 }, { "epoch": 0.7885494455759221, "grad_norm": 0.27159987742847796, "learning_rate": 1.1278667534027525e-06, "loss": 0.4794, "step": 17423 }, { "epoch": 0.7885947046843177, "grad_norm": 0.544502579307501, "learning_rate": 1.1274031024768239e-06, "loss": 0.3424, "step": 17424 }, { "epoch": 0.7886399637927133, "grad_norm": 0.6749145704048627, "learning_rate": 1.1269395347611074e-06, "loss": 0.3051, "step": 17425 }, { "epoch": 0.7886852229011089, "grad_norm": 0.6598064622660003, "learning_rate": 1.126476050265567e-06, "loss": 0.3006, "step": 17426 }, { "epoch": 0.7887304820095045, "grad_norm": 0.6273926410342063, "learning_rate": 1.1260126490001577e-06, "loss": 0.2898, "step": 17427 }, { "epoch": 0.7887757411178999, "grad_norm": 0.26148859512293166, "learning_rate": 1.12554933097484e-06, "loss": 0.4581, "step": 17428 }, { "epoch": 0.7888210002262955, "grad_norm": 0.6653921917108375, "learning_rate": 1.1250860961995663e-06, "loss": 0.2734, "step": 17429 }, { "epoch": 0.7888662593346911, "grad_norm": 0.5786418872287515, "learning_rate": 1.1246229446842927e-06, "loss": 0.2815, "step": 17430 }, { "epoch": 0.7889115184430867, "grad_norm": 0.282184982960806, "learning_rate": 1.1241598764389699e-06, "loss": 0.4784, "step": 17431 }, { "epoch": 0.7889567775514822, "grad_norm": 0.27743184880991817, "learning_rate": 1.1236968914735462e-06, "loss": 0.4798, "step": 17432 }, { "epoch": 0.7890020366598778, "grad_norm": 0.6517786801029701, "learning_rate": 1.1232339897979716e-06, "loss": 0.2717, "step": 17433 }, { "epoch": 0.7890472957682734, "grad_norm": 0.5885837605798061, "learning_rate": 1.1227711714221928e-06, "loss": 0.2987, "step": 17434 }, { "epoch": 0.789092554876669, "grad_norm": 0.2615744958143664, "learning_rate": 1.1223084363561538e-06, "loss": 0.4636, "step": 17435 }, { "epoch": 0.7891378139850644, "grad_norm": 0.2574679366280398, "learning_rate": 1.1218457846097958e-06, "loss": 0.4644, "step": 17436 }, { "epoch": 0.78918307309346, "grad_norm": 0.2889475935321559, "learning_rate": 1.1213832161930622e-06, "loss": 0.4656, "step": 17437 }, { "epoch": 0.7892283322018556, "grad_norm": 0.6560240655089297, "learning_rate": 1.120920731115891e-06, "loss": 0.3093, "step": 17438 }, { "epoch": 0.7892735913102512, "grad_norm": 0.6313307722903918, "learning_rate": 1.1204583293882181e-06, "loss": 0.3272, "step": 17439 }, { "epoch": 0.7893188504186468, "grad_norm": 0.6278866803927241, "learning_rate": 1.119996011019981e-06, "loss": 0.3384, "step": 17440 }, { "epoch": 0.7893641095270423, "grad_norm": 0.6354112550807469, "learning_rate": 1.119533776021114e-06, "loss": 0.3399, "step": 17441 }, { "epoch": 0.7894093686354379, "grad_norm": 0.6007230130425893, "learning_rate": 1.1190716244015487e-06, "loss": 0.2715, "step": 17442 }, { "epoch": 0.7894546277438335, "grad_norm": 0.2898353552609453, "learning_rate": 1.118609556171213e-06, "loss": 0.4671, "step": 17443 }, { "epoch": 0.789499886852229, "grad_norm": 0.6571880434843874, "learning_rate": 1.118147571340039e-06, "loss": 0.3023, "step": 17444 }, { "epoch": 0.7895451459606245, "grad_norm": 0.37409319570832594, "learning_rate": 1.11768566991795e-06, "loss": 0.4866, "step": 17445 }, { "epoch": 0.7895904050690201, "grad_norm": 0.621756727887467, "learning_rate": 1.1172238519148732e-06, "loss": 0.3239, "step": 17446 }, { "epoch": 0.7896356641774157, "grad_norm": 0.273835122139672, "learning_rate": 1.1167621173407312e-06, "loss": 0.4652, "step": 17447 }, { "epoch": 0.7896809232858113, "grad_norm": 0.5843208882446077, "learning_rate": 1.1163004662054434e-06, "loss": 0.3353, "step": 17448 }, { "epoch": 0.7897261823942069, "grad_norm": 0.5808348307996056, "learning_rate": 1.1158388985189312e-06, "loss": 0.2871, "step": 17449 }, { "epoch": 0.7897714415026024, "grad_norm": 0.5875845384179548, "learning_rate": 1.1153774142911123e-06, "loss": 0.3018, "step": 17450 }, { "epoch": 0.789816700610998, "grad_norm": 0.594981041595839, "learning_rate": 1.1149160135319027e-06, "loss": 0.2962, "step": 17451 }, { "epoch": 0.7898619597193935, "grad_norm": 0.6065672910891571, "learning_rate": 1.1144546962512144e-06, "loss": 0.2997, "step": 17452 }, { "epoch": 0.7899072188277891, "grad_norm": 0.6603432041018994, "learning_rate": 1.113993462458962e-06, "loss": 0.2902, "step": 17453 }, { "epoch": 0.7899524779361846, "grad_norm": 0.30835422814081065, "learning_rate": 1.1135323121650542e-06, "loss": 0.4582, "step": 17454 }, { "epoch": 0.7899977370445802, "grad_norm": 0.2869527199386691, "learning_rate": 1.113071245379402e-06, "loss": 0.4902, "step": 17455 }, { "epoch": 0.7900429961529758, "grad_norm": 0.6115552347964449, "learning_rate": 1.1126102621119095e-06, "loss": 0.2943, "step": 17456 }, { "epoch": 0.7900882552613714, "grad_norm": 0.6205860996932847, "learning_rate": 1.1121493623724845e-06, "loss": 0.287, "step": 17457 }, { "epoch": 0.7901335143697669, "grad_norm": 0.6428203422179108, "learning_rate": 1.111688546171028e-06, "loss": 0.3194, "step": 17458 }, { "epoch": 0.7901787734781625, "grad_norm": 0.6574805982745042, "learning_rate": 1.1112278135174438e-06, "loss": 0.278, "step": 17459 }, { "epoch": 0.790224032586558, "grad_norm": 0.6510734470303903, "learning_rate": 1.1107671644216305e-06, "loss": 0.2562, "step": 17460 }, { "epoch": 0.7902692916949536, "grad_norm": 0.2731843402504672, "learning_rate": 1.1103065988934842e-06, "loss": 0.4615, "step": 17461 }, { "epoch": 0.7903145508033492, "grad_norm": 0.60979356487696, "learning_rate": 1.109846116942903e-06, "loss": 0.3162, "step": 17462 }, { "epoch": 0.7903598099117447, "grad_norm": 0.26468973035547944, "learning_rate": 1.109385718579783e-06, "loss": 0.4731, "step": 17463 }, { "epoch": 0.7904050690201403, "grad_norm": 0.7844896219515496, "learning_rate": 1.1089254038140141e-06, "loss": 0.3443, "step": 17464 }, { "epoch": 0.7904503281285359, "grad_norm": 0.6988599850950089, "learning_rate": 1.1084651726554868e-06, "loss": 0.3096, "step": 17465 }, { "epoch": 0.7904955872369315, "grad_norm": 0.25101069040293145, "learning_rate": 1.1080050251140923e-06, "loss": 0.4939, "step": 17466 }, { "epoch": 0.790540846345327, "grad_norm": 0.6044654536372189, "learning_rate": 1.1075449611997153e-06, "loss": 0.3558, "step": 17467 }, { "epoch": 0.7905861054537225, "grad_norm": 0.6321237819686049, "learning_rate": 1.1070849809222428e-06, "loss": 0.3099, "step": 17468 }, { "epoch": 0.7906313645621181, "grad_norm": 0.6163219336638721, "learning_rate": 1.106625084291557e-06, "loss": 0.2565, "step": 17469 }, { "epoch": 0.7906766236705137, "grad_norm": 0.6160762324173575, "learning_rate": 1.1061652713175425e-06, "loss": 0.2767, "step": 17470 }, { "epoch": 0.7907218827789092, "grad_norm": 0.572659054651402, "learning_rate": 1.1057055420100755e-06, "loss": 0.2694, "step": 17471 }, { "epoch": 0.7907671418873048, "grad_norm": 0.6128638406139482, "learning_rate": 1.1052458963790374e-06, "loss": 0.3049, "step": 17472 }, { "epoch": 0.7908124009957004, "grad_norm": 0.6937107123304866, "learning_rate": 1.104786334434303e-06, "loss": 0.3289, "step": 17473 }, { "epoch": 0.790857660104096, "grad_norm": 0.5975880810758326, "learning_rate": 1.1043268561857456e-06, "loss": 0.3183, "step": 17474 }, { "epoch": 0.7909029192124916, "grad_norm": 0.5915920011910971, "learning_rate": 1.103867461643241e-06, "loss": 0.2738, "step": 17475 }, { "epoch": 0.790948178320887, "grad_norm": 0.6566936994613086, "learning_rate": 1.1034081508166588e-06, "loss": 0.2447, "step": 17476 }, { "epoch": 0.7909934374292826, "grad_norm": 0.6188551895462027, "learning_rate": 1.1029489237158663e-06, "loss": 0.3233, "step": 17477 }, { "epoch": 0.7910386965376782, "grad_norm": 0.6344862698039115, "learning_rate": 1.1024897803507322e-06, "loss": 0.2977, "step": 17478 }, { "epoch": 0.7910839556460738, "grad_norm": 0.6057029705000123, "learning_rate": 1.1020307207311244e-06, "loss": 0.3067, "step": 17479 }, { "epoch": 0.7911292147544693, "grad_norm": 0.6705464585220849, "learning_rate": 1.1015717448669045e-06, "loss": 0.2907, "step": 17480 }, { "epoch": 0.7911744738628649, "grad_norm": 0.26897717434855983, "learning_rate": 1.1011128527679332e-06, "loss": 0.478, "step": 17481 }, { "epoch": 0.7912197329712605, "grad_norm": 0.5829298651200642, "learning_rate": 1.1006540444440738e-06, "loss": 0.2897, "step": 17482 }, { "epoch": 0.7912649920796561, "grad_norm": 0.27077008808618885, "learning_rate": 1.100195319905182e-06, "loss": 0.4589, "step": 17483 }, { "epoch": 0.7913102511880516, "grad_norm": 0.6846489277337233, "learning_rate": 1.0997366791611165e-06, "loss": 0.3433, "step": 17484 }, { "epoch": 0.7913555102964471, "grad_norm": 0.6362951825644277, "learning_rate": 1.0992781222217291e-06, "loss": 0.3462, "step": 17485 }, { "epoch": 0.7914007694048427, "grad_norm": 0.5888260862616873, "learning_rate": 1.0988196490968766e-06, "loss": 0.2928, "step": 17486 }, { "epoch": 0.7914460285132383, "grad_norm": 0.6074798482109431, "learning_rate": 1.0983612597964065e-06, "loss": 0.2874, "step": 17487 }, { "epoch": 0.7914912876216339, "grad_norm": 0.6282389175937101, "learning_rate": 1.0979029543301718e-06, "loss": 0.2783, "step": 17488 }, { "epoch": 0.7915365467300294, "grad_norm": 0.5872783503668951, "learning_rate": 1.0974447327080185e-06, "loss": 0.319, "step": 17489 }, { "epoch": 0.791581805838425, "grad_norm": 0.27033500104217156, "learning_rate": 1.0969865949397902e-06, "loss": 0.4732, "step": 17490 }, { "epoch": 0.7916270649468206, "grad_norm": 0.5968647069455049, "learning_rate": 1.0965285410353326e-06, "loss": 0.2963, "step": 17491 }, { "epoch": 0.7916723240552161, "grad_norm": 0.5431438516873746, "learning_rate": 1.09607057100449e-06, "loss": 0.2741, "step": 17492 }, { "epoch": 0.7917175831636116, "grad_norm": 0.5890407122521515, "learning_rate": 1.0956126848571004e-06, "loss": 0.3069, "step": 17493 }, { "epoch": 0.7917628422720072, "grad_norm": 0.6542280291407335, "learning_rate": 1.0951548826030018e-06, "loss": 0.311, "step": 17494 }, { "epoch": 0.7918081013804028, "grad_norm": 0.5326352594264989, "learning_rate": 1.0946971642520327e-06, "loss": 0.2977, "step": 17495 }, { "epoch": 0.7918533604887984, "grad_norm": 0.6390072525495386, "learning_rate": 1.0942395298140262e-06, "loss": 0.367, "step": 17496 }, { "epoch": 0.791898619597194, "grad_norm": 0.25659664659858095, "learning_rate": 1.0937819792988186e-06, "loss": 0.4745, "step": 17497 }, { "epoch": 0.7919438787055895, "grad_norm": 0.6051771049858189, "learning_rate": 1.0933245127162373e-06, "loss": 0.2549, "step": 17498 }, { "epoch": 0.7919891378139851, "grad_norm": 0.6346089264271125, "learning_rate": 1.0928671300761152e-06, "loss": 0.2758, "step": 17499 }, { "epoch": 0.7920343969223806, "grad_norm": 0.7071202798055578, "learning_rate": 1.092409831388277e-06, "loss": 0.3607, "step": 17500 }, { "epoch": 0.7920796560307762, "grad_norm": 0.6110020712418186, "learning_rate": 1.091952616662552e-06, "loss": 0.2941, "step": 17501 }, { "epoch": 0.7921249151391717, "grad_norm": 0.8158655254030276, "learning_rate": 1.0914954859087629e-06, "loss": 0.2926, "step": 17502 }, { "epoch": 0.7921701742475673, "grad_norm": 0.6683921375881807, "learning_rate": 1.0910384391367296e-06, "loss": 0.2774, "step": 17503 }, { "epoch": 0.7922154333559629, "grad_norm": 0.5978056609012776, "learning_rate": 1.0905814763562755e-06, "loss": 0.3091, "step": 17504 }, { "epoch": 0.7922606924643585, "grad_norm": 0.6267850613795837, "learning_rate": 1.0901245975772207e-06, "loss": 0.2897, "step": 17505 }, { "epoch": 0.792305951572754, "grad_norm": 0.25613541359831193, "learning_rate": 1.0896678028093777e-06, "loss": 0.4773, "step": 17506 }, { "epoch": 0.7923512106811496, "grad_norm": 0.28159218026235666, "learning_rate": 1.0892110920625643e-06, "loss": 0.4721, "step": 17507 }, { "epoch": 0.7923964697895451, "grad_norm": 0.681314188020368, "learning_rate": 1.0887544653465942e-06, "loss": 0.3366, "step": 17508 }, { "epoch": 0.7924417288979407, "grad_norm": 0.7454404200334943, "learning_rate": 1.0882979226712782e-06, "loss": 0.2951, "step": 17509 }, { "epoch": 0.7924869880063363, "grad_norm": 0.2699858245041074, "learning_rate": 1.0878414640464247e-06, "loss": 0.4393, "step": 17510 }, { "epoch": 0.7925322471147318, "grad_norm": 0.27894602885659225, "learning_rate": 1.0873850894818433e-06, "loss": 0.455, "step": 17511 }, { "epoch": 0.7925775062231274, "grad_norm": 0.29535618246704853, "learning_rate": 1.0869287989873406e-06, "loss": 0.4873, "step": 17512 }, { "epoch": 0.792622765331523, "grad_norm": 0.27670213394183, "learning_rate": 1.0864725925727198e-06, "loss": 0.4604, "step": 17513 }, { "epoch": 0.7926680244399186, "grad_norm": 1.046633322989338, "learning_rate": 1.0860164702477826e-06, "loss": 0.2957, "step": 17514 }, { "epoch": 0.792713283548314, "grad_norm": 0.6662828314797252, "learning_rate": 1.0855604320223317e-06, "loss": 0.3571, "step": 17515 }, { "epoch": 0.7927585426567096, "grad_norm": 0.6028625071373115, "learning_rate": 1.085104477906163e-06, "loss": 0.2822, "step": 17516 }, { "epoch": 0.7928038017651052, "grad_norm": 0.7617858158323627, "learning_rate": 1.0846486079090773e-06, "loss": 0.3176, "step": 17517 }, { "epoch": 0.7928490608735008, "grad_norm": 0.6198032078988815, "learning_rate": 1.0841928220408682e-06, "loss": 0.2881, "step": 17518 }, { "epoch": 0.7928943199818964, "grad_norm": 0.595029698665207, "learning_rate": 1.0837371203113266e-06, "loss": 0.2502, "step": 17519 }, { "epoch": 0.7929395790902919, "grad_norm": 0.6069144135300447, "learning_rate": 1.0832815027302473e-06, "loss": 0.2761, "step": 17520 }, { "epoch": 0.7929848381986875, "grad_norm": 0.626882704408546, "learning_rate": 1.08282596930742e-06, "loss": 0.3101, "step": 17521 }, { "epoch": 0.7930300973070831, "grad_norm": 0.5670749951186008, "learning_rate": 1.0823705200526325e-06, "loss": 0.2865, "step": 17522 }, { "epoch": 0.7930753564154787, "grad_norm": 0.5826516551992915, "learning_rate": 1.0819151549756685e-06, "loss": 0.3193, "step": 17523 }, { "epoch": 0.7931206155238741, "grad_norm": 0.27305265116969735, "learning_rate": 1.081459874086316e-06, "loss": 0.4747, "step": 17524 }, { "epoch": 0.7931658746322697, "grad_norm": 0.7086693801004199, "learning_rate": 1.0810046773943544e-06, "loss": 0.3022, "step": 17525 }, { "epoch": 0.7932111337406653, "grad_norm": 0.5747501021359275, "learning_rate": 1.0805495649095676e-06, "loss": 0.2586, "step": 17526 }, { "epoch": 0.7932563928490609, "grad_norm": 0.6506436875014426, "learning_rate": 1.0800945366417316e-06, "loss": 0.277, "step": 17527 }, { "epoch": 0.7933016519574564, "grad_norm": 0.6422115703988123, "learning_rate": 1.0796395926006258e-06, "loss": 0.3037, "step": 17528 }, { "epoch": 0.793346911065852, "grad_norm": 0.25125905669770676, "learning_rate": 1.0791847327960236e-06, "loss": 0.452, "step": 17529 }, { "epoch": 0.7933921701742476, "grad_norm": 0.719021657172756, "learning_rate": 1.0787299572377015e-06, "loss": 0.2964, "step": 17530 }, { "epoch": 0.7934374292826432, "grad_norm": 0.28278646840808924, "learning_rate": 1.078275265935429e-06, "loss": 0.4507, "step": 17531 }, { "epoch": 0.7934826883910387, "grad_norm": 0.6185083956022757, "learning_rate": 1.0778206588989748e-06, "loss": 0.268, "step": 17532 }, { "epoch": 0.7935279474994342, "grad_norm": 0.6212316194701377, "learning_rate": 1.0773661361381088e-06, "loss": 0.3169, "step": 17533 }, { "epoch": 0.7935732066078298, "grad_norm": 0.6590947742085103, "learning_rate": 1.0769116976625998e-06, "loss": 0.3188, "step": 17534 }, { "epoch": 0.7936184657162254, "grad_norm": 0.5734631130915846, "learning_rate": 1.0764573434822067e-06, "loss": 0.2602, "step": 17535 }, { "epoch": 0.793663724824621, "grad_norm": 0.5968917546790733, "learning_rate": 1.0760030736066952e-06, "loss": 0.2786, "step": 17536 }, { "epoch": 0.7937089839330165, "grad_norm": 0.2466211171565022, "learning_rate": 1.075548888045827e-06, "loss": 0.4527, "step": 17537 }, { "epoch": 0.7937542430414121, "grad_norm": 0.6092165463869262, "learning_rate": 1.0750947868093608e-06, "loss": 0.3244, "step": 17538 }, { "epoch": 0.7937995021498077, "grad_norm": 0.6165602362370749, "learning_rate": 1.0746407699070516e-06, "loss": 0.2905, "step": 17539 }, { "epoch": 0.7938447612582032, "grad_norm": 0.25864134317846904, "learning_rate": 1.0741868373486564e-06, "loss": 0.4509, "step": 17540 }, { "epoch": 0.7938900203665987, "grad_norm": 0.6255255495885236, "learning_rate": 1.0737329891439303e-06, "loss": 0.3143, "step": 17541 }, { "epoch": 0.7939352794749943, "grad_norm": 0.5879664093941299, "learning_rate": 1.0732792253026231e-06, "loss": 0.2957, "step": 17542 }, { "epoch": 0.7939805385833899, "grad_norm": 0.7776972902240687, "learning_rate": 1.0728255458344843e-06, "loss": 0.3047, "step": 17543 }, { "epoch": 0.7940257976917855, "grad_norm": 0.6120374867402096, "learning_rate": 1.0723719507492648e-06, "loss": 0.3092, "step": 17544 }, { "epoch": 0.7940710568001811, "grad_norm": 0.6156840091395118, "learning_rate": 1.0719184400567078e-06, "loss": 0.3109, "step": 17545 }, { "epoch": 0.7941163159085766, "grad_norm": 0.6428936730218494, "learning_rate": 1.0714650137665604e-06, "loss": 0.3113, "step": 17546 }, { "epoch": 0.7941615750169722, "grad_norm": 0.6156742053106594, "learning_rate": 1.071011671888565e-06, "loss": 0.3354, "step": 17547 }, { "epoch": 0.7942068341253677, "grad_norm": 0.7783029452965077, "learning_rate": 1.07055841443246e-06, "loss": 0.3165, "step": 17548 }, { "epoch": 0.7942520932337633, "grad_norm": 0.6133884955674895, "learning_rate": 1.070105241407986e-06, "loss": 0.2703, "step": 17549 }, { "epoch": 0.7942973523421588, "grad_norm": 0.26731785140084197, "learning_rate": 1.0696521528248822e-06, "loss": 0.4519, "step": 17550 }, { "epoch": 0.7943426114505544, "grad_norm": 0.7314874059177867, "learning_rate": 1.0691991486928826e-06, "loss": 0.2993, "step": 17551 }, { "epoch": 0.79438787055895, "grad_norm": 0.5885684214964538, "learning_rate": 1.0687462290217193e-06, "loss": 0.3224, "step": 17552 }, { "epoch": 0.7944331296673456, "grad_norm": 0.6464653570859419, "learning_rate": 1.0682933938211272e-06, "loss": 0.3066, "step": 17553 }, { "epoch": 0.7944783887757412, "grad_norm": 0.6038104443874369, "learning_rate": 1.067840643100833e-06, "loss": 0.2899, "step": 17554 }, { "epoch": 0.7945236478841367, "grad_norm": 0.26682934326948565, "learning_rate": 1.0673879768705681e-06, "loss": 0.4692, "step": 17555 }, { "epoch": 0.7945689069925322, "grad_norm": 0.582118110369442, "learning_rate": 1.0669353951400563e-06, "loss": 0.31, "step": 17556 }, { "epoch": 0.7946141661009278, "grad_norm": 1.0647840750997166, "learning_rate": 1.066482897919025e-06, "loss": 0.2951, "step": 17557 }, { "epoch": 0.7946594252093234, "grad_norm": 0.569722159501484, "learning_rate": 1.0660304852171932e-06, "loss": 0.3115, "step": 17558 }, { "epoch": 0.7947046843177189, "grad_norm": 0.5872530009817197, "learning_rate": 1.0655781570442864e-06, "loss": 0.2882, "step": 17559 }, { "epoch": 0.7947499434261145, "grad_norm": 0.6712127830923466, "learning_rate": 1.0651259134100205e-06, "loss": 0.2829, "step": 17560 }, { "epoch": 0.7947952025345101, "grad_norm": 0.6292811854227739, "learning_rate": 1.0646737543241125e-06, "loss": 0.2982, "step": 17561 }, { "epoch": 0.7948404616429057, "grad_norm": 0.5772369823628817, "learning_rate": 1.0642216797962795e-06, "loss": 0.2927, "step": 17562 }, { "epoch": 0.7948857207513012, "grad_norm": 0.270961324115532, "learning_rate": 1.063769689836237e-06, "loss": 0.4438, "step": 17563 }, { "epoch": 0.7949309798596967, "grad_norm": 0.8262430223157452, "learning_rate": 1.0633177844536924e-06, "loss": 0.3384, "step": 17564 }, { "epoch": 0.7949762389680923, "grad_norm": 0.3042669338076642, "learning_rate": 1.0628659636583577e-06, "loss": 0.4954, "step": 17565 }, { "epoch": 0.7950214980764879, "grad_norm": 0.6161863340896231, "learning_rate": 1.0624142274599425e-06, "loss": 0.2735, "step": 17566 }, { "epoch": 0.7950667571848835, "grad_norm": 1.033689895662228, "learning_rate": 1.061962575868153e-06, "loss": 0.2773, "step": 17567 }, { "epoch": 0.795112016293279, "grad_norm": 0.3166101438817103, "learning_rate": 1.061511008892691e-06, "loss": 0.4553, "step": 17568 }, { "epoch": 0.7951572754016746, "grad_norm": 0.6893218205777645, "learning_rate": 1.0610595265432615e-06, "loss": 0.2797, "step": 17569 }, { "epoch": 0.7952025345100702, "grad_norm": 0.65132750453131, "learning_rate": 1.0606081288295666e-06, "loss": 0.2648, "step": 17570 }, { "epoch": 0.7952477936184658, "grad_norm": 0.700838971389305, "learning_rate": 1.060156815761304e-06, "loss": 0.2626, "step": 17571 }, { "epoch": 0.7952930527268612, "grad_norm": 0.7335130980432949, "learning_rate": 1.05970558734817e-06, "loss": 0.2909, "step": 17572 }, { "epoch": 0.7953383118352568, "grad_norm": 0.25704225033621225, "learning_rate": 1.059254443599862e-06, "loss": 0.4322, "step": 17573 }, { "epoch": 0.7953835709436524, "grad_norm": 0.6633779095086854, "learning_rate": 1.058803384526072e-06, "loss": 0.2754, "step": 17574 }, { "epoch": 0.795428830052048, "grad_norm": 0.5887178006824763, "learning_rate": 1.0583524101364945e-06, "loss": 0.2911, "step": 17575 }, { "epoch": 0.7954740891604435, "grad_norm": 0.6230379356817728, "learning_rate": 1.0579015204408172e-06, "loss": 0.2468, "step": 17576 }, { "epoch": 0.7955193482688391, "grad_norm": 0.27580698258235353, "learning_rate": 1.0574507154487279e-06, "loss": 0.4821, "step": 17577 }, { "epoch": 0.7955646073772347, "grad_norm": 0.7505786064339385, "learning_rate": 1.0569999951699145e-06, "loss": 0.355, "step": 17578 }, { "epoch": 0.7956098664856303, "grad_norm": 0.5648279984944123, "learning_rate": 1.056549359614062e-06, "loss": 0.2797, "step": 17579 }, { "epoch": 0.7956551255940258, "grad_norm": 0.2638095897738326, "learning_rate": 1.0560988087908525e-06, "loss": 0.4642, "step": 17580 }, { "epoch": 0.7957003847024213, "grad_norm": 0.6005269717461627, "learning_rate": 1.0556483427099656e-06, "loss": 0.2654, "step": 17581 }, { "epoch": 0.7957456438108169, "grad_norm": 0.6777576064315107, "learning_rate": 1.0551979613810814e-06, "loss": 0.2949, "step": 17582 }, { "epoch": 0.7957909029192125, "grad_norm": 0.599855625140221, "learning_rate": 1.0547476648138794e-06, "loss": 0.2811, "step": 17583 }, { "epoch": 0.7958361620276081, "grad_norm": 0.25174656246019217, "learning_rate": 1.0542974530180327e-06, "loss": 0.45, "step": 17584 }, { "epoch": 0.7958814211360036, "grad_norm": 0.7033764647057604, "learning_rate": 1.053847326003214e-06, "loss": 0.2858, "step": 17585 }, { "epoch": 0.7959266802443992, "grad_norm": 0.5999488290277657, "learning_rate": 1.0533972837790985e-06, "loss": 0.2377, "step": 17586 }, { "epoch": 0.7959719393527948, "grad_norm": 0.5964618331830029, "learning_rate": 1.0529473263553524e-06, "loss": 0.2866, "step": 17587 }, { "epoch": 0.7960171984611903, "grad_norm": 0.6255752090349693, "learning_rate": 1.052497453741647e-06, "loss": 0.2431, "step": 17588 }, { "epoch": 0.7960624575695858, "grad_norm": 0.6280639343828609, "learning_rate": 1.052047665947648e-06, "loss": 0.2714, "step": 17589 }, { "epoch": 0.7961077166779814, "grad_norm": 0.6175233941700501, "learning_rate": 1.051597962983018e-06, "loss": 0.3195, "step": 17590 }, { "epoch": 0.796152975786377, "grad_norm": 0.595813377188827, "learning_rate": 1.0511483448574212e-06, "loss": 0.2878, "step": 17591 }, { "epoch": 0.7961982348947726, "grad_norm": 0.6555253013213158, "learning_rate": 1.0506988115805212e-06, "loss": 0.2934, "step": 17592 }, { "epoch": 0.7962434940031682, "grad_norm": 0.6763322039481978, "learning_rate": 1.0502493631619715e-06, "loss": 0.2639, "step": 17593 }, { "epoch": 0.7962887531115637, "grad_norm": 0.6351978850263815, "learning_rate": 1.0497999996114322e-06, "loss": 0.2782, "step": 17594 }, { "epoch": 0.7963340122199593, "grad_norm": 0.4986814228547615, "learning_rate": 1.0493507209385606e-06, "loss": 0.4738, "step": 17595 }, { "epoch": 0.7963792713283548, "grad_norm": 0.2834511240637836, "learning_rate": 1.0489015271530084e-06, "loss": 0.4641, "step": 17596 }, { "epoch": 0.7964245304367504, "grad_norm": 0.6958528444526049, "learning_rate": 1.0484524182644257e-06, "loss": 0.3247, "step": 17597 }, { "epoch": 0.7964697895451459, "grad_norm": 0.6465101177250668, "learning_rate": 1.0480033942824647e-06, "loss": 0.293, "step": 17598 }, { "epoch": 0.7965150486535415, "grad_norm": 0.6326649070370953, "learning_rate": 1.0475544552167744e-06, "loss": 0.3043, "step": 17599 }, { "epoch": 0.7965603077619371, "grad_norm": 0.5894944985589974, "learning_rate": 1.0471056010769997e-06, "loss": 0.3071, "step": 17600 }, { "epoch": 0.7966055668703327, "grad_norm": 0.2670228912312689, "learning_rate": 1.0466568318727837e-06, "loss": 0.458, "step": 17601 }, { "epoch": 0.7966508259787283, "grad_norm": 0.5992312412861361, "learning_rate": 1.0462081476137726e-06, "loss": 0.3272, "step": 17602 }, { "epoch": 0.7966960850871238, "grad_norm": 0.7801764524156193, "learning_rate": 1.0457595483096033e-06, "loss": 0.2947, "step": 17603 }, { "epoch": 0.7967413441955193, "grad_norm": 0.608868802735606, "learning_rate": 1.0453110339699184e-06, "loss": 0.3329, "step": 17604 }, { "epoch": 0.7967866033039149, "grad_norm": 0.7777214442910269, "learning_rate": 1.0448626046043536e-06, "loss": 0.2705, "step": 17605 }, { "epoch": 0.7968318624123105, "grad_norm": 0.6140735157977182, "learning_rate": 1.0444142602225426e-06, "loss": 0.29, "step": 17606 }, { "epoch": 0.796877121520706, "grad_norm": 0.6711582640372278, "learning_rate": 1.0439660008341208e-06, "loss": 0.3553, "step": 17607 }, { "epoch": 0.7969223806291016, "grad_norm": 0.6043826392582125, "learning_rate": 1.0435178264487205e-06, "loss": 0.3375, "step": 17608 }, { "epoch": 0.7969676397374972, "grad_norm": 0.2688407526676726, "learning_rate": 1.0430697370759706e-06, "loss": 0.4748, "step": 17609 }, { "epoch": 0.7970128988458928, "grad_norm": 0.6964940237730056, "learning_rate": 1.0426217327254984e-06, "loss": 0.257, "step": 17610 }, { "epoch": 0.7970581579542882, "grad_norm": 0.6460807926911792, "learning_rate": 1.0421738134069309e-06, "loss": 0.2833, "step": 17611 }, { "epoch": 0.7971034170626838, "grad_norm": 0.6468028147962669, "learning_rate": 1.041725979129894e-06, "loss": 0.3434, "step": 17612 }, { "epoch": 0.7971486761710794, "grad_norm": 0.2694178792341854, "learning_rate": 1.0412782299040086e-06, "loss": 0.4458, "step": 17613 }, { "epoch": 0.797193935279475, "grad_norm": 0.296431835651895, "learning_rate": 1.040830565738895e-06, "loss": 0.4712, "step": 17614 }, { "epoch": 0.7972391943878706, "grad_norm": 0.6637816879674655, "learning_rate": 1.0403829866441734e-06, "loss": 0.2945, "step": 17615 }, { "epoch": 0.7972844534962661, "grad_norm": 0.5937931470412802, "learning_rate": 1.0399354926294596e-06, "loss": 0.2797, "step": 17616 }, { "epoch": 0.7973297126046617, "grad_norm": 0.5951980187062453, "learning_rate": 1.0394880837043708e-06, "loss": 0.2686, "step": 17617 }, { "epoch": 0.7973749717130573, "grad_norm": 0.6100560066934261, "learning_rate": 1.0390407598785196e-06, "loss": 0.2934, "step": 17618 }, { "epoch": 0.7974202308214529, "grad_norm": 0.6433777122358736, "learning_rate": 1.0385935211615156e-06, "loss": 0.3198, "step": 17619 }, { "epoch": 0.7974654899298483, "grad_norm": 0.5906931436439543, "learning_rate": 1.0381463675629705e-06, "loss": 0.3051, "step": 17620 }, { "epoch": 0.7975107490382439, "grad_norm": 0.6337864263053438, "learning_rate": 1.0376992990924934e-06, "loss": 0.2657, "step": 17621 }, { "epoch": 0.7975560081466395, "grad_norm": 0.6088029034611621, "learning_rate": 1.0372523157596892e-06, "loss": 0.3196, "step": 17622 }, { "epoch": 0.7976012672550351, "grad_norm": 0.599341414845403, "learning_rate": 1.0368054175741605e-06, "loss": 0.2832, "step": 17623 }, { "epoch": 0.7976465263634306, "grad_norm": 0.6090217413328994, "learning_rate": 1.0363586045455116e-06, "loss": 0.3151, "step": 17624 }, { "epoch": 0.7976917854718262, "grad_norm": 0.7199767068371429, "learning_rate": 1.0359118766833449e-06, "loss": 0.2896, "step": 17625 }, { "epoch": 0.7977370445802218, "grad_norm": 0.6920834954483712, "learning_rate": 1.0354652339972554e-06, "loss": 0.2849, "step": 17626 }, { "epoch": 0.7977823036886174, "grad_norm": 0.6620269494371919, "learning_rate": 1.0350186764968412e-06, "loss": 0.2696, "step": 17627 }, { "epoch": 0.797827562797013, "grad_norm": 0.43858767360734446, "learning_rate": 1.0345722041917e-06, "loss": 0.4844, "step": 17628 }, { "epoch": 0.7978728219054084, "grad_norm": 0.6894559615869037, "learning_rate": 1.0341258170914232e-06, "loss": 0.2617, "step": 17629 }, { "epoch": 0.797918081013804, "grad_norm": 0.605112420834483, "learning_rate": 1.0336795152056006e-06, "loss": 0.3109, "step": 17630 }, { "epoch": 0.7979633401221996, "grad_norm": 0.7878539249551214, "learning_rate": 1.0332332985438248e-06, "loss": 0.3024, "step": 17631 }, { "epoch": 0.7980085992305952, "grad_norm": 0.5933180263637753, "learning_rate": 1.0327871671156814e-06, "loss": 0.2722, "step": 17632 }, { "epoch": 0.7980538583389907, "grad_norm": 0.5777220334047763, "learning_rate": 1.0323411209307587e-06, "loss": 0.2742, "step": 17633 }, { "epoch": 0.7980991174473863, "grad_norm": 0.5305555011830864, "learning_rate": 1.03189515999864e-06, "loss": 0.2725, "step": 17634 }, { "epoch": 0.7981443765557819, "grad_norm": 0.6397495079379393, "learning_rate": 1.0314492843289053e-06, "loss": 0.3186, "step": 17635 }, { "epoch": 0.7981896356641774, "grad_norm": 0.5854544091507691, "learning_rate": 1.0310034939311376e-06, "loss": 0.2468, "step": 17636 }, { "epoch": 0.798234894772573, "grad_norm": 0.6268437136490664, "learning_rate": 1.030557788814916e-06, "loss": 0.2811, "step": 17637 }, { "epoch": 0.7982801538809685, "grad_norm": 0.61366665863228, "learning_rate": 1.0301121689898158e-06, "loss": 0.3128, "step": 17638 }, { "epoch": 0.7983254129893641, "grad_norm": 0.6636046824576902, "learning_rate": 1.0296666344654115e-06, "loss": 0.3215, "step": 17639 }, { "epoch": 0.7983706720977597, "grad_norm": 0.6466243191249332, "learning_rate": 1.029221185251278e-06, "loss": 0.312, "step": 17640 }, { "epoch": 0.7984159312061553, "grad_norm": 0.6267688365230768, "learning_rate": 1.0287758213569865e-06, "loss": 0.2783, "step": 17641 }, { "epoch": 0.7984611903145508, "grad_norm": 0.6067358900754252, "learning_rate": 1.0283305427921058e-06, "loss": 0.2886, "step": 17642 }, { "epoch": 0.7985064494229464, "grad_norm": 0.6130856239014243, "learning_rate": 1.0278853495662028e-06, "loss": 0.269, "step": 17643 }, { "epoch": 0.7985517085313419, "grad_norm": 0.6277865285234768, "learning_rate": 1.0274402416888452e-06, "loss": 0.2817, "step": 17644 }, { "epoch": 0.7985969676397375, "grad_norm": 0.26982262436094706, "learning_rate": 1.0269952191695948e-06, "loss": 0.4845, "step": 17645 }, { "epoch": 0.798642226748133, "grad_norm": 0.29022274123693, "learning_rate": 1.0265502820180167e-06, "loss": 0.486, "step": 17646 }, { "epoch": 0.7986874858565286, "grad_norm": 0.27151041749692006, "learning_rate": 1.026105430243669e-06, "loss": 0.472, "step": 17647 }, { "epoch": 0.7987327449649242, "grad_norm": 0.6086692202481577, "learning_rate": 1.0256606638561094e-06, "loss": 0.2523, "step": 17648 }, { "epoch": 0.7987780040733198, "grad_norm": 0.6459863488738338, "learning_rate": 1.0252159828648961e-06, "loss": 0.3177, "step": 17649 }, { "epoch": 0.7988232631817154, "grad_norm": 0.6423110726927362, "learning_rate": 1.024771387279585e-06, "loss": 0.3043, "step": 17650 }, { "epoch": 0.7988685222901108, "grad_norm": 0.6897569820488314, "learning_rate": 1.024326877109728e-06, "loss": 0.29, "step": 17651 }, { "epoch": 0.7989137813985064, "grad_norm": 0.6781956667845884, "learning_rate": 1.0238824523648744e-06, "loss": 0.2848, "step": 17652 }, { "epoch": 0.798959040506902, "grad_norm": 0.6070597055985785, "learning_rate": 1.0234381130545757e-06, "loss": 0.2834, "step": 17653 }, { "epoch": 0.7990042996152976, "grad_norm": 0.6186120877052891, "learning_rate": 1.0229938591883798e-06, "loss": 0.2665, "step": 17654 }, { "epoch": 0.7990495587236931, "grad_norm": 0.6134788953343151, "learning_rate": 1.0225496907758314e-06, "loss": 0.2771, "step": 17655 }, { "epoch": 0.7990948178320887, "grad_norm": 0.6517754167771941, "learning_rate": 1.022105607826473e-06, "loss": 0.3166, "step": 17656 }, { "epoch": 0.7991400769404843, "grad_norm": 0.672306692935697, "learning_rate": 1.0216616103498494e-06, "loss": 0.2806, "step": 17657 }, { "epoch": 0.7991853360488799, "grad_norm": 0.6391489481315076, "learning_rate": 1.021217698355499e-06, "loss": 0.3117, "step": 17658 }, { "epoch": 0.7992305951572753, "grad_norm": 0.6034580647484019, "learning_rate": 1.0207738718529592e-06, "loss": 0.2954, "step": 17659 }, { "epoch": 0.7992758542656709, "grad_norm": 0.601464046082302, "learning_rate": 1.0203301308517687e-06, "loss": 0.2504, "step": 17660 }, { "epoch": 0.7993211133740665, "grad_norm": 0.6401166761859084, "learning_rate": 1.0198864753614602e-06, "loss": 0.2727, "step": 17661 }, { "epoch": 0.7993663724824621, "grad_norm": 0.6225955998793017, "learning_rate": 1.0194429053915683e-06, "loss": 0.2765, "step": 17662 }, { "epoch": 0.7994116315908577, "grad_norm": 0.26537395003035563, "learning_rate": 1.0189994209516234e-06, "loss": 0.4749, "step": 17663 }, { "epoch": 0.7994568906992532, "grad_norm": 0.6540042865726453, "learning_rate": 1.0185560220511525e-06, "loss": 0.2843, "step": 17664 }, { "epoch": 0.7995021498076488, "grad_norm": 0.6185663619453415, "learning_rate": 1.018112708699685e-06, "loss": 0.3233, "step": 17665 }, { "epoch": 0.7995474089160444, "grad_norm": 0.647486381785589, "learning_rate": 1.0176694809067471e-06, "loss": 0.2752, "step": 17666 }, { "epoch": 0.79959266802444, "grad_norm": 0.6502279765653171, "learning_rate": 1.0172263386818615e-06, "loss": 0.3064, "step": 17667 }, { "epoch": 0.7996379271328354, "grad_norm": 0.2757197327489894, "learning_rate": 1.016783282034548e-06, "loss": 0.471, "step": 17668 }, { "epoch": 0.799683186241231, "grad_norm": 0.7585339264168486, "learning_rate": 1.0163403109743287e-06, "loss": 0.3186, "step": 17669 }, { "epoch": 0.7997284453496266, "grad_norm": 0.5495025188913338, "learning_rate": 1.0158974255107223e-06, "loss": 0.3192, "step": 17670 }, { "epoch": 0.7997737044580222, "grad_norm": 0.6535560242829833, "learning_rate": 1.0154546256532438e-06, "loss": 0.295, "step": 17671 }, { "epoch": 0.7998189635664178, "grad_norm": 0.6595014318560279, "learning_rate": 1.0150119114114066e-06, "loss": 0.3458, "step": 17672 }, { "epoch": 0.7998642226748133, "grad_norm": 0.5929165171952434, "learning_rate": 1.0145692827947256e-06, "loss": 0.3189, "step": 17673 }, { "epoch": 0.7999094817832089, "grad_norm": 0.6466146823306345, "learning_rate": 1.0141267398127098e-06, "loss": 0.3333, "step": 17674 }, { "epoch": 0.7999547408916045, "grad_norm": 0.7863825353875846, "learning_rate": 1.0136842824748694e-06, "loss": 0.303, "step": 17675 }, { "epoch": 0.8, "grad_norm": 0.6008716630456132, "learning_rate": 1.013241910790711e-06, "loss": 0.3102, "step": 17676 }, { "epoch": 0.8000452591083955, "grad_norm": 0.5469202165957461, "learning_rate": 1.012799624769738e-06, "loss": 0.2713, "step": 17677 }, { "epoch": 0.8000905182167911, "grad_norm": 0.6538977964631789, "learning_rate": 1.0123574244214552e-06, "loss": 0.2669, "step": 17678 }, { "epoch": 0.8001357773251867, "grad_norm": 0.6411931463000914, "learning_rate": 1.0119153097553657e-06, "loss": 0.2752, "step": 17679 }, { "epoch": 0.8001810364335823, "grad_norm": 0.6266810385085103, "learning_rate": 1.011473280780968e-06, "loss": 0.3539, "step": 17680 }, { "epoch": 0.8002262955419778, "grad_norm": 0.2999474320900906, "learning_rate": 1.011031337507758e-06, "loss": 0.4766, "step": 17681 }, { "epoch": 0.8002715546503734, "grad_norm": 0.7185236225481391, "learning_rate": 1.0105894799452337e-06, "loss": 0.3193, "step": 17682 }, { "epoch": 0.800316813758769, "grad_norm": 0.6359176456441936, "learning_rate": 1.0101477081028899e-06, "loss": 0.282, "step": 17683 }, { "epoch": 0.8003620728671645, "grad_norm": 0.6174531661683802, "learning_rate": 1.0097060219902183e-06, "loss": 0.2772, "step": 17684 }, { "epoch": 0.8004073319755601, "grad_norm": 0.26616502028666844, "learning_rate": 1.0092644216167076e-06, "loss": 0.454, "step": 17685 }, { "epoch": 0.8004525910839556, "grad_norm": 0.6308937760368052, "learning_rate": 1.0088229069918488e-06, "loss": 0.2659, "step": 17686 }, { "epoch": 0.8004978501923512, "grad_norm": 0.5928485973391232, "learning_rate": 1.0083814781251266e-06, "loss": 0.2866, "step": 17687 }, { "epoch": 0.8005431093007468, "grad_norm": 0.6026542050506672, "learning_rate": 1.0079401350260288e-06, "loss": 0.2788, "step": 17688 }, { "epoch": 0.8005883684091424, "grad_norm": 0.6765753998077264, "learning_rate": 1.0074988777040368e-06, "loss": 0.3273, "step": 17689 }, { "epoch": 0.8006336275175379, "grad_norm": 0.7063825924900976, "learning_rate": 1.0070577061686305e-06, "loss": 0.3075, "step": 17690 }, { "epoch": 0.8006788866259335, "grad_norm": 0.6579882369889726, "learning_rate": 1.0066166204292915e-06, "loss": 0.3028, "step": 17691 }, { "epoch": 0.800724145734329, "grad_norm": 0.6188920702015304, "learning_rate": 1.006175620495497e-06, "loss": 0.282, "step": 17692 }, { "epoch": 0.8007694048427246, "grad_norm": 0.7598314254003824, "learning_rate": 1.005734706376721e-06, "loss": 0.284, "step": 17693 }, { "epoch": 0.8008146639511201, "grad_norm": 0.6781608359130874, "learning_rate": 1.005293878082439e-06, "loss": 0.2815, "step": 17694 }, { "epoch": 0.8008599230595157, "grad_norm": 0.6378508227692651, "learning_rate": 1.0048531356221235e-06, "loss": 0.3225, "step": 17695 }, { "epoch": 0.8009051821679113, "grad_norm": 0.5678500878789736, "learning_rate": 1.0044124790052445e-06, "loss": 0.2734, "step": 17696 }, { "epoch": 0.8009504412763069, "grad_norm": 0.5803032201494306, "learning_rate": 1.003971908241268e-06, "loss": 0.324, "step": 17697 }, { "epoch": 0.8009957003847025, "grad_norm": 0.612816553250393, "learning_rate": 1.0035314233396625e-06, "loss": 0.3237, "step": 17698 }, { "epoch": 0.801040959493098, "grad_norm": 0.30486019227051825, "learning_rate": 1.003091024309894e-06, "loss": 0.4817, "step": 17699 }, { "epoch": 0.8010862186014935, "grad_norm": 0.5718601567690808, "learning_rate": 1.0026507111614237e-06, "loss": 0.3197, "step": 17700 }, { "epoch": 0.8011314777098891, "grad_norm": 0.6975670743286432, "learning_rate": 1.0022104839037117e-06, "loss": 0.3026, "step": 17701 }, { "epoch": 0.8011767368182847, "grad_norm": 0.698243244761055, "learning_rate": 1.0017703425462188e-06, "loss": 0.2704, "step": 17702 }, { "epoch": 0.8012219959266802, "grad_norm": 0.6034277810329546, "learning_rate": 1.001330287098401e-06, "loss": 0.2848, "step": 17703 }, { "epoch": 0.8012672550350758, "grad_norm": 0.6413270440601546, "learning_rate": 1.000890317569715e-06, "loss": 0.2948, "step": 17704 }, { "epoch": 0.8013125141434714, "grad_norm": 0.5807050571774788, "learning_rate": 1.0004504339696142e-06, "loss": 0.2834, "step": 17705 }, { "epoch": 0.801357773251867, "grad_norm": 0.5554722470951098, "learning_rate": 1.0000106363075486e-06, "loss": 0.2843, "step": 17706 }, { "epoch": 0.8014030323602626, "grad_norm": 0.6102523337797331, "learning_rate": 9.995709245929691e-07, "loss": 0.3053, "step": 17707 }, { "epoch": 0.801448291468658, "grad_norm": 0.5993098468757605, "learning_rate": 9.991312988353252e-07, "loss": 0.297, "step": 17708 }, { "epoch": 0.8014935505770536, "grad_norm": 0.25248078783050365, "learning_rate": 9.986917590440626e-07, "loss": 0.4618, "step": 17709 }, { "epoch": 0.8015388096854492, "grad_norm": 0.6236113679626196, "learning_rate": 9.98252305228623e-07, "loss": 0.3025, "step": 17710 }, { "epoch": 0.8015840687938448, "grad_norm": 0.652868395328871, "learning_rate": 9.978129373984513e-07, "loss": 0.2469, "step": 17711 }, { "epoch": 0.8016293279022403, "grad_norm": 0.8312087621953048, "learning_rate": 9.973736555629894e-07, "loss": 0.3167, "step": 17712 }, { "epoch": 0.8016745870106359, "grad_norm": 0.5410252483694974, "learning_rate": 9.969344597316737e-07, "loss": 0.2976, "step": 17713 }, { "epoch": 0.8017198461190315, "grad_norm": 0.5911874079384539, "learning_rate": 9.964953499139412e-07, "loss": 0.265, "step": 17714 }, { "epoch": 0.801765105227427, "grad_norm": 0.6134724463121841, "learning_rate": 9.96056326119229e-07, "loss": 0.2728, "step": 17715 }, { "epoch": 0.8018103643358225, "grad_norm": 0.6130417169184403, "learning_rate": 9.95617388356968e-07, "loss": 0.3108, "step": 17716 }, { "epoch": 0.8018556234442181, "grad_norm": 0.5780465373609368, "learning_rate": 9.951785366365924e-07, "loss": 0.2989, "step": 17717 }, { "epoch": 0.8019008825526137, "grad_norm": 0.2673010955946115, "learning_rate": 9.9473977096753e-07, "loss": 0.4656, "step": 17718 }, { "epoch": 0.8019461416610093, "grad_norm": 0.6280599241854697, "learning_rate": 9.943010913592072e-07, "loss": 0.296, "step": 17719 }, { "epoch": 0.8019914007694049, "grad_norm": 0.2905817135983755, "learning_rate": 9.938624978210514e-07, "loss": 0.4676, "step": 17720 }, { "epoch": 0.8020366598778004, "grad_norm": 0.2699402741418339, "learning_rate": 9.934239903624893e-07, "loss": 0.4627, "step": 17721 }, { "epoch": 0.802081918986196, "grad_norm": 0.5640631750848742, "learning_rate": 9.929855689929374e-07, "loss": 0.2849, "step": 17722 }, { "epoch": 0.8021271780945916, "grad_norm": 0.6868637680081962, "learning_rate": 9.925472337218194e-07, "loss": 0.2839, "step": 17723 }, { "epoch": 0.8021724372029871, "grad_norm": 0.26501366463231285, "learning_rate": 9.921089845585536e-07, "loss": 0.4631, "step": 17724 }, { "epoch": 0.8022176963113826, "grad_norm": 0.6741465898250314, "learning_rate": 9.916708215125586e-07, "loss": 0.2915, "step": 17725 }, { "epoch": 0.8022629554197782, "grad_norm": 0.6280940510386683, "learning_rate": 9.912327445932446e-07, "loss": 0.299, "step": 17726 }, { "epoch": 0.8023082145281738, "grad_norm": 0.6069141990952394, "learning_rate": 9.907947538100265e-07, "loss": 0.2883, "step": 17727 }, { "epoch": 0.8023534736365694, "grad_norm": 0.6037705382049574, "learning_rate": 9.903568491723176e-07, "loss": 0.33, "step": 17728 }, { "epoch": 0.8023987327449649, "grad_norm": 0.6288078967755846, "learning_rate": 9.899190306895257e-07, "loss": 0.2898, "step": 17729 }, { "epoch": 0.8024439918533605, "grad_norm": 0.555905713304811, "learning_rate": 9.894812983710556e-07, "loss": 0.2756, "step": 17730 }, { "epoch": 0.802489250961756, "grad_norm": 0.2952776150612605, "learning_rate": 9.89043652226317e-07, "loss": 0.4919, "step": 17731 }, { "epoch": 0.8025345100701516, "grad_norm": 0.5651942536592243, "learning_rate": 9.8860609226471e-07, "loss": 0.2967, "step": 17732 }, { "epoch": 0.8025797691785472, "grad_norm": 0.6505822448237075, "learning_rate": 9.881686184956396e-07, "loss": 0.3383, "step": 17733 }, { "epoch": 0.8026250282869427, "grad_norm": 0.5949901731512393, "learning_rate": 9.877312309285036e-07, "loss": 0.3293, "step": 17734 }, { "epoch": 0.8026702873953383, "grad_norm": 0.6151881571192074, "learning_rate": 9.872939295726997e-07, "loss": 0.2992, "step": 17735 }, { "epoch": 0.8027155465037339, "grad_norm": 0.621174808148157, "learning_rate": 9.868567144376256e-07, "loss": 0.3155, "step": 17736 }, { "epoch": 0.8027608056121295, "grad_norm": 0.6111958778982538, "learning_rate": 9.864195855326764e-07, "loss": 0.2803, "step": 17737 }, { "epoch": 0.802806064720525, "grad_norm": 0.5843529790163214, "learning_rate": 9.85982542867243e-07, "loss": 0.2744, "step": 17738 }, { "epoch": 0.8028513238289205, "grad_norm": 0.26551540249461303, "learning_rate": 9.855455864507157e-07, "loss": 0.4794, "step": 17739 }, { "epoch": 0.8028965829373161, "grad_norm": 0.6113881148320287, "learning_rate": 9.851087162924845e-07, "loss": 0.2857, "step": 17740 }, { "epoch": 0.8029418420457117, "grad_norm": 0.6239502664209545, "learning_rate": 9.846719324019372e-07, "loss": 0.2742, "step": 17741 }, { "epoch": 0.8029871011541073, "grad_norm": 0.6533691162116929, "learning_rate": 9.842352347884582e-07, "loss": 0.3235, "step": 17742 }, { "epoch": 0.8030323602625028, "grad_norm": 1.5388800022815832, "learning_rate": 9.837986234614288e-07, "loss": 0.2605, "step": 17743 }, { "epoch": 0.8030776193708984, "grad_norm": 0.543305453627655, "learning_rate": 9.833620984302338e-07, "loss": 0.2682, "step": 17744 }, { "epoch": 0.803122878479294, "grad_norm": 0.6244755921904002, "learning_rate": 9.829256597042496e-07, "loss": 0.2777, "step": 17745 }, { "epoch": 0.8031681375876896, "grad_norm": 0.6570277246440114, "learning_rate": 9.824893072928572e-07, "loss": 0.2946, "step": 17746 }, { "epoch": 0.803213396696085, "grad_norm": 0.6215458936046111, "learning_rate": 9.820530412054302e-07, "loss": 0.2848, "step": 17747 }, { "epoch": 0.8032586558044806, "grad_norm": 0.6040671769548295, "learning_rate": 9.816168614513423e-07, "loss": 0.2676, "step": 17748 }, { "epoch": 0.8033039149128762, "grad_norm": 0.6104696414519816, "learning_rate": 9.81180768039966e-07, "loss": 0.2659, "step": 17749 }, { "epoch": 0.8033491740212718, "grad_norm": 0.5967745873187079, "learning_rate": 9.807447609806752e-07, "loss": 0.2717, "step": 17750 }, { "epoch": 0.8033944331296673, "grad_norm": 0.6607978228742731, "learning_rate": 9.803088402828326e-07, "loss": 0.316, "step": 17751 }, { "epoch": 0.8034396922380629, "grad_norm": 0.659657357573865, "learning_rate": 9.798730059558076e-07, "loss": 0.2631, "step": 17752 }, { "epoch": 0.8034849513464585, "grad_norm": 0.6103075212447557, "learning_rate": 9.794372580089645e-07, "loss": 0.2906, "step": 17753 }, { "epoch": 0.8035302104548541, "grad_norm": 0.6494566495041401, "learning_rate": 9.790015964516692e-07, "loss": 0.3029, "step": 17754 }, { "epoch": 0.8035754695632497, "grad_norm": 1.0423514431242897, "learning_rate": 9.785660212932775e-07, "loss": 0.3044, "step": 17755 }, { "epoch": 0.8036207286716451, "grad_norm": 0.5784660241782171, "learning_rate": 9.781305325431512e-07, "loss": 0.2676, "step": 17756 }, { "epoch": 0.8036659877800407, "grad_norm": 0.6102474087109717, "learning_rate": 9.776951302106485e-07, "loss": 0.266, "step": 17757 }, { "epoch": 0.8037112468884363, "grad_norm": 0.6286956970183284, "learning_rate": 9.772598143051242e-07, "loss": 0.3052, "step": 17758 }, { "epoch": 0.8037565059968319, "grad_norm": 0.6239997771945285, "learning_rate": 9.768245848359304e-07, "loss": 0.2955, "step": 17759 }, { "epoch": 0.8038017651052274, "grad_norm": 0.6649553793926286, "learning_rate": 9.763894418124215e-07, "loss": 0.3546, "step": 17760 }, { "epoch": 0.803847024213623, "grad_norm": 0.28664882723210144, "learning_rate": 9.75954385243944e-07, "loss": 0.4974, "step": 17761 }, { "epoch": 0.8038922833220186, "grad_norm": 0.5878687259601594, "learning_rate": 9.755194151398494e-07, "loss": 0.3236, "step": 17762 }, { "epoch": 0.8039375424304142, "grad_norm": 0.7083215294265206, "learning_rate": 9.750845315094826e-07, "loss": 0.26, "step": 17763 }, { "epoch": 0.8039828015388096, "grad_norm": 0.2928910819210211, "learning_rate": 9.746497343621857e-07, "loss": 0.4376, "step": 17764 }, { "epoch": 0.8040280606472052, "grad_norm": 0.24709386014473778, "learning_rate": 9.74215023707304e-07, "loss": 0.4392, "step": 17765 }, { "epoch": 0.8040733197556008, "grad_norm": 0.26616740028184244, "learning_rate": 9.737803995541777e-07, "loss": 0.4614, "step": 17766 }, { "epoch": 0.8041185788639964, "grad_norm": 0.7103385064900106, "learning_rate": 9.733458619121449e-07, "loss": 0.351, "step": 17767 }, { "epoch": 0.804163837972392, "grad_norm": 0.6863027935741712, "learning_rate": 9.72911410790542e-07, "loss": 0.2807, "step": 17768 }, { "epoch": 0.8042090970807875, "grad_norm": 0.2519538309492859, "learning_rate": 9.724770461987044e-07, "loss": 0.4491, "step": 17769 }, { "epoch": 0.8042543561891831, "grad_norm": 0.614031610054373, "learning_rate": 9.720427681459665e-07, "loss": 0.2903, "step": 17770 }, { "epoch": 0.8042996152975787, "grad_norm": 0.6690508718365019, "learning_rate": 9.71608576641659e-07, "loss": 0.3037, "step": 17771 }, { "epoch": 0.8043448744059742, "grad_norm": 0.7043153878217588, "learning_rate": 9.711744716951093e-07, "loss": 0.3126, "step": 17772 }, { "epoch": 0.8043901335143697, "grad_norm": 0.5684993411944421, "learning_rate": 9.707404533156479e-07, "loss": 0.2348, "step": 17773 }, { "epoch": 0.8044353926227653, "grad_norm": 0.625394348488827, "learning_rate": 9.703065215125978e-07, "loss": 0.3292, "step": 17774 }, { "epoch": 0.8044806517311609, "grad_norm": 0.612393036000987, "learning_rate": 9.698726762952859e-07, "loss": 0.2888, "step": 17775 }, { "epoch": 0.8045259108395565, "grad_norm": 0.6443397133260294, "learning_rate": 9.69438917673033e-07, "loss": 0.3287, "step": 17776 }, { "epoch": 0.8045711699479521, "grad_norm": 0.2731566439388685, "learning_rate": 9.69005245655157e-07, "loss": 0.4616, "step": 17777 }, { "epoch": 0.8046164290563476, "grad_norm": 0.6635968408950185, "learning_rate": 9.685716602509782e-07, "loss": 0.3266, "step": 17778 }, { "epoch": 0.8046616881647431, "grad_norm": 0.6750096154295964, "learning_rate": 9.681381614698148e-07, "loss": 0.2842, "step": 17779 }, { "epoch": 0.8047069472731387, "grad_norm": 0.5740177530113943, "learning_rate": 9.677047493209775e-07, "loss": 0.3, "step": 17780 }, { "epoch": 0.8047522063815343, "grad_norm": 0.59785691959512, "learning_rate": 9.67271423813781e-07, "loss": 0.3076, "step": 17781 }, { "epoch": 0.8047974654899298, "grad_norm": 0.6427667444325058, "learning_rate": 9.668381849575354e-07, "loss": 0.3088, "step": 17782 }, { "epoch": 0.8048427245983254, "grad_norm": 0.27035245054586593, "learning_rate": 9.664050327615531e-07, "loss": 0.4606, "step": 17783 }, { "epoch": 0.804887983706721, "grad_norm": 0.5900749386428757, "learning_rate": 9.659719672351363e-07, "loss": 0.2815, "step": 17784 }, { "epoch": 0.8049332428151166, "grad_norm": 0.6792514119123676, "learning_rate": 9.65538988387592e-07, "loss": 0.2977, "step": 17785 }, { "epoch": 0.8049785019235121, "grad_norm": 0.6449374754101008, "learning_rate": 9.65106096228225e-07, "loss": 0.2898, "step": 17786 }, { "epoch": 0.8050237610319076, "grad_norm": 0.5739704721959326, "learning_rate": 9.646732907663358e-07, "loss": 0.2707, "step": 17787 }, { "epoch": 0.8050690201403032, "grad_norm": 0.27968337547443045, "learning_rate": 9.64240572011223e-07, "loss": 0.4649, "step": 17788 }, { "epoch": 0.8051142792486988, "grad_norm": 0.6508549364566079, "learning_rate": 9.638079399721866e-07, "loss": 0.2833, "step": 17789 }, { "epoch": 0.8051595383570944, "grad_norm": 0.6567856991621365, "learning_rate": 9.633753946585201e-07, "loss": 0.2914, "step": 17790 }, { "epoch": 0.8052047974654899, "grad_norm": 0.5991907161446376, "learning_rate": 9.629429360795201e-07, "loss": 0.29, "step": 17791 }, { "epoch": 0.8052500565738855, "grad_norm": 0.6436835596344362, "learning_rate": 9.625105642444777e-07, "loss": 0.3134, "step": 17792 }, { "epoch": 0.8052953156822811, "grad_norm": 0.26121420030905806, "learning_rate": 9.620782791626815e-07, "loss": 0.4442, "step": 17793 }, { "epoch": 0.8053405747906767, "grad_norm": 0.7029702963957971, "learning_rate": 9.616460808434213e-07, "loss": 0.3315, "step": 17794 }, { "epoch": 0.8053858338990721, "grad_norm": 0.5785032749920184, "learning_rate": 9.612139692959859e-07, "loss": 0.2775, "step": 17795 }, { "epoch": 0.8054310930074677, "grad_norm": 0.6740378749138689, "learning_rate": 9.607819445296579e-07, "loss": 0.323, "step": 17796 }, { "epoch": 0.8054763521158633, "grad_norm": 0.3714646127639946, "learning_rate": 9.60350006553719e-07, "loss": 0.463, "step": 17797 }, { "epoch": 0.8055216112242589, "grad_norm": 0.6722240591447426, "learning_rate": 9.599181553774517e-07, "loss": 0.2908, "step": 17798 }, { "epoch": 0.8055668703326544, "grad_norm": 0.2518733239893241, "learning_rate": 9.59486391010136e-07, "loss": 0.4378, "step": 17799 }, { "epoch": 0.80561212944105, "grad_norm": 0.5960606271702927, "learning_rate": 9.59054713461049e-07, "loss": 0.258, "step": 17800 }, { "epoch": 0.8056573885494456, "grad_norm": 0.5225734865181609, "learning_rate": 9.586231227394632e-07, "loss": 0.2191, "step": 17801 }, { "epoch": 0.8057026476578412, "grad_norm": 0.30134938053213317, "learning_rate": 9.581916188546563e-07, "loss": 0.455, "step": 17802 }, { "epoch": 0.8057479067662368, "grad_norm": 0.2919711089673619, "learning_rate": 9.577602018158966e-07, "loss": 0.462, "step": 17803 }, { "epoch": 0.8057931658746322, "grad_norm": 0.5348588791690345, "learning_rate": 9.57328871632457e-07, "loss": 0.2597, "step": 17804 }, { "epoch": 0.8058384249830278, "grad_norm": 0.26899116106159493, "learning_rate": 9.568976283136033e-07, "loss": 0.4445, "step": 17805 }, { "epoch": 0.8058836840914234, "grad_norm": 0.6010819484837083, "learning_rate": 9.564664718686006e-07, "loss": 0.2669, "step": 17806 }, { "epoch": 0.805928943199819, "grad_norm": 0.5988338193919135, "learning_rate": 9.560354023067154e-07, "loss": 0.3217, "step": 17807 }, { "epoch": 0.8059742023082145, "grad_norm": 0.6031911398914606, "learning_rate": 9.556044196372117e-07, "loss": 0.2926, "step": 17808 }, { "epoch": 0.8060194614166101, "grad_norm": 0.6186691343631564, "learning_rate": 9.551735238693448e-07, "loss": 0.2715, "step": 17809 }, { "epoch": 0.8060647205250057, "grad_norm": 0.6173799694688191, "learning_rate": 9.547427150123762e-07, "loss": 0.2874, "step": 17810 }, { "epoch": 0.8061099796334013, "grad_norm": 0.6022156353413017, "learning_rate": 9.543119930755622e-07, "loss": 0.2956, "step": 17811 }, { "epoch": 0.8061552387417967, "grad_norm": 0.5927472848824709, "learning_rate": 9.538813580681616e-07, "loss": 0.2715, "step": 17812 }, { "epoch": 0.8062004978501923, "grad_norm": 0.6148306294036522, "learning_rate": 9.534508099994206e-07, "loss": 0.3106, "step": 17813 }, { "epoch": 0.8062457569585879, "grad_norm": 0.7236385379377256, "learning_rate": 9.530203488785939e-07, "loss": 0.3033, "step": 17814 }, { "epoch": 0.8062910160669835, "grad_norm": 0.6026043617790225, "learning_rate": 9.52589974714932e-07, "loss": 0.2908, "step": 17815 }, { "epoch": 0.8063362751753791, "grad_norm": 0.2657865844437305, "learning_rate": 9.521596875176803e-07, "loss": 0.4463, "step": 17816 }, { "epoch": 0.8063815342837746, "grad_norm": 0.664579620544881, "learning_rate": 9.517294872960841e-07, "loss": 0.2808, "step": 17817 }, { "epoch": 0.8064267933921702, "grad_norm": 0.62770544763799, "learning_rate": 9.51299374059389e-07, "loss": 0.2625, "step": 17818 }, { "epoch": 0.8064720525005658, "grad_norm": 0.6491865545045638, "learning_rate": 9.508693478168346e-07, "loss": 0.265, "step": 17819 }, { "epoch": 0.8065173116089613, "grad_norm": 0.6222501394219117, "learning_rate": 9.504394085776636e-07, "loss": 0.289, "step": 17820 }, { "epoch": 0.8065625707173568, "grad_norm": 0.5920336723940774, "learning_rate": 9.500095563511119e-07, "loss": 0.3043, "step": 17821 }, { "epoch": 0.8066078298257524, "grad_norm": 0.6828166339707074, "learning_rate": 9.49579791146415e-07, "loss": 0.2999, "step": 17822 }, { "epoch": 0.806653088934148, "grad_norm": 0.8659427099989623, "learning_rate": 9.491501129728087e-07, "loss": 0.3415, "step": 17823 }, { "epoch": 0.8066983480425436, "grad_norm": 0.6783573168092197, "learning_rate": 9.487205218395262e-07, "loss": 0.3317, "step": 17824 }, { "epoch": 0.8067436071509392, "grad_norm": 0.569484306281922, "learning_rate": 9.482910177557975e-07, "loss": 0.2727, "step": 17825 }, { "epoch": 0.8067888662593347, "grad_norm": 0.6803035123904807, "learning_rate": 9.478616007308495e-07, "loss": 0.3235, "step": 17826 }, { "epoch": 0.8068341253677302, "grad_norm": 0.34306276811634023, "learning_rate": 9.474322707739103e-07, "loss": 0.4562, "step": 17827 }, { "epoch": 0.8068793844761258, "grad_norm": 0.6123543709961801, "learning_rate": 9.470030278942066e-07, "loss": 0.3109, "step": 17828 }, { "epoch": 0.8069246435845214, "grad_norm": 0.2548844258247678, "learning_rate": 9.465738721009598e-07, "loss": 0.4591, "step": 17829 }, { "epoch": 0.8069699026929169, "grad_norm": 0.26506859233429114, "learning_rate": 9.461448034033905e-07, "loss": 0.4505, "step": 17830 }, { "epoch": 0.8070151618013125, "grad_norm": 0.6048925450365089, "learning_rate": 9.457158218107198e-07, "loss": 0.2945, "step": 17831 }, { "epoch": 0.8070604209097081, "grad_norm": 0.6257261716315243, "learning_rate": 9.45286927332163e-07, "loss": 0.2926, "step": 17832 }, { "epoch": 0.8071056800181037, "grad_norm": 0.6735816668686118, "learning_rate": 9.448581199769385e-07, "loss": 0.2789, "step": 17833 }, { "epoch": 0.8071509391264992, "grad_norm": 0.2743845808695478, "learning_rate": 9.444293997542586e-07, "loss": 0.4636, "step": 17834 }, { "epoch": 0.8071961982348947, "grad_norm": 0.28865643557615306, "learning_rate": 9.440007666733336e-07, "loss": 0.4819, "step": 17835 }, { "epoch": 0.8072414573432903, "grad_norm": 0.5745541285151884, "learning_rate": 9.43572220743375e-07, "loss": 0.2733, "step": 17836 }, { "epoch": 0.8072867164516859, "grad_norm": 0.24529223471523312, "learning_rate": 9.431437619735928e-07, "loss": 0.436, "step": 17837 }, { "epoch": 0.8073319755600815, "grad_norm": 0.591013245264191, "learning_rate": 9.427153903731912e-07, "loss": 0.2857, "step": 17838 }, { "epoch": 0.807377234668477, "grad_norm": 0.5672598214064283, "learning_rate": 9.422871059513738e-07, "loss": 0.2954, "step": 17839 }, { "epoch": 0.8074224937768726, "grad_norm": 0.2732986318260846, "learning_rate": 9.418589087173441e-07, "loss": 0.4775, "step": 17840 }, { "epoch": 0.8074677528852682, "grad_norm": 0.6033196974822153, "learning_rate": 9.414307986803051e-07, "loss": 0.2578, "step": 17841 }, { "epoch": 0.8075130119936638, "grad_norm": 0.6700641066968276, "learning_rate": 9.410027758494511e-07, "loss": 0.2617, "step": 17842 }, { "epoch": 0.8075582711020592, "grad_norm": 0.6062704598820653, "learning_rate": 9.405748402339809e-07, "loss": 0.2894, "step": 17843 }, { "epoch": 0.8076035302104548, "grad_norm": 0.29249875010539206, "learning_rate": 9.401469918430911e-07, "loss": 0.4807, "step": 17844 }, { "epoch": 0.8076487893188504, "grad_norm": 0.5552771925540408, "learning_rate": 9.397192306859737e-07, "loss": 0.253, "step": 17845 }, { "epoch": 0.807694048427246, "grad_norm": 0.729487092010495, "learning_rate": 9.392915567718186e-07, "loss": 0.3286, "step": 17846 }, { "epoch": 0.8077393075356415, "grad_norm": 0.6632461653794297, "learning_rate": 9.388639701098174e-07, "loss": 0.3288, "step": 17847 }, { "epoch": 0.8077845666440371, "grad_norm": 0.2959198050260553, "learning_rate": 9.384364707091559e-07, "loss": 0.4735, "step": 17848 }, { "epoch": 0.8078298257524327, "grad_norm": 0.5870934956027272, "learning_rate": 9.380090585790213e-07, "loss": 0.3435, "step": 17849 }, { "epoch": 0.8078750848608283, "grad_norm": 0.5788613590093816, "learning_rate": 9.375817337285969e-07, "loss": 0.2863, "step": 17850 }, { "epoch": 0.8079203439692239, "grad_norm": 0.6004090232292422, "learning_rate": 9.371544961670625e-07, "loss": 0.2654, "step": 17851 }, { "epoch": 0.8079656030776193, "grad_norm": 0.6542045633090844, "learning_rate": 9.367273459036003e-07, "loss": 0.2952, "step": 17852 }, { "epoch": 0.8080108621860149, "grad_norm": 0.6154991149598072, "learning_rate": 9.363002829473894e-07, "loss": 0.2866, "step": 17853 }, { "epoch": 0.8080561212944105, "grad_norm": 0.6160983703257152, "learning_rate": 9.358733073076048e-07, "loss": 0.2862, "step": 17854 }, { "epoch": 0.8081013804028061, "grad_norm": 0.5935234985164778, "learning_rate": 9.354464189934193e-07, "loss": 0.298, "step": 17855 }, { "epoch": 0.8081466395112016, "grad_norm": 0.6286116231527318, "learning_rate": 9.35019618014007e-07, "loss": 0.3286, "step": 17856 }, { "epoch": 0.8081918986195972, "grad_norm": 0.6033556103336765, "learning_rate": 9.345929043785396e-07, "loss": 0.2455, "step": 17857 }, { "epoch": 0.8082371577279928, "grad_norm": 0.2754283766879807, "learning_rate": 9.341662780961847e-07, "loss": 0.458, "step": 17858 }, { "epoch": 0.8082824168363884, "grad_norm": 0.6388948865050439, "learning_rate": 9.337397391761083e-07, "loss": 0.3176, "step": 17859 }, { "epoch": 0.8083276759447839, "grad_norm": 0.7290291472085741, "learning_rate": 9.333132876274775e-07, "loss": 0.314, "step": 17860 }, { "epoch": 0.8083729350531794, "grad_norm": 0.6223888135457937, "learning_rate": 9.328869234594529e-07, "loss": 0.2904, "step": 17861 }, { "epoch": 0.808418194161575, "grad_norm": 0.681702030465637, "learning_rate": 9.32460646681198e-07, "loss": 0.3566, "step": 17862 }, { "epoch": 0.8084634532699706, "grad_norm": 0.642590939024477, "learning_rate": 9.320344573018719e-07, "loss": 0.2918, "step": 17863 }, { "epoch": 0.8085087123783662, "grad_norm": 0.3022485742167686, "learning_rate": 9.316083553306299e-07, "loss": 0.4925, "step": 17864 }, { "epoch": 0.8085539714867617, "grad_norm": 0.6056913667244028, "learning_rate": 9.311823407766297e-07, "loss": 0.3207, "step": 17865 }, { "epoch": 0.8085992305951573, "grad_norm": 0.6621139643820714, "learning_rate": 9.307564136490255e-07, "loss": 0.3178, "step": 17866 }, { "epoch": 0.8086444897035528, "grad_norm": 0.6056796351091303, "learning_rate": 9.303305739569685e-07, "loss": 0.2781, "step": 17867 }, { "epoch": 0.8086897488119484, "grad_norm": 0.26125384139409213, "learning_rate": 9.299048217096068e-07, "loss": 0.463, "step": 17868 }, { "epoch": 0.8087350079203439, "grad_norm": 0.6179247104927748, "learning_rate": 9.294791569160899e-07, "loss": 0.2742, "step": 17869 }, { "epoch": 0.8087802670287395, "grad_norm": 0.5790561879273974, "learning_rate": 9.290535795855659e-07, "loss": 0.3285, "step": 17870 }, { "epoch": 0.8088255261371351, "grad_norm": 0.25355361987720526, "learning_rate": 9.286280897271777e-07, "loss": 0.4392, "step": 17871 }, { "epoch": 0.8088707852455307, "grad_norm": 0.5659073976736065, "learning_rate": 9.282026873500666e-07, "loss": 0.263, "step": 17872 }, { "epoch": 0.8089160443539263, "grad_norm": 0.5744715963109575, "learning_rate": 9.277773724633749e-07, "loss": 0.3104, "step": 17873 }, { "epoch": 0.8089613034623218, "grad_norm": 0.6116546412618016, "learning_rate": 9.273521450762391e-07, "loss": 0.309, "step": 17874 }, { "epoch": 0.8090065625707173, "grad_norm": 0.5956581699580364, "learning_rate": 9.269270051977991e-07, "loss": 0.3248, "step": 17875 }, { "epoch": 0.8090518216791129, "grad_norm": 0.5953470393565178, "learning_rate": 9.265019528371882e-07, "loss": 0.2871, "step": 17876 }, { "epoch": 0.8090970807875085, "grad_norm": 0.6054083677074037, "learning_rate": 9.260769880035387e-07, "loss": 0.2765, "step": 17877 }, { "epoch": 0.809142339895904, "grad_norm": 0.5646933571107533, "learning_rate": 9.256521107059834e-07, "loss": 0.2642, "step": 17878 }, { "epoch": 0.8091875990042996, "grad_norm": 0.5796948761794014, "learning_rate": 9.25227320953651e-07, "loss": 0.2792, "step": 17879 }, { "epoch": 0.8092328581126952, "grad_norm": 0.5997158177178545, "learning_rate": 9.248026187556674e-07, "loss": 0.2902, "step": 17880 }, { "epoch": 0.8092781172210908, "grad_norm": 0.5903397760622614, "learning_rate": 9.243780041211597e-07, "loss": 0.277, "step": 17881 }, { "epoch": 0.8093233763294863, "grad_norm": 0.6532787133554799, "learning_rate": 9.239534770592529e-07, "loss": 0.2828, "step": 17882 }, { "epoch": 0.8093686354378818, "grad_norm": 0.5876285887382989, "learning_rate": 9.235290375790668e-07, "loss": 0.3298, "step": 17883 }, { "epoch": 0.8094138945462774, "grad_norm": 0.28400911286173425, "learning_rate": 9.231046856897202e-07, "loss": 0.4658, "step": 17884 }, { "epoch": 0.809459153654673, "grad_norm": 0.6034911675444754, "learning_rate": 9.226804214003332e-07, "loss": 0.3213, "step": 17885 }, { "epoch": 0.8095044127630686, "grad_norm": 0.5545357819570826, "learning_rate": 9.222562447200228e-07, "loss": 0.2834, "step": 17886 }, { "epoch": 0.8095496718714641, "grad_norm": 0.5909633921075437, "learning_rate": 9.218321556579013e-07, "loss": 0.3009, "step": 17887 }, { "epoch": 0.8095949309798597, "grad_norm": 0.6095861341205993, "learning_rate": 9.214081542230808e-07, "loss": 0.2812, "step": 17888 }, { "epoch": 0.8096401900882553, "grad_norm": 0.6267122163248784, "learning_rate": 9.209842404246738e-07, "loss": 0.3018, "step": 17889 }, { "epoch": 0.8096854491966509, "grad_norm": 0.6088538323220015, "learning_rate": 9.205604142717866e-07, "loss": 0.3304, "step": 17890 }, { "epoch": 0.8097307083050463, "grad_norm": 0.6242827380667848, "learning_rate": 9.201366757735281e-07, "loss": 0.261, "step": 17891 }, { "epoch": 0.8097759674134419, "grad_norm": 0.686089695766711, "learning_rate": 9.197130249390019e-07, "loss": 0.2722, "step": 17892 }, { "epoch": 0.8098212265218375, "grad_norm": 0.2739949605184704, "learning_rate": 9.192894617773102e-07, "loss": 0.4771, "step": 17893 }, { "epoch": 0.8098664856302331, "grad_norm": 0.5891830152785924, "learning_rate": 9.188659862975552e-07, "loss": 0.2851, "step": 17894 }, { "epoch": 0.8099117447386287, "grad_norm": 0.6435546338845332, "learning_rate": 9.184425985088368e-07, "loss": 0.2699, "step": 17895 }, { "epoch": 0.8099570038470242, "grad_norm": 0.6427501974253429, "learning_rate": 9.180192984202513e-07, "loss": 0.2669, "step": 17896 }, { "epoch": 0.8100022629554198, "grad_norm": 0.6225840811639484, "learning_rate": 9.175960860408934e-07, "loss": 0.2409, "step": 17897 }, { "epoch": 0.8100475220638154, "grad_norm": 0.6270124468852579, "learning_rate": 9.171729613798575e-07, "loss": 0.2685, "step": 17898 }, { "epoch": 0.810092781172211, "grad_norm": 0.6317719814591691, "learning_rate": 9.167499244462358e-07, "loss": 0.3228, "step": 17899 }, { "epoch": 0.8101380402806064, "grad_norm": 0.677980534030642, "learning_rate": 9.163269752491183e-07, "loss": 0.2426, "step": 17900 }, { "epoch": 0.810183299389002, "grad_norm": 0.28151380028711165, "learning_rate": 9.159041137975904e-07, "loss": 0.4515, "step": 17901 }, { "epoch": 0.8102285584973976, "grad_norm": 0.6332702542591043, "learning_rate": 9.154813401007406e-07, "loss": 0.3118, "step": 17902 }, { "epoch": 0.8102738176057932, "grad_norm": 0.6925917382896261, "learning_rate": 9.150586541676515e-07, "loss": 0.312, "step": 17903 }, { "epoch": 0.8103190767141887, "grad_norm": 0.5881584189149481, "learning_rate": 9.146360560074074e-07, "loss": 0.3068, "step": 17904 }, { "epoch": 0.8103643358225843, "grad_norm": 0.6392350598878368, "learning_rate": 9.142135456290868e-07, "loss": 0.2829, "step": 17905 }, { "epoch": 0.8104095949309799, "grad_norm": 0.6827851072609606, "learning_rate": 9.137911230417673e-07, "loss": 0.3119, "step": 17906 }, { "epoch": 0.8104548540393754, "grad_norm": 0.6056204064823583, "learning_rate": 9.133687882545267e-07, "loss": 0.2369, "step": 17907 }, { "epoch": 0.810500113147771, "grad_norm": 0.6214795257359838, "learning_rate": 9.12946541276441e-07, "loss": 0.2816, "step": 17908 }, { "epoch": 0.8105453722561665, "grad_norm": 0.6456268891092399, "learning_rate": 9.125243821165819e-07, "loss": 0.2977, "step": 17909 }, { "epoch": 0.8105906313645621, "grad_norm": 0.5826360599394663, "learning_rate": 9.121023107840188e-07, "loss": 0.283, "step": 17910 }, { "epoch": 0.8106358904729577, "grad_norm": 0.6909731471921312, "learning_rate": 9.116803272878233e-07, "loss": 0.3122, "step": 17911 }, { "epoch": 0.8106811495813533, "grad_norm": 0.6152573399790072, "learning_rate": 9.112584316370615e-07, "loss": 0.2973, "step": 17912 }, { "epoch": 0.8107264086897488, "grad_norm": 0.6196372725926754, "learning_rate": 9.108366238407968e-07, "loss": 0.302, "step": 17913 }, { "epoch": 0.8107716677981444, "grad_norm": 0.6446896084894796, "learning_rate": 9.104149039080939e-07, "loss": 0.2914, "step": 17914 }, { "epoch": 0.81081692690654, "grad_norm": 0.5551934075128154, "learning_rate": 9.099932718480158e-07, "loss": 0.2629, "step": 17915 }, { "epoch": 0.8108621860149355, "grad_norm": 0.6081600900581837, "learning_rate": 9.095717276696214e-07, "loss": 0.2885, "step": 17916 }, { "epoch": 0.810907445123331, "grad_norm": 0.2681156513940687, "learning_rate": 9.091502713819661e-07, "loss": 0.4772, "step": 17917 }, { "epoch": 0.8109527042317266, "grad_norm": 0.2478478171090661, "learning_rate": 9.087289029941088e-07, "loss": 0.4607, "step": 17918 }, { "epoch": 0.8109979633401222, "grad_norm": 0.25789240256355805, "learning_rate": 9.083076225151005e-07, "loss": 0.457, "step": 17919 }, { "epoch": 0.8110432224485178, "grad_norm": 0.6053463787096428, "learning_rate": 9.078864299539963e-07, "loss": 0.2934, "step": 17920 }, { "epoch": 0.8110884815569134, "grad_norm": 0.73569061476308, "learning_rate": 9.074653253198445e-07, "loss": 0.3346, "step": 17921 }, { "epoch": 0.8111337406653089, "grad_norm": 0.6063644964412455, "learning_rate": 9.070443086216924e-07, "loss": 0.3413, "step": 17922 }, { "epoch": 0.8111789997737044, "grad_norm": 0.5351965867554764, "learning_rate": 9.066233798685875e-07, "loss": 0.3067, "step": 17923 }, { "epoch": 0.8112242588821, "grad_norm": 0.28645027424828934, "learning_rate": 9.062025390695756e-07, "loss": 0.4717, "step": 17924 }, { "epoch": 0.8112695179904956, "grad_norm": 0.2566172115969165, "learning_rate": 9.057817862336982e-07, "loss": 0.4457, "step": 17925 }, { "epoch": 0.8113147770988911, "grad_norm": 0.5957504613420535, "learning_rate": 9.053611213699942e-07, "loss": 0.2781, "step": 17926 }, { "epoch": 0.8113600362072867, "grad_norm": 0.6092725014501065, "learning_rate": 9.049405444875042e-07, "loss": 0.3022, "step": 17927 }, { "epoch": 0.8114052953156823, "grad_norm": 0.5947499132955205, "learning_rate": 9.04520055595266e-07, "loss": 0.2739, "step": 17928 }, { "epoch": 0.8114505544240779, "grad_norm": 0.6436976100680032, "learning_rate": 9.040996547023134e-07, "loss": 0.2487, "step": 17929 }, { "epoch": 0.8114958135324735, "grad_norm": 0.5838302765186906, "learning_rate": 9.036793418176786e-07, "loss": 0.3405, "step": 17930 }, { "epoch": 0.8115410726408689, "grad_norm": 0.6307928816587278, "learning_rate": 9.032591169503951e-07, "loss": 0.2652, "step": 17931 }, { "epoch": 0.8115863317492645, "grad_norm": 0.6497305944441155, "learning_rate": 9.028389801094895e-07, "loss": 0.2793, "step": 17932 }, { "epoch": 0.8116315908576601, "grad_norm": 0.5786671075336611, "learning_rate": 9.024189313039922e-07, "loss": 0.2842, "step": 17933 }, { "epoch": 0.8116768499660557, "grad_norm": 0.28544876263977276, "learning_rate": 9.019989705429271e-07, "loss": 0.4457, "step": 17934 }, { "epoch": 0.8117221090744512, "grad_norm": 0.6562782485085401, "learning_rate": 9.015790978353173e-07, "loss": 0.2923, "step": 17935 }, { "epoch": 0.8117673681828468, "grad_norm": 0.6443448934260734, "learning_rate": 9.011593131901852e-07, "loss": 0.3118, "step": 17936 }, { "epoch": 0.8118126272912424, "grad_norm": 0.3076745719943253, "learning_rate": 9.007396166165516e-07, "loss": 0.4967, "step": 17937 }, { "epoch": 0.811857886399638, "grad_norm": 0.6008853865334476, "learning_rate": 9.003200081234342e-07, "loss": 0.3007, "step": 17938 }, { "epoch": 0.8119031455080334, "grad_norm": 0.5958514186274408, "learning_rate": 8.999004877198475e-07, "loss": 0.2712, "step": 17939 }, { "epoch": 0.811948404616429, "grad_norm": 0.604126124888519, "learning_rate": 8.994810554148065e-07, "loss": 0.297, "step": 17940 }, { "epoch": 0.8119936637248246, "grad_norm": 0.5785700048548983, "learning_rate": 8.990617112173261e-07, "loss": 0.2805, "step": 17941 }, { "epoch": 0.8120389228332202, "grad_norm": 0.6076645267963782, "learning_rate": 8.986424551364126e-07, "loss": 0.3081, "step": 17942 }, { "epoch": 0.8120841819416158, "grad_norm": 0.71513808602186, "learning_rate": 8.982232871810759e-07, "loss": 0.2825, "step": 17943 }, { "epoch": 0.8121294410500113, "grad_norm": 0.2832504811945113, "learning_rate": 8.978042073603243e-07, "loss": 0.4603, "step": 17944 }, { "epoch": 0.8121747001584069, "grad_norm": 0.5795502680430666, "learning_rate": 8.97385215683162e-07, "loss": 0.2935, "step": 17945 }, { "epoch": 0.8122199592668025, "grad_norm": 0.6008787860153728, "learning_rate": 8.969663121585892e-07, "loss": 0.2908, "step": 17946 }, { "epoch": 0.812265218375198, "grad_norm": 0.5873590487284043, "learning_rate": 8.965474967956106e-07, "loss": 0.2657, "step": 17947 }, { "epoch": 0.8123104774835935, "grad_norm": 0.6827117507553307, "learning_rate": 8.961287696032217e-07, "loss": 0.3314, "step": 17948 }, { "epoch": 0.8123557365919891, "grad_norm": 0.6190725778935383, "learning_rate": 8.957101305904231e-07, "loss": 0.2584, "step": 17949 }, { "epoch": 0.8124009957003847, "grad_norm": 0.635167529439559, "learning_rate": 8.95291579766207e-07, "loss": 0.3212, "step": 17950 }, { "epoch": 0.8124462548087803, "grad_norm": 0.6384375583049835, "learning_rate": 8.948731171395697e-07, "loss": 0.295, "step": 17951 }, { "epoch": 0.8124915139171758, "grad_norm": 0.6788234412119452, "learning_rate": 8.944547427195e-07, "loss": 0.3228, "step": 17952 }, { "epoch": 0.8125367730255714, "grad_norm": 0.5621391389443448, "learning_rate": 8.940364565149895e-07, "loss": 0.2866, "step": 17953 }, { "epoch": 0.812582032133967, "grad_norm": 0.641359077850069, "learning_rate": 8.936182585350256e-07, "loss": 0.2987, "step": 17954 }, { "epoch": 0.8126272912423625, "grad_norm": 0.6266553026836502, "learning_rate": 8.932001487885916e-07, "loss": 0.2646, "step": 17955 }, { "epoch": 0.8126725503507581, "grad_norm": 0.5493797054964148, "learning_rate": 8.927821272846737e-07, "loss": 0.3098, "step": 17956 }, { "epoch": 0.8127178094591536, "grad_norm": 0.7846153097565445, "learning_rate": 8.923641940322547e-07, "loss": 0.2677, "step": 17957 }, { "epoch": 0.8127630685675492, "grad_norm": 0.5725269933339863, "learning_rate": 8.919463490403141e-07, "loss": 0.299, "step": 17958 }, { "epoch": 0.8128083276759448, "grad_norm": 0.5885140435649727, "learning_rate": 8.915285923178274e-07, "loss": 0.2828, "step": 17959 }, { "epoch": 0.8128535867843404, "grad_norm": 0.5613966687381595, "learning_rate": 8.911109238737748e-07, "loss": 0.2552, "step": 17960 }, { "epoch": 0.8128988458927359, "grad_norm": 0.5813063519597989, "learning_rate": 8.906933437171278e-07, "loss": 0.2803, "step": 17961 }, { "epoch": 0.8129441050011315, "grad_norm": 0.2914511549712682, "learning_rate": 8.90275851856861e-07, "loss": 0.4879, "step": 17962 }, { "epoch": 0.812989364109527, "grad_norm": 0.5898202553287785, "learning_rate": 8.89858448301944e-07, "loss": 0.2995, "step": 17963 }, { "epoch": 0.8130346232179226, "grad_norm": 0.6590792125265483, "learning_rate": 8.894411330613445e-07, "loss": 0.3256, "step": 17964 }, { "epoch": 0.8130798823263182, "grad_norm": 0.27424966293835146, "learning_rate": 8.890239061440303e-07, "loss": 0.4525, "step": 17965 }, { "epoch": 0.8131251414347137, "grad_norm": 0.5828946147301375, "learning_rate": 8.886067675589682e-07, "loss": 0.2955, "step": 17966 }, { "epoch": 0.8131704005431093, "grad_norm": 0.5702092181704951, "learning_rate": 8.881897173151188e-07, "loss": 0.315, "step": 17967 }, { "epoch": 0.8132156596515049, "grad_norm": 0.2968341038768363, "learning_rate": 8.877727554214432e-07, "loss": 0.4809, "step": 17968 }, { "epoch": 0.8132609187599005, "grad_norm": 0.2735751270105653, "learning_rate": 8.87355881886901e-07, "loss": 0.4748, "step": 17969 }, { "epoch": 0.813306177868296, "grad_norm": 0.5959178434447817, "learning_rate": 8.869390967204527e-07, "loss": 0.2481, "step": 17970 }, { "epoch": 0.8133514369766915, "grad_norm": 0.581536219848497, "learning_rate": 8.865223999310485e-07, "loss": 0.2885, "step": 17971 }, { "epoch": 0.8133966960850871, "grad_norm": 0.25640784722105925, "learning_rate": 8.861057915276438e-07, "loss": 0.4554, "step": 17972 }, { "epoch": 0.8134419551934827, "grad_norm": 0.6235145197841093, "learning_rate": 8.856892715191929e-07, "loss": 0.2738, "step": 17973 }, { "epoch": 0.8134872143018782, "grad_norm": 0.2672252289127243, "learning_rate": 8.852728399146427e-07, "loss": 0.4696, "step": 17974 }, { "epoch": 0.8135324734102738, "grad_norm": 0.7879556336384355, "learning_rate": 8.848564967229407e-07, "loss": 0.2915, "step": 17975 }, { "epoch": 0.8135777325186694, "grad_norm": 0.6113467717808888, "learning_rate": 8.844402419530346e-07, "loss": 0.25, "step": 17976 }, { "epoch": 0.813622991627065, "grad_norm": 0.6012089521551512, "learning_rate": 8.840240756138673e-07, "loss": 0.2584, "step": 17977 }, { "epoch": 0.8136682507354606, "grad_norm": 0.6901266025938774, "learning_rate": 8.836079977143819e-07, "loss": 0.3234, "step": 17978 }, { "epoch": 0.813713509843856, "grad_norm": 0.266879344823511, "learning_rate": 8.831920082635175e-07, "loss": 0.4446, "step": 17979 }, { "epoch": 0.8137587689522516, "grad_norm": 0.2728346231036277, "learning_rate": 8.82776107270214e-07, "loss": 0.458, "step": 17980 }, { "epoch": 0.8138040280606472, "grad_norm": 0.5920782461326178, "learning_rate": 8.823602947434056e-07, "loss": 0.2792, "step": 17981 }, { "epoch": 0.8138492871690428, "grad_norm": 0.6327987553773374, "learning_rate": 8.819445706920293e-07, "loss": 0.3333, "step": 17982 }, { "epoch": 0.8138945462774383, "grad_norm": 0.6320931870141716, "learning_rate": 8.815289351250166e-07, "loss": 0.2912, "step": 17983 }, { "epoch": 0.8139398053858339, "grad_norm": 0.6171374305864387, "learning_rate": 8.811133880512967e-07, "loss": 0.2914, "step": 17984 }, { "epoch": 0.8139850644942295, "grad_norm": 0.5839525656588083, "learning_rate": 8.806979294798001e-07, "loss": 0.3362, "step": 17985 }, { "epoch": 0.8140303236026251, "grad_norm": 0.6262579128109615, "learning_rate": 8.802825594194553e-07, "loss": 0.2797, "step": 17986 }, { "epoch": 0.8140755827110205, "grad_norm": 0.6644319477416321, "learning_rate": 8.798672778791851e-07, "loss": 0.2652, "step": 17987 }, { "epoch": 0.8141208418194161, "grad_norm": 0.5892605327097258, "learning_rate": 8.794520848679117e-07, "loss": 0.3056, "step": 17988 }, { "epoch": 0.8141661009278117, "grad_norm": 0.651773532486608, "learning_rate": 8.790369803945586e-07, "loss": 0.3171, "step": 17989 }, { "epoch": 0.8142113600362073, "grad_norm": 0.6321790753968162, "learning_rate": 8.786219644680433e-07, "loss": 0.3445, "step": 17990 }, { "epoch": 0.8142566191446029, "grad_norm": 0.7756419867480293, "learning_rate": 8.782070370972856e-07, "loss": 0.308, "step": 17991 }, { "epoch": 0.8143018782529984, "grad_norm": 0.6363231400815615, "learning_rate": 8.777921982911996e-07, "loss": 0.2983, "step": 17992 }, { "epoch": 0.814347137361394, "grad_norm": 0.6368075871813724, "learning_rate": 8.773774480586972e-07, "loss": 0.2611, "step": 17993 }, { "epoch": 0.8143923964697896, "grad_norm": 0.6556289472461144, "learning_rate": 8.769627864086922e-07, "loss": 0.3046, "step": 17994 }, { "epoch": 0.8144376555781851, "grad_norm": 0.2811520282166436, "learning_rate": 8.765482133500952e-07, "loss": 0.4674, "step": 17995 }, { "epoch": 0.8144829146865806, "grad_norm": 0.6395634551090364, "learning_rate": 8.761337288918126e-07, "loss": 0.3681, "step": 17996 }, { "epoch": 0.8145281737949762, "grad_norm": 0.5727505237743223, "learning_rate": 8.757193330427494e-07, "loss": 0.2535, "step": 17997 }, { "epoch": 0.8145734329033718, "grad_norm": 0.6768003967586744, "learning_rate": 8.753050258118112e-07, "loss": 0.3083, "step": 17998 }, { "epoch": 0.8146186920117674, "grad_norm": 0.6193381144097816, "learning_rate": 8.748908072079021e-07, "loss": 0.3023, "step": 17999 }, { "epoch": 0.814663951120163, "grad_norm": 0.3014669651232553, "learning_rate": 8.744766772399182e-07, "loss": 0.4727, "step": 18000 }, { "epoch": 0.8147092102285585, "grad_norm": 0.29125316456076983, "learning_rate": 8.740626359167598e-07, "loss": 0.463, "step": 18001 }, { "epoch": 0.8147544693369541, "grad_norm": 0.5728738895218383, "learning_rate": 8.736486832473246e-07, "loss": 0.2559, "step": 18002 }, { "epoch": 0.8147997284453496, "grad_norm": 0.592202199487586, "learning_rate": 8.732348192405061e-07, "loss": 0.2874, "step": 18003 }, { "epoch": 0.8148449875537452, "grad_norm": 0.6240452002246657, "learning_rate": 8.72821043905196e-07, "loss": 0.3314, "step": 18004 }, { "epoch": 0.8148902466621407, "grad_norm": 0.27107133608396483, "learning_rate": 8.724073572502867e-07, "loss": 0.4812, "step": 18005 }, { "epoch": 0.8149355057705363, "grad_norm": 0.26570821313541987, "learning_rate": 8.719937592846655e-07, "loss": 0.4683, "step": 18006 }, { "epoch": 0.8149807648789319, "grad_norm": 0.6297787448641845, "learning_rate": 8.715802500172215e-07, "loss": 0.3069, "step": 18007 }, { "epoch": 0.8150260239873275, "grad_norm": 1.1029666181074564, "learning_rate": 8.71166829456837e-07, "loss": 0.2747, "step": 18008 }, { "epoch": 0.815071283095723, "grad_norm": 0.6509827180821017, "learning_rate": 8.707534976123982e-07, "loss": 0.2896, "step": 18009 }, { "epoch": 0.8151165422041186, "grad_norm": 0.6877266774259756, "learning_rate": 8.70340254492783e-07, "loss": 0.3169, "step": 18010 }, { "epoch": 0.8151618013125141, "grad_norm": 0.6522964469108322, "learning_rate": 8.699271001068737e-07, "loss": 0.3197, "step": 18011 }, { "epoch": 0.8152070604209097, "grad_norm": 0.607681550138856, "learning_rate": 8.695140344635472e-07, "loss": 0.3409, "step": 18012 }, { "epoch": 0.8152523195293053, "grad_norm": 0.28011214100128695, "learning_rate": 8.691010575716763e-07, "loss": 0.4695, "step": 18013 }, { "epoch": 0.8152975786377008, "grad_norm": 0.6319478865347986, "learning_rate": 8.686881694401366e-07, "loss": 0.3129, "step": 18014 }, { "epoch": 0.8153428377460964, "grad_norm": 0.6545870615655542, "learning_rate": 8.682753700778013e-07, "loss": 0.2748, "step": 18015 }, { "epoch": 0.815388096854492, "grad_norm": 0.5383505723290435, "learning_rate": 8.678626594935385e-07, "loss": 0.2814, "step": 18016 }, { "epoch": 0.8154333559628876, "grad_norm": 0.6287309199313841, "learning_rate": 8.674500376962153e-07, "loss": 0.3214, "step": 18017 }, { "epoch": 0.815478615071283, "grad_norm": 0.6113330668405157, "learning_rate": 8.670375046946999e-07, "loss": 0.2629, "step": 18018 }, { "epoch": 0.8155238741796786, "grad_norm": 0.6091712904104718, "learning_rate": 8.666250604978532e-07, "loss": 0.2794, "step": 18019 }, { "epoch": 0.8155691332880742, "grad_norm": 0.5902500448470669, "learning_rate": 8.662127051145414e-07, "loss": 0.2868, "step": 18020 }, { "epoch": 0.8156143923964698, "grad_norm": 0.852530710451694, "learning_rate": 8.658004385536207e-07, "loss": 0.3043, "step": 18021 }, { "epoch": 0.8156596515048653, "grad_norm": 0.662962701091303, "learning_rate": 8.653882608239528e-07, "loss": 0.287, "step": 18022 }, { "epoch": 0.8157049106132609, "grad_norm": 0.6398341194492669, "learning_rate": 8.649761719343913e-07, "loss": 0.2593, "step": 18023 }, { "epoch": 0.8157501697216565, "grad_norm": 0.6288687742255715, "learning_rate": 8.645641718937936e-07, "loss": 0.3002, "step": 18024 }, { "epoch": 0.8157954288300521, "grad_norm": 0.6466427119006023, "learning_rate": 8.641522607110108e-07, "loss": 0.2877, "step": 18025 }, { "epoch": 0.8158406879384477, "grad_norm": 0.5266603977129436, "learning_rate": 8.637404383948922e-07, "loss": 0.2459, "step": 18026 }, { "epoch": 0.8158859470468431, "grad_norm": 0.5882476530914964, "learning_rate": 8.633287049542882e-07, "loss": 0.2877, "step": 18027 }, { "epoch": 0.8159312061552387, "grad_norm": 0.6191750205576243, "learning_rate": 8.62917060398048e-07, "loss": 0.3038, "step": 18028 }, { "epoch": 0.8159764652636343, "grad_norm": 0.5662399271037007, "learning_rate": 8.625055047350117e-07, "loss": 0.2968, "step": 18029 }, { "epoch": 0.8160217243720299, "grad_norm": 0.6095585560534345, "learning_rate": 8.620940379740245e-07, "loss": 0.2823, "step": 18030 }, { "epoch": 0.8160669834804254, "grad_norm": 0.658250569960579, "learning_rate": 8.616826601239292e-07, "loss": 0.2968, "step": 18031 }, { "epoch": 0.816112242588821, "grad_norm": 0.6742898954206353, "learning_rate": 8.612713711935633e-07, "loss": 0.3158, "step": 18032 }, { "epoch": 0.8161575016972166, "grad_norm": 0.5886347097456335, "learning_rate": 8.608601711917635e-07, "loss": 0.2991, "step": 18033 }, { "epoch": 0.8162027608056122, "grad_norm": 0.5730933904332982, "learning_rate": 8.60449060127368e-07, "loss": 0.277, "step": 18034 }, { "epoch": 0.8162480199140076, "grad_norm": 0.314793965110205, "learning_rate": 8.600380380092066e-07, "loss": 0.4785, "step": 18035 }, { "epoch": 0.8162932790224032, "grad_norm": 0.6019555970834, "learning_rate": 8.596271048461141e-07, "loss": 0.2944, "step": 18036 }, { "epoch": 0.8163385381307988, "grad_norm": 0.6373598092176254, "learning_rate": 8.592162606469179e-07, "loss": 0.3038, "step": 18037 }, { "epoch": 0.8163837972391944, "grad_norm": 0.2930173461656693, "learning_rate": 8.588055054204481e-07, "loss": 0.4772, "step": 18038 }, { "epoch": 0.81642905634759, "grad_norm": 0.5853816249810239, "learning_rate": 8.583948391755281e-07, "loss": 0.297, "step": 18039 }, { "epoch": 0.8164743154559855, "grad_norm": 0.6216333902770305, "learning_rate": 8.579842619209844e-07, "loss": 0.3457, "step": 18040 }, { "epoch": 0.8165195745643811, "grad_norm": 0.5844381819470246, "learning_rate": 8.575737736656376e-07, "loss": 0.2948, "step": 18041 }, { "epoch": 0.8165648336727767, "grad_norm": 0.5545450711342551, "learning_rate": 8.571633744183061e-07, "loss": 0.2926, "step": 18042 }, { "epoch": 0.8166100927811722, "grad_norm": 0.27769105334571487, "learning_rate": 8.567530641878103e-07, "loss": 0.4673, "step": 18043 }, { "epoch": 0.8166553518895677, "grad_norm": 0.6955840626435977, "learning_rate": 8.563428429829674e-07, "loss": 0.3118, "step": 18044 }, { "epoch": 0.8167006109979633, "grad_norm": 0.7041439251924754, "learning_rate": 8.559327108125909e-07, "loss": 0.3299, "step": 18045 }, { "epoch": 0.8167458701063589, "grad_norm": 0.6386908175674691, "learning_rate": 8.555226676854911e-07, "loss": 0.2401, "step": 18046 }, { "epoch": 0.8167911292147545, "grad_norm": 0.2727069733305922, "learning_rate": 8.55112713610482e-07, "loss": 0.4705, "step": 18047 }, { "epoch": 0.8168363883231501, "grad_norm": 0.2700845917645277, "learning_rate": 8.547028485963693e-07, "loss": 0.4624, "step": 18048 }, { "epoch": 0.8168816474315456, "grad_norm": 0.6460354403133349, "learning_rate": 8.542930726519622e-07, "loss": 0.3294, "step": 18049 }, { "epoch": 0.8169269065399412, "grad_norm": 0.6197181489036633, "learning_rate": 8.538833857860635e-07, "loss": 0.2709, "step": 18050 }, { "epoch": 0.8169721656483367, "grad_norm": 0.735925769436235, "learning_rate": 8.534737880074778e-07, "loss": 0.2896, "step": 18051 }, { "epoch": 0.8170174247567323, "grad_norm": 0.6346324401263916, "learning_rate": 8.530642793250044e-07, "loss": 0.2803, "step": 18052 }, { "epoch": 0.8170626838651278, "grad_norm": 0.5409400804031699, "learning_rate": 8.526548597474444e-07, "loss": 0.2843, "step": 18053 }, { "epoch": 0.8171079429735234, "grad_norm": 0.5605393417963239, "learning_rate": 8.522455292835935e-07, "loss": 0.2799, "step": 18054 }, { "epoch": 0.817153202081919, "grad_norm": 0.6013058872868924, "learning_rate": 8.518362879422465e-07, "loss": 0.3166, "step": 18055 }, { "epoch": 0.8171984611903146, "grad_norm": 0.5732754320023369, "learning_rate": 8.514271357321974e-07, "loss": 0.3177, "step": 18056 }, { "epoch": 0.8172437202987101, "grad_norm": 0.6212655502781934, "learning_rate": 8.510180726622392e-07, "loss": 0.2924, "step": 18057 }, { "epoch": 0.8172889794071057, "grad_norm": 0.30100841815431545, "learning_rate": 8.506090987411603e-07, "loss": 0.4635, "step": 18058 }, { "epoch": 0.8173342385155012, "grad_norm": 0.6259434935033461, "learning_rate": 8.50200213977746e-07, "loss": 0.3195, "step": 18059 }, { "epoch": 0.8173794976238968, "grad_norm": 0.611446509904659, "learning_rate": 8.49791418380786e-07, "loss": 0.2448, "step": 18060 }, { "epoch": 0.8174247567322924, "grad_norm": 0.6051093462489923, "learning_rate": 8.493827119590615e-07, "loss": 0.2709, "step": 18061 }, { "epoch": 0.8174700158406879, "grad_norm": 0.6503222997170561, "learning_rate": 8.489740947213537e-07, "loss": 0.328, "step": 18062 }, { "epoch": 0.8175152749490835, "grad_norm": 0.6214017202106538, "learning_rate": 8.485655666764448e-07, "loss": 0.2965, "step": 18063 }, { "epoch": 0.8175605340574791, "grad_norm": 0.5921564688005043, "learning_rate": 8.481571278331108e-07, "loss": 0.2627, "step": 18064 }, { "epoch": 0.8176057931658747, "grad_norm": 0.6325059930394564, "learning_rate": 8.477487782001298e-07, "loss": 0.2888, "step": 18065 }, { "epoch": 0.8176510522742702, "grad_norm": 0.579900367318165, "learning_rate": 8.473405177862737e-07, "loss": 0.3454, "step": 18066 }, { "epoch": 0.8176963113826657, "grad_norm": 0.51058712141538, "learning_rate": 8.46932346600317e-07, "loss": 0.4872, "step": 18067 }, { "epoch": 0.8177415704910613, "grad_norm": 0.285936508840432, "learning_rate": 8.46524264651028e-07, "loss": 0.5006, "step": 18068 }, { "epoch": 0.8177868295994569, "grad_norm": 0.6130167016167806, "learning_rate": 8.461162719471772e-07, "loss": 0.2589, "step": 18069 }, { "epoch": 0.8178320887078524, "grad_norm": 0.5926511878837478, "learning_rate": 8.457083684975298e-07, "loss": 0.2764, "step": 18070 }, { "epoch": 0.817877347816248, "grad_norm": 0.5814554261878402, "learning_rate": 8.453005543108501e-07, "loss": 0.3133, "step": 18071 }, { "epoch": 0.8179226069246436, "grad_norm": 0.5453222644749728, "learning_rate": 8.448928293959007e-07, "loss": 0.2403, "step": 18072 }, { "epoch": 0.8179678660330392, "grad_norm": 0.6616303384602777, "learning_rate": 8.444851937614446e-07, "loss": 0.3076, "step": 18073 }, { "epoch": 0.8180131251414348, "grad_norm": 0.2775563857111059, "learning_rate": 8.440776474162388e-07, "loss": 0.4685, "step": 18074 }, { "epoch": 0.8180583842498302, "grad_norm": 0.5854024135886258, "learning_rate": 8.436701903690392e-07, "loss": 0.2993, "step": 18075 }, { "epoch": 0.8181036433582258, "grad_norm": 0.6477255114728717, "learning_rate": 8.432628226286032e-07, "loss": 0.3135, "step": 18076 }, { "epoch": 0.8181489024666214, "grad_norm": 0.27798679897661827, "learning_rate": 8.428555442036812e-07, "loss": 0.4663, "step": 18077 }, { "epoch": 0.818194161575017, "grad_norm": 0.5661684252041219, "learning_rate": 8.424483551030277e-07, "loss": 0.3152, "step": 18078 }, { "epoch": 0.8182394206834125, "grad_norm": 0.616865906589501, "learning_rate": 8.420412553353885e-07, "loss": 0.2672, "step": 18079 }, { "epoch": 0.8182846797918081, "grad_norm": 0.2682258287041038, "learning_rate": 8.416342449095138e-07, "loss": 0.4634, "step": 18080 }, { "epoch": 0.8183299389002037, "grad_norm": 0.6944034869375798, "learning_rate": 8.412273238341462e-07, "loss": 0.306, "step": 18081 }, { "epoch": 0.8183751980085993, "grad_norm": 0.632263742360841, "learning_rate": 8.408204921180324e-07, "loss": 0.28, "step": 18082 }, { "epoch": 0.8184204571169948, "grad_norm": 0.626627799718189, "learning_rate": 8.404137497699122e-07, "loss": 0.2901, "step": 18083 }, { "epoch": 0.8184657162253903, "grad_norm": 0.6525676731594316, "learning_rate": 8.400070967985241e-07, "loss": 0.3531, "step": 18084 }, { "epoch": 0.8185109753337859, "grad_norm": 0.7172573954857759, "learning_rate": 8.396005332126068e-07, "loss": 0.3061, "step": 18085 }, { "epoch": 0.8185562344421815, "grad_norm": 0.2704532318775338, "learning_rate": 8.391940590208975e-07, "loss": 0.4708, "step": 18086 }, { "epoch": 0.8186014935505771, "grad_norm": 0.24641823451323808, "learning_rate": 8.387876742321294e-07, "loss": 0.4428, "step": 18087 }, { "epoch": 0.8186467526589726, "grad_norm": 0.6891107336428738, "learning_rate": 8.383813788550326e-07, "loss": 0.3161, "step": 18088 }, { "epoch": 0.8186920117673682, "grad_norm": 0.6139209157814352, "learning_rate": 8.379751728983399e-07, "loss": 0.3436, "step": 18089 }, { "epoch": 0.8187372708757638, "grad_norm": 0.595071574398606, "learning_rate": 8.375690563707761e-07, "loss": 0.2992, "step": 18090 }, { "epoch": 0.8187825299841593, "grad_norm": 0.770051033788159, "learning_rate": 8.371630292810712e-07, "loss": 0.2521, "step": 18091 }, { "epoch": 0.8188277890925548, "grad_norm": 1.1735476464817132, "learning_rate": 8.367570916379464e-07, "loss": 0.3236, "step": 18092 }, { "epoch": 0.8188730482009504, "grad_norm": 0.7713069831108617, "learning_rate": 8.363512434501264e-07, "loss": 0.3005, "step": 18093 }, { "epoch": 0.818918307309346, "grad_norm": 0.2847719199034834, "learning_rate": 8.359454847263293e-07, "loss": 0.4746, "step": 18094 }, { "epoch": 0.8189635664177416, "grad_norm": 0.552169758517861, "learning_rate": 8.355398154752759e-07, "loss": 0.2622, "step": 18095 }, { "epoch": 0.8190088255261372, "grad_norm": 0.6249685361362285, "learning_rate": 8.351342357056818e-07, "loss": 0.2641, "step": 18096 }, { "epoch": 0.8190540846345327, "grad_norm": 0.6003519390627243, "learning_rate": 8.347287454262603e-07, "loss": 0.265, "step": 18097 }, { "epoch": 0.8190993437429283, "grad_norm": 0.5991625551143557, "learning_rate": 8.343233446457272e-07, "loss": 0.2519, "step": 18098 }, { "epoch": 0.8191446028513238, "grad_norm": 0.2810411008882779, "learning_rate": 8.339180333727909e-07, "loss": 0.4893, "step": 18099 }, { "epoch": 0.8191898619597194, "grad_norm": 0.5922428037797257, "learning_rate": 8.335128116161595e-07, "loss": 0.314, "step": 18100 }, { "epoch": 0.8192351210681149, "grad_norm": 0.7208442527501777, "learning_rate": 8.331076793845422e-07, "loss": 0.2865, "step": 18101 }, { "epoch": 0.8192803801765105, "grad_norm": 0.2829411112652835, "learning_rate": 8.327026366866437e-07, "loss": 0.4443, "step": 18102 }, { "epoch": 0.8193256392849061, "grad_norm": 0.6211237231506862, "learning_rate": 8.322976835311669e-07, "loss": 0.292, "step": 18103 }, { "epoch": 0.8193708983933017, "grad_norm": 0.26641329034091676, "learning_rate": 8.318928199268117e-07, "loss": 0.4805, "step": 18104 }, { "epoch": 0.8194161575016972, "grad_norm": 0.5954674577374436, "learning_rate": 8.314880458822794e-07, "loss": 0.3213, "step": 18105 }, { "epoch": 0.8194614166100928, "grad_norm": 0.6591766551015656, "learning_rate": 8.310833614062652e-07, "loss": 0.3267, "step": 18106 }, { "epoch": 0.8195066757184883, "grad_norm": 0.634582558619486, "learning_rate": 8.306787665074673e-07, "loss": 0.2646, "step": 18107 }, { "epoch": 0.8195519348268839, "grad_norm": 0.591732723888874, "learning_rate": 8.302742611945758e-07, "loss": 0.2812, "step": 18108 }, { "epoch": 0.8195971939352795, "grad_norm": 0.5876345405105374, "learning_rate": 8.298698454762854e-07, "loss": 0.2814, "step": 18109 }, { "epoch": 0.819642453043675, "grad_norm": 0.2784159769170307, "learning_rate": 8.294655193612838e-07, "loss": 0.4747, "step": 18110 }, { "epoch": 0.8196877121520706, "grad_norm": 0.6053642942795595, "learning_rate": 8.2906128285826e-07, "loss": 0.3417, "step": 18111 }, { "epoch": 0.8197329712604662, "grad_norm": 0.3103793994779556, "learning_rate": 8.286571359758993e-07, "loss": 0.4722, "step": 18112 }, { "epoch": 0.8197782303688618, "grad_norm": 0.6863803379605348, "learning_rate": 8.282530787228848e-07, "loss": 0.2578, "step": 18113 }, { "epoch": 0.8198234894772573, "grad_norm": 0.286499942924758, "learning_rate": 8.278491111078984e-07, "loss": 0.4758, "step": 18114 }, { "epoch": 0.8198687485856528, "grad_norm": 0.28709485782608885, "learning_rate": 8.274452331396221e-07, "loss": 0.4866, "step": 18115 }, { "epoch": 0.8199140076940484, "grad_norm": 0.2764893701502648, "learning_rate": 8.270414448267333e-07, "loss": 0.4694, "step": 18116 }, { "epoch": 0.819959266802444, "grad_norm": 0.8317042355162554, "learning_rate": 8.266377461779057e-07, "loss": 0.3021, "step": 18117 }, { "epoch": 0.8200045259108396, "grad_norm": 0.5772234597460723, "learning_rate": 8.262341372018168e-07, "loss": 0.2858, "step": 18118 }, { "epoch": 0.8200497850192351, "grad_norm": 0.5982028186899865, "learning_rate": 8.258306179071368e-07, "loss": 0.3145, "step": 18119 }, { "epoch": 0.8200950441276307, "grad_norm": 0.6180362368064066, "learning_rate": 8.254271883025377e-07, "loss": 0.2632, "step": 18120 }, { "epoch": 0.8201403032360263, "grad_norm": 0.6745805870388583, "learning_rate": 8.250238483966855e-07, "loss": 0.2411, "step": 18121 }, { "epoch": 0.8201855623444219, "grad_norm": 0.5797029407307055, "learning_rate": 8.246205981982503e-07, "loss": 0.3088, "step": 18122 }, { "epoch": 0.8202308214528173, "grad_norm": 0.5830337703042072, "learning_rate": 8.242174377158929e-07, "loss": 0.2584, "step": 18123 }, { "epoch": 0.8202760805612129, "grad_norm": 0.8578833069169375, "learning_rate": 8.238143669582794e-07, "loss": 0.348, "step": 18124 }, { "epoch": 0.8203213396696085, "grad_norm": 0.5814887271750685, "learning_rate": 8.234113859340687e-07, "loss": 0.2835, "step": 18125 }, { "epoch": 0.8203665987780041, "grad_norm": 0.994467368324847, "learning_rate": 8.23008494651919e-07, "loss": 0.3162, "step": 18126 }, { "epoch": 0.8204118578863996, "grad_norm": 0.6106514026492696, "learning_rate": 8.226056931204879e-07, "loss": 0.2825, "step": 18127 }, { "epoch": 0.8204571169947952, "grad_norm": 0.6112142660925025, "learning_rate": 8.222029813484333e-07, "loss": 0.3031, "step": 18128 }, { "epoch": 0.8205023761031908, "grad_norm": 0.6635302954408059, "learning_rate": 8.218003593444029e-07, "loss": 0.3145, "step": 18129 }, { "epoch": 0.8205476352115864, "grad_norm": 0.6091885895922927, "learning_rate": 8.213978271170503e-07, "loss": 0.2666, "step": 18130 }, { "epoch": 0.820592894319982, "grad_norm": 0.6142540711638156, "learning_rate": 8.209953846750257e-07, "loss": 0.3235, "step": 18131 }, { "epoch": 0.8206381534283774, "grad_norm": 0.32349620026022546, "learning_rate": 8.205930320269762e-07, "loss": 0.4826, "step": 18132 }, { "epoch": 0.820683412536773, "grad_norm": 0.6565366682003055, "learning_rate": 8.201907691815448e-07, "loss": 0.3094, "step": 18133 }, { "epoch": 0.8207286716451686, "grad_norm": 1.3502509981198274, "learning_rate": 8.197885961473773e-07, "loss": 0.3032, "step": 18134 }, { "epoch": 0.8207739307535642, "grad_norm": 0.5785720017965967, "learning_rate": 8.193865129331136e-07, "loss": 0.2772, "step": 18135 }, { "epoch": 0.8208191898619597, "grad_norm": 0.28259587502298983, "learning_rate": 8.18984519547395e-07, "loss": 0.4586, "step": 18136 }, { "epoch": 0.8208644489703553, "grad_norm": 0.5966135804840258, "learning_rate": 8.18582615998857e-07, "loss": 0.2603, "step": 18137 }, { "epoch": 0.8209097080787509, "grad_norm": 0.6561140381210594, "learning_rate": 8.181808022961374e-07, "loss": 0.3201, "step": 18138 }, { "epoch": 0.8209549671871464, "grad_norm": 0.6647527581768201, "learning_rate": 8.177790784478679e-07, "loss": 0.2884, "step": 18139 }, { "epoch": 0.8210002262955419, "grad_norm": 0.24971824884844132, "learning_rate": 8.173774444626819e-07, "loss": 0.4565, "step": 18140 }, { "epoch": 0.8210454854039375, "grad_norm": 0.6438764482155982, "learning_rate": 8.169759003492095e-07, "loss": 0.3287, "step": 18141 }, { "epoch": 0.8210907445123331, "grad_norm": 0.6143142139977565, "learning_rate": 8.165744461160763e-07, "loss": 0.3141, "step": 18142 }, { "epoch": 0.8211360036207287, "grad_norm": 0.25061996236946416, "learning_rate": 8.161730817719094e-07, "loss": 0.4639, "step": 18143 }, { "epoch": 0.8211812627291243, "grad_norm": 0.26770998240193866, "learning_rate": 8.157718073253351e-07, "loss": 0.461, "step": 18144 }, { "epoch": 0.8212265218375198, "grad_norm": 0.6405285204560663, "learning_rate": 8.153706227849734e-07, "loss": 0.2556, "step": 18145 }, { "epoch": 0.8212717809459154, "grad_norm": 0.637428253048071, "learning_rate": 8.149695281594438e-07, "loss": 0.2818, "step": 18146 }, { "epoch": 0.8213170400543109, "grad_norm": 0.6201304150041989, "learning_rate": 8.145685234573675e-07, "loss": 0.3443, "step": 18147 }, { "epoch": 0.8213622991627065, "grad_norm": 0.5821698916057503, "learning_rate": 8.141676086873574e-07, "loss": 0.3329, "step": 18148 }, { "epoch": 0.821407558271102, "grad_norm": 0.2703313986797029, "learning_rate": 8.137667838580304e-07, "loss": 0.4701, "step": 18149 }, { "epoch": 0.8214528173794976, "grad_norm": 0.5871374932622914, "learning_rate": 8.13366048977997e-07, "loss": 0.252, "step": 18150 }, { "epoch": 0.8214980764878932, "grad_norm": 0.6248557357627881, "learning_rate": 8.12965404055871e-07, "loss": 0.3217, "step": 18151 }, { "epoch": 0.8215433355962888, "grad_norm": 0.6547924784275919, "learning_rate": 8.125648491002569e-07, "loss": 0.2855, "step": 18152 }, { "epoch": 0.8215885947046844, "grad_norm": 0.6527228580605702, "learning_rate": 8.121643841197652e-07, "loss": 0.29, "step": 18153 }, { "epoch": 0.8216338538130799, "grad_norm": 0.6755647792905147, "learning_rate": 8.117640091229984e-07, "loss": 0.3293, "step": 18154 }, { "epoch": 0.8216791129214754, "grad_norm": 0.5999022378057997, "learning_rate": 8.11363724118559e-07, "loss": 0.2874, "step": 18155 }, { "epoch": 0.821724372029871, "grad_norm": 0.6007954263800855, "learning_rate": 8.109635291150492e-07, "loss": 0.252, "step": 18156 }, { "epoch": 0.8217696311382666, "grad_norm": 0.7865647067064967, "learning_rate": 8.105634241210692e-07, "loss": 0.3278, "step": 18157 }, { "epoch": 0.8218148902466621, "grad_norm": 0.27734227101871806, "learning_rate": 8.101634091452121e-07, "loss": 0.4878, "step": 18158 }, { "epoch": 0.8218601493550577, "grad_norm": 0.5600608454366888, "learning_rate": 8.097634841960756e-07, "loss": 0.2536, "step": 18159 }, { "epoch": 0.8219054084634533, "grad_norm": 0.623917531673197, "learning_rate": 8.093636492822532e-07, "loss": 0.3144, "step": 18160 }, { "epoch": 0.8219506675718489, "grad_norm": 0.623452221799919, "learning_rate": 8.089639044123354e-07, "loss": 0.2931, "step": 18161 }, { "epoch": 0.8219959266802443, "grad_norm": 0.6132492355170043, "learning_rate": 8.085642495949108e-07, "loss": 0.2827, "step": 18162 }, { "epoch": 0.8220411857886399, "grad_norm": 0.6219717134997269, "learning_rate": 8.081646848385671e-07, "loss": 0.3229, "step": 18163 }, { "epoch": 0.8220864448970355, "grad_norm": 0.5882007765149354, "learning_rate": 8.077652101518918e-07, "loss": 0.2871, "step": 18164 }, { "epoch": 0.8221317040054311, "grad_norm": 0.2626698915552704, "learning_rate": 8.073658255434658e-07, "loss": 0.4568, "step": 18165 }, { "epoch": 0.8221769631138267, "grad_norm": 0.8530835081708823, "learning_rate": 8.06966531021871e-07, "loss": 0.3057, "step": 18166 }, { "epoch": 0.8222222222222222, "grad_norm": 0.6556701708777884, "learning_rate": 8.065673265956886e-07, "loss": 0.3907, "step": 18167 }, { "epoch": 0.8222674813306178, "grad_norm": 0.27094993368097087, "learning_rate": 8.061682122734937e-07, "loss": 0.4701, "step": 18168 }, { "epoch": 0.8223127404390134, "grad_norm": 0.5941525019158841, "learning_rate": 8.057691880638651e-07, "loss": 0.3179, "step": 18169 }, { "epoch": 0.822357999547409, "grad_norm": 0.5982256887096105, "learning_rate": 8.053702539753749e-07, "loss": 0.2853, "step": 18170 }, { "epoch": 0.8224032586558044, "grad_norm": 0.7014758040783442, "learning_rate": 8.04971410016594e-07, "loss": 0.2905, "step": 18171 }, { "epoch": 0.8224485177642, "grad_norm": 0.28190956210995716, "learning_rate": 8.045726561960931e-07, "loss": 0.4598, "step": 18172 }, { "epoch": 0.8224937768725956, "grad_norm": 0.6631737483533378, "learning_rate": 8.041739925224424e-07, "loss": 0.2901, "step": 18173 }, { "epoch": 0.8225390359809912, "grad_norm": 0.643318251146491, "learning_rate": 8.037754190042058e-07, "loss": 0.2919, "step": 18174 }, { "epoch": 0.8225842950893867, "grad_norm": 0.2627393483539942, "learning_rate": 8.033769356499466e-07, "loss": 0.4767, "step": 18175 }, { "epoch": 0.8226295541977823, "grad_norm": 0.5960163959853573, "learning_rate": 8.029785424682291e-07, "loss": 0.2922, "step": 18176 }, { "epoch": 0.8226748133061779, "grad_norm": 0.6325423313576644, "learning_rate": 8.025802394676114e-07, "loss": 0.3352, "step": 18177 }, { "epoch": 0.8227200724145735, "grad_norm": 0.6717469644769754, "learning_rate": 8.021820266566538e-07, "loss": 0.3123, "step": 18178 }, { "epoch": 0.822765331522969, "grad_norm": 0.6987405573728781, "learning_rate": 8.017839040439113e-07, "loss": 0.2657, "step": 18179 }, { "epoch": 0.8228105906313645, "grad_norm": 0.6914251246915453, "learning_rate": 8.013858716379396e-07, "loss": 0.3038, "step": 18180 }, { "epoch": 0.8228558497397601, "grad_norm": 0.6329902662785449, "learning_rate": 8.009879294472894e-07, "loss": 0.3245, "step": 18181 }, { "epoch": 0.8229011088481557, "grad_norm": 0.6271602808615151, "learning_rate": 8.005900774805137e-07, "loss": 0.282, "step": 18182 }, { "epoch": 0.8229463679565513, "grad_norm": 0.5855788353703362, "learning_rate": 8.001923157461594e-07, "loss": 0.2784, "step": 18183 }, { "epoch": 0.8229916270649468, "grad_norm": 0.661459765686884, "learning_rate": 7.997946442527726e-07, "loss": 0.2903, "step": 18184 }, { "epoch": 0.8230368861733424, "grad_norm": 0.6626994250988717, "learning_rate": 7.993970630088988e-07, "loss": 0.2791, "step": 18185 }, { "epoch": 0.823082145281738, "grad_norm": 0.2745844389633259, "learning_rate": 7.989995720230837e-07, "loss": 0.4706, "step": 18186 }, { "epoch": 0.8231274043901335, "grad_norm": 0.6205753959267856, "learning_rate": 7.986021713038627e-07, "loss": 0.3139, "step": 18187 }, { "epoch": 0.8231726634985291, "grad_norm": 0.2632761372907264, "learning_rate": 7.982048608597776e-07, "loss": 0.4659, "step": 18188 }, { "epoch": 0.8232179226069246, "grad_norm": 0.7485575335159796, "learning_rate": 7.978076406993662e-07, "loss": 0.2786, "step": 18189 }, { "epoch": 0.8232631817153202, "grad_norm": 0.6566044462595303, "learning_rate": 7.974105108311625e-07, "loss": 0.2959, "step": 18190 }, { "epoch": 0.8233084408237158, "grad_norm": 0.2937101749404086, "learning_rate": 7.970134712636984e-07, "loss": 0.473, "step": 18191 }, { "epoch": 0.8233536999321114, "grad_norm": 0.6265125798422845, "learning_rate": 7.966165220055067e-07, "loss": 0.3067, "step": 18192 }, { "epoch": 0.8233989590405069, "grad_norm": 0.6204538426926942, "learning_rate": 7.96219663065117e-07, "loss": 0.2693, "step": 18193 }, { "epoch": 0.8234442181489025, "grad_norm": 0.6668241049027904, "learning_rate": 7.95822894451056e-07, "loss": 0.2908, "step": 18194 }, { "epoch": 0.823489477257298, "grad_norm": 0.6167276572327692, "learning_rate": 7.954262161718479e-07, "loss": 0.2979, "step": 18195 }, { "epoch": 0.8235347363656936, "grad_norm": 0.263679280167042, "learning_rate": 7.950296282360181e-07, "loss": 0.4892, "step": 18196 }, { "epoch": 0.8235799954740891, "grad_norm": 0.7312264021619062, "learning_rate": 7.946331306520854e-07, "loss": 0.2942, "step": 18197 }, { "epoch": 0.8236252545824847, "grad_norm": 0.6937632900134934, "learning_rate": 7.942367234285725e-07, "loss": 0.2761, "step": 18198 }, { "epoch": 0.8236705136908803, "grad_norm": 0.6389653819189878, "learning_rate": 7.938404065739952e-07, "loss": 0.3461, "step": 18199 }, { "epoch": 0.8237157727992759, "grad_norm": 0.7198943347630627, "learning_rate": 7.934441800968684e-07, "loss": 0.3216, "step": 18200 }, { "epoch": 0.8237610319076715, "grad_norm": 0.9748183117086185, "learning_rate": 7.93048044005707e-07, "loss": 0.2941, "step": 18201 }, { "epoch": 0.823806291016067, "grad_norm": 0.26246050734572146, "learning_rate": 7.92651998309023e-07, "loss": 0.4613, "step": 18202 }, { "epoch": 0.8238515501244625, "grad_norm": 0.2857559137902954, "learning_rate": 7.922560430153259e-07, "loss": 0.471, "step": 18203 }, { "epoch": 0.8238968092328581, "grad_norm": 0.6119988564709138, "learning_rate": 7.918601781331225e-07, "loss": 0.2734, "step": 18204 }, { "epoch": 0.8239420683412537, "grad_norm": 0.6360295830543973, "learning_rate": 7.914644036709202e-07, "loss": 0.3015, "step": 18205 }, { "epoch": 0.8239873274496492, "grad_norm": 0.5614432284169272, "learning_rate": 7.910687196372214e-07, "loss": 0.3613, "step": 18206 }, { "epoch": 0.8240325865580448, "grad_norm": 0.28829720900091843, "learning_rate": 7.906731260405304e-07, "loss": 0.4515, "step": 18207 }, { "epoch": 0.8240778456664404, "grad_norm": 0.5525539167791396, "learning_rate": 7.902776228893444e-07, "loss": 0.2823, "step": 18208 }, { "epoch": 0.824123104774836, "grad_norm": 0.6762310852060991, "learning_rate": 7.898822101921644e-07, "loss": 0.3265, "step": 18209 }, { "epoch": 0.8241683638832314, "grad_norm": 0.6259275549416197, "learning_rate": 7.894868879574847e-07, "loss": 0.2474, "step": 18210 }, { "epoch": 0.824213622991627, "grad_norm": 0.6182305289073031, "learning_rate": 7.890916561938006e-07, "loss": 0.2945, "step": 18211 }, { "epoch": 0.8242588821000226, "grad_norm": 0.634163999330956, "learning_rate": 7.886965149096044e-07, "loss": 0.3004, "step": 18212 }, { "epoch": 0.8243041412084182, "grad_norm": 0.613473307222863, "learning_rate": 7.883014641133846e-07, "loss": 0.2223, "step": 18213 }, { "epoch": 0.8243494003168138, "grad_norm": 0.590548117790914, "learning_rate": 7.879065038136314e-07, "loss": 0.2979, "step": 18214 }, { "epoch": 0.8243946594252093, "grad_norm": 0.6453971910214398, "learning_rate": 7.875116340188333e-07, "loss": 0.3004, "step": 18215 }, { "epoch": 0.8244399185336049, "grad_norm": 0.2649864985329331, "learning_rate": 7.871168547374697e-07, "loss": 0.4756, "step": 18216 }, { "epoch": 0.8244851776420005, "grad_norm": 0.6405631892725506, "learning_rate": 7.867221659780267e-07, "loss": 0.2831, "step": 18217 }, { "epoch": 0.8245304367503961, "grad_norm": 0.6321704014564995, "learning_rate": 7.863275677489851e-07, "loss": 0.308, "step": 18218 }, { "epoch": 0.8245756958587915, "grad_norm": 0.6509092342644126, "learning_rate": 7.859330600588228e-07, "loss": 0.2833, "step": 18219 }, { "epoch": 0.8246209549671871, "grad_norm": 0.6273536804779368, "learning_rate": 7.85538642916015e-07, "loss": 0.2995, "step": 18220 }, { "epoch": 0.8246662140755827, "grad_norm": 0.587258924680402, "learning_rate": 7.851443163290385e-07, "loss": 0.2725, "step": 18221 }, { "epoch": 0.8247114731839783, "grad_norm": 0.6799857601085152, "learning_rate": 7.847500803063668e-07, "loss": 0.2936, "step": 18222 }, { "epoch": 0.8247567322923739, "grad_norm": 0.2675036031094028, "learning_rate": 7.843559348564694e-07, "loss": 0.458, "step": 18223 }, { "epoch": 0.8248019914007694, "grad_norm": 0.6276520182225253, "learning_rate": 7.839618799878146e-07, "loss": 0.3493, "step": 18224 }, { "epoch": 0.824847250509165, "grad_norm": 0.6380138615809682, "learning_rate": 7.835679157088716e-07, "loss": 0.3136, "step": 18225 }, { "epoch": 0.8248925096175606, "grad_norm": 0.5879257908734836, "learning_rate": 7.831740420281031e-07, "loss": 0.2854, "step": 18226 }, { "epoch": 0.8249377687259561, "grad_norm": 0.6046994935357449, "learning_rate": 7.827802589539751e-07, "loss": 0.2752, "step": 18227 }, { "epoch": 0.8249830278343516, "grad_norm": 0.2677090660018164, "learning_rate": 7.823865664949464e-07, "loss": 0.4613, "step": 18228 }, { "epoch": 0.8250282869427472, "grad_norm": 0.672067112995326, "learning_rate": 7.819929646594765e-07, "loss": 0.2667, "step": 18229 }, { "epoch": 0.8250735460511428, "grad_norm": 0.6228260122442586, "learning_rate": 7.815994534560228e-07, "loss": 0.2792, "step": 18230 }, { "epoch": 0.8251188051595384, "grad_norm": 0.6467169822046223, "learning_rate": 7.812060328930421e-07, "loss": 0.3122, "step": 18231 }, { "epoch": 0.8251640642679339, "grad_norm": 0.5694912430159714, "learning_rate": 7.808127029789869e-07, "loss": 0.2974, "step": 18232 }, { "epoch": 0.8252093233763295, "grad_norm": 0.2691041012160917, "learning_rate": 7.804194637223073e-07, "loss": 0.4857, "step": 18233 }, { "epoch": 0.825254582484725, "grad_norm": 0.6448056928408912, "learning_rate": 7.800263151314536e-07, "loss": 0.2878, "step": 18234 }, { "epoch": 0.8252998415931206, "grad_norm": 0.5742698318047919, "learning_rate": 7.796332572148752e-07, "loss": 0.2713, "step": 18235 }, { "epoch": 0.8253451007015162, "grad_norm": 0.6351411007375356, "learning_rate": 7.792402899810164e-07, "loss": 0.2851, "step": 18236 }, { "epoch": 0.8253903598099117, "grad_norm": 0.6244020735748892, "learning_rate": 7.788474134383195e-07, "loss": 0.2896, "step": 18237 }, { "epoch": 0.8254356189183073, "grad_norm": 0.27950882607118965, "learning_rate": 7.784546275952281e-07, "loss": 0.4723, "step": 18238 }, { "epoch": 0.8254808780267029, "grad_norm": 0.622105002869538, "learning_rate": 7.780619324601807e-07, "loss": 0.2944, "step": 18239 }, { "epoch": 0.8255261371350985, "grad_norm": 0.6414907773269223, "learning_rate": 7.776693280416164e-07, "loss": 0.2871, "step": 18240 }, { "epoch": 0.825571396243494, "grad_norm": 0.9654656942328771, "learning_rate": 7.772768143479703e-07, "loss": 0.3158, "step": 18241 }, { "epoch": 0.8256166553518896, "grad_norm": 0.7863181674165229, "learning_rate": 7.768843913876756e-07, "loss": 0.3062, "step": 18242 }, { "epoch": 0.8256619144602851, "grad_norm": 0.5856414481972946, "learning_rate": 7.76492059169165e-07, "loss": 0.2808, "step": 18243 }, { "epoch": 0.8257071735686807, "grad_norm": 0.7651089549539092, "learning_rate": 7.760998177008694e-07, "loss": 0.2964, "step": 18244 }, { "epoch": 0.8257524326770762, "grad_norm": 0.987347191861977, "learning_rate": 7.757076669912162e-07, "loss": 0.3085, "step": 18245 }, { "epoch": 0.8257976917854718, "grad_norm": 0.6591369804984707, "learning_rate": 7.7531560704863e-07, "loss": 0.3031, "step": 18246 }, { "epoch": 0.8258429508938674, "grad_norm": 0.5720351463462361, "learning_rate": 7.749236378815372e-07, "loss": 0.2856, "step": 18247 }, { "epoch": 0.825888210002263, "grad_norm": 0.6792636477123376, "learning_rate": 7.745317594983598e-07, "loss": 0.3181, "step": 18248 }, { "epoch": 0.8259334691106586, "grad_norm": 0.2756247302938806, "learning_rate": 7.741399719075154e-07, "loss": 0.4846, "step": 18249 }, { "epoch": 0.825978728219054, "grad_norm": 0.6683871282988517, "learning_rate": 7.737482751174247e-07, "loss": 0.3261, "step": 18250 }, { "epoch": 0.8260239873274496, "grad_norm": 0.5851850370664479, "learning_rate": 7.733566691365047e-07, "loss": 0.2598, "step": 18251 }, { "epoch": 0.8260692464358452, "grad_norm": 0.6077307922996963, "learning_rate": 7.729651539731686e-07, "loss": 0.3272, "step": 18252 }, { "epoch": 0.8261145055442408, "grad_norm": 0.6496080829350848, "learning_rate": 7.725737296358283e-07, "loss": 0.3178, "step": 18253 }, { "epoch": 0.8261597646526363, "grad_norm": 0.6895525446763424, "learning_rate": 7.721823961328955e-07, "loss": 0.3069, "step": 18254 }, { "epoch": 0.8262050237610319, "grad_norm": 0.6007041300072309, "learning_rate": 7.717911534727778e-07, "loss": 0.3144, "step": 18255 }, { "epoch": 0.8262502828694275, "grad_norm": 0.6795520769191841, "learning_rate": 7.714000016638829e-07, "loss": 0.3323, "step": 18256 }, { "epoch": 0.8262955419778231, "grad_norm": 0.5915455209682717, "learning_rate": 7.710089407146154e-07, "loss": 0.2825, "step": 18257 }, { "epoch": 0.8263408010862185, "grad_norm": 0.24806576764877158, "learning_rate": 7.706179706333755e-07, "loss": 0.4318, "step": 18258 }, { "epoch": 0.8263860601946141, "grad_norm": 0.6538030184224267, "learning_rate": 7.702270914285664e-07, "loss": 0.2767, "step": 18259 }, { "epoch": 0.8264313193030097, "grad_norm": 0.6195635754908934, "learning_rate": 7.698363031085871e-07, "loss": 0.2771, "step": 18260 }, { "epoch": 0.8264765784114053, "grad_norm": 0.6598416768005659, "learning_rate": 7.694456056818339e-07, "loss": 0.2988, "step": 18261 }, { "epoch": 0.8265218375198009, "grad_norm": 0.28054173240776076, "learning_rate": 7.690549991567004e-07, "loss": 0.4652, "step": 18262 }, { "epoch": 0.8265670966281964, "grad_norm": 0.6181440261644516, "learning_rate": 7.686644835415808e-07, "loss": 0.2588, "step": 18263 }, { "epoch": 0.826612355736592, "grad_norm": 0.6146368913346695, "learning_rate": 7.682740588448667e-07, "loss": 0.3479, "step": 18264 }, { "epoch": 0.8266576148449876, "grad_norm": 0.25630777827620876, "learning_rate": 7.67883725074946e-07, "loss": 0.4507, "step": 18265 }, { "epoch": 0.8267028739533832, "grad_norm": 0.5530316672521514, "learning_rate": 7.674934822402052e-07, "loss": 0.3121, "step": 18266 }, { "epoch": 0.8267481330617786, "grad_norm": 1.172841472236167, "learning_rate": 7.671033303490321e-07, "loss": 0.3163, "step": 18267 }, { "epoch": 0.8267933921701742, "grad_norm": 0.5813172977654625, "learning_rate": 7.667132694098061e-07, "loss": 0.3352, "step": 18268 }, { "epoch": 0.8268386512785698, "grad_norm": 0.678137676491045, "learning_rate": 7.663232994309122e-07, "loss": 0.281, "step": 18269 }, { "epoch": 0.8268839103869654, "grad_norm": 0.6224508351144359, "learning_rate": 7.659334204207275e-07, "loss": 0.3451, "step": 18270 }, { "epoch": 0.826929169495361, "grad_norm": 0.6507117854871701, "learning_rate": 7.655436323876286e-07, "loss": 0.2906, "step": 18271 }, { "epoch": 0.8269744286037565, "grad_norm": 0.6180031129751423, "learning_rate": 7.651539353399917e-07, "loss": 0.3121, "step": 18272 }, { "epoch": 0.8270196877121521, "grad_norm": 0.7227123766917248, "learning_rate": 7.647643292861917e-07, "loss": 0.3068, "step": 18273 }, { "epoch": 0.8270649468205477, "grad_norm": 0.6440853563337421, "learning_rate": 7.643748142345985e-07, "loss": 0.2751, "step": 18274 }, { "epoch": 0.8271102059289432, "grad_norm": 0.6618308941143313, "learning_rate": 7.639853901935812e-07, "loss": 0.2505, "step": 18275 }, { "epoch": 0.8271554650373387, "grad_norm": 0.6067429456867909, "learning_rate": 7.635960571715073e-07, "loss": 0.2894, "step": 18276 }, { "epoch": 0.8272007241457343, "grad_norm": 0.6330449486181741, "learning_rate": 7.632068151767447e-07, "loss": 0.2784, "step": 18277 }, { "epoch": 0.8272459832541299, "grad_norm": 0.6117914158146655, "learning_rate": 7.628176642176549e-07, "loss": 0.2569, "step": 18278 }, { "epoch": 0.8272912423625255, "grad_norm": 0.5901946253104174, "learning_rate": 7.624286043025991e-07, "loss": 0.2808, "step": 18279 }, { "epoch": 0.827336501470921, "grad_norm": 0.6189254349398654, "learning_rate": 7.62039635439939e-07, "loss": 0.3258, "step": 18280 }, { "epoch": 0.8273817605793166, "grad_norm": 0.5864574378011577, "learning_rate": 7.616507576380311e-07, "loss": 0.2848, "step": 18281 }, { "epoch": 0.8274270196877122, "grad_norm": 0.6289188315276548, "learning_rate": 7.612619709052305e-07, "loss": 0.3302, "step": 18282 }, { "epoch": 0.8274722787961077, "grad_norm": 0.6599194063754822, "learning_rate": 7.608732752498926e-07, "loss": 0.2773, "step": 18283 }, { "epoch": 0.8275175379045033, "grad_norm": 0.7143180101946018, "learning_rate": 7.604846706803676e-07, "loss": 0.306, "step": 18284 }, { "epoch": 0.8275627970128988, "grad_norm": 0.5686795089354336, "learning_rate": 7.600961572050076e-07, "loss": 0.2777, "step": 18285 }, { "epoch": 0.8276080561212944, "grad_norm": 0.5544043684290126, "learning_rate": 7.59707734832159e-07, "loss": 0.3026, "step": 18286 }, { "epoch": 0.82765331522969, "grad_norm": 0.26360211053535226, "learning_rate": 7.593194035701667e-07, "loss": 0.4679, "step": 18287 }, { "epoch": 0.8276985743380856, "grad_norm": 0.6223757948396181, "learning_rate": 7.589311634273766e-07, "loss": 0.3124, "step": 18288 }, { "epoch": 0.8277438334464811, "grad_norm": 0.7011240909472778, "learning_rate": 7.585430144121319e-07, "loss": 0.2925, "step": 18289 }, { "epoch": 0.8277890925548766, "grad_norm": 0.2836839740412367, "learning_rate": 7.581549565327706e-07, "loss": 0.4616, "step": 18290 }, { "epoch": 0.8278343516632722, "grad_norm": 0.5970258776833657, "learning_rate": 7.577669897976303e-07, "loss": 0.2795, "step": 18291 }, { "epoch": 0.8278796107716678, "grad_norm": 0.7240878255723272, "learning_rate": 7.573791142150488e-07, "loss": 0.2582, "step": 18292 }, { "epoch": 0.8279248698800633, "grad_norm": 0.2803187872537055, "learning_rate": 7.569913297933606e-07, "loss": 0.4656, "step": 18293 }, { "epoch": 0.8279701289884589, "grad_norm": 0.6074138860738469, "learning_rate": 7.566036365408974e-07, "loss": 0.3276, "step": 18294 }, { "epoch": 0.8280153880968545, "grad_norm": 0.6355019438669003, "learning_rate": 7.562160344659886e-07, "loss": 0.2956, "step": 18295 }, { "epoch": 0.8280606472052501, "grad_norm": 0.6349727647123978, "learning_rate": 7.558285235769647e-07, "loss": 0.3196, "step": 18296 }, { "epoch": 0.8281059063136457, "grad_norm": 0.24541544171528995, "learning_rate": 7.55441103882149e-07, "loss": 0.4601, "step": 18297 }, { "epoch": 0.8281511654220411, "grad_norm": 0.5882825549019338, "learning_rate": 7.550537753898696e-07, "loss": 0.2831, "step": 18298 }, { "epoch": 0.8281964245304367, "grad_norm": 0.6734086488781459, "learning_rate": 7.546665381084467e-07, "loss": 0.2905, "step": 18299 }, { "epoch": 0.8282416836388323, "grad_norm": 0.6355215550001918, "learning_rate": 7.542793920462005e-07, "loss": 0.2904, "step": 18300 }, { "epoch": 0.8282869427472279, "grad_norm": 0.6804674889052729, "learning_rate": 7.538923372114504e-07, "loss": 0.3429, "step": 18301 }, { "epoch": 0.8283322018556234, "grad_norm": 0.6514486631880623, "learning_rate": 7.535053736125142e-07, "loss": 0.296, "step": 18302 }, { "epoch": 0.828377460964019, "grad_norm": 0.2913101001381168, "learning_rate": 7.531185012577052e-07, "loss": 0.4688, "step": 18303 }, { "epoch": 0.8284227200724146, "grad_norm": 0.6438046832602575, "learning_rate": 7.527317201553358e-07, "loss": 0.2999, "step": 18304 }, { "epoch": 0.8284679791808102, "grad_norm": 0.6767431632880623, "learning_rate": 7.523450303137164e-07, "loss": 0.2919, "step": 18305 }, { "epoch": 0.8285132382892058, "grad_norm": 0.5343222301415081, "learning_rate": 7.519584317411582e-07, "loss": 0.2844, "step": 18306 }, { "epoch": 0.8285584973976012, "grad_norm": 0.2829817591705852, "learning_rate": 7.515719244459668e-07, "loss": 0.4714, "step": 18307 }, { "epoch": 0.8286037565059968, "grad_norm": 0.6824523978433078, "learning_rate": 7.51185508436445e-07, "loss": 0.3009, "step": 18308 }, { "epoch": 0.8286490156143924, "grad_norm": 0.5993978929754125, "learning_rate": 7.507991837208989e-07, "loss": 0.2714, "step": 18309 }, { "epoch": 0.828694274722788, "grad_norm": 0.5934461993346714, "learning_rate": 7.504129503076263e-07, "loss": 0.2783, "step": 18310 }, { "epoch": 0.8287395338311835, "grad_norm": 0.6268985757761065, "learning_rate": 7.500268082049294e-07, "loss": 0.3129, "step": 18311 }, { "epoch": 0.8287847929395791, "grad_norm": 0.630009354553089, "learning_rate": 7.496407574211034e-07, "loss": 0.2818, "step": 18312 }, { "epoch": 0.8288300520479747, "grad_norm": 0.5945913692915438, "learning_rate": 7.492547979644421e-07, "loss": 0.2825, "step": 18313 }, { "epoch": 0.8288753111563703, "grad_norm": 0.6051115804362879, "learning_rate": 7.488689298432406e-07, "loss": 0.2916, "step": 18314 }, { "epoch": 0.8289205702647657, "grad_norm": 0.6861148878623919, "learning_rate": 7.484831530657916e-07, "loss": 0.3228, "step": 18315 }, { "epoch": 0.8289658293731613, "grad_norm": 2.0859298361444627, "learning_rate": 7.480974676403796e-07, "loss": 0.2573, "step": 18316 }, { "epoch": 0.8290110884815569, "grad_norm": 0.6780884526798012, "learning_rate": 7.477118735752942e-07, "loss": 0.3188, "step": 18317 }, { "epoch": 0.8290563475899525, "grad_norm": 0.6263272543005641, "learning_rate": 7.47326370878822e-07, "loss": 0.2828, "step": 18318 }, { "epoch": 0.8291016066983481, "grad_norm": 0.27210140659531634, "learning_rate": 7.469409595592453e-07, "loss": 0.4676, "step": 18319 }, { "epoch": 0.8291468658067436, "grad_norm": 0.27514811354097995, "learning_rate": 7.465556396248436e-07, "loss": 0.4613, "step": 18320 }, { "epoch": 0.8291921249151392, "grad_norm": 0.6326532116835614, "learning_rate": 7.461704110838974e-07, "loss": 0.2831, "step": 18321 }, { "epoch": 0.8292373840235348, "grad_norm": 0.5798521668499196, "learning_rate": 7.457852739446864e-07, "loss": 0.2694, "step": 18322 }, { "epoch": 0.8292826431319303, "grad_norm": 0.6280232175895027, "learning_rate": 7.454002282154838e-07, "loss": 0.3344, "step": 18323 }, { "epoch": 0.8293279022403258, "grad_norm": 0.6556717243167894, "learning_rate": 7.450152739045618e-07, "loss": 0.2764, "step": 18324 }, { "epoch": 0.8293731613487214, "grad_norm": 0.6230288274977733, "learning_rate": 7.446304110201947e-07, "loss": 0.2853, "step": 18325 }, { "epoch": 0.829418420457117, "grad_norm": 1.2027587348251012, "learning_rate": 7.442456395706493e-07, "loss": 0.3202, "step": 18326 }, { "epoch": 0.8294636795655126, "grad_norm": 0.5805160457644685, "learning_rate": 7.43860959564196e-07, "loss": 0.273, "step": 18327 }, { "epoch": 0.8295089386739081, "grad_norm": 0.6256624824026773, "learning_rate": 7.434763710090991e-07, "loss": 0.2808, "step": 18328 }, { "epoch": 0.8295541977823037, "grad_norm": 0.2540345517953192, "learning_rate": 7.430918739136206e-07, "loss": 0.4638, "step": 18329 }, { "epoch": 0.8295994568906992, "grad_norm": 0.627344755177281, "learning_rate": 7.427074682860242e-07, "loss": 0.3041, "step": 18330 }, { "epoch": 0.8296447159990948, "grad_norm": 0.5883979495725812, "learning_rate": 7.423231541345694e-07, "loss": 0.3091, "step": 18331 }, { "epoch": 0.8296899751074904, "grad_norm": 0.6278410167294457, "learning_rate": 7.41938931467514e-07, "loss": 0.2699, "step": 18332 }, { "epoch": 0.8297352342158859, "grad_norm": 0.2937449638440562, "learning_rate": 7.415548002931122e-07, "loss": 0.4733, "step": 18333 }, { "epoch": 0.8297804933242815, "grad_norm": 0.2606301138768154, "learning_rate": 7.411707606196189e-07, "loss": 0.4704, "step": 18334 }, { "epoch": 0.8298257524326771, "grad_norm": 0.6100340455285485, "learning_rate": 7.40786812455287e-07, "loss": 0.2954, "step": 18335 }, { "epoch": 0.8298710115410727, "grad_norm": 0.2608413139470748, "learning_rate": 7.404029558083653e-07, "loss": 0.4779, "step": 18336 }, { "epoch": 0.8299162706494682, "grad_norm": 0.6010332039258852, "learning_rate": 7.400191906871007e-07, "loss": 0.2791, "step": 18337 }, { "epoch": 0.8299615297578637, "grad_norm": 0.6136009454379556, "learning_rate": 7.396355170997411e-07, "loss": 0.2787, "step": 18338 }, { "epoch": 0.8300067888662593, "grad_norm": 0.6287725134443892, "learning_rate": 7.392519350545286e-07, "loss": 0.2675, "step": 18339 }, { "epoch": 0.8300520479746549, "grad_norm": 0.6205791387005112, "learning_rate": 7.388684445597072e-07, "loss": 0.2872, "step": 18340 }, { "epoch": 0.8300973070830505, "grad_norm": 0.6596207022524252, "learning_rate": 7.384850456235154e-07, "loss": 0.289, "step": 18341 }, { "epoch": 0.830142566191446, "grad_norm": 0.6058336742195085, "learning_rate": 7.38101738254191e-07, "loss": 0.2587, "step": 18342 }, { "epoch": 0.8301878252998416, "grad_norm": 0.6265365425373657, "learning_rate": 7.377185224599709e-07, "loss": 0.308, "step": 18343 }, { "epoch": 0.8302330844082372, "grad_norm": 0.6376954651218508, "learning_rate": 7.373353982490916e-07, "loss": 0.3419, "step": 18344 }, { "epoch": 0.8302783435166328, "grad_norm": 0.6332723305238434, "learning_rate": 7.369523656297805e-07, "loss": 0.2814, "step": 18345 }, { "epoch": 0.8303236026250282, "grad_norm": 0.6078735243851932, "learning_rate": 7.3656942461027e-07, "loss": 0.2584, "step": 18346 }, { "epoch": 0.8303688617334238, "grad_norm": 0.619477069126535, "learning_rate": 7.361865751987879e-07, "loss": 0.2874, "step": 18347 }, { "epoch": 0.8304141208418194, "grad_norm": 0.5551233601014256, "learning_rate": 7.358038174035642e-07, "loss": 0.2428, "step": 18348 }, { "epoch": 0.830459379950215, "grad_norm": 0.6291363041257584, "learning_rate": 7.354211512328169e-07, "loss": 0.3064, "step": 18349 }, { "epoch": 0.8305046390586105, "grad_norm": 0.5688927307438404, "learning_rate": 7.350385766947721e-07, "loss": 0.2716, "step": 18350 }, { "epoch": 0.8305498981670061, "grad_norm": 0.2701299884443355, "learning_rate": 7.346560937976499e-07, "loss": 0.4701, "step": 18351 }, { "epoch": 0.8305951572754017, "grad_norm": 0.5826385085192366, "learning_rate": 7.342737025496688e-07, "loss": 0.3139, "step": 18352 }, { "epoch": 0.8306404163837973, "grad_norm": 0.5941275237233942, "learning_rate": 7.338914029590432e-07, "loss": 0.3124, "step": 18353 }, { "epoch": 0.8306856754921929, "grad_norm": 0.661803715012073, "learning_rate": 7.335091950339901e-07, "loss": 0.2959, "step": 18354 }, { "epoch": 0.8307309346005883, "grad_norm": 0.5813518898375629, "learning_rate": 7.3312707878272e-07, "loss": 0.2954, "step": 18355 }, { "epoch": 0.8307761937089839, "grad_norm": 0.6580523114531696, "learning_rate": 7.327450542134457e-07, "loss": 0.3009, "step": 18356 }, { "epoch": 0.8308214528173795, "grad_norm": 0.6537994532148272, "learning_rate": 7.323631213343735e-07, "loss": 0.3442, "step": 18357 }, { "epoch": 0.8308667119257751, "grad_norm": 0.26140961085679265, "learning_rate": 7.319812801537101e-07, "loss": 0.4757, "step": 18358 }, { "epoch": 0.8309119710341706, "grad_norm": 0.2807159183875202, "learning_rate": 7.315995306796608e-07, "loss": 0.4598, "step": 18359 }, { "epoch": 0.8309572301425662, "grad_norm": 0.635033300667888, "learning_rate": 7.312178729204294e-07, "loss": 0.2972, "step": 18360 }, { "epoch": 0.8310024892509618, "grad_norm": 0.2736155481440812, "learning_rate": 7.30836306884215e-07, "loss": 0.4711, "step": 18361 }, { "epoch": 0.8310477483593574, "grad_norm": 0.6417947220928917, "learning_rate": 7.304548325792154e-07, "loss": 0.2794, "step": 18362 }, { "epoch": 0.8310930074677528, "grad_norm": 0.5762612450271447, "learning_rate": 7.300734500136291e-07, "loss": 0.3218, "step": 18363 }, { "epoch": 0.8311382665761484, "grad_norm": 0.2596337538859704, "learning_rate": 7.296921591956513e-07, "loss": 0.457, "step": 18364 }, { "epoch": 0.831183525684544, "grad_norm": 0.5954486161827727, "learning_rate": 7.293109601334735e-07, "loss": 0.308, "step": 18365 }, { "epoch": 0.8312287847929396, "grad_norm": 0.6269519114385219, "learning_rate": 7.289298528352857e-07, "loss": 0.2823, "step": 18366 }, { "epoch": 0.8312740439013352, "grad_norm": 0.518563367356704, "learning_rate": 7.285488373092792e-07, "loss": 0.2188, "step": 18367 }, { "epoch": 0.8313193030097307, "grad_norm": 0.6214800550202545, "learning_rate": 7.281679135636377e-07, "loss": 0.346, "step": 18368 }, { "epoch": 0.8313645621181263, "grad_norm": 0.5959904463385176, "learning_rate": 7.27787081606549e-07, "loss": 0.2907, "step": 18369 }, { "epoch": 0.8314098212265219, "grad_norm": 0.6622710687024219, "learning_rate": 7.274063414461952e-07, "loss": 0.263, "step": 18370 }, { "epoch": 0.8314550803349174, "grad_norm": 0.5778474098970238, "learning_rate": 7.270256930907555e-07, "loss": 0.3058, "step": 18371 }, { "epoch": 0.8315003394433129, "grad_norm": 0.5545792197697489, "learning_rate": 7.266451365484106e-07, "loss": 0.274, "step": 18372 }, { "epoch": 0.8315455985517085, "grad_norm": 0.8707822719027274, "learning_rate": 7.262646718273392e-07, "loss": 0.3256, "step": 18373 }, { "epoch": 0.8315908576601041, "grad_norm": 0.6130864604779631, "learning_rate": 7.258842989357118e-07, "loss": 0.3162, "step": 18374 }, { "epoch": 0.8316361167684997, "grad_norm": 0.6267185140516509, "learning_rate": 7.255040178817035e-07, "loss": 0.3024, "step": 18375 }, { "epoch": 0.8316813758768953, "grad_norm": 0.2661307336173574, "learning_rate": 7.251238286734863e-07, "loss": 0.4653, "step": 18376 }, { "epoch": 0.8317266349852908, "grad_norm": 0.5452519945094221, "learning_rate": 7.247437313192307e-07, "loss": 0.2627, "step": 18377 }, { "epoch": 0.8317718940936863, "grad_norm": 0.597310206470709, "learning_rate": 7.243637258270996e-07, "loss": 0.332, "step": 18378 }, { "epoch": 0.8318171532020819, "grad_norm": 0.6262737359671798, "learning_rate": 7.239838122052612e-07, "loss": 0.2937, "step": 18379 }, { "epoch": 0.8318624123104775, "grad_norm": 0.2774610859586583, "learning_rate": 7.23603990461878e-07, "loss": 0.4686, "step": 18380 }, { "epoch": 0.831907671418873, "grad_norm": 0.6236227179505209, "learning_rate": 7.232242606051115e-07, "loss": 0.2775, "step": 18381 }, { "epoch": 0.8319529305272686, "grad_norm": 0.7734252543559725, "learning_rate": 7.228446226431196e-07, "loss": 0.2429, "step": 18382 }, { "epoch": 0.8319981896356642, "grad_norm": 0.5598087332181514, "learning_rate": 7.224650765840613e-07, "loss": 0.2224, "step": 18383 }, { "epoch": 0.8320434487440598, "grad_norm": 0.2714013413523971, "learning_rate": 7.2208562243609e-07, "loss": 0.4499, "step": 18384 }, { "epoch": 0.8320887078524553, "grad_norm": 0.627473107254701, "learning_rate": 7.21706260207361e-07, "loss": 0.2592, "step": 18385 }, { "epoch": 0.8321339669608508, "grad_norm": 0.5923881783519446, "learning_rate": 7.213269899060249e-07, "loss": 0.2811, "step": 18386 }, { "epoch": 0.8321792260692464, "grad_norm": 0.26871978050997436, "learning_rate": 7.209478115402302e-07, "loss": 0.4794, "step": 18387 }, { "epoch": 0.832224485177642, "grad_norm": 0.6224968148808394, "learning_rate": 7.205687251181242e-07, "loss": 0.3104, "step": 18388 }, { "epoch": 0.8322697442860376, "grad_norm": 0.29372753562995985, "learning_rate": 7.201897306478544e-07, "loss": 0.4785, "step": 18389 }, { "epoch": 0.8323150033944331, "grad_norm": 0.5670068481317462, "learning_rate": 7.198108281375627e-07, "loss": 0.2776, "step": 18390 }, { "epoch": 0.8323602625028287, "grad_norm": 0.633384202144468, "learning_rate": 7.194320175953901e-07, "loss": 0.319, "step": 18391 }, { "epoch": 0.8324055216112243, "grad_norm": 0.6352015901863839, "learning_rate": 7.190532990294762e-07, "loss": 0.3073, "step": 18392 }, { "epoch": 0.8324507807196199, "grad_norm": 0.6621876582354893, "learning_rate": 7.186746724479599e-07, "loss": 0.3043, "step": 18393 }, { "epoch": 0.8324960398280153, "grad_norm": 1.1329216180159083, "learning_rate": 7.182961378589765e-07, "loss": 0.2757, "step": 18394 }, { "epoch": 0.8325412989364109, "grad_norm": 0.6590145185584861, "learning_rate": 7.179176952706574e-07, "loss": 0.2705, "step": 18395 }, { "epoch": 0.8325865580448065, "grad_norm": 0.710785836852594, "learning_rate": 7.175393446911366e-07, "loss": 0.3282, "step": 18396 }, { "epoch": 0.8326318171532021, "grad_norm": 0.5993552009304864, "learning_rate": 7.171610861285417e-07, "loss": 0.3095, "step": 18397 }, { "epoch": 0.8326770762615976, "grad_norm": 0.5895560821888629, "learning_rate": 7.167829195910026e-07, "loss": 0.26, "step": 18398 }, { "epoch": 0.8327223353699932, "grad_norm": 0.7544662890759368, "learning_rate": 7.164048450866435e-07, "loss": 0.2786, "step": 18399 }, { "epoch": 0.8327675944783888, "grad_norm": 0.6287372418726318, "learning_rate": 7.160268626235866e-07, "loss": 0.2823, "step": 18400 }, { "epoch": 0.8328128535867844, "grad_norm": 0.30292889477882584, "learning_rate": 7.156489722099558e-07, "loss": 0.4802, "step": 18401 }, { "epoch": 0.83285811269518, "grad_norm": 0.26542531608177117, "learning_rate": 7.152711738538725e-07, "loss": 0.4563, "step": 18402 }, { "epoch": 0.8329033718035754, "grad_norm": 0.6015363738942859, "learning_rate": 7.148934675634494e-07, "loss": 0.3128, "step": 18403 }, { "epoch": 0.832948630911971, "grad_norm": 0.5985106227128415, "learning_rate": 7.145158533468055e-07, "loss": 0.2569, "step": 18404 }, { "epoch": 0.8329938900203666, "grad_norm": 0.28834188236230374, "learning_rate": 7.141383312120536e-07, "loss": 0.4644, "step": 18405 }, { "epoch": 0.8330391491287622, "grad_norm": 0.5680174625657594, "learning_rate": 7.137609011673086e-07, "loss": 0.3215, "step": 18406 }, { "epoch": 0.8330844082371577, "grad_norm": 0.3533673673809758, "learning_rate": 7.133835632206754e-07, "loss": 0.4899, "step": 18407 }, { "epoch": 0.8331296673455533, "grad_norm": 0.5735933751162591, "learning_rate": 7.130063173802637e-07, "loss": 0.2644, "step": 18408 }, { "epoch": 0.8331749264539489, "grad_norm": 0.3244715465410745, "learning_rate": 7.126291636541815e-07, "loss": 0.4551, "step": 18409 }, { "epoch": 0.8332201855623445, "grad_norm": 0.2608028807397206, "learning_rate": 7.122521020505302e-07, "loss": 0.4724, "step": 18410 }, { "epoch": 0.83326544467074, "grad_norm": 0.6129342907073956, "learning_rate": 7.11875132577412e-07, "loss": 0.298, "step": 18411 }, { "epoch": 0.8333107037791355, "grad_norm": 0.6824953976909873, "learning_rate": 7.114982552429278e-07, "loss": 0.2856, "step": 18412 }, { "epoch": 0.8333559628875311, "grad_norm": 0.6085744465612274, "learning_rate": 7.111214700551738e-07, "loss": 0.271, "step": 18413 }, { "epoch": 0.8334012219959267, "grad_norm": 0.7326685463051631, "learning_rate": 7.107447770222486e-07, "loss": 0.2786, "step": 18414 }, { "epoch": 0.8334464811043223, "grad_norm": 0.6808975607293165, "learning_rate": 7.103681761522446e-07, "loss": 0.2859, "step": 18415 }, { "epoch": 0.8334917402127178, "grad_norm": 0.5929595113087428, "learning_rate": 7.099916674532526e-07, "loss": 0.3479, "step": 18416 }, { "epoch": 0.8335369993211134, "grad_norm": 0.2701159437656839, "learning_rate": 7.096152509333642e-07, "loss": 0.4868, "step": 18417 }, { "epoch": 0.833582258429509, "grad_norm": 0.5661430809971738, "learning_rate": 7.092389266006683e-07, "loss": 0.2754, "step": 18418 }, { "epoch": 0.8336275175379045, "grad_norm": 0.6159875929159425, "learning_rate": 7.088626944632493e-07, "loss": 0.3262, "step": 18419 }, { "epoch": 0.8336727766463, "grad_norm": 0.6080998733893245, "learning_rate": 7.084865545291914e-07, "loss": 0.2933, "step": 18420 }, { "epoch": 0.8337180357546956, "grad_norm": 0.6126167266070449, "learning_rate": 7.081105068065764e-07, "loss": 0.3477, "step": 18421 }, { "epoch": 0.8337632948630912, "grad_norm": 0.2536339863608969, "learning_rate": 7.077345513034861e-07, "loss": 0.4426, "step": 18422 }, { "epoch": 0.8338085539714868, "grad_norm": 0.2617414439907864, "learning_rate": 7.073586880279981e-07, "loss": 0.4655, "step": 18423 }, { "epoch": 0.8338538130798824, "grad_norm": 0.5566006485219414, "learning_rate": 7.06982916988187e-07, "loss": 0.2721, "step": 18424 }, { "epoch": 0.8338990721882779, "grad_norm": 0.6710321305525754, "learning_rate": 7.066072381921285e-07, "loss": 0.3213, "step": 18425 }, { "epoch": 0.8339443312966734, "grad_norm": 0.6511717324963595, "learning_rate": 7.06231651647894e-07, "loss": 0.331, "step": 18426 }, { "epoch": 0.833989590405069, "grad_norm": 0.5692816316713081, "learning_rate": 7.058561573635548e-07, "loss": 0.2835, "step": 18427 }, { "epoch": 0.8340348495134646, "grad_norm": 0.663359168877623, "learning_rate": 7.054807553471782e-07, "loss": 0.278, "step": 18428 }, { "epoch": 0.8340801086218601, "grad_norm": 0.5552336909462475, "learning_rate": 7.05105445606829e-07, "loss": 0.2733, "step": 18429 }, { "epoch": 0.8341253677302557, "grad_norm": 0.5845264375176329, "learning_rate": 7.047302281505735e-07, "loss": 0.3261, "step": 18430 }, { "epoch": 0.8341706268386513, "grad_norm": 0.6262122985968133, "learning_rate": 7.043551029864759e-07, "loss": 0.319, "step": 18431 }, { "epoch": 0.8342158859470469, "grad_norm": 0.6622169142228267, "learning_rate": 7.039800701225918e-07, "loss": 0.2854, "step": 18432 }, { "epoch": 0.8342611450554424, "grad_norm": 0.27194861515127755, "learning_rate": 7.036051295669816e-07, "loss": 0.461, "step": 18433 }, { "epoch": 0.834306404163838, "grad_norm": 0.25211593145997274, "learning_rate": 7.03230281327702e-07, "loss": 0.4504, "step": 18434 }, { "epoch": 0.8343516632722335, "grad_norm": 0.6034517931757836, "learning_rate": 7.028555254128089e-07, "loss": 0.2867, "step": 18435 }, { "epoch": 0.8343969223806291, "grad_norm": 0.6218373751755876, "learning_rate": 7.024808618303508e-07, "loss": 0.2809, "step": 18436 }, { "epoch": 0.8344421814890247, "grad_norm": 0.622619872023975, "learning_rate": 7.021062905883802e-07, "loss": 0.3138, "step": 18437 }, { "epoch": 0.8344874405974202, "grad_norm": 0.25726287792657343, "learning_rate": 7.017318116949468e-07, "loss": 0.4385, "step": 18438 }, { "epoch": 0.8345326997058158, "grad_norm": 0.5635510149757561, "learning_rate": 7.013574251580956e-07, "loss": 0.2942, "step": 18439 }, { "epoch": 0.8345779588142114, "grad_norm": 0.5767968519994132, "learning_rate": 7.009831309858701e-07, "loss": 0.2885, "step": 18440 }, { "epoch": 0.834623217922607, "grad_norm": 0.6454957968254394, "learning_rate": 7.006089291863144e-07, "loss": 0.2802, "step": 18441 }, { "epoch": 0.8346684770310024, "grad_norm": 0.6431056047668252, "learning_rate": 7.002348197674669e-07, "loss": 0.2863, "step": 18442 }, { "epoch": 0.834713736139398, "grad_norm": 0.6548391597943949, "learning_rate": 6.998608027373694e-07, "loss": 0.2709, "step": 18443 }, { "epoch": 0.8347589952477936, "grad_norm": 0.5851244717476433, "learning_rate": 6.994868781040553e-07, "loss": 0.2773, "step": 18444 }, { "epoch": 0.8348042543561892, "grad_norm": 0.6108327610865241, "learning_rate": 6.991130458755596e-07, "loss": 0.3102, "step": 18445 }, { "epoch": 0.8348495134645848, "grad_norm": 0.5969419647703962, "learning_rate": 6.987393060599157e-07, "loss": 0.287, "step": 18446 }, { "epoch": 0.8348947725729803, "grad_norm": 0.7424954024336571, "learning_rate": 6.983656586651543e-07, "loss": 0.2959, "step": 18447 }, { "epoch": 0.8349400316813759, "grad_norm": 0.32286385121349703, "learning_rate": 6.979921036993042e-07, "loss": 0.4771, "step": 18448 }, { "epoch": 0.8349852907897715, "grad_norm": 0.577601691006914, "learning_rate": 6.976186411703894e-07, "loss": 0.2719, "step": 18449 }, { "epoch": 0.835030549898167, "grad_norm": 0.6326586227520709, "learning_rate": 6.972452710864364e-07, "loss": 0.3351, "step": 18450 }, { "epoch": 0.8350758090065625, "grad_norm": 0.6604271876887587, "learning_rate": 6.968719934554691e-07, "loss": 0.3225, "step": 18451 }, { "epoch": 0.8351210681149581, "grad_norm": 0.589944492600839, "learning_rate": 6.964988082855062e-07, "loss": 0.2977, "step": 18452 }, { "epoch": 0.8351663272233537, "grad_norm": 0.5965548477237558, "learning_rate": 6.961257155845658e-07, "loss": 0.2546, "step": 18453 }, { "epoch": 0.8352115863317493, "grad_norm": 0.6621594533928651, "learning_rate": 6.957527153606664e-07, "loss": 0.2908, "step": 18454 }, { "epoch": 0.8352568454401448, "grad_norm": 0.5712805422352859, "learning_rate": 6.953798076218204e-07, "loss": 0.2905, "step": 18455 }, { "epoch": 0.8353021045485404, "grad_norm": 0.5798015410898242, "learning_rate": 6.950069923760433e-07, "loss": 0.2914, "step": 18456 }, { "epoch": 0.835347363656936, "grad_norm": 0.26862311862827815, "learning_rate": 6.946342696313435e-07, "loss": 0.4883, "step": 18457 }, { "epoch": 0.8353926227653315, "grad_norm": 0.2655924882413599, "learning_rate": 6.942616393957297e-07, "loss": 0.4824, "step": 18458 }, { "epoch": 0.8354378818737271, "grad_norm": 0.828216103418796, "learning_rate": 6.938891016772092e-07, "loss": 0.2974, "step": 18459 }, { "epoch": 0.8354831409821226, "grad_norm": 0.6503665178507614, "learning_rate": 6.935166564837875e-07, "loss": 0.315, "step": 18460 }, { "epoch": 0.8355284000905182, "grad_norm": 0.809480352605823, "learning_rate": 6.93144303823467e-07, "loss": 0.2828, "step": 18461 }, { "epoch": 0.8355736591989138, "grad_norm": 0.6732385764784247, "learning_rate": 6.927720437042462e-07, "loss": 0.3042, "step": 18462 }, { "epoch": 0.8356189183073094, "grad_norm": 0.29168950544423544, "learning_rate": 6.923998761341261e-07, "loss": 0.4947, "step": 18463 }, { "epoch": 0.8356641774157049, "grad_norm": 0.6211694790944401, "learning_rate": 6.920278011211034e-07, "loss": 0.2732, "step": 18464 }, { "epoch": 0.8357094365241005, "grad_norm": 0.6205506528108833, "learning_rate": 6.916558186731726e-07, "loss": 0.2932, "step": 18465 }, { "epoch": 0.835754695632496, "grad_norm": 0.6014775172694962, "learning_rate": 6.912839287983253e-07, "loss": 0.2827, "step": 18466 }, { "epoch": 0.8357999547408916, "grad_norm": 0.2592762946173183, "learning_rate": 6.909121315045541e-07, "loss": 0.4542, "step": 18467 }, { "epoch": 0.8358452138492871, "grad_norm": 0.9117098601539213, "learning_rate": 6.905404267998466e-07, "loss": 0.3685, "step": 18468 }, { "epoch": 0.8358904729576827, "grad_norm": 0.7034910533099269, "learning_rate": 6.901688146921892e-07, "loss": 0.2811, "step": 18469 }, { "epoch": 0.8359357320660783, "grad_norm": 0.6019756167831373, "learning_rate": 6.897972951895682e-07, "loss": 0.3007, "step": 18470 }, { "epoch": 0.8359809911744739, "grad_norm": 0.6456353538014693, "learning_rate": 6.894258682999644e-07, "loss": 0.3, "step": 18471 }, { "epoch": 0.8360262502828695, "grad_norm": 0.2788759898919128, "learning_rate": 6.890545340313609e-07, "loss": 0.4847, "step": 18472 }, { "epoch": 0.836071509391265, "grad_norm": 0.7049032865204843, "learning_rate": 6.886832923917358e-07, "loss": 0.3145, "step": 18473 }, { "epoch": 0.8361167684996605, "grad_norm": 0.5249122167302466, "learning_rate": 6.883121433890639e-07, "loss": 0.2557, "step": 18474 }, { "epoch": 0.8361620276080561, "grad_norm": 0.6918651384335092, "learning_rate": 6.879410870313219e-07, "loss": 0.3068, "step": 18475 }, { "epoch": 0.8362072867164517, "grad_norm": 0.5832527914148368, "learning_rate": 6.875701233264837e-07, "loss": 0.251, "step": 18476 }, { "epoch": 0.8362525458248472, "grad_norm": 0.6140431784941516, "learning_rate": 6.871992522825183e-07, "loss": 0.2806, "step": 18477 }, { "epoch": 0.8362978049332428, "grad_norm": 0.26624596622234775, "learning_rate": 6.868284739073949e-07, "loss": 0.4759, "step": 18478 }, { "epoch": 0.8363430640416384, "grad_norm": 0.6660657563262626, "learning_rate": 6.8645778820908e-07, "loss": 0.2673, "step": 18479 }, { "epoch": 0.836388323150034, "grad_norm": 0.6050959441806862, "learning_rate": 6.860871951955412e-07, "loss": 0.2871, "step": 18480 }, { "epoch": 0.8364335822584296, "grad_norm": 0.6525082970979413, "learning_rate": 6.857166948747385e-07, "loss": 0.3073, "step": 18481 }, { "epoch": 0.836478841366825, "grad_norm": 0.2765877051458427, "learning_rate": 6.853462872546329e-07, "loss": 0.4547, "step": 18482 }, { "epoch": 0.8365241004752206, "grad_norm": 0.6329547373858155, "learning_rate": 6.849759723431853e-07, "loss": 0.3209, "step": 18483 }, { "epoch": 0.8365693595836162, "grad_norm": 0.6013514520423907, "learning_rate": 6.846057501483505e-07, "loss": 0.317, "step": 18484 }, { "epoch": 0.8366146186920118, "grad_norm": 0.6708127579750481, "learning_rate": 6.842356206780853e-07, "loss": 0.267, "step": 18485 }, { "epoch": 0.8366598778004073, "grad_norm": 0.6180890666149683, "learning_rate": 6.838655839403419e-07, "loss": 0.2944, "step": 18486 }, { "epoch": 0.8367051369088029, "grad_norm": 0.5812224184554288, "learning_rate": 6.834956399430703e-07, "loss": 0.3012, "step": 18487 }, { "epoch": 0.8367503960171985, "grad_norm": 0.2711922549276204, "learning_rate": 6.8312578869422e-07, "loss": 0.4628, "step": 18488 }, { "epoch": 0.8367956551255941, "grad_norm": 0.797307266433614, "learning_rate": 6.827560302017389e-07, "loss": 0.3233, "step": 18489 }, { "epoch": 0.8368409142339895, "grad_norm": 0.6030944382919147, "learning_rate": 6.823863644735718e-07, "loss": 0.2964, "step": 18490 }, { "epoch": 0.8368861733423851, "grad_norm": 0.7432983858204644, "learning_rate": 6.820167915176601e-07, "loss": 0.3053, "step": 18491 }, { "epoch": 0.8369314324507807, "grad_norm": 0.7667762688745203, "learning_rate": 6.816473113419459e-07, "loss": 0.2915, "step": 18492 }, { "epoch": 0.8369766915591763, "grad_norm": 0.6022476658119068, "learning_rate": 6.812779239543688e-07, "loss": 0.2945, "step": 18493 }, { "epoch": 0.8370219506675719, "grad_norm": 0.5689870681284912, "learning_rate": 6.809086293628658e-07, "loss": 0.2849, "step": 18494 }, { "epoch": 0.8370672097759674, "grad_norm": 0.6094370358822103, "learning_rate": 6.805394275753696e-07, "loss": 0.2985, "step": 18495 }, { "epoch": 0.837112468884363, "grad_norm": 0.6096821133744085, "learning_rate": 6.801703185998165e-07, "loss": 0.309, "step": 18496 }, { "epoch": 0.8371577279927586, "grad_norm": 0.6210190486095353, "learning_rate": 6.798013024441346e-07, "loss": 0.2717, "step": 18497 }, { "epoch": 0.8372029871011542, "grad_norm": 0.5832405292000097, "learning_rate": 6.794323791162549e-07, "loss": 0.2883, "step": 18498 }, { "epoch": 0.8372482462095496, "grad_norm": 0.6939500706572298, "learning_rate": 6.790635486241043e-07, "loss": 0.2912, "step": 18499 }, { "epoch": 0.8372935053179452, "grad_norm": 0.5921908749665488, "learning_rate": 6.786948109756064e-07, "loss": 0.3227, "step": 18500 }, { "epoch": 0.8373387644263408, "grad_norm": 0.2750326264546234, "learning_rate": 6.783261661786855e-07, "loss": 0.479, "step": 18501 }, { "epoch": 0.8373840235347364, "grad_norm": 0.24164130552241625, "learning_rate": 6.77957614241263e-07, "loss": 0.4474, "step": 18502 }, { "epoch": 0.8374292826431319, "grad_norm": 0.5861892387606339, "learning_rate": 6.775891551712555e-07, "loss": 0.2594, "step": 18503 }, { "epoch": 0.8374745417515275, "grad_norm": 0.7380838640426645, "learning_rate": 6.77220788976582e-07, "loss": 0.3095, "step": 18504 }, { "epoch": 0.8375198008599231, "grad_norm": 0.6415416692617534, "learning_rate": 6.768525156651589e-07, "loss": 0.257, "step": 18505 }, { "epoch": 0.8375650599683186, "grad_norm": 0.6310572300126577, "learning_rate": 6.764843352448974e-07, "loss": 0.297, "step": 18506 }, { "epoch": 0.8376103190767142, "grad_norm": 0.5824094431531872, "learning_rate": 6.761162477237076e-07, "loss": 0.266, "step": 18507 }, { "epoch": 0.8376555781851097, "grad_norm": 0.5821116481571228, "learning_rate": 6.757482531094999e-07, "loss": 0.2683, "step": 18508 }, { "epoch": 0.8377008372935053, "grad_norm": 0.6269496246057881, "learning_rate": 6.753803514101826e-07, "loss": 0.2824, "step": 18509 }, { "epoch": 0.8377460964019009, "grad_norm": 0.6786039172697631, "learning_rate": 6.75012542633659e-07, "loss": 0.3464, "step": 18510 }, { "epoch": 0.8377913555102965, "grad_norm": 0.2701244215128959, "learning_rate": 6.74644826787832e-07, "loss": 0.4969, "step": 18511 }, { "epoch": 0.837836614618692, "grad_norm": 0.5815568096960195, "learning_rate": 6.742772038806045e-07, "loss": 0.3244, "step": 18512 }, { "epoch": 0.8378818737270876, "grad_norm": 0.6155053020966658, "learning_rate": 6.739096739198731e-07, "loss": 0.3038, "step": 18513 }, { "epoch": 0.8379271328354831, "grad_norm": 0.2623644484991377, "learning_rate": 6.735422369135375e-07, "loss": 0.4636, "step": 18514 }, { "epoch": 0.8379723919438787, "grad_norm": 0.9128337737816552, "learning_rate": 6.731748928694914e-07, "loss": 0.3144, "step": 18515 }, { "epoch": 0.8380176510522742, "grad_norm": 0.6336593216480945, "learning_rate": 6.72807641795627e-07, "loss": 0.2951, "step": 18516 }, { "epoch": 0.8380629101606698, "grad_norm": 0.6044869413049455, "learning_rate": 6.724404836998366e-07, "loss": 0.3192, "step": 18517 }, { "epoch": 0.8381081692690654, "grad_norm": 0.6188016034575473, "learning_rate": 6.720734185900101e-07, "loss": 0.2872, "step": 18518 }, { "epoch": 0.838153428377461, "grad_norm": 0.6214542611014593, "learning_rate": 6.717064464740336e-07, "loss": 0.2403, "step": 18519 }, { "epoch": 0.8381986874858566, "grad_norm": 0.7963334781011263, "learning_rate": 6.713395673597911e-07, "loss": 0.2977, "step": 18520 }, { "epoch": 0.838243946594252, "grad_norm": 0.28427234355197933, "learning_rate": 6.709727812551669e-07, "loss": 0.4759, "step": 18521 }, { "epoch": 0.8382892057026476, "grad_norm": 0.6178113786648095, "learning_rate": 6.706060881680432e-07, "loss": 0.324, "step": 18522 }, { "epoch": 0.8383344648110432, "grad_norm": 0.27607898173326817, "learning_rate": 6.702394881062974e-07, "loss": 0.4676, "step": 18523 }, { "epoch": 0.8383797239194388, "grad_norm": 0.6041009697924854, "learning_rate": 6.698729810778065e-07, "loss": 0.3249, "step": 18524 }, { "epoch": 0.8384249830278343, "grad_norm": 0.6633498696162133, "learning_rate": 6.695065670904477e-07, "loss": 0.2971, "step": 18525 }, { "epoch": 0.8384702421362299, "grad_norm": 0.5935907065614485, "learning_rate": 6.691402461520913e-07, "loss": 0.2991, "step": 18526 }, { "epoch": 0.8385155012446255, "grad_norm": 0.25082580423110495, "learning_rate": 6.687740182706103e-07, "loss": 0.4612, "step": 18527 }, { "epoch": 0.8385607603530211, "grad_norm": 0.6078515373628862, "learning_rate": 6.684078834538743e-07, "loss": 0.2703, "step": 18528 }, { "epoch": 0.8386060194614167, "grad_norm": 0.6176734156235348, "learning_rate": 6.680418417097478e-07, "loss": 0.3062, "step": 18529 }, { "epoch": 0.8386512785698121, "grad_norm": 0.2644695950699044, "learning_rate": 6.676758930460975e-07, "loss": 0.4671, "step": 18530 }, { "epoch": 0.8386965376782077, "grad_norm": 0.2695615966163874, "learning_rate": 6.673100374707886e-07, "loss": 0.4595, "step": 18531 }, { "epoch": 0.8387417967866033, "grad_norm": 0.6319710434142184, "learning_rate": 6.669442749916782e-07, "loss": 0.2783, "step": 18532 }, { "epoch": 0.8387870558949989, "grad_norm": 0.27180888854367324, "learning_rate": 6.665786056166274e-07, "loss": 0.4834, "step": 18533 }, { "epoch": 0.8388323150033944, "grad_norm": 0.263547407515624, "learning_rate": 6.662130293534941e-07, "loss": 0.4602, "step": 18534 }, { "epoch": 0.83887757411179, "grad_norm": 0.28470399398942675, "learning_rate": 6.658475462101327e-07, "loss": 0.4475, "step": 18535 }, { "epoch": 0.8389228332201856, "grad_norm": 0.594393630301785, "learning_rate": 6.654821561943953e-07, "loss": 0.2538, "step": 18536 }, { "epoch": 0.8389680923285812, "grad_norm": 0.6516799206285943, "learning_rate": 6.651168593141339e-07, "loss": 0.28, "step": 18537 }, { "epoch": 0.8390133514369766, "grad_norm": 0.5994769333314872, "learning_rate": 6.647516555771988e-07, "loss": 0.3085, "step": 18538 }, { "epoch": 0.8390586105453722, "grad_norm": 0.3360962215042519, "learning_rate": 6.643865449914355e-07, "loss": 0.4624, "step": 18539 }, { "epoch": 0.8391038696537678, "grad_norm": 0.553898450685638, "learning_rate": 6.640215275646889e-07, "loss": 0.2594, "step": 18540 }, { "epoch": 0.8391491287621634, "grad_norm": 0.6146330382727423, "learning_rate": 6.636566033048037e-07, "loss": 0.2977, "step": 18541 }, { "epoch": 0.839194387870559, "grad_norm": 0.6534901474622999, "learning_rate": 6.632917722196186e-07, "loss": 0.3527, "step": 18542 }, { "epoch": 0.8392396469789545, "grad_norm": 0.6374263507876112, "learning_rate": 6.629270343169752e-07, "loss": 0.2602, "step": 18543 }, { "epoch": 0.8392849060873501, "grad_norm": 0.9073245039519338, "learning_rate": 6.625623896047101e-07, "loss": 0.2771, "step": 18544 }, { "epoch": 0.8393301651957457, "grad_norm": 0.5576977394379002, "learning_rate": 6.621978380906563e-07, "loss": 0.2847, "step": 18545 }, { "epoch": 0.8393754243041412, "grad_norm": 0.6261781506288243, "learning_rate": 6.618333797826487e-07, "loss": 0.3053, "step": 18546 }, { "epoch": 0.8394206834125367, "grad_norm": 0.2783462372789997, "learning_rate": 6.614690146885189e-07, "loss": 0.4567, "step": 18547 }, { "epoch": 0.8394659425209323, "grad_norm": 0.5963827788955498, "learning_rate": 6.611047428160954e-07, "loss": 0.3023, "step": 18548 }, { "epoch": 0.8395112016293279, "grad_norm": 0.5959170023377508, "learning_rate": 6.60740564173204e-07, "loss": 0.2707, "step": 18549 }, { "epoch": 0.8395564607377235, "grad_norm": 0.25713733466545213, "learning_rate": 6.603764787676703e-07, "loss": 0.4457, "step": 18550 }, { "epoch": 0.839601719846119, "grad_norm": 0.38063115712027656, "learning_rate": 6.600124866073199e-07, "loss": 0.4678, "step": 18551 }, { "epoch": 0.8396469789545146, "grad_norm": 0.5948071627450263, "learning_rate": 6.596485876999714e-07, "loss": 0.3188, "step": 18552 }, { "epoch": 0.8396922380629102, "grad_norm": 0.5947740569630174, "learning_rate": 6.592847820534432e-07, "loss": 0.2585, "step": 18553 }, { "epoch": 0.8397374971713057, "grad_norm": 0.26618875327704994, "learning_rate": 6.589210696755549e-07, "loss": 0.4601, "step": 18554 }, { "epoch": 0.8397827562797013, "grad_norm": 0.3881434164089876, "learning_rate": 6.585574505741188e-07, "loss": 0.4836, "step": 18555 }, { "epoch": 0.8398280153880968, "grad_norm": 0.6202801403288642, "learning_rate": 6.581939247569508e-07, "loss": 0.2967, "step": 18556 }, { "epoch": 0.8398732744964924, "grad_norm": 0.5730519871310629, "learning_rate": 6.578304922318607e-07, "loss": 0.2491, "step": 18557 }, { "epoch": 0.839918533604888, "grad_norm": 0.6228626600596571, "learning_rate": 6.574671530066557e-07, "loss": 0.3151, "step": 18558 }, { "epoch": 0.8399637927132836, "grad_norm": 0.28480024563607853, "learning_rate": 6.571039070891449e-07, "loss": 0.4816, "step": 18559 }, { "epoch": 0.8400090518216791, "grad_norm": 0.7009703026905671, "learning_rate": 6.567407544871341e-07, "loss": 0.2483, "step": 18560 }, { "epoch": 0.8400543109300747, "grad_norm": 0.6737484038016858, "learning_rate": 6.56377695208425e-07, "loss": 0.2819, "step": 18561 }, { "epoch": 0.8400995700384702, "grad_norm": 0.6495359229581547, "learning_rate": 6.560147292608177e-07, "loss": 0.3284, "step": 18562 }, { "epoch": 0.8401448291468658, "grad_norm": 0.7227420956022628, "learning_rate": 6.556518566521125e-07, "loss": 0.2784, "step": 18563 }, { "epoch": 0.8401900882552614, "grad_norm": 0.2921568050989983, "learning_rate": 6.552890773901083e-07, "loss": 0.4648, "step": 18564 }, { "epoch": 0.8402353473636569, "grad_norm": 0.9524537477492414, "learning_rate": 6.54926391482596e-07, "loss": 0.2818, "step": 18565 }, { "epoch": 0.8402806064720525, "grad_norm": 0.5970172164361539, "learning_rate": 6.545637989373704e-07, "loss": 0.3514, "step": 18566 }, { "epoch": 0.8403258655804481, "grad_norm": 0.6591626744459652, "learning_rate": 6.542012997622238e-07, "loss": 0.3053, "step": 18567 }, { "epoch": 0.8403711246888437, "grad_norm": 0.6245199254392384, "learning_rate": 6.538388939649442e-07, "loss": 0.263, "step": 18568 }, { "epoch": 0.8404163837972392, "grad_norm": 0.6100214864717944, "learning_rate": 6.534765815533179e-07, "loss": 0.2426, "step": 18569 }, { "epoch": 0.8404616429056347, "grad_norm": 0.6087616477739742, "learning_rate": 6.531143625351316e-07, "loss": 0.2755, "step": 18570 }, { "epoch": 0.8405069020140303, "grad_norm": 0.5870832214071411, "learning_rate": 6.527522369181655e-07, "loss": 0.2807, "step": 18571 }, { "epoch": 0.8405521611224259, "grad_norm": 0.26530881471943285, "learning_rate": 6.523902047102038e-07, "loss": 0.4648, "step": 18572 }, { "epoch": 0.8405974202308214, "grad_norm": 0.6748885856919552, "learning_rate": 6.520282659190241e-07, "loss": 0.333, "step": 18573 }, { "epoch": 0.840642679339217, "grad_norm": 0.6525134433216766, "learning_rate": 6.516664205524021e-07, "loss": 0.3334, "step": 18574 }, { "epoch": 0.8406879384476126, "grad_norm": 0.626439705015834, "learning_rate": 6.513046686181135e-07, "loss": 0.3059, "step": 18575 }, { "epoch": 0.8407331975560082, "grad_norm": 0.6414577042974596, "learning_rate": 6.509430101239328e-07, "loss": 0.3226, "step": 18576 }, { "epoch": 0.8407784566644038, "grad_norm": 0.6366298350867605, "learning_rate": 6.505814450776299e-07, "loss": 0.3111, "step": 18577 }, { "epoch": 0.8408237157727992, "grad_norm": 0.6135687883135332, "learning_rate": 6.502199734869718e-07, "loss": 0.3032, "step": 18578 }, { "epoch": 0.8408689748811948, "grad_norm": 0.6618915855858729, "learning_rate": 6.498585953597275e-07, "loss": 0.2993, "step": 18579 }, { "epoch": 0.8409142339895904, "grad_norm": 0.5810166039177667, "learning_rate": 6.494973107036628e-07, "loss": 0.2969, "step": 18580 }, { "epoch": 0.840959493097986, "grad_norm": 0.6190871177789631, "learning_rate": 6.491361195265394e-07, "loss": 0.2933, "step": 18581 }, { "epoch": 0.8410047522063815, "grad_norm": 0.571417004000721, "learning_rate": 6.487750218361172e-07, "loss": 0.2754, "step": 18582 }, { "epoch": 0.8410500113147771, "grad_norm": 0.26255393521512566, "learning_rate": 6.484140176401565e-07, "loss": 0.45, "step": 18583 }, { "epoch": 0.8410952704231727, "grad_norm": 0.5854899811507577, "learning_rate": 6.48053106946413e-07, "loss": 0.2464, "step": 18584 }, { "epoch": 0.8411405295315683, "grad_norm": 0.685442932062727, "learning_rate": 6.476922897626431e-07, "loss": 0.2958, "step": 18585 }, { "epoch": 0.8411857886399637, "grad_norm": 0.25921140126472897, "learning_rate": 6.47331566096599e-07, "loss": 0.465, "step": 18586 }, { "epoch": 0.8412310477483593, "grad_norm": 0.6404645261086098, "learning_rate": 6.4697093595603e-07, "loss": 0.3098, "step": 18587 }, { "epoch": 0.8412763068567549, "grad_norm": 0.2909442563168596, "learning_rate": 6.466103993486866e-07, "loss": 0.4794, "step": 18588 }, { "epoch": 0.8413215659651505, "grad_norm": 0.6357938988339054, "learning_rate": 6.462499562823166e-07, "loss": 0.2952, "step": 18589 }, { "epoch": 0.8413668250735461, "grad_norm": 0.5806777395557357, "learning_rate": 6.45889606764663e-07, "loss": 0.2687, "step": 18590 }, { "epoch": 0.8414120841819416, "grad_norm": 0.2657439730935918, "learning_rate": 6.455293508034682e-07, "loss": 0.4687, "step": 18591 }, { "epoch": 0.8414573432903372, "grad_norm": 1.2369104990125643, "learning_rate": 6.451691884064737e-07, "loss": 0.2837, "step": 18592 }, { "epoch": 0.8415026023987328, "grad_norm": 0.5736522130921489, "learning_rate": 6.44809119581421e-07, "loss": 0.3056, "step": 18593 }, { "epoch": 0.8415478615071283, "grad_norm": 0.5971583599445661, "learning_rate": 6.444491443360423e-07, "loss": 0.2915, "step": 18594 }, { "epoch": 0.8415931206155238, "grad_norm": 0.26921985013098143, "learning_rate": 6.440892626780742e-07, "loss": 0.4945, "step": 18595 }, { "epoch": 0.8416383797239194, "grad_norm": 0.702059806929883, "learning_rate": 6.437294746152506e-07, "loss": 0.3517, "step": 18596 }, { "epoch": 0.841683638832315, "grad_norm": 0.6959560991057743, "learning_rate": 6.433697801553018e-07, "loss": 0.2978, "step": 18597 }, { "epoch": 0.8417288979407106, "grad_norm": 0.6424117920937337, "learning_rate": 6.430101793059545e-07, "loss": 0.3127, "step": 18598 }, { "epoch": 0.8417741570491062, "grad_norm": 0.6470073503248186, "learning_rate": 6.426506720749382e-07, "loss": 0.2602, "step": 18599 }, { "epoch": 0.8418194161575017, "grad_norm": 0.5961597104114085, "learning_rate": 6.422912584699753e-07, "loss": 0.2543, "step": 18600 }, { "epoch": 0.8418646752658973, "grad_norm": 0.6579959921697064, "learning_rate": 6.41931938498791e-07, "loss": 0.2955, "step": 18601 }, { "epoch": 0.8419099343742928, "grad_norm": 0.5739176031467433, "learning_rate": 6.415727121691029e-07, "loss": 0.2896, "step": 18602 }, { "epoch": 0.8419551934826884, "grad_norm": 0.7145701489626878, "learning_rate": 6.412135794886326e-07, "loss": 0.3284, "step": 18603 }, { "epoch": 0.8420004525910839, "grad_norm": 0.6603734675555657, "learning_rate": 6.408545404650945e-07, "loss": 0.3477, "step": 18604 }, { "epoch": 0.8420457116994795, "grad_norm": 0.60232612770229, "learning_rate": 6.404955951062058e-07, "loss": 0.3122, "step": 18605 }, { "epoch": 0.8420909708078751, "grad_norm": 0.5689447404173003, "learning_rate": 6.40136743419677e-07, "loss": 0.2737, "step": 18606 }, { "epoch": 0.8421362299162707, "grad_norm": 0.5942540051798831, "learning_rate": 6.39777985413218e-07, "loss": 0.2842, "step": 18607 }, { "epoch": 0.8421814890246662, "grad_norm": 0.5761743364454467, "learning_rate": 6.394193210945393e-07, "loss": 0.236, "step": 18608 }, { "epoch": 0.8422267481330618, "grad_norm": 0.6744896562351126, "learning_rate": 6.390607504713476e-07, "loss": 0.3192, "step": 18609 }, { "epoch": 0.8422720072414573, "grad_norm": 0.5559132233571377, "learning_rate": 6.387022735513465e-07, "loss": 0.2759, "step": 18610 }, { "epoch": 0.8423172663498529, "grad_norm": 0.5737130358392764, "learning_rate": 6.383438903422384e-07, "loss": 0.2971, "step": 18611 }, { "epoch": 0.8423625254582485, "grad_norm": 0.5801767481601431, "learning_rate": 6.379856008517249e-07, "loss": 0.3357, "step": 18612 }, { "epoch": 0.842407784566644, "grad_norm": 0.6083233227030705, "learning_rate": 6.376274050875031e-07, "loss": 0.302, "step": 18613 }, { "epoch": 0.8424530436750396, "grad_norm": 0.644686585245738, "learning_rate": 6.372693030572713e-07, "loss": 0.3155, "step": 18614 }, { "epoch": 0.8424983027834352, "grad_norm": 0.2862529176463453, "learning_rate": 6.369112947687228e-07, "loss": 0.5087, "step": 18615 }, { "epoch": 0.8425435618918308, "grad_norm": 0.6002537971676198, "learning_rate": 6.365533802295498e-07, "loss": 0.2779, "step": 18616 }, { "epoch": 0.8425888210002263, "grad_norm": 0.6794693559548842, "learning_rate": 6.361955594474434e-07, "loss": 0.2924, "step": 18617 }, { "epoch": 0.8426340801086218, "grad_norm": 0.6557389050187346, "learning_rate": 6.358378324300929e-07, "loss": 0.2633, "step": 18618 }, { "epoch": 0.8426793392170174, "grad_norm": 0.5913684893750959, "learning_rate": 6.354801991851839e-07, "loss": 0.2793, "step": 18619 }, { "epoch": 0.842724598325413, "grad_norm": 0.56507780537573, "learning_rate": 6.351226597203996e-07, "loss": 0.2813, "step": 18620 }, { "epoch": 0.8427698574338085, "grad_norm": 0.6543157854903586, "learning_rate": 6.347652140434235e-07, "loss": 0.3006, "step": 18621 }, { "epoch": 0.8428151165422041, "grad_norm": 0.27062195826958063, "learning_rate": 6.344078621619388e-07, "loss": 0.4696, "step": 18622 }, { "epoch": 0.8428603756505997, "grad_norm": 0.5956050072863, "learning_rate": 6.340506040836186e-07, "loss": 0.3229, "step": 18623 }, { "epoch": 0.8429056347589953, "grad_norm": 0.6072215620693544, "learning_rate": 6.336934398161421e-07, "loss": 0.2641, "step": 18624 }, { "epoch": 0.8429508938673909, "grad_norm": 0.6481129870782569, "learning_rate": 6.333363693671846e-07, "loss": 0.2568, "step": 18625 }, { "epoch": 0.8429961529757863, "grad_norm": 0.6048251027030022, "learning_rate": 6.329793927444178e-07, "loss": 0.311, "step": 18626 }, { "epoch": 0.8430414120841819, "grad_norm": 0.6737990008870608, "learning_rate": 6.3262250995551e-07, "loss": 0.3054, "step": 18627 }, { "epoch": 0.8430866711925775, "grad_norm": 0.27320233517049275, "learning_rate": 6.322657210081318e-07, "loss": 0.4757, "step": 18628 }, { "epoch": 0.8431319303009731, "grad_norm": 0.5951501062135266, "learning_rate": 6.319090259099486e-07, "loss": 0.2854, "step": 18629 }, { "epoch": 0.8431771894093686, "grad_norm": 0.6803703893987928, "learning_rate": 6.31552424668625e-07, "loss": 0.2763, "step": 18630 }, { "epoch": 0.8432224485177642, "grad_norm": 0.606461679888621, "learning_rate": 6.311959172918225e-07, "loss": 0.3098, "step": 18631 }, { "epoch": 0.8432677076261598, "grad_norm": 0.6402462679139579, "learning_rate": 6.308395037872034e-07, "loss": 0.3023, "step": 18632 }, { "epoch": 0.8433129667345554, "grad_norm": 0.6456806940445331, "learning_rate": 6.304831841624231e-07, "loss": 0.3131, "step": 18633 }, { "epoch": 0.843358225842951, "grad_norm": 0.6475293952371274, "learning_rate": 6.301269584251402e-07, "loss": 0.3454, "step": 18634 }, { "epoch": 0.8434034849513464, "grad_norm": 0.6217532563366727, "learning_rate": 6.297708265830083e-07, "loss": 0.3182, "step": 18635 }, { "epoch": 0.843448744059742, "grad_norm": 0.624619552227697, "learning_rate": 6.294147886436774e-07, "loss": 0.2799, "step": 18636 }, { "epoch": 0.8434940031681376, "grad_norm": 0.6023133975470476, "learning_rate": 6.290588446148005e-07, "loss": 0.2987, "step": 18637 }, { "epoch": 0.8435392622765332, "grad_norm": 0.6966724938516231, "learning_rate": 6.287029945040251e-07, "loss": 0.2694, "step": 18638 }, { "epoch": 0.8435845213849287, "grad_norm": 0.9611111517466708, "learning_rate": 6.28347238318997e-07, "loss": 0.2905, "step": 18639 }, { "epoch": 0.8436297804933243, "grad_norm": 0.6493403691536899, "learning_rate": 6.279915760673593e-07, "loss": 0.3332, "step": 18640 }, { "epoch": 0.8436750396017199, "grad_norm": 0.5719059711061655, "learning_rate": 6.276360077567556e-07, "loss": 0.286, "step": 18641 }, { "epoch": 0.8437202987101154, "grad_norm": 0.6307456637814162, "learning_rate": 6.27280533394825e-07, "loss": 0.3522, "step": 18642 }, { "epoch": 0.8437655578185109, "grad_norm": 0.6154099960134782, "learning_rate": 6.269251529892067e-07, "loss": 0.3227, "step": 18643 }, { "epoch": 0.8438108169269065, "grad_norm": 0.5971840758665974, "learning_rate": 6.265698665475362e-07, "loss": 0.2806, "step": 18644 }, { "epoch": 0.8438560760353021, "grad_norm": 0.5660815219881201, "learning_rate": 6.26214674077446e-07, "loss": 0.262, "step": 18645 }, { "epoch": 0.8439013351436977, "grad_norm": 0.5582881587328437, "learning_rate": 6.258595755865693e-07, "loss": 0.2644, "step": 18646 }, { "epoch": 0.8439465942520933, "grad_norm": 0.270000743015364, "learning_rate": 6.255045710825375e-07, "loss": 0.4854, "step": 18647 }, { "epoch": 0.8439918533604888, "grad_norm": 0.5930373376426961, "learning_rate": 6.251496605729773e-07, "loss": 0.2698, "step": 18648 }, { "epoch": 0.8440371124688844, "grad_norm": 0.2799876243783329, "learning_rate": 6.247948440655133e-07, "loss": 0.4542, "step": 18649 }, { "epoch": 0.8440823715772799, "grad_norm": 0.5833763261664848, "learning_rate": 6.244401215677709e-07, "loss": 0.2915, "step": 18650 }, { "epoch": 0.8441276306856755, "grad_norm": 0.6616761716049248, "learning_rate": 6.240854930873735e-07, "loss": 0.3143, "step": 18651 }, { "epoch": 0.844172889794071, "grad_norm": 0.5763756407201951, "learning_rate": 6.237309586319378e-07, "loss": 0.3434, "step": 18652 }, { "epoch": 0.8442181489024666, "grad_norm": 0.6994970687999154, "learning_rate": 6.233765182090829e-07, "loss": 0.2785, "step": 18653 }, { "epoch": 0.8442634080108622, "grad_norm": 0.5907761492683824, "learning_rate": 6.230221718264257e-07, "loss": 0.2635, "step": 18654 }, { "epoch": 0.8443086671192578, "grad_norm": 0.6022140883838824, "learning_rate": 6.226679194915791e-07, "loss": 0.3221, "step": 18655 }, { "epoch": 0.8443539262276533, "grad_norm": 0.6181478771378632, "learning_rate": 6.223137612121538e-07, "loss": 0.2902, "step": 18656 }, { "epoch": 0.8443991853360489, "grad_norm": 0.6570667306521998, "learning_rate": 6.219596969957619e-07, "loss": 0.2774, "step": 18657 }, { "epoch": 0.8444444444444444, "grad_norm": 0.26911455724039257, "learning_rate": 6.216057268500092e-07, "loss": 0.4801, "step": 18658 }, { "epoch": 0.84448970355284, "grad_norm": 0.28515342343215133, "learning_rate": 6.212518507825027e-07, "loss": 0.4859, "step": 18659 }, { "epoch": 0.8445349626612356, "grad_norm": 0.6390100496113705, "learning_rate": 6.208980688008453e-07, "loss": 0.3107, "step": 18660 }, { "epoch": 0.8445802217696311, "grad_norm": 0.6064606481601544, "learning_rate": 6.205443809126399e-07, "loss": 0.2957, "step": 18661 }, { "epoch": 0.8446254808780267, "grad_norm": 0.6515192999358557, "learning_rate": 6.201907871254836e-07, "loss": 0.3138, "step": 18662 }, { "epoch": 0.8446707399864223, "grad_norm": 0.5526030700262841, "learning_rate": 6.198372874469777e-07, "loss": 0.2815, "step": 18663 }, { "epoch": 0.8447159990948179, "grad_norm": 0.27052209067226685, "learning_rate": 6.194838818847155e-07, "loss": 0.4647, "step": 18664 }, { "epoch": 0.8447612582032133, "grad_norm": 0.6545133811559477, "learning_rate": 6.191305704462897e-07, "loss": 0.2587, "step": 18665 }, { "epoch": 0.8448065173116089, "grad_norm": 0.6538530551934313, "learning_rate": 6.187773531392932e-07, "loss": 0.3043, "step": 18666 }, { "epoch": 0.8448517764200045, "grad_norm": 0.640001109669132, "learning_rate": 6.184242299713162e-07, "loss": 0.282, "step": 18667 }, { "epoch": 0.8448970355284001, "grad_norm": 0.25687967698350045, "learning_rate": 6.180712009499462e-07, "loss": 0.4548, "step": 18668 }, { "epoch": 0.8449422946367957, "grad_norm": 0.27272353640801006, "learning_rate": 6.177182660827664e-07, "loss": 0.4615, "step": 18669 }, { "epoch": 0.8449875537451912, "grad_norm": 0.561871817929382, "learning_rate": 6.173654253773631e-07, "loss": 0.2885, "step": 18670 }, { "epoch": 0.8450328128535868, "grad_norm": 0.6008742399480839, "learning_rate": 6.170126788413156e-07, "loss": 0.2925, "step": 18671 }, { "epoch": 0.8450780719619824, "grad_norm": 0.6377610207407267, "learning_rate": 6.166600264822054e-07, "loss": 0.3027, "step": 18672 }, { "epoch": 0.845123331070378, "grad_norm": 0.625818270946855, "learning_rate": 6.163074683076081e-07, "loss": 0.2892, "step": 18673 }, { "epoch": 0.8451685901787734, "grad_norm": 0.8744926735664028, "learning_rate": 6.159550043251006e-07, "loss": 0.3326, "step": 18674 }, { "epoch": 0.845213849287169, "grad_norm": 0.6022902779539743, "learning_rate": 6.156026345422539e-07, "loss": 0.2761, "step": 18675 }, { "epoch": 0.8452591083955646, "grad_norm": 0.7967324838798232, "learning_rate": 6.152503589666426e-07, "loss": 0.2864, "step": 18676 }, { "epoch": 0.8453043675039602, "grad_norm": 0.7098888607833619, "learning_rate": 6.148981776058344e-07, "loss": 0.2881, "step": 18677 }, { "epoch": 0.8453496266123557, "grad_norm": 0.6371350015862445, "learning_rate": 6.14546090467395e-07, "loss": 0.2999, "step": 18678 }, { "epoch": 0.8453948857207513, "grad_norm": 0.649784256762448, "learning_rate": 6.141940975588917e-07, "loss": 0.312, "step": 18679 }, { "epoch": 0.8454401448291469, "grad_norm": 0.6282063633236151, "learning_rate": 6.138421988878884e-07, "loss": 0.2819, "step": 18680 }, { "epoch": 0.8454854039375425, "grad_norm": 0.600461964804082, "learning_rate": 6.134903944619447e-07, "loss": 0.2983, "step": 18681 }, { "epoch": 0.845530663045938, "grad_norm": 0.24963576277311014, "learning_rate": 6.131386842886194e-07, "loss": 0.4629, "step": 18682 }, { "epoch": 0.8455759221543335, "grad_norm": 0.6689409334850602, "learning_rate": 6.127870683754717e-07, "loss": 0.234, "step": 18683 }, { "epoch": 0.8456211812627291, "grad_norm": 0.671592567687221, "learning_rate": 6.124355467300558e-07, "loss": 0.3267, "step": 18684 }, { "epoch": 0.8456664403711247, "grad_norm": 0.6113351501273673, "learning_rate": 6.120841193599231e-07, "loss": 0.3232, "step": 18685 }, { "epoch": 0.8457116994795203, "grad_norm": 0.9509336152397326, "learning_rate": 6.11732786272628e-07, "loss": 0.3147, "step": 18686 }, { "epoch": 0.8457569585879158, "grad_norm": 0.6433261864621458, "learning_rate": 6.113815474757162e-07, "loss": 0.2984, "step": 18687 }, { "epoch": 0.8458022176963114, "grad_norm": 0.6026784560014598, "learning_rate": 6.110304029767372e-07, "loss": 0.3001, "step": 18688 }, { "epoch": 0.845847476804707, "grad_norm": 0.2685922247370795, "learning_rate": 6.106793527832344e-07, "loss": 0.4963, "step": 18689 }, { "epoch": 0.8458927359131025, "grad_norm": 0.26093778596229505, "learning_rate": 6.103283969027524e-07, "loss": 0.4648, "step": 18690 }, { "epoch": 0.845937995021498, "grad_norm": 0.6104503654019943, "learning_rate": 6.099775353428306e-07, "loss": 0.2752, "step": 18691 }, { "epoch": 0.8459832541298936, "grad_norm": 0.6807608671938857, "learning_rate": 6.096267681110097e-07, "loss": 0.2844, "step": 18692 }, { "epoch": 0.8460285132382892, "grad_norm": 0.27051557821575795, "learning_rate": 6.092760952148253e-07, "loss": 0.4885, "step": 18693 }, { "epoch": 0.8460737723466848, "grad_norm": 0.26212738678058434, "learning_rate": 6.089255166618113e-07, "loss": 0.4542, "step": 18694 }, { "epoch": 0.8461190314550804, "grad_norm": 0.57817303041535, "learning_rate": 6.085750324595019e-07, "loss": 0.3503, "step": 18695 }, { "epoch": 0.8461642905634759, "grad_norm": 0.8624097034013176, "learning_rate": 6.082246426154292e-07, "loss": 0.2785, "step": 18696 }, { "epoch": 0.8462095496718715, "grad_norm": 0.6297980038757982, "learning_rate": 6.078743471371207e-07, "loss": 0.3429, "step": 18697 }, { "epoch": 0.846254808780267, "grad_norm": 0.28655257814501794, "learning_rate": 6.075241460321013e-07, "loss": 0.4822, "step": 18698 }, { "epoch": 0.8463000678886626, "grad_norm": 0.6108057426107879, "learning_rate": 6.071740393078995e-07, "loss": 0.3127, "step": 18699 }, { "epoch": 0.8463453269970581, "grad_norm": 0.5917538247552456, "learning_rate": 6.068240269720343e-07, "loss": 0.2779, "step": 18700 }, { "epoch": 0.8463905861054537, "grad_norm": 0.5479848011861002, "learning_rate": 6.064741090320297e-07, "loss": 0.264, "step": 18701 }, { "epoch": 0.8464358452138493, "grad_norm": 0.5713766082902766, "learning_rate": 6.061242854954014e-07, "loss": 0.302, "step": 18702 }, { "epoch": 0.8464811043222449, "grad_norm": 0.641789067864487, "learning_rate": 6.057745563696688e-07, "loss": 0.334, "step": 18703 }, { "epoch": 0.8465263634306405, "grad_norm": 0.6392789934629359, "learning_rate": 6.054249216623437e-07, "loss": 0.3085, "step": 18704 }, { "epoch": 0.846571622539036, "grad_norm": 0.6277894109142051, "learning_rate": 6.050753813809412e-07, "loss": 0.2877, "step": 18705 }, { "epoch": 0.8466168816474315, "grad_norm": 0.6102758916483524, "learning_rate": 6.04725935532971e-07, "loss": 0.2878, "step": 18706 }, { "epoch": 0.8466621407558271, "grad_norm": 0.5987516515555169, "learning_rate": 6.043765841259402e-07, "loss": 0.2652, "step": 18707 }, { "epoch": 0.8467073998642227, "grad_norm": 0.6540286604005203, "learning_rate": 6.040273271673569e-07, "loss": 0.2849, "step": 18708 }, { "epoch": 0.8467526589726182, "grad_norm": 0.2626634167184486, "learning_rate": 6.036781646647261e-07, "loss": 0.4783, "step": 18709 }, { "epoch": 0.8467979180810138, "grad_norm": 0.2624905961997942, "learning_rate": 6.03329096625549e-07, "loss": 0.4351, "step": 18710 }, { "epoch": 0.8468431771894094, "grad_norm": 0.665480812530786, "learning_rate": 6.029801230573252e-07, "loss": 0.2784, "step": 18711 }, { "epoch": 0.846888436297805, "grad_norm": 0.25667545942246284, "learning_rate": 6.026312439675553e-07, "loss": 0.4464, "step": 18712 }, { "epoch": 0.8469336954062004, "grad_norm": 0.6136753121244453, "learning_rate": 6.022824593637334e-07, "loss": 0.3045, "step": 18713 }, { "epoch": 0.846978954514596, "grad_norm": 0.5794067426638368, "learning_rate": 6.019337692533556e-07, "loss": 0.2807, "step": 18714 }, { "epoch": 0.8470242136229916, "grad_norm": 0.5829253591861213, "learning_rate": 6.015851736439138e-07, "loss": 0.2631, "step": 18715 }, { "epoch": 0.8470694727313872, "grad_norm": 0.5811772762809982, "learning_rate": 6.01236672542897e-07, "loss": 0.2638, "step": 18716 }, { "epoch": 0.8471147318397828, "grad_norm": 0.606423734682036, "learning_rate": 6.008882659577942e-07, "loss": 0.2473, "step": 18717 }, { "epoch": 0.8471599909481783, "grad_norm": 0.6286834947259153, "learning_rate": 6.005399538960927e-07, "loss": 0.2999, "step": 18718 }, { "epoch": 0.8472052500565739, "grad_norm": 0.6016500099893688, "learning_rate": 6.001917363652759e-07, "loss": 0.2997, "step": 18719 }, { "epoch": 0.8472505091649695, "grad_norm": 0.8563674664688892, "learning_rate": 5.998436133728247e-07, "loss": 0.2978, "step": 18720 }, { "epoch": 0.8472957682733651, "grad_norm": 0.6109050842316257, "learning_rate": 5.994955849262207e-07, "loss": 0.3001, "step": 18721 }, { "epoch": 0.8473410273817605, "grad_norm": 0.6730770713324719, "learning_rate": 5.991476510329419e-07, "loss": 0.2903, "step": 18722 }, { "epoch": 0.8473862864901561, "grad_norm": 0.6298080817289243, "learning_rate": 5.987998117004628e-07, "loss": 0.266, "step": 18723 }, { "epoch": 0.8474315455985517, "grad_norm": 0.6843953732002055, "learning_rate": 5.984520669362587e-07, "loss": 0.2539, "step": 18724 }, { "epoch": 0.8474768047069473, "grad_norm": 0.6452454414382873, "learning_rate": 5.981044167478017e-07, "loss": 0.3277, "step": 18725 }, { "epoch": 0.8475220638153428, "grad_norm": 0.6321163182383871, "learning_rate": 5.977568611425621e-07, "loss": 0.2997, "step": 18726 }, { "epoch": 0.8475673229237384, "grad_norm": 0.6705720958675141, "learning_rate": 5.974094001280056e-07, "loss": 0.3082, "step": 18727 }, { "epoch": 0.847612582032134, "grad_norm": 0.7455710756474468, "learning_rate": 5.970620337116012e-07, "loss": 0.3023, "step": 18728 }, { "epoch": 0.8476578411405296, "grad_norm": 1.577587002978819, "learning_rate": 5.967147619008096e-07, "loss": 0.2658, "step": 18729 }, { "epoch": 0.8477031002489251, "grad_norm": 0.6551931720234587, "learning_rate": 5.963675847030953e-07, "loss": 0.2914, "step": 18730 }, { "epoch": 0.8477483593573206, "grad_norm": 0.6306636264131432, "learning_rate": 5.960205021259158e-07, "loss": 0.2945, "step": 18731 }, { "epoch": 0.8477936184657162, "grad_norm": 0.6513955703366753, "learning_rate": 5.956735141767306e-07, "loss": 0.3369, "step": 18732 }, { "epoch": 0.8478388775741118, "grad_norm": 0.6427932218936455, "learning_rate": 5.953266208629943e-07, "loss": 0.2704, "step": 18733 }, { "epoch": 0.8478841366825074, "grad_norm": 0.6024984714834914, "learning_rate": 5.949798221921616e-07, "loss": 0.2858, "step": 18734 }, { "epoch": 0.8479293957909029, "grad_norm": 0.6340217838797076, "learning_rate": 5.946331181716836e-07, "loss": 0.2667, "step": 18735 }, { "epoch": 0.8479746548992985, "grad_norm": 0.6800193111456344, "learning_rate": 5.942865088090088e-07, "loss": 0.2957, "step": 18736 }, { "epoch": 0.848019914007694, "grad_norm": 0.6755842385766843, "learning_rate": 5.939399941115859e-07, "loss": 0.2763, "step": 18737 }, { "epoch": 0.8480651731160896, "grad_norm": 0.5660382128890884, "learning_rate": 5.935935740868614e-07, "loss": 0.3231, "step": 18738 }, { "epoch": 0.8481104322244851, "grad_norm": 0.8721972221487893, "learning_rate": 5.93247248742278e-07, "loss": 0.289, "step": 18739 }, { "epoch": 0.8481556913328807, "grad_norm": 0.5421885890616007, "learning_rate": 5.929010180852756e-07, "loss": 0.2935, "step": 18740 }, { "epoch": 0.8482009504412763, "grad_norm": 0.2620228273329047, "learning_rate": 5.925548821232957e-07, "loss": 0.4406, "step": 18741 }, { "epoch": 0.8482462095496719, "grad_norm": 0.7154912165594549, "learning_rate": 5.922088408637743e-07, "loss": 0.2586, "step": 18742 }, { "epoch": 0.8482914686580675, "grad_norm": 0.5712037351530368, "learning_rate": 5.918628943141486e-07, "loss": 0.2971, "step": 18743 }, { "epoch": 0.848336727766463, "grad_norm": 0.6005964579283544, "learning_rate": 5.915170424818495e-07, "loss": 0.2986, "step": 18744 }, { "epoch": 0.8483819868748586, "grad_norm": 0.6268217753093963, "learning_rate": 5.911712853743101e-07, "loss": 0.3353, "step": 18745 }, { "epoch": 0.8484272459832541, "grad_norm": 0.6444851868878317, "learning_rate": 5.90825622998959e-07, "loss": 0.2985, "step": 18746 }, { "epoch": 0.8484725050916497, "grad_norm": 0.619428402824257, "learning_rate": 5.90480055363224e-07, "loss": 0.3326, "step": 18747 }, { "epoch": 0.8485177642000452, "grad_norm": 0.5733968766422702, "learning_rate": 5.901345824745297e-07, "loss": 0.2932, "step": 18748 }, { "epoch": 0.8485630233084408, "grad_norm": 0.6356372521086717, "learning_rate": 5.897892043402986e-07, "loss": 0.297, "step": 18749 }, { "epoch": 0.8486082824168364, "grad_norm": 0.7481215559015781, "learning_rate": 5.89443920967952e-07, "loss": 0.2889, "step": 18750 }, { "epoch": 0.848653541525232, "grad_norm": 0.7661646171965207, "learning_rate": 5.890987323649122e-07, "loss": 0.2871, "step": 18751 }, { "epoch": 0.8486988006336276, "grad_norm": 0.565257020815254, "learning_rate": 5.887536385385917e-07, "loss": 0.2719, "step": 18752 }, { "epoch": 0.848744059742023, "grad_norm": 0.2543059215630452, "learning_rate": 5.884086394964067e-07, "loss": 0.4449, "step": 18753 }, { "epoch": 0.8487893188504186, "grad_norm": 0.6522712431560104, "learning_rate": 5.880637352457724e-07, "loss": 0.2935, "step": 18754 }, { "epoch": 0.8488345779588142, "grad_norm": 0.7678483583609577, "learning_rate": 5.87718925794098e-07, "loss": 0.2842, "step": 18755 }, { "epoch": 0.8488798370672098, "grad_norm": 0.8928166842517539, "learning_rate": 5.873742111487917e-07, "loss": 0.2841, "step": 18756 }, { "epoch": 0.8489250961756053, "grad_norm": 0.5877417914147925, "learning_rate": 5.870295913172625e-07, "loss": 0.2586, "step": 18757 }, { "epoch": 0.8489703552840009, "grad_norm": 0.6971168951778712, "learning_rate": 5.866850663069124e-07, "loss": 0.2773, "step": 18758 }, { "epoch": 0.8490156143923965, "grad_norm": 0.7007595502752746, "learning_rate": 5.863406361251472e-07, "loss": 0.3014, "step": 18759 }, { "epoch": 0.8490608735007921, "grad_norm": 0.6213943742869298, "learning_rate": 5.859963007793651e-07, "loss": 0.3526, "step": 18760 }, { "epoch": 0.8491061326091875, "grad_norm": 0.5838266017627748, "learning_rate": 5.856520602769667e-07, "loss": 0.2767, "step": 18761 }, { "epoch": 0.8491513917175831, "grad_norm": 0.5926360558327918, "learning_rate": 5.853079146253471e-07, "loss": 0.2932, "step": 18762 }, { "epoch": 0.8491966508259787, "grad_norm": 0.5563599425447716, "learning_rate": 5.849638638319027e-07, "loss": 0.2734, "step": 18763 }, { "epoch": 0.8492419099343743, "grad_norm": 0.2510181119494583, "learning_rate": 5.846199079040249e-07, "loss": 0.4497, "step": 18764 }, { "epoch": 0.8492871690427699, "grad_norm": 0.6454068837621051, "learning_rate": 5.842760468491037e-07, "loss": 0.3179, "step": 18765 }, { "epoch": 0.8493324281511654, "grad_norm": 0.6272581164047886, "learning_rate": 5.839322806745285e-07, "loss": 0.3043, "step": 18766 }, { "epoch": 0.849377687259561, "grad_norm": 0.654346951581988, "learning_rate": 5.835886093876863e-07, "loss": 0.2909, "step": 18767 }, { "epoch": 0.8494229463679566, "grad_norm": 0.5879851108229344, "learning_rate": 5.832450329959616e-07, "loss": 0.3006, "step": 18768 }, { "epoch": 0.8494682054763522, "grad_norm": 0.7232430310009791, "learning_rate": 5.829015515067344e-07, "loss": 0.3012, "step": 18769 }, { "epoch": 0.8495134645847476, "grad_norm": 0.6953028335043215, "learning_rate": 5.825581649273881e-07, "loss": 0.2543, "step": 18770 }, { "epoch": 0.8495587236931432, "grad_norm": 0.6051757432812306, "learning_rate": 5.822148732652988e-07, "loss": 0.2915, "step": 18771 }, { "epoch": 0.8496039828015388, "grad_norm": 0.5621090053762571, "learning_rate": 5.818716765278443e-07, "loss": 0.2894, "step": 18772 }, { "epoch": 0.8496492419099344, "grad_norm": 0.6685361801495828, "learning_rate": 5.815285747223975e-07, "loss": 0.3113, "step": 18773 }, { "epoch": 0.8496945010183299, "grad_norm": 0.6262082362319686, "learning_rate": 5.811855678563322e-07, "loss": 0.3037, "step": 18774 }, { "epoch": 0.8497397601267255, "grad_norm": 0.27065950072618833, "learning_rate": 5.808426559370172e-07, "loss": 0.4543, "step": 18775 }, { "epoch": 0.8497850192351211, "grad_norm": 0.6036759949570691, "learning_rate": 5.804998389718214e-07, "loss": 0.3205, "step": 18776 }, { "epoch": 0.8498302783435167, "grad_norm": 0.5681712093767958, "learning_rate": 5.801571169681108e-07, "loss": 0.2772, "step": 18777 }, { "epoch": 0.8498755374519122, "grad_norm": 0.5971504050818677, "learning_rate": 5.798144899332486e-07, "loss": 0.2948, "step": 18778 }, { "epoch": 0.8499207965603077, "grad_norm": 0.7151176048313252, "learning_rate": 5.794719578745972e-07, "loss": 0.2643, "step": 18779 }, { "epoch": 0.8499660556687033, "grad_norm": 0.684751460375087, "learning_rate": 5.79129520799519e-07, "loss": 0.299, "step": 18780 }, { "epoch": 0.8500113147770989, "grad_norm": 0.2779298704574024, "learning_rate": 5.787871787153676e-07, "loss": 0.4719, "step": 18781 }, { "epoch": 0.8500565738854945, "grad_norm": 0.599828675778426, "learning_rate": 5.784449316295005e-07, "loss": 0.306, "step": 18782 }, { "epoch": 0.85010183299389, "grad_norm": 0.6811163672881649, "learning_rate": 5.781027795492738e-07, "loss": 0.2583, "step": 18783 }, { "epoch": 0.8501470921022856, "grad_norm": 0.26523875058027263, "learning_rate": 5.77760722482037e-07, "loss": 0.4424, "step": 18784 }, { "epoch": 0.8501923512106812, "grad_norm": 0.5872002054895826, "learning_rate": 5.7741876043514e-07, "loss": 0.3334, "step": 18785 }, { "epoch": 0.8502376103190767, "grad_norm": 0.2891465936058678, "learning_rate": 5.770768934159315e-07, "loss": 0.4533, "step": 18786 }, { "epoch": 0.8502828694274723, "grad_norm": 0.27265696093260977, "learning_rate": 5.767351214317557e-07, "loss": 0.46, "step": 18787 }, { "epoch": 0.8503281285358678, "grad_norm": 0.606245514986393, "learning_rate": 5.763934444899577e-07, "loss": 0.2617, "step": 18788 }, { "epoch": 0.8503733876442634, "grad_norm": 0.5748858767760663, "learning_rate": 5.760518625978778e-07, "loss": 0.358, "step": 18789 }, { "epoch": 0.850418646752659, "grad_norm": 0.6154813445986127, "learning_rate": 5.757103757628573e-07, "loss": 0.28, "step": 18790 }, { "epoch": 0.8504639058610546, "grad_norm": 0.5935345821580245, "learning_rate": 5.753689839922321e-07, "loss": 0.2952, "step": 18791 }, { "epoch": 0.8505091649694501, "grad_norm": 0.6015336243939349, "learning_rate": 5.750276872933386e-07, "loss": 0.3154, "step": 18792 }, { "epoch": 0.8505544240778456, "grad_norm": 0.6187644617250292, "learning_rate": 5.746864856735102e-07, "loss": 0.3443, "step": 18793 }, { "epoch": 0.8505996831862412, "grad_norm": 0.24585577370749073, "learning_rate": 5.743453791400766e-07, "loss": 0.461, "step": 18794 }, { "epoch": 0.8506449422946368, "grad_norm": 0.6568526354093772, "learning_rate": 5.740043677003688e-07, "loss": 0.2903, "step": 18795 }, { "epoch": 0.8506902014030323, "grad_norm": 0.6077793753624469, "learning_rate": 5.736634513617145e-07, "loss": 0.2996, "step": 18796 }, { "epoch": 0.8507354605114279, "grad_norm": 0.27141939267184767, "learning_rate": 5.733226301314381e-07, "loss": 0.4758, "step": 18797 }, { "epoch": 0.8507807196198235, "grad_norm": 0.2860800943684531, "learning_rate": 5.729819040168622e-07, "loss": 0.4898, "step": 18798 }, { "epoch": 0.8508259787282191, "grad_norm": 0.2722142642852313, "learning_rate": 5.72641273025309e-07, "loss": 0.4675, "step": 18799 }, { "epoch": 0.8508712378366147, "grad_norm": 0.6485039319914844, "learning_rate": 5.723007371640965e-07, "loss": 0.317, "step": 18800 }, { "epoch": 0.8509164969450101, "grad_norm": 0.6123542113321424, "learning_rate": 5.719602964405441e-07, "loss": 0.2897, "step": 18801 }, { "epoch": 0.8509617560534057, "grad_norm": 0.6664532371643351, "learning_rate": 5.716199508619635e-07, "loss": 0.2893, "step": 18802 }, { "epoch": 0.8510070151618013, "grad_norm": 0.6345223426266785, "learning_rate": 5.712797004356707e-07, "loss": 0.3076, "step": 18803 }, { "epoch": 0.8510522742701969, "grad_norm": 0.6016173433152902, "learning_rate": 5.709395451689748e-07, "loss": 0.2654, "step": 18804 }, { "epoch": 0.8510975333785924, "grad_norm": 0.5833362501878927, "learning_rate": 5.705994850691854e-07, "loss": 0.2816, "step": 18805 }, { "epoch": 0.851142792486988, "grad_norm": 0.6211038318307641, "learning_rate": 5.702595201436101e-07, "loss": 0.3097, "step": 18806 }, { "epoch": 0.8511880515953836, "grad_norm": 0.6724082872979813, "learning_rate": 5.699196503995513e-07, "loss": 0.3061, "step": 18807 }, { "epoch": 0.8512333107037792, "grad_norm": 0.6125127055127121, "learning_rate": 5.695798758443133e-07, "loss": 0.3068, "step": 18808 }, { "epoch": 0.8512785698121746, "grad_norm": 0.656838136626938, "learning_rate": 5.692401964851985e-07, "loss": 0.2419, "step": 18809 }, { "epoch": 0.8513238289205702, "grad_norm": 0.5754169823983257, "learning_rate": 5.689006123295021e-07, "loss": 0.3015, "step": 18810 }, { "epoch": 0.8513690880289658, "grad_norm": 0.6049474580450107, "learning_rate": 5.685611233845228e-07, "loss": 0.2808, "step": 18811 }, { "epoch": 0.8514143471373614, "grad_norm": 0.6100428373955716, "learning_rate": 5.682217296575554e-07, "loss": 0.3118, "step": 18812 }, { "epoch": 0.851459606245757, "grad_norm": 0.6017512056907653, "learning_rate": 5.678824311558923e-07, "loss": 0.3305, "step": 18813 }, { "epoch": 0.8515048653541525, "grad_norm": 0.6388400015965405, "learning_rate": 5.675432278868221e-07, "loss": 0.3152, "step": 18814 }, { "epoch": 0.8515501244625481, "grad_norm": 0.5639933085295421, "learning_rate": 5.672041198576345e-07, "loss": 0.2422, "step": 18815 }, { "epoch": 0.8515953835709437, "grad_norm": 0.27213150733841973, "learning_rate": 5.668651070756176e-07, "loss": 0.4692, "step": 18816 }, { "epoch": 0.8516406426793393, "grad_norm": 0.6384639020043845, "learning_rate": 5.66526189548054e-07, "loss": 0.3034, "step": 18817 }, { "epoch": 0.8516859017877347, "grad_norm": 0.6115731159495899, "learning_rate": 5.661873672822249e-07, "loss": 0.2785, "step": 18818 }, { "epoch": 0.8517311608961303, "grad_norm": 0.6277971478222045, "learning_rate": 5.658486402854136e-07, "loss": 0.2857, "step": 18819 }, { "epoch": 0.8517764200045259, "grad_norm": 0.6595635277979729, "learning_rate": 5.655100085648945e-07, "loss": 0.3337, "step": 18820 }, { "epoch": 0.8518216791129215, "grad_norm": 0.27716929417854275, "learning_rate": 5.651714721279478e-07, "loss": 0.4378, "step": 18821 }, { "epoch": 0.8518669382213171, "grad_norm": 0.6675023621782427, "learning_rate": 5.648330309818451e-07, "loss": 0.3588, "step": 18822 }, { "epoch": 0.8519121973297126, "grad_norm": 0.6199954797671143, "learning_rate": 5.644946851338584e-07, "loss": 0.2927, "step": 18823 }, { "epoch": 0.8519574564381082, "grad_norm": 0.6273237859124571, "learning_rate": 5.641564345912581e-07, "loss": 0.3057, "step": 18824 }, { "epoch": 0.8520027155465038, "grad_norm": 0.6171373265787468, "learning_rate": 5.638182793613134e-07, "loss": 0.2555, "step": 18825 }, { "epoch": 0.8520479746548993, "grad_norm": 0.2743814316281519, "learning_rate": 5.634802194512889e-07, "loss": 0.4488, "step": 18826 }, { "epoch": 0.8520932337632948, "grad_norm": 0.6320920857305001, "learning_rate": 5.631422548684479e-07, "loss": 0.2773, "step": 18827 }, { "epoch": 0.8521384928716904, "grad_norm": 0.5809184334187234, "learning_rate": 5.628043856200543e-07, "loss": 0.269, "step": 18828 }, { "epoch": 0.852183751980086, "grad_norm": 0.636732723488326, "learning_rate": 5.624666117133653e-07, "loss": 0.2843, "step": 18829 }, { "epoch": 0.8522290110884816, "grad_norm": 0.5666407427293152, "learning_rate": 5.621289331556413e-07, "loss": 0.2933, "step": 18830 }, { "epoch": 0.8522742701968771, "grad_norm": 0.6216220501351425, "learning_rate": 5.617913499541355e-07, "loss": 0.3025, "step": 18831 }, { "epoch": 0.8523195293052727, "grad_norm": 0.6165932173152734, "learning_rate": 5.614538621161036e-07, "loss": 0.2516, "step": 18832 }, { "epoch": 0.8523647884136683, "grad_norm": 0.5415014831368999, "learning_rate": 5.611164696487953e-07, "loss": 0.2596, "step": 18833 }, { "epoch": 0.8524100475220638, "grad_norm": 0.6254587466014427, "learning_rate": 5.607791725594619e-07, "loss": 0.2992, "step": 18834 }, { "epoch": 0.8524553066304594, "grad_norm": 0.2603911234479618, "learning_rate": 5.604419708553504e-07, "loss": 0.4804, "step": 18835 }, { "epoch": 0.8525005657388549, "grad_norm": 0.5863409592256777, "learning_rate": 5.601048645437046e-07, "loss": 0.2681, "step": 18836 }, { "epoch": 0.8525458248472505, "grad_norm": 0.5850552431665895, "learning_rate": 5.597678536317697e-07, "loss": 0.3173, "step": 18837 }, { "epoch": 0.8525910839556461, "grad_norm": 0.6171999858745233, "learning_rate": 5.594309381267882e-07, "loss": 0.2922, "step": 18838 }, { "epoch": 0.8526363430640417, "grad_norm": 0.6624620574115613, "learning_rate": 5.590941180359954e-07, "loss": 0.3304, "step": 18839 }, { "epoch": 0.8526816021724372, "grad_norm": 0.5769346122637378, "learning_rate": 5.587573933666307e-07, "loss": 0.282, "step": 18840 }, { "epoch": 0.8527268612808327, "grad_norm": 0.5673563829975743, "learning_rate": 5.584207641259309e-07, "loss": 0.2493, "step": 18841 }, { "epoch": 0.8527721203892283, "grad_norm": 0.2607286583379762, "learning_rate": 5.580842303211275e-07, "loss": 0.4406, "step": 18842 }, { "epoch": 0.8528173794976239, "grad_norm": 0.6296153721567521, "learning_rate": 5.577477919594504e-07, "loss": 0.302, "step": 18843 }, { "epoch": 0.8528626386060194, "grad_norm": 0.599328037123013, "learning_rate": 5.574114490481303e-07, "loss": 0.3279, "step": 18844 }, { "epoch": 0.852907897714415, "grad_norm": 0.6831492461060868, "learning_rate": 5.570752015943942e-07, "loss": 0.3385, "step": 18845 }, { "epoch": 0.8529531568228106, "grad_norm": 0.5819428657972001, "learning_rate": 5.56739049605467e-07, "loss": 0.2928, "step": 18846 }, { "epoch": 0.8529984159312062, "grad_norm": 0.5730264752763495, "learning_rate": 5.5640299308857e-07, "loss": 0.2754, "step": 18847 }, { "epoch": 0.8530436750396018, "grad_norm": 0.5948009048473426, "learning_rate": 5.560670320509265e-07, "loss": 0.3011, "step": 18848 }, { "epoch": 0.8530889341479972, "grad_norm": 0.6239783490017728, "learning_rate": 5.557311664997528e-07, "loss": 0.265, "step": 18849 }, { "epoch": 0.8531341932563928, "grad_norm": 0.6317259979875175, "learning_rate": 5.553953964422681e-07, "loss": 0.287, "step": 18850 }, { "epoch": 0.8531794523647884, "grad_norm": 0.26053981077165644, "learning_rate": 5.550597218856857e-07, "loss": 0.4627, "step": 18851 }, { "epoch": 0.853224711473184, "grad_norm": 0.5959948035580483, "learning_rate": 5.547241428372169e-07, "loss": 0.2711, "step": 18852 }, { "epoch": 0.8532699705815795, "grad_norm": 0.605770568665207, "learning_rate": 5.543886593040737e-07, "loss": 0.2901, "step": 18853 }, { "epoch": 0.8533152296899751, "grad_norm": 0.6075204232134758, "learning_rate": 5.54053271293466e-07, "loss": 0.2762, "step": 18854 }, { "epoch": 0.8533604887983707, "grad_norm": 0.6230506570272137, "learning_rate": 5.537179788125985e-07, "loss": 0.2638, "step": 18855 }, { "epoch": 0.8534057479067663, "grad_norm": 0.644161196347823, "learning_rate": 5.533827818686749e-07, "loss": 0.2977, "step": 18856 }, { "epoch": 0.8534510070151619, "grad_norm": 0.2786588955899518, "learning_rate": 5.530476804688994e-07, "loss": 0.4676, "step": 18857 }, { "epoch": 0.8534962661235573, "grad_norm": 0.6168369356365765, "learning_rate": 5.527126746204708e-07, "loss": 0.2872, "step": 18858 }, { "epoch": 0.8535415252319529, "grad_norm": 0.568447828785355, "learning_rate": 5.523777643305888e-07, "loss": 0.2992, "step": 18859 }, { "epoch": 0.8535867843403485, "grad_norm": 0.6224121972005846, "learning_rate": 5.520429496064483e-07, "loss": 0.2535, "step": 18860 }, { "epoch": 0.8536320434487441, "grad_norm": 0.2702032240620627, "learning_rate": 5.517082304552446e-07, "loss": 0.4564, "step": 18861 }, { "epoch": 0.8536773025571396, "grad_norm": 0.6557325387237996, "learning_rate": 5.513736068841679e-07, "loss": 0.2937, "step": 18862 }, { "epoch": 0.8537225616655352, "grad_norm": 0.6332128454241116, "learning_rate": 5.510390789004105e-07, "loss": 0.3211, "step": 18863 }, { "epoch": 0.8537678207739308, "grad_norm": 0.5827273766818591, "learning_rate": 5.507046465111598e-07, "loss": 0.3144, "step": 18864 }, { "epoch": 0.8538130798823264, "grad_norm": 0.9306378137704379, "learning_rate": 5.503703097236002e-07, "loss": 0.2749, "step": 18865 }, { "epoch": 0.8538583389907218, "grad_norm": 0.6298125599693024, "learning_rate": 5.500360685449163e-07, "loss": 0.2842, "step": 18866 }, { "epoch": 0.8539035980991174, "grad_norm": 0.5290613603715577, "learning_rate": 5.497019229822914e-07, "loss": 0.2998, "step": 18867 }, { "epoch": 0.853948857207513, "grad_norm": 0.6734853602452973, "learning_rate": 5.493678730429041e-07, "loss": 0.3622, "step": 18868 }, { "epoch": 0.8539941163159086, "grad_norm": 0.6189222555767221, "learning_rate": 5.490339187339317e-07, "loss": 0.323, "step": 18869 }, { "epoch": 0.8540393754243042, "grad_norm": 0.6925699517107268, "learning_rate": 5.487000600625509e-07, "loss": 0.2794, "step": 18870 }, { "epoch": 0.8540846345326997, "grad_norm": 0.5761053277661985, "learning_rate": 5.483662970359344e-07, "loss": 0.3002, "step": 18871 }, { "epoch": 0.8541298936410953, "grad_norm": 0.6464114513484791, "learning_rate": 5.480326296612532e-07, "loss": 0.3006, "step": 18872 }, { "epoch": 0.8541751527494909, "grad_norm": 0.6209160449026865, "learning_rate": 5.476990579456776e-07, "loss": 0.2717, "step": 18873 }, { "epoch": 0.8542204118578864, "grad_norm": 0.6411823424903216, "learning_rate": 5.473655818963758e-07, "loss": 0.2691, "step": 18874 }, { "epoch": 0.8542656709662819, "grad_norm": 0.6069517485269621, "learning_rate": 5.470322015205132e-07, "loss": 0.2636, "step": 18875 }, { "epoch": 0.8543109300746775, "grad_norm": 0.6509685693287112, "learning_rate": 5.466989168252506e-07, "loss": 0.2925, "step": 18876 }, { "epoch": 0.8543561891830731, "grad_norm": 0.5782497626693441, "learning_rate": 5.463657278177526e-07, "loss": 0.3091, "step": 18877 }, { "epoch": 0.8544014482914687, "grad_norm": 0.6516349786922266, "learning_rate": 5.460326345051753e-07, "loss": 0.3234, "step": 18878 }, { "epoch": 0.8544467073998642, "grad_norm": 0.5935709697545233, "learning_rate": 5.456996368946782e-07, "loss": 0.2522, "step": 18879 }, { "epoch": 0.8544919665082598, "grad_norm": 0.275509383061996, "learning_rate": 5.45366734993416e-07, "loss": 0.4659, "step": 18880 }, { "epoch": 0.8545372256166553, "grad_norm": 0.5750454126126509, "learning_rate": 5.450339288085404e-07, "loss": 0.2656, "step": 18881 }, { "epoch": 0.8545824847250509, "grad_norm": 0.6455719189966478, "learning_rate": 5.447012183472027e-07, "loss": 0.29, "step": 18882 }, { "epoch": 0.8546277438334465, "grad_norm": 0.6337452815116204, "learning_rate": 5.443686036165541e-07, "loss": 0.2941, "step": 18883 }, { "epoch": 0.854673002941842, "grad_norm": 0.6011439559025845, "learning_rate": 5.440360846237397e-07, "loss": 0.3315, "step": 18884 }, { "epoch": 0.8547182620502376, "grad_norm": 0.6416022117789674, "learning_rate": 5.437036613759028e-07, "loss": 0.2904, "step": 18885 }, { "epoch": 0.8547635211586332, "grad_norm": 0.5634699989693578, "learning_rate": 5.433713338801883e-07, "loss": 0.2722, "step": 18886 }, { "epoch": 0.8548087802670288, "grad_norm": 0.614677168689508, "learning_rate": 5.43039102143737e-07, "loss": 0.3061, "step": 18887 }, { "epoch": 0.8548540393754243, "grad_norm": 0.2506225998611218, "learning_rate": 5.427069661736873e-07, "loss": 0.4437, "step": 18888 }, { "epoch": 0.8548992984838198, "grad_norm": 0.5797397847475295, "learning_rate": 5.423749259771738e-07, "loss": 0.3086, "step": 18889 }, { "epoch": 0.8549445575922154, "grad_norm": 0.5874281031572004, "learning_rate": 5.420429815613343e-07, "loss": 0.2535, "step": 18890 }, { "epoch": 0.854989816700611, "grad_norm": 0.6109100658942526, "learning_rate": 5.41711132933298e-07, "loss": 0.3137, "step": 18891 }, { "epoch": 0.8550350758090066, "grad_norm": 0.2686910506299942, "learning_rate": 5.413793801001981e-07, "loss": 0.4826, "step": 18892 }, { "epoch": 0.8550803349174021, "grad_norm": 0.6193657188011698, "learning_rate": 5.410477230691618e-07, "loss": 0.291, "step": 18893 }, { "epoch": 0.8551255940257977, "grad_norm": 0.2658226084268709, "learning_rate": 5.407161618473139e-07, "loss": 0.4563, "step": 18894 }, { "epoch": 0.8551708531341933, "grad_norm": 0.2628564515559104, "learning_rate": 5.403846964417803e-07, "loss": 0.4571, "step": 18895 }, { "epoch": 0.8552161122425889, "grad_norm": 0.7782007656221628, "learning_rate": 5.400533268596841e-07, "loss": 0.3062, "step": 18896 }, { "epoch": 0.8552613713509843, "grad_norm": 0.6839910168496154, "learning_rate": 5.397220531081437e-07, "loss": 0.3558, "step": 18897 }, { "epoch": 0.8553066304593799, "grad_norm": 0.6251850507046652, "learning_rate": 5.393908751942773e-07, "loss": 0.2878, "step": 18898 }, { "epoch": 0.8553518895677755, "grad_norm": 0.7957418093710049, "learning_rate": 5.390597931252017e-07, "loss": 0.3013, "step": 18899 }, { "epoch": 0.8553971486761711, "grad_norm": 0.638593999873009, "learning_rate": 5.387288069080298e-07, "loss": 0.2899, "step": 18900 }, { "epoch": 0.8554424077845666, "grad_norm": 0.6447274806725127, "learning_rate": 5.383979165498748e-07, "loss": 0.2584, "step": 18901 }, { "epoch": 0.8554876668929622, "grad_norm": 0.6147292165958476, "learning_rate": 5.380671220578454e-07, "loss": 0.2995, "step": 18902 }, { "epoch": 0.8555329260013578, "grad_norm": 0.5864371107534302, "learning_rate": 5.377364234390503e-07, "loss": 0.314, "step": 18903 }, { "epoch": 0.8555781851097534, "grad_norm": 0.7160624991087194, "learning_rate": 5.374058207005945e-07, "loss": 0.2921, "step": 18904 }, { "epoch": 0.855623444218149, "grad_norm": 0.5965671451551371, "learning_rate": 5.37075313849581e-07, "loss": 0.2886, "step": 18905 }, { "epoch": 0.8556687033265444, "grad_norm": 0.25418867193021494, "learning_rate": 5.367449028931133e-07, "loss": 0.4619, "step": 18906 }, { "epoch": 0.85571396243494, "grad_norm": 0.5835242042302965, "learning_rate": 5.364145878382887e-07, "loss": 0.3046, "step": 18907 }, { "epoch": 0.8557592215433356, "grad_norm": 0.6649529932271935, "learning_rate": 5.360843686922068e-07, "loss": 0.2951, "step": 18908 }, { "epoch": 0.8558044806517312, "grad_norm": 0.6184811126092957, "learning_rate": 5.357542454619619e-07, "loss": 0.3277, "step": 18909 }, { "epoch": 0.8558497397601267, "grad_norm": 0.5683251110712597, "learning_rate": 5.354242181546465e-07, "loss": 0.2856, "step": 18910 }, { "epoch": 0.8558949988685223, "grad_norm": 0.658689800101766, "learning_rate": 5.350942867773523e-07, "loss": 0.276, "step": 18911 }, { "epoch": 0.8559402579769179, "grad_norm": 0.700714967968001, "learning_rate": 5.347644513371702e-07, "loss": 0.3159, "step": 18912 }, { "epoch": 0.8559855170853135, "grad_norm": 0.2668507086692464, "learning_rate": 5.344347118411863e-07, "loss": 0.4583, "step": 18913 }, { "epoch": 0.8560307761937089, "grad_norm": 0.6919274318584268, "learning_rate": 5.341050682964844e-07, "loss": 0.2797, "step": 18914 }, { "epoch": 0.8560760353021045, "grad_norm": 0.6035576730213521, "learning_rate": 5.337755207101486e-07, "loss": 0.2587, "step": 18915 }, { "epoch": 0.8561212944105001, "grad_norm": 0.6375513840137232, "learning_rate": 5.334460690892613e-07, "loss": 0.2689, "step": 18916 }, { "epoch": 0.8561665535188957, "grad_norm": 0.7166262430313622, "learning_rate": 5.331167134408994e-07, "loss": 0.3083, "step": 18917 }, { "epoch": 0.8562118126272913, "grad_norm": 0.5963358319668006, "learning_rate": 5.327874537721395e-07, "loss": 0.3031, "step": 18918 }, { "epoch": 0.8562570717356868, "grad_norm": 0.6120859632707557, "learning_rate": 5.324582900900587e-07, "loss": 0.3269, "step": 18919 }, { "epoch": 0.8563023308440824, "grad_norm": 0.6375785644361093, "learning_rate": 5.321292224017266e-07, "loss": 0.2969, "step": 18920 }, { "epoch": 0.856347589952478, "grad_norm": 0.6517198634795168, "learning_rate": 5.318002507142167e-07, "loss": 0.3002, "step": 18921 }, { "epoch": 0.8563928490608735, "grad_norm": 0.7554083589509392, "learning_rate": 5.314713750345968e-07, "loss": 0.295, "step": 18922 }, { "epoch": 0.856438108169269, "grad_norm": 0.6673993566220832, "learning_rate": 5.311425953699312e-07, "loss": 0.331, "step": 18923 }, { "epoch": 0.8564833672776646, "grad_norm": 0.278931041184877, "learning_rate": 5.30813911727287e-07, "loss": 0.452, "step": 18924 }, { "epoch": 0.8565286263860602, "grad_norm": 0.6127972655988442, "learning_rate": 5.304853241137264e-07, "loss": 0.3178, "step": 18925 }, { "epoch": 0.8565738854944558, "grad_norm": 0.61155340954273, "learning_rate": 5.301568325363088e-07, "loss": 0.2958, "step": 18926 }, { "epoch": 0.8566191446028514, "grad_norm": 0.6030392436805826, "learning_rate": 5.298284370020923e-07, "loss": 0.304, "step": 18927 }, { "epoch": 0.8566644037112469, "grad_norm": 0.2969858127149444, "learning_rate": 5.295001375181336e-07, "loss": 0.4643, "step": 18928 }, { "epoch": 0.8567096628196424, "grad_norm": 0.5792598709562329, "learning_rate": 5.291719340914875e-07, "loss": 0.3374, "step": 18929 }, { "epoch": 0.856754921928038, "grad_norm": 0.6383794283244996, "learning_rate": 5.288438267292057e-07, "loss": 0.3105, "step": 18930 }, { "epoch": 0.8568001810364336, "grad_norm": 0.6203512639753918, "learning_rate": 5.285158154383369e-07, "loss": 0.3027, "step": 18931 }, { "epoch": 0.8568454401448291, "grad_norm": 0.5828768141171531, "learning_rate": 5.28187900225931e-07, "loss": 0.2773, "step": 18932 }, { "epoch": 0.8568906992532247, "grad_norm": 0.6918531275424276, "learning_rate": 5.27860081099032e-07, "loss": 0.2567, "step": 18933 }, { "epoch": 0.8569359583616203, "grad_norm": 0.6421699593795226, "learning_rate": 5.275323580646857e-07, "loss": 0.2882, "step": 18934 }, { "epoch": 0.8569812174700159, "grad_norm": 0.68124116019829, "learning_rate": 5.272047311299333e-07, "loss": 0.2321, "step": 18935 }, { "epoch": 0.8570264765784114, "grad_norm": 1.7539131270402744, "learning_rate": 5.268772003018124e-07, "loss": 0.2825, "step": 18936 }, { "epoch": 0.857071735686807, "grad_norm": 0.6545848551286552, "learning_rate": 5.26549765587363e-07, "loss": 0.2473, "step": 18937 }, { "epoch": 0.8571169947952025, "grad_norm": 0.5745584200238323, "learning_rate": 5.262224269936217e-07, "loss": 0.2917, "step": 18938 }, { "epoch": 0.8571622539035981, "grad_norm": 0.596404529852008, "learning_rate": 5.258951845276178e-07, "loss": 0.2557, "step": 18939 }, { "epoch": 0.8572075130119937, "grad_norm": 0.6719309822426754, "learning_rate": 5.255680381963856e-07, "loss": 0.3002, "step": 18940 }, { "epoch": 0.8572527721203892, "grad_norm": 0.30837623461057606, "learning_rate": 5.252409880069553e-07, "loss": 0.4646, "step": 18941 }, { "epoch": 0.8572980312287848, "grad_norm": 0.6469260324986573, "learning_rate": 5.249140339663533e-07, "loss": 0.2939, "step": 18942 }, { "epoch": 0.8573432903371804, "grad_norm": 0.2768016336533323, "learning_rate": 5.245871760816029e-07, "loss": 0.4531, "step": 18943 }, { "epoch": 0.857388549445576, "grad_norm": 0.6037990091196405, "learning_rate": 5.24260414359729e-07, "loss": 0.3148, "step": 18944 }, { "epoch": 0.8574338085539714, "grad_norm": 0.6346860481158199, "learning_rate": 5.239337488077539e-07, "loss": 0.3031, "step": 18945 }, { "epoch": 0.857479067662367, "grad_norm": 0.6893880255876333, "learning_rate": 5.236071794326952e-07, "loss": 0.329, "step": 18946 }, { "epoch": 0.8575243267707626, "grad_norm": 0.6232018323259965, "learning_rate": 5.232807062415691e-07, "loss": 0.2441, "step": 18947 }, { "epoch": 0.8575695858791582, "grad_norm": 0.6015287791266947, "learning_rate": 5.229543292413919e-07, "loss": 0.2605, "step": 18948 }, { "epoch": 0.8576148449875537, "grad_norm": 0.6469701975664196, "learning_rate": 5.226280484391754e-07, "loss": 0.2543, "step": 18949 }, { "epoch": 0.8576601040959493, "grad_norm": 0.5637765135573233, "learning_rate": 5.22301863841932e-07, "loss": 0.2346, "step": 18950 }, { "epoch": 0.8577053632043449, "grad_norm": 0.6134828769689903, "learning_rate": 5.219757754566696e-07, "loss": 0.3188, "step": 18951 }, { "epoch": 0.8577506223127405, "grad_norm": 0.6391224852971039, "learning_rate": 5.216497832903927e-07, "loss": 0.2864, "step": 18952 }, { "epoch": 0.857795881421136, "grad_norm": 0.5689721588401953, "learning_rate": 5.213238873501086e-07, "loss": 0.3206, "step": 18953 }, { "epoch": 0.8578411405295315, "grad_norm": 0.7111119399235565, "learning_rate": 5.209980876428195e-07, "loss": 0.3183, "step": 18954 }, { "epoch": 0.8578863996379271, "grad_norm": 0.7379886429307578, "learning_rate": 5.206723841755257e-07, "loss": 0.3008, "step": 18955 }, { "epoch": 0.8579316587463227, "grad_norm": 0.6115833659110569, "learning_rate": 5.203467769552239e-07, "loss": 0.274, "step": 18956 }, { "epoch": 0.8579769178547183, "grad_norm": 0.24484072528037706, "learning_rate": 5.200212659889114e-07, "loss": 0.4851, "step": 18957 }, { "epoch": 0.8580221769631138, "grad_norm": 0.6280509556954103, "learning_rate": 5.196958512835843e-07, "loss": 0.257, "step": 18958 }, { "epoch": 0.8580674360715094, "grad_norm": 0.588999369246937, "learning_rate": 5.193705328462328e-07, "loss": 0.2908, "step": 18959 }, { "epoch": 0.858112695179905, "grad_norm": 0.6195333674917924, "learning_rate": 5.190453106838461e-07, "loss": 0.3257, "step": 18960 }, { "epoch": 0.8581579542883006, "grad_norm": 0.6086824841574335, "learning_rate": 5.187201848034146e-07, "loss": 0.318, "step": 18961 }, { "epoch": 0.858203213396696, "grad_norm": 0.58719677522352, "learning_rate": 5.183951552119227e-07, "loss": 0.2802, "step": 18962 }, { "epoch": 0.8582484725050916, "grad_norm": 0.580374303078345, "learning_rate": 5.180702219163552e-07, "loss": 0.327, "step": 18963 }, { "epoch": 0.8582937316134872, "grad_norm": 0.6288797524191543, "learning_rate": 5.177453849236935e-07, "loss": 0.3092, "step": 18964 }, { "epoch": 0.8583389907218828, "grad_norm": 0.25583946705194716, "learning_rate": 5.174206442409163e-07, "loss": 0.4658, "step": 18965 }, { "epoch": 0.8583842498302784, "grad_norm": 0.6087906512514828, "learning_rate": 5.17095999875002e-07, "loss": 0.2621, "step": 18966 }, { "epoch": 0.8584295089386739, "grad_norm": 0.5619462201669044, "learning_rate": 5.167714518329286e-07, "loss": 0.2717, "step": 18967 }, { "epoch": 0.8584747680470695, "grad_norm": 0.6351139673480072, "learning_rate": 5.16447000121666e-07, "loss": 0.3056, "step": 18968 }, { "epoch": 0.858520027155465, "grad_norm": 0.5868098674192496, "learning_rate": 5.161226447481865e-07, "loss": 0.3107, "step": 18969 }, { "epoch": 0.8585652862638606, "grad_norm": 0.2619592796412879, "learning_rate": 5.157983857194615e-07, "loss": 0.4457, "step": 18970 }, { "epoch": 0.8586105453722561, "grad_norm": 0.6057152229425675, "learning_rate": 5.154742230424575e-07, "loss": 0.2846, "step": 18971 }, { "epoch": 0.8586558044806517, "grad_norm": 0.6068278424363579, "learning_rate": 5.151501567241373e-07, "loss": 0.3123, "step": 18972 }, { "epoch": 0.8587010635890473, "grad_norm": 0.6502329942997074, "learning_rate": 5.148261867714671e-07, "loss": 0.2686, "step": 18973 }, { "epoch": 0.8587463226974429, "grad_norm": 0.6056539004633698, "learning_rate": 5.145023131914074e-07, "loss": 0.2948, "step": 18974 }, { "epoch": 0.8587915818058385, "grad_norm": 0.6625029521242054, "learning_rate": 5.141785359909168e-07, "loss": 0.3182, "step": 18975 }, { "epoch": 0.858836840914234, "grad_norm": 0.6228892249909714, "learning_rate": 5.138548551769512e-07, "loss": 0.2629, "step": 18976 }, { "epoch": 0.8588821000226295, "grad_norm": 0.5779265648025561, "learning_rate": 5.135312707564683e-07, "loss": 0.2828, "step": 18977 }, { "epoch": 0.8589273591310251, "grad_norm": 0.6126297100733569, "learning_rate": 5.132077827364174e-07, "loss": 0.3255, "step": 18978 }, { "epoch": 0.8589726182394207, "grad_norm": 0.2882798253765335, "learning_rate": 5.128843911237525e-07, "loss": 0.4581, "step": 18979 }, { "epoch": 0.8590178773478162, "grad_norm": 0.5635818665300483, "learning_rate": 5.125610959254213e-07, "loss": 0.2562, "step": 18980 }, { "epoch": 0.8590631364562118, "grad_norm": 0.6155989790532402, "learning_rate": 5.122378971483683e-07, "loss": 0.305, "step": 18981 }, { "epoch": 0.8591083955646074, "grad_norm": 0.6168694726468859, "learning_rate": 5.119147947995401e-07, "loss": 0.3282, "step": 18982 }, { "epoch": 0.859153654673003, "grad_norm": 0.7878038210236072, "learning_rate": 5.115917888858802e-07, "loss": 0.324, "step": 18983 }, { "epoch": 0.8591989137813985, "grad_norm": 0.2860775231703091, "learning_rate": 5.112688794143273e-07, "loss": 0.4733, "step": 18984 }, { "epoch": 0.859244172889794, "grad_norm": 0.584825913982036, "learning_rate": 5.109460663918192e-07, "loss": 0.3034, "step": 18985 }, { "epoch": 0.8592894319981896, "grad_norm": 0.634246711145265, "learning_rate": 5.106233498252927e-07, "loss": 0.3081, "step": 18986 }, { "epoch": 0.8593346911065852, "grad_norm": 0.2571638233495209, "learning_rate": 5.103007297216838e-07, "loss": 0.4738, "step": 18987 }, { "epoch": 0.8593799502149808, "grad_norm": 0.636572794194953, "learning_rate": 5.099782060879227e-07, "loss": 0.2935, "step": 18988 }, { "epoch": 0.8594252093233763, "grad_norm": 0.8216739640425371, "learning_rate": 5.096557789309392e-07, "loss": 0.3017, "step": 18989 }, { "epoch": 0.8594704684317719, "grad_norm": 0.5856909949197273, "learning_rate": 5.093334482576634e-07, "loss": 0.3003, "step": 18990 }, { "epoch": 0.8595157275401675, "grad_norm": 0.6767030167755445, "learning_rate": 5.09011214075018e-07, "loss": 0.3364, "step": 18991 }, { "epoch": 0.8595609866485631, "grad_norm": 0.26674590985470387, "learning_rate": 5.086890763899299e-07, "loss": 0.4833, "step": 18992 }, { "epoch": 0.8596062457569585, "grad_norm": 0.5781600172894498, "learning_rate": 5.083670352093196e-07, "loss": 0.3348, "step": 18993 }, { "epoch": 0.8596515048653541, "grad_norm": 0.6306420137429097, "learning_rate": 5.080450905401057e-07, "loss": 0.3246, "step": 18994 }, { "epoch": 0.8596967639737497, "grad_norm": 0.6241465051839432, "learning_rate": 5.07723242389207e-07, "loss": 0.2773, "step": 18995 }, { "epoch": 0.8597420230821453, "grad_norm": 0.6184597563032113, "learning_rate": 5.074014907635405e-07, "loss": 0.3036, "step": 18996 }, { "epoch": 0.8597872821905408, "grad_norm": 0.5868468882841714, "learning_rate": 5.070798356700163e-07, "loss": 0.3068, "step": 18997 }, { "epoch": 0.8598325412989364, "grad_norm": 0.6258664955028327, "learning_rate": 5.067582771155472e-07, "loss": 0.2949, "step": 18998 }, { "epoch": 0.859877800407332, "grad_norm": 0.578382879309725, "learning_rate": 5.064368151070431e-07, "loss": 0.2924, "step": 18999 }, { "epoch": 0.8599230595157276, "grad_norm": 0.2750712488483772, "learning_rate": 5.061154496514125e-07, "loss": 0.4532, "step": 19000 }, { "epoch": 0.8599683186241232, "grad_norm": 0.6657087671047848, "learning_rate": 5.057941807555571e-07, "loss": 0.2935, "step": 19001 }, { "epoch": 0.8600135777325186, "grad_norm": 0.6026886189492119, "learning_rate": 5.05473008426382e-07, "loss": 0.2981, "step": 19002 }, { "epoch": 0.8600588368409142, "grad_norm": 0.6383401618594063, "learning_rate": 5.051519326707893e-07, "loss": 0.3204, "step": 19003 }, { "epoch": 0.8601040959493098, "grad_norm": 0.6416246251796458, "learning_rate": 5.048309534956763e-07, "loss": 0.3278, "step": 19004 }, { "epoch": 0.8601493550577054, "grad_norm": 0.6460942069471778, "learning_rate": 5.045100709079393e-07, "loss": 0.2715, "step": 19005 }, { "epoch": 0.8601946141661009, "grad_norm": 0.40949862405358045, "learning_rate": 5.041892849144753e-07, "loss": 0.4784, "step": 19006 }, { "epoch": 0.8602398732744965, "grad_norm": 0.5929098033750538, "learning_rate": 5.038685955221745e-07, "loss": 0.3326, "step": 19007 }, { "epoch": 0.8602851323828921, "grad_norm": 0.61831375951075, "learning_rate": 5.035480027379297e-07, "loss": 0.3042, "step": 19008 }, { "epoch": 0.8603303914912876, "grad_norm": 0.6487690460983344, "learning_rate": 5.032275065686287e-07, "loss": 0.2795, "step": 19009 }, { "epoch": 0.8603756505996832, "grad_norm": 0.6187830430063399, "learning_rate": 5.029071070211566e-07, "loss": 0.3131, "step": 19010 }, { "epoch": 0.8604209097080787, "grad_norm": 0.5865905552575247, "learning_rate": 5.025868041023996e-07, "loss": 0.3177, "step": 19011 }, { "epoch": 0.8604661688164743, "grad_norm": 0.6062723886429946, "learning_rate": 5.022665978192398e-07, "loss": 0.2824, "step": 19012 }, { "epoch": 0.8605114279248699, "grad_norm": 1.024759836194424, "learning_rate": 5.019464881785569e-07, "loss": 0.2734, "step": 19013 }, { "epoch": 0.8605566870332655, "grad_norm": 0.599430239637231, "learning_rate": 5.016264751872291e-07, "loss": 0.2917, "step": 19014 }, { "epoch": 0.860601946141661, "grad_norm": 0.5957761500719001, "learning_rate": 5.013065588521321e-07, "loss": 0.3019, "step": 19015 }, { "epoch": 0.8606472052500566, "grad_norm": 0.6263540868459652, "learning_rate": 5.009867391801415e-07, "loss": 0.2628, "step": 19016 }, { "epoch": 0.8606924643584521, "grad_norm": 0.6826419226058544, "learning_rate": 5.00667016178128e-07, "loss": 0.2809, "step": 19017 }, { "epoch": 0.8607377234668477, "grad_norm": 0.2711226807270318, "learning_rate": 5.00347389852961e-07, "loss": 0.4614, "step": 19018 }, { "epoch": 0.8607829825752432, "grad_norm": 0.5811627750482236, "learning_rate": 5.0002786021151e-07, "loss": 0.3382, "step": 19019 }, { "epoch": 0.8608282416836388, "grad_norm": 0.674512844703926, "learning_rate": 4.997084272606384e-07, "loss": 0.3412, "step": 19020 }, { "epoch": 0.8608735007920344, "grad_norm": 0.6337745654730212, "learning_rate": 4.993890910072124e-07, "loss": 0.2769, "step": 19021 }, { "epoch": 0.86091875990043, "grad_norm": 0.5466312533797884, "learning_rate": 4.990698514580922e-07, "loss": 0.295, "step": 19022 }, { "epoch": 0.8609640190088256, "grad_norm": 0.6220405566632338, "learning_rate": 4.987507086201359e-07, "loss": 0.2996, "step": 19023 }, { "epoch": 0.861009278117221, "grad_norm": 0.5593024505119963, "learning_rate": 4.984316625002029e-07, "loss": 0.2729, "step": 19024 }, { "epoch": 0.8610545372256166, "grad_norm": 0.597273768740094, "learning_rate": 4.981127131051494e-07, "loss": 0.2785, "step": 19025 }, { "epoch": 0.8610997963340122, "grad_norm": 0.581083934494568, "learning_rate": 4.977938604418259e-07, "loss": 0.2976, "step": 19026 }, { "epoch": 0.8611450554424078, "grad_norm": 0.8116799266557139, "learning_rate": 4.974751045170845e-07, "loss": 0.306, "step": 19027 }, { "epoch": 0.8611903145508033, "grad_norm": 0.5998911145327874, "learning_rate": 4.971564453377748e-07, "loss": 0.2869, "step": 19028 }, { "epoch": 0.8612355736591989, "grad_norm": 0.6301029855331602, "learning_rate": 4.968378829107451e-07, "loss": 0.2929, "step": 19029 }, { "epoch": 0.8612808327675945, "grad_norm": 0.6169160539266364, "learning_rate": 4.965194172428378e-07, "loss": 0.3149, "step": 19030 }, { "epoch": 0.8613260918759901, "grad_norm": 0.6082576689734003, "learning_rate": 4.962010483408964e-07, "loss": 0.2762, "step": 19031 }, { "epoch": 0.8613713509843856, "grad_norm": 0.658833602087984, "learning_rate": 4.95882776211763e-07, "loss": 0.2853, "step": 19032 }, { "epoch": 0.8614166100927811, "grad_norm": 0.2621143460969143, "learning_rate": 4.955646008622755e-07, "loss": 0.4381, "step": 19033 }, { "epoch": 0.8614618692011767, "grad_norm": 0.7241578876038501, "learning_rate": 4.952465222992692e-07, "loss": 0.3295, "step": 19034 }, { "epoch": 0.8615071283095723, "grad_norm": 0.5715552371285294, "learning_rate": 4.949285405295812e-07, "loss": 0.2792, "step": 19035 }, { "epoch": 0.8615523874179679, "grad_norm": 0.627102608544472, "learning_rate": 4.94610655560041e-07, "loss": 0.2917, "step": 19036 }, { "epoch": 0.8615976465263634, "grad_norm": 0.5924211155800102, "learning_rate": 4.942928673974823e-07, "loss": 0.2747, "step": 19037 }, { "epoch": 0.861642905634759, "grad_norm": 0.6872239178605828, "learning_rate": 4.93975176048731e-07, "loss": 0.3035, "step": 19038 }, { "epoch": 0.8616881647431546, "grad_norm": 0.6337648406533888, "learning_rate": 4.936575815206134e-07, "loss": 0.2408, "step": 19039 }, { "epoch": 0.8617334238515502, "grad_norm": 0.600265567939054, "learning_rate": 4.933400838199543e-07, "loss": 0.3157, "step": 19040 }, { "epoch": 0.8617786829599456, "grad_norm": 0.5771906865411033, "learning_rate": 4.930226829535767e-07, "loss": 0.2966, "step": 19041 }, { "epoch": 0.8618239420683412, "grad_norm": 0.6112288585274278, "learning_rate": 4.927053789282988e-07, "loss": 0.3165, "step": 19042 }, { "epoch": 0.8618692011767368, "grad_norm": 0.5944446545435385, "learning_rate": 4.923881717509388e-07, "loss": 0.2963, "step": 19043 }, { "epoch": 0.8619144602851324, "grad_norm": 0.6289189375880971, "learning_rate": 4.920710614283131e-07, "loss": 0.2409, "step": 19044 }, { "epoch": 0.861959719393528, "grad_norm": 0.599893130066422, "learning_rate": 4.917540479672356e-07, "loss": 0.2782, "step": 19045 }, { "epoch": 0.8620049785019235, "grad_norm": 0.5993331924655295, "learning_rate": 4.914371313745181e-07, "loss": 0.2983, "step": 19046 }, { "epoch": 0.8620502376103191, "grad_norm": 0.6404876556911271, "learning_rate": 4.911203116569685e-07, "loss": 0.315, "step": 19047 }, { "epoch": 0.8620954967187147, "grad_norm": 0.5569564271980745, "learning_rate": 4.908035888213964e-07, "loss": 0.3065, "step": 19048 }, { "epoch": 0.8621407558271103, "grad_norm": 0.6423297856896808, "learning_rate": 4.904869628746051e-07, "loss": 0.3015, "step": 19049 }, { "epoch": 0.8621860149355057, "grad_norm": 0.6286364099047824, "learning_rate": 4.901704338234004e-07, "loss": 0.2633, "step": 19050 }, { "epoch": 0.8622312740439013, "grad_norm": 0.5863196918495058, "learning_rate": 4.898540016745818e-07, "loss": 0.2815, "step": 19051 }, { "epoch": 0.8622765331522969, "grad_norm": 0.5968521463422396, "learning_rate": 4.895376664349482e-07, "loss": 0.3161, "step": 19052 }, { "epoch": 0.8623217922606925, "grad_norm": 0.6459674874191642, "learning_rate": 4.892214281112973e-07, "loss": 0.346, "step": 19053 }, { "epoch": 0.862367051369088, "grad_norm": 0.6018249731642297, "learning_rate": 4.88905286710426e-07, "loss": 0.2461, "step": 19054 }, { "epoch": 0.8624123104774836, "grad_norm": 0.646433803061555, "learning_rate": 4.88589242239123e-07, "loss": 0.2744, "step": 19055 }, { "epoch": 0.8624575695858792, "grad_norm": 0.5662144953473035, "learning_rate": 4.882732947041818e-07, "loss": 0.2501, "step": 19056 }, { "epoch": 0.8625028286942747, "grad_norm": 0.620217354053335, "learning_rate": 4.879574441123907e-07, "loss": 0.2622, "step": 19057 }, { "epoch": 0.8625480878026703, "grad_norm": 0.5854897297948762, "learning_rate": 4.876416904705384e-07, "loss": 0.2975, "step": 19058 }, { "epoch": 0.8625933469110658, "grad_norm": 0.26857698043195, "learning_rate": 4.873260337854058e-07, "loss": 0.4751, "step": 19059 }, { "epoch": 0.8626386060194614, "grad_norm": 0.6472511347380544, "learning_rate": 4.870104740637771e-07, "loss": 0.2864, "step": 19060 }, { "epoch": 0.862683865127857, "grad_norm": 0.7115035145085302, "learning_rate": 4.866950113124335e-07, "loss": 0.2362, "step": 19061 }, { "epoch": 0.8627291242362526, "grad_norm": 0.5470744766222366, "learning_rate": 4.863796455381525e-07, "loss": 0.2481, "step": 19062 }, { "epoch": 0.8627743833446481, "grad_norm": 0.5624476468618756, "learning_rate": 4.860643767477097e-07, "loss": 0.2607, "step": 19063 }, { "epoch": 0.8628196424530437, "grad_norm": 0.6459982206651735, "learning_rate": 4.857492049478807e-07, "loss": 0.2816, "step": 19064 }, { "epoch": 0.8628649015614392, "grad_norm": 0.7126904269804301, "learning_rate": 4.854341301454357e-07, "loss": 0.2854, "step": 19065 }, { "epoch": 0.8629101606698348, "grad_norm": 0.6386738086723427, "learning_rate": 4.851191523471465e-07, "loss": 0.2792, "step": 19066 }, { "epoch": 0.8629554197782303, "grad_norm": 0.26909620891534086, "learning_rate": 4.848042715597811e-07, "loss": 0.44, "step": 19067 }, { "epoch": 0.8630006788866259, "grad_norm": 0.6898405599890808, "learning_rate": 4.84489487790103e-07, "loss": 0.2905, "step": 19068 }, { "epoch": 0.8630459379950215, "grad_norm": 0.6080263444100681, "learning_rate": 4.841748010448777e-07, "loss": 0.2512, "step": 19069 }, { "epoch": 0.8630911971034171, "grad_norm": 0.6142457407787201, "learning_rate": 4.838602113308677e-07, "loss": 0.2908, "step": 19070 }, { "epoch": 0.8631364562118127, "grad_norm": 0.6201942893627198, "learning_rate": 4.835457186548315e-07, "loss": 0.2512, "step": 19071 }, { "epoch": 0.8631817153202082, "grad_norm": 0.6845755794045176, "learning_rate": 4.832313230235253e-07, "loss": 0.2799, "step": 19072 }, { "epoch": 0.8632269744286037, "grad_norm": 1.158425573208752, "learning_rate": 4.829170244437064e-07, "loss": 0.3014, "step": 19073 }, { "epoch": 0.8632722335369993, "grad_norm": 0.6331246849845407, "learning_rate": 4.82602822922128e-07, "loss": 0.2885, "step": 19074 }, { "epoch": 0.8633174926453949, "grad_norm": 0.5773053131214465, "learning_rate": 4.822887184655406e-07, "loss": 0.2824, "step": 19075 }, { "epoch": 0.8633627517537904, "grad_norm": 0.6958738240209923, "learning_rate": 4.819747110806928e-07, "loss": 0.3117, "step": 19076 }, { "epoch": 0.863408010862186, "grad_norm": 0.6302093385948211, "learning_rate": 4.816608007743335e-07, "loss": 0.3118, "step": 19077 }, { "epoch": 0.8634532699705816, "grad_norm": 0.26448191342255795, "learning_rate": 4.813469875532056e-07, "loss": 0.479, "step": 19078 }, { "epoch": 0.8634985290789772, "grad_norm": 0.6490925546607369, "learning_rate": 4.810332714240534e-07, "loss": 0.311, "step": 19079 }, { "epoch": 0.8635437881873728, "grad_norm": 0.6298511384731518, "learning_rate": 4.80719652393618e-07, "loss": 0.3362, "step": 19080 }, { "epoch": 0.8635890472957682, "grad_norm": 0.6095024090671313, "learning_rate": 4.804061304686358e-07, "loss": 0.2934, "step": 19081 }, { "epoch": 0.8636343064041638, "grad_norm": 0.5775729096049567, "learning_rate": 4.800927056558452e-07, "loss": 0.3192, "step": 19082 }, { "epoch": 0.8636795655125594, "grad_norm": 0.6587368973474238, "learning_rate": 4.79779377961982e-07, "loss": 0.3155, "step": 19083 }, { "epoch": 0.863724824620955, "grad_norm": 0.6612433715020307, "learning_rate": 4.794661473937761e-07, "loss": 0.3334, "step": 19084 }, { "epoch": 0.8637700837293505, "grad_norm": 0.41205551413684943, "learning_rate": 4.791530139579586e-07, "loss": 0.4772, "step": 19085 }, { "epoch": 0.8638153428377461, "grad_norm": 0.6138513167648837, "learning_rate": 4.788399776612584e-07, "loss": 0.2692, "step": 19086 }, { "epoch": 0.8638606019461417, "grad_norm": 0.6230080093900928, "learning_rate": 4.785270385104018e-07, "loss": 0.2664, "step": 19087 }, { "epoch": 0.8639058610545373, "grad_norm": 0.6425162193038234, "learning_rate": 4.782141965121129e-07, "loss": 0.3199, "step": 19088 }, { "epoch": 0.8639511201629327, "grad_norm": 0.28171184535994015, "learning_rate": 4.779014516731123e-07, "loss": 0.4722, "step": 19089 }, { "epoch": 0.8639963792713283, "grad_norm": 0.5849269358010359, "learning_rate": 4.775888040001214e-07, "loss": 0.2832, "step": 19090 }, { "epoch": 0.8640416383797239, "grad_norm": 0.6586879746802395, "learning_rate": 4.772762534998582e-07, "loss": 0.2955, "step": 19091 }, { "epoch": 0.8640868974881195, "grad_norm": 0.642220205319096, "learning_rate": 4.769638001790366e-07, "loss": 0.2682, "step": 19092 }, { "epoch": 0.8641321565965151, "grad_norm": 0.6185275549850182, "learning_rate": 4.766514440443726e-07, "loss": 0.2928, "step": 19093 }, { "epoch": 0.8641774157049106, "grad_norm": 0.6533054865884008, "learning_rate": 4.763391851025756e-07, "loss": 0.2647, "step": 19094 }, { "epoch": 0.8642226748133062, "grad_norm": 0.7854922368088226, "learning_rate": 4.76027023360357e-07, "loss": 0.304, "step": 19095 }, { "epoch": 0.8642679339217018, "grad_norm": 0.643605119309677, "learning_rate": 4.7571495882442363e-07, "loss": 0.2787, "step": 19096 }, { "epoch": 0.8643131930300973, "grad_norm": 0.2500595226999286, "learning_rate": 4.7540299150147906e-07, "loss": 0.4567, "step": 19097 }, { "epoch": 0.8643584521384928, "grad_norm": 0.654488899090377, "learning_rate": 4.7509112139822846e-07, "loss": 0.2883, "step": 19098 }, { "epoch": 0.8644037112468884, "grad_norm": 0.6683060747430509, "learning_rate": 4.7477934852137306e-07, "loss": 0.3268, "step": 19099 }, { "epoch": 0.864448970355284, "grad_norm": 0.3008385202888705, "learning_rate": 4.7446767287761154e-07, "loss": 0.4767, "step": 19100 }, { "epoch": 0.8644942294636796, "grad_norm": 0.6540711942901398, "learning_rate": 4.741560944736395e-07, "loss": 0.3294, "step": 19101 }, { "epoch": 0.8645394885720751, "grad_norm": 0.6253051333606227, "learning_rate": 4.7384461331615284e-07, "loss": 0.2962, "step": 19102 }, { "epoch": 0.8645847476804707, "grad_norm": 0.6168597078443402, "learning_rate": 4.735332294118455e-07, "loss": 0.2801, "step": 19103 }, { "epoch": 0.8646300067888663, "grad_norm": 0.6142779127798528, "learning_rate": 4.732219427674073e-07, "loss": 0.3245, "step": 19104 }, { "epoch": 0.8646752658972618, "grad_norm": 0.6313286970148196, "learning_rate": 4.729107533895255e-07, "loss": 0.298, "step": 19105 }, { "epoch": 0.8647205250056574, "grad_norm": 0.5928257456260824, "learning_rate": 4.7259966128488876e-07, "loss": 0.3042, "step": 19106 }, { "epoch": 0.8647657841140529, "grad_norm": 0.554898880701603, "learning_rate": 4.722886664601795e-07, "loss": 0.254, "step": 19107 }, { "epoch": 0.8648110432224485, "grad_norm": 0.6208995354350286, "learning_rate": 4.719777689220817e-07, "loss": 0.3293, "step": 19108 }, { "epoch": 0.8648563023308441, "grad_norm": 0.6028647084343479, "learning_rate": 4.716669686772751e-07, "loss": 0.2971, "step": 19109 }, { "epoch": 0.8649015614392397, "grad_norm": 0.5898969298823253, "learning_rate": 4.7135626573243607e-07, "loss": 0.2651, "step": 19110 }, { "epoch": 0.8649468205476352, "grad_norm": 0.6482671602848603, "learning_rate": 4.710456600942431e-07, "loss": 0.303, "step": 19111 }, { "epoch": 0.8649920796560308, "grad_norm": 0.6068135500269236, "learning_rate": 4.707351517693698e-07, "loss": 0.2706, "step": 19112 }, { "epoch": 0.8650373387644263, "grad_norm": 0.6790603909443061, "learning_rate": 4.704247407644874e-07, "loss": 0.2919, "step": 19113 }, { "epoch": 0.8650825978728219, "grad_norm": 0.31337349374409723, "learning_rate": 4.701144270862651e-07, "loss": 0.4753, "step": 19114 }, { "epoch": 0.8651278569812175, "grad_norm": 0.285205795606402, "learning_rate": 4.6980421074137137e-07, "loss": 0.4887, "step": 19115 }, { "epoch": 0.865173116089613, "grad_norm": 0.6040188107489841, "learning_rate": 4.6949409173647267e-07, "loss": 0.2864, "step": 19116 }, { "epoch": 0.8652183751980086, "grad_norm": 0.28986218884428416, "learning_rate": 4.691840700782313e-07, "loss": 0.4959, "step": 19117 }, { "epoch": 0.8652636343064042, "grad_norm": 0.6388514118297662, "learning_rate": 4.6887414577330814e-07, "loss": 0.3104, "step": 19118 }, { "epoch": 0.8653088934147998, "grad_norm": 0.620333813228641, "learning_rate": 4.6856431882836397e-07, "loss": 0.2906, "step": 19119 }, { "epoch": 0.8653541525231953, "grad_norm": 0.604231251337452, "learning_rate": 4.682545892500545e-07, "loss": 0.2928, "step": 19120 }, { "epoch": 0.8653994116315908, "grad_norm": 0.6711168213936696, "learning_rate": 4.679449570450367e-07, "loss": 0.3025, "step": 19121 }, { "epoch": 0.8654446707399864, "grad_norm": 0.5841088916852475, "learning_rate": 4.676354222199625e-07, "loss": 0.3123, "step": 19122 }, { "epoch": 0.865489929848382, "grad_norm": 0.6040892371615114, "learning_rate": 4.6732598478148264e-07, "loss": 0.29, "step": 19123 }, { "epoch": 0.8655351889567775, "grad_norm": 0.26525114918965464, "learning_rate": 4.6701664473624677e-07, "loss": 0.454, "step": 19124 }, { "epoch": 0.8655804480651731, "grad_norm": 0.6638484740908424, "learning_rate": 4.667074020909013e-07, "loss": 0.2827, "step": 19125 }, { "epoch": 0.8656257071735687, "grad_norm": 0.6088432095068631, "learning_rate": 4.663982568520897e-07, "loss": 0.2832, "step": 19126 }, { "epoch": 0.8656709662819643, "grad_norm": 0.606926588924379, "learning_rate": 4.660892090264557e-07, "loss": 0.3158, "step": 19127 }, { "epoch": 0.8657162253903599, "grad_norm": 0.5796853979962601, "learning_rate": 4.657802586206411e-07, "loss": 0.2902, "step": 19128 }, { "epoch": 0.8657614844987553, "grad_norm": 0.6563871862561939, "learning_rate": 4.6547140564128236e-07, "loss": 0.2957, "step": 19129 }, { "epoch": 0.8658067436071509, "grad_norm": 0.6368524606296716, "learning_rate": 4.651626500950157e-07, "loss": 0.2634, "step": 19130 }, { "epoch": 0.8658520027155465, "grad_norm": 0.6389499416717722, "learning_rate": 4.648539919884759e-07, "loss": 0.3179, "step": 19131 }, { "epoch": 0.8658972618239421, "grad_norm": 0.5272471866796019, "learning_rate": 4.6454543132829653e-07, "loss": 0.2628, "step": 19132 }, { "epoch": 0.8659425209323376, "grad_norm": 0.6371155766939642, "learning_rate": 4.6423696812110564e-07, "loss": 0.2985, "step": 19133 }, { "epoch": 0.8659877800407332, "grad_norm": 0.5966897807770152, "learning_rate": 4.639286023735312e-07, "loss": 0.2576, "step": 19134 }, { "epoch": 0.8660330391491288, "grad_norm": 0.6236122764447524, "learning_rate": 4.6362033409220077e-07, "loss": 0.2903, "step": 19135 }, { "epoch": 0.8660782982575244, "grad_norm": 0.5832164595872447, "learning_rate": 4.6331216328373565e-07, "loss": 0.3142, "step": 19136 }, { "epoch": 0.8661235573659198, "grad_norm": 0.6123744465516868, "learning_rate": 4.6300408995476e-07, "loss": 0.3063, "step": 19137 }, { "epoch": 0.8661688164743154, "grad_norm": 0.5975181078415203, "learning_rate": 4.6269611411189185e-07, "loss": 0.3219, "step": 19138 }, { "epoch": 0.866214075582711, "grad_norm": 0.5996937805734076, "learning_rate": 4.6238823576174817e-07, "loss": 0.3384, "step": 19139 }, { "epoch": 0.8662593346911066, "grad_norm": 0.6347639658352031, "learning_rate": 4.620804549109448e-07, "loss": 0.2838, "step": 19140 }, { "epoch": 0.8663045937995022, "grad_norm": 0.2863017410983737, "learning_rate": 4.6177277156609634e-07, "loss": 0.475, "step": 19141 }, { "epoch": 0.8663498529078977, "grad_norm": 0.5374663072332243, "learning_rate": 4.6146518573381314e-07, "loss": 0.2555, "step": 19142 }, { "epoch": 0.8663951120162933, "grad_norm": 0.6177097756381084, "learning_rate": 4.6115769742070326e-07, "loss": 0.2865, "step": 19143 }, { "epoch": 0.8664403711246889, "grad_norm": 0.5915875014756634, "learning_rate": 4.608503066333742e-07, "loss": 0.3041, "step": 19144 }, { "epoch": 0.8664856302330844, "grad_norm": 0.2633069387160369, "learning_rate": 4.6054301337843165e-07, "loss": 0.4299, "step": 19145 }, { "epoch": 0.8665308893414799, "grad_norm": 0.2848193531287552, "learning_rate": 4.6023581766247825e-07, "loss": 0.4688, "step": 19146 }, { "epoch": 0.8665761484498755, "grad_norm": 0.5622954185203691, "learning_rate": 4.5992871949211373e-07, "loss": 0.2719, "step": 19147 }, { "epoch": 0.8666214075582711, "grad_norm": 0.2668497744033602, "learning_rate": 4.596217188739377e-07, "loss": 0.4457, "step": 19148 }, { "epoch": 0.8666666666666667, "grad_norm": 0.6091572459376194, "learning_rate": 4.593148158145455e-07, "loss": 0.2476, "step": 19149 }, { "epoch": 0.8667119257750623, "grad_norm": 0.5863981063835381, "learning_rate": 4.59008010320533e-07, "loss": 0.2957, "step": 19150 }, { "epoch": 0.8667571848834578, "grad_norm": 0.7047953065256126, "learning_rate": 4.587013023984921e-07, "loss": 0.2619, "step": 19151 }, { "epoch": 0.8668024439918534, "grad_norm": 0.7000958588469114, "learning_rate": 4.583946920550114e-07, "loss": 0.2535, "step": 19152 }, { "epoch": 0.866847703100249, "grad_norm": 0.6598872071664383, "learning_rate": 4.580881792966807e-07, "loss": 0.2703, "step": 19153 }, { "epoch": 0.8668929622086445, "grad_norm": 0.602439683038341, "learning_rate": 4.577817641300869e-07, "loss": 0.2874, "step": 19154 }, { "epoch": 0.86693822131704, "grad_norm": 0.590883109590152, "learning_rate": 4.574754465618114e-07, "loss": 0.2972, "step": 19155 }, { "epoch": 0.8669834804254356, "grad_norm": 0.6449816228421348, "learning_rate": 4.571692265984368e-07, "loss": 0.3008, "step": 19156 }, { "epoch": 0.8670287395338312, "grad_norm": 0.6160323402068825, "learning_rate": 4.5686310424654325e-07, "loss": 0.28, "step": 19157 }, { "epoch": 0.8670739986422268, "grad_norm": 0.6428005330677683, "learning_rate": 4.565570795127106e-07, "loss": 0.2698, "step": 19158 }, { "epoch": 0.8671192577506223, "grad_norm": 0.5738385724075437, "learning_rate": 4.5625115240351016e-07, "loss": 0.3133, "step": 19159 }, { "epoch": 0.8671645168590179, "grad_norm": 0.8398364145820625, "learning_rate": 4.559453229255173e-07, "loss": 0.2853, "step": 19160 }, { "epoch": 0.8672097759674134, "grad_norm": 0.5485241443634394, "learning_rate": 4.5563959108530455e-07, "loss": 0.2243, "step": 19161 }, { "epoch": 0.867255035075809, "grad_norm": 0.580763951997358, "learning_rate": 4.553339568894399e-07, "loss": 0.2844, "step": 19162 }, { "epoch": 0.8673002941842046, "grad_norm": 0.6433553330581192, "learning_rate": 4.550284203444899e-07, "loss": 0.2904, "step": 19163 }, { "epoch": 0.8673455532926001, "grad_norm": 0.4912122765876196, "learning_rate": 4.5472298145702144e-07, "loss": 0.4849, "step": 19164 }, { "epoch": 0.8673908124009957, "grad_norm": 0.6330924353046441, "learning_rate": 4.5441764023359483e-07, "loss": 0.2417, "step": 19165 }, { "epoch": 0.8674360715093913, "grad_norm": 0.6120571184065314, "learning_rate": 4.5411239668077366e-07, "loss": 0.3085, "step": 19166 }, { "epoch": 0.8674813306177869, "grad_norm": 0.2666466628316768, "learning_rate": 4.5380725080511555e-07, "loss": 0.4664, "step": 19167 }, { "epoch": 0.8675265897261824, "grad_norm": 0.6969170437946436, "learning_rate": 4.5350220261317633e-07, "loss": 0.2461, "step": 19168 }, { "epoch": 0.8675718488345779, "grad_norm": 0.6219580735827156, "learning_rate": 4.5319725211151077e-07, "loss": 0.262, "step": 19169 }, { "epoch": 0.8676171079429735, "grad_norm": 0.5670702283976174, "learning_rate": 4.5289239930667304e-07, "loss": 0.2742, "step": 19170 }, { "epoch": 0.8676623670513691, "grad_norm": 0.6359844552906783, "learning_rate": 4.525876442052124e-07, "loss": 0.3061, "step": 19171 }, { "epoch": 0.8677076261597646, "grad_norm": 0.7597180178156122, "learning_rate": 4.522829868136758e-07, "loss": 0.3253, "step": 19172 }, { "epoch": 0.8677528852681602, "grad_norm": 0.5932075929375237, "learning_rate": 4.519784271386107e-07, "loss": 0.2991, "step": 19173 }, { "epoch": 0.8677981443765558, "grad_norm": 0.6196670959860574, "learning_rate": 4.516739651865615e-07, "loss": 0.2778, "step": 19174 }, { "epoch": 0.8678434034849514, "grad_norm": 0.6200868791032506, "learning_rate": 4.5136960096407e-07, "loss": 0.3191, "step": 19175 }, { "epoch": 0.867888662593347, "grad_norm": 0.6199821020576625, "learning_rate": 4.5106533447767496e-07, "loss": 0.2861, "step": 19176 }, { "epoch": 0.8679339217017424, "grad_norm": 0.6538168468033612, "learning_rate": 4.507611657339156e-07, "loss": 0.3108, "step": 19177 }, { "epoch": 0.867979180810138, "grad_norm": 0.25948403348974586, "learning_rate": 4.504570947393261e-07, "loss": 0.4587, "step": 19178 }, { "epoch": 0.8680244399185336, "grad_norm": 0.7001967108251121, "learning_rate": 4.5015312150044177e-07, "loss": 0.3174, "step": 19179 }, { "epoch": 0.8680696990269292, "grad_norm": 0.6252283624355447, "learning_rate": 4.49849246023793e-07, "loss": 0.3012, "step": 19180 }, { "epoch": 0.8681149581353247, "grad_norm": 0.2573979034500562, "learning_rate": 4.4954546831590837e-07, "loss": 0.4589, "step": 19181 }, { "epoch": 0.8681602172437203, "grad_norm": 0.5719706327436379, "learning_rate": 4.4924178838331554e-07, "loss": 0.2555, "step": 19182 }, { "epoch": 0.8682054763521159, "grad_norm": 0.6167229877308994, "learning_rate": 4.4893820623254257e-07, "loss": 0.2688, "step": 19183 }, { "epoch": 0.8682507354605115, "grad_norm": 0.6365963953210592, "learning_rate": 4.486347218701076e-07, "loss": 0.3255, "step": 19184 }, { "epoch": 0.868295994568907, "grad_norm": 0.6035459960710321, "learning_rate": 4.4833133530253425e-07, "loss": 0.2828, "step": 19185 }, { "epoch": 0.8683412536773025, "grad_norm": 0.7515385783969697, "learning_rate": 4.4802804653634124e-07, "loss": 0.2625, "step": 19186 }, { "epoch": 0.8683865127856981, "grad_norm": 0.6160544269891035, "learning_rate": 4.477248555780467e-07, "loss": 0.2819, "step": 19187 }, { "epoch": 0.8684317718940937, "grad_norm": 0.2900133552382891, "learning_rate": 4.4742176243416257e-07, "loss": 0.4588, "step": 19188 }, { "epoch": 0.8684770310024893, "grad_norm": 0.5679561213409132, "learning_rate": 4.4711876711120206e-07, "loss": 0.2858, "step": 19189 }, { "epoch": 0.8685222901108848, "grad_norm": 2.1890383143218557, "learning_rate": 4.4681586961567714e-07, "loss": 0.3471, "step": 19190 }, { "epoch": 0.8685675492192804, "grad_norm": 0.5816623983622343, "learning_rate": 4.4651306995409485e-07, "loss": 0.2829, "step": 19191 }, { "epoch": 0.868612808327676, "grad_norm": 0.5621643901170886, "learning_rate": 4.462103681329616e-07, "loss": 0.31, "step": 19192 }, { "epoch": 0.8686580674360715, "grad_norm": 0.6637047733200644, "learning_rate": 4.4590776415878166e-07, "loss": 0.3343, "step": 19193 }, { "epoch": 0.868703326544467, "grad_norm": 0.6641349132062052, "learning_rate": 4.4560525803805654e-07, "loss": 0.2687, "step": 19194 }, { "epoch": 0.8687485856528626, "grad_norm": 0.5905567406914118, "learning_rate": 4.453028497772877e-07, "loss": 0.2857, "step": 19195 }, { "epoch": 0.8687938447612582, "grad_norm": 0.6181811323044204, "learning_rate": 4.4500053938297205e-07, "loss": 0.2685, "step": 19196 }, { "epoch": 0.8688391038696538, "grad_norm": 0.5688526447255133, "learning_rate": 4.4469832686160395e-07, "loss": 0.3152, "step": 19197 }, { "epoch": 0.8688843629780494, "grad_norm": 0.5609431518771413, "learning_rate": 4.443962122196782e-07, "loss": 0.2868, "step": 19198 }, { "epoch": 0.8689296220864449, "grad_norm": 0.61520595196673, "learning_rate": 4.4409419546368735e-07, "loss": 0.2702, "step": 19199 }, { "epoch": 0.8689748811948405, "grad_norm": 0.6052659169977241, "learning_rate": 4.437922766001201e-07, "loss": 0.2714, "step": 19200 }, { "epoch": 0.869020140303236, "grad_norm": 0.27276169252499394, "learning_rate": 4.4349045563546245e-07, "loss": 0.4929, "step": 19201 }, { "epoch": 0.8690653994116316, "grad_norm": 0.6297723203334212, "learning_rate": 4.4318873257620077e-07, "loss": 0.2841, "step": 19202 }, { "epoch": 0.8691106585200271, "grad_norm": 0.5606502021223138, "learning_rate": 4.428871074288188e-07, "loss": 0.261, "step": 19203 }, { "epoch": 0.8691559176284227, "grad_norm": 0.6184436659663045, "learning_rate": 4.425855801997969e-07, "loss": 0.2845, "step": 19204 }, { "epoch": 0.8692011767368183, "grad_norm": 0.26524336851540486, "learning_rate": 4.422841508956127e-07, "loss": 0.4507, "step": 19205 }, { "epoch": 0.8692464358452139, "grad_norm": 0.6086760919671172, "learning_rate": 4.419828195227455e-07, "loss": 0.2866, "step": 19206 }, { "epoch": 0.8692916949536094, "grad_norm": 0.6494256573886917, "learning_rate": 4.416815860876672e-07, "loss": 0.2894, "step": 19207 }, { "epoch": 0.869336954062005, "grad_norm": 0.6112122012915637, "learning_rate": 4.413804505968533e-07, "loss": 0.2499, "step": 19208 }, { "epoch": 0.8693822131704005, "grad_norm": 0.6051172673894841, "learning_rate": 4.410794130567725e-07, "loss": 0.2771, "step": 19209 }, { "epoch": 0.8694274722787961, "grad_norm": 0.5811739345315933, "learning_rate": 4.4077847347389236e-07, "loss": 0.28, "step": 19210 }, { "epoch": 0.8694727313871917, "grad_norm": 0.6868280778020592, "learning_rate": 4.404776318546805e-07, "loss": 0.3089, "step": 19211 }, { "epoch": 0.8695179904955872, "grad_norm": 0.5590297594558641, "learning_rate": 4.401768882056012e-07, "loss": 0.2917, "step": 19212 }, { "epoch": 0.8695632496039828, "grad_norm": 0.6357604627136214, "learning_rate": 4.3987624253311657e-07, "loss": 0.2982, "step": 19213 }, { "epoch": 0.8696085087123784, "grad_norm": 0.25405496257449234, "learning_rate": 4.3957569484368523e-07, "loss": 0.4706, "step": 19214 }, { "epoch": 0.869653767820774, "grad_norm": 0.5730905648746117, "learning_rate": 4.3927524514376596e-07, "loss": 0.283, "step": 19215 }, { "epoch": 0.8696990269291694, "grad_norm": 0.5808504897339036, "learning_rate": 4.389748934398164e-07, "loss": 0.2872, "step": 19216 }, { "epoch": 0.869744286037565, "grad_norm": 0.5989775669737522, "learning_rate": 4.386746397382863e-07, "loss": 0.3183, "step": 19217 }, { "epoch": 0.8697895451459606, "grad_norm": 0.2539415810006778, "learning_rate": 4.3837448404562886e-07, "loss": 0.4799, "step": 19218 }, { "epoch": 0.8698348042543562, "grad_norm": 0.7084341348935672, "learning_rate": 4.3807442636829513e-07, "loss": 0.2673, "step": 19219 }, { "epoch": 0.8698800633627517, "grad_norm": 0.6404269693956344, "learning_rate": 4.3777446671273093e-07, "loss": 0.2818, "step": 19220 }, { "epoch": 0.8699253224711473, "grad_norm": 0.6818600806110079, "learning_rate": 4.3747460508538064e-07, "loss": 0.3055, "step": 19221 }, { "epoch": 0.8699705815795429, "grad_norm": 0.5967480519356161, "learning_rate": 4.371748414926896e-07, "loss": 0.2804, "step": 19222 }, { "epoch": 0.8700158406879385, "grad_norm": 0.6705196046683227, "learning_rate": 4.3687517594109664e-07, "loss": 0.3152, "step": 19223 }, { "epoch": 0.8700610997963341, "grad_norm": 0.27213909087842897, "learning_rate": 4.3657560843704207e-07, "loss": 0.4877, "step": 19224 }, { "epoch": 0.8701063589047295, "grad_norm": 0.5580890194538464, "learning_rate": 4.362761389869624e-07, "loss": 0.2902, "step": 19225 }, { "epoch": 0.8701516180131251, "grad_norm": 0.6272575113776604, "learning_rate": 4.3597676759729147e-07, "loss": 0.2687, "step": 19226 }, { "epoch": 0.8701968771215207, "grad_norm": 0.6501694422134591, "learning_rate": 4.356774942744618e-07, "loss": 0.3212, "step": 19227 }, { "epoch": 0.8702421362299163, "grad_norm": 0.6227079354249133, "learning_rate": 4.353783190249061e-07, "loss": 0.3212, "step": 19228 }, { "epoch": 0.8702873953383118, "grad_norm": 0.6774682709699127, "learning_rate": 4.350792418550509e-07, "loss": 0.3429, "step": 19229 }, { "epoch": 0.8703326544467074, "grad_norm": 0.6162439245033716, "learning_rate": 4.3478026277132157e-07, "loss": 0.3038, "step": 19230 }, { "epoch": 0.870377913555103, "grad_norm": 0.5618576367930656, "learning_rate": 4.3448138178014354e-07, "loss": 0.2752, "step": 19231 }, { "epoch": 0.8704231726634986, "grad_norm": 0.8201144878058423, "learning_rate": 4.3418259888794e-07, "loss": 0.2934, "step": 19232 }, { "epoch": 0.8704684317718941, "grad_norm": 0.3217543622772321, "learning_rate": 4.338839141011292e-07, "loss": 0.4781, "step": 19233 }, { "epoch": 0.8705136908802896, "grad_norm": 0.5878816880054325, "learning_rate": 4.3358532742612814e-07, "loss": 0.2976, "step": 19234 }, { "epoch": 0.8705589499886852, "grad_norm": 0.26947232868969295, "learning_rate": 4.3328683886935507e-07, "loss": 0.4722, "step": 19235 }, { "epoch": 0.8706042090970808, "grad_norm": 0.7166495013877396, "learning_rate": 4.329884484372215e-07, "loss": 0.3097, "step": 19236 }, { "epoch": 0.8706494682054764, "grad_norm": 0.6034099383936006, "learning_rate": 4.326901561361402e-07, "loss": 0.2757, "step": 19237 }, { "epoch": 0.8706947273138719, "grad_norm": 0.5717583707219932, "learning_rate": 4.3239196197252034e-07, "loss": 0.2653, "step": 19238 }, { "epoch": 0.8707399864222675, "grad_norm": 0.5895933010053086, "learning_rate": 4.3209386595276737e-07, "loss": 0.2792, "step": 19239 }, { "epoch": 0.870785245530663, "grad_norm": 0.6151786474253428, "learning_rate": 4.317958680832884e-07, "loss": 0.2686, "step": 19240 }, { "epoch": 0.8708305046390586, "grad_norm": 0.6573071928254116, "learning_rate": 4.3149796837048677e-07, "loss": 0.289, "step": 19241 }, { "epoch": 0.8708757637474541, "grad_norm": 0.6535222992402957, "learning_rate": 4.3120016682076324e-07, "loss": 0.306, "step": 19242 }, { "epoch": 0.8709210228558497, "grad_norm": 0.6144457344128956, "learning_rate": 4.309024634405146e-07, "loss": 0.2703, "step": 19243 }, { "epoch": 0.8709662819642453, "grad_norm": 0.5685603172224465, "learning_rate": 4.306048582361394e-07, "loss": 0.2945, "step": 19244 }, { "epoch": 0.8710115410726409, "grad_norm": 0.2646618931089399, "learning_rate": 4.3030735121403376e-07, "loss": 0.4831, "step": 19245 }, { "epoch": 0.8710568001810365, "grad_norm": 0.2749930163517183, "learning_rate": 4.300099423805865e-07, "loss": 0.4574, "step": 19246 }, { "epoch": 0.871102059289432, "grad_norm": 0.27635098523908386, "learning_rate": 4.2971263174219014e-07, "loss": 0.4804, "step": 19247 }, { "epoch": 0.8711473183978276, "grad_norm": 0.5877024693849136, "learning_rate": 4.2941541930523356e-07, "loss": 0.2964, "step": 19248 }, { "epoch": 0.8711925775062231, "grad_norm": 0.6122661833595895, "learning_rate": 4.291183050761022e-07, "loss": 0.2738, "step": 19249 }, { "epoch": 0.8712378366146187, "grad_norm": 0.6142665018253046, "learning_rate": 4.288212890611787e-07, "loss": 0.2818, "step": 19250 }, { "epoch": 0.8712830957230142, "grad_norm": 0.6032876851784271, "learning_rate": 4.28524371266848e-07, "loss": 0.3234, "step": 19251 }, { "epoch": 0.8713283548314098, "grad_norm": 0.6290539406396431, "learning_rate": 4.2822755169948714e-07, "loss": 0.3027, "step": 19252 }, { "epoch": 0.8713736139398054, "grad_norm": 0.5551021014013245, "learning_rate": 4.2793083036547554e-07, "loss": 0.2438, "step": 19253 }, { "epoch": 0.871418873048201, "grad_norm": 0.7048705729104559, "learning_rate": 4.276342072711881e-07, "loss": 0.2844, "step": 19254 }, { "epoch": 0.8714641321565965, "grad_norm": 0.28930790958547165, "learning_rate": 4.273376824229991e-07, "loss": 0.4484, "step": 19255 }, { "epoch": 0.871509391264992, "grad_norm": 0.615694385686459, "learning_rate": 4.270412558272785e-07, "loss": 0.3384, "step": 19256 }, { "epoch": 0.8715546503733876, "grad_norm": 0.2746311252453542, "learning_rate": 4.267449274903979e-07, "loss": 0.5046, "step": 19257 }, { "epoch": 0.8715999094817832, "grad_norm": 0.25477527336527783, "learning_rate": 4.2644869741872263e-07, "loss": 0.4676, "step": 19258 }, { "epoch": 0.8716451685901788, "grad_norm": 0.6237616386439251, "learning_rate": 4.2615256561861773e-07, "loss": 0.3168, "step": 19259 }, { "epoch": 0.8716904276985743, "grad_norm": 0.5799414767663191, "learning_rate": 4.258565320964464e-07, "loss": 0.2646, "step": 19260 }, { "epoch": 0.8717356868069699, "grad_norm": 0.5581433964083954, "learning_rate": 4.2556059685857133e-07, "loss": 0.2521, "step": 19261 }, { "epoch": 0.8717809459153655, "grad_norm": 0.647202387736622, "learning_rate": 4.252647599113491e-07, "loss": 0.2704, "step": 19262 }, { "epoch": 0.8718262050237611, "grad_norm": 0.608873076960802, "learning_rate": 4.2496902126113626e-07, "loss": 0.3396, "step": 19263 }, { "epoch": 0.8718714641321565, "grad_norm": 0.689268710860395, "learning_rate": 4.246733809142889e-07, "loss": 0.3353, "step": 19264 }, { "epoch": 0.8719167232405521, "grad_norm": 0.6566491745917127, "learning_rate": 4.2437783887715745e-07, "loss": 0.282, "step": 19265 }, { "epoch": 0.8719619823489477, "grad_norm": 0.28732687237202653, "learning_rate": 4.2408239515609407e-07, "loss": 0.4821, "step": 19266 }, { "epoch": 0.8720072414573433, "grad_norm": 0.6146920890965886, "learning_rate": 4.2378704975744646e-07, "loss": 0.2778, "step": 19267 }, { "epoch": 0.8720525005657389, "grad_norm": 0.6326512945876744, "learning_rate": 4.2349180268755953e-07, "loss": 0.2898, "step": 19268 }, { "epoch": 0.8720977596741344, "grad_norm": 0.6194643099384461, "learning_rate": 4.231966539527782e-07, "loss": 0.3457, "step": 19269 }, { "epoch": 0.87214301878253, "grad_norm": 0.5949243243637904, "learning_rate": 4.2290160355944467e-07, "loss": 0.2819, "step": 19270 }, { "epoch": 0.8721882778909256, "grad_norm": 0.6479036250261243, "learning_rate": 4.2260665151389825e-07, "loss": 0.2972, "step": 19271 }, { "epoch": 0.8722335369993212, "grad_norm": 0.6817991465424627, "learning_rate": 4.223117978224761e-07, "loss": 0.2769, "step": 19272 }, { "epoch": 0.8722787961077166, "grad_norm": 0.6523627812235554, "learning_rate": 4.2201704249151377e-07, "loss": 0.339, "step": 19273 }, { "epoch": 0.8723240552161122, "grad_norm": 0.630393557338104, "learning_rate": 4.217223855273467e-07, "loss": 0.3178, "step": 19274 }, { "epoch": 0.8723693143245078, "grad_norm": 0.6068565329435825, "learning_rate": 4.214278269363026e-07, "loss": 0.3071, "step": 19275 }, { "epoch": 0.8724145734329034, "grad_norm": 0.26120053279786065, "learning_rate": 4.211333667247125e-07, "loss": 0.4875, "step": 19276 }, { "epoch": 0.8724598325412989, "grad_norm": 0.6359553199205671, "learning_rate": 4.208390048989047e-07, "loss": 0.3256, "step": 19277 }, { "epoch": 0.8725050916496945, "grad_norm": 0.28486232174258747, "learning_rate": 4.2054474146520254e-07, "loss": 0.449, "step": 19278 }, { "epoch": 0.8725503507580901, "grad_norm": 0.5948076828239628, "learning_rate": 4.202505764299286e-07, "loss": 0.3179, "step": 19279 }, { "epoch": 0.8725956098664857, "grad_norm": 0.683832652398244, "learning_rate": 4.199565097994046e-07, "loss": 0.2939, "step": 19280 }, { "epoch": 0.8726408689748812, "grad_norm": 0.29879773745404237, "learning_rate": 4.1966254157994826e-07, "loss": 0.4816, "step": 19281 }, { "epoch": 0.8726861280832767, "grad_norm": 0.27659396071200854, "learning_rate": 4.1936867177787723e-07, "loss": 0.4963, "step": 19282 }, { "epoch": 0.8727313871916723, "grad_norm": 0.6063704551071534, "learning_rate": 4.190749003995037e-07, "loss": 0.3116, "step": 19283 }, { "epoch": 0.8727766463000679, "grad_norm": 0.6478424532617484, "learning_rate": 4.187812274511427e-07, "loss": 0.2813, "step": 19284 }, { "epoch": 0.8728219054084635, "grad_norm": 0.25002317875374175, "learning_rate": 4.1848765293910187e-07, "loss": 0.4547, "step": 19285 }, { "epoch": 0.872867164516859, "grad_norm": 0.8394752438160599, "learning_rate": 4.181941768696912e-07, "loss": 0.3154, "step": 19286 }, { "epoch": 0.8729124236252546, "grad_norm": 0.3167525239809427, "learning_rate": 4.1790079924921625e-07, "loss": 0.4757, "step": 19287 }, { "epoch": 0.8729576827336502, "grad_norm": 0.6515488862914801, "learning_rate": 4.176075200839791e-07, "loss": 0.2937, "step": 19288 }, { "epoch": 0.8730029418420457, "grad_norm": 0.6422687423007235, "learning_rate": 4.173143393802825e-07, "loss": 0.3317, "step": 19289 }, { "epoch": 0.8730482009504412, "grad_norm": 0.6589076061519356, "learning_rate": 4.170212571444271e-07, "loss": 0.2833, "step": 19290 }, { "epoch": 0.8730934600588368, "grad_norm": 0.26402214706993543, "learning_rate": 4.1672827338270884e-07, "loss": 0.4838, "step": 19291 }, { "epoch": 0.8731387191672324, "grad_norm": 0.5423897183305791, "learning_rate": 4.1643538810142324e-07, "loss": 0.2827, "step": 19292 }, { "epoch": 0.873183978275628, "grad_norm": 0.693746363923824, "learning_rate": 4.1614260130686424e-07, "loss": 0.29, "step": 19293 }, { "epoch": 0.8732292373840236, "grad_norm": 0.6786270636978193, "learning_rate": 4.158499130053223e-07, "loss": 0.2907, "step": 19294 }, { "epoch": 0.8732744964924191, "grad_norm": 0.5433855176325215, "learning_rate": 4.155573232030868e-07, "loss": 0.2811, "step": 19295 }, { "epoch": 0.8733197556008147, "grad_norm": 0.6064155161405602, "learning_rate": 4.152648319064445e-07, "loss": 0.2744, "step": 19296 }, { "epoch": 0.8733650147092102, "grad_norm": 0.4722005543780067, "learning_rate": 4.1497243912167975e-07, "loss": 0.4553, "step": 19297 }, { "epoch": 0.8734102738176058, "grad_norm": 0.6905482528007223, "learning_rate": 4.146801448550747e-07, "loss": 0.2969, "step": 19298 }, { "epoch": 0.8734555329260013, "grad_norm": 0.6159339429816463, "learning_rate": 4.143879491129116e-07, "loss": 0.2956, "step": 19299 }, { "epoch": 0.8735007920343969, "grad_norm": 0.5837472702885921, "learning_rate": 4.140958519014682e-07, "loss": 0.2665, "step": 19300 }, { "epoch": 0.8735460511427925, "grad_norm": 0.26396583160348003, "learning_rate": 4.1380385322701945e-07, "loss": 0.443, "step": 19301 }, { "epoch": 0.8735913102511881, "grad_norm": 0.6096296037623206, "learning_rate": 4.1351195309584034e-07, "loss": 0.2684, "step": 19302 }, { "epoch": 0.8736365693595837, "grad_norm": 0.5867769778710267, "learning_rate": 4.132201515142037e-07, "loss": 0.2921, "step": 19303 }, { "epoch": 0.8736818284679791, "grad_norm": 0.7311603956808999, "learning_rate": 4.129284484883789e-07, "loss": 0.2702, "step": 19304 }, { "epoch": 0.8737270875763747, "grad_norm": 0.5948717141358987, "learning_rate": 4.126368440246331e-07, "loss": 0.3367, "step": 19305 }, { "epoch": 0.8737723466847703, "grad_norm": 0.572718298861418, "learning_rate": 4.1234533812923307e-07, "loss": 0.2991, "step": 19306 }, { "epoch": 0.8738176057931659, "grad_norm": 0.5464262209786708, "learning_rate": 4.120539308084409e-07, "loss": 0.2663, "step": 19307 }, { "epoch": 0.8738628649015614, "grad_norm": 0.6203983342770546, "learning_rate": 4.1176262206852e-07, "loss": 0.2939, "step": 19308 }, { "epoch": 0.873908124009957, "grad_norm": 0.6275878683163069, "learning_rate": 4.114714119157287e-07, "loss": 0.2894, "step": 19309 }, { "epoch": 0.8739533831183526, "grad_norm": 0.2656071642941677, "learning_rate": 4.111803003563231e-07, "loss": 0.4439, "step": 19310 }, { "epoch": 0.8739986422267482, "grad_norm": 0.6453478987417932, "learning_rate": 4.108892873965603e-07, "loss": 0.2719, "step": 19311 }, { "epoch": 0.8740439013351436, "grad_norm": 0.6799707695236196, "learning_rate": 4.105983730426916e-07, "loss": 0.3123, "step": 19312 }, { "epoch": 0.8740891604435392, "grad_norm": 0.5816988735360314, "learning_rate": 4.103075573009691e-07, "loss": 0.2624, "step": 19313 }, { "epoch": 0.8741344195519348, "grad_norm": 0.6331262659881548, "learning_rate": 4.1001684017764053e-07, "loss": 0.2969, "step": 19314 }, { "epoch": 0.8741796786603304, "grad_norm": 0.7024568024365313, "learning_rate": 4.097262216789538e-07, "loss": 0.2979, "step": 19315 }, { "epoch": 0.874224937768726, "grad_norm": 0.6067067548843695, "learning_rate": 4.0943570181115275e-07, "loss": 0.2764, "step": 19316 }, { "epoch": 0.8742701968771215, "grad_norm": 0.608306134835602, "learning_rate": 4.091452805804785e-07, "loss": 0.3062, "step": 19317 }, { "epoch": 0.8743154559855171, "grad_norm": 0.2563566022423515, "learning_rate": 4.088549579931722e-07, "loss": 0.4438, "step": 19318 }, { "epoch": 0.8743607150939127, "grad_norm": 0.6088891635681201, "learning_rate": 4.085647340554738e-07, "loss": 0.2903, "step": 19319 }, { "epoch": 0.8744059742023083, "grad_norm": 0.6325971987131987, "learning_rate": 4.0827460877361724e-07, "loss": 0.2875, "step": 19320 }, { "epoch": 0.8744512333107037, "grad_norm": 0.5723939317045896, "learning_rate": 4.079845821538364e-07, "loss": 0.2751, "step": 19321 }, { "epoch": 0.8744964924190993, "grad_norm": 0.7989220925329679, "learning_rate": 4.0769465420236407e-07, "loss": 0.281, "step": 19322 }, { "epoch": 0.8745417515274949, "grad_norm": 0.5519022007177745, "learning_rate": 4.0740482492542864e-07, "loss": 0.3168, "step": 19323 }, { "epoch": 0.8745870106358905, "grad_norm": 0.27196518760632776, "learning_rate": 4.0711509432925955e-07, "loss": 0.4728, "step": 19324 }, { "epoch": 0.874632269744286, "grad_norm": 0.5852629861057203, "learning_rate": 4.0682546242008017e-07, "loss": 0.2973, "step": 19325 }, { "epoch": 0.8746775288526816, "grad_norm": 0.8416817462412677, "learning_rate": 4.0653592920411545e-07, "loss": 0.3121, "step": 19326 }, { "epoch": 0.8747227879610772, "grad_norm": 0.656747312664601, "learning_rate": 4.0624649468758494e-07, "loss": 0.2591, "step": 19327 }, { "epoch": 0.8747680470694728, "grad_norm": 0.2852204573784242, "learning_rate": 4.0595715887670973e-07, "loss": 0.4515, "step": 19328 }, { "epoch": 0.8748133061778683, "grad_norm": 0.5623803990170919, "learning_rate": 4.056679217777054e-07, "loss": 0.2606, "step": 19329 }, { "epoch": 0.8748585652862638, "grad_norm": 0.5641726428377336, "learning_rate": 4.0537878339678647e-07, "loss": 0.2584, "step": 19330 }, { "epoch": 0.8749038243946594, "grad_norm": 0.6174679946555726, "learning_rate": 4.050897437401657e-07, "loss": 0.2992, "step": 19331 }, { "epoch": 0.874949083503055, "grad_norm": 0.2592118664339325, "learning_rate": 4.0480080281405544e-07, "loss": 0.4789, "step": 19332 }, { "epoch": 0.8749943426114506, "grad_norm": 0.6222844653809816, "learning_rate": 4.045119606246628e-07, "loss": 0.2708, "step": 19333 }, { "epoch": 0.8750396017198461, "grad_norm": 0.561262388965449, "learning_rate": 4.0422321717819347e-07, "loss": 0.2892, "step": 19334 }, { "epoch": 0.8750848608282417, "grad_norm": 0.6080040410481603, "learning_rate": 4.03934572480853e-07, "loss": 0.277, "step": 19335 }, { "epoch": 0.8751301199366373, "grad_norm": 0.6680865928353568, "learning_rate": 4.03646026538842e-07, "loss": 0.2712, "step": 19336 }, { "epoch": 0.8751753790450328, "grad_norm": 0.2658129002143189, "learning_rate": 4.0335757935836216e-07, "loss": 0.4717, "step": 19337 }, { "epoch": 0.8752206381534284, "grad_norm": 0.6268641775392694, "learning_rate": 4.0306923094561025e-07, "loss": 0.2591, "step": 19338 }, { "epoch": 0.8752658972618239, "grad_norm": 0.6131332345601052, "learning_rate": 4.027809813067812e-07, "loss": 0.3132, "step": 19339 }, { "epoch": 0.8753111563702195, "grad_norm": 0.6250152587687039, "learning_rate": 4.024928304480696e-07, "loss": 0.2804, "step": 19340 }, { "epoch": 0.8753564154786151, "grad_norm": 0.6700185047153728, "learning_rate": 4.022047783756683e-07, "loss": 0.2782, "step": 19341 }, { "epoch": 0.8754016745870107, "grad_norm": 0.5891930228963966, "learning_rate": 4.0191682509576503e-07, "loss": 0.2948, "step": 19342 }, { "epoch": 0.8754469336954062, "grad_norm": 0.27279569033472617, "learning_rate": 4.0162897061454596e-07, "loss": 0.4634, "step": 19343 }, { "epoch": 0.8754921928038017, "grad_norm": 0.6391335136643886, "learning_rate": 4.0134121493819897e-07, "loss": 0.3157, "step": 19344 }, { "epoch": 0.8755374519121973, "grad_norm": 0.6616544774477524, "learning_rate": 4.0105355807290523e-07, "loss": 0.3173, "step": 19345 }, { "epoch": 0.8755827110205929, "grad_norm": 0.2702210661503924, "learning_rate": 4.0076600002484533e-07, "loss": 0.4681, "step": 19346 }, { "epoch": 0.8756279701289884, "grad_norm": 0.285967670957762, "learning_rate": 4.004785408001982e-07, "loss": 0.475, "step": 19347 }, { "epoch": 0.875673229237384, "grad_norm": 0.6405104658919056, "learning_rate": 4.001911804051417e-07, "loss": 0.3145, "step": 19348 }, { "epoch": 0.8757184883457796, "grad_norm": 0.6727554754483442, "learning_rate": 3.999039188458498e-07, "loss": 0.3267, "step": 19349 }, { "epoch": 0.8757637474541752, "grad_norm": 0.6028274037616632, "learning_rate": 3.996167561284936e-07, "loss": 0.258, "step": 19350 }, { "epoch": 0.8758090065625708, "grad_norm": 0.5946123279302, "learning_rate": 3.9932969225924546e-07, "loss": 0.2794, "step": 19351 }, { "epoch": 0.8758542656709662, "grad_norm": 0.5871803342675288, "learning_rate": 3.990427272442715e-07, "loss": 0.2662, "step": 19352 }, { "epoch": 0.8758995247793618, "grad_norm": 0.6323833531414866, "learning_rate": 3.987558610897391e-07, "loss": 0.2839, "step": 19353 }, { "epoch": 0.8759447838877574, "grad_norm": 0.5265504978894515, "learning_rate": 3.9846909380181096e-07, "loss": 0.2938, "step": 19354 }, { "epoch": 0.875990042996153, "grad_norm": 0.6319468212936, "learning_rate": 3.981824253866501e-07, "loss": 0.3392, "step": 19355 }, { "epoch": 0.8760353021045485, "grad_norm": 0.6347599472287441, "learning_rate": 3.978958558504148e-07, "loss": 0.2472, "step": 19356 }, { "epoch": 0.8760805612129441, "grad_norm": 0.29546727970639075, "learning_rate": 3.9760938519926404e-07, "loss": 0.4936, "step": 19357 }, { "epoch": 0.8761258203213397, "grad_norm": 0.6320587601466439, "learning_rate": 3.9732301343935243e-07, "loss": 0.2786, "step": 19358 }, { "epoch": 0.8761710794297353, "grad_norm": 0.63209996992386, "learning_rate": 3.970367405768322e-07, "loss": 0.2796, "step": 19359 }, { "epoch": 0.8762163385381307, "grad_norm": 0.26120033002662857, "learning_rate": 3.9675056661785563e-07, "loss": 0.4559, "step": 19360 }, { "epoch": 0.8762615976465263, "grad_norm": 0.5661813233089844, "learning_rate": 3.964644915685728e-07, "loss": 0.2847, "step": 19361 }, { "epoch": 0.8763068567549219, "grad_norm": 0.6678577903977209, "learning_rate": 3.961785154351289e-07, "loss": 0.3123, "step": 19362 }, { "epoch": 0.8763521158633175, "grad_norm": 0.6577484082745595, "learning_rate": 3.9589263822366886e-07, "loss": 0.3141, "step": 19363 }, { "epoch": 0.8763973749717131, "grad_norm": 0.6469438211303374, "learning_rate": 3.9560685994033566e-07, "loss": 0.2685, "step": 19364 }, { "epoch": 0.8764426340801086, "grad_norm": 0.6733623265000933, "learning_rate": 3.9532118059126935e-07, "loss": 0.2983, "step": 19365 }, { "epoch": 0.8764878931885042, "grad_norm": 0.2691216368567256, "learning_rate": 3.9503560018260945e-07, "loss": 0.4546, "step": 19366 }, { "epoch": 0.8765331522968998, "grad_norm": 0.631367657505299, "learning_rate": 3.9475011872049164e-07, "loss": 0.285, "step": 19367 }, { "epoch": 0.8765784114052954, "grad_norm": 0.28257712047841743, "learning_rate": 3.9446473621104877e-07, "loss": 0.4689, "step": 19368 }, { "epoch": 0.8766236705136908, "grad_norm": 0.6845795804766492, "learning_rate": 3.9417945266041367e-07, "loss": 0.3236, "step": 19369 }, { "epoch": 0.8766689296220864, "grad_norm": 0.2629276730359694, "learning_rate": 3.9389426807471764e-07, "loss": 0.4324, "step": 19370 }, { "epoch": 0.876714188730482, "grad_norm": 0.6353848202383956, "learning_rate": 3.9360918246008684e-07, "loss": 0.2742, "step": 19371 }, { "epoch": 0.8767594478388776, "grad_norm": 0.5924153676161918, "learning_rate": 3.933241958226469e-07, "loss": 0.321, "step": 19372 }, { "epoch": 0.8768047069472732, "grad_norm": 0.7712837707043331, "learning_rate": 3.930393081685213e-07, "loss": 0.2434, "step": 19373 }, { "epoch": 0.8768499660556687, "grad_norm": 0.2724202673210525, "learning_rate": 3.9275451950383346e-07, "loss": 0.4528, "step": 19374 }, { "epoch": 0.8768952251640643, "grad_norm": 0.6339317784538435, "learning_rate": 3.924698298346996e-07, "loss": 0.3075, "step": 19375 }, { "epoch": 0.8769404842724599, "grad_norm": 0.6102452598889615, "learning_rate": 3.9218523916723814e-07, "loss": 0.3037, "step": 19376 }, { "epoch": 0.8769857433808554, "grad_norm": 0.24234823449444134, "learning_rate": 3.9190074750756424e-07, "loss": 0.4438, "step": 19377 }, { "epoch": 0.8770310024892509, "grad_norm": 0.7741991460040867, "learning_rate": 3.916163548617913e-07, "loss": 0.3213, "step": 19378 }, { "epoch": 0.8770762615976465, "grad_norm": 0.5343018843761397, "learning_rate": 3.913320612360283e-07, "loss": 0.2576, "step": 19379 }, { "epoch": 0.8771215207060421, "grad_norm": 0.6245680766490284, "learning_rate": 3.9104786663638537e-07, "loss": 0.2738, "step": 19380 }, { "epoch": 0.8771667798144377, "grad_norm": 0.6527068583160411, "learning_rate": 3.9076377106896765e-07, "loss": 0.3479, "step": 19381 }, { "epoch": 0.8772120389228332, "grad_norm": 0.5873223696868036, "learning_rate": 3.904797745398814e-07, "loss": 0.3232, "step": 19382 }, { "epoch": 0.8772572980312288, "grad_norm": 0.8239279730871042, "learning_rate": 3.901958770552272e-07, "loss": 0.3038, "step": 19383 }, { "epoch": 0.8773025571396244, "grad_norm": 0.2989201442753827, "learning_rate": 3.899120786211058e-07, "loss": 0.4633, "step": 19384 }, { "epoch": 0.8773478162480199, "grad_norm": 0.588468140386913, "learning_rate": 3.8962837924361454e-07, "loss": 0.3101, "step": 19385 }, { "epoch": 0.8773930753564155, "grad_norm": 0.6190450837806823, "learning_rate": 3.893447789288507e-07, "loss": 0.3124, "step": 19386 }, { "epoch": 0.877438334464811, "grad_norm": 0.5657772448108447, "learning_rate": 3.890612776829067e-07, "loss": 0.3072, "step": 19387 }, { "epoch": 0.8774835935732066, "grad_norm": 0.7836726961639346, "learning_rate": 3.887778755118743e-07, "loss": 0.2591, "step": 19388 }, { "epoch": 0.8775288526816022, "grad_norm": 0.6392129475718579, "learning_rate": 3.884945724218425e-07, "loss": 0.3644, "step": 19389 }, { "epoch": 0.8775741117899978, "grad_norm": 0.7234741241049042, "learning_rate": 3.882113684188998e-07, "loss": 0.2711, "step": 19390 }, { "epoch": 0.8776193708983933, "grad_norm": 0.6200475117502785, "learning_rate": 3.879282635091308e-07, "loss": 0.3199, "step": 19391 }, { "epoch": 0.8776646300067888, "grad_norm": 0.6234293255662298, "learning_rate": 3.876452576986184e-07, "loss": 0.3023, "step": 19392 }, { "epoch": 0.8777098891151844, "grad_norm": 0.701509162656271, "learning_rate": 3.8736235099344375e-07, "loss": 0.2745, "step": 19393 }, { "epoch": 0.87775514822358, "grad_norm": 0.6403016579755417, "learning_rate": 3.870795433996849e-07, "loss": 0.2669, "step": 19394 }, { "epoch": 0.8778004073319755, "grad_norm": 0.6178465099533741, "learning_rate": 3.8679683492342023e-07, "loss": 0.3066, "step": 19395 }, { "epoch": 0.8778456664403711, "grad_norm": 0.5918812381592131, "learning_rate": 3.865142255707222e-07, "loss": 0.3097, "step": 19396 }, { "epoch": 0.8778909255487667, "grad_norm": 0.6154359170578279, "learning_rate": 3.862317153476647e-07, "loss": 0.2848, "step": 19397 }, { "epoch": 0.8779361846571623, "grad_norm": 0.60710922438913, "learning_rate": 3.859493042603174e-07, "loss": 0.3003, "step": 19398 }, { "epoch": 0.8779814437655579, "grad_norm": 0.6197900213750153, "learning_rate": 3.856669923147488e-07, "loss": 0.2623, "step": 19399 }, { "epoch": 0.8780267028739533, "grad_norm": 0.6219230728627466, "learning_rate": 3.8538477951702515e-07, "loss": 0.2578, "step": 19400 }, { "epoch": 0.8780719619823489, "grad_norm": 0.6047950271722398, "learning_rate": 3.8510266587320876e-07, "loss": 0.2988, "step": 19401 }, { "epoch": 0.8781172210907445, "grad_norm": 0.6586513286928564, "learning_rate": 3.8482065138936263e-07, "loss": 0.3019, "step": 19402 }, { "epoch": 0.8781624801991401, "grad_norm": 0.27443656822880147, "learning_rate": 3.84538736071548e-07, "loss": 0.4491, "step": 19403 }, { "epoch": 0.8782077393075356, "grad_norm": 0.6154796542159318, "learning_rate": 3.8425691992581836e-07, "loss": 0.2536, "step": 19404 }, { "epoch": 0.8782529984159312, "grad_norm": 0.6173411982057696, "learning_rate": 3.839752029582322e-07, "loss": 0.2785, "step": 19405 }, { "epoch": 0.8782982575243268, "grad_norm": 0.6446679072516598, "learning_rate": 3.836935851748419e-07, "loss": 0.2677, "step": 19406 }, { "epoch": 0.8783435166327224, "grad_norm": 0.6188887158583402, "learning_rate": 3.834120665816993e-07, "loss": 0.3072, "step": 19407 }, { "epoch": 0.878388775741118, "grad_norm": 0.6223118704251513, "learning_rate": 3.8313064718485116e-07, "loss": 0.2933, "step": 19408 }, { "epoch": 0.8784340348495134, "grad_norm": 0.6092096403353362, "learning_rate": 3.8284932699034717e-07, "loss": 0.2547, "step": 19409 }, { "epoch": 0.878479293957909, "grad_norm": 0.2827938848992071, "learning_rate": 3.825681060042297e-07, "loss": 0.4956, "step": 19410 }, { "epoch": 0.8785245530663046, "grad_norm": 0.6332323053744587, "learning_rate": 3.822869842325427e-07, "loss": 0.3117, "step": 19411 }, { "epoch": 0.8785698121747002, "grad_norm": 0.5821023599323277, "learning_rate": 3.8200596168132596e-07, "loss": 0.3028, "step": 19412 }, { "epoch": 0.8786150712830957, "grad_norm": 0.6325107408918255, "learning_rate": 3.8172503835661846e-07, "loss": 0.2867, "step": 19413 }, { "epoch": 0.8786603303914913, "grad_norm": 0.2831159664124358, "learning_rate": 3.814442142644548e-07, "loss": 0.4556, "step": 19414 }, { "epoch": 0.8787055894998869, "grad_norm": 0.5849533493887362, "learning_rate": 3.8116348941087176e-07, "loss": 0.2689, "step": 19415 }, { "epoch": 0.8787508486082825, "grad_norm": 0.6519080783139413, "learning_rate": 3.808828638018991e-07, "loss": 0.2817, "step": 19416 }, { "epoch": 0.8787961077166779, "grad_norm": 0.5594713028214605, "learning_rate": 3.8060233744356634e-07, "loss": 0.2636, "step": 19417 }, { "epoch": 0.8788413668250735, "grad_norm": 0.5705113920596403, "learning_rate": 3.8032191034190204e-07, "loss": 0.2823, "step": 19418 }, { "epoch": 0.8788866259334691, "grad_norm": 0.6227759180273239, "learning_rate": 3.8004158250293246e-07, "loss": 0.2877, "step": 19419 }, { "epoch": 0.8789318850418647, "grad_norm": 0.6259684011915944, "learning_rate": 3.7976135393268057e-07, "loss": 0.2699, "step": 19420 }, { "epoch": 0.8789771441502603, "grad_norm": 0.6387988070703673, "learning_rate": 3.79481224637166e-07, "loss": 0.2828, "step": 19421 }, { "epoch": 0.8790224032586558, "grad_norm": 0.6254957096979336, "learning_rate": 3.7920119462241e-07, "loss": 0.3304, "step": 19422 }, { "epoch": 0.8790676623670514, "grad_norm": 0.5490326198817921, "learning_rate": 3.789212638944273e-07, "loss": 0.2539, "step": 19423 }, { "epoch": 0.879112921475447, "grad_norm": 0.25844367227032256, "learning_rate": 3.786414324592358e-07, "loss": 0.4545, "step": 19424 }, { "epoch": 0.8791581805838425, "grad_norm": 0.707449751803239, "learning_rate": 3.7836170032284516e-07, "loss": 0.276, "step": 19425 }, { "epoch": 0.879203439692238, "grad_norm": 0.7146855031244232, "learning_rate": 3.7808206749126777e-07, "loss": 0.2782, "step": 19426 }, { "epoch": 0.8792486988006336, "grad_norm": 0.681521661479075, "learning_rate": 3.778025339705116e-07, "loss": 0.2449, "step": 19427 }, { "epoch": 0.8792939579090292, "grad_norm": 0.624553488593751, "learning_rate": 3.7752309976658295e-07, "loss": 0.2698, "step": 19428 }, { "epoch": 0.8793392170174248, "grad_norm": 0.5821597816164235, "learning_rate": 3.7724376488548655e-07, "loss": 0.2938, "step": 19429 }, { "epoch": 0.8793844761258203, "grad_norm": 0.25441806517758003, "learning_rate": 3.7696452933322305e-07, "loss": 0.4679, "step": 19430 }, { "epoch": 0.8794297352342159, "grad_norm": 0.24762157896644646, "learning_rate": 3.766853931157932e-07, "loss": 0.436, "step": 19431 }, { "epoch": 0.8794749943426114, "grad_norm": 0.5959314807385596, "learning_rate": 3.7640635623919674e-07, "loss": 0.317, "step": 19432 }, { "epoch": 0.879520253451007, "grad_norm": 0.6562698471158812, "learning_rate": 3.761274187094255e-07, "loss": 0.3062, "step": 19433 }, { "epoch": 0.8795655125594026, "grad_norm": 0.27390885110652485, "learning_rate": 3.758485805324746e-07, "loss": 0.4677, "step": 19434 }, { "epoch": 0.8796107716677981, "grad_norm": 0.692513863185441, "learning_rate": 3.7556984171433663e-07, "loss": 0.2797, "step": 19435 }, { "epoch": 0.8796560307761937, "grad_norm": 0.6152037869565279, "learning_rate": 3.752912022610006e-07, "loss": 0.2814, "step": 19436 }, { "epoch": 0.8797012898845893, "grad_norm": 0.24162605717765784, "learning_rate": 3.750126621784511e-07, "loss": 0.4398, "step": 19437 }, { "epoch": 0.8797465489929849, "grad_norm": 0.6122811807147558, "learning_rate": 3.7473422147267623e-07, "loss": 0.285, "step": 19438 }, { "epoch": 0.8797918081013804, "grad_norm": 0.5819518988029752, "learning_rate": 3.744558801496567e-07, "loss": 0.2805, "step": 19439 }, { "epoch": 0.879837067209776, "grad_norm": 0.8562388874106288, "learning_rate": 3.74177638215375e-07, "loss": 0.2991, "step": 19440 }, { "epoch": 0.8798823263181715, "grad_norm": 0.6023165883846132, "learning_rate": 3.73899495675808e-07, "loss": 0.2922, "step": 19441 }, { "epoch": 0.8799275854265671, "grad_norm": 0.6116074594476207, "learning_rate": 3.736214525369336e-07, "loss": 0.3151, "step": 19442 }, { "epoch": 0.8799728445349626, "grad_norm": 0.3015748179436036, "learning_rate": 3.7334350880472434e-07, "loss": 0.4811, "step": 19443 }, { "epoch": 0.8800181036433582, "grad_norm": 0.5836461033173239, "learning_rate": 3.730656644851538e-07, "loss": 0.3192, "step": 19444 }, { "epoch": 0.8800633627517538, "grad_norm": 1.1388168216379084, "learning_rate": 3.727879195841921e-07, "loss": 0.2662, "step": 19445 }, { "epoch": 0.8801086218601494, "grad_norm": 0.6501681088951335, "learning_rate": 3.7251027410780573e-07, "loss": 0.3012, "step": 19446 }, { "epoch": 0.880153880968545, "grad_norm": 0.6716599059645094, "learning_rate": 3.722327280619614e-07, "loss": 0.3035, "step": 19447 }, { "epoch": 0.8801991400769404, "grad_norm": 0.6544174561508919, "learning_rate": 3.7195528145262337e-07, "loss": 0.3487, "step": 19448 }, { "epoch": 0.880244399185336, "grad_norm": 0.6577095681684545, "learning_rate": 3.7167793428575236e-07, "loss": 0.2736, "step": 19449 }, { "epoch": 0.8802896582937316, "grad_norm": 0.6075001612961495, "learning_rate": 3.71400686567307e-07, "loss": 0.2975, "step": 19450 }, { "epoch": 0.8803349174021272, "grad_norm": 0.6407300018974064, "learning_rate": 3.7112353830324576e-07, "loss": 0.2914, "step": 19451 }, { "epoch": 0.8803801765105227, "grad_norm": 0.6312364077947341, "learning_rate": 3.7084648949952284e-07, "loss": 0.2878, "step": 19452 }, { "epoch": 0.8804254356189183, "grad_norm": 0.7431039517859039, "learning_rate": 3.705695401620918e-07, "loss": 0.3099, "step": 19453 }, { "epoch": 0.8804706947273139, "grad_norm": 0.5882110778724492, "learning_rate": 3.7029269029690287e-07, "loss": 0.2982, "step": 19454 }, { "epoch": 0.8805159538357095, "grad_norm": 0.8440818057248546, "learning_rate": 3.700159399099057e-07, "loss": 0.2796, "step": 19455 }, { "epoch": 0.880561212944105, "grad_norm": 0.2834390846480046, "learning_rate": 3.6973928900704503e-07, "loss": 0.4566, "step": 19456 }, { "epoch": 0.8806064720525005, "grad_norm": 0.5899447925861252, "learning_rate": 3.6946273759426667e-07, "loss": 0.3159, "step": 19457 }, { "epoch": 0.8806517311608961, "grad_norm": 0.5717095051810808, "learning_rate": 3.69186285677513e-07, "loss": 0.2919, "step": 19458 }, { "epoch": 0.8806969902692917, "grad_norm": 0.5766207199446806, "learning_rate": 3.6890993326272273e-07, "loss": 0.3135, "step": 19459 }, { "epoch": 0.8807422493776873, "grad_norm": 0.6403324284212105, "learning_rate": 3.6863368035583494e-07, "loss": 0.2814, "step": 19460 }, { "epoch": 0.8807875084860828, "grad_norm": 0.5780060358993144, "learning_rate": 3.683575269627865e-07, "loss": 0.3103, "step": 19461 }, { "epoch": 0.8808327675944784, "grad_norm": 0.5773243391426244, "learning_rate": 3.680814730895077e-07, "loss": 0.2913, "step": 19462 }, { "epoch": 0.880878026702874, "grad_norm": 0.2893917732617025, "learning_rate": 3.6780551874193273e-07, "loss": 0.4877, "step": 19463 }, { "epoch": 0.8809232858112696, "grad_norm": 0.6755635937322024, "learning_rate": 3.675296639259912e-07, "loss": 0.3022, "step": 19464 }, { "epoch": 0.880968544919665, "grad_norm": 1.2261435587924292, "learning_rate": 3.672539086476101e-07, "loss": 0.3154, "step": 19465 }, { "epoch": 0.8810138040280606, "grad_norm": 0.593591631903637, "learning_rate": 3.669782529127125e-07, "loss": 0.2787, "step": 19466 }, { "epoch": 0.8810590631364562, "grad_norm": 0.2859027748282125, "learning_rate": 3.667026967272236e-07, "loss": 0.4651, "step": 19467 }, { "epoch": 0.8811043222448518, "grad_norm": 0.5509629077468778, "learning_rate": 3.6642724009706423e-07, "loss": 0.2995, "step": 19468 }, { "epoch": 0.8811495813532474, "grad_norm": 0.552569680672477, "learning_rate": 3.661518830281524e-07, "loss": 0.3176, "step": 19469 }, { "epoch": 0.8811948404616429, "grad_norm": 0.6266113022409174, "learning_rate": 3.658766255264046e-07, "loss": 0.3136, "step": 19470 }, { "epoch": 0.8812400995700385, "grad_norm": 0.6089083806216865, "learning_rate": 3.65601467597736e-07, "loss": 0.3461, "step": 19471 }, { "epoch": 0.881285358678434, "grad_norm": 0.2774525191944238, "learning_rate": 3.653264092480574e-07, "loss": 0.4639, "step": 19472 }, { "epoch": 0.8813306177868296, "grad_norm": 0.2802605123799578, "learning_rate": 3.650514504832808e-07, "loss": 0.469, "step": 19473 }, { "epoch": 0.8813758768952251, "grad_norm": 0.6079325883555515, "learning_rate": 3.647765913093132e-07, "loss": 0.2696, "step": 19474 }, { "epoch": 0.8814211360036207, "grad_norm": 0.7064070655192801, "learning_rate": 3.6450183173205975e-07, "loss": 0.2796, "step": 19475 }, { "epoch": 0.8814663951120163, "grad_norm": 0.6073929551673423, "learning_rate": 3.6422717175742584e-07, "loss": 0.3106, "step": 19476 }, { "epoch": 0.8815116542204119, "grad_norm": 0.6969037107691501, "learning_rate": 3.639526113913122e-07, "loss": 0.3233, "step": 19477 }, { "epoch": 0.8815569133288074, "grad_norm": 0.6067566723679195, "learning_rate": 3.636781506396192e-07, "loss": 0.2654, "step": 19478 }, { "epoch": 0.881602172437203, "grad_norm": 0.27022736806054953, "learning_rate": 3.634037895082421e-07, "loss": 0.4727, "step": 19479 }, { "epoch": 0.8816474315455985, "grad_norm": 0.2748053658632415, "learning_rate": 3.631295280030783e-07, "loss": 0.4623, "step": 19480 }, { "epoch": 0.8816926906539941, "grad_norm": 0.6392027857838919, "learning_rate": 3.628553661300194e-07, "loss": 0.2831, "step": 19481 }, { "epoch": 0.8817379497623897, "grad_norm": 0.2676240449487449, "learning_rate": 3.6258130389495714e-07, "loss": 0.4477, "step": 19482 }, { "epoch": 0.8817832088707852, "grad_norm": 0.603529385483359, "learning_rate": 3.623073413037792e-07, "loss": 0.2927, "step": 19483 }, { "epoch": 0.8818284679791808, "grad_norm": 0.5873583234591071, "learning_rate": 3.620334783623736e-07, "loss": 0.3097, "step": 19484 }, { "epoch": 0.8818737270875764, "grad_norm": 0.6453616824837317, "learning_rate": 3.6175971507662334e-07, "loss": 0.2814, "step": 19485 }, { "epoch": 0.881918986195972, "grad_norm": 0.7953146155133014, "learning_rate": 3.6148605145241264e-07, "loss": 0.2953, "step": 19486 }, { "epoch": 0.8819642453043675, "grad_norm": 0.6501379276469679, "learning_rate": 3.612124874956202e-07, "loss": 0.3198, "step": 19487 }, { "epoch": 0.882009504412763, "grad_norm": 0.6536634477685312, "learning_rate": 3.6093902321212405e-07, "loss": 0.3009, "step": 19488 }, { "epoch": 0.8820547635211586, "grad_norm": 0.6306391823413456, "learning_rate": 3.606656586078e-07, "loss": 0.2996, "step": 19489 }, { "epoch": 0.8821000226295542, "grad_norm": 0.6143080705714032, "learning_rate": 3.603923936885234e-07, "loss": 0.2883, "step": 19490 }, { "epoch": 0.8821452817379498, "grad_norm": 0.5784749363086075, "learning_rate": 3.6011922846016513e-07, "loss": 0.2809, "step": 19491 }, { "epoch": 0.8821905408463453, "grad_norm": 0.24860289722800014, "learning_rate": 3.598461629285932e-07, "loss": 0.4634, "step": 19492 }, { "epoch": 0.8822357999547409, "grad_norm": 0.25458788983963304, "learning_rate": 3.5957319709967686e-07, "loss": 0.4477, "step": 19493 }, { "epoch": 0.8822810590631365, "grad_norm": 0.5409817762850514, "learning_rate": 3.5930033097928086e-07, "loss": 0.2921, "step": 19494 }, { "epoch": 0.8823263181715321, "grad_norm": 0.82768520924384, "learning_rate": 3.590275645732666e-07, "loss": 0.3658, "step": 19495 }, { "epoch": 0.8823715772799275, "grad_norm": 0.6294611657462783, "learning_rate": 3.5875489788749665e-07, "loss": 0.271, "step": 19496 }, { "epoch": 0.8824168363883231, "grad_norm": 0.6913804587027634, "learning_rate": 3.5848233092783015e-07, "loss": 0.362, "step": 19497 }, { "epoch": 0.8824620954967187, "grad_norm": 0.25888332790044416, "learning_rate": 3.5820986370012303e-07, "loss": 0.473, "step": 19498 }, { "epoch": 0.8825073546051143, "grad_norm": 0.6432899343617813, "learning_rate": 3.579374962102289e-07, "loss": 0.2763, "step": 19499 }, { "epoch": 0.8825526137135098, "grad_norm": 0.6067096968977975, "learning_rate": 3.57665228464002e-07, "loss": 0.2607, "step": 19500 }, { "epoch": 0.8825978728219054, "grad_norm": 0.6861618750869042, "learning_rate": 3.573930604672904e-07, "loss": 0.2651, "step": 19501 }, { "epoch": 0.882643131930301, "grad_norm": 0.2943924522520371, "learning_rate": 3.571209922259439e-07, "loss": 0.4661, "step": 19502 }, { "epoch": 0.8826883910386966, "grad_norm": 0.6177842227396511, "learning_rate": 3.568490237458083e-07, "loss": 0.303, "step": 19503 }, { "epoch": 0.8827336501470922, "grad_norm": 0.2745503493714577, "learning_rate": 3.5657715503272574e-07, "loss": 0.4871, "step": 19504 }, { "epoch": 0.8827789092554876, "grad_norm": 0.5571673918655595, "learning_rate": 3.563053860925392e-07, "loss": 0.302, "step": 19505 }, { "epoch": 0.8828241683638832, "grad_norm": 0.6663631237912184, "learning_rate": 3.5603371693108845e-07, "loss": 0.3064, "step": 19506 }, { "epoch": 0.8828694274722788, "grad_norm": 0.6260927130262418, "learning_rate": 3.5576214755421e-07, "loss": 0.2845, "step": 19507 }, { "epoch": 0.8829146865806744, "grad_norm": 0.5917560520583667, "learning_rate": 3.5549067796773915e-07, "loss": 0.31, "step": 19508 }, { "epoch": 0.8829599456890699, "grad_norm": 1.1379628888985436, "learning_rate": 3.5521930817750963e-07, "loss": 0.2842, "step": 19509 }, { "epoch": 0.8830052047974655, "grad_norm": 0.6216868931141476, "learning_rate": 3.549480381893505e-07, "loss": 0.2603, "step": 19510 }, { "epoch": 0.8830504639058611, "grad_norm": 0.6292864149594591, "learning_rate": 3.546768680090934e-07, "loss": 0.2915, "step": 19511 }, { "epoch": 0.8830957230142567, "grad_norm": 0.5450364036985776, "learning_rate": 3.544057976425619e-07, "loss": 0.2967, "step": 19512 }, { "epoch": 0.8831409821226521, "grad_norm": 0.6244991880782165, "learning_rate": 3.5413482709558353e-07, "loss": 0.297, "step": 19513 }, { "epoch": 0.8831862412310477, "grad_norm": 0.6562732652908535, "learning_rate": 3.538639563739776e-07, "loss": 0.3106, "step": 19514 }, { "epoch": 0.8832315003394433, "grad_norm": 0.24905978799813444, "learning_rate": 3.535931854835667e-07, "loss": 0.4413, "step": 19515 }, { "epoch": 0.8832767594478389, "grad_norm": 0.6667836600959485, "learning_rate": 3.533225144301683e-07, "loss": 0.314, "step": 19516 }, { "epoch": 0.8833220185562345, "grad_norm": 0.6324897324523692, "learning_rate": 3.530519432195967e-07, "loss": 0.269, "step": 19517 }, { "epoch": 0.88336727766463, "grad_norm": 0.6004718094460242, "learning_rate": 3.5278147185766665e-07, "loss": 0.315, "step": 19518 }, { "epoch": 0.8834125367730256, "grad_norm": 0.580260577387419, "learning_rate": 3.525111003501908e-07, "loss": 0.3061, "step": 19519 }, { "epoch": 0.8834577958814211, "grad_norm": 0.5955680232408859, "learning_rate": 3.522408287029783e-07, "loss": 0.3236, "step": 19520 }, { "epoch": 0.8835030549898167, "grad_norm": 0.5709331388756373, "learning_rate": 3.519706569218345e-07, "loss": 0.2763, "step": 19521 }, { "epoch": 0.8835483140982122, "grad_norm": 0.7962422901003292, "learning_rate": 3.517005850125671e-07, "loss": 0.2955, "step": 19522 }, { "epoch": 0.8835935732066078, "grad_norm": 0.5829977543303796, "learning_rate": 3.5143061298097693e-07, "loss": 0.2651, "step": 19523 }, { "epoch": 0.8836388323150034, "grad_norm": 0.6197722715291717, "learning_rate": 3.5116074083286655e-07, "loss": 0.2744, "step": 19524 }, { "epoch": 0.883684091423399, "grad_norm": 0.7073843878252019, "learning_rate": 3.508909685740336e-07, "loss": 0.2971, "step": 19525 }, { "epoch": 0.8837293505317946, "grad_norm": 0.6411539261445458, "learning_rate": 3.5062129621027565e-07, "loss": 0.3349, "step": 19526 }, { "epoch": 0.8837746096401901, "grad_norm": 0.5867776122798319, "learning_rate": 3.5035172374738636e-07, "loss": 0.3039, "step": 19527 }, { "epoch": 0.8838198687485856, "grad_norm": 0.5909421682088367, "learning_rate": 3.500822511911578e-07, "loss": 0.3061, "step": 19528 }, { "epoch": 0.8838651278569812, "grad_norm": 1.0457195445167602, "learning_rate": 3.4981287854738143e-07, "loss": 0.3391, "step": 19529 }, { "epoch": 0.8839103869653768, "grad_norm": 0.6062906376849054, "learning_rate": 3.495436058218432e-07, "loss": 0.2978, "step": 19530 }, { "epoch": 0.8839556460737723, "grad_norm": 0.6345821755484723, "learning_rate": 3.4927443302033127e-07, "loss": 0.298, "step": 19531 }, { "epoch": 0.8840009051821679, "grad_norm": 0.5693391318164701, "learning_rate": 3.4900536014862763e-07, "loss": 0.2746, "step": 19532 }, { "epoch": 0.8840461642905635, "grad_norm": 0.607301733592083, "learning_rate": 3.487363872125138e-07, "loss": 0.3167, "step": 19533 }, { "epoch": 0.8840914233989591, "grad_norm": 0.6503070900326725, "learning_rate": 3.4846751421777014e-07, "loss": 0.2913, "step": 19534 }, { "epoch": 0.8841366825073546, "grad_norm": 0.6221687774308418, "learning_rate": 3.4819874117017373e-07, "loss": 0.2766, "step": 19535 }, { "epoch": 0.8841819416157501, "grad_norm": 0.5918727382827849, "learning_rate": 3.479300680754999e-07, "loss": 0.3141, "step": 19536 }, { "epoch": 0.8842272007241457, "grad_norm": 0.7346385733269784, "learning_rate": 3.4766149493952015e-07, "loss": 0.3138, "step": 19537 }, { "epoch": 0.8842724598325413, "grad_norm": 0.9691102569844686, "learning_rate": 3.4739302176800603e-07, "loss": 0.2779, "step": 19538 }, { "epoch": 0.8843177189409369, "grad_norm": 0.5998689432100688, "learning_rate": 3.471246485667279e-07, "loss": 0.2885, "step": 19539 }, { "epoch": 0.8843629780493324, "grad_norm": 0.5951426041184009, "learning_rate": 3.468563753414506e-07, "loss": 0.2637, "step": 19540 }, { "epoch": 0.884408237157728, "grad_norm": 0.6478330409952806, "learning_rate": 3.4658820209793773e-07, "loss": 0.2924, "step": 19541 }, { "epoch": 0.8844534962661236, "grad_norm": 0.6745078177873124, "learning_rate": 3.463201288419532e-07, "loss": 0.2855, "step": 19542 }, { "epoch": 0.8844987553745192, "grad_norm": 0.2845659706695868, "learning_rate": 3.460521555792562e-07, "loss": 0.4745, "step": 19543 }, { "epoch": 0.8845440144829146, "grad_norm": 0.588620732096941, "learning_rate": 3.4578428231560547e-07, "loss": 0.3066, "step": 19544 }, { "epoch": 0.8845892735913102, "grad_norm": 0.6574074748283296, "learning_rate": 3.4551650905675584e-07, "loss": 0.3262, "step": 19545 }, { "epoch": 0.8846345326997058, "grad_norm": 0.6045371469529954, "learning_rate": 3.4524883580846045e-07, "loss": 0.3199, "step": 19546 }, { "epoch": 0.8846797918081014, "grad_norm": 0.5269759620850714, "learning_rate": 3.44981262576472e-07, "loss": 0.2819, "step": 19547 }, { "epoch": 0.8847250509164969, "grad_norm": 0.5953117426683678, "learning_rate": 3.4471378936654033e-07, "loss": 0.2828, "step": 19548 }, { "epoch": 0.8847703100248925, "grad_norm": 0.5621445856567112, "learning_rate": 3.444464161844113e-07, "loss": 0.2823, "step": 19549 }, { "epoch": 0.8848155691332881, "grad_norm": 0.5682398961006359, "learning_rate": 3.441791430358299e-07, "loss": 0.2874, "step": 19550 }, { "epoch": 0.8848608282416837, "grad_norm": 0.5835295496557882, "learning_rate": 3.4391196992653976e-07, "loss": 0.253, "step": 19551 }, { "epoch": 0.8849060873500793, "grad_norm": 0.6077999682841784, "learning_rate": 3.4364489686228076e-07, "loss": 0.2733, "step": 19552 }, { "epoch": 0.8849513464584747, "grad_norm": 0.6696188768672875, "learning_rate": 3.4337792384879274e-07, "loss": 0.2468, "step": 19553 }, { "epoch": 0.8849966055668703, "grad_norm": 0.2749716275869113, "learning_rate": 3.431110508918112e-07, "loss": 0.4613, "step": 19554 }, { "epoch": 0.8850418646752659, "grad_norm": 0.312283411686006, "learning_rate": 3.428442779970709e-07, "loss": 0.4337, "step": 19555 }, { "epoch": 0.8850871237836615, "grad_norm": 0.2585317322224034, "learning_rate": 3.425776051703028e-07, "loss": 0.4855, "step": 19556 }, { "epoch": 0.885132382892057, "grad_norm": 0.5817616751386044, "learning_rate": 3.4231103241723904e-07, "loss": 0.2766, "step": 19557 }, { "epoch": 0.8851776420004526, "grad_norm": 0.657507339106589, "learning_rate": 3.420445597436056e-07, "loss": 0.3129, "step": 19558 }, { "epoch": 0.8852229011088482, "grad_norm": 0.25678973673554567, "learning_rate": 3.4177818715512844e-07, "loss": 0.4524, "step": 19559 }, { "epoch": 0.8852681602172437, "grad_norm": 0.6084533850542106, "learning_rate": 3.415119146575313e-07, "loss": 0.2883, "step": 19560 }, { "epoch": 0.8853134193256393, "grad_norm": 0.6689930661361905, "learning_rate": 3.412457422565368e-07, "loss": 0.3022, "step": 19561 }, { "epoch": 0.8853586784340348, "grad_norm": 0.6389051792047346, "learning_rate": 3.409796699578621e-07, "loss": 0.3097, "step": 19562 }, { "epoch": 0.8854039375424304, "grad_norm": 0.6029784116061799, "learning_rate": 3.4071369776722487e-07, "loss": 0.2956, "step": 19563 }, { "epoch": 0.885449196650826, "grad_norm": 0.2873501741847482, "learning_rate": 3.4044782569034096e-07, "loss": 0.4892, "step": 19564 }, { "epoch": 0.8854944557592216, "grad_norm": 0.6136217708277859, "learning_rate": 3.401820537329231e-07, "loss": 0.2888, "step": 19565 }, { "epoch": 0.8855397148676171, "grad_norm": 0.6195423898571132, "learning_rate": 3.399163819006801e-07, "loss": 0.2727, "step": 19566 }, { "epoch": 0.8855849739760127, "grad_norm": 0.5806307328938312, "learning_rate": 3.3965081019932176e-07, "loss": 0.2648, "step": 19567 }, { "epoch": 0.8856302330844082, "grad_norm": 1.002216804194398, "learning_rate": 3.3938533863455526e-07, "loss": 0.321, "step": 19568 }, { "epoch": 0.8856754921928038, "grad_norm": 0.2583117712856083, "learning_rate": 3.3911996721208373e-07, "loss": 0.4512, "step": 19569 }, { "epoch": 0.8857207513011993, "grad_norm": 0.6515063155660002, "learning_rate": 3.388546959376088e-07, "loss": 0.2698, "step": 19570 }, { "epoch": 0.8857660104095949, "grad_norm": 0.6489297620437484, "learning_rate": 3.385895248168314e-07, "loss": 0.2663, "step": 19571 }, { "epoch": 0.8858112695179905, "grad_norm": 0.6414007301380998, "learning_rate": 3.383244538554481e-07, "loss": 0.312, "step": 19572 }, { "epoch": 0.8858565286263861, "grad_norm": 0.6456000031136664, "learning_rate": 3.380594830591555e-07, "loss": 0.311, "step": 19573 }, { "epoch": 0.8859017877347817, "grad_norm": 0.6274904157405076, "learning_rate": 3.3779461243364673e-07, "loss": 0.3125, "step": 19574 }, { "epoch": 0.8859470468431772, "grad_norm": 0.6555214158150692, "learning_rate": 3.3752984198461236e-07, "loss": 0.3102, "step": 19575 }, { "epoch": 0.8859923059515727, "grad_norm": 0.6002976364382615, "learning_rate": 3.3726517171774163e-07, "loss": 0.251, "step": 19576 }, { "epoch": 0.8860375650599683, "grad_norm": 0.6060495541921025, "learning_rate": 3.3700060163872285e-07, "loss": 0.2729, "step": 19577 }, { "epoch": 0.8860828241683639, "grad_norm": 0.5862835298549485, "learning_rate": 3.367361317532397e-07, "loss": 0.2833, "step": 19578 }, { "epoch": 0.8861280832767594, "grad_norm": 0.26710656491030127, "learning_rate": 3.3647176206697387e-07, "loss": 0.4968, "step": 19579 }, { "epoch": 0.886173342385155, "grad_norm": 0.2666211536533626, "learning_rate": 3.362074925856079e-07, "loss": 0.431, "step": 19580 }, { "epoch": 0.8862186014935506, "grad_norm": 0.5695538360488616, "learning_rate": 3.359433233148185e-07, "loss": 0.2709, "step": 19581 }, { "epoch": 0.8862638606019462, "grad_norm": 0.59719229785415, "learning_rate": 3.356792542602838e-07, "loss": 0.2706, "step": 19582 }, { "epoch": 0.8863091197103417, "grad_norm": 0.5836204564546559, "learning_rate": 3.354152854276749e-07, "loss": 0.2906, "step": 19583 }, { "epoch": 0.8863543788187372, "grad_norm": 0.26799320129721166, "learning_rate": 3.351514168226666e-07, "loss": 0.4762, "step": 19584 }, { "epoch": 0.8863996379271328, "grad_norm": 0.26377778286885706, "learning_rate": 3.348876484509267e-07, "loss": 0.4441, "step": 19585 }, { "epoch": 0.8864448970355284, "grad_norm": 0.2715906803668781, "learning_rate": 3.346239803181239e-07, "loss": 0.4714, "step": 19586 }, { "epoch": 0.886490156143924, "grad_norm": 0.6451659642294735, "learning_rate": 3.343604124299232e-07, "loss": 0.309, "step": 19587 }, { "epoch": 0.8865354152523195, "grad_norm": 0.6017547316225268, "learning_rate": 3.340969447919873e-07, "loss": 0.2856, "step": 19588 }, { "epoch": 0.8865806743607151, "grad_norm": 0.6555996985935236, "learning_rate": 3.338335774099777e-07, "loss": 0.3516, "step": 19589 }, { "epoch": 0.8866259334691107, "grad_norm": 0.6593669661485506, "learning_rate": 3.335703102895549e-07, "loss": 0.3465, "step": 19590 }, { "epoch": 0.8866711925775063, "grad_norm": 0.26153963630435606, "learning_rate": 3.333071434363727e-07, "loss": 0.4901, "step": 19591 }, { "epoch": 0.8867164516859017, "grad_norm": 0.5857658939032128, "learning_rate": 3.3304407685608777e-07, "loss": 0.2507, "step": 19592 }, { "epoch": 0.8867617107942973, "grad_norm": 0.26285765382768767, "learning_rate": 3.3278111055435214e-07, "loss": 0.4731, "step": 19593 }, { "epoch": 0.8868069699026929, "grad_norm": 0.590814928105251, "learning_rate": 3.325182445368169e-07, "loss": 0.3197, "step": 19594 }, { "epoch": 0.8868522290110885, "grad_norm": 0.6646910180079847, "learning_rate": 3.322554788091287e-07, "loss": 0.3039, "step": 19595 }, { "epoch": 0.8868974881194841, "grad_norm": 0.6051269524511917, "learning_rate": 3.31992813376934e-07, "loss": 0.2911, "step": 19596 }, { "epoch": 0.8869427472278796, "grad_norm": 0.6154458023837881, "learning_rate": 3.3173024824587786e-07, "loss": 0.2668, "step": 19597 }, { "epoch": 0.8869880063362752, "grad_norm": 0.7649855997860965, "learning_rate": 3.314677834216012e-07, "loss": 0.2939, "step": 19598 }, { "epoch": 0.8870332654446708, "grad_norm": 0.5910296190533375, "learning_rate": 3.31205418909743e-07, "loss": 0.3067, "step": 19599 }, { "epoch": 0.8870785245530663, "grad_norm": 0.6687276660419679, "learning_rate": 3.30943154715942e-07, "loss": 0.384, "step": 19600 }, { "epoch": 0.8871237836614618, "grad_norm": 0.26577943660214226, "learning_rate": 3.3068099084583195e-07, "loss": 0.4374, "step": 19601 }, { "epoch": 0.8871690427698574, "grad_norm": 0.5708204411176053, "learning_rate": 3.304189273050473e-07, "loss": 0.2687, "step": 19602 }, { "epoch": 0.887214301878253, "grad_norm": 0.28507712724926143, "learning_rate": 3.301569640992186e-07, "loss": 0.4675, "step": 19603 }, { "epoch": 0.8872595609866486, "grad_norm": 0.6315800803320455, "learning_rate": 3.298951012339735e-07, "loss": 0.2979, "step": 19604 }, { "epoch": 0.8873048200950441, "grad_norm": 0.6476269168568197, "learning_rate": 3.2963333871493917e-07, "loss": 0.2999, "step": 19605 }, { "epoch": 0.8873500792034397, "grad_norm": 0.5974437769779533, "learning_rate": 3.293716765477417e-07, "loss": 0.2903, "step": 19606 }, { "epoch": 0.8873953383118353, "grad_norm": 0.6080218454670456, "learning_rate": 3.2911011473800213e-07, "loss": 0.2874, "step": 19607 }, { "epoch": 0.8874405974202308, "grad_norm": 0.24843599926496016, "learning_rate": 3.2884865329133986e-07, "loss": 0.4416, "step": 19608 }, { "epoch": 0.8874858565286264, "grad_norm": 0.6070739732249638, "learning_rate": 3.285872922133737e-07, "loss": 0.264, "step": 19609 }, { "epoch": 0.8875311156370219, "grad_norm": 0.61134458557448, "learning_rate": 3.2832603150971974e-07, "loss": 0.2928, "step": 19610 }, { "epoch": 0.8875763747454175, "grad_norm": 0.590230363263004, "learning_rate": 3.2806487118599237e-07, "loss": 0.2799, "step": 19611 }, { "epoch": 0.8876216338538131, "grad_norm": 0.5998546487827437, "learning_rate": 3.2780381124780046e-07, "loss": 0.306, "step": 19612 }, { "epoch": 0.8876668929622087, "grad_norm": 0.6350566283688125, "learning_rate": 3.275428517007562e-07, "loss": 0.2695, "step": 19613 }, { "epoch": 0.8877121520706042, "grad_norm": 0.607949377490081, "learning_rate": 3.27281992550465e-07, "loss": 0.3069, "step": 19614 }, { "epoch": 0.8877574111789998, "grad_norm": 0.6484075872349462, "learning_rate": 3.270212338025336e-07, "loss": 0.2813, "step": 19615 }, { "epoch": 0.8878026702873953, "grad_norm": 0.6570954002846704, "learning_rate": 3.2676057546256354e-07, "loss": 0.2596, "step": 19616 }, { "epoch": 0.8878479293957909, "grad_norm": 0.6249630982237082, "learning_rate": 3.2650001753615547e-07, "loss": 0.2986, "step": 19617 }, { "epoch": 0.8878931885041864, "grad_norm": 0.6217870094199994, "learning_rate": 3.262395600289087e-07, "loss": 0.3208, "step": 19618 }, { "epoch": 0.887938447612582, "grad_norm": 0.5814716308866661, "learning_rate": 3.259792029464204e-07, "loss": 0.293, "step": 19619 }, { "epoch": 0.8879837067209776, "grad_norm": 0.6060563502689635, "learning_rate": 3.2571894629428224e-07, "loss": 0.3349, "step": 19620 }, { "epoch": 0.8880289658293732, "grad_norm": 0.6316516663442189, "learning_rate": 3.2545879007808866e-07, "loss": 0.2974, "step": 19621 }, { "epoch": 0.8880742249377688, "grad_norm": 0.5989664177057166, "learning_rate": 3.2519873430342905e-07, "loss": 0.2803, "step": 19622 }, { "epoch": 0.8881194840461643, "grad_norm": 0.650238534391762, "learning_rate": 3.2493877897589123e-07, "loss": 0.2616, "step": 19623 }, { "epoch": 0.8881647431545598, "grad_norm": 0.5862701585975826, "learning_rate": 3.2467892410106006e-07, "loss": 0.3034, "step": 19624 }, { "epoch": 0.8882100022629554, "grad_norm": 0.2878441759964836, "learning_rate": 3.2441916968452003e-07, "loss": 0.4626, "step": 19625 }, { "epoch": 0.888255261371351, "grad_norm": 0.7429412990238157, "learning_rate": 3.2415951573185224e-07, "loss": 0.2857, "step": 19626 }, { "epoch": 0.8883005204797465, "grad_norm": 0.5785353982968996, "learning_rate": 3.2389996224863604e-07, "loss": 0.271, "step": 19627 }, { "epoch": 0.8883457795881421, "grad_norm": 0.282444870309688, "learning_rate": 3.236405092404471e-07, "loss": 0.4653, "step": 19628 }, { "epoch": 0.8883910386965377, "grad_norm": 0.5859165748543158, "learning_rate": 3.2338115671286254e-07, "loss": 0.305, "step": 19629 }, { "epoch": 0.8884362978049333, "grad_norm": 0.5722429759030558, "learning_rate": 3.231219046714523e-07, "loss": 0.2632, "step": 19630 }, { "epoch": 0.8884815569133289, "grad_norm": 0.2766515224836743, "learning_rate": 3.2286275312178984e-07, "loss": 0.464, "step": 19631 }, { "epoch": 0.8885268160217243, "grad_norm": 0.6600272957732479, "learning_rate": 3.226037020694417e-07, "loss": 0.2758, "step": 19632 }, { "epoch": 0.8885720751301199, "grad_norm": 0.6126178906891568, "learning_rate": 3.2234475151997345e-07, "loss": 0.2981, "step": 19633 }, { "epoch": 0.8886173342385155, "grad_norm": 0.4553668902972284, "learning_rate": 3.220859014789507e-07, "loss": 0.4847, "step": 19634 }, { "epoch": 0.8886625933469111, "grad_norm": 0.6261440561231867, "learning_rate": 3.21827151951935e-07, "loss": 0.2758, "step": 19635 }, { "epoch": 0.8887078524553066, "grad_norm": 0.6035397288881246, "learning_rate": 3.215685029444865e-07, "loss": 0.2576, "step": 19636 }, { "epoch": 0.8887531115637022, "grad_norm": 0.6880253117826668, "learning_rate": 3.213099544621612e-07, "loss": 0.2752, "step": 19637 }, { "epoch": 0.8887983706720978, "grad_norm": 0.5554182104281393, "learning_rate": 3.210515065105152e-07, "loss": 0.2761, "step": 19638 }, { "epoch": 0.8888436297804934, "grad_norm": 0.7699347195496511, "learning_rate": 3.20793159095103e-07, "loss": 0.2992, "step": 19639 }, { "epoch": 0.8888888888888888, "grad_norm": 0.674613342892287, "learning_rate": 3.2053491222147514e-07, "loss": 0.2525, "step": 19640 }, { "epoch": 0.8889341479972844, "grad_norm": 0.6265349024511476, "learning_rate": 3.2027676589517885e-07, "loss": 0.3129, "step": 19641 }, { "epoch": 0.88897940710568, "grad_norm": 0.6407998943248913, "learning_rate": 3.2001872012176304e-07, "loss": 0.3456, "step": 19642 }, { "epoch": 0.8890246662140756, "grad_norm": 0.6249769827265742, "learning_rate": 3.1976077490677106e-07, "loss": 0.2703, "step": 19643 }, { "epoch": 0.8890699253224712, "grad_norm": 0.2689051847218968, "learning_rate": 3.195029302557462e-07, "loss": 0.4517, "step": 19644 }, { "epoch": 0.8891151844308667, "grad_norm": 0.8638207476845396, "learning_rate": 3.1924518617422796e-07, "loss": 0.2631, "step": 19645 }, { "epoch": 0.8891604435392623, "grad_norm": 0.5727394461563624, "learning_rate": 3.1898754266775467e-07, "loss": 0.2959, "step": 19646 }, { "epoch": 0.8892057026476579, "grad_norm": 0.6598354797321724, "learning_rate": 3.1872999974186194e-07, "loss": 0.2718, "step": 19647 }, { "epoch": 0.8892509617560534, "grad_norm": 0.5766600113148568, "learning_rate": 3.1847255740208636e-07, "loss": 0.2871, "step": 19648 }, { "epoch": 0.8892962208644489, "grad_norm": 0.2839105455499461, "learning_rate": 3.182152156539553e-07, "loss": 0.4891, "step": 19649 }, { "epoch": 0.8893414799728445, "grad_norm": 0.6785505816778864, "learning_rate": 3.179579745029998e-07, "loss": 0.2825, "step": 19650 }, { "epoch": 0.8893867390812401, "grad_norm": 0.5529712585838098, "learning_rate": 3.1770083395474827e-07, "loss": 0.2801, "step": 19651 }, { "epoch": 0.8894319981896357, "grad_norm": 0.6154279850046951, "learning_rate": 3.174437940147268e-07, "loss": 0.3026, "step": 19652 }, { "epoch": 0.8894772572980312, "grad_norm": 0.6320580096458656, "learning_rate": 3.171868546884549e-07, "loss": 0.2991, "step": 19653 }, { "epoch": 0.8895225164064268, "grad_norm": 0.5896944818087667, "learning_rate": 3.169300159814559e-07, "loss": 0.2572, "step": 19654 }, { "epoch": 0.8895677755148224, "grad_norm": 0.6314943278887438, "learning_rate": 3.1667327789924815e-07, "loss": 0.2978, "step": 19655 }, { "epoch": 0.889613034623218, "grad_norm": 0.6240822715544811, "learning_rate": 3.1641664044734786e-07, "loss": 0.3456, "step": 19656 }, { "epoch": 0.8896582937316135, "grad_norm": 0.5924244399609876, "learning_rate": 3.1616010363126893e-07, "loss": 0.3238, "step": 19657 }, { "epoch": 0.889703552840009, "grad_norm": 0.6401946065654505, "learning_rate": 3.159036674565247e-07, "loss": 0.2926, "step": 19658 }, { "epoch": 0.8897488119484046, "grad_norm": 0.6277174806465244, "learning_rate": 3.156473319286241e-07, "loss": 0.2609, "step": 19659 }, { "epoch": 0.8897940710568002, "grad_norm": 0.6024028873920564, "learning_rate": 3.15391097053076e-07, "loss": 0.2848, "step": 19660 }, { "epoch": 0.8898393301651958, "grad_norm": 0.6713968922655957, "learning_rate": 3.151349628353856e-07, "loss": 0.2723, "step": 19661 }, { "epoch": 0.8898845892735913, "grad_norm": 0.28489424233366306, "learning_rate": 3.1487892928105554e-07, "loss": 0.4566, "step": 19662 }, { "epoch": 0.8899298483819869, "grad_norm": 0.2687351928135246, "learning_rate": 3.146229963955877e-07, "loss": 0.4745, "step": 19663 }, { "epoch": 0.8899751074903824, "grad_norm": 0.2776367386077948, "learning_rate": 3.143671641844831e-07, "loss": 0.464, "step": 19664 }, { "epoch": 0.890020366598778, "grad_norm": 0.7031881152911872, "learning_rate": 3.1411143265323684e-07, "loss": 0.3152, "step": 19665 }, { "epoch": 0.8900656257071735, "grad_norm": 0.7058053280755887, "learning_rate": 3.138558018073434e-07, "loss": 0.3258, "step": 19666 }, { "epoch": 0.8901108848155691, "grad_norm": 0.2524281733289989, "learning_rate": 3.1360027165229677e-07, "loss": 0.4666, "step": 19667 }, { "epoch": 0.8901561439239647, "grad_norm": 0.5566851001002566, "learning_rate": 3.1334484219358754e-07, "loss": 0.3123, "step": 19668 }, { "epoch": 0.8902014030323603, "grad_norm": 0.6187301932164185, "learning_rate": 3.13089513436704e-07, "loss": 0.3133, "step": 19669 }, { "epoch": 0.8902466621407559, "grad_norm": 0.6546110361034537, "learning_rate": 3.1283428538713134e-07, "loss": 0.2975, "step": 19670 }, { "epoch": 0.8902919212491514, "grad_norm": 0.5680701004053491, "learning_rate": 3.125791580503551e-07, "loss": 0.2893, "step": 19671 }, { "epoch": 0.8903371803575469, "grad_norm": 0.6341125142971917, "learning_rate": 3.1232413143185534e-07, "loss": 0.285, "step": 19672 }, { "epoch": 0.8903824394659425, "grad_norm": 0.6664029205667784, "learning_rate": 3.1206920553711385e-07, "loss": 0.2534, "step": 19673 }, { "epoch": 0.8904276985743381, "grad_norm": 0.660749332252751, "learning_rate": 3.1181438037160727e-07, "loss": 0.3067, "step": 19674 }, { "epoch": 0.8904729576827336, "grad_norm": 0.2588901641031759, "learning_rate": 3.1155965594081017e-07, "loss": 0.4597, "step": 19675 }, { "epoch": 0.8905182167911292, "grad_norm": 0.6116703932544021, "learning_rate": 3.1130503225019705e-07, "loss": 0.3262, "step": 19676 }, { "epoch": 0.8905634758995248, "grad_norm": 0.28303243772908865, "learning_rate": 3.110505093052396e-07, "loss": 0.4598, "step": 19677 }, { "epoch": 0.8906087350079204, "grad_norm": 0.624993176785746, "learning_rate": 3.107960871114041e-07, "loss": 0.2759, "step": 19678 }, { "epoch": 0.890653994116316, "grad_norm": 0.2662360133368587, "learning_rate": 3.1054176567415937e-07, "loss": 0.4466, "step": 19679 }, { "epoch": 0.8906992532247114, "grad_norm": 0.6189525480592604, "learning_rate": 3.1028754499896895e-07, "loss": 0.2865, "step": 19680 }, { "epoch": 0.890744512333107, "grad_norm": 0.6925785423664801, "learning_rate": 3.1003342509129783e-07, "loss": 0.287, "step": 19681 }, { "epoch": 0.8907897714415026, "grad_norm": 0.5897566594709222, "learning_rate": 3.097794059566023e-07, "loss": 0.2926, "step": 19682 }, { "epoch": 0.8908350305498982, "grad_norm": 0.5965249195848944, "learning_rate": 3.0952548760034284e-07, "loss": 0.2999, "step": 19683 }, { "epoch": 0.8908802896582937, "grad_norm": 0.2501494233144302, "learning_rate": 3.0927167002797574e-07, "loss": 0.443, "step": 19684 }, { "epoch": 0.8909255487666893, "grad_norm": 0.6550215832039792, "learning_rate": 3.0901795324495334e-07, "loss": 0.2894, "step": 19685 }, { "epoch": 0.8909708078750849, "grad_norm": 0.6083383919033705, "learning_rate": 3.0876433725672783e-07, "loss": 0.2451, "step": 19686 }, { "epoch": 0.8910160669834805, "grad_norm": 0.5901909902580682, "learning_rate": 3.085108220687494e-07, "loss": 0.2653, "step": 19687 }, { "epoch": 0.8910613260918759, "grad_norm": 0.6244542350942888, "learning_rate": 3.082574076864636e-07, "loss": 0.3328, "step": 19688 }, { "epoch": 0.8911065852002715, "grad_norm": 0.6182059717072552, "learning_rate": 3.0800409411531727e-07, "loss": 0.2903, "step": 19689 }, { "epoch": 0.8911518443086671, "grad_norm": 0.27308560925210523, "learning_rate": 3.077508813607527e-07, "loss": 0.4782, "step": 19690 }, { "epoch": 0.8911971034170627, "grad_norm": 0.6191578915878152, "learning_rate": 3.0749776942820943e-07, "loss": 0.3073, "step": 19691 }, { "epoch": 0.8912423625254583, "grad_norm": 0.6610736152984831, "learning_rate": 3.072447583231275e-07, "loss": 0.224, "step": 19692 }, { "epoch": 0.8912876216338538, "grad_norm": 0.2789734855593772, "learning_rate": 3.0699184805094374e-07, "loss": 0.4748, "step": 19693 }, { "epoch": 0.8913328807422494, "grad_norm": 0.7874295132580852, "learning_rate": 3.067390386170915e-07, "loss": 0.2646, "step": 19694 }, { "epoch": 0.891378139850645, "grad_norm": 0.2728170143891171, "learning_rate": 3.064863300270027e-07, "loss": 0.4445, "step": 19695 }, { "epoch": 0.8914233989590405, "grad_norm": 0.7961230497887644, "learning_rate": 3.0623372228610725e-07, "loss": 0.3046, "step": 19696 }, { "epoch": 0.891468658067436, "grad_norm": 1.4269209581457398, "learning_rate": 3.059812153998343e-07, "loss": 0.3039, "step": 19697 }, { "epoch": 0.8915139171758316, "grad_norm": 0.7972617116547703, "learning_rate": 3.057288093736083e-07, "loss": 0.2733, "step": 19698 }, { "epoch": 0.8915591762842272, "grad_norm": 0.650142097827148, "learning_rate": 3.0547650421285216e-07, "loss": 0.2966, "step": 19699 }, { "epoch": 0.8916044353926228, "grad_norm": 1.0215184862868254, "learning_rate": 3.0522429992298873e-07, "loss": 0.3322, "step": 19700 }, { "epoch": 0.8916496945010183, "grad_norm": 0.5915881881019676, "learning_rate": 3.0497219650943545e-07, "loss": 0.2878, "step": 19701 }, { "epoch": 0.8916949536094139, "grad_norm": 0.6089222195563149, "learning_rate": 3.0472019397761065e-07, "loss": 0.2904, "step": 19702 }, { "epoch": 0.8917402127178095, "grad_norm": 0.6306950949640981, "learning_rate": 3.044682923329284e-07, "loss": 0.2931, "step": 19703 }, { "epoch": 0.891785471826205, "grad_norm": 0.6377067107024558, "learning_rate": 3.0421649158080047e-07, "loss": 0.29, "step": 19704 }, { "epoch": 0.8918307309346006, "grad_norm": 0.6472275255096006, "learning_rate": 3.0396479172663806e-07, "loss": 0.2944, "step": 19705 }, { "epoch": 0.8918759900429961, "grad_norm": 0.6015998556020318, "learning_rate": 3.037131927758502e-07, "loss": 0.2888, "step": 19706 }, { "epoch": 0.8919212491513917, "grad_norm": 0.6050816306761447, "learning_rate": 3.0346169473384255e-07, "loss": 0.2742, "step": 19707 }, { "epoch": 0.8919665082597873, "grad_norm": 0.5798568141658198, "learning_rate": 3.032102976060181e-07, "loss": 0.2751, "step": 19708 }, { "epoch": 0.8920117673681829, "grad_norm": 0.7827332971119896, "learning_rate": 3.02959001397779e-07, "loss": 0.2635, "step": 19709 }, { "epoch": 0.8920570264765784, "grad_norm": 0.6218102387465159, "learning_rate": 3.027078061145261e-07, "loss": 0.3007, "step": 19710 }, { "epoch": 0.892102285584974, "grad_norm": 0.5946378433026083, "learning_rate": 3.024567117616556e-07, "loss": 0.2722, "step": 19711 }, { "epoch": 0.8921475446933695, "grad_norm": 0.6229968934552937, "learning_rate": 3.0220571834456256e-07, "loss": 0.2859, "step": 19712 }, { "epoch": 0.8921928038017651, "grad_norm": 0.551512068307753, "learning_rate": 3.0195482586864055e-07, "loss": 0.2961, "step": 19713 }, { "epoch": 0.8922380629101607, "grad_norm": 0.6761291147721131, "learning_rate": 3.0170403433928077e-07, "loss": 0.2716, "step": 19714 }, { "epoch": 0.8922833220185562, "grad_norm": 0.5916261620907665, "learning_rate": 3.014533437618711e-07, "loss": 0.2877, "step": 19715 }, { "epoch": 0.8923285811269518, "grad_norm": 0.6567528853482709, "learning_rate": 3.012027541417989e-07, "loss": 0.3192, "step": 19716 }, { "epoch": 0.8923738402353474, "grad_norm": 0.6371454442751823, "learning_rate": 3.0095226548444765e-07, "loss": 0.3236, "step": 19717 }, { "epoch": 0.892419099343743, "grad_norm": 0.5819460949351073, "learning_rate": 3.007018777952009e-07, "loss": 0.3087, "step": 19718 }, { "epoch": 0.8924643584521385, "grad_norm": 0.6690205421170103, "learning_rate": 3.004515910794381e-07, "loss": 0.2836, "step": 19719 }, { "epoch": 0.892509617560534, "grad_norm": 0.24512552320011244, "learning_rate": 3.0020140534253617e-07, "loss": 0.4502, "step": 19720 }, { "epoch": 0.8925548766689296, "grad_norm": 0.2524877763429363, "learning_rate": 2.9995132058987185e-07, "loss": 0.4571, "step": 19721 }, { "epoch": 0.8926001357773252, "grad_norm": 0.6603633468410276, "learning_rate": 2.9970133682681924e-07, "loss": 0.2898, "step": 19722 }, { "epoch": 0.8926453948857207, "grad_norm": 0.623220132327247, "learning_rate": 2.9945145405874955e-07, "loss": 0.2916, "step": 19723 }, { "epoch": 0.8926906539941163, "grad_norm": 0.694842873664057, "learning_rate": 2.9920167229103015e-07, "loss": 0.284, "step": 19724 }, { "epoch": 0.8927359131025119, "grad_norm": 0.6110616350131243, "learning_rate": 2.9895199152902955e-07, "loss": 0.2988, "step": 19725 }, { "epoch": 0.8927811722109075, "grad_norm": 0.6380149501154079, "learning_rate": 2.987024117781129e-07, "loss": 0.2796, "step": 19726 }, { "epoch": 0.8928264313193031, "grad_norm": 0.5801201468884075, "learning_rate": 2.984529330436431e-07, "loss": 0.2867, "step": 19727 }, { "epoch": 0.8928716904276985, "grad_norm": 0.5946765214793907, "learning_rate": 2.9820355533097864e-07, "loss": 0.2699, "step": 19728 }, { "epoch": 0.8929169495360941, "grad_norm": 0.2925541355352873, "learning_rate": 2.9795427864548034e-07, "loss": 0.4857, "step": 19729 }, { "epoch": 0.8929622086444897, "grad_norm": 0.6101996258652933, "learning_rate": 2.9770510299250265e-07, "loss": 0.2928, "step": 19730 }, { "epoch": 0.8930074677528853, "grad_norm": 0.9588491138498303, "learning_rate": 2.974560283774014e-07, "loss": 0.3178, "step": 19731 }, { "epoch": 0.8930527268612808, "grad_norm": 0.650951067812214, "learning_rate": 2.972070548055267e-07, "loss": 0.3092, "step": 19732 }, { "epoch": 0.8930979859696764, "grad_norm": 0.6326366483138217, "learning_rate": 2.9695818228222873e-07, "loss": 0.2679, "step": 19733 }, { "epoch": 0.893143245078072, "grad_norm": 0.6164318764744039, "learning_rate": 2.967094108128549e-07, "loss": 0.3045, "step": 19734 }, { "epoch": 0.8931885041864676, "grad_norm": 0.563153323637875, "learning_rate": 2.964607404027514e-07, "loss": 0.2565, "step": 19735 }, { "epoch": 0.893233763294863, "grad_norm": 0.607980885936067, "learning_rate": 2.9621217105726077e-07, "loss": 0.3056, "step": 19736 }, { "epoch": 0.8932790224032586, "grad_norm": 0.708213998184299, "learning_rate": 2.9596370278172305e-07, "loss": 0.2901, "step": 19737 }, { "epoch": 0.8933242815116542, "grad_norm": 0.6246550411649562, "learning_rate": 2.9571533558147845e-07, "loss": 0.2731, "step": 19738 }, { "epoch": 0.8933695406200498, "grad_norm": 0.6935539968631732, "learning_rate": 2.9546706946186387e-07, "loss": 0.3315, "step": 19739 }, { "epoch": 0.8934147997284454, "grad_norm": 0.61341847188213, "learning_rate": 2.9521890442821276e-07, "loss": 0.2844, "step": 19740 }, { "epoch": 0.8934600588368409, "grad_norm": 0.6046356361903988, "learning_rate": 2.9497084048585755e-07, "loss": 0.2901, "step": 19741 }, { "epoch": 0.8935053179452365, "grad_norm": 0.6001415015783245, "learning_rate": 2.94722877640129e-07, "loss": 0.2884, "step": 19742 }, { "epoch": 0.8935505770536321, "grad_norm": 0.6380842124636362, "learning_rate": 2.9447501589635387e-07, "loss": 0.247, "step": 19743 }, { "epoch": 0.8935958361620276, "grad_norm": 0.5885874973556446, "learning_rate": 2.942272552598596e-07, "loss": 0.3277, "step": 19744 }, { "epoch": 0.8936410952704231, "grad_norm": 0.6059575773494633, "learning_rate": 2.9397959573596867e-07, "loss": 0.2867, "step": 19745 }, { "epoch": 0.8936863543788187, "grad_norm": 0.25104711480411934, "learning_rate": 2.9373203733000234e-07, "loss": 0.4673, "step": 19746 }, { "epoch": 0.8937316134872143, "grad_norm": 0.2764519128206775, "learning_rate": 2.9348458004728074e-07, "loss": 0.4565, "step": 19747 }, { "epoch": 0.8937768725956099, "grad_norm": 0.25004411755184186, "learning_rate": 2.9323722389312084e-07, "loss": 0.448, "step": 19748 }, { "epoch": 0.8938221317040055, "grad_norm": 0.2816876893548513, "learning_rate": 2.929899688728366e-07, "loss": 0.4502, "step": 19749 }, { "epoch": 0.893867390812401, "grad_norm": 0.6316241558565415, "learning_rate": 2.927428149917416e-07, "loss": 0.2989, "step": 19750 }, { "epoch": 0.8939126499207966, "grad_norm": 0.2710113008541533, "learning_rate": 2.9249576225514664e-07, "loss": 0.4817, "step": 19751 }, { "epoch": 0.8939579090291921, "grad_norm": 0.633843670051217, "learning_rate": 2.922488106683596e-07, "loss": 0.3181, "step": 19752 }, { "epoch": 0.8940031681375877, "grad_norm": 0.6530386741855132, "learning_rate": 2.9200196023668693e-07, "loss": 0.3278, "step": 19753 }, { "epoch": 0.8940484272459832, "grad_norm": 0.6829899675953176, "learning_rate": 2.91755210965432e-07, "loss": 0.3048, "step": 19754 }, { "epoch": 0.8940936863543788, "grad_norm": 0.5950240474932199, "learning_rate": 2.915085628598979e-07, "loss": 0.291, "step": 19755 }, { "epoch": 0.8941389454627744, "grad_norm": 0.27733112695232565, "learning_rate": 2.9126201592538427e-07, "loss": 0.4681, "step": 19756 }, { "epoch": 0.89418420457117, "grad_norm": 0.6544517655613107, "learning_rate": 2.910155701671868e-07, "loss": 0.2653, "step": 19757 }, { "epoch": 0.8942294636795655, "grad_norm": 0.2657406569618254, "learning_rate": 2.907692255906036e-07, "loss": 0.4757, "step": 19758 }, { "epoch": 0.894274722787961, "grad_norm": 0.6613698668778631, "learning_rate": 2.905229822009253e-07, "loss": 0.3149, "step": 19759 }, { "epoch": 0.8943199818963566, "grad_norm": 0.5564095968570708, "learning_rate": 2.9027684000344446e-07, "loss": 0.2823, "step": 19760 }, { "epoch": 0.8943652410047522, "grad_norm": 0.2668504868753882, "learning_rate": 2.900307990034501e-07, "loss": 0.4609, "step": 19761 }, { "epoch": 0.8944105001131478, "grad_norm": 0.6364155599319349, "learning_rate": 2.8978485920622747e-07, "loss": 0.3106, "step": 19762 }, { "epoch": 0.8944557592215433, "grad_norm": 0.550987796887358, "learning_rate": 2.8953902061706173e-07, "loss": 0.2661, "step": 19763 }, { "epoch": 0.8945010183299389, "grad_norm": 0.25702933360286156, "learning_rate": 2.8929328324123595e-07, "loss": 0.4433, "step": 19764 }, { "epoch": 0.8945462774383345, "grad_norm": 0.5719382824193083, "learning_rate": 2.890476470840303e-07, "loss": 0.3219, "step": 19765 }, { "epoch": 0.8945915365467301, "grad_norm": 0.6305795164274872, "learning_rate": 2.8880211215072065e-07, "loss": 0.279, "step": 19766 }, { "epoch": 0.8946367956551255, "grad_norm": 0.6123742014140814, "learning_rate": 2.8855667844658484e-07, "loss": 0.2827, "step": 19767 }, { "epoch": 0.8946820547635211, "grad_norm": 0.6371501831330458, "learning_rate": 2.8831134597689604e-07, "loss": 0.2471, "step": 19768 }, { "epoch": 0.8947273138719167, "grad_norm": 0.6011369890792166, "learning_rate": 2.8806611474692604e-07, "loss": 0.3209, "step": 19769 }, { "epoch": 0.8947725729803123, "grad_norm": 0.6019283022002772, "learning_rate": 2.878209847619429e-07, "loss": 0.2731, "step": 19770 }, { "epoch": 0.8948178320887078, "grad_norm": 0.6117442681689266, "learning_rate": 2.875759560272151e-07, "loss": 0.2717, "step": 19771 }, { "epoch": 0.8948630911971034, "grad_norm": 0.2607889799884667, "learning_rate": 2.873310285480063e-07, "loss": 0.4742, "step": 19772 }, { "epoch": 0.894908350305499, "grad_norm": 0.6182961608864804, "learning_rate": 2.8708620232958004e-07, "loss": 0.3086, "step": 19773 }, { "epoch": 0.8949536094138946, "grad_norm": 0.579715758551968, "learning_rate": 2.868414773771971e-07, "loss": 0.294, "step": 19774 }, { "epoch": 0.8949988685222902, "grad_norm": 0.5744817660343502, "learning_rate": 2.8659685369611503e-07, "loss": 0.275, "step": 19775 }, { "epoch": 0.8950441276306856, "grad_norm": 0.5618204289540498, "learning_rate": 2.8635233129159004e-07, "loss": 0.2363, "step": 19776 }, { "epoch": 0.8950893867390812, "grad_norm": 0.650461405504146, "learning_rate": 2.8610791016887794e-07, "loss": 0.2863, "step": 19777 }, { "epoch": 0.8951346458474768, "grad_norm": 0.5736113775880847, "learning_rate": 2.85863590333228e-07, "loss": 0.2734, "step": 19778 }, { "epoch": 0.8951799049558724, "grad_norm": 0.5879128588576477, "learning_rate": 2.8561937178989087e-07, "loss": 0.2767, "step": 19779 }, { "epoch": 0.8952251640642679, "grad_norm": 0.6277117466751468, "learning_rate": 2.853752545441146e-07, "loss": 0.2962, "step": 19780 }, { "epoch": 0.8952704231726635, "grad_norm": 0.666912011889595, "learning_rate": 2.851312386011457e-07, "loss": 0.3418, "step": 19781 }, { "epoch": 0.8953156822810591, "grad_norm": 0.724911195777025, "learning_rate": 2.8488732396622476e-07, "loss": 0.2705, "step": 19782 }, { "epoch": 0.8953609413894547, "grad_norm": 0.28456627284890906, "learning_rate": 2.846435106445933e-07, "loss": 0.459, "step": 19783 }, { "epoch": 0.8954062004978502, "grad_norm": 0.6560552485337452, "learning_rate": 2.843997986414915e-07, "loss": 0.254, "step": 19784 }, { "epoch": 0.8954514596062457, "grad_norm": 0.6501966318352483, "learning_rate": 2.8415618796215516e-07, "loss": 0.3142, "step": 19785 }, { "epoch": 0.8954967187146413, "grad_norm": 0.25189795807932797, "learning_rate": 2.839126786118179e-07, "loss": 0.4688, "step": 19786 }, { "epoch": 0.8955419778230369, "grad_norm": 0.5445596252129087, "learning_rate": 2.8366927059571393e-07, "loss": 0.2673, "step": 19787 }, { "epoch": 0.8955872369314325, "grad_norm": 0.6155411118466514, "learning_rate": 2.834259639190712e-07, "loss": 0.2709, "step": 19788 }, { "epoch": 0.895632496039828, "grad_norm": 0.5917130188296267, "learning_rate": 2.8318275858711943e-07, "loss": 0.3112, "step": 19789 }, { "epoch": 0.8956777551482236, "grad_norm": 0.2553760666330001, "learning_rate": 2.829396546050839e-07, "loss": 0.4581, "step": 19790 }, { "epoch": 0.8957230142566192, "grad_norm": 0.26227643370388387, "learning_rate": 2.826966519781871e-07, "loss": 0.4565, "step": 19791 }, { "epoch": 0.8957682733650147, "grad_norm": 0.6770457020151776, "learning_rate": 2.824537507116504e-07, "loss": 0.2896, "step": 19792 }, { "epoch": 0.8958135324734102, "grad_norm": 0.6022884364992735, "learning_rate": 2.8221095081069517e-07, "loss": 0.2608, "step": 19793 }, { "epoch": 0.8958587915818058, "grad_norm": 0.5787444811677719, "learning_rate": 2.819682522805367e-07, "loss": 0.2736, "step": 19794 }, { "epoch": 0.8959040506902014, "grad_norm": 0.599606507374959, "learning_rate": 2.8172565512638974e-07, "loss": 0.2927, "step": 19795 }, { "epoch": 0.895949309798597, "grad_norm": 0.625534914275994, "learning_rate": 2.8148315935346725e-07, "loss": 0.2343, "step": 19796 }, { "epoch": 0.8959945689069926, "grad_norm": 0.5826505623078216, "learning_rate": 2.812407649669807e-07, "loss": 0.3011, "step": 19797 }, { "epoch": 0.8960398280153881, "grad_norm": 0.6161056662098575, "learning_rate": 2.809984719721376e-07, "loss": 0.2726, "step": 19798 }, { "epoch": 0.8960850871237837, "grad_norm": 0.5970542362819024, "learning_rate": 2.807562803741426e-07, "loss": 0.2774, "step": 19799 }, { "epoch": 0.8961303462321792, "grad_norm": 0.6152274129951627, "learning_rate": 2.805141901782027e-07, "loss": 0.2983, "step": 19800 }, { "epoch": 0.8961756053405748, "grad_norm": 0.28077492126298775, "learning_rate": 2.8027220138951705e-07, "loss": 0.4626, "step": 19801 }, { "epoch": 0.8962208644489703, "grad_norm": 0.6393216136802916, "learning_rate": 2.8003031401328653e-07, "loss": 0.2933, "step": 19802 }, { "epoch": 0.8962661235573659, "grad_norm": 0.5985841723566697, "learning_rate": 2.797885280547086e-07, "loss": 0.2614, "step": 19803 }, { "epoch": 0.8963113826657615, "grad_norm": 0.5298212310543782, "learning_rate": 2.795468435189774e-07, "loss": 0.2568, "step": 19804 }, { "epoch": 0.8963566417741571, "grad_norm": 0.5987061216283815, "learning_rate": 2.7930526041128727e-07, "loss": 0.2981, "step": 19805 }, { "epoch": 0.8964019008825526, "grad_norm": 0.6934345951631308, "learning_rate": 2.790637787368294e-07, "loss": 0.3146, "step": 19806 }, { "epoch": 0.8964471599909482, "grad_norm": 0.6084117691504284, "learning_rate": 2.788223985007904e-07, "loss": 0.2934, "step": 19807 }, { "epoch": 0.8964924190993437, "grad_norm": 0.5735161793850959, "learning_rate": 2.7858111970835823e-07, "loss": 0.3225, "step": 19808 }, { "epoch": 0.8965376782077393, "grad_norm": 0.6054105921436966, "learning_rate": 2.783399423647171e-07, "loss": 0.3502, "step": 19809 }, { "epoch": 0.8965829373161349, "grad_norm": 0.7195183412606594, "learning_rate": 2.7809886647505e-07, "loss": 0.3202, "step": 19810 }, { "epoch": 0.8966281964245304, "grad_norm": 0.5922610762383163, "learning_rate": 2.778578920445352e-07, "loss": 0.2581, "step": 19811 }, { "epoch": 0.896673455532926, "grad_norm": 0.8144053010320692, "learning_rate": 2.7761701907835114e-07, "loss": 0.3336, "step": 19812 }, { "epoch": 0.8967187146413216, "grad_norm": 0.5985999716901659, "learning_rate": 2.7737624758167436e-07, "loss": 0.3069, "step": 19813 }, { "epoch": 0.8967639737497172, "grad_norm": 0.6518395917982522, "learning_rate": 2.771355775596779e-07, "loss": 0.2978, "step": 19814 }, { "epoch": 0.8968092328581126, "grad_norm": 0.6309417840869341, "learning_rate": 2.768950090175315e-07, "loss": 0.3159, "step": 19815 }, { "epoch": 0.8968544919665082, "grad_norm": 0.28977489773166926, "learning_rate": 2.7665454196040665e-07, "loss": 0.4729, "step": 19816 }, { "epoch": 0.8968997510749038, "grad_norm": 0.2650338371075625, "learning_rate": 2.76414176393468e-07, "loss": 0.4647, "step": 19817 }, { "epoch": 0.8969450101832994, "grad_norm": 0.5966383668175729, "learning_rate": 2.7617391232188207e-07, "loss": 0.2825, "step": 19818 }, { "epoch": 0.896990269291695, "grad_norm": 0.7291846411758335, "learning_rate": 2.7593374975081075e-07, "loss": 0.2948, "step": 19819 }, { "epoch": 0.8970355284000905, "grad_norm": 0.29386027248409563, "learning_rate": 2.7569368868541333e-07, "loss": 0.4718, "step": 19820 }, { "epoch": 0.8970807875084861, "grad_norm": 0.5366175112637515, "learning_rate": 2.75453729130849e-07, "loss": 0.2841, "step": 19821 }, { "epoch": 0.8971260466168817, "grad_norm": 0.5773925409926636, "learning_rate": 2.752138710922747e-07, "loss": 0.2648, "step": 19822 }, { "epoch": 0.8971713057252773, "grad_norm": 0.6528258350735204, "learning_rate": 2.74974114574843e-07, "loss": 0.2812, "step": 19823 }, { "epoch": 0.8972165648336727, "grad_norm": 0.5804721946337439, "learning_rate": 2.747344595837048e-07, "loss": 0.2388, "step": 19824 }, { "epoch": 0.8972618239420683, "grad_norm": 0.5968044745916934, "learning_rate": 2.74494906124011e-07, "loss": 0.3212, "step": 19825 }, { "epoch": 0.8973070830504639, "grad_norm": 0.6206777396994065, "learning_rate": 2.7425545420090906e-07, "loss": 0.2932, "step": 19826 }, { "epoch": 0.8973523421588595, "grad_norm": 0.5662554206299493, "learning_rate": 2.7401610381954325e-07, "loss": 0.2769, "step": 19827 }, { "epoch": 0.897397601267255, "grad_norm": 0.6144031913927165, "learning_rate": 2.7377685498505557e-07, "loss": 0.3565, "step": 19828 }, { "epoch": 0.8974428603756506, "grad_norm": 0.2641253441420351, "learning_rate": 2.7353770770258915e-07, "loss": 0.4778, "step": 19829 }, { "epoch": 0.8974881194840462, "grad_norm": 0.5820930843681503, "learning_rate": 2.7329866197727983e-07, "loss": 0.3096, "step": 19830 }, { "epoch": 0.8975333785924418, "grad_norm": 0.274671774086108, "learning_rate": 2.7305971781426634e-07, "loss": 0.4733, "step": 19831 }, { "epoch": 0.8975786377008373, "grad_norm": 0.586907469338291, "learning_rate": 2.728208752186817e-07, "loss": 0.2492, "step": 19832 }, { "epoch": 0.8976238968092328, "grad_norm": 0.2651039284392432, "learning_rate": 2.725821341956575e-07, "loss": 0.4904, "step": 19833 }, { "epoch": 0.8976691559176284, "grad_norm": 0.558027137087256, "learning_rate": 2.7234349475032395e-07, "loss": 0.2695, "step": 19834 }, { "epoch": 0.897714415026024, "grad_norm": 0.2854868461334528, "learning_rate": 2.7210495688781037e-07, "loss": 0.4558, "step": 19835 }, { "epoch": 0.8977596741344196, "grad_norm": 0.2792178849027721, "learning_rate": 2.7186652061323924e-07, "loss": 0.4828, "step": 19836 }, { "epoch": 0.8978049332428151, "grad_norm": 0.6265739449917149, "learning_rate": 2.716281859317349e-07, "loss": 0.2715, "step": 19837 }, { "epoch": 0.8978501923512107, "grad_norm": 0.6166940853718327, "learning_rate": 2.713899528484193e-07, "loss": 0.3151, "step": 19838 }, { "epoch": 0.8978954514596063, "grad_norm": 0.6109203277092121, "learning_rate": 2.7115182136841166e-07, "loss": 0.3109, "step": 19839 }, { "epoch": 0.8979407105680018, "grad_norm": 0.5714322175597286, "learning_rate": 2.7091379149682683e-07, "loss": 0.3114, "step": 19840 }, { "epoch": 0.8979859696763973, "grad_norm": 0.2799105941790582, "learning_rate": 2.7067586323878014e-07, "loss": 0.4597, "step": 19841 }, { "epoch": 0.8980312287847929, "grad_norm": 0.6264111829057746, "learning_rate": 2.704380365993847e-07, "loss": 0.2858, "step": 19842 }, { "epoch": 0.8980764878931885, "grad_norm": 0.6384133066448844, "learning_rate": 2.7020031158375037e-07, "loss": 0.3181, "step": 19843 }, { "epoch": 0.8981217470015841, "grad_norm": 0.6823010909322235, "learning_rate": 2.699626881969841e-07, "loss": 0.3096, "step": 19844 }, { "epoch": 0.8981670061099797, "grad_norm": 0.6476981453613727, "learning_rate": 2.6972516644419356e-07, "loss": 0.2549, "step": 19845 }, { "epoch": 0.8982122652183752, "grad_norm": 0.6620153827854617, "learning_rate": 2.6948774633048016e-07, "loss": 0.3313, "step": 19846 }, { "epoch": 0.8982575243267708, "grad_norm": 0.748192870105131, "learning_rate": 2.69250427860947e-07, "loss": 0.295, "step": 19847 }, { "epoch": 0.8983027834351663, "grad_norm": 0.5661829688022068, "learning_rate": 2.690132110406929e-07, "loss": 0.2879, "step": 19848 }, { "epoch": 0.8983480425435619, "grad_norm": 0.602374880920724, "learning_rate": 2.687760958748137e-07, "loss": 0.3005, "step": 19849 }, { "epoch": 0.8983933016519574, "grad_norm": 0.7271757380328313, "learning_rate": 2.6853908236840586e-07, "loss": 0.3028, "step": 19850 }, { "epoch": 0.898438560760353, "grad_norm": 0.6395083076273245, "learning_rate": 2.68302170526562e-07, "loss": 0.2946, "step": 19851 }, { "epoch": 0.8984838198687486, "grad_norm": 0.5677435398394265, "learning_rate": 2.680653603543726e-07, "loss": 0.268, "step": 19852 }, { "epoch": 0.8985290789771442, "grad_norm": 0.6325083354864175, "learning_rate": 2.678286518569245e-07, "loss": 0.3044, "step": 19853 }, { "epoch": 0.8985743380855398, "grad_norm": 0.5950952403576475, "learning_rate": 2.675920450393049e-07, "loss": 0.3017, "step": 19854 }, { "epoch": 0.8986195971939352, "grad_norm": 0.6290601694265202, "learning_rate": 2.673555399065986e-07, "loss": 0.2637, "step": 19855 }, { "epoch": 0.8986648563023308, "grad_norm": 0.6594481525414645, "learning_rate": 2.6711913646388645e-07, "loss": 0.2924, "step": 19856 }, { "epoch": 0.8987101154107264, "grad_norm": 0.6759843523758717, "learning_rate": 2.6688283471624775e-07, "loss": 0.3086, "step": 19857 }, { "epoch": 0.898755374519122, "grad_norm": 0.6150657269648987, "learning_rate": 2.666466346687607e-07, "loss": 0.2711, "step": 19858 }, { "epoch": 0.8988006336275175, "grad_norm": 0.683435761376238, "learning_rate": 2.6641053632649907e-07, "loss": 0.2714, "step": 19859 }, { "epoch": 0.8988458927359131, "grad_norm": 0.6200624707524008, "learning_rate": 2.661745396945381e-07, "loss": 0.3114, "step": 19860 }, { "epoch": 0.8988911518443087, "grad_norm": 0.5928210284723426, "learning_rate": 2.6593864477794716e-07, "loss": 0.2759, "step": 19861 }, { "epoch": 0.8989364109527043, "grad_norm": 0.6245841851464607, "learning_rate": 2.65702851581795e-07, "loss": 0.2771, "step": 19862 }, { "epoch": 0.8989816700610997, "grad_norm": 0.6491764300200128, "learning_rate": 2.654671601111475e-07, "loss": 0.3323, "step": 19863 }, { "epoch": 0.8990269291694953, "grad_norm": 0.5722118069854693, "learning_rate": 2.652315703710712e-07, "loss": 0.3018, "step": 19864 }, { "epoch": 0.8990721882778909, "grad_norm": 0.5964576792561528, "learning_rate": 2.649960823666259e-07, "loss": 0.2881, "step": 19865 }, { "epoch": 0.8991174473862865, "grad_norm": 0.5918655001352278, "learning_rate": 2.64760696102872e-07, "loss": 0.316, "step": 19866 }, { "epoch": 0.8991627064946821, "grad_norm": 0.5898085749442864, "learning_rate": 2.6452541158486776e-07, "loss": 0.3112, "step": 19867 }, { "epoch": 0.8992079656030776, "grad_norm": 0.6609907398862443, "learning_rate": 2.642902288176696e-07, "loss": 0.254, "step": 19868 }, { "epoch": 0.8992532247114732, "grad_norm": 0.6117096439894077, "learning_rate": 2.640551478063286e-07, "loss": 0.2878, "step": 19869 }, { "epoch": 0.8992984838198688, "grad_norm": 0.6931583049749798, "learning_rate": 2.638201685558972e-07, "loss": 0.2783, "step": 19870 }, { "epoch": 0.8993437429282644, "grad_norm": 0.2860786343684956, "learning_rate": 2.6358529107142485e-07, "loss": 0.4694, "step": 19871 }, { "epoch": 0.8993890020366598, "grad_norm": 0.2578641755720357, "learning_rate": 2.63350515357958e-07, "loss": 0.4533, "step": 19872 }, { "epoch": 0.8994342611450554, "grad_norm": 0.5993616311568444, "learning_rate": 2.6311584142054036e-07, "loss": 0.3173, "step": 19873 }, { "epoch": 0.899479520253451, "grad_norm": 0.6154401783980245, "learning_rate": 2.6288126926421576e-07, "loss": 0.2741, "step": 19874 }, { "epoch": 0.8995247793618466, "grad_norm": 0.6898472911804348, "learning_rate": 2.626467988940229e-07, "loss": 0.2823, "step": 19875 }, { "epoch": 0.8995700384702421, "grad_norm": 0.6204420429013519, "learning_rate": 2.624124303150011e-07, "loss": 0.2818, "step": 19876 }, { "epoch": 0.8996152975786377, "grad_norm": 0.6136446340161927, "learning_rate": 2.621781635321863e-07, "loss": 0.2975, "step": 19877 }, { "epoch": 0.8996605566870333, "grad_norm": 0.6612494059694828, "learning_rate": 2.6194399855061056e-07, "loss": 0.2961, "step": 19878 }, { "epoch": 0.8997058157954289, "grad_norm": 0.6256958878919391, "learning_rate": 2.6170993537530665e-07, "loss": 0.2769, "step": 19879 }, { "epoch": 0.8997510749038244, "grad_norm": 0.6156127701428498, "learning_rate": 2.6147597401130433e-07, "loss": 0.2967, "step": 19880 }, { "epoch": 0.8997963340122199, "grad_norm": 0.5999686058867921, "learning_rate": 2.612421144636301e-07, "loss": 0.2683, "step": 19881 }, { "epoch": 0.8998415931206155, "grad_norm": 0.6345276232974958, "learning_rate": 2.610083567373078e-07, "loss": 0.2729, "step": 19882 }, { "epoch": 0.8998868522290111, "grad_norm": 0.6370424196514467, "learning_rate": 2.6077470083736176e-07, "loss": 0.2669, "step": 19883 }, { "epoch": 0.8999321113374067, "grad_norm": 0.25297348773573924, "learning_rate": 2.6054114676881237e-07, "loss": 0.4509, "step": 19884 }, { "epoch": 0.8999773704458022, "grad_norm": 0.6323420343265559, "learning_rate": 2.6030769453667783e-07, "loss": 0.2861, "step": 19885 }, { "epoch": 0.9000226295541978, "grad_norm": 0.6696787676826822, "learning_rate": 2.60074344145973e-07, "loss": 0.3002, "step": 19886 }, { "epoch": 0.9000678886625934, "grad_norm": 0.6649350361638969, "learning_rate": 2.5984109560171387e-07, "loss": 0.3064, "step": 19887 }, { "epoch": 0.9001131477709889, "grad_norm": 0.2588500924076885, "learning_rate": 2.5960794890891093e-07, "loss": 0.4569, "step": 19888 }, { "epoch": 0.9001584068793844, "grad_norm": 0.6195830757026074, "learning_rate": 2.593749040725746e-07, "loss": 0.3292, "step": 19889 }, { "epoch": 0.90020366598778, "grad_norm": 0.5795950758623272, "learning_rate": 2.5914196109771197e-07, "loss": 0.2796, "step": 19890 }, { "epoch": 0.9002489250961756, "grad_norm": 0.2654230841344855, "learning_rate": 2.5890911998932735e-07, "loss": 0.4723, "step": 19891 }, { "epoch": 0.9002941842045712, "grad_norm": 0.5663503785166497, "learning_rate": 2.5867638075242454e-07, "loss": 0.2968, "step": 19892 }, { "epoch": 0.9003394433129668, "grad_norm": 0.6279957619169866, "learning_rate": 2.5844374339200505e-07, "loss": 0.3256, "step": 19893 }, { "epoch": 0.9003847024213623, "grad_norm": 0.5559620151418101, "learning_rate": 2.5821120791306665e-07, "loss": 0.2999, "step": 19894 }, { "epoch": 0.9004299615297578, "grad_norm": 0.7242133638005693, "learning_rate": 2.579787743206058e-07, "loss": 0.2995, "step": 19895 }, { "epoch": 0.9004752206381534, "grad_norm": 0.6079929095062804, "learning_rate": 2.5774644261961746e-07, "loss": 0.2885, "step": 19896 }, { "epoch": 0.900520479746549, "grad_norm": 0.25827572484656686, "learning_rate": 2.5751421281509426e-07, "loss": 0.4523, "step": 19897 }, { "epoch": 0.9005657388549445, "grad_norm": 0.5761463549202924, "learning_rate": 2.572820849120239e-07, "loss": 0.2691, "step": 19898 }, { "epoch": 0.9006109979633401, "grad_norm": 0.6240479186911713, "learning_rate": 2.5705005891539516e-07, "loss": 0.2806, "step": 19899 }, { "epoch": 0.9006562570717357, "grad_norm": 0.6456703003520794, "learning_rate": 2.5681813483019515e-07, "loss": 0.2929, "step": 19900 }, { "epoch": 0.9007015161801313, "grad_norm": 0.624521209277269, "learning_rate": 2.565863126614049e-07, "loss": 0.2685, "step": 19901 }, { "epoch": 0.9007467752885269, "grad_norm": 0.6036744838044934, "learning_rate": 2.563545924140065e-07, "loss": 0.3145, "step": 19902 }, { "epoch": 0.9007920343969223, "grad_norm": 0.6375461561357829, "learning_rate": 2.5612297409297937e-07, "loss": 0.2643, "step": 19903 }, { "epoch": 0.9008372935053179, "grad_norm": 0.6038009033673213, "learning_rate": 2.558914577032995e-07, "loss": 0.3051, "step": 19904 }, { "epoch": 0.9008825526137135, "grad_norm": 0.5593653532095186, "learning_rate": 2.5566004324994174e-07, "loss": 0.2905, "step": 19905 }, { "epoch": 0.9009278117221091, "grad_norm": 0.5949014043160534, "learning_rate": 2.554287307378794e-07, "loss": 0.2783, "step": 19906 }, { "epoch": 0.9009730708305046, "grad_norm": 0.5745613856583127, "learning_rate": 2.551975201720802e-07, "loss": 0.2942, "step": 19907 }, { "epoch": 0.9010183299389002, "grad_norm": 0.6167366159363535, "learning_rate": 2.5496641155751456e-07, "loss": 0.2976, "step": 19908 }, { "epoch": 0.9010635890472958, "grad_norm": 0.6439202150247513, "learning_rate": 2.5473540489914794e-07, "loss": 0.295, "step": 19909 }, { "epoch": 0.9011088481556914, "grad_norm": 0.6657246782177099, "learning_rate": 2.5450450020194306e-07, "loss": 0.3307, "step": 19910 }, { "epoch": 0.9011541072640868, "grad_norm": 0.6133423808228226, "learning_rate": 2.542736974708615e-07, "loss": 0.2642, "step": 19911 }, { "epoch": 0.9011993663724824, "grad_norm": 0.686243999667497, "learning_rate": 2.5404299671086264e-07, "loss": 0.3237, "step": 19912 }, { "epoch": 0.901244625480878, "grad_norm": 0.6013825159558385, "learning_rate": 2.538123979269047e-07, "loss": 0.2688, "step": 19913 }, { "epoch": 0.9012898845892736, "grad_norm": 0.5847798766604101, "learning_rate": 2.5358190112394097e-07, "loss": 0.2772, "step": 19914 }, { "epoch": 0.9013351436976692, "grad_norm": 0.6089930596789058, "learning_rate": 2.5335150630692476e-07, "loss": 0.2988, "step": 19915 }, { "epoch": 0.9013804028060647, "grad_norm": 0.5788620985116087, "learning_rate": 2.5312121348080643e-07, "loss": 0.2939, "step": 19916 }, { "epoch": 0.9014256619144603, "grad_norm": 0.6364074845395432, "learning_rate": 2.528910226505338e-07, "loss": 0.2769, "step": 19917 }, { "epoch": 0.9014709210228559, "grad_norm": 0.2567038459793554, "learning_rate": 2.5266093382105395e-07, "loss": 0.4693, "step": 19918 }, { "epoch": 0.9015161801312515, "grad_norm": 0.7494711890048615, "learning_rate": 2.5243094699731076e-07, "loss": 0.2732, "step": 19919 }, { "epoch": 0.9015614392396469, "grad_norm": 0.5794564708348203, "learning_rate": 2.522010621842447e-07, "loss": 0.2735, "step": 19920 }, { "epoch": 0.9016066983480425, "grad_norm": 0.6298020059586386, "learning_rate": 2.5197127938679567e-07, "loss": 0.3306, "step": 19921 }, { "epoch": 0.9016519574564381, "grad_norm": 0.6395789983688206, "learning_rate": 2.5174159860990256e-07, "loss": 0.3239, "step": 19922 }, { "epoch": 0.9016972165648337, "grad_norm": 0.6277059924919379, "learning_rate": 2.5151201985849915e-07, "loss": 0.2741, "step": 19923 }, { "epoch": 0.9017424756732292, "grad_norm": 0.7003457691753658, "learning_rate": 2.512825431375177e-07, "loss": 0.2628, "step": 19924 }, { "epoch": 0.9017877347816248, "grad_norm": 0.5869953938455421, "learning_rate": 2.510531684518902e-07, "loss": 0.2927, "step": 19925 }, { "epoch": 0.9018329938900204, "grad_norm": 0.6669116927869857, "learning_rate": 2.508238958065451e-07, "loss": 0.2906, "step": 19926 }, { "epoch": 0.901878252998416, "grad_norm": 0.6093736144281799, "learning_rate": 2.505947252064089e-07, "loss": 0.3189, "step": 19927 }, { "epoch": 0.9019235121068115, "grad_norm": 0.25538566156125225, "learning_rate": 2.5036565665640443e-07, "loss": 0.465, "step": 19928 }, { "epoch": 0.901968771215207, "grad_norm": 0.2582399895913366, "learning_rate": 2.501366901614555e-07, "loss": 0.4727, "step": 19929 }, { "epoch": 0.9020140303236026, "grad_norm": 0.28190071012238044, "learning_rate": 2.4990782572647977e-07, "loss": 0.4667, "step": 19930 }, { "epoch": 0.9020592894319982, "grad_norm": 1.11822190822124, "learning_rate": 2.4967906335639725e-07, "loss": 0.3162, "step": 19931 }, { "epoch": 0.9021045485403938, "grad_norm": 0.6665944056354663, "learning_rate": 2.494504030561223e-07, "loss": 0.3137, "step": 19932 }, { "epoch": 0.9021498076487893, "grad_norm": 0.5415064243105101, "learning_rate": 2.4922184483056665e-07, "loss": 0.2803, "step": 19933 }, { "epoch": 0.9021950667571849, "grad_norm": 0.7574661488081733, "learning_rate": 2.4899338868464404e-07, "loss": 0.3501, "step": 19934 }, { "epoch": 0.9022403258655805, "grad_norm": 0.6385815253014705, "learning_rate": 2.487650346232606e-07, "loss": 0.3302, "step": 19935 }, { "epoch": 0.902285584973976, "grad_norm": 0.5893896562648631, "learning_rate": 2.485367826513258e-07, "loss": 0.2581, "step": 19936 }, { "epoch": 0.9023308440823716, "grad_norm": 0.5638720757545203, "learning_rate": 2.483086327737411e-07, "loss": 0.3226, "step": 19937 }, { "epoch": 0.9023761031907671, "grad_norm": 0.5858279262262214, "learning_rate": 2.48080584995411e-07, "loss": 0.2622, "step": 19938 }, { "epoch": 0.9024213622991627, "grad_norm": 0.6459975315468293, "learning_rate": 2.4785263932123495e-07, "loss": 0.3356, "step": 19939 }, { "epoch": 0.9024666214075583, "grad_norm": 0.5968005891204486, "learning_rate": 2.4762479575610954e-07, "loss": 0.288, "step": 19940 }, { "epoch": 0.9025118805159539, "grad_norm": 0.6842359527286419, "learning_rate": 2.47397054304932e-07, "loss": 0.2635, "step": 19941 }, { "epoch": 0.9025571396243494, "grad_norm": 0.6616650535720541, "learning_rate": 2.4716941497259563e-07, "loss": 0.293, "step": 19942 }, { "epoch": 0.902602398732745, "grad_norm": 0.6865500924952155, "learning_rate": 2.4694187776399094e-07, "loss": 0.2953, "step": 19943 }, { "epoch": 0.9026476578411405, "grad_norm": 0.24231874254102226, "learning_rate": 2.4671444268400736e-07, "loss": 0.465, "step": 19944 }, { "epoch": 0.9026929169495361, "grad_norm": 0.5940515750046157, "learning_rate": 2.464871097375321e-07, "loss": 0.2587, "step": 19945 }, { "epoch": 0.9027381760579316, "grad_norm": 0.6427760010776417, "learning_rate": 2.46259878929449e-07, "loss": 0.3054, "step": 19946 }, { "epoch": 0.9027834351663272, "grad_norm": 0.6293624346771375, "learning_rate": 2.460327502646415e-07, "loss": 0.2891, "step": 19947 }, { "epoch": 0.9028286942747228, "grad_norm": 0.6467156463352277, "learning_rate": 2.4580572374798997e-07, "loss": 0.2955, "step": 19948 }, { "epoch": 0.9028739533831184, "grad_norm": 0.2585270588356642, "learning_rate": 2.455787993843711e-07, "loss": 0.4673, "step": 19949 }, { "epoch": 0.902919212491514, "grad_norm": 0.5902196062976237, "learning_rate": 2.453519771786617e-07, "loss": 0.2961, "step": 19950 }, { "epoch": 0.9029644715999094, "grad_norm": 0.2511985102721687, "learning_rate": 2.451252571357365e-07, "loss": 0.4569, "step": 19951 }, { "epoch": 0.903009730708305, "grad_norm": 0.6232119279137442, "learning_rate": 2.4489863926046577e-07, "loss": 0.2901, "step": 19952 }, { "epoch": 0.9030549898167006, "grad_norm": 1.440194667225179, "learning_rate": 2.446721235577182e-07, "loss": 0.2961, "step": 19953 }, { "epoch": 0.9031002489250962, "grad_norm": 0.615639529056928, "learning_rate": 2.4444571003236216e-07, "loss": 0.3075, "step": 19954 }, { "epoch": 0.9031455080334917, "grad_norm": 0.593651072497511, "learning_rate": 2.4421939868926325e-07, "loss": 0.2727, "step": 19955 }, { "epoch": 0.9031907671418873, "grad_norm": 0.6528840635575128, "learning_rate": 2.4399318953328255e-07, "loss": 0.2899, "step": 19956 }, { "epoch": 0.9032360262502829, "grad_norm": 0.6198831311701227, "learning_rate": 2.437670825692812e-07, "loss": 0.2697, "step": 19957 }, { "epoch": 0.9032812853586785, "grad_norm": 0.6317778826705256, "learning_rate": 2.4354107780211745e-07, "loss": 0.3065, "step": 19958 }, { "epoch": 0.9033265444670739, "grad_norm": 0.2854533424264698, "learning_rate": 2.433151752366475e-07, "loss": 0.4926, "step": 19959 }, { "epoch": 0.9033718035754695, "grad_norm": 0.735995959989909, "learning_rate": 2.4308937487772576e-07, "loss": 0.2814, "step": 19960 }, { "epoch": 0.9034170626838651, "grad_norm": 0.6361012382328806, "learning_rate": 2.4286367673020396e-07, "loss": 0.2507, "step": 19961 }, { "epoch": 0.9034623217922607, "grad_norm": 0.6104055856919773, "learning_rate": 2.4263808079893035e-07, "loss": 0.2508, "step": 19962 }, { "epoch": 0.9035075809006563, "grad_norm": 0.6575551325731162, "learning_rate": 2.4241258708875336e-07, "loss": 0.2905, "step": 19963 }, { "epoch": 0.9035528400090518, "grad_norm": 0.5908448190267949, "learning_rate": 2.4218719560451907e-07, "loss": 0.3214, "step": 19964 }, { "epoch": 0.9035980991174474, "grad_norm": 0.5794135894633232, "learning_rate": 2.4196190635106917e-07, "loss": 0.2715, "step": 19965 }, { "epoch": 0.903643358225843, "grad_norm": 0.703211043777139, "learning_rate": 2.4173671933324373e-07, "loss": 0.2657, "step": 19966 }, { "epoch": 0.9036886173342386, "grad_norm": 0.6009922340041405, "learning_rate": 2.415116345558832e-07, "loss": 0.2983, "step": 19967 }, { "epoch": 0.903733876442634, "grad_norm": 0.2530597694122791, "learning_rate": 2.4128665202382327e-07, "loss": 0.464, "step": 19968 }, { "epoch": 0.9037791355510296, "grad_norm": 0.6404494730165863, "learning_rate": 2.4106177174189724e-07, "loss": 0.3327, "step": 19969 }, { "epoch": 0.9038243946594252, "grad_norm": 0.6221944918677672, "learning_rate": 2.408369937149374e-07, "loss": 0.2858, "step": 19970 }, { "epoch": 0.9038696537678208, "grad_norm": 0.7356818975973848, "learning_rate": 2.4061231794777483e-07, "loss": 0.3332, "step": 19971 }, { "epoch": 0.9039149128762164, "grad_norm": 0.2557326175187924, "learning_rate": 2.4038774444523627e-07, "loss": 0.4571, "step": 19972 }, { "epoch": 0.9039601719846119, "grad_norm": 0.6719787234019912, "learning_rate": 2.4016327321214614e-07, "loss": 0.3138, "step": 19973 }, { "epoch": 0.9040054310930075, "grad_norm": 0.5657826799708439, "learning_rate": 2.3993890425332957e-07, "loss": 0.3172, "step": 19974 }, { "epoch": 0.904050690201403, "grad_norm": 0.6515336390605506, "learning_rate": 2.3971463757360537e-07, "loss": 0.2936, "step": 19975 }, { "epoch": 0.9040959493097986, "grad_norm": 0.630534874722075, "learning_rate": 2.394904731777947e-07, "loss": 0.3024, "step": 19976 }, { "epoch": 0.9041412084181941, "grad_norm": 0.5926030588348037, "learning_rate": 2.392664110707116e-07, "loss": 0.2706, "step": 19977 }, { "epoch": 0.9041864675265897, "grad_norm": 0.6027188615891163, "learning_rate": 2.390424512571732e-07, "loss": 0.3356, "step": 19978 }, { "epoch": 0.9042317266349853, "grad_norm": 0.8008295264317392, "learning_rate": 2.388185937419896e-07, "loss": 0.2613, "step": 19979 }, { "epoch": 0.9042769857433809, "grad_norm": 0.5926005579561792, "learning_rate": 2.385948385299719e-07, "loss": 0.2557, "step": 19980 }, { "epoch": 0.9043222448517764, "grad_norm": 0.6086515646248625, "learning_rate": 2.3837118562592799e-07, "loss": 0.2976, "step": 19981 }, { "epoch": 0.904367503960172, "grad_norm": 0.7498121647059611, "learning_rate": 2.3814763503466175e-07, "loss": 0.2795, "step": 19982 }, { "epoch": 0.9044127630685675, "grad_norm": 0.6778933569637745, "learning_rate": 2.3792418676097884e-07, "loss": 0.3199, "step": 19983 }, { "epoch": 0.9044580221769631, "grad_norm": 0.6100026715479245, "learning_rate": 2.3770084080967926e-07, "loss": 0.274, "step": 19984 }, { "epoch": 0.9045032812853587, "grad_norm": 0.26096261402513665, "learning_rate": 2.3747759718556308e-07, "loss": 0.4535, "step": 19985 }, { "epoch": 0.9045485403937542, "grad_norm": 0.6154908976706324, "learning_rate": 2.3725445589342534e-07, "loss": 0.3258, "step": 19986 }, { "epoch": 0.9045937995021498, "grad_norm": 0.6117443541239407, "learning_rate": 2.3703141693806276e-07, "loss": 0.2933, "step": 19987 }, { "epoch": 0.9046390586105454, "grad_norm": 0.7156865784325133, "learning_rate": 2.368084803242654e-07, "loss": 0.3113, "step": 19988 }, { "epoch": 0.904684317718941, "grad_norm": 0.4855626199280217, "learning_rate": 2.3658564605682555e-07, "loss": 0.4333, "step": 19989 }, { "epoch": 0.9047295768273365, "grad_norm": 0.6961645975967724, "learning_rate": 2.3636291414053104e-07, "loss": 0.2703, "step": 19990 }, { "epoch": 0.904774835935732, "grad_norm": 0.2763988693404931, "learning_rate": 2.3614028458016581e-07, "loss": 0.4774, "step": 19991 }, { "epoch": 0.9048200950441276, "grad_norm": 0.6112826169360427, "learning_rate": 2.3591775738051491e-07, "loss": 0.2983, "step": 19992 }, { "epoch": 0.9048653541525232, "grad_norm": 0.6104745671130538, "learning_rate": 2.356953325463607e-07, "loss": 0.2626, "step": 19993 }, { "epoch": 0.9049106132609187, "grad_norm": 0.7505273977344898, "learning_rate": 2.354730100824809e-07, "loss": 0.2903, "step": 19994 }, { "epoch": 0.9049558723693143, "grad_norm": 0.26741496817940696, "learning_rate": 2.3525078999365236e-07, "loss": 0.4528, "step": 19995 }, { "epoch": 0.9050011314777099, "grad_norm": 0.28491452323675803, "learning_rate": 2.3502867228465064e-07, "loss": 0.4586, "step": 19996 }, { "epoch": 0.9050463905861055, "grad_norm": 0.6851341238362276, "learning_rate": 2.3480665696024974e-07, "loss": 0.3186, "step": 19997 }, { "epoch": 0.9050916496945011, "grad_norm": 0.5780368717859541, "learning_rate": 2.3458474402521747e-07, "loss": 0.3063, "step": 19998 }, { "epoch": 0.9051369088028965, "grad_norm": 0.5757468936890212, "learning_rate": 2.343629334843228e-07, "loss": 0.2774, "step": 19999 }, { "epoch": 0.9051821679112921, "grad_norm": 0.6206281078686119, "learning_rate": 2.3414122534233252e-07, "loss": 0.3095, "step": 20000 }, { "epoch": 0.9052274270196877, "grad_norm": 0.6001697472110555, "learning_rate": 2.3391961960401055e-07, "loss": 0.2821, "step": 20001 }, { "epoch": 0.9052726861280833, "grad_norm": 0.6455405388295047, "learning_rate": 2.3369811627411755e-07, "loss": 0.2946, "step": 20002 }, { "epoch": 0.9053179452364788, "grad_norm": 0.5857060542849744, "learning_rate": 2.3347671535741356e-07, "loss": 0.2875, "step": 20003 }, { "epoch": 0.9053632043448744, "grad_norm": 0.2701905458880112, "learning_rate": 2.3325541685865538e-07, "loss": 0.4448, "step": 20004 }, { "epoch": 0.90540846345327, "grad_norm": 0.7333708834362386, "learning_rate": 2.3303422078259918e-07, "loss": 0.2884, "step": 20005 }, { "epoch": 0.9054537225616656, "grad_norm": 0.6560985783085491, "learning_rate": 2.3281312713399618e-07, "loss": 0.2768, "step": 20006 }, { "epoch": 0.9054989816700612, "grad_norm": 0.24549467484021845, "learning_rate": 2.325921359175981e-07, "loss": 0.468, "step": 20007 }, { "epoch": 0.9055442407784566, "grad_norm": 0.35618191466750615, "learning_rate": 2.3237124713815285e-07, "loss": 0.4485, "step": 20008 }, { "epoch": 0.9055894998868522, "grad_norm": 1.165867138539832, "learning_rate": 2.3215046080040714e-07, "loss": 0.28, "step": 20009 }, { "epoch": 0.9056347589952478, "grad_norm": 0.5878889928573706, "learning_rate": 2.31929776909105e-07, "loss": 0.265, "step": 20010 }, { "epoch": 0.9056800181036434, "grad_norm": 0.6493016711033609, "learning_rate": 2.3170919546898707e-07, "loss": 0.2676, "step": 20011 }, { "epoch": 0.9057252772120389, "grad_norm": 0.6119097365861756, "learning_rate": 2.3148871648479398e-07, "loss": 0.3042, "step": 20012 }, { "epoch": 0.9057705363204345, "grad_norm": 0.25708074637369255, "learning_rate": 2.3126833996126364e-07, "loss": 0.4837, "step": 20013 }, { "epoch": 0.9058157954288301, "grad_norm": 0.5724588237659539, "learning_rate": 2.3104806590313055e-07, "loss": 0.2769, "step": 20014 }, { "epoch": 0.9058610545372257, "grad_norm": 0.2631363793982775, "learning_rate": 2.308278943151271e-07, "loss": 0.4353, "step": 20015 }, { "epoch": 0.9059063136456211, "grad_norm": 0.6307812688200601, "learning_rate": 2.3060782520198554e-07, "loss": 0.2949, "step": 20016 }, { "epoch": 0.9059515727540167, "grad_norm": 0.8016890777574586, "learning_rate": 2.3038785856843328e-07, "loss": 0.3419, "step": 20017 }, { "epoch": 0.9059968318624123, "grad_norm": 0.2989237934568109, "learning_rate": 2.3016799441919756e-07, "loss": 0.4753, "step": 20018 }, { "epoch": 0.9060420909708079, "grad_norm": 0.6004554050861189, "learning_rate": 2.2994823275900246e-07, "loss": 0.2807, "step": 20019 }, { "epoch": 0.9060873500792035, "grad_norm": 0.6495330424530107, "learning_rate": 2.2972857359256862e-07, "loss": 0.3212, "step": 20020 }, { "epoch": 0.906132609187599, "grad_norm": 0.5534044314636215, "learning_rate": 2.2950901692461725e-07, "loss": 0.2954, "step": 20021 }, { "epoch": 0.9061778682959946, "grad_norm": 0.5939677994361185, "learning_rate": 2.292895627598668e-07, "loss": 0.3441, "step": 20022 }, { "epoch": 0.9062231274043901, "grad_norm": 0.28416916028422884, "learning_rate": 2.2907021110303073e-07, "loss": 0.4868, "step": 20023 }, { "epoch": 0.9062683865127857, "grad_norm": 0.6777718658467312, "learning_rate": 2.2885096195882306e-07, "loss": 0.2729, "step": 20024 }, { "epoch": 0.9063136456211812, "grad_norm": 0.6626368509700329, "learning_rate": 2.2863181533195443e-07, "loss": 0.3077, "step": 20025 }, { "epoch": 0.9063589047295768, "grad_norm": 0.5582505524440544, "learning_rate": 2.2841277122713502e-07, "loss": 0.2678, "step": 20026 }, { "epoch": 0.9064041638379724, "grad_norm": 0.6291111402466241, "learning_rate": 2.2819382964906933e-07, "loss": 0.2972, "step": 20027 }, { "epoch": 0.906449422946368, "grad_norm": 0.6332071836580533, "learning_rate": 2.2797499060246253e-07, "loss": 0.339, "step": 20028 }, { "epoch": 0.9064946820547635, "grad_norm": 0.592577141665457, "learning_rate": 2.2775625409201807e-07, "loss": 0.2715, "step": 20029 }, { "epoch": 0.9065399411631591, "grad_norm": 0.5395007641614055, "learning_rate": 2.275376201224344e-07, "loss": 0.29, "step": 20030 }, { "epoch": 0.9065852002715546, "grad_norm": 0.5726874305021403, "learning_rate": 2.2731908869840945e-07, "loss": 0.2871, "step": 20031 }, { "epoch": 0.9066304593799502, "grad_norm": 0.5915757607695858, "learning_rate": 2.2710065982464001e-07, "loss": 0.2853, "step": 20032 }, { "epoch": 0.9066757184883458, "grad_norm": 0.6336437929439116, "learning_rate": 2.2688233350581734e-07, "loss": 0.3084, "step": 20033 }, { "epoch": 0.9067209775967413, "grad_norm": 0.5800979237410989, "learning_rate": 2.266641097466349e-07, "loss": 0.2711, "step": 20034 }, { "epoch": 0.9067662367051369, "grad_norm": 0.6896619283347677, "learning_rate": 2.2644598855177947e-07, "loss": 0.2564, "step": 20035 }, { "epoch": 0.9068114958135325, "grad_norm": 0.6594548147719869, "learning_rate": 2.262279699259401e-07, "loss": 0.3011, "step": 20036 }, { "epoch": 0.9068567549219281, "grad_norm": 0.6633480941917634, "learning_rate": 2.2601005387379914e-07, "loss": 0.3024, "step": 20037 }, { "epoch": 0.9069020140303236, "grad_norm": 0.6002884781872193, "learning_rate": 2.2579224040004068e-07, "loss": 0.2946, "step": 20038 }, { "epoch": 0.9069472731387191, "grad_norm": 0.26351204782415494, "learning_rate": 2.2557452950934367e-07, "loss": 0.4717, "step": 20039 }, { "epoch": 0.9069925322471147, "grad_norm": 0.6369228382695861, "learning_rate": 2.2535692120638665e-07, "loss": 0.3007, "step": 20040 }, { "epoch": 0.9070377913555103, "grad_norm": 0.27484098339073637, "learning_rate": 2.2513941549584473e-07, "loss": 0.4742, "step": 20041 }, { "epoch": 0.9070830504639059, "grad_norm": 0.5711325651584115, "learning_rate": 2.2492201238239252e-07, "loss": 0.2869, "step": 20042 }, { "epoch": 0.9071283095723014, "grad_norm": 0.5805424742255618, "learning_rate": 2.2470471187070075e-07, "loss": 0.2799, "step": 20043 }, { "epoch": 0.907173568680697, "grad_norm": 0.5694199887498899, "learning_rate": 2.2448751396543788e-07, "loss": 0.2586, "step": 20044 }, { "epoch": 0.9072188277890926, "grad_norm": 0.5960203023496685, "learning_rate": 2.242704186712724e-07, "loss": 0.267, "step": 20045 }, { "epoch": 0.9072640868974882, "grad_norm": 0.6596114041630285, "learning_rate": 2.2405342599286672e-07, "loss": 0.2806, "step": 20046 }, { "epoch": 0.9073093460058836, "grad_norm": 0.27839170887263026, "learning_rate": 2.2383653593488596e-07, "loss": 0.479, "step": 20047 }, { "epoch": 0.9073546051142792, "grad_norm": 0.6263478186913513, "learning_rate": 2.2361974850198865e-07, "loss": 0.2961, "step": 20048 }, { "epoch": 0.9073998642226748, "grad_norm": 0.6984960638546034, "learning_rate": 2.234030636988338e-07, "loss": 0.2843, "step": 20049 }, { "epoch": 0.9074451233310704, "grad_norm": 0.6520341024668019, "learning_rate": 2.2318648153007605e-07, "loss": 0.3258, "step": 20050 }, { "epoch": 0.9074903824394659, "grad_norm": 0.6284751430203892, "learning_rate": 2.229700020003711e-07, "loss": 0.2776, "step": 20051 }, { "epoch": 0.9075356415478615, "grad_norm": 0.5311253177293819, "learning_rate": 2.2275362511436914e-07, "loss": 0.2703, "step": 20052 }, { "epoch": 0.9075809006562571, "grad_norm": 0.616319374180847, "learning_rate": 2.2253735087671867e-07, "loss": 0.2971, "step": 20053 }, { "epoch": 0.9076261597646527, "grad_norm": 0.26938372789301107, "learning_rate": 2.2232117929206764e-07, "loss": 0.4823, "step": 20054 }, { "epoch": 0.9076714188730483, "grad_norm": 0.6187431043930877, "learning_rate": 2.2210511036506232e-07, "loss": 0.3024, "step": 20055 }, { "epoch": 0.9077166779814437, "grad_norm": 0.6003295596575504, "learning_rate": 2.218891441003429e-07, "loss": 0.3081, "step": 20056 }, { "epoch": 0.9077619370898393, "grad_norm": 0.5985733209787534, "learning_rate": 2.2167328050255122e-07, "loss": 0.3059, "step": 20057 }, { "epoch": 0.9078071961982349, "grad_norm": 0.6669948310068601, "learning_rate": 2.2145751957632521e-07, "loss": 0.3032, "step": 20058 }, { "epoch": 0.9078524553066305, "grad_norm": 0.25838696674097744, "learning_rate": 2.2124186132630122e-07, "loss": 0.4851, "step": 20059 }, { "epoch": 0.907897714415026, "grad_norm": 0.5950360146604151, "learning_rate": 2.2102630575711215e-07, "loss": 0.2995, "step": 20060 }, { "epoch": 0.9079429735234216, "grad_norm": 0.6025156236670351, "learning_rate": 2.20810852873391e-07, "loss": 0.2496, "step": 20061 }, { "epoch": 0.9079882326318172, "grad_norm": 0.26372452664482027, "learning_rate": 2.2059550267976572e-07, "loss": 0.4513, "step": 20062 }, { "epoch": 0.9080334917402128, "grad_norm": 0.5920258230976766, "learning_rate": 2.2038025518086482e-07, "loss": 0.2796, "step": 20063 }, { "epoch": 0.9080787508486082, "grad_norm": 0.615303934129582, "learning_rate": 2.2016511038131238e-07, "loss": 0.3333, "step": 20064 }, { "epoch": 0.9081240099570038, "grad_norm": 0.6655320396579752, "learning_rate": 2.1995006828573194e-07, "loss": 0.2623, "step": 20065 }, { "epoch": 0.9081692690653994, "grad_norm": 0.2551258815085798, "learning_rate": 2.1973512889874316e-07, "loss": 0.4526, "step": 20066 }, { "epoch": 0.908214528173795, "grad_norm": 0.786400107775112, "learning_rate": 2.1952029222496562e-07, "loss": 0.3186, "step": 20067 }, { "epoch": 0.9082597872821906, "grad_norm": 0.5946120226496916, "learning_rate": 2.1930555826901513e-07, "loss": 0.2552, "step": 20068 }, { "epoch": 0.9083050463905861, "grad_norm": 0.6447590483502432, "learning_rate": 2.1909092703550406e-07, "loss": 0.3191, "step": 20069 }, { "epoch": 0.9083503054989817, "grad_norm": 0.25579389228282384, "learning_rate": 2.1887639852904653e-07, "loss": 0.4662, "step": 20070 }, { "epoch": 0.9083955646073772, "grad_norm": 0.6112753434468116, "learning_rate": 2.1866197275425106e-07, "loss": 0.2557, "step": 20071 }, { "epoch": 0.9084408237157728, "grad_norm": 0.674747862300124, "learning_rate": 2.1844764971572507e-07, "loss": 0.3018, "step": 20072 }, { "epoch": 0.9084860828241683, "grad_norm": 0.6150259270468362, "learning_rate": 2.1823342941807324e-07, "loss": 0.2986, "step": 20073 }, { "epoch": 0.9085313419325639, "grad_norm": 0.6258412983607646, "learning_rate": 2.1801931186589963e-07, "loss": 0.3338, "step": 20074 }, { "epoch": 0.9085766010409595, "grad_norm": 0.6052915432140621, "learning_rate": 2.1780529706380337e-07, "loss": 0.2614, "step": 20075 }, { "epoch": 0.9086218601493551, "grad_norm": 0.6119499527385366, "learning_rate": 2.1759138501638466e-07, "loss": 0.2688, "step": 20076 }, { "epoch": 0.9086671192577507, "grad_norm": 0.5978537982068777, "learning_rate": 2.1737757572823813e-07, "loss": 0.3054, "step": 20077 }, { "epoch": 0.9087123783661462, "grad_norm": 0.5522834789899027, "learning_rate": 2.1716386920396016e-07, "loss": 0.283, "step": 20078 }, { "epoch": 0.9087576374745417, "grad_norm": 0.5917621379636202, "learning_rate": 2.169502654481398e-07, "loss": 0.302, "step": 20079 }, { "epoch": 0.9088028965829373, "grad_norm": 0.27429627501030185, "learning_rate": 2.1673676446536952e-07, "loss": 0.4688, "step": 20080 }, { "epoch": 0.9088481556913329, "grad_norm": 0.6334485282257166, "learning_rate": 2.1652336626023506e-07, "loss": 0.3555, "step": 20081 }, { "epoch": 0.9088934147997284, "grad_norm": 0.6271218443000107, "learning_rate": 2.1631007083732169e-07, "loss": 0.2904, "step": 20082 }, { "epoch": 0.908938673908124, "grad_norm": 0.5528300230753579, "learning_rate": 2.1609687820121295e-07, "loss": 0.2757, "step": 20083 }, { "epoch": 0.9089839330165196, "grad_norm": 0.5809626317071711, "learning_rate": 2.158837883564907e-07, "loss": 0.2674, "step": 20084 }, { "epoch": 0.9090291921249152, "grad_norm": 0.5976098597063977, "learning_rate": 2.1567080130773188e-07, "loss": 0.2712, "step": 20085 }, { "epoch": 0.9090744512333107, "grad_norm": 0.5778406755545751, "learning_rate": 2.154579170595128e-07, "loss": 0.2683, "step": 20086 }, { "epoch": 0.9091197103417062, "grad_norm": 0.6191740308187985, "learning_rate": 2.152451356164098e-07, "loss": 0.2899, "step": 20087 }, { "epoch": 0.9091649694501018, "grad_norm": 0.6120327299909363, "learning_rate": 2.1503245698299312e-07, "loss": 0.266, "step": 20088 }, { "epoch": 0.9092102285584974, "grad_norm": 0.688995530064606, "learning_rate": 2.1481988116383246e-07, "loss": 0.2843, "step": 20089 }, { "epoch": 0.909255487666893, "grad_norm": 0.5403359844763675, "learning_rate": 2.146074081634969e-07, "loss": 0.282, "step": 20090 }, { "epoch": 0.9093007467752885, "grad_norm": 1.0998611164448522, "learning_rate": 2.1439503798655003e-07, "loss": 0.2768, "step": 20091 }, { "epoch": 0.9093460058836841, "grad_norm": 0.5736670905305177, "learning_rate": 2.1418277063755656e-07, "loss": 0.2578, "step": 20092 }, { "epoch": 0.9093912649920797, "grad_norm": 0.6303521103735157, "learning_rate": 2.139706061210761e-07, "loss": 0.272, "step": 20093 }, { "epoch": 0.9094365241004753, "grad_norm": 0.5708800033364091, "learning_rate": 2.13758544441669e-07, "loss": 0.2708, "step": 20094 }, { "epoch": 0.9094817832088707, "grad_norm": 0.6194234916597959, "learning_rate": 2.1354658560389042e-07, "loss": 0.2416, "step": 20095 }, { "epoch": 0.9095270423172663, "grad_norm": 0.5330122281441105, "learning_rate": 2.1333472961229563e-07, "loss": 0.2535, "step": 20096 }, { "epoch": 0.9095723014256619, "grad_norm": 0.2637332376720348, "learning_rate": 2.1312297647143653e-07, "loss": 0.4591, "step": 20097 }, { "epoch": 0.9096175605340575, "grad_norm": 0.625594032869571, "learning_rate": 2.129113261858623e-07, "loss": 0.2574, "step": 20098 }, { "epoch": 0.909662819642453, "grad_norm": 0.5724869592135517, "learning_rate": 2.1269977876012094e-07, "loss": 0.2765, "step": 20099 }, { "epoch": 0.9097080787508486, "grad_norm": 0.5638714706740632, "learning_rate": 2.1248833419875936e-07, "loss": 0.2833, "step": 20100 }, { "epoch": 0.9097533378592442, "grad_norm": 0.6085258497235556, "learning_rate": 2.122769925063195e-07, "loss": 0.3008, "step": 20101 }, { "epoch": 0.9097985969676398, "grad_norm": 0.5601390088119588, "learning_rate": 2.1206575368734216e-07, "loss": 0.2708, "step": 20102 }, { "epoch": 0.9098438560760354, "grad_norm": 0.6428953467497713, "learning_rate": 2.1185461774636705e-07, "loss": 0.3048, "step": 20103 }, { "epoch": 0.9098891151844308, "grad_norm": 0.2727656342620612, "learning_rate": 2.1164358468793055e-07, "loss": 0.4753, "step": 20104 }, { "epoch": 0.9099343742928264, "grad_norm": 0.6124735588652476, "learning_rate": 2.1143265451656736e-07, "loss": 0.3, "step": 20105 }, { "epoch": 0.909979633401222, "grad_norm": 0.647641665379744, "learning_rate": 2.1122182723680883e-07, "loss": 0.2805, "step": 20106 }, { "epoch": 0.9100248925096176, "grad_norm": 0.5980609552698322, "learning_rate": 2.1101110285318639e-07, "loss": 0.3268, "step": 20107 }, { "epoch": 0.9100701516180131, "grad_norm": 0.6609734334611863, "learning_rate": 2.108004813702258e-07, "loss": 0.2965, "step": 20108 }, { "epoch": 0.9101154107264087, "grad_norm": 0.5644962278927006, "learning_rate": 2.1058996279245515e-07, "loss": 0.2486, "step": 20109 }, { "epoch": 0.9101606698348043, "grad_norm": 0.2563080729027022, "learning_rate": 2.103795471243969e-07, "loss": 0.4734, "step": 20110 }, { "epoch": 0.9102059289431998, "grad_norm": 0.6255772447099949, "learning_rate": 2.101692343705708e-07, "loss": 0.2781, "step": 20111 }, { "epoch": 0.9102511880515954, "grad_norm": 0.6850588419635695, "learning_rate": 2.0995902453549766e-07, "loss": 0.2852, "step": 20112 }, { "epoch": 0.9102964471599909, "grad_norm": 0.9849127423646337, "learning_rate": 2.0974891762369386e-07, "loss": 0.3114, "step": 20113 }, { "epoch": 0.9103417062683865, "grad_norm": 0.6059976563244579, "learning_rate": 2.095389136396736e-07, "loss": 0.3264, "step": 20114 }, { "epoch": 0.9103869653767821, "grad_norm": 0.5625520832592269, "learning_rate": 2.093290125879488e-07, "loss": 0.2636, "step": 20115 }, { "epoch": 0.9104322244851777, "grad_norm": 0.6029349016535929, "learning_rate": 2.0911921447303086e-07, "loss": 0.3275, "step": 20116 }, { "epoch": 0.9104774835935732, "grad_norm": 0.6418283491471828, "learning_rate": 2.0890951929942671e-07, "loss": 0.2979, "step": 20117 }, { "epoch": 0.9105227427019688, "grad_norm": 0.5858464661181148, "learning_rate": 2.0869992707164166e-07, "loss": 0.3058, "step": 20118 }, { "epoch": 0.9105680018103643, "grad_norm": 0.5446063183284051, "learning_rate": 2.0849043779417987e-07, "loss": 0.253, "step": 20119 }, { "epoch": 0.9106132609187599, "grad_norm": 0.6664356187095676, "learning_rate": 2.0828105147154275e-07, "loss": 0.2791, "step": 20120 }, { "epoch": 0.9106585200271554, "grad_norm": 0.6085205672276123, "learning_rate": 2.0807176810823005e-07, "loss": 0.2497, "step": 20121 }, { "epoch": 0.910703779135551, "grad_norm": 0.2654758316672969, "learning_rate": 2.0786258770873647e-07, "loss": 0.4548, "step": 20122 }, { "epoch": 0.9107490382439466, "grad_norm": 0.6455911301462177, "learning_rate": 2.0765351027755897e-07, "loss": 0.2668, "step": 20123 }, { "epoch": 0.9107942973523422, "grad_norm": 0.6219158113543389, "learning_rate": 2.0744453581918843e-07, "loss": 0.3166, "step": 20124 }, { "epoch": 0.9108395564607378, "grad_norm": 0.7528225257884705, "learning_rate": 2.0723566433811572e-07, "loss": 0.2693, "step": 20125 }, { "epoch": 0.9108848155691333, "grad_norm": 0.6306450779415568, "learning_rate": 2.0702689583882883e-07, "loss": 0.3284, "step": 20126 }, { "epoch": 0.9109300746775288, "grad_norm": 0.5932455349455487, "learning_rate": 2.0681823032581316e-07, "loss": 0.3162, "step": 20127 }, { "epoch": 0.9109753337859244, "grad_norm": 0.6433329829199189, "learning_rate": 2.066096678035523e-07, "loss": 0.3029, "step": 20128 }, { "epoch": 0.91102059289432, "grad_norm": 0.6278373181227935, "learning_rate": 2.0640120827652876e-07, "loss": 0.3142, "step": 20129 }, { "epoch": 0.9110658520027155, "grad_norm": 0.5950706534219158, "learning_rate": 2.0619285174922067e-07, "loss": 0.2493, "step": 20130 }, { "epoch": 0.9111111111111111, "grad_norm": 0.6003694710464432, "learning_rate": 2.0598459822610494e-07, "loss": 0.2582, "step": 20131 }, { "epoch": 0.9111563702195067, "grad_norm": 0.6110120573969848, "learning_rate": 2.057764477116564e-07, "loss": 0.3004, "step": 20132 }, { "epoch": 0.9112016293279023, "grad_norm": 0.6446796955432598, "learning_rate": 2.0556840021034753e-07, "loss": 0.2936, "step": 20133 }, { "epoch": 0.9112468884362978, "grad_norm": 0.6100270371936517, "learning_rate": 2.053604557266492e-07, "loss": 0.3027, "step": 20134 }, { "epoch": 0.9112921475446933, "grad_norm": 0.6406473157774112, "learning_rate": 2.0515261426502897e-07, "loss": 0.2601, "step": 20135 }, { "epoch": 0.9113374066530889, "grad_norm": 0.4227689291899131, "learning_rate": 2.049448758299527e-07, "loss": 0.4509, "step": 20136 }, { "epoch": 0.9113826657614845, "grad_norm": 0.581610239211927, "learning_rate": 2.0473724042588405e-07, "loss": 0.254, "step": 20137 }, { "epoch": 0.9114279248698801, "grad_norm": 0.6115934720483611, "learning_rate": 2.0452970805728502e-07, "loss": 0.2672, "step": 20138 }, { "epoch": 0.9114731839782756, "grad_norm": 0.6571594489557938, "learning_rate": 2.0432227872861422e-07, "loss": 0.3153, "step": 20139 }, { "epoch": 0.9115184430866712, "grad_norm": 0.5950439047019607, "learning_rate": 2.041149524443281e-07, "loss": 0.2624, "step": 20140 }, { "epoch": 0.9115637021950668, "grad_norm": 1.4537733982204537, "learning_rate": 2.0390772920888258e-07, "loss": 0.3267, "step": 20141 }, { "epoch": 0.9116089613034624, "grad_norm": 0.7336297650765733, "learning_rate": 2.0370060902673074e-07, "loss": 0.2958, "step": 20142 }, { "epoch": 0.9116542204118578, "grad_norm": 0.6293224370546653, "learning_rate": 2.0349359190232176e-07, "loss": 0.3183, "step": 20143 }, { "epoch": 0.9116994795202534, "grad_norm": 0.6208929072969963, "learning_rate": 2.0328667784010324e-07, "loss": 0.2704, "step": 20144 }, { "epoch": 0.911744738628649, "grad_norm": 0.6356988807823192, "learning_rate": 2.030798668445233e-07, "loss": 0.2778, "step": 20145 }, { "epoch": 0.9117899977370446, "grad_norm": 0.632110705142832, "learning_rate": 2.0287315892002335e-07, "loss": 0.2838, "step": 20146 }, { "epoch": 0.9118352568454401, "grad_norm": 0.6456962160612171, "learning_rate": 2.0266655407104652e-07, "loss": 0.3172, "step": 20147 }, { "epoch": 0.9118805159538357, "grad_norm": 0.6471312561425153, "learning_rate": 2.024600523020309e-07, "loss": 0.2721, "step": 20148 }, { "epoch": 0.9119257750622313, "grad_norm": 0.6297922945477495, "learning_rate": 2.0225365361741522e-07, "loss": 0.3046, "step": 20149 }, { "epoch": 0.9119710341706269, "grad_norm": 0.6130515227525498, "learning_rate": 2.0204735802163254e-07, "loss": 0.2654, "step": 20150 }, { "epoch": 0.9120162932790224, "grad_norm": 0.5487820024424245, "learning_rate": 2.0184116551911714e-07, "loss": 0.282, "step": 20151 }, { "epoch": 0.9120615523874179, "grad_norm": 0.599754947660118, "learning_rate": 2.0163507611429823e-07, "loss": 0.3083, "step": 20152 }, { "epoch": 0.9121068114958135, "grad_norm": 0.6336611363559453, "learning_rate": 2.0142908981160447e-07, "loss": 0.3301, "step": 20153 }, { "epoch": 0.9121520706042091, "grad_norm": 0.6186858420131777, "learning_rate": 2.012232066154618e-07, "loss": 0.3038, "step": 20154 }, { "epoch": 0.9121973297126047, "grad_norm": 0.6368384635497683, "learning_rate": 2.01017426530295e-07, "loss": 0.2921, "step": 20155 }, { "epoch": 0.9122425888210002, "grad_norm": 0.7317290019886159, "learning_rate": 2.0081174956052329e-07, "loss": 0.3185, "step": 20156 }, { "epoch": 0.9122878479293958, "grad_norm": 0.26234659961321066, "learning_rate": 2.0060617571056817e-07, "loss": 0.4373, "step": 20157 }, { "epoch": 0.9123331070377914, "grad_norm": 0.7022199549928246, "learning_rate": 2.004007049848461e-07, "loss": 0.3736, "step": 20158 }, { "epoch": 0.912378366146187, "grad_norm": 0.24999613707843227, "learning_rate": 2.001953373877724e-07, "loss": 0.4316, "step": 20159 }, { "epoch": 0.9124236252545825, "grad_norm": 1.258637722295874, "learning_rate": 1.999900729237586e-07, "loss": 0.2693, "step": 20160 }, { "epoch": 0.912468884362978, "grad_norm": 0.5582673961022042, "learning_rate": 1.9978491159721724e-07, "loss": 0.287, "step": 20161 }, { "epoch": 0.9125141434713736, "grad_norm": 0.5574906899089822, "learning_rate": 1.9957985341255427e-07, "loss": 0.2933, "step": 20162 }, { "epoch": 0.9125594025797692, "grad_norm": 0.6276203254950994, "learning_rate": 1.9937489837417723e-07, "loss": 0.2954, "step": 20163 }, { "epoch": 0.9126046616881648, "grad_norm": 0.25034283765087206, "learning_rate": 1.991700464864893e-07, "loss": 0.4516, "step": 20164 }, { "epoch": 0.9126499207965603, "grad_norm": 0.6101342253794745, "learning_rate": 1.9896529775389363e-07, "loss": 0.3309, "step": 20165 }, { "epoch": 0.9126951799049559, "grad_norm": 0.5735698779122026, "learning_rate": 1.9876065218078722e-07, "loss": 0.3228, "step": 20166 }, { "epoch": 0.9127404390133514, "grad_norm": 0.9948355697347983, "learning_rate": 1.9855610977156882e-07, "loss": 0.298, "step": 20167 }, { "epoch": 0.912785698121747, "grad_norm": 0.6163256323172275, "learning_rate": 1.9835167053063376e-07, "loss": 0.2965, "step": 20168 }, { "epoch": 0.9128309572301425, "grad_norm": 0.29371825179107114, "learning_rate": 1.9814733446237356e-07, "loss": 0.4638, "step": 20169 }, { "epoch": 0.9128762163385381, "grad_norm": 0.70807913278701, "learning_rate": 1.9794310157117913e-07, "loss": 0.3371, "step": 20170 }, { "epoch": 0.9129214754469337, "grad_norm": 0.6712528365040975, "learning_rate": 1.977389718614392e-07, "loss": 0.3038, "step": 20171 }, { "epoch": 0.9129667345553293, "grad_norm": 0.6521966163230466, "learning_rate": 1.9753494533754026e-07, "loss": 0.3169, "step": 20172 }, { "epoch": 0.9130119936637249, "grad_norm": 0.260499083334948, "learning_rate": 1.9733102200386544e-07, "loss": 0.4618, "step": 20173 }, { "epoch": 0.9130572527721204, "grad_norm": 0.2448547025157443, "learning_rate": 1.9712720186479685e-07, "loss": 0.4476, "step": 20174 }, { "epoch": 0.9131025118805159, "grad_norm": 0.6350681819680561, "learning_rate": 1.9692348492471313e-07, "loss": 0.2788, "step": 20175 }, { "epoch": 0.9131477709889115, "grad_norm": 0.6428461694612434, "learning_rate": 1.9671987118799307e-07, "loss": 0.3113, "step": 20176 }, { "epoch": 0.9131930300973071, "grad_norm": 0.2592207557142955, "learning_rate": 1.965163606590098e-07, "loss": 0.4812, "step": 20177 }, { "epoch": 0.9132382892057026, "grad_norm": 0.6141639243091234, "learning_rate": 1.963129533421382e-07, "loss": 0.3166, "step": 20178 }, { "epoch": 0.9132835483140982, "grad_norm": 0.2552167500263459, "learning_rate": 1.961096492417469e-07, "loss": 0.4558, "step": 20179 }, { "epoch": 0.9133288074224938, "grad_norm": 0.24585128292836528, "learning_rate": 1.9590644836220584e-07, "loss": 0.4447, "step": 20180 }, { "epoch": 0.9133740665308894, "grad_norm": 0.6693985643334417, "learning_rate": 1.9570335070788093e-07, "loss": 0.337, "step": 20181 }, { "epoch": 0.9134193256392849, "grad_norm": 0.566802657436706, "learning_rate": 1.9550035628313478e-07, "loss": 0.3002, "step": 20182 }, { "epoch": 0.9134645847476804, "grad_norm": 0.24719745682119912, "learning_rate": 1.9529746509233006e-07, "loss": 0.4712, "step": 20183 }, { "epoch": 0.913509843856076, "grad_norm": 0.5804125523677844, "learning_rate": 1.950946771398282e-07, "loss": 0.2691, "step": 20184 }, { "epoch": 0.9135551029644716, "grad_norm": 0.5569565564874374, "learning_rate": 1.9489199242998248e-07, "loss": 0.2865, "step": 20185 }, { "epoch": 0.9136003620728672, "grad_norm": 0.5907424873541962, "learning_rate": 1.9468941096715043e-07, "loss": 0.3374, "step": 20186 }, { "epoch": 0.9136456211812627, "grad_norm": 1.7032824969349205, "learning_rate": 1.9448693275568532e-07, "loss": 0.2975, "step": 20187 }, { "epoch": 0.9136908802896583, "grad_norm": 0.8489322917680525, "learning_rate": 1.9428455779993694e-07, "loss": 0.3074, "step": 20188 }, { "epoch": 0.9137361393980539, "grad_norm": 0.6903877179464709, "learning_rate": 1.9408228610425296e-07, "loss": 0.2862, "step": 20189 }, { "epoch": 0.9137813985064495, "grad_norm": 0.652244246341138, "learning_rate": 1.9388011767298042e-07, "loss": 0.292, "step": 20190 }, { "epoch": 0.9138266576148449, "grad_norm": 0.597657405511287, "learning_rate": 1.9367805251046422e-07, "loss": 0.2418, "step": 20191 }, { "epoch": 0.9138719167232405, "grad_norm": 0.6294900696669814, "learning_rate": 1.9347609062104478e-07, "loss": 0.3036, "step": 20192 }, { "epoch": 0.9139171758316361, "grad_norm": 0.7343966627058698, "learning_rate": 1.932742320090619e-07, "loss": 0.2796, "step": 20193 }, { "epoch": 0.9139624349400317, "grad_norm": 0.2675377517537364, "learning_rate": 1.9307247667885331e-07, "loss": 0.4451, "step": 20194 }, { "epoch": 0.9140076940484273, "grad_norm": 0.6294374440983784, "learning_rate": 1.9287082463475326e-07, "loss": 0.2973, "step": 20195 }, { "epoch": 0.9140529531568228, "grad_norm": 0.2735152794246382, "learning_rate": 1.926692758810955e-07, "loss": 0.4829, "step": 20196 }, { "epoch": 0.9140982122652184, "grad_norm": 1.029793456721243, "learning_rate": 1.9246783042221106e-07, "loss": 0.2955, "step": 20197 }, { "epoch": 0.914143471373614, "grad_norm": 0.5930683611980938, "learning_rate": 1.9226648826242699e-07, "loss": 0.2777, "step": 20198 }, { "epoch": 0.9141887304820095, "grad_norm": 0.2621875181458503, "learning_rate": 1.9206524940606984e-07, "loss": 0.4717, "step": 20199 }, { "epoch": 0.914233989590405, "grad_norm": 0.27766580160689125, "learning_rate": 1.9186411385746507e-07, "loss": 0.469, "step": 20200 }, { "epoch": 0.9142792486988006, "grad_norm": 0.6663637678544063, "learning_rate": 1.9166308162093306e-07, "loss": 0.3206, "step": 20201 }, { "epoch": 0.9143245078071962, "grad_norm": 0.5736598664573532, "learning_rate": 1.914621527007937e-07, "loss": 0.2368, "step": 20202 }, { "epoch": 0.9143697669155918, "grad_norm": 0.5907927822480857, "learning_rate": 1.912613271013647e-07, "loss": 0.3147, "step": 20203 }, { "epoch": 0.9144150260239873, "grad_norm": 0.6438013056377528, "learning_rate": 1.9106060482695976e-07, "loss": 0.296, "step": 20204 }, { "epoch": 0.9144602851323829, "grad_norm": 0.6053099453746283, "learning_rate": 1.9085998588189436e-07, "loss": 0.2871, "step": 20205 }, { "epoch": 0.9145055442407785, "grad_norm": 0.622434748725618, "learning_rate": 1.906594702704767e-07, "loss": 0.3364, "step": 20206 }, { "epoch": 0.914550803349174, "grad_norm": 0.6125138418175076, "learning_rate": 1.904590579970167e-07, "loss": 0.2924, "step": 20207 }, { "epoch": 0.9145960624575696, "grad_norm": 0.6837576033495794, "learning_rate": 1.9025874906581975e-07, "loss": 0.3192, "step": 20208 }, { "epoch": 0.9146413215659651, "grad_norm": 0.5913973359919333, "learning_rate": 1.900585434811908e-07, "loss": 0.287, "step": 20209 }, { "epoch": 0.9146865806743607, "grad_norm": 0.7310883652397158, "learning_rate": 1.8985844124743136e-07, "loss": 0.2943, "step": 20210 }, { "epoch": 0.9147318397827563, "grad_norm": 0.620939923788956, "learning_rate": 1.8965844236883968e-07, "loss": 0.3022, "step": 20211 }, { "epoch": 0.9147770988911519, "grad_norm": 0.5870684472252491, "learning_rate": 1.894585468497151e-07, "loss": 0.267, "step": 20212 }, { "epoch": 0.9148223579995474, "grad_norm": 0.579162736305499, "learning_rate": 1.892587546943525e-07, "loss": 0.289, "step": 20213 }, { "epoch": 0.914867617107943, "grad_norm": 0.61310203729002, "learning_rate": 1.8905906590704293e-07, "loss": 0.3629, "step": 20214 }, { "epoch": 0.9149128762163385, "grad_norm": 0.6277058044001794, "learning_rate": 1.8885948049207847e-07, "loss": 0.2849, "step": 20215 }, { "epoch": 0.9149581353247341, "grad_norm": 0.8194378360950997, "learning_rate": 1.8865999845374794e-07, "loss": 0.323, "step": 20216 }, { "epoch": 0.9150033944331296, "grad_norm": 0.2803414338616721, "learning_rate": 1.8846061979633734e-07, "loss": 0.4627, "step": 20217 }, { "epoch": 0.9150486535415252, "grad_norm": 0.6307224297322953, "learning_rate": 1.8826134452412993e-07, "loss": 0.3527, "step": 20218 }, { "epoch": 0.9150939126499208, "grad_norm": 0.5779802801482953, "learning_rate": 1.8806217264140836e-07, "loss": 0.3024, "step": 20219 }, { "epoch": 0.9151391717583164, "grad_norm": 0.5418445470374095, "learning_rate": 1.87863104152452e-07, "loss": 0.2627, "step": 20220 }, { "epoch": 0.915184430866712, "grad_norm": 0.5964870628673384, "learning_rate": 1.8766413906153856e-07, "loss": 0.3207, "step": 20221 }, { "epoch": 0.9152296899751075, "grad_norm": 0.6014968870783488, "learning_rate": 1.874652773729424e-07, "loss": 0.2746, "step": 20222 }, { "epoch": 0.915274949083503, "grad_norm": 0.2805350431099635, "learning_rate": 1.8726651909093675e-07, "loss": 0.4705, "step": 20223 }, { "epoch": 0.9153202081918986, "grad_norm": 0.6307605153708684, "learning_rate": 1.870678642197926e-07, "loss": 0.3217, "step": 20224 }, { "epoch": 0.9153654673002942, "grad_norm": 0.5446748202273499, "learning_rate": 1.868693127637783e-07, "loss": 0.2803, "step": 20225 }, { "epoch": 0.9154107264086897, "grad_norm": 0.5966799858674203, "learning_rate": 1.8667086472716034e-07, "loss": 0.3142, "step": 20226 }, { "epoch": 0.9154559855170853, "grad_norm": 0.5956319525704, "learning_rate": 1.8647252011420202e-07, "loss": 0.2786, "step": 20227 }, { "epoch": 0.9155012446254809, "grad_norm": 0.5717016703576566, "learning_rate": 1.8627427892916493e-07, "loss": 0.2844, "step": 20228 }, { "epoch": 0.9155465037338765, "grad_norm": 0.670300461314522, "learning_rate": 1.860761411763107e-07, "loss": 0.2898, "step": 20229 }, { "epoch": 0.9155917628422721, "grad_norm": 0.6314533304522649, "learning_rate": 1.8587810685989528e-07, "loss": 0.2834, "step": 20230 }, { "epoch": 0.9156370219506675, "grad_norm": 0.6125017548125974, "learning_rate": 1.856801759841731e-07, "loss": 0.2744, "step": 20231 }, { "epoch": 0.9156822810590631, "grad_norm": 0.5840046732133334, "learning_rate": 1.8548234855339798e-07, "loss": 0.2708, "step": 20232 }, { "epoch": 0.9157275401674587, "grad_norm": 0.25643681040427324, "learning_rate": 1.8528462457182095e-07, "loss": 0.4598, "step": 20233 }, { "epoch": 0.9157727992758543, "grad_norm": 0.6206408904388228, "learning_rate": 1.8508700404368973e-07, "loss": 0.2768, "step": 20234 }, { "epoch": 0.9158180583842498, "grad_norm": 0.6378085195757364, "learning_rate": 1.8488948697325094e-07, "loss": 0.3203, "step": 20235 }, { "epoch": 0.9158633174926454, "grad_norm": 0.5901667715023341, "learning_rate": 1.8469207336474893e-07, "loss": 0.2854, "step": 20236 }, { "epoch": 0.915908576601041, "grad_norm": 0.6633365915851603, "learning_rate": 1.8449476322242476e-07, "loss": 0.3057, "step": 20237 }, { "epoch": 0.9159538357094366, "grad_norm": 0.2721618197466199, "learning_rate": 1.8429755655051896e-07, "loss": 0.4787, "step": 20238 }, { "epoch": 0.915999094817832, "grad_norm": 0.5846259701182656, "learning_rate": 1.841004533532681e-07, "loss": 0.2677, "step": 20239 }, { "epoch": 0.9160443539262276, "grad_norm": 0.586431225367831, "learning_rate": 1.8390345363490713e-07, "loss": 0.2987, "step": 20240 }, { "epoch": 0.9160896130346232, "grad_norm": 0.5920702643710308, "learning_rate": 1.8370655739966937e-07, "loss": 0.3144, "step": 20241 }, { "epoch": 0.9161348721430188, "grad_norm": 0.5928483811429326, "learning_rate": 1.8350976465178693e-07, "loss": 0.2833, "step": 20242 }, { "epoch": 0.9161801312514144, "grad_norm": 0.6568539880090193, "learning_rate": 1.8331307539548593e-07, "loss": 0.2931, "step": 20243 }, { "epoch": 0.9162253903598099, "grad_norm": 0.6174884944900461, "learning_rate": 1.831164896349935e-07, "loss": 0.2672, "step": 20244 }, { "epoch": 0.9162706494682055, "grad_norm": 0.6719921439413501, "learning_rate": 1.829200073745341e-07, "loss": 0.2718, "step": 20245 }, { "epoch": 0.9163159085766011, "grad_norm": 0.2804039360211197, "learning_rate": 1.8272362861832925e-07, "loss": 0.4778, "step": 20246 }, { "epoch": 0.9163611676849966, "grad_norm": 0.24948144674247946, "learning_rate": 1.825273533705979e-07, "loss": 0.4582, "step": 20247 }, { "epoch": 0.9164064267933921, "grad_norm": 0.7611009625456361, "learning_rate": 1.823311816355583e-07, "loss": 0.2726, "step": 20248 }, { "epoch": 0.9164516859017877, "grad_norm": 0.5962263712466588, "learning_rate": 1.8213511341742596e-07, "loss": 0.2935, "step": 20249 }, { "epoch": 0.9164969450101833, "grad_norm": 0.2622130822164122, "learning_rate": 1.819391487204125e-07, "loss": 0.479, "step": 20250 }, { "epoch": 0.9165422041185789, "grad_norm": 0.5557890604995867, "learning_rate": 1.8174328754872906e-07, "loss": 0.2505, "step": 20251 }, { "epoch": 0.9165874632269744, "grad_norm": 0.5915074391315309, "learning_rate": 1.815475299065844e-07, "loss": 0.254, "step": 20252 }, { "epoch": 0.91663272233537, "grad_norm": 0.2590629272394503, "learning_rate": 1.8135187579818415e-07, "loss": 0.4755, "step": 20253 }, { "epoch": 0.9166779814437656, "grad_norm": 0.554163045102261, "learning_rate": 1.8115632522773375e-07, "loss": 0.2421, "step": 20254 }, { "epoch": 0.9167232405521611, "grad_norm": 0.5629796866398182, "learning_rate": 1.8096087819943376e-07, "loss": 0.2769, "step": 20255 }, { "epoch": 0.9167684996605567, "grad_norm": 0.6647392026898864, "learning_rate": 1.8076553471748304e-07, "loss": 0.2869, "step": 20256 }, { "epoch": 0.9168137587689522, "grad_norm": 0.5923359751380025, "learning_rate": 1.805702947860799e-07, "loss": 0.2561, "step": 20257 }, { "epoch": 0.9168590178773478, "grad_norm": 0.2732673759257525, "learning_rate": 1.8037515840942043e-07, "loss": 0.4584, "step": 20258 }, { "epoch": 0.9169042769857434, "grad_norm": 0.5901968281956451, "learning_rate": 1.8018012559169573e-07, "loss": 0.2926, "step": 20259 }, { "epoch": 0.916949536094139, "grad_norm": 0.694288842386728, "learning_rate": 1.7998519633709688e-07, "loss": 0.3034, "step": 20260 }, { "epoch": 0.9169947952025345, "grad_norm": 0.581170916707714, "learning_rate": 1.7979037064981275e-07, "loss": 0.3262, "step": 20261 }, { "epoch": 0.91704005431093, "grad_norm": 0.24995335589526108, "learning_rate": 1.7959564853403e-07, "loss": 0.4322, "step": 20262 }, { "epoch": 0.9170853134193256, "grad_norm": 0.6144727338350052, "learning_rate": 1.7940102999393194e-07, "loss": 0.2856, "step": 20263 }, { "epoch": 0.9171305725277212, "grad_norm": 0.25951800013520543, "learning_rate": 1.7920651503370022e-07, "loss": 0.4764, "step": 20264 }, { "epoch": 0.9171758316361168, "grad_norm": 0.2825775327652799, "learning_rate": 1.7901210365751488e-07, "loss": 0.4756, "step": 20265 }, { "epoch": 0.9172210907445123, "grad_norm": 0.5810418071924581, "learning_rate": 1.7881779586955196e-07, "loss": 0.2479, "step": 20266 }, { "epoch": 0.9172663498529079, "grad_norm": 0.6689716280098308, "learning_rate": 1.7862359167398814e-07, "loss": 0.2806, "step": 20267 }, { "epoch": 0.9173116089613035, "grad_norm": 0.6429654330586431, "learning_rate": 1.784294910749962e-07, "loss": 0.2845, "step": 20268 }, { "epoch": 0.9173568680696991, "grad_norm": 0.6171635547788606, "learning_rate": 1.78235494076745e-07, "loss": 0.2493, "step": 20269 }, { "epoch": 0.9174021271780946, "grad_norm": 0.6655164567554036, "learning_rate": 1.7804160068340403e-07, "loss": 0.3252, "step": 20270 }, { "epoch": 0.9174473862864901, "grad_norm": 0.5822090955235841, "learning_rate": 1.7784781089914106e-07, "loss": 0.2945, "step": 20271 }, { "epoch": 0.9174926453948857, "grad_norm": 0.24552934203250296, "learning_rate": 1.776541247281177e-07, "loss": 0.456, "step": 20272 }, { "epoch": 0.9175379045032813, "grad_norm": 0.2575428016919689, "learning_rate": 1.774605421744957e-07, "loss": 0.4819, "step": 20273 }, { "epoch": 0.9175831636116768, "grad_norm": 0.5968669926945249, "learning_rate": 1.7726706324243614e-07, "loss": 0.3044, "step": 20274 }, { "epoch": 0.9176284227200724, "grad_norm": 0.679367312200526, "learning_rate": 1.770736879360957e-07, "loss": 0.3052, "step": 20275 }, { "epoch": 0.917673681828468, "grad_norm": 0.2811066629242291, "learning_rate": 1.7688041625962881e-07, "loss": 0.4838, "step": 20276 }, { "epoch": 0.9177189409368636, "grad_norm": 0.5880107542504787, "learning_rate": 1.766872482171883e-07, "loss": 0.2946, "step": 20277 }, { "epoch": 0.9177642000452592, "grad_norm": 0.6804630857887852, "learning_rate": 1.7649418381292584e-07, "loss": 0.2766, "step": 20278 }, { "epoch": 0.9178094591536546, "grad_norm": 0.7164473666448703, "learning_rate": 1.7630122305098919e-07, "loss": 0.2822, "step": 20279 }, { "epoch": 0.9178547182620502, "grad_norm": 0.5634355600166236, "learning_rate": 1.7610836593552394e-07, "loss": 0.2706, "step": 20280 }, { "epoch": 0.9178999773704458, "grad_norm": 0.6060948444999675, "learning_rate": 1.7591561247067513e-07, "loss": 0.2619, "step": 20281 }, { "epoch": 0.9179452364788414, "grad_norm": 0.6906994401131011, "learning_rate": 1.7572296266058274e-07, "loss": 0.2884, "step": 20282 }, { "epoch": 0.9179904955872369, "grad_norm": 0.3472640325425808, "learning_rate": 1.7553041650938797e-07, "loss": 0.4929, "step": 20283 }, { "epoch": 0.9180357546956325, "grad_norm": 0.2600903816186952, "learning_rate": 1.7533797402122743e-07, "loss": 0.4658, "step": 20284 }, { "epoch": 0.9180810138040281, "grad_norm": 0.28327782277311986, "learning_rate": 1.7514563520023565e-07, "loss": 0.4415, "step": 20285 }, { "epoch": 0.9181262729124237, "grad_norm": 0.26357230226310807, "learning_rate": 1.749534000505454e-07, "loss": 0.4762, "step": 20286 }, { "epoch": 0.9181715320208191, "grad_norm": 0.6251890113242208, "learning_rate": 1.747612685762884e-07, "loss": 0.2731, "step": 20287 }, { "epoch": 0.9182167911292147, "grad_norm": 0.5775415680510588, "learning_rate": 1.7456924078159187e-07, "loss": 0.2898, "step": 20288 }, { "epoch": 0.9182620502376103, "grad_norm": 0.5923941246023781, "learning_rate": 1.7437731667058143e-07, "loss": 0.2839, "step": 20289 }, { "epoch": 0.9183073093460059, "grad_norm": 0.8476013350525512, "learning_rate": 1.7418549624738213e-07, "loss": 0.3324, "step": 20290 }, { "epoch": 0.9183525684544015, "grad_norm": 0.6336776573796626, "learning_rate": 1.7399377951611563e-07, "loss": 0.2754, "step": 20291 }, { "epoch": 0.918397827562797, "grad_norm": 0.6547617577821889, "learning_rate": 1.7380216648090087e-07, "loss": 0.3239, "step": 20292 }, { "epoch": 0.9184430866711926, "grad_norm": 0.5762644864125279, "learning_rate": 1.7361065714585458e-07, "loss": 0.2698, "step": 20293 }, { "epoch": 0.9184883457795882, "grad_norm": 0.26382957927102185, "learning_rate": 1.734192515150923e-07, "loss": 0.4401, "step": 20294 }, { "epoch": 0.9185336048879837, "grad_norm": 0.6306528339851688, "learning_rate": 1.732279495927264e-07, "loss": 0.3416, "step": 20295 }, { "epoch": 0.9185788639963792, "grad_norm": 0.587214543222049, "learning_rate": 1.730367513828679e-07, "loss": 0.2814, "step": 20296 }, { "epoch": 0.9186241231047748, "grad_norm": 0.5364668170202281, "learning_rate": 1.7284565688962474e-07, "loss": 0.2797, "step": 20297 }, { "epoch": 0.9186693822131704, "grad_norm": 0.5879666024093826, "learning_rate": 1.7265466611710248e-07, "loss": 0.2945, "step": 20298 }, { "epoch": 0.918714641321566, "grad_norm": 0.7149839138267361, "learning_rate": 1.7246377906940503e-07, "loss": 0.3117, "step": 20299 }, { "epoch": 0.9187599004299616, "grad_norm": 0.27485696126433967, "learning_rate": 1.7227299575063528e-07, "loss": 0.474, "step": 20300 }, { "epoch": 0.9188051595383571, "grad_norm": 0.6842081565564186, "learning_rate": 1.7208231616489156e-07, "loss": 0.2535, "step": 20301 }, { "epoch": 0.9188504186467527, "grad_norm": 0.9067952840010864, "learning_rate": 1.7189174031627064e-07, "loss": 0.3039, "step": 20302 }, { "epoch": 0.9188956777551482, "grad_norm": 0.6247776678453516, "learning_rate": 1.7170126820886755e-07, "loss": 0.2834, "step": 20303 }, { "epoch": 0.9189409368635438, "grad_norm": 0.6025353886341012, "learning_rate": 1.7151089984677684e-07, "loss": 0.265, "step": 20304 }, { "epoch": 0.9189861959719393, "grad_norm": 0.2846579159301575, "learning_rate": 1.713206352340857e-07, "loss": 0.4568, "step": 20305 }, { "epoch": 0.9190314550803349, "grad_norm": 0.6058765647167526, "learning_rate": 1.7113047437488373e-07, "loss": 0.267, "step": 20306 }, { "epoch": 0.9190767141887305, "grad_norm": 0.6440807707619911, "learning_rate": 1.7094041727325817e-07, "loss": 0.2987, "step": 20307 }, { "epoch": 0.9191219732971261, "grad_norm": 0.6087728665863835, "learning_rate": 1.7075046393329132e-07, "loss": 0.2649, "step": 20308 }, { "epoch": 0.9191672324055216, "grad_norm": 0.6629216048938273, "learning_rate": 1.705606143590649e-07, "loss": 0.3285, "step": 20309 }, { "epoch": 0.9192124915139172, "grad_norm": 0.6432855564190427, "learning_rate": 1.7037086855465902e-07, "loss": 0.2972, "step": 20310 }, { "epoch": 0.9192577506223127, "grad_norm": 0.5504870130107095, "learning_rate": 1.7018122652414926e-07, "loss": 0.3088, "step": 20311 }, { "epoch": 0.9193030097307083, "grad_norm": 0.6063378414760505, "learning_rate": 1.6999168827161182e-07, "loss": 0.2958, "step": 20312 }, { "epoch": 0.9193482688391039, "grad_norm": 0.5759916244656855, "learning_rate": 1.6980225380111904e-07, "loss": 0.2869, "step": 20313 }, { "epoch": 0.9193935279474994, "grad_norm": 0.591904167150174, "learning_rate": 1.6961292311674037e-07, "loss": 0.2959, "step": 20314 }, { "epoch": 0.919438787055895, "grad_norm": 0.6214481185267032, "learning_rate": 1.6942369622254428e-07, "loss": 0.2788, "step": 20315 }, { "epoch": 0.9194840461642906, "grad_norm": 0.2848607056517523, "learning_rate": 1.692345731225975e-07, "loss": 0.4749, "step": 20316 }, { "epoch": 0.9195293052726862, "grad_norm": 0.25736908411097426, "learning_rate": 1.6904555382096343e-07, "loss": 0.4568, "step": 20317 }, { "epoch": 0.9195745643810816, "grad_norm": 0.6279904182505865, "learning_rate": 1.6885663832170274e-07, "loss": 0.2423, "step": 20318 }, { "epoch": 0.9196198234894772, "grad_norm": 0.5400609614569958, "learning_rate": 1.686678266288755e-07, "loss": 0.2607, "step": 20319 }, { "epoch": 0.9196650825978728, "grad_norm": 0.5738164996555193, "learning_rate": 1.6847911874653843e-07, "loss": 0.2651, "step": 20320 }, { "epoch": 0.9197103417062684, "grad_norm": 0.6456818098102596, "learning_rate": 1.6829051467874613e-07, "loss": 0.2998, "step": 20321 }, { "epoch": 0.9197556008146639, "grad_norm": 0.6348852708401035, "learning_rate": 1.6810201442955087e-07, "loss": 0.3245, "step": 20322 }, { "epoch": 0.9198008599230595, "grad_norm": 0.6234987326189086, "learning_rate": 1.6791361800300386e-07, "loss": 0.2987, "step": 20323 }, { "epoch": 0.9198461190314551, "grad_norm": 0.46159750163370644, "learning_rate": 1.6772532540315188e-07, "loss": 0.4803, "step": 20324 }, { "epoch": 0.9198913781398507, "grad_norm": 0.27836394932852804, "learning_rate": 1.6753713663404224e-07, "loss": 0.472, "step": 20325 }, { "epoch": 0.9199366372482463, "grad_norm": 0.6227338739189778, "learning_rate": 1.6734905169971782e-07, "loss": 0.2471, "step": 20326 }, { "epoch": 0.9199818963566417, "grad_norm": 0.6299923273196348, "learning_rate": 1.671610706042187e-07, "loss": 0.2589, "step": 20327 }, { "epoch": 0.9200271554650373, "grad_norm": 0.5676574324783089, "learning_rate": 1.6697319335158613e-07, "loss": 0.2706, "step": 20328 }, { "epoch": 0.9200724145734329, "grad_norm": 0.6999647644794571, "learning_rate": 1.6678541994585629e-07, "loss": 0.2926, "step": 20329 }, { "epoch": 0.9201176736818285, "grad_norm": 0.643394225929882, "learning_rate": 1.665977503910632e-07, "loss": 0.3487, "step": 20330 }, { "epoch": 0.920162932790224, "grad_norm": 0.5346259476521655, "learning_rate": 1.664101846912397e-07, "loss": 0.2489, "step": 20331 }, { "epoch": 0.9202081918986196, "grad_norm": 0.625222026956306, "learning_rate": 1.6622272285041652e-07, "loss": 0.2456, "step": 20332 }, { "epoch": 0.9202534510070152, "grad_norm": 0.7322590324640349, "learning_rate": 1.6603536487262095e-07, "loss": 0.2566, "step": 20333 }, { "epoch": 0.9202987101154108, "grad_norm": 0.5569926068928689, "learning_rate": 1.658481107618798e-07, "loss": 0.2397, "step": 20334 }, { "epoch": 0.9203439692238063, "grad_norm": 0.6147399641282735, "learning_rate": 1.6566096052221482e-07, "loss": 0.2932, "step": 20335 }, { "epoch": 0.9203892283322018, "grad_norm": 0.639177648247498, "learning_rate": 1.6547391415764836e-07, "loss": 0.3123, "step": 20336 }, { "epoch": 0.9204344874405974, "grad_norm": 0.6068624796114247, "learning_rate": 1.652869716722e-07, "loss": 0.3208, "step": 20337 }, { "epoch": 0.920479746548993, "grad_norm": 0.762520313233785, "learning_rate": 1.6510013306988538e-07, "loss": 0.3149, "step": 20338 }, { "epoch": 0.9205250056573886, "grad_norm": 0.29952288591201587, "learning_rate": 1.6491339835471964e-07, "loss": 0.474, "step": 20339 }, { "epoch": 0.9205702647657841, "grad_norm": 0.6158717020716946, "learning_rate": 1.6472676753071516e-07, "loss": 0.2757, "step": 20340 }, { "epoch": 0.9206155238741797, "grad_norm": 0.610806311783428, "learning_rate": 1.6454024060188257e-07, "loss": 0.3228, "step": 20341 }, { "epoch": 0.9206607829825753, "grad_norm": 0.6269652583233704, "learning_rate": 1.6435381757222869e-07, "loss": 0.3003, "step": 20342 }, { "epoch": 0.9207060420909708, "grad_norm": 0.6759177811930258, "learning_rate": 1.6416749844575974e-07, "loss": 0.2882, "step": 20343 }, { "epoch": 0.9207513011993663, "grad_norm": 0.631666234340163, "learning_rate": 1.6398128322647865e-07, "loss": 0.2816, "step": 20344 }, { "epoch": 0.9207965603077619, "grad_norm": 0.6173364819027063, "learning_rate": 1.6379517191838777e-07, "loss": 0.2593, "step": 20345 }, { "epoch": 0.9208418194161575, "grad_norm": 0.5951260182263146, "learning_rate": 1.636091645254856e-07, "loss": 0.2865, "step": 20346 }, { "epoch": 0.9208870785245531, "grad_norm": 0.5968563815932153, "learning_rate": 1.634232610517683e-07, "loss": 0.2656, "step": 20347 }, { "epoch": 0.9209323376329487, "grad_norm": 0.5688768727910141, "learning_rate": 1.6323746150123e-07, "loss": 0.276, "step": 20348 }, { "epoch": 0.9209775967413442, "grad_norm": 0.602614942186902, "learning_rate": 1.6305176587786465e-07, "loss": 0.2923, "step": 20349 }, { "epoch": 0.9210228558497398, "grad_norm": 0.8071633035076183, "learning_rate": 1.628661741856613e-07, "loss": 0.2828, "step": 20350 }, { "epoch": 0.9210681149581353, "grad_norm": 0.5866499802529012, "learning_rate": 1.6268068642860735e-07, "loss": 0.2556, "step": 20351 }, { "epoch": 0.9211133740665309, "grad_norm": 0.25558126830066286, "learning_rate": 1.6249530261068903e-07, "loss": 0.466, "step": 20352 }, { "epoch": 0.9211586331749264, "grad_norm": 0.25617421405222546, "learning_rate": 1.623100227358887e-07, "loss": 0.4592, "step": 20353 }, { "epoch": 0.921203892283322, "grad_norm": 0.6722170018272257, "learning_rate": 1.621248468081893e-07, "loss": 0.2959, "step": 20354 }, { "epoch": 0.9212491513917176, "grad_norm": 0.2676472468700252, "learning_rate": 1.619397748315682e-07, "loss": 0.4913, "step": 20355 }, { "epoch": 0.9212944105001132, "grad_norm": 0.269768813951422, "learning_rate": 1.6175480681000167e-07, "loss": 0.4517, "step": 20356 }, { "epoch": 0.9213396696085087, "grad_norm": 0.6520827900369393, "learning_rate": 1.6156994274746484e-07, "loss": 0.3, "step": 20357 }, { "epoch": 0.9213849287169043, "grad_norm": 0.6370859639103148, "learning_rate": 1.613851826479307e-07, "loss": 0.3292, "step": 20358 }, { "epoch": 0.9214301878252998, "grad_norm": 0.2639862715381424, "learning_rate": 1.6120052651536766e-07, "loss": 0.4813, "step": 20359 }, { "epoch": 0.9214754469336954, "grad_norm": 0.2595331991104463, "learning_rate": 1.6101597435374428e-07, "loss": 0.4575, "step": 20360 }, { "epoch": 0.921520706042091, "grad_norm": 0.6429967201358898, "learning_rate": 1.6083152616702512e-07, "loss": 0.3012, "step": 20361 }, { "epoch": 0.9215659651504865, "grad_norm": 0.5585878017776091, "learning_rate": 1.606471819591754e-07, "loss": 0.3135, "step": 20362 }, { "epoch": 0.9216112242588821, "grad_norm": 0.5677551745022221, "learning_rate": 1.604629417341541e-07, "loss": 0.2622, "step": 20363 }, { "epoch": 0.9216564833672777, "grad_norm": 0.6363566209381748, "learning_rate": 1.6027880549592033e-07, "loss": 0.2885, "step": 20364 }, { "epoch": 0.9217017424756733, "grad_norm": 0.7943373394647701, "learning_rate": 1.6009477324843204e-07, "loss": 0.3059, "step": 20365 }, { "epoch": 0.9217470015840687, "grad_norm": 0.5412161990938706, "learning_rate": 1.59910844995641e-07, "loss": 0.2455, "step": 20366 }, { "epoch": 0.9217922606924643, "grad_norm": 0.628707186222274, "learning_rate": 1.5972702074150194e-07, "loss": 0.2965, "step": 20367 }, { "epoch": 0.9218375198008599, "grad_norm": 0.671824730233963, "learning_rate": 1.5954330048996326e-07, "loss": 0.2751, "step": 20368 }, { "epoch": 0.9218827789092555, "grad_norm": 0.5892389520276957, "learning_rate": 1.5935968424497184e-07, "loss": 0.2822, "step": 20369 }, { "epoch": 0.921928038017651, "grad_norm": 0.5617899868690214, "learning_rate": 1.5917617201047508e-07, "loss": 0.2617, "step": 20370 }, { "epoch": 0.9219732971260466, "grad_norm": 0.27106261944272375, "learning_rate": 1.589927637904143e-07, "loss": 0.4458, "step": 20371 }, { "epoch": 0.9220185562344422, "grad_norm": 0.6156014604944067, "learning_rate": 1.5880945958873073e-07, "loss": 0.2717, "step": 20372 }, { "epoch": 0.9220638153428378, "grad_norm": 0.6014911690767994, "learning_rate": 1.586262594093635e-07, "loss": 0.322, "step": 20373 }, { "epoch": 0.9221090744512334, "grad_norm": 0.6133283076051357, "learning_rate": 1.5844316325624887e-07, "loss": 0.2976, "step": 20374 }, { "epoch": 0.9221543335596288, "grad_norm": 0.6033457098179053, "learning_rate": 1.5826017113332148e-07, "loss": 0.2794, "step": 20375 }, { "epoch": 0.9221995926680244, "grad_norm": 0.5862417085140412, "learning_rate": 1.580772830445121e-07, "loss": 0.2638, "step": 20376 }, { "epoch": 0.92224485177642, "grad_norm": 0.5840447890147605, "learning_rate": 1.5789449899375086e-07, "loss": 0.2529, "step": 20377 }, { "epoch": 0.9222901108848156, "grad_norm": 0.5688909679197075, "learning_rate": 1.5771181898496578e-07, "loss": 0.2991, "step": 20378 }, { "epoch": 0.9223353699932111, "grad_norm": 0.724734229839457, "learning_rate": 1.5752924302208206e-07, "loss": 0.2936, "step": 20379 }, { "epoch": 0.9223806291016067, "grad_norm": 0.5286664324455521, "learning_rate": 1.573467711090221e-07, "loss": 0.3022, "step": 20380 }, { "epoch": 0.9224258882100023, "grad_norm": 0.6311872016999195, "learning_rate": 1.5716440324970716e-07, "loss": 0.2745, "step": 20381 }, { "epoch": 0.9224711473183979, "grad_norm": 0.2896336364166879, "learning_rate": 1.5698213944805528e-07, "loss": 0.4545, "step": 20382 }, { "epoch": 0.9225164064267934, "grad_norm": 0.26109570845484165, "learning_rate": 1.5679997970798333e-07, "loss": 0.4907, "step": 20383 }, { "epoch": 0.9225616655351889, "grad_norm": 0.5823890638539553, "learning_rate": 1.566179240334048e-07, "loss": 0.2804, "step": 20384 }, { "epoch": 0.9226069246435845, "grad_norm": 0.24110901552768604, "learning_rate": 1.564359724282316e-07, "loss": 0.4458, "step": 20385 }, { "epoch": 0.9226521837519801, "grad_norm": 0.6395430045112075, "learning_rate": 1.5625412489637337e-07, "loss": 0.268, "step": 20386 }, { "epoch": 0.9226974428603757, "grad_norm": 0.5978845010368887, "learning_rate": 1.5607238144173864e-07, "loss": 0.2735, "step": 20387 }, { "epoch": 0.9227427019687712, "grad_norm": 0.6037538312066337, "learning_rate": 1.5589074206823096e-07, "loss": 0.2703, "step": 20388 }, { "epoch": 0.9227879610771668, "grad_norm": 0.27980886714460196, "learning_rate": 1.5570920677975276e-07, "loss": 0.4569, "step": 20389 }, { "epoch": 0.9228332201855624, "grad_norm": 0.6337457442557619, "learning_rate": 1.5552777558020594e-07, "loss": 0.2486, "step": 20390 }, { "epoch": 0.9228784792939579, "grad_norm": 0.6288532464939638, "learning_rate": 1.5534644847348957e-07, "loss": 0.2934, "step": 20391 }, { "epoch": 0.9229237384023534, "grad_norm": 0.6810993647037666, "learning_rate": 1.5516522546349833e-07, "loss": 0.2449, "step": 20392 }, { "epoch": 0.922968997510749, "grad_norm": 0.6479504141767006, "learning_rate": 1.5498410655412577e-07, "loss": 0.283, "step": 20393 }, { "epoch": 0.9230142566191446, "grad_norm": 0.6164855382342345, "learning_rate": 1.5480309174926544e-07, "loss": 0.3113, "step": 20394 }, { "epoch": 0.9230595157275402, "grad_norm": 0.8744004839033451, "learning_rate": 1.5462218105280535e-07, "loss": 0.2923, "step": 20395 }, { "epoch": 0.9231047748359358, "grad_norm": 0.6521839231619401, "learning_rate": 1.544413744686335e-07, "loss": 0.3171, "step": 20396 }, { "epoch": 0.9231500339443313, "grad_norm": 0.628945928780969, "learning_rate": 1.5426067200063454e-07, "loss": 0.3141, "step": 20397 }, { "epoch": 0.9231952930527269, "grad_norm": 0.6002713949526557, "learning_rate": 1.540800736526904e-07, "loss": 0.3094, "step": 20398 }, { "epoch": 0.9232405521611224, "grad_norm": 0.6054775864880105, "learning_rate": 1.5389957942868295e-07, "loss": 0.325, "step": 20399 }, { "epoch": 0.923285811269518, "grad_norm": 0.5423819600754345, "learning_rate": 1.5371918933249018e-07, "loss": 0.2665, "step": 20400 }, { "epoch": 0.9233310703779135, "grad_norm": 0.6114592856681977, "learning_rate": 1.5353890336798738e-07, "loss": 0.2896, "step": 20401 }, { "epoch": 0.9233763294863091, "grad_norm": 0.6591721781153652, "learning_rate": 1.5335872153904863e-07, "loss": 0.3219, "step": 20402 }, { "epoch": 0.9234215885947047, "grad_norm": 0.5609042868126081, "learning_rate": 1.5317864384954527e-07, "loss": 0.2773, "step": 20403 }, { "epoch": 0.9234668477031003, "grad_norm": 0.6318151743882193, "learning_rate": 1.5299867030334815e-07, "loss": 0.273, "step": 20404 }, { "epoch": 0.9235121068114958, "grad_norm": 0.6692743045696299, "learning_rate": 1.5281880090432245e-07, "loss": 0.3142, "step": 20405 }, { "epoch": 0.9235573659198913, "grad_norm": 0.623888346712142, "learning_rate": 1.5263903565633342e-07, "loss": 0.2948, "step": 20406 }, { "epoch": 0.9236026250282869, "grad_norm": 0.2757973992857609, "learning_rate": 1.5245937456324468e-07, "loss": 0.4559, "step": 20407 }, { "epoch": 0.9236478841366825, "grad_norm": 0.6188098397848397, "learning_rate": 1.5227981762891586e-07, "loss": 0.2783, "step": 20408 }, { "epoch": 0.9236931432450781, "grad_norm": 0.8300233252171786, "learning_rate": 1.5210036485720503e-07, "loss": 0.302, "step": 20409 }, { "epoch": 0.9237384023534736, "grad_norm": 0.37985033345341146, "learning_rate": 1.5192101625196798e-07, "loss": 0.4886, "step": 20410 }, { "epoch": 0.9237836614618692, "grad_norm": 0.5821665503233471, "learning_rate": 1.517417718170583e-07, "loss": 0.2816, "step": 20411 }, { "epoch": 0.9238289205702648, "grad_norm": 0.24585264221427977, "learning_rate": 1.5156263155632844e-07, "loss": 0.4586, "step": 20412 }, { "epoch": 0.9238741796786604, "grad_norm": 0.6593814272013578, "learning_rate": 1.5138359547362645e-07, "loss": 0.2861, "step": 20413 }, { "epoch": 0.9239194387870558, "grad_norm": 0.2898750406110483, "learning_rate": 1.5120466357279929e-07, "loss": 0.4899, "step": 20414 }, { "epoch": 0.9239646978954514, "grad_norm": 0.5845322241114378, "learning_rate": 1.510258358576916e-07, "loss": 0.2763, "step": 20415 }, { "epoch": 0.924009957003847, "grad_norm": 0.6005937355250502, "learning_rate": 1.5084711233214699e-07, "loss": 0.2856, "step": 20416 }, { "epoch": 0.9240552161122426, "grad_norm": 0.6675591618962443, "learning_rate": 1.5066849300000519e-07, "loss": 0.2622, "step": 20417 }, { "epoch": 0.9241004752206382, "grad_norm": 0.6381729858413194, "learning_rate": 1.5048997786510311e-07, "loss": 0.3195, "step": 20418 }, { "epoch": 0.9241457343290337, "grad_norm": 0.6069740562512995, "learning_rate": 1.5031156693127714e-07, "loss": 0.2958, "step": 20419 }, { "epoch": 0.9241909934374293, "grad_norm": 0.6311512886772741, "learning_rate": 1.5013326020236141e-07, "loss": 0.3274, "step": 20420 }, { "epoch": 0.9242362525458249, "grad_norm": 0.5988129173441522, "learning_rate": 1.4995505768218677e-07, "loss": 0.2664, "step": 20421 }, { "epoch": 0.9242815116542205, "grad_norm": 0.6004426149550289, "learning_rate": 1.497769593745818e-07, "loss": 0.2802, "step": 20422 }, { "epoch": 0.9243267707626159, "grad_norm": 0.6129639964883378, "learning_rate": 1.4959896528337402e-07, "loss": 0.3328, "step": 20423 }, { "epoch": 0.9243720298710115, "grad_norm": 0.5832031731496322, "learning_rate": 1.4942107541238705e-07, "loss": 0.3161, "step": 20424 }, { "epoch": 0.9244172889794071, "grad_norm": 0.2922199415284455, "learning_rate": 1.4924328976544446e-07, "loss": 0.4766, "step": 20425 }, { "epoch": 0.9244625480878027, "grad_norm": 0.6427471456037717, "learning_rate": 1.490656083463654e-07, "loss": 0.3016, "step": 20426 }, { "epoch": 0.9245078071961982, "grad_norm": 0.6452756589772367, "learning_rate": 1.4888803115896745e-07, "loss": 0.2993, "step": 20427 }, { "epoch": 0.9245530663045938, "grad_norm": 0.7535574291069664, "learning_rate": 1.4871055820706692e-07, "loss": 0.297, "step": 20428 }, { "epoch": 0.9245983254129894, "grad_norm": 0.619299210313287, "learning_rate": 1.4853318949447747e-07, "loss": 0.3203, "step": 20429 }, { "epoch": 0.924643584521385, "grad_norm": 0.7475947457906347, "learning_rate": 1.4835592502500883e-07, "loss": 0.2791, "step": 20430 }, { "epoch": 0.9246888436297805, "grad_norm": 0.6180770913988448, "learning_rate": 1.4817876480247074e-07, "loss": 0.2761, "step": 20431 }, { "epoch": 0.924734102738176, "grad_norm": 0.7096132177751993, "learning_rate": 1.4800170883066954e-07, "loss": 0.2817, "step": 20432 }, { "epoch": 0.9247793618465716, "grad_norm": 0.27086503197020917, "learning_rate": 1.4782475711341115e-07, "loss": 0.4727, "step": 20433 }, { "epoch": 0.9248246209549672, "grad_norm": 0.5942397803198581, "learning_rate": 1.4764790965449528e-07, "loss": 0.2906, "step": 20434 }, { "epoch": 0.9248698800633628, "grad_norm": 0.6921617036728822, "learning_rate": 1.474711664577233e-07, "loss": 0.2978, "step": 20435 }, { "epoch": 0.9249151391717583, "grad_norm": 0.5857174790255871, "learning_rate": 1.4729452752689277e-07, "loss": 0.3047, "step": 20436 }, { "epoch": 0.9249603982801539, "grad_norm": 0.7227079550701959, "learning_rate": 1.471179928657984e-07, "loss": 0.2796, "step": 20437 }, { "epoch": 0.9250056573885495, "grad_norm": 0.6329769635085243, "learning_rate": 1.4694156247823387e-07, "loss": 0.3013, "step": 20438 }, { "epoch": 0.925050916496945, "grad_norm": 0.301921818944306, "learning_rate": 1.4676523636799057e-07, "loss": 0.4498, "step": 20439 }, { "epoch": 0.9250961756053405, "grad_norm": 0.5936333054729149, "learning_rate": 1.4658901453885654e-07, "loss": 0.2787, "step": 20440 }, { "epoch": 0.9251414347137361, "grad_norm": 0.6066804280429381, "learning_rate": 1.464128969946188e-07, "loss": 0.3102, "step": 20441 }, { "epoch": 0.9251866938221317, "grad_norm": 0.5606739862421105, "learning_rate": 1.4623688373906098e-07, "loss": 0.2788, "step": 20442 }, { "epoch": 0.9252319529305273, "grad_norm": 0.6343599251414981, "learning_rate": 1.4606097477596504e-07, "loss": 0.3192, "step": 20443 }, { "epoch": 0.9252772120389229, "grad_norm": 0.5491031501485523, "learning_rate": 1.4588517010911073e-07, "loss": 0.3119, "step": 20444 }, { "epoch": 0.9253224711473184, "grad_norm": 0.6006706830936251, "learning_rate": 1.4570946974227674e-07, "loss": 0.2711, "step": 20445 }, { "epoch": 0.925367730255714, "grad_norm": 0.6806183941372534, "learning_rate": 1.455338736792372e-07, "loss": 0.3267, "step": 20446 }, { "epoch": 0.9254129893641095, "grad_norm": 0.6494127773785351, "learning_rate": 1.4535838192376527e-07, "loss": 0.3118, "step": 20447 }, { "epoch": 0.9254582484725051, "grad_norm": 0.6462344113018397, "learning_rate": 1.4518299447963126e-07, "loss": 0.3411, "step": 20448 }, { "epoch": 0.9255035075809006, "grad_norm": 0.5892924906517587, "learning_rate": 1.4500771135060486e-07, "loss": 0.3017, "step": 20449 }, { "epoch": 0.9255487666892962, "grad_norm": 0.6098691150241621, "learning_rate": 1.4483253254045205e-07, "loss": 0.2883, "step": 20450 }, { "epoch": 0.9255940257976918, "grad_norm": 0.6008950850538864, "learning_rate": 1.4465745805293584e-07, "loss": 0.3161, "step": 20451 }, { "epoch": 0.9256392849060874, "grad_norm": 0.6600949221221687, "learning_rate": 1.444824878918194e-07, "loss": 0.2913, "step": 20452 }, { "epoch": 0.925684544014483, "grad_norm": 0.5904981565608284, "learning_rate": 1.4430762206086136e-07, "loss": 0.2743, "step": 20453 }, { "epoch": 0.9257298031228784, "grad_norm": 0.604631254211844, "learning_rate": 1.441328605638198e-07, "loss": 0.3187, "step": 20454 }, { "epoch": 0.925775062231274, "grad_norm": 0.6057068609858999, "learning_rate": 1.4395820340444954e-07, "loss": 0.2989, "step": 20455 }, { "epoch": 0.9258203213396696, "grad_norm": 0.6196797755533714, "learning_rate": 1.4378365058650257e-07, "loss": 0.3047, "step": 20456 }, { "epoch": 0.9258655804480652, "grad_norm": 0.5852014846220924, "learning_rate": 1.436092021137303e-07, "loss": 0.2727, "step": 20457 }, { "epoch": 0.9259108395564607, "grad_norm": 0.6442860368973576, "learning_rate": 1.4343485798988198e-07, "loss": 0.3025, "step": 20458 }, { "epoch": 0.9259560986648563, "grad_norm": 0.6644307473314526, "learning_rate": 1.4326061821870186e-07, "loss": 0.2908, "step": 20459 }, { "epoch": 0.9260013577732519, "grad_norm": 0.6125065176034205, "learning_rate": 1.4308648280393466e-07, "loss": 0.2927, "step": 20460 }, { "epoch": 0.9260466168816475, "grad_norm": 0.7227698857705623, "learning_rate": 1.429124517493219e-07, "loss": 0.2954, "step": 20461 }, { "epoch": 0.926091875990043, "grad_norm": 0.6853652240967014, "learning_rate": 1.4273852505860443e-07, "loss": 0.2987, "step": 20462 }, { "epoch": 0.9261371350984385, "grad_norm": 0.2613456844771442, "learning_rate": 1.4256470273551705e-07, "loss": 0.474, "step": 20463 }, { "epoch": 0.9261823942068341, "grad_norm": 1.0479860117384798, "learning_rate": 1.4239098478379565e-07, "loss": 0.2837, "step": 20464 }, { "epoch": 0.9262276533152297, "grad_norm": 0.5307899678938974, "learning_rate": 1.4221737120717338e-07, "loss": 0.2954, "step": 20465 }, { "epoch": 0.9262729124236253, "grad_norm": 0.6494081792983811, "learning_rate": 1.4204386200938057e-07, "loss": 0.3101, "step": 20466 }, { "epoch": 0.9263181715320208, "grad_norm": 0.26612494706636547, "learning_rate": 1.4187045719414427e-07, "loss": 0.4481, "step": 20467 }, { "epoch": 0.9263634306404164, "grad_norm": 0.5947286912424484, "learning_rate": 1.4169715676519203e-07, "loss": 0.2688, "step": 20468 }, { "epoch": 0.926408689748812, "grad_norm": 0.5686910911062496, "learning_rate": 1.4152396072624587e-07, "loss": 0.2903, "step": 20469 }, { "epoch": 0.9264539488572076, "grad_norm": 0.5883994277724498, "learning_rate": 1.413508690810289e-07, "loss": 0.3428, "step": 20470 }, { "epoch": 0.926499207965603, "grad_norm": 0.6112876770382101, "learning_rate": 1.4117788183325986e-07, "loss": 0.2879, "step": 20471 }, { "epoch": 0.9265444670739986, "grad_norm": 0.6009679063039658, "learning_rate": 1.410049989866541e-07, "loss": 0.2958, "step": 20472 }, { "epoch": 0.9265897261823942, "grad_norm": 0.7066580354453394, "learning_rate": 1.4083222054492862e-07, "loss": 0.2611, "step": 20473 }, { "epoch": 0.9266349852907898, "grad_norm": 0.6038274186738244, "learning_rate": 1.4065954651179492e-07, "loss": 0.3046, "step": 20474 }, { "epoch": 0.9266802443991853, "grad_norm": 0.26245478634417746, "learning_rate": 1.404869768909628e-07, "loss": 0.461, "step": 20475 }, { "epoch": 0.9267255035075809, "grad_norm": 0.5982783265922673, "learning_rate": 1.4031451168614097e-07, "loss": 0.2844, "step": 20476 }, { "epoch": 0.9267707626159765, "grad_norm": 0.5835922919662517, "learning_rate": 1.4014215090103424e-07, "loss": 0.3082, "step": 20477 }, { "epoch": 0.926816021724372, "grad_norm": 0.5992267862265387, "learning_rate": 1.3996989453934795e-07, "loss": 0.2762, "step": 20478 }, { "epoch": 0.9268612808327676, "grad_norm": 0.2659060116019865, "learning_rate": 1.397977426047814e-07, "loss": 0.4519, "step": 20479 }, { "epoch": 0.9269065399411631, "grad_norm": 0.2554853549334449, "learning_rate": 1.396256951010344e-07, "loss": 0.4599, "step": 20480 }, { "epoch": 0.9269517990495587, "grad_norm": 0.6112969280647528, "learning_rate": 1.39453752031804e-07, "loss": 0.3366, "step": 20481 }, { "epoch": 0.9269970581579543, "grad_norm": 0.6211876213503223, "learning_rate": 1.3928191340078446e-07, "loss": 0.3113, "step": 20482 }, { "epoch": 0.9270423172663499, "grad_norm": 0.6725018512657114, "learning_rate": 1.391101792116678e-07, "loss": 0.3026, "step": 20483 }, { "epoch": 0.9270875763747454, "grad_norm": 0.6567315780209119, "learning_rate": 1.38938549468145e-07, "loss": 0.2821, "step": 20484 }, { "epoch": 0.927132835483141, "grad_norm": 0.6525421756495428, "learning_rate": 1.3876702417390197e-07, "loss": 0.3291, "step": 20485 }, { "epoch": 0.9271780945915366, "grad_norm": 0.5911414570134386, "learning_rate": 1.3859560333262578e-07, "loss": 0.3101, "step": 20486 }, { "epoch": 0.9272233536999321, "grad_norm": 0.5579965197267512, "learning_rate": 1.384242869480007e-07, "loss": 0.2805, "step": 20487 }, { "epoch": 0.9272686128083277, "grad_norm": 0.665727549513722, "learning_rate": 1.3825307502370487e-07, "loss": 0.2891, "step": 20488 }, { "epoch": 0.9273138719167232, "grad_norm": 0.5640027269792147, "learning_rate": 1.3808196756341928e-07, "loss": 0.2708, "step": 20489 }, { "epoch": 0.9273591310251188, "grad_norm": 0.6294402244906123, "learning_rate": 1.3791096457081987e-07, "loss": 0.2878, "step": 20490 }, { "epoch": 0.9274043901335144, "grad_norm": 0.8039347348919981, "learning_rate": 1.3774006604958202e-07, "loss": 0.3326, "step": 20491 }, { "epoch": 0.92744964924191, "grad_norm": 0.2858853839858013, "learning_rate": 1.3756927200337555e-07, "loss": 0.4793, "step": 20492 }, { "epoch": 0.9274949083503055, "grad_norm": 0.6492039128977286, "learning_rate": 1.37398582435872e-07, "loss": 0.2837, "step": 20493 }, { "epoch": 0.927540167458701, "grad_norm": 0.6545488674984096, "learning_rate": 1.3722799735073898e-07, "loss": 0.3027, "step": 20494 }, { "epoch": 0.9275854265670966, "grad_norm": 0.6080759408041968, "learning_rate": 1.3705751675164137e-07, "loss": 0.2678, "step": 20495 }, { "epoch": 0.9276306856754922, "grad_norm": 0.6326392617289096, "learning_rate": 1.3688714064224175e-07, "loss": 0.2975, "step": 20496 }, { "epoch": 0.9276759447838877, "grad_norm": 0.6756598727801661, "learning_rate": 1.367168690262022e-07, "loss": 0.3192, "step": 20497 }, { "epoch": 0.9277212038922833, "grad_norm": 0.2631476713639336, "learning_rate": 1.3654670190718035e-07, "loss": 0.447, "step": 20498 }, { "epoch": 0.9277664630006789, "grad_norm": 0.5927515198474593, "learning_rate": 1.3637663928883328e-07, "loss": 0.2915, "step": 20499 }, { "epoch": 0.9278117221090745, "grad_norm": 0.6149204123557976, "learning_rate": 1.3620668117481471e-07, "loss": 0.2322, "step": 20500 }, { "epoch": 0.9278569812174701, "grad_norm": 0.6259834762829701, "learning_rate": 1.3603682756877624e-07, "loss": 0.29, "step": 20501 }, { "epoch": 0.9279022403258655, "grad_norm": 0.592115814894418, "learning_rate": 1.3586707847436765e-07, "loss": 0.2761, "step": 20502 }, { "epoch": 0.9279474994342611, "grad_norm": 0.6097902401023106, "learning_rate": 1.356974338952366e-07, "loss": 0.3121, "step": 20503 }, { "epoch": 0.9279927585426567, "grad_norm": 0.6350445882270588, "learning_rate": 1.3552789383502906e-07, "loss": 0.278, "step": 20504 }, { "epoch": 0.9280380176510523, "grad_norm": 0.617967458484192, "learning_rate": 1.3535845829738547e-07, "loss": 0.3052, "step": 20505 }, { "epoch": 0.9280832767594478, "grad_norm": 0.5859739429927824, "learning_rate": 1.3518912728594902e-07, "loss": 0.2497, "step": 20506 }, { "epoch": 0.9281285358678434, "grad_norm": 0.5816997049619769, "learning_rate": 1.350199008043568e-07, "loss": 0.3071, "step": 20507 }, { "epoch": 0.928173794976239, "grad_norm": 0.6387081232887973, "learning_rate": 1.3485077885624587e-07, "loss": 0.295, "step": 20508 }, { "epoch": 0.9282190540846346, "grad_norm": 0.6058050849986356, "learning_rate": 1.3468176144524837e-07, "loss": 0.2468, "step": 20509 }, { "epoch": 0.92826431319303, "grad_norm": 0.6973876198738655, "learning_rate": 1.3451284857499803e-07, "loss": 0.3326, "step": 20510 }, { "epoch": 0.9283095723014256, "grad_norm": 0.5773441879086344, "learning_rate": 1.3434404024912307e-07, "loss": 0.2416, "step": 20511 }, { "epoch": 0.9283548314098212, "grad_norm": 0.26222600729671075, "learning_rate": 1.3417533647125114e-07, "loss": 0.4776, "step": 20512 }, { "epoch": 0.9284000905182168, "grad_norm": 0.5985889900254078, "learning_rate": 1.3400673724500713e-07, "loss": 0.313, "step": 20513 }, { "epoch": 0.9284453496266124, "grad_norm": 1.1236766909494023, "learning_rate": 1.3383824257401256e-07, "loss": 0.2749, "step": 20514 }, { "epoch": 0.9284906087350079, "grad_norm": 0.3598829426277801, "learning_rate": 1.3366985246188958e-07, "loss": 0.4529, "step": 20515 }, { "epoch": 0.9285358678434035, "grad_norm": 0.6381871757977565, "learning_rate": 1.335015669122558e-07, "loss": 0.3305, "step": 20516 }, { "epoch": 0.9285811269517991, "grad_norm": 0.27547840975691507, "learning_rate": 1.3333338592872725e-07, "loss": 0.476, "step": 20517 }, { "epoch": 0.9286263860601947, "grad_norm": 0.654558987708579, "learning_rate": 1.3316530951491712e-07, "loss": 0.3096, "step": 20518 }, { "epoch": 0.9286716451685901, "grad_norm": 0.6093967244105901, "learning_rate": 1.3299733767443645e-07, "loss": 0.2882, "step": 20519 }, { "epoch": 0.9287169042769857, "grad_norm": 0.6398248693621602, "learning_rate": 1.3282947041089678e-07, "loss": 0.3011, "step": 20520 }, { "epoch": 0.9287621633853813, "grad_norm": 0.6069225158778648, "learning_rate": 1.3266170772790244e-07, "loss": 0.2636, "step": 20521 }, { "epoch": 0.9288074224937769, "grad_norm": 0.26498649232465793, "learning_rate": 1.3249404962905832e-07, "loss": 0.4825, "step": 20522 }, { "epoch": 0.9288526816021725, "grad_norm": 0.4456432719083173, "learning_rate": 1.3232649611796878e-07, "loss": 0.4737, "step": 20523 }, { "epoch": 0.928897940710568, "grad_norm": 0.6128672166428326, "learning_rate": 1.3215904719823313e-07, "loss": 0.2678, "step": 20524 }, { "epoch": 0.9289431998189636, "grad_norm": 0.629973709969791, "learning_rate": 1.3199170287344797e-07, "loss": 0.3146, "step": 20525 }, { "epoch": 0.9289884589273592, "grad_norm": 0.26166497076034695, "learning_rate": 1.3182446314721154e-07, "loss": 0.4286, "step": 20526 }, { "epoch": 0.9290337180357547, "grad_norm": 0.7510941062492887, "learning_rate": 1.316573280231148e-07, "loss": 0.2927, "step": 20527 }, { "epoch": 0.9290789771441502, "grad_norm": 0.28980127141565226, "learning_rate": 1.3149029750475052e-07, "loss": 0.4667, "step": 20528 }, { "epoch": 0.9291242362525458, "grad_norm": 0.5980028104852588, "learning_rate": 1.313233715957074e-07, "loss": 0.3383, "step": 20529 }, { "epoch": 0.9291694953609414, "grad_norm": 0.700142480044533, "learning_rate": 1.3115655029957207e-07, "loss": 0.3128, "step": 20530 }, { "epoch": 0.929214754469337, "grad_norm": 0.6492349316199251, "learning_rate": 1.3098983361992834e-07, "loss": 0.31, "step": 20531 }, { "epoch": 0.9292600135777325, "grad_norm": 0.6112670944838561, "learning_rate": 1.3082322156035942e-07, "loss": 0.3311, "step": 20532 }, { "epoch": 0.9293052726861281, "grad_norm": 0.2691885698944432, "learning_rate": 1.3065671412444526e-07, "loss": 0.4467, "step": 20533 }, { "epoch": 0.9293505317945236, "grad_norm": 0.640175163072835, "learning_rate": 1.3049031131576294e-07, "loss": 0.3091, "step": 20534 }, { "epoch": 0.9293957909029192, "grad_norm": 0.5699163789000503, "learning_rate": 1.30324013137888e-07, "loss": 0.2457, "step": 20535 }, { "epoch": 0.9294410500113148, "grad_norm": 0.6347799630181783, "learning_rate": 1.3015781959439478e-07, "loss": 0.2705, "step": 20536 }, { "epoch": 0.9294863091197103, "grad_norm": 0.6121332607475962, "learning_rate": 1.299917306888532e-07, "loss": 0.2613, "step": 20537 }, { "epoch": 0.9295315682281059, "grad_norm": 0.579396510362602, "learning_rate": 1.2982574642483148e-07, "loss": 0.302, "step": 20538 }, { "epoch": 0.9295768273365015, "grad_norm": 0.6109700917502632, "learning_rate": 1.2965986680589793e-07, "loss": 0.2864, "step": 20539 }, { "epoch": 0.9296220864448971, "grad_norm": 0.5384522792034464, "learning_rate": 1.2949409183561467e-07, "loss": 0.2482, "step": 20540 }, { "epoch": 0.9296673455532926, "grad_norm": 0.27588932629748913, "learning_rate": 1.2932842151754555e-07, "loss": 0.4769, "step": 20541 }, { "epoch": 0.9297126046616881, "grad_norm": 0.5868988348140859, "learning_rate": 1.2916285585524936e-07, "loss": 0.3171, "step": 20542 }, { "epoch": 0.9297578637700837, "grad_norm": 0.5927639601052319, "learning_rate": 1.2899739485228325e-07, "loss": 0.2801, "step": 20543 }, { "epoch": 0.9298031228784793, "grad_norm": 0.5875612986420774, "learning_rate": 1.2883203851220326e-07, "loss": 0.3065, "step": 20544 }, { "epoch": 0.9298483819868748, "grad_norm": 0.5992372891431096, "learning_rate": 1.286667868385627e-07, "loss": 0.2955, "step": 20545 }, { "epoch": 0.9298936410952704, "grad_norm": 0.5718712978749788, "learning_rate": 1.285016398349115e-07, "loss": 0.2895, "step": 20546 }, { "epoch": 0.929938900203666, "grad_norm": 0.30508341455682847, "learning_rate": 1.2833659750479787e-07, "loss": 0.472, "step": 20547 }, { "epoch": 0.9299841593120616, "grad_norm": 0.6402722698777407, "learning_rate": 1.281716598517685e-07, "loss": 0.2778, "step": 20548 }, { "epoch": 0.9300294184204572, "grad_norm": 0.6912387838467804, "learning_rate": 1.2800682687936826e-07, "loss": 0.2441, "step": 20549 }, { "epoch": 0.9300746775288526, "grad_norm": 0.5550573796308262, "learning_rate": 1.2784209859113773e-07, "loss": 0.2777, "step": 20550 }, { "epoch": 0.9301199366372482, "grad_norm": 0.7114796618199457, "learning_rate": 1.2767747499061677e-07, "loss": 0.316, "step": 20551 }, { "epoch": 0.9301651957456438, "grad_norm": 0.6176338168213179, "learning_rate": 1.2751295608134262e-07, "loss": 0.2525, "step": 20552 }, { "epoch": 0.9302104548540394, "grad_norm": 0.257163146418204, "learning_rate": 1.273485418668502e-07, "loss": 0.4685, "step": 20553 }, { "epoch": 0.9302557139624349, "grad_norm": 0.5724525940518886, "learning_rate": 1.2718423235067278e-07, "loss": 0.3013, "step": 20554 }, { "epoch": 0.9303009730708305, "grad_norm": 0.6024815855082362, "learning_rate": 1.2702002753634092e-07, "loss": 0.3173, "step": 20555 }, { "epoch": 0.9303462321792261, "grad_norm": 0.6725907048687568, "learning_rate": 1.2685592742738173e-07, "loss": 0.3283, "step": 20556 }, { "epoch": 0.9303914912876217, "grad_norm": 0.2593537537342258, "learning_rate": 1.266919320273219e-07, "loss": 0.4496, "step": 20557 }, { "epoch": 0.9304367503960173, "grad_norm": 0.6055413336996001, "learning_rate": 1.2652804133968578e-07, "loss": 0.3077, "step": 20558 }, { "epoch": 0.9304820095044127, "grad_norm": 0.27130371236025036, "learning_rate": 1.263642553679939e-07, "loss": 0.4526, "step": 20559 }, { "epoch": 0.9305272686128083, "grad_norm": 0.2867604133985603, "learning_rate": 1.2620057411576568e-07, "loss": 0.4429, "step": 20560 }, { "epoch": 0.9305725277212039, "grad_norm": 0.7368223557648287, "learning_rate": 1.2603699758651888e-07, "loss": 0.3223, "step": 20561 }, { "epoch": 0.9306177868295995, "grad_norm": 0.5805378186525145, "learning_rate": 1.2587352578376787e-07, "loss": 0.3017, "step": 20562 }, { "epoch": 0.930663045937995, "grad_norm": 0.28028151826188114, "learning_rate": 1.2571015871102433e-07, "loss": 0.4467, "step": 20563 }, { "epoch": 0.9307083050463906, "grad_norm": 0.28380587605428187, "learning_rate": 1.2554689637179984e-07, "loss": 0.4622, "step": 20564 }, { "epoch": 0.9307535641547862, "grad_norm": 0.6322006217264248, "learning_rate": 1.2538373876960162e-07, "loss": 0.2572, "step": 20565 }, { "epoch": 0.9307988232631818, "grad_norm": 0.28134304880944516, "learning_rate": 1.2522068590793578e-07, "loss": 0.4756, "step": 20566 }, { "epoch": 0.9308440823715772, "grad_norm": 0.5847147524196179, "learning_rate": 1.2505773779030562e-07, "loss": 0.342, "step": 20567 }, { "epoch": 0.9308893414799728, "grad_norm": 0.6099517197345848, "learning_rate": 1.2489489442021275e-07, "loss": 0.28, "step": 20568 }, { "epoch": 0.9309346005883684, "grad_norm": 0.3126201811819335, "learning_rate": 1.2473215580115493e-07, "loss": 0.4718, "step": 20569 }, { "epoch": 0.930979859696764, "grad_norm": 0.5750391423215298, "learning_rate": 1.2456952193663052e-07, "loss": 0.3037, "step": 20570 }, { "epoch": 0.9310251188051596, "grad_norm": 0.613391861418245, "learning_rate": 1.2440699283013335e-07, "loss": 0.3163, "step": 20571 }, { "epoch": 0.9310703779135551, "grad_norm": 0.5927581691696879, "learning_rate": 1.2424456848515565e-07, "loss": 0.27, "step": 20572 }, { "epoch": 0.9311156370219507, "grad_norm": 0.6240474780698677, "learning_rate": 1.2408224890518683e-07, "loss": 0.2794, "step": 20573 }, { "epoch": 0.9311608961303462, "grad_norm": 0.6195892179866072, "learning_rate": 1.2392003409371578e-07, "loss": 0.3358, "step": 20574 }, { "epoch": 0.9312061552387418, "grad_norm": 0.6295648159672592, "learning_rate": 1.2375792405422748e-07, "loss": 0.2981, "step": 20575 }, { "epoch": 0.9312514143471373, "grad_norm": 0.6303571979522588, "learning_rate": 1.2359591879020528e-07, "loss": 0.3139, "step": 20576 }, { "epoch": 0.9312966734555329, "grad_norm": 0.2804170390755626, "learning_rate": 1.2343401830512914e-07, "loss": 0.4946, "step": 20577 }, { "epoch": 0.9313419325639285, "grad_norm": 0.2491879000750708, "learning_rate": 1.232722226024796e-07, "loss": 0.4687, "step": 20578 }, { "epoch": 0.9313871916723241, "grad_norm": 0.5779750601252036, "learning_rate": 1.231105316857323e-07, "loss": 0.2897, "step": 20579 }, { "epoch": 0.9314324507807196, "grad_norm": 0.6781888687374773, "learning_rate": 1.22948945558361e-07, "loss": 0.3061, "step": 20580 }, { "epoch": 0.9314777098891152, "grad_norm": 0.5907780247980898, "learning_rate": 1.2278746422383858e-07, "loss": 0.2441, "step": 20581 }, { "epoch": 0.9315229689975107, "grad_norm": 0.5702829526712206, "learning_rate": 1.226260876856339e-07, "loss": 0.2912, "step": 20582 }, { "epoch": 0.9315682281059063, "grad_norm": 0.23769228889810345, "learning_rate": 1.2246481594721582e-07, "loss": 0.4518, "step": 20583 }, { "epoch": 0.9316134872143019, "grad_norm": 0.6490998239245681, "learning_rate": 1.2230364901204773e-07, "loss": 0.2644, "step": 20584 }, { "epoch": 0.9316587463226974, "grad_norm": 0.5921873669323882, "learning_rate": 1.2214258688359347e-07, "loss": 0.2807, "step": 20585 }, { "epoch": 0.931704005431093, "grad_norm": 0.6513329181124101, "learning_rate": 1.2198162956531423e-07, "loss": 0.2608, "step": 20586 }, { "epoch": 0.9317492645394886, "grad_norm": 0.5945147095377089, "learning_rate": 1.2182077706066776e-07, "loss": 0.2831, "step": 20587 }, { "epoch": 0.9317945236478842, "grad_norm": 0.27614195415263953, "learning_rate": 1.2166002937311128e-07, "loss": 0.4728, "step": 20588 }, { "epoch": 0.9318397827562797, "grad_norm": 0.5984964831266933, "learning_rate": 1.2149938650609704e-07, "loss": 0.2974, "step": 20589 }, { "epoch": 0.9318850418646752, "grad_norm": 0.5825886799572433, "learning_rate": 1.2133884846307898e-07, "loss": 0.2943, "step": 20590 }, { "epoch": 0.9319303009730708, "grad_norm": 0.6139118481888114, "learning_rate": 1.2117841524750485e-07, "loss": 0.3212, "step": 20591 }, { "epoch": 0.9319755600814664, "grad_norm": 0.6098701406625552, "learning_rate": 1.210180868628219e-07, "loss": 0.2863, "step": 20592 }, { "epoch": 0.9320208191898619, "grad_norm": 0.6222401621125467, "learning_rate": 1.2085786331247574e-07, "loss": 0.2707, "step": 20593 }, { "epoch": 0.9320660782982575, "grad_norm": 0.6321271350748866, "learning_rate": 1.206977445999097e-07, "loss": 0.2575, "step": 20594 }, { "epoch": 0.9321113374066531, "grad_norm": 0.65663036194109, "learning_rate": 1.2053773072856323e-07, "loss": 0.3058, "step": 20595 }, { "epoch": 0.9321565965150487, "grad_norm": 0.5785509596715999, "learning_rate": 1.2037782170187472e-07, "loss": 0.2685, "step": 20596 }, { "epoch": 0.9322018556234443, "grad_norm": 0.6303907355875169, "learning_rate": 1.2021801752328034e-07, "loss": 0.289, "step": 20597 }, { "epoch": 0.9322471147318397, "grad_norm": 0.5863599732194953, "learning_rate": 1.2005831819621284e-07, "loss": 0.2338, "step": 20598 }, { "epoch": 0.9322923738402353, "grad_norm": 0.610079565117258, "learning_rate": 1.198987237241056e-07, "loss": 0.2871, "step": 20599 }, { "epoch": 0.9323376329486309, "grad_norm": 0.6265622624776339, "learning_rate": 1.1973923411038646e-07, "loss": 0.3204, "step": 20600 }, { "epoch": 0.9323828920570265, "grad_norm": 0.6561079202368796, "learning_rate": 1.195798493584821e-07, "loss": 0.2588, "step": 20601 }, { "epoch": 0.932428151165422, "grad_norm": 0.25492769257299186, "learning_rate": 1.1942056947181757e-07, "loss": 0.4727, "step": 20602 }, { "epoch": 0.9324734102738176, "grad_norm": 0.6279894540127755, "learning_rate": 1.1926139445381624e-07, "loss": 0.283, "step": 20603 }, { "epoch": 0.9325186693822132, "grad_norm": 0.2566352653015581, "learning_rate": 1.1910232430789703e-07, "loss": 0.458, "step": 20604 }, { "epoch": 0.9325639284906088, "grad_norm": 0.6515167565739174, "learning_rate": 1.1894335903747834e-07, "loss": 0.2847, "step": 20605 }, { "epoch": 0.9326091875990044, "grad_norm": 0.6284090350465346, "learning_rate": 1.1878449864597575e-07, "loss": 0.3173, "step": 20606 }, { "epoch": 0.9326544467073998, "grad_norm": 0.26375783626835564, "learning_rate": 1.1862574313680264e-07, "loss": 0.4519, "step": 20607 }, { "epoch": 0.9326997058157954, "grad_norm": 0.6398174159409153, "learning_rate": 1.1846709251337129e-07, "loss": 0.2929, "step": 20608 }, { "epoch": 0.932744964924191, "grad_norm": 0.2867187620695821, "learning_rate": 1.1830854677908842e-07, "loss": 0.4455, "step": 20609 }, { "epoch": 0.9327902240325866, "grad_norm": 0.5787649635408306, "learning_rate": 1.1815010593736298e-07, "loss": 0.2664, "step": 20610 }, { "epoch": 0.9328354831409821, "grad_norm": 0.585519096160613, "learning_rate": 1.1799176999159722e-07, "loss": 0.2548, "step": 20611 }, { "epoch": 0.9328807422493777, "grad_norm": 0.26683750157452135, "learning_rate": 1.1783353894519512e-07, "loss": 0.4511, "step": 20612 }, { "epoch": 0.9329260013577733, "grad_norm": 0.5801571515630817, "learning_rate": 1.1767541280155614e-07, "loss": 0.3128, "step": 20613 }, { "epoch": 0.9329712604661689, "grad_norm": 0.3611265207641925, "learning_rate": 1.1751739156407649e-07, "loss": 0.4847, "step": 20614 }, { "epoch": 0.9330165195745643, "grad_norm": 0.6068286561560011, "learning_rate": 1.1735947523615344e-07, "loss": 0.2688, "step": 20615 }, { "epoch": 0.9330617786829599, "grad_norm": 0.5836598884521632, "learning_rate": 1.1720166382117925e-07, "loss": 0.2683, "step": 20616 }, { "epoch": 0.9331070377913555, "grad_norm": 0.5905638232357641, "learning_rate": 1.1704395732254515e-07, "loss": 0.281, "step": 20617 }, { "epoch": 0.9331522968997511, "grad_norm": 0.5952007858494884, "learning_rate": 1.1688635574363894e-07, "loss": 0.2656, "step": 20618 }, { "epoch": 0.9331975560081467, "grad_norm": 0.6158201663415633, "learning_rate": 1.1672885908784792e-07, "loss": 0.286, "step": 20619 }, { "epoch": 0.9332428151165422, "grad_norm": 0.561801502229038, "learning_rate": 1.1657146735855662e-07, "loss": 0.2793, "step": 20620 }, { "epoch": 0.9332880742249378, "grad_norm": 0.6148801897922445, "learning_rate": 1.1641418055914566e-07, "loss": 0.3061, "step": 20621 }, { "epoch": 0.9333333333333333, "grad_norm": 0.5864090389864688, "learning_rate": 1.1625699869299457e-07, "loss": 0.2931, "step": 20622 }, { "epoch": 0.9333785924417289, "grad_norm": 0.6281392670998087, "learning_rate": 1.1609992176348228e-07, "loss": 0.2819, "step": 20623 }, { "epoch": 0.9334238515501244, "grad_norm": 0.5604583332916402, "learning_rate": 1.1594294977398224e-07, "loss": 0.3034, "step": 20624 }, { "epoch": 0.93346911065852, "grad_norm": 0.6333945542597627, "learning_rate": 1.1578608272786785e-07, "loss": 0.2666, "step": 20625 }, { "epoch": 0.9335143697669156, "grad_norm": 0.6070115405261841, "learning_rate": 1.1562932062851084e-07, "loss": 0.3127, "step": 20626 }, { "epoch": 0.9335596288753112, "grad_norm": 0.6164082922394221, "learning_rate": 1.1547266347927743e-07, "loss": 0.3199, "step": 20627 }, { "epoch": 0.9336048879837067, "grad_norm": 0.5987945549595481, "learning_rate": 1.1531611128353548e-07, "loss": 0.3147, "step": 20628 }, { "epoch": 0.9336501470921023, "grad_norm": 0.6618821649290977, "learning_rate": 1.1515966404464728e-07, "loss": 0.2713, "step": 20629 }, { "epoch": 0.9336954062004978, "grad_norm": 0.5678115155151787, "learning_rate": 1.1500332176597629e-07, "loss": 0.2605, "step": 20630 }, { "epoch": 0.9337406653088934, "grad_norm": 0.6471740693252899, "learning_rate": 1.1484708445087978e-07, "loss": 0.3178, "step": 20631 }, { "epoch": 0.933785924417289, "grad_norm": 0.5827959737037237, "learning_rate": 1.1469095210271675e-07, "loss": 0.3047, "step": 20632 }, { "epoch": 0.9338311835256845, "grad_norm": 0.6409107104206825, "learning_rate": 1.1453492472484118e-07, "loss": 0.2448, "step": 20633 }, { "epoch": 0.9338764426340801, "grad_norm": 0.5763797274333525, "learning_rate": 1.1437900232060483e-07, "loss": 0.2844, "step": 20634 }, { "epoch": 0.9339217017424757, "grad_norm": 0.6591185442707223, "learning_rate": 1.1422318489335838e-07, "loss": 0.3084, "step": 20635 }, { "epoch": 0.9339669608508713, "grad_norm": 0.6101336407060521, "learning_rate": 1.1406747244645078e-07, "loss": 0.2699, "step": 20636 }, { "epoch": 0.9340122199592668, "grad_norm": 0.6088189525568914, "learning_rate": 1.1391186498322771e-07, "loss": 0.2701, "step": 20637 }, { "epoch": 0.9340574790676623, "grad_norm": 0.647299207439683, "learning_rate": 1.1375636250703092e-07, "loss": 0.2833, "step": 20638 }, { "epoch": 0.9341027381760579, "grad_norm": 0.6330465226385166, "learning_rate": 1.1360096502120387e-07, "loss": 0.2807, "step": 20639 }, { "epoch": 0.9341479972844535, "grad_norm": 0.7381215076472419, "learning_rate": 1.1344567252908445e-07, "loss": 0.3136, "step": 20640 }, { "epoch": 0.9341932563928491, "grad_norm": 0.5984678510006978, "learning_rate": 1.1329048503400996e-07, "loss": 0.2919, "step": 20641 }, { "epoch": 0.9342385155012446, "grad_norm": 0.2648725606387613, "learning_rate": 1.1313540253931387e-07, "loss": 0.452, "step": 20642 }, { "epoch": 0.9342837746096402, "grad_norm": 0.6362303922975918, "learning_rate": 1.1298042504832963e-07, "loss": 0.3172, "step": 20643 }, { "epoch": 0.9343290337180358, "grad_norm": 0.5489044672031816, "learning_rate": 1.1282555256438622e-07, "loss": 0.2738, "step": 20644 }, { "epoch": 0.9343742928264314, "grad_norm": 0.6001819554223173, "learning_rate": 1.1267078509081209e-07, "loss": 0.2534, "step": 20645 }, { "epoch": 0.9344195519348268, "grad_norm": 0.5818315990671367, "learning_rate": 1.1251612263093292e-07, "loss": 0.2834, "step": 20646 }, { "epoch": 0.9344648110432224, "grad_norm": 0.6244052785314712, "learning_rate": 1.1236156518807106e-07, "loss": 0.3121, "step": 20647 }, { "epoch": 0.934510070151618, "grad_norm": 0.2778742047993764, "learning_rate": 1.1220711276554775e-07, "loss": 0.4486, "step": 20648 }, { "epoch": 0.9345553292600136, "grad_norm": 0.6124342133392783, "learning_rate": 1.1205276536668252e-07, "loss": 0.2993, "step": 20649 }, { "epoch": 0.9346005883684091, "grad_norm": 0.5638354052595087, "learning_rate": 1.118985229947911e-07, "loss": 0.2454, "step": 20650 }, { "epoch": 0.9346458474768047, "grad_norm": 0.6008229714251444, "learning_rate": 1.1174438565318691e-07, "loss": 0.2681, "step": 20651 }, { "epoch": 0.9346911065852003, "grad_norm": 0.6160137116835261, "learning_rate": 1.1159035334518343e-07, "loss": 0.2764, "step": 20652 }, { "epoch": 0.9347363656935959, "grad_norm": 0.6336472871929654, "learning_rate": 1.1143642607409023e-07, "loss": 0.2865, "step": 20653 }, { "epoch": 0.9347816248019915, "grad_norm": 0.6108194372529918, "learning_rate": 1.11282603843213e-07, "loss": 0.2658, "step": 20654 }, { "epoch": 0.9348268839103869, "grad_norm": 0.2761153769668537, "learning_rate": 1.1112888665585852e-07, "loss": 0.4549, "step": 20655 }, { "epoch": 0.9348721430187825, "grad_norm": 0.6343527944142431, "learning_rate": 1.109752745153292e-07, "loss": 0.3186, "step": 20656 }, { "epoch": 0.9349174021271781, "grad_norm": 0.5965075668169835, "learning_rate": 1.1082176742492623e-07, "loss": 0.2781, "step": 20657 }, { "epoch": 0.9349626612355737, "grad_norm": 0.6259405292833713, "learning_rate": 1.1066836538794645e-07, "loss": 0.2521, "step": 20658 }, { "epoch": 0.9350079203439692, "grad_norm": 0.6056507338546241, "learning_rate": 1.1051506840768833e-07, "loss": 0.3014, "step": 20659 }, { "epoch": 0.9350531794523648, "grad_norm": 0.5962640258792247, "learning_rate": 1.1036187648744311e-07, "loss": 0.3197, "step": 20660 }, { "epoch": 0.9350984385607604, "grad_norm": 0.28611351228558085, "learning_rate": 1.1020878963050485e-07, "loss": 0.4817, "step": 20661 }, { "epoch": 0.935143697669156, "grad_norm": 0.8179938486173777, "learning_rate": 1.10055807840162e-07, "loss": 0.3086, "step": 20662 }, { "epoch": 0.9351889567775514, "grad_norm": 0.6348754369470706, "learning_rate": 1.0990293111970085e-07, "loss": 0.2467, "step": 20663 }, { "epoch": 0.935234215885947, "grad_norm": 0.6353518229408754, "learning_rate": 1.0975015947240652e-07, "loss": 0.2806, "step": 20664 }, { "epoch": 0.9352794749943426, "grad_norm": 0.5646218500174285, "learning_rate": 1.0959749290156307e-07, "loss": 0.2838, "step": 20665 }, { "epoch": 0.9353247341027382, "grad_norm": 0.6067841157078296, "learning_rate": 1.0944493141044953e-07, "loss": 0.3028, "step": 20666 }, { "epoch": 0.9353699932111338, "grad_norm": 0.6727425150929137, "learning_rate": 1.0929247500234386e-07, "loss": 0.3187, "step": 20667 }, { "epoch": 0.9354152523195293, "grad_norm": 0.5660911183506132, "learning_rate": 1.0914012368052229e-07, "loss": 0.2772, "step": 20668 }, { "epoch": 0.9354605114279249, "grad_norm": 0.26176590366847236, "learning_rate": 1.0898787744825833e-07, "loss": 0.4611, "step": 20669 }, { "epoch": 0.9355057705363204, "grad_norm": 0.6051240123558446, "learning_rate": 1.0883573630882327e-07, "loss": 0.3043, "step": 20670 }, { "epoch": 0.935551029644716, "grad_norm": 0.28517697737707726, "learning_rate": 1.086837002654867e-07, "loss": 0.4457, "step": 20671 }, { "epoch": 0.9355962887531115, "grad_norm": 0.6380053084611067, "learning_rate": 1.0853176932151432e-07, "loss": 0.283, "step": 20672 }, { "epoch": 0.9356415478615071, "grad_norm": 0.6196496503170629, "learning_rate": 1.0837994348017133e-07, "loss": 0.2406, "step": 20673 }, { "epoch": 0.9356868069699027, "grad_norm": 0.5589782837257363, "learning_rate": 1.0822822274472011e-07, "loss": 0.32, "step": 20674 }, { "epoch": 0.9357320660782983, "grad_norm": 0.6107204778054238, "learning_rate": 1.0807660711842027e-07, "loss": 0.326, "step": 20675 }, { "epoch": 0.9357773251866939, "grad_norm": 0.5624460182654537, "learning_rate": 1.0792509660452921e-07, "loss": 0.2679, "step": 20676 }, { "epoch": 0.9358225842950894, "grad_norm": 0.6935165199403789, "learning_rate": 1.0777369120630377e-07, "loss": 0.2483, "step": 20677 }, { "epoch": 0.935867843403485, "grad_norm": 0.6135992877109803, "learning_rate": 1.0762239092699633e-07, "loss": 0.2961, "step": 20678 }, { "epoch": 0.9359131025118805, "grad_norm": 0.5876182716494162, "learning_rate": 1.0747119576985765e-07, "loss": 0.3076, "step": 20679 }, { "epoch": 0.9359583616202761, "grad_norm": 0.6489565993731783, "learning_rate": 1.0732010573813623e-07, "loss": 0.2848, "step": 20680 }, { "epoch": 0.9360036207286716, "grad_norm": 0.6246061102068522, "learning_rate": 1.0716912083508003e-07, "loss": 0.3443, "step": 20681 }, { "epoch": 0.9360488798370672, "grad_norm": 0.2884324669723961, "learning_rate": 1.07018241063932e-07, "loss": 0.4986, "step": 20682 }, { "epoch": 0.9360941389454628, "grad_norm": 0.29179013837324413, "learning_rate": 1.06867466427934e-07, "loss": 0.4963, "step": 20683 }, { "epoch": 0.9361393980538584, "grad_norm": 0.6290104692273423, "learning_rate": 1.0671679693032621e-07, "loss": 0.3042, "step": 20684 }, { "epoch": 0.9361846571622539, "grad_norm": 0.5833963754232364, "learning_rate": 1.0656623257434551e-07, "loss": 0.3001, "step": 20685 }, { "epoch": 0.9362299162706494, "grad_norm": 0.2653414681907125, "learning_rate": 1.0641577336322761e-07, "loss": 0.4641, "step": 20686 }, { "epoch": 0.936275175379045, "grad_norm": 0.271146481973984, "learning_rate": 1.0626541930020551e-07, "loss": 0.4616, "step": 20687 }, { "epoch": 0.9363204344874406, "grad_norm": 0.6061218869882479, "learning_rate": 1.0611517038850938e-07, "loss": 0.2942, "step": 20688 }, { "epoch": 0.9363656935958362, "grad_norm": 0.6484075906920178, "learning_rate": 1.0596502663136776e-07, "loss": 0.305, "step": 20689 }, { "epoch": 0.9364109527042317, "grad_norm": 0.6201439884226438, "learning_rate": 1.0581498803200696e-07, "loss": 0.2868, "step": 20690 }, { "epoch": 0.9364562118126273, "grad_norm": 0.26360757034570326, "learning_rate": 1.0566505459365106e-07, "loss": 0.4628, "step": 20691 }, { "epoch": 0.9365014709210229, "grad_norm": 0.2568003036889309, "learning_rate": 1.0551522631952083e-07, "loss": 0.4424, "step": 20692 }, { "epoch": 0.9365467300294185, "grad_norm": 0.670806761610968, "learning_rate": 1.0536550321283589e-07, "loss": 0.2877, "step": 20693 }, { "epoch": 0.9365919891378139, "grad_norm": 0.26251251174654255, "learning_rate": 1.0521588527681426e-07, "loss": 0.461, "step": 20694 }, { "epoch": 0.9366372482462095, "grad_norm": 0.62146968516843, "learning_rate": 1.0506637251467e-07, "loss": 0.3156, "step": 20695 }, { "epoch": 0.9366825073546051, "grad_norm": 0.6042091551748556, "learning_rate": 1.0491696492961501e-07, "loss": 0.2895, "step": 20696 }, { "epoch": 0.9367277664630007, "grad_norm": 0.6251271004901205, "learning_rate": 1.0476766252486114e-07, "loss": 0.3159, "step": 20697 }, { "epoch": 0.9367730255713962, "grad_norm": 0.7420180169744252, "learning_rate": 1.046184653036153e-07, "loss": 0.2795, "step": 20698 }, { "epoch": 0.9368182846797918, "grad_norm": 0.5907238212284458, "learning_rate": 1.044693732690838e-07, "loss": 0.2615, "step": 20699 }, { "epoch": 0.9368635437881874, "grad_norm": 0.5952101244183566, "learning_rate": 1.0432038642446962e-07, "loss": 0.3017, "step": 20700 }, { "epoch": 0.936908802896583, "grad_norm": 0.6378778314486471, "learning_rate": 1.0417150477297466e-07, "loss": 0.2927, "step": 20701 }, { "epoch": 0.9369540620049785, "grad_norm": 0.6421025040459815, "learning_rate": 1.0402272831779747e-07, "loss": 0.2539, "step": 20702 }, { "epoch": 0.936999321113374, "grad_norm": 0.6106293661526145, "learning_rate": 1.038740570621355e-07, "loss": 0.2801, "step": 20703 }, { "epoch": 0.9370445802217696, "grad_norm": 0.5933251946238154, "learning_rate": 1.0372549100918283e-07, "loss": 0.2888, "step": 20704 }, { "epoch": 0.9370898393301652, "grad_norm": 0.6368085204757286, "learning_rate": 1.0357703016213083e-07, "loss": 0.3189, "step": 20705 }, { "epoch": 0.9371350984385608, "grad_norm": 0.8117745628825518, "learning_rate": 1.0342867452417027e-07, "loss": 0.2458, "step": 20706 }, { "epoch": 0.9371803575469563, "grad_norm": 0.593644986221223, "learning_rate": 1.0328042409849026e-07, "loss": 0.3155, "step": 20707 }, { "epoch": 0.9372256166553519, "grad_norm": 0.6291442440715559, "learning_rate": 1.0313227888827326e-07, "loss": 0.314, "step": 20708 }, { "epoch": 0.9372708757637475, "grad_norm": 0.5833471343156812, "learning_rate": 1.0298423889670395e-07, "loss": 0.3022, "step": 20709 }, { "epoch": 0.937316134872143, "grad_norm": 0.6802737846945168, "learning_rate": 1.0283630412696422e-07, "loss": 0.3423, "step": 20710 }, { "epoch": 0.9373613939805386, "grad_norm": 0.6108634600364936, "learning_rate": 1.0268847458223152e-07, "loss": 0.2871, "step": 20711 }, { "epoch": 0.9374066530889341, "grad_norm": 0.6103729859109673, "learning_rate": 1.0254075026568222e-07, "loss": 0.2992, "step": 20712 }, { "epoch": 0.9374519121973297, "grad_norm": 0.68933196891107, "learning_rate": 1.0239313118049155e-07, "loss": 0.3295, "step": 20713 }, { "epoch": 0.9374971713057253, "grad_norm": 0.6171769175551642, "learning_rate": 1.0224561732982973e-07, "loss": 0.3062, "step": 20714 }, { "epoch": 0.9375424304141209, "grad_norm": 0.6477669112440486, "learning_rate": 1.0209820871686816e-07, "loss": 0.2867, "step": 20715 }, { "epoch": 0.9375876895225164, "grad_norm": 0.5893855899352083, "learning_rate": 1.0195090534477258e-07, "loss": 0.2634, "step": 20716 }, { "epoch": 0.937632948630912, "grad_norm": 0.5858031388266357, "learning_rate": 1.0180370721670941e-07, "loss": 0.29, "step": 20717 }, { "epoch": 0.9376782077393075, "grad_norm": 0.6179295544108785, "learning_rate": 1.0165661433583996e-07, "loss": 0.2734, "step": 20718 }, { "epoch": 0.9377234668477031, "grad_norm": 0.588071945852166, "learning_rate": 1.0150962670532671e-07, "loss": 0.2639, "step": 20719 }, { "epoch": 0.9377687259560986, "grad_norm": 0.654440332520569, "learning_rate": 1.0136274432832715e-07, "loss": 0.3177, "step": 20720 }, { "epoch": 0.9378139850644942, "grad_norm": 0.6446000731328926, "learning_rate": 1.0121596720799653e-07, "loss": 0.3317, "step": 20721 }, { "epoch": 0.9378592441728898, "grad_norm": 0.6274830605070559, "learning_rate": 1.01069295347489e-07, "loss": 0.2716, "step": 20722 }, { "epoch": 0.9379045032812854, "grad_norm": 0.571998916266495, "learning_rate": 1.00922728749957e-07, "loss": 0.2628, "step": 20723 }, { "epoch": 0.937949762389681, "grad_norm": 0.2695101803328294, "learning_rate": 1.0077626741854973e-07, "loss": 0.4834, "step": 20724 }, { "epoch": 0.9379950214980765, "grad_norm": 0.605191147919929, "learning_rate": 1.0062991135641242e-07, "loss": 0.2976, "step": 20725 }, { "epoch": 0.938040280606472, "grad_norm": 0.6077927199877912, "learning_rate": 1.0048366056669201e-07, "loss": 0.253, "step": 20726 }, { "epoch": 0.9380855397148676, "grad_norm": 0.2903158097699309, "learning_rate": 1.0033751505252987e-07, "loss": 0.4636, "step": 20727 }, { "epoch": 0.9381307988232632, "grad_norm": 0.5420678267629183, "learning_rate": 1.0019147481706626e-07, "loss": 0.2904, "step": 20728 }, { "epoch": 0.9381760579316587, "grad_norm": 0.566633524220132, "learning_rate": 1.0004553986343868e-07, "loss": 0.3297, "step": 20729 }, { "epoch": 0.9382213170400543, "grad_norm": 0.6372132574695685, "learning_rate": 9.989971019478406e-08, "loss": 0.2887, "step": 20730 }, { "epoch": 0.9382665761484499, "grad_norm": 0.5991609239206822, "learning_rate": 9.97539858142349e-08, "loss": 0.2905, "step": 20731 }, { "epoch": 0.9383118352568455, "grad_norm": 0.6303969117656907, "learning_rate": 9.960836672492313e-08, "loss": 0.294, "step": 20732 }, { "epoch": 0.938357094365241, "grad_norm": 0.6249848427120213, "learning_rate": 9.946285292997681e-08, "loss": 0.2889, "step": 20733 }, { "epoch": 0.9384023534736365, "grad_norm": 0.2906943402790951, "learning_rate": 9.931744443252234e-08, "loss": 0.4659, "step": 20734 }, { "epoch": 0.9384476125820321, "grad_norm": 0.601588500629499, "learning_rate": 9.917214123568498e-08, "loss": 0.2762, "step": 20735 }, { "epoch": 0.9384928716904277, "grad_norm": 0.5907351514132653, "learning_rate": 9.902694334258722e-08, "loss": 0.2967, "step": 20736 }, { "epoch": 0.9385381307988233, "grad_norm": 0.6572000147313092, "learning_rate": 9.88818507563477e-08, "loss": 0.2877, "step": 20737 }, { "epoch": 0.9385833899072188, "grad_norm": 0.5992227687883745, "learning_rate": 9.873686348008448e-08, "loss": 0.29, "step": 20738 }, { "epoch": 0.9386286490156144, "grad_norm": 0.578189315166507, "learning_rate": 9.859198151691341e-08, "loss": 0.2909, "step": 20739 }, { "epoch": 0.93867390812401, "grad_norm": 0.6107110327904092, "learning_rate": 9.844720486994752e-08, "loss": 0.2614, "step": 20740 }, { "epoch": 0.9387191672324056, "grad_norm": 0.6555056399941228, "learning_rate": 9.830253354229601e-08, "loss": 0.2667, "step": 20741 }, { "epoch": 0.938764426340801, "grad_norm": 0.6031419210418268, "learning_rate": 9.815796753706975e-08, "loss": 0.2976, "step": 20742 }, { "epoch": 0.9388096854491966, "grad_norm": 0.6368408296601574, "learning_rate": 9.801350685737288e-08, "loss": 0.2762, "step": 20743 }, { "epoch": 0.9388549445575922, "grad_norm": 0.2690041210892171, "learning_rate": 9.786915150631126e-08, "loss": 0.4666, "step": 20744 }, { "epoch": 0.9389002036659878, "grad_norm": 0.2698433370702812, "learning_rate": 9.772490148698522e-08, "loss": 0.4607, "step": 20745 }, { "epoch": 0.9389454627743834, "grad_norm": 0.690187986117633, "learning_rate": 9.758075680249556e-08, "loss": 0.3069, "step": 20746 }, { "epoch": 0.9389907218827789, "grad_norm": 0.5945569657567784, "learning_rate": 9.743671745593819e-08, "loss": 0.2957, "step": 20747 }, { "epoch": 0.9390359809911745, "grad_norm": 0.25335591426758786, "learning_rate": 9.729278345040894e-08, "loss": 0.4828, "step": 20748 }, { "epoch": 0.9390812400995701, "grad_norm": 0.27672363140824363, "learning_rate": 9.714895478900088e-08, "loss": 0.4656, "step": 20749 }, { "epoch": 0.9391264992079656, "grad_norm": 0.6777892512541681, "learning_rate": 9.700523147480267e-08, "loss": 0.3492, "step": 20750 }, { "epoch": 0.9391717583163611, "grad_norm": 0.6734370034072383, "learning_rate": 9.686161351090407e-08, "loss": 0.2834, "step": 20751 }, { "epoch": 0.9392170174247567, "grad_norm": 0.6072316263959454, "learning_rate": 9.671810090039091e-08, "loss": 0.2997, "step": 20752 }, { "epoch": 0.9392622765331523, "grad_norm": 0.7106171343448984, "learning_rate": 9.65746936463463e-08, "loss": 0.2921, "step": 20753 }, { "epoch": 0.9393075356415479, "grad_norm": 0.6342138174196997, "learning_rate": 9.643139175185168e-08, "loss": 0.3073, "step": 20754 }, { "epoch": 0.9393527947499434, "grad_norm": 0.6157424495113403, "learning_rate": 9.628819521998622e-08, "loss": 0.3091, "step": 20755 }, { "epoch": 0.939398053858339, "grad_norm": 0.6602136680439613, "learning_rate": 9.614510405382693e-08, "loss": 0.2859, "step": 20756 }, { "epoch": 0.9394433129667346, "grad_norm": 0.5572487286447867, "learning_rate": 9.600211825644856e-08, "loss": 0.2603, "step": 20757 }, { "epoch": 0.9394885720751301, "grad_norm": 0.261804490907496, "learning_rate": 9.585923783092255e-08, "loss": 0.4781, "step": 20758 }, { "epoch": 0.9395338311835257, "grad_norm": 0.2599046113400412, "learning_rate": 9.571646278032032e-08, "loss": 0.4741, "step": 20759 }, { "epoch": 0.9395790902919212, "grad_norm": 0.6160630272152418, "learning_rate": 9.557379310770831e-08, "loss": 0.3133, "step": 20760 }, { "epoch": 0.9396243494003168, "grad_norm": 0.26545070340134724, "learning_rate": 9.543122881615297e-08, "loss": 0.4767, "step": 20761 }, { "epoch": 0.9396696085087124, "grad_norm": 0.27299182989107457, "learning_rate": 9.528876990871793e-08, "loss": 0.4777, "step": 20762 }, { "epoch": 0.939714867617108, "grad_norm": 0.6274370211652937, "learning_rate": 9.514641638846245e-08, "loss": 0.2666, "step": 20763 }, { "epoch": 0.9397601267255035, "grad_norm": 0.26111906070225804, "learning_rate": 9.500416825844682e-08, "loss": 0.4652, "step": 20764 }, { "epoch": 0.939805385833899, "grad_norm": 0.6329097033253354, "learning_rate": 9.486202552172697e-08, "loss": 0.288, "step": 20765 }, { "epoch": 0.9398506449422946, "grad_norm": 0.25605798885576536, "learning_rate": 9.471998818135764e-08, "loss": 0.451, "step": 20766 }, { "epoch": 0.9398959040506902, "grad_norm": 0.5778371870857809, "learning_rate": 9.457805624038974e-08, "loss": 0.2861, "step": 20767 }, { "epoch": 0.9399411631590857, "grad_norm": 0.6041845447583947, "learning_rate": 9.443622970187415e-08, "loss": 0.2864, "step": 20768 }, { "epoch": 0.9399864222674813, "grad_norm": 0.6005423832736728, "learning_rate": 9.429450856885736e-08, "loss": 0.2925, "step": 20769 }, { "epoch": 0.9400316813758769, "grad_norm": 0.6072185585444965, "learning_rate": 9.415289284438523e-08, "loss": 0.2868, "step": 20770 }, { "epoch": 0.9400769404842725, "grad_norm": 0.5844322503894608, "learning_rate": 9.401138253149977e-08, "loss": 0.2903, "step": 20771 }, { "epoch": 0.9401221995926681, "grad_norm": 0.5930622865313223, "learning_rate": 9.386997763324246e-08, "loss": 0.3265, "step": 20772 }, { "epoch": 0.9401674587010636, "grad_norm": 0.5847618141175167, "learning_rate": 9.372867815265085e-08, "loss": 0.2775, "step": 20773 }, { "epoch": 0.9402127178094591, "grad_norm": 0.2686276723017246, "learning_rate": 9.358748409276196e-08, "loss": 0.4778, "step": 20774 }, { "epoch": 0.9402579769178547, "grad_norm": 0.2631650236530227, "learning_rate": 9.34463954566095e-08, "loss": 0.4637, "step": 20775 }, { "epoch": 0.9403032360262503, "grad_norm": 0.2683016572122838, "learning_rate": 9.330541224722378e-08, "loss": 0.4509, "step": 20776 }, { "epoch": 0.9403484951346458, "grad_norm": 0.7865247979900946, "learning_rate": 9.316453446763518e-08, "loss": 0.2846, "step": 20777 }, { "epoch": 0.9403937542430414, "grad_norm": 0.7200645826034856, "learning_rate": 9.302376212087128e-08, "loss": 0.2904, "step": 20778 }, { "epoch": 0.940439013351437, "grad_norm": 0.24627763004895964, "learning_rate": 9.28830952099552e-08, "loss": 0.4622, "step": 20779 }, { "epoch": 0.9404842724598326, "grad_norm": 0.8043361758815954, "learning_rate": 9.274253373791064e-08, "loss": 0.289, "step": 20780 }, { "epoch": 0.9405295315682282, "grad_norm": 0.6579035360584158, "learning_rate": 9.260207770775742e-08, "loss": 0.2704, "step": 20781 }, { "epoch": 0.9405747906766236, "grad_norm": 0.6009245769666856, "learning_rate": 9.246172712251422e-08, "loss": 0.2917, "step": 20782 }, { "epoch": 0.9406200497850192, "grad_norm": 0.6449447415026867, "learning_rate": 9.23214819851953e-08, "loss": 0.2859, "step": 20783 }, { "epoch": 0.9406653088934148, "grad_norm": 0.7220785813494247, "learning_rate": 9.218134229881548e-08, "loss": 0.2819, "step": 20784 }, { "epoch": 0.9407105680018104, "grad_norm": 0.6103242402975791, "learning_rate": 9.204130806638511e-08, "loss": 0.2776, "step": 20785 }, { "epoch": 0.9407558271102059, "grad_norm": 0.6693346951778233, "learning_rate": 9.190137929091403e-08, "loss": 0.3184, "step": 20786 }, { "epoch": 0.9408010862186015, "grad_norm": 0.2612038012801092, "learning_rate": 9.176155597540759e-08, "loss": 0.4541, "step": 20787 }, { "epoch": 0.9408463453269971, "grad_norm": 0.5896828998572881, "learning_rate": 9.162183812287117e-08, "loss": 0.2876, "step": 20788 }, { "epoch": 0.9408916044353927, "grad_norm": 0.6221679556915928, "learning_rate": 9.148222573630572e-08, "loss": 0.2627, "step": 20789 }, { "epoch": 0.9409368635437881, "grad_norm": 0.6494938453249083, "learning_rate": 9.13427188187127e-08, "loss": 0.3288, "step": 20790 }, { "epoch": 0.9409821226521837, "grad_norm": 0.2865355009159929, "learning_rate": 9.120331737308919e-08, "loss": 0.453, "step": 20791 }, { "epoch": 0.9410273817605793, "grad_norm": 0.6116692277411361, "learning_rate": 9.106402140242943e-08, "loss": 0.3156, "step": 20792 }, { "epoch": 0.9410726408689749, "grad_norm": 0.5744247951556596, "learning_rate": 9.092483090972714e-08, "loss": 0.2678, "step": 20793 }, { "epoch": 0.9411178999773705, "grad_norm": 0.5521819457940438, "learning_rate": 9.078574589797329e-08, "loss": 0.2615, "step": 20794 }, { "epoch": 0.941163159085766, "grad_norm": 0.9947077639779605, "learning_rate": 9.064676637015656e-08, "loss": 0.2676, "step": 20795 }, { "epoch": 0.9412084181941616, "grad_norm": 0.28656657329167173, "learning_rate": 9.050789232926293e-08, "loss": 0.4613, "step": 20796 }, { "epoch": 0.9412536773025572, "grad_norm": 0.6196670981555568, "learning_rate": 9.036912377827611e-08, "loss": 0.2707, "step": 20797 }, { "epoch": 0.9412989364109527, "grad_norm": 0.6390512291005944, "learning_rate": 9.023046072017761e-08, "loss": 0.2722, "step": 20798 }, { "epoch": 0.9413441955193482, "grad_norm": 0.6302381460021429, "learning_rate": 9.009190315794835e-08, "loss": 0.308, "step": 20799 }, { "epoch": 0.9413894546277438, "grad_norm": 0.6638656744628239, "learning_rate": 8.995345109456377e-08, "loss": 0.2708, "step": 20800 }, { "epoch": 0.9414347137361394, "grad_norm": 0.6004928538328558, "learning_rate": 8.981510453299925e-08, "loss": 0.3135, "step": 20801 }, { "epoch": 0.941479972844535, "grad_norm": 0.6447108416431894, "learning_rate": 8.967686347622795e-08, "loss": 0.3093, "step": 20802 }, { "epoch": 0.9415252319529305, "grad_norm": 0.5841072682650577, "learning_rate": 8.953872792722029e-08, "loss": 0.2828, "step": 20803 }, { "epoch": 0.9415704910613261, "grad_norm": 0.26468154786651854, "learning_rate": 8.940069788894389e-08, "loss": 0.4916, "step": 20804 }, { "epoch": 0.9416157501697217, "grad_norm": 0.6191241165888246, "learning_rate": 8.926277336436417e-08, "loss": 0.2887, "step": 20805 }, { "epoch": 0.9416610092781172, "grad_norm": 0.5628097473321694, "learning_rate": 8.912495435644542e-08, "loss": 0.2721, "step": 20806 }, { "epoch": 0.9417062683865128, "grad_norm": 0.6086306229708609, "learning_rate": 8.898724086814969e-08, "loss": 0.2938, "step": 20807 }, { "epoch": 0.9417515274949083, "grad_norm": 0.6401280558876904, "learning_rate": 8.88496329024341e-08, "loss": 0.3095, "step": 20808 }, { "epoch": 0.9417967866033039, "grad_norm": 0.616183908277212, "learning_rate": 8.87121304622568e-08, "loss": 0.3041, "step": 20809 }, { "epoch": 0.9418420457116995, "grad_norm": 0.5798350520600436, "learning_rate": 8.857473355057211e-08, "loss": 0.3106, "step": 20810 }, { "epoch": 0.9418873048200951, "grad_norm": 0.2668041636397088, "learning_rate": 8.843744217033212e-08, "loss": 0.4618, "step": 20811 }, { "epoch": 0.9419325639284906, "grad_norm": 0.5873337668301859, "learning_rate": 8.83002563244867e-08, "loss": 0.3185, "step": 20812 }, { "epoch": 0.9419778230368862, "grad_norm": 0.5872641247341023, "learning_rate": 8.816317601598346e-08, "loss": 0.2823, "step": 20813 }, { "epoch": 0.9420230821452817, "grad_norm": 0.6006640705752363, "learning_rate": 8.802620124776784e-08, "loss": 0.3097, "step": 20814 }, { "epoch": 0.9420683412536773, "grad_norm": 0.5830047965432855, "learning_rate": 8.78893320227836e-08, "loss": 0.2669, "step": 20815 }, { "epoch": 0.9421136003620728, "grad_norm": 0.6608002252496344, "learning_rate": 8.775256834397117e-08, "loss": 0.2908, "step": 20816 }, { "epoch": 0.9421588594704684, "grad_norm": 0.6022366659295527, "learning_rate": 8.761591021426929e-08, "loss": 0.2896, "step": 20817 }, { "epoch": 0.942204118578864, "grad_norm": 0.5838450301006088, "learning_rate": 8.747935763661397e-08, "loss": 0.2679, "step": 20818 }, { "epoch": 0.9422493776872596, "grad_norm": 0.5875361382398744, "learning_rate": 8.734291061394006e-08, "loss": 0.279, "step": 20819 }, { "epoch": 0.9422946367956552, "grad_norm": 0.5922955937040799, "learning_rate": 8.720656914917858e-08, "loss": 0.2504, "step": 20820 }, { "epoch": 0.9423398959040507, "grad_norm": 0.5978814440316839, "learning_rate": 8.707033324525937e-08, "loss": 0.2884, "step": 20821 }, { "epoch": 0.9423851550124462, "grad_norm": 0.5789905148749843, "learning_rate": 8.693420290510957e-08, "loss": 0.2639, "step": 20822 }, { "epoch": 0.9424304141208418, "grad_norm": 0.6163399307257849, "learning_rate": 8.679817813165514e-08, "loss": 0.2994, "step": 20823 }, { "epoch": 0.9424756732292374, "grad_norm": 0.8125102862013436, "learning_rate": 8.666225892781765e-08, "loss": 0.3021, "step": 20824 }, { "epoch": 0.9425209323376329, "grad_norm": 0.5710581587433058, "learning_rate": 8.65264452965181e-08, "loss": 0.2927, "step": 20825 }, { "epoch": 0.9425661914460285, "grad_norm": 0.6127986569841006, "learning_rate": 8.63907372406747e-08, "loss": 0.2423, "step": 20826 }, { "epoch": 0.9426114505544241, "grad_norm": 1.494464415261979, "learning_rate": 8.625513476320291e-08, "loss": 0.2586, "step": 20827 }, { "epoch": 0.9426567096628197, "grad_norm": 0.6249746398755778, "learning_rate": 8.61196378670176e-08, "loss": 0.2899, "step": 20828 }, { "epoch": 0.9427019687712153, "grad_norm": 0.6167952244077558, "learning_rate": 8.598424655502868e-08, "loss": 0.2926, "step": 20829 }, { "epoch": 0.9427472278796107, "grad_norm": 0.2539420689861876, "learning_rate": 8.584896083014715e-08, "loss": 0.4542, "step": 20830 }, { "epoch": 0.9427924869880063, "grad_norm": 0.6555364937207958, "learning_rate": 8.571378069527792e-08, "loss": 0.2907, "step": 20831 }, { "epoch": 0.9428377460964019, "grad_norm": 0.6597898779039076, "learning_rate": 8.557870615332642e-08, "loss": 0.3222, "step": 20832 }, { "epoch": 0.9428830052047975, "grad_norm": 0.5962301735572672, "learning_rate": 8.54437372071959e-08, "loss": 0.2518, "step": 20833 }, { "epoch": 0.942928264313193, "grad_norm": 0.6429082872294095, "learning_rate": 8.53088738597846e-08, "loss": 0.3197, "step": 20834 }, { "epoch": 0.9429735234215886, "grad_norm": 0.5912302009849205, "learning_rate": 8.517411611399129e-08, "loss": 0.3089, "step": 20835 }, { "epoch": 0.9430187825299842, "grad_norm": 0.25971197451149486, "learning_rate": 8.503946397271257e-08, "loss": 0.4697, "step": 20836 }, { "epoch": 0.9430640416383798, "grad_norm": 0.6035761663858425, "learning_rate": 8.490491743883944e-08, "loss": 0.2927, "step": 20837 }, { "epoch": 0.9431093007467752, "grad_norm": 0.6500550482103429, "learning_rate": 8.47704765152646e-08, "loss": 0.3457, "step": 20838 }, { "epoch": 0.9431545598551708, "grad_norm": 0.6167534463037921, "learning_rate": 8.463614120487629e-08, "loss": 0.3086, "step": 20839 }, { "epoch": 0.9431998189635664, "grad_norm": 0.6201984186786818, "learning_rate": 8.450191151056054e-08, "loss": 0.324, "step": 20840 }, { "epoch": 0.943245078071962, "grad_norm": 0.6488985759841122, "learning_rate": 8.436778743520225e-08, "loss": 0.2899, "step": 20841 }, { "epoch": 0.9432903371803576, "grad_norm": 0.6156545222452543, "learning_rate": 8.423376898168246e-08, "loss": 0.2632, "step": 20842 }, { "epoch": 0.9433355962887531, "grad_norm": 0.6573285195409124, "learning_rate": 8.409985615288218e-08, "loss": 0.3041, "step": 20843 }, { "epoch": 0.9433808553971487, "grad_norm": 0.6037230252645684, "learning_rate": 8.396604895167748e-08, "loss": 0.2964, "step": 20844 }, { "epoch": 0.9434261145055443, "grad_norm": 0.5870445554468589, "learning_rate": 8.383234738094381e-08, "loss": 0.3154, "step": 20845 }, { "epoch": 0.9434713736139398, "grad_norm": 0.5362564404723827, "learning_rate": 8.3698751443555e-08, "loss": 0.2493, "step": 20846 }, { "epoch": 0.9435166327223353, "grad_norm": 0.6489866126328017, "learning_rate": 8.356526114237983e-08, "loss": 0.3024, "step": 20847 }, { "epoch": 0.9435618918307309, "grad_norm": 0.2631653817827076, "learning_rate": 8.343187648028772e-08, "loss": 0.4851, "step": 20848 }, { "epoch": 0.9436071509391265, "grad_norm": 0.6463116506629104, "learning_rate": 8.329859746014468e-08, "loss": 0.2714, "step": 20849 }, { "epoch": 0.9436524100475221, "grad_norm": 0.5690519206908605, "learning_rate": 8.316542408481398e-08, "loss": 0.2543, "step": 20850 }, { "epoch": 0.9436976691559176, "grad_norm": 0.5890163713323255, "learning_rate": 8.303235635715723e-08, "loss": 0.2665, "step": 20851 }, { "epoch": 0.9437429282643132, "grad_norm": 0.6416623026573095, "learning_rate": 8.289939428003491e-08, "loss": 0.3173, "step": 20852 }, { "epoch": 0.9437881873727088, "grad_norm": 0.5765374229975274, "learning_rate": 8.276653785630195e-08, "loss": 0.2747, "step": 20853 }, { "epoch": 0.9438334464811043, "grad_norm": 0.25657991387142726, "learning_rate": 8.263378708881443e-08, "loss": 0.4645, "step": 20854 }, { "epoch": 0.9438787055894999, "grad_norm": 0.5873869400501082, "learning_rate": 8.250114198042392e-08, "loss": 0.2769, "step": 20855 }, { "epoch": 0.9439239646978954, "grad_norm": 0.5907343998929974, "learning_rate": 8.236860253398094e-08, "loss": 0.3104, "step": 20856 }, { "epoch": 0.943969223806291, "grad_norm": 0.6388619579014826, "learning_rate": 8.223616875233376e-08, "loss": 0.3117, "step": 20857 }, { "epoch": 0.9440144829146866, "grad_norm": 0.6244062370435659, "learning_rate": 8.210384063832678e-08, "loss": 0.2738, "step": 20858 }, { "epoch": 0.9440597420230822, "grad_norm": 0.6299226406317449, "learning_rate": 8.197161819480493e-08, "loss": 0.2964, "step": 20859 }, { "epoch": 0.9441050011314777, "grad_norm": 0.7781736528219134, "learning_rate": 8.183950142460761e-08, "loss": 0.2986, "step": 20860 }, { "epoch": 0.9441502602398733, "grad_norm": 0.5846529738886026, "learning_rate": 8.170749033057534e-08, "loss": 0.2877, "step": 20861 }, { "epoch": 0.9441955193482688, "grad_norm": 0.5571171188715623, "learning_rate": 8.157558491554306e-08, "loss": 0.2746, "step": 20862 }, { "epoch": 0.9442407784566644, "grad_norm": 0.6518076238522412, "learning_rate": 8.144378518234574e-08, "loss": 0.2819, "step": 20863 }, { "epoch": 0.94428603756506, "grad_norm": 0.6662587604637679, "learning_rate": 8.131209113381556e-08, "loss": 0.2865, "step": 20864 }, { "epoch": 0.9443312966734555, "grad_norm": 0.28615234380710475, "learning_rate": 8.118050277278245e-08, "loss": 0.4741, "step": 20865 }, { "epoch": 0.9443765557818511, "grad_norm": 0.6165350758593174, "learning_rate": 8.104902010207249e-08, "loss": 0.2808, "step": 20866 }, { "epoch": 0.9444218148902467, "grad_norm": 0.945412991945462, "learning_rate": 8.091764312451122e-08, "loss": 0.2474, "step": 20867 }, { "epoch": 0.9444670739986423, "grad_norm": 0.6484266098431832, "learning_rate": 8.078637184292304e-08, "loss": 0.3092, "step": 20868 }, { "epoch": 0.9445123331070377, "grad_norm": 0.5862338401789485, "learning_rate": 8.065520626012735e-08, "loss": 0.3084, "step": 20869 }, { "epoch": 0.9445575922154333, "grad_norm": 0.6182423665804847, "learning_rate": 8.052414637894246e-08, "loss": 0.2862, "step": 20870 }, { "epoch": 0.9446028513238289, "grad_norm": 0.6010714458374049, "learning_rate": 8.039319220218444e-08, "loss": 0.3038, "step": 20871 }, { "epoch": 0.9446481104322245, "grad_norm": 0.6317935772444221, "learning_rate": 8.026234373266773e-08, "loss": 0.2724, "step": 20872 }, { "epoch": 0.94469336954062, "grad_norm": 0.5788671859175848, "learning_rate": 8.013160097320339e-08, "loss": 0.2702, "step": 20873 }, { "epoch": 0.9447386286490156, "grad_norm": 0.24808045688663874, "learning_rate": 8.000096392660029e-08, "loss": 0.4617, "step": 20874 }, { "epoch": 0.9447838877574112, "grad_norm": 0.5853744341393615, "learning_rate": 7.987043259566618e-08, "loss": 0.3128, "step": 20875 }, { "epoch": 0.9448291468658068, "grad_norm": 0.5850339197503641, "learning_rate": 7.974000698320495e-08, "loss": 0.2834, "step": 20876 }, { "epoch": 0.9448744059742024, "grad_norm": 0.23458010581167918, "learning_rate": 7.960968709202044e-08, "loss": 0.4502, "step": 20877 }, { "epoch": 0.9449196650825978, "grad_norm": 0.6946033096154024, "learning_rate": 7.947947292491154e-08, "loss": 0.3207, "step": 20878 }, { "epoch": 0.9449649241909934, "grad_norm": 0.5948525066627732, "learning_rate": 7.9349364484676e-08, "loss": 0.2688, "step": 20879 }, { "epoch": 0.945010183299389, "grad_norm": 0.585106418093436, "learning_rate": 7.921936177411049e-08, "loss": 0.3158, "step": 20880 }, { "epoch": 0.9450554424077846, "grad_norm": 0.7232573710512356, "learning_rate": 7.908946479600777e-08, "loss": 0.2977, "step": 20881 }, { "epoch": 0.9451007015161801, "grad_norm": 0.6729210253022881, "learning_rate": 7.895967355315948e-08, "loss": 0.3173, "step": 20882 }, { "epoch": 0.9451459606245757, "grad_norm": 0.5913024246309978, "learning_rate": 7.88299880483534e-08, "loss": 0.2871, "step": 20883 }, { "epoch": 0.9451912197329713, "grad_norm": 0.6504311777673383, "learning_rate": 7.870040828437675e-08, "loss": 0.2656, "step": 20884 }, { "epoch": 0.9452364788413669, "grad_norm": 0.6857428537796664, "learning_rate": 7.857093426401397e-08, "loss": 0.2983, "step": 20885 }, { "epoch": 0.9452817379497623, "grad_norm": 0.6297503614601261, "learning_rate": 7.844156599004671e-08, "loss": 0.3062, "step": 20886 }, { "epoch": 0.9453269970581579, "grad_norm": 0.5954023304747791, "learning_rate": 7.831230346525443e-08, "loss": 0.2848, "step": 20887 }, { "epoch": 0.9453722561665535, "grad_norm": 0.26616954929128794, "learning_rate": 7.818314669241544e-08, "loss": 0.4977, "step": 20888 }, { "epoch": 0.9454175152749491, "grad_norm": 0.6184736597521032, "learning_rate": 7.805409567430367e-08, "loss": 0.2996, "step": 20889 }, { "epoch": 0.9454627743833447, "grad_norm": 0.25006736595217566, "learning_rate": 7.792515041369353e-08, "loss": 0.4563, "step": 20890 }, { "epoch": 0.9455080334917402, "grad_norm": 0.5676641219555973, "learning_rate": 7.779631091335505e-08, "loss": 0.3086, "step": 20891 }, { "epoch": 0.9455532926001358, "grad_norm": 0.2454819629724571, "learning_rate": 7.7667577176056e-08, "loss": 0.4475, "step": 20892 }, { "epoch": 0.9455985517085314, "grad_norm": 0.7133383522561514, "learning_rate": 7.753894920456251e-08, "loss": 0.265, "step": 20893 }, { "epoch": 0.9456438108169269, "grad_norm": 0.7529118268346086, "learning_rate": 7.741042700164014e-08, "loss": 0.3141, "step": 20894 }, { "epoch": 0.9456890699253224, "grad_norm": 0.6052921223708356, "learning_rate": 7.72820105700478e-08, "loss": 0.2876, "step": 20895 }, { "epoch": 0.945734329033718, "grad_norm": 0.7209021765111919, "learning_rate": 7.715369991254662e-08, "loss": 0.2644, "step": 20896 }, { "epoch": 0.9457795881421136, "grad_norm": 0.5757050535425359, "learning_rate": 7.702549503189272e-08, "loss": 0.3039, "step": 20897 }, { "epoch": 0.9458248472505092, "grad_norm": 0.2770258305772582, "learning_rate": 7.689739593084166e-08, "loss": 0.4918, "step": 20898 }, { "epoch": 0.9458701063589048, "grad_norm": 0.22921896598491626, "learning_rate": 7.676940261214516e-08, "loss": 0.4559, "step": 20899 }, { "epoch": 0.9459153654673003, "grad_norm": 0.276942567838856, "learning_rate": 7.664151507855378e-08, "loss": 0.4843, "step": 20900 }, { "epoch": 0.9459606245756959, "grad_norm": 0.605310768362177, "learning_rate": 7.651373333281532e-08, "loss": 0.314, "step": 20901 }, { "epoch": 0.9460058836840914, "grad_norm": 0.6351944506005454, "learning_rate": 7.638605737767534e-08, "loss": 0.2826, "step": 20902 }, { "epoch": 0.946051142792487, "grad_norm": 0.6064184675760638, "learning_rate": 7.625848721587725e-08, "loss": 0.2872, "step": 20903 }, { "epoch": 0.9460964019008825, "grad_norm": 0.6023337318092542, "learning_rate": 7.613102285016216e-08, "loss": 0.2878, "step": 20904 }, { "epoch": 0.9461416610092781, "grad_norm": 0.5897002049598271, "learning_rate": 7.600366428326845e-08, "loss": 0.3028, "step": 20905 }, { "epoch": 0.9461869201176737, "grad_norm": 0.5920218219632091, "learning_rate": 7.58764115179339e-08, "loss": 0.2779, "step": 20906 }, { "epoch": 0.9462321792260693, "grad_norm": 0.5523652834187022, "learning_rate": 7.574926455689136e-08, "loss": 0.2515, "step": 20907 }, { "epoch": 0.9462774383344648, "grad_norm": 0.5483838828103694, "learning_rate": 7.562222340287362e-08, "loss": 0.2723, "step": 20908 }, { "epoch": 0.9463226974428603, "grad_norm": 0.6156417525577635, "learning_rate": 7.549528805861017e-08, "loss": 0.2727, "step": 20909 }, { "epoch": 0.9463679565512559, "grad_norm": 0.6154673128491037, "learning_rate": 7.536845852682884e-08, "loss": 0.3334, "step": 20910 }, { "epoch": 0.9464132156596515, "grad_norm": 0.5543715790612631, "learning_rate": 7.52417348102541e-08, "loss": 0.2951, "step": 20911 }, { "epoch": 0.9464584747680471, "grad_norm": 0.5990124247903211, "learning_rate": 7.511511691160933e-08, "loss": 0.2926, "step": 20912 }, { "epoch": 0.9465037338764426, "grad_norm": 0.6182521099000854, "learning_rate": 7.498860483361459e-08, "loss": 0.2876, "step": 20913 }, { "epoch": 0.9465489929848382, "grad_norm": 0.6492851657534132, "learning_rate": 7.486219857898935e-08, "loss": 0.307, "step": 20914 }, { "epoch": 0.9465942520932338, "grad_norm": 1.1963892970319248, "learning_rate": 7.473589815044924e-08, "loss": 0.2528, "step": 20915 }, { "epoch": 0.9466395112016294, "grad_norm": 0.6083346740598554, "learning_rate": 7.460970355070763e-08, "loss": 0.2931, "step": 20916 }, { "epoch": 0.9466847703100248, "grad_norm": 0.7154186931695922, "learning_rate": 7.448361478247624e-08, "loss": 0.2864, "step": 20917 }, { "epoch": 0.9467300294184204, "grad_norm": 0.28426325171555816, "learning_rate": 7.4357631848464e-08, "loss": 0.4749, "step": 20918 }, { "epoch": 0.946775288526816, "grad_norm": 0.6014997826042021, "learning_rate": 7.423175475137934e-08, "loss": 0.3012, "step": 20919 }, { "epoch": 0.9468205476352116, "grad_norm": 0.28871653746278625, "learning_rate": 7.410598349392506e-08, "loss": 0.4796, "step": 20920 }, { "epoch": 0.9468658067436071, "grad_norm": 0.2493791500104858, "learning_rate": 7.398031807880456e-08, "loss": 0.4565, "step": 20921 }, { "epoch": 0.9469110658520027, "grad_norm": 0.6420186837671348, "learning_rate": 7.385475850871793e-08, "loss": 0.2726, "step": 20922 }, { "epoch": 0.9469563249603983, "grad_norm": 0.7493605256465465, "learning_rate": 7.372930478636353e-08, "loss": 0.3089, "step": 20923 }, { "epoch": 0.9470015840687939, "grad_norm": 0.6441946120542986, "learning_rate": 7.360395691443644e-08, "loss": 0.2867, "step": 20924 }, { "epoch": 0.9470468431771895, "grad_norm": 0.6416847443775258, "learning_rate": 7.347871489562952e-08, "loss": 0.2897, "step": 20925 }, { "epoch": 0.9470921022855849, "grad_norm": 0.6957522478918147, "learning_rate": 7.335357873263449e-08, "loss": 0.3384, "step": 20926 }, { "epoch": 0.9471373613939805, "grad_norm": 0.6612940508036357, "learning_rate": 7.322854842814031e-08, "loss": 0.3174, "step": 20927 }, { "epoch": 0.9471826205023761, "grad_norm": 0.2789703973879626, "learning_rate": 7.310362398483262e-08, "loss": 0.4583, "step": 20928 }, { "epoch": 0.9472278796107717, "grad_norm": 0.6310776014752988, "learning_rate": 7.297880540539648e-08, "loss": 0.31, "step": 20929 }, { "epoch": 0.9472731387191672, "grad_norm": 0.6304965415205801, "learning_rate": 7.28540926925142e-08, "loss": 0.2681, "step": 20930 }, { "epoch": 0.9473183978275628, "grad_norm": 0.2765236675299349, "learning_rate": 7.27294858488642e-08, "loss": 0.4656, "step": 20931 }, { "epoch": 0.9473636569359584, "grad_norm": 0.6720546270612172, "learning_rate": 7.260498487712487e-08, "loss": 0.3207, "step": 20932 }, { "epoch": 0.947408916044354, "grad_norm": 0.627060354808679, "learning_rate": 7.24805897799713e-08, "loss": 0.2713, "step": 20933 }, { "epoch": 0.9474541751527495, "grad_norm": 0.5966061175146216, "learning_rate": 7.23563005600758e-08, "loss": 0.2702, "step": 20934 }, { "epoch": 0.947499434261145, "grad_norm": 0.6318013396150959, "learning_rate": 7.223211722010959e-08, "loss": 0.311, "step": 20935 }, { "epoch": 0.9475446933695406, "grad_norm": 0.5954183406323375, "learning_rate": 7.21080397627405e-08, "loss": 0.2959, "step": 20936 }, { "epoch": 0.9475899524779362, "grad_norm": 0.6338239230437382, "learning_rate": 7.198406819063419e-08, "loss": 0.3436, "step": 20937 }, { "epoch": 0.9476352115863318, "grad_norm": 0.2799655198032767, "learning_rate": 7.186020250645576e-08, "loss": 0.4425, "step": 20938 }, { "epoch": 0.9476804706947273, "grad_norm": 0.5993272085024067, "learning_rate": 7.173644271286584e-08, "loss": 0.2909, "step": 20939 }, { "epoch": 0.9477257298031229, "grad_norm": 0.6036991084042239, "learning_rate": 7.161278881252398e-08, "loss": 0.2581, "step": 20940 }, { "epoch": 0.9477709889115185, "grad_norm": 0.5657049971902813, "learning_rate": 7.14892408080864e-08, "loss": 0.292, "step": 20941 }, { "epoch": 0.947816248019914, "grad_norm": 0.619094047430233, "learning_rate": 7.136579870220817e-08, "loss": 0.2978, "step": 20942 }, { "epoch": 0.9478615071283095, "grad_norm": 0.6030486256087684, "learning_rate": 7.124246249754218e-08, "loss": 0.2904, "step": 20943 }, { "epoch": 0.9479067662367051, "grad_norm": 0.275658799458521, "learning_rate": 7.1119232196738e-08, "loss": 0.4689, "step": 20944 }, { "epoch": 0.9479520253451007, "grad_norm": 0.3242225239131589, "learning_rate": 7.099610780244348e-08, "loss": 0.4701, "step": 20945 }, { "epoch": 0.9479972844534963, "grad_norm": 0.6053260765775155, "learning_rate": 7.087308931730485e-08, "loss": 0.2765, "step": 20946 }, { "epoch": 0.9480425435618919, "grad_norm": 0.5822627920801777, "learning_rate": 7.075017674396445e-08, "loss": 0.283, "step": 20947 }, { "epoch": 0.9480878026702874, "grad_norm": 0.6450773095173824, "learning_rate": 7.062737008506404e-08, "loss": 0.283, "step": 20948 }, { "epoch": 0.948133061778683, "grad_norm": 0.6037121474357465, "learning_rate": 7.050466934324207e-08, "loss": 0.3242, "step": 20949 }, { "epoch": 0.9481783208870785, "grad_norm": 0.5702545627996378, "learning_rate": 7.038207452113422e-08, "loss": 0.2438, "step": 20950 }, { "epoch": 0.9482235799954741, "grad_norm": 0.5819989483938457, "learning_rate": 7.025958562137559e-08, "loss": 0.291, "step": 20951 }, { "epoch": 0.9482688391038696, "grad_norm": 0.6063900047506062, "learning_rate": 7.013720264659851e-08, "loss": 0.2897, "step": 20952 }, { "epoch": 0.9483140982122652, "grad_norm": 0.25345238884624277, "learning_rate": 7.001492559943201e-08, "loss": 0.4894, "step": 20953 }, { "epoch": 0.9483593573206608, "grad_norm": 0.607414809250055, "learning_rate": 6.989275448250288e-08, "loss": 0.3302, "step": 20954 }, { "epoch": 0.9484046164290564, "grad_norm": 0.6563703110364023, "learning_rate": 6.977068929843678e-08, "loss": 0.2831, "step": 20955 }, { "epoch": 0.9484498755374519, "grad_norm": 0.6503675742457773, "learning_rate": 6.964873004985717e-08, "loss": 0.2811, "step": 20956 }, { "epoch": 0.9484951346458474, "grad_norm": 0.6344037115955137, "learning_rate": 6.952687673938363e-08, "loss": 0.3489, "step": 20957 }, { "epoch": 0.948540393754243, "grad_norm": 0.5878882777015434, "learning_rate": 6.940512936963461e-08, "loss": 0.2568, "step": 20958 }, { "epoch": 0.9485856528626386, "grad_norm": 0.5402159930154613, "learning_rate": 6.928348794322637e-08, "loss": 0.2659, "step": 20959 }, { "epoch": 0.9486309119710342, "grad_norm": 0.5676910831484602, "learning_rate": 6.916195246277291e-08, "loss": 0.2994, "step": 20960 }, { "epoch": 0.9486761710794297, "grad_norm": 0.6005688409644764, "learning_rate": 6.904052293088437e-08, "loss": 0.2892, "step": 20961 }, { "epoch": 0.9487214301878253, "grad_norm": 0.5844076062105606, "learning_rate": 6.891919935017089e-08, "loss": 0.2766, "step": 20962 }, { "epoch": 0.9487666892962209, "grad_norm": 0.2680542891543253, "learning_rate": 6.879798172323926e-08, "loss": 0.4712, "step": 20963 }, { "epoch": 0.9488119484046165, "grad_norm": 1.233302978572291, "learning_rate": 6.867687005269408e-08, "loss": 0.274, "step": 20964 }, { "epoch": 0.948857207513012, "grad_norm": 0.5678739452683957, "learning_rate": 6.855586434113771e-08, "loss": 0.2768, "step": 20965 }, { "epoch": 0.9489024666214075, "grad_norm": 0.6447409783343343, "learning_rate": 6.843496459116917e-08, "loss": 0.2816, "step": 20966 }, { "epoch": 0.9489477257298031, "grad_norm": 0.6330282871837848, "learning_rate": 6.83141708053875e-08, "loss": 0.2733, "step": 20967 }, { "epoch": 0.9489929848381987, "grad_norm": 0.2887291296955998, "learning_rate": 6.819348298638839e-08, "loss": 0.481, "step": 20968 }, { "epoch": 0.9490382439465943, "grad_norm": 0.6455807936183818, "learning_rate": 6.807290113676423e-08, "loss": 0.3026, "step": 20969 }, { "epoch": 0.9490835030549898, "grad_norm": 0.5974785045800158, "learning_rate": 6.795242525910573e-08, "loss": 0.2907, "step": 20970 }, { "epoch": 0.9491287621633854, "grad_norm": 0.5544173397992318, "learning_rate": 6.783205535600191e-08, "loss": 0.3221, "step": 20971 }, { "epoch": 0.949174021271781, "grad_norm": 0.6038550671982219, "learning_rate": 6.771179143003958e-08, "loss": 0.3031, "step": 20972 }, { "epoch": 0.9492192803801766, "grad_norm": 0.6218445340632932, "learning_rate": 6.759163348380282e-08, "loss": 0.2946, "step": 20973 }, { "epoch": 0.949264539488572, "grad_norm": 0.2522104366288927, "learning_rate": 6.747158151987232e-08, "loss": 0.4569, "step": 20974 }, { "epoch": 0.9493097985969676, "grad_norm": 0.2725387958153841, "learning_rate": 6.73516355408288e-08, "loss": 0.4977, "step": 20975 }, { "epoch": 0.9493550577053632, "grad_norm": 0.5699970378852044, "learning_rate": 6.723179554924908e-08, "loss": 0.2763, "step": 20976 }, { "epoch": 0.9494003168137588, "grad_norm": 0.5946194671942663, "learning_rate": 6.711206154770833e-08, "loss": 0.2901, "step": 20977 }, { "epoch": 0.9494455759221543, "grad_norm": 0.5964805198685067, "learning_rate": 6.699243353877949e-08, "loss": 0.2875, "step": 20978 }, { "epoch": 0.9494908350305499, "grad_norm": 1.2702132941453168, "learning_rate": 6.687291152503217e-08, "loss": 0.3049, "step": 20979 }, { "epoch": 0.9495360941389455, "grad_norm": 0.35882497371212974, "learning_rate": 6.675349550903488e-08, "loss": 0.4688, "step": 20980 }, { "epoch": 0.949581353247341, "grad_norm": 0.7600225740849603, "learning_rate": 6.663418549335443e-08, "loss": 0.2948, "step": 20981 }, { "epoch": 0.9496266123557366, "grad_norm": 0.6181063297081756, "learning_rate": 6.651498148055324e-08, "loss": 0.3708, "step": 20982 }, { "epoch": 0.9496718714641321, "grad_norm": 1.4409835173105592, "learning_rate": 6.639588347319315e-08, "loss": 0.2737, "step": 20983 }, { "epoch": 0.9497171305725277, "grad_norm": 0.5926133598300354, "learning_rate": 6.627689147383265e-08, "loss": 0.275, "step": 20984 }, { "epoch": 0.9497623896809233, "grad_norm": 0.621328656285908, "learning_rate": 6.615800548502971e-08, "loss": 0.2557, "step": 20985 }, { "epoch": 0.9498076487893189, "grad_norm": 0.6553306928571592, "learning_rate": 6.603922550933783e-08, "loss": 0.2822, "step": 20986 }, { "epoch": 0.9498529078977144, "grad_norm": 0.5898052926929013, "learning_rate": 6.592055154930887e-08, "loss": 0.29, "step": 20987 }, { "epoch": 0.94989816700611, "grad_norm": 0.60423853194223, "learning_rate": 6.580198360749412e-08, "loss": 0.2807, "step": 20988 }, { "epoch": 0.9499434261145056, "grad_norm": 0.6083911386276145, "learning_rate": 6.568352168644043e-08, "loss": 0.2714, "step": 20989 }, { "epoch": 0.9499886852229011, "grad_norm": 0.8479645873483991, "learning_rate": 6.556516578869299e-08, "loss": 0.3046, "step": 20990 }, { "epoch": 0.9500339443312966, "grad_norm": 0.5760836337681511, "learning_rate": 6.544691591679531e-08, "loss": 0.274, "step": 20991 }, { "epoch": 0.9500792034396922, "grad_norm": 0.6151864967390867, "learning_rate": 6.532877207328813e-08, "loss": 0.271, "step": 20992 }, { "epoch": 0.9501244625480878, "grad_norm": 0.6503544528267623, "learning_rate": 6.521073426070945e-08, "loss": 0.2904, "step": 20993 }, { "epoch": 0.9501697216564834, "grad_norm": 0.26711729261399775, "learning_rate": 6.509280248159721e-08, "loss": 0.4739, "step": 20994 }, { "epoch": 0.950214980764879, "grad_norm": 0.571338219335804, "learning_rate": 6.49749767384833e-08, "loss": 0.2758, "step": 20995 }, { "epoch": 0.9502602398732745, "grad_norm": 0.6613673798392251, "learning_rate": 6.485725703390067e-08, "loss": 0.3139, "step": 20996 }, { "epoch": 0.95030549898167, "grad_norm": 0.6584892264971919, "learning_rate": 6.473964337037842e-08, "loss": 0.2549, "step": 20997 }, { "epoch": 0.9503507580900656, "grad_norm": 0.5943344110402399, "learning_rate": 6.462213575044396e-08, "loss": 0.2781, "step": 20998 }, { "epoch": 0.9503960171984612, "grad_norm": 0.6127678986376591, "learning_rate": 6.45047341766214e-08, "loss": 0.2842, "step": 20999 }, { "epoch": 0.9504412763068567, "grad_norm": 0.6000744206177664, "learning_rate": 6.438743865143371e-08, "loss": 0.3379, "step": 21000 }, { "epoch": 0.9504865354152523, "grad_norm": 0.5657602497024272, "learning_rate": 6.42702491774022e-08, "loss": 0.2908, "step": 21001 }, { "epoch": 0.9505317945236479, "grad_norm": 0.2609860411322821, "learning_rate": 6.415316575704378e-08, "loss": 0.4503, "step": 21002 }, { "epoch": 0.9505770536320435, "grad_norm": 0.604504207006534, "learning_rate": 6.403618839287418e-08, "loss": 0.2892, "step": 21003 }, { "epoch": 0.9506223127404391, "grad_norm": 0.6030571400391765, "learning_rate": 6.391931708740806e-08, "loss": 0.2736, "step": 21004 }, { "epoch": 0.9506675718488345, "grad_norm": 0.2807062113118566, "learning_rate": 6.380255184315509e-08, "loss": 0.4747, "step": 21005 }, { "epoch": 0.9507128309572301, "grad_norm": 0.5946543907442272, "learning_rate": 6.368589266262493e-08, "loss": 0.2965, "step": 21006 }, { "epoch": 0.9507580900656257, "grad_norm": 0.590482601249089, "learning_rate": 6.356933954832501e-08, "loss": 0.3242, "step": 21007 }, { "epoch": 0.9508033491740213, "grad_norm": 0.5794334337238063, "learning_rate": 6.345289250275777e-08, "loss": 0.2708, "step": 21008 }, { "epoch": 0.9508486082824168, "grad_norm": 0.5793012980790783, "learning_rate": 6.333655152842676e-08, "loss": 0.3122, "step": 21009 }, { "epoch": 0.9508938673908124, "grad_norm": 0.5797144339972501, "learning_rate": 6.322031662783167e-08, "loss": 0.303, "step": 21010 }, { "epoch": 0.950939126499208, "grad_norm": 0.5331724832690391, "learning_rate": 6.310418780346993e-08, "loss": 0.283, "step": 21011 }, { "epoch": 0.9509843856076036, "grad_norm": 0.6539748251622324, "learning_rate": 6.298816505783623e-08, "loss": 0.3098, "step": 21012 }, { "epoch": 0.951029644715999, "grad_norm": 0.5968445176581573, "learning_rate": 6.28722483934241e-08, "loss": 0.2896, "step": 21013 }, { "epoch": 0.9510749038243946, "grad_norm": 0.7165491692322432, "learning_rate": 6.275643781272489e-08, "loss": 0.3001, "step": 21014 }, { "epoch": 0.9511201629327902, "grad_norm": 0.5754934026250936, "learning_rate": 6.264073331822551e-08, "loss": 0.3247, "step": 21015 }, { "epoch": 0.9511654220411858, "grad_norm": 0.6371439596459173, "learning_rate": 6.252513491241285e-08, "loss": 0.3054, "step": 21016 }, { "epoch": 0.9512106811495814, "grad_norm": 0.6393908936424378, "learning_rate": 6.240964259777104e-08, "loss": 0.304, "step": 21017 }, { "epoch": 0.9512559402579769, "grad_norm": 0.6371497263366444, "learning_rate": 6.229425637678088e-08, "loss": 0.2724, "step": 21018 }, { "epoch": 0.9513011993663725, "grad_norm": 0.26785550578434497, "learning_rate": 6.217897625192259e-08, "loss": 0.4779, "step": 21019 }, { "epoch": 0.9513464584747681, "grad_norm": 0.5961553134529505, "learning_rate": 6.206380222567254e-08, "loss": 0.301, "step": 21020 }, { "epoch": 0.9513917175831637, "grad_norm": 0.5852208105910898, "learning_rate": 6.194873430050596e-08, "loss": 0.2966, "step": 21021 }, { "epoch": 0.9514369766915591, "grad_norm": 0.6041103603260393, "learning_rate": 6.183377247889422e-08, "loss": 0.2742, "step": 21022 }, { "epoch": 0.9514822357999547, "grad_norm": 0.6030801014720969, "learning_rate": 6.171891676330922e-08, "loss": 0.2631, "step": 21023 }, { "epoch": 0.9515274949083503, "grad_norm": 0.6407847667172143, "learning_rate": 6.160416715621786e-08, "loss": 0.268, "step": 21024 }, { "epoch": 0.9515727540167459, "grad_norm": 0.2788988931928052, "learning_rate": 6.148952366008487e-08, "loss": 0.4682, "step": 21025 }, { "epoch": 0.9516180131251414, "grad_norm": 0.5766844844108057, "learning_rate": 6.137498627737492e-08, "loss": 0.2491, "step": 21026 }, { "epoch": 0.951663272233537, "grad_norm": 0.6204674046222174, "learning_rate": 6.126055501054995e-08, "loss": 0.2971, "step": 21027 }, { "epoch": 0.9517085313419326, "grad_norm": 0.566262868538345, "learning_rate": 6.114622986206575e-08, "loss": 0.2732, "step": 21028 }, { "epoch": 0.9517537904503282, "grad_norm": 0.6266490854478113, "learning_rate": 6.103201083438149e-08, "loss": 0.3184, "step": 21029 }, { "epoch": 0.9517990495587237, "grad_norm": 0.6233291681881801, "learning_rate": 6.091789792995018e-08, "loss": 0.3184, "step": 21030 }, { "epoch": 0.9518443086671192, "grad_norm": 0.6900403839984441, "learning_rate": 6.080389115122432e-08, "loss": 0.3531, "step": 21031 }, { "epoch": 0.9518895677755148, "grad_norm": 0.6418354601151535, "learning_rate": 6.06899905006525e-08, "loss": 0.3536, "step": 21032 }, { "epoch": 0.9519348268839104, "grad_norm": 0.7346705264008148, "learning_rate": 6.057619598068332e-08, "loss": 0.3051, "step": 21033 }, { "epoch": 0.951980085992306, "grad_norm": 0.6679759636947534, "learning_rate": 6.046250759376148e-08, "loss": 0.2993, "step": 21034 }, { "epoch": 0.9520253451007015, "grad_norm": 0.27520480360869226, "learning_rate": 6.034892534233006e-08, "loss": 0.4927, "step": 21035 }, { "epoch": 0.9520706042090971, "grad_norm": 0.6746084316484376, "learning_rate": 6.023544922882874e-08, "loss": 0.3048, "step": 21036 }, { "epoch": 0.9521158633174926, "grad_norm": 0.6528323441208711, "learning_rate": 6.012207925569613e-08, "loss": 0.3036, "step": 21037 }, { "epoch": 0.9521611224258882, "grad_norm": 0.6145097092357207, "learning_rate": 6.000881542536863e-08, "loss": 0.2905, "step": 21038 }, { "epoch": 0.9522063815342838, "grad_norm": 0.24463072379857484, "learning_rate": 5.989565774027983e-08, "loss": 0.4491, "step": 21039 }, { "epoch": 0.9522516406426793, "grad_norm": 0.2627344849839754, "learning_rate": 5.978260620286058e-08, "loss": 0.4778, "step": 21040 }, { "epoch": 0.9522968997510749, "grad_norm": 0.596168684386448, "learning_rate": 5.96696608155406e-08, "loss": 0.242, "step": 21041 }, { "epoch": 0.9523421588594705, "grad_norm": 0.2646225649141157, "learning_rate": 5.955682158074627e-08, "loss": 0.4841, "step": 21042 }, { "epoch": 0.9523874179678661, "grad_norm": 0.7122779503387644, "learning_rate": 5.944408850090289e-08, "loss": 0.3003, "step": 21043 }, { "epoch": 0.9524326770762616, "grad_norm": 0.6594758890068033, "learning_rate": 5.933146157843239e-08, "loss": 0.2806, "step": 21044 }, { "epoch": 0.9524779361846571, "grad_norm": 0.5710099357492796, "learning_rate": 5.921894081575397e-08, "loss": 0.2961, "step": 21045 }, { "epoch": 0.9525231952930527, "grad_norm": 0.26264952959296717, "learning_rate": 5.9106526215286786e-08, "loss": 0.477, "step": 21046 }, { "epoch": 0.9525684544014483, "grad_norm": 0.667966157173857, "learning_rate": 5.899421777944503e-08, "loss": 0.25, "step": 21047 }, { "epoch": 0.9526137135098438, "grad_norm": 0.6473696352491063, "learning_rate": 5.888201551064288e-08, "loss": 0.2955, "step": 21048 }, { "epoch": 0.9526589726182394, "grad_norm": 0.6820985118600682, "learning_rate": 5.876991941129062e-08, "loss": 0.2818, "step": 21049 }, { "epoch": 0.952704231726635, "grad_norm": 0.6109836936427275, "learning_rate": 5.8657929483796336e-08, "loss": 0.2987, "step": 21050 }, { "epoch": 0.9527494908350306, "grad_norm": 0.651082778888153, "learning_rate": 5.854604573056755e-08, "loss": 0.287, "step": 21051 }, { "epoch": 0.9527947499434262, "grad_norm": 0.30875576273308114, "learning_rate": 5.843426815400788e-08, "loss": 0.4622, "step": 21052 }, { "epoch": 0.9528400090518216, "grad_norm": 0.655355098365421, "learning_rate": 5.8322596756518744e-08, "loss": 0.2646, "step": 21053 }, { "epoch": 0.9528852681602172, "grad_norm": 0.6104545493531314, "learning_rate": 5.821103154049934e-08, "loss": 0.306, "step": 21054 }, { "epoch": 0.9529305272686128, "grad_norm": 0.5416426331637725, "learning_rate": 5.809957250834774e-08, "loss": 0.2829, "step": 21055 }, { "epoch": 0.9529757863770084, "grad_norm": 0.6268521076725558, "learning_rate": 5.7988219662458714e-08, "loss": 0.2843, "step": 21056 }, { "epoch": 0.9530210454854039, "grad_norm": 0.2669820642962163, "learning_rate": 5.787697300522421e-08, "loss": 0.4533, "step": 21057 }, { "epoch": 0.9530663045937995, "grad_norm": 0.2477758080254133, "learning_rate": 5.7765832539035113e-08, "loss": 0.4546, "step": 21058 }, { "epoch": 0.9531115637021951, "grad_norm": 0.648880992568745, "learning_rate": 5.765479826627951e-08, "loss": 0.2887, "step": 21059 }, { "epoch": 0.9531568228105907, "grad_norm": 0.6113640942856314, "learning_rate": 5.754387018934271e-08, "loss": 0.3575, "step": 21060 }, { "epoch": 0.9532020819189861, "grad_norm": 0.2506811649761924, "learning_rate": 5.743304831060836e-08, "loss": 0.4683, "step": 21061 }, { "epoch": 0.9532473410273817, "grad_norm": 0.6305241344926028, "learning_rate": 5.7322332632458454e-08, "loss": 0.3598, "step": 21062 }, { "epoch": 0.9532926001357773, "grad_norm": 0.9082959487210251, "learning_rate": 5.721172315727108e-08, "loss": 0.3155, "step": 21063 }, { "epoch": 0.9533378592441729, "grad_norm": 0.30722117733165927, "learning_rate": 5.7101219887423233e-08, "loss": 0.4894, "step": 21064 }, { "epoch": 0.9533831183525685, "grad_norm": 0.30885965328323056, "learning_rate": 5.6990822825289115e-08, "loss": 0.4934, "step": 21065 }, { "epoch": 0.953428377460964, "grad_norm": 0.6392135432575204, "learning_rate": 5.688053197324073e-08, "loss": 0.2562, "step": 21066 }, { "epoch": 0.9534736365693596, "grad_norm": 0.5728801787248158, "learning_rate": 5.677034733364839e-08, "loss": 0.288, "step": 21067 }, { "epoch": 0.9535188956777552, "grad_norm": 0.2522632360951553, "learning_rate": 5.66602689088791e-08, "loss": 0.4584, "step": 21068 }, { "epoch": 0.9535641547861508, "grad_norm": 0.7143571198515337, "learning_rate": 5.655029670129875e-08, "loss": 0.293, "step": 21069 }, { "epoch": 0.9536094138945462, "grad_norm": 0.5981811006835039, "learning_rate": 5.6440430713269325e-08, "loss": 0.2969, "step": 21070 }, { "epoch": 0.9536546730029418, "grad_norm": 0.2668000224402627, "learning_rate": 5.633067094715228e-08, "loss": 0.4681, "step": 21071 }, { "epoch": 0.9536999321113374, "grad_norm": 0.6051485800178262, "learning_rate": 5.622101740530572e-08, "loss": 0.2771, "step": 21072 }, { "epoch": 0.953745191219733, "grad_norm": 0.6327385317687241, "learning_rate": 5.6111470090086106e-08, "loss": 0.303, "step": 21073 }, { "epoch": 0.9537904503281285, "grad_norm": 0.7189651235790038, "learning_rate": 5.6002029003847105e-08, "loss": 0.3054, "step": 21074 }, { "epoch": 0.9538357094365241, "grad_norm": 0.6291207355588576, "learning_rate": 5.589269414893961e-08, "loss": 0.3036, "step": 21075 }, { "epoch": 0.9538809685449197, "grad_norm": 0.6193220306777247, "learning_rate": 5.5783465527713964e-08, "loss": 0.3429, "step": 21076 }, { "epoch": 0.9539262276533153, "grad_norm": 0.5910164438973713, "learning_rate": 5.567434314251663e-08, "loss": 0.2816, "step": 21077 }, { "epoch": 0.9539714867617108, "grad_norm": 0.2552213221032225, "learning_rate": 5.5565326995691835e-08, "loss": 0.4609, "step": 21078 }, { "epoch": 0.9540167458701063, "grad_norm": 0.5937592714720844, "learning_rate": 5.5456417089582715e-08, "loss": 0.2692, "step": 21079 }, { "epoch": 0.9540620049785019, "grad_norm": 0.5623907371733852, "learning_rate": 5.534761342652906e-08, "loss": 0.2704, "step": 21080 }, { "epoch": 0.9541072640868975, "grad_norm": 0.6982056335613128, "learning_rate": 5.523891600886955e-08, "loss": 0.2716, "step": 21081 }, { "epoch": 0.9541525231952931, "grad_norm": 0.5974350115498644, "learning_rate": 5.513032483893843e-08, "loss": 0.2886, "step": 21082 }, { "epoch": 0.9541977823036886, "grad_norm": 0.5876815070067601, "learning_rate": 5.50218399190694e-08, "loss": 0.2659, "step": 21083 }, { "epoch": 0.9542430414120842, "grad_norm": 0.5721221155130534, "learning_rate": 5.491346125159391e-08, "loss": 0.2632, "step": 21084 }, { "epoch": 0.9542883005204797, "grad_norm": 0.5523906313642887, "learning_rate": 5.4805188838841226e-08, "loss": 0.2783, "step": 21085 }, { "epoch": 0.9543335596288753, "grad_norm": 0.6276116118037559, "learning_rate": 5.4697022683136145e-08, "loss": 0.2775, "step": 21086 }, { "epoch": 0.9543788187372709, "grad_norm": 0.5721432827948391, "learning_rate": 5.4588962786804035e-08, "loss": 0.2863, "step": 21087 }, { "epoch": 0.9544240778456664, "grad_norm": 0.5807363563910248, "learning_rate": 5.448100915216636e-08, "loss": 0.2458, "step": 21088 }, { "epoch": 0.954469336954062, "grad_norm": 0.238644150188603, "learning_rate": 5.437316178154295e-08, "loss": 0.4643, "step": 21089 }, { "epoch": 0.9545145960624576, "grad_norm": 0.7048671117012055, "learning_rate": 5.4265420677250267e-08, "loss": 0.3157, "step": 21090 }, { "epoch": 0.9545598551708532, "grad_norm": 0.7289959440252665, "learning_rate": 5.4157785841604805e-08, "loss": 0.3047, "step": 21091 }, { "epoch": 0.9546051142792487, "grad_norm": 0.28604388476548864, "learning_rate": 5.4050257276918036e-08, "loss": 0.4595, "step": 21092 }, { "epoch": 0.9546503733876442, "grad_norm": 0.6084096838677485, "learning_rate": 5.3942834985501455e-08, "loss": 0.271, "step": 21093 }, { "epoch": 0.9546956324960398, "grad_norm": 0.5654045262842151, "learning_rate": 5.383551896966266e-08, "loss": 0.2735, "step": 21094 }, { "epoch": 0.9547408916044354, "grad_norm": 0.6056723134167218, "learning_rate": 5.372830923170702e-08, "loss": 0.3032, "step": 21095 }, { "epoch": 0.9547861507128309, "grad_norm": 0.6271978895462672, "learning_rate": 5.362120577393881e-08, "loss": 0.2952, "step": 21096 }, { "epoch": 0.9548314098212265, "grad_norm": 0.578230439300833, "learning_rate": 5.351420859865952e-08, "loss": 0.2753, "step": 21097 }, { "epoch": 0.9548766689296221, "grad_norm": 0.6369091384458982, "learning_rate": 5.340731770816843e-08, "loss": 0.2957, "step": 21098 }, { "epoch": 0.9549219280380177, "grad_norm": 0.2938941194995703, "learning_rate": 5.330053310476091e-08, "loss": 0.4708, "step": 21099 }, { "epoch": 0.9549671871464133, "grad_norm": 0.2623577246616529, "learning_rate": 5.319385479073236e-08, "loss": 0.4724, "step": 21100 }, { "epoch": 0.9550124462548087, "grad_norm": 1.2555377118351114, "learning_rate": 5.308728276837538e-08, "loss": 0.2841, "step": 21101 }, { "epoch": 0.9550577053632043, "grad_norm": 0.6404644636725458, "learning_rate": 5.298081703997926e-08, "loss": 0.2868, "step": 21102 }, { "epoch": 0.9551029644715999, "grad_norm": 0.5611261911554767, "learning_rate": 5.287445760783161e-08, "loss": 0.3046, "step": 21103 }, { "epoch": 0.9551482235799955, "grad_norm": 0.26375488095717653, "learning_rate": 5.276820447421782e-08, "loss": 0.4611, "step": 21104 }, { "epoch": 0.955193482688391, "grad_norm": 0.7815139458922309, "learning_rate": 5.266205764142107e-08, "loss": 0.2924, "step": 21105 }, { "epoch": 0.9552387417967866, "grad_norm": 0.6860109324350689, "learning_rate": 5.2556017111722315e-08, "loss": 0.3019, "step": 21106 }, { "epoch": 0.9552840009051822, "grad_norm": 0.5940709035253605, "learning_rate": 5.245008288740028e-08, "loss": 0.2745, "step": 21107 }, { "epoch": 0.9553292600135778, "grad_norm": 0.6224783076128134, "learning_rate": 5.234425497072981e-08, "loss": 0.2929, "step": 21108 }, { "epoch": 0.9553745191219732, "grad_norm": 0.25188962605758014, "learning_rate": 5.223853336398632e-08, "loss": 0.4686, "step": 21109 }, { "epoch": 0.9554197782303688, "grad_norm": 0.5638666587277356, "learning_rate": 5.213291806944076e-08, "loss": 0.2819, "step": 21110 }, { "epoch": 0.9554650373387644, "grad_norm": 0.6578418934123136, "learning_rate": 5.2027409089362434e-08, "loss": 0.2873, "step": 21111 }, { "epoch": 0.95551029644716, "grad_norm": 0.6370928784488743, "learning_rate": 5.192200642601841e-08, "loss": 0.2969, "step": 21112 }, { "epoch": 0.9555555555555556, "grad_norm": 0.25498088535690616, "learning_rate": 5.181671008167355e-08, "loss": 0.4389, "step": 21113 }, { "epoch": 0.9556008146639511, "grad_norm": 0.2758628803552411, "learning_rate": 5.171152005859159e-08, "loss": 0.473, "step": 21114 }, { "epoch": 0.9556460737723467, "grad_norm": 0.5835708463549837, "learning_rate": 5.1606436359030174e-08, "loss": 0.3202, "step": 21115 }, { "epoch": 0.9556913328807423, "grad_norm": 0.616351688744552, "learning_rate": 5.150145898524916e-08, "loss": 0.3031, "step": 21116 }, { "epoch": 0.9557365919891379, "grad_norm": 0.25333214668607923, "learning_rate": 5.139658793950342e-08, "loss": 0.454, "step": 21117 }, { "epoch": 0.9557818510975333, "grad_norm": 0.2571882919873599, "learning_rate": 5.1291823224046687e-08, "loss": 0.4685, "step": 21118 }, { "epoch": 0.9558271102059289, "grad_norm": 0.7099556179776506, "learning_rate": 5.1187164841129954e-08, "loss": 0.3117, "step": 21119 }, { "epoch": 0.9558723693143245, "grad_norm": 0.5463801828022469, "learning_rate": 5.1082612793001976e-08, "loss": 0.2631, "step": 21120 }, { "epoch": 0.9559176284227201, "grad_norm": 0.5868453745955741, "learning_rate": 5.0978167081908726e-08, "loss": 0.2613, "step": 21121 }, { "epoch": 0.9559628875311157, "grad_norm": 0.2854284040132491, "learning_rate": 5.0873827710095636e-08, "loss": 0.468, "step": 21122 }, { "epoch": 0.9560081466395112, "grad_norm": 1.618474871427813, "learning_rate": 5.076959467980369e-08, "loss": 0.2847, "step": 21123 }, { "epoch": 0.9560534057479068, "grad_norm": 0.5758606216633175, "learning_rate": 5.066546799327221e-08, "loss": 0.2828, "step": 21124 }, { "epoch": 0.9560986648563023, "grad_norm": 0.5602948965142438, "learning_rate": 5.0561447652739404e-08, "loss": 0.2697, "step": 21125 }, { "epoch": 0.9561439239646979, "grad_norm": 0.6075309864313594, "learning_rate": 5.045753366044015e-08, "loss": 0.236, "step": 21126 }, { "epoch": 0.9561891830730934, "grad_norm": 0.6679248281529808, "learning_rate": 5.035372601860766e-08, "loss": 0.2855, "step": 21127 }, { "epoch": 0.956234442181489, "grad_norm": 0.6291431479098318, "learning_rate": 5.0250024729470714e-08, "loss": 0.2807, "step": 21128 }, { "epoch": 0.9562797012898846, "grad_norm": 0.564615306221422, "learning_rate": 5.0146429795259745e-08, "loss": 0.3114, "step": 21129 }, { "epoch": 0.9563249603982802, "grad_norm": 0.26899426000202586, "learning_rate": 5.004294121819908e-08, "loss": 0.4783, "step": 21130 }, { "epoch": 0.9563702195066757, "grad_norm": 0.29653148382525596, "learning_rate": 4.993955900051362e-08, "loss": 0.4898, "step": 21131 }, { "epoch": 0.9564154786150713, "grad_norm": 0.6173135466935873, "learning_rate": 4.983628314442324e-08, "loss": 0.293, "step": 21132 }, { "epoch": 0.9564607377234668, "grad_norm": 0.5586290414287138, "learning_rate": 4.973311365214894e-08, "loss": 0.318, "step": 21133 }, { "epoch": 0.9565059968318624, "grad_norm": 0.5546851153757404, "learning_rate": 4.9630050525905635e-08, "loss": 0.3139, "step": 21134 }, { "epoch": 0.956551255940258, "grad_norm": 0.5424416970865831, "learning_rate": 4.9527093767908765e-08, "loss": 0.2762, "step": 21135 }, { "epoch": 0.9565965150486535, "grad_norm": 0.6311911428226128, "learning_rate": 4.942424338037044e-08, "loss": 0.2852, "step": 21136 }, { "epoch": 0.9566417741570491, "grad_norm": 0.5741090634293206, "learning_rate": 4.932149936550057e-08, "loss": 0.2514, "step": 21137 }, { "epoch": 0.9566870332654447, "grad_norm": 0.6936989040590055, "learning_rate": 4.9218861725506825e-08, "loss": 0.2994, "step": 21138 }, { "epoch": 0.9567322923738403, "grad_norm": 0.6044609637642747, "learning_rate": 4.9116330462594677e-08, "loss": 0.2769, "step": 21139 }, { "epoch": 0.9567775514822358, "grad_norm": 0.25341767036578816, "learning_rate": 4.9013905578967346e-08, "loss": 0.446, "step": 21140 }, { "epoch": 0.9568228105906313, "grad_norm": 0.2809826063683718, "learning_rate": 4.8911587076825305e-08, "loss": 0.4946, "step": 21141 }, { "epoch": 0.9568680696990269, "grad_norm": 0.663001023558929, "learning_rate": 4.8809374958366796e-08, "loss": 0.3133, "step": 21142 }, { "epoch": 0.9569133288074225, "grad_norm": 0.61431465932955, "learning_rate": 4.870726922578839e-08, "loss": 0.2811, "step": 21143 }, { "epoch": 0.956958587915818, "grad_norm": 0.5896603758861003, "learning_rate": 4.8605269881284446e-08, "loss": 0.298, "step": 21144 }, { "epoch": 0.9570038470242136, "grad_norm": 0.6509342502713384, "learning_rate": 4.8503376927045984e-08, "loss": 0.2603, "step": 21145 }, { "epoch": 0.9570491061326092, "grad_norm": 0.5676415541024242, "learning_rate": 4.840159036526237e-08, "loss": 0.2688, "step": 21146 }, { "epoch": 0.9570943652410048, "grad_norm": 0.2541454717914221, "learning_rate": 4.8299910198121304e-08, "loss": 0.4955, "step": 21147 }, { "epoch": 0.9571396243494004, "grad_norm": 0.6064039473492522, "learning_rate": 4.819833642780713e-08, "loss": 0.2653, "step": 21148 }, { "epoch": 0.9571848834577958, "grad_norm": 0.6175290510783422, "learning_rate": 4.809686905650257e-08, "loss": 0.265, "step": 21149 }, { "epoch": 0.9572301425661914, "grad_norm": 0.6565275491143019, "learning_rate": 4.7995508086386975e-08, "loss": 0.288, "step": 21150 }, { "epoch": 0.957275401674587, "grad_norm": 0.762414490219462, "learning_rate": 4.789425351963972e-08, "loss": 0.2669, "step": 21151 }, { "epoch": 0.9573206607829826, "grad_norm": 0.6262336664562198, "learning_rate": 4.779310535843573e-08, "loss": 0.26, "step": 21152 }, { "epoch": 0.9573659198913781, "grad_norm": 0.31724872112389707, "learning_rate": 4.769206360494771e-08, "loss": 0.4672, "step": 21153 }, { "epoch": 0.9574111789997737, "grad_norm": 0.5928255327048539, "learning_rate": 4.759112826134782e-08, "loss": 0.2793, "step": 21154 }, { "epoch": 0.9574564381081693, "grad_norm": 0.6557021358975665, "learning_rate": 4.749029932980431e-08, "loss": 0.2363, "step": 21155 }, { "epoch": 0.9575016972165649, "grad_norm": 0.6553815062030756, "learning_rate": 4.73895768124838e-08, "loss": 0.2506, "step": 21156 }, { "epoch": 0.9575469563249605, "grad_norm": 2.1409793774596806, "learning_rate": 4.7288960711550644e-08, "loss": 0.3, "step": 21157 }, { "epoch": 0.9575922154333559, "grad_norm": 0.5651822107390357, "learning_rate": 4.718845102916592e-08, "loss": 0.2757, "step": 21158 }, { "epoch": 0.9576374745417515, "grad_norm": 0.625507174834113, "learning_rate": 4.708804776749121e-08, "loss": 0.2822, "step": 21159 }, { "epoch": 0.9576827336501471, "grad_norm": 0.6486296978176692, "learning_rate": 4.6987750928682017e-08, "loss": 0.2983, "step": 21160 }, { "epoch": 0.9577279927585427, "grad_norm": 0.5840867435621814, "learning_rate": 4.688756051489385e-08, "loss": 0.297, "step": 21161 }, { "epoch": 0.9577732518669382, "grad_norm": 0.6474200844795487, "learning_rate": 4.678747652827997e-08, "loss": 0.2637, "step": 21162 }, { "epoch": 0.9578185109753338, "grad_norm": 0.2523292538945871, "learning_rate": 4.668749897099034e-08, "loss": 0.4583, "step": 21163 }, { "epoch": 0.9578637700837294, "grad_norm": 0.26201240745249477, "learning_rate": 4.6587627845173786e-08, "loss": 0.4774, "step": 21164 }, { "epoch": 0.957909029192125, "grad_norm": 0.2650016651303161, "learning_rate": 4.648786315297582e-08, "loss": 0.474, "step": 21165 }, { "epoch": 0.9579542883005204, "grad_norm": 0.6206100835637757, "learning_rate": 4.6388204896539724e-08, "loss": 0.3042, "step": 21166 }, { "epoch": 0.957999547408916, "grad_norm": 0.61239911731407, "learning_rate": 4.628865307800712e-08, "loss": 0.2747, "step": 21167 }, { "epoch": 0.9580448065173116, "grad_norm": 0.6238800569552224, "learning_rate": 4.618920769951796e-08, "loss": 0.3023, "step": 21168 }, { "epoch": 0.9580900656257072, "grad_norm": 0.24533446552736418, "learning_rate": 4.6089868763207756e-08, "loss": 0.4675, "step": 21169 }, { "epoch": 0.9581353247341028, "grad_norm": 0.927201016908863, "learning_rate": 4.5990636271211474e-08, "loss": 0.3292, "step": 21170 }, { "epoch": 0.9581805838424983, "grad_norm": 0.5815065294128933, "learning_rate": 4.58915102256613e-08, "loss": 0.2917, "step": 21171 }, { "epoch": 0.9582258429508939, "grad_norm": 0.28944106861197766, "learning_rate": 4.5792490628687734e-08, "loss": 0.4839, "step": 21172 }, { "epoch": 0.9582711020592894, "grad_norm": 0.5599205054206638, "learning_rate": 4.569357748241743e-08, "loss": 0.2879, "step": 21173 }, { "epoch": 0.958316361167685, "grad_norm": 0.7083504678979596, "learning_rate": 4.55947707889759e-08, "loss": 0.2428, "step": 21174 }, { "epoch": 0.9583616202760805, "grad_norm": 0.5732163183389878, "learning_rate": 4.549607055048699e-08, "loss": 0.3069, "step": 21175 }, { "epoch": 0.9584068793844761, "grad_norm": 0.5684435449877221, "learning_rate": 4.539747676907069e-08, "loss": 0.2736, "step": 21176 }, { "epoch": 0.9584521384928717, "grad_norm": 0.7016450015645664, "learning_rate": 4.529898944684585e-08, "loss": 0.2662, "step": 21177 }, { "epoch": 0.9584973976012673, "grad_norm": 0.6133835173395702, "learning_rate": 4.5200608585928566e-08, "loss": 0.2779, "step": 21178 }, { "epoch": 0.9585426567096628, "grad_norm": 0.8378032151828722, "learning_rate": 4.510233418843213e-08, "loss": 0.2647, "step": 21179 }, { "epoch": 0.9585879158180584, "grad_norm": 0.3407268560380774, "learning_rate": 4.5004166256469305e-08, "loss": 0.4614, "step": 21180 }, { "epoch": 0.958633174926454, "grad_norm": 0.6134658495980012, "learning_rate": 4.490610479214841e-08, "loss": 0.2638, "step": 21181 }, { "epoch": 0.9586784340348495, "grad_norm": 0.6981064429014657, "learning_rate": 4.480814979757719e-08, "loss": 0.3505, "step": 21182 }, { "epoch": 0.9587236931432451, "grad_norm": 0.5817051446699949, "learning_rate": 4.471030127486009e-08, "loss": 0.2621, "step": 21183 }, { "epoch": 0.9587689522516406, "grad_norm": 0.6006523021407582, "learning_rate": 4.461255922609986e-08, "loss": 0.2656, "step": 21184 }, { "epoch": 0.9588142113600362, "grad_norm": 0.6019792523072536, "learning_rate": 4.451492365339594e-08, "loss": 0.2884, "step": 21185 }, { "epoch": 0.9588594704684318, "grad_norm": 0.6145690423990191, "learning_rate": 4.4417394558846636e-08, "loss": 0.3031, "step": 21186 }, { "epoch": 0.9589047295768274, "grad_norm": 0.6242685469563993, "learning_rate": 4.431997194454807e-08, "loss": 0.355, "step": 21187 }, { "epoch": 0.9589499886852229, "grad_norm": 0.6240215002808371, "learning_rate": 4.4222655812592995e-08, "loss": 0.2737, "step": 21188 }, { "epoch": 0.9589952477936184, "grad_norm": 0.6140724268164072, "learning_rate": 4.412544616507253e-08, "loss": 0.3129, "step": 21189 }, { "epoch": 0.959040506902014, "grad_norm": 0.8783380764141445, "learning_rate": 4.402834300407499e-08, "loss": 0.2695, "step": 21190 }, { "epoch": 0.9590857660104096, "grad_norm": 0.5972526903824302, "learning_rate": 4.3931346331688165e-08, "loss": 0.3249, "step": 21191 }, { "epoch": 0.9591310251188052, "grad_norm": 0.25982751541373955, "learning_rate": 4.383445614999426e-08, "loss": 0.4751, "step": 21192 }, { "epoch": 0.9591762842272007, "grad_norm": 0.6209074527492965, "learning_rate": 4.373767246107718e-08, "loss": 0.2567, "step": 21193 }, { "epoch": 0.9592215433355963, "grad_norm": 0.6748164420339022, "learning_rate": 4.3640995267014704e-08, "loss": 0.2274, "step": 21194 }, { "epoch": 0.9592668024439919, "grad_norm": 0.6207451339187819, "learning_rate": 4.354442456988517e-08, "loss": 0.2833, "step": 21195 }, { "epoch": 0.9593120615523875, "grad_norm": 0.2598314089164235, "learning_rate": 4.3447960371763575e-08, "loss": 0.4636, "step": 21196 }, { "epoch": 0.9593573206607829, "grad_norm": 0.6280994414834518, "learning_rate": 4.335160267472216e-08, "loss": 0.3045, "step": 21197 }, { "epoch": 0.9594025797691785, "grad_norm": 0.6668469020947756, "learning_rate": 4.325535148083204e-08, "loss": 0.3126, "step": 21198 }, { "epoch": 0.9594478388775741, "grad_norm": 0.7184303504115127, "learning_rate": 4.3159206792160455e-08, "loss": 0.2935, "step": 21199 }, { "epoch": 0.9594930979859697, "grad_norm": 0.6537703551533632, "learning_rate": 4.3063168610774084e-08, "loss": 0.2885, "step": 21200 }, { "epoch": 0.9595383570943652, "grad_norm": 0.6323361763200717, "learning_rate": 4.2967236938735725e-08, "loss": 0.2797, "step": 21201 }, { "epoch": 0.9595836162027608, "grad_norm": 0.2735815182206019, "learning_rate": 4.287141177810761e-08, "loss": 0.4609, "step": 21202 }, { "epoch": 0.9596288753111564, "grad_norm": 0.6295577981996854, "learning_rate": 4.2775693130948094e-08, "loss": 0.3092, "step": 21203 }, { "epoch": 0.959674134419552, "grad_norm": 0.5876943016991591, "learning_rate": 4.268008099931387e-08, "loss": 0.2894, "step": 21204 }, { "epoch": 0.9597193935279476, "grad_norm": 0.5478724164381471, "learning_rate": 4.25845753852594e-08, "loss": 0.2857, "step": 21205 }, { "epoch": 0.959764652636343, "grad_norm": 0.7570418593038322, "learning_rate": 4.248917629083693e-08, "loss": 0.2725, "step": 21206 }, { "epoch": 0.9598099117447386, "grad_norm": 0.2699582044210566, "learning_rate": 4.2393883718096495e-08, "loss": 0.4692, "step": 21207 }, { "epoch": 0.9598551708531342, "grad_norm": 0.6258507229904574, "learning_rate": 4.2298697669084785e-08, "loss": 0.2818, "step": 21208 }, { "epoch": 0.9599004299615298, "grad_norm": 0.6338652523526765, "learning_rate": 4.2203618145847946e-08, "loss": 0.3102, "step": 21209 }, { "epoch": 0.9599456890699253, "grad_norm": 0.25974894190440184, "learning_rate": 4.210864515042878e-08, "loss": 0.4548, "step": 21210 }, { "epoch": 0.9599909481783209, "grad_norm": 0.6166636330929279, "learning_rate": 4.2013778684867335e-08, "loss": 0.297, "step": 21211 }, { "epoch": 0.9600362072867165, "grad_norm": 0.5978516768881983, "learning_rate": 4.191901875120308e-08, "loss": 0.3011, "step": 21212 }, { "epoch": 0.960081466395112, "grad_norm": 0.6019325817783381, "learning_rate": 4.182436535147105e-08, "loss": 0.3409, "step": 21213 }, { "epoch": 0.9601267255035075, "grad_norm": 0.6240142157887236, "learning_rate": 4.1729818487706297e-08, "loss": 0.2794, "step": 21214 }, { "epoch": 0.9601719846119031, "grad_norm": 0.6209004645487891, "learning_rate": 4.163537816193885e-08, "loss": 0.3074, "step": 21215 }, { "epoch": 0.9602172437202987, "grad_norm": 0.5959644019366294, "learning_rate": 4.154104437619877e-08, "loss": 0.2902, "step": 21216 }, { "epoch": 0.9602625028286943, "grad_norm": 0.6120197629494267, "learning_rate": 4.144681713251275e-08, "loss": 0.2721, "step": 21217 }, { "epoch": 0.9603077619370899, "grad_norm": 0.6320329580407882, "learning_rate": 4.1352696432906405e-08, "loss": 0.2833, "step": 21218 }, { "epoch": 0.9603530210454854, "grad_norm": 0.5909436023906957, "learning_rate": 4.125868227940033e-08, "loss": 0.3063, "step": 21219 }, { "epoch": 0.960398280153881, "grad_norm": 0.5730815741653685, "learning_rate": 4.116477467401625e-08, "loss": 0.3018, "step": 21220 }, { "epoch": 0.9604435392622765, "grad_norm": 0.26835348997717295, "learning_rate": 4.107097361877088e-08, "loss": 0.4632, "step": 21221 }, { "epoch": 0.9604887983706721, "grad_norm": 0.63553753421761, "learning_rate": 4.097727911568039e-08, "loss": 0.3173, "step": 21222 }, { "epoch": 0.9605340574790676, "grad_norm": 0.6397865086894752, "learning_rate": 4.088369116675761e-08, "loss": 0.2952, "step": 21223 }, { "epoch": 0.9605793165874632, "grad_norm": 0.599380152021612, "learning_rate": 4.0790209774013156e-08, "loss": 0.2655, "step": 21224 }, { "epoch": 0.9606245756958588, "grad_norm": 0.5637309960267389, "learning_rate": 4.069683493945598e-08, "loss": 0.2557, "step": 21225 }, { "epoch": 0.9606698348042544, "grad_norm": 0.6125425675800832, "learning_rate": 4.060356666509335e-08, "loss": 0.298, "step": 21226 }, { "epoch": 0.96071509391265, "grad_norm": 0.6476639084705905, "learning_rate": 4.051040495292757e-08, "loss": 0.3023, "step": 21227 }, { "epoch": 0.9607603530210455, "grad_norm": 0.5677392671931741, "learning_rate": 4.041734980496148e-08, "loss": 0.3043, "step": 21228 }, { "epoch": 0.960805612129441, "grad_norm": 0.6092973251190281, "learning_rate": 4.032440122319459e-08, "loss": 0.2896, "step": 21229 }, { "epoch": 0.9608508712378366, "grad_norm": 0.26849380700297054, "learning_rate": 4.0231559209624185e-08, "loss": 0.4789, "step": 21230 }, { "epoch": 0.9608961303462322, "grad_norm": 1.0232158053660931, "learning_rate": 4.013882376624423e-08, "loss": 0.3167, "step": 21231 }, { "epoch": 0.9609413894546277, "grad_norm": 0.30389324861319167, "learning_rate": 4.004619489504813e-08, "loss": 0.456, "step": 21232 }, { "epoch": 0.9609866485630233, "grad_norm": 0.6564086381621276, "learning_rate": 3.995367259802596e-08, "loss": 0.3061, "step": 21233 }, { "epoch": 0.9610319076714189, "grad_norm": 0.6095148878709185, "learning_rate": 3.986125687716558e-08, "loss": 0.2643, "step": 21234 }, { "epoch": 0.9610771667798145, "grad_norm": 0.6100252027921181, "learning_rate": 3.976894773445261e-08, "loss": 0.3019, "step": 21235 }, { "epoch": 0.96112242588821, "grad_norm": 0.5839556175104145, "learning_rate": 3.967674517187159e-08, "loss": 0.2719, "step": 21236 }, { "epoch": 0.9611676849966055, "grad_norm": 0.5543483765055761, "learning_rate": 3.9584649191402034e-08, "loss": 0.3032, "step": 21237 }, { "epoch": 0.9612129441050011, "grad_norm": 0.2859477139223187, "learning_rate": 3.9492659795024035e-08, "loss": 0.4663, "step": 21238 }, { "epoch": 0.9612582032133967, "grad_norm": 0.2531076016653137, "learning_rate": 3.940077698471378e-08, "loss": 0.4635, "step": 21239 }, { "epoch": 0.9613034623217923, "grad_norm": 0.6157171437043197, "learning_rate": 3.930900076244526e-08, "loss": 0.3218, "step": 21240 }, { "epoch": 0.9613487214301878, "grad_norm": 0.6477974935783631, "learning_rate": 3.921733113019077e-08, "loss": 0.2452, "step": 21241 }, { "epoch": 0.9613939805385834, "grad_norm": 0.6188385285914921, "learning_rate": 3.912576808991986e-08, "loss": 0.3111, "step": 21242 }, { "epoch": 0.961439239646979, "grad_norm": 0.6520923746735432, "learning_rate": 3.903431164360094e-08, "loss": 0.3082, "step": 21243 }, { "epoch": 0.9614844987553746, "grad_norm": 0.6093413924671904, "learning_rate": 3.8942961793197456e-08, "loss": 0.2757, "step": 21244 }, { "epoch": 0.96152975786377, "grad_norm": 0.6180322542059126, "learning_rate": 3.885171854067282e-08, "loss": 0.2801, "step": 21245 }, { "epoch": 0.9615750169721656, "grad_norm": 0.6173825785972774, "learning_rate": 3.8760581887987706e-08, "loss": 0.2981, "step": 21246 }, { "epoch": 0.9616202760805612, "grad_norm": 0.6535693212898909, "learning_rate": 3.866955183710108e-08, "loss": 0.281, "step": 21247 }, { "epoch": 0.9616655351889568, "grad_norm": 0.6009252696082468, "learning_rate": 3.857862838996751e-08, "loss": 0.2282, "step": 21248 }, { "epoch": 0.9617107942973523, "grad_norm": 0.7029568060532387, "learning_rate": 3.8487811548542086e-08, "loss": 0.3151, "step": 21249 }, { "epoch": 0.9617560534057479, "grad_norm": 0.27285242924989755, "learning_rate": 3.839710131477492e-08, "loss": 0.4705, "step": 21250 }, { "epoch": 0.9618013125141435, "grad_norm": 0.6684912899665435, "learning_rate": 3.8306497690615564e-08, "loss": 0.3379, "step": 21251 }, { "epoch": 0.9618465716225391, "grad_norm": 0.5495280270916768, "learning_rate": 3.8216000678011344e-08, "loss": 0.2976, "step": 21252 }, { "epoch": 0.9618918307309346, "grad_norm": 0.2520299691807092, "learning_rate": 3.812561027890571e-08, "loss": 0.4836, "step": 21253 }, { "epoch": 0.9619370898393301, "grad_norm": 0.559546923586854, "learning_rate": 3.8035326495242106e-08, "loss": 0.2578, "step": 21254 }, { "epoch": 0.9619823489477257, "grad_norm": 0.6358503324588595, "learning_rate": 3.794514932895954e-08, "loss": 0.3164, "step": 21255 }, { "epoch": 0.9620276080561213, "grad_norm": 0.5869712565273543, "learning_rate": 3.78550787819959e-08, "loss": 0.2585, "step": 21256 }, { "epoch": 0.9620728671645169, "grad_norm": 0.25232073452894843, "learning_rate": 3.7765114856286866e-08, "loss": 0.4614, "step": 21257 }, { "epoch": 0.9621181262729124, "grad_norm": 0.6596492126163388, "learning_rate": 3.7675257553764224e-08, "loss": 0.2838, "step": 21258 }, { "epoch": 0.962163385381308, "grad_norm": 0.625749386386828, "learning_rate": 3.7585506876360865e-08, "loss": 0.312, "step": 21259 }, { "epoch": 0.9622086444897036, "grad_norm": 0.2912275382564058, "learning_rate": 3.749586282600359e-08, "loss": 0.4784, "step": 21260 }, { "epoch": 0.9622539035980991, "grad_norm": 0.7980256338173004, "learning_rate": 3.740632540461864e-08, "loss": 0.2934, "step": 21261 }, { "epoch": 0.9622991627064947, "grad_norm": 0.5957946792402637, "learning_rate": 3.731689461413113e-08, "loss": 0.2908, "step": 21262 }, { "epoch": 0.9623444218148902, "grad_norm": 0.6516521315572986, "learning_rate": 3.7227570456461194e-08, "loss": 0.2563, "step": 21263 }, { "epoch": 0.9623896809232858, "grad_norm": 0.6167601726247165, "learning_rate": 3.7138352933528965e-08, "loss": 0.3169, "step": 21264 }, { "epoch": 0.9624349400316814, "grad_norm": 0.6262162319203259, "learning_rate": 3.70492420472518e-08, "loss": 0.2906, "step": 21265 }, { "epoch": 0.962480199140077, "grad_norm": 0.6135124490731227, "learning_rate": 3.6960237799543166e-08, "loss": 0.2909, "step": 21266 }, { "epoch": 0.9625254582484725, "grad_norm": 0.7785797765189498, "learning_rate": 3.6871340192315974e-08, "loss": 0.2873, "step": 21267 }, { "epoch": 0.962570717356868, "grad_norm": 0.6334405969382095, "learning_rate": 3.6782549227481476e-08, "loss": 0.2909, "step": 21268 }, { "epoch": 0.9626159764652636, "grad_norm": 0.5805036307580624, "learning_rate": 3.669386490694593e-08, "loss": 0.268, "step": 21269 }, { "epoch": 0.9626612355736592, "grad_norm": 0.2630489032207125, "learning_rate": 3.6605287232616137e-08, "loss": 0.4674, "step": 21270 }, { "epoch": 0.9627064946820547, "grad_norm": 0.6354005481006761, "learning_rate": 3.651681620639447e-08, "loss": 0.2704, "step": 21271 }, { "epoch": 0.9627517537904503, "grad_norm": 0.6971951264532625, "learning_rate": 3.642845183018273e-08, "loss": 0.2801, "step": 21272 }, { "epoch": 0.9627970128988459, "grad_norm": 0.618233409724753, "learning_rate": 3.63401941058783e-08, "loss": 0.2253, "step": 21273 }, { "epoch": 0.9628422720072415, "grad_norm": 0.2852131682171573, "learning_rate": 3.625204303537855e-08, "loss": 0.4863, "step": 21274 }, { "epoch": 0.9628875311156371, "grad_norm": 0.6274027089335161, "learning_rate": 3.6163998620578065e-08, "loss": 0.3192, "step": 21275 }, { "epoch": 0.9629327902240326, "grad_norm": 0.639063695040162, "learning_rate": 3.6076060863367565e-08, "loss": 0.2792, "step": 21276 }, { "epoch": 0.9629780493324281, "grad_norm": 0.657216145416747, "learning_rate": 3.598822976563665e-08, "loss": 0.3416, "step": 21277 }, { "epoch": 0.9630233084408237, "grad_norm": 0.314988580497501, "learning_rate": 3.5900505329273804e-08, "loss": 0.4811, "step": 21278 }, { "epoch": 0.9630685675492193, "grad_norm": 0.6421338261470828, "learning_rate": 3.581288755616197e-08, "loss": 0.2978, "step": 21279 }, { "epoch": 0.9631138266576148, "grad_norm": 0.6104777277721297, "learning_rate": 3.5725376448185744e-08, "loss": 0.3207, "step": 21280 }, { "epoch": 0.9631590857660104, "grad_norm": 0.25787119378948375, "learning_rate": 3.563797200722363e-08, "loss": 0.4712, "step": 21281 }, { "epoch": 0.963204344874406, "grad_norm": 0.6496668162613294, "learning_rate": 3.555067423515523e-08, "loss": 0.2714, "step": 21282 }, { "epoch": 0.9632496039828016, "grad_norm": 0.5899650319063284, "learning_rate": 3.5463483133855726e-08, "loss": 0.3019, "step": 21283 }, { "epoch": 0.963294863091197, "grad_norm": 0.6227285001305403, "learning_rate": 3.5376398705198603e-08, "loss": 0.2778, "step": 21284 }, { "epoch": 0.9633401221995926, "grad_norm": 0.5848115284332623, "learning_rate": 3.5289420951055145e-08, "loss": 0.2984, "step": 21285 }, { "epoch": 0.9633853813079882, "grad_norm": 0.613686346774837, "learning_rate": 3.5202549873293304e-08, "loss": 0.3182, "step": 21286 }, { "epoch": 0.9634306404163838, "grad_norm": 0.5834137635129636, "learning_rate": 3.5115785473781026e-08, "loss": 0.3004, "step": 21287 }, { "epoch": 0.9634758995247794, "grad_norm": 0.6097829559220829, "learning_rate": 3.502912775438183e-08, "loss": 0.2664, "step": 21288 }, { "epoch": 0.9635211586331749, "grad_norm": 0.25201403422770474, "learning_rate": 3.494257671695811e-08, "loss": 0.4597, "step": 21289 }, { "epoch": 0.9635664177415705, "grad_norm": 0.6905157574890572, "learning_rate": 3.4856132363369485e-08, "loss": 0.3064, "step": 21290 }, { "epoch": 0.9636116768499661, "grad_norm": 1.02299639303271, "learning_rate": 3.476979469547337e-08, "loss": 0.269, "step": 21291 }, { "epoch": 0.9636569359583617, "grad_norm": 0.6560738269000224, "learning_rate": 3.468356371512438e-08, "loss": 0.3031, "step": 21292 }, { "epoch": 0.9637021950667571, "grad_norm": 0.26279930128979606, "learning_rate": 3.459743942417604e-08, "loss": 0.4655, "step": 21293 }, { "epoch": 0.9637474541751527, "grad_norm": 0.6623621867401419, "learning_rate": 3.451142182447908e-08, "loss": 0.2765, "step": 21294 }, { "epoch": 0.9637927132835483, "grad_norm": 0.5857663907388148, "learning_rate": 3.442551091788038e-08, "loss": 0.266, "step": 21295 }, { "epoch": 0.9638379723919439, "grad_norm": 0.6146602373853328, "learning_rate": 3.4339706706227326e-08, "loss": 0.3012, "step": 21296 }, { "epoch": 0.9638832315003394, "grad_norm": 0.5996634569620617, "learning_rate": 3.425400919136346e-08, "loss": 0.3118, "step": 21297 }, { "epoch": 0.963928490608735, "grad_norm": 0.5527183377984016, "learning_rate": 3.416841837512952e-08, "loss": 0.2982, "step": 21298 }, { "epoch": 0.9639737497171306, "grad_norm": 0.6135812886345209, "learning_rate": 3.40829342593646e-08, "loss": 0.2939, "step": 21299 }, { "epoch": 0.9640190088255262, "grad_norm": 0.6100060335767012, "learning_rate": 3.399755684590611e-08, "loss": 0.2768, "step": 21300 }, { "epoch": 0.9640642679339217, "grad_norm": 0.6862427360829833, "learning_rate": 3.39122861365887e-08, "loss": 0.3029, "step": 21301 }, { "epoch": 0.9641095270423172, "grad_norm": 1.103074870883008, "learning_rate": 3.382712213324313e-08, "loss": 0.2559, "step": 21302 }, { "epoch": 0.9641547861507128, "grad_norm": 0.27218569974804424, "learning_rate": 3.374206483770071e-08, "loss": 0.4603, "step": 21303 }, { "epoch": 0.9642000452591084, "grad_norm": 0.2551901000735597, "learning_rate": 3.365711425178886e-08, "loss": 0.4548, "step": 21304 }, { "epoch": 0.964245304367504, "grad_norm": 0.6019272256332595, "learning_rate": 3.357227037733224e-08, "loss": 0.2514, "step": 21305 }, { "epoch": 0.9642905634758995, "grad_norm": 0.5836409045668817, "learning_rate": 3.3487533216154386e-08, "loss": 0.2902, "step": 21306 }, { "epoch": 0.9643358225842951, "grad_norm": 0.3000098505611248, "learning_rate": 3.340290277007607e-08, "loss": 0.4798, "step": 21307 }, { "epoch": 0.9643810816926907, "grad_norm": 0.6196779072403756, "learning_rate": 3.3318379040915284e-08, "loss": 0.3096, "step": 21308 }, { "epoch": 0.9644263408010862, "grad_norm": 0.6049054192400112, "learning_rate": 3.3233962030489453e-08, "loss": 0.2708, "step": 21309 }, { "epoch": 0.9644715999094818, "grad_norm": 0.6443006566661341, "learning_rate": 3.3149651740610464e-08, "loss": 0.299, "step": 21310 }, { "epoch": 0.9645168590178773, "grad_norm": 0.5848403475285998, "learning_rate": 3.3065448173091873e-08, "loss": 0.2754, "step": 21311 }, { "epoch": 0.9645621181262729, "grad_norm": 0.6277945205866138, "learning_rate": 3.298135132974112e-08, "loss": 0.2824, "step": 21312 }, { "epoch": 0.9646073772346685, "grad_norm": 0.6249229890940834, "learning_rate": 3.289736121236675e-08, "loss": 0.3334, "step": 21313 }, { "epoch": 0.9646526363430641, "grad_norm": 0.23627308508275272, "learning_rate": 3.2813477822772885e-08, "loss": 0.4655, "step": 21314 }, { "epoch": 0.9646978954514596, "grad_norm": 0.6215681145266133, "learning_rate": 3.2729701162760865e-08, "loss": 0.293, "step": 21315 }, { "epoch": 0.9647431545598552, "grad_norm": 0.6019029106587596, "learning_rate": 3.264603123413257e-08, "loss": 0.2539, "step": 21316 }, { "epoch": 0.9647884136682507, "grad_norm": 0.7327028825493089, "learning_rate": 3.25624680386849e-08, "loss": 0.298, "step": 21317 }, { "epoch": 0.9648336727766463, "grad_norm": 0.5796198943379359, "learning_rate": 3.247901157821365e-08, "loss": 0.2691, "step": 21318 }, { "epoch": 0.9648789318850418, "grad_norm": 0.628252502714485, "learning_rate": 3.2395661854511264e-08, "loss": 0.3111, "step": 21319 }, { "epoch": 0.9649241909934374, "grad_norm": 0.6548347401051914, "learning_rate": 3.23124188693702e-08, "loss": 0.2912, "step": 21320 }, { "epoch": 0.964969450101833, "grad_norm": 0.6173020789342651, "learning_rate": 3.222928262457736e-08, "loss": 0.2759, "step": 21321 }, { "epoch": 0.9650147092102286, "grad_norm": 0.6195357469478212, "learning_rate": 3.2146253121920215e-08, "loss": 0.3075, "step": 21322 }, { "epoch": 0.9650599683186242, "grad_norm": 0.6253934878655681, "learning_rate": 3.2063330363182323e-08, "loss": 0.2806, "step": 21323 }, { "epoch": 0.9651052274270197, "grad_norm": 0.24519266579212162, "learning_rate": 3.19805143501456e-08, "loss": 0.4556, "step": 21324 }, { "epoch": 0.9651504865354152, "grad_norm": 0.2714398967046984, "learning_rate": 3.1897805084589726e-08, "loss": 0.4621, "step": 21325 }, { "epoch": 0.9651957456438108, "grad_norm": 0.7212628197749601, "learning_rate": 3.1815202568291625e-08, "loss": 0.2697, "step": 21326 }, { "epoch": 0.9652410047522064, "grad_norm": 0.5578222247668211, "learning_rate": 3.173270680302598e-08, "loss": 0.2547, "step": 21327 }, { "epoch": 0.9652862638606019, "grad_norm": 0.6141571513729598, "learning_rate": 3.165031779056582e-08, "loss": 0.321, "step": 21328 }, { "epoch": 0.9653315229689975, "grad_norm": 0.5825078984311327, "learning_rate": 3.156803553268084e-08, "loss": 0.2925, "step": 21329 }, { "epoch": 0.9653767820773931, "grad_norm": 0.5824074331733144, "learning_rate": 3.1485860031140183e-08, "loss": 0.2951, "step": 21330 }, { "epoch": 0.9654220411857887, "grad_norm": 0.26065467366216083, "learning_rate": 3.1403791287707986e-08, "loss": 0.4912, "step": 21331 }, { "epoch": 0.9654673002941841, "grad_norm": 0.6281474671279208, "learning_rate": 3.1321829304148954e-08, "loss": 0.3223, "step": 21332 }, { "epoch": 0.9655125594025797, "grad_norm": 0.6394881447406083, "learning_rate": 3.1239974082223347e-08, "loss": 0.3109, "step": 21333 }, { "epoch": 0.9655578185109753, "grad_norm": 0.6804236092716724, "learning_rate": 3.115822562369086e-08, "loss": 0.2848, "step": 21334 }, { "epoch": 0.9656030776193709, "grad_norm": 0.2707283479344151, "learning_rate": 3.107658393030677e-08, "loss": 0.451, "step": 21335 }, { "epoch": 0.9656483367277665, "grad_norm": 0.6753311154891868, "learning_rate": 3.0995049003826325e-08, "loss": 0.2892, "step": 21336 }, { "epoch": 0.965693595836162, "grad_norm": 0.6298728408666605, "learning_rate": 3.0913620846000916e-08, "loss": 0.3038, "step": 21337 }, { "epoch": 0.9657388549445576, "grad_norm": 0.5729926603889947, "learning_rate": 3.083229945858079e-08, "loss": 0.3137, "step": 21338 }, { "epoch": 0.9657841140529532, "grad_norm": 0.7725364840099211, "learning_rate": 3.075108484331235e-08, "loss": 0.3039, "step": 21339 }, { "epoch": 0.9658293731613488, "grad_norm": 0.6693582053497801, "learning_rate": 3.066997700194197e-08, "loss": 0.2881, "step": 21340 }, { "epoch": 0.9658746322697442, "grad_norm": 0.6188826859538752, "learning_rate": 3.0588975936211017e-08, "loss": 0.2904, "step": 21341 }, { "epoch": 0.9659198913781398, "grad_norm": 0.6369808490204127, "learning_rate": 3.05080816478609e-08, "loss": 0.3559, "step": 21342 }, { "epoch": 0.9659651504865354, "grad_norm": 0.6384628270851854, "learning_rate": 3.042729413862966e-08, "loss": 0.3077, "step": 21343 }, { "epoch": 0.966010409594931, "grad_norm": 0.6037050679353614, "learning_rate": 3.034661341025258e-08, "loss": 0.275, "step": 21344 }, { "epoch": 0.9660556687033266, "grad_norm": 0.576146704475577, "learning_rate": 3.0266039464463823e-08, "loss": 0.2826, "step": 21345 }, { "epoch": 0.9661009278117221, "grad_norm": 0.5736812009031691, "learning_rate": 3.0185572302994795e-08, "loss": 0.3294, "step": 21346 }, { "epoch": 0.9661461869201177, "grad_norm": 0.6388020999905242, "learning_rate": 3.0105211927574096e-08, "loss": 0.2729, "step": 21347 }, { "epoch": 0.9661914460285133, "grad_norm": 0.5807264304566786, "learning_rate": 3.002495833992813e-08, "loss": 0.3027, "step": 21348 }, { "epoch": 0.9662367051369088, "grad_norm": 0.6342121065316381, "learning_rate": 2.994481154178164e-08, "loss": 0.3396, "step": 21349 }, { "epoch": 0.9662819642453043, "grad_norm": 0.6107434989636781, "learning_rate": 2.9864771534857114e-08, "loss": 0.2826, "step": 21350 }, { "epoch": 0.9663272233536999, "grad_norm": 0.6483472713840857, "learning_rate": 2.978483832087431e-08, "loss": 0.3127, "step": 21351 }, { "epoch": 0.9663724824620955, "grad_norm": 0.5739299116426992, "learning_rate": 2.970501190154962e-08, "loss": 0.2856, "step": 21352 }, { "epoch": 0.9664177415704911, "grad_norm": 0.25963244238987054, "learning_rate": 2.9625292278600005e-08, "loss": 0.4672, "step": 21353 }, { "epoch": 0.9664630006788866, "grad_norm": 0.5650708348270717, "learning_rate": 2.9545679453736874e-08, "loss": 0.2673, "step": 21354 }, { "epoch": 0.9665082597872822, "grad_norm": 0.5911926544516285, "learning_rate": 2.9466173428672197e-08, "loss": 0.2856, "step": 21355 }, { "epoch": 0.9665535188956778, "grad_norm": 0.6348876111669358, "learning_rate": 2.9386774205112934e-08, "loss": 0.3381, "step": 21356 }, { "epoch": 0.9665987780040733, "grad_norm": 0.5792450882193889, "learning_rate": 2.9307481784766057e-08, "loss": 0.2879, "step": 21357 }, { "epoch": 0.9666440371124689, "grad_norm": 0.6526299208429099, "learning_rate": 2.92282961693352e-08, "loss": 0.3035, "step": 21358 }, { "epoch": 0.9666892962208644, "grad_norm": 0.5981744444686902, "learning_rate": 2.9149217360521788e-08, "loss": 0.3048, "step": 21359 }, { "epoch": 0.96673455532926, "grad_norm": 0.6631230697732848, "learning_rate": 2.907024536002501e-08, "loss": 0.2464, "step": 21360 }, { "epoch": 0.9667798144376556, "grad_norm": 0.6415576068812007, "learning_rate": 2.8991380169541284e-08, "loss": 0.3114, "step": 21361 }, { "epoch": 0.9668250735460512, "grad_norm": 0.6141966263098978, "learning_rate": 2.8912621790765373e-08, "loss": 0.2719, "step": 21362 }, { "epoch": 0.9668703326544467, "grad_norm": 0.6159770387398168, "learning_rate": 2.883397022538981e-08, "loss": 0.3383, "step": 21363 }, { "epoch": 0.9669155917628423, "grad_norm": 0.6686155973457726, "learning_rate": 2.8755425475104904e-08, "loss": 0.2737, "step": 21364 }, { "epoch": 0.9669608508712378, "grad_norm": 0.3101889005652721, "learning_rate": 2.8676987541597646e-08, "loss": 0.4554, "step": 21365 }, { "epoch": 0.9670061099796334, "grad_norm": 0.2625387624468325, "learning_rate": 2.859865642655335e-08, "loss": 0.4551, "step": 21366 }, { "epoch": 0.9670513690880289, "grad_norm": 0.6784526673158491, "learning_rate": 2.8520432131655673e-08, "loss": 0.2948, "step": 21367 }, { "epoch": 0.9670966281964245, "grad_norm": 0.7018253409325259, "learning_rate": 2.8442314658584936e-08, "loss": 0.2791, "step": 21368 }, { "epoch": 0.9671418873048201, "grad_norm": 0.6030150366424402, "learning_rate": 2.8364304009020348e-08, "loss": 0.2803, "step": 21369 }, { "epoch": 0.9671871464132157, "grad_norm": 0.5687094575670066, "learning_rate": 2.8286400184637242e-08, "loss": 0.3103, "step": 21370 }, { "epoch": 0.9672324055216113, "grad_norm": 0.604944121680553, "learning_rate": 2.820860318710983e-08, "loss": 0.269, "step": 21371 }, { "epoch": 0.9672776646300068, "grad_norm": 0.6127066407259337, "learning_rate": 2.813091301811066e-08, "loss": 0.3115, "step": 21372 }, { "epoch": 0.9673229237384023, "grad_norm": 0.5783694521108265, "learning_rate": 2.8053329679307293e-08, "loss": 0.2924, "step": 21373 }, { "epoch": 0.9673681828467979, "grad_norm": 0.28195681477137985, "learning_rate": 2.797585317236784e-08, "loss": 0.4593, "step": 21374 }, { "epoch": 0.9674134419551935, "grad_norm": 0.6422170672348851, "learning_rate": 2.789848349895763e-08, "loss": 0.27, "step": 21375 }, { "epoch": 0.967458701063589, "grad_norm": 0.5907711761183103, "learning_rate": 2.782122066073756e-08, "loss": 0.2965, "step": 21376 }, { "epoch": 0.9675039601719846, "grad_norm": 0.2995926429078034, "learning_rate": 2.7744064659369073e-08, "loss": 0.4785, "step": 21377 }, { "epoch": 0.9675492192803802, "grad_norm": 0.8353705596359702, "learning_rate": 2.7667015496509187e-08, "loss": 0.3488, "step": 21378 }, { "epoch": 0.9675944783887758, "grad_norm": 0.6500850117419013, "learning_rate": 2.7590073173813792e-08, "loss": 0.3093, "step": 21379 }, { "epoch": 0.9676397374971714, "grad_norm": 0.2531423524179933, "learning_rate": 2.7513237692936567e-08, "loss": 0.4659, "step": 21380 }, { "epoch": 0.9676849966055668, "grad_norm": 0.649250415015185, "learning_rate": 2.743650905552786e-08, "loss": 0.2858, "step": 21381 }, { "epoch": 0.9677302557139624, "grad_norm": 0.24597576493641854, "learning_rate": 2.7359887263236352e-08, "loss": 0.436, "step": 21382 }, { "epoch": 0.967775514822358, "grad_norm": 0.23698909783832273, "learning_rate": 2.7283372317708502e-08, "loss": 0.4512, "step": 21383 }, { "epoch": 0.9678207739307536, "grad_norm": 0.5635119405187751, "learning_rate": 2.720696422058855e-08, "loss": 0.3041, "step": 21384 }, { "epoch": 0.9678660330391491, "grad_norm": 0.6416710534046843, "learning_rate": 2.713066297351852e-08, "loss": 0.3106, "step": 21385 }, { "epoch": 0.9679112921475447, "grad_norm": 0.6454105343058075, "learning_rate": 2.7054468578137093e-08, "loss": 0.2724, "step": 21386 }, { "epoch": 0.9679565512559403, "grad_norm": 0.8120809368879789, "learning_rate": 2.6978381036081857e-08, "loss": 0.2513, "step": 21387 }, { "epoch": 0.9680018103643359, "grad_norm": 0.5913980149953101, "learning_rate": 2.6902400348987613e-08, "loss": 0.284, "step": 21388 }, { "epoch": 0.9680470694727313, "grad_norm": 0.2593861442090093, "learning_rate": 2.6826526518487496e-08, "loss": 0.4646, "step": 21389 }, { "epoch": 0.9680923285811269, "grad_norm": 0.5799799435681116, "learning_rate": 2.6750759546211312e-08, "loss": 0.2709, "step": 21390 }, { "epoch": 0.9681375876895225, "grad_norm": 0.8052886730121234, "learning_rate": 2.6675099433787212e-08, "loss": 0.2577, "step": 21391 }, { "epoch": 0.9681828467979181, "grad_norm": 0.6162388408613758, "learning_rate": 2.6599546182840553e-08, "loss": 0.2765, "step": 21392 }, { "epoch": 0.9682281059063137, "grad_norm": 0.6170886181395424, "learning_rate": 2.652409979499504e-08, "loss": 0.3015, "step": 21393 }, { "epoch": 0.9682733650147092, "grad_norm": 0.25442359485009247, "learning_rate": 2.6448760271872152e-08, "loss": 0.4623, "step": 21394 }, { "epoch": 0.9683186241231048, "grad_norm": 0.2603122844717185, "learning_rate": 2.6373527615090044e-08, "loss": 0.4517, "step": 21395 }, { "epoch": 0.9683638832315004, "grad_norm": 0.603507615130539, "learning_rate": 2.6298401826265195e-08, "loss": 0.2465, "step": 21396 }, { "epoch": 0.968409142339896, "grad_norm": 0.5992182971411182, "learning_rate": 2.6223382907012428e-08, "loss": 0.3009, "step": 21397 }, { "epoch": 0.9684544014482914, "grad_norm": 0.6311970985757361, "learning_rate": 2.6148470858943787e-08, "loss": 0.2966, "step": 21398 }, { "epoch": 0.968499660556687, "grad_norm": 0.6006286490801301, "learning_rate": 2.607366568366798e-08, "loss": 0.2755, "step": 21399 }, { "epoch": 0.9685449196650826, "grad_norm": 0.6708687272743549, "learning_rate": 2.5998967382792618e-08, "loss": 0.3063, "step": 21400 }, { "epoch": 0.9685901787734782, "grad_norm": 0.5393526089892805, "learning_rate": 2.592437595792363e-08, "loss": 0.2948, "step": 21401 }, { "epoch": 0.9686354378818737, "grad_norm": 0.5912557610649447, "learning_rate": 2.584989141066252e-08, "loss": 0.2808, "step": 21402 }, { "epoch": 0.9686806969902693, "grad_norm": 0.6030725729127032, "learning_rate": 2.577551374261078e-08, "loss": 0.2584, "step": 21403 }, { "epoch": 0.9687259560986649, "grad_norm": 0.6478863741545718, "learning_rate": 2.5701242955365468e-08, "loss": 0.2946, "step": 21404 }, { "epoch": 0.9687712152070604, "grad_norm": 0.6117868458022317, "learning_rate": 2.562707905052364e-08, "loss": 0.2576, "step": 21405 }, { "epoch": 0.968816474315456, "grad_norm": 0.2594879696804052, "learning_rate": 2.555302202967791e-08, "loss": 0.4557, "step": 21406 }, { "epoch": 0.9688617334238515, "grad_norm": 0.2998048691048826, "learning_rate": 2.5479071894420337e-08, "loss": 0.4701, "step": 21407 }, { "epoch": 0.9689069925322471, "grad_norm": 0.6665315299044354, "learning_rate": 2.5405228646339096e-08, "loss": 0.3045, "step": 21408 }, { "epoch": 0.9689522516406427, "grad_norm": 0.6795854446055811, "learning_rate": 2.5331492287021252e-08, "loss": 0.3092, "step": 21409 }, { "epoch": 0.9689975107490383, "grad_norm": 0.6512646157878369, "learning_rate": 2.5257862818051092e-08, "loss": 0.2733, "step": 21410 }, { "epoch": 0.9690427698574338, "grad_norm": 0.2645611035802779, "learning_rate": 2.5184340241010687e-08, "loss": 0.4437, "step": 21411 }, { "epoch": 0.9690880289658294, "grad_norm": 0.24962282761171117, "learning_rate": 2.511092455747932e-08, "loss": 0.4545, "step": 21412 }, { "epoch": 0.9691332880742249, "grad_norm": 0.5633033273133744, "learning_rate": 2.503761576903574e-08, "loss": 0.2791, "step": 21413 }, { "epoch": 0.9691785471826205, "grad_norm": 0.24964261861415946, "learning_rate": 2.4964413877254233e-08, "loss": 0.4514, "step": 21414 }, { "epoch": 0.9692238062910161, "grad_norm": 0.8924893770088967, "learning_rate": 2.489131888370744e-08, "loss": 0.2838, "step": 21415 }, { "epoch": 0.9692690653994116, "grad_norm": 0.6368048568730261, "learning_rate": 2.4818330789966872e-08, "loss": 0.2717, "step": 21416 }, { "epoch": 0.9693143245078072, "grad_norm": 0.25813157938113807, "learning_rate": 2.474544959760017e-08, "loss": 0.4867, "step": 21417 }, { "epoch": 0.9693595836162028, "grad_norm": 0.6155721713424308, "learning_rate": 2.4672675308173298e-08, "loss": 0.3078, "step": 21418 }, { "epoch": 0.9694048427245984, "grad_norm": 0.5923410351217169, "learning_rate": 2.460000792324946e-08, "loss": 0.2826, "step": 21419 }, { "epoch": 0.9694501018329938, "grad_norm": 0.6407965605028224, "learning_rate": 2.4527447444391838e-08, "loss": 0.3003, "step": 21420 }, { "epoch": 0.9694953609413894, "grad_norm": 0.6538379741907768, "learning_rate": 2.445499387315753e-08, "loss": 0.3277, "step": 21421 }, { "epoch": 0.969540620049785, "grad_norm": 0.6457363395930453, "learning_rate": 2.4382647211104173e-08, "loss": 0.3262, "step": 21422 }, { "epoch": 0.9695858791581806, "grad_norm": 0.6266991708334412, "learning_rate": 2.4310407459786634e-08, "loss": 0.2973, "step": 21423 }, { "epoch": 0.9696311382665761, "grad_norm": 0.26378314561444793, "learning_rate": 2.423827462075701e-08, "loss": 0.471, "step": 21424 }, { "epoch": 0.9696763973749717, "grad_norm": 0.5846522553477222, "learning_rate": 2.416624869556461e-08, "loss": 0.3047, "step": 21425 }, { "epoch": 0.9697216564833673, "grad_norm": 0.5748228073096529, "learning_rate": 2.409432968575709e-08, "loss": 0.2762, "step": 21426 }, { "epoch": 0.9697669155917629, "grad_norm": 0.5776693574561639, "learning_rate": 2.402251759288099e-08, "loss": 0.3131, "step": 21427 }, { "epoch": 0.9698121747001585, "grad_norm": 0.6249029756593694, "learning_rate": 2.3950812418477852e-08, "loss": 0.2992, "step": 21428 }, { "epoch": 0.9698574338085539, "grad_norm": 1.7975295639136513, "learning_rate": 2.3879214164088672e-08, "loss": 0.2795, "step": 21429 }, { "epoch": 0.9699026929169495, "grad_norm": 0.34538818894279005, "learning_rate": 2.3807722831252768e-08, "loss": 0.4724, "step": 21430 }, { "epoch": 0.9699479520253451, "grad_norm": 0.6697070486805683, "learning_rate": 2.3736338421505578e-08, "loss": 0.3004, "step": 21431 }, { "epoch": 0.9699932111337407, "grad_norm": 0.26052467045218836, "learning_rate": 2.366506093638088e-08, "loss": 0.4629, "step": 21432 }, { "epoch": 0.9700384702421362, "grad_norm": 0.6098721901226468, "learning_rate": 2.359389037741022e-08, "loss": 0.3041, "step": 21433 }, { "epoch": 0.9700837293505318, "grad_norm": 0.6389878370521055, "learning_rate": 2.3522826746123496e-08, "loss": 0.2828, "step": 21434 }, { "epoch": 0.9701289884589274, "grad_norm": 0.6152278199424831, "learning_rate": 2.3451870044046698e-08, "loss": 0.2912, "step": 21435 }, { "epoch": 0.970174247567323, "grad_norm": 0.25780271796413584, "learning_rate": 2.338102027270528e-08, "loss": 0.4857, "step": 21436 }, { "epoch": 0.9702195066757184, "grad_norm": 0.6092924189798018, "learning_rate": 2.33102774336208e-08, "loss": 0.2852, "step": 21437 }, { "epoch": 0.970264765784114, "grad_norm": 0.5870916846508486, "learning_rate": 2.323964152831426e-08, "loss": 0.3193, "step": 21438 }, { "epoch": 0.9703100248925096, "grad_norm": 0.5496351854261411, "learning_rate": 2.3169112558302232e-08, "loss": 0.3011, "step": 21439 }, { "epoch": 0.9703552840009052, "grad_norm": 0.5707888549972117, "learning_rate": 2.3098690525101275e-08, "loss": 0.2424, "step": 21440 }, { "epoch": 0.9704005431093008, "grad_norm": 0.5956520405613818, "learning_rate": 2.302837543022407e-08, "loss": 0.2863, "step": 21441 }, { "epoch": 0.9704458022176963, "grad_norm": 0.6552895738865977, "learning_rate": 2.2958167275181076e-08, "loss": 0.2945, "step": 21442 }, { "epoch": 0.9704910613260919, "grad_norm": 0.5961285069005348, "learning_rate": 2.288806606148164e-08, "loss": 0.2832, "step": 21443 }, { "epoch": 0.9705363204344875, "grad_norm": 0.6151196913769361, "learning_rate": 2.281807179063178e-08, "loss": 0.3379, "step": 21444 }, { "epoch": 0.970581579542883, "grad_norm": 0.7587253293887296, "learning_rate": 2.2748184464134736e-08, "loss": 0.2987, "step": 21445 }, { "epoch": 0.9706268386512785, "grad_norm": 0.6081690588146057, "learning_rate": 2.26784040834932e-08, "loss": 0.3182, "step": 21446 }, { "epoch": 0.9706720977596741, "grad_norm": 0.6881531310815816, "learning_rate": 2.2608730650205966e-08, "loss": 0.278, "step": 21447 }, { "epoch": 0.9707173568680697, "grad_norm": 0.642456803386314, "learning_rate": 2.2539164165770178e-08, "loss": 0.2851, "step": 21448 }, { "epoch": 0.9707626159764653, "grad_norm": 0.6942711063770738, "learning_rate": 2.2469704631680743e-08, "loss": 0.2962, "step": 21449 }, { "epoch": 0.9708078750848609, "grad_norm": 0.24719462371086962, "learning_rate": 2.2400352049429807e-08, "loss": 0.4807, "step": 21450 }, { "epoch": 0.9708531341932564, "grad_norm": 0.7658959323580632, "learning_rate": 2.2331106420507843e-08, "loss": 0.2831, "step": 21451 }, { "epoch": 0.970898393301652, "grad_norm": 0.6173714523200496, "learning_rate": 2.2261967746402545e-08, "loss": 0.3444, "step": 21452 }, { "epoch": 0.9709436524100475, "grad_norm": 0.638420072583342, "learning_rate": 2.2192936028599953e-08, "loss": 0.2827, "step": 21453 }, { "epoch": 0.9709889115184431, "grad_norm": 0.5560272357588397, "learning_rate": 2.212401126858277e-08, "loss": 0.2824, "step": 21454 }, { "epoch": 0.9710341706268386, "grad_norm": 0.5624028101250275, "learning_rate": 2.2055193467832582e-08, "loss": 0.2749, "step": 21455 }, { "epoch": 0.9710794297352342, "grad_norm": 0.6437701220694999, "learning_rate": 2.1986482627827098e-08, "loss": 0.3075, "step": 21456 }, { "epoch": 0.9711246888436298, "grad_norm": 0.6032683639248221, "learning_rate": 2.1917878750043475e-08, "loss": 0.2736, "step": 21457 }, { "epoch": 0.9711699479520254, "grad_norm": 0.6148573920082251, "learning_rate": 2.1849381835956084e-08, "loss": 0.3054, "step": 21458 }, { "epoch": 0.9712152070604209, "grad_norm": 0.5825952684250644, "learning_rate": 2.1780991887035973e-08, "loss": 0.3534, "step": 21459 }, { "epoch": 0.9712604661688164, "grad_norm": 0.87701726721472, "learning_rate": 2.1712708904752522e-08, "loss": 0.3276, "step": 21460 }, { "epoch": 0.971305725277212, "grad_norm": 0.6016188627438112, "learning_rate": 2.1644532890573444e-08, "loss": 0.2737, "step": 21461 }, { "epoch": 0.9713509843856076, "grad_norm": 0.6370604104920156, "learning_rate": 2.1576463845964236e-08, "loss": 0.2692, "step": 21462 }, { "epoch": 0.9713962434940032, "grad_norm": 0.56852633621876, "learning_rate": 2.150850177238595e-08, "loss": 0.3225, "step": 21463 }, { "epoch": 0.9714415026023987, "grad_norm": 0.6540170279504566, "learning_rate": 2.1440646671300193e-08, "loss": 0.3534, "step": 21464 }, { "epoch": 0.9714867617107943, "grad_norm": 0.6270797679263911, "learning_rate": 2.1372898544164134e-08, "loss": 0.2592, "step": 21465 }, { "epoch": 0.9715320208191899, "grad_norm": 0.6053132155608527, "learning_rate": 2.1305257392433832e-08, "loss": 0.3286, "step": 21466 }, { "epoch": 0.9715772799275855, "grad_norm": 0.5849943563276676, "learning_rate": 2.1237723217562566e-08, "loss": 0.2471, "step": 21467 }, { "epoch": 0.971622539035981, "grad_norm": 0.9313910065181935, "learning_rate": 2.1170296021001956e-08, "loss": 0.3247, "step": 21468 }, { "epoch": 0.9716677981443765, "grad_norm": 0.6476378321677396, "learning_rate": 2.1102975804200287e-08, "loss": 0.2969, "step": 21469 }, { "epoch": 0.9717130572527721, "grad_norm": 0.6802598270516329, "learning_rate": 2.1035762568603623e-08, "loss": 0.3016, "step": 21470 }, { "epoch": 0.9717583163611677, "grad_norm": 0.25566338750406215, "learning_rate": 2.096865631565692e-08, "loss": 0.437, "step": 21471 }, { "epoch": 0.9718035754695632, "grad_norm": 0.6144524105325259, "learning_rate": 2.090165704680236e-08, "loss": 0.2753, "step": 21472 }, { "epoch": 0.9718488345779588, "grad_norm": 0.5939623725491535, "learning_rate": 2.083476476347823e-08, "loss": 0.3103, "step": 21473 }, { "epoch": 0.9718940936863544, "grad_norm": 0.6245506587877645, "learning_rate": 2.076797946712339e-08, "loss": 0.299, "step": 21474 }, { "epoch": 0.97193935279475, "grad_norm": 0.6063673889028894, "learning_rate": 2.0701301159171683e-08, "loss": 0.2893, "step": 21475 }, { "epoch": 0.9719846119031456, "grad_norm": 0.6002761281196382, "learning_rate": 2.0634729841056966e-08, "loss": 0.2745, "step": 21476 }, { "epoch": 0.972029871011541, "grad_norm": 0.6330634611823746, "learning_rate": 2.0568265514208097e-08, "loss": 0.2948, "step": 21477 }, { "epoch": 0.9720751301199366, "grad_norm": 0.5653621329054418, "learning_rate": 2.0501908180054486e-08, "loss": 0.2946, "step": 21478 }, { "epoch": 0.9721203892283322, "grad_norm": 0.6638168852132872, "learning_rate": 2.0435657840021104e-08, "loss": 0.2776, "step": 21479 }, { "epoch": 0.9721656483367278, "grad_norm": 0.5561328122632877, "learning_rate": 2.0369514495532373e-08, "loss": 0.2782, "step": 21480 }, { "epoch": 0.9722109074451233, "grad_norm": 0.24582704186734805, "learning_rate": 2.0303478148008813e-08, "loss": 0.4444, "step": 21481 }, { "epoch": 0.9722561665535189, "grad_norm": 0.6173504157082066, "learning_rate": 2.02375487988693e-08, "loss": 0.3019, "step": 21482 }, { "epoch": 0.9723014256619145, "grad_norm": 0.6212625353881241, "learning_rate": 2.0171726449531025e-08, "loss": 0.3275, "step": 21483 }, { "epoch": 0.97234668477031, "grad_norm": 0.6496282193749826, "learning_rate": 2.010601110140786e-08, "loss": 0.3348, "step": 21484 }, { "epoch": 0.9723919438787056, "grad_norm": 0.5653231467934803, "learning_rate": 2.0040402755912013e-08, "loss": 0.461, "step": 21485 }, { "epoch": 0.9724372029871011, "grad_norm": 0.605088017779233, "learning_rate": 1.9974901414452907e-08, "loss": 0.2806, "step": 21486 }, { "epoch": 0.9724824620954967, "grad_norm": 0.6410627323825772, "learning_rate": 1.9909507078438307e-08, "loss": 0.2612, "step": 21487 }, { "epoch": 0.9725277212038923, "grad_norm": 0.2572270975628381, "learning_rate": 1.984421974927375e-08, "loss": 0.481, "step": 21488 }, { "epoch": 0.9725729803122879, "grad_norm": 0.5717832368892245, "learning_rate": 1.9779039428360904e-08, "loss": 0.277, "step": 21489 }, { "epoch": 0.9726182394206834, "grad_norm": 0.5836692238315675, "learning_rate": 1.971396611710086e-08, "loss": 0.2534, "step": 21490 }, { "epoch": 0.972663498529079, "grad_norm": 0.6329943429107763, "learning_rate": 1.9648999816891944e-08, "loss": 0.2539, "step": 21491 }, { "epoch": 0.9727087576374746, "grad_norm": 0.6262710039347652, "learning_rate": 1.958414052913027e-08, "loss": 0.2916, "step": 21492 }, { "epoch": 0.9727540167458701, "grad_norm": 0.6340901437998767, "learning_rate": 1.951938825520916e-08, "loss": 0.3064, "step": 21493 }, { "epoch": 0.9727992758542656, "grad_norm": 0.613620906601682, "learning_rate": 1.9454742996519726e-08, "loss": 0.3294, "step": 21494 }, { "epoch": 0.9728445349626612, "grad_norm": 0.5792752864447169, "learning_rate": 1.9390204754451967e-08, "loss": 0.2749, "step": 21495 }, { "epoch": 0.9728897940710568, "grad_norm": 0.26855686320942884, "learning_rate": 1.9325773530391446e-08, "loss": 0.4725, "step": 21496 }, { "epoch": 0.9729350531794524, "grad_norm": 0.5983834047394934, "learning_rate": 1.926144932572316e-08, "loss": 0.277, "step": 21497 }, { "epoch": 0.972980312287848, "grad_norm": 0.7360701247702012, "learning_rate": 1.9197232141829335e-08, "loss": 0.2616, "step": 21498 }, { "epoch": 0.9730255713962435, "grad_norm": 0.6197883921720705, "learning_rate": 1.913312198008943e-08, "loss": 0.3041, "step": 21499 }, { "epoch": 0.973070830504639, "grad_norm": 0.5844609404817493, "learning_rate": 1.9069118841881228e-08, "loss": 0.2714, "step": 21500 }, { "epoch": 0.9731160896130346, "grad_norm": 0.5603732149038518, "learning_rate": 1.9005222728579742e-08, "loss": 0.3048, "step": 21501 }, { "epoch": 0.9731613487214302, "grad_norm": 0.6533007099497831, "learning_rate": 1.8941433641558315e-08, "loss": 0.2807, "step": 21502 }, { "epoch": 0.9732066078298257, "grad_norm": 0.6925200042670877, "learning_rate": 1.8877751582186966e-08, "loss": 0.2856, "step": 21503 }, { "epoch": 0.9732518669382213, "grad_norm": 0.6110740454862127, "learning_rate": 1.8814176551834595e-08, "loss": 0.3288, "step": 21504 }, { "epoch": 0.9732971260466169, "grad_norm": 0.6500400906628822, "learning_rate": 1.8750708551867336e-08, "loss": 0.3062, "step": 21505 }, { "epoch": 0.9733423851550125, "grad_norm": 0.38635779551762117, "learning_rate": 1.8687347583647985e-08, "loss": 0.4977, "step": 21506 }, { "epoch": 0.973387644263408, "grad_norm": 0.6038331706183906, "learning_rate": 1.8624093648539344e-08, "loss": 0.2942, "step": 21507 }, { "epoch": 0.9734329033718035, "grad_norm": 0.6262687242978424, "learning_rate": 1.856094674789921e-08, "loss": 0.2343, "step": 21508 }, { "epoch": 0.9734781624801991, "grad_norm": 0.6102937548707332, "learning_rate": 1.8497906883085394e-08, "loss": 0.3216, "step": 21509 }, { "epoch": 0.9735234215885947, "grad_norm": 0.5662823014312907, "learning_rate": 1.8434974055451248e-08, "loss": 0.4984, "step": 21510 }, { "epoch": 0.9735686806969903, "grad_norm": 0.6200777181397742, "learning_rate": 1.8372148266350696e-08, "loss": 0.2854, "step": 21511 }, { "epoch": 0.9736139398053858, "grad_norm": 0.6630690064524812, "learning_rate": 1.830942951713266e-08, "loss": 0.2692, "step": 21512 }, { "epoch": 0.9736591989137814, "grad_norm": 0.6760977205002116, "learning_rate": 1.8246817809144392e-08, "loss": 0.3083, "step": 21513 }, { "epoch": 0.973704458022177, "grad_norm": 0.6257431106132486, "learning_rate": 1.8184313143732035e-08, "loss": 0.3112, "step": 21514 }, { "epoch": 0.9737497171305726, "grad_norm": 0.5883702422264766, "learning_rate": 1.812191552223841e-08, "loss": 0.3552, "step": 21515 }, { "epoch": 0.973794976238968, "grad_norm": 0.610938178761219, "learning_rate": 1.8059624946004105e-08, "loss": 0.2976, "step": 21516 }, { "epoch": 0.9738402353473636, "grad_norm": 0.6088542630214259, "learning_rate": 1.79974414163675e-08, "loss": 0.3352, "step": 21517 }, { "epoch": 0.9738854944557592, "grad_norm": 0.23932636750915598, "learning_rate": 1.7935364934664744e-08, "loss": 0.4551, "step": 21518 }, { "epoch": 0.9739307535641548, "grad_norm": 0.6661497356134606, "learning_rate": 1.7873395502229774e-08, "loss": 0.3013, "step": 21519 }, { "epoch": 0.9739760126725503, "grad_norm": 0.5806129832544668, "learning_rate": 1.7811533120394296e-08, "loss": 0.256, "step": 21520 }, { "epoch": 0.9740212717809459, "grad_norm": 0.28544530450591754, "learning_rate": 1.7749777790487256e-08, "loss": 0.4692, "step": 21521 }, { "epoch": 0.9740665308893415, "grad_norm": 0.5703201834714856, "learning_rate": 1.7688129513835915e-08, "loss": 0.3194, "step": 21522 }, { "epoch": 0.9741117899977371, "grad_norm": 0.5872381541669788, "learning_rate": 1.7626588291764225e-08, "loss": 0.2476, "step": 21523 }, { "epoch": 0.9741570491061327, "grad_norm": 0.5319838055635173, "learning_rate": 1.7565154125595006e-08, "loss": 0.2815, "step": 21524 }, { "epoch": 0.9742023082145281, "grad_norm": 0.28202626818847565, "learning_rate": 1.7503827016648876e-08, "loss": 0.4754, "step": 21525 }, { "epoch": 0.9742475673229237, "grad_norm": 0.6586370358676067, "learning_rate": 1.7442606966242005e-08, "loss": 0.2987, "step": 21526 }, { "epoch": 0.9742928264313193, "grad_norm": 0.2766401223790382, "learning_rate": 1.7381493975691667e-08, "loss": 0.4714, "step": 21527 }, { "epoch": 0.9743380855397149, "grad_norm": 0.6413995570584645, "learning_rate": 1.7320488046309593e-08, "loss": 0.2988, "step": 21528 }, { "epoch": 0.9743833446481104, "grad_norm": 0.6030670799447602, "learning_rate": 1.7259589179406953e-08, "loss": 0.3021, "step": 21529 }, { "epoch": 0.974428603756506, "grad_norm": 0.8385567228907049, "learning_rate": 1.7198797376292708e-08, "loss": 0.2757, "step": 21530 }, { "epoch": 0.9744738628649016, "grad_norm": 0.6998410794379304, "learning_rate": 1.7138112638272476e-08, "loss": 0.3029, "step": 21531 }, { "epoch": 0.9745191219732972, "grad_norm": 0.668868767753376, "learning_rate": 1.7077534966650767e-08, "loss": 0.3045, "step": 21532 }, { "epoch": 0.9745643810816927, "grad_norm": 0.6094772471471711, "learning_rate": 1.7017064362728764e-08, "loss": 0.2379, "step": 21533 }, { "epoch": 0.9746096401900882, "grad_norm": 0.6392838917586049, "learning_rate": 1.6956700827806538e-08, "loss": 0.2848, "step": 21534 }, { "epoch": 0.9746548992984838, "grad_norm": 0.7005772112789435, "learning_rate": 1.689644436317972e-08, "loss": 0.3148, "step": 21535 }, { "epoch": 0.9747001584068794, "grad_norm": 0.6039779318860282, "learning_rate": 1.6836294970144495e-08, "loss": 0.2571, "step": 21536 }, { "epoch": 0.974745417515275, "grad_norm": 0.6087022627637411, "learning_rate": 1.6776252649992608e-08, "loss": 0.2869, "step": 21537 }, { "epoch": 0.9747906766236705, "grad_norm": 0.6536183340738748, "learning_rate": 1.6716317404014136e-08, "loss": 0.276, "step": 21538 }, { "epoch": 0.9748359357320661, "grad_norm": 0.5890982188008039, "learning_rate": 1.665648923349694e-08, "loss": 0.2726, "step": 21539 }, { "epoch": 0.9748811948404617, "grad_norm": 0.6485504916729523, "learning_rate": 1.659676813972666e-08, "loss": 0.3057, "step": 21540 }, { "epoch": 0.9749264539488572, "grad_norm": 0.6579237092730789, "learning_rate": 1.6537154123986156e-08, "loss": 0.3385, "step": 21541 }, { "epoch": 0.9749717130572527, "grad_norm": 0.69823033168968, "learning_rate": 1.647764718755718e-08, "loss": 0.2696, "step": 21542 }, { "epoch": 0.9750169721656483, "grad_norm": 0.5525465895809587, "learning_rate": 1.641824733171815e-08, "loss": 0.3085, "step": 21543 }, { "epoch": 0.9750622312740439, "grad_norm": 0.5679357769200283, "learning_rate": 1.6358954557744166e-08, "loss": 0.3365, "step": 21544 }, { "epoch": 0.9751074903824395, "grad_norm": 0.250665352604849, "learning_rate": 1.629976886691087e-08, "loss": 0.4753, "step": 21545 }, { "epoch": 0.9751527494908351, "grad_norm": 0.5955064802272411, "learning_rate": 1.6240690260488913e-08, "loss": 0.3078, "step": 21546 }, { "epoch": 0.9751980085992306, "grad_norm": 0.608417079637003, "learning_rate": 1.6181718739748388e-08, "loss": 0.3097, "step": 21547 }, { "epoch": 0.9752432677076261, "grad_norm": 0.6143087542971571, "learning_rate": 1.6122854305955505e-08, "loss": 0.2917, "step": 21548 }, { "epoch": 0.9752885268160217, "grad_norm": 0.2602212956379094, "learning_rate": 1.6064096960376476e-08, "loss": 0.4708, "step": 21549 }, { "epoch": 0.9753337859244173, "grad_norm": 0.6126645269940824, "learning_rate": 1.600544670427251e-08, "loss": 0.331, "step": 21550 }, { "epoch": 0.9753790450328128, "grad_norm": 0.6449883399574192, "learning_rate": 1.5946903538904267e-08, "loss": 0.2803, "step": 21551 }, { "epoch": 0.9754243041412084, "grad_norm": 0.6410152600174044, "learning_rate": 1.5888467465529632e-08, "loss": 0.2897, "step": 21552 }, { "epoch": 0.975469563249604, "grad_norm": 1.1902445501114625, "learning_rate": 1.583013848540482e-08, "loss": 0.3022, "step": 21553 }, { "epoch": 0.9755148223579996, "grad_norm": 0.562064160439384, "learning_rate": 1.5771916599782167e-08, "loss": 0.2639, "step": 21554 }, { "epoch": 0.9755600814663951, "grad_norm": 0.5666439513306696, "learning_rate": 1.5713801809913443e-08, "loss": 0.2595, "step": 21555 }, { "epoch": 0.9756053405747906, "grad_norm": 0.6185233159292626, "learning_rate": 1.5655794117047097e-08, "loss": 0.253, "step": 21556 }, { "epoch": 0.9756505996831862, "grad_norm": 0.6169772789722926, "learning_rate": 1.5597893522428796e-08, "loss": 0.304, "step": 21557 }, { "epoch": 0.9756958587915818, "grad_norm": 0.27782104133037505, "learning_rate": 1.5540100027304217e-08, "loss": 0.4642, "step": 21558 }, { "epoch": 0.9757411178999774, "grad_norm": 0.2715187409490002, "learning_rate": 1.5482413632914028e-08, "loss": 0.4763, "step": 21559 }, { "epoch": 0.9757863770083729, "grad_norm": 0.5957365122967457, "learning_rate": 1.5424834340497796e-08, "loss": 0.3033, "step": 21560 }, { "epoch": 0.9758316361167685, "grad_norm": 0.6438850183819776, "learning_rate": 1.5367362151292863e-08, "loss": 0.3173, "step": 21561 }, { "epoch": 0.9758768952251641, "grad_norm": 0.8646528602373577, "learning_rate": 1.5309997066534354e-08, "loss": 0.2965, "step": 21562 }, { "epoch": 0.9759221543335597, "grad_norm": 0.6283703457696358, "learning_rate": 1.5252739087454617e-08, "loss": 0.3036, "step": 21563 }, { "epoch": 0.9759674134419551, "grad_norm": 0.5739317342776467, "learning_rate": 1.5195588215283773e-08, "loss": 0.2437, "step": 21564 }, { "epoch": 0.9760126725503507, "grad_norm": 0.5952634209800168, "learning_rate": 1.5138544451250292e-08, "loss": 0.3071, "step": 21565 }, { "epoch": 0.9760579316587463, "grad_norm": 0.6403235870149089, "learning_rate": 1.5081607796579856e-08, "loss": 0.3059, "step": 21566 }, { "epoch": 0.9761031907671419, "grad_norm": 0.27436692115992706, "learning_rate": 1.502477825249593e-08, "loss": 0.4823, "step": 21567 }, { "epoch": 0.9761484498755375, "grad_norm": 0.6302113621127572, "learning_rate": 1.4968055820218653e-08, "loss": 0.2929, "step": 21568 }, { "epoch": 0.976193708983933, "grad_norm": 0.6228658000514758, "learning_rate": 1.4911440500968155e-08, "loss": 0.2988, "step": 21569 }, { "epoch": 0.9762389680923286, "grad_norm": 0.2591940642929855, "learning_rate": 1.4854932295959578e-08, "loss": 0.4579, "step": 21570 }, { "epoch": 0.9762842272007242, "grad_norm": 0.2359238616287538, "learning_rate": 1.4798531206408617e-08, "loss": 0.4573, "step": 21571 }, { "epoch": 0.9763294863091198, "grad_norm": 0.5557793876162257, "learning_rate": 1.474223723352597e-08, "loss": 0.2585, "step": 21572 }, { "epoch": 0.9763747454175152, "grad_norm": 0.6008817741426201, "learning_rate": 1.4686050378521221e-08, "loss": 0.2887, "step": 21573 }, { "epoch": 0.9764200045259108, "grad_norm": 0.25315653321249215, "learning_rate": 1.4629970642602298e-08, "loss": 0.4633, "step": 21574 }, { "epoch": 0.9764652636343064, "grad_norm": 0.5625693320994961, "learning_rate": 1.457399802697379e-08, "loss": 0.2273, "step": 21575 }, { "epoch": 0.976510522742702, "grad_norm": 0.5776294944714752, "learning_rate": 1.4518132532838624e-08, "loss": 0.2596, "step": 21576 }, { "epoch": 0.9765557818510975, "grad_norm": 0.6204162617245031, "learning_rate": 1.4462374161396952e-08, "loss": 0.273, "step": 21577 }, { "epoch": 0.9766010409594931, "grad_norm": 0.5925661056508587, "learning_rate": 1.440672291384726e-08, "loss": 0.2819, "step": 21578 }, { "epoch": 0.9766463000678887, "grad_norm": 0.5902542261005069, "learning_rate": 1.4351178791384702e-08, "loss": 0.3058, "step": 21579 }, { "epoch": 0.9766915591762843, "grad_norm": 0.6734892936433218, "learning_rate": 1.4295741795203322e-08, "loss": 0.3097, "step": 21580 }, { "epoch": 0.9767368182846798, "grad_norm": 0.25765264371655133, "learning_rate": 1.4240411926493835e-08, "loss": 0.4596, "step": 21581 }, { "epoch": 0.9767820773930753, "grad_norm": 0.569592922648196, "learning_rate": 1.4185189186445292e-08, "loss": 0.2836, "step": 21582 }, { "epoch": 0.9768273365014709, "grad_norm": 0.6310649915158688, "learning_rate": 1.4130073576244518e-08, "loss": 0.2876, "step": 21583 }, { "epoch": 0.9768725956098665, "grad_norm": 0.5954175314647394, "learning_rate": 1.4075065097075013e-08, "loss": 0.289, "step": 21584 }, { "epoch": 0.9769178547182621, "grad_norm": 0.584969965100779, "learning_rate": 1.402016375011972e-08, "loss": 0.3152, "step": 21585 }, { "epoch": 0.9769631138266576, "grad_norm": 0.6513597095588576, "learning_rate": 1.3965369536557694e-08, "loss": 0.2726, "step": 21586 }, { "epoch": 0.9770083729350532, "grad_norm": 0.6068463477849974, "learning_rate": 1.3910682457566327e-08, "loss": 0.3172, "step": 21587 }, { "epoch": 0.9770536320434487, "grad_norm": 0.6569562461124709, "learning_rate": 1.3856102514321345e-08, "loss": 0.2749, "step": 21588 }, { "epoch": 0.9770988911518443, "grad_norm": 0.6050937931355871, "learning_rate": 1.3801629707994035e-08, "loss": 0.2997, "step": 21589 }, { "epoch": 0.9771441502602398, "grad_norm": 0.26659203847765517, "learning_rate": 1.3747264039756236e-08, "loss": 0.4529, "step": 21590 }, { "epoch": 0.9771894093686354, "grad_norm": 0.6891775542701496, "learning_rate": 1.3693005510775903e-08, "loss": 0.2708, "step": 21591 }, { "epoch": 0.977234668477031, "grad_norm": 0.6208702276175495, "learning_rate": 1.3638854122218214e-08, "loss": 0.2726, "step": 21592 }, { "epoch": 0.9772799275854266, "grad_norm": 0.5831698339732241, "learning_rate": 1.358480987524724e-08, "loss": 0.2751, "step": 21593 }, { "epoch": 0.9773251866938222, "grad_norm": 0.6343592251514779, "learning_rate": 1.3530872771024273e-08, "loss": 0.321, "step": 21594 }, { "epoch": 0.9773704458022177, "grad_norm": 0.5875276035704957, "learning_rate": 1.3477042810707829e-08, "loss": 0.2605, "step": 21595 }, { "epoch": 0.9774157049106132, "grad_norm": 0.24808703230881213, "learning_rate": 1.3423319995454765e-08, "loss": 0.4601, "step": 21596 }, { "epoch": 0.9774609640190088, "grad_norm": 0.26599602174500386, "learning_rate": 1.3369704326419709e-08, "loss": 0.4717, "step": 21597 }, { "epoch": 0.9775062231274044, "grad_norm": 0.6308260174148227, "learning_rate": 1.3316195804753962e-08, "loss": 0.2957, "step": 21598 }, { "epoch": 0.9775514822357999, "grad_norm": 0.6417862465844587, "learning_rate": 1.3262794431608272e-08, "loss": 0.3095, "step": 21599 }, { "epoch": 0.9775967413441955, "grad_norm": 0.5779867840850115, "learning_rate": 1.32095002081295e-08, "loss": 0.3006, "step": 21600 }, { "epoch": 0.9776420004525911, "grad_norm": 0.6374846073793348, "learning_rate": 1.3156313135462284e-08, "loss": 0.2623, "step": 21601 }, { "epoch": 0.9776872595609867, "grad_norm": 0.6026661398778114, "learning_rate": 1.310323321475071e-08, "loss": 0.3378, "step": 21602 }, { "epoch": 0.9777325186693823, "grad_norm": 0.24092280344486997, "learning_rate": 1.3050260447133866e-08, "loss": 0.4632, "step": 21603 }, { "epoch": 0.9777777777777777, "grad_norm": 0.6303012480618946, "learning_rate": 1.2997394833750842e-08, "loss": 0.295, "step": 21604 }, { "epoch": 0.9778230368861733, "grad_norm": 0.5629119592200705, "learning_rate": 1.2944636375737952e-08, "loss": 0.2873, "step": 21605 }, { "epoch": 0.9778682959945689, "grad_norm": 0.587805715190683, "learning_rate": 1.289198507422762e-08, "loss": 0.248, "step": 21606 }, { "epoch": 0.9779135551029645, "grad_norm": 0.7009930509067366, "learning_rate": 1.2839440930352276e-08, "loss": 0.3042, "step": 21607 }, { "epoch": 0.97795881421136, "grad_norm": 0.624810524584758, "learning_rate": 1.2787003945239906e-08, "loss": 0.2643, "step": 21608 }, { "epoch": 0.9780040733197556, "grad_norm": 0.62834319904867, "learning_rate": 1.2734674120018497e-08, "loss": 0.2777, "step": 21609 }, { "epoch": 0.9780493324281512, "grad_norm": 0.2711195268524511, "learning_rate": 1.268245145581104e-08, "loss": 0.4483, "step": 21610 }, { "epoch": 0.9780945915365468, "grad_norm": 0.6405341538941641, "learning_rate": 1.2630335953740524e-08, "loss": 0.2703, "step": 21611 }, { "epoch": 0.9781398506449422, "grad_norm": 0.7713359262031876, "learning_rate": 1.257832761492661e-08, "loss": 0.3029, "step": 21612 }, { "epoch": 0.9781851097533378, "grad_norm": 0.6050305225094347, "learning_rate": 1.2526426440486738e-08, "loss": 0.297, "step": 21613 }, { "epoch": 0.9782303688617334, "grad_norm": 0.2565092506379215, "learning_rate": 1.2474632431536126e-08, "loss": 0.4589, "step": 21614 }, { "epoch": 0.978275627970129, "grad_norm": 0.6426891785778904, "learning_rate": 1.2422945589187774e-08, "loss": 0.3016, "step": 21615 }, { "epoch": 0.9783208870785246, "grad_norm": 0.7105239256678185, "learning_rate": 1.2371365914551903e-08, "loss": 0.3074, "step": 21616 }, { "epoch": 0.9783661461869201, "grad_norm": 0.6318290634122832, "learning_rate": 1.2319893408737072e-08, "loss": 0.3187, "step": 21617 }, { "epoch": 0.9784114052953157, "grad_norm": 0.7091202626299058, "learning_rate": 1.2268528072849063e-08, "loss": 0.2676, "step": 21618 }, { "epoch": 0.9784566644037113, "grad_norm": 0.5991788178591732, "learning_rate": 1.221726990799199e-08, "loss": 0.2756, "step": 21619 }, { "epoch": 0.9785019235121069, "grad_norm": 0.2644385392660403, "learning_rate": 1.21661189152672e-08, "loss": 0.4842, "step": 21620 }, { "epoch": 0.9785471826205023, "grad_norm": 0.6044750980358486, "learning_rate": 1.2115075095773255e-08, "loss": 0.3265, "step": 21621 }, { "epoch": 0.9785924417288979, "grad_norm": 0.6782799426016777, "learning_rate": 1.206413845060761e-08, "loss": 0.3153, "step": 21622 }, { "epoch": 0.9786377008372935, "grad_norm": 0.6519410449382191, "learning_rate": 1.2013308980863836e-08, "loss": 0.2665, "step": 21623 }, { "epoch": 0.9786829599456891, "grad_norm": 0.5774124278120938, "learning_rate": 1.1962586687634947e-08, "loss": 0.2875, "step": 21624 }, { "epoch": 0.9787282190540846, "grad_norm": 0.6004423970128941, "learning_rate": 1.1911971572010073e-08, "loss": 0.2441, "step": 21625 }, { "epoch": 0.9787734781624802, "grad_norm": 0.5843913260285941, "learning_rate": 1.1861463635077785e-08, "loss": 0.3083, "step": 21626 }, { "epoch": 0.9788187372708758, "grad_norm": 0.6598961792562245, "learning_rate": 1.181106287792222e-08, "loss": 0.3188, "step": 21627 }, { "epoch": 0.9788639963792714, "grad_norm": 0.6118189962772933, "learning_rate": 1.1760769301626951e-08, "loss": 0.2678, "step": 21628 }, { "epoch": 0.9789092554876669, "grad_norm": 0.7576996983657155, "learning_rate": 1.1710582907272783e-08, "loss": 0.2965, "step": 21629 }, { "epoch": 0.9789545145960624, "grad_norm": 0.28542584526267356, "learning_rate": 1.166050369593774e-08, "loss": 0.4581, "step": 21630 }, { "epoch": 0.978999773704458, "grad_norm": 0.5788657187354852, "learning_rate": 1.1610531668697633e-08, "loss": 0.2755, "step": 21631 }, { "epoch": 0.9790450328128536, "grad_norm": 0.6078411782319739, "learning_rate": 1.1560666826627154e-08, "loss": 0.2844, "step": 21632 }, { "epoch": 0.9790902919212492, "grad_norm": 0.6201364270028205, "learning_rate": 1.1510909170796558e-08, "loss": 0.3157, "step": 21633 }, { "epoch": 0.9791355510296447, "grad_norm": 0.5508890145642606, "learning_rate": 1.14612587022761e-08, "loss": 0.2549, "step": 21634 }, { "epoch": 0.9791808101380403, "grad_norm": 0.6795842409627164, "learning_rate": 1.1411715422131598e-08, "loss": 0.2701, "step": 21635 }, { "epoch": 0.9792260692464358, "grad_norm": 0.5777709409082231, "learning_rate": 1.1362279331428305e-08, "loss": 0.2996, "step": 21636 }, { "epoch": 0.9792713283548314, "grad_norm": 0.2564448952346507, "learning_rate": 1.1312950431228153e-08, "loss": 0.4574, "step": 21637 }, { "epoch": 0.979316587463227, "grad_norm": 0.7122237193809913, "learning_rate": 1.126372872259085e-08, "loss": 0.2868, "step": 21638 }, { "epoch": 0.9793618465716225, "grad_norm": 0.5895747232643146, "learning_rate": 1.1214614206574437e-08, "loss": 0.2437, "step": 21639 }, { "epoch": 0.9794071056800181, "grad_norm": 0.630322761241128, "learning_rate": 1.1165606884234182e-08, "loss": 0.3135, "step": 21640 }, { "epoch": 0.9794523647884137, "grad_norm": 0.6556277953449368, "learning_rate": 1.111670675662313e-08, "loss": 0.2899, "step": 21641 }, { "epoch": 0.9794976238968093, "grad_norm": 0.5784803966331807, "learning_rate": 1.1067913824791553e-08, "loss": 0.2835, "step": 21642 }, { "epoch": 0.9795428830052048, "grad_norm": 0.547317704540307, "learning_rate": 1.1019228089788613e-08, "loss": 0.2543, "step": 21643 }, { "epoch": 0.9795881421136003, "grad_norm": 0.5978956990884308, "learning_rate": 1.0970649552659585e-08, "loss": 0.2773, "step": 21644 }, { "epoch": 0.9796334012219959, "grad_norm": 0.5614664206206553, "learning_rate": 1.092217821444863e-08, "loss": 0.2687, "step": 21645 }, { "epoch": 0.9796786603303915, "grad_norm": 0.672027978277878, "learning_rate": 1.0873814076197142e-08, "loss": 0.2588, "step": 21646 }, { "epoch": 0.979723919438787, "grad_norm": 0.6207091688396222, "learning_rate": 1.0825557138944843e-08, "loss": 0.2885, "step": 21647 }, { "epoch": 0.9797691785471826, "grad_norm": 0.6139079486988628, "learning_rate": 1.0777407403728123e-08, "loss": 0.2752, "step": 21648 }, { "epoch": 0.9798144376555782, "grad_norm": 0.5793301468223664, "learning_rate": 1.0729364871581716e-08, "loss": 0.2969, "step": 21649 }, { "epoch": 0.9798596967639738, "grad_norm": 0.7081059442224155, "learning_rate": 1.0681429543538125e-08, "loss": 0.309, "step": 21650 }, { "epoch": 0.9799049558723694, "grad_norm": 0.6522737384702686, "learning_rate": 1.0633601420626528e-08, "loss": 0.31, "step": 21651 }, { "epoch": 0.9799502149807648, "grad_norm": 0.6194217668405, "learning_rate": 1.0585880503875546e-08, "loss": 0.3273, "step": 21652 }, { "epoch": 0.9799954740891604, "grad_norm": 0.7235806562053905, "learning_rate": 1.0538266794309914e-08, "loss": 0.2502, "step": 21653 }, { "epoch": 0.980040733197556, "grad_norm": 0.6185869570994706, "learning_rate": 1.049076029295326e-08, "loss": 0.3025, "step": 21654 }, { "epoch": 0.9800859923059516, "grad_norm": 0.628706820093385, "learning_rate": 1.044336100082588e-08, "loss": 0.3451, "step": 21655 }, { "epoch": 0.9801312514143471, "grad_norm": 0.6164354143700633, "learning_rate": 1.03960689189464e-08, "loss": 0.3198, "step": 21656 }, { "epoch": 0.9801765105227427, "grad_norm": 0.6485613291924816, "learning_rate": 1.0348884048331232e-08, "loss": 0.2957, "step": 21657 }, { "epoch": 0.9802217696311383, "grad_norm": 0.6598439358952244, "learning_rate": 1.0301806389994006e-08, "loss": 0.3375, "step": 21658 }, { "epoch": 0.9802670287395339, "grad_norm": 0.5931629157219409, "learning_rate": 1.025483594494614e-08, "loss": 0.3017, "step": 21659 }, { "epoch": 0.9803122878479293, "grad_norm": 0.5585563100963906, "learning_rate": 1.0207972714196824e-08, "loss": 0.2703, "step": 21660 }, { "epoch": 0.9803575469563249, "grad_norm": 0.5768770066117372, "learning_rate": 1.0161216698753029e-08, "loss": 0.329, "step": 21661 }, { "epoch": 0.9804028060647205, "grad_norm": 0.266834695010642, "learning_rate": 1.0114567899620066e-08, "loss": 0.4897, "step": 21662 }, { "epoch": 0.9804480651731161, "grad_norm": 0.6164671838543433, "learning_rate": 1.0068026317799906e-08, "loss": 0.2643, "step": 21663 }, { "epoch": 0.9804933242815117, "grad_norm": 0.6455551846103544, "learning_rate": 1.0021591954291754e-08, "loss": 0.2698, "step": 21664 }, { "epoch": 0.9805385833899072, "grad_norm": 0.5680472792211821, "learning_rate": 9.975264810094254e-09, "loss": 0.2947, "step": 21665 }, { "epoch": 0.9805838424983028, "grad_norm": 0.6534256833339015, "learning_rate": 9.929044886203276e-09, "loss": 0.2885, "step": 21666 }, { "epoch": 0.9806291016066984, "grad_norm": 0.6006793231315621, "learning_rate": 9.882932183610806e-09, "loss": 0.2808, "step": 21667 }, { "epoch": 0.980674360715094, "grad_norm": 0.25155329493710854, "learning_rate": 9.836926703307714e-09, "loss": 0.4742, "step": 21668 }, { "epoch": 0.9807196198234894, "grad_norm": 0.5974539516861112, "learning_rate": 9.791028446283768e-09, "loss": 0.3059, "step": 21669 }, { "epoch": 0.980764878931885, "grad_norm": 0.6617095694002861, "learning_rate": 9.745237413523733e-09, "loss": 0.248, "step": 21670 }, { "epoch": 0.9808101380402806, "grad_norm": 0.6060716678997371, "learning_rate": 9.69955360601238e-09, "loss": 0.2827, "step": 21671 }, { "epoch": 0.9808553971486762, "grad_norm": 0.5792760913170358, "learning_rate": 9.653977024731143e-09, "loss": 0.3029, "step": 21672 }, { "epoch": 0.9809006562570718, "grad_norm": 0.2910242789051668, "learning_rate": 9.608507670659239e-09, "loss": 0.4947, "step": 21673 }, { "epoch": 0.9809459153654673, "grad_norm": 0.6148235384331048, "learning_rate": 9.563145544773666e-09, "loss": 0.2854, "step": 21674 }, { "epoch": 0.9809911744738629, "grad_norm": 0.615447357686388, "learning_rate": 9.517890648049199e-09, "loss": 0.2991, "step": 21675 }, { "epoch": 0.9810364335822584, "grad_norm": 0.6092808326297857, "learning_rate": 9.472742981458393e-09, "loss": 0.2366, "step": 21676 }, { "epoch": 0.981081692690654, "grad_norm": 0.6672915344250399, "learning_rate": 9.427702545970474e-09, "loss": 0.3255, "step": 21677 }, { "epoch": 0.9811269517990495, "grad_norm": 0.6059301178060165, "learning_rate": 9.38276934255411e-09, "loss": 0.2415, "step": 21678 }, { "epoch": 0.9811722109074451, "grad_norm": 0.5872986394992463, "learning_rate": 9.337943372175195e-09, "loss": 0.272, "step": 21679 }, { "epoch": 0.9812174700158407, "grad_norm": 0.5859666654621924, "learning_rate": 9.293224635795184e-09, "loss": 0.2612, "step": 21680 }, { "epoch": 0.9812627291242363, "grad_norm": 0.6206243334069153, "learning_rate": 9.248613134376638e-09, "loss": 0.278, "step": 21681 }, { "epoch": 0.9813079882326318, "grad_norm": 0.5897081586642346, "learning_rate": 9.204108868877127e-09, "loss": 0.3476, "step": 21682 }, { "epoch": 0.9813532473410274, "grad_norm": 0.626181183441953, "learning_rate": 9.15971184025366e-09, "loss": 0.3461, "step": 21683 }, { "epoch": 0.981398506449423, "grad_norm": 0.5570704921342378, "learning_rate": 9.115422049459365e-09, "loss": 0.3023, "step": 21684 }, { "epoch": 0.9814437655578185, "grad_norm": 0.6182692913792313, "learning_rate": 9.071239497446815e-09, "loss": 0.3333, "step": 21685 }, { "epoch": 0.9814890246662141, "grad_norm": 0.6284031326139313, "learning_rate": 9.027164185164694e-09, "loss": 0.3385, "step": 21686 }, { "epoch": 0.9815342837746096, "grad_norm": 0.7153580755772212, "learning_rate": 8.983196113560023e-09, "loss": 0.2968, "step": 21687 }, { "epoch": 0.9815795428830052, "grad_norm": 0.27580635853133384, "learning_rate": 8.939335283577599e-09, "loss": 0.4477, "step": 21688 }, { "epoch": 0.9816248019914008, "grad_norm": 0.615050617869175, "learning_rate": 8.895581696160006e-09, "loss": 0.2952, "step": 21689 }, { "epoch": 0.9816700610997964, "grad_norm": 0.5893081313297105, "learning_rate": 8.851935352247597e-09, "loss": 0.3283, "step": 21690 }, { "epoch": 0.9817153202081919, "grad_norm": 0.6061131730742584, "learning_rate": 8.808396252777962e-09, "loss": 0.2646, "step": 21691 }, { "epoch": 0.9817605793165874, "grad_norm": 0.602182475757348, "learning_rate": 8.76496439868646e-09, "loss": 0.2645, "step": 21692 }, { "epoch": 0.981805838424983, "grad_norm": 0.5596327104464522, "learning_rate": 8.721639790906788e-09, "loss": 0.3046, "step": 21693 }, { "epoch": 0.9818510975333786, "grad_norm": 0.24469286039476712, "learning_rate": 8.67842243036876e-09, "loss": 0.4679, "step": 21694 }, { "epoch": 0.9818963566417741, "grad_norm": 0.7909436392895103, "learning_rate": 8.635312318002742e-09, "loss": 0.2419, "step": 21695 }, { "epoch": 0.9819416157501697, "grad_norm": 0.28832400803941166, "learning_rate": 8.59230945473355e-09, "loss": 0.4627, "step": 21696 }, { "epoch": 0.9819868748585653, "grad_norm": 0.6002097689708532, "learning_rate": 8.549413841485443e-09, "loss": 0.2726, "step": 21697 }, { "epoch": 0.9820321339669609, "grad_norm": 0.5583495432364033, "learning_rate": 8.506625479181018e-09, "loss": 0.3065, "step": 21698 }, { "epoch": 0.9820773930753565, "grad_norm": 0.26688574661396586, "learning_rate": 8.46394436873843e-09, "loss": 0.4479, "step": 21699 }, { "epoch": 0.9821226521837519, "grad_norm": 0.6449065606928776, "learning_rate": 8.421370511075833e-09, "loss": 0.2828, "step": 21700 }, { "epoch": 0.9821679112921475, "grad_norm": 0.6317061710680665, "learning_rate": 8.378903907106938e-09, "loss": 0.2696, "step": 21701 }, { "epoch": 0.9822131704005431, "grad_norm": 0.577668120306397, "learning_rate": 8.336544557745463e-09, "loss": 0.2752, "step": 21702 }, { "epoch": 0.9822584295089387, "grad_norm": 0.6065787159864158, "learning_rate": 8.294292463900123e-09, "loss": 0.2829, "step": 21703 }, { "epoch": 0.9823036886173342, "grad_norm": 0.600593619384963, "learning_rate": 8.25214762648019e-09, "loss": 0.2909, "step": 21704 }, { "epoch": 0.9823489477257298, "grad_norm": 0.6630398083335145, "learning_rate": 8.210110046390496e-09, "loss": 0.2916, "step": 21705 }, { "epoch": 0.9823942068341254, "grad_norm": 0.7609258393204628, "learning_rate": 8.168179724534209e-09, "loss": 0.3217, "step": 21706 }, { "epoch": 0.982439465942521, "grad_norm": 0.6582641164460659, "learning_rate": 8.126356661812829e-09, "loss": 0.2742, "step": 21707 }, { "epoch": 0.9824847250509166, "grad_norm": 0.6759465229322568, "learning_rate": 8.084640859124527e-09, "loss": 0.3159, "step": 21708 }, { "epoch": 0.982529984159312, "grad_norm": 0.6703636419723167, "learning_rate": 8.043032317365807e-09, "loss": 0.3072, "step": 21709 }, { "epoch": 0.9825752432677076, "grad_norm": 1.1165883505399121, "learning_rate": 8.001531037430954e-09, "loss": 0.2814, "step": 21710 }, { "epoch": 0.9826205023761032, "grad_norm": 0.6061301568076047, "learning_rate": 7.960137020210923e-09, "loss": 0.2428, "step": 21711 }, { "epoch": 0.9826657614844988, "grad_norm": 0.6607322205290386, "learning_rate": 7.918850266596112e-09, "loss": 0.2852, "step": 21712 }, { "epoch": 0.9827110205928943, "grad_norm": 1.1731956802419958, "learning_rate": 7.877670777473035e-09, "loss": 0.3121, "step": 21713 }, { "epoch": 0.9827562797012899, "grad_norm": 0.4363254521210518, "learning_rate": 7.836598553726538e-09, "loss": 0.4782, "step": 21714 }, { "epoch": 0.9828015388096855, "grad_norm": 0.4375526802347568, "learning_rate": 7.79563359623925e-09, "loss": 0.4794, "step": 21715 }, { "epoch": 0.982846797918081, "grad_norm": 0.6432974621505368, "learning_rate": 7.754775905891576e-09, "loss": 0.2834, "step": 21716 }, { "epoch": 0.9828920570264765, "grad_norm": 0.2588387571579923, "learning_rate": 7.714025483561149e-09, "loss": 0.4849, "step": 21717 }, { "epoch": 0.9829373161348721, "grad_norm": 0.68694740461852, "learning_rate": 7.673382330123936e-09, "loss": 0.2615, "step": 21718 }, { "epoch": 0.9829825752432677, "grad_norm": 1.042378564690906, "learning_rate": 7.63284644645257e-09, "loss": 0.2903, "step": 21719 }, { "epoch": 0.9830278343516633, "grad_norm": 0.6446807855150728, "learning_rate": 7.59241783341913e-09, "loss": 0.3058, "step": 21720 }, { "epoch": 0.9830730934600589, "grad_norm": 0.2694289263740075, "learning_rate": 7.552096491891259e-09, "loss": 0.4596, "step": 21721 }, { "epoch": 0.9831183525684544, "grad_norm": 0.559283975254006, "learning_rate": 7.511882422735483e-09, "loss": 0.2648, "step": 21722 }, { "epoch": 0.98316361167685, "grad_norm": 0.26536821524762305, "learning_rate": 7.471775626816114e-09, "loss": 0.4668, "step": 21723 }, { "epoch": 0.9832088707852455, "grad_norm": 0.7440514698602366, "learning_rate": 7.431776104994681e-09, "loss": 0.3224, "step": 21724 }, { "epoch": 0.9832541298936411, "grad_norm": 0.6962424596967072, "learning_rate": 7.39188385813161e-09, "loss": 0.2766, "step": 21725 }, { "epoch": 0.9832993890020366, "grad_norm": 0.8940563277181036, "learning_rate": 7.352098887082881e-09, "loss": 0.2606, "step": 21726 }, { "epoch": 0.9833446481104322, "grad_norm": 0.7214958076035248, "learning_rate": 7.312421192703923e-09, "loss": 0.3176, "step": 21727 }, { "epoch": 0.9833899072188278, "grad_norm": 0.6343731980873564, "learning_rate": 7.2728507758468295e-09, "loss": 0.2644, "step": 21728 }, { "epoch": 0.9834351663272234, "grad_norm": 0.5961209533114099, "learning_rate": 7.233387637362587e-09, "loss": 0.3388, "step": 21729 }, { "epoch": 0.9834804254356189, "grad_norm": 0.6028494217289609, "learning_rate": 7.194031778098853e-09, "loss": 0.2604, "step": 21730 }, { "epoch": 0.9835256845440145, "grad_norm": 0.6506180980165432, "learning_rate": 7.1547831989005055e-09, "loss": 0.3093, "step": 21731 }, { "epoch": 0.98357094365241, "grad_norm": 0.6274426767704868, "learning_rate": 7.1156419006118695e-09, "loss": 0.2921, "step": 21732 }, { "epoch": 0.9836162027608056, "grad_norm": 0.6356227393707362, "learning_rate": 7.076607884073939e-09, "loss": 0.3085, "step": 21733 }, { "epoch": 0.9836614618692012, "grad_norm": 0.6440067946101039, "learning_rate": 7.037681150124931e-09, "loss": 0.3198, "step": 21734 }, { "epoch": 0.9837067209775967, "grad_norm": 0.622467940468477, "learning_rate": 6.998861699600845e-09, "loss": 0.301, "step": 21735 }, { "epoch": 0.9837519800859923, "grad_norm": 0.5591829293615435, "learning_rate": 6.960149533337124e-09, "loss": 0.2924, "step": 21736 }, { "epoch": 0.9837972391943879, "grad_norm": 0.568720885745079, "learning_rate": 6.921544652164769e-09, "loss": 0.328, "step": 21737 }, { "epoch": 0.9838424983027835, "grad_norm": 0.34489008410081096, "learning_rate": 6.883047056913117e-09, "loss": 0.4822, "step": 21738 }, { "epoch": 0.983887757411179, "grad_norm": 0.6254423314914499, "learning_rate": 6.844656748409284e-09, "loss": 0.265, "step": 21739 }, { "epoch": 0.9839330165195745, "grad_norm": 0.5603541966537482, "learning_rate": 6.8063737274787214e-09, "loss": 0.2559, "step": 21740 }, { "epoch": 0.9839782756279701, "grad_norm": 0.5814920279218694, "learning_rate": 6.768197994944103e-09, "loss": 0.2705, "step": 21741 }, { "epoch": 0.9840235347363657, "grad_norm": 0.6190468275501919, "learning_rate": 6.730129551625331e-09, "loss": 0.2855, "step": 21742 }, { "epoch": 0.9840687938447613, "grad_norm": 0.5794138544317892, "learning_rate": 6.692168398340082e-09, "loss": 0.3286, "step": 21743 }, { "epoch": 0.9841140529531568, "grad_norm": 0.6187931500635636, "learning_rate": 6.6543145359043714e-09, "loss": 0.2499, "step": 21744 }, { "epoch": 0.9841593120615524, "grad_norm": 0.7168440597895542, "learning_rate": 6.616567965131992e-09, "loss": 0.3007, "step": 21745 }, { "epoch": 0.984204571169948, "grad_norm": 0.5912575346461676, "learning_rate": 6.578928686832853e-09, "loss": 0.2904, "step": 21746 }, { "epoch": 0.9842498302783436, "grad_norm": 0.5860976780626126, "learning_rate": 6.54139670181686e-09, "loss": 0.2431, "step": 21747 }, { "epoch": 0.984295089386739, "grad_norm": 0.26903828166599186, "learning_rate": 6.503972010890036e-09, "loss": 0.457, "step": 21748 }, { "epoch": 0.9843403484951346, "grad_norm": 0.6051058553500327, "learning_rate": 6.466654614856183e-09, "loss": 0.2648, "step": 21749 }, { "epoch": 0.9843856076035302, "grad_norm": 0.25852584920291466, "learning_rate": 6.42944451451799e-09, "loss": 0.4579, "step": 21750 }, { "epoch": 0.9844308667119258, "grad_norm": 0.2643033016293149, "learning_rate": 6.392341710674266e-09, "loss": 0.4677, "step": 21751 }, { "epoch": 0.9844761258203213, "grad_norm": 0.25894655658059895, "learning_rate": 6.355346204122148e-09, "loss": 0.4427, "step": 21752 }, { "epoch": 0.9845213849287169, "grad_norm": 0.6426058287872317, "learning_rate": 6.318457995657113e-09, "loss": 0.2816, "step": 21753 }, { "epoch": 0.9845666440371125, "grad_norm": 0.6257485278487301, "learning_rate": 6.281677086071303e-09, "loss": 0.2634, "step": 21754 }, { "epoch": 0.9846119031455081, "grad_norm": 0.6100901183104055, "learning_rate": 6.245003476155198e-09, "loss": 0.2717, "step": 21755 }, { "epoch": 0.9846571622539037, "grad_norm": 0.6110668534928377, "learning_rate": 6.208437166697056e-09, "loss": 0.2676, "step": 21756 }, { "epoch": 0.9847024213622991, "grad_norm": 0.584749554284075, "learning_rate": 6.171978158482361e-09, "loss": 0.2739, "step": 21757 }, { "epoch": 0.9847476804706947, "grad_norm": 0.2766780699015175, "learning_rate": 6.135626452294374e-09, "loss": 0.4525, "step": 21758 }, { "epoch": 0.9847929395790903, "grad_norm": 0.6570081472633232, "learning_rate": 6.099382048914138e-09, "loss": 0.3081, "step": 21759 }, { "epoch": 0.9848381986874859, "grad_norm": 0.6301607344700081, "learning_rate": 6.063244949120473e-09, "loss": 0.2725, "step": 21760 }, { "epoch": 0.9848834577958814, "grad_norm": 0.8074035664818978, "learning_rate": 6.027215153689981e-09, "loss": 0.2909, "step": 21761 }, { "epoch": 0.984928716904277, "grad_norm": 0.5661608368630222, "learning_rate": 5.9912926633970415e-09, "loss": 0.2785, "step": 21762 }, { "epoch": 0.9849739760126726, "grad_norm": 0.699512720197519, "learning_rate": 5.955477479013816e-09, "loss": 0.3317, "step": 21763 }, { "epoch": 0.9850192351210681, "grad_norm": 0.5609269535348155, "learning_rate": 5.919769601308578e-09, "loss": 0.3022, "step": 21764 }, { "epoch": 0.9850644942294636, "grad_norm": 0.5993231895671898, "learning_rate": 5.8841690310496024e-09, "loss": 0.3011, "step": 21765 }, { "epoch": 0.9851097533378592, "grad_norm": 0.5689363005629489, "learning_rate": 5.8486757690012775e-09, "loss": 0.2157, "step": 21766 }, { "epoch": 0.9851550124462548, "grad_norm": 0.6758401330216106, "learning_rate": 5.8132898159268815e-09, "loss": 0.3054, "step": 21767 }, { "epoch": 0.9852002715546504, "grad_norm": 0.567882054273113, "learning_rate": 5.778011172586362e-09, "loss": 0.2651, "step": 21768 }, { "epoch": 0.985245530663046, "grad_norm": 0.5951791861094041, "learning_rate": 5.742839839738001e-09, "loss": 0.2943, "step": 21769 }, { "epoch": 0.9852907897714415, "grad_norm": 0.6362072194919404, "learning_rate": 5.7077758181367516e-09, "loss": 0.2947, "step": 21770 }, { "epoch": 0.9853360488798371, "grad_norm": 0.5708793133016763, "learning_rate": 5.6728191085370085e-09, "loss": 0.3013, "step": 21771 }, { "epoch": 0.9853813079882326, "grad_norm": 0.5973514269004946, "learning_rate": 5.637969711689839e-09, "loss": 0.2648, "step": 21772 }, { "epoch": 0.9854265670966282, "grad_norm": 0.5688949733432468, "learning_rate": 5.603227628342978e-09, "loss": 0.2827, "step": 21773 }, { "epoch": 0.9854718262050237, "grad_norm": 0.6476887178964443, "learning_rate": 5.56859285924416e-09, "loss": 0.3101, "step": 21774 }, { "epoch": 0.9855170853134193, "grad_norm": 0.6396008335007618, "learning_rate": 5.534065405136679e-09, "loss": 0.2817, "step": 21775 }, { "epoch": 0.9855623444218149, "grad_norm": 0.2376931055703006, "learning_rate": 5.499645266762721e-09, "loss": 0.448, "step": 21776 }, { "epoch": 0.9856076035302105, "grad_norm": 0.6520128967399444, "learning_rate": 5.465332444862248e-09, "loss": 0.3093, "step": 21777 }, { "epoch": 0.985652862638606, "grad_norm": 0.2865227420837884, "learning_rate": 5.431126940172449e-09, "loss": 0.4658, "step": 21778 }, { "epoch": 0.9856981217470016, "grad_norm": 0.5659488627340931, "learning_rate": 5.397028753427735e-09, "loss": 0.2714, "step": 21779 }, { "epoch": 0.9857433808553971, "grad_norm": 0.6335095816920012, "learning_rate": 5.363037885360856e-09, "loss": 0.3034, "step": 21780 }, { "epoch": 0.9857886399637927, "grad_norm": 0.7757831393322988, "learning_rate": 5.329154336702891e-09, "loss": 0.2621, "step": 21781 }, { "epoch": 0.9858338990721883, "grad_norm": 0.5468882390931025, "learning_rate": 5.295378108181592e-09, "loss": 0.261, "step": 21782 }, { "epoch": 0.9858791581805838, "grad_norm": 0.5784789337992116, "learning_rate": 5.261709200521936e-09, "loss": 0.2598, "step": 21783 }, { "epoch": 0.9859244172889794, "grad_norm": 0.6304084729105638, "learning_rate": 5.228147614448342e-09, "loss": 0.3111, "step": 21784 }, { "epoch": 0.985969676397375, "grad_norm": 0.6380056859811842, "learning_rate": 5.194693350681901e-09, "loss": 0.2934, "step": 21785 }, { "epoch": 0.9860149355057706, "grad_norm": 0.5925074282805367, "learning_rate": 5.161346409940371e-09, "loss": 0.2863, "step": 21786 }, { "epoch": 0.986060194614166, "grad_norm": 0.28274373509663, "learning_rate": 5.128106792941512e-09, "loss": 0.494, "step": 21787 }, { "epoch": 0.9861054537225616, "grad_norm": 0.9279097453101085, "learning_rate": 5.094974500399197e-09, "loss": 0.2801, "step": 21788 }, { "epoch": 0.9861507128309572, "grad_norm": 0.6266657228728079, "learning_rate": 5.061949533025079e-09, "loss": 0.2814, "step": 21789 }, { "epoch": 0.9861959719393528, "grad_norm": 0.3617893690237543, "learning_rate": 5.02903189152859e-09, "loss": 0.4443, "step": 21790 }, { "epoch": 0.9862412310477484, "grad_norm": 0.603404324430023, "learning_rate": 4.996221576617499e-09, "loss": 0.3358, "step": 21791 }, { "epoch": 0.9862864901561439, "grad_norm": 0.5743585459695362, "learning_rate": 4.9635185889967966e-09, "loss": 0.3327, "step": 21792 }, { "epoch": 0.9863317492645395, "grad_norm": 0.25070337964915884, "learning_rate": 4.930922929368698e-09, "loss": 0.4703, "step": 21793 }, { "epoch": 0.9863770083729351, "grad_norm": 0.29054184356976154, "learning_rate": 4.89843459843431e-09, "loss": 0.4523, "step": 21794 }, { "epoch": 0.9864222674813307, "grad_norm": 0.5935151628381033, "learning_rate": 4.8660535968908515e-09, "loss": 0.3137, "step": 21795 }, { "epoch": 0.9864675265897261, "grad_norm": 0.6235034034439103, "learning_rate": 4.833779925434434e-09, "loss": 0.3458, "step": 21796 }, { "epoch": 0.9865127856981217, "grad_norm": 0.6738464231815146, "learning_rate": 4.801613584758946e-09, "loss": 0.29, "step": 21797 }, { "epoch": 0.9865580448065173, "grad_norm": 0.2779606632713415, "learning_rate": 4.769554575554947e-09, "loss": 0.4779, "step": 21798 }, { "epoch": 0.9866033039149129, "grad_norm": 0.5716203526929273, "learning_rate": 4.737602898511884e-09, "loss": 0.2765, "step": 21799 }, { "epoch": 0.9866485630233084, "grad_norm": 0.24347567952338942, "learning_rate": 4.705758554315876e-09, "loss": 0.4396, "step": 21800 }, { "epoch": 0.986693822131704, "grad_norm": 0.5874975033790542, "learning_rate": 4.674021543651374e-09, "loss": 0.2783, "step": 21801 }, { "epoch": 0.9867390812400996, "grad_norm": 0.7032677349580255, "learning_rate": 4.642391867199503e-09, "loss": 0.2863, "step": 21802 }, { "epoch": 0.9867843403484952, "grad_norm": 0.6305097485833882, "learning_rate": 4.610869525641382e-09, "loss": 0.3122, "step": 21803 }, { "epoch": 0.9868295994568907, "grad_norm": 0.5912198400207207, "learning_rate": 4.579454519653137e-09, "loss": 0.2941, "step": 21804 }, { "epoch": 0.9868748585652862, "grad_norm": 0.2503455997627201, "learning_rate": 4.5481468499097845e-09, "loss": 0.466, "step": 21805 }, { "epoch": 0.9869201176736818, "grad_norm": 0.6940940885386304, "learning_rate": 4.516946517084675e-09, "loss": 0.2868, "step": 21806 }, { "epoch": 0.9869653767820774, "grad_norm": 0.644379368900686, "learning_rate": 4.485853521848382e-09, "loss": 0.3202, "step": 21807 }, { "epoch": 0.987010635890473, "grad_norm": 0.6286080941019534, "learning_rate": 4.4548678648681506e-09, "loss": 0.3505, "step": 21808 }, { "epoch": 0.9870558949988685, "grad_norm": 0.6199985918169026, "learning_rate": 4.423989546810115e-09, "loss": 0.2872, "step": 21809 }, { "epoch": 0.9871011541072641, "grad_norm": 0.5828107840187536, "learning_rate": 4.3932185683376316e-09, "loss": 0.3019, "step": 21810 }, { "epoch": 0.9871464132156597, "grad_norm": 0.591062507801819, "learning_rate": 4.362554930112395e-09, "loss": 0.2626, "step": 21811 }, { "epoch": 0.9871916723240552, "grad_norm": 0.6613664730270813, "learning_rate": 4.331998632792766e-09, "loss": 0.3347, "step": 21812 }, { "epoch": 0.9872369314324507, "grad_norm": 0.591931367567967, "learning_rate": 4.3015496770354435e-09, "loss": 0.2522, "step": 21813 }, { "epoch": 0.9872821905408463, "grad_norm": 0.6118863093822567, "learning_rate": 4.2712080634949024e-09, "loss": 0.3038, "step": 21814 }, { "epoch": 0.9873274496492419, "grad_norm": 0.9204855378714794, "learning_rate": 4.240973792822845e-09, "loss": 0.3698, "step": 21815 }, { "epoch": 0.9873727087576375, "grad_norm": 0.5471052873028251, "learning_rate": 4.210846865668749e-09, "loss": 0.2452, "step": 21816 }, { "epoch": 0.9874179678660331, "grad_norm": 0.648162350342267, "learning_rate": 4.180827282680433e-09, "loss": 0.2873, "step": 21817 }, { "epoch": 0.9874632269744286, "grad_norm": 0.6168060659402789, "learning_rate": 4.1509150445023794e-09, "loss": 0.283, "step": 21818 }, { "epoch": 0.9875084860828242, "grad_norm": 0.6184546215910344, "learning_rate": 4.121110151777407e-09, "loss": 0.2466, "step": 21819 }, { "epoch": 0.9875537451912197, "grad_norm": 0.7674514700369797, "learning_rate": 4.0914126051466715e-09, "loss": 0.2808, "step": 21820 }, { "epoch": 0.9875990042996153, "grad_norm": 0.597187270312596, "learning_rate": 4.06182240524744e-09, "loss": 0.3179, "step": 21821 }, { "epoch": 0.9876442634080108, "grad_norm": 0.668955246942683, "learning_rate": 4.032339552715869e-09, "loss": 0.3038, "step": 21822 }, { "epoch": 0.9876895225164064, "grad_norm": 0.6083238151005289, "learning_rate": 4.002964048185342e-09, "loss": 0.2824, "step": 21823 }, { "epoch": 0.987734781624802, "grad_norm": 0.6671406711905182, "learning_rate": 3.973695892287022e-09, "loss": 0.3158, "step": 21824 }, { "epoch": 0.9877800407331976, "grad_norm": 0.7626532643269714, "learning_rate": 3.944535085649848e-09, "loss": 0.2742, "step": 21825 }, { "epoch": 0.9878252998415932, "grad_norm": 0.6286400094111584, "learning_rate": 3.915481628900541e-09, "loss": 0.2999, "step": 21826 }, { "epoch": 0.9878705589499887, "grad_norm": 0.5968127011830571, "learning_rate": 3.8865355226630484e-09, "loss": 0.2548, "step": 21827 }, { "epoch": 0.9879158180583842, "grad_norm": 0.6086665535501046, "learning_rate": 3.857696767559649e-09, "loss": 0.3023, "step": 21828 }, { "epoch": 0.9879610771667798, "grad_norm": 0.6130992682575301, "learning_rate": 3.828965364209847e-09, "loss": 0.278, "step": 21829 }, { "epoch": 0.9880063362751754, "grad_norm": 0.6310926783173733, "learning_rate": 3.8003413132309265e-09, "loss": 0.2965, "step": 21830 }, { "epoch": 0.9880515953835709, "grad_norm": 0.6549428048986663, "learning_rate": 3.771824615237951e-09, "loss": 0.3078, "step": 21831 }, { "epoch": 0.9880968544919665, "grad_norm": 0.6044611572338786, "learning_rate": 3.7434152708437645e-09, "loss": 0.2942, "step": 21832 }, { "epoch": 0.9881421136003621, "grad_norm": 0.6306510752902703, "learning_rate": 3.7151132806589885e-09, "loss": 0.2848, "step": 21833 }, { "epoch": 0.9881873727087577, "grad_norm": 0.5962400120103846, "learning_rate": 3.6869186452909157e-09, "loss": 0.2606, "step": 21834 }, { "epoch": 0.9882326318171532, "grad_norm": 0.6985939720579019, "learning_rate": 3.6588313653468377e-09, "loss": 0.3017, "step": 21835 }, { "epoch": 0.9882778909255487, "grad_norm": 0.6139462080468838, "learning_rate": 3.6308514414284958e-09, "loss": 0.2337, "step": 21836 }, { "epoch": 0.9883231500339443, "grad_norm": 0.6032328510366322, "learning_rate": 3.6029788741387407e-09, "loss": 0.3009, "step": 21837 }, { "epoch": 0.9883684091423399, "grad_norm": 0.5821709047204009, "learning_rate": 3.5752136640754278e-09, "loss": 0.2985, "step": 21838 }, { "epoch": 0.9884136682507355, "grad_norm": 0.6916801065461516, "learning_rate": 3.5475558118353015e-09, "loss": 0.2756, "step": 21839 }, { "epoch": 0.988458927359131, "grad_norm": 0.5836411571093898, "learning_rate": 3.5200053180128867e-09, "loss": 0.2696, "step": 21840 }, { "epoch": 0.9885041864675266, "grad_norm": 0.5775515617423247, "learning_rate": 3.4925621831999325e-09, "loss": 0.2749, "step": 21841 }, { "epoch": 0.9885494455759222, "grad_norm": 0.6257537502517646, "learning_rate": 3.4652264079859666e-09, "loss": 0.274, "step": 21842 }, { "epoch": 0.9885947046843178, "grad_norm": 0.5803599968862889, "learning_rate": 3.4379979929588526e-09, "loss": 0.2858, "step": 21843 }, { "epoch": 0.9886399637927132, "grad_norm": 0.6363360456382771, "learning_rate": 3.410876938703678e-09, "loss": 0.3185, "step": 21844 }, { "epoch": 0.9886852229011088, "grad_norm": 0.5635976745712896, "learning_rate": 3.383863245802754e-09, "loss": 0.2686, "step": 21845 }, { "epoch": 0.9887304820095044, "grad_norm": 0.5524016813596654, "learning_rate": 3.3569569148367286e-09, "loss": 0.2938, "step": 21846 }, { "epoch": 0.9887757411179, "grad_norm": 0.5676520951342483, "learning_rate": 3.3301579463834722e-09, "loss": 0.2986, "step": 21847 }, { "epoch": 0.9888210002262955, "grad_norm": 0.5711577922083861, "learning_rate": 3.30346634101919e-09, "loss": 0.3036, "step": 21848 }, { "epoch": 0.9888662593346911, "grad_norm": 0.6275128453665675, "learning_rate": 3.276882099316758e-09, "loss": 0.2543, "step": 21849 }, { "epoch": 0.9889115184430867, "grad_norm": 0.6967194406864456, "learning_rate": 3.250405221848496e-09, "loss": 0.2959, "step": 21850 }, { "epoch": 0.9889567775514823, "grad_norm": 0.5869615549138006, "learning_rate": 3.224035709182283e-09, "loss": 0.269, "step": 21851 }, { "epoch": 0.9890020366598778, "grad_norm": 0.6351290356709387, "learning_rate": 3.1977735618854423e-09, "loss": 0.319, "step": 21852 }, { "epoch": 0.9890472957682733, "grad_norm": 0.6284376408209436, "learning_rate": 3.1716187805214127e-09, "loss": 0.2715, "step": 21853 }, { "epoch": 0.9890925548766689, "grad_norm": 0.624262732120063, "learning_rate": 3.1455713656530774e-09, "loss": 0.2756, "step": 21854 }, { "epoch": 0.9891378139850645, "grad_norm": 0.5863698484875334, "learning_rate": 3.1196313178399882e-09, "loss": 0.278, "step": 21855 }, { "epoch": 0.9891830730934601, "grad_norm": 0.5864017516835073, "learning_rate": 3.093798637638923e-09, "loss": 0.2609, "step": 21856 }, { "epoch": 0.9892283322018556, "grad_norm": 0.6361268027097685, "learning_rate": 3.0680733256055475e-09, "loss": 0.313, "step": 21857 }, { "epoch": 0.9892735913102512, "grad_norm": 0.5593585491784253, "learning_rate": 3.0424553822921977e-09, "loss": 0.2762, "step": 21858 }, { "epoch": 0.9893188504186468, "grad_norm": 0.5934333708764978, "learning_rate": 3.0169448082495446e-09, "loss": 0.323, "step": 21859 }, { "epoch": 0.9893641095270423, "grad_norm": 0.5554011995679686, "learning_rate": 2.991541604025483e-09, "loss": 0.2079, "step": 21860 }, { "epoch": 0.9894093686354379, "grad_norm": 0.6551671338868413, "learning_rate": 2.9662457701662428e-09, "loss": 0.3135, "step": 21861 }, { "epoch": 0.9894546277438334, "grad_norm": 0.5727764315689057, "learning_rate": 2.9410573072152783e-09, "loss": 0.2909, "step": 21862 }, { "epoch": 0.989499886852229, "grad_norm": 0.5694765003150496, "learning_rate": 2.915976215713268e-09, "loss": 0.2915, "step": 21863 }, { "epoch": 0.9895451459606246, "grad_norm": 0.6835921802362995, "learning_rate": 2.8910024962003347e-09, "loss": 0.3184, "step": 21864 }, { "epoch": 0.9895904050690202, "grad_norm": 0.597071520318116, "learning_rate": 2.866136149211607e-09, "loss": 0.3253, "step": 21865 }, { "epoch": 0.9896356641774157, "grad_norm": 0.6053926412628252, "learning_rate": 2.8413771752822116e-09, "loss": 0.2917, "step": 21866 }, { "epoch": 0.9896809232858113, "grad_norm": 0.6335645278894662, "learning_rate": 2.816725574943946e-09, "loss": 0.2666, "step": 21867 }, { "epoch": 0.9897261823942068, "grad_norm": 0.6513409620336323, "learning_rate": 2.792181348726941e-09, "loss": 0.2856, "step": 21868 }, { "epoch": 0.9897714415026024, "grad_norm": 0.6472487900335032, "learning_rate": 2.767744497157998e-09, "loss": 0.3292, "step": 21869 }, { "epoch": 0.9898167006109979, "grad_norm": 0.6328915428497753, "learning_rate": 2.7434150207622525e-09, "loss": 0.2938, "step": 21870 }, { "epoch": 0.9898619597193935, "grad_norm": 0.676358863988407, "learning_rate": 2.719192920063174e-09, "loss": 0.3124, "step": 21871 }, { "epoch": 0.9899072188277891, "grad_norm": 0.7129888996080095, "learning_rate": 2.6950781955803475e-09, "loss": 0.2925, "step": 21872 }, { "epoch": 0.9899524779361847, "grad_norm": 0.5912825030245769, "learning_rate": 2.6710708478316914e-09, "loss": 0.2965, "step": 21873 }, { "epoch": 0.9899977370445803, "grad_norm": 0.6327579557572981, "learning_rate": 2.6471708773340154e-09, "loss": 0.2893, "step": 21874 }, { "epoch": 0.9900429961529758, "grad_norm": 0.6187239759164697, "learning_rate": 2.623378284600797e-09, "loss": 0.2831, "step": 21875 }, { "epoch": 0.9900882552613713, "grad_norm": 0.5763850127898063, "learning_rate": 2.599693070142739e-09, "loss": 0.274, "step": 21876 }, { "epoch": 0.9901335143697669, "grad_norm": 0.6044053598183847, "learning_rate": 2.576115234468324e-09, "loss": 0.3054, "step": 21877 }, { "epoch": 0.9901787734781625, "grad_norm": 0.6342602639735581, "learning_rate": 2.552644778085478e-09, "loss": 0.3095, "step": 21878 }, { "epoch": 0.990224032586558, "grad_norm": 0.5974125313774147, "learning_rate": 2.5292817014976877e-09, "loss": 0.2937, "step": 21879 }, { "epoch": 0.9902692916949536, "grad_norm": 0.5630186640044591, "learning_rate": 2.5060260052067742e-09, "loss": 0.2344, "step": 21880 }, { "epoch": 0.9903145508033492, "grad_norm": 0.6598742022611298, "learning_rate": 2.4828776897128925e-09, "loss": 0.3481, "step": 21881 }, { "epoch": 0.9903598099117448, "grad_norm": 0.679798663117042, "learning_rate": 2.459836755513423e-09, "loss": 0.31, "step": 21882 }, { "epoch": 0.9904050690201402, "grad_norm": 0.6326383323678978, "learning_rate": 2.4369032031029695e-09, "loss": 0.2829, "step": 21883 }, { "epoch": 0.9904503281285358, "grad_norm": 0.6012953654016868, "learning_rate": 2.4140770329750264e-09, "loss": 0.2817, "step": 21884 }, { "epoch": 0.9904955872369314, "grad_norm": 0.6390496404163677, "learning_rate": 2.391358245619202e-09, "loss": 0.2729, "step": 21885 }, { "epoch": 0.990540846345327, "grad_norm": 0.6142964865838836, "learning_rate": 2.3687468415245494e-09, "loss": 0.2952, "step": 21886 }, { "epoch": 0.9905861054537226, "grad_norm": 0.7176865699863244, "learning_rate": 2.346242821176237e-09, "loss": 0.3201, "step": 21887 }, { "epoch": 0.9906313645621181, "grad_norm": 0.6598530408570948, "learning_rate": 2.3238461850583206e-09, "loss": 0.3243, "step": 21888 }, { "epoch": 0.9906766236705137, "grad_norm": 0.6331095745795835, "learning_rate": 2.3015569336509724e-09, "loss": 0.3047, "step": 21889 }, { "epoch": 0.9907218827789093, "grad_norm": 0.6414719345668272, "learning_rate": 2.279375067434919e-09, "loss": 0.2782, "step": 21890 }, { "epoch": 0.9907671418873049, "grad_norm": 0.638176475097762, "learning_rate": 2.2573005868853358e-09, "loss": 0.3231, "step": 21891 }, { "epoch": 0.9908124009957003, "grad_norm": 0.6025064831274091, "learning_rate": 2.2353334924768435e-09, "loss": 0.2795, "step": 21892 }, { "epoch": 0.9908576601040959, "grad_norm": 0.5871043366477097, "learning_rate": 2.213473784681286e-09, "loss": 0.2546, "step": 21893 }, { "epoch": 0.9909029192124915, "grad_norm": 0.6708388062940376, "learning_rate": 2.1917214639693985e-09, "loss": 0.2715, "step": 21894 }, { "epoch": 0.9909481783208871, "grad_norm": 0.6180303987275302, "learning_rate": 2.1700765308074743e-09, "loss": 0.2948, "step": 21895 }, { "epoch": 0.9909934374292827, "grad_norm": 0.5761702880401627, "learning_rate": 2.1485389856606973e-09, "loss": 0.3112, "step": 21896 }, { "epoch": 0.9910386965376782, "grad_norm": 0.5283381025407042, "learning_rate": 2.1271088289920304e-09, "loss": 0.292, "step": 21897 }, { "epoch": 0.9910839556460738, "grad_norm": 0.6000773893981395, "learning_rate": 2.1057860612627713e-09, "loss": 0.2763, "step": 21898 }, { "epoch": 0.9911292147544694, "grad_norm": 0.5964113949286953, "learning_rate": 2.0845706829297762e-09, "loss": 0.3268, "step": 21899 }, { "epoch": 0.991174473862865, "grad_norm": 0.5875991897924737, "learning_rate": 2.0634626944493475e-09, "loss": 0.251, "step": 21900 }, { "epoch": 0.9912197329712604, "grad_norm": 0.6623000008080091, "learning_rate": 2.0424620962750107e-09, "loss": 0.3334, "step": 21901 }, { "epoch": 0.991264992079656, "grad_norm": 0.5850960457521891, "learning_rate": 2.021568888858627e-09, "loss": 0.2595, "step": 21902 }, { "epoch": 0.9913102511880516, "grad_norm": 0.5840999288698242, "learning_rate": 2.0007830726481716e-09, "loss": 0.2888, "step": 21903 }, { "epoch": 0.9913555102964472, "grad_norm": 0.7335081157665665, "learning_rate": 1.980104648090508e-09, "loss": 0.2938, "step": 21904 }, { "epoch": 0.9914007694048427, "grad_norm": 0.6805641085989351, "learning_rate": 1.9595336156308375e-09, "loss": 0.321, "step": 21905 }, { "epoch": 0.9914460285132383, "grad_norm": 0.6457916470088789, "learning_rate": 1.9390699757099174e-09, "loss": 0.2899, "step": 21906 }, { "epoch": 0.9914912876216339, "grad_norm": 0.6183658680749494, "learning_rate": 1.9187137287685065e-09, "loss": 0.2824, "step": 21907 }, { "epoch": 0.9915365467300294, "grad_norm": 0.6144721012585188, "learning_rate": 1.8984648752429222e-09, "loss": 0.287, "step": 21908 }, { "epoch": 0.991581805838425, "grad_norm": 0.6641942731452835, "learning_rate": 1.878323415568928e-09, "loss": 0.2704, "step": 21909 }, { "epoch": 0.9916270649468205, "grad_norm": 1.0685510439896335, "learning_rate": 1.8582893501795096e-09, "loss": 0.2873, "step": 21910 }, { "epoch": 0.9916723240552161, "grad_norm": 0.5948347391105585, "learning_rate": 1.8383626795048793e-09, "loss": 0.2665, "step": 21911 }, { "epoch": 0.9917175831636117, "grad_norm": 0.625767466906673, "learning_rate": 1.8185434039730277e-09, "loss": 0.2922, "step": 21912 }, { "epoch": 0.9917628422720073, "grad_norm": 0.5766022669904595, "learning_rate": 1.7988315240097254e-09, "loss": 0.2751, "step": 21913 }, { "epoch": 0.9918081013804028, "grad_norm": 0.6275741354154193, "learning_rate": 1.7792270400390776e-09, "loss": 0.2615, "step": 21914 }, { "epoch": 0.9918533604887984, "grad_norm": 0.5845001298448244, "learning_rate": 1.759729952481859e-09, "loss": 0.3006, "step": 21915 }, { "epoch": 0.9918986195971939, "grad_norm": 1.86394089138806, "learning_rate": 1.7403402617571785e-09, "loss": 0.2822, "step": 21916 }, { "epoch": 0.9919438787055895, "grad_norm": 0.6429230000792644, "learning_rate": 1.72105796828137e-09, "loss": 0.3051, "step": 21917 }, { "epoch": 0.991989137813985, "grad_norm": 0.6895858610389437, "learning_rate": 1.7018830724691016e-09, "loss": 0.2702, "step": 21918 }, { "epoch": 0.9920343969223806, "grad_norm": 0.592948558171695, "learning_rate": 1.682815574732266e-09, "loss": 0.2767, "step": 21919 }, { "epoch": 0.9920796560307762, "grad_norm": 0.5687485020465194, "learning_rate": 1.6638554754805358e-09, "loss": 0.2975, "step": 21920 }, { "epoch": 0.9921249151391718, "grad_norm": 0.5920284006547106, "learning_rate": 1.6450027751213626e-09, "loss": 0.2997, "step": 21921 }, { "epoch": 0.9921701742475674, "grad_norm": 0.6121210158543754, "learning_rate": 1.6262574740599778e-09, "loss": 0.3029, "step": 21922 }, { "epoch": 0.9922154333559629, "grad_norm": 0.5701848941441401, "learning_rate": 1.6076195726982824e-09, "loss": 0.2618, "step": 21923 }, { "epoch": 0.9922606924643584, "grad_norm": 0.6239870659742223, "learning_rate": 1.5890890714381769e-09, "loss": 0.3071, "step": 21924 }, { "epoch": 0.992305951572754, "grad_norm": 0.5982485102238448, "learning_rate": 1.5706659706771211e-09, "loss": 0.2781, "step": 21925 }, { "epoch": 0.9923512106811496, "grad_norm": 0.5887485429538946, "learning_rate": 1.5523502708103544e-09, "loss": 0.304, "step": 21926 }, { "epoch": 0.9923964697895451, "grad_norm": 0.6232348804393142, "learning_rate": 1.5341419722325612e-09, "loss": 0.2928, "step": 21927 }, { "epoch": 0.9924417288979407, "grad_norm": 0.6679441376503535, "learning_rate": 1.51604107533454e-09, "loss": 0.2651, "step": 21928 }, { "epoch": 0.9924869880063363, "grad_norm": 0.6308610655766753, "learning_rate": 1.4980475805048688e-09, "loss": 0.3113, "step": 21929 }, { "epoch": 0.9925322471147319, "grad_norm": 0.6038910862193314, "learning_rate": 1.4801614881304604e-09, "loss": 0.2558, "step": 21930 }, { "epoch": 0.9925775062231275, "grad_norm": 0.566272493740601, "learning_rate": 1.462382798595452e-09, "loss": 0.2528, "step": 21931 }, { "epoch": 0.9926227653315229, "grad_norm": 0.6423144924625247, "learning_rate": 1.4447115122817601e-09, "loss": 0.2585, "step": 21932 }, { "epoch": 0.9926680244399185, "grad_norm": 0.6201093438260887, "learning_rate": 1.4271476295696363e-09, "loss": 0.2633, "step": 21933 }, { "epoch": 0.9927132835483141, "grad_norm": 0.6085470147719076, "learning_rate": 1.4096911508365564e-09, "loss": 0.28, "step": 21934 }, { "epoch": 0.9927585426567097, "grad_norm": 0.637300112023428, "learning_rate": 1.3923420764566653e-09, "loss": 0.2697, "step": 21935 }, { "epoch": 0.9928038017651052, "grad_norm": 0.5883667578079982, "learning_rate": 1.3751004068035534e-09, "loss": 0.2711, "step": 21936 }, { "epoch": 0.9928490608735008, "grad_norm": 0.5961564073876877, "learning_rate": 1.35796614224748e-09, "loss": 0.2861, "step": 21937 }, { "epoch": 0.9928943199818964, "grad_norm": 0.6375560477188555, "learning_rate": 1.3409392831564838e-09, "loss": 0.2655, "step": 21938 }, { "epoch": 0.992939579090292, "grad_norm": 0.6792700890001889, "learning_rate": 1.3240198298963836e-09, "loss": 0.2948, "step": 21939 }, { "epoch": 0.9929848381986874, "grad_norm": 0.5840208606838262, "learning_rate": 1.3072077828307772e-09, "loss": 0.3265, "step": 21940 }, { "epoch": 0.993030097307083, "grad_norm": 0.6286625929006481, "learning_rate": 1.2905031423210423e-09, "loss": 0.2734, "step": 21941 }, { "epoch": 0.9930753564154786, "grad_norm": 0.7285734220275707, "learning_rate": 1.2739059087263362e-09, "loss": 0.3262, "step": 21942 }, { "epoch": 0.9931206155238742, "grad_norm": 0.5779318308145721, "learning_rate": 1.257416082402485e-09, "loss": 0.2495, "step": 21943 }, { "epoch": 0.9931658746322698, "grad_norm": 0.6083077717806407, "learning_rate": 1.2410336637047604e-09, "loss": 0.3073, "step": 21944 }, { "epoch": 0.9932111337406653, "grad_norm": 0.6214203937496386, "learning_rate": 1.2247586529845479e-09, "loss": 0.2834, "step": 21945 }, { "epoch": 0.9932563928490609, "grad_norm": 0.5481423624767995, "learning_rate": 1.2085910505915677e-09, "loss": 0.3056, "step": 21946 }, { "epoch": 0.9933016519574565, "grad_norm": 0.572538572374642, "learning_rate": 1.1925308568733197e-09, "loss": 0.2804, "step": 21947 }, { "epoch": 0.993346911065852, "grad_norm": 0.6241378536217829, "learning_rate": 1.176578072175083e-09, "loss": 0.273, "step": 21948 }, { "epoch": 0.9933921701742475, "grad_norm": 0.5799838848065667, "learning_rate": 1.1607326968393617e-09, "loss": 0.2911, "step": 21949 }, { "epoch": 0.9934374292826431, "grad_norm": 0.6480647784604268, "learning_rate": 1.1449947312064392e-09, "loss": 0.3535, "step": 21950 }, { "epoch": 0.9934826883910387, "grad_norm": 0.6990225541403731, "learning_rate": 1.1293641756154883e-09, "loss": 0.3251, "step": 21951 }, { "epoch": 0.9935279474994343, "grad_norm": 0.5795906833178194, "learning_rate": 1.1138410304012415e-09, "loss": 0.3409, "step": 21952 }, { "epoch": 0.9935732066078298, "grad_norm": 0.605951227260699, "learning_rate": 1.0984252958973207e-09, "loss": 0.3098, "step": 21953 }, { "epoch": 0.9936184657162254, "grad_norm": 0.6328547372958854, "learning_rate": 1.0831169724356828e-09, "loss": 0.321, "step": 21954 }, { "epoch": 0.993663724824621, "grad_norm": 0.6120233663098031, "learning_rate": 1.0679160603449533e-09, "loss": 0.3066, "step": 21955 }, { "epoch": 0.9937089839330165, "grad_norm": 0.5841427755427862, "learning_rate": 1.0528225599515385e-09, "loss": 0.3099, "step": 21956 }, { "epoch": 0.9937542430414121, "grad_norm": 0.6222734322557101, "learning_rate": 1.037836471579623e-09, "loss": 0.3139, "step": 21957 }, { "epoch": 0.9937995021498076, "grad_norm": 0.6193352639862391, "learning_rate": 1.0229577955517267e-09, "loss": 0.3219, "step": 21958 }, { "epoch": 0.9938447612582032, "grad_norm": 0.6356822729983301, "learning_rate": 1.008186532187594e-09, "loss": 0.2576, "step": 21959 }, { "epoch": 0.9938900203665988, "grad_norm": 0.5978070819650005, "learning_rate": 9.93522681803638e-10, "loss": 0.3192, "step": 21960 }, { "epoch": 0.9939352794749944, "grad_norm": 0.6261767044610226, "learning_rate": 9.789662447157178e-10, "loss": 0.3025, "step": 21961 }, { "epoch": 0.9939805385833899, "grad_norm": 0.593104996327592, "learning_rate": 9.645172212369158e-10, "loss": 0.2947, "step": 21962 }, { "epoch": 0.9940257976917855, "grad_norm": 0.6462433332947585, "learning_rate": 9.501756116769844e-10, "loss": 0.2828, "step": 21963 }, { "epoch": 0.994071056800181, "grad_norm": 0.5524732648542988, "learning_rate": 9.359414163445657e-10, "loss": 0.3159, "step": 21964 }, { "epoch": 0.9941163159085766, "grad_norm": 0.583561381683474, "learning_rate": 9.218146355449709e-10, "loss": 0.2795, "step": 21965 }, { "epoch": 0.9941615750169722, "grad_norm": 0.6135715357641147, "learning_rate": 9.07795269582401e-10, "loss": 0.3043, "step": 21966 }, { "epoch": 0.9942068341253677, "grad_norm": 0.6229441688619807, "learning_rate": 8.938833187577267e-10, "loss": 0.2594, "step": 21967 }, { "epoch": 0.9942520932337633, "grad_norm": 0.6177528880302975, "learning_rate": 8.800787833695978e-10, "loss": 0.28, "step": 21968 }, { "epoch": 0.9942973523421589, "grad_norm": 1.8804680331595953, "learning_rate": 8.663816637149991e-10, "loss": 0.2535, "step": 21969 }, { "epoch": 0.9943426114505545, "grad_norm": 0.7889549714801867, "learning_rate": 8.527919600886947e-10, "loss": 0.3012, "step": 21970 }, { "epoch": 0.99438787055895, "grad_norm": 0.6087356486241385, "learning_rate": 8.393096727815631e-10, "loss": 0.3013, "step": 21971 }, { "epoch": 0.9944331296673455, "grad_norm": 0.6173904701348589, "learning_rate": 8.259348020844827e-10, "loss": 0.3094, "step": 21972 }, { "epoch": 0.9944783887757411, "grad_norm": 0.6153369456157168, "learning_rate": 8.126673482838909e-10, "loss": 0.2814, "step": 21973 }, { "epoch": 0.9945236478841367, "grad_norm": 0.6292016348416848, "learning_rate": 7.995073116656704e-10, "loss": 0.3302, "step": 21974 }, { "epoch": 0.9945689069925322, "grad_norm": 0.5631017655648378, "learning_rate": 7.864546925118177e-10, "loss": 0.2664, "step": 21975 }, { "epoch": 0.9946141661009278, "grad_norm": 0.6069259072107689, "learning_rate": 7.735094911032193e-10, "loss": 0.2962, "step": 21976 }, { "epoch": 0.9946594252093234, "grad_norm": 0.531390973141288, "learning_rate": 7.606717077179859e-10, "loss": 0.2682, "step": 21977 }, { "epoch": 0.994704684317719, "grad_norm": 0.7526144498408558, "learning_rate": 7.47941342631453e-10, "loss": 0.3383, "step": 21978 }, { "epoch": 0.9947499434261146, "grad_norm": 0.6084599902690812, "learning_rate": 7.353183961184007e-10, "loss": 0.323, "step": 21979 }, { "epoch": 0.99479520253451, "grad_norm": 0.5413245411564832, "learning_rate": 7.228028684486132e-10, "loss": 0.2905, "step": 21980 }, { "epoch": 0.9948404616429056, "grad_norm": 0.598947853073034, "learning_rate": 7.103947598918747e-10, "loss": 0.3424, "step": 21981 }, { "epoch": 0.9948857207513012, "grad_norm": 0.6844650599597334, "learning_rate": 6.980940707146388e-10, "loss": 0.2437, "step": 21982 }, { "epoch": 0.9949309798596968, "grad_norm": 0.6625091436369556, "learning_rate": 6.859008011816937e-10, "loss": 0.2969, "step": 21983 }, { "epoch": 0.9949762389680923, "grad_norm": 0.6723159430338779, "learning_rate": 6.738149515539416e-10, "loss": 0.2529, "step": 21984 }, { "epoch": 0.9950214980764879, "grad_norm": 1.4999966827879183, "learning_rate": 6.618365220917299e-10, "loss": 0.3146, "step": 21985 }, { "epoch": 0.9950667571848835, "grad_norm": 0.6497521224251643, "learning_rate": 6.499655130526306e-10, "loss": 0.3073, "step": 21986 }, { "epoch": 0.9951120162932791, "grad_norm": 0.6168352312353976, "learning_rate": 6.382019246908844e-10, "loss": 0.2842, "step": 21987 }, { "epoch": 0.9951572754016745, "grad_norm": 0.5962748762799298, "learning_rate": 6.265457572601774e-10, "loss": 0.2974, "step": 21988 }, { "epoch": 0.9952025345100701, "grad_norm": 0.6219469205840408, "learning_rate": 6.149970110108649e-10, "loss": 0.2968, "step": 21989 }, { "epoch": 0.9952477936184657, "grad_norm": 0.5677401620285023, "learning_rate": 6.035556861905268e-10, "loss": 0.256, "step": 21990 }, { "epoch": 0.9952930527268613, "grad_norm": 0.6820100081540836, "learning_rate": 5.922217830450772e-10, "loss": 0.2936, "step": 21991 }, { "epoch": 0.9953383118352569, "grad_norm": 0.5594585909652834, "learning_rate": 5.809953018187652e-10, "loss": 0.2635, "step": 21992 }, { "epoch": 0.9953835709436524, "grad_norm": 0.6206856525103511, "learning_rate": 5.698762427519544e-10, "loss": 0.32, "step": 21993 }, { "epoch": 0.995428830052048, "grad_norm": 0.5969689557141281, "learning_rate": 5.588646060838976e-10, "loss": 0.258, "step": 21994 }, { "epoch": 0.9954740891604436, "grad_norm": 0.6377851737737551, "learning_rate": 5.479603920516275e-10, "loss": 0.328, "step": 21995 }, { "epoch": 0.9955193482688391, "grad_norm": 0.5864974798279645, "learning_rate": 5.371636008888459e-10, "loss": 0.2919, "step": 21996 }, { "epoch": 0.9955646073772346, "grad_norm": 0.599331196829423, "learning_rate": 5.264742328275896e-10, "loss": 0.3034, "step": 21997 }, { "epoch": 0.9956098664856302, "grad_norm": 0.5859840998410031, "learning_rate": 5.158922880976747e-10, "loss": 0.3054, "step": 21998 }, { "epoch": 0.9956551255940258, "grad_norm": 0.6133896988379982, "learning_rate": 5.054177669266969e-10, "loss": 0.2732, "step": 21999 }, { "epoch": 0.9957003847024214, "grad_norm": 0.6355733324276068, "learning_rate": 4.950506695394763e-10, "loss": 0.2989, "step": 22000 }, { "epoch": 0.9957456438108169, "grad_norm": 0.6405292750738774, "learning_rate": 4.847909961586128e-10, "loss": 0.2672, "step": 22001 }, { "epoch": 0.9957909029192125, "grad_norm": 0.6117498728482897, "learning_rate": 4.746387470044855e-10, "loss": 0.2925, "step": 22002 }, { "epoch": 0.995836162027608, "grad_norm": 0.5603673735108489, "learning_rate": 4.645939222963636e-10, "loss": 0.293, "step": 22003 }, { "epoch": 0.9958814211360036, "grad_norm": 0.6255173214844361, "learning_rate": 4.5465652224851996e-10, "loss": 0.3283, "step": 22004 }, { "epoch": 0.9959266802443992, "grad_norm": 0.6383297301865899, "learning_rate": 4.4482654707522774e-10, "loss": 0.2651, "step": 22005 }, { "epoch": 0.9959719393527947, "grad_norm": 0.6596672113163915, "learning_rate": 4.3510399698798445e-10, "loss": 0.3096, "step": 22006 }, { "epoch": 0.9960171984611903, "grad_norm": 0.5702939246958649, "learning_rate": 4.2548887219551196e-10, "loss": 0.2759, "step": 22007 }, { "epoch": 0.9960624575695859, "grad_norm": 0.6001358151182172, "learning_rate": 4.159811729037566e-10, "loss": 0.3007, "step": 22008 }, { "epoch": 0.9961077166779815, "grad_norm": 0.5805687401563262, "learning_rate": 4.0658089931755463e-10, "loss": 0.2896, "step": 22009 }, { "epoch": 0.996152975786377, "grad_norm": 0.5658873242067638, "learning_rate": 3.9728805163896654e-10, "loss": 0.2929, "step": 22010 }, { "epoch": 0.9961982348947725, "grad_norm": 0.6399124156608923, "learning_rate": 3.8810263006783255e-10, "loss": 0.2973, "step": 22011 }, { "epoch": 0.9962434940031681, "grad_norm": 0.5762794400323175, "learning_rate": 3.790246348012172e-10, "loss": 0.3147, "step": 22012 }, { "epoch": 0.9962887531115637, "grad_norm": 0.5395651917899117, "learning_rate": 3.7005406603396464e-10, "loss": 0.2454, "step": 22013 }, { "epoch": 0.9963340122199593, "grad_norm": 0.5848163901483809, "learning_rate": 3.6119092395869857e-10, "loss": 0.3129, "step": 22014 }, { "epoch": 0.9963792713283548, "grad_norm": 0.6193390910504796, "learning_rate": 3.524352087669325e-10, "loss": 0.3131, "step": 22015 }, { "epoch": 0.9964245304367504, "grad_norm": 0.6067972758265497, "learning_rate": 3.4378692064573895e-10, "loss": 0.3218, "step": 22016 }, { "epoch": 0.996469789545146, "grad_norm": 0.5943380199518459, "learning_rate": 3.3524605978108027e-10, "loss": 0.3294, "step": 22017 }, { "epoch": 0.9965150486535416, "grad_norm": 0.6979205732955975, "learning_rate": 3.268126263572535e-10, "loss": 0.3097, "step": 22018 }, { "epoch": 0.996560307761937, "grad_norm": 0.6026104421965898, "learning_rate": 3.1848662055411484e-10, "loss": 0.3155, "step": 22019 }, { "epoch": 0.9966055668703326, "grad_norm": 0.5833935911580108, "learning_rate": 3.1026804255207544e-10, "loss": 0.348, "step": 22020 }, { "epoch": 0.9966508259787282, "grad_norm": 0.5544309223963749, "learning_rate": 3.0215689252655056e-10, "loss": 0.2619, "step": 22021 }, { "epoch": 0.9966960850871238, "grad_norm": 0.62487696640231, "learning_rate": 2.9415317065240037e-10, "loss": 0.2839, "step": 22022 }, { "epoch": 0.9967413441955193, "grad_norm": 0.5779353778908592, "learning_rate": 2.8625687710170933e-10, "loss": 0.282, "step": 22023 }, { "epoch": 0.9967866033039149, "grad_norm": 0.6887365734828341, "learning_rate": 2.784680120437866e-10, "loss": 0.28, "step": 22024 }, { "epoch": 0.9968318624123105, "grad_norm": 0.6284514613591872, "learning_rate": 2.7078657564572065e-10, "loss": 0.2795, "step": 22025 }, { "epoch": 0.9968771215207061, "grad_norm": 0.6018631891275054, "learning_rate": 2.632125680734898e-10, "loss": 0.3028, "step": 22026 }, { "epoch": 0.9969223806291017, "grad_norm": 0.6014152852040981, "learning_rate": 2.557459894891867e-10, "loss": 0.3303, "step": 22027 }, { "epoch": 0.9969676397374971, "grad_norm": 0.6160925266368528, "learning_rate": 2.4838684005323853e-10, "loss": 0.2885, "step": 22028 }, { "epoch": 0.9970128988458927, "grad_norm": 0.7129821777469944, "learning_rate": 2.4113511992385206e-10, "loss": 0.2893, "step": 22029 }, { "epoch": 0.9970581579542883, "grad_norm": 0.735108791638317, "learning_rate": 2.3399082925701367e-10, "loss": 0.2745, "step": 22030 }, { "epoch": 0.9971034170626839, "grad_norm": 0.6263294300073722, "learning_rate": 2.2695396820593408e-10, "loss": 0.3017, "step": 22031 }, { "epoch": 0.9971486761710794, "grad_norm": 0.650352346210055, "learning_rate": 2.2002453692215875e-10, "loss": 0.3046, "step": 22032 }, { "epoch": 0.997193935279475, "grad_norm": 0.6100886433596671, "learning_rate": 2.1320253555445758e-10, "loss": 0.297, "step": 22033 }, { "epoch": 0.9972391943878706, "grad_norm": 0.5483647830903922, "learning_rate": 2.064879642488249e-10, "loss": 0.2162, "step": 22034 }, { "epoch": 0.9972844534962662, "grad_norm": 0.6325002031474769, "learning_rate": 1.998808231506999e-10, "loss": 0.2947, "step": 22035 }, { "epoch": 0.9973297126046616, "grad_norm": 0.6281452891500079, "learning_rate": 1.9338111240108094e-10, "loss": 0.2726, "step": 22036 }, { "epoch": 0.9973749717130572, "grad_norm": 0.6656382032845043, "learning_rate": 1.8698883214041118e-10, "loss": 0.2957, "step": 22037 }, { "epoch": 0.9974202308214528, "grad_norm": 0.5760080145872826, "learning_rate": 1.8070398250524811e-10, "loss": 0.2731, "step": 22038 }, { "epoch": 0.9974654899298484, "grad_norm": 0.5702775651101916, "learning_rate": 1.7452656363103893e-10, "loss": 0.2941, "step": 22039 }, { "epoch": 0.997510749038244, "grad_norm": 0.6276471887693469, "learning_rate": 1.6845657565045526e-10, "loss": 0.266, "step": 22040 }, { "epoch": 0.9975560081466395, "grad_norm": 0.576372293276278, "learning_rate": 1.6249401869394832e-10, "loss": 0.2733, "step": 22041 }, { "epoch": 0.9976012672550351, "grad_norm": 0.6028893622022222, "learning_rate": 1.5663889288919377e-10, "loss": 0.3227, "step": 22042 }, { "epoch": 0.9976465263634307, "grad_norm": 0.5989598041118115, "learning_rate": 1.50891198362757e-10, "loss": 0.2877, "step": 22043 }, { "epoch": 0.9976917854718262, "grad_norm": 0.7686153505540809, "learning_rate": 1.452509352378728e-10, "loss": 0.3048, "step": 22044 }, { "epoch": 0.9977370445802217, "grad_norm": 0.6289101541046046, "learning_rate": 1.397181036361106e-10, "loss": 0.2678, "step": 22045 }, { "epoch": 0.9977823036886173, "grad_norm": 0.7850580188083028, "learning_rate": 1.3429270367515402e-10, "loss": 0.2787, "step": 22046 }, { "epoch": 0.9978275627970129, "grad_norm": 0.6581661810845428, "learning_rate": 1.289747354726867e-10, "loss": 0.2581, "step": 22047 }, { "epoch": 0.9978728219054085, "grad_norm": 0.6217159388539761, "learning_rate": 1.237641991425065e-10, "loss": 0.2898, "step": 22048 }, { "epoch": 0.9979180810138041, "grad_norm": 0.6195719530982097, "learning_rate": 1.1866109479674593e-10, "loss": 0.2847, "step": 22049 }, { "epoch": 0.9979633401221996, "grad_norm": 0.6210557170903431, "learning_rate": 1.1366542254476198e-10, "loss": 0.296, "step": 22050 }, { "epoch": 0.9980085992305952, "grad_norm": 0.634926737070786, "learning_rate": 1.087771824948014e-10, "loss": 0.298, "step": 22051 }, { "epoch": 0.9980538583389907, "grad_norm": 0.5940878592865888, "learning_rate": 1.0399637475067004e-10, "loss": 0.275, "step": 22052 }, { "epoch": 0.9980991174473863, "grad_norm": 0.593622440995024, "learning_rate": 9.932299941561862e-11, "loss": 0.2628, "step": 22053 }, { "epoch": 0.9981443765557818, "grad_norm": 0.5324880581632664, "learning_rate": 9.475705659012236e-11, "loss": 0.2769, "step": 22054 }, { "epoch": 0.9981896356641774, "grad_norm": 0.6244130460469303, "learning_rate": 9.029854637243595e-11, "loss": 0.294, "step": 22055 }, { "epoch": 0.998234894772573, "grad_norm": 0.5707182384816525, "learning_rate": 8.594746885803862e-11, "loss": 0.2953, "step": 22056 }, { "epoch": 0.9982801538809686, "grad_norm": 0.602646670813106, "learning_rate": 8.170382414074418e-11, "loss": 0.2922, "step": 22057 }, { "epoch": 0.9983254129893641, "grad_norm": 0.7079394099653381, "learning_rate": 7.756761231159094e-11, "loss": 0.2928, "step": 22058 }, { "epoch": 0.9983706720977596, "grad_norm": 0.6180266400910659, "learning_rate": 7.353883345939672e-11, "loss": 0.309, "step": 22059 }, { "epoch": 0.9984159312061552, "grad_norm": 0.6097297959534627, "learning_rate": 6.961748767020382e-11, "loss": 0.3378, "step": 22060 }, { "epoch": 0.9984611903145508, "grad_norm": 0.6597148209260325, "learning_rate": 6.580357502949942e-11, "loss": 0.2849, "step": 22061 }, { "epoch": 0.9985064494229464, "grad_norm": 0.5774377184498107, "learning_rate": 6.209709561832977e-11, "loss": 0.27, "step": 22062 }, { "epoch": 0.9985517085313419, "grad_norm": 0.6361187588078144, "learning_rate": 5.849804951663096e-11, "loss": 0.3152, "step": 22063 }, { "epoch": 0.9985969676397375, "grad_norm": 0.6457484657525558, "learning_rate": 5.500643680156348e-11, "loss": 0.2703, "step": 22064 }, { "epoch": 0.9986422267481331, "grad_norm": 0.6391349815071075, "learning_rate": 5.162225754806738e-11, "loss": 0.2947, "step": 22065 }, { "epoch": 0.9986874858565287, "grad_norm": 0.6407162809395204, "learning_rate": 4.834551182941738e-11, "loss": 0.2897, "step": 22066 }, { "epoch": 0.9987327449649241, "grad_norm": 0.7632623349347577, "learning_rate": 4.517619971500242e-11, "loss": 0.2673, "step": 22067 }, { "epoch": 0.9987780040733197, "grad_norm": 0.6325446996026688, "learning_rate": 4.211432127421144e-11, "loss": 0.2909, "step": 22068 }, { "epoch": 0.9988232631817153, "grad_norm": 0.6078505153502691, "learning_rate": 3.9159876571992495e-11, "loss": 0.2539, "step": 22069 }, { "epoch": 0.9988685222901109, "grad_norm": 0.5766445623028778, "learning_rate": 3.6312865672183394e-11, "loss": 0.2862, "step": 22070 }, { "epoch": 0.9989137813985064, "grad_norm": 0.5453404898877032, "learning_rate": 3.3573288635291304e-11, "loss": 0.2688, "step": 22071 }, { "epoch": 0.998959040506902, "grad_norm": 0.6131718694107106, "learning_rate": 3.094114552126826e-11, "loss": 0.2529, "step": 22072 }, { "epoch": 0.9990042996152976, "grad_norm": 0.5919616950332834, "learning_rate": 2.8416436385625412e-11, "loss": 0.2526, "step": 22073 }, { "epoch": 0.9990495587236932, "grad_norm": 0.673900937746956, "learning_rate": 2.599916128331881e-11, "loss": 0.2754, "step": 22074 }, { "epoch": 0.9990948178320888, "grad_norm": 0.601033366388982, "learning_rate": 2.3689320265973815e-11, "loss": 0.2914, "step": 22075 }, { "epoch": 0.9991400769404842, "grad_norm": 0.5964276240291718, "learning_rate": 2.1486913383550467e-11, "loss": 0.3096, "step": 22076 }, { "epoch": 0.9991853360488798, "grad_norm": 0.7357281689405786, "learning_rate": 1.9391940682678133e-11, "loss": 0.259, "step": 22077 }, { "epoch": 0.9992305951572754, "grad_norm": 0.6228373665170601, "learning_rate": 1.740440220887596e-11, "loss": 0.3087, "step": 22078 }, { "epoch": 0.999275854265671, "grad_norm": 0.6323616538871186, "learning_rate": 1.5524298004887527e-11, "loss": 0.2665, "step": 22079 }, { "epoch": 0.9993211133740665, "grad_norm": 0.6233482317348159, "learning_rate": 1.3751628111235981e-11, "loss": 0.2945, "step": 22080 }, { "epoch": 0.9993663724824621, "grad_norm": 0.5677360160707733, "learning_rate": 1.2086392565113792e-11, "loss": 0.2714, "step": 22081 }, { "epoch": 0.9994116315908577, "grad_norm": 0.5755998827043224, "learning_rate": 1.0528591403713428e-11, "loss": 0.2969, "step": 22082 }, { "epoch": 0.9994568906992533, "grad_norm": 0.6115343210945876, "learning_rate": 9.07822465923136e-12, "loss": 0.3026, "step": 22083 }, { "epoch": 0.9995021498076488, "grad_norm": 0.551669168000992, "learning_rate": 7.735292363864055e-12, "loss": 0.2999, "step": 22084 }, { "epoch": 0.9995474089160443, "grad_norm": 0.630577749173783, "learning_rate": 6.4997945453670885e-12, "loss": 0.296, "step": 22085 }, { "epoch": 0.9995926680244399, "grad_norm": 0.6528100887496123, "learning_rate": 5.371731231496036e-12, "loss": 0.2953, "step": 22086 }, { "epoch": 0.9996379271328355, "grad_norm": 0.5826005800793707, "learning_rate": 4.3511024455655806e-12, "loss": 0.2665, "step": 22087 }, { "epoch": 0.9996831862412311, "grad_norm": 0.5722049007305591, "learning_rate": 3.437908209780183e-12, "loss": 0.2648, "step": 22088 }, { "epoch": 0.9997284453496266, "grad_norm": 0.615232940231692, "learning_rate": 2.6321485435687465e-12, "loss": 0.2973, "step": 22089 }, { "epoch": 0.9997737044580222, "grad_norm": 0.5882069213196555, "learning_rate": 1.9338234646948397e-12, "loss": 0.2987, "step": 22090 }, { "epoch": 0.9998189635664178, "grad_norm": 0.5699427946030503, "learning_rate": 1.3429329881464725e-12, "loss": 0.2605, "step": 22091 }, { "epoch": 0.9998642226748133, "grad_norm": 0.6242708571432227, "learning_rate": 8.59477126136099e-13, "loss": 0.2567, "step": 22092 }, { "epoch": 0.9999094817832088, "grad_norm": 0.6206500807977732, "learning_rate": 4.834558897659492e-13, "loss": 0.2771, "step": 22093 }, { "epoch": 0.9999547408916044, "grad_norm": 0.6006375397432488, "learning_rate": 2.148692862524726e-13, "loss": 0.2852, "step": 22094 }, { "epoch": 1.0, "grad_norm": 0.5607683363863506, "learning_rate": 5.3717321701896033e-14, "loss": 0.2744, "step": 22095 }, { "epoch": 1.0, "step": 22095, "total_flos": 1.469106584682496e+17, "train_loss": 0.3743057166681648, "train_runtime": 137166.71, "train_samples_per_second": 82.478, "train_steps_per_second": 0.161 } ], "logging_steps": 1.0, "max_steps": 22095, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.469106584682496e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }