DeepSeek-R1-Distill-Llama-70B-Uncensored-v2-Unbiased-Reasoner-Lora/checkpoint-948/trainer_state.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.6007604562737643,
  "eval_steps": 500,
  "global_step": 948,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0006337135614702154,
      "grad_norm": 0.22353313863277435,
      "learning_rate": 2e-05,
      "loss": 0.795,
      "step": 1
    },
    {
      "epoch": 0.0012674271229404308,
      "grad_norm": 0.270685613155365,
      "learning_rate": 4e-05,
      "loss": 0.9841,
      "step": 2
    },
    {
      "epoch": 0.0019011406844106464,
      "grad_norm": 0.13555319607257843,
      "learning_rate": 6e-05,
      "loss": 0.8728,
      "step": 3
    },
    {
      "epoch": 0.0025348542458808617,
      "grad_norm": 0.1665652096271515,
      "learning_rate": 8e-05,
      "loss": 0.8625,
      "step": 4
    },
    {
      "epoch": 0.0031685678073510772,
      "grad_norm": 0.13588839769363403,
      "learning_rate": 0.0001,
      "loss": 0.6776,
      "step": 5
    },
    {
      "epoch": 0.0038022813688212928,
      "grad_norm": 0.2811749279499054,
      "learning_rate": 0.00012,
      "loss": 0.8813,
      "step": 6
    },
    {
      "epoch": 0.004435994930291508,
      "grad_norm": 0.327694833278656,
      "learning_rate": 0.00014,
      "loss": 0.9009,
      "step": 7
    },
    {
      "epoch": 0.005069708491761723,
      "grad_norm": 0.24555213749408722,
      "learning_rate": 0.00016,
      "loss": 0.7054,
      "step": 8
    },
    {
      "epoch": 0.005703422053231939,
      "grad_norm": 0.14921338856220245,
      "learning_rate": 0.00018,
      "loss": 0.697,
      "step": 9
    },
    {
      "epoch": 0.0063371356147021544,
      "grad_norm": 0.13169103860855103,
      "learning_rate": 0.0002,
      "loss": 0.6007,
      "step": 10
    },
    {
      "epoch": 0.00697084917617237,
      "grad_norm": 0.06807047873735428,
      "learning_rate": 0.00019999979928608238,
      "loss": 0.6155,
      "step": 11
    },
    {
      "epoch": 0.0076045627376425855,
      "grad_norm": 0.08288167417049408,
      "learning_rate": 0.00019999919714513528,
      "loss": 0.5641,
      "step": 12
    },
    {
      "epoch": 0.008238276299112801,
      "grad_norm": 0.12285872548818588,
      "learning_rate": 0.00019999819357957582,
      "loss": 0.7526,
      "step": 13
    },
    {
      "epoch": 0.008871989860583017,
      "grad_norm": 0.15566691756248474,
      "learning_rate": 0.00019999678859343263,
      "loss": 0.4519,
      "step": 14
    },
    {
      "epoch": 0.009505703422053232,
      "grad_norm": 0.1301712989807129,
      "learning_rate": 0.00019999498219234568,
      "loss": 0.486,
      "step": 15
    },
    {
      "epoch": 0.010139416983523447,
      "grad_norm": 0.14493511617183685,
      "learning_rate": 0.00019999277438356638,
      "loss": 0.7146,
      "step": 16
    },
    {
      "epoch": 0.010773130544993664,
      "grad_norm": 0.1372271478176117,
      "learning_rate": 0.00019999016517595753,
      "loss": 0.5933,
      "step": 17
    },
    {
      "epoch": 0.011406844106463879,
      "grad_norm": 0.09944190829992294,
      "learning_rate": 0.00019998715457999314,
      "loss": 0.8399,
      "step": 18
    },
    {
      "epoch": 0.012040557667934094,
      "grad_norm": 0.057923465967178345,
      "learning_rate": 0.0001999837426077586,
      "loss": 0.5613,
      "step": 19
    },
    {
      "epoch": 0.012674271229404309,
      "grad_norm": 0.06214901804924011,
      "learning_rate": 0.00019997992927295059,
      "loss": 0.5374,
      "step": 20
    },
    {
      "epoch": 0.013307984790874524,
      "grad_norm": 0.04898112639784813,
      "learning_rate": 0.0001999757145908768,
      "loss": 0.5451,
      "step": 21
    },
    {
      "epoch": 0.01394169835234474,
      "grad_norm": 0.07026948034763336,
      "learning_rate": 0.0001999710985784562,
      "loss": 0.5635,
      "step": 22
    },
    {
      "epoch": 0.014575411913814956,
      "grad_norm": 0.0672365352511406,
      "learning_rate": 0.00019996608125421873,
      "loss": 0.5996,
      "step": 23
    },
    {
      "epoch": 0.015209125475285171,
      "grad_norm": 0.06477885693311691,
      "learning_rate": 0.00019996066263830531,
      "loss": 0.4707,
      "step": 24
    },
    {
      "epoch": 0.015842839036755388,
      "grad_norm": 0.07720793038606644,
      "learning_rate": 0.0001999548427524678,
      "loss": 0.5891,
      "step": 25
    },
    {
      "epoch": 0.016476552598225603,
      "grad_norm": 0.06699500977993011,
      "learning_rate": 0.0001999486216200688,
      "loss": 0.5316,
      "step": 26
    },
    {
      "epoch": 0.017110266159695818,
      "grad_norm": 0.07539479434490204,
      "learning_rate": 0.00019994199926608172,
      "loss": 0.5854,
      "step": 27
    },
    {
      "epoch": 0.017743979721166033,
      "grad_norm": 4.677523136138916,
      "learning_rate": 0.00019993497571709048,
      "loss": 0.5019,
      "step": 28
    },
    {
      "epoch": 0.018377693282636248,
      "grad_norm": 0.07100815325975418,
      "learning_rate": 0.00019992755100128962,
      "loss": 0.4729,
      "step": 29
    },
    {
      "epoch": 0.019011406844106463,
      "grad_norm": 0.06506210565567017,
      "learning_rate": 0.000199919725148484,
      "loss": 0.5597,
      "step": 30
    },
    {
      "epoch": 0.01964512040557668,
      "grad_norm": 0.04945315420627594,
      "learning_rate": 0.0001999114981900887,
      "loss": 0.5044,
      "step": 31
    },
    {
      "epoch": 0.020278833967046894,
      "grad_norm": 0.05103156715631485,
      "learning_rate": 0.0001999028701591291,
      "loss": 0.3637,
      "step": 32
    },
    {
      "epoch": 0.02091254752851711,
      "grad_norm": 0.05288761481642723,
      "learning_rate": 0.00019989384109024048,
      "loss": 0.4345,
      "step": 33
    },
    {
      "epoch": 0.021546261089987327,
      "grad_norm": 0.05457635968923569,
      "learning_rate": 0.0001998844110196681,
      "loss": 0.4714,
      "step": 34
    },
    {
      "epoch": 0.022179974651457542,
      "grad_norm": 0.055830612778663635,
      "learning_rate": 0.0001998745799852668,
      "loss": 0.5285,
      "step": 35
    },
    {
      "epoch": 0.022813688212927757,
      "grad_norm": 0.05858856439590454,
      "learning_rate": 0.00019986434802650113,
      "loss": 0.5106,
      "step": 36
    },
    {
      "epoch": 0.023447401774397972,
      "grad_norm": 0.05847540497779846,
      "learning_rate": 0.00019985371518444503,
      "loss": 0.4394,
      "step": 37
    },
    {
      "epoch": 0.024081115335868188,
      "grad_norm": 0.1140831857919693,
      "learning_rate": 0.00019984268150178167,
      "loss": 0.4782,
      "step": 38
    },
    {
      "epoch": 0.024714828897338403,
      "grad_norm": 0.06483329832553864,
      "learning_rate": 0.00019983124702280334,
      "loss": 0.396,
      "step": 39
    },
    {
      "epoch": 0.025348542458808618,
      "grad_norm": 0.07212468981742859,
      "learning_rate": 0.00019981941179341117,
      "loss": 0.5173,
      "step": 40
    },
    {
      "epoch": 0.025982256020278833,
      "grad_norm": 0.1697537750005722,
      "learning_rate": 0.00019980717586111512,
      "loss": 0.6164,
      "step": 41
    },
    {
      "epoch": 0.026615969581749048,
      "grad_norm": 0.05975339934229851,
      "learning_rate": 0.00019979453927503364,
      "loss": 0.4981,
      "step": 42
    },
    {
      "epoch": 0.027249683143219267,
      "grad_norm": 0.0607403926551342,
      "learning_rate": 0.00019978150208589348,
      "loss": 0.533,
      "step": 43
    },
    {
      "epoch": 0.02788339670468948,
      "grad_norm": 0.07225210964679718,
      "learning_rate": 0.00019976806434602952,
      "loss": 0.5055,
      "step": 44
    },
    {
      "epoch": 0.028517110266159697,
      "grad_norm": 0.07008686661720276,
      "learning_rate": 0.00019975422610938462,
      "loss": 0.6274,
      "step": 45
    },
    {
      "epoch": 0.029150823827629912,
      "grad_norm": 0.07289402186870575,
      "learning_rate": 0.0001997399874315093,
      "loss": 0.5247,
      "step": 46
    },
    {
      "epoch": 0.029784537389100127,
      "grad_norm": 0.10037431120872498,
      "learning_rate": 0.0001997253483695616,
      "loss": 0.647,
      "step": 47
    },
    {
      "epoch": 0.030418250950570342,
      "grad_norm": 0.06468270719051361,
      "learning_rate": 0.00019971030898230672,
      "loss": 0.5719,
      "step": 48
    },
    {
      "epoch": 0.031051964512040557,
      "grad_norm": 0.0472278967499733,
      "learning_rate": 0.00019969486933011705,
      "loss": 0.5565,
      "step": 49
    },
    {
      "epoch": 0.031685678073510776,
      "grad_norm": 0.0584145151078701,
      "learning_rate": 0.00019967902947497156,
      "loss": 0.5432,
      "step": 50
    },
    {
      "epoch": 0.03231939163498099,
      "grad_norm": 0.08962458372116089,
      "learning_rate": 0.00019966278948045592,
      "loss": 0.6432,
      "step": 51
    },
    {
      "epoch": 0.032953105196451206,
      "grad_norm": 0.08193643391132355,
      "learning_rate": 0.00019964614941176195,
      "loss": 0.5341,
      "step": 52
    },
    {
      "epoch": 0.03358681875792142,
      "grad_norm": 0.07166769355535507,
      "learning_rate": 0.00019962910933568747,
      "loss": 0.5481,
      "step": 53
    },
    {
      "epoch": 0.034220532319391636,
      "grad_norm": 0.10422351956367493,
      "learning_rate": 0.00019961166932063614,
      "loss": 0.6145,
      "step": 54
    },
    {
      "epoch": 0.03485424588086185,
      "grad_norm": 0.06273826211690903,
      "learning_rate": 0.00019959382943661704,
      "loss": 0.4969,
      "step": 55
    },
    {
      "epoch": 0.035487959442332066,
      "grad_norm": 0.06504670530557632,
      "learning_rate": 0.0001995755897552444,
      "loss": 0.6093,
      "step": 56
    },
    {
      "epoch": 0.03612167300380228,
      "grad_norm": 0.05045778304338455,
      "learning_rate": 0.00019955695034973742,
      "loss": 0.4191,
      "step": 57
    },
    {
      "epoch": 0.036755386565272496,
      "grad_norm": 0.06495866179466248,
      "learning_rate": 0.00019953791129491983,
      "loss": 0.4762,
      "step": 58
    },
    {
      "epoch": 0.037389100126742715,
      "grad_norm": 0.0814126655459404,
      "learning_rate": 0.0001995184726672197,
      "loss": 0.5599,
      "step": 59
    },
    {
      "epoch": 0.03802281368821293,
      "grad_norm": 0.052061304450035095,
      "learning_rate": 0.00019949863454466908,
      "loss": 0.4822,
      "step": 60
    },
    {
      "epoch": 0.038656527249683145,
      "grad_norm": 0.05419475957751274,
      "learning_rate": 0.00019947839700690375,
      "loss": 0.5625,
      "step": 61
    },
    {
      "epoch": 0.03929024081115336,
      "grad_norm": 0.06495067477226257,
      "learning_rate": 0.0001994577601351628,
      "loss": 0.5863,
      "step": 62
    },
    {
      "epoch": 0.039923954372623575,
      "grad_norm": 0.055791907012462616,
      "learning_rate": 0.00019943672401228837,
      "loss": 0.4588,
      "step": 63
    },
    {
      "epoch": 0.04055766793409379,
      "grad_norm": 0.03923908621072769,
      "learning_rate": 0.00019941528872272532,
      "loss": 0.3841,
      "step": 64
    },
    {
      "epoch": 0.041191381495564006,
      "grad_norm": 0.08200399577617645,
      "learning_rate": 0.00019939345435252088,
      "loss": 0.6163,
      "step": 65
    },
    {
      "epoch": 0.04182509505703422,
      "grad_norm": 0.05708305537700653,
      "learning_rate": 0.00019937122098932428,
      "loss": 0.6363,
      "step": 66
    },
    {
      "epoch": 0.042458808618504436,
      "grad_norm": 0.053468603640794754,
      "learning_rate": 0.0001993485887223864,
      "loss": 0.4777,
      "step": 67
    },
    {
      "epoch": 0.043092522179974654,
      "grad_norm": 0.08539824187755585,
      "learning_rate": 0.00019932555764255952,
      "loss": 0.4922,
      "step": 68
    },
    {
      "epoch": 0.043726235741444866,
      "grad_norm": 0.07483454793691635,
      "learning_rate": 0.00019930212784229675,
      "loss": 0.6337,
      "step": 69
    },
    {
      "epoch": 0.044359949302915085,
      "grad_norm": 0.06771700084209442,
      "learning_rate": 0.00019927829941565186,
      "loss": 0.4559,
      "step": 70
    },
    {
      "epoch": 0.044993662864385296,
      "grad_norm": 0.05689261853694916,
      "learning_rate": 0.0001992540724582788,
      "loss": 0.5489,
      "step": 71
    },
    {
      "epoch": 0.045627376425855515,
      "grad_norm": 0.05044565722346306,
      "learning_rate": 0.00019922944706743127,
      "loss": 0.4472,
      "step": 72
    },
    {
      "epoch": 0.046261089987325726,
      "grad_norm": 0.07331253588199615,
      "learning_rate": 0.00019920442334196248,
      "loss": 0.4752,
      "step": 73
    },
    {
      "epoch": 0.046894803548795945,
      "grad_norm": 0.057449884712696075,
      "learning_rate": 0.0001991790013823246,
      "loss": 0.4525,
      "step": 74
    },
    {
      "epoch": 0.04752851711026616,
      "grad_norm": 0.08357278257608414,
      "learning_rate": 0.00019915318129056853,
      "loss": 0.5813,
      "step": 75
    },
    {
      "epoch": 0.048162230671736375,
      "grad_norm": 0.051311176270246506,
      "learning_rate": 0.00019912696317034322,
      "loss": 0.4593,
      "step": 76
    },
    {
      "epoch": 0.048795944233206594,
      "grad_norm": 0.06535078585147858,
      "learning_rate": 0.00019910034712689552,
      "loss": 0.5339,
      "step": 77
    },
    {
      "epoch": 0.049429657794676805,
      "grad_norm": 0.13796891272068024,
      "learning_rate": 0.00019907333326706967,
      "loss": 0.5438,
      "step": 78
    },
    {
      "epoch": 0.050063371356147024,
      "grad_norm": 0.05667581036686897,
      "learning_rate": 0.0001990459216993068,
      "loss": 0.6295,
      "step": 79
    },
    {
      "epoch": 0.050697084917617236,
      "grad_norm": 0.05243121087551117,
      "learning_rate": 0.00019901811253364456,
      "loss": 0.4782,
      "step": 80
    },
    {
      "epoch": 0.051330798479087454,
      "grad_norm": 0.0769771933555603,
      "learning_rate": 0.0001989899058817167,
      "loss": 0.5692,
      "step": 81
    },
    {
      "epoch": 0.051964512040557666,
      "grad_norm": 0.07334766536951065,
      "learning_rate": 0.00019896130185675261,
      "loss": 0.569,
      "step": 82
    },
    {
      "epoch": 0.052598225602027884,
      "grad_norm": 0.07953603565692902,
      "learning_rate": 0.00019893230057357671,
      "loss": 0.4059,
      "step": 83
    },
    {
      "epoch": 0.053231939163498096,
      "grad_norm": 0.05282806232571602,
      "learning_rate": 0.00019890290214860833,
      "loss": 0.5186,
      "step": 84
    },
    {
      "epoch": 0.053865652724968315,
      "grad_norm": 0.06661225110292435,
      "learning_rate": 0.00019887310669986085,
      "loss": 0.6404,
      "step": 85
    },
    {
      "epoch": 0.05449936628643853,
      "grad_norm": 0.07150626182556152,
      "learning_rate": 0.00019884291434694152,
      "loss": 0.5865,
      "step": 86
    },
    {
      "epoch": 0.055133079847908745,
      "grad_norm": 0.054674554616212845,
      "learning_rate": 0.00019881232521105089,
      "loss": 0.5429,
      "step": 87
    },
    {
      "epoch": 0.05576679340937896,
      "grad_norm": 0.057950377464294434,
      "learning_rate": 0.00019878133941498224,
      "loss": 0.6705,
      "step": 88
    },
    {
      "epoch": 0.056400506970849175,
      "grad_norm": 0.07045155763626099,
      "learning_rate": 0.0001987499570831211,
      "loss": 0.5393,
      "step": 89
    },
    {
      "epoch": 0.057034220532319393,
      "grad_norm": 0.055960092693567276,
      "learning_rate": 0.00019871817834144504,
      "loss": 0.4481,
      "step": 90
    },
    {
      "epoch": 0.057667934093789605,
      "grad_norm": 0.05631652846932411,
      "learning_rate": 0.00019868600331752264,
      "loss": 0.5963,
      "step": 91
    },
    {
      "epoch": 0.058301647655259824,
      "grad_norm": 0.05120407044887543,
      "learning_rate": 0.00019865343214051347,
      "loss": 0.486,
      "step": 92
    },
    {
      "epoch": 0.058935361216730035,
      "grad_norm": 0.05507562682032585,
      "learning_rate": 0.0001986204649411673,
      "loss": 0.5514,
      "step": 93
    },
    {
      "epoch": 0.059569074778200254,
      "grad_norm": 0.057690516114234924,
      "learning_rate": 0.0001985871018518236,
      "loss": 0.4969,
      "step": 94
    },
    {
      "epoch": 0.060202788339670466,
      "grad_norm": 0.05942325294017792,
      "learning_rate": 0.00019855334300641114,
      "loss": 0.51,
      "step": 95
    },
    {
      "epoch": 0.060836501901140684,
      "grad_norm": 0.05777527391910553,
      "learning_rate": 0.0001985191885404473,
      "loss": 0.5401,
      "step": 96
    },
    {
      "epoch": 0.0614702154626109,
      "grad_norm": 0.07077159732580185,
      "learning_rate": 0.00019848463859103763,
      "loss": 0.5568,
      "step": 97
    },
    {
      "epoch": 0.062103929024081114,
      "grad_norm": 0.050649482756853104,
      "learning_rate": 0.00019844969329687527,
      "loss": 0.5418,
      "step": 98
    },
    {
      "epoch": 0.06273764258555133,
      "grad_norm": 0.059522844851017,
      "learning_rate": 0.00019841435279824028,
      "loss": 0.4679,
      "step": 99
    },
    {
      "epoch": 0.06337135614702155,
      "grad_norm": 0.061260003596544266,
      "learning_rate": 0.0001983786172369993,
      "loss": 0.557,
      "step": 100
    },
    {
      "epoch": 0.06400506970849176,
      "grad_norm": 0.0513591468334198,
      "learning_rate": 0.00019834248675660486,
      "loss": 0.5849,
      "step": 101
    },
    {
      "epoch": 0.06463878326996197,
      "grad_norm": 0.06722971051931381,
      "learning_rate": 0.0001983059615020947,
      "loss": 0.4003,
      "step": 102
    },
    {
      "epoch": 0.06527249683143219,
      "grad_norm": 0.0629379004240036,
      "learning_rate": 0.0001982690416200914,
      "loss": 0.5322,
      "step": 103
    },
    {
      "epoch": 0.06590621039290241,
      "grad_norm": 0.05402471870183945,
      "learning_rate": 0.00019823172725880165,
      "loss": 0.5634,
      "step": 104
    },
    {
      "epoch": 0.06653992395437262,
      "grad_norm": 0.15680162608623505,
      "learning_rate": 0.0001981940185680156,
      "loss": 0.5361,
      "step": 105
    },
    {
      "epoch": 0.06717363751584284,
      "grad_norm": 0.06348865479230881,
      "learning_rate": 0.00019815591569910654,
      "loss": 0.5322,
      "step": 106
    },
    {
      "epoch": 0.06780735107731306,
      "grad_norm": 0.05004284158349037,
      "learning_rate": 0.00019811741880502995,
      "loss": 0.5524,
      "step": 107
    },
    {
      "epoch": 0.06844106463878327,
      "grad_norm": 0.06271985173225403,
      "learning_rate": 0.00019807852804032305,
      "loss": 0.4347,
      "step": 108
    },
    {
      "epoch": 0.06907477820025348,
      "grad_norm": 0.1546468287706375,
      "learning_rate": 0.00019803924356110423,
      "loss": 0.4294,
      "step": 109
    },
    {
      "epoch": 0.0697084917617237,
      "grad_norm": 0.06472460180521011,
      "learning_rate": 0.00019799956552507233,
      "loss": 0.5693,
      "step": 110
    },
    {
      "epoch": 0.07034220532319392,
      "grad_norm": 0.06021984666585922,
      "learning_rate": 0.00019795949409150598,
      "loss": 0.6554,
      "step": 111
    },
    {
      "epoch": 0.07097591888466413,
      "grad_norm": 0.04533032327890396,
      "learning_rate": 0.00019791902942126313,
      "loss": 0.4425,
      "step": 112
    },
    {
      "epoch": 0.07160963244613434,
      "grad_norm": 0.0662391185760498,
      "learning_rate": 0.0001978781716767802,
      "loss": 0.5258,
      "step": 113
    },
    {
      "epoch": 0.07224334600760456,
      "grad_norm": 0.06131117045879364,
      "learning_rate": 0.00019783692102207155,
      "loss": 0.4556,
      "step": 114
    },
    {
      "epoch": 0.07287705956907478,
      "grad_norm": 0.07924918830394745,
      "learning_rate": 0.00019779527762272877,
      "loss": 0.5137,
      "step": 115
    },
    {
      "epoch": 0.07351077313054499,
      "grad_norm": 0.07061261683702469,
      "learning_rate": 0.0001977532416459201,
      "loss": 0.4554,
      "step": 116
    },
    {
      "epoch": 0.0741444866920152,
      "grad_norm": 0.04919254407286644,
      "learning_rate": 0.00019771081326038962,
      "loss": 0.5213,
      "step": 117
    },
    {
      "epoch": 0.07477820025348543,
      "grad_norm": 0.053799472749233246,
      "learning_rate": 0.00019766799263645673,
      "loss": 0.5648,
      "step": 118
    },
    {
      "epoch": 0.07541191381495564,
      "grad_norm": 0.06857369095087051,
      "learning_rate": 0.00019762477994601522,
      "loss": 0.6841,
      "step": 119
    },
    {
      "epoch": 0.07604562737642585,
      "grad_norm": 0.0719090923666954,
      "learning_rate": 0.000197581175362533,
      "loss": 0.4154,
      "step": 120
    },
    {
      "epoch": 0.07667934093789606,
      "grad_norm": 0.10528447479009628,
      "learning_rate": 0.00019753717906105092,
      "loss": 0.5674,
      "step": 121
    },
    {
      "epoch": 0.07731305449936629,
      "grad_norm": 0.05879104137420654,
      "learning_rate": 0.00019749279121818235,
      "loss": 0.5282,
      "step": 122
    },
    {
      "epoch": 0.0779467680608365,
      "grad_norm": 0.050949644297361374,
      "learning_rate": 0.00019744801201211255,
      "loss": 0.4398,
      "step": 123
    },
    {
      "epoch": 0.07858048162230671,
      "grad_norm": 0.061247747391462326,
      "learning_rate": 0.00019740284162259765,
      "loss": 0.4269,
      "step": 124
    },
    {
      "epoch": 0.07921419518377694,
      "grad_norm": 0.09446462988853455,
      "learning_rate": 0.0001973572802309642,
      "loss": 0.6362,
      "step": 125
    },
    {
      "epoch": 0.07984790874524715,
      "grad_norm": 0.06124195456504822,
      "learning_rate": 0.0001973113280201082,
      "loss": 0.435,
      "step": 126
    },
    {
      "epoch": 0.08048162230671736,
      "grad_norm": 0.05198049172759056,
      "learning_rate": 0.0001972649851744948,
      "loss": 0.4617,
      "step": 127
    },
    {
      "epoch": 0.08111533586818757,
      "grad_norm": 0.05457935482263565,
      "learning_rate": 0.00019721825188015693,
      "loss": 0.548,
      "step": 128
    },
    {
      "epoch": 0.0817490494296578,
      "grad_norm": 0.054542481899261475,
      "learning_rate": 0.0001971711283246951,
      "loss": 0.4449,
      "step": 129
    },
    {
      "epoch": 0.08238276299112801,
      "grad_norm": 0.0528152696788311,
      "learning_rate": 0.0001971236146972764,
      "loss": 0.5868,
      "step": 130
    },
    {
      "epoch": 0.08301647655259822,
      "grad_norm": 0.049837883561849594,
      "learning_rate": 0.0001970757111886337,
      "loss": 0.4426,
      "step": 131
    },
    {
      "epoch": 0.08365019011406843,
      "grad_norm": 0.04912682995200157,
      "learning_rate": 0.00019702741799106508,
      "loss": 0.5328,
      "step": 132
    },
    {
      "epoch": 0.08428390367553866,
      "grad_norm": 0.06654444336891174,
      "learning_rate": 0.00019697873529843282,
      "loss": 0.6239,
      "step": 133
    },
    {
      "epoch": 0.08491761723700887,
      "grad_norm": 0.1822642683982849,
      "learning_rate": 0.00019692966330616283,
      "loss": 0.6482,
      "step": 134
    },
    {
      "epoch": 0.08555133079847908,
      "grad_norm": 0.07404999434947968,
      "learning_rate": 0.00019688020221124376,
      "loss": 0.5473,
      "step": 135
    },
    {
      "epoch": 0.08618504435994931,
      "grad_norm": 0.08534666895866394,
      "learning_rate": 0.00019683035221222618,
      "loss": 0.4794,
      "step": 136
    },
    {
      "epoch": 0.08681875792141952,
      "grad_norm": 0.05804799869656563,
      "learning_rate": 0.00019678011350922185,
      "loss": 0.5749,
      "step": 137
    },
    {
      "epoch": 0.08745247148288973,
      "grad_norm": 0.0600556954741478,
      "learning_rate": 0.00019672948630390294,
      "loss": 0.4929,
      "step": 138
    },
    {
      "epoch": 0.08808618504435994,
      "grad_norm": 0.07564158737659454,
      "learning_rate": 0.00019667847079950118,
      "loss": 0.5806,
      "step": 139
    },
    {
      "epoch": 0.08871989860583017,
      "grad_norm": 0.06359097361564636,
      "learning_rate": 0.00019662706720080693,
      "loss": 0.5427,
      "step": 140
    },
    {
      "epoch": 0.08935361216730038,
      "grad_norm": 0.05452190712094307,
      "learning_rate": 0.00019657527571416856,
      "loss": 0.4845,
      "step": 141
    },
    {
      "epoch": 0.08998732572877059,
      "grad_norm": 0.05258841812610626,
      "learning_rate": 0.00019652309654749156,
      "loss": 0.5255,
      "step": 142
    },
    {
      "epoch": 0.09062103929024082,
      "grad_norm": 0.06789179146289825,
      "learning_rate": 0.0001964705299102376,
      "loss": 0.6002,
      "step": 143
    },
    {
      "epoch": 0.09125475285171103,
      "grad_norm": 0.05940316617488861,
      "learning_rate": 0.00019641757601342378,
      "loss": 0.6178,
      "step": 144
    },
    {
      "epoch": 0.09188846641318124,
      "grad_norm": 0.08051005005836487,
      "learning_rate": 0.00019636423506962181,
      "loss": 0.4728,
      "step": 145
    },
    {
      "epoch": 0.09252217997465145,
      "grad_norm": 0.06979210674762726,
      "learning_rate": 0.00019631050729295707,
      "loss": 0.5166,
      "step": 146
    },
    {
      "epoch": 0.09315589353612168,
      "grad_norm": 0.04284743592143059,
      "learning_rate": 0.00019625639289910777,
      "loss": 0.3685,
      "step": 147
    },
    {
      "epoch": 0.09378960709759189,
      "grad_norm": 0.05410388484597206,
      "learning_rate": 0.00019620189210530425,
      "loss": 0.582,
      "step": 148
    },
    {
      "epoch": 0.0944233206590621,
      "grad_norm": 0.08875017613172531,
      "learning_rate": 0.00019614700513032775,
      "loss": 0.6757,
      "step": 149
    },
    {
      "epoch": 0.09505703422053231,
      "grad_norm": 0.06792068481445312,
      "learning_rate": 0.00019609173219450998,
      "loss": 0.5236,
      "step": 150
    },
    {
      "epoch": 0.09569074778200254,
      "grad_norm": 0.060000237077474594,
      "learning_rate": 0.0001960360735197318,
      "loss": 0.4813,
      "step": 151
    },
    {
      "epoch": 0.09632446134347275,
      "grad_norm": 0.052172888070344925,
      "learning_rate": 0.00019598002932942266,
      "loss": 0.5792,
      "step": 152
    },
    {
      "epoch": 0.09695817490494296,
      "grad_norm": 0.04992865398526192,
      "learning_rate": 0.00019592359984855952,
      "loss": 0.4652,
      "step": 153
    },
    {
      "epoch": 0.09759188846641319,
      "grad_norm": 0.05908304825425148,
      "learning_rate": 0.00019586678530366606,
      "loss": 0.4968,
      "step": 154
    },
    {
      "epoch": 0.0982256020278834,
      "grad_norm": 0.16080443561077118,
      "learning_rate": 0.00019580958592281167,
      "loss": 0.4804,
      "step": 155
    },
    {
      "epoch": 0.09885931558935361,
      "grad_norm": 0.05863935872912407,
      "learning_rate": 0.00019575200193561057,
      "loss": 0.5313,
      "step": 156
    },
    {
      "epoch": 0.09949302915082382,
      "grad_norm": 0.047341488301754,
      "learning_rate": 0.0001956940335732209,
      "loss": 0.4939,
      "step": 157
    },
    {
      "epoch": 0.10012674271229405,
      "grad_norm": 0.059797484427690506,
      "learning_rate": 0.00019563568106834383,
      "loss": 0.4806,
      "step": 158
    },
    {
      "epoch": 0.10076045627376426,
      "grad_norm": 0.08543235808610916,
      "learning_rate": 0.00019557694465522255,
      "loss": 0.5691,
      "step": 159
    },
    {
      "epoch": 0.10139416983523447,
      "grad_norm": 0.0614972747862339,
      "learning_rate": 0.00019551782456964136,
      "loss": 0.5143,
      "step": 160
    },
    {
      "epoch": 0.10202788339670468,
      "grad_norm": 0.12742456793785095,
      "learning_rate": 0.00019545832104892475,
      "loss": 0.4987,
      "step": 161
    },
    {
      "epoch": 0.10266159695817491,
      "grad_norm": 0.06898955255746841,
      "learning_rate": 0.00019539843433193639,
      "loss": 0.5504,
      "step": 162
    },
    {
      "epoch": 0.10329531051964512,
      "grad_norm": 0.11239788681268692,
      "learning_rate": 0.0001953381646590783,
      "loss": 0.3448,
      "step": 163
    },
    {
      "epoch": 0.10392902408111533,
      "grad_norm": 0.24028901755809784,
      "learning_rate": 0.00019527751227228963,
      "loss": 0.5294,
      "step": 164
    },
    {
      "epoch": 0.10456273764258556,
      "grad_norm": 0.0903674066066742,
      "learning_rate": 0.00019521647741504604,
      "loss": 0.514,
      "step": 165
    },
    {
      "epoch": 0.10519645120405577,
      "grad_norm": 0.051598865538835526,
      "learning_rate": 0.00019515506033235833,
      "loss": 0.4771,
      "step": 166
    },
    {
      "epoch": 0.10583016476552598,
      "grad_norm": 0.05018608644604683,
      "learning_rate": 0.0001950932612707719,
      "loss": 0.4492,
      "step": 167
    },
    {
      "epoch": 0.10646387832699619,
      "grad_norm": 0.07150580734014511,
      "learning_rate": 0.00019503108047836523,
      "loss": 0.5806,
      "step": 168
    },
    {
      "epoch": 0.10709759188846642,
      "grad_norm": 0.05979820713400841,
      "learning_rate": 0.00019496851820474944,
      "loss": 0.6138,
      "step": 169
    },
    {
      "epoch": 0.10773130544993663,
      "grad_norm": 0.05117090418934822,
      "learning_rate": 0.00019490557470106686,
      "loss": 0.5138,
      "step": 170
    },
    {
      "epoch": 0.10836501901140684,
      "grad_norm": 0.049405183643102646,
      "learning_rate": 0.0001948422502199903,
      "loss": 0.4974,
      "step": 171
    },
    {
      "epoch": 0.10899873257287707,
      "grad_norm": 0.060524292290210724,
      "learning_rate": 0.00019477854501572176,
      "loss": 0.5448,
      "step": 172
    },
    {
      "epoch": 0.10963244613434728,
      "grad_norm": 0.05022512748837471,
      "learning_rate": 0.0001947144593439917,
      "loss": 0.5295,
      "step": 173
    },
    {
      "epoch": 0.11026615969581749,
      "grad_norm": 0.05024838447570801,
      "learning_rate": 0.0001946499934620579,
      "loss": 0.4842,
      "step": 174
    },
    {
      "epoch": 0.1108998732572877,
      "grad_norm": 0.05859989672899246,
      "learning_rate": 0.00019458514762870426,
      "loss": 0.5105,
      "step": 175
    },
    {
      "epoch": 0.11153358681875793,
      "grad_norm": 0.05963319167494774,
      "learning_rate": 0.00019451992210424006,
      "loss": 0.4833,
      "step": 176
    },
    {
      "epoch": 0.11216730038022814,
      "grad_norm": 0.05941782146692276,
      "learning_rate": 0.0001944543171504987,
      "loss": 0.4743,
      "step": 177
    },
    {
      "epoch": 0.11280101394169835,
      "grad_norm": 0.07598856091499329,
      "learning_rate": 0.00019438833303083678,
      "loss": 0.483,
      "step": 178
    },
    {
      "epoch": 0.11343472750316856,
      "grad_norm": 0.05751622095704079,
      "learning_rate": 0.0001943219700101328,
      "loss": 0.563,
      "step": 179
    },
    {
      "epoch": 0.11406844106463879,
      "grad_norm": 0.08273158222436905,
      "learning_rate": 0.0001942552283547865,
      "loss": 0.5514,
      "step": 180
    },
    {
      "epoch": 0.114702154626109,
      "grad_norm": 0.04589926823973656,
      "learning_rate": 0.00019418810833271745,
      "loss": 0.4353,
      "step": 181
    },
    {
      "epoch": 0.11533586818757921,
      "grad_norm": 0.04818568378686905,
      "learning_rate": 0.00019412061021336404,
      "loss": 0.4653,
      "step": 182
    },
    {
      "epoch": 0.11596958174904944,
      "grad_norm": 0.062292054295539856,
      "learning_rate": 0.0001940527342676826,
      "loss": 0.5451,
      "step": 183
    },
    {
      "epoch": 0.11660329531051965,
      "grad_norm": 0.05161510780453682,
      "learning_rate": 0.000193984480768146,
      "loss": 0.5174,
      "step": 184
    },
    {
      "epoch": 0.11723700887198986,
      "grad_norm": 0.0669926106929779,
      "learning_rate": 0.0001939158499887428,
      "loss": 0.5074,
      "step": 185
    },
    {
      "epoch": 0.11787072243346007,
      "grad_norm": 0.04856441915035248,
      "learning_rate": 0.00019384684220497605,
      "loss": 0.3898,
      "step": 186
    },
    {
      "epoch": 0.1185044359949303,
      "grad_norm": 0.05841194465756416,
      "learning_rate": 0.0001937774576938622,
      "loss": 0.5437,
      "step": 187
    },
    {
      "epoch": 0.11913814955640051,
      "grad_norm": 0.05253444239497185,
      "learning_rate": 0.00019370769673393007,
      "loss": 0.5669,
      "step": 188
    },
    {
      "epoch": 0.11977186311787072,
      "grad_norm": 0.05771539360284805,
      "learning_rate": 0.00019363755960521943,
      "loss": 0.4965,
      "step": 189
    },
    {
      "epoch": 0.12040557667934093,
      "grad_norm": 0.07135152071714401,
      "learning_rate": 0.00019356704658928035,
      "loss": 0.4089,
      "step": 190
    },
    {
      "epoch": 0.12103929024081116,
      "grad_norm": 0.05927246809005737,
      "learning_rate": 0.00019349615796917163,
      "loss": 0.465,
      "step": 191
    },
    {
      "epoch": 0.12167300380228137,
      "grad_norm": 0.06522128731012344,
      "learning_rate": 0.00019342489402945998,
      "loss": 0.3797,
      "step": 192
    },
    {
      "epoch": 0.12230671736375158,
      "grad_norm": 0.05745214596390724,
      "learning_rate": 0.0001933532550562187,
      "loss": 0.56,
      "step": 193
    },
    {
      "epoch": 0.1229404309252218,
      "grad_norm": 0.05626146122813225,
      "learning_rate": 0.0001932812413370265,
      "loss": 0.5439,
      "step": 194
    },
    {
      "epoch": 0.12357414448669202,
      "grad_norm": 0.07615689933300018,
      "learning_rate": 0.00019320885316096654,
      "loss": 0.5187,
      "step": 195
    },
    {
      "epoch": 0.12420785804816223,
      "grad_norm": 0.19566097855567932,
      "learning_rate": 0.00019313609081862508,
      "loss": 0.5535,
      "step": 196
    },
    {
      "epoch": 0.12484157160963244,
      "grad_norm": 0.052284326404333115,
      "learning_rate": 0.00019306295460209044,
      "loss": 0.4056,
      "step": 197
    },
    {
      "epoch": 0.12547528517110265,
      "grad_norm": 0.050081610679626465,
      "learning_rate": 0.00019298944480495176,
      "loss": 0.451,
      "step": 198
    },
    {
      "epoch": 0.12610899873257286,
      "grad_norm": 0.07420384138822556,
      "learning_rate": 0.00019291556172229785,
      "loss": 0.5485,
      "step": 199
    },
    {
      "epoch": 0.1267427122940431,
      "grad_norm": 0.046289846301078796,
      "learning_rate": 0.00019284130565071588,
      "loss": 0.4944,
      "step": 200
    },
    {
      "epoch": 0.12737642585551331,
      "grad_norm": 0.041031207889318466,
      "learning_rate": 0.00019276667688829043,
      "loss": 0.4507,
      "step": 201
    },
    {
      "epoch": 0.12801013941698353,
      "grad_norm": 0.07089229673147202,
      "learning_rate": 0.0001926916757346022,
      "loss": 0.513,
      "step": 202
    },
    {
      "epoch": 0.12864385297845374,
      "grad_norm": 0.04405022785067558,
      "learning_rate": 0.00019261630249072659,
      "loss": 0.3709,
      "step": 203
    },
    {
      "epoch": 0.12927756653992395,
      "grad_norm": 0.059661708772182465,
      "learning_rate": 0.00019254055745923285,
      "loss": 0.4813,
      "step": 204
    },
    {
      "epoch": 0.12991128010139416,
      "grad_norm": 0.07400868833065033,
      "learning_rate": 0.00019246444094418255,
      "loss": 0.5346,
      "step": 205
    },
    {
      "epoch": 0.13054499366286437,
      "grad_norm": 0.05862591415643692,
      "learning_rate": 0.0001923879532511287,
      "loss": 0.4856,
      "step": 206
    },
    {
      "epoch": 0.1311787072243346,
      "grad_norm": 0.05793355405330658,
      "learning_rate": 0.00019231109468711405,
      "loss": 0.5129,
      "step": 207
    },
    {
      "epoch": 0.13181242078580482,
      "grad_norm": 0.043961625546216965,
      "learning_rate": 0.00019223386556067033,
      "loss": 0.4803,
      "step": 208
    },
    {
      "epoch": 0.13244613434727504,
      "grad_norm": 0.07102088630199432,
      "learning_rate": 0.00019215626618181676,
      "loss": 0.5078,
      "step": 209
    },
    {
      "epoch": 0.13307984790874525,
      "grad_norm": 0.07707204669713974,
      "learning_rate": 0.00019207829686205882,
      "loss": 0.5465,
      "step": 210
    },
    {
      "epoch": 0.13371356147021546,
      "grad_norm": 0.06010926514863968,
      "learning_rate": 0.0001919999579143871,
      "loss": 0.5532,
      "step": 211
    },
    {
      "epoch": 0.13434727503168567,
      "grad_norm": 0.0627330020070076,
      "learning_rate": 0.0001919212496532759,
      "loss": 0.4055,
      "step": 212
    },
    {
      "epoch": 0.13498098859315588,
      "grad_norm": 0.04347623884677887,
      "learning_rate": 0.00019184217239468212,
      "loss": 0.4581,
      "step": 213
    },
    {
      "epoch": 0.13561470215462612,
      "grad_norm": 0.05672100558876991,
      "learning_rate": 0.00019176272645604386,
      "loss": 0.5335,
      "step": 214
    },
    {
      "epoch": 0.13624841571609633,
      "grad_norm": 0.05062992498278618,
      "learning_rate": 0.00019168291215627926,
      "loss": 0.4801,
      "step": 215
    },
    {
      "epoch": 0.13688212927756654,
      "grad_norm": 8.16939640045166,
      "learning_rate": 0.00019160272981578512,
      "loss": 0.5814,
      "step": 216
    },
    {
      "epoch": 0.13751584283903676,
      "grad_norm": 0.058165278285741806,
      "learning_rate": 0.00019152217975643566,
      "loss": 0.5163,
      "step": 217
    },
    {
      "epoch": 0.13814955640050697,
      "grad_norm": 0.06994735449552536,
      "learning_rate": 0.00019144126230158127,
      "loss": 0.5558,
      "step": 218
    },
    {
      "epoch": 0.13878326996197718,
      "grad_norm": 0.05495104938745499,
      "learning_rate": 0.0001913599777760471,
      "loss": 0.5298,
      "step": 219
    },
    {
      "epoch": 0.1394169835234474,
      "grad_norm": 0.060677338391542435,
      "learning_rate": 0.00019127832650613189,
      "loss": 0.5614,
      "step": 220
    },
    {
      "epoch": 0.14005069708491763,
      "grad_norm": 0.060457441955804825,
      "learning_rate": 0.00019119630881960658,
      "loss": 0.5139,
      "step": 221
    },
    {
      "epoch": 0.14068441064638784,
      "grad_norm": 0.0608784481883049,
      "learning_rate": 0.00019111392504571296,
      "loss": 0.4711,
      "step": 222
    },
    {
      "epoch": 0.14131812420785805,
      "grad_norm": 0.07560902833938599,
      "learning_rate": 0.00019103117551516244,
      "loss": 0.486,
      "step": 223
    },
    {
      "epoch": 0.14195183776932827,
      "grad_norm": 0.0847187414765358,
      "learning_rate": 0.00019094806056013468,
      "loss": 0.5934,
      "step": 224
    },
    {
      "epoch": 0.14258555133079848,
      "grad_norm": 0.06016870215535164,
      "learning_rate": 0.00019086458051427622,
      "loss": 0.4529,
      "step": 225
    },
    {
      "epoch": 0.1432192648922687,
      "grad_norm": 0.17245864868164062,
      "learning_rate": 0.00019078073571269922,
      "loss": 0.5307,
      "step": 226
    },
    {
      "epoch": 0.1438529784537389,
      "grad_norm": 0.0647033080458641,
      "learning_rate": 0.00019069652649198005,
      "loss": 0.569,
      "step": 227
    },
    {
      "epoch": 0.1444866920152091,
      "grad_norm": 0.07447489351034164,
      "learning_rate": 0.00019061195319015797,
      "loss": 0.547,
      "step": 228
    },
    {
      "epoch": 0.14512040557667935,
      "grad_norm": 0.05335066467523575,
      "learning_rate": 0.00019052701614673373,
      "loss": 0.5363,
      "step": 229
    },
    {
      "epoch": 0.14575411913814956,
      "grad_norm": 0.04057115688920021,
      "learning_rate": 0.0001904417157026683,
      "loss": 0.4354,
      "step": 230
    },
    {
      "epoch": 0.14638783269961977,
      "grad_norm": 0.05564083158969879,
      "learning_rate": 0.00019035605220038137,
      "loss": 0.5674,
      "step": 231
    },
    {
      "epoch": 0.14702154626108999,
      "grad_norm": 0.1210884302854538,
      "learning_rate": 0.00019027002598375012,
      "loss": 0.5645,
      "step": 232
    },
    {
      "epoch": 0.1476552598225602,
      "grad_norm": 0.05494518578052521,
      "learning_rate": 0.00019018363739810767,
      "loss": 0.6239,
      "step": 233
    },
    {
      "epoch": 0.1482889733840304,
      "grad_norm": 0.04633218050003052,
      "learning_rate": 0.0001900968867902419,
      "loss": 0.4787,
      "step": 234
    },
    {
      "epoch": 0.14892268694550062,
      "grad_norm": 0.06846950203180313,
      "learning_rate": 0.00019000977450839393,
      "loss": 0.5607,
      "step": 235
    },
    {
      "epoch": 0.14955640050697086,
      "grad_norm": 0.0618814192712307,
      "learning_rate": 0.0001899223009022566,
      "loss": 0.631,
      "step": 236
    },
    {
      "epoch": 0.15019011406844107,
      "grad_norm": 0.06061235070228577,
      "learning_rate": 0.00018983446632297343,
      "loss": 0.5989,
      "step": 237
    },
    {
      "epoch": 0.15082382762991128,
      "grad_norm": 0.06494279205799103,
      "learning_rate": 0.00018974627112313677,
      "loss": 0.5816,
      "step": 238
    },
    {
      "epoch": 0.1514575411913815,
      "grad_norm": 0.04907020181417465,
      "learning_rate": 0.0001896577156567868,
      "loss": 0.5097,
      "step": 239
    },
    {
      "epoch": 0.1520912547528517,
      "grad_norm": 0.04682941362261772,
      "learning_rate": 0.00018956880027940967,
      "loss": 0.5828,
      "step": 240
    },
    {
      "epoch": 0.15272496831432192,
      "grad_norm": 0.05498978868126869,
      "learning_rate": 0.00018947952534793661,
      "loss": 0.5257,
      "step": 241
    },
    {
      "epoch": 0.15335868187579213,
      "grad_norm": 0.04309950768947601,
      "learning_rate": 0.00018938989122074197,
      "loss": 0.3662,
      "step": 242
    },
    {
      "epoch": 0.15399239543726237,
      "grad_norm": 0.06519515067338943,
      "learning_rate": 0.00018929989825764207,
      "loss": 0.4058,
      "step": 243
    },
    {
      "epoch": 0.15462610899873258,
      "grad_norm": 0.046929214149713516,
      "learning_rate": 0.00018920954681989378,
      "loss": 0.4916,
      "step": 244
    },
    {
      "epoch": 0.1552598225602028,
      "grad_norm": 0.05388319492340088,
      "learning_rate": 0.00018911883727019285,
      "loss": 0.4143,
      "step": 245
    },
    {
      "epoch": 0.155893536121673,
      "grad_norm": 0.05619863048195839,
      "learning_rate": 0.00018902776997267268,
      "loss": 0.5107,
      "step": 246
    },
    {
      "epoch": 0.15652724968314322,
      "grad_norm": 0.053882747888565063,
      "learning_rate": 0.00018893634529290279,
      "loss": 0.5559,
      "step": 247
    },
    {
      "epoch": 0.15716096324461343,
      "grad_norm": 0.05231885239481926,
      "learning_rate": 0.00018884456359788724,
      "loss": 0.5076,
      "step": 248
    },
    {
      "epoch": 0.15779467680608364,
      "grad_norm": 0.07149146497249603,
      "learning_rate": 0.00018875242525606334,
      "loss": 0.558,
      "step": 249
    },
    {
      "epoch": 0.15842839036755388,
      "grad_norm": 0.04615316912531853,
      "learning_rate": 0.00018865993063730004,
      "loss": 0.4971,
      "step": 250
    },
    {
      "epoch": 0.1590621039290241,
      "grad_norm": 0.05331886187195778,
      "learning_rate": 0.00018856708011289643,
      "loss": 0.5506,
      "step": 251
    },
    {
      "epoch": 0.1596958174904943,
      "grad_norm": 0.05348580330610275,
      "learning_rate": 0.00018847387405558045,
      "loss": 0.4515,
      "step": 252
    },
    {
      "epoch": 0.1603295310519645,
      "grad_norm": 0.0438147634267807,
      "learning_rate": 0.00018838031283950705,
      "loss": 0.3818,
      "step": 253
    },
    {
      "epoch": 0.16096324461343473,
      "grad_norm": 0.0473354198038578,
      "learning_rate": 0.0001882863968402571,
      "loss": 0.4458,
      "step": 254
    },
    {
      "epoch": 0.16159695817490494,
      "grad_norm": 0.05930502712726593,
      "learning_rate": 0.0001881921264348355,
      "loss": 0.6228,
      "step": 255
    },
    {
      "epoch": 0.16223067173637515,
      "grad_norm": 0.04982107877731323,
      "learning_rate": 0.00018809750200166994,
      "loss": 0.5916,
      "step": 256
    },
    {
      "epoch": 0.1628643852978454,
      "grad_norm": 0.09739918261766434,
      "learning_rate": 0.0001880025239206092,
      "loss": 0.651,
      "step": 257
    },
    {
      "epoch": 0.1634980988593156,
      "grad_norm": 0.09072676301002502,
      "learning_rate": 0.00018790719257292174,
      "loss": 0.5564,
      "step": 258
    },
    {
      "epoch": 0.1641318124207858,
      "grad_norm": 0.0638791099190712,
      "learning_rate": 0.00018781150834129413,
      "loss": 0.4545,
      "step": 259
    },
    {
      "epoch": 0.16476552598225602,
      "grad_norm": 0.05755198001861572,
      "learning_rate": 0.0001877154716098295,
      "loss": 0.4457,
      "step": 260
    },
    {
      "epoch": 0.16539923954372623,
      "grad_norm": 0.2049247920513153,
      "learning_rate": 0.00018761908276404603,
      "loss": 0.5447,
      "step": 261
    },
    {
      "epoch": 0.16603295310519645,
      "grad_norm": 0.06760350614786148,
      "learning_rate": 0.00018752234219087538,
      "loss": 0.4743,
      "step": 262
    },
    {
      "epoch": 0.16666666666666666,
      "grad_norm": 0.061410121619701385,
      "learning_rate": 0.00018742525027866115,
      "loss": 0.547,
      "step": 263
    },
    {
      "epoch": 0.16730038022813687,
      "grad_norm": 0.04981521889567375,
      "learning_rate": 0.00018732780741715724,
      "loss": 0.4924,
      "step": 264
    },
    {
      "epoch": 0.1679340937896071,
      "grad_norm": 0.06636273115873337,
      "learning_rate": 0.00018723001399752653,
      "loss": 0.591,
      "step": 265
    },
    {
      "epoch": 0.16856780735107732,
      "grad_norm": 0.0517747662961483,
      "learning_rate": 0.00018713187041233896,
      "loss": 0.5294,
      "step": 266
    },
    {
      "epoch": 0.16920152091254753,
      "grad_norm": 0.11798780411481857,
      "learning_rate": 0.00018703337705557017,
      "loss": 0.4953,
      "step": 267
    },
    {
      "epoch": 0.16983523447401774,
      "grad_norm": 0.1441587656736374,
      "learning_rate": 0.00018693453432259998,
      "loss": 0.4898,
      "step": 268
    },
    {
      "epoch": 0.17046894803548795,
      "grad_norm": 0.06387986242771149,
      "learning_rate": 0.00018683534261021057,
      "loss": 0.4663,
      "step": 269
    },
    {
      "epoch": 0.17110266159695817,
      "grad_norm": 0.05943833664059639,
      "learning_rate": 0.0001867358023165851,
      "loss": 0.5607,
      "step": 270
    },
    {
      "epoch": 0.17173637515842838,
      "grad_norm": 0.05011943355202675,
      "learning_rate": 0.00018663591384130606,
      "loss": 0.5297,
      "step": 271
    },
    {
      "epoch": 0.17237008871989862,
      "grad_norm": 0.059131983667612076,
      "learning_rate": 0.00018653567758535354,
      "loss": 0.4896,
      "step": 272
    },
    {
      "epoch": 0.17300380228136883,
      "grad_norm": 0.06053609773516655,
      "learning_rate": 0.0001864350939511038,
      "loss": 0.5446,
      "step": 273
    },
    {
      "epoch": 0.17363751584283904,
      "grad_norm": 0.05496980994939804,
      "learning_rate": 0.00018633416334232753,
      "loss": 0.5427,
      "step": 274
    },
    {
      "epoch": 0.17427122940430925,
      "grad_norm": 0.05304751545190811,
      "learning_rate": 0.0001862328861641883,
      "loss": 0.4189,
      "step": 275
    },
    {
      "epoch": 0.17490494296577946,
      "grad_norm": 0.04881710559129715,
      "learning_rate": 0.00018613126282324092,
      "loss": 0.4555,
      "step": 276
    },
    {
      "epoch": 0.17553865652724968,
      "grad_norm": 0.051984284073114395,
      "learning_rate": 0.0001860292937274297,
      "loss": 0.5282,
      "step": 277
    },
    {
      "epoch": 0.1761723700887199,
      "grad_norm": 0.05241424962878227,
      "learning_rate": 0.00018592697928608703,
      "loss": 0.4924,
      "step": 278
    },
    {
      "epoch": 0.17680608365019013,
      "grad_norm": 0.04947778955101967,
      "learning_rate": 0.00018582431990993151,
      "loss": 0.4867,
      "step": 279
    },
    {
      "epoch": 0.17743979721166034,
      "grad_norm": 0.04952229931950569,
      "learning_rate": 0.00018572131601106654,
      "loss": 0.4362,
      "step": 280
    },
    {
      "epoch": 0.17807351077313055,
      "grad_norm": 0.061900023370981216,
      "learning_rate": 0.00018561796800297832,
      "loss": 0.6342,
      "step": 281
    },
    {
      "epoch": 0.17870722433460076,
      "grad_norm": 0.04405650496482849,
      "learning_rate": 0.00018551427630053463,
      "loss": 0.4612,
      "step": 282
    },
    {
      "epoch": 0.17934093789607097,
      "grad_norm": 0.5723605155944824,
      "learning_rate": 0.00018541024131998274,
      "loss": 0.4917,
      "step": 283
    },
    {
      "epoch": 0.17997465145754118,
      "grad_norm": 0.07066962867975235,
      "learning_rate": 0.0001853058634789481,
      "loss": 0.5386,
      "step": 284
    },
    {
      "epoch": 0.1806083650190114,
      "grad_norm": 0.041575830429792404,
      "learning_rate": 0.00018520114319643235,
      "loss": 0.4894,
      "step": 285
    },
    {
      "epoch": 0.18124207858048164,
      "grad_norm": 0.07731833308935165,
      "learning_rate": 0.0001850960808928119,
      "loss": 0.5382,
      "step": 286
    },
    {
      "epoch": 0.18187579214195185,
      "grad_norm": 0.05468999221920967,
      "learning_rate": 0.00018499067698983605,
      "loss": 0.4514,
      "step": 287
    },
    {
      "epoch": 0.18250950570342206,
      "grad_norm": 0.04942842200398445,
      "learning_rate": 0.00018488493191062542,
      "loss": 0.4329,
      "step": 288
    },
    {
      "epoch": 0.18314321926489227,
      "grad_norm": 0.053615666925907135,
      "learning_rate": 0.0001847788460796702,
      "loss": 0.5182,
      "step": 289
    },
    {
      "epoch": 0.18377693282636248,
      "grad_norm": 0.04232574254274368,
      "learning_rate": 0.00018467241992282843,
      "loss": 0.3108,
      "step": 290
    },
    {
      "epoch": 0.1844106463878327,
      "grad_norm": 0.04795556515455246,
      "learning_rate": 0.00018456565386732433,
      "loss": 0.383,
      "step": 291
    },
    {
      "epoch": 0.1850443599493029,
      "grad_norm": 0.053252723067998886,
      "learning_rate": 0.00018445854834174655,
      "loss": 0.4597,
      "step": 292
    },
    {
      "epoch": 0.18567807351077312,
      "grad_norm": 0.044747479259967804,
      "learning_rate": 0.00018435110377604654,
      "loss": 0.5066,
      "step": 293
    },
    {
      "epoch": 0.18631178707224336,
      "grad_norm": 0.0473531037569046,
      "learning_rate": 0.00018424332060153664,
      "loss": 0.4258,
      "step": 294
    },
    {
      "epoch": 0.18694550063371357,
      "grad_norm": 0.05739828571677208,
      "learning_rate": 0.0001841351992508885,
      "loss": 0.4498,
      "step": 295
    },
    {
      "epoch": 0.18757921419518378,
      "grad_norm": 0.0635855570435524,
      "learning_rate": 0.0001840267401581314,
      "loss": 0.5368,
      "step": 296
    },
    {
      "epoch": 0.188212927756654,
      "grad_norm": 0.05470935255289078,
      "learning_rate": 0.00018391794375865024,
      "loss": 0.5367,
      "step": 297
    },
    {
      "epoch": 0.1888466413181242,
      "grad_norm": 0.04850434139370918,
      "learning_rate": 0.00018380881048918405,
      "loss": 0.5369,
      "step": 298
    },
    {
      "epoch": 0.18948035487959441,
      "grad_norm": 0.1420743763446808,
      "learning_rate": 0.00018369934078782426,
      "loss": 0.5101,
      "step": 299
    },
    {
      "epoch": 0.19011406844106463,
      "grad_norm": 0.0749795064330101,
      "learning_rate": 0.00018358953509401262,
      "loss": 0.5756,
      "step": 300
    },
    {
      "epoch": 0.19074778200253487,
      "grad_norm": 0.05331069603562355,
      "learning_rate": 0.00018347939384853978,
      "loss": 0.5759,
      "step": 301
    },
    {
      "epoch": 0.19138149556400508,
      "grad_norm": 0.05981903895735741,
      "learning_rate": 0.00018336891749354335,
      "loss": 0.6036,
      "step": 302
    },
    {
      "epoch": 0.1920152091254753,
      "grad_norm": 0.08048289269208908,
      "learning_rate": 0.00018325810647250616,
      "loss": 0.4424,
      "step": 303
    },
    {
      "epoch": 0.1926489226869455,
      "grad_norm": 0.07861804962158203,
      "learning_rate": 0.00018314696123025454,
      "loss": 0.5725,
      "step": 304
    },
    {
      "epoch": 0.1932826362484157,
      "grad_norm": 0.14672251045703888,
      "learning_rate": 0.0001830354822129564,
      "loss": 0.5068,
      "step": 305
    },
    {
      "epoch": 0.19391634980988592,
      "grad_norm": 0.06640765070915222,
      "learning_rate": 0.0001829236698681195,
      "loss": 0.585,
      "step": 306
    },
    {
      "epoch": 0.19455006337135614,
      "grad_norm": 0.0588274821639061,
      "learning_rate": 0.0001828115246445898,
      "loss": 0.5779,
      "step": 307
    },
    {
      "epoch": 0.19518377693282637,
      "grad_norm": 0.05600736290216446,
      "learning_rate": 0.0001826990469925494,
      "loss": 0.5216,
      "step": 308
    },
    {
      "epoch": 0.1958174904942966,
      "grad_norm": 0.052844930440187454,
      "learning_rate": 0.0001825862373635149,
      "loss": 0.5482,
      "step": 309
    },
    {
      "epoch": 0.1964512040557668,
      "grad_norm": 0.04969317838549614,
      "learning_rate": 0.0001824730962103356,
      "loss": 0.5928,
      "step": 310
    },
    {
      "epoch": 0.197084917617237,
      "grad_norm": 0.06168043613433838,
      "learning_rate": 0.00018235962398719147,
      "loss": 0.5185,
      "step": 311
    },
    {
      "epoch": 0.19771863117870722,
      "grad_norm": 0.051151130348443985,
      "learning_rate": 0.00018224582114959172,
      "loss": 0.4677,
      "step": 312
    },
    {
      "epoch": 0.19835234474017743,
      "grad_norm": 0.060467127710580826,
      "learning_rate": 0.00018213168815437255,
      "loss": 0.5566,
      "step": 313
    },
    {
      "epoch": 0.19898605830164764,
      "grad_norm": 0.043170325458049774,
      "learning_rate": 0.0001820172254596956,
      "loss": 0.489,
      "step": 314
    },
    {
      "epoch": 0.19961977186311788,
      "grad_norm": 0.06550537794828415,
      "learning_rate": 0.00018190243352504597,
      "loss": 0.5809,
      "step": 315
    },
    {
      "epoch": 0.2002534854245881,
      "grad_norm": 0.04956373944878578,
      "learning_rate": 0.00018178731281123044,
      "loss": 0.462,
      "step": 316
    },
    {
      "epoch": 0.2008871989860583,
      "grad_norm": 0.05908495932817459,
      "learning_rate": 0.00018167186378037563,
      "loss": 0.4611,
      "step": 317
    },
    {
      "epoch": 0.20152091254752852,
      "grad_norm": 0.047168437391519547,
      "learning_rate": 0.00018155608689592604,
      "loss": 0.5283,
      "step": 318
    },
    {
      "epoch": 0.20215462610899873,
      "grad_norm": 0.04968830570578575,
      "learning_rate": 0.00018143998262264233,
      "loss": 0.4982,
      "step": 319
    },
    {
      "epoch": 0.20278833967046894,
      "grad_norm": 0.06764087826013565,
      "learning_rate": 0.00018132355142659937,
      "loss": 0.5244,
      "step": 320
    },
    {
      "epoch": 0.20342205323193915,
      "grad_norm": 0.06344570964574814,
      "learning_rate": 0.0001812067937751844,
      "loss": 0.606,
      "step": 321
    },
    {
      "epoch": 0.20405576679340937,
      "grad_norm": 0.06029113009572029,
      "learning_rate": 0.0001810897101370951,
      "loss": 0.5407,
      "step": 322
    },
    {
      "epoch": 0.2046894803548796,
      "grad_norm": 0.08346560597419739,
      "learning_rate": 0.00018097230098233785,
      "loss": 0.4814,
      "step": 323
    },
    {
      "epoch": 0.20532319391634982,
      "grad_norm": 0.04595065116882324,
      "learning_rate": 0.00018085456678222558,
      "loss": 0.471,
      "step": 324
    },
    {
      "epoch": 0.20595690747782003,
      "grad_norm": 0.4050588309764862,
      "learning_rate": 0.00018073650800937624,
      "loss": 0.4586,
      "step": 325
    },
    {
      "epoch": 0.20659062103929024,
      "grad_norm": 0.055679477751255035,
      "learning_rate": 0.00018061812513771053,
      "loss": 0.516,
      "step": 326
    },
    {
      "epoch": 0.20722433460076045,
      "grad_norm": 0.05209626257419586,
      "learning_rate": 0.00018049941864245033,
      "loss": 0.4528,
      "step": 327
    },
    {
      "epoch": 0.20785804816223066,
      "grad_norm": 0.05503727123141289,
      "learning_rate": 0.00018038038900011652,
      "loss": 0.4297,
      "step": 328
    },
    {
      "epoch": 0.20849176172370087,
      "grad_norm": 0.05453247204422951,
      "learning_rate": 0.0001802610366885271,
      "loss": 0.4731,
      "step": 329
    },
    {
      "epoch": 0.20912547528517111,
      "grad_norm": 0.05371938645839691,
      "learning_rate": 0.00018014136218679567,
      "loss": 0.569,
      "step": 330
    },
    {
      "epoch": 0.20975918884664133,
      "grad_norm": 0.05164814740419388,
      "learning_rate": 0.0001800213659753289,
      "loss": 0.4883,
      "step": 331
    },
    {
      "epoch": 0.21039290240811154,
      "grad_norm": 0.06455442309379578,
      "learning_rate": 0.00017990104853582493,
      "loss": 0.4829,
      "step": 332
    },
    {
      "epoch": 0.21102661596958175,
      "grad_norm": 0.04764432832598686,
      "learning_rate": 0.0001797804103512715,
      "loss": 0.5525,
      "step": 333
    },
    {
      "epoch": 0.21166032953105196,
      "grad_norm": 0.0578368604183197,
      "learning_rate": 0.00017965945190594388,
      "loss": 0.4824,
      "step": 334
    },
    {
      "epoch": 0.21229404309252217,
      "grad_norm": 0.05196613445878029,
      "learning_rate": 0.00017953817368540292,
      "loss": 0.5036,
      "step": 335
    },
    {
      "epoch": 0.21292775665399238,
      "grad_norm": 0.044868264347314835,
      "learning_rate": 0.00017941657617649316,
| "loss": 0.36, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.21356147021546262, | |
| "grad_norm": 0.0686643123626709, | |
| "learning_rate": 0.00017929465986734084, | |
| "loss": 0.6069, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.21419518377693283, | |
| "grad_norm": 0.08286602050065994, | |
| "learning_rate": 0.000179172425247352, | |
| "loss": 0.5635, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.21482889733840305, | |
| "grad_norm": 0.5979371070861816, | |
| "learning_rate": 0.00017904987280721035, | |
| "loss": 0.3994, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.21546261089987326, | |
| "grad_norm": 0.05577315390110016, | |
| "learning_rate": 0.00017892700303887558, | |
| "loss": 0.5699, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.21609632446134347, | |
| "grad_norm": 0.06650438159704208, | |
| "learning_rate": 0.0001788038164355811, | |
| "loss": 0.5557, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.21673003802281368, | |
| "grad_norm": 0.06644187867641449, | |
| "learning_rate": 0.00017868031349183217, | |
| "loss": 0.5593, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.2173637515842839, | |
| "grad_norm": 0.05286836251616478, | |
| "learning_rate": 0.00017855649470340413, | |
| "loss": 0.4902, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.21799746514575413, | |
| "grad_norm": 0.05314694344997406, | |
| "learning_rate": 0.00017843236056733992, | |
| "loss": 0.5036, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.21863117870722434, | |
| "grad_norm": 0.0668027251958847, | |
| "learning_rate": 0.0001783079115819486, | |
| "loss": 0.6198, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.21926489226869456, | |
| "grad_norm": 0.04909252002835274, | |
| "learning_rate": 0.000178183148246803, | |
| "loss": 0.4273, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.21989860583016477, | |
| "grad_norm": 0.053546786308288574, | |
| "learning_rate": 0.00017805807106273787, | |
| "loss": 0.5077, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.22053231939163498, | |
| "grad_norm": 0.0647466629743576, | |
| "learning_rate": 0.00017793268053184786, | |
| "loss": 0.5262, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.2211660329531052, | |
| "grad_norm": 0.05518212169408798, | |
| "learning_rate": 0.00017780697715748546, | |
| "loss": 0.5621, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.2217997465145754, | |
| "grad_norm": 0.0661974772810936, | |
| "learning_rate": 0.00017768096144425902, | |
| "loss": 0.5727, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2224334600760456, | |
| "grad_norm": 0.09333747625350952, | |
| "learning_rate": 0.00017755463389803065, | |
| "loss": 0.4891, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.22306717363751585, | |
| "grad_norm": 0.04791216179728508, | |
| "learning_rate": 0.0001774279950259143, | |
| "loss": 0.5569, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.22370088719898606, | |
| "grad_norm": 0.05712969973683357, | |
| "learning_rate": 0.0001773010453362737, | |
| "loss": 0.5433, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.22433460076045628, | |
| "grad_norm": 0.05735623091459274, | |
| "learning_rate": 0.00017717378533872017, | |
| "loss": 0.5702, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.2249683143219265, | |
| "grad_norm": 0.05040268227458, | |
| "learning_rate": 0.00017704621554411084, | |
| "loss": 0.4964, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.2256020278833967, | |
| "grad_norm": 0.04687810316681862, | |
| "learning_rate": 0.00017691833646454628, | |
| "loss": 0.5242, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.2262357414448669, | |
| "grad_norm": 0.051406193524599075, | |
| "learning_rate": 0.00017679014861336878, | |
| "loss": 0.5146, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.22686945500633712, | |
| "grad_norm": 0.04884679988026619, | |
| "learning_rate": 0.00017666165250516006, | |
| "loss": 0.4825, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.22750316856780736, | |
| "grad_norm": 0.053725842386484146, | |
| "learning_rate": 0.0001765328486557392, | |
| "loss": 0.4932, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.22813688212927757, | |
| "grad_norm": 0.06212908402085304, | |
| "learning_rate": 0.00017640373758216077, | |
| "loss": 0.506, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.22877059569074779, | |
| "grad_norm": 0.05059286579489708, | |
| "learning_rate": 0.0001762743198027125, | |
| "loss": 0.4719, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.229404309252218, | |
| "grad_norm": 0.04520050436258316, | |
| "learning_rate": 0.00017614459583691346, | |
| "loss": 0.4553, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.2300380228136882, | |
| "grad_norm": 0.05503036454319954, | |
| "learning_rate": 0.0001760145662055117, | |
| "loss": 0.4706, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.23067173637515842, | |
| "grad_norm": 0.046107854694128036, | |
| "learning_rate": 0.00017588423143048235, | |
| "loss": 0.4177, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.23130544993662863, | |
| "grad_norm": 0.12301266193389893, | |
| "learning_rate": 0.0001757535920350255, | |
| "loss": 0.5922, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.23193916349809887, | |
| "grad_norm": 1.179470419883728, | |
| "learning_rate": 0.00017562264854356405, | |
| "loss": 0.5123, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.23257287705956908, | |
| "grad_norm": 0.11167129874229431, | |
| "learning_rate": 0.0001754914014817416, | |
| "loss": 0.3884, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.2332065906210393, | |
| "grad_norm": 0.055067550390958786, | |
| "learning_rate": 0.00017535985137642044, | |
| "loss": 0.4544, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.2338403041825095, | |
| "grad_norm": 0.07947530597448349, | |
| "learning_rate": 0.0001752279987556792, | |
| "loss": 0.6575, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.23447401774397972, | |
| "grad_norm": 0.10236025601625443, | |
| "learning_rate": 0.00017509584414881113, | |
| "loss": 0.5334, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.23510773130544993, | |
| "grad_norm": 0.12996040284633636, | |
| "learning_rate": 0.00017496338808632155, | |
| "loss": 0.3897, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.23574144486692014, | |
| "grad_norm": 0.07005209475755692, | |
| "learning_rate": 0.00017483063109992596, | |
| "loss": 0.5077, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.23637515842839038, | |
| "grad_norm": 0.04446430131793022, | |
| "learning_rate": 0.00017469757372254785, | |
| "loss": 0.4467, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.2370088719898606, | |
| "grad_norm": 6.105027198791504, | |
| "learning_rate": 0.00017456421648831655, | |
| "loss": 1.722, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.2376425855513308, | |
| "grad_norm": 0.07488813251256943, | |
| "learning_rate": 0.0001744305599325652, | |
| "loss": 0.7018, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.23827629911280102, | |
| "grad_norm": 0.05676595866680145, | |
| "learning_rate": 0.00017429660459182834, | |
| "loss": 0.4865, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.23891001267427123, | |
| "grad_norm": 0.058106616139411926, | |
| "learning_rate": 0.00017416235100384007, | |
| "loss": 0.4453, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.23954372623574144, | |
| "grad_norm": 0.4252207577228546, | |
| "learning_rate": 0.00017402779970753155, | |
| "loss": 3.008, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.24017743979721165, | |
| "grad_norm": 0.24036817252635956, | |
| "learning_rate": 0.00017389295124302923, | |
| "loss": 0.7246, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.24081115335868186, | |
| "grad_norm": 4.316144943237305, | |
| "learning_rate": 0.00017375780615165235, | |
| "loss": 0.664, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.2414448669201521, | |
| "grad_norm": 6.4877166748046875, | |
| "learning_rate": 0.00017362236497591094, | |
| "loss": 0.487, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.2420785804816223, | |
| "grad_norm": 0.12358918786048889, | |
| "learning_rate": 0.00017348662825950357, | |
| "loss": 0.4839, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.24271229404309252, | |
| "grad_norm": 0.7211472988128662, | |
| "learning_rate": 0.0001733505965473152, | |
| "loss": 0.6351, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.24334600760456274, | |
| "grad_norm": 0.10177785158157349, | |
| "learning_rate": 0.00017321427038541494, | |
| "loss": 0.6043, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.24397972116603295, | |
| "grad_norm": 0.054658226668834686, | |
| "learning_rate": 0.00017307765032105406, | |
| "loss": 0.473, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.24461343472750316, | |
| "grad_norm": 0.10075858235359192, | |
| "learning_rate": 0.00017294073690266344, | |
| "loss": 0.4892, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.24524714828897337, | |
| "grad_norm": 0.06497970223426819, | |
| "learning_rate": 0.00017280353067985167, | |
| "loss": 0.4986, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.2458808618504436, | |
| "grad_norm": 0.7542481422424316, | |
| "learning_rate": 0.0001726660322034027, | |
| "loss": 0.5513, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.24651457541191382, | |
| "grad_norm": 0.08190987259149551, | |
| "learning_rate": 0.00017252824202527376, | |
| "loss": 0.5077, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.24714828897338403, | |
| "grad_norm": 0.08874624967575073, | |
| "learning_rate": 0.0001723901606985929, | |
| "loss": 0.3973, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.24778200253485425, | |
| "grad_norm": 0.32968223094940186, | |
| "learning_rate": 0.00017225178877765704, | |
| "loss": 0.4411, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.24841571609632446, | |
| "grad_norm": 0.39434677362442017, | |
| "learning_rate": 0.00017211312681792958, | |
| "loss": 0.5201, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.24904942965779467, | |
| "grad_norm": 0.11154969036579132, | |
| "learning_rate": 0.00017197417537603827, | |
| "loss": 0.6205, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.24968314321926488, | |
| "grad_norm": 0.07316391915082932, | |
| "learning_rate": 0.00017183493500977278, | |
| "loss": 0.5129, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.2503168567807351, | |
| "grad_norm": 0.08883780986070633, | |
| "learning_rate": 0.00017169540627808274, | |
| "loss": 0.5036, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.2509505703422053, | |
| "grad_norm": 0.07377318292856216, | |
| "learning_rate": 0.00017155558974107536, | |
| "loss": 0.591, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.25158428390367554, | |
| "grad_norm": 0.064984992146492, | |
| "learning_rate": 0.00017141548596001305, | |
| "loss": 0.645, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.2522179974651457, | |
| "grad_norm": 0.07279626280069351, | |
| "learning_rate": 0.00017127509549731148, | |
| "loss": 0.5108, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.25285171102661597, | |
| "grad_norm": 0.06948740035295486, | |
| "learning_rate": 0.000171134418916537, | |
| "loss": 0.4959, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.2534854245880862, | |
| "grad_norm": 1.0025055408477783, | |
| "learning_rate": 0.00017099345678240452, | |
| "loss": 0.5248, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2541191381495564, | |
| "grad_norm": 0.34188470244407654, | |
| "learning_rate": 0.00017085220966077538, | |
| "loss": 0.5588, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.25475285171102663, | |
| "grad_norm": 0.04984923452138901, | |
| "learning_rate": 0.00017071067811865476, | |
| "loss": 0.4033, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.2553865652724968, | |
| "grad_norm": 0.05613204464316368, | |
| "learning_rate": 0.0001705688627241897, | |
| "loss": 0.5774, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.25602027883396705, | |
| "grad_norm": 0.058507829904556274, | |
| "learning_rate": 0.0001704267640466667, | |
| "loss": 0.52, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.25665399239543724, | |
| "grad_norm": 0.23744581639766693, | |
| "learning_rate": 0.00017028438265650933, | |
| "loss": 0.6028, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.2572877059569075, | |
| "grad_norm": 0.11817914992570877, | |
| "learning_rate": 0.00017014171912527616, | |
| "loss": 0.5416, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.2579214195183777, | |
| "grad_norm": 0.29011303186416626, | |
| "learning_rate": 0.00016999877402565833, | |
| "loss": 0.4381, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.2585551330798479, | |
| "grad_norm": 0.06895189732313156, | |
| "learning_rate": 0.00016985554793147727, | |
| "loss": 0.5046, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.25918884664131814, | |
| "grad_norm": 0.059166181832551956, | |
| "learning_rate": 0.00016971204141768233, | |
| "loss": 0.582, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.2598225602027883, | |
| "grad_norm": 0.09994165599346161, | |
| "learning_rate": 0.00016956825506034867, | |
| "loss": 0.6042, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.26045627376425856, | |
| "grad_norm": 0.09195294976234436, | |
| "learning_rate": 0.00016942418943667468, | |
| "loss": 0.577, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.26108998732572875, | |
| "grad_norm": 0.08966407924890518, | |
| "learning_rate": 0.00016927984512497992, | |
| "loss": 0.5795, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.261723700887199, | |
| "grad_norm": 0.08420640975236893, | |
| "learning_rate": 0.00016913522270470263, | |
| "loss": 0.4446, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.2623574144486692, | |
| "grad_norm": 0.05902143940329552, | |
| "learning_rate": 0.0001689903227563975, | |
| "loss": 0.4458, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.2629911280101394, | |
| "grad_norm": 0.046236153692007065, | |
| "learning_rate": 0.0001688451458617332, | |
| "loss": 0.3762, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.26362484157160965, | |
| "grad_norm": 0.10383841395378113, | |
| "learning_rate": 0.00016869969260349018, | |
| "loss": 0.6076, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.26425855513307983, | |
| "grad_norm": 0.059753723442554474, | |
| "learning_rate": 0.00016855396356555834, | |
| "loss": 0.4116, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.26489226869455007, | |
| "grad_norm": 0.05825261399149895, | |
| "learning_rate": 0.00016840795933293463, | |
| "loss": 0.5377, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.26552598225602025, | |
| "grad_norm": 0.07149126380681992, | |
| "learning_rate": 0.00016826168049172062, | |
| "loss": 0.5946, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.2661596958174905, | |
| "grad_norm": 0.0636037141084671, | |
| "learning_rate": 0.00016811512762912034, | |
| "loss": 0.4232, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.26679340937896073, | |
| "grad_norm": 0.06662221997976303, | |
| "learning_rate": 0.00016796830133343775, | |
| "loss": 0.5406, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.2674271229404309, | |
| "grad_norm": 0.058340173214673996, | |
| "learning_rate": 0.00016782120219407452, | |
| "loss": 0.5402, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.26806083650190116, | |
| "grad_norm": 0.054275717586278915, | |
| "learning_rate": 0.00016767383080152742, | |
| "loss": 0.5215, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.26869455006337134, | |
| "grad_norm": 0.055525969713926315, | |
| "learning_rate": 0.00016752618774738639, | |
| "loss": 0.5743, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.2693282636248416, | |
| "grad_norm": 0.05762525647878647, | |
| "learning_rate": 0.00016737827362433164, | |
| "loss": 0.5806, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.26996197718631176, | |
| "grad_norm": 0.059116896241903305, | |
| "learning_rate": 0.0001672300890261317, | |
| "loss": 0.4828, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.270595690747782, | |
| "grad_norm": 0.046420734375715256, | |
| "learning_rate": 0.00016708163454764075, | |
| "loss": 0.4509, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.27122940430925224, | |
| "grad_norm": 0.11202160269021988, | |
| "learning_rate": 0.00016693291078479638, | |
| "loss": 0.5139, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.2718631178707224, | |
| "grad_norm": 0.08383259177207947, | |
| "learning_rate": 0.00016678391833461722, | |
| "loss": 0.7026, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.27249683143219267, | |
| "grad_norm": 0.058648403733968735, | |
| "learning_rate": 0.0001666346577952004, | |
| "loss": 0.4704, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.27313054499366285, | |
| "grad_norm": 0.08609268069267273, | |
| "learning_rate": 0.0001664851297657193, | |
| "loss": 0.5186, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.2737642585551331, | |
| "grad_norm": 0.10570003092288971, | |
| "learning_rate": 0.00016633533484642103, | |
| "loss": 0.4615, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.2743979721166033, | |
| "grad_norm": 0.09764793515205383, | |
| "learning_rate": 0.00016618527363862408, | |
| "loss": 0.4519, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.2750316856780735, | |
| "grad_norm": 0.08797989040613174, | |
| "learning_rate": 0.00016603494674471593, | |
| "loss": 0.6139, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.27566539923954375, | |
| "grad_norm": 0.0714520812034607, | |
| "learning_rate": 0.0001658843547681506, | |
| "loss": 0.5027, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.27629911280101394, | |
| "grad_norm": 0.08733757585287094, | |
| "learning_rate": 0.00016573349831344616, | |
| "loss": 0.4582, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.2769328263624842, | |
| "grad_norm": 0.0712830200791359, | |
| "learning_rate": 0.00016558237798618245, | |
| "loss": 0.4336, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.27756653992395436, | |
| "grad_norm": 0.06345337629318237, | |
| "learning_rate": 0.00016543099439299844, | |
| "loss": 0.4587, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.2782002534854246, | |
| "grad_norm": 0.06224706023931503, | |
| "learning_rate": 0.0001652793481415901, | |
| "loss": 0.5171, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.2788339670468948, | |
| "grad_norm": 0.0549205057322979, | |
| "learning_rate": 0.00016512743984070769, | |
| "loss": 0.5189, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.279467680608365, | |
| "grad_norm": 0.07211892306804657, | |
| "learning_rate": 0.00016497527010015336, | |
| "loss": 0.6118, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.28010139416983526, | |
| "grad_norm": 0.05902037024497986, | |
| "learning_rate": 0.00016482283953077887, | |
| "loss": 0.5376, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.28073510773130544, | |
| "grad_norm": 0.04935478791594505, | |
| "learning_rate": 0.00016467014874448288, | |
| "loss": 0.5468, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.2813688212927757, | |
| "grad_norm": 0.08219460397958755, | |
| "learning_rate": 0.00016451719835420877, | |
| "loss": 0.5723, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.28200253485424587, | |
| "grad_norm": 0.08607888221740723, | |
| "learning_rate": 0.000164363988973942, | |
| "loss": 0.4821, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.2826362484157161, | |
| "grad_norm": 0.05368666350841522, | |
| "learning_rate": 0.00016421052121870755, | |
| "loss": 0.4759, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.2832699619771863, | |
| "grad_norm": 0.09421613812446594, | |
| "learning_rate": 0.00016405679570456782, | |
| "loss": 0.4634, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.28390367553865653, | |
| "grad_norm": 0.06585177779197693, | |
| "learning_rate": 0.0001639028130486198, | |
| "loss": 0.5049, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.28453738910012677, | |
| "grad_norm": 0.07445032149553299, | |
| "learning_rate": 0.00016374857386899268, | |
| "loss": 0.6255, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.28517110266159695, | |
| "grad_norm": 0.05892190709710121, | |
| "learning_rate": 0.00016359407878484552, | |
| "loss": 0.5035, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2858048162230672, | |
| "grad_norm": 0.08238600939512253, | |
| "learning_rate": 0.00016343932841636456, | |
| "loss": 0.4818, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.2864385297845374, | |
| "grad_norm": 0.0664915144443512, | |
| "learning_rate": 0.00016328432338476084, | |
| "loss": 0.4375, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.2870722433460076, | |
| "grad_norm": 0.04862099885940552, | |
| "learning_rate": 0.00016312906431226773, | |
| "loss": 0.4138, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.2877059569074778, | |
| "grad_norm": 0.04187007248401642, | |
| "learning_rate": 0.00016297355182213837, | |
| "loss": 0.3836, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.28833967046894804, | |
| "grad_norm": 0.05451095104217529, | |
| "learning_rate": 0.00016281778653864316, | |
| "loss": 0.4451, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.2889733840304182, | |
| "grad_norm": 0.061764512211084366, | |
| "learning_rate": 0.0001626617690870673, | |
| "loss": 0.6315, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.28960709759188846, | |
| "grad_norm": 0.05365981534123421, | |
| "learning_rate": 0.0001625055000937083, | |
| "loss": 0.4399, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.2902408111533587, | |
| "grad_norm": 0.10771326720714569, | |
| "learning_rate": 0.00016234898018587337, | |
| "loss": 0.5229, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.2908745247148289, | |
| "grad_norm": 0.05859148129820824, | |
| "learning_rate": 0.000162192209991877, | |
| "loss": 0.4254, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.2915082382762991, | |
| "grad_norm": 0.08183909952640533, | |
| "learning_rate": 0.00016203519014103837, | |
| "loss": 0.3658, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.2921419518377693, | |
| "grad_norm": 0.04404648020863533, | |
| "learning_rate": 0.00016187792126367886, | |
| "loss": 0.4138, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.29277566539923955, | |
| "grad_norm": 0.056379418820142746, | |
| "learning_rate": 0.00016172040399111957, | |
| "loss": 0.4781, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.29340937896070973, | |
| "grad_norm": 0.0440094955265522, | |
| "learning_rate": 0.00016156263895567867, | |
| "loss": 0.4623, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.29404309252217997, | |
| "grad_norm": 0.055651161819696426, | |
| "learning_rate": 0.00016140462679066885, | |
| "loss": 0.5002, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.2946768060836502, | |
| "grad_norm": 0.09338720887899399, | |
| "learning_rate": 0.00016124636813039502, | |
| "loss": 0.5199, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.2953105196451204, | |
| "grad_norm": 0.07024485617876053, | |
| "learning_rate": 0.00016108786361015143, | |
| "loss": 0.5378, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.29594423320659063, | |
| "grad_norm": 0.05211356282234192, | |
| "learning_rate": 0.00016092911386621938, | |
| "loss": 0.5895, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.2965779467680608, | |
| "grad_norm": 0.05571569502353668, | |
| "learning_rate": 0.00016077011953586452, | |
| "loss": 0.4952, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.29721166032953106, | |
| "grad_norm": 0.07663686573505402, | |
| "learning_rate": 0.00016061088125733433, | |
| "loss": 0.5341, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.29784537389100124, | |
| "grad_norm": 0.04910871386528015, | |
| "learning_rate": 0.0001604513996698556, | |
| "loss": 0.445, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.2984790874524715, | |
| "grad_norm": 0.07365076243877411, | |
| "learning_rate": 0.0001602916754136318, | |
| "loss": 0.5364, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.2991128010139417, | |
| "grad_norm": 0.08367875218391418, | |
| "learning_rate": 0.00016013170912984058, | |
| "loss": 0.5709, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.2997465145754119, | |
| "grad_norm": 0.06659605354070663, | |
| "learning_rate": 0.00015997150146063115, | |
| "loss": 0.5351, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.30038022813688214, | |
| "grad_norm": 0.05647695064544678, | |
| "learning_rate": 0.00015981105304912162, | |
| "loss": 0.4103, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.3010139416983523, | |
| "grad_norm": 0.05512802302837372, | |
| "learning_rate": 0.0001596503645393966, | |
| "loss": 0.4919, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.30164765525982257, | |
| "grad_norm": 0.07482268661260605, | |
| "learning_rate": 0.0001594894365765045, | |
| "loss": 0.5266, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.30228136882129275, | |
| "grad_norm": 0.08068813383579254, | |
| "learning_rate": 0.000159328269806455, | |
| "loss": 0.6268, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.302915082382763, | |
| "grad_norm": 0.05029362812638283, | |
| "learning_rate": 0.00015916686487621635, | |
| "loss": 0.4999, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.30354879594423323, | |
| "grad_norm": 0.0705760046839714, | |
| "learning_rate": 0.00015900522243371282, | |
| "loss": 0.5182, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.3041825095057034, | |
| "grad_norm": 0.20289281010627747, | |
| "learning_rate": 0.00015884334312782223, | |
| "loss": 0.6609, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.30481622306717365, | |
| "grad_norm": 0.05456344410777092, | |
| "learning_rate": 0.00015868122760837313, | |
| "loss": 0.4575, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.30544993662864384, | |
| "grad_norm": 0.06280402094125748, | |
| "learning_rate": 0.00015851887652614237, | |
| "loss": 0.4186, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.3060836501901141, | |
| "grad_norm": 0.06588494777679443, | |
| "learning_rate": 0.0001583562905328524, | |
| "loss": 0.5235, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.30671736375158426, | |
| "grad_norm": 0.14238761365413666, | |
| "learning_rate": 0.00015819347028116858, | |
| "loss": 0.5727, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.3073510773130545, | |
| "grad_norm": 0.0709756463766098, | |
| "learning_rate": 0.0001580304164246968, | |
| "loss": 0.4003, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.30798479087452474, | |
| "grad_norm": 0.3064410388469696, | |
| "learning_rate": 0.0001578671296179806, | |
| "loss": 0.524, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.3086185044359949, | |
| "grad_norm": 0.04714261740446091, | |
| "learning_rate": 0.00015770361051649863, | |
| "loss": 0.3965, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.30925221799746516, | |
| "grad_norm": 0.05930585786700249, | |
| "learning_rate": 0.00015753985977666213, | |
| "loss": 0.4562, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.30988593155893535, | |
| "grad_norm": 0.07817406952381134, | |
| "learning_rate": 0.00015737587805581219, | |
| "loss": 0.5846, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.3105196451204056, | |
| "grad_norm": 0.05352717638015747, | |
| "learning_rate": 0.00015721166601221698, | |
| "loss": 0.5899, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.31115335868187577, | |
| "grad_norm": 0.05995578318834305, | |
| "learning_rate": 0.00015704722430506942, | |
| "loss": 0.5521, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.311787072243346, | |
| "grad_norm": 0.15946877002716064, | |
| "learning_rate": 0.00015688255359448428, | |
| "loss": 0.6366, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.31242078580481625, | |
| "grad_norm": 0.06116756424307823, | |
| "learning_rate": 0.00015671765454149559, | |
| "loss": 0.4436, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.31305449936628643, | |
| "grad_norm": 0.272954523563385, | |
| "learning_rate": 0.00015655252780805414, | |
| "loss": 0.6512, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.31368821292775667, | |
| "grad_norm": 0.0462493859231472, | |
| "learning_rate": 0.0001563871740570245, | |
| "loss": 0.4075, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.31432192648922685, | |
| "grad_norm": 0.08116989582777023, | |
| "learning_rate": 0.00015622159395218272, | |
| "loss": 0.6353, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.3149556400506971, | |
| "grad_norm": 0.07837241142988205, | |
| "learning_rate": 0.0001560557881582134, | |
| "loss": 0.5087, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.3155893536121673, | |
| "grad_norm": 0.07096578180789948, | |
| "learning_rate": 0.00015588975734070717, | |
| "loss": 0.617, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.3162230671736375, | |
| "grad_norm": 0.07047011703252792, | |
| "learning_rate": 0.0001557235021661579, | |
| "loss": 0.6406, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.31685678073510776, | |
| "grad_norm": 0.06322109699249268, | |
| "learning_rate": 0.00015555702330196023, | |
| "loss": 0.5973, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.31749049429657794, | |
| "grad_norm": 0.1788979321718216, | |
| "learning_rate": 0.00015539032141640658, | |
| "loss": 0.6022, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.3181242078580482, | |
| "grad_norm": 0.05936092510819435, | |
| "learning_rate": 0.00015522339717868476, | |
| "loss": 0.4314, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.31875792141951836, | |
| "grad_norm": 0.05811009183526039, | |
| "learning_rate": 0.00015505625125887508, | |
| "loss": 0.5641, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.3193916349809886, | |
| "grad_norm": 0.11950580030679703, | |
| "learning_rate": 0.00015488888432794784, | |
| "loss": 0.5796, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.3200253485424588, | |
| "grad_norm": 0.04393857717514038, | |
| "learning_rate": 0.00015472129705776047, | |
| "loss": 0.3637, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.320659062103929, | |
| "grad_norm": 0.11919873207807541, | |
| "learning_rate": 0.00015455349012105486, | |
| "loss": 0.4967, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.32129277566539927, | |
| "grad_norm": 0.055687014013528824, | |
| "learning_rate": 0.00015438546419145488, | |
| "loss": 0.4932, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.32192648922686945, | |
| "grad_norm": 0.058437906205654144, | |
| "learning_rate": 0.00015421721994346327, | |
| "loss": 0.5351, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.3225602027883397, | |
| "grad_norm": 0.04726817458868027, | |
| "learning_rate": 0.00015404875805245935, | |
| "loss": 0.433, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.3231939163498099, | |
| "grad_norm": 0.04807078838348389, | |
| "learning_rate": 0.00015388007919469603, | |
| "loss": 0.4534, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.3238276299112801, | |
| "grad_norm": 0.07437839359045029, | |
| "learning_rate": 0.00015371118404729716, | |
| "loss": 0.584, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.3244613434727503, | |
| "grad_norm": 0.050413914024829865, | |
| "learning_rate": 0.00015354207328825491, | |
| "loss": 0.3788, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.32509505703422054, | |
| "grad_norm": 0.07370271533727646, | |
| "learning_rate": 0.0001533727475964269, | |
| "loss": 0.4768, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.3257287705956908, | |
| "grad_norm": 0.06317605078220367, | |
| "learning_rate": 0.00015320320765153367, | |
| "loss": 0.5665, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.32636248415716096, | |
| "grad_norm": 0.061747610569000244, | |
| "learning_rate": 0.00015303345413415564, | |
| "loss": 0.6061, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.3269961977186312, | |
| "grad_norm": 0.07719457149505615, | |
| "learning_rate": 0.00015286348772573075, | |
| "loss": 0.4041, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.3276299112801014, | |
| "grad_norm": 0.048449669033288956, | |
| "learning_rate": 0.0001526933091085515, | |
| "loss": 0.4865, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.3282636248415716, | |
| "grad_norm": 0.06786296516656876, | |
| "learning_rate": 0.00015252291896576214, | |
| "loss": 0.5036, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.3288973384030418, | |
| "grad_norm": 0.056538064032793045, | |
| "learning_rate": 0.0001523523179813562, | |
| "loss": 0.5077, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.32953105196451205, | |
| "grad_norm": 0.06674568355083466, | |
| "learning_rate": 0.00015218150684017347, | |
| "loss": 0.701, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.33016476552598223, | |
| "grad_norm": 0.07875782251358032, | |
| "learning_rate": 0.00015201048622789747, | |
| "loss": 0.5375, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.33079847908745247, | |
| "grad_norm": 0.06530767679214478, | |
| "learning_rate": 0.00015183925683105254, | |
| "loss": 0.5136, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.3314321926489227, | |
| "grad_norm": 0.06704816222190857, | |
| "learning_rate": 0.00015166781933700105, | |
| "loss": 0.6015, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.3320659062103929, | |
| "grad_norm": 0.061236705631017685, | |
| "learning_rate": 0.00015149617443394094, | |
| "loss": 0.5323, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.33269961977186313, | |
| "grad_norm": 0.11219301074743271, | |
| "learning_rate": 0.00015132432281090256, | |
| "loss": 0.6076, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 0.04857495799660683, | |
| "learning_rate": 0.00015115226515774618, | |
| "loss": 0.4208, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.33396704689480355, | |
| "grad_norm": 0.04918389767408371, | |
| "learning_rate": 0.0001509800021651591, | |
| "loss": 0.5069, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.33460076045627374, | |
| "grad_norm": 0.06613993644714355, | |
| "learning_rate": 0.00015080753452465296, | |
| "loss": 0.5443, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.335234474017744, | |
| "grad_norm": 0.05695560947060585, | |
| "learning_rate": 0.00015063486292856082, | |
| "loss": 0.5632, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.3358681875792142, | |
| "grad_norm": 0.05377941578626633, | |
| "learning_rate": 0.0001504619880700346, | |
| "loss": 0.3954, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.3365019011406844, | |
| "grad_norm": 0.06934024393558502, | |
| "learning_rate": 0.000150288910643042, | |
| "loss": 0.5669, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.33713561470215464, | |
| "grad_norm": 0.10134469717741013, | |
| "learning_rate": 0.00015011563134236408, | |
| "loss": 0.5248, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.3377693282636248, | |
| "grad_norm": 0.11486341804265976, | |
| "learning_rate": 0.00014994215086359212, | |
| "loss": 0.6074, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.33840304182509506, | |
| "grad_norm": 0.07518647611141205, | |
| "learning_rate": 0.00014976846990312514, | |
| "loss": 0.5196, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.33903675538656525, | |
| "grad_norm": 0.06767034530639648, | |
| "learning_rate": 0.0001495945891581668, | |
| "loss": 0.4821, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.3396704689480355, | |
| "grad_norm": 0.047710105776786804, | |
| "learning_rate": 0.00014942050932672277, | |
| "loss": 0.4468, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.3403041825095057, | |
| "grad_norm": 0.10735978931188583, | |
| "learning_rate": 0.000149246231107598, | |
| "loss": 0.4851, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.3409378960709759, | |
| "grad_norm": 0.0501636303961277, | |
| "learning_rate": 0.0001490717552003938, | |
| "loss": 0.4831, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.34157160963244615, | |
| "grad_norm": 0.052001163363456726, | |
| "learning_rate": 0.00014889708230550496, | |
| "loss": 0.5206, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.34220532319391633, | |
| "grad_norm": 0.06634392589330673, | |
| "learning_rate": 0.00014872221312411718, | |
| "loss": 0.5051, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.3428390367553866, | |
| "grad_norm": 0.053568046540021896, | |
| "learning_rate": 0.00014854714835820394, | |
| "loss": 0.5257, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.34347275031685676, | |
| "grad_norm": 0.05587064474821091, | |
| "learning_rate": 0.000148371888710524, | |
| "loss": 0.5924, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.344106463878327, | |
| "grad_norm": 0.055588286370038986, | |
| "learning_rate": 0.00014819643488461835, | |
| "loss": 0.4242, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.34474017743979724, | |
| "grad_norm": 0.07102327048778534, | |
| "learning_rate": 0.00014802078758480747, | |
| "loss": 0.5229, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.3453738910012674, | |
| "grad_norm": 0.06629911810159683, | |
| "learning_rate": 0.00014784494751618853, | |
| "loss": 0.435, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.34600760456273766, | |
| "grad_norm": 0.054953474551439285, | |
| "learning_rate": 0.00014766891538463254, | |
| "loss": 0.5796, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.34664131812420784, | |
| "grad_norm": 0.05943427234888077, | |
| "learning_rate": 0.00014749269189678142, | |
| "loss": 0.427, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.3472750316856781, | |
| "grad_norm": 0.05509248375892639, | |
| "learning_rate": 0.00014731627776004536, | |
| "loss": 0.5456, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.34790874524714827, | |
| "grad_norm": 0.0867772102355957, | |
| "learning_rate": 0.0001471396736825998, | |
| "loss": 0.5649, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.3485424588086185, | |
| "grad_norm": 0.08892481029033661, | |
| "learning_rate": 0.00014696288037338256, | |
| "loss": 0.5489, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.34917617237008874, | |
| "grad_norm": 0.07534697651863098, | |
| "learning_rate": 0.00014678589854209134, | |
| "loss": 0.4728, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.34980988593155893, | |
| "grad_norm": 0.03929729387164116, | |
| "learning_rate": 0.00014660872889918044, | |
| "loss": 0.3527, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.35044359949302917, | |
| "grad_norm": 0.06847205758094788, | |
| "learning_rate": 0.00014643137215585806, | |
| "loss": 0.4204, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.35107731305449935, | |
| "grad_norm": 0.06959280371665955, | |
| "learning_rate": 0.00014625382902408356, | |
| "loss": 0.5043, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.3517110266159696, | |
| "grad_norm": 0.057750072330236435, | |
| "learning_rate": 0.0001460761002165645, | |
| "loss": 0.5717, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.3523447401774398, | |
| "grad_norm": 0.0640597864985466, | |
| "learning_rate": 0.00014589818644675378, | |
| "loss": 0.5691, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.35297845373891, | |
| "grad_norm": 0.05334803834557533, | |
| "learning_rate": 0.0001457200884288468, | |
| "loss": 0.4438, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.35361216730038025, | |
| "grad_norm": 0.050739504396915436, | |
| "learning_rate": 0.0001455418068777786, | |
| "loss": 0.4418, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.35424588086185044, | |
| "grad_norm": 0.04636020213365555, | |
| "learning_rate": 0.00014536334250922093, | |
| "loss": 0.3724, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.3548795944233207, | |
| "grad_norm": 0.04343942552804947, | |
| "learning_rate": 0.00014518469603957943, | |
| "loss": 0.3218, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.35551330798479086, | |
| "grad_norm": 0.06655412167310715, | |
| "learning_rate": 0.00014500586818599076, | |
| "loss": 0.5158, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.3561470215462611, | |
| "grad_norm": 0.06236552819609642, | |
| "learning_rate": 0.0001448268596663197, | |
| "loss": 0.5348, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.3567807351077313, | |
| "grad_norm": 0.0551675446331501, | |
| "learning_rate": 0.00014464767119915629, | |
| "loss": 0.5191, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.3574144486692015, | |
| "grad_norm": 0.0711677148938179, | |
| "learning_rate": 0.00014446830350381293, | |
| "loss": 0.5787, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.35804816223067176, | |
| "grad_norm": 0.05513966456055641, | |
| "learning_rate": 0.00014428875730032145, | |
| "loss": 0.4056, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.35868187579214195, | |
| "grad_norm": 0.07472972571849823, | |
| "learning_rate": 0.00014410903330943029, | |
| "loss": 0.4217, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.3593155893536122, | |
| "grad_norm": 0.05436578020453453, | |
| "learning_rate": 0.00014392913225260153, | |
| "loss": 0.5195, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.35994930291508237, | |
| "grad_norm": 0.14983688294887543, | |
| "learning_rate": 0.00014374905485200817, | |
| "loss": 0.6106, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.3605830164765526, | |
| "grad_norm": 0.09657621383666992, | |
| "learning_rate": 0.00014356880183053104, | |
| "loss": 0.5487, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.3612167300380228, | |
| "grad_norm": 0.06128871440887451, | |
| "learning_rate": 0.00014338837391175582, | |
| "loss": 0.2958, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.36185044359949303, | |
| "grad_norm": 0.3691087067127228, | |
| "learning_rate": 0.00014320777181997052, | |
| "loss": 0.4846, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.36248415716096327, | |
| "grad_norm": 0.07217471301555634, | |
| "learning_rate": 0.00014302699628016208, | |
| "loss": 0.4256, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.36311787072243346, | |
| "grad_norm": 0.05521377548575401, | |
| "learning_rate": 0.00014284604801801396, | |
| "loss": 0.48, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.3637515842839037, | |
| "grad_norm": 0.04929598793387413, | |
| "learning_rate": 0.0001426649277599028, | |
| "loss": 0.5303, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.3643852978453739, | |
| "grad_norm": 0.050052460283041, | |
| "learning_rate": 0.00014248363623289574, | |
| "loss": 0.4863, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.3650190114068441, | |
| "grad_norm": 0.04534770920872688, | |
| "learning_rate": 0.0001423021741647474, | |
| "loss": 0.5239, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.3656527249683143, | |
| "grad_norm": 0.07982175797224045, | |
| "learning_rate": 0.0001421205422838971, | |
| "loss": 0.5924, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.36628643852978454, | |
| "grad_norm": 0.04665097966790199, | |
| "learning_rate": 0.0001419387413194657, | |
| "loss": 0.4579, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.3669201520912547, | |
| "grad_norm": 0.0721178650856018, | |
| "learning_rate": 0.0001417567720012529, | |
| "loss": 0.5235, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.36755386565272496, | |
| "grad_norm": 0.04838218167424202, | |
| "learning_rate": 0.00014157463505973418, | |
| "loss": 0.4138, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.3681875792141952, | |
| "grad_norm": 0.07050075381994247, | |
| "learning_rate": 0.00014139233122605798, | |
| "loss": 0.5749, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.3688212927756654, | |
| "grad_norm": 0.07718097418546677, | |
| "learning_rate": 0.00014120986123204257, | |
| "loss": 0.5399, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.3694550063371356, | |
| "grad_norm": 0.08041960000991821, | |
| "learning_rate": 0.00014102722581017332, | |
| "loss": 0.4264, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.3700887198986058, | |
| "grad_norm": 0.08530323952436447, | |
| "learning_rate": 0.00014084442569359964, | |
| "loss": 0.4534, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.37072243346007605, | |
| "grad_norm": 0.0639512911438942, | |
| "learning_rate": 0.00014066146161613208, | |
| "loss": 0.4295, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.37135614702154623, | |
| "grad_norm": 0.06618323922157288, | |
| "learning_rate": 0.00014047833431223938, | |
| "loss": 0.6437, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.3719898605830165, | |
| "grad_norm": 0.057782579213380814, | |
| "learning_rate": 0.00014029504451704557, | |
| "loss": 0.4855, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.3726235741444867, | |
| "grad_norm": 0.04774455726146698, | |
| "learning_rate": 0.00014011159296632678, | |
| "loss": 0.3035, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.3732572877059569, | |
| "grad_norm": 0.05420040711760521, | |
| "learning_rate": 0.00013992798039650872, | |
| "loss": 0.4444, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.37389100126742714, | |
| "grad_norm": 0.06096061319112778, | |
| "learning_rate": 0.00013974420754466328, | |
| "loss": 0.5743, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.3745247148288973, | |
| "grad_norm": 0.055694580078125, | |
| "learning_rate": 0.0001395602751485059, | |
| "loss": 0.5652, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.37515842839036756, | |
| "grad_norm": 0.0731462761759758, | |
| "learning_rate": 0.00013937618394639235, | |
| "loss": 0.4977, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.37579214195183774, | |
| "grad_norm": 0.05172240361571312, | |
| "learning_rate": 0.000139191934677316, | |
| "loss": 0.524, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.376425855513308, | |
| "grad_norm": 0.05123208463191986, | |
| "learning_rate": 0.00013900752808090468, | |
| "loss": 0.5355, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.3770595690747782, | |
| "grad_norm": 0.056850165128707886, | |
| "learning_rate": 0.00013882296489741783, | |
| "loss": 0.4908, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.3776932826362484, | |
| "grad_norm": 0.06634749472141266, | |
| "learning_rate": 0.00013863824586774344, | |
| "loss": 0.4283, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.37832699619771865, | |
| "grad_norm": 0.04840132221579552, | |
| "learning_rate": 0.00013845337173339507, | |
| "loss": 0.4897, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.37896070975918883, | |
| "grad_norm": 0.0695575699210167, | |
| "learning_rate": 0.000138268343236509, | |
| "loss": 0.583, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.37959442332065907, | |
| "grad_norm": 0.048906922340393066, | |
| "learning_rate": 0.00013808316111984107, | |
| "loss": 0.4496, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.38022813688212925, | |
| "grad_norm": 0.05677906423807144, | |
| "learning_rate": 0.0001378978261267639, | |
| "loss": 0.4717, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3808618504435995, | |
| "grad_norm": 0.10213559865951538, | |
| "learning_rate": 0.0001377123390012637, | |
| "loss": 0.6238, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.38149556400506973, | |
| "grad_norm": 0.050033628940582275, | |
| "learning_rate": 0.00013752670048793744, | |
| "loss": 0.4001, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.3821292775665399, | |
| "grad_norm": 0.07862118631601334, | |
| "learning_rate": 0.00013734091133198975, | |
| "loss": 0.5346, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.38276299112801015, | |
| "grad_norm": 0.053442683070898056, | |
| "learning_rate": 0.00013715497227923006, | |
| "loss": 0.4903, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.38339670468948034, | |
| "grad_norm": 0.06940152496099472, | |
| "learning_rate": 0.00013696888407606952, | |
| "loss": 0.568, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.3840304182509506, | |
| "grad_norm": 0.048307280987501144, | |
| "learning_rate": 0.00013678264746951787, | |
| "loss": 0.5245, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.38466413181242076, | |
| "grad_norm": 0.04498027265071869, | |
| "learning_rate": 0.00013659626320718077, | |
| "loss": 0.3682, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.385297845373891, | |
| "grad_norm": 0.05874482914805412, | |
| "learning_rate": 0.0001364097320372565, | |
| "loss": 0.5148, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.38593155893536124, | |
| "grad_norm": 0.04996568709611893, | |
| "learning_rate": 0.00013622305470853313, | |
| "loss": 0.4756, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.3865652724968314, | |
| "grad_norm": 0.07363967597484589, | |
| "learning_rate": 0.00013603623197038536, | |
| "loss": 0.5053, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.38719898605830166, | |
| "grad_norm": 0.0668586939573288, | |
| "learning_rate": 0.00013584926457277168, | |
| "loss": 0.5362, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.38783269961977185, | |
| "grad_norm": 0.06371022760868073, | |
| "learning_rate": 0.0001356621532662313, | |
| "loss": 0.5457, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.3884664131812421, | |
| "grad_norm": 0.07108695805072784, | |
| "learning_rate": 0.00013547489880188108, | |
| "loss": 0.5238, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.38910012674271227, | |
| "grad_norm": 0.05326547846198082, | |
| "learning_rate": 0.00013528750193141255, | |
| "loss": 0.4505, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.3897338403041825, | |
| "grad_norm": 0.08405181765556335, | |
| "learning_rate": 0.0001350999634070889, | |
| "loss": 0.6235, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.39036755386565275, | |
| "grad_norm": 0.05981157347559929, | |
| "learning_rate": 0.000134912283981742, | |
| "loss": 0.5175, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.39100126742712293, | |
| "grad_norm": 0.05275322496891022, | |
| "learning_rate": 0.00013472446440876927, | |
| "loss": 0.5536, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.3916349809885932, | |
| "grad_norm": 0.053324826061725616, | |
| "learning_rate": 0.00013453650544213076, | |
| "loss": 0.5926, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.39226869455006336, | |
| "grad_norm": 0.056955184787511826, | |
| "learning_rate": 0.0001343484078363461, | |
| "loss": 0.4393, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.3929024081115336, | |
| "grad_norm": 0.05232278257608414, | |
| "learning_rate": 0.00013416017234649146, | |
| "loss": 0.5163, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.3935361216730038, | |
| "grad_norm": 0.06405606865882874, | |
| "learning_rate": 0.00013397179972819643, | |
| "loss": 0.575, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.394169835234474, | |
| "grad_norm": 0.058417316526174545, | |
| "learning_rate": 0.00013378329073764119, | |
| "loss": 0.542, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.39480354879594426, | |
| "grad_norm": 0.05610906332731247, | |
| "learning_rate": 0.00013359464613155325, | |
| "loss": 0.4576, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.39543726235741444, | |
| "grad_norm": 0.06383884698152542, | |
| "learning_rate": 0.00013340586666720457, | |
| "loss": 0.5938, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.3960709759188847, | |
| "grad_norm": 0.05517081543803215, | |
| "learning_rate": 0.0001332169531024085, | |
| "loss": 0.4492, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.39670468948035487, | |
| "grad_norm": 0.07210738211870193, | |
| "learning_rate": 0.00013302790619551674, | |
| "loss": 0.6145, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.3973384030418251, | |
| "grad_norm": 0.06636934727430344, | |
| "learning_rate": 0.00013283872670541604, | |
| "loss": 0.4242, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.3979721166032953, | |
| "grad_norm": 0.07977598905563354, | |
| "learning_rate": 0.00013264941539152566, | |
| "loss": 0.5553, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.39860583016476553, | |
| "grad_norm": 0.056893352419137955, | |
| "learning_rate": 0.00013245997301379383, | |
| "loss": 0.4311, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.39923954372623577, | |
| "grad_norm": 1.9656810760498047, | |
| "learning_rate": 0.000132270400332695, | |
| "loss": 0.5208, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.39987325728770595, | |
| "grad_norm": 0.05810742825269699, | |
| "learning_rate": 0.00013208069810922673, | |
| "loss": 0.56, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.4005069708491762, | |
| "grad_norm": 0.08527707308530807, | |
| "learning_rate": 0.00013189086710490647, | |
| "loss": 0.5094, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.4011406844106464, | |
| "grad_norm": 0.07540644705295563, | |
| "learning_rate": 0.00013170090808176883, | |
| "loss": 0.5193, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.4017743979721166, | |
| "grad_norm": 0.08294139802455902, | |
| "learning_rate": 0.0001315108218023621, | |
| "loss": 0.5538, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.4024081115335868, | |
| "grad_norm": 0.07025711983442307, | |
| "learning_rate": 0.00013132060902974554, | |
| "loss": 0.5451, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.40304182509505704, | |
| "grad_norm": 0.05808630213141441, | |
| "learning_rate": 0.00013113027052748615, | |
| "loss": 0.5342, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.4036755386565273, | |
| "grad_norm": 0.040730297565460205, | |
| "learning_rate": 0.0001309398070596557, | |
| "loss": 0.4434, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.40430925221799746, | |
| "grad_norm": 0.06423351913690567, | |
| "learning_rate": 0.00013074921939082757, | |
| "loss": 0.5463, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.4049429657794677, | |
| "grad_norm": 0.07848164439201355, | |
| "learning_rate": 0.00013055850828607368, | |
| "loss": 0.651, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.4055766793409379, | |
| "grad_norm": 0.08495569974184036, | |
| "learning_rate": 0.00013036767451096148, | |
| "loss": 0.4675, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.4062103929024081, | |
| "grad_norm": 0.06640883535146713, | |
| "learning_rate": 0.0001301767188315509, | |
| "loss": 0.5261, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.4068441064638783, | |
| "grad_norm": 0.04708843678236008, | |
| "learning_rate": 0.00012998564201439116, | |
| "loss": 0.3417, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.40747782002534855, | |
| "grad_norm": 0.09854655712842941, | |
| "learning_rate": 0.00012979444482651782, | |
| "loss": 0.6236, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.40811153358681873, | |
| "grad_norm": 0.11556591838598251, | |
| "learning_rate": 0.00012960312803544962, | |
| "loss": 0.6022, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.40874524714828897, | |
| "grad_norm": 0.922315776348114, | |
| "learning_rate": 0.00012941169240918534, | |
| "loss": 0.4034, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.4093789607097592, | |
| "grad_norm": 0.08266003429889679, | |
| "learning_rate": 0.00012922013871620095, | |
| "loss": 0.5455, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.4100126742712294, | |
| "grad_norm": 0.05183318257331848, | |
| "learning_rate": 0.00012902846772544624, | |
| "loss": 0.437, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.41064638783269963, | |
| "grad_norm": 0.10581205785274506, | |
| "learning_rate": 0.00012883668020634195, | |
| "loss": 0.5762, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.4112801013941698, | |
| "grad_norm": 0.06646697223186493, | |
| "learning_rate": 0.00012864477692877657, | |
| "loss": 0.5462, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.41191381495564006, | |
| "grad_norm": 0.10537492483854294, | |
| "learning_rate": 0.00012845275866310324, | |
| "loss": 0.5098, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.41254752851711024, | |
| "grad_norm": 0.07540510594844818, | |
| "learning_rate": 0.0001282606261801368, | |
| "loss": 0.6208, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.4131812420785805, | |
| "grad_norm": 0.06597273051738739, | |
| "learning_rate": 0.0001280683802511504, | |
| "loss": 0.5896, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.4138149556400507, | |
| "grad_norm": 0.060704171657562256, | |
| "learning_rate": 0.0001278760216478728, | |
| "loss": 0.4844, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.4144486692015209, | |
| "grad_norm": 0.07420588284730911, | |
| "learning_rate": 0.00012768355114248494, | |
| "loss": 0.5673, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.41508238276299114, | |
| "grad_norm": 0.06360962241888046, | |
| "learning_rate": 0.00012749096950761702, | |
| "loss": 0.5322, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.4157160963244613, | |
| "grad_norm": 0.0631156638264656, | |
| "learning_rate": 0.00012729827751634533, | |
| "loss": 0.4863, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.41634980988593157, | |
| "grad_norm": 0.06497811526060104, | |
| "learning_rate": 0.00012710547594218917, | |
| "loss": 0.5775, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.41698352344740175, | |
| "grad_norm": 0.07515639066696167, | |
| "learning_rate": 0.00012691256555910768, | |
| "loss": 0.5207, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.417617237008872, | |
| "grad_norm": 0.073845274746418, | |
| "learning_rate": 0.0001267195471414969, | |
| "loss": 0.5306, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.41825095057034223, | |
| "grad_norm": 0.0654008612036705, | |
| "learning_rate": 0.0001265264214641864, | |
| "loss": 0.4677, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.4188846641318124, | |
| "grad_norm": 0.043669626116752625, | |
| "learning_rate": 0.00012633318930243648, | |
| "loss": 0.4221, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.41951837769328265, | |
| "grad_norm": 0.047917358577251434, | |
| "learning_rate": 0.00012613985143193482, | |
| "loss": 0.3635, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.42015209125475284, | |
| "grad_norm": 0.06635928153991699, | |
| "learning_rate": 0.0001259464086287934, | |
| "loss": 0.5453, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.4207858048162231, | |
| "grad_norm": 0.05781178921461105, | |
| "learning_rate": 0.0001257528616695455, | |
| "loss": 0.5, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.42141951837769326, | |
| "grad_norm": 0.0605790875852108, | |
| "learning_rate": 0.00012555921133114247, | |
| "loss": 0.5034, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.4220532319391635, | |
| "grad_norm": 0.04980487376451492, | |
| "learning_rate": 0.00012536545839095074, | |
| "loss": 0.4347, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.42268694550063374, | |
| "grad_norm": 0.06540601700544357, | |
| "learning_rate": 0.00012517160362674848, | |
| "loss": 0.5351, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.4233206590621039, | |
| "grad_norm": 0.049716752022504807, | |
| "learning_rate": 0.0001249776478167227, | |
| "loss": 0.4476, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.42395437262357416, | |
| "grad_norm": 0.10267884284257889, | |
| "learning_rate": 0.00012478359173946602, | |
| "loss": 0.5616, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.42458808618504434, | |
| "grad_norm": 0.05907197296619415, | |
| "learning_rate": 0.00012458943617397344, | |
| "loss": 0.4403, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.4252217997465146, | |
| "grad_norm": 0.09869077801704407, | |
| "learning_rate": 0.0001243951818996396, | |
| "loss": 0.6336, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.42585551330798477, | |
| "grad_norm": 0.07539843767881393, | |
| "learning_rate": 0.00012420082969625518, | |
| "loss": 0.6676, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.426489226869455, | |
| "grad_norm": 0.09385417401790619, | |
| "learning_rate": 0.00012400638034400395, | |
| "loss": 0.5714, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.42712294043092525, | |
| "grad_norm": 0.06782330572605133, | |
| "learning_rate": 0.00012381183462345982, | |
| "loss": 0.4956, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.42775665399239543, | |
| "grad_norm": 0.06100660189986229, | |
| "learning_rate": 0.00012361719331558345, | |
| "loss": 0.4217, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.42839036755386567, | |
| "grad_norm": 0.09908254444599152, | |
| "learning_rate": 0.00012342245720171918, | |
| "loss": 0.5405, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.42902408111533585, | |
| "grad_norm": 0.05237731710076332, | |
| "learning_rate": 0.00012322762706359203, | |
| "loss": 0.5044, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.4296577946768061, | |
| "grad_norm": 0.04910963028669357, | |
| "learning_rate": 0.00012303270368330439, | |
| "loss": 0.5073, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.4302915082382763, | |
| "grad_norm": 0.06268120557069778, | |
| "learning_rate": 0.00012283768784333293, | |
| "loss": 0.5736, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.4309252217997465, | |
| "grad_norm": 0.05207136273384094, | |
| "learning_rate": 0.00012264258032652559, | |
| "loss": 0.5319, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.43155893536121676, | |
| "grad_norm": 0.09583932906389236, | |
| "learning_rate": 0.00012244738191609814, | |
| "loss": 0.5891, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.43219264892268694, | |
| "grad_norm": 0.06307169795036316, | |
| "learning_rate": 0.00012225209339563145, | |
| "loss": 0.556, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.4328263624841572, | |
| "grad_norm": 0.062134500592947006, | |
| "learning_rate": 0.00012205671554906794, | |
| "loss": 0.5607, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.43346007604562736, | |
| "grad_norm": 0.04890581965446472, | |
| "learning_rate": 0.00012186124916070867, | |
| "loss": 0.4789, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.4340937896070976, | |
| "grad_norm": 0.04669584706425667, | |
| "learning_rate": 0.00012166569501521017, | |
| "loss": 0.4784, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.4347275031685678, | |
| "grad_norm": 0.05782284587621689, | |
| "learning_rate": 0.00012147005389758117, | |
| "loss": 0.5761, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.435361216730038, | |
| "grad_norm": 0.07015878707170486, | |
| "learning_rate": 0.00012127432659317956, | |
| "loss": 0.5462, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.43599493029150826, | |
| "grad_norm": 0.05989618971943855, | |
| "learning_rate": 0.00012107851388770928, | |
| "loss": 0.4671, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.43662864385297845, | |
| "grad_norm": 0.05732743442058563, | |
| "learning_rate": 0.000120882616567217, | |
| "loss": 0.4952, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.4372623574144487, | |
| "grad_norm": 0.06397297978401184, | |
| "learning_rate": 0.00012068663541808909, | |
| "loss": 0.5001, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.43789607097591887, | |
| "grad_norm": 0.05474892258644104, | |
| "learning_rate": 0.00012049057122704846, | |
| "loss": 0.4371, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.4385297845373891, | |
| "grad_norm": 0.0542195625603199, | |
| "learning_rate": 0.00012029442478115129, | |
| "loss": 0.4027, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.4391634980988593, | |
| "grad_norm": 0.0857028216123581, | |
| "learning_rate": 0.00012009819686778408, | |
| "loss": 0.5752, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.43979721166032953, | |
| "grad_norm": 0.07950462400913239, | |
| "learning_rate": 0.00011990188827466025, | |
| "loss": 0.4821, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.4404309252217998, | |
| "grad_norm": 0.13862280547618866, | |
| "learning_rate": 0.00011970549978981715, | |
| "loss": 0.5725, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.44106463878326996, | |
| "grad_norm": 0.06896214932203293, | |
| "learning_rate": 0.00011950903220161285, | |
| "loss": 0.5461, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.4416983523447402, | |
| "grad_norm": 0.05688636004924774, | |
| "learning_rate": 0.00011931248629872287, | |
| "loss": 0.6257, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.4423320659062104, | |
| "grad_norm": 0.07330068945884705, | |
| "learning_rate": 0.00011911586287013725, | |
| "loss": 0.4781, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.4429657794676806, | |
| "grad_norm": 0.057357531040906906, | |
| "learning_rate": 0.0001189191627051571, | |
| "loss": 0.3767, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.4435994930291508, | |
| "grad_norm": 0.05856744199991226, | |
| "learning_rate": 0.00011872238659339168, | |
| "loss": 0.5233, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.44423320659062104, | |
| "grad_norm": 0.04932614043354988, | |
| "learning_rate": 0.00011852553532475503, | |
| "loss": 0.5493, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.4448669201520912, | |
| "grad_norm": 0.10165086388587952, | |
| "learning_rate": 0.00011832860968946297, | |
| "loss": 0.626, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.44550063371356147, | |
| "grad_norm": 0.059510741382837296, | |
| "learning_rate": 0.00011813161047802985, | |
| "loss": 0.4979, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.4461343472750317, | |
| "grad_norm": 0.059596769511699677, | |
| "learning_rate": 0.00011793453848126526, | |
| "loss": 0.5903, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.4467680608365019, | |
| "grad_norm": 0.043714553117752075, | |
| "learning_rate": 0.00011773739449027108, | |
| "loss": 0.4347, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.44740177439797213, | |
| "grad_norm": 0.06549560278654099, | |
| "learning_rate": 0.00011754017929643817, | |
| "loss": 0.3608, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.4480354879594423, | |
| "grad_norm": 0.07389537245035172, | |
| "learning_rate": 0.00011734289369144323, | |
| "loss": 0.6457, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.44866920152091255, | |
| "grad_norm": 0.0611582025885582, | |
| "learning_rate": 0.00011714553846724558, | |
| "loss": 0.4182, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.44930291508238274, | |
| "grad_norm": 0.06682246923446655, | |
| "learning_rate": 0.00011694811441608402, | |
| "loss": 0.4601, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.449936628643853, | |
| "grad_norm": 0.05429236590862274, | |
| "learning_rate": 0.00011675062233047364, | |
| "loss": 0.5933, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.4505703422053232, | |
| "grad_norm": 0.07824891060590744, | |
| "learning_rate": 0.00011655306300320268, | |
| "loss": 0.6553, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.4512040557667934, | |
| "grad_norm": 0.0523335300385952, | |
| "learning_rate": 0.0001163554372273292, | |
| "loss": 0.382, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.45183776932826364, | |
| "grad_norm": 0.0779106542468071, | |
| "learning_rate": 0.00011615774579617817, | |
| "loss": 0.5208, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.4524714828897338, | |
| "grad_norm": 0.05331442877650261, | |
| "learning_rate": 0.00011595998950333793, | |
| "loss": 0.4668, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.45310519645120406, | |
| "grad_norm": 0.077408067882061, | |
| "learning_rate": 0.00011576216914265734, | |
| "loss": 0.4491, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.45373891001267425, | |
| "grad_norm": 0.2051779180765152, | |
| "learning_rate": 0.00011556428550824237, | |
| "loss": 0.5396, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.4543726235741445, | |
| "grad_norm": 0.052188027650117874, | |
| "learning_rate": 0.000115366339394453, | |
| "loss": 0.5815, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.4550063371356147, | |
| "grad_norm": 0.060880374163389206, | |
| "learning_rate": 0.0001151683315959001, | |
| "loss": 0.5019, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.4556400506970849, | |
| "grad_norm": 0.10370609164237976, | |
| "learning_rate": 0.000114970262907442, | |
| "loss": 0.5166, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.45627376425855515, | |
| "grad_norm": 0.059755194932222366, | |
| "learning_rate": 0.00011477213412418157, | |
| "loss": 0.5363, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.45690747782002533, | |
| "grad_norm": 0.05834079161286354, | |
| "learning_rate": 0.00011457394604146294, | |
| "loss": 0.487, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.45754119138149557, | |
| "grad_norm": 0.07119245082139969, | |
| "learning_rate": 0.00011437569945486819, | |
| "loss": 0.5711, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.45817490494296575, | |
| "grad_norm": 0.06131361797451973, | |
| "learning_rate": 0.00011417739516021428, | |
| "loss": 0.5226, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.458808618504436, | |
| "grad_norm": 0.04943651333451271, | |
| "learning_rate": 0.00011397903395354996, | |
| "loss": 0.4307, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.45944233206590623, | |
| "grad_norm": 0.046283356845378876, | |
| "learning_rate": 0.00011378061663115222, | |
| "loss": 0.3834, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.4600760456273764, | |
| "grad_norm": 0.0585121251642704, | |
| "learning_rate": 0.00011358214398952347, | |
| "loss": 0.6028, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.46070975918884666, | |
| "grad_norm": 0.08686511963605881, | |
| "learning_rate": 0.00011338361682538811, | |
| "loss": 0.4879, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.46134347275031684, | |
| "grad_norm": 0.07081152498722076, | |
| "learning_rate": 0.00011318503593568948, | |
| "loss": 0.6132, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.4619771863117871, | |
| "grad_norm": 0.05887436121702194, | |
| "learning_rate": 0.00011298640211758648, | |
| "loss": 0.5707, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.46261089987325726, | |
| "grad_norm": 0.06929212808609009, | |
| "learning_rate": 0.00011278771616845061, | |
| "loss": 0.449, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.4632446134347275, | |
| "grad_norm": 0.04306876286864281, | |
| "learning_rate": 0.00011258897888586255, | |
| "loss": 0.486, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.46387832699619774, | |
| "grad_norm": 0.05465447157621384, | |
| "learning_rate": 0.00011239019106760908, | |
| "loss": 0.4704, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.4645120405576679, | |
| "grad_norm": 0.058161042630672455, | |
| "learning_rate": 0.00011219135351167979, | |
| "loss": 0.5467, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.46514575411913817, | |
| "grad_norm": 0.06773436069488525, | |
| "learning_rate": 0.00011199246701626405, | |
| "loss": 0.5329, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.46577946768060835, | |
| "grad_norm": 0.04506424069404602, | |
| "learning_rate": 0.00011179353237974756, | |
| "loss": 0.4359, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.4664131812420786, | |
| "grad_norm": 0.05979963019490242, | |
| "learning_rate": 0.00011159455040070936, | |
| "loss": 0.5445, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.4670468948035488, | |
| "grad_norm": 0.0482424721121788, | |
| "learning_rate": 0.00011139552187791848, | |
| "loss": 0.4957, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.467680608365019, | |
| "grad_norm": 0.05097084492444992, | |
| "learning_rate": 0.00011119644761033078, | |
| "loss": 0.4642, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.46831432192648925, | |
| "grad_norm": 0.05539529025554657, | |
| "learning_rate": 0.00011099732839708586, | |
| "loss": 0.4227, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.46894803548795944, | |
| "grad_norm": 0.06280332803726196, | |
| "learning_rate": 0.0001107981650375036, | |
| "loss": 0.5842, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.4695817490494297, | |
| "grad_norm": 0.05138114467263222, | |
| "learning_rate": 0.00011059895833108119, | |
| "loss": 0.5681, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.47021546261089986, | |
| "grad_norm": 0.058239031583070755, | |
| "learning_rate": 0.0001103997090774898, | |
| "loss": 0.5582, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.4708491761723701, | |
| "grad_norm": 0.06877847760915756, | |
| "learning_rate": 0.00011020041807657138, | |
| "loss": 0.5912, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.4714828897338403, | |
| "grad_norm": 0.05639166757464409, | |
| "learning_rate": 0.00011000108612833551, | |
| "loss": 0.5888, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.4721166032953105, | |
| "grad_norm": 0.05756942555308342, | |
| "learning_rate": 0.0001098017140329561, | |
| "loss": 0.5451, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.47275031685678076, | |
| "grad_norm": 0.057658858597278595, | |
| "learning_rate": 0.00010960230259076818, | |
| "loss": 0.4939, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.47338403041825095, | |
| "grad_norm": 0.05436946451663971, | |
| "learning_rate": 0.00010940285260226488, | |
| "loss": 0.5084, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.4740177439797212, | |
| "grad_norm": 0.06349501758813858, | |
| "learning_rate": 0.00010920336486809393, | |
| "loss": 0.6588, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.47465145754119137, | |
| "grad_norm": 0.06300094723701477, | |
| "learning_rate": 0.00010900384018905463, | |
| "loss": 0.5655, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.4752851711026616, | |
| "grad_norm": 0.06454197317361832, | |
| "learning_rate": 0.00010880427936609455, | |
| "loss": 0.5455, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.4759188846641318, | |
| "grad_norm": 0.06663431227207184, | |
| "learning_rate": 0.0001086046832003064, | |
| "loss": 0.5263, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.47655259822560203, | |
| "grad_norm": 0.06523749232292175, | |
| "learning_rate": 0.00010840505249292476, | |
| "loss": 0.4109, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.47718631178707227, | |
| "grad_norm": 0.066495381295681, | |
| "learning_rate": 0.00010820538804532286, | |
| "loss": 0.5395, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.47782002534854245, | |
| "grad_norm": 0.07330245524644852, | |
| "learning_rate": 0.00010800569065900933, | |
| "loss": 0.5392, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.4784537389100127, | |
| "grad_norm": 0.05793917551636696, | |
| "learning_rate": 0.00010780596113562514, | |
| "loss": 0.5323, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.4790874524714829, | |
| "grad_norm": 0.05146726965904236, | |
| "learning_rate": 0.0001076062002769401, | |
| "loss": 0.4334, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.4797211660329531, | |
| "grad_norm": 0.06809573620557785, | |
| "learning_rate": 0.00010740640888484996, | |
| "loss": 0.5635, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.4803548795944233, | |
| "grad_norm": 0.05846872553229332, | |
| "learning_rate": 0.00010720658776137298, | |
| "loss": 0.5631, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.48098859315589354, | |
| "grad_norm": 0.06662282347679138, | |
| "learning_rate": 0.00010700673770864673, | |
| "loss": 0.3119, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.4816223067173637, | |
| "grad_norm": 0.05133543908596039, | |
| "learning_rate": 0.00010680685952892502, | |
| "loss": 0.5222, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.48225602027883396, | |
| "grad_norm": 0.06625013798475266, | |
| "learning_rate": 0.00010660695402457442, | |
| "loss": 0.4834, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.4828897338403042, | |
| "grad_norm": 0.07142903655767441, | |
| "learning_rate": 0.0001064070219980713, | |
| "loss": 0.551, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.4835234474017744, | |
| "grad_norm": 0.06273732334375381, | |
| "learning_rate": 0.00010620706425199849, | |
| "loss": 0.6681, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.4841571609632446, | |
| "grad_norm": 0.05467168986797333, | |
| "learning_rate": 0.000106007081589042, | |
| "loss": 0.5253, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.4847908745247148, | |
| "grad_norm": 0.05966407433152199, | |
| "learning_rate": 0.00010580707481198796, | |
| "loss": 0.516, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.48542458808618505, | |
| "grad_norm": 0.0470612607896328, | |
| "learning_rate": 0.00010560704472371919, | |
| "loss": 0.4632, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.48605830164765523, | |
| "grad_norm": 0.0659315288066864, | |
| "learning_rate": 0.00010540699212721219, | |
| "loss": 0.5164, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.4866920152091255, | |
| "grad_norm": 0.061314892023801804, | |
| "learning_rate": 0.0001052069178255337, | |
| "loss": 0.5968, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.4873257287705957, | |
| "grad_norm": 0.05175092816352844, | |
| "learning_rate": 0.00010500682262183772, | |
| "loss": 0.4665, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.4879594423320659, | |
| "grad_norm": 0.04965231940150261, | |
| "learning_rate": 0.00010480670731936208, | |
| "loss": 0.5068, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.48859315589353614, | |
| "grad_norm": 0.06218743324279785, | |
| "learning_rate": 0.0001046065727214253, | |
| "loss": 0.4043, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.4892268694550063, | |
| "grad_norm": 0.05969774350523949, | |
| "learning_rate": 0.00010440641963142336, | |
| "loss": 0.4471, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.48986058301647656, | |
| "grad_norm": 0.04538511112332344, | |
| "learning_rate": 0.00010420624885282653, | |
| "loss": 0.4891, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.49049429657794674, | |
| "grad_norm": 0.06056825444102287, | |
| "learning_rate": 0.00010400606118917593, | |
| "loss": 0.452, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.491128010139417, | |
| "grad_norm": 0.04322752729058266, | |
| "learning_rate": 0.00010380585744408065, | |
| "loss": 0.4044, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.4917617237008872, | |
| "grad_norm": 0.05485018342733383, | |
| "learning_rate": 0.0001036056384212142, | |
| "loss": 0.4913, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.4923954372623574, | |
| "grad_norm": 0.045921441167593, | |
| "learning_rate": 0.0001034054049243115, | |
| "loss": 0.4713, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.49302915082382764, | |
| "grad_norm": 0.05987657979130745, | |
| "learning_rate": 0.00010320515775716555, | |
| "loss": 0.4339, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.49366286438529783, | |
| "grad_norm": 0.06263814866542816, | |
| "learning_rate": 0.00010300489772362416, | |
| "loss": 0.5853, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.49429657794676807, | |
| "grad_norm": 0.07110540568828583, | |
| "learning_rate": 0.0001028046256275869, | |
| "loss": 0.5899, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.49493029150823825, | |
| "grad_norm": 0.05008992552757263, | |
| "learning_rate": 0.00010260434227300171, | |
| "loss": 0.5061, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.4955640050697085, | |
| "grad_norm": 0.05329698696732521, | |
| "learning_rate": 0.00010240404846386168, | |
| "loss": 0.5073, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.49619771863117873, | |
| "grad_norm": 0.060529615730047226, | |
| "learning_rate": 0.000102203745004202, | |
| "loss": 0.5194, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.4968314321926489, | |
| "grad_norm": 0.05783366411924362, | |
| "learning_rate": 0.00010200343269809642, | |
| "loss": 0.5393, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.49746514575411915, | |
| "grad_norm": 0.05209111049771309, | |
| "learning_rate": 0.00010180311234965433, | |
| "loss": 0.4858, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.49809885931558934, | |
| "grad_norm": 0.05122411996126175, | |
| "learning_rate": 0.0001016027847630174, | |
| "loss": 0.4476, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.4987325728770596, | |
| "grad_norm": 0.06304119527339935, | |
| "learning_rate": 0.00010140245074235624, | |
| "loss": 0.5741, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.49936628643852976, | |
| "grad_norm": 0.09011054039001465, | |
| "learning_rate": 0.00010120211109186747, | |
| "loss": 0.3418, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.06214231252670288, | |
| "learning_rate": 0.00010100176661577015, | |
| "loss": 0.5186, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.5006337135614702, | |
| "grad_norm": 0.19616113603115082, | |
| "learning_rate": 0.00010080141811830277, | |
| "loss": 0.5121, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.5012674271229405, | |
| "grad_norm": 0.05623235926032066, | |
| "learning_rate": 0.00010060106640372, | |
| "loss": 0.4457, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.5019011406844106, | |
| "grad_norm": 0.06097716465592384, | |
| "learning_rate": 0.00010040071227628938, | |
| "loss": 0.4578, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.5025348542458808, | |
| "grad_norm": 0.042372945696115494, | |
| "learning_rate": 0.00010020035654028816, | |
| "loss": 0.3896, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.5031685678073511, | |
| "grad_norm": 0.05927233397960663, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6026, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.5038022813688213, | |
| "grad_norm": 0.06227416917681694, | |
| "learning_rate": 9.979964345971188e-05, | |
| "loss": 0.4366, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.5044359949302915, | |
| "grad_norm": 0.055778343230485916, | |
| "learning_rate": 9.959928772371061e-05, | |
| "loss": 0.4425, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.5050697084917617, | |
| "grad_norm": 0.04457565397024155, | |
| "learning_rate": 9.939893359628001e-05, | |
| "loss": 0.5326, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.5057034220532319, | |
| "grad_norm": 0.05732344835996628, | |
| "learning_rate": 9.919858188169724e-05, | |
| "loss": 0.5296, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.5063371356147022, | |
| "grad_norm": 0.04832519590854645, | |
| "learning_rate": 9.899823338422986e-05, | |
| "loss": 0.3992, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.5069708491761724, | |
| "grad_norm": 0.06504333764314651, | |
| "learning_rate": 9.879788890813255e-05, | |
| "loss": 0.3772, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5076045627376425, | |
| "grad_norm": 0.05304650217294693, | |
| "learning_rate": 9.859754925764378e-05, | |
| "loss": 0.5455, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.5082382762991128, | |
| "grad_norm": 0.04738354682922363, | |
| "learning_rate": 9.839721523698264e-05, | |
| "loss": 0.4221, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.508871989860583, | |
| "grad_norm": 0.061429157853126526, | |
| "learning_rate": 9.819688765034568e-05, | |
| "loss": 0.5197, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.5095057034220533, | |
| "grad_norm": 0.04687187448143959, | |
| "learning_rate": 9.79965673019036e-05, | |
| "loss": 0.417, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.5101394169835235, | |
| "grad_norm": 0.05944183096289635, | |
| "learning_rate": 9.779625499579805e-05, | |
| "loss": 0.6043, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.5107731305449936, | |
| "grad_norm": 0.05007549747824669, | |
| "learning_rate": 9.75959515361383e-05, | |
| "loss": 0.5161, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.5114068441064639, | |
| "grad_norm": 0.0616040863096714, | |
| "learning_rate": 9.739565772699831e-05, | |
| "loss": 0.6219, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.5120405576679341, | |
| "grad_norm": 0.23154355585575104, | |
| "learning_rate": 9.719537437241312e-05, | |
| "loss": 0.4653, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.5126742712294043, | |
| "grad_norm": 0.08757317066192627, | |
| "learning_rate": 9.699510227637586e-05, | |
| "loss": 0.7004, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.5133079847908745, | |
| "grad_norm": 0.053165238350629807, | |
| "learning_rate": 9.679484224283449e-05, | |
| "loss": 0.5367, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.5139416983523447, | |
| "grad_norm": 0.05361173674464226, | |
| "learning_rate": 9.659459507568853e-05, | |
| "loss": 0.5044, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.514575411913815, | |
| "grad_norm": 0.0656973198056221, | |
| "learning_rate": 9.63943615787858e-05, | |
| "loss": 0.5785, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.5152091254752852, | |
| "grad_norm": 0.056508004665374756, | |
| "learning_rate": 9.619414255591937e-05, | |
| "loss": 0.505, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.5158428390367554, | |
| "grad_norm": 0.061718232929706573, | |
| "learning_rate": 9.599393881082408e-05, | |
| "loss": 0.5194, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.5164765525982256, | |
| "grad_norm": 0.055572785437107086, | |
| "learning_rate": 9.579375114717351e-05, | |
| "loss": 0.4633, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.5171102661596958, | |
| "grad_norm": 0.0603361539542675, | |
| "learning_rate": 9.559358036857663e-05, | |
| "loss": 0.4628, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.517743979721166, | |
| "grad_norm": 0.08223170042037964, | |
| "learning_rate": 9.53934272785747e-05, | |
| "loss": 0.4932, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.5183776932826363, | |
| "grad_norm": 0.05056726187467575, | |
| "learning_rate": 9.519329268063795e-05, | |
| "loss": 0.5267, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.5190114068441065, | |
| "grad_norm": 0.0726744681596756, | |
| "learning_rate": 9.499317737816229e-05, | |
| "loss": 0.5233, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.5196451204055766, | |
| "grad_norm": 0.06118292361497879, | |
| "learning_rate": 9.479308217446633e-05, | |
| "loss": 0.5627, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.5202788339670469, | |
| "grad_norm": 0.05231308937072754, | |
| "learning_rate": 9.459300787278785e-05, | |
| "loss": 0.5238, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.5209125475285171, | |
| "grad_norm": 0.0555204376578331, | |
| "learning_rate": 9.439295527628081e-05, | |
| "loss": 0.5648, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.5215462610899874, | |
| "grad_norm": 0.056751273572444916, | |
| "learning_rate": 9.419292518801205e-05, | |
| "loss": 0.6158, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.5221799746514575, | |
| "grad_norm": 0.055247753858566284, | |
| "learning_rate": 9.399291841095802e-05, | |
| "loss": 0.5938, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.5228136882129277, | |
| "grad_norm": 0.05264151841402054, | |
| "learning_rate": 9.379293574800154e-05, | |
| "loss": 0.4908, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.523447401774398, | |
| "grad_norm": 0.06633622944355011, | |
| "learning_rate": 9.359297800192872e-05, | |
| "loss": 0.4516, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.5240811153358682, | |
| "grad_norm": 0.06326263397932053, | |
| "learning_rate": 9.33930459754256e-05, | |
| "loss": 0.4583, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.5247148288973384, | |
| "grad_norm": 0.061470355838537216, | |
| "learning_rate": 9.319314047107504e-05, | |
| "loss": 0.5209, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.5253485424588086, | |
| "grad_norm": 0.048166628926992416, | |
| "learning_rate": 9.299326229135326e-05, | |
| "loss": 0.5184, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.5259822560202788, | |
| "grad_norm": 0.09853006154298782, | |
| "learning_rate": 9.279341223862705e-05, | |
| "loss": 0.5219, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.526615969581749, | |
| "grad_norm": 0.5687222480773926, | |
| "learning_rate": 9.259359111515006e-05, | |
| "loss": 0.4086, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.5272496831432193, | |
| "grad_norm": 0.05580870062112808, | |
| "learning_rate": 9.239379972305992e-05, | |
| "loss": 0.492, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.5278833967046895, | |
| "grad_norm": 0.05025511607527733, | |
| "learning_rate": 9.219403886437489e-05, | |
| "loss": 0.5146, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.5285171102661597, | |
| "grad_norm": 0.05787106603384018, | |
| "learning_rate": 9.199430934099068e-05, | |
| "loss": 0.5356, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.5291508238276299, | |
| "grad_norm": 0.06410747766494751, | |
| "learning_rate": 9.179461195467714e-05, | |
| "loss": 0.6312, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.5297845373891001, | |
| "grad_norm": 0.053113870322704315, | |
| "learning_rate": 9.159494750707526e-05, | |
| "loss": 0.4838, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.5304182509505704, | |
| "grad_norm": 0.06018316373229027, | |
| "learning_rate": 9.139531679969362e-05, | |
| "loss": 0.4631, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.5310519645120405, | |
| "grad_norm": 0.05416072905063629, | |
| "learning_rate": 9.119572063390549e-05, | |
| "loss": 0.4439, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.5316856780735107, | |
| "grad_norm": 0.08766517043113708, | |
| "learning_rate": 9.09961598109454e-05, | |
| "loss": 0.5445, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.532319391634981, | |
| "grad_norm": 0.0619327537715435, | |
| "learning_rate": 9.079663513190611e-05, | |
| "loss": 0.5428, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.5329531051964512, | |
| "grad_norm": 0.059881288558244705, | |
| "learning_rate": 9.059714739773516e-05, | |
| "loss": 0.513, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.5335868187579215, | |
| "grad_norm": 0.06464383006095886, | |
| "learning_rate": 9.039769740923183e-05, | |
| "loss": 0.4746, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.5342205323193916, | |
| "grad_norm": 0.054081957787275314, | |
| "learning_rate": 9.019828596704394e-05, | |
| "loss": 0.391, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.5348542458808618, | |
| "grad_norm": 0.07097287476062775, | |
| "learning_rate": 8.999891387166453e-05, | |
| "loss": 0.5668, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.5354879594423321, | |
| "grad_norm": 0.050909094512462616, | |
| "learning_rate": 8.979958192342862e-05, | |
| "loss": 0.5574, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.5361216730038023, | |
| "grad_norm": 0.0605645477771759, | |
| "learning_rate": 8.960029092251023e-05, | |
| "loss": 0.5608, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.5367553865652726, | |
| "grad_norm": 0.05807255208492279, | |
| "learning_rate": 8.940104166891885e-05, | |
| "loss": 0.5057, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.5373891001267427, | |
| "grad_norm": 0.05229676514863968, | |
| "learning_rate": 8.920183496249642e-05, | |
| "loss": 0.4968, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.5380228136882129, | |
| "grad_norm": 0.05831581726670265, | |
| "learning_rate": 8.900267160291416e-05, | |
| "loss": 0.421, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.5386565272496832, | |
| "grad_norm": 0.04102315753698349, | |
| "learning_rate": 8.880355238966923e-05, | |
| "loss": 0.4176, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.5392902408111534, | |
| "grad_norm": 0.04635517671704292, | |
| "learning_rate": 8.860447812208157e-05, | |
| "loss": 0.4623, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.5399239543726235, | |
| "grad_norm": 0.08849713206291199, | |
| "learning_rate": 8.840544959929065e-05, | |
| "loss": 0.6421, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.5405576679340938, | |
| "grad_norm": 0.07401357591152191, | |
| "learning_rate": 8.820646762025246e-05, | |
| "loss": 0.4958, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.541191381495564, | |
| "grad_norm": 0.07079368084669113, | |
| "learning_rate": 8.800753298373596e-05, | |
| "loss": 0.4828, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.5418250950570342, | |
| "grad_norm": 0.06453298032283783, | |
| "learning_rate": 8.780864648832022e-05, | |
| "loss": 0.6269, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.5424588086185045, | |
| "grad_norm": 0.05445917323231697, | |
| "learning_rate": 8.760980893239094e-05, | |
| "loss": 0.5873, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.5430925221799746, | |
| "grad_norm": 0.047000445425510406, | |
| "learning_rate": 8.741102111413748e-05, | |
| "loss": 0.4938, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.5437262357414449, | |
| "grad_norm": 0.06307143718004227, | |
| "learning_rate": 8.721228383154939e-05, | |
| "loss": 0.602, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.5443599493029151, | |
| "grad_norm": 0.046326130628585815, | |
| "learning_rate": 8.701359788241354e-05, | |
| "loss": 0.453, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.5449936628643853, | |
| "grad_norm": 0.05878138169646263, | |
| "learning_rate": 8.681496406431056e-05, | |
| "loss": 0.5619, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.5456273764258555, | |
| "grad_norm": 0.06828006356954575, | |
| "learning_rate": 8.66163831746119e-05, | |
| "loss": 0.4723, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.5462610899873257, | |
| "grad_norm": 0.062354519963264465, | |
| "learning_rate": 8.641785601047654e-05, | |
| "loss": 0.5345, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.5468948035487959, | |
| "grad_norm": 0.052326980978250504, | |
| "learning_rate": 8.621938336884781e-05, | |
| "loss": 0.5096, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.5475285171102662, | |
| "grad_norm": 0.09620847553014755, | |
| "learning_rate": 8.602096604645009e-05, | |
| "loss": 0.6523, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.5481622306717364, | |
| "grad_norm": 0.07187427580356598, | |
| "learning_rate": 8.58226048397857e-05, | |
| "loss": 0.5051, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.5487959442332065, | |
| "grad_norm": 0.058141518384218216, | |
| "learning_rate": 8.562430054513184e-05, | |
| "loss": 0.501, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.5494296577946768, | |
| "grad_norm": 0.037818700075149536, | |
| "learning_rate": 8.54260539585371e-05, | |
| "loss": 0.2518, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.550063371356147, | |
| "grad_norm": 0.04658188298344612, | |
| "learning_rate": 8.522786587581844e-05, | |
| "loss": 0.4531, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.5506970849176173, | |
| "grad_norm": 0.04527122154831886, | |
| "learning_rate": 8.502973709255804e-05, | |
| "loss": 0.4592, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.5513307984790875, | |
| "grad_norm": 0.05705267935991287, | |
| "learning_rate": 8.483166840409995e-05, | |
| "loss": 0.4575, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.5519645120405576, | |
| "grad_norm": 0.08155850321054459, | |
| "learning_rate": 8.463366060554698e-05, | |
| "loss": 0.5167, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.5525982256020279, | |
| "grad_norm": 0.07388201355934143, | |
| "learning_rate": 8.443571449175766e-05, | |
| "loss": 0.6817, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.5532319391634981, | |
| "grad_norm": 0.06419550627470016, | |
| "learning_rate": 8.423783085734268e-05, | |
| "loss": 0.5468, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.5538656527249683, | |
| "grad_norm": 0.05985475331544876, | |
| "learning_rate": 8.404001049666211e-05, | |
| "loss": 0.5247, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.5544993662864385, | |
| "grad_norm": 0.05610859394073486, | |
| "learning_rate": 8.384225420382185e-05, | |
| "loss": 0.5088, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.5551330798479087, | |
| "grad_norm": 0.5789166688919067, | |
| "learning_rate": 8.36445627726708e-05, | |
| "loss": 0.5744, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.555766793409379, | |
| "grad_norm": 0.05248624086380005, | |
| "learning_rate": 8.344693699679736e-05, | |
| "loss": 0.4797, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.5564005069708492, | |
| "grad_norm": 0.06693774461746216, | |
| "learning_rate": 8.324937766952638e-05, | |
| "loss": 0.5354, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.5570342205323194, | |
| "grad_norm": 0.058544524013996124, | |
| "learning_rate": 8.305188558391599e-05, | |
| "loss": 0.602, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.5576679340937896, | |
| "grad_norm": 0.05111921206116676, | |
| "learning_rate": 8.285446153275445e-05, | |
| "loss": 0.4541, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.5583016476552598, | |
| "grad_norm": 0.0569741316139698, | |
| "learning_rate": 8.265710630855677e-05, | |
| "loss": 0.5306, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.55893536121673, | |
| "grad_norm": 0.13403062522411346, | |
| "learning_rate": 8.245982070356185e-05, | |
| "loss": 0.56, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.5595690747782003, | |
| "grad_norm": 0.07512082904577255, | |
| "learning_rate": 8.226260550972895e-05, | |
| "loss": 0.5951, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.5602027883396705, | |
| "grad_norm": 0.046271927654743195, | |
| "learning_rate": 8.206546151873478e-05, | |
| "loss": 0.436, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.5608365019011406, | |
| "grad_norm": 0.05913880839943886, | |
| "learning_rate": 8.186838952197018e-05, | |
| "loss": 0.5116, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.5614702154626109, | |
| "grad_norm": 0.05060280114412308, | |
| "learning_rate": 8.167139031053705e-05, | |
| "loss": 0.5245, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.5621039290240811, | |
| "grad_norm": 0.0638653039932251, | |
| "learning_rate": 8.1474464675245e-05, | |
| "loss": 0.5099, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.5627376425855514, | |
| "grad_norm": 0.04928203299641609, | |
| "learning_rate": 8.127761340660835e-05, | |
| "loss": 0.3581, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.5633713561470215, | |
| "grad_norm": 0.04772525653243065, | |
| "learning_rate": 8.108083729484292e-05, | |
| "loss": 0.4432, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.5640050697084917, | |
| "grad_norm": 0.0834617018699646, | |
| "learning_rate": 8.08841371298628e-05, | |
| "loss": 0.6493, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.564638783269962, | |
| "grad_norm": 0.06321214139461517, | |
| "learning_rate": 8.068751370127712e-05, | |
| "loss": 0.4376, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.5652724968314322, | |
| "grad_norm": 0.07898563891649246, | |
| "learning_rate": 8.049096779838719e-05, | |
| "loss": 0.3803, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.5659062103929025, | |
| "grad_norm": 0.061078350991010666, | |
| "learning_rate": 8.029450021018287e-05, | |
| "loss": 0.4417, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.5665399239543726, | |
| "grad_norm": 0.05912580341100693, | |
| "learning_rate": 8.009811172533976e-05, | |
| "loss": 0.4558, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.5671736375158428, | |
| "grad_norm": 0.06853251159191132, | |
| "learning_rate": 7.990180313221596e-05, | |
| "loss": 0.4647, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.5678073510773131, | |
| "grad_norm": 0.13536880910396576, | |
| "learning_rate": 7.970557521884873e-05, | |
| "loss": 0.4849, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.5684410646387833, | |
| "grad_norm": 0.051422230899333954, | |
| "learning_rate": 7.950942877295155e-05, | |
| "loss": 0.5153, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.5690747782002535, | |
| "grad_norm": 0.05563550814986229, | |
| "learning_rate": 7.931336458191092e-05, | |
| "loss": 0.4608, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.5697084917617237, | |
| "grad_norm": 0.05387943610548973, | |
| "learning_rate": 7.911738343278304e-05, | |
| "loss": 0.308, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.5703422053231939, | |
| "grad_norm": 0.05549965053796768, | |
| "learning_rate": 7.892148611229075e-05, | |
| "loss": 0.477, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.5709759188846641, | |
| "grad_norm": 0.06661087274551392, | |
| "learning_rate": 7.872567340682045e-05, | |
| "loss": 0.5179, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.5716096324461344, | |
| "grad_norm": 0.06925564259290695, | |
| "learning_rate": 7.852994610241885e-05, | |
| "loss": 0.4785, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.5722433460076045, | |
| "grad_norm": 0.05441868305206299, | |
| "learning_rate": 7.833430498478988e-05, | |
| "loss": 0.5596, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.5728770595690748, | |
| "grad_norm": 0.04862716421484947, | |
| "learning_rate": 7.813875083929132e-05, | |
| "loss": 0.4659, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.573510773130545, | |
| "grad_norm": 0.07547637820243835, | |
| "learning_rate": 7.794328445093208e-05, | |
| "loss": 0.4485, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.5741444866920152, | |
| "grad_norm": 0.08132816851139069, | |
| "learning_rate": 7.774790660436858e-05, | |
| "loss": 0.6294, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.5747782002534855, | |
| "grad_norm": 0.06841199100017548, | |
| "learning_rate": 7.755261808390187e-05, | |
| "loss": 0.4667, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.5754119138149556, | |
| "grad_norm": 0.05556390807032585, | |
| "learning_rate": 7.735741967347445e-05, | |
| "loss": 0.5166, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.5760456273764258, | |
| "grad_norm": 0.07941378653049469, | |
| "learning_rate": 7.716231215666711e-05, | |
| "loss": 0.4368, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.5766793409378961, | |
| "grad_norm": 0.08058507740497589, | |
| "learning_rate": 7.696729631669564e-05, | |
| "loss": 0.6772, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.5773130544993663, | |
| "grad_norm": 0.06999081373214722, | |
| "learning_rate": 7.6772372936408e-05, | |
| "loss": 0.6374, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.5779467680608364, | |
| "grad_norm": 0.05269391089677811, | |
| "learning_rate": 7.657754279828083e-05, | |
| "loss": 0.3222, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.5785804816223067, | |
| "grad_norm": 0.059798724949359894, | |
| "learning_rate": 7.63828066844166e-05, | |
| "loss": 0.5354, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.5792141951837769, | |
| "grad_norm": 0.05695294961333275, | |
| "learning_rate": 7.618816537654018e-05, | |
| "loss": 0.4552, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.5798479087452472, | |
| "grad_norm": 0.07460351288318634, | |
| "learning_rate": 7.599361965599606e-05, | |
| "loss": 0.581, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.5804816223067174, | |
| "grad_norm": 0.04292193427681923, | |
| "learning_rate": 7.579917030374489e-05, | |
| "loss": 0.435, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.5811153358681875, | |
| "grad_norm": 0.05156205967068672, | |
| "learning_rate": 7.56048181003604e-05, | |
| "loss": 0.5231, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.5817490494296578, | |
| "grad_norm": 0.05971655622124672, | |
| "learning_rate": 7.541056382602657e-05, | |
| "loss": 0.5196, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.582382762991128, | |
| "grad_norm": 0.06214692071080208, | |
| "learning_rate": 7.521640826053404e-05, | |
| "loss": 0.5237, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.5830164765525983, | |
| "grad_norm": 0.05921977758407593, | |
| "learning_rate": 7.502235218327731e-05, | |
| "loss": 0.5444, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.5836501901140685, | |
| "grad_norm": 0.05885602533817291, | |
| "learning_rate": 7.482839637325153e-05, | |
| "loss": 0.4045, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.5842839036755386, | |
| "grad_norm": 0.05014495924115181, | |
| "learning_rate": 7.463454160904928e-05, | |
| "loss": 0.4261, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.5849176172370089, | |
| "grad_norm": 0.07014278322458267, | |
| "learning_rate": 7.444078866885753e-05, | |
| "loss": 0.5934, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.5855513307984791, | |
| "grad_norm": 0.04919711500406265, | |
| "learning_rate": 7.424713833045452e-05, | |
| "loss": 0.4819, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.5861850443599493, | |
| "grad_norm": 0.05253986269235611, | |
| "learning_rate": 7.405359137120662e-05, | |
| "loss": 0.5067, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.5868187579214195, | |
| "grad_norm": 0.05310770869255066, | |
| "learning_rate": 7.386014856806523e-05, | |
| "loss": 0.4878, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.5874524714828897, | |
| "grad_norm": 0.0604504756629467, | |
| "learning_rate": 7.366681069756352e-05, | |
| "loss": 0.3944, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.5880861850443599, | |
| "grad_norm": 0.042067963629961014, | |
| "learning_rate": 7.347357853581361e-05, | |
| "loss": 0.412, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.5887198986058302, | |
| "grad_norm": 0.04595714807510376, | |
| "learning_rate": 7.328045285850313e-05, | |
| "loss": 0.4234, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.5893536121673004, | |
| "grad_norm": 0.05038761347532272, | |
| "learning_rate": 7.308743444089232e-05, | |
| "loss": 0.5915, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.5899873257287706, | |
| "grad_norm": 0.061250437051057816, | |
| "learning_rate": 7.289452405781084e-05, | |
| "loss": 0.6433, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.5906210392902408, | |
| "grad_norm": 0.07605701684951782, | |
| "learning_rate": 7.270172248365468e-05, | |
| "loss": 0.6252, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.591254752851711, | |
| "grad_norm": 0.05717351287603378, | |
| "learning_rate": 7.250903049238297e-05, | |
| "loss": 0.4693, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.5918884664131813, | |
| "grad_norm": 0.05955088511109352, | |
| "learning_rate": 7.231644885751507e-05, | |
| "loss": 0.5883, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.5925221799746515, | |
| "grad_norm": 0.06226349249482155, | |
| "learning_rate": 7.212397835212722e-05, | |
| "loss": 0.4226, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.5931558935361216, | |
| "grad_norm": 0.062126316130161285, | |
| "learning_rate": 7.193161974884964e-05, | |
| "loss": 0.568, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.5937896070975919, | |
| "grad_norm": 0.08957802504301071, | |
| "learning_rate": 7.173937381986323e-05, | |
| "loss": 0.5132, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.5944233206590621, | |
| "grad_norm": 0.06909901648759842, | |
| "learning_rate": 7.154724133689677e-05, | |
| "loss": 0.5055, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.5950570342205324, | |
| "grad_norm": 0.0510685071349144, | |
| "learning_rate": 7.135522307122346e-05, | |
| "loss": 0.5349, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.5956907477820025, | |
| "grad_norm": 0.05713349208235741, | |
| "learning_rate": 7.116331979365805e-05, | |
| "loss": 0.4435, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.5963244613434727, | |
| "grad_norm": 0.05836547538638115, | |
| "learning_rate": 7.097153227455379e-05, | |
| "loss": 0.4525, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.596958174904943, | |
| "grad_norm": 0.058628011494874954, | |
| "learning_rate": 7.077986128379908e-05, | |
| "loss": 0.3689, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.5975918884664132, | |
| "grad_norm": 0.05638744682073593, | |
| "learning_rate": 7.058830759081464e-05, | |
| "loss": 0.4296, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.5982256020278834, | |
| "grad_norm": 0.04396173730492592, | |
| "learning_rate": 7.039687196455042e-05, | |
| "loss": 0.4846, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.5988593155893536, | |
| "grad_norm": 0.051896654069423676, | |
| "learning_rate": 7.02055551734822e-05, | |
| "loss": 0.5216, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.5994930291508238, | |
| "grad_norm": 0.07102696597576141, | |
| "learning_rate": 7.001435798560883e-05, | |
| "loss": 0.5707, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.600126742712294, | |
| "grad_norm": 0.06377355009317398, | |
| "learning_rate": 6.982328116844912e-05, | |
| "loss": 0.4078, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.6007604562737643, | |
| "grad_norm": 0.05575268715620041, | |
| "learning_rate": 6.963232548903853e-05, | |
| "loss": 0.5136, | |
| "step": 948 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1578, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 158, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.3031479668589462e+19, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
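The state above is the standard Hugging Face `Trainer` checkpoint format: `log_history` is a list of per-step records (`step`, `epoch`, `loss`, `learning_rate`, `grad_norm`), followed by run-level fields (1578 max steps, a checkpoint every 158 steps, per-device batch size 2). Below is a minimal sketch for inspecting it, assuming only the layout shown above plus a standard `matplotlib` install; the file path matches this checkpoint's location and the variable names are illustrative, not part of the checkpoint itself.

```python
# Minimal sketch: load this trainer_state.json and plot loss and learning
# rate per logged step. Assumes the Hugging Face Trainer layout shown above;
# the path below is this checkpoint's location and is otherwise illustrative.
import json

import matplotlib.pyplot as plt

with open("checkpoint-948/trainer_state.json") as f:
    state = json.load(f)

# Each log_history record carries step, epoch, loss, learning_rate, grad_norm.
history = state["log_history"]
steps = [rec["step"] for rec in history]
losses = [rec["loss"] for rec in history]
lrs = [rec["learning_rate"] for rec in history]

fig, loss_ax = plt.subplots()
loss_ax.plot(steps, losses, label="train loss")
loss_ax.set_xlabel("step")
loss_ax.set_ylabel("loss")

lr_ax = loss_ax.twinx()  # second y-axis for the learning-rate schedule
lr_ax.plot(steps, lrs, color="tab:orange", label="learning rate")
lr_ax.set_ylabel("learning rate")

fig.tight_layout()
plt.show()
```

Plotted this way, the section reads as a cosine-style decay around its midpoint (the learning rate crosses exactly 1e-4 at step 794) with loss oscillating roughly between 0.3 and 0.7; the isolated `grad_norm` spikes (about 1.97 at step 630 and 0.92 at step 645) appear as single-step outliers rather than a divergence.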