{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 563148,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0026635982015384943,
      "grad_norm": 0.2271278351545334,
      "learning_rate": 0.0001996,
      "loss": 8.7148,
      "step": 500
    },
    {
      "epoch": 0.005327196403076989,
      "grad_norm": 0.448383092880249,
      "learning_rate": 0.0003996,
      "loss": 7.4094,
      "step": 1000
    },
    {
      "epoch": 0.007990794604615483,
      "grad_norm": 0.46370673179626465,
      "learning_rate": 0.0005996,
      "loss": 7.1049,
      "step": 1500
    },
    {
      "epoch": 0.010654392806153977,
      "grad_norm": 0.7845134735107422,
      "learning_rate": 0.0007996,
      "loss": 6.8619,
      "step": 2000
    },
    {
      "epoch": 0.013317991007692471,
      "grad_norm": 0.7677924036979675,
      "learning_rate": 0.0009996,
      "loss": 6.7206,
      "step": 2500
    },
    {
      "epoch": 0.015981589209230967,
      "grad_norm": 0.7272828817367554,
      "learning_rate": 0.0009991099584766199,
      "loss": 6.6171,
      "step": 3000
    },
    {
      "epoch": 0.01864518741076946,
      "grad_norm": 0.7266383171081543,
      "learning_rate": 0.0009982181333028923,
      "loss": 6.4961,
      "step": 3500
    },
    {
      "epoch": 0.021308785612307955,
      "grad_norm": 0.8149316310882568,
      "learning_rate": 0.0009973263081291647,
      "loss": 6.3995,
      "step": 4000
    },
    {
      "epoch": 0.02397238381384645,
      "grad_norm": 0.8527867794036865,
      "learning_rate": 0.0009964344829554372,
      "loss": 6.3342,
      "step": 4500
    },
    {
      "epoch": 0.026635982015384942,
      "grad_norm": 1.2359241247177124,
      "learning_rate": 0.0009955444414320573,
      "loss": 6.2305,
      "step": 5000
    },
    {
      "epoch": 0.029299580216923436,
      "grad_norm": 1.1131370067596436,
      "learning_rate": 0.0009946526162583297,
      "loss": 6.0731,
      "step": 5500
    },
    {
      "epoch": 0.031963178418461934,
      "grad_norm": 1.185133457183838,
      "learning_rate": 0.0009937607910846021,
      "loss": 5.9349,
      "step": 6000
    },
    {
      "epoch": 0.034626776620000424,
      "grad_norm": 1.201166033744812,
      "learning_rate": 0.0009928689659108746,
      "loss": 5.7587,
      "step": 6500
    },
    {
      "epoch": 0.03729037482153892,
      "grad_norm": 1.2446848154067993,
      "learning_rate": 0.0009919789243874944,
      "loss": 5.6453,
      "step": 7000
    },
    {
      "epoch": 0.03995397302307741,
      "grad_norm": 1.2813904285430908,
      "learning_rate": 0.0009910870992137668,
      "loss": 5.5547,
      "step": 7500
    },
    {
      "epoch": 0.04261757122461591,
      "grad_norm": 0.9883731007575989,
      "learning_rate": 0.0009901952740400395,
      "loss": 5.3078,
      "step": 8000
    },
    {
      "epoch": 0.045281169426154406,
      "grad_norm": 0.9527985453605652,
      "learning_rate": 0.000989303448866312,
      "loss": 5.1301,
      "step": 8500
    },
    {
      "epoch": 0.0479447676276929,
      "grad_norm": 0.9772309064865112,
      "learning_rate": 0.0009884134073429318,
      "loss": 5.0381,
      "step": 9000
    },
    {
      "epoch": 0.050608365829231394,
      "grad_norm": 1.0352524518966675,
      "learning_rate": 0.0009875215821692042,
      "loss": 4.9814,
      "step": 9500
    },
    {
      "epoch": 0.053271964030769885,
      "grad_norm": 0.8517736196517944,
      "learning_rate": 0.0009866297569954767,
      "loss": 4.9238,
      "step": 10000
    },
    {
      "epoch": 0.05593556223230838,
      "grad_norm": 0.9034407138824463,
      "learning_rate": 0.000985737931821749,
      "loss": 4.8745,
      "step": 10500
    },
    {
      "epoch": 0.05859916043384687,
      "grad_norm": 0.8332895636558533,
      "learning_rate": 0.0009848461066480215,
      "loss": 4.845,
      "step": 11000
    },
    {
      "epoch": 0.06126275863538537,
      "grad_norm": 0.8637209534645081,
      "learning_rate": 0.0009839560651246416,
      "loss": 4.8014,
      "step": 11500
    },
    {
      "epoch": 0.06392635683692387,
      "grad_norm": 0.8696839213371277,
      "learning_rate": 0.000983064239950914,
      "loss": 4.7803,
      "step": 12000
    },
    {
      "epoch": 0.06658995503846236,
      "grad_norm": 0.8878291249275208,
      "learning_rate": 0.0009821724147771865,
      "loss": 4.7629,
      "step": 12500
    },
    {
      "epoch": 0.06925355324000085,
      "grad_norm": 0.8268778324127197,
      "learning_rate": 0.000981280589603459,
      "loss": 4.7312,
      "step": 13000
    },
    {
      "epoch": 0.07191715144153935,
      "grad_norm": 0.884635329246521,
      "learning_rate": 0.0009803887644297313,
      "loss": 4.7146,
      "step": 13500
    },
    {
      "epoch": 0.07458074964307784,
      "grad_norm": 0.7639057636260986,
      "learning_rate": 0.0009794969392560038,
      "loss": 4.6961,
      "step": 14000
    },
    {
      "epoch": 0.07724434784461634,
      "grad_norm": 0.8192263245582581,
      "learning_rate": 0.0009786051140822762,
      "loss": 4.6766,
      "step": 14500
    },
    {
      "epoch": 0.07990794604615482,
      "grad_norm": 0.8075643181800842,
      "learning_rate": 0.0009777132889085486,
      "loss": 4.6582,
      "step": 15000
    },
    {
      "epoch": 0.08257154424769332,
      "grad_norm": 0.7193809151649475,
      "learning_rate": 0.0009768232473851685,
      "loss": 4.655,
      "step": 15500
    },
    {
      "epoch": 0.08523514244923182,
      "grad_norm": 0.8761749267578125,
      "learning_rate": 0.000975931422211441,
      "loss": 4.6378,
      "step": 16000
    },
    {
      "epoch": 0.08789874065077032,
      "grad_norm": 0.8616175055503845,
      "learning_rate": 0.0009750395970377135,
      "loss": 4.6265,
      "step": 16500
    },
    {
      "epoch": 0.09056233885230881,
      "grad_norm": 0.8099841475486755,
      "learning_rate": 0.000974147771863986,
      "loss": 4.6079,
      "step": 17000
    },
    {
      "epoch": 0.0932259370538473,
      "grad_norm": 0.811244010925293,
      "learning_rate": 0.000973257730340606,
      "loss": 4.5949,
      "step": 17500
    },
    {
      "epoch": 0.0958895352553858,
      "grad_norm": 0.8826119303703308,
      "learning_rate": 0.0009723659051668784,
      "loss": 4.589,
      "step": 18000
    },
    {
      "epoch": 0.09855313345692429,
      "grad_norm": 0.8135235905647278,
      "learning_rate": 0.0009714740799931508,
      "loss": 4.5715,
      "step": 18500
    },
    {
      "epoch": 0.10121673165846279,
      "grad_norm": 0.8390595316886902,
      "learning_rate": 0.0009705822548194233,
      "loss": 4.5581,
      "step": 19000
    },
    {
      "epoch": 0.10388032986000127,
      "grad_norm": 0.7602077126502991,
      "learning_rate": 0.0009696922132960431,
      "loss": 4.5527,
      "step": 19500
    },
    {
      "epoch": 0.10654392806153977,
      "grad_norm": 0.8945237994194031,
      "learning_rate": 0.0009688003881223157,
      "loss": 4.5301,
      "step": 20000
    },
    {
      "epoch": 0.10920752626307827,
      "grad_norm": 0.6963039040565491,
      "learning_rate": 0.0009679085629485881,
      "loss": 4.5186,
      "step": 20500
    },
    {
      "epoch": 0.11187112446461676,
      "grad_norm": 0.7871098518371582,
      "learning_rate": 0.0009670167377748605,
      "loss": 4.5069,
      "step": 21000
    },
    {
      "epoch": 0.11453472266615526,
      "grad_norm": 0.7853402495384216,
      "learning_rate": 0.000966124912601133,
      "loss": 4.4966,
      "step": 21500
    },
    {
      "epoch": 0.11719832086769374,
      "grad_norm": 0.7557271718978882,
      "learning_rate": 0.0009652348710777528,
      "loss": 4.4857,
      "step": 22000
    },
    {
      "epoch": 0.11986191906923224,
      "grad_norm": 0.7256771326065063,
      "learning_rate": 0.0009643430459040254,
      "loss": 4.4756,
      "step": 22500
    },
    {
      "epoch": 0.12252551727077074,
      "grad_norm": 0.7980550527572632,
      "learning_rate": 0.0009634512207302978,
      "loss": 4.4726,
      "step": 23000
    },
    {
      "epoch": 0.12518911547230924,
      "grad_norm": 0.7480477690696716,
      "learning_rate": 0.0009625593955565702,
      "loss": 4.4558,
      "step": 23500
    },
    {
      "epoch": 0.12785271367384773,
      "grad_norm": 0.7309882044792175,
      "learning_rate": 0.0009616675703828427,
      "loss": 4.4546,
      "step": 24000
    },
    {
      "epoch": 0.13051631187538623,
      "grad_norm": 0.8072414398193359,
      "learning_rate": 0.0009607775288594626,
      "loss": 4.4408,
      "step": 24500
    },
    {
      "epoch": 0.13317991007692473,
      "grad_norm": 0.7929727435112,
      "learning_rate": 0.0009598857036857352,
      "loss": 4.4436,
      "step": 25000
    },
    {
      "epoch": 0.1358435082784632,
      "grad_norm": 0.7073729038238525,
      "learning_rate": 0.0009589938785120076,
      "loss": 4.4261,
      "step": 25500
    },
    {
      "epoch": 0.1385071064800017,
      "grad_norm": 0.7210267782211304,
      "learning_rate": 0.00095810205333828,
      "loss": 4.425,
      "step": 26000
    },
    {
      "epoch": 0.1411707046815402,
      "grad_norm": 0.6783360838890076,
      "learning_rate": 0.0009572102281645525,
      "loss": 4.4123,
      "step": 26500
    },
    {
      "epoch": 0.1438343028830787,
      "grad_norm": 0.7039027214050293,
      "learning_rate": 0.0009563184029908249,
      "loss": 4.414,
      "step": 27000
    },
    {
      "epoch": 0.1464979010846172,
      "grad_norm": 0.7899590730667114,
      "learning_rate": 0.0009554265778170974,
      "loss": 4.3951,
      "step": 27500
    },
    {
      "epoch": 0.14916149928615569,
      "grad_norm": 0.7651330828666687,
      "learning_rate": 0.0009545347526433699,
      "loss": 4.3997,
      "step": 28000
    },
    {
      "epoch": 0.15182509748769418,
      "grad_norm": 0.8091022372245789,
      "learning_rate": 0.0009536447111199897,
      "loss": 4.3865,
      "step": 28500
    },
    {
      "epoch": 0.15448869568923268,
      "grad_norm": 0.7238765954971313,
      "learning_rate": 0.0009527528859462622,
      "loss": 4.3845,
      "step": 29000
    },
    {
      "epoch": 0.15715229389077118,
      "grad_norm": 0.7803590893745422,
      "learning_rate": 0.0009518610607725346,
      "loss": 4.3805,
      "step": 29500
    },
    {
      "epoch": 0.15981589209230965,
      "grad_norm": 0.778491735458374,
      "learning_rate": 0.0009509692355988071,
      "loss": 4.3794,
      "step": 30000
    },
    {
      "epoch": 0.16247949029384814,
      "grad_norm": 0.7399048209190369,
      "learning_rate": 0.000950079194075427,
      "loss": 4.3795,
      "step": 30500
    },
    {
      "epoch": 0.16514308849538664,
      "grad_norm": 0.7823745012283325,
      "learning_rate": 0.0009491873689016994,
      "loss": 4.3782,
      "step": 31000
    },
    {
      "epoch": 0.16780668669692514,
      "grad_norm": 0.7693122029304504,
      "learning_rate": 0.0009482955437279719,
      "loss": 4.3612,
      "step": 31500
    },
    {
      "epoch": 0.17047028489846364,
      "grad_norm": 0.7326549887657166,
      "learning_rate": 0.0009474037185542443,
      "loss": 4.3658,
      "step": 32000
    },
    {
      "epoch": 0.17313388310000213,
      "grad_norm": 0.6827363967895508,
      "learning_rate": 0.0009465136770308644,
      "loss": 4.3621,
      "step": 32500
    },
    {
      "epoch": 0.17579748130154063,
      "grad_norm": 0.7000982761383057,
      "learning_rate": 0.0009456218518571368,
      "loss": 4.3566,
      "step": 33000
    },
    {
      "epoch": 0.17846107950307913,
      "grad_norm": 0.7949216365814209,
      "learning_rate": 0.0009447300266834092,
      "loss": 4.349,
      "step": 33500
    },
    {
      "epoch": 0.18112467770461763,
      "grad_norm": 0.7766338586807251,
      "learning_rate": 0.0009438382015096817,
      "loss": 4.3564,
      "step": 34000
    },
    {
      "epoch": 0.1837882759061561,
      "grad_norm": 0.7235038876533508,
      "learning_rate": 0.0009429481599863015,
      "loss": 4.3434,
      "step": 34500
    },
    {
      "epoch": 0.1864518741076946,
      "grad_norm": 0.7254591584205627,
      "learning_rate": 0.0009420563348125741,
      "loss": 4.3352,
      "step": 35000
    },
    {
      "epoch": 0.1891154723092331,
      "grad_norm": 0.6868504285812378,
      "learning_rate": 0.0009411645096388465,
      "loss": 4.34,
      "step": 35500
    },
    {
      "epoch": 0.1917790705107716,
      "grad_norm": 0.7674193978309631,
      "learning_rate": 0.0009402726844651189,
      "loss": 4.3333,
      "step": 36000
    },
    {
      "epoch": 0.19444266871231008,
      "grad_norm": 0.778035581111908,
      "learning_rate": 0.0009393826429417389,
      "loss": 4.3314,
      "step": 36500
    },
    {
      "epoch": 0.19710626691384858,
      "grad_norm": 0.7400960922241211,
      "learning_rate": 0.0009384908177680113,
      "loss": 4.3319,
      "step": 37000
    },
    {
      "epoch": 0.19976986511538708,
      "grad_norm": 0.7500663995742798,
      "learning_rate": 0.0009375989925942838,
      "loss": 4.328,
      "step": 37500
    },
    {
      "epoch": 0.20243346331692558,
      "grad_norm": 0.683749794960022,
      "learning_rate": 0.0009367071674205563,
      "loss": 4.3268,
      "step": 38000
    },
    {
      "epoch": 0.20509706151846407,
      "grad_norm": 0.7642583250999451,
      "learning_rate": 0.0009358171258971762,
      "loss": 4.3269,
      "step": 38500
    },
    {
      "epoch": 0.20776065972000254,
      "grad_norm": 0.6992856860160828,
      "learning_rate": 0.0009349253007234486,
      "loss": 4.3218,
      "step": 39000
    },
    {
      "epoch": 0.21042425792154104,
      "grad_norm": 0.7553698420524597,
      "learning_rate": 0.000934033475549721,
      "loss": 4.3209,
      "step": 39500
    },
    {
      "epoch": 0.21308785612307954,
      "grad_norm": 0.6873403787612915,
      "learning_rate": 0.0009331416503759935,
      "loss": 4.3157,
      "step": 40000
    },
    {
      "epoch": 0.21575145432461804,
      "grad_norm": 0.7638967633247375,
      "learning_rate": 0.0009322516088526134,
      "loss": 4.3163,
      "step": 40500
    },
    {
      "epoch": 0.21841505252615653,
      "grad_norm": 0.6896612048149109,
      "learning_rate": 0.0009313597836788859,
      "loss": 4.3123,
      "step": 41000
    },
    {
      "epoch": 0.22107865072769503,
      "grad_norm": 0.7294336557388306,
      "learning_rate": 0.0009304679585051583,
      "loss": 4.3142,
      "step": 41500
    },
    {
      "epoch": 0.22374224892923353,
      "grad_norm": 0.7498676776885986,
      "learning_rate": 0.0009295761333314307,
      "loss": 4.3038,
      "step": 42000
    },
    {
      "epoch": 0.22640584713077203,
      "grad_norm": 0.7050178647041321,
      "learning_rate": 0.0009286860918080507,
      "loss": 4.2978,
      "step": 42500
    },
    {
      "epoch": 0.22906944533231052,
      "grad_norm": 0.7527032494544983,
      "learning_rate": 0.0009277942666343233,
      "loss": 4.3067,
      "step": 43000
    },
    {
      "epoch": 0.231733043533849,
      "grad_norm": 0.6919755935668945,
      "learning_rate": 0.0009269024414605957,
      "loss": 4.295,
      "step": 43500
    },
    {
      "epoch": 0.2343966417353875,
      "grad_norm": 0.7255104184150696,
      "learning_rate": 0.0009260106162868681,
      "loss": 4.2946,
      "step": 44000
    },
    {
      "epoch": 0.237060239936926,
      "grad_norm": 0.6978445649147034,
      "learning_rate": 0.000925120574763488,
      "loss": 4.2937,
      "step": 44500
    },
    {
      "epoch": 0.23972383813846448,
      "grad_norm": 0.7008663415908813,
      "learning_rate": 0.0009242287495897604,
      "loss": 4.2974,
      "step": 45000
    },
    {
      "epoch": 0.24238743634000298,
      "grad_norm": 0.704937756061554,
      "learning_rate": 0.000923336924416033,
      "loss": 4.2857,
      "step": 45500
    },
    {
      "epoch": 0.24505103454154148,
      "grad_norm": 0.7343337535858154,
      "learning_rate": 0.0009224450992423054,
      "loss": 4.2891,
      "step": 46000
    },
    {
      "epoch": 0.24771463274307998,
      "grad_norm": 0.7263538241386414,
      "learning_rate": 0.0009215550577189252,
      "loss": 4.2895,
      "step": 46500
    },
    {
      "epoch": 0.2503782309446185,
      "grad_norm": 0.7095937728881836,
      "learning_rate": 0.0009206632325451977,
      "loss": 4.2853,
      "step": 47000
    },
    {
      "epoch": 0.25304182914615697,
      "grad_norm": 0.7221779823303223,
      "learning_rate": 0.0009197714073714701,
      "loss": 4.2858,
      "step": 47500
    },
    {
      "epoch": 0.25570542734769547,
      "grad_norm": 0.7522983551025391,
      "learning_rate": 0.0009188795821977425,
      "loss": 4.2795,
      "step": 48000
    },
    {
      "epoch": 0.25836902554923397,
      "grad_norm": 0.7212731838226318,
      "learning_rate": 0.0009179895406743626,
      "loss": 4.2749,
      "step": 48500
    },
    {
      "epoch": 0.26103262375077246,
      "grad_norm": 0.75824373960495,
      "learning_rate": 0.000917097715500635,
      "loss": 4.2738,
      "step": 49000
    },
    {
      "epoch": 0.26369622195231096,
      "grad_norm": 0.7861409783363342,
      "learning_rate": 0.0009162058903269075,
      "loss": 4.2781,
      "step": 49500
    },
    {
      "epoch": 0.26635982015384946,
      "grad_norm": 0.7585176229476929,
      "learning_rate": 0.0009153140651531799,
      "loss": 4.2742,
      "step": 50000
    },
    {
      "epoch": 0.2690234183553879,
      "grad_norm": 0.7468889951705933,
      "learning_rate": 0.0009144240236297998,
      "loss": 4.2779,
      "step": 50500
    },
    {
      "epoch": 0.2716870165569264,
      "grad_norm": 0.7378383278846741,
      "learning_rate": 0.0009135321984560723,
      "loss": 4.2724,
      "step": 51000
    },
    {
      "epoch": 0.2743506147584649,
      "grad_norm": 0.6867294907569885,
      "learning_rate": 0.0009126403732823447,
      "loss": 4.2753,
      "step": 51500
    },
    {
      "epoch": 0.2770142129600034,
      "grad_norm": 0.6850928068161011,
      "learning_rate": 0.0009117485481086172,
      "loss": 4.2718,
      "step": 52000
    },
    {
      "epoch": 0.2796778111615419,
      "grad_norm": 0.7450153827667236,
      "learning_rate": 0.000910858506585237,
      "loss": 4.2711,
      "step": 52500
    },
    {
      "epoch": 0.2823414093630804,
      "grad_norm": 0.7175604104995728,
      "learning_rate": 0.0009099666814115095,
      "loss": 4.2636,
      "step": 53000
    },
    {
      "epoch": 0.2850050075646189,
      "grad_norm": 0.7004239559173584,
      "learning_rate": 0.000909074856237782,
      "loss": 4.273,
      "step": 53500
    },
    {
      "epoch": 0.2876686057661574,
      "grad_norm": 0.7755109667778015,
      "learning_rate": 0.0009081830310640544,
      "loss": 4.262,
      "step": 54000
    },
    {
      "epoch": 0.2903322039676959,
      "grad_norm": 0.7420957684516907,
      "learning_rate": 0.0009072929895406744,
      "loss": 4.2703,
      "step": 54500
    },
    {
      "epoch": 0.2929958021692344,
      "grad_norm": 0.7163523435592651,
      "learning_rate": 0.0009064011643669468,
      "loss": 4.265,
      "step": 55000
    },
    {
      "epoch": 0.2956594003707729,
      "grad_norm": 0.7003483176231384,
      "learning_rate": 0.0009055093391932193,
      "loss": 4.2529,
      "step": 55500
    },
    {
      "epoch": 0.29832299857231137,
      "grad_norm": 0.7118489742279053,
      "learning_rate": 0.0009046175140194918,
      "loss": 4.2556,
      "step": 56000
    },
    {
      "epoch": 0.30098659677384987,
      "grad_norm": 0.7034066319465637,
      "learning_rate": 0.0009037274724961117,
      "loss": 4.2547,
      "step": 56500
    },
    {
      "epoch": 0.30365019497538837,
      "grad_norm": 0.6700213551521301,
      "learning_rate": 0.0009028356473223841,
      "loss": 4.2561,
      "step": 57000
    },
    {
      "epoch": 0.30631379317692686,
      "grad_norm": 0.738164484500885,
      "learning_rate": 0.0009019438221486565,
      "loss": 4.26,
      "step": 57500
    },
    {
      "epoch": 0.30897739137846536,
      "grad_norm": 0.7396353483200073,
      "learning_rate": 0.000901051996974929,
      "loss": 4.2562,
      "step": 58000
    },
    {
      "epoch": 0.31164098958000386,
      "grad_norm": 0.7478146553039551,
      "learning_rate": 0.0009001619554515488,
      "loss": 4.25,
      "step": 58500
    },
    {
      "epoch": 0.31430458778154235,
      "grad_norm": 0.7298335433006287,
      "learning_rate": 0.0008992701302778215,
      "loss": 4.2562,
      "step": 59000
    },
    {
      "epoch": 0.3169681859830808,
      "grad_norm": 0.7685016989707947,
      "learning_rate": 0.0008983783051040939,
      "loss": 4.2551,
      "step": 59500
    },
    {
      "epoch": 0.3196317841846193,
      "grad_norm": 0.8017458915710449,
      "learning_rate": 0.0008974864799303664,
      "loss": 4.2481,
      "step": 60000
    },
    {
      "epoch": 0.3222953823861578,
      "grad_norm": 0.7588088512420654,
      "learning_rate": 0.0008965964384069862,
      "loss": 4.2537,
      "step": 60500
    },
    {
      "epoch": 0.3249589805876963,
      "grad_norm": 0.7897168397903442,
      "learning_rate": 0.0008957046132332586,
      "loss": 4.2427,
      "step": 61000
    },
    {
      "epoch": 0.3276225787892348,
      "grad_norm": 0.7311574220657349,
      "learning_rate": 0.0008948127880595312,
      "loss": 4.2518,
      "step": 61500
    },
    {
      "epoch": 0.3302861769907733,
      "grad_norm": 0.7892371416091919,
      "learning_rate": 0.0008939209628858036,
      "loss": 4.234,
      "step": 62000
    },
    {
      "epoch": 0.3329497751923118,
      "grad_norm": 0.6944438815116882,
      "learning_rate": 0.0008930309213624235,
      "loss": 4.2382,
      "step": 62500
    },
    {
      "epoch": 0.3356133733938503,
      "grad_norm": 0.7701837420463562,
      "learning_rate": 0.0008921390961886959,
      "loss": 4.2474,
      "step": 63000
    },
    {
      "epoch": 0.3382769715953888,
      "grad_norm": 0.7789635062217712,
      "learning_rate": 0.0008912472710149683,
      "loss": 4.2379,
      "step": 63500
    },
    {
      "epoch": 0.3409405697969273,
      "grad_norm": 0.7212055921554565,
      "learning_rate": 0.0008903554458412409,
      "loss": 4.2407,
      "step": 64000
    },
    {
      "epoch": 0.34360416799846577,
      "grad_norm": 0.7439520359039307,
      "learning_rate": 0.0008894654043178609,
      "loss": 4.2386,
      "step": 64500
    },
    {
      "epoch": 0.34626776620000427,
      "grad_norm": 0.6747229695320129,
      "learning_rate": 0.0008885735791441333,
      "loss": 4.2391,
      "step": 65000
    },
    {
      "epoch": 0.34893136440154277,
      "grad_norm": 0.7761566638946533,
      "learning_rate": 0.0008876817539704057,
      "loss": 4.2337,
      "step": 65500
    },
    {
      "epoch": 0.35159496260308126,
      "grad_norm": 0.7024859189987183,
      "learning_rate": 0.0008867899287966782,
      "loss": 4.2299,
      "step": 66000
    },
    {
      "epoch": 0.35425856080461976,
      "grad_norm": 0.7179946303367615,
      "learning_rate": 0.000885899887273298,
      "loss": 4.2379,
      "step": 66500
    },
    {
      "epoch": 0.35692215900615826,
      "grad_norm": 0.699834942817688,
      "learning_rate": 0.0008850080620995706,
      "loss": 4.2321,
      "step": 67000
    },
    {
      "epoch": 0.35958575720769675,
      "grad_norm": 0.6902332901954651,
      "learning_rate": 0.000884116236925843,
      "loss": 4.2376,
      "step": 67500
    },
    {
      "epoch": 0.36224935540923525,
      "grad_norm": 0.7003384232521057,
      "learning_rate": 0.0008832244117521154,
      "loss": 4.2261,
      "step": 68000
    },
    {
      "epoch": 0.36491295361077375,
      "grad_norm": 0.7879477739334106,
      "learning_rate": 0.0008823343702287353,
      "loss": 4.2292,
      "step": 68500
    },
    {
      "epoch": 0.3675765518123122,
      "grad_norm": 0.6793246269226074,
      "learning_rate": 0.0008814425450550077,
      "loss": 4.2342,
      "step": 69000
    },
    {
      "epoch": 0.3702401500138507,
      "grad_norm": 0.7284209728240967,
      "learning_rate": 0.0008805507198812803,
      "loss": 4.2276,
      "step": 69500
    },
    {
      "epoch": 0.3729037482153892,
      "grad_norm": 0.7192456722259521,
      "learning_rate": 0.0008796588947075527,
      "loss": 4.2248,
      "step": 70000
    },
    {
      "epoch": 0.3755673464169277,
      "grad_norm": 0.7695698738098145,
      "learning_rate": 0.0008787688531841727,
      "loss": 4.2276,
      "step": 70500
    },
    {
      "epoch": 0.3782309446184662,
      "grad_norm": 0.740368664264679,
      "learning_rate": 0.0008778770280104451,
      "loss": 4.2286,
      "step": 71000
    },
    {
      "epoch": 0.3808945428200047,
      "grad_norm": 0.7393242716789246,
      "learning_rate": 0.0008769852028367175,
      "loss": 4.2239,
      "step": 71500
    },
    {
      "epoch": 0.3835581410215432,
      "grad_norm": 0.7269551157951355,
      "learning_rate": 0.0008760933776629901,
      "loss": 4.2196,
      "step": 72000
    },
    {
      "epoch": 0.3862217392230817,
      "grad_norm": 0.6773830056190491,
      "learning_rate": 0.0008752033361396099,
      "loss": 4.2283,
      "step": 72500
    },
    {
      "epoch": 0.38888533742462017,
      "grad_norm": 0.7091046571731567,
      "learning_rate": 0.0008743115109658824,
      "loss": 4.2252,
      "step": 73000
    },
    {
      "epoch": 0.39154893562615867,
      "grad_norm": 0.7202826738357544,
      "learning_rate": 0.0008734196857921548,
      "loss": 4.2102,
      "step": 73500
    },
    {
      "epoch": 0.39421253382769716,
      "grad_norm": 0.6965381503105164,
      "learning_rate": 0.0008725278606184272,
      "loss": 4.222,
      "step": 74000
    },
    {
      "epoch": 0.39687613202923566,
      "grad_norm": 0.7711541652679443,
      "learning_rate": 0.0008716378190950471,
      "loss": 4.2138,
      "step": 74500
    },
    {
      "epoch": 0.39953973023077416,
      "grad_norm": 0.6982942223548889,
      "learning_rate": 0.0008707459939213196,
      "loss": 4.2209,
      "step": 75000
    },
    {
      "epoch": 0.40220332843231266,
      "grad_norm": 0.700356662273407,
      "learning_rate": 0.0008698541687475921,
      "loss": 4.2153,
      "step": 75500
    },
    {
      "epoch": 0.40486692663385115,
      "grad_norm": 0.7417271137237549,
      "learning_rate": 0.0008689623435738645,
      "loss": 4.216,
      "step": 76000
    },
    {
      "epoch": 0.40753052483538965,
      "grad_norm": 0.7237849235534668,
      "learning_rate": 0.0008680723020504845,
      "loss": 4.2172,
      "step": 76500
    },
    {
      "epoch": 0.41019412303692815,
      "grad_norm": 0.7940893769264221,
      "learning_rate": 0.0008671804768767569,
      "loss": 4.2224,
      "step": 77000
    },
    {
      "epoch": 0.41285772123846665,
      "grad_norm": 0.7201411724090576,
      "learning_rate": 0.0008662886517030294,
      "loss": 4.2203,
      "step": 77500
    },
    {
      "epoch": 0.4155213194400051,
      "grad_norm": 0.7360599637031555,
      "learning_rate": 0.0008653968265293019,
      "loss": 4.2208,
      "step": 78000
    },
    {
      "epoch": 0.4181849176415436,
      "grad_norm": 0.7827675938606262,
      "learning_rate": 0.0008645067850059217,
      "loss": 4.2095,
      "step": 78500
    },
    {
      "epoch": 0.4208485158430821,
      "grad_norm": 0.7322735786437988,
      "learning_rate": 0.0008636149598321942,
      "loss": 4.2085,
      "step": 79000
    },
    {
      "epoch": 0.4235121140446206,
      "grad_norm": 0.6896507740020752,
      "learning_rate": 0.0008627231346584666,
      "loss": 4.2045,
      "step": 79500
    },
    {
      "epoch": 0.4261757122461591,
      "grad_norm": 0.780642569065094,
      "learning_rate": 0.0008618313094847391,
      "loss": 4.2157,
      "step": 80000
    },
    {
      "epoch": 0.4288393104476976,
      "grad_norm": 0.717087984085083,
      "learning_rate": 0.000860941267961359,
      "loss": 4.208,
      "step": 80500
    },
    {
      "epoch": 0.43150290864923607,
      "grad_norm": 0.7145330309867859,
      "learning_rate": 0.0008600494427876314,
      "loss": 4.2128,
      "step": 81000
    },
    {
      "epoch": 0.43416650685077457,
      "grad_norm": 0.7336823344230652,
      "learning_rate": 0.0008591576176139039,
      "loss": 4.2124,
      "step": 81500
    },
    {
      "epoch": 0.43683010505231307,
      "grad_norm": 0.6869795322418213,
      "learning_rate": 0.0008582657924401764,
      "loss": 4.2103,
      "step": 82000
    },
    {
      "epoch": 0.43949370325385156,
      "grad_norm": 0.7188379168510437,
      "learning_rate": 0.0008573757509167964,
      "loss": 4.2084,
      "step": 82500
    },
    {
      "epoch": 0.44215730145539006,
      "grad_norm": 0.7271597981452942,
      "learning_rate": 0.0008564839257430688,
      "loss": 4.2087,
      "step": 83000
    },
    {
      "epoch": 0.44482089965692856,
      "grad_norm": 0.7935476303100586,
      "learning_rate": 0.0008555921005693412,
      "loss": 4.199,
      "step": 83500
    },
    {
      "epoch": 0.44748449785846706,
      "grad_norm": 0.732509195804596,
      "learning_rate": 0.0008547002753956137,
      "loss": 4.2014,
      "step": 84000
    },
    {
      "epoch": 0.45014809606000555,
      "grad_norm": 0.7381872534751892,
      "learning_rate": 0.0008538102338722335,
      "loss": 4.2078,
      "step": 84500
    },
    {
      "epoch": 0.45281169426154405,
      "grad_norm": 0.697894811630249,
      "learning_rate": 0.0008529184086985061,
      "loss": 4.1978,
      "step": 85000
    },
    {
      "epoch": 0.45547529246308255,
      "grad_norm": 0.715933084487915,
      "learning_rate": 0.0008520265835247785,
      "loss": 4.205,
      "step": 85500
    },
    {
      "epoch": 0.45813889066462105,
      "grad_norm": 0.7199248671531677,
      "learning_rate": 0.0008511347583510509,
      "loss": 4.201,
      "step": 86000
    },
    {
      "epoch": 0.46080248886615954,
      "grad_norm": 0.7358156442642212,
      "learning_rate": 0.0008502447168276709,
      "loss": 4.2025,
      "step": 86500
    },
    {
      "epoch": 0.463466087067698,
      "grad_norm": 0.8218105435371399,
      "learning_rate": 0.0008493528916539433,
      "loss": 4.2017,
      "step": 87000
    },
    {
      "epoch": 0.4661296852692365,
      "grad_norm": 0.77776700258255,
      "learning_rate": 0.0008484610664802158,
      "loss": 4.1905,
      "step": 87500
    },
    {
      "epoch": 0.468793283470775,
      "grad_norm": 0.6795767545700073,
      "learning_rate": 0.0008475692413064883,
      "loss": 4.1913,
      "step": 88000
    },
    {
      "epoch": 0.4714568816723135,
      "grad_norm": 0.7476922869682312,
      "learning_rate": 0.0008466791997831082,
      "loss": 4.1935,
      "step": 88500
    },
    {
      "epoch": 0.474120479873852,
      "grad_norm": 0.7420318722724915,
      "learning_rate": 0.0008457873746093806,
      "loss": 4.1989,
      "step": 89000
    },
    {
      "epoch": 0.47678407807539047,
      "grad_norm": 0.677543044090271,
      "learning_rate": 0.000844895549435653,
      "loss": 4.1921,
      "step": 89500
    },
    {
      "epoch": 0.47944767627692897,
      "grad_norm": 0.7159215211868286,
      "learning_rate": 0.0008440037242619255,
      "loss": 4.1935,
      "step": 90000
    },
    {
      "epoch": 0.48211127447846747,
      "grad_norm": 0.7259414792060852,
      "learning_rate": 0.0008431136827385454,
      "loss": 4.2041,
      "step": 90500
    },
    {
      "epoch": 0.48477487268000596,
      "grad_norm": 0.6838536262512207,
      "learning_rate": 0.0008422218575648179,
      "loss": 4.1954,
      "step": 91000
    },
    {
      "epoch": 0.48743847088154446,
      "grad_norm": 0.6978190541267395,
      "learning_rate": 0.0008413300323910903,
      "loss": 4.1944,
      "step": 91500
    },
    {
      "epoch": 0.49010206908308296,
      "grad_norm": 0.7434132695198059,
      "learning_rate": 0.0008404382072173627,
      "loss": 4.1932,
      "step": 92000
    },
    {
      "epoch": 0.49276566728462146,
      "grad_norm": 0.6992717981338501,
      "learning_rate": 0.0008395481656939827,
      "loss": 4.1963,
      "step": 92500
    },
    {
      "epoch": 0.49542926548615995,
      "grad_norm": 0.7276673316955566,
      "learning_rate": 0.0008386563405202552,
      "loss": 4.1967,
      "step": 93000
    },
    {
      "epoch": 0.49809286368769845,
      "grad_norm": 0.7243706583976746,
      "learning_rate": 0.0008377645153465277,
      "loss": 4.1938,
      "step": 93500
    },
    {
      "epoch": 0.500756461889237,
      "grad_norm": 0.7238306999206543,
      "learning_rate": 0.0008368726901728001,
      "loss": 4.1944,
      "step": 94000
    },
    {
      "epoch": 0.5034200600907754,
      "grad_norm": 0.7251293063163757,
      "learning_rate": 0.00083598264864942,
      "loss": 4.187,
      "step": 94500
    },
    {
      "epoch": 0.5060836582923139,
      "grad_norm": 0.6981387734413147,
      "learning_rate": 0.0008350908234756924,
      "loss": 4.1942,
      "step": 95000
    },
    {
      "epoch": 0.5087472564938524,
      "grad_norm": 0.7512865662574768,
      "learning_rate": 0.0008341989983019649,
      "loss": 4.1896,
      "step": 95500
    },
    {
      "epoch": 0.5114108546953909,
      "grad_norm": 0.76689213514328,
      "learning_rate": 0.0008333071731282374,
      "loss": 4.1895,
      "step": 96000
    },
    {
      "epoch": 0.5140744528969294,
      "grad_norm": 0.7794478535652161,
      "learning_rate": 0.0008324171316048572,
      "loss": 4.1877,
      "step": 96500
    },
    {
      "epoch": 0.5167380510984679,
      "grad_norm": 0.7624120712280273,
      "learning_rate": 0.0008315253064311297,
      "loss": 4.1905,
      "step": 97000
    },
    {
      "epoch": 0.5194016493000064,
      "grad_norm": 0.812703549861908,
      "learning_rate": 0.0008306334812574021,
      "loss": 4.1918,
      "step": 97500
    },
    {
      "epoch": 0.5220652475015449,
      "grad_norm": 0.7445054054260254,
      "learning_rate": 0.0008297416560836745,
      "loss": 4.1932,
      "step": 98000
    },
    {
      "epoch": 0.5247288457030834,
      "grad_norm": 0.6916468143463135,
      "learning_rate": 0.0008288498309099471,
      "loss": 4.1927,
      "step": 98500
    },
    {
      "epoch": 0.5273924439046219,
      "grad_norm": 0.7391178011894226,
      "learning_rate": 0.000827959789386567,
      "loss": 4.1822,
      "step": 99000
    },
    {
      "epoch": 0.5300560421061604,
      "grad_norm": 0.7245861887931824,
      "learning_rate": 0.0008270679642128395,
      "loss": 4.1897,
      "step": 99500
    },
    {
      "epoch": 0.5327196403076989,
      "grad_norm": 0.7156808376312256,
      "learning_rate": 0.0008261761390391119,
      "loss": 4.186,
      "step": 100000
    },
    {
      "epoch": 0.5353832385092374,
      "grad_norm": 0.7185246348381042,
      "learning_rate": 0.0008252843138653843,
      "loss": 4.182,
      "step": 100500
    },
    {
      "epoch": 0.5380468367107758,
      "grad_norm": 0.7230123281478882,
      "learning_rate": 0.0008243942723420043,
      "loss": 4.1888,
      "step": 101000
    },
    {
      "epoch": 0.5407104349123143,
      "grad_norm": 0.6807687282562256,
      "learning_rate": 0.0008235024471682767,
      "loss": 4.1757,
      "step": 101500
    },
    {
      "epoch": 0.5433740331138528,
      "grad_norm": 0.6942833065986633,
      "learning_rate": 0.0008226106219945492,
      "loss": 4.1818,
      "step": 102000
    },
    {
      "epoch": 0.5460376313153913,
      "grad_norm": 0.7553761601448059,
      "learning_rate": 0.0008217187968208216,
      "loss": 4.1876,
      "step": 102500
    },
    {
      "epoch": 0.5487012295169298,
      "grad_norm": 0.8295273184776306,
      "learning_rate": 0.0008208287552974415,
      "loss": 4.1763,
      "step": 103000
    },
    {
      "epoch": 0.5513648277184683,
      "grad_norm": 0.7182528972625732,
      "learning_rate": 0.000819936930123714,
      "loss": 4.1867,
      "step": 103500
    },
    {
      "epoch": 0.5540284259200068,
      "grad_norm": 0.7191228270530701,
      "learning_rate": 0.0008190451049499864,
      "loss": 4.1822,
      "step": 104000
    },
    {
      "epoch": 0.5566920241215453,
      "grad_norm": 0.7880285382270813,
      "learning_rate": 0.0008181532797762589,
      "loss": 4.178,
      "step": 104500
    },
    {
      "epoch": 0.5593556223230838,
      "grad_norm": 0.7537713050842285,
      "learning_rate": 0.0008172632382528788,
      "loss": 4.1865,
      "step": 105000
    },
    {
      "epoch": 0.5620192205246223,
      "grad_norm": 0.7707012891769409,
      "learning_rate": 0.0008163714130791513,
      "loss": 4.1847,
      "step": 105500
    },
    {
      "epoch": 0.5646828187261608,
      "grad_norm": 0.7433204054832458,
      "learning_rate": 0.0008154795879054238,
      "loss": 4.1778,
      "step": 106000
    },
    {
      "epoch": 0.5673464169276993,
      "grad_norm": 0.760553240776062,
      "learning_rate": 0.0008145877627316962,
      "loss": 4.1804,
      "step": 106500
    },
    {
      "epoch": 0.5700100151292378,
      "grad_norm": 0.744844913482666,
      "learning_rate": 0.0008136977212083161,
      "loss": 4.1809,
      "step": 107000
    },
    {
      "epoch": 0.5726736133307763,
      "grad_norm": 0.7252081036567688,
      "learning_rate": 0.0008128058960345885,
      "loss": 4.1731,
      "step": 107500
    },
    {
      "epoch": 0.5753372115323148,
      "grad_norm": 0.6822036504745483,
      "learning_rate": 0.000811914070860861,
      "loss": 4.1799,
      "step": 108000
    },
    {
      "epoch": 0.5780008097338533,
      "grad_norm": 0.7590454816818237,
      "learning_rate": 0.0008110222456871334,
      "loss": 4.1771,
      "step": 108500
    },
    {
      "epoch": 0.5806644079353918,
      "grad_norm": 0.7851970791816711,
      "learning_rate": 0.0008101322041637535,
      "loss": 4.1762,
      "step": 109000
    },
    {
      "epoch": 0.5833280061369303,
      "grad_norm": 0.7638763785362244,
      "learning_rate": 0.0008092403789900259,
      "loss": 4.1699,
      "step": 109500
    },
    {
      "epoch": 0.5859916043384688,
      "grad_norm": 0.7190741896629333,
      "learning_rate": 0.0008083485538162983,
      "loss": 4.181,
      "step": 110000
    },
    {
      "epoch": 0.5886552025400072,
      "grad_norm": 0.8082555532455444,
      "learning_rate": 0.0008074567286425708,
      "loss": 4.1711,
      "step": 110500
    },
    {
      "epoch": 0.5913188007415457,
      "grad_norm": 0.7326035499572754,
      "learning_rate": 0.0008065666871191906,
      "loss": 4.1743,
      "step": 111000
    },
    {
      "epoch": 0.5939823989430842,
      "grad_norm": 0.7412554621696472,
      "learning_rate": 0.0008056748619454632,
      "loss": 4.1761,
      "step": 111500
    },
    {
      "epoch": 0.5966459971446227,
      "grad_norm": 0.6986061930656433,
      "learning_rate": 0.0008047830367717356,
      "loss": 4.1788,
      "step": 112000
    },
    {
      "epoch": 0.5993095953461612,
      "grad_norm": 0.8155457973480225,
      "learning_rate": 0.000803891211598008,
      "loss": 4.1801,
      "step": 112500
    },
    {
      "epoch": 0.6019731935476997,
      "grad_norm": 0.7332949042320251,
      "learning_rate": 0.0008030011700746279,
      "loss": 4.1678,
      "step": 113000
    },
    {
      "epoch": 0.6046367917492382,
      "grad_norm": 0.8117866516113281,
      "learning_rate": 0.0008021093449009003,
      "loss": 4.1781,
      "step": 113500
    },
    {
      "epoch": 0.6073003899507767,
      "grad_norm": 0.7188646197319031,
      "learning_rate": 0.0008012175197271729,
      "loss": 4.1702,
      "step": 114000
    },
    {
      "epoch": 0.6099639881523152,
      "grad_norm": 0.7319905757904053,
      "learning_rate": 0.0008003256945534453,
      "loss": 4.1709,
      "step": 114500
    },
    {
      "epoch": 0.6126275863538537,
      "grad_norm": 0.7118169069290161,
      "learning_rate": 0.0007994356530300653,
      "loss": 4.1709,
      "step": 115000
    },
    {
      "epoch": 0.6152911845553922,
      "grad_norm": 0.7694860696792603,
      "learning_rate": 0.0007985438278563377,
      "loss": 4.1723,
      "step": 115500
    },
    {
      "epoch": 0.6179547827569307,
      "grad_norm": 0.7366968989372253,
      "learning_rate": 0.0007976520026826101,
      "loss": 4.1676,
      "step": 116000
    },
    {
      "epoch": 0.6206183809584692,
      "grad_norm": 0.7481387257575989,
      "learning_rate": 0.0007967601775088827,
      "loss": 4.1729,
      "step": 116500
    },
    {
      "epoch": 0.6232819791600077,
      "grad_norm": 0.7446570992469788,
      "learning_rate": 0.0007958701359855025,
      "loss": 4.1657,
      "step": 117000
    },
    {
      "epoch": 0.6259455773615462,
      "grad_norm": 0.7612956166267395,
      "learning_rate": 0.000794978310811775,
      "loss": 4.1685,
      "step": 117500
    },
    {
      "epoch": 0.6286091755630847,
      "grad_norm": 0.7427545189857483,
      "learning_rate": 0.0007940864856380474,
      "loss": 4.1685,
      "step": 118000
    },
    {
      "epoch": 0.6312727737646232,
      "grad_norm": 0.7789895534515381,
      "learning_rate": 0.0007931946604643198,
      "loss": 4.1726,
      "step": 118500
    },
    {
      "epoch": 0.6339363719661616,
      "grad_norm": 0.751118540763855,
      "learning_rate": 0.0007923046189409397,
      "loss": 4.1693,
      "step": 119000
    },
    {
      "epoch": 0.6365999701677001,
      "grad_norm": 0.8121469616889954,
      "learning_rate": 0.0007914127937672122,
      "loss": 4.1667,
      "step": 119500
    },
    {
      "epoch": 0.6392635683692386,
      "grad_norm": 0.7127716541290283,
      "learning_rate": 0.0007905209685934847,
      "loss": 4.1604,
      "step": 120000
    },
    {
      "epoch": 0.6419271665707771,
      "grad_norm": 0.7496224045753479,
      "learning_rate": 0.0007896291434197571,
      "loss": 4.1655,
      "step": 120500
    },
    {
      "epoch": 0.6445907647723156,
      "grad_norm": 0.7957298755645752,
      "learning_rate": 0.0007887391018963771,
      "loss": 4.1685,
      "step": 121000
    },
    {
      "epoch": 0.6472543629738541,
      "grad_norm": 0.708066463470459,
      "learning_rate": 0.0007878472767226495,
      "loss": 4.1684,
      "step": 121500
    },
    {
      "epoch": 0.6499179611753926,
      "grad_norm": 0.8204523324966431,
      "learning_rate": 0.000786955451548922,
      "loss": 4.1685,
      "step": 122000
    },
    {
      "epoch": 0.6525815593769311,
      "grad_norm": 0.7236646413803101,
      "learning_rate": 0.0007860636263751945,
      "loss": 4.1692,
      "step": 122500
    },
    {
      "epoch": 0.6552451575784696,
      "grad_norm": 0.7952857613563538,
      "learning_rate": 0.0007851735848518143,
      "loss": 4.1623,
      "step": 123000
    },
    {
      "epoch": 0.6579087557800081,
      "grad_norm": 0.7337407469749451,
      "learning_rate": 0.0007842817596780868,
      "loss": 4.1675,
      "step": 123500
    },
    {
      "epoch": 0.6605723539815466,
      "grad_norm": 0.740993082523346,
      "learning_rate": 0.0007833899345043592,
      "loss": 4.1643,
      "step": 124000
    },
    {
      "epoch": 0.6632359521830851,
      "grad_norm": 0.7212578654289246,
      "learning_rate": 0.0007824981093306317,
      "loss": 4.1656,
      "step": 124500
    },
    {
      "epoch": 0.6658995503846236,
      "grad_norm": 0.7532219886779785,
      "learning_rate": 0.0007816080678072516,
      "loss": 4.1682,
      "step": 125000
    },
    {
      "epoch": 0.6685631485861621,
      "grad_norm": 0.759222686290741,
      "learning_rate": 0.000780716242633524,
      "loss": 4.165,
      "step": 125500
    },
    {
      "epoch": 0.6712267467877006,
      "grad_norm": 0.7389349937438965,
      "learning_rate": 0.0007798244174597965,
      "loss": 4.1623,
      "step": 126000
    },
    {
      "epoch": 0.673890344989239,
      "grad_norm": 0.7558398246765137,
      "learning_rate": 0.0007789325922860689,
      "loss": 4.165,
      "step": 126500
    },
    {
      "epoch": 0.6765539431907776,
      "grad_norm": 0.778786838054657,
      "learning_rate": 0.0007780425507626889,
      "loss": 4.1636,
      "step": 127000
    },
    {
      "epoch": 0.679217541392316,
      "grad_norm": 0.7308077812194824,
      "learning_rate": 0.0007771507255889614,
      "loss": 4.1609,
      "step": 127500
    },
    {
      "epoch": 0.6818811395938545,
      "grad_norm": 0.7642717361450195,
      "learning_rate": 0.0007762589004152338,
      "loss": 4.1623,
      "step": 128000
    },
    {
      "epoch": 0.684544737795393,
      "grad_norm": 0.7278922200202942,
      "learning_rate": 0.0007753670752415063,
      "loss": 4.1636,
      "step": 128500
    },
    {
      "epoch": 0.6872083359969315,
      "grad_norm": 0.7422888278961182,
      "learning_rate": 0.0007744770337181261,
      "loss": 4.1542,
      "step": 129000
    },
    {
      "epoch": 0.68987193419847,
      "grad_norm": 0.7136949896812439,
      "learning_rate": 0.0007735852085443986,
      "loss": 4.1579,
      "step": 129500
    },
    {
      "epoch": 0.6925355324000085,
      "grad_norm": 0.7696181535720825,
      "learning_rate": 0.0007726933833706711,
      "loss": 4.1615,
      "step": 130000
    },
    {
      "epoch": 0.695199130601547,
      "grad_norm": 0.7375788688659668,
      "learning_rate": 0.0007718015581969435,
      "loss": 4.1625,
      "step": 130500
    },
    {
      "epoch": 0.6978627288030855,
      "grad_norm": 0.7175765037536621,
      "learning_rate": 0.0007709115166735635,
      "loss": 4.1562,
      "step": 131000
    },
    {
      "epoch": 0.700526327004624,
      "grad_norm": 0.7179591655731201,
      "learning_rate": 0.000770019691499836,
      "loss": 4.1604,
      "step": 131500
    },
    {
      "epoch": 0.7031899252061625,
      "grad_norm": 0.7693660259246826,
      "learning_rate": 0.0007691278663261084,
      "loss": 4.1623,
      "step": 132000
    },
    {
      "epoch": 0.705853523407701,
      "grad_norm": 0.7547662854194641,
      "learning_rate": 0.0007682360411523809,
      "loss": 4.1604,
      "step": 132500
    },
    {
      "epoch": 0.7085171216092395,
      "grad_norm": 0.7436234951019287,
      "learning_rate": 0.0007673459996290008,
      "loss": 4.159,
      "step": 133000
    },
    {
      "epoch": 0.711180719810778,
      "grad_norm": 0.7248745560646057,
      "learning_rate": 0.0007664541744552732,
      "loss": 4.155,
      "step": 133500
    },
    {
      "epoch": 0.7138443180123165,
      "grad_norm": 0.7338257431983948,
      "learning_rate": 0.0007655623492815456,
      "loss": 4.1573,
      "step": 134000
    },
    {
      "epoch": 0.716507916213855,
      "grad_norm": 0.7636457085609436,
      "learning_rate": 0.0007646705241078181,
      "loss": 4.1568,
      "step": 134500
    },
    {
      "epoch": 0.7191715144153935,
      "grad_norm": 0.7198740243911743,
      "learning_rate": 0.000763780482584438,
      "loss": 4.1597,
      "step": 135000
    },
    {
      "epoch": 0.721835112616932,
      "grad_norm": 0.7390605807304382,
      "learning_rate": 0.0007628886574107105,
      "loss": 4.1471,
      "step": 135500
    },
    {
      "epoch": 0.7244987108184705,
      "grad_norm": 0.7730891108512878,
      "learning_rate": 0.0007619968322369829,
      "loss": 4.1518,
      "step": 136000
    },
    {
      "epoch": 0.727162309020009,
      "grad_norm": 0.7512543797492981,
      "learning_rate": 0.0007611050070632553,
      "loss": 4.1602,
      "step": 136500
    },
    {
      "epoch": 0.7298259072215475,
      "grad_norm": 0.7366748452186584,
      "learning_rate": 0.0007602149655398753,
      "loss": 4.1583,
      "step": 137000
    },
    {
      "epoch": 0.7324895054230859,
      "grad_norm": 0.7468605041503906,
      "learning_rate": 0.0007593231403661477,
      "loss": 4.1535,
      "step": 137500
    },
    {
      "epoch": 0.7351531036246244,
      "grad_norm": 0.7176985144615173,
      "learning_rate": 0.0007584313151924203,
      "loss": 4.1525,
      "step": 138000
    },
    {
      "epoch": 0.7378167018261629,
      "grad_norm": 0.7422710657119751,
      "learning_rate": 0.0007575394900186927,
      "loss": 4.1507,
      "step": 138500
    },
    {
      "epoch": 0.7404803000277014,
      "grad_norm": 0.7459094524383545,
      "learning_rate": 0.0007566494484953126,
      "loss": 4.1541,
      "step": 139000
    },
    {
      "epoch": 0.7431438982292399,
      "grad_norm": 0.7306596636772156,
      "learning_rate": 0.000755757623321585,
      "loss": 4.1502,
      "step": 139500
    },
    {
      "epoch": 0.7458074964307784,
      "grad_norm": 0.7191296219825745,
      "learning_rate": 0.0007548657981478574,
      "loss": 4.1483,
      "step": 140000
    },
    {
      "epoch": 0.7484710946323169,
      "grad_norm": 0.7819980382919312,
      "learning_rate": 0.00075397397297413,
      "loss": 4.1589,
      "step": 140500
    },
    {
      "epoch": 0.7511346928338554,
      "grad_norm": 0.7624921202659607,
      "learning_rate": 0.0007530839314507498,
      "loss": 4.1531,
      "step": 141000
    },
    {
      "epoch": 0.7537982910353939,
      "grad_norm": 0.7341359257698059,
      "learning_rate": 0.0007521921062770223,
      "loss": 4.1514,
      "step": 141500
    },
    {
      "epoch": 0.7564618892369324,
      "grad_norm": 0.7539492249488831,
      "learning_rate": 0.0007513002811032947,
      "loss": 4.153,
      "step": 142000
    },
    {
      "epoch": 0.7591254874384709,
      "grad_norm": 0.7897160053253174,
      "learning_rate": 0.0007504084559295671,
      "loss": 4.1462,
      "step": 142500
    },
    {
      "epoch": 0.7617890856400094,
      "grad_norm": 0.7714428901672363,
      "learning_rate": 0.0007495184144061872,
      "loss": 4.1436,
      "step": 143000
    },
    {
      "epoch": 0.7644526838415479,
      "grad_norm": 0.8038801550865173,
      "learning_rate": 0.0007486265892324597,
      "loss": 4.1506,
      "step": 143500
    },
    {
      "epoch": 0.7671162820430864,
      "grad_norm": 0.7296925187110901,
      "learning_rate": 0.0007477347640587321,
      "loss": 4.1493,
      "step": 144000
    },
    {
      "epoch": 0.7697798802446248,
      "grad_norm": 0.7423230409622192,
      "learning_rate": 0.0007468429388850045,
      "loss": 4.1464,
      "step": 144500
    },
    {
      "epoch": 0.7724434784461633,
      "grad_norm": 0.7713762521743774,
      "learning_rate": 0.0007459528973616244,
      "loss": 4.151,
      "step": 145000
    },
    {
      "epoch": 0.7751070766477018,
      "grad_norm": 0.7986962199211121,
      "learning_rate": 0.0007450610721878969,
      "loss": 4.1448,
      "step": 145500
    },
    {
      "epoch": 0.7777706748492403,
      "grad_norm": 0.794867217540741,
      "learning_rate": 0.0007441692470141694,
      "loss": 4.1523,
      "step": 146000
    },
    {
      "epoch": 0.7804342730507788,
      "grad_norm": 0.7599649429321289,
      "learning_rate": 0.0007432774218404418,
      "loss": 4.1454,
      "step": 146500
    },
    {
      "epoch": 0.7830978712523173,
      "grad_norm": 0.7340590357780457,
      "learning_rate": 0.0007423873803170616,
      "loss": 4.144,
      "step": 147000
    },
    {
      "epoch": 0.7857614694538558,
      "grad_norm": 0.7674250602722168,
      "learning_rate": 0.0007414955551433341,
      "loss": 4.1502,
      "step": 147500
    },
    {
      "epoch": 0.7884250676553943,
      "grad_norm": 0.7552058696746826,
      "learning_rate": 0.0007406037299696065,
      "loss": 4.1453,
      "step": 148000
    },
    {
      "epoch": 0.7910886658569328,
      "grad_norm": 0.7295849323272705,
      "learning_rate": 0.0007397119047958791,
      "loss": 4.1506,
      "step": 148500
    },
    {
      "epoch": 0.7937522640584713,
      "grad_norm": 0.754206120967865,
      "learning_rate": 0.000738821863272499,
      "loss": 4.1452,
      "step": 149000
    },
    {
      "epoch": 0.7964158622600098,
      "grad_norm": 0.8196142911911011,
      "learning_rate": 0.0007379300380987715,
      "loss": 4.153,
      "step": 149500
    },
    {
      "epoch": 0.7990794604615483,
      "grad_norm": 0.7535151243209839,
      "learning_rate": 0.0007370382129250439,
      "loss": 4.1493,
      "step": 150000
    },
    {
      "epoch": 0.8017430586630868,
      "grad_norm": 0.8634600043296814,
      "learning_rate": 0.0007361463877513163,
      "loss": 4.1483,
      "step": 150500
    },
    {
      "epoch": 0.8044066568646253,
      "grad_norm": 0.7539383769035339,
      "learning_rate": 0.0007352563462279363,
      "loss": 4.1511,
      "step": 151000
    },
    {
      "epoch": 0.8070702550661638,
      "grad_norm": 0.7170119881629944,
      "learning_rate": 0.0007343645210542087,
      "loss": 4.1504,
      "step": 151500
    },
    {
      "epoch": 0.8097338532677023,
      "grad_norm": 0.7679442763328552,
      "learning_rate": 0.0007334726958804812,
      "loss": 4.1455,
      "step": 152000
    },
    {
      "epoch": 0.8123974514692408,
      "grad_norm": 0.7368362545967102,
      "learning_rate": 0.0007325808707067536,
      "loss": 4.1481,
      "step": 152500
    },
    {
      "epoch": 0.8150610496707793,
      "grad_norm": 0.7174336910247803,
      "learning_rate": 0.000731689045533026,
      "loss": 4.1451,
      "step": 153000
    },
    {
      "epoch": 0.8177246478723178,
      "grad_norm": 0.7762460708618164,
      "learning_rate": 0.0007307990040096461,
      "loss": 4.1437,
      "step": 153500
    },
    {
      "epoch": 0.8203882460738563,
      "grad_norm": 0.6886820197105408,
      "learning_rate": 0.0007299071788359185,
      "loss": 4.1429,
      "step": 154000
    },
    {
      "epoch": 0.8230518442753948,
      "grad_norm": 0.7819857597351074,
      "learning_rate": 0.000729015353662191,
      "loss": 4.1408,
      "step": 154500
    },
    {
      "epoch": 0.8257154424769333,
      "grad_norm": 0.78780198097229,
      "learning_rate": 0.0007281235284884634,
      "loss": 4.147,
      "step": 155000
    },
    {
      "epoch": 0.8283790406784717,
      "grad_norm": 0.7623980045318604,
      "learning_rate": 0.0007272334869650833,
      "loss": 4.1449,
      "step": 155500
    },
    {
      "epoch": 0.8310426388800102,
      "grad_norm": 0.7452903389930725,
      "learning_rate": 0.0007263416617913558,
      "loss": 4.1444,
      "step": 156000
    },
    {
      "epoch": 0.8337062370815487,
      "grad_norm": 0.7188674807548523,
      "learning_rate": 0.0007254498366176282,
      "loss": 4.1378,
      "step": 156500
    },
    {
      "epoch": 0.8363698352830872,
      "grad_norm": 0.7653003931045532,
      "learning_rate": 0.0007245580114439007,
      "loss": 4.1454,
      "step": 157000
    },
    {
      "epoch": 0.8390334334846257,
      "grad_norm": 0.7343904376029968,
      "learning_rate": 0.0007236679699205205,
      "loss": 4.1479,
      "step": 157500
    },
    {
      "epoch": 0.8416970316861642,
      "grad_norm": 0.7688188552856445,
      "learning_rate": 0.000722776144746793,
      "loss": 4.1353,
      "step": 158000
    },
    {
      "epoch": 0.8443606298877027,
      "grad_norm": 0.7669944167137146,
      "learning_rate": 0.0007218843195730654,
      "loss": 4.1369,
      "step": 158500
    },
    {
      "epoch": 0.8470242280892412,
      "grad_norm": 0.7605074048042297,
      "learning_rate": 0.0007209924943993379,
      "loss": 4.1446,
      "step": 159000
    },
    {
      "epoch": 0.8496878262907797,
      "grad_norm": 0.7343530058860779,
      "learning_rate": 0.0007201024528759579,
      "loss": 4.1409,
      "step": 159500
    },
    {
      "epoch": 0.8523514244923182,
      "grad_norm": 0.7942246198654175,
      "learning_rate": 0.0007192106277022303,
      "loss": 4.144,
      "step": 160000
    },
    {
      "epoch": 0.8550150226938567,
      "grad_norm": 0.7736623287200928,
      "learning_rate": 0.0007183188025285028,
      "loss": 4.141,
      "step": 160500
    },
    {
      "epoch": 0.8576786208953951,
      "grad_norm": 0.7663691639900208,
      "learning_rate": 0.0007174269773547752,
      "loss": 4.1434,
      "step": 161000
    },
    {
      "epoch": 0.8603422190969336,
      "grad_norm": 0.7635341286659241,
      "learning_rate": 0.0007165369358313952,
      "loss": 4.1439,
      "step": 161500
    },
    {
      "epoch": 0.8630058172984721,
      "grad_norm": 0.797211766242981,
      "learning_rate": 0.0007156451106576676,
      "loss": 4.1331,
      "step": 162000
    },
    {
      "epoch": 0.8656694155000106,
      "grad_norm": 0.7563562393188477,
      "learning_rate": 0.00071475328548394,
      "loss": 4.1429,
      "step": 162500
    },
    {
      "epoch": 0.8683330137015491,
      "grad_norm": 0.7162951827049255,
      "learning_rate": 0.0007138614603102125,
      "loss": 4.1389,
      "step": 163000
    },
    {
      "epoch": 0.8709966119030876,
      "grad_norm": 0.7123258709907532,
      "learning_rate": 0.0007129714187868323,
      "loss": 4.136,
      "step": 163500
    },
    {
      "epoch": 0.8736602101046261,
      "grad_norm": 0.728543221950531,
      "learning_rate": 0.0007120795936131049,
      "loss": 4.1325,
      "step": 164000
    },
    {
      "epoch": 0.8763238083061646,
      "grad_norm": 0.7728511691093445,
      "learning_rate": 0.0007111877684393773,
      "loss": 4.1348,
      "step": 164500
    },
    {
      "epoch": 0.8789874065077031,
      "grad_norm": 0.7468729019165039,
      "learning_rate": 0.0007102959432656497,
      "loss": 4.1361,
      "step": 165000
    },
    {
      "epoch": 0.8816510047092416,
      "grad_norm": 0.7346534132957458,
      "learning_rate": 0.0007094059017422697,
      "loss": 4.1396,
      "step": 165500
    },
    {
      "epoch": 0.8843146029107801,
      "grad_norm": 0.7773277759552002,
      "learning_rate": 0.0007085140765685421,
      "loss": 4.1401,
      "step": 166000
    },
    {
      "epoch": 0.8869782011123186,
      "grad_norm": 0.709701657295227,
      "learning_rate": 0.0007076222513948147,
      "loss": 4.1317,
      "step": 166500
    },
    {
      "epoch": 0.8896417993138571,
      "grad_norm": 0.7487180233001709,
      "learning_rate": 0.0007067304262210871,
      "loss": 4.13,
      "step": 167000
    },
    {
      "epoch": 0.8923053975153956,
      "grad_norm": 0.7227104306221008,
      "learning_rate": 0.000705840384697707,
      "loss": 4.1367,
      "step": 167500
    },
    {
      "epoch": 0.8949689957169341,
      "grad_norm": 0.7912375330924988,
      "learning_rate": 0.0007049485595239794,
| "loss": 4.1294, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 0.8976325939184726, | |
| "grad_norm": 0.8671672344207764, | |
| "learning_rate": 0.0007040567343502518, | |
| "loss": 4.129, | |
| "step": 168500 | |
| }, | |
| { | |
| "epoch": 0.9002961921200111, | |
| "grad_norm": 0.7554329633712769, | |
| "learning_rate": 0.0007031649091765244, | |
| "loss": 4.1381, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 0.9029597903215496, | |
| "grad_norm": 0.7798919081687927, | |
| "learning_rate": 0.0007022748676531442, | |
| "loss": 4.1297, | |
| "step": 169500 | |
| }, | |
| { | |
| "epoch": 0.9056233885230881, | |
| "grad_norm": 0.7176423668861389, | |
| "learning_rate": 0.0007013830424794167, | |
| "loss": 4.132, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 0.9082869867246266, | |
| "grad_norm": 0.7016908526420593, | |
| "learning_rate": 0.0007004912173056891, | |
| "loss": 4.132, | |
| "step": 170500 | |
| }, | |
| { | |
| "epoch": 0.9109505849261651, | |
| "grad_norm": 0.7394859790802002, | |
| "learning_rate": 0.0006995993921319615, | |
| "loss": 4.1337, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 0.9136141831277036, | |
| "grad_norm": 0.745543897151947, | |
| "learning_rate": 0.0006987093506085815, | |
| "loss": 4.1316, | |
| "step": 171500 | |
| }, | |
| { | |
| "epoch": 0.9162777813292421, | |
| "grad_norm": 0.7842167019844055, | |
| "learning_rate": 0.000697817525434854, | |
| "loss": 4.1314, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 0.9189413795307806, | |
| "grad_norm": 0.7487747073173523, | |
| "learning_rate": 0.0006969257002611265, | |
| "loss": 4.1281, | |
| "step": 172500 | |
| }, | |
| { | |
| "epoch": 0.9216049777323191, | |
| "grad_norm": 0.737399160861969, | |
| "learning_rate": 0.0006960338750873989, | |
| "loss": 4.1325, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 0.9242685759338576, | |
| "grad_norm": 0.7666307687759399, | |
| "learning_rate": 0.0006951438335640188, | |
| "loss": 4.1333, | |
| "step": 173500 | |
| }, | |
| { | |
| "epoch": 0.926932174135396, | |
| "grad_norm": 0.7485344409942627, | |
| "learning_rate": 0.0006942520083902912, | |
| "loss": 4.1317, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 0.9295957723369345, | |
| "grad_norm": 0.7282237410545349, | |
| "learning_rate": 0.0006933601832165637, | |
| "loss": 4.1326, | |
| "step": 174500 | |
| }, | |
| { | |
| "epoch": 0.932259370538473, | |
| "grad_norm": 0.7747819423675537, | |
| "learning_rate": 0.0006924701416931836, | |
| "loss": 4.1362, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 0.9349229687400115, | |
| "grad_norm": 0.7578604817390442, | |
| "learning_rate": 0.000691578316519456, | |
| "loss": 4.1383, | |
| "step": 175500 | |
| }, | |
| { | |
| "epoch": 0.93758656694155, | |
| "grad_norm": 0.7957220673561096, | |
| "learning_rate": 0.0006906864913457285, | |
| "loss": 4.128, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 0.9402501651430885, | |
| "grad_norm": 0.7936584949493408, | |
| "learning_rate": 0.000689794666172001, | |
| "loss": 4.122, | |
| "step": 176500 | |
| }, | |
| { | |
| "epoch": 0.942913763344627, | |
| "grad_norm": 0.8081178069114685, | |
| "learning_rate": 0.0006889028409982735, | |
| "loss": 4.1298, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 0.9455773615461655, | |
| "grad_norm": 0.7892795205116272, | |
| "learning_rate": 0.000688011015824546, | |
| "loss": 4.1267, | |
| "step": 177500 | |
| }, | |
| { | |
| "epoch": 0.948240959747704, | |
| "grad_norm": 0.7274259328842163, | |
| "learning_rate": 0.0006871191906508184, | |
| "loss": 4.1232, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 0.9509045579492424, | |
| "grad_norm": 0.7544950246810913, | |
| "learning_rate": 0.0006862291491274383, | |
| "loss": 4.1267, | |
| "step": 178500 | |
| }, | |
| { | |
| "epoch": 0.9535681561507809, | |
| "grad_norm": 0.798841655254364, | |
| "learning_rate": 0.0006853373239537107, | |
| "loss": 4.1328, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 0.9562317543523194, | |
| "grad_norm": 0.7239564657211304, | |
| "learning_rate": 0.0006844454987799832, | |
| "loss": 4.1336, | |
| "step": 179500 | |
| }, | |
| { | |
| "epoch": 0.9588953525538579, | |
| "grad_norm": 0.8423783779144287, | |
| "learning_rate": 0.0006835536736062557, | |
| "loss": 4.1286, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 0.9615589507553964, | |
| "grad_norm": 0.7887551784515381, | |
| "learning_rate": 0.0006826618484325281, | |
| "loss": 4.1199, | |
| "step": 180500 | |
| }, | |
| { | |
| "epoch": 0.9642225489569349, | |
| "grad_norm": 0.7365000247955322, | |
| "learning_rate": 0.0006817700232588005, | |
| "loss": 4.1321, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 0.9668861471584734, | |
| "grad_norm": 0.7989848256111145, | |
| "learning_rate": 0.0006808799817354204, | |
| "loss": 4.1327, | |
| "step": 181500 | |
| }, | |
| { | |
| "epoch": 0.9695497453600119, | |
| "grad_norm": 0.7484691143035889, | |
| "learning_rate": 0.0006799881565616928, | |
| "loss": 4.1239, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 0.9722133435615504, | |
| "grad_norm": 0.8183499574661255, | |
| "learning_rate": 0.0006790963313879654, | |
| "loss": 4.1253, | |
| "step": 182500 | |
| }, | |
| { | |
| "epoch": 0.9748769417630889, | |
| "grad_norm": 0.7121425271034241, | |
| "learning_rate": 0.0006782045062142378, | |
| "loss": 4.1342, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 0.9775405399646274, | |
| "grad_norm": 0.7777406573295593, | |
| "learning_rate": 0.0006773144646908578, | |
| "loss": 4.1286, | |
| "step": 183500 | |
| }, | |
| { | |
| "epoch": 0.9802041381661659, | |
| "grad_norm": 0.7477155327796936, | |
| "learning_rate": 0.0006764226395171302, | |
| "loss": 4.1278, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 0.9828677363677044, | |
| "grad_norm": 0.8153510093688965, | |
| "learning_rate": 0.0006755308143434026, | |
| "loss": 4.1232, | |
| "step": 184500 | |
| }, | |
| { | |
| "epoch": 0.9855313345692429, | |
| "grad_norm": 0.7904220819473267, | |
| "learning_rate": 0.0006746389891696752, | |
| "loss": 4.1283, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 0.9881949327707814, | |
| "grad_norm": 0.8383620977401733, | |
| "learning_rate": 0.0006737471639959476, | |
| "loss": 4.1334, | |
| "step": 185500 | |
| }, | |
| { | |
| "epoch": 0.9908585309723199, | |
| "grad_norm": 0.7521381378173828, | |
| "learning_rate": 0.0006728571224725675, | |
| "loss": 4.1339, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 0.9935221291738584, | |
| "grad_norm": 0.7851571440696716, | |
| "learning_rate": 0.0006719652972988399, | |
| "loss": 4.1289, | |
| "step": 186500 | |
| }, | |
| { | |
| "epoch": 0.9961857273753969, | |
| "grad_norm": 0.7758961319923401, | |
| "learning_rate": 0.0006710734721251123, | |
| "loss": 4.1294, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 0.9988493255769354, | |
| "grad_norm": 0.7806641459465027, | |
| "learning_rate": 0.0006701816469513849, | |
| "loss": 4.1285, | |
| "step": 187500 | |
| }, | |
| { | |
| "epoch": 1.001512923778474, | |
| "grad_norm": 0.7453823685646057, | |
| "learning_rate": 0.0006692916054280047, | |
| "loss": 4.1283, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 1.0041765219800123, | |
| "grad_norm": 0.7377151846885681, | |
| "learning_rate": 0.0006683997802542772, | |
| "loss": 4.1297, | |
| "step": 188500 | |
| }, | |
| { | |
| "epoch": 1.006840120181551, | |
| "grad_norm": 0.7941287755966187, | |
| "learning_rate": 0.0006675079550805496, | |
| "loss": 4.1212, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 1.0095037183830893, | |
| "grad_norm": 0.767425000667572, | |
| "learning_rate": 0.000666616129906822, | |
| "loss": 4.1229, | |
| "step": 189500 | |
| }, | |
| { | |
| "epoch": 1.0121673165846279, | |
| "grad_norm": 0.7483153343200684, | |
| "learning_rate": 0.0006657243047330946, | |
| "loss": 4.1242, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 1.0148309147861663, | |
| "grad_norm": 0.7890580892562866, | |
| "learning_rate": 0.0006648342632097145, | |
| "loss": 4.1306, | |
| "step": 190500 | |
| }, | |
| { | |
| "epoch": 1.0174945129877049, | |
| "grad_norm": 0.7415242791175842, | |
| "learning_rate": 0.000663942438035987, | |
| "loss": 4.1285, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 1.0201581111892433, | |
| "grad_norm": 0.7596645951271057, | |
| "learning_rate": 0.0006630506128622594, | |
| "loss": 4.1258, | |
| "step": 191500 | |
| }, | |
| { | |
| "epoch": 1.0228217093907819, | |
| "grad_norm": 0.8304431438446045, | |
| "learning_rate": 0.0006621587876885318, | |
| "loss": 4.1232, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 1.0254853075923203, | |
| "grad_norm": 0.77840656042099, | |
| "learning_rate": 0.0006612687461651517, | |
| "loss": 4.1195, | |
| "step": 192500 | |
| }, | |
| { | |
| "epoch": 1.0281489057938589, | |
| "grad_norm": 0.7862575650215149, | |
| "learning_rate": 0.0006603769209914242, | |
| "loss": 4.1258, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 1.0308125039953973, | |
| "grad_norm": 0.7667100429534912, | |
| "learning_rate": 0.0006594850958176967, | |
| "loss": 4.1185, | |
| "step": 193500 | |
| }, | |
| { | |
| "epoch": 1.0334761021969359, | |
| "grad_norm": 0.7835633754730225, | |
| "learning_rate": 0.0006585932706439691, | |
| "loss": 4.1224, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 1.0361397003984743, | |
| "grad_norm": 0.7486304640769958, | |
| "learning_rate": 0.000657703229120589, | |
| "loss": 4.124, | |
| "step": 194500 | |
| }, | |
| { | |
| "epoch": 1.0388032986000129, | |
| "grad_norm": 0.7897284030914307, | |
| "learning_rate": 0.0006568114039468614, | |
| "loss": 4.1203, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 1.0414668968015512, | |
| "grad_norm": 0.7997919321060181, | |
| "learning_rate": 0.0006559195787731339, | |
| "loss": 4.1202, | |
| "step": 195500 | |
| }, | |
| { | |
| "epoch": 1.0441304950030899, | |
| "grad_norm": 0.7987415194511414, | |
| "learning_rate": 0.0006550277535994064, | |
| "loss": 4.1231, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 1.0467940932046282, | |
| "grad_norm": 0.7434735894203186, | |
| "learning_rate": 0.0006541377120760263, | |
| "loss": 4.1196, | |
| "step": 196500 | |
| }, | |
| { | |
| "epoch": 1.0494576914061668, | |
| "grad_norm": 0.806969404220581, | |
| "learning_rate": 0.0006532458869022988, | |
| "loss": 4.1185, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 1.0521212896077052, | |
| "grad_norm": 0.8006301522254944, | |
| "learning_rate": 0.0006523540617285712, | |
| "loss": 4.1209, | |
| "step": 197500 | |
| }, | |
| { | |
| "epoch": 1.0547848878092438, | |
| "grad_norm": 0.759758472442627, | |
| "learning_rate": 0.0006514622365548438, | |
| "loss": 4.1194, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 1.0574484860107822, | |
| "grad_norm": 0.8778506517410278, | |
| "learning_rate": 0.0006505704113811162, | |
| "loss": 4.1293, | |
| "step": 198500 | |
| }, | |
| { | |
| "epoch": 1.0601120842123208, | |
| "grad_norm": 0.7795832753181458, | |
| "learning_rate": 0.000649680369857736, | |
| "loss": 4.1152, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 1.0627756824138592, | |
| "grad_norm": 0.7928754687309265, | |
| "learning_rate": 0.0006487885446840085, | |
| "loss": 4.1177, | |
| "step": 199500 | |
| }, | |
| { | |
| "epoch": 1.0654392806153978, | |
| "grad_norm": 0.8119847774505615, | |
| "learning_rate": 0.0006478967195102809, | |
| "loss": 4.1205, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 1.0681028788169362, | |
| "grad_norm": 0.739378035068512, | |
| "learning_rate": 0.0006470048943365535, | |
| "loss": 4.1111, | |
| "step": 200500 | |
| }, | |
| { | |
| "epoch": 1.0707664770184748, | |
| "grad_norm": 0.7906088829040527, | |
| "learning_rate": 0.0006461148528131734, | |
| "loss": 4.1186, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 1.0734300752200132, | |
| "grad_norm": 0.7810208797454834, | |
| "learning_rate": 0.0006452230276394459, | |
| "loss": 4.1204, | |
| "step": 201500 | |
| }, | |
| { | |
| "epoch": 1.0760936734215516, | |
| "grad_norm": 0.741383969783783, | |
| "learning_rate": 0.0006443312024657183, | |
| "loss": 4.1222, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 1.0787572716230902, | |
| "grad_norm": 0.7824720740318298, | |
| "learning_rate": 0.0006434393772919907, | |
| "loss": 4.1174, | |
| "step": 202500 | |
| }, | |
| { | |
| "epoch": 1.0814208698246286, | |
| "grad_norm": 0.7920011281967163, | |
| "learning_rate": 0.0006425493357686106, | |
| "loss": 4.1196, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 1.0840844680261672, | |
| "grad_norm": 0.792914628982544, | |
| "learning_rate": 0.0006416575105948831, | |
| "loss": 4.1153, | |
| "step": 203500 | |
| }, | |
| { | |
| "epoch": 1.0867480662277056, | |
| "grad_norm": 0.7724523544311523, | |
| "learning_rate": 0.0006407656854211556, | |
| "loss": 4.1105, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 1.0894116644292442, | |
| "grad_norm": 0.7834595441818237, | |
| "learning_rate": 0.000639873860247428, | |
| "loss": 4.1179, | |
| "step": 204500 | |
| }, | |
| { | |
| "epoch": 1.0920752626307826, | |
| "grad_norm": 0.8056479096412659, | |
| "learning_rate": 0.0006389838187240478, | |
| "loss": 4.1126, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 1.0947388608323212, | |
| "grad_norm": 0.7697902321815491, | |
| "learning_rate": 0.0006380919935503203, | |
| "loss": 4.1193, | |
| "step": 205500 | |
| }, | |
| { | |
| "epoch": 1.0974024590338596, | |
| "grad_norm": 0.7807758450508118, | |
| "learning_rate": 0.0006372001683765928, | |
| "loss": 4.1192, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 1.1000660572353982, | |
| "grad_norm": 0.7408417463302612, | |
| "learning_rate": 0.0006363083432028652, | |
| "loss": 4.1119, | |
| "step": 206500 | |
| }, | |
| { | |
| "epoch": 1.1027296554369366, | |
| "grad_norm": 0.9000714421272278, | |
| "learning_rate": 0.0006354165180291377, | |
| "loss": 4.1185, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 1.1053932536384752, | |
| "grad_norm": 0.8088692426681519, | |
| "learning_rate": 0.0006345264765057577, | |
| "loss": 4.1177, | |
| "step": 207500 | |
| }, | |
| { | |
| "epoch": 1.1080568518400136, | |
| "grad_norm": 0.778122067451477, | |
| "learning_rate": 0.0006336346513320301, | |
| "loss": 4.1143, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 1.1107204500415522, | |
| "grad_norm": 0.8222107291221619, | |
| "learning_rate": 0.0006327428261583026, | |
| "loss": 4.1136, | |
| "step": 208500 | |
| }, | |
| { | |
| "epoch": 1.1133840482430906, | |
| "grad_norm": 0.7356205582618713, | |
| "learning_rate": 0.0006318510009845751, | |
| "loss": 4.1187, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 1.1160476464446292, | |
| "grad_norm": 0.7457647919654846, | |
| "learning_rate": 0.0006309609594611949, | |
| "loss": 4.1123, | |
| "step": 209500 | |
| }, | |
| { | |
| "epoch": 1.1187112446461676, | |
| "grad_norm": 0.789622962474823, | |
| "learning_rate": 0.0006300691342874674, | |
| "loss": 4.1175, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 1.1213748428477062, | |
| "grad_norm": 0.8369338512420654, | |
| "learning_rate": 0.0006291773091137398, | |
| "loss": 4.1147, | |
| "step": 210500 | |
| }, | |
| { | |
| "epoch": 1.1240384410492446, | |
| "grad_norm": 0.8210717439651489, | |
| "learning_rate": 0.0006282854839400123, | |
| "loss": 4.1142, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 1.1267020392507832, | |
| "grad_norm": 0.7775838375091553, | |
| "learning_rate": 0.0006273954424166322, | |
| "loss": 4.1203, | |
| "step": 211500 | |
| }, | |
| { | |
| "epoch": 1.1293656374523215, | |
| "grad_norm": 0.7949962019920349, | |
| "learning_rate": 0.0006265036172429046, | |
| "loss": 4.1139, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 1.1320292356538602, | |
| "grad_norm": 0.7534223794937134, | |
| "learning_rate": 0.000625611792069177, | |
| "loss": 4.1177, | |
| "step": 212500 | |
| }, | |
| { | |
| "epoch": 1.1346928338553985, | |
| "grad_norm": 0.8075549602508545, | |
| "learning_rate": 0.0006247199668954495, | |
| "loss": 4.1147, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 1.1373564320569371, | |
| "grad_norm": 0.7999294400215149, | |
| "learning_rate": 0.0006238299253720696, | |
| "loss": 4.116, | |
| "step": 213500 | |
| }, | |
| { | |
| "epoch": 1.1400200302584755, | |
| "grad_norm": 0.7690563797950745, | |
| "learning_rate": 0.000622938100198342, | |
| "loss": 4.1108, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 1.1426836284600141, | |
| "grad_norm": 0.7599471211433411, | |
| "learning_rate": 0.0006220462750246144, | |
| "loss": 4.1155, | |
| "step": 214500 | |
| }, | |
| { | |
| "epoch": 1.1453472266615525, | |
| "grad_norm": 0.7433050274848938, | |
| "learning_rate": 0.0006211544498508869, | |
| "loss": 4.1172, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 1.1480108248630911, | |
| "grad_norm": 0.781114935874939, | |
| "learning_rate": 0.0006202644083275067, | |
| "loss": 4.1084, | |
| "step": 215500 | |
| }, | |
| { | |
| "epoch": 1.1506744230646295, | |
| "grad_norm": 0.7194410562515259, | |
| "learning_rate": 0.0006193725831537791, | |
| "loss": 4.1127, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 1.1533380212661681, | |
| "grad_norm": 0.8126916289329529, | |
| "learning_rate": 0.0006184807579800517, | |
| "loss": 4.1126, | |
| "step": 216500 | |
| }, | |
| { | |
| "epoch": 1.1560016194677065, | |
| "grad_norm": 0.8229861855506897, | |
| "learning_rate": 0.0006175889328063241, | |
| "loss": 4.1121, | |
| "step": 217000 | |
| }, | |
| { | |
| "epoch": 1.158665217669245, | |
| "grad_norm": 0.8246269226074219, | |
| "learning_rate": 0.000616698891282944, | |
| "loss": 4.1092, | |
| "step": 217500 | |
| }, | |
| { | |
| "epoch": 1.1613288158707835, | |
| "grad_norm": 0.8146107196807861, | |
| "learning_rate": 0.0006158070661092164, | |
| "loss": 4.1091, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 1.1639924140723221, | |
| "grad_norm": 0.7878261208534241, | |
| "learning_rate": 0.0006149152409354888, | |
| "loss": 4.1161, | |
| "step": 218500 | |
| }, | |
| { | |
| "epoch": 1.1666560122738605, | |
| "grad_norm": 0.7780360579490662, | |
| "learning_rate": 0.0006140234157617614, | |
| "loss": 4.1079, | |
| "step": 219000 | |
| }, | |
| { | |
| "epoch": 1.169319610475399, | |
| "grad_norm": 0.7969585657119751, | |
| "learning_rate": 0.0006131333742383814, | |
| "loss": 4.1134, | |
| "step": 219500 | |
| }, | |
| { | |
| "epoch": 1.1719832086769375, | |
| "grad_norm": 0.8402618765830994, | |
| "learning_rate": 0.0006122415490646538, | |
| "loss": 4.1143, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 1.1746468068784761, | |
| "grad_norm": 0.7946035861968994, | |
| "learning_rate": 0.0006113497238909262, | |
| "loss": 4.114, | |
| "step": 220500 | |
| }, | |
| { | |
| "epoch": 1.1773104050800145, | |
| "grad_norm": 0.7864482402801514, | |
| "learning_rate": 0.0006104578987171987, | |
| "loss": 4.1126, | |
| "step": 221000 | |
| }, | |
| { | |
| "epoch": 1.1799740032815529, | |
| "grad_norm": 0.8313577771186829, | |
| "learning_rate": 0.0006095678571938186, | |
| "loss": 4.106, | |
| "step": 221500 | |
| }, | |
| { | |
| "epoch": 1.1826376014830915, | |
| "grad_norm": 0.8574484586715698, | |
| "learning_rate": 0.0006086760320200911, | |
| "loss": 4.1085, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 1.1853011996846299, | |
| "grad_norm": 0.7599306702613831, | |
| "learning_rate": 0.0006077842068463635, | |
| "loss": 4.1071, | |
| "step": 222500 | |
| }, | |
| { | |
| "epoch": 1.1879647978861685, | |
| "grad_norm": 0.7732433676719666, | |
| "learning_rate": 0.0006068923816726359, | |
| "loss": 4.1185, | |
| "step": 223000 | |
| }, | |
| { | |
| "epoch": 1.1906283960877069, | |
| "grad_norm": 0.8210047483444214, | |
| "learning_rate": 0.0006060023401492559, | |
| "loss": 4.1099, | |
| "step": 223500 | |
| }, | |
| { | |
| "epoch": 1.1932919942892455, | |
| "grad_norm": 0.8054102063179016, | |
| "learning_rate": 0.0006051105149755284, | |
| "loss": 4.1181, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 1.1959555924907839, | |
| "grad_norm": 0.7870852947235107, | |
| "learning_rate": 0.0006042186898018009, | |
| "loss": 4.1016, | |
| "step": 224500 | |
| }, | |
| { | |
| "epoch": 1.1986191906923225, | |
| "grad_norm": 0.8508167266845703, | |
| "learning_rate": 0.0006033268646280733, | |
| "loss": 4.1202, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 1.2012827888938609, | |
| "grad_norm": 0.7744969129562378, | |
| "learning_rate": 0.0006024368231046932, | |
| "loss": 4.1094, | |
| "step": 225500 | |
| }, | |
| { | |
| "epoch": 1.2039463870953995, | |
| "grad_norm": 0.7836142778396606, | |
| "learning_rate": 0.0006015449979309656, | |
| "loss": 4.1079, | |
| "step": 226000 | |
| }, | |
| { | |
| "epoch": 1.2066099852969379, | |
| "grad_norm": 0.7741486430168152, | |
| "learning_rate": 0.000600653172757238, | |
| "loss": 4.1088, | |
| "step": 226500 | |
| }, | |
| { | |
| "epoch": 1.2092735834984765, | |
| "grad_norm": 0.77290940284729, | |
| "learning_rate": 0.0005997613475835106, | |
| "loss": 4.1025, | |
| "step": 227000 | |
| }, | |
| { | |
| "epoch": 1.2119371817000149, | |
| "grad_norm": 0.8240610361099243, | |
| "learning_rate": 0.0005988713060601304, | |
| "loss": 4.104, | |
| "step": 227500 | |
| }, | |
| { | |
| "epoch": 1.2146007799015535, | |
| "grad_norm": 0.7438703775405884, | |
| "learning_rate": 0.0005979794808864029, | |
| "loss": 4.1084, | |
| "step": 228000 | |
| }, | |
| { | |
| "epoch": 1.2172643781030918, | |
| "grad_norm": 0.837753415107727, | |
| "learning_rate": 0.0005970876557126753, | |
| "loss": 4.1017, | |
| "step": 228500 | |
| }, | |
| { | |
| "epoch": 1.2199279763046305, | |
| "grad_norm": 0.7918710112571716, | |
| "learning_rate": 0.0005961958305389477, | |
| "loss": 4.1094, | |
| "step": 229000 | |
| }, | |
| { | |
| "epoch": 1.2225915745061688, | |
| "grad_norm": 0.8078004121780396, | |
| "learning_rate": 0.0005953040053652203, | |
| "loss": 4.1043, | |
| "step": 229500 | |
| }, | |
| { | |
| "epoch": 1.2252551727077075, | |
| "grad_norm": 0.8458930253982544, | |
| "learning_rate": 0.0005944139638418402, | |
| "loss": 4.1069, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 1.2279187709092458, | |
| "grad_norm": 0.7811508178710938, | |
| "learning_rate": 0.0005935221386681127, | |
| "loss": 4.1071, | |
| "step": 230500 | |
| }, | |
| { | |
| "epoch": 1.2305823691107844, | |
| "grad_norm": 0.8446598649024963, | |
| "learning_rate": 0.0005926303134943851, | |
| "loss": 4.1063, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 1.2332459673123228, | |
| "grad_norm": 0.8074429035186768, | |
| "learning_rate": 0.0005917384883206575, | |
| "loss": 4.109, | |
| "step": 231500 | |
| }, | |
| { | |
| "epoch": 1.2359095655138614, | |
| "grad_norm": 0.8163787722587585, | |
| "learning_rate": 0.0005908484467972775, | |
| "loss": 4.1028, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 1.2385731637153998, | |
| "grad_norm": 0.7774120569229126, | |
| "learning_rate": 0.0005899566216235499, | |
| "loss": 4.1084, | |
| "step": 232500 | |
| }, | |
| { | |
| "epoch": 1.2412367619169384, | |
| "grad_norm": 0.7910379767417908, | |
| "learning_rate": 0.0005890647964498224, | |
| "loss": 4.1002, | |
| "step": 233000 | |
| }, | |
| { | |
| "epoch": 1.2439003601184768, | |
| "grad_norm": 0.8428027629852295, | |
| "learning_rate": 0.0005881729712760948, | |
| "loss": 4.1127, | |
| "step": 233500 | |
| }, | |
| { | |
| "epoch": 1.2465639583200154, | |
| "grad_norm": 0.7961114645004272, | |
| "learning_rate": 0.0005872829297527147, | |
| "loss": 4.1046, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 1.2492275565215538, | |
| "grad_norm": 0.8194419145584106, | |
| "learning_rate": 0.0005863911045789872, | |
| "loss": 4.1088, | |
| "step": 234500 | |
| }, | |
| { | |
| "epoch": 1.2518911547230922, | |
| "grad_norm": 0.783875584602356, | |
| "learning_rate": 0.0005854992794052596, | |
| "loss": 4.1086, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 1.2545547529246308, | |
| "grad_norm": 0.7610777020454407, | |
| "learning_rate": 0.0005846074542315321, | |
| "loss": 4.1024, | |
| "step": 235500 | |
| }, | |
| { | |
| "epoch": 1.2572183511261694, | |
| "grad_norm": 0.7696565389633179, | |
| "learning_rate": 0.000583717412708152, | |
| "loss": 4.1016, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 1.2598819493277078, | |
| "grad_norm": 0.82817542552948, | |
| "learning_rate": 0.0005828255875344245, | |
| "loss": 4.0958, | |
| "step": 236500 | |
| }, | |
| { | |
| "epoch": 1.2625455475292462, | |
| "grad_norm": 0.8974746465682983, | |
| "learning_rate": 0.0005819337623606969, | |
| "loss": 4.1077, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 1.2652091457307848, | |
| "grad_norm": 0.7882625460624695, | |
| "learning_rate": 0.0005810419371869694, | |
| "loss": 4.1027, | |
| "step": 237500 | |
| }, | |
| { | |
| "epoch": 1.2678727439323234, | |
| "grad_norm": 0.7710665464401245, | |
| "learning_rate": 0.0005801518956635893, | |
| "loss": 4.1071, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 1.2705363421338618, | |
| "grad_norm": 0.8462359309196472, | |
| "learning_rate": 0.0005792600704898617, | |
| "loss": 4.0993, | |
| "step": 238500 | |
| }, | |
| { | |
| "epoch": 1.2731999403354002, | |
| "grad_norm": 0.7785073518753052, | |
| "learning_rate": 0.0005783682453161342, | |
| "loss": 4.1051, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 1.2758635385369388, | |
| "grad_norm": 0.7724746465682983, | |
| "learning_rate": 0.0005774764201424066, | |
| "loss": 4.1082, | |
| "step": 239500 | |
| }, | |
| { | |
| "epoch": 1.2785271367384774, | |
| "grad_norm": 0.8276979923248291, | |
| "learning_rate": 0.0005765863786190266, | |
| "loss": 4.095, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 1.2811907349400158, | |
| "grad_norm": 0.7959253191947937, | |
| "learning_rate": 0.000575694553445299, | |
| "loss": 4.1026, | |
| "step": 240500 | |
| }, | |
| { | |
| "epoch": 1.2838543331415542, | |
| "grad_norm": 0.806239664554596, | |
| "learning_rate": 0.0005748027282715714, | |
| "loss": 4.1019, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 1.2865179313430928, | |
| "grad_norm": 0.9089943170547485, | |
| "learning_rate": 0.0005739109030978439, | |
| "loss": 4.0955, | |
| "step": 241500 | |
| }, | |
| { | |
| "epoch": 1.2891815295446314, | |
| "grad_norm": 0.8239426612854004, | |
| "learning_rate": 0.0005730208615744638, | |
| "loss": 4.1033, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 1.2918451277461698, | |
| "grad_norm": 0.8066053986549377, | |
| "learning_rate": 0.0005721290364007364, | |
| "loss": 4.1068, | |
| "step": 242500 | |
| }, | |
| { | |
| "epoch": 1.2945087259477082, | |
| "grad_norm": 0.7600257396697998, | |
| "learning_rate": 0.0005712372112270088, | |
| "loss": 4.1006, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 1.2971723241492468, | |
| "grad_norm": 0.7940685749053955, | |
| "learning_rate": 0.0005703471697036287, | |
| "loss": 4.1004, | |
| "step": 243500 | |
| }, | |
| { | |
| "epoch": 1.2998359223507852, | |
| "grad_norm": 0.7310413718223572, | |
| "learning_rate": 0.0005694553445299011, | |
| "loss": 4.1028, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 1.3024995205523238, | |
| "grad_norm": 0.8132951855659485, | |
| "learning_rate": 0.0005685635193561735, | |
| "loss": 4.1104, | |
| "step": 244500 | |
| }, | |
| { | |
| "epoch": 1.3051631187538622, | |
| "grad_norm": 0.8280708193778992, | |
| "learning_rate": 0.0005676716941824461, | |
| "loss": 4.1029, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 1.3078267169554008, | |
| "grad_norm": 0.7521162629127502, | |
| "learning_rate": 0.0005667798690087185, | |
| "loss": 4.0991, | |
| "step": 245500 | |
| }, | |
| { | |
| "epoch": 1.3104903151569391, | |
| "grad_norm": 0.8909037709236145, | |
| "learning_rate": 0.0005658880438349909, | |
| "loss": 4.1005, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 1.3131539133584778, | |
| "grad_norm": 0.8605440855026245, | |
| "learning_rate": 0.0005649962186612634, | |
| "loss": 4.0999, | |
| "step": 246500 | |
| }, | |
| { | |
| "epoch": 1.3158175115600161, | |
| "grad_norm": 0.9294172525405884, | |
| "learning_rate": 0.0005641043934875358, | |
| "loss": 4.0978, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 1.3184811097615547, | |
| "grad_norm": 0.8271783590316772, | |
| "learning_rate": 0.0005632143519641559, | |
| "loss": 4.1005, | |
| "step": 247500 | |
| }, | |
| { | |
| "epoch": 1.3211447079630931, | |
| "grad_norm": 0.7716344594955444, | |
| "learning_rate": 0.0005623225267904283, | |
| "loss": 4.0972, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 1.3238083061646317, | |
| "grad_norm": 0.7663143873214722, | |
| "learning_rate": 0.0005614307016167007, | |
| "loss": 4.1068, | |
| "step": 248500 | |
| }, | |
| { | |
| "epoch": 1.3264719043661701, | |
| "grad_norm": 0.8361650705337524, | |
| "learning_rate": 0.0005605388764429732, | |
| "loss": 4.0955, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 1.3291355025677087, | |
| "grad_norm": 0.8032039403915405, | |
| "learning_rate": 0.000559648834919593, | |
| "loss": 4.0981, | |
| "step": 249500 | |
| }, | |
| { | |
| "epoch": 1.3317991007692471, | |
| "grad_norm": 0.7755228281021118, | |
| "learning_rate": 0.0005587570097458655, | |
| "loss": 4.0985, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 1.3344626989707857, | |
| "grad_norm": 0.8239076733589172, | |
| "learning_rate": 0.000557865184572138, | |
| "loss": 4.102, | |
| "step": 250500 | |
| }, | |
| { | |
| "epoch": 1.3371262971723241, | |
| "grad_norm": 0.849665105342865, | |
| "learning_rate": 0.0005569733593984104, | |
| "loss": 4.1022, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 1.3397898953738627, | |
| "grad_norm": 0.7836341857910156, | |
| "learning_rate": 0.0005560833178750303, | |
| "loss": 4.0985, | |
| "step": 251500 | |
| }, | |
| { | |
| "epoch": 1.3424534935754011, | |
| "grad_norm": 0.7993196845054626, | |
| "learning_rate": 0.0005551914927013027, | |
| "loss": 4.0959, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 1.3451170917769395, | |
| "grad_norm": 0.8100605010986328, | |
| "learning_rate": 0.0005542996675275752, | |
| "loss": 4.0938, | |
| "step": 252500 | |
| }, | |
| { | |
| "epoch": 1.347780689978478, | |
| "grad_norm": 0.8267188668251038, | |
| "learning_rate": 0.0005534078423538477, | |
| "loss": 4.0975, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 1.3504442881800167, | |
| "grad_norm": 0.7876518964767456, | |
| "learning_rate": 0.0005525178008304677, | |
| "loss": 4.0966, | |
| "step": 253500 | |
| }, | |
| { | |
| "epoch": 1.353107886381555, | |
| "grad_norm": 0.8013073801994324, | |
| "learning_rate": 0.0005516259756567401, | |
| "loss": 4.0993, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 1.3557714845830935, | |
| "grad_norm": 0.7732263207435608, | |
| "learning_rate": 0.0005507341504830125, | |
| "loss": 4.0955, | |
| "step": 254500 | |
| }, | |
| { | |
| "epoch": 1.358435082784632, | |
| "grad_norm": 0.8235819935798645, | |
| "learning_rate": 0.000549842325309285, | |
| "loss": 4.0997, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 1.3610986809861707, | |
| "grad_norm": 0.7818782329559326, | |
| "learning_rate": 0.0005489505001355575, | |
| "loss": 4.1026, | |
| "step": 255500 | |
| }, | |
| { | |
| "epoch": 1.363762279187709, | |
| "grad_norm": 0.8184423446655273, | |
| "learning_rate": 0.0005480604586121774, | |
| "loss": 4.092, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 1.3664258773892475, | |
| "grad_norm": 0.7807801365852356, | |
| "learning_rate": 0.0005471686334384498, | |
| "loss": 4.0938, | |
| "step": 256500 | |
| }, | |
| { | |
| "epoch": 1.369089475590786, | |
| "grad_norm": 0.8043480515480042, | |
| "learning_rate": 0.0005462768082647222, | |
| "loss": 4.0964, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 1.3717530737923247, | |
| "grad_norm": 0.8113440871238708, | |
| "learning_rate": 0.0005453849830909947, | |
| "loss": 4.092, | |
| "step": 257500 | |
| }, | |
| { | |
| "epoch": 1.374416671993863, | |
| "grad_norm": 0.776531994342804, | |
| "learning_rate": 0.0005444949415676145, | |
| "loss": 4.1043, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 1.3770802701954015, | |
| "grad_norm": 0.9090542197227478, | |
| "learning_rate": 0.0005436031163938871, | |
| "loss": 4.1026, | |
| "step": 258500 | |
| }, | |
| { | |
| "epoch": 1.37974386839694, | |
| "grad_norm": 0.8724551796913147, | |
| "learning_rate": 0.0005427112912201595, | |
| "loss": 4.0983, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 1.3824074665984787, | |
| "grad_norm": 0.7889623045921326, | |
| "learning_rate": 0.0005418194660464319, | |
| "loss": 4.1027, | |
| "step": 259500 | |
| }, | |
| { | |
| "epoch": 1.385071064800017, | |
| "grad_norm": 0.7813825011253357, | |
| "learning_rate": 0.0005409294245230519, | |
| "loss": 4.092, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 1.3877346630015555, | |
| "grad_norm": 0.8187386989593506, | |
| "learning_rate": 0.0005400393829996718, | |
| "loss": 4.0955, | |
| "step": 260500 | |
| }, | |
| { | |
| "epoch": 1.390398261203094, | |
| "grad_norm": 0.8593798279762268, | |
| "learning_rate": 0.0005391475578259443, | |
| "loss": 4.094, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 1.3930618594046325, | |
| "grad_norm": 0.8074827194213867, | |
| "learning_rate": 0.0005382557326522167, | |
| "loss": 4.095, | |
| "step": 261500 | |
| }, | |
| { | |
| "epoch": 1.395725457606171, | |
| "grad_norm": 0.8229965567588806, | |
| "learning_rate": 0.0005373639074784892, | |
| "loss": 4.0909, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 1.3983890558077094, | |
| "grad_norm": 0.7867224216461182, | |
| "learning_rate": 0.0005364720823047616, | |
| "loss": 4.0934, | |
| "step": 262500 | |
| }, | |
| { | |
| "epoch": 1.401052654009248, | |
| "grad_norm": 0.9083333611488342, | |
| "learning_rate": 0.000535580257131034, | |
| "loss": 4.0982, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 1.4037162522107864, | |
| "grad_norm": 0.8077040314674377, | |
| "learning_rate": 0.0005346884319573066, | |
| "loss": 4.0949, | |
| "step": 263500 | |
| }, | |
| { | |
| "epoch": 1.406379850412325, | |
| "grad_norm": 0.871181070804596, | |
| "learning_rate": 0.000533796606783579, | |
| "loss": 4.096, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 1.4090434486138634, | |
| "grad_norm": 0.8004094958305359, | |
| "learning_rate": 0.0005329065652601989, | |
| "loss": 4.0969, | |
| "step": 264500 | |
| }, | |
| { | |
| "epoch": 1.411707046815402, | |
| "grad_norm": 0.8624884486198425, | |
| "learning_rate": 0.0005320147400864713, | |
| "loss": 4.0964, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 1.4143706450169404, | |
| "grad_norm": 0.7955045104026794, | |
| "learning_rate": 0.0005311229149127437, | |
| "loss": 4.0944, | |
| "step": 265500 | |
| }, | |
| { | |
| "epoch": 1.417034243218479, | |
| "grad_norm": 0.7732199430465698, | |
| "learning_rate": 0.0005302310897390163, | |
| "loss": 4.0906, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 1.4196978414200174, | |
| "grad_norm": 0.8164415955543518, | |
| "learning_rate": 0.0005293410482156362, | |
| "loss": 4.0887, | |
| "step": 266500 | |
| }, | |
| { | |
| "epoch": 1.422361439621556, | |
| "grad_norm": 0.8961130380630493, | |
| "learning_rate": 0.0005284492230419087, | |
| "loss": 4.1001, | |
| "step": 267000 | |
| }, | |
| { | |
| "epoch": 1.4250250378230944, | |
| "grad_norm": 0.8140637874603271, | |
| "learning_rate": 0.0005275573978681811, | |
| "loss": 4.0898, | |
| "step": 267500 | |
| }, | |
| { | |
| "epoch": 1.427688636024633, | |
| "grad_norm": 0.8230092525482178, | |
| "learning_rate": 0.0005266655726944535, | |
| "loss": 4.0994, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 1.4303522342261714, | |
| "grad_norm": 0.800144612789154, | |
| "learning_rate": 0.0005257755311710735, | |
| "loss": 4.0914, | |
| "step": 268500 | |
| }, | |
| { | |
| "epoch": 1.43301583242771, | |
| "grad_norm": 0.8252524733543396, | |
| "learning_rate": 0.000524883705997346, | |
| "loss": 4.0944, | |
| "step": 269000 | |
| }, | |
| { | |
| "epoch": 1.4356794306292484, | |
| "grad_norm": 0.7676013708114624, | |
| "learning_rate": 0.0005239918808236184, | |
| "loss": 4.092, | |
| "step": 269500 | |
| }, | |
| { | |
| "epoch": 1.4383430288307868, | |
| "grad_norm": 0.8423929810523987, | |
| "learning_rate": 0.0005231000556498908, | |
| "loss": 4.0871, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 1.4410066270323254, | |
| "grad_norm": 0.7545808553695679, | |
| "learning_rate": 0.0005222100141265108, | |
| "loss": 4.0923, | |
| "step": 270500 | |
| }, | |
| { | |
| "epoch": 1.443670225233864, | |
| "grad_norm": 0.820381224155426, | |
| "learning_rate": 0.0005213181889527832, | |
| "loss": 4.0827, | |
| "step": 271000 | |
| }, | |
| { | |
| "epoch": 1.4463338234354024, | |
| "grad_norm": 0.8105764985084534, | |
| "learning_rate": 0.0005204263637790558, | |
| "loss": 4.0943, | |
| "step": 271500 | |
| }, | |
| { | |
| "epoch": 1.4489974216369408, | |
| "grad_norm": 0.7974145412445068, | |
| "learning_rate": 0.0005195345386053282, | |
| "loss": 4.0852, | |
| "step": 272000 | |
| }, | |
| { | |
| "epoch": 1.4516610198384794, | |
| "grad_norm": 0.7740100026130676, | |
| "learning_rate": 0.000518644497081948, | |
| "loss": 4.0943, | |
| "step": 272500 | |
| }, | |
| { | |
| "epoch": 1.454324618040018, | |
| "grad_norm": 0.8262558579444885, | |
| "learning_rate": 0.0005177526719082205, | |
| "loss": 4.0889, | |
| "step": 273000 | |
| }, | |
| { | |
| "epoch": 1.4569882162415564, | |
| "grad_norm": 0.8640192747116089, | |
| "learning_rate": 0.0005168608467344929, | |
| "loss": 4.0844, | |
| "step": 273500 | |
| }, | |
| { | |
| "epoch": 1.4596518144430948, | |
| "grad_norm": 0.8319873809814453, | |
| "learning_rate": 0.0005159690215607655, | |
| "loss": 4.0936, | |
| "step": 274000 | |
| }, | |
| { | |
| "epoch": 1.4623154126446334, | |
| "grad_norm": 0.876741886138916, | |
| "learning_rate": 0.0005150789800373853, | |
| "loss": 4.0855, | |
| "step": 274500 | |
| }, | |
| { | |
| "epoch": 1.464979010846172, | |
| "grad_norm": 0.8290923833847046, | |
| "learning_rate": 0.0005141871548636577, | |
| "loss": 4.0949, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 1.4676426090477104, | |
| "grad_norm": 0.7827680110931396, | |
| "learning_rate": 0.0005132953296899302, | |
| "loss": 4.0821, | |
| "step": 275500 | |
| }, | |
| { | |
| "epoch": 1.4703062072492488, | |
| "grad_norm": 0.8360860347747803, | |
| "learning_rate": 0.0005124035045162026, | |
| "loss": 4.0921, | |
| "step": 276000 | |
| }, | |
| { | |
| "epoch": 1.4729698054507874, | |
| "grad_norm": 0.7869288325309753, | |
| "learning_rate": 0.0005115134629928227, | |
| "loss": 4.0795, | |
| "step": 276500 | |
| }, | |
| { | |
| "epoch": 1.475633403652326, | |
| "grad_norm": 0.8743867874145508, | |
| "learning_rate": 0.0005106216378190951, | |
| "loss": 4.0867, | |
| "step": 277000 | |
| }, | |
| { | |
| "epoch": 1.4782970018538644, | |
| "grad_norm": 0.8454434871673584, | |
| "learning_rate": 0.0005097298126453676, | |
| "loss": 4.083, | |
| "step": 277500 | |
| }, | |
| { | |
| "epoch": 1.4809606000554028, | |
| "grad_norm": 0.8108798265457153, | |
| "learning_rate": 0.00050883798747164, | |
| "loss": 4.086, | |
| "step": 278000 | |
| }, | |
| { | |
| "epoch": 1.4836241982569414, | |
| "grad_norm": 0.8548552989959717, | |
| "learning_rate": 0.0005079479459482598, | |
| "loss": 4.0853, | |
| "step": 278500 | |
| }, | |
| { | |
| "epoch": 1.4862877964584797, | |
| "grad_norm": 0.8752163052558899, | |
| "learning_rate": 0.0005070561207745324, | |
| "loss": 4.0891, | |
| "step": 279000 | |
| }, | |
| { | |
| "epoch": 1.4889513946600184, | |
| "grad_norm": 0.9157357811927795, | |
| "learning_rate": 0.0005061642956008048, | |
| "loss": 4.0872, | |
| "step": 279500 | |
| }, | |
| { | |
| "epoch": 1.4916149928615567, | |
| "grad_norm": 0.8573022484779358, | |
| "learning_rate": 0.0005052724704270773, | |
| "loss": 4.0854, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 1.4942785910630954, | |
| "grad_norm": 0.8331462740898132, | |
| "learning_rate": 0.0005043806452533497, | |
| "loss": 4.0887, | |
| "step": 280500 | |
| }, | |
| { | |
| "epoch": 1.4969421892646337, | |
| "grad_norm": 0.7753505110740662, | |
| "learning_rate": 0.0005034888200796221, | |
| "loss": 4.0901, | |
| "step": 281000 | |
| }, | |
| { | |
| "epoch": 1.4996057874661723, | |
| "grad_norm": 0.781449556350708, | |
| "learning_rate": 0.0005025969949058947, | |
| "loss": 4.0844, | |
| "step": 281500 | |
| }, | |
| { | |
| "epoch": 1.5022693856677107, | |
| "grad_norm": 0.9343318343162537, | |
| "learning_rate": 0.0005017051697321671, | |
| "loss": 4.0906, | |
| "step": 282000 | |
| }, | |
| { | |
| "epoch": 1.5049329838692493, | |
| "grad_norm": 0.8867080807685852, | |
| "learning_rate": 0.000500815128208787, | |
| "loss": 4.08, | |
| "step": 282500 | |
| }, | |
| { | |
| "epoch": 1.507596582070788, | |
| "grad_norm": 0.8553933501243591, | |
| "learning_rate": 0.0004999233030350595, | |
| "loss": 4.0898, | |
| "step": 283000 | |
| }, | |
| { | |
| "epoch": 1.5102601802723261, | |
| "grad_norm": 0.849162757396698, | |
| "learning_rate": 0.0004990314778613319, | |
| "loss": 4.0894, | |
| "step": 283500 | |
| }, | |
| { | |
| "epoch": 1.5129237784738647, | |
| "grad_norm": 0.787109375, | |
| "learning_rate": 0.0004981396526876044, | |
| "loss": 4.085, | |
| "step": 284000 | |
| }, | |
| { | |
| "epoch": 1.5155873766754033, | |
| "grad_norm": 0.8072954416275024, | |
| "learning_rate": 0.0004972496111642243, | |
| "loss": 4.0842, | |
| "step": 284500 | |
| }, | |
| { | |
| "epoch": 1.5182509748769417, | |
| "grad_norm": 0.8034284114837646, | |
| "learning_rate": 0.0004963595696408442, | |
| "loss": 4.0866, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 1.52091457307848, | |
| "grad_norm": 0.8554684519767761, | |
| "learning_rate": 0.0004954677444671166, | |
| "loss": 4.0851, | |
| "step": 285500 | |
| }, | |
| { | |
| "epoch": 1.5235781712800187, | |
| "grad_norm": 0.8422802686691284, | |
| "learning_rate": 0.000494575919293389, | |
| "loss": 4.0869, | |
| "step": 286000 | |
| }, | |
| { | |
| "epoch": 1.5262417694815573, | |
| "grad_norm": 0.7712003588676453, | |
| "learning_rate": 0.0004936840941196615, | |
| "loss": 4.0808, | |
| "step": 286500 | |
| }, | |
| { | |
| "epoch": 1.5289053676830957, | |
| "grad_norm": 0.8626993894577026, | |
| "learning_rate": 0.000492792268945934, | |
| "loss": 4.0805, | |
| "step": 287000 | |
| }, | |
| { | |
| "epoch": 1.531568965884634, | |
| "grad_norm": 0.8277269601821899, | |
| "learning_rate": 0.0004919022274225539, | |
| "loss": 4.0906, | |
| "step": 287500 | |
| }, | |
| { | |
| "epoch": 1.5342325640861727, | |
| "grad_norm": 0.8013060688972473, | |
| "learning_rate": 0.0004910104022488263, | |
| "loss": 4.0836, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 1.5368961622877113, | |
| "grad_norm": 0.7702099084854126, | |
| "learning_rate": 0.0004901185770750989, | |
| "loss": 4.0777, | |
| "step": 288500 | |
| }, | |
| { | |
| "epoch": 1.5395597604892497, | |
| "grad_norm": 0.8085469603538513, | |
| "learning_rate": 0.0004892267519013713, | |
| "loss": 4.0898, | |
| "step": 289000 | |
| }, | |
| { | |
| "epoch": 1.542223358690788, | |
| "grad_norm": 0.7977801561355591, | |
| "learning_rate": 0.0004883349267276437, | |
| "loss": 4.0955, | |
| "step": 289500 | |
| }, | |
| { | |
| "epoch": 1.5448869568923267, | |
| "grad_norm": 0.8373309969902039, | |
| "learning_rate": 0.0004874431015539162, | |
| "loss": 4.0783, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 1.5475505550938653, | |
| "grad_norm": 0.7764778733253479, | |
| "learning_rate": 0.0004865530600305361, | |
| "loss": 4.0861, | |
| "step": 290500 | |
| }, | |
| { | |
| "epoch": 1.5502141532954037, | |
| "grad_norm": 0.8451995849609375, | |
| "learning_rate": 0.00048566123485680856, | |
| "loss": 4.0817, | |
| "step": 291000 | |
| }, | |
| { | |
| "epoch": 1.552877751496942, | |
| "grad_norm": 0.8463019728660583, | |
| "learning_rate": 0.00048476940968308105, | |
| "loss": 4.0822, | |
| "step": 291500 | |
| }, | |
| { | |
| "epoch": 1.5555413496984807, | |
| "grad_norm": 0.8065968155860901, | |
| "learning_rate": 0.0004838775845093535, | |
| "loss": 4.089, | |
| "step": 292000 | |
| }, | |
| { | |
| "epoch": 1.5582049479000193, | |
| "grad_norm": 0.8490435481071472, | |
| "learning_rate": 0.00048298754298597334, | |
| "loss": 4.0765, | |
| "step": 292500 | |
| }, | |
| { | |
| "epoch": 1.5608685461015577, | |
| "grad_norm": 0.8057785630226135, | |
| "learning_rate": 0.0004820957178122458, | |
| "loss": 4.0809, | |
| "step": 293000 | |
| }, | |
| { | |
| "epoch": 1.563532144303096, | |
| "grad_norm": 0.9338017702102661, | |
| "learning_rate": 0.00048120389263851826, | |
| "loss": 4.0787, | |
| "step": 293500 | |
| }, | |
| { | |
| "epoch": 1.5661957425046347, | |
| "grad_norm": 0.9003413915634155, | |
| "learning_rate": 0.00048031206746479074, | |
| "loss": 4.0756, | |
| "step": 294000 | |
| }, | |
| { | |
| "epoch": 1.5688593407061733, | |
| "grad_norm": 0.779014527797699, | |
| "learning_rate": 0.00047942024229106323, | |
| "loss": 4.0832, | |
| "step": 294500 | |
| }, | |
| { | |
| "epoch": 1.5715229389077117, | |
| "grad_norm": 0.8321064114570618, | |
| "learning_rate": 0.0004785302007676831, | |
| "loss": 4.0885, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 1.57418653710925, | |
| "grad_norm": 0.8152427077293396, | |
| "learning_rate": 0.0004776383755939556, | |
| "loss": 4.0847, | |
| "step": 295500 | |
| }, | |
| { | |
| "epoch": 1.5768501353107887, | |
| "grad_norm": 0.8888664245605469, | |
| "learning_rate": 0.000476746550420228, | |
| "loss": 4.0777, | |
| "step": 296000 | |
| }, | |
| { | |
| "epoch": 1.5795137335123273, | |
| "grad_norm": 0.8546236157417297, | |
| "learning_rate": 0.0004758547252465005, | |
| "loss": 4.0898, | |
| "step": 296500 | |
| }, | |
| { | |
| "epoch": 1.5821773317138657, | |
| "grad_norm": 0.7983977794647217, | |
| "learning_rate": 0.00047496290007277293, | |
| "loss": 4.0869, | |
| "step": 297000 | |
| }, | |
| { | |
| "epoch": 1.584840929915404, | |
| "grad_norm": 0.9709325432777405, | |
| "learning_rate": 0.00047407107489904536, | |
| "loss": 4.0864, | |
| "step": 297500 | |
| }, | |
| { | |
| "epoch": 1.5875045281169426, | |
| "grad_norm": 0.8570044040679932, | |
| "learning_rate": 0.00047317924972531785, | |
| "loss": 4.0886, | |
| "step": 298000 | |
| }, | |
| { | |
| "epoch": 1.5901681263184813, | |
| "grad_norm": 0.8361437320709229, | |
| "learning_rate": 0.00047228920820193776, | |
| "loss": 4.0794, | |
| "step": 298500 | |
| }, | |
| { | |
| "epoch": 1.5928317245200196, | |
| "grad_norm": 0.8911067247390747, | |
| "learning_rate": 0.00047139738302821025, | |
| "loss": 4.0836, | |
| "step": 299000 | |
| }, | |
| { | |
| "epoch": 1.595495322721558, | |
| "grad_norm": 0.8150638341903687, | |
| "learning_rate": 0.0004705055578544827, | |
| "loss": 4.0806, | |
| "step": 299500 | |
| }, | |
| { | |
| "epoch": 1.5981589209230966, | |
| "grad_norm": 0.8484770059585571, | |
| "learning_rate": 0.0004696137326807551, | |
| "loss": 4.0796, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 1.6008225191246352, | |
| "grad_norm": 0.8199454545974731, | |
| "learning_rate": 0.0004687219075070276, | |
| "loss": 4.0789, | |
| "step": 300500 | |
| }, | |
| { | |
| "epoch": 1.6034861173261736, | |
| "grad_norm": 0.8845428824424744, | |
| "learning_rate": 0.0004678318659836475, | |
| "loss": 4.073, | |
| "step": 301000 | |
| }, | |
| { | |
| "epoch": 1.606149715527712, | |
| "grad_norm": 0.8244544267654419, | |
| "learning_rate": 0.00046694004080991995, | |
| "loss": 4.0753, | |
| "step": 301500 | |
| }, | |
| { | |
| "epoch": 1.6088133137292506, | |
| "grad_norm": 0.8862385153770447, | |
| "learning_rate": 0.00046604821563619244, | |
| "loss": 4.0784, | |
| "step": 302000 | |
| }, | |
| { | |
| "epoch": 1.611476911930789, | |
| "grad_norm": 0.8142257928848267, | |
| "learning_rate": 0.00046515639046246487, | |
| "loss": 4.0806, | |
| "step": 302500 | |
| }, | |
| { | |
| "epoch": 1.6141405101323274, | |
| "grad_norm": 0.850913941860199, | |
| "learning_rate": 0.00046426456528873735, | |
| "loss": 4.0821, | |
| "step": 303000 | |
| }, | |
| { | |
| "epoch": 1.616804108333866, | |
| "grad_norm": 0.7964518666267395, | |
| "learning_rate": 0.0004633727401150098, | |
| "loss": 4.0802, | |
| "step": 303500 | |
| }, | |
| { | |
| "epoch": 1.6194677065354046, | |
| "grad_norm": 0.8475667834281921, | |
| "learning_rate": 0.0004624809149412823, | |
| "loss": 4.0825, | |
| "step": 304000 | |
| }, | |
| { | |
| "epoch": 1.622131304736943, | |
| "grad_norm": 0.8427020311355591, | |
| "learning_rate": 0.0004615890897675547, | |
| "loss": 4.0746, | |
| "step": 304500 | |
| }, | |
| { | |
| "epoch": 1.6247949029384814, | |
| "grad_norm": 0.8353922367095947, | |
| "learning_rate": 0.0004606990482441746, | |
| "loss": 4.0785, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 1.62745850114002, | |
| "grad_norm": 0.8765130043029785, | |
| "learning_rate": 0.0004598072230704471, | |
| "loss": 4.0827, | |
| "step": 305500 | |
| }, | |
| { | |
| "epoch": 1.6301220993415586, | |
| "grad_norm": 0.7863726615905762, | |
| "learning_rate": 0.00045891718154706697, | |
| "loss": 4.0782, | |
| "step": 306000 | |
| }, | |
| { | |
| "epoch": 1.632785697543097, | |
| "grad_norm": 0.7965743541717529, | |
| "learning_rate": 0.0004580253563733394, | |
| "loss": 4.0751, | |
| "step": 306500 | |
| }, | |
| { | |
| "epoch": 1.6354492957446354, | |
| "grad_norm": 0.7712193131446838, | |
| "learning_rate": 0.0004571335311996119, | |
| "loss": 4.0775, | |
| "step": 307000 | |
| }, | |
| { | |
| "epoch": 1.638112893946174, | |
| "grad_norm": 0.8547102212905884, | |
| "learning_rate": 0.0004562417060258843, | |
| "loss": 4.0687, | |
| "step": 307500 | |
| }, | |
| { | |
| "epoch": 1.6407764921477126, | |
| "grad_norm": 0.794670581817627, | |
| "learning_rate": 0.00045535166450250423, | |
| "loss": 4.0809, | |
| "step": 308000 | |
| }, | |
| { | |
| "epoch": 1.643440090349251, | |
| "grad_norm": 0.8939191102981567, | |
| "learning_rate": 0.0004544598393287767, | |
| "loss": 4.0755, | |
| "step": 308500 | |
| }, | |
| { | |
| "epoch": 1.6461036885507894, | |
| "grad_norm": 0.830675482749939, | |
| "learning_rate": 0.00045356801415504915, | |
| "loss": 4.0849, | |
| "step": 309000 | |
| }, | |
| { | |
| "epoch": 1.648767286752328, | |
| "grad_norm": 0.8708091378211975, | |
| "learning_rate": 0.00045267618898132164, | |
| "loss": 4.0664, | |
| "step": 309500 | |
| }, | |
| { | |
| "epoch": 1.6514308849538666, | |
| "grad_norm": 0.7933617830276489, | |
| "learning_rate": 0.00045178436380759407, | |
| "loss": 4.0802, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 1.654094483155405, | |
| "grad_norm": 0.8032438158988953, | |
| "learning_rate": 0.000450894322284214, | |
| "loss": 4.0783, | |
| "step": 310500 | |
| }, | |
| { | |
| "epoch": 1.6567580813569434, | |
| "grad_norm": 0.8478823304176331, | |
| "learning_rate": 0.0004500024971104865, | |
| "loss": 4.0831, | |
| "step": 311000 | |
| }, | |
| { | |
| "epoch": 1.659421679558482, | |
| "grad_norm": 0.8288933634757996, | |
| "learning_rate": 0.0004491106719367589, | |
| "loss": 4.0801, | |
| "step": 311500 | |
| }, | |
| { | |
| "epoch": 1.6620852777600206, | |
| "grad_norm": 0.8561184406280518, | |
| "learning_rate": 0.0004482188467630314, | |
| "loss": 4.0788, | |
| "step": 312000 | |
| }, | |
| { | |
| "epoch": 1.664748875961559, | |
| "grad_norm": 0.9229483008384705, | |
| "learning_rate": 0.0004473270215893038, | |
| "loss": 4.0813, | |
| "step": 312500 | |
| }, | |
| { | |
| "epoch": 1.6674124741630973, | |
| "grad_norm": 0.8853760361671448, | |
| "learning_rate": 0.0004464369800659237, | |
| "loss": 4.0728, | |
| "step": 313000 | |
| }, | |
| { | |
| "epoch": 1.670076072364636, | |
| "grad_norm": 0.8472786545753479, | |
| "learning_rate": 0.0004455451548921962, | |
| "loss": 4.076, | |
| "step": 313500 | |
| }, | |
| { | |
| "epoch": 1.6727396705661746, | |
| "grad_norm": 0.834415853023529, | |
| "learning_rate": 0.0004446533297184686, | |
| "loss": 4.0776, | |
| "step": 314000 | |
| }, | |
| { | |
| "epoch": 1.675403268767713, | |
| "grad_norm": 0.8151890635490417, | |
| "learning_rate": 0.0004437615045447411, | |
| "loss": 4.0712, | |
| "step": 314500 | |
| }, | |
| { | |
| "epoch": 1.6780668669692513, | |
| "grad_norm": 0.8340436816215515, | |
| "learning_rate": 0.0004428696793710135, | |
| "loss": 4.0773, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 1.68073046517079, | |
| "grad_norm": 0.7873215079307556, | |
| "learning_rate": 0.00044197963784763344, | |
| "loss": 4.0796, | |
| "step": 315500 | |
| }, | |
| { | |
| "epoch": 1.6833940633723286, | |
| "grad_norm": 0.7956321835517883, | |
| "learning_rate": 0.0004410878126739059, | |
| "loss": 4.0738, | |
| "step": 316000 | |
| }, | |
| { | |
| "epoch": 1.686057661573867, | |
| "grad_norm": 0.8906182646751404, | |
| "learning_rate": 0.00044019598750017836, | |
| "loss": 4.0776, | |
| "step": 316500 | |
| }, | |
| { | |
| "epoch": 1.6887212597754053, | |
| "grad_norm": 0.8356565833091736, | |
| "learning_rate": 0.0004393041623264508, | |
| "loss": 4.0686, | |
| "step": 317000 | |
| }, | |
| { | |
| "epoch": 1.691384857976944, | |
| "grad_norm": 0.8309632539749146, | |
| "learning_rate": 0.0004384123371527233, | |
| "loss": 4.0786, | |
| "step": 317500 | |
| }, | |
| { | |
| "epoch": 1.6940484561784825, | |
| "grad_norm": 0.8648601770401001, | |
| "learning_rate": 0.0004375205119789957, | |
| "loss": 4.076, | |
| "step": 318000 | |
| }, | |
| { | |
| "epoch": 1.696712054380021, | |
| "grad_norm": 0.799662172794342, | |
| "learning_rate": 0.0004366304704556157, | |
| "loss": 4.0769, | |
| "step": 318500 | |
| }, | |
| { | |
| "epoch": 1.6993756525815593, | |
| "grad_norm": 0.884032130241394, | |
| "learning_rate": 0.0004357386452818881, | |
| "loss": 4.0742, | |
| "step": 319000 | |
| }, | |
| { | |
| "epoch": 1.702039250783098, | |
| "grad_norm": 0.8695617914199829, | |
| "learning_rate": 0.00043484682010816054, | |
| "loss": 4.0721, | |
| "step": 319500 | |
| }, | |
| { | |
| "epoch": 1.7047028489846365, | |
| "grad_norm": 0.801929235458374, | |
| "learning_rate": 0.00043395499493443303, | |
| "loss": 4.0722, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 1.7073664471861747, | |
| "grad_norm": 0.7920409440994263, | |
| "learning_rate": 0.00043306495341105295, | |
| "loss": 4.076, | |
| "step": 320500 | |
| }, | |
| { | |
| "epoch": 1.7100300453877133, | |
| "grad_norm": 0.821932852268219, | |
| "learning_rate": 0.00043217312823732543, | |
| "loss": 4.076, | |
| "step": 321000 | |
| }, | |
| { | |
| "epoch": 1.712693643589252, | |
| "grad_norm": 0.8553212881088257, | |
| "learning_rate": 0.00043128130306359786, | |
| "loss": 4.0748, | |
| "step": 321500 | |
| }, | |
| { | |
| "epoch": 1.7153572417907903, | |
| "grad_norm": 0.911418080329895, | |
| "learning_rate": 0.0004303894778898703, | |
| "loss": 4.0794, | |
| "step": 322000 | |
| }, | |
| { | |
| "epoch": 1.7180208399923287, | |
| "grad_norm": 0.8463834524154663, | |
| "learning_rate": 0.0004294976527161428, | |
| "loss": 4.0676, | |
| "step": 322500 | |
| }, | |
| { | |
| "epoch": 1.7206844381938673, | |
| "grad_norm": 0.8559086322784424, | |
| "learning_rate": 0.0004286058275424152, | |
| "loss": 4.0771, | |
| "step": 323000 | |
| }, | |
| { | |
| "epoch": 1.723348036395406, | |
| "grad_norm": 0.8981167674064636, | |
| "learning_rate": 0.0004277140023686877, | |
| "loss": 4.0688, | |
| "step": 323500 | |
| }, | |
| { | |
| "epoch": 1.7260116345969443, | |
| "grad_norm": 0.8651977181434631, | |
| "learning_rate": 0.00042682396084530756, | |
| "loss": 4.0728, | |
| "step": 324000 | |
| }, | |
| { | |
| "epoch": 1.7286752327984827, | |
| "grad_norm": 0.9066988229751587, | |
| "learning_rate": 0.00042593213567158, | |
| "loss": 4.072, | |
| "step": 324500 | |
| }, | |
| { | |
| "epoch": 1.7313388310000213, | |
| "grad_norm": 0.8543113470077515, | |
| "learning_rate": 0.0004250403104978525, | |
| "loss": 4.0727, | |
| "step": 325000 | |
| }, | |
| { | |
| "epoch": 1.73400242920156, | |
| "grad_norm": 0.8599368333816528, | |
| "learning_rate": 0.00042414848532412497, | |
| "loss": 4.0665, | |
| "step": 325500 | |
| }, | |
| { | |
| "epoch": 1.7366660274030983, | |
| "grad_norm": 0.8290531039237976, | |
| "learning_rate": 0.00042325666015039746, | |
| "loss": 4.0739, | |
| "step": 326000 | |
| }, | |
| { | |
| "epoch": 1.7393296256046367, | |
| "grad_norm": 0.8055272102355957, | |
| "learning_rate": 0.0004223666186270173, | |
| "loss": 4.0735, | |
| "step": 326500 | |
| }, | |
| { | |
| "epoch": 1.7419932238061753, | |
| "grad_norm": 0.8045780658721924, | |
| "learning_rate": 0.00042147479345328975, | |
| "loss": 4.071, | |
| "step": 327000 | |
| }, | |
| { | |
| "epoch": 1.7446568220077139, | |
| "grad_norm": 0.8758577108383179, | |
| "learning_rate": 0.00042058296827956224, | |
| "loss": 4.0735, | |
| "step": 327500 | |
| }, | |
| { | |
| "epoch": 1.7473204202092523, | |
| "grad_norm": 0.8138041496276855, | |
| "learning_rate": 0.00041969114310583467, | |
| "loss": 4.0686, | |
| "step": 328000 | |
| }, | |
| { | |
| "epoch": 1.7499840184107907, | |
| "grad_norm": 0.8927600979804993, | |
| "learning_rate": 0.0004188011015824546, | |
| "loss": 4.0749, | |
| "step": 328500 | |
| }, | |
| { | |
| "epoch": 1.7526476166123293, | |
| "grad_norm": 0.8370145559310913, | |
| "learning_rate": 0.00041790927640872707, | |
| "loss": 4.0723, | |
| "step": 329000 | |
| }, | |
| { | |
| "epoch": 1.7553112148138679, | |
| "grad_norm": 0.8793504238128662, | |
| "learning_rate": 0.0004170174512349995, | |
| "loss": 4.0674, | |
| "step": 329500 | |
| }, | |
| { | |
| "epoch": 1.7579748130154063, | |
| "grad_norm": 0.8913201689720154, | |
| "learning_rate": 0.000416125626061272, | |
| "loss": 4.0699, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 1.7606384112169446, | |
| "grad_norm": 0.8198757767677307, | |
| "learning_rate": 0.0004152338008875444, | |
| "loss": 4.0738, | |
| "step": 330500 | |
| }, | |
| { | |
| "epoch": 1.7633020094184833, | |
| "grad_norm": 0.8716715574264526, | |
| "learning_rate": 0.00041434375936416434, | |
| "loss": 4.0762, | |
| "step": 331000 | |
| }, | |
| { | |
| "epoch": 1.7659656076200219, | |
| "grad_norm": 0.8413424491882324, | |
| "learning_rate": 0.0004134519341904368, | |
| "loss": 4.0635, | |
| "step": 331500 | |
| }, | |
| { | |
| "epoch": 1.7686292058215602, | |
| "grad_norm": 0.838036060333252, | |
| "learning_rate": 0.00041256010901670926, | |
| "loss": 4.0731, | |
| "step": 332000 | |
| }, | |
| { | |
| "epoch": 1.7712928040230986, | |
| "grad_norm": 0.8625719547271729, | |
| "learning_rate": 0.00041166828384298174, | |
| "loss": 4.0765, | |
| "step": 332500 | |
| }, | |
| { | |
| "epoch": 1.7739564022246372, | |
| "grad_norm": 0.8333448171615601, | |
| "learning_rate": 0.0004107782423196016, | |
| "loss": 4.0691, | |
| "step": 333000 | |
| }, | |
| { | |
| "epoch": 1.7766200004261758, | |
| "grad_norm": 0.8514916300773621, | |
| "learning_rate": 0.00040988641714587403, | |
| "loss": 4.0682, | |
| "step": 333500 | |
| }, | |
| { | |
| "epoch": 1.7792835986277142, | |
| "grad_norm": 0.8220165371894836, | |
| "learning_rate": 0.0004089945919721465, | |
| "loss": 4.0796, | |
| "step": 334000 | |
| }, | |
| { | |
| "epoch": 1.7819471968292526, | |
| "grad_norm": 0.838065505027771, | |
| "learning_rate": 0.00040810276679841895, | |
| "loss": 4.0672, | |
| "step": 334500 | |
| }, | |
| { | |
| "epoch": 1.7846107950307912, | |
| "grad_norm": 0.8731646537780762, | |
| "learning_rate": 0.00040721272527503887, | |
| "loss": 4.0667, | |
| "step": 335000 | |
| }, | |
| { | |
| "epoch": 1.7872743932323298, | |
| "grad_norm": 0.8466665148735046, | |
| "learning_rate": 0.00040632090010131136, | |
| "loss": 4.0733, | |
| "step": 335500 | |
| }, | |
| { | |
| "epoch": 1.7899379914338682, | |
| "grad_norm": 0.9406811594963074, | |
| "learning_rate": 0.0004054290749275838, | |
| "loss": 4.0708, | |
| "step": 336000 | |
| }, | |
| { | |
| "epoch": 1.7926015896354066, | |
| "grad_norm": 0.8663309812545776, | |
| "learning_rate": 0.0004045372497538563, | |
| "loss": 4.0688, | |
| "step": 336500 | |
| }, | |
| { | |
| "epoch": 1.7952651878369452, | |
| "grad_norm": 0.8506413698196411, | |
| "learning_rate": 0.0004036454245801287, | |
| "loss": 4.0795, | |
| "step": 337000 | |
| }, | |
| { | |
| "epoch": 1.7979287860384838, | |
| "grad_norm": 0.8088420033454895, | |
| "learning_rate": 0.0004027553830567486, | |
| "loss": 4.0724, | |
| "step": 337500 | |
| }, | |
| { | |
| "epoch": 1.8005923842400222, | |
| "grad_norm": 0.8378006815910339, | |
| "learning_rate": 0.0004018635578830211, | |
| "loss": 4.0668, | |
| "step": 338000 | |
| }, | |
| { | |
| "epoch": 1.8032559824415606, | |
| "grad_norm": 0.8574025630950928, | |
| "learning_rate": 0.00040097173270929354, | |
| "loss": 4.0678, | |
| "step": 338500 | |
| }, | |
| { | |
| "epoch": 1.8059195806430992, | |
| "grad_norm": 0.8278779983520508, | |
| "learning_rate": 0.00040007990753556603, | |
| "loss": 4.0695, | |
| "step": 339000 | |
| }, | |
| { | |
| "epoch": 1.8085831788446376, | |
| "grad_norm": 0.9120043516159058, | |
| "learning_rate": 0.00039918986601218594, | |
| "loss": 4.0629, | |
| "step": 339500 | |
| }, | |
| { | |
| "epoch": 1.811246777046176, | |
| "grad_norm": 0.822943925857544, | |
| "learning_rate": 0.0003982980408384584, | |
| "loss": 4.0674, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 1.8139103752477146, | |
| "grad_norm": 0.8420679569244385, | |
| "learning_rate": 0.00039740621566473086, | |
| "loss": 4.0683, | |
| "step": 340500 | |
| }, | |
| { | |
| "epoch": 1.8165739734492532, | |
| "grad_norm": 0.8428717851638794, | |
| "learning_rate": 0.0003965143904910033, | |
| "loss": 4.0672, | |
| "step": 341000 | |
| }, | |
| { | |
| "epoch": 1.8192375716507916, | |
| "grad_norm": 0.8921811580657959, | |
| "learning_rate": 0.0003956225653172757, | |
| "loss": 4.0655, | |
| "step": 341500 | |
| }, | |
| { | |
| "epoch": 1.82190116985233, | |
| "grad_norm": 0.8687016367912292, | |
| "learning_rate": 0.0003947307401435482, | |
| "loss": 4.0712, | |
| "step": 342000 | |
| }, | |
| { | |
| "epoch": 1.8245647680538686, | |
| "grad_norm": 0.8464400172233582, | |
| "learning_rate": 0.0003938406986201681, | |
| "loss": 4.0687, | |
| "step": 342500 | |
| }, | |
| { | |
| "epoch": 1.8272283662554072, | |
| "grad_norm": 0.8673765063285828, | |
| "learning_rate": 0.00039294887344644056, | |
| "loss": 4.0628, | |
| "step": 343000 | |
| }, | |
| { | |
| "epoch": 1.8298919644569456, | |
| "grad_norm": 0.9040893316268921, | |
| "learning_rate": 0.000392057048272713, | |
| "loss": 4.0633, | |
| "step": 343500 | |
| }, | |
| { | |
| "epoch": 1.832555562658484, | |
| "grad_norm": 0.8810034394264221, | |
| "learning_rate": 0.0003911652230989854, | |
| "loss": 4.0637, | |
| "step": 344000 | |
| }, | |
| { | |
| "epoch": 1.8352191608600226, | |
| "grad_norm": 0.8870866894721985, | |
| "learning_rate": 0.0003902733979252579, | |
| "loss": 4.0712, | |
| "step": 344500 | |
| }, | |
| { | |
| "epoch": 1.8378827590615612, | |
| "grad_norm": 0.8724194169044495, | |
| "learning_rate": 0.0003893833564018778, | |
| "loss": 4.0761, | |
| "step": 345000 | |
| }, | |
| { | |
| "epoch": 1.8405463572630996, | |
| "grad_norm": 1.1327623128890991, | |
| "learning_rate": 0.00038849153122815026, | |
| "loss": 4.0656, | |
| "step": 345500 | |
| }, | |
| { | |
| "epoch": 1.843209955464638, | |
| "grad_norm": 0.8693875670433044, | |
| "learning_rate": 0.00038759970605442275, | |
| "loss": 4.0692, | |
| "step": 346000 | |
| }, | |
| { | |
| "epoch": 1.8458735536661766, | |
| "grad_norm": 0.9146456122398376, | |
| "learning_rate": 0.0003867078808806952, | |
| "loss": 4.0663, | |
| "step": 346500 | |
| }, | |
| { | |
| "epoch": 1.8485371518677152, | |
| "grad_norm": 0.8626604676246643, | |
| "learning_rate": 0.00038581605570696766, | |
| "loss": 4.0618, | |
| "step": 347000 | |
| }, | |
| { | |
| "epoch": 1.8512007500692536, | |
| "grad_norm": 1.0062013864517212, | |
| "learning_rate": 0.0003849242305332401, | |
| "loss": 4.0678, | |
| "step": 347500 | |
| }, | |
| { | |
| "epoch": 1.853864348270792, | |
| "grad_norm": 0.842510461807251, | |
| "learning_rate": 0.00038403418900986, | |
| "loss": 4.065, | |
| "step": 348000 | |
| }, | |
| { | |
| "epoch": 1.8565279464723305, | |
| "grad_norm": 0.8646286129951477, | |
| "learning_rate": 0.0003831423638361325, | |
| "loss": 4.0629, | |
| "step": 348500 | |
| }, | |
| { | |
| "epoch": 1.8591915446738692, | |
| "grad_norm": 0.8638767004013062, | |
| "learning_rate": 0.00038225053866240493, | |
| "loss": 4.0656, | |
| "step": 349000 | |
| }, | |
| { | |
| "epoch": 1.8618551428754075, | |
| "grad_norm": 0.8934078216552734, | |
| "learning_rate": 0.0003813587134886774, | |
| "loss": 4.0714, | |
| "step": 349500 | |
| }, | |
| { | |
| "epoch": 1.864518741076946, | |
| "grad_norm": 0.8266724944114685, | |
| "learning_rate": 0.00038046688831494985, | |
| "loss": 4.0645, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 1.8671823392784845, | |
| "grad_norm": 0.8602758646011353, | |
| "learning_rate": 0.00037957684679156977, | |
| "loss": 4.0642, | |
| "step": 350500 | |
| }, | |
| { | |
| "epoch": 1.8698459374800231, | |
| "grad_norm": 0.8677871823310852, | |
| "learning_rate": 0.00037868502161784225, | |
| "loss": 4.0685, | |
| "step": 351000 | |
| }, | |
| { | |
| "epoch": 1.8725095356815615, | |
| "grad_norm": 0.870879590511322, | |
| "learning_rate": 0.0003777931964441147, | |
| "loss": 4.0747, | |
| "step": 351500 | |
| }, | |
| { | |
| "epoch": 1.8751731338831, | |
| "grad_norm": 0.8714147806167603, | |
| "learning_rate": 0.00037690137127038717, | |
| "loss": 4.061, | |
| "step": 352000 | |
| }, | |
| { | |
| "epoch": 1.8778367320846385, | |
| "grad_norm": 0.8625131249427795, | |
| "learning_rate": 0.00037601132974700703, | |
| "loss": 4.06, | |
| "step": 352500 | |
| }, | |
| { | |
| "epoch": 1.8805003302861771, | |
| "grad_norm": 0.9685169458389282, | |
| "learning_rate": 0.00037511950457327946, | |
| "loss": 4.071, | |
| "step": 353000 | |
| }, | |
| { | |
| "epoch": 1.8831639284877155, | |
| "grad_norm": 0.9301902055740356, | |
| "learning_rate": 0.00037422767939955195, | |
| "loss": 4.0663, | |
| "step": 353500 | |
| }, | |
| { | |
| "epoch": 1.885827526689254, | |
| "grad_norm": 0.8485379219055176, | |
| "learning_rate": 0.0003733358542258244, | |
| "loss": 4.0709, | |
| "step": 354000 | |
| }, | |
| { | |
| "epoch": 1.8884911248907925, | |
| "grad_norm": 0.833081841468811, | |
| "learning_rate": 0.00037244402905209687, | |
| "loss": 4.0596, | |
| "step": 354500 | |
| }, | |
| { | |
| "epoch": 1.8911547230923311, | |
| "grad_norm": 0.8548697829246521, | |
| "learning_rate": 0.0003715539875287168, | |
| "loss": 4.0701, | |
| "step": 355000 | |
| }, | |
| { | |
| "epoch": 1.8938183212938695, | |
| "grad_norm": 0.8501580357551575, | |
| "learning_rate": 0.0003706621623549892, | |
| "loss": 4.0567, | |
| "step": 355500 | |
| }, | |
| { | |
| "epoch": 1.896481919495408, | |
| "grad_norm": 0.8642673492431641, | |
| "learning_rate": 0.0003697703371812617, | |
| "loss": 4.0621, | |
| "step": 356000 | |
| }, | |
| { | |
| "epoch": 1.8991455176969465, | |
| "grad_norm": 0.8171157240867615, | |
| "learning_rate": 0.00036887851200753414, | |
| "loss": 4.0542, | |
| "step": 356500 | |
| }, | |
| { | |
| "epoch": 1.901809115898485, | |
| "grad_norm": 0.873189389705658, | |
| "learning_rate": 0.00036798668683380657, | |
| "loss": 4.06, | |
| "step": 357000 | |
| }, | |
| { | |
| "epoch": 1.9044727141000233, | |
| "grad_norm": 0.8762955665588379, | |
| "learning_rate": 0.00036709664531042654, | |
| "loss": 4.063, | |
| "step": 357500 | |
| }, | |
| { | |
| "epoch": 1.9071363123015619, | |
| "grad_norm": 0.8550353050231934, | |
| "learning_rate": 0.00036620482013669897, | |
| "loss": 4.0597, | |
| "step": 358000 | |
| }, | |
| { | |
| "epoch": 1.9097999105031005, | |
| "grad_norm": 0.8709129691123962, | |
| "learning_rate": 0.00036531299496297146, | |
| "loss": 4.0578, | |
| "step": 358500 | |
| }, | |
| { | |
| "epoch": 1.9124635087046389, | |
| "grad_norm": 0.9054292440414429, | |
| "learning_rate": 0.0003644211697892439, | |
| "loss": 4.0589, | |
| "step": 359000 | |
| }, | |
| { | |
| "epoch": 1.9151271069061773, | |
| "grad_norm": 0.8816952705383301, | |
| "learning_rate": 0.0003635293446155163, | |
| "loss": 4.0563, | |
| "step": 359500 | |
| }, | |
| { | |
| "epoch": 1.9177907051077159, | |
| "grad_norm": 0.8601788282394409, | |
| "learning_rate": 0.0003626393030921363, | |
| "loss": 4.057, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 1.9204543033092545, | |
| "grad_norm": 0.933283269405365, | |
| "learning_rate": 0.0003617474779184087, | |
| "loss": 4.0688, | |
| "step": 360500 | |
| }, | |
| { | |
| "epoch": 1.9231179015107929, | |
| "grad_norm": 0.9095755815505981, | |
| "learning_rate": 0.0003608556527446812, | |
| "loss": 4.0531, | |
| "step": 361000 | |
| }, | |
| { | |
| "epoch": 1.9257814997123313, | |
| "grad_norm": 0.8889813423156738, | |
| "learning_rate": 0.00035996382757095364, | |
| "loss": 4.0638, | |
| "step": 361500 | |
| }, | |
| { | |
| "epoch": 1.9284450979138699, | |
| "grad_norm": 0.8663842678070068, | |
| "learning_rate": 0.0003590737860475735, | |
| "loss": 4.062, | |
| "step": 362000 | |
| }, | |
| { | |
| "epoch": 1.9311086961154085, | |
| "grad_norm": 0.8386211395263672, | |
| "learning_rate": 0.000358181960873846, | |
| "loss": 4.0561, | |
| "step": 362500 | |
| }, | |
| { | |
| "epoch": 1.9337722943169469, | |
| "grad_norm": 0.8373234868049622, | |
| "learning_rate": 0.0003572901357001184, | |
| "loss": 4.0666, | |
| "step": 363000 | |
| }, | |
| { | |
| "epoch": 1.9364358925184852, | |
| "grad_norm": 0.8931795954704285, | |
| "learning_rate": 0.00035639831052639085, | |
| "loss": 4.0554, | |
| "step": 363500 | |
| }, | |
| { | |
| "epoch": 1.9390994907200239, | |
| "grad_norm": 0.8433584570884705, | |
| "learning_rate": 0.0003555082690030108, | |
| "loss": 4.0583, | |
| "step": 364000 | |
| }, | |
| { | |
| "epoch": 1.9417630889215625, | |
| "grad_norm": 0.8926225900650024, | |
| "learning_rate": 0.00035461644382928326, | |
| "loss": 4.0585, | |
| "step": 364500 | |
| }, | |
| { | |
| "epoch": 1.9444266871231008, | |
| "grad_norm": 0.865616500377655, | |
| "learning_rate": 0.00035372461865555574, | |
| "loss": 4.0633, | |
| "step": 365000 | |
| }, | |
| { | |
| "epoch": 1.9470902853246392, | |
| "grad_norm": 0.8474301099777222, | |
| "learning_rate": 0.0003528327934818282, | |
| "loss": 4.0602, | |
| "step": 365500 | |
| }, | |
| { | |
| "epoch": 1.9497538835261778, | |
| "grad_norm": 0.8580695986747742, | |
| "learning_rate": 0.0003519427519584481, | |
| "loss": 4.0544, | |
| "step": 366000 | |
| }, | |
| { | |
| "epoch": 1.9524174817277165, | |
| "grad_norm": 0.8627407550811768, | |
| "learning_rate": 0.0003510509267847206, | |
| "loss": 4.0481, | |
| "step": 366500 | |
| }, | |
| { | |
| "epoch": 1.9550810799292548, | |
| "grad_norm": 0.8328742384910583, | |
| "learning_rate": 0.000350159101610993, | |
| "loss": 4.0581, | |
| "step": 367000 | |
| }, | |
| { | |
| "epoch": 1.9577446781307932, | |
| "grad_norm": 0.8515557050704956, | |
| "learning_rate": 0.0003492672764372655, | |
| "loss": 4.06, | |
| "step": 367500 | |
| }, | |
| { | |
| "epoch": 1.9604082763323318, | |
| "grad_norm": 0.9069979786872864, | |
| "learning_rate": 0.00034837545126353793, | |
| "loss": 4.0602, | |
| "step": 368000 | |
| }, | |
| { | |
| "epoch": 1.9630718745338704, | |
| "grad_norm": 0.8612348437309265, | |
| "learning_rate": 0.0003474854097401578, | |
| "loss": 4.0565, | |
| "step": 368500 | |
| }, | |
| { | |
| "epoch": 1.9657354727354088, | |
| "grad_norm": 0.9286240339279175, | |
| "learning_rate": 0.0003465935845664303, | |
| "loss": 4.0605, | |
| "step": 369000 | |
| }, | |
| { | |
| "epoch": 1.9683990709369472, | |
| "grad_norm": 0.8804614543914795, | |
| "learning_rate": 0.00034570175939270276, | |
| "loss": 4.0575, | |
| "step": 369500 | |
| }, | |
| { | |
| "epoch": 1.9710626691384858, | |
| "grad_norm": 0.8332533836364746, | |
| "learning_rate": 0.0003448099342189752, | |
| "loss": 4.0587, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 1.9737262673400244, | |
| "grad_norm": 0.8402279615402222, | |
| "learning_rate": 0.0003439198926955951, | |
| "loss": 4.0569, | |
| "step": 370500 | |
| }, | |
| { | |
| "epoch": 1.9763898655415628, | |
| "grad_norm": 0.8684757351875305, | |
| "learning_rate": 0.00034302806752186754, | |
| "loss": 4.0668, | |
| "step": 371000 | |
| }, | |
| { | |
| "epoch": 1.9790534637431012, | |
| "grad_norm": 0.880416750907898, | |
| "learning_rate": 0.00034213624234814003, | |
| "loss": 4.0612, | |
| "step": 371500 | |
| }, | |
| { | |
| "epoch": 1.9817170619446398, | |
| "grad_norm": 0.9281913042068481, | |
| "learning_rate": 0.00034124441717441246, | |
| "loss": 4.0583, | |
| "step": 372000 | |
| }, | |
| { | |
| "epoch": 1.9843806601461784, | |
| "grad_norm": 0.8712506294250488, | |
| "learning_rate": 0.0003403525920006849, | |
| "loss": 4.0539, | |
| "step": 372500 | |
| }, | |
| { | |
| "epoch": 1.9870442583477168, | |
| "grad_norm": 0.8760526180267334, | |
| "learning_rate": 0.00033946255047730486, | |
| "loss": 4.0502, | |
| "step": 373000 | |
| }, | |
| { | |
| "epoch": 1.9897078565492552, | |
| "grad_norm": 0.8705692291259766, | |
| "learning_rate": 0.0003385707253035773, | |
| "loss": 4.0592, | |
| "step": 373500 | |
| }, | |
| { | |
| "epoch": 1.9923714547507938, | |
| "grad_norm": 0.8519155383110046, | |
| "learning_rate": 0.00033767890012984973, | |
| "loss": 4.0607, | |
| "step": 374000 | |
| }, | |
| { | |
| "epoch": 1.9950350529523324, | |
| "grad_norm": 0.879636287689209, | |
| "learning_rate": 0.0003367870749561222, | |
| "loss": 4.0566, | |
| "step": 374500 | |
| }, | |
| { | |
| "epoch": 1.9976986511538706, | |
| "grad_norm": 0.8572770357131958, | |
| "learning_rate": 0.00033589703343274213, | |
| "loss": 4.0504, | |
| "step": 375000 | |
| }, | |
| { | |
| "epoch": 2.000362249355409, | |
| "grad_norm": 0.8497179746627808, | |
| "learning_rate": 0.0003350052082590146, | |
| "loss": 4.0603, | |
| "step": 375500 | |
| }, | |
| { | |
| "epoch": 2.003025847556948, | |
| "grad_norm": 0.8854038715362549, | |
| "learning_rate": 0.00033411338308528705, | |
| "loss": 4.055, | |
| "step": 376000 | |
| }, | |
| { | |
| "epoch": 2.0056894457584864, | |
| "grad_norm": 0.9853951334953308, | |
| "learning_rate": 0.0003332215579115595, | |
| "loss": 4.057, | |
| "step": 376500 | |
| }, | |
| { | |
| "epoch": 2.0083530439600246, | |
| "grad_norm": 0.9749231934547424, | |
| "learning_rate": 0.0003323315163881794, | |
| "loss": 4.0497, | |
| "step": 377000 | |
| }, | |
| { | |
| "epoch": 2.011016642161563, | |
| "grad_norm": 0.9801936745643616, | |
| "learning_rate": 0.00033143969121445183, | |
| "loss": 4.0609, | |
| "step": 377500 | |
| }, | |
| { | |
| "epoch": 2.013680240363102, | |
| "grad_norm": 0.9140198826789856, | |
| "learning_rate": 0.0003305478660407243, | |
| "loss": 4.0491, | |
| "step": 378000 | |
| }, | |
| { | |
| "epoch": 2.0163438385646404, | |
| "grad_norm": 0.9118580222129822, | |
| "learning_rate": 0.00032965604086699675, | |
| "loss": 4.0484, | |
| "step": 378500 | |
| }, | |
| { | |
| "epoch": 2.0190074367661786, | |
| "grad_norm": 1.0234750509262085, | |
| "learning_rate": 0.0003287642156932692, | |
| "loss": 4.0466, | |
| "step": 379000 | |
| }, | |
| { | |
| "epoch": 2.021671034967717, | |
| "grad_norm": 0.8892688751220703, | |
| "learning_rate": 0.00032787239051954167, | |
| "loss": 4.0569, | |
| "step": 379500 | |
| }, | |
| { | |
| "epoch": 2.0243346331692558, | |
| "grad_norm": 0.860365092754364, | |
| "learning_rate": 0.0003269823489961616, | |
| "loss": 4.0592, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 2.0269982313707944, | |
| "grad_norm": 0.8938810229301453, | |
| "learning_rate": 0.000326090523822434, | |
| "loss": 4.0523, | |
| "step": 380500 | |
| }, | |
| { | |
| "epoch": 2.0296618295723325, | |
| "grad_norm": 0.885435163974762, | |
| "learning_rate": 0.0003251986986487065, | |
| "loss": 4.0574, | |
| "step": 381000 | |
| }, | |
| { | |
| "epoch": 2.032325427773871, | |
| "grad_norm": 0.9123975038528442, | |
| "learning_rate": 0.00032430687347497893, | |
| "loss": 4.046, | |
| "step": 381500 | |
| }, | |
| { | |
| "epoch": 2.0349890259754098, | |
| "grad_norm": 0.9096443057060242, | |
| "learning_rate": 0.0003234168319515989, | |
| "loss": 4.0551, | |
| "step": 382000 | |
| }, | |
| { | |
| "epoch": 2.0376526241769484, | |
| "grad_norm": 0.8680484890937805, | |
| "learning_rate": 0.00032252500677787133, | |
| "loss": 4.0532, | |
| "step": 382500 | |
| }, | |
| { | |
| "epoch": 2.0403162223784865, | |
| "grad_norm": 0.8725469708442688, | |
| "learning_rate": 0.00032163318160414377, | |
| "loss": 4.0563, | |
| "step": 383000 | |
| }, | |
| { | |
| "epoch": 2.042979820580025, | |
| "grad_norm": 0.9647555947303772, | |
| "learning_rate": 0.00032074135643041625, | |
| "loss": 4.0536, | |
| "step": 383500 | |
| }, | |
| { | |
| "epoch": 2.0456434187815637, | |
| "grad_norm": 0.8826559782028198, | |
| "learning_rate": 0.0003198495312566887, | |
| "loss": 4.0527, | |
| "step": 384000 | |
| }, | |
| { | |
| "epoch": 2.0483070169831024, | |
| "grad_norm": 0.9342438578605652, | |
| "learning_rate": 0.0003189594897333086, | |
| "loss": 4.0607, | |
| "step": 384500 | |
| }, | |
| { | |
| "epoch": 2.0509706151846405, | |
| "grad_norm": 0.9360005855560303, | |
| "learning_rate": 0.0003180676645595811, | |
| "loss": 4.0472, | |
| "step": 385000 | |
| }, | |
| { | |
| "epoch": 2.053634213386179, | |
| "grad_norm": 0.9147686958312988, | |
| "learning_rate": 0.0003171758393858535, | |
| "loss": 4.0485, | |
| "step": 385500 | |
| }, | |
| { | |
| "epoch": 2.0562978115877177, | |
| "grad_norm": 0.8479260206222534, | |
| "learning_rate": 0.000316284014212126, | |
| "loss": 4.0504, | |
| "step": 386000 | |
| }, | |
| { | |
| "epoch": 2.058961409789256, | |
| "grad_norm": 0.8525492548942566, | |
| "learning_rate": 0.00031539218903839844, | |
| "loss": 4.0496, | |
| "step": 386500 | |
| }, | |
| { | |
| "epoch": 2.0616250079907945, | |
| "grad_norm": 0.8503657579421997, | |
| "learning_rate": 0.0003145021475150183, | |
| "loss": 4.0571, | |
| "step": 387000 | |
| }, | |
| { | |
| "epoch": 2.064288606192333, | |
| "grad_norm": 0.8873237371444702, | |
| "learning_rate": 0.0003136103223412908, | |
| "loss": 4.0511, | |
| "step": 387500 | |
| }, | |
| { | |
| "epoch": 2.0669522043938717, | |
| "grad_norm": 0.9111925959587097, | |
| "learning_rate": 0.0003127184971675632, | |
| "loss": 4.0477, | |
| "step": 388000 | |
| }, | |
| { | |
| "epoch": 2.06961580259541, | |
| "grad_norm": 0.864146888256073, | |
| "learning_rate": 0.0003118266719938357, | |
| "loss": 4.0526, | |
| "step": 388500 | |
| }, | |
| { | |
| "epoch": 2.0722794007969485, | |
| "grad_norm": 0.8477506637573242, | |
| "learning_rate": 0.00031093484682010814, | |
| "loss": 4.054, | |
| "step": 389000 | |
| }, | |
| { | |
| "epoch": 2.074942998998487, | |
| "grad_norm": 0.9023974537849426, | |
| "learning_rate": 0.00031004480529672805, | |
| "loss": 4.0579, | |
| "step": 389500 | |
| }, | |
| { | |
| "epoch": 2.0776065972000257, | |
| "grad_norm": 0.8909152150154114, | |
| "learning_rate": 0.00030915298012300054, | |
| "loss": 4.0521, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 2.080270195401564, | |
| "grad_norm": 0.9014437794685364, | |
| "learning_rate": 0.00030826115494927297, | |
| "loss": 4.0553, | |
| "step": 390500 | |
| }, | |
| { | |
| "epoch": 2.0829337936031025, | |
| "grad_norm": 0.8972243666648865, | |
| "learning_rate": 0.00030736932977554546, | |
| "loss": 4.0507, | |
| "step": 391000 | |
| }, | |
| { | |
| "epoch": 2.085597391804641, | |
| "grad_norm": 0.8825047016143799, | |
| "learning_rate": 0.0003064792882521654, | |
| "loss": 4.0526, | |
| "step": 391500 | |
| }, | |
| { | |
| "epoch": 2.0882609900061797, | |
| "grad_norm": 0.924751341342926, | |
| "learning_rate": 0.0003055874630784378, | |
| "loss": 4.0521, | |
| "step": 392000 | |
| }, | |
| { | |
| "epoch": 2.090924588207718, | |
| "grad_norm": 0.8999988436698914, | |
| "learning_rate": 0.0003046956379047103, | |
| "loss": 4.0524, | |
| "step": 392500 | |
| }, | |
| { | |
| "epoch": 2.0935881864092565, | |
| "grad_norm": 0.8595131635665894, | |
| "learning_rate": 0.0003038038127309827, | |
| "loss": 4.0519, | |
| "step": 393000 | |
| }, | |
| { | |
| "epoch": 2.096251784610795, | |
| "grad_norm": 0.9281662106513977, | |
| "learning_rate": 0.00030291377120760264, | |
| "loss": 4.0489, | |
| "step": 393500 | |
| }, | |
| { | |
| "epoch": 2.0989153828123337, | |
| "grad_norm": 0.8841512799263, | |
| "learning_rate": 0.0003020219460338751, | |
| "loss": 4.0504, | |
| "step": 394000 | |
| }, | |
| { | |
| "epoch": 2.101578981013872, | |
| "grad_norm": 0.8970746994018555, | |
| "learning_rate": 0.00030113012086014756, | |
| "loss": 4.0453, | |
| "step": 394500 | |
| }, | |
| { | |
| "epoch": 2.1042425792154105, | |
| "grad_norm": 0.946937084197998, | |
| "learning_rate": 0.00030023829568642005, | |
| "loss": 4.0443, | |
| "step": 395000 | |
| }, | |
| { | |
| "epoch": 2.106906177416949, | |
| "grad_norm": 1.066956877708435, | |
| "learning_rate": 0.0002993482541630399, | |
| "loss": 4.0591, | |
| "step": 395500 | |
| }, | |
| { | |
| "epoch": 2.1095697756184877, | |
| "grad_norm": 0.8527683615684509, | |
| "learning_rate": 0.00029845642898931234, | |
| "loss": 4.0498, | |
| "step": 396000 | |
| }, | |
| { | |
| "epoch": 2.112233373820026, | |
| "grad_norm": 0.9100342988967896, | |
| "learning_rate": 0.0002975646038155848, | |
| "loss": 4.0463, | |
| "step": 396500 | |
| }, | |
| { | |
| "epoch": 2.1148969720215645, | |
| "grad_norm": 0.9486255645751953, | |
| "learning_rate": 0.00029667277864185726, | |
| "loss": 4.0541, | |
| "step": 397000 | |
| }, | |
| { | |
| "epoch": 2.117560570223103, | |
| "grad_norm": 0.9460600018501282, | |
| "learning_rate": 0.00029578273711847717, | |
| "loss": 4.0481, | |
| "step": 397500 | |
| }, | |
| { | |
| "epoch": 2.1202241684246417, | |
| "grad_norm": 0.9710919857025146, | |
| "learning_rate": 0.00029489091194474966, | |
| "loss": 4.0486, | |
| "step": 398000 | |
| }, | |
| { | |
| "epoch": 2.12288776662618, | |
| "grad_norm": 0.9194395542144775, | |
| "learning_rate": 0.0002939990867710221, | |
| "loss": 4.0458, | |
| "step": 398500 | |
| }, | |
| { | |
| "epoch": 2.1255513648277184, | |
| "grad_norm": 0.8708109855651855, | |
| "learning_rate": 0.0002931072615972946, | |
| "loss": 4.0465, | |
| "step": 399000 | |
| }, | |
| { | |
| "epoch": 2.128214963029257, | |
| "grad_norm": 0.8814635276794434, | |
| "learning_rate": 0.0002922172200739145, | |
| "loss": 4.0441, | |
| "step": 399500 | |
| }, | |
| { | |
| "epoch": 2.1308785612307957, | |
| "grad_norm": 0.9306267499923706, | |
| "learning_rate": 0.0002913253949001869, | |
| "loss": 4.0417, | |
| "step": 400000 | |
| }, | |
| { | |
| "epoch": 2.133542159432334, | |
| "grad_norm": 0.9086319208145142, | |
| "learning_rate": 0.0002904335697264594, | |
| "loss": 4.0485, | |
| "step": 400500 | |
| }, | |
| { | |
| "epoch": 2.1362057576338724, | |
| "grad_norm": 0.9667945504188538, | |
| "learning_rate": 0.00028954174455273184, | |
| "loss": 4.0387, | |
| "step": 401000 | |
| }, | |
| { | |
| "epoch": 2.138869355835411, | |
| "grad_norm": 0.9225121736526489, | |
| "learning_rate": 0.00028864991937900433, | |
| "loss": 4.0424, | |
| "step": 401500 | |
| }, | |
| { | |
| "epoch": 2.1415329540369497, | |
| "grad_norm": 0.891379714012146, | |
| "learning_rate": 0.0002877598778556242, | |
| "loss": 4.046, | |
| "step": 402000 | |
| }, | |
| { | |
| "epoch": 2.144196552238488, | |
| "grad_norm": 0.9507352709770203, | |
| "learning_rate": 0.0002868680526818966, | |
| "loss": 4.0477, | |
| "step": 402500 | |
| }, | |
| { | |
| "epoch": 2.1468601504400264, | |
| "grad_norm": 0.9602506756782532, | |
| "learning_rate": 0.00028597622750816917, | |
| "loss": 4.0498, | |
| "step": 403000 | |
| }, | |
| { | |
| "epoch": 2.149523748641565, | |
| "grad_norm": 0.9250164031982422, | |
| "learning_rate": 0.0002850844023344416, | |
| "loss": 4.0404, | |
| "step": 403500 | |
| }, | |
| { | |
| "epoch": 2.152187346843103, | |
| "grad_norm": 0.917396605014801, | |
| "learning_rate": 0.00028419436081106146, | |
| "loss": 4.0488, | |
| "step": 404000 | |
| }, | |
| { | |
| "epoch": 2.154850945044642, | |
| "grad_norm": 0.8889843821525574, | |
| "learning_rate": 0.00028330253563733395, | |
| "loss": 4.0412, | |
| "step": 404500 | |
| }, | |
| { | |
| "epoch": 2.1575145432461804, | |
| "grad_norm": 0.9360488653182983, | |
| "learning_rate": 0.0002824107104636064, | |
| "loss": 4.0407, | |
| "step": 405000 | |
| }, | |
| { | |
| "epoch": 2.160178141447719, | |
| "grad_norm": 0.9107580184936523, | |
| "learning_rate": 0.00028151888528987886, | |
| "loss": 4.0439, | |
| "step": 405500 | |
| }, | |
| { | |
| "epoch": 2.162841739649257, | |
| "grad_norm": 0.9053534865379333, | |
| "learning_rate": 0.0002806270601161513, | |
| "loss": 4.042, | |
| "step": 406000 | |
| }, | |
| { | |
| "epoch": 2.165505337850796, | |
| "grad_norm": 0.8875529766082764, | |
| "learning_rate": 0.0002797370185927712, | |
| "loss": 4.0429, | |
| "step": 406500 | |
| }, | |
| { | |
| "epoch": 2.1681689360523344, | |
| "grad_norm": 0.9056974053382874, | |
| "learning_rate": 0.0002788451934190437, | |
| "loss": 4.0461, | |
| "step": 407000 | |
| }, | |
| { | |
| "epoch": 2.170832534253873, | |
| "grad_norm": 0.8870306015014648, | |
| "learning_rate": 0.00027795336824531613, | |
| "loss": 4.0473, | |
| "step": 407500 | |
| }, | |
| { | |
| "epoch": 2.173496132455411, | |
| "grad_norm": 0.9122534394264221, | |
| "learning_rate": 0.0002770615430715886, | |
| "loss": 4.0423, | |
| "step": 408000 | |
| }, | |
| { | |
| "epoch": 2.17615973065695, | |
| "grad_norm": 0.8884118795394897, | |
| "learning_rate": 0.00027617150154820853, | |
| "loss": 4.0455, | |
| "step": 408500 | |
| }, | |
| { | |
| "epoch": 2.1788233288584884, | |
| "grad_norm": 0.8788624405860901, | |
| "learning_rate": 0.00027527967637448096, | |
| "loss": 4.0396, | |
| "step": 409000 | |
| }, | |
| { | |
| "epoch": 2.181486927060027, | |
| "grad_norm": 0.9050582647323608, | |
| "learning_rate": 0.00027438785120075345, | |
| "loss": 4.0364, | |
| "step": 409500 | |
| }, | |
| { | |
| "epoch": 2.184150525261565, | |
| "grad_norm": 0.9116672277450562, | |
| "learning_rate": 0.0002734960260270259, | |
| "loss": 4.0479, | |
| "step": 410000 | |
| }, | |
| { | |
| "epoch": 2.1868141234631038, | |
| "grad_norm": 0.8476006984710693, | |
| "learning_rate": 0.00027260420085329837, | |
| "loss": 4.0407, | |
| "step": 410500 | |
| }, | |
| { | |
| "epoch": 2.1894777216646424, | |
| "grad_norm": 0.9175940752029419, | |
| "learning_rate": 0.00027171415932991823, | |
| "loss": 4.0469, | |
| "step": 411000 | |
| }, | |
| { | |
| "epoch": 2.192141319866181, | |
| "grad_norm": 0.9391987919807434, | |
| "learning_rate": 0.00027082233415619066, | |
| "loss": 4.0477, | |
| "step": 411500 | |
| }, | |
| { | |
| "epoch": 2.194804918067719, | |
| "grad_norm": 0.880539059638977, | |
| "learning_rate": 0.00026993050898246315, | |
| "loss": 4.0483, | |
| "step": 412000 | |
| }, | |
| { | |
| "epoch": 2.1974685162692578, | |
| "grad_norm": 0.9159991145133972, | |
| "learning_rate": 0.0002690386838087356, | |
| "loss": 4.0439, | |
| "step": 412500 | |
| }, | |
| { | |
| "epoch": 2.2001321144707964, | |
| "grad_norm": 0.846324622631073, | |
| "learning_rate": 0.0002681486422853555, | |
| "loss": 4.0491, | |
| "step": 413000 | |
| }, | |
| { | |
| "epoch": 2.202795712672335, | |
| "grad_norm": 0.9291318655014038, | |
| "learning_rate": 0.000267256817111628, | |
| "loss": 4.0433, | |
| "step": 413500 | |
| }, | |
| { | |
| "epoch": 2.205459310873873, | |
| "grad_norm": 0.9299983978271484, | |
| "learning_rate": 0.0002663649919379004, | |
| "loss": 4.039, | |
| "step": 414000 | |
| }, | |
| { | |
| "epoch": 2.2081229090754118, | |
| "grad_norm": 0.9034929275512695, | |
| "learning_rate": 0.0002654731667641729, | |
| "loss": 4.0426, | |
| "step": 414500 | |
| }, | |
| { | |
| "epoch": 2.2107865072769504, | |
| "grad_norm": 0.8487489223480225, | |
| "learning_rate": 0.0002645831252407928, | |
| "loss": 4.0382, | |
| "step": 415000 | |
| }, | |
| { | |
| "epoch": 2.213450105478489, | |
| "grad_norm": 0.9376189112663269, | |
| "learning_rate": 0.00026369130006706525, | |
| "loss": 4.0478, | |
| "step": 415500 | |
| }, | |
| { | |
| "epoch": 2.216113703680027, | |
| "grad_norm": 0.9032031297683716, | |
| "learning_rate": 0.00026279947489333774, | |
| "loss": 4.0446, | |
| "step": 416000 | |
| }, | |
| { | |
| "epoch": 2.2187773018815657, | |
| "grad_norm": 0.873349666595459, | |
| "learning_rate": 0.00026190764971961017, | |
| "loss": 4.0419, | |
| "step": 416500 | |
| }, | |
| { | |
| "epoch": 2.2214409000831044, | |
| "grad_norm": 0.9227972626686096, | |
| "learning_rate": 0.0002610176081962301, | |
| "loss": 4.0415, | |
| "step": 417000 | |
| }, | |
| { | |
| "epoch": 2.224104498284643, | |
| "grad_norm": 0.9360315203666687, | |
| "learning_rate": 0.00026012578302250257, | |
| "loss": 4.0391, | |
| "step": 417500 | |
| }, | |
| { | |
| "epoch": 2.226768096486181, | |
| "grad_norm": 1.0437467098236084, | |
| "learning_rate": 0.000259233957848775, | |
| "loss": 4.0425, | |
| "step": 418000 | |
| }, | |
| { | |
| "epoch": 2.2294316946877197, | |
| "grad_norm": 0.9248673319816589, | |
| "learning_rate": 0.0002583421326750475, | |
| "loss": 4.0413, | |
| "step": 418500 | |
| }, | |
| { | |
| "epoch": 2.2320952928892583, | |
| "grad_norm": 0.8973048329353333, | |
| "learning_rate": 0.00025745209115166735, | |
| "loss": 4.0411, | |
| "step": 419000 | |
| }, | |
| { | |
| "epoch": 2.234758891090797, | |
| "grad_norm": 0.9082027077674866, | |
| "learning_rate": 0.0002565602659779398, | |
| "loss": 4.0424, | |
| "step": 419500 | |
| }, | |
| { | |
| "epoch": 2.237422489292335, | |
| "grad_norm": 0.8980434536933899, | |
| "learning_rate": 0.00025566844080421227, | |
| "loss": 4.0389, | |
| "step": 420000 | |
| }, | |
| { | |
| "epoch": 2.2400860874938737, | |
| "grad_norm": 0.8749063014984131, | |
| "learning_rate": 0.0002547766156304847, | |
| "loss": 4.0283, | |
| "step": 420500 | |
| }, | |
| { | |
| "epoch": 2.2427496856954123, | |
| "grad_norm": 0.9931572675704956, | |
| "learning_rate": 0.0002538865741071046, | |
| "loss": 4.0411, | |
| "step": 421000 | |
| }, | |
| { | |
| "epoch": 2.2454132838969505, | |
| "grad_norm": 1.0000332593917847, | |
| "learning_rate": 0.0002529947489333771, | |
| "loss": 4.0426, | |
| "step": 421500 | |
| }, | |
| { | |
| "epoch": 2.248076882098489, | |
| "grad_norm": 0.8988611698150635, | |
| "learning_rate": 0.00025210292375964954, | |
| "loss": 4.0401, | |
| "step": 422000 | |
| }, | |
| { | |
| "epoch": 2.2507404803000277, | |
| "grad_norm": 0.9371945261955261, | |
| "learning_rate": 0.000251211098585922, | |
| "loss": 4.0367, | |
| "step": 422500 | |
| }, | |
| { | |
| "epoch": 2.2534040785015663, | |
| "grad_norm": 0.9270386099815369, | |
| "learning_rate": 0.00025031927341219446, | |
| "loss": 4.0481, | |
| "step": 423000 | |
| }, | |
| { | |
| "epoch": 2.256067676703105, | |
| "grad_norm": 0.964900553226471, | |
| "learning_rate": 0.00024942923188881437, | |
| "loss": 4.0381, | |
| "step": 423500 | |
| }, | |
| { | |
| "epoch": 2.258731274904643, | |
| "grad_norm": 0.8744553923606873, | |
| "learning_rate": 0.00024853740671508686, | |
| "loss": 4.0375, | |
| "step": 424000 | |
| }, | |
| { | |
| "epoch": 2.2613948731061817, | |
| "grad_norm": 0.9299191236495972, | |
| "learning_rate": 0.0002476455815413593, | |
| "loss": 4.036, | |
| "step": 424500 | |
| }, | |
| { | |
| "epoch": 2.2640584713077203, | |
| "grad_norm": 0.9264661073684692, | |
| "learning_rate": 0.0002467537563676318, | |
| "loss": 4.04, | |
| "step": 425000 | |
| }, | |
| { | |
| "epoch": 2.2667220695092585, | |
| "grad_norm": 0.9486096501350403, | |
| "learning_rate": 0.00024586371484425164, | |
| "loss": 4.0362, | |
| "step": 425500 | |
| }, | |
| { | |
| "epoch": 2.269385667710797, | |
| "grad_norm": 0.9084232449531555, | |
| "learning_rate": 0.0002449718896705241, | |
| "loss": 4.0442, | |
| "step": 426000 | |
| }, | |
| { | |
| "epoch": 2.2720492659123357, | |
| "grad_norm": 0.898169755935669, | |
| "learning_rate": 0.00024408006449679656, | |
| "loss": 4.04, | |
| "step": 426500 | |
| }, | |
| { | |
| "epoch": 2.2747128641138743, | |
| "grad_norm": 0.9344006180763245, | |
| "learning_rate": 0.00024318823932306902, | |
| "loss": 4.0393, | |
| "step": 427000 | |
| }, | |
| { | |
| "epoch": 2.2773764623154125, | |
| "grad_norm": 0.9698314666748047, | |
| "learning_rate": 0.00024229641414934147, | |
| "loss": 4.0293, | |
| "step": 427500 | |
| }, | |
| { | |
| "epoch": 2.280040060516951, | |
| "grad_norm": 0.9501084685325623, | |
| "learning_rate": 0.0002414063726259614, | |
| "loss": 4.038, | |
| "step": 428000 | |
| }, | |
| { | |
| "epoch": 2.2827036587184897, | |
| "grad_norm": 0.8912844061851501, | |
| "learning_rate": 0.00024051454745223385, | |
| "loss": 4.0374, | |
| "step": 428500 | |
| }, | |
| { | |
| "epoch": 2.2853672569200283, | |
| "grad_norm": 0.9317381978034973, | |
| "learning_rate": 0.0002396227222785063, | |
| "loss": 4.0353, | |
| "step": 429000 | |
| }, | |
| { | |
| "epoch": 2.2880308551215665, | |
| "grad_norm": 0.9316912889480591, | |
| "learning_rate": 0.00023873089710477877, | |
| "loss": 4.0383, | |
| "step": 429500 | |
| }, | |
| { | |
| "epoch": 2.290694453323105, | |
| "grad_norm": 0.9433039426803589, | |
| "learning_rate": 0.00023784085558139868, | |
| "loss": 4.0332, | |
| "step": 430000 | |
| }, | |
| { | |
| "epoch": 2.2933580515246437, | |
| "grad_norm": 0.9455925226211548, | |
| "learning_rate": 0.00023694903040767112, | |
| "loss": 4.0326, | |
| "step": 430500 | |
| }, | |
| { | |
| "epoch": 2.2960216497261823, | |
| "grad_norm": 0.9149669408798218, | |
| "learning_rate": 0.00023605720523394358, | |
| "loss": 4.0442, | |
| "step": 431000 | |
| }, | |
| { | |
| "epoch": 2.2986852479277204, | |
| "grad_norm": 0.9723134636878967, | |
| "learning_rate": 0.00023516538006021603, | |
| "loss": 4.0313, | |
| "step": 431500 | |
| }, | |
| { | |
| "epoch": 2.301348846129259, | |
| "grad_norm": 0.9359349012374878, | |
| "learning_rate": 0.00023427533853683595, | |
| "loss": 4.0369, | |
| "step": 432000 | |
| }, | |
| { | |
| "epoch": 2.3040124443307977, | |
| "grad_norm": 0.9478726983070374, | |
| "learning_rate": 0.0002333835133631084, | |
| "loss": 4.0386, | |
| "step": 432500 | |
| }, | |
| { | |
| "epoch": 2.3066760425323363, | |
| "grad_norm": 0.9433446526527405, | |
| "learning_rate": 0.00023249168818938084, | |
| "loss": 4.0334, | |
| "step": 433000 | |
| }, | |
| { | |
| "epoch": 2.3093396407338744, | |
| "grad_norm": 0.9548355340957642, | |
| "learning_rate": 0.00023159986301565333, | |
| "loss": 4.0404, | |
| "step": 433500 | |
| }, | |
| { | |
| "epoch": 2.312003238935413, | |
| "grad_norm": 1.014600157737732, | |
| "learning_rate": 0.0002307080378419258, | |
| "loss": 4.0337, | |
| "step": 434000 | |
| }, | |
| { | |
| "epoch": 2.3146668371369516, | |
| "grad_norm": 0.8967020511627197, | |
| "learning_rate": 0.0002298179963185457, | |
| "loss": 4.0343, | |
| "step": 434500 | |
| }, | |
| { | |
| "epoch": 2.31733043533849, | |
| "grad_norm": 1.0393925905227661, | |
| "learning_rate": 0.00022892617114481814, | |
| "loss": 4.0354, | |
| "step": 435000 | |
| }, | |
| { | |
| "epoch": 2.3199940335400284, | |
| "grad_norm": 0.9963262677192688, | |
| "learning_rate": 0.0002280343459710906, | |
| "loss": 4.0358, | |
| "step": 435500 | |
| }, | |
| { | |
| "epoch": 2.322657631741567, | |
| "grad_norm": 0.9155731797218323, | |
| "learning_rate": 0.00022714252079736305, | |
| "loss": 4.0372, | |
| "step": 436000 | |
| }, | |
| { | |
| "epoch": 2.3253212299431056, | |
| "grad_norm": 0.9272859692573547, | |
| "learning_rate": 0.00022625247927398297, | |
| "loss": 4.04, | |
| "step": 436500 | |
| }, | |
| { | |
| "epoch": 2.3279848281446442, | |
| "grad_norm": 0.9763675928115845, | |
| "learning_rate": 0.0002253606541002554, | |
| "loss": 4.0312, | |
| "step": 437000 | |
| }, | |
| { | |
| "epoch": 2.3306484263461824, | |
| "grad_norm": 0.9596668481826782, | |
| "learning_rate": 0.00022446882892652786, | |
| "loss": 4.0337, | |
| "step": 437500 | |
| }, | |
| { | |
| "epoch": 2.333312024547721, | |
| "grad_norm": 0.9284877777099609, | |
| "learning_rate": 0.00022357700375280032, | |
| "loss": 4.0386, | |
| "step": 438000 | |
| }, | |
| { | |
| "epoch": 2.3359756227492596, | |
| "grad_norm": 0.9726400971412659, | |
| "learning_rate": 0.00022268696222942026, | |
| "loss": 4.0354, | |
| "step": 438500 | |
| }, | |
| { | |
| "epoch": 2.338639220950798, | |
| "grad_norm": 0.9305101037025452, | |
| "learning_rate": 0.0002217951370556927, | |
| "loss": 4.0213, | |
| "step": 439000 | |
| }, | |
| { | |
| "epoch": 2.3413028191523364, | |
| "grad_norm": 0.9207624793052673, | |
| "learning_rate": 0.00022090331188196515, | |
| "loss": 4.0388, | |
| "step": 439500 | |
| }, | |
| { | |
| "epoch": 2.343966417353875, | |
| "grad_norm": 0.940703809261322, | |
| "learning_rate": 0.00022001148670823761, | |
| "loss": 4.0303, | |
| "step": 440000 | |
| }, | |
| { | |
| "epoch": 2.3466300155554136, | |
| "grad_norm": 1.0912624597549438, | |
| "learning_rate": 0.00021912144518485753, | |
| "loss": 4.0319, | |
| "step": 440500 | |
| }, | |
| { | |
| "epoch": 2.3492936137569522, | |
| "grad_norm": 0.9056357145309448, | |
| "learning_rate": 0.00021822962001113, | |
| "loss": 4.0326, | |
| "step": 441000 | |
| }, | |
| { | |
| "epoch": 2.3519572119584904, | |
| "grad_norm": 0.891265332698822, | |
| "learning_rate": 0.00021733779483740242, | |
| "loss": 4.0398, | |
| "step": 441500 | |
| }, | |
| { | |
| "epoch": 2.354620810160029, | |
| "grad_norm": 0.9790766835212708, | |
| "learning_rate": 0.00021644596966367488, | |
| "loss": 4.0352, | |
| "step": 442000 | |
| }, | |
| { | |
| "epoch": 2.3572844083615676, | |
| "grad_norm": 0.9584769010543823, | |
| "learning_rate": 0.00021555414448994734, | |
| "loss": 4.0393, | |
| "step": 442500 | |
| }, | |
| { | |
| "epoch": 2.3599480065631058, | |
| "grad_norm": 0.9171414971351624, | |
| "learning_rate": 0.00021466410296656728, | |
| "loss": 4.0384, | |
| "step": 443000 | |
| }, | |
| { | |
| "epoch": 2.3626116047646444, | |
| "grad_norm": 0.9353621006011963, | |
| "learning_rate": 0.00021377227779283972, | |
| "loss": 4.0247, | |
| "step": 443500 | |
| }, | |
| { | |
| "epoch": 2.365275202966183, | |
| "grad_norm": 1.1184170246124268, | |
| "learning_rate": 0.00021288045261911217, | |
| "loss": 4.0374, | |
| "step": 444000 | |
| }, | |
| { | |
| "epoch": 2.3679388011677216, | |
| "grad_norm": 0.9417023062705994, | |
| "learning_rate": 0.00021198862744538463, | |
| "loss": 4.0279, | |
| "step": 444500 | |
| }, | |
| { | |
| "epoch": 2.3706023993692598, | |
| "grad_norm": 1.0378462076187134, | |
| "learning_rate": 0.00021109858592200455, | |
| "loss": 4.0357, | |
| "step": 445000 | |
| }, | |
| { | |
| "epoch": 2.3732659975707984, | |
| "grad_norm": 0.9642356634140015, | |
| "learning_rate": 0.00021020676074827698, | |
| "loss": 4.0334, | |
| "step": 445500 | |
| }, | |
| { | |
| "epoch": 2.375929595772337, | |
| "grad_norm": 0.970891535282135, | |
| "learning_rate": 0.00020931493557454944, | |
| "loss": 4.025, | |
| "step": 446000 | |
| }, | |
| { | |
| "epoch": 2.3785931939738756, | |
| "grad_norm": 0.9346612691879272, | |
| "learning_rate": 0.0002084231104008219, | |
| "loss": 4.0255, | |
| "step": 446500 | |
| }, | |
| { | |
| "epoch": 2.3812567921754138, | |
| "grad_norm": 0.9348496794700623, | |
| "learning_rate": 0.00020753128522709436, | |
| "loss": 4.0305, | |
| "step": 447000 | |
| }, | |
| { | |
| "epoch": 2.3839203903769524, | |
| "grad_norm": 0.9465219974517822, | |
| "learning_rate": 0.00020664124370371428, | |
| "loss": 4.0279, | |
| "step": 447500 | |
| }, | |
| { | |
| "epoch": 2.386583988578491, | |
| "grad_norm": 0.9686950445175171, | |
| "learning_rate": 0.00020574941852998673, | |
| "loss": 4.038, | |
| "step": 448000 | |
| }, | |
| { | |
| "epoch": 2.3892475867800296, | |
| "grad_norm": 0.8983688354492188, | |
| "learning_rate": 0.0002048575933562592, | |
| "loss": 4.0302, | |
| "step": 448500 | |
| }, | |
| { | |
| "epoch": 2.3919111849815677, | |
| "grad_norm": 0.9491548538208008, | |
| "learning_rate": 0.00020396576818253165, | |
| "loss": 4.0302, | |
| "step": 449000 | |
| }, | |
| { | |
| "epoch": 2.3945747831831063, | |
| "grad_norm": 0.9248127341270447, | |
| "learning_rate": 0.00020307572665915154, | |
| "loss": 4.0338, | |
| "step": 449500 | |
| }, | |
| { | |
| "epoch": 2.397238381384645, | |
| "grad_norm": 0.9573125243186951, | |
| "learning_rate": 0.000202183901485424, | |
| "loss": 4.0337, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 2.3999019795861836, | |
| "grad_norm": 0.9655391573905945, | |
| "learning_rate": 0.00020129207631169646, | |
| "loss": 4.0338, | |
| "step": 450500 | |
| }, | |
| { | |
| "epoch": 2.4025655777877217, | |
| "grad_norm": 0.9134914875030518, | |
| "learning_rate": 0.00020040025113796892, | |
| "loss": 4.0241, | |
| "step": 451000 | |
| }, | |
| { | |
| "epoch": 2.4052291759892603, | |
| "grad_norm": 0.9635368585586548, | |
| "learning_rate": 0.00019951020961458886, | |
| "loss": 4.0357, | |
| "step": 451500 | |
| }, | |
| { | |
| "epoch": 2.407892774190799, | |
| "grad_norm": 0.9742798805236816, | |
| "learning_rate": 0.0001986183844408613, | |
| "loss": 4.0242, | |
| "step": 452000 | |
| }, | |
| { | |
| "epoch": 2.4105563723923376, | |
| "grad_norm": 0.9775349497795105, | |
| "learning_rate": 0.00019772655926713375, | |
| "loss": 4.0279, | |
| "step": 452500 | |
| }, | |
| { | |
| "epoch": 2.4132199705938757, | |
| "grad_norm": 0.9313619136810303, | |
| "learning_rate": 0.0001968347340934062, | |
| "loss": 4.03, | |
| "step": 453000 | |
| }, | |
| { | |
| "epoch": 2.4158835687954143, | |
| "grad_norm": 0.9796269536018372, | |
| "learning_rate": 0.00019594469257002613, | |
| "loss": 4.0254, | |
| "step": 453500 | |
| }, | |
| { | |
| "epoch": 2.418547166996953, | |
| "grad_norm": 0.9695695042610168, | |
| "learning_rate": 0.00019505286739629856, | |
| "loss": 4.0353, | |
| "step": 454000 | |
| }, | |
| { | |
| "epoch": 2.4212107651984915, | |
| "grad_norm": 0.9753876328468323, | |
| "learning_rate": 0.00019416104222257102, | |
| "loss": 4.0269, | |
| "step": 454500 | |
| }, | |
| { | |
| "epoch": 2.4238743634000297, | |
| "grad_norm": 0.9220411777496338, | |
| "learning_rate": 0.00019326921704884348, | |
| "loss": 4.0289, | |
| "step": 455000 | |
| }, | |
| { | |
| "epoch": 2.4265379616015683, | |
| "grad_norm": 0.9355341196060181, | |
| "learning_rate": 0.0001923791755254634, | |
| "loss": 4.0297, | |
| "step": 455500 | |
| }, | |
| { | |
| "epoch": 2.429201559803107, | |
| "grad_norm": 1.0068522691726685, | |
| "learning_rate": 0.00019148735035173583, | |
| "loss": 4.0332, | |
| "step": 456000 | |
| }, | |
| { | |
| "epoch": 2.431865158004645, | |
| "grad_norm": 0.9809306263923645, | |
| "learning_rate": 0.00019059552517800831, | |
| "loss": 4.025, | |
| "step": 456500 | |
| }, | |
| { | |
| "epoch": 2.4345287562061837, | |
| "grad_norm": 0.9140877723693848, | |
| "learning_rate": 0.00018970370000428077, | |
| "loss": 4.0237, | |
| "step": 457000 | |
| }, | |
| { | |
| "epoch": 2.4371923544077223, | |
| "grad_norm": 0.942362368106842, | |
| "learning_rate": 0.00018881187483055323, | |
| "loss": 4.0299, | |
| "step": 457500 | |
| }, | |
| { | |
| "epoch": 2.439855952609261, | |
| "grad_norm": 1.0030492544174194, | |
| "learning_rate": 0.00018792183330717312, | |
| "loss": 4.0241, | |
| "step": 458000 | |
| }, | |
| { | |
| "epoch": 2.4425195508107995, | |
| "grad_norm": 0.9555344581604004, | |
| "learning_rate": 0.00018703000813344558, | |
| "loss": 4.0269, | |
| "step": 458500 | |
| }, | |
| { | |
| "epoch": 2.4451831490123377, | |
| "grad_norm": 0.9068697690963745, | |
| "learning_rate": 0.00018613818295971804, | |
| "loss": 4.0273, | |
| "step": 459000 | |
| }, | |
| { | |
| "epoch": 2.4478467472138763, | |
| "grad_norm": 1.026928186416626, | |
| "learning_rate": 0.0001852463577859905, | |
| "loss": 4.0271, | |
| "step": 459500 | |
| }, | |
| { | |
| "epoch": 2.450510345415415, | |
| "grad_norm": 1.0138953924179077, | |
| "learning_rate": 0.00018435631626261041, | |
| "loss": 4.0273, | |
| "step": 460000 | |
| }, | |
| { | |
| "epoch": 2.453173943616953, | |
| "grad_norm": 0.9750286936759949, | |
| "learning_rate": 0.00018346449108888285, | |
| "loss": 4.0304, | |
| "step": 460500 | |
| }, | |
| { | |
| "epoch": 2.4558375418184917, | |
| "grad_norm": 0.9891506433486938, | |
| "learning_rate": 0.0001825726659151553, | |
| "loss": 4.028, | |
| "step": 461000 | |
| }, | |
| { | |
| "epoch": 2.4585011400200303, | |
| "grad_norm": 0.9331740140914917, | |
| "learning_rate": 0.00018168084074142777, | |
| "loss": 4.0259, | |
| "step": 461500 | |
| }, | |
| { | |
| "epoch": 2.461164738221569, | |
| "grad_norm": 0.9839907288551331, | |
| "learning_rate": 0.00018078901556770025, | |
| "loss": 4.0299, | |
| "step": 462000 | |
| }, | |
| { | |
| "epoch": 2.463828336423107, | |
| "grad_norm": 1.092699408531189, | |
| "learning_rate": 0.00017989897404432014, | |
| "loss": 4.0279, | |
| "step": 462500 | |
| }, | |
| { | |
| "epoch": 2.4664919346246457, | |
| "grad_norm": 0.9484713673591614, | |
| "learning_rate": 0.0001790071488705926, | |
| "loss": 4.0141, | |
| "step": 463000 | |
| }, | |
| { | |
| "epoch": 2.4691555328261843, | |
| "grad_norm": 0.9671944975852966, | |
| "learning_rate": 0.00017811532369686506, | |
| "loss": 4.0262, | |
| "step": 463500 | |
| }, | |
| { | |
| "epoch": 2.471819131027723, | |
| "grad_norm": 0.9488347172737122, | |
| "learning_rate": 0.00017722349852313752, | |
| "loss": 4.0197, | |
| "step": 464000 | |
| }, | |
| { | |
| "epoch": 2.474482729229261, | |
| "grad_norm": 0.9663012623786926, | |
| "learning_rate": 0.0001763334569997574, | |
| "loss": 4.0238, | |
| "step": 464500 | |
| }, | |
| { | |
| "epoch": 2.4771463274307997, | |
| "grad_norm": 0.9515085220336914, | |
| "learning_rate": 0.00017544163182602987, | |
| "loss": 4.0248, | |
| "step": 465000 | |
| }, | |
| { | |
| "epoch": 2.4798099256323383, | |
| "grad_norm": 0.969129204750061, | |
| "learning_rate": 0.00017454980665230233, | |
| "loss": 4.027, | |
| "step": 465500 | |
| }, | |
| { | |
| "epoch": 2.482473523833877, | |
| "grad_norm": 0.9723744988441467, | |
| "learning_rate": 0.00017365798147857479, | |
| "loss": 4.0223, | |
| "step": 466000 | |
| }, | |
| { | |
| "epoch": 2.485137122035415, | |
| "grad_norm": 0.9454832673072815, | |
| "learning_rate": 0.0001727679399551947, | |
| "loss": 4.0257, | |
| "step": 466500 | |
| }, | |
| { | |
| "epoch": 2.4878007202369536, | |
| "grad_norm": 0.9404035210609436, | |
| "learning_rate": 0.00017187611478146716, | |
| "loss": 4.0292, | |
| "step": 467000 | |
| }, | |
| { | |
| "epoch": 2.4904643184384923, | |
| "grad_norm": 0.9745790362358093, | |
| "learning_rate": 0.00017098428960773962, | |
| "loss": 4.027, | |
| "step": 467500 | |
| }, | |
| { | |
| "epoch": 2.493127916640031, | |
| "grad_norm": 0.952643871307373, | |
| "learning_rate": 0.00017009246443401208, | |
| "loss": 4.0259, | |
| "step": 468000 | |
| }, | |
| { | |
| "epoch": 2.495791514841569, | |
| "grad_norm": 1.0002975463867188, | |
| "learning_rate": 0.000169202422910632, | |
| "loss": 4.0286, | |
| "step": 468500 | |
| }, | |
| { | |
| "epoch": 2.4984551130431076, | |
| "grad_norm": 0.9904667139053345, | |
| "learning_rate": 0.00016831059773690443, | |
| "loss": 4.0233, | |
| "step": 469000 | |
| }, | |
| { | |
| "epoch": 2.5011187112446462, | |
| "grad_norm": 0.9523800015449524, | |
| "learning_rate": 0.00016741877256317689, | |
| "loss": 4.0205, | |
| "step": 469500 | |
| }, | |
| { | |
| "epoch": 2.5037823094461844, | |
| "grad_norm": 1.111253023147583, | |
| "learning_rate": 0.00016652694738944935, | |
| "loss": 4.0211, | |
| "step": 470000 | |
| }, | |
| { | |
| "epoch": 2.506445907647723, | |
| "grad_norm": 0.9411515593528748, | |
| "learning_rate": 0.0001656369058660693, | |
| "loss": 4.0276, | |
| "step": 470500 | |
| }, | |
| { | |
| "epoch": 2.5091095058492616, | |
| "grad_norm": 0.9541642665863037, | |
| "learning_rate": 0.00016474508069234172, | |
| "loss": 4.0248, | |
| "step": 471000 | |
| }, | |
| { | |
| "epoch": 2.5117731040508002, | |
| "grad_norm": 1.016478180885315, | |
| "learning_rate": 0.00016385325551861418, | |
| "loss": 4.0253, | |
| "step": 471500 | |
| }, | |
| { | |
| "epoch": 2.514436702252339, | |
| "grad_norm": 0.9605896472930908, | |
| "learning_rate": 0.00016296143034488664, | |
| "loss": 4.0201, | |
| "step": 472000 | |
| }, | |
| { | |
| "epoch": 2.517100300453877, | |
| "grad_norm": 0.9732680916786194, | |
| "learning_rate": 0.00016207138882150655, | |
| "loss": 4.02, | |
| "step": 472500 | |
| }, | |
| { | |
| "epoch": 2.5197638986554156, | |
| "grad_norm": 0.9240507483482361, | |
| "learning_rate": 0.000161179563647779, | |
| "loss": 4.0156, | |
| "step": 473000 | |
| }, | |
| { | |
| "epoch": 2.522427496856954, | |
| "grad_norm": 1.063936471939087, | |
| "learning_rate": 0.00016028773847405145, | |
| "loss": 4.0252, | |
| "step": 473500 | |
| }, | |
| { | |
| "epoch": 2.5250910950584924, | |
| "grad_norm": 0.9789932370185852, | |
| "learning_rate": 0.0001593959133003239, | |
| "loss": 4.0243, | |
| "step": 474000 | |
| }, | |
| { | |
| "epoch": 2.527754693260031, | |
| "grad_norm": 0.9427129030227661, | |
| "learning_rate": 0.00015850587177694385, | |
| "loss": 4.0193, | |
| "step": 474500 | |
| }, | |
| { | |
| "epoch": 2.5304182914615696, | |
| "grad_norm": 1.0714107751846313, | |
| "learning_rate": 0.00015761404660321628, | |
| "loss": 4.0165, | |
| "step": 475000 | |
| }, | |
| { | |
| "epoch": 2.533081889663108, | |
| "grad_norm": 0.9931527376174927, | |
| "learning_rate": 0.00015672222142948874, | |
| "loss": 4.0236, | |
| "step": 475500 | |
| }, | |
| { | |
| "epoch": 2.535745487864647, | |
| "grad_norm": 0.9835180640220642, | |
| "learning_rate": 0.0001558303962557612, | |
| "loss": 4.0227, | |
| "step": 476000 | |
| }, | |
| { | |
| "epoch": 2.538409086066185, | |
| "grad_norm": 1.021427869796753, | |
| "learning_rate": 0.00015493857108203366, | |
| "loss": 4.0233, | |
| "step": 476500 | |
| }, | |
| { | |
| "epoch": 2.5410726842677236, | |
| "grad_norm": 1.2135415077209473, | |
| "learning_rate": 0.00015404852955865357, | |
| "loss": 4.0206, | |
| "step": 477000 | |
| }, | |
| { | |
| "epoch": 2.543736282469262, | |
| "grad_norm": 1.0140650272369385, | |
| "learning_rate": 0.000153156704384926, | |
| "loss": 4.0232, | |
| "step": 477500 | |
| }, | |
| { | |
| "epoch": 2.5463998806708004, | |
| "grad_norm": 1.0078463554382324, | |
| "learning_rate": 0.00015226487921119847, | |
| "loss": 4.0182, | |
| "step": 478000 | |
| }, | |
| { | |
| "epoch": 2.549063478872339, | |
| "grad_norm": 1.0854226350784302, | |
| "learning_rate": 0.00015137305403747092, | |
| "loss": 4.019, | |
| "step": 478500 | |
| }, | |
| { | |
| "epoch": 2.5517270770738776, | |
| "grad_norm": 0.9886216521263123, | |
| "learning_rate": 0.00015048301251409084, | |
| "loss": 4.0224, | |
| "step": 479000 | |
| }, | |
| { | |
| "epoch": 2.554390675275416, | |
| "grad_norm": 1.0139665603637695, | |
| "learning_rate": 0.0001495911873403633, | |
| "loss": 4.0129, | |
| "step": 479500 | |
| }, | |
| { | |
| "epoch": 2.557054273476955, | |
| "grad_norm": 0.9683591723442078, | |
| "learning_rate": 0.00014869936216663576, | |
| "loss": 4.017, | |
| "step": 480000 | |
| }, | |
| { | |
| "epoch": 2.559717871678493, | |
| "grad_norm": 1.039494276046753, | |
| "learning_rate": 0.00014780753699290822, | |
| "loss": 4.0145, | |
| "step": 480500 | |
| }, | |
| { | |
| "epoch": 2.5623814698800316, | |
| "grad_norm": 1.0008569955825806, | |
| "learning_rate": 0.00014691749546952813, | |
| "loss": 4.0191, | |
| "step": 481000 | |
| }, | |
| { | |
| "epoch": 2.56504506808157, | |
| "grad_norm": 0.9593690037727356, | |
| "learning_rate": 0.00014602567029580057, | |
| "loss": 4.0247, | |
| "step": 481500 | |
| }, | |
| { | |
| "epoch": 2.5677086662831083, | |
| "grad_norm": 0.9470319747924805, | |
| "learning_rate": 0.00014513384512207303, | |
| "loss": 4.0227, | |
| "step": 482000 | |
| }, | |
| { | |
| "epoch": 2.570372264484647, | |
| "grad_norm": 1.0550135374069214, | |
| "learning_rate": 0.00014424201994834549, | |
| "loss": 4.0201, | |
| "step": 482500 | |
| }, | |
| { | |
| "epoch": 2.5730358626861856, | |
| "grad_norm": 1.0270289182662964, | |
| "learning_rate": 0.0001433519784249654, | |
| "loss": 4.0155, | |
| "step": 483000 | |
| }, | |
| { | |
| "epoch": 2.575699460887724, | |
| "grad_norm": 1.0669533014297485, | |
| "learning_rate": 0.00014246015325123783, | |
| "loss": 4.0256, | |
| "step": 483500 | |
| }, | |
| { | |
| "epoch": 2.5783630590892628, | |
| "grad_norm": 0.9935122132301331, | |
| "learning_rate": 0.0001415683280775103, | |
| "loss": 4.0131, | |
| "step": 484000 | |
| }, | |
| { | |
| "epoch": 2.581026657290801, | |
| "grad_norm": 1.0519307851791382, | |
| "learning_rate": 0.00014067650290378275, | |
| "loss": 4.0225, | |
| "step": 484500 | |
| }, | |
| { | |
| "epoch": 2.5836902554923395, | |
| "grad_norm": 0.9848348498344421, | |
| "learning_rate": 0.0001397864613804027, | |
| "loss": 4.0173, | |
| "step": 485000 | |
| }, | |
| { | |
| "epoch": 2.586353853693878, | |
| "grad_norm": 0.9730287194252014, | |
| "learning_rate": 0.00013889463620667515, | |
| "loss": 4.0184, | |
| "step": 485500 | |
| }, | |
| { | |
| "epoch": 2.5890174518954163, | |
| "grad_norm": 1.023484706878662, | |
| "learning_rate": 0.00013800281103294759, | |
| "loss": 4.0183, | |
| "step": 486000 | |
| }, | |
| { | |
| "epoch": 2.591681050096955, | |
| "grad_norm": 0.9631215929985046, | |
| "learning_rate": 0.00013711098585922005, | |
| "loss": 4.0186, | |
| "step": 486500 | |
| }, | |
| { | |
| "epoch": 2.5943446482984935, | |
| "grad_norm": 0.9774326682090759, | |
| "learning_rate": 0.00013622094433583996, | |
| "loss": 4.0212, | |
| "step": 487000 | |
| }, | |
| { | |
| "epoch": 2.5970082465000317, | |
| "grad_norm": 1.052068829536438, | |
| "learning_rate": 0.00013532911916211242, | |
| "loss": 4.0183, | |
| "step": 487500 | |
| }, | |
| { | |
| "epoch": 2.5996718447015703, | |
| "grad_norm": 0.9873191714286804, | |
| "learning_rate": 0.00013443729398838485, | |
| "loss": 4.0241, | |
| "step": 488000 | |
| }, | |
| { | |
| "epoch": 2.602335442903109, | |
| "grad_norm": 1.1005477905273438, | |
| "learning_rate": 0.0001335454688146573, | |
| "loss": 4.017, | |
| "step": 488500 | |
| }, | |
| { | |
| "epoch": 2.6049990411046475, | |
| "grad_norm": 0.9617475271224976, | |
| "learning_rate": 0.00013265542729127725, | |
| "loss": 4.0207, | |
| "step": 489000 | |
| }, | |
| { | |
| "epoch": 2.607662639306186, | |
| "grad_norm": 0.9862669706344604, | |
| "learning_rate": 0.0001317636021175497, | |
| "loss": 4.0168, | |
| "step": 489500 | |
| }, | |
| { | |
| "epoch": 2.6103262375077243, | |
| "grad_norm": 0.9720093011856079, | |
| "learning_rate": 0.00013087177694382215, | |
| "loss": 4.0058, | |
| "step": 490000 | |
| }, | |
| { | |
| "epoch": 2.612989835709263, | |
| "grad_norm": 0.9520342350006104, | |
| "learning_rate": 0.0001299799517700946, | |
| "loss": 4.0146, | |
| "step": 490500 | |
| }, | |
| { | |
| "epoch": 2.6156534339108015, | |
| "grad_norm": 1.054432988166809, | |
| "learning_rate": 0.00012908991024671452, | |
| "loss": 4.0105, | |
| "step": 491000 | |
| }, | |
| { | |
| "epoch": 2.6183170321123397, | |
| "grad_norm": 0.9796612858772278, | |
| "learning_rate": 0.00012819808507298698, | |
| "loss": 4.0114, | |
| "step": 491500 | |
| }, | |
| { | |
| "epoch": 2.6209806303138783, | |
| "grad_norm": 1.0970081090927124, | |
| "learning_rate": 0.0001273062598992594, | |
| "loss": 4.0232, | |
| "step": 492000 | |
| }, | |
| { | |
| "epoch": 2.623644228515417, | |
| "grad_norm": 0.9749308228492737, | |
| "learning_rate": 0.00012641443472553187, | |
| "loss": 4.009, | |
| "step": 492500 | |
| }, | |
| { | |
| "epoch": 2.6263078267169555, | |
| "grad_norm": 1.0011272430419922, | |
| "learning_rate": 0.00012552439320215181, | |
| "loss": 4.0182, | |
| "step": 493000 | |
| }, | |
| { | |
| "epoch": 2.628971424918494, | |
| "grad_norm": 0.9727855920791626, | |
| "learning_rate": 0.00012463256802842425, | |
| "loss": 4.0142, | |
| "step": 493500 | |
| }, | |
| { | |
| "epoch": 2.6316350231200323, | |
| "grad_norm": 1.054745078086853, | |
| "learning_rate": 0.0001237407428546967, | |
| "loss": 4.0153, | |
| "step": 494000 | |
| }, | |
| { | |
| "epoch": 2.634298621321571, | |
| "grad_norm": 0.9852134585380554, | |
| "learning_rate": 0.00012284891768096917, | |
| "loss": 4.0202, | |
| "step": 494500 | |
| }, | |
| { | |
| "epoch": 2.6369622195231095, | |
| "grad_norm": 1.0056986808776855, | |
| "learning_rate": 0.00012195887615758908, | |
| "loss": 4.0187, | |
| "step": 495000 | |
| }, | |
| { | |
| "epoch": 2.6396258177246477, | |
| "grad_norm": 0.9925665259361267, | |
| "learning_rate": 0.00012106705098386153, | |
| "loss": 4.0102, | |
| "step": 495500 | |
| }, | |
| { | |
| "epoch": 2.6422894159261863, | |
| "grad_norm": 0.9884349703788757, | |
| "learning_rate": 0.00012017522581013399, | |
| "loss": 4.0161, | |
| "step": 496000 | |
| }, | |
| { | |
| "epoch": 2.644953014127725, | |
| "grad_norm": 0.9753773808479309, | |
| "learning_rate": 0.00011928340063640645, | |
| "loss": 4.0122, | |
| "step": 496500 | |
| }, | |
| { | |
| "epoch": 2.6476166123292635, | |
| "grad_norm": 1.0602976083755493, | |
| "learning_rate": 0.00011839157546267889, | |
| "loss": 4.0148, | |
| "step": 497000 | |
| }, | |
| { | |
| "epoch": 2.650280210530802, | |
| "grad_norm": 1.024678349494934, | |
| "learning_rate": 0.00011750153393929882, | |
| "loss": 4.0148, | |
| "step": 497500 | |
| }, | |
| { | |
| "epoch": 2.6529438087323403, | |
| "grad_norm": 1.0422247648239136, | |
| "learning_rate": 0.00011660970876557127, | |
| "loss": 4.0139, | |
| "step": 498000 | |
| }, | |
| { | |
| "epoch": 2.655607406933879, | |
| "grad_norm": 0.9945011734962463, | |
| "learning_rate": 0.00011571788359184373, | |
| "loss": 4.0098, | |
| "step": 498500 | |
| }, | |
| { | |
| "epoch": 2.6582710051354175, | |
| "grad_norm": 0.9866018891334534, | |
| "learning_rate": 0.00011482605841811617, | |
| "loss": 4.0151, | |
| "step": 499000 | |
| }, | |
| { | |
| "epoch": 2.6609346033369556, | |
| "grad_norm": 1.071170449256897, | |
| "learning_rate": 0.0001139360168947361, | |
| "loss": 4.016, | |
| "step": 499500 | |
| }, | |
| { | |
| "epoch": 2.6635982015384942, | |
| "grad_norm": 1.120274543762207, | |
| "learning_rate": 0.00011304419172100855, | |
| "loss": 4.0115, | |
| "step": 500000 | |
| }, | |
| { | |
| "epoch": 2.666261799740033, | |
| "grad_norm": 1.0567705631256104, | |
| "learning_rate": 0.000112152366547281, | |
| "loss": 4.012, | |
| "step": 500500 | |
| }, | |
| { | |
| "epoch": 2.6689253979415715, | |
| "grad_norm": 0.9878965020179749, | |
| "learning_rate": 0.00011126054137355346, | |
| "loss": 4.0176, | |
| "step": 501000 | |
| }, | |
| { | |
| "epoch": 2.67158899614311, | |
| "grad_norm": 1.064886212348938, | |
| "learning_rate": 0.00011037049985017338, | |
| "loss": 4.0103, | |
| "step": 501500 | |
| }, | |
| { | |
| "epoch": 2.6742525943446482, | |
| "grad_norm": 1.0028510093688965, | |
| "learning_rate": 0.00010947867467644583, | |
| "loss": 4.0122, | |
| "step": 502000 | |
| }, | |
| { | |
| "epoch": 2.676916192546187, | |
| "grad_norm": 1.0561763048171997, | |
| "learning_rate": 0.00010858684950271829, | |
| "loss": 4.0078, | |
| "step": 502500 | |
| }, | |
| { | |
| "epoch": 2.6795797907477255, | |
| "grad_norm": 0.9861183166503906, | |
| "learning_rate": 0.00010769502432899074, | |
| "loss": 4.0162, | |
| "step": 503000 | |
| }, | |
| { | |
| "epoch": 2.6822433889492636, | |
| "grad_norm": 1.0413438081741333, | |
| "learning_rate": 0.00010680498280561066, | |
| "loss": 4.0205, | |
| "step": 503500 | |
| }, | |
| { | |
| "epoch": 2.6849069871508022, | |
| "grad_norm": 0.9923077821731567, | |
| "learning_rate": 0.0001059131576318831, | |
| "loss": 4.0078, | |
| "step": 504000 | |
| }, | |
| { | |
| "epoch": 2.687570585352341, | |
| "grad_norm": 0.9952608346939087, | |
| "learning_rate": 0.00010502133245815557, | |
| "loss": 4.0078, | |
| "step": 504500 | |
| }, | |
| { | |
| "epoch": 2.690234183553879, | |
| "grad_norm": 1.0345313549041748, | |
| "learning_rate": 0.00010412950728442802, | |
| "loss": 4.0118, | |
| "step": 505000 | |
| }, | |
| { | |
| "epoch": 2.6928977817554176, | |
| "grad_norm": 0.9837112426757812, | |
| "learning_rate": 0.00010323946576104794, | |
| "loss": 4.0108, | |
| "step": 505500 | |
| }, | |
| { | |
| "epoch": 2.695561379956956, | |
| "grad_norm": 1.0294288396835327, | |
| "learning_rate": 0.00010234764058732039, | |
| "loss": 4.0074, | |
| "step": 506000 | |
| }, | |
| { | |
| "epoch": 2.698224978158495, | |
| "grad_norm": 1.0430691242218018, | |
| "learning_rate": 0.00010145581541359285, | |
| "loss": 4.008, | |
| "step": 506500 | |
| }, | |
| { | |
| "epoch": 2.7008885763600334, | |
| "grad_norm": 1.006121039390564, | |
| "learning_rate": 0.0001005639902398653, | |
| "loss": 4.0022, | |
| "step": 507000 | |
| }, | |
| { | |
| "epoch": 2.7035521745615716, | |
| "grad_norm": 1.0028232336044312, | |
| "learning_rate": 9.967216506613775e-05, | |
| "loss": 4.0164, | |
| "step": 507500 | |
| }, | |
| { | |
| "epoch": 2.70621577276311, | |
| "grad_norm": 0.9883862733840942, | |
| "learning_rate": 9.878212354275768e-05, | |
| "loss": 4.0104, | |
| "step": 508000 | |
| }, | |
| { | |
| "epoch": 2.708879370964649, | |
| "grad_norm": 1.087190866470337, | |
| "learning_rate": 9.789029836903013e-05, | |
| "loss": 4.0132, | |
| "step": 508500 | |
| }, | |
| { | |
| "epoch": 2.711542969166187, | |
| "grad_norm": 1.0679038763046265, | |
| "learning_rate": 9.699847319530258e-05, | |
| "loss": 4.0105, | |
| "step": 509000 | |
| }, | |
| { | |
| "epoch": 2.7142065673677256, | |
| "grad_norm": 0.9755781888961792, | |
| "learning_rate": 9.610664802157504e-05, | |
| "loss": 4.0141, | |
| "step": 509500 | |
| }, | |
| { | |
| "epoch": 2.716870165569264, | |
| "grad_norm": 1.09120512008667, | |
| "learning_rate": 9.521660649819495e-05, | |
| "loss": 4.0138, | |
| "step": 510000 | |
| }, | |
| { | |
| "epoch": 2.719533763770803, | |
| "grad_norm": 1.0885505676269531, | |
| "learning_rate": 9.43247813244674e-05, | |
| "loss": 4.0065, | |
| "step": 510500 | |
| }, | |
| { | |
| "epoch": 2.7221973619723414, | |
| "grad_norm": 0.9858110547065735, | |
| "learning_rate": 9.343295615073986e-05, | |
| "loss": 4.0082, | |
| "step": 511000 | |
| }, | |
| { | |
| "epoch": 2.7248609601738796, | |
| "grad_norm": 1.0929360389709473, | |
| "learning_rate": 9.254113097701232e-05, | |
| "loss": 4.0107, | |
| "step": 511500 | |
| }, | |
| { | |
| "epoch": 2.727524558375418, | |
| "grad_norm": 1.139798641204834, | |
| "learning_rate": 9.165108945363223e-05, | |
| "loss": 4.0113, | |
| "step": 512000 | |
| }, | |
| { | |
| "epoch": 2.730188156576957, | |
| "grad_norm": 1.009216070175171, | |
| "learning_rate": 9.075926427990467e-05, | |
| "loss": 4.0065, | |
| "step": 512500 | |
| }, | |
| { | |
| "epoch": 2.732851754778495, | |
| "grad_norm": 1.047379732131958, | |
| "learning_rate": 8.986743910617714e-05, | |
| "loss": 4.0164, | |
| "step": 513000 | |
| }, | |
| { | |
| "epoch": 2.7355153529800336, | |
| "grad_norm": 0.9918530583381653, | |
| "learning_rate": 8.89756139324496e-05, | |
| "loss": 4.0016, | |
| "step": 513500 | |
| }, | |
| { | |
| "epoch": 2.738178951181572, | |
| "grad_norm": 1.0664864778518677, | |
| "learning_rate": 8.80855724090695e-05, | |
| "loss": 4.0112, | |
| "step": 514000 | |
| }, | |
| { | |
| "epoch": 2.740842549383111, | |
| "grad_norm": 1.0139024257659912, | |
| "learning_rate": 8.719374723534195e-05, | |
| "loss": 4.014, | |
| "step": 514500 | |
| }, | |
| { | |
| "epoch": 2.7435061475846494, | |
| "grad_norm": 1.0350786447525024, | |
| "learning_rate": 8.630192206161441e-05, | |
| "loss": 4.0062, | |
| "step": 515000 | |
| }, | |
| { | |
| "epoch": 2.7461697457861876, | |
| "grad_norm": 1.1327440738677979, | |
| "learning_rate": 8.541009688788688e-05, | |
| "loss": 4.0072, | |
| "step": 515500 | |
| }, | |
| { | |
| "epoch": 2.748833343987726, | |
| "grad_norm": 1.0807819366455078, | |
| "learning_rate": 8.452005536450679e-05, | |
| "loss": 4.0037, | |
| "step": 516000 | |
| }, | |
| { | |
| "epoch": 2.7514969421892648, | |
| "grad_norm": 0.9618473649024963, | |
| "learning_rate": 8.362823019077925e-05, | |
| "loss": 4.0069, | |
| "step": 516500 | |
| }, | |
| { | |
| "epoch": 2.754160540390803, | |
| "grad_norm": 1.0459738969802856, | |
| "learning_rate": 8.273640501705169e-05, | |
| "loss": 4.0066, | |
| "step": 517000 | |
| }, | |
| { | |
| "epoch": 2.7568241385923415, | |
| "grad_norm": 0.9917722940444946, | |
| "learning_rate": 8.184457984332415e-05, | |
| "loss": 3.9992, | |
| "step": 517500 | |
| }, | |
| { | |
| "epoch": 2.75948773679388, | |
| "grad_norm": 1.0388100147247314, | |
| "learning_rate": 8.095453831994407e-05, | |
| "loss": 4.0052, | |
| "step": 518000 | |
| }, | |
| { | |
| "epoch": 2.7621513349954188, | |
| "grad_norm": 1.041391372680664, | |
| "learning_rate": 8.006271314621653e-05, | |
| "loss": 4.0032, | |
| "step": 518500 | |
| }, | |
| { | |
| "epoch": 2.7648149331969574, | |
| "grad_norm": 1.06915283203125, | |
| "learning_rate": 7.917088797248897e-05, | |
| "loss": 4.0031, | |
| "step": 519000 | |
| }, | |
| { | |
| "epoch": 2.7674785313984955, | |
| "grad_norm": 1.0097078084945679, | |
| "learning_rate": 7.827906279876143e-05, | |
| "loss": 4.0074, | |
| "step": 519500 | |
| }, | |
| { | |
| "epoch": 2.770142129600034, | |
| "grad_norm": 1.0231430530548096, | |
| "learning_rate": 7.738902127538135e-05, | |
| "loss": 4.0133, | |
| "step": 520000 | |
| }, | |
| { | |
| "epoch": 2.7728057278015728, | |
| "grad_norm": 1.1709152460098267, | |
| "learning_rate": 7.64971961016538e-05, | |
| "loss": 4.0105, | |
| "step": 520500 | |
| }, | |
| { | |
| "epoch": 2.775469326003111, | |
| "grad_norm": 1.0553919076919556, | |
| "learning_rate": 7.560537092792625e-05, | |
| "loss": 4.0005, | |
| "step": 521000 | |
| }, | |
| { | |
| "epoch": 2.7781329242046495, | |
| "grad_norm": 1.0332099199295044, | |
| "learning_rate": 7.471354575419871e-05, | |
| "loss": 4.0137, | |
| "step": 521500 | |
| }, | |
| { | |
| "epoch": 2.780796522406188, | |
| "grad_norm": 1.0436155796051025, | |
| "learning_rate": 7.382350423081863e-05, | |
| "loss": 4.0046, | |
| "step": 522000 | |
| }, | |
| { | |
| "epoch": 2.7834601206077263, | |
| "grad_norm": 1.0391409397125244, | |
| "learning_rate": 7.293167905709109e-05, | |
| "loss": 4.0041, | |
| "step": 522500 | |
| }, | |
| { | |
| "epoch": 2.786123718809265, | |
| "grad_norm": 1.1365002393722534, | |
| "learning_rate": 7.203985388336353e-05, | |
| "loss": 4.0052, | |
| "step": 523000 | |
| }, | |
| { | |
| "epoch": 2.7887873170108035, | |
| "grad_norm": 1.0857511758804321, | |
| "learning_rate": 7.114802870963599e-05, | |
| "loss": 4.0059, | |
| "step": 523500 | |
| }, | |
| { | |
| "epoch": 2.791450915212342, | |
| "grad_norm": 0.9912382364273071, | |
| "learning_rate": 7.02579871862559e-05, | |
| "loss": 3.9987, | |
| "step": 524000 | |
| }, | |
| { | |
| "epoch": 2.7941145134138807, | |
| "grad_norm": 1.032727599143982, | |
| "learning_rate": 6.936616201252837e-05, | |
| "loss": 4.0058, | |
| "step": 524500 | |
| }, | |
| { | |
| "epoch": 2.796778111615419, | |
| "grad_norm": 1.0187702178955078, | |
| "learning_rate": 6.847433683880082e-05, | |
| "loss": 4.0103, | |
| "step": 525000 | |
| }, | |
| { | |
| "epoch": 2.7994417098169575, | |
| "grad_norm": 0.981054425239563, | |
| "learning_rate": 6.758251166507327e-05, | |
| "loss": 4.0111, | |
| "step": 525500 | |
| }, | |
| { | |
| "epoch": 2.802105308018496, | |
| "grad_norm": 1.1054233312606812, | |
| "learning_rate": 6.669068649134573e-05, | |
| "loss": 4.0051, | |
| "step": 526000 | |
| }, | |
| { | |
| "epoch": 2.8047689062200343, | |
| "grad_norm": 1.060707449913025, | |
| "learning_rate": 6.580064496796565e-05, | |
| "loss": 4.0112, | |
| "step": 526500 | |
| }, | |
| { | |
| "epoch": 2.807432504421573, | |
| "grad_norm": 0.9906247854232788, | |
| "learning_rate": 6.49088197942381e-05, | |
| "loss": 4.0067, | |
| "step": 527000 | |
| }, | |
| { | |
| "epoch": 2.8100961026231115, | |
| "grad_norm": 1.0259308815002441, | |
| "learning_rate": 6.401699462051055e-05, | |
| "loss": 3.9976, | |
| "step": 527500 | |
| }, | |
| { | |
| "epoch": 2.81275970082465, | |
| "grad_norm": 1.0347638130187988, | |
| "learning_rate": 6.312516944678301e-05, | |
| "loss": 4.0036, | |
| "step": 528000 | |
| }, | |
| { | |
| "epoch": 2.8154232990261887, | |
| "grad_norm": 1.0310813188552856, | |
| "learning_rate": 6.223512792340293e-05, | |
| "loss": 3.9994, | |
| "step": 528500 | |
| }, | |
| { | |
| "epoch": 2.818086897227727, | |
| "grad_norm": 1.085179090499878, | |
| "learning_rate": 6.134330274967537e-05, | |
| "loss": 4.0085, | |
| "step": 529000 | |
| }, | |
| { | |
| "epoch": 2.8207504954292655, | |
| "grad_norm": 1.0044561624526978, | |
| "learning_rate": 6.045147757594784e-05, | |
| "loss": 4.0058, | |
| "step": 529500 | |
| }, | |
| { | |
| "epoch": 2.823414093630804, | |
| "grad_norm": 1.0580705404281616, | |
| "learning_rate": 5.955965240222029e-05, | |
| "loss": 3.9968, | |
| "step": 530000 | |
| }, | |
| { | |
| "epoch": 2.8260776918323423, | |
| "grad_norm": 1.1205203533172607, | |
| "learning_rate": 5.86696108788402e-05, | |
| "loss": 3.9991, | |
| "step": 530500 | |
| }, | |
| { | |
| "epoch": 2.828741290033881, | |
| "grad_norm": 1.0346322059631348, | |
| "learning_rate": 5.777778570511266e-05, | |
| "loss": 4.0044, | |
| "step": 531000 | |
| }, | |
| { | |
| "epoch": 2.8314048882354195, | |
| "grad_norm": 1.078075647354126, | |
| "learning_rate": 5.688596053138511e-05, | |
| "loss": 3.9978, | |
| "step": 531500 | |
| }, | |
| { | |
| "epoch": 2.834068486436958, | |
| "grad_norm": 1.0365418195724487, | |
| "learning_rate": 5.599413535765757e-05, | |
| "loss": 4.0039, | |
| "step": 532000 | |
| }, | |
| { | |
| "epoch": 2.8367320846384967, | |
| "grad_norm": 1.0657716989517212, | |
| "learning_rate": 5.510409383427748e-05, | |
| "loss": 4.004, | |
| "step": 532500 | |
| }, | |
| { | |
| "epoch": 2.839395682840035, | |
| "grad_norm": 1.1193735599517822, | |
| "learning_rate": 5.421226866054994e-05, | |
| "loss": 3.9981, | |
| "step": 533000 | |
| }, | |
| { | |
| "epoch": 2.8420592810415735, | |
| "grad_norm": 1.0354912281036377, | |
| "learning_rate": 5.332044348682239e-05, | |
| "loss": 4.004, | |
| "step": 533500 | |
| }, | |
| { | |
| "epoch": 2.844722879243112, | |
| "grad_norm": 1.0501588582992554, | |
| "learning_rate": 5.2428618313094844e-05, | |
| "loss": 4.0008, | |
| "step": 534000 | |
| }, | |
| { | |
| "epoch": 2.8473864774446502, | |
| "grad_norm": 1.0080904960632324, | |
| "learning_rate": 5.1538576789714766e-05, | |
| "loss": 4.002, | |
| "step": 534500 | |
| }, | |
| { | |
| "epoch": 2.850050075646189, | |
| "grad_norm": 1.0569877624511719, | |
| "learning_rate": 5.064675161598722e-05, | |
| "loss": 4.0042, | |
| "step": 535000 | |
| }, | |
| { | |
| "epoch": 2.8527136738477274, | |
| "grad_norm": 1.0170665979385376, | |
| "learning_rate": 4.975492644225967e-05, | |
| "loss": 4.0016, | |
| "step": 535500 | |
| }, | |
| { | |
| "epoch": 2.855377272049266, | |
| "grad_norm": 1.0019437074661255, | |
| "learning_rate": 4.886310126853213e-05, | |
| "loss": 3.9992, | |
| "step": 536000 | |
| }, | |
| { | |
| "epoch": 2.8580408702508047, | |
| "grad_norm": 1.059810757637024, | |
| "learning_rate": 4.797305974515204e-05, | |
| "loss": 4.0066, | |
| "step": 536500 | |
| }, | |
| { | |
| "epoch": 2.860704468452343, | |
| "grad_norm": 1.0938292741775513, | |
| "learning_rate": 4.70812345714245e-05, | |
| "loss": 4.0008, | |
| "step": 537000 | |
| }, | |
| { | |
| "epoch": 2.8633680666538814, | |
| "grad_norm": 1.0392727851867676, | |
| "learning_rate": 4.618940939769695e-05, | |
| "loss": 4.0009, | |
| "step": 537500 | |
| }, | |
| { | |
| "epoch": 2.86603166485542, | |
| "grad_norm": 1.041225790977478, | |
| "learning_rate": 4.529758422396941e-05, | |
| "loss": 4.0025, | |
| "step": 538000 | |
| }, | |
| { | |
| "epoch": 2.868695263056958, | |
| "grad_norm": 1.0904215574264526, | |
| "learning_rate": 4.440754270058932e-05, | |
| "loss": 3.9982, | |
| "step": 538500 | |
| }, | |
| { | |
| "epoch": 2.871358861258497, | |
| "grad_norm": 1.0225439071655273, | |
| "learning_rate": 4.351571752686177e-05, | |
| "loss": 3.9986, | |
| "step": 539000 | |
| }, | |
| { | |
| "epoch": 2.8740224594600354, | |
| "grad_norm": 1.0368945598602295, | |
| "learning_rate": 4.262389235313424e-05, | |
| "loss": 3.9998, | |
| "step": 539500 | |
| }, | |
| { | |
| "epoch": 2.8766860576615736, | |
| "grad_norm": 1.0657331943511963, | |
| "learning_rate": 4.173206717940669e-05, | |
| "loss": 3.996, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 2.879349655863112, | |
| "grad_norm": 1.0275654792785645, | |
| "learning_rate": 4.084024200567914e-05, | |
| "loss": 3.9983, | |
| "step": 540500 | |
| }, | |
| { | |
| "epoch": 2.882013254064651, | |
| "grad_norm": 1.107050895690918, | |
| "learning_rate": 3.995020048229905e-05, | |
| "loss": 4.0028, | |
| "step": 541000 | |
| }, | |
| { | |
| "epoch": 2.8846768522661894, | |
| "grad_norm": 1.001038908958435, | |
| "learning_rate": 3.905837530857151e-05, | |
| "loss": 3.9941, | |
| "step": 541500 | |
| }, | |
| { | |
| "epoch": 2.887340450467728, | |
| "grad_norm": 1.0545873641967773, | |
| "learning_rate": 3.8166550134843964e-05, | |
| "loss": 3.9987, | |
| "step": 542000 | |
| }, | |
| { | |
| "epoch": 2.890004048669266, | |
| "grad_norm": 1.0375920534133911, | |
| "learning_rate": 3.727472496111642e-05, | |
| "loss": 3.995, | |
| "step": 542500 | |
| }, | |
| { | |
| "epoch": 2.892667646870805, | |
| "grad_norm": 1.0322425365447998, | |
| "learning_rate": 3.638468343773634e-05, | |
| "loss": 3.994, | |
| "step": 543000 | |
| }, | |
| { | |
| "epoch": 2.8953312450723434, | |
| "grad_norm": 1.0789730548858643, | |
| "learning_rate": 3.549285826400879e-05, | |
| "loss": 3.9958, | |
| "step": 543500 | |
| }, | |
| { | |
| "epoch": 2.8979948432738816, | |
| "grad_norm": 1.1932363510131836, | |
| "learning_rate": 3.4601033090281244e-05, | |
| "loss": 4.005, | |
| "step": 544000 | |
| }, | |
| { | |
| "epoch": 2.90065844147542, | |
| "grad_norm": 1.1194884777069092, | |
| "learning_rate": 3.3709207916553696e-05, | |
| "loss": 3.9965, | |
| "step": 544500 | |
| }, | |
| { | |
| "epoch": 2.903322039676959, | |
| "grad_norm": 1.03001868724823, | |
| "learning_rate": 3.281916639317362e-05, | |
| "loss": 4.0013, | |
| "step": 545000 | |
| }, | |
| { | |
| "epoch": 2.9059856378784974, | |
| "grad_norm": 0.986453115940094, | |
| "learning_rate": 3.192734121944607e-05, | |
| "loss": 3.9935, | |
| "step": 545500 | |
| }, | |
| { | |
| "epoch": 2.908649236080036, | |
| "grad_norm": 1.0338671207427979, | |
| "learning_rate": 3.1035516045718524e-05, | |
| "loss": 4.0017, | |
| "step": 546000 | |
| }, | |
| { | |
| "epoch": 2.911312834281574, | |
| "grad_norm": 1.0669965744018555, | |
| "learning_rate": 3.014369087199098e-05, | |
| "loss": 3.9954, | |
| "step": 546500 | |
| }, | |
| { | |
| "epoch": 2.9139764324831128, | |
| "grad_norm": 1.024873971939087, | |
| "learning_rate": 2.9253649348610895e-05, | |
| "loss": 3.9967, | |
| "step": 547000 | |
| }, | |
| { | |
| "epoch": 2.9166400306846514, | |
| "grad_norm": 1.0891566276550293, | |
| "learning_rate": 2.8361824174883348e-05, | |
| "loss": 4.0024, | |
| "step": 547500 | |
| }, | |
| { | |
| "epoch": 2.9193036288861895, | |
| "grad_norm": 0.9691978096961975, | |
| "learning_rate": 2.7469999001155807e-05, | |
| "loss": 3.9982, | |
| "step": 548000 | |
| }, | |
| { | |
| "epoch": 2.921967227087728, | |
| "grad_norm": 1.0564926862716675, | |
| "learning_rate": 2.6578173827428263e-05, | |
| "loss": 4.0025, | |
| "step": 548500 | |
| }, | |
| { | |
| "epoch": 2.9246308252892668, | |
| "grad_norm": 0.997660756111145, | |
| "learning_rate": 2.5688132304048175e-05, | |
| "loss": 3.9959, | |
| "step": 549000 | |
| }, | |
| { | |
| "epoch": 2.9272944234908054, | |
| "grad_norm": 1.0368565320968628, | |
| "learning_rate": 2.479630713032063e-05, | |
| "loss": 3.9977, | |
| "step": 549500 | |
| }, | |
| { | |
| "epoch": 2.929958021692344, | |
| "grad_norm": 1.069231629371643, | |
| "learning_rate": 2.3904481956593084e-05, | |
| "loss": 3.9915, | |
| "step": 550000 | |
| }, | |
| { | |
| "epoch": 2.932621619893882, | |
| "grad_norm": 1.0751917362213135, | |
| "learning_rate": 2.3012656782865543e-05, | |
| "loss": 3.997, | |
| "step": 550500 | |
| }, | |
| { | |
| "epoch": 2.9352852180954208, | |
| "grad_norm": 1.0397218465805054, | |
| "learning_rate": 2.212261525948545e-05, | |
| "loss": 3.9997, | |
| "step": 551000 | |
| }, | |
| { | |
| "epoch": 2.9379488162969594, | |
| "grad_norm": 1.086714506149292, | |
| "learning_rate": 2.1230790085757908e-05, | |
| "loss": 3.9943, | |
| "step": 551500 | |
| }, | |
| { | |
| "epoch": 2.9406124144984975, | |
| "grad_norm": 1.141553521156311, | |
| "learning_rate": 2.0338964912030367e-05, | |
| "loss": 3.9987, | |
| "step": 552000 | |
| }, | |
| { | |
| "epoch": 2.943276012700036, | |
| "grad_norm": 1.005601406097412, | |
| "learning_rate": 1.944713973830282e-05, | |
| "loss": 3.9904, | |
| "step": 552500 | |
| }, | |
| { | |
| "epoch": 2.9459396109015747, | |
| "grad_norm": 1.010642647743225, | |
| "learning_rate": 1.8557098214922735e-05, | |
| "loss": 3.9881, | |
| "step": 553000 | |
| }, | |
| { | |
| "epoch": 2.9486032091031134, | |
| "grad_norm": 1.104560375213623, | |
| "learning_rate": 1.7665273041195188e-05, | |
| "loss": 3.9918, | |
| "step": 553500 | |
| }, | |
| { | |
| "epoch": 2.951266807304652, | |
| "grad_norm": 1.0412003993988037, | |
| "learning_rate": 1.6773447867467644e-05, | |
| "loss": 3.9997, | |
| "step": 554000 | |
| }, | |
| { | |
| "epoch": 2.95393040550619, | |
| "grad_norm": 1.0635658502578735, | |
| "learning_rate": 1.5881622693740103e-05, | |
| "loss": 3.994, | |
| "step": 554500 | |
| }, | |
| { | |
| "epoch": 2.9565940037077287, | |
| "grad_norm": 1.0909868478775024, | |
| "learning_rate": 1.4991581170360012e-05, | |
| "loss": 3.9942, | |
| "step": 555000 | |
| }, | |
| { | |
| "epoch": 2.9592576019092673, | |
| "grad_norm": 1.052293062210083, | |
| "learning_rate": 1.4099755996632468e-05, | |
| "loss": 3.9975, | |
| "step": 555500 | |
| }, | |
| { | |
| "epoch": 2.9619212001108055, | |
| "grad_norm": 1.068088412284851, | |
| "learning_rate": 1.3207930822904926e-05, | |
| "loss": 3.9942, | |
| "step": 556000 | |
| }, | |
| { | |
| "epoch": 2.964584798312344, | |
| "grad_norm": 1.1510958671569824, | |
| "learning_rate": 1.2316105649177382e-05, | |
| "loss": 3.9951, | |
| "step": 556500 | |
| }, | |
| { | |
| "epoch": 2.9672483965138827, | |
| "grad_norm": 1.048006534576416, | |
| "learning_rate": 1.1426064125797293e-05, | |
| "loss": 3.9971, | |
| "step": 557000 | |
| }, | |
| { | |
| "epoch": 2.9699119947154213, | |
| "grad_norm": 1.0319584608078003, | |
| "learning_rate": 1.0534238952069748e-05, | |
| "loss": 3.9934, | |
| "step": 557500 | |
| }, | |
| { | |
| "epoch": 2.9725755929169595, | |
| "grad_norm": 1.0391571521759033, | |
| "learning_rate": 9.642413778342204e-06, | |
| "loss": 3.9943, | |
| "step": 558000 | |
| }, | |
| { | |
| "epoch": 2.975239191118498, | |
| "grad_norm": 1.0609184503555298, | |
| "learning_rate": 8.75058860461466e-06, | |
| "loss": 3.9923, | |
| "step": 558500 | |
| }, | |
| { | |
| "epoch": 2.9779027893200367, | |
| "grad_norm": 1.0420206785202026, | |
| "learning_rate": 7.860547081234572e-06, | |
| "loss": 3.9939, | |
| "step": 559000 | |
| }, | |
| { | |
| "epoch": 2.9805663875215753, | |
| "grad_norm": 1.0162791013717651, | |
| "learning_rate": 6.968721907507028e-06, | |
| "loss": 3.9993, | |
| "step": 559500 | |
| }, | |
| { | |
| "epoch": 2.9832299857231135, | |
| "grad_norm": 1.1188008785247803, | |
| "learning_rate": 6.076896733779484e-06, | |
| "loss": 3.9952, | |
| "step": 560000 | |
| }, | |
| { | |
| "epoch": 2.985893583924652, | |
| "grad_norm": 1.1251684427261353, | |
| "learning_rate": 5.18507156005194e-06, | |
| "loss": 3.9936, | |
| "step": 560500 | |
| }, | |
| { | |
| "epoch": 2.9885571821261907, | |
| "grad_norm": 1.072590947151184, | |
| "learning_rate": 4.295030036671852e-06, | |
| "loss": 3.9891, | |
| "step": 561000 | |
| }, | |
| { | |
| "epoch": 2.991220780327729, | |
| "grad_norm": 1.0949697494506836, | |
| "learning_rate": 3.403204862944307e-06, | |
| "loss": 3.9909, | |
| "step": 561500 | |
| }, | |
| { | |
| "epoch": 2.9938843785292675, | |
| "grad_norm": 1.0467427968978882, | |
| "learning_rate": 2.5113796892167635e-06, | |
| "loss": 4.0004, | |
| "step": 562000 | |
| }, | |
| { | |
| "epoch": 2.996547976730806, | |
| "grad_norm": 1.0436049699783325, | |
| "learning_rate": 1.6195545154892197e-06, | |
| "loss": 3.9896, | |
| "step": 562500 | |
| }, | |
| { | |
| "epoch": 2.9992115749323447, | |
| "grad_norm": 1.1010395288467407, | |
| "learning_rate": 7.295129921091309e-07, | |
| "loss": 3.9912, | |
| "step": 563000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 563148, | |
| "total_flos": 4.819699538212516e+17, | |
| "train_loss": 4.150129232981245, | |
| "train_runtime": 39834.0737, | |
| "train_samples_per_second": 904.789, | |
| "train_steps_per_second": 14.137 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 563148, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 5000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.819699538212516e+17, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
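
The object above is a complete `trainer_state.json` as written by the Hugging Face `Trainer`: one log record every `logging_steps: 500` optimizer steps up to `max_steps: 563148` (three epochs), a learning rate decaying roughly linearly toward zero (about 1.75e-4 at step 465,000 down to about 7.3e-07 at step 563,000), and a trailing summary record that reports the run-averaged `train_loss` instead of a per-step `loss`. As a minimal sketch of how such a state file can be inspected, assuming it is saved to disk as plain JSON under the hypothetical name `trainer_state.json` (the default name the Trainer uses inside checkpoint folders), the snippet below pulls the per-step loss and learning-rate curves out of `log_history`:

```python
import json

# Load the trainer state; the filename is an assumption, matching the
# default name the Hugging Face Trainer writes inside checkpoint folders.
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step records carry a "loss" key; the trailing summary entry
# (epoch 3.0, step 563148) carries "train_loss" instead, so it is skipped.
records = [r for r in state["log_history"] if "loss" in r]

steps = [r["step"] for r in records]
losses = [r["loss"] for r in records]
lrs = [r["learning_rate"] for r in records]

print(f"logged points : {len(records)}")
print(f"final loss    : {losses[-1]:.4f} at step {steps[-1]}")
print(f"final lr      : {lrs[-1]:.3e}")
print(f"mean train loss: {state['log_history'][-1]['train_loss']:.4f}")
```

On this file the script would report a final logged loss of 3.9912 at step 563,000, a final learning rate of about 7.295e-07, and the run's reported mean train loss of 4.1501, matching the `train_loss` field in the summary record.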