diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,9 +1,9 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.5625180957107995, + "epoch": 0.43016089672002317, "eval_steps": 500, - "global_step": 68000, + "global_step": 52000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -36407,11206 +36407,6 @@ "learning_rate": 7.188686115401628e-06, "loss": 127.5024, "step": 52000 - }, - { - "epoch": 0.4302436199693924, - "grad_norm": 1592.32666015625, - "learning_rate": 7.187417963144358e-06, - "loss": 102.1783, - "step": 52010 - }, - { - "epoch": 0.4303263432187616, - "grad_norm": 356.8098449707031, - "learning_rate": 7.18614963684428e-06, - "loss": 94.6831, - "step": 52020 - }, - { - "epoch": 0.4304090664681309, - "grad_norm": 762.9508666992188, - "learning_rate": 7.184881136602309e-06, - "loss": 117.5537, - "step": 52030 - }, - { - "epoch": 0.43049178971750013, - "grad_norm": 614.644775390625, - "learning_rate": 7.183612462519371e-06, - "loss": 120.3886, - "step": 52040 - }, - { - "epoch": 0.4305745129668693, - "grad_norm": 884.6917724609375, - "learning_rate": 7.182343614696412e-06, - "loss": 87.1297, - "step": 52050 - }, - { - "epoch": 0.4306572362162386, - "grad_norm": 1503.93603515625, - "learning_rate": 7.181074593234387e-06, - "loss": 102.104, - "step": 52060 - }, - { - "epoch": 0.43073995946560784, - "grad_norm": 431.5899963378906, - "learning_rate": 7.179805398234266e-06, - "loss": 93.1396, - "step": 52070 - }, - { - "epoch": 0.43082268271497703, - "grad_norm": 1156.7777099609375, - "learning_rate": 7.178536029797035e-06, - "loss": 113.4172, - "step": 52080 - }, - { - "epoch": 0.4309054059643463, - "grad_norm": 680.9636840820312, - "learning_rate": 7.177266488023688e-06, - "loss": 92.4326, - "step": 52090 - }, - { - "epoch": 0.4309881292137155, - "grad_norm": 1016.119873046875, - "learning_rate": 7.17599677301524e-06, - "loss": 83.3582, - "step": 52100 - }, - { - "epoch": 0.43107085246308474, - "grad_norm": 625.1591796875, - "learning_rate": 7.174726884872716e-06, - "loss": 85.9998, - "step": 52110 - }, - { - "epoch": 0.431153575712454, - "grad_norm": 1880.2071533203125, - "learning_rate": 7.173456823697154e-06, - "loss": 148.4656, - "step": 52120 - }, - { - "epoch": 0.4312362989618232, - "grad_norm": 559.7216796875, - "learning_rate": 7.172186589589607e-06, - "loss": 117.4733, - "step": 52130 - }, - { - "epoch": 0.43131902221119245, - "grad_norm": 878.204833984375, - "learning_rate": 7.170916182651141e-06, - "loss": 72.1657, - "step": 52140 - }, - { - "epoch": 0.4314017454605617, - "grad_norm": 785.7388916015625, - "learning_rate": 7.1696456029828386e-06, - "loss": 78.2875, - "step": 52150 - }, - { - "epoch": 0.4314844687099309, - "grad_norm": 960.450439453125, - "learning_rate": 7.168374850685794e-06, - "loss": 99.7961, - "step": 52160 - }, - { - "epoch": 0.43156719195930016, - "grad_norm": 1000.7466430664062, - "learning_rate": 7.167103925861113e-06, - "loss": 113.2332, - "step": 52170 - }, - { - "epoch": 0.4316499152086694, - "grad_norm": 923.0742797851562, - "learning_rate": 7.165832828609918e-06, - "loss": 108.3951, - "step": 52180 - }, - { - "epoch": 0.4317326384580386, - "grad_norm": 1476.0849609375, - "learning_rate": 7.164561559033344e-06, - "loss": 104.3691, - "step": 52190 - }, - { - "epoch": 0.43181536170740786, - "grad_norm": 656.1659545898438, - "learning_rate": 7.163290117232542e-06, - "loss": 111.1306, - "step": 52200 - }, - { - "epoch": 0.4318980849567771, - "grad_norm": 1209.8184814453125, - "learning_rate": 7.162018503308674e-06, - "loss": 105.6424, - "step": 52210 - }, - { - "epoch": 0.4319808082061463, - "grad_norm": 780.0021362304688, - "learning_rate": 7.1607467173629145e-06, - "loss": 90.2464, - "step": 52220 - }, - { - "epoch": 0.43206353145551557, - "grad_norm": 785.8810424804688, - "learning_rate": 7.1594747594964564e-06, - "loss": 123.5292, - "step": 52230 - }, - { - "epoch": 0.4321462547048848, - "grad_norm": 1079.1829833984375, - "learning_rate": 7.1582026298105e-06, - "loss": 79.3976, - "step": 52240 - }, - { - "epoch": 0.432228977954254, - "grad_norm": 1131.198974609375, - "learning_rate": 7.156930328406268e-06, - "loss": 137.1325, - "step": 52250 - }, - { - "epoch": 0.4323117012036233, - "grad_norm": 1154.282470703125, - "learning_rate": 7.1556578553849875e-06, - "loss": 78.7498, - "step": 52260 - }, - { - "epoch": 0.43239442445299253, - "grad_norm": 783.6915283203125, - "learning_rate": 7.154385210847905e-06, - "loss": 96.9286, - "step": 52270 - }, - { - "epoch": 0.43247714770236173, - "grad_norm": 1002.9415893554688, - "learning_rate": 7.153112394896279e-06, - "loss": 101.9183, - "step": 52280 - }, - { - "epoch": 0.432559870951731, - "grad_norm": 805.485595703125, - "learning_rate": 7.15183940763138e-06, - "loss": 84.4508, - "step": 52290 - }, - { - "epoch": 0.43264259420110024, - "grad_norm": 972.188720703125, - "learning_rate": 7.150566249154496e-06, - "loss": 112.0686, - "step": 52300 - }, - { - "epoch": 0.43272531745046944, - "grad_norm": 694.895263671875, - "learning_rate": 7.149292919566924e-06, - "loss": 86.5633, - "step": 52310 - }, - { - "epoch": 0.4328080406998387, - "grad_norm": 507.7707214355469, - "learning_rate": 7.148019418969979e-06, - "loss": 108.7999, - "step": 52320 - }, - { - "epoch": 0.43289076394920795, - "grad_norm": 928.7743530273438, - "learning_rate": 7.146745747464987e-06, - "loss": 133.0144, - "step": 52330 - }, - { - "epoch": 0.43297348719857714, - "grad_norm": 888.1306762695312, - "learning_rate": 7.145471905153288e-06, - "loss": 74.0938, - "step": 52340 - }, - { - "epoch": 0.4330562104479464, - "grad_norm": 602.6085815429688, - "learning_rate": 7.1441978921362365e-06, - "loss": 106.0087, - "step": 52350 - }, - { - "epoch": 0.43313893369731565, - "grad_norm": 739.8253784179688, - "learning_rate": 7.142923708515199e-06, - "loss": 130.5773, - "step": 52360 - }, - { - "epoch": 0.43322165694668485, - "grad_norm": 561.8637084960938, - "learning_rate": 7.141649354391556e-06, - "loss": 116.5679, - "step": 52370 - }, - { - "epoch": 0.4333043801960541, - "grad_norm": 635.5332641601562, - "learning_rate": 7.140374829866703e-06, - "loss": 102.7501, - "step": 52380 - }, - { - "epoch": 0.43338710344542336, - "grad_norm": 849.3636474609375, - "learning_rate": 7.1391001350420486e-06, - "loss": 112.318, - "step": 52390 - }, - { - "epoch": 0.43346982669479256, - "grad_norm": 968.818603515625, - "learning_rate": 7.137825270019012e-06, - "loss": 116.6222, - "step": 52400 - }, - { - "epoch": 0.4335525499441618, - "grad_norm": 879.7605590820312, - "learning_rate": 7.1365502348990315e-06, - "loss": 72.7146, - "step": 52410 - }, - { - "epoch": 0.43363527319353107, - "grad_norm": 974.20947265625, - "learning_rate": 7.135275029783554e-06, - "loss": 87.4071, - "step": 52420 - }, - { - "epoch": 0.43371799644290027, - "grad_norm": 898.2373046875, - "learning_rate": 7.133999654774041e-06, - "loss": 96.0373, - "step": 52430 - }, - { - "epoch": 0.4338007196922695, - "grad_norm": 1289.7435302734375, - "learning_rate": 7.13272410997197e-06, - "loss": 121.3297, - "step": 52440 - }, - { - "epoch": 0.4338834429416388, - "grad_norm": 406.2857666015625, - "learning_rate": 7.13144839547883e-06, - "loss": 72.3889, - "step": 52450 - }, - { - "epoch": 0.433966166191008, - "grad_norm": 1770.5550537109375, - "learning_rate": 7.130172511396123e-06, - "loss": 157.1078, - "step": 52460 - }, - { - "epoch": 0.43404888944037723, - "grad_norm": 924.5420532226562, - "learning_rate": 7.128896457825364e-06, - "loss": 94.4494, - "step": 52470 - }, - { - "epoch": 0.4341316126897464, - "grad_norm": 1168.6419677734375, - "learning_rate": 7.127620234868085e-06, - "loss": 151.9046, - "step": 52480 - }, - { - "epoch": 0.4342143359391157, - "grad_norm": 923.4803466796875, - "learning_rate": 7.126343842625828e-06, - "loss": 103.1586, - "step": 52490 - }, - { - "epoch": 0.43429705918848494, - "grad_norm": 849.2039794921875, - "learning_rate": 7.1250672812001505e-06, - "loss": 98.9931, - "step": 52500 - }, - { - "epoch": 0.43437978243785413, - "grad_norm": 1026.3223876953125, - "learning_rate": 7.123790550692624e-06, - "loss": 81.8112, - "step": 52510 - }, - { - "epoch": 0.4344625056872234, - "grad_norm": 1015.2179565429688, - "learning_rate": 7.1225136512048275e-06, - "loss": 98.0542, - "step": 52520 - }, - { - "epoch": 0.43454522893659264, - "grad_norm": 756.0176391601562, - "learning_rate": 7.1212365828383615e-06, - "loss": 143.049, - "step": 52530 - }, - { - "epoch": 0.43462795218596184, - "grad_norm": 919.13427734375, - "learning_rate": 7.119959345694835e-06, - "loss": 89.8264, - "step": 52540 - }, - { - "epoch": 0.4347106754353311, - "grad_norm": 693.4651489257812, - "learning_rate": 7.118681939875875e-06, - "loss": 129.897, - "step": 52550 - }, - { - "epoch": 0.43479339868470035, - "grad_norm": 1032.639404296875, - "learning_rate": 7.117404365483116e-06, - "loss": 109.7115, - "step": 52560 - }, - { - "epoch": 0.43487612193406955, - "grad_norm": 644.7308959960938, - "learning_rate": 7.116126622618207e-06, - "loss": 121.4155, - "step": 52570 - }, - { - "epoch": 0.4349588451834388, - "grad_norm": 1076.7471923828125, - "learning_rate": 7.114848711382816e-06, - "loss": 105.8533, - "step": 52580 - }, - { - "epoch": 0.43504156843280806, - "grad_norm": 682.4072265625, - "learning_rate": 7.1135706318786195e-06, - "loss": 126.4976, - "step": 52590 - }, - { - "epoch": 0.43512429168217726, - "grad_norm": 334.1803894042969, - "learning_rate": 7.112292384207306e-06, - "loss": 72.7947, - "step": 52600 - }, - { - "epoch": 0.4352070149315465, - "grad_norm": 501.4839172363281, - "learning_rate": 7.111013968470581e-06, - "loss": 88.0988, - "step": 52610 - }, - { - "epoch": 0.43528973818091576, - "grad_norm": 1030.7449951171875, - "learning_rate": 7.109735384770166e-06, - "loss": 92.1345, - "step": 52620 - }, - { - "epoch": 0.43537246143028496, - "grad_norm": 1418.4031982421875, - "learning_rate": 7.108456633207787e-06, - "loss": 137.2343, - "step": 52630 - }, - { - "epoch": 0.4354551846796542, - "grad_norm": 1056.733154296875, - "learning_rate": 7.10717771388519e-06, - "loss": 122.0539, - "step": 52640 - }, - { - "epoch": 0.43553790792902347, - "grad_norm": 567.1400756835938, - "learning_rate": 7.105898626904134e-06, - "loss": 97.4046, - "step": 52650 - }, - { - "epoch": 0.43562063117839267, - "grad_norm": 557.343017578125, - "learning_rate": 7.104619372366387e-06, - "loss": 97.5606, - "step": 52660 - }, - { - "epoch": 0.4357033544277619, - "grad_norm": 663.3614501953125, - "learning_rate": 7.103339950373737e-06, - "loss": 78.0228, - "step": 52670 - }, - { - "epoch": 0.4357860776771312, - "grad_norm": 726.0701293945312, - "learning_rate": 7.102060361027981e-06, - "loss": 122.9625, - "step": 52680 - }, - { - "epoch": 0.4358688009265004, - "grad_norm": 833.1370239257812, - "learning_rate": 7.100780604430928e-06, - "loss": 92.9005, - "step": 52690 - }, - { - "epoch": 0.43595152417586963, - "grad_norm": 1035.490478515625, - "learning_rate": 7.099500680684404e-06, - "loss": 82.178, - "step": 52700 - }, - { - "epoch": 0.4360342474252389, - "grad_norm": 721.7255859375, - "learning_rate": 7.0982205898902444e-06, - "loss": 134.4474, - "step": 52710 - }, - { - "epoch": 0.4361169706746081, - "grad_norm": 872.9884033203125, - "learning_rate": 7.096940332150305e-06, - "loss": 112.2354, - "step": 52720 - }, - { - "epoch": 0.43619969392397734, - "grad_norm": 504.2191162109375, - "learning_rate": 7.095659907566446e-06, - "loss": 69.3615, - "step": 52730 - }, - { - "epoch": 0.4362824171733466, - "grad_norm": 1016.4193725585938, - "learning_rate": 7.094379316240545e-06, - "loss": 88.9207, - "step": 52740 - }, - { - "epoch": 0.4363651404227158, - "grad_norm": 972.1260986328125, - "learning_rate": 7.093098558274494e-06, - "loss": 104.1136, - "step": 52750 - }, - { - "epoch": 0.43644786367208505, - "grad_norm": 1002.4033203125, - "learning_rate": 7.091817633770197e-06, - "loss": 94.7899, - "step": 52760 - }, - { - "epoch": 0.4365305869214543, - "grad_norm": 880.1514892578125, - "learning_rate": 7.090536542829571e-06, - "loss": 87.8467, - "step": 52770 - }, - { - "epoch": 0.4366133101708235, - "grad_norm": 620.6061401367188, - "learning_rate": 7.089255285554546e-06, - "loss": 136.4645, - "step": 52780 - }, - { - "epoch": 0.43669603342019275, - "grad_norm": 1144.162353515625, - "learning_rate": 7.087973862047067e-06, - "loss": 100.7308, - "step": 52790 - }, - { - "epoch": 0.436778756669562, - "grad_norm": 848.2889404296875, - "learning_rate": 7.08669227240909e-06, - "loss": 87.7345, - "step": 52800 - }, - { - "epoch": 0.4368614799189312, - "grad_norm": 812.3153686523438, - "learning_rate": 7.085410516742586e-06, - "loss": 101.8244, - "step": 52810 - }, - { - "epoch": 0.43694420316830046, - "grad_norm": 872.6069946289062, - "learning_rate": 7.084128595149538e-06, - "loss": 104.2199, - "step": 52820 - }, - { - "epoch": 0.43702692641766966, - "grad_norm": 897.1786499023438, - "learning_rate": 7.082846507731942e-06, - "loss": 96.152, - "step": 52830 - }, - { - "epoch": 0.4371096496670389, - "grad_norm": 631.2103271484375, - "learning_rate": 7.081564254591809e-06, - "loss": 101.3164, - "step": 52840 - }, - { - "epoch": 0.43719237291640817, - "grad_norm": 781.96044921875, - "learning_rate": 7.08028183583116e-06, - "loss": 114.7007, - "step": 52850 - }, - { - "epoch": 0.43727509616577737, - "grad_norm": 995.4803466796875, - "learning_rate": 7.078999251552034e-06, - "loss": 98.927, - "step": 52860 - }, - { - "epoch": 0.4373578194151466, - "grad_norm": 841.7239990234375, - "learning_rate": 7.077716501856478e-06, - "loss": 106.8644, - "step": 52870 - }, - { - "epoch": 0.4374405426645159, - "grad_norm": 455.7330322265625, - "learning_rate": 7.076433586846555e-06, - "loss": 119.5307, - "step": 52880 - }, - { - "epoch": 0.4375232659138851, - "grad_norm": 1019.272216796875, - "learning_rate": 7.075150506624342e-06, - "loss": 120.9018, - "step": 52890 - }, - { - "epoch": 0.4376059891632543, - "grad_norm": 783.7548828125, - "learning_rate": 7.073867261291926e-06, - "loss": 107.6927, - "step": 52900 - }, - { - "epoch": 0.4376887124126236, - "grad_norm": 483.0557556152344, - "learning_rate": 7.0725838509514115e-06, - "loss": 57.1351, - "step": 52910 - }, - { - "epoch": 0.4377714356619928, - "grad_norm": 1054.7470703125, - "learning_rate": 7.07130027570491e-06, - "loss": 99.6627, - "step": 52920 - }, - { - "epoch": 0.43785415891136203, - "grad_norm": 716.9881591796875, - "learning_rate": 7.070016535654551e-06, - "loss": 85.3958, - "step": 52930 - }, - { - "epoch": 0.4379368821607313, - "grad_norm": 1354.7027587890625, - "learning_rate": 7.068732630902479e-06, - "loss": 107.1556, - "step": 52940 - }, - { - "epoch": 0.4380196054101005, - "grad_norm": 1029.6153564453125, - "learning_rate": 7.067448561550844e-06, - "loss": 92.6526, - "step": 52950 - }, - { - "epoch": 0.43810232865946974, - "grad_norm": 1133.869873046875, - "learning_rate": 7.066164327701815e-06, - "loss": 149.094, - "step": 52960 - }, - { - "epoch": 0.438185051908839, - "grad_norm": 1119.0980224609375, - "learning_rate": 7.064879929457573e-06, - "loss": 143.9678, - "step": 52970 - }, - { - "epoch": 0.4382677751582082, - "grad_norm": 643.8549194335938, - "learning_rate": 7.063595366920314e-06, - "loss": 76.5045, - "step": 52980 - }, - { - "epoch": 0.43835049840757745, - "grad_norm": 1303.5244140625, - "learning_rate": 7.062310640192239e-06, - "loss": 111.4981, - "step": 52990 - }, - { - "epoch": 0.4384332216569467, - "grad_norm": 862.070556640625, - "learning_rate": 7.061025749375572e-06, - "loss": 100.7834, - "step": 53000 - }, - { - "epoch": 0.4385159449063159, - "grad_norm": 776.7830810546875, - "learning_rate": 7.059740694572545e-06, - "loss": 103.979, - "step": 53010 - }, - { - "epoch": 0.43859866815568516, - "grad_norm": 449.9796447753906, - "learning_rate": 7.058455475885405e-06, - "loss": 85.2324, - "step": 53020 - }, - { - "epoch": 0.4386813914050544, - "grad_norm": 1027.198486328125, - "learning_rate": 7.05717009341641e-06, - "loss": 93.9646, - "step": 53030 - }, - { - "epoch": 0.4387641146544236, - "grad_norm": 1561.4610595703125, - "learning_rate": 7.05588454726783e-06, - "loss": 98.5875, - "step": 53040 - }, - { - "epoch": 0.43884683790379286, - "grad_norm": 886.0679931640625, - "learning_rate": 7.054598837541951e-06, - "loss": 91.799, - "step": 53050 - }, - { - "epoch": 0.4389295611531621, - "grad_norm": 822.8799438476562, - "learning_rate": 7.053312964341075e-06, - "loss": 96.1325, - "step": 53060 - }, - { - "epoch": 0.4390122844025313, - "grad_norm": 1466.96337890625, - "learning_rate": 7.052026927767508e-06, - "loss": 125.8915, - "step": 53070 - }, - { - "epoch": 0.43909500765190057, - "grad_norm": 1202.5977783203125, - "learning_rate": 7.050740727923576e-06, - "loss": 100.9376, - "step": 53080 - }, - { - "epoch": 0.4391777309012698, - "grad_norm": 834.5575561523438, - "learning_rate": 7.049454364911615e-06, - "loss": 100.5369, - "step": 53090 - }, - { - "epoch": 0.439260454150639, - "grad_norm": 589.8638916015625, - "learning_rate": 7.048167838833977e-06, - "loss": 94.5479, - "step": 53100 - }, - { - "epoch": 0.4393431774000083, - "grad_norm": 722.2205200195312, - "learning_rate": 7.046881149793026e-06, - "loss": 94.6359, - "step": 53110 - }, - { - "epoch": 0.43942590064937753, - "grad_norm": 706.8112182617188, - "learning_rate": 7.045594297891133e-06, - "loss": 80.4411, - "step": 53120 - }, - { - "epoch": 0.43950862389874673, - "grad_norm": 1541.61279296875, - "learning_rate": 7.04430728323069e-06, - "loss": 126.8126, - "step": 53130 - }, - { - "epoch": 0.439591347148116, - "grad_norm": 1694.7259521484375, - "learning_rate": 7.043020105914098e-06, - "loss": 112.2094, - "step": 53140 - }, - { - "epoch": 0.43967407039748524, - "grad_norm": 1049.023193359375, - "learning_rate": 7.041732766043775e-06, - "loss": 96.9257, - "step": 53150 - }, - { - "epoch": 0.43975679364685444, - "grad_norm": 928.5380859375, - "learning_rate": 7.040445263722145e-06, - "loss": 90.1928, - "step": 53160 - }, - { - "epoch": 0.4398395168962237, - "grad_norm": 517.4678344726562, - "learning_rate": 7.039157599051648e-06, - "loss": 100.6094, - "step": 53170 - }, - { - "epoch": 0.43992224014559295, - "grad_norm": 1040.879638671875, - "learning_rate": 7.037869772134741e-06, - "loss": 93.181, - "step": 53180 - }, - { - "epoch": 0.44000496339496215, - "grad_norm": 1226.857421875, - "learning_rate": 7.036581783073888e-06, - "loss": 139.1528, - "step": 53190 - }, - { - "epoch": 0.4400876866443314, - "grad_norm": 2283.062744140625, - "learning_rate": 7.035293631971569e-06, - "loss": 121.1719, - "step": 53200 - }, - { - "epoch": 0.4401704098937006, - "grad_norm": 1155.0572509765625, - "learning_rate": 7.034005318930277e-06, - "loss": 86.4772, - "step": 53210 - }, - { - "epoch": 0.44025313314306985, - "grad_norm": 1117.125244140625, - "learning_rate": 7.032716844052517e-06, - "loss": 111.3512, - "step": 53220 - }, - { - "epoch": 0.4403358563924391, - "grad_norm": 1401.2935791015625, - "learning_rate": 7.031428207440807e-06, - "loss": 122.4468, - "step": 53230 - }, - { - "epoch": 0.4404185796418083, - "grad_norm": 719.4153442382812, - "learning_rate": 7.030139409197676e-06, - "loss": 86.415, - "step": 53240 - }, - { - "epoch": 0.44050130289117756, - "grad_norm": 802.9329833984375, - "learning_rate": 7.02885044942567e-06, - "loss": 161.6186, - "step": 53250 - }, - { - "epoch": 0.4405840261405468, - "grad_norm": 720.5238037109375, - "learning_rate": 7.027561328227345e-06, - "loss": 109.4637, - "step": 53260 - }, - { - "epoch": 0.440666749389916, - "grad_norm": 786.7742919921875, - "learning_rate": 7.02627204570527e-06, - "loss": 113.8812, - "step": 53270 - }, - { - "epoch": 0.44074947263928527, - "grad_norm": 449.480712890625, - "learning_rate": 7.024982601962027e-06, - "loss": 94.8391, - "step": 53280 - }, - { - "epoch": 0.4408321958886545, - "grad_norm": 934.8798217773438, - "learning_rate": 7.023692997100213e-06, - "loss": 79.2688, - "step": 53290 - }, - { - "epoch": 0.4409149191380237, - "grad_norm": 1120.0533447265625, - "learning_rate": 7.0224032312224345e-06, - "loss": 91.2629, - "step": 53300 - }, - { - "epoch": 0.440997642387393, - "grad_norm": 1041.491943359375, - "learning_rate": 7.021113304431313e-06, - "loss": 65.5652, - "step": 53310 - }, - { - "epoch": 0.44108036563676223, - "grad_norm": 1426.9508056640625, - "learning_rate": 7.01982321682948e-06, - "loss": 106.7264, - "step": 53320 - }, - { - "epoch": 0.4411630888861314, - "grad_norm": 1245.83154296875, - "learning_rate": 7.018532968519584e-06, - "loss": 97.5596, - "step": 53330 - }, - { - "epoch": 0.4412458121355007, - "grad_norm": 667.5186157226562, - "learning_rate": 7.0172425596042846e-06, - "loss": 96.0253, - "step": 53340 - }, - { - "epoch": 0.44132853538486994, - "grad_norm": 558.8970947265625, - "learning_rate": 7.0159519901862515e-06, - "loss": 108.9551, - "step": 53350 - }, - { - "epoch": 0.44141125863423913, - "grad_norm": 636.3999633789062, - "learning_rate": 7.014661260368171e-06, - "loss": 84.9733, - "step": 53360 - }, - { - "epoch": 0.4414939818836084, - "grad_norm": 927.197265625, - "learning_rate": 7.01337037025274e-06, - "loss": 111.2972, - "step": 53370 - }, - { - "epoch": 0.44157670513297764, - "grad_norm": 786.5078125, - "learning_rate": 7.012079319942668e-06, - "loss": 95.5135, - "step": 53380 - }, - { - "epoch": 0.44165942838234684, - "grad_norm": 807.628173828125, - "learning_rate": 7.01078810954068e-06, - "loss": 138.3115, - "step": 53390 - }, - { - "epoch": 0.4417421516317161, - "grad_norm": 657.76123046875, - "learning_rate": 7.0094967391495095e-06, - "loss": 96.7088, - "step": 53400 - }, - { - "epoch": 0.44182487488108535, - "grad_norm": 414.78240966796875, - "learning_rate": 7.008205208871906e-06, - "loss": 77.1027, - "step": 53410 - }, - { - "epoch": 0.44190759813045455, - "grad_norm": 719.4336547851562, - "learning_rate": 7.00691351881063e-06, - "loss": 95.9553, - "step": 53420 - }, - { - "epoch": 0.4419903213798238, - "grad_norm": 1148.5152587890625, - "learning_rate": 7.005621669068456e-06, - "loss": 109.5746, - "step": 53430 - }, - { - "epoch": 0.44207304462919306, - "grad_norm": 508.8988342285156, - "learning_rate": 7.004329659748172e-06, - "loss": 123.2771, - "step": 53440 - }, - { - "epoch": 0.44215576787856226, - "grad_norm": 777.6555786132812, - "learning_rate": 7.003037490952574e-06, - "loss": 101.2551, - "step": 53450 - }, - { - "epoch": 0.4422384911279315, - "grad_norm": 1049.9522705078125, - "learning_rate": 7.0017451627844765e-06, - "loss": 137.9618, - "step": 53460 - }, - { - "epoch": 0.44232121437730076, - "grad_norm": 666.4544677734375, - "learning_rate": 7.0004526753467004e-06, - "loss": 109.7146, - "step": 53470 - }, - { - "epoch": 0.44240393762666996, - "grad_norm": 1020.0592651367188, - "learning_rate": 6.999160028742089e-06, - "loss": 113.0266, - "step": 53480 - }, - { - "epoch": 0.4424866608760392, - "grad_norm": 646.3621215820312, - "learning_rate": 6.997867223073487e-06, - "loss": 148.3913, - "step": 53490 - }, - { - "epoch": 0.44256938412540847, - "grad_norm": 1154.8201904296875, - "learning_rate": 6.996574258443761e-06, - "loss": 111.0904, - "step": 53500 - }, - { - "epoch": 0.44265210737477767, - "grad_norm": 1100.033935546875, - "learning_rate": 6.995281134955784e-06, - "loss": 90.6527, - "step": 53510 - }, - { - "epoch": 0.4427348306241469, - "grad_norm": 1160.05712890625, - "learning_rate": 6.993987852712442e-06, - "loss": 113.9817, - "step": 53520 - }, - { - "epoch": 0.4428175538735162, - "grad_norm": 1008.1426391601562, - "learning_rate": 6.992694411816638e-06, - "loss": 91.588, - "step": 53530 - }, - { - "epoch": 0.4429002771228854, - "grad_norm": 892.0647583007812, - "learning_rate": 6.991400812371287e-06, - "loss": 127.6992, - "step": 53540 - }, - { - "epoch": 0.44298300037225463, - "grad_norm": 700.9825439453125, - "learning_rate": 6.990107054479313e-06, - "loss": 85.8635, - "step": 53550 - }, - { - "epoch": 0.44306572362162383, - "grad_norm": 452.05950927734375, - "learning_rate": 6.988813138243652e-06, - "loss": 109.3417, - "step": 53560 - }, - { - "epoch": 0.4431484468709931, - "grad_norm": 1181.6788330078125, - "learning_rate": 6.987519063767257e-06, - "loss": 116.6035, - "step": 53570 - }, - { - "epoch": 0.44323117012036234, - "grad_norm": 991.6622924804688, - "learning_rate": 6.986224831153092e-06, - "loss": 78.8246, - "step": 53580 - }, - { - "epoch": 0.44331389336973154, - "grad_norm": 1134.1353759765625, - "learning_rate": 6.984930440504134e-06, - "loss": 113.1138, - "step": 53590 - }, - { - "epoch": 0.4433966166191008, - "grad_norm": 708.4700317382812, - "learning_rate": 6.9836358919233695e-06, - "loss": 79.0538, - "step": 53600 - }, - { - "epoch": 0.44347933986847005, - "grad_norm": 917.70166015625, - "learning_rate": 6.982341185513799e-06, - "loss": 88.8924, - "step": 53610 - }, - { - "epoch": 0.44356206311783924, - "grad_norm": 966.7335815429688, - "learning_rate": 6.981046321378441e-06, - "loss": 142.0511, - "step": 53620 - }, - { - "epoch": 0.4436447863672085, - "grad_norm": 804.2879028320312, - "learning_rate": 6.979751299620318e-06, - "loss": 75.3757, - "step": 53630 - }, - { - "epoch": 0.44372750961657775, - "grad_norm": 1585.9019775390625, - "learning_rate": 6.978456120342469e-06, - "loss": 95.9753, - "step": 53640 - }, - { - "epoch": 0.44381023286594695, - "grad_norm": 1567.5927734375, - "learning_rate": 6.977160783647947e-06, - "loss": 126.4141, - "step": 53650 - }, - { - "epoch": 0.4438929561153162, - "grad_norm": 1687.5382080078125, - "learning_rate": 6.975865289639815e-06, - "loss": 90.8707, - "step": 53660 - }, - { - "epoch": 0.44397567936468546, - "grad_norm": 973.2371215820312, - "learning_rate": 6.974569638421151e-06, - "loss": 82.7147, - "step": 53670 - }, - { - "epoch": 0.44405840261405466, - "grad_norm": 658.38818359375, - "learning_rate": 6.973273830095042e-06, - "loss": 85.3202, - "step": 53680 - }, - { - "epoch": 0.4441411258634239, - "grad_norm": 1075.40673828125, - "learning_rate": 6.971977864764591e-06, - "loss": 99.2303, - "step": 53690 - }, - { - "epoch": 0.44422384911279317, - "grad_norm": 910.6012573242188, - "learning_rate": 6.970681742532911e-06, - "loss": 129.7319, - "step": 53700 - }, - { - "epoch": 0.44430657236216237, - "grad_norm": 1031.2911376953125, - "learning_rate": 6.969385463503129e-06, - "loss": 96.3028, - "step": 53710 - }, - { - "epoch": 0.4443892956115316, - "grad_norm": 679.598876953125, - "learning_rate": 6.968089027778384e-06, - "loss": 91.5366, - "step": 53720 - }, - { - "epoch": 0.4444720188609009, - "grad_norm": 842.2387084960938, - "learning_rate": 6.9667924354618275e-06, - "loss": 107.6039, - "step": 53730 - }, - { - "epoch": 0.4445547421102701, - "grad_norm": 645.2871704101562, - "learning_rate": 6.965495686656623e-06, - "loss": 125.044, - "step": 53740 - }, - { - "epoch": 0.44463746535963933, - "grad_norm": 614.8228149414062, - "learning_rate": 6.964198781465948e-06, - "loss": 101.3111, - "step": 53750 - }, - { - "epoch": 0.4447201886090086, - "grad_norm": 942.0760498046875, - "learning_rate": 6.962901719992989e-06, - "loss": 89.1372, - "step": 53760 - }, - { - "epoch": 0.4448029118583778, - "grad_norm": 577.6919555664062, - "learning_rate": 6.961604502340949e-06, - "loss": 59.7649, - "step": 53770 - }, - { - "epoch": 0.44488563510774704, - "grad_norm": 1402.629638671875, - "learning_rate": 6.960307128613042e-06, - "loss": 133.4121, - "step": 53780 - }, - { - "epoch": 0.4449683583571163, - "grad_norm": 1055.0478515625, - "learning_rate": 6.959009598912493e-06, - "loss": 127.3038, - "step": 53790 - }, - { - "epoch": 0.4450510816064855, - "grad_norm": 1320.6951904296875, - "learning_rate": 6.957711913342541e-06, - "loss": 86.9509, - "step": 53800 - }, - { - "epoch": 0.44513380485585474, - "grad_norm": 1073.6241455078125, - "learning_rate": 6.956414072006437e-06, - "loss": 122.6924, - "step": 53810 - }, - { - "epoch": 0.445216528105224, - "grad_norm": 870.7139282226562, - "learning_rate": 6.955116075007443e-06, - "loss": 124.2368, - "step": 53820 - }, - { - "epoch": 0.4452992513545932, - "grad_norm": 1268.8851318359375, - "learning_rate": 6.953817922448837e-06, - "loss": 89.0271, - "step": 53830 - }, - { - "epoch": 0.44538197460396245, - "grad_norm": 1143.03955078125, - "learning_rate": 6.9525196144339055e-06, - "loss": 145.819, - "step": 53840 - }, - { - "epoch": 0.4454646978533317, - "grad_norm": 1068.4166259765625, - "learning_rate": 6.951221151065948e-06, - "loss": 131.2076, - "step": 53850 - }, - { - "epoch": 0.4455474211027009, - "grad_norm": 797.7089233398438, - "learning_rate": 6.949922532448279e-06, - "loss": 98.0425, - "step": 53860 - }, - { - "epoch": 0.44563014435207016, - "grad_norm": 760.931640625, - "learning_rate": 6.948623758684223e-06, - "loss": 96.0778, - "step": 53870 - }, - { - "epoch": 0.4457128676014394, - "grad_norm": 624.017822265625, - "learning_rate": 6.9473248298771176e-06, - "loss": 89.5199, - "step": 53880 - }, - { - "epoch": 0.4457955908508086, - "grad_norm": 689.559814453125, - "learning_rate": 6.946025746130312e-06, - "loss": 123.3743, - "step": 53890 - }, - { - "epoch": 0.44587831410017786, - "grad_norm": 1045.9923095703125, - "learning_rate": 6.944726507547169e-06, - "loss": 100.6308, - "step": 53900 - }, - { - "epoch": 0.44596103734954706, - "grad_norm": 1568.38330078125, - "learning_rate": 6.943427114231064e-06, - "loss": 136.4211, - "step": 53910 - }, - { - "epoch": 0.4460437605989163, - "grad_norm": 1263.7076416015625, - "learning_rate": 6.942127566285382e-06, - "loss": 89.5075, - "step": 53920 - }, - { - "epoch": 0.44612648384828557, - "grad_norm": 1032.7841796875, - "learning_rate": 6.940827863813523e-06, - "loss": 124.588, - "step": 53930 - }, - { - "epoch": 0.44620920709765477, - "grad_norm": 1023.0800170898438, - "learning_rate": 6.9395280069188964e-06, - "loss": 127.864, - "step": 53940 - }, - { - "epoch": 0.446291930347024, - "grad_norm": 684.29931640625, - "learning_rate": 6.9382279957049295e-06, - "loss": 106.943, - "step": 53950 - }, - { - "epoch": 0.4463746535963933, - "grad_norm": 590.7765502929688, - "learning_rate": 6.936927830275055e-06, - "loss": 80.368, - "step": 53960 - }, - { - "epoch": 0.4464573768457625, - "grad_norm": 953.7996826171875, - "learning_rate": 6.935627510732724e-06, - "loss": 114.7125, - "step": 53970 - }, - { - "epoch": 0.44654010009513173, - "grad_norm": 665.14697265625, - "learning_rate": 6.934327037181394e-06, - "loss": 107.5566, - "step": 53980 - }, - { - "epoch": 0.446622823344501, - "grad_norm": 933.9505615234375, - "learning_rate": 6.933026409724538e-06, - "loss": 119.2751, - "step": 53990 - }, - { - "epoch": 0.4467055465938702, - "grad_norm": 1047.66796875, - "learning_rate": 6.931725628465643e-06, - "loss": 84.2427, - "step": 54000 - }, - { - "epoch": 0.44678826984323944, - "grad_norm": 1026.7486572265625, - "learning_rate": 6.9304246935082065e-06, - "loss": 80.6261, - "step": 54010 - }, - { - "epoch": 0.4468709930926087, - "grad_norm": 948.4655151367188, - "learning_rate": 6.929123604955735e-06, - "loss": 128.3851, - "step": 54020 - }, - { - "epoch": 0.4469537163419779, - "grad_norm": 1473.8193359375, - "learning_rate": 6.927822362911753e-06, - "loss": 83.6743, - "step": 54030 - }, - { - "epoch": 0.44703643959134715, - "grad_norm": 677.3907470703125, - "learning_rate": 6.926520967479791e-06, - "loss": 96.5376, - "step": 54040 - }, - { - "epoch": 0.4471191628407164, - "grad_norm": 759.7684326171875, - "learning_rate": 6.9252194187634e-06, - "loss": 85.2003, - "step": 54050 - }, - { - "epoch": 0.4472018860900856, - "grad_norm": 1008.9971923828125, - "learning_rate": 6.923917716866133e-06, - "loss": 108.9541, - "step": 54060 - }, - { - "epoch": 0.44728460933945485, - "grad_norm": 1046.4508056640625, - "learning_rate": 6.922615861891564e-06, - "loss": 73.9177, - "step": 54070 - }, - { - "epoch": 0.4473673325888241, - "grad_norm": 1061.5517578125, - "learning_rate": 6.921313853943275e-06, - "loss": 116.9172, - "step": 54080 - }, - { - "epoch": 0.4474500558381933, - "grad_norm": 1054.0621337890625, - "learning_rate": 6.9200116931248575e-06, - "loss": 94.5179, - "step": 54090 - }, - { - "epoch": 0.44753277908756256, - "grad_norm": 1365.836181640625, - "learning_rate": 6.918709379539924e-06, - "loss": 91.8605, - "step": 54100 - }, - { - "epoch": 0.4476155023369318, - "grad_norm": 914.6397094726562, - "learning_rate": 6.917406913292089e-06, - "loss": 95.1237, - "step": 54110 - }, - { - "epoch": 0.447698225586301, - "grad_norm": 1938.41064453125, - "learning_rate": 6.916104294484988e-06, - "loss": 133.195, - "step": 54120 - }, - { - "epoch": 0.44778094883567027, - "grad_norm": 728.0489501953125, - "learning_rate": 6.91480152322226e-06, - "loss": 105.5754, - "step": 54130 - }, - { - "epoch": 0.4478636720850395, - "grad_norm": 1188.586181640625, - "learning_rate": 6.913498599607563e-06, - "loss": 110.4302, - "step": 54140 - }, - { - "epoch": 0.4479463953344087, - "grad_norm": 1016.9649047851562, - "learning_rate": 6.9121955237445644e-06, - "loss": 75.1243, - "step": 54150 - }, - { - "epoch": 0.448029118583778, - "grad_norm": 1644.49755859375, - "learning_rate": 6.910892295736944e-06, - "loss": 87.0271, - "step": 54160 - }, - { - "epoch": 0.44811184183314723, - "grad_norm": 885.784423828125, - "learning_rate": 6.9095889156883934e-06, - "loss": 104.9015, - "step": 54170 - }, - { - "epoch": 0.4481945650825164, - "grad_norm": 575.415283203125, - "learning_rate": 6.908285383702617e-06, - "loss": 90.7336, - "step": 54180 - }, - { - "epoch": 0.4482772883318857, - "grad_norm": 876.1041870117188, - "learning_rate": 6.906981699883329e-06, - "loss": 93.0574, - "step": 54190 - }, - { - "epoch": 0.44836001158125494, - "grad_norm": 665.2525024414062, - "learning_rate": 6.90567786433426e-06, - "loss": 98.4839, - "step": 54200 - }, - { - "epoch": 0.44844273483062413, - "grad_norm": 601.7535400390625, - "learning_rate": 6.904373877159149e-06, - "loss": 88.9101, - "step": 54210 - }, - { - "epoch": 0.4485254580799934, - "grad_norm": 761.7291870117188, - "learning_rate": 6.903069738461749e-06, - "loss": 90.6817, - "step": 54220 - }, - { - "epoch": 0.44860818132936264, - "grad_norm": 510.3106994628906, - "learning_rate": 6.901765448345823e-06, - "loss": 172.6727, - "step": 54230 - }, - { - "epoch": 0.44869090457873184, - "grad_norm": 836.3589477539062, - "learning_rate": 6.900461006915149e-06, - "loss": 107.1047, - "step": 54240 - }, - { - "epoch": 0.4487736278281011, - "grad_norm": 981.2500610351562, - "learning_rate": 6.899156414273514e-06, - "loss": 102.4325, - "step": 54250 - }, - { - "epoch": 0.44885635107747035, - "grad_norm": 996.1396484375, - "learning_rate": 6.89785167052472e-06, - "loss": 76.1564, - "step": 54260 - }, - { - "epoch": 0.44893907432683955, - "grad_norm": 1051.859619140625, - "learning_rate": 6.896546775772577e-06, - "loss": 89.3364, - "step": 54270 - }, - { - "epoch": 0.4490217975762088, - "grad_norm": 743.7044677734375, - "learning_rate": 6.8952417301209114e-06, - "loss": 92.615, - "step": 54280 - }, - { - "epoch": 0.449104520825578, - "grad_norm": 1194.3951416015625, - "learning_rate": 6.893936533673561e-06, - "loss": 87.0885, - "step": 54290 - }, - { - "epoch": 0.44918724407494726, - "grad_norm": 855.311767578125, - "learning_rate": 6.892631186534371e-06, - "loss": 94.2941, - "step": 54300 - }, - { - "epoch": 0.4492699673243165, - "grad_norm": 804.5349731445312, - "learning_rate": 6.891325688807204e-06, - "loss": 119.2308, - "step": 54310 - }, - { - "epoch": 0.4493526905736857, - "grad_norm": 1499.4466552734375, - "learning_rate": 6.890020040595932e-06, - "loss": 117.2243, - "step": 54320 - }, - { - "epoch": 0.44943541382305496, - "grad_norm": 907.1102905273438, - "learning_rate": 6.88871424200444e-06, - "loss": 120.5858, - "step": 54330 - }, - { - "epoch": 0.4495181370724242, - "grad_norm": 1519.6060791015625, - "learning_rate": 6.887408293136621e-06, - "loss": 98.4492, - "step": 54340 - }, - { - "epoch": 0.4496008603217934, - "grad_norm": 802.8118896484375, - "learning_rate": 6.886102194096389e-06, - "loss": 67.4142, - "step": 54350 - }, - { - "epoch": 0.44968358357116267, - "grad_norm": 638.83935546875, - "learning_rate": 6.884795944987661e-06, - "loss": 89.7945, - "step": 54360 - }, - { - "epoch": 0.4497663068205319, - "grad_norm": 857.33984375, - "learning_rate": 6.8834895459143694e-06, - "loss": 103.017, - "step": 54370 - }, - { - "epoch": 0.4498490300699011, - "grad_norm": 1263.925537109375, - "learning_rate": 6.882182996980457e-06, - "loss": 80.3623, - "step": 54380 - }, - { - "epoch": 0.4499317533192704, - "grad_norm": 617.5503540039062, - "learning_rate": 6.880876298289885e-06, - "loss": 90.8478, - "step": 54390 - }, - { - "epoch": 0.45001447656863963, - "grad_norm": 978.7340087890625, - "learning_rate": 6.879569449946617e-06, - "loss": 85.7712, - "step": 54400 - }, - { - "epoch": 0.45009719981800883, - "grad_norm": 1923.4696044921875, - "learning_rate": 6.878262452054632e-06, - "loss": 119.6836, - "step": 54410 - }, - { - "epoch": 0.4501799230673781, - "grad_norm": 713.7830200195312, - "learning_rate": 6.876955304717925e-06, - "loss": 99.3105, - "step": 54420 - }, - { - "epoch": 0.45026264631674734, - "grad_norm": 714.7610473632812, - "learning_rate": 6.875648008040499e-06, - "loss": 67.4421, - "step": 54430 - }, - { - "epoch": 0.45034536956611654, - "grad_norm": 1194.353271484375, - "learning_rate": 6.874340562126368e-06, - "loss": 97.9703, - "step": 54440 - }, - { - "epoch": 0.4504280928154858, - "grad_norm": 747.249755859375, - "learning_rate": 6.873032967079562e-06, - "loss": 113.6194, - "step": 54450 - }, - { - "epoch": 0.45051081606485505, - "grad_norm": 1645.705322265625, - "learning_rate": 6.871725223004118e-06, - "loss": 134.7527, - "step": 54460 - }, - { - "epoch": 0.45059353931422425, - "grad_norm": 704.1566162109375, - "learning_rate": 6.870417330004086e-06, - "loss": 97.6264, - "step": 54470 - }, - { - "epoch": 0.4506762625635935, - "grad_norm": 945.1201782226562, - "learning_rate": 6.869109288183534e-06, - "loss": 82.3648, - "step": 54480 - }, - { - "epoch": 0.45075898581296275, - "grad_norm": 752.2423706054688, - "learning_rate": 6.867801097646534e-06, - "loss": 93.4124, - "step": 54490 - }, - { - "epoch": 0.45084170906233195, - "grad_norm": 1171.9190673828125, - "learning_rate": 6.866492758497171e-06, - "loss": 71.8976, - "step": 54500 - }, - { - "epoch": 0.4509244323117012, - "grad_norm": 1028.865478515625, - "learning_rate": 6.865184270839546e-06, - "loss": 79.6011, - "step": 54510 - }, - { - "epoch": 0.45100715556107046, - "grad_norm": 1303.591552734375, - "learning_rate": 6.863875634777767e-06, - "loss": 111.4603, - "step": 54520 - }, - { - "epoch": 0.45108987881043966, - "grad_norm": 702.6375122070312, - "learning_rate": 6.86256685041596e-06, - "loss": 76.6232, - "step": 54530 - }, - { - "epoch": 0.4511726020598089, - "grad_norm": 1701.81005859375, - "learning_rate": 6.861257917858257e-06, - "loss": 110.8894, - "step": 54540 - }, - { - "epoch": 0.45125532530917817, - "grad_norm": 640.2454833984375, - "learning_rate": 6.859948837208802e-06, - "loss": 83.109, - "step": 54550 - }, - { - "epoch": 0.45133804855854737, - "grad_norm": 681.0009155273438, - "learning_rate": 6.8586396085717536e-06, - "loss": 105.6306, - "step": 54560 - }, - { - "epoch": 0.4514207718079166, - "grad_norm": 2675.404052734375, - "learning_rate": 6.8573302320512836e-06, - "loss": 132.9688, - "step": 54570 - }, - { - "epoch": 0.4515034950572859, - "grad_norm": 891.1157836914062, - "learning_rate": 6.85602070775157e-06, - "loss": 103.1662, - "step": 54580 - }, - { - "epoch": 0.4515862183066551, - "grad_norm": 1364.07666015625, - "learning_rate": 6.854711035776806e-06, - "loss": 99.1324, - "step": 54590 - }, - { - "epoch": 0.45166894155602433, - "grad_norm": 524.9562377929688, - "learning_rate": 6.853401216231198e-06, - "loss": 106.766, - "step": 54600 - }, - { - "epoch": 0.4517516648053936, - "grad_norm": 818.6365966796875, - "learning_rate": 6.8520912492189605e-06, - "loss": 80.276, - "step": 54610 - }, - { - "epoch": 0.4518343880547628, - "grad_norm": 1519.1331787109375, - "learning_rate": 6.850781134844323e-06, - "loss": 69.9319, - "step": 54620 - }, - { - "epoch": 0.45191711130413204, - "grad_norm": 697.9396362304688, - "learning_rate": 6.8494708732115235e-06, - "loss": 123.4269, - "step": 54630 - }, - { - "epoch": 0.45199983455350123, - "grad_norm": 990.307373046875, - "learning_rate": 6.8481604644248155e-06, - "loss": 89.6535, - "step": 54640 - }, - { - "epoch": 0.4520825578028705, - "grad_norm": 651.0850830078125, - "learning_rate": 6.846849908588461e-06, - "loss": 80.7496, - "step": 54650 - }, - { - "epoch": 0.45216528105223974, - "grad_norm": 1296.8258056640625, - "learning_rate": 6.845539205806735e-06, - "loss": 129.8521, - "step": 54660 - }, - { - "epoch": 0.45224800430160894, - "grad_norm": 867.2517700195312, - "learning_rate": 6.844228356183924e-06, - "loss": 76.4351, - "step": 54670 - }, - { - "epoch": 0.4523307275509782, - "grad_norm": 878.8154907226562, - "learning_rate": 6.842917359824326e-06, - "loss": 103.5479, - "step": 54680 - }, - { - "epoch": 0.45241345080034745, - "grad_norm": 589.3108520507812, - "learning_rate": 6.841606216832253e-06, - "loss": 158.1967, - "step": 54690 - }, - { - "epoch": 0.45249617404971665, - "grad_norm": 440.4094543457031, - "learning_rate": 6.840294927312024e-06, - "loss": 84.1425, - "step": 54700 - }, - { - "epoch": 0.4525788972990859, - "grad_norm": 773.7758178710938, - "learning_rate": 6.838983491367974e-06, - "loss": 81.9263, - "step": 54710 - }, - { - "epoch": 0.45266162054845516, - "grad_norm": 1917.0274658203125, - "learning_rate": 6.837671909104447e-06, - "loss": 108.1925, - "step": 54720 - }, - { - "epoch": 0.45274434379782436, - "grad_norm": 528.8961791992188, - "learning_rate": 6.836360180625801e-06, - "loss": 123.8608, - "step": 54730 - }, - { - "epoch": 0.4528270670471936, - "grad_norm": 719.6814575195312, - "learning_rate": 6.835048306036404e-06, - "loss": 108.1355, - "step": 54740 - }, - { - "epoch": 0.45290979029656286, - "grad_norm": 1164.0782470703125, - "learning_rate": 6.833736285440632e-06, - "loss": 83.8529, - "step": 54750 - }, - { - "epoch": 0.45299251354593206, - "grad_norm": 540.9407958984375, - "learning_rate": 6.832424118942881e-06, - "loss": 125.107, - "step": 54760 - }, - { - "epoch": 0.4530752367953013, - "grad_norm": 736.422119140625, - "learning_rate": 6.831111806647552e-06, - "loss": 106.4315, - "step": 54770 - }, - { - "epoch": 0.45315796004467057, - "grad_norm": 697.813232421875, - "learning_rate": 6.829799348659061e-06, - "loss": 105.6159, - "step": 54780 - }, - { - "epoch": 0.45324068329403977, - "grad_norm": 925.9052734375, - "learning_rate": 6.828486745081835e-06, - "loss": 116.2571, - "step": 54790 - }, - { - "epoch": 0.453323406543409, - "grad_norm": 2547.846923828125, - "learning_rate": 6.8271739960203065e-06, - "loss": 142.8061, - "step": 54800 - }, - { - "epoch": 0.4534061297927783, - "grad_norm": 1475.7213134765625, - "learning_rate": 6.825861101578931e-06, - "loss": 81.7697, - "step": 54810 - }, - { - "epoch": 0.4534888530421475, - "grad_norm": 1138.5965576171875, - "learning_rate": 6.824548061862166e-06, - "loss": 92.7645, - "step": 54820 - }, - { - "epoch": 0.45357157629151673, - "grad_norm": 851.4307250976562, - "learning_rate": 6.823234876974489e-06, - "loss": 121.6354, - "step": 54830 - }, - { - "epoch": 0.453654299540886, - "grad_norm": 1226.28076171875, - "learning_rate": 6.8219215470203756e-06, - "loss": 102.6578, - "step": 54840 - }, - { - "epoch": 0.4537370227902552, - "grad_norm": 578.7922973632812, - "learning_rate": 6.820608072104329e-06, - "loss": 102.5517, - "step": 54850 - }, - { - "epoch": 0.45381974603962444, - "grad_norm": 673.4541625976562, - "learning_rate": 6.819294452330853e-06, - "loss": 69.6824, - "step": 54860 - }, - { - "epoch": 0.4539024692889937, - "grad_norm": 2711.473876953125, - "learning_rate": 6.817980687804467e-06, - "loss": 101.1284, - "step": 54870 - }, - { - "epoch": 0.4539851925383629, - "grad_norm": 736.4070434570312, - "learning_rate": 6.8166667786297e-06, - "loss": 96.8542, - "step": 54880 - }, - { - "epoch": 0.45406791578773215, - "grad_norm": 760.29150390625, - "learning_rate": 6.815352724911095e-06, - "loss": 105.462, - "step": 54890 - }, - { - "epoch": 0.4541506390371014, - "grad_norm": 692.9434204101562, - "learning_rate": 6.814038526753205e-06, - "loss": 91.7443, - "step": 54900 - }, - { - "epoch": 0.4542333622864706, - "grad_norm": 549.2918090820312, - "learning_rate": 6.812724184260596e-06, - "loss": 73.3805, - "step": 54910 - }, - { - "epoch": 0.45431608553583985, - "grad_norm": 702.6921997070312, - "learning_rate": 6.811409697537843e-06, - "loss": 84.0114, - "step": 54920 - }, - { - "epoch": 0.4543988087852091, - "grad_norm": 787.5999755859375, - "learning_rate": 6.810095066689533e-06, - "loss": 101.185, - "step": 54930 - }, - { - "epoch": 0.4544815320345783, - "grad_norm": 1066.020263671875, - "learning_rate": 6.808780291820264e-06, - "loss": 99.9101, - "step": 54940 - }, - { - "epoch": 0.45456425528394756, - "grad_norm": 1216.9044189453125, - "learning_rate": 6.80746537303465e-06, - "loss": 120.2617, - "step": 54950 - }, - { - "epoch": 0.4546469785333168, - "grad_norm": 1686.6910400390625, - "learning_rate": 6.806150310437312e-06, - "loss": 121.6102, - "step": 54960 - }, - { - "epoch": 0.454729701782686, - "grad_norm": 1484.89111328125, - "learning_rate": 6.804835104132883e-06, - "loss": 104.0064, - "step": 54970 - }, - { - "epoch": 0.45481242503205527, - "grad_norm": 979.0828857421875, - "learning_rate": 6.803519754226007e-06, - "loss": 94.6825, - "step": 54980 - }, - { - "epoch": 0.4548951482814245, - "grad_norm": 864.7905883789062, - "learning_rate": 6.80220426082134e-06, - "loss": 91.2416, - "step": 54990 - }, - { - "epoch": 0.4549778715307937, - "grad_norm": 888.6685791015625, - "learning_rate": 6.800888624023552e-06, - "loss": 105.9705, - "step": 55000 - }, - { - "epoch": 0.455060594780163, - "grad_norm": 497.9424133300781, - "learning_rate": 6.799572843937322e-06, - "loss": 108.1165, - "step": 55010 - }, - { - "epoch": 0.4551433180295322, - "grad_norm": 518.8756713867188, - "learning_rate": 6.79825692066734e-06, - "loss": 106.614, - "step": 55020 - }, - { - "epoch": 0.4552260412789014, - "grad_norm": 1119.955078125, - "learning_rate": 6.796940854318306e-06, - "loss": 140.4494, - "step": 55030 - }, - { - "epoch": 0.4553087645282707, - "grad_norm": 594.2408447265625, - "learning_rate": 6.795624644994936e-06, - "loss": 77.2674, - "step": 55040 - }, - { - "epoch": 0.4553914877776399, - "grad_norm": 797.1567993164062, - "learning_rate": 6.794308292801954e-06, - "loss": 111.2638, - "step": 55050 - }, - { - "epoch": 0.45547421102700913, - "grad_norm": 821.6486206054688, - "learning_rate": 6.792991797844095e-06, - "loss": 136.5393, - "step": 55060 - }, - { - "epoch": 0.4555569342763784, - "grad_norm": 597.4654541015625, - "learning_rate": 6.791675160226109e-06, - "loss": 96.4142, - "step": 55070 - }, - { - "epoch": 0.4556396575257476, - "grad_norm": 592.2135009765625, - "learning_rate": 6.790358380052752e-06, - "loss": 93.9998, - "step": 55080 - }, - { - "epoch": 0.45572238077511684, - "grad_norm": 1021.7890625, - "learning_rate": 6.789041457428796e-06, - "loss": 124.1362, - "step": 55090 - }, - { - "epoch": 0.4558051040244861, - "grad_norm": 486.76080322265625, - "learning_rate": 6.7877243924590205e-06, - "loss": 77.4204, - "step": 55100 - }, - { - "epoch": 0.4558878272738553, - "grad_norm": 875.5304565429688, - "learning_rate": 6.7864071852482205e-06, - "loss": 104.1027, - "step": 55110 - }, - { - "epoch": 0.45597055052322455, - "grad_norm": 738.2188110351562, - "learning_rate": 6.7850898359012e-06, - "loss": 106.4531, - "step": 55120 - }, - { - "epoch": 0.4560532737725938, - "grad_norm": 1422.643798828125, - "learning_rate": 6.7837723445227724e-06, - "loss": 101.3412, - "step": 55130 - }, - { - "epoch": 0.456135997021963, - "grad_norm": 1019.88818359375, - "learning_rate": 6.782454711217767e-06, - "loss": 104.1804, - "step": 55140 - }, - { - "epoch": 0.45621872027133226, - "grad_norm": 1043.4281005859375, - "learning_rate": 6.78113693609102e-06, - "loss": 159.6309, - "step": 55150 - }, - { - "epoch": 0.4563014435207015, - "grad_norm": 733.2887573242188, - "learning_rate": 6.77981901924738e-06, - "loss": 76.8028, - "step": 55160 - }, - { - "epoch": 0.4563841667700707, - "grad_norm": 650.5491943359375, - "learning_rate": 6.7785009607917095e-06, - "loss": 132.4901, - "step": 55170 - }, - { - "epoch": 0.45646689001943996, - "grad_norm": 993.3848876953125, - "learning_rate": 6.777182760828881e-06, - "loss": 127.0856, - "step": 55180 - }, - { - "epoch": 0.4565496132688092, - "grad_norm": 941.2340087890625, - "learning_rate": 6.7758644194637755e-06, - "loss": 68.8753, - "step": 55190 - }, - { - "epoch": 0.4566323365181784, - "grad_norm": 1339.5758056640625, - "learning_rate": 6.774545936801289e-06, - "loss": 93.002, - "step": 55200 - }, - { - "epoch": 0.45671505976754767, - "grad_norm": 605.7567749023438, - "learning_rate": 6.773227312946327e-06, - "loss": 123.4982, - "step": 55210 - }, - { - "epoch": 0.4567977830169169, - "grad_norm": 984.2488403320312, - "learning_rate": 6.771908548003803e-06, - "loss": 77.7853, - "step": 55220 - }, - { - "epoch": 0.4568805062662861, - "grad_norm": 829.1386108398438, - "learning_rate": 6.77058964207865e-06, - "loss": 213.839, - "step": 55230 - }, - { - "epoch": 0.4569632295156554, - "grad_norm": 764.8698120117188, - "learning_rate": 6.769270595275804e-06, - "loss": 102.3559, - "step": 55240 - }, - { - "epoch": 0.45704595276502463, - "grad_norm": 798.2879028320312, - "learning_rate": 6.767951407700217e-06, - "loss": 90.5174, - "step": 55250 - }, - { - "epoch": 0.45712867601439383, - "grad_norm": 856.211181640625, - "learning_rate": 6.766632079456852e-06, - "loss": 85.0527, - "step": 55260 - }, - { - "epoch": 0.4572113992637631, - "grad_norm": 831.584716796875, - "learning_rate": 6.765312610650677e-06, - "loss": 95.3017, - "step": 55270 - }, - { - "epoch": 0.45729412251313234, - "grad_norm": 883.2146606445312, - "learning_rate": 6.763993001386681e-06, - "loss": 113.933, - "step": 55280 - }, - { - "epoch": 0.45737684576250154, - "grad_norm": 954.8955688476562, - "learning_rate": 6.762673251769858e-06, - "loss": 121.8417, - "step": 55290 - }, - { - "epoch": 0.4574595690118708, - "grad_norm": 1161.1273193359375, - "learning_rate": 6.761353361905214e-06, - "loss": 90.7742, - "step": 55300 - }, - { - "epoch": 0.45754229226124005, - "grad_norm": 683.6365966796875, - "learning_rate": 6.7600333318977655e-06, - "loss": 91.0024, - "step": 55310 - }, - { - "epoch": 0.45762501551060925, - "grad_norm": 1601.54638671875, - "learning_rate": 6.758713161852541e-06, - "loss": 110.4416, - "step": 55320 - }, - { - "epoch": 0.4577077387599785, - "grad_norm": 934.130126953125, - "learning_rate": 6.757392851874584e-06, - "loss": 113.9463, - "step": 55330 - }, - { - "epoch": 0.45779046200934775, - "grad_norm": 1528.1434326171875, - "learning_rate": 6.756072402068943e-06, - "loss": 127.9479, - "step": 55340 - }, - { - "epoch": 0.45787318525871695, - "grad_norm": 771.1652221679688, - "learning_rate": 6.75475181254068e-06, - "loss": 89.5568, - "step": 55350 - }, - { - "epoch": 0.4579559085080862, - "grad_norm": 879.8575439453125, - "learning_rate": 6.753431083394868e-06, - "loss": 99.1166, - "step": 55360 - }, - { - "epoch": 0.4580386317574554, - "grad_norm": 684.24609375, - "learning_rate": 6.75211021473659e-06, - "loss": 86.4435, - "step": 55370 - }, - { - "epoch": 0.45812135500682466, - "grad_norm": 1416.9102783203125, - "learning_rate": 6.750789206670945e-06, - "loss": 95.3575, - "step": 55380 - }, - { - "epoch": 0.4582040782561939, - "grad_norm": 889.2215576171875, - "learning_rate": 6.749468059303039e-06, - "loss": 92.2547, - "step": 55390 - }, - { - "epoch": 0.4582868015055631, - "grad_norm": 1061.4666748046875, - "learning_rate": 6.748146772737988e-06, - "loss": 101.2263, - "step": 55400 - }, - { - "epoch": 0.45836952475493237, - "grad_norm": 1019.367919921875, - "learning_rate": 6.7468253470809205e-06, - "loss": 136.364, - "step": 55410 - }, - { - "epoch": 0.4584522480043016, - "grad_norm": 1758.954833984375, - "learning_rate": 6.745503782436976e-06, - "loss": 124.5927, - "step": 55420 - }, - { - "epoch": 0.4585349712536708, - "grad_norm": 763.4608154296875, - "learning_rate": 6.7441820789113085e-06, - "loss": 105.7726, - "step": 55430 - }, - { - "epoch": 0.4586176945030401, - "grad_norm": 946.8983764648438, - "learning_rate": 6.7428602366090764e-06, - "loss": 112.0909, - "step": 55440 - }, - { - "epoch": 0.45870041775240933, - "grad_norm": 932.01318359375, - "learning_rate": 6.741538255635454e-06, - "loss": 88.3237, - "step": 55450 - }, - { - "epoch": 0.4587831410017785, - "grad_norm": 829.2271118164062, - "learning_rate": 6.740216136095626e-06, - "loss": 77.0077, - "step": 55460 - }, - { - "epoch": 0.4588658642511478, - "grad_norm": 386.30364990234375, - "learning_rate": 6.738893878094786e-06, - "loss": 87.8386, - "step": 55470 - }, - { - "epoch": 0.45894858750051704, - "grad_norm": 1005.0819702148438, - "learning_rate": 6.737571481738141e-06, - "loss": 128.3534, - "step": 55480 - }, - { - "epoch": 0.45903131074988623, - "grad_norm": 921.326904296875, - "learning_rate": 6.736248947130907e-06, - "loss": 107.7665, - "step": 55490 - }, - { - "epoch": 0.4591140339992555, - "grad_norm": 1480.9969482421875, - "learning_rate": 6.734926274378313e-06, - "loss": 163.0793, - "step": 55500 - }, - { - "epoch": 0.45919675724862474, - "grad_norm": 955.8799438476562, - "learning_rate": 6.733603463585598e-06, - "loss": 93.535, - "step": 55510 - }, - { - "epoch": 0.45927948049799394, - "grad_norm": 1354.3538818359375, - "learning_rate": 6.73228051485801e-06, - "loss": 85.5063, - "step": 55520 - }, - { - "epoch": 0.4593622037473632, - "grad_norm": 536.1598510742188, - "learning_rate": 6.7309574283008125e-06, - "loss": 84.9367, - "step": 55530 - }, - { - "epoch": 0.45944492699673245, - "grad_norm": 663.9678344726562, - "learning_rate": 6.729634204019277e-06, - "loss": 104.2453, - "step": 55540 - }, - { - "epoch": 0.45952765024610165, - "grad_norm": 690.8705444335938, - "learning_rate": 6.7283108421186835e-06, - "loss": 108.3504, - "step": 55550 - }, - { - "epoch": 0.4596103734954709, - "grad_norm": 1198.9808349609375, - "learning_rate": 6.726987342704331e-06, - "loss": 83.5574, - "step": 55560 - }, - { - "epoch": 0.45969309674484016, - "grad_norm": 780.8455810546875, - "learning_rate": 6.72566370588152e-06, - "loss": 92.0601, - "step": 55570 - }, - { - "epoch": 0.45977581999420936, - "grad_norm": 1550.3758544921875, - "learning_rate": 6.724339931755568e-06, - "loss": 114.7621, - "step": 55580 - }, - { - "epoch": 0.4598585432435786, - "grad_norm": 515.3203735351562, - "learning_rate": 6.7230160204318e-06, - "loss": 85.6729, - "step": 55590 - }, - { - "epoch": 0.45994126649294786, - "grad_norm": 622.2483520507812, - "learning_rate": 6.721691972015557e-06, - "loss": 91.3313, - "step": 55600 - }, - { - "epoch": 0.46002398974231706, - "grad_norm": 1014.8973999023438, - "learning_rate": 6.720367786612185e-06, - "loss": 71.0175, - "step": 55610 - }, - { - "epoch": 0.4601067129916863, - "grad_norm": 2489.36962890625, - "learning_rate": 6.719043464327043e-06, - "loss": 82.0414, - "step": 55620 - }, - { - "epoch": 0.46018943624105557, - "grad_norm": 736.4323120117188, - "learning_rate": 6.717719005265502e-06, - "loss": 68.9147, - "step": 55630 - }, - { - "epoch": 0.46027215949042477, - "grad_norm": 749.0441284179688, - "learning_rate": 6.716394409532944e-06, - "loss": 117.8003, - "step": 55640 - }, - { - "epoch": 0.460354882739794, - "grad_norm": 869.51123046875, - "learning_rate": 6.715069677234758e-06, - "loss": 116.4776, - "step": 55650 - }, - { - "epoch": 0.4604376059891633, - "grad_norm": 686.819091796875, - "learning_rate": 6.713744808476349e-06, - "loss": 101.6366, - "step": 55660 - }, - { - "epoch": 0.4605203292385325, - "grad_norm": 1027.5953369140625, - "learning_rate": 6.712419803363132e-06, - "loss": 92.2464, - "step": 55670 - }, - { - "epoch": 0.46060305248790173, - "grad_norm": 743.2421875, - "learning_rate": 6.711094662000529e-06, - "loss": 104.2116, - "step": 55680 - }, - { - "epoch": 0.460685775737271, - "grad_norm": 992.1659545898438, - "learning_rate": 6.709769384493978e-06, - "loss": 102.0216, - "step": 55690 - }, - { - "epoch": 0.4607684989866402, - "grad_norm": 763.0567626953125, - "learning_rate": 6.708443970948923e-06, - "loss": 105.3009, - "step": 55700 - }, - { - "epoch": 0.46085122223600944, - "grad_norm": 1082.239501953125, - "learning_rate": 6.707118421470822e-06, - "loss": 82.143, - "step": 55710 - }, - { - "epoch": 0.4609339454853787, - "grad_norm": 943.666259765625, - "learning_rate": 6.705792736165142e-06, - "loss": 117.292, - "step": 55720 - }, - { - "epoch": 0.4610166687347479, - "grad_norm": 761.20166015625, - "learning_rate": 6.7044669151373645e-06, - "loss": 79.007, - "step": 55730 - }, - { - "epoch": 0.46109939198411715, - "grad_norm": 676.6871337890625, - "learning_rate": 6.7031409584929765e-06, - "loss": 96.3533, - "step": 55740 - }, - { - "epoch": 0.46118211523348634, - "grad_norm": 851.9503173828125, - "learning_rate": 6.701814866337477e-06, - "loss": 131.2042, - "step": 55750 - }, - { - "epoch": 0.4612648384828556, - "grad_norm": 1339.9852294921875, - "learning_rate": 6.700488638776379e-06, - "loss": 113.9575, - "step": 55760 - }, - { - "epoch": 0.46134756173222485, - "grad_norm": 945.3772583007812, - "learning_rate": 6.699162275915208e-06, - "loss": 88.1573, - "step": 55770 - }, - { - "epoch": 0.46143028498159405, - "grad_norm": 1223.3470458984375, - "learning_rate": 6.6978357778594896e-06, - "loss": 81.1195, - "step": 55780 - }, - { - "epoch": 0.4615130082309633, - "grad_norm": 922.9274291992188, - "learning_rate": 6.69650914471477e-06, - "loss": 97.8219, - "step": 55790 - }, - { - "epoch": 0.46159573148033256, - "grad_norm": 1972.91162109375, - "learning_rate": 6.695182376586603e-06, - "loss": 99.1887, - "step": 55800 - }, - { - "epoch": 0.46167845472970176, - "grad_norm": 1156.863525390625, - "learning_rate": 6.6938554735805565e-06, - "loss": 99.5834, - "step": 55810 - }, - { - "epoch": 0.461761177979071, - "grad_norm": 1650.710205078125, - "learning_rate": 6.6925284358022035e-06, - "loss": 155.1294, - "step": 55820 - }, - { - "epoch": 0.46184390122844027, - "grad_norm": 520.858642578125, - "learning_rate": 6.69120126335713e-06, - "loss": 122.3727, - "step": 55830 - }, - { - "epoch": 0.46192662447780947, - "grad_norm": 916.6767578125, - "learning_rate": 6.689873956350932e-06, - "loss": 75.5874, - "step": 55840 - }, - { - "epoch": 0.4620093477271787, - "grad_norm": 917.3632202148438, - "learning_rate": 6.688546514889221e-06, - "loss": 82.2032, - "step": 55850 - }, - { - "epoch": 0.462092070976548, - "grad_norm": 1109.2620849609375, - "learning_rate": 6.687218939077613e-06, - "loss": 93.1306, - "step": 55860 - }, - { - "epoch": 0.4621747942259172, - "grad_norm": 933.2094116210938, - "learning_rate": 6.685891229021736e-06, - "loss": 82.2505, - "step": 55870 - }, - { - "epoch": 0.46225751747528643, - "grad_norm": 930.94873046875, - "learning_rate": 6.6845633848272315e-06, - "loss": 113.1939, - "step": 55880 - }, - { - "epoch": 0.4623402407246557, - "grad_norm": 808.5699462890625, - "learning_rate": 6.68323540659975e-06, - "loss": 100.777, - "step": 55890 - }, - { - "epoch": 0.4624229639740249, - "grad_norm": 663.3551025390625, - "learning_rate": 6.681907294444952e-06, - "loss": 67.9267, - "step": 55900 - }, - { - "epoch": 0.46250568722339414, - "grad_norm": 1566.524169921875, - "learning_rate": 6.6805790484685094e-06, - "loss": 93.2308, - "step": 55910 - }, - { - "epoch": 0.4625884104727634, - "grad_norm": 1127.6248779296875, - "learning_rate": 6.679250668776105e-06, - "loss": 140.5565, - "step": 55920 - }, - { - "epoch": 0.4626711337221326, - "grad_norm": 1181.0595703125, - "learning_rate": 6.677922155473432e-06, - "loss": 99.0083, - "step": 55930 - }, - { - "epoch": 0.46275385697150184, - "grad_norm": 1022.6056518554688, - "learning_rate": 6.676593508666192e-06, - "loss": 135.9459, - "step": 55940 - }, - { - "epoch": 0.4628365802208711, - "grad_norm": 1002.8577880859375, - "learning_rate": 6.675264728460103e-06, - "loss": 85.4971, - "step": 55950 - }, - { - "epoch": 0.4629193034702403, - "grad_norm": 944.582763671875, - "learning_rate": 6.673935814960887e-06, - "loss": 107.2265, - "step": 55960 - }, - { - "epoch": 0.46300202671960955, - "grad_norm": 1288.594970703125, - "learning_rate": 6.672606768274281e-06, - "loss": 90.4464, - "step": 55970 - }, - { - "epoch": 0.4630847499689788, - "grad_norm": 14770.8916015625, - "learning_rate": 6.67127758850603e-06, - "loss": 241.7437, - "step": 55980 - }, - { - "epoch": 0.463167473218348, - "grad_norm": 957.4649047851562, - "learning_rate": 6.669948275761893e-06, - "loss": 94.3999, - "step": 55990 - }, - { - "epoch": 0.46325019646771726, - "grad_norm": 1285.9119873046875, - "learning_rate": 6.668618830147634e-06, - "loss": 125.4894, - "step": 56000 - }, - { - "epoch": 0.4633329197170865, - "grad_norm": 722.8943481445312, - "learning_rate": 6.667289251769033e-06, - "loss": 85.9025, - "step": 56010 - }, - { - "epoch": 0.4634156429664557, - "grad_norm": 1016.5885009765625, - "learning_rate": 6.6659595407318775e-06, - "loss": 107.344, - "step": 56020 - }, - { - "epoch": 0.46349836621582496, - "grad_norm": 900.4605712890625, - "learning_rate": 6.664629697141969e-06, - "loss": 111.3321, - "step": 56030 - }, - { - "epoch": 0.4635810894651942, - "grad_norm": 971.4387817382812, - "learning_rate": 6.663299721105113e-06, - "loss": 106.712, - "step": 56040 - }, - { - "epoch": 0.4636638127145634, - "grad_norm": 1102.5128173828125, - "learning_rate": 6.661969612727133e-06, - "loss": 94.0693, - "step": 56050 - }, - { - "epoch": 0.46374653596393267, - "grad_norm": 1239.50341796875, - "learning_rate": 6.660639372113858e-06, - "loss": 109.2637, - "step": 56060 - }, - { - "epoch": 0.4638292592133019, - "grad_norm": 986.812255859375, - "learning_rate": 6.65930899937113e-06, - "loss": 104.8453, - "step": 56070 - }, - { - "epoch": 0.4639119824626711, - "grad_norm": 1008.2682495117188, - "learning_rate": 6.657978494604799e-06, - "loss": 109.9477, - "step": 56080 - }, - { - "epoch": 0.4639947057120404, - "grad_norm": 683.7396850585938, - "learning_rate": 6.656647857920728e-06, - "loss": 109.3742, - "step": 56090 - }, - { - "epoch": 0.4640774289614096, - "grad_norm": 1494.6700439453125, - "learning_rate": 6.655317089424791e-06, - "loss": 106.0912, - "step": 56100 - }, - { - "epoch": 0.46416015221077883, - "grad_norm": 1352.005615234375, - "learning_rate": 6.6539861892228695e-06, - "loss": 96.1662, - "step": 56110 - }, - { - "epoch": 0.4642428754601481, - "grad_norm": 787.8734741210938, - "learning_rate": 6.652655157420859e-06, - "loss": 81.0995, - "step": 56120 - }, - { - "epoch": 0.4643255987095173, - "grad_norm": 837.31787109375, - "learning_rate": 6.651323994124661e-06, - "loss": 93.8052, - "step": 56130 - }, - { - "epoch": 0.46440832195888654, - "grad_norm": 1119.1298828125, - "learning_rate": 6.649992699440191e-06, - "loss": 86.7144, - "step": 56140 - }, - { - "epoch": 0.4644910452082558, - "grad_norm": 1033.329345703125, - "learning_rate": 6.648661273473375e-06, - "loss": 73.4606, - "step": 56150 - }, - { - "epoch": 0.464573768457625, - "grad_norm": 1087.544921875, - "learning_rate": 6.6473297163301485e-06, - "loss": 93.5026, - "step": 56160 - }, - { - "epoch": 0.46465649170699425, - "grad_norm": 637.343505859375, - "learning_rate": 6.645998028116455e-06, - "loss": 106.3616, - "step": 56170 - }, - { - "epoch": 0.4647392149563635, - "grad_norm": 1312.9207763671875, - "learning_rate": 6.6446662089382545e-06, - "loss": 126.186, - "step": 56180 - }, - { - "epoch": 0.4648219382057327, - "grad_norm": 756.4004516601562, - "learning_rate": 6.643334258901511e-06, - "loss": 110.0223, - "step": 56190 - }, - { - "epoch": 0.46490466145510195, - "grad_norm": 577.8167724609375, - "learning_rate": 6.642002178112202e-06, - "loss": 114.2335, - "step": 56200 - }, - { - "epoch": 0.4649873847044712, - "grad_norm": 419.95404052734375, - "learning_rate": 6.640669966676316e-06, - "loss": 88.2521, - "step": 56210 - }, - { - "epoch": 0.4650701079538404, - "grad_norm": 503.7681579589844, - "learning_rate": 6.6393376246998485e-06, - "loss": 105.7174, - "step": 56220 - }, - { - "epoch": 0.46515283120320966, - "grad_norm": 721.5469360351562, - "learning_rate": 6.638005152288811e-06, - "loss": 100.881, - "step": 56230 - }, - { - "epoch": 0.4652355544525789, - "grad_norm": 1773.4716796875, - "learning_rate": 6.636672549549221e-06, - "loss": 115.8908, - "step": 56240 - }, - { - "epoch": 0.4653182777019481, - "grad_norm": 960.2298583984375, - "learning_rate": 6.635339816587109e-06, - "loss": 109.554, - "step": 56250 - }, - { - "epoch": 0.46540100095131737, - "grad_norm": 1094.2969970703125, - "learning_rate": 6.634006953508512e-06, - "loss": 104.5612, - "step": 56260 - }, - { - "epoch": 0.4654837242006866, - "grad_norm": 706.4091186523438, - "learning_rate": 6.63267396041948e-06, - "loss": 113.3086, - "step": 56270 - }, - { - "epoch": 0.4655664474500558, - "grad_norm": 1204.162841796875, - "learning_rate": 6.631340837426075e-06, - "loss": 105.2585, - "step": 56280 - }, - { - "epoch": 0.4656491706994251, - "grad_norm": 807.52734375, - "learning_rate": 6.630007584634366e-06, - "loss": 78.1581, - "step": 56290 - }, - { - "epoch": 0.46573189394879433, - "grad_norm": 912.2439575195312, - "learning_rate": 6.628674202150434e-06, - "loss": 95.7974, - "step": 56300 - }, - { - "epoch": 0.4658146171981635, - "grad_norm": 793.3104858398438, - "learning_rate": 6.627340690080371e-06, - "loss": 94.2195, - "step": 56310 - }, - { - "epoch": 0.4658973404475328, - "grad_norm": 1065.8125, - "learning_rate": 6.626007048530276e-06, - "loss": 72.4793, - "step": 56320 - }, - { - "epoch": 0.46598006369690204, - "grad_norm": 565.6997680664062, - "learning_rate": 6.624673277606264e-06, - "loss": 90.7239, - "step": 56330 - }, - { - "epoch": 0.46606278694627123, - "grad_norm": 1088.072998046875, - "learning_rate": 6.623339377414456e-06, - "loss": 114.9387, - "step": 56340 - }, - { - "epoch": 0.4661455101956405, - "grad_norm": 1036.712158203125, - "learning_rate": 6.622005348060983e-06, - "loss": 98.3773, - "step": 56350 - }, - { - "epoch": 0.46622823344500974, - "grad_norm": 657.3320922851562, - "learning_rate": 6.620671189651988e-06, - "loss": 78.3256, - "step": 56360 - }, - { - "epoch": 0.46631095669437894, - "grad_norm": 396.6316223144531, - "learning_rate": 6.6193369022936245e-06, - "loss": 111.291, - "step": 56370 - }, - { - "epoch": 0.4663936799437482, - "grad_norm": 747.0289306640625, - "learning_rate": 6.618002486092056e-06, - "loss": 131.3509, - "step": 56380 - }, - { - "epoch": 0.46647640319311745, - "grad_norm": 645.677978515625, - "learning_rate": 6.616667941153456e-06, - "loss": 112.265, - "step": 56390 - }, - { - "epoch": 0.46655912644248665, - "grad_norm": 2745.71533203125, - "learning_rate": 6.615333267584007e-06, - "loss": 94.6054, - "step": 56400 - }, - { - "epoch": 0.4666418496918559, - "grad_norm": 617.6024780273438, - "learning_rate": 6.613998465489902e-06, - "loss": 86.2714, - "step": 56410 - }, - { - "epoch": 0.46672457294122516, - "grad_norm": 2982.6630859375, - "learning_rate": 6.612663534977347e-06, - "loss": 158.7063, - "step": 56420 - }, - { - "epoch": 0.46680729619059436, - "grad_norm": 372.95379638671875, - "learning_rate": 6.611328476152557e-06, - "loss": 127.7486, - "step": 56430 - }, - { - "epoch": 0.4668900194399636, - "grad_norm": 665.3734130859375, - "learning_rate": 6.609993289121753e-06, - "loss": 108.4631, - "step": 56440 - }, - { - "epoch": 0.4669727426893328, - "grad_norm": 637.3652954101562, - "learning_rate": 6.608657973991172e-06, - "loss": 84.1843, - "step": 56450 - }, - { - "epoch": 0.46705546593870206, - "grad_norm": 962.6121215820312, - "learning_rate": 6.607322530867061e-06, - "loss": 88.8814, - "step": 56460 - }, - { - "epoch": 0.4671381891880713, - "grad_norm": 588.2682495117188, - "learning_rate": 6.605986959855672e-06, - "loss": 76.9025, - "step": 56470 - }, - { - "epoch": 0.4672209124374405, - "grad_norm": 1287.7611083984375, - "learning_rate": 6.60465126106327e-06, - "loss": 120.8118, - "step": 56480 - }, - { - "epoch": 0.46730363568680977, - "grad_norm": 932.3352661132812, - "learning_rate": 6.6033154345961314e-06, - "loss": 89.3703, - "step": 56490 - }, - { - "epoch": 0.467386358936179, - "grad_norm": 577.4403076171875, - "learning_rate": 6.601979480560543e-06, - "loss": 94.8228, - "step": 56500 - }, - { - "epoch": 0.4674690821855482, - "grad_norm": 937.298583984375, - "learning_rate": 6.6006433990627985e-06, - "loss": 122.7913, - "step": 56510 - }, - { - "epoch": 0.4675518054349175, - "grad_norm": 588.9931030273438, - "learning_rate": 6.599307190209206e-06, - "loss": 112.8304, - "step": 56520 - }, - { - "epoch": 0.46763452868428673, - "grad_norm": 1339.0936279296875, - "learning_rate": 6.5979708541060796e-06, - "loss": 144.9437, - "step": 56530 - }, - { - "epoch": 0.46771725193365593, - "grad_norm": 1465.9853515625, - "learning_rate": 6.596634390859745e-06, - "loss": 91.1747, - "step": 56540 - }, - { - "epoch": 0.4677999751830252, - "grad_norm": 890.4679565429688, - "learning_rate": 6.59529780057654e-06, - "loss": 82.902, - "step": 56550 - }, - { - "epoch": 0.46788269843239444, - "grad_norm": 839.3494873046875, - "learning_rate": 6.593961083362811e-06, - "loss": 85.888, - "step": 56560 - }, - { - "epoch": 0.46796542168176364, - "grad_norm": 992.9126586914062, - "learning_rate": 6.592624239324914e-06, - "loss": 109.3493, - "step": 56570 - }, - { - "epoch": 0.4680481449311329, - "grad_norm": 1971.344482421875, - "learning_rate": 6.591287268569215e-06, - "loss": 99.3406, - "step": 56580 - }, - { - "epoch": 0.46813086818050215, - "grad_norm": 1292.9296875, - "learning_rate": 6.589950171202092e-06, - "loss": 67.5184, - "step": 56590 - }, - { - "epoch": 0.46821359142987135, - "grad_norm": 1166.9710693359375, - "learning_rate": 6.588612947329929e-06, - "loss": 121.0264, - "step": 56600 - }, - { - "epoch": 0.4682963146792406, - "grad_norm": 1211.2210693359375, - "learning_rate": 6.587275597059125e-06, - "loss": 90.2953, - "step": 56610 - }, - { - "epoch": 0.46837903792860985, - "grad_norm": 860.5660400390625, - "learning_rate": 6.585938120496087e-06, - "loss": 110.3769, - "step": 56620 - }, - { - "epoch": 0.46846176117797905, - "grad_norm": 1788.8858642578125, - "learning_rate": 6.584600517747232e-06, - "loss": 111.8886, - "step": 56630 - }, - { - "epoch": 0.4685444844273483, - "grad_norm": 826.596923828125, - "learning_rate": 6.583262788918985e-06, - "loss": 84.6638, - "step": 56640 - }, - { - "epoch": 0.46862720767671756, - "grad_norm": 794.5626220703125, - "learning_rate": 6.581924934117783e-06, - "loss": 108.4789, - "step": 56650 - }, - { - "epoch": 0.46870993092608676, - "grad_norm": 1118.61962890625, - "learning_rate": 6.580586953450076e-06, - "loss": 74.7545, - "step": 56660 - }, - { - "epoch": 0.468792654175456, - "grad_norm": 763.598388671875, - "learning_rate": 6.579248847022317e-06, - "loss": 78.2781, - "step": 56670 - }, - { - "epoch": 0.46887537742482527, - "grad_norm": 1182.76806640625, - "learning_rate": 6.577910614940978e-06, - "loss": 97.6059, - "step": 56680 - }, - { - "epoch": 0.46895810067419447, - "grad_norm": 1287.8709716796875, - "learning_rate": 6.576572257312531e-06, - "loss": 94.5327, - "step": 56690 - }, - { - "epoch": 0.4690408239235637, - "grad_norm": 625.0133666992188, - "learning_rate": 6.5752337742434644e-06, - "loss": 90.6583, - "step": 56700 - }, - { - "epoch": 0.469123547172933, - "grad_norm": 352.9053649902344, - "learning_rate": 6.573895165840276e-06, - "loss": 99.1602, - "step": 56710 - }, - { - "epoch": 0.4692062704223022, - "grad_norm": 1227.5992431640625, - "learning_rate": 6.5725564322094745e-06, - "loss": 106.9007, - "step": 56720 - }, - { - "epoch": 0.46928899367167143, - "grad_norm": 1981.240234375, - "learning_rate": 6.571217573457573e-06, - "loss": 110.456, - "step": 56730 - }, - { - "epoch": 0.4693717169210407, - "grad_norm": 915.96728515625, - "learning_rate": 6.569878589691101e-06, - "loss": 64.4055, - "step": 56740 - }, - { - "epoch": 0.4694544401704099, - "grad_norm": 1044.962890625, - "learning_rate": 6.568539481016593e-06, - "loss": 108.6498, - "step": 56750 - }, - { - "epoch": 0.46953716341977914, - "grad_norm": 1632.702880859375, - "learning_rate": 6.567200247540599e-06, - "loss": 123.9141, - "step": 56760 - }, - { - "epoch": 0.4696198866691484, - "grad_norm": 1159.7938232421875, - "learning_rate": 6.5658608893696714e-06, - "loss": 105.7761, - "step": 56770 - }, - { - "epoch": 0.4697026099185176, - "grad_norm": 1023.9876098632812, - "learning_rate": 6.564521406610382e-06, - "loss": 130.591, - "step": 56780 - }, - { - "epoch": 0.46978533316788684, - "grad_norm": 1106.73779296875, - "learning_rate": 6.563181799369301e-06, - "loss": 89.8389, - "step": 56790 - }, - { - "epoch": 0.4698680564172561, - "grad_norm": 636.095458984375, - "learning_rate": 6.561842067753021e-06, - "loss": 95.9526, - "step": 56800 - }, - { - "epoch": 0.4699507796666253, - "grad_norm": 725.4862670898438, - "learning_rate": 6.560502211868135e-06, - "loss": 71.1143, - "step": 56810 - }, - { - "epoch": 0.47003350291599455, - "grad_norm": 946.0494384765625, - "learning_rate": 6.55916223182125e-06, - "loss": 106.9812, - "step": 56820 - }, - { - "epoch": 0.47011622616536375, - "grad_norm": 1510.7655029296875, - "learning_rate": 6.55782212771898e-06, - "loss": 147.8765, - "step": 56830 - }, - { - "epoch": 0.470198949414733, - "grad_norm": 1354.79296875, - "learning_rate": 6.5564818996679536e-06, - "loss": 121.1624, - "step": 56840 - }, - { - "epoch": 0.47028167266410226, - "grad_norm": 1657.0260009765625, - "learning_rate": 6.555141547774807e-06, - "loss": 153.1369, - "step": 56850 - }, - { - "epoch": 0.47036439591347146, - "grad_norm": 432.9583435058594, - "learning_rate": 6.553801072146184e-06, - "loss": 112.8747, - "step": 56860 - }, - { - "epoch": 0.4704471191628407, - "grad_norm": 1424.265380859375, - "learning_rate": 6.55246047288874e-06, - "loss": 102.5585, - "step": 56870 - }, - { - "epoch": 0.47052984241220996, - "grad_norm": 842.8648071289062, - "learning_rate": 6.551119750109142e-06, - "loss": 95.888, - "step": 56880 - }, - { - "epoch": 0.47061256566157916, - "grad_norm": 1254.891845703125, - "learning_rate": 6.5497789039140635e-06, - "loss": 88.7369, - "step": 56890 - }, - { - "epoch": 0.4706952889109484, - "grad_norm": 593.7796020507812, - "learning_rate": 6.54843793441019e-06, - "loss": 117.424, - "step": 56900 - }, - { - "epoch": 0.47077801216031767, - "grad_norm": 5807.5322265625, - "learning_rate": 6.547096841704217e-06, - "loss": 123.5693, - "step": 56910 - }, - { - "epoch": 0.47086073540968687, - "grad_norm": 698.7401733398438, - "learning_rate": 6.545755625902848e-06, - "loss": 108.5493, - "step": 56920 - }, - { - "epoch": 0.4709434586590561, - "grad_norm": 770.432373046875, - "learning_rate": 6.544414287112798e-06, - "loss": 60.7358, - "step": 56930 - }, - { - "epoch": 0.4710261819084254, - "grad_norm": 772.1907348632812, - "learning_rate": 6.54307282544079e-06, - "loss": 90.2566, - "step": 56940 - }, - { - "epoch": 0.4711089051577946, - "grad_norm": 543.7560424804688, - "learning_rate": 6.5417312409935606e-06, - "loss": 74.9508, - "step": 56950 - }, - { - "epoch": 0.47119162840716383, - "grad_norm": 605.939697265625, - "learning_rate": 6.540389533877852e-06, - "loss": 117.9458, - "step": 56960 - }, - { - "epoch": 0.4712743516565331, - "grad_norm": 546.9862670898438, - "learning_rate": 6.539047704200417e-06, - "loss": 83.1111, - "step": 56970 - }, - { - "epoch": 0.4713570749059023, - "grad_norm": 860.575439453125, - "learning_rate": 6.53770575206802e-06, - "loss": 102.208, - "step": 56980 - }, - { - "epoch": 0.47143979815527154, - "grad_norm": 616.0123291015625, - "learning_rate": 6.536363677587433e-06, - "loss": 101.8752, - "step": 56990 - }, - { - "epoch": 0.4715225214046408, - "grad_norm": 912.4677124023438, - "learning_rate": 6.535021480865439e-06, - "loss": 94.7414, - "step": 57000 - }, - { - "epoch": 0.47160524465401, - "grad_norm": 1033.161376953125, - "learning_rate": 6.5336791620088306e-06, - "loss": 98.8203, - "step": 57010 - }, - { - "epoch": 0.47168796790337925, - "grad_norm": 813.3025512695312, - "learning_rate": 6.53233672112441e-06, - "loss": 111.74, - "step": 57020 - }, - { - "epoch": 0.4717706911527485, - "grad_norm": 1582.189208984375, - "learning_rate": 6.530994158318988e-06, - "loss": 113.1147, - "step": 57030 - }, - { - "epoch": 0.4718534144021177, - "grad_norm": 862.9869384765625, - "learning_rate": 6.529651473699389e-06, - "loss": 85.5126, - "step": 57040 - }, - { - "epoch": 0.47193613765148695, - "grad_norm": 1320.765869140625, - "learning_rate": 6.528308667372441e-06, - "loss": 101.8769, - "step": 57050 - }, - { - "epoch": 0.4720188609008562, - "grad_norm": 717.2901611328125, - "learning_rate": 6.526965739444988e-06, - "loss": 119.4057, - "step": 57060 - }, - { - "epoch": 0.4721015841502254, - "grad_norm": 699.390625, - "learning_rate": 6.525622690023878e-06, - "loss": 105.9801, - "step": 57070 - }, - { - "epoch": 0.47218430739959466, - "grad_norm": 1108.2218017578125, - "learning_rate": 6.524279519215972e-06, - "loss": 105.0386, - "step": 57080 - }, - { - "epoch": 0.4722670306489639, - "grad_norm": 1312.4166259765625, - "learning_rate": 6.522936227128139e-06, - "loss": 116.3358, - "step": 57090 - }, - { - "epoch": 0.4723497538983331, - "grad_norm": 881.8489379882812, - "learning_rate": 6.521592813867261e-06, - "loss": 115.962, - "step": 57100 - }, - { - "epoch": 0.47243247714770237, - "grad_norm": 1025.9259033203125, - "learning_rate": 6.520249279540227e-06, - "loss": 112.2708, - "step": 57110 - }, - { - "epoch": 0.4725152003970716, - "grad_norm": 968.8225708007812, - "learning_rate": 6.5189056242539325e-06, - "loss": 81.8784, - "step": 57120 - }, - { - "epoch": 0.4725979236464408, - "grad_norm": 776.3606567382812, - "learning_rate": 6.51756184811529e-06, - "loss": 97.869, - "step": 57130 - }, - { - "epoch": 0.4726806468958101, - "grad_norm": 1188.362060546875, - "learning_rate": 6.516217951231215e-06, - "loss": 78.5015, - "step": 57140 - }, - { - "epoch": 0.47276337014517933, - "grad_norm": 1396.0478515625, - "learning_rate": 6.514873933708637e-06, - "loss": 115.7227, - "step": 57150 - }, - { - "epoch": 0.47284609339454853, - "grad_norm": 701.3724365234375, - "learning_rate": 6.513529795654493e-06, - "loss": 91.0152, - "step": 57160 - }, - { - "epoch": 0.4729288166439178, - "grad_norm": 773.8468627929688, - "learning_rate": 6.512185537175727e-06, - "loss": 140.7189, - "step": 57170 - }, - { - "epoch": 0.473011539893287, - "grad_norm": 948.971923828125, - "learning_rate": 6.5108411583793e-06, - "loss": 163.1197, - "step": 57180 - }, - { - "epoch": 0.47309426314265624, - "grad_norm": 662.3345336914062, - "learning_rate": 6.509496659372175e-06, - "loss": 92.557, - "step": 57190 - }, - { - "epoch": 0.4731769863920255, - "grad_norm": 902.59521484375, - "learning_rate": 6.508152040261329e-06, - "loss": 98.1511, - "step": 57200 - }, - { - "epoch": 0.4732597096413947, - "grad_norm": 912.7888793945312, - "learning_rate": 6.506807301153746e-06, - "loss": 82.9847, - "step": 57210 - }, - { - "epoch": 0.47334243289076394, - "grad_norm": 711.9552001953125, - "learning_rate": 6.5054624421564204e-06, - "loss": 85.3893, - "step": 57220 - }, - { - "epoch": 0.4734251561401332, - "grad_norm": 996.9633178710938, - "learning_rate": 6.504117463376358e-06, - "loss": 103.8014, - "step": 57230 - }, - { - "epoch": 0.4735078793895024, - "grad_norm": 889.486083984375, - "learning_rate": 6.502772364920573e-06, - "loss": 119.4068, - "step": 57240 - }, - { - "epoch": 0.47359060263887165, - "grad_norm": 1359.7982177734375, - "learning_rate": 6.501427146896087e-06, - "loss": 131.6854, - "step": 57250 - }, - { - "epoch": 0.4736733258882409, - "grad_norm": 863.1353759765625, - "learning_rate": 6.5000818094099345e-06, - "loss": 125.5572, - "step": 57260 - }, - { - "epoch": 0.4737560491376101, - "grad_norm": 583.1968383789062, - "learning_rate": 6.498736352569155e-06, - "loss": 97.2687, - "step": 57270 - }, - { - "epoch": 0.47383877238697936, - "grad_norm": 613.0692138671875, - "learning_rate": 6.497390776480804e-06, - "loss": 83.3367, - "step": 57280 - }, - { - "epoch": 0.4739214956363486, - "grad_norm": 894.8236083984375, - "learning_rate": 6.49604508125194e-06, - "loss": 123.6242, - "step": 57290 - }, - { - "epoch": 0.4740042188857178, - "grad_norm": 1123.2113037109375, - "learning_rate": 6.4946992669896355e-06, - "loss": 90.5414, - "step": 57300 - }, - { - "epoch": 0.47408694213508706, - "grad_norm": 634.5689086914062, - "learning_rate": 6.493353333800969e-06, - "loss": 84.9406, - "step": 57310 - }, - { - "epoch": 0.4741696653844563, - "grad_norm": 618.6438598632812, - "learning_rate": 6.492007281793032e-06, - "loss": 101.8569, - "step": 57320 - }, - { - "epoch": 0.4742523886338255, - "grad_norm": 760.6466064453125, - "learning_rate": 6.490661111072923e-06, - "loss": 98.2763, - "step": 57330 - }, - { - "epoch": 0.47433511188319477, - "grad_norm": 642.0409545898438, - "learning_rate": 6.489314821747751e-06, - "loss": 82.7239, - "step": 57340 - }, - { - "epoch": 0.474417835132564, - "grad_norm": 491.7732849121094, - "learning_rate": 6.487968413924634e-06, - "loss": 82.5276, - "step": 57350 - }, - { - "epoch": 0.4745005583819332, - "grad_norm": 847.6077270507812, - "learning_rate": 6.486621887710698e-06, - "loss": 85.0193, - "step": 57360 - }, - { - "epoch": 0.4745832816313025, - "grad_norm": 1501.081787109375, - "learning_rate": 6.485275243213081e-06, - "loss": 88.0962, - "step": 57370 - }, - { - "epoch": 0.47466600488067173, - "grad_norm": 961.3021850585938, - "learning_rate": 6.4839284805389305e-06, - "loss": 131.3477, - "step": 57380 - }, - { - "epoch": 0.47474872813004093, - "grad_norm": 695.2109985351562, - "learning_rate": 6.4825815997954e-06, - "loss": 99.5357, - "step": 57390 - }, - { - "epoch": 0.4748314513794102, - "grad_norm": 478.3403625488281, - "learning_rate": 6.481234601089655e-06, - "loss": 96.97, - "step": 57400 - }, - { - "epoch": 0.47491417462877944, - "grad_norm": 967.1907348632812, - "learning_rate": 6.4798874845288725e-06, - "loss": 84.3332, - "step": 57410 - }, - { - "epoch": 0.47499689787814864, - "grad_norm": 789.9042358398438, - "learning_rate": 6.4785402502202345e-06, - "loss": 98.0186, - "step": 57420 - }, - { - "epoch": 0.4750796211275179, - "grad_norm": 515.9044799804688, - "learning_rate": 6.477192898270934e-06, - "loss": 98.3447, - "step": 57430 - }, - { - "epoch": 0.47516234437688715, - "grad_norm": 777.9580078125, - "learning_rate": 6.475845428788173e-06, - "loss": 92.8213, - "step": 57440 - }, - { - "epoch": 0.47524506762625635, - "grad_norm": 732.5836791992188, - "learning_rate": 6.474497841879166e-06, - "loss": 100.1301, - "step": 57450 - }, - { - "epoch": 0.4753277908756256, - "grad_norm": 905.1466064453125, - "learning_rate": 6.473150137651132e-06, - "loss": 87.0629, - "step": 57460 - }, - { - "epoch": 0.47541051412499485, - "grad_norm": 1096.08935546875, - "learning_rate": 6.471802316211302e-06, - "loss": 94.4893, - "step": 57470 - }, - { - "epoch": 0.47549323737436405, - "grad_norm": 781.8897094726562, - "learning_rate": 6.4704543776669174e-06, - "loss": 85.6178, - "step": 57480 - }, - { - "epoch": 0.4755759606237333, - "grad_norm": 1561.7882080078125, - "learning_rate": 6.469106322125227e-06, - "loss": 110.5366, - "step": 57490 - }, - { - "epoch": 0.47565868387310256, - "grad_norm": 773.6251220703125, - "learning_rate": 6.467758149693486e-06, - "loss": 94.7606, - "step": 57500 - }, - { - "epoch": 0.47574140712247176, - "grad_norm": 696.1630249023438, - "learning_rate": 6.466409860478967e-06, - "loss": 84.5489, - "step": 57510 - }, - { - "epoch": 0.475824130371841, - "grad_norm": 1580.1494140625, - "learning_rate": 6.465061454588946e-06, - "loss": 114.6306, - "step": 57520 - }, - { - "epoch": 0.47590685362121027, - "grad_norm": 584.140380859375, - "learning_rate": 6.463712932130708e-06, - "loss": 91.1199, - "step": 57530 - }, - { - "epoch": 0.47598957687057947, - "grad_norm": 678.8070068359375, - "learning_rate": 6.462364293211549e-06, - "loss": 80.3412, - "step": 57540 - }, - { - "epoch": 0.4760723001199487, - "grad_norm": 671.9398803710938, - "learning_rate": 6.4610155379387755e-06, - "loss": 116.5642, - "step": 57550 - }, - { - "epoch": 0.4761550233693179, - "grad_norm": 1506.737060546875, - "learning_rate": 6.459666666419699e-06, - "loss": 79.6158, - "step": 57560 - }, - { - "epoch": 0.4762377466186872, - "grad_norm": 695.2562255859375, - "learning_rate": 6.4583176787616466e-06, - "loss": 61.0726, - "step": 57570 - }, - { - "epoch": 0.47632046986805643, - "grad_norm": 1047.5999755859375, - "learning_rate": 6.456968575071951e-06, - "loss": 115.039, - "step": 57580 - }, - { - "epoch": 0.4764031931174256, - "grad_norm": 865.7882690429688, - "learning_rate": 6.45561935545795e-06, - "loss": 111.2492, - "step": 57590 - }, - { - "epoch": 0.4764859163667949, - "grad_norm": 991.0267333984375, - "learning_rate": 6.454270020026996e-06, - "loss": 88.2671, - "step": 57600 - }, - { - "epoch": 0.47656863961616414, - "grad_norm": 985.0994873046875, - "learning_rate": 6.452920568886452e-06, - "loss": 101.4334, - "step": 57610 - }, - { - "epoch": 0.47665136286553333, - "grad_norm": 621.8745727539062, - "learning_rate": 6.451571002143687e-06, - "loss": 80.6865, - "step": 57620 - }, - { - "epoch": 0.4767340861149026, - "grad_norm": 851.007080078125, - "learning_rate": 6.450221319906079e-06, - "loss": 93.8453, - "step": 57630 - }, - { - "epoch": 0.47681680936427184, - "grad_norm": 1132.76708984375, - "learning_rate": 6.448871522281016e-06, - "loss": 85.5419, - "step": 57640 - }, - { - "epoch": 0.47689953261364104, - "grad_norm": 739.5919189453125, - "learning_rate": 6.447521609375894e-06, - "loss": 67.1973, - "step": 57650 - }, - { - "epoch": 0.4769822558630103, - "grad_norm": 829.6648559570312, - "learning_rate": 6.446171581298123e-06, - "loss": 106.6791, - "step": 57660 - }, - { - "epoch": 0.47706497911237955, - "grad_norm": 1077.7239990234375, - "learning_rate": 6.444821438155115e-06, - "loss": 80.1578, - "step": 57670 - }, - { - "epoch": 0.47714770236174875, - "grad_norm": 654.5147094726562, - "learning_rate": 6.443471180054297e-06, - "loss": 69.1088, - "step": 57680 - }, - { - "epoch": 0.477230425611118, - "grad_norm": 1064.0953369140625, - "learning_rate": 6.442120807103102e-06, - "loss": 96.6103, - "step": 57690 - }, - { - "epoch": 0.47731314886048726, - "grad_norm": 846.7904663085938, - "learning_rate": 6.440770319408971e-06, - "loss": 113.4604, - "step": 57700 - }, - { - "epoch": 0.47739587210985646, - "grad_norm": 1160.3079833984375, - "learning_rate": 6.43941971707936e-06, - "loss": 110.0301, - "step": 57710 - }, - { - "epoch": 0.4774785953592257, - "grad_norm": 524.7237548828125, - "learning_rate": 6.438069000221727e-06, - "loss": 92.4454, - "step": 57720 - }, - { - "epoch": 0.47756131860859496, - "grad_norm": 363.4929504394531, - "learning_rate": 6.4367181689435434e-06, - "loss": 107.8367, - "step": 57730 - }, - { - "epoch": 0.47764404185796416, - "grad_norm": 953.7645263671875, - "learning_rate": 6.435367223352289e-06, - "loss": 121.483, - "step": 57740 - }, - { - "epoch": 0.4777267651073334, - "grad_norm": 1026.001708984375, - "learning_rate": 6.434016163555452e-06, - "loss": 91.6397, - "step": 57750 - }, - { - "epoch": 0.47780948835670267, - "grad_norm": 811.3062133789062, - "learning_rate": 6.432664989660531e-06, - "loss": 91.4573, - "step": 57760 - }, - { - "epoch": 0.47789221160607187, - "grad_norm": 1303.14111328125, - "learning_rate": 6.43131370177503e-06, - "loss": 112.111, - "step": 57770 - }, - { - "epoch": 0.4779749348554411, - "grad_norm": 754.6259765625, - "learning_rate": 6.429962300006468e-06, - "loss": 100.0983, - "step": 57780 - }, - { - "epoch": 0.4780576581048104, - "grad_norm": 738.0693359375, - "learning_rate": 6.428610784462368e-06, - "loss": 79.652, - "step": 57790 - }, - { - "epoch": 0.4781403813541796, - "grad_norm": 1683.4595947265625, - "learning_rate": 6.427259155250265e-06, - "loss": 95.6643, - "step": 57800 - }, - { - "epoch": 0.47822310460354883, - "grad_norm": 840.048828125, - "learning_rate": 6.4259074124777e-06, - "loss": 93.0407, - "step": 57810 - }, - { - "epoch": 0.4783058278529181, - "grad_norm": 849.6332397460938, - "learning_rate": 6.4245555562522265e-06, - "loss": 98.129, - "step": 57820 - }, - { - "epoch": 0.4783885511022873, - "grad_norm": 495.7574768066406, - "learning_rate": 6.423203586681406e-06, - "loss": 87.5308, - "step": 57830 - }, - { - "epoch": 0.47847127435165654, - "grad_norm": 998.4563598632812, - "learning_rate": 6.421851503872807e-06, - "loss": 124.0158, - "step": 57840 - }, - { - "epoch": 0.4785539976010258, - "grad_norm": 636.4727783203125, - "learning_rate": 6.42049930793401e-06, - "loss": 74.8035, - "step": 57850 - }, - { - "epoch": 0.478636720850395, - "grad_norm": 872.4989013671875, - "learning_rate": 6.419146998972602e-06, - "loss": 78.3126, - "step": 57860 - }, - { - "epoch": 0.47871944409976425, - "grad_norm": 1358.409912109375, - "learning_rate": 6.417794577096179e-06, - "loss": 98.5134, - "step": 57870 - }, - { - "epoch": 0.4788021673491335, - "grad_norm": 1421.047119140625, - "learning_rate": 6.41644204241235e-06, - "loss": 119.0474, - "step": 57880 - }, - { - "epoch": 0.4788848905985027, - "grad_norm": 750.8233642578125, - "learning_rate": 6.4150893950287275e-06, - "loss": 108.5803, - "step": 57890 - }, - { - "epoch": 0.47896761384787195, - "grad_norm": 2111.186767578125, - "learning_rate": 6.413736635052936e-06, - "loss": 141.4099, - "step": 57900 - }, - { - "epoch": 0.47905033709724115, - "grad_norm": 681.4733276367188, - "learning_rate": 6.41238376259261e-06, - "loss": 63.5274, - "step": 57910 - }, - { - "epoch": 0.4791330603466104, - "grad_norm": 805.0604248046875, - "learning_rate": 6.411030777755389e-06, - "loss": 98.8302, - "step": 57920 - }, - { - "epoch": 0.47921578359597966, - "grad_norm": 810.17041015625, - "learning_rate": 6.409677680648925e-06, - "loss": 113.7874, - "step": 57930 - }, - { - "epoch": 0.47929850684534886, - "grad_norm": 827.2803344726562, - "learning_rate": 6.4083244713808765e-06, - "loss": 119.2214, - "step": 57940 - }, - { - "epoch": 0.4793812300947181, - "grad_norm": 1362.2303466796875, - "learning_rate": 6.406971150058914e-06, - "loss": 92.4825, - "step": 57950 - }, - { - "epoch": 0.47946395334408737, - "grad_norm": 855.9775390625, - "learning_rate": 6.405617716790714e-06, - "loss": 79.9808, - "step": 57960 - }, - { - "epoch": 0.47954667659345657, - "grad_norm": 632.9134521484375, - "learning_rate": 6.404264171683965e-06, - "loss": 87.7965, - "step": 57970 - }, - { - "epoch": 0.4796293998428258, - "grad_norm": 731.3795166015625, - "learning_rate": 6.402910514846358e-06, - "loss": 82.6081, - "step": 57980 - }, - { - "epoch": 0.4797121230921951, - "grad_norm": 498.25, - "learning_rate": 6.4015567463856e-06, - "loss": 82.2082, - "step": 57990 - }, - { - "epoch": 0.4797948463415643, - "grad_norm": 1057.318359375, - "learning_rate": 6.400202866409405e-06, - "loss": 111.3383, - "step": 58000 - }, - { - "epoch": 0.47987756959093353, - "grad_norm": 775.5044555664062, - "learning_rate": 6.398848875025494e-06, - "loss": 88.3121, - "step": 58010 - }, - { - "epoch": 0.4799602928403028, - "grad_norm": 1417.432373046875, - "learning_rate": 6.3974947723415985e-06, - "loss": 95.6965, - "step": 58020 - }, - { - "epoch": 0.480043016089672, - "grad_norm": 644.7517700195312, - "learning_rate": 6.396140558465456e-06, - "loss": 104.0072, - "step": 58030 - }, - { - "epoch": 0.48012573933904124, - "grad_norm": 849.2105712890625, - "learning_rate": 6.394786233504816e-06, - "loss": 86.352, - "step": 58040 - }, - { - "epoch": 0.4802084625884105, - "grad_norm": 824.1756591796875, - "learning_rate": 6.39343179756744e-06, - "loss": 114.2667, - "step": 58050 - }, - { - "epoch": 0.4802911858377797, - "grad_norm": 567.4685668945312, - "learning_rate": 6.392077250761088e-06, - "loss": 88.4801, - "step": 58060 - }, - { - "epoch": 0.48037390908714894, - "grad_norm": 910.6168823242188, - "learning_rate": 6.390722593193538e-06, - "loss": 85.6822, - "step": 58070 - }, - { - "epoch": 0.4804566323365182, - "grad_norm": 924.6448364257812, - "learning_rate": 6.389367824972575e-06, - "loss": 96.7753, - "step": 58080 - }, - { - "epoch": 0.4805393555858874, - "grad_norm": 814.3735961914062, - "learning_rate": 6.388012946205991e-06, - "loss": 90.9101, - "step": 58090 - }, - { - "epoch": 0.48062207883525665, - "grad_norm": 1433.7420654296875, - "learning_rate": 6.386657957001585e-06, - "loss": 105.3125, - "step": 58100 - }, - { - "epoch": 0.4807048020846259, - "grad_norm": 841.0825805664062, - "learning_rate": 6.38530285746717e-06, - "loss": 128.7726, - "step": 58110 - }, - { - "epoch": 0.4807875253339951, - "grad_norm": 1228.2952880859375, - "learning_rate": 6.383947647710565e-06, - "loss": 114.0045, - "step": 58120 - }, - { - "epoch": 0.48087024858336436, - "grad_norm": 716.3721313476562, - "learning_rate": 6.382592327839596e-06, - "loss": 96.0517, - "step": 58130 - }, - { - "epoch": 0.4809529718327336, - "grad_norm": 837.0059204101562, - "learning_rate": 6.381236897962102e-06, - "loss": 112.9432, - "step": 58140 - }, - { - "epoch": 0.4810356950821028, - "grad_norm": 1200.560302734375, - "learning_rate": 6.379881358185926e-06, - "loss": 126.8512, - "step": 58150 - }, - { - "epoch": 0.48111841833147206, - "grad_norm": 772.40478515625, - "learning_rate": 6.378525708618924e-06, - "loss": 75.8569, - "step": 58160 - }, - { - "epoch": 0.4812011415808413, - "grad_norm": 980.9589233398438, - "learning_rate": 6.377169949368956e-06, - "loss": 83.9632, - "step": 58170 - }, - { - "epoch": 0.4812838648302105, - "grad_norm": 647.2731323242188, - "learning_rate": 6.375814080543899e-06, - "loss": 104.6065, - "step": 58180 - }, - { - "epoch": 0.48136658807957977, - "grad_norm": 1349.8760986328125, - "learning_rate": 6.3744581022516285e-06, - "loss": 82.4153, - "step": 58190 - }, - { - "epoch": 0.481449311328949, - "grad_norm": 1214.870361328125, - "learning_rate": 6.373102014600033e-06, - "loss": 106.5302, - "step": 58200 - }, - { - "epoch": 0.4815320345783182, - "grad_norm": 1136.8599853515625, - "learning_rate": 6.371745817697012e-06, - "loss": 110.1129, - "step": 58210 - }, - { - "epoch": 0.4816147578276875, - "grad_norm": 2490.23876953125, - "learning_rate": 6.370389511650474e-06, - "loss": 138.5235, - "step": 58220 - }, - { - "epoch": 0.48169748107705673, - "grad_norm": 851.6533203125, - "learning_rate": 6.3690330965683304e-06, - "loss": 105.1765, - "step": 58230 - }, - { - "epoch": 0.48178020432642593, - "grad_norm": 635.8304443359375, - "learning_rate": 6.367676572558506e-06, - "loss": 138.1114, - "step": 58240 - }, - { - "epoch": 0.4818629275757952, - "grad_norm": 747.5317993164062, - "learning_rate": 6.366319939728934e-06, - "loss": 86.2269, - "step": 58250 - }, - { - "epoch": 0.48194565082516444, - "grad_norm": 1157.3438720703125, - "learning_rate": 6.364963198187555e-06, - "loss": 81.2648, - "step": 58260 - }, - { - "epoch": 0.48202837407453364, - "grad_norm": 961.3770141601562, - "learning_rate": 6.363606348042318e-06, - "loss": 69.4496, - "step": 58270 - }, - { - "epoch": 0.4821110973239029, - "grad_norm": 823.779541015625, - "learning_rate": 6.362249389401183e-06, - "loss": 96.8622, - "step": 58280 - }, - { - "epoch": 0.4821938205732721, - "grad_norm": 510.96942138671875, - "learning_rate": 6.360892322372115e-06, - "loss": 77.7201, - "step": 58290 - }, - { - "epoch": 0.48227654382264135, - "grad_norm": 745.046875, - "learning_rate": 6.359535147063092e-06, - "loss": 81.9999, - "step": 58300 - }, - { - "epoch": 0.4823592670720106, - "grad_norm": 611.5297241210938, - "learning_rate": 6.358177863582095e-06, - "loss": 87.0968, - "step": 58310 - }, - { - "epoch": 0.4824419903213798, - "grad_norm": 860.2018432617188, - "learning_rate": 6.35682047203712e-06, - "loss": 81.5001, - "step": 58320 - }, - { - "epoch": 0.48252471357074905, - "grad_norm": 439.9252014160156, - "learning_rate": 6.355462972536166e-06, - "loss": 88.4102, - "step": 58330 - }, - { - "epoch": 0.4826074368201183, - "grad_norm": 922.5792846679688, - "learning_rate": 6.354105365187244e-06, - "loss": 91.7119, - "step": 58340 - }, - { - "epoch": 0.4826901600694875, - "grad_norm": 1041.380615234375, - "learning_rate": 6.352747650098373e-06, - "loss": 104.9556, - "step": 58350 - }, - { - "epoch": 0.48277288331885676, - "grad_norm": 902.4854125976562, - "learning_rate": 6.35138982737758e-06, - "loss": 104.9955, - "step": 58360 - }, - { - "epoch": 0.482855606568226, - "grad_norm": 856.4240112304688, - "learning_rate": 6.3500318971329e-06, - "loss": 68.9174, - "step": 58370 - }, - { - "epoch": 0.4829383298175952, - "grad_norm": 613.3790893554688, - "learning_rate": 6.348673859472378e-06, - "loss": 104.4474, - "step": 58380 - }, - { - "epoch": 0.48302105306696447, - "grad_norm": 625.4129028320312, - "learning_rate": 6.347315714504066e-06, - "loss": 104.9294, - "step": 58390 - }, - { - "epoch": 0.4831037763163337, - "grad_norm": 597.6600341796875, - "learning_rate": 6.345957462336026e-06, - "loss": 81.4773, - "step": 58400 - }, - { - "epoch": 0.4831864995657029, - "grad_norm": 531.10986328125, - "learning_rate": 6.344599103076329e-06, - "loss": 100.6717, - "step": 58410 - }, - { - "epoch": 0.4832692228150722, - "grad_norm": 812.7492065429688, - "learning_rate": 6.343240636833051e-06, - "loss": 108.6641, - "step": 58420 - }, - { - "epoch": 0.48335194606444143, - "grad_norm": 1158.002685546875, - "learning_rate": 6.341882063714282e-06, - "loss": 125.5026, - "step": 58430 - }, - { - "epoch": 0.4834346693138106, - "grad_norm": 846.1316528320312, - "learning_rate": 6.340523383828115e-06, - "loss": 107.7689, - "step": 58440 - }, - { - "epoch": 0.4835173925631799, - "grad_norm": 1098.09130859375, - "learning_rate": 6.339164597282652e-06, - "loss": 106.0673, - "step": 58450 - }, - { - "epoch": 0.48360011581254914, - "grad_norm": 925.63818359375, - "learning_rate": 6.337805704186011e-06, - "loss": 118.7983, - "step": 58460 - }, - { - "epoch": 0.48368283906191833, - "grad_norm": 558.7349853515625, - "learning_rate": 6.336446704646307e-06, - "loss": 105.5596, - "step": 58470 - }, - { - "epoch": 0.4837655623112876, - "grad_norm": 918.0574340820312, - "learning_rate": 6.335087598771676e-06, - "loss": 102.35, - "step": 58480 - }, - { - "epoch": 0.48384828556065684, - "grad_norm": 2873.980712890625, - "learning_rate": 6.333728386670249e-06, - "loss": 102.2267, - "step": 58490 - }, - { - "epoch": 0.48393100881002604, - "grad_norm": 740.7039794921875, - "learning_rate": 6.332369068450175e-06, - "loss": 92.0805, - "step": 58500 - }, - { - "epoch": 0.4840137320593953, - "grad_norm": 1080.8912353515625, - "learning_rate": 6.33100964421961e-06, - "loss": 99.742, - "step": 58510 - }, - { - "epoch": 0.48409645530876455, - "grad_norm": 1029.510009765625, - "learning_rate": 6.329650114086717e-06, - "loss": 77.3601, - "step": 58520 - }, - { - "epoch": 0.48417917855813375, - "grad_norm": 739.2198486328125, - "learning_rate": 6.328290478159666e-06, - "loss": 108.9343, - "step": 58530 - }, - { - "epoch": 0.484261901807503, - "grad_norm": 584.7896118164062, - "learning_rate": 6.326930736546637e-06, - "loss": 87.4123, - "step": 58540 - }, - { - "epoch": 0.48434462505687226, - "grad_norm": 1001.3221435546875, - "learning_rate": 6.325570889355819e-06, - "loss": 115.3958, - "step": 58550 - }, - { - "epoch": 0.48442734830624146, - "grad_norm": 0.0, - "learning_rate": 6.32421093669541e-06, - "loss": 100.6869, - "step": 58560 - }, - { - "epoch": 0.4845100715556107, - "grad_norm": 1452.001953125, - "learning_rate": 6.322850878673614e-06, - "loss": 127.1429, - "step": 58570 - }, - { - "epoch": 0.48459279480497996, - "grad_norm": 841.3821411132812, - "learning_rate": 6.321490715398644e-06, - "loss": 94.9818, - "step": 58580 - }, - { - "epoch": 0.48467551805434916, - "grad_norm": 813.81298828125, - "learning_rate": 6.320130446978722e-06, - "loss": 76.4308, - "step": 58590 - }, - { - "epoch": 0.4847582413037184, - "grad_norm": 1246.0950927734375, - "learning_rate": 6.31877007352208e-06, - "loss": 111.9752, - "step": 58600 - }, - { - "epoch": 0.48484096455308767, - "grad_norm": 822.40283203125, - "learning_rate": 6.317409595136956e-06, - "loss": 90.058, - "step": 58610 - }, - { - "epoch": 0.48492368780245687, - "grad_norm": 1240.24609375, - "learning_rate": 6.316049011931595e-06, - "loss": 118.0982, - "step": 58620 - }, - { - "epoch": 0.4850064110518261, - "grad_norm": 710.9925537109375, - "learning_rate": 6.314688324014255e-06, - "loss": 99.1123, - "step": 58630 - }, - { - "epoch": 0.4850891343011953, - "grad_norm": 1035.0576171875, - "learning_rate": 6.3133275314931995e-06, - "loss": 123.4674, - "step": 58640 - }, - { - "epoch": 0.4851718575505646, - "grad_norm": 811.2945556640625, - "learning_rate": 6.311966634476698e-06, - "loss": 93.8845, - "step": 58650 - }, - { - "epoch": 0.48525458079993383, - "grad_norm": 568.3858642578125, - "learning_rate": 6.3106056330730335e-06, - "loss": 96.4744, - "step": 58660 - }, - { - "epoch": 0.48533730404930303, - "grad_norm": 682.4791259765625, - "learning_rate": 6.309244527390493e-06, - "loss": 96.0774, - "step": 58670 - }, - { - "epoch": 0.4854200272986723, - "grad_norm": 1391.8082275390625, - "learning_rate": 6.307883317537375e-06, - "loss": 113.0187, - "step": 58680 - }, - { - "epoch": 0.48550275054804154, - "grad_norm": 672.600830078125, - "learning_rate": 6.306522003621983e-06, - "loss": 79.8365, - "step": 58690 - }, - { - "epoch": 0.48558547379741074, - "grad_norm": 1114.2835693359375, - "learning_rate": 6.305160585752632e-06, - "loss": 116.9864, - "step": 58700 - }, - { - "epoch": 0.48566819704678, - "grad_norm": 904.7263793945312, - "learning_rate": 6.303799064037643e-06, - "loss": 95.5957, - "step": 58710 - }, - { - "epoch": 0.48575092029614925, - "grad_norm": 1291.3994140625, - "learning_rate": 6.302437438585345e-06, - "loss": 107.933, - "step": 58720 - }, - { - "epoch": 0.48583364354551845, - "grad_norm": 1033.6456298828125, - "learning_rate": 6.301075709504077e-06, - "loss": 132.5331, - "step": 58730 - }, - { - "epoch": 0.4859163667948877, - "grad_norm": 939.7048950195312, - "learning_rate": 6.299713876902188e-06, - "loss": 93.3385, - "step": 58740 - }, - { - "epoch": 0.48599909004425695, - "grad_norm": 1379.9310302734375, - "learning_rate": 6.29835194088803e-06, - "loss": 93.6808, - "step": 58750 - }, - { - "epoch": 0.48608181329362615, - "grad_norm": 850.0859375, - "learning_rate": 6.296989901569966e-06, - "loss": 106.2699, - "step": 58760 - }, - { - "epoch": 0.4861645365429954, - "grad_norm": 845.896728515625, - "learning_rate": 6.295627759056368e-06, - "loss": 97.0875, - "step": 58770 - }, - { - "epoch": 0.48624725979236466, - "grad_norm": 910.60302734375, - "learning_rate": 6.294265513455616e-06, - "loss": 107.4305, - "step": 58780 - }, - { - "epoch": 0.48632998304173386, - "grad_norm": 895.4052734375, - "learning_rate": 6.292903164876097e-06, - "loss": 99.333, - "step": 58790 - }, - { - "epoch": 0.4864127062911031, - "grad_norm": 443.9016418457031, - "learning_rate": 6.291540713426206e-06, - "loss": 76.0029, - "step": 58800 - }, - { - "epoch": 0.48649542954047237, - "grad_norm": 621.6075439453125, - "learning_rate": 6.290178159214349e-06, - "loss": 88.676, - "step": 58810 - }, - { - "epoch": 0.48657815278984157, - "grad_norm": 459.60284423828125, - "learning_rate": 6.288815502348935e-06, - "loss": 94.1174, - "step": 58820 - }, - { - "epoch": 0.4866608760392108, - "grad_norm": 1254.715576171875, - "learning_rate": 6.287452742938388e-06, - "loss": 91.2785, - "step": 58830 - }, - { - "epoch": 0.4867435992885801, - "grad_norm": 1114.0794677734375, - "learning_rate": 6.286089881091134e-06, - "loss": 139.8125, - "step": 58840 - }, - { - "epoch": 0.4868263225379493, - "grad_norm": 853.3452758789062, - "learning_rate": 6.284726916915611e-06, - "loss": 101.1512, - "step": 58850 - }, - { - "epoch": 0.48690904578731853, - "grad_norm": 741.5625610351562, - "learning_rate": 6.2833638505202635e-06, - "loss": 115.4677, - "step": 58860 - }, - { - "epoch": 0.4869917690366878, - "grad_norm": 1342.6466064453125, - "learning_rate": 6.282000682013545e-06, - "loss": 93.9246, - "step": 58870 - }, - { - "epoch": 0.487074492286057, - "grad_norm": 594.8483276367188, - "learning_rate": 6.280637411503913e-06, - "loss": 108.2251, - "step": 58880 - }, - { - "epoch": 0.48715721553542624, - "grad_norm": 899.0263061523438, - "learning_rate": 6.279274039099842e-06, - "loss": 93.0218, - "step": 58890 - }, - { - "epoch": 0.4872399387847955, - "grad_norm": 858.6878662109375, - "learning_rate": 6.277910564909806e-06, - "loss": 91.2543, - "step": 58900 - }, - { - "epoch": 0.4873226620341647, - "grad_norm": 974.9313354492188, - "learning_rate": 6.276546989042292e-06, - "loss": 91.2285, - "step": 58910 - }, - { - "epoch": 0.48740538528353394, - "grad_norm": 653.6936645507812, - "learning_rate": 6.275183311605793e-06, - "loss": 101.074, - "step": 58920 - }, - { - "epoch": 0.4874881085329032, - "grad_norm": 816.3530883789062, - "learning_rate": 6.273819532708807e-06, - "loss": 94.144, - "step": 58930 - }, - { - "epoch": 0.4875708317822724, - "grad_norm": 1098.2479248046875, - "learning_rate": 6.27245565245985e-06, - "loss": 104.6022, - "step": 58940 - }, - { - "epoch": 0.48765355503164165, - "grad_norm": 1253.642333984375, - "learning_rate": 6.271091670967437e-06, - "loss": 101.674, - "step": 58950 - }, - { - "epoch": 0.4877362782810109, - "grad_norm": 577.8956909179688, - "learning_rate": 6.269727588340091e-06, - "loss": 82.5646, - "step": 58960 - }, - { - "epoch": 0.4878190015303801, - "grad_norm": 667.8779296875, - "learning_rate": 6.268363404686348e-06, - "loss": 116.7945, - "step": 58970 - }, - { - "epoch": 0.48790172477974936, - "grad_norm": 716.1468505859375, - "learning_rate": 6.26699912011475e-06, - "loss": 118.8406, - "step": 58980 - }, - { - "epoch": 0.4879844480291186, - "grad_norm": 606.7252197265625, - "learning_rate": 6.265634734733848e-06, - "loss": 74.8637, - "step": 58990 - }, - { - "epoch": 0.4880671712784878, - "grad_norm": 1299.558349609375, - "learning_rate": 6.264270248652199e-06, - "loss": 110.4696, - "step": 59000 - }, - { - "epoch": 0.48814989452785706, - "grad_norm": 913.1611938476562, - "learning_rate": 6.262905661978367e-06, - "loss": 79.307, - "step": 59010 - }, - { - "epoch": 0.48823261777722626, - "grad_norm": 590.417724609375, - "learning_rate": 6.261540974820928e-06, - "loss": 99.6395, - "step": 59020 - }, - { - "epoch": 0.4883153410265955, - "grad_norm": 696.5634155273438, - "learning_rate": 6.260176187288463e-06, - "loss": 100.7072, - "step": 59030 - }, - { - "epoch": 0.48839806427596477, - "grad_norm": 1761.085693359375, - "learning_rate": 6.2588112994895636e-06, - "loss": 151.4153, - "step": 59040 - }, - { - "epoch": 0.48848078752533397, - "grad_norm": 1071.5557861328125, - "learning_rate": 6.257446311532824e-06, - "loss": 69.3153, - "step": 59050 - }, - { - "epoch": 0.4885635107747032, - "grad_norm": 1153.7186279296875, - "learning_rate": 6.256081223526854e-06, - "loss": 93.4576, - "step": 59060 - }, - { - "epoch": 0.4886462340240725, - "grad_norm": 1251.8411865234375, - "learning_rate": 6.254716035580264e-06, - "loss": 71.2112, - "step": 59070 - }, - { - "epoch": 0.4887289572734417, - "grad_norm": 653.9671630859375, - "learning_rate": 6.25335074780168e-06, - "loss": 101.7776, - "step": 59080 - }, - { - "epoch": 0.48881168052281093, - "grad_norm": 917.6962280273438, - "learning_rate": 6.251985360299728e-06, - "loss": 109.7955, - "step": 59090 - }, - { - "epoch": 0.4888944037721802, - "grad_norm": 1049.5726318359375, - "learning_rate": 6.250619873183046e-06, - "loss": 97.9137, - "step": 59100 - }, - { - "epoch": 0.4889771270215494, - "grad_norm": 960.5833129882812, - "learning_rate": 6.249254286560281e-06, - "loss": 115.5338, - "step": 59110 - }, - { - "epoch": 0.48905985027091864, - "grad_norm": 1100.854248046875, - "learning_rate": 6.247888600540084e-06, - "loss": 102.4374, - "step": 59120 - }, - { - "epoch": 0.4891425735202879, - "grad_norm": 858.7109985351562, - "learning_rate": 6.246522815231121e-06, - "loss": 86.8003, - "step": 59130 - }, - { - "epoch": 0.4892252967696571, - "grad_norm": 758.7923583984375, - "learning_rate": 6.245156930742057e-06, - "loss": 99.9746, - "step": 59140 - }, - { - "epoch": 0.48930802001902635, - "grad_norm": 957.5679931640625, - "learning_rate": 6.24379094718157e-06, - "loss": 105.2421, - "step": 59150 - }, - { - "epoch": 0.4893907432683956, - "grad_norm": 932.5010986328125, - "learning_rate": 6.2424248646583455e-06, - "loss": 109.755, - "step": 59160 - }, - { - "epoch": 0.4894734665177648, - "grad_norm": 1039.6802978515625, - "learning_rate": 6.241058683281077e-06, - "loss": 157.5874, - "step": 59170 - }, - { - "epoch": 0.48955618976713405, - "grad_norm": 1249.9261474609375, - "learning_rate": 6.239692403158465e-06, - "loss": 80.0913, - "step": 59180 - }, - { - "epoch": 0.4896389130165033, - "grad_norm": 769.3261108398438, - "learning_rate": 6.238326024399217e-06, - "loss": 116.0565, - "step": 59190 - }, - { - "epoch": 0.4897216362658725, - "grad_norm": 3054.625, - "learning_rate": 6.236959547112051e-06, - "loss": 135.2796, - "step": 59200 - }, - { - "epoch": 0.48980435951524176, - "grad_norm": 1218.1517333984375, - "learning_rate": 6.235592971405691e-06, - "loss": 96.0189, - "step": 59210 - }, - { - "epoch": 0.489887082764611, - "grad_norm": 964.074462890625, - "learning_rate": 6.234226297388869e-06, - "loss": 135.8784, - "step": 59220 - }, - { - "epoch": 0.4899698060139802, - "grad_norm": 822.4147338867188, - "learning_rate": 6.232859525170324e-06, - "loss": 74.8447, - "step": 59230 - }, - { - "epoch": 0.49005252926334947, - "grad_norm": 1401.8765869140625, - "learning_rate": 6.231492654858805e-06, - "loss": 124.0554, - "step": 59240 - }, - { - "epoch": 0.4901352525127187, - "grad_norm": 665.4353637695312, - "learning_rate": 6.230125686563068e-06, - "loss": 116.1569, - "step": 59250 - }, - { - "epoch": 0.4902179757620879, - "grad_norm": 1295.380126953125, - "learning_rate": 6.2287586203918745e-06, - "loss": 114.117, - "step": 59260 - }, - { - "epoch": 0.4903006990114572, - "grad_norm": 1130.935791015625, - "learning_rate": 6.227391456453997e-06, - "loss": 73.4887, - "step": 59270 - }, - { - "epoch": 0.49038342226082643, - "grad_norm": 615.1046142578125, - "learning_rate": 6.226024194858214e-06, - "loss": 83.7547, - "step": 59280 - }, - { - "epoch": 0.49046614551019563, - "grad_norm": 909.8365478515625, - "learning_rate": 6.224656835713313e-06, - "loss": 99.4262, - "step": 59290 - }, - { - "epoch": 0.4905488687595649, - "grad_norm": 768.0503540039062, - "learning_rate": 6.223289379128088e-06, - "loss": 88.5417, - "step": 59300 - }, - { - "epoch": 0.49063159200893414, - "grad_norm": 1546.40771484375, - "learning_rate": 6.221921825211342e-06, - "loss": 88.7849, - "step": 59310 - }, - { - "epoch": 0.49071431525830334, - "grad_norm": 1119.7420654296875, - "learning_rate": 6.220554174071884e-06, - "loss": 216.5368, - "step": 59320 - }, - { - "epoch": 0.4907970385076726, - "grad_norm": 738.0167846679688, - "learning_rate": 6.219186425818531e-06, - "loss": 111.826, - "step": 59330 - }, - { - "epoch": 0.49087976175704184, - "grad_norm": 1079.2379150390625, - "learning_rate": 6.217818580560111e-06, - "loss": 138.3639, - "step": 59340 - }, - { - "epoch": 0.49096248500641104, - "grad_norm": 455.43560791015625, - "learning_rate": 6.216450638405454e-06, - "loss": 106.5501, - "step": 59350 - }, - { - "epoch": 0.4910452082557803, - "grad_norm": 1222.3670654296875, - "learning_rate": 6.2150825994634025e-06, - "loss": 76.1361, - "step": 59360 - }, - { - "epoch": 0.4911279315051495, - "grad_norm": 2011.9967041015625, - "learning_rate": 6.2137144638428045e-06, - "loss": 113.7866, - "step": 59370 - }, - { - "epoch": 0.49121065475451875, - "grad_norm": 1221.827880859375, - "learning_rate": 6.21234623165252e-06, - "loss": 79.5845, - "step": 59380 - }, - { - "epoch": 0.491293378003888, - "grad_norm": 780.6744384765625, - "learning_rate": 6.210977903001406e-06, - "loss": 88.4106, - "step": 59390 - }, - { - "epoch": 0.4913761012532572, - "grad_norm": 755.7965087890625, - "learning_rate": 6.209609477998339e-06, - "loss": 97.9078, - "step": 59400 - }, - { - "epoch": 0.49145882450262646, - "grad_norm": 883.141357421875, - "learning_rate": 6.2082409567521975e-06, - "loss": 97.257, - "step": 59410 - }, - { - "epoch": 0.4915415477519957, - "grad_norm": 1329.85107421875, - "learning_rate": 6.206872339371867e-06, - "loss": 101.6907, - "step": 59420 - }, - { - "epoch": 0.4916242710013649, - "grad_norm": 2107.642578125, - "learning_rate": 6.205503625966247e-06, - "loss": 109.3981, - "step": 59430 - }, - { - "epoch": 0.49170699425073416, - "grad_norm": 663.9820556640625, - "learning_rate": 6.204134816644233e-06, - "loss": 77.1192, - "step": 59440 - }, - { - "epoch": 0.4917897175001034, - "grad_norm": 1082.100341796875, - "learning_rate": 6.2027659115147375e-06, - "loss": 109.6853, - "step": 59450 - }, - { - "epoch": 0.4918724407494726, - "grad_norm": 734.315185546875, - "learning_rate": 6.201396910686679e-06, - "loss": 92.7889, - "step": 59460 - }, - { - "epoch": 0.49195516399884187, - "grad_norm": 825.7645874023438, - "learning_rate": 6.200027814268984e-06, - "loss": 82.5341, - "step": 59470 - }, - { - "epoch": 0.4920378872482111, - "grad_norm": 990.0794677734375, - "learning_rate": 6.198658622370582e-06, - "loss": 91.5214, - "step": 59480 - }, - { - "epoch": 0.4921206104975803, - "grad_norm": 672.1451416015625, - "learning_rate": 6.197289335100412e-06, - "loss": 99.9061, - "step": 59490 - }, - { - "epoch": 0.4922033337469496, - "grad_norm": 663.6240234375, - "learning_rate": 6.195919952567426e-06, - "loss": 84.1417, - "step": 59500 - }, - { - "epoch": 0.49228605699631883, - "grad_norm": 1181.8443603515625, - "learning_rate": 6.194550474880579e-06, - "loss": 101.2219, - "step": 59510 - }, - { - "epoch": 0.49236878024568803, - "grad_norm": 1052.6051025390625, - "learning_rate": 6.193180902148833e-06, - "loss": 101.8608, - "step": 59520 - }, - { - "epoch": 0.4924515034950573, - "grad_norm": 467.2412109375, - "learning_rate": 6.1918112344811575e-06, - "loss": 64.4284, - "step": 59530 - }, - { - "epoch": 0.49253422674442654, - "grad_norm": 756.0142822265625, - "learning_rate": 6.190441471986533e-06, - "loss": 93.9991, - "step": 59540 - }, - { - "epoch": 0.49261694999379574, - "grad_norm": 650.9651489257812, - "learning_rate": 6.18907161477394e-06, - "loss": 134.6619, - "step": 59550 - }, - { - "epoch": 0.492699673243165, - "grad_norm": 845.457275390625, - "learning_rate": 6.187701662952381e-06, - "loss": 71.9417, - "step": 59560 - }, - { - "epoch": 0.49278239649253425, - "grad_norm": 1039.0081787109375, - "learning_rate": 6.18633161663085e-06, - "loss": 96.217, - "step": 59570 - }, - { - "epoch": 0.49286511974190345, - "grad_norm": 1419.552734375, - "learning_rate": 6.184961475918355e-06, - "loss": 117.6555, - "step": 59580 - }, - { - "epoch": 0.4929478429912727, - "grad_norm": 1182.2574462890625, - "learning_rate": 6.183591240923914e-06, - "loss": 116.7017, - "step": 59590 - }, - { - "epoch": 0.49303056624064195, - "grad_norm": 1182.273681640625, - "learning_rate": 6.182220911756551e-06, - "loss": 96.5914, - "step": 59600 - }, - { - "epoch": 0.49311328949001115, - "grad_norm": 1710.057861328125, - "learning_rate": 6.1808504885252955e-06, - "loss": 116.0724, - "step": 59610 - }, - { - "epoch": 0.4931960127393804, - "grad_norm": 738.5501708984375, - "learning_rate": 6.179479971339186e-06, - "loss": 102.2499, - "step": 59620 - }, - { - "epoch": 0.49327873598874966, - "grad_norm": 1123.935791015625, - "learning_rate": 6.178109360307267e-06, - "loss": 98.9781, - "step": 59630 - }, - { - "epoch": 0.49336145923811886, - "grad_norm": 1302.947998046875, - "learning_rate": 6.176738655538594e-06, - "loss": 86.7837, - "step": 59640 - }, - { - "epoch": 0.4934441824874881, - "grad_norm": 1149.8046875, - "learning_rate": 6.175367857142227e-06, - "loss": 101.772, - "step": 59650 - }, - { - "epoch": 0.49352690573685737, - "grad_norm": 313.6778564453125, - "learning_rate": 6.173996965227234e-06, - "loss": 67.0711, - "step": 59660 - }, - { - "epoch": 0.49360962898622657, - "grad_norm": 992.2843627929688, - "learning_rate": 6.17262597990269e-06, - "loss": 122.4615, - "step": 59670 - }, - { - "epoch": 0.4936923522355958, - "grad_norm": 544.5889282226562, - "learning_rate": 6.171254901277678e-06, - "loss": 118.6765, - "step": 59680 - }, - { - "epoch": 0.4937750754849651, - "grad_norm": 760.4612426757812, - "learning_rate": 6.169883729461289e-06, - "loss": 73.9667, - "step": 59690 - }, - { - "epoch": 0.4938577987343343, - "grad_norm": 879.9442749023438, - "learning_rate": 6.16851246456262e-06, - "loss": 121.3691, - "step": 59700 - }, - { - "epoch": 0.49394052198370353, - "grad_norm": 838.228271484375, - "learning_rate": 6.167141106690778e-06, - "loss": 81.8349, - "step": 59710 - }, - { - "epoch": 0.4940232452330727, - "grad_norm": 600.8211059570312, - "learning_rate": 6.1657696559548755e-06, - "loss": 82.6974, - "step": 59720 - }, - { - "epoch": 0.494105968482442, - "grad_norm": 1331.113525390625, - "learning_rate": 6.16439811246403e-06, - "loss": 91.3955, - "step": 59730 - }, - { - "epoch": 0.49418869173181124, - "grad_norm": 1298.560791015625, - "learning_rate": 6.163026476327371e-06, - "loss": 127.3068, - "step": 59740 - }, - { - "epoch": 0.49427141498118043, - "grad_norm": 1139.0467529296875, - "learning_rate": 6.161654747654033e-06, - "loss": 103.0033, - "step": 59750 - }, - { - "epoch": 0.4943541382305497, - "grad_norm": 740.8123779296875, - "learning_rate": 6.1602829265531585e-06, - "loss": 111.8351, - "step": 59760 - }, - { - "epoch": 0.49443686147991894, - "grad_norm": 811.9702758789062, - "learning_rate": 6.158911013133896e-06, - "loss": 106.3484, - "step": 59770 - }, - { - "epoch": 0.49451958472928814, - "grad_norm": 751.2868041992188, - "learning_rate": 6.157539007505402e-06, - "loss": 81.8818, - "step": 59780 - }, - { - "epoch": 0.4946023079786574, - "grad_norm": 596.0902099609375, - "learning_rate": 6.156166909776842e-06, - "loss": 141.5883, - "step": 59790 - }, - { - "epoch": 0.49468503122802665, - "grad_norm": 669.7060546875, - "learning_rate": 6.154794720057388e-06, - "loss": 92.8498, - "step": 59800 - }, - { - "epoch": 0.49476775447739585, - "grad_norm": 769.1928100585938, - "learning_rate": 6.153422438456218e-06, - "loss": 111.0396, - "step": 59810 - }, - { - "epoch": 0.4948504777267651, - "grad_norm": 1065.379638671875, - "learning_rate": 6.1520500650825175e-06, - "loss": 109.4096, - "step": 59820 - }, - { - "epoch": 0.49493320097613436, - "grad_norm": 1148.633056640625, - "learning_rate": 6.150677600045479e-06, - "loss": 107.9428, - "step": 59830 - }, - { - "epoch": 0.49501592422550356, - "grad_norm": 530.499267578125, - "learning_rate": 6.1493050434543065e-06, - "loss": 80.3538, - "step": 59840 - }, - { - "epoch": 0.4950986474748728, - "grad_norm": 888.7738647460938, - "learning_rate": 6.1479323954182055e-06, - "loss": 82.7131, - "step": 59850 - }, - { - "epoch": 0.49518137072424206, - "grad_norm": 1272.56884765625, - "learning_rate": 6.146559656046394e-06, - "loss": 115.8495, - "step": 59860 - }, - { - "epoch": 0.49526409397361126, - "grad_norm": 1251.605712890625, - "learning_rate": 6.1451868254480914e-06, - "loss": 103.9669, - "step": 59870 - }, - { - "epoch": 0.4953468172229805, - "grad_norm": 1469.060546875, - "learning_rate": 6.143813903732527e-06, - "loss": 98.5221, - "step": 59880 - }, - { - "epoch": 0.49542954047234977, - "grad_norm": 613.4310913085938, - "learning_rate": 6.142440891008941e-06, - "loss": 95.7254, - "step": 59890 - }, - { - "epoch": 0.49551226372171897, - "grad_norm": 1487.4815673828125, - "learning_rate": 6.141067787386579e-06, - "loss": 134.6069, - "step": 59900 - }, - { - "epoch": 0.4955949869710882, - "grad_norm": 1083.0224609375, - "learning_rate": 6.139694592974687e-06, - "loss": 103.1761, - "step": 59910 - }, - { - "epoch": 0.4956777102204575, - "grad_norm": 1133.168701171875, - "learning_rate": 6.1383213078825275e-06, - "loss": 111.6922, - "step": 59920 - }, - { - "epoch": 0.4957604334698267, - "grad_norm": 1490.791015625, - "learning_rate": 6.136947932219365e-06, - "loss": 102.0797, - "step": 59930 - }, - { - "epoch": 0.49584315671919593, - "grad_norm": 717.7562866210938, - "learning_rate": 6.135574466094475e-06, - "loss": 103.1788, - "step": 59940 - }, - { - "epoch": 0.4959258799685652, - "grad_norm": 707.7864379882812, - "learning_rate": 6.134200909617135e-06, - "loss": 84.5769, - "step": 59950 - }, - { - "epoch": 0.4960086032179344, - "grad_norm": 607.8535766601562, - "learning_rate": 6.132827262896634e-06, - "loss": 109.7207, - "step": 59960 - }, - { - "epoch": 0.49609132646730364, - "grad_norm": 575.4254150390625, - "learning_rate": 6.131453526042267e-06, - "loss": 94.0905, - "step": 59970 - }, - { - "epoch": 0.4961740497166729, - "grad_norm": 1168.943603515625, - "learning_rate": 6.130079699163335e-06, - "loss": 123.4409, - "step": 59980 - }, - { - "epoch": 0.4962567729660421, - "grad_norm": 806.4592895507812, - "learning_rate": 6.128705782369149e-06, - "loss": 126.4985, - "step": 59990 - }, - { - "epoch": 0.49633949621541135, - "grad_norm": 994.6439208984375, - "learning_rate": 6.127331775769023e-06, - "loss": 81.3202, - "step": 60000 - }, - { - "epoch": 0.4964222194647806, - "grad_norm": 764.6784057617188, - "learning_rate": 6.125957679472282e-06, - "loss": 98.2079, - "step": 60010 - }, - { - "epoch": 0.4965049427141498, - "grad_norm": 963.4498901367188, - "learning_rate": 6.124583493588254e-06, - "loss": 107.3761, - "step": 60020 - }, - { - "epoch": 0.49658766596351905, - "grad_norm": 763.531494140625, - "learning_rate": 6.123209218226282e-06, - "loss": 86.249, - "step": 60030 - }, - { - "epoch": 0.4966703892128883, - "grad_norm": 1130.901611328125, - "learning_rate": 6.121834853495704e-06, - "loss": 80.7327, - "step": 60040 - }, - { - "epoch": 0.4967531124622575, - "grad_norm": 830.3397827148438, - "learning_rate": 6.120460399505876e-06, - "loss": 79.5612, - "step": 60050 - }, - { - "epoch": 0.49683583571162676, - "grad_norm": 1038.9749755859375, - "learning_rate": 6.119085856366158e-06, - "loss": 104.2222, - "step": 60060 - }, - { - "epoch": 0.496918558960996, - "grad_norm": 1272.2034912109375, - "learning_rate": 6.117711224185913e-06, - "loss": 95.4032, - "step": 60070 - }, - { - "epoch": 0.4970012822103652, - "grad_norm": 678.36083984375, - "learning_rate": 6.116336503074516e-06, - "loss": 111.6325, - "step": 60080 - }, - { - "epoch": 0.49708400545973447, - "grad_norm": 902.4628295898438, - "learning_rate": 6.114961693141346e-06, - "loss": 109.0453, - "step": 60090 - }, - { - "epoch": 0.49716672870910367, - "grad_norm": 1161.1533203125, - "learning_rate": 6.113586794495792e-06, - "loss": 102.0886, - "step": 60100 - }, - { - "epoch": 0.4972494519584729, - "grad_norm": 883.01025390625, - "learning_rate": 6.112211807247246e-06, - "loss": 79.8961, - "step": 60110 - }, - { - "epoch": 0.4973321752078422, - "grad_norm": 560.7022094726562, - "learning_rate": 6.110836731505112e-06, - "loss": 104.4858, - "step": 60120 - }, - { - "epoch": 0.4974148984572114, - "grad_norm": 563.2030639648438, - "learning_rate": 6.109461567378796e-06, - "loss": 76.5894, - "step": 60130 - }, - { - "epoch": 0.49749762170658063, - "grad_norm": 567.1194458007812, - "learning_rate": 6.108086314977717e-06, - "loss": 74.5852, - "step": 60140 - }, - { - "epoch": 0.4975803449559499, - "grad_norm": 1001.0436401367188, - "learning_rate": 6.106710974411294e-06, - "loss": 89.3131, - "step": 60150 - }, - { - "epoch": 0.4976630682053191, - "grad_norm": 1134.8504638671875, - "learning_rate": 6.105335545788957e-06, - "loss": 92.9293, - "step": 60160 - }, - { - "epoch": 0.49774579145468834, - "grad_norm": 1836.4036865234375, - "learning_rate": 6.103960029220145e-06, - "loss": 96.6835, - "step": 60170 - }, - { - "epoch": 0.4978285147040576, - "grad_norm": 884.6735229492188, - "learning_rate": 6.102584424814299e-06, - "loss": 96.8814, - "step": 60180 - }, - { - "epoch": 0.4979112379534268, - "grad_norm": 1286.9527587890625, - "learning_rate": 6.101208732680872e-06, - "loss": 84.8719, - "step": 60190 - }, - { - "epoch": 0.49799396120279604, - "grad_norm": 1096.192138671875, - "learning_rate": 6.09983295292932e-06, - "loss": 133.576, - "step": 60200 - }, - { - "epoch": 0.4980766844521653, - "grad_norm": 396.5540771484375, - "learning_rate": 6.0984570856691046e-06, - "loss": 71.0101, - "step": 60210 - }, - { - "epoch": 0.4981594077015345, - "grad_norm": 1324.0350341796875, - "learning_rate": 6.097081131009703e-06, - "loss": 84.5404, - "step": 60220 - }, - { - "epoch": 0.49824213095090375, - "grad_norm": 1569.8370361328125, - "learning_rate": 6.095705089060589e-06, - "loss": 131.2618, - "step": 60230 - }, - { - "epoch": 0.498324854200273, - "grad_norm": 452.867919921875, - "learning_rate": 6.094328959931252e-06, - "loss": 98.2553, - "step": 60240 - }, - { - "epoch": 0.4984075774496422, - "grad_norm": 754.5675659179688, - "learning_rate": 6.092952743731179e-06, - "loss": 109.5702, - "step": 60250 - }, - { - "epoch": 0.49849030069901146, - "grad_norm": 1038.2977294921875, - "learning_rate": 6.091576440569873e-06, - "loss": 102.3048, - "step": 60260 - }, - { - "epoch": 0.4985730239483807, - "grad_norm": 1505.541748046875, - "learning_rate": 6.09020005055684e-06, - "loss": 93.0384, - "step": 60270 - }, - { - "epoch": 0.4986557471977499, - "grad_norm": 714.1956787109375, - "learning_rate": 6.088823573801592e-06, - "loss": 89.4964, - "step": 60280 - }, - { - "epoch": 0.49873847044711916, - "grad_norm": 1760.59033203125, - "learning_rate": 6.087447010413651e-06, - "loss": 123.4378, - "step": 60290 - }, - { - "epoch": 0.4988211936964884, - "grad_norm": 647.3226318359375, - "learning_rate": 6.08607036050254e-06, - "loss": 97.7962, - "step": 60300 - }, - { - "epoch": 0.4989039169458576, - "grad_norm": 670.925048828125, - "learning_rate": 6.084693624177794e-06, - "loss": 90.9299, - "step": 60310 - }, - { - "epoch": 0.49898664019522687, - "grad_norm": 861.1676635742188, - "learning_rate": 6.083316801548956e-06, - "loss": 96.199, - "step": 60320 - }, - { - "epoch": 0.4990693634445961, - "grad_norm": 844.1892700195312, - "learning_rate": 6.081939892725572e-06, - "loss": 89.0784, - "step": 60330 - }, - { - "epoch": 0.4991520866939653, - "grad_norm": 1057.48876953125, - "learning_rate": 6.080562897817196e-06, - "loss": 80.8512, - "step": 60340 - }, - { - "epoch": 0.4992348099433346, - "grad_norm": 547.9786987304688, - "learning_rate": 6.079185816933388e-06, - "loss": 123.7075, - "step": 60350 - }, - { - "epoch": 0.49931753319270383, - "grad_norm": 813.2322998046875, - "learning_rate": 6.077808650183718e-06, - "loss": 75.0818, - "step": 60360 - }, - { - "epoch": 0.49940025644207303, - "grad_norm": 1088.018310546875, - "learning_rate": 6.076431397677762e-06, - "loss": 99.3224, - "step": 60370 - }, - { - "epoch": 0.4994829796914423, - "grad_norm": 443.6618347167969, - "learning_rate": 6.0750540595250986e-06, - "loss": 92.7872, - "step": 60380 - }, - { - "epoch": 0.49956570294081154, - "grad_norm": 1097.77099609375, - "learning_rate": 6.073676635835317e-06, - "loss": 101.5357, - "step": 60390 - }, - { - "epoch": 0.49964842619018074, - "grad_norm": 809.2193603515625, - "learning_rate": 6.072299126718012e-06, - "loss": 102.8223, - "step": 60400 - }, - { - "epoch": 0.49973114943955, - "grad_norm": 1209.04296875, - "learning_rate": 6.070921532282788e-06, - "loss": 90.7184, - "step": 60410 - }, - { - "epoch": 0.49981387268891925, - "grad_norm": 483.89910888671875, - "learning_rate": 6.0695438526392536e-06, - "loss": 90.6916, - "step": 60420 - }, - { - "epoch": 0.49989659593828845, - "grad_norm": 939.6087036132812, - "learning_rate": 6.068166087897022e-06, - "loss": 95.3435, - "step": 60430 - }, - { - "epoch": 0.4999793191876577, - "grad_norm": 1028.2034912109375, - "learning_rate": 6.066788238165717e-06, - "loss": 122.2318, - "step": 60440 - }, - { - "epoch": 0.5000620424370269, - "grad_norm": 871.5949096679688, - "learning_rate": 6.0654103035549686e-06, - "loss": 107.04, - "step": 60450 - }, - { - "epoch": 0.5001447656863962, - "grad_norm": 631.7724609375, - "learning_rate": 6.064032284174411e-06, - "loss": 107.0457, - "step": 60460 - }, - { - "epoch": 0.5002274889357654, - "grad_norm": 1057.8795166015625, - "learning_rate": 6.062654180133689e-06, - "loss": 95.4936, - "step": 60470 - }, - { - "epoch": 0.5003102121851346, - "grad_norm": 625.803466796875, - "learning_rate": 6.06127599154245e-06, - "loss": 112.727, - "step": 60480 - }, - { - "epoch": 0.5003929354345039, - "grad_norm": 957.1911010742188, - "learning_rate": 6.059897718510351e-06, - "loss": 141.4194, - "step": 60490 - }, - { - "epoch": 0.5004756586838731, - "grad_norm": 1082.177734375, - "learning_rate": 6.058519361147055e-06, - "loss": 106.0662, - "step": 60500 - }, - { - "epoch": 0.5005583819332423, - "grad_norm": 961.278076171875, - "learning_rate": 6.057140919562231e-06, - "loss": 92.1514, - "step": 60510 - }, - { - "epoch": 0.5006411051826116, - "grad_norm": 1194.2098388671875, - "learning_rate": 6.055762393865555e-06, - "loss": 114.0582, - "step": 60520 - }, - { - "epoch": 0.5007238284319808, - "grad_norm": 2295.308349609375, - "learning_rate": 6.054383784166712e-06, - "loss": 122.28, - "step": 60530 - }, - { - "epoch": 0.50080655168135, - "grad_norm": 950.94482421875, - "learning_rate": 6.05300509057539e-06, - "loss": 65.6176, - "step": 60540 - }, - { - "epoch": 0.5008892749307193, - "grad_norm": 575.2659301757812, - "learning_rate": 6.051626313201285e-06, - "loss": 75.2322, - "step": 60550 - }, - { - "epoch": 0.5009719981800885, - "grad_norm": 955.7214965820312, - "learning_rate": 6.0502474521541014e-06, - "loss": 78.4364, - "step": 60560 - }, - { - "epoch": 0.5010547214294577, - "grad_norm": 1058.46240234375, - "learning_rate": 6.048868507543547e-06, - "loss": 76.487, - "step": 60570 - }, - { - "epoch": 0.501137444678827, - "grad_norm": 1243.02880859375, - "learning_rate": 6.047489479479339e-06, - "loss": 94.1711, - "step": 60580 - }, - { - "epoch": 0.5012201679281962, - "grad_norm": 539.5478515625, - "learning_rate": 6.046110368071201e-06, - "loss": 80.0069, - "step": 60590 - }, - { - "epoch": 0.5013028911775654, - "grad_norm": 1104.5489501953125, - "learning_rate": 6.044731173428862e-06, - "loss": 109.4119, - "step": 60600 - }, - { - "epoch": 0.5013856144269347, - "grad_norm": 547.853515625, - "learning_rate": 6.043351895662059e-06, - "loss": 74.3687, - "step": 60610 - }, - { - "epoch": 0.5014683376763039, - "grad_norm": 815.2512817382812, - "learning_rate": 6.041972534880533e-06, - "loss": 98.8586, - "step": 60620 - }, - { - "epoch": 0.5015510609256731, - "grad_norm": 934.5771484375, - "learning_rate": 6.040593091194035e-06, - "loss": 113.7061, - "step": 60630 - }, - { - "epoch": 0.5016337841750423, - "grad_norm": 980.511962890625, - "learning_rate": 6.039213564712319e-06, - "loss": 77.5835, - "step": 60640 - }, - { - "epoch": 0.5017165074244117, - "grad_norm": 875.10693359375, - "learning_rate": 6.03783395554515e-06, - "loss": 90.5567, - "step": 60650 - }, - { - "epoch": 0.5017992306737808, - "grad_norm": 564.4214477539062, - "learning_rate": 6.036454263802297e-06, - "loss": 99.0541, - "step": 60660 - }, - { - "epoch": 0.50188195392315, - "grad_norm": 859.4292602539062, - "learning_rate": 6.035074489593536e-06, - "loss": 81.3836, - "step": 60670 - }, - { - "epoch": 0.5019646771725194, - "grad_norm": 1366.642333984375, - "learning_rate": 6.033694633028644e-06, - "loss": 89.8926, - "step": 60680 - }, - { - "epoch": 0.5020474004218886, - "grad_norm": 707.115478515625, - "learning_rate": 6.032314694217416e-06, - "loss": 119.7774, - "step": 60690 - }, - { - "epoch": 0.5021301236712578, - "grad_norm": 509.8358154296875, - "learning_rate": 6.030934673269646e-06, - "loss": 76.9633, - "step": 60700 - }, - { - "epoch": 0.5022128469206271, - "grad_norm": 1127.6051025390625, - "learning_rate": 6.029554570295135e-06, - "loss": 98.3396, - "step": 60710 - }, - { - "epoch": 0.5022955701699963, - "grad_norm": 489.0888366699219, - "learning_rate": 6.028174385403693e-06, - "loss": 74.8344, - "step": 60720 - }, - { - "epoch": 0.5023782934193655, - "grad_norm": 563.7764282226562, - "learning_rate": 6.026794118705133e-06, - "loss": 123.1201, - "step": 60730 - }, - { - "epoch": 0.5024610166687348, - "grad_norm": 922.4900512695312, - "learning_rate": 6.025413770309278e-06, - "loss": 105.9961, - "step": 60740 - }, - { - "epoch": 0.502543739918104, - "grad_norm": 711.1076049804688, - "learning_rate": 6.024033340325954e-06, - "loss": 138.0339, - "step": 60750 - }, - { - "epoch": 0.5026264631674732, - "grad_norm": 1032.06787109375, - "learning_rate": 6.022652828864999e-06, - "loss": 129.3473, - "step": 60760 - }, - { - "epoch": 0.5027091864168425, - "grad_norm": 987.8187255859375, - "learning_rate": 6.0212722360362496e-06, - "loss": 98.7341, - "step": 60770 - }, - { - "epoch": 0.5027919096662117, - "grad_norm": 922.1596069335938, - "learning_rate": 6.019891561949554e-06, - "loss": 113.0199, - "step": 60780 - }, - { - "epoch": 0.5028746329155809, - "grad_norm": 966.7632446289062, - "learning_rate": 6.01851080671477e-06, - "loss": 95.6403, - "step": 60790 - }, - { - "epoch": 0.5029573561649502, - "grad_norm": 626.9385986328125, - "learning_rate": 6.017129970441756e-06, - "loss": 85.6963, - "step": 60800 - }, - { - "epoch": 0.5030400794143194, - "grad_norm": 1487.6356201171875, - "learning_rate": 6.015749053240378e-06, - "loss": 97.1684, - "step": 60810 - }, - { - "epoch": 0.5031228026636886, - "grad_norm": 789.6949462890625, - "learning_rate": 6.0143680552205075e-06, - "loss": 103.032, - "step": 60820 - }, - { - "epoch": 0.5032055259130579, - "grad_norm": 737.3887329101562, - "learning_rate": 6.012986976492025e-06, - "loss": 86.7182, - "step": 60830 - }, - { - "epoch": 0.5032882491624271, - "grad_norm": 600.4498291015625, - "learning_rate": 6.011605817164822e-06, - "loss": 94.8057, - "step": 60840 - }, - { - "epoch": 0.5033709724117963, - "grad_norm": 593.8978271484375, - "learning_rate": 6.0102245773487855e-06, - "loss": 107.6144, - "step": 60850 - }, - { - "epoch": 0.5034536956611656, - "grad_norm": 1065.4730224609375, - "learning_rate": 6.008843257153815e-06, - "loss": 90.4793, - "step": 60860 - }, - { - "epoch": 0.5035364189105348, - "grad_norm": 824.6838989257812, - "learning_rate": 6.007461856689815e-06, - "loss": 75.2064, - "step": 60870 - }, - { - "epoch": 0.503619142159904, - "grad_norm": 450.9040832519531, - "learning_rate": 6.0060803760667e-06, - "loss": 88.3228, - "step": 60880 - }, - { - "epoch": 0.5037018654092733, - "grad_norm": 829.0480346679688, - "learning_rate": 6.004698815394389e-06, - "loss": 124.8127, - "step": 60890 - }, - { - "epoch": 0.5037845886586425, - "grad_norm": 1101.244384765625, - "learning_rate": 6.003317174782801e-06, - "loss": 77.5046, - "step": 60900 - }, - { - "epoch": 0.5038673119080117, - "grad_norm": 1373.401611328125, - "learning_rate": 6.001935454341872e-06, - "loss": 92.4626, - "step": 60910 - }, - { - "epoch": 0.503950035157381, - "grad_norm": 489.91644287109375, - "learning_rate": 6.000553654181536e-06, - "loss": 92.368, - "step": 60920 - }, - { - "epoch": 0.5040327584067502, - "grad_norm": 813.798583984375, - "learning_rate": 5.999171774411737e-06, - "loss": 96.8709, - "step": 60930 - }, - { - "epoch": 0.5041154816561194, - "grad_norm": 1986.8172607421875, - "learning_rate": 5.997789815142427e-06, - "loss": 83.3268, - "step": 60940 - }, - { - "epoch": 0.5041982049054887, - "grad_norm": 721.0343017578125, - "learning_rate": 5.99640777648356e-06, - "loss": 111.3407, - "step": 60950 - }, - { - "epoch": 0.5042809281548579, - "grad_norm": 1814.00146484375, - "learning_rate": 5.9950256585450995e-06, - "loss": 101.1115, - "step": 60960 - }, - { - "epoch": 0.5043636514042271, - "grad_norm": 1539.85009765625, - "learning_rate": 5.993643461437013e-06, - "loss": 109.2939, - "step": 60970 - }, - { - "epoch": 0.5044463746535964, - "grad_norm": 1455.0811767578125, - "learning_rate": 5.992261185269278e-06, - "loss": 116.5055, - "step": 60980 - }, - { - "epoch": 0.5045290979029656, - "grad_norm": 673.0990600585938, - "learning_rate": 5.990878830151873e-06, - "loss": 84.7869, - "step": 60990 - }, - { - "epoch": 0.5046118211523348, - "grad_norm": 990.6354370117188, - "learning_rate": 5.989496396194787e-06, - "loss": 117.0743, - "step": 61000 - }, - { - "epoch": 0.5046945444017041, - "grad_norm": 659.1134033203125, - "learning_rate": 5.988113883508016e-06, - "loss": 85.4938, - "step": 61010 - }, - { - "epoch": 0.5047772676510733, - "grad_norm": 1276.8919677734375, - "learning_rate": 5.986731292201555e-06, - "loss": 111.5496, - "step": 61020 - }, - { - "epoch": 0.5048599909004425, - "grad_norm": 1091.1859130859375, - "learning_rate": 5.985348622385415e-06, - "loss": 139.6149, - "step": 61030 - }, - { - "epoch": 0.5049427141498118, - "grad_norm": 1123.3651123046875, - "learning_rate": 5.9839658741696085e-06, - "loss": 60.2066, - "step": 61040 - }, - { - "epoch": 0.505025437399181, - "grad_norm": 405.6517028808594, - "learning_rate": 5.982583047664151e-06, - "loss": 89.4623, - "step": 61050 - }, - { - "epoch": 0.5051081606485502, - "grad_norm": 489.998046875, - "learning_rate": 5.981200142979071e-06, - "loss": 70.606, - "step": 61060 - }, - { - "epoch": 0.5051908838979196, - "grad_norm": 611.7427368164062, - "learning_rate": 5.9798171602244e-06, - "loss": 90.265, - "step": 61070 - }, - { - "epoch": 0.5052736071472888, - "grad_norm": 500.9188537597656, - "learning_rate": 5.978434099510172e-06, - "loss": 102.3768, - "step": 61080 - }, - { - "epoch": 0.505356330396658, - "grad_norm": 939.5604858398438, - "learning_rate": 5.977050960946433e-06, - "loss": 95.7826, - "step": 61090 - }, - { - "epoch": 0.5054390536460273, - "grad_norm": 1079.417724609375, - "learning_rate": 5.975667744643235e-06, - "loss": 57.3004, - "step": 61100 - }, - { - "epoch": 0.5055217768953965, - "grad_norm": 801.0790405273438, - "learning_rate": 5.974284450710631e-06, - "loss": 100.4074, - "step": 61110 - }, - { - "epoch": 0.5056045001447657, - "grad_norm": 1189.5526123046875, - "learning_rate": 5.972901079258685e-06, - "loss": 89.206, - "step": 61120 - }, - { - "epoch": 0.505687223394135, - "grad_norm": 544.3245239257812, - "learning_rate": 5.971517630397465e-06, - "loss": 87.3128, - "step": 61130 - }, - { - "epoch": 0.5057699466435042, - "grad_norm": 804.9583129882812, - "learning_rate": 5.970134104237046e-06, - "loss": 118.5412, - "step": 61140 - }, - { - "epoch": 0.5058526698928734, - "grad_norm": 878.0859985351562, - "learning_rate": 5.96875050088751e-06, - "loss": 151.7707, - "step": 61150 - }, - { - "epoch": 0.5059353931422427, - "grad_norm": 555.1686401367188, - "learning_rate": 5.9673668204589396e-06, - "loss": 80.5509, - "step": 61160 - }, - { - "epoch": 0.5060181163916119, - "grad_norm": 887.9091186523438, - "learning_rate": 5.965983063061432e-06, - "loss": 97.7346, - "step": 61170 - }, - { - "epoch": 0.5061008396409811, - "grad_norm": 2689.885498046875, - "learning_rate": 5.964599228805087e-06, - "loss": 109.0754, - "step": 61180 - }, - { - "epoch": 0.5061835628903504, - "grad_norm": 851.7760009765625, - "learning_rate": 5.963215317800008e-06, - "loss": 111.0486, - "step": 61190 - }, - { - "epoch": 0.5062662861397196, - "grad_norm": 912.8546142578125, - "learning_rate": 5.961831330156306e-06, - "loss": 131.0121, - "step": 61200 - }, - { - "epoch": 0.5063490093890888, - "grad_norm": 641.365478515625, - "learning_rate": 5.960447265984098e-06, - "loss": 88.7224, - "step": 61210 - }, - { - "epoch": 0.5064317326384581, - "grad_norm": 1167.5374755859375, - "learning_rate": 5.95906312539351e-06, - "loss": 90.7471, - "step": 61220 - }, - { - "epoch": 0.5065144558878273, - "grad_norm": 808.2752685546875, - "learning_rate": 5.9576789084946705e-06, - "loss": 74.6994, - "step": 61230 - }, - { - "epoch": 0.5065971791371965, - "grad_norm": 1379.4791259765625, - "learning_rate": 5.956294615397716e-06, - "loss": 105.455, - "step": 61240 - }, - { - "epoch": 0.5066799023865658, - "grad_norm": 1430.2303466796875, - "learning_rate": 5.954910246212787e-06, - "loss": 126.3884, - "step": 61250 - }, - { - "epoch": 0.506762625635935, - "grad_norm": 1149.9755859375, - "learning_rate": 5.953525801050032e-06, - "loss": 83.9256, - "step": 61260 - }, - { - "epoch": 0.5068453488853042, - "grad_norm": 558.4307861328125, - "learning_rate": 5.952141280019605e-06, - "loss": 96.2179, - "step": 61270 - }, - { - "epoch": 0.5069280721346735, - "grad_norm": 742.656494140625, - "learning_rate": 5.950756683231667e-06, - "loss": 74.2085, - "step": 61280 - }, - { - "epoch": 0.5070107953840427, - "grad_norm": 596.1998291015625, - "learning_rate": 5.949372010796384e-06, - "loss": 52.8268, - "step": 61290 - }, - { - "epoch": 0.5070935186334119, - "grad_norm": 881.3981323242188, - "learning_rate": 5.947987262823924e-06, - "loss": 107.5389, - "step": 61300 - }, - { - "epoch": 0.5071762418827812, - "grad_norm": 675.0526123046875, - "learning_rate": 5.94660243942447e-06, - "loss": 123.2516, - "step": 61310 - }, - { - "epoch": 0.5072589651321504, - "grad_norm": 889.6369018554688, - "learning_rate": 5.945217540708206e-06, - "loss": 86.9526, - "step": 61320 - }, - { - "epoch": 0.5073416883815196, - "grad_norm": 736.0260620117188, - "learning_rate": 5.9438325667853185e-06, - "loss": 94.7347, - "step": 61330 - }, - { - "epoch": 0.5074244116308889, - "grad_norm": 899.1821899414062, - "learning_rate": 5.942447517766005e-06, - "loss": 106.1979, - "step": 61340 - }, - { - "epoch": 0.5075071348802581, - "grad_norm": 760.8129272460938, - "learning_rate": 5.941062393760467e-06, - "loss": 94.1305, - "step": 61350 - }, - { - "epoch": 0.5075898581296273, - "grad_norm": 434.6575622558594, - "learning_rate": 5.939677194878915e-06, - "loss": 140.0839, - "step": 61360 - }, - { - "epoch": 0.5076725813789965, - "grad_norm": 577.9569091796875, - "learning_rate": 5.93829192123156e-06, - "loss": 74.5071, - "step": 61370 - }, - { - "epoch": 0.5077553046283658, - "grad_norm": 694.65625, - "learning_rate": 5.936906572928625e-06, - "loss": 120.0574, - "step": 61380 - }, - { - "epoch": 0.507838027877735, - "grad_norm": 772.7642211914062, - "learning_rate": 5.935521150080331e-06, - "loss": 75.3494, - "step": 61390 - }, - { - "epoch": 0.5079207511271042, - "grad_norm": 790.8641967773438, - "learning_rate": 5.934135652796914e-06, - "loss": 87.367, - "step": 61400 - }, - { - "epoch": 0.5080034743764735, - "grad_norm": 813.5361938476562, - "learning_rate": 5.9327500811886095e-06, - "loss": 83.0071, - "step": 61410 - }, - { - "epoch": 0.5080861976258427, - "grad_norm": 437.74139404296875, - "learning_rate": 5.931364435365663e-06, - "loss": 115.0982, - "step": 61420 - }, - { - "epoch": 0.5081689208752119, - "grad_norm": 890.75927734375, - "learning_rate": 5.929978715438322e-06, - "loss": 75.1045, - "step": 61430 - }, - { - "epoch": 0.5082516441245812, - "grad_norm": 861.6392822265625, - "learning_rate": 5.928592921516843e-06, - "loss": 87.0132, - "step": 61440 - }, - { - "epoch": 0.5083343673739504, - "grad_norm": 401.1084289550781, - "learning_rate": 5.9272070537114855e-06, - "loss": 111.1549, - "step": 61450 - }, - { - "epoch": 0.5084170906233196, - "grad_norm": 886.0931396484375, - "learning_rate": 5.92582111213252e-06, - "loss": 130.5894, - "step": 61460 - }, - { - "epoch": 0.5084998138726889, - "grad_norm": 573.5504150390625, - "learning_rate": 5.924435096890216e-06, - "loss": 96.3187, - "step": 61470 - }, - { - "epoch": 0.5085825371220581, - "grad_norm": 915.1154174804688, - "learning_rate": 5.923049008094855e-06, - "loss": 102.2801, - "step": 61480 - }, - { - "epoch": 0.5086652603714273, - "grad_norm": 789.848876953125, - "learning_rate": 5.921662845856719e-06, - "loss": 93.1844, - "step": 61490 - }, - { - "epoch": 0.5087479836207967, - "grad_norm": 1229.4134521484375, - "learning_rate": 5.920276610286102e-06, - "loss": 119.4875, - "step": 61500 - }, - { - "epoch": 0.5088307068701658, - "grad_norm": 1035.0843505859375, - "learning_rate": 5.918890301493299e-06, - "loss": 95.7163, - "step": 61510 - }, - { - "epoch": 0.508913430119535, - "grad_norm": 880.9725952148438, - "learning_rate": 5.91750391958861e-06, - "loss": 73.2856, - "step": 61520 - }, - { - "epoch": 0.5089961533689044, - "grad_norm": 646.2332763671875, - "learning_rate": 5.916117464682346e-06, - "loss": 106.2007, - "step": 61530 - }, - { - "epoch": 0.5090788766182736, - "grad_norm": 485.7245178222656, - "learning_rate": 5.914730936884819e-06, - "loss": 112.5809, - "step": 61540 - }, - { - "epoch": 0.5091615998676428, - "grad_norm": 1304.14794921875, - "learning_rate": 5.91334433630635e-06, - "loss": 124.1074, - "step": 61550 - }, - { - "epoch": 0.5092443231170121, - "grad_norm": 911.8284912109375, - "learning_rate": 5.911957663057264e-06, - "loss": 87.838, - "step": 61560 - }, - { - "epoch": 0.5093270463663813, - "grad_norm": 863.0823364257812, - "learning_rate": 5.910570917247892e-06, - "loss": 79.3177, - "step": 61570 - }, - { - "epoch": 0.5094097696157505, - "grad_norm": 518.0543823242188, - "learning_rate": 5.909184098988571e-06, - "loss": 97.5815, - "step": 61580 - }, - { - "epoch": 0.5094924928651198, - "grad_norm": 641.208984375, - "learning_rate": 5.907797208389644e-06, - "loss": 112.5585, - "step": 61590 - }, - { - "epoch": 0.509575216114489, - "grad_norm": 644.5621337890625, - "learning_rate": 5.906410245561459e-06, - "loss": 80.3052, - "step": 61600 - }, - { - "epoch": 0.5096579393638582, - "grad_norm": 1210.9039306640625, - "learning_rate": 5.90502321061437e-06, - "loss": 103.0409, - "step": 61610 - }, - { - "epoch": 0.5097406626132275, - "grad_norm": 1110.852294921875, - "learning_rate": 5.90363610365874e-06, - "loss": 120.3473, - "step": 61620 - }, - { - "epoch": 0.5098233858625967, - "grad_norm": 892.0844116210938, - "learning_rate": 5.9022489248049295e-06, - "loss": 101.4571, - "step": 61630 - }, - { - "epoch": 0.5099061091119659, - "grad_norm": 456.140625, - "learning_rate": 5.900861674163314e-06, - "loss": 74.4327, - "step": 61640 - }, - { - "epoch": 0.5099888323613352, - "grad_norm": 755.904541015625, - "learning_rate": 5.89947435184427e-06, - "loss": 90.8847, - "step": 61650 - }, - { - "epoch": 0.5100715556107044, - "grad_norm": 1451.006591796875, - "learning_rate": 5.89808695795818e-06, - "loss": 101.8353, - "step": 61660 - }, - { - "epoch": 0.5101542788600736, - "grad_norm": 1081.45263671875, - "learning_rate": 5.896699492615432e-06, - "loss": 86.6635, - "step": 61670 - }, - { - "epoch": 0.5102370021094429, - "grad_norm": 497.5079040527344, - "learning_rate": 5.895311955926419e-06, - "loss": 83.6076, - "step": 61680 - }, - { - "epoch": 0.5103197253588121, - "grad_norm": 643.5697631835938, - "learning_rate": 5.893924348001544e-06, - "loss": 97.4833, - "step": 61690 - }, - { - "epoch": 0.5104024486081813, - "grad_norm": 956.6340942382812, - "learning_rate": 5.8925366689512124e-06, - "loss": 103.3445, - "step": 61700 - }, - { - "epoch": 0.5104851718575506, - "grad_norm": 665.5700073242188, - "learning_rate": 5.891148918885834e-06, - "loss": 65.6439, - "step": 61710 - }, - { - "epoch": 0.5105678951069198, - "grad_norm": 1049.6845703125, - "learning_rate": 5.8897610979158245e-06, - "loss": 89.6039, - "step": 61720 - }, - { - "epoch": 0.510650618356289, - "grad_norm": 814.806396484375, - "learning_rate": 5.888373206151608e-06, - "loss": 102.2241, - "step": 61730 - }, - { - "epoch": 0.5107333416056583, - "grad_norm": 841.8108520507812, - "learning_rate": 5.886985243703612e-06, - "loss": 103.3497, - "step": 61740 - }, - { - "epoch": 0.5108160648550275, - "grad_norm": 488.0580749511719, - "learning_rate": 5.885597210682273e-06, - "loss": 117.5541, - "step": 61750 - }, - { - "epoch": 0.5108987881043967, - "grad_norm": 780.1454467773438, - "learning_rate": 5.884209107198027e-06, - "loss": 118.4064, - "step": 61760 - }, - { - "epoch": 0.510981511353766, - "grad_norm": 1028.7705078125, - "learning_rate": 5.882820933361321e-06, - "loss": 127.9731, - "step": 61770 - }, - { - "epoch": 0.5110642346031352, - "grad_norm": 684.5718383789062, - "learning_rate": 5.881432689282604e-06, - "loss": 121.3453, - "step": 61780 - }, - { - "epoch": 0.5111469578525044, - "grad_norm": 910.465087890625, - "learning_rate": 5.880044375072333e-06, - "loss": 135.5772, - "step": 61790 - }, - { - "epoch": 0.5112296811018737, - "grad_norm": 1077.9769287109375, - "learning_rate": 5.8786559908409715e-06, - "loss": 110.6625, - "step": 61800 - }, - { - "epoch": 0.5113124043512429, - "grad_norm": 1104.657470703125, - "learning_rate": 5.877267536698984e-06, - "loss": 91.3742, - "step": 61810 - }, - { - "epoch": 0.5113951276006121, - "grad_norm": 924.7949829101562, - "learning_rate": 5.875879012756845e-06, - "loss": 93.8301, - "step": 61820 - }, - { - "epoch": 0.5114778508499814, - "grad_norm": 830.5784301757812, - "learning_rate": 5.8744904191250326e-06, - "loss": 135.9706, - "step": 61830 - }, - { - "epoch": 0.5115605740993506, - "grad_norm": 1287.56884765625, - "learning_rate": 5.873101755914031e-06, - "loss": 113.9375, - "step": 61840 - }, - { - "epoch": 0.5116432973487198, - "grad_norm": 2128.367431640625, - "learning_rate": 5.87171302323433e-06, - "loss": 91.666, - "step": 61850 - }, - { - "epoch": 0.5117260205980891, - "grad_norm": 1100.2708740234375, - "learning_rate": 5.870324221196424e-06, - "loss": 91.6138, - "step": 61860 - }, - { - "epoch": 0.5118087438474583, - "grad_norm": 636.7507934570312, - "learning_rate": 5.868935349910814e-06, - "loss": 105.8906, - "step": 61870 - }, - { - "epoch": 0.5118914670968275, - "grad_norm": 928.61083984375, - "learning_rate": 5.867546409488006e-06, - "loss": 99.2666, - "step": 61880 - }, - { - "epoch": 0.5119741903461968, - "grad_norm": 562.5609130859375, - "learning_rate": 5.8661574000385115e-06, - "loss": 76.9907, - "step": 61890 - }, - { - "epoch": 0.512056913595566, - "grad_norm": 390.1648864746094, - "learning_rate": 5.864768321672848e-06, - "loss": 76.5244, - "step": 61900 - }, - { - "epoch": 0.5121396368449352, - "grad_norm": 944.2666625976562, - "learning_rate": 5.863379174501538e-06, - "loss": 98.3042, - "step": 61910 - }, - { - "epoch": 0.5122223600943046, - "grad_norm": 1686.9356689453125, - "learning_rate": 5.861989958635109e-06, - "loss": 121.1383, - "step": 61920 - }, - { - "epoch": 0.5123050833436738, - "grad_norm": 878.6798095703125, - "learning_rate": 5.860600674184096e-06, - "loss": 105.9373, - "step": 61930 - }, - { - "epoch": 0.512387806593043, - "grad_norm": 528.4129638671875, - "learning_rate": 5.859211321259036e-06, - "loss": 83.7999, - "step": 61940 - }, - { - "epoch": 0.5124705298424123, - "grad_norm": 1257.8131103515625, - "learning_rate": 5.857821899970475e-06, - "loss": 112.0766, - "step": 61950 - }, - { - "epoch": 0.5125532530917815, - "grad_norm": 1426.787353515625, - "learning_rate": 5.856432410428963e-06, - "loss": 127.3927, - "step": 61960 - }, - { - "epoch": 0.5126359763411507, - "grad_norm": 1306.6192626953125, - "learning_rate": 5.8550428527450534e-06, - "loss": 63.1702, - "step": 61970 - }, - { - "epoch": 0.51271869959052, - "grad_norm": 1105.7908935546875, - "learning_rate": 5.8536532270293076e-06, - "loss": 80.9909, - "step": 61980 - }, - { - "epoch": 0.5128014228398892, - "grad_norm": 1267.3624267578125, - "learning_rate": 5.852263533392294e-06, - "loss": 94.7298, - "step": 61990 - }, - { - "epoch": 0.5128841460892584, - "grad_norm": 594.2600708007812, - "learning_rate": 5.850873771944581e-06, - "loss": 91.9315, - "step": 62000 - }, - { - "epoch": 0.5129668693386277, - "grad_norm": 652.3613891601562, - "learning_rate": 5.849483942796747e-06, - "loss": 87.1766, - "step": 62010 - }, - { - "epoch": 0.5130495925879969, - "grad_norm": 602.3596801757812, - "learning_rate": 5.848094046059375e-06, - "loss": 124.0294, - "step": 62020 - }, - { - "epoch": 0.5131323158373661, - "grad_norm": 573.1605224609375, - "learning_rate": 5.846704081843052e-06, - "loss": 111.6119, - "step": 62030 - }, - { - "epoch": 0.5132150390867354, - "grad_norm": 780.5606689453125, - "learning_rate": 5.84531405025837e-06, - "loss": 82.9944, - "step": 62040 - }, - { - "epoch": 0.5132977623361046, - "grad_norm": 913.4967041015625, - "learning_rate": 5.843923951415931e-06, - "loss": 108.7144, - "step": 62050 - }, - { - "epoch": 0.5133804855854738, - "grad_norm": 1110.280029296875, - "learning_rate": 5.842533785426334e-06, - "loss": 101.928, - "step": 62060 - }, - { - "epoch": 0.5134632088348431, - "grad_norm": 1380.97314453125, - "learning_rate": 5.84114355240019e-06, - "loss": 95.0093, - "step": 62070 - }, - { - "epoch": 0.5135459320842123, - "grad_norm": 570.7285766601562, - "learning_rate": 5.839753252448115e-06, - "loss": 88.7862, - "step": 62080 - }, - { - "epoch": 0.5136286553335815, - "grad_norm": 1217.79931640625, - "learning_rate": 5.838362885680728e-06, - "loss": 94.9879, - "step": 62090 - }, - { - "epoch": 0.5137113785829507, - "grad_norm": 1030.6907958984375, - "learning_rate": 5.8369724522086545e-06, - "loss": 88.0829, - "step": 62100 - }, - { - "epoch": 0.51379410183232, - "grad_norm": 2263.951416015625, - "learning_rate": 5.835581952142522e-06, - "loss": 147.4353, - "step": 62110 - }, - { - "epoch": 0.5138768250816892, - "grad_norm": 499.37347412109375, - "learning_rate": 5.834191385592969e-06, - "loss": 119.7903, - "step": 62120 - }, - { - "epoch": 0.5139595483310584, - "grad_norm": 1055.9906005859375, - "learning_rate": 5.8328007526706354e-06, - "loss": 87.058, - "step": 62130 - }, - { - "epoch": 0.5140422715804277, - "grad_norm": 1002.3789672851562, - "learning_rate": 5.83141005348617e-06, - "loss": 107.1515, - "step": 62140 - }, - { - "epoch": 0.5141249948297969, - "grad_norm": 859.0476684570312, - "learning_rate": 5.830019288150222e-06, - "loss": 90.1526, - "step": 62150 - }, - { - "epoch": 0.5142077180791661, - "grad_norm": 765.904052734375, - "learning_rate": 5.8286284567734456e-06, - "loss": 81.6313, - "step": 62160 - }, - { - "epoch": 0.5142904413285354, - "grad_norm": 676.10009765625, - "learning_rate": 5.827237559466508e-06, - "loss": 84.7288, - "step": 62170 - }, - { - "epoch": 0.5143731645779046, - "grad_norm": 1681.114990234375, - "learning_rate": 5.825846596340075e-06, - "loss": 122.0725, - "step": 62180 - }, - { - "epoch": 0.5144558878272738, - "grad_norm": 708.9700927734375, - "learning_rate": 5.824455567504817e-06, - "loss": 123.205, - "step": 62190 - }, - { - "epoch": 0.5145386110766431, - "grad_norm": 853.5513305664062, - "learning_rate": 5.823064473071414e-06, - "loss": 96.5516, - "step": 62200 - }, - { - "epoch": 0.5146213343260123, - "grad_norm": 937.1123046875, - "learning_rate": 5.821673313150546e-06, - "loss": 98.6421, - "step": 62210 - }, - { - "epoch": 0.5147040575753815, - "grad_norm": 802.1367797851562, - "learning_rate": 5.820282087852906e-06, - "loss": 99.0799, - "step": 62220 - }, - { - "epoch": 0.5147867808247508, - "grad_norm": 451.8292236328125, - "learning_rate": 5.818890797289185e-06, - "loss": 90.0844, - "step": 62230 - }, - { - "epoch": 0.51486950407412, - "grad_norm": 1798.909912109375, - "learning_rate": 5.81749944157008e-06, - "loss": 137.7885, - "step": 62240 - }, - { - "epoch": 0.5149522273234892, - "grad_norm": 985.250732421875, - "learning_rate": 5.816108020806297e-06, - "loss": 97.535, - "step": 62250 - }, - { - "epoch": 0.5150349505728585, - "grad_norm": 672.7926635742188, - "learning_rate": 5.814716535108545e-06, - "loss": 88.2172, - "step": 62260 - }, - { - "epoch": 0.5151176738222277, - "grad_norm": 716.781982421875, - "learning_rate": 5.813324984587536e-06, - "loss": 106.1954, - "step": 62270 - }, - { - "epoch": 0.5152003970715969, - "grad_norm": 1432.325439453125, - "learning_rate": 5.811933369353992e-06, - "loss": 111.049, - "step": 62280 - }, - { - "epoch": 0.5152831203209662, - "grad_norm": 760.8700561523438, - "learning_rate": 5.810541689518634e-06, - "loss": 94.8973, - "step": 62290 - }, - { - "epoch": 0.5153658435703354, - "grad_norm": 845.1988525390625, - "learning_rate": 5.809149945192194e-06, - "loss": 62.3175, - "step": 62300 - }, - { - "epoch": 0.5154485668197046, - "grad_norm": 770.4382934570312, - "learning_rate": 5.807758136485409e-06, - "loss": 106.6364, - "step": 62310 - }, - { - "epoch": 0.5155312900690739, - "grad_norm": 1148.9901123046875, - "learning_rate": 5.8063662635090136e-06, - "loss": 98.8217, - "step": 62320 - }, - { - "epoch": 0.5156140133184431, - "grad_norm": 1040.4677734375, - "learning_rate": 5.804974326373756e-06, - "loss": 92.393, - "step": 62330 - }, - { - "epoch": 0.5156967365678123, - "grad_norm": 609.6980590820312, - "learning_rate": 5.803582325190387e-06, - "loss": 97.1185, - "step": 62340 - }, - { - "epoch": 0.5157794598171817, - "grad_norm": 799.3777465820312, - "learning_rate": 5.802190260069657e-06, - "loss": 87.1282, - "step": 62350 - }, - { - "epoch": 0.5158621830665509, - "grad_norm": 968.39990234375, - "learning_rate": 5.800798131122332e-06, - "loss": 86.2219, - "step": 62360 - }, - { - "epoch": 0.51594490631592, - "grad_norm": 1028.2894287109375, - "learning_rate": 5.799405938459175e-06, - "loss": 76.0357, - "step": 62370 - }, - { - "epoch": 0.5160276295652894, - "grad_norm": 1118.3125, - "learning_rate": 5.7980136821909565e-06, - "loss": 91.065, - "step": 62380 - }, - { - "epoch": 0.5161103528146586, - "grad_norm": 817.6798706054688, - "learning_rate": 5.79662136242845e-06, - "loss": 112.0168, - "step": 62390 - }, - { - "epoch": 0.5161930760640278, - "grad_norm": 847.1541748046875, - "learning_rate": 5.795228979282439e-06, - "loss": 78.081, - "step": 62400 - }, - { - "epoch": 0.5162757993133971, - "grad_norm": 474.493896484375, - "learning_rate": 5.793836532863707e-06, - "loss": 70.5183, - "step": 62410 - }, - { - "epoch": 0.5163585225627663, - "grad_norm": 720.2383422851562, - "learning_rate": 5.792444023283046e-06, - "loss": 87.0467, - "step": 62420 - }, - { - "epoch": 0.5164412458121355, - "grad_norm": 847.5111083984375, - "learning_rate": 5.791051450651251e-06, - "loss": 80.678, - "step": 62430 - }, - { - "epoch": 0.5165239690615048, - "grad_norm": 1734.1534423828125, - "learning_rate": 5.789658815079121e-06, - "loss": 106.6263, - "step": 62440 - }, - { - "epoch": 0.516606692310874, - "grad_norm": 418.1882019042969, - "learning_rate": 5.788266116677464e-06, - "loss": 88.7378, - "step": 62450 - }, - { - "epoch": 0.5166894155602432, - "grad_norm": 1164.404541015625, - "learning_rate": 5.78687335555709e-06, - "loss": 109.2202, - "step": 62460 - }, - { - "epoch": 0.5167721388096125, - "grad_norm": 843.881103515625, - "learning_rate": 5.785480531828815e-06, - "loss": 95.9288, - "step": 62470 - }, - { - "epoch": 0.5168548620589817, - "grad_norm": 753.1651000976562, - "learning_rate": 5.784087645603459e-06, - "loss": 88.32, - "step": 62480 - }, - { - "epoch": 0.5169375853083509, - "grad_norm": 488.68487548828125, - "learning_rate": 5.782694696991845e-06, - "loss": 77.3577, - "step": 62490 - }, - { - "epoch": 0.5170203085577202, - "grad_norm": 748.185546875, - "learning_rate": 5.781301686104808e-06, - "loss": 99.6195, - "step": 62500 - }, - { - "epoch": 0.5171030318070894, - "grad_norm": 1028.9573974609375, - "learning_rate": 5.779908613053181e-06, - "loss": 89.3196, - "step": 62510 - }, - { - "epoch": 0.5171857550564586, - "grad_norm": 823.2115478515625, - "learning_rate": 5.778515477947807e-06, - "loss": 85.4209, - "step": 62520 - }, - { - "epoch": 0.5172684783058279, - "grad_norm": 720.9722900390625, - "learning_rate": 5.777122280899527e-06, - "loss": 87.5316, - "step": 62530 - }, - { - "epoch": 0.5173512015551971, - "grad_norm": 393.12646484375, - "learning_rate": 5.775729022019193e-06, - "loss": 132.2376, - "step": 62540 - }, - { - "epoch": 0.5174339248045663, - "grad_norm": 517.8870849609375, - "learning_rate": 5.774335701417662e-06, - "loss": 74.0599, - "step": 62550 - }, - { - "epoch": 0.5175166480539356, - "grad_norm": 779.7863159179688, - "learning_rate": 5.7729423192057936e-06, - "loss": 104.9648, - "step": 62560 - }, - { - "epoch": 0.5175993713033048, - "grad_norm": 1225.2161865234375, - "learning_rate": 5.771548875494453e-06, - "loss": 73.9343, - "step": 62570 - }, - { - "epoch": 0.517682094552674, - "grad_norm": 1787.88232421875, - "learning_rate": 5.7701553703945055e-06, - "loss": 99.922, - "step": 62580 - }, - { - "epoch": 0.5177648178020433, - "grad_norm": 761.9515380859375, - "learning_rate": 5.768761804016833e-06, - "loss": 126.8296, - "step": 62590 - }, - { - "epoch": 0.5178475410514125, - "grad_norm": 700.2348022460938, - "learning_rate": 5.767368176472311e-06, - "loss": 77.3953, - "step": 62600 - }, - { - "epoch": 0.5179302643007817, - "grad_norm": 1287.00732421875, - "learning_rate": 5.765974487871826e-06, - "loss": 110.9408, - "step": 62610 - }, - { - "epoch": 0.518012987550151, - "grad_norm": 1209.4278564453125, - "learning_rate": 5.764580738326265e-06, - "loss": 122.3806, - "step": 62620 - }, - { - "epoch": 0.5180957107995202, - "grad_norm": 972.08251953125, - "learning_rate": 5.763186927946523e-06, - "loss": 96.5278, - "step": 62630 - }, - { - "epoch": 0.5181784340488894, - "grad_norm": 939.2523193359375, - "learning_rate": 5.761793056843501e-06, - "loss": 96.9909, - "step": 62640 - }, - { - "epoch": 0.5182611572982587, - "grad_norm": 729.4473876953125, - "learning_rate": 5.760399125128102e-06, - "loss": 98.4378, - "step": 62650 - }, - { - "epoch": 0.5183438805476279, - "grad_norm": 780.3011474609375, - "learning_rate": 5.759005132911233e-06, - "loss": 78.8927, - "step": 62660 - }, - { - "epoch": 0.5184266037969971, - "grad_norm": 546.9750366210938, - "learning_rate": 5.75761108030381e-06, - "loss": 97.8188, - "step": 62670 - }, - { - "epoch": 0.5185093270463664, - "grad_norm": 898.6033325195312, - "learning_rate": 5.756216967416749e-06, - "loss": 73.9803, - "step": 62680 - }, - { - "epoch": 0.5185920502957356, - "grad_norm": 1270.0867919921875, - "learning_rate": 5.754822794360976e-06, - "loss": 100.2555, - "step": 62690 - }, - { - "epoch": 0.5186747735451048, - "grad_norm": 799.2184448242188, - "learning_rate": 5.753428561247416e-06, - "loss": 104.4311, - "step": 62700 - }, - { - "epoch": 0.5187574967944741, - "grad_norm": 756.2783813476562, - "learning_rate": 5.752034268187005e-06, - "loss": 90.7448, - "step": 62710 - }, - { - "epoch": 0.5188402200438433, - "grad_norm": 673.90576171875, - "learning_rate": 5.750639915290677e-06, - "loss": 100.3478, - "step": 62720 - }, - { - "epoch": 0.5189229432932125, - "grad_norm": 961.5318603515625, - "learning_rate": 5.749245502669375e-06, - "loss": 88.5219, - "step": 62730 - }, - { - "epoch": 0.5190056665425818, - "grad_norm": 700.3480834960938, - "learning_rate": 5.747851030434049e-06, - "loss": 85.1043, - "step": 62740 - }, - { - "epoch": 0.519088389791951, - "grad_norm": 508.70166015625, - "learning_rate": 5.746456498695648e-06, - "loss": 95.531, - "step": 62750 - }, - { - "epoch": 0.5191711130413202, - "grad_norm": 490.99737548828125, - "learning_rate": 5.7450619075651305e-06, - "loss": 87.5613, - "step": 62760 - }, - { - "epoch": 0.5192538362906896, - "grad_norm": 1027.1727294921875, - "learning_rate": 5.743667257153454e-06, - "loss": 81.3641, - "step": 62770 - }, - { - "epoch": 0.5193365595400588, - "grad_norm": 1106.285400390625, - "learning_rate": 5.742272547571588e-06, - "loss": 113.6713, - "step": 62780 - }, - { - "epoch": 0.519419282789428, - "grad_norm": 657.1253662109375, - "learning_rate": 5.740877778930503e-06, - "loss": 92.9361, - "step": 62790 - }, - { - "epoch": 0.5195020060387973, - "grad_norm": 831.34521484375, - "learning_rate": 5.739482951341172e-06, - "loss": 120.2162, - "step": 62800 - }, - { - "epoch": 0.5195847292881665, - "grad_norm": 709.2473754882812, - "learning_rate": 5.738088064914576e-06, - "loss": 112.4378, - "step": 62810 - }, - { - "epoch": 0.5196674525375357, - "grad_norm": 847.8458862304688, - "learning_rate": 5.7366931197617e-06, - "loss": 85.5061, - "step": 62820 - }, - { - "epoch": 0.5197501757869049, - "grad_norm": 1197.6585693359375, - "learning_rate": 5.735298115993535e-06, - "loss": 93.4385, - "step": 62830 - }, - { - "epoch": 0.5198328990362742, - "grad_norm": 1183.1817626953125, - "learning_rate": 5.733903053721072e-06, - "loss": 123.1538, - "step": 62840 - }, - { - "epoch": 0.5199156222856434, - "grad_norm": 1146.695068359375, - "learning_rate": 5.732507933055311e-06, - "loss": 133.2361, - "step": 62850 - }, - { - "epoch": 0.5199983455350126, - "grad_norm": 830.4008178710938, - "learning_rate": 5.731112754107257e-06, - "loss": 92.5354, - "step": 62860 - }, - { - "epoch": 0.5200810687843819, - "grad_norm": 1141.0968017578125, - "learning_rate": 5.729717516987916e-06, - "loss": 91.7082, - "step": 62870 - }, - { - "epoch": 0.5201637920337511, - "grad_norm": 860.5159301757812, - "learning_rate": 5.7283222218083e-06, - "loss": 84.899, - "step": 62880 - }, - { - "epoch": 0.5202465152831203, - "grad_norm": 632.0242309570312, - "learning_rate": 5.726926868679429e-06, - "loss": 127.0738, - "step": 62890 - }, - { - "epoch": 0.5203292385324896, - "grad_norm": 877.5741577148438, - "learning_rate": 5.725531457712321e-06, - "loss": 98.0383, - "step": 62900 - }, - { - "epoch": 0.5204119617818588, - "grad_norm": 955.6021728515625, - "learning_rate": 5.724135989018007e-06, - "loss": 93.8473, - "step": 62910 - }, - { - "epoch": 0.520494685031228, - "grad_norm": 1114.900634765625, - "learning_rate": 5.722740462707515e-06, - "loss": 97.0482, - "step": 62920 - }, - { - "epoch": 0.5205774082805973, - "grad_norm": 1169.9739990234375, - "learning_rate": 5.72134487889188e-06, - "loss": 75.5919, - "step": 62930 - }, - { - "epoch": 0.5206601315299665, - "grad_norm": 1042.204833984375, - "learning_rate": 5.719949237682145e-06, - "loss": 96.2548, - "step": 62940 - }, - { - "epoch": 0.5207428547793357, - "grad_norm": 923.940185546875, - "learning_rate": 5.718553539189353e-06, - "loss": 120.747, - "step": 62950 - }, - { - "epoch": 0.520825578028705, - "grad_norm": 773.2394409179688, - "learning_rate": 5.717157783524553e-06, - "loss": 107.0883, - "step": 62960 - }, - { - "epoch": 0.5209083012780742, - "grad_norm": 1147.978759765625, - "learning_rate": 5.7157619707988e-06, - "loss": 112.3523, - "step": 62970 - }, - { - "epoch": 0.5209910245274434, - "grad_norm": 869.9029541015625, - "learning_rate": 5.714366101123152e-06, - "loss": 91.1115, - "step": 62980 - }, - { - "epoch": 0.5210737477768127, - "grad_norm": 503.1574401855469, - "learning_rate": 5.712970174608671e-06, - "loss": 62.8698, - "step": 62990 - }, - { - "epoch": 0.5211564710261819, - "grad_norm": 567.345947265625, - "learning_rate": 5.711574191366427e-06, - "loss": 87.9423, - "step": 63000 - }, - { - "epoch": 0.5212391942755511, - "grad_norm": 706.41748046875, - "learning_rate": 5.710178151507488e-06, - "loss": 73.9817, - "step": 63010 - }, - { - "epoch": 0.5213219175249204, - "grad_norm": 1259.374755859375, - "learning_rate": 5.708782055142934e-06, - "loss": 91.3289, - "step": 63020 - }, - { - "epoch": 0.5214046407742896, - "grad_norm": 1063.552001953125, - "learning_rate": 5.707385902383845e-06, - "loss": 120.5428, - "step": 63030 - }, - { - "epoch": 0.5214873640236588, - "grad_norm": 910.1624145507812, - "learning_rate": 5.7059896933413076e-06, - "loss": 83.0945, - "step": 63040 - }, - { - "epoch": 0.5215700872730281, - "grad_norm": 2553.767333984375, - "learning_rate": 5.7045934281264085e-06, - "loss": 102.2294, - "step": 63050 - }, - { - "epoch": 0.5216528105223973, - "grad_norm": 1032.9830322265625, - "learning_rate": 5.7031971068502425e-06, - "loss": 98.3443, - "step": 63060 - }, - { - "epoch": 0.5217355337717665, - "grad_norm": 837.3251342773438, - "learning_rate": 5.701800729623911e-06, - "loss": 105.6823, - "step": 63070 - }, - { - "epoch": 0.5218182570211358, - "grad_norm": 470.49658203125, - "learning_rate": 5.700404296558518e-06, - "loss": 94.3055, - "step": 63080 - }, - { - "epoch": 0.521900980270505, - "grad_norm": 722.3120727539062, - "learning_rate": 5.699007807765169e-06, - "loss": 96.7037, - "step": 63090 - }, - { - "epoch": 0.5219837035198742, - "grad_norm": 1043.8006591796875, - "learning_rate": 5.6976112633549764e-06, - "loss": 63.1547, - "step": 63100 - }, - { - "epoch": 0.5220664267692435, - "grad_norm": 559.216552734375, - "learning_rate": 5.696214663439055e-06, - "loss": 89.2568, - "step": 63110 - }, - { - "epoch": 0.5221491500186127, - "grad_norm": 1489.68994140625, - "learning_rate": 5.694818008128531e-06, - "loss": 106.3069, - "step": 63120 - }, - { - "epoch": 0.5222318732679819, - "grad_norm": 1087.22802734375, - "learning_rate": 5.693421297534526e-06, - "loss": 116.6925, - "step": 63130 - }, - { - "epoch": 0.5223145965173512, - "grad_norm": 1571.4100341796875, - "learning_rate": 5.69202453176817e-06, - "loss": 104.0308, - "step": 63140 - }, - { - "epoch": 0.5223973197667204, - "grad_norm": 1171.318359375, - "learning_rate": 5.6906277109406e-06, - "loss": 85.1603, - "step": 63150 - }, - { - "epoch": 0.5224800430160896, - "grad_norm": 1105.7423095703125, - "learning_rate": 5.689230835162949e-06, - "loss": 92.7724, - "step": 63160 - }, - { - "epoch": 0.5225627662654589, - "grad_norm": 2501.460693359375, - "learning_rate": 5.687833904546367e-06, - "loss": 124.3585, - "step": 63170 - }, - { - "epoch": 0.5226454895148281, - "grad_norm": 673.11328125, - "learning_rate": 5.686436919201996e-06, - "loss": 95.2214, - "step": 63180 - }, - { - "epoch": 0.5227282127641973, - "grad_norm": 803.44970703125, - "learning_rate": 5.68503987924099e-06, - "loss": 96.271, - "step": 63190 - }, - { - "epoch": 0.5228109360135667, - "grad_norm": 733.796875, - "learning_rate": 5.683642784774506e-06, - "loss": 94.5968, - "step": 63200 - }, - { - "epoch": 0.5228936592629359, - "grad_norm": 893.05615234375, - "learning_rate": 5.682245635913701e-06, - "loss": 93.2139, - "step": 63210 - }, - { - "epoch": 0.522976382512305, - "grad_norm": 806.1270751953125, - "learning_rate": 5.680848432769743e-06, - "loss": 83.2208, - "step": 63220 - }, - { - "epoch": 0.5230591057616744, - "grad_norm": 950.8448486328125, - "learning_rate": 5.6794511754538005e-06, - "loss": 102.1387, - "step": 63230 - }, - { - "epoch": 0.5231418290110436, - "grad_norm": 994.8265991210938, - "learning_rate": 5.6780538640770455e-06, - "loss": 94.9941, - "step": 63240 - }, - { - "epoch": 0.5232245522604128, - "grad_norm": 805.738037109375, - "learning_rate": 5.6766564987506564e-06, - "loss": 97.3443, - "step": 63250 - }, - { - "epoch": 0.5233072755097821, - "grad_norm": 1825.2171630859375, - "learning_rate": 5.675259079585816e-06, - "loss": 93.6461, - "step": 63260 - }, - { - "epoch": 0.5233899987591513, - "grad_norm": 899.5811767578125, - "learning_rate": 5.673861606693708e-06, - "loss": 124.3475, - "step": 63270 - }, - { - "epoch": 0.5234727220085205, - "grad_norm": 738.1337890625, - "learning_rate": 5.672464080185526e-06, - "loss": 82.6199, - "step": 63280 - }, - { - "epoch": 0.5235554452578898, - "grad_norm": 1002.6314086914062, - "learning_rate": 5.671066500172462e-06, - "loss": 118.9046, - "step": 63290 - }, - { - "epoch": 0.523638168507259, - "grad_norm": 644.3387451171875, - "learning_rate": 5.669668866765717e-06, - "loss": 127.9811, - "step": 63300 - }, - { - "epoch": 0.5237208917566282, - "grad_norm": 654.7725219726562, - "learning_rate": 5.6682711800764935e-06, - "loss": 93.9227, - "step": 63310 - }, - { - "epoch": 0.5238036150059975, - "grad_norm": 973.210205078125, - "learning_rate": 5.6668734402159994e-06, - "loss": 84.845, - "step": 63320 - }, - { - "epoch": 0.5238863382553667, - "grad_norm": 1666.18017578125, - "learning_rate": 5.6654756472954464e-06, - "loss": 133.6506, - "step": 63330 - }, - { - "epoch": 0.5239690615047359, - "grad_norm": 1285.848388671875, - "learning_rate": 5.66407780142605e-06, - "loss": 102.1989, - "step": 63340 - }, - { - "epoch": 0.5240517847541052, - "grad_norm": 1405.201904296875, - "learning_rate": 5.66267990271903e-06, - "loss": 101.3061, - "step": 63350 - }, - { - "epoch": 0.5241345080034744, - "grad_norm": 995.1788940429688, - "learning_rate": 5.661281951285613e-06, - "loss": 80.424, - "step": 63360 - }, - { - "epoch": 0.5242172312528436, - "grad_norm": 1280.261962890625, - "learning_rate": 5.6598839472370245e-06, - "loss": 129.4751, - "step": 63370 - }, - { - "epoch": 0.5242999545022129, - "grad_norm": 1111.2105712890625, - "learning_rate": 5.6584858906845e-06, - "loss": 111.6386, - "step": 63380 - }, - { - "epoch": 0.5243826777515821, - "grad_norm": 438.5060119628906, - "learning_rate": 5.657087781739274e-06, - "loss": 88.3789, - "step": 63390 - }, - { - "epoch": 0.5244654010009513, - "grad_norm": 1246.3992919921875, - "learning_rate": 5.6556896205125896e-06, - "loss": 113.5103, - "step": 63400 - }, - { - "epoch": 0.5245481242503206, - "grad_norm": 867.26220703125, - "learning_rate": 5.654291407115692e-06, - "loss": 90.075, - "step": 63410 - }, - { - "epoch": 0.5246308474996898, - "grad_norm": 922.8386840820312, - "learning_rate": 5.652893141659829e-06, - "loss": 103.2854, - "step": 63420 - }, - { - "epoch": 0.524713570749059, - "grad_norm": 1118.366455078125, - "learning_rate": 5.651494824256256e-06, - "loss": 92.4039, - "step": 63430 - }, - { - "epoch": 0.5247962939984283, - "grad_norm": 1036.559326171875, - "learning_rate": 5.650096455016227e-06, - "loss": 77.7015, - "step": 63440 - }, - { - "epoch": 0.5248790172477975, - "grad_norm": 649.5186157226562, - "learning_rate": 5.648698034051009e-06, - "loss": 93.3135, - "step": 63450 - }, - { - "epoch": 0.5249617404971667, - "grad_norm": 685.0455322265625, - "learning_rate": 5.647299561471865e-06, - "loss": 88.9663, - "step": 63460 - }, - { - "epoch": 0.525044463746536, - "grad_norm": 786.32470703125, - "learning_rate": 5.645901037390067e-06, - "loss": 108.5513, - "step": 63470 - }, - { - "epoch": 0.5251271869959052, - "grad_norm": 863.156005859375, - "learning_rate": 5.644502461916886e-06, - "loss": 86.3591, - "step": 63480 - }, - { - "epoch": 0.5252099102452744, - "grad_norm": 911.6554565429688, - "learning_rate": 5.643103835163602e-06, - "loss": 92.7152, - "step": 63490 - }, - { - "epoch": 0.5252926334946437, - "grad_norm": 1143.3475341796875, - "learning_rate": 5.641705157241497e-06, - "loss": 126.4204, - "step": 63500 - }, - { - "epoch": 0.5253753567440129, - "grad_norm": 1113.570556640625, - "learning_rate": 5.64030642826186e-06, - "loss": 106.2012, - "step": 63510 - }, - { - "epoch": 0.5254580799933821, - "grad_norm": 734.0611572265625, - "learning_rate": 5.6389076483359774e-06, - "loss": 85.6654, - "step": 63520 - }, - { - "epoch": 0.5255408032427513, - "grad_norm": 762.94677734375, - "learning_rate": 5.637508817575145e-06, - "loss": 111.5038, - "step": 63530 - }, - { - "epoch": 0.5256235264921206, - "grad_norm": 730.315673828125, - "learning_rate": 5.636109936090661e-06, - "loss": 118.9952, - "step": 63540 - }, - { - "epoch": 0.5257062497414898, - "grad_norm": 1400.8131103515625, - "learning_rate": 5.634711003993832e-06, - "loss": 109.5317, - "step": 63550 - }, - { - "epoch": 0.525788972990859, - "grad_norm": 822.1387329101562, - "learning_rate": 5.633312021395959e-06, - "loss": 115.5602, - "step": 63560 - }, - { - "epoch": 0.5258716962402283, - "grad_norm": 836.3711547851562, - "learning_rate": 5.631912988408356e-06, - "loss": 110.6894, - "step": 63570 - }, - { - "epoch": 0.5259544194895975, - "grad_norm": 709.7254028320312, - "learning_rate": 5.630513905142334e-06, - "loss": 92.7573, - "step": 63580 - }, - { - "epoch": 0.5260371427389667, - "grad_norm": 633.5755004882812, - "learning_rate": 5.629114771709217e-06, - "loss": 97.9029, - "step": 63590 - }, - { - "epoch": 0.526119865988336, - "grad_norm": 1466.9072265625, - "learning_rate": 5.627715588220325e-06, - "loss": 82.6277, - "step": 63600 - }, - { - "epoch": 0.5262025892377052, - "grad_norm": 1547.8734130859375, - "learning_rate": 5.626316354786982e-06, - "loss": 113.497, - "step": 63610 - }, - { - "epoch": 0.5262853124870744, - "grad_norm": 691.7115478515625, - "learning_rate": 5.624917071520524e-06, - "loss": 99.4881, - "step": 63620 - }, - { - "epoch": 0.5263680357364438, - "grad_norm": 1155.3173828125, - "learning_rate": 5.62351773853228e-06, - "loss": 85.3007, - "step": 63630 - }, - { - "epoch": 0.526450758985813, - "grad_norm": 1123.159423828125, - "learning_rate": 5.6221183559335935e-06, - "loss": 103.121, - "step": 63640 - }, - { - "epoch": 0.5265334822351821, - "grad_norm": 1173.2393798828125, - "learning_rate": 5.6207189238358025e-06, - "loss": 88.9635, - "step": 63650 - }, - { - "epoch": 0.5266162054845515, - "grad_norm": 1155.658935546875, - "learning_rate": 5.619319442350256e-06, - "loss": 100.2915, - "step": 63660 - }, - { - "epoch": 0.5266989287339207, - "grad_norm": 1091.1961669921875, - "learning_rate": 5.617919911588304e-06, - "loss": 92.6392, - "step": 63670 - }, - { - "epoch": 0.5267816519832899, - "grad_norm": 922.4959106445312, - "learning_rate": 5.616520331661301e-06, - "loss": 112.2287, - "step": 63680 - }, - { - "epoch": 0.5268643752326592, - "grad_norm": 765.5256958007812, - "learning_rate": 5.615120702680604e-06, - "loss": 98.105, - "step": 63690 - }, - { - "epoch": 0.5269470984820284, - "grad_norm": 1066.0155029296875, - "learning_rate": 5.6137210247575754e-06, - "loss": 103.8472, - "step": 63700 - }, - { - "epoch": 0.5270298217313976, - "grad_norm": 1942.541015625, - "learning_rate": 5.6123212980035825e-06, - "loss": 109.4006, - "step": 63710 - }, - { - "epoch": 0.5271125449807669, - "grad_norm": 926.7212524414062, - "learning_rate": 5.610921522529994e-06, - "loss": 103.9244, - "step": 63720 - }, - { - "epoch": 0.5271952682301361, - "grad_norm": 811.9002685546875, - "learning_rate": 5.609521698448183e-06, - "loss": 96.8324, - "step": 63730 - }, - { - "epoch": 0.5272779914795053, - "grad_norm": 774.9049682617188, - "learning_rate": 5.608121825869528e-06, - "loss": 173.0452, - "step": 63740 - }, - { - "epoch": 0.5273607147288746, - "grad_norm": 1026.793212890625, - "learning_rate": 5.60672190490541e-06, - "loss": 94.8755, - "step": 63750 - }, - { - "epoch": 0.5274434379782438, - "grad_norm": 405.6708679199219, - "learning_rate": 5.6053219356672155e-06, - "loss": 86.6626, - "step": 63760 - }, - { - "epoch": 0.527526161227613, - "grad_norm": 890.8868408203125, - "learning_rate": 5.603921918266332e-06, - "loss": 87.5938, - "step": 63770 - }, - { - "epoch": 0.5276088844769823, - "grad_norm": 737.17236328125, - "learning_rate": 5.602521852814152e-06, - "loss": 73.9403, - "step": 63780 - }, - { - "epoch": 0.5276916077263515, - "grad_norm": 809.0032348632812, - "learning_rate": 5.6011217394220755e-06, - "loss": 79.7671, - "step": 63790 - }, - { - "epoch": 0.5277743309757207, - "grad_norm": 1906.908203125, - "learning_rate": 5.599721578201499e-06, - "loss": 82.5073, - "step": 63800 - }, - { - "epoch": 0.52785705422509, - "grad_norm": 1029.4927978515625, - "learning_rate": 5.59832136926383e-06, - "loss": 106.927, - "step": 63810 - }, - { - "epoch": 0.5279397774744592, - "grad_norm": 864.1856079101562, - "learning_rate": 5.5969211127204744e-06, - "loss": 100.7081, - "step": 63820 - }, - { - "epoch": 0.5280225007238284, - "grad_norm": 958.0577392578125, - "learning_rate": 5.595520808682848e-06, - "loss": 81.9101, - "step": 63830 - }, - { - "epoch": 0.5281052239731977, - "grad_norm": 1057.2666015625, - "learning_rate": 5.594120457262361e-06, - "loss": 109.2872, - "step": 63840 - }, - { - "epoch": 0.5281879472225669, - "grad_norm": 963.5335083007812, - "learning_rate": 5.592720058570438e-06, - "loss": 95.8562, - "step": 63850 - }, - { - "epoch": 0.5282706704719361, - "grad_norm": 841.0330810546875, - "learning_rate": 5.591319612718498e-06, - "loss": 76.5216, - "step": 63860 - }, - { - "epoch": 0.5283533937213054, - "grad_norm": 1039.90771484375, - "learning_rate": 5.589919119817971e-06, - "loss": 107.0152, - "step": 63870 - }, - { - "epoch": 0.5284361169706746, - "grad_norm": 1642.884033203125, - "learning_rate": 5.588518579980288e-06, - "loss": 140.7489, - "step": 63880 - }, - { - "epoch": 0.5285188402200438, - "grad_norm": 1069.137451171875, - "learning_rate": 5.587117993316882e-06, - "loss": 101.0652, - "step": 63890 - }, - { - "epoch": 0.5286015634694131, - "grad_norm": 494.1618957519531, - "learning_rate": 5.585717359939192e-06, - "loss": 85.4378, - "step": 63900 - }, - { - "epoch": 0.5286842867187823, - "grad_norm": 1175.2783203125, - "learning_rate": 5.584316679958659e-06, - "loss": 103.4738, - "step": 63910 - }, - { - "epoch": 0.5287670099681515, - "grad_norm": 927.4609375, - "learning_rate": 5.58291595348673e-06, - "loss": 104.5927, - "step": 63920 - }, - { - "epoch": 0.5288497332175208, - "grad_norm": 832.366455078125, - "learning_rate": 5.581515180634853e-06, - "loss": 80.5153, - "step": 63930 - }, - { - "epoch": 0.52893245646689, - "grad_norm": 1159.866943359375, - "learning_rate": 5.580114361514484e-06, - "loss": 90.9927, - "step": 63940 - }, - { - "epoch": 0.5290151797162592, - "grad_norm": 938.1612548828125, - "learning_rate": 5.5787134962370755e-06, - "loss": 67.0976, - "step": 63950 - }, - { - "epoch": 0.5290979029656285, - "grad_norm": 965.9879150390625, - "learning_rate": 5.57731258491409e-06, - "loss": 91.4234, - "step": 63960 - }, - { - "epoch": 0.5291806262149977, - "grad_norm": 816.1832885742188, - "learning_rate": 5.575911627656993e-06, - "loss": 87.2834, - "step": 63970 - }, - { - "epoch": 0.5292633494643669, - "grad_norm": 424.278564453125, - "learning_rate": 5.5745106245772506e-06, - "loss": 82.5088, - "step": 63980 - }, - { - "epoch": 0.5293460727137362, - "grad_norm": 1383.386962890625, - "learning_rate": 5.573109575786334e-06, - "loss": 99.203, - "step": 63990 - }, - { - "epoch": 0.5294287959631054, - "grad_norm": 1369.8514404296875, - "learning_rate": 5.571708481395719e-06, - "loss": 87.6147, - "step": 64000 - }, - { - "epoch": 0.5295115192124746, - "grad_norm": 679.2786865234375, - "learning_rate": 5.570307341516882e-06, - "loss": 109.6216, - "step": 64010 - }, - { - "epoch": 0.529594242461844, - "grad_norm": 688.9489135742188, - "learning_rate": 5.568906156261309e-06, - "loss": 78.812, - "step": 64020 - }, - { - "epoch": 0.5296769657112131, - "grad_norm": 716.52880859375, - "learning_rate": 5.567504925740484e-06, - "loss": 85.4848, - "step": 64030 - }, - { - "epoch": 0.5297596889605823, - "grad_norm": 1037.4881591796875, - "learning_rate": 5.566103650065897e-06, - "loss": 98.9546, - "step": 64040 - }, - { - "epoch": 0.5298424122099517, - "grad_norm": 835.7286376953125, - "learning_rate": 5.564702329349041e-06, - "loss": 104.4599, - "step": 64050 - }, - { - "epoch": 0.5299251354593209, - "grad_norm": 1021.035888671875, - "learning_rate": 5.56330096370141e-06, - "loss": 82.6299, - "step": 64060 - }, - { - "epoch": 0.53000785870869, - "grad_norm": 469.7624816894531, - "learning_rate": 5.561899553234509e-06, - "loss": 90.2693, - "step": 64070 - }, - { - "epoch": 0.5300905819580594, - "grad_norm": 698.7485961914062, - "learning_rate": 5.560498098059838e-06, - "loss": 116.0395, - "step": 64080 - }, - { - "epoch": 0.5301733052074286, - "grad_norm": 1172.5970458984375, - "learning_rate": 5.559096598288906e-06, - "loss": 70.6558, - "step": 64090 - }, - { - "epoch": 0.5302560284567978, - "grad_norm": 901.9520263671875, - "learning_rate": 5.557695054033223e-06, - "loss": 85.0805, - "step": 64100 - }, - { - "epoch": 0.5303387517061671, - "grad_norm": 1102.83056640625, - "learning_rate": 5.556293465404304e-06, - "loss": 104.11, - "step": 64110 - }, - { - "epoch": 0.5304214749555363, - "grad_norm": 862.3082275390625, - "learning_rate": 5.554891832513668e-06, - "loss": 69.0226, - "step": 64120 - }, - { - "epoch": 0.5305041982049055, - "grad_norm": 850.2838745117188, - "learning_rate": 5.553490155472835e-06, - "loss": 88.536, - "step": 64130 - }, - { - "epoch": 0.5305869214542748, - "grad_norm": 1342.24951171875, - "learning_rate": 5.55208843439333e-06, - "loss": 83.8514, - "step": 64140 - }, - { - "epoch": 0.530669644703644, - "grad_norm": 704.4754028320312, - "learning_rate": 5.550686669386683e-06, - "loss": 52.0388, - "step": 64150 - }, - { - "epoch": 0.5307523679530132, - "grad_norm": 709.6826171875, - "learning_rate": 5.549284860564425e-06, - "loss": 151.5818, - "step": 64160 - }, - { - "epoch": 0.5308350912023825, - "grad_norm": 891.2298583984375, - "learning_rate": 5.547883008038091e-06, - "loss": 132.4302, - "step": 64170 - }, - { - "epoch": 0.5309178144517517, - "grad_norm": 668.7271728515625, - "learning_rate": 5.54648111191922e-06, - "loss": 102.1614, - "step": 64180 - }, - { - "epoch": 0.5310005377011209, - "grad_norm": 1067.2647705078125, - "learning_rate": 5.545079172319355e-06, - "loss": 157.2434, - "step": 64190 - }, - { - "epoch": 0.5310832609504902, - "grad_norm": 731.7130737304688, - "learning_rate": 5.543677189350043e-06, - "loss": 86.4254, - "step": 64200 - }, - { - "epoch": 0.5311659841998594, - "grad_norm": 895.8660888671875, - "learning_rate": 5.542275163122831e-06, - "loss": 81.3476, - "step": 64210 - }, - { - "epoch": 0.5312487074492286, - "grad_norm": 549.8636474609375, - "learning_rate": 5.540873093749274e-06, - "loss": 91.9869, - "step": 64220 - }, - { - "epoch": 0.5313314306985979, - "grad_norm": 711.4556274414062, - "learning_rate": 5.539470981340926e-06, - "loss": 80.405, - "step": 64230 - }, - { - "epoch": 0.5314141539479671, - "grad_norm": 565.5111083984375, - "learning_rate": 5.538068826009349e-06, - "loss": 95.2233, - "step": 64240 - }, - { - "epoch": 0.5314968771973363, - "grad_norm": 1099.053955078125, - "learning_rate": 5.536666627866104e-06, - "loss": 78.3078, - "step": 64250 - }, - { - "epoch": 0.5315796004467055, - "grad_norm": 1019.4757080078125, - "learning_rate": 5.53526438702276e-06, - "loss": 100.4306, - "step": 64260 - }, - { - "epoch": 0.5316623236960748, - "grad_norm": 684.2471313476562, - "learning_rate": 5.533862103590883e-06, - "loss": 81.8137, - "step": 64270 - }, - { - "epoch": 0.531745046945444, - "grad_norm": 1522.2894287109375, - "learning_rate": 5.532459777682051e-06, - "loss": 85.9613, - "step": 64280 - }, - { - "epoch": 0.5318277701948132, - "grad_norm": 403.1702880859375, - "learning_rate": 5.5310574094078365e-06, - "loss": 84.0117, - "step": 64290 - }, - { - "epoch": 0.5319104934441825, - "grad_norm": 679.674072265625, - "learning_rate": 5.529654998879821e-06, - "loss": 59.2508, - "step": 64300 - }, - { - "epoch": 0.5319932166935517, - "grad_norm": 565.5519409179688, - "learning_rate": 5.528252546209588e-06, - "loss": 76.0347, - "step": 64310 - }, - { - "epoch": 0.5320759399429209, - "grad_norm": 939.0302124023438, - "learning_rate": 5.526850051508725e-06, - "loss": 84.2198, - "step": 64320 - }, - { - "epoch": 0.5321586631922902, - "grad_norm": 1065.8779296875, - "learning_rate": 5.525447514888822e-06, - "loss": 146.9439, - "step": 64330 - }, - { - "epoch": 0.5322413864416594, - "grad_norm": 1845.5435791015625, - "learning_rate": 5.52404493646147e-06, - "loss": 111.1832, - "step": 64340 - }, - { - "epoch": 0.5323241096910286, - "grad_norm": 1164.8885498046875, - "learning_rate": 5.522642316338268e-06, - "loss": 89.9883, - "step": 64350 - }, - { - "epoch": 0.5324068329403979, - "grad_norm": 661.2352294921875, - "learning_rate": 5.521239654630816e-06, - "loss": 92.0326, - "step": 64360 - }, - { - "epoch": 0.5324895561897671, - "grad_norm": 995.6377563476562, - "learning_rate": 5.519836951450716e-06, - "loss": 120.3884, - "step": 64370 - }, - { - "epoch": 0.5325722794391363, - "grad_norm": 664.2215576171875, - "learning_rate": 5.518434206909577e-06, - "loss": 102.107, - "step": 64380 - }, - { - "epoch": 0.5326550026885056, - "grad_norm": 934.103759765625, - "learning_rate": 5.517031421119006e-06, - "loss": 73.7082, - "step": 64390 - }, - { - "epoch": 0.5327377259378748, - "grad_norm": 702.5838012695312, - "learning_rate": 5.5156285941906175e-06, - "loss": 93.3802, - "step": 64400 - }, - { - "epoch": 0.532820449187244, - "grad_norm": 627.2533569335938, - "learning_rate": 5.51422572623603e-06, - "loss": 101.0479, - "step": 64410 - }, - { - "epoch": 0.5329031724366133, - "grad_norm": 362.7051696777344, - "learning_rate": 5.512822817366859e-06, - "loss": 79.3406, - "step": 64420 - }, - { - "epoch": 0.5329858956859825, - "grad_norm": 895.7844848632812, - "learning_rate": 5.511419867694733e-06, - "loss": 91.442, - "step": 64430 - }, - { - "epoch": 0.5330686189353517, - "grad_norm": 726.357421875, - "learning_rate": 5.510016877331271e-06, - "loss": 93.3173, - "step": 64440 - }, - { - "epoch": 0.533151342184721, - "grad_norm": 779.744873046875, - "learning_rate": 5.50861384638811e-06, - "loss": 88.6204, - "step": 64450 - }, - { - "epoch": 0.5332340654340902, - "grad_norm": 591.9418334960938, - "learning_rate": 5.50721077497688e-06, - "loss": 72.3884, - "step": 64460 - }, - { - "epoch": 0.5333167886834594, - "grad_norm": 1127.8675537109375, - "learning_rate": 5.505807663209215e-06, - "loss": 124.7344, - "step": 64470 - }, - { - "epoch": 0.5333995119328288, - "grad_norm": 932.3980102539062, - "learning_rate": 5.504404511196755e-06, - "loss": 89.4976, - "step": 64480 - }, - { - "epoch": 0.533482235182198, - "grad_norm": 972.6319580078125, - "learning_rate": 5.503001319051142e-06, - "loss": 84.1025, - "step": 64490 - }, - { - "epoch": 0.5335649584315671, - "grad_norm": 898.1183471679688, - "learning_rate": 5.5015980868840254e-06, - "loss": 126.5537, - "step": 64500 - }, - { - "epoch": 0.5336476816809365, - "grad_norm": 1008.814208984375, - "learning_rate": 5.500194814807051e-06, - "loss": 94.5552, - "step": 64510 - }, - { - "epoch": 0.5337304049303057, - "grad_norm": 1275.499267578125, - "learning_rate": 5.498791502931868e-06, - "loss": 98.7025, - "step": 64520 - }, - { - "epoch": 0.5338131281796749, - "grad_norm": 939.2733154296875, - "learning_rate": 5.497388151370136e-06, - "loss": 120.4424, - "step": 64530 - }, - { - "epoch": 0.5338958514290442, - "grad_norm": 2313.487548828125, - "learning_rate": 5.495984760233511e-06, - "loss": 104.2078, - "step": 64540 - }, - { - "epoch": 0.5339785746784134, - "grad_norm": 2875.86474609375, - "learning_rate": 5.494581329633656e-06, - "loss": 118.5582, - "step": 64550 - }, - { - "epoch": 0.5340612979277826, - "grad_norm": 1355.4534912109375, - "learning_rate": 5.493177859682234e-06, - "loss": 119.3427, - "step": 64560 - }, - { - "epoch": 0.5341440211771519, - "grad_norm": 643.4739990234375, - "learning_rate": 5.491774350490912e-06, - "loss": 76.2629, - "step": 64570 - }, - { - "epoch": 0.5342267444265211, - "grad_norm": 814.0101928710938, - "learning_rate": 5.490370802171362e-06, - "loss": 102.4891, - "step": 64580 - }, - { - "epoch": 0.5343094676758903, - "grad_norm": 653.1177978515625, - "learning_rate": 5.488967214835259e-06, - "loss": 104.6328, - "step": 64590 - }, - { - "epoch": 0.5343921909252596, - "grad_norm": 650.8169555664062, - "learning_rate": 5.487563588594278e-06, - "loss": 106.7863, - "step": 64600 - }, - { - "epoch": 0.5344749141746288, - "grad_norm": 875.9237060546875, - "learning_rate": 5.4861599235601e-06, - "loss": 78.3624, - "step": 64610 - }, - { - "epoch": 0.534557637423998, - "grad_norm": 1083.6778564453125, - "learning_rate": 5.484756219844408e-06, - "loss": 117.263, - "step": 64620 - }, - { - "epoch": 0.5346403606733673, - "grad_norm": 751.8464965820312, - "learning_rate": 5.483352477558889e-06, - "loss": 113.5279, - "step": 64630 - }, - { - "epoch": 0.5347230839227365, - "grad_norm": 1062.31982421875, - "learning_rate": 5.48194869681523e-06, - "loss": 103.9293, - "step": 64640 - }, - { - "epoch": 0.5348058071721057, - "grad_norm": 515.4086303710938, - "learning_rate": 5.480544877725127e-06, - "loss": 97.1701, - "step": 64650 - }, - { - "epoch": 0.534888530421475, - "grad_norm": 725.5648193359375, - "learning_rate": 5.479141020400271e-06, - "loss": 76.7176, - "step": 64660 - }, - { - "epoch": 0.5349712536708442, - "grad_norm": 719.1748657226562, - "learning_rate": 5.477737124952366e-06, - "loss": 96.5835, - "step": 64670 - }, - { - "epoch": 0.5350539769202134, - "grad_norm": 916.6452026367188, - "learning_rate": 5.476333191493108e-06, - "loss": 85.3995, - "step": 64680 - }, - { - "epoch": 0.5351367001695827, - "grad_norm": 436.0997619628906, - "learning_rate": 5.474929220134205e-06, - "loss": 86.9272, - "step": 64690 - }, - { - "epoch": 0.5352194234189519, - "grad_norm": 1398.6942138671875, - "learning_rate": 5.473525210987363e-06, - "loss": 91.1325, - "step": 64700 - }, - { - "epoch": 0.5353021466683211, - "grad_norm": 1058.8551025390625, - "learning_rate": 5.472121164164295e-06, - "loss": 80.0483, - "step": 64710 - }, - { - "epoch": 0.5353848699176904, - "grad_norm": 784.1776123046875, - "learning_rate": 5.47071707977671e-06, - "loss": 115.0421, - "step": 64720 - }, - { - "epoch": 0.5354675931670596, - "grad_norm": 1069.0155029296875, - "learning_rate": 5.46931295793633e-06, - "loss": 97.4913, - "step": 64730 - }, - { - "epoch": 0.5355503164164288, - "grad_norm": 1227.8470458984375, - "learning_rate": 5.46790879875487e-06, - "loss": 98.9259, - "step": 64740 - }, - { - "epoch": 0.5356330396657981, - "grad_norm": 632.8845825195312, - "learning_rate": 5.466504602344055e-06, - "loss": 74.2294, - "step": 64750 - }, - { - "epoch": 0.5357157629151673, - "grad_norm": 1411.2496337890625, - "learning_rate": 5.465100368815609e-06, - "loss": 109.9609, - "step": 64760 - }, - { - "epoch": 0.5357984861645365, - "grad_norm": 819.7656860351562, - "learning_rate": 5.463696098281262e-06, - "loss": 113.5461, - "step": 64770 - }, - { - "epoch": 0.5358812094139058, - "grad_norm": 576.60546875, - "learning_rate": 5.462291790852744e-06, - "loss": 83.2793, - "step": 64780 - }, - { - "epoch": 0.535963932663275, - "grad_norm": 563.931640625, - "learning_rate": 5.46088744664179e-06, - "loss": 112.9462, - "step": 64790 - }, - { - "epoch": 0.5360466559126442, - "grad_norm": 1462.51416015625, - "learning_rate": 5.459483065760138e-06, - "loss": 93.2692, - "step": 64800 - }, - { - "epoch": 0.5361293791620135, - "grad_norm": 717.8792114257812, - "learning_rate": 5.458078648319526e-06, - "loss": 72.0933, - "step": 64810 - }, - { - "epoch": 0.5362121024113827, - "grad_norm": 873.50341796875, - "learning_rate": 5.456674194431698e-06, - "loss": 112.2229, - "step": 64820 - }, - { - "epoch": 0.5362948256607519, - "grad_norm": 710.7631225585938, - "learning_rate": 5.455269704208401e-06, - "loss": 66.035, - "step": 64830 - }, - { - "epoch": 0.5363775489101212, - "grad_norm": 909.28564453125, - "learning_rate": 5.453865177761384e-06, - "loss": 100.6332, - "step": 64840 - }, - { - "epoch": 0.5364602721594904, - "grad_norm": 1102.3038330078125, - "learning_rate": 5.4524606152023975e-06, - "loss": 100.337, - "step": 64850 - }, - { - "epoch": 0.5365429954088596, - "grad_norm": 2068.275390625, - "learning_rate": 5.4510560166431935e-06, - "loss": 116.3672, - "step": 64860 - }, - { - "epoch": 0.536625718658229, - "grad_norm": 770.456787109375, - "learning_rate": 5.449651382195535e-06, - "loss": 93.339, - "step": 64870 - }, - { - "epoch": 0.5367084419075981, - "grad_norm": 614.5863037109375, - "learning_rate": 5.448246711971178e-06, - "loss": 112.4163, - "step": 64880 - }, - { - "epoch": 0.5367911651569673, - "grad_norm": 853.5326538085938, - "learning_rate": 5.44684200608189e-06, - "loss": 110.5457, - "step": 64890 - }, - { - "epoch": 0.5368738884063367, - "grad_norm": 718.2283935546875, - "learning_rate": 5.445437264639433e-06, - "loss": 94.2823, - "step": 64900 - }, - { - "epoch": 0.5369566116557059, - "grad_norm": 723.5383911132812, - "learning_rate": 5.444032487755575e-06, - "loss": 93.2752, - "step": 64910 - }, - { - "epoch": 0.537039334905075, - "grad_norm": 694.3563842773438, - "learning_rate": 5.442627675542092e-06, - "loss": 79.1228, - "step": 64920 - }, - { - "epoch": 0.5371220581544444, - "grad_norm": 1094.6685791015625, - "learning_rate": 5.441222828110756e-06, - "loss": 97.7163, - "step": 64930 - }, - { - "epoch": 0.5372047814038136, - "grad_norm": 809.086669921875, - "learning_rate": 5.439817945573345e-06, - "loss": 78.5984, - "step": 64940 - }, - { - "epoch": 0.5372875046531828, - "grad_norm": 1463.13037109375, - "learning_rate": 5.438413028041637e-06, - "loss": 137.7098, - "step": 64950 - }, - { - "epoch": 0.5373702279025521, - "grad_norm": 686.939697265625, - "learning_rate": 5.4370080756274155e-06, - "loss": 119.1664, - "step": 64960 - }, - { - "epoch": 0.5374529511519213, - "grad_norm": 1107.8900146484375, - "learning_rate": 5.435603088442471e-06, - "loss": 95.0079, - "step": 64970 - }, - { - "epoch": 0.5375356744012905, - "grad_norm": 705.9915161132812, - "learning_rate": 5.434198066598585e-06, - "loss": 78.4787, - "step": 64980 - }, - { - "epoch": 0.5376183976506597, - "grad_norm": 954.3821411132812, - "learning_rate": 5.4327930102075525e-06, - "loss": 89.3804, - "step": 64990 - }, - { - "epoch": 0.537701120900029, - "grad_norm": 947.5150146484375, - "learning_rate": 5.431387919381166e-06, - "loss": 84.6999, - "step": 65000 - }, - { - "epoch": 0.5377838441493982, - "grad_norm": 356.02276611328125, - "learning_rate": 5.429982794231221e-06, - "loss": 107.2157, - "step": 65010 - }, - { - "epoch": 0.5378665673987674, - "grad_norm": 1482.7928466796875, - "learning_rate": 5.428577634869521e-06, - "loss": 92.7534, - "step": 65020 - }, - { - "epoch": 0.5379492906481367, - "grad_norm": 966.0228271484375, - "learning_rate": 5.427172441407864e-06, - "loss": 112.3402, - "step": 65030 - }, - { - "epoch": 0.5380320138975059, - "grad_norm": 688.48583984375, - "learning_rate": 5.425767213958057e-06, - "loss": 67.1227, - "step": 65040 - }, - { - "epoch": 0.5381147371468751, - "grad_norm": 847.5924072265625, - "learning_rate": 5.424361952631907e-06, - "loss": 125.7834, - "step": 65050 - }, - { - "epoch": 0.5381974603962444, - "grad_norm": 2318.634033203125, - "learning_rate": 5.422956657541224e-06, - "loss": 103.7072, - "step": 65060 - }, - { - "epoch": 0.5382801836456136, - "grad_norm": 1350.766357421875, - "learning_rate": 5.421551328797821e-06, - "loss": 96.6112, - "step": 65070 - }, - { - "epoch": 0.5383629068949828, - "grad_norm": 693.9425659179688, - "learning_rate": 5.420145966513513e-06, - "loss": 59.4491, - "step": 65080 - }, - { - "epoch": 0.5384456301443521, - "grad_norm": 1209.9505615234375, - "learning_rate": 5.418740570800117e-06, - "loss": 100.2799, - "step": 65090 - }, - { - "epoch": 0.5385283533937213, - "grad_norm": 918.7635498046875, - "learning_rate": 5.4173351417694575e-06, - "loss": 98.6169, - "step": 65100 - }, - { - "epoch": 0.5386110766430905, - "grad_norm": 681.4931640625, - "learning_rate": 5.415929679533356e-06, - "loss": 95.7693, - "step": 65110 - }, - { - "epoch": 0.5386937998924598, - "grad_norm": 583.2526245117188, - "learning_rate": 5.414524184203638e-06, - "loss": 91.5053, - "step": 65120 - }, - { - "epoch": 0.538776523141829, - "grad_norm": 2256.79736328125, - "learning_rate": 5.4131186558921335e-06, - "loss": 111.124, - "step": 65130 - }, - { - "epoch": 0.5388592463911982, - "grad_norm": 1016.837890625, - "learning_rate": 5.411713094710673e-06, - "loss": 98.1128, - "step": 65140 - }, - { - "epoch": 0.5389419696405675, - "grad_norm": 914.6116943359375, - "learning_rate": 5.410307500771092e-06, - "loss": 74.292, - "step": 65150 - }, - { - "epoch": 0.5390246928899367, - "grad_norm": 1791.11962890625, - "learning_rate": 5.4089018741852264e-06, - "loss": 103.5447, - "step": 65160 - }, - { - "epoch": 0.5391074161393059, - "grad_norm": 1446.0499267578125, - "learning_rate": 5.407496215064915e-06, - "loss": 91.7238, - "step": 65170 - }, - { - "epoch": 0.5391901393886752, - "grad_norm": 865.380859375, - "learning_rate": 5.406090523521999e-06, - "loss": 127.9576, - "step": 65180 - }, - { - "epoch": 0.5392728626380444, - "grad_norm": 1105.864013671875, - "learning_rate": 5.404684799668325e-06, - "loss": 94.1973, - "step": 65190 - }, - { - "epoch": 0.5393555858874136, - "grad_norm": 675.471435546875, - "learning_rate": 5.403279043615738e-06, - "loss": 110.8662, - "step": 65200 - }, - { - "epoch": 0.5394383091367829, - "grad_norm": 0.0, - "learning_rate": 5.4018732554760875e-06, - "loss": 74.0708, - "step": 65210 - }, - { - "epoch": 0.5395210323861521, - "grad_norm": 609.0394897460938, - "learning_rate": 5.400467435361227e-06, - "loss": 110.9384, - "step": 65220 - }, - { - "epoch": 0.5396037556355213, - "grad_norm": 674.0571899414062, - "learning_rate": 5.399061583383013e-06, - "loss": 129.5516, - "step": 65230 - }, - { - "epoch": 0.5396864788848906, - "grad_norm": 1786.3568115234375, - "learning_rate": 5.3976556996532965e-06, - "loss": 100.5763, - "step": 65240 - }, - { - "epoch": 0.5397692021342598, - "grad_norm": 1053.052001953125, - "learning_rate": 5.396249784283943e-06, - "loss": 79.2251, - "step": 65250 - }, - { - "epoch": 0.539851925383629, - "grad_norm": 659.5496215820312, - "learning_rate": 5.394843837386812e-06, - "loss": 93.9248, - "step": 65260 - }, - { - "epoch": 0.5399346486329983, - "grad_norm": 1476.619140625, - "learning_rate": 5.39343785907377e-06, - "loss": 69.9831, - "step": 65270 - }, - { - "epoch": 0.5400173718823675, - "grad_norm": 873.2113647460938, - "learning_rate": 5.392031849456683e-06, - "loss": 97.2353, - "step": 65280 - }, - { - "epoch": 0.5401000951317367, - "grad_norm": 674.3704223632812, - "learning_rate": 5.39062580864742e-06, - "loss": 83.2568, - "step": 65290 - }, - { - "epoch": 0.540182818381106, - "grad_norm": 2080.84765625, - "learning_rate": 5.3892197367578535e-06, - "loss": 76.2265, - "step": 65300 - }, - { - "epoch": 0.5402655416304752, - "grad_norm": 442.13330078125, - "learning_rate": 5.38781363389986e-06, - "loss": 95.2125, - "step": 65310 - }, - { - "epoch": 0.5403482648798444, - "grad_norm": 638.2518920898438, - "learning_rate": 5.386407500185316e-06, - "loss": 72.7291, - "step": 65320 - }, - { - "epoch": 0.5404309881292138, - "grad_norm": 841.8578491210938, - "learning_rate": 5.3850013357261e-06, - "loss": 85.5195, - "step": 65330 - }, - { - "epoch": 0.540513711378583, - "grad_norm": 486.00140380859375, - "learning_rate": 5.383595140634093e-06, - "loss": 103.8048, - "step": 65340 - }, - { - "epoch": 0.5405964346279521, - "grad_norm": 945.3947143554688, - "learning_rate": 5.382188915021182e-06, - "loss": 105.8814, - "step": 65350 - }, - { - "epoch": 0.5406791578773215, - "grad_norm": 620.21435546875, - "learning_rate": 5.380782658999256e-06, - "loss": 61.2339, - "step": 65360 - }, - { - "epoch": 0.5407618811266907, - "grad_norm": 1061.0386962890625, - "learning_rate": 5.379376372680199e-06, - "loss": 87.4281, - "step": 65370 - }, - { - "epoch": 0.5408446043760599, - "grad_norm": 906.3760986328125, - "learning_rate": 5.377970056175905e-06, - "loss": 96.8989, - "step": 65380 - }, - { - "epoch": 0.5409273276254292, - "grad_norm": 621.90283203125, - "learning_rate": 5.376563709598267e-06, - "loss": 70.7825, - "step": 65390 - }, - { - "epoch": 0.5410100508747984, - "grad_norm": 818.9396362304688, - "learning_rate": 5.3751573330591855e-06, - "loss": 101.6279, - "step": 65400 - }, - { - "epoch": 0.5410927741241676, - "grad_norm": 859.1585693359375, - "learning_rate": 5.3737509266705555e-06, - "loss": 107.53, - "step": 65410 - }, - { - "epoch": 0.5411754973735369, - "grad_norm": 423.7478942871094, - "learning_rate": 5.37234449054428e-06, - "loss": 88.1217, - "step": 65420 - }, - { - "epoch": 0.5412582206229061, - "grad_norm": 1409.0037841796875, - "learning_rate": 5.370938024792262e-06, - "loss": 91.726, - "step": 65430 - }, - { - "epoch": 0.5413409438722753, - "grad_norm": 1655.0394287109375, - "learning_rate": 5.369531529526406e-06, - "loss": 95.8715, - "step": 65440 - }, - { - "epoch": 0.5414236671216446, - "grad_norm": 825.59326171875, - "learning_rate": 5.3681250048586246e-06, - "loss": 93.8956, - "step": 65450 - }, - { - "epoch": 0.5415063903710138, - "grad_norm": 1459.590576171875, - "learning_rate": 5.366718450900825e-06, - "loss": 122.376, - "step": 65460 - }, - { - "epoch": 0.541589113620383, - "grad_norm": 586.8179321289062, - "learning_rate": 5.365311867764922e-06, - "loss": 80.8746, - "step": 65470 - }, - { - "epoch": 0.5416718368697523, - "grad_norm": 973.2362060546875, - "learning_rate": 5.363905255562828e-06, - "loss": 163.7341, - "step": 65480 - }, - { - "epoch": 0.5417545601191215, - "grad_norm": 664.8019409179688, - "learning_rate": 5.362498614406466e-06, - "loss": 145.3861, - "step": 65490 - }, - { - "epoch": 0.5418372833684907, - "grad_norm": 776.0678100585938, - "learning_rate": 5.361091944407751e-06, - "loss": 91.7549, - "step": 65500 - }, - { - "epoch": 0.54192000661786, - "grad_norm": 998.9888305664062, - "learning_rate": 5.3596852456786075e-06, - "loss": 107.9666, - "step": 65510 - }, - { - "epoch": 0.5420027298672292, - "grad_norm": 866.9566040039062, - "learning_rate": 5.35827851833096e-06, - "loss": 93.1625, - "step": 65520 - }, - { - "epoch": 0.5420854531165984, - "grad_norm": 372.51800537109375, - "learning_rate": 5.356871762476735e-06, - "loss": 89.3893, - "step": 65530 - }, - { - "epoch": 0.5421681763659677, - "grad_norm": 1260.4573974609375, - "learning_rate": 5.355464978227861e-06, - "loss": 117.8481, - "step": 65540 - }, - { - "epoch": 0.5422508996153369, - "grad_norm": 911.54296875, - "learning_rate": 5.354058165696271e-06, - "loss": 107.3121, - "step": 65550 - }, - { - "epoch": 0.5423336228647061, - "grad_norm": 1118.6298828125, - "learning_rate": 5.352651324993897e-06, - "loss": 120.4601, - "step": 65560 - }, - { - "epoch": 0.5424163461140754, - "grad_norm": 873.2643432617188, - "learning_rate": 5.351244456232676e-06, - "loss": 133.8039, - "step": 65570 - }, - { - "epoch": 0.5424990693634446, - "grad_norm": 803.547119140625, - "learning_rate": 5.349837559524546e-06, - "loss": 113.3396, - "step": 65580 - }, - { - "epoch": 0.5425817926128138, - "grad_norm": 838.561767578125, - "learning_rate": 5.3484306349814455e-06, - "loss": 106.6709, - "step": 65590 - }, - { - "epoch": 0.5426645158621831, - "grad_norm": 929.80029296875, - "learning_rate": 5.34702368271532e-06, - "loss": 93.5446, - "step": 65600 - }, - { - "epoch": 0.5427472391115523, - "grad_norm": 1133.1529541015625, - "learning_rate": 5.345616702838111e-06, - "loss": 77.5028, - "step": 65610 - }, - { - "epoch": 0.5428299623609215, - "grad_norm": 945.4187622070312, - "learning_rate": 5.344209695461768e-06, - "loss": 85.2792, - "step": 65620 - }, - { - "epoch": 0.5429126856102908, - "grad_norm": 798.3004150390625, - "learning_rate": 5.3428026606982396e-06, - "loss": 75.1708, - "step": 65630 - }, - { - "epoch": 0.54299540885966, - "grad_norm": 840.9337158203125, - "learning_rate": 5.341395598659477e-06, - "loss": 139.3945, - "step": 65640 - }, - { - "epoch": 0.5430781321090292, - "grad_norm": 7416.7548828125, - "learning_rate": 5.339988509457432e-06, - "loss": 101.7888, - "step": 65650 - }, - { - "epoch": 0.5431608553583985, - "grad_norm": 914.0125732421875, - "learning_rate": 5.338581393204064e-06, - "loss": 94.0368, - "step": 65660 - }, - { - "epoch": 0.5432435786077677, - "grad_norm": 1436.60888671875, - "learning_rate": 5.337174250011326e-06, - "loss": 123.4113, - "step": 65670 - }, - { - "epoch": 0.5433263018571369, - "grad_norm": 1032.3555908203125, - "learning_rate": 5.3357670799911805e-06, - "loss": 87.8645, - "step": 65680 - }, - { - "epoch": 0.5434090251065062, - "grad_norm": 958.6307983398438, - "learning_rate": 5.334359883255591e-06, - "loss": 83.8339, - "step": 65690 - }, - { - "epoch": 0.5434917483558754, - "grad_norm": 968.6178588867188, - "learning_rate": 5.33295265991652e-06, - "loss": 102.6183, - "step": 65700 - }, - { - "epoch": 0.5435744716052446, - "grad_norm": 601.3204345703125, - "learning_rate": 5.331545410085933e-06, - "loss": 74.607, - "step": 65710 - }, - { - "epoch": 0.5436571948546138, - "grad_norm": 1049.9119873046875, - "learning_rate": 5.330138133875799e-06, - "loss": 78.9104, - "step": 65720 - }, - { - "epoch": 0.5437399181039831, - "grad_norm": 879.0147705078125, - "learning_rate": 5.328730831398089e-06, - "loss": 92.8446, - "step": 65730 - }, - { - "epoch": 0.5438226413533523, - "grad_norm": 1260.738525390625, - "learning_rate": 5.3273235027647764e-06, - "loss": 81.406, - "step": 65740 - }, - { - "epoch": 0.5439053646027215, - "grad_norm": 807.532958984375, - "learning_rate": 5.3259161480878354e-06, - "loss": 80.0483, - "step": 65750 - }, - { - "epoch": 0.5439880878520909, - "grad_norm": 628.903076171875, - "learning_rate": 5.324508767479239e-06, - "loss": 111.094, - "step": 65760 - }, - { - "epoch": 0.54407081110146, - "grad_norm": 1162.60888671875, - "learning_rate": 5.323101361050972e-06, - "loss": 102.2003, - "step": 65770 - }, - { - "epoch": 0.5441535343508292, - "grad_norm": 840.9710693359375, - "learning_rate": 5.321693928915012e-06, - "loss": 85.5873, - "step": 65780 - }, - { - "epoch": 0.5442362576001986, - "grad_norm": 588.482666015625, - "learning_rate": 5.320286471183343e-06, - "loss": 88.2541, - "step": 65790 - }, - { - "epoch": 0.5443189808495678, - "grad_norm": 826.7291259765625, - "learning_rate": 5.3188789879679496e-06, - "loss": 113.2529, - "step": 65800 - }, - { - "epoch": 0.544401704098937, - "grad_norm": 988.3714599609375, - "learning_rate": 5.317471479380816e-06, - "loss": 92.3975, - "step": 65810 - }, - { - "epoch": 0.5444844273483063, - "grad_norm": 1229.006103515625, - "learning_rate": 5.3160639455339355e-06, - "loss": 99.8077, - "step": 65820 - }, - { - "epoch": 0.5445671505976755, - "grad_norm": 558.47802734375, - "learning_rate": 5.314656386539298e-06, - "loss": 98.0959, - "step": 65830 - }, - { - "epoch": 0.5446498738470447, - "grad_norm": 1046.564697265625, - "learning_rate": 5.313248802508896e-06, - "loss": 94.008, - "step": 65840 - }, - { - "epoch": 0.544732597096414, - "grad_norm": 943.0609741210938, - "learning_rate": 5.311841193554723e-06, - "loss": 94.6098, - "step": 65850 - }, - { - "epoch": 0.5448153203457832, - "grad_norm": 465.5363464355469, - "learning_rate": 5.310433559788778e-06, - "loss": 100.7524, - "step": 65860 - }, - { - "epoch": 0.5448980435951524, - "grad_norm": 902.67333984375, - "learning_rate": 5.309025901323059e-06, - "loss": 67.7871, - "step": 65870 - }, - { - "epoch": 0.5449807668445217, - "grad_norm": 641.2943725585938, - "learning_rate": 5.307618218269569e-06, - "loss": 89.4344, - "step": 65880 - }, - { - "epoch": 0.5450634900938909, - "grad_norm": 730.5675048828125, - "learning_rate": 5.306210510740307e-06, - "loss": 78.7692, - "step": 65890 - }, - { - "epoch": 0.5451462133432601, - "grad_norm": 562.7182006835938, - "learning_rate": 5.304802778847281e-06, - "loss": 62.6107, - "step": 65900 - }, - { - "epoch": 0.5452289365926294, - "grad_norm": 776.1548461914062, - "learning_rate": 5.303395022702495e-06, - "loss": 83.2003, - "step": 65910 - }, - { - "epoch": 0.5453116598419986, - "grad_norm": 716.1808471679688, - "learning_rate": 5.301987242417963e-06, - "loss": 77.2049, - "step": 65920 - }, - { - "epoch": 0.5453943830913678, - "grad_norm": 1019.53759765625, - "learning_rate": 5.300579438105689e-06, - "loss": 108.0206, - "step": 65930 - }, - { - "epoch": 0.5454771063407371, - "grad_norm": 559.3324584960938, - "learning_rate": 5.29917160987769e-06, - "loss": 83.0377, - "step": 65940 - }, - { - "epoch": 0.5455598295901063, - "grad_norm": 785.0572509765625, - "learning_rate": 5.297763757845979e-06, - "loss": 88.7534, - "step": 65950 - }, - { - "epoch": 0.5456425528394755, - "grad_norm": 606.4642944335938, - "learning_rate": 5.296355882122572e-06, - "loss": 82.7635, - "step": 65960 - }, - { - "epoch": 0.5457252760888448, - "grad_norm": 1158.042236328125, - "learning_rate": 5.294947982819488e-06, - "loss": 108.4545, - "step": 65970 - }, - { - "epoch": 0.545807999338214, - "grad_norm": 412.3054504394531, - "learning_rate": 5.293540060048746e-06, - "loss": 116.0526, - "step": 65980 - }, - { - "epoch": 0.5458907225875832, - "grad_norm": 661.3358154296875, - "learning_rate": 5.292132113922369e-06, - "loss": 74.4473, - "step": 65990 - }, - { - "epoch": 0.5459734458369525, - "grad_norm": 637.5306396484375, - "learning_rate": 5.290724144552379e-06, - "loss": 104.4344, - "step": 66000 - }, - { - "epoch": 0.5460561690863217, - "grad_norm": 1194.7322998046875, - "learning_rate": 5.2893161520508055e-06, - "loss": 93.1337, - "step": 66010 - }, - { - "epoch": 0.5461388923356909, - "grad_norm": 1273.340576171875, - "learning_rate": 5.287908136529671e-06, - "loss": 102.8886, - "step": 66020 - }, - { - "epoch": 0.5462216155850602, - "grad_norm": 923.6551513671875, - "learning_rate": 5.28650009810101e-06, - "loss": 125.1421, - "step": 66030 - }, - { - "epoch": 0.5463043388344294, - "grad_norm": 996.6854248046875, - "learning_rate": 5.28509203687685e-06, - "loss": 93.7317, - "step": 66040 - }, - { - "epoch": 0.5463870620837986, - "grad_norm": 655.3250122070312, - "learning_rate": 5.283683952969224e-06, - "loss": 98.5502, - "step": 66050 - }, - { - "epoch": 0.5464697853331679, - "grad_norm": 724.341064453125, - "learning_rate": 5.282275846490169e-06, - "loss": 76.2478, - "step": 66060 - }, - { - "epoch": 0.5465525085825371, - "grad_norm": 665.1072998046875, - "learning_rate": 5.280867717551719e-06, - "loss": 90.0001, - "step": 66070 - }, - { - "epoch": 0.5466352318319063, - "grad_norm": 774.87451171875, - "learning_rate": 5.279459566265915e-06, - "loss": 83.016, - "step": 66080 - }, - { - "epoch": 0.5467179550812756, - "grad_norm": 1207.277587890625, - "learning_rate": 5.278051392744796e-06, - "loss": 115.3367, - "step": 66090 - }, - { - "epoch": 0.5468006783306448, - "grad_norm": 846.2992553710938, - "learning_rate": 5.2766431971004025e-06, - "loss": 76.7401, - "step": 66100 - }, - { - "epoch": 0.546883401580014, - "grad_norm": 794.3187866210938, - "learning_rate": 5.275234979444781e-06, - "loss": 104.4632, - "step": 66110 - }, - { - "epoch": 0.5469661248293833, - "grad_norm": 609.81787109375, - "learning_rate": 5.273826739889975e-06, - "loss": 93.3402, - "step": 66120 - }, - { - "epoch": 0.5470488480787525, - "grad_norm": 818.2113647460938, - "learning_rate": 5.272418478548031e-06, - "loss": 77.1904, - "step": 66130 - }, - { - "epoch": 0.5471315713281217, - "grad_norm": 2090.50390625, - "learning_rate": 5.271010195530999e-06, - "loss": 101.5279, - "step": 66140 - }, - { - "epoch": 0.547214294577491, - "grad_norm": 1038.06103515625, - "learning_rate": 5.26960189095093e-06, - "loss": 95.1865, - "step": 66150 - }, - { - "epoch": 0.5472970178268602, - "grad_norm": 984.3367309570312, - "learning_rate": 5.268193564919876e-06, - "loss": 91.2734, - "step": 66160 - }, - { - "epoch": 0.5473797410762294, - "grad_norm": 1194.6197509765625, - "learning_rate": 5.26678521754989e-06, - "loss": 116.3886, - "step": 66170 - }, - { - "epoch": 0.5474624643255988, - "grad_norm": 978.132080078125, - "learning_rate": 5.265376848953031e-06, - "loss": 114.3536, - "step": 66180 - }, - { - "epoch": 0.547545187574968, - "grad_norm": 1035.0242919921875, - "learning_rate": 5.263968459241351e-06, - "loss": 89.8195, - "step": 66190 - }, - { - "epoch": 0.5476279108243371, - "grad_norm": 1031.2353515625, - "learning_rate": 5.262560048526913e-06, - "loss": 80.5158, - "step": 66200 - }, - { - "epoch": 0.5477106340737065, - "grad_norm": 308.54156494140625, - "learning_rate": 5.261151616921778e-06, - "loss": 95.6141, - "step": 66210 - }, - { - "epoch": 0.5477933573230757, - "grad_norm": 1179.9146728515625, - "learning_rate": 5.259743164538008e-06, - "loss": 115.8417, - "step": 66220 - }, - { - "epoch": 0.5478760805724449, - "grad_norm": 1147.329345703125, - "learning_rate": 5.2583346914876655e-06, - "loss": 81.3131, - "step": 66230 - }, - { - "epoch": 0.5479588038218142, - "grad_norm": 823.7957153320312, - "learning_rate": 5.2569261978828155e-06, - "loss": 151.2819, - "step": 66240 - }, - { - "epoch": 0.5480415270711834, - "grad_norm": 466.2208557128906, - "learning_rate": 5.255517683835528e-06, - "loss": 84.0521, - "step": 66250 - }, - { - "epoch": 0.5481242503205526, - "grad_norm": 1068.723876953125, - "learning_rate": 5.254109149457873e-06, - "loss": 93.1827, - "step": 66260 - }, - { - "epoch": 0.5482069735699219, - "grad_norm": 1123.1505126953125, - "learning_rate": 5.252700594861918e-06, - "loss": 107.1272, - "step": 66270 - }, - { - "epoch": 0.5482896968192911, - "grad_norm": 1002.3335571289062, - "learning_rate": 5.251292020159736e-06, - "loss": 88.9466, - "step": 66280 - }, - { - "epoch": 0.5483724200686603, - "grad_norm": 658.9024047851562, - "learning_rate": 5.2498834254634005e-06, - "loss": 87.6717, - "step": 66290 - }, - { - "epoch": 0.5484551433180296, - "grad_norm": 1128.9300537109375, - "learning_rate": 5.248474810884988e-06, - "loss": 105.05, - "step": 66300 - }, - { - "epoch": 0.5485378665673988, - "grad_norm": 853.0302734375, - "learning_rate": 5.247066176536577e-06, - "loss": 136.7064, - "step": 66310 - }, - { - "epoch": 0.548620589816768, - "grad_norm": 1292.0511474609375, - "learning_rate": 5.245657522530243e-06, - "loss": 88.9545, - "step": 66320 - }, - { - "epoch": 0.5487033130661373, - "grad_norm": 1117.95947265625, - "learning_rate": 5.244248848978067e-06, - "loss": 74.3471, - "step": 66330 - }, - { - "epoch": 0.5487860363155065, - "grad_norm": 1050.1473388671875, - "learning_rate": 5.242840155992131e-06, - "loss": 104.2194, - "step": 66340 - }, - { - "epoch": 0.5488687595648757, - "grad_norm": 462.470458984375, - "learning_rate": 5.24143144368452e-06, - "loss": 78.9187, - "step": 66350 - }, - { - "epoch": 0.548951482814245, - "grad_norm": 1257.4759521484375, - "learning_rate": 5.240022712167315e-06, - "loss": 83.4133, - "step": 66360 - }, - { - "epoch": 0.5490342060636142, - "grad_norm": 1386.3408203125, - "learning_rate": 5.2386139615526046e-06, - "loss": 116.1773, - "step": 66370 - }, - { - "epoch": 0.5491169293129834, - "grad_norm": 742.0828247070312, - "learning_rate": 5.237205191952477e-06, - "loss": 102.3286, - "step": 66380 - }, - { - "epoch": 0.5491996525623527, - "grad_norm": 1096.52392578125, - "learning_rate": 5.235796403479021e-06, - "loss": 80.9894, - "step": 66390 - }, - { - "epoch": 0.5492823758117219, - "grad_norm": 466.41204833984375, - "learning_rate": 5.2343875962443255e-06, - "loss": 75.4198, - "step": 66400 - }, - { - "epoch": 0.5493650990610911, - "grad_norm": 1372.77685546875, - "learning_rate": 5.2329787703604875e-06, - "loss": 86.9037, - "step": 66410 - }, - { - "epoch": 0.5494478223104604, - "grad_norm": 733.8797607421875, - "learning_rate": 5.231569925939596e-06, - "loss": 84.0945, - "step": 66420 - }, - { - "epoch": 0.5495305455598296, - "grad_norm": 815.2667236328125, - "learning_rate": 5.230161063093749e-06, - "loss": 83.4252, - "step": 66430 - }, - { - "epoch": 0.5496132688091988, - "grad_norm": 1322.669921875, - "learning_rate": 5.228752181935042e-06, - "loss": 100.3188, - "step": 66440 - }, - { - "epoch": 0.549695992058568, - "grad_norm": 1326.2945556640625, - "learning_rate": 5.227343282575574e-06, - "loss": 90.2418, - "step": 66450 - }, - { - "epoch": 0.5497787153079373, - "grad_norm": 1287.527099609375, - "learning_rate": 5.225934365127445e-06, - "loss": 82.3157, - "step": 66460 - }, - { - "epoch": 0.5498614385573065, - "grad_norm": 564.8834228515625, - "learning_rate": 5.224525429702755e-06, - "loss": 67.5519, - "step": 66470 - }, - { - "epoch": 0.5499441618066757, - "grad_norm": 1590.8048095703125, - "learning_rate": 5.223116476413606e-06, - "loss": 145.7727, - "step": 66480 - }, - { - "epoch": 0.550026885056045, - "grad_norm": 424.3537292480469, - "learning_rate": 5.221707505372105e-06, - "loss": 89.2432, - "step": 66490 - }, - { - "epoch": 0.5501096083054142, - "grad_norm": 562.13330078125, - "learning_rate": 5.220298516690353e-06, - "loss": 91.3321, - "step": 66500 - }, - { - "epoch": 0.5501923315547834, - "grad_norm": 416.3981018066406, - "learning_rate": 5.21888951048046e-06, - "loss": 114.0917, - "step": 66510 - }, - { - "epoch": 0.5502750548041527, - "grad_norm": 935.5563354492188, - "learning_rate": 5.217480486854534e-06, - "loss": 112.0527, - "step": 66520 - }, - { - "epoch": 0.5503577780535219, - "grad_norm": 448.1026916503906, - "learning_rate": 5.216071445924683e-06, - "loss": 71.1966, - "step": 66530 - }, - { - "epoch": 0.5504405013028911, - "grad_norm": 715.922119140625, - "learning_rate": 5.214662387803019e-06, - "loss": 69.2816, - "step": 66540 - }, - { - "epoch": 0.5505232245522604, - "grad_norm": 816.540771484375, - "learning_rate": 5.213253312601654e-06, - "loss": 115.1411, - "step": 66550 - }, - { - "epoch": 0.5506059478016296, - "grad_norm": 613.8909912109375, - "learning_rate": 5.211844220432702e-06, - "loss": 74.3888, - "step": 66560 - }, - { - "epoch": 0.5506886710509988, - "grad_norm": 1052.91064453125, - "learning_rate": 5.210435111408276e-06, - "loss": 91.0357, - "step": 66570 - }, - { - "epoch": 0.5507713943003681, - "grad_norm": 1757.2353515625, - "learning_rate": 5.209025985640496e-06, - "loss": 101.9064, - "step": 66580 - }, - { - "epoch": 0.5508541175497373, - "grad_norm": 1139.6336669921875, - "learning_rate": 5.207616843241476e-06, - "loss": 102.2799, - "step": 66590 - }, - { - "epoch": 0.5509368407991065, - "grad_norm": 1313.9873046875, - "learning_rate": 5.206207684323337e-06, - "loss": 93.977, - "step": 66600 - }, - { - "epoch": 0.5510195640484759, - "grad_norm": 574.5587158203125, - "learning_rate": 5.2047985089982e-06, - "loss": 88.6549, - "step": 66610 - }, - { - "epoch": 0.551102287297845, - "grad_norm": 881.9164428710938, - "learning_rate": 5.203389317378183e-06, - "loss": 108.2257, - "step": 66620 - }, - { - "epoch": 0.5511850105472142, - "grad_norm": 702.6726684570312, - "learning_rate": 5.201980109575414e-06, - "loss": 102.8949, - "step": 66630 - }, - { - "epoch": 0.5512677337965836, - "grad_norm": 975.09375, - "learning_rate": 5.200570885702013e-06, - "loss": 95.4903, - "step": 66640 - }, - { - "epoch": 0.5513504570459528, - "grad_norm": 502.3410339355469, - "learning_rate": 5.19916164587011e-06, - "loss": 116.9168, - "step": 66650 - }, - { - "epoch": 0.551433180295322, - "grad_norm": 2496.73193359375, - "learning_rate": 5.197752390191827e-06, - "loss": 95.1221, - "step": 66660 - }, - { - "epoch": 0.5515159035446913, - "grad_norm": 1161.61572265625, - "learning_rate": 5.196343118779292e-06, - "loss": 103.2815, - "step": 66670 - }, - { - "epoch": 0.5515986267940605, - "grad_norm": 501.6972961425781, - "learning_rate": 5.194933831744637e-06, - "loss": 89.6646, - "step": 66680 - }, - { - "epoch": 0.5516813500434297, - "grad_norm": 808.2440795898438, - "learning_rate": 5.1935245291999945e-06, - "loss": 132.3228, - "step": 66690 - }, - { - "epoch": 0.551764073292799, - "grad_norm": 602.8800048828125, - "learning_rate": 5.192115211257491e-06, - "loss": 100.7651, - "step": 66700 - }, - { - "epoch": 0.5518467965421682, - "grad_norm": 705.0828247070312, - "learning_rate": 5.19070587802926e-06, - "loss": 94.9037, - "step": 66710 - }, - { - "epoch": 0.5519295197915374, - "grad_norm": 685.058349609375, - "learning_rate": 5.189296529627441e-06, - "loss": 101.691, - "step": 66720 - }, - { - "epoch": 0.5520122430409067, - "grad_norm": 765.4905395507812, - "learning_rate": 5.187887166164165e-06, - "loss": 89.7552, - "step": 66730 - }, - { - "epoch": 0.5520949662902759, - "grad_norm": 695.8983154296875, - "learning_rate": 5.186477787751569e-06, - "loss": 77.7547, - "step": 66740 - }, - { - "epoch": 0.5521776895396451, - "grad_norm": 633.330810546875, - "learning_rate": 5.185068394501791e-06, - "loss": 93.5725, - "step": 66750 - }, - { - "epoch": 0.5522604127890144, - "grad_norm": 566.3089599609375, - "learning_rate": 5.183658986526969e-06, - "loss": 86.7824, - "step": 66760 - }, - { - "epoch": 0.5523431360383836, - "grad_norm": 870.3569946289062, - "learning_rate": 5.1822495639392465e-06, - "loss": 137.4197, - "step": 66770 - }, - { - "epoch": 0.5524258592877528, - "grad_norm": 1036.458251953125, - "learning_rate": 5.180840126850764e-06, - "loss": 80.6906, - "step": 66780 - }, - { - "epoch": 0.5525085825371221, - "grad_norm": 792.98779296875, - "learning_rate": 5.179430675373659e-06, - "loss": 91.3037, - "step": 66790 - }, - { - "epoch": 0.5525913057864913, - "grad_norm": 1176.4154052734375, - "learning_rate": 5.17802120962008e-06, - "loss": 129.9155, - "step": 66800 - }, - { - "epoch": 0.5526740290358605, - "grad_norm": 1051.582275390625, - "learning_rate": 5.17661172970217e-06, - "loss": 73.2745, - "step": 66810 - }, - { - "epoch": 0.5527567522852298, - "grad_norm": 1047.6094970703125, - "learning_rate": 5.175202235732077e-06, - "loss": 99.0051, - "step": 66820 - }, - { - "epoch": 0.552839475534599, - "grad_norm": 871.4730834960938, - "learning_rate": 5.1737927278219446e-06, - "loss": 86.3209, - "step": 66830 - }, - { - "epoch": 0.5529221987839682, - "grad_norm": 644.1160888671875, - "learning_rate": 5.1723832060839216e-06, - "loss": 90.1693, - "step": 66840 - }, - { - "epoch": 0.5530049220333375, - "grad_norm": 688.15576171875, - "learning_rate": 5.170973670630159e-06, - "loss": 126.0678, - "step": 66850 - }, - { - "epoch": 0.5530876452827067, - "grad_norm": 737.1720581054688, - "learning_rate": 5.169564121572806e-06, - "loss": 83.4195, - "step": 66860 - }, - { - "epoch": 0.5531703685320759, - "grad_norm": 719.3773803710938, - "learning_rate": 5.168154559024014e-06, - "loss": 91.4589, - "step": 66870 - }, - { - "epoch": 0.5532530917814452, - "grad_norm": 1081.61572265625, - "learning_rate": 5.166744983095937e-06, - "loss": 115.1463, - "step": 66880 - }, - { - "epoch": 0.5533358150308144, - "grad_norm": 863.379150390625, - "learning_rate": 5.165335393900726e-06, - "loss": 91.273, - "step": 66890 - }, - { - "epoch": 0.5534185382801836, - "grad_norm": 1196.47509765625, - "learning_rate": 5.163925791550536e-06, - "loss": 74.5763, - "step": 66900 - }, - { - "epoch": 0.5535012615295529, - "grad_norm": 754.5050659179688, - "learning_rate": 5.162516176157523e-06, - "loss": 78.6246, - "step": 66910 - }, - { - "epoch": 0.5535839847789221, - "grad_norm": 332.3321228027344, - "learning_rate": 5.161106547833843e-06, - "loss": 78.1392, - "step": 66920 - }, - { - "epoch": 0.5536667080282913, - "grad_norm": 459.50701904296875, - "learning_rate": 5.159696906691656e-06, - "loss": 78.0986, - "step": 66930 - }, - { - "epoch": 0.5537494312776606, - "grad_norm": 1238.329833984375, - "learning_rate": 5.158287252843118e-06, - "loss": 105.3073, - "step": 66940 - }, - { - "epoch": 0.5538321545270298, - "grad_norm": 885.58251953125, - "learning_rate": 5.1568775864003894e-06, - "loss": 102.8519, - "step": 66950 - }, - { - "epoch": 0.553914877776399, - "grad_norm": 634.6365966796875, - "learning_rate": 5.155467907475632e-06, - "loss": 114.2997, - "step": 66960 - }, - { - "epoch": 0.5539976010257683, - "grad_norm": 957.9181518554688, - "learning_rate": 5.154058216181007e-06, - "loss": 86.0915, - "step": 66970 - }, - { - "epoch": 0.5540803242751375, - "grad_norm": 1111.42919921875, - "learning_rate": 5.1526485126286766e-06, - "loss": 98.9156, - "step": 66980 - }, - { - "epoch": 0.5541630475245067, - "grad_norm": 1166.4410400390625, - "learning_rate": 5.151238796930804e-06, - "loss": 125.5735, - "step": 66990 - }, - { - "epoch": 0.554245770773876, - "grad_norm": 650.7413330078125, - "learning_rate": 5.149829069199555e-06, - "loss": 83.7539, - "step": 67000 - }, - { - "epoch": 0.5543284940232452, - "grad_norm": 654.6986694335938, - "learning_rate": 5.148419329547094e-06, - "loss": 85.525, - "step": 67010 - }, - { - "epoch": 0.5544112172726144, - "grad_norm": 713.3673706054688, - "learning_rate": 5.147009578085589e-06, - "loss": 96.1334, - "step": 67020 - }, - { - "epoch": 0.5544939405219838, - "grad_norm": 1134.079345703125, - "learning_rate": 5.145599814927205e-06, - "loss": 106.5868, - "step": 67030 - }, - { - "epoch": 0.554576663771353, - "grad_norm": 782.4326171875, - "learning_rate": 5.144190040184114e-06, - "loss": 81.4991, - "step": 67040 - }, - { - "epoch": 0.5546593870207221, - "grad_norm": 885.5360107421875, - "learning_rate": 5.142780253968481e-06, - "loss": 106.0122, - "step": 67050 - }, - { - "epoch": 0.5547421102700915, - "grad_norm": 761.8356323242188, - "learning_rate": 5.14137045639248e-06, - "loss": 106.5318, - "step": 67060 - }, - { - "epoch": 0.5548248335194607, - "grad_norm": 796.8743896484375, - "learning_rate": 5.13996064756828e-06, - "loss": 70.0908, - "step": 67070 - }, - { - "epoch": 0.5549075567688299, - "grad_norm": 1602.5904541015625, - "learning_rate": 5.138550827608055e-06, - "loss": 97.1062, - "step": 67080 - }, - { - "epoch": 0.5549902800181992, - "grad_norm": 1278.1387939453125, - "learning_rate": 5.137140996623975e-06, - "loss": 85.0867, - "step": 67090 - }, - { - "epoch": 0.5550730032675684, - "grad_norm": 1092.7744140625, - "learning_rate": 5.135731154728215e-06, - "loss": 95.7226, - "step": 67100 - }, - { - "epoch": 0.5551557265169376, - "grad_norm": 897.9187622070312, - "learning_rate": 5.134321302032951e-06, - "loss": 118.3043, - "step": 67110 - }, - { - "epoch": 0.5552384497663069, - "grad_norm": 1308.584716796875, - "learning_rate": 5.1329114386503585e-06, - "loss": 136.3629, - "step": 67120 - }, - { - "epoch": 0.5553211730156761, - "grad_norm": 1024.6846923828125, - "learning_rate": 5.131501564692611e-06, - "loss": 121.1511, - "step": 67130 - }, - { - "epoch": 0.5554038962650453, - "grad_norm": 894.513671875, - "learning_rate": 5.130091680271887e-06, - "loss": 98.0504, - "step": 67140 - }, - { - "epoch": 0.5554866195144146, - "grad_norm": 1355.4970703125, - "learning_rate": 5.128681785500365e-06, - "loss": 91.5048, - "step": 67150 - }, - { - "epoch": 0.5555693427637838, - "grad_norm": 660.9675903320312, - "learning_rate": 5.127271880490227e-06, - "loss": 107.9526, - "step": 67160 - }, - { - "epoch": 0.555652066013153, - "grad_norm": 639.190185546875, - "learning_rate": 5.125861965353647e-06, - "loss": 95.762, - "step": 67170 - }, - { - "epoch": 0.5557347892625222, - "grad_norm": 1066.5399169921875, - "learning_rate": 5.124452040202809e-06, - "loss": 72.5684, - "step": 67180 - }, - { - "epoch": 0.5558175125118915, - "grad_norm": 1595.59033203125, - "learning_rate": 5.1230421051498914e-06, - "loss": 101.4106, - "step": 67190 - }, - { - "epoch": 0.5559002357612607, - "grad_norm": 700.05419921875, - "learning_rate": 5.121632160307078e-06, - "loss": 110.3626, - "step": 67200 - }, - { - "epoch": 0.5559829590106299, - "grad_norm": 1288.469482421875, - "learning_rate": 5.120222205786556e-06, - "loss": 134.359, - "step": 67210 - }, - { - "epoch": 0.5560656822599992, - "grad_norm": 1454.6519775390625, - "learning_rate": 5.118812241700501e-06, - "loss": 92.2554, - "step": 67220 - }, - { - "epoch": 0.5561484055093684, - "grad_norm": 937.5780029296875, - "learning_rate": 5.117402268161103e-06, - "loss": 78.8586, - "step": 67230 - }, - { - "epoch": 0.5562311287587376, - "grad_norm": 1452.914306640625, - "learning_rate": 5.115992285280543e-06, - "loss": 90.6466, - "step": 67240 - }, - { - "epoch": 0.5563138520081069, - "grad_norm": 808.0802612304688, - "learning_rate": 5.114582293171012e-06, - "loss": 84.3542, - "step": 67250 - }, - { - "epoch": 0.5563965752574761, - "grad_norm": 1291.392333984375, - "learning_rate": 5.113172291944693e-06, - "loss": 80.4222, - "step": 67260 - }, - { - "epoch": 0.5564792985068453, - "grad_norm": 0.0, - "learning_rate": 5.111762281713773e-06, - "loss": 96.9373, - "step": 67270 - }, - { - "epoch": 0.5565620217562146, - "grad_norm": 460.95623779296875, - "learning_rate": 5.110352262590442e-06, - "loss": 98.4892, - "step": 67280 - }, - { - "epoch": 0.5566447450055838, - "grad_norm": 921.4959106445312, - "learning_rate": 5.108942234686889e-06, - "loss": 85.5142, - "step": 67290 - }, - { - "epoch": 0.556727468254953, - "grad_norm": 909.4285278320312, - "learning_rate": 5.1075321981153014e-06, - "loss": 79.041, - "step": 67300 - }, - { - "epoch": 0.5568101915043223, - "grad_norm": 786.9864501953125, - "learning_rate": 5.106122152987869e-06, - "loss": 131.3443, - "step": 67310 - }, - { - "epoch": 0.5568929147536915, - "grad_norm": 837.8712158203125, - "learning_rate": 5.1047120994167855e-06, - "loss": 111.7526, - "step": 67320 - }, - { - "epoch": 0.5569756380030607, - "grad_norm": 748.6630859375, - "learning_rate": 5.103302037514241e-06, - "loss": 83.1395, - "step": 67330 - }, - { - "epoch": 0.55705836125243, - "grad_norm": 422.8838195800781, - "learning_rate": 5.101891967392426e-06, - "loss": 132.004, - "step": 67340 - }, - { - "epoch": 0.5571410845017992, - "grad_norm": 784.1287231445312, - "learning_rate": 5.100481889163535e-06, - "loss": 84.0244, - "step": 67350 - }, - { - "epoch": 0.5572238077511684, - "grad_norm": 871.6853637695312, - "learning_rate": 5.099071802939763e-06, - "loss": 96.8194, - "step": 67360 - }, - { - "epoch": 0.5573065310005377, - "grad_norm": 1200.704833984375, - "learning_rate": 5.097661708833302e-06, - "loss": 101.7852, - "step": 67370 - }, - { - "epoch": 0.5573892542499069, - "grad_norm": 605.1507568359375, - "learning_rate": 5.096251606956345e-06, - "loss": 114.6154, - "step": 67380 - }, - { - "epoch": 0.5574719774992761, - "grad_norm": 1629.7506103515625, - "learning_rate": 5.0948414974210906e-06, - "loss": 108.8764, - "step": 67390 - }, - { - "epoch": 0.5575547007486454, - "grad_norm": 963.1193237304688, - "learning_rate": 5.093431380339734e-06, - "loss": 83.0829, - "step": 67400 - }, - { - "epoch": 0.5576374239980146, - "grad_norm": 984.2836303710938, - "learning_rate": 5.092021255824471e-06, - "loss": 90.3172, - "step": 67410 - }, - { - "epoch": 0.5577201472473838, - "grad_norm": 846.0245971679688, - "learning_rate": 5.090611123987498e-06, - "loss": 75.3123, - "step": 67420 - }, - { - "epoch": 0.5578028704967531, - "grad_norm": 880.2362670898438, - "learning_rate": 5.089200984941014e-06, - "loss": 86.5572, - "step": 67430 - }, - { - "epoch": 0.5578855937461223, - "grad_norm": 1226.6094970703125, - "learning_rate": 5.087790838797217e-06, - "loss": 76.6975, - "step": 67440 - }, - { - "epoch": 0.5579683169954915, - "grad_norm": 1174.342529296875, - "learning_rate": 5.0863806856683076e-06, - "loss": 97.9613, - "step": 67450 - }, - { - "epoch": 0.5580510402448609, - "grad_norm": 832.6563110351562, - "learning_rate": 5.084970525666481e-06, - "loss": 77.5357, - "step": 67460 - }, - { - "epoch": 0.55813376349423, - "grad_norm": 1207.7735595703125, - "learning_rate": 5.083560358903942e-06, - "loss": 108.6904, - "step": 67470 - }, - { - "epoch": 0.5582164867435992, - "grad_norm": 1580.60791015625, - "learning_rate": 5.082150185492887e-06, - "loss": 123.0784, - "step": 67480 - }, - { - "epoch": 0.5582992099929686, - "grad_norm": 911.670654296875, - "learning_rate": 5.080740005545519e-06, - "loss": 81.2963, - "step": 67490 - }, - { - "epoch": 0.5583819332423378, - "grad_norm": 1346.0203857421875, - "learning_rate": 5.07932981917404e-06, - "loss": 105.0922, - "step": 67500 - }, - { - "epoch": 0.558464656491707, - "grad_norm": 1385.7469482421875, - "learning_rate": 5.077919626490651e-06, - "loss": 111.0049, - "step": 67510 - }, - { - "epoch": 0.5585473797410763, - "grad_norm": 2228.890380859375, - "learning_rate": 5.076509427607555e-06, - "loss": 89.5055, - "step": 67520 - }, - { - "epoch": 0.5586301029904455, - "grad_norm": 507.3185119628906, - "learning_rate": 5.075099222636954e-06, - "loss": 78.911, - "step": 67530 - }, - { - "epoch": 0.5587128262398147, - "grad_norm": 551.4053955078125, - "learning_rate": 5.073689011691054e-06, - "loss": 150.896, - "step": 67540 - }, - { - "epoch": 0.558795549489184, - "grad_norm": 350.7704162597656, - "learning_rate": 5.072278794882058e-06, - "loss": 78.5772, - "step": 67550 - }, - { - "epoch": 0.5588782727385532, - "grad_norm": 682.6192626953125, - "learning_rate": 5.07086857232217e-06, - "loss": 93.3245, - "step": 67560 - }, - { - "epoch": 0.5589609959879224, - "grad_norm": 699.5905151367188, - "learning_rate": 5.069458344123592e-06, - "loss": 109.2068, - "step": 67570 - }, - { - "epoch": 0.5590437192372917, - "grad_norm": 1194.6055908203125, - "learning_rate": 5.068048110398535e-06, - "loss": 103.4686, - "step": 67580 - }, - { - "epoch": 0.5591264424866609, - "grad_norm": 671.2708740234375, - "learning_rate": 5.066637871259201e-06, - "loss": 69.1066, - "step": 67590 - }, - { - "epoch": 0.5592091657360301, - "grad_norm": 817.7105102539062, - "learning_rate": 5.065227626817798e-06, - "loss": 105.476, - "step": 67600 - }, - { - "epoch": 0.5592918889853994, - "grad_norm": 664.9489135742188, - "learning_rate": 5.063817377186531e-06, - "loss": 67.6632, - "step": 67610 - }, - { - "epoch": 0.5593746122347686, - "grad_norm": 1241.629638671875, - "learning_rate": 5.062407122477609e-06, - "loss": 87.1826, - "step": 67620 - }, - { - "epoch": 0.5594573354841378, - "grad_norm": 1491.961181640625, - "learning_rate": 5.060996862803239e-06, - "loss": 78.7851, - "step": 67630 - }, - { - "epoch": 0.5595400587335071, - "grad_norm": 981.352294921875, - "learning_rate": 5.0595865982756284e-06, - "loss": 106.6009, - "step": 67640 - }, - { - "epoch": 0.5596227819828763, - "grad_norm": 695.9772338867188, - "learning_rate": 5.0581763290069865e-06, - "loss": 85.9514, - "step": 67650 - }, - { - "epoch": 0.5597055052322455, - "grad_norm": 1229.713134765625, - "learning_rate": 5.05676605510952e-06, - "loss": 105.0807, - "step": 67660 - }, - { - "epoch": 0.5597882284816148, - "grad_norm": 534.1005859375, - "learning_rate": 5.055355776695437e-06, - "loss": 93.8244, - "step": 67670 - }, - { - "epoch": 0.559870951730984, - "grad_norm": 911.0359497070312, - "learning_rate": 5.0539454938769525e-06, - "loss": 101.3288, - "step": 67680 - }, - { - "epoch": 0.5599536749803532, - "grad_norm": 918.2195434570312, - "learning_rate": 5.052535206766271e-06, - "loss": 89.4984, - "step": 67690 - }, - { - "epoch": 0.5600363982297225, - "grad_norm": 1059.2137451171875, - "learning_rate": 5.051124915475604e-06, - "loss": 99.4896, - "step": 67700 - }, - { - "epoch": 0.5601191214790917, - "grad_norm": 1157.1102294921875, - "learning_rate": 5.049714620117162e-06, - "loss": 105.2878, - "step": 67710 - }, - { - "epoch": 0.5602018447284609, - "grad_norm": 666.695068359375, - "learning_rate": 5.0483043208031575e-06, - "loss": 109.8217, - "step": 67720 - }, - { - "epoch": 0.5602845679778302, - "grad_norm": 1238.229736328125, - "learning_rate": 5.0468940176458e-06, - "loss": 94.0387, - "step": 67730 - }, - { - "epoch": 0.5603672912271994, - "grad_norm": 2138.0751953125, - "learning_rate": 5.045483710757298e-06, - "loss": 95.2673, - "step": 67740 - }, - { - "epoch": 0.5604500144765686, - "grad_norm": 914.434326171875, - "learning_rate": 5.044073400249867e-06, - "loss": 92.1393, - "step": 67750 - }, - { - "epoch": 0.5605327377259379, - "grad_norm": 1934.4398193359375, - "learning_rate": 5.0426630862357176e-06, - "loss": 142.6514, - "step": 67760 - }, - { - "epoch": 0.5606154609753071, - "grad_norm": 1084.5147705078125, - "learning_rate": 5.041252768827064e-06, - "loss": 133.2949, - "step": 67770 - }, - { - "epoch": 0.5606981842246763, - "grad_norm": 748.2398071289062, - "learning_rate": 5.039842448136115e-06, - "loss": 69.7304, - "step": 67780 - }, - { - "epoch": 0.5607809074740456, - "grad_norm": 804.4089965820312, - "learning_rate": 5.038432124275087e-06, - "loss": 88.9984, - "step": 67790 - }, - { - "epoch": 0.5608636307234148, - "grad_norm": 535.5092163085938, - "learning_rate": 5.03702179735619e-06, - "loss": 88.9917, - "step": 67800 - }, - { - "epoch": 0.560946353972784, - "grad_norm": 635.5907592773438, - "learning_rate": 5.035611467491638e-06, - "loss": 92.2593, - "step": 67810 - }, - { - "epoch": 0.5610290772221533, - "grad_norm": 851.3953247070312, - "learning_rate": 5.034201134793646e-06, - "loss": 126.7755, - "step": 67820 - }, - { - "epoch": 0.5611118004715225, - "grad_norm": 780.0504760742188, - "learning_rate": 5.032790799374426e-06, - "loss": 131.121, - "step": 67830 - }, - { - "epoch": 0.5611945237208917, - "grad_norm": 1291.3529052734375, - "learning_rate": 5.0313804613461925e-06, - "loss": 110.4924, - "step": 67840 - }, - { - "epoch": 0.561277246970261, - "grad_norm": 860.24072265625, - "learning_rate": 5.0299701208211605e-06, - "loss": 83.46, - "step": 67850 - }, - { - "epoch": 0.5613599702196302, - "grad_norm": 1199.1927490234375, - "learning_rate": 5.028559777911543e-06, - "loss": 107.3534, - "step": 67860 - }, - { - "epoch": 0.5614426934689994, - "grad_norm": 613.6046752929688, - "learning_rate": 5.027149432729555e-06, - "loss": 88.3334, - "step": 67870 - }, - { - "epoch": 0.5615254167183688, - "grad_norm": 1034.6842041015625, - "learning_rate": 5.025739085387411e-06, - "loss": 106.7181, - "step": 67880 - }, - { - "epoch": 0.561608139967738, - "grad_norm": 968.4409790039062, - "learning_rate": 5.024328735997327e-06, - "loss": 73.4767, - "step": 67890 - }, - { - "epoch": 0.5616908632171072, - "grad_norm": 636.9981689453125, - "learning_rate": 5.0229183846715154e-06, - "loss": 80.4445, - "step": 67900 - }, - { - "epoch": 0.5617735864664763, - "grad_norm": 762.04931640625, - "learning_rate": 5.021508031522195e-06, - "loss": 106.2328, - "step": 67910 - }, - { - "epoch": 0.5618563097158457, - "grad_norm": 1687.2122802734375, - "learning_rate": 5.0200976766615785e-06, - "loss": 103.8904, - "step": 67920 - }, - { - "epoch": 0.5619390329652149, - "grad_norm": 1144.8087158203125, - "learning_rate": 5.018687320201882e-06, - "loss": 88.8534, - "step": 67930 - }, - { - "epoch": 0.5620217562145841, - "grad_norm": 1060.1658935546875, - "learning_rate": 5.017276962255323e-06, - "loss": 117.6395, - "step": 67940 - }, - { - "epoch": 0.5621044794639534, - "grad_norm": 541.5599975585938, - "learning_rate": 5.015866602934112e-06, - "loss": 81.2837, - "step": 67950 - }, - { - "epoch": 0.5621872027133226, - "grad_norm": 1171.819091796875, - "learning_rate": 5.01445624235047e-06, - "loss": 122.8472, - "step": 67960 - }, - { - "epoch": 0.5622699259626918, - "grad_norm": 569.9471435546875, - "learning_rate": 5.013045880616612e-06, - "loss": 99.2828, - "step": 67970 - }, - { - "epoch": 0.5623526492120611, - "grad_norm": 857.0740356445312, - "learning_rate": 5.011635517844753e-06, - "loss": 79.8719, - "step": 67980 - }, - { - "epoch": 0.5624353724614303, - "grad_norm": 1649.099853515625, - "learning_rate": 5.010225154147107e-06, - "loss": 116.2859, - "step": 67990 - }, - { - "epoch": 0.5625180957107995, - "grad_norm": 991.4071044921875, - "learning_rate": 5.008814789635894e-06, - "loss": 84.7862, - "step": 68000 } ], "logging_steps": 10,