{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5745257452574526, "eval_steps": 500, "global_step": 1484, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00038714672861014324, "grad_norm": 0.048257023096084595, "learning_rate": 2e-05, "loss": 0.0394, "step": 1 }, { "epoch": 0.0007742934572202865, "grad_norm": 0.048200540244579315, "learning_rate": 4e-05, "loss": 0.048, "step": 2 }, { "epoch": 0.0011614401858304297, "grad_norm": 0.10289892554283142, "learning_rate": 6e-05, "loss": 0.0864, "step": 3 }, { "epoch": 0.001548586914440573, "grad_norm": 0.10379760712385178, "learning_rate": 8e-05, "loss": 0.0887, "step": 4 }, { "epoch": 0.0019357336430507162, "grad_norm": 0.14089909195899963, "learning_rate": 0.0001, "loss": 0.1072, "step": 5 }, { "epoch": 0.0023228803716608595, "grad_norm": 0.16513778269290924, "learning_rate": 9.999988720152121e-05, "loss": 0.1346, "step": 6 }, { "epoch": 0.0027100271002710027, "grad_norm": 0.17860881984233856, "learning_rate": 9.999954880659377e-05, "loss": 0.1306, "step": 7 }, { "epoch": 0.003097173828881146, "grad_norm": 0.2414817363023758, "learning_rate": 9.999898481674448e-05, "loss": 0.1593, "step": 8 }, { "epoch": 0.003484320557491289, "grad_norm": 0.2995632588863373, "learning_rate": 9.999819523451806e-05, "loss": 0.1684, "step": 9 }, { "epoch": 0.0038714672861014324, "grad_norm": 0.2831806242465973, "learning_rate": 9.999718006347704e-05, "loss": 0.1538, "step": 10 }, { "epoch": 0.004258614014711576, "grad_norm": 0.3746040463447571, "learning_rate": 9.999593930820181e-05, "loss": 0.1573, "step": 11 }, { "epoch": 0.004645760743321719, "grad_norm": 0.5143625736236572, "learning_rate": 9.999447297429058e-05, "loss": 0.1596, "step": 12 }, { "epoch": 0.005032907471931862, "grad_norm": 0.5252905488014221, "learning_rate": 9.999278106835936e-05, "loss": 0.1503, "step": 13 }, { "epoch": 0.005420054200542005, "grad_norm": 0.43592771887779236, "learning_rate": 9.999086359804195e-05, "loss": 0.1396, "step": 14 }, { "epoch": 0.005807200929152149, "grad_norm": 0.7996945381164551, "learning_rate": 9.998872057198982e-05, "loss": 0.1226, "step": 15 }, { "epoch": 0.006194347657762292, "grad_norm": 0.8167794942855835, "learning_rate": 9.998635199987219e-05, "loss": 0.1368, "step": 16 }, { "epoch": 0.006581494386372435, "grad_norm": 0.8105260729789734, "learning_rate": 9.998375789237593e-05, "loss": 0.1199, "step": 17 }, { "epoch": 0.006968641114982578, "grad_norm": 0.7661561369895935, "learning_rate": 9.998093826120548e-05, "loss": 0.1011, "step": 18 }, { "epoch": 0.007355787843592722, "grad_norm": 0.6599623560905457, "learning_rate": 9.997789311908284e-05, "loss": 0.0966, "step": 19 }, { "epoch": 0.007742934572202865, "grad_norm": 0.8486294150352478, "learning_rate": 9.997462247974752e-05, "loss": 0.0979, "step": 20 }, { "epoch": 0.008130081300813009, "grad_norm": 0.6950157284736633, "learning_rate": 9.997112635795643e-05, "loss": 0.0894, "step": 21 }, { "epoch": 0.008517228029423151, "grad_norm": 0.4815932512283325, "learning_rate": 9.996740476948385e-05, "loss": 0.0649, "step": 22 }, { "epoch": 0.008904374758033295, "grad_norm": 0.41461682319641113, "learning_rate": 9.996345773112139e-05, "loss": 0.0704, "step": 23 }, { "epoch": 0.009291521486643438, "grad_norm": 0.42949068546295166, "learning_rate": 9.995928526067784e-05, "loss": 0.0715, "step": 24 }, { "epoch": 0.009678668215253582, "grad_norm": 0.3920012414455414, "learning_rate": 9.99548873769791e-05, "loss": 0.048, "step": 25 }, { "epoch": 0.010065814943863724, "grad_norm": 0.261152058839798, "learning_rate": 9.995026409986821e-05, "loss": 0.0343, "step": 26 }, { "epoch": 0.010452961672473868, "grad_norm": 0.3788264989852905, "learning_rate": 9.994541545020509e-05, "loss": 0.0403, "step": 27 }, { "epoch": 0.01084010840108401, "grad_norm": 0.8691251873970032, "learning_rate": 9.994034144986653e-05, "loss": 0.063, "step": 28 }, { "epoch": 0.011227255129694155, "grad_norm": 0.5656071305274963, "learning_rate": 9.993504212174613e-05, "loss": 0.0415, "step": 29 }, { "epoch": 0.011614401858304297, "grad_norm": 0.42919209599494934, "learning_rate": 9.992951748975412e-05, "loss": 0.0229, "step": 30 }, { "epoch": 0.012001548586914441, "grad_norm": 0.46598193049430847, "learning_rate": 9.992376757881735e-05, "loss": 0.0208, "step": 31 }, { "epoch": 0.012388695315524584, "grad_norm": 0.5426745414733887, "learning_rate": 9.991779241487899e-05, "loss": 0.0188, "step": 32 }, { "epoch": 0.012775842044134728, "grad_norm": 0.8520743250846863, "learning_rate": 9.991159202489871e-05, "loss": 0.0278, "step": 33 }, { "epoch": 0.01316298877274487, "grad_norm": 0.6999525427818298, "learning_rate": 9.990516643685222e-05, "loss": 0.0145, "step": 34 }, { "epoch": 0.013550135501355014, "grad_norm": 1.101209282875061, "learning_rate": 9.989851567973139e-05, "loss": 0.0096, "step": 35 }, { "epoch": 0.013937282229965157, "grad_norm": 1.42173433303833, "learning_rate": 9.989163978354408e-05, "loss": 0.01, "step": 36 }, { "epoch": 0.014324428958575301, "grad_norm": 0.1809816062450409, "learning_rate": 9.988453877931386e-05, "loss": 0.0026, "step": 37 }, { "epoch": 0.014711575687185443, "grad_norm": 0.5789023637771606, "learning_rate": 9.987721269908006e-05, "loss": 0.0264, "step": 38 }, { "epoch": 0.015098722415795587, "grad_norm": 1.4514374732971191, "learning_rate": 9.98696615758975e-05, "loss": 0.0197, "step": 39 }, { "epoch": 0.01548586914440573, "grad_norm": 1.6745890378952026, "learning_rate": 9.986188544383639e-05, "loss": 0.0398, "step": 40 }, { "epoch": 0.015873015873015872, "grad_norm": 0.31723207235336304, "learning_rate": 9.985388433798215e-05, "loss": 0.0207, "step": 41 }, { "epoch": 0.016260162601626018, "grad_norm": 0.8627777695655823, "learning_rate": 9.984565829443531e-05, "loss": 0.0182, "step": 42 }, { "epoch": 0.01664730933023616, "grad_norm": 0.023634975776076317, "learning_rate": 9.983720735031125e-05, "loss": 0.0004, "step": 43 }, { "epoch": 0.017034456058846303, "grad_norm": 0.45953628420829773, "learning_rate": 9.982853154374014e-05, "loss": 0.0111, "step": 44 }, { "epoch": 0.017421602787456445, "grad_norm": 0.2831597328186035, "learning_rate": 9.98196309138667e-05, "loss": 0.0109, "step": 45 }, { "epoch": 0.01780874951606659, "grad_norm": 0.7697696089744568, "learning_rate": 9.981050550084998e-05, "loss": 0.0427, "step": 46 }, { "epoch": 0.018195896244676733, "grad_norm": 0.5054185390472412, "learning_rate": 9.980115534586335e-05, "loss": 0.0093, "step": 47 }, { "epoch": 0.018583042973286876, "grad_norm": 0.16788819432258606, "learning_rate": 9.979158049109407e-05, "loss": 0.0024, "step": 48 }, { "epoch": 0.018970189701897018, "grad_norm": 2.8662874698638916, "learning_rate": 9.978178097974337e-05, "loss": 0.0919, "step": 49 }, { "epoch": 0.019357336430507164, "grad_norm": 2.2201218605041504, "learning_rate": 9.977175685602602e-05, "loss": 0.2433, "step": 50 }, { "epoch": 0.019744483159117306, "grad_norm": 0.08052156120538712, "learning_rate": 9.976150816517024e-05, "loss": 0.0113, "step": 51 }, { "epoch": 0.02013162988772745, "grad_norm": 0.10142353922128677, "learning_rate": 9.975103495341753e-05, "loss": 0.0095, "step": 52 }, { "epoch": 0.02051877661633759, "grad_norm": 0.14654894173145294, "learning_rate": 9.974033726802237e-05, "loss": 0.0141, "step": 53 }, { "epoch": 0.020905923344947737, "grad_norm": 0.19414274394512177, "learning_rate": 9.972941515725206e-05, "loss": 0.0129, "step": 54 }, { "epoch": 0.02129307007355788, "grad_norm": 0.24451220035552979, "learning_rate": 9.971826867038651e-05, "loss": 0.0141, "step": 55 }, { "epoch": 0.02168021680216802, "grad_norm": 0.1221214234828949, "learning_rate": 9.970689785771798e-05, "loss": 0.0025, "step": 56 }, { "epoch": 0.022067363530778164, "grad_norm": 0.4560745656490326, "learning_rate": 9.96953027705509e-05, "loss": 0.0127, "step": 57 }, { "epoch": 0.02245451025938831, "grad_norm": 0.2530369758605957, "learning_rate": 9.968348346120159e-05, "loss": 0.0148, "step": 58 }, { "epoch": 0.022841656987998452, "grad_norm": 0.7455848455429077, "learning_rate": 9.967143998299803e-05, "loss": 0.0069, "step": 59 }, { "epoch": 0.023228803716608595, "grad_norm": 0.10115598142147064, "learning_rate": 9.965917239027972e-05, "loss": 0.0094, "step": 60 }, { "epoch": 0.023615950445218737, "grad_norm": 0.15474660694599152, "learning_rate": 9.964668073839724e-05, "loss": 0.0079, "step": 61 }, { "epoch": 0.024003097173828883, "grad_norm": 0.020637422800064087, "learning_rate": 9.963396508371218e-05, "loss": 0.0004, "step": 62 }, { "epoch": 0.024390243902439025, "grad_norm": 0.14796803891658783, "learning_rate": 9.96210254835968e-05, "loss": 0.001, "step": 63 }, { "epoch": 0.024777390631049168, "grad_norm": 0.28956761956214905, "learning_rate": 9.960786199643377e-05, "loss": 0.0013, "step": 64 }, { "epoch": 0.02516453735965931, "grad_norm": 0.020133867859840393, "learning_rate": 9.959447468161598e-05, "loss": 0.0002, "step": 65 }, { "epoch": 0.025551684088269456, "grad_norm": 0.43387070298194885, "learning_rate": 9.958086359954616e-05, "loss": 0.0038, "step": 66 }, { "epoch": 0.025938830816879598, "grad_norm": 0.12942586839199066, "learning_rate": 9.956702881163669e-05, "loss": 0.0018, "step": 67 }, { "epoch": 0.02632597754548974, "grad_norm": 0.24969282746315002, "learning_rate": 9.955297038030927e-05, "loss": 0.0067, "step": 68 }, { "epoch": 0.026713124274099883, "grad_norm": 0.06376656889915466, "learning_rate": 9.953868836899472e-05, "loss": 0.0008, "step": 69 }, { "epoch": 0.02710027100271003, "grad_norm": 0.03575746715068817, "learning_rate": 9.952418284213257e-05, "loss": 0.0009, "step": 70 }, { "epoch": 0.02748741773132017, "grad_norm": 0.19091981649398804, "learning_rate": 9.95094538651709e-05, "loss": 0.0046, "step": 71 }, { "epoch": 0.027874564459930314, "grad_norm": 0.03124951757490635, "learning_rate": 9.949450150456596e-05, "loss": 0.0005, "step": 72 }, { "epoch": 0.028261711188540456, "grad_norm": 0.10794883221387863, "learning_rate": 9.947932582778188e-05, "loss": 0.0009, "step": 73 }, { "epoch": 0.028648857917150602, "grad_norm": 0.054995935410261154, "learning_rate": 9.946392690329039e-05, "loss": 0.0006, "step": 74 }, { "epoch": 0.029036004645760744, "grad_norm": 0.12434786558151245, "learning_rate": 9.944830480057049e-05, "loss": 0.0015, "step": 75 }, { "epoch": 0.029423151374370887, "grad_norm": 0.36718836426734924, "learning_rate": 9.943245959010819e-05, "loss": 0.0123, "step": 76 }, { "epoch": 0.02981029810298103, "grad_norm": 0.002867339877411723, "learning_rate": 9.941639134339608e-05, "loss": 0.0001, "step": 77 }, { "epoch": 0.030197444831591175, "grad_norm": 0.02242063172161579, "learning_rate": 9.940010013293313e-05, "loss": 0.0003, "step": 78 }, { "epoch": 0.030584591560201317, "grad_norm": 0.6994889378547668, "learning_rate": 9.938358603222429e-05, "loss": 0.005, "step": 79 }, { "epoch": 0.03097173828881146, "grad_norm": 0.06029118597507477, "learning_rate": 9.936684911578018e-05, "loss": 0.0009, "step": 80 }, { "epoch": 0.0313588850174216, "grad_norm": 0.43654757738113403, "learning_rate": 9.934988945911674e-05, "loss": 0.0041, "step": 81 }, { "epoch": 0.031746031746031744, "grad_norm": 1.1972216367721558, "learning_rate": 9.93327071387549e-05, "loss": 0.0098, "step": 82 }, { "epoch": 0.03213317847464189, "grad_norm": 0.008753047324717045, "learning_rate": 9.931530223222027e-05, "loss": 0.0001, "step": 83 }, { "epoch": 0.032520325203252036, "grad_norm": 0.019141845405101776, "learning_rate": 9.929767481804271e-05, "loss": 0.0003, "step": 84 }, { "epoch": 0.03290747193186218, "grad_norm": 0.13906456530094147, "learning_rate": 9.927982497575605e-05, "loss": 0.0017, "step": 85 }, { "epoch": 0.03329461866047232, "grad_norm": 0.018300075083971024, "learning_rate": 9.926175278589769e-05, "loss": 0.0004, "step": 86 }, { "epoch": 0.03368176538908246, "grad_norm": 0.4637622535228729, "learning_rate": 9.924345833000824e-05, "loss": 0.0056, "step": 87 }, { "epoch": 0.034068912117692605, "grad_norm": 0.00441858172416687, "learning_rate": 9.922494169063119e-05, "loss": 0.0001, "step": 88 }, { "epoch": 0.03445605884630275, "grad_norm": 0.0403720885515213, "learning_rate": 9.920620295131247e-05, "loss": 0.0007, "step": 89 }, { "epoch": 0.03484320557491289, "grad_norm": 0.17813202738761902, "learning_rate": 9.918724219660013e-05, "loss": 0.0015, "step": 90 }, { "epoch": 0.03523035230352303, "grad_norm": 0.058298081159591675, "learning_rate": 9.916805951204397e-05, "loss": 0.0004, "step": 91 }, { "epoch": 0.03561749903213318, "grad_norm": 0.29420581459999084, "learning_rate": 9.91486549841951e-05, "loss": 0.0176, "step": 92 }, { "epoch": 0.036004645760743324, "grad_norm": 0.005454974714666605, "learning_rate": 9.912902870060552e-05, "loss": 0.0001, "step": 93 }, { "epoch": 0.03639179248935347, "grad_norm": 0.01605985127389431, "learning_rate": 9.910918074982787e-05, "loss": 0.0002, "step": 94 }, { "epoch": 0.03677893921796361, "grad_norm": 0.08237680792808533, "learning_rate": 9.908911122141486e-05, "loss": 0.0011, "step": 95 }, { "epoch": 0.03716608594657375, "grad_norm": 0.6426149606704712, "learning_rate": 9.906882020591903e-05, "loss": 0.0214, "step": 96 }, { "epoch": 0.037553232675183894, "grad_norm": 0.21502873301506042, "learning_rate": 9.904830779489216e-05, "loss": 0.0019, "step": 97 }, { "epoch": 0.037940379403794036, "grad_norm": 0.7887445092201233, "learning_rate": 9.902757408088503e-05, "loss": 0.0369, "step": 98 }, { "epoch": 0.03832752613240418, "grad_norm": 0.013632730580866337, "learning_rate": 9.900661915744689e-05, "loss": 0.0001, "step": 99 }, { "epoch": 0.03871467286101433, "grad_norm": 5.337656021118164, "learning_rate": 9.898544311912508e-05, "loss": 0.163, "step": 100 }, { "epoch": 0.03910181958962447, "grad_norm": 0.07259741425514221, "learning_rate": 9.896404606146456e-05, "loss": 0.0103, "step": 101 }, { "epoch": 0.03948896631823461, "grad_norm": 0.1117100939154625, "learning_rate": 9.894242808100762e-05, "loss": 0.0058, "step": 102 }, { "epoch": 0.039876113046844755, "grad_norm": 0.15869347751140594, "learning_rate": 9.892058927529322e-05, "loss": 0.009, "step": 103 }, { "epoch": 0.0402632597754549, "grad_norm": 0.15626105666160583, "learning_rate": 9.889852974285673e-05, "loss": 0.0049, "step": 104 }, { "epoch": 0.04065040650406504, "grad_norm": 0.20995132625102997, "learning_rate": 9.887624958322946e-05, "loss": 0.0105, "step": 105 }, { "epoch": 0.04103755323267518, "grad_norm": 0.1864119917154312, "learning_rate": 9.885374889693809e-05, "loss": 0.0128, "step": 106 }, { "epoch": 0.041424699961285325, "grad_norm": 0.16312390565872192, "learning_rate": 9.883102778550434e-05, "loss": 0.0106, "step": 107 }, { "epoch": 0.041811846689895474, "grad_norm": 0.07751648128032684, "learning_rate": 9.880808635144452e-05, "loss": 0.0017, "step": 108 }, { "epoch": 0.042198993418505616, "grad_norm": 0.014390280470252037, "learning_rate": 9.878492469826896e-05, "loss": 0.0003, "step": 109 }, { "epoch": 0.04258614014711576, "grad_norm": 0.1289769858121872, "learning_rate": 9.876154293048163e-05, "loss": 0.0026, "step": 110 }, { "epoch": 0.0429732868757259, "grad_norm": 0.25492918491363525, "learning_rate": 9.873794115357966e-05, "loss": 0.0093, "step": 111 }, { "epoch": 0.04336043360433604, "grad_norm": 0.05008332058787346, "learning_rate": 9.87141194740528e-05, "loss": 0.0011, "step": 112 }, { "epoch": 0.043747580332946186, "grad_norm": 0.24651627242565155, "learning_rate": 9.869007799938305e-05, "loss": 0.0034, "step": 113 }, { "epoch": 0.04413472706155633, "grad_norm": 0.004503064788877964, "learning_rate": 9.86658168380441e-05, "loss": 0.0001, "step": 114 }, { "epoch": 0.04452187379016647, "grad_norm": 0.02013341151177883, "learning_rate": 9.864133609950078e-05, "loss": 0.0003, "step": 115 }, { "epoch": 0.04490902051877662, "grad_norm": 0.4879964590072632, "learning_rate": 9.861663589420871e-05, "loss": 0.019, "step": 116 }, { "epoch": 0.04529616724738676, "grad_norm": 0.4585297405719757, "learning_rate": 9.859171633361372e-05, "loss": 0.022, "step": 117 }, { "epoch": 0.045683313975996905, "grad_norm": 0.45999088883399963, "learning_rate": 9.856657753015135e-05, "loss": 0.0196, "step": 118 }, { "epoch": 0.04607046070460705, "grad_norm": 0.15138907730579376, "learning_rate": 9.854121959724636e-05, "loss": 0.0004, "step": 119 }, { "epoch": 0.04645760743321719, "grad_norm": 0.05731305480003357, "learning_rate": 9.851564264931219e-05, "loss": 0.0004, "step": 120 }, { "epoch": 0.04684475416182733, "grad_norm": 0.6728512644767761, "learning_rate": 9.848984680175049e-05, "loss": 0.0094, "step": 121 }, { "epoch": 0.047231900890437474, "grad_norm": 0.009215263649821281, "learning_rate": 9.846383217095053e-05, "loss": 0.0002, "step": 122 }, { "epoch": 0.047619047619047616, "grad_norm": 0.006808656267821789, "learning_rate": 9.843759887428873e-05, "loss": 0.0001, "step": 123 }, { "epoch": 0.048006194347657766, "grad_norm": 0.005876703653484583, "learning_rate": 9.841114703012817e-05, "loss": 0.0001, "step": 124 }, { "epoch": 0.04839334107626791, "grad_norm": 0.019576961174607277, "learning_rate": 9.838447675781794e-05, "loss": 0.0003, "step": 125 }, { "epoch": 0.04878048780487805, "grad_norm": 0.008768095634877682, "learning_rate": 9.835758817769269e-05, "loss": 0.0002, "step": 126 }, { "epoch": 0.04916763453348819, "grad_norm": 0.023376451805233955, "learning_rate": 9.833048141107203e-05, "loss": 0.0004, "step": 127 }, { "epoch": 0.049554781262098335, "grad_norm": 0.065276600420475, "learning_rate": 9.830315658026011e-05, "loss": 0.0008, "step": 128 }, { "epoch": 0.04994192799070848, "grad_norm": 0.240321084856987, "learning_rate": 9.827561380854482e-05, "loss": 0.0014, "step": 129 }, { "epoch": 0.05032907471931862, "grad_norm": 0.01600070670247078, "learning_rate": 9.824785322019753e-05, "loss": 0.0002, "step": 130 }, { "epoch": 0.05071622144792876, "grad_norm": 0.009562276303768158, "learning_rate": 9.821987494047229e-05, "loss": 0.0003, "step": 131 }, { "epoch": 0.05110336817653891, "grad_norm": 0.009312965907156467, "learning_rate": 9.819167909560543e-05, "loss": 0.0002, "step": 132 }, { "epoch": 0.051490514905149054, "grad_norm": 0.016949571669101715, "learning_rate": 9.816326581281485e-05, "loss": 0.0003, "step": 133 }, { "epoch": 0.051877661633759196, "grad_norm": 0.011933255940675735, "learning_rate": 9.813463522029957e-05, "loss": 0.0003, "step": 134 }, { "epoch": 0.05226480836236934, "grad_norm": 0.03951014205813408, "learning_rate": 9.810578744723911e-05, "loss": 0.0007, "step": 135 }, { "epoch": 0.05265195509097948, "grad_norm": 0.008960571140050888, "learning_rate": 9.807672262379282e-05, "loss": 0.0001, "step": 136 }, { "epoch": 0.053039101819589624, "grad_norm": 0.1299860030412674, "learning_rate": 9.804744088109943e-05, "loss": 0.0038, "step": 137 }, { "epoch": 0.053426248548199766, "grad_norm": 0.036717738956213, "learning_rate": 9.801794235127636e-05, "loss": 0.0004, "step": 138 }, { "epoch": 0.05381339527680991, "grad_norm": 0.008997570723295212, "learning_rate": 9.798822716741925e-05, "loss": 0.0002, "step": 139 }, { "epoch": 0.05420054200542006, "grad_norm": 0.03509435057640076, "learning_rate": 9.795829546360114e-05, "loss": 0.0005, "step": 140 }, { "epoch": 0.0545876887340302, "grad_norm": 0.034250613301992416, "learning_rate": 9.792814737487207e-05, "loss": 0.0006, "step": 141 }, { "epoch": 0.05497483546264034, "grad_norm": 0.0243786983191967, "learning_rate": 9.789778303725838e-05, "loss": 0.0005, "step": 142 }, { "epoch": 0.055361982191250485, "grad_norm": 0.0226341113448143, "learning_rate": 9.786720258776213e-05, "loss": 0.0004, "step": 143 }, { "epoch": 0.05574912891986063, "grad_norm": 0.23691046237945557, "learning_rate": 9.783640616436044e-05, "loss": 0.0013, "step": 144 }, { "epoch": 0.05613627564847077, "grad_norm": 0.010031294077634811, "learning_rate": 9.780539390600489e-05, "loss": 0.0002, "step": 145 }, { "epoch": 0.05652342237708091, "grad_norm": 0.07630310952663422, "learning_rate": 9.777416595262091e-05, "loss": 0.0013, "step": 146 }, { "epoch": 0.056910569105691054, "grad_norm": 0.25514844059944153, "learning_rate": 9.774272244510712e-05, "loss": 0.0045, "step": 147 }, { "epoch": 0.057297715834301204, "grad_norm": 0.03640531376004219, "learning_rate": 9.771106352533472e-05, "loss": 0.0007, "step": 148 }, { "epoch": 0.057684862562911346, "grad_norm": 0.07202103734016418, "learning_rate": 9.767918933614682e-05, "loss": 0.0016, "step": 149 }, { "epoch": 0.05807200929152149, "grad_norm": 0.12926505506038666, "learning_rate": 9.764710002135784e-05, "loss": 0.0023, "step": 150 }, { "epoch": 0.05845915602013163, "grad_norm": 0.05395910516381264, "learning_rate": 9.761479572575279e-05, "loss": 0.0121, "step": 151 }, { "epoch": 0.05884630274874177, "grad_norm": 0.06644894927740097, "learning_rate": 9.75822765950867e-05, "loss": 0.0046, "step": 152 }, { "epoch": 0.059233449477351915, "grad_norm": 0.02791532874107361, "learning_rate": 9.754954277608391e-05, "loss": 0.0009, "step": 153 }, { "epoch": 0.05962059620596206, "grad_norm": 0.03686601668596268, "learning_rate": 9.751659441643742e-05, "loss": 0.0016, "step": 154 }, { "epoch": 0.0600077429345722, "grad_norm": 0.04866962507367134, "learning_rate": 9.748343166480823e-05, "loss": 0.0016, "step": 155 }, { "epoch": 0.06039488966318235, "grad_norm": 0.06715308874845505, "learning_rate": 9.745005467082464e-05, "loss": 0.0011, "step": 156 }, { "epoch": 0.06078203639179249, "grad_norm": 0.10314395278692245, "learning_rate": 9.741646358508164e-05, "loss": 0.0038, "step": 157 }, { "epoch": 0.061169183120402634, "grad_norm": 0.02877042070031166, "learning_rate": 9.738265855914013e-05, "loss": 0.0005, "step": 158 }, { "epoch": 0.06155632984901278, "grad_norm": 0.07983982563018799, "learning_rate": 9.734863974552635e-05, "loss": 0.0019, "step": 159 }, { "epoch": 0.06194347657762292, "grad_norm": 0.006283288821578026, "learning_rate": 9.731440729773114e-05, "loss": 0.0001, "step": 160 }, { "epoch": 0.06233062330623306, "grad_norm": 0.004482879303395748, "learning_rate": 9.727996137020918e-05, "loss": 0.0001, "step": 161 }, { "epoch": 0.0627177700348432, "grad_norm": 0.0071263727732002735, "learning_rate": 9.72453021183784e-05, "loss": 0.0001, "step": 162 }, { "epoch": 0.06310491676345335, "grad_norm": 0.0014257959555834532, "learning_rate": 9.721042969861928e-05, "loss": 0.0, "step": 163 }, { "epoch": 0.06349206349206349, "grad_norm": 0.06279278546571732, "learning_rate": 9.717534426827404e-05, "loss": 0.0013, "step": 164 }, { "epoch": 0.06387921022067364, "grad_norm": 0.10048985481262207, "learning_rate": 9.714004598564598e-05, "loss": 0.0015, "step": 165 }, { "epoch": 0.06426635694928377, "grad_norm": 0.010703545063734055, "learning_rate": 9.710453500999882e-05, "loss": 0.0002, "step": 166 }, { "epoch": 0.06465350367789392, "grad_norm": 0.0008502176497131586, "learning_rate": 9.706881150155591e-05, "loss": 0.0, "step": 167 }, { "epoch": 0.06504065040650407, "grad_norm": 0.035037811845541, "learning_rate": 9.703287562149959e-05, "loss": 0.0003, "step": 168 }, { "epoch": 0.06542779713511421, "grad_norm": 0.0808122381567955, "learning_rate": 9.69967275319703e-05, "loss": 0.0007, "step": 169 }, { "epoch": 0.06581494386372436, "grad_norm": 0.0047109900042414665, "learning_rate": 9.696036739606606e-05, "loss": 0.0001, "step": 170 }, { "epoch": 0.06620209059233449, "grad_norm": 0.30203601717948914, "learning_rate": 9.692379537784157e-05, "loss": 0.0048, "step": 171 }, { "epoch": 0.06658923732094464, "grad_norm": 0.0027131803799420595, "learning_rate": 9.688701164230758e-05, "loss": 0.0001, "step": 172 }, { "epoch": 0.06697638404955478, "grad_norm": 0.002335549332201481, "learning_rate": 9.685001635543005e-05, "loss": 0.0001, "step": 173 }, { "epoch": 0.06736353077816493, "grad_norm": 0.0023098927922546864, "learning_rate": 9.681280968412947e-05, "loss": 0.0001, "step": 174 }, { "epoch": 0.06775067750677506, "grad_norm": 0.001849909545853734, "learning_rate": 9.677539179628005e-05, "loss": 0.0, "step": 175 }, { "epoch": 0.06813782423538521, "grad_norm": 0.32040995359420776, "learning_rate": 9.673776286070906e-05, "loss": 0.0028, "step": 176 }, { "epoch": 0.06852497096399536, "grad_norm": 0.0077479127794504166, "learning_rate": 9.669992304719595e-05, "loss": 0.0001, "step": 177 }, { "epoch": 0.0689121176926055, "grad_norm": 0.020282207056879997, "learning_rate": 9.666187252647165e-05, "loss": 0.0002, "step": 178 }, { "epoch": 0.06929926442121565, "grad_norm": 0.00533844530582428, "learning_rate": 9.662361147021779e-05, "loss": 0.0001, "step": 179 }, { "epoch": 0.06968641114982578, "grad_norm": 0.0043952250853180885, "learning_rate": 9.658514005106596e-05, "loss": 0.0001, "step": 180 }, { "epoch": 0.07007355787843593, "grad_norm": 0.09598931670188904, "learning_rate": 9.654645844259683e-05, "loss": 0.0023, "step": 181 }, { "epoch": 0.07046070460704607, "grad_norm": 0.006287871859967709, "learning_rate": 9.650756681933947e-05, "loss": 0.0001, "step": 182 }, { "epoch": 0.07084785133565621, "grad_norm": 0.03524954989552498, "learning_rate": 9.646846535677055e-05, "loss": 0.0002, "step": 183 }, { "epoch": 0.07123499806426636, "grad_norm": 0.005163317546248436, "learning_rate": 9.642915423131342e-05, "loss": 0.0001, "step": 184 }, { "epoch": 0.0716221447928765, "grad_norm": 0.03801601007580757, "learning_rate": 9.638963362033756e-05, "loss": 0.0009, "step": 185 }, { "epoch": 0.07200929152148665, "grad_norm": 0.009458637796342373, "learning_rate": 9.634990370215752e-05, "loss": 0.0001, "step": 186 }, { "epoch": 0.07239643825009678, "grad_norm": 0.07404721528291702, "learning_rate": 9.630996465603228e-05, "loss": 0.0011, "step": 187 }, { "epoch": 0.07278358497870693, "grad_norm": 0.1175776869058609, "learning_rate": 9.626981666216439e-05, "loss": 0.001, "step": 188 }, { "epoch": 0.07317073170731707, "grad_norm": 0.07584076374769211, "learning_rate": 9.622945990169916e-05, "loss": 0.0007, "step": 189 }, { "epoch": 0.07355787843592722, "grad_norm": 0.01918666809797287, "learning_rate": 9.618889455672384e-05, "loss": 0.0001, "step": 190 }, { "epoch": 0.07394502516453735, "grad_norm": 1.1152503490447998, "learning_rate": 9.614812081026678e-05, "loss": 0.0111, "step": 191 }, { "epoch": 0.0743321718931475, "grad_norm": 0.0046646371483802795, "learning_rate": 9.610713884629666e-05, "loss": 0.0, "step": 192 }, { "epoch": 0.07471931862175765, "grad_norm": 0.0027072590310126543, "learning_rate": 9.60659488497216e-05, "loss": 0.0001, "step": 193 }, { "epoch": 0.07510646535036779, "grad_norm": 0.4910131096839905, "learning_rate": 9.602455100638836e-05, "loss": 0.0041, "step": 194 }, { "epoch": 0.07549361207897794, "grad_norm": 0.0024883951991796494, "learning_rate": 9.598294550308149e-05, "loss": 0.0, "step": 195 }, { "epoch": 0.07588075880758807, "grad_norm": 0.012495968490839005, "learning_rate": 9.594113252752248e-05, "loss": 0.0001, "step": 196 }, { "epoch": 0.07626790553619822, "grad_norm": 0.012010253965854645, "learning_rate": 9.589911226836896e-05, "loss": 0.0001, "step": 197 }, { "epoch": 0.07665505226480836, "grad_norm": 0.019101502373814583, "learning_rate": 9.585688491521374e-05, "loss": 0.0002, "step": 198 }, { "epoch": 0.0770421989934185, "grad_norm": 0.018270643427968025, "learning_rate": 9.58144506585841e-05, "loss": 0.0004, "step": 199 }, { "epoch": 0.07742934572202866, "grad_norm": 1.0924277305603027, "learning_rate": 9.577180968994082e-05, "loss": 0.0119, "step": 200 }, { "epoch": 0.07781649245063879, "grad_norm": 0.05194557085633278, "learning_rate": 9.572896220167735e-05, "loss": 0.0083, "step": 201 }, { "epoch": 0.07820363917924894, "grad_norm": 0.0748182013630867, "learning_rate": 9.568590838711895e-05, "loss": 0.0026, "step": 202 }, { "epoch": 0.07859078590785908, "grad_norm": 0.04829065874218941, "learning_rate": 9.564264844052182e-05, "loss": 0.0009, "step": 203 }, { "epoch": 0.07897793263646923, "grad_norm": 0.07577355951070786, "learning_rate": 9.559918255707219e-05, "loss": 0.0031, "step": 204 }, { "epoch": 0.07936507936507936, "grad_norm": 0.09440341591835022, "learning_rate": 9.555551093288548e-05, "loss": 0.0033, "step": 205 }, { "epoch": 0.07975222609368951, "grad_norm": 0.028996804729104042, "learning_rate": 9.551163376500543e-05, "loss": 0.0002, "step": 206 }, { "epoch": 0.08013937282229965, "grad_norm": 0.12111848592758179, "learning_rate": 9.546755125140313e-05, "loss": 0.0018, "step": 207 }, { "epoch": 0.0805265195509098, "grad_norm": 0.01013265922665596, "learning_rate": 9.542326359097619e-05, "loss": 0.0001, "step": 208 }, { "epoch": 0.08091366627951994, "grad_norm": 0.04623102769255638, "learning_rate": 9.537877098354786e-05, "loss": 0.0007, "step": 209 }, { "epoch": 0.08130081300813008, "grad_norm": 0.0021863309666514397, "learning_rate": 9.533407362986606e-05, "loss": 0.0, "step": 210 }, { "epoch": 0.08168795973674023, "grad_norm": 0.0072447373531758785, "learning_rate": 9.528917173160255e-05, "loss": 0.0001, "step": 211 }, { "epoch": 0.08207510646535036, "grad_norm": 0.011722790077328682, "learning_rate": 9.524406549135194e-05, "loss": 0.0002, "step": 212 }, { "epoch": 0.08246225319396051, "grad_norm": 0.08522871136665344, "learning_rate": 9.519875511263086e-05, "loss": 0.0006, "step": 213 }, { "epoch": 0.08284939992257065, "grad_norm": 0.03702247142791748, "learning_rate": 9.515324079987697e-05, "loss": 0.0007, "step": 214 }, { "epoch": 0.0832365466511808, "grad_norm": 0.004982550162822008, "learning_rate": 9.51075227584481e-05, "loss": 0.0001, "step": 215 }, { "epoch": 0.08362369337979095, "grad_norm": 0.0032351913396269083, "learning_rate": 9.506160119462124e-05, "loss": 0.0, "step": 216 }, { "epoch": 0.08401084010840108, "grad_norm": 0.008003685623407364, "learning_rate": 9.501547631559172e-05, "loss": 0.0001, "step": 217 }, { "epoch": 0.08439798683701123, "grad_norm": 0.21871095895767212, "learning_rate": 9.496914832947215e-05, "loss": 0.0045, "step": 218 }, { "epoch": 0.08478513356562137, "grad_norm": 0.05447891354560852, "learning_rate": 9.492261744529163e-05, "loss": 0.0008, "step": 219 }, { "epoch": 0.08517228029423152, "grad_norm": 0.019385039806365967, "learning_rate": 9.487588387299464e-05, "loss": 0.0003, "step": 220 }, { "epoch": 0.08555942702284165, "grad_norm": 0.009612143971025944, "learning_rate": 9.482894782344025e-05, "loss": 0.0002, "step": 221 }, { "epoch": 0.0859465737514518, "grad_norm": 0.1183067038655281, "learning_rate": 9.478180950840103e-05, "loss": 0.0007, "step": 222 }, { "epoch": 0.08633372048006194, "grad_norm": 0.2591310143470764, "learning_rate": 9.47344691405622e-05, "loss": 0.0008, "step": 223 }, { "epoch": 0.08672086720867209, "grad_norm": 0.0012734796619042754, "learning_rate": 9.468692693352063e-05, "loss": 0.0, "step": 224 }, { "epoch": 0.08710801393728224, "grad_norm": 1.0743658542633057, "learning_rate": 9.463918310178384e-05, "loss": 0.0083, "step": 225 }, { "epoch": 0.08749516066589237, "grad_norm": 0.0012743185507133603, "learning_rate": 9.459123786076912e-05, "loss": 0.0, "step": 226 }, { "epoch": 0.08788230739450252, "grad_norm": 0.007091084029525518, "learning_rate": 9.454309142680247e-05, "loss": 0.0, "step": 227 }, { "epoch": 0.08826945412311266, "grad_norm": 0.16258011758327484, "learning_rate": 9.449474401711766e-05, "loss": 0.0007, "step": 228 }, { "epoch": 0.0886566008517228, "grad_norm": 0.02502388134598732, "learning_rate": 9.444619584985526e-05, "loss": 0.0002, "step": 229 }, { "epoch": 0.08904374758033294, "grad_norm": 0.020117763429880142, "learning_rate": 9.439744714406167e-05, "loss": 0.0001, "step": 230 }, { "epoch": 0.08943089430894309, "grad_norm": 0.025272773578763008, "learning_rate": 9.434849811968807e-05, "loss": 0.0001, "step": 231 }, { "epoch": 0.08981804103755324, "grad_norm": 0.0023280701134353876, "learning_rate": 9.429934899758948e-05, "loss": 0.0, "step": 232 }, { "epoch": 0.09020518776616337, "grad_norm": 0.04557230323553085, "learning_rate": 9.424999999952375e-05, "loss": 0.0003, "step": 233 }, { "epoch": 0.09059233449477352, "grad_norm": 0.3395751714706421, "learning_rate": 9.420045134815056e-05, "loss": 0.0009, "step": 234 }, { "epoch": 0.09097948122338366, "grad_norm": 0.3556441068649292, "learning_rate": 9.41507032670304e-05, "loss": 0.0203, "step": 235 }, { "epoch": 0.09136662795199381, "grad_norm": 0.021049408242106438, "learning_rate": 9.410075598062358e-05, "loss": 0.0001, "step": 236 }, { "epoch": 0.09175377468060394, "grad_norm": 0.0035629859194159508, "learning_rate": 9.405060971428923e-05, "loss": 0.0, "step": 237 }, { "epoch": 0.0921409214092141, "grad_norm": 1.088625431060791, "learning_rate": 9.400026469428424e-05, "loss": 0.003, "step": 238 }, { "epoch": 0.09252806813782423, "grad_norm": 0.34952932596206665, "learning_rate": 9.39497211477623e-05, "loss": 0.0006, "step": 239 }, { "epoch": 0.09291521486643438, "grad_norm": 0.01403273455798626, "learning_rate": 9.38989793027728e-05, "loss": 0.0002, "step": 240 }, { "epoch": 0.09330236159504453, "grad_norm": 0.0016716537065804005, "learning_rate": 9.384803938825988e-05, "loss": 0.0, "step": 241 }, { "epoch": 0.09368950832365466, "grad_norm": 0.006439753342419863, "learning_rate": 9.379690163406128e-05, "loss": 0.0, "step": 242 }, { "epoch": 0.09407665505226481, "grad_norm": 0.0021656707394868135, "learning_rate": 9.374556627090749e-05, "loss": 0.0, "step": 243 }, { "epoch": 0.09446380178087495, "grad_norm": 0.0023138655815273523, "learning_rate": 9.369403353042052e-05, "loss": 0.0, "step": 244 }, { "epoch": 0.0948509485094851, "grad_norm": 0.002230309648439288, "learning_rate": 9.364230364511296e-05, "loss": 0.0001, "step": 245 }, { "epoch": 0.09523809523809523, "grad_norm": 0.014607599936425686, "learning_rate": 9.35903768483869e-05, "loss": 0.0001, "step": 246 }, { "epoch": 0.09562524196670538, "grad_norm": 0.005560693796724081, "learning_rate": 9.353825337453291e-05, "loss": 0.0001, "step": 247 }, { "epoch": 0.09601238869531553, "grad_norm": 0.013218767009675503, "learning_rate": 9.348593345872891e-05, "loss": 0.0002, "step": 248 }, { "epoch": 0.09639953542392567, "grad_norm": 0.05328545346856117, "learning_rate": 9.343341733703919e-05, "loss": 0.0004, "step": 249 }, { "epoch": 0.09678668215253582, "grad_norm": 0.0031676662620157003, "learning_rate": 9.338070524641329e-05, "loss": 0.0001, "step": 250 }, { "epoch": 0.09717382888114595, "grad_norm": 0.050222184509038925, "learning_rate": 9.332779742468496e-05, "loss": 0.0157, "step": 251 }, { "epoch": 0.0975609756097561, "grad_norm": 0.028248561546206474, "learning_rate": 9.327469411057106e-05, "loss": 0.0007, "step": 252 }, { "epoch": 0.09794812233836624, "grad_norm": 0.03661990538239479, "learning_rate": 9.322139554367052e-05, "loss": 0.0005, "step": 253 }, { "epoch": 0.09833526906697639, "grad_norm": 0.04206279292702675, "learning_rate": 9.316790196446324e-05, "loss": 0.0013, "step": 254 }, { "epoch": 0.09872241579558652, "grad_norm": 0.11051138490438461, "learning_rate": 9.311421361430898e-05, "loss": 0.0047, "step": 255 }, { "epoch": 0.09910956252419667, "grad_norm": 0.08012690395116806, "learning_rate": 9.306033073544631e-05, "loss": 0.0006, "step": 256 }, { "epoch": 0.09949670925280682, "grad_norm": 0.05237819254398346, "learning_rate": 9.300625357099151e-05, "loss": 0.0008, "step": 257 }, { "epoch": 0.09988385598141696, "grad_norm": 0.014332195743918419, "learning_rate": 9.295198236493745e-05, "loss": 0.0002, "step": 258 }, { "epoch": 0.1002710027100271, "grad_norm": 0.05660046637058258, "learning_rate": 9.289751736215251e-05, "loss": 0.0004, "step": 259 }, { "epoch": 0.10065814943863724, "grad_norm": 0.09471816569566727, "learning_rate": 9.284285880837946e-05, "loss": 0.0009, "step": 260 }, { "epoch": 0.10104529616724739, "grad_norm": 0.0016173557378351688, "learning_rate": 9.27880069502344e-05, "loss": 0.0, "step": 261 }, { "epoch": 0.10143244289585752, "grad_norm": 0.1207282617688179, "learning_rate": 9.273296203520553e-05, "loss": 0.0009, "step": 262 }, { "epoch": 0.10181958962446767, "grad_norm": 0.0016727615147829056, "learning_rate": 9.267772431165218e-05, "loss": 0.0, "step": 263 }, { "epoch": 0.10220673635307782, "grad_norm": 0.010696031153202057, "learning_rate": 9.262229402880362e-05, "loss": 0.0001, "step": 264 }, { "epoch": 0.10259388308168796, "grad_norm": 0.003082268638536334, "learning_rate": 9.256667143675789e-05, "loss": 0.0001, "step": 265 }, { "epoch": 0.10298102981029811, "grad_norm": 0.019705908372998238, "learning_rate": 9.251085678648072e-05, "loss": 0.0001, "step": 266 }, { "epoch": 0.10336817653890824, "grad_norm": 0.0005683451890945435, "learning_rate": 9.245485032980445e-05, "loss": 0.0, "step": 267 }, { "epoch": 0.10375532326751839, "grad_norm": 0.0019504778319969773, "learning_rate": 9.239865231942678e-05, "loss": 0.0, "step": 268 }, { "epoch": 0.10414246999612853, "grad_norm": 0.002394254319369793, "learning_rate": 9.234226300890974e-05, "loss": 0.0, "step": 269 }, { "epoch": 0.10452961672473868, "grad_norm": 0.00082309142453596, "learning_rate": 9.228568265267845e-05, "loss": 0.0, "step": 270 }, { "epoch": 0.10491676345334881, "grad_norm": 0.002265011891722679, "learning_rate": 9.222891150602005e-05, "loss": 0.0001, "step": 271 }, { "epoch": 0.10530391018195896, "grad_norm": 0.0030987069476395845, "learning_rate": 9.217194982508247e-05, "loss": 0.0, "step": 272 }, { "epoch": 0.10569105691056911, "grad_norm": 0.001322443364188075, "learning_rate": 9.211479786687338e-05, "loss": 0.0, "step": 273 }, { "epoch": 0.10607820363917925, "grad_norm": 0.010089612565934658, "learning_rate": 9.205745588925891e-05, "loss": 0.0001, "step": 274 }, { "epoch": 0.1064653503677894, "grad_norm": 0.3834461569786072, "learning_rate": 9.199992415096261e-05, "loss": 0.0114, "step": 275 }, { "epoch": 0.10685249709639953, "grad_norm": 0.0012483131140470505, "learning_rate": 9.194220291156413e-05, "loss": 0.0, "step": 276 }, { "epoch": 0.10723964382500968, "grad_norm": 0.0014100170228630304, "learning_rate": 9.188429243149824e-05, "loss": 0.0, "step": 277 }, { "epoch": 0.10762679055361982, "grad_norm": 0.0015941811725497246, "learning_rate": 9.182619297205348e-05, "loss": 0.0, "step": 278 }, { "epoch": 0.10801393728222997, "grad_norm": 0.0005449766758829355, "learning_rate": 9.176790479537108e-05, "loss": 0.0, "step": 279 }, { "epoch": 0.10840108401084012, "grad_norm": 0.1997094601392746, "learning_rate": 9.170942816444375e-05, "loss": 0.0027, "step": 280 }, { "epoch": 0.10878823073945025, "grad_norm": 0.001990030286833644, "learning_rate": 9.165076334311446e-05, "loss": 0.0, "step": 281 }, { "epoch": 0.1091753774680604, "grad_norm": 0.009841017425060272, "learning_rate": 9.159191059607537e-05, "loss": 0.0001, "step": 282 }, { "epoch": 0.10956252419667054, "grad_norm": 0.0011333709117025137, "learning_rate": 9.153287018886644e-05, "loss": 0.0, "step": 283 }, { "epoch": 0.10994967092528068, "grad_norm": 0.0004827795783057809, "learning_rate": 9.147364238787443e-05, "loss": 0.0, "step": 284 }, { "epoch": 0.11033681765389082, "grad_norm": 0.0046328590251505375, "learning_rate": 9.141422746033156e-05, "loss": 0.0, "step": 285 }, { "epoch": 0.11072396438250097, "grad_norm": 0.00042733096051961184, "learning_rate": 9.135462567431438e-05, "loss": 0.0, "step": 286 }, { "epoch": 0.1111111111111111, "grad_norm": 0.023281529545783997, "learning_rate": 9.12948372987425e-05, "loss": 0.0001, "step": 287 }, { "epoch": 0.11149825783972125, "grad_norm": 0.0010597595246508718, "learning_rate": 9.123486260337744e-05, "loss": 0.0, "step": 288 }, { "epoch": 0.1118854045683314, "grad_norm": 0.006935764104127884, "learning_rate": 9.117470185882139e-05, "loss": 0.0001, "step": 289 }, { "epoch": 0.11227255129694154, "grad_norm": 0.011282574385404587, "learning_rate": 9.111435533651596e-05, "loss": 0.0001, "step": 290 }, { "epoch": 0.11265969802555169, "grad_norm": 0.006138740107417107, "learning_rate": 9.105382330874099e-05, "loss": 0.0001, "step": 291 }, { "epoch": 0.11304684475416182, "grad_norm": 0.0003823502629529685, "learning_rate": 9.099310604861329e-05, "loss": 0.0, "step": 292 }, { "epoch": 0.11343399148277197, "grad_norm": 0.009164854884147644, "learning_rate": 9.093220383008545e-05, "loss": 0.0001, "step": 293 }, { "epoch": 0.11382113821138211, "grad_norm": 0.005529541522264481, "learning_rate": 9.087111692794459e-05, "loss": 0.0001, "step": 294 }, { "epoch": 0.11420828493999226, "grad_norm": 0.04618334770202637, "learning_rate": 9.08098456178111e-05, "loss": 0.0009, "step": 295 }, { "epoch": 0.11459543166860241, "grad_norm": 0.005164325702935457, "learning_rate": 9.074839017613736e-05, "loss": 0.0001, "step": 296 }, { "epoch": 0.11498257839721254, "grad_norm": 0.005010255612432957, "learning_rate": 9.068675088020661e-05, "loss": 0.0001, "step": 297 }, { "epoch": 0.11536972512582269, "grad_norm": 0.0010323580354452133, "learning_rate": 9.062492800813162e-05, "loss": 0.0, "step": 298 }, { "epoch": 0.11575687185443283, "grad_norm": 0.0012561334297060966, "learning_rate": 9.056292183885343e-05, "loss": 0.0, "step": 299 }, { "epoch": 0.11614401858304298, "grad_norm": 0.02239901013672352, "learning_rate": 9.050073265214005e-05, "loss": 0.0003, "step": 300 }, { "epoch": 0.11653116531165311, "grad_norm": 0.03204142302274704, "learning_rate": 9.043836072858535e-05, "loss": 0.0118, "step": 301 }, { "epoch": 0.11691831204026326, "grad_norm": 0.01608111336827278, "learning_rate": 9.037580634960764e-05, "loss": 0.0005, "step": 302 }, { "epoch": 0.1173054587688734, "grad_norm": 0.006757182534784079, "learning_rate": 9.031306979744847e-05, "loss": 0.0001, "step": 303 }, { "epoch": 0.11769260549748355, "grad_norm": 0.12025570124387741, "learning_rate": 9.025015135517137e-05, "loss": 0.0013, "step": 304 }, { "epoch": 0.1180797522260937, "grad_norm": 0.03977954015135765, "learning_rate": 9.01870513066605e-05, "loss": 0.0009, "step": 305 }, { "epoch": 0.11846689895470383, "grad_norm": 0.0013618416851386428, "learning_rate": 9.012376993661944e-05, "loss": 0.0, "step": 306 }, { "epoch": 0.11885404568331398, "grad_norm": 0.018894262611865997, "learning_rate": 9.006030753056989e-05, "loss": 0.0006, "step": 307 }, { "epoch": 0.11924119241192412, "grad_norm": 0.12205176800489426, "learning_rate": 8.999666437485035e-05, "loss": 0.0008, "step": 308 }, { "epoch": 0.11962833914053426, "grad_norm": 0.0020962394773960114, "learning_rate": 8.993284075661487e-05, "loss": 0.0001, "step": 309 }, { "epoch": 0.1200154858691444, "grad_norm": 0.0007874541915953159, "learning_rate": 8.986883696383175e-05, "loss": 0.0, "step": 310 }, { "epoch": 0.12040263259775455, "grad_norm": 0.015855029225349426, "learning_rate": 8.980465328528219e-05, "loss": 0.0002, "step": 311 }, { "epoch": 0.1207897793263647, "grad_norm": 0.004300241824239492, "learning_rate": 8.974029001055906e-05, "loss": 0.0001, "step": 312 }, { "epoch": 0.12117692605497483, "grad_norm": 0.06699482351541519, "learning_rate": 8.967574743006551e-05, "loss": 0.0011, "step": 313 }, { "epoch": 0.12156407278358498, "grad_norm": 0.0414130873978138, "learning_rate": 8.961102583501377e-05, "loss": 0.0009, "step": 314 }, { "epoch": 0.12195121951219512, "grad_norm": 0.0015767719596624374, "learning_rate": 8.95461255174237e-05, "loss": 0.0, "step": 315 }, { "epoch": 0.12233836624080527, "grad_norm": 0.006629679352045059, "learning_rate": 8.948104677012161e-05, "loss": 0.0001, "step": 316 }, { "epoch": 0.1227255129694154, "grad_norm": 0.03726351261138916, "learning_rate": 8.941578988673887e-05, "loss": 0.0002, "step": 317 }, { "epoch": 0.12311265969802555, "grad_norm": 0.01188661903142929, "learning_rate": 8.935035516171051e-05, "loss": 0.0001, "step": 318 }, { "epoch": 0.12349980642663569, "grad_norm": 0.018168246373534203, "learning_rate": 8.928474289027407e-05, "loss": 0.0001, "step": 319 }, { "epoch": 0.12388695315524584, "grad_norm": 0.0020913630723953247, "learning_rate": 8.921895336846813e-05, "loss": 0.0, "step": 320 }, { "epoch": 0.12427409988385599, "grad_norm": 0.008128736168146133, "learning_rate": 8.915298689313097e-05, "loss": 0.0001, "step": 321 }, { "epoch": 0.12466124661246612, "grad_norm": 0.02947946824133396, "learning_rate": 8.908684376189935e-05, "loss": 0.0004, "step": 322 }, { "epoch": 0.12504839334107626, "grad_norm": 0.0005556530086323619, "learning_rate": 8.902052427320704e-05, "loss": 0.0, "step": 323 }, { "epoch": 0.1254355400696864, "grad_norm": 0.00042274571023881435, "learning_rate": 8.895402872628352e-05, "loss": 0.0, "step": 324 }, { "epoch": 0.12582268679829656, "grad_norm": 0.15616343915462494, "learning_rate": 8.888735742115268e-05, "loss": 0.0031, "step": 325 }, { "epoch": 0.1262098335269067, "grad_norm": 0.0006327281007543206, "learning_rate": 8.88205106586314e-05, "loss": 0.0, "step": 326 }, { "epoch": 0.12659698025551683, "grad_norm": 0.0013813385739922523, "learning_rate": 8.875348874032817e-05, "loss": 0.0, "step": 327 }, { "epoch": 0.12698412698412698, "grad_norm": 0.0075355893932282925, "learning_rate": 8.868629196864182e-05, "loss": 0.0, "step": 328 }, { "epoch": 0.12737127371273713, "grad_norm": 0.0004084401880390942, "learning_rate": 8.861892064676009e-05, "loss": 0.0, "step": 329 }, { "epoch": 0.12775842044134728, "grad_norm": 0.0006156496237963438, "learning_rate": 8.855137507865832e-05, "loss": 0.0, "step": 330 }, { "epoch": 0.12814556716995743, "grad_norm": 0.0027660855557769537, "learning_rate": 8.848365556909796e-05, "loss": 0.0, "step": 331 }, { "epoch": 0.12853271389856755, "grad_norm": 0.04597468674182892, "learning_rate": 8.841576242362533e-05, "loss": 0.0002, "step": 332 }, { "epoch": 0.1289198606271777, "grad_norm": 0.006770620588213205, "learning_rate": 8.834769594857019e-05, "loss": 0.0001, "step": 333 }, { "epoch": 0.12930700735578785, "grad_norm": 0.0005002229590900242, "learning_rate": 8.827945645104429e-05, "loss": 0.0, "step": 334 }, { "epoch": 0.129694154084398, "grad_norm": 0.0015748875448480248, "learning_rate": 8.821104423894016e-05, "loss": 0.0, "step": 335 }, { "epoch": 0.13008130081300814, "grad_norm": 0.020414279773831367, "learning_rate": 8.814245962092946e-05, "loss": 0.0003, "step": 336 }, { "epoch": 0.13046844754161827, "grad_norm": 0.024940207600593567, "learning_rate": 8.807370290646186e-05, "loss": 0.0001, "step": 337 }, { "epoch": 0.13085559427022841, "grad_norm": 0.0011114513035863638, "learning_rate": 8.800477440576347e-05, "loss": 0.0, "step": 338 }, { "epoch": 0.13124274099883856, "grad_norm": 0.006404312793165445, "learning_rate": 8.793567442983548e-05, "loss": 0.0001, "step": 339 }, { "epoch": 0.1316298877274487, "grad_norm": 0.0012291868915781379, "learning_rate": 8.786640329045278e-05, "loss": 0.0, "step": 340 }, { "epoch": 0.13201703445605883, "grad_norm": 0.027518250048160553, "learning_rate": 8.779696130016253e-05, "loss": 0.0002, "step": 341 }, { "epoch": 0.13240418118466898, "grad_norm": 0.000955001509282738, "learning_rate": 8.772734877228276e-05, "loss": 0.0, "step": 342 }, { "epoch": 0.13279132791327913, "grad_norm": 0.0012375194346532226, "learning_rate": 8.765756602090101e-05, "loss": 0.0, "step": 343 }, { "epoch": 0.13317847464188928, "grad_norm": 0.0009470246150158346, "learning_rate": 8.758761336087274e-05, "loss": 0.0, "step": 344 }, { "epoch": 0.13356562137049943, "grad_norm": 0.01968814805150032, "learning_rate": 8.751749110782012e-05, "loss": 0.0002, "step": 345 }, { "epoch": 0.13395276809910955, "grad_norm": 0.5433384776115417, "learning_rate": 8.74471995781305e-05, "loss": 0.0105, "step": 346 }, { "epoch": 0.1343399148277197, "grad_norm": 0.0011947732418775558, "learning_rate": 8.737673908895498e-05, "loss": 0.0, "step": 347 }, { "epoch": 0.13472706155632985, "grad_norm": 0.005630856845527887, "learning_rate": 8.7306109958207e-05, "loss": 0.0001, "step": 348 }, { "epoch": 0.13511420828494, "grad_norm": 0.00635663652792573, "learning_rate": 8.72353125045609e-05, "loss": 0.0001, "step": 349 }, { "epoch": 0.13550135501355012, "grad_norm": 0.8739410638809204, "learning_rate": 8.716434704745046e-05, "loss": 0.0276, "step": 350 }, { "epoch": 0.13588850174216027, "grad_norm": 0.05545734986662865, "learning_rate": 8.709321390706756e-05, "loss": 0.0078, "step": 351 }, { "epoch": 0.13627564847077042, "grad_norm": 0.027743853628635406, "learning_rate": 8.702191340436054e-05, "loss": 0.0007, "step": 352 }, { "epoch": 0.13666279519938057, "grad_norm": 0.10148515552282333, "learning_rate": 8.695044586103296e-05, "loss": 0.0048, "step": 353 }, { "epoch": 0.13704994192799072, "grad_norm": 0.031402137130498886, "learning_rate": 8.687881159954202e-05, "loss": 0.0008, "step": 354 }, { "epoch": 0.13743708865660084, "grad_norm": 0.055899184197187424, "learning_rate": 8.680701094309716e-05, "loss": 0.0008, "step": 355 }, { "epoch": 0.137824235385211, "grad_norm": 0.024489685893058777, "learning_rate": 8.673504421565856e-05, "loss": 0.0004, "step": 356 }, { "epoch": 0.13821138211382114, "grad_norm": 0.013303971849381924, "learning_rate": 8.666291174193573e-05, "loss": 0.0001, "step": 357 }, { "epoch": 0.1385985288424313, "grad_norm": 0.009390785358846188, "learning_rate": 8.659061384738598e-05, "loss": 0.0001, "step": 358 }, { "epoch": 0.1389856755710414, "grad_norm": 0.04934685677289963, "learning_rate": 8.651815085821303e-05, "loss": 0.0003, "step": 359 }, { "epoch": 0.13937282229965156, "grad_norm": 0.001061944873072207, "learning_rate": 8.644552310136546e-05, "loss": 0.0, "step": 360 }, { "epoch": 0.1397599690282617, "grad_norm": 0.005462823435664177, "learning_rate": 8.637273090453532e-05, "loss": 0.0001, "step": 361 }, { "epoch": 0.14014711575687186, "grad_norm": 0.0016542257508262992, "learning_rate": 8.629977459615655e-05, "loss": 0.0, "step": 362 }, { "epoch": 0.140534262485482, "grad_norm": 0.0004302252782508731, "learning_rate": 8.622665450540357e-05, "loss": 0.0, "step": 363 }, { "epoch": 0.14092140921409213, "grad_norm": 0.022549882531166077, "learning_rate": 8.615337096218979e-05, "loss": 0.0004, "step": 364 }, { "epoch": 0.14130855594270228, "grad_norm": 0.024535585194826126, "learning_rate": 8.60799242971661e-05, "loss": 0.0001, "step": 365 }, { "epoch": 0.14169570267131243, "grad_norm": 0.0006051576347090304, "learning_rate": 8.600631484171938e-05, "loss": 0.0, "step": 366 }, { "epoch": 0.14208284939992258, "grad_norm": 0.0036598185542970896, "learning_rate": 8.593254292797099e-05, "loss": 0.0, "step": 367 }, { "epoch": 0.14246999612853273, "grad_norm": 0.11391391605138779, "learning_rate": 8.585860888877537e-05, "loss": 0.0015, "step": 368 }, { "epoch": 0.14285714285714285, "grad_norm": 0.026237154379487038, "learning_rate": 8.578451305771836e-05, "loss": 0.0003, "step": 369 }, { "epoch": 0.143244289585753, "grad_norm": 0.005311033688485622, "learning_rate": 8.571025576911587e-05, "loss": 0.0001, "step": 370 }, { "epoch": 0.14363143631436315, "grad_norm": 0.0019114494789391756, "learning_rate": 8.563583735801224e-05, "loss": 0.0, "step": 371 }, { "epoch": 0.1440185830429733, "grad_norm": 0.0007387467776425183, "learning_rate": 8.556125816017882e-05, "loss": 0.0, "step": 372 }, { "epoch": 0.14440572977158342, "grad_norm": 0.17123480141162872, "learning_rate": 8.548651851211245e-05, "loss": 0.0016, "step": 373 }, { "epoch": 0.14479287650019357, "grad_norm": 0.0009499336592853069, "learning_rate": 8.541161875103381e-05, "loss": 0.0, "step": 374 }, { "epoch": 0.14518002322880372, "grad_norm": 0.001388967502862215, "learning_rate": 8.533655921488612e-05, "loss": 0.0, "step": 375 }, { "epoch": 0.14556716995741387, "grad_norm": 0.0017165900208055973, "learning_rate": 8.526134024233341e-05, "loss": 0.0, "step": 376 }, { "epoch": 0.14595431668602402, "grad_norm": 0.001004920806735754, "learning_rate": 8.518596217275912e-05, "loss": 0.0, "step": 377 }, { "epoch": 0.14634146341463414, "grad_norm": 0.0011910515604540706, "learning_rate": 8.511042534626449e-05, "loss": 0.0, "step": 378 }, { "epoch": 0.1467286101432443, "grad_norm": 0.020283464342355728, "learning_rate": 8.503473010366713e-05, "loss": 0.0001, "step": 379 }, { "epoch": 0.14711575687185444, "grad_norm": 0.0019051478011533618, "learning_rate": 8.495887678649933e-05, "loss": 0.0, "step": 380 }, { "epoch": 0.14750290360046459, "grad_norm": 0.0011362056247889996, "learning_rate": 8.488286573700665e-05, "loss": 0.0, "step": 381 }, { "epoch": 0.1478900503290747, "grad_norm": 0.02182379551231861, "learning_rate": 8.480669729814635e-05, "loss": 0.0002, "step": 382 }, { "epoch": 0.14827719705768486, "grad_norm": 0.006234586238861084, "learning_rate": 8.473037181358574e-05, "loss": 0.0001, "step": 383 }, { "epoch": 0.148664343786295, "grad_norm": 0.0006982333725318313, "learning_rate": 8.465388962770077e-05, "loss": 0.0, "step": 384 }, { "epoch": 0.14905149051490515, "grad_norm": 0.0026596917305141687, "learning_rate": 8.457725108557446e-05, "loss": 0.0, "step": 385 }, { "epoch": 0.1494386372435153, "grad_norm": 0.0008504387806169689, "learning_rate": 8.450045653299521e-05, "loss": 0.0, "step": 386 }, { "epoch": 0.14982578397212543, "grad_norm": 0.00914840493351221, "learning_rate": 8.442350631645536e-05, "loss": 0.0001, "step": 387 }, { "epoch": 0.15021293070073558, "grad_norm": 0.001677824417129159, "learning_rate": 8.434640078314962e-05, "loss": 0.0, "step": 388 }, { "epoch": 0.15060007742934572, "grad_norm": 0.002033672295510769, "learning_rate": 8.426914028097349e-05, "loss": 0.0, "step": 389 }, { "epoch": 0.15098722415795587, "grad_norm": 1.3247064352035522, "learning_rate": 8.419172515852159e-05, "loss": 0.0154, "step": 390 }, { "epoch": 0.151374370886566, "grad_norm": 0.0004959017969667912, "learning_rate": 8.41141557650863e-05, "loss": 0.0, "step": 391 }, { "epoch": 0.15176151761517614, "grad_norm": 0.0022033858112990856, "learning_rate": 8.403643245065598e-05, "loss": 0.0, "step": 392 }, { "epoch": 0.1521486643437863, "grad_norm": 0.00043081503827124834, "learning_rate": 8.39585555659135e-05, "loss": 0.0, "step": 393 }, { "epoch": 0.15253581107239644, "grad_norm": 0.0013331023510545492, "learning_rate": 8.388052546223461e-05, "loss": 0.0, "step": 394 }, { "epoch": 0.1529229578010066, "grad_norm": 0.0025198757648468018, "learning_rate": 8.380234249168641e-05, "loss": 0.0, "step": 395 }, { "epoch": 0.15331010452961671, "grad_norm": 0.008715974166989326, "learning_rate": 8.37240070070257e-05, "loss": 0.0001, "step": 396 }, { "epoch": 0.15369725125822686, "grad_norm": 0.0013087478000670671, "learning_rate": 8.364551936169742e-05, "loss": 0.0, "step": 397 }, { "epoch": 0.154084397986837, "grad_norm": 0.00940459594130516, "learning_rate": 8.356687990983306e-05, "loss": 0.0001, "step": 398 }, { "epoch": 0.15447154471544716, "grad_norm": 0.007965102791786194, "learning_rate": 8.348808900624901e-05, "loss": 0.0001, "step": 399 }, { "epoch": 0.1548586914440573, "grad_norm": 0.31034138798713684, "learning_rate": 8.340914700644506e-05, "loss": 0.0072, "step": 400 }, { "epoch": 0.15524583817266743, "grad_norm": 0.0318954698741436, "learning_rate": 8.333005426660272e-05, "loss": 0.0048, "step": 401 }, { "epoch": 0.15563298490127758, "grad_norm": 0.018714942038059235, "learning_rate": 8.32508111435836e-05, "loss": 0.0006, "step": 402 }, { "epoch": 0.15602013162988773, "grad_norm": 0.0029496820643544197, "learning_rate": 8.317141799492782e-05, "loss": 0.0001, "step": 403 }, { "epoch": 0.15640727835849788, "grad_norm": 0.01938321255147457, "learning_rate": 8.309187517885249e-05, "loss": 0.0003, "step": 404 }, { "epoch": 0.156794425087108, "grad_norm": 0.05222015455365181, "learning_rate": 8.301218305424994e-05, "loss": 0.0009, "step": 405 }, { "epoch": 0.15718157181571815, "grad_norm": 0.0011752565624192357, "learning_rate": 8.293234198068619e-05, "loss": 0.0, "step": 406 }, { "epoch": 0.1575687185443283, "grad_norm": 0.0020070699974894524, "learning_rate": 8.285235231839928e-05, "loss": 0.0001, "step": 407 }, { "epoch": 0.15795586527293845, "grad_norm": 0.04195886477828026, "learning_rate": 8.27722144282977e-05, "loss": 0.0037, "step": 408 }, { "epoch": 0.1583430120015486, "grad_norm": 0.0009243409149348736, "learning_rate": 8.269192867195878e-05, "loss": 0.0, "step": 409 }, { "epoch": 0.15873015873015872, "grad_norm": 0.0021821213886141777, "learning_rate": 8.261149541162691e-05, "loss": 0.0, "step": 410 }, { "epoch": 0.15911730545876887, "grad_norm": 0.0726943165063858, "learning_rate": 8.25309150102121e-05, "loss": 0.01, "step": 411 }, { "epoch": 0.15950445218737902, "grad_norm": 0.03284567967057228, "learning_rate": 8.245018783128823e-05, "loss": 0.0005, "step": 412 }, { "epoch": 0.15989159891598917, "grad_norm": 0.0005924602737650275, "learning_rate": 8.236931423909138e-05, "loss": 0.0, "step": 413 }, { "epoch": 0.1602787456445993, "grad_norm": 0.00531816016882658, "learning_rate": 8.228829459851832e-05, "loss": 0.0001, "step": 414 }, { "epoch": 0.16066589237320944, "grad_norm": 0.0028056297451257706, "learning_rate": 8.22071292751247e-05, "loss": 0.0, "step": 415 }, { "epoch": 0.1610530391018196, "grad_norm": 0.02348213456571102, "learning_rate": 8.212581863512353e-05, "loss": 0.0003, "step": 416 }, { "epoch": 0.16144018583042974, "grad_norm": 0.03243598714470863, "learning_rate": 8.204436304538349e-05, "loss": 0.0003, "step": 417 }, { "epoch": 0.1618273325590399, "grad_norm": 0.016708970069885254, "learning_rate": 8.196276287342723e-05, "loss": 0.0001, "step": 418 }, { "epoch": 0.16221447928765, "grad_norm": 0.010106835514307022, "learning_rate": 8.188101848742975e-05, "loss": 0.0001, "step": 419 }, { "epoch": 0.16260162601626016, "grad_norm": 0.007768136449158192, "learning_rate": 8.179913025621676e-05, "loss": 0.0, "step": 420 }, { "epoch": 0.1629887727448703, "grad_norm": 0.006284730043262243, "learning_rate": 8.171709854926298e-05, "loss": 0.0001, "step": 421 }, { "epoch": 0.16337591947348046, "grad_norm": 0.0018183101201429963, "learning_rate": 8.163492373669048e-05, "loss": 0.0, "step": 422 }, { "epoch": 0.16376306620209058, "grad_norm": 0.002550381701439619, "learning_rate": 8.155260618926699e-05, "loss": 0.0, "step": 423 }, { "epoch": 0.16415021293070073, "grad_norm": 0.0016546697588637471, "learning_rate": 8.14701462784043e-05, "loss": 0.0, "step": 424 }, { "epoch": 0.16453735965931088, "grad_norm": 0.002344394801184535, "learning_rate": 8.138754437615651e-05, "loss": 0.0, "step": 425 }, { "epoch": 0.16492450638792103, "grad_norm": 0.003373808925971389, "learning_rate": 8.130480085521837e-05, "loss": 0.0001, "step": 426 }, { "epoch": 0.16531165311653118, "grad_norm": 0.0002938521502073854, "learning_rate": 8.12219160889236e-05, "loss": 0.0, "step": 427 }, { "epoch": 0.1656987998451413, "grad_norm": 0.008733021095395088, "learning_rate": 8.113889045124324e-05, "loss": 0.0001, "step": 428 }, { "epoch": 0.16608594657375145, "grad_norm": 0.0003386593016330153, "learning_rate": 8.105572431678392e-05, "loss": 0.0, "step": 429 }, { "epoch": 0.1664730933023616, "grad_norm": 0.0021686386317014694, "learning_rate": 8.097241806078615e-05, "loss": 0.0, "step": 430 }, { "epoch": 0.16686024003097175, "grad_norm": 0.001093542668968439, "learning_rate": 8.088897205912272e-05, "loss": 0.0, "step": 431 }, { "epoch": 0.1672473867595819, "grad_norm": 0.0008129264460876584, "learning_rate": 8.080538668829688e-05, "loss": 0.0, "step": 432 }, { "epoch": 0.16763453348819202, "grad_norm": 0.000851363642141223, "learning_rate": 8.072166232544078e-05, "loss": 0.0, "step": 433 }, { "epoch": 0.16802168021680217, "grad_norm": 0.0010424451902508736, "learning_rate": 8.063779934831361e-05, "loss": 0.0, "step": 434 }, { "epoch": 0.16840882694541232, "grad_norm": 0.001159070641733706, "learning_rate": 8.055379813530002e-05, "loss": 0.0, "step": 435 }, { "epoch": 0.16879597367402246, "grad_norm": 0.001520829158835113, "learning_rate": 8.046965906540841e-05, "loss": 0.0001, "step": 436 }, { "epoch": 0.1691831204026326, "grad_norm": 0.03214537724852562, "learning_rate": 8.038538251826913e-05, "loss": 0.0002, "step": 437 }, { "epoch": 0.16957026713124274, "grad_norm": 0.000738432165235281, "learning_rate": 8.030096887413279e-05, "loss": 0.0, "step": 438 }, { "epoch": 0.16995741385985288, "grad_norm": 0.0010283209849148989, "learning_rate": 8.021641851386867e-05, "loss": 0.0, "step": 439 }, { "epoch": 0.17034456058846303, "grad_norm": 0.00038005216629244387, "learning_rate": 8.013173181896283e-05, "loss": 0.0, "step": 440 }, { "epoch": 0.17073170731707318, "grad_norm": 0.00039252720307558775, "learning_rate": 8.004690917151647e-05, "loss": 0.0, "step": 441 }, { "epoch": 0.1711188540456833, "grad_norm": 0.0010534483008086681, "learning_rate": 7.996195095424425e-05, "loss": 0.0, "step": 442 }, { "epoch": 0.17150600077429345, "grad_norm": 0.0006456160335801542, "learning_rate": 7.987685755047242e-05, "loss": 0.0, "step": 443 }, { "epoch": 0.1718931475029036, "grad_norm": 0.015872713178396225, "learning_rate": 7.97916293441373e-05, "loss": 0.0002, "step": 444 }, { "epoch": 0.17228029423151375, "grad_norm": 0.0021046926267445087, "learning_rate": 7.970626671978336e-05, "loss": 0.0, "step": 445 }, { "epoch": 0.17266744096012387, "grad_norm": 0.0005791055737063289, "learning_rate": 7.962077006256154e-05, "loss": 0.0, "step": 446 }, { "epoch": 0.17305458768873402, "grad_norm": 0.008445385843515396, "learning_rate": 7.953513975822755e-05, "loss": 0.0001, "step": 447 }, { "epoch": 0.17344173441734417, "grad_norm": 0.018906451761722565, "learning_rate": 7.944937619314016e-05, "loss": 0.0001, "step": 448 }, { "epoch": 0.17382888114595432, "grad_norm": 0.005337848793715239, "learning_rate": 7.93634797542593e-05, "loss": 0.0001, "step": 449 }, { "epoch": 0.17421602787456447, "grad_norm": 1.4769160747528076, "learning_rate": 7.927745082914453e-05, "loss": 0.0365, "step": 450 }, { "epoch": 0.1746031746031746, "grad_norm": 0.07269283384084702, "learning_rate": 7.919128980595309e-05, "loss": 0.0047, "step": 451 }, { "epoch": 0.17499032133178474, "grad_norm": 0.03315652534365654, "learning_rate": 7.910499707343828e-05, "loss": 0.001, "step": 452 }, { "epoch": 0.1753774680603949, "grad_norm": 0.013138361275196075, "learning_rate": 7.901857302094766e-05, "loss": 0.0002, "step": 453 }, { "epoch": 0.17576461478900504, "grad_norm": 0.6827967166900635, "learning_rate": 7.89320180384213e-05, "loss": 0.0006, "step": 454 }, { "epoch": 0.17615176151761516, "grad_norm": 0.023769304156303406, "learning_rate": 7.884533251638999e-05, "loss": 0.0003, "step": 455 }, { "epoch": 0.1765389082462253, "grad_norm": 0.06922416388988495, "learning_rate": 7.875851684597358e-05, "loss": 0.0029, "step": 456 }, { "epoch": 0.17692605497483546, "grad_norm": 0.03714381158351898, "learning_rate": 7.867157141887906e-05, "loss": 0.0003, "step": 457 }, { "epoch": 0.1773132017034456, "grad_norm": 0.029482917860150337, "learning_rate": 7.85844966273989e-05, "loss": 0.0004, "step": 458 }, { "epoch": 0.17770034843205576, "grad_norm": 0.0012693512253463268, "learning_rate": 7.849729286440928e-05, "loss": 0.0, "step": 459 }, { "epoch": 0.17808749516066588, "grad_norm": 0.0013141317758709192, "learning_rate": 7.840996052336827e-05, "loss": 0.0001, "step": 460 }, { "epoch": 0.17847464188927603, "grad_norm": 0.002732696942985058, "learning_rate": 7.832249999831407e-05, "loss": 0.0, "step": 461 }, { "epoch": 0.17886178861788618, "grad_norm": 0.005494223441928625, "learning_rate": 7.823491168386326e-05, "loss": 0.0001, "step": 462 }, { "epoch": 0.17924893534649633, "grad_norm": 0.0013144632102921605, "learning_rate": 7.814719597520894e-05, "loss": 0.0, "step": 463 }, { "epoch": 0.17963608207510648, "grad_norm": 0.0018678493797779083, "learning_rate": 7.805935326811912e-05, "loss": 0.0, "step": 464 }, { "epoch": 0.1800232288037166, "grad_norm": 0.004181758500635624, "learning_rate": 7.79713839589347e-05, "loss": 0.0001, "step": 465 }, { "epoch": 0.18041037553232675, "grad_norm": 0.0013430188409984112, "learning_rate": 7.788328844456789e-05, "loss": 0.0, "step": 466 }, { "epoch": 0.1807975222609369, "grad_norm": 0.00033934798557311296, "learning_rate": 7.779506712250024e-05, "loss": 0.0, "step": 467 }, { "epoch": 0.18118466898954705, "grad_norm": 0.0014731610426679254, "learning_rate": 7.770672039078102e-05, "loss": 0.0, "step": 468 }, { "epoch": 0.18157181571815717, "grad_norm": 0.012643665075302124, "learning_rate": 7.76182486480253e-05, "loss": 0.0002, "step": 469 }, { "epoch": 0.18195896244676732, "grad_norm": 0.19011537730693817, "learning_rate": 7.75296522934122e-05, "loss": 0.0057, "step": 470 }, { "epoch": 0.18234610917537747, "grad_norm": 0.002415846334770322, "learning_rate": 7.744093172668307e-05, "loss": 0.0, "step": 471 }, { "epoch": 0.18273325590398762, "grad_norm": 0.0007457759347744286, "learning_rate": 7.735208734813975e-05, "loss": 0.0, "step": 472 }, { "epoch": 0.18312040263259777, "grad_norm": 0.0009572989656589925, "learning_rate": 7.726311955864262e-05, "loss": 0.0, "step": 473 }, { "epoch": 0.1835075493612079, "grad_norm": 0.0011215334525331855, "learning_rate": 7.717402875960896e-05, "loss": 0.0, "step": 474 }, { "epoch": 0.18389469608981804, "grad_norm": 0.0025596783962100744, "learning_rate": 7.708481535301102e-05, "loss": 0.0, "step": 475 }, { "epoch": 0.1842818428184282, "grad_norm": 0.007137306500226259, "learning_rate": 7.699547974137426e-05, "loss": 0.0001, "step": 476 }, { "epoch": 0.18466898954703834, "grad_norm": 0.0028858983423560858, "learning_rate": 7.690602232777555e-05, "loss": 0.0, "step": 477 }, { "epoch": 0.18505613627564846, "grad_norm": 0.0008354281890206039, "learning_rate": 7.681644351584129e-05, "loss": 0.0, "step": 478 }, { "epoch": 0.1854432830042586, "grad_norm": 0.005381579976528883, "learning_rate": 7.672674370974559e-05, "loss": 0.0001, "step": 479 }, { "epoch": 0.18583042973286876, "grad_norm": 0.001957010943442583, "learning_rate": 7.663692331420857e-05, "loss": 0.0, "step": 480 }, { "epoch": 0.1862175764614789, "grad_norm": 0.001346597564406693, "learning_rate": 7.654698273449435e-05, "loss": 0.0, "step": 481 }, { "epoch": 0.18660472319008906, "grad_norm": 0.0013993862085044384, "learning_rate": 7.645692237640938e-05, "loss": 0.0, "step": 482 }, { "epoch": 0.18699186991869918, "grad_norm": 0.0008532855426892638, "learning_rate": 7.636674264630049e-05, "loss": 0.0, "step": 483 }, { "epoch": 0.18737901664730933, "grad_norm": 0.0010875778971239924, "learning_rate": 7.627644395105315e-05, "loss": 0.0, "step": 484 }, { "epoch": 0.18776616337591948, "grad_norm": 0.0006594783044420183, "learning_rate": 7.618602669808959e-05, "loss": 0.0, "step": 485 }, { "epoch": 0.18815331010452963, "grad_norm": 0.000321607367368415, "learning_rate": 7.609549129536693e-05, "loss": 0.0, "step": 486 }, { "epoch": 0.18854045683313975, "grad_norm": 0.0007030332926660776, "learning_rate": 7.60048381513754e-05, "loss": 0.0, "step": 487 }, { "epoch": 0.1889276035617499, "grad_norm": 0.006779205519706011, "learning_rate": 7.59140676751365e-05, "loss": 0.0001, "step": 488 }, { "epoch": 0.18931475029036005, "grad_norm": 0.0013242230052128434, "learning_rate": 7.582318027620105e-05, "loss": 0.0, "step": 489 }, { "epoch": 0.1897018970189702, "grad_norm": 0.018140189349651337, "learning_rate": 7.573217636464751e-05, "loss": 0.0003, "step": 490 }, { "epoch": 0.19008904374758034, "grad_norm": 0.0014813413145020604, "learning_rate": 7.564105635107996e-05, "loss": 0.0, "step": 491 }, { "epoch": 0.19047619047619047, "grad_norm": 0.0025619007647037506, "learning_rate": 7.554982064662637e-05, "loss": 0.0001, "step": 492 }, { "epoch": 0.19086333720480061, "grad_norm": 0.0014394291210919619, "learning_rate": 7.545846966293668e-05, "loss": 0.0, "step": 493 }, { "epoch": 0.19125048393341076, "grad_norm": 0.0014465994900092483, "learning_rate": 7.536700381218098e-05, "loss": 0.0, "step": 494 }, { "epoch": 0.1916376306620209, "grad_norm": 0.0030286244582384825, "learning_rate": 7.527542350704759e-05, "loss": 0.0001, "step": 495 }, { "epoch": 0.19202477739063106, "grad_norm": 0.09401986002922058, "learning_rate": 7.518372916074132e-05, "loss": 0.0004, "step": 496 }, { "epoch": 0.19241192411924118, "grad_norm": 0.017964961007237434, "learning_rate": 7.509192118698146e-05, "loss": 0.0002, "step": 497 }, { "epoch": 0.19279907084785133, "grad_norm": 0.010520697571337223, "learning_rate": 7.500000000000001e-05, "loss": 0.0001, "step": 498 }, { "epoch": 0.19318621757646148, "grad_norm": 2.2694268226623535, "learning_rate": 7.490796601453976e-05, "loss": 0.1216, "step": 499 }, { "epoch": 0.19357336430507163, "grad_norm": 0.40960317850112915, "learning_rate": 7.481581964585245e-05, "loss": 0.0043, "step": 500 }, { "epoch": 0.19396051103368175, "grad_norm": 0.0367998331785202, "learning_rate": 7.472356130969689e-05, "loss": 0.005, "step": 501 }, { "epoch": 0.1943476577622919, "grad_norm": 0.0033928700722754, "learning_rate": 7.46311914223371e-05, "loss": 0.0001, "step": 502 }, { "epoch": 0.19473480449090205, "grad_norm": 0.009577157907187939, "learning_rate": 7.453871040054037e-05, "loss": 0.0002, "step": 503 }, { "epoch": 0.1951219512195122, "grad_norm": 0.0033961099106818438, "learning_rate": 7.444611866157544e-05, "loss": 0.0001, "step": 504 }, { "epoch": 0.19550909794812235, "grad_norm": 0.015139970928430557, "learning_rate": 7.435341662321062e-05, "loss": 0.0004, "step": 505 }, { "epoch": 0.19589624467673247, "grad_norm": 0.001446568756364286, "learning_rate": 7.426060470371185e-05, "loss": 0.0001, "step": 506 }, { "epoch": 0.19628339140534262, "grad_norm": 0.03114064782857895, "learning_rate": 7.416768332184088e-05, "loss": 0.0009, "step": 507 }, { "epoch": 0.19667053813395277, "grad_norm": 0.012780173681676388, "learning_rate": 7.407465289685333e-05, "loss": 0.0003, "step": 508 }, { "epoch": 0.19705768486256292, "grad_norm": 0.07623110711574554, "learning_rate": 7.39815138484968e-05, "loss": 0.0003, "step": 509 }, { "epoch": 0.19744483159117304, "grad_norm": 0.03304853290319443, "learning_rate": 7.388826659700902e-05, "loss": 0.0002, "step": 510 }, { "epoch": 0.1978319783197832, "grad_norm": 0.002419049385935068, "learning_rate": 7.379491156311589e-05, "loss": 0.0001, "step": 511 }, { "epoch": 0.19821912504839334, "grad_norm": 0.008822989650070667, "learning_rate": 7.370144916802969e-05, "loss": 0.0002, "step": 512 }, { "epoch": 0.1986062717770035, "grad_norm": 0.0024873509537428617, "learning_rate": 7.360787983344704e-05, "loss": 0.0001, "step": 513 }, { "epoch": 0.19899341850561364, "grad_norm": 0.2240598499774933, "learning_rate": 7.351420398154705e-05, "loss": 0.0047, "step": 514 }, { "epoch": 0.19938056523422376, "grad_norm": 0.03293812274932861, "learning_rate": 7.342042203498951e-05, "loss": 0.0004, "step": 515 }, { "epoch": 0.1997677119628339, "grad_norm": 0.0035280841402709484, "learning_rate": 7.332653441691285e-05, "loss": 0.0001, "step": 516 }, { "epoch": 0.20015485869144406, "grad_norm": 0.33550676703453064, "learning_rate": 7.323254155093225e-05, "loss": 0.0161, "step": 517 }, { "epoch": 0.2005420054200542, "grad_norm": 0.009800751693546772, "learning_rate": 7.313844386113784e-05, "loss": 0.0001, "step": 518 }, { "epoch": 0.20092915214866433, "grad_norm": 0.13843496143817902, "learning_rate": 7.304424177209268e-05, "loss": 0.0008, "step": 519 }, { "epoch": 0.20131629887727448, "grad_norm": 0.07233725488185883, "learning_rate": 7.294993570883082e-05, "loss": 0.0004, "step": 520 }, { "epoch": 0.20170344560588463, "grad_norm": 0.005212654825299978, "learning_rate": 7.285552609685551e-05, "loss": 0.0001, "step": 521 }, { "epoch": 0.20209059233449478, "grad_norm": 0.002458937931805849, "learning_rate": 7.276101336213719e-05, "loss": 0.0001, "step": 522 }, { "epoch": 0.20247773906310493, "grad_norm": 0.011983088217675686, "learning_rate": 7.266639793111151e-05, "loss": 0.0003, "step": 523 }, { "epoch": 0.20286488579171505, "grad_norm": 0.003091826569288969, "learning_rate": 7.257168023067759e-05, "loss": 0.0001, "step": 524 }, { "epoch": 0.2032520325203252, "grad_norm": 0.06563227623701096, "learning_rate": 7.247686068819593e-05, "loss": 0.0004, "step": 525 }, { "epoch": 0.20363917924893535, "grad_norm": 0.002093779155984521, "learning_rate": 7.238193973148651e-05, "loss": 0.0001, "step": 526 }, { "epoch": 0.2040263259775455, "grad_norm": 0.048385411500930786, "learning_rate": 7.228691778882693e-05, "loss": 0.0008, "step": 527 }, { "epoch": 0.20441347270615565, "grad_norm": 0.00616808095946908, "learning_rate": 7.219179528895038e-05, "loss": 0.0001, "step": 528 }, { "epoch": 0.20480061943476577, "grad_norm": 0.003878154791891575, "learning_rate": 7.209657266104385e-05, "loss": 0.0001, "step": 529 }, { "epoch": 0.20518776616337592, "grad_norm": 0.008397274650633335, "learning_rate": 7.200125033474598e-05, "loss": 0.0002, "step": 530 }, { "epoch": 0.20557491289198607, "grad_norm": 0.08736693114042282, "learning_rate": 7.190582874014535e-05, "loss": 0.001, "step": 531 }, { "epoch": 0.20596205962059622, "grad_norm": 0.004129898734390736, "learning_rate": 7.181030830777837e-05, "loss": 0.0001, "step": 532 }, { "epoch": 0.20634920634920634, "grad_norm": 0.011208189651370049, "learning_rate": 7.171468946862744e-05, "loss": 0.0003, "step": 533 }, { "epoch": 0.2067363530778165, "grad_norm": 0.008657621219754219, "learning_rate": 7.161897265411891e-05, "loss": 0.0003, "step": 534 }, { "epoch": 0.20712349980642664, "grad_norm": 0.002974047791212797, "learning_rate": 7.152315829612125e-05, "loss": 0.0001, "step": 535 }, { "epoch": 0.20751064653503679, "grad_norm": 0.007309361360967159, "learning_rate": 7.142724682694299e-05, "loss": 0.0002, "step": 536 }, { "epoch": 0.20789779326364694, "grad_norm": 0.25275537371635437, "learning_rate": 7.133123867933087e-05, "loss": 0.0023, "step": 537 }, { "epoch": 0.20828493999225706, "grad_norm": 0.010795282199978828, "learning_rate": 7.12351342864678e-05, "loss": 0.0001, "step": 538 }, { "epoch": 0.2086720867208672, "grad_norm": 0.0217526163905859, "learning_rate": 7.113893408197092e-05, "loss": 0.0002, "step": 539 }, { "epoch": 0.20905923344947736, "grad_norm": 0.013393939472734928, "learning_rate": 7.104263849988976e-05, "loss": 0.0002, "step": 540 }, { "epoch": 0.2094463801780875, "grad_norm": 0.01084110513329506, "learning_rate": 7.094624797470407e-05, "loss": 0.0002, "step": 541 }, { "epoch": 0.20983352690669763, "grad_norm": 0.003147432114928961, "learning_rate": 7.084976294132208e-05, "loss": 0.0001, "step": 542 }, { "epoch": 0.21022067363530778, "grad_norm": 0.0023937560617923737, "learning_rate": 7.075318383507836e-05, "loss": 0.0001, "step": 543 }, { "epoch": 0.21060782036391792, "grad_norm": 0.0013290519127622247, "learning_rate": 7.065651109173197e-05, "loss": 0.0, "step": 544 }, { "epoch": 0.21099496709252807, "grad_norm": 0.16888383030891418, "learning_rate": 7.055974514746446e-05, "loss": 0.0018, "step": 545 }, { "epoch": 0.21138211382113822, "grad_norm": 0.014458446763455868, "learning_rate": 7.046288643887784e-05, "loss": 0.0001, "step": 546 }, { "epoch": 0.21176926054974834, "grad_norm": 0.002114094328135252, "learning_rate": 7.036593540299274e-05, "loss": 0.0, "step": 547 }, { "epoch": 0.2121564072783585, "grad_norm": 0.0411420576274395, "learning_rate": 7.026889247724635e-05, "loss": 0.0005, "step": 548 }, { "epoch": 0.21254355400696864, "grad_norm": 0.0021586837247014046, "learning_rate": 7.017175809949044e-05, "loss": 0.0001, "step": 549 }, { "epoch": 0.2129307007355788, "grad_norm": 0.08301404863595963, "learning_rate": 7.007453270798937e-05, "loss": 0.0017, "step": 550 }, { "epoch": 0.21331784746418891, "grad_norm": 0.0510292649269104, "learning_rate": 6.997721674141823e-05, "loss": 0.01, "step": 551 }, { "epoch": 0.21370499419279906, "grad_norm": 0.010133273899555206, "learning_rate": 6.987981063886074e-05, "loss": 0.0006, "step": 552 }, { "epoch": 0.2140921409214092, "grad_norm": 0.001888690865598619, "learning_rate": 6.978231483980728e-05, "loss": 0.0001, "step": 553 }, { "epoch": 0.21447928765001936, "grad_norm": 0.017150606960058212, "learning_rate": 6.968472978415301e-05, "loss": 0.0002, "step": 554 }, { "epoch": 0.2148664343786295, "grad_norm": 0.0014273010892793536, "learning_rate": 6.95870559121957e-05, "loss": 0.0, "step": 555 }, { "epoch": 0.21525358110723963, "grad_norm": 0.009298505261540413, "learning_rate": 6.948929366463396e-05, "loss": 0.0003, "step": 556 }, { "epoch": 0.21564072783584978, "grad_norm": 0.01824180595576763, "learning_rate": 6.939144348256511e-05, "loss": 0.0002, "step": 557 }, { "epoch": 0.21602787456445993, "grad_norm": 0.008332878351211548, "learning_rate": 6.92935058074832e-05, "loss": 0.0001, "step": 558 }, { "epoch": 0.21641502129307008, "grad_norm": 0.0005384304095059633, "learning_rate": 6.919548108127705e-05, "loss": 0.0, "step": 559 }, { "epoch": 0.21680216802168023, "grad_norm": 0.9911210536956787, "learning_rate": 6.909736974622827e-05, "loss": 0.1797, "step": 560 }, { "epoch": 0.21718931475029035, "grad_norm": 0.002593154553323984, "learning_rate": 6.899917224500925e-05, "loss": 0.0, "step": 561 }, { "epoch": 0.2175764614789005, "grad_norm": 0.025043247267603874, "learning_rate": 6.890088902068111e-05, "loss": 0.0001, "step": 562 }, { "epoch": 0.21796360820751065, "grad_norm": 0.0018312917090952396, "learning_rate": 6.88025205166918e-05, "loss": 0.0, "step": 563 }, { "epoch": 0.2183507549361208, "grad_norm": 0.0029724184423685074, "learning_rate": 6.870406717687402e-05, "loss": 0.0, "step": 564 }, { "epoch": 0.21873790166473092, "grad_norm": 0.0035826992243528366, "learning_rate": 6.860552944544325e-05, "loss": 0.0001, "step": 565 }, { "epoch": 0.21912504839334107, "grad_norm": 0.103156678378582, "learning_rate": 6.850690776699573e-05, "loss": 0.0007, "step": 566 }, { "epoch": 0.21951219512195122, "grad_norm": 0.003371761180460453, "learning_rate": 6.840820258650646e-05, "loss": 0.0001, "step": 567 }, { "epoch": 0.21989934185056137, "grad_norm": 0.0007010716944932938, "learning_rate": 6.830941434932726e-05, "loss": 0.0, "step": 568 }, { "epoch": 0.22028648857917152, "grad_norm": 0.061280257999897, "learning_rate": 6.821054350118458e-05, "loss": 0.0003, "step": 569 }, { "epoch": 0.22067363530778164, "grad_norm": 0.000763554242439568, "learning_rate": 6.811159048817772e-05, "loss": 0.0, "step": 570 }, { "epoch": 0.2210607820363918, "grad_norm": 0.028872434049844742, "learning_rate": 6.801255575677663e-05, "loss": 0.0002, "step": 571 }, { "epoch": 0.22144792876500194, "grad_norm": 0.005142708774656057, "learning_rate": 6.791343975382e-05, "loss": 0.0, "step": 572 }, { "epoch": 0.2218350754936121, "grad_norm": 0.0014610120560973883, "learning_rate": 6.78142429265132e-05, "loss": 0.0, "step": 573 }, { "epoch": 0.2222222222222222, "grad_norm": 0.0011419376824051142, "learning_rate": 6.771496572242627e-05, "loss": 0.0, "step": 574 }, { "epoch": 0.22260936895083236, "grad_norm": 0.005002702586352825, "learning_rate": 6.761560858949193e-05, "loss": 0.0001, "step": 575 }, { "epoch": 0.2229965156794425, "grad_norm": 0.001541085192002356, "learning_rate": 6.75161719760035e-05, "loss": 0.0, "step": 576 }, { "epoch": 0.22338366240805266, "grad_norm": 0.0024027032777667046, "learning_rate": 6.741665633061297e-05, "loss": 0.0, "step": 577 }, { "epoch": 0.2237708091366628, "grad_norm": 0.014390048570930958, "learning_rate": 6.731706210232883e-05, "loss": 0.0, "step": 578 }, { "epoch": 0.22415795586527293, "grad_norm": 0.005124339833855629, "learning_rate": 6.72173897405142e-05, "loss": 0.0001, "step": 579 }, { "epoch": 0.22454510259388308, "grad_norm": 0.005504544824361801, "learning_rate": 6.711763969488471e-05, "loss": 0.0, "step": 580 }, { "epoch": 0.22493224932249323, "grad_norm": 0.0024512598756700754, "learning_rate": 6.701781241550649e-05, "loss": 0.0, "step": 581 }, { "epoch": 0.22531939605110338, "grad_norm": 0.0021401650737971067, "learning_rate": 6.691790835279417e-05, "loss": 0.0, "step": 582 }, { "epoch": 0.2257065427797135, "grad_norm": 0.0038466479163616896, "learning_rate": 6.681792795750875e-05, "loss": 0.0, "step": 583 }, { "epoch": 0.22609368950832365, "grad_norm": 0.006864097900688648, "learning_rate": 6.671787168075577e-05, "loss": 0.0001, "step": 584 }, { "epoch": 0.2264808362369338, "grad_norm": 0.00382824894040823, "learning_rate": 6.661773997398298e-05, "loss": 0.0001, "step": 585 }, { "epoch": 0.22686798296554395, "grad_norm": 0.0022931089624762535, "learning_rate": 6.651753328897862e-05, "loss": 0.0, "step": 586 }, { "epoch": 0.2272551296941541, "grad_norm": 0.01140713132917881, "learning_rate": 6.64172520778691e-05, "loss": 0.0001, "step": 587 }, { "epoch": 0.22764227642276422, "grad_norm": 0.0010691037168726325, "learning_rate": 6.631689679311716e-05, "loss": 0.0, "step": 588 }, { "epoch": 0.22802942315137437, "grad_norm": 0.009856486693024635, "learning_rate": 6.621646788751978e-05, "loss": 0.0001, "step": 589 }, { "epoch": 0.22841656987998452, "grad_norm": 0.0005757238250225782, "learning_rate": 6.6115965814206e-05, "loss": 0.0, "step": 590 }, { "epoch": 0.22880371660859466, "grad_norm": 0.032714247703552246, "learning_rate": 6.60153910266351e-05, "loss": 0.0001, "step": 591 }, { "epoch": 0.22919086333720481, "grad_norm": 0.0019227054435759783, "learning_rate": 6.591474397859438e-05, "loss": 0.0, "step": 592 }, { "epoch": 0.22957801006581494, "grad_norm": 0.020541738718748093, "learning_rate": 6.581402512419724e-05, "loss": 0.0002, "step": 593 }, { "epoch": 0.22996515679442509, "grad_norm": 0.000846848648507148, "learning_rate": 6.571323491788098e-05, "loss": 0.0, "step": 594 }, { "epoch": 0.23035230352303523, "grad_norm": 0.006502535659819841, "learning_rate": 6.561237381440491e-05, "loss": 0.0001, "step": 595 }, { "epoch": 0.23073945025164538, "grad_norm": 0.002776126377284527, "learning_rate": 6.551144226884816e-05, "loss": 0.0, "step": 596 }, { "epoch": 0.2311265969802555, "grad_norm": 0.008968628011643887, "learning_rate": 6.541044073660773e-05, "loss": 0.0001, "step": 597 }, { "epoch": 0.23151374370886565, "grad_norm": 0.0025781530421227217, "learning_rate": 6.530936967339641e-05, "loss": 0.0, "step": 598 }, { "epoch": 0.2319008904374758, "grad_norm": 0.012835104949772358, "learning_rate": 6.520822953524064e-05, "loss": 0.0001, "step": 599 }, { "epoch": 0.23228803716608595, "grad_norm": 0.0038900829385966063, "learning_rate": 6.510702077847863e-05, "loss": 0.0001, "step": 600 }, { "epoch": 0.2326751838946961, "grad_norm": 0.02674148790538311, "learning_rate": 6.500574385975811e-05, "loss": 0.0034, "step": 601 }, { "epoch": 0.23306233062330622, "grad_norm": 0.013194573111832142, "learning_rate": 6.490439923603436e-05, "loss": 0.0003, "step": 602 }, { "epoch": 0.23344947735191637, "grad_norm": 0.0062402160838246346, "learning_rate": 6.480298736456814e-05, "loss": 0.0001, "step": 603 }, { "epoch": 0.23383662408052652, "grad_norm": 0.009061329998075962, "learning_rate": 6.470150870292369e-05, "loss": 0.0001, "step": 604 }, { "epoch": 0.23422377080913667, "grad_norm": 0.00190588622353971, "learning_rate": 6.459996370896653e-05, "loss": 0.0, "step": 605 }, { "epoch": 0.2346109175377468, "grad_norm": 0.003754197619855404, "learning_rate": 6.449835284086147e-05, "loss": 0.0001, "step": 606 }, { "epoch": 0.23499806426635694, "grad_norm": 0.005926064681261778, "learning_rate": 6.439667655707062e-05, "loss": 0.0001, "step": 607 }, { "epoch": 0.2353852109949671, "grad_norm": 0.004422381520271301, "learning_rate": 6.429493531635115e-05, "loss": 0.0001, "step": 608 }, { "epoch": 0.23577235772357724, "grad_norm": 0.01841561682522297, "learning_rate": 6.419312957775334e-05, "loss": 0.0003, "step": 609 }, { "epoch": 0.2361595044521874, "grad_norm": 0.0049548279494047165, "learning_rate": 6.409125980061853e-05, "loss": 0.0001, "step": 610 }, { "epoch": 0.2365466511807975, "grad_norm": 0.002091424772515893, "learning_rate": 6.39893264445769e-05, "loss": 0.0, "step": 611 }, { "epoch": 0.23693379790940766, "grad_norm": 0.0005297800526022911, "learning_rate": 6.388732996954559e-05, "loss": 0.0, "step": 612 }, { "epoch": 0.2373209446380178, "grad_norm": 0.0013506063260138035, "learning_rate": 6.378527083572646e-05, "loss": 0.0, "step": 613 }, { "epoch": 0.23770809136662796, "grad_norm": 0.0941486805677414, "learning_rate": 6.368314950360415e-05, "loss": 0.0005, "step": 614 }, { "epoch": 0.23809523809523808, "grad_norm": 0.0013530774740502238, "learning_rate": 6.358096643394387e-05, "loss": 0.0, "step": 615 }, { "epoch": 0.23848238482384823, "grad_norm": 0.00040758828981779516, "learning_rate": 6.34787220877894e-05, "loss": 0.0, "step": 616 }, { "epoch": 0.23886953155245838, "grad_norm": 0.0036245647352188826, "learning_rate": 6.337641692646106e-05, "loss": 0.0, "step": 617 }, { "epoch": 0.23925667828106853, "grad_norm": 0.002493639010936022, "learning_rate": 6.327405141155344e-05, "loss": 0.0, "step": 618 }, { "epoch": 0.23964382500967868, "grad_norm": 0.01201645378023386, "learning_rate": 6.317162600493357e-05, "loss": 0.0002, "step": 619 }, { "epoch": 0.2400309717382888, "grad_norm": 0.0034420141018927097, "learning_rate": 6.306914116873863e-05, "loss": 0.0001, "step": 620 }, { "epoch": 0.24041811846689895, "grad_norm": 0.08557692170143127, "learning_rate": 6.2966597365374e-05, "loss": 0.0005, "step": 621 }, { "epoch": 0.2408052651955091, "grad_norm": 0.0024753545876592398, "learning_rate": 6.286399505751102e-05, "loss": 0.0001, "step": 622 }, { "epoch": 0.24119241192411925, "grad_norm": 0.19629161059856415, "learning_rate": 6.276133470808509e-05, "loss": 0.0077, "step": 623 }, { "epoch": 0.2415795586527294, "grad_norm": 0.0015175496228039265, "learning_rate": 6.265861678029347e-05, "loss": 0.0, "step": 624 }, { "epoch": 0.24196670538133952, "grad_norm": 0.0008467718726024032, "learning_rate": 6.255584173759319e-05, "loss": 0.0, "step": 625 }, { "epoch": 0.24235385210994967, "grad_norm": 0.0015436846297234297, "learning_rate": 6.2453010043699e-05, "loss": 0.0, "step": 626 }, { "epoch": 0.24274099883855982, "grad_norm": 0.005769604351371527, "learning_rate": 6.235012216258121e-05, "loss": 0.0001, "step": 627 }, { "epoch": 0.24312814556716997, "grad_norm": 0.009738071821630001, "learning_rate": 6.224717855846374e-05, "loss": 0.0002, "step": 628 }, { "epoch": 0.2435152922957801, "grad_norm": 0.002359709469601512, "learning_rate": 6.214417969582181e-05, "loss": 0.0001, "step": 629 }, { "epoch": 0.24390243902439024, "grad_norm": 0.001280121854506433, "learning_rate": 6.204112603938006e-05, "loss": 0.0, "step": 630 }, { "epoch": 0.2442895857530004, "grad_norm": 0.002594890771433711, "learning_rate": 6.193801805411028e-05, "loss": 0.0, "step": 631 }, { "epoch": 0.24467673248161054, "grad_norm": 0.0023838337510824203, "learning_rate": 6.183485620522947e-05, "loss": 0.0, "step": 632 }, { "epoch": 0.2450638792102207, "grad_norm": 0.011900877580046654, "learning_rate": 6.173164095819758e-05, "loss": 0.0001, "step": 633 }, { "epoch": 0.2454510259388308, "grad_norm": 0.07067535817623138, "learning_rate": 6.162837277871553e-05, "loss": 0.0003, "step": 634 }, { "epoch": 0.24583817266744096, "grad_norm": 0.00056896434398368, "learning_rate": 6.152505213272307e-05, "loss": 0.0, "step": 635 }, { "epoch": 0.2462253193960511, "grad_norm": 0.002547027077525854, "learning_rate": 6.142167948639667e-05, "loss": 0.0, "step": 636 }, { "epoch": 0.24661246612466126, "grad_norm": 0.004246006719768047, "learning_rate": 6.131825530614741e-05, "loss": 0.0, "step": 637 }, { "epoch": 0.24699961285327138, "grad_norm": 0.0010026529198512435, "learning_rate": 6.121478005861891e-05, "loss": 0.0, "step": 638 }, { "epoch": 0.24738675958188153, "grad_norm": 0.0007125648553483188, "learning_rate": 6.111125421068518e-05, "loss": 0.0, "step": 639 }, { "epoch": 0.24777390631049168, "grad_norm": 0.0025278409011662006, "learning_rate": 6.100767822944856e-05, "loss": 0.0, "step": 640 }, { "epoch": 0.24816105303910183, "grad_norm": 0.0011645531048998237, "learning_rate": 6.090405258223756e-05, "loss": 0.0, "step": 641 }, { "epoch": 0.24854819976771197, "grad_norm": 0.018444223329424858, "learning_rate": 6.0800377736604806e-05, "loss": 0.0002, "step": 642 }, { "epoch": 0.2489353464963221, "grad_norm": 0.00047034319140948355, "learning_rate": 6.069665416032487e-05, "loss": 0.0, "step": 643 }, { "epoch": 0.24932249322493225, "grad_norm": 0.0012211957946419716, "learning_rate": 6.059288232139225e-05, "loss": 0.0, "step": 644 }, { "epoch": 0.2497096399535424, "grad_norm": 0.0005616419948637486, "learning_rate": 6.0489062688019146e-05, "loss": 0.0, "step": 645 }, { "epoch": 0.2500967866821525, "grad_norm": 0.0016151750460267067, "learning_rate": 6.038519572863346e-05, "loss": 0.0, "step": 646 }, { "epoch": 0.2504839334107627, "grad_norm": 0.008833995088934898, "learning_rate": 6.028128191187654e-05, "loss": 0.0002, "step": 647 }, { "epoch": 0.2508710801393728, "grad_norm": 0.004646562971174717, "learning_rate": 6.017732170660123e-05, "loss": 0.0001, "step": 648 }, { "epoch": 0.251258226867983, "grad_norm": 0.006479341071099043, "learning_rate": 6.007331558186967e-05, "loss": 0.0001, "step": 649 }, { "epoch": 0.2516453735965931, "grad_norm": 0.020141808316111565, "learning_rate": 5.996926400695113e-05, "loss": 0.0002, "step": 650 }, { "epoch": 0.25203252032520324, "grad_norm": 0.03065257892012596, "learning_rate": 5.9865167451320005e-05, "loss": 0.003, "step": 651 }, { "epoch": 0.2524196670538134, "grad_norm": 0.009445116855204105, "learning_rate": 5.9761026384653616e-05, "loss": 0.0002, "step": 652 }, { "epoch": 0.25280681378242353, "grad_norm": 0.03650001436471939, "learning_rate": 5.9656841276830133e-05, "loss": 0.0008, "step": 653 }, { "epoch": 0.25319396051103366, "grad_norm": 0.0038452832959592342, "learning_rate": 5.9552612597926415e-05, "loss": 0.0001, "step": 654 }, { "epoch": 0.25358110723964383, "grad_norm": 0.001224176143296063, "learning_rate": 5.9448340818215884e-05, "loss": 0.0, "step": 655 }, { "epoch": 0.25396825396825395, "grad_norm": 0.002449088031426072, "learning_rate": 5.934402640816652e-05, "loss": 0.0001, "step": 656 }, { "epoch": 0.25435540069686413, "grad_norm": 0.014086930081248283, "learning_rate": 5.923966983843856e-05, "loss": 0.0001, "step": 657 }, { "epoch": 0.25474254742547425, "grad_norm": 0.00690666725859046, "learning_rate": 5.913527157988252e-05, "loss": 0.0001, "step": 658 }, { "epoch": 0.2551296941540844, "grad_norm": 0.11766976863145828, "learning_rate": 5.903083210353696e-05, "loss": 0.0008, "step": 659 }, { "epoch": 0.25551684088269455, "grad_norm": 0.0009361839038319886, "learning_rate": 5.892635188062646e-05, "loss": 0.0, "step": 660 }, { "epoch": 0.2559039876113047, "grad_norm": 0.0004829127574339509, "learning_rate": 5.882183138255945e-05, "loss": 0.0, "step": 661 }, { "epoch": 0.25629113433991485, "grad_norm": 0.02990470826625824, "learning_rate": 5.8717271080926004e-05, "loss": 0.0003, "step": 662 }, { "epoch": 0.25667828106852497, "grad_norm": 0.0015614639269188046, "learning_rate": 5.8612671447495884e-05, "loss": 0.0, "step": 663 }, { "epoch": 0.2570654277971351, "grad_norm": 0.0005513576907105744, "learning_rate": 5.850803295421625e-05, "loss": 0.0, "step": 664 }, { "epoch": 0.25745257452574527, "grad_norm": 0.017185453325510025, "learning_rate": 5.840335607320964e-05, "loss": 0.0001, "step": 665 }, { "epoch": 0.2578397212543554, "grad_norm": 0.010216176509857178, "learning_rate": 5.8298641276771746e-05, "loss": 0.0001, "step": 666 }, { "epoch": 0.25822686798296557, "grad_norm": 0.000682777666952461, "learning_rate": 5.819388903736937e-05, "loss": 0.0, "step": 667 }, { "epoch": 0.2586140147115757, "grad_norm": 0.0019596980419009924, "learning_rate": 5.808909982763825e-05, "loss": 0.0, "step": 668 }, { "epoch": 0.2590011614401858, "grad_norm": 0.0015184786170721054, "learning_rate": 5.79842741203809e-05, "loss": 0.0, "step": 669 }, { "epoch": 0.259388308168796, "grad_norm": 0.0013025728985667229, "learning_rate": 5.787941238856456e-05, "loss": 0.0, "step": 670 }, { "epoch": 0.2597754548974061, "grad_norm": 0.0020054916385561228, "learning_rate": 5.777451510531895e-05, "loss": 0.0, "step": 671 }, { "epoch": 0.2601626016260163, "grad_norm": 0.00536265317350626, "learning_rate": 5.7669582743934284e-05, "loss": 0.0001, "step": 672 }, { "epoch": 0.2605497483546264, "grad_norm": 0.00033802189864218235, "learning_rate": 5.756461577785892e-05, "loss": 0.0, "step": 673 }, { "epoch": 0.26093689508323653, "grad_norm": 0.00032143425778485835, "learning_rate": 5.7459614680697495e-05, "loss": 0.0, "step": 674 }, { "epoch": 0.2613240418118467, "grad_norm": 0.03394626826047897, "learning_rate": 5.735457992620851e-05, "loss": 0.0002, "step": 675 }, { "epoch": 0.26171118854045683, "grad_norm": 0.0029896190389990807, "learning_rate": 5.7249511988302415e-05, "loss": 0.0, "step": 676 }, { "epoch": 0.26209833526906695, "grad_norm": 0.0015679013449698687, "learning_rate": 5.714441134103936e-05, "loss": 0.0, "step": 677 }, { "epoch": 0.26248548199767713, "grad_norm": 0.00024444403243251145, "learning_rate": 5.7039278458627054e-05, "loss": 0.0, "step": 678 }, { "epoch": 0.26287262872628725, "grad_norm": 0.0015600993065163493, "learning_rate": 5.6934113815418665e-05, "loss": 0.0, "step": 679 }, { "epoch": 0.2632597754548974, "grad_norm": 0.0019227303564548492, "learning_rate": 5.682891788591066e-05, "loss": 0.0, "step": 680 }, { "epoch": 0.26364692218350755, "grad_norm": 0.00032327178632840514, "learning_rate": 5.67236911447407e-05, "loss": 0.0, "step": 681 }, { "epoch": 0.26403406891211767, "grad_norm": 0.001297106733545661, "learning_rate": 5.661843406668541e-05, "loss": 0.0, "step": 682 }, { "epoch": 0.26442121564072785, "grad_norm": 0.00023812698782421649, "learning_rate": 5.651314712665833e-05, "loss": 0.0, "step": 683 }, { "epoch": 0.26480836236933797, "grad_norm": 0.012247363105416298, "learning_rate": 5.640783079970774e-05, "loss": 0.0001, "step": 684 }, { "epoch": 0.26519550909794815, "grad_norm": 0.0006516319117508829, "learning_rate": 5.6302485561014475e-05, "loss": 0.0, "step": 685 }, { "epoch": 0.26558265582655827, "grad_norm": 0.00017600289720576257, "learning_rate": 5.6197111885889867e-05, "loss": 0.0, "step": 686 }, { "epoch": 0.2659698025551684, "grad_norm": 0.00019459190662018955, "learning_rate": 5.609171024977348e-05, "loss": 0.0, "step": 687 }, { "epoch": 0.26635694928377857, "grad_norm": 0.00027738159405998886, "learning_rate": 5.598628112823114e-05, "loss": 0.0, "step": 688 }, { "epoch": 0.2667440960123887, "grad_norm": 0.000665661646053195, "learning_rate": 5.588082499695261e-05, "loss": 0.0, "step": 689 }, { "epoch": 0.26713124274099886, "grad_norm": 0.0010483519872650504, "learning_rate": 5.577534233174952e-05, "loss": 0.0, "step": 690 }, { "epoch": 0.267518389469609, "grad_norm": 0.003276185365393758, "learning_rate": 5.5669833608553244e-05, "loss": 0.0, "step": 691 }, { "epoch": 0.2679055361982191, "grad_norm": 0.0013291811337694526, "learning_rate": 5.5564299303412734e-05, "loss": 0.0, "step": 692 }, { "epoch": 0.2682926829268293, "grad_norm": 0.00019930455891881138, "learning_rate": 5.545873989249235e-05, "loss": 0.0, "step": 693 }, { "epoch": 0.2686798296554394, "grad_norm": 0.0002764319651760161, "learning_rate": 5.535315585206972e-05, "loss": 0.0, "step": 694 }, { "epoch": 0.2690669763840495, "grad_norm": 0.0007317042327485979, "learning_rate": 5.52475476585336e-05, "loss": 0.0, "step": 695 }, { "epoch": 0.2694541231126597, "grad_norm": 2.0606796741485596, "learning_rate": 5.514191578838177e-05, "loss": 0.0594, "step": 696 }, { "epoch": 0.2698412698412698, "grad_norm": 0.029833437874913216, "learning_rate": 5.503626071821877e-05, "loss": 0.0002, "step": 697 }, { "epoch": 0.27022841656988, "grad_norm": 0.0008992796647362411, "learning_rate": 5.493058292475387e-05, "loss": 0.0, "step": 698 }, { "epoch": 0.2706155632984901, "grad_norm": 0.0011732947314158082, "learning_rate": 5.48248828847988e-05, "loss": 0.0, "step": 699 }, { "epoch": 0.27100271002710025, "grad_norm": 0.593447744846344, "learning_rate": 5.4719161075265765e-05, "loss": 0.0047, "step": 700 }, { "epoch": 0.2713898567557104, "grad_norm": 0.05254895240068436, "learning_rate": 5.4613417973165106e-05, "loss": 0.0064, "step": 701 }, { "epoch": 0.27177700348432055, "grad_norm": 0.0053640869446098804, "learning_rate": 5.4507654055603275e-05, "loss": 0.0002, "step": 702 }, { "epoch": 0.2721641502129307, "grad_norm": 0.013144250027835369, "learning_rate": 5.440186979978061e-05, "loss": 0.0002, "step": 703 }, { "epoch": 0.27255129694154084, "grad_norm": 0.005304619669914246, "learning_rate": 5.429606568298926e-05, "loss": 0.0001, "step": 704 }, { "epoch": 0.27293844367015097, "grad_norm": 0.006235075183212757, "learning_rate": 5.4190242182610976e-05, "loss": 0.0001, "step": 705 }, { "epoch": 0.27332559039876114, "grad_norm": 0.007272032089531422, "learning_rate": 5.4084399776114915e-05, "loss": 0.0002, "step": 706 }, { "epoch": 0.27371273712737126, "grad_norm": 0.008517012931406498, "learning_rate": 5.397853894105559e-05, "loss": 0.0001, "step": 707 }, { "epoch": 0.27409988385598144, "grad_norm": 0.10108551383018494, "learning_rate": 5.387266015507065e-05, "loss": 0.006, "step": 708 }, { "epoch": 0.27448703058459156, "grad_norm": 0.017542164772748947, "learning_rate": 5.376676389587875e-05, "loss": 0.0002, "step": 709 }, { "epoch": 0.2748741773132017, "grad_norm": 0.0033126119524240494, "learning_rate": 5.366085064127734e-05, "loss": 0.0001, "step": 710 }, { "epoch": 0.27526132404181186, "grad_norm": 0.004340835381299257, "learning_rate": 5.355492086914059e-05, "loss": 0.0, "step": 711 }, { "epoch": 0.275648470770422, "grad_norm": 0.0020051731262356043, "learning_rate": 5.344897505741719e-05, "loss": 0.0, "step": 712 }, { "epoch": 0.27603561749903216, "grad_norm": 0.0017423669341951609, "learning_rate": 5.3343013684128206e-05, "loss": 0.0, "step": 713 }, { "epoch": 0.2764227642276423, "grad_norm": 0.004165020305663347, "learning_rate": 5.323703722736489e-05, "loss": 0.0001, "step": 714 }, { "epoch": 0.2768099109562524, "grad_norm": 0.004314817953854799, "learning_rate": 5.3131046165286556e-05, "loss": 0.0001, "step": 715 }, { "epoch": 0.2771970576848626, "grad_norm": 0.009306171908974648, "learning_rate": 5.302504097611847e-05, "loss": 0.0001, "step": 716 }, { "epoch": 0.2775842044134727, "grad_norm": 0.002385742263868451, "learning_rate": 5.2919022138149555e-05, "loss": 0.0, "step": 717 }, { "epoch": 0.2779713511420828, "grad_norm": 0.04833089932799339, "learning_rate": 5.2812990129730363e-05, "loss": 0.0003, "step": 718 }, { "epoch": 0.278358497870693, "grad_norm": 0.001545402454212308, "learning_rate": 5.270694542927088e-05, "loss": 0.0, "step": 719 }, { "epoch": 0.2787456445993031, "grad_norm": 0.0023753156419843435, "learning_rate": 5.260088851523833e-05, "loss": 0.0, "step": 720 }, { "epoch": 0.2791327913279133, "grad_norm": 0.13964203000068665, "learning_rate": 5.2494819866155065e-05, "loss": 0.0037, "step": 721 }, { "epoch": 0.2795199380565234, "grad_norm": 0.0010260421549901366, "learning_rate": 5.2388739960596364e-05, "loss": 0.0, "step": 722 }, { "epoch": 0.27990708478513354, "grad_norm": 0.0009256897028535604, "learning_rate": 5.22826492771883e-05, "loss": 0.0, "step": 723 }, { "epoch": 0.2802942315137437, "grad_norm": 0.004000549204647541, "learning_rate": 5.217654829460561e-05, "loss": 0.0001, "step": 724 }, { "epoch": 0.28068137824235384, "grad_norm": 0.0011285142973065376, "learning_rate": 5.207043749156945e-05, "loss": 0.0, "step": 725 }, { "epoch": 0.281068524970964, "grad_norm": 0.10327895730733871, "learning_rate": 5.19643173468453e-05, "loss": 0.0009, "step": 726 }, { "epoch": 0.28145567169957414, "grad_norm": 0.000834814622066915, "learning_rate": 5.185818833924081e-05, "loss": 0.0, "step": 727 }, { "epoch": 0.28184281842818426, "grad_norm": 0.006214594002813101, "learning_rate": 5.1752050947603614e-05, "loss": 0.0, "step": 728 }, { "epoch": 0.28222996515679444, "grad_norm": 0.0009311099420301616, "learning_rate": 5.164590565081914e-05, "loss": 0.0, "step": 729 }, { "epoch": 0.28261711188540456, "grad_norm": 0.1762949526309967, "learning_rate": 5.153975292780853e-05, "loss": 0.0019, "step": 730 }, { "epoch": 0.28300425861401474, "grad_norm": 0.025049149990081787, "learning_rate": 5.143359325752638e-05, "loss": 0.0001, "step": 731 }, { "epoch": 0.28339140534262486, "grad_norm": 0.002675428753718734, "learning_rate": 5.13274271189587e-05, "loss": 0.0, "step": 732 }, { "epoch": 0.283778552071235, "grad_norm": 0.001739859115332365, "learning_rate": 5.122125499112063e-05, "loss": 0.0, "step": 733 }, { "epoch": 0.28416569879984516, "grad_norm": 0.13101710379123688, "learning_rate": 5.1115077353054356e-05, "loss": 0.0008, "step": 734 }, { "epoch": 0.2845528455284553, "grad_norm": 0.008799294009804726, "learning_rate": 5.10088946838269e-05, "loss": 0.0001, "step": 735 }, { "epoch": 0.28493999225706546, "grad_norm": 0.00784998293966055, "learning_rate": 5.090270746252802e-05, "loss": 0.0001, "step": 736 }, { "epoch": 0.2853271389856756, "grad_norm": 0.012120920233428478, "learning_rate": 5.079651616826802e-05, "loss": 0.0002, "step": 737 }, { "epoch": 0.2857142857142857, "grad_norm": 0.033871788531541824, "learning_rate": 5.069032128017551e-05, "loss": 0.0002, "step": 738 }, { "epoch": 0.2861014324428959, "grad_norm": 0.05213552340865135, "learning_rate": 5.05841232773954e-05, "loss": 0.0006, "step": 739 }, { "epoch": 0.286488579171506, "grad_norm": 0.0012123180786147714, "learning_rate": 5.0477922639086596e-05, "loss": 0.0, "step": 740 }, { "epoch": 0.2868757259001161, "grad_norm": 0.01266507152467966, "learning_rate": 5.037171984441994e-05, "loss": 0.0001, "step": 741 }, { "epoch": 0.2872628726287263, "grad_norm": 0.0523063987493515, "learning_rate": 5.0265515372575956e-05, "loss": 0.0005, "step": 742 }, { "epoch": 0.2876500193573364, "grad_norm": 0.001839546370320022, "learning_rate": 5.015930970274277e-05, "loss": 0.0, "step": 743 }, { "epoch": 0.2880371660859466, "grad_norm": 0.0016663463320583105, "learning_rate": 5.00531033141139e-05, "loss": 0.0, "step": 744 }, { "epoch": 0.2884243128145567, "grad_norm": 0.0012716924538835883, "learning_rate": 4.99468966858861e-05, "loss": 0.0, "step": 745 }, { "epoch": 0.28881145954316684, "grad_norm": 0.0038500288501381874, "learning_rate": 4.984069029725723e-05, "loss": 0.0, "step": 746 }, { "epoch": 0.289198606271777, "grad_norm": 0.3896702229976654, "learning_rate": 4.973448462742405e-05, "loss": 0.0008, "step": 747 }, { "epoch": 0.28958575300038714, "grad_norm": 0.0010479100747033954, "learning_rate": 4.962828015558006e-05, "loss": 0.0, "step": 748 }, { "epoch": 0.2899728997289973, "grad_norm": 0.011884471401572227, "learning_rate": 4.952207736091341e-05, "loss": 0.0001, "step": 749 }, { "epoch": 0.29036004645760743, "grad_norm": 0.2021235078573227, "learning_rate": 4.941587672260461e-05, "loss": 0.0033, "step": 750 }, { "epoch": 0.29074719318621756, "grad_norm": 0.045500051230192184, "learning_rate": 4.930967871982451e-05, "loss": 0.0055, "step": 751 }, { "epoch": 0.29113433991482773, "grad_norm": 0.0288743507117033, "learning_rate": 4.9203483831732006e-05, "loss": 0.0007, "step": 752 }, { "epoch": 0.29152148664343785, "grad_norm": 0.020009569823741913, "learning_rate": 4.909729253747197e-05, "loss": 0.0002, "step": 753 }, { "epoch": 0.29190863337204803, "grad_norm": 0.006925898138433695, "learning_rate": 4.899110531617311e-05, "loss": 0.0001, "step": 754 }, { "epoch": 0.29229578010065815, "grad_norm": 0.03738551586866379, "learning_rate": 4.8884922646945656e-05, "loss": 0.0006, "step": 755 }, { "epoch": 0.2926829268292683, "grad_norm": 0.0020933786872774363, "learning_rate": 4.877874500887938e-05, "loss": 0.0001, "step": 756 }, { "epoch": 0.29307007355787845, "grad_norm": 0.002445946214720607, "learning_rate": 4.867257288104131e-05, "loss": 0.0, "step": 757 }, { "epoch": 0.2934572202864886, "grad_norm": 0.01913662999868393, "learning_rate": 4.856640674247363e-05, "loss": 0.0002, "step": 758 }, { "epoch": 0.2938443670150987, "grad_norm": 0.0019830456003546715, "learning_rate": 4.8460247072191496e-05, "loss": 0.0, "step": 759 }, { "epoch": 0.2942315137437089, "grad_norm": 0.0007783794426359236, "learning_rate": 4.8354094349180885e-05, "loss": 0.0, "step": 760 }, { "epoch": 0.294618660472319, "grad_norm": 0.0003296427894383669, "learning_rate": 4.82479490523964e-05, "loss": 0.0, "step": 761 }, { "epoch": 0.29500580720092917, "grad_norm": 0.0009307400323450565, "learning_rate": 4.8141811660759185e-05, "loss": 0.0, "step": 762 }, { "epoch": 0.2953929539295393, "grad_norm": 0.0011843329994007945, "learning_rate": 4.8035682653154704e-05, "loss": 0.0, "step": 763 }, { "epoch": 0.2957801006581494, "grad_norm": 0.001043383264914155, "learning_rate": 4.7929562508430556e-05, "loss": 0.0, "step": 764 }, { "epoch": 0.2961672473867596, "grad_norm": 0.00028426575590856373, "learning_rate": 4.782345170539441e-05, "loss": 0.0, "step": 765 }, { "epoch": 0.2965543941153697, "grad_norm": 0.17862246930599213, "learning_rate": 4.7717350722811705e-05, "loss": 0.0013, "step": 766 }, { "epoch": 0.2969415408439799, "grad_norm": 0.0018550584791228175, "learning_rate": 4.761126003940366e-05, "loss": 0.0, "step": 767 }, { "epoch": 0.29732868757259, "grad_norm": 0.0009288343135267496, "learning_rate": 4.7505180133844954e-05, "loss": 0.0, "step": 768 }, { "epoch": 0.29771583430120013, "grad_norm": 0.00047417895984835923, "learning_rate": 4.7399111484761674e-05, "loss": 0.0, "step": 769 }, { "epoch": 0.2981029810298103, "grad_norm": 0.0017211599042639136, "learning_rate": 4.729305457072913e-05, "loss": 0.0, "step": 770 }, { "epoch": 0.29849012775842043, "grad_norm": 0.0004953122115693986, "learning_rate": 4.718700987026964e-05, "loss": 0.0, "step": 771 }, { "epoch": 0.2988772744870306, "grad_norm": 0.0006206472753547132, "learning_rate": 4.708097786185046e-05, "loss": 0.0, "step": 772 }, { "epoch": 0.29926442121564073, "grad_norm": 0.00035984007990919054, "learning_rate": 4.697495902388154e-05, "loss": 0.0, "step": 773 }, { "epoch": 0.29965156794425085, "grad_norm": 0.0028558659832924604, "learning_rate": 4.6868953834713456e-05, "loss": 0.0, "step": 774 }, { "epoch": 0.30003871467286103, "grad_norm": 0.0013832409167662263, "learning_rate": 4.6762962772635125e-05, "loss": 0.0, "step": 775 }, { "epoch": 0.30042586140147115, "grad_norm": 0.0032838445622473955, "learning_rate": 4.665698631587182e-05, "loss": 0.0, "step": 776 }, { "epoch": 0.3008130081300813, "grad_norm": 0.0006759579991921782, "learning_rate": 4.655102494258281e-05, "loss": 0.0, "step": 777 }, { "epoch": 0.30120015485869145, "grad_norm": 0.0005392953753471375, "learning_rate": 4.644507913085942e-05, "loss": 0.0, "step": 778 }, { "epoch": 0.30158730158730157, "grad_norm": 0.005940568167716265, "learning_rate": 4.633914935872268e-05, "loss": 0.0, "step": 779 }, { "epoch": 0.30197444831591175, "grad_norm": 0.005116292275488377, "learning_rate": 4.623323610412126e-05, "loss": 0.0, "step": 780 }, { "epoch": 0.30236159504452187, "grad_norm": 0.0008851262973621488, "learning_rate": 4.612733984492936e-05, "loss": 0.0, "step": 781 }, { "epoch": 0.302748741773132, "grad_norm": 0.000602389860432595, "learning_rate": 4.602146105894442e-05, "loss": 0.0, "step": 782 }, { "epoch": 0.30313588850174217, "grad_norm": 0.0011187748750671744, "learning_rate": 4.591560022388511e-05, "loss": 0.0, "step": 783 }, { "epoch": 0.3035230352303523, "grad_norm": 0.0007747854106128216, "learning_rate": 4.580975781738905e-05, "loss": 0.0, "step": 784 }, { "epoch": 0.30391018195896247, "grad_norm": 0.021920830011367798, "learning_rate": 4.5703934317010735e-05, "loss": 0.0003, "step": 785 }, { "epoch": 0.3042973286875726, "grad_norm": 0.0004085338150616735, "learning_rate": 4.559813020021939e-05, "loss": 0.0, "step": 786 }, { "epoch": 0.3046844754161827, "grad_norm": 0.48235246539115906, "learning_rate": 4.549234594439674e-05, "loss": 0.0031, "step": 787 }, { "epoch": 0.3050716221447929, "grad_norm": 0.00026683625765144825, "learning_rate": 4.5386582026834906e-05, "loss": 0.0, "step": 788 }, { "epoch": 0.305458768873403, "grad_norm": 0.007353804539889097, "learning_rate": 4.528083892473424e-05, "loss": 0.0001, "step": 789 }, { "epoch": 0.3058459156020132, "grad_norm": 0.0011645752238109708, "learning_rate": 4.517511711520121e-05, "loss": 0.0, "step": 790 }, { "epoch": 0.3062330623306233, "grad_norm": 0.2283429056406021, "learning_rate": 4.506941707524615e-05, "loss": 0.0007, "step": 791 }, { "epoch": 0.30662020905923343, "grad_norm": 0.0008667742949910462, "learning_rate": 4.496373928178125e-05, "loss": 0.0, "step": 792 }, { "epoch": 0.3070073557878436, "grad_norm": 0.0012296324130147696, "learning_rate": 4.485808421161823e-05, "loss": 0.0, "step": 793 }, { "epoch": 0.3073945025164537, "grad_norm": 0.0036023801658302546, "learning_rate": 4.4752452341466395e-05, "loss": 0.0, "step": 794 }, { "epoch": 0.3077816492450639, "grad_norm": 0.001286810147576034, "learning_rate": 4.4646844147930294e-05, "loss": 0.0, "step": 795 }, { "epoch": 0.308168795973674, "grad_norm": 0.0008486775914207101, "learning_rate": 4.454126010750766e-05, "loss": 0.0, "step": 796 }, { "epoch": 0.30855594270228415, "grad_norm": 0.0011814788449555635, "learning_rate": 4.443570069658727e-05, "loss": 0.0, "step": 797 }, { "epoch": 0.3089430894308943, "grad_norm": 0.011497690342366695, "learning_rate": 4.433016639144676e-05, "loss": 0.0002, "step": 798 }, { "epoch": 0.30933023615950445, "grad_norm": 0.0019194751512259245, "learning_rate": 4.4224657668250495e-05, "loss": 0.0, "step": 799 }, { "epoch": 0.3097173828881146, "grad_norm": 1.5372258424758911, "learning_rate": 4.411917500304741e-05, "loss": 0.0304, "step": 800 }, { "epoch": 0.31010452961672474, "grad_norm": 0.04230941832065582, "learning_rate": 4.4013718871768855e-05, "loss": 0.0087, "step": 801 }, { "epoch": 0.31049167634533487, "grad_norm": 0.0006967206718400121, "learning_rate": 4.390828975022652e-05, "loss": 0.0, "step": 802 }, { "epoch": 0.31087882307394504, "grad_norm": 0.011427176184952259, "learning_rate": 4.380288811411015e-05, "loss": 0.0001, "step": 803 }, { "epoch": 0.31126596980255516, "grad_norm": 0.007503742817789316, "learning_rate": 4.3697514438985536e-05, "loss": 0.0, "step": 804 }, { "epoch": 0.3116531165311653, "grad_norm": 0.005879178177565336, "learning_rate": 4.359216920029227e-05, "loss": 0.0001, "step": 805 }, { "epoch": 0.31204026325977546, "grad_norm": 0.006521875504404306, "learning_rate": 4.3486852873341676e-05, "loss": 0.0001, "step": 806 }, { "epoch": 0.3124274099883856, "grad_norm": 0.0018767932197079062, "learning_rate": 4.33815659333146e-05, "loss": 0.0, "step": 807 }, { "epoch": 0.31281455671699576, "grad_norm": 0.006302808877080679, "learning_rate": 4.327630885525932e-05, "loss": 0.0001, "step": 808 }, { "epoch": 0.3132017034456059, "grad_norm": 0.0005288548418320715, "learning_rate": 4.3171082114089336e-05, "loss": 0.0, "step": 809 }, { "epoch": 0.313588850174216, "grad_norm": 0.000991316745057702, "learning_rate": 4.306588618458134e-05, "loss": 0.0, "step": 810 }, { "epoch": 0.3139759969028262, "grad_norm": 0.00023478269577026367, "learning_rate": 4.2960721541372964e-05, "loss": 0.0, "step": 811 }, { "epoch": 0.3143631436314363, "grad_norm": 0.007332727778702974, "learning_rate": 4.285558865896065e-05, "loss": 0.0, "step": 812 }, { "epoch": 0.3147502903600465, "grad_norm": 0.0019596214406192303, "learning_rate": 4.275048801169759e-05, "loss": 0.0, "step": 813 }, { "epoch": 0.3151374370886566, "grad_norm": 0.0015021011931821704, "learning_rate": 4.2645420073791496e-05, "loss": 0.0, "step": 814 }, { "epoch": 0.3155245838172667, "grad_norm": 0.0009233247837983072, "learning_rate": 4.254038531930253e-05, "loss": 0.0, "step": 815 }, { "epoch": 0.3159117305458769, "grad_norm": 0.00019637109653558582, "learning_rate": 4.2435384222141085e-05, "loss": 0.0, "step": 816 }, { "epoch": 0.316298877274487, "grad_norm": 0.00023914426856208593, "learning_rate": 4.233041725606572e-05, "loss": 0.0, "step": 817 }, { "epoch": 0.3166860240030972, "grad_norm": 0.00042257452150806785, "learning_rate": 4.222548489468105e-05, "loss": 0.0, "step": 818 }, { "epoch": 0.3170731707317073, "grad_norm": 0.0009111497784033418, "learning_rate": 4.2120587611435445e-05, "loss": 0.0, "step": 819 }, { "epoch": 0.31746031746031744, "grad_norm": 0.0006815852248109877, "learning_rate": 4.201572587961911e-05, "loss": 0.0, "step": 820 }, { "epoch": 0.3178474641889276, "grad_norm": 0.0004083358508069068, "learning_rate": 4.1910900172361764e-05, "loss": 0.0, "step": 821 }, { "epoch": 0.31823461091753774, "grad_norm": 0.001000471180304885, "learning_rate": 4.1806110962630644e-05, "loss": 0.0, "step": 822 }, { "epoch": 0.31862175764614786, "grad_norm": 0.0013421968324109912, "learning_rate": 4.1701358723228266e-05, "loss": 0.0, "step": 823 }, { "epoch": 0.31900890437475804, "grad_norm": 0.0023856195621192455, "learning_rate": 4.159664392679039e-05, "loss": 0.0, "step": 824 }, { "epoch": 0.31939605110336816, "grad_norm": 0.000275084690656513, "learning_rate": 4.1491967045783755e-05, "loss": 0.0, "step": 825 }, { "epoch": 0.31978319783197834, "grad_norm": 0.000643594772554934, "learning_rate": 4.138732855250412e-05, "loss": 0.0, "step": 826 }, { "epoch": 0.32017034456058846, "grad_norm": 0.00018649610865395516, "learning_rate": 4.128272891907401e-05, "loss": 0.0, "step": 827 }, { "epoch": 0.3205574912891986, "grad_norm": 0.004007772076874971, "learning_rate": 4.117816861744057e-05, "loss": 0.0, "step": 828 }, { "epoch": 0.32094463801780876, "grad_norm": 0.000278328952845186, "learning_rate": 4.107364811937355e-05, "loss": 0.0, "step": 829 }, { "epoch": 0.3213317847464189, "grad_norm": 0.0021861272398382425, "learning_rate": 4.096916789646305e-05, "loss": 0.0, "step": 830 }, { "epoch": 0.32171893147502906, "grad_norm": 0.00044286204501986504, "learning_rate": 4.0864728420117505e-05, "loss": 0.0, "step": 831 }, { "epoch": 0.3221060782036392, "grad_norm": 0.00034147806582041085, "learning_rate": 4.0760330161561464e-05, "loss": 0.0, "step": 832 }, { "epoch": 0.3224932249322493, "grad_norm": 0.001119753229431808, "learning_rate": 4.065597359183348e-05, "loss": 0.0, "step": 833 }, { "epoch": 0.3228803716608595, "grad_norm": 0.0005893710185773671, "learning_rate": 4.055165918178412e-05, "loss": 0.0, "step": 834 }, { "epoch": 0.3232675183894696, "grad_norm": 0.013795977458357811, "learning_rate": 4.04473874020736e-05, "loss": 0.0001, "step": 835 }, { "epoch": 0.3236546651180798, "grad_norm": 0.000739909999538213, "learning_rate": 4.034315872316988e-05, "loss": 0.0, "step": 836 }, { "epoch": 0.3240418118466899, "grad_norm": 0.0003141966590192169, "learning_rate": 4.0238973615346395e-05, "loss": 0.0, "step": 837 }, { "epoch": 0.3244289585753, "grad_norm": 0.0013074502348899841, "learning_rate": 4.0134832548680006e-05, "loss": 0.0, "step": 838 }, { "epoch": 0.3248161053039102, "grad_norm": 0.0003308141021989286, "learning_rate": 4.003073599304889e-05, "loss": 0.0, "step": 839 }, { "epoch": 0.3252032520325203, "grad_norm": 0.0002786907716654241, "learning_rate": 3.9926684418130355e-05, "loss": 0.0, "step": 840 }, { "epoch": 0.3255903987611305, "grad_norm": 0.008617997169494629, "learning_rate": 3.982267829339877e-05, "loss": 0.0, "step": 841 }, { "epoch": 0.3259775454897406, "grad_norm": 0.004956826567649841, "learning_rate": 3.971871808812348e-05, "loss": 0.0001, "step": 842 }, { "epoch": 0.32636469221835074, "grad_norm": 0.0003836003306787461, "learning_rate": 3.961480427136656e-05, "loss": 0.0, "step": 843 }, { "epoch": 0.3267518389469609, "grad_norm": 0.0010854278225451708, "learning_rate": 3.951093731198086e-05, "loss": 0.0, "step": 844 }, { "epoch": 0.32713898567557104, "grad_norm": 0.021701844409108162, "learning_rate": 3.9407117678607755e-05, "loss": 0.0002, "step": 845 }, { "epoch": 0.32752613240418116, "grad_norm": 0.0007447968819178641, "learning_rate": 3.930334583967514e-05, "loss": 0.0, "step": 846 }, { "epoch": 0.32791327913279134, "grad_norm": 0.00031065259827300906, "learning_rate": 3.919962226339521e-05, "loss": 0.0, "step": 847 }, { "epoch": 0.32830042586140146, "grad_norm": 0.003032098524272442, "learning_rate": 3.9095947417762465e-05, "loss": 0.0, "step": 848 }, { "epoch": 0.32868757259001163, "grad_norm": 0.0012839855626225471, "learning_rate": 3.899232177055145e-05, "loss": 0.0, "step": 849 }, { "epoch": 0.32907471931862176, "grad_norm": 0.00046888040378689766, "learning_rate": 3.888874578931482e-05, "loss": 0.0, "step": 850 }, { "epoch": 0.3294618660472319, "grad_norm": 0.03994712233543396, "learning_rate": 3.87852199413811e-05, "loss": 0.0035, "step": 851 }, { "epoch": 0.32984901277584205, "grad_norm": 0.00314146070741117, "learning_rate": 3.8681744693852595e-05, "loss": 0.0001, "step": 852 }, { "epoch": 0.3302361595044522, "grad_norm": 0.013279520906507969, "learning_rate": 3.857832051360335e-05, "loss": 0.0003, "step": 853 }, { "epoch": 0.33062330623306235, "grad_norm": 0.028156403452157974, "learning_rate": 3.847494786727694e-05, "loss": 0.0005, "step": 854 }, { "epoch": 0.3310104529616725, "grad_norm": 0.038008369505405426, "learning_rate": 3.8371627221284495e-05, "loss": 0.0006, "step": 855 }, { "epoch": 0.3313975996902826, "grad_norm": 0.006248411722481251, "learning_rate": 3.826835904180244e-05, "loss": 0.0001, "step": 856 }, { "epoch": 0.3317847464188928, "grad_norm": 0.00033054646337404847, "learning_rate": 3.816514379477054e-05, "loss": 0.0, "step": 857 }, { "epoch": 0.3321718931475029, "grad_norm": 0.0008822673698887229, "learning_rate": 3.8061981945889726e-05, "loss": 0.0, "step": 858 }, { "epoch": 0.33255903987611307, "grad_norm": 0.03951540216803551, "learning_rate": 3.795887396061995e-05, "loss": 0.0004, "step": 859 }, { "epoch": 0.3329461866047232, "grad_norm": 0.0011091261403635144, "learning_rate": 3.7855820304178204e-05, "loss": 0.0, "step": 860 }, { "epoch": 0.3333333333333333, "grad_norm": 0.00019632387557066977, "learning_rate": 3.775282144153627e-05, "loss": 0.0, "step": 861 }, { "epoch": 0.3337204800619435, "grad_norm": 0.015141610987484455, "learning_rate": 3.764987783741879e-05, "loss": 0.0001, "step": 862 }, { "epoch": 0.3341076267905536, "grad_norm": 0.0024289898574352264, "learning_rate": 3.754698995630101e-05, "loss": 0.0, "step": 863 }, { "epoch": 0.3344947735191638, "grad_norm": 0.030151624232530594, "learning_rate": 3.7444158262406824e-05, "loss": 0.0001, "step": 864 }, { "epoch": 0.3348819202477739, "grad_norm": 0.00045968167250975966, "learning_rate": 3.734138321970653e-05, "loss": 0.0, "step": 865 }, { "epoch": 0.33526906697638403, "grad_norm": 0.0013369093649089336, "learning_rate": 3.723866529191491e-05, "loss": 0.0, "step": 866 }, { "epoch": 0.3356562137049942, "grad_norm": 0.00015746480494271964, "learning_rate": 3.7136004942489e-05, "loss": 0.0, "step": 867 }, { "epoch": 0.33604336043360433, "grad_norm": 0.0004616810765583068, "learning_rate": 3.703340263462602e-05, "loss": 0.0, "step": 868 }, { "epoch": 0.33643050716221445, "grad_norm": 0.000538064050488174, "learning_rate": 3.693085883126137e-05, "loss": 0.0, "step": 869 }, { "epoch": 0.33681765389082463, "grad_norm": 0.0004215772496536374, "learning_rate": 3.6828373995066436e-05, "loss": 0.0, "step": 870 }, { "epoch": 0.33720480061943475, "grad_norm": 0.00042134601972065866, "learning_rate": 3.672594858844658e-05, "loss": 0.0, "step": 871 }, { "epoch": 0.33759194734804493, "grad_norm": 0.004986919928342104, "learning_rate": 3.6623583073538966e-05, "loss": 0.0001, "step": 872 }, { "epoch": 0.33797909407665505, "grad_norm": 0.0002799661597236991, "learning_rate": 3.65212779122106e-05, "loss": 0.0, "step": 873 }, { "epoch": 0.3383662408052652, "grad_norm": 0.0004925346001982689, "learning_rate": 3.641903356605614e-05, "loss": 0.0, "step": 874 }, { "epoch": 0.33875338753387535, "grad_norm": 0.001601546653546393, "learning_rate": 3.631685049639586e-05, "loss": 0.0, "step": 875 }, { "epoch": 0.33914053426248547, "grad_norm": 0.00030690181301906705, "learning_rate": 3.621472916427354e-05, "loss": 0.0, "step": 876 }, { "epoch": 0.33952768099109565, "grad_norm": 0.00021768384613096714, "learning_rate": 3.611267003045443e-05, "loss": 0.0, "step": 877 }, { "epoch": 0.33991482771970577, "grad_norm": 0.020688867196440697, "learning_rate": 3.601067355542312e-05, "loss": 0.0002, "step": 878 }, { "epoch": 0.3403019744483159, "grad_norm": 0.0002073254290735349, "learning_rate": 3.59087401993815e-05, "loss": 0.0, "step": 879 }, { "epoch": 0.34068912117692607, "grad_norm": 0.0022969204001128674, "learning_rate": 3.580687042224667e-05, "loss": 0.0, "step": 880 }, { "epoch": 0.3410762679055362, "grad_norm": 0.000548949814401567, "learning_rate": 3.5705064683648856e-05, "loss": 0.0, "step": 881 }, { "epoch": 0.34146341463414637, "grad_norm": 0.0054088798351585865, "learning_rate": 3.560332344292938e-05, "loss": 0.0, "step": 882 }, { "epoch": 0.3418505613627565, "grad_norm": 0.00040069175884127617, "learning_rate": 3.550164715913853e-05, "loss": 0.0, "step": 883 }, { "epoch": 0.3422377080913666, "grad_norm": 0.0004740517179016024, "learning_rate": 3.540003629103349e-05, "loss": 0.0, "step": 884 }, { "epoch": 0.3426248548199768, "grad_norm": 0.0020350434351712465, "learning_rate": 3.529849129707633e-05, "loss": 0.0, "step": 885 }, { "epoch": 0.3430120015485869, "grad_norm": 0.0003776460944209248, "learning_rate": 3.519701263543187e-05, "loss": 0.0, "step": 886 }, { "epoch": 0.34339914827719703, "grad_norm": 0.001164233428426087, "learning_rate": 3.5095600763965676e-05, "loss": 0.0, "step": 887 }, { "epoch": 0.3437862950058072, "grad_norm": 0.0004077065095771104, "learning_rate": 3.499425614024191e-05, "loss": 0.0, "step": 888 }, { "epoch": 0.34417344173441733, "grad_norm": 0.002573886886239052, "learning_rate": 3.489297922152136e-05, "loss": 0.0, "step": 889 }, { "epoch": 0.3445605884630275, "grad_norm": 0.02411019243299961, "learning_rate": 3.479177046475935e-05, "loss": 0.0002, "step": 890 }, { "epoch": 0.34494773519163763, "grad_norm": 0.001541433739475906, "learning_rate": 3.4690630326603604e-05, "loss": 0.0, "step": 891 }, { "epoch": 0.34533488192024775, "grad_norm": 0.05096811428666115, "learning_rate": 3.458955926339228e-05, "loss": 0.0003, "step": 892 }, { "epoch": 0.3457220286488579, "grad_norm": 0.0001714549434836954, "learning_rate": 3.448855773115185e-05, "loss": 0.0, "step": 893 }, { "epoch": 0.34610917537746805, "grad_norm": 0.0007654880173504353, "learning_rate": 3.438762618559511e-05, "loss": 0.0, "step": 894 }, { "epoch": 0.3464963221060782, "grad_norm": 0.34326171875, "learning_rate": 3.428676508211902e-05, "loss": 0.0018, "step": 895 }, { "epoch": 0.34688346883468835, "grad_norm": 0.003775347489863634, "learning_rate": 3.418597487580277e-05, "loss": 0.0001, "step": 896 }, { "epoch": 0.34727061556329847, "grad_norm": 0.0011271066032350063, "learning_rate": 3.408525602140561e-05, "loss": 0.0, "step": 897 }, { "epoch": 0.34765776229190865, "grad_norm": 0.00141782162245363, "learning_rate": 3.3984608973364904e-05, "loss": 0.0, "step": 898 }, { "epoch": 0.34804490902051877, "grad_norm": 0.006298588123172522, "learning_rate": 3.388403418579401e-05, "loss": 0.0001, "step": 899 }, { "epoch": 0.34843205574912894, "grad_norm": 0.057084690779447556, "learning_rate": 3.378353211248024e-05, "loss": 0.0006, "step": 900 }, { "epoch": 0.34881920247773907, "grad_norm": 0.03301633149385452, "learning_rate": 3.368310320688284e-05, "loss": 0.0017, "step": 901 }, { "epoch": 0.3492063492063492, "grad_norm": 0.03648944944143295, "learning_rate": 3.358274792213091e-05, "loss": 0.001, "step": 902 }, { "epoch": 0.34959349593495936, "grad_norm": 0.045417048037052155, "learning_rate": 3.34824667110214e-05, "loss": 0.0002, "step": 903 }, { "epoch": 0.3499806426635695, "grad_norm": 0.0014944113790988922, "learning_rate": 3.338226002601703e-05, "loss": 0.0, "step": 904 }, { "epoch": 0.35036778939217966, "grad_norm": 0.0017919113161042333, "learning_rate": 3.328212831924424e-05, "loss": 0.0, "step": 905 }, { "epoch": 0.3507549361207898, "grad_norm": 0.0006082098698243499, "learning_rate": 3.3182072042491244e-05, "loss": 0.0, "step": 906 }, { "epoch": 0.3511420828493999, "grad_norm": 0.010628577321767807, "learning_rate": 3.308209164720584e-05, "loss": 0.0001, "step": 907 }, { "epoch": 0.3515292295780101, "grad_norm": 0.08203943073749542, "learning_rate": 3.2982187584493515e-05, "loss": 0.0019, "step": 908 }, { "epoch": 0.3519163763066202, "grad_norm": 0.0009694083128124475, "learning_rate": 3.2882360305115294e-05, "loss": 0.0, "step": 909 }, { "epoch": 0.3523035230352303, "grad_norm": 0.00442282622680068, "learning_rate": 3.2782610259485813e-05, "loss": 0.0001, "step": 910 }, { "epoch": 0.3526906697638405, "grad_norm": 0.08702477067708969, "learning_rate": 3.268293789767118e-05, "loss": 0.0014, "step": 911 }, { "epoch": 0.3530778164924506, "grad_norm": 0.02207796275615692, "learning_rate": 3.2583343669387046e-05, "loss": 0.0001, "step": 912 }, { "epoch": 0.3534649632210608, "grad_norm": 0.0006780868279747665, "learning_rate": 3.248382802399649e-05, "loss": 0.0, "step": 913 }, { "epoch": 0.3538521099496709, "grad_norm": 0.02841506339609623, "learning_rate": 3.238439141050807e-05, "loss": 0.0003, "step": 914 }, { "epoch": 0.35423925667828104, "grad_norm": 0.00042213572305627167, "learning_rate": 3.228503427757374e-05, "loss": 0.0, "step": 915 }, { "epoch": 0.3546264034068912, "grad_norm": 0.0002454633649904281, "learning_rate": 3.218575707348681e-05, "loss": 0.0, "step": 916 }, { "epoch": 0.35501355013550134, "grad_norm": 0.0027822977863252163, "learning_rate": 3.208656024618002e-05, "loss": 0.0, "step": 917 }, { "epoch": 0.3554006968641115, "grad_norm": 0.020471172407269478, "learning_rate": 3.198744424322338e-05, "loss": 0.0004, "step": 918 }, { "epoch": 0.35578784359272164, "grad_norm": 0.0007699733250774443, "learning_rate": 3.188840951182229e-05, "loss": 0.0, "step": 919 }, { "epoch": 0.35617499032133176, "grad_norm": 0.00015266439004335552, "learning_rate": 3.178945649881543e-05, "loss": 0.0, "step": 920 }, { "epoch": 0.35656213704994194, "grad_norm": 0.00025495782028883696, "learning_rate": 3.169058565067274e-05, "loss": 0.0, "step": 921 }, { "epoch": 0.35694928377855206, "grad_norm": 8.566460019210353e-05, "learning_rate": 3.159179741349354e-05, "loss": 0.0, "step": 922 }, { "epoch": 0.35733643050716224, "grad_norm": 0.00040574869490228593, "learning_rate": 3.149309223300428e-05, "loss": 0.0, "step": 923 }, { "epoch": 0.35772357723577236, "grad_norm": 0.00045708048855885863, "learning_rate": 3.1394470554556764e-05, "loss": 0.0, "step": 924 }, { "epoch": 0.3581107239643825, "grad_norm": 0.0041862414218485355, "learning_rate": 3.1295932823125985e-05, "loss": 0.0, "step": 925 }, { "epoch": 0.35849787069299266, "grad_norm": 0.000689912005327642, "learning_rate": 3.1197479483308214e-05, "loss": 0.0, "step": 926 }, { "epoch": 0.3588850174216028, "grad_norm": 0.003948573488742113, "learning_rate": 3.109911097931891e-05, "loss": 0.0, "step": 927 }, { "epoch": 0.35927216415021296, "grad_norm": 0.00029136036755517125, "learning_rate": 3.1000827754990766e-05, "loss": 0.0, "step": 928 }, { "epoch": 0.3596593108788231, "grad_norm": 0.0020344445947557688, "learning_rate": 3.090263025377173e-05, "loss": 0.0, "step": 929 }, { "epoch": 0.3600464576074332, "grad_norm": 0.0002035750512732193, "learning_rate": 3.080451891872295e-05, "loss": 0.0, "step": 930 }, { "epoch": 0.3604336043360434, "grad_norm": 0.0010056099854409695, "learning_rate": 3.070649419251681e-05, "loss": 0.0, "step": 931 }, { "epoch": 0.3608207510646535, "grad_norm": 0.000317671918310225, "learning_rate": 3.06085565174349e-05, "loss": 0.0, "step": 932 }, { "epoch": 0.3612078977932636, "grad_norm": 0.0009133410057984293, "learning_rate": 3.0510706335366035e-05, "loss": 0.0, "step": 933 }, { "epoch": 0.3615950445218738, "grad_norm": 0.06602532416582108, "learning_rate": 3.041294408780431e-05, "loss": 0.0003, "step": 934 }, { "epoch": 0.3619821912504839, "grad_norm": 0.00024213208234868944, "learning_rate": 3.0315270215847013e-05, "loss": 0.0, "step": 935 }, { "epoch": 0.3623693379790941, "grad_norm": 0.0014147679321467876, "learning_rate": 3.0217685160192734e-05, "loss": 0.0, "step": 936 }, { "epoch": 0.3627564847077042, "grad_norm": 0.0003020058502443135, "learning_rate": 3.0120189361139273e-05, "loss": 0.0, "step": 937 }, { "epoch": 0.36314363143631434, "grad_norm": 0.003151725744828582, "learning_rate": 3.002278325858177e-05, "loss": 0.0, "step": 938 }, { "epoch": 0.3635307781649245, "grad_norm": 0.002396926749497652, "learning_rate": 2.9925467292010644e-05, "loss": 0.0, "step": 939 }, { "epoch": 0.36391792489353464, "grad_norm": 0.0011632463429123163, "learning_rate": 2.982824190050958e-05, "loss": 0.0, "step": 940 }, { "epoch": 0.3643050716221448, "grad_norm": 0.2228887379169464, "learning_rate": 2.973110752275366e-05, "loss": 0.001, "step": 941 }, { "epoch": 0.36469221835075494, "grad_norm": 0.0011429457226768136, "learning_rate": 2.963406459700726e-05, "loss": 0.0, "step": 942 }, { "epoch": 0.36507936507936506, "grad_norm": 0.00012259121285751462, "learning_rate": 2.9537113561122178e-05, "loss": 0.0, "step": 943 }, { "epoch": 0.36546651180797524, "grad_norm": 0.000644800195004791, "learning_rate": 2.944025485253557e-05, "loss": 0.0, "step": 944 }, { "epoch": 0.36585365853658536, "grad_norm": 0.00017125460726674646, "learning_rate": 2.9343488908268034e-05, "loss": 0.0, "step": 945 }, { "epoch": 0.36624080526519553, "grad_norm": 0.00020638681598939002, "learning_rate": 2.9246816164921655e-05, "loss": 0.0, "step": 946 }, { "epoch": 0.36662795199380566, "grad_norm": 0.0005838845972903073, "learning_rate": 2.915023705867794e-05, "loss": 0.0, "step": 947 }, { "epoch": 0.3670150987224158, "grad_norm": 0.000993402791209519, "learning_rate": 2.905375202529594e-05, "loss": 0.0, "step": 948 }, { "epoch": 0.36740224545102595, "grad_norm": 0.00029620109125971794, "learning_rate": 2.895736150011026e-05, "loss": 0.0, "step": 949 }, { "epoch": 0.3677893921796361, "grad_norm": 2.658259153366089, "learning_rate": 2.8861065918029085e-05, "loss": 0.0567, "step": 950 }, { "epoch": 0.3681765389082462, "grad_norm": 0.02834871970117092, "learning_rate": 2.8764865713532234e-05, "loss": 0.0021, "step": 951 }, { "epoch": 0.3685636856368564, "grad_norm": 0.03441345691680908, "learning_rate": 2.8668761320669156e-05, "loss": 0.0008, "step": 952 }, { "epoch": 0.3689508323654665, "grad_norm": 0.07143665105104446, "learning_rate": 2.8572753173057003e-05, "loss": 0.0004, "step": 953 }, { "epoch": 0.3693379790940767, "grad_norm": 0.06644494086503983, "learning_rate": 2.8476841703878772e-05, "loss": 0.0007, "step": 954 }, { "epoch": 0.3697251258226868, "grad_norm": 0.005595757160335779, "learning_rate": 2.8381027345881102e-05, "loss": 0.0001, "step": 955 }, { "epoch": 0.3701122725512969, "grad_norm": 0.13141833245754242, "learning_rate": 2.8285310531372573e-05, "loss": 0.0014, "step": 956 }, { "epoch": 0.3704994192799071, "grad_norm": 0.007661540526896715, "learning_rate": 2.8189691692221627e-05, "loss": 0.0, "step": 957 }, { "epoch": 0.3708865660085172, "grad_norm": 0.0015809281030669808, "learning_rate": 2.809417125985467e-05, "loss": 0.0001, "step": 958 }, { "epoch": 0.3712737127371274, "grad_norm": 0.007457795087248087, "learning_rate": 2.7998749665254033e-05, "loss": 0.0001, "step": 959 }, { "epoch": 0.3716608594657375, "grad_norm": 0.0005789248389191926, "learning_rate": 2.7903427338956177e-05, "loss": 0.0, "step": 960 }, { "epoch": 0.37204800619434764, "grad_norm": 0.00037478163721971214, "learning_rate": 2.7808204711049624e-05, "loss": 0.0, "step": 961 }, { "epoch": 0.3724351529229578, "grad_norm": 0.003515935968607664, "learning_rate": 2.771308221117309e-05, "loss": 0.0, "step": 962 }, { "epoch": 0.37282229965156793, "grad_norm": 0.0019339972641319036, "learning_rate": 2.7618060268513503e-05, "loss": 0.0, "step": 963 }, { "epoch": 0.3732094463801781, "grad_norm": 0.00024095486151054502, "learning_rate": 2.7523139311804074e-05, "loss": 0.0, "step": 964 }, { "epoch": 0.37359659310878823, "grad_norm": 0.0007112721796147525, "learning_rate": 2.742831976932242e-05, "loss": 0.0, "step": 965 }, { "epoch": 0.37398373983739835, "grad_norm": 0.0007284593302756548, "learning_rate": 2.7333602068888496e-05, "loss": 0.0, "step": 966 }, { "epoch": 0.37437088656600853, "grad_norm": 0.006084626540541649, "learning_rate": 2.7238986637862833e-05, "loss": 0.0, "step": 967 }, { "epoch": 0.37475803329461865, "grad_norm": 0.000507039949297905, "learning_rate": 2.7144473903144496e-05, "loss": 0.0, "step": 968 }, { "epoch": 0.37514518002322883, "grad_norm": 0.0019885883666574955, "learning_rate": 2.7050064291169187e-05, "loss": 0.0, "step": 969 }, { "epoch": 0.37553232675183895, "grad_norm": 0.04670470952987671, "learning_rate": 2.6955758227907335e-05, "loss": 0.0002, "step": 970 }, { "epoch": 0.3759194734804491, "grad_norm": 0.00021539766748901457, "learning_rate": 2.6861556138862155e-05, "loss": 0.0, "step": 971 }, { "epoch": 0.37630662020905925, "grad_norm": 0.0003146221279166639, "learning_rate": 2.676745844906776e-05, "loss": 0.0, "step": 972 }, { "epoch": 0.37669376693766937, "grad_norm": 0.0013595115160569549, "learning_rate": 2.6673465583087176e-05, "loss": 0.0, "step": 973 }, { "epoch": 0.3770809136662795, "grad_norm": 0.004096793942153454, "learning_rate": 2.65795779650105e-05, "loss": 0.0, "step": 974 }, { "epoch": 0.37746806039488967, "grad_norm": 0.0003931306710001081, "learning_rate": 2.648579601845295e-05, "loss": 0.0, "step": 975 }, { "epoch": 0.3778552071234998, "grad_norm": 0.00021391961490735412, "learning_rate": 2.6392120166552992e-05, "loss": 0.0, "step": 976 }, { "epoch": 0.37824235385210997, "grad_norm": 0.00013036343443673104, "learning_rate": 2.629855083197031e-05, "loss": 0.0, "step": 977 }, { "epoch": 0.3786295005807201, "grad_norm": 0.00223020208068192, "learning_rate": 2.6205088436884095e-05, "loss": 0.0, "step": 978 }, { "epoch": 0.3790166473093302, "grad_norm": 0.0004039799969177693, "learning_rate": 2.6111733402991e-05, "loss": 0.0, "step": 979 }, { "epoch": 0.3794037940379404, "grad_norm": 0.004602463450282812, "learning_rate": 2.6018486151503213e-05, "loss": 0.0001, "step": 980 }, { "epoch": 0.3797909407665505, "grad_norm": 0.00020700599998235703, "learning_rate": 2.592534710314668e-05, "loss": 0.0, "step": 981 }, { "epoch": 0.3801780874951607, "grad_norm": 0.0005082901916466653, "learning_rate": 2.5832316678159118e-05, "loss": 0.0, "step": 982 }, { "epoch": 0.3805652342237708, "grad_norm": 0.00023574243823532015, "learning_rate": 2.573939529628816e-05, "loss": 0.0, "step": 983 }, { "epoch": 0.38095238095238093, "grad_norm": 0.00044681079452857375, "learning_rate": 2.564658337678939e-05, "loss": 0.0, "step": 984 }, { "epoch": 0.3813395276809911, "grad_norm": 0.00034449860686436296, "learning_rate": 2.555388133842455e-05, "loss": 0.0, "step": 985 }, { "epoch": 0.38172667440960123, "grad_norm": 0.00018269941210746765, "learning_rate": 2.5461289599459646e-05, "loss": 0.0, "step": 986 }, { "epoch": 0.3821138211382114, "grad_norm": 0.00023150882043410093, "learning_rate": 2.5368808577662913e-05, "loss": 0.0, "step": 987 }, { "epoch": 0.38250096786682153, "grad_norm": 0.00035011590807698667, "learning_rate": 2.5276438690303112e-05, "loss": 0.0, "step": 988 }, { "epoch": 0.38288811459543165, "grad_norm": 0.0003966555814258754, "learning_rate": 2.518418035414756e-05, "loss": 0.0, "step": 989 }, { "epoch": 0.3832752613240418, "grad_norm": 0.002989868400618434, "learning_rate": 2.5092033985460265e-05, "loss": 0.0, "step": 990 }, { "epoch": 0.38366240805265195, "grad_norm": 0.0004271006619092077, "learning_rate": 2.500000000000001e-05, "loss": 0.0, "step": 991 }, { "epoch": 0.3840495547812621, "grad_norm": 0.0008473137859255075, "learning_rate": 2.490807881301855e-05, "loss": 0.0, "step": 992 }, { "epoch": 0.38443670150987225, "grad_norm": 1.8476226329803467, "learning_rate": 2.481627083925869e-05, "loss": 0.0797, "step": 993 }, { "epoch": 0.38482384823848237, "grad_norm": 0.00014391196600627154, "learning_rate": 2.472457649295241e-05, "loss": 0.0, "step": 994 }, { "epoch": 0.38521099496709255, "grad_norm": 0.0013244638685137033, "learning_rate": 2.4632996187819034e-05, "loss": 0.0, "step": 995 }, { "epoch": 0.38559814169570267, "grad_norm": 0.006371075287461281, "learning_rate": 2.4541530337063325e-05, "loss": 0.0001, "step": 996 }, { "epoch": 0.3859852884243128, "grad_norm": 0.0009385327575728297, "learning_rate": 2.445017935337365e-05, "loss": 0.0, "step": 997 }, { "epoch": 0.38637243515292297, "grad_norm": 0.01769573800265789, "learning_rate": 2.435894364892005e-05, "loss": 0.0001, "step": 998 }, { "epoch": 0.3867595818815331, "grad_norm": 0.003815864212810993, "learning_rate": 2.42678236353525e-05, "loss": 0.0001, "step": 999 }, { "epoch": 0.38714672861014326, "grad_norm": 0.020971521735191345, "learning_rate": 2.4176819723798948e-05, "loss": 0.0001, "step": 1000 }, { "epoch": 0.3875338753387534, "grad_norm": 0.0321972593665123, "learning_rate": 2.408593232486351e-05, "loss": 0.003, "step": 1001 }, { "epoch": 0.3879210220673635, "grad_norm": 0.0008187716011889279, "learning_rate": 2.3995161848624597e-05, "loss": 0.0, "step": 1002 }, { "epoch": 0.3883081687959737, "grad_norm": 0.020097631961107254, "learning_rate": 2.390450870463307e-05, "loss": 0.0004, "step": 1003 }, { "epoch": 0.3886953155245838, "grad_norm": 0.0003399544220883399, "learning_rate": 2.381397330191043e-05, "loss": 0.0, "step": 1004 }, { "epoch": 0.389082462253194, "grad_norm": 0.048977453261613846, "learning_rate": 2.372355604894686e-05, "loss": 0.0025, "step": 1005 }, { "epoch": 0.3894696089818041, "grad_norm": 0.0018951277015730739, "learning_rate": 2.3633257353699524e-05, "loss": 0.0, "step": 1006 }, { "epoch": 0.3898567557104142, "grad_norm": 0.0007534907781518996, "learning_rate": 2.354307762359064e-05, "loss": 0.0, "step": 1007 }, { "epoch": 0.3902439024390244, "grad_norm": 0.00365415564738214, "learning_rate": 2.3453017265505673e-05, "loss": 0.0001, "step": 1008 }, { "epoch": 0.3906310491676345, "grad_norm": 0.07262806594371796, "learning_rate": 2.3363076685791435e-05, "loss": 0.0021, "step": 1009 }, { "epoch": 0.3910181958962447, "grad_norm": 0.0027157345321029425, "learning_rate": 2.3273256290254402e-05, "loss": 0.0, "step": 1010 }, { "epoch": 0.3914053426248548, "grad_norm": 0.002876731101423502, "learning_rate": 2.3183556484158736e-05, "loss": 0.0, "step": 1011 }, { "epoch": 0.39179248935346495, "grad_norm": 0.0003100191242992878, "learning_rate": 2.309397767222446e-05, "loss": 0.0, "step": 1012 }, { "epoch": 0.3921796360820751, "grad_norm": 0.0007354211411438882, "learning_rate": 2.300452025862574e-05, "loss": 0.0, "step": 1013 }, { "epoch": 0.39256678281068524, "grad_norm": 0.16168968379497528, "learning_rate": 2.291518464698899e-05, "loss": 0.0013, "step": 1014 }, { "epoch": 0.39295392953929537, "grad_norm": 0.0008131096255965531, "learning_rate": 2.282597124039107e-05, "loss": 0.0, "step": 1015 }, { "epoch": 0.39334107626790554, "grad_norm": 0.0007273323717527092, "learning_rate": 2.27368804413574e-05, "loss": 0.0, "step": 1016 }, { "epoch": 0.39372822299651566, "grad_norm": 0.0019192282343283296, "learning_rate": 2.2647912651860252e-05, "loss": 0.0, "step": 1017 }, { "epoch": 0.39411536972512584, "grad_norm": 0.0010810550302267075, "learning_rate": 2.255906827331693e-05, "loss": 0.0, "step": 1018 }, { "epoch": 0.39450251645373596, "grad_norm": 0.0015484696486964822, "learning_rate": 2.2470347706587813e-05, "loss": 0.0, "step": 1019 }, { "epoch": 0.3948896631823461, "grad_norm": 0.0008098200778476894, "learning_rate": 2.238175135197471e-05, "loss": 0.0, "step": 1020 }, { "epoch": 0.39527680991095626, "grad_norm": 0.0020570175256580114, "learning_rate": 2.2293279609219003e-05, "loss": 0.0, "step": 1021 }, { "epoch": 0.3956639566395664, "grad_norm": 0.0012723479885607958, "learning_rate": 2.220493287749978e-05, "loss": 0.0, "step": 1022 }, { "epoch": 0.39605110336817656, "grad_norm": 0.0003519760794006288, "learning_rate": 2.2116711555432136e-05, "loss": 0.0, "step": 1023 }, { "epoch": 0.3964382500967867, "grad_norm": 0.00034122299985028803, "learning_rate": 2.2028616041065304e-05, "loss": 0.0, "step": 1024 }, { "epoch": 0.3968253968253968, "grad_norm": 0.026362977921962738, "learning_rate": 2.194064673188089e-05, "loss": 0.0002, "step": 1025 }, { "epoch": 0.397212543554007, "grad_norm": 0.00036965496838092804, "learning_rate": 2.185280402479105e-05, "loss": 0.0, "step": 1026 }, { "epoch": 0.3975996902826171, "grad_norm": 0.11109102517366409, "learning_rate": 2.1765088316136755e-05, "loss": 0.001, "step": 1027 }, { "epoch": 0.3979868370112273, "grad_norm": 0.042729608714580536, "learning_rate": 2.1677500001685945e-05, "loss": 0.0004, "step": 1028 }, { "epoch": 0.3983739837398374, "grad_norm": 0.00787183828651905, "learning_rate": 2.159003947663174e-05, "loss": 0.0001, "step": 1029 }, { "epoch": 0.3987611304684475, "grad_norm": 0.003600550815463066, "learning_rate": 2.1502707135590723e-05, "loss": 0.0, "step": 1030 }, { "epoch": 0.3991482771970577, "grad_norm": 0.007428227458149195, "learning_rate": 2.1415503372601096e-05, "loss": 0.0001, "step": 1031 }, { "epoch": 0.3995354239256678, "grad_norm": 0.0009940440068021417, "learning_rate": 2.1328428581120964e-05, "loss": 0.0, "step": 1032 }, { "epoch": 0.399922570654278, "grad_norm": 0.00677646417170763, "learning_rate": 2.124148315402642e-05, "loss": 0.0001, "step": 1033 }, { "epoch": 0.4003097173828881, "grad_norm": 0.0008721842896193266, "learning_rate": 2.1154667483609998e-05, "loss": 0.0, "step": 1034 }, { "epoch": 0.40069686411149824, "grad_norm": 0.00037974066799506545, "learning_rate": 2.106798196157872e-05, "loss": 0.0, "step": 1035 }, { "epoch": 0.4010840108401084, "grad_norm": 0.0008370023570023477, "learning_rate": 2.098142697905236e-05, "loss": 0.0, "step": 1036 }, { "epoch": 0.40147115756871854, "grad_norm": 0.0004361419996712357, "learning_rate": 2.0895002926561736e-05, "loss": 0.0, "step": 1037 }, { "epoch": 0.40185830429732866, "grad_norm": 0.0016112083103507757, "learning_rate": 2.080871019404692e-05, "loss": 0.0, "step": 1038 }, { "epoch": 0.40224545102593884, "grad_norm": 0.0007014021975919604, "learning_rate": 2.0722549170855494e-05, "loss": 0.0, "step": 1039 }, { "epoch": 0.40263259775454896, "grad_norm": 0.00015222003275994211, "learning_rate": 2.063652024574071e-05, "loss": 0.0, "step": 1040 }, { "epoch": 0.40301974448315914, "grad_norm": 0.00020387036784086376, "learning_rate": 2.0550623806859844e-05, "loss": 0.0, "step": 1041 }, { "epoch": 0.40340689121176926, "grad_norm": 0.004351683426648378, "learning_rate": 2.0464860241772455e-05, "loss": 0.0, "step": 1042 }, { "epoch": 0.4037940379403794, "grad_norm": 0.00455818697810173, "learning_rate": 2.0379229937438477e-05, "loss": 0.0001, "step": 1043 }, { "epoch": 0.40418118466898956, "grad_norm": 0.000623556668870151, "learning_rate": 2.0293733280216648e-05, "loss": 0.0, "step": 1044 }, { "epoch": 0.4045683313975997, "grad_norm": 0.0013906165258958936, "learning_rate": 2.020837065586269e-05, "loss": 0.0, "step": 1045 }, { "epoch": 0.40495547812620986, "grad_norm": 0.003972301259636879, "learning_rate": 2.0123142449527584e-05, "loss": 0.0001, "step": 1046 }, { "epoch": 0.40534262485482, "grad_norm": 0.00038339104503393173, "learning_rate": 2.0038049045755775e-05, "loss": 0.0, "step": 1047 }, { "epoch": 0.4057297715834301, "grad_norm": 0.00043085686047561467, "learning_rate": 1.995309082848354e-05, "loss": 0.0, "step": 1048 }, { "epoch": 0.4061169183120403, "grad_norm": 0.0012404817389324307, "learning_rate": 1.9868268181037185e-05, "loss": 0.0, "step": 1049 }, { "epoch": 0.4065040650406504, "grad_norm": 0.0011212611570954323, "learning_rate": 1.978358148613134e-05, "loss": 0.0, "step": 1050 }, { "epoch": 0.4068912117692606, "grad_norm": 0.039180099964141846, "learning_rate": 1.9699031125867213e-05, "loss": 0.0023, "step": 1051 }, { "epoch": 0.4072783584978707, "grad_norm": 0.02465960942208767, "learning_rate": 1.9614617481730883e-05, "loss": 0.0004, "step": 1052 }, { "epoch": 0.4076655052264808, "grad_norm": 0.013811935670673847, "learning_rate": 1.9530340934591608e-05, "loss": 0.0003, "step": 1053 }, { "epoch": 0.408052651955091, "grad_norm": 0.008351047523319721, "learning_rate": 1.9446201864699987e-05, "loss": 0.0002, "step": 1054 }, { "epoch": 0.4084397986837011, "grad_norm": 0.0017275417922064662, "learning_rate": 1.9362200651686407e-05, "loss": 0.0001, "step": 1055 }, { "epoch": 0.4088269454123113, "grad_norm": 0.031212275847792625, "learning_rate": 1.9278337674559237e-05, "loss": 0.0005, "step": 1056 }, { "epoch": 0.4092140921409214, "grad_norm": 0.001833677408285439, "learning_rate": 1.919461331170312e-05, "loss": 0.0001, "step": 1057 }, { "epoch": 0.40960123886953154, "grad_norm": 0.00048409271403215826, "learning_rate": 1.9111027940877284e-05, "loss": 0.0, "step": 1058 }, { "epoch": 0.4099883855981417, "grad_norm": 0.004958099219948053, "learning_rate": 1.902758193921385e-05, "loss": 0.0001, "step": 1059 }, { "epoch": 0.41037553232675184, "grad_norm": 0.0003586974926292896, "learning_rate": 1.89442756832161e-05, "loss": 0.0, "step": 1060 }, { "epoch": 0.41076267905536196, "grad_norm": 0.004265904892235994, "learning_rate": 1.8861109548756766e-05, "loss": 0.0, "step": 1061 }, { "epoch": 0.41114982578397213, "grad_norm": 0.0007765475893393159, "learning_rate": 1.877808391107641e-05, "loss": 0.0, "step": 1062 }, { "epoch": 0.41153697251258226, "grad_norm": 0.003014490008354187, "learning_rate": 1.8695199144781645e-05, "loss": 0.0001, "step": 1063 }, { "epoch": 0.41192411924119243, "grad_norm": 0.0007445579976774752, "learning_rate": 1.861245562384351e-05, "loss": 0.0, "step": 1064 }, { "epoch": 0.41231126596980255, "grad_norm": 0.0004886957467533648, "learning_rate": 1.8529853721595697e-05, "loss": 0.0, "step": 1065 }, { "epoch": 0.4126984126984127, "grad_norm": 0.0007616803050041199, "learning_rate": 1.844739381073301e-05, "loss": 0.0, "step": 1066 }, { "epoch": 0.41308555942702285, "grad_norm": 0.0005557533586397767, "learning_rate": 1.8365076263309545e-05, "loss": 0.0, "step": 1067 }, { "epoch": 0.413472706155633, "grad_norm": 0.00033164405613206327, "learning_rate": 1.8282901450737033e-05, "loss": 0.0, "step": 1068 }, { "epoch": 0.41385985288424315, "grad_norm": 0.000697499024681747, "learning_rate": 1.8200869743783245e-05, "loss": 0.0, "step": 1069 }, { "epoch": 0.4142469996128533, "grad_norm": 0.008421828970313072, "learning_rate": 1.8118981512570253e-05, "loss": 0.0001, "step": 1070 }, { "epoch": 0.4146341463414634, "grad_norm": 0.0010353690013289452, "learning_rate": 1.80372371265728e-05, "loss": 0.0, "step": 1071 }, { "epoch": 0.41502129307007357, "grad_norm": 0.0004474584711715579, "learning_rate": 1.795563695461653e-05, "loss": 0.0, "step": 1072 }, { "epoch": 0.4154084397986837, "grad_norm": 0.0019867317751049995, "learning_rate": 1.7874181364876464e-05, "loss": 0.0, "step": 1073 }, { "epoch": 0.41579558652729387, "grad_norm": 0.001235696254298091, "learning_rate": 1.7792870724875314e-05, "loss": 0.0, "step": 1074 }, { "epoch": 0.416182733255904, "grad_norm": 0.0007534808246418834, "learning_rate": 1.77117054014817e-05, "loss": 0.0, "step": 1075 }, { "epoch": 0.4165698799845141, "grad_norm": 0.0005520595004782081, "learning_rate": 1.7630685760908622e-05, "loss": 0.0, "step": 1076 }, { "epoch": 0.4169570267131243, "grad_norm": 0.0005588960484601557, "learning_rate": 1.7549812168711777e-05, "loss": 0.0, "step": 1077 }, { "epoch": 0.4173441734417344, "grad_norm": 0.0007228697068057954, "learning_rate": 1.746908498978791e-05, "loss": 0.0, "step": 1078 }, { "epoch": 0.41773132017034453, "grad_norm": 0.00042682827915996313, "learning_rate": 1.73885045883731e-05, "loss": 0.0, "step": 1079 }, { "epoch": 0.4181184668989547, "grad_norm": 0.0011390242725610733, "learning_rate": 1.7308071328041243e-05, "loss": 0.0, "step": 1080 }, { "epoch": 0.41850561362756483, "grad_norm": 0.00039792185998521745, "learning_rate": 1.7227785571702303e-05, "loss": 0.0, "step": 1081 }, { "epoch": 0.418892760356175, "grad_norm": 0.0003800159611273557, "learning_rate": 1.7147647681600738e-05, "loss": 0.0, "step": 1082 }, { "epoch": 0.41927990708478513, "grad_norm": 0.0011553197400644422, "learning_rate": 1.7067658019313826e-05, "loss": 0.0, "step": 1083 }, { "epoch": 0.41966705381339525, "grad_norm": 0.0001602371339686215, "learning_rate": 1.6987816945750057e-05, "loss": 0.0, "step": 1084 }, { "epoch": 0.42005420054200543, "grad_norm": 0.0007585774874314666, "learning_rate": 1.690812482114752e-05, "loss": 0.0, "step": 1085 }, { "epoch": 0.42044134727061555, "grad_norm": 0.0005152882658876479, "learning_rate": 1.6828582005072187e-05, "loss": 0.0, "step": 1086 }, { "epoch": 0.42082849399922573, "grad_norm": 0.00029434141470119357, "learning_rate": 1.6749188856416425e-05, "loss": 0.0, "step": 1087 }, { "epoch": 0.42121564072783585, "grad_norm": 0.0014791539870202541, "learning_rate": 1.666994573339729e-05, "loss": 0.0, "step": 1088 }, { "epoch": 0.42160278745644597, "grad_norm": 0.0017479541711509228, "learning_rate": 1.6590852993554935e-05, "loss": 0.0, "step": 1089 }, { "epoch": 0.42198993418505615, "grad_norm": 0.0161809790879488, "learning_rate": 1.651191099375099e-05, "loss": 0.0002, "step": 1090 }, { "epoch": 0.42237708091366627, "grad_norm": 0.0017758719623088837, "learning_rate": 1.6433120090166943e-05, "loss": 0.0, "step": 1091 }, { "epoch": 0.42276422764227645, "grad_norm": 0.0008196650887839496, "learning_rate": 1.6354480638302588e-05, "loss": 0.0, "step": 1092 }, { "epoch": 0.42315137437088657, "grad_norm": 0.0007899802876636386, "learning_rate": 1.6275992992974308e-05, "loss": 0.0, "step": 1093 }, { "epoch": 0.4235385210994967, "grad_norm": 0.0006489930092357099, "learning_rate": 1.6197657508313597e-05, "loss": 0.0, "step": 1094 }, { "epoch": 0.42392566782810687, "grad_norm": 0.0010093534365296364, "learning_rate": 1.6119474537765395e-05, "loss": 0.0, "step": 1095 }, { "epoch": 0.424312814556717, "grad_norm": 0.0457945354282856, "learning_rate": 1.604144443408653e-05, "loss": 0.0005, "step": 1096 }, { "epoch": 0.42469996128532717, "grad_norm": 0.009583662264049053, "learning_rate": 1.5963567549344028e-05, "loss": 0.0001, "step": 1097 }, { "epoch": 0.4250871080139373, "grad_norm": 0.0023053644690662622, "learning_rate": 1.58858442349137e-05, "loss": 0.0, "step": 1098 }, { "epoch": 0.4254742547425474, "grad_norm": 0.0012813667999580503, "learning_rate": 1.580827484147842e-05, "loss": 0.0, "step": 1099 }, { "epoch": 0.4258614014711576, "grad_norm": 0.001665553660131991, "learning_rate": 1.5730859719026536e-05, "loss": 0.0, "step": 1100 }, { "epoch": 0.4262485481997677, "grad_norm": 0.03397725149989128, "learning_rate": 1.5653599216850378e-05, "loss": 0.0026, "step": 1101 }, { "epoch": 0.42663569492837783, "grad_norm": 0.0028618781361728907, "learning_rate": 1.557649368354464e-05, "loss": 0.0001, "step": 1102 }, { "epoch": 0.427022841656988, "grad_norm": 0.0466277040541172, "learning_rate": 1.5499543467004813e-05, "loss": 0.0011, "step": 1103 }, { "epoch": 0.4274099883855981, "grad_norm": 0.0036304357927292585, "learning_rate": 1.5422748914425557e-05, "loss": 0.0001, "step": 1104 }, { "epoch": 0.4277971351142083, "grad_norm": 0.0021317729260772467, "learning_rate": 1.5346110372299216e-05, "loss": 0.0, "step": 1105 }, { "epoch": 0.4281842818428184, "grad_norm": 0.0051021394319832325, "learning_rate": 1.526962818641428e-05, "loss": 0.0001, "step": 1106 }, { "epoch": 0.42857142857142855, "grad_norm": 0.0004642395942937583, "learning_rate": 1.5193302701853673e-05, "loss": 0.0, "step": 1107 }, { "epoch": 0.4289585753000387, "grad_norm": 0.0023515617940574884, "learning_rate": 1.5117134262993348e-05, "loss": 0.0, "step": 1108 }, { "epoch": 0.42934572202864885, "grad_norm": 0.004486995283514261, "learning_rate": 1.5041123213500674e-05, "loss": 0.0001, "step": 1109 }, { "epoch": 0.429732868757259, "grad_norm": 0.03434832766652107, "learning_rate": 1.4965269896332885e-05, "loss": 0.0004, "step": 1110 }, { "epoch": 0.43012001548586914, "grad_norm": 0.06803593039512634, "learning_rate": 1.488957465373551e-05, "loss": 0.0015, "step": 1111 }, { "epoch": 0.43050716221447927, "grad_norm": 0.0014034640043973923, "learning_rate": 1.4814037827240895e-05, "loss": 0.0, "step": 1112 }, { "epoch": 0.43089430894308944, "grad_norm": 0.00042223307536914945, "learning_rate": 1.47386597576666e-05, "loss": 0.0, "step": 1113 }, { "epoch": 0.43128145567169957, "grad_norm": 0.0007316286792047322, "learning_rate": 1.466344078511389e-05, "loss": 0.0, "step": 1114 }, { "epoch": 0.43166860240030974, "grad_norm": 0.0007396416622214019, "learning_rate": 1.4588381248966187e-05, "loss": 0.0, "step": 1115 }, { "epoch": 0.43205574912891986, "grad_norm": 0.0020533555652946234, "learning_rate": 1.4513481487887577e-05, "loss": 0.0, "step": 1116 }, { "epoch": 0.43244289585753, "grad_norm": 0.0023500213865190744, "learning_rate": 1.4438741839821185e-05, "loss": 0.0, "step": 1117 }, { "epoch": 0.43283004258614016, "grad_norm": 0.0006268412689678371, "learning_rate": 1.4364162641987777e-05, "loss": 0.0, "step": 1118 }, { "epoch": 0.4332171893147503, "grad_norm": 0.01405635941773653, "learning_rate": 1.4289744230884144e-05, "loss": 0.0001, "step": 1119 }, { "epoch": 0.43360433604336046, "grad_norm": 0.0008690317044965923, "learning_rate": 1.4215486942281658e-05, "loss": 0.0, "step": 1120 }, { "epoch": 0.4339914827719706, "grad_norm": 0.0021271114237606525, "learning_rate": 1.4141391111224633e-05, "loss": 0.0, "step": 1121 }, { "epoch": 0.4343786295005807, "grad_norm": 0.3007533848285675, "learning_rate": 1.4067457072029e-05, "loss": 0.0013, "step": 1122 }, { "epoch": 0.4347657762291909, "grad_norm": 0.00041973349289037287, "learning_rate": 1.3993685158280644e-05, "loss": 0.0, "step": 1123 }, { "epoch": 0.435152922957801, "grad_norm": 0.0007569995941594243, "learning_rate": 1.3920075702833918e-05, "loss": 0.0, "step": 1124 }, { "epoch": 0.4355400696864111, "grad_norm": 0.0018684781389310956, "learning_rate": 1.384662903781022e-05, "loss": 0.0, "step": 1125 }, { "epoch": 0.4359272164150213, "grad_norm": 0.00015323165280278772, "learning_rate": 1.3773345494596435e-05, "loss": 0.0, "step": 1126 }, { "epoch": 0.4363143631436314, "grad_norm": 0.0013038904871791601, "learning_rate": 1.3700225403843469e-05, "loss": 0.0, "step": 1127 }, { "epoch": 0.4367015098722416, "grad_norm": 0.0009059642907232046, "learning_rate": 1.3627269095464695e-05, "loss": 0.0, "step": 1128 }, { "epoch": 0.4370886566008517, "grad_norm": 0.0026266034692525864, "learning_rate": 1.3554476898634528e-05, "loss": 0.0, "step": 1129 }, { "epoch": 0.43747580332946184, "grad_norm": 0.0018995004938915372, "learning_rate": 1.3481849141786979e-05, "loss": 0.0, "step": 1130 }, { "epoch": 0.437862950058072, "grad_norm": 0.000616900681052357, "learning_rate": 1.3409386152614023e-05, "loss": 0.0, "step": 1131 }, { "epoch": 0.43825009678668214, "grad_norm": 0.11412354558706284, "learning_rate": 1.3337088258064278e-05, "loss": 0.0009, "step": 1132 }, { "epoch": 0.4386372435152923, "grad_norm": 0.004002030473202467, "learning_rate": 1.3264955784341438e-05, "loss": 0.0001, "step": 1133 }, { "epoch": 0.43902439024390244, "grad_norm": 0.09928884357213974, "learning_rate": 1.3192989056902854e-05, "loss": 0.0006, "step": 1134 }, { "epoch": 0.43941153697251256, "grad_norm": 0.001109429053030908, "learning_rate": 1.312118840045799e-05, "loss": 0.0, "step": 1135 }, { "epoch": 0.43979868370112274, "grad_norm": 0.0007349811494350433, "learning_rate": 1.3049554138967051e-05, "loss": 0.0, "step": 1136 }, { "epoch": 0.44018583042973286, "grad_norm": 0.0004371061804704368, "learning_rate": 1.2978086595639471e-05, "loss": 0.0, "step": 1137 }, { "epoch": 0.44057297715834304, "grad_norm": 0.0006483121542260051, "learning_rate": 1.290678609293246e-05, "loss": 0.0, "step": 1138 }, { "epoch": 0.44096012388695316, "grad_norm": 0.0007824698113836348, "learning_rate": 1.2835652952549537e-05, "loss": 0.0, "step": 1139 }, { "epoch": 0.4413472706155633, "grad_norm": 0.006598141510039568, "learning_rate": 1.276468749543911e-05, "loss": 0.0, "step": 1140 }, { "epoch": 0.44173441734417346, "grad_norm": 0.0006559481262229383, "learning_rate": 1.2693890041793015e-05, "loss": 0.0, "step": 1141 }, { "epoch": 0.4421215640727836, "grad_norm": 0.0008395421900786459, "learning_rate": 1.2623260911045032e-05, "loss": 0.0, "step": 1142 }, { "epoch": 0.4425087108013937, "grad_norm": 0.005088416859507561, "learning_rate": 1.2552800421869504e-05, "loss": 0.0, "step": 1143 }, { "epoch": 0.4428958575300039, "grad_norm": 0.0006087544606998563, "learning_rate": 1.2482508892179884e-05, "loss": 0.0, "step": 1144 }, { "epoch": 0.443283004258614, "grad_norm": 0.0017820927314460278, "learning_rate": 1.2412386639127272e-05, "loss": 0.0, "step": 1145 }, { "epoch": 0.4436701509872242, "grad_norm": 1.249230146408081, "learning_rate": 1.2342433979099e-05, "loss": 0.009, "step": 1146 }, { "epoch": 0.4440572977158343, "grad_norm": 0.007895207032561302, "learning_rate": 1.2272651227717225e-05, "loss": 0.0002, "step": 1147 }, { "epoch": 0.4444444444444444, "grad_norm": 0.006302273366600275, "learning_rate": 1.2203038699837482e-05, "loss": 0.0001, "step": 1148 }, { "epoch": 0.4448315911730546, "grad_norm": 0.003651441540569067, "learning_rate": 1.2133596709547234e-05, "loss": 0.0001, "step": 1149 }, { "epoch": 0.4452187379016647, "grad_norm": 0.06293000280857086, "learning_rate": 1.206432557016453e-05, "loss": 0.0002, "step": 1150 }, { "epoch": 0.4456058846302749, "grad_norm": 0.04901011288166046, "learning_rate": 1.1995225594236537e-05, "loss": 0.006, "step": 1151 }, { "epoch": 0.445993031358885, "grad_norm": 0.05200514569878578, "learning_rate": 1.1926297093538153e-05, "loss": 0.0013, "step": 1152 }, { "epoch": 0.44638017808749514, "grad_norm": 0.0014073471538722515, "learning_rate": 1.1857540379070541e-05, "loss": 0.0, "step": 1153 }, { "epoch": 0.4467673248161053, "grad_norm": 0.03230893984436989, "learning_rate": 1.178895576105985e-05, "loss": 0.0005, "step": 1154 }, { "epoch": 0.44715447154471544, "grad_norm": 0.016670379787683487, "learning_rate": 1.1720543548955709e-05, "loss": 0.0002, "step": 1155 }, { "epoch": 0.4475416182733256, "grad_norm": 0.006980563048273325, "learning_rate": 1.1652304051429824e-05, "loss": 0.0001, "step": 1156 }, { "epoch": 0.44792876500193574, "grad_norm": 0.09809542447328568, "learning_rate": 1.1584237576374673e-05, "loss": 0.0017, "step": 1157 }, { "epoch": 0.44831591173054586, "grad_norm": 0.11370933800935745, "learning_rate": 1.151634443090205e-05, "loss": 0.0044, "step": 1158 }, { "epoch": 0.44870305845915603, "grad_norm": 0.004476567264646292, "learning_rate": 1.1448624921341699e-05, "loss": 0.0001, "step": 1159 }, { "epoch": 0.44909020518776616, "grad_norm": 0.0005020067328587174, "learning_rate": 1.1381079353239915e-05, "loss": 0.0, "step": 1160 }, { "epoch": 0.44947735191637633, "grad_norm": 0.034463606774806976, "learning_rate": 1.1313708031358183e-05, "loss": 0.0002, "step": 1161 }, { "epoch": 0.44986449864498645, "grad_norm": 0.0015444280579686165, "learning_rate": 1.1246511259671843e-05, "loss": 0.0, "step": 1162 }, { "epoch": 0.4502516453735966, "grad_norm": 0.007986946031451225, "learning_rate": 1.1179489341368615e-05, "loss": 0.0, "step": 1163 }, { "epoch": 0.45063879210220675, "grad_norm": 0.00016696866077836603, "learning_rate": 1.1112642578847316e-05, "loss": 0.0, "step": 1164 }, { "epoch": 0.4510259388308169, "grad_norm": 0.0015510886441916227, "learning_rate": 1.1045971273716477e-05, "loss": 0.0, "step": 1165 }, { "epoch": 0.451413085559427, "grad_norm": 0.031885236501693726, "learning_rate": 1.0979475726792982e-05, "loss": 0.0002, "step": 1166 }, { "epoch": 0.4518002322880372, "grad_norm": 0.00022552194423042238, "learning_rate": 1.0913156238100669e-05, "loss": 0.0, "step": 1167 }, { "epoch": 0.4521873790166473, "grad_norm": 0.0008924312423914671, "learning_rate": 1.0847013106869041e-05, "loss": 0.0, "step": 1168 }, { "epoch": 0.45257452574525747, "grad_norm": 0.0007522033411078155, "learning_rate": 1.0781046631531888e-05, "loss": 0.0, "step": 1169 }, { "epoch": 0.4529616724738676, "grad_norm": 0.0009381092386320233, "learning_rate": 1.0715257109725928e-05, "loss": 0.0, "step": 1170 }, { "epoch": 0.4533488192024777, "grad_norm": 0.00644589914008975, "learning_rate": 1.0649644838289491e-05, "loss": 0.0, "step": 1171 }, { "epoch": 0.4537359659310879, "grad_norm": 0.0002498172107152641, "learning_rate": 1.058421011326114e-05, "loss": 0.0, "step": 1172 }, { "epoch": 0.454123112659698, "grad_norm": 0.0011802815133705735, "learning_rate": 1.0518953229878393e-05, "loss": 0.0001, "step": 1173 }, { "epoch": 0.4545102593883082, "grad_norm": 0.002405043924227357, "learning_rate": 1.0453874482576309e-05, "loss": 0.0, "step": 1174 }, { "epoch": 0.4548974061169183, "grad_norm": 0.002922722604125738, "learning_rate": 1.0388974164986249e-05, "loss": 0.0001, "step": 1175 }, { "epoch": 0.45528455284552843, "grad_norm": 0.03949505090713501, "learning_rate": 1.0324252569934495e-05, "loss": 0.0002, "step": 1176 }, { "epoch": 0.4556716995741386, "grad_norm": 0.000592520518694073, "learning_rate": 1.0259709989440952e-05, "loss": 0.0, "step": 1177 }, { "epoch": 0.45605884630274873, "grad_norm": 0.00023816191242076457, "learning_rate": 1.0195346714717813e-05, "loss": 0.0, "step": 1178 }, { "epoch": 0.4564459930313589, "grad_norm": 0.0009969983948394656, "learning_rate": 1.013116303616825e-05, "loss": 0.0, "step": 1179 }, { "epoch": 0.45683313975996903, "grad_norm": 0.0006109256646595895, "learning_rate": 1.0067159243385138e-05, "loss": 0.0, "step": 1180 }, { "epoch": 0.45722028648857915, "grad_norm": 0.0004375118878670037, "learning_rate": 1.0003335625149669e-05, "loss": 0.0, "step": 1181 }, { "epoch": 0.45760743321718933, "grad_norm": 0.012121845036745071, "learning_rate": 9.939692469430129e-06, "loss": 0.0001, "step": 1182 }, { "epoch": 0.45799457994579945, "grad_norm": 0.000974025868345052, "learning_rate": 9.876230063380571e-06, "loss": 0.0, "step": 1183 }, { "epoch": 0.45838172667440963, "grad_norm": 0.0005102133145555854, "learning_rate": 9.812948693339519e-06, "loss": 0.0, "step": 1184 }, { "epoch": 0.45876887340301975, "grad_norm": 0.000832306977827102, "learning_rate": 9.74984864482863e-06, "loss": 0.0, "step": 1185 }, { "epoch": 0.45915602013162987, "grad_norm": 0.0005011268076486886, "learning_rate": 9.68693020255152e-06, "loss": 0.0, "step": 1186 }, { "epoch": 0.45954316686024005, "grad_norm": 0.0014525051228702068, "learning_rate": 9.62419365039237e-06, "loss": 0.0, "step": 1187 }, { "epoch": 0.45993031358885017, "grad_norm": 0.007520552258938551, "learning_rate": 9.561639271414662e-06, "loss": 0.0001, "step": 1188 }, { "epoch": 0.4603174603174603, "grad_norm": 0.002319543855264783, "learning_rate": 9.499267347859959e-06, "loss": 0.0, "step": 1189 }, { "epoch": 0.46070460704607047, "grad_norm": 0.0008605946786701679, "learning_rate": 9.43707816114659e-06, "loss": 0.0, "step": 1190 }, { "epoch": 0.4610917537746806, "grad_norm": 0.011598880402743816, "learning_rate": 9.375071991868383e-06, "loss": 0.0001, "step": 1191 }, { "epoch": 0.46147890050329077, "grad_norm": 0.0019046214874833822, "learning_rate": 9.313249119793388e-06, "loss": 0.0, "step": 1192 }, { "epoch": 0.4618660472319009, "grad_norm": 0.0007018601754680276, "learning_rate": 9.25160982386264e-06, "loss": 0.0, "step": 1193 }, { "epoch": 0.462253193960511, "grad_norm": 0.004615155514329672, "learning_rate": 9.190154382188921e-06, "loss": 0.0, "step": 1194 }, { "epoch": 0.4626403406891212, "grad_norm": 0.012603587470948696, "learning_rate": 9.12888307205541e-06, "loss": 0.0001, "step": 1195 }, { "epoch": 0.4630274874177313, "grad_norm": 0.0009012959199026227, "learning_rate": 9.067796169914549e-06, "loss": 0.0, "step": 1196 }, { "epoch": 0.4634146341463415, "grad_norm": 0.005865184590220451, "learning_rate": 9.006893951386713e-06, "loss": 0.0001, "step": 1197 }, { "epoch": 0.4638017808749516, "grad_norm": 0.00023360588238574564, "learning_rate": 8.946176691259028e-06, "loss": 0.0, "step": 1198 }, { "epoch": 0.46418892760356173, "grad_norm": 0.1505637913942337, "learning_rate": 8.885644663484049e-06, "loss": 0.0005, "step": 1199 }, { "epoch": 0.4645760743321719, "grad_norm": 2.420736312866211, "learning_rate": 8.825298141178611e-06, "loss": 0.0156, "step": 1200 }, { "epoch": 0.46496322106078203, "grad_norm": 0.026697484776377678, "learning_rate": 8.765137396622557e-06, "loss": 0.0039, "step": 1201 }, { "epoch": 0.4653503677893922, "grad_norm": 0.03793786093592644, "learning_rate": 8.705162701257502e-06, "loss": 0.001, "step": 1202 }, { "epoch": 0.4657375145180023, "grad_norm": 0.020723620429635048, "learning_rate": 8.645374325685624e-06, "loss": 0.0003, "step": 1203 }, { "epoch": 0.46612466124661245, "grad_norm": 0.0008785614045336843, "learning_rate": 8.585772539668435e-06, "loss": 0.0, "step": 1204 }, { "epoch": 0.4665118079752226, "grad_norm": 0.002943212864920497, "learning_rate": 8.526357612125574e-06, "loss": 0.0, "step": 1205 }, { "epoch": 0.46689895470383275, "grad_norm": 0.057028789073228836, "learning_rate": 8.467129811133561e-06, "loss": 0.0003, "step": 1206 }, { "epoch": 0.46728610143244287, "grad_norm": 0.0032581747509539127, "learning_rate": 8.40808940392464e-06, "loss": 0.0001, "step": 1207 }, { "epoch": 0.46767324816105305, "grad_norm": 0.0932520404458046, "learning_rate": 8.349236656885545e-06, "loss": 0.0021, "step": 1208 }, { "epoch": 0.46806039488966317, "grad_norm": 0.006326075177639723, "learning_rate": 8.29057183555626e-06, "loss": 0.0002, "step": 1209 }, { "epoch": 0.46844754161827334, "grad_norm": 0.0002322104701306671, "learning_rate": 8.23209520462892e-06, "loss": 0.0, "step": 1210 }, { "epoch": 0.46883468834688347, "grad_norm": 0.0006544016650877893, "learning_rate": 8.173807027946528e-06, "loss": 0.0, "step": 1211 }, { "epoch": 0.4692218350754936, "grad_norm": 0.001205835840664804, "learning_rate": 8.115707568501768e-06, "loss": 0.0, "step": 1212 }, { "epoch": 0.46960898180410376, "grad_norm": 0.0018138798186555505, "learning_rate": 8.05779708843587e-06, "loss": 0.0, "step": 1213 }, { "epoch": 0.4699961285327139, "grad_norm": 0.00928629282861948, "learning_rate": 8.000075849037408e-06, "loss": 0.0002, "step": 1214 }, { "epoch": 0.47038327526132406, "grad_norm": 0.0003023847530130297, "learning_rate": 7.942544110741102e-06, "loss": 0.0, "step": 1215 }, { "epoch": 0.4707704219899342, "grad_norm": 0.006090863142162561, "learning_rate": 7.88520213312664e-06, "loss": 0.0, "step": 1216 }, { "epoch": 0.4711575687185443, "grad_norm": 0.0023785592056810856, "learning_rate": 7.828050174917528e-06, "loss": 0.0, "step": 1217 }, { "epoch": 0.4715447154471545, "grad_norm": 0.00038500342634506524, "learning_rate": 7.771088493979967e-06, "loss": 0.0, "step": 1218 }, { "epoch": 0.4719318621757646, "grad_norm": 0.08768702298402786, "learning_rate": 7.714317347321559e-06, "loss": 0.0005, "step": 1219 }, { "epoch": 0.4723190089043748, "grad_norm": 0.01025913842022419, "learning_rate": 7.657736991090264e-06, "loss": 0.0002, "step": 1220 }, { "epoch": 0.4727061556329849, "grad_norm": 0.0001984934351639822, "learning_rate": 7.601347680573223e-06, "loss": 0.0, "step": 1221 }, { "epoch": 0.473093302361595, "grad_norm": 0.000818421074654907, "learning_rate": 7.545149670195572e-06, "loss": 0.0, "step": 1222 }, { "epoch": 0.4734804490902052, "grad_norm": 0.021213488653302193, "learning_rate": 7.489143213519301e-06, "loss": 0.0001, "step": 1223 }, { "epoch": 0.4738675958188153, "grad_norm": 0.033915307372808456, "learning_rate": 7.433328563242142e-06, "loss": 0.0002, "step": 1224 }, { "epoch": 0.4742547425474255, "grad_norm": 0.00025099285994656384, "learning_rate": 7.377705971196397e-06, "loss": 0.0, "step": 1225 }, { "epoch": 0.4746418892760356, "grad_norm": 0.00011749350233003497, "learning_rate": 7.322275688347818e-06, "loss": 0.0, "step": 1226 }, { "epoch": 0.47502903600464574, "grad_norm": 0.00033711831201799214, "learning_rate": 7.267037964794476e-06, "loss": 0.0, "step": 1227 }, { "epoch": 0.4754161827332559, "grad_norm": 0.001535428804345429, "learning_rate": 7.211993049765614e-06, "loss": 0.0, "step": 1228 }, { "epoch": 0.47580332946186604, "grad_norm": 0.0005026145954616368, "learning_rate": 7.157141191620548e-06, "loss": 0.0, "step": 1229 }, { "epoch": 0.47619047619047616, "grad_norm": 0.0004242948198225349, "learning_rate": 7.102482637847502e-06, "loss": 0.0, "step": 1230 }, { "epoch": 0.47657762291908634, "grad_norm": 0.0002841683162841946, "learning_rate": 7.048017635062559e-06, "loss": 0.0, "step": 1231 }, { "epoch": 0.47696476964769646, "grad_norm": 0.0008631069213151932, "learning_rate": 6.993746429008497e-06, "loss": 0.0, "step": 1232 }, { "epoch": 0.47735191637630664, "grad_norm": 0.0019993663299828768, "learning_rate": 6.9396692645536946e-06, "loss": 0.0, "step": 1233 }, { "epoch": 0.47773906310491676, "grad_norm": 0.0013842112384736538, "learning_rate": 6.885786385691023e-06, "loss": 0.0, "step": 1234 }, { "epoch": 0.4781262098335269, "grad_norm": 0.00023990539193619043, "learning_rate": 6.8320980355367605e-06, "loss": 0.0, "step": 1235 }, { "epoch": 0.47851335656213706, "grad_norm": 0.0004480506759136915, "learning_rate": 6.778604456329485e-06, "loss": 0.0, "step": 1236 }, { "epoch": 0.4789005032907472, "grad_norm": 0.003706539049744606, "learning_rate": 6.725305889428945e-06, "loss": 0.0, "step": 1237 }, { "epoch": 0.47928765001935736, "grad_norm": 0.0007369938539341092, "learning_rate": 6.672202575315045e-06, "loss": 0.0, "step": 1238 }, { "epoch": 0.4796747967479675, "grad_norm": 0.00016758940182626247, "learning_rate": 6.619294753586708e-06, "loss": 0.0, "step": 1239 }, { "epoch": 0.4800619434765776, "grad_norm": 0.0006811873754486442, "learning_rate": 6.566582662960818e-06, "loss": 0.0, "step": 1240 }, { "epoch": 0.4804490902051878, "grad_norm": 0.00018206462846137583, "learning_rate": 6.514066541271086e-06, "loss": 0.0, "step": 1241 }, { "epoch": 0.4808362369337979, "grad_norm": 0.0009445471223443747, "learning_rate": 6.461746625467086e-06, "loss": 0.0, "step": 1242 }, { "epoch": 0.4812233836624081, "grad_norm": 0.005226759240031242, "learning_rate": 6.409623151613103e-06, "loss": 0.0001, "step": 1243 }, { "epoch": 0.4816105303910182, "grad_norm": 0.0017884591361507773, "learning_rate": 6.3576963548870496e-06, "loss": 0.0, "step": 1244 }, { "epoch": 0.4819976771196283, "grad_norm": 0.0026653881650418043, "learning_rate": 6.305966469579489e-06, "loss": 0.0, "step": 1245 }, { "epoch": 0.4823848238482385, "grad_norm": 0.00038798339664936066, "learning_rate": 6.2544337290925185e-06, "loss": 0.0, "step": 1246 }, { "epoch": 0.4827719705768486, "grad_norm": 0.0003185675013810396, "learning_rate": 6.203098365938731e-06, "loss": 0.0, "step": 1247 }, { "epoch": 0.4831591173054588, "grad_norm": 0.000892972107976675, "learning_rate": 6.1519606117401426e-06, "loss": 0.0, "step": 1248 }, { "epoch": 0.4835462640340689, "grad_norm": 1.4739562273025513, "learning_rate": 6.101020697227189e-06, "loss": 0.0245, "step": 1249 }, { "epoch": 0.48393341076267904, "grad_norm": 0.004798859357833862, "learning_rate": 6.050278852237701e-06, "loss": 0.0001, "step": 1250 }, { "epoch": 0.4843205574912892, "grad_norm": 0.01590668596327305, "learning_rate": 5.999735305715754e-06, "loss": 0.001, "step": 1251 }, { "epoch": 0.48470770421989934, "grad_norm": 0.0043892329558730125, "learning_rate": 5.949390285710776e-06, "loss": 0.0001, "step": 1252 }, { "epoch": 0.48509485094850946, "grad_norm": 0.06413737684488297, "learning_rate": 5.899244019376426e-06, "loss": 0.0016, "step": 1253 }, { "epoch": 0.48548199767711964, "grad_norm": 0.006050611846148968, "learning_rate": 5.849296732969623e-06, "loss": 0.0001, "step": 1254 }, { "epoch": 0.48586914440572976, "grad_norm": 0.0022481298074126244, "learning_rate": 5.799548651849457e-06, "loss": 0.0, "step": 1255 }, { "epoch": 0.48625629113433994, "grad_norm": 0.004415793344378471, "learning_rate": 5.750000000476258e-06, "loss": 0.0001, "step": 1256 }, { "epoch": 0.48664343786295006, "grad_norm": 0.0013036815216764808, "learning_rate": 5.700651002410523e-06, "loss": 0.0, "step": 1257 }, { "epoch": 0.4870305845915602, "grad_norm": 0.0007666132296435535, "learning_rate": 5.651501880311933e-06, "loss": 0.0, "step": 1258 }, { "epoch": 0.48741773132017036, "grad_norm": 0.012893356382846832, "learning_rate": 5.6025528559383254e-06, "loss": 0.0001, "step": 1259 }, { "epoch": 0.4878048780487805, "grad_norm": 0.06497503072023392, "learning_rate": 5.553804150144737e-06, "loss": 0.0047, "step": 1260 }, { "epoch": 0.48819202477739065, "grad_norm": 0.004026665352284908, "learning_rate": 5.505255982882357e-06, "loss": 0.0, "step": 1261 }, { "epoch": 0.4885791715060008, "grad_norm": 0.001962028443813324, "learning_rate": 5.456908573197544e-06, "loss": 0.0, "step": 1262 }, { "epoch": 0.4889663182346109, "grad_norm": 0.0005447390722110868, "learning_rate": 5.408762139230888e-06, "loss": 0.0, "step": 1263 }, { "epoch": 0.4893534649632211, "grad_norm": 0.001602734555490315, "learning_rate": 5.360816898216164e-06, "loss": 0.0, "step": 1264 }, { "epoch": 0.4897406116918312, "grad_norm": 0.0005945903831161559, "learning_rate": 5.3130730664793795e-06, "loss": 0.0, "step": 1265 }, { "epoch": 0.4901277584204414, "grad_norm": 0.012403713539242744, "learning_rate": 5.265530859437801e-06, "loss": 0.0001, "step": 1266 }, { "epoch": 0.4905149051490515, "grad_norm": 0.03977184742689133, "learning_rate": 5.218190491598973e-06, "loss": 0.0001, "step": 1267 }, { "epoch": 0.4909020518776616, "grad_norm": 0.0005819656071253121, "learning_rate": 5.17105217655976e-06, "loss": 0.0, "step": 1268 }, { "epoch": 0.4912891986062718, "grad_norm": 0.0012119417078793049, "learning_rate": 5.124116127005363e-06, "loss": 0.0, "step": 1269 }, { "epoch": 0.4916763453348819, "grad_norm": 0.0011156294494867325, "learning_rate": 5.077382554708382e-06, "loss": 0.0, "step": 1270 }, { "epoch": 0.49206349206349204, "grad_norm": 0.00014334850129671395, "learning_rate": 5.030851670527853e-06, "loss": 0.0, "step": 1271 }, { "epoch": 0.4924506387921022, "grad_norm": 0.01676209457218647, "learning_rate": 4.984523684408304e-06, "loss": 0.0002, "step": 1272 }, { "epoch": 0.49283778552071233, "grad_norm": 0.0005447831354103982, "learning_rate": 4.938398805378763e-06, "loss": 0.0, "step": 1273 }, { "epoch": 0.4932249322493225, "grad_norm": 0.00024948938516899943, "learning_rate": 4.892477241551901e-06, "loss": 0.0, "step": 1274 }, { "epoch": 0.49361207897793263, "grad_norm": 0.001213126815855503, "learning_rate": 4.84675920012303e-06, "loss": 0.0, "step": 1275 }, { "epoch": 0.49399922570654275, "grad_norm": 0.00047369138337671757, "learning_rate": 4.801244887369144e-06, "loss": 0.0, "step": 1276 }, { "epoch": 0.49438637243515293, "grad_norm": 0.00033764008549042046, "learning_rate": 4.755934508648058e-06, "loss": 0.0, "step": 1277 }, { "epoch": 0.49477351916376305, "grad_norm": 0.0003957216104026884, "learning_rate": 4.7108282683974544e-06, "loss": 0.0, "step": 1278 }, { "epoch": 0.49516066589237323, "grad_norm": 0.0009651805739849806, "learning_rate": 4.665926370133949e-06, "loss": 0.0, "step": 1279 }, { "epoch": 0.49554781262098335, "grad_norm": 0.0003068702353630215, "learning_rate": 4.621229016452156e-06, "loss": 0.0, "step": 1280 }, { "epoch": 0.4959349593495935, "grad_norm": 0.0007897759787738323, "learning_rate": 4.576736409023813e-06, "loss": 0.0, "step": 1281 }, { "epoch": 0.49632210607820365, "grad_norm": 0.0002980028511956334, "learning_rate": 4.532448748596885e-06, "loss": 0.0, "step": 1282 }, { "epoch": 0.4967092528068138, "grad_norm": 0.0010113201569765806, "learning_rate": 4.488366234994579e-06, "loss": 0.0, "step": 1283 }, { "epoch": 0.49709639953542395, "grad_norm": 0.0002822222013492137, "learning_rate": 4.444489067114521e-06, "loss": 0.0, "step": 1284 }, { "epoch": 0.49748354626403407, "grad_norm": 0.0009353689965792, "learning_rate": 4.4008174429278185e-06, "loss": 0.0, "step": 1285 }, { "epoch": 0.4978706929926442, "grad_norm": 0.00048544921446591616, "learning_rate": 4.357351559478201e-06, "loss": 0.0, "step": 1286 }, { "epoch": 0.49825783972125437, "grad_norm": 0.00021501122682821006, "learning_rate": 4.31409161288106e-06, "loss": 0.0, "step": 1287 }, { "epoch": 0.4986449864498645, "grad_norm": 0.0011064648861065507, "learning_rate": 4.271037798322658e-06, "loss": 0.0, "step": 1288 }, { "epoch": 0.49903213317847467, "grad_norm": 0.0019151505548506975, "learning_rate": 4.2281903100591824e-06, "loss": 0.0, "step": 1289 }, { "epoch": 0.4994192799070848, "grad_norm": 0.003015509806573391, "learning_rate": 4.185549341415901e-06, "loss": 0.0001, "step": 1290 }, { "epoch": 0.4998064266356949, "grad_norm": 0.0012218995252624154, "learning_rate": 4.143115084786259e-06, "loss": 0.0, "step": 1291 }, { "epoch": 0.500193573364305, "grad_norm": 0.0007260179845616221, "learning_rate": 4.100887731631054e-06, "loss": 0.0, "step": 1292 }, { "epoch": 0.5005807200929152, "grad_norm": 0.0013869833201169968, "learning_rate": 4.058867472477529e-06, "loss": 0.0, "step": 1293 }, { "epoch": 0.5009678668215254, "grad_norm": 0.0003031007363460958, "learning_rate": 4.017054496918521e-06, "loss": 0.0, "step": 1294 }, { "epoch": 0.5013550135501355, "grad_norm": 0.0006917419377714396, "learning_rate": 3.975448993611652e-06, "loss": 0.0, "step": 1295 }, { "epoch": 0.5017421602787456, "grad_norm": 0.007670098915696144, "learning_rate": 3.934051150278417e-06, "loss": 0.0001, "step": 1296 }, { "epoch": 0.5021293070073558, "grad_norm": 0.0030773854814469814, "learning_rate": 3.892861153703342e-06, "loss": 0.0, "step": 1297 }, { "epoch": 0.502516453735966, "grad_norm": 0.0010149420704692602, "learning_rate": 3.851879189733221e-06, "loss": 0.0, "step": 1298 }, { "epoch": 0.502903600464576, "grad_norm": 0.0006883174646645784, "learning_rate": 3.811105443276164e-06, "loss": 0.0, "step": 1299 }, { "epoch": 0.5032907471931862, "grad_norm": 0.15667524933815002, "learning_rate": 3.77054009830084e-06, "loss": 0.001, "step": 1300 }, { "epoch": 0.5036778939217964, "grad_norm": 0.016902372241020203, "learning_rate": 3.7301833378356076e-06, "loss": 0.002, "step": 1301 }, { "epoch": 0.5040650406504065, "grad_norm": 0.0014611108927056193, "learning_rate": 3.690035343967724e-06, "loss": 0.0001, "step": 1302 }, { "epoch": 0.5044521873790166, "grad_norm": 0.0020591116044670343, "learning_rate": 3.6500962978424924e-06, "loss": 0.0001, "step": 1303 }, { "epoch": 0.5048393341076268, "grad_norm": 0.05074653401970863, "learning_rate": 3.6103663796624555e-06, "loss": 0.0012, "step": 1304 }, { "epoch": 0.5052264808362369, "grad_norm": 0.0005115514504723251, "learning_rate": 3.570845768686576e-06, "loss": 0.0, "step": 1305 }, { "epoch": 0.5056136275648471, "grad_norm": 0.04323451220989227, "learning_rate": 3.5315346432294726e-06, "loss": 0.0072, "step": 1306 }, { "epoch": 0.5060007742934572, "grad_norm": 0.0019445320358499885, "learning_rate": 3.4924331806605314e-06, "loss": 0.0, "step": 1307 }, { "epoch": 0.5063879210220673, "grad_norm": 0.002080347388982773, "learning_rate": 3.4535415574031783e-06, "loss": 0.0, "step": 1308 }, { "epoch": 0.5067750677506775, "grad_norm": 0.0003646564146038145, "learning_rate": 3.4148599489340517e-06, "loss": 0.0, "step": 1309 }, { "epoch": 0.5071622144792877, "grad_norm": 0.0001835545408539474, "learning_rate": 3.376388529782215e-06, "loss": 0.0, "step": 1310 }, { "epoch": 0.5075493612078978, "grad_norm": 0.001469470327720046, "learning_rate": 3.3381274735283684e-06, "loss": 0.0, "step": 1311 }, { "epoch": 0.5079365079365079, "grad_norm": 0.0013578154612332582, "learning_rate": 3.3000769528040653e-06, "loss": 0.0, "step": 1312 }, { "epoch": 0.5083236546651181, "grad_norm": 0.004504669923335314, "learning_rate": 3.2622371392909524e-06, "loss": 0.0001, "step": 1313 }, { "epoch": 0.5087108013937283, "grad_norm": 0.0013383106561377645, "learning_rate": 3.2246082037199532e-06, "loss": 0.0, "step": 1314 }, { "epoch": 0.5090979481223383, "grad_norm": 0.0010525296675041318, "learning_rate": 3.187190315870542e-06, "loss": 0.0, "step": 1315 }, { "epoch": 0.5094850948509485, "grad_norm": 0.00011710682156262919, "learning_rate": 3.149983644569948e-06, "loss": 0.0, "step": 1316 }, { "epoch": 0.5098722415795587, "grad_norm": 0.0007033901638351381, "learning_rate": 3.1129883576924203e-06, "loss": 0.0, "step": 1317 }, { "epoch": 0.5102593883081687, "grad_norm": 0.0018138588638976216, "learning_rate": 3.0762046221584294e-06, "loss": 0.0, "step": 1318 }, { "epoch": 0.5106465350367789, "grad_norm": 0.17213991284370422, "learning_rate": 3.0396326039339507e-06, "loss": 0.0025, "step": 1319 }, { "epoch": 0.5110336817653891, "grad_norm": 0.00046120636397972703, "learning_rate": 3.0032724680297107e-06, "loss": 0.0, "step": 1320 }, { "epoch": 0.5114208284939993, "grad_norm": 0.0004718530108220875, "learning_rate": 2.96712437850043e-06, "loss": 0.0, "step": 1321 }, { "epoch": 0.5118079752226093, "grad_norm": 0.0023787531536072493, "learning_rate": 2.9311884984440875e-06, "loss": 0.0, "step": 1322 }, { "epoch": 0.5121951219512195, "grad_norm": 0.0011449018493294716, "learning_rate": 2.8954649900011845e-06, "loss": 0.0, "step": 1323 }, { "epoch": 0.5125822686798297, "grad_norm": 0.04674423485994339, "learning_rate": 2.8599540143540327e-06, "loss": 0.0004, "step": 1324 }, { "epoch": 0.5129694154084398, "grad_norm": 0.00027449653134681284, "learning_rate": 2.8246557317259727e-06, "loss": 0.0, "step": 1325 }, { "epoch": 0.5133565621370499, "grad_norm": 0.0003032211388926953, "learning_rate": 2.7895703013807118e-06, "loss": 0.0, "step": 1326 }, { "epoch": 0.5137437088656601, "grad_norm": 0.0003420572611503303, "learning_rate": 2.7546978816215874e-06, "loss": 0.0, "step": 1327 }, { "epoch": 0.5141308555942702, "grad_norm": 0.00011091092164861038, "learning_rate": 2.7200386297908387e-06, "loss": 0.0, "step": 1328 }, { "epoch": 0.5145180023228804, "grad_norm": 0.0007387629011645913, "learning_rate": 2.6855927022688743e-06, "loss": 0.0, "step": 1329 }, { "epoch": 0.5149051490514905, "grad_norm": 0.00038926073466427624, "learning_rate": 2.651360254473645e-06, "loss": 0.0, "step": 1330 }, { "epoch": 0.5152922957801006, "grad_norm": 0.0004905264358967543, "learning_rate": 2.6173414408598827e-06, "loss": 0.0, "step": 1331 }, { "epoch": 0.5156794425087108, "grad_norm": 0.0011100409319624305, "learning_rate": 2.5835364149183803e-06, "loss": 0.0, "step": 1332 }, { "epoch": 0.516066589237321, "grad_norm": 0.0009677124326117337, "learning_rate": 2.549945329175363e-06, "loss": 0.0, "step": 1333 }, { "epoch": 0.5164537359659311, "grad_norm": 0.0003739868989214301, "learning_rate": 2.516568335191777e-06, "loss": 0.0, "step": 1334 }, { "epoch": 0.5168408826945412, "grad_norm": 0.00016991533630061895, "learning_rate": 2.483405583562587e-06, "loss": 0.0, "step": 1335 }, { "epoch": 0.5172280294231514, "grad_norm": 0.0003253307659178972, "learning_rate": 2.450457223916097e-06, "loss": 0.0, "step": 1336 }, { "epoch": 0.5176151761517616, "grad_norm": 0.00035142659908160567, "learning_rate": 2.417723404913302e-06, "loss": 0.0, "step": 1337 }, { "epoch": 0.5180023228803716, "grad_norm": 0.00029594748048111796, "learning_rate": 2.3852042742472215e-06, "loss": 0.0, "step": 1338 }, { "epoch": 0.5183894696089818, "grad_norm": 0.000983862904831767, "learning_rate": 2.3528999786421756e-06, "loss": 0.0, "step": 1339 }, { "epoch": 0.518776616337592, "grad_norm": 0.00020164911984466016, "learning_rate": 2.3208106638531846e-06, "loss": 0.0, "step": 1340 }, { "epoch": 0.519163763066202, "grad_norm": 0.0005400851950980723, "learning_rate": 2.2889364746652874e-06, "loss": 0.0, "step": 1341 }, { "epoch": 0.5195509097948122, "grad_norm": 0.00030268036061897874, "learning_rate": 2.2572775548928893e-06, "loss": 0.0, "step": 1342 }, { "epoch": 0.5199380565234224, "grad_norm": 0.0007914683665148914, "learning_rate": 2.2258340473790996e-06, "loss": 0.0, "step": 1343 }, { "epoch": 0.5203252032520326, "grad_norm": 0.0005217034486122429, "learning_rate": 2.1946060939951164e-06, "loss": 0.0, "step": 1344 }, { "epoch": 0.5207123499806426, "grad_norm": 0.00027877753018401563, "learning_rate": 2.163593835639566e-06, "loss": 0.0, "step": 1345 }, { "epoch": 0.5210994967092528, "grad_norm": 0.002984454622492194, "learning_rate": 2.132797412237869e-06, "loss": 0.0, "step": 1346 }, { "epoch": 0.521486643437863, "grad_norm": 0.00027315475745126605, "learning_rate": 2.1022169627416153e-06, "loss": 0.0, "step": 1347 }, { "epoch": 0.5218737901664731, "grad_norm": 0.001330073457211256, "learning_rate": 2.0718526251279346e-06, "loss": 0.0, "step": 1348 }, { "epoch": 0.5222609368950832, "grad_norm": 0.0018169977702200413, "learning_rate": 2.0417045363988752e-06, "loss": 0.0, "step": 1349 }, { "epoch": 0.5226480836236934, "grad_norm": 0.0006412347429431975, "learning_rate": 2.0117728325807606e-06, "loss": 0.0, "step": 1350 }, { "epoch": 0.5230352303523035, "grad_norm": 0.07314898818731308, "learning_rate": 1.9820576487236284e-06, "loss": 0.0036, "step": 1351 }, { "epoch": 0.5234223770809137, "grad_norm": 0.001173462369479239, "learning_rate": 1.9525591189005878e-06, "loss": 0.0001, "step": 1352 }, { "epoch": 0.5238095238095238, "grad_norm": 0.029476692900061607, "learning_rate": 1.9232773762071944e-06, "loss": 0.0013, "step": 1353 }, { "epoch": 0.5241966705381339, "grad_norm": 0.038780104368925095, "learning_rate": 1.8942125527609045e-06, "loss": 0.0012, "step": 1354 }, { "epoch": 0.5245838172667441, "grad_norm": 0.010619153268635273, "learning_rate": 1.8653647797004236e-06, "loss": 0.0001, "step": 1355 }, { "epoch": 0.5249709639953543, "grad_norm": 0.027320994064211845, "learning_rate": 1.8367341871851518e-06, "loss": 0.0002, "step": 1356 }, { "epoch": 0.5253581107239644, "grad_norm": 0.005327960010617971, "learning_rate": 1.8083209043945782e-06, "loss": 0.0001, "step": 1357 }, { "epoch": 0.5257452574525745, "grad_norm": 0.010818984359502792, "learning_rate": 1.7801250595277096e-06, "loss": 0.0001, "step": 1358 }, { "epoch": 0.5261324041811847, "grad_norm": 0.0013082500081509352, "learning_rate": 1.7521467798024772e-06, "loss": 0.0, "step": 1359 }, { "epoch": 0.5265195509097949, "grad_norm": 0.00018383938004262745, "learning_rate": 1.724386191455185e-06, "loss": 0.0, "step": 1360 }, { "epoch": 0.5269066976384049, "grad_norm": 0.06656922399997711, "learning_rate": 1.6968434197399075e-06, "loss": 0.0007, "step": 1361 }, { "epoch": 0.5272938443670151, "grad_norm": 0.004147067666053772, "learning_rate": 1.6695185889279597e-06, "loss": 0.0, "step": 1362 }, { "epoch": 0.5276809910956253, "grad_norm": 0.003467077622190118, "learning_rate": 1.6424118223073214e-06, "loss": 0.0, "step": 1363 }, { "epoch": 0.5280681378242353, "grad_norm": 0.00022673551575280726, "learning_rate": 1.6155232421820653e-06, "loss": 0.0, "step": 1364 }, { "epoch": 0.5284552845528455, "grad_norm": 0.004515313543379307, "learning_rate": 1.5888529698718346e-06, "loss": 0.0, "step": 1365 }, { "epoch": 0.5288424312814557, "grad_norm": 0.0015816775849089026, "learning_rate": 1.5624011257112714e-06, "loss": 0.0, "step": 1366 }, { "epoch": 0.5292295780100658, "grad_norm": 0.0008955047815106809, "learning_rate": 1.5361678290494952e-06, "loss": 0.0, "step": 1367 }, { "epoch": 0.5296167247386759, "grad_norm": 0.00045474289800040424, "learning_rate": 1.5101531982495308e-06, "loss": 0.0, "step": 1368 }, { "epoch": 0.5300038714672861, "grad_norm": 0.014475700445473194, "learning_rate": 1.4843573506878094e-06, "loss": 0.0003, "step": 1369 }, { "epoch": 0.5303910181958963, "grad_norm": 0.00026755264843814075, "learning_rate": 1.4587804027536456e-06, "loss": 0.0, "step": 1370 }, { "epoch": 0.5307781649245064, "grad_norm": 0.0009568330715410411, "learning_rate": 1.4334224698486554e-06, "loss": 0.0, "step": 1371 }, { "epoch": 0.5311653116531165, "grad_norm": 0.005675049964338541, "learning_rate": 1.4082836663862898e-06, "loss": 0.0, "step": 1372 }, { "epoch": 0.5315524583817267, "grad_norm": 0.0010455515002831817, "learning_rate": 1.3833641057913017e-06, "loss": 0.0, "step": 1373 }, { "epoch": 0.5319396051103368, "grad_norm": 0.0003837356634903699, "learning_rate": 1.3586639004992407e-06, "loss": 0.0, "step": 1374 }, { "epoch": 0.532326751838947, "grad_norm": 0.007896430790424347, "learning_rate": 1.3341831619559208e-06, "loss": 0.0001, "step": 1375 }, { "epoch": 0.5327138985675571, "grad_norm": 0.000981520744971931, "learning_rate": 1.309922000616942e-06, "loss": 0.0, "step": 1376 }, { "epoch": 0.5331010452961672, "grad_norm": 0.1035628467798233, "learning_rate": 1.2858805259471974e-06, "loss": 0.0023, "step": 1377 }, { "epoch": 0.5334881920247774, "grad_norm": 0.00016075636085588485, "learning_rate": 1.2620588464203553e-06, "loss": 0.0, "step": 1378 }, { "epoch": 0.5338753387533876, "grad_norm": 0.00027309221331961453, "learning_rate": 1.2384570695183783e-06, "loss": 0.0, "step": 1379 }, { "epoch": 0.5342624854819977, "grad_norm": 0.00031268582097254694, "learning_rate": 1.21507530173105e-06, "loss": 0.0, "step": 1380 }, { "epoch": 0.5346496322106078, "grad_norm": 0.0011569913476705551, "learning_rate": 1.1919136485554983e-06, "loss": 0.0, "step": 1381 }, { "epoch": 0.535036778939218, "grad_norm": 0.056165896356105804, "learning_rate": 1.1689722144956671e-06, "loss": 0.0001, "step": 1382 }, { "epoch": 0.5354239256678281, "grad_norm": 0.0005352521548047662, "learning_rate": 1.1462511030619295e-06, "loss": 0.0, "step": 1383 }, { "epoch": 0.5358110723964382, "grad_norm": 0.00021589416428469121, "learning_rate": 1.1237504167705525e-06, "loss": 0.0, "step": 1384 }, { "epoch": 0.5361982191250484, "grad_norm": 0.0008537312969565392, "learning_rate": 1.1014702571432612e-06, "loss": 0.0, "step": 1385 }, { "epoch": 0.5365853658536586, "grad_norm": 0.0002650670357979834, "learning_rate": 1.0794107247067807e-06, "loss": 0.0, "step": 1386 }, { "epoch": 0.5369725125822686, "grad_norm": 0.0031000212766230106, "learning_rate": 1.0575719189923838e-06, "loss": 0.0, "step": 1387 }, { "epoch": 0.5373596593108788, "grad_norm": 0.008421260863542557, "learning_rate": 1.0359539385354389e-06, "loss": 0.0001, "step": 1388 }, { "epoch": 0.537746806039489, "grad_norm": 0.0008919943356886506, "learning_rate": 1.0145568808749396e-06, "loss": 0.0, "step": 1389 }, { "epoch": 0.538133952768099, "grad_norm": 0.047346923500299454, "learning_rate": 9.933808425531155e-07, "loss": 0.0005, "step": 1390 }, { "epoch": 0.5385210994967092, "grad_norm": 0.0025044046342372894, "learning_rate": 9.724259191149776e-07, "loss": 0.0, "step": 1391 }, { "epoch": 0.5389082462253194, "grad_norm": 0.0005212011164985597, "learning_rate": 9.516922051078458e-07, "loss": 0.0, "step": 1392 }, { "epoch": 0.5392953929539296, "grad_norm": 0.0008081488776952028, "learning_rate": 9.311797940809775e-07, "loss": 0.0, "step": 1393 }, { "epoch": 0.5396825396825397, "grad_norm": 0.0008383397944271564, "learning_rate": 9.108887785851338e-07, "loss": 0.0, "step": 1394 }, { "epoch": 0.5400696864111498, "grad_norm": 0.00039093257510103285, "learning_rate": 8.908192501721424e-07, "loss": 0.0, "step": 1395 }, { "epoch": 0.54045683313976, "grad_norm": 0.0004751708183903247, "learning_rate": 8.709712993944852e-07, "loss": 0.0, "step": 1396 }, { "epoch": 0.5408439798683701, "grad_norm": 0.00026852250448428094, "learning_rate": 8.513450158049108e-07, "loss": 0.0, "step": 1397 }, { "epoch": 0.5412311265969802, "grad_norm": 0.0006111898110248148, "learning_rate": 8.319404879560233e-07, "loss": 0.0, "step": 1398 }, { "epoch": 0.5416182733255904, "grad_norm": 0.0026882546953856945, "learning_rate": 8.127578033998662e-07, "loss": 0.0, "step": 1399 }, { "epoch": 0.5420054200542005, "grad_norm": 0.0006612004362978041, "learning_rate": 7.93797048687539e-07, "loss": 0.0, "step": 1400 }, { "epoch": 0.5423925667828107, "grad_norm": 0.015117373317480087, "learning_rate": 7.750583093688257e-07, "loss": 0.0009, "step": 1401 }, { "epoch": 0.5427797135114208, "grad_norm": 0.017428463324904442, "learning_rate": 7.565416699917671e-07, "loss": 0.0006, "step": 1402 }, { "epoch": 0.543166860240031, "grad_norm": 0.0017397633055225015, "learning_rate": 7.382472141023223e-07, "loss": 0.0, "step": 1403 }, { "epoch": 0.5435540069686411, "grad_norm": 0.0027253013104200363, "learning_rate": 7.20175024243952e-07, "loss": 0.0001, "step": 1404 }, { "epoch": 0.5439411536972513, "grad_norm": 0.024540890008211136, "learning_rate": 7.023251819572918e-07, "loss": 0.0003, "step": 1405 }, { "epoch": 0.5443283004258614, "grad_norm": 0.04364755377173424, "learning_rate": 6.84697767779735e-07, "loss": 0.0009, "step": 1406 }, { "epoch": 0.5447154471544715, "grad_norm": 0.08018162101507187, "learning_rate": 6.672928612451002e-07, "loss": 0.0042, "step": 1407 }, { "epoch": 0.5451025938830817, "grad_norm": 0.013882956467568874, "learning_rate": 6.501105408832697e-07, "loss": 0.0001, "step": 1408 }, { "epoch": 0.5454897406116919, "grad_norm": 0.0005265059880912304, "learning_rate": 6.331508842198297e-07, "loss": 0.0, "step": 1409 }, { "epoch": 0.5458768873403019, "grad_norm": 0.06079065427184105, "learning_rate": 6.164139677757141e-07, "loss": 0.0004, "step": 1410 }, { "epoch": 0.5462640340689121, "grad_norm": 0.04179280623793602, "learning_rate": 5.998998670668721e-07, "loss": 0.0002, "step": 1411 }, { "epoch": 0.5466511807975223, "grad_norm": 0.015492278151214123, "learning_rate": 5.83608656603929e-07, "loss": 0.0002, "step": 1412 }, { "epoch": 0.5470383275261324, "grad_norm": 0.0033214192371815443, "learning_rate": 5.675404098918258e-07, "loss": 0.0, "step": 1413 }, { "epoch": 0.5474254742547425, "grad_norm": 0.001019191462546587, "learning_rate": 5.516951994295139e-07, "loss": 0.0, "step": 1414 }, { "epoch": 0.5478126209833527, "grad_norm": 0.00021250147256068885, "learning_rate": 5.360730967096272e-07, "loss": 0.0, "step": 1415 }, { "epoch": 0.5481997677119629, "grad_norm": 0.014712055213749409, "learning_rate": 5.206741722181386e-07, "loss": 0.0002, "step": 1416 }, { "epoch": 0.548586914440573, "grad_norm": 0.0003640944487415254, "learning_rate": 5.054984954340481e-07, "loss": 0.0, "step": 1417 }, { "epoch": 0.5489740611691831, "grad_norm": 0.00032771058613434434, "learning_rate": 4.905461348291007e-07, "loss": 0.0, "step": 1418 }, { "epoch": 0.5493612078977933, "grad_norm": 0.013061568140983582, "learning_rate": 4.7581715786743643e-07, "loss": 0.0, "step": 1419 }, { "epoch": 0.5497483546264034, "grad_norm": 0.010962588712573051, "learning_rate": 4.6131163100529563e-07, "loss": 0.0001, "step": 1420 }, { "epoch": 0.5501355013550135, "grad_norm": 0.003091776045039296, "learning_rate": 4.470296196907364e-07, "loss": 0.0001, "step": 1421 }, { "epoch": 0.5505226480836237, "grad_norm": 0.000399423879571259, "learning_rate": 4.329711883633236e-07, "loss": 0.0, "step": 1422 }, { "epoch": 0.5509097948122338, "grad_norm": 0.0104044359177351, "learning_rate": 4.191364004538456e-07, "loss": 0.0001, "step": 1423 }, { "epoch": 0.551296941540844, "grad_norm": 0.000441914948169142, "learning_rate": 4.0552531838402575e-07, "loss": 0.0, "step": 1424 }, { "epoch": 0.5516840882694541, "grad_norm": 0.10242582112550735, "learning_rate": 3.921380035662281e-07, "loss": 0.0005, "step": 1425 }, { "epoch": 0.5520712349980643, "grad_norm": 0.0005255243158899248, "learning_rate": 3.7897451640321323e-07, "loss": 0.0, "step": 1426 }, { "epoch": 0.5524583817266744, "grad_norm": 0.0023901767563074827, "learning_rate": 3.660349162878329e-07, "loss": 0.0, "step": 1427 }, { "epoch": 0.5528455284552846, "grad_norm": 0.0006369251641444862, "learning_rate": 3.533192616027692e-07, "loss": 0.0, "step": 1428 }, { "epoch": 0.5532326751838947, "grad_norm": 0.0010434030555188656, "learning_rate": 3.408276097202845e-07, "loss": 0.0, "step": 1429 }, { "epoch": 0.5536198219125048, "grad_norm": 0.002758053131401539, "learning_rate": 3.285600170019609e-07, "loss": 0.0, "step": 1430 }, { "epoch": 0.554006968641115, "grad_norm": 0.0003661249065771699, "learning_rate": 3.1651653879841703e-07, "loss": 0.0, "step": 1431 }, { "epoch": 0.5543941153697252, "grad_norm": 0.0017078618984669447, "learning_rate": 3.046972294491024e-07, "loss": 0.0, "step": 1432 }, { "epoch": 0.5547812620983352, "grad_norm": 0.0005711820558644831, "learning_rate": 2.9310214228202013e-07, "loss": 0.0, "step": 1433 }, { "epoch": 0.5551684088269454, "grad_norm": 0.037004098296165466, "learning_rate": 2.817313296134938e-07, "loss": 0.0002, "step": 1434 }, { "epoch": 0.5555555555555556, "grad_norm": 0.0002180523006245494, "learning_rate": 2.7058484274794515e-07, "loss": 0.0, "step": 1435 }, { "epoch": 0.5559427022841656, "grad_norm": 0.0006866626208648086, "learning_rate": 2.59662731977639e-07, "loss": 0.0, "step": 1436 }, { "epoch": 0.5563298490127758, "grad_norm": 0.011221646331250668, "learning_rate": 2.489650465824778e-07, "loss": 0.0, "step": 1437 }, { "epoch": 0.556716995741386, "grad_norm": 0.0015853219665586948, "learning_rate": 2.3849183482976824e-07, "loss": 0.0, "step": 1438 }, { "epoch": 0.5571041424699962, "grad_norm": 0.00038003918598406017, "learning_rate": 2.2824314397399406e-07, "loss": 0.0, "step": 1439 }, { "epoch": 0.5574912891986062, "grad_norm": 0.008126421831548214, "learning_rate": 2.182190202566381e-07, "loss": 0.0001, "step": 1440 }, { "epoch": 0.5578784359272164, "grad_norm": 0.00028225293499417603, "learning_rate": 2.0841950890593265e-07, "loss": 0.0, "step": 1441 }, { "epoch": 0.5582655826558266, "grad_norm": 0.0004284578317310661, "learning_rate": 1.9884465413667064e-07, "loss": 0.0, "step": 1442 }, { "epoch": 0.5586527293844367, "grad_norm": 0.0009348619496449828, "learning_rate": 1.8949449915002248e-07, "loss": 0.0, "step": 1443 }, { "epoch": 0.5590398761130468, "grad_norm": 0.010935988277196884, "learning_rate": 1.8036908613331405e-07, "loss": 0.0002, "step": 1444 }, { "epoch": 0.559427022841657, "grad_norm": 0.0023175193928182125, "learning_rate": 1.7146845625985454e-07, "loss": 0.0, "step": 1445 }, { "epoch": 0.5598141695702671, "grad_norm": 0.005808443762362003, "learning_rate": 1.627926496887422e-07, "loss": 0.0001, "step": 1446 }, { "epoch": 0.5602013162988773, "grad_norm": 0.01145158614963293, "learning_rate": 1.5434170556469228e-07, "loss": 0.0, "step": 1447 }, { "epoch": 0.5605884630274874, "grad_norm": 0.0004185729194432497, "learning_rate": 1.4611566201785387e-07, "loss": 0.0, "step": 1448 }, { "epoch": 0.5609756097560976, "grad_norm": 0.0017726552905514836, "learning_rate": 1.38114556163621e-07, "loss": 0.0, "step": 1449 }, { "epoch": 0.5613627564847077, "grad_norm": 0.00791022926568985, "learning_rate": 1.3033842410251075e-07, "loss": 0.0001, "step": 1450 }, { "epoch": 0.5617499032133179, "grad_norm": 0.027528367936611176, "learning_rate": 1.227873009199465e-07, "loss": 0.002, "step": 1451 }, { "epoch": 0.562137049941928, "grad_norm": 0.0025549372658133507, "learning_rate": 1.1546122068614717e-07, "loss": 0.0001, "step": 1452 }, { "epoch": 0.5625241966705381, "grad_norm": 0.002865710761398077, "learning_rate": 1.0836021645593276e-07, "loss": 0.0001, "step": 1453 }, { "epoch": 0.5629113433991483, "grad_norm": 0.002182627562433481, "learning_rate": 1.0148432026860777e-07, "loss": 0.0001, "step": 1454 }, { "epoch": 0.5632984901277585, "grad_norm": 0.002365889959037304, "learning_rate": 9.483356314779479e-08, "loss": 0.0, "step": 1455 }, { "epoch": 0.5636856368563685, "grad_norm": 0.0056472038850188255, "learning_rate": 8.840797510130671e-08, "loss": 0.0001, "step": 1456 }, { "epoch": 0.5640727835849787, "grad_norm": 0.0037379420828074217, "learning_rate": 8.220758512100246e-08, "loss": 0.0, "step": 1457 }, { "epoch": 0.5644599303135889, "grad_norm": 0.003960594069212675, "learning_rate": 7.623242118267038e-08, "loss": 0.0001, "step": 1458 }, { "epoch": 0.5648470770421989, "grad_norm": 0.0035338555462658405, "learning_rate": 7.048251024587837e-08, "loss": 0.0001, "step": 1459 }, { "epoch": 0.5652342237708091, "grad_norm": 0.0007295788382180035, "learning_rate": 6.49578782538851e-08, "loss": 0.0, "step": 1460 }, { "epoch": 0.5656213704994193, "grad_norm": 0.010685405693948269, "learning_rate": 5.965855013347899e-08, "loss": 0.0001, "step": 1461 }, { "epoch": 0.5660085172280295, "grad_norm": 0.0006623066728934646, "learning_rate": 5.458454979492267e-08, "loss": 0.0, "step": 1462 }, { "epoch": 0.5663956639566395, "grad_norm": 0.0034896975848823786, "learning_rate": 4.9735900131786525e-08, "loss": 0.0, "step": 1463 }, { "epoch": 0.5667828106852497, "grad_norm": 0.0008585017058067024, "learning_rate": 4.511262302088759e-08, "loss": 0.0, "step": 1464 }, { "epoch": 0.5671699574138599, "grad_norm": 0.003946602810174227, "learning_rate": 4.0714739322167404e-08, "loss": 0.0001, "step": 1465 }, { "epoch": 0.56755710414247, "grad_norm": 0.010520562529563904, "learning_rate": 3.654226887860879e-08, "loss": 0.0001, "step": 1466 }, { "epoch": 0.5679442508710801, "grad_norm": 0.00258624367415905, "learning_rate": 3.259523051615254e-08, "loss": 0.0, "step": 1467 }, { "epoch": 0.5683313975996903, "grad_norm": 0.002552057383581996, "learning_rate": 2.8873642043586445e-08, "loss": 0.0, "step": 1468 }, { "epoch": 0.5687185443283004, "grad_norm": 0.0009302432299591601, "learning_rate": 2.537752025249529e-08, "loss": 0.0, "step": 1469 }, { "epoch": 0.5691056910569106, "grad_norm": 0.0006743933190591633, "learning_rate": 2.2106880917166506e-08, "loss": 0.0, "step": 1470 }, { "epoch": 0.5694928377855207, "grad_norm": 0.0005312124267220497, "learning_rate": 1.9061738794523553e-08, "loss": 0.0, "step": 1471 }, { "epoch": 0.5698799845141309, "grad_norm": 0.0035967177245765924, "learning_rate": 1.6242107624070412e-08, "loss": 0.0, "step": 1472 }, { "epoch": 0.570267131242741, "grad_norm": 0.0016068631084635854, "learning_rate": 1.3648000127808314e-08, "loss": 0.0, "step": 1473 }, { "epoch": 0.5706542779713512, "grad_norm": 0.0003538385790307075, "learning_rate": 1.1279428010185777e-08, "loss": 0.0, "step": 1474 }, { "epoch": 0.5710414246999613, "grad_norm": 0.00034932824200950563, "learning_rate": 9.13640195805976e-09, "loss": 0.0, "step": 1475 }, { "epoch": 0.5714285714285714, "grad_norm": 0.00040175963658839464, "learning_rate": 7.2189316406345855e-09, "loss": 0.0, "step": 1476 }, { "epoch": 0.5718157181571816, "grad_norm": 0.023909885436296463, "learning_rate": 5.527025709423095e-09, "loss": 0.0001, "step": 1477 }, { "epoch": 0.5722028648857918, "grad_norm": 0.0002910869079641998, "learning_rate": 4.060691798196681e-09, "loss": 0.0, "step": 1478 }, { "epoch": 0.5725900116144018, "grad_norm": 0.0007408217643387616, "learning_rate": 2.8199365229686357e-09, "loss": 0.0, "step": 1479 }, { "epoch": 0.572977158343012, "grad_norm": 0.18819063901901245, "learning_rate": 1.8047654819441928e-09, "loss": 0.0007, "step": 1480 }, { "epoch": 0.5733643050716222, "grad_norm": 0.0005039856769144535, "learning_rate": 1.0151832555205242e-09, "loss": 0.0, "step": 1481 }, { "epoch": 0.5737514518002322, "grad_norm": 0.00021944046602584422, "learning_rate": 4.5119340624233397e-10, "loss": 0.0, "step": 1482 }, { "epoch": 0.5741385985288424, "grad_norm": 0.0005209447117522359, "learning_rate": 1.1279847879630545e-10, "loss": 0.0, "step": 1483 }, { "epoch": 0.5745257452574526, "grad_norm": 0.00040242273826152086, "learning_rate": 0.0, "loss": 0.0, "step": 1484 } ], "logging_steps": 1, "max_steps": 1484, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 371, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.2233368783355904e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }