{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.8310652178429703,
  "eval_steps": 500,
  "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003324260871371881,
      "grad_norm": 2.5143792629241943,
      "learning_rate": 1.6622340425531916e-08,
      "loss": 9.0836,
      "step": 10
    },
    {
      "epoch": 0.006648521742743762,
      "grad_norm": 2.6173431873321533,
      "learning_rate": 3.324468085106383e-08,
      "loss": 9.0829,
      "step": 20
    },
    {
      "epoch": 0.009972782614115643,
      "grad_norm": 2.5349628925323486,
      "learning_rate": 4.9867021276595746e-08,
      "loss": 9.0061,
      "step": 30
    },
    {
      "epoch": 0.013297043485487523,
      "grad_norm": 2.3916308879852295,
      "learning_rate": 6.648936170212767e-08,
      "loss": 8.9747,
      "step": 40
    },
    {
      "epoch": 0.016621304356859403,
      "grad_norm": 2.5103342533111572,
      "learning_rate": 8.311170212765958e-08,
      "loss": 9.0057,
      "step": 50
    },
    {
      "epoch": 0.019945565228231286,
      "grad_norm": 2.421079397201538,
      "learning_rate": 9.973404255319149e-08,
      "loss": 8.9885,
      "step": 60
    },
    {
      "epoch": 0.023269826099603166,
      "grad_norm": 2.6052393913269043,
      "learning_rate": 1.163563829787234e-07,
      "loss": 8.9706,
      "step": 70
    },
    {
      "epoch": 0.026594086970975046,
      "grad_norm": 2.376847505569458,
      "learning_rate": 1.3297872340425533e-07,
      "loss": 9.0211,
      "step": 80
    },
    {
      "epoch": 0.02991834784234693,
      "grad_norm": 2.6200971603393555,
      "learning_rate": 1.4960106382978723e-07,
      "loss": 8.9903,
      "step": 90
    },
    {
      "epoch": 0.033242608713718806,
      "grad_norm": 2.515320301055908,
      "learning_rate": 1.6622340425531916e-07,
      "loss": 8.9643,
      "step": 100
    },
    {
      "epoch": 0.03656686958509069,
      "grad_norm": 2.4840102195739746,
      "learning_rate": 1.8284574468085108e-07,
      "loss": 8.9761,
      "step": 110
    },
    {
      "epoch": 0.03989113045646257,
      "grad_norm": 2.5950074195861816,
      "learning_rate": 1.9946808510638298e-07,
      "loss": 9.0101,
      "step": 120
    },
    {
      "epoch": 0.04321539132783445,
      "grad_norm": 2.530604839324951,
      "learning_rate": 2.160904255319149e-07,
      "loss": 8.961,
      "step": 130
    },
    {
      "epoch": 0.04653965219920633,
      "grad_norm": 2.5579464435577393,
      "learning_rate": 2.327127659574468e-07,
      "loss": 8.8733,
      "step": 140
    },
    {
      "epoch": 0.04986391307057821,
      "grad_norm": 2.638901472091675,
      "learning_rate": 2.4933510638297876e-07,
      "loss": 8.9534,
      "step": 150
    },
    {
      "epoch": 0.05318817394195009,
      "grad_norm": 2.6817493438720703,
      "learning_rate": 2.6595744680851066e-07,
      "loss": 9.0014,
      "step": 160
    },
    {
      "epoch": 0.05651243481332197,
      "grad_norm": 2.6700024604797363,
      "learning_rate": 2.8257978723404256e-07,
      "loss": 8.8832,
      "step": 170
    },
    {
      "epoch": 0.05983669568469386,
      "grad_norm": 2.794243335723877,
      "learning_rate": 2.9920212765957446e-07,
      "loss": 8.9012,
      "step": 180
    },
    {
      "epoch": 0.06316095655606574,
      "grad_norm": 3.000873327255249,
      "learning_rate": 3.1582446808510636e-07,
      "loss": 8.7874,
      "step": 190
    },
    {
      "epoch": 0.06648521742743761,
      "grad_norm": 2.872612714767456,
      "learning_rate": 3.324468085106383e-07,
      "loss": 8.8558,
      "step": 200
    },
    {
      "epoch": 0.0698094782988095,
      "grad_norm": 2.9133315086364746,
      "learning_rate": 3.490691489361702e-07,
      "loss": 8.8333,
      "step": 210
    },
    {
      "epoch": 0.07313373917018139,
      "grad_norm": 3.1017534732818604,
      "learning_rate": 3.6569148936170217e-07,
      "loss": 8.8199,
      "step": 220
    },
    {
      "epoch": 0.07645800004155326,
      "grad_norm": 2.9153056144714355,
      "learning_rate": 3.8231382978723407e-07,
      "loss": 8.8266,
      "step": 230
    },
    {
      "epoch": 0.07978226091292515,
      "grad_norm": 3.0996434688568115,
      "learning_rate": 3.9893617021276597e-07,
      "loss": 8.7202,
      "step": 240
    },
    {
      "epoch": 0.08310652178429702,
      "grad_norm": 3.257809638977051,
      "learning_rate": 4.1555851063829787e-07,
      "loss": 8.6149,
      "step": 250
    },
    {
      "epoch": 0.0864307826556689,
      "grad_norm": 3.378631353378296,
      "learning_rate": 4.321808510638298e-07,
      "loss": 8.5092,
      "step": 260
    },
    {
      "epoch": 0.08975504352704078,
      "grad_norm": 3.3546876907348633,
      "learning_rate": 4.488031914893618e-07,
      "loss": 8.477,
      "step": 270
    },
    {
      "epoch": 0.09307930439841267,
      "grad_norm": 3.292569637298584,
      "learning_rate": 4.654255319148936e-07,
      "loss": 8.4594,
      "step": 280
    },
    {
      "epoch": 0.09640356526978455,
      "grad_norm": 3.190239906311035,
      "learning_rate": 4.820478723404255e-07,
      "loss": 8.3134,
      "step": 290
    },
    {
      "epoch": 0.09972782614115643,
      "grad_norm": 3.2521212100982666,
      "learning_rate": 4.986702127659575e-07,
      "loss": 8.2896,
      "step": 300
    },
    {
      "epoch": 0.10305208701252831,
      "grad_norm": 3.399919033050537,
      "learning_rate": 5.152925531914893e-07,
      "loss": 8.1656,
      "step": 310
    },
    {
      "epoch": 0.10637634788390019,
      "grad_norm": 3.412688970565796,
      "learning_rate": 5.319148936170213e-07,
      "loss": 7.9764,
      "step": 320
    },
    {
      "epoch": 0.10970060875527207,
      "grad_norm": 3.2669174671173096,
      "learning_rate": 5.485372340425532e-07,
      "loss": 7.9755,
      "step": 330
    },
    {
      "epoch": 0.11302486962664395,
      "grad_norm": 3.405444383621216,
      "learning_rate": 5.651595744680851e-07,
      "loss": 7.8587,
      "step": 340
    },
    {
      "epoch": 0.11634913049801583,
      "grad_norm": 3.2224161624908447,
      "learning_rate": 5.81781914893617e-07,
      "loss": 7.7357,
      "step": 350
    },
    {
      "epoch": 0.11967339136938772,
      "grad_norm": 3.230048894882202,
      "learning_rate": 5.984042553191489e-07,
      "loss": 7.5667,
      "step": 360
    },
    {
      "epoch": 0.12299765224075959,
      "grad_norm": 3.2728312015533447,
      "learning_rate": 6.150265957446809e-07,
      "loss": 7.4844,
      "step": 370
    },
    {
      "epoch": 0.12632191311213148,
      "grad_norm": 3.200800895690918,
      "learning_rate": 6.316489361702127e-07,
      "loss": 7.3059,
      "step": 380
    },
    {
      "epoch": 0.12964617398350337,
      "grad_norm": 3.075329065322876,
      "learning_rate": 6.482712765957447e-07,
      "loss": 7.1618,
      "step": 390
    },
    {
      "epoch": 0.13297043485487522,
      "grad_norm": 3.1853721141815186,
      "learning_rate": 6.648936170212766e-07,
      "loss": 7.1392,
      "step": 400
    },
    {
      "epoch": 0.1362946957262471,
      "grad_norm": 3.0336828231811523,
      "learning_rate": 6.815159574468085e-07,
      "loss": 6.974,
      "step": 410
    },
    {
      "epoch": 0.139618956597619,
      "grad_norm": 3.027355670928955,
      "learning_rate": 6.981382978723404e-07,
      "loss": 6.7714,
      "step": 420
    },
    {
      "epoch": 0.14294321746899089,
      "grad_norm": 2.99857497215271,
      "learning_rate": 7.147606382978723e-07,
      "loss": 6.6538,
      "step": 430
    },
    {
      "epoch": 0.14626747834036277,
      "grad_norm": 2.840437650680542,
      "learning_rate": 7.313829787234043e-07,
      "loss": 6.4758,
      "step": 440
    },
    {
      "epoch": 0.14959173921173463,
      "grad_norm": 3.076049566268921,
      "learning_rate": 7.480053191489362e-07,
      "loss": 6.3225,
      "step": 450
    },
    {
      "epoch": 0.15291600008310652,
      "grad_norm": 2.8588602542877197,
      "learning_rate": 7.646276595744681e-07,
      "loss": 6.1646,
      "step": 460
    },
    {
      "epoch": 0.1562402609544784,
      "grad_norm": 2.9168858528137207,
      "learning_rate": 7.8125e-07,
      "loss": 6.0828,
      "step": 470
    },
    {
      "epoch": 0.1595645218258503,
      "grad_norm": 2.795363187789917,
      "learning_rate": 7.978723404255319e-07,
      "loss": 5.9192,
      "step": 480
    },
    {
      "epoch": 0.16288878269722215,
      "grad_norm": 2.3897600173950195,
      "learning_rate": 8.144946808510639e-07,
      "loss": 5.6675,
      "step": 490
    },
    {
      "epoch": 0.16621304356859404,
      "grad_norm": 2.279939651489258,
      "learning_rate": 8.311170212765957e-07,
      "loss": 5.5443,
      "step": 500
    },
    {
      "epoch": 0.16953730443996592,
      "grad_norm": 2.394994020462036,
      "learning_rate": 8.477393617021276e-07,
      "loss": 5.4127,
      "step": 510
    },
    {
      "epoch": 0.1728615653113378,
      "grad_norm": 2.3148529529571533,
      "learning_rate": 8.643617021276596e-07,
      "loss": 5.2102,
      "step": 520
    },
    {
      "epoch": 0.1761858261827097,
      "grad_norm": 2.053243637084961,
      "learning_rate": 8.809840425531915e-07,
      "loss": 5.1204,
      "step": 530
    },
    {
      "epoch": 0.17951008705408156,
      "grad_norm": 2.0687060356140137,
      "learning_rate": 8.976063829787235e-07,
      "loss": 4.9786,
      "step": 540
    },
    {
      "epoch": 0.18283434792545344,
      "grad_norm": 1.8042306900024414,
      "learning_rate": 9.142287234042553e-07,
      "loss": 4.7735,
      "step": 550
    },
    {
      "epoch": 0.18615860879682533,
      "grad_norm": 1.8975441455841064,
      "learning_rate": 9.308510638297872e-07,
      "loss": 4.6749,
      "step": 560
    },
    {
      "epoch": 0.18948286966819722,
      "grad_norm": 1.6640989780426025,
      "learning_rate": 9.474734042553192e-07,
      "loss": 4.529,
      "step": 570
    },
    {
      "epoch": 0.1928071305395691,
      "grad_norm": 1.9563913345336914,
      "learning_rate": 9.64095744680851e-07,
      "loss": 4.4255,
      "step": 580
    },
    {
      "epoch": 0.19613139141094096,
      "grad_norm": 1.433192253112793,
      "learning_rate": 9.80718085106383e-07,
      "loss": 4.3369,
      "step": 590
    },
    {
      "epoch": 0.19945565228231285,
      "grad_norm": 1.6940258741378784,
      "learning_rate": 9.97340425531915e-07,
      "loss": 4.2264,
      "step": 600
    },
    {
      "epoch": 0.20277991315368474,
      "grad_norm": 1.3721990585327148,
      "learning_rate": 1.0139627659574467e-06,
      "loss": 4.0771,
      "step": 610
    },
    {
      "epoch": 0.20610417402505662,
      "grad_norm": 1.3481799364089966,
      "learning_rate": 1.0305851063829786e-06,
      "loss": 3.9652,
      "step": 620
    },
    {
      "epoch": 0.20942843489642848,
      "grad_norm": 1.3010597229003906,
      "learning_rate": 1.0472074468085108e-06,
      "loss": 3.9205,
      "step": 630
    },
    {
      "epoch": 0.21275269576780037,
      "grad_norm": 1.551216721534729,
      "learning_rate": 1.0638297872340427e-06,
      "loss": 3.8234,
      "step": 640
    },
    {
      "epoch": 0.21607695663917226,
      "grad_norm": 1.3280216455459595,
      "learning_rate": 1.0804521276595746e-06,
      "loss": 3.6898,
      "step": 650
    },
    {
      "epoch": 0.21940121751054414,
      "grad_norm": 1.0909334421157837,
      "learning_rate": 1.0970744680851065e-06,
      "loss": 3.6489,
      "step": 660
    },
    {
      "epoch": 0.22272547838191603,
      "grad_norm": 1.345831036567688,
      "learning_rate": 1.1136968085106384e-06,
      "loss": 3.5296,
      "step": 670
    },
    {
      "epoch": 0.2260497392532879,
      "grad_norm": 1.0882962942123413,
      "learning_rate": 1.1303191489361703e-06,
      "loss": 3.489,
      "step": 680
    },
    {
      "epoch": 0.22937400012465978,
      "grad_norm": 0.9840554594993591,
      "learning_rate": 1.1469414893617022e-06,
      "loss": 3.4164,
      "step": 690
    },
    {
      "epoch": 0.23269826099603166,
      "grad_norm": 1.0956693887710571,
      "learning_rate": 1.163563829787234e-06,
      "loss": 3.3211,
      "step": 700
    },
    {
      "epoch": 0.23602252186740355,
      "grad_norm": 0.8875247240066528,
      "learning_rate": 1.1801861702127662e-06,
      "loss": 3.2647,
      "step": 710
    },
    {
      "epoch": 0.23934678273877544,
      "grad_norm": 1.268930196762085,
      "learning_rate": 1.1968085106382979e-06,
      "loss": 3.2033,
      "step": 720
    },
    {
      "epoch": 0.2426710436101473,
      "grad_norm": 0.9430990815162659,
      "learning_rate": 1.2134308510638298e-06,
      "loss": 3.1317,
      "step": 730
    },
    {
      "epoch": 0.24599530448151918,
      "grad_norm": 0.9196615219116211,
      "learning_rate": 1.2300531914893619e-06,
      "loss": 3.0706,
      "step": 740
    },
    {
      "epoch": 0.24931956535289107,
      "grad_norm": 0.7046869993209839,
      "learning_rate": 1.2466755319148936e-06,
      "loss": 3.0142,
      "step": 750
    },
    {
      "epoch": 0.25264382622426296,
      "grad_norm": 0.9173153638839722,
      "learning_rate": 1.2632978723404255e-06,
      "loss": 2.949,
      "step": 760
    },
    {
      "epoch": 0.25596808709563484,
      "grad_norm": 0.8014841675758362,
      "learning_rate": 1.2799202127659576e-06,
      "loss": 2.9325,
      "step": 770
    },
    {
      "epoch": 0.25929234796700673,
      "grad_norm": 0.9520502686500549,
      "learning_rate": 1.2965425531914895e-06,
      "loss": 2.859,
      "step": 780
    },
    {
      "epoch": 0.2626166088383786,
      "grad_norm": 0.7679387331008911,
      "learning_rate": 1.3131648936170214e-06,
      "loss": 2.8509,
      "step": 790
    },
    {
      "epoch": 0.26594086970975045,
      "grad_norm": 0.7660825252532959,
      "learning_rate": 1.3297872340425533e-06,
      "loss": 2.7896,
      "step": 800
    },
    {
      "epoch": 0.26926513058112234,
      "grad_norm": 0.7754834294319153,
      "learning_rate": 1.3464095744680852e-06,
      "loss": 2.736,
      "step": 810
    },
    {
      "epoch": 0.2725893914524942,
      "grad_norm": 0.5802922248840332,
      "learning_rate": 1.363031914893617e-06,
      "loss": 2.6962,
      "step": 820
    },
    {
      "epoch": 0.2759136523238661,
      "grad_norm": 0.6394158601760864,
      "learning_rate": 1.379654255319149e-06,
      "loss": 2.6656,
      "step": 830
    },
    {
      "epoch": 0.279237913195238,
      "grad_norm": 0.6503139138221741,
      "learning_rate": 1.3962765957446809e-06,
      "loss": 2.616,
      "step": 840
    },
    {
      "epoch": 0.2825621740666099,
      "grad_norm": 0.6165557503700256,
      "learning_rate": 1.412898936170213e-06,
      "loss": 2.5971,
      "step": 850
    },
    {
      "epoch": 0.28588643493798177,
      "grad_norm": 0.6192012429237366,
      "learning_rate": 1.4295212765957447e-06,
      "loss": 2.5536,
      "step": 860
    },
    {
      "epoch": 0.28921069580935366,
      "grad_norm": 0.6266525983810425,
      "learning_rate": 1.4461436170212766e-06,
      "loss": 2.5036,
      "step": 870
    },
    {
      "epoch": 0.29253495668072554,
      "grad_norm": 0.5376760363578796,
      "learning_rate": 1.4627659574468087e-06,
      "loss": 2.5136,
      "step": 880
    },
    {
      "epoch": 0.2958592175520974,
      "grad_norm": 0.6490041613578796,
      "learning_rate": 1.4793882978723404e-06,
      "loss": 2.4638,
      "step": 890
    },
    {
      "epoch": 0.29918347842346926,
      "grad_norm": 0.6368073225021362,
      "learning_rate": 1.4960106382978725e-06,
      "loss": 2.4258,
      "step": 900
    },
    {
      "epoch": 0.30250773929484115,
      "grad_norm": 0.5121726989746094,
      "learning_rate": 1.5126329787234044e-06,
      "loss": 2.4016,
      "step": 910
    },
    {
      "epoch": 0.30583200016621304,
      "grad_norm": 0.5835744738578796,
      "learning_rate": 1.5292553191489363e-06,
      "loss": 2.4192,
      "step": 920
    },
    {
      "epoch": 0.3091562610375849,
      "grad_norm": 0.5275241732597351,
      "learning_rate": 1.5458776595744682e-06,
      "loss": 2.3687,
      "step": 930
    },
    {
      "epoch": 0.3124805219089568,
      "grad_norm": 0.4900510609149933,
      "learning_rate": 1.5625e-06,
      "loss": 2.3208,
      "step": 940
    },
    {
      "epoch": 0.3158047827803287,
      "grad_norm": 0.4609052240848541,
      "learning_rate": 1.5791223404255322e-06,
      "loss": 2.3363,
      "step": 950
    },
    {
      "epoch": 0.3191290436517006,
      "grad_norm": 0.461566299200058,
      "learning_rate": 1.5957446808510639e-06,
      "loss": 2.2793,
      "step": 960
    },
    {
      "epoch": 0.32245330452307247,
      "grad_norm": 0.49795401096343994,
      "learning_rate": 1.6123670212765958e-06,
      "loss": 2.2845,
      "step": 970
    },
    {
      "epoch": 0.3257775653944443,
      "grad_norm": 0.4422404170036316,
      "learning_rate": 1.6289893617021279e-06,
      "loss": 2.2744,
      "step": 980
    },
    {
      "epoch": 0.3291018262658162,
      "grad_norm": 0.4161861538887024,
      "learning_rate": 1.6456117021276596e-06,
      "loss": 2.2463,
      "step": 990
    },
    {
      "epoch": 0.3324260871371881,
      "grad_norm": 0.46071523427963257,
      "learning_rate": 1.6622340425531915e-06,
      "loss": 2.2271,
      "step": 1000
    },
    {
      "epoch": 0.33575034800855996,
      "grad_norm": 0.3772067129611969,
      "learning_rate": 1.6788563829787236e-06,
      "loss": 2.2119,
      "step": 1010
    },
    {
      "epoch": 0.33907460887993185,
      "grad_norm": 0.44782117009162903,
      "learning_rate": 1.6954787234042553e-06,
      "loss": 2.2022,
      "step": 1020
    },
    {
      "epoch": 0.34239886975130374,
      "grad_norm": 0.4486360251903534,
      "learning_rate": 1.7121010638297872e-06,
      "loss": 2.1723,
      "step": 1030
    },
    {
      "epoch": 0.3457231306226756,
      "grad_norm": 0.47423475980758667,
      "learning_rate": 1.7287234042553193e-06,
      "loss": 2.1295,
      "step": 1040
    },
    {
      "epoch": 0.3490473914940475,
      "grad_norm": 0.4199342131614685,
      "learning_rate": 1.745345744680851e-06,
      "loss": 2.1387,
      "step": 1050
    },
    {
      "epoch": 0.3523716523654194,
      "grad_norm": 0.43744415044784546,
      "learning_rate": 1.761968085106383e-06,
      "loss": 2.1195,
      "step": 1060
    },
    {
      "epoch": 0.3556959132367913,
      "grad_norm": 0.3780044913291931,
      "learning_rate": 1.778590425531915e-06,
      "loss": 2.1194,
      "step": 1070
    },
    {
      "epoch": 0.3590201741081631,
      "grad_norm": 0.40349099040031433,
      "learning_rate": 1.795212765957447e-06,
      "loss": 2.1005,
      "step": 1080
    },
    {
      "epoch": 0.362344434979535,
      "grad_norm": 0.378764271736145,
      "learning_rate": 1.8118351063829788e-06,
      "loss": 2.0757,
      "step": 1090
    },
    {
      "epoch": 0.3656686958509069,
      "grad_norm": 0.34115588665008545,
      "learning_rate": 1.8284574468085107e-06,
      "loss": 2.0591,
      "step": 1100
    },
    {
      "epoch": 0.3689929567222788,
      "grad_norm": 0.39553964138031006,
      "learning_rate": 1.8450797872340428e-06,
      "loss": 2.0298,
      "step": 1110
    },
    {
      "epoch": 0.37231721759365066,
      "grad_norm": 0.36110466718673706,
      "learning_rate": 1.8617021276595745e-06,
      "loss": 2.0113,
      "step": 1120
    },
    {
      "epoch": 0.37564147846502255,
      "grad_norm": 0.33477863669395447,
      "learning_rate": 1.8783244680851066e-06,
      "loss": 2.0197,
      "step": 1130
    },
    {
      "epoch": 0.37896573933639444,
      "grad_norm": 0.43919846415519714,
      "learning_rate": 1.8949468085106385e-06,
      "loss": 1.9794,
      "step": 1140
    },
    {
      "epoch": 0.3822900002077663,
      "grad_norm": 0.3243393898010254,
      "learning_rate": 1.9115691489361704e-06,
      "loss": 1.9667,
      "step": 1150
    },
    {
      "epoch": 0.3856142610791382,
      "grad_norm": 0.3350262939929962,
      "learning_rate": 1.928191489361702e-06,
      "loss": 1.978,
      "step": 1160
    },
    {
      "epoch": 0.38893852195051004,
      "grad_norm": 0.3365063965320587,
      "learning_rate": 1.944813829787234e-06,
      "loss": 1.9701,
      "step": 1170
    },
    {
      "epoch": 0.39226278282188193,
      "grad_norm": 0.3240489661693573,
      "learning_rate": 1.961436170212766e-06,
      "loss": 1.9465,
      "step": 1180
    },
    {
      "epoch": 0.3955870436932538,
      "grad_norm": 0.3239437937736511,
      "learning_rate": 1.978058510638298e-06,
      "loss": 1.9253,
      "step": 1190
    },
    {
      "epoch": 0.3989113045646257,
      "grad_norm": 0.3397749364376068,
      "learning_rate": 1.99468085106383e-06,
      "loss": 1.9057,
      "step": 1200
    },
    {
      "epoch": 0.4022355654359976,
      "grad_norm": 0.2915981113910675,
      "learning_rate": 2.011303191489362e-06,
      "loss": 1.9047,
      "step": 1210
    },
    {
      "epoch": 0.4055598263073695,
      "grad_norm": 0.39456045627593994,
      "learning_rate": 2.0279255319148935e-06,
      "loss": 1.9144,
      "step": 1220
    },
    {
      "epoch": 0.40888408717874136,
      "grad_norm": 0.2593387961387634,
      "learning_rate": 2.0445478723404256e-06,
      "loss": 1.8969,
      "step": 1230
    },
    {
      "epoch": 0.41220834805011325,
      "grad_norm": 0.30935177206993103,
      "learning_rate": 2.0611702127659573e-06,
      "loss": 1.8931,
      "step": 1240
    },
    {
      "epoch": 0.41553260892148514,
      "grad_norm": 0.27917250990867615,
      "learning_rate": 2.0777925531914894e-06,
      "loss": 1.8899,
      "step": 1250
    },
    {
      "epoch": 0.41885686979285697,
      "grad_norm": 0.25976502895355225,
      "learning_rate": 2.0944148936170215e-06,
      "loss": 1.8503,
      "step": 1260
    },
    {
      "epoch": 0.42218113066422885,
      "grad_norm": 0.31833794713020325,
      "learning_rate": 2.111037234042553e-06,
      "loss": 1.8527,
      "step": 1270
    },
    {
      "epoch": 0.42550539153560074,
      "grad_norm": 0.2671976685523987,
      "learning_rate": 2.1276595744680853e-06,
      "loss": 1.8505,
      "step": 1280
    },
    {
      "epoch": 0.42882965240697263,
      "grad_norm": 0.3245258629322052,
      "learning_rate": 2.144281914893617e-06,
      "loss": 1.864,
      "step": 1290
    },
    {
      "epoch": 0.4321539132783445,
      "grad_norm": 0.2622531056404114,
      "learning_rate": 2.160904255319149e-06,
      "loss": 1.8301,
      "step": 1300
    },
    {
      "epoch": 0.4354781741497164,
      "grad_norm": 0.3247709274291992,
      "learning_rate": 2.177526595744681e-06,
      "loss": 1.812,
      "step": 1310
    },
    {
      "epoch": 0.4388024350210883,
      "grad_norm": 0.26424384117126465,
      "learning_rate": 2.194148936170213e-06,
      "loss": 1.7958,
      "step": 1320
    },
    {
      "epoch": 0.4421266958924602,
      "grad_norm": 0.2569092810153961,
      "learning_rate": 2.210771276595745e-06,
      "loss": 1.8147,
      "step": 1330
    },
    {
      "epoch": 0.44545095676383206,
      "grad_norm": 0.2393629103899002,
      "learning_rate": 2.2273936170212767e-06,
      "loss": 1.7976,
      "step": 1340
    },
    {
      "epoch": 0.44877521763520395,
      "grad_norm": 0.232402965426445,
      "learning_rate": 2.244015957446809e-06,
      "loss": 1.7597,
      "step": 1350
    },
    {
      "epoch": 0.4520994785065758,
      "grad_norm": 0.26385971903800964,
      "learning_rate": 2.2606382978723405e-06,
      "loss": 1.7781,
      "step": 1360
    },
    {
      "epoch": 0.45542373937794767,
      "grad_norm": 0.2671038806438446,
      "learning_rate": 2.277260638297872e-06,
      "loss": 1.7583,
      "step": 1370
    },
    {
      "epoch": 0.45874800024931955,
      "grad_norm": 0.27096447348594666,
      "learning_rate": 2.2938829787234043e-06,
      "loss": 1.7402,
      "step": 1380
    },
    {
      "epoch": 0.46207226112069144,
      "grad_norm": 0.2245018631219864,
      "learning_rate": 2.3105053191489364e-06,
      "loss": 1.7644,
      "step": 1390
    },
    {
      "epoch": 0.46539652199206333,
      "grad_norm": 0.20663714408874512,
      "learning_rate": 2.327127659574468e-06,
      "loss": 1.7519,
      "step": 1400
    },
    {
      "epoch": 0.4687207828634352,
      "grad_norm": 0.26273128390312195,
      "learning_rate": 2.3437500000000002e-06,
      "loss": 1.7312,
      "step": 1410
    },
    {
      "epoch": 0.4720450437348071,
      "grad_norm": 0.24725256860256195,
      "learning_rate": 2.3603723404255323e-06,
      "loss": 1.7217,
      "step": 1420
    },
    {
      "epoch": 0.475369304606179,
      "grad_norm": 0.25341796875,
      "learning_rate": 2.376994680851064e-06,
      "loss": 1.7246,
      "step": 1430
    },
    {
      "epoch": 0.4786935654775509,
      "grad_norm": 0.21035414934158325,
      "learning_rate": 2.3936170212765957e-06,
      "loss": 1.7017,
      "step": 1440
    },
    {
      "epoch": 0.4820178263489227,
      "grad_norm": 0.21454143524169922,
      "learning_rate": 2.410239361702128e-06,
      "loss": 1.7049,
      "step": 1450
    },
    {
      "epoch": 0.4853420872202946,
      "grad_norm": 0.22413010895252228,
      "learning_rate": 2.4268617021276595e-06,
      "loss": 1.6809,
      "step": 1460
    },
    {
      "epoch": 0.4886663480916665,
      "grad_norm": 0.2039473056793213,
      "learning_rate": 2.4434840425531916e-06,
      "loss": 1.6873,
      "step": 1470
    },
    {
      "epoch": 0.49199060896303837,
      "grad_norm": 0.18895457684993744,
      "learning_rate": 2.4601063829787237e-06,
      "loss": 1.69,
      "step": 1480
    },
    {
      "epoch": 0.49531486983441025,
      "grad_norm": 0.21047964692115784,
      "learning_rate": 2.4767287234042554e-06,
      "loss": 1.681,
      "step": 1490
    },
    {
      "epoch": 0.49863913070578214,
      "grad_norm": 0.2226460874080658,
      "learning_rate": 2.493351063829787e-06,
      "loss": 1.6613,
      "step": 1500
    },
    {
      "epoch": 0.501963391577154,
      "grad_norm": 0.21892835199832916,
      "learning_rate": 2.5099734042553192e-06,
      "loss": 1.6376,
      "step": 1510
    },
    {
      "epoch": 0.5052876524485259,
      "grad_norm": 0.20363831520080566,
      "learning_rate": 2.526595744680851e-06,
      "loss": 1.6541,
      "step": 1520
    },
    {
      "epoch": 0.5086119133198977,
      "grad_norm": 0.1988699585199356,
      "learning_rate": 2.543218085106383e-06,
      "loss": 1.6422,
      "step": 1530
    },
    {
      "epoch": 0.5119361741912697,
      "grad_norm": 0.2050096094608307,
      "learning_rate": 2.559840425531915e-06,
      "loss": 1.6377,
      "step": 1540
    },
    {
      "epoch": 0.5152604350626415,
      "grad_norm": 0.23265878856182098,
      "learning_rate": 2.5764627659574472e-06,
      "loss": 1.6251,
      "step": 1550
    },
    {
      "epoch": 0.5185846959340135,
      "grad_norm": 0.2024969905614853,
      "learning_rate": 2.593085106382979e-06,
      "loss": 1.6048,
      "step": 1560
    },
    {
      "epoch": 0.5219089568053853,
      "grad_norm": 0.21343863010406494,
      "learning_rate": 2.6097074468085106e-06,
      "loss": 1.6195,
      "step": 1570
    },
    {
      "epoch": 0.5252332176767572,
      "grad_norm": 0.1862565129995346,
      "learning_rate": 2.6263297872340427e-06,
      "loss": 1.5991,
      "step": 1580
    },
    {
      "epoch": 0.5285574785481291,
      "grad_norm": 0.22765249013900757,
      "learning_rate": 2.6429521276595744e-06,
      "loss": 1.5957,
      "step": 1590
    },
    {
      "epoch": 0.5318817394195009,
      "grad_norm": 0.19874997437000275,
      "learning_rate": 2.6595744680851065e-06,
      "loss": 1.5847,
      "step": 1600
    },
    {
      "epoch": 0.5352060002908728,
      "grad_norm": 0.25979486107826233,
      "learning_rate": 2.6761968085106386e-06,
      "loss": 1.6046,
      "step": 1610
    },
    {
      "epoch": 0.5385302611622447,
      "grad_norm": 0.1831529289484024,
      "learning_rate": 2.6928191489361703e-06,
      "loss": 1.5835,
      "step": 1620
    },
    {
      "epoch": 0.5418545220336166,
      "grad_norm": 0.2680751085281372,
      "learning_rate": 2.7094414893617024e-06,
      "loss": 1.6009,
      "step": 1630
    },
    {
      "epoch": 0.5451787829049884,
      "grad_norm": 0.18160907924175262,
      "learning_rate": 2.726063829787234e-06,
      "loss": 1.5666,
      "step": 1640
    },
    {
      "epoch": 0.5485030437763604,
      "grad_norm": 0.22875571250915527,
      "learning_rate": 2.742686170212766e-06,
      "loss": 1.5614,
      "step": 1650
    },
    {
      "epoch": 0.5518273046477322,
      "grad_norm": 0.21110033988952637,
      "learning_rate": 2.759308510638298e-06,
      "loss": 1.5707,
      "step": 1660
    },
    {
      "epoch": 0.5551515655191042,
      "grad_norm": 0.1887374073266983,
      "learning_rate": 2.77593085106383e-06,
      "loss": 1.5781,
      "step": 1670
    },
    {
      "epoch": 0.558475826390476,
      "grad_norm": 0.1916954219341278,
      "learning_rate": 2.7925531914893617e-06,
      "loss": 1.563,
      "step": 1680
    },
    {
      "epoch": 0.5618000872618478,
      "grad_norm": 0.21001753211021423,
      "learning_rate": 2.809175531914894e-06,
      "loss": 1.5495,
      "step": 1690
    },
    {
      "epoch": 0.5651243481332198,
      "grad_norm": 0.1702377200126648,
      "learning_rate": 2.825797872340426e-06,
      "loss": 1.5427,
      "step": 1700
    },
    {
      "epoch": 0.5684486090045916,
      "grad_norm": 0.19061295688152313,
      "learning_rate": 2.8424202127659576e-06,
      "loss": 1.5387,
      "step": 1710
    },
    {
      "epoch": 0.5717728698759635,
      "grad_norm": 0.17503058910369873,
      "learning_rate": 2.8590425531914893e-06,
      "loss": 1.5154,
      "step": 1720
    },
    {
      "epoch": 0.5750971307473354,
      "grad_norm": 0.1703094244003296,
      "learning_rate": 2.8756648936170214e-06,
      "loss": 1.5209,
      "step": 1730
    },
    {
      "epoch": 0.5784213916187073,
      "grad_norm": 0.22713126242160797,
      "learning_rate": 2.892287234042553e-06,
      "loss": 1.529,
      "step": 1740
    },
    {
      "epoch": 0.5817456524900791,
      "grad_norm": 0.16218431293964386,
      "learning_rate": 2.9089095744680852e-06,
      "loss": 1.505,
      "step": 1750
    },
    {
      "epoch": 0.5850699133614511,
      "grad_norm": 0.16082778573036194,
      "learning_rate": 2.9255319148936174e-06,
      "loss": 1.5312,
      "step": 1760
    },
    {
      "epoch": 0.5883941742328229,
      "grad_norm": 0.19500340521335602,
      "learning_rate": 2.942154255319149e-06,
      "loss": 1.4971,
      "step": 1770
    },
    {
      "epoch": 0.5917184351041948,
      "grad_norm": 0.16831324994564056,
      "learning_rate": 2.9587765957446807e-06,
      "loss": 1.5172,
      "step": 1780
    },
    {
      "epoch": 0.5950426959755667,
      "grad_norm": 0.17963413894176483,
      "learning_rate": 2.975398936170213e-06,
      "loss": 1.5076,
      "step": 1790
    },
    {
      "epoch": 0.5983669568469385,
      "grad_norm": 0.17123515903949738,
      "learning_rate": 2.992021276595745e-06,
      "loss": 1.4941,
      "step": 1800
    },
    {
      "epoch": 0.6016912177183105,
      "grad_norm": 0.15727902948856354,
      "learning_rate": 3.0086436170212766e-06,
      "loss": 1.4609,
      "step": 1810
    },
    {
      "epoch": 0.6050154785896823,
      "grad_norm": 0.1833077073097229,
      "learning_rate": 3.0252659574468088e-06,
      "loss": 1.5042,
      "step": 1820
    },
    {
      "epoch": 0.6083397394610542,
      "grad_norm": 0.16962528228759766,
      "learning_rate": 3.041888297872341e-06,
      "loss": 1.4651,
      "step": 1830
    },
    {
      "epoch": 0.6116640003324261,
      "grad_norm": 0.17829731106758118,
      "learning_rate": 3.0585106382978726e-06,
      "loss": 1.4907,
      "step": 1840
    },
    {
      "epoch": 0.614988261203798,
      "grad_norm": 0.16981306672096252,
      "learning_rate": 3.0751329787234042e-06,
      "loss": 1.4683,
      "step": 1850
    },
    {
      "epoch": 0.6183125220751698,
      "grad_norm": 0.20783671736717224,
      "learning_rate": 3.0917553191489363e-06,
      "loss": 1.463,
      "step": 1860
    },
    {
      "epoch": 0.6216367829465417,
      "grad_norm": 0.20343361794948578,
      "learning_rate": 3.108377659574468e-06,
      "loss": 1.4632,
      "step": 1870
    },
    {
      "epoch": 0.6249610438179136,
      "grad_norm": 0.18592675030231476,
      "learning_rate": 3.125e-06,
      "loss": 1.4887,
      "step": 1880
    },
    {
      "epoch": 0.6282853046892855,
      "grad_norm": 0.17272701859474182,
      "learning_rate": 3.141622340425532e-06,
      "loss": 1.4491,
      "step": 1890
    },
    {
      "epoch": 0.6316095655606574,
      "grad_norm": 0.2021792083978653,
      "learning_rate": 3.1582446808510644e-06,
      "loss": 1.4537,
      "step": 1900
    },
    {
      "epoch": 0.6349338264320292,
      "grad_norm": 0.16319766640663147,
      "learning_rate": 3.174867021276596e-06,
      "loss": 1.456,
      "step": 1910
    },
    {
      "epoch": 0.6382580873034012,
      "grad_norm": 0.2344328761100769,
      "learning_rate": 3.1914893617021277e-06,
      "loss": 1.4801,
      "step": 1920
    },
    {
      "epoch": 0.641582348174773,
      "grad_norm": 0.17495407164096832,
      "learning_rate": 3.20811170212766e-06,
      "loss": 1.4435,
      "step": 1930
    },
    {
      "epoch": 0.6449066090461449,
      "grad_norm": 0.19222399592399597,
      "learning_rate": 3.2247340425531915e-06,
      "loss": 1.4391,
      "step": 1940
    },
    {
      "epoch": 0.6482308699175168,
      "grad_norm": 0.24526530504226685,
      "learning_rate": 3.2413563829787232e-06,
      "loss": 1.4555,
      "step": 1950
    },
    {
      "epoch": 0.6515551307888886,
      "grad_norm": 0.18150673806667328,
      "learning_rate": 3.2579787234042558e-06,
      "loss": 1.4396,
      "step": 1960
    },
    {
      "epoch": 0.6548793916602605,
      "grad_norm": 0.18334811925888062,
      "learning_rate": 3.2746010638297875e-06,
      "loss": 1.4139,
      "step": 1970
    },
    {
      "epoch": 0.6582036525316324,
      "grad_norm": 0.25186312198638916,
      "learning_rate": 3.291223404255319e-06,
      "loss": 1.439,
      "step": 1980
    },
    {
      "epoch": 0.6615279134030043,
      "grad_norm": 0.16558600962162018,
      "learning_rate": 3.3078457446808513e-06,
      "loss": 1.4383,
      "step": 1990
    },
    {
      "epoch": 0.6648521742743762,
      "grad_norm": 0.2373538315296173,
      "learning_rate": 3.324468085106383e-06,
      "loss": 1.4334,
      "step": 2000
    },
    {
      "epoch": 0.6681764351457481,
      "grad_norm": 0.2821474075317383,
      "learning_rate": 3.3410904255319146e-06,
      "loss": 1.4418,
      "step": 2010
    },
    {
      "epoch": 0.6715006960171199,
      "grad_norm": 0.2443741410970688,
      "learning_rate": 3.357712765957447e-06,
      "loss": 1.4071,
      "step": 2020
    },
    {
      "epoch": 0.6748249568884919,
      "grad_norm": 0.17468735575675964,
      "learning_rate": 3.374335106382979e-06,
      "loss": 1.4109,
      "step": 2030
    },
    {
      "epoch": 0.6781492177598637,
      "grad_norm": 0.1655045598745346,
      "learning_rate": 3.3909574468085105e-06,
      "loss": 1.4049,
      "step": 2040
    },
    {
      "epoch": 0.6814734786312356,
      "grad_norm": 0.17598801851272583,
      "learning_rate": 3.4075797872340427e-06,
      "loss": 1.4188,
      "step": 2050
    },
    {
      "epoch": 0.6847977395026075,
      "grad_norm": 0.28528669476509094,
      "learning_rate": 3.4242021276595743e-06,
      "loss": 1.408,
      "step": 2060
    },
    {
      "epoch": 0.6881220003739793,
      "grad_norm": 0.17654620110988617,
      "learning_rate": 3.440824468085106e-06,
      "loss": 1.4117,
      "step": 2070
    },
    {
      "epoch": 0.6914462612453512,
      "grad_norm": 0.2636467516422272,
      "learning_rate": 3.4574468085106386e-06,
      "loss": 1.3947,
      "step": 2080
    },
    {
      "epoch": 0.6947705221167231,
      "grad_norm": 0.26495933532714844,
      "learning_rate": 3.4740691489361703e-06,
      "loss": 1.398,
      "step": 2090
    },
    {
      "epoch": 0.698094782988095,
      "grad_norm": 0.3873574435710907,
      "learning_rate": 3.490691489361702e-06,
      "loss": 1.4204,
      "step": 2100
    },
    {
      "epoch": 0.7014190438594669,
      "grad_norm": 0.327854186296463,
      "learning_rate": 3.5073138297872345e-06,
      "loss": 1.3744,
      "step": 2110
    },
    {
      "epoch": 0.7047433047308388,
      "grad_norm": 0.308570921421051,
      "learning_rate": 3.523936170212766e-06,
      "loss": 1.4293,
      "step": 2120
    },
    {
      "epoch": 0.7080675656022106,
      "grad_norm": 0.21123336255550385,
      "learning_rate": 3.5405585106382983e-06,
      "loss": 1.3878,
      "step": 2130
    },
    {
      "epoch": 0.7113918264735826,
      "grad_norm": 0.18777534365653992,
      "learning_rate": 3.55718085106383e-06,
      "loss": 1.3882,
      "step": 2140
    },
    {
      "epoch": 0.7147160873449544,
      "grad_norm": 0.2535350024700165,
      "learning_rate": 3.5738031914893617e-06,
      "loss": 1.3974,
      "step": 2150
    },
    {
      "epoch": 0.7180403482163262,
      "grad_norm": 0.15405435860157013,
      "learning_rate": 3.590425531914894e-06,
      "loss": 1.3853,
      "step": 2160
    },
    {
      "epoch": 0.7213646090876982,
      "grad_norm": 0.1863648146390915,
      "learning_rate": 3.607047872340426e-06,
      "loss": 1.3835,
      "step": 2170
    },
    {
      "epoch": 0.72468886995907,
      "grad_norm": 0.18587157130241394,
      "learning_rate": 3.6236702127659576e-06,
      "loss": 1.3711,
      "step": 2180
    },
    {
      "epoch": 0.728013130830442,
      "grad_norm": 0.18254730105400085,
      "learning_rate": 3.6402925531914897e-06,
      "loss": 1.3768,
      "step": 2190
    },
    {
      "epoch": 0.7313373917018138,
      "grad_norm": 0.21665969491004944,
      "learning_rate": 3.6569148936170214e-06,
      "loss": 1.3638,
      "step": 2200
    },
    {
      "epoch": 0.7346616525731857,
      "grad_norm": 0.15701924264431,
      "learning_rate": 3.673537234042553e-06,
      "loss": 1.3885,
      "step": 2210
    },
    {
      "epoch": 0.7379859134445576,
      "grad_norm": 0.19307725131511688,
      "learning_rate": 3.6901595744680856e-06,
      "loss": 1.3933,
      "step": 2220
    },
    {
      "epoch": 0.7413101743159295,
      "grad_norm": 0.16837100684642792,
      "learning_rate": 3.7067819148936173e-06,
      "loss": 1.3685,
      "step": 2230
    },
    {
      "epoch": 0.7446344351873013,
      "grad_norm": 0.2914402484893799,
      "learning_rate": 3.723404255319149e-06,
      "loss": 1.3802,
      "step": 2240
    },
    {
      "epoch": 0.7479586960586732,
      "grad_norm": 0.2770545184612274,
      "learning_rate": 3.7400265957446815e-06,
      "loss": 1.3575,
      "step": 2250
    },
    {
      "epoch": 0.7512829569300451,
      "grad_norm": 0.19819234311580658,
      "learning_rate": 3.756648936170213e-06,
      "loss": 1.3695,
      "step": 2260
    },
    {
      "epoch": 0.7546072178014169,
      "grad_norm": 0.15371359884738922,
      "learning_rate": 3.7732712765957445e-06,
      "loss": 1.3514,
      "step": 2270
    },
    {
      "epoch": 0.7579314786727889,
      "grad_norm": 0.26700448989868164,
      "learning_rate": 3.789893617021277e-06,
      "loss": 1.3689,
      "step": 2280
    },
    {
      "epoch": 0.7612557395441607,
      "grad_norm": 0.2938506007194519,
      "learning_rate": 3.8065159574468087e-06,
      "loss": 1.3518,
      "step": 2290
    },
    {
      "epoch": 0.7645800004155326,
      "grad_norm": 0.2514606714248657,
      "learning_rate": 3.823138297872341e-06,
      "loss": 1.3655,
      "step": 2300
    },
    {
      "epoch": 0.7679042612869045,
      "grad_norm": 0.2503184378147125,
      "learning_rate": 3.8397606382978725e-06,
      "loss": 1.3511,
      "step": 2310
    },
    {
      "epoch": 0.7712285221582764,
      "grad_norm": 0.1815042346715927,
      "learning_rate": 3.856382978723404e-06,
      "loss": 1.383,
      "step": 2320
    },
    {
      "epoch": 0.7745527830296483,
      "grad_norm": 0.25425419211387634,
      "learning_rate": 3.873005319148936e-06,
      "loss": 1.3354,
      "step": 2330
    },
    {
      "epoch": 0.7778770439010201,
      "grad_norm": 0.18466657400131226,
      "learning_rate": 3.889627659574468e-06,
      "loss": 1.3514,
      "step": 2340
    },
    {
      "epoch": 0.781201304772392,
      "grad_norm": 0.1782332807779312,
      "learning_rate": 3.90625e-06,
      "loss": 1.32,
      "step": 2350
    },
    {
      "epoch": 0.7845255656437639,
      "grad_norm": 0.27637991309165955,
      "learning_rate": 3.922872340425532e-06,
      "loss": 1.3383,
      "step": 2360
    },
    {
      "epoch": 0.7878498265151358,
      "grad_norm": 0.17314772307872772,
      "learning_rate": 3.939494680851064e-06,
      "loss": 1.3314,
      "step": 2370
    },
    {
      "epoch": 0.7911740873865076,
      "grad_norm": 0.3641667068004608,
      "learning_rate": 3.956117021276596e-06,
      "loss": 1.3543,
      "step": 2380
    },
    {
      "epoch": 0.7944983482578796,
      "grad_norm": 0.3088253438472748,
      "learning_rate": 3.972739361702128e-06,
      "loss": 1.3444,
      "step": 2390
    },
    {
      "epoch": 0.7978226091292514,
      "grad_norm": 0.25276973843574524,
      "learning_rate": 3.98936170212766e-06,
      "loss": 1.3102,
      "step": 2400
    },
    {
      "epoch": 0.8011468700006233,
      "grad_norm": 0.26414382457733154,
      "learning_rate": 4.005984042553192e-06,
      "loss": 1.3119,
      "step": 2410
    },
    {
      "epoch": 0.8044711308719952,
      "grad_norm": 0.1684638261795044,
      "learning_rate": 4.022606382978724e-06,
      "loss": 1.3204,
      "step": 2420
    },
    {
      "epoch": 0.807795391743367,
      "grad_norm": 0.18500946462154388,
      "learning_rate": 4.039228723404256e-06,
      "loss": 1.3251,
      "step": 2430
    },
    {
      "epoch": 0.811119652614739,
      "grad_norm": 0.2754835784435272,
      "learning_rate": 4.055851063829787e-06,
      "loss": 1.3258,
      "step": 2440
    },
    {
      "epoch": 0.8144439134861108,
      "grad_norm": 0.18949855864048004,
      "learning_rate": 4.072473404255319e-06,
      "loss": 1.3145,
      "step": 2450
    },
    {
      "epoch": 0.8177681743574827,
      "grad_norm": 0.6927218437194824,
      "learning_rate": 4.089095744680851e-06,
      "loss": 1.3205,
      "step": 2460
    },
    {
      "epoch": 0.8210924352288546,
      "grad_norm": 0.36098670959472656,
      "learning_rate": 4.105718085106383e-06,
      "loss": 1.3295,
      "step": 2470
    },
    {
      "epoch": 0.8244166961002265,
      "grad_norm": 0.25839686393737793,
      "learning_rate": 4.1223404255319146e-06,
      "loss": 1.321,
      "step": 2480
    },
    {
      "epoch": 0.8277409569715983,
      "grad_norm": 0.18720127642154694,
      "learning_rate": 4.138962765957447e-06,
      "loss": 1.2975,
      "step": 2490
    },
    {
      "epoch": 0.8310652178429703,
      "grad_norm": 0.17975495755672455,
      "learning_rate": 4.155585106382979e-06,
      "loss": 1.318,
      "step": 2500
    }
  ],
  "logging_steps": 10,
  "max_steps": 150400,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.4430265344e+18,
  "train_batch_size": 5,
  "trial_name": null,
  "trial_params": null
}