{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 0,
  "global_step": 246,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0040650406504065045,
      "grad_norm": 0.37466323375701904,
      "learning_rate": 1e-05,
      "loss": 1.9179,
      "step": 1
    },
    {
      "epoch": 0.008130081300813009,
      "grad_norm": 0.36199215054512024,
      "learning_rate": 9.959349593495936e-06,
      "loss": 1.891,
      "step": 2
    },
    {
      "epoch": 0.012195121951219513,
      "grad_norm": 0.37762531638145447,
      "learning_rate": 9.91869918699187e-06,
      "loss": 1.8919,
      "step": 3
    },
    {
      "epoch": 0.016260162601626018,
      "grad_norm": 0.366923063993454,
      "learning_rate": 9.878048780487805e-06,
      "loss": 1.8734,
      "step": 4
    },
    {
      "epoch": 0.02032520325203252,
      "grad_norm": 0.3635967969894409,
      "learning_rate": 9.837398373983741e-06,
      "loss": 1.8069,
      "step": 5
    },
    {
      "epoch": 0.024390243902439025,
      "grad_norm": 0.385695219039917,
      "learning_rate": 9.796747967479675e-06,
      "loss": 1.8299,
      "step": 6
    },
    {
      "epoch": 0.028455284552845527,
      "grad_norm": 0.38534337282180786,
      "learning_rate": 9.756097560975611e-06,
      "loss": 1.8937,
      "step": 7
    },
    {
      "epoch": 0.032520325203252036,
      "grad_norm": 0.39530763030052185,
      "learning_rate": 9.715447154471546e-06,
      "loss": 1.887,
      "step": 8
    },
    {
      "epoch": 0.036585365853658534,
      "grad_norm": 0.36345309019088745,
      "learning_rate": 9.67479674796748e-06,
      "loss": 1.727,
      "step": 9
    },
    {
      "epoch": 0.04065040650406504,
      "grad_norm": 0.3516029715538025,
      "learning_rate": 9.634146341463415e-06,
      "loss": 1.7273,
      "step": 10
    },
    {
      "epoch": 0.044715447154471545,
      "grad_norm": 0.3647765517234802,
      "learning_rate": 9.59349593495935e-06,
      "loss": 1.8437,
      "step": 11
    },
    {
      "epoch": 0.04878048780487805,
      "grad_norm": 0.34979772567749023,
      "learning_rate": 9.552845528455286e-06,
      "loss": 1.7521,
      "step": 12
    },
    {
      "epoch": 0.052845528455284556,
      "grad_norm": 0.32717210054397583,
      "learning_rate": 9.51219512195122e-06,
      "loss": 1.7187,
      "step": 13
    },
    {
      "epoch": 0.056910569105691054,
      "grad_norm": 0.3098759651184082,
      "learning_rate": 9.471544715447156e-06,
      "loss": 1.7383,
      "step": 14
    },
    {
      "epoch": 0.06097560975609756,
      "grad_norm": 0.308346152305603,
      "learning_rate": 9.43089430894309e-06,
      "loss": 1.718,
      "step": 15
    },
    {
      "epoch": 0.06504065040650407,
      "grad_norm": 0.2985452711582184,
      "learning_rate": 9.390243902439025e-06,
      "loss": 1.6939,
      "step": 16
    },
    {
      "epoch": 0.06910569105691057,
      "grad_norm": 0.3071250915527344,
      "learning_rate": 9.34959349593496e-06,
      "loss": 1.7706,
      "step": 17
    },
    {
      "epoch": 0.07317073170731707,
      "grad_norm": 0.28936341404914856,
      "learning_rate": 9.308943089430895e-06,
      "loss": 1.6595,
      "step": 18
    },
    {
      "epoch": 0.07723577235772358,
      "grad_norm": 0.3091869056224823,
      "learning_rate": 9.268292682926831e-06,
      "loss": 1.7392,
      "step": 19
    },
    {
      "epoch": 0.08130081300813008,
      "grad_norm": 0.2812434732913971,
      "learning_rate": 9.227642276422764e-06,
      "loss": 1.6386,
      "step": 20
    },
    {
      "epoch": 0.08536585365853659,
      "grad_norm": 0.30109739303588867,
      "learning_rate": 9.1869918699187e-06,
      "loss": 1.6592,
      "step": 21
    },
    {
      "epoch": 0.08943089430894309,
      "grad_norm": 0.30123457312583923,
      "learning_rate": 9.146341463414635e-06,
      "loss": 1.6466,
      "step": 22
    },
    {
      "epoch": 0.09349593495934959,
      "grad_norm": 0.2957462668418884,
      "learning_rate": 9.10569105691057e-06,
      "loss": 1.632,
      "step": 23
    },
    {
      "epoch": 0.0975609756097561,
      "grad_norm": 0.270624577999115,
      "learning_rate": 9.065040650406505e-06,
      "loss": 1.578,
      "step": 24
    },
    {
      "epoch": 0.1016260162601626,
      "grad_norm": 0.2735169231891632,
      "learning_rate": 9.02439024390244e-06,
      "loss": 1.6163,
      "step": 25
    },
    {
      "epoch": 0.10569105691056911,
      "grad_norm": 0.2614346146583557,
      "learning_rate": 8.983739837398374e-06,
      "loss": 1.6092,
      "step": 26
    },
    {
      "epoch": 0.10975609756097561,
      "grad_norm": 0.25784415006637573,
      "learning_rate": 8.94308943089431e-06,
      "loss": 1.621,
      "step": 27
    },
    {
      "epoch": 0.11382113821138211,
      "grad_norm": 0.24907095730304718,
      "learning_rate": 8.902439024390244e-06,
      "loss": 1.5968,
      "step": 28
    },
    {
      "epoch": 0.11788617886178862,
      "grad_norm": 0.23298929631710052,
      "learning_rate": 8.86178861788618e-06,
      "loss": 1.5136,
      "step": 29
    },
    {
      "epoch": 0.12195121951219512,
      "grad_norm": 0.24476122856140137,
      "learning_rate": 8.821138211382113e-06,
      "loss": 1.5748,
      "step": 30
    },
    {
      "epoch": 0.12601626016260162,
      "grad_norm": 0.22472171485424042,
      "learning_rate": 8.78048780487805e-06,
      "loss": 1.5214,
      "step": 31
    },
    {
      "epoch": 0.13008130081300814,
      "grad_norm": 0.22423842549324036,
      "learning_rate": 8.739837398373985e-06,
      "loss": 1.5136,
      "step": 32
    },
    {
      "epoch": 0.13414634146341464,
      "grad_norm": 0.22147248685359955,
      "learning_rate": 8.69918699186992e-06,
      "loss": 1.5276,
      "step": 33
    },
    {
      "epoch": 0.13821138211382114,
      "grad_norm": 0.22564049065113068,
      "learning_rate": 8.658536585365854e-06,
      "loss": 1.5549,
      "step": 34
    },
    {
      "epoch": 0.14227642276422764,
      "grad_norm": 0.2238183170557022,
      "learning_rate": 8.617886178861789e-06,
      "loss": 1.5138,
      "step": 35
    },
    {
      "epoch": 0.14634146341463414,
      "grad_norm": 0.2192021906375885,
      "learning_rate": 8.577235772357724e-06,
      "loss": 1.5152,
      "step": 36
    },
    {
      "epoch": 0.15040650406504066,
      "grad_norm": 0.2100159078836441,
      "learning_rate": 8.536585365853658e-06,
      "loss": 1.4951,
      "step": 37
    },
    {
      "epoch": 0.15447154471544716,
      "grad_norm": 0.20502997934818268,
      "learning_rate": 8.495934959349595e-06,
      "loss": 1.4513,
      "step": 38
    },
    {
      "epoch": 0.15853658536585366,
      "grad_norm": 0.2130008190870285,
      "learning_rate": 8.45528455284553e-06,
      "loss": 1.4937,
      "step": 39
    },
    {
      "epoch": 0.16260162601626016,
      "grad_norm": 0.19370350241661072,
      "learning_rate": 8.414634146341464e-06,
      "loss": 1.4002,
      "step": 40
    },
    {
      "epoch": 0.16666666666666666,
      "grad_norm": 0.18506227433681488,
      "learning_rate": 8.373983739837399e-06,
      "loss": 1.3856,
      "step": 41
    },
    {
      "epoch": 0.17073170731707318,
      "grad_norm": 0.209653839468956,
      "learning_rate": 8.333333333333334e-06,
      "loss": 1.4828,
      "step": 42
    },
    {
      "epoch": 0.17479674796747968,
      "grad_norm": 0.19840197265148163,
      "learning_rate": 8.292682926829268e-06,
      "loss": 1.4348,
      "step": 43
    },
    {
      "epoch": 0.17886178861788618,
      "grad_norm": 0.18005311489105225,
      "learning_rate": 8.252032520325203e-06,
      "loss": 1.3554,
      "step": 44
    },
    {
      "epoch": 0.18292682926829268,
      "grad_norm": 0.19674968719482422,
      "learning_rate": 8.21138211382114e-06,
      "loss": 1.4429,
      "step": 45
    },
    {
      "epoch": 0.18699186991869918,
      "grad_norm": 0.18445399403572083,
      "learning_rate": 8.170731707317073e-06,
      "loss": 1.4179,
      "step": 46
    },
    {
      "epoch": 0.1910569105691057,
      "grad_norm": 0.1927977353334427,
      "learning_rate": 8.130081300813009e-06,
      "loss": 1.4503,
      "step": 47
    },
    {
      "epoch": 0.1951219512195122,
      "grad_norm": 0.17811378836631775,
      "learning_rate": 8.089430894308944e-06,
      "loss": 1.4001,
      "step": 48
    },
    {
      "epoch": 0.1991869918699187,
      "grad_norm": 0.16938495635986328,
      "learning_rate": 8.048780487804879e-06,
      "loss": 1.3482,
      "step": 49
    },
    {
      "epoch": 0.2032520325203252,
      "grad_norm": 0.18051789700984955,
      "learning_rate": 8.008130081300813e-06,
      "loss": 1.4097,
      "step": 50
    },
    {
      "epoch": 0.2073170731707317,
      "grad_norm": 0.17777681350708008,
      "learning_rate": 7.967479674796748e-06,
      "loss": 1.3837,
      "step": 51
    },
    {
      "epoch": 0.21138211382113822,
      "grad_norm": 0.17888520658016205,
      "learning_rate": 7.926829268292685e-06,
      "loss": 1.4076,
      "step": 52
    },
    {
      "epoch": 0.21544715447154472,
      "grad_norm": 0.16906102001667023,
      "learning_rate": 7.886178861788618e-06,
      "loss": 1.3576,
      "step": 53
    },
    {
      "epoch": 0.21951219512195122,
      "grad_norm": 0.16587428748607635,
      "learning_rate": 7.845528455284554e-06,
      "loss": 1.3646,
      "step": 54
    },
    {
      "epoch": 0.22357723577235772,
      "grad_norm": 0.1608864665031433,
      "learning_rate": 7.804878048780489e-06,
      "loss": 1.3441,
      "step": 55
    },
    {
      "epoch": 0.22764227642276422,
      "grad_norm": 0.16243964433670044,
      "learning_rate": 7.764227642276424e-06,
      "loss": 1.3597,
      "step": 56
    },
    {
      "epoch": 0.23170731707317074,
      "grad_norm": 0.16970421373844147,
      "learning_rate": 7.723577235772358e-06,
      "loss": 1.3691,
      "step": 57
    },
    {
      "epoch": 0.23577235772357724,
      "grad_norm": 0.1493624895811081,
      "learning_rate": 7.682926829268293e-06,
      "loss": 1.2929,
      "step": 58
    },
    {
      "epoch": 0.23983739837398374,
      "grad_norm": 0.14960215985774994,
      "learning_rate": 7.64227642276423e-06,
      "loss": 1.2966,
      "step": 59
    },
    {
      "epoch": 0.24390243902439024,
      "grad_norm": 0.1617850363254547,
      "learning_rate": 7.601626016260163e-06,
      "loss": 1.3179,
      "step": 60
    },
    {
      "epoch": 0.24796747967479674,
      "grad_norm": 0.15413883328437805,
      "learning_rate": 7.560975609756098e-06,
      "loss": 1.3147,
      "step": 61
    },
    {
      "epoch": 0.25203252032520324,
      "grad_norm": 0.1458769589662552,
      "learning_rate": 7.520325203252034e-06,
      "loss": 1.3044,
      "step": 62
    },
    {
      "epoch": 0.25609756097560976,
      "grad_norm": 0.15694762766361237,
      "learning_rate": 7.4796747967479676e-06,
      "loss": 1.3446,
      "step": 63
    },
    {
      "epoch": 0.2601626016260163,
      "grad_norm": 0.14829568564891815,
      "learning_rate": 7.439024390243903e-06,
      "loss": 1.2993,
      "step": 64
    },
    {
      "epoch": 0.26422764227642276,
      "grad_norm": 0.14209705591201782,
      "learning_rate": 7.398373983739838e-06,
      "loss": 1.3118,
      "step": 65
    },
    {
      "epoch": 0.2682926829268293,
      "grad_norm": 0.14432059228420258,
      "learning_rate": 7.357723577235773e-06,
      "loss": 1.2808,
      "step": 66
    },
    {
      "epoch": 0.27235772357723576,
      "grad_norm": 0.14690716564655304,
      "learning_rate": 7.317073170731707e-06,
      "loss": 1.3004,
      "step": 67
    },
    {
      "epoch": 0.2764227642276423,
      "grad_norm": 0.14109715819358826,
      "learning_rate": 7.276422764227643e-06,
      "loss": 1.3,
      "step": 68
    },
    {
      "epoch": 0.2804878048780488,
      "grad_norm": 0.14119209349155426,
      "learning_rate": 7.2357723577235786e-06,
      "loss": 1.2918,
      "step": 69
    },
    {
      "epoch": 0.2845528455284553,
      "grad_norm": 0.15276534855365753,
      "learning_rate": 7.1951219512195125e-06,
      "loss": 1.2638,
      "step": 70
    },
    {
      "epoch": 0.2886178861788618,
      "grad_norm": 0.14538833498954773,
      "learning_rate": 7.154471544715448e-06,
      "loss": 1.2629,
      "step": 71
    },
    {
      "epoch": 0.2926829268292683,
      "grad_norm": 0.1435929387807846,
      "learning_rate": 7.113821138211383e-06,
      "loss": 1.2731,
      "step": 72
    },
    {
      "epoch": 0.2967479674796748,
      "grad_norm": 0.14157208800315857,
      "learning_rate": 7.0731707317073175e-06,
      "loss": 1.25,
      "step": 73
    },
    {
      "epoch": 0.3008130081300813,
      "grad_norm": 0.13198794424533844,
      "learning_rate": 7.032520325203252e-06,
      "loss": 1.2257,
      "step": 74
    },
    {
      "epoch": 0.3048780487804878,
      "grad_norm": 0.14817562699317932,
      "learning_rate": 6.991869918699188e-06,
      "loss": 1.2912,
      "step": 75
    },
    {
      "epoch": 0.3089430894308943,
      "grad_norm": 0.1346520185470581,
      "learning_rate": 6.951219512195122e-06,
      "loss": 1.2525,
      "step": 76
    },
    {
      "epoch": 0.3130081300813008,
      "grad_norm": 0.13347090780735016,
      "learning_rate": 6.910569105691057e-06,
      "loss": 1.2395,
      "step": 77
    },
    {
      "epoch": 0.3170731707317073,
      "grad_norm": 0.1383773386478424,
      "learning_rate": 6.869918699186993e-06,
      "loss": 1.2359,
      "step": 78
    },
    {
      "epoch": 0.32113821138211385,
      "grad_norm": 0.13513882458209991,
      "learning_rate": 6.829268292682928e-06,
      "loss": 1.2356,
      "step": 79
    },
    {
      "epoch": 0.3252032520325203,
      "grad_norm": 0.1364334225654602,
      "learning_rate": 6.788617886178862e-06,
      "loss": 1.2635,
      "step": 80
    },
    {
      "epoch": 0.32926829268292684,
      "grad_norm": 0.14114025235176086,
      "learning_rate": 6.747967479674797e-06,
      "loss": 1.2198,
      "step": 81
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 0.1414542943239212,
      "learning_rate": 6.707317073170733e-06,
      "loss": 1.236,
      "step": 82
    },
    {
      "epoch": 0.33739837398373984,
      "grad_norm": 0.14660492539405823,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.2659,
      "step": 83
    },
    {
      "epoch": 0.34146341463414637,
      "grad_norm": 0.13563567399978638,
      "learning_rate": 6.626016260162602e-06,
      "loss": 1.2293,
      "step": 84
    },
    {
      "epoch": 0.34552845528455284,
      "grad_norm": 0.1366971880197525,
      "learning_rate": 6.585365853658538e-06,
      "loss": 1.2411,
      "step": 85
    },
    {
      "epoch": 0.34959349593495936,
      "grad_norm": 0.13849468529224396,
      "learning_rate": 6.544715447154472e-06,
      "loss": 1.2109,
      "step": 86
    },
    {
      "epoch": 0.35365853658536583,
      "grad_norm": 0.1426294893026352,
      "learning_rate": 6.504065040650407e-06,
      "loss": 1.2268,
      "step": 87
    },
    {
      "epoch": 0.35772357723577236,
      "grad_norm": 0.14396873116493225,
      "learning_rate": 6.463414634146342e-06,
      "loss": 1.2525,
      "step": 88
    },
    {
      "epoch": 0.3617886178861789,
      "grad_norm": 0.14269301295280457,
      "learning_rate": 6.422764227642278e-06,
      "loss": 1.2786,
      "step": 89
    },
    {
      "epoch": 0.36585365853658536,
      "grad_norm": 0.13728809356689453,
      "learning_rate": 6.3821138211382115e-06,
      "loss": 1.2408,
      "step": 90
    },
    {
      "epoch": 0.3699186991869919,
      "grad_norm": 0.13985177874565125,
      "learning_rate": 6.341463414634147e-06,
      "loss": 1.2613,
      "step": 91
    },
    {
      "epoch": 0.37398373983739835,
      "grad_norm": 0.1378786265850067,
      "learning_rate": 6.300813008130082e-06,
      "loss": 1.2353,
      "step": 92
    },
    {
      "epoch": 0.3780487804878049,
      "grad_norm": 0.1417645812034607,
      "learning_rate": 6.260162601626017e-06,
      "loss": 1.234,
      "step": 93
    },
    {
      "epoch": 0.3821138211382114,
      "grad_norm": 0.14507374167442322,
      "learning_rate": 6.219512195121951e-06,
      "loss": 1.2313,
      "step": 94
    },
    {
      "epoch": 0.3861788617886179,
      "grad_norm": 0.14076276123523712,
      "learning_rate": 6.178861788617887e-06,
      "loss": 1.2403,
      "step": 95
    },
    {
      "epoch": 0.3902439024390244,
      "grad_norm": 0.1414155662059784,
      "learning_rate": 6.138211382113821e-06,
      "loss": 1.2192,
      "step": 96
    },
    {
      "epoch": 0.3943089430894309,
      "grad_norm": 0.14313894510269165,
      "learning_rate": 6.0975609756097564e-06,
      "loss": 1.1967,
      "step": 97
    },
    {
      "epoch": 0.3983739837398374,
      "grad_norm": 0.14830103516578674,
      "learning_rate": 6.056910569105692e-06,
      "loss": 1.2047,
      "step": 98
    },
    {
      "epoch": 0.4024390243902439,
      "grad_norm": 0.15455585718154907,
      "learning_rate": 6.016260162601627e-06,
      "loss": 1.2054,
      "step": 99
    },
    {
      "epoch": 0.4065040650406504,
      "grad_norm": 0.14887511730194092,
      "learning_rate": 5.9756097560975615e-06,
      "loss": 1.2123,
      "step": 100
    },
    {
      "epoch": 0.4105691056910569,
      "grad_norm": 0.1438646763563156,
      "learning_rate": 5.934959349593496e-06,
      "loss": 1.1864,
      "step": 101
    },
    {
      "epoch": 0.4146341463414634,
      "grad_norm": 0.13993819057941437,
      "learning_rate": 5.894308943089432e-06,
      "loss": 1.2191,
      "step": 102
    },
    {
      "epoch": 0.4186991869918699,
      "grad_norm": 0.14674201607704163,
      "learning_rate": 5.853658536585366e-06,
      "loss": 1.2102,
      "step": 103
    },
    {
      "epoch": 0.42276422764227645,
      "grad_norm": 0.1535062938928604,
      "learning_rate": 5.813008130081301e-06,
      "loss": 1.2211,
      "step": 104
    },
    {
      "epoch": 0.4268292682926829,
      "grad_norm": 0.13475534319877625,
      "learning_rate": 5.772357723577237e-06,
      "loss": 1.1769,
      "step": 105
    },
    {
      "epoch": 0.43089430894308944,
      "grad_norm": 0.15499569475650787,
      "learning_rate": 5.731707317073171e-06,
      "loss": 1.2572,
      "step": 106
    },
    {
      "epoch": 0.4349593495934959,
      "grad_norm": 0.15704728662967682,
      "learning_rate": 5.691056910569106e-06,
      "loss": 1.2462,
      "step": 107
    },
    {
      "epoch": 0.43902439024390244,
      "grad_norm": 0.15167436003684998,
      "learning_rate": 5.650406504065041e-06,
      "loss": 1.238,
      "step": 108
    },
    {
      "epoch": 0.44308943089430897,
      "grad_norm": 0.16091591119766235,
      "learning_rate": 5.609756097560977e-06,
      "loss": 1.2157,
      "step": 109
    },
    {
      "epoch": 0.44715447154471544,
      "grad_norm": 0.14369367063045502,
      "learning_rate": 5.569105691056911e-06,
      "loss": 1.1802,
      "step": 110
    },
    {
      "epoch": 0.45121951219512196,
      "grad_norm": 0.15445178747177124,
      "learning_rate": 5.528455284552846e-06,
      "loss": 1.2274,
      "step": 111
    },
    {
      "epoch": 0.45528455284552843,
      "grad_norm": 0.289122998714447,
      "learning_rate": 5.487804878048781e-06,
      "loss": 1.157,
      "step": 112
    },
    {
      "epoch": 0.45934959349593496,
      "grad_norm": 0.14909254014492035,
      "learning_rate": 5.447154471544716e-06,
      "loss": 1.1684,
      "step": 113
    },
    {
      "epoch": 0.4634146341463415,
      "grad_norm": 0.15071816742420197,
      "learning_rate": 5.4065040650406504e-06,
      "loss": 1.2162,
      "step": 114
    },
    {
      "epoch": 0.46747967479674796,
      "grad_norm": 0.15695182979106903,
      "learning_rate": 5.365853658536586e-06,
      "loss": 1.1799,
      "step": 115
    },
    {
      "epoch": 0.4715447154471545,
      "grad_norm": 0.1441459357738495,
      "learning_rate": 5.32520325203252e-06,
      "loss": 1.1762,
      "step": 116
    },
    {
      "epoch": 0.47560975609756095,
      "grad_norm": 0.15637452900409698,
      "learning_rate": 5.2845528455284555e-06,
      "loss": 1.1865,
      "step": 117
    },
    {
      "epoch": 0.4796747967479675,
      "grad_norm": 0.15726090967655182,
      "learning_rate": 5.243902439024391e-06,
      "loss": 1.2216,
      "step": 118
    },
    {
      "epoch": 0.483739837398374,
      "grad_norm": 0.15303117036819458,
      "learning_rate": 5.203252032520326e-06,
      "loss": 1.1612,
      "step": 119
    },
    {
      "epoch": 0.4878048780487805,
      "grad_norm": 0.1533118337392807,
      "learning_rate": 5.162601626016261e-06,
      "loss": 1.2007,
      "step": 120
    },
    {
      "epoch": 0.491869918699187,
      "grad_norm": 0.15438418090343475,
      "learning_rate": 5.121951219512195e-06,
      "loss": 1.1708,
      "step": 121
    },
    {
      "epoch": 0.4959349593495935,
      "grad_norm": 0.1471181958913803,
      "learning_rate": 5.081300813008131e-06,
      "loss": 1.1586,
      "step": 122
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.15365855395793915,
      "learning_rate": 5.040650406504065e-06,
      "loss": 1.1754,
      "step": 123
    },
    {
      "epoch": 0.5040650406504065,
      "grad_norm": 0.1605301797389984,
      "learning_rate": 5e-06,
      "loss": 1.1837,
      "step": 124
    },
    {
      "epoch": 0.508130081300813,
      "grad_norm": 0.15496233105659485,
      "learning_rate": 4.959349593495935e-06,
      "loss": 1.1623,
      "step": 125
    },
    {
      "epoch": 0.5121951219512195,
      "grad_norm": 0.15336526930332184,
      "learning_rate": 4.918699186991871e-06,
      "loss": 1.1912,
      "step": 126
    },
    {
      "epoch": 0.516260162601626,
      "grad_norm": 0.15313974022865295,
      "learning_rate": 4.8780487804878055e-06,
      "loss": 1.1564,
      "step": 127
    },
    {
      "epoch": 0.5203252032520326,
      "grad_norm": 0.16495689749717712,
      "learning_rate": 4.83739837398374e-06,
      "loss": 1.1778,
      "step": 128
    },
    {
      "epoch": 0.524390243902439,
      "grad_norm": 0.15195077657699585,
      "learning_rate": 4.796747967479675e-06,
      "loss": 1.1862,
      "step": 129
    },
    {
      "epoch": 0.5284552845528455,
      "grad_norm": 0.1482411026954651,
      "learning_rate": 4.75609756097561e-06,
      "loss": 1.1578,
      "step": 130
    },
    {
      "epoch": 0.532520325203252,
      "grad_norm": 0.15776410698890686,
      "learning_rate": 4.715447154471545e-06,
      "loss": 1.1715,
      "step": 131
    },
    {
      "epoch": 0.5365853658536586,
      "grad_norm": 0.15097281336784363,
      "learning_rate": 4.67479674796748e-06,
      "loss": 1.1643,
      "step": 132
    },
    {
      "epoch": 0.540650406504065,
      "grad_norm": 0.1520746350288391,
      "learning_rate": 4.634146341463416e-06,
      "loss": 1.1758,
      "step": 133
    },
    {
      "epoch": 0.5447154471544715,
      "grad_norm": 0.1401418298482895,
      "learning_rate": 4.59349593495935e-06,
      "loss": 1.1427,
      "step": 134
    },
    {
      "epoch": 0.5487804878048781,
      "grad_norm": 0.1752861589193344,
      "learning_rate": 4.552845528455285e-06,
      "loss": 1.182,
      "step": 135
    },
    {
      "epoch": 0.5528455284552846,
      "grad_norm": 0.14296431839466095,
      "learning_rate": 4.51219512195122e-06,
      "loss": 1.1436,
      "step": 136
    },
    {
      "epoch": 0.556910569105691,
      "grad_norm": 0.15898752212524414,
      "learning_rate": 4.471544715447155e-06,
      "loss": 1.195,
      "step": 137
    },
    {
      "epoch": 0.5609756097560976,
      "grad_norm": 0.14214490354061127,
      "learning_rate": 4.43089430894309e-06,
      "loss": 1.1414,
      "step": 138
    },
    {
      "epoch": 0.5650406504065041,
      "grad_norm": 0.1454809606075287,
      "learning_rate": 4.390243902439025e-06,
      "loss": 1.1479,
      "step": 139
    },
    {
      "epoch": 0.5691056910569106,
      "grad_norm": 0.17529405653476715,
      "learning_rate": 4.34959349593496e-06,
      "loss": 1.1979,
      "step": 140
    },
    {
      "epoch": 0.573170731707317,
      "grad_norm": 0.14693313837051392,
      "learning_rate": 4.308943089430894e-06,
      "loss": 1.161,
      "step": 141
    },
    {
      "epoch": 0.5772357723577236,
      "grad_norm": 0.15913361310958862,
      "learning_rate": 4.268292682926829e-06,
      "loss": 1.1707,
      "step": 142
    },
    {
      "epoch": 0.5813008130081301,
      "grad_norm": 0.15354648232460022,
      "learning_rate": 4.227642276422765e-06,
      "loss": 1.1974,
      "step": 143
    },
    {
      "epoch": 0.5853658536585366,
      "grad_norm": 0.15235230326652527,
      "learning_rate": 4.1869918699186995e-06,
      "loss": 1.1452,
      "step": 144
    },
    {
      "epoch": 0.5894308943089431,
      "grad_norm": 0.1511920690536499,
      "learning_rate": 4.146341463414634e-06,
      "loss": 1.1264,
      "step": 145
    },
    {
      "epoch": 0.5934959349593496,
      "grad_norm": 0.15749534964561462,
      "learning_rate": 4.10569105691057e-06,
      "loss": 1.1906,
      "step": 146
    },
    {
      "epoch": 0.5975609756097561,
      "grad_norm": 0.16904385387897491,
      "learning_rate": 4.0650406504065046e-06,
      "loss": 1.193,
      "step": 147
    },
    {
      "epoch": 0.6016260162601627,
      "grad_norm": 0.16513940691947937,
      "learning_rate": 4.024390243902439e-06,
      "loss": 1.21,
      "step": 148
    },
    {
      "epoch": 0.6056910569105691,
      "grad_norm": 0.16372235119342804,
      "learning_rate": 3.983739837398374e-06,
      "loss": 1.1754,
      "step": 149
    },
    {
      "epoch": 0.6097560975609756,
      "grad_norm": 0.15626975893974304,
      "learning_rate": 3.943089430894309e-06,
      "loss": 1.185,
      "step": 150
    },
    {
      "epoch": 0.6138211382113821,
      "grad_norm": 0.1519530713558197,
      "learning_rate": 3.902439024390244e-06,
      "loss": 1.187,
      "step": 151
    },
    {
      "epoch": 0.6178861788617886,
      "grad_norm": 0.14789018034934998,
      "learning_rate": 3.861788617886179e-06,
      "loss": 1.1461,
      "step": 152
    },
    {
      "epoch": 0.6219512195121951,
      "grad_norm": 0.14152097702026367,
      "learning_rate": 3.821138211382115e-06,
      "loss": 1.1393,
      "step": 153
    },
    {
      "epoch": 0.6260162601626016,
      "grad_norm": 0.15340520441532135,
      "learning_rate": 3.780487804878049e-06,
      "loss": 1.1751,
      "step": 154
    },
    {
      "epoch": 0.6300813008130082,
      "grad_norm": 0.15177197754383087,
      "learning_rate": 3.7398373983739838e-06,
      "loss": 1.1293,
      "step": 155
    },
    {
      "epoch": 0.6341463414634146,
      "grad_norm": 0.14667950570583344,
      "learning_rate": 3.699186991869919e-06,
      "loss": 1.1507,
      "step": 156
    },
    {
      "epoch": 0.6382113821138211,
      "grad_norm": 0.15677042305469513,
      "learning_rate": 3.6585365853658537e-06,
      "loss": 1.1783,
      "step": 157
    },
    {
      "epoch": 0.6422764227642277,
      "grad_norm": 0.15823784470558167,
      "learning_rate": 3.6178861788617893e-06,
      "loss": 1.1536,
      "step": 158
    },
    {
      "epoch": 0.6463414634146342,
      "grad_norm": 0.14942224323749542,
      "learning_rate": 3.577235772357724e-06,
      "loss": 1.1546,
      "step": 159
    },
    {
      "epoch": 0.6504065040650406,
      "grad_norm": 0.16173319518566132,
      "learning_rate": 3.5365853658536588e-06,
      "loss": 1.1635,
      "step": 160
    },
    {
      "epoch": 0.6544715447154471,
      "grad_norm": 0.14434999227523804,
      "learning_rate": 3.495934959349594e-06,
      "loss": 1.1155,
      "step": 161
    },
    {
      "epoch": 0.6585365853658537,
      "grad_norm": 0.1534000039100647,
      "learning_rate": 3.4552845528455287e-06,
      "loss": 1.16,
      "step": 162
    },
    {
      "epoch": 0.6626016260162602,
      "grad_norm": 0.15426084399223328,
      "learning_rate": 3.414634146341464e-06,
      "loss": 1.1552,
      "step": 163
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.15010565519332886,
      "learning_rate": 3.3739837398373986e-06,
      "loss": 1.169,
      "step": 164
    },
    {
      "epoch": 0.6707317073170732,
      "grad_norm": 0.15017640590667725,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 1.1272,
      "step": 165
    },
    {
      "epoch": 0.6747967479674797,
      "grad_norm": 0.15031912922859192,
      "learning_rate": 3.292682926829269e-06,
      "loss": 1.1215,
      "step": 166
    },
    {
      "epoch": 0.6788617886178862,
      "grad_norm": 0.14726445078849792,
      "learning_rate": 3.2520325203252037e-06,
      "loss": 1.0797,
      "step": 167
    },
    {
      "epoch": 0.6829268292682927,
      "grad_norm": 0.14389647543430328,
      "learning_rate": 3.211382113821139e-06,
      "loss": 1.1457,
      "step": 168
    },
    {
      "epoch": 0.6869918699186992,
      "grad_norm": 0.15171369910240173,
      "learning_rate": 3.1707317073170736e-06,
      "loss": 1.1609,
      "step": 169
    },
    {
      "epoch": 0.6910569105691057,
      "grad_norm": 0.15547853708267212,
      "learning_rate": 3.1300813008130083e-06,
      "loss": 1.1308,
      "step": 170
    },
    {
      "epoch": 0.6951219512195121,
      "grad_norm": 0.14568693935871124,
      "learning_rate": 3.0894308943089435e-06,
      "loss": 1.1545,
      "step": 171
    },
    {
      "epoch": 0.6991869918699187,
      "grad_norm": 0.14311257004737854,
      "learning_rate": 3.0487804878048782e-06,
      "loss": 1.1154,
      "step": 172
    },
    {
      "epoch": 0.7032520325203252,
      "grad_norm": 0.1535826474428177,
      "learning_rate": 3.0081300813008134e-06,
      "loss": 1.1456,
      "step": 173
    },
    {
      "epoch": 0.7073170731707317,
      "grad_norm": 0.15715304017066956,
      "learning_rate": 2.967479674796748e-06,
      "loss": 1.1602,
      "step": 174
    },
    {
      "epoch": 0.7113821138211383,
      "grad_norm": 0.16260135173797607,
      "learning_rate": 2.926829268292683e-06,
      "loss": 1.1413,
      "step": 175
    },
    {
      "epoch": 0.7154471544715447,
      "grad_norm": 0.2152421921491623,
      "learning_rate": 2.8861788617886185e-06,
      "loss": 1.1748,
      "step": 176
    },
    {
      "epoch": 0.7195121951219512,
      "grad_norm": 0.15476493537425995,
      "learning_rate": 2.845528455284553e-06,
      "loss": 1.127,
      "step": 177
    },
    {
      "epoch": 0.7235772357723578,
      "grad_norm": 0.16068868339061737,
      "learning_rate": 2.8048780487804884e-06,
      "loss": 1.1608,
      "step": 178
    },
    {
      "epoch": 0.7276422764227642,
      "grad_norm": 0.14887186884880066,
      "learning_rate": 2.764227642276423e-06,
      "loss": 1.165,
      "step": 179
    },
    {
      "epoch": 0.7317073170731707,
      "grad_norm": 0.15321257710456848,
      "learning_rate": 2.723577235772358e-06,
      "loss": 1.1218,
      "step": 180
    },
    {
      "epoch": 0.7357723577235772,
      "grad_norm": 0.14605645835399628,
      "learning_rate": 2.682926829268293e-06,
      "loss": 1.1159,
      "step": 181
    },
    {
      "epoch": 0.7398373983739838,
      "grad_norm": 0.14976952970027924,
      "learning_rate": 2.6422764227642278e-06,
      "loss": 1.1269,
      "step": 182
    },
    {
      "epoch": 0.7439024390243902,
      "grad_norm": 0.15458685159683228,
      "learning_rate": 2.601626016260163e-06,
      "loss": 1.1313,
      "step": 183
    },
    {
      "epoch": 0.7479674796747967,
      "grad_norm": 0.15546010434627533,
      "learning_rate": 2.5609756097560977e-06,
      "loss": 1.1488,
      "step": 184
    },
    {
      "epoch": 0.7520325203252033,
      "grad_norm": 0.14949896931648254,
      "learning_rate": 2.5203252032520324e-06,
      "loss": 1.1691,
      "step": 185
    },
    {
      "epoch": 0.7560975609756098,
      "grad_norm": 0.16152317821979523,
      "learning_rate": 2.4796747967479676e-06,
      "loss": 1.2138,
      "step": 186
    },
    {
      "epoch": 0.7601626016260162,
      "grad_norm": 0.14412018656730652,
      "learning_rate": 2.4390243902439027e-06,
      "loss": 1.1094,
      "step": 187
    },
    {
      "epoch": 0.7642276422764228,
      "grad_norm": 0.16365866363048553,
      "learning_rate": 2.3983739837398375e-06,
      "loss": 1.1309,
      "step": 188
    },
    {
      "epoch": 0.7682926829268293,
      "grad_norm": 0.14860795438289642,
      "learning_rate": 2.3577235772357727e-06,
      "loss": 1.1431,
      "step": 189
    },
    {
      "epoch": 0.7723577235772358,
      "grad_norm": 0.1498783528804779,
      "learning_rate": 2.317073170731708e-06,
      "loss": 1.1597,
      "step": 190
    },
    {
      "epoch": 0.7764227642276422,
      "grad_norm": 0.1552543342113495,
      "learning_rate": 2.2764227642276426e-06,
      "loss": 1.1514,
      "step": 191
    },
    {
      "epoch": 0.7804878048780488,
      "grad_norm": 0.14823728799819946,
      "learning_rate": 2.2357723577235773e-06,
      "loss": 1.1326,
      "step": 192
    },
    {
      "epoch": 0.7845528455284553,
      "grad_norm": 0.16356539726257324,
      "learning_rate": 2.1951219512195125e-06,
      "loss": 1.1395,
      "step": 193
    },
    {
      "epoch": 0.7886178861788617,
      "grad_norm": 0.1480284482240677,
      "learning_rate": 2.154471544715447e-06,
      "loss": 1.1069,
      "step": 194
    },
    {
      "epoch": 0.7926829268292683,
      "grad_norm": 0.15803301334381104,
      "learning_rate": 2.1138211382113824e-06,
      "loss": 1.1612,
      "step": 195
    },
    {
      "epoch": 0.7967479674796748,
      "grad_norm": 0.15757571160793304,
      "learning_rate": 2.073170731707317e-06,
      "loss": 1.1291,
      "step": 196
    },
    {
      "epoch": 0.8008130081300813,
      "grad_norm": 0.1588039994239807,
      "learning_rate": 2.0325203252032523e-06,
      "loss": 1.1479,
      "step": 197
    },
    {
      "epoch": 0.8048780487804879,
      "grad_norm": 0.14807240664958954,
      "learning_rate": 1.991869918699187e-06,
      "loss": 1.1414,
      "step": 198
    },
    {
      "epoch": 0.8089430894308943,
      "grad_norm": 0.16562148928642273,
      "learning_rate": 1.951219512195122e-06,
      "loss": 1.1632,
      "step": 199
    },
    {
      "epoch": 0.8130081300813008,
      "grad_norm": 0.15663650631904602,
      "learning_rate": 1.9105691056910574e-06,
      "loss": 1.1037,
      "step": 200
    },
    {
      "epoch": 0.8170731707317073,
      "grad_norm": 0.15756241977214813,
      "learning_rate": 1.8699186991869919e-06,
      "loss": 1.113,
      "step": 201
    },
    {
      "epoch": 0.8211382113821138,
      "grad_norm": 0.1509793996810913,
      "learning_rate": 1.8292682926829268e-06,
      "loss": 1.1318,
      "step": 202
    },
    {
      "epoch": 0.8252032520325203,
      "grad_norm": 0.16543321311473846,
      "learning_rate": 1.788617886178862e-06,
      "loss": 1.1221,
      "step": 203
    },
    {
      "epoch": 0.8292682926829268,
      "grad_norm": 0.16640353202819824,
      "learning_rate": 1.747967479674797e-06,
      "loss": 1.1049,
      "step": 204
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 0.14792364835739136,
      "learning_rate": 1.707317073170732e-06,
      "loss": 1.11,
      "step": 205
    },
    {
      "epoch": 0.8373983739837398,
      "grad_norm": 0.1446414440870285,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 1.0932,
      "step": 206
    },
    {
      "epoch": 0.8414634146341463,
      "grad_norm": 0.1550828069448471,
      "learning_rate": 1.6260162601626018e-06,
      "loss": 1.1198,
      "step": 207
    },
    {
      "epoch": 0.8455284552845529,
      "grad_norm": 0.19470813870429993,
      "learning_rate": 1.5853658536585368e-06,
      "loss": 1.2043,
      "step": 208
    },
    {
      "epoch": 0.8495934959349594,
      "grad_norm": 0.16045600175857544,
      "learning_rate": 1.5447154471544717e-06,
      "loss": 1.15,
      "step": 209
    },
    {
      "epoch": 0.8536585365853658,
      "grad_norm": 0.17394383251667023,
      "learning_rate": 1.5040650406504067e-06,
      "loss": 1.1325,
      "step": 210
    },
    {
      "epoch": 0.8577235772357723,
      "grad_norm": 0.16151657700538635,
      "learning_rate": 1.4634146341463414e-06,
      "loss": 1.1089,
      "step": 211
    },
    {
      "epoch": 0.8617886178861789,
      "grad_norm": 0.155193492770195,
      "learning_rate": 1.4227642276422766e-06,
      "loss": 1.125,
      "step": 212
    },
    {
      "epoch": 0.8658536585365854,
      "grad_norm": 0.1687816083431244,
      "learning_rate": 1.3821138211382116e-06,
      "loss": 1.1585,
      "step": 213
    },
    {
      "epoch": 0.8699186991869918,
      "grad_norm": 0.15537872910499573,
      "learning_rate": 1.3414634146341465e-06,
      "loss": 1.0965,
      "step": 214
    },
    {
      "epoch": 0.8739837398373984,
      "grad_norm": 0.15171952545642853,
      "learning_rate": 1.3008130081300815e-06,
      "loss": 1.1538,
      "step": 215
    },
    {
      "epoch": 0.8780487804878049,
      "grad_norm": 0.15217146277427673,
      "learning_rate": 1.2601626016260162e-06,
      "loss": 1.1122,
      "step": 216
    },
    {
      "epoch": 0.8821138211382114,
      "grad_norm": 0.16899816691875458,
      "learning_rate": 1.2195121951219514e-06,
      "loss": 1.1492,
      "step": 217
    },
    {
      "epoch": 0.8861788617886179,
      "grad_norm": 0.15003301203250885,
      "learning_rate": 1.1788617886178863e-06,
      "loss": 1.1483,
      "step": 218
    },
    {
      "epoch": 0.8902439024390244,
      "grad_norm": 0.16330182552337646,
      "learning_rate": 1.1382113821138213e-06,
      "loss": 1.1688,
      "step": 219
    },
    {
      "epoch": 0.8943089430894309,
      "grad_norm": 0.15937192738056183,
      "learning_rate": 1.0975609756097562e-06,
      "loss": 1.1085,
      "step": 220
    },
    {
      "epoch": 0.8983739837398373,
      "grad_norm": 0.15832844376564026,
      "learning_rate": 1.0569105691056912e-06,
      "loss": 1.1363,
      "step": 221
    },
    {
      "epoch": 0.9024390243902439,
      "grad_norm": 0.15431027114391327,
      "learning_rate": 1.0162601626016261e-06,
      "loss": 1.1252,
      "step": 222
    },
    {
      "epoch": 0.9065040650406504,
      "grad_norm": 0.15886257588863373,
      "learning_rate": 9.75609756097561e-07,
      "loss": 1.1312,
      "step": 223
    },
    {
      "epoch": 0.9105691056910569,
      "grad_norm": 0.15784978866577148,
      "learning_rate": 9.349593495934959e-07,
      "loss": 1.1378,
      "step": 224
    },
    {
      "epoch": 0.9146341463414634,
      "grad_norm": 0.15965990722179413,
      "learning_rate": 8.94308943089431e-07,
      "loss": 1.1345,
      "step": 225
    },
    {
      "epoch": 0.9186991869918699,
      "grad_norm": 0.15162645280361176,
      "learning_rate": 8.53658536585366e-07,
      "loss": 1.1393,
      "step": 226
    },
    {
      "epoch": 0.9227642276422764,
      "grad_norm": 0.14676712453365326,
      "learning_rate": 8.130081300813009e-07,
      "loss": 1.1216,
      "step": 227
    },
    {
      "epoch": 0.926829268292683,
      "grad_norm": 0.1494797021150589,
      "learning_rate": 7.723577235772359e-07,
      "loss": 1.1409,
      "step": 228
    },
    {
      "epoch": 0.9308943089430894,
      "grad_norm": 0.14101096987724304,
      "learning_rate": 7.317073170731707e-07,
      "loss": 1.1262,
      "step": 229
    },
    {
      "epoch": 0.9349593495934959,
      "grad_norm": 0.1486268937587738,
      "learning_rate": 6.910569105691058e-07,
      "loss": 1.1157,
      "step": 230
    },
    {
      "epoch": 0.9390243902439024,
      "grad_norm": 0.14077913761138916,
      "learning_rate": 6.504065040650407e-07,
      "loss": 1.1221,
      "step": 231
    },
    {
      "epoch": 0.943089430894309,
      "grad_norm": 0.15489701926708221,
      "learning_rate": 6.097560975609757e-07,
      "loss": 1.1568,
      "step": 232
    },
    {
      "epoch": 0.9471544715447154,
      "grad_norm": 0.14478756487369537,
      "learning_rate": 5.691056910569106e-07,
      "loss": 1.1292,
      "step": 233
    },
    {
      "epoch": 0.9512195121951219,
      "grad_norm": 0.14684346318244934,
      "learning_rate": 5.284552845528456e-07,
      "loss": 1.0798,
      "step": 234
    },
    {
      "epoch": 0.9552845528455285,
      "grad_norm": 0.15101996064186096,
      "learning_rate": 4.878048780487805e-07,
      "loss": 1.1424,
      "step": 235
    },
    {
      "epoch": 0.959349593495935,
      "grad_norm": 0.14657896757125854,
      "learning_rate": 4.471544715447155e-07,
      "loss": 1.1174,
      "step": 236
    },
    {
      "epoch": 0.9634146341463414,
      "grad_norm": 0.147206112742424,
      "learning_rate": 4.0650406504065046e-07,
      "loss": 1.1395,
      "step": 237
    },
    {
      "epoch": 0.967479674796748,
      "grad_norm": 0.1617361456155777,
      "learning_rate": 3.6585365853658536e-07,
      "loss": 1.1254,
      "step": 238
    },
    {
      "epoch": 0.9715447154471545,
      "grad_norm": 0.14291827380657196,
      "learning_rate": 3.2520325203252037e-07,
      "loss": 1.1177,
      "step": 239
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 0.1422155797481537,
      "learning_rate": 2.845528455284553e-07,
      "loss": 1.0811,
      "step": 240
    },
    {
      "epoch": 0.9796747967479674,
      "grad_norm": 0.16996382176876068,
      "learning_rate": 2.439024390243903e-07,
      "loss": 1.1633,
      "step": 241
    },
    {
      "epoch": 0.983739837398374,
      "grad_norm": 0.14874570071697235,
      "learning_rate": 2.0325203252032523e-07,
      "loss": 1.1163,
      "step": 242
    },
    {
      "epoch": 0.9878048780487805,
      "grad_norm": 0.14603322744369507,
      "learning_rate": 1.6260162601626018e-07,
      "loss": 1.1166,
      "step": 243
    },
    {
      "epoch": 0.991869918699187,
      "grad_norm": 0.14441542327404022,
      "learning_rate": 1.2195121951219514e-07,
      "loss": 1.1168,
      "step": 244
    },
    {
      "epoch": 0.9959349593495935,
      "grad_norm": 0.1497318595647812,
      "learning_rate": 8.130081300813009e-08,
      "loss": 1.1344,
      "step": 245
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.14177581667900085,
      "learning_rate": 4.0650406504065046e-08,
      "loss": 1.0984,
      "step": 246
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 246,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 0,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.6102723928798e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}