{
  "best_global_step": 5626,
  "best_metric": 0.9394,
  "best_model_checkpoint": "robert-imdb-tuned/checkpoint-5626",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 5626,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.017774617845716316,
      "grad_norm": 56.434574127197266,
      "learning_rate": 1.989098234387961e-05,
      "loss": 0.5656,
      "step": 50
    },
    {
      "epoch": 0.03554923569143263,
      "grad_norm": 16.460412979125977,
      "learning_rate": 1.9772484891574832e-05,
      "loss": 0.3844,
      "step": 100
    },
    {
      "epoch": 0.053323853537148955,
      "grad_norm": 10.739611625671387,
      "learning_rate": 1.9653987439270056e-05,
      "loss": 0.258,
      "step": 150
    },
    {
      "epoch": 0.07109847138286526,
      "grad_norm": 45.43592071533203,
      "learning_rate": 1.953548998696528e-05,
      "loss": 0.2814,
      "step": 200
    },
    {
      "epoch": 0.08887308922858159,
      "grad_norm": 19.858793258666992,
      "learning_rate": 1.9416992534660507e-05,
      "loss": 0.2643,
      "step": 250
    },
    {
      "epoch": 0.10664770707429791,
      "grad_norm": 12.223872184753418,
      "learning_rate": 1.929849508235573e-05,
      "loss": 0.2467,
      "step": 300
    },
    {
      "epoch": 0.12442232492001422,
      "grad_norm": 15.694002151489258,
      "learning_rate": 1.9179997630050954e-05,
      "loss": 0.2631,
      "step": 350
    },
    {
      "epoch": 0.14219694276573053,
      "grad_norm": 16.515512466430664,
      "learning_rate": 1.9061500177746178e-05,
      "loss": 0.2712,
      "step": 400
    },
    {
      "epoch": 0.15997156061144685,
      "grad_norm": 17.446800231933594,
      "learning_rate": 1.8943002725441405e-05,
      "loss": 0.2731,
      "step": 450
    },
    {
      "epoch": 0.17774617845716317,
      "grad_norm": 13.160314559936523,
      "learning_rate": 1.882450527313663e-05,
      "loss": 0.2151,
      "step": 500
    },
    {
      "epoch": 0.1955207963028795,
      "grad_norm": 14.691619873046875,
      "learning_rate": 1.8706007820831853e-05,
      "loss": 0.2567,
      "step": 550
    },
    {
      "epoch": 0.21329541414859582,
      "grad_norm": 8.742100715637207,
      "learning_rate": 1.8587510368527076e-05,
      "loss": 0.2156,
      "step": 600
    },
    {
      "epoch": 0.23107003199431211,
      "grad_norm": 25.084047317504883,
      "learning_rate": 1.8469012916222303e-05,
      "loss": 0.1931,
      "step": 650
    },
    {
      "epoch": 0.24884464984002844,
      "grad_norm": 10.953790664672852,
      "learning_rate": 1.8350515463917527e-05,
      "loss": 0.284,
      "step": 700
    },
    {
      "epoch": 0.26661926768574473,
      "grad_norm": 11.356263160705566,
      "learning_rate": 1.823201801161275e-05,
      "loss": 0.2515,
      "step": 750
    },
    {
      "epoch": 0.28439388553146105,
      "grad_norm": 31.774959564208984,
      "learning_rate": 1.8113520559307974e-05,
      "loss": 0.2409,
      "step": 800
    },
    {
      "epoch": 0.3021685033771774,
      "grad_norm": 14.814696311950684,
      "learning_rate": 1.79950231070032e-05,
      "loss": 0.1859,
      "step": 850
    },
    {
      "epoch": 0.3199431212228937,
      "grad_norm": 2.47302508354187,
      "learning_rate": 1.7876525654698425e-05,
      "loss": 0.2537,
      "step": 900
    },
    {
      "epoch": 0.33771773906861,
      "grad_norm": 6.675404071807861,
      "learning_rate": 1.775802820239365e-05,
      "loss": 0.2262,
      "step": 950
    },
    {
      "epoch": 0.35549235691432635,
      "grad_norm": 11.948305130004883,
      "learning_rate": 1.7639530750088873e-05,
      "loss": 0.2098,
      "step": 1000
    },
    {
      "epoch": 0.37326697476004267,
      "grad_norm": 36.21531677246094,
      "learning_rate": 1.75210332977841e-05,
      "loss": 0.2028,
      "step": 1050
    },
    {
      "epoch": 0.391041592605759,
      "grad_norm": 10.536725044250488,
      "learning_rate": 1.7402535845479323e-05,
      "loss": 0.225,
      "step": 1100
    },
    {
      "epoch": 0.4088162104514753,
      "grad_norm": 2.2616159915924072,
      "learning_rate": 1.728403839317455e-05,
      "loss": 0.2004,
      "step": 1150
    },
    {
      "epoch": 0.42659082829719164,
      "grad_norm": 9.378534317016602,
      "learning_rate": 1.7165540940869774e-05,
      "loss": 0.1869,
      "step": 1200
    },
    {
      "epoch": 0.4443654461429079,
      "grad_norm": 11.102619171142578,
      "learning_rate": 1.7049413437611094e-05,
      "loss": 0.2581,
      "step": 1250
    },
    {
      "epoch": 0.46214006398862423,
      "grad_norm": 7.025556564331055,
      "learning_rate": 1.6930915985306317e-05,
      "loss": 0.1779,
      "step": 1300
    },
    {
      "epoch": 0.47991468183434055,
      "grad_norm": 1.8636491298675537,
      "learning_rate": 1.681241853300154e-05,
      "loss": 0.2524,
      "step": 1350
    },
    {
      "epoch": 0.4976892996800569,
      "grad_norm": 4.911526679992676,
      "learning_rate": 1.6693921080696768e-05,
      "loss": 0.2586,
      "step": 1400
    },
    {
      "epoch": 0.5154639175257731,
      "grad_norm": 15.698482513427734,
      "learning_rate": 1.6575423628391992e-05,
      "loss": 0.2305,
      "step": 1450
    },
    {
      "epoch": 0.5332385353714895,
      "grad_norm": 8.248071670532227,
      "learning_rate": 1.6456926176087216e-05,
      "loss": 0.2192,
      "step": 1500
    },
    {
      "epoch": 0.5510131532172058,
      "grad_norm": 11.133625030517578,
      "learning_rate": 1.633842872378244e-05,
      "loss": 0.2027,
      "step": 1550
    },
    {
      "epoch": 0.5687877710629221,
      "grad_norm": 21.111495971679688,
      "learning_rate": 1.6219931271477663e-05,
      "loss": 0.2482,
      "step": 1600
    },
    {
      "epoch": 0.5865623889086384,
      "grad_norm": 7.656630992889404,
      "learning_rate": 1.610143381917289e-05,
      "loss": 0.1956,
      "step": 1650
    },
    {
      "epoch": 0.6043370067543548,
      "grad_norm": 24.104310989379883,
      "learning_rate": 1.5982936366868114e-05,
      "loss": 0.2056,
      "step": 1700
    },
    {
      "epoch": 0.6221116246000711,
      "grad_norm": 2.415814161300659,
      "learning_rate": 1.5864438914563337e-05,
      "loss": 0.2152,
      "step": 1750
    },
    {
      "epoch": 0.6398862424457874,
      "grad_norm": 4.810522079467773,
      "learning_rate": 1.574594146225856e-05,
      "loss": 0.2194,
      "step": 1800
    },
    {
      "epoch": 0.6576608602915037,
      "grad_norm": 14.958556175231934,
      "learning_rate": 1.5627444009953788e-05,
      "loss": 0.2154,
      "step": 1850
    },
    {
      "epoch": 0.67543547813722,
      "grad_norm": 5.387829303741455,
      "learning_rate": 1.5508946557649012e-05,
      "loss": 0.1707,
      "step": 1900
    },
    {
      "epoch": 0.6932100959829364,
      "grad_norm": 7.485889434814453,
      "learning_rate": 1.5390449105344236e-05,
      "loss": 0.2813,
      "step": 1950
    },
    {
      "epoch": 0.7109847138286527,
      "grad_norm": 8.028847694396973,
      "learning_rate": 1.527195165303946e-05,
      "loss": 0.2345,
      "step": 2000
    },
    {
      "epoch": 0.728759331674369,
      "grad_norm": 18.40928077697754,
      "learning_rate": 1.5153454200734685e-05,
      "loss": 0.1838,
      "step": 2050
    },
    {
      "epoch": 0.7465339495200853,
      "grad_norm": 4.4190874099731445,
      "learning_rate": 1.503495674842991e-05,
      "loss": 0.2029,
      "step": 2100
    },
    {
      "epoch": 0.7643085673658017,
      "grad_norm": 2.5633809566497803,
      "learning_rate": 1.4916459296125134e-05,
      "loss": 0.2077,
      "step": 2150
    },
    {
      "epoch": 0.782083185211518,
      "grad_norm": 0.8002589344978333,
      "learning_rate": 1.479796184382036e-05,
      "loss": 0.1903,
      "step": 2200
    },
    {
      "epoch": 0.7998578030572343,
      "grad_norm": 16.00310516357422,
      "learning_rate": 1.4679464391515583e-05,
      "loss": 0.1967,
      "step": 2250
    },
    {
      "epoch": 0.8176324209029506,
      "grad_norm": 10.853177070617676,
      "learning_rate": 1.4560966939210808e-05,
      "loss": 0.2151,
      "step": 2300
    },
    {
      "epoch": 0.835407038748667,
      "grad_norm": 19.220428466796875,
      "learning_rate": 1.4442469486906032e-05,
      "loss": 0.2101,
      "step": 2350
    },
    {
      "epoch": 0.8531816565943833,
      "grad_norm": 25.55153465270996,
      "learning_rate": 1.4323972034601257e-05,
      "loss": 0.1768,
      "step": 2400
    },
    {
      "epoch": 0.8709562744400995,
      "grad_norm": 15.01659107208252,
      "learning_rate": 1.4205474582296481e-05,
      "loss": 0.198,
      "step": 2450
    },
    {
      "epoch": 0.8887308922858158,
      "grad_norm": 8.663373947143555,
      "learning_rate": 1.4086977129991707e-05,
      "loss": 0.1695,
      "step": 2500
    },
    {
      "epoch": 0.9065055101315321,
      "grad_norm": 26.266836166381836,
      "learning_rate": 1.396847967768693e-05,
      "loss": 0.2097,
      "step": 2550
    },
    {
      "epoch": 0.9242801279772485,
      "grad_norm": 17.312021255493164,
      "learning_rate": 1.3849982225382156e-05,
      "loss": 0.1919,
      "step": 2600
    },
    {
      "epoch": 0.9420547458229648,
      "grad_norm": 17.3812313079834,
      "learning_rate": 1.373148477307738e-05,
      "loss": 0.2515,
      "step": 2650
    },
    {
      "epoch": 0.9598293636686811,
      "grad_norm": 19.211204528808594,
      "learning_rate": 1.3612987320772603e-05,
      "loss": 0.2171,
      "step": 2700
    },
    {
      "epoch": 0.9776039815143974,
      "grad_norm": 13.78133487701416,
      "learning_rate": 1.3494489868467828e-05,
      "loss": 0.2008,
      "step": 2750
    },
    {
      "epoch": 0.9953785993601137,
      "grad_norm": 19.311662673950195,
      "learning_rate": 1.3375992416163052e-05,
      "loss": 0.1883,
      "step": 2800
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9358,
      "eval_f1": 0.9357694750853396,
      "eval_loss": 0.18624259531497955,
      "eval_runtime": 17.6578,
      "eval_samples_per_second": 283.161,
      "eval_steps_per_second": 17.726,
      "step": 2813
    },
    {
      "epoch": 1.0131532172058302,
      "grad_norm": 25.82658576965332,
      "learning_rate": 1.3257494963858277e-05,
      "loss": 0.1937,
      "step": 2850
    },
    {
      "epoch": 1.0309278350515463,
      "grad_norm": 50.36363220214844,
      "learning_rate": 1.3138997511553501e-05,
      "loss": 0.1132,
      "step": 2900
    },
    {
      "epoch": 1.0487024528972626,
      "grad_norm": 0.8865321278572083,
      "learning_rate": 1.3020500059248727e-05,
      "loss": 0.1468,
      "step": 2950
    },
    {
      "epoch": 1.066477070742979,
      "grad_norm": 9.666962623596191,
      "learning_rate": 1.290200260694395e-05,
      "loss": 0.1066,
      "step": 3000
    },
    {
      "epoch": 1.0842516885886953,
      "grad_norm": 9.870319366455078,
      "learning_rate": 1.2783505154639176e-05,
      "loss": 0.2127,
      "step": 3050
    },
    {
      "epoch": 1.1020263064344116,
      "grad_norm": 11.271082878112793,
      "learning_rate": 1.26650077023344e-05,
      "loss": 0.1336,
      "step": 3100
    },
    {
      "epoch": 1.119800924280128,
      "grad_norm": 17.068159103393555,
      "learning_rate": 1.2546510250029625e-05,
      "loss": 0.1497,
      "step": 3150
    },
    {
      "epoch": 1.1375755421258442,
      "grad_norm": 38.891361236572266,
      "learning_rate": 1.2428012797724852e-05,
      "loss": 0.1501,
      "step": 3200
    },
    {
      "epoch": 1.1553501599715605,
      "grad_norm": 13.285944938659668,
      "learning_rate": 1.2309515345420076e-05,
      "loss": 0.1526,
      "step": 3250
    },
    {
      "epoch": 1.1731247778172769,
      "grad_norm": 4.942183494567871,
      "learning_rate": 1.2193387842161395e-05,
      "loss": 0.1389,
      "step": 3300
    },
    {
      "epoch": 1.1908993956629932,
      "grad_norm": 0.8414945006370544,
      "learning_rate": 1.2074890389856619e-05,
      "loss": 0.1472,
      "step": 3350
    },
    {
      "epoch": 1.2086740135087095,
      "grad_norm": 27.053401947021484,
      "learning_rate": 1.1956392937551844e-05,
      "loss": 0.1577,
      "step": 3400
    },
    {
      "epoch": 1.2264486313544258,
      "grad_norm": 22.100234985351562,
      "learning_rate": 1.1837895485247068e-05,
      "loss": 0.1521,
      "step": 3450
    },
    {
      "epoch": 1.2442232492001422,
      "grad_norm": 0.08347504585981369,
      "learning_rate": 1.1719398032942293e-05,
      "loss": 0.1206,
      "step": 3500
    },
    {
      "epoch": 1.2619978670458585,
      "grad_norm": 24.487995147705078,
      "learning_rate": 1.1600900580637517e-05,
      "loss": 0.203,
      "step": 3550
    },
    {
      "epoch": 1.2797724848915748,
      "grad_norm": 9.617659568786621,
      "learning_rate": 1.1482403128332742e-05,
      "loss": 0.1374,
      "step": 3600
    },
    {
      "epoch": 1.2975471027372911,
      "grad_norm": 43.62260055541992,
      "learning_rate": 1.1363905676027966e-05,
      "loss": 0.1116,
      "step": 3650
    },
    {
      "epoch": 1.3153217205830074,
      "grad_norm": 1.9222966432571411,
      "learning_rate": 1.1245408223723191e-05,
      "loss": 0.1384,
      "step": 3700
    },
    {
      "epoch": 1.3330963384287238,
      "grad_norm": 4.166341304779053,
      "learning_rate": 1.1129280720464511e-05,
      "loss": 0.2119,
      "step": 3750
    },
    {
      "epoch": 1.35087095627444,
      "grad_norm": 21.861906051635742,
      "learning_rate": 1.1010783268159735e-05,
      "loss": 0.1243,
      "step": 3800
    },
    {
      "epoch": 1.3686455741201564,
      "grad_norm": 0.1768956333398819,
      "learning_rate": 1.089228581585496e-05,
      "loss": 0.1445,
      "step": 3850
    },
    {
      "epoch": 1.3864201919658727,
      "grad_norm": 1.804442286491394,
      "learning_rate": 1.0773788363550184e-05,
      "loss": 0.163,
      "step": 3900
    },
    {
      "epoch": 1.404194809811589,
      "grad_norm": 9.804183959960938,
      "learning_rate": 1.065529091124541e-05,
      "loss": 0.1682,
      "step": 3950
    },
    {
      "epoch": 1.4219694276573054,
      "grad_norm": 12.038265228271484,
      "learning_rate": 1.0536793458940633e-05,
      "loss": 0.1458,
      "step": 4000
    },
    {
      "epoch": 1.4397440455030217,
      "grad_norm": 0.020571628585457802,
      "learning_rate": 1.0418296006635857e-05,
      "loss": 0.1407,
      "step": 4050
    },
    {
      "epoch": 1.457518663348738,
      "grad_norm": 0.07532644271850586,
      "learning_rate": 1.0299798554331082e-05,
      "loss": 0.1209,
      "step": 4100
    },
    {
      "epoch": 1.4752932811944544,
      "grad_norm": 25.963619232177734,
      "learning_rate": 1.0181301102026306e-05,
      "loss": 0.1694,
      "step": 4150
    },
    {
      "epoch": 1.4930678990401707,
      "grad_norm": 0.32112210988998413,
      "learning_rate": 1.0062803649721531e-05,
      "loss": 0.1652,
      "step": 4200
    },
    {
      "epoch": 1.510842516885887,
      "grad_norm": 0.9838645458221436,
      "learning_rate": 9.944306197416756e-06,
      "loss": 0.156,
      "step": 4250
    },
    {
      "epoch": 1.5286171347316033,
      "grad_norm": 0.2524012327194214,
      "learning_rate": 9.825808745111982e-06,
      "loss": 0.1738,
      "step": 4300
    },
    {
      "epoch": 1.5463917525773194,
      "grad_norm": 0.21868811547756195,
      "learning_rate": 9.707311292807206e-06,
      "loss": 0.1417,
      "step": 4350
    },
    {
      "epoch": 1.564166370423036,
      "grad_norm": 17.266357421875,
      "learning_rate": 9.58881384050243e-06,
      "loss": 0.1912,
      "step": 4400
    },
    {
      "epoch": 1.581940988268752,
      "grad_norm": 18.44460678100586,
      "learning_rate": 9.470316388197655e-06,
      "loss": 0.1464,
      "step": 4450
    },
    {
      "epoch": 1.5997156061144686,
      "grad_norm": 0.12830661237239838,
      "learning_rate": 9.351818935892878e-06,
      "loss": 0.1023,
      "step": 4500
    },
    {
      "epoch": 1.6174902239601847,
      "grad_norm": 19.83939552307129,
      "learning_rate": 9.233321483588104e-06,
      "loss": 0.1519,
      "step": 4550
    },
    {
      "epoch": 1.6352648418059013,
      "grad_norm": 11.456741333007812,
      "learning_rate": 9.114824031283327e-06,
      "loss": 0.1266,
      "step": 4600
    },
    {
      "epoch": 1.6530394596516174,
      "grad_norm": 42.71019744873047,
      "learning_rate": 8.996326578978553e-06,
      "loss": 0.1539,
      "step": 4650
    },
    {
      "epoch": 1.670814077497334,
      "grad_norm": 14.500824928283691,
      "learning_rate": 8.877829126673777e-06,
      "loss": 0.1318,
      "step": 4700
    },
    {
      "epoch": 1.68858869534305,
      "grad_norm": 1.3086316585540771,
      "learning_rate": 8.759331674369002e-06,
      "loss": 0.1678,
      "step": 4750
    },
    {
      "epoch": 1.7063633131887666,
      "grad_norm": 22.793882369995117,
      "learning_rate": 8.640834222064226e-06,
      "loss": 0.1579,
      "step": 4800
    },
    {
      "epoch": 1.7241379310344827,
      "grad_norm": 0.10703279078006744,
      "learning_rate": 8.522336769759451e-06,
      "loss": 0.1621,
      "step": 4850
    },
    {
      "epoch": 1.7419125488801992,
      "grad_norm": 24.500709533691406,
      "learning_rate": 8.403839317454675e-06,
      "loss": 0.1426,
      "step": 4900
    },
    {
      "epoch": 1.7596871667259153,
      "grad_norm": 38.63484191894531,
      "learning_rate": 8.2853418651499e-06,
      "loss": 0.1144,
      "step": 4950
    },
    {
      "epoch": 1.7774617845716318,
      "grad_norm": 3.4262137413024902,
      "learning_rate": 8.166844412845124e-06,
      "loss": 0.1625,
      "step": 5000
    },
    {
      "epoch": 1.795236402417348,
      "grad_norm": 29.1049747467041,
      "learning_rate": 8.04834696054035e-06,
      "loss": 0.1545,
      "step": 5050
    },
    {
      "epoch": 1.8130110202630645,
      "grad_norm": 37.24311828613281,
      "learning_rate": 7.929849508235573e-06,
      "loss": 0.1427,
      "step": 5100
    },
    {
      "epoch": 1.8307856381087806,
      "grad_norm": 3.99753999710083,
      "learning_rate": 7.811352055930798e-06,
      "loss": 0.1474,
      "step": 5150
    },
    {
      "epoch": 1.8485602559544971,
      "grad_norm": 0.08612991869449615,
      "learning_rate": 7.692854603626024e-06,
      "loss": 0.0756,
      "step": 5200
    },
    {
      "epoch": 1.8663348738002132,
      "grad_norm": 3.234119415283203,
      "learning_rate": 7.574357151321247e-06,
      "loss": 0.1673,
      "step": 5250
    },
    {
      "epoch": 1.8841094916459296,
      "grad_norm": 6.5654401779174805,
      "learning_rate": 7.455859699016472e-06,
      "loss": 0.154,
      "step": 5300
    },
    {
      "epoch": 1.9018841094916459,
      "grad_norm": 0.12049467116594315,
      "learning_rate": 7.3373622467116965e-06,
      "loss": 0.1673,
      "step": 5350
    },
    {
      "epoch": 1.9196587273373622,
      "grad_norm": 0.20109856128692627,
      "learning_rate": 7.218864794406921e-06,
      "loss": 0.1363,
      "step": 5400
    },
    {
      "epoch": 1.9374333451830785,
      "grad_norm": 20.57840919494629,
      "learning_rate": 7.1003673421021455e-06,
      "loss": 0.1088,
      "step": 5450
    },
    {
      "epoch": 1.9552079630287948,
      "grad_norm": 41.415245056152344,
      "learning_rate": 6.98186988979737e-06,
      "loss": 0.1583,
      "step": 5500
    },
    {
      "epoch": 1.9729825808745112,
      "grad_norm": 9.189870834350586,
      "learning_rate": 6.863372437492595e-06,
      "loss": 0.1647,
      "step": 5550
    },
    {
      "epoch": 1.9907571987202275,
      "grad_norm": 1.1518114805221558,
      "learning_rate": 6.744874985187819e-06,
      "loss": 0.1448,
      "step": 5600
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9394,
      "eval_f1": 0.939398717676866,
      "eval_loss": 0.23394709825515747,
      "eval_runtime": 17.5576,
      "eval_samples_per_second": 284.777,
      "eval_steps_per_second": 17.827,
      "step": 5626
    }
  ],
  "logging_steps": 50,
  "max_steps": 8439,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.183915060249056e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}