rohan10juli's picture
Upload folder using huggingface_hub
89347a6 verified
{
"best_global_step": 5626,
"best_metric": 0.9394,
"best_model_checkpoint": "robert-imdb-tuned/checkpoint-5626",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 5626,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.017774617845716316,
"grad_norm": 56.434574127197266,
"learning_rate": 1.989098234387961e-05,
"loss": 0.5656,
"step": 50
},
{
"epoch": 0.03554923569143263,
"grad_norm": 16.460412979125977,
"learning_rate": 1.9772484891574832e-05,
"loss": 0.3844,
"step": 100
},
{
"epoch": 0.053323853537148955,
"grad_norm": 10.739611625671387,
"learning_rate": 1.9653987439270056e-05,
"loss": 0.258,
"step": 150
},
{
"epoch": 0.07109847138286526,
"grad_norm": 45.43592071533203,
"learning_rate": 1.953548998696528e-05,
"loss": 0.2814,
"step": 200
},
{
"epoch": 0.08887308922858159,
"grad_norm": 19.858793258666992,
"learning_rate": 1.9416992534660507e-05,
"loss": 0.2643,
"step": 250
},
{
"epoch": 0.10664770707429791,
"grad_norm": 12.223872184753418,
"learning_rate": 1.929849508235573e-05,
"loss": 0.2467,
"step": 300
},
{
"epoch": 0.12442232492001422,
"grad_norm": 15.694002151489258,
"learning_rate": 1.9179997630050954e-05,
"loss": 0.2631,
"step": 350
},
{
"epoch": 0.14219694276573053,
"grad_norm": 16.515512466430664,
"learning_rate": 1.9061500177746178e-05,
"loss": 0.2712,
"step": 400
},
{
"epoch": 0.15997156061144685,
"grad_norm": 17.446800231933594,
"learning_rate": 1.8943002725441405e-05,
"loss": 0.2731,
"step": 450
},
{
"epoch": 0.17774617845716317,
"grad_norm": 13.160314559936523,
"learning_rate": 1.882450527313663e-05,
"loss": 0.2151,
"step": 500
},
{
"epoch": 0.1955207963028795,
"grad_norm": 14.691619873046875,
"learning_rate": 1.8706007820831853e-05,
"loss": 0.2567,
"step": 550
},
{
"epoch": 0.21329541414859582,
"grad_norm": 8.742100715637207,
"learning_rate": 1.8587510368527076e-05,
"loss": 0.2156,
"step": 600
},
{
"epoch": 0.23107003199431211,
"grad_norm": 25.084047317504883,
"learning_rate": 1.8469012916222303e-05,
"loss": 0.1931,
"step": 650
},
{
"epoch": 0.24884464984002844,
"grad_norm": 10.953790664672852,
"learning_rate": 1.8350515463917527e-05,
"loss": 0.284,
"step": 700
},
{
"epoch": 0.26661926768574473,
"grad_norm": 11.356263160705566,
"learning_rate": 1.823201801161275e-05,
"loss": 0.2515,
"step": 750
},
{
"epoch": 0.28439388553146105,
"grad_norm": 31.774959564208984,
"learning_rate": 1.8113520559307974e-05,
"loss": 0.2409,
"step": 800
},
{
"epoch": 0.3021685033771774,
"grad_norm": 14.814696311950684,
"learning_rate": 1.79950231070032e-05,
"loss": 0.1859,
"step": 850
},
{
"epoch": 0.3199431212228937,
"grad_norm": 2.47302508354187,
"learning_rate": 1.7876525654698425e-05,
"loss": 0.2537,
"step": 900
},
{
"epoch": 0.33771773906861,
"grad_norm": 6.675404071807861,
"learning_rate": 1.775802820239365e-05,
"loss": 0.2262,
"step": 950
},
{
"epoch": 0.35549235691432635,
"grad_norm": 11.948305130004883,
"learning_rate": 1.7639530750088873e-05,
"loss": 0.2098,
"step": 1000
},
{
"epoch": 0.37326697476004267,
"grad_norm": 36.21531677246094,
"learning_rate": 1.75210332977841e-05,
"loss": 0.2028,
"step": 1050
},
{
"epoch": 0.391041592605759,
"grad_norm": 10.536725044250488,
"learning_rate": 1.7402535845479323e-05,
"loss": 0.225,
"step": 1100
},
{
"epoch": 0.4088162104514753,
"grad_norm": 2.2616159915924072,
"learning_rate": 1.728403839317455e-05,
"loss": 0.2004,
"step": 1150
},
{
"epoch": 0.42659082829719164,
"grad_norm": 9.378534317016602,
"learning_rate": 1.7165540940869774e-05,
"loss": 0.1869,
"step": 1200
},
{
"epoch": 0.4443654461429079,
"grad_norm": 11.102619171142578,
"learning_rate": 1.7049413437611094e-05,
"loss": 0.2581,
"step": 1250
},
{
"epoch": 0.46214006398862423,
"grad_norm": 7.025556564331055,
"learning_rate": 1.6930915985306317e-05,
"loss": 0.1779,
"step": 1300
},
{
"epoch": 0.47991468183434055,
"grad_norm": 1.8636491298675537,
"learning_rate": 1.681241853300154e-05,
"loss": 0.2524,
"step": 1350
},
{
"epoch": 0.4976892996800569,
"grad_norm": 4.911526679992676,
"learning_rate": 1.6693921080696768e-05,
"loss": 0.2586,
"step": 1400
},
{
"epoch": 0.5154639175257731,
"grad_norm": 15.698482513427734,
"learning_rate": 1.6575423628391992e-05,
"loss": 0.2305,
"step": 1450
},
{
"epoch": 0.5332385353714895,
"grad_norm": 8.248071670532227,
"learning_rate": 1.6456926176087216e-05,
"loss": 0.2192,
"step": 1500
},
{
"epoch": 0.5510131532172058,
"grad_norm": 11.133625030517578,
"learning_rate": 1.633842872378244e-05,
"loss": 0.2027,
"step": 1550
},
{
"epoch": 0.5687877710629221,
"grad_norm": 21.111495971679688,
"learning_rate": 1.6219931271477663e-05,
"loss": 0.2482,
"step": 1600
},
{
"epoch": 0.5865623889086384,
"grad_norm": 7.656630992889404,
"learning_rate": 1.610143381917289e-05,
"loss": 0.1956,
"step": 1650
},
{
"epoch": 0.6043370067543548,
"grad_norm": 24.104310989379883,
"learning_rate": 1.5982936366868114e-05,
"loss": 0.2056,
"step": 1700
},
{
"epoch": 0.6221116246000711,
"grad_norm": 2.415814161300659,
"learning_rate": 1.5864438914563337e-05,
"loss": 0.2152,
"step": 1750
},
{
"epoch": 0.6398862424457874,
"grad_norm": 4.810522079467773,
"learning_rate": 1.574594146225856e-05,
"loss": 0.2194,
"step": 1800
},
{
"epoch": 0.6576608602915037,
"grad_norm": 14.958556175231934,
"learning_rate": 1.5627444009953788e-05,
"loss": 0.2154,
"step": 1850
},
{
"epoch": 0.67543547813722,
"grad_norm": 5.387829303741455,
"learning_rate": 1.5508946557649012e-05,
"loss": 0.1707,
"step": 1900
},
{
"epoch": 0.6932100959829364,
"grad_norm": 7.485889434814453,
"learning_rate": 1.5390449105344236e-05,
"loss": 0.2813,
"step": 1950
},
{
"epoch": 0.7109847138286527,
"grad_norm": 8.028847694396973,
"learning_rate": 1.527195165303946e-05,
"loss": 0.2345,
"step": 2000
},
{
"epoch": 0.728759331674369,
"grad_norm": 18.40928077697754,
"learning_rate": 1.5153454200734685e-05,
"loss": 0.1838,
"step": 2050
},
{
"epoch": 0.7465339495200853,
"grad_norm": 4.4190874099731445,
"learning_rate": 1.503495674842991e-05,
"loss": 0.2029,
"step": 2100
},
{
"epoch": 0.7643085673658017,
"grad_norm": 2.5633809566497803,
"learning_rate": 1.4916459296125134e-05,
"loss": 0.2077,
"step": 2150
},
{
"epoch": 0.782083185211518,
"grad_norm": 0.8002589344978333,
"learning_rate": 1.479796184382036e-05,
"loss": 0.1903,
"step": 2200
},
{
"epoch": 0.7998578030572343,
"grad_norm": 16.00310516357422,
"learning_rate": 1.4679464391515583e-05,
"loss": 0.1967,
"step": 2250
},
{
"epoch": 0.8176324209029506,
"grad_norm": 10.853177070617676,
"learning_rate": 1.4560966939210808e-05,
"loss": 0.2151,
"step": 2300
},
{
"epoch": 0.835407038748667,
"grad_norm": 19.220428466796875,
"learning_rate": 1.4442469486906032e-05,
"loss": 0.2101,
"step": 2350
},
{
"epoch": 0.8531816565943833,
"grad_norm": 25.55153465270996,
"learning_rate": 1.4323972034601257e-05,
"loss": 0.1768,
"step": 2400
},
{
"epoch": 0.8709562744400995,
"grad_norm": 15.01659107208252,
"learning_rate": 1.4205474582296481e-05,
"loss": 0.198,
"step": 2450
},
{
"epoch": 0.8887308922858158,
"grad_norm": 8.663373947143555,
"learning_rate": 1.4086977129991707e-05,
"loss": 0.1695,
"step": 2500
},
{
"epoch": 0.9065055101315321,
"grad_norm": 26.266836166381836,
"learning_rate": 1.396847967768693e-05,
"loss": 0.2097,
"step": 2550
},
{
"epoch": 0.9242801279772485,
"grad_norm": 17.312021255493164,
"learning_rate": 1.3849982225382156e-05,
"loss": 0.1919,
"step": 2600
},
{
"epoch": 0.9420547458229648,
"grad_norm": 17.3812313079834,
"learning_rate": 1.373148477307738e-05,
"loss": 0.2515,
"step": 2650
},
{
"epoch": 0.9598293636686811,
"grad_norm": 19.211204528808594,
"learning_rate": 1.3612987320772603e-05,
"loss": 0.2171,
"step": 2700
},
{
"epoch": 0.9776039815143974,
"grad_norm": 13.78133487701416,
"learning_rate": 1.3494489868467828e-05,
"loss": 0.2008,
"step": 2750
},
{
"epoch": 0.9953785993601137,
"grad_norm": 19.311662673950195,
"learning_rate": 1.3375992416163052e-05,
"loss": 0.1883,
"step": 2800
},
{
"epoch": 1.0,
"eval_accuracy": 0.9358,
"eval_f1": 0.9357694750853396,
"eval_loss": 0.18624259531497955,
"eval_runtime": 17.6578,
"eval_samples_per_second": 283.161,
"eval_steps_per_second": 17.726,
"step": 2813
},
{
"epoch": 1.0131532172058302,
"grad_norm": 25.82658576965332,
"learning_rate": 1.3257494963858277e-05,
"loss": 0.1937,
"step": 2850
},
{
"epoch": 1.0309278350515463,
"grad_norm": 50.36363220214844,
"learning_rate": 1.3138997511553501e-05,
"loss": 0.1132,
"step": 2900
},
{
"epoch": 1.0487024528972626,
"grad_norm": 0.8865321278572083,
"learning_rate": 1.3020500059248727e-05,
"loss": 0.1468,
"step": 2950
},
{
"epoch": 1.066477070742979,
"grad_norm": 9.666962623596191,
"learning_rate": 1.290200260694395e-05,
"loss": 0.1066,
"step": 3000
},
{
"epoch": 1.0842516885886953,
"grad_norm": 9.870319366455078,
"learning_rate": 1.2783505154639176e-05,
"loss": 0.2127,
"step": 3050
},
{
"epoch": 1.1020263064344116,
"grad_norm": 11.271082878112793,
"learning_rate": 1.26650077023344e-05,
"loss": 0.1336,
"step": 3100
},
{
"epoch": 1.119800924280128,
"grad_norm": 17.068159103393555,
"learning_rate": 1.2546510250029625e-05,
"loss": 0.1497,
"step": 3150
},
{
"epoch": 1.1375755421258442,
"grad_norm": 38.891361236572266,
"learning_rate": 1.2428012797724852e-05,
"loss": 0.1501,
"step": 3200
},
{
"epoch": 1.1553501599715605,
"grad_norm": 13.285944938659668,
"learning_rate": 1.2309515345420076e-05,
"loss": 0.1526,
"step": 3250
},
{
"epoch": 1.1731247778172769,
"grad_norm": 4.942183494567871,
"learning_rate": 1.2193387842161395e-05,
"loss": 0.1389,
"step": 3300
},
{
"epoch": 1.1908993956629932,
"grad_norm": 0.8414945006370544,
"learning_rate": 1.2074890389856619e-05,
"loss": 0.1472,
"step": 3350
},
{
"epoch": 1.2086740135087095,
"grad_norm": 27.053401947021484,
"learning_rate": 1.1956392937551844e-05,
"loss": 0.1577,
"step": 3400
},
{
"epoch": 1.2264486313544258,
"grad_norm": 22.100234985351562,
"learning_rate": 1.1837895485247068e-05,
"loss": 0.1521,
"step": 3450
},
{
"epoch": 1.2442232492001422,
"grad_norm": 0.08347504585981369,
"learning_rate": 1.1719398032942293e-05,
"loss": 0.1206,
"step": 3500
},
{
"epoch": 1.2619978670458585,
"grad_norm": 24.487995147705078,
"learning_rate": 1.1600900580637517e-05,
"loss": 0.203,
"step": 3550
},
{
"epoch": 1.2797724848915748,
"grad_norm": 9.617659568786621,
"learning_rate": 1.1482403128332742e-05,
"loss": 0.1374,
"step": 3600
},
{
"epoch": 1.2975471027372911,
"grad_norm": 43.62260055541992,
"learning_rate": 1.1363905676027966e-05,
"loss": 0.1116,
"step": 3650
},
{
"epoch": 1.3153217205830074,
"grad_norm": 1.9222966432571411,
"learning_rate": 1.1245408223723191e-05,
"loss": 0.1384,
"step": 3700
},
{
"epoch": 1.3330963384287238,
"grad_norm": 4.166341304779053,
"learning_rate": 1.1129280720464511e-05,
"loss": 0.2119,
"step": 3750
},
{
"epoch": 1.35087095627444,
"grad_norm": 21.861906051635742,
"learning_rate": 1.1010783268159735e-05,
"loss": 0.1243,
"step": 3800
},
{
"epoch": 1.3686455741201564,
"grad_norm": 0.1768956333398819,
"learning_rate": 1.089228581585496e-05,
"loss": 0.1445,
"step": 3850
},
{
"epoch": 1.3864201919658727,
"grad_norm": 1.804442286491394,
"learning_rate": 1.0773788363550184e-05,
"loss": 0.163,
"step": 3900
},
{
"epoch": 1.404194809811589,
"grad_norm": 9.804183959960938,
"learning_rate": 1.065529091124541e-05,
"loss": 0.1682,
"step": 3950
},
{
"epoch": 1.4219694276573054,
"grad_norm": 12.038265228271484,
"learning_rate": 1.0536793458940633e-05,
"loss": 0.1458,
"step": 4000
},
{
"epoch": 1.4397440455030217,
"grad_norm": 0.020571628585457802,
"learning_rate": 1.0418296006635857e-05,
"loss": 0.1407,
"step": 4050
},
{
"epoch": 1.457518663348738,
"grad_norm": 0.07532644271850586,
"learning_rate": 1.0299798554331082e-05,
"loss": 0.1209,
"step": 4100
},
{
"epoch": 1.4752932811944544,
"grad_norm": 25.963619232177734,
"learning_rate": 1.0181301102026306e-05,
"loss": 0.1694,
"step": 4150
},
{
"epoch": 1.4930678990401707,
"grad_norm": 0.32112210988998413,
"learning_rate": 1.0062803649721531e-05,
"loss": 0.1652,
"step": 4200
},
{
"epoch": 1.510842516885887,
"grad_norm": 0.9838645458221436,
"learning_rate": 9.944306197416756e-06,
"loss": 0.156,
"step": 4250
},
{
"epoch": 1.5286171347316033,
"grad_norm": 0.2524012327194214,
"learning_rate": 9.825808745111982e-06,
"loss": 0.1738,
"step": 4300
},
{
"epoch": 1.5463917525773194,
"grad_norm": 0.21868811547756195,
"learning_rate": 9.707311292807206e-06,
"loss": 0.1417,
"step": 4350
},
{
"epoch": 1.564166370423036,
"grad_norm": 17.266357421875,
"learning_rate": 9.58881384050243e-06,
"loss": 0.1912,
"step": 4400
},
{
"epoch": 1.581940988268752,
"grad_norm": 18.44460678100586,
"learning_rate": 9.470316388197655e-06,
"loss": 0.1464,
"step": 4450
},
{
"epoch": 1.5997156061144686,
"grad_norm": 0.12830661237239838,
"learning_rate": 9.351818935892878e-06,
"loss": 0.1023,
"step": 4500
},
{
"epoch": 1.6174902239601847,
"grad_norm": 19.83939552307129,
"learning_rate": 9.233321483588104e-06,
"loss": 0.1519,
"step": 4550
},
{
"epoch": 1.6352648418059013,
"grad_norm": 11.456741333007812,
"learning_rate": 9.114824031283327e-06,
"loss": 0.1266,
"step": 4600
},
{
"epoch": 1.6530394596516174,
"grad_norm": 42.71019744873047,
"learning_rate": 8.996326578978553e-06,
"loss": 0.1539,
"step": 4650
},
{
"epoch": 1.670814077497334,
"grad_norm": 14.500824928283691,
"learning_rate": 8.877829126673777e-06,
"loss": 0.1318,
"step": 4700
},
{
"epoch": 1.68858869534305,
"grad_norm": 1.3086316585540771,
"learning_rate": 8.759331674369002e-06,
"loss": 0.1678,
"step": 4750
},
{
"epoch": 1.7063633131887666,
"grad_norm": 22.793882369995117,
"learning_rate": 8.640834222064226e-06,
"loss": 0.1579,
"step": 4800
},
{
"epoch": 1.7241379310344827,
"grad_norm": 0.10703279078006744,
"learning_rate": 8.522336769759451e-06,
"loss": 0.1621,
"step": 4850
},
{
"epoch": 1.7419125488801992,
"grad_norm": 24.500709533691406,
"learning_rate": 8.403839317454675e-06,
"loss": 0.1426,
"step": 4900
},
{
"epoch": 1.7596871667259153,
"grad_norm": 38.63484191894531,
"learning_rate": 8.2853418651499e-06,
"loss": 0.1144,
"step": 4950
},
{
"epoch": 1.7774617845716318,
"grad_norm": 3.4262137413024902,
"learning_rate": 8.166844412845124e-06,
"loss": 0.1625,
"step": 5000
},
{
"epoch": 1.795236402417348,
"grad_norm": 29.1049747467041,
"learning_rate": 8.04834696054035e-06,
"loss": 0.1545,
"step": 5050
},
{
"epoch": 1.8130110202630645,
"grad_norm": 37.24311828613281,
"learning_rate": 7.929849508235573e-06,
"loss": 0.1427,
"step": 5100
},
{
"epoch": 1.8307856381087806,
"grad_norm": 3.99753999710083,
"learning_rate": 7.811352055930798e-06,
"loss": 0.1474,
"step": 5150
},
{
"epoch": 1.8485602559544971,
"grad_norm": 0.08612991869449615,
"learning_rate": 7.692854603626024e-06,
"loss": 0.0756,
"step": 5200
},
{
"epoch": 1.8663348738002132,
"grad_norm": 3.234119415283203,
"learning_rate": 7.574357151321247e-06,
"loss": 0.1673,
"step": 5250
},
{
"epoch": 1.8841094916459296,
"grad_norm": 6.5654401779174805,
"learning_rate": 7.455859699016472e-06,
"loss": 0.154,
"step": 5300
},
{
"epoch": 1.9018841094916459,
"grad_norm": 0.12049467116594315,
"learning_rate": 7.3373622467116965e-06,
"loss": 0.1673,
"step": 5350
},
{
"epoch": 1.9196587273373622,
"grad_norm": 0.20109856128692627,
"learning_rate": 7.218864794406921e-06,
"loss": 0.1363,
"step": 5400
},
{
"epoch": 1.9374333451830785,
"grad_norm": 20.57840919494629,
"learning_rate": 7.1003673421021455e-06,
"loss": 0.1088,
"step": 5450
},
{
"epoch": 1.9552079630287948,
"grad_norm": 41.415245056152344,
"learning_rate": 6.98186988979737e-06,
"loss": 0.1583,
"step": 5500
},
{
"epoch": 1.9729825808745112,
"grad_norm": 9.189870834350586,
"learning_rate": 6.863372437492595e-06,
"loss": 0.1647,
"step": 5550
},
{
"epoch": 1.9907571987202275,
"grad_norm": 1.1518114805221558,
"learning_rate": 6.744874985187819e-06,
"loss": 0.1448,
"step": 5600
},
{
"epoch": 2.0,
"eval_accuracy": 0.9394,
"eval_f1": 0.939398717676866,
"eval_loss": 0.23394709825515747,
"eval_runtime": 17.5576,
"eval_samples_per_second": 284.777,
"eval_steps_per_second": 17.827,
"step": 5626
}
],
"logging_steps": 50,
"max_steps": 8439,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.183915060249056e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}