{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 4921,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01016053647632595,
"grad_norm": 8.800110816955566,
"learning_rate": 2.45e-05,
"loss": 4.6537,
"step": 50
},
{
"epoch": 0.0203210729526519,
"grad_norm": 4.425034523010254,
"learning_rate": 4.9500000000000004e-05,
"loss": 0.3978,
"step": 100
},
{
"epoch": 0.03048160942897785,
"grad_norm": 3.6702349185943604,
"learning_rate": 4.949180667911222e-05,
"loss": 0.1825,
"step": 150
},
{
"epoch": 0.0406421459053038,
"grad_norm": 2.460794448852539,
"learning_rate": 4.897324206596142e-05,
"loss": 0.1429,
"step": 200
},
{
"epoch": 0.05080268238162975,
"grad_norm": 2.5568010807037354,
"learning_rate": 4.845467745281062e-05,
"loss": 0.1217,
"step": 250
},
{
"epoch": 0.0609632188579557,
"grad_norm": 2.620318651199341,
"learning_rate": 4.7936112839659826e-05,
"loss": 0.1185,
"step": 300
},
{
"epoch": 0.07112375533428165,
"grad_norm": 1.9228143692016602,
"learning_rate": 4.741754822650902e-05,
"loss": 0.1121,
"step": 350
},
{
"epoch": 0.0812842918106076,
"grad_norm": 1.5245689153671265,
"learning_rate": 4.689898361335823e-05,
"loss": 0.1088,
"step": 400
},
{
"epoch": 0.09144482828693355,
"grad_norm": 2.169473648071289,
"learning_rate": 4.638041900020743e-05,
"loss": 0.0943,
"step": 450
},
{
"epoch": 0.1016053647632595,
"grad_norm": 2.453381061553955,
"learning_rate": 4.586185438705663e-05,
"loss": 0.0935,
"step": 500
},
{
"epoch": 0.11176590123958545,
"grad_norm": 1.878066897392273,
"learning_rate": 4.534328977390583e-05,
"loss": 0.0948,
"step": 550
},
{
"epoch": 0.1219264377159114,
"grad_norm": 1.8042210340499878,
"learning_rate": 4.4824725160755035e-05,
"loss": 0.0911,
"step": 600
},
{
"epoch": 0.13208697419223736,
"grad_norm": 2.545236349105835,
"learning_rate": 4.4306160547604236e-05,
"loss": 0.0847,
"step": 650
},
{
"epoch": 0.1422475106685633,
"grad_norm": 1.5960779190063477,
"learning_rate": 4.3787595934453436e-05,
"loss": 0.0839,
"step": 700
},
{
"epoch": 0.15240804714488926,
"grad_norm": 1.4794697761535645,
"learning_rate": 4.3269031321302636e-05,
"loss": 0.0809,
"step": 750
},
{
"epoch": 0.1625685836212152,
"grad_norm": 1.2670629024505615,
"learning_rate": 4.2750466708151837e-05,
"loss": 0.083,
"step": 800
},
{
"epoch": 0.17272912009754116,
"grad_norm": 1.7690194845199585,
"learning_rate": 4.2231902095001044e-05,
"loss": 0.0815,
"step": 850
},
{
"epoch": 0.1828896565738671,
"grad_norm": 1.1626324653625488,
"learning_rate": 4.171333748185024e-05,
"loss": 0.0726,
"step": 900
},
{
"epoch": 0.19305019305019305,
"grad_norm": 1.3579654693603516,
"learning_rate": 4.1194772868699444e-05,
"loss": 0.0748,
"step": 950
},
{
"epoch": 0.203210729526519,
"grad_norm": 1.231998085975647,
"learning_rate": 4.0676208255548645e-05,
"loss": 0.0753,
"step": 1000
},
{
"epoch": 0.21337126600284495,
"grad_norm": 1.0823161602020264,
"learning_rate": 4.0157643642397845e-05,
"loss": 0.0707,
"step": 1050
},
{
"epoch": 0.2235318024791709,
"grad_norm": 1.0237410068511963,
"learning_rate": 3.9639079029247045e-05,
"loss": 0.0672,
"step": 1100
},
{
"epoch": 0.23369233895549685,
"grad_norm": 1.2036405801773071,
"learning_rate": 3.9120514416096246e-05,
"loss": 0.0739,
"step": 1150
},
{
"epoch": 0.2438528754318228,
"grad_norm": 1.7096917629241943,
"learning_rate": 3.8601949802945446e-05,
"loss": 0.0678,
"step": 1200
},
{
"epoch": 0.25401341190814875,
"grad_norm": 0.7131240963935852,
"learning_rate": 3.808338518979465e-05,
"loss": 0.0685,
"step": 1250
},
{
"epoch": 0.2641739483844747,
"grad_norm": 2.2163612842559814,
"learning_rate": 3.756482057664385e-05,
"loss": 0.068,
"step": 1300
},
{
"epoch": 0.27433448486080064,
"grad_norm": 1.3038839101791382,
"learning_rate": 3.7046255963493054e-05,
"loss": 0.0658,
"step": 1350
},
{
"epoch": 0.2844950213371266,
"grad_norm": 1.1102685928344727,
"learning_rate": 3.6527691350342254e-05,
"loss": 0.0636,
"step": 1400
},
{
"epoch": 0.29465555781345254,
"grad_norm": 0.9830764532089233,
"learning_rate": 3.6009126737191454e-05,
"loss": 0.0683,
"step": 1450
},
{
"epoch": 0.3048160942897785,
"grad_norm": 1.5837175846099854,
"learning_rate": 3.5490562124040655e-05,
"loss": 0.0641,
"step": 1500
},
{
"epoch": 0.31497663076610444,
"grad_norm": 1.4741511344909668,
"learning_rate": 3.497199751088986e-05,
"loss": 0.0569,
"step": 1550
},
{
"epoch": 0.3251371672424304,
"grad_norm": 1.4531124830245972,
"learning_rate": 3.4453432897739055e-05,
"loss": 0.064,
"step": 1600
},
{
"epoch": 0.33529770371875633,
"grad_norm": 0.8768426775932312,
"learning_rate": 3.393486828458826e-05,
"loss": 0.0607,
"step": 1650
},
{
"epoch": 0.3454582401950823,
"grad_norm": 1.4214024543762207,
"learning_rate": 3.341630367143746e-05,
"loss": 0.0711,
"step": 1700
},
{
"epoch": 0.35561877667140823,
"grad_norm": 1.3013160228729248,
"learning_rate": 3.289773905828666e-05,
"loss": 0.0612,
"step": 1750
},
{
"epoch": 0.3657793131477342,
"grad_norm": 0.8296806812286377,
"learning_rate": 3.237917444513587e-05,
"loss": 0.0587,
"step": 1800
},
{
"epoch": 0.37593984962406013,
"grad_norm": 1.803634762763977,
"learning_rate": 3.1860609831985064e-05,
"loss": 0.0596,
"step": 1850
},
{
"epoch": 0.3861003861003861,
"grad_norm": 0.849446177482605,
"learning_rate": 3.134204521883427e-05,
"loss": 0.0613,
"step": 1900
},
{
"epoch": 0.396260922576712,
"grad_norm": 1.071276307106018,
"learning_rate": 3.082348060568347e-05,
"loss": 0.0571,
"step": 1950
},
{
"epoch": 0.406421459053038,
"grad_norm": 1.5632612705230713,
"learning_rate": 3.030491599253267e-05,
"loss": 0.0672,
"step": 2000
},
{
"epoch": 0.4165819955293639,
"grad_norm": 1.3285644054412842,
"learning_rate": 2.9786351379381872e-05,
"loss": 0.0587,
"step": 2050
},
{
"epoch": 0.4267425320056899,
"grad_norm": 1.4475947618484497,
"learning_rate": 2.9267786766231076e-05,
"loss": 0.0602,
"step": 2100
},
{
"epoch": 0.4369030684820159,
"grad_norm": 1.3992334604263306,
"learning_rate": 2.8749222153080273e-05,
"loss": 0.0633,
"step": 2150
},
{
"epoch": 0.4470636049583418,
"grad_norm": 1.3859405517578125,
"learning_rate": 2.823065753992948e-05,
"loss": 0.0601,
"step": 2200
},
{
"epoch": 0.4572241414346678,
"grad_norm": 1.176958680152893,
"learning_rate": 2.7712092926778677e-05,
"loss": 0.0572,
"step": 2250
},
{
"epoch": 0.4673846779109937,
"grad_norm": 1.2909014225006104,
"learning_rate": 2.719352831362788e-05,
"loss": 0.0568,
"step": 2300
},
{
"epoch": 0.47754521438731967,
"grad_norm": 1.4880414009094238,
"learning_rate": 2.667496370047708e-05,
"loss": 0.0604,
"step": 2350
},
{
"epoch": 0.4877057508636456,
"grad_norm": 1.7912594079971313,
"learning_rate": 2.6156399087326284e-05,
"loss": 0.0615,
"step": 2400
},
{
"epoch": 0.49786628733997157,
"grad_norm": 1.503558874130249,
"learning_rate": 2.563783447417548e-05,
"loss": 0.0543,
"step": 2450
},
{
"epoch": 0.5080268238162975,
"grad_norm": 1.1078828573226929,
"learning_rate": 2.5119269861024685e-05,
"loss": 0.0529,
"step": 2500
},
{
"epoch": 0.5181873602926235,
"grad_norm": 1.1492643356323242,
"learning_rate": 2.4600705247873885e-05,
"loss": 0.0563,
"step": 2550
},
{
"epoch": 0.5283478967689494,
"grad_norm": 1.7058578729629517,
"learning_rate": 2.408214063472309e-05,
"loss": 0.0538,
"step": 2600
},
{
"epoch": 0.5385084332452753,
"grad_norm": 1.1064975261688232,
"learning_rate": 2.356357602157229e-05,
"loss": 0.0559,
"step": 2650
},
{
"epoch": 0.5486689697216013,
"grad_norm": 1.7355729341506958,
"learning_rate": 2.304501140842149e-05,
"loss": 0.0568,
"step": 2700
},
{
"epoch": 0.5588295061979273,
"grad_norm": 0.8841239213943481,
"learning_rate": 2.2526446795270694e-05,
"loss": 0.0533,
"step": 2750
},
{
"epoch": 0.5689900426742532,
"grad_norm": 1.3076387643814087,
"learning_rate": 2.2007882182119894e-05,
"loss": 0.0543,
"step": 2800
},
{
"epoch": 0.5791505791505791,
"grad_norm": 0.9876370429992676,
"learning_rate": 2.1489317568969094e-05,
"loss": 0.0498,
"step": 2850
},
{
"epoch": 0.5893111156269051,
"grad_norm": 1.1891224384307861,
"learning_rate": 2.0970752955818295e-05,
"loss": 0.0494,
"step": 2900
},
{
"epoch": 0.5994716521032311,
"grad_norm": 1.3922621011734009,
"learning_rate": 2.0452188342667498e-05,
"loss": 0.0536,
"step": 2950
},
{
"epoch": 0.609632188579557,
"grad_norm": 1.3601816892623901,
"learning_rate": 1.99336237295167e-05,
"loss": 0.0492,
"step": 3000
},
{
"epoch": 0.6197927250558829,
"grad_norm": 0.9285414814949036,
"learning_rate": 1.94150591163659e-05,
"loss": 0.053,
"step": 3050
},
{
"epoch": 0.6299532615322089,
"grad_norm": 1.2582364082336426,
"learning_rate": 1.88964945032151e-05,
"loss": 0.0536,
"step": 3100
},
{
"epoch": 0.6401137980085349,
"grad_norm": 0.732959508895874,
"learning_rate": 1.8377929890064303e-05,
"loss": 0.05,
"step": 3150
},
{
"epoch": 0.6502743344848608,
"grad_norm": 0.9881527423858643,
"learning_rate": 1.7859365276913503e-05,
"loss": 0.0513,
"step": 3200
},
{
"epoch": 0.6604348709611868,
"grad_norm": 0.9577229022979736,
"learning_rate": 1.7340800663762704e-05,
"loss": 0.0458,
"step": 3250
},
{
"epoch": 0.6705954074375127,
"grad_norm": 0.7625375390052795,
"learning_rate": 1.6822236050611907e-05,
"loss": 0.0509,
"step": 3300
},
{
"epoch": 0.6807559439138386,
"grad_norm": 1.1508798599243164,
"learning_rate": 1.6303671437461108e-05,
"loss": 0.0524,
"step": 3350
},
{
"epoch": 0.6909164803901646,
"grad_norm": 1.0728750228881836,
"learning_rate": 1.578510682431031e-05,
"loss": 0.0541,
"step": 3400
},
{
"epoch": 0.7010770168664906,
"grad_norm": 0.8899337649345398,
"learning_rate": 1.5266542211159512e-05,
"loss": 0.0518,
"step": 3450
},
{
"epoch": 0.7112375533428165,
"grad_norm": 1.1225217580795288,
"learning_rate": 1.4747977598008714e-05,
"loss": 0.0501,
"step": 3500
},
{
"epoch": 0.7213980898191424,
"grad_norm": 1.1296806335449219,
"learning_rate": 1.4229412984857916e-05,
"loss": 0.0488,
"step": 3550
},
{
"epoch": 0.7315586262954684,
"grad_norm": 0.8744774460792542,
"learning_rate": 1.3710848371707116e-05,
"loss": 0.05,
"step": 3600
},
{
"epoch": 0.7417191627717944,
"grad_norm": 0.714661717414856,
"learning_rate": 1.3192283758556318e-05,
"loss": 0.0511,
"step": 3650
},
{
"epoch": 0.7518796992481203,
"grad_norm": 0.7421184182167053,
"learning_rate": 1.2673719145405518e-05,
"loss": 0.05,
"step": 3700
},
{
"epoch": 0.7620402357244462,
"grad_norm": 0.9880332350730896,
"learning_rate": 1.215515453225472e-05,
"loss": 0.0492,
"step": 3750
},
{
"epoch": 0.7722007722007722,
"grad_norm": 1.4817878007888794,
"learning_rate": 1.163658991910392e-05,
"loss": 0.0511,
"step": 3800
},
{
"epoch": 0.7823613086770982,
"grad_norm": 0.8993025422096252,
"learning_rate": 1.1118025305953123e-05,
"loss": 0.0489,
"step": 3850
},
{
"epoch": 0.792521845153424,
"grad_norm": 1.2746511697769165,
"learning_rate": 1.0599460692802323e-05,
"loss": 0.0534,
"step": 3900
},
{
"epoch": 0.80268238162975,
"grad_norm": 0.9993358850479126,
"learning_rate": 1.0080896079651525e-05,
"loss": 0.0431,
"step": 3950
},
{
"epoch": 0.812842918106076,
"grad_norm": 1.5594353675842285,
"learning_rate": 9.562331466500726e-06,
"loss": 0.0492,
"step": 4000
},
{
"epoch": 0.823003454582402,
"grad_norm": 1.1576915979385376,
"learning_rate": 9.043766853349928e-06,
"loss": 0.0486,
"step": 4050
},
{
"epoch": 0.8331639910587278,
"grad_norm": 0.9295603036880493,
"learning_rate": 8.52520224019913e-06,
"loss": 0.052,
"step": 4100
},
{
"epoch": 0.8433245275350538,
"grad_norm": 1.3283494710922241,
"learning_rate": 8.00663762704833e-06,
"loss": 0.0525,
"step": 4150
},
{
"epoch": 0.8534850640113798,
"grad_norm": 0.9289240837097168,
"learning_rate": 7.488073013897531e-06,
"loss": 0.0454,
"step": 4200
},
{
"epoch": 0.8636456004877058,
"grad_norm": 1.2302299737930298,
"learning_rate": 6.969508400746734e-06,
"loss": 0.0488,
"step": 4250
},
{
"epoch": 0.8738061369640318,
"grad_norm": 1.3769161701202393,
"learning_rate": 6.450943787595935e-06,
"loss": 0.0455,
"step": 4300
},
{
"epoch": 0.8839666734403576,
"grad_norm": 1.1223291158676147,
"learning_rate": 5.9323791744451355e-06,
"loss": 0.0494,
"step": 4350
},
{
"epoch": 0.8941272099166836,
"grad_norm": 1.3049792051315308,
"learning_rate": 5.4138145612943375e-06,
"loss": 0.0487,
"step": 4400
},
{
"epoch": 0.9042877463930096,
"grad_norm": 0.7075643539428711,
"learning_rate": 4.895249948143539e-06,
"loss": 0.0482,
"step": 4450
},
{
"epoch": 0.9144482828693355,
"grad_norm": 0.9082944989204407,
"learning_rate": 4.37668533499274e-06,
"loss": 0.0486,
"step": 4500
},
{
"epoch": 0.9246088193456614,
"grad_norm": 1.1275919675827026,
"learning_rate": 3.858120721841942e-06,
"loss": 0.0465,
"step": 4550
},
{
"epoch": 0.9347693558219874,
"grad_norm": 1.5697418451309204,
"learning_rate": 3.3395561086911427e-06,
"loss": 0.0516,
"step": 4600
},
{
"epoch": 0.9449298922983134,
"grad_norm": 1.0647753477096558,
"learning_rate": 2.8209914955403443e-06,
"loss": 0.0481,
"step": 4650
},
{
"epoch": 0.9550904287746393,
"grad_norm": 1.0811455249786377,
"learning_rate": 2.302426882389546e-06,
"loss": 0.0493,
"step": 4700
},
{
"epoch": 0.9652509652509652,
"grad_norm": 1.133407473564148,
"learning_rate": 1.7838622692387473e-06,
"loss": 0.0457,
"step": 4750
},
{
"epoch": 0.9754115017272912,
"grad_norm": 0.9117683172225952,
"learning_rate": 1.2652976560879487e-06,
"loss": 0.0479,
"step": 4800
},
{
"epoch": 0.9855720382036172,
"grad_norm": 0.6459560394287109,
"learning_rate": 7.4673304293715e-07,
"loss": 0.0427,
"step": 4850
},
{
"epoch": 0.9957325746799431,
"grad_norm": 1.068882942199707,
"learning_rate": 2.2816842978635138e-07,
"loss": 0.0423,
"step": 4900
},
{
"epoch": 1.0,
"eval_bmretriever_cosine_accuracy": 0.9783856868743896,
"eval_loss": 0.04604451358318329,
"eval_runtime": 1963.6251,
"eval_samples_per_second": 35.648,
"eval_steps_per_second": 0.14,
"step": 4921
}
],
"logging_steps": 50,
"max_steps": 4921,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 256,
"trial_name": null,
"trial_params": null
}