| { | |
| "best_metric": 0.07892899960279465, | |
| "best_model_checkpoint": "./modernbert_ner_model_20250225/checkpoint-8900", | |
| "epoch": 3.834554071520896, | |
| "eval_steps": 100, | |
| "global_step": 8900, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.043084877208099955, | |
| "eval_accuracy": 0.935206015281409, | |
| "eval_f1": 0.5963316214233308, | |
| "eval_loss": 0.17664727568626404, | |
| "eval_precision": 0.6165983917463207, | |
| "eval_recall": 0.5773547378889047, | |
| "eval_runtime": 5.1315, | |
| "eval_samples_per_second": 584.63, | |
| "eval_steps_per_second": 2.923, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.08616975441619991, | |
| "eval_accuracy": 0.9478526985671145, | |
| "eval_f1": 0.6602559467862049, | |
| "eval_loss": 0.1430799961090088, | |
| "eval_precision": 0.6722614840989399, | |
| "eval_recall": 0.6486716863190795, | |
| "eval_runtime": 5.1042, | |
| "eval_samples_per_second": 587.755, | |
| "eval_steps_per_second": 2.939, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.12925463162429987, | |
| "eval_accuracy": 0.954013903244766, | |
| "eval_f1": 0.6945600584154802, | |
| "eval_loss": 0.12783046066761017, | |
| "eval_precision": 0.7145432692307693, | |
| "eval_recall": 0.6756641568404603, | |
| "eval_runtime": 5.1393, | |
| "eval_samples_per_second": 583.739, | |
| "eval_steps_per_second": 2.919, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.17233950883239982, | |
| "eval_accuracy": 0.9566587624896131, | |
| "eval_f1": 0.710146001630475, | |
| "eval_loss": 0.12156905978918076, | |
| "eval_precision": 0.7423303377750232, | |
| "eval_recall": 0.6806364540417673, | |
| "eval_runtime": 5.1439, | |
| "eval_samples_per_second": 583.215, | |
| "eval_steps_per_second": 2.916, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2154243860404998, | |
| "grad_norm": 0.1655338853597641, | |
| "learning_rate": 0.00029463593278759154, | |
| "loss": 0.2008, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.2154243860404998, | |
| "eval_accuracy": 0.959161751889909, | |
| "eval_f1": 0.7199349545420948, | |
| "eval_loss": 0.112856425344944, | |
| "eval_precision": 0.7503852080123267, | |
| "eval_recall": 0.6918596391532889, | |
| "eval_runtime": 5.1102, | |
| "eval_samples_per_second": 587.066, | |
| "eval_steps_per_second": 2.935, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.25850926324859974, | |
| "eval_accuracy": 0.9605500496544456, | |
| "eval_f1": 0.7329311359623308, | |
| "eval_loss": 0.10928678512573242, | |
| "eval_precision": 0.7601098733404548, | |
| "eval_recall": 0.7076289245631482, | |
| "eval_runtime": 5.1042, | |
| "eval_samples_per_second": 587.749, | |
| "eval_steps_per_second": 2.939, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3015941404566997, | |
| "eval_accuracy": 0.9617863440141059, | |
| "eval_f1": 0.7338403041825095, | |
| "eval_loss": 0.10559003055095673, | |
| "eval_precision": 0.7560644869670031, | |
| "eval_recall": 0.7128853530331013, | |
| "eval_runtime": 5.1458, | |
| "eval_samples_per_second": 583.005, | |
| "eval_steps_per_second": 2.915, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.34467901766479964, | |
| "eval_accuracy": 0.9620498165825582, | |
| "eval_f1": 0.7305187678349309, | |
| "eval_loss": 0.10398514568805695, | |
| "eval_precision": 0.7531683765841883, | |
| "eval_recall": 0.7091916465407018, | |
| "eval_runtime": 5.1291, | |
| "eval_samples_per_second": 584.896, | |
| "eval_steps_per_second": 2.924, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.38776389487289964, | |
| "eval_accuracy": 0.9628402342879148, | |
| "eval_f1": 0.7411920140927774, | |
| "eval_loss": 0.101581871509552, | |
| "eval_precision": 0.7667425968109339, | |
| "eval_recall": 0.717289387697116, | |
| "eval_runtime": 5.1388, | |
| "eval_samples_per_second": 583.791, | |
| "eval_steps_per_second": 2.919, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.4308487720809996, | |
| "grad_norm": 0.1194114089012146, | |
| "learning_rate": 0.00028925032313657903, | |
| "loss": 0.1049, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.4308487720809996, | |
| "eval_accuracy": 0.9633671794248191, | |
| "eval_f1": 0.7432511737089201, | |
| "eval_loss": 0.10185921937227249, | |
| "eval_precision": 0.7683907174275747, | |
| "eval_recall": 0.719704503480608, | |
| "eval_runtime": 5.1836, | |
| "eval_samples_per_second": 578.743, | |
| "eval_steps_per_second": 2.894, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.47393364928909953, | |
| "eval_accuracy": 0.9641271964492004, | |
| "eval_f1": 0.7452248016456068, | |
| "eval_loss": 0.09846850484609604, | |
| "eval_precision": 0.7716415639738323, | |
| "eval_recall": 0.7205568972865464, | |
| "eval_runtime": 5.145, | |
| "eval_samples_per_second": 583.095, | |
| "eval_steps_per_second": 2.915, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.5170185264971995, | |
| "eval_accuracy": 0.9632151760199429, | |
| "eval_f1": 0.7427463372594082, | |
| "eval_loss": 0.09953544288873672, | |
| "eval_precision": 0.7510530137981118, | |
| "eval_recall": 0.7346213950845291, | |
| "eval_runtime": 5.1262, | |
| "eval_samples_per_second": 585.225, | |
| "eval_steps_per_second": 2.926, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.5601034037052994, | |
| "eval_accuracy": 0.9641575971301757, | |
| "eval_f1": 0.7527304499781564, | |
| "eval_loss": 0.09697850793600082, | |
| "eval_precision": 0.7720687079910381, | |
| "eval_recall": 0.7343372638158829, | |
| "eval_runtime": 5.1395, | |
| "eval_samples_per_second": 583.715, | |
| "eval_steps_per_second": 2.919, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.6031882809133994, | |
| "eval_accuracy": 0.9648466792322815, | |
| "eval_f1": 0.7408794666267137, | |
| "eval_loss": 0.09701889753341675, | |
| "eval_precision": 0.783676703645008, | |
| "eval_recall": 0.7025145617275181, | |
| "eval_runtime": 5.1321, | |
| "eval_samples_per_second": 584.561, | |
| "eval_steps_per_second": 2.923, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.6462731581214993, | |
| "grad_norm": 0.11641442030668259, | |
| "learning_rate": 0.0002838647134855665, | |
| "loss": 0.0955, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6462731581214993, | |
| "eval_accuracy": 0.9641069293285502, | |
| "eval_f1": 0.7526039742926792, | |
| "eval_loss": 0.0987541675567627, | |
| "eval_precision": 0.7839335180055401, | |
| "eval_recall": 0.7236823412416536, | |
| "eval_runtime": 5.1118, | |
| "eval_samples_per_second": 586.873, | |
| "eval_steps_per_second": 2.934, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6893580353295993, | |
| "eval_accuracy": 0.9651101518007337, | |
| "eval_f1": 0.7509688531649203, | |
| "eval_loss": 0.09490419179201126, | |
| "eval_precision": 0.7588107324147934, | |
| "eval_recall": 0.7432873987782356, | |
| "eval_runtime": 5.0796, | |
| "eval_samples_per_second": 590.593, | |
| "eval_steps_per_second": 2.953, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.7324429125376992, | |
| "eval_accuracy": 0.9662653776777933, | |
| "eval_f1": 0.7569454545454545, | |
| "eval_loss": 0.09300602227449417, | |
| "eval_precision": 0.7754433020414245, | |
| "eval_recall": 0.7393095610171899, | |
| "eval_runtime": 5.1475, | |
| "eval_samples_per_second": 582.81, | |
| "eval_steps_per_second": 2.914, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.7755277897457993, | |
| "eval_accuracy": 0.9657992339028394, | |
| "eval_f1": 0.7599940775836542, | |
| "eval_loss": 0.09315136820077896, | |
| "eval_precision": 0.793476580615242, | |
| "eval_recall": 0.7292229009802529, | |
| "eval_runtime": 5.8376, | |
| "eval_samples_per_second": 513.907, | |
| "eval_steps_per_second": 2.57, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.8186126669538992, | |
| "eval_accuracy": 0.9664883160049452, | |
| "eval_f1": 0.754410779423807, | |
| "eval_loss": 0.09128749370574951, | |
| "eval_precision": 0.792461682827651, | |
| "eval_recall": 0.7198465691149311, | |
| "eval_runtime": 5.152, | |
| "eval_samples_per_second": 582.301, | |
| "eval_steps_per_second": 2.912, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.8616975441619992, | |
| "grad_norm": 0.10880027711391449, | |
| "learning_rate": 0.00027847910383455406, | |
| "loss": 0.0906, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.8616975441619992, | |
| "eval_accuracy": 0.9675118055977787, | |
| "eval_f1": 0.7641537107656204, | |
| "eval_loss": 0.0904640182852745, | |
| "eval_precision": 0.7897529179930272, | |
| "eval_recall": 0.7401619548231283, | |
| "eval_runtime": 5.1464, | |
| "eval_samples_per_second": 582.93, | |
| "eval_steps_per_second": 2.915, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.9047824213700991, | |
| "eval_accuracy": 0.966711254332097, | |
| "eval_f1": 0.7613536478171697, | |
| "eval_loss": 0.09170496463775635, | |
| "eval_precision": 0.7858763042492061, | |
| "eval_recall": 0.7383151015769286, | |
| "eval_runtime": 5.0869, | |
| "eval_samples_per_second": 589.746, | |
| "eval_steps_per_second": 2.949, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.9478672985781991, | |
| "eval_accuracy": 0.9671064631847753, | |
| "eval_f1": 0.754022988505747, | |
| "eval_loss": 0.09009002894163132, | |
| "eval_precision": 0.7887061743717034, | |
| "eval_recall": 0.722261684898423, | |
| "eval_runtime": 5.1309, | |
| "eval_samples_per_second": 584.697, | |
| "eval_steps_per_second": 2.923, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.990952175786299, | |
| "eval_accuracy": 0.9677651446059058, | |
| "eval_f1": 0.7611605527819985, | |
| "eval_loss": 0.08897808939218521, | |
| "eval_precision": 0.775582424063698, | |
| "eval_recall": 0.7472652365392811, | |
| "eval_runtime": 5.1531, | |
| "eval_samples_per_second": 582.168, | |
| "eval_steps_per_second": 2.911, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.034037052994399, | |
| "eval_accuracy": 0.9675624733994042, | |
| "eval_f1": 0.7690079652425779, | |
| "eval_loss": 0.08915847539901733, | |
| "eval_precision": 0.7842268498006203, | |
| "eval_recall": 0.754368518255434, | |
| "eval_runtime": 5.145, | |
| "eval_samples_per_second": 583.086, | |
| "eval_steps_per_second": 2.915, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.077121930202499, | |
| "grad_norm": 0.09866651147603989, | |
| "learning_rate": 0.00027309349418354155, | |
| "loss": 0.0842, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.077121930202499, | |
| "eval_accuracy": 0.9678664802091567, | |
| "eval_f1": 0.7673179396092362, | |
| "eval_loss": 0.08764609694480896, | |
| "eval_precision": 0.8008651320871312, | |
| "eval_recall": 0.7364682483307288, | |
| "eval_runtime": 5.0632, | |
| "eval_samples_per_second": 592.507, | |
| "eval_steps_per_second": 2.963, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.1202068074105989, | |
| "eval_accuracy": 0.9675320727184289, | |
| "eval_f1": 0.7698676720632808, | |
| "eval_loss": 0.08784696459770203, | |
| "eval_precision": 0.8025585696670777, | |
| "eval_recall": 0.7397357579201591, | |
| "eval_runtime": 5.1556, | |
| "eval_samples_per_second": 581.896, | |
| "eval_steps_per_second": 2.909, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.163291684618699, | |
| "eval_accuracy": 0.9681603534585841, | |
| "eval_f1": 0.7680871421211453, | |
| "eval_loss": 0.08904842287302017, | |
| "eval_precision": 0.7968845448992059, | |
| "eval_recall": 0.7412984798977127, | |
| "eval_runtime": 5.109, | |
| "eval_samples_per_second": 587.204, | |
| "eval_steps_per_second": 2.936, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.2063765618267988, | |
| "eval_accuracy": 0.9686467643541882, | |
| "eval_f1": 0.7770339106389171, | |
| "eval_loss": 0.08730504661798477, | |
| "eval_precision": 0.7965090258093391, | |
| "eval_recall": 0.7584884216508027, | |
| "eval_runtime": 5.1308, | |
| "eval_samples_per_second": 584.709, | |
| "eval_steps_per_second": 2.924, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.2494614390348988, | |
| "eval_accuracy": 0.9690521067671916, | |
| "eval_f1": 0.7750579374275782, | |
| "eval_loss": 0.08751235157251358, | |
| "eval_precision": 0.7905155857586054, | |
| "eval_recall": 0.7601932092626793, | |
| "eval_runtime": 5.1442, | |
| "eval_samples_per_second": 583.186, | |
| "eval_steps_per_second": 2.916, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.2925463162429986, | |
| "grad_norm": 0.1591762900352478, | |
| "learning_rate": 0.0002677078845325291, | |
| "loss": 0.0794, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.2925463162429986, | |
| "eval_accuracy": 0.9680691514156584, | |
| "eval_f1": 0.7753859857482184, | |
| "eval_loss": 0.08817364275455475, | |
| "eval_precision": 0.8119073527125757, | |
| "eval_recall": 0.742008808069328, | |
| "eval_runtime": 5.1488, | |
| "eval_samples_per_second": 582.661, | |
| "eval_steps_per_second": 2.913, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.3356311934510987, | |
| "eval_accuracy": 0.9690014389655661, | |
| "eval_f1": 0.7762973868237025, | |
| "eval_loss": 0.08583438396453857, | |
| "eval_precision": 0.8055300947143293, | |
| "eval_recall": 0.7491120897854809, | |
| "eval_runtime": 5.1482, | |
| "eval_samples_per_second": 582.727, | |
| "eval_steps_per_second": 2.914, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.3787160706591985, | |
| "eval_accuracy": 0.9687075657161387, | |
| "eval_f1": 0.7790055248618786, | |
| "eval_loss": 0.08625612407922745, | |
| "eval_precision": 0.7976775346136669, | |
| "eval_recall": 0.7611876687029407, | |
| "eval_runtime": 5.1662, | |
| "eval_samples_per_second": 580.698, | |
| "eval_steps_per_second": 2.903, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.4218009478672986, | |
| "eval_accuracy": 0.968626497233538, | |
| "eval_f1": 0.7790014684287813, | |
| "eval_loss": 0.08638014644384384, | |
| "eval_precision": 0.8061084941498252, | |
| "eval_recall": 0.7536581900838187, | |
| "eval_runtime": 5.1072, | |
| "eval_samples_per_second": 587.401, | |
| "eval_steps_per_second": 2.937, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.4648858250753984, | |
| "eval_accuracy": 0.9683528911047607, | |
| "eval_f1": 0.7756664980853986, | |
| "eval_loss": 0.08518864214420319, | |
| "eval_precision": 0.7891796530432226, | |
| "eval_recall": 0.7626083250461714, | |
| "eval_runtime": 5.1521, | |
| "eval_samples_per_second": 582.282, | |
| "eval_steps_per_second": 2.911, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.5079707022834985, | |
| "grad_norm": 0.10778328031301498, | |
| "learning_rate": 0.0002623222748815166, | |
| "loss": 0.0786, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.5079707022834985, | |
| "eval_accuracy": 0.9685150280699621, | |
| "eval_f1": 0.7766948234784491, | |
| "eval_loss": 0.08616286516189575, | |
| "eval_precision": 0.7896359365825014, | |
| "eval_recall": 0.764171047023725, | |
| "eval_runtime": 5.1115, | |
| "eval_samples_per_second": 586.911, | |
| "eval_steps_per_second": 2.935, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.5510555794915986, | |
| "eval_accuracy": 0.9693054457753187, | |
| "eval_f1": 0.7803809940880228, | |
| "eval_loss": 0.08442429453134537, | |
| "eval_precision": 0.8024617232062444, | |
| "eval_recall": 0.7594828810910641, | |
| "eval_runtime": 5.1816, | |
| "eval_samples_per_second": 578.971, | |
| "eval_steps_per_second": 2.895, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.5941404566996984, | |
| "eval_accuracy": 0.9684846273889869, | |
| "eval_f1": 0.7758786533481317, | |
| "eval_loss": 0.08674349635839462, | |
| "eval_precision": 0.8096046942557134, | |
| "eval_recall": 0.7448501207557892, | |
| "eval_runtime": 5.8233, | |
| "eval_samples_per_second": 515.172, | |
| "eval_steps_per_second": 2.576, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.6372253339077982, | |
| "eval_accuracy": 0.9685048945096371, | |
| "eval_f1": 0.7758557232241442, | |
| "eval_loss": 0.08749110996723175, | |
| "eval_precision": 0.8050717995722578, | |
| "eval_recall": 0.7486858928825117, | |
| "eval_runtime": 5.1543, | |
| "eval_samples_per_second": 582.041, | |
| "eval_steps_per_second": 2.91, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.6803102111158983, | |
| "eval_accuracy": 0.9685048945096371, | |
| "eval_f1": 0.7757390417940877, | |
| "eval_loss": 0.08649948239326477, | |
| "eval_precision": 0.7956684092606423, | |
| "eval_recall": 0.756783634038926, | |
| "eval_runtime": 5.1578, | |
| "eval_samples_per_second": 581.646, | |
| "eval_steps_per_second": 2.908, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.7233950883239983, | |
| "grad_norm": 0.09820359200239182, | |
| "learning_rate": 0.00025693666523050407, | |
| "loss": 0.0774, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.7233950883239983, | |
| "eval_accuracy": 0.9692547779736933, | |
| "eval_f1": 0.7781892107747042, | |
| "eval_loss": 0.08616424351930618, | |
| "eval_precision": 0.7958122958122958, | |
| "eval_recall": 0.7613297343372638, | |
| "eval_runtime": 5.1939, | |
| "eval_samples_per_second": 577.605, | |
| "eval_steps_per_second": 2.888, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.7664799655320982, | |
| "eval_accuracy": 0.9701465312823008, | |
| "eval_f1": 0.7791577864334533, | |
| "eval_loss": 0.08393336832523346, | |
| "eval_precision": 0.807095006090134, | |
| "eval_recall": 0.7530899275465265, | |
| "eval_runtime": 5.1775, | |
| "eval_samples_per_second": 579.433, | |
| "eval_steps_per_second": 2.897, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.8095648427401982, | |
| "eval_accuracy": 0.9697310553089723, | |
| "eval_f1": 0.7807633808937495, | |
| "eval_loss": 0.08360669761896133, | |
| "eval_precision": 0.7963947990543735, | |
| "eval_recall": 0.7657337690012785, | |
| "eval_runtime": 5.1736, | |
| "eval_samples_per_second": 579.871, | |
| "eval_steps_per_second": 2.899, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.8526497199482983, | |
| "eval_accuracy": 0.9697614559899476, | |
| "eval_f1": 0.7807478924994596, | |
| "eval_loss": 0.08350168168544769, | |
| "eval_precision": 0.7921052631578948, | |
| "eval_recall": 0.7697116067623242, | |
| "eval_runtime": 5.1598, | |
| "eval_samples_per_second": 581.416, | |
| "eval_steps_per_second": 2.907, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.8957345971563981, | |
| "eval_accuracy": 0.970166798402951, | |
| "eval_f1": 0.7817303469477382, | |
| "eval_loss": 0.08246050775051117, | |
| "eval_precision": 0.8062811414766722, | |
| "eval_recall": 0.7586304872851257, | |
| "eval_runtime": 5.1425, | |
| "eval_samples_per_second": 583.372, | |
| "eval_steps_per_second": 2.917, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.938819474364498, | |
| "grad_norm": 0.11857543140649796, | |
| "learning_rate": 0.00025155105557949156, | |
| "loss": 0.0771, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.938819474364498, | |
| "eval_accuracy": 0.9691331752497923, | |
| "eval_f1": 0.7826024131204393, | |
| "eval_loss": 0.0859459713101387, | |
| "eval_precision": 0.7962364010585122, | |
| "eval_recall": 0.7694274754936781, | |
| "eval_runtime": 5.128, | |
| "eval_samples_per_second": 585.027, | |
| "eval_steps_per_second": 2.925, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.981904351572598, | |
| "eval_accuracy": 0.9700755963600252, | |
| "eval_f1": 0.7879845806967779, | |
| "eval_loss": 0.0816790908575058, | |
| "eval_precision": 0.8073025335320417, | |
| "eval_recall": 0.7695695411280011, | |
| "eval_runtime": 5.1709, | |
| "eval_samples_per_second": 580.174, | |
| "eval_steps_per_second": 2.901, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.024989228780698, | |
| "eval_accuracy": 0.9698019902312478, | |
| "eval_f1": 0.7841042154566745, | |
| "eval_loss": 0.08529265224933624, | |
| "eval_precision": 0.8086037735849056, | |
| "eval_recall": 0.7610456030686177, | |
| "eval_runtime": 5.1834, | |
| "eval_samples_per_second": 578.77, | |
| "eval_steps_per_second": 2.894, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.068074105988798, | |
| "eval_accuracy": 0.9703796031697777, | |
| "eval_f1": 0.7924996351962643, | |
| "eval_loss": 0.0835157036781311, | |
| "eval_precision": 0.8146092695365231, | |
| "eval_recall": 0.7715584600085239, | |
| "eval_runtime": 5.1951, | |
| "eval_samples_per_second": 577.468, | |
| "eval_steps_per_second": 2.887, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.1111589831968978, | |
| "eval_accuracy": 0.9694473156198699, | |
| "eval_f1": 0.7853357531760434, | |
| "eval_loss": 0.08416793495416641, | |
| "eval_precision": 0.8029988123515439, | |
| "eval_recall": 0.7684330160534166, | |
| "eval_runtime": 5.1858, | |
| "eval_samples_per_second": 578.5, | |
| "eval_steps_per_second": 2.892, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.154243860404998, | |
| "grad_norm": 0.08804752677679062, | |
| "learning_rate": 0.0002461654459284791, | |
| "loss": 0.0681, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.154243860404998, | |
| "eval_accuracy": 0.9703390689284773, | |
| "eval_f1": 0.7868997565833149, | |
| "eval_loss": 0.08355652540922165, | |
| "eval_precision": 0.8183491868671372, | |
| "eval_recall": 0.7577780934791873, | |
| "eval_runtime": 5.1276, | |
| "eval_samples_per_second": 585.071, | |
| "eval_steps_per_second": 2.925, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.197328737613098, | |
| "eval_accuracy": 0.9699134593948238, | |
| "eval_f1": 0.7855120853596573, | |
| "eval_loss": 0.08640210330486298, | |
| "eval_precision": 0.803057287028792, | |
| "eval_recall": 0.7687171473220628, | |
| "eval_runtime": 5.1532, | |
| "eval_samples_per_second": 582.162, | |
| "eval_steps_per_second": 2.911, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.2404136148211977, | |
| "eval_accuracy": 0.9702073326442512, | |
| "eval_f1": 0.789276448544249, | |
| "eval_loss": 0.08586513251066208, | |
| "eval_precision": 0.8009360830773731, | |
| "eval_recall": 0.7779514135530615, | |
| "eval_runtime": 5.1699, | |
| "eval_samples_per_second": 580.277, | |
| "eval_steps_per_second": 2.901, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.2834984920292976, | |
| "eval_accuracy": 0.9699539936361241, | |
| "eval_f1": 0.7828817644051704, | |
| "eval_loss": 0.08344285935163498, | |
| "eval_precision": 0.8055305079651337, | |
| "eval_recall": 0.7614717999715869, | |
| "eval_runtime": 5.1913, | |
| "eval_samples_per_second": 577.886, | |
| "eval_steps_per_second": 2.889, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.326583369237398, | |
| "eval_accuracy": 0.9698323909122231, | |
| "eval_f1": 0.7898440333696048, | |
| "eval_loss": 0.08490483462810516, | |
| "eval_precision": 0.8069967388081827, | |
| "eval_recall": 0.7734053132547237, | |
| "eval_runtime": 5.153, | |
| "eval_samples_per_second": 582.188, | |
| "eval_steps_per_second": 2.911, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.3696682464454977, | |
| "grad_norm": 0.10004045069217682, | |
| "learning_rate": 0.00024077983627746657, | |
| "loss": 0.0674, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.3696682464454977, | |
| "eval_accuracy": 0.9706329421779047, | |
| "eval_f1": 0.7900129888872853, | |
| "eval_loss": 0.08235176652669907, | |
| "eval_precision": 0.8027570024930342, | |
| "eval_recall": 0.7776672822844154, | |
| "eval_runtime": 5.8382, | |
| "eval_samples_per_second": 513.861, | |
| "eval_steps_per_second": 2.569, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.4127531236535975, | |
| "eval_accuracy": 0.9701465312823008, | |
| "eval_f1": 0.7867615038467122, | |
| "eval_loss": 0.08183197677135468, | |
| "eval_precision": 0.8042736311025375, | |
| "eval_recall": 0.7699957380309703, | |
| "eval_runtime": 5.1409, | |
| "eval_samples_per_second": 583.555, | |
| "eval_steps_per_second": 2.918, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.4558380008616973, | |
| "eval_accuracy": 0.9702073326442512, | |
| "eval_f1": 0.7872696817420435, | |
| "eval_loss": 0.08382030576467514, | |
| "eval_precision": 0.8076808129109384, | |
| "eval_recall": 0.7678647535161245, | |
| "eval_runtime": 5.1657, | |
| "eval_samples_per_second": 580.751, | |
| "eval_steps_per_second": 2.904, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.4989228780697976, | |
| "eval_accuracy": 0.9702275997649014, | |
| "eval_f1": 0.7875814600571136, | |
| "eval_loss": 0.08378946781158447, | |
| "eval_precision": 0.8126322151707465, | |
| "eval_recall": 0.7640289813894019, | |
| "eval_runtime": 5.133, | |
| "eval_samples_per_second": 584.453, | |
| "eval_steps_per_second": 2.922, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.5420077552778975, | |
| "eval_accuracy": 0.9705620072556292, | |
| "eval_f1": 0.7894349617422014, | |
| "eval_loss": 0.08313048630952835, | |
| "eval_precision": 0.8187089882496567, | |
| "eval_recall": 0.7621821281432022, | |
| "eval_runtime": 5.1336, | |
| "eval_samples_per_second": 584.386, | |
| "eval_steps_per_second": 2.922, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.5850926324859973, | |
| "grad_norm": 0.08104603737592697, | |
| "learning_rate": 0.00023539422662645408, | |
| "loss": 0.0674, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.5850926324859973, | |
| "eval_accuracy": 0.9705721408159543, | |
| "eval_f1": 0.7914712778429074, | |
| "eval_loss": 0.08241896331310272, | |
| "eval_precision": 0.8172189438644273, | |
| "eval_recall": 0.7672964909788322, | |
| "eval_runtime": 5.1433, | |
| "eval_samples_per_second": 583.282, | |
| "eval_steps_per_second": 2.916, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.6281775096940976, | |
| "eval_accuracy": 0.9694777163008451, | |
| "eval_f1": 0.7920289855072463, | |
| "eval_loss": 0.0877351462841034, | |
| "eval_precision": 0.8083123798254696, | |
| "eval_recall": 0.7763886915755079, | |
| "eval_runtime": 5.1298, | |
| "eval_samples_per_second": 584.819, | |
| "eval_steps_per_second": 2.924, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 2.6712623869021974, | |
| "eval_accuracy": 0.970187065523601, | |
| "eval_f1": 0.7875146541617819, | |
| "eval_loss": 0.08446252346038818, | |
| "eval_precision": 0.8131336056892117, | |
| "eval_recall": 0.7634607188521096, | |
| "eval_runtime": 5.1724, | |
| "eval_samples_per_second": 580.006, | |
| "eval_steps_per_second": 2.9, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 2.7143472641102973, | |
| "eval_accuracy": 0.9709268154273323, | |
| "eval_f1": 0.79413046636641, | |
| "eval_loss": 0.0830024853348732, | |
| "eval_precision": 0.8125464545859967, | |
| "eval_recall": 0.7765307572098309, | |
| "eval_runtime": 5.1622, | |
| "eval_samples_per_second": 581.144, | |
| "eval_steps_per_second": 2.906, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 2.757432141318397, | |
| "eval_accuracy": 0.9705518736953042, | |
| "eval_f1": 0.7958092167924391, | |
| "eval_loss": 0.08372634649276733, | |
| "eval_precision": 0.8216338880484115, | |
| "eval_recall": 0.7715584600085239, | |
| "eval_runtime": 5.1748, | |
| "eval_samples_per_second": 579.728, | |
| "eval_steps_per_second": 2.899, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 2.800517018526497, | |
| "grad_norm": 0.07959684729576111, | |
| "learning_rate": 0.00023000861697544157, | |
| "loss": 0.0667, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.800517018526497, | |
| "eval_accuracy": 0.9703188018078271, | |
| "eval_f1": 0.7892570933506605, | |
| "eval_loss": 0.08360672742128372, | |
| "eval_precision": 0.8024075161479741, | |
| "eval_recall": 0.7765307572098309, | |
| "eval_runtime": 5.1231, | |
| "eval_samples_per_second": 585.583, | |
| "eval_steps_per_second": 2.928, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.843601895734597, | |
| "eval_accuracy": 0.9709369489876574, | |
| "eval_f1": 0.7909653916211292, | |
| "eval_loss": 0.08242646604776382, | |
| "eval_precision": 0.8118456476218965, | |
| "eval_recall": 0.7711322631055547, | |
| "eval_runtime": 5.1572, | |
| "eval_samples_per_second": 581.707, | |
| "eval_steps_per_second": 2.909, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 2.886686772942697, | |
| "eval_accuracy": 0.9714638941245617, | |
| "eval_f1": 0.7976650857351331, | |
| "eval_loss": 0.08036693930625916, | |
| "eval_precision": 0.81998199819982, | |
| "eval_recall": 0.7765307572098309, | |
| "eval_runtime": 5.1014, | |
| "eval_samples_per_second": 588.074, | |
| "eval_steps_per_second": 2.94, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.929771650150797, | |
| "eval_accuracy": 0.9718692365375651, | |
| "eval_f1": 0.793916460486101, | |
| "eval_loss": 0.08084654808044434, | |
| "eval_precision": 0.8138147098314188, | |
| "eval_recall": 0.7749680352322773, | |
| "eval_runtime": 5.1757, | |
| "eval_samples_per_second": 579.63, | |
| "eval_steps_per_second": 2.898, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.972856527358897, | |
| "eval_accuracy": 0.9711700208751343, | |
| "eval_f1": 0.792, | |
| "eval_loss": 0.08285341411828995, | |
| "eval_precision": 0.81135449262405, | |
| "eval_recall": 0.7735473788890468, | |
| "eval_runtime": 5.1616, | |
| "eval_samples_per_second": 581.214, | |
| "eval_steps_per_second": 2.906, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 3.015941404566997, | |
| "grad_norm": 0.07623889297246933, | |
| "learning_rate": 0.00022462300732442912, | |
| "loss": 0.066, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 3.015941404566997, | |
| "eval_accuracy": 0.9713220242800106, | |
| "eval_f1": 0.7958886135005102, | |
| "eval_loss": 0.0849044919013977, | |
| "eval_precision": 0.8173379248390478, | |
| "eval_recall": 0.7755362977695696, | |
| "eval_runtime": 5.1766, | |
| "eval_samples_per_second": 579.536, | |
| "eval_steps_per_second": 2.898, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 3.059026281775097, | |
| "eval_accuracy": 0.9710990859528587, | |
| "eval_f1": 0.7920576041893957, | |
| "eval_loss": 0.08765333890914917, | |
| "eval_precision": 0.8114754098360656, | |
| "eval_recall": 0.7735473788890468, | |
| "eval_runtime": 5.1302, | |
| "eval_samples_per_second": 584.777, | |
| "eval_steps_per_second": 2.924, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 3.102111158983197, | |
| "eval_accuracy": 0.970774812022456, | |
| "eval_f1": 0.7927130207577296, | |
| "eval_loss": 0.08476532250642776, | |
| "eval_precision": 0.8103576198248998, | |
| "eval_recall": 0.7758204290382157, | |
| "eval_runtime": 5.1437, | |
| "eval_samples_per_second": 583.242, | |
| "eval_steps_per_second": 2.916, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 3.145196036191297, | |
| "eval_accuracy": 0.9712206886767597, | |
| "eval_f1": 0.7949370771804759, | |
| "eval_loss": 0.08828677237033844, | |
| "eval_precision": 0.8145497912939773, | |
| "eval_recall": 0.7762466259411849, | |
| "eval_runtime": 5.1438, | |
| "eval_samples_per_second": 583.229, | |
| "eval_steps_per_second": 2.916, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 3.188280913399397, | |
| "eval_accuracy": 0.9700249285583997, | |
| "eval_f1": 0.7877159866414986, | |
| "eval_loss": 0.08564765751361847, | |
| "eval_precision": 0.8054936896807721, | |
| "eval_recall": 0.7707060662025856, | |
| "eval_runtime": 5.8414, | |
| "eval_samples_per_second": 513.575, | |
| "eval_steps_per_second": 2.568, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 3.2313657906074966, | |
| "grad_norm": 0.14740775525569916, | |
| "learning_rate": 0.00021923739767341663, | |
| "loss": 0.0556, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 3.2313657906074966, | |
| "eval_accuracy": 0.9712409557974099, | |
| "eval_f1": 0.7959982601130926, | |
| "eval_loss": 0.0849863737821579, | |
| "eval_precision": 0.8127313101406366, | |
| "eval_recall": 0.7799403324335843, | |
| "eval_runtime": 5.1395, | |
| "eval_samples_per_second": 583.711, | |
| "eval_steps_per_second": 2.919, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 3.274450667815597, | |
| "eval_accuracy": 0.9710686852718834, | |
| "eval_f1": 0.7949167397020157, | |
| "eval_loss": 0.08467870950698853, | |
| "eval_precision": 0.8179768525477228, | |
| "eval_recall": 0.7731211819860776, | |
| "eval_runtime": 5.1447, | |
| "eval_samples_per_second": 583.126, | |
| "eval_steps_per_second": 2.916, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 3.3175355450236967, | |
| "eval_accuracy": 0.971615897529438, | |
| "eval_f1": 0.7966792892513836, | |
| "eval_loss": 0.08322973549365997, | |
| "eval_precision": 0.8172717764828926, | |
| "eval_recall": 0.7770990197471231, | |
| "eval_runtime": 5.1113, | |
| "eval_samples_per_second": 586.93, | |
| "eval_steps_per_second": 2.935, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 3.3606204222317966, | |
| "eval_accuracy": 0.9713321578403357, | |
| "eval_f1": 0.7940792337831956, | |
| "eval_loss": 0.08206350356340408, | |
| "eval_precision": 0.811508230757823, | |
| "eval_recall": 0.7773831510157693, | |
| "eval_runtime": 5.1313, | |
| "eval_samples_per_second": 584.642, | |
| "eval_steps_per_second": 2.923, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 3.4037052994398964, | |
| "eval_accuracy": 0.9709977503496078, | |
| "eval_f1": 0.7957571324067301, | |
| "eval_loss": 0.08641249686479568, | |
| "eval_precision": 0.8202382747700196, | |
| "eval_recall": 0.7726949850831084, | |
| "eval_runtime": 5.1503, | |
| "eval_samples_per_second": 582.489, | |
| "eval_steps_per_second": 2.912, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 3.4467901766479967, | |
| "grad_norm": 0.10462938249111176, | |
| "learning_rate": 0.00021385178802240412, | |
| "loss": 0.0571, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 3.4467901766479967, | |
| "eval_accuracy": 0.9705924079366044, | |
| "eval_f1": 0.7928046989720997, | |
| "eval_loss": 0.08726098388433456, | |
| "eval_precision": 0.8203920376842425, | |
| "eval_recall": 0.7670123597101861, | |
| "eval_runtime": 5.1929, | |
| "eval_samples_per_second": 577.716, | |
| "eval_steps_per_second": 2.889, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 3.4898750538560965, | |
| "eval_accuracy": 0.971018017470258, | |
| "eval_f1": 0.792296511627907, | |
| "eval_loss": 0.08289187401533127, | |
| "eval_precision": 0.8110400238059813, | |
| "eval_recall": 0.7743997726949851, | |
| "eval_runtime": 5.1659, | |
| "eval_samples_per_second": 580.728, | |
| "eval_steps_per_second": 2.904, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 3.5329599310641964, | |
| "eval_accuracy": 0.9715246954865122, | |
| "eval_f1": 0.7980417945345607, | |
| "eval_loss": 0.0846642255783081, | |
| "eval_precision": 0.8215736422446216, | |
| "eval_recall": 0.7758204290382157, | |
| "eval_runtime": 5.165, | |
| "eval_samples_per_second": 580.835, | |
| "eval_steps_per_second": 2.904, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 3.5760448082722966, | |
| "eval_accuracy": 0.9707849455827811, | |
| "eval_f1": 0.7915995902239135, | |
| "eval_loss": 0.08357686549425125, | |
| "eval_precision": 0.8162064282480761, | |
| "eval_recall": 0.7684330160534166, | |
| "eval_runtime": 5.1568, | |
| "eval_samples_per_second": 581.751, | |
| "eval_steps_per_second": 2.909, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 3.6191296854803965, | |
| "eval_accuracy": 0.9708457469447316, | |
| "eval_f1": 0.7932402645113886, | |
| "eval_loss": 0.08390816301107407, | |
| "eval_precision": 0.8214883579363872, | |
| "eval_recall": 0.766870294075863, | |
| "eval_runtime": 5.1482, | |
| "eval_samples_per_second": 582.733, | |
| "eval_steps_per_second": 2.914, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 3.6622145626884963, | |
| "grad_norm": 0.11067754775285721, | |
| "learning_rate": 0.00020846617837139164, | |
| "loss": 0.0576, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 3.6622145626884963, | |
| "eval_accuracy": 0.9711902879957844, | |
| "eval_f1": 0.7922267679719464, | |
| "eval_loss": 0.0809941291809082, | |
| "eval_precision": 0.8154609715746729, | |
| "eval_recall": 0.7702798692996164, | |
| "eval_runtime": 5.1845, | |
| "eval_samples_per_second": 578.644, | |
| "eval_steps_per_second": 2.893, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 3.705299439896596, | |
| "eval_accuracy": 0.9711598873148092, | |
| "eval_f1": 0.7963543565439302, | |
| "eval_loss": 0.08366210758686066, | |
| "eval_precision": 0.8180047932893948, | |
| "eval_recall": 0.7758204290382157, | |
| "eval_runtime": 5.1558, | |
| "eval_samples_per_second": 581.865, | |
| "eval_steps_per_second": 2.909, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 3.748384317104696, | |
| "eval_accuracy": 0.9712814900387102, | |
| "eval_f1": 0.7941414587473319, | |
| "eval_loss": 0.08393159508705139, | |
| "eval_precision": 0.8239156994502138, | |
| "eval_recall": 0.7664440971728939, | |
| "eval_runtime": 5.1323, | |
| "eval_samples_per_second": 584.53, | |
| "eval_steps_per_second": 2.923, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 3.7914691943127963, | |
| "eval_accuracy": 0.9712916235990353, | |
| "eval_f1": 0.7929502585390722, | |
| "eval_loss": 0.0845068171620369, | |
| "eval_precision": 0.8135086670651525, | |
| "eval_recall": 0.7734053132547237, | |
| "eval_runtime": 5.1296, | |
| "eval_samples_per_second": 584.843, | |
| "eval_steps_per_second": 2.924, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 3.834554071520896, | |
| "eval_accuracy": 0.9722441782695932, | |
| "eval_f1": 0.7972508591065292, | |
| "eval_loss": 0.07892899960279465, | |
| "eval_precision": 0.8213317264236216, | |
| "eval_recall": 0.7745418383293081, | |
| "eval_runtime": 5.1954, | |
| "eval_samples_per_second": 577.434, | |
| "eval_steps_per_second": 2.887, | |
| "step": 8900 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 27852, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 12, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.129811768410176e+17, | |
| "train_batch_size": 200, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |