{ "best_metric": 0.5446548032758927, "best_model_checkpoint": "tmp_ner_damsay_304130/run-36/checkpoint-4125", "epoch": 25.0, "eval_steps": 500, "global_step": 4125, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.7470146314639639, "eval_loss": 1.2898669242858887, "eval_macro_f1": 0.060475942039181926, "eval_macro_precision": 0.09351501221756969, "eval_macro_recall": 0.06591547068187953, "eval_runtime": 3.9883, "eval_samples_per_second": 220.142, "eval_steps_per_second": 27.58, "step": 165 }, { "epoch": 2.0, "eval_accuracy": 0.8084088190052983, "eval_loss": 0.8025009632110596, "eval_macro_f1": 0.24340420766720258, "eval_macro_precision": 0.3139630221874961, "eval_macro_recall": 0.24833685017157406, "eval_runtime": 3.8063, "eval_samples_per_second": 230.671, "eval_steps_per_second": 28.9, "step": 330 }, { "epoch": 3.0, "eval_accuracy": 0.8305009177175213, "eval_loss": 0.6665005683898926, "eval_macro_f1": 0.37091081081224914, "eval_macro_precision": 0.4480773602522756, "eval_macro_recall": 0.36393297647770273, "eval_runtime": 3.7531, "eval_samples_per_second": 233.938, "eval_steps_per_second": 29.309, "step": 495 }, { "epoch": 3.0303030303030303, "grad_norm": 1.6078789234161377, "learning_rate": 7.735247090033069e-05, "loss": 1.4056, "step": 500 }, { "epoch": 4.0, "eval_accuracy": 0.8344504469693029, "eval_loss": 0.6422820091247559, "eval_macro_f1": 0.43071408069652817, "eval_macro_precision": 0.48878382034336376, "eval_macro_recall": 0.42148708788382616, "eval_runtime": 3.8104, "eval_samples_per_second": 230.421, "eval_steps_per_second": 28.868, "step": 660 }, { "epoch": 5.0, "eval_accuracy": 0.8393288401090858, "eval_loss": 0.6265012621879578, "eval_macro_f1": 0.47492811430942733, "eval_macro_precision": 0.5136390951479204, "eval_macro_recall": 0.46552778262584993, "eval_runtime": 3.749, "eval_samples_per_second": 234.199, "eval_steps_per_second": 29.342, "step": 825 }, { "epoch": 6.0, "eval_accuracy": 0.8411568442406724, "eval_loss": 0.6596290469169617, "eval_macro_f1": 0.4931684511043183, "eval_macro_precision": 0.5359461122068587, "eval_macro_recall": 0.48533559234357204, "eval_runtime": 3.8138, "eval_samples_per_second": 230.215, "eval_steps_per_second": 28.842, "step": 990 }, { "epoch": 6.0606060606060606, "grad_norm": 1.48198664188385, "learning_rate": 6.926120825385258e-05, "loss": 0.4174, "step": 1000 }, { "epoch": 7.0, "eval_accuracy": 0.8377386251328275, "eval_loss": 0.6801306009292603, "eval_macro_f1": 0.5105261331060333, "eval_macro_precision": 0.5298621091537271, "eval_macro_recall": 0.5130394897405378, "eval_runtime": 3.743, "eval_samples_per_second": 234.574, "eval_steps_per_second": 29.389, "step": 1155 }, { "epoch": 8.0, "eval_accuracy": 0.8405512249856955, "eval_loss": 0.7128350138664246, "eval_macro_f1": 0.5145736070824514, "eval_macro_precision": 0.5338232435701135, "eval_macro_recall": 0.5148104452511267, "eval_runtime": 3.8105, "eval_samples_per_second": 230.418, "eval_steps_per_second": 28.868, "step": 1320 }, { "epoch": 9.0, "eval_accuracy": 0.8356208154681846, "eval_loss": 0.7495563626289368, "eval_macro_f1": 0.5078761754368146, "eval_macro_precision": 0.5215628937215542, "eval_macro_recall": 0.510509104208083, "eval_runtime": 3.8142, "eval_samples_per_second": 230.191, "eval_steps_per_second": 28.839, "step": 1485 }, { "epoch": 9.090909090909092, "grad_norm": 1.2219234704971313, "learning_rate": 6.116994560737448e-05, "loss": 0.207, "step": 1500 }, { "epoch": 10.0, "eval_accuracy": 0.8404137531302713, "eval_loss": 0.7915265560150146, "eval_macro_f1": 0.5284150089162629, "eval_macro_precision": 0.5652985249581868, "eval_macro_recall": 0.5173389157983858, "eval_runtime": 3.8115, "eval_samples_per_second": 230.354, "eval_steps_per_second": 28.86, "step": 1650 }, { "epoch": 11.0, "eval_accuracy": 0.8365311020784258, "eval_loss": 0.8357340097427368, "eval_macro_f1": 0.5241371167696747, "eval_macro_precision": 0.5418612467571973, "eval_macro_recall": 0.5268409732620435, "eval_runtime": 3.8104, "eval_samples_per_second": 230.422, "eval_steps_per_second": 28.868, "step": 1815 }, { "epoch": 12.0, "eval_accuracy": 0.83948488924227, "eval_loss": 0.860895574092865, "eval_macro_f1": 0.5274542995604197, "eval_macro_precision": 0.5642301005555995, "eval_macro_recall": 0.5189445167674911, "eval_runtime": 3.7927, "eval_samples_per_second": 231.498, "eval_steps_per_second": 29.003, "step": 1980 }, { "epoch": 12.121212121212121, "grad_norm": 1.4894951581954956, "learning_rate": 5.307868296089637e-05, "loss": 0.1106, "step": 2000 }, { "epoch": 13.0, "eval_accuracy": 0.837367079577627, "eval_loss": 0.8975165486335754, "eval_macro_f1": 0.5324199411604539, "eval_macro_precision": 0.545792242605528, "eval_macro_recall": 0.5377469764932828, "eval_runtime": 3.7913, "eval_samples_per_second": 231.585, "eval_steps_per_second": 29.014, "step": 2145 }, { "epoch": 14.0, "eval_accuracy": 0.831667570760851, "eval_loss": 0.9421281218528748, "eval_macro_f1": 0.5262611445373023, "eval_macro_precision": 0.5347627543444692, "eval_macro_recall": 0.5417363003088691, "eval_runtime": 3.804, "eval_samples_per_second": 230.807, "eval_steps_per_second": 28.917, "step": 2310 }, { "epoch": 15.0, "eval_accuracy": 0.8381064552324761, "eval_loss": 0.9438627362251282, "eval_macro_f1": 0.5348984394876934, "eval_macro_precision": 0.546671824271427, "eval_macro_recall": 0.5379298790499474, "eval_runtime": 3.8089, "eval_samples_per_second": 230.516, "eval_steps_per_second": 28.88, "step": 2475 }, { "epoch": 15.151515151515152, "grad_norm": 0.9380698800086975, "learning_rate": 4.498742031441827e-05, "loss": 0.0622, "step": 2500 }, { "epoch": 16.0, "eval_accuracy": 0.835973783745625, "eval_loss": 0.9927621483802795, "eval_macro_f1": 0.5346150876236929, "eval_macro_precision": 0.5412591223724303, "eval_macro_recall": 0.5415646974890974, "eval_runtime": 3.8119, "eval_samples_per_second": 230.328, "eval_steps_per_second": 28.857, "step": 2640 }, { "epoch": 17.0, "eval_accuracy": 0.8385114398876446, "eval_loss": 1.00917387008667, "eval_macro_f1": 0.5375529028664181, "eval_macro_precision": 0.5691165677941541, "eval_macro_recall": 0.5349126486369854, "eval_runtime": 3.8075, "eval_samples_per_second": 230.599, "eval_steps_per_second": 28.891, "step": 2805 }, { "epoch": 18.0, "eval_accuracy": 0.8392285228091816, "eval_loss": 1.0304529666900635, "eval_macro_f1": 0.5373729743137114, "eval_macro_precision": 0.5576924127069764, "eval_macro_recall": 0.539334821981497, "eval_runtime": 3.8101, "eval_samples_per_second": 230.437, "eval_steps_per_second": 28.87, "step": 2970 }, { "epoch": 18.181818181818183, "grad_norm": 1.10272216796875, "learning_rate": 3.689615766794016e-05, "loss": 0.0365, "step": 3000 }, { "epoch": 19.0, "eval_accuracy": 0.8382290652656922, "eval_loss": 1.0565452575683594, "eval_macro_f1": 0.5382182396542954, "eval_macro_precision": 0.5617383574248755, "eval_macro_recall": 0.5399115920507231, "eval_runtime": 3.8119, "eval_samples_per_second": 230.33, "eval_steps_per_second": 28.857, "step": 3135 }, { "epoch": 20.0, "eval_accuracy": 0.8372110304444428, "eval_loss": 1.0832605361938477, "eval_macro_f1": 0.5426999278930784, "eval_macro_precision": 0.5539906546541987, "eval_macro_recall": 0.5517989175520424, "eval_runtime": 3.8083, "eval_samples_per_second": 230.549, "eval_steps_per_second": 28.884, "step": 3300 }, { "epoch": 21.0, "eval_accuracy": 0.839499751064478, "eval_loss": 1.0868653059005737, "eval_macro_f1": 0.53917903306677, "eval_macro_precision": 0.5749497192899481, "eval_macro_recall": 0.5373986303762837, "eval_runtime": 3.8141, "eval_samples_per_second": 230.199, "eval_steps_per_second": 28.84, "step": 3465 }, { "epoch": 21.21212121212121, "grad_norm": 0.46956342458724976, "learning_rate": 2.8804895021462055e-05, "loss": 0.0224, "step": 3500 }, { "epoch": 22.0, "eval_accuracy": 0.8391096282315175, "eval_loss": 1.1133960485458374, "eval_macro_f1": 0.5443083566401578, "eval_macro_precision": 0.5641662348298077, "eval_macro_recall": 0.5481264782508153, "eval_runtime": 3.813, "eval_samples_per_second": 230.266, "eval_steps_per_second": 28.849, "step": 3630 }, { "epoch": 23.0, "eval_accuracy": 0.838050723399196, "eval_loss": 1.1350845098495483, "eval_macro_f1": 0.544210275681084, "eval_macro_precision": 0.5622845529634525, "eval_macro_recall": 0.5482077780076627, "eval_runtime": 3.8169, "eval_samples_per_second": 230.032, "eval_steps_per_second": 28.82, "step": 3795 }, { "epoch": 24.0, "eval_accuracy": 0.838050723399196, "eval_loss": 1.1593064069747925, "eval_macro_f1": 0.5416925123679783, "eval_macro_precision": 0.5680966862603153, "eval_macro_recall": 0.5448186956610225, "eval_runtime": 3.8224, "eval_samples_per_second": 229.699, "eval_steps_per_second": 28.778, "step": 3960 }, { "epoch": 24.242424242424242, "grad_norm": 0.9432898759841919, "learning_rate": 2.071363237498395e-05, "loss": 0.0149, "step": 4000 }, { "epoch": 25.0, "eval_accuracy": 0.8398712966196785, "eval_loss": 1.1621172428131104, "eval_macro_f1": 0.5446548032758927, "eval_macro_precision": 0.572924667254898, "eval_macro_recall": 0.5453082375999836, "eval_runtime": 3.8202, "eval_samples_per_second": 229.832, "eval_steps_per_second": 28.794, "step": 4125 } ], "logging_steps": 500, "max_steps": 5280, "num_input_tokens_seen": 0, "num_train_epochs": 32, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.745821766419731e+16, "train_batch_size": 16, "trial_name": null, "trial_params": { "learning_rate": 7.86794379743531e-05, "per_device_train_batch_size": 16, "warmup_ratio": 0.07903396276412193, "weight_decay": 0.06816454557507429 } }