{ "best_metric": 0.8028125762939453, "best_model_checkpoint": "./llama3/26-07-24-Weni-WeniGPT-Agents-Llama3-5.1.24-SFT_Experiment with DPO and Llama3 70b-2_max_steps-32_batch_8_2024-07-26/checkpoint-32", "epoch": 28.444444444444443, "eval_steps": 2, "global_step": 32, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.7777777777777777, "eval_bleu": 0.02170523150063329, "eval_loss": 1.5546669960021973, "eval_rouge1": 0.5658290942329046, "eval_rouge2": 0.371216207944968, "eval_rougeL": 0.5544380522179795, "eval_rougeLsum": 0.557866697701094, "eval_runtime": 8.903, "eval_samples_per_second": 0.899, "eval_steps_per_second": 0.112, "step": 2 }, { "epoch": 3.5555555555555554, "eval_bleu": 0.016226404911743514, "eval_loss": 1.2373424768447876, "eval_rouge1": 0.1880159013464857, "eval_rouge2": 0.110501293225719, "eval_rougeL": 0.16774584781013288, "eval_rougeLsum": 0.18374679825109685, "eval_runtime": 7.2666, "eval_samples_per_second": 1.101, "eval_steps_per_second": 0.138, "step": 4 }, { "epoch": 5.333333333333333, "eval_bleu": 0.023706516954741297, "eval_loss": 1.123119592666626, "eval_rouge1": 0.6910290907363565, "eval_rouge2": 0.4711964975736579, "eval_rougeL": 0.673625245352441, "eval_rougeLsum": 0.6773221910352027, "eval_runtime": 5.1525, "eval_samples_per_second": 1.553, "eval_steps_per_second": 0.194, "step": 6 }, { "epoch": 7.111111111111111, "eval_bleu": 0.02402168983627918, "eval_loss": 1.0689845085144043, "eval_rouge1": 0.6850615792026293, "eval_rouge2": 0.4793755526234388, "eval_rougeL": 0.6702061146380454, "eval_rougeLsum": 0.6743158691399876, "eval_runtime": 3.5693, "eval_samples_per_second": 2.241, "eval_steps_per_second": 0.28, "step": 8 }, { "epoch": 8.88888888888889, "grad_norm": 0.07844142016839124, "learning_rate": 0.00015555702330196023, "loss": 1.3804, "step": 10 }, { "epoch": 8.88888888888889, "eval_bleu": 0.023887172871334808, "eval_loss": 1.0223946571350098, "eval_rouge1": 0.6905898370545817, "eval_rouge2": 0.48557691609082443, "eval_rougeL": 0.6761180651666854, "eval_rougeLsum": 0.6790446004304602, "eval_runtime": 3.5677, "eval_samples_per_second": 2.242, "eval_steps_per_second": 0.28, "step": 10 }, { "epoch": 10.666666666666666, "eval_bleu": 0.024572108418943618, "eval_loss": 0.9860392808914185, "eval_rouge1": 0.7026862650772006, "eval_rouge2": 0.5047043820859161, "eval_rougeL": 0.6864786779541874, "eval_rougeLsum": 0.6905060789723064, "eval_runtime": 3.3868, "eval_samples_per_second": 2.362, "eval_steps_per_second": 0.295, "step": 12 }, { "epoch": 12.444444444444445, "eval_bleu": 0.025429922278916713, "eval_loss": 0.9487287998199463, "eval_rouge1": 0.702795109607377, "eval_rouge2": 0.5129806168696595, "eval_rougeL": 0.6895042238397062, "eval_rougeLsum": 0.6947769064530469, "eval_runtime": 5.1904, "eval_samples_per_second": 1.541, "eval_steps_per_second": 0.193, "step": 14 }, { "epoch": 14.222222222222221, "eval_bleu": 0.025784195587265114, "eval_loss": 0.9159524440765381, "eval_rouge1": 0.7050102641077431, "eval_rouge2": 0.5179038464865816, "eval_rougeL": 0.6926453468372398, "eval_rougeLsum": 0.6956999803401138, "eval_runtime": 3.0778, "eval_samples_per_second": 2.599, "eval_steps_per_second": 0.325, "step": 16 }, { "epoch": 16.0, "eval_bleu": 0.026025861429164285, "eval_loss": 0.8864424228668213, "eval_rouge1": 0.705898212199683, "eval_rouge2": 0.5170955130574988, "eval_rougeL": 0.6904453277300169, "eval_rougeLsum": 0.6940204043388014, "eval_runtime": 2.9696, "eval_samples_per_second": 2.694, "eval_steps_per_second": 0.337, "step": 18 }, { "epoch": 17.77777777777778, "grad_norm": 0.07718723263288825, "learning_rate": 6.173165676349103e-05, "loss": 0.9654, "step": 20 }, { "epoch": 17.77777777777778, "eval_bleu": 0.02596132515691949, "eval_loss": 0.8659763336181641, "eval_rouge1": 0.7122588553468148, "eval_rouge2": 0.5211107654415348, "eval_rougeL": 0.6941911179368457, "eval_rougeLsum": 0.6984106609696767, "eval_runtime": 4.8517, "eval_samples_per_second": 1.649, "eval_steps_per_second": 0.206, "step": 20 }, { "epoch": 19.555555555555557, "eval_bleu": 0.026343897948467077, "eval_loss": 0.8515705466270447, "eval_rouge1": 0.7170480778501949, "eval_rouge2": 0.5292045209358929, "eval_rougeL": 0.6998572077319152, "eval_rougeLsum": 0.7024199215687508, "eval_runtime": 3.1988, "eval_samples_per_second": 2.501, "eval_steps_per_second": 0.313, "step": 22 }, { "epoch": 21.333333333333332, "eval_bleu": 0.025984446331357137, "eval_loss": 0.831386923789978, "eval_rouge1": 0.7153236968867391, "eval_rouge2": 0.5193671791233874, "eval_rougeL": 0.6965103363468828, "eval_rougeLsum": 0.6985595214525613, "eval_runtime": 3.4937, "eval_samples_per_second": 2.29, "eval_steps_per_second": 0.286, "step": 24 }, { "epoch": 23.11111111111111, "eval_bleu": 0.02650924667302787, "eval_loss": 0.8183804750442505, "eval_rouge1": 0.7231112511482293, "eval_rouge2": 0.5345562806445456, "eval_rougeL": 0.7030100294534072, "eval_rougeLsum": 0.7054157022984199, "eval_runtime": 6.0566, "eval_samples_per_second": 1.321, "eval_steps_per_second": 0.165, "step": 26 }, { "epoch": 24.88888888888889, "eval_bleu": 0.02615320831075556, "eval_loss": 0.8091402649879456, "eval_rouge1": 0.7207310641325595, "eval_rouge2": 0.5306337690270762, "eval_rougeL": 0.7021914980969189, "eval_rougeLsum": 0.70505062098815, "eval_runtime": 3.5767, "eval_samples_per_second": 2.237, "eval_steps_per_second": 0.28, "step": 28 }, { "epoch": 26.666666666666668, "grad_norm": 0.08878053905359502, "learning_rate": 1.921471959676957e-06, "loss": 0.7862, "step": 30 }, { "epoch": 26.666666666666668, "eval_bleu": 0.026398236337588678, "eval_loss": 0.8041359186172485, "eval_rouge1": 0.72570713977056, "eval_rouge2": 0.5397446508715937, "eval_rougeL": 0.7070571025409038, "eval_rougeLsum": 0.7093496693847328, "eval_runtime": 3.4925, "eval_samples_per_second": 2.291, "eval_steps_per_second": 0.286, "step": 30 }, { "epoch": 28.444444444444443, "eval_bleu": 0.026431275243775837, "eval_loss": 0.8028125762939453, "eval_rouge1": 0.7266203046783699, "eval_rouge2": 0.5395778050172039, "eval_rougeL": 0.7073728737550329, "eval_rougeLsum": 0.7103352101315058, "eval_runtime": 4.8603, "eval_samples_per_second": 1.646, "eval_steps_per_second": 0.206, "step": 32 } ], "logging_steps": 10, "max_steps": 32, "num_input_tokens_seen": 0, "num_train_epochs": 32, "save_steps": 2, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 23047781220352.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }