DGSlow_DialoGPT-small_BST_70epoch / trainer_state.json
shenkha's picture
Upload 13 files
2a03df6 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 69.94276094276094,
"global_step": 980,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.94,
"eval_accuracy": 0.4674227882409832,
"eval_loss": 2.8955132961273193,
"eval_runtime": 18.7933,
"eval_samples_per_second": 105.729,
"eval_steps_per_second": 0.692,
"step": 14
},
{
"epoch": 1.94,
"eval_accuracy": 0.46653443074701806,
"eval_loss": 0.8548561334609985,
"eval_runtime": 19.7558,
"eval_samples_per_second": 100.578,
"eval_steps_per_second": 0.658,
"step": 28
},
{
"epoch": 2.94,
"eval_accuracy": 0.4693885992808441,
"eval_loss": 0.4985657036304474,
"eval_runtime": 17.6713,
"eval_samples_per_second": 112.442,
"eval_steps_per_second": 0.736,
"step": 42
},
{
"epoch": 3.94,
"eval_accuracy": 0.47050840246337,
"eval_loss": 0.4260375499725342,
"eval_runtime": 18.6,
"eval_samples_per_second": 106.828,
"eval_steps_per_second": 0.699,
"step": 56
},
{
"epoch": 4.94,
"eval_accuracy": 0.4770233523775381,
"eval_loss": 0.28108683228492737,
"eval_runtime": 20.1989,
"eval_samples_per_second": 98.372,
"eval_steps_per_second": 0.644,
"step": 70
},
{
"epoch": 5.94,
"eval_accuracy": 0.4891314089527132,
"eval_loss": 0.14415042102336884,
"eval_runtime": 17.7108,
"eval_samples_per_second": 112.191,
"eval_steps_per_second": 0.734,
"step": 84
},
{
"epoch": 6.94,
"eval_accuracy": 0.4952228625005786,
"eval_loss": 0.07607654482126236,
"eval_runtime": 20.5293,
"eval_samples_per_second": 96.788,
"eval_steps_per_second": 0.633,
"step": 98
},
{
"epoch": 7.94,
"eval_accuracy": 0.4971059440177199,
"eval_loss": 0.044076837599277496,
"eval_runtime": 19.5255,
"eval_samples_per_second": 101.764,
"eval_steps_per_second": 0.666,
"step": 112
},
{
"epoch": 8.94,
"eval_accuracy": 0.49830059772080165,
"eval_loss": 0.028873631730675697,
"eval_runtime": 18.1929,
"eval_samples_per_second": 109.218,
"eval_steps_per_second": 0.715,
"step": 126
},
{
"epoch": 9.94,
"eval_accuracy": 0.49892894814336236,
"eval_loss": 0.02155212126672268,
"eval_runtime": 21.2728,
"eval_samples_per_second": 93.406,
"eval_steps_per_second": 0.611,
"step": 140
},
{
"epoch": 10.94,
"eval_accuracy": 0.4993455503827718,
"eval_loss": 0.01726018451154232,
"eval_runtime": 18.282,
"eval_samples_per_second": 108.686,
"eval_steps_per_second": 0.711,
"step": 154
},
{
"epoch": 11.94,
"eval_accuracy": 0.49957699607133255,
"eval_loss": 0.01476956345140934,
"eval_runtime": 21.5099,
"eval_samples_per_second": 92.376,
"eval_steps_per_second": 0.604,
"step": 168
},
{
"epoch": 12.94,
"eval_accuracy": 0.49980450225881146,
"eval_loss": 0.012352370657026768,
"eval_runtime": 20.1169,
"eval_samples_per_second": 98.773,
"eval_steps_per_second": 0.646,
"step": 182
},
{
"epoch": 13.94,
"eval_accuracy": 0.4999542032999231,
"eval_loss": 0.010818206705152988,
"eval_runtime": 19.2177,
"eval_samples_per_second": 103.395,
"eval_steps_per_second": 0.676,
"step": 196
},
{
"epoch": 14.94,
"eval_accuracy": 0.5000457967000769,
"eval_loss": 0.009764532558619976,
"eval_runtime": 20.2058,
"eval_samples_per_second": 98.338,
"eval_steps_per_second": 0.643,
"step": 210
},
{
"epoch": 15.94,
"eval_accuracy": 0.5001354203496898,
"eval_loss": 0.009084771387279034,
"eval_runtime": 18.5782,
"eval_samples_per_second": 106.953,
"eval_steps_per_second": 0.7,
"step": 224
},
{
"epoch": 16.94,
"eval_accuracy": 0.5001925431153772,
"eval_loss": 0.008187664672732353,
"eval_runtime": 19.5572,
"eval_samples_per_second": 101.599,
"eval_steps_per_second": 0.665,
"step": 238
},
{
"epoch": 17.94,
"eval_accuracy": 0.500231938126196,
"eval_loss": 0.008109861984848976,
"eval_runtime": 18.7388,
"eval_samples_per_second": 106.037,
"eval_steps_per_second": 0.694,
"step": 252
},
{
"epoch": 18.94,
"eval_accuracy": 0.500286106266072,
"eval_loss": 0.00722927413880825,
"eval_runtime": 18.6903,
"eval_samples_per_second": 106.312,
"eval_steps_per_second": 0.696,
"step": 266
},
{
"epoch": 19.94,
"eval_accuracy": 0.5003156525241861,
"eval_loss": 0.00708524277433753,
"eval_runtime": 22.4973,
"eval_samples_per_second": 88.322,
"eval_steps_per_second": 0.578,
"step": 280
},
{
"epoch": 20.94,
"eval_accuracy": 0.5003461836575707,
"eval_loss": 0.006836502812802792,
"eval_runtime": 18.9858,
"eval_samples_per_second": 104.657,
"eval_steps_per_second": 0.685,
"step": 294
},
{
"epoch": 21.94,
"eval_accuracy": 0.5003905030447419,
"eval_loss": 0.006470560096204281,
"eval_runtime": 21.4277,
"eval_samples_per_second": 92.73,
"eval_steps_per_second": 0.607,
"step": 308
},
{
"epoch": 22.94,
"eval_accuracy": 0.5004102005501513,
"eval_loss": 0.00611697556450963,
"eval_runtime": 18.7742,
"eval_samples_per_second": 105.837,
"eval_steps_per_second": 0.692,
"step": 322
},
{
"epoch": 23.94,
"eval_accuracy": 0.5004239888039379,
"eval_loss": 0.006002925336360931,
"eval_runtime": 17.6523,
"eval_samples_per_second": 112.563,
"eval_steps_per_second": 0.736,
"step": 336
},
{
"epoch": 24.94,
"eval_accuracy": 0.5004466409351588,
"eval_loss": 0.0059402757324278355,
"eval_runtime": 20.2385,
"eval_samples_per_second": 98.179,
"eval_steps_per_second": 0.642,
"step": 350
},
{
"epoch": 25.94,
"eval_accuracy": 0.5004584594384044,
"eval_loss": 0.005674673244357109,
"eval_runtime": 18.6707,
"eval_samples_per_second": 106.423,
"eval_steps_per_second": 0.696,
"step": 364
},
{
"epoch": 26.94,
"eval_accuracy": 0.5004683081911091,
"eval_loss": 0.0056230453774333,
"eval_runtime": 19.1333,
"eval_samples_per_second": 103.85,
"eval_steps_per_second": 0.679,
"step": 378
},
{
"epoch": 27.94,
"eval_accuracy": 0.5004850510707072,
"eval_loss": 0.005449134390801191,
"eval_runtime": 19.2945,
"eval_samples_per_second": 102.983,
"eval_steps_per_second": 0.674,
"step": 392
},
{
"epoch": 28.94,
"eval_accuracy": 0.5004929300728709,
"eval_loss": 0.005320119671523571,
"eval_runtime": 20.0519,
"eval_samples_per_second": 99.093,
"eval_steps_per_second": 0.648,
"step": 406
},
{
"epoch": 29.94,
"eval_accuracy": 0.5005008090750347,
"eval_loss": 0.005209068767726421,
"eval_runtime": 19.9363,
"eval_samples_per_second": 99.667,
"eval_steps_per_second": 0.652,
"step": 420
},
{
"epoch": 30.94,
"eval_accuracy": 0.5005067183266575,
"eval_loss": 0.005184635519981384,
"eval_runtime": 19.4285,
"eval_samples_per_second": 102.272,
"eval_steps_per_second": 0.669,
"step": 434
},
{
"epoch": 31.94,
"eval_accuracy": 0.5005254309567965,
"eval_loss": 0.004937352146953344,
"eval_runtime": 21.1515,
"eval_samples_per_second": 93.941,
"eval_steps_per_second": 0.615,
"step": 448
},
{
"epoch": 32.94,
"eval_accuracy": 0.5005382343353126,
"eval_loss": 0.004831444472074509,
"eval_runtime": 19.1176,
"eval_samples_per_second": 103.936,
"eval_steps_per_second": 0.68,
"step": 462
},
{
"epoch": 33.94,
"eval_accuracy": 0.500543158711665,
"eval_loss": 0.004661811515688896,
"eval_runtime": 21.9936,
"eval_samples_per_second": 90.345,
"eval_steps_per_second": 0.591,
"step": 476
},
{
"epoch": 34.94,
"eval_accuracy": 0.5005480830880173,
"eval_loss": 0.00474146893247962,
"eval_runtime": 18.9478,
"eval_samples_per_second": 104.867,
"eval_steps_per_second": 0.686,
"step": 490
},
{
"epoch": 35.67,
"learning_rate": 2.448979591836735e-05,
"loss": 2.3265,
"step": 500
},
{
"epoch": 35.94,
"eval_accuracy": 0.5005569469654516,
"eval_loss": 0.004643740598112345,
"eval_runtime": 18.6646,
"eval_samples_per_second": 106.458,
"eval_steps_per_second": 0.697,
"step": 504
},
{
"epoch": 36.94,
"eval_accuracy": 0.5005628562170744,
"eval_loss": 0.00456605339422822,
"eval_runtime": 18.4246,
"eval_samples_per_second": 107.845,
"eval_steps_per_second": 0.706,
"step": 518
},
{
"epoch": 37.94,
"eval_accuracy": 0.5005569469654516,
"eval_loss": 0.0045891194604337215,
"eval_runtime": 19.9985,
"eval_samples_per_second": 99.358,
"eval_steps_per_second": 0.65,
"step": 532
},
{
"epoch": 38.94,
"eval_accuracy": 0.5005677805934268,
"eval_loss": 0.004413667134940624,
"eval_runtime": 20.9756,
"eval_samples_per_second": 94.729,
"eval_steps_per_second": 0.62,
"step": 546
},
{
"epoch": 39.94,
"eval_accuracy": 0.5005717200945087,
"eval_loss": 0.004356020595878363,
"eval_runtime": 18.9086,
"eval_samples_per_second": 105.084,
"eval_steps_per_second": 0.688,
"step": 560
},
{
"epoch": 40.94,
"eval_accuracy": 0.5005707352192381,
"eval_loss": 0.004351349081844091,
"eval_runtime": 21.2375,
"eval_samples_per_second": 93.561,
"eval_steps_per_second": 0.612,
"step": 574
},
{
"epoch": 41.94,
"eval_accuracy": 0.5005727049697791,
"eval_loss": 0.004294094629585743,
"eval_runtime": 20.2798,
"eval_samples_per_second": 97.979,
"eval_steps_per_second": 0.641,
"step": 588
},
{
"epoch": 42.94,
"eval_accuracy": 0.5005795990966724,
"eval_loss": 0.004292026627808809,
"eval_runtime": 19.0207,
"eval_samples_per_second": 104.465,
"eval_steps_per_second": 0.683,
"step": 602
},
{
"epoch": 43.94,
"eval_accuracy": 0.5005815688472134,
"eval_loss": 0.004189325030893087,
"eval_runtime": 21.9849,
"eval_samples_per_second": 90.38,
"eval_steps_per_second": 0.591,
"step": 616
},
{
"epoch": 44.94,
"eval_accuracy": 0.5005815688472134,
"eval_loss": 0.0041327630169689655,
"eval_runtime": 19.2534,
"eval_samples_per_second": 103.203,
"eval_steps_per_second": 0.675,
"step": 630
},
{
"epoch": 45.94,
"eval_accuracy": 0.5005855083482952,
"eval_loss": 0.004192625638097525,
"eval_runtime": 21.5055,
"eval_samples_per_second": 92.395,
"eval_steps_per_second": 0.604,
"step": 644
},
{
"epoch": 46.94,
"eval_accuracy": 0.5005904327246475,
"eval_loss": 0.004125718027353287,
"eval_runtime": 19.7387,
"eval_samples_per_second": 100.665,
"eval_steps_per_second": 0.659,
"step": 658
},
{
"epoch": 47.94,
"eval_accuracy": 0.5005953571009999,
"eval_loss": 0.0040009464137256145,
"eval_runtime": 21.0892,
"eval_samples_per_second": 94.219,
"eval_steps_per_second": 0.616,
"step": 672
},
{
"epoch": 48.94,
"eval_accuracy": 0.5006002814773522,
"eval_loss": 0.00396856851875782,
"eval_runtime": 19.1717,
"eval_samples_per_second": 103.643,
"eval_steps_per_second": 0.678,
"step": 686
},
{
"epoch": 49.94,
"eval_accuracy": 0.5006032361031637,
"eval_loss": 0.0039261928759515285,
"eval_runtime": 19.9925,
"eval_samples_per_second": 99.387,
"eval_steps_per_second": 0.65,
"step": 700
},
{
"epoch": 50.94,
"eval_accuracy": 0.500610130230057,
"eval_loss": 0.0038781561888754368,
"eval_runtime": 22.1008,
"eval_samples_per_second": 89.906,
"eval_steps_per_second": 0.588,
"step": 714
},
{
"epoch": 51.94,
"eval_accuracy": 0.5006071756042456,
"eval_loss": 0.003933804575353861,
"eval_runtime": 17.351,
"eval_samples_per_second": 114.518,
"eval_steps_per_second": 0.749,
"step": 728
},
{
"epoch": 52.94,
"eval_accuracy": 0.500610130230057,
"eval_loss": 0.003865364473313093,
"eval_runtime": 20.5119,
"eval_samples_per_second": 96.871,
"eval_steps_per_second": 0.634,
"step": 742
},
{
"epoch": 53.94,
"eval_accuracy": 0.500612099980598,
"eval_loss": 0.0038321653846651316,
"eval_runtime": 17.3408,
"eval_samples_per_second": 114.585,
"eval_steps_per_second": 0.75,
"step": 756
},
{
"epoch": 54.94,
"eval_accuracy": 0.5006081604795161,
"eval_loss": 0.003891468746587634,
"eval_runtime": 19.3846,
"eval_samples_per_second": 102.504,
"eval_steps_per_second": 0.671,
"step": 770
},
{
"epoch": 55.94,
"eval_accuracy": 0.5006130848558684,
"eval_loss": 0.0038119996897876263,
"eval_runtime": 19.846,
"eval_samples_per_second": 100.121,
"eval_steps_per_second": 0.655,
"step": 784
},
{
"epoch": 56.94,
"eval_accuracy": 0.5006130848558684,
"eval_loss": 0.003837888827547431,
"eval_runtime": 18.0294,
"eval_samples_per_second": 110.209,
"eval_steps_per_second": 0.721,
"step": 798
},
{
"epoch": 57.94,
"eval_accuracy": 0.5006150546064094,
"eval_loss": 0.0037844169419258833,
"eval_runtime": 18.0119,
"eval_samples_per_second": 110.316,
"eval_steps_per_second": 0.722,
"step": 812
},
{
"epoch": 58.94,
"eval_accuracy": 0.5006160394816799,
"eval_loss": 0.003779872553423047,
"eval_runtime": 19.4256,
"eval_samples_per_second": 102.288,
"eval_steps_per_second": 0.669,
"step": 826
},
{
"epoch": 59.94,
"eval_accuracy": 0.5006170243569503,
"eval_loss": 0.0038144837599247694,
"eval_runtime": 18.4675,
"eval_samples_per_second": 107.595,
"eval_steps_per_second": 0.704,
"step": 840
},
{
"epoch": 60.94,
"eval_accuracy": 0.5006180092322208,
"eval_loss": 0.0037365842144936323,
"eval_runtime": 20.6911,
"eval_samples_per_second": 96.031,
"eval_steps_per_second": 0.628,
"step": 854
},
{
"epoch": 61.94,
"eval_accuracy": 0.5006209638580322,
"eval_loss": 0.0037281711120158434,
"eval_runtime": 17.8768,
"eval_samples_per_second": 111.149,
"eval_steps_per_second": 0.727,
"step": 868
},
{
"epoch": 62.94,
"eval_accuracy": 0.5006209638580322,
"eval_loss": 0.003779030404984951,
"eval_runtime": 18.2703,
"eval_samples_per_second": 108.756,
"eval_steps_per_second": 0.712,
"step": 882
},
{
"epoch": 63.94,
"eval_accuracy": 0.5006239184838436,
"eval_loss": 0.0037230353336781263,
"eval_runtime": 20.4452,
"eval_samples_per_second": 97.187,
"eval_steps_per_second": 0.636,
"step": 896
},
{
"epoch": 64.94,
"eval_accuracy": 0.5006249033591141,
"eval_loss": 0.003699967870488763,
"eval_runtime": 18.5245,
"eval_samples_per_second": 107.264,
"eval_steps_per_second": 0.702,
"step": 910
},
{
"epoch": 65.94,
"eval_accuracy": 0.5006249033591141,
"eval_loss": 0.0036831670440733433,
"eval_runtime": 17.7233,
"eval_samples_per_second": 112.112,
"eval_steps_per_second": 0.733,
"step": 924
},
{
"epoch": 66.94,
"eval_accuracy": 0.5006229336085731,
"eval_loss": 0.0037006225902587175,
"eval_runtime": 19.5885,
"eval_samples_per_second": 101.437,
"eval_steps_per_second": 0.664,
"step": 938
},
{
"epoch": 67.94,
"eval_accuracy": 0.5006258882343846,
"eval_loss": 0.0036684926599264145,
"eval_runtime": 19.3473,
"eval_samples_per_second": 102.702,
"eval_steps_per_second": 0.672,
"step": 952
},
{
"epoch": 68.94,
"eval_accuracy": 0.5006249033591141,
"eval_loss": 0.003660534741356969,
"eval_runtime": 19.8083,
"eval_samples_per_second": 100.312,
"eval_steps_per_second": 0.656,
"step": 966
},
{
"epoch": 69.94,
"eval_accuracy": 0.5006249033591141,
"eval_loss": 0.0036585668567568064,
"eval_runtime": 17.5658,
"eval_samples_per_second": 113.118,
"eval_steps_per_second": 0.74,
"step": 980
},
{
"epoch": 69.94,
"step": 980,
"total_flos": 1.73221462278144e+17,
"train_loss": 1.1901442605621961,
"train_runtime": 21048.4328,
"train_samples_per_second": 31.521,
"train_steps_per_second": 0.047
}
],
"max_steps": 980,
"num_train_epochs": 70,
"total_flos": 1.73221462278144e+17,
"trial_name": null,
"trial_params": null
}