| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 69.94276094276094, | |
| "global_step": 980, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.94, | |
| "eval_accuracy": 0.4674227882409832, | |
| "eval_loss": 2.8955132961273193, | |
| "eval_runtime": 18.7933, | |
| "eval_samples_per_second": 105.729, | |
| "eval_steps_per_second": 0.692, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "eval_accuracy": 0.46653443074701806, | |
| "eval_loss": 0.8548561334609985, | |
| "eval_runtime": 19.7558, | |
| "eval_samples_per_second": 100.578, | |
| "eval_steps_per_second": 0.658, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "eval_accuracy": 0.4693885992808441, | |
| "eval_loss": 0.4985657036304474, | |
| "eval_runtime": 17.6713, | |
| "eval_samples_per_second": 112.442, | |
| "eval_steps_per_second": 0.736, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "eval_accuracy": 0.47050840246337, | |
| "eval_loss": 0.4260375499725342, | |
| "eval_runtime": 18.6, | |
| "eval_samples_per_second": 106.828, | |
| "eval_steps_per_second": 0.699, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "eval_accuracy": 0.4770233523775381, | |
| "eval_loss": 0.28108683228492737, | |
| "eval_runtime": 20.1989, | |
| "eval_samples_per_second": 98.372, | |
| "eval_steps_per_second": 0.644, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "eval_accuracy": 0.4891314089527132, | |
| "eval_loss": 0.14415042102336884, | |
| "eval_runtime": 17.7108, | |
| "eval_samples_per_second": 112.191, | |
| "eval_steps_per_second": 0.734, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "eval_accuracy": 0.4952228625005786, | |
| "eval_loss": 0.07607654482126236, | |
| "eval_runtime": 20.5293, | |
| "eval_samples_per_second": 96.788, | |
| "eval_steps_per_second": 0.633, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "eval_accuracy": 0.4971059440177199, | |
| "eval_loss": 0.044076837599277496, | |
| "eval_runtime": 19.5255, | |
| "eval_samples_per_second": 101.764, | |
| "eval_steps_per_second": 0.666, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 8.94, | |
| "eval_accuracy": 0.49830059772080165, | |
| "eval_loss": 0.028873631730675697, | |
| "eval_runtime": 18.1929, | |
| "eval_samples_per_second": 109.218, | |
| "eval_steps_per_second": 0.715, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 9.94, | |
| "eval_accuracy": 0.49892894814336236, | |
| "eval_loss": 0.02155212126672268, | |
| "eval_runtime": 21.2728, | |
| "eval_samples_per_second": 93.406, | |
| "eval_steps_per_second": 0.611, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 10.94, | |
| "eval_accuracy": 0.4993455503827718, | |
| "eval_loss": 0.01726018451154232, | |
| "eval_runtime": 18.282, | |
| "eval_samples_per_second": 108.686, | |
| "eval_steps_per_second": 0.711, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 11.94, | |
| "eval_accuracy": 0.49957699607133255, | |
| "eval_loss": 0.01476956345140934, | |
| "eval_runtime": 21.5099, | |
| "eval_samples_per_second": 92.376, | |
| "eval_steps_per_second": 0.604, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 12.94, | |
| "eval_accuracy": 0.49980450225881146, | |
| "eval_loss": 0.012352370657026768, | |
| "eval_runtime": 20.1169, | |
| "eval_samples_per_second": 98.773, | |
| "eval_steps_per_second": 0.646, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 13.94, | |
| "eval_accuracy": 0.4999542032999231, | |
| "eval_loss": 0.010818206705152988, | |
| "eval_runtime": 19.2177, | |
| "eval_samples_per_second": 103.395, | |
| "eval_steps_per_second": 0.676, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 14.94, | |
| "eval_accuracy": 0.5000457967000769, | |
| "eval_loss": 0.009764532558619976, | |
| "eval_runtime": 20.2058, | |
| "eval_samples_per_second": 98.338, | |
| "eval_steps_per_second": 0.643, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 15.94, | |
| "eval_accuracy": 0.5001354203496898, | |
| "eval_loss": 0.009084771387279034, | |
| "eval_runtime": 18.5782, | |
| "eval_samples_per_second": 106.953, | |
| "eval_steps_per_second": 0.7, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 16.94, | |
| "eval_accuracy": 0.5001925431153772, | |
| "eval_loss": 0.008187664672732353, | |
| "eval_runtime": 19.5572, | |
| "eval_samples_per_second": 101.599, | |
| "eval_steps_per_second": 0.665, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 17.94, | |
| "eval_accuracy": 0.500231938126196, | |
| "eval_loss": 0.008109861984848976, | |
| "eval_runtime": 18.7388, | |
| "eval_samples_per_second": 106.037, | |
| "eval_steps_per_second": 0.694, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 18.94, | |
| "eval_accuracy": 0.500286106266072, | |
| "eval_loss": 0.00722927413880825, | |
| "eval_runtime": 18.6903, | |
| "eval_samples_per_second": 106.312, | |
| "eval_steps_per_second": 0.696, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 19.94, | |
| "eval_accuracy": 0.5003156525241861, | |
| "eval_loss": 0.00708524277433753, | |
| "eval_runtime": 22.4973, | |
| "eval_samples_per_second": 88.322, | |
| "eval_steps_per_second": 0.578, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 20.94, | |
| "eval_accuracy": 0.5003461836575707, | |
| "eval_loss": 0.006836502812802792, | |
| "eval_runtime": 18.9858, | |
| "eval_samples_per_second": 104.657, | |
| "eval_steps_per_second": 0.685, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 21.94, | |
| "eval_accuracy": 0.5003905030447419, | |
| "eval_loss": 0.006470560096204281, | |
| "eval_runtime": 21.4277, | |
| "eval_samples_per_second": 92.73, | |
| "eval_steps_per_second": 0.607, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 22.94, | |
| "eval_accuracy": 0.5004102005501513, | |
| "eval_loss": 0.00611697556450963, | |
| "eval_runtime": 18.7742, | |
| "eval_samples_per_second": 105.837, | |
| "eval_steps_per_second": 0.692, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 23.94, | |
| "eval_accuracy": 0.5004239888039379, | |
| "eval_loss": 0.006002925336360931, | |
| "eval_runtime": 17.6523, | |
| "eval_samples_per_second": 112.563, | |
| "eval_steps_per_second": 0.736, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 24.94, | |
| "eval_accuracy": 0.5004466409351588, | |
| "eval_loss": 0.0059402757324278355, | |
| "eval_runtime": 20.2385, | |
| "eval_samples_per_second": 98.179, | |
| "eval_steps_per_second": 0.642, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 25.94, | |
| "eval_accuracy": 0.5004584594384044, | |
| "eval_loss": 0.005674673244357109, | |
| "eval_runtime": 18.6707, | |
| "eval_samples_per_second": 106.423, | |
| "eval_steps_per_second": 0.696, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 26.94, | |
| "eval_accuracy": 0.5004683081911091, | |
| "eval_loss": 0.0056230453774333, | |
| "eval_runtime": 19.1333, | |
| "eval_samples_per_second": 103.85, | |
| "eval_steps_per_second": 0.679, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 27.94, | |
| "eval_accuracy": 0.5004850510707072, | |
| "eval_loss": 0.005449134390801191, | |
| "eval_runtime": 19.2945, | |
| "eval_samples_per_second": 102.983, | |
| "eval_steps_per_second": 0.674, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 28.94, | |
| "eval_accuracy": 0.5004929300728709, | |
| "eval_loss": 0.005320119671523571, | |
| "eval_runtime": 20.0519, | |
| "eval_samples_per_second": 99.093, | |
| "eval_steps_per_second": 0.648, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 29.94, | |
| "eval_accuracy": 0.5005008090750347, | |
| "eval_loss": 0.005209068767726421, | |
| "eval_runtime": 19.9363, | |
| "eval_samples_per_second": 99.667, | |
| "eval_steps_per_second": 0.652, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 30.94, | |
| "eval_accuracy": 0.5005067183266575, | |
| "eval_loss": 0.005184635519981384, | |
| "eval_runtime": 19.4285, | |
| "eval_samples_per_second": 102.272, | |
| "eval_steps_per_second": 0.669, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 31.94, | |
| "eval_accuracy": 0.5005254309567965, | |
| "eval_loss": 0.004937352146953344, | |
| "eval_runtime": 21.1515, | |
| "eval_samples_per_second": 93.941, | |
| "eval_steps_per_second": 0.615, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 32.94, | |
| "eval_accuracy": 0.5005382343353126, | |
| "eval_loss": 0.004831444472074509, | |
| "eval_runtime": 19.1176, | |
| "eval_samples_per_second": 103.936, | |
| "eval_steps_per_second": 0.68, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 33.94, | |
| "eval_accuracy": 0.500543158711665, | |
| "eval_loss": 0.004661811515688896, | |
| "eval_runtime": 21.9936, | |
| "eval_samples_per_second": 90.345, | |
| "eval_steps_per_second": 0.591, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 34.94, | |
| "eval_accuracy": 0.5005480830880173, | |
| "eval_loss": 0.00474146893247962, | |
| "eval_runtime": 18.9478, | |
| "eval_samples_per_second": 104.867, | |
| "eval_steps_per_second": 0.686, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 35.67, | |
| "learning_rate": 2.448979591836735e-05, | |
| "loss": 2.3265, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 35.94, | |
| "eval_accuracy": 0.5005569469654516, | |
| "eval_loss": 0.004643740598112345, | |
| "eval_runtime": 18.6646, | |
| "eval_samples_per_second": 106.458, | |
| "eval_steps_per_second": 0.697, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 36.94, | |
| "eval_accuracy": 0.5005628562170744, | |
| "eval_loss": 0.00456605339422822, | |
| "eval_runtime": 18.4246, | |
| "eval_samples_per_second": 107.845, | |
| "eval_steps_per_second": 0.706, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 37.94, | |
| "eval_accuracy": 0.5005569469654516, | |
| "eval_loss": 0.0045891194604337215, | |
| "eval_runtime": 19.9985, | |
| "eval_samples_per_second": 99.358, | |
| "eval_steps_per_second": 0.65, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 38.94, | |
| "eval_accuracy": 0.5005677805934268, | |
| "eval_loss": 0.004413667134940624, | |
| "eval_runtime": 20.9756, | |
| "eval_samples_per_second": 94.729, | |
| "eval_steps_per_second": 0.62, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 39.94, | |
| "eval_accuracy": 0.5005717200945087, | |
| "eval_loss": 0.004356020595878363, | |
| "eval_runtime": 18.9086, | |
| "eval_samples_per_second": 105.084, | |
| "eval_steps_per_second": 0.688, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 40.94, | |
| "eval_accuracy": 0.5005707352192381, | |
| "eval_loss": 0.004351349081844091, | |
| "eval_runtime": 21.2375, | |
| "eval_samples_per_second": 93.561, | |
| "eval_steps_per_second": 0.612, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 41.94, | |
| "eval_accuracy": 0.5005727049697791, | |
| "eval_loss": 0.004294094629585743, | |
| "eval_runtime": 20.2798, | |
| "eval_samples_per_second": 97.979, | |
| "eval_steps_per_second": 0.641, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 42.94, | |
| "eval_accuracy": 0.5005795990966724, | |
| "eval_loss": 0.004292026627808809, | |
| "eval_runtime": 19.0207, | |
| "eval_samples_per_second": 104.465, | |
| "eval_steps_per_second": 0.683, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 43.94, | |
| "eval_accuracy": 0.5005815688472134, | |
| "eval_loss": 0.004189325030893087, | |
| "eval_runtime": 21.9849, | |
| "eval_samples_per_second": 90.38, | |
| "eval_steps_per_second": 0.591, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 44.94, | |
| "eval_accuracy": 0.5005815688472134, | |
| "eval_loss": 0.0041327630169689655, | |
| "eval_runtime": 19.2534, | |
| "eval_samples_per_second": 103.203, | |
| "eval_steps_per_second": 0.675, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 45.94, | |
| "eval_accuracy": 0.5005855083482952, | |
| "eval_loss": 0.004192625638097525, | |
| "eval_runtime": 21.5055, | |
| "eval_samples_per_second": 92.395, | |
| "eval_steps_per_second": 0.604, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 46.94, | |
| "eval_accuracy": 0.5005904327246475, | |
| "eval_loss": 0.004125718027353287, | |
| "eval_runtime": 19.7387, | |
| "eval_samples_per_second": 100.665, | |
| "eval_steps_per_second": 0.659, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 47.94, | |
| "eval_accuracy": 0.5005953571009999, | |
| "eval_loss": 0.0040009464137256145, | |
| "eval_runtime": 21.0892, | |
| "eval_samples_per_second": 94.219, | |
| "eval_steps_per_second": 0.616, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 48.94, | |
| "eval_accuracy": 0.5006002814773522, | |
| "eval_loss": 0.00396856851875782, | |
| "eval_runtime": 19.1717, | |
| "eval_samples_per_second": 103.643, | |
| "eval_steps_per_second": 0.678, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 49.94, | |
| "eval_accuracy": 0.5006032361031637, | |
| "eval_loss": 0.0039261928759515285, | |
| "eval_runtime": 19.9925, | |
| "eval_samples_per_second": 99.387, | |
| "eval_steps_per_second": 0.65, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 50.94, | |
| "eval_accuracy": 0.500610130230057, | |
| "eval_loss": 0.0038781561888754368, | |
| "eval_runtime": 22.1008, | |
| "eval_samples_per_second": 89.906, | |
| "eval_steps_per_second": 0.588, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 51.94, | |
| "eval_accuracy": 0.5006071756042456, | |
| "eval_loss": 0.003933804575353861, | |
| "eval_runtime": 17.351, | |
| "eval_samples_per_second": 114.518, | |
| "eval_steps_per_second": 0.749, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 52.94, | |
| "eval_accuracy": 0.500610130230057, | |
| "eval_loss": 0.003865364473313093, | |
| "eval_runtime": 20.5119, | |
| "eval_samples_per_second": 96.871, | |
| "eval_steps_per_second": 0.634, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 53.94, | |
| "eval_accuracy": 0.500612099980598, | |
| "eval_loss": 0.0038321653846651316, | |
| "eval_runtime": 17.3408, | |
| "eval_samples_per_second": 114.585, | |
| "eval_steps_per_second": 0.75, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 54.94, | |
| "eval_accuracy": 0.5006081604795161, | |
| "eval_loss": 0.003891468746587634, | |
| "eval_runtime": 19.3846, | |
| "eval_samples_per_second": 102.504, | |
| "eval_steps_per_second": 0.671, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 55.94, | |
| "eval_accuracy": 0.5006130848558684, | |
| "eval_loss": 0.0038119996897876263, | |
| "eval_runtime": 19.846, | |
| "eval_samples_per_second": 100.121, | |
| "eval_steps_per_second": 0.655, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 56.94, | |
| "eval_accuracy": 0.5006130848558684, | |
| "eval_loss": 0.003837888827547431, | |
| "eval_runtime": 18.0294, | |
| "eval_samples_per_second": 110.209, | |
| "eval_steps_per_second": 0.721, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 57.94, | |
| "eval_accuracy": 0.5006150546064094, | |
| "eval_loss": 0.0037844169419258833, | |
| "eval_runtime": 18.0119, | |
| "eval_samples_per_second": 110.316, | |
| "eval_steps_per_second": 0.722, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 58.94, | |
| "eval_accuracy": 0.5006160394816799, | |
| "eval_loss": 0.003779872553423047, | |
| "eval_runtime": 19.4256, | |
| "eval_samples_per_second": 102.288, | |
| "eval_steps_per_second": 0.669, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 59.94, | |
| "eval_accuracy": 0.5006170243569503, | |
| "eval_loss": 0.0038144837599247694, | |
| "eval_runtime": 18.4675, | |
| "eval_samples_per_second": 107.595, | |
| "eval_steps_per_second": 0.704, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 60.94, | |
| "eval_accuracy": 0.5006180092322208, | |
| "eval_loss": 0.0037365842144936323, | |
| "eval_runtime": 20.6911, | |
| "eval_samples_per_second": 96.031, | |
| "eval_steps_per_second": 0.628, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 61.94, | |
| "eval_accuracy": 0.5006209638580322, | |
| "eval_loss": 0.0037281711120158434, | |
| "eval_runtime": 17.8768, | |
| "eval_samples_per_second": 111.149, | |
| "eval_steps_per_second": 0.727, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 62.94, | |
| "eval_accuracy": 0.5006209638580322, | |
| "eval_loss": 0.003779030404984951, | |
| "eval_runtime": 18.2703, | |
| "eval_samples_per_second": 108.756, | |
| "eval_steps_per_second": 0.712, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 63.94, | |
| "eval_accuracy": 0.5006239184838436, | |
| "eval_loss": 0.0037230353336781263, | |
| "eval_runtime": 20.4452, | |
| "eval_samples_per_second": 97.187, | |
| "eval_steps_per_second": 0.636, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 64.94, | |
| "eval_accuracy": 0.5006249033591141, | |
| "eval_loss": 0.003699967870488763, | |
| "eval_runtime": 18.5245, | |
| "eval_samples_per_second": 107.264, | |
| "eval_steps_per_second": 0.702, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 65.94, | |
| "eval_accuracy": 0.5006249033591141, | |
| "eval_loss": 0.0036831670440733433, | |
| "eval_runtime": 17.7233, | |
| "eval_samples_per_second": 112.112, | |
| "eval_steps_per_second": 0.733, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 66.94, | |
| "eval_accuracy": 0.5006229336085731, | |
| "eval_loss": 0.0037006225902587175, | |
| "eval_runtime": 19.5885, | |
| "eval_samples_per_second": 101.437, | |
| "eval_steps_per_second": 0.664, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 67.94, | |
| "eval_accuracy": 0.5006258882343846, | |
| "eval_loss": 0.0036684926599264145, | |
| "eval_runtime": 19.3473, | |
| "eval_samples_per_second": 102.702, | |
| "eval_steps_per_second": 0.672, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 68.94, | |
| "eval_accuracy": 0.5006249033591141, | |
| "eval_loss": 0.003660534741356969, | |
| "eval_runtime": 19.8083, | |
| "eval_samples_per_second": 100.312, | |
| "eval_steps_per_second": 0.656, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 69.94, | |
| "eval_accuracy": 0.5006249033591141, | |
| "eval_loss": 0.0036585668567568064, | |
| "eval_runtime": 17.5658, | |
| "eval_samples_per_second": 113.118, | |
| "eval_steps_per_second": 0.74, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 69.94, | |
| "step": 980, | |
| "total_flos": 1.73221462278144e+17, | |
| "train_loss": 1.1901442605621961, | |
| "train_runtime": 21048.4328, | |
| "train_samples_per_second": 31.521, | |
| "train_steps_per_second": 0.047 | |
| } | |
| ], | |
| "max_steps": 980, | |
| "num_train_epochs": 70, | |
| "total_flos": 1.73221462278144e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |