{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "eval_steps": 500, "global_step": 9390, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 35.253448486328125, "eval_mse_0": 27.93633460998535, "eval_mse_1": 40.281654357910156, "eval_mse_2": 37.096221923828125, "eval_mse_3": 35.69955825805664, "eval_pcc_0": -0.12943421939487107, "eval_pcc_1": 0.11087851067313088, "eval_pcc_2": -0.01677494886701865, "eval_pcc_3": -0.06531007486316416, "eval_runtime": 125.3927, "eval_samples_per_second": 19.937, "eval_steps_per_second": 2.496, "step": 313 }, { "epoch": 1.6, "learning_rate": 9.467518636847711e-06, "loss": 39.9026, "step": 500 }, { "epoch": 2.0, "eval_loss": 24.858530044555664, "eval_mse_0": 16.21411895751953, "eval_mse_1": 29.686359405517578, "eval_mse_2": 26.701322555541992, "eval_mse_3": 26.832250595092773, "eval_pcc_0": -0.12953277782821082, "eval_pcc_1": 0.15263354753596575, "eval_pcc_2": -0.012667331337741387, "eval_pcc_3": -0.1740250623680245, "eval_runtime": 119.1615, "eval_samples_per_second": 20.98, "eval_steps_per_second": 2.627, "step": 626 }, { "epoch": 3.0, "eval_loss": 15.985442161560059, "eval_mse_0": 8.285962104797363, "eval_mse_1": 19.734556198120117, "eval_mse_2": 17.5140323638916, "eval_mse_3": 18.40717124938965, "eval_pcc_0": -0.046687682567983446, "eval_pcc_1": 0.2051045936246996, "eval_pcc_2": -0.026585957250347247, "eval_pcc_3": -0.17914015544544384, "eval_runtime": 120.0286, "eval_samples_per_second": 20.828, "eval_steps_per_second": 2.608, "step": 939 }, { "epoch": 3.19, "learning_rate": 8.935037273695422e-06, "loss": 21.5881, "step": 1000 }, { "epoch": 4.0, "eval_loss": 9.054893493652344, "eval_mse_0": 4.047198295593262, "eval_mse_1": 11.194503784179688, "eval_mse_2": 9.912358283996582, "eval_mse_3": 11.065492630004883, "eval_pcc_0": 0.028015550873066734, "eval_pcc_1": 0.20084089515908607, "eval_pcc_2": -0.09479908506417078, "eval_pcc_3": -0.23376913241158687, "eval_runtime": 124.6145, "eval_samples_per_second": 20.062, "eval_steps_per_second": 2.512, "step": 1252 }, { "epoch": 4.79, "learning_rate": 8.402555910543132e-06, "loss": 9.6588, "step": 1500 }, { "epoch": 5.0, "eval_loss": 4.534116268157959, "eval_mse_0": 2.5608792304992676, "eval_mse_1": 5.271782875061035, "eval_mse_2": 4.763275623321533, "eval_mse_3": 5.540524482727051, "eval_pcc_0": 0.18326387925089216, "eval_pcc_1": 0.29440170760540424, "eval_pcc_2": -0.023020681634067185, "eval_pcc_3": -0.18361875818361142, "eval_runtime": 123.2269, "eval_samples_per_second": 20.288, "eval_steps_per_second": 2.54, "step": 1565 }, { "epoch": 6.0, "eval_loss": 3.9766886234283447, "eval_mse_0": 3.775294065475464, "eval_mse_1": 4.050389289855957, "eval_mse_2": 3.85265851020813, "eval_mse_3": 4.228421688079834, "eval_pcc_0": 0.10950709526046151, "eval_pcc_1": 0.17878205237621464, "eval_pcc_2": 0.15411864066632502, "eval_pcc_3": 0.11296732255791778, "eval_runtime": 123.3863, "eval_samples_per_second": 20.262, "eval_steps_per_second": 2.537, "step": 1878 }, { "epoch": 6.39, "learning_rate": 7.870074547390843e-06, "loss": 3.4997, "step": 2000 }, { "epoch": 7.0, "eval_loss": 1.9719064235687256, "eval_mse_0": 2.0252020359039307, "eval_mse_1": 1.9209357500076294, "eval_mse_2": 1.8631107807159424, "eval_mse_3": 2.078376054763794, "eval_pcc_0": 0.4697406407291476, "eval_pcc_1": 0.4714825247758767, "eval_pcc_2": 0.46683720988296845, "eval_pcc_3": 0.47567376150906243, "eval_runtime": 123.8915, "eval_samples_per_second": 20.179, "eval_steps_per_second": 2.526, "step": 2191 }, { "epoch": 7.99, "learning_rate": 7.337593184238552e-06, "loss": 1.5435, "step": 2500 }, { "epoch": 8.0, "eval_loss": 1.7928707599639893, "eval_mse_0": 1.9913127422332764, "eval_mse_1": 1.6557341814041138, "eval_mse_2": 1.6641719341278076, "eval_mse_3": 1.8602606058120728, "eval_pcc_0": 0.49067170864681886, "eval_pcc_1": 0.48233546636694913, "eval_pcc_2": 0.47482439405251997, "eval_pcc_3": 0.48934525150382063, "eval_runtime": 123.1617, "eval_samples_per_second": 20.299, "eval_steps_per_second": 2.541, "step": 2504 }, { "epoch": 9.0, "eval_loss": 1.7764745950698853, "eval_mse_0": 1.91068434715271, "eval_mse_1": 1.6419528722763062, "eval_mse_2": 1.6560568809509277, "eval_mse_3": 1.8972089290618896, "eval_pcc_0": 0.4966193508589803, "eval_pcc_1": 0.4926786631014638, "eval_pcc_2": 0.4863374398117205, "eval_pcc_3": 0.49518858002390853, "eval_runtime": 118.3567, "eval_samples_per_second": 21.123, "eval_steps_per_second": 2.645, "step": 2817 }, { "epoch": 9.58, "learning_rate": 6.805111821086262e-06, "loss": 1.1265, "step": 3000 }, { "epoch": 10.0, "eval_loss": 1.501739263534546, "eval_mse_0": 1.6362265348434448, "eval_mse_1": 1.361143708229065, "eval_mse_2": 1.3952668905258179, "eval_mse_3": 1.614320158958435, "eval_pcc_0": 0.5806262984037475, "eval_pcc_1": 0.5939864858919732, "eval_pcc_2": 0.5797502168628982, "eval_pcc_3": 0.5833515543731059, "eval_runtime": 117.2862, "eval_samples_per_second": 21.315, "eval_steps_per_second": 2.669, "step": 3130 }, { "epoch": 11.0, "eval_loss": 1.3959943056106567, "eval_mse_0": 1.5413020849227905, "eval_mse_1": 1.2384960651397705, "eval_mse_2": 1.28584885597229, "eval_mse_3": 1.5183273553848267, "eval_pcc_0": 0.6110165037907326, "eval_pcc_1": 0.6366207620213354, "eval_pcc_2": 0.6233864115342613, "eval_pcc_3": 0.6147175651996913, "eval_runtime": 122.9223, "eval_samples_per_second": 20.338, "eval_steps_per_second": 2.546, "step": 3443 }, { "epoch": 11.18, "learning_rate": 6.272630457933972e-06, "loss": 0.9846, "step": 3500 }, { "epoch": 12.0, "eval_loss": 1.2620220184326172, "eval_mse_0": 1.4125484228134155, "eval_mse_1": 1.0894999504089355, "eval_mse_2": 1.1511507034301758, "eval_mse_3": 1.3948848247528076, "eval_pcc_0": 0.6386613163271186, "eval_pcc_1": 0.6823474627630972, "eval_pcc_2": 0.6630484829779204, "eval_pcc_3": 0.6485774820429591, "eval_runtime": 123.5626, "eval_samples_per_second": 20.233, "eval_steps_per_second": 2.533, "step": 3756 }, { "epoch": 12.78, "learning_rate": 5.7401490947816826e-06, "loss": 0.922, "step": 4000 }, { "epoch": 13.0, "eval_loss": 1.2597845792770386, "eval_mse_0": 1.4059348106384277, "eval_mse_1": 1.0759116411209106, "eval_mse_2": 1.1540552377700806, "eval_mse_3": 1.403237223625183, "eval_pcc_0": 0.6445167475467969, "eval_pcc_1": 0.6877805024993177, "eval_pcc_2": 0.6675128211257984, "eval_pcc_3": 0.6496152231146246, "eval_runtime": 124.9289, "eval_samples_per_second": 20.011, "eval_steps_per_second": 2.505, "step": 4069 }, { "epoch": 14.0, "eval_loss": 1.286251187324524, "eval_mse_0": 1.466301441192627, "eval_mse_1": 1.1047468185424805, "eval_mse_2": 1.1404074430465698, "eval_mse_3": 1.4335484504699707, "eval_pcc_0": 0.6362665135071693, "eval_pcc_1": 0.6924939693575668, "eval_pcc_2": 0.6764887019377555, "eval_pcc_3": 0.6482415782472952, "eval_runtime": 120.8305, "eval_samples_per_second": 20.69, "eval_steps_per_second": 2.59, "step": 4382 }, { "epoch": 14.38, "learning_rate": 5.207667731629393e-06, "loss": 0.8126, "step": 4500 }, { "epoch": 15.0, "eval_loss": 1.1709247827529907, "eval_mse_0": 1.3320404291152954, "eval_mse_1": 0.9946478605270386, "eval_mse_2": 1.050872564315796, "eval_mse_3": 1.3061367273330688, "eval_pcc_0": 0.6633085453601341, "eval_pcc_1": 0.7144007675516612, "eval_pcc_2": 0.699356776928208, "eval_pcc_3": 0.6753245217035396, "eval_runtime": 121.0479, "eval_samples_per_second": 20.653, "eval_steps_per_second": 2.586, "step": 4695 }, { "epoch": 15.97, "learning_rate": 4.675186368477103e-06, "loss": 0.7783, "step": 5000 }, { "epoch": 16.0, "eval_loss": 1.2495259046554565, "eval_mse_0": 1.4424058198928833, "eval_mse_1": 1.0512551069259644, "eval_mse_2": 1.1075762510299683, "eval_mse_3": 1.3968638181686401, "eval_pcc_0": 0.6352550449549746, "eval_pcc_1": 0.6967280653723789, "eval_pcc_2": 0.6792741476546669, "eval_pcc_3": 0.64945214920132, "eval_runtime": 121.0064, "eval_samples_per_second": 20.66, "eval_steps_per_second": 2.587, "step": 5008 }, { "epoch": 17.0, "eval_loss": 1.4459205865859985, "eval_mse_0": 1.6440175771713257, "eval_mse_1": 1.267660140991211, "eval_mse_2": 1.279537558555603, "eval_mse_3": 1.592469334602356, "eval_pcc_0": 0.6292826434289844, "eval_pcc_1": 0.6901424064792703, "eval_pcc_2": 0.6738298495150237, "eval_pcc_3": 0.6447228899623312, "eval_runtime": 114.6109, "eval_samples_per_second": 21.813, "eval_steps_per_second": 2.731, "step": 5321 }, { "epoch": 17.57, "learning_rate": 4.142705005324814e-06, "loss": 0.7406, "step": 5500 }, { "epoch": 18.0, "eval_loss": 1.2125182151794434, "eval_mse_0": 1.3871902227401733, "eval_mse_1": 1.0241502523422241, "eval_mse_2": 1.084183692932129, "eval_mse_3": 1.3545498847961426, "eval_pcc_0": 0.648592932654662, "eval_pcc_1": 0.7049778702998659, "eval_pcc_2": 0.6896955826045925, "eval_pcc_3": 0.6621401050383381, "eval_runtime": 119.8271, "eval_samples_per_second": 20.863, "eval_steps_per_second": 2.612, "step": 5634 }, { "epoch": 19.0, "eval_loss": 1.2266896963119507, "eval_mse_0": 1.3873472213745117, "eval_mse_1": 1.039366364479065, "eval_mse_2": 1.1103386878967285, "eval_mse_3": 1.369706630706787, "eval_pcc_0": 0.6531011068847803, "eval_pcc_1": 0.7109674417540542, "eval_pcc_2": 0.6965897285366162, "eval_pcc_3": 0.6668592560212646, "eval_runtime": 120.6159, "eval_samples_per_second": 20.727, "eval_steps_per_second": 2.595, "step": 5947 }, { "epoch": 19.17, "learning_rate": 3.6102236421725247e-06, "loss": 0.727, "step": 6000 }, { "epoch": 20.0, "eval_loss": 1.2268439531326294, "eval_mse_0": 1.396562933921814, "eval_mse_1": 1.057541847229004, "eval_mse_2": 1.0942394733428955, "eval_mse_3": 1.359034538269043, "eval_pcc_0": 0.6507776199758031, "eval_pcc_1": 0.7003761762305559, "eval_pcc_2": 0.6872739839173044, "eval_pcc_3": 0.6644769465648772, "eval_runtime": 119.4331, "eval_samples_per_second": 20.932, "eval_steps_per_second": 2.621, "step": 6260 }, { "epoch": 20.77, "learning_rate": 3.0777422790202343e-06, "loss": 0.6663, "step": 6500 }, { "epoch": 21.0, "eval_loss": 1.2125563621520996, "eval_mse_0": 1.4073569774627686, "eval_mse_1": 1.0193030834197998, "eval_mse_2": 1.0586122274398804, "eval_mse_3": 1.3649543523788452, "eval_pcc_0": 0.6471936491552426, "eval_pcc_1": 0.7111482932311268, "eval_pcc_2": 0.6984204198361936, "eval_pcc_3": 0.6627486264786455, "eval_runtime": 120.2217, "eval_samples_per_second": 20.795, "eval_steps_per_second": 2.604, "step": 6573 }, { "epoch": 22.0, "eval_loss": 1.2184520959854126, "eval_mse_0": 1.4079796075820923, "eval_mse_1": 1.0253150463104248, "eval_mse_2": 1.0776245594024658, "eval_mse_3": 1.3628888130187988, "eval_pcc_0": 0.6521598121302783, "eval_pcc_1": 0.7078635737733601, "eval_pcc_2": 0.6953593800918589, "eval_pcc_3": 0.6670740488150717, "eval_runtime": 121.07, "eval_samples_per_second": 20.649, "eval_steps_per_second": 2.585, "step": 6886 }, { "epoch": 22.36, "learning_rate": 2.5452609158679447e-06, "loss": 0.679, "step": 7000 }, { "epoch": 23.0, "eval_loss": 1.210086703300476, "eval_mse_0": 1.3777058124542236, "eval_mse_1": 1.025907278060913, "eval_mse_2": 1.0858049392700195, "eval_mse_3": 1.3509302139282227, "eval_pcc_0": 0.6573554933056146, "eval_pcc_1": 0.708297350110508, "eval_pcc_2": 0.6949233524759934, "eval_pcc_3": 0.6696772655430471, "eval_runtime": 121.1393, "eval_samples_per_second": 20.637, "eval_steps_per_second": 2.584, "step": 7199 }, { "epoch": 23.96, "learning_rate": 2.012779552715655e-06, "loss": 0.6528, "step": 7500 }, { "epoch": 24.0, "eval_loss": 1.2004283666610718, "eval_mse_0": 1.3663476705551147, "eval_mse_1": 1.0228403806686401, "eval_mse_2": 1.0746558904647827, "eval_mse_3": 1.3378705978393555, "eval_pcc_0": 0.6582689400749444, "eval_pcc_1": 0.7062594658983674, "eval_pcc_2": 0.693820643427176, "eval_pcc_3": 0.6701055929214624, "eval_runtime": 114.7453, "eval_samples_per_second": 21.787, "eval_steps_per_second": 2.728, "step": 7512 }, { "epoch": 25.0, "eval_loss": 1.2322232723236084, "eval_mse_0": 1.4174244403839111, "eval_mse_1": 1.048292636871338, "eval_mse_2": 1.0903046131134033, "eval_mse_3": 1.3728687763214111, "eval_pcc_0": 0.6477830243165981, "eval_pcc_1": 0.7000484427666047, "eval_pcc_2": 0.6879032701994684, "eval_pcc_3": 0.6619093166774149, "eval_runtime": 116.7561, "eval_samples_per_second": 21.412, "eval_steps_per_second": 2.681, "step": 7825 }, { "epoch": 25.56, "learning_rate": 1.4802981895633654e-06, "loss": 0.6595, "step": 8000 }, { "epoch": 26.0, "eval_loss": 1.2048563957214355, "eval_mse_0": 1.379071831703186, "eval_mse_1": 1.0233663320541382, "eval_mse_2": 1.0703182220458984, "eval_mse_3": 1.346666932106018, "eval_pcc_0": 0.6500023241358558, "eval_pcc_1": 0.7045484661520249, "eval_pcc_2": 0.6920802171890341, "eval_pcc_3": 0.6637560007475978, "eval_runtime": 121.8052, "eval_samples_per_second": 20.525, "eval_steps_per_second": 2.57, "step": 8138 }, { "epoch": 27.0, "eval_loss": 1.228061556816101, "eval_mse_0": 1.4074989557266235, "eval_mse_1": 1.0446228981018066, "eval_mse_2": 1.09303617477417, "eval_mse_3": 1.3670885562896729, "eval_pcc_0": 0.6502650614159742, "eval_pcc_1": 0.7012770306789152, "eval_pcc_2": 0.6889389448570339, "eval_pcc_3": 0.6642077424768584, "eval_runtime": 120.9832, "eval_samples_per_second": 20.664, "eval_steps_per_second": 2.587, "step": 8451 }, { "epoch": 27.16, "learning_rate": 9.478168264110757e-07, "loss": 0.6276, "step": 8500 }, { "epoch": 28.0, "eval_loss": 1.2095798254013062, "eval_mse_0": 1.3814703226089478, "eval_mse_1": 1.0255109071731567, "eval_mse_2": 1.0790736675262451, "eval_mse_3": 1.352264404296875, "eval_pcc_0": 0.6484841338961417, "eval_pcc_1": 0.7040187033038017, "eval_pcc_2": 0.6907324570253328, "eval_pcc_3": 0.6621290104932265, "eval_runtime": 121.0164, "eval_samples_per_second": 20.658, "eval_steps_per_second": 2.586, "step": 8764 }, { "epoch": 28.75, "learning_rate": 4.1533546325878595e-07, "loss": 0.6087, "step": 9000 }, { "epoch": 29.0, "eval_loss": 1.209524393081665, "eval_mse_0": 1.386349081993103, "eval_mse_1": 1.02353835105896, "eval_mse_2": 1.0750586986541748, "eval_mse_3": 1.353151798248291, "eval_pcc_0": 0.6495366534228407, "eval_pcc_1": 0.704988942154168, "eval_pcc_2": 0.6921612298465728, "eval_pcc_3": 0.663405210333282, "eval_runtime": 120.3378, "eval_samples_per_second": 20.775, "eval_steps_per_second": 2.601, "step": 9077 }, { "epoch": 30.0, "eval_loss": 1.207377552986145, "eval_mse_0": 1.383621096611023, "eval_mse_1": 1.0227596759796143, "eval_mse_2": 1.0722577571868896, "eval_mse_3": 1.3508676290512085, "eval_pcc_0": 0.6492893725954768, "eval_pcc_1": 0.7047307122301355, "eval_pcc_2": 0.6919477211029743, "eval_pcc_3": 0.6631503671275302, "eval_runtime": 120.172, "eval_samples_per_second": 20.804, "eval_steps_per_second": 2.605, "step": 9390 }, { "epoch": 30.0, "step": 9390, "total_flos": 0.0, "train_loss": 4.614258646127134, "train_runtime": 10767.5202, "train_samples_per_second": 6.965, "train_steps_per_second": 0.872 } ], "logging_steps": 500, "max_steps": 9390, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }