{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "eval_steps": 500, "global_step": 9390, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 3.8149499893188477, "eval_mse_0": 3.755868434906006, "eval_mse_1": 3.9979312419891357, "eval_mse_2": 3.7539875507354736, "eval_mse_3": 3.752009868621826, "eval_pcc_0": 0.19238500149154686, "eval_pcc_1": 0.1776398118553242, "eval_pcc_2": 0.007480538067184007, "eval_pcc_3": -0.3489487228560554, "eval_runtime": 102.287, "eval_samples_per_second": 24.441, "eval_steps_per_second": 3.06, "step": 313 }, { "epoch": 1.6, "learning_rate": 9.467518636847711e-06, "loss": 10.7002, "step": 500 }, { "epoch": 2.0, "eval_loss": 1.9381023645401, "eval_mse_0": 2.1219639778137207, "eval_mse_1": 1.756824016571045, "eval_mse_2": 1.7819912433624268, "eval_mse_3": 2.091628074645996, "eval_pcc_0": 0.3882147701970152, "eval_pcc_1": 0.44674624068925833, "eval_pcc_2": 0.4466174095825964, "eval_pcc_3": 0.3952736055599715, "eval_runtime": 101.3791, "eval_samples_per_second": 24.66, "eval_steps_per_second": 3.087, "step": 626 }, { "epoch": 3.0, "eval_loss": 1.68776273727417, "eval_mse_0": 1.873080849647522, "eval_mse_1": 1.4604036808013916, "eval_mse_2": 1.5174270868301392, "eval_mse_3": 1.9001433849334717, "eval_pcc_0": 0.4870196345841211, "eval_pcc_1": 0.5445910750056667, "eval_pcc_2": 0.5375179097868217, "eval_pcc_3": 0.4978717809473544, "eval_runtime": 101.2516, "eval_samples_per_second": 24.691, "eval_steps_per_second": 3.091, "step": 939 }, { "epoch": 3.19, "learning_rate": 8.935037273695422e-06, "loss": 2.2251, "step": 1000 }, { "epoch": 4.0, "eval_loss": 1.4689956903457642, "eval_mse_0": 1.6715960502624512, "eval_mse_1": 1.2319759130477905, "eval_mse_2": 1.34701406955719, "eval_mse_3": 1.6253972053527832, "eval_pcc_0": 0.6013148331951307, "eval_pcc_1": 0.6605847059188266, "eval_pcc_2": 0.6530310261210702, "eval_pcc_3": 0.6166350338813703, "eval_runtime": 101.2565, "eval_samples_per_second": 24.69, "eval_steps_per_second": 3.091, "step": 1252 }, { "epoch": 4.79, "learning_rate": 8.402555910543132e-06, "loss": 1.1372, "step": 1500 }, { "epoch": 5.0, "eval_loss": 1.2834810018539429, "eval_mse_0": 1.4975959062576294, "eval_mse_1": 1.1127667427062988, "eval_mse_2": 1.0967236757278442, "eval_mse_3": 1.4268358945846558, "eval_pcc_0": 0.667617438547489, "eval_pcc_1": 0.7272906468479914, "eval_pcc_2": 0.7394717314114011, "eval_pcc_3": 0.6982645021045678, "eval_runtime": 100.9481, "eval_samples_per_second": 24.765, "eval_steps_per_second": 3.101, "step": 1565 }, { "epoch": 6.0, "eval_loss": 1.5220290422439575, "eval_mse_0": 1.797974944114685, "eval_mse_1": 1.138425588607788, "eval_mse_2": 1.3066158294677734, "eval_mse_3": 1.845100998878479, "eval_pcc_0": 0.6437292562481384, "eval_pcc_1": 0.7277782333291264, "eval_pcc_2": 0.721712301994763, "eval_pcc_3": 0.6702157450411846, "eval_runtime": 100.7127, "eval_samples_per_second": 24.823, "eval_steps_per_second": 3.108, "step": 1878 }, { "epoch": 6.39, "learning_rate": 7.870074547390843e-06, "loss": 0.8374, "step": 2000 }, { "epoch": 7.0, "eval_loss": 1.1292527914047241, "eval_mse_0": 1.3234530687332153, "eval_mse_1": 0.974384069442749, "eval_mse_2": 0.9388296008110046, "eval_mse_3": 1.2803441286087036, "eval_pcc_0": 0.6891364312392516, "eval_pcc_1": 0.7499496458933641, "eval_pcc_2": 0.7549399472585467, "eval_pcc_3": 0.7128765406376988, "eval_runtime": 100.5658, "eval_samples_per_second": 24.859, "eval_steps_per_second": 3.112, "step": 2191 }, { "epoch": 7.99, "learning_rate": 7.337593184238552e-06, "loss": 0.6712, "step": 2500 }, { "epoch": 8.0, "eval_loss": 1.5684882402420044, "eval_mse_0": 1.8322019577026367, "eval_mse_1": 1.2989739179611206, "eval_mse_2": 1.323211669921875, "eval_mse_3": 1.8195669651031494, "eval_pcc_0": 0.6737439236904719, "eval_pcc_1": 0.7488637790730267, "eval_pcc_2": 0.7514121200560983, "eval_pcc_3": 0.6994098335223952, "eval_runtime": 100.8109, "eval_samples_per_second": 24.799, "eval_steps_per_second": 3.105, "step": 2504 }, { "epoch": 9.0, "eval_loss": 1.4102668762207031, "eval_mse_0": 1.6206148862838745, "eval_mse_1": 1.1973247528076172, "eval_mse_2": 1.2341444492340088, "eval_mse_3": 1.5889830589294434, "eval_pcc_0": 0.6851788133643094, "eval_pcc_1": 0.7581205361413508, "eval_pcc_2": 0.7615756986314213, "eval_pcc_3": 0.708741002662432, "eval_runtime": 100.6389, "eval_samples_per_second": 24.841, "eval_steps_per_second": 3.11, "step": 2817 }, { "epoch": 9.58, "learning_rate": 6.805111821086262e-06, "loss": 0.57, "step": 3000 }, { "epoch": 10.0, "eval_loss": 1.348330020904541, "eval_mse_0": 1.5797193050384521, "eval_mse_1": 1.1507281064987183, "eval_mse_2": 1.1433807611465454, "eval_mse_3": 1.5194944143295288, "eval_pcc_0": 0.6852188493270641, "eval_pcc_1": 0.7562537239574154, "eval_pcc_2": 0.7557024973237004, "eval_pcc_3": 0.7054120588235016, "eval_runtime": 100.5701, "eval_samples_per_second": 24.858, "eval_steps_per_second": 3.112, "step": 3130 }, { "epoch": 11.0, "eval_loss": 1.5709850788116455, "eval_mse_0": 1.7214244604110718, "eval_mse_1": 1.412024736404419, "eval_mse_2": 1.4254834651947021, "eval_mse_3": 1.7250086069107056, "eval_pcc_0": 0.677518409167023, "eval_pcc_1": 0.7571105568376092, "eval_pcc_2": 0.7600582646570031, "eval_pcc_3": 0.7045871868047965, "eval_runtime": 100.5813, "eval_samples_per_second": 24.856, "eval_steps_per_second": 3.112, "step": 3443 }, { "epoch": 11.18, "learning_rate": 6.272630457933972e-06, "loss": 0.5098, "step": 3500 }, { "epoch": 12.0, "eval_loss": 1.4235678911209106, "eval_mse_0": 1.7117582559585571, "eval_mse_1": 1.1990758180618286, "eval_mse_2": 1.233838438987732, "eval_mse_3": 1.549597978591919, "eval_pcc_0": 0.6972914883828407, "eval_pcc_1": 0.7674880831251762, "eval_pcc_2": 0.7730368089204195, "eval_pcc_3": 0.7219812663434483, "eval_runtime": 100.9265, "eval_samples_per_second": 24.771, "eval_steps_per_second": 3.101, "step": 3756 }, { "epoch": 12.78, "learning_rate": 5.7401490947816826e-06, "loss": 0.4561, "step": 4000 }, { "epoch": 13.0, "eval_loss": 1.3512660264968872, "eval_mse_0": 1.6560535430908203, "eval_mse_1": 1.0737866163253784, "eval_mse_2": 1.167884111404419, "eval_mse_3": 1.507339596748352, "eval_pcc_0": 0.6978020685983576, "eval_pcc_1": 0.7669595572984247, "eval_pcc_2": 0.7704479293605421, "eval_pcc_3": 0.7218605199092147, "eval_runtime": 100.7483, "eval_samples_per_second": 24.814, "eval_steps_per_second": 3.107, "step": 4069 }, { "epoch": 14.0, "eval_loss": 1.4450573921203613, "eval_mse_0": 1.7005558013916016, "eval_mse_1": 1.2587356567382812, "eval_mse_2": 1.264267086982727, "eval_mse_3": 1.5566717386245728, "eval_pcc_0": 0.69044979553412, "eval_pcc_1": 0.769899037280411, "eval_pcc_2": 0.7715228679160522, "eval_pcc_3": 0.7180620133606046, "eval_runtime": 100.74, "eval_samples_per_second": 24.816, "eval_steps_per_second": 3.107, "step": 4382 }, { "epoch": 14.38, "learning_rate": 5.207667731629393e-06, "loss": 0.4262, "step": 4500 }, { "epoch": 15.0, "eval_loss": 1.2482588291168213, "eval_mse_0": 1.5385853052139282, "eval_mse_1": 0.9972391724586487, "eval_mse_2": 1.0317867994308472, "eval_mse_3": 1.4254233837127686, "eval_pcc_0": 0.7066790588440384, "eval_pcc_1": 0.7721327175086767, "eval_pcc_2": 0.7759734776016606, "eval_pcc_3": 0.7353232362019365, "eval_runtime": 100.6177, "eval_samples_per_second": 24.847, "eval_steps_per_second": 3.111, "step": 4695 }, { "epoch": 15.97, "learning_rate": 4.675186368477103e-06, "loss": 0.3855, "step": 5000 }, { "epoch": 16.0, "eval_loss": 1.2972311973571777, "eval_mse_0": 1.5596505403518677, "eval_mse_1": 1.1698797941207886, "eval_mse_2": 1.006371021270752, "eval_mse_3": 1.4530200958251953, "eval_pcc_0": 0.6952806951896114, "eval_pcc_1": 0.7707645603019087, "eval_pcc_2": 0.77274772478712, "eval_pcc_3": 0.7223344399086041, "eval_runtime": 101.0829, "eval_samples_per_second": 24.732, "eval_steps_per_second": 3.096, "step": 5008 }, { "epoch": 17.0, "eval_loss": 1.1730587482452393, "eval_mse_0": 1.4187856912612915, "eval_mse_1": 0.958073616027832, "eval_mse_2": 0.9783998727798462, "eval_mse_3": 1.3369756937026978, "eval_pcc_0": 0.7180267227388664, "eval_pcc_1": 0.7730168663887997, "eval_pcc_2": 0.7747266394371812, "eval_pcc_3": 0.7386234213820474, "eval_runtime": 100.8439, "eval_samples_per_second": 24.791, "eval_steps_per_second": 3.104, "step": 5321 }, { "epoch": 17.57, "learning_rate": 4.142705005324814e-06, "loss": 0.3572, "step": 5500 }, { "epoch": 18.0, "eval_loss": 1.3291434049606323, "eval_mse_0": 1.5203914642333984, "eval_mse_1": 1.1383605003356934, "eval_mse_2": 1.206380009651184, "eval_mse_3": 1.4514374732971191, "eval_pcc_0": 0.6933471923810368, "eval_pcc_1": 0.769459601309813, "eval_pcc_2": 0.7712010361671546, "eval_pcc_3": 0.7209381847866932, "eval_runtime": 100.7791, "eval_samples_per_second": 24.807, "eval_steps_per_second": 3.106, "step": 5634 }, { "epoch": 19.0, "eval_loss": 1.312319278717041, "eval_mse_0": 1.602527379989624, "eval_mse_1": 1.0087671279907227, "eval_mse_2": 1.1071666479110718, "eval_mse_3": 1.5308144092559814, "eval_pcc_0": 0.6996240750220193, "eval_pcc_1": 0.7725455333159118, "eval_pcc_2": 0.7726030288434409, "eval_pcc_3": 0.7283744247063056, "eval_runtime": 100.9276, "eval_samples_per_second": 24.77, "eval_steps_per_second": 3.101, "step": 5947 }, { "epoch": 19.17, "learning_rate": 3.6102236421725247e-06, "loss": 0.3685, "step": 6000 }, { "epoch": 20.0, "eval_loss": 1.194506049156189, "eval_mse_0": 1.382067084312439, "eval_mse_1": 1.0061663389205933, "eval_mse_2": 1.0188947916030884, "eval_mse_3": 1.370891809463501, "eval_pcc_0": 0.7058221298639312, "eval_pcc_1": 0.7695843992718115, "eval_pcc_2": 0.7696140899835595, "eval_pcc_3": 0.728296502432432, "eval_runtime": 100.7002, "eval_samples_per_second": 24.826, "eval_steps_per_second": 3.108, "step": 6260 }, { "epoch": 20.77, "learning_rate": 3.0777422790202343e-06, "loss": 0.3088, "step": 6500 }, { "epoch": 21.0, "eval_loss": 1.3180090188980103, "eval_mse_0": 1.5541187524795532, "eval_mse_1": 1.1070194244384766, "eval_mse_2": 1.1667213439941406, "eval_mse_3": 1.4441734552383423, "eval_pcc_0": 0.7058127676527958, "eval_pcc_1": 0.7705582261262895, "eval_pcc_2": 0.774042720209831, "eval_pcc_3": 0.7320268286311518, "eval_runtime": 100.5901, "eval_samples_per_second": 24.853, "eval_steps_per_second": 3.112, "step": 6573 }, { "epoch": 22.0, "eval_loss": 1.3018730878829956, "eval_mse_0": 1.6211662292480469, "eval_mse_1": 1.0196444988250732, "eval_mse_2": 1.0701396465301514, "eval_mse_3": 1.4965418577194214, "eval_pcc_0": 0.7032254446171953, "eval_pcc_1": 0.7763021450998202, "eval_pcc_2": 0.7768905768460305, "eval_pcc_3": 0.7307131933544537, "eval_runtime": 100.4268, "eval_samples_per_second": 24.894, "eval_steps_per_second": 3.117, "step": 6886 }, { "epoch": 22.36, "learning_rate": 2.5452609158679447e-06, "loss": 0.3233, "step": 7000 }, { "epoch": 23.0, "eval_loss": 1.24806547164917, "eval_mse_0": 1.4808491468429565, "eval_mse_1": 1.0740902423858643, "eval_mse_2": 1.075546383857727, "eval_mse_3": 1.3617769479751587, "eval_pcc_0": 0.7083667911361838, "eval_pcc_1": 0.7728779871674696, "eval_pcc_2": 0.772914855053998, "eval_pcc_3": 0.7355157998059723, "eval_runtime": 100.6985, "eval_samples_per_second": 24.827, "eval_steps_per_second": 3.108, "step": 7199 }, { "epoch": 23.96, "learning_rate": 2.012779552715655e-06, "loss": 0.2833, "step": 7500 }, { "epoch": 24.0, "eval_loss": 1.2991966009140015, "eval_mse_0": 1.5615781545639038, "eval_mse_1": 1.0941851139068604, "eval_mse_2": 1.1104274988174438, "eval_mse_3": 1.4305994510650635, "eval_pcc_0": 0.7051879609373185, "eval_pcc_1": 0.7743239129791413, "eval_pcc_2": 0.7744441249826322, "eval_pcc_3": 0.7341415407223592, "eval_runtime": 100.4516, "eval_samples_per_second": 24.888, "eval_steps_per_second": 3.116, "step": 7512 }, { "epoch": 25.0, "eval_loss": 1.2408746480941772, "eval_mse_0": 1.5619524717330933, "eval_mse_1": 0.9850296974182129, "eval_mse_2": 1.0179213285446167, "eval_mse_3": 1.3985931873321533, "eval_pcc_0": 0.7060029463516057, "eval_pcc_1": 0.7733307038103625, "eval_pcc_2": 0.772266549126521, "eval_pcc_3": 0.7339611885634963, "eval_runtime": 100.8095, "eval_samples_per_second": 24.799, "eval_steps_per_second": 3.105, "step": 7825 }, { "epoch": 25.56, "learning_rate": 1.4802981895633654e-06, "loss": 0.2638, "step": 8000 }, { "epoch": 26.0, "eval_loss": 1.2704391479492188, "eval_mse_0": 1.5212668180465698, "eval_mse_1": 1.081721544265747, "eval_mse_2": 1.0785852670669556, "eval_mse_3": 1.4001832008361816, "eval_pcc_0": 0.7106093307015088, "eval_pcc_1": 0.7789024607932206, "eval_pcc_2": 0.778950859240916, "eval_pcc_3": 0.737807152463041, "eval_runtime": 100.6436, "eval_samples_per_second": 24.84, "eval_steps_per_second": 3.11, "step": 8138 }, { "epoch": 27.0, "eval_loss": 1.3225528001785278, "eval_mse_0": 1.5737501382827759, "eval_mse_1": 1.1116234064102173, "eval_mse_2": 1.1209893226623535, "eval_mse_3": 1.4838504791259766, "eval_pcc_0": 0.7037694468899893, "eval_pcc_1": 0.7743690194599888, "eval_pcc_2": 0.774563190337004, "eval_pcc_3": 0.732017624810135, "eval_runtime": 100.5945, "eval_samples_per_second": 24.852, "eval_steps_per_second": 3.112, "step": 8451 }, { "epoch": 27.16, "learning_rate": 9.478168264110757e-07, "loss": 0.2495, "step": 8500 }, { "epoch": 28.0, "eval_loss": 1.244736909866333, "eval_mse_0": 1.4972796440124512, "eval_mse_1": 1.041703701019287, "eval_mse_2": 1.0512263774871826, "eval_mse_3": 1.3887364864349365, "eval_pcc_0": 0.7094004718076047, "eval_pcc_1": 0.7743070082705473, "eval_pcc_2": 0.7746505211725243, "eval_pcc_3": 0.7355049412770104, "eval_runtime": 100.7018, "eval_samples_per_second": 24.826, "eval_steps_per_second": 3.108, "step": 8764 }, { "epoch": 28.75, "learning_rate": 4.1533546325878595e-07, "loss": 0.2396, "step": 9000 }, { "epoch": 29.0, "eval_loss": 1.2441821098327637, "eval_mse_0": 1.4906530380249023, "eval_mse_1": 1.0265562534332275, "eval_mse_2": 1.0737992525100708, "eval_mse_3": 1.3857206106185913, "eval_pcc_0": 0.7046759605484965, "eval_pcc_1": 0.7732490309579964, "eval_pcc_2": 0.7728792272728902, "eval_pcc_3": 0.7323298208640173, "eval_runtime": 100.5549, "eval_samples_per_second": 24.862, "eval_steps_per_second": 3.113, "step": 9077 }, { "epoch": 30.0, "eval_loss": 1.257910966873169, "eval_mse_0": 1.490696668624878, "eval_mse_1": 1.0560848712921143, "eval_mse_2": 1.087384819984436, "eval_mse_3": 1.3974806070327759, "eval_pcc_0": 0.7062959885893274, "eval_pcc_1": 0.7733662364545745, "eval_pcc_2": 0.7731921896926559, "eval_pcc_3": 0.7334781892016459, "eval_runtime": 100.5894, "eval_samples_per_second": 24.854, "eval_steps_per_second": 3.112, "step": 9390 }, { "epoch": 30.0, "step": 9390, "total_flos": 0.0, "train_loss": 1.0932516819118057, "train_runtime": 9115.9528, "train_samples_per_second": 8.227, "train_steps_per_second": 1.03 } ], "logging_steps": 500, "max_steps": 9390, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }