{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.992481203007518,
  "global_step": 330,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06,
      "learning_rate": 0.00011764705882352942,
      "loss": 5.1019,
      "step": 2
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00023529411764705883,
      "loss": 4.8202,
      "step": 4
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00035294117647058826,
      "loss": 4.4144,
      "step": 6
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00047058823529411766,
      "loss": 4.0763,
      "step": 8
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0005882352941176471,
      "loss": 3.7584,
      "step": 10
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0007058823529411765,
      "loss": 3.583,
      "step": 12
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.0008235294117647058,
      "loss": 3.4246,
      "step": 14
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.0009411764705882353,
      "loss": 3.301,
      "step": 16
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.0009999748146823375,
      "loss": 3.225,
      "step": 18
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0009997733473639876,
      "loss": 3.1705,
      "step": 20
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.0009993704939095377,
      "loss": 3.0495,
      "step": 22
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.0009987664166507748,
      "loss": 2.9806,
      "step": 24
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.0009979613590036108,
      "loss": 2.9235,
      "step": 26
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.0009969556453699965,
      "loss": 2.8419,
      "step": 28
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.0009957496810072027,
      "loss": 2.7978,
      "step": 30
    },
    {
      "epoch": 0.96,
      "learning_rate": 0.0009943439518645192,
      "loss": 2.7061,
      "step": 32
    },
    {
      "epoch": 0.99,
      "eval_loss": 2.585865020751953,
      "eval_runtime": 6.5169,
      "eval_samples_per_second": 68.13,
      "eval_steps_per_second": 17.033,
      "step": 33
    },
    {
      "epoch": 1.03,
      "learning_rate": 0.0009927390243874398,
      "loss": 2.9771,
      "step": 34
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.0009909355452894098,
      "loss": 2.5679,
      "step": 36
    },
    {
      "epoch": 1.15,
      "learning_rate": 0.0009889342412912295,
      "loss": 2.5551,
      "step": 38
    },
    {
      "epoch": 1.21,
      "learning_rate": 0.0009867359188282193,
      "loss": 2.4842,
      "step": 40
    },
    {
      "epoch": 1.27,
      "learning_rate": 0.0009843414637252614,
      "loss": 2.4689,
      "step": 42
    },
    {
      "epoch": 1.33,
      "learning_rate": 0.0009817518408398536,
      "loss": 2.4216,
      "step": 44
    },
    {
      "epoch": 1.39,
      "learning_rate": 0.000978968093673314,
      "loss": 2.4006,
      "step": 46
    },
    {
      "epoch": 1.45,
      "learning_rate": 0.0009759913439502981,
      "loss": 2.342,
      "step": 48
    },
    {
      "epoch": 1.51,
      "learning_rate": 0.0009728227911667932,
      "loss": 2.3004,
      "step": 50
    },
    {
      "epoch": 1.57,
      "learning_rate": 0.0009694637121067764,
      "loss": 2.2644,
      "step": 52
    },
    {
      "epoch": 1.63,
      "learning_rate": 0.0009659154603277282,
      "loss": 2.2406,
      "step": 54
    },
    {
      "epoch": 1.69,
      "learning_rate": 0.0009621794656152091,
      "loss": 2.2076,
      "step": 56
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.0009582572334067213,
      "loss": 2.1834,
      "step": 58
    },
    {
      "epoch": 1.81,
      "learning_rate": 0.0009541503441850843,
      "loss": 2.1652,
      "step": 60
    },
    {
      "epoch": 1.87,
      "learning_rate": 0.0009498604528415731,
      "loss": 2.1353,
      "step": 62
    },
    {
      "epoch": 1.93,
      "learning_rate": 0.0009453892880090695,
      "loss": 2.1394,
      "step": 64
    },
    {
      "epoch": 1.99,
      "learning_rate": 0.000940738651365503,
      "loss": 2.08,
      "step": 66
    },
    {
      "epoch": 1.99,
      "eval_loss": 1.996474027633667,
      "eval_runtime": 6.333,
      "eval_samples_per_second": 70.109,
      "eval_steps_per_second": 17.527,
      "step": 66
    },
    {
      "epoch": 2.06,
      "learning_rate": 0.000935910416907854,
      "loss": 2.2925,
      "step": 68
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.0009309065301970192,
      "loss": 2.0167,
      "step": 70
    },
    {
      "epoch": 2.18,
      "learning_rate": 0.0009257290075738364,
      "loss": 1.9594,
      "step": 72
    },
    {
      "epoch": 2.24,
      "learning_rate": 0.0009203799353465918,
      "loss": 1.9508,
      "step": 74
    },
    {
      "epoch": 2.3,
      "learning_rate": 0.0009148614689503306,
      "loss": 1.9579,
      "step": 76
    },
    {
      "epoch": 2.36,
      "learning_rate": 0.0009091758320783139,
      "loss": 1.9166,
      "step": 78
    },
    {
      "epoch": 2.42,
      "learning_rate": 0.0009033253157859713,
      "loss": 1.8802,
      "step": 80
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.0008973122775677078,
      "loss": 1.8642,
      "step": 82
    },
    {
      "epoch": 2.54,
      "learning_rate": 0.0008911391404069408,
      "loss": 1.8552,
      "step": 84
    },
    {
      "epoch": 2.6,
      "learning_rate": 0.0008848083917997462,
      "loss": 1.8637,
      "step": 86
    },
    {
      "epoch": 2.66,
      "learning_rate": 0.0008783225827525098,
      "loss": 1.852,
      "step": 88
    },
    {
      "epoch": 2.72,
      "learning_rate": 0.0008716843267539868,
      "loss": 1.7914,
      "step": 90
    },
    {
      "epoch": 2.78,
      "learning_rate": 0.0008648962987221837,
      "loss": 1.8048,
      "step": 92
    },
    {
      "epoch": 2.84,
      "learning_rate": 0.0008579612339264867,
      "loss": 1.7966,
      "step": 94
    },
    {
      "epoch": 2.9,
      "learning_rate": 0.0008508819268854713,
      "loss": 1.7871,
      "step": 96
    },
    {
      "epoch": 2.96,
      "learning_rate": 0.0008436612302408376,
      "loss": 1.7623,
      "step": 98
    },
    {
      "epoch": 2.99,
      "eval_loss": 1.724814772605896,
      "eval_runtime": 7.0329,
      "eval_samples_per_second": 63.132,
      "eval_steps_per_second": 15.783,
      "step": 99
    },
    {
      "epoch": 3.03,
      "learning_rate": 0.0008363020536079239,
      "loss": 1.9929,
      "step": 100
    },
    {
      "epoch": 3.09,
      "learning_rate": 0.0008288073624032633,
      "loss": 1.7159,
      "step": 102
    },
    {
      "epoch": 3.15,
      "learning_rate": 0.0008211801766496537,
      "loss": 1.6946,
      "step": 104
    },
    {
      "epoch": 3.21,
      "learning_rate": 0.000813423569759226,
      "loss": 1.6397,
      "step": 106
    },
    {
      "epoch": 3.27,
      "learning_rate": 0.0008055406672949956,
      "loss": 1.669,
      "step": 108
    },
    {
      "epoch": 3.33,
      "learning_rate": 0.0007975346457114034,
      "loss": 1.6531,
      "step": 110
    },
    {
      "epoch": 3.39,
      "learning_rate": 0.0007894087310743467,
      "loss": 1.6478,
      "step": 112
    },
    {
      "epoch": 3.45,
      "learning_rate": 0.0007811661977612201,
      "loss": 1.6231,
      "step": 114
    },
    {
      "epoch": 3.51,
      "learning_rate": 0.0007728103671414887,
      "loss": 1.6478,
      "step": 116
    },
    {
      "epoch": 3.57,
      "learning_rate": 0.0007643446062383273,
      "loss": 1.6287,
      "step": 118
    },
    {
      "epoch": 3.63,
      "learning_rate": 0.0007557723263718596,
      "loss": 1.5939,
      "step": 120
    },
    {
      "epoch": 3.69,
      "learning_rate": 0.0007470969817845518,
      "loss": 1.6309,
      "step": 122
    },
    {
      "epoch": 3.75,
      "learning_rate": 0.000738322068249308,
      "loss": 1.5665,
      "step": 124
    },
    {
      "epoch": 3.81,
      "learning_rate": 0.0007294511216608307,
      "loss": 1.5953,
      "step": 126
    },
    {
      "epoch": 3.87,
      "learning_rate": 0.0007204877166108151,
      "loss": 1.5987,
      "step": 128
    },
    {
      "epoch": 3.93,
      "learning_rate": 0.0007114354649475498,
      "loss": 1.5961,
      "step": 130
    },
    {
      "epoch": 3.99,
      "learning_rate": 0.0007022980143205046,
      "loss": 1.5408,
      "step": 132
    },
    {
      "epoch": 3.99,
      "eval_loss": 1.5449421405792236,
      "eval_runtime": 7.1026,
      "eval_samples_per_second": 62.512,
      "eval_steps_per_second": 15.628,
      "step": 132
    },
    {
      "epoch": 4.06,
      "learning_rate": 0.0006930790467104916,
      "loss": 1.7394,
      "step": 134
    },
    {
      "epoch": 4.12,
      "learning_rate": 0.0006837822769459941,
      "loss": 1.5015,
      "step": 136
    },
    {
      "epoch": 4.18,
      "learning_rate": 0.000674411451206257,
      "loss": 1.4962,
      "step": 138
    },
    {
      "epoch": 4.24,
      "learning_rate": 0.0006649703455117458,
      "loss": 1.496,
      "step": 140
    },
    {
      "epoch": 4.3,
      "learning_rate": 0.0006554627642025807,
      "loss": 1.4703,
      "step": 142
    },
    {
      "epoch": 4.36,
      "learning_rate": 0.0006458925384055585,
      "loss": 1.474,
      "step": 144
    },
    {
      "epoch": 4.42,
      "learning_rate": 0.0006362635244903819,
      "loss": 1.4663,
      "step": 146
    },
    {
      "epoch": 4.48,
      "learning_rate": 0.0006265796025157153,
      "loss": 1.4556,
      "step": 148
    },
    {
      "epoch": 4.54,
      "learning_rate": 0.0006168446746656973,
      "loss": 1.4779,
      "step": 150
    },
    {
      "epoch": 4.6,
      "learning_rate": 0.0006070626636775348,
      "loss": 1.4687,
      "step": 152
    },
    {
      "epoch": 4.66,
      "learning_rate": 0.0005972375112608181,
      "loss": 1.4614,
      "step": 154
    },
    {
      "epoch": 4.72,
      "learning_rate": 0.000587373176509189,
      "loss": 1.4615,
      "step": 156
    },
    {
      "epoch": 4.78,
      "learning_rate": 0.0005774736343050039,
      "loss": 1.4479,
      "step": 158
    },
    {
      "epoch": 4.84,
      "learning_rate": 0.0005675428737176367,
      "loss": 1.427,
      "step": 160
    },
    {
      "epoch": 4.9,
      "learning_rate": 0.000557584896396062,
      "loss": 1.4327,
      "step": 162
    },
    {
      "epoch": 4.96,
      "learning_rate": 0.0005476037149563726,
      "loss": 1.4147,
      "step": 164
    },
    {
      "epoch": 4.99,
      "eval_loss": 1.4437452554702759,
      "eval_runtime": 6.9311,
      "eval_samples_per_second": 64.059,
      "eval_steps_per_second": 16.015,
      "step": 165
    },
    {
      "epoch": 5.03,
      "learning_rate": 0.0005376033513648743,
      "loss": 1.5806,
      "step": 166
    },
    {
      "epoch": 5.09,
      "learning_rate": 0.0005275878353174165,
      "loss": 1.3567,
      "step": 168
    },
    {
      "epoch": 5.15,
      "learning_rate": 0.0005175612026156045,
      "loss": 1.3639,
      "step": 170
    },
    {
      "epoch": 5.21,
      "learning_rate": 0.0005075274935405553,
      "loss": 1.3578,
      "step": 172
    },
    {
      "epoch": 5.27,
      "learning_rate": 0.0004974907512248451,
      "loss": 1.3787,
      "step": 174
    },
    {
      "epoch": 5.33,
      "learning_rate": 0.0004874550200233085,
      "loss": 1.3406,
      "step": 176
    },
    {
      "epoch": 5.39,
      "learning_rate": 0.0004774243438833481,
      "loss": 1.368,
      "step": 178
    },
    {
      "epoch": 5.45,
      "learning_rate": 0.00046740276471540364,
      "loss": 1.3549,
      "step": 180
    },
    {
      "epoch": 5.51,
      "learning_rate": 0.00045739432076424515,
      "loss": 1.3655,
      "step": 182
    },
    {
      "epoch": 5.57,
      "learning_rate": 0.00044740304498174226,
      "loss": 1.35,
      "step": 184
    },
    {
      "epoch": 5.63,
      "learning_rate": 0.0004374329634017669,
      "loss": 1.3604,
      "step": 186
    },
    {
      "epoch": 5.69,
      "learning_rate": 0.00042748809351788165,
      "loss": 1.3692,
      "step": 188
    },
    {
      "epoch": 5.75,
      "learning_rate": 0.0004175724426644724,
      "loss": 1.3231,
      "step": 190
    },
    {
      "epoch": 5.81,
      "learning_rate": 0.00040769000640197205,
      "loss": 1.3361,
      "step": 192
    },
    {
      "epoch": 5.87,
      "learning_rate": 0.00039784476690683085,
      "loss": 1.3391,
      "step": 194
    },
    {
      "epoch": 5.93,
      "learning_rate": 0.0003880406913668777,
      "loss": 1.3259,
      "step": 196
    },
    {
      "epoch": 5.99,
      "learning_rate": 0.0003782817303827226,
      "loss": 1.3593,
      "step": 198
    },
    {
      "epoch": 5.99,
      "eval_loss": 1.3767662048339844,
      "eval_runtime": 6.7366,
      "eval_samples_per_second": 65.908,
      "eval_steps_per_second": 16.477,
      "step": 198
    },
    {
      "epoch": 6.06,
      "learning_rate": 0.0003685718163758427,
      "loss": 1.4657,
      "step": 200
    },
    {
      "epoch": 6.12,
      "learning_rate": 0.0003589148620039941,
      "loss": 1.2816,
      "step": 202
    },
    {
      "epoch": 6.18,
      "learning_rate": 0.00034931475858458635,
      "loss": 1.2989,
      "step": 204
    },
    {
      "epoch": 6.24,
      "learning_rate": 0.0003397753745266571,
      "loss": 1.3021,
      "step": 206
    },
    {
      "epoch": 6.3,
      "learning_rate": 0.0003303005537720778,
      "loss": 1.2478,
      "step": 208
    },
    {
      "epoch": 6.36,
      "learning_rate": 0.00032089411424661863,
      "loss": 1.2827,
      "step": 210
    },
    {
      "epoch": 6.42,
      "learning_rate": 0.0003115598463214956,
      "loss": 1.2458,
      "step": 212
    },
    {
      "epoch": 6.48,
      "learning_rate": 0.0003023015112860228,
      "loss": 1.2954,
      "step": 214
    },
    {
      "epoch": 6.54,
      "learning_rate": 0.00029312283983198097,
      "loss": 1.2782,
      "step": 216
    },
    {
      "epoch": 6.6,
      "learning_rate": 0.0002840275305503186,
      "loss": 1.2653,
      "step": 218
    },
    {
      "epoch": 6.66,
      "learning_rate": 0.00027501924844078535,
      "loss": 1.2701,
      "step": 220
    },
    {
      "epoch": 6.72,
      "learning_rate": 0.0002661016234351018,
      "loss": 1.2862,
      "step": 222
    },
    {
      "epoch": 6.78,
      "learning_rate": 0.00025727824893426166,
      "loss": 1.277,
      "step": 224
    },
    {
      "epoch": 6.84,
      "learning_rate": 0.00024855268036055346,
      "loss": 1.2791,
      "step": 226
    },
    {
      "epoch": 6.9,
      "learning_rate": 0.00023992843372488355,
      "loss": 1.266,
      "step": 228
    },
    {
      "epoch": 6.96,
      "learning_rate": 0.00023140898420998424,
      "loss": 1.2703,
      "step": 230
    },
    {
      "epoch": 6.99,
      "eval_loss": 1.3362174034118652,
      "eval_runtime": 6.9977,
      "eval_samples_per_second": 63.449,
      "eval_steps_per_second": 15.862,
      "step": 231
    },
    {
      "epoch": 7.03,
      "learning_rate": 0.0002229977647700707,
      "loss": 1.4282,
      "step": 232
    },
    {
      "epoch": 7.09,
      "learning_rate": 0.00021469816474751563,
      "loss": 1.2356,
      "step": 234
    },
    {
      "epoch": 7.15,
      "learning_rate": 0.00020651352850709653,
      "loss": 1.247,
      "step": 236
    },
    {
      "epoch": 7.21,
      "learning_rate": 0.00019844715408836789,
      "loss": 1.2564,
      "step": 238
    },
    {
      "epoch": 7.27,
      "learning_rate": 0.00019050229187669949,
      "loss": 1.2187,
      "step": 240
    },
    {
      "epoch": 7.33,
      "learning_rate": 0.00018268214329351796,
      "loss": 1.2388,
      "step": 242
    },
    {
      "epoch": 7.39,
      "learning_rate": 0.00017498985950627793,
      "loss": 1.2368,
      "step": 244
    },
    {
      "epoch": 7.45,
      "learning_rate": 0.00016742854015868347,
      "loss": 1.212,
      "step": 246
    },
    {
      "epoch": 7.51,
      "learning_rate": 0.00016000123212167155,
      "loss": 1.2377,
      "step": 248
    },
    {
      "epoch": 7.57,
      "learning_rate": 0.00015271092826566108,
      "loss": 1.2146,
      "step": 250
    },
    {
      "epoch": 7.63,
      "learning_rate": 0.0001455605662545592,
      "loss": 1.2209,
      "step": 252
    },
    {
      "epoch": 7.69,
      "learning_rate": 0.00013855302736201687,
      "loss": 1.2319,
      "step": 254
    },
    {
      "epoch": 7.75,
      "learning_rate": 0.00013169113531040461,
      "loss": 1.2271,
      "step": 256
    },
    {
      "epoch": 7.81,
      "learning_rate": 0.00012497765513297976,
      "loss": 1.2021,
      "step": 258
    },
    {
      "epoch": 7.87,
      "learning_rate": 0.00011841529205970281,
      "loss": 1.2264,
      "step": 260
    },
    {
      "epoch": 7.93,
      "learning_rate": 0.00011200669042715162,
      "loss": 1.2228,
      "step": 262
    },
    {
      "epoch": 7.99,
      "learning_rate": 0.00010575443261297229,
      "loss": 1.2528,
      "step": 264
    },
    {
      "epoch": 7.99,
      "eval_loss": 1.3175491094589233,
      "eval_runtime": 6.7551,
      "eval_samples_per_second": 65.729,
      "eval_steps_per_second": 16.432,
      "step": 264
    },
    {
      "epoch": 8.06,
      "learning_rate": 9.96610379952989e-05,
      "loss": 1.3448,
      "step": 266
    },
    {
      "epoch": 8.12,
      "learning_rate": 9.37289619375562e-05,
      "loss": 1.1886,
      "step": 268
    },
    {
      "epoch": 8.18,
      "learning_rate": 8.7960594799059e-05,
      "loss": 1.2062,
      "step": 270
    },
    {
      "epoch": 8.24,
      "learning_rate": 8.235826097180565e-05,
      "loss": 1.2207,
      "step": 272
    },
    {
      "epoch": 8.3,
      "learning_rate": 7.692421794385312e-05,
      "loss": 1.2095,
      "step": 274
    },
    {
      "epoch": 8.36,
      "learning_rate": 7.166065538964955e-05,
      "loss": 1.1986,
      "step": 276
    },
    {
      "epoch": 8.42,
      "learning_rate": 6.656969428769566e-05,
      "loss": 1.1962,
      "step": 278
    },
    {
      "epoch": 8.48,
      "learning_rate": 6.165338606588517e-05,
      "loss": 1.2164,
      "step": 280
    },
    {
      "epoch": 8.54,
      "learning_rate": 5.6913711774872144e-05,
      "loss": 1.1904,
      "step": 282
    },
    {
      "epoch": 8.6,
      "learning_rate": 5.235258128979675e-05,
      "loss": 1.2172,
      "step": 284
    },
    {
      "epoch": 8.66,
      "learning_rate": 4.797183254069176e-05,
      "loss": 1.2345,
      "step": 286
    },
    {
      "epoch": 8.72,
      "learning_rate": 4.3773230771879005e-05,
      "loss": 1.1994,
      "step": 288
    },
    {
      "epoch": 8.78,
      "learning_rate": 3.975846783065662e-05,
      "loss": 1.1963,
      "step": 290
    },
    {
      "epoch": 8.84,
      "learning_rate": 3.5929161485559694e-05,
      "loss": 1.1995,
      "step": 292
    },
    {
      "epoch": 8.9,
      "learning_rate": 3.2286854774472905e-05,
      "loss": 1.1779,
      "step": 294
    },
    {
      "epoch": 8.96,
      "learning_rate": 2.883301538285582e-05,
      "loss": 1.1981,
      "step": 296
    },
    {
      "epoch": 8.99,
      "eval_loss": 1.3090853691101074,
      "eval_runtime": 6.8294,
      "eval_samples_per_second": 65.013,
      "eval_steps_per_second": 16.253,
      "step": 297
    },
    {
      "epoch": 9.03,
      "learning_rate": 2.5569035052332156e-05,
      "loss": 1.3461,
      "step": 298
    },
    {
      "epoch": 9.09,
      "learning_rate": 2.2496229019879632e-05,
      "loss": 1.1812,
      "step": 300
    },
    {
      "epoch": 9.15,
      "learning_rate": 1.9615835487849675e-05,
      "loss": 1.177,
      "step": 302
    },
    {
      "epoch": 9.21,
      "learning_rate": 1.6929015125027312e-05,
      "loss": 1.1856,
      "step": 304
    },
    {
      "epoch": 9.27,
      "learning_rate": 1.443685059893396e-05,
      "loss": 1.1984,
      "step": 306
    },
    {
      "epoch": 9.33,
      "learning_rate": 1.2140346139561276e-05,
      "loss": 1.1711,
      "step": 308
    },
    {
      "epoch": 9.39,
      "learning_rate": 1.0040427134711649e-05,
      "loss": 1.1905,
      "step": 310
    },
    {
      "epoch": 9.45,
      "learning_rate": 8.137939757108525e-06,
      "loss": 1.1924,
      "step": 312
    },
    {
      "epoch": 9.51,
      "learning_rate": 6.433650623427378e-06,
      "loss": 1.2033,
      "step": 314
    },
    {
      "epoch": 9.57,
      "learning_rate": 4.928246485383147e-06,
      "loss": 1.2159,
      "step": 316
    },
    {
      "epoch": 9.63,
      "learning_rate": 3.6223339530006004e-06,
      "loss": 1.1831,
      "step": 318
    },
    {
      "epoch": 9.69,
      "learning_rate": 2.516439250177749e-06,
      "loss": 1.2011,
      "step": 320
    },
    {
      "epoch": 9.75,
      "learning_rate": 1.611008002641412e-06,
      "loss": 1.1991,
      "step": 322
    },
    {
      "epoch": 9.81,
      "learning_rate": 9.064050583800221e-07,
      "loss": 1.1786,
      "step": 324
    },
    {
      "epoch": 9.87,
      "learning_rate": 4.029143406262259e-07,
      "loss": 1.1945,
      "step": 326
    },
    {
      "epoch": 9.93,
      "learning_rate": 1.0073873344895734e-07,
      "loss": 1.1972,
      "step": 328
    },
    {
      "epoch": 9.99,
      "learning_rate": 0.0,
      "loss": 1.2117,
      "step": 330
    },
    {
      "epoch": 9.99,
      "eval_loss": 1.3089168071746826,
      "eval_runtime": 6.8863,
      "eval_samples_per_second": 64.476,
      "eval_steps_per_second": 16.119,
      "step": 330
    },
    {
      "epoch": 9.99,
      "step": 330,
      "total_flos": 1.1112428209176576e+16,
      "train_loss": 1.7149053530259566,
      "train_runtime": 770.4396,
      "train_samples_per_second": 110.534,
      "train_steps_per_second": 0.428
    }
  ],
  "max_steps": 330,
  "num_train_epochs": 10,
  "total_flos": 1.1112428209176576e+16,
  "trial_name": null,
  "trial_params": null
}