| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 0, | |
| "global_step": 1274, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0007849293563579278, | |
| "grad_norm": 0.3427978456020355, | |
| "learning_rate": 1e-05, | |
| "loss": 2.1035, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0015698587127158557, | |
| "grad_norm": 0.332590788602829, | |
| "learning_rate": 9.992150706436422e-06, | |
| "loss": 2.0738, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.002354788069073783, | |
| "grad_norm": 0.36298516392707825, | |
| "learning_rate": 9.984301412872842e-06, | |
| "loss": 2.0758, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0031397174254317113, | |
| "grad_norm": 0.32631000876426697, | |
| "learning_rate": 9.976452119309263e-06, | |
| "loss": 1.9876, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.003924646781789639, | |
| "grad_norm": 0.36786606907844543, | |
| "learning_rate": 9.968602825745683e-06, | |
| "loss": 2.1213, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.004709576138147566, | |
| "grad_norm": 0.34687480330467224, | |
| "learning_rate": 9.960753532182104e-06, | |
| "loss": 2.0017, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.005494505494505495, | |
| "grad_norm": 0.3398713171482086, | |
| "learning_rate": 9.952904238618524e-06, | |
| "loss": 1.9805, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.006279434850863423, | |
| "grad_norm": 0.3185282051563263, | |
| "learning_rate": 9.945054945054946e-06, | |
| "loss": 1.8601, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.00706436420722135, | |
| "grad_norm": 0.372031033039093, | |
| "learning_rate": 9.937205651491367e-06, | |
| "loss": 2.1202, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.007849293563579277, | |
| "grad_norm": 0.3446281850337982, | |
| "learning_rate": 9.929356357927787e-06, | |
| "loss": 1.9856, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.008634222919937205, | |
| "grad_norm": 0.32924437522888184, | |
| "learning_rate": 9.921507064364208e-06, | |
| "loss": 1.8849, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.009419152276295133, | |
| "grad_norm": 0.33408552408218384, | |
| "learning_rate": 9.91365777080063e-06, | |
| "loss": 1.8825, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.01020408163265306, | |
| "grad_norm": 0.3477867841720581, | |
| "learning_rate": 9.90580847723705e-06, | |
| "loss": 1.9898, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.01098901098901099, | |
| "grad_norm": 0.3446689546108246, | |
| "learning_rate": 9.89795918367347e-06, | |
| "loss": 1.8995, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.011773940345368918, | |
| "grad_norm": 0.32618626952171326, | |
| "learning_rate": 9.890109890109892e-06, | |
| "loss": 1.8615, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.012558869701726845, | |
| "grad_norm": 0.3038957715034485, | |
| "learning_rate": 9.882260596546312e-06, | |
| "loss": 1.8644, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.013343799058084773, | |
| "grad_norm": 0.2970006465911865, | |
| "learning_rate": 9.874411302982733e-06, | |
| "loss": 1.8248, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0141287284144427, | |
| "grad_norm": 0.2924885153770447, | |
| "learning_rate": 9.866562009419153e-06, | |
| "loss": 1.8896, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.014913657770800628, | |
| "grad_norm": 0.30948498845100403, | |
| "learning_rate": 9.858712715855574e-06, | |
| "loss": 2.0174, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.015698587127158554, | |
| "grad_norm": 0.27170419692993164, | |
| "learning_rate": 9.850863422291994e-06, | |
| "loss": 1.8595, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.016483516483516484, | |
| "grad_norm": 0.2529163360595703, | |
| "learning_rate": 9.843014128728415e-06, | |
| "loss": 1.8447, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.01726844583987441, | |
| "grad_norm": 0.26019564270973206, | |
| "learning_rate": 9.835164835164835e-06, | |
| "loss": 1.8919, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.01805337519623234, | |
| "grad_norm": 0.251017302274704, | |
| "learning_rate": 9.827315541601256e-06, | |
| "loss": 1.8662, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.018838304552590265, | |
| "grad_norm": 0.22893306612968445, | |
| "learning_rate": 9.819466248037678e-06, | |
| "loss": 1.7685, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.019623233908948195, | |
| "grad_norm": 0.22592955827713013, | |
| "learning_rate": 9.811616954474098e-06, | |
| "loss": 1.7604, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.02040816326530612, | |
| "grad_norm": 0.22802409529685974, | |
| "learning_rate": 9.803767660910519e-06, | |
| "loss": 1.8441, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.02119309262166405, | |
| "grad_norm": 0.21545954048633575, | |
| "learning_rate": 9.795918367346939e-06, | |
| "loss": 1.7258, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.02197802197802198, | |
| "grad_norm": 0.22040924429893494, | |
| "learning_rate": 9.78806907378336e-06, | |
| "loss": 1.7892, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.022762951334379906, | |
| "grad_norm": 0.2393616884946823, | |
| "learning_rate": 9.780219780219781e-06, | |
| "loss": 1.8538, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.023547880690737835, | |
| "grad_norm": 0.20232702791690826, | |
| "learning_rate": 9.772370486656201e-06, | |
| "loss": 1.7536, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02433281004709576, | |
| "grad_norm": 0.2203434556722641, | |
| "learning_rate": 9.764521193092623e-06, | |
| "loss": 1.7088, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.02511773940345369, | |
| "grad_norm": 0.1975688636302948, | |
| "learning_rate": 9.756671899529044e-06, | |
| "loss": 1.7131, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.025902668759811617, | |
| "grad_norm": 0.19861450791358948, | |
| "learning_rate": 9.748822605965464e-06, | |
| "loss": 1.776, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.026687598116169546, | |
| "grad_norm": 0.19722306728363037, | |
| "learning_rate": 9.740973312401885e-06, | |
| "loss": 1.6889, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.027472527472527472, | |
| "grad_norm": 0.18340197205543518, | |
| "learning_rate": 9.733124018838307e-06, | |
| "loss": 1.6312, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0282574568288854, | |
| "grad_norm": 0.20767861604690552, | |
| "learning_rate": 9.725274725274726e-06, | |
| "loss": 1.7293, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.029042386185243328, | |
| "grad_norm": 0.16855376958847046, | |
| "learning_rate": 9.717425431711148e-06, | |
| "loss": 1.5657, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.029827315541601257, | |
| "grad_norm": 0.19397929310798645, | |
| "learning_rate": 9.709576138147567e-06, | |
| "loss": 1.6843, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.030612244897959183, | |
| "grad_norm": 0.1965712457895279, | |
| "learning_rate": 9.701726844583989e-06, | |
| "loss": 1.7135, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.03139717425431711, | |
| "grad_norm": 0.18827113509178162, | |
| "learning_rate": 9.693877551020408e-06, | |
| "loss": 1.7002, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03218210361067504, | |
| "grad_norm": 0.18982355296611786, | |
| "learning_rate": 9.68602825745683e-06, | |
| "loss": 1.6863, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.03296703296703297, | |
| "grad_norm": 0.18091963231563568, | |
| "learning_rate": 9.67817896389325e-06, | |
| "loss": 1.6784, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.033751962323390894, | |
| "grad_norm": 0.17879721522331238, | |
| "learning_rate": 9.670329670329671e-06, | |
| "loss": 1.6552, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.03453689167974882, | |
| "grad_norm": 0.19234947860240936, | |
| "learning_rate": 9.66248037676609e-06, | |
| "loss": 1.7857, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.03532182103610675, | |
| "grad_norm": 0.16198568046092987, | |
| "learning_rate": 9.654631083202512e-06, | |
| "loss": 1.5865, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.03610675039246468, | |
| "grad_norm": 0.16286341845989227, | |
| "learning_rate": 9.646781789638933e-06, | |
| "loss": 1.6461, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.036891679748822605, | |
| "grad_norm": 0.15107131004333496, | |
| "learning_rate": 9.638932496075353e-06, | |
| "loss": 1.6048, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.03767660910518053, | |
| "grad_norm": 0.16922970116138458, | |
| "learning_rate": 9.631083202511775e-06, | |
| "loss": 1.6644, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.038461538461538464, | |
| "grad_norm": 0.15843363106250763, | |
| "learning_rate": 9.623233908948196e-06, | |
| "loss": 1.5417, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.03924646781789639, | |
| "grad_norm": 0.17787528038024902, | |
| "learning_rate": 9.615384615384616e-06, | |
| "loss": 1.6869, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.040031397174254316, | |
| "grad_norm": 0.1719801276922226, | |
| "learning_rate": 9.607535321821037e-06, | |
| "loss": 1.6351, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.04081632653061224, | |
| "grad_norm": 0.1807231903076172, | |
| "learning_rate": 9.599686028257459e-06, | |
| "loss": 1.6697, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.041601255886970175, | |
| "grad_norm": 0.1894819140434265, | |
| "learning_rate": 9.591836734693878e-06, | |
| "loss": 1.6668, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.0423861852433281, | |
| "grad_norm": 0.16167454421520233, | |
| "learning_rate": 9.5839874411303e-06, | |
| "loss": 1.601, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.04317111459968603, | |
| "grad_norm": 0.15732350945472717, | |
| "learning_rate": 9.576138147566721e-06, | |
| "loss": 1.5291, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.04395604395604396, | |
| "grad_norm": 0.16725674271583557, | |
| "learning_rate": 9.56828885400314e-06, | |
| "loss": 1.6179, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.044740973312401885, | |
| "grad_norm": 0.1474035680294037, | |
| "learning_rate": 9.560439560439562e-06, | |
| "loss": 1.5644, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.04552590266875981, | |
| "grad_norm": 0.15364201366901398, | |
| "learning_rate": 9.552590266875982e-06, | |
| "loss": 1.6396, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.04631083202511774, | |
| "grad_norm": 0.19037100672721863, | |
| "learning_rate": 9.544740973312403e-06, | |
| "loss": 1.561, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.04709576138147567, | |
| "grad_norm": 0.14766716957092285, | |
| "learning_rate": 9.536891679748823e-06, | |
| "loss": 1.5349, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.047880690737833596, | |
| "grad_norm": 0.13931486010551453, | |
| "learning_rate": 9.529042386185244e-06, | |
| "loss": 1.4435, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.04866562009419152, | |
| "grad_norm": 0.1515316218137741, | |
| "learning_rate": 9.521193092621664e-06, | |
| "loss": 1.5309, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.04945054945054945, | |
| "grad_norm": 0.14389821887016296, | |
| "learning_rate": 9.513343799058085e-06, | |
| "loss": 1.4471, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.05023547880690738, | |
| "grad_norm": 0.14060775935649872, | |
| "learning_rate": 9.505494505494505e-06, | |
| "loss": 1.5074, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.05102040816326531, | |
| "grad_norm": 0.1455729603767395, | |
| "learning_rate": 9.497645211930927e-06, | |
| "loss": 1.5528, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.05180533751962323, | |
| "grad_norm": 0.14463943243026733, | |
| "learning_rate": 9.489795918367348e-06, | |
| "loss": 1.4809, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.05259026687598116, | |
| "grad_norm": 0.1522558331489563, | |
| "learning_rate": 9.481946624803768e-06, | |
| "loss": 1.4754, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.05337519623233909, | |
| "grad_norm": 0.1452169567346573, | |
| "learning_rate": 9.474097331240189e-06, | |
| "loss": 1.4867, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.05416012558869702, | |
| "grad_norm": 0.14983990788459778, | |
| "learning_rate": 9.46624803767661e-06, | |
| "loss": 1.4852, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.054945054945054944, | |
| "grad_norm": 0.15380005538463593, | |
| "learning_rate": 9.45839874411303e-06, | |
| "loss": 1.5105, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.05572998430141287, | |
| "grad_norm": 0.1510206013917923, | |
| "learning_rate": 9.450549450549452e-06, | |
| "loss": 1.5202, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.0565149136577708, | |
| "grad_norm": 0.1501176506280899, | |
| "learning_rate": 9.442700156985873e-06, | |
| "loss": 1.5119, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.05729984301412873, | |
| "grad_norm": 0.26818370819091797, | |
| "learning_rate": 9.434850863422293e-06, | |
| "loss": 1.5876, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.058084772370486655, | |
| "grad_norm": 0.1636001467704773, | |
| "learning_rate": 9.427001569858714e-06, | |
| "loss": 1.5482, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.05886970172684458, | |
| "grad_norm": 0.1485077142715454, | |
| "learning_rate": 9.419152276295134e-06, | |
| "loss": 1.5007, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.059654631083202514, | |
| "grad_norm": 0.14850012958049774, | |
| "learning_rate": 9.411302982731555e-06, | |
| "loss": 1.4424, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.06043956043956044, | |
| "grad_norm": 0.16702501475811005, | |
| "learning_rate": 9.403453689167977e-06, | |
| "loss": 1.5523, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.061224489795918366, | |
| "grad_norm": 0.15823277831077576, | |
| "learning_rate": 9.395604395604396e-06, | |
| "loss": 1.4958, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.06200941915227629, | |
| "grad_norm": 0.15339218080043793, | |
| "learning_rate": 9.387755102040818e-06, | |
| "loss": 1.4549, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.06279434850863422, | |
| "grad_norm": 0.15759187936782837, | |
| "learning_rate": 9.379905808477237e-06, | |
| "loss": 1.4164, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.06357927786499215, | |
| "grad_norm": 0.16237343847751617, | |
| "learning_rate": 9.372056514913659e-06, | |
| "loss": 1.5056, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.06436420722135008, | |
| "grad_norm": 0.145219624042511, | |
| "learning_rate": 9.364207221350079e-06, | |
| "loss": 1.4138, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.065149136577708, | |
| "grad_norm": 0.16004693508148193, | |
| "learning_rate": 9.3563579277865e-06, | |
| "loss": 1.4653, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.06593406593406594, | |
| "grad_norm": 0.17298448085784912, | |
| "learning_rate": 9.34850863422292e-06, | |
| "loss": 1.4232, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.06671899529042387, | |
| "grad_norm": 0.15541157126426697, | |
| "learning_rate": 9.340659340659341e-06, | |
| "loss": 1.4269, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.06750392464678179, | |
| "grad_norm": 0.1717188060283661, | |
| "learning_rate": 9.33281004709576e-06, | |
| "loss": 1.4827, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.06828885400313972, | |
| "grad_norm": 0.15374824404716492, | |
| "learning_rate": 9.324960753532182e-06, | |
| "loss": 1.4348, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.06907378335949764, | |
| "grad_norm": 0.16950677335262299, | |
| "learning_rate": 9.317111459968604e-06, | |
| "loss": 1.4779, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.06985871271585557, | |
| "grad_norm": 0.16479431092739105, | |
| "learning_rate": 9.309262166405025e-06, | |
| "loss": 1.4517, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.0706436420722135, | |
| "grad_norm": 0.16450008749961853, | |
| "learning_rate": 9.301412872841445e-06, | |
| "loss": 1.4352, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.07142857142857142, | |
| "grad_norm": 0.1584760844707489, | |
| "learning_rate": 9.293563579277866e-06, | |
| "loss": 1.3687, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.07221350078492936, | |
| "grad_norm": 0.17088983952999115, | |
| "learning_rate": 9.285714285714288e-06, | |
| "loss": 1.403, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.07299843014128729, | |
| "grad_norm": 0.16017332673072815, | |
| "learning_rate": 9.277864992150707e-06, | |
| "loss": 1.3504, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.07378335949764521, | |
| "grad_norm": 0.22771817445755005, | |
| "learning_rate": 9.270015698587129e-06, | |
| "loss": 1.4361, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.07456828885400314, | |
| "grad_norm": 0.1651688814163208, | |
| "learning_rate": 9.262166405023548e-06, | |
| "loss": 1.3933, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.07535321821036106, | |
| "grad_norm": 0.185661181807518, | |
| "learning_rate": 9.25431711145997e-06, | |
| "loss": 1.4446, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.076138147566719, | |
| "grad_norm": 0.17134390771389008, | |
| "learning_rate": 9.24646781789639e-06, | |
| "loss": 1.3812, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.07692307692307693, | |
| "grad_norm": 0.17040328681468964, | |
| "learning_rate": 9.238618524332811e-06, | |
| "loss": 1.393, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.07770800627943485, | |
| "grad_norm": 0.16197283565998077, | |
| "learning_rate": 9.230769230769232e-06, | |
| "loss": 1.3515, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.07849293563579278, | |
| "grad_norm": 0.18512940406799316, | |
| "learning_rate": 9.222919937205652e-06, | |
| "loss": 1.3884, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07927786499215071, | |
| "grad_norm": 0.2023470401763916, | |
| "learning_rate": 9.215070643642073e-06, | |
| "loss": 1.4122, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.08006279434850863, | |
| "grad_norm": 0.1792641282081604, | |
| "learning_rate": 9.207221350078493e-06, | |
| "loss": 1.3936, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.08084772370486656, | |
| "grad_norm": 0.18946573138237, | |
| "learning_rate": 9.199372056514915e-06, | |
| "loss": 1.3314, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.08163265306122448, | |
| "grad_norm": 0.17217950522899628, | |
| "learning_rate": 9.191522762951334e-06, | |
| "loss": 1.3624, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.08241758241758242, | |
| "grad_norm": 0.1832340806722641, | |
| "learning_rate": 9.183673469387756e-06, | |
| "loss": 1.4058, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.08320251177394035, | |
| "grad_norm": 0.1757064312696457, | |
| "learning_rate": 9.175824175824175e-06, | |
| "loss": 1.3566, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.08398744113029827, | |
| "grad_norm": 0.17505265772342682, | |
| "learning_rate": 9.167974882260597e-06, | |
| "loss": 1.3217, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.0847723704866562, | |
| "grad_norm": 0.1719823032617569, | |
| "learning_rate": 9.160125588697018e-06, | |
| "loss": 1.3686, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.08555729984301413, | |
| "grad_norm": 0.16589918732643127, | |
| "learning_rate": 9.15227629513344e-06, | |
| "loss": 1.3998, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.08634222919937205, | |
| "grad_norm": 0.18314798176288605, | |
| "learning_rate": 9.14442700156986e-06, | |
| "loss": 1.399, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.08712715855572999, | |
| "grad_norm": 0.18713513016700745, | |
| "learning_rate": 9.13657770800628e-06, | |
| "loss": 1.4139, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.08791208791208792, | |
| "grad_norm": 0.16822576522827148, | |
| "learning_rate": 9.128728414442702e-06, | |
| "loss": 1.3247, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.08869701726844584, | |
| "grad_norm": 0.16645370423793793, | |
| "learning_rate": 9.120879120879122e-06, | |
| "loss": 1.3382, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.08948194662480377, | |
| "grad_norm": 0.16858340799808502, | |
| "learning_rate": 9.113029827315543e-06, | |
| "loss": 1.366, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.09026687598116169, | |
| "grad_norm": 0.15871913731098175, | |
| "learning_rate": 9.105180533751963e-06, | |
| "loss": 1.3446, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.09105180533751962, | |
| "grad_norm": 0.17642484605312347, | |
| "learning_rate": 9.097331240188384e-06, | |
| "loss": 1.4424, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.09183673469387756, | |
| "grad_norm": 0.16072145104408264, | |
| "learning_rate": 9.089481946624804e-06, | |
| "loss": 1.3247, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.09262166405023547, | |
| "grad_norm": 0.1545998454093933, | |
| "learning_rate": 9.081632653061225e-06, | |
| "loss": 1.3285, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.09340659340659341, | |
| "grad_norm": 0.15946722030639648, | |
| "learning_rate": 9.073783359497645e-06, | |
| "loss": 1.3123, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.09419152276295134, | |
| "grad_norm": 0.16009531915187836, | |
| "learning_rate": 9.065934065934067e-06, | |
| "loss": 1.3456, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.09497645211930926, | |
| "grad_norm": 0.15958304703235626, | |
| "learning_rate": 9.058084772370488e-06, | |
| "loss": 1.3039, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.09576138147566719, | |
| "grad_norm": 0.14737734198570251, | |
| "learning_rate": 9.050235478806908e-06, | |
| "loss": 1.3443, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.09654631083202511, | |
| "grad_norm": 0.15155759453773499, | |
| "learning_rate": 9.042386185243329e-06, | |
| "loss": 1.3459, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.09733124018838304, | |
| "grad_norm": 0.15477906167507172, | |
| "learning_rate": 9.034536891679749e-06, | |
| "loss": 1.3314, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.09811616954474098, | |
| "grad_norm": 0.14952191710472107, | |
| "learning_rate": 9.02668759811617e-06, | |
| "loss": 1.2907, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.0989010989010989, | |
| "grad_norm": 0.14685417711734772, | |
| "learning_rate": 9.01883830455259e-06, | |
| "loss": 1.3356, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.09968602825745683, | |
| "grad_norm": 0.1500052958726883, | |
| "learning_rate": 9.010989010989011e-06, | |
| "loss": 1.2724, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.10047095761381476, | |
| "grad_norm": 0.1418740600347519, | |
| "learning_rate": 9.003139717425433e-06, | |
| "loss": 1.3153, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.10125588697017268, | |
| "grad_norm": 0.15048982203006744, | |
| "learning_rate": 8.995290423861854e-06, | |
| "loss": 1.317, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.10204081632653061, | |
| "grad_norm": 0.1451820433139801, | |
| "learning_rate": 8.987441130298274e-06, | |
| "loss": 1.2907, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.10282574568288853, | |
| "grad_norm": 0.1553954780101776, | |
| "learning_rate": 8.979591836734695e-06, | |
| "loss": 1.3496, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.10361067503924647, | |
| "grad_norm": 0.13944025337696075, | |
| "learning_rate": 8.971742543171117e-06, | |
| "loss": 1.2967, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.1043956043956044, | |
| "grad_norm": 0.1505189687013626, | |
| "learning_rate": 8.963893249607536e-06, | |
| "loss": 1.242, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.10518053375196232, | |
| "grad_norm": 0.17538291215896606, | |
| "learning_rate": 8.956043956043958e-06, | |
| "loss": 1.3652, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.10596546310832025, | |
| "grad_norm": 0.1521396040916443, | |
| "learning_rate": 8.948194662480377e-06, | |
| "loss": 1.303, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.10675039246467818, | |
| "grad_norm": 0.14989960193634033, | |
| "learning_rate": 8.940345368916799e-06, | |
| "loss": 1.3348, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.1075353218210361, | |
| "grad_norm": 0.14487774670124054, | |
| "learning_rate": 8.932496075353219e-06, | |
| "loss": 1.307, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.10832025117739404, | |
| "grad_norm": 0.1494406759738922, | |
| "learning_rate": 8.92464678178964e-06, | |
| "loss": 1.3126, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.10910518053375197, | |
| "grad_norm": 0.16444607079029083, | |
| "learning_rate": 8.91679748822606e-06, | |
| "loss": 1.2694, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.10989010989010989, | |
| "grad_norm": 0.15526680648326874, | |
| "learning_rate": 8.908948194662481e-06, | |
| "loss": 1.2646, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.11067503924646782, | |
| "grad_norm": 0.15668538212776184, | |
| "learning_rate": 8.9010989010989e-06, | |
| "loss": 1.2587, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.11145996860282574, | |
| "grad_norm": 0.14996470510959625, | |
| "learning_rate": 8.893249607535322e-06, | |
| "loss": 1.3122, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.11224489795918367, | |
| "grad_norm": 0.14894387125968933, | |
| "learning_rate": 8.885400313971744e-06, | |
| "loss": 1.2909, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.1130298273155416, | |
| "grad_norm": 0.18087385594844818, | |
| "learning_rate": 8.877551020408163e-06, | |
| "loss": 1.3161, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.11381475667189953, | |
| "grad_norm": 0.15965577960014343, | |
| "learning_rate": 8.869701726844585e-06, | |
| "loss": 1.2683, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.11459968602825746, | |
| "grad_norm": 0.15005330741405487, | |
| "learning_rate": 8.861852433281004e-06, | |
| "loss": 1.3083, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.11538461538461539, | |
| "grad_norm": 0.14711208641529083, | |
| "learning_rate": 8.854003139717426e-06, | |
| "loss": 1.3019, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.11616954474097331, | |
| "grad_norm": 0.14721107482910156, | |
| "learning_rate": 8.846153846153847e-06, | |
| "loss": 1.2469, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.11695447409733124, | |
| "grad_norm": 0.16075782477855682, | |
| "learning_rate": 8.838304552590269e-06, | |
| "loss": 1.2696, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.11773940345368916, | |
| "grad_norm": 0.14981156587600708, | |
| "learning_rate": 8.830455259026688e-06, | |
| "loss": 1.2416, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1185243328100471, | |
| "grad_norm": 0.15048524737358093, | |
| "learning_rate": 8.82260596546311e-06, | |
| "loss": 1.235, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.11930926216640503, | |
| "grad_norm": 0.1528954803943634, | |
| "learning_rate": 8.81475667189953e-06, | |
| "loss": 1.3038, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.12009419152276295, | |
| "grad_norm": 0.15498745441436768, | |
| "learning_rate": 8.80690737833595e-06, | |
| "loss": 1.3014, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.12087912087912088, | |
| "grad_norm": 0.15970808267593384, | |
| "learning_rate": 8.799058084772372e-06, | |
| "loss": 1.2824, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.12166405023547881, | |
| "grad_norm": 0.15735112130641937, | |
| "learning_rate": 8.791208791208792e-06, | |
| "loss": 1.2684, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.12244897959183673, | |
| "grad_norm": 0.15894393622875214, | |
| "learning_rate": 8.783359497645213e-06, | |
| "loss": 1.2283, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.12323390894819466, | |
| "grad_norm": 0.16271716356277466, | |
| "learning_rate": 8.775510204081633e-06, | |
| "loss": 1.2974, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.12401883830455258, | |
| "grad_norm": 0.15001676976680756, | |
| "learning_rate": 8.767660910518054e-06, | |
| "loss": 1.2688, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.12480376766091052, | |
| "grad_norm": 0.1491970270872116, | |
| "learning_rate": 8.759811616954474e-06, | |
| "loss": 1.2482, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.12558869701726844, | |
| "grad_norm": 0.15794126689434052, | |
| "learning_rate": 8.751962323390896e-06, | |
| "loss": 1.2605, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.12637362637362637, | |
| "grad_norm": 0.14844392240047455, | |
| "learning_rate": 8.744113029827315e-06, | |
| "loss": 1.2332, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.1271585557299843, | |
| "grad_norm": 0.16946828365325928, | |
| "learning_rate": 8.736263736263737e-06, | |
| "loss": 1.3136, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.12794348508634223, | |
| "grad_norm": 0.14547237753868103, | |
| "learning_rate": 8.728414442700158e-06, | |
| "loss": 1.2409, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.12872841444270017, | |
| "grad_norm": 0.15002243220806122, | |
| "learning_rate": 8.720565149136578e-06, | |
| "loss": 1.1916, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.12951334379905807, | |
| "grad_norm": 0.1735372096300125, | |
| "learning_rate": 8.712715855573e-06, | |
| "loss": 1.2371, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.130298273155416, | |
| "grad_norm": 0.18110059201717377, | |
| "learning_rate": 8.704866562009419e-06, | |
| "loss": 1.3246, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.13108320251177394, | |
| "grad_norm": 0.16027387976646423, | |
| "learning_rate": 8.69701726844584e-06, | |
| "loss": 1.2489, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.13186813186813187, | |
| "grad_norm": 0.15014226734638214, | |
| "learning_rate": 8.689167974882262e-06, | |
| "loss": 1.2068, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.1326530612244898, | |
| "grad_norm": 0.15863986313343048, | |
| "learning_rate": 8.681318681318681e-06, | |
| "loss": 1.2825, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.13343799058084774, | |
| "grad_norm": 0.17867471277713776, | |
| "learning_rate": 8.673469387755103e-06, | |
| "loss": 1.2607, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.13422291993720564, | |
| "grad_norm": 0.1596807986497879, | |
| "learning_rate": 8.665620094191524e-06, | |
| "loss": 1.2067, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.13500784929356358, | |
| "grad_norm": 0.1603277176618576, | |
| "learning_rate": 8.657770800627944e-06, | |
| "loss": 1.2414, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.1357927786499215, | |
| "grad_norm": 0.26285287737846375, | |
| "learning_rate": 8.649921507064365e-06, | |
| "loss": 1.1949, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.13657770800627944, | |
| "grad_norm": 0.15682700276374817, | |
| "learning_rate": 8.642072213500787e-06, | |
| "loss": 1.1832, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.13736263736263737, | |
| "grad_norm": 0.16141754388809204, | |
| "learning_rate": 8.634222919937206e-06, | |
| "loss": 1.2334, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.13814756671899528, | |
| "grad_norm": 0.18615436553955078, | |
| "learning_rate": 8.626373626373628e-06, | |
| "loss": 1.256, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.1389324960753532, | |
| "grad_norm": 0.15746115148067474, | |
| "learning_rate": 8.618524332810048e-06, | |
| "loss": 1.2267, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.13971742543171115, | |
| "grad_norm": 0.16463595628738403, | |
| "learning_rate": 8.610675039246469e-06, | |
| "loss": 1.2638, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.14050235478806908, | |
| "grad_norm": 0.15357612073421478, | |
| "learning_rate": 8.602825745682889e-06, | |
| "loss": 1.2262, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.141287284144427, | |
| "grad_norm": 0.16407904028892517, | |
| "learning_rate": 8.59497645211931e-06, | |
| "loss": 1.1631, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.14207221350078492, | |
| "grad_norm": 0.15864725410938263, | |
| "learning_rate": 8.58712715855573e-06, | |
| "loss": 1.1992, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.14285714285714285, | |
| "grad_norm": 0.16168276965618134, | |
| "learning_rate": 8.579277864992151e-06, | |
| "loss": 1.2385, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.14364207221350078, | |
| "grad_norm": 0.15585263073444366, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 1.2105, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.14442700156985872, | |
| "grad_norm": 0.15923231840133667, | |
| "learning_rate": 8.563579277864992e-06, | |
| "loss": 1.2208, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.14521193092621665, | |
| "grad_norm": 0.1669979989528656, | |
| "learning_rate": 8.555729984301414e-06, | |
| "loss": 1.2528, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.14599686028257458, | |
| "grad_norm": 0.1686784327030182, | |
| "learning_rate": 8.547880690737833e-06, | |
| "loss": 1.2136, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.14678178963893249, | |
| "grad_norm": 0.16007350385189056, | |
| "learning_rate": 8.540031397174255e-06, | |
| "loss": 1.2197, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.14756671899529042, | |
| "grad_norm": 0.15759383141994476, | |
| "learning_rate": 8.532182103610676e-06, | |
| "loss": 1.2286, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.14835164835164835, | |
| "grad_norm": 0.1621437519788742, | |
| "learning_rate": 8.524332810047096e-06, | |
| "loss": 1.1928, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.14913657770800628, | |
| "grad_norm": 0.1673436015844345, | |
| "learning_rate": 8.516483516483517e-06, | |
| "loss": 1.2709, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.14992150706436422, | |
| "grad_norm": 0.15842801332473755, | |
| "learning_rate": 8.508634222919939e-06, | |
| "loss": 1.213, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.15070643642072212, | |
| "grad_norm": 0.16606052219867706, | |
| "learning_rate": 8.500784929356358e-06, | |
| "loss": 1.2274, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.15149136577708006, | |
| "grad_norm": 0.24337609112262726, | |
| "learning_rate": 8.49293563579278e-06, | |
| "loss": 1.2763, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.152276295133438, | |
| "grad_norm": 0.1524026244878769, | |
| "learning_rate": 8.4850863422292e-06, | |
| "loss": 1.1981, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.15306122448979592, | |
| "grad_norm": 0.16204509139060974, | |
| "learning_rate": 8.477237048665621e-06, | |
| "loss": 1.2361, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.15384615384615385, | |
| "grad_norm": 0.16638584434986115, | |
| "learning_rate": 8.469387755102042e-06, | |
| "loss": 1.2309, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.1546310832025118, | |
| "grad_norm": 0.16714318096637726, | |
| "learning_rate": 8.461538461538462e-06, | |
| "loss": 1.2692, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.1554160125588697, | |
| "grad_norm": 0.168153315782547, | |
| "learning_rate": 8.453689167974884e-06, | |
| "loss": 1.2476, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.15620094191522763, | |
| "grad_norm": 0.1681162267923355, | |
| "learning_rate": 8.445839874411303e-06, | |
| "loss": 1.2504, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.15698587127158556, | |
| "grad_norm": 0.16602487862110138, | |
| "learning_rate": 8.437990580847725e-06, | |
| "loss": 1.2636, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1577708006279435, | |
| "grad_norm": 0.1714046150445938, | |
| "learning_rate": 8.430141287284144e-06, | |
| "loss": 1.2159, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.15855572998430142, | |
| "grad_norm": 0.168313130736351, | |
| "learning_rate": 8.422291993720566e-06, | |
| "loss": 1.1932, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.15934065934065933, | |
| "grad_norm": 0.1813940554857254, | |
| "learning_rate": 8.414442700156985e-06, | |
| "loss": 1.2291, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.16012558869701726, | |
| "grad_norm": 0.16745160520076752, | |
| "learning_rate": 8.406593406593407e-06, | |
| "loss": 1.2088, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.1609105180533752, | |
| "grad_norm": 0.1752898395061493, | |
| "learning_rate": 8.398744113029828e-06, | |
| "loss": 1.2324, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.16169544740973313, | |
| "grad_norm": 0.1678934097290039, | |
| "learning_rate": 8.390894819466248e-06, | |
| "loss": 1.2545, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.16248037676609106, | |
| "grad_norm": 0.1756390780210495, | |
| "learning_rate": 8.38304552590267e-06, | |
| "loss": 1.2421, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.16326530612244897, | |
| "grad_norm": 0.17289415001869202, | |
| "learning_rate": 8.37519623233909e-06, | |
| "loss": 1.2063, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.1640502354788069, | |
| "grad_norm": 0.17906337976455688, | |
| "learning_rate": 8.36734693877551e-06, | |
| "loss": 1.2598, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.16483516483516483, | |
| "grad_norm": 0.17102067172527313, | |
| "learning_rate": 8.359497645211932e-06, | |
| "loss": 1.1696, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.16562009419152277, | |
| "grad_norm": 0.16523365676403046, | |
| "learning_rate": 8.351648351648353e-06, | |
| "loss": 1.1769, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.1664050235478807, | |
| "grad_norm": 0.1749919205904007, | |
| "learning_rate": 8.343799058084773e-06, | |
| "loss": 1.1784, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.16718995290423863, | |
| "grad_norm": 0.17963068187236786, | |
| "learning_rate": 8.335949764521194e-06, | |
| "loss": 1.1859, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.16797488226059654, | |
| "grad_norm": 0.17333124577999115, | |
| "learning_rate": 8.328100470957614e-06, | |
| "loss": 1.2148, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.16875981161695447, | |
| "grad_norm": 0.17331229150295258, | |
| "learning_rate": 8.320251177394036e-06, | |
| "loss": 1.2404, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.1695447409733124, | |
| "grad_norm": 0.23610031604766846, | |
| "learning_rate": 8.312401883830455e-06, | |
| "loss": 1.222, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.17032967032967034, | |
| "grad_norm": 0.19044527411460876, | |
| "learning_rate": 8.304552590266877e-06, | |
| "loss": 1.1923, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.17111459968602827, | |
| "grad_norm": 0.16924557089805603, | |
| "learning_rate": 8.296703296703298e-06, | |
| "loss": 1.2481, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.17189952904238617, | |
| "grad_norm": 0.17461296916007996, | |
| "learning_rate": 8.288854003139718e-06, | |
| "loss": 1.204, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.1726844583987441, | |
| "grad_norm": 0.16900260746479034, | |
| "learning_rate": 8.281004709576139e-06, | |
| "loss": 1.158, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.17346938775510204, | |
| "grad_norm": 0.1738884598016739, | |
| "learning_rate": 8.273155416012559e-06, | |
| "loss": 1.1835, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.17425431711145997, | |
| "grad_norm": 0.18512043356895447, | |
| "learning_rate": 8.26530612244898e-06, | |
| "loss": 1.202, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.1750392464678179, | |
| "grad_norm": 0.15882278978824615, | |
| "learning_rate": 8.2574568288854e-06, | |
| "loss": 1.1717, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.17582417582417584, | |
| "grad_norm": 0.1748121678829193, | |
| "learning_rate": 8.249607535321821e-06, | |
| "loss": 1.2407, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.17660910518053374, | |
| "grad_norm": 0.17194059491157532, | |
| "learning_rate": 8.241758241758243e-06, | |
| "loss": 1.2104, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.17739403453689168, | |
| "grad_norm": 0.17927075922489166, | |
| "learning_rate": 8.233908948194662e-06, | |
| "loss": 1.1295, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.1781789638932496, | |
| "grad_norm": 0.17285114526748657, | |
| "learning_rate": 8.226059654631084e-06, | |
| "loss": 1.1364, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.17896389324960754, | |
| "grad_norm": 0.18146753311157227, | |
| "learning_rate": 8.218210361067505e-06, | |
| "loss": 1.2123, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.17974882260596547, | |
| "grad_norm": 0.17958636581897736, | |
| "learning_rate": 8.210361067503925e-06, | |
| "loss": 1.2155, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.18053375196232338, | |
| "grad_norm": 0.1809559315443039, | |
| "learning_rate": 8.202511773940346e-06, | |
| "loss": 1.2193, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.1813186813186813, | |
| "grad_norm": 0.17428240180015564, | |
| "learning_rate": 8.194662480376768e-06, | |
| "loss": 1.1756, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.18210361067503925, | |
| "grad_norm": 0.16993722319602966, | |
| "learning_rate": 8.186813186813188e-06, | |
| "loss": 1.1373, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.18288854003139718, | |
| "grad_norm": 0.16454678773880005, | |
| "learning_rate": 8.178963893249609e-06, | |
| "loss": 1.1778, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.1836734693877551, | |
| "grad_norm": 0.19709423184394836, | |
| "learning_rate": 8.171114599686029e-06, | |
| "loss": 1.1672, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.18445839874411302, | |
| "grad_norm": 0.17396849393844604, | |
| "learning_rate": 8.16326530612245e-06, | |
| "loss": 1.1614, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.18524332810047095, | |
| "grad_norm": 0.17194689810276031, | |
| "learning_rate": 8.15541601255887e-06, | |
| "loss": 1.1568, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.18602825745682888, | |
| "grad_norm": 0.17015020549297333, | |
| "learning_rate": 8.147566718995291e-06, | |
| "loss": 1.1737, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.18681318681318682, | |
| "grad_norm": 0.181587353348732, | |
| "learning_rate": 8.139717425431711e-06, | |
| "loss": 1.1874, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.18759811616954475, | |
| "grad_norm": 0.17715583741664886, | |
| "learning_rate": 8.131868131868132e-06, | |
| "loss": 1.2042, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.18838304552590268, | |
| "grad_norm": 0.18029291927814484, | |
| "learning_rate": 8.124018838304554e-06, | |
| "loss": 1.2087, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.1891679748822606, | |
| "grad_norm": 0.1827882081270218, | |
| "learning_rate": 8.116169544740973e-06, | |
| "loss": 1.1878, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.18995290423861852, | |
| "grad_norm": 0.18994055688381195, | |
| "learning_rate": 8.108320251177395e-06, | |
| "loss": 1.2756, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.19073783359497645, | |
| "grad_norm": 0.18137842416763306, | |
| "learning_rate": 8.100470957613814e-06, | |
| "loss": 1.1633, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.19152276295133439, | |
| "grad_norm": 0.18730993568897247, | |
| "learning_rate": 8.092621664050236e-06, | |
| "loss": 1.1659, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.19230769230769232, | |
| "grad_norm": 0.17532621324062347, | |
| "learning_rate": 8.084772370486657e-06, | |
| "loss": 1.212, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.19309262166405022, | |
| "grad_norm": 0.18628445267677307, | |
| "learning_rate": 8.076923076923077e-06, | |
| "loss": 1.1684, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.19387755102040816, | |
| "grad_norm": 0.1802094727754593, | |
| "learning_rate": 8.069073783359498e-06, | |
| "loss": 1.2242, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.1946624803767661, | |
| "grad_norm": 0.18173186480998993, | |
| "learning_rate": 8.06122448979592e-06, | |
| "loss": 1.2428, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.19544740973312402, | |
| "grad_norm": 0.17413657903671265, | |
| "learning_rate": 8.05337519623234e-06, | |
| "loss": 1.1303, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.19623233908948196, | |
| "grad_norm": 0.18897269666194916, | |
| "learning_rate": 8.045525902668761e-06, | |
| "loss": 1.2011, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.1970172684458399, | |
| "grad_norm": 0.18273556232452393, | |
| "learning_rate": 8.037676609105182e-06, | |
| "loss": 1.195, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.1978021978021978, | |
| "grad_norm": 0.17363931238651276, | |
| "learning_rate": 8.029827315541602e-06, | |
| "loss": 1.151, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.19858712715855573, | |
| "grad_norm": 0.19355787336826324, | |
| "learning_rate": 8.021978021978023e-06, | |
| "loss": 1.1883, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.19937205651491366, | |
| "grad_norm": 0.1852579414844513, | |
| "learning_rate": 8.014128728414443e-06, | |
| "loss": 1.1829, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.2001569858712716, | |
| "grad_norm": 0.18897081911563873, | |
| "learning_rate": 8.006279434850865e-06, | |
| "loss": 1.1633, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.20094191522762953, | |
| "grad_norm": 0.177345871925354, | |
| "learning_rate": 7.998430141287284e-06, | |
| "loss": 1.1543, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.20172684458398743, | |
| "grad_norm": 0.19197995960712433, | |
| "learning_rate": 7.990580847723706e-06, | |
| "loss": 1.1124, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.20251177394034536, | |
| "grad_norm": 0.1918957382440567, | |
| "learning_rate": 7.982731554160125e-06, | |
| "loss": 1.2175, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.2032967032967033, | |
| "grad_norm": 0.3751870095729828, | |
| "learning_rate": 7.974882260596547e-06, | |
| "loss": 1.208, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.20408163265306123, | |
| "grad_norm": 0.19366663694381714, | |
| "learning_rate": 7.967032967032966e-06, | |
| "loss": 1.1758, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.20486656200941916, | |
| "grad_norm": 0.1843341588973999, | |
| "learning_rate": 7.959183673469388e-06, | |
| "loss": 1.1385, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.20565149136577707, | |
| "grad_norm": 0.17850859463214874, | |
| "learning_rate": 7.95133437990581e-06, | |
| "loss": 1.1707, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.206436420722135, | |
| "grad_norm": 0.21179649233818054, | |
| "learning_rate": 7.943485086342229e-06, | |
| "loss": 1.2079, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.20722135007849293, | |
| "grad_norm": 0.21911092102527618, | |
| "learning_rate": 7.93563579277865e-06, | |
| "loss": 1.2071, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.20800627943485087, | |
| "grad_norm": 0.1800030916929245, | |
| "learning_rate": 7.927786499215072e-06, | |
| "loss": 1.1897, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.2087912087912088, | |
| "grad_norm": 0.20178881287574768, | |
| "learning_rate": 7.919937205651492e-06, | |
| "loss": 1.1307, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.20957613814756673, | |
| "grad_norm": 0.18904000520706177, | |
| "learning_rate": 7.912087912087913e-06, | |
| "loss": 1.1234, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.21036106750392464, | |
| "grad_norm": 0.18753568828105927, | |
| "learning_rate": 7.904238618524334e-06, | |
| "loss": 1.1312, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.21114599686028257, | |
| "grad_norm": 0.18907895684242249, | |
| "learning_rate": 7.896389324960754e-06, | |
| "loss": 1.2075, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.2119309262166405, | |
| "grad_norm": 0.20011885464191437, | |
| "learning_rate": 7.888540031397175e-06, | |
| "loss": 1.1175, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.21271585557299844, | |
| "grad_norm": 0.1855919063091278, | |
| "learning_rate": 7.880690737833597e-06, | |
| "loss": 1.1581, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.21350078492935637, | |
| "grad_norm": 0.18904899060726166, | |
| "learning_rate": 7.872841444270017e-06, | |
| "loss": 1.1513, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.21428571428571427, | |
| "grad_norm": 0.18176890909671783, | |
| "learning_rate": 7.864992150706438e-06, | |
| "loss": 1.1448, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.2150706436420722, | |
| "grad_norm": 0.18389371037483215, | |
| "learning_rate": 7.857142857142858e-06, | |
| "loss": 1.1695, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.21585557299843014, | |
| "grad_norm": 0.1845601201057434, | |
| "learning_rate": 7.849293563579279e-06, | |
| "loss": 1.1295, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.21664050235478807, | |
| "grad_norm": 0.2147328108549118, | |
| "learning_rate": 7.841444270015699e-06, | |
| "loss": 1.1369, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.217425431711146, | |
| "grad_norm": 0.20094560086727142, | |
| "learning_rate": 7.83359497645212e-06, | |
| "loss": 1.1666, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.21821036106750394, | |
| "grad_norm": 0.1994454562664032, | |
| "learning_rate": 7.82574568288854e-06, | |
| "loss": 1.171, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.21899529042386184, | |
| "grad_norm": 0.19250410795211792, | |
| "learning_rate": 7.817896389324961e-06, | |
| "loss": 1.1709, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.21978021978021978, | |
| "grad_norm": 0.20511236786842346, | |
| "learning_rate": 7.810047095761381e-06, | |
| "loss": 1.1788, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.2205651491365777, | |
| "grad_norm": 0.2156459391117096, | |
| "learning_rate": 7.802197802197802e-06, | |
| "loss": 1.1543, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.22135007849293564, | |
| "grad_norm": 0.18635962903499603, | |
| "learning_rate": 7.794348508634224e-06, | |
| "loss": 1.1375, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.22213500784929358, | |
| "grad_norm": 0.1912240833044052, | |
| "learning_rate": 7.786499215070644e-06, | |
| "loss": 1.1901, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.22291993720565148, | |
| "grad_norm": 0.19378046691417694, | |
| "learning_rate": 7.778649921507065e-06, | |
| "loss": 1.2012, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.2237048665620094, | |
| "grad_norm": 0.19641022384166718, | |
| "learning_rate": 7.770800627943486e-06, | |
| "loss": 1.1817, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.22448979591836735, | |
| "grad_norm": 0.1922275871038437, | |
| "learning_rate": 7.762951334379906e-06, | |
| "loss": 1.182, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.22527472527472528, | |
| "grad_norm": 0.19869813323020935, | |
| "learning_rate": 7.755102040816327e-06, | |
| "loss": 1.1975, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.2260596546310832, | |
| "grad_norm": 0.183540940284729, | |
| "learning_rate": 7.747252747252749e-06, | |
| "loss": 1.2028, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.22684458398744112, | |
| "grad_norm": 0.20018337666988373, | |
| "learning_rate": 7.739403453689169e-06, | |
| "loss": 1.2147, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.22762951334379905, | |
| "grad_norm": 0.2085750699043274, | |
| "learning_rate": 7.73155416012559e-06, | |
| "loss": 1.1198, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.22841444270015698, | |
| "grad_norm": 0.19977134466171265, | |
| "learning_rate": 7.72370486656201e-06, | |
| "loss": 1.1528, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.22919937205651492, | |
| "grad_norm": 0.19265629351139069, | |
| "learning_rate": 7.715855572998431e-06, | |
| "loss": 1.1401, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.22998430141287285, | |
| "grad_norm": 0.191536083817482, | |
| "learning_rate": 7.708006279434852e-06, | |
| "loss": 1.1556, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.23076923076923078, | |
| "grad_norm": 0.2022550255060196, | |
| "learning_rate": 7.700156985871272e-06, | |
| "loss": 1.0994, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.2315541601255887, | |
| "grad_norm": 0.1965666264295578, | |
| "learning_rate": 7.692307692307694e-06, | |
| "loss": 1.0851, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.23233908948194662, | |
| "grad_norm": 0.23619407415390015, | |
| "learning_rate": 7.684458398744113e-06, | |
| "loss": 1.2371, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.23312401883830455, | |
| "grad_norm": 0.18914885818958282, | |
| "learning_rate": 7.676609105180535e-06, | |
| "loss": 1.1038, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.23390894819466249, | |
| "grad_norm": 0.18863645195960999, | |
| "learning_rate": 7.668759811616954e-06, | |
| "loss": 1.1318, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.23469387755102042, | |
| "grad_norm": 0.2066003382205963, | |
| "learning_rate": 7.660910518053376e-06, | |
| "loss": 1.1542, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.23547880690737832, | |
| "grad_norm": 0.21004410088062286, | |
| "learning_rate": 7.653061224489796e-06, | |
| "loss": 1.1647, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.23626373626373626, | |
| "grad_norm": 0.19493690133094788, | |
| "learning_rate": 7.645211930926217e-06, | |
| "loss": 1.1743, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.2370486656200942, | |
| "grad_norm": 0.1926044225692749, | |
| "learning_rate": 7.637362637362638e-06, | |
| "loss": 1.1333, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.23783359497645212, | |
| "grad_norm": 0.19471141695976257, | |
| "learning_rate": 7.629513343799058e-06, | |
| "loss": 1.0989, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.23861852433281006, | |
| "grad_norm": 0.1914125680923462, | |
| "learning_rate": 7.62166405023548e-06, | |
| "loss": 1.1427, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.239403453689168, | |
| "grad_norm": 0.21529747545719147, | |
| "learning_rate": 7.6138147566719e-06, | |
| "loss": 1.117, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.2401883830455259, | |
| "grad_norm": 0.2219187617301941, | |
| "learning_rate": 7.605965463108321e-06, | |
| "loss": 1.1584, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.24097331240188383, | |
| "grad_norm": 0.20323887467384338, | |
| "learning_rate": 7.598116169544741e-06, | |
| "loss": 1.1543, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.24175824175824176, | |
| "grad_norm": 0.19014237821102142, | |
| "learning_rate": 7.5902668759811625e-06, | |
| "loss": 1.1149, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.2425431711145997, | |
| "grad_norm": 0.2022504359483719, | |
| "learning_rate": 7.582417582417583e-06, | |
| "loss": 1.1835, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.24332810047095763, | |
| "grad_norm": 0.19809505343437195, | |
| "learning_rate": 7.574568288854004e-06, | |
| "loss": 1.0916, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.24411302982731553, | |
| "grad_norm": 0.19800910353660583, | |
| "learning_rate": 7.566718995290424e-06, | |
| "loss": 1.1467, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.24489795918367346, | |
| "grad_norm": 0.19982385635375977, | |
| "learning_rate": 7.558869701726846e-06, | |
| "loss": 1.1186, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.2456828885400314, | |
| "grad_norm": 0.19915273785591125, | |
| "learning_rate": 7.551020408163265e-06, | |
| "loss": 1.1743, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.24646781789638933, | |
| "grad_norm": 0.21240346133708954, | |
| "learning_rate": 7.543171114599687e-06, | |
| "loss": 1.1575, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.24725274725274726, | |
| "grad_norm": 0.19470131397247314, | |
| "learning_rate": 7.535321821036108e-06, | |
| "loss": 1.1745, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.24803767660910517, | |
| "grad_norm": 0.21574276685714722, | |
| "learning_rate": 7.527472527472528e-06, | |
| "loss": 1.1922, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.2488226059654631, | |
| "grad_norm": 0.1990724802017212, | |
| "learning_rate": 7.519623233908949e-06, | |
| "loss": 1.1765, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.24960753532182103, | |
| "grad_norm": 0.1972692310810089, | |
| "learning_rate": 7.511773940345369e-06, | |
| "loss": 1.1848, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.25039246467817894, | |
| "grad_norm": 0.21943879127502441, | |
| "learning_rate": 7.50392464678179e-06, | |
| "loss": 1.1103, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.25117739403453687, | |
| "grad_norm": 0.20259033143520355, | |
| "learning_rate": 7.496075353218211e-06, | |
| "loss": 1.1587, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.2519623233908948, | |
| "grad_norm": 0.20974001288414001, | |
| "learning_rate": 7.488226059654632e-06, | |
| "loss": 1.1161, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.25274725274725274, | |
| "grad_norm": 0.1978650540113449, | |
| "learning_rate": 7.480376766091052e-06, | |
| "loss": 1.1743, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.25353218210361067, | |
| "grad_norm": 0.21677835285663605, | |
| "learning_rate": 7.472527472527473e-06, | |
| "loss": 1.1915, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.2543171114599686, | |
| "grad_norm": 0.19082266092300415, | |
| "learning_rate": 7.464678178963893e-06, | |
| "loss": 1.1146, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.25510204081632654, | |
| "grad_norm": 0.20703904330730438, | |
| "learning_rate": 7.4568288854003145e-06, | |
| "loss": 1.1325, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.25588697017268447, | |
| "grad_norm": 0.20791569352149963, | |
| "learning_rate": 7.448979591836736e-06, | |
| "loss": 1.1569, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.2566718995290424, | |
| "grad_norm": 0.2217319905757904, | |
| "learning_rate": 7.441130298273156e-06, | |
| "loss": 1.1823, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.25745682888540034, | |
| "grad_norm": 0.20722460746765137, | |
| "learning_rate": 7.433281004709577e-06, | |
| "loss": 1.1482, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.25824175824175827, | |
| "grad_norm": 0.2060956358909607, | |
| "learning_rate": 7.425431711145998e-06, | |
| "loss": 1.1829, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.25902668759811615, | |
| "grad_norm": 0.21304012835025787, | |
| "learning_rate": 7.417582417582418e-06, | |
| "loss": 1.1984, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.2598116169544741, | |
| "grad_norm": 0.21216687560081482, | |
| "learning_rate": 7.409733124018839e-06, | |
| "loss": 1.1238, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.260596546310832, | |
| "grad_norm": 0.2066863626241684, | |
| "learning_rate": 7.40188383045526e-06, | |
| "loss": 1.1304, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.26138147566718994, | |
| "grad_norm": 0.2035655975341797, | |
| "learning_rate": 7.39403453689168e-06, | |
| "loss": 1.1423, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.2621664050235479, | |
| "grad_norm": 0.2093246579170227, | |
| "learning_rate": 7.386185243328101e-06, | |
| "loss": 1.185, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.2629513343799058, | |
| "grad_norm": 0.20066991448402405, | |
| "learning_rate": 7.378335949764521e-06, | |
| "loss": 1.0917, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.26373626373626374, | |
| "grad_norm": 0.21041403710842133, | |
| "learning_rate": 7.370486656200942e-06, | |
| "loss": 1.1487, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.2645211930926217, | |
| "grad_norm": 0.19787679612636566, | |
| "learning_rate": 7.362637362637364e-06, | |
| "loss": 1.1429, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.2653061224489796, | |
| "grad_norm": 0.2156287282705307, | |
| "learning_rate": 7.3547880690737835e-06, | |
| "loss": 1.1162, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.26609105180533754, | |
| "grad_norm": 0.23158158361911774, | |
| "learning_rate": 7.346938775510205e-06, | |
| "loss": 1.1608, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.2668759811616955, | |
| "grad_norm": 0.21346524357795715, | |
| "learning_rate": 7.339089481946625e-06, | |
| "loss": 1.1092, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.26766091051805335, | |
| "grad_norm": 0.2354096919298172, | |
| "learning_rate": 7.331240188383047e-06, | |
| "loss": 1.1448, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.2684458398744113, | |
| "grad_norm": 0.20579902827739716, | |
| "learning_rate": 7.3233908948194665e-06, | |
| "loss": 1.1627, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.2692307692307692, | |
| "grad_norm": 0.24660111963748932, | |
| "learning_rate": 7.315541601255888e-06, | |
| "loss": 1.1113, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.27001569858712715, | |
| "grad_norm": 0.21383073925971985, | |
| "learning_rate": 7.307692307692308e-06, | |
| "loss": 1.1361, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.2708006279434851, | |
| "grad_norm": 0.2156330794095993, | |
| "learning_rate": 7.299843014128729e-06, | |
| "loss": 1.0608, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.271585557299843, | |
| "grad_norm": 0.20856386423110962, | |
| "learning_rate": 7.29199372056515e-06, | |
| "loss": 1.1545, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.27237048665620095, | |
| "grad_norm": 0.21835865080356598, | |
| "learning_rate": 7.28414442700157e-06, | |
| "loss": 1.1998, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.2731554160125589, | |
| "grad_norm": 0.23513004183769226, | |
| "learning_rate": 7.2762951334379916e-06, | |
| "loss": 1.2048, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.2739403453689168, | |
| "grad_norm": 0.22506913542747498, | |
| "learning_rate": 7.268445839874412e-06, | |
| "loss": 1.1828, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.27472527472527475, | |
| "grad_norm": 0.216568723320961, | |
| "learning_rate": 7.260596546310833e-06, | |
| "loss": 1.1928, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2755102040816326, | |
| "grad_norm": 0.2345465123653412, | |
| "learning_rate": 7.252747252747253e-06, | |
| "loss": 1.1723, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.27629513343799056, | |
| "grad_norm": 0.21116310358047485, | |
| "learning_rate": 7.244897959183675e-06, | |
| "loss": 1.1362, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.2770800627943485, | |
| "grad_norm": 0.20539937913417816, | |
| "learning_rate": 7.237048665620094e-06, | |
| "loss": 1.1692, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.2778649921507064, | |
| "grad_norm": 0.22223587334156036, | |
| "learning_rate": 7.229199372056516e-06, | |
| "loss": 1.1589, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.27864992150706436, | |
| "grad_norm": 0.20060519874095917, | |
| "learning_rate": 7.2213500784929355e-06, | |
| "loss": 1.1264, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.2794348508634223, | |
| "grad_norm": 0.21847526729106903, | |
| "learning_rate": 7.213500784929357e-06, | |
| "loss": 1.1401, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.2802197802197802, | |
| "grad_norm": 0.22963590919971466, | |
| "learning_rate": 7.205651491365777e-06, | |
| "loss": 1.1523, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.28100470957613816, | |
| "grad_norm": 0.21631349623203278, | |
| "learning_rate": 7.197802197802198e-06, | |
| "loss": 1.1265, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.2817896389324961, | |
| "grad_norm": 0.21508990228176117, | |
| "learning_rate": 7.189952904238619e-06, | |
| "loss": 1.1125, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.282574568288854, | |
| "grad_norm": 0.2024298906326294, | |
| "learning_rate": 7.18210361067504e-06, | |
| "loss": 1.0864, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.28335949764521196, | |
| "grad_norm": 0.21588054299354553, | |
| "learning_rate": 7.174254317111461e-06, | |
| "loss": 1.1265, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.28414442700156983, | |
| "grad_norm": 0.3438448905944824, | |
| "learning_rate": 7.166405023547881e-06, | |
| "loss": 1.1087, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.28492935635792777, | |
| "grad_norm": 0.23037710785865784, | |
| "learning_rate": 7.1585557299843024e-06, | |
| "loss": 1.1683, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 0.371964693069458, | |
| "learning_rate": 7.150706436420722e-06, | |
| "loss": 1.1306, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.28649921507064363, | |
| "grad_norm": 0.2277277410030365, | |
| "learning_rate": 7.1428571428571436e-06, | |
| "loss": 1.1758, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.28728414442700156, | |
| "grad_norm": 0.20543402433395386, | |
| "learning_rate": 7.135007849293564e-06, | |
| "loss": 1.1501, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.2880690737833595, | |
| "grad_norm": 0.2121914029121399, | |
| "learning_rate": 7.127158555729985e-06, | |
| "loss": 1.1247, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.28885400313971743, | |
| "grad_norm": 0.22225429117679596, | |
| "learning_rate": 7.119309262166405e-06, | |
| "loss": 1.1231, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.28963893249607536, | |
| "grad_norm": 0.21142037212848663, | |
| "learning_rate": 7.111459968602827e-06, | |
| "loss": 1.1528, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.2904238618524333, | |
| "grad_norm": 0.2387160062789917, | |
| "learning_rate": 7.103610675039247e-06, | |
| "loss": 1.1037, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.29120879120879123, | |
| "grad_norm": 0.2219192534685135, | |
| "learning_rate": 7.095761381475668e-06, | |
| "loss": 1.1385, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.29199372056514916, | |
| "grad_norm": 0.21481142938137054, | |
| "learning_rate": 7.087912087912089e-06, | |
| "loss": 1.1199, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.29277864992150704, | |
| "grad_norm": 0.2126331329345703, | |
| "learning_rate": 7.080062794348509e-06, | |
| "loss": 1.1346, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.29356357927786497, | |
| "grad_norm": 0.23164328932762146, | |
| "learning_rate": 7.07221350078493e-06, | |
| "loss": 1.1783, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.2943485086342229, | |
| "grad_norm": 0.23608547449111938, | |
| "learning_rate": 7.06436420722135e-06, | |
| "loss": 1.1549, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.29513343799058084, | |
| "grad_norm": 0.2102809101343155, | |
| "learning_rate": 7.056514913657771e-06, | |
| "loss": 1.144, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.29591836734693877, | |
| "grad_norm": 0.21914629638195038, | |
| "learning_rate": 7.048665620094192e-06, | |
| "loss": 1.1649, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.2967032967032967, | |
| "grad_norm": 0.22110004723072052, | |
| "learning_rate": 7.0408163265306125e-06, | |
| "loss": 1.1096, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.29748822605965464, | |
| "grad_norm": 0.23272880911827087, | |
| "learning_rate": 7.032967032967034e-06, | |
| "loss": 1.17, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.29827315541601257, | |
| "grad_norm": 0.2373398244380951, | |
| "learning_rate": 7.0251177394034545e-06, | |
| "loss": 1.1734, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.2990580847723705, | |
| "grad_norm": 0.20945794880390167, | |
| "learning_rate": 7.017268445839875e-06, | |
| "loss": 1.1192, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.29984301412872844, | |
| "grad_norm": 0.22829462587833405, | |
| "learning_rate": 7.0094191522762956e-06, | |
| "loss": 1.0966, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.30062794348508637, | |
| "grad_norm": 0.28657934069633484, | |
| "learning_rate": 7.001569858712717e-06, | |
| "loss": 1.0827, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.30141287284144425, | |
| "grad_norm": 0.2213151603937149, | |
| "learning_rate": 6.993720565149137e-06, | |
| "loss": 1.1452, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.3021978021978022, | |
| "grad_norm": 0.2172708660364151, | |
| "learning_rate": 6.985871271585558e-06, | |
| "loss": 1.1301, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.3029827315541601, | |
| "grad_norm": 0.21562695503234863, | |
| "learning_rate": 6.978021978021979e-06, | |
| "loss": 1.0969, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.30376766091051804, | |
| "grad_norm": 0.22482970356941223, | |
| "learning_rate": 6.970172684458399e-06, | |
| "loss": 1.0412, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.304552590266876, | |
| "grad_norm": 0.22429226338863373, | |
| "learning_rate": 6.96232339089482e-06, | |
| "loss": 1.1175, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.3053375196232339, | |
| "grad_norm": 0.306858628988266, | |
| "learning_rate": 6.954474097331241e-06, | |
| "loss": 1.1633, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.30612244897959184, | |
| "grad_norm": 0.22035177052021027, | |
| "learning_rate": 6.946624803767662e-06, | |
| "loss": 1.105, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.3069073783359498, | |
| "grad_norm": 0.20838768780231476, | |
| "learning_rate": 6.938775510204082e-06, | |
| "loss": 1.1126, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 0.24387304484844208, | |
| "learning_rate": 6.930926216640504e-06, | |
| "loss": 1.0718, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.30847723704866564, | |
| "grad_norm": 0.20998525619506836, | |
| "learning_rate": 6.923076923076923e-06, | |
| "loss": 1.1408, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.3092621664050236, | |
| "grad_norm": 0.21903569996356964, | |
| "learning_rate": 6.915227629513345e-06, | |
| "loss": 1.1225, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.31004709576138145, | |
| "grad_norm": 0.21235527098178864, | |
| "learning_rate": 6.9073783359497645e-06, | |
| "loss": 1.0895, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.3108320251177394, | |
| "grad_norm": 0.24162974953651428, | |
| "learning_rate": 6.899529042386186e-06, | |
| "loss": 1.1487, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.3116169544740973, | |
| "grad_norm": 0.22563737630844116, | |
| "learning_rate": 6.8916797488226065e-06, | |
| "loss": 1.1442, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.31240188383045525, | |
| "grad_norm": 0.2697785794734955, | |
| "learning_rate": 6.883830455259027e-06, | |
| "loss": 1.2054, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.3131868131868132, | |
| "grad_norm": 0.20973092317581177, | |
| "learning_rate": 6.8759811616954476e-06, | |
| "loss": 1.1354, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.3139717425431711, | |
| "grad_norm": 0.21411223709583282, | |
| "learning_rate": 6.868131868131869e-06, | |
| "loss": 1.1232, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.31475667189952905, | |
| "grad_norm": 0.2229021191596985, | |
| "learning_rate": 6.8602825745682895e-06, | |
| "loss": 1.0909, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.315541601255887, | |
| "grad_norm": 0.25998759269714355, | |
| "learning_rate": 6.85243328100471e-06, | |
| "loss": 1.1639, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.3163265306122449, | |
| "grad_norm": 0.2209548056125641, | |
| "learning_rate": 6.8445839874411315e-06, | |
| "loss": 1.122, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.31711145996860285, | |
| "grad_norm": 0.2104836255311966, | |
| "learning_rate": 6.836734693877551e-06, | |
| "loss": 1.1052, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.3178963893249607, | |
| "grad_norm": 0.22340314090251923, | |
| "learning_rate": 6.828885400313973e-06, | |
| "loss": 1.1621, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.31868131868131866, | |
| "grad_norm": 0.20940294861793518, | |
| "learning_rate": 6.821036106750393e-06, | |
| "loss": 1.0895, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.3194662480376766, | |
| "grad_norm": 0.23529349267482758, | |
| "learning_rate": 6.813186813186814e-06, | |
| "loss": 1.1796, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.3202511773940345, | |
| "grad_norm": 0.22994717955589294, | |
| "learning_rate": 6.805337519623234e-06, | |
| "loss": 1.1736, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.32103610675039246, | |
| "grad_norm": 0.21799279749393463, | |
| "learning_rate": 6.797488226059656e-06, | |
| "loss": 1.0804, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.3218210361067504, | |
| "grad_norm": 0.22495627403259277, | |
| "learning_rate": 6.789638932496075e-06, | |
| "loss": 1.1605, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.3226059654631083, | |
| "grad_norm": 0.236924409866333, | |
| "learning_rate": 6.781789638932497e-06, | |
| "loss": 1.1081, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.32339089481946626, | |
| "grad_norm": 0.22784234583377838, | |
| "learning_rate": 6.773940345368918e-06, | |
| "loss": 1.092, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.3241758241758242, | |
| "grad_norm": 0.22424954175949097, | |
| "learning_rate": 6.766091051805338e-06, | |
| "loss": 1.1431, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.3249607535321821, | |
| "grad_norm": 0.22108778357505798, | |
| "learning_rate": 6.758241758241759e-06, | |
| "loss": 1.0865, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.32574568288854006, | |
| "grad_norm": 0.22631670534610748, | |
| "learning_rate": 6.750392464678179e-06, | |
| "loss": 1.1481, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.32653061224489793, | |
| "grad_norm": 0.23880915343761444, | |
| "learning_rate": 6.7425431711146e-06, | |
| "loss": 1.1503, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.32731554160125587, | |
| "grad_norm": 0.21916764974594116, | |
| "learning_rate": 6.734693877551021e-06, | |
| "loss": 1.1001, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.3281004709576138, | |
| "grad_norm": 0.22851671278476715, | |
| "learning_rate": 6.7268445839874415e-06, | |
| "loss": 1.0496, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.32888540031397173, | |
| "grad_norm": 0.2205754965543747, | |
| "learning_rate": 6.718995290423862e-06, | |
| "loss": 1.1324, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.32967032967032966, | |
| "grad_norm": 0.2369145303964615, | |
| "learning_rate": 6.7111459968602835e-06, | |
| "loss": 1.1575, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.3304552590266876, | |
| "grad_norm": 0.21567904949188232, | |
| "learning_rate": 6.703296703296703e-06, | |
| "loss": 1.1247, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.33124018838304553, | |
| "grad_norm": 0.22381411492824554, | |
| "learning_rate": 6.695447409733125e-06, | |
| "loss": 1.0777, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.33202511773940346, | |
| "grad_norm": 0.21957136690616608, | |
| "learning_rate": 6.687598116169546e-06, | |
| "loss": 1.0945, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.3328100470957614, | |
| "grad_norm": 0.22392071783542633, | |
| "learning_rate": 6.679748822605966e-06, | |
| "loss": 1.084, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.33359497645211933, | |
| "grad_norm": 0.21616291999816895, | |
| "learning_rate": 6.671899529042387e-06, | |
| "loss": 1.0848, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.33437990580847726, | |
| "grad_norm": 0.2230328768491745, | |
| "learning_rate": 6.664050235478807e-06, | |
| "loss": 1.1058, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.33516483516483514, | |
| "grad_norm": 0.21062208712100983, | |
| "learning_rate": 6.656200941915228e-06, | |
| "loss": 1.0867, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.3359497645211931, | |
| "grad_norm": 0.22089996933937073, | |
| "learning_rate": 6.648351648351649e-06, | |
| "loss": 1.1032, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.336734693877551, | |
| "grad_norm": 0.2328905314207077, | |
| "learning_rate": 6.64050235478807e-06, | |
| "loss": 1.1551, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.33751962323390894, | |
| "grad_norm": 0.23525848984718323, | |
| "learning_rate": 6.63265306122449e-06, | |
| "loss": 1.1529, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.33830455259026687, | |
| "grad_norm": 0.2343936562538147, | |
| "learning_rate": 6.624803767660911e-06, | |
| "loss": 1.1262, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.3390894819466248, | |
| "grad_norm": 0.2223115712404251, | |
| "learning_rate": 6.616954474097331e-06, | |
| "loss": 1.0855, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.33987441130298274, | |
| "grad_norm": 0.24484506249427795, | |
| "learning_rate": 6.609105180533752e-06, | |
| "loss": 1.0972, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.34065934065934067, | |
| "grad_norm": 0.22786875069141388, | |
| "learning_rate": 6.601255886970174e-06, | |
| "loss": 1.0784, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.3414442700156986, | |
| "grad_norm": 0.23298341035842896, | |
| "learning_rate": 6.5934065934065935e-06, | |
| "loss": 1.1613, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.34222919937205654, | |
| "grad_norm": 0.2376134991645813, | |
| "learning_rate": 6.585557299843015e-06, | |
| "loss": 1.0892, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.34301412872841447, | |
| "grad_norm": 0.23039846122264862, | |
| "learning_rate": 6.5777080062794355e-06, | |
| "loss": 1.1073, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.34379905808477235, | |
| "grad_norm": 0.23520535230636597, | |
| "learning_rate": 6.569858712715856e-06, | |
| "loss": 1.0612, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.3445839874411303, | |
| "grad_norm": 0.26625117659568787, | |
| "learning_rate": 6.562009419152277e-06, | |
| "loss": 1.0821, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.3453689167974882, | |
| "grad_norm": 0.22589145600795746, | |
| "learning_rate": 6.554160125588698e-06, | |
| "loss": 1.0766, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.34615384615384615, | |
| "grad_norm": 0.23303988575935364, | |
| "learning_rate": 6.546310832025118e-06, | |
| "loss": 1.1523, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.3469387755102041, | |
| "grad_norm": 0.24992690980434418, | |
| "learning_rate": 6.538461538461539e-06, | |
| "loss": 1.1378, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.347723704866562, | |
| "grad_norm": 0.23018966615200043, | |
| "learning_rate": 6.530612244897959e-06, | |
| "loss": 1.1121, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.34850863422291994, | |
| "grad_norm": 0.23048517107963562, | |
| "learning_rate": 6.52276295133438e-06, | |
| "loss": 1.1031, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.3492935635792779, | |
| "grad_norm": 0.239332914352417, | |
| "learning_rate": 6.514913657770802e-06, | |
| "loss": 1.1592, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.3500784929356358, | |
| "grad_norm": 0.236043319106102, | |
| "learning_rate": 6.507064364207221e-06, | |
| "loss": 1.082, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.35086342229199374, | |
| "grad_norm": 0.22672872245311737, | |
| "learning_rate": 6.499215070643643e-06, | |
| "loss": 1.1188, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.3516483516483517, | |
| "grad_norm": 0.23188328742980957, | |
| "learning_rate": 6.491365777080063e-06, | |
| "loss": 1.1368, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.35243328100470955, | |
| "grad_norm": 0.23719431459903717, | |
| "learning_rate": 6.483516483516485e-06, | |
| "loss": 1.1847, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.3532182103610675, | |
| "grad_norm": 0.23115630447864532, | |
| "learning_rate": 6.4756671899529044e-06, | |
| "loss": 1.1583, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.3540031397174254, | |
| "grad_norm": 0.22967442870140076, | |
| "learning_rate": 6.467817896389326e-06, | |
| "loss": 1.1319, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.35478806907378335, | |
| "grad_norm": 0.22553735971450806, | |
| "learning_rate": 6.4599686028257455e-06, | |
| "loss": 1.0739, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.3555729984301413, | |
| "grad_norm": 0.2251790314912796, | |
| "learning_rate": 6.452119309262167e-06, | |
| "loss": 1.1276, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.3563579277864992, | |
| "grad_norm": 0.22052745521068573, | |
| "learning_rate": 6.4442700156985875e-06, | |
| "loss": 1.1079, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.35714285714285715, | |
| "grad_norm": 0.2503249943256378, | |
| "learning_rate": 6.436420722135008e-06, | |
| "loss": 1.1554, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.3579277864992151, | |
| "grad_norm": 0.23954695463180542, | |
| "learning_rate": 6.4285714285714295e-06, | |
| "loss": 1.1269, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.358712715855573, | |
| "grad_norm": 0.2597452700138092, | |
| "learning_rate": 6.42072213500785e-06, | |
| "loss": 1.0967, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.35949764521193095, | |
| "grad_norm": 0.2288360893726349, | |
| "learning_rate": 6.4128728414442706e-06, | |
| "loss": 1.1297, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.3602825745682888, | |
| "grad_norm": 0.23878303170204163, | |
| "learning_rate": 6.405023547880691e-06, | |
| "loss": 1.1208, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.36106750392464676, | |
| "grad_norm": 0.22452662885189056, | |
| "learning_rate": 6.3971742543171125e-06, | |
| "loss": 1.0891, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.3618524332810047, | |
| "grad_norm": 0.23185127973556519, | |
| "learning_rate": 6.389324960753532e-06, | |
| "loss": 1.058, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.3626373626373626, | |
| "grad_norm": 0.23189175128936768, | |
| "learning_rate": 6.381475667189954e-06, | |
| "loss": 1.0812, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.36342229199372056, | |
| "grad_norm": 0.23450538516044617, | |
| "learning_rate": 6.373626373626373e-06, | |
| "loss": 1.1089, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.3642072213500785, | |
| "grad_norm": 0.22508233785629272, | |
| "learning_rate": 6.365777080062795e-06, | |
| "loss": 1.1062, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.3649921507064364, | |
| "grad_norm": 0.24207262694835663, | |
| "learning_rate": 6.357927786499215e-06, | |
| "loss": 1.1396, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.36577708006279436, | |
| "grad_norm": 0.2606029510498047, | |
| "learning_rate": 6.350078492935636e-06, | |
| "loss": 1.1261, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.3665620094191523, | |
| "grad_norm": 0.22962482273578644, | |
| "learning_rate": 6.342229199372057e-06, | |
| "loss": 1.1354, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.3673469387755102, | |
| "grad_norm": 0.3107023239135742, | |
| "learning_rate": 6.334379905808478e-06, | |
| "loss": 1.1735, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.36813186813186816, | |
| "grad_norm": 0.23643136024475098, | |
| "learning_rate": 6.326530612244899e-06, | |
| "loss": 1.086, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.36891679748822603, | |
| "grad_norm": 0.240147203207016, | |
| "learning_rate": 6.318681318681319e-06, | |
| "loss": 1.1356, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.36970172684458397, | |
| "grad_norm": 0.2438742220401764, | |
| "learning_rate": 6.31083202511774e-06, | |
| "loss": 1.1539, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.3704866562009419, | |
| "grad_norm": 0.2621842920780182, | |
| "learning_rate": 6.30298273155416e-06, | |
| "loss": 1.1623, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.37127158555729983, | |
| "grad_norm": 0.246039479970932, | |
| "learning_rate": 6.2951334379905815e-06, | |
| "loss": 1.1418, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.37205651491365777, | |
| "grad_norm": 0.2392723560333252, | |
| "learning_rate": 6.287284144427002e-06, | |
| "loss": 1.1692, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.3728414442700157, | |
| "grad_norm": 0.23897096514701843, | |
| "learning_rate": 6.279434850863423e-06, | |
| "loss": 1.111, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.37362637362637363, | |
| "grad_norm": 0.24796399474143982, | |
| "learning_rate": 6.271585557299843e-06, | |
| "loss": 1.0847, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.37441130298273156, | |
| "grad_norm": 0.33171799778938293, | |
| "learning_rate": 6.2637362637362645e-06, | |
| "loss": 1.226, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.3751962323390895, | |
| "grad_norm": 0.2655907869338989, | |
| "learning_rate": 6.255886970172685e-06, | |
| "loss": 1.0548, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.37598116169544743, | |
| "grad_norm": 0.2475586235523224, | |
| "learning_rate": 6.248037676609106e-06, | |
| "loss": 1.0914, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.37676609105180536, | |
| "grad_norm": 0.2385740876197815, | |
| "learning_rate": 6.240188383045527e-06, | |
| "loss": 1.0984, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.37755102040816324, | |
| "grad_norm": 0.27485817670822144, | |
| "learning_rate": 6.232339089481947e-06, | |
| "loss": 1.1461, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.3783359497645212, | |
| "grad_norm": 0.23348768055438995, | |
| "learning_rate": 6.224489795918368e-06, | |
| "loss": 1.0617, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.3791208791208791, | |
| "grad_norm": 0.23941084742546082, | |
| "learning_rate": 6.216640502354788e-06, | |
| "loss": 1.104, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.37990580847723704, | |
| "grad_norm": 0.22513240575790405, | |
| "learning_rate": 6.208791208791209e-06, | |
| "loss": 1.0354, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.38069073783359497, | |
| "grad_norm": 0.2495739459991455, | |
| "learning_rate": 6.20094191522763e-06, | |
| "loss": 1.1075, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.3814756671899529, | |
| "grad_norm": 0.23454253375530243, | |
| "learning_rate": 6.19309262166405e-06, | |
| "loss": 1.07, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.38226059654631084, | |
| "grad_norm": 0.2577785849571228, | |
| "learning_rate": 6.185243328100472e-06, | |
| "loss": 1.1229, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.38304552590266877, | |
| "grad_norm": 0.24680796265602112, | |
| "learning_rate": 6.177394034536892e-06, | |
| "loss": 1.1011, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.3838304552590267, | |
| "grad_norm": 0.23051689565181732, | |
| "learning_rate": 6.169544740973314e-06, | |
| "loss": 1.0953, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 0.25582489371299744, | |
| "learning_rate": 6.1616954474097335e-06, | |
| "loss": 1.0825, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.38540031397174257, | |
| "grad_norm": 0.2379298061132431, | |
| "learning_rate": 6.153846153846155e-06, | |
| "loss": 1.0859, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.38618524332810045, | |
| "grad_norm": 0.2519659996032715, | |
| "learning_rate": 6.145996860282575e-06, | |
| "loss": 1.1339, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.3869701726844584, | |
| "grad_norm": 0.23378105461597443, | |
| "learning_rate": 6.138147566718996e-06, | |
| "loss": 1.0865, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.3877551020408163, | |
| "grad_norm": 0.24967290461063385, | |
| "learning_rate": 6.1302982731554165e-06, | |
| "loss": 1.1257, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.38854003139717425, | |
| "grad_norm": 0.2489061802625656, | |
| "learning_rate": 6.122448979591837e-06, | |
| "loss": 1.108, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.3893249607535322, | |
| "grad_norm": 0.24166519939899445, | |
| "learning_rate": 6.114599686028258e-06, | |
| "loss": 1.1026, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.3901098901098901, | |
| "grad_norm": 0.2487422674894333, | |
| "learning_rate": 6.106750392464679e-06, | |
| "loss": 1.144, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.39089481946624804, | |
| "grad_norm": 0.2340540736913681, | |
| "learning_rate": 6.0989010989011e-06, | |
| "loss": 1.0835, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.391679748822606, | |
| "grad_norm": 0.24633920192718506, | |
| "learning_rate": 6.09105180533752e-06, | |
| "loss": 1.0741, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.3924646781789639, | |
| "grad_norm": 0.23868292570114136, | |
| "learning_rate": 6.0832025117739416e-06, | |
| "loss": 1.0815, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.39324960753532184, | |
| "grad_norm": 0.2488425076007843, | |
| "learning_rate": 6.075353218210361e-06, | |
| "loss": 1.09, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.3940345368916798, | |
| "grad_norm": 0.22781234979629517, | |
| "learning_rate": 6.067503924646783e-06, | |
| "loss": 1.0947, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.39481946624803765, | |
| "grad_norm": 0.24967414140701294, | |
| "learning_rate": 6.059654631083202e-06, | |
| "loss": 1.1331, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.3956043956043956, | |
| "grad_norm": 0.2647199332714081, | |
| "learning_rate": 6.051805337519624e-06, | |
| "loss": 1.1436, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.3963893249607535, | |
| "grad_norm": 0.24577093124389648, | |
| "learning_rate": 6.043956043956044e-06, | |
| "loss": 1.0649, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.39717425431711145, | |
| "grad_norm": 0.23209503293037415, | |
| "learning_rate": 6.036106750392465e-06, | |
| "loss": 1.0968, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.3979591836734694, | |
| "grad_norm": 0.23621460795402527, | |
| "learning_rate": 6.0282574568288855e-06, | |
| "loss": 1.0858, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.3987441130298273, | |
| "grad_norm": 0.23415528237819672, | |
| "learning_rate": 6.020408163265307e-06, | |
| "loss": 1.039, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.39952904238618525, | |
| "grad_norm": 0.24313485622406006, | |
| "learning_rate": 6.012558869701728e-06, | |
| "loss": 1.1233, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.4003139717425432, | |
| "grad_norm": 0.24086087942123413, | |
| "learning_rate": 6.004709576138148e-06, | |
| "loss": 1.0779, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.4010989010989011, | |
| "grad_norm": 0.22834287583827972, | |
| "learning_rate": 5.996860282574569e-06, | |
| "loss": 1.0448, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.40188383045525905, | |
| "grad_norm": 0.2525885999202728, | |
| "learning_rate": 5.989010989010989e-06, | |
| "loss": 1.0905, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.4026687598116169, | |
| "grad_norm": 0.26272568106651306, | |
| "learning_rate": 5.9811616954474105e-06, | |
| "loss": 1.114, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.40345368916797486, | |
| "grad_norm": 0.2448360174894333, | |
| "learning_rate": 5.973312401883831e-06, | |
| "loss": 1.0682, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.4042386185243328, | |
| "grad_norm": 0.24594442546367645, | |
| "learning_rate": 5.965463108320252e-06, | |
| "loss": 1.1067, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.4050235478806907, | |
| "grad_norm": 0.23227113485336304, | |
| "learning_rate": 5.957613814756672e-06, | |
| "loss": 1.0592, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.40580847723704866, | |
| "grad_norm": 0.24381886422634125, | |
| "learning_rate": 5.949764521193094e-06, | |
| "loss": 1.0932, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.4065934065934066, | |
| "grad_norm": 0.2468293309211731, | |
| "learning_rate": 5.941915227629513e-06, | |
| "loss": 1.0975, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.4073783359497645, | |
| "grad_norm": 0.24225494265556335, | |
| "learning_rate": 5.934065934065935e-06, | |
| "loss": 1.095, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.40816326530612246, | |
| "grad_norm": 0.2511250972747803, | |
| "learning_rate": 5.926216640502356e-06, | |
| "loss": 1.1268, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.4089481946624804, | |
| "grad_norm": 0.25592711567878723, | |
| "learning_rate": 5.918367346938776e-06, | |
| "loss": 1.0655, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.4097331240188383, | |
| "grad_norm": 0.2539741098880768, | |
| "learning_rate": 5.910518053375197e-06, | |
| "loss": 1.0909, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.41051805337519626, | |
| "grad_norm": 0.2586928904056549, | |
| "learning_rate": 5.902668759811617e-06, | |
| "loss": 1.102, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.41130298273155413, | |
| "grad_norm": 0.2530655860900879, | |
| "learning_rate": 5.894819466248038e-06, | |
| "loss": 1.1278, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.41208791208791207, | |
| "grad_norm": 0.2644376754760742, | |
| "learning_rate": 5.886970172684459e-06, | |
| "loss": 1.0853, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.41287284144427, | |
| "grad_norm": 0.2579070031642914, | |
| "learning_rate": 5.8791208791208794e-06, | |
| "loss": 1.0258, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.41365777080062793, | |
| "grad_norm": 0.2522946894168854, | |
| "learning_rate": 5.8712715855573e-06, | |
| "loss": 1.0873, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.41444270015698587, | |
| "grad_norm": 0.24585580825805664, | |
| "learning_rate": 5.863422291993721e-06, | |
| "loss": 1.0685, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.4152276295133438, | |
| "grad_norm": 0.23922014236450195, | |
| "learning_rate": 5.855572998430141e-06, | |
| "loss": 1.0501, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.41601255886970173, | |
| "grad_norm": 0.3307192325592041, | |
| "learning_rate": 5.8477237048665625e-06, | |
| "loss": 1.098, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.41679748822605966, | |
| "grad_norm": 0.26279258728027344, | |
| "learning_rate": 5.839874411302984e-06, | |
| "loss": 1.1451, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.4175824175824176, | |
| "grad_norm": 0.24915215373039246, | |
| "learning_rate": 5.832025117739404e-06, | |
| "loss": 1.0702, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.41836734693877553, | |
| "grad_norm": 0.3191397488117218, | |
| "learning_rate": 5.824175824175825e-06, | |
| "loss": 1.1355, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.41915227629513346, | |
| "grad_norm": 0.24395009875297546, | |
| "learning_rate": 5.816326530612246e-06, | |
| "loss": 1.0693, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.41993720565149134, | |
| "grad_norm": 0.2642136812210083, | |
| "learning_rate": 5.808477237048666e-06, | |
| "loss": 1.0774, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.4207221350078493, | |
| "grad_norm": 0.24453726410865784, | |
| "learning_rate": 5.800627943485087e-06, | |
| "loss": 1.0767, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.4215070643642072, | |
| "grad_norm": 0.381024569272995, | |
| "learning_rate": 5.792778649921508e-06, | |
| "loss": 1.1438, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.42229199372056514, | |
| "grad_norm": 0.30179327726364136, | |
| "learning_rate": 5.784929356357928e-06, | |
| "loss": 1.1929, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.4230769230769231, | |
| "grad_norm": 0.24929693341255188, | |
| "learning_rate": 5.777080062794349e-06, | |
| "loss": 1.1482, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.423861852433281, | |
| "grad_norm": 0.26368796825408936, | |
| "learning_rate": 5.769230769230769e-06, | |
| "loss": 1.0321, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.42464678178963894, | |
| "grad_norm": 0.2380351573228836, | |
| "learning_rate": 5.76138147566719e-06, | |
| "loss": 1.1042, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.42543171114599687, | |
| "grad_norm": 0.25883641839027405, | |
| "learning_rate": 5.753532182103612e-06, | |
| "loss": 1.0072, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.4262166405023548, | |
| "grad_norm": 0.23825643956661224, | |
| "learning_rate": 5.7456828885400314e-06, | |
| "loss": 1.0884, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.42700156985871274, | |
| "grad_norm": 0.3005066215991974, | |
| "learning_rate": 5.737833594976453e-06, | |
| "loss": 1.0975, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.42778649921507067, | |
| "grad_norm": 0.24875208735466003, | |
| "learning_rate": 5.729984301412873e-06, | |
| "loss": 1.1003, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.42857142857142855, | |
| "grad_norm": 0.350685715675354, | |
| "learning_rate": 5.722135007849294e-06, | |
| "loss": 1.0723, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.4293563579277865, | |
| "grad_norm": 0.2770320177078247, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 1.1224, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.4301412872841444, | |
| "grad_norm": 0.24251538515090942, | |
| "learning_rate": 5.706436420722136e-06, | |
| "loss": 1.0548, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.43092621664050235, | |
| "grad_norm": 0.24872933328151703, | |
| "learning_rate": 5.698587127158556e-06, | |
| "loss": 1.0798, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.4317111459968603, | |
| "grad_norm": 0.2544524669647217, | |
| "learning_rate": 5.690737833594977e-06, | |
| "loss": 1.1251, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.4324960753532182, | |
| "grad_norm": 0.24090701341629028, | |
| "learning_rate": 5.682888540031397e-06, | |
| "loss": 1.1367, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.43328100470957615, | |
| "grad_norm": 0.23915883898735046, | |
| "learning_rate": 5.675039246467818e-06, | |
| "loss": 1.0647, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.4340659340659341, | |
| "grad_norm": 0.2448183000087738, | |
| "learning_rate": 5.6671899529042395e-06, | |
| "loss": 1.0755, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.434850863422292, | |
| "grad_norm": 0.25648248195648193, | |
| "learning_rate": 5.65934065934066e-06, | |
| "loss": 1.0525, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.43563579277864994, | |
| "grad_norm": 0.23780952394008636, | |
| "learning_rate": 5.651491365777081e-06, | |
| "loss": 1.0902, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.4364207221350079, | |
| "grad_norm": 0.2836390733718872, | |
| "learning_rate": 5.643642072213501e-06, | |
| "loss": 1.0601, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.43720565149136575, | |
| "grad_norm": 0.24821418523788452, | |
| "learning_rate": 5.635792778649923e-06, | |
| "loss": 1.083, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.4379905808477237, | |
| "grad_norm": 0.25154179334640503, | |
| "learning_rate": 5.627943485086342e-06, | |
| "loss": 1.1134, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.4387755102040816, | |
| "grad_norm": 0.26084184646606445, | |
| "learning_rate": 5.620094191522764e-06, | |
| "loss": 1.111, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.43956043956043955, | |
| "grad_norm": 0.2489292174577713, | |
| "learning_rate": 5.6122448979591834e-06, | |
| "loss": 1.1249, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.4403453689167975, | |
| "grad_norm": 0.2615135610103607, | |
| "learning_rate": 5.604395604395605e-06, | |
| "loss": 1.1283, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.4411302982731554, | |
| "grad_norm": 0.2750528156757355, | |
| "learning_rate": 5.596546310832025e-06, | |
| "loss": 1.0626, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.44191522762951335, | |
| "grad_norm": 0.2562703788280487, | |
| "learning_rate": 5.588697017268446e-06, | |
| "loss": 1.0732, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.4427001569858713, | |
| "grad_norm": 0.25479015707969666, | |
| "learning_rate": 5.580847723704867e-06, | |
| "loss": 1.0509, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.4434850863422292, | |
| "grad_norm": 0.30177152156829834, | |
| "learning_rate": 5.572998430141288e-06, | |
| "loss": 1.0856, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.44427001569858715, | |
| "grad_norm": 0.2459626942873001, | |
| "learning_rate": 5.5651491365777085e-06, | |
| "loss": 1.0828, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.44505494505494503, | |
| "grad_norm": 0.2764773964881897, | |
| "learning_rate": 5.557299843014129e-06, | |
| "loss": 1.1437, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.44583987441130296, | |
| "grad_norm": 0.24312351644039154, | |
| "learning_rate": 5.5494505494505504e-06, | |
| "loss": 1.0679, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.4466248037676609, | |
| "grad_norm": 0.5099506974220276, | |
| "learning_rate": 5.54160125588697e-06, | |
| "loss": 1.1154, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.4474097331240188, | |
| "grad_norm": 0.24318666756153107, | |
| "learning_rate": 5.5337519623233915e-06, | |
| "loss": 1.0725, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.44819466248037676, | |
| "grad_norm": 0.2769014835357666, | |
| "learning_rate": 5.525902668759811e-06, | |
| "loss": 1.1424, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.4489795918367347, | |
| "grad_norm": 0.2566874921321869, | |
| "learning_rate": 5.518053375196233e-06, | |
| "loss": 1.1231, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.4497645211930926, | |
| "grad_norm": 0.27724429965019226, | |
| "learning_rate": 5.510204081632653e-06, | |
| "loss": 1.105, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.45054945054945056, | |
| "grad_norm": 0.28734901547431946, | |
| "learning_rate": 5.502354788069074e-06, | |
| "loss": 1.1076, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.4513343799058085, | |
| "grad_norm": 0.25727584958076477, | |
| "learning_rate": 5.494505494505495e-06, | |
| "loss": 1.0624, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.4521193092621664, | |
| "grad_norm": 0.2726932764053345, | |
| "learning_rate": 5.486656200941916e-06, | |
| "loss": 1.0954, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.45290423861852436, | |
| "grad_norm": 0.249312624335289, | |
| "learning_rate": 5.478806907378337e-06, | |
| "loss": 1.1038, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.45368916797488223, | |
| "grad_norm": 0.2610965669155121, | |
| "learning_rate": 5.470957613814757e-06, | |
| "loss": 1.1033, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.45447409733124017, | |
| "grad_norm": 0.2687593400478363, | |
| "learning_rate": 5.463108320251178e-06, | |
| "loss": 1.1013, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.4552590266875981, | |
| "grad_norm": 0.2867527902126312, | |
| "learning_rate": 5.455259026687598e-06, | |
| "loss": 1.104, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.45604395604395603, | |
| "grad_norm": 0.2627805769443512, | |
| "learning_rate": 5.447409733124019e-06, | |
| "loss": 1.0958, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.45682888540031397, | |
| "grad_norm": 0.25707143545150757, | |
| "learning_rate": 5.43956043956044e-06, | |
| "loss": 1.1073, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.4576138147566719, | |
| "grad_norm": 0.26457729935646057, | |
| "learning_rate": 5.4317111459968605e-06, | |
| "loss": 1.1379, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.45839874411302983, | |
| "grad_norm": 0.25039640069007874, | |
| "learning_rate": 5.423861852433281e-06, | |
| "loss": 1.1107, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.45918367346938777, | |
| "grad_norm": 0.2630631923675537, | |
| "learning_rate": 5.4160125588697024e-06, | |
| "loss": 1.0336, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.4599686028257457, | |
| "grad_norm": 0.2637465298175812, | |
| "learning_rate": 5.408163265306123e-06, | |
| "loss": 1.1183, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.46075353218210363, | |
| "grad_norm": 0.2565993368625641, | |
| "learning_rate": 5.4003139717425436e-06, | |
| "loss": 1.0769, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 0.2511787414550781, | |
| "learning_rate": 5.392464678178965e-06, | |
| "loss": 1.1005, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.46232339089481944, | |
| "grad_norm": 0.24590060114860535, | |
| "learning_rate": 5.384615384615385e-06, | |
| "loss": 1.1103, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.4631083202511774, | |
| "grad_norm": 0.26895543932914734, | |
| "learning_rate": 5.376766091051806e-06, | |
| "loss": 1.0971, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.4638932496075353, | |
| "grad_norm": 0.25688987970352173, | |
| "learning_rate": 5.368916797488226e-06, | |
| "loss": 1.0973, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.46467817896389324, | |
| "grad_norm": 0.2657226026058197, | |
| "learning_rate": 5.361067503924647e-06, | |
| "loss": 1.124, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.4654631083202512, | |
| "grad_norm": 0.2537216544151306, | |
| "learning_rate": 5.353218210361068e-06, | |
| "loss": 1.0958, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.4662480376766091, | |
| "grad_norm": 0.32557958364486694, | |
| "learning_rate": 5.345368916797488e-06, | |
| "loss": 1.1469, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.46703296703296704, | |
| "grad_norm": 0.25399667024612427, | |
| "learning_rate": 5.33751962323391e-06, | |
| "loss": 1.0587, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.46781789638932497, | |
| "grad_norm": 0.26794350147247314, | |
| "learning_rate": 5.32967032967033e-06, | |
| "loss": 1.058, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.4686028257456829, | |
| "grad_norm": 0.2695688009262085, | |
| "learning_rate": 5.321821036106752e-06, | |
| "loss": 1.1169, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.46938775510204084, | |
| "grad_norm": 0.2774251103401184, | |
| "learning_rate": 5.313971742543171e-06, | |
| "loss": 1.1368, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.47017268445839877, | |
| "grad_norm": 0.25040146708488464, | |
| "learning_rate": 5.306122448979593e-06, | |
| "loss": 1.0831, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.47095761381475665, | |
| "grad_norm": 0.2619199752807617, | |
| "learning_rate": 5.2982731554160125e-06, | |
| "loss": 1.0354, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.4717425431711146, | |
| "grad_norm": 0.25245144963264465, | |
| "learning_rate": 5.290423861852434e-06, | |
| "loss": 1.0487, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.4725274725274725, | |
| "grad_norm": 0.25916045904159546, | |
| "learning_rate": 5.2825745682888544e-06, | |
| "loss": 1.0987, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.47331240188383045, | |
| "grad_norm": 0.2599029541015625, | |
| "learning_rate": 5.274725274725275e-06, | |
| "loss": 1.0945, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.4740973312401884, | |
| "grad_norm": 0.27398571372032166, | |
| "learning_rate": 5.2668759811616956e-06, | |
| "loss": 1.0602, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.4748822605965463, | |
| "grad_norm": 0.24853894114494324, | |
| "learning_rate": 5.259026687598117e-06, | |
| "loss": 1.0169, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.47566718995290425, | |
| "grad_norm": 0.24999196827411652, | |
| "learning_rate": 5.2511773940345375e-06, | |
| "loss": 1.0557, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.4764521193092622, | |
| "grad_norm": 0.2587362229824066, | |
| "learning_rate": 5.243328100470958e-06, | |
| "loss": 1.0832, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.4772370486656201, | |
| "grad_norm": 0.255636990070343, | |
| "learning_rate": 5.2354788069073795e-06, | |
| "loss": 1.0378, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.47802197802197804, | |
| "grad_norm": 0.2529115080833435, | |
| "learning_rate": 5.227629513343799e-06, | |
| "loss": 1.0544, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.478806907378336, | |
| "grad_norm": 0.3072440028190613, | |
| "learning_rate": 5.219780219780221e-06, | |
| "loss": 1.1386, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.47959183673469385, | |
| "grad_norm": 0.24107390642166138, | |
| "learning_rate": 5.21193092621664e-06, | |
| "loss": 1.0658, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.4803767660910518, | |
| "grad_norm": 0.2693899869918823, | |
| "learning_rate": 5.204081632653062e-06, | |
| "loss": 1.0966, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.4811616954474097, | |
| "grad_norm": 0.2495475560426712, | |
| "learning_rate": 5.196232339089482e-06, | |
| "loss": 1.087, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.48194662480376765, | |
| "grad_norm": 0.2680012285709381, | |
| "learning_rate": 5.188383045525903e-06, | |
| "loss": 1.092, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.4827315541601256, | |
| "grad_norm": 0.2574861943721771, | |
| "learning_rate": 5.180533751962323e-06, | |
| "loss": 1.089, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.4835164835164835, | |
| "grad_norm": 0.2980441153049469, | |
| "learning_rate": 5.172684458398745e-06, | |
| "loss": 1.0895, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.48430141287284145, | |
| "grad_norm": 0.2533935606479645, | |
| "learning_rate": 5.164835164835166e-06, | |
| "loss": 1.0627, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.4850863422291994, | |
| "grad_norm": 0.2828797399997711, | |
| "learning_rate": 5.156985871271586e-06, | |
| "loss": 1.0874, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.4858712715855573, | |
| "grad_norm": 0.26118507981300354, | |
| "learning_rate": 5.149136577708007e-06, | |
| "loss": 1.0304, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.48665620094191525, | |
| "grad_norm": 0.277045339345932, | |
| "learning_rate": 5.141287284144427e-06, | |
| "loss": 1.0492, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.48744113029827313, | |
| "grad_norm": 0.2713971734046936, | |
| "learning_rate": 5.133437990580848e-06, | |
| "loss": 1.1037, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.48822605965463106, | |
| "grad_norm": 0.27905189990997314, | |
| "learning_rate": 5.125588697017269e-06, | |
| "loss": 1.0899, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.489010989010989, | |
| "grad_norm": 0.2574610710144043, | |
| "learning_rate": 5.1177394034536895e-06, | |
| "loss": 1.0842, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.4897959183673469, | |
| "grad_norm": 0.25002968311309814, | |
| "learning_rate": 5.10989010989011e-06, | |
| "loss": 1.0707, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.49058084772370486, | |
| "grad_norm": 0.25703486800193787, | |
| "learning_rate": 5.1020408163265315e-06, | |
| "loss": 1.0427, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.4913657770800628, | |
| "grad_norm": 0.26097506284713745, | |
| "learning_rate": 5.094191522762951e-06, | |
| "loss": 1.0554, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.4921507064364207, | |
| "grad_norm": 0.25719407200813293, | |
| "learning_rate": 5.086342229199373e-06, | |
| "loss": 1.0612, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.49293563579277866, | |
| "grad_norm": 0.25571000576019287, | |
| "learning_rate": 5.078492935635794e-06, | |
| "loss": 1.0471, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.4937205651491366, | |
| "grad_norm": 0.2634165287017822, | |
| "learning_rate": 5.070643642072214e-06, | |
| "loss": 1.0811, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.4945054945054945, | |
| "grad_norm": 0.247939795255661, | |
| "learning_rate": 5.062794348508635e-06, | |
| "loss": 1.0038, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.49529042386185246, | |
| "grad_norm": 0.27619796991348267, | |
| "learning_rate": 5.054945054945055e-06, | |
| "loss": 1.1057, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.49607535321821034, | |
| "grad_norm": 0.26260074973106384, | |
| "learning_rate": 5.047095761381476e-06, | |
| "loss": 1.1193, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.49686028257456827, | |
| "grad_norm": 0.258558988571167, | |
| "learning_rate": 5.039246467817897e-06, | |
| "loss": 1.0687, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.4976452119309262, | |
| "grad_norm": 0.26512664556503296, | |
| "learning_rate": 5.031397174254317e-06, | |
| "loss": 1.0561, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.49843014128728413, | |
| "grad_norm": 0.2602575421333313, | |
| "learning_rate": 5.023547880690738e-06, | |
| "loss": 1.0438, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.49921507064364207, | |
| "grad_norm": 0.2699330151081085, | |
| "learning_rate": 5.015698587127159e-06, | |
| "loss": 1.0989, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.27960050106048584, | |
| "learning_rate": 5.007849293563579e-06, | |
| "loss": 1.0354, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.5007849293563579, | |
| "grad_norm": 0.287338525056839, | |
| "learning_rate": 5e-06, | |
| "loss": 1.1538, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.5015698587127159, | |
| "grad_norm": 0.26822763681411743, | |
| "learning_rate": 4.992150706436421e-06, | |
| "loss": 1.0374, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.5023547880690737, | |
| "grad_norm": 0.2564684748649597, | |
| "learning_rate": 4.9843014128728415e-06, | |
| "loss": 1.0611, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.5031397174254317, | |
| "grad_norm": 0.2621293365955353, | |
| "learning_rate": 4.976452119309262e-06, | |
| "loss": 1.0946, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.5039246467817896, | |
| "grad_norm": 0.2555181086063385, | |
| "learning_rate": 4.9686028257456835e-06, | |
| "loss": 1.0753, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.5047095761381476, | |
| "grad_norm": 0.27201956510543823, | |
| "learning_rate": 4.960753532182104e-06, | |
| "loss": 1.0945, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.5054945054945055, | |
| "grad_norm": 0.2534913420677185, | |
| "learning_rate": 4.952904238618525e-06, | |
| "loss": 1.0746, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.5062794348508635, | |
| "grad_norm": 0.25126805901527405, | |
| "learning_rate": 4.945054945054946e-06, | |
| "loss": 1.0589, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.5070643642072213, | |
| "grad_norm": 0.27167466282844543, | |
| "learning_rate": 4.9372056514913666e-06, | |
| "loss": 1.1345, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.5078492935635793, | |
| "grad_norm": 0.25426211953163147, | |
| "learning_rate": 4.929356357927787e-06, | |
| "loss": 1.0779, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.5086342229199372, | |
| "grad_norm": 0.2680758237838745, | |
| "learning_rate": 4.921507064364208e-06, | |
| "loss": 1.0908, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.5094191522762951, | |
| "grad_norm": 0.26956409215927124, | |
| "learning_rate": 4.913657770800628e-06, | |
| "loss": 1.0492, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.5102040816326531, | |
| "grad_norm": 0.2579835057258606, | |
| "learning_rate": 4.905808477237049e-06, | |
| "loss": 1.0819, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.510989010989011, | |
| "grad_norm": 0.2698979675769806, | |
| "learning_rate": 4.897959183673469e-06, | |
| "loss": 1.069, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.5117739403453689, | |
| "grad_norm": 0.27058646082878113, | |
| "learning_rate": 4.890109890109891e-06, | |
| "loss": 1.0652, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.5125588697017268, | |
| "grad_norm": 0.28998565673828125, | |
| "learning_rate": 4.882260596546311e-06, | |
| "loss": 1.1569, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.5133437990580848, | |
| "grad_norm": 0.2735849916934967, | |
| "learning_rate": 4.874411302982732e-06, | |
| "loss": 1.084, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.5141287284144427, | |
| "grad_norm": 0.2719517648220062, | |
| "learning_rate": 4.866562009419153e-06, | |
| "loss": 1.0997, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.5149136577708007, | |
| "grad_norm": 0.2657535672187805, | |
| "learning_rate": 4.858712715855574e-06, | |
| "loss": 1.0977, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.5156985871271585, | |
| "grad_norm": 0.2546514570713043, | |
| "learning_rate": 4.850863422291994e-06, | |
| "loss": 1.0897, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.5164835164835165, | |
| "grad_norm": 0.2730329632759094, | |
| "learning_rate": 4.843014128728415e-06, | |
| "loss": 1.1082, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.5172684458398744, | |
| "grad_norm": 0.2657451331615448, | |
| "learning_rate": 4.8351648351648355e-06, | |
| "loss": 1.0823, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.5180533751962323, | |
| "grad_norm": 0.28653398156166077, | |
| "learning_rate": 4.827315541601256e-06, | |
| "loss": 1.0555, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.5188383045525903, | |
| "grad_norm": 0.2710787355899811, | |
| "learning_rate": 4.819466248037677e-06, | |
| "loss": 1.0385, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.5196232339089482, | |
| "grad_norm": 0.26727956533432007, | |
| "learning_rate": 4.811616954474098e-06, | |
| "loss": 1.068, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.5204081632653061, | |
| "grad_norm": 0.25900620222091675, | |
| "learning_rate": 4.8037676609105186e-06, | |
| "loss": 1.0889, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.521193092621664, | |
| "grad_norm": 0.26514261960983276, | |
| "learning_rate": 4.795918367346939e-06, | |
| "loss": 1.0978, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.521978021978022, | |
| "grad_norm": 0.29534074664115906, | |
| "learning_rate": 4.7880690737833605e-06, | |
| "loss": 1.0289, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.5227629513343799, | |
| "grad_norm": 0.2783236801624298, | |
| "learning_rate": 4.780219780219781e-06, | |
| "loss": 1.1002, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.5235478806907379, | |
| "grad_norm": 0.2634176015853882, | |
| "learning_rate": 4.772370486656202e-06, | |
| "loss": 1.0518, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.5243328100470958, | |
| "grad_norm": 0.27170518040657043, | |
| "learning_rate": 4.764521193092622e-06, | |
| "loss": 1.0823, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.5251177394034537, | |
| "grad_norm": 0.26880884170532227, | |
| "learning_rate": 4.756671899529043e-06, | |
| "loss": 1.0384, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.5259026687598116, | |
| "grad_norm": 0.27249330282211304, | |
| "learning_rate": 4.748822605965463e-06, | |
| "loss": 1.0706, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.5266875981161695, | |
| "grad_norm": 0.2680225074291229, | |
| "learning_rate": 4.740973312401884e-06, | |
| "loss": 1.0794, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.5274725274725275, | |
| "grad_norm": 0.27644068002700806, | |
| "learning_rate": 4.733124018838305e-06, | |
| "loss": 1.1037, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.5282574568288854, | |
| "grad_norm": 0.2654259204864502, | |
| "learning_rate": 4.725274725274726e-06, | |
| "loss": 1.0524, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.5290423861852434, | |
| "grad_norm": 0.2548593282699585, | |
| "learning_rate": 4.717425431711146e-06, | |
| "loss": 1.0596, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.5298273155416012, | |
| "grad_norm": 0.262279748916626, | |
| "learning_rate": 4.709576138147567e-06, | |
| "loss": 1.0731, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.5306122448979592, | |
| "grad_norm": 0.3365892767906189, | |
| "learning_rate": 4.701726844583988e-06, | |
| "loss": 1.1082, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.5313971742543171, | |
| "grad_norm": 0.25070279836654663, | |
| "learning_rate": 4.693877551020409e-06, | |
| "loss": 1.0554, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.5321821036106751, | |
| "grad_norm": 0.3416726291179657, | |
| "learning_rate": 4.6860282574568294e-06, | |
| "loss": 1.0501, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.532967032967033, | |
| "grad_norm": 0.2714819610118866, | |
| "learning_rate": 4.67817896389325e-06, | |
| "loss": 1.0317, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.533751962323391, | |
| "grad_norm": 0.2663348615169525, | |
| "learning_rate": 4.6703296703296706e-06, | |
| "loss": 1.0699, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.5345368916797488, | |
| "grad_norm": 0.26617753505706787, | |
| "learning_rate": 4.662480376766091e-06, | |
| "loss": 1.0755, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.5353218210361067, | |
| "grad_norm": 0.25647595524787903, | |
| "learning_rate": 4.6546310832025125e-06, | |
| "loss": 1.0746, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.5361067503924647, | |
| "grad_norm": 0.2799610197544098, | |
| "learning_rate": 4.646781789638933e-06, | |
| "loss": 1.0977, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.5368916797488226, | |
| "grad_norm": 0.26751118898391724, | |
| "learning_rate": 4.638932496075354e-06, | |
| "loss": 1.0633, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.5376766091051806, | |
| "grad_norm": 0.28023260831832886, | |
| "learning_rate": 4.631083202511774e-06, | |
| "loss": 1.0927, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.5384615384615384, | |
| "grad_norm": 0.27010107040405273, | |
| "learning_rate": 4.623233908948195e-06, | |
| "loss": 1.0869, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.5392464678178964, | |
| "grad_norm": 0.2625042796134949, | |
| "learning_rate": 4.615384615384616e-06, | |
| "loss": 1.0615, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.5400313971742543, | |
| "grad_norm": 0.2663073241710663, | |
| "learning_rate": 4.607535321821037e-06, | |
| "loss": 1.0473, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.5408163265306123, | |
| "grad_norm": 0.26662012934684753, | |
| "learning_rate": 4.599686028257457e-06, | |
| "loss": 1.0611, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.5416012558869702, | |
| "grad_norm": 0.2595308721065521, | |
| "learning_rate": 4.591836734693878e-06, | |
| "loss": 1.0645, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.542386185243328, | |
| "grad_norm": 0.286700040102005, | |
| "learning_rate": 4.583987441130298e-06, | |
| "loss": 1.0211, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.543171114599686, | |
| "grad_norm": 0.2713211476802826, | |
| "learning_rate": 4.57613814756672e-06, | |
| "loss": 1.0621, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.5439560439560439, | |
| "grad_norm": 0.2682141363620758, | |
| "learning_rate": 4.56828885400314e-06, | |
| "loss": 1.0571, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.5447409733124019, | |
| "grad_norm": 0.2777917683124542, | |
| "learning_rate": 4.560439560439561e-06, | |
| "loss": 1.0733, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.5455259026687598, | |
| "grad_norm": 0.25854945182800293, | |
| "learning_rate": 4.5525902668759815e-06, | |
| "loss": 1.0926, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.5463108320251178, | |
| "grad_norm": 0.2838626801967621, | |
| "learning_rate": 4.544740973312402e-06, | |
| "loss": 1.0707, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.5470957613814756, | |
| "grad_norm": 0.26979881525039673, | |
| "learning_rate": 4.5368916797488226e-06, | |
| "loss": 1.0949, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.5478806907378336, | |
| "grad_norm": 0.2945154309272766, | |
| "learning_rate": 4.529042386185244e-06, | |
| "loss": 1.1497, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.5486656200941915, | |
| "grad_norm": 0.25463616847991943, | |
| "learning_rate": 4.5211930926216645e-06, | |
| "loss": 1.0762, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.5494505494505495, | |
| "grad_norm": 0.2613489031791687, | |
| "learning_rate": 4.513343799058085e-06, | |
| "loss": 1.0629, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5502354788069074, | |
| "grad_norm": 0.2718147039413452, | |
| "learning_rate": 4.505494505494506e-06, | |
| "loss": 1.1294, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.5510204081632653, | |
| "grad_norm": 0.2775886654853821, | |
| "learning_rate": 4.497645211930927e-06, | |
| "loss": 1.0303, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.5518053375196232, | |
| "grad_norm": 0.2916508913040161, | |
| "learning_rate": 4.489795918367348e-06, | |
| "loss": 1.0797, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.5525902668759811, | |
| "grad_norm": 0.2996635138988495, | |
| "learning_rate": 4.481946624803768e-06, | |
| "loss": 1.0696, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.5533751962323391, | |
| "grad_norm": 0.31456199288368225, | |
| "learning_rate": 4.474097331240189e-06, | |
| "loss": 1.0751, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.554160125588697, | |
| "grad_norm": 0.26043495535850525, | |
| "learning_rate": 4.466248037676609e-06, | |
| "loss": 1.0588, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.554945054945055, | |
| "grad_norm": 0.27495190501213074, | |
| "learning_rate": 4.45839874411303e-06, | |
| "loss": 1.0479, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.5557299843014128, | |
| "grad_norm": 0.2717645764350891, | |
| "learning_rate": 4.45054945054945e-06, | |
| "loss": 1.0718, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.5565149136577708, | |
| "grad_norm": 0.29588571190834045, | |
| "learning_rate": 4.442700156985872e-06, | |
| "loss": 1.0585, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.5572998430141287, | |
| "grad_norm": 0.2885316014289856, | |
| "learning_rate": 4.434850863422292e-06, | |
| "loss": 1.0565, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.5580847723704867, | |
| "grad_norm": 0.2899274230003357, | |
| "learning_rate": 4.427001569858713e-06, | |
| "loss": 1.0994, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.5588697017268446, | |
| "grad_norm": 0.27042970061302185, | |
| "learning_rate": 4.419152276295134e-06, | |
| "loss": 1.0642, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.5596546310832025, | |
| "grad_norm": 0.2861323654651642, | |
| "learning_rate": 4.411302982731555e-06, | |
| "loss": 1.0588, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.5604395604395604, | |
| "grad_norm": 0.2717403471469879, | |
| "learning_rate": 4.403453689167975e-06, | |
| "loss": 1.0684, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.5612244897959183, | |
| "grad_norm": 0.2660480737686157, | |
| "learning_rate": 4.395604395604396e-06, | |
| "loss": 1.0709, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.5620094191522763, | |
| "grad_norm": 0.27813735604286194, | |
| "learning_rate": 4.3877551020408165e-06, | |
| "loss": 1.0424, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.5627943485086342, | |
| "grad_norm": 0.27436062693595886, | |
| "learning_rate": 4.379905808477237e-06, | |
| "loss": 1.118, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.5635792778649922, | |
| "grad_norm": 0.2999591827392578, | |
| "learning_rate": 4.372056514913658e-06, | |
| "loss": 1.0542, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.5643642072213501, | |
| "grad_norm": 0.2673451006412506, | |
| "learning_rate": 4.364207221350079e-06, | |
| "loss": 1.0464, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.565149136577708, | |
| "grad_norm": 0.27569347620010376, | |
| "learning_rate": 4.3563579277865e-06, | |
| "loss": 1.1031, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.5659340659340659, | |
| "grad_norm": 0.30383068323135376, | |
| "learning_rate": 4.34850863422292e-06, | |
| "loss": 1.145, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.5667189952904239, | |
| "grad_norm": 0.2751275300979614, | |
| "learning_rate": 4.340659340659341e-06, | |
| "loss": 1.0859, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.5675039246467818, | |
| "grad_norm": 0.27169832587242126, | |
| "learning_rate": 4.332810047095762e-06, | |
| "loss": 1.0594, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.5682888540031397, | |
| "grad_norm": 0.28917625546455383, | |
| "learning_rate": 4.324960753532183e-06, | |
| "loss": 1.0823, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.5690737833594977, | |
| "grad_norm": 0.2915303409099579, | |
| "learning_rate": 4.317111459968603e-06, | |
| "loss": 1.0867, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.5698587127158555, | |
| "grad_norm": 0.2857419550418854, | |
| "learning_rate": 4.309262166405024e-06, | |
| "loss": 1.0843, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.5706436420722135, | |
| "grad_norm": 0.2684113085269928, | |
| "learning_rate": 4.301412872841444e-06, | |
| "loss": 1.0595, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 0.2712114155292511, | |
| "learning_rate": 4.293563579277865e-06, | |
| "loss": 1.0989, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.5722135007849294, | |
| "grad_norm": 0.2832731604576111, | |
| "learning_rate": 4.2857142857142855e-06, | |
| "loss": 1.0498, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.5729984301412873, | |
| "grad_norm": 0.44053250551223755, | |
| "learning_rate": 4.277864992150707e-06, | |
| "loss": 1.0979, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.5737833594976453, | |
| "grad_norm": 0.27188801765441895, | |
| "learning_rate": 4.270015698587127e-06, | |
| "loss": 1.0941, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.5745682888540031, | |
| "grad_norm": 0.28184184432029724, | |
| "learning_rate": 4.262166405023548e-06, | |
| "loss": 1.1324, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.5753532182103611, | |
| "grad_norm": 0.27184492349624634, | |
| "learning_rate": 4.254317111459969e-06, | |
| "loss": 1.0883, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.576138147566719, | |
| "grad_norm": 0.2704511284828186, | |
| "learning_rate": 4.24646781789639e-06, | |
| "loss": 1.0746, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.5769230769230769, | |
| "grad_norm": 0.2777252793312073, | |
| "learning_rate": 4.2386185243328105e-06, | |
| "loss": 1.0202, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.5777080062794349, | |
| "grad_norm": 0.279325932264328, | |
| "learning_rate": 4.230769230769231e-06, | |
| "loss": 1.0334, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.5784929356357927, | |
| "grad_norm": 0.3658686876296997, | |
| "learning_rate": 4.222919937205652e-06, | |
| "loss": 1.0314, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.5792778649921507, | |
| "grad_norm": 0.2697162926197052, | |
| "learning_rate": 4.215070643642072e-06, | |
| "loss": 1.0703, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.5800627943485086, | |
| "grad_norm": 0.27418413758277893, | |
| "learning_rate": 4.207221350078493e-06, | |
| "loss": 1.0345, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.5808477237048666, | |
| "grad_norm": 0.2849864065647125, | |
| "learning_rate": 4.199372056514914e-06, | |
| "loss": 1.0609, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.5816326530612245, | |
| "grad_norm": 0.29697927832603455, | |
| "learning_rate": 4.191522762951335e-06, | |
| "loss": 1.0578, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.5824175824175825, | |
| "grad_norm": 0.2827671766281128, | |
| "learning_rate": 4.183673469387755e-06, | |
| "loss": 1.0264, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.5832025117739403, | |
| "grad_norm": 0.26230207085609436, | |
| "learning_rate": 4.175824175824177e-06, | |
| "loss": 1.0614, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.5839874411302983, | |
| "grad_norm": 0.28049588203430176, | |
| "learning_rate": 4.167974882260597e-06, | |
| "loss": 1.0784, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.5847723704866562, | |
| "grad_norm": 0.2954070270061493, | |
| "learning_rate": 4.160125588697018e-06, | |
| "loss": 1.1016, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.5855572998430141, | |
| "grad_norm": 0.3010290861129761, | |
| "learning_rate": 4.152276295133438e-06, | |
| "loss": 1.0409, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.5863422291993721, | |
| "grad_norm": 0.26996880769729614, | |
| "learning_rate": 4.144427001569859e-06, | |
| "loss": 1.0828, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.5871271585557299, | |
| "grad_norm": 0.27959656715393066, | |
| "learning_rate": 4.1365777080062794e-06, | |
| "loss": 0.9985, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.5879120879120879, | |
| "grad_norm": 0.26965901255607605, | |
| "learning_rate": 4.1287284144427e-06, | |
| "loss": 1.0649, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.5886970172684458, | |
| "grad_norm": 0.2751823365688324, | |
| "learning_rate": 4.120879120879121e-06, | |
| "loss": 1.0422, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5894819466248038, | |
| "grad_norm": 0.27731597423553467, | |
| "learning_rate": 4.113029827315542e-06, | |
| "loss": 1.0927, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.5902668759811617, | |
| "grad_norm": 0.2711530029773712, | |
| "learning_rate": 4.1051805337519625e-06, | |
| "loss": 1.055, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.5910518053375197, | |
| "grad_norm": 0.28580254316329956, | |
| "learning_rate": 4.097331240188384e-06, | |
| "loss": 1.0859, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.5918367346938775, | |
| "grad_norm": 0.2740454077720642, | |
| "learning_rate": 4.0894819466248045e-06, | |
| "loss": 1.0285, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.5926216640502355, | |
| "grad_norm": 0.28732219338417053, | |
| "learning_rate": 4.081632653061225e-06, | |
| "loss": 1.0953, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.5934065934065934, | |
| "grad_norm": 0.2803926467895508, | |
| "learning_rate": 4.0737833594976456e-06, | |
| "loss": 1.0135, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.5941915227629513, | |
| "grad_norm": 0.2755890488624573, | |
| "learning_rate": 4.065934065934066e-06, | |
| "loss": 1.0422, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.5949764521193093, | |
| "grad_norm": 0.28653979301452637, | |
| "learning_rate": 4.058084772370487e-06, | |
| "loss": 1.0301, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.5957613814756672, | |
| "grad_norm": 0.2685067355632782, | |
| "learning_rate": 4.050235478806907e-06, | |
| "loss": 1.03, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.5965463108320251, | |
| "grad_norm": 0.2731209397315979, | |
| "learning_rate": 4.042386185243329e-06, | |
| "loss": 1.0346, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.597331240188383, | |
| "grad_norm": 0.29851043224334717, | |
| "learning_rate": 4.034536891679749e-06, | |
| "loss": 1.0961, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.598116169544741, | |
| "grad_norm": 0.28820693492889404, | |
| "learning_rate": 4.02668759811617e-06, | |
| "loss": 1.0942, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.5989010989010989, | |
| "grad_norm": 0.28037169575691223, | |
| "learning_rate": 4.018838304552591e-06, | |
| "loss": 1.0647, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.5996860282574569, | |
| "grad_norm": 0.27676883339881897, | |
| "learning_rate": 4.010989010989012e-06, | |
| "loss": 1.0605, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.6004709576138147, | |
| "grad_norm": 0.274884432554245, | |
| "learning_rate": 4.003139717425432e-06, | |
| "loss": 1.0577, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.6012558869701727, | |
| "grad_norm": 0.2712654173374176, | |
| "learning_rate": 3.995290423861853e-06, | |
| "loss": 1.0871, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.6020408163265306, | |
| "grad_norm": 0.2694210708141327, | |
| "learning_rate": 3.987441130298273e-06, | |
| "loss": 1.0412, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.6028257456828885, | |
| "grad_norm": 0.2664571702480316, | |
| "learning_rate": 3.979591836734694e-06, | |
| "loss": 1.0413, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.6036106750392465, | |
| "grad_norm": 0.2823816239833832, | |
| "learning_rate": 3.9717425431711145e-06, | |
| "loss": 1.0511, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.6043956043956044, | |
| "grad_norm": 0.27906733751296997, | |
| "learning_rate": 3.963893249607536e-06, | |
| "loss": 1.059, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.6051805337519623, | |
| "grad_norm": 0.26882991194725037, | |
| "learning_rate": 3.9560439560439565e-06, | |
| "loss": 1.0445, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.6059654631083202, | |
| "grad_norm": 0.273629754781723, | |
| "learning_rate": 3.948194662480377e-06, | |
| "loss": 1.0566, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.6067503924646782, | |
| "grad_norm": 0.28593695163726807, | |
| "learning_rate": 3.940345368916798e-06, | |
| "loss": 1.0857, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.6075353218210361, | |
| "grad_norm": 0.2721453309059143, | |
| "learning_rate": 3.932496075353219e-06, | |
| "loss": 1.0474, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.6083202511773941, | |
| "grad_norm": 0.2677747905254364, | |
| "learning_rate": 3.9246467817896395e-06, | |
| "loss": 1.0293, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.609105180533752, | |
| "grad_norm": 0.27768194675445557, | |
| "learning_rate": 3.91679748822606e-06, | |
| "loss": 1.0645, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.6098901098901099, | |
| "grad_norm": 0.2910935878753662, | |
| "learning_rate": 3.908948194662481e-06, | |
| "loss": 1.122, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.6106750392464678, | |
| "grad_norm": 0.2711617946624756, | |
| "learning_rate": 3.901098901098901e-06, | |
| "loss": 1.0663, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.6114599686028257, | |
| "grad_norm": 0.2801941931247711, | |
| "learning_rate": 3.893249607535322e-06, | |
| "loss": 1.057, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.6122448979591837, | |
| "grad_norm": 0.2923711836338043, | |
| "learning_rate": 3.885400313971743e-06, | |
| "loss": 1.1437, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.6130298273155416, | |
| "grad_norm": 0.2791869342327118, | |
| "learning_rate": 3.877551020408164e-06, | |
| "loss": 1.0828, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.6138147566718996, | |
| "grad_norm": 0.34363803267478943, | |
| "learning_rate": 3.869701726844584e-06, | |
| "loss": 1.0993, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.6145996860282574, | |
| "grad_norm": 0.2742525041103363, | |
| "learning_rate": 3.861852433281005e-06, | |
| "loss": 1.0626, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 0.2778567671775818, | |
| "learning_rate": 3.854003139717426e-06, | |
| "loss": 1.0768, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.6161695447409733, | |
| "grad_norm": 0.274810791015625, | |
| "learning_rate": 3.846153846153847e-06, | |
| "loss": 1.07, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.6169544740973313, | |
| "grad_norm": 0.33274561166763306, | |
| "learning_rate": 3.838304552590267e-06, | |
| "loss": 1.0368, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.6177394034536892, | |
| "grad_norm": 0.2886803448200226, | |
| "learning_rate": 3.830455259026688e-06, | |
| "loss": 1.0458, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.6185243328100472, | |
| "grad_norm": 0.3283863961696625, | |
| "learning_rate": 3.8226059654631085e-06, | |
| "loss": 1.0733, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.619309262166405, | |
| "grad_norm": 0.30149760842323303, | |
| "learning_rate": 3.814756671899529e-06, | |
| "loss": 1.09, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.6200941915227629, | |
| "grad_norm": 0.2996380627155304, | |
| "learning_rate": 3.80690737833595e-06, | |
| "loss": 1.0629, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.6208791208791209, | |
| "grad_norm": 0.3076927661895752, | |
| "learning_rate": 3.7990580847723706e-06, | |
| "loss": 1.051, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.6216640502354788, | |
| "grad_norm": 0.27474308013916016, | |
| "learning_rate": 3.7912087912087915e-06, | |
| "loss": 1.0259, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.6224489795918368, | |
| "grad_norm": 0.2838291823863983, | |
| "learning_rate": 3.783359497645212e-06, | |
| "loss": 1.0489, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.6232339089481946, | |
| "grad_norm": 0.31608259677886963, | |
| "learning_rate": 3.7755102040816327e-06, | |
| "loss": 1.0994, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.6240188383045526, | |
| "grad_norm": 0.29825273156166077, | |
| "learning_rate": 3.767660910518054e-06, | |
| "loss": 1.0947, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.6248037676609105, | |
| "grad_norm": 0.28364327549934387, | |
| "learning_rate": 3.7598116169544746e-06, | |
| "loss": 1.0869, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.6255886970172685, | |
| "grad_norm": 0.2713553011417389, | |
| "learning_rate": 3.751962323390895e-06, | |
| "loss": 1.0656, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.6263736263736264, | |
| "grad_norm": 0.27098798751831055, | |
| "learning_rate": 3.744113029827316e-06, | |
| "loss": 1.073, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.6271585557299842, | |
| "grad_norm": 0.28046950697898865, | |
| "learning_rate": 3.7362637362637367e-06, | |
| "loss": 1.0539, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.6279434850863422, | |
| "grad_norm": 0.2658367156982422, | |
| "learning_rate": 3.7284144427001573e-06, | |
| "loss": 1.0596, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6287284144427001, | |
| "grad_norm": 0.2806791365146637, | |
| "learning_rate": 3.720565149136578e-06, | |
| "loss": 1.0867, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.6295133437990581, | |
| "grad_norm": 0.27192607522010803, | |
| "learning_rate": 3.712715855572999e-06, | |
| "loss": 1.0323, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.630298273155416, | |
| "grad_norm": 0.29174211621284485, | |
| "learning_rate": 3.7048665620094194e-06, | |
| "loss": 1.0436, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.631083202511774, | |
| "grad_norm": 0.2949182093143463, | |
| "learning_rate": 3.69701726844584e-06, | |
| "loss": 1.0729, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.6318681318681318, | |
| "grad_norm": 0.28802135586738586, | |
| "learning_rate": 3.6891679748822605e-06, | |
| "loss": 1.0826, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.6326530612244898, | |
| "grad_norm": 0.27609679102897644, | |
| "learning_rate": 3.681318681318682e-06, | |
| "loss": 1.0617, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.6334379905808477, | |
| "grad_norm": 0.30500441789627075, | |
| "learning_rate": 3.6734693877551024e-06, | |
| "loss": 1.0943, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.6342229199372057, | |
| "grad_norm": 0.2916868329048157, | |
| "learning_rate": 3.6656200941915234e-06, | |
| "loss": 1.1224, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.6350078492935636, | |
| "grad_norm": 0.3102125823497772, | |
| "learning_rate": 3.657770800627944e-06, | |
| "loss": 1.1176, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.6357927786499215, | |
| "grad_norm": 0.2802576422691345, | |
| "learning_rate": 3.6499215070643645e-06, | |
| "loss": 1.0794, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.6365777080062794, | |
| "grad_norm": 0.3083432912826538, | |
| "learning_rate": 3.642072213500785e-06, | |
| "loss": 1.1249, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.6373626373626373, | |
| "grad_norm": 0.30711638927459717, | |
| "learning_rate": 3.634222919937206e-06, | |
| "loss": 1.0287, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.6381475667189953, | |
| "grad_norm": 0.3342186510562897, | |
| "learning_rate": 3.6263736263736266e-06, | |
| "loss": 1.0822, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.6389324960753532, | |
| "grad_norm": 0.27452296018600464, | |
| "learning_rate": 3.618524332810047e-06, | |
| "loss": 1.0322, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.6397174254317112, | |
| "grad_norm": 0.2835961580276489, | |
| "learning_rate": 3.6106750392464677e-06, | |
| "loss": 1.0897, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.640502354788069, | |
| "grad_norm": 0.27237561345100403, | |
| "learning_rate": 3.6028257456828887e-06, | |
| "loss": 1.0461, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.641287284144427, | |
| "grad_norm": 0.31648552417755127, | |
| "learning_rate": 3.5949764521193097e-06, | |
| "loss": 1.0393, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.6420722135007849, | |
| "grad_norm": 0.27146708965301514, | |
| "learning_rate": 3.5871271585557307e-06, | |
| "loss": 1.0706, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.6428571428571429, | |
| "grad_norm": 0.2728872299194336, | |
| "learning_rate": 3.5792778649921512e-06, | |
| "loss": 1.0361, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.6436420722135008, | |
| "grad_norm": 0.2697795033454895, | |
| "learning_rate": 3.5714285714285718e-06, | |
| "loss": 1.0238, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.6444270015698587, | |
| "grad_norm": 0.3166142702102661, | |
| "learning_rate": 3.5635792778649923e-06, | |
| "loss": 1.1152, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.6452119309262166, | |
| "grad_norm": 0.3062928020954132, | |
| "learning_rate": 3.5557299843014133e-06, | |
| "loss": 1.1208, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.6459968602825745, | |
| "grad_norm": 0.3098381757736206, | |
| "learning_rate": 3.547880690737834e-06, | |
| "loss": 1.1239, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.6467817896389325, | |
| "grad_norm": 0.2634499669075012, | |
| "learning_rate": 3.5400313971742544e-06, | |
| "loss": 1.0154, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.6475667189952904, | |
| "grad_norm": 0.27799683809280396, | |
| "learning_rate": 3.532182103610675e-06, | |
| "loss": 1.0574, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.6483516483516484, | |
| "grad_norm": 0.29064470529556274, | |
| "learning_rate": 3.524332810047096e-06, | |
| "loss": 1.0962, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.6491365777080063, | |
| "grad_norm": 0.3558158874511719, | |
| "learning_rate": 3.516483516483517e-06, | |
| "loss": 1.1029, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.6499215070643642, | |
| "grad_norm": 0.3792993426322937, | |
| "learning_rate": 3.5086342229199375e-06, | |
| "loss": 1.0777, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.6507064364207221, | |
| "grad_norm": 0.29269522428512573, | |
| "learning_rate": 3.5007849293563585e-06, | |
| "loss": 1.0245, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.6514913657770801, | |
| "grad_norm": 0.28923287987709045, | |
| "learning_rate": 3.492935635792779e-06, | |
| "loss": 1.0701, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.652276295133438, | |
| "grad_norm": 0.285043865442276, | |
| "learning_rate": 3.4850863422291996e-06, | |
| "loss": 1.0538, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.6530612244897959, | |
| "grad_norm": 0.2893431782722473, | |
| "learning_rate": 3.4772370486656206e-06, | |
| "loss": 1.038, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.6538461538461539, | |
| "grad_norm": 0.28342151641845703, | |
| "learning_rate": 3.469387755102041e-06, | |
| "loss": 1.0341, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.6546310832025117, | |
| "grad_norm": 0.27439647912979126, | |
| "learning_rate": 3.4615384615384617e-06, | |
| "loss": 1.0807, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.6554160125588697, | |
| "grad_norm": 0.283348023891449, | |
| "learning_rate": 3.4536891679748822e-06, | |
| "loss": 1.0321, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.6562009419152276, | |
| "grad_norm": 0.3052699565887451, | |
| "learning_rate": 3.4458398744113032e-06, | |
| "loss": 1.0977, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.6569858712715856, | |
| "grad_norm": 0.30584266781806946, | |
| "learning_rate": 3.4379905808477238e-06, | |
| "loss": 1.0312, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.6577708006279435, | |
| "grad_norm": 0.2961052656173706, | |
| "learning_rate": 3.4301412872841448e-06, | |
| "loss": 1.064, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.6585557299843015, | |
| "grad_norm": 0.32309481501579285, | |
| "learning_rate": 3.4222919937205657e-06, | |
| "loss": 1.087, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.6593406593406593, | |
| "grad_norm": 0.2820388674736023, | |
| "learning_rate": 3.4144427001569863e-06, | |
| "loss": 1.062, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.6601255886970173, | |
| "grad_norm": 0.31144237518310547, | |
| "learning_rate": 3.406593406593407e-06, | |
| "loss": 1.0137, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.6609105180533752, | |
| "grad_norm": 0.2903454303741455, | |
| "learning_rate": 3.398744113029828e-06, | |
| "loss": 1.0571, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.6616954474097331, | |
| "grad_norm": 0.29997384548187256, | |
| "learning_rate": 3.3908948194662484e-06, | |
| "loss": 1.0569, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.6624803767660911, | |
| "grad_norm": 0.34617769718170166, | |
| "learning_rate": 3.383045525902669e-06, | |
| "loss": 1.0925, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.6632653061224489, | |
| "grad_norm": 0.2661650776863098, | |
| "learning_rate": 3.3751962323390895e-06, | |
| "loss": 1.0299, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.6640502354788069, | |
| "grad_norm": 0.2766907215118408, | |
| "learning_rate": 3.3673469387755105e-06, | |
| "loss": 1.0807, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.6648351648351648, | |
| "grad_norm": 0.2823966145515442, | |
| "learning_rate": 3.359497645211931e-06, | |
| "loss": 1.0796, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.6656200941915228, | |
| "grad_norm": 0.32514306902885437, | |
| "learning_rate": 3.3516483516483516e-06, | |
| "loss": 1.1211, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.6664050235478807, | |
| "grad_norm": 0.3008269965648651, | |
| "learning_rate": 3.343799058084773e-06, | |
| "loss": 1.0751, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.6671899529042387, | |
| "grad_norm": 0.30757200717926025, | |
| "learning_rate": 3.3359497645211936e-06, | |
| "loss": 1.1073, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.6679748822605965, | |
| "grad_norm": 0.2902880609035492, | |
| "learning_rate": 3.328100470957614e-06, | |
| "loss": 1.0854, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.6687598116169545, | |
| "grad_norm": 0.2838514447212219, | |
| "learning_rate": 3.320251177394035e-06, | |
| "loss": 1.0474, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.6695447409733124, | |
| "grad_norm": 0.30020883679389954, | |
| "learning_rate": 3.3124018838304557e-06, | |
| "loss": 1.0338, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.6703296703296703, | |
| "grad_norm": 0.29149070382118225, | |
| "learning_rate": 3.304552590266876e-06, | |
| "loss": 1.0447, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.6711145996860283, | |
| "grad_norm": 0.2783101201057434, | |
| "learning_rate": 3.2967032967032968e-06, | |
| "loss": 1.0215, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.6718995290423861, | |
| "grad_norm": 0.2824500501155853, | |
| "learning_rate": 3.2888540031397177e-06, | |
| "loss": 1.002, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.6726844583987441, | |
| "grad_norm": 0.2815590798854828, | |
| "learning_rate": 3.2810047095761383e-06, | |
| "loss": 1.1188, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.673469387755102, | |
| "grad_norm": 0.2877782881259918, | |
| "learning_rate": 3.273155416012559e-06, | |
| "loss": 1.0294, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.67425431711146, | |
| "grad_norm": 0.27774369716644287, | |
| "learning_rate": 3.2653061224489794e-06, | |
| "loss": 1.0293, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.6750392464678179, | |
| "grad_norm": 0.3431270122528076, | |
| "learning_rate": 3.257456828885401e-06, | |
| "loss": 1.072, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.6758241758241759, | |
| "grad_norm": 0.3327620029449463, | |
| "learning_rate": 3.2496075353218214e-06, | |
| "loss": 1.095, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.6766091051805337, | |
| "grad_norm": 0.288352370262146, | |
| "learning_rate": 3.2417582417582424e-06, | |
| "loss": 1.0711, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.6773940345368917, | |
| "grad_norm": 0.2850242257118225, | |
| "learning_rate": 3.233908948194663e-06, | |
| "loss": 1.0785, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.6781789638932496, | |
| "grad_norm": 0.2831905782222748, | |
| "learning_rate": 3.2260596546310835e-06, | |
| "loss": 1.0341, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.6789638932496075, | |
| "grad_norm": 0.284157931804657, | |
| "learning_rate": 3.218210361067504e-06, | |
| "loss": 1.0639, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.6797488226059655, | |
| "grad_norm": 0.27813270688056946, | |
| "learning_rate": 3.210361067503925e-06, | |
| "loss": 1.0721, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.6805337519623234, | |
| "grad_norm": 0.28956329822540283, | |
| "learning_rate": 3.2025117739403456e-06, | |
| "loss": 1.0826, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.6813186813186813, | |
| "grad_norm": 0.29287075996398926, | |
| "learning_rate": 3.194662480376766e-06, | |
| "loss": 1.1073, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.6821036106750392, | |
| "grad_norm": 0.2984355092048645, | |
| "learning_rate": 3.1868131868131867e-06, | |
| "loss": 1.0131, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.6828885400313972, | |
| "grad_norm": 0.2821354269981384, | |
| "learning_rate": 3.1789638932496077e-06, | |
| "loss": 1.0452, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.6836734693877551, | |
| "grad_norm": 0.27414095401763916, | |
| "learning_rate": 3.1711145996860286e-06, | |
| "loss": 1.0228, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.6844583987441131, | |
| "grad_norm": 0.2787257730960846, | |
| "learning_rate": 3.1632653061224496e-06, | |
| "loss": 1.0317, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.685243328100471, | |
| "grad_norm": 0.2801453769207001, | |
| "learning_rate": 3.15541601255887e-06, | |
| "loss": 1.0305, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.6860282574568289, | |
| "grad_norm": 0.29602035880088806, | |
| "learning_rate": 3.1475667189952907e-06, | |
| "loss": 1.0955, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.6868131868131868, | |
| "grad_norm": 0.301297664642334, | |
| "learning_rate": 3.1397174254317113e-06, | |
| "loss": 1.036, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.6875981161695447, | |
| "grad_norm": 0.3049217462539673, | |
| "learning_rate": 3.1318681318681323e-06, | |
| "loss": 1.1071, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.6883830455259027, | |
| "grad_norm": 0.2837543785572052, | |
| "learning_rate": 3.124018838304553e-06, | |
| "loss": 1.0273, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.6891679748822606, | |
| "grad_norm": 0.28585192561149597, | |
| "learning_rate": 3.1161695447409734e-06, | |
| "loss": 1.0464, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.6899529042386185, | |
| "grad_norm": 0.2744940519332886, | |
| "learning_rate": 3.108320251177394e-06, | |
| "loss": 1.0626, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.6907378335949764, | |
| "grad_norm": 0.29953551292419434, | |
| "learning_rate": 3.100470957613815e-06, | |
| "loss": 1.0375, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.6915227629513344, | |
| "grad_norm": 0.30678224563598633, | |
| "learning_rate": 3.092621664050236e-06, | |
| "loss": 1.0765, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.6923076923076923, | |
| "grad_norm": 0.29770031571388245, | |
| "learning_rate": 3.084772370486657e-06, | |
| "loss": 1.0751, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.6930926216640503, | |
| "grad_norm": 0.2807864546775818, | |
| "learning_rate": 3.0769230769230774e-06, | |
| "loss": 1.0866, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.6938775510204082, | |
| "grad_norm": 0.29043442010879517, | |
| "learning_rate": 3.069073783359498e-06, | |
| "loss": 1.0886, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.6946624803767661, | |
| "grad_norm": 0.31291401386260986, | |
| "learning_rate": 3.0612244897959185e-06, | |
| "loss": 1.0481, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.695447409733124, | |
| "grad_norm": 0.28840577602386475, | |
| "learning_rate": 3.0533751962323395e-06, | |
| "loss": 1.0756, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.6962323390894819, | |
| "grad_norm": 0.30978354811668396, | |
| "learning_rate": 3.04552590266876e-06, | |
| "loss": 1.0258, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.6970172684458399, | |
| "grad_norm": 0.29402005672454834, | |
| "learning_rate": 3.0376766091051806e-06, | |
| "loss": 1.0499, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.6978021978021978, | |
| "grad_norm": 0.29459160566329956, | |
| "learning_rate": 3.029827315541601e-06, | |
| "loss": 1.1056, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.6985871271585558, | |
| "grad_norm": 0.3101595938205719, | |
| "learning_rate": 3.021978021978022e-06, | |
| "loss": 1.145, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.6993720565149136, | |
| "grad_norm": 0.2799089550971985, | |
| "learning_rate": 3.0141287284144427e-06, | |
| "loss": 1.0958, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.7001569858712716, | |
| "grad_norm": 0.29476428031921387, | |
| "learning_rate": 3.006279434850864e-06, | |
| "loss": 1.109, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.7009419152276295, | |
| "grad_norm": 0.291610985994339, | |
| "learning_rate": 2.9984301412872847e-06, | |
| "loss": 1.052, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.7017268445839875, | |
| "grad_norm": 0.29794690012931824, | |
| "learning_rate": 2.9905808477237053e-06, | |
| "loss": 1.083, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.7025117739403454, | |
| "grad_norm": 0.28312987089157104, | |
| "learning_rate": 2.982731554160126e-06, | |
| "loss": 1.0448, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.7032967032967034, | |
| "grad_norm": 0.28827813267707825, | |
| "learning_rate": 2.974882260596547e-06, | |
| "loss": 1.0442, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.7040816326530612, | |
| "grad_norm": 0.29806190729141235, | |
| "learning_rate": 2.9670329670329673e-06, | |
| "loss": 1.0796, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.7048665620094191, | |
| "grad_norm": 0.2928798496723175, | |
| "learning_rate": 2.959183673469388e-06, | |
| "loss": 1.0792, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.7056514913657771, | |
| "grad_norm": 0.2975620925426483, | |
| "learning_rate": 2.9513343799058085e-06, | |
| "loss": 1.0481, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.706436420722135, | |
| "grad_norm": 0.2881799042224884, | |
| "learning_rate": 2.9434850863422294e-06, | |
| "loss": 1.0588, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.707221350078493, | |
| "grad_norm": 0.289421021938324, | |
| "learning_rate": 2.93563579277865e-06, | |
| "loss": 1.1009, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.7080062794348508, | |
| "grad_norm": 0.30106186866760254, | |
| "learning_rate": 2.9277864992150706e-06, | |
| "loss": 1.1007, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.7087912087912088, | |
| "grad_norm": 0.2982400059700012, | |
| "learning_rate": 2.919937205651492e-06, | |
| "loss": 1.0781, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.7095761381475667, | |
| "grad_norm": 0.27340105175971985, | |
| "learning_rate": 2.9120879120879125e-06, | |
| "loss": 1.0401, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.7103610675039247, | |
| "grad_norm": 0.2898809313774109, | |
| "learning_rate": 2.904238618524333e-06, | |
| "loss": 1.1035, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.7111459968602826, | |
| "grad_norm": 0.28713247179985046, | |
| "learning_rate": 2.896389324960754e-06, | |
| "loss": 1.043, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.7119309262166404, | |
| "grad_norm": 0.2877185046672821, | |
| "learning_rate": 2.8885400313971746e-06, | |
| "loss": 1.0845, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.7127158555729984, | |
| "grad_norm": 0.2887587249279022, | |
| "learning_rate": 2.880690737833595e-06, | |
| "loss": 1.0751, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.7135007849293563, | |
| "grad_norm": 0.2877505123615265, | |
| "learning_rate": 2.8728414442700157e-06, | |
| "loss": 1.0312, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 0.28425735235214233, | |
| "learning_rate": 2.8649921507064367e-06, | |
| "loss": 1.0554, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.7150706436420722, | |
| "grad_norm": 0.2878026068210602, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 1.0041, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.7158555729984302, | |
| "grad_norm": 0.4087301194667816, | |
| "learning_rate": 2.849293563579278e-06, | |
| "loss": 1.0523, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.716640502354788, | |
| "grad_norm": 0.2949962913990021, | |
| "learning_rate": 2.8414442700156984e-06, | |
| "loss": 1.0276, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.717425431711146, | |
| "grad_norm": 0.2939402163028717, | |
| "learning_rate": 2.8335949764521198e-06, | |
| "loss": 1.0428, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.7182103610675039, | |
| "grad_norm": 0.27334320545196533, | |
| "learning_rate": 2.8257456828885403e-06, | |
| "loss": 1.0218, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.7189952904238619, | |
| "grad_norm": 0.28242912888526917, | |
| "learning_rate": 2.8178963893249613e-06, | |
| "loss": 1.0306, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.7197802197802198, | |
| "grad_norm": 0.30455905199050903, | |
| "learning_rate": 2.810047095761382e-06, | |
| "loss": 1.0888, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.7205651491365777, | |
| "grad_norm": 0.31174805760383606, | |
| "learning_rate": 2.8021978021978024e-06, | |
| "loss": 1.041, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.7213500784929356, | |
| "grad_norm": 0.30026131868362427, | |
| "learning_rate": 2.794348508634223e-06, | |
| "loss": 1.0294, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.7221350078492935, | |
| "grad_norm": 0.2858772575855255, | |
| "learning_rate": 2.786499215070644e-06, | |
| "loss": 1.0676, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.7229199372056515, | |
| "grad_norm": 0.3418976962566376, | |
| "learning_rate": 2.7786499215070645e-06, | |
| "loss": 1.0551, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.7237048665620094, | |
| "grad_norm": 0.31597593426704407, | |
| "learning_rate": 2.770800627943485e-06, | |
| "loss": 1.063, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.7244897959183674, | |
| "grad_norm": 0.2929779291152954, | |
| "learning_rate": 2.7629513343799056e-06, | |
| "loss": 1.0147, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.7252747252747253, | |
| "grad_norm": 0.2997245192527771, | |
| "learning_rate": 2.7551020408163266e-06, | |
| "loss": 1.1207, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.7260596546310832, | |
| "grad_norm": 0.2890755832195282, | |
| "learning_rate": 2.7472527472527476e-06, | |
| "loss": 1.0533, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.7268445839874411, | |
| "grad_norm": 0.29468992352485657, | |
| "learning_rate": 2.7394034536891686e-06, | |
| "loss": 1.0764, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.7276295133437991, | |
| "grad_norm": 0.28897175192832947, | |
| "learning_rate": 2.731554160125589e-06, | |
| "loss": 1.0755, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.728414442700157, | |
| "grad_norm": 0.30992481112480164, | |
| "learning_rate": 2.7237048665620097e-06, | |
| "loss": 1.0074, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.7291993720565149, | |
| "grad_norm": 0.29595842957496643, | |
| "learning_rate": 2.7158555729984302e-06, | |
| "loss": 1.1159, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.7299843014128728, | |
| "grad_norm": 0.2836659550666809, | |
| "learning_rate": 2.7080062794348512e-06, | |
| "loss": 1.0807, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.7307692307692307, | |
| "grad_norm": 0.2799653708934784, | |
| "learning_rate": 2.7001569858712718e-06, | |
| "loss": 1.0111, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.7315541601255887, | |
| "grad_norm": 0.29231712222099304, | |
| "learning_rate": 2.6923076923076923e-06, | |
| "loss": 1.039, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.7323390894819466, | |
| "grad_norm": 0.2751501202583313, | |
| "learning_rate": 2.684458398744113e-06, | |
| "loss": 1.0743, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.7331240188383046, | |
| "grad_norm": 0.2840130031108856, | |
| "learning_rate": 2.676609105180534e-06, | |
| "loss": 1.0533, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.7339089481946625, | |
| "grad_norm": 0.28328344225883484, | |
| "learning_rate": 2.668759811616955e-06, | |
| "loss": 1.0726, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.7346938775510204, | |
| "grad_norm": 0.2846105694770813, | |
| "learning_rate": 2.660910518053376e-06, | |
| "loss": 1.0482, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.7354788069073783, | |
| "grad_norm": 0.2846198081970215, | |
| "learning_rate": 2.6530612244897964e-06, | |
| "loss": 1.0549, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.7362637362637363, | |
| "grad_norm": 0.2910875678062439, | |
| "learning_rate": 2.645211930926217e-06, | |
| "loss": 1.0597, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.7370486656200942, | |
| "grad_norm": 0.3099419176578522, | |
| "learning_rate": 2.6373626373626375e-06, | |
| "loss": 1.1132, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.7378335949764521, | |
| "grad_norm": 0.30356669425964355, | |
| "learning_rate": 2.6295133437990585e-06, | |
| "loss": 1.0268, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.7386185243328101, | |
| "grad_norm": 0.28936824202537537, | |
| "learning_rate": 2.621664050235479e-06, | |
| "loss": 1.0237, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.7394034536891679, | |
| "grad_norm": 0.2909795641899109, | |
| "learning_rate": 2.6138147566718996e-06, | |
| "loss": 1.0425, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.7401883830455259, | |
| "grad_norm": 0.29834648966789246, | |
| "learning_rate": 2.60596546310832e-06, | |
| "loss": 1.0731, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.7409733124018838, | |
| "grad_norm": 0.3154754042625427, | |
| "learning_rate": 2.598116169544741e-06, | |
| "loss": 1.144, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.7417582417582418, | |
| "grad_norm": 0.2903672456741333, | |
| "learning_rate": 2.5902668759811617e-06, | |
| "loss": 1.0753, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.7425431711145997, | |
| "grad_norm": 0.28852578997612, | |
| "learning_rate": 2.582417582417583e-06, | |
| "loss": 1.0292, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.7433281004709577, | |
| "grad_norm": 0.2857038080692291, | |
| "learning_rate": 2.5745682888540036e-06, | |
| "loss": 1.0717, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.7441130298273155, | |
| "grad_norm": 0.2909829914569855, | |
| "learning_rate": 2.566718995290424e-06, | |
| "loss": 1.0209, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.7448979591836735, | |
| "grad_norm": 0.2876448631286621, | |
| "learning_rate": 2.5588697017268448e-06, | |
| "loss": 1.0688, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.7456828885400314, | |
| "grad_norm": 0.2869911789894104, | |
| "learning_rate": 2.5510204081632657e-06, | |
| "loss": 1.0332, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.7464678178963893, | |
| "grad_norm": 0.2981649935245514, | |
| "learning_rate": 2.5431711145996863e-06, | |
| "loss": 1.0635, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.7472527472527473, | |
| "grad_norm": 0.28783732652664185, | |
| "learning_rate": 2.535321821036107e-06, | |
| "loss": 1.0492, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.7480376766091051, | |
| "grad_norm": 0.28739696741104126, | |
| "learning_rate": 2.5274725274725274e-06, | |
| "loss": 1.0318, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.7488226059654631, | |
| "grad_norm": 0.28677845001220703, | |
| "learning_rate": 2.5196232339089484e-06, | |
| "loss": 1.032, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.749607535321821, | |
| "grad_norm": 0.3001886010169983, | |
| "learning_rate": 2.511773940345369e-06, | |
| "loss": 1.0855, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.750392464678179, | |
| "grad_norm": 0.2894863486289978, | |
| "learning_rate": 2.5039246467817895e-06, | |
| "loss": 1.044, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.7511773940345369, | |
| "grad_norm": 0.29826030135154724, | |
| "learning_rate": 2.4960753532182105e-06, | |
| "loss": 1.0313, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.7519623233908949, | |
| "grad_norm": 0.30050304532051086, | |
| "learning_rate": 2.488226059654631e-06, | |
| "loss": 1.0575, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.7527472527472527, | |
| "grad_norm": 0.3099324107170105, | |
| "learning_rate": 2.480376766091052e-06, | |
| "loss": 1.0636, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.7535321821036107, | |
| "grad_norm": 0.2929956912994385, | |
| "learning_rate": 2.472527472527473e-06, | |
| "loss": 1.0516, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.7543171114599686, | |
| "grad_norm": 0.29828134179115295, | |
| "learning_rate": 2.4646781789638936e-06, | |
| "loss": 1.0582, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.7551020408163265, | |
| "grad_norm": 0.3314920663833618, | |
| "learning_rate": 2.456828885400314e-06, | |
| "loss": 1.0783, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.7558869701726845, | |
| "grad_norm": 0.3030723035335541, | |
| "learning_rate": 2.4489795918367347e-06, | |
| "loss": 1.0155, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.7566718995290423, | |
| "grad_norm": 0.28593307733535767, | |
| "learning_rate": 2.4411302982731556e-06, | |
| "loss": 1.0266, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.7574568288854003, | |
| "grad_norm": 0.2924596667289734, | |
| "learning_rate": 2.4332810047095766e-06, | |
| "loss": 1.0367, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.7582417582417582, | |
| "grad_norm": 0.30590420961380005, | |
| "learning_rate": 2.425431711145997e-06, | |
| "loss": 1.0675, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.7590266875981162, | |
| "grad_norm": 0.30233892798423767, | |
| "learning_rate": 2.4175824175824177e-06, | |
| "loss": 1.0837, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.7598116169544741, | |
| "grad_norm": 0.32924067974090576, | |
| "learning_rate": 2.4097331240188383e-06, | |
| "loss": 1.109, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.7605965463108321, | |
| "grad_norm": 0.29074007272720337, | |
| "learning_rate": 2.4018838304552593e-06, | |
| "loss": 1.0616, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.7613814756671899, | |
| "grad_norm": 0.29699182510375977, | |
| "learning_rate": 2.3940345368916803e-06, | |
| "loss": 1.0717, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.7621664050235479, | |
| "grad_norm": 0.3066222071647644, | |
| "learning_rate": 2.386185243328101e-06, | |
| "loss": 1.0731, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.7629513343799058, | |
| "grad_norm": 0.37514591217041016, | |
| "learning_rate": 2.3783359497645214e-06, | |
| "loss": 1.0188, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.7637362637362637, | |
| "grad_norm": 0.33850035071372986, | |
| "learning_rate": 2.370486656200942e-06, | |
| "loss": 1.1529, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.7645211930926217, | |
| "grad_norm": 0.2899448275566101, | |
| "learning_rate": 2.362637362637363e-06, | |
| "loss": 1.0658, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.7653061224489796, | |
| "grad_norm": 0.3076562285423279, | |
| "learning_rate": 2.3547880690737835e-06, | |
| "loss": 1.028, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.7660910518053375, | |
| "grad_norm": 0.3137950599193573, | |
| "learning_rate": 2.3469387755102044e-06, | |
| "loss": 1.0447, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.7668759811616954, | |
| "grad_norm": 0.30430495738983154, | |
| "learning_rate": 2.339089481946625e-06, | |
| "loss": 1.105, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.7676609105180534, | |
| "grad_norm": 0.38252878189086914, | |
| "learning_rate": 2.3312401883830456e-06, | |
| "loss": 1.1403, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.7684458398744113, | |
| "grad_norm": 0.30081915855407715, | |
| "learning_rate": 2.3233908948194665e-06, | |
| "loss": 1.109, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 0.29588279128074646, | |
| "learning_rate": 2.315541601255887e-06, | |
| "loss": 1.0319, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.7700156985871272, | |
| "grad_norm": 0.3080218434333801, | |
| "learning_rate": 2.307692307692308e-06, | |
| "loss": 1.039, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.7708006279434851, | |
| "grad_norm": 0.2921229302883148, | |
| "learning_rate": 2.2998430141287286e-06, | |
| "loss": 1.0493, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.771585557299843, | |
| "grad_norm": 0.2984630763530731, | |
| "learning_rate": 2.291993720565149e-06, | |
| "loss": 1.0191, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.7723704866562009, | |
| "grad_norm": 0.28374841809272766, | |
| "learning_rate": 2.28414442700157e-06, | |
| "loss": 1.0103, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.7731554160125589, | |
| "grad_norm": 0.3007064163684845, | |
| "learning_rate": 2.2762951334379907e-06, | |
| "loss": 1.0387, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.7739403453689168, | |
| "grad_norm": 0.2927864193916321, | |
| "learning_rate": 2.2684458398744113e-06, | |
| "loss": 1.005, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.7747252747252747, | |
| "grad_norm": 0.3065125048160553, | |
| "learning_rate": 2.2605965463108323e-06, | |
| "loss": 1.0851, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.7755102040816326, | |
| "grad_norm": 0.27849113941192627, | |
| "learning_rate": 2.252747252747253e-06, | |
| "loss": 1.0251, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.7762951334379906, | |
| "grad_norm": 0.2948971688747406, | |
| "learning_rate": 2.244897959183674e-06, | |
| "loss": 1.0274, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.7770800627943485, | |
| "grad_norm": 0.3202616274356842, | |
| "learning_rate": 2.2370486656200944e-06, | |
| "loss": 1.0485, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.7778649921507065, | |
| "grad_norm": 0.3328281342983246, | |
| "learning_rate": 2.229199372056515e-06, | |
| "loss": 1.069, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.7786499215070644, | |
| "grad_norm": 0.3161095976829529, | |
| "learning_rate": 2.221350078492936e-06, | |
| "loss": 1.0848, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.7794348508634223, | |
| "grad_norm": 0.28824999928474426, | |
| "learning_rate": 2.2135007849293564e-06, | |
| "loss": 1.0654, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.7802197802197802, | |
| "grad_norm": 0.3667064309120178, | |
| "learning_rate": 2.2056514913657774e-06, | |
| "loss": 1.0116, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.7810047095761381, | |
| "grad_norm": 0.2977278530597687, | |
| "learning_rate": 2.197802197802198e-06, | |
| "loss": 1.0814, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.7817896389324961, | |
| "grad_norm": 0.29998522996902466, | |
| "learning_rate": 2.1899529042386185e-06, | |
| "loss": 1.0431, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.782574568288854, | |
| "grad_norm": 0.31411993503570557, | |
| "learning_rate": 2.1821036106750395e-06, | |
| "loss": 1.0926, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.783359497645212, | |
| "grad_norm": 0.29877665638923645, | |
| "learning_rate": 2.17425431711146e-06, | |
| "loss": 1.0989, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.7841444270015698, | |
| "grad_norm": 0.2992810010910034, | |
| "learning_rate": 2.166405023547881e-06, | |
| "loss": 1.0119, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.7849293563579278, | |
| "grad_norm": 0.2953478991985321, | |
| "learning_rate": 2.1585557299843016e-06, | |
| "loss": 1.0669, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7857142857142857, | |
| "grad_norm": 0.29331153631210327, | |
| "learning_rate": 2.150706436420722e-06, | |
| "loss": 1.0554, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.7864992150706437, | |
| "grad_norm": 0.2879624664783478, | |
| "learning_rate": 2.1428571428571427e-06, | |
| "loss": 1.0212, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.7872841444270016, | |
| "grad_norm": 0.2884847819805145, | |
| "learning_rate": 2.1350078492935637e-06, | |
| "loss": 1.0376, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.7880690737833596, | |
| "grad_norm": 0.29227468371391296, | |
| "learning_rate": 2.1271585557299847e-06, | |
| "loss": 1.0444, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.7888540031397174, | |
| "grad_norm": 0.3019685745239258, | |
| "learning_rate": 2.1193092621664052e-06, | |
| "loss": 1.1065, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.7896389324960753, | |
| "grad_norm": 0.287661075592041, | |
| "learning_rate": 2.111459968602826e-06, | |
| "loss": 1.0141, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.7904238618524333, | |
| "grad_norm": 0.29692256450653076, | |
| "learning_rate": 2.1036106750392464e-06, | |
| "loss": 1.0183, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.7912087912087912, | |
| "grad_norm": 0.29249200224876404, | |
| "learning_rate": 2.0957613814756673e-06, | |
| "loss": 1.0084, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.7919937205651492, | |
| "grad_norm": 0.31126755475997925, | |
| "learning_rate": 2.0879120879120883e-06, | |
| "loss": 1.023, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.792778649921507, | |
| "grad_norm": 0.29185745120048523, | |
| "learning_rate": 2.080062794348509e-06, | |
| "loss": 0.9944, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.793563579277865, | |
| "grad_norm": 0.30141139030456543, | |
| "learning_rate": 2.0722135007849294e-06, | |
| "loss": 1.0885, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.7943485086342229, | |
| "grad_norm": 0.29048752784729004, | |
| "learning_rate": 2.06436420722135e-06, | |
| "loss": 1.0231, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.7951334379905809, | |
| "grad_norm": 0.3008350431919098, | |
| "learning_rate": 2.056514913657771e-06, | |
| "loss": 1.0388, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.7959183673469388, | |
| "grad_norm": 0.30450665950775146, | |
| "learning_rate": 2.048665620094192e-06, | |
| "loss": 1.0316, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.7967032967032966, | |
| "grad_norm": 0.34311988949775696, | |
| "learning_rate": 2.0408163265306125e-06, | |
| "loss": 1.1252, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.7974882260596546, | |
| "grad_norm": 0.28808602690696716, | |
| "learning_rate": 2.032967032967033e-06, | |
| "loss": 1.0348, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.7982731554160125, | |
| "grad_norm": 0.28176361322402954, | |
| "learning_rate": 2.0251177394034536e-06, | |
| "loss": 1.0127, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.7990580847723705, | |
| "grad_norm": 0.30243223905563354, | |
| "learning_rate": 2.0172684458398746e-06, | |
| "loss": 1.0655, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.7998430141287284, | |
| "grad_norm": 0.2991596460342407, | |
| "learning_rate": 2.0094191522762956e-06, | |
| "loss": 1.071, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.8006279434850864, | |
| "grad_norm": 0.31719931960105896, | |
| "learning_rate": 2.001569858712716e-06, | |
| "loss": 1.0703, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.8014128728414442, | |
| "grad_norm": 0.28864073753356934, | |
| "learning_rate": 1.9937205651491367e-06, | |
| "loss": 1.0865, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.8021978021978022, | |
| "grad_norm": 0.2995680272579193, | |
| "learning_rate": 1.9858712715855573e-06, | |
| "loss": 1.043, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.8029827315541601, | |
| "grad_norm": 0.30036595463752747, | |
| "learning_rate": 1.9780219780219782e-06, | |
| "loss": 1.0925, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.8037676609105181, | |
| "grad_norm": 0.2845197916030884, | |
| "learning_rate": 1.970172684458399e-06, | |
| "loss": 1.0439, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.804552590266876, | |
| "grad_norm": 0.29325416684150696, | |
| "learning_rate": 1.9623233908948198e-06, | |
| "loss": 1.047, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.8053375196232339, | |
| "grad_norm": 0.2978193163871765, | |
| "learning_rate": 1.9544740973312403e-06, | |
| "loss": 1.0581, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.8061224489795918, | |
| "grad_norm": 0.31198781728744507, | |
| "learning_rate": 1.946624803767661e-06, | |
| "loss": 1.0275, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.8069073783359497, | |
| "grad_norm": 0.28849077224731445, | |
| "learning_rate": 1.938775510204082e-06, | |
| "loss": 1.0534, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.8076923076923077, | |
| "grad_norm": 0.3035949766635895, | |
| "learning_rate": 1.9309262166405024e-06, | |
| "loss": 0.9924, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.8084772370486656, | |
| "grad_norm": 0.330161988735199, | |
| "learning_rate": 1.9230769230769234e-06, | |
| "loss": 1.0625, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.8092621664050236, | |
| "grad_norm": 0.28037184476852417, | |
| "learning_rate": 1.915227629513344e-06, | |
| "loss": 1.0381, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.8100470957613815, | |
| "grad_norm": 0.3421080410480499, | |
| "learning_rate": 1.9073783359497645e-06, | |
| "loss": 1.0885, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.8108320251177394, | |
| "grad_norm": 0.3068152368068695, | |
| "learning_rate": 1.8995290423861853e-06, | |
| "loss": 1.0788, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.8116169544740973, | |
| "grad_norm": 0.2811432182788849, | |
| "learning_rate": 1.891679748822606e-06, | |
| "loss": 1.0359, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.8124018838304553, | |
| "grad_norm": 0.5453617572784424, | |
| "learning_rate": 1.883830455259027e-06, | |
| "loss": 1.0151, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.8131868131868132, | |
| "grad_norm": 0.28949666023254395, | |
| "learning_rate": 1.8759811616954476e-06, | |
| "loss": 1.0557, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.8139717425431711, | |
| "grad_norm": 0.2827453017234802, | |
| "learning_rate": 1.8681318681318684e-06, | |
| "loss": 1.006, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.814756671899529, | |
| "grad_norm": 0.2997809648513794, | |
| "learning_rate": 1.860282574568289e-06, | |
| "loss": 1.025, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.8155416012558869, | |
| "grad_norm": 0.3027696907520294, | |
| "learning_rate": 1.8524332810047097e-06, | |
| "loss": 1.0502, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.8163265306122449, | |
| "grad_norm": 0.3114776909351349, | |
| "learning_rate": 1.8445839874411302e-06, | |
| "loss": 1.1017, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.8171114599686028, | |
| "grad_norm": 0.2964245676994324, | |
| "learning_rate": 1.8367346938775512e-06, | |
| "loss": 1.0149, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.8178963893249608, | |
| "grad_norm": 0.2923440933227539, | |
| "learning_rate": 1.828885400313972e-06, | |
| "loss": 1.073, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.8186813186813187, | |
| "grad_norm": 0.2958196699619293, | |
| "learning_rate": 1.8210361067503925e-06, | |
| "loss": 1.0788, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.8194662480376766, | |
| "grad_norm": 0.32801884412765503, | |
| "learning_rate": 1.8131868131868133e-06, | |
| "loss": 1.0645, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.8202511773940345, | |
| "grad_norm": 0.30840179324150085, | |
| "learning_rate": 1.8053375196232339e-06, | |
| "loss": 1.0397, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.8210361067503925, | |
| "grad_norm": 0.2844547927379608, | |
| "learning_rate": 1.7974882260596548e-06, | |
| "loss": 1.0534, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.8218210361067504, | |
| "grad_norm": 0.31664690375328064, | |
| "learning_rate": 1.7896389324960756e-06, | |
| "loss": 1.0628, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.8226059654631083, | |
| "grad_norm": 0.3169183135032654, | |
| "learning_rate": 1.7817896389324962e-06, | |
| "loss": 1.0954, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.8233908948194663, | |
| "grad_norm": 0.2980157136917114, | |
| "learning_rate": 1.773940345368917e-06, | |
| "loss": 1.0459, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.8241758241758241, | |
| "grad_norm": 0.3251033425331116, | |
| "learning_rate": 1.7660910518053375e-06, | |
| "loss": 1.1277, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.8249607535321821, | |
| "grad_norm": 0.3023360073566437, | |
| "learning_rate": 1.7582417582417585e-06, | |
| "loss": 1.0413, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.82574568288854, | |
| "grad_norm": 0.33668410778045654, | |
| "learning_rate": 1.7503924646781792e-06, | |
| "loss": 1.0721, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.826530612244898, | |
| "grad_norm": 0.30133289098739624, | |
| "learning_rate": 1.7425431711145998e-06, | |
| "loss": 1.0245, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.8273155416012559, | |
| "grad_norm": 0.30766019225120544, | |
| "learning_rate": 1.7346938775510206e-06, | |
| "loss": 1.0795, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.8281004709576139, | |
| "grad_norm": 0.2983943521976471, | |
| "learning_rate": 1.7268445839874411e-06, | |
| "loss": 1.0699, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.8288854003139717, | |
| "grad_norm": 0.3063719570636749, | |
| "learning_rate": 1.7189952904238619e-06, | |
| "loss": 1.0624, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.8296703296703297, | |
| "grad_norm": 0.3908691704273224, | |
| "learning_rate": 1.7111459968602829e-06, | |
| "loss": 1.0461, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.8304552590266876, | |
| "grad_norm": 0.30761247873306274, | |
| "learning_rate": 1.7032967032967034e-06, | |
| "loss": 1.0858, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.8312401883830455, | |
| "grad_norm": 0.2950478792190552, | |
| "learning_rate": 1.6954474097331242e-06, | |
| "loss": 1.0143, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.8320251177394035, | |
| "grad_norm": 0.30142104625701904, | |
| "learning_rate": 1.6875981161695448e-06, | |
| "loss": 1.0693, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.8328100470957613, | |
| "grad_norm": 0.30439862608909607, | |
| "learning_rate": 1.6797488226059655e-06, | |
| "loss": 1.0668, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.8335949764521193, | |
| "grad_norm": 0.2978014349937439, | |
| "learning_rate": 1.6718995290423865e-06, | |
| "loss": 1.0782, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.8343799058084772, | |
| "grad_norm": 0.28805792331695557, | |
| "learning_rate": 1.664050235478807e-06, | |
| "loss": 1.0318, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.8351648351648352, | |
| "grad_norm": 0.29518917202949524, | |
| "learning_rate": 1.6562009419152278e-06, | |
| "loss": 1.0475, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.8359497645211931, | |
| "grad_norm": 0.2990979552268982, | |
| "learning_rate": 1.6483516483516484e-06, | |
| "loss": 1.0352, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.8367346938775511, | |
| "grad_norm": 0.3014602065086365, | |
| "learning_rate": 1.6405023547880692e-06, | |
| "loss": 1.0568, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.8375196232339089, | |
| "grad_norm": 0.2860758602619171, | |
| "learning_rate": 1.6326530612244897e-06, | |
| "loss": 1.0387, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.8383045525902669, | |
| "grad_norm": 0.293454647064209, | |
| "learning_rate": 1.6248037676609107e-06, | |
| "loss": 1.0444, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.8390894819466248, | |
| "grad_norm": 0.2916511297225952, | |
| "learning_rate": 1.6169544740973315e-06, | |
| "loss": 1.0512, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.8398744113029827, | |
| "grad_norm": 0.30183103680610657, | |
| "learning_rate": 1.609105180533752e-06, | |
| "loss": 1.0257, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.8406593406593407, | |
| "grad_norm": 0.3018069267272949, | |
| "learning_rate": 1.6012558869701728e-06, | |
| "loss": 1.0665, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.8414442700156985, | |
| "grad_norm": 0.27915433049201965, | |
| "learning_rate": 1.5934065934065933e-06, | |
| "loss": 1.0397, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.8422291993720565, | |
| "grad_norm": 0.3076684772968292, | |
| "learning_rate": 1.5855572998430143e-06, | |
| "loss": 1.0981, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.8430141287284144, | |
| "grad_norm": 0.29367414116859436, | |
| "learning_rate": 1.577708006279435e-06, | |
| "loss": 1.0547, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.8437990580847724, | |
| "grad_norm": 0.2984970211982727, | |
| "learning_rate": 1.5698587127158556e-06, | |
| "loss": 1.0494, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.8445839874411303, | |
| "grad_norm": 0.29925835132598877, | |
| "learning_rate": 1.5620094191522764e-06, | |
| "loss": 1.0686, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.8453689167974883, | |
| "grad_norm": 0.2967824935913086, | |
| "learning_rate": 1.554160125588697e-06, | |
| "loss": 1.1013, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.8461538461538461, | |
| "grad_norm": 0.30394864082336426, | |
| "learning_rate": 1.546310832025118e-06, | |
| "loss": 1.0908, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.8469387755102041, | |
| "grad_norm": 0.3596284091472626, | |
| "learning_rate": 1.5384615384615387e-06, | |
| "loss": 1.0496, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.847723704866562, | |
| "grad_norm": 0.30267584323883057, | |
| "learning_rate": 1.5306122448979593e-06, | |
| "loss": 1.063, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.8485086342229199, | |
| "grad_norm": 0.2946220934391022, | |
| "learning_rate": 1.52276295133438e-06, | |
| "loss": 1.056, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.8492935635792779, | |
| "grad_norm": 0.2994774281978607, | |
| "learning_rate": 1.5149136577708006e-06, | |
| "loss": 1.055, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.8500784929356358, | |
| "grad_norm": 0.2964215576648712, | |
| "learning_rate": 1.5070643642072214e-06, | |
| "loss": 1.0636, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.8508634222919937, | |
| "grad_norm": 0.313342422246933, | |
| "learning_rate": 1.4992150706436423e-06, | |
| "loss": 1.0341, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.8516483516483516, | |
| "grad_norm": 0.30197572708129883, | |
| "learning_rate": 1.491365777080063e-06, | |
| "loss": 1.0172, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.8524332810047096, | |
| "grad_norm": 0.3112085461616516, | |
| "learning_rate": 1.4835164835164837e-06, | |
| "loss": 1.0727, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.8532182103610675, | |
| "grad_norm": 0.3003901243209839, | |
| "learning_rate": 1.4756671899529042e-06, | |
| "loss": 1.0527, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.8540031397174255, | |
| "grad_norm": 0.2971203327178955, | |
| "learning_rate": 1.467817896389325e-06, | |
| "loss": 1.041, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.8547880690737834, | |
| "grad_norm": 0.3106226921081543, | |
| "learning_rate": 1.459968602825746e-06, | |
| "loss": 1.1165, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.8555729984301413, | |
| "grad_norm": 0.31148043274879456, | |
| "learning_rate": 1.4521193092621665e-06, | |
| "loss": 1.0847, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.8563579277864992, | |
| "grad_norm": 0.29365935921669006, | |
| "learning_rate": 1.4442700156985873e-06, | |
| "loss": 1.013, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 0.30795639753341675, | |
| "learning_rate": 1.4364207221350079e-06, | |
| "loss": 1.0386, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.8579277864992151, | |
| "grad_norm": 0.2929840087890625, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 1.036, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.858712715855573, | |
| "grad_norm": 0.31012848019599915, | |
| "learning_rate": 1.4207221350078492e-06, | |
| "loss": 1.0885, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.859497645211931, | |
| "grad_norm": 0.28400346636772156, | |
| "learning_rate": 1.4128728414442702e-06, | |
| "loss": 1.0615, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.8602825745682888, | |
| "grad_norm": 0.28853991627693176, | |
| "learning_rate": 1.405023547880691e-06, | |
| "loss": 1.0612, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.8610675039246468, | |
| "grad_norm": 0.28974905610084534, | |
| "learning_rate": 1.3971742543171115e-06, | |
| "loss": 1.0506, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.8618524332810047, | |
| "grad_norm": 0.3157826066017151, | |
| "learning_rate": 1.3893249607535323e-06, | |
| "loss": 1.0691, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.8626373626373627, | |
| "grad_norm": 0.28993314504623413, | |
| "learning_rate": 1.3814756671899528e-06, | |
| "loss": 1.0108, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.8634222919937206, | |
| "grad_norm": 0.282035231590271, | |
| "learning_rate": 1.3736263736263738e-06, | |
| "loss": 1.0208, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.8642072213500785, | |
| "grad_norm": 0.30081960558891296, | |
| "learning_rate": 1.3657770800627946e-06, | |
| "loss": 1.0423, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.8649921507064364, | |
| "grad_norm": 0.3326198160648346, | |
| "learning_rate": 1.3579277864992151e-06, | |
| "loss": 1.0326, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.8657770800627943, | |
| "grad_norm": 0.2900926470756531, | |
| "learning_rate": 1.3500784929356359e-06, | |
| "loss": 1.0525, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.8665620094191523, | |
| "grad_norm": 0.28752028942108154, | |
| "learning_rate": 1.3422291993720564e-06, | |
| "loss": 1.0074, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.8673469387755102, | |
| "grad_norm": 0.2825300395488739, | |
| "learning_rate": 1.3343799058084774e-06, | |
| "loss": 1.0223, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.8681318681318682, | |
| "grad_norm": 0.30561885237693787, | |
| "learning_rate": 1.3265306122448982e-06, | |
| "loss": 1.0464, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.868916797488226, | |
| "grad_norm": 0.28518933057785034, | |
| "learning_rate": 1.3186813186813187e-06, | |
| "loss": 1.0464, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.869701726844584, | |
| "grad_norm": 0.2933896780014038, | |
| "learning_rate": 1.3108320251177395e-06, | |
| "loss": 1.0177, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.8704866562009419, | |
| "grad_norm": 0.30531638860702515, | |
| "learning_rate": 1.30298273155416e-06, | |
| "loss": 0.9894, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 0.8712715855572999, | |
| "grad_norm": 0.2906123995780945, | |
| "learning_rate": 1.2951334379905808e-06, | |
| "loss": 1.0403, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.8720565149136578, | |
| "grad_norm": 0.29348504543304443, | |
| "learning_rate": 1.2872841444270018e-06, | |
| "loss": 1.0439, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 0.8728414442700158, | |
| "grad_norm": 0.3133821189403534, | |
| "learning_rate": 1.2794348508634224e-06, | |
| "loss": 1.0984, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.8736263736263736, | |
| "grad_norm": 0.2935754358768463, | |
| "learning_rate": 1.2715855572998431e-06, | |
| "loss": 1.0538, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 0.8744113029827315, | |
| "grad_norm": 0.484567791223526, | |
| "learning_rate": 1.2637362637362637e-06, | |
| "loss": 0.9939, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.8751962323390895, | |
| "grad_norm": 0.2972055673599243, | |
| "learning_rate": 1.2558869701726845e-06, | |
| "loss": 1.0524, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.8759811616954474, | |
| "grad_norm": 0.3031924366950989, | |
| "learning_rate": 1.2480376766091052e-06, | |
| "loss": 1.0597, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.8767660910518054, | |
| "grad_norm": 0.2977665662765503, | |
| "learning_rate": 1.240188383045526e-06, | |
| "loss": 1.0796, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 0.8775510204081632, | |
| "grad_norm": 0.3066113591194153, | |
| "learning_rate": 1.2323390894819468e-06, | |
| "loss": 1.0741, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.8783359497645212, | |
| "grad_norm": 0.30936139822006226, | |
| "learning_rate": 1.2244897959183673e-06, | |
| "loss": 1.0501, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 0.8791208791208791, | |
| "grad_norm": 0.32315778732299805, | |
| "learning_rate": 1.2166405023547883e-06, | |
| "loss": 1.0674, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.8799058084772371, | |
| "grad_norm": 0.3213968276977539, | |
| "learning_rate": 1.2087912087912089e-06, | |
| "loss": 1.079, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 0.880690737833595, | |
| "grad_norm": 0.30559977889060974, | |
| "learning_rate": 1.2009419152276296e-06, | |
| "loss": 1.0145, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.8814756671899528, | |
| "grad_norm": 0.32773932814598083, | |
| "learning_rate": 1.1930926216640504e-06, | |
| "loss": 1.0227, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 0.8822605965463108, | |
| "grad_norm": 0.2936771810054779, | |
| "learning_rate": 1.185243328100471e-06, | |
| "loss": 1.0222, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.8830455259026687, | |
| "grad_norm": 0.3083963096141815, | |
| "learning_rate": 1.1773940345368917e-06, | |
| "loss": 1.0424, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.8838304552590267, | |
| "grad_norm": 0.3056409955024719, | |
| "learning_rate": 1.1695447409733125e-06, | |
| "loss": 1.087, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.8846153846153846, | |
| "grad_norm": 0.2972738444805145, | |
| "learning_rate": 1.1616954474097333e-06, | |
| "loss": 1.034, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 0.8854003139717426, | |
| "grad_norm": 0.28523531556129456, | |
| "learning_rate": 1.153846153846154e-06, | |
| "loss": 1.0371, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.8861852433281004, | |
| "grad_norm": 0.3151058852672577, | |
| "learning_rate": 1.1459968602825746e-06, | |
| "loss": 1.0826, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 0.8869701726844584, | |
| "grad_norm": 0.2846231460571289, | |
| "learning_rate": 1.1381475667189954e-06, | |
| "loss": 1.0419, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.8877551020408163, | |
| "grad_norm": 0.3189791738986969, | |
| "learning_rate": 1.1302982731554161e-06, | |
| "loss": 1.0161, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 0.8885400313971743, | |
| "grad_norm": 0.29699793457984924, | |
| "learning_rate": 1.122448979591837e-06, | |
| "loss": 1.0944, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.8893249607535322, | |
| "grad_norm": 0.31134846806526184, | |
| "learning_rate": 1.1145996860282575e-06, | |
| "loss": 1.0405, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 0.8901098901098901, | |
| "grad_norm": 0.3218204975128174, | |
| "learning_rate": 1.1067503924646782e-06, | |
| "loss": 1.0721, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.890894819466248, | |
| "grad_norm": 0.2882716655731201, | |
| "learning_rate": 1.098901098901099e-06, | |
| "loss": 1.0527, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.8916797488226059, | |
| "grad_norm": 0.2993222177028656, | |
| "learning_rate": 1.0910518053375198e-06, | |
| "loss": 1.0707, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.8924646781789639, | |
| "grad_norm": 0.29466983675956726, | |
| "learning_rate": 1.0832025117739405e-06, | |
| "loss": 1.0645, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 0.8932496075353218, | |
| "grad_norm": 0.29492950439453125, | |
| "learning_rate": 1.075353218210361e-06, | |
| "loss": 1.0756, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.8940345368916798, | |
| "grad_norm": 0.2911151647567749, | |
| "learning_rate": 1.0675039246467819e-06, | |
| "loss": 1.0676, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 0.8948194662480377, | |
| "grad_norm": 0.3016468286514282, | |
| "learning_rate": 1.0596546310832026e-06, | |
| "loss": 1.0752, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.8956043956043956, | |
| "grad_norm": 0.2953685522079468, | |
| "learning_rate": 1.0518053375196232e-06, | |
| "loss": 1.001, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 0.8963893249607535, | |
| "grad_norm": 0.2959998548030853, | |
| "learning_rate": 1.0439560439560442e-06, | |
| "loss": 1.0695, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.8971742543171115, | |
| "grad_norm": 0.29669925570487976, | |
| "learning_rate": 1.0361067503924647e-06, | |
| "loss": 1.0214, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 0.8979591836734694, | |
| "grad_norm": 0.28717902302742004, | |
| "learning_rate": 1.0282574568288855e-06, | |
| "loss": 1.0687, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.8987441130298273, | |
| "grad_norm": 0.29382869601249695, | |
| "learning_rate": 1.0204081632653063e-06, | |
| "loss": 1.0213, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.8995290423861853, | |
| "grad_norm": 0.2865571081638336, | |
| "learning_rate": 1.0125588697017268e-06, | |
| "loss": 1.0308, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.9003139717425431, | |
| "grad_norm": 0.3298538029193878, | |
| "learning_rate": 1.0047095761381478e-06, | |
| "loss": 1.0949, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 0.9010989010989011, | |
| "grad_norm": 0.2812543511390686, | |
| "learning_rate": 9.968602825745683e-07, | |
| "loss": 1.0392, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.901883830455259, | |
| "grad_norm": 0.29315754771232605, | |
| "learning_rate": 9.890109890109891e-07, | |
| "loss": 1.0201, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 0.902668759811617, | |
| "grad_norm": 0.3410029411315918, | |
| "learning_rate": 9.811616954474099e-07, | |
| "loss": 1.1048, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.9034536891679749, | |
| "grad_norm": 0.2857743203639984, | |
| "learning_rate": 9.733124018838304e-07, | |
| "loss": 1.0611, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 0.9042386185243328, | |
| "grad_norm": 0.29381293058395386, | |
| "learning_rate": 9.654631083202512e-07, | |
| "loss": 1.0146, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.9050235478806907, | |
| "grad_norm": 0.29993733763694763, | |
| "learning_rate": 9.57613814756672e-07, | |
| "loss": 1.0831, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 0.9058084772370487, | |
| "grad_norm": 0.2960602343082428, | |
| "learning_rate": 9.497645211930926e-07, | |
| "loss": 1.0313, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.9065934065934066, | |
| "grad_norm": 0.3170572817325592, | |
| "learning_rate": 9.419152276295135e-07, | |
| "loss": 1.0092, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.9073783359497645, | |
| "grad_norm": 0.4631412625312805, | |
| "learning_rate": 9.340659340659342e-07, | |
| "loss": 1.0447, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.9081632653061225, | |
| "grad_norm": 0.2946299910545349, | |
| "learning_rate": 9.262166405023548e-07, | |
| "loss": 1.0705, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 0.9089481946624803, | |
| "grad_norm": 0.30375024676322937, | |
| "learning_rate": 9.183673469387756e-07, | |
| "loss": 1.0704, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.9097331240188383, | |
| "grad_norm": 0.2881094515323639, | |
| "learning_rate": 9.105180533751963e-07, | |
| "loss": 1.0145, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 0.9105180533751962, | |
| "grad_norm": 0.29087066650390625, | |
| "learning_rate": 9.026687598116169e-07, | |
| "loss": 1.0452, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.9113029827315542, | |
| "grad_norm": 0.29212790727615356, | |
| "learning_rate": 8.948194662480378e-07, | |
| "loss": 1.0573, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 0.9120879120879121, | |
| "grad_norm": 0.2992939352989197, | |
| "learning_rate": 8.869701726844585e-07, | |
| "loss": 1.0425, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.9128728414442701, | |
| "grad_norm": 0.29093456268310547, | |
| "learning_rate": 8.791208791208792e-07, | |
| "loss": 1.0422, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 0.9136577708006279, | |
| "grad_norm": 0.2929815351963043, | |
| "learning_rate": 8.712715855572999e-07, | |
| "loss": 1.0586, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.9144427001569859, | |
| "grad_norm": 0.36663711071014404, | |
| "learning_rate": 8.634222919937206e-07, | |
| "loss": 1.0573, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.9152276295133438, | |
| "grad_norm": 0.3045317232608795, | |
| "learning_rate": 8.555729984301414e-07, | |
| "loss": 1.1, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.9160125588697017, | |
| "grad_norm": 0.30183184146881104, | |
| "learning_rate": 8.477237048665621e-07, | |
| "loss": 1.036, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 0.9167974882260597, | |
| "grad_norm": 0.3052210509777069, | |
| "learning_rate": 8.398744113029828e-07, | |
| "loss": 1.0897, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.9175824175824175, | |
| "grad_norm": 0.3007582724094391, | |
| "learning_rate": 8.320251177394035e-07, | |
| "loss": 1.0259, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 0.9183673469387755, | |
| "grad_norm": 0.3091794550418854, | |
| "learning_rate": 8.241758241758242e-07, | |
| "loss": 1.0354, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.9191522762951334, | |
| "grad_norm": 0.30170249938964844, | |
| "learning_rate": 8.163265306122449e-07, | |
| "loss": 1.025, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 0.9199372056514914, | |
| "grad_norm": 0.5053988695144653, | |
| "learning_rate": 8.084772370486657e-07, | |
| "loss": 1.087, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.9207221350078493, | |
| "grad_norm": 0.3013533055782318, | |
| "learning_rate": 8.006279434850864e-07, | |
| "loss": 1.0278, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 0.9215070643642073, | |
| "grad_norm": 0.3028901517391205, | |
| "learning_rate": 7.927786499215072e-07, | |
| "loss": 1.0686, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.9222919937205651, | |
| "grad_norm": 0.28716418147087097, | |
| "learning_rate": 7.849293563579278e-07, | |
| "loss": 1.0335, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 0.3054925501346588, | |
| "learning_rate": 7.770800627943485e-07, | |
| "loss": 1.0697, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.923861852433281, | |
| "grad_norm": 0.3063417673110962, | |
| "learning_rate": 7.692307692307694e-07, | |
| "loss": 1.061, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 0.9246467817896389, | |
| "grad_norm": 0.28995341062545776, | |
| "learning_rate": 7.6138147566719e-07, | |
| "loss": 1.0103, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.9254317111459969, | |
| "grad_norm": 0.2932472825050354, | |
| "learning_rate": 7.535321821036107e-07, | |
| "loss": 1.0164, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 0.9262166405023547, | |
| "grad_norm": 0.3210296034812927, | |
| "learning_rate": 7.456828885400315e-07, | |
| "loss": 1.118, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.9270015698587127, | |
| "grad_norm": 0.30883345007896423, | |
| "learning_rate": 7.378335949764521e-07, | |
| "loss": 1.0462, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 0.9277864992150706, | |
| "grad_norm": 0.2974553406238556, | |
| "learning_rate": 7.29984301412873e-07, | |
| "loss": 1.0685, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.9285714285714286, | |
| "grad_norm": 0.2933749258518219, | |
| "learning_rate": 7.221350078492937e-07, | |
| "loss": 1.0296, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 0.9293563579277865, | |
| "grad_norm": 0.2952398657798767, | |
| "learning_rate": 7.142857142857143e-07, | |
| "loss": 1.015, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.9301412872841445, | |
| "grad_norm": 0.3025822043418884, | |
| "learning_rate": 7.064364207221351e-07, | |
| "loss": 1.0847, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.9309262166405023, | |
| "grad_norm": 0.2824312448501587, | |
| "learning_rate": 6.985871271585557e-07, | |
| "loss": 0.9973, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.9317111459968603, | |
| "grad_norm": 0.29934030771255493, | |
| "learning_rate": 6.907378335949764e-07, | |
| "loss": 1.0566, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 0.9324960753532182, | |
| "grad_norm": 0.29235753417015076, | |
| "learning_rate": 6.828885400313973e-07, | |
| "loss": 0.9836, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.9332810047095761, | |
| "grad_norm": 0.3037624955177307, | |
| "learning_rate": 6.750392464678179e-07, | |
| "loss": 1.0722, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 0.9340659340659341, | |
| "grad_norm": 0.2828160524368286, | |
| "learning_rate": 6.671899529042387e-07, | |
| "loss": 1.0393, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.934850863422292, | |
| "grad_norm": 0.28773877024650574, | |
| "learning_rate": 6.593406593406594e-07, | |
| "loss": 1.0417, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 0.9356357927786499, | |
| "grad_norm": 0.2887474596500397, | |
| "learning_rate": 6.5149136577708e-07, | |
| "loss": 1.0321, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.9364207221350078, | |
| "grad_norm": 0.2963563799858093, | |
| "learning_rate": 6.436420722135009e-07, | |
| "loss": 1.0139, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 0.9372056514913658, | |
| "grad_norm": 0.2964331805706024, | |
| "learning_rate": 6.357927786499216e-07, | |
| "loss": 1.0662, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.9379905808477237, | |
| "grad_norm": 0.29119017720222473, | |
| "learning_rate": 6.279434850863422e-07, | |
| "loss": 1.0797, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.9387755102040817, | |
| "grad_norm": 0.32927101850509644, | |
| "learning_rate": 6.20094191522763e-07, | |
| "loss": 1.0963, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.9395604395604396, | |
| "grad_norm": 0.2921772003173828, | |
| "learning_rate": 6.122448979591837e-07, | |
| "loss": 1.0088, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 0.9403453689167975, | |
| "grad_norm": 0.28854402899742126, | |
| "learning_rate": 6.043956043956044e-07, | |
| "loss": 1.0501, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.9411302982731554, | |
| "grad_norm": 0.30962881445884705, | |
| "learning_rate": 5.965463108320252e-07, | |
| "loss": 1.1162, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 0.9419152276295133, | |
| "grad_norm": 0.2930225729942322, | |
| "learning_rate": 5.886970172684459e-07, | |
| "loss": 1.0008, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.9427001569858713, | |
| "grad_norm": 0.3310118317604065, | |
| "learning_rate": 5.808477237048666e-07, | |
| "loss": 1.0714, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 0.9434850863422292, | |
| "grad_norm": 0.300510048866272, | |
| "learning_rate": 5.729984301412873e-07, | |
| "loss": 1.054, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.9442700156985872, | |
| "grad_norm": 0.3338243067264557, | |
| "learning_rate": 5.651491365777081e-07, | |
| "loss": 1.0849, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 0.945054945054945, | |
| "grad_norm": 0.29972943663597107, | |
| "learning_rate": 5.572998430141287e-07, | |
| "loss": 1.0148, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.945839874411303, | |
| "grad_norm": 0.29417991638183594, | |
| "learning_rate": 5.494505494505495e-07, | |
| "loss": 1.0307, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.9466248037676609, | |
| "grad_norm": 0.2904272675514221, | |
| "learning_rate": 5.416012558869703e-07, | |
| "loss": 1.0504, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.9474097331240189, | |
| "grad_norm": 0.37245574593544006, | |
| "learning_rate": 5.337519623233909e-07, | |
| "loss": 1.077, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 0.9481946624803768, | |
| "grad_norm": 0.3075472414493561, | |
| "learning_rate": 5.259026687598116e-07, | |
| "loss": 1.0483, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.9489795918367347, | |
| "grad_norm": 0.29694482684135437, | |
| "learning_rate": 5.180533751962324e-07, | |
| "loss": 1.0239, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 0.9497645211930926, | |
| "grad_norm": 0.3054351806640625, | |
| "learning_rate": 5.102040816326531e-07, | |
| "loss": 1.0446, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.9505494505494505, | |
| "grad_norm": 0.30204030871391296, | |
| "learning_rate": 5.023547880690739e-07, | |
| "loss": 1.0195, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 0.9513343799058085, | |
| "grad_norm": 0.29818445444107056, | |
| "learning_rate": 4.945054945054946e-07, | |
| "loss": 1.0322, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.9521193092621664, | |
| "grad_norm": 0.4030686914920807, | |
| "learning_rate": 4.866562009419152e-07, | |
| "loss": 0.9908, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 0.9529042386185244, | |
| "grad_norm": 0.2839055061340332, | |
| "learning_rate": 4.78806907378336e-07, | |
| "loss": 1.0366, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.9536891679748822, | |
| "grad_norm": 0.29423198103904724, | |
| "learning_rate": 4.7095761381475676e-07, | |
| "loss": 1.038, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.9544740973312402, | |
| "grad_norm": 0.30914145708084106, | |
| "learning_rate": 4.631083202511774e-07, | |
| "loss": 1.1039, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.9552590266875981, | |
| "grad_norm": 0.3080761730670929, | |
| "learning_rate": 4.5525902668759813e-07, | |
| "loss": 1.1072, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 0.9560439560439561, | |
| "grad_norm": 0.3054615259170532, | |
| "learning_rate": 4.474097331240189e-07, | |
| "loss": 1.0865, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.956828885400314, | |
| "grad_norm": 0.3009425103664398, | |
| "learning_rate": 4.395604395604396e-07, | |
| "loss": 1.0368, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 0.957613814756672, | |
| "grad_norm": 0.28634223341941833, | |
| "learning_rate": 4.317111459968603e-07, | |
| "loss": 1.0526, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.9583987441130298, | |
| "grad_norm": 0.2902422547340393, | |
| "learning_rate": 4.2386185243328105e-07, | |
| "loss": 1.0402, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 0.9591836734693877, | |
| "grad_norm": 0.31759124994277954, | |
| "learning_rate": 4.1601255886970176e-07, | |
| "loss": 1.0519, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.9599686028257457, | |
| "grad_norm": 0.4426363408565521, | |
| "learning_rate": 4.0816326530612243e-07, | |
| "loss": 1.017, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 0.9607535321821036, | |
| "grad_norm": 0.3140206038951874, | |
| "learning_rate": 4.003139717425432e-07, | |
| "loss": 1.0142, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.9615384615384616, | |
| "grad_norm": 0.29569703340530396, | |
| "learning_rate": 3.924646781789639e-07, | |
| "loss": 1.0528, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.9623233908948194, | |
| "grad_norm": 0.30597856640815735, | |
| "learning_rate": 3.846153846153847e-07, | |
| "loss": 1.0253, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.9631083202511774, | |
| "grad_norm": 0.3235307037830353, | |
| "learning_rate": 3.7676609105180534e-07, | |
| "loss": 1.1179, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 0.9638932496075353, | |
| "grad_norm": 0.2932690680027008, | |
| "learning_rate": 3.6891679748822606e-07, | |
| "loss": 1.0423, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.9646781789638933, | |
| "grad_norm": 0.3032452166080475, | |
| "learning_rate": 3.610675039246468e-07, | |
| "loss": 1.0115, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 0.9654631083202512, | |
| "grad_norm": 0.3229339122772217, | |
| "learning_rate": 3.5321821036106754e-07, | |
| "loss": 1.0766, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.966248037676609, | |
| "grad_norm": 0.3844963312149048, | |
| "learning_rate": 3.453689167974882e-07, | |
| "loss": 1.1032, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 0.967032967032967, | |
| "grad_norm": 0.2927655279636383, | |
| "learning_rate": 3.3751962323390897e-07, | |
| "loss": 1.052, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.9678178963893249, | |
| "grad_norm": 0.3003545105457306, | |
| "learning_rate": 3.296703296703297e-07, | |
| "loss": 1.0825, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 0.9686028257456829, | |
| "grad_norm": 0.32615581154823303, | |
| "learning_rate": 3.2182103610675046e-07, | |
| "loss": 1.0915, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.9693877551020408, | |
| "grad_norm": 0.31106311082839966, | |
| "learning_rate": 3.139717425431711e-07, | |
| "loss": 1.0891, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.9701726844583988, | |
| "grad_norm": 0.29504525661468506, | |
| "learning_rate": 3.0612244897959183e-07, | |
| "loss": 1.0303, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.9709576138147566, | |
| "grad_norm": 0.3173236548900604, | |
| "learning_rate": 2.982731554160126e-07, | |
| "loss": 1.0699, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 0.9717425431711146, | |
| "grad_norm": 0.2948251962661743, | |
| "learning_rate": 2.904238618524333e-07, | |
| "loss": 1.0273, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.9725274725274725, | |
| "grad_norm": 0.3042560815811157, | |
| "learning_rate": 2.8257456828885403e-07, | |
| "loss": 1.019, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 0.9733124018838305, | |
| "grad_norm": 0.3046058118343353, | |
| "learning_rate": 2.7472527472527475e-07, | |
| "loss": 1.0301, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.9740973312401884, | |
| "grad_norm": 0.2964264452457428, | |
| "learning_rate": 2.6687598116169546e-07, | |
| "loss": 1.055, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 0.9748822605965463, | |
| "grad_norm": 0.43890246748924255, | |
| "learning_rate": 2.590266875981162e-07, | |
| "loss": 1.0003, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.9756671899529042, | |
| "grad_norm": 0.3069480359554291, | |
| "learning_rate": 2.5117739403453695e-07, | |
| "loss": 1.0983, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 0.9764521193092621, | |
| "grad_norm": 0.2868310213088989, | |
| "learning_rate": 2.433281004709576e-07, | |
| "loss": 1.0251, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.9772370486656201, | |
| "grad_norm": 0.28800535202026367, | |
| "learning_rate": 2.3547880690737838e-07, | |
| "loss": 1.0251, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.978021978021978, | |
| "grad_norm": 0.290998637676239, | |
| "learning_rate": 2.2762951334379907e-07, | |
| "loss": 0.996, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.978806907378336, | |
| "grad_norm": 0.3080032467842102, | |
| "learning_rate": 2.197802197802198e-07, | |
| "loss": 1.1012, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 0.9795918367346939, | |
| "grad_norm": 0.3035239577293396, | |
| "learning_rate": 2.1193092621664052e-07, | |
| "loss": 1.0589, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.9803767660910518, | |
| "grad_norm": 0.3100905120372772, | |
| "learning_rate": 2.0408163265306121e-07, | |
| "loss": 1.0276, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 0.9811616954474097, | |
| "grad_norm": 0.30332428216934204, | |
| "learning_rate": 1.9623233908948196e-07, | |
| "loss": 1.0549, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.9819466248037677, | |
| "grad_norm": 0.2909112572669983, | |
| "learning_rate": 1.8838304552590267e-07, | |
| "loss": 1.0492, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 0.9827315541601256, | |
| "grad_norm": 0.2939865291118622, | |
| "learning_rate": 1.805337519623234e-07, | |
| "loss": 1.0673, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.9835164835164835, | |
| "grad_norm": 0.31474772095680237, | |
| "learning_rate": 1.726844583987441e-07, | |
| "loss": 1.0801, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 0.9843014128728415, | |
| "grad_norm": 0.2975396513938904, | |
| "learning_rate": 1.6483516483516484e-07, | |
| "loss": 1.0645, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.9850863422291993, | |
| "grad_norm": 0.2967347204685211, | |
| "learning_rate": 1.5698587127158556e-07, | |
| "loss": 0.9959, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.9858712715855573, | |
| "grad_norm": 0.2978781461715698, | |
| "learning_rate": 1.491365777080063e-07, | |
| "loss": 1.0541, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.9866562009419152, | |
| "grad_norm": 0.37754517793655396, | |
| "learning_rate": 1.4128728414442702e-07, | |
| "loss": 1.0134, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 0.9874411302982732, | |
| "grad_norm": 0.28837668895721436, | |
| "learning_rate": 1.3343799058084773e-07, | |
| "loss": 1.0435, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.9882260596546311, | |
| "grad_norm": 0.2922952473163605, | |
| "learning_rate": 1.2558869701726847e-07, | |
| "loss": 1.0211, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 0.989010989010989, | |
| "grad_norm": 0.37126073241233826, | |
| "learning_rate": 1.1773940345368919e-07, | |
| "loss": 1.107, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.9897959183673469, | |
| "grad_norm": 0.30882400274276733, | |
| "learning_rate": 1.098901098901099e-07, | |
| "loss": 1.0274, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 0.9905808477237049, | |
| "grad_norm": 0.30320990085601807, | |
| "learning_rate": 1.0204081632653061e-07, | |
| "loss": 1.0478, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.9913657770800628, | |
| "grad_norm": 0.305622398853302, | |
| "learning_rate": 9.419152276295134e-08, | |
| "loss": 1.0782, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 0.9921507064364207, | |
| "grad_norm": 0.2983652353286743, | |
| "learning_rate": 8.634222919937205e-08, | |
| "loss": 1.0486, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.9929356357927787, | |
| "grad_norm": 0.3014610707759857, | |
| "learning_rate": 7.849293563579278e-08, | |
| "loss": 1.0696, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.9937205651491365, | |
| "grad_norm": 0.3930485248565674, | |
| "learning_rate": 7.064364207221351e-08, | |
| "loss": 1.0056, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.9945054945054945, | |
| "grad_norm": 0.30500683188438416, | |
| "learning_rate": 6.279434850863424e-08, | |
| "loss": 1.0317, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 0.9952904238618524, | |
| "grad_norm": 0.29284876585006714, | |
| "learning_rate": 5.494505494505495e-08, | |
| "loss": 1.0596, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.9960753532182104, | |
| "grad_norm": 0.28736308217048645, | |
| "learning_rate": 4.709576138147567e-08, | |
| "loss": 0.9853, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 0.9968602825745683, | |
| "grad_norm": 0.2951013445854187, | |
| "learning_rate": 3.924646781789639e-08, | |
| "loss": 1.0667, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.9976452119309263, | |
| "grad_norm": 0.396849662065506, | |
| "learning_rate": 3.139717425431712e-08, | |
| "loss": 1.1164, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 0.9984301412872841, | |
| "grad_norm": 0.3089428246021271, | |
| "learning_rate": 2.3547880690737834e-08, | |
| "loss": 1.0651, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.9992150706436421, | |
| "grad_norm": 0.295634388923645, | |
| "learning_rate": 1.569858712715856e-08, | |
| "loss": 1.027, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.2972462475299835, | |
| "learning_rate": 7.84929356357928e-09, | |
| "loss": 1.0419, | |
| "step": 1274 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1274, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 0, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.964361379037053e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |