DeepSeek-R1-Distill-Llama-70B-Uncensored-v2-Unbiased-Reasoner-Lora/checkpoint-158/trainer_state.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.10012674271229405,
  "eval_steps": 500,
  "global_step": 158,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0006337135614702154,
      "grad_norm": 0.22353313863277435,
      "learning_rate": 2e-05,
      "loss": 0.795,
      "step": 1
    },
    {
      "epoch": 0.0012674271229404308,
      "grad_norm": 0.270685613155365,
      "learning_rate": 4e-05,
      "loss": 0.9841,
      "step": 2
    },
    {
      "epoch": 0.0019011406844106464,
      "grad_norm": 0.13555319607257843,
      "learning_rate": 6e-05,
      "loss": 0.8728,
      "step": 3
    },
    {
      "epoch": 0.0025348542458808617,
      "grad_norm": 0.1665652096271515,
      "learning_rate": 8e-05,
      "loss": 0.8625,
      "step": 4
    },
    {
      "epoch": 0.0031685678073510772,
      "grad_norm": 0.13588839769363403,
      "learning_rate": 0.0001,
      "loss": 0.6776,
      "step": 5
    },
    {
      "epoch": 0.0038022813688212928,
      "grad_norm": 0.2811749279499054,
      "learning_rate": 0.00012,
      "loss": 0.8813,
      "step": 6
    },
    {
      "epoch": 0.004435994930291508,
      "grad_norm": 0.327694833278656,
      "learning_rate": 0.00014,
      "loss": 0.9009,
      "step": 7
    },
    {
      "epoch": 0.005069708491761723,
      "grad_norm": 0.24555213749408722,
      "learning_rate": 0.00016,
      "loss": 0.7054,
      "step": 8
    },
    {
      "epoch": 0.005703422053231939,
      "grad_norm": 0.14921338856220245,
      "learning_rate": 0.00018,
      "loss": 0.697,
      "step": 9
    },
    {
      "epoch": 0.0063371356147021544,
      "grad_norm": 0.13169103860855103,
      "learning_rate": 0.0002,
      "loss": 0.6007,
      "step": 10
    },
    {
      "epoch": 0.00697084917617237,
      "grad_norm": 0.06807047873735428,
      "learning_rate": 0.00019999979928608238,
      "loss": 0.6155,
      "step": 11
    },
    {
      "epoch": 0.0076045627376425855,
      "grad_norm": 0.08288167417049408,
      "learning_rate": 0.00019999919714513528,
      "loss": 0.5641,
      "step": 12
    },
    {
      "epoch": 0.008238276299112801,
      "grad_norm": 0.12285872548818588,
      "learning_rate": 0.00019999819357957582,
      "loss": 0.7526,
      "step": 13
    },
    {
      "epoch": 0.008871989860583017,
      "grad_norm": 0.15566691756248474,
      "learning_rate": 0.00019999678859343263,
      "loss": 0.4519,
      "step": 14
    },
    {
      "epoch": 0.009505703422053232,
      "grad_norm": 0.1301712989807129,
      "learning_rate": 0.00019999498219234568,
      "loss": 0.486,
      "step": 15
    },
    {
      "epoch": 0.010139416983523447,
      "grad_norm": 0.14493511617183685,
      "learning_rate": 0.00019999277438356638,
      "loss": 0.7146,
      "step": 16
    },
    {
      "epoch": 0.010773130544993664,
      "grad_norm": 0.1372271478176117,
      "learning_rate": 0.00019999016517595753,
      "loss": 0.5933,
      "step": 17
    },
    {
      "epoch": 0.011406844106463879,
      "grad_norm": 0.09944190829992294,
      "learning_rate": 0.00019998715457999314,
      "loss": 0.8399,
      "step": 18
    },
    {
      "epoch": 0.012040557667934094,
      "grad_norm": 0.057923465967178345,
      "learning_rate": 0.0001999837426077586,
      "loss": 0.5613,
      "step": 19
    },
    {
      "epoch": 0.012674271229404309,
      "grad_norm": 0.06214901804924011,
      "learning_rate": 0.00019997992927295059,
      "loss": 0.5374,
      "step": 20
    },
    {
      "epoch": 0.013307984790874524,
      "grad_norm": 0.04898112639784813,
      "learning_rate": 0.0001999757145908768,
      "loss": 0.5451,
      "step": 21
    },
    {
      "epoch": 0.01394169835234474,
      "grad_norm": 0.07026948034763336,
      "learning_rate": 0.0001999710985784562,
      "loss": 0.5635,
      "step": 22
    },
    {
      "epoch": 0.014575411913814956,
      "grad_norm": 0.0672365352511406,
      "learning_rate": 0.00019996608125421873,
      "loss": 0.5996,
      "step": 23
    },
    {
      "epoch": 0.015209125475285171,
      "grad_norm": 0.06477885693311691,
      "learning_rate": 0.00019996066263830531,
      "loss": 0.4707,
      "step": 24
    },
    {
      "epoch": 0.015842839036755388,
      "grad_norm": 0.07720793038606644,
      "learning_rate": 0.0001999548427524678,
      "loss": 0.5891,
      "step": 25
    },
    {
      "epoch": 0.016476552598225603,
      "grad_norm": 0.06699500977993011,
      "learning_rate": 0.0001999486216200688,
      "loss": 0.5316,
      "step": 26
    },
    {
      "epoch": 0.017110266159695818,
      "grad_norm": 0.07539479434490204,
      "learning_rate": 0.00019994199926608172,
      "loss": 0.5854,
      "step": 27
    },
    {
      "epoch": 0.017743979721166033,
      "grad_norm": 4.677523136138916,
      "learning_rate": 0.00019993497571709048,
      "loss": 0.5019,
      "step": 28
    },
    {
      "epoch": 0.018377693282636248,
      "grad_norm": 0.07100815325975418,
      "learning_rate": 0.00019992755100128962,
      "loss": 0.4729,
      "step": 29
    },
    {
      "epoch": 0.019011406844106463,
      "grad_norm": 0.06506210565567017,
      "learning_rate": 0.000199919725148484,
      "loss": 0.5597,
      "step": 30
    },
    {
      "epoch": 0.01964512040557668,
      "grad_norm": 0.04945315420627594,
      "learning_rate": 0.0001999114981900887,
      "loss": 0.5044,
      "step": 31
    },
    {
      "epoch": 0.020278833967046894,
      "grad_norm": 0.05103156715631485,
      "learning_rate": 0.0001999028701591291,
      "loss": 0.3637,
      "step": 32
    },
    {
      "epoch": 0.02091254752851711,
      "grad_norm": 0.05288761481642723,
      "learning_rate": 0.00019989384109024048,
      "loss": 0.4345,
      "step": 33
    },
    {
      "epoch": 0.021546261089987327,
      "grad_norm": 0.05457635968923569,
      "learning_rate": 0.0001998844110196681,
      "loss": 0.4714,
      "step": 34
    },
    {
      "epoch": 0.022179974651457542,
      "grad_norm": 0.055830612778663635,
      "learning_rate": 0.0001998745799852668,
      "loss": 0.5285,
      "step": 35
    },
    {
      "epoch": 0.022813688212927757,
      "grad_norm": 0.05858856439590454,
      "learning_rate": 0.00019986434802650113,
      "loss": 0.5106,
      "step": 36
    },
    {
      "epoch": 0.023447401774397972,
      "grad_norm": 0.05847540497779846,
      "learning_rate": 0.00019985371518444503,
      "loss": 0.4394,
      "step": 37
    },
    {
      "epoch": 0.024081115335868188,
      "grad_norm": 0.1140831857919693,
      "learning_rate": 0.00019984268150178167,
      "loss": 0.4782,
      "step": 38
    },
    {
      "epoch": 0.024714828897338403,
      "grad_norm": 0.06483329832553864,
      "learning_rate": 0.00019983124702280334,
      "loss": 0.396,
      "step": 39
    },
    {
      "epoch": 0.025348542458808618,
      "grad_norm": 0.07212468981742859,
      "learning_rate": 0.00019981941179341117,
      "loss": 0.5173,
      "step": 40
    },
    {
      "epoch": 0.025982256020278833,
      "grad_norm": 0.1697537750005722,
      "learning_rate": 0.00019980717586111512,
      "loss": 0.6164,
      "step": 41
    },
    {
      "epoch": 0.026615969581749048,
      "grad_norm": 0.05975339934229851,
      "learning_rate": 0.00019979453927503364,
      "loss": 0.4981,
      "step": 42
    },
    {
      "epoch": 0.027249683143219267,
      "grad_norm": 0.0607403926551342,
      "learning_rate": 0.00019978150208589348,
      "loss": 0.533,
      "step": 43
    },
    {
      "epoch": 0.02788339670468948,
      "grad_norm": 0.07225210964679718,
      "learning_rate": 0.00019976806434602952,
      "loss": 0.5055,
      "step": 44
    },
    {
      "epoch": 0.028517110266159697,
      "grad_norm": 0.07008686661720276,
      "learning_rate": 0.00019975422610938462,
      "loss": 0.6274,
      "step": 45
    },
    {
      "epoch": 0.029150823827629912,
      "grad_norm": 0.07289402186870575,
      "learning_rate": 0.0001997399874315093,
      "loss": 0.5247,
      "step": 46
    },
    {
      "epoch": 0.029784537389100127,
      "grad_norm": 0.10037431120872498,
      "learning_rate": 0.0001997253483695616,
      "loss": 0.647,
      "step": 47
    },
    {
      "epoch": 0.030418250950570342,
      "grad_norm": 0.06468270719051361,
      "learning_rate": 0.00019971030898230672,
      "loss": 0.5719,
      "step": 48
    },
    {
      "epoch": 0.031051964512040557,
      "grad_norm": 0.0472278967499733,
      "learning_rate": 0.00019969486933011705,
      "loss": 0.5565,
      "step": 49
    },
    {
      "epoch": 0.031685678073510776,
      "grad_norm": 0.0584145151078701,
      "learning_rate": 0.00019967902947497156,
      "loss": 0.5432,
      "step": 50
    },
    {
      "epoch": 0.03231939163498099,
      "grad_norm": 0.08962458372116089,
      "learning_rate": 0.00019966278948045592,
      "loss": 0.6432,
      "step": 51
    },
    {
      "epoch": 0.032953105196451206,
      "grad_norm": 0.08193643391132355,
      "learning_rate": 0.00019964614941176195,
      "loss": 0.5341,
      "step": 52
    },
    {
      "epoch": 0.03358681875792142,
      "grad_norm": 0.07166769355535507,
      "learning_rate": 0.00019962910933568747,
      "loss": 0.5481,
      "step": 53
    },
    {
      "epoch": 0.034220532319391636,
      "grad_norm": 0.10422351956367493,
      "learning_rate": 0.00019961166932063614,
      "loss": 0.6145,
      "step": 54
    },
    {
      "epoch": 0.03485424588086185,
      "grad_norm": 0.06273826211690903,
      "learning_rate": 0.00019959382943661704,
      "loss": 0.4969,
      "step": 55
    },
    {
      "epoch": 0.035487959442332066,
      "grad_norm": 0.06504670530557632,
      "learning_rate": 0.0001995755897552444,
      "loss": 0.6093,
      "step": 56
    },
    {
      "epoch": 0.03612167300380228,
      "grad_norm": 0.05045778304338455,
      "learning_rate": 0.00019955695034973742,
      "loss": 0.4191,
      "step": 57
    },
    {
      "epoch": 0.036755386565272496,
      "grad_norm": 0.06495866179466248,
      "learning_rate": 0.00019953791129491983,
      "loss": 0.4762,
      "step": 58
    },
    {
      "epoch": 0.037389100126742715,
      "grad_norm": 0.0814126655459404,
      "learning_rate": 0.0001995184726672197,
      "loss": 0.5599,
      "step": 59
    },
    {
      "epoch": 0.03802281368821293,
      "grad_norm": 0.052061304450035095,
      "learning_rate": 0.00019949863454466908,
      "loss": 0.4822,
      "step": 60
    },
    {
      "epoch": 0.038656527249683145,
      "grad_norm": 0.05419475957751274,
      "learning_rate": 0.00019947839700690375,
      "loss": 0.5625,
      "step": 61
    },
    {
      "epoch": 0.03929024081115336,
      "grad_norm": 0.06495067477226257,
      "learning_rate": 0.0001994577601351628,
      "loss": 0.5863,
      "step": 62
    },
    {
      "epoch": 0.039923954372623575,
      "grad_norm": 0.055791907012462616,
      "learning_rate": 0.00019943672401228837,
      "loss": 0.4588,
      "step": 63
    },
    {
      "epoch": 0.04055766793409379,
      "grad_norm": 0.03923908621072769,
      "learning_rate": 0.00019941528872272532,
      "loss": 0.3841,
      "step": 64
    },
    {
      "epoch": 0.041191381495564006,
      "grad_norm": 0.08200399577617645,
      "learning_rate": 0.00019939345435252088,
      "loss": 0.6163,
      "step": 65
    },
    {
      "epoch": 0.04182509505703422,
      "grad_norm": 0.05708305537700653,
      "learning_rate": 0.00019937122098932428,
      "loss": 0.6363,
      "step": 66
    },
    {
      "epoch": 0.042458808618504436,
      "grad_norm": 0.053468603640794754,
      "learning_rate": 0.0001993485887223864,
      "loss": 0.4777,
      "step": 67
    },
    {
      "epoch": 0.043092522179974654,
      "grad_norm": 0.08539824187755585,
      "learning_rate": 0.00019932555764255952,
      "loss": 0.4922,
      "step": 68
    },
    {
      "epoch": 0.043726235741444866,
      "grad_norm": 0.07483454793691635,
      "learning_rate": 0.00019930212784229675,
      "loss": 0.6337,
      "step": 69
    },
    {
      "epoch": 0.044359949302915085,
      "grad_norm": 0.06771700084209442,
      "learning_rate": 0.00019927829941565186,
      "loss": 0.4559,
      "step": 70
    },
    {
      "epoch": 0.044993662864385296,
      "grad_norm": 0.05689261853694916,
      "learning_rate": 0.0001992540724582788,
      "loss": 0.5489,
      "step": 71
    },
    {
      "epoch": 0.045627376425855515,
      "grad_norm": 0.05044565722346306,
      "learning_rate": 0.00019922944706743127,
      "loss": 0.4472,
      "step": 72
    },
    {
      "epoch": 0.046261089987325726,
      "grad_norm": 0.07331253588199615,
      "learning_rate": 0.00019920442334196248,
      "loss": 0.4752,
      "step": 73
    },
    {
      "epoch": 0.046894803548795945,
      "grad_norm": 0.057449884712696075,
      "learning_rate": 0.0001991790013823246,
      "loss": 0.4525,
      "step": 74
    },
    {
      "epoch": 0.04752851711026616,
      "grad_norm": 0.08357278257608414,
      "learning_rate": 0.00019915318129056853,
      "loss": 0.5813,
      "step": 75
    },
    {
      "epoch": 0.048162230671736375,
      "grad_norm": 0.051311176270246506,
      "learning_rate": 0.00019912696317034322,
      "loss": 0.4593,
      "step": 76
    },
    {
      "epoch": 0.048795944233206594,
      "grad_norm": 0.06535078585147858,
      "learning_rate": 0.00019910034712689552,
      "loss": 0.5339,
      "step": 77
    },
    {
      "epoch": 0.049429657794676805,
      "grad_norm": 0.13796891272068024,
      "learning_rate": 0.00019907333326706967,
      "loss": 0.5438,
      "step": 78
    },
    {
      "epoch": 0.050063371356147024,
      "grad_norm": 0.05667581036686897,
      "learning_rate": 0.0001990459216993068,
      "loss": 0.6295,
      "step": 79
    },
    {
      "epoch": 0.050697084917617236,
      "grad_norm": 0.05243121087551117,
      "learning_rate": 0.00019901811253364456,
      "loss": 0.4782,
      "step": 80
    },
    {
      "epoch": 0.051330798479087454,
      "grad_norm": 0.0769771933555603,
      "learning_rate": 0.0001989899058817167,
      "loss": 0.5692,
      "step": 81
    },
    {
      "epoch": 0.051964512040557666,
      "grad_norm": 0.07334766536951065,
      "learning_rate": 0.00019896130185675261,
      "loss": 0.569,
      "step": 82
    },
    {
      "epoch": 0.052598225602027884,
      "grad_norm": 0.07953603565692902,
      "learning_rate": 0.00019893230057357671,
      "loss": 0.4059,
      "step": 83
    },
    {
      "epoch": 0.053231939163498096,
      "grad_norm": 0.05282806232571602,
      "learning_rate": 0.00019890290214860833,
      "loss": 0.5186,
      "step": 84
    },
    {
      "epoch": 0.053865652724968315,
      "grad_norm": 0.06661225110292435,
      "learning_rate": 0.00019887310669986085,
      "loss": 0.6404,
      "step": 85
    },
    {
      "epoch": 0.05449936628643853,
      "grad_norm": 0.07150626182556152,
      "learning_rate": 0.00019884291434694152,
      "loss": 0.5865,
      "step": 86
    },
    {
      "epoch": 0.055133079847908745,
      "grad_norm": 0.054674554616212845,
      "learning_rate": 0.00019881232521105089,
      "loss": 0.5429,
      "step": 87
    },
    {
      "epoch": 0.05576679340937896,
      "grad_norm": 0.057950377464294434,
      "learning_rate": 0.00019878133941498224,
      "loss": 0.6705,
      "step": 88
    },
    {
      "epoch": 0.056400506970849175,
      "grad_norm": 0.07045155763626099,
      "learning_rate": 0.0001987499570831211,
      "loss": 0.5393,
      "step": 89
    },
    {
      "epoch": 0.057034220532319393,
      "grad_norm": 0.055960092693567276,
      "learning_rate": 0.00019871817834144504,
      "loss": 0.4481,
      "step": 90
    },
    {
      "epoch": 0.057667934093789605,
      "grad_norm": 0.05631652846932411,
      "learning_rate": 0.00019868600331752264,
      "loss": 0.5963,
      "step": 91
    },
    {
      "epoch": 0.058301647655259824,
      "grad_norm": 0.05120407044887543,
      "learning_rate": 0.00019865343214051347,
      "loss": 0.486,
      "step": 92
    },
    {
      "epoch": 0.058935361216730035,
      "grad_norm": 0.05507562682032585,
      "learning_rate": 0.0001986204649411673,
      "loss": 0.5514,
      "step": 93
    },
    {
      "epoch": 0.059569074778200254,
      "grad_norm": 0.057690516114234924,
      "learning_rate": 0.0001985871018518236,
      "loss": 0.4969,
      "step": 94
    },
    {
      "epoch": 0.060202788339670466,
      "grad_norm": 0.05942325294017792,
      "learning_rate": 0.00019855334300641114,
      "loss": 0.51,
      "step": 95
    },
    {
      "epoch": 0.060836501901140684,
      "grad_norm": 0.05777527391910553,
      "learning_rate": 0.0001985191885404473,
      "loss": 0.5401,
      "step": 96
    },
    {
      "epoch": 0.0614702154626109,
      "grad_norm": 0.07077159732580185,
      "learning_rate": 0.00019848463859103763,
      "loss": 0.5568,
      "step": 97
    },
    {
      "epoch": 0.062103929024081114,
      "grad_norm": 0.050649482756853104,
      "learning_rate": 0.00019844969329687527,
      "loss": 0.5418,
      "step": 98
    },
    {
      "epoch": 0.06273764258555133,
      "grad_norm": 0.059522844851017,
      "learning_rate": 0.00019841435279824028,
      "loss": 0.4679,
      "step": 99
    },
    {
      "epoch": 0.06337135614702155,
      "grad_norm": 0.061260003596544266,
      "learning_rate": 0.0001983786172369993,
      "loss": 0.557,
      "step": 100
    },
    {
      "epoch": 0.06400506970849176,
      "grad_norm": 0.0513591468334198,
      "learning_rate": 0.00019834248675660486,
      "loss": 0.5849,
      "step": 101
    },
    {
      "epoch": 0.06463878326996197,
      "grad_norm": 0.06722971051931381,
      "learning_rate": 0.0001983059615020947,
      "loss": 0.4003,
      "step": 102
    },
    {
      "epoch": 0.06527249683143219,
      "grad_norm": 0.0629379004240036,
      "learning_rate": 0.0001982690416200914,
      "loss": 0.5322,
      "step": 103
    },
    {
      "epoch": 0.06590621039290241,
      "grad_norm": 0.05402471870183945,
      "learning_rate": 0.00019823172725880165,
      "loss": 0.5634,
      "step": 104
    },
    {
      "epoch": 0.06653992395437262,
      "grad_norm": 0.15680162608623505,
      "learning_rate": 0.0001981940185680156,
      "loss": 0.5361,
      "step": 105
    },
    {
      "epoch": 0.06717363751584284,
      "grad_norm": 0.06348865479230881,
      "learning_rate": 0.00019815591569910654,
      "loss": 0.5322,
      "step": 106
    },
    {
      "epoch": 0.06780735107731306,
      "grad_norm": 0.05004284158349037,
      "learning_rate": 0.00019811741880502995,
      "loss": 0.5524,
      "step": 107
    },
    {
      "epoch": 0.06844106463878327,
      "grad_norm": 0.06271985173225403,
      "learning_rate": 0.00019807852804032305,
      "loss": 0.4347,
      "step": 108
    },
    {
      "epoch": 0.06907477820025348,
      "grad_norm": 0.1546468287706375,
      "learning_rate": 0.00019803924356110423,
      "loss": 0.4294,
      "step": 109
    },
    {
      "epoch": 0.0697084917617237,
      "grad_norm": 0.06472460180521011,
      "learning_rate": 0.00019799956552507233,
      "loss": 0.5693,
      "step": 110
    },
    {
      "epoch": 0.07034220532319392,
      "grad_norm": 0.06021984666585922,
      "learning_rate": 0.00019795949409150598,
      "loss": 0.6554,
      "step": 111
    },
    {
      "epoch": 0.07097591888466413,
      "grad_norm": 0.04533032327890396,
      "learning_rate": 0.00019791902942126313,
      "loss": 0.4425,
      "step": 112
    },
    {
      "epoch": 0.07160963244613434,
      "grad_norm": 0.0662391185760498,
      "learning_rate": 0.0001978781716767802,
      "loss": 0.5258,
      "step": 113
    },
    {
      "epoch": 0.07224334600760456,
      "grad_norm": 0.06131117045879364,
      "learning_rate": 0.00019783692102207155,
      "loss": 0.4556,
      "step": 114
    },
    {
      "epoch": 0.07287705956907478,
      "grad_norm": 0.07924918830394745,
      "learning_rate": 0.00019779527762272877,
      "loss": 0.5137,
      "step": 115
    },
    {
      "epoch": 0.07351077313054499,
      "grad_norm": 0.07061261683702469,
      "learning_rate": 0.0001977532416459201,
      "loss": 0.4554,
      "step": 116
    },
    {
      "epoch": 0.0741444866920152,
      "grad_norm": 0.04919254407286644,
      "learning_rate": 0.00019771081326038962,
      "loss": 0.5213,
      "step": 117
    },
    {
      "epoch": 0.07477820025348543,
      "grad_norm": 0.053799472749233246,
      "learning_rate": 0.00019766799263645673,
      "loss": 0.5648,
      "step": 118
    },
    {
      "epoch": 0.07541191381495564,
      "grad_norm": 0.06857369095087051,
      "learning_rate": 0.00019762477994601522,
      "loss": 0.6841,
      "step": 119
    },
    {
      "epoch": 0.07604562737642585,
      "grad_norm": 0.0719090923666954,
      "learning_rate": 0.000197581175362533,
      "loss": 0.4154,
      "step": 120
    },
    {
      "epoch": 0.07667934093789606,
      "grad_norm": 0.10528447479009628,
      "learning_rate": 0.00019753717906105092,
      "loss": 0.5674,
      "step": 121
    },
    {
      "epoch": 0.07731305449936629,
      "grad_norm": 0.05879104137420654,
      "learning_rate": 0.00019749279121818235,
      "loss": 0.5282,
      "step": 122
    },
    {
      "epoch": 0.0779467680608365,
      "grad_norm": 0.050949644297361374,
      "learning_rate": 0.00019744801201211255,
      "loss": 0.4398,
      "step": 123
    },
    {
      "epoch": 0.07858048162230671,
      "grad_norm": 0.061247747391462326,
      "learning_rate": 0.00019740284162259765,
      "loss": 0.4269,
      "step": 124
    },
    {
      "epoch": 0.07921419518377694,
      "grad_norm": 0.09446462988853455,
      "learning_rate": 0.0001973572802309642,
      "loss": 0.6362,
      "step": 125
    },
    {
      "epoch": 0.07984790874524715,
      "grad_norm": 0.06124195456504822,
      "learning_rate": 0.0001973113280201082,
      "loss": 0.435,
      "step": 126
    },
    {
      "epoch": 0.08048162230671736,
      "grad_norm": 0.05198049172759056,
      "learning_rate": 0.0001972649851744948,
      "loss": 0.4617,
      "step": 127
    },
    {
      "epoch": 0.08111533586818757,
      "grad_norm": 0.05457935482263565,
      "learning_rate": 0.00019721825188015693,
      "loss": 0.548,
      "step": 128
    },
    {
      "epoch": 0.0817490494296578,
      "grad_norm": 0.054542481899261475,
      "learning_rate": 0.0001971711283246951,
      "loss": 0.4449,
      "step": 129
    },
    {
      "epoch": 0.08238276299112801,
      "grad_norm": 0.0528152696788311,
      "learning_rate": 0.0001971236146972764,
      "loss": 0.5868,
      "step": 130
    },
    {
      "epoch": 0.08301647655259822,
      "grad_norm": 0.049837883561849594,
      "learning_rate": 0.0001970757111886337,
      "loss": 0.4426,
      "step": 131
    },
    {
      "epoch": 0.08365019011406843,
      "grad_norm": 0.04912682995200157,
      "learning_rate": 0.00019702741799106508,
      "loss": 0.5328,
      "step": 132
    },
    {
      "epoch": 0.08428390367553866,
      "grad_norm": 0.06654444336891174,
      "learning_rate": 0.00019697873529843282,
      "loss": 0.6239,
      "step": 133
    },
    {
      "epoch": 0.08491761723700887,
      "grad_norm": 0.1822642683982849,
      "learning_rate": 0.00019692966330616283,
      "loss": 0.6482,
      "step": 134
    },
    {
      "epoch": 0.08555133079847908,
      "grad_norm": 0.07404999434947968,
      "learning_rate": 0.00019688020221124376,
      "loss": 0.5473,
      "step": 135
    },
    {
      "epoch": 0.08618504435994931,
      "grad_norm": 0.08534666895866394,
      "learning_rate": 0.00019683035221222618,
      "loss": 0.4794,
      "step": 136
    },
    {
      "epoch": 0.08681875792141952,
      "grad_norm": 0.05804799869656563,
      "learning_rate": 0.00019678011350922185,
      "loss": 0.5749,
      "step": 137
    },
    {
      "epoch": 0.08745247148288973,
      "grad_norm": 0.0600556954741478,
      "learning_rate": 0.00019672948630390294,
      "loss": 0.4929,
      "step": 138
    },
    {
      "epoch": 0.08808618504435994,
      "grad_norm": 0.07564158737659454,
      "learning_rate": 0.00019667847079950118,
      "loss": 0.5806,
      "step": 139
    },
    {
      "epoch": 0.08871989860583017,
      "grad_norm": 0.06359097361564636,
      "learning_rate": 0.00019662706720080693,
      "loss": 0.5427,
      "step": 140
    },
    {
      "epoch": 0.08935361216730038,
      "grad_norm": 0.05452190712094307,
      "learning_rate": 0.00019657527571416856,
      "loss": 0.4845,
      "step": 141
    },
    {
      "epoch": 0.08998732572877059,
      "grad_norm": 0.05258841812610626,
      "learning_rate": 0.00019652309654749156,
      "loss": 0.5255,
      "step": 142
    },
    {
      "epoch": 0.09062103929024082,
      "grad_norm": 0.06789179146289825,
      "learning_rate": 0.0001964705299102376,
      "loss": 0.6002,
      "step": 143
    },
    {
      "epoch": 0.09125475285171103,
      "grad_norm": 0.05940316617488861,
      "learning_rate": 0.00019641757601342378,
      "loss": 0.6178,
      "step": 144
    },
    {
      "epoch": 0.09188846641318124,
      "grad_norm": 0.08051005005836487,
      "learning_rate": 0.00019636423506962181,
      "loss": 0.4728,
      "step": 145
    },
    {
      "epoch": 0.09252217997465145,
      "grad_norm": 0.06979210674762726,
      "learning_rate": 0.00019631050729295707,
      "loss": 0.5166,
      "step": 146
    },
    {
      "epoch": 0.09315589353612168,
      "grad_norm": 0.04284743592143059,
      "learning_rate": 0.00019625639289910777,
      "loss": 0.3685,
      "step": 147
    },
    {
      "epoch": 0.09378960709759189,
      "grad_norm": 0.05410388484597206,
      "learning_rate": 0.00019620189210530425,
      "loss": 0.582,
      "step": 148
    },
    {
      "epoch": 0.0944233206590621,
      "grad_norm": 0.08875017613172531,
      "learning_rate": 0.00019614700513032775,
      "loss": 0.6757,
      "step": 149
    },
    {
      "epoch": 0.09505703422053231,
      "grad_norm": 0.06792068481445312,
      "learning_rate": 0.00019609173219450998,
      "loss": 0.5236,
      "step": 150
    },
    {
      "epoch": 0.09569074778200254,
      "grad_norm": 0.060000237077474594,
      "learning_rate": 0.0001960360735197318,
      "loss": 0.4813,
      "step": 151
    },
    {
      "epoch": 0.09632446134347275,
      "grad_norm": 0.052172888070344925,
      "learning_rate": 0.00019598002932942266,
      "loss": 0.5792,
      "step": 152
    },
    {
      "epoch": 0.09695817490494296,
      "grad_norm": 0.04992865398526192,
      "learning_rate": 0.00019592359984855952,
      "loss": 0.4652,
      "step": 153
    },
    {
      "epoch": 0.09759188846641319,
      "grad_norm": 0.05908304825425148,
      "learning_rate": 0.00019586678530366606,
      "loss": 0.4968,
      "step": 154
    },
    {
      "epoch": 0.0982256020278834,
      "grad_norm": 0.16080443561077118,
      "learning_rate": 0.00019580958592281167,
      "loss": 0.4804,
      "step": 155
    },
    {
      "epoch": 0.09885931558935361,
      "grad_norm": 0.05863935872912407,
      "learning_rate": 0.00019575200193561057,
      "loss": 0.5313,
      "step": 156
    },
    {
      "epoch": 0.09949302915082382,
      "grad_norm": 0.047341488301754,
      "learning_rate": 0.0001956940335732209,
      "loss": 0.4939,
      "step": 157
    },
    {
      "epoch": 0.10012674271229405,
      "grad_norm": 0.059797484427690506,
      "learning_rate": 0.00019563568106834383,
      "loss": 0.4806,
      "step": 158
    }
  ],
  "logging_steps": 1,
  "max_steps": 1578,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 158,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.1719132780982436e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
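Below is a minimal sketch of how this state file can be inspected with the Python standard library. The file path is illustrative, and the learning-rate check is an inference from the logged numbers (they appear consistent with 10 linear warmup steps to the 2e-4 peak followed by cosine decay over the remaining 1568 of the 1578 scheduled steps), not something read from a training config.

```python
import json
import math

# Load the checkpoint's trainer state (path is illustrative).
with open("checkpoint-158/trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]
losses = [h["loss"] for h in history]

# Summarize training progress so far.
print(f"step {state['global_step']}/{state['max_steps']}, "
      f"epoch {state['epoch']:.4f}")
print(f"mean loss, first 10 steps: {sum(losses[:10]) / 10:.4f}")
print(f"mean loss, last 10 steps:  {sum(losses[-10:]) / 10:.4f}")

# The logged learning rates look like 10 linear warmup steps to the
# 2e-4 peak, then cosine decay over the remaining steps (the standard
# cosine-with-warmup schedule). This is inferred from the numbers,
# not taken from a config file.
def expected_lr(step, peak=2e-4, warmup=10, total=1578):
    if step <= warmup:
        return peak * step / warmup
    progress = (step - warmup) / (total - warmup)
    return peak * 0.5 * (1.0 + math.cos(math.pi * progress))

worst = max(abs(h["learning_rate"] - expected_lr(h["step"]))
            for h in history)
print(f"max deviation from cosine-with-warmup fit: {worst:.2e}")
```

On the state above this reports the running loss falling from about 0.80 (mean of the first ten steps) to about 0.52 (mean of the last ten), with the logged learning rates matching the reconstructed schedule closely; the `epoch` values likewise equal `step / 1578`, consistent with `max_steps` for the single scheduled epoch.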