Qwen3-32B-0524_original_augmented_original_honeypot_emergency_override-def96ec2 / trainer_state.json
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 287,
  "global_step": 287,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003484320557491289,
      "grad_norm": 0.4681059420108795,
      "learning_rate": 1e-05,
      "loss": 2.3157,
      "step": 1
    },
    {
      "epoch": 0.006968641114982578,
      "grad_norm": 0.43911832571029663,
      "learning_rate": 9.965156794425088e-06,
      "loss": 2.1798,
      "step": 2
    },
    {
      "epoch": 0.010452961672473868,
      "grad_norm": 0.48994573950767517,
      "learning_rate": 9.930313588850174e-06,
      "loss": 2.3372,
      "step": 3
    },
    {
      "epoch": 0.013937282229965157,
      "grad_norm": 0.4473416805267334,
      "learning_rate": 9.895470383275261e-06,
      "loss": 2.1997,
      "step": 4
    },
    {
      "epoch": 0.017421602787456445,
      "grad_norm": 0.46916699409484863,
      "learning_rate": 9.860627177700349e-06,
      "loss": 2.2405,
      "step": 5
    },
    {
      "epoch": 0.020905923344947737,
      "grad_norm": 0.43466097116470337,
      "learning_rate": 9.825783972125436e-06,
      "loss": 2.1193,
      "step": 6
    },
    {
      "epoch": 0.024390243902439025,
      "grad_norm": 0.4701969623565674,
      "learning_rate": 9.790940766550524e-06,
      "loss": 2.2509,
      "step": 7
    },
    {
      "epoch": 0.027874564459930314,
      "grad_norm": 0.4056742489337921,
      "learning_rate": 9.756097560975611e-06,
      "loss": 2.0733,
      "step": 8
    },
    {
      "epoch": 0.0313588850174216,
      "grad_norm": 0.4089353084564209,
      "learning_rate": 9.721254355400698e-06,
      "loss": 2.1442,
      "step": 9
    },
    {
      "epoch": 0.03484320557491289,
      "grad_norm": 0.39055365324020386,
      "learning_rate": 9.686411149825786e-06,
      "loss": 2.0705,
      "step": 10
    },
    {
      "epoch": 0.03832752613240418,
      "grad_norm": 0.38841402530670166,
      "learning_rate": 9.651567944250871e-06,
      "loss": 2.1562,
      "step": 11
    },
    {
      "epoch": 0.041811846689895474,
      "grad_norm": 0.3685610890388489,
      "learning_rate": 9.616724738675959e-06,
      "loss": 2.0895,
      "step": 12
    },
    {
      "epoch": 0.04529616724738676,
      "grad_norm": 0.35734865069389343,
      "learning_rate": 9.581881533101046e-06,
      "loss": 2.1288,
      "step": 13
    },
    {
      "epoch": 0.04878048780487805,
      "grad_norm": 0.3617437183856964,
      "learning_rate": 9.547038327526134e-06,
      "loss": 2.1294,
      "step": 14
    },
    {
      "epoch": 0.05226480836236934,
      "grad_norm": 0.3264251947402954,
      "learning_rate": 9.51219512195122e-06,
      "loss": 2.0143,
      "step": 15
    },
    {
      "epoch": 0.05574912891986063,
      "grad_norm": 0.3306083381175995,
      "learning_rate": 9.477351916376307e-06,
      "loss": 2.0109,
      "step": 16
    },
    {
      "epoch": 0.059233449477351915,
      "grad_norm": 0.28681087493896484,
      "learning_rate": 9.442508710801394e-06,
      "loss": 1.8251,
      "step": 17
    },
    {
      "epoch": 0.0627177700348432,
      "grad_norm": 0.3397851884365082,
      "learning_rate": 9.407665505226482e-06,
      "loss": 2.0378,
      "step": 18
    },
    {
      "epoch": 0.06620209059233449,
      "grad_norm": 0.3293090760707855,
      "learning_rate": 9.372822299651569e-06,
      "loss": 2.0082,
      "step": 19
    },
    {
      "epoch": 0.06968641114982578,
      "grad_norm": 0.3335629999637604,
      "learning_rate": 9.337979094076656e-06,
      "loss": 1.9971,
      "step": 20
    },
    {
      "epoch": 0.07317073170731707,
      "grad_norm": 0.3299424350261688,
      "learning_rate": 9.303135888501744e-06,
      "loss": 1.9411,
      "step": 21
    },
    {
      "epoch": 0.07665505226480836,
      "grad_norm": 0.3071727454662323,
      "learning_rate": 9.268292682926831e-06,
      "loss": 1.9911,
      "step": 22
    },
    {
      "epoch": 0.08013937282229965,
      "grad_norm": 0.3316495418548584,
      "learning_rate": 9.233449477351917e-06,
      "loss": 1.9853,
      "step": 23
    },
    {
      "epoch": 0.08362369337979095,
      "grad_norm": 0.2751210331916809,
      "learning_rate": 9.198606271777004e-06,
      "loss": 1.8808,
      "step": 24
    },
    {
      "epoch": 0.08710801393728224,
      "grad_norm": 0.2718318998813629,
      "learning_rate": 9.163763066202092e-06,
      "loss": 1.8427,
      "step": 25
    },
    {
      "epoch": 0.09059233449477352,
      "grad_norm": 0.2898252606391907,
      "learning_rate": 9.12891986062718e-06,
      "loss": 1.9426,
      "step": 26
    },
    {
      "epoch": 0.09407665505226481,
      "grad_norm": 0.2664470672607422,
      "learning_rate": 9.094076655052265e-06,
      "loss": 1.8555,
      "step": 27
    },
    {
      "epoch": 0.0975609756097561,
      "grad_norm": 0.2761334180831909,
      "learning_rate": 9.059233449477352e-06,
      "loss": 1.9105,
      "step": 28
    },
    {
      "epoch": 0.10104529616724739,
      "grad_norm": 0.2501201629638672,
      "learning_rate": 9.02439024390244e-06,
      "loss": 1.8098,
      "step": 29
    },
    {
      "epoch": 0.10452961672473868,
      "grad_norm": 0.2666330337524414,
      "learning_rate": 8.989547038327527e-06,
      "loss": 1.9121,
      "step": 30
    },
    {
      "epoch": 0.10801393728222997,
      "grad_norm": 0.2564990520477295,
      "learning_rate": 8.954703832752613e-06,
      "loss": 1.8523,
      "step": 31
    },
    {
      "epoch": 0.11149825783972125,
      "grad_norm": 0.25139304995536804,
      "learning_rate": 8.9198606271777e-06,
      "loss": 1.7876,
      "step": 32
    },
    {
      "epoch": 0.11498257839721254,
      "grad_norm": 0.2525288164615631,
      "learning_rate": 8.885017421602788e-06,
      "loss": 1.8219,
      "step": 33
    },
    {
      "epoch": 0.11846689895470383,
      "grad_norm": 0.23850581049919128,
      "learning_rate": 8.850174216027875e-06,
      "loss": 1.7896,
      "step": 34
    },
    {
      "epoch": 0.12195121951219512,
      "grad_norm": 0.2513561546802521,
      "learning_rate": 8.815331010452962e-06,
      "loss": 1.8001,
      "step": 35
    },
    {
      "epoch": 0.1254355400696864,
      "grad_norm": 0.23863127827644348,
      "learning_rate": 8.78048780487805e-06,
      "loss": 1.7681,
      "step": 36
    },
    {
      "epoch": 0.1289198606271777,
      "grad_norm": 0.2393515706062317,
      "learning_rate": 8.745644599303137e-06,
      "loss": 1.755,
      "step": 37
    },
    {
      "epoch": 0.13240418118466898,
      "grad_norm": 0.24767489731311798,
      "learning_rate": 8.710801393728223e-06,
      "loss": 1.825,
      "step": 38
    },
    {
      "epoch": 0.13588850174216027,
      "grad_norm": 0.2272832989692688,
      "learning_rate": 8.67595818815331e-06,
      "loss": 1.7115,
      "step": 39
    },
    {
      "epoch": 0.13937282229965156,
      "grad_norm": 0.2401566058397293,
      "learning_rate": 8.641114982578398e-06,
      "loss": 1.8225,
      "step": 40
    },
    {
      "epoch": 0.14285714285714285,
      "grad_norm": 0.23259598016738892,
      "learning_rate": 8.606271777003485e-06,
      "loss": 1.7867,
      "step": 41
    },
    {
      "epoch": 0.14634146341463414,
      "grad_norm": 0.2286115437746048,
      "learning_rate": 8.571428571428571e-06,
      "loss": 1.7541,
      "step": 42
    },
    {
      "epoch": 0.14982578397212543,
      "grad_norm": 0.20777663588523865,
      "learning_rate": 8.536585365853658e-06,
      "loss": 1.6419,
      "step": 43
    },
    {
      "epoch": 0.15331010452961671,
      "grad_norm": 0.21444709599018097,
      "learning_rate": 8.501742160278746e-06,
      "loss": 1.6579,
      "step": 44
    },
    {
      "epoch": 0.156794425087108,
      "grad_norm": 0.2146528959274292,
      "learning_rate": 8.466898954703833e-06,
      "loss": 1.6798,
      "step": 45
    },
    {
      "epoch": 0.1602787456445993,
      "grad_norm": 0.20760183036327362,
      "learning_rate": 8.43205574912892e-06,
      "loss": 1.6622,
      "step": 46
    },
    {
      "epoch": 0.16376306620209058,
      "grad_norm": 0.22706815600395203,
      "learning_rate": 8.397212543554008e-06,
      "loss": 1.6818,
      "step": 47
    },
    {
      "epoch": 0.1672473867595819,
      "grad_norm": 0.21084247529506683,
      "learning_rate": 8.362369337979095e-06,
      "loss": 1.6561,
      "step": 48
    },
    {
      "epoch": 0.17073170731707318,
      "grad_norm": 0.21659813821315765,
      "learning_rate": 8.327526132404183e-06,
      "loss": 1.6813,
      "step": 49
    },
    {
      "epoch": 0.17421602787456447,
      "grad_norm": 0.23408977687358856,
      "learning_rate": 8.292682926829268e-06,
      "loss": 1.7178,
      "step": 50
    },
    {
      "epoch": 0.17770034843205576,
      "grad_norm": 0.20702576637268066,
      "learning_rate": 8.257839721254356e-06,
      "loss": 1.6326,
      "step": 51
    },
    {
      "epoch": 0.18118466898954705,
      "grad_norm": 0.2025461345911026,
      "learning_rate": 8.222996515679443e-06,
      "loss": 1.6496,
      "step": 52
    },
    {
      "epoch": 0.18466898954703834,
      "grad_norm": 0.194467231631279,
      "learning_rate": 8.18815331010453e-06,
      "loss": 1.5839,
      "step": 53
    },
    {
      "epoch": 0.18815331010452963,
      "grad_norm": 0.19592227041721344,
      "learning_rate": 8.153310104529616e-06,
      "loss": 1.6172,
      "step": 54
    },
    {
      "epoch": 0.1916376306620209,
      "grad_norm": 0.19883790612220764,
      "learning_rate": 8.118466898954704e-06,
      "loss": 1.6173,
      "step": 55
    },
    {
      "epoch": 0.1951219512195122,
      "grad_norm": 0.19728149473667145,
      "learning_rate": 8.083623693379791e-06,
      "loss": 1.6547,
      "step": 56
    },
    {
      "epoch": 0.1986062717770035,
      "grad_norm": 0.18247534334659576,
      "learning_rate": 8.048780487804879e-06,
      "loss": 1.596,
      "step": 57
    },
    {
      "epoch": 0.20209059233449478,
      "grad_norm": 0.18350286781787872,
      "learning_rate": 8.013937282229966e-06,
      "loss": 1.5878,
      "step": 58
    },
    {
      "epoch": 0.20557491289198607,
      "grad_norm": 0.19237418472766876,
      "learning_rate": 7.979094076655053e-06,
      "loss": 1.5288,
      "step": 59
    },
    {
      "epoch": 0.20905923344947736,
      "grad_norm": 0.17903505265712738,
      "learning_rate": 7.94425087108014e-06,
      "loss": 1.5506,
      "step": 60
    },
    {
      "epoch": 0.21254355400696864,
      "grad_norm": 0.18244066834449768,
      "learning_rate": 7.909407665505228e-06,
      "loss": 1.5469,
      "step": 61
    },
    {
      "epoch": 0.21602787456445993,
      "grad_norm": 0.1802457869052887,
      "learning_rate": 7.874564459930314e-06,
      "loss": 1.5357,
      "step": 62
    },
    {
      "epoch": 0.21951219512195122,
      "grad_norm": 0.18679888546466827,
      "learning_rate": 7.839721254355401e-06,
      "loss": 1.602,
      "step": 63
    },
    {
      "epoch": 0.2229965156794425,
      "grad_norm": 0.17410430312156677,
      "learning_rate": 7.804878048780489e-06,
      "loss": 1.5729,
      "step": 64
    },
    {
      "epoch": 0.2264808362369338,
      "grad_norm": 0.17459648847579956,
      "learning_rate": 7.770034843205574e-06,
      "loss": 1.5176,
      "step": 65
    },
    {
      "epoch": 0.22996515679442509,
      "grad_norm": 0.18239694833755493,
      "learning_rate": 7.735191637630662e-06,
      "loss": 1.5452,
      "step": 66
    },
    {
      "epoch": 0.23344947735191637,
      "grad_norm": 0.17324566841125488,
      "learning_rate": 7.70034843205575e-06,
      "loss": 1.4903,
      "step": 67
    },
    {
      "epoch": 0.23693379790940766,
      "grad_norm": 0.17015033960342407,
      "learning_rate": 7.665505226480837e-06,
      "loss": 1.4805,
      "step": 68
    },
    {
      "epoch": 0.24041811846689895,
      "grad_norm": 0.17489972710609436,
      "learning_rate": 7.630662020905924e-06,
      "loss": 1.531,
      "step": 69
    },
    {
      "epoch": 0.24390243902439024,
      "grad_norm": 0.16403886675834656,
      "learning_rate": 7.595818815331011e-06,
      "loss": 1.505,
      "step": 70
    },
    {
      "epoch": 0.24738675958188153,
      "grad_norm": 0.16804824769496918,
      "learning_rate": 7.560975609756098e-06,
      "loss": 1.4916,
      "step": 71
    },
    {
      "epoch": 0.2508710801393728,
      "grad_norm": 0.166668102145195,
      "learning_rate": 7.5261324041811855e-06,
      "loss": 1.4467,
      "step": 72
    },
    {
      "epoch": 0.25435540069686413,
      "grad_norm": 0.16838915646076202,
      "learning_rate": 7.491289198606272e-06,
      "loss": 1.5101,
      "step": 73
    },
    {
      "epoch": 0.2578397212543554,
      "grad_norm": 0.1719081699848175,
      "learning_rate": 7.4564459930313594e-06,
      "loss": 1.4853,
      "step": 74
    },
    {
      "epoch": 0.2613240418118467,
      "grad_norm": 0.16803263127803802,
      "learning_rate": 7.421602787456447e-06,
      "loss": 1.4933,
      "step": 75
    },
    {
      "epoch": 0.26480836236933797,
      "grad_norm": 0.1734427511692047,
      "learning_rate": 7.386759581881534e-06,
      "loss": 1.5206,
      "step": 76
    },
    {
      "epoch": 0.2682926829268293,
      "grad_norm": 0.162466362118721,
      "learning_rate": 7.35191637630662e-06,
      "loss": 1.4884,
      "step": 77
    },
    {
      "epoch": 0.27177700348432055,
      "grad_norm": 0.17201292514801025,
      "learning_rate": 7.317073170731707e-06,
      "loss": 1.483,
      "step": 78
    },
    {
      "epoch": 0.27526132404181186,
      "grad_norm": 0.16368691623210907,
      "learning_rate": 7.282229965156795e-06,
      "loss": 1.4476,
      "step": 79
    },
    {
      "epoch": 0.2787456445993031,
      "grad_norm": 0.18879356980323792,
      "learning_rate": 7.247386759581882e-06,
      "loss": 1.4909,
      "step": 80
    },
    {
      "epoch": 0.28222996515679444,
      "grad_norm": 0.184343159198761,
      "learning_rate": 7.212543554006969e-06,
      "loss": 1.5221,
      "step": 81
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 0.16952507197856903,
      "learning_rate": 7.177700348432056e-06,
      "loss": 1.4541,
      "step": 82
    },
    {
      "epoch": 0.289198606271777,
      "grad_norm": 0.17221859097480774,
      "learning_rate": 7.1428571428571436e-06,
      "loss": 1.4465,
      "step": 83
    },
    {
      "epoch": 0.2926829268292683,
      "grad_norm": 0.1716550588607788,
      "learning_rate": 7.108013937282231e-06,
      "loss": 1.454,
      "step": 84
    },
    {
      "epoch": 0.2961672473867596,
      "grad_norm": 0.16458812355995178,
      "learning_rate": 7.0731707317073175e-06,
      "loss": 1.4304,
      "step": 85
    },
    {
      "epoch": 0.29965156794425085,
      "grad_norm": 0.1824132353067398,
      "learning_rate": 7.038327526132405e-06,
      "loss": 1.528,
      "step": 86
    },
    {
      "epoch": 0.30313588850174217,
      "grad_norm": 0.17251546680927277,
      "learning_rate": 7.003484320557492e-06,
      "loss": 1.4066,
      "step": 87
    },
    {
      "epoch": 0.30662020905923343,
      "grad_norm": 0.18399403989315033,
      "learning_rate": 6.96864111498258e-06,
      "loss": 1.4524,
      "step": 88
    },
    {
      "epoch": 0.31010452961672474,
      "grad_norm": 0.175154909491539,
      "learning_rate": 6.9337979094076655e-06,
      "loss": 1.4094,
      "step": 89
    },
    {
      "epoch": 0.313588850174216,
      "grad_norm": 0.17567573487758636,
      "learning_rate": 6.898954703832753e-06,
      "loss": 1.3933,
      "step": 90
    },
    {
      "epoch": 0.3170731707317073,
      "grad_norm": 0.171456441283226,
      "learning_rate": 6.86411149825784e-06,
      "loss": 1.3669,
      "step": 91
    },
    {
      "epoch": 0.3205574912891986,
      "grad_norm": 0.18115688860416412,
      "learning_rate": 6.829268292682928e-06,
      "loss": 1.4327,
      "step": 92
    },
    {
      "epoch": 0.3240418118466899,
      "grad_norm": 0.1830427646636963,
      "learning_rate": 6.794425087108014e-06,
      "loss": 1.4109,
      "step": 93
    },
    {
      "epoch": 0.32752613240418116,
      "grad_norm": 0.17550437152385712,
      "learning_rate": 6.759581881533102e-06,
      "loss": 1.45,
      "step": 94
    },
    {
      "epoch": 0.3310104529616725,
      "grad_norm": 0.18112653493881226,
      "learning_rate": 6.724738675958189e-06,
      "loss": 1.4147,
      "step": 95
    },
    {
      "epoch": 0.3344947735191638,
      "grad_norm": 0.19797854125499725,
      "learning_rate": 6.6898954703832765e-06,
      "loss": 1.4424,
      "step": 96
    },
    {
      "epoch": 0.33797909407665505,
      "grad_norm": 0.17347250878810883,
      "learning_rate": 6.655052264808363e-06,
      "loss": 1.4013,
      "step": 97
    },
    {
      "epoch": 0.34146341463414637,
      "grad_norm": 0.19375310838222504,
      "learning_rate": 6.62020905923345e-06,
      "loss": 1.4349,
      "step": 98
    },
    {
      "epoch": 0.34494773519163763,
      "grad_norm": 0.2087317854166031,
      "learning_rate": 6.585365853658538e-06,
      "loss": 1.4518,
      "step": 99
    },
    {
      "epoch": 0.34843205574912894,
      "grad_norm": 0.16593414545059204,
      "learning_rate": 6.5505226480836235e-06,
      "loss": 1.3375,
      "step": 100
    },
    {
      "epoch": 0.3519163763066202,
      "grad_norm": 0.19046683609485626,
      "learning_rate": 6.515679442508711e-06,
      "loss": 1.405,
      "step": 101
    },
    {
      "epoch": 0.3554006968641115,
      "grad_norm": 0.1831621676683426,
      "learning_rate": 6.480836236933798e-06,
      "loss": 1.393,
      "step": 102
    },
    {
      "epoch": 0.3588850174216028,
      "grad_norm": 0.19390657544136047,
      "learning_rate": 6.445993031358886e-06,
      "loss": 1.4167,
      "step": 103
    },
    {
      "epoch": 0.3623693379790941,
      "grad_norm": 0.1814665049314499,
      "learning_rate": 6.411149825783972e-06,
      "loss": 1.4104,
      "step": 104
    },
    {
      "epoch": 0.36585365853658536,
      "grad_norm": 0.19021427631378174,
      "learning_rate": 6.37630662020906e-06,
      "loss": 1.4044,
      "step": 105
    },
    {
      "epoch": 0.3693379790940767,
      "grad_norm": 0.1900520920753479,
      "learning_rate": 6.341463414634147e-06,
      "loss": 1.3625,
      "step": 106
    },
    {
      "epoch": 0.37282229965156793,
      "grad_norm": 0.19241949915885925,
      "learning_rate": 6.3066202090592345e-06,
      "loss": 1.4015,
      "step": 107
    },
    {
      "epoch": 0.37630662020905925,
      "grad_norm": 0.18765950202941895,
      "learning_rate": 6.27177700348432e-06,
      "loss": 1.3819,
      "step": 108
    },
    {
      "epoch": 0.3797909407665505,
      "grad_norm": 0.19136080145835876,
      "learning_rate": 6.236933797909408e-06,
      "loss": 1.4064,
      "step": 109
    },
    {
      "epoch": 0.3832752613240418,
      "grad_norm": 0.17665834724903107,
      "learning_rate": 6.202090592334495e-06,
      "loss": 1.3358,
      "step": 110
    },
    {
      "epoch": 0.3867595818815331,
      "grad_norm": 0.1866646111011505,
      "learning_rate": 6.1672473867595825e-06,
      "loss": 1.3601,
      "step": 111
    },
    {
      "epoch": 0.3902439024390244,
      "grad_norm": 0.1813240647315979,
      "learning_rate": 6.132404181184669e-06,
      "loss": 1.3467,
      "step": 112
    },
    {
      "epoch": 0.39372822299651566,
      "grad_norm": 0.1798020899295807,
      "learning_rate": 6.0975609756097564e-06,
      "loss": 1.3516,
      "step": 113
    },
    {
      "epoch": 0.397212543554007,
      "grad_norm": 0.1959715634584427,
      "learning_rate": 6.062717770034844e-06,
      "loss": 1.3947,
      "step": 114
    },
    {
      "epoch": 0.40069686411149824,
      "grad_norm": 0.18121780455112457,
      "learning_rate": 6.027874564459931e-06,
      "loss": 1.3895,
      "step": 115
    },
    {
      "epoch": 0.40418118466898956,
      "grad_norm": 0.182073712348938,
      "learning_rate": 5.993031358885018e-06,
      "loss": 1.36,
      "step": 116
    },
    {
      "epoch": 0.4076655052264808,
      "grad_norm": 0.1899159997701645,
      "learning_rate": 5.958188153310105e-06,
      "loss": 1.3478,
      "step": 117
    },
    {
      "epoch": 0.41114982578397213,
      "grad_norm": 0.1755976527929306,
      "learning_rate": 5.923344947735193e-06,
      "loss": 1.3632,
      "step": 118
    },
    {
      "epoch": 0.4146341463414634,
      "grad_norm": 0.18569977581501007,
      "learning_rate": 5.88850174216028e-06,
      "loss": 1.3662,
      "step": 119
    },
    {
      "epoch": 0.4181184668989547,
      "grad_norm": 0.17412231862545013,
      "learning_rate": 5.853658536585366e-06,
      "loss": 1.3452,
      "step": 120
    },
    {
      "epoch": 0.42160278745644597,
      "grad_norm": 0.17247503995895386,
      "learning_rate": 5.818815331010453e-06,
      "loss": 1.3633,
      "step": 121
    },
    {
      "epoch": 0.4250871080139373,
      "grad_norm": 0.1747383177280426,
      "learning_rate": 5.7839721254355405e-06,
      "loss": 1.3278,
      "step": 122
    },
    {
      "epoch": 0.42857142857142855,
      "grad_norm": 0.17394213378429413,
      "learning_rate": 5.749128919860628e-06,
      "loss": 1.3567,
      "step": 123
    },
    {
      "epoch": 0.43205574912891986,
      "grad_norm": 0.18396231532096863,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 1.3777,
      "step": 124
    },
    {
      "epoch": 0.4355400696864111,
      "grad_norm": 0.1933022290468216,
      "learning_rate": 5.679442508710802e-06,
      "loss": 1.3726,
      "step": 125
    },
    {
      "epoch": 0.43902439024390244,
      "grad_norm": 0.1778842657804489,
      "learning_rate": 5.644599303135889e-06,
      "loss": 1.319,
      "step": 126
    },
    {
      "epoch": 0.4425087108013937,
      "grad_norm": 0.17646637558937073,
      "learning_rate": 5.609756097560977e-06,
      "loss": 1.3428,
      "step": 127
    },
    {
      "epoch": 0.445993031358885,
      "grad_norm": 0.18623341619968414,
      "learning_rate": 5.574912891986063e-06,
      "loss": 1.3247,
      "step": 128
    },
    {
      "epoch": 0.44947735191637633,
      "grad_norm": 0.17367148399353027,
      "learning_rate": 5.540069686411151e-06,
      "loss": 1.3401,
      "step": 129
    },
    {
      "epoch": 0.4529616724738676,
      "grad_norm": 0.19395571947097778,
      "learning_rate": 5.505226480836237e-06,
      "loss": 1.356,
      "step": 130
    },
    {
      "epoch": 0.4564459930313589,
      "grad_norm": 0.19120880961418152,
      "learning_rate": 5.470383275261324e-06,
      "loss": 1.3288,
      "step": 131
    },
    {
      "epoch": 0.45993031358885017,
      "grad_norm": 0.17239636182785034,
      "learning_rate": 5.435540069686411e-06,
      "loss": 1.3418,
      "step": 132
    },
    {
      "epoch": 0.4634146341463415,
      "grad_norm": 0.1770051121711731,
      "learning_rate": 5.400696864111499e-06,
      "loss": 1.3404,
      "step": 133
    },
    {
      "epoch": 0.46689895470383275,
      "grad_norm": 0.17593827843666077,
      "learning_rate": 5.365853658536586e-06,
      "loss": 1.3007,
      "step": 134
    },
    {
      "epoch": 0.47038327526132406,
      "grad_norm": 0.17362338304519653,
      "learning_rate": 5.331010452961673e-06,
      "loss": 1.3065,
      "step": 135
    },
    {
      "epoch": 0.4738675958188153,
      "grad_norm": 0.18237850069999695,
      "learning_rate": 5.29616724738676e-06,
      "loss": 1.301,
      "step": 136
    },
    {
      "epoch": 0.47735191637630664,
      "grad_norm": 0.18188218772411346,
      "learning_rate": 5.261324041811847e-06,
      "loss": 1.3643,
      "step": 137
    },
    {
      "epoch": 0.4808362369337979,
      "grad_norm": 0.17353297770023346,
      "learning_rate": 5.226480836236935e-06,
      "loss": 1.3077,
      "step": 138
    },
    {
      "epoch": 0.4843205574912892,
      "grad_norm": 0.17389504611492157,
      "learning_rate": 5.1916376306620205e-06,
      "loss": 1.2832,
      "step": 139
    },
    {
      "epoch": 0.4878048780487805,
      "grad_norm": 0.19165605306625366,
      "learning_rate": 5.156794425087108e-06,
      "loss": 1.3427,
      "step": 140
    },
    {
      "epoch": 0.4912891986062718,
      "grad_norm": 0.16731810569763184,
      "learning_rate": 5.121951219512195e-06,
      "loss": 1.3087,
      "step": 141
    },
    {
      "epoch": 0.49477351916376305,
      "grad_norm": 0.17610980570316315,
      "learning_rate": 5.087108013937283e-06,
      "loss": 1.3309,
      "step": 142
    },
    {
      "epoch": 0.49825783972125437,
      "grad_norm": 0.1756918877363205,
      "learning_rate": 5.052264808362369e-06,
      "loss": 1.309,
      "step": 143
    },
    {
      "epoch": 0.5017421602787456,
      "grad_norm": 0.1759544461965561,
      "learning_rate": 5.017421602787457e-06,
      "loss": 1.2852,
      "step": 144
    },
    {
      "epoch": 0.5052264808362369,
      "grad_norm": 0.18174010515213013,
      "learning_rate": 4.982578397212544e-06,
      "loss": 1.3141,
      "step": 145
    },
    {
      "epoch": 0.5087108013937283,
      "grad_norm": 0.17240025103092194,
      "learning_rate": 4.947735191637631e-06,
      "loss": 1.2943,
      "step": 146
    },
    {
      "epoch": 0.5121951219512195,
      "grad_norm": 0.18199099600315094,
      "learning_rate": 4.912891986062718e-06,
      "loss": 1.2882,
      "step": 147
    },
    {
      "epoch": 0.5156794425087108,
      "grad_norm": 0.19332602620124817,
      "learning_rate": 4.8780487804878055e-06,
      "loss": 1.3314,
      "step": 148
    },
    {
      "epoch": 0.519163763066202,
      "grad_norm": 0.16645659506320953,
      "learning_rate": 4.843205574912893e-06,
      "loss": 1.3036,
      "step": 149
    },
    {
      "epoch": 0.5226480836236934,
      "grad_norm": 0.18242418766021729,
      "learning_rate": 4.8083623693379794e-06,
      "loss": 1.3054,
      "step": 150
    },
    {
      "epoch": 0.5261324041811847,
      "grad_norm": 0.1789921373128891,
      "learning_rate": 4.773519163763067e-06,
      "loss": 1.2994,
      "step": 151
    },
    {
      "epoch": 0.5296167247386759,
      "grad_norm": 0.17717278003692627,
      "learning_rate": 4.738675958188153e-06,
      "loss": 1.3165,
      "step": 152
    },
    {
      "epoch": 0.5331010452961672,
      "grad_norm": 0.18196922540664673,
      "learning_rate": 4.703832752613241e-06,
      "loss": 1.3406,
      "step": 153
    },
    {
      "epoch": 0.5365853658536586,
      "grad_norm": 0.17408598959445953,
      "learning_rate": 4.668989547038328e-06,
      "loss": 1.3177,
      "step": 154
    },
    {
      "epoch": 0.5400696864111498,
      "grad_norm": 0.17936593294143677,
      "learning_rate": 4.634146341463416e-06,
      "loss": 1.2894,
      "step": 155
    },
    {
      "epoch": 0.5435540069686411,
      "grad_norm": 0.1784544587135315,
      "learning_rate": 4.599303135888502e-06,
      "loss": 1.2993,
      "step": 156
    },
    {
      "epoch": 0.5470383275261324,
      "grad_norm": 0.18778389692306519,
      "learning_rate": 4.56445993031359e-06,
      "loss": 1.3458,
      "step": 157
    },
    {
      "epoch": 0.5505226480836237,
      "grad_norm": 0.17315524816513062,
      "learning_rate": 4.529616724738676e-06,
      "loss": 1.3099,
      "step": 158
    },
    {
      "epoch": 0.554006968641115,
      "grad_norm": 0.18212544918060303,
      "learning_rate": 4.4947735191637636e-06,
      "loss": 1.3059,
      "step": 159
    },
    {
      "epoch": 0.5574912891986062,
      "grad_norm": 0.19053645431995392,
      "learning_rate": 4.45993031358885e-06,
      "loss": 1.3426,
      "step": 160
    },
    {
      "epoch": 0.5609756097560976,
      "grad_norm": 0.1728803664445877,
      "learning_rate": 4.4250871080139375e-06,
      "loss": 1.2333,
      "step": 161
    },
    {
      "epoch": 0.5644599303135889,
      "grad_norm": 0.17140808701515198,
      "learning_rate": 4.390243902439025e-06,
      "loss": 1.2702,
      "step": 162
    },
    {
      "epoch": 0.5679442508710801,
      "grad_norm": 0.17718710005283356,
      "learning_rate": 4.3554006968641115e-06,
      "loss": 1.2948,
      "step": 163
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 0.19173213839530945,
      "learning_rate": 4.320557491289199e-06,
      "loss": 1.3363,
      "step": 164
    },
    {
      "epoch": 0.5749128919860628,
      "grad_norm": 0.17607201635837555,
      "learning_rate": 4.2857142857142855e-06,
      "loss": 1.3028,
      "step": 165
    },
    {
      "epoch": 0.578397212543554,
      "grad_norm": 0.17451880872249603,
      "learning_rate": 4.250871080139373e-06,
      "loss": 1.2424,
      "step": 166
    },
    {
      "epoch": 0.5818815331010453,
      "grad_norm": 0.17934007942676544,
      "learning_rate": 4.21602787456446e-06,
      "loss": 1.2925,
      "step": 167
    },
    {
      "epoch": 0.5853658536585366,
      "grad_norm": 0.17370004951953888,
      "learning_rate": 4.181184668989548e-06,
      "loss": 1.2957,
      "step": 168
    },
    {
      "epoch": 0.5888501742160279,
      "grad_norm": 0.18196696043014526,
      "learning_rate": 4.146341463414634e-06,
      "loss": 1.3194,
      "step": 169
    },
    {
      "epoch": 0.5923344947735192,
      "grad_norm": 0.16050949692726135,
      "learning_rate": 4.111498257839722e-06,
      "loss": 1.2281,
      "step": 170
    },
    {
      "epoch": 0.5958188153310104,
      "grad_norm": 0.17562592029571533,
      "learning_rate": 4.076655052264808e-06,
      "loss": 1.2271,
      "step": 171
    },
    {
      "epoch": 0.5993031358885017,
      "grad_norm": 0.1825701743364334,
      "learning_rate": 4.041811846689896e-06,
      "loss": 1.3209,
      "step": 172
    },
    {
      "epoch": 0.6027874564459931,
      "grad_norm": 0.17921793460845947,
      "learning_rate": 4.006968641114983e-06,
      "loss": 1.2698,
      "step": 173
    },
    {
      "epoch": 0.6062717770034843,
      "grad_norm": 0.17750316858291626,
      "learning_rate": 3.97212543554007e-06,
      "loss": 1.226,
      "step": 174
    },
    {
      "epoch": 0.6097560975609756,
      "grad_norm": 0.20432093739509583,
      "learning_rate": 3.937282229965157e-06,
      "loss": 1.3363,
      "step": 175
    },
    {
      "epoch": 0.6132404181184669,
      "grad_norm": 0.20100562274456024,
      "learning_rate": 3.902439024390244e-06,
      "loss": 1.3018,
      "step": 176
    },
    {
      "epoch": 0.6167247386759582,
      "grad_norm": 0.16703099012374878,
      "learning_rate": 3.867595818815331e-06,
      "loss": 1.2702,
      "step": 177
    },
    {
      "epoch": 0.6202090592334495,
      "grad_norm": 0.1671084612607956,
      "learning_rate": 3.832752613240418e-06,
      "loss": 1.2791,
      "step": 178
    },
    {
      "epoch": 0.6236933797909407,
      "grad_norm": 0.18099632859230042,
      "learning_rate": 3.7979094076655053e-06,
      "loss": 1.3091,
      "step": 179
    },
    {
      "epoch": 0.627177700348432,
      "grad_norm": 0.16746889054775238,
      "learning_rate": 3.7630662020905927e-06,
      "loss": 1.195,
      "step": 180
    },
    {
      "epoch": 0.6306620209059234,
      "grad_norm": 0.177221417427063,
      "learning_rate": 3.7282229965156797e-06,
      "loss": 1.2646,
      "step": 181
    },
    {
      "epoch": 0.6341463414634146,
      "grad_norm": 0.1823604255914688,
      "learning_rate": 3.693379790940767e-06,
      "loss": 1.2805,
      "step": 182
    },
    {
      "epoch": 0.6376306620209059,
      "grad_norm": 0.18074429035186768,
      "learning_rate": 3.6585365853658537e-06,
      "loss": 1.3078,
      "step": 183
    },
    {
      "epoch": 0.6411149825783972,
      "grad_norm": 0.1870466023683548,
      "learning_rate": 3.623693379790941e-06,
      "loss": 1.356,
      "step": 184
    },
    {
      "epoch": 0.6445993031358885,
      "grad_norm": 0.1736750304698944,
      "learning_rate": 3.588850174216028e-06,
      "loss": 1.3235,
      "step": 185
    },
    {
      "epoch": 0.6480836236933798,
      "grad_norm": 0.18182937800884247,
      "learning_rate": 3.5540069686411155e-06,
      "loss": 1.2958,
      "step": 186
    },
    {
      "epoch": 0.6515679442508711,
      "grad_norm": 0.17288650572299957,
      "learning_rate": 3.5191637630662025e-06,
      "loss": 1.2389,
      "step": 187
    },
    {
      "epoch": 0.6550522648083623,
      "grad_norm": 0.172458216547966,
      "learning_rate": 3.48432055749129e-06,
      "loss": 1.2246,
      "step": 188
    },
    {
      "epoch": 0.6585365853658537,
      "grad_norm": 0.18126848340034485,
      "learning_rate": 3.4494773519163764e-06,
      "loss": 1.2445,
      "step": 189
    },
    {
      "epoch": 0.662020905923345,
      "grad_norm": 0.18049533665180206,
      "learning_rate": 3.414634146341464e-06,
      "loss": 1.2267,
      "step": 190
    },
    {
      "epoch": 0.6655052264808362,
      "grad_norm": 0.17865833640098572,
      "learning_rate": 3.379790940766551e-06,
      "loss": 1.2596,
      "step": 191
    },
    {
      "epoch": 0.6689895470383276,
      "grad_norm": 0.18119291961193085,
      "learning_rate": 3.3449477351916382e-06,
      "loss": 1.2839,
      "step": 192
    },
    {
      "epoch": 0.6724738675958188,
      "grad_norm": 0.1973109394311905,
      "learning_rate": 3.310104529616725e-06,
      "loss": 1.2458,
      "step": 193
    },
    {
      "epoch": 0.6759581881533101,
      "grad_norm": 0.20635153353214264,
      "learning_rate": 3.2752613240418118e-06,
      "loss": 1.3311,
      "step": 194
    },
    {
      "epoch": 0.6794425087108014,
      "grad_norm": 0.18912208080291748,
      "learning_rate": 3.240418118466899e-06,
      "loss": 1.3094,
      "step": 195
    },
    {
      "epoch": 0.6829268292682927,
      "grad_norm": 0.18446075916290283,
      "learning_rate": 3.205574912891986e-06,
      "loss": 1.2784,
      "step": 196
    },
    {
      "epoch": 0.686411149825784,
      "grad_norm": 0.167321115732193,
      "learning_rate": 3.1707317073170736e-06,
      "loss": 1.2773,
      "step": 197
    },
    {
      "epoch": 0.6898954703832753,
      "grad_norm": 0.17466844618320465,
      "learning_rate": 3.13588850174216e-06,
      "loss": 1.2511,
      "step": 198
    },
    {
      "epoch": 0.6933797909407665,
      "grad_norm": 0.17522381246089935,
      "learning_rate": 3.1010452961672475e-06,
      "loss": 1.1999,
      "step": 199
    },
    {
      "epoch": 0.6968641114982579,
      "grad_norm": 0.18197931349277496,
      "learning_rate": 3.0662020905923345e-06,
      "loss": 1.2883,
      "step": 200
    },
    {
      "epoch": 0.7003484320557491,
      "grad_norm": 0.1782844066619873,
      "learning_rate": 3.031358885017422e-06,
      "loss": 1.2259,
      "step": 201
    },
    {
      "epoch": 0.7038327526132404,
      "grad_norm": 0.18218591809272766,
      "learning_rate": 2.996515679442509e-06,
      "loss": 1.2698,
      "step": 202
    },
    {
      "epoch": 0.7073170731707317,
      "grad_norm": 0.1745125651359558,
      "learning_rate": 2.9616724738675963e-06,
      "loss": 1.2787,
      "step": 203
    },
    {
      "epoch": 0.710801393728223,
      "grad_norm": 0.1723330318927765,
      "learning_rate": 2.926829268292683e-06,
      "loss": 1.2605,
      "step": 204
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 0.19427795708179474,
      "learning_rate": 2.8919860627177703e-06,
      "loss": 1.3027,
      "step": 205
    },
    {
      "epoch": 0.7177700348432056,
      "grad_norm": 0.1679447740316391,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 1.2551,
      "step": 206
    },
    {
      "epoch": 0.7212543554006968,
      "grad_norm": 0.1713826209306717,
      "learning_rate": 2.8222996515679447e-06,
      "loss": 1.2415,
      "step": 207
    },
    {
      "epoch": 0.7247386759581882,
      "grad_norm": 0.16561546921730042,
      "learning_rate": 2.7874564459930316e-06,
      "loss": 1.2411,
      "step": 208
    },
    {
      "epoch": 0.7282229965156795,
      "grad_norm": 0.17764469981193542,
      "learning_rate": 2.7526132404181186e-06,
      "loss": 1.2505,
      "step": 209
    },
    {
      "epoch": 0.7317073170731707,
      "grad_norm": 0.17709261178970337,
      "learning_rate": 2.7177700348432056e-06,
      "loss": 1.2605,
      "step": 210
    },
    {
      "epoch": 0.735191637630662,
      "grad_norm": 0.17961543798446655,
      "learning_rate": 2.682926829268293e-06,
      "loss": 1.2603,
      "step": 211
    },
    {
      "epoch": 0.7386759581881533,
      "grad_norm": 0.1778653860092163,
      "learning_rate": 2.64808362369338e-06,
      "loss": 1.28,
      "step": 212
    },
    {
      "epoch": 0.7421602787456446,
      "grad_norm": 0.17125053703784943,
      "learning_rate": 2.6132404181184674e-06,
      "loss": 1.207,
      "step": 213
    },
    {
      "epoch": 0.7456445993031359,
      "grad_norm": 0.19965337216854095,
      "learning_rate": 2.578397212543554e-06,
      "loss": 1.3237,
      "step": 214
    },
    {
      "epoch": 0.7491289198606271,
      "grad_norm": 0.16868162155151367,
      "learning_rate": 2.5435540069686414e-06,
      "loss": 1.2214,
      "step": 215
    },
    {
      "epoch": 0.7526132404181185,
      "grad_norm": 0.17115463316440582,
      "learning_rate": 2.5087108013937284e-06,
      "loss": 1.2352,
      "step": 216
    },
    {
      "epoch": 0.7560975609756098,
      "grad_norm": 0.1781865358352661,
      "learning_rate": 2.4738675958188153e-06,
      "loss": 1.2605,
      "step": 217
    },
    {
      "epoch": 0.759581881533101,
      "grad_norm": 0.17307884991168976,
      "learning_rate": 2.4390243902439027e-06,
      "loss": 1.2028,
      "step": 218
    },
    {
      "epoch": 0.7630662020905923,
      "grad_norm": 0.17751869559288025,
      "learning_rate": 2.4041811846689897e-06,
      "loss": 1.2155,
      "step": 219
    },
    {
      "epoch": 0.7665505226480837,
      "grad_norm": 0.17226622998714447,
      "learning_rate": 2.3693379790940767e-06,
      "loss": 1.2311,
      "step": 220
    },
    {
      "epoch": 0.7700348432055749,
      "grad_norm": 0.16614432632923126,
      "learning_rate": 2.334494773519164e-06,
      "loss": 1.2555,
      "step": 221
    },
    {
      "epoch": 0.7735191637630662,
      "grad_norm": 0.17073583602905273,
      "learning_rate": 2.299651567944251e-06,
      "loss": 1.2266,
      "step": 222
    },
    {
      "epoch": 0.7770034843205574,
      "grad_norm": 0.16957539319992065,
      "learning_rate": 2.264808362369338e-06,
      "loss": 1.232,
      "step": 223
    },
    {
      "epoch": 0.7804878048780488,
      "grad_norm": 0.1727531999349594,
      "learning_rate": 2.229965156794425e-06,
      "loss": 1.2678,
      "step": 224
    },
    {
      "epoch": 0.7839721254355401,
      "grad_norm": 0.18064668774604797,
      "learning_rate": 2.1951219512195125e-06,
      "loss": 1.2285,
      "step": 225
    },
    {
      "epoch": 0.7874564459930313,
      "grad_norm": 0.17367485165596008,
      "learning_rate": 2.1602787456445995e-06,
      "loss": 1.2263,
      "step": 226
    },
    {
      "epoch": 0.7909407665505227,
      "grad_norm": 0.19192218780517578,
      "learning_rate": 2.1254355400696864e-06,
      "loss": 1.2658,
      "step": 227
    },
    {
      "epoch": 0.794425087108014,
      "grad_norm": 0.18070359528064728,
      "learning_rate": 2.090592334494774e-06,
      "loss": 1.2468,
      "step": 228
    },
    {
      "epoch": 0.7979094076655052,
      "grad_norm": 0.16924287378787994,
      "learning_rate": 2.055749128919861e-06,
      "loss": 1.2333,
      "step": 229
    },
    {
      "epoch": 0.8013937282229965,
      "grad_norm": 0.19070173799991608,
      "learning_rate": 2.020905923344948e-06,
      "loss": 1.2642,
      "step": 230
    },
    {
      "epoch": 0.8048780487804879,
      "grad_norm": 0.17108577489852905,
      "learning_rate": 1.986062717770035e-06,
      "loss": 1.2159,
      "step": 231
    },
    {
      "epoch": 0.8083623693379791,
      "grad_norm": 0.17864665389060974,
      "learning_rate": 1.951219512195122e-06,
      "loss": 1.2583,
      "step": 232
    },
    {
      "epoch": 0.8118466898954704,
      "grad_norm": 0.1880117803812027,
      "learning_rate": 1.916376306620209e-06,
      "loss": 1.2434,
      "step": 233
    },
    {
      "epoch": 0.8153310104529616,
      "grad_norm": 0.1735537201166153,
      "learning_rate": 1.8815331010452964e-06,
      "loss": 1.2035,
      "step": 234
    },
    {
      "epoch": 0.818815331010453,
      "grad_norm": 0.1790689378976822,
      "learning_rate": 1.8466898954703836e-06,
      "loss": 1.2682,
      "step": 235
    },
    {
      "epoch": 0.8222996515679443,
      "grad_norm": 0.168999582529068,
      "learning_rate": 1.8118466898954705e-06,
      "loss": 1.2052,
      "step": 236
    },
    {
      "epoch": 0.8257839721254355,
      "grad_norm": 0.16619513928890228,
      "learning_rate": 1.7770034843205577e-06,
      "loss": 1.1563,
      "step": 237
    },
    {
      "epoch": 0.8292682926829268,
      "grad_norm": 0.17687417566776276,
      "learning_rate": 1.742160278745645e-06,
      "loss": 1.2323,
      "step": 238
    },
    {
      "epoch": 0.8327526132404182,
      "grad_norm": 0.17165087163448334,
      "learning_rate": 1.707317073170732e-06,
      "loss": 1.2271,
      "step": 239
    },
    {
      "epoch": 0.8362369337979094,
      "grad_norm": 0.18581567704677582,
      "learning_rate": 1.6724738675958191e-06,
      "loss": 1.2626,
      "step": 240
    },
    {
      "epoch": 0.8397212543554007,
      "grad_norm": 0.1735846847295761,
      "learning_rate": 1.6376306620209059e-06,
      "loss": 1.259,
      "step": 241
    },
    {
      "epoch": 0.8432055749128919,
      "grad_norm": 0.17916536331176758,
      "learning_rate": 1.602787456445993e-06,
      "loss": 1.2262,
      "step": 242
    },
    {
      "epoch": 0.8466898954703833,
      "grad_norm": 0.17673039436340332,
      "learning_rate": 1.56794425087108e-06,
      "loss": 1.2417,
      "step": 243
    },
    {
      "epoch": 0.8501742160278746,
      "grad_norm": 0.1725793182849884,
      "learning_rate": 1.5331010452961673e-06,
      "loss": 1.2603,
      "step": 244
    },
    {
      "epoch": 0.8536585365853658,
      "grad_norm": 0.1795843541622162,
      "learning_rate": 1.4982578397212545e-06,
      "loss": 1.2208,
      "step": 245
    },
    {
      "epoch": 0.8571428571428571,
      "grad_norm": 0.16699844598770142,
      "learning_rate": 1.4634146341463414e-06,
      "loss": 1.1961,
      "step": 246
    },
    {
      "epoch": 0.8606271777003485,
      "grad_norm": 0.18817368149757385,
      "learning_rate": 1.4285714285714286e-06,
      "loss": 1.2847,
      "step": 247
    },
    {
      "epoch": 0.8641114982578397,
      "grad_norm": 0.17419779300689697,
      "learning_rate": 1.3937282229965158e-06,
      "loss": 1.2612,
      "step": 248
    },
    {
      "epoch": 0.867595818815331,
      "grad_norm": 0.17922304570674896,
      "learning_rate": 1.3588850174216028e-06,
      "loss": 1.2486,
      "step": 249
    },
    {
      "epoch": 0.8710801393728222,
      "grad_norm": 0.16759800910949707,
      "learning_rate": 1.32404181184669e-06,
      "loss": 1.2553,
      "step": 250
    },
    {
      "epoch": 0.8745644599303136,
      "grad_norm": 0.1764543354511261,
      "learning_rate": 1.289198606271777e-06,
      "loss": 1.1849,
      "step": 251
    },
    {
      "epoch": 0.8780487804878049,
      "grad_norm": 0.1741049885749817,
      "learning_rate": 1.2543554006968642e-06,
      "loss": 1.2414,
      "step": 252
    },
    {
      "epoch": 0.8815331010452961,
      "grad_norm": 0.18015867471694946,
      "learning_rate": 1.2195121951219514e-06,
      "loss": 1.2343,
      "step": 253
    },
    {
      "epoch": 0.8850174216027874,
      "grad_norm": 0.17575283348560333,
      "learning_rate": 1.1846689895470384e-06,
      "loss": 1.2443,
      "step": 254
    },
    {
      "epoch": 0.8885017421602788,
      "grad_norm": 0.17757894098758698,
      "learning_rate": 1.1498257839721255e-06,
      "loss": 1.2597,
      "step": 255
    },
    {
      "epoch": 0.89198606271777,
      "grad_norm": 0.19557978212833405,
      "learning_rate": 1.1149825783972125e-06,
      "loss": 1.3058,
      "step": 256
    },
    {
      "epoch": 0.8954703832752613,
      "grad_norm": 0.17333032190799713,
      "learning_rate": 1.0801393728222997e-06,
      "loss": 1.212,
      "step": 257
    },
    {
      "epoch": 0.8989547038327527,
      "grad_norm": 0.18144485354423523,
      "learning_rate": 1.045296167247387e-06,
      "loss": 1.2373,
      "step": 258
    },
    {
      "epoch": 0.9024390243902439,
      "grad_norm": 0.1775212287902832,
      "learning_rate": 1.010452961672474e-06,
      "loss": 1.2511,
      "step": 259
    },
    {
      "epoch": 0.9059233449477352,
      "grad_norm": 0.17597387731075287,
      "learning_rate": 9.75609756097561e-07,
      "loss": 1.2204,
      "step": 260
    },
    {
      "epoch": 0.9094076655052264,
      "grad_norm": 0.1810332089662552,
      "learning_rate": 9.407665505226482e-07,
      "loss": 1.2779,
      "step": 261
    },
    {
      "epoch": 0.9128919860627178,
      "grad_norm": 0.17286810278892517,
      "learning_rate": 9.059233449477353e-07,
      "loss": 1.2494,
      "step": 262
    },
    {
      "epoch": 0.9163763066202091,
      "grad_norm": 0.20967571437358856,
      "learning_rate": 8.710801393728225e-07,
      "loss": 1.3316,
      "step": 263
    },
    {
      "epoch": 0.9198606271777003,
      "grad_norm": 0.17681102454662323,
      "learning_rate": 8.362369337979096e-07,
      "loss": 1.2464,
      "step": 264
    },
    {
      "epoch": 0.9233449477351916,
      "grad_norm": 0.17696253955364227,
      "learning_rate": 8.013937282229965e-07,
      "loss": 1.204,
      "step": 265
    },
    {
      "epoch": 0.926829268292683,
      "grad_norm": 0.1742849498987198,
      "learning_rate": 7.665505226480836e-07,
      "loss": 1.1951,
      "step": 266
    },
    {
      "epoch": 0.9303135888501742,
      "grad_norm": 0.1856003701686859,
      "learning_rate": 7.317073170731707e-07,
      "loss": 1.276,
      "step": 267
    },
    {
      "epoch": 0.9337979094076655,
      "grad_norm": 0.23204076290130615,
      "learning_rate": 6.968641114982579e-07,
      "loss": 1.2846,
      "step": 268
    },
    {
      "epoch": 0.9372822299651568,
      "grad_norm": 0.16851183772087097,
      "learning_rate": 6.62020905923345e-07,
      "loss": 1.2323,
      "step": 269
    },
    {
      "epoch": 0.9407665505226481,
      "grad_norm": 0.17223624885082245,
      "learning_rate": 6.271777003484321e-07,
      "loss": 1.2415,
      "step": 270
    },
    {
      "epoch": 0.9442508710801394,
      "grad_norm": 0.16741947829723358,
      "learning_rate": 5.923344947735192e-07,
      "loss": 1.2606,
      "step": 271
    },
    {
      "epoch": 0.9477351916376306,
      "grad_norm": 0.18007990717887878,
      "learning_rate": 5.574912891986063e-07,
      "loss": 1.2433,
      "step": 272
    },
    {
      "epoch": 0.9512195121951219,
      "grad_norm": 0.179946169257164,
      "learning_rate": 5.226480836236935e-07,
      "loss": 1.255,
      "step": 273
    },
    {
      "epoch": 0.9547038327526133,
      "grad_norm": 0.18067485094070435,
      "learning_rate": 4.878048780487805e-07,
      "loss": 1.2592,
      "step": 274
    },
    {
      "epoch": 0.9581881533101045,
      "grad_norm": 0.17291846871376038,
      "learning_rate": 4.5296167247386764e-07,
      "loss": 1.2536,
      "step": 275
    },
    {
      "epoch": 0.9616724738675958,
      "grad_norm": 0.17678707838058472,
      "learning_rate": 4.181184668989548e-07,
      "loss": 1.2276,
      "step": 276
    },
    {
      "epoch": 0.9651567944250871,
      "grad_norm": 0.17698977887630463,
      "learning_rate": 3.832752613240418e-07,
      "loss": 1.2506,
      "step": 277
    },
    {
      "epoch": 0.9686411149825784,
      "grad_norm": 0.1848505437374115,
      "learning_rate": 3.4843205574912896e-07,
      "loss": 1.2696,
      "step": 278
    },
    {
      "epoch": 0.9721254355400697,
      "grad_norm": 0.1861286461353302,
      "learning_rate": 3.1358885017421604e-07,
      "loss": 1.2514,
      "step": 279
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 0.17837870121002197,
      "learning_rate": 2.7874564459930313e-07,
      "loss": 1.2283,
      "step": 280
    },
    {
      "epoch": 0.9790940766550522,
      "grad_norm": 0.17116625607013702,
      "learning_rate": 2.439024390243903e-07,
      "loss": 1.2604,
      "step": 281
    },
    {
      "epoch": 0.9825783972125436,
      "grad_norm": 0.17404304444789886,
      "learning_rate": 2.090592334494774e-07,
      "loss": 1.2538,
      "step": 282
    },
    {
      "epoch": 0.9860627177700348,
      "grad_norm": 0.17341749370098114,
      "learning_rate": 1.7421602787456448e-07,
      "loss": 1.1947,
      "step": 283
    },
    {
      "epoch": 0.9895470383275261,
      "grad_norm": 0.1717548966407776,
      "learning_rate": 1.3937282229965157e-07,
      "loss": 1.2662,
      "step": 284
    },
    {
      "epoch": 0.9930313588850174,
      "grad_norm": 0.17564739286899567,
      "learning_rate": 1.045296167247387e-07,
      "loss": 1.2366,
      "step": 285
    },
    {
      "epoch": 0.9965156794425087,
      "grad_norm": 0.17121367156505585,
      "learning_rate": 6.968641114982578e-08,
      "loss": 1.1993,
      "step": 286
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.18196478486061096,
      "learning_rate": 3.484320557491289e-08,
      "loss": 1.2342,
      "step": 287
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.2314398288726807,
      "eval_runtime": 5.6589,
      "eval_samples_per_second": 5.301,
      "eval_steps_per_second": 0.707,
      "step": 287
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 287,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 0,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.558019940623319e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
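For reference, a minimal sketch of how a state file like this can be inspected with the Python standard library; the filename and working directory are assumptions, while the keys match the JSON above:

import json

# Load the trainer state saved by the Hugging Face Trainer
# (the path is an assumption; adjust to where the file actually lives).
with open("trainer_state.json") as f:
    state = json.load(f)

# Separate the per-step training records from the final evaluation record:
# training entries carry "loss", the evaluation entry carries "eval_loss".
train_logs = [entry for entry in state["log_history"] if "loss" in entry]
eval_logs = [entry for entry in state["log_history"] if "eval_loss" in entry]

print(f"logged training steps: {len(train_logs)}")
print(f"train loss: {train_logs[0]['loss']} (step {train_logs[0]['step']}) "
      f"-> {train_logs[-1]['loss']} (step {train_logs[-1]['step']})")
for entry in eval_logs:
    print(f"eval loss at step {entry['step']}: {entry['eval_loss']:.4f}")

Run against the file above, this would report 287 training steps, a train loss falling from 2.3157 at step 1 to 1.2342 at step 287, and an eval loss of 1.2314 at step 287.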