| { |
| "best_metric": 0.8978955572876072, |
| "best_model_checkpoint": "./results/finetunes/20250205-121158__microsoft_Phi-3.5-mini-instruct__ft/checkpoint-1792", |
| "epoch": 0.13208520675167687, |
| "eval_steps": 16, |
| "global_step": 1792, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0001474165253924965, |
| "grad_norm": 112.13977813720703, |
| "learning_rate": 0.00012128399488167067, |
| "loss": 2.0334, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.000294833050784993, |
| "grad_norm": 47.18525695800781, |
| "learning_rate": 0.00012128399457682722, |
| "loss": 0.4295, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0004422495761774895, |
| "grad_norm": 46.89369583129883, |
| "learning_rate": 0.0001212839940687548, |
| "loss": 1.793, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.000589666101569986, |
| "grad_norm": 90.68251037597656, |
| "learning_rate": 0.00012128399335745342, |
| "loss": 1.582, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0007370826269624825, |
| "grad_norm": 10.48133373260498, |
| "learning_rate": 0.00012128399244292309, |
| "loss": 1.152, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.000884499152354979, |
| "grad_norm": 57.58028030395508, |
| "learning_rate": 0.00012128399132516379, |
| "loss": 0.8417, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0010319156777474755, |
| "grad_norm": 24.7613468170166, |
| "learning_rate": 0.00012128399000417552, |
| "loss": 0.6337, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.001179332203139972, |
| "grad_norm": 5.995689868927002, |
| "learning_rate": 0.00012128398847995831, |
| "loss": 0.29, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.001179332203139972, |
| "eval_1_ratio_diff": -0.12081060015588468, |
| "eval_accuracy": 0.6360093530787218, |
| "eval_f1": 0.5856255545696539, |
| "eval_loss": 0.7121835350990295, |
| "eval_precision": 0.6790123456790124, |
| "eval_recall": 0.514820592823713, |
| "eval_runtime": 1440.0319, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0013267487285324685, |
| "grad_norm": 20.756057739257812, |
| "learning_rate": 0.00012128398675251216, |
| "loss": 0.4541, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.001474165253924965, |
| "grad_norm": 49.25767135620117, |
| "learning_rate": 0.00012128398482183706, |
| "loss": 1.1751, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0016215817793174615, |
| "grad_norm": 10.73904037475586, |
| "learning_rate": 0.00012128398268793303, |
| "loss": 0.2334, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.001768998304709958, |
| "grad_norm": 3.5153348445892334, |
| "learning_rate": 0.00012128398035080009, |
| "loss": 0.8965, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0019164148301024544, |
| "grad_norm": 117.84137725830078, |
| "learning_rate": 0.0001212839778104382, |
| "loss": 2.9108, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.002063831355494951, |
| "grad_norm": 108.86376190185547, |
| "learning_rate": 0.00012128397506684742, |
| "loss": 2.1317, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0022112478808874476, |
| "grad_norm": 19.305322647094727, |
| "learning_rate": 0.00012128397212002774, |
| "loss": 0.2653, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.002358664406279944, |
| "grad_norm": 46.865966796875, |
| "learning_rate": 0.00012128396896997918, |
| "loss": 2.2461, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.002358664406279944, |
| "eval_1_ratio_diff": -0.49961028838659394, |
| "eval_accuracy": 0.5003897116134061, |
| "eval_f1": 0.0, |
| "eval_loss": 1.7971160411834717, |
| "eval_precision": 0.0, |
| "eval_recall": 0.0, |
| "eval_runtime": 1438.1269, |
| "eval_samples_per_second": 0.892, |
| "eval_steps_per_second": 0.446, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0025060809316724405, |
| "grad_norm": 55.90218734741211, |
| "learning_rate": 0.00012128396561670172, |
| "loss": 1.0773, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.002653497457064937, |
| "grad_norm": 8.257821083068848, |
| "learning_rate": 0.0001212839620601954, |
| "loss": 0.7481, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0028009139824574335, |
| "grad_norm": 11.776910781860352, |
| "learning_rate": 0.00012128395830046022, |
| "loss": 0.0906, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.00294833050784993, |
| "grad_norm": 115.57841491699219, |
| "learning_rate": 0.00012128395433749618, |
| "loss": 3.0851, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0030957470332424264, |
| "grad_norm": 5.130585193634033, |
| "learning_rate": 0.00012128395017130333, |
| "loss": 0.9399, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.003243163558634923, |
| "grad_norm": 43.877689361572266, |
| "learning_rate": 0.00012128394580188166, |
| "loss": 0.9284, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.0033905800840274194, |
| "grad_norm": 48.76664733886719, |
| "learning_rate": 0.00012128394122923118, |
| "loss": 0.5431, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.003537996609419916, |
| "grad_norm": 33.9229736328125, |
| "learning_rate": 0.00012128393645335193, |
| "loss": 0.6688, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.003537996609419916, |
| "eval_1_ratio_diff": -0.09353078721745911, |
| "eval_accuracy": 0.764614185502728, |
| "eval_f1": 0.7401032702237521, |
| "eval_loss": 0.49912577867507935, |
| "eval_precision": 0.8253358925143954, |
| "eval_recall": 0.6708268330733229, |
| "eval_runtime": 1439.1521, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0036854131348124123, |
| "grad_norm": 20.783430099487305, |
| "learning_rate": 0.00012128393147424389, |
| "loss": 0.7502, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.003832829660204909, |
| "grad_norm": 28.81708335876465, |
| "learning_rate": 0.0001212839262919071, |
| "loss": 0.8271, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.003980246185597405, |
| "grad_norm": 58.47079086303711, |
| "learning_rate": 0.00012128392090634156, |
| "loss": 1.0213, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.004127662710989902, |
| "grad_norm": 107.4663314819336, |
| "learning_rate": 0.00012128391531754733, |
| "loss": 1.6449, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.004275079236382398, |
| "grad_norm": 21.926761627197266, |
| "learning_rate": 0.00012128390952552436, |
| "loss": 1.5282, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.004422495761774895, |
| "grad_norm": 108.13206481933594, |
| "learning_rate": 0.00012128390353027275, |
| "loss": 1.2688, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.004569912287167391, |
| "grad_norm": 85.27387237548828, |
| "learning_rate": 0.00012128389733179246, |
| "loss": 1.4725, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.004717328812559888, |
| "grad_norm": 3.8993313312530518, |
| "learning_rate": 0.00012128389093008353, |
| "loss": 0.1737, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.004717328812559888, |
| "eval_1_ratio_diff": 0.05378020265003891, |
| "eval_accuracy": 0.7809820732657833, |
| "eval_f1": 0.7920059215396003, |
| "eval_loss": 0.4972352981567383, |
| "eval_precision": 0.7535211267605634, |
| "eval_recall": 0.8346333853354134, |
| "eval_runtime": 1439.2432, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.004864745337952384, |
| "grad_norm": 1.8932548761367798, |
| "learning_rate": 0.00012128388432514599, |
| "loss": 0.1574, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.005012161863344881, |
| "grad_norm": 31.92827606201172, |
| "learning_rate": 0.00012128387751697984, |
| "loss": 0.2239, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.005159578388737377, |
| "grad_norm": 57.11052703857422, |
| "learning_rate": 0.00012128387050558512, |
| "loss": 1.4278, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.005306994914129874, |
| "grad_norm": 0.45575767755508423, |
| "learning_rate": 0.00012128386329096184, |
| "loss": 2.7855, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.00545441143952237, |
| "grad_norm": 71.60086059570312, |
| "learning_rate": 0.00012128385587311005, |
| "loss": 1.4066, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.005601827964914867, |
| "grad_norm": 0.1034606546163559, |
| "learning_rate": 0.00012128384825202977, |
| "loss": 2.1198, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.005749244490307364, |
| "grad_norm": 0.3067642152309418, |
| "learning_rate": 0.00012128384042772098, |
| "loss": 0.0126, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.00589666101569986, |
| "grad_norm": 63.32870101928711, |
| "learning_rate": 0.00012128383240018376, |
| "loss": 1.4007, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.00589666101569986, |
| "eval_1_ratio_diff": 0.04130943102104445, |
| "eval_accuracy": 0.7653936087295401, |
| "eval_f1": 0.7745318352059926, |
| "eval_loss": 1.208424687385559, |
| "eval_precision": 0.7449567723342939, |
| "eval_recall": 0.8065522620904836, |
| "eval_runtime": 1438.9869, |
| "eval_samples_per_second": 0.892, |
| "eval_steps_per_second": 0.446, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.006044077541092357, |
| "grad_norm": 0.002626498695462942, |
| "learning_rate": 0.00012128382416941812, |
| "loss": 0.003, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.006191494066484853, |
| "grad_norm": 78.83605194091797, |
| "learning_rate": 0.00012128381573542408, |
| "loss": 1.7103, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.00633891059187735, |
| "grad_norm": 0.04237201437354088, |
| "learning_rate": 0.00012128380709820168, |
| "loss": 0.0184, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.006486327117269846, |
| "grad_norm": 57.11608123779297, |
| "learning_rate": 0.00012128379825775094, |
| "loss": 0.3886, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.006633743642662343, |
| "grad_norm": 71.66314697265625, |
| "learning_rate": 0.00012128378921407189, |
| "loss": 1.0122, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.006781160168054839, |
| "grad_norm": 60.63711166381836, |
| "learning_rate": 0.00012128377996716456, |
| "loss": 2.2072, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.006928576693447336, |
| "grad_norm": 64.88410186767578, |
| "learning_rate": 0.00012128377051702896, |
| "loss": 1.7641, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.007075993218839832, |
| "grad_norm": 15.290694236755371, |
| "learning_rate": 0.00012128376086366519, |
| "loss": 0.2084, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.007075993218839832, |
| "eval_1_ratio_diff": -0.07794232268121593, |
| "eval_accuracy": 0.764614185502728, |
| "eval_f1": 0.7445008460236887, |
| "eval_loss": 0.6278901100158691, |
| "eval_precision": 0.8133086876155268, |
| "eval_recall": 0.6864274570982839, |
| "eval_runtime": 1439.7986, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.007223409744232329, |
| "grad_norm": 4.278674125671387, |
| "learning_rate": 0.00012128375100707322, |
| "loss": 0.0205, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.007370826269624825, |
| "grad_norm": 12.730552673339844, |
| "learning_rate": 0.00012128374094725308, |
| "loss": 0.0596, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.007518242795017322, |
| "grad_norm": 0.03387758880853653, |
| "learning_rate": 0.00012128373068420486, |
| "loss": 1.1734, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.007665659320409818, |
| "grad_norm": 0.002689527813345194, |
| "learning_rate": 0.00012128372021792852, |
| "loss": 0.016, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.007813075845802315, |
| "grad_norm": 46.29806900024414, |
| "learning_rate": 0.00012128370954842415, |
| "loss": 3.8453, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.00796049237119481, |
| "grad_norm": 65.56766510009766, |
| "learning_rate": 0.00012128369867569178, |
| "loss": 3.0592, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.008107908896587307, |
| "grad_norm": 67.830322265625, |
| "learning_rate": 0.00012128368759973141, |
| "loss": 1.5232, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.008255325421979804, |
| "grad_norm": 1.828292965888977, |
| "learning_rate": 0.00012128367632054312, |
| "loss": 0.899, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.008255325421979804, |
| "eval_1_ratio_diff": -0.24707716289945442, |
| "eval_accuracy": 0.6952455183164459, |
| "eval_f1": 0.5948186528497409, |
| "eval_loss": 1.2687604427337646, |
| "eval_precision": 0.8858024691358025, |
| "eval_recall": 0.44773790951638065, |
| "eval_runtime": 1440.6646, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.0084027419473723, |
| "grad_norm": 2.445478916168213, |
| "learning_rate": 0.00012128366483812693, |
| "loss": 1.3983, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.008550158472764796, |
| "grad_norm": 0.8839952349662781, |
| "learning_rate": 0.00012128365315248287, |
| "loss": 2.515, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.008697574998157294, |
| "grad_norm": 20.67784881591797, |
| "learning_rate": 0.000121283641263611, |
| "loss": 1.5722, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.00884499152354979, |
| "grad_norm": 1.1078622341156006, |
| "learning_rate": 0.00012128362917151136, |
| "loss": 0.0058, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.008992408048942286, |
| "grad_norm": 52.540367126464844, |
| "learning_rate": 0.00012128361687618396, |
| "loss": 2.8601, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.009139824574334782, |
| "grad_norm": 40.01364517211914, |
| "learning_rate": 0.00012128360437762885, |
| "loss": 0.6845, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.00928724109972728, |
| "grad_norm": 4.011626243591309, |
| "learning_rate": 0.00012128359167584609, |
| "loss": 0.6806, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.009434657625119776, |
| "grad_norm": 12.99624252319336, |
| "learning_rate": 0.00012128357877083573, |
| "loss": 0.8965, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.009434657625119776, |
| "eval_1_ratio_diff": 0.33982852689010135, |
| "eval_accuracy": 0.6492595479345284, |
| "eval_f1": 0.7380675203725262, |
| "eval_loss": 0.9785400629043579, |
| "eval_precision": 0.5886722376973074, |
| "eval_recall": 0.9890795631825273, |
| "eval_runtime": 1440.0679, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.009582074150512272, |
| "grad_norm": 34.23851013183594, |
| "learning_rate": 0.00012128356566259777, |
| "loss": 0.3434, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.009729490675904768, |
| "grad_norm": 66.7353286743164, |
| "learning_rate": 0.0001212835523511323, |
| "loss": 0.475, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.009876907201297266, |
| "grad_norm": 56.82964324951172, |
| "learning_rate": 0.00012128353883643935, |
| "loss": 0.7709, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.010024323726689762, |
| "grad_norm": 34.38500213623047, |
| "learning_rate": 0.00012128352511851894, |
| "loss": 0.7302, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.010171740252082258, |
| "grad_norm": 106.88589477539062, |
| "learning_rate": 0.00012128351119737116, |
| "loss": 1.332, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.010319156777474754, |
| "grad_norm": 85.7337875366211, |
| "learning_rate": 0.00012128349707299602, |
| "loss": 1.6342, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.010466573302867252, |
| "grad_norm": 4.05411958694458, |
| "learning_rate": 0.00012128348274539358, |
| "loss": 0.0673, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.010613989828259748, |
| "grad_norm": 2.334378719329834, |
| "learning_rate": 0.0001212834682145639, |
| "loss": 0.0332, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.010613989828259748, |
| "eval_1_ratio_diff": -0.2346063912704599, |
| "eval_accuracy": 0.7014809041309431, |
| "eval_f1": 0.6095820591233435, |
| "eval_loss": 1.218570351600647, |
| "eval_precision": 0.8794117647058823, |
| "eval_recall": 0.4664586583463339, |
| "eval_runtime": 1440.6194, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.010761406353652244, |
| "grad_norm": 1.3649911880493164, |
| "learning_rate": 0.00012128345348050701, |
| "loss": 0.985, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.01090882287904474, |
| "grad_norm": 6.569690227508545, |
| "learning_rate": 0.00012128343854322297, |
| "loss": 0.0316, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.011056239404437238, |
| "grad_norm": 50.96843719482422, |
| "learning_rate": 0.00012128342340271183, |
| "loss": 2.5112, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.011203655929829734, |
| "grad_norm": 46.42570877075195, |
| "learning_rate": 0.00012128340805897364, |
| "loss": 2.5907, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.01135107245522223, |
| "grad_norm": 35.919315338134766, |
| "learning_rate": 0.00012128339251200845, |
| "loss": 0.5731, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.011498488980614728, |
| "grad_norm": 0.33857831358909607, |
| "learning_rate": 0.0001212833767618163, |
| "loss": 0.0029, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.011645905506007224, |
| "grad_norm": 0.6119909286499023, |
| "learning_rate": 0.00012128336080839724, |
| "loss": 0.0036, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.01179332203139972, |
| "grad_norm": 34.078514099121094, |
| "learning_rate": 0.00012128334465175136, |
| "loss": 3.0454, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.01179332203139972, |
| "eval_1_ratio_diff": -0.05222135619641466, |
| "eval_accuracy": 0.8106001558846454, |
| "eval_f1": 0.8, |
| "eval_loss": 0.9759823083877563, |
| "eval_precision": 0.8466898954703833, |
| "eval_recall": 0.7581903276131046, |
| "eval_runtime": 1440.5068, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.011940738556792216, |
| "grad_norm": 0.10960781574249268, |
| "learning_rate": 0.0001212833282918787, |
| "loss": 0.0036, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.012088155082184714, |
| "grad_norm": 0.12220565974712372, |
| "learning_rate": 0.0001212833117287793, |
| "loss": 0.0025, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.01223557160757721, |
| "grad_norm": 127.77825164794922, |
| "learning_rate": 0.00012128329496245321, |
| "loss": 2.7251, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.012382988132969706, |
| "grad_norm": 65.698486328125, |
| "learning_rate": 0.0001212832779929005, |
| "loss": 0.4867, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.012530404658362202, |
| "grad_norm": 37.85614013671875, |
| "learning_rate": 0.00012128326082012124, |
| "loss": 0.2097, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0126778211837547, |
| "grad_norm": 12.939319610595703, |
| "learning_rate": 0.00012128324344411546, |
| "loss": 1.4561, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.012825237709147196, |
| "grad_norm": 81.24678039550781, |
| "learning_rate": 0.00012128322586488326, |
| "loss": 1.1304, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.012972654234539692, |
| "grad_norm": 58.61750030517578, |
| "learning_rate": 0.00012128320808242463, |
| "loss": 0.9005, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.012972654234539692, |
| "eval_1_ratio_diff": 0.2533125487139517, |
| "eval_accuracy": 0.7186282151208107, |
| "eval_f1": 0.7753578095830741, |
| "eval_loss": 0.8996144533157349, |
| "eval_precision": 0.644927536231884, |
| "eval_recall": 0.9719188767550702, |
| "eval_runtime": 1439.76, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.013120070759932188, |
| "grad_norm": 60.69062805175781, |
| "learning_rate": 0.00012128319009673968, |
| "loss": 1.4957, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.013267487285324685, |
| "grad_norm": 6.7324652671813965, |
| "learning_rate": 0.00012128317190782848, |
| "loss": 0.2882, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.013414903810717181, |
| "grad_norm": 0.18422821164131165, |
| "learning_rate": 0.00012128315351569106, |
| "loss": 0.5841, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.013562320336109678, |
| "grad_norm": 106.35135650634766, |
| "learning_rate": 0.00012128313492032748, |
| "loss": 1.3522, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.013709736861502174, |
| "grad_norm": 35.63379669189453, |
| "learning_rate": 0.00012128311612173782, |
| "loss": 1.237, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.013857153386894671, |
| "grad_norm": 83.5736312866211, |
| "learning_rate": 0.00012128309711992214, |
| "loss": 1.3351, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.014004569912287167, |
| "grad_norm": 97.8160400390625, |
| "learning_rate": 0.0001212830779148805, |
| "loss": 1.6019, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.014151986437679663, |
| "grad_norm": 2.5867555141448975, |
| "learning_rate": 0.00012128305850661298, |
| "loss": 0.0897, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.014151986437679663, |
| "eval_1_ratio_diff": 0.24863600935307872, |
| "eval_accuracy": 0.7295401402961809, |
| "eval_f1": 0.783260462211118, |
| "eval_loss": 1.138918161392212, |
| "eval_precision": 0.653125, |
| "eval_recall": 0.9781591263650546, |
| "eval_runtime": 1440.7407, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.014299402963072161, |
| "grad_norm": 71.02184295654297, |
| "learning_rate": 0.00012128303889511963, |
| "loss": 1.3708, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.014446819488464657, |
| "grad_norm": 0.5830493569374084, |
| "learning_rate": 0.0001212830190804005, |
| "loss": 3.0855, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.014594236013857153, |
| "grad_norm": 63.9030876159668, |
| "learning_rate": 0.00012128299906245568, |
| "loss": 1.6675, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.01474165253924965, |
| "grad_norm": 0.18025726079940796, |
| "learning_rate": 0.00012128297884128523, |
| "loss": 0.1379, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.014889069064642147, |
| "grad_norm": 0.8397954702377319, |
| "learning_rate": 0.00012128295841688921, |
| "loss": 1.528, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.015036485590034643, |
| "grad_norm": 78.28919219970703, |
| "learning_rate": 0.0001212829377892677, |
| "loss": 1.2677, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.01518390211542714, |
| "grad_norm": 5.996486186981201, |
| "learning_rate": 0.00012128291695842078, |
| "loss": 1.205, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.015331318640819635, |
| "grad_norm": 1.2115447521209717, |
| "learning_rate": 0.0001212828959243485, |
| "loss": 0.0076, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.015331318640819635, |
| "eval_1_ratio_diff": 0.03351519875292286, |
| "eval_accuracy": 0.8402182385035074, |
| "eval_f1": 0.8452830188679246, |
| "eval_loss": 0.5696436166763306, |
| "eval_precision": 0.8187134502923976, |
| "eval_recall": 0.8736349453978159, |
| "eval_runtime": 1440.7431, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.015478735166212133, |
| "grad_norm": 41.47733688354492, |
| "learning_rate": 0.00012128287468705092, |
| "loss": 1.0424, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.01562615169160463, |
| "grad_norm": 13.133481979370117, |
| "learning_rate": 0.00012128285324652816, |
| "loss": 0.0602, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.015773568216997127, |
| "grad_norm": 14.336326599121094, |
| "learning_rate": 0.00012128283160278022, |
| "loss": 0.0887, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.01592098474238962, |
| "grad_norm": 2.6840479373931885, |
| "learning_rate": 0.00012128280975580723, |
| "loss": 0.0105, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.01606840126778212, |
| "grad_norm": 0.026224393397569656, |
| "learning_rate": 0.00012128278770560924, |
| "loss": 0.0006, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.016215817793174613, |
| "grad_norm": 0.0356808602809906, |
| "learning_rate": 0.00012128276545218633, |
| "loss": 1.6274, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.01636323431856711, |
| "grad_norm": 0.03703249245882034, |
| "learning_rate": 0.00012128274299553858, |
| "loss": 1.6564, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.01651065084395961, |
| "grad_norm": 0.23091621696949005, |
| "learning_rate": 0.00012128272033566606, |
| "loss": 0.0017, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.01651065084395961, |
| "eval_1_ratio_diff": 0.10210444271239283, |
| "eval_accuracy": 0.8106001558846454, |
| "eval_f1": 0.8280254777070064, |
| "eval_loss": 1.4256943464279175, |
| "eval_precision": 0.7577720207253886, |
| "eval_recall": 0.9126365054602185, |
| "eval_runtime": 1440.6468, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.016658067369352103, |
| "grad_norm": 0.2899627983570099, |
| "learning_rate": 0.00012128269747256883, |
| "loss": 0.0048, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.0168054838947446, |
| "grad_norm": 138.98680114746094, |
| "learning_rate": 0.00012128267440624699, |
| "loss": 7.0607, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.0169529004201371, |
| "grad_norm": 64.21833801269531, |
| "learning_rate": 0.0001212826511367006, |
| "loss": 2.4323, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.017100316945529593, |
| "grad_norm": 69.21852111816406, |
| "learning_rate": 0.00012128262766392974, |
| "loss": 3.8941, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.01724773347092209, |
| "grad_norm": 0.6788825988769531, |
| "learning_rate": 0.00012128260398793452, |
| "loss": 0.0033, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.01739514999631459, |
| "grad_norm": 0.5503783822059631, |
| "learning_rate": 0.000121282580108715, |
| "loss": 0.0089, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.017542566521707083, |
| "grad_norm": 1.4736528396606445, |
| "learning_rate": 0.00012128255602627122, |
| "loss": 0.6923, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.01768998304709958, |
| "grad_norm": 0.052145253866910934, |
| "learning_rate": 0.0001212825317406033, |
| "loss": 0.003, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.01768998304709958, |
| "eval_1_ratio_diff": 0.05689789555728764, |
| "eval_accuracy": 0.8667186282151208, |
| "eval_f1": 0.8738007380073801, |
| "eval_loss": 0.5649486184120178, |
| "eval_precision": 0.8291316526610645, |
| "eval_recall": 0.9235569422776911, |
| "eval_runtime": 1440.858, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.446, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.017837399572492075, |
| "grad_norm": 33.907466888427734, |
| "learning_rate": 0.00012128250725171133, |
| "loss": 1.0754, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.017984816097884573, |
| "grad_norm": 1.5523881912231445, |
| "learning_rate": 0.00012128248255959539, |
| "loss": 2.2872, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.01813223262327707, |
| "grad_norm": 0.45814594626426697, |
| "learning_rate": 0.00012128245766425553, |
| "loss": 0.0082, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.018279649148669565, |
| "grad_norm": 63.94032669067383, |
| "learning_rate": 0.00012128243256569185, |
| "loss": 1.7641, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.018427065674062063, |
| "grad_norm": 0.17571286857128143, |
| "learning_rate": 0.00012128240726390445, |
| "loss": 0.0017, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.01857448219945456, |
| "grad_norm": 0.08677598834037781, |
| "learning_rate": 0.0001212823817588934, |
| "loss": 2.0446, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.018721898724847055, |
| "grad_norm": 0.06298824399709702, |
| "learning_rate": 0.00012128235605065879, |
| "loss": 0.0031, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.018869315250239552, |
| "grad_norm": 0.04490824043750763, |
| "learning_rate": 0.00012128233013920071, |
| "loss": 0.0016, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.018869315250239552, |
| "eval_1_ratio_diff": 0.26032735775526106, |
| "eval_accuracy": 0.7272018706157444, |
| "eval_f1": 0.7834158415841584, |
| "eval_loss": 1.7306467294692993, |
| "eval_precision": 0.6492307692307693, |
| "eval_recall": 0.9875195007800313, |
| "eval_runtime": 1441.1243, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.019016731775632047, |
| "grad_norm": 64.88382720947266, |
| "learning_rate": 0.00012128230402451925, |
| "loss": 1.4818, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.019164148301024544, |
| "grad_norm": 0.04304850101470947, |
| "learning_rate": 0.00012128227770661447, |
| "loss": 0.0006, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.019311564826417042, |
| "grad_norm": 99.55477142333984, |
| "learning_rate": 0.00012128225118548648, |
| "loss": 1.3041, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.019458981351809537, |
| "grad_norm": 64.24674987792969, |
| "learning_rate": 0.00012128222446113537, |
| "loss": 3.4221, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.019606397877202034, |
| "grad_norm": 1.130561351776123, |
| "learning_rate": 0.00012128219753356123, |
| "loss": 0.0047, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.019753814402594532, |
| "grad_norm": 60.320674896240234, |
| "learning_rate": 0.00012128217040276413, |
| "loss": 0.7215, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.019901230927987026, |
| "grad_norm": 56.348636627197266, |
| "learning_rate": 0.0001212821430687442, |
| "loss": 3.0486, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.020048647453379524, |
| "grad_norm": 4.682687759399414, |
| "learning_rate": 0.0001212821155315015, |
| "loss": 0.0195, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.020048647453379524, |
| "eval_1_ratio_diff": -0.07170693686671864, |
| "eval_accuracy": 0.8035853468433359, |
| "eval_f1": 0.788235294117647, |
| "eval_loss": 0.7957486510276794, |
| "eval_precision": 0.8542805100182149, |
| "eval_recall": 0.7316692667706708, |
| "eval_runtime": 1438.4097, |
| "eval_samples_per_second": 0.892, |
| "eval_steps_per_second": 0.446, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.020196063978772022, |
| "grad_norm": 0.11813419312238693, |
| "learning_rate": 0.00012128208779103613, |
| "loss": 0.1104, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.020343480504164516, |
| "grad_norm": 61.332427978515625, |
| "learning_rate": 0.0001212820598473482, |
| "loss": 0.8622, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.020490897029557014, |
| "grad_norm": 9.628612518310547, |
| "learning_rate": 0.00012128203170043776, |
| "loss": 0.0682, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.02063831355494951, |
| "grad_norm": 59.6220703125, |
| "learning_rate": 0.00012128200335030495, |
| "loss": 0.7833, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.020785730080342006, |
| "grad_norm": 1.084692358970642, |
| "learning_rate": 0.00012128197479694983, |
| "loss": 1.5881, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.020933146605734504, |
| "grad_norm": 0.44916099309921265, |
| "learning_rate": 0.00012128194604037253, |
| "loss": 0.0187, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.021080563131126998, |
| "grad_norm": 0.11146622151136398, |
| "learning_rate": 0.00012128191708057311, |
| "loss": 0.0025, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.021227979656519496, |
| "grad_norm": 0.05726571008563042, |
| "learning_rate": 0.00012128188791755172, |
| "loss": 0.0004, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.021227979656519496, |
| "eval_1_ratio_diff": 0.09119251753702262, |
| "eval_accuracy": 0.8277474668745128, |
| "eval_f1": 0.8420300214438885, |
| "eval_loss": 1.1355745792388916, |
| "eval_precision": 0.7770448548812665, |
| "eval_recall": 0.9188767550702028, |
| "eval_runtime": 1440.4727, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.021375396181911994, |
| "grad_norm": 63.95652770996094, |
| "learning_rate": 0.0001212818585513084, |
| "loss": 2.2186, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.021522812707304488, |
| "grad_norm": 0.041420936584472656, |
| "learning_rate": 0.00012128182898184326, |
| "loss": 2.2755, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.021670229232696986, |
| "grad_norm": 0.19315005838871002, |
| "learning_rate": 0.00012128179920915643, |
| "loss": 1.7156, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.02181764575808948, |
| "grad_norm": 0.06642986834049225, |
| "learning_rate": 0.00012128176923324799, |
| "loss": 0.0021, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.021965062283481978, |
| "grad_norm": 0.22619064152240753, |
| "learning_rate": 0.00012128173905411805, |
| "loss": 1.2636, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.022112478808874476, |
| "grad_norm": 0.30320611596107483, |
| "learning_rate": 0.00012128170867176669, |
| "loss": 0.0031, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.02225989533426697, |
| "grad_norm": 62.3597412109375, |
| "learning_rate": 0.00012128167808619403, |
| "loss": 1.3432, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.022407311859659468, |
| "grad_norm": 63.980323791503906, |
| "learning_rate": 0.00012128164729740015, |
| "loss": 0.8526, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.022407311859659468, |
| "eval_1_ratio_diff": 0.15354637568199536, |
| "eval_accuracy": 0.8028059236165238, |
| "eval_f1": 0.8289384719405003, |
| "eval_loss": 0.781088650226593, |
| "eval_precision": 0.7315035799522673, |
| "eval_recall": 0.9563182527301092, |
| "eval_runtime": 1439.8087, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.022554728385051966, |
| "grad_norm": 0.5441477298736572, |
| "learning_rate": 0.0001212816163053852, |
| "loss": 0.023, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.02270214491044446, |
| "grad_norm": 60.2026252746582, |
| "learning_rate": 0.00012128158511014924, |
| "loss": 0.4811, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.022849561435836958, |
| "grad_norm": 3.5183231830596924, |
| "learning_rate": 0.00012128155371169238, |
| "loss": 0.0164, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.022996977961229455, |
| "grad_norm": 49.883365631103516, |
| "learning_rate": 0.00012128152211001475, |
| "loss": 2.6559, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.02314439448662195, |
| "grad_norm": 0.21442897617816925, |
| "learning_rate": 0.00012128149030511643, |
| "loss": 1.0737, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.023291811012014448, |
| "grad_norm": 66.95639038085938, |
| "learning_rate": 0.00012128145829699753, |
| "loss": 2.2649, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.023439227537406942, |
| "grad_norm": 41.275150299072266, |
| "learning_rate": 0.00012128142608565818, |
| "loss": 1.4307, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.02358664406279944, |
| "grad_norm": 60.39665603637695, |
| "learning_rate": 0.00012128139367109845, |
| "loss": 0.8912, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.02358664406279944, |
| "eval_1_ratio_diff": 0.15666406858924398, |
| "eval_accuracy": 0.7903351519875292, |
| "eval_f1": 0.8186109238031019, |
| "eval_loss": 0.6988638043403625, |
| "eval_precision": 0.7209026128266033, |
| "eval_recall": 0.9469578783151326, |
| "eval_runtime": 1440.1147, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.023734060588191937, |
| "grad_norm": 0.26957735419273376, |
| "learning_rate": 0.0001212813610533185, |
| "loss": 0.0109, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.02388147711358443, |
| "grad_norm": 1.1442532539367676, |
| "learning_rate": 0.00012128132823231837, |
| "loss": 0.8164, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.02402889363897693, |
| "grad_norm": 2.7633121013641357, |
| "learning_rate": 0.00012128129520809825, |
| "loss": 0.0146, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.024176310164369427, |
| "grad_norm": 103.85281372070312, |
| "learning_rate": 0.00012128126198065819, |
| "loss": 2.8926, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.02432372668976192, |
| "grad_norm": 4.870635032653809, |
| "learning_rate": 0.00012128122854999832, |
| "loss": 0.0289, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.02447114321515442, |
| "grad_norm": 0.17178401350975037, |
| "learning_rate": 0.00012128119491611876, |
| "loss": 0.7425, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.024618559740546914, |
| "grad_norm": 37.24171447753906, |
| "learning_rate": 0.00012128116107901961, |
| "loss": 3.577, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.02476597626593941, |
| "grad_norm": 12.520587921142578, |
| "learning_rate": 0.00012128112703870099, |
| "loss": 0.0673, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.02476597626593941, |
| "eval_1_ratio_diff": -0.025720966484801266, |
| "eval_accuracy": 0.857365549493375, |
| "eval_f1": 0.8534827862289832, |
| "eval_loss": 0.4316674470901489, |
| "eval_precision": 0.8766447368421053, |
| "eval_recall": 0.8315132605304212, |
| "eval_runtime": 1440.3285, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.02491339279133191, |
| "grad_norm": 41.28479766845703, |
| "learning_rate": 0.00012128109279516303, |
| "loss": 0.2896, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.025060809316724404, |
| "grad_norm": 6.806232452392578, |
| "learning_rate": 0.00012128105834840581, |
| "loss": 0.0378, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.0252082258421169, |
| "grad_norm": 2.091874361038208, |
| "learning_rate": 0.00012128102369842947, |
| "loss": 0.0118, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.0253556423675094, |
| "grad_norm": 57.055580139160156, |
| "learning_rate": 0.00012128098884523412, |
| "loss": 0.6633, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.025503058892901893, |
| "grad_norm": 59.19140625, |
| "learning_rate": 0.00012128095378881987, |
| "loss": 0.4166, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.02565047541829439, |
| "grad_norm": 0.08690566569566727, |
| "learning_rate": 0.00012128091852918686, |
| "loss": 0.0041, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.02579789194368689, |
| "grad_norm": 0.4953851103782654, |
| "learning_rate": 0.00012128088306633519, |
| "loss": 0.0058, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.025945308469079383, |
| "grad_norm": 0.8310350179672241, |
| "learning_rate": 0.00012128084740026497, |
| "loss": 0.0115, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.025945308469079383, |
| "eval_1_ratio_diff": -0.05455962587685115, |
| "eval_accuracy": 0.8659392049883087, |
| "eval_f1": 0.858085808580858, |
| "eval_loss": 0.6554389595985413, |
| "eval_precision": 0.9106830122591943, |
| "eval_recall": 0.8112324492979719, |
| "eval_runtime": 1441.1917, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.02609272499447188, |
| "grad_norm": 75.97391510009766, |
| "learning_rate": 0.00012128081153097633, |
| "loss": 1.0946, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.026240141519864375, |
| "grad_norm": 0.1318621188402176, |
| "learning_rate": 0.0001212807754584694, |
| "loss": 0.0013, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.026387558045256873, |
| "grad_norm": 0.07249584794044495, |
| "learning_rate": 0.0001212807391827443, |
| "loss": 0.2854, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.02653497457064937, |
| "grad_norm": 23.931421279907227, |
| "learning_rate": 0.00012128070270380113, |
| "loss": 0.0587, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.026682391096041865, |
| "grad_norm": 228.77931213378906, |
| "learning_rate": 0.00012128066602164004, |
| "loss": 0.6358, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.026829807621434363, |
| "grad_norm": 0.020578529685735703, |
| "learning_rate": 0.00012128062913626113, |
| "loss": 0.0003, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.02697722414682686, |
| "grad_norm": 0.044141389429569244, |
| "learning_rate": 0.00012128059204766453, |
| "loss": 0.0003, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.027124640672219355, |
| "grad_norm": 35.83491516113281, |
| "learning_rate": 0.00012128055475585035, |
| "loss": 2.1523, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.027124640672219355, |
| "eval_1_ratio_diff": -0.044427123928293066, |
| "eval_accuracy": 0.8604832424006236, |
| "eval_f1": 0.8538775510204082, |
| "eval_loss": 1.1068644523620605, |
| "eval_precision": 0.8955479452054794, |
| "eval_recall": 0.8159126365054602, |
| "eval_runtime": 1440.348, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.027272057197611853, |
| "grad_norm": 0.004144140053540468, |
| "learning_rate": 0.00012128051726081876, |
| "loss": 0.0, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.027419473723004347, |
| "grad_norm": 0.0015425934689119458, |
| "learning_rate": 0.00012128047956256984, |
| "loss": 0.0002, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.027566890248396845, |
| "grad_norm": 36.42764663696289, |
| "learning_rate": 0.00012128044166110374, |
| "loss": 2.8486, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.027714306773789343, |
| "grad_norm": 0.6206398010253906, |
| "learning_rate": 0.00012128040355642058, |
| "loss": 2.924, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.027861723299181837, |
| "grad_norm": 97.60330963134766, |
| "learning_rate": 0.00012128036524852049, |
| "loss": 1.9209, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.028009139824574335, |
| "grad_norm": 2.1615848541259766, |
| "learning_rate": 0.0001212803267374036, |
| "loss": 0.0215, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.028156556349966833, |
| "grad_norm": 41.35491180419922, |
| "learning_rate": 0.00012128028802307003, |
| "loss": 0.8105, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.028303972875359327, |
| "grad_norm": 39.422916412353516, |
| "learning_rate": 0.00012128024910551992, |
| "loss": 1.131, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.028303972875359327, |
| "eval_1_ratio_diff": -0.3904910366328917, |
| "eval_accuracy": 0.5876851130163678, |
| "eval_f1": 0.322663252240717, |
| "eval_loss": 1.1657379865646362, |
| "eval_precision": 0.9, |
| "eval_recall": 0.19656786271450857, |
| "eval_runtime": 1441.4939, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.028451389400751825, |
| "grad_norm": 1.7290548086166382, |
| "learning_rate": 0.0001212802099847534, |
| "loss": 0.0986, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.028598805926144322, |
| "grad_norm": 40.167484283447266, |
| "learning_rate": 0.00012128017066077058, |
| "loss": 1.1352, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.028746222451536817, |
| "grad_norm": 36.6862678527832, |
| "learning_rate": 0.00012128013113357162, |
| "loss": 2.6405, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.028893638976929314, |
| "grad_norm": 1.1684958934783936, |
| "learning_rate": 0.00012128009140315665, |
| "loss": 1.1565, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.02904105550232181, |
| "grad_norm": 28.306957244873047, |
| "learning_rate": 0.00012128005146952578, |
| "loss": 1.6548, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.029188472027714307, |
| "grad_norm": 18.64267349243164, |
| "learning_rate": 0.00012128001133267917, |
| "loss": 1.1205, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.029335888553106804, |
| "grad_norm": 7.279528617858887, |
| "learning_rate": 0.00012127997099261693, |
| "loss": 0.6742, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.0294833050784993, |
| "grad_norm": 41.569854736328125, |
| "learning_rate": 0.00012127993044933921, |
| "loss": 0.6977, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.0294833050784993, |
| "eval_1_ratio_diff": -0.2704598597038192, |
| "eval_accuracy": 0.6344505066250974, |
| "eval_f1": 0.4983957219251337, |
| "eval_loss": 0.6263108849525452, |
| "eval_precision": 0.7925170068027211, |
| "eval_recall": 0.36349453978159124, |
| "eval_runtime": 1441.3891, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.029630721603891796, |
| "grad_norm": 14.647398948669434, |
| "learning_rate": 0.00012127988970284616, |
| "loss": 0.4508, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.029778138129284294, |
| "grad_norm": 21.75971221923828, |
| "learning_rate": 0.00012127984875313788, |
| "loss": 0.6282, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.02992555465467679, |
| "grad_norm": 32.292236328125, |
| "learning_rate": 0.00012127980760021456, |
| "loss": 1.0279, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.030072971180069286, |
| "grad_norm": 59.10111999511719, |
| "learning_rate": 0.00012127976624407626, |
| "loss": 1.7322, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.03022038770546178, |
| "grad_norm": 56.45620346069336, |
| "learning_rate": 0.00012127972468472319, |
| "loss": 2.3399, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.03036780423085428, |
| "grad_norm": 33.3152961730957, |
| "learning_rate": 0.00012127968292215546, |
| "loss": 1.1374, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.030515220756246776, |
| "grad_norm": 9.003528594970703, |
| "learning_rate": 0.00012127964095637322, |
| "loss": 0.531, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.03066263728163927, |
| "grad_norm": 11.181624412536621, |
| "learning_rate": 0.00012127959878737659, |
| "loss": 0.167, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.03066263728163927, |
| "eval_1_ratio_diff": -0.49961028838659394, |
| "eval_accuracy": 0.5003897116134061, |
| "eval_f1": 0.0, |
| "eval_loss": 0.9164891839027405, |
| "eval_precision": 0.0, |
| "eval_recall": 0.0, |
| "eval_runtime": 1441.9045, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.030810053807031768, |
| "grad_norm": 81.1378173828125, |
| "learning_rate": 0.00012127955641516573, |
| "loss": 1.5427, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.030957470332424266, |
| "grad_norm": 40.89067840576172, |
| "learning_rate": 0.00012127951383974079, |
| "loss": 0.8105, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.03110488685781676, |
| "grad_norm": 0.7650836706161499, |
| "learning_rate": 0.00012127947106110188, |
| "loss": 0.8716, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.03125230338320926, |
| "grad_norm": 41.49223709106445, |
| "learning_rate": 0.00012127942807924917, |
| "loss": 3.4998, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.031399719908601756, |
| "grad_norm": 0.12294773012399673, |
| "learning_rate": 0.00012127938489418281, |
| "loss": 1.8698, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.031547136433994254, |
| "grad_norm": 35.12305450439453, |
| "learning_rate": 0.00012127934150590295, |
| "loss": 1.6532, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.031694552959386744, |
| "grad_norm": 27.799177169799805, |
| "learning_rate": 0.00012127929791440968, |
| "loss": 0.5514, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.03184196948477924, |
| "grad_norm": 24.18194580078125, |
| "learning_rate": 0.00012127925411970319, |
| "loss": 0.6588, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.03184196948477924, |
| "eval_1_ratio_diff": 0.0615744349181605, |
| "eval_accuracy": 0.8074824629773967, |
| "eval_f1": 0.8185157972079353, |
| "eval_loss": 0.46238815784454346, |
| "eval_precision": 0.7736111111111111, |
| "eval_recall": 0.8689547581903276, |
| "eval_runtime": 1441.3065, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.03198938601017174, |
| "grad_norm": 39.476436614990234, |
| "learning_rate": 0.00012127921012178362, |
| "loss": 0.5056, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.03213680253556424, |
| "grad_norm": 17.45188331604004, |
| "learning_rate": 0.00012127916592065112, |
| "loss": 1.9197, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.032284219060956736, |
| "grad_norm": 37.614906311035156, |
| "learning_rate": 0.00012127912151630586, |
| "loss": 1.4371, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.032431635586349226, |
| "grad_norm": 6.937824726104736, |
| "learning_rate": 0.00012127907690874794, |
| "loss": 0.1527, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.032579052111741724, |
| "grad_norm": 1.9573392868041992, |
| "learning_rate": 0.00012127903209797754, |
| "loss": 0.0619, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.03272646863713422, |
| "grad_norm": 5.234042167663574, |
| "learning_rate": 0.00012127898708399481, |
| "loss": 0.0308, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.03287388516252672, |
| "grad_norm": 19.76664161682129, |
| "learning_rate": 0.00012127894186679988, |
| "loss": 2.5914, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.03302130168791922, |
| "grad_norm": 48.643428802490234, |
| "learning_rate": 0.00012127889644639293, |
| "loss": 3.5738, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.03302130168791922, |
| "eval_1_ratio_diff": -0.4964925954793453, |
| "eval_accuracy": 0.5035074045206547, |
| "eval_f1": 0.012403100775193798, |
| "eval_loss": 2.0848419666290283, |
| "eval_precision": 1.0, |
| "eval_recall": 0.0062402496099844, |
| "eval_runtime": 1441.7896, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.033168718213311715, |
| "grad_norm": 41.91992950439453, |
| "learning_rate": 0.0001212788508227741, |
| "loss": 3.656, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.033316134738704206, |
| "grad_norm": 58.21712112426758, |
| "learning_rate": 0.00012127880499594355, |
| "loss": 2.5973, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.033463551264096704, |
| "grad_norm": 14.196877479553223, |
| "learning_rate": 0.00012127875896590141, |
| "loss": 0.9817, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.0336109677894892, |
| "grad_norm": 21.982349395751953, |
| "learning_rate": 0.00012127871273264783, |
| "loss": 0.6516, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.0337583843148817, |
| "grad_norm": 26.360563278198242, |
| "learning_rate": 0.00012127866629618302, |
| "loss": 0.5606, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.0339058008402742, |
| "grad_norm": 15.224770545959473, |
| "learning_rate": 0.00012127861965650708, |
| "loss": 0.4791, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.03405321736566669, |
| "grad_norm": 40.95515441894531, |
| "learning_rate": 0.0001212785728136202, |
| "loss": 0.8481, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.034200633891059186, |
| "grad_norm": 0.4365566670894623, |
| "learning_rate": 0.00012127852576752252, |
| "loss": 0.2475, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.034200633891059186, |
| "eval_1_ratio_diff": 0.2938425565081839, |
| "eval_accuracy": 0.6890101325019485, |
| "eval_f1": 0.759493670886076, |
| "eval_loss": 0.8622868061065674, |
| "eval_precision": 0.618860510805501, |
| "eval_recall": 0.982839313572543, |
| "eval_runtime": 1441.2401, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.034348050416451684, |
| "grad_norm": 15.40101432800293, |
| "learning_rate": 0.0001212784785182142, |
| "loss": 0.6156, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.03449546694184418, |
| "grad_norm": 5.0568013191223145, |
| "learning_rate": 0.00012127843106569541, |
| "loss": 0.4877, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.03464288346723668, |
| "grad_norm": 0.277358740568161, |
| "learning_rate": 0.00012127838340996629, |
| "loss": 0.6857, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.03479029999262918, |
| "grad_norm": 0.04443424195051193, |
| "learning_rate": 0.00012127833555102701, |
| "loss": 0.0286, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.03493771651802167, |
| "grad_norm": 35.34669876098633, |
| "learning_rate": 0.00012127828748887773, |
| "loss": 1.7842, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.035085133043414166, |
| "grad_norm": 0.08662135899066925, |
| "learning_rate": 0.00012127823922351861, |
| "loss": 0.0011, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.03523254956880666, |
| "grad_norm": 0.021065138280391693, |
| "learning_rate": 0.00012127819075494979, |
| "loss": 0.0013, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.03537996609419916, |
| "grad_norm": 39.31500244140625, |
| "learning_rate": 0.00012127814208317148, |
| "loss": 1.3799, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.03537996609419916, |
| "eval_1_ratio_diff": 0.03975058456742009, |
| "eval_accuracy": 0.838659392049883, |
| "eval_f1": 0.8447111777944486, |
| "eval_loss": 0.967132568359375, |
| "eval_precision": 0.8135838150289018, |
| "eval_recall": 0.8783151326053042, |
| "eval_runtime": 1441.5685, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.03552738261959166, |
| "grad_norm": 0.021114541217684746, |
| "learning_rate": 0.0001212780932081838, |
| "loss": 0.0022, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.03567479914498415, |
| "grad_norm": 0.15021076798439026, |
| "learning_rate": 0.00012127804412998695, |
| "loss": 0.0023, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.03582221567037665, |
| "grad_norm": 0.017235957086086273, |
| "learning_rate": 0.00012127799484858106, |
| "loss": 0.0157, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.035969632195769145, |
| "grad_norm": 0.07619292289018631, |
| "learning_rate": 0.00012127794536396632, |
| "loss": 0.0006, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.03611704872116164, |
| "grad_norm": 0.35548681020736694, |
| "learning_rate": 0.0001212778956761429, |
| "loss": 0.0025, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.03626446524655414, |
| "grad_norm": 0.019310960546135902, |
| "learning_rate": 0.00012127784578511092, |
| "loss": 0.0006, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.03641188177194664, |
| "grad_norm": 0.0059149437583982944, |
| "learning_rate": 0.00012127779569087061, |
| "loss": 0.0222, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.03655929829733913, |
| "grad_norm": 0.0023440527729690075, |
| "learning_rate": 0.00012127774539342209, |
| "loss": 2.0713, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.03655929829733913, |
| "eval_1_ratio_diff": 0.0615744349181605, |
| "eval_accuracy": 0.848012470771629, |
| "eval_f1": 0.8567229977957385, |
| "eval_loss": 1.1258606910705566, |
| "eval_precision": 0.8097222222222222, |
| "eval_recall": 0.9095163806552262, |
| "eval_runtime": 1442.1776, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.03670671482273163, |
| "grad_norm": 0.4357898235321045, |
| "learning_rate": 0.00012127769489276555, |
| "loss": 0.0017, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.036854131348124125, |
| "grad_norm": 0.0051942430436611176, |
| "learning_rate": 0.00012127764418890117, |
| "loss": 0.0001, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.03700154787351662, |
| "grad_norm": 0.048877667635679245, |
| "learning_rate": 0.0001212775932818291, |
| "loss": 1.0276, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.03714896439890912, |
| "grad_norm": 0.030356034636497498, |
| "learning_rate": 0.00012127754217154949, |
| "loss": 2.3301, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.03729638092430161, |
| "grad_norm": 0.06719710677862167, |
| "learning_rate": 0.00012127749085806257, |
| "loss": 0.0008, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.03744379744969411, |
| "grad_norm": 0.8071137070655823, |
| "learning_rate": 0.00012127743934136846, |
| "loss": 0.0034, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.03759121397508661, |
| "grad_norm": 66.58085632324219, |
| "learning_rate": 0.00012127738762146735, |
| "loss": 2.0918, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.037738630500479105, |
| "grad_norm": 0.5617576241493225, |
| "learning_rate": 0.00012127733569835943, |
| "loss": 0.004, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.037738630500479105, |
| "eval_1_ratio_diff": 0.13795791114575218, |
| "eval_accuracy": 0.8152766952455183, |
| "eval_f1": 0.8375599725839616, |
| "eval_loss": 1.003125548362732, |
| "eval_precision": 0.7469437652811736, |
| "eval_recall": 0.953198127925117, |
| "eval_runtime": 1441.8288, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.0378860470258716, |
| "grad_norm": 0.019583938643336296, |
| "learning_rate": 0.00012127728357204487, |
| "loss": 0.0029, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.03803346355126409, |
| "grad_norm": 66.44640350341797, |
| "learning_rate": 0.00012127723124252383, |
| "loss": 1.346, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.03818088007665659, |
| "grad_norm": 0.05073532462120056, |
| "learning_rate": 0.00012127717870979647, |
| "loss": 1.726, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.03832829660204909, |
| "grad_norm": 0.008476372808218002, |
| "learning_rate": 0.000121277125973863, |
| "loss": 0.0002, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.03847571312744159, |
| "grad_norm": 78.07063293457031, |
| "learning_rate": 0.00012127707303472356, |
| "loss": 3.8118, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.038623129652834085, |
| "grad_norm": 37.921451568603516, |
| "learning_rate": 0.00012127701989237836, |
| "loss": 3.374, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.03877054617822658, |
| "grad_norm": 38.97615432739258, |
| "learning_rate": 0.0001212769665468276, |
| "loss": 1.849, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.03891796270361907, |
| "grad_norm": 1.3990278244018555, |
| "learning_rate": 0.0001212769129980714, |
| "loss": 0.2307, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.03891796270361907, |
| "eval_1_ratio_diff": -0.002338269680436489, |
| "eval_accuracy": 0.8511301636788776, |
| "eval_f1": 0.8506645817044566, |
| "eval_loss": 0.5836467742919922, |
| "eval_precision": 0.8526645768025078, |
| "eval_recall": 0.8486739469578783, |
| "eval_runtime": 1442.6344, |
| "eval_samples_per_second": 0.889, |
| "eval_steps_per_second": 0.445, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.03906537922901157, |
| "grad_norm": 0.5216283798217773, |
| "learning_rate": 0.00012127685924610997, |
| "loss": 0.0092, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.03921279575440407, |
| "grad_norm": 0.716465950012207, |
| "learning_rate": 0.00012127680529094349, |
| "loss": 0.0057, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.039360212279796566, |
| "grad_norm": 0.17090915143489838, |
| "learning_rate": 0.00012127675113257214, |
| "loss": 0.0031, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.039507628805189064, |
| "grad_norm": 62.14753723144531, |
| "learning_rate": 0.00012127669677099608, |
| "loss": 1.6501, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.039655045330581555, |
| "grad_norm": 35.18620681762695, |
| "learning_rate": 0.00012127664220621553, |
| "loss": 0.8287, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.03980246185597405, |
| "grad_norm": 34.50994873046875, |
| "learning_rate": 0.00012127658743823064, |
| "loss": 2.5161, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.03994987838136655, |
| "grad_norm": 0.9479020237922668, |
| "learning_rate": 0.00012127653246704162, |
| "loss": 0.0155, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.04009729490675905, |
| "grad_norm": 0.040624819695949554, |
| "learning_rate": 0.00012127647729264862, |
| "loss": 1.536, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.04009729490675905, |
| "eval_1_ratio_diff": -0.008573655494933774, |
| "eval_accuracy": 0.8791893998441154, |
| "eval_f1": 0.8780487804878049, |
| "eval_loss": 0.49514248967170715, |
| "eval_precision": 0.8857142857142857, |
| "eval_recall": 0.8705148205928237, |
| "eval_runtime": 1441.6693, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.040244711432151546, |
| "grad_norm": 0.08040345460176468, |
| "learning_rate": 0.00012127642191505187, |
| "loss": 0.0205, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.040392127957544044, |
| "grad_norm": 58.783809661865234, |
| "learning_rate": 0.00012127636633425152, |
| "loss": 1.1192, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.040539544482936535, |
| "grad_norm": 0.25617870688438416, |
| "learning_rate": 0.00012127631055024779, |
| "loss": 1.0263, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.04068696100832903, |
| "grad_norm": 46.056339263916016, |
| "learning_rate": 0.00012127625456304081, |
| "loss": 1.1183, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.04083437753372153, |
| "grad_norm": 0.17480018734931946, |
| "learning_rate": 0.00012127619837263082, |
| "loss": 0.0055, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.04098179405911403, |
| "grad_norm": 0.37528491020202637, |
| "learning_rate": 0.000121276141979018, |
| "loss": 0.0032, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.041129210584506526, |
| "grad_norm": 0.35542991757392883, |
| "learning_rate": 0.00012127608538220252, |
| "loss": 0.9512, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.04127662710989902, |
| "grad_norm": 0.08831676840782166, |
| "learning_rate": 0.00012127602858218457, |
| "loss": 0.0184, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.04127662710989902, |
| "eval_1_ratio_diff": 0.12860483242400622, |
| "eval_accuracy": 0.8402182385035074, |
| "eval_f1": 0.8583275742916379, |
| "eval_loss": 1.0018821954727173, |
| "eval_precision": 0.7704714640198511, |
| "eval_recall": 0.968798751950078, |
| "eval_runtime": 1442.4789, |
| "eval_samples_per_second": 0.889, |
| "eval_steps_per_second": 0.445, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.041424043635291515, |
| "grad_norm": 46.25735092163086, |
| "learning_rate": 0.00012127597157896437, |
| "loss": 0.6495, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.04157146016068401, |
| "grad_norm": 58.521575927734375, |
| "learning_rate": 0.00012127591437254209, |
| "loss": 1.4757, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.04171887668607651, |
| "grad_norm": 0.3296540379524231, |
| "learning_rate": 0.0001212758569629179, |
| "loss": 2.2725, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.04186629321146901, |
| "grad_norm": 0.03395453095436096, |
| "learning_rate": 0.00012127579935009204, |
| "loss": 0.0006, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.042013709736861506, |
| "grad_norm": 0.02328958362340927, |
| "learning_rate": 0.00012127574153406467, |
| "loss": 0.0004, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.042161126262253996, |
| "grad_norm": 58.99131774902344, |
| "learning_rate": 0.000121275683514836, |
| "loss": 2.0081, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.042308542787646494, |
| "grad_norm": 0.9085908532142639, |
| "learning_rate": 0.0001212756252924062, |
| "loss": 0.006, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.04245595931303899, |
| "grad_norm": 0.5718927383422852, |
| "learning_rate": 0.00012127556686677549, |
| "loss": 2.0144, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.04245595931303899, |
| "eval_1_ratio_diff": 0.22291504286827746, |
| "eval_accuracy": 0.7443491816056118, |
| "eval_f1": 0.7908163265306123, |
| "eval_loss": 0.9025093913078308, |
| "eval_precision": 0.668824163969795, |
| "eval_recall": 0.9672386895475819, |
| "eval_runtime": 1442.0314, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.04260337583843149, |
| "grad_norm": 0.4269089698791504, |
| "learning_rate": 0.00012127550823794406, |
| "loss": 1.8595, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.04275079236382399, |
| "grad_norm": 1.5817714929580688, |
| "learning_rate": 0.00012127544940591211, |
| "loss": 0.4153, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.04289820888921648, |
| "grad_norm": 56.673728942871094, |
| "learning_rate": 0.00012127539037067981, |
| "loss": 1.8132, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.043045625414608976, |
| "grad_norm": 4.291464805603027, |
| "learning_rate": 0.0001212753311322474, |
| "loss": 0.3818, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.043193041940001474, |
| "grad_norm": 18.92963981628418, |
| "learning_rate": 0.00012127527169061505, |
| "loss": 0.0941, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.04334045846539397, |
| "grad_norm": 27.108686447143555, |
| "learning_rate": 0.00012127521204578297, |
| "loss": 0.1314, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.04348787499078647, |
| "grad_norm": 33.73942184448242, |
| "learning_rate": 0.00012127515219775134, |
| "loss": 0.1772, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.04363529151617896, |
| "grad_norm": 52.08650588989258, |
| "learning_rate": 0.00012127509214652041, |
| "loss": 0.4505, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.04363529151617896, |
| "eval_1_ratio_diff": -0.1184723304754482, |
| "eval_accuracy": 0.798908807482463, |
| "eval_f1": 0.7716814159292036, |
| "eval_loss": 0.7536761164665222, |
| "eval_precision": 0.8916155419222904, |
| "eval_recall": 0.6801872074882995, |
| "eval_runtime": 1442.1268, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.04378270804157146, |
| "grad_norm": 0.05625031143426895, |
| "learning_rate": 0.00012127503189209032, |
| "loss": 0.3175, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.043930124566963956, |
| "grad_norm": 0.10953383892774582, |
| "learning_rate": 0.0001212749714344613, |
| "loss": 0.0059, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.044077541092356454, |
| "grad_norm": 71.34505462646484, |
| "learning_rate": 0.00012127491077363357, |
| "loss": 0.5113, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.04422495761774895, |
| "grad_norm": 0.012292311526834965, |
| "learning_rate": 0.00012127484990960732, |
| "loss": 0.0008, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.04437237414314145, |
| "grad_norm": 0.010139914229512215, |
| "learning_rate": 0.00012127478884238274, |
| "loss": 0.0002, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.04451979066853394, |
| "grad_norm": 58.99741744995117, |
| "learning_rate": 0.00012127472757196004, |
| "loss": 3.6273, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.04466720719392644, |
| "grad_norm": 56.25634765625, |
| "learning_rate": 0.00012127466609833943, |
| "loss": 3.663, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.044814623719318936, |
| "grad_norm": 56.98939895629883, |
| "learning_rate": 0.00012127460442152114, |
| "loss": 1.6247, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.044814623719318936, |
| "eval_1_ratio_diff": -0.03273577552611068, |
| "eval_accuracy": 0.8456742010911925, |
| "eval_f1": 0.8403225806451613, |
| "eval_loss": 0.6838305592536926, |
| "eval_precision": 0.8697829716193656, |
| "eval_recall": 0.8127925117004681, |
| "eval_runtime": 1441.7869, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.04496204024471143, |
| "grad_norm": 41.00777053833008, |
| "learning_rate": 0.00012127454254150532, |
| "loss": 3.2637, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.04510945677010393, |
| "grad_norm": 15.958291053771973, |
| "learning_rate": 0.00012127448045829223, |
| "loss": 0.0749, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.04525687329549642, |
| "grad_norm": 52.62068176269531, |
| "learning_rate": 0.00012127441817188204, |
| "loss": 1.1452, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.04540428982088892, |
| "grad_norm": 0.8104878067970276, |
| "learning_rate": 0.00012127435568227499, |
| "loss": 0.0086, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.04555170634628142, |
| "grad_norm": 6.7712883949279785, |
| "learning_rate": 0.00012127429298947129, |
| "loss": 0.035, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.045699122871673915, |
| "grad_norm": 1.2900152206420898, |
| "learning_rate": 0.00012127423009347112, |
| "loss": 0.0133, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.04584653939706641, |
| "grad_norm": 0.5468306541442871, |
| "learning_rate": 0.00012127416699427471, |
| "loss": 0.0066, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.04599395592245891, |
| "grad_norm": 0.16869762539863586, |
| "learning_rate": 0.00012127410369188226, |
| "loss": 0.0026, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.04599395592245891, |
| "eval_1_ratio_diff": 0.015588464536243185, |
| "eval_accuracy": 0.8862042088854248, |
| "eval_f1": 0.8878648233486943, |
| "eval_loss": 0.570717990398407, |
| "eval_precision": 0.8744326777609682, |
| "eval_recall": 0.9017160686427457, |
| "eval_runtime": 1441.498, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.0461413724478514, |
| "grad_norm": 0.20163878798484802, |
| "learning_rate": 0.00012127404018629401, |
| "loss": 0.0013, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.0462887889732439, |
| "grad_norm": 0.1430014669895172, |
| "learning_rate": 0.00012127397647751014, |
| "loss": 0.0016, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.0464362054986364, |
| "grad_norm": 60.50364303588867, |
| "learning_rate": 0.00012127391256553088, |
| "loss": 1.6526, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.046583622024028895, |
| "grad_norm": 0.009336289949715137, |
| "learning_rate": 0.00012127384845035646, |
| "loss": 0.0005, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.04673103854942139, |
| "grad_norm": 0.02924017794430256, |
| "learning_rate": 0.00012127378413198706, |
| "loss": 2.0099, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.046878455074813884, |
| "grad_norm": 0.1369701325893402, |
| "learning_rate": 0.00012127371961042292, |
| "loss": 1.9002, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.04702587160020638, |
| "grad_norm": 77.09698486328125, |
| "learning_rate": 0.00012127365488566423, |
| "loss": 1.0021, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.04717328812559888, |
| "grad_norm": 4.486428260803223, |
| "learning_rate": 0.00012127358995771124, |
| "loss": 1.8971, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.04717328812559888, |
| "eval_1_ratio_diff": -0.17225253312548716, |
| "eval_accuracy": 0.7669524551831645, |
| "eval_f1": 0.7181903864278982, |
| "eval_loss": 1.282883644104004, |
| "eval_precision": 0.9071428571428571, |
| "eval_recall": 0.594383775351014, |
| "eval_runtime": 1441.6631, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.04732070465099138, |
| "grad_norm": 0.3835877478122711, |
| "learning_rate": 0.00012127352482656414, |
| "loss": 1.5125, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.047468121176383875, |
| "grad_norm": 0.3453172445297241, |
| "learning_rate": 0.00012127345949222316, |
| "loss": 1.4256, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.04761553770177637, |
| "grad_norm": 56.087467193603516, |
| "learning_rate": 0.00012127339395468855, |
| "loss": 1.389, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.04776295422716886, |
| "grad_norm": 39.20930099487305, |
| "learning_rate": 0.00012127332821396047, |
| "loss": 2.2849, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.04791037075256136, |
| "grad_norm": 2.4249165058135986, |
| "learning_rate": 0.00012127326227003918, |
| "loss": 0.0286, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.04805778727795386, |
| "grad_norm": 1.4587557315826416, |
| "learning_rate": 0.0001212731961229249, |
| "loss": 0.775, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.04820520380334636, |
| "grad_norm": 45.33637237548828, |
| "learning_rate": 0.00012127312977261783, |
| "loss": 0.2852, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.048352620328738855, |
| "grad_norm": 0.07065322250127792, |
| "learning_rate": 0.0001212730632191182, |
| "loss": 0.7127, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.048352620328738855, |
| "eval_1_ratio_diff": -0.05455962587685115, |
| "eval_accuracy": 0.8487918939984411, |
| "eval_f1": 0.8399339933993399, |
| "eval_loss": 0.5940015316009521, |
| "eval_precision": 0.8914185639229422, |
| "eval_recall": 0.7940717628705148, |
| "eval_runtime": 1441.517, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.048500036854131345, |
| "grad_norm": 35.70323181152344, |
| "learning_rate": 0.00012127299646242624, |
| "loss": 0.6816, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.04864745337952384, |
| "grad_norm": 1.4870625734329224, |
| "learning_rate": 0.00012127292950254218, |
| "loss": 0.1488, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.04879486990491634, |
| "grad_norm": 0.6423426866531372, |
| "learning_rate": 0.00012127286233946625, |
| "loss": 0.0136, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.04894228643030884, |
| "grad_norm": 0.3320056200027466, |
| "learning_rate": 0.00012127279497319864, |
| "loss": 0.0058, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.049089702955701336, |
| "grad_norm": 4.33368444442749, |
| "learning_rate": 0.00012127272740373959, |
| "loss": 0.5196, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.04923711948109383, |
| "grad_norm": 71.66387939453125, |
| "learning_rate": 0.00012127265963108935, |
| "loss": 2.7961, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.049384536006486325, |
| "grad_norm": 94.96151733398438, |
| "learning_rate": 0.00012127259165524814, |
| "loss": 3.8152, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.04953195253187882, |
| "grad_norm": 39.40300369262695, |
| "learning_rate": 0.00012127252347621616, |
| "loss": 1.1659, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.04953195253187882, |
| "eval_1_ratio_diff": -0.11301636788776309, |
| "eval_accuracy": 0.779423226812159, |
| "eval_f1": 0.751099384344767, |
| "eval_loss": 1.093988060951233, |
| "eval_precision": 0.8608870967741935, |
| "eval_recall": 0.6661466458658346, |
| "eval_runtime": 1441.6444, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.04967936905727132, |
| "grad_norm": 142.39564514160156, |
| "learning_rate": 0.00012127245509399365, |
| "loss": 1.9772, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.04982678558266382, |
| "grad_norm": 0.43099793791770935, |
| "learning_rate": 0.00012127238650858088, |
| "loss": 0.0056, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.049974202108056316, |
| "grad_norm": 0.22017613053321838, |
| "learning_rate": 0.00012127231771997801, |
| "loss": 0.0026, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.05012161863344881, |
| "grad_norm": 0.06024312227964401, |
| "learning_rate": 0.00012127224872818532, |
| "loss": 1.4556, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.050269035158841305, |
| "grad_norm": 30.382848739624023, |
| "learning_rate": 0.00012127217953320302, |
| "loss": 0.959, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.0504164516842338, |
| "grad_norm": 0.12178266048431396, |
| "learning_rate": 0.00012127211013503136, |
| "loss": 0.0025, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.0505638682096263, |
| "grad_norm": 0.2670276165008545, |
| "learning_rate": 0.00012127204053367056, |
| "loss": 0.0059, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.0507112847350188, |
| "grad_norm": 0.7420686483383179, |
| "learning_rate": 0.00012127197072912085, |
| "loss": 0.0205, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.0507112847350188, |
| "eval_1_ratio_diff": -0.05300077942322684, |
| "eval_accuracy": 0.828526890101325, |
| "eval_f1": 0.8187808896210873, |
| "eval_loss": 0.5867729783058167, |
| "eval_precision": 0.8673647469458988, |
| "eval_recall": 0.7753510140405616, |
| "eval_runtime": 1441.426, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.05085870126041129, |
| "grad_norm": 69.81874084472656, |
| "learning_rate": 0.00012127190072138247, |
| "loss": 0.853, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.05100611778580379, |
| "grad_norm": 0.51251220703125, |
| "learning_rate": 0.00012127183051045567, |
| "loss": 0.037, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.051153534311196285, |
| "grad_norm": 32.83553695678711, |
| "learning_rate": 0.00012127176009634066, |
| "loss": 1.7711, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.05130095083658878, |
| "grad_norm": 0.029091738164424896, |
| "learning_rate": 0.00012127168947903768, |
| "loss": 0.0006, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.05144836736198128, |
| "grad_norm": 59.94422912597656, |
| "learning_rate": 0.00012127161865854698, |
| "loss": 1.6607, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.05159578388737378, |
| "grad_norm": 60.350067138671875, |
| "learning_rate": 0.00012127154763486877, |
| "loss": 2.055, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.05174320041276627, |
| "grad_norm": 0.08221148699522018, |
| "learning_rate": 0.00012127147640800332, |
| "loss": 1.6475, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.051890616938158766, |
| "grad_norm": 39.905357360839844, |
| "learning_rate": 0.00012127140497795086, |
| "loss": 1.2104, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.051890616938158766, |
| "eval_1_ratio_diff": -0.0919719407638348, |
| "eval_accuracy": 0.8316445830085737, |
| "eval_f1": 0.8144329896907216, |
| "eval_loss": 0.7349568605422974, |
| "eval_precision": 0.9063097514340345, |
| "eval_recall": 0.7394695787831513, |
| "eval_runtime": 1441.7333, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.052038033463551264, |
| "grad_norm": 4.226317882537842, |
| "learning_rate": 0.00012127133334471161, |
| "loss": 0.4275, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.05218544998894376, |
| "grad_norm": 0.05035168305039406, |
| "learning_rate": 0.00012127126150828585, |
| "loss": 1.3166, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.05233286651433626, |
| "grad_norm": 0.25760674476623535, |
| "learning_rate": 0.00012127118946867378, |
| "loss": 0.0081, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.05248028303972875, |
| "grad_norm": 36.74332809448242, |
| "learning_rate": 0.00012127111722587565, |
| "loss": 1.1506, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.05262769956512125, |
| "grad_norm": 36.16116714477539, |
| "learning_rate": 0.00012127104477989172, |
| "loss": 1.2632, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.052775116090513746, |
| "grad_norm": 37.083343505859375, |
| "learning_rate": 0.00012127097213072223, |
| "loss": 1.8408, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.052922532615906244, |
| "grad_norm": 3.0497827529907227, |
| "learning_rate": 0.0001212708992783674, |
| "loss": 0.0247, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.05306994914129874, |
| "grad_norm": 4.117802619934082, |
| "learning_rate": 0.00012127082622282751, |
| "loss": 0.0342, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.05306994914129874, |
| "eval_1_ratio_diff": -0.08183943881527672, |
| "eval_accuracy": 0.8121590023382697, |
| "eval_f1": 0.7952421410365336, |
| "eval_loss": 0.5786097645759583, |
| "eval_precision": 0.8731343283582089, |
| "eval_recall": 0.7301092043681747, |
| "eval_runtime": 1442.0276, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.05321736566669124, |
| "grad_norm": 0.633588969707489, |
| "learning_rate": 0.00012127075296410277, |
| "loss": 0.0056, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.05336478219208373, |
| "grad_norm": 36.505218505859375, |
| "learning_rate": 0.00012127067950219344, |
| "loss": 0.7263, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.05351219871747623, |
| "grad_norm": 0.6716632843017578, |
| "learning_rate": 0.00012127060583709976, |
| "loss": 0.0045, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.053659615242868726, |
| "grad_norm": 36.19940948486328, |
| "learning_rate": 0.000121270531968822, |
| "loss": 0.377, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.053807031768261224, |
| "grad_norm": 81.25736236572266, |
| "learning_rate": 0.00012127045789736038, |
| "loss": 0.6006, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.05395444829365372, |
| "grad_norm": 29.044986724853516, |
| "learning_rate": 0.00012127038362271517, |
| "loss": 1.1609, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.05410186481904621, |
| "grad_norm": 0.1593562811613083, |
| "learning_rate": 0.0001212703091448866, |
| "loss": 0.0055, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.05424928134443871, |
| "grad_norm": 7.988092422485352, |
| "learning_rate": 0.00012127023446387492, |
| "loss": 0.0238, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.05424928134443871, |
| "eval_1_ratio_diff": -0.07092751363990646, |
| "eval_accuracy": 0.838659392049883, |
| "eval_f1": 0.8261964735516373, |
| "eval_loss": 0.6887457370758057, |
| "eval_precision": 0.8945454545454545, |
| "eval_recall": 0.7675507020280812, |
| "eval_runtime": 1441.3319, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.05439669786983121, |
| "grad_norm": 58.56552505493164, |
| "learning_rate": 0.00012127015957968041, |
| "loss": 2.3194, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.054544114395223706, |
| "grad_norm": 0.37152421474456787, |
| "learning_rate": 0.00012127008449230329, |
| "loss": 0.0029, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.0546915309206162, |
| "grad_norm": 33.52932357788086, |
| "learning_rate": 0.00012127000920174381, |
| "loss": 1.1549, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.054838947446008694, |
| "grad_norm": 0.02616913430392742, |
| "learning_rate": 0.00012126993370800224, |
| "loss": 0.0021, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.05498636397140119, |
| "grad_norm": 36.83317565917969, |
| "learning_rate": 0.00012126985801107882, |
| "loss": 1.2016, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.05513378049679369, |
| "grad_norm": 0.006011671852320433, |
| "learning_rate": 0.00012126978211097381, |
| "loss": 2.834, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.05528119702218619, |
| "grad_norm": 58.966102600097656, |
| "learning_rate": 0.00012126970600768747, |
| "loss": 2.0661, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.055428613547578685, |
| "grad_norm": 57.80133819580078, |
| "learning_rate": 0.00012126962970122005, |
| "loss": 1.2417, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.055428613547578685, |
| "eval_1_ratio_diff": 0.09508963367108336, |
| "eval_accuracy": 0.8503507404520655, |
| "eval_f1": 0.8632478632478633, |
| "eval_loss": 0.7459388375282288, |
| "eval_precision": 0.7942332896461337, |
| "eval_recall": 0.9453978159126365, |
| "eval_runtime": 1440.976, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.446, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.05557603007297118, |
| "grad_norm": 0.10538947582244873, |
| "learning_rate": 0.00012126955319157181, |
| "loss": 1.5568, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.055723446598363674, |
| "grad_norm": 0.3577294647693634, |
| "learning_rate": 0.000121269476478743, |
| "loss": 1.3633, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.05587086312375617, |
| "grad_norm": 111.04033660888672, |
| "learning_rate": 0.00012126939956273387, |
| "loss": 1.5691, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.05601827964914867, |
| "grad_norm": 8.450987815856934, |
| "learning_rate": 0.00012126932244354469, |
| "loss": 0.6036, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.05616569617454117, |
| "grad_norm": 6.646569728851318, |
| "learning_rate": 0.00012126924512117572, |
| "loss": 0.0554, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.056313112699933665, |
| "grad_norm": 10.05777359008789, |
| "learning_rate": 0.00012126916759562719, |
| "loss": 0.0507, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.056460529225326156, |
| "grad_norm": 1.5429670810699463, |
| "learning_rate": 0.00012126908986689941, |
| "loss": 0.5476, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.056607945750718654, |
| "grad_norm": 0.7471988201141357, |
| "learning_rate": 0.0001212690119349926, |
| "loss": 0.0357, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.056607945750718654, |
| "eval_1_ratio_diff": -0.03273577552611068, |
| "eval_accuracy": 0.8534684333593141, |
| "eval_f1": 0.8483870967741935, |
| "eval_loss": 0.5041674971580505, |
| "eval_precision": 0.8781302170283807, |
| "eval_recall": 0.8205928237129485, |
| "eval_runtime": 1441.5634, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.05675536227611115, |
| "grad_norm": 25.423622131347656, |
| "learning_rate": 0.00012126893379990705, |
| "loss": 0.0991, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.05690277880150365, |
| "grad_norm": 8.131854057312012, |
| "learning_rate": 0.00012126885546164299, |
| "loss": 0.0467, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.05705019532689615, |
| "grad_norm": 0.7007619738578796, |
| "learning_rate": 0.00012126877692020069, |
| "loss": 0.0319, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.057197611852288645, |
| "grad_norm": 0.0242279302328825, |
| "learning_rate": 0.00012126869817558045, |
| "loss": 0.5106, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.057345028377681136, |
| "grad_norm": 1.126301646232605, |
| "learning_rate": 0.00012126861922778249, |
| "loss": 0.0068, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.05749244490307363, |
| "grad_norm": 2.2255496978759766, |
| "learning_rate": 0.0001212685400768071, |
| "loss": 0.0125, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.05763986142846613, |
| "grad_norm": 53.08203125, |
| "learning_rate": 0.00012126846072265453, |
| "loss": 3.4784, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.05778727795385863, |
| "grad_norm": 53.75185012817383, |
| "learning_rate": 0.00012126838116532506, |
| "loss": 5.3382, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.05778727795385863, |
| "eval_1_ratio_diff": 0.024162120031176904, |
| "eval_accuracy": 0.8074824629773967, |
| "eval_f1": 0.8118811881188119, |
| "eval_loss": 1.1864495277404785, |
| "eval_precision": 0.7931547619047619, |
| "eval_recall": 0.8315132605304212, |
| "eval_runtime": 1441.6478, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.05793469447925113, |
| "grad_norm": 20.95121955871582, |
| "learning_rate": 0.00012126830140481893, |
| "loss": 3.3432, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.05808211100464362, |
| "grad_norm": 49.42118453979492, |
| "learning_rate": 0.00012126822144113646, |
| "loss": 0.762, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.058229527530036115, |
| "grad_norm": 16.03618812561035, |
| "learning_rate": 0.00012126814127427784, |
| "loss": 0.1045, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.05837694405542861, |
| "grad_norm": 34.1168212890625, |
| "learning_rate": 0.00012126806090424342, |
| "loss": 3.1091, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.05852436058082111, |
| "grad_norm": 34.757083892822266, |
| "learning_rate": 0.00012126798033103342, |
| "loss": 2.0632, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.05867177710621361, |
| "grad_norm": 1.412405014038086, |
| "learning_rate": 0.00012126789955464813, |
| "loss": 0.2568, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.058819193631606106, |
| "grad_norm": 55.76416015625, |
| "learning_rate": 0.00012126781857508779, |
| "loss": 0.497, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.0589666101569986, |
| "grad_norm": 0.3345389664173126, |
| "learning_rate": 0.00012126773739235272, |
| "loss": 0.0121, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.0589666101569986, |
| "eval_1_ratio_diff": 0.06703039750584561, |
| "eval_accuracy": 0.8363211223694466, |
| "eval_f1": 0.8464912280701754, |
| "eval_loss": 0.7451047897338867, |
| "eval_precision": 0.796423658872077, |
| "eval_recall": 0.9032761310452418, |
| "eval_runtime": 1440.5179, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.059114026682391095, |
| "grad_norm": 0.15297777950763702, |
| "learning_rate": 0.00012126765600644314, |
| "loss": 0.0082, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.05926144320778359, |
| "grad_norm": 0.19080302119255066, |
| "learning_rate": 0.00012126757441735937, |
| "loss": 0.0026, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.05940885973317609, |
| "grad_norm": 0.43317776918411255, |
| "learning_rate": 0.00012126749262510164, |
| "loss": 0.0055, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.05955627625856859, |
| "grad_norm": 6.4003984334704e-06, |
| "learning_rate": 0.00012126741062967027, |
| "loss": 0.0031, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.05970369278396108, |
| "grad_norm": 0.03450751677155495, |
| "learning_rate": 0.00012126732843106551, |
| "loss": 0.0052, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.05985110930935358, |
| "grad_norm": 52.02117156982422, |
| "learning_rate": 0.00012126724602928764, |
| "loss": 5.0919, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.059998525834746075, |
| "grad_norm": 50.249900817871094, |
| "learning_rate": 0.00012126716342433692, |
| "loss": 4.0749, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.06014594236013857, |
| "grad_norm": 0.0067368014715611935, |
| "learning_rate": 0.00012126708061621366, |
| "loss": 0.0001, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.06014594236013857, |
| "eval_1_ratio_diff": 0.06469212782540923, |
| "eval_accuracy": 0.8651597817614964, |
| "eval_f1": 0.8732600732600733, |
| "eval_loss": 0.9449532628059387, |
| "eval_precision": 0.8232044198895028, |
| "eval_recall": 0.9297971918876755, |
| "eval_runtime": 1440.6727, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.06029335888553107, |
| "grad_norm": 44.037471771240234, |
| "learning_rate": 0.00012126699760491808, |
| "loss": 2.1184, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.06044077541092356, |
| "grad_norm": 31.20966148376465, |
| "learning_rate": 0.00012126691439045052, |
| "loss": 2.3532, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.06058819193631606, |
| "grad_norm": 1.108382225036621, |
| "learning_rate": 0.00012126683097281125, |
| "loss": 0.0093, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.06073560846170856, |
| "grad_norm": 1.2753050327301025, |
| "learning_rate": 0.0001212667473520005, |
| "loss": 0.011, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.060883024987101055, |
| "grad_norm": 4.512105941772461, |
| "learning_rate": 0.00012126666352801861, |
| "loss": 0.0212, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.06103044151249355, |
| "grad_norm": 0.3488874137401581, |
| "learning_rate": 0.00012126657950086582, |
| "loss": 1.2435, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.06117785803788605, |
| "grad_norm": 0.11297665536403656, |
| "learning_rate": 0.00012126649527054243, |
| "loss": 0.0027, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.06132527456327854, |
| "grad_norm": 0.07631942629814148, |
| "learning_rate": 0.00012126641083704874, |
| "loss": 0.0032, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.06132527456327854, |
| "eval_1_ratio_diff": 0.08885424785658613, |
| "eval_accuracy": 0.8799688230709275, |
| "eval_f1": 0.8896848137535817, |
| "eval_loss": 0.7254036068916321, |
| "eval_precision": 0.8225165562913908, |
| "eval_recall": 0.968798751950078, |
| "eval_runtime": 1440.5593, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.06147269108867104, |
| "grad_norm": 41.37874984741211, |
| "learning_rate": 0.00012126632620038498, |
| "loss": 1.3108, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.061620107614063536, |
| "grad_norm": 55.71513366699219, |
| "learning_rate": 0.00012126624136055149, |
| "loss": 1.7068, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.061767524139456034, |
| "grad_norm": 1.5174663066864014, |
| "learning_rate": 0.0001212661563175485, |
| "loss": 0.0173, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.06191494066484853, |
| "grad_norm": 0.10353035479784012, |
| "learning_rate": 0.00012126607107137636, |
| "loss": 1.2081, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.06206235719024102, |
| "grad_norm": 0.05997217819094658, |
| "learning_rate": 0.00012126598562203531, |
| "loss": 1.8296, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.06220977371563352, |
| "grad_norm": 0.17887941002845764, |
| "learning_rate": 0.00012126589996952563, |
| "loss": 0.0016, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.06235719024102602, |
| "grad_norm": 0.08932141214609146, |
| "learning_rate": 0.00012126581411384764, |
| "loss": 1.5849, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.06250460676641852, |
| "grad_norm": 41.82356643676758, |
| "learning_rate": 0.0001212657280550016, |
| "loss": 1.2425, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.06250460676641852, |
| "eval_1_ratio_diff": 0.09586905689789549, |
| "eval_accuracy": 0.8542478565861262, |
| "eval_f1": 0.8669039145907473, |
| "eval_loss": 0.6706948280334473, |
| "eval_precision": 0.7971204188481675, |
| "eval_recall": 0.9500780031201248, |
| "eval_runtime": 1441.8264, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.06265202329181101, |
| "grad_norm": 45.45724105834961, |
| "learning_rate": 0.00012126564179298783, |
| "loss": 0.7189, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.06279943981720351, |
| "grad_norm": 1.7170765399932861, |
| "learning_rate": 0.00012126555532780658, |
| "loss": 0.014, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.062946856342596, |
| "grad_norm": 1.8296376466751099, |
| "learning_rate": 0.00012126546865945818, |
| "loss": 0.0249, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.06309427286798851, |
| "grad_norm": 28.392093658447266, |
| "learning_rate": 0.00012126538178794288, |
| "loss": 2.1082, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.063241689393381, |
| "grad_norm": 0.024955546483397484, |
| "learning_rate": 0.00012126529471326101, |
| "loss": 0.0026, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.06338910591877349, |
| "grad_norm": 29.141136169433594, |
| "learning_rate": 0.00012126520743541283, |
| "loss": 1.5827, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.063536522444166, |
| "grad_norm": 0.3031620383262634, |
| "learning_rate": 0.00012126511995439865, |
| "loss": 1.4029, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.06368393896955848, |
| "grad_norm": 0.2821040451526642, |
| "learning_rate": 0.00012126503227021874, |
| "loss": 1.3931, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.06368393896955848, |
| "eval_1_ratio_diff": -0.04053000779423227, |
| "eval_accuracy": 0.8581449727201871, |
| "eval_f1": 0.8520325203252033, |
| "eval_loss": 0.5900216102600098, |
| "eval_precision": 0.8896434634974533, |
| "eval_recall": 0.8174726989079563, |
| "eval_runtime": 1439.3591, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.06383135549495099, |
| "grad_norm": 4.8987860679626465, |
| "learning_rate": 0.00012126494438287343, |
| "loss": 0.0265, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.06397877202034348, |
| "grad_norm": 0.27837908267974854, |
| "learning_rate": 0.000121264856292363, |
| "loss": 0.0093, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.06412618854573597, |
| "grad_norm": 0.5379538536071777, |
| "learning_rate": 0.00012126476799868773, |
| "loss": 0.0095, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.06427360507112848, |
| "grad_norm": 23.87804412841797, |
| "learning_rate": 0.00012126467950184793, |
| "loss": 0.8342, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.06442102159652097, |
| "grad_norm": 1.2284973859786987, |
| "learning_rate": 0.0001212645908018439, |
| "loss": 0.0162, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.06456843812191347, |
| "grad_norm": 36.555442810058594, |
| "learning_rate": 0.00012126450189867592, |
| "loss": 2.2561, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.06471585464730596, |
| "grad_norm": 24.54311180114746, |
| "learning_rate": 0.00012126441279234432, |
| "loss": 3.1743, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.06486327117269845, |
| "grad_norm": 0.13615825772285461, |
| "learning_rate": 0.00012126432348284936, |
| "loss": 0.0021, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.06486327117269845, |
| "eval_1_ratio_diff": -0.2899454403741232, |
| "eval_accuracy": 0.6975837879968823, |
| "eval_f1": 0.5736263736263736, |
| "eval_loss": 1.3224732875823975, |
| "eval_precision": 0.9702602230483272, |
| "eval_recall": 0.40717628705148207, |
| "eval_runtime": 1440.3311, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.06501068769809096, |
| "grad_norm": 3.8478543758392334, |
| "learning_rate": 0.00012126423397019136, |
| "loss": 0.021, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.06515810422348345, |
| "grad_norm": 0.08823257684707642, |
| "learning_rate": 0.00012126414425437062, |
| "loss": 0.0016, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.06530552074887595, |
| "grad_norm": 27.02589988708496, |
| "learning_rate": 0.00012126405433538744, |
| "loss": 2.9462, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.06545293727426844, |
| "grad_norm": 24.244503021240234, |
| "learning_rate": 0.00012126396421324212, |
| "loss": 0.8423, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.06560035379966095, |
| "grad_norm": 0.3652421236038208, |
| "learning_rate": 0.00012126387388793495, |
| "loss": 0.0081, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.06574777032505344, |
| "grad_norm": 22.919225692749023, |
| "learning_rate": 0.00012126378335946625, |
| "loss": 1.1268, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.06589518685044593, |
| "grad_norm": 0.18866397440433502, |
| "learning_rate": 0.00012126369262783633, |
| "loss": 1.8645, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.06604260337583844, |
| "grad_norm": 2.1540791988372803, |
| "learning_rate": 0.00012126360169304547, |
| "loss": 0.036, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.06604260337583844, |
| "eval_1_ratio_diff": 0.017147310989867437, |
| "eval_accuracy": 0.8862042088854248, |
| "eval_f1": 0.8880368098159509, |
| "eval_loss": 0.49060943722724915, |
| "eval_precision": 0.8733031674208145, |
| "eval_recall": 0.9032761310452418, |
| "eval_runtime": 1439.5517, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.06619001990123093, |
| "grad_norm": 0.7136353850364685, |
| "learning_rate": 0.00012126351055509399, |
| "loss": 1.4136, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.06633743642662343, |
| "grad_norm": 1.6063231229782104, |
| "learning_rate": 0.00012126341921398221, |
| "loss": 0.0358, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.06648485295201592, |
| "grad_norm": 4.673253536224365, |
| "learning_rate": 0.00012126332766971038, |
| "loss": 0.0494, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.06663226947740841, |
| "grad_norm": 0.21607956290245056, |
| "learning_rate": 0.00012126323592227886, |
| "loss": 0.0053, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.06677968600280092, |
| "grad_norm": 32.70335006713867, |
| "learning_rate": 0.00012126314397168796, |
| "loss": 1.5106, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.06692710252819341, |
| "grad_norm": 38.56415557861328, |
| "learning_rate": 0.00012126305181793794, |
| "loss": 0.8798, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.06707451905358591, |
| "grad_norm": 0.018692007288336754, |
| "learning_rate": 0.00012126295946102917, |
| "loss": 0.0004, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.0672219355789784, |
| "grad_norm": 22.49344825744629, |
| "learning_rate": 0.00012126286690096191, |
| "loss": 0.9364, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.0672219355789784, |
| "eval_1_ratio_diff": -0.18706157443491817, |
| "eval_accuracy": 0.7802026500389712, |
| "eval_f1": 0.7293666026871402, |
| "eval_loss": 1.4581658840179443, |
| "eval_precision": 0.9476309226932669, |
| "eval_recall": 0.592823712948518, |
| "eval_runtime": 1440.0947, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.0673693521043709, |
| "grad_norm": 182.47938537597656, |
| "learning_rate": 0.00012126277413773649, |
| "loss": 1.0293, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.0675167686297634, |
| "grad_norm": 4.0591816902160645, |
| "learning_rate": 0.00012126268117135323, |
| "loss": 0.0308, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.06766418515515589, |
| "grad_norm": 6.380730628967285, |
| "learning_rate": 0.00012126258800181242, |
| "loss": 1.2327, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.0678116016805484, |
| "grad_norm": 31.462488174438477, |
| "learning_rate": 0.00012126249462911438, |
| "loss": 0.8761, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.06795901820594089, |
| "grad_norm": 0.0057801539078354836, |
| "learning_rate": 0.00012126240105325944, |
| "loss": 0.0077, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.06810643473133338, |
| "grad_norm": 1.9855010509490967, |
| "learning_rate": 0.0001212623072742479, |
| "loss": 0.0239, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.06825385125672588, |
| "grad_norm": 0.4254453480243683, |
| "learning_rate": 0.00012126221329208006, |
| "loss": 0.0032, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.06840126778211837, |
| "grad_norm": 0.502257227897644, |
| "learning_rate": 0.00012126211910675626, |
| "loss": 0.0103, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.06840126778211837, |
| "eval_1_ratio_diff": 0.026500389711613392, |
| "eval_accuracy": 0.8768511301636789, |
| "eval_f1": 0.8799392097264438, |
| "eval_loss": 0.7347307205200195, |
| "eval_precision": 0.8577777777777778, |
| "eval_recall": 0.9032761310452418, |
| "eval_runtime": 1440.0126, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.06854868430751088, |
| "grad_norm": 0.6557896733283997, |
| "learning_rate": 0.00012126202471827679, |
| "loss": 0.003, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.06869610083290337, |
| "grad_norm": 0.020085789263248444, |
| "learning_rate": 0.00012126193012664201, |
| "loss": 0.0004, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.06884351735829586, |
| "grad_norm": 0.006013574078679085, |
| "learning_rate": 0.00012126183533185218, |
| "loss": 0.0001, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.06899093388368836, |
| "grad_norm": 252.07472229003906, |
| "learning_rate": 0.00012126174033390767, |
| "loss": 0.8075, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.06913835040908085, |
| "grad_norm": 0.002460025018081069, |
| "learning_rate": 0.00012126164513280875, |
| "loss": 0.0011, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.06928576693447336, |
| "grad_norm": 0.01136123575270176, |
| "learning_rate": 0.00012126154972855578, |
| "loss": 0.0003, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.06943318345986585, |
| "grad_norm": 37.112640380859375, |
| "learning_rate": 0.00012126145412114907, |
| "loss": 2.9468, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.06958059998525835, |
| "grad_norm": 0.006933971308171749, |
| "learning_rate": 0.00012126135831058891, |
| "loss": 0.0001, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.06958059998525835, |
| "eval_1_ratio_diff": 0.2478565861262666, |
| "eval_accuracy": 0.7443491816056118, |
| "eval_f1": 0.795, |
| "eval_loss": 2.4577670097351074, |
| "eval_precision": 0.6631908237747653, |
| "eval_recall": 0.9921996879875195, |
| "eval_runtime": 1439.5508, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.06972801651065084, |
| "grad_norm": 36.87862777709961, |
| "learning_rate": 0.00012126126229687566, |
| "loss": 5.0295, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.06987543303604334, |
| "grad_norm": 70.17023468017578, |
| "learning_rate": 0.00012126116608000961, |
| "loss": 4.0308, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.07002284956143584, |
| "grad_norm": 37.03538513183594, |
| "learning_rate": 0.00012126106965999112, |
| "loss": 1.8733, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.07017026608682833, |
| "grad_norm": 66.47712707519531, |
| "learning_rate": 0.00012126097303682048, |
| "loss": 4.2016, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.07031768261222084, |
| "grad_norm": 29.390884399414062, |
| "learning_rate": 0.00012126087621049803, |
| "loss": 1.9788, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.07046509913761333, |
| "grad_norm": 10.997523307800293, |
| "learning_rate": 0.00012126077918102409, |
| "loss": 0.1381, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.07061251566300582, |
| "grad_norm": 38.46750259399414, |
| "learning_rate": 0.00012126068194839898, |
| "loss": 0.8822, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.07075993218839832, |
| "grad_norm": 18.62594985961914, |
| "learning_rate": 0.00012126058451262304, |
| "loss": 0.3758, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.07075993218839832, |
| "eval_1_ratio_diff": -0.4505066250974279, |
| "eval_accuracy": 0.5494933749025721, |
| "eval_f1": 0.17897727272727273, |
| "eval_loss": 0.5460181832313538, |
| "eval_precision": 1.0, |
| "eval_recall": 0.09828393135725429, |
| "eval_runtime": 1440.4539, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.07090734871379081, |
| "grad_norm": 21.68712615966797, |
| "learning_rate": 0.00012126048687369658, |
| "loss": 0.3891, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.07105476523918332, |
| "grad_norm": 7.1598124504089355, |
| "learning_rate": 0.00012126038903161995, |
| "loss": 0.3555, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.07120218176457581, |
| "grad_norm": 28.80471420288086, |
| "learning_rate": 0.00012126029098639344, |
| "loss": 0.9078, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.0713495982899683, |
| "grad_norm": 18.606401443481445, |
| "learning_rate": 0.00012126019273801743, |
| "loss": 0.2927, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.0714970148153608, |
| "grad_norm": 21.51089859008789, |
| "learning_rate": 0.0001212600942864922, |
| "loss": 0.6348, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.0716444313407533, |
| "grad_norm": 4.713807582855225, |
| "learning_rate": 0.00012125999563181809, |
| "loss": 0.5351, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.0717918478661458, |
| "grad_norm": 11.428181648254395, |
| "learning_rate": 0.00012125989677399546, |
| "loss": 0.2465, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.07193926439153829, |
| "grad_norm": 13.697668075561523, |
| "learning_rate": 0.00012125979771302464, |
| "loss": 0.1411, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.07193926439153829, |
| "eval_1_ratio_diff": 0.03897116134060796, |
| "eval_accuracy": 0.8846453624318005, |
| "eval_f1": 0.8888888888888888, |
| "eval_loss": 0.38700371980667114, |
| "eval_precision": 0.8567293777134588, |
| "eval_recall": 0.9235569422776911, |
| "eval_runtime": 1440.5564, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.07208668091693078, |
| "grad_norm": 3.2123868465423584, |
| "learning_rate": 0.0001212596984489059, |
| "loss": 0.0295, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.07223409744232329, |
| "grad_norm": 0.026355383917689323, |
| "learning_rate": 0.00012125959898163965, |
| "loss": 0.0005, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.07238151396771578, |
| "grad_norm": 0.10228274017572403, |
| "learning_rate": 0.00012125949931122618, |
| "loss": 0.002, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.07252893049310828, |
| "grad_norm": 0.031520161777734756, |
| "learning_rate": 0.00012125939943766583, |
| "loss": 0.0008, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.07267634701850077, |
| "grad_norm": 0.1047026515007019, |
| "learning_rate": 0.00012125929936095894, |
| "loss": 0.0009, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.07282376354389328, |
| "grad_norm": 30.88459587097168, |
| "learning_rate": 0.00012125919908110585, |
| "loss": 2.267, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.07297118006928577, |
| "grad_norm": 0.029362376779317856, |
| "learning_rate": 0.0001212590985981069, |
| "loss": 0.0003, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.07311859659467826, |
| "grad_norm": 0.2791018784046173, |
| "learning_rate": 0.0001212589979119624, |
| "loss": 0.0017, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.07311859659467826, |
| "eval_1_ratio_diff": 0.05378020265003891, |
| "eval_accuracy": 0.877630553390491, |
| "eval_f1": 0.8837897853441895, |
| "eval_loss": 0.7231972813606262, |
| "eval_precision": 0.8408450704225352, |
| "eval_recall": 0.9313572542901716, |
| "eval_runtime": 1440.0578, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.07326601312007076, |
| "grad_norm": 0.056903205811977386, |
| "learning_rate": 0.00012125889702267272, |
| "loss": 0.0007, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.07341342964546325, |
| "grad_norm": 0.015094200149178505, |
| "learning_rate": 0.00012125879593023818, |
| "loss": 0.0002, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.07356084617085576, |
| "grad_norm": 0.6008047461509705, |
| "learning_rate": 0.00012125869463465912, |
| "loss": 0.0045, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.07370826269624825, |
| "grad_norm": 0.6626961827278137, |
| "learning_rate": 0.00012125859313593587, |
| "loss": 0.004, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.07385567922164074, |
| "grad_norm": 0.009313930757343769, |
| "learning_rate": 0.0001212584914340688, |
| "loss": 0.0002, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.07400309574703325, |
| "grad_norm": 0.01076335646212101, |
| "learning_rate": 0.00012125838952905822, |
| "loss": 0.0004, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.07415051227242574, |
| "grad_norm": 0.008014670573174953, |
| "learning_rate": 0.00012125828742090447, |
| "loss": 0.0001, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.07429792879781824, |
| "grad_norm": 33.344932556152344, |
| "learning_rate": 0.00012125818510960795, |
| "loss": 2.0841, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.07429792879781824, |
| "eval_1_ratio_diff": 0.08261886204208879, |
| "eval_accuracy": 0.8643803585346843, |
| "eval_f1": 0.8746397694524496, |
| "eval_loss": 0.954525887966156, |
| "eval_precision": 0.8125836680053548, |
| "eval_recall": 0.9469578783151326, |
| "eval_runtime": 1439.8312, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.07444534532321073, |
| "grad_norm": 0.009119726717472076, |
| "learning_rate": 0.00012125808259516893, |
| "loss": 2.2253, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.07459276184860322, |
| "grad_norm": 0.08696369081735611, |
| "learning_rate": 0.00012125797987758778, |
| "loss": 0.0009, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.07474017837399573, |
| "grad_norm": 26.136661529541016, |
| "learning_rate": 0.00012125787695686484, |
| "loss": 1.5774, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.07488759489938822, |
| "grad_norm": 32.24976348876953, |
| "learning_rate": 0.00012125777383300048, |
| "loss": 1.1735, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.07503501142478072, |
| "grad_norm": 0.5457736253738403, |
| "learning_rate": 0.00012125767050599501, |
| "loss": 0.0112, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.07518242795017321, |
| "grad_norm": 0.7166759967803955, |
| "learning_rate": 0.0001212575669758488, |
| "loss": 0.2859, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.0753298444755657, |
| "grad_norm": 0.32718005776405334, |
| "learning_rate": 0.00012125746324256221, |
| "loss": 1.5148, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.07547726100095821, |
| "grad_norm": 88.85284423828125, |
| "learning_rate": 0.00012125735930613554, |
| "loss": 2.6444, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.07547726100095821, |
| "eval_1_ratio_diff": 0.024162120031176904, |
| "eval_accuracy": 0.8838659392049883, |
| "eval_f1": 0.8865194211728865, |
| "eval_loss": 0.48203912377357483, |
| "eval_precision": 0.8660714285714286, |
| "eval_recall": 0.9079563182527302, |
| "eval_runtime": 1439.7938, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.0756246775263507, |
| "grad_norm": 0.2224024385213852, |
| "learning_rate": 0.00012125725516656918, |
| "loss": 0.0068, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.0757720940517432, |
| "grad_norm": 0.2110309898853302, |
| "learning_rate": 0.00012125715082386346, |
| "loss": 0.0036, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.0759195105771357, |
| "grad_norm": 0.20480689406394958, |
| "learning_rate": 0.00012125704627801874, |
| "loss": 0.0101, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.07606692710252819, |
| "grad_norm": 24.321718215942383, |
| "learning_rate": 0.00012125694152903538, |
| "loss": 2.3569, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.07621434362792069, |
| "grad_norm": 0.3324243426322937, |
| "learning_rate": 0.00012125683657691368, |
| "loss": 0.0101, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.07636176015331318, |
| "grad_norm": 1.0518757104873657, |
| "learning_rate": 0.00012125673142165406, |
| "loss": 0.013, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.07650917667870569, |
| "grad_norm": 25.96786880493164, |
| "learning_rate": 0.00012125662606325683, |
| "loss": 1.3031, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.07665659320409818, |
| "grad_norm": 0.12808893620967865, |
| "learning_rate": 0.00012125652050172236, |
| "loss": 0.0051, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.07665659320409818, |
| "eval_1_ratio_diff": -0.05689789555728764, |
| "eval_accuracy": 0.8698363211223694, |
| "eval_f1": 0.8618693134822167, |
| "eval_loss": 0.5904788970947266, |
| "eval_precision": 0.9172535211267606, |
| "eval_recall": 0.8127925117004681, |
| "eval_runtime": 1440.2519, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.07680400972949068, |
| "grad_norm": 30.74445343017578, |
| "learning_rate": 0.00012125641473705098, |
| "loss": 1.479, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.07695142625488317, |
| "grad_norm": 0.33330148458480835, |
| "learning_rate": 0.00012125630876924309, |
| "loss": 2.7544, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.07709884278027566, |
| "grad_norm": 0.4779714047908783, |
| "learning_rate": 0.00012125620259829898, |
| "loss": 0.0059, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.07724625930566817, |
| "grad_norm": 0.4376041889190674, |
| "learning_rate": 0.00012125609622421907, |
| "loss": 0.0273, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.07739367583106066, |
| "grad_norm": 0.14147210121154785, |
| "learning_rate": 0.00012125598964700367, |
| "loss": 1.3617, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.07754109235645316, |
| "grad_norm": 20.751298904418945, |
| "learning_rate": 0.00012125588286665319, |
| "loss": 2.4864, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.07768850888184566, |
| "grad_norm": 0.3589191138744354, |
| "learning_rate": 0.00012125577588316793, |
| "loss": 0.0102, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.07783592540723815, |
| "grad_norm": 31.519622802734375, |
| "learning_rate": 0.00012125566869654828, |
| "loss": 1.8161, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.07783592540723815, |
| "eval_1_ratio_diff": -0.05455962587685115, |
| "eval_accuracy": 0.8472330475448169, |
| "eval_f1": 0.8382838283828383, |
| "eval_loss": 0.5423593521118164, |
| "eval_precision": 0.8896672504378283, |
| "eval_recall": 0.7925117004680188, |
| "eval_runtime": 1440.6162, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.07798334193263065, |
| "grad_norm": 29.618946075439453, |
| "learning_rate": 0.00012125556130679457, |
| "loss": 0.9249, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.07813075845802314, |
| "grad_norm": 24.92931365966797, |
| "learning_rate": 0.0001212554537139072, |
| "loss": 1.3237, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.07827817498341565, |
| "grad_norm": 6.922366142272949, |
| "learning_rate": 0.00012125534591788653, |
| "loss": 0.0954, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.07842559150880814, |
| "grad_norm": 1.4033849239349365, |
| "learning_rate": 0.00012125523791873287, |
| "loss": 0.1059, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.07857300803420063, |
| "grad_norm": 0.5430750846862793, |
| "learning_rate": 0.00012125512971644664, |
| "loss": 0.0167, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.07872042455959313, |
| "grad_norm": 25.31169319152832, |
| "learning_rate": 0.00012125502131102817, |
| "loss": 1.4498, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.07886784108498562, |
| "grad_norm": 24.96006965637207, |
| "learning_rate": 0.00012125491270247783, |
| "loss": 1.3258, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.07901525761037813, |
| "grad_norm": 1.3635300397872925, |
| "learning_rate": 0.000121254803890796, |
| "loss": 0.0339, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.07901525761037813, |
| "eval_1_ratio_diff": -0.029618082618862063, |
| "eval_accuracy": 0.8581449727201871, |
| "eval_f1": 0.8536977491961415, |
| "eval_loss": 0.6108663082122803, |
| "eval_precision": 0.8805970149253731, |
| "eval_recall": 0.828393135725429, |
| "eval_runtime": 1440.3267, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.07916267413577062, |
| "grad_norm": 41.34056091308594, |
| "learning_rate": 0.00012125469487598301, |
| "loss": 1.665, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.07931009066116311, |
| "grad_norm": 0.8467972278594971, |
| "learning_rate": 0.00012125458565803925, |
| "loss": 0.0102, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.07945750718655561, |
| "grad_norm": 0.09642868489027023, |
| "learning_rate": 0.00012125447623696508, |
| "loss": 0.0053, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.0796049237119481, |
| "grad_norm": 0.06861916184425354, |
| "learning_rate": 0.00012125436661276089, |
| "loss": 0.5701, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.07975234023734061, |
| "grad_norm": 22.139467239379883, |
| "learning_rate": 0.000121254256785427, |
| "loss": 1.088, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.0798997567627331, |
| "grad_norm": 0.037754353135824203, |
| "learning_rate": 0.00012125414675496381, |
| "loss": 0.0046, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.08004717328812559, |
| "grad_norm": 32.785037994384766, |
| "learning_rate": 0.00012125403652137169, |
| "loss": 0.956, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.0801945898135181, |
| "grad_norm": 21.96536636352539, |
| "learning_rate": 0.000121253926084651, |
| "loss": 3.1959, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.0801945898135181, |
| "eval_1_ratio_diff": -0.04364770070148094, |
| "eval_accuracy": 0.8659392049883087, |
| "eval_f1": 0.8597063621533442, |
| "eval_loss": 0.6232408285140991, |
| "eval_precision": 0.9008547008547009, |
| "eval_recall": 0.8221528861154446, |
| "eval_runtime": 1440.7525, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.08034200633891059, |
| "grad_norm": 0.09767896682024002, |
| "learning_rate": 0.00012125381544480211, |
| "loss": 1.9769, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.08048942286430309, |
| "grad_norm": 4.43467378616333, |
| "learning_rate": 0.0001212537046018254, |
| "loss": 1.8357, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.08063683938969558, |
| "grad_norm": 22.267379760742188, |
| "learning_rate": 0.00012125359355572121, |
| "loss": 0.9555, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.08078425591508809, |
| "grad_norm": 0.7281066179275513, |
| "learning_rate": 0.00012125348230648997, |
| "loss": 0.0391, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.08093167244048058, |
| "grad_norm": 0.4542294442653656, |
| "learning_rate": 0.000121253370854132, |
| "loss": 0.9409, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.08107908896587307, |
| "grad_norm": 22.38312339782715, |
| "learning_rate": 0.0001212532591986477, |
| "loss": 1.0271, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.08122650549126557, |
| "grad_norm": 26.339080810546875, |
| "learning_rate": 0.00012125314734003743, |
| "loss": 1.4919, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.08137392201665807, |
| "grad_norm": 3.4797956943511963, |
| "learning_rate": 0.00012125303527830157, |
| "loss": 0.0342, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.08137392201665807, |
| "eval_1_ratio_diff": -0.15354637568199536, |
| "eval_accuracy": 0.7887763055339049, |
| "eval_f1": 0.7502304147465437, |
| "eval_loss": 0.8048840761184692, |
| "eval_precision": 0.9166666666666666, |
| "eval_recall": 0.6349453978159126, |
| "eval_runtime": 1440.2595, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.08152133854205057, |
| "grad_norm": 27.387937545776367, |
| "learning_rate": 0.0001212529230134405, |
| "loss": 2.2878, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.08166875506744306, |
| "grad_norm": 0.27098074555397034, |
| "learning_rate": 0.00012125281054545459, |
| "loss": 0.0115, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.08181617159283555, |
| "grad_norm": 0.17622074484825134, |
| "learning_rate": 0.00012125269787434425, |
| "loss": 0.9066, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.08196358811822806, |
| "grad_norm": 13.168516159057617, |
| "learning_rate": 0.00012125258500010979, |
| "loss": 1.0468, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.08211100464362055, |
| "grad_norm": 15.512298583984375, |
| "learning_rate": 0.00012125247192275165, |
| "loss": 0.1845, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.08225842116901305, |
| "grad_norm": 31.888328552246094, |
| "learning_rate": 0.00012125235864227018, |
| "loss": 0.3136, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.08240583769440554, |
| "grad_norm": 9.891843795776367, |
| "learning_rate": 0.00012125224515866574, |
| "loss": 0.8436, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.08255325421979803, |
| "grad_norm": 46.16787338256836, |
| "learning_rate": 0.00012125213147193877, |
| "loss": 2.5811, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.08255325421979803, |
| "eval_1_ratio_diff": 0.07560405300077944, |
| "eval_accuracy": 0.848012470771629, |
| "eval_f1": 0.8585931834662799, |
| "eval_loss": 0.6410078406333923, |
| "eval_precision": 0.8021680216802168, |
| "eval_recall": 0.9235569422776911, |
| "eval_runtime": 1440.0548, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.08270067074519054, |
| "grad_norm": 7.604285717010498, |
| "learning_rate": 0.00012125201758208962, |
| "loss": 1.1177, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.08284808727058303, |
| "grad_norm": 0.39338427782058716, |
| "learning_rate": 0.00012125190348911864, |
| "loss": 1.5911, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.08299550379597553, |
| "grad_norm": 36.94788360595703, |
| "learning_rate": 0.00012125178919302626, |
| "loss": 1.3629, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.08314292032136802, |
| "grad_norm": 0.8372169137001038, |
| "learning_rate": 0.00012125167469381283, |
| "loss": 0.0102, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.08329033684676052, |
| "grad_norm": 0.12225531786680222, |
| "learning_rate": 0.00012125155999147876, |
| "loss": 0.0043, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.08343775337215302, |
| "grad_norm": 62.011695861816406, |
| "learning_rate": 0.0001212514450860244, |
| "loss": 1.6697, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.08358516989754551, |
| "grad_norm": 0.02834857441484928, |
| "learning_rate": 0.00012125132997745018, |
| "loss": 0.0044, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.08373258642293802, |
| "grad_norm": 0.007508635055273771, |
| "learning_rate": 0.00012125121466575647, |
| "loss": 0.0067, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.08373258642293802, |
| "eval_1_ratio_diff": -0.059236165237724125, |
| "eval_accuracy": 0.8534684333593141, |
| "eval_f1": 0.8441127694859039, |
| "eval_loss": 0.8524520993232727, |
| "eval_precision": 0.9008849557522124, |
| "eval_recall": 0.7940717628705148, |
| "eval_runtime": 1440.1118, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.0838800029483305, |
| "grad_norm": 70.38623046875, |
| "learning_rate": 0.00012125109915094362, |
| "loss": 2.694, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.08402741947372301, |
| "grad_norm": 1.548732042312622, |
| "learning_rate": 0.00012125098343301206, |
| "loss": 0.0213, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.0841748359991155, |
| "grad_norm": 1.2770323753356934, |
| "learning_rate": 0.00012125086751196217, |
| "loss": 0.0099, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.08432225252450799, |
| "grad_norm": 30.610591888427734, |
| "learning_rate": 0.00012125075138779432, |
| "loss": 2.0352, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.0844696690499005, |
| "grad_norm": 0.8128361701965332, |
| "learning_rate": 0.0001212506350605089, |
| "loss": 1.0719, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.08461708557529299, |
| "grad_norm": 1.6853057146072388, |
| "learning_rate": 0.00012125051853010634, |
| "loss": 0.0092, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.0847645021006855, |
| "grad_norm": 39.670047760009766, |
| "learning_rate": 0.000121250401796587, |
| "loss": 1.7653, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.08491191862607798, |
| "grad_norm": 36.04311752319336, |
| "learning_rate": 0.00012125028485995127, |
| "loss": 1.3473, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.08491191862607798, |
| "eval_1_ratio_diff": 0.03897116134060796, |
| "eval_accuracy": 0.8737334372564303, |
| "eval_f1": 0.8783783783783784, |
| "eval_loss": 0.6749188899993896, |
| "eval_precision": 0.8465991316931982, |
| "eval_recall": 0.9126365054602185, |
| "eval_runtime": 1440.4215, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.08505933515147047, |
| "grad_norm": 0.17764577269554138, |
| "learning_rate": 0.00012125016772019952, |
| "loss": 0.0023, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.08520675167686298, |
| "grad_norm": 0.3527587652206421, |
| "learning_rate": 0.0001212500503773322, |
| "loss": 0.0055, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.08535416820225547, |
| "grad_norm": 0.1379138082265854, |
| "learning_rate": 0.00012124993283134963, |
| "loss": 1.6429, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.08550158472764798, |
| "grad_norm": 0.14264832437038422, |
| "learning_rate": 0.0001212498150822523, |
| "loss": 0.0089, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.08564900125304047, |
| "grad_norm": 30.086095809936523, |
| "learning_rate": 0.00012124969713004051, |
| "loss": 2.4261, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.08579641777843296, |
| "grad_norm": 0.26527953147888184, |
| "learning_rate": 0.00012124957897471469, |
| "loss": 0.6917, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.08594383430382546, |
| "grad_norm": 8.70952320098877, |
| "learning_rate": 0.00012124946061627526, |
| "loss": 0.0826, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.08609125082921795, |
| "grad_norm": 0.031940966844558716, |
| "learning_rate": 0.0001212493420547226, |
| "loss": 0.0008, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.08609125082921795, |
| "eval_1_ratio_diff": -0.010132501948558081, |
| "eval_accuracy": 0.8636009353078722, |
| "eval_f1": 0.8620961386918834, |
| "eval_loss": 0.5565428137779236, |
| "eval_precision": 0.8710191082802548, |
| "eval_recall": 0.8533541341653667, |
| "eval_runtime": 1440.6772, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.08623866735461046, |
| "grad_norm": 1.1032943725585938, |
| "learning_rate": 0.0001212492232900571, |
| "loss": 0.0135, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.08638608388000295, |
| "grad_norm": 0.6731190085411072, |
| "learning_rate": 0.00012124910432227916, |
| "loss": 0.0145, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.08653350040539544, |
| "grad_norm": 0.2941815257072449, |
| "learning_rate": 0.00012124898515138918, |
| "loss": 0.005, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.08668091693078794, |
| "grad_norm": 0.060058582574129105, |
| "learning_rate": 0.00012124886577738757, |
| "loss": 0.0024, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.08682833345618043, |
| "grad_norm": 0.029819436371326447, |
| "learning_rate": 0.0001212487462002747, |
| "loss": 0.0015, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.08697574998157294, |
| "grad_norm": 0.1549704670906067, |
| "learning_rate": 0.000121248626420051, |
| "loss": 0.0023, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.08712316650696543, |
| "grad_norm": 1.1005401611328125, |
| "learning_rate": 0.00012124850643671686, |
| "loss": 0.0065, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.08727058303235792, |
| "grad_norm": 200.2630157470703, |
| "learning_rate": 0.00012124838625027271, |
| "loss": 0.7416, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.08727058303235792, |
| "eval_1_ratio_diff": 0.04520654715510519, |
| "eval_accuracy": 0.8752922837100545, |
| "eval_f1": 0.8805970149253731, |
| "eval_loss": 0.8647755980491638, |
| "eval_precision": 0.844062947067239, |
| "eval_recall": 0.9204368174726989, |
| "eval_runtime": 1441.0897, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.08741799955775043, |
| "grad_norm": 0.012469271197915077, |
| "learning_rate": 0.0001212482658607189, |
| "loss": 0.0003, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.08756541608314292, |
| "grad_norm": 0.017095841467380524, |
| "learning_rate": 0.00012124814526805586, |
| "loss": 0.0003, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.08771283260853542, |
| "grad_norm": 23.186222076416016, |
| "learning_rate": 0.00012124802447228401, |
| "loss": 2.0149, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.08786024913392791, |
| "grad_norm": 0.010486994870007038, |
| "learning_rate": 0.00012124790347340374, |
| "loss": 0.0006, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.08800766565932042, |
| "grad_norm": 40.754051208496094, |
| "learning_rate": 0.00012124778227141545, |
| "loss": 2.8077, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.08815508218471291, |
| "grad_norm": 0.08611409366130829, |
| "learning_rate": 0.00012124766086631955, |
| "loss": 0.0013, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.0883024987101054, |
| "grad_norm": 0.28396108746528625, |
| "learning_rate": 0.00012124753925811646, |
| "loss": 2.2785, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.0884499152354979, |
| "grad_norm": 0.03215723857283592, |
| "learning_rate": 0.00012124741744680656, |
| "loss": 0.0026, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.0884499152354979, |
| "eval_1_ratio_diff": 0.04598597038191732, |
| "eval_accuracy": 0.877630553390491, |
| "eval_f1": 0.8829231916480239, |
| "eval_loss": 0.7880816459655762, |
| "eval_precision": 0.8457142857142858, |
| "eval_recall": 0.9235569422776911, |
| "eval_runtime": 1441.3958, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.0885973317608904, |
| "grad_norm": 0.03621472418308258, |
| "learning_rate": 0.00012124729543239029, |
| "loss": 0.7748, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.0887447482862829, |
| "grad_norm": 0.09097783267498016, |
| "learning_rate": 0.00012124717321486803, |
| "loss": 1.8821, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.08889216481167539, |
| "grad_norm": 0.3395259976387024, |
| "learning_rate": 0.00012124705079424022, |
| "loss": 0.0073, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.08903958133706788, |
| "grad_norm": 0.04736631363630295, |
| "learning_rate": 0.00012124692817050723, |
| "loss": 1.567, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.08918699786246038, |
| "grad_norm": 0.08807298541069031, |
| "learning_rate": 0.00012124680534366952, |
| "loss": 0.0014, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.08933441438785288, |
| "grad_norm": 0.05549991875886917, |
| "learning_rate": 0.00012124668231372745, |
| "loss": 0.0021, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.08948183091324538, |
| "grad_norm": 0.06815358251333237, |
| "learning_rate": 0.00012124655908068146, |
| "loss": 0.9174, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.08962924743863787, |
| "grad_norm": 0.03639994189143181, |
| "learning_rate": 0.00012124643564453199, |
| "loss": 0.0199, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.08962924743863787, |
| "eval_1_ratio_diff": 0.020265003897116163, |
| "eval_accuracy": 0.8628215120810601, |
| "eval_f1": 0.8654434250764526, |
| "eval_loss": 0.648876965045929, |
| "eval_precision": 0.848575712143928, |
| "eval_recall": 0.8829953198127926, |
| "eval_runtime": 1441.2405, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.08977666396403036, |
| "grad_norm": 23.13437271118164, |
| "learning_rate": 0.00012124631200527941, |
| "loss": 1.6889, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.08992408048942287, |
| "grad_norm": 0.2734740674495697, |
| "learning_rate": 0.00012124618816292414, |
| "loss": 0.0059, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.09007149701481536, |
| "grad_norm": 12.39369010925293, |
| "learning_rate": 0.00012124606411746661, |
| "loss": 0.0533, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.09021891354020786, |
| "grad_norm": 0.036048658192157745, |
| "learning_rate": 0.00012124593986890722, |
| "loss": 0.0011, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.09036633006560035, |
| "grad_norm": 0.3171124756336212, |
| "learning_rate": 0.00012124581541724642, |
| "loss": 1.5207, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.09051374659099284, |
| "grad_norm": 4.317696571350098, |
| "learning_rate": 0.00012124569076248459, |
| "loss": 1.5358, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.09066116311638535, |
| "grad_norm": 0.12044669687747955, |
| "learning_rate": 0.00012124556590462215, |
| "loss": 0.0053, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.09080857964177784, |
| "grad_norm": 0.21298988163471222, |
| "learning_rate": 0.00012124544084365953, |
| "loss": 0.0081, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.09080857964177784, |
| "eval_1_ratio_diff": 0.009353078721745844, |
| "eval_accuracy": 0.8222915042868277, |
| "eval_f1": 0.8238021638330757, |
| "eval_loss": 0.7862046360969543, |
| "eval_precision": 0.8162327718223583, |
| "eval_recall": 0.8315132605304212, |
| "eval_runtime": 1439.2896, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.09095599616717034, |
| "grad_norm": 20.541194915771484, |
| "learning_rate": 0.00012124531557959717, |
| "loss": 1.194, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.09110341269256284, |
| "grad_norm": 0.2897285223007202, |
| "learning_rate": 0.00012124519011243545, |
| "loss": 0.8952, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.09125082921795533, |
| "grad_norm": 0.08111666887998581, |
| "learning_rate": 0.0001212450644421748, |
| "loss": 0.006, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.09139824574334783, |
| "grad_norm": 0.4867294430732727, |
| "learning_rate": 0.00012124493856881568, |
| "loss": 1.7795, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.09154566226874032, |
| "grad_norm": 0.9198406934738159, |
| "learning_rate": 0.00012124481249235846, |
| "loss": 0.0259, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.09169307879413283, |
| "grad_norm": 0.08149991929531097, |
| "learning_rate": 0.0001212446862128036, |
| "loss": 1.2016, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.09184049531952532, |
| "grad_norm": 0.1457146853208542, |
| "learning_rate": 0.0001212445597301515, |
| "loss": 0.9302, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.09198791184491782, |
| "grad_norm": 0.24497820436954498, |
| "learning_rate": 0.00012124443304440259, |
| "loss": 0.0051, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.09198791184491782, |
| "eval_1_ratio_diff": 0.02260327357755254, |
| "eval_accuracy": 0.8339828526890102, |
| "eval_f1": 0.8375286041189931, |
| "eval_loss": 0.747604489326477, |
| "eval_precision": 0.8194029850746268, |
| "eval_recall": 0.8564742589703588, |
| "eval_runtime": 1440.6099, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.09213532837031031, |
| "grad_norm": 0.10772482305765152, |
| "learning_rate": 0.0001212443061555573, |
| "loss": 0.0032, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.0922827448957028, |
| "grad_norm": 3.8056480884552, |
| "learning_rate": 0.00012124417906361605, |
| "loss": 0.838, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.09243016142109531, |
| "grad_norm": 21.590364456176758, |
| "learning_rate": 0.00012124405176857927, |
| "loss": 2.5474, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.0925775779464878, |
| "grad_norm": 21.33682632446289, |
| "learning_rate": 0.00012124392427044737, |
| "loss": 2.7454, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.0927249944718803, |
| "grad_norm": 0.21534398198127747, |
| "learning_rate": 0.00012124379656922081, |
| "loss": 0.0068, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.0928724109972728, |
| "grad_norm": 20.76007843017578, |
| "learning_rate": 0.0001212436686649, |
| "loss": 1.2547, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.09301982752266529, |
| "grad_norm": 20.636024475097656, |
| "learning_rate": 0.00012124354055748535, |
| "loss": 1.5976, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.09316724404805779, |
| "grad_norm": 2.3518083095550537, |
| "learning_rate": 0.00012124341224697731, |
| "loss": 0.0369, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.09316724404805779, |
| "eval_1_ratio_diff": -0.03117692907248637, |
| "eval_accuracy": 0.8565861262665627, |
| "eval_f1": 0.8518518518518519, |
| "eval_loss": 0.43984636664390564, |
| "eval_precision": 0.8801996672212978, |
| "eval_recall": 0.8252730109204368, |
| "eval_runtime": 1440.9991, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.446, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.09331466057345028, |
| "grad_norm": 1.4304808378219604, |
| "learning_rate": 0.0001212432837333763, |
| "loss": 0.03, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.09346207709884279, |
| "grad_norm": 0.6885532736778259, |
| "learning_rate": 0.00012124315501668278, |
| "loss": 0.7603, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.09360949362423528, |
| "grad_norm": 0.5777420997619629, |
| "learning_rate": 0.00012124302609689715, |
| "loss": 1.1026, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.09375691014962777, |
| "grad_norm": 1.5885238647460938, |
| "learning_rate": 0.00012124289697401986, |
| "loss": 0.041, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.09390432667502027, |
| "grad_norm": 0.37640276551246643, |
| "learning_rate": 0.00012124276764805132, |
| "loss": 0.0182, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.09405174320041276, |
| "grad_norm": 25.54754066467285, |
| "learning_rate": 0.00012124263811899196, |
| "loss": 1.2952, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.09419915972580527, |
| "grad_norm": 41.04960632324219, |
| "learning_rate": 0.00012124250838684226, |
| "loss": 2.126, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.09434657625119776, |
| "grad_norm": 0.16556452214717865, |
| "learning_rate": 0.00012124237845160263, |
| "loss": 0.0078, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.09434657625119776, |
| "eval_1_ratio_diff": -0.08573655494933752, |
| "eval_accuracy": 0.8487918939984411, |
| "eval_f1": 0.8344709897610921, |
| "eval_loss": 0.6657168865203857, |
| "eval_precision": 0.9209039548022598, |
| "eval_recall": 0.7628705148205929, |
| "eval_runtime": 1440.6129, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.09449399277659025, |
| "grad_norm": 17.6622314453125, |
| "learning_rate": 0.00012124224831327347, |
| "loss": 0.1561, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.09464140930198275, |
| "grad_norm": 0.15980716049671173, |
| "learning_rate": 0.00012124211797185528, |
| "loss": 0.0042, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.09478882582737524, |
| "grad_norm": 0.04221845418214798, |
| "learning_rate": 0.00012124198742734845, |
| "loss": 1.4535, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.09493624235276775, |
| "grad_norm": 0.056126296520233154, |
| "learning_rate": 0.00012124185667975342, |
| "loss": 0.0031, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.09508365887816024, |
| "grad_norm": 0.08041621744632721, |
| "learning_rate": 0.00012124172572907067, |
| "loss": 0.0018, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.09523107540355275, |
| "grad_norm": 28.64826011657715, |
| "learning_rate": 0.00012124159457530059, |
| "loss": 1.6516, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.09537849192894524, |
| "grad_norm": 0.31489408016204834, |
| "learning_rate": 0.00012124146321844365, |
| "loss": 0.0038, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.09552590845433773, |
| "grad_norm": 1.7656670808792114, |
| "learning_rate": 0.00012124133165850026, |
| "loss": 0.0131, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.09552590845433773, |
| "eval_1_ratio_diff": 0.04832424006235381, |
| "eval_accuracy": 0.8176149649259548, |
| "eval_f1": 0.8258928571428571, |
| "eval_loss": 0.8926898241043091, |
| "eval_precision": 0.7894736842105263, |
| "eval_recall": 0.8658346333853354, |
| "eval_runtime": 1440.8824, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.446, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.09567332497973023, |
| "grad_norm": 0.3228819668292999, |
| "learning_rate": 0.00012124119989547089, |
| "loss": 0.8356, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.09582074150512272, |
| "grad_norm": 58.03204345703125, |
| "learning_rate": 0.00012124106792935597, |
| "loss": 0.5161, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.09596815803051523, |
| "grad_norm": 0.094666488468647, |
| "learning_rate": 0.00012124093576015595, |
| "loss": 0.0014, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.09611557455590772, |
| "grad_norm": 0.054852358996868134, |
| "learning_rate": 0.00012124080338787127, |
| "loss": 0.0025, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.09626299108130021, |
| "grad_norm": 2.4614083766937256, |
| "learning_rate": 0.00012124067081250235, |
| "loss": 0.0231, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.09641040760669271, |
| "grad_norm": 0.13067440688610077, |
| "learning_rate": 0.00012124053803404966, |
| "loss": 0.0019, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.0965578241320852, |
| "grad_norm": 0.05831296741962433, |
| "learning_rate": 0.00012124040505251365, |
| "loss": 1.1599, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.09670524065747771, |
| "grad_norm": 22.675302505493164, |
| "learning_rate": 0.00012124027186789477, |
| "loss": 1.7971, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.09670524065747771, |
| "eval_1_ratio_diff": -0.07638347622759162, |
| "eval_accuracy": 0.8207326578332035, |
| "eval_f1": 0.8057432432432432, |
| "eval_loss": 0.9711058735847473, |
| "eval_precision": 0.8784530386740331, |
| "eval_recall": 0.7441497659906396, |
| "eval_runtime": 1440.5355, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.0968526571828702, |
| "grad_norm": 245.76840209960938, |
| "learning_rate": 0.00012124013848019342, |
| "loss": 2.4617, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.09700007370826269, |
| "grad_norm": 21.968021392822266, |
| "learning_rate": 0.00012124000488941008, |
| "loss": 1.4503, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.0971474902336552, |
| "grad_norm": 0.03653848171234131, |
| "learning_rate": 0.00012123987109554522, |
| "loss": 0.0015, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.09729490675904769, |
| "grad_norm": 0.16115568578243256, |
| "learning_rate": 0.00012123973709859925, |
| "loss": 0.0201, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.09744232328444019, |
| "grad_norm": 34.74784851074219, |
| "learning_rate": 0.00012123960289857264, |
| "loss": 1.092, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.09758973980983268, |
| "grad_norm": 17.326068878173828, |
| "learning_rate": 0.00012123946849546582, |
| "loss": 0.0826, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.09773715633522517, |
| "grad_norm": 22.532522201538086, |
| "learning_rate": 0.00012123933388927926, |
| "loss": 2.0905, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.09788457286061768, |
| "grad_norm": 0.09820098429918289, |
| "learning_rate": 0.0001212391990800134, |
| "loss": 0.002, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.09788457286061768, |
| "eval_1_ratio_diff": 0.05689789555728764, |
| "eval_accuracy": 0.8106001558846454, |
| "eval_f1": 0.8206642066420664, |
| "eval_loss": 0.7345473170280457, |
| "eval_precision": 0.7787114845938375, |
| "eval_recall": 0.8673946957878315, |
| "eval_runtime": 1439.7279, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.09803198938601017, |
| "grad_norm": 0.11757276207208633, |
| "learning_rate": 0.00012123906406766871, |
| "loss": 0.0079, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.09817940591140267, |
| "grad_norm": 24.76763153076172, |
| "learning_rate": 0.00012123892885224563, |
| "loss": 1.3389, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.09832682243679516, |
| "grad_norm": 0.0959400087594986, |
| "learning_rate": 0.0001212387934337446, |
| "loss": 0.9421, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.09847423896218765, |
| "grad_norm": 0.3935282826423645, |
| "learning_rate": 0.00012123865781216609, |
| "loss": 0.0104, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.09862165548758016, |
| "grad_norm": 22.505558013916016, |
| "learning_rate": 0.00012123852198751054, |
| "loss": 0.7555, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.09876907201297265, |
| "grad_norm": 1.3673774003982544, |
| "learning_rate": 0.00012123838595977844, |
| "loss": 0.0409, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.09891648853836515, |
| "grad_norm": 0.6889051198959351, |
| "learning_rate": 0.0001212382497289702, |
| "loss": 0.0269, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.09906390506375765, |
| "grad_norm": 0.2218835949897766, |
| "learning_rate": 0.0001212381132950863, |
| "loss": 0.9572, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.09906390506375765, |
| "eval_1_ratio_diff": 0.07560405300077944, |
| "eval_accuracy": 0.8277474668745128, |
| "eval_f1": 0.8397389412617839, |
| "eval_loss": 0.7541435360908508, |
| "eval_precision": 0.7845528455284553, |
| "eval_recall": 0.9032761310452418, |
| "eval_runtime": 1440.149, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.09921132158915015, |
| "grad_norm": 0.08860000967979431, |
| "learning_rate": 0.0001212379766581272, |
| "loss": 0.0038, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.09935873811454264, |
| "grad_norm": 0.1549777388572693, |
| "learning_rate": 0.00012123783981809338, |
| "loss": 0.6904, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.09950615463993513, |
| "grad_norm": 0.4857753813266754, |
| "learning_rate": 0.00012123770277498524, |
| "loss": 0.005, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.09965357116532764, |
| "grad_norm": 0.5475670099258423, |
| "learning_rate": 0.00012123756552880328, |
| "loss": 0.0057, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.09980098769072013, |
| "grad_norm": 0.8644952178001404, |
| "learning_rate": 0.00012123742807954794, |
| "loss": 2.7045, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.09994840421611263, |
| "grad_norm": 0.15051943063735962, |
| "learning_rate": 0.0001212372904272197, |
| "loss": 0.7707, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.10009582074150512, |
| "grad_norm": 0.04434569925069809, |
| "learning_rate": 0.00012123715257181902, |
| "loss": 0.0007, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.10024323726689761, |
| "grad_norm": 0.03767779842019081, |
| "learning_rate": 0.00012123701451334634, |
| "loss": 1.7987, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.10024323726689761, |
| "eval_1_ratio_diff": 0.10054559625876847, |
| "eval_accuracy": 0.8589243959469992, |
| "eval_f1": 0.8717221828490432, |
| "eval_loss": 0.7392542958259583, |
| "eval_precision": 0.7987012987012987, |
| "eval_recall": 0.9594383775351014, |
| "eval_runtime": 1439.9484, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.10039065379229012, |
| "grad_norm": 0.032404810190200806, |
| "learning_rate": 0.00012123687625180216, |
| "loss": 1.3724, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.10053807031768261, |
| "grad_norm": 0.02649116888642311, |
| "learning_rate": 0.00012123673778718691, |
| "loss": 1.3162, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.10068548684307511, |
| "grad_norm": 0.120023712515831, |
| "learning_rate": 0.00012123659911950106, |
| "loss": 0.0026, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.1008329033684676, |
| "grad_norm": 0.28818804025650024, |
| "learning_rate": 0.00012123646024874507, |
| "loss": 0.0048, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.1009803198938601, |
| "grad_norm": 0.5911560654640198, |
| "learning_rate": 0.00012123632117491944, |
| "loss": 0.0142, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.1011277364192526, |
| "grad_norm": 22.85379409790039, |
| "learning_rate": 0.00012123618189802459, |
| "loss": 1.8439, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.10127515294464509, |
| "grad_norm": 0.37168049812316895, |
| "learning_rate": 0.00012123604241806102, |
| "loss": 0.0065, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.1014225694700376, |
| "grad_norm": 0.10927151888608932, |
| "learning_rate": 0.00012123590273502919, |
| "loss": 1.1801, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.1014225694700376, |
| "eval_1_ratio_diff": 0.021823850350740415, |
| "eval_accuracy": 0.8862042088854248, |
| "eval_f1": 0.8885496183206106, |
| "eval_loss": 0.5426926612854004, |
| "eval_precision": 0.8699551569506726, |
| "eval_recall": 0.9079563182527302, |
| "eval_runtime": 1440.2334, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.10156998599543009, |
| "grad_norm": 0.038460321724414825, |
| "learning_rate": 0.00012123576284892955, |
| "loss": 0.0101, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.10171740252082258, |
| "grad_norm": 25.498838424682617, |
| "learning_rate": 0.00012123562275976258, |
| "loss": 1.3981, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.10186481904621508, |
| "grad_norm": 159.9862060546875, |
| "learning_rate": 0.00012123548246752878, |
| "loss": 1.2495, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.10201223557160757, |
| "grad_norm": 0.06094611436128616, |
| "learning_rate": 0.00012123534197222857, |
| "loss": 0.0046, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.10215965209700008, |
| "grad_norm": 26.12101173400879, |
| "learning_rate": 0.00012123520127386245, |
| "loss": 1.3714, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.10230706862239257, |
| "grad_norm": 48.13339614868164, |
| "learning_rate": 0.00012123506037243086, |
| "loss": 0.0869, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.10245448514778506, |
| "grad_norm": 0.5880022644996643, |
| "learning_rate": 0.00012123491926793433, |
| "loss": 0.6204, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.10260190167317756, |
| "grad_norm": 24.889034271240234, |
| "learning_rate": 0.00012123477796037328, |
| "loss": 0.9381, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.10260190167317756, |
| "eval_1_ratio_diff": 0.09664848012470773, |
| "eval_accuracy": 0.8487918939984411, |
| "eval_f1": 0.8620199146514936, |
| "eval_loss": 0.5980536937713623, |
| "eval_precision": 0.792156862745098, |
| "eval_recall": 0.9453978159126365, |
| "eval_runtime": 1440.6605, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.10274931819857006, |
| "grad_norm": 23.767898559570312, |
| "learning_rate": 0.00012123463644974822, |
| "loss": 1.3434, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.10289673472396256, |
| "grad_norm": 0.05240378528833389, |
| "learning_rate": 0.0001212344947360596, |
| "loss": 0.006, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.10304415124935505, |
| "grad_norm": 0.05574984475970268, |
| "learning_rate": 0.00012123435281930789, |
| "loss": 0.0062, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.10319156777474756, |
| "grad_norm": 25.049999237060547, |
| "learning_rate": 0.00012123421069949359, |
| "loss": 0.7515, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.10333898430014005, |
| "grad_norm": 0.6514810919761658, |
| "learning_rate": 0.00012123406837661717, |
| "loss": 0.0286, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.10348640082553254, |
| "grad_norm": 25.315319061279297, |
| "learning_rate": 0.00012123392585067908, |
| "loss": 0.6189, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.10363381735092504, |
| "grad_norm": 24.714847564697266, |
| "learning_rate": 0.00012123378312167983, |
| "loss": 0.7992, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.10378123387631753, |
| "grad_norm": 21.79236602783203, |
| "learning_rate": 0.00012123364018961989, |
| "loss": 1.8653, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.10378123387631753, |
| "eval_1_ratio_diff": -0.018706157443491855, |
| "eval_accuracy": 0.8752922837100545, |
| "eval_f1": 0.8728139904610492, |
| "eval_loss": 0.573785662651062, |
| "eval_precision": 0.8897893030794165, |
| "eval_recall": 0.8564742589703588, |
| "eval_runtime": 1440.8628, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.446, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.10392865040171004, |
| "grad_norm": 21.346384048461914, |
| "learning_rate": 0.00012123349705449974, |
| "loss": 1.923, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.10407606692710253, |
| "grad_norm": 10.506868362426758, |
| "learning_rate": 0.00012123335371631985, |
| "loss": 0.5301, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.10422348345249502, |
| "grad_norm": 1.1288862228393555, |
| "learning_rate": 0.00012123321017508069, |
| "loss": 0.0411, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.10437089997788752, |
| "grad_norm": 0.11825437843799591, |
| "learning_rate": 0.00012123306643078279, |
| "loss": 0.0026, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.10451831650328001, |
| "grad_norm": 0.14662548899650574, |
| "learning_rate": 0.00012123292248342657, |
| "loss": 1.3863, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.10466573302867252, |
| "grad_norm": 1.1349258422851562, |
| "learning_rate": 0.00012123277833301255, |
| "loss": 0.0148, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.10481314955406501, |
| "grad_norm": 20.21559715270996, |
| "learning_rate": 0.00012123263397954121, |
| "loss": 2.3576, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.1049605660794575, |
| "grad_norm": 27.789064407348633, |
| "learning_rate": 0.00012123248942301302, |
| "loss": 1.3553, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.1049605660794575, |
| "eval_1_ratio_diff": -0.18082618862042088, |
| "eval_accuracy": 0.7833203429462198, |
| "eval_f1": 0.7352380952380952, |
| "eval_loss": 0.8213497400283813, |
| "eval_precision": 0.9437652811735942, |
| "eval_recall": 0.6021840873634945, |
| "eval_runtime": 1440.567, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.10510798260485, |
| "grad_norm": 0.472825288772583, |
| "learning_rate": 0.00012123234466342849, |
| "loss": 0.0161, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.1052553991302425, |
| "grad_norm": 44.982635498046875, |
| "learning_rate": 0.00012123219970078806, |
| "loss": 0.216, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.105402815655635, |
| "grad_norm": 20.85587501525879, |
| "learning_rate": 0.00012123205453509228, |
| "loss": 1.7555, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.10555023218102749, |
| "grad_norm": 19.432729721069336, |
| "learning_rate": 0.00012123190916634158, |
| "loss": 0.9614, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.10569764870641998, |
| "grad_norm": 1.2885982990264893, |
| "learning_rate": 0.00012123176359453646, |
| "loss": 0.7221, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.10584506523181249, |
| "grad_norm": 39.255924224853516, |
| "learning_rate": 0.00012123161781967742, |
| "loss": 0.7135, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.10599248175720498, |
| "grad_norm": 1.8398678302764893, |
| "learning_rate": 0.00012123147184176495, |
| "loss": 1.7681, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.10613989828259748, |
| "grad_norm": 0.04480309039354324, |
| "learning_rate": 0.00012123132566079952, |
| "loss": 0.0198, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.10613989828259748, |
| "eval_1_ratio_diff": 0.013250194855806696, |
| "eval_accuracy": 0.8495713172252534, |
| "eval_f1": 0.8514241724403387, |
| "eval_loss": 0.5520654916763306, |
| "eval_precision": 0.8404255319148937, |
| "eval_recall": 0.8627145085803433, |
| "eval_runtime": 1441.2669, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.10628731480798997, |
| "grad_norm": 0.10228992253541946, |
| "learning_rate": 0.00012123117927678164, |
| "loss": 0.0767, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.10643473133338248, |
| "grad_norm": 0.14043979346752167, |
| "learning_rate": 0.0001212310326897118, |
| "loss": 0.062, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.10658214785877497, |
| "grad_norm": 18.650835037231445, |
| "learning_rate": 0.00012123088589959048, |
| "loss": 0.5735, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.10672956438416746, |
| "grad_norm": 18.65635871887207, |
| "learning_rate": 0.00012123073890641816, |
| "loss": 0.697, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.10687698090955997, |
| "grad_norm": 24.889253616333008, |
| "learning_rate": 0.00012123059171019538, |
| "loss": 1.1449, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.10702439743495246, |
| "grad_norm": 0.32461315393447876, |
| "learning_rate": 0.00012123044431092258, |
| "loss": 0.0108, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.10717181396034496, |
| "grad_norm": 0.195255309343338, |
| "learning_rate": 0.00012123029670860029, |
| "loss": 0.0082, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.10731923048573745, |
| "grad_norm": 0.3942672312259674, |
| "learning_rate": 0.00012123014890322897, |
| "loss": 0.0278, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.10731923048573745, |
| "eval_1_ratio_diff": -0.05455962587685115, |
| "eval_accuracy": 0.8487918939984411, |
| "eval_f1": 0.8399339933993399, |
| "eval_loss": 0.6235100626945496, |
| "eval_precision": 0.8914185639229422, |
| "eval_recall": 0.7940717628705148, |
| "eval_runtime": 1441.051, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.446, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.10746664701112994, |
| "grad_norm": 20.8675537109375, |
| "learning_rate": 0.00012123000089480917, |
| "loss": 2.0488, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.10761406353652245, |
| "grad_norm": 19.674894332885742, |
| "learning_rate": 0.00012122985268334132, |
| "loss": 0.9135, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.10776148006191494, |
| "grad_norm": 0.16670210659503937, |
| "learning_rate": 0.00012122970426882597, |
| "loss": 0.0074, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.10790889658730744, |
| "grad_norm": 20.293106079101562, |
| "learning_rate": 0.00012122955565126358, |
| "loss": 1.0217, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.10805631311269993, |
| "grad_norm": 0.6973972916603088, |
| "learning_rate": 0.00012122940683065467, |
| "loss": 0.9069, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.10820372963809242, |
| "grad_norm": 25.440162658691406, |
| "learning_rate": 0.00012122925780699975, |
| "loss": 1.5865, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.10835114616348493, |
| "grad_norm": 4.310685157775879, |
| "learning_rate": 0.00012122910858029928, |
| "loss": 0.4176, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.10849856268887742, |
| "grad_norm": 0.3989110291004181, |
| "learning_rate": 0.00012122895915055379, |
| "loss": 1.2954, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.10849856268887742, |
| "eval_1_ratio_diff": 0.021823850350740415, |
| "eval_accuracy": 0.8581449727201871, |
| "eval_f1": 0.8610687022900764, |
| "eval_loss": 0.4833138585090637, |
| "eval_precision": 0.8430493273542601, |
| "eval_recall": 0.8798751950078003, |
| "eval_runtime": 1440.7462, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.10864597921426992, |
| "grad_norm": 0.3485046923160553, |
| "learning_rate": 0.00012122880951776379, |
| "loss": 0.0092, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.10879339573966242, |
| "grad_norm": 31.38138198852539, |
| "learning_rate": 0.00012122865968192974, |
| "loss": 2.2038, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.1089408122650549, |
| "grad_norm": 0.1756962537765503, |
| "learning_rate": 0.00012122850964305218, |
| "loss": 0.0039, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.10908822879044741, |
| "grad_norm": 0.4892203211784363, |
| "learning_rate": 0.0001212283594011316, |
| "loss": 1.2883, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.1092356453158399, |
| "grad_norm": 0.38502997159957886, |
| "learning_rate": 0.00012122820895616849, |
| "loss": 0.015, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.1093830618412324, |
| "grad_norm": 0.3273461163043976, |
| "learning_rate": 0.00012122805830816339, |
| "loss": 0.0328, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.1095304783666249, |
| "grad_norm": 53.52883529663086, |
| "learning_rate": 0.00012122790745711678, |
| "loss": 1.4843, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.10967789489201739, |
| "grad_norm": 0.2854032814502716, |
| "learning_rate": 0.00012122775640302914, |
| "loss": 0.0227, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.10967789489201739, |
| "eval_1_ratio_diff": 0.014809041309431059, |
| "eval_accuracy": 0.8448947778643804, |
| "eval_f1": 0.8470407378939278, |
| "eval_loss": 0.6297035217285156, |
| "eval_precision": 0.8348484848484848, |
| "eval_recall": 0.859594383775351, |
| "eval_runtime": 1441.3865, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.1098253114174099, |
| "grad_norm": 0.2311754673719406, |
| "learning_rate": 0.00012122760514590104, |
| "loss": 0.0063, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.10997272794280238, |
| "grad_norm": 21.77858543395996, |
| "learning_rate": 0.00012122745368573293, |
| "loss": 1.6042, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.11012014446819489, |
| "grad_norm": 0.12185559421777725, |
| "learning_rate": 0.00012122730202252534, |
| "loss": 0.0054, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.11026756099358738, |
| "grad_norm": 0.07674361765384674, |
| "learning_rate": 0.00012122715015627879, |
| "loss": 1.2277, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.11041497751897988, |
| "grad_norm": 1.0588175058364868, |
| "learning_rate": 0.00012122699808699376, |
| "loss": 0.0121, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.11056239404437238, |
| "grad_norm": 148.854248046875, |
| "learning_rate": 0.00012122684581467078, |
| "loss": 1.6651, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.11070981056976487, |
| "grad_norm": 0.07673851400613785, |
| "learning_rate": 0.00012122669333931036, |
| "loss": 0.0037, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.11085722709515737, |
| "grad_norm": 0.14825621247291565, |
| "learning_rate": 0.00012122654066091301, |
| "loss": 0.0033, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.11085722709515737, |
| "eval_1_ratio_diff": 0.0, |
| "eval_accuracy": 0.8394388152766953, |
| "eval_f1": 0.8393135725429017, |
| "eval_loss": 0.7106738686561584, |
| "eval_precision": 0.8393135725429017, |
| "eval_recall": 0.8393135725429017, |
| "eval_runtime": 1440.7668, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.446, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.11100464362054986, |
| "grad_norm": 0.3325727880001068, |
| "learning_rate": 0.00012122638777947923, |
| "loss": 0.0043, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.11115206014594237, |
| "grad_norm": 0.16898727416992188, |
| "learning_rate": 0.00012122623469500956, |
| "loss": 1.3778, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.11129947667133486, |
| "grad_norm": 24.855741500854492, |
| "learning_rate": 0.00012122608140750447, |
| "loss": 1.1577, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.11144689319672735, |
| "grad_norm": 0.15268811583518982, |
| "learning_rate": 0.0001212259279169645, |
| "loss": 0.0057, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.11159430972211985, |
| "grad_norm": 37.5292854309082, |
| "learning_rate": 0.00012122577422339017, |
| "loss": 2.6301, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.11174172624751234, |
| "grad_norm": 0.23876796662807465, |
| "learning_rate": 0.000121225620326782, |
| "loss": 0.0067, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.11188914277290485, |
| "grad_norm": 0.14355158805847168, |
| "learning_rate": 0.00012122546622714046, |
| "loss": 0.0082, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.11203655929829734, |
| "grad_norm": 0.14837191998958588, |
| "learning_rate": 0.00012122531192446613, |
| "loss": 1.1954, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.11203655929829734, |
| "eval_1_ratio_diff": -0.006235385814497285, |
| "eval_accuracy": 0.8456742010911925, |
| "eval_f1": 0.8445839874411303, |
| "eval_loss": 0.5836101174354553, |
| "eval_precision": 0.8499210110584519, |
| "eval_recall": 0.8393135725429017, |
| "eval_runtime": 1440.5652, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.11218397582368983, |
| "grad_norm": 1.0671629905700684, |
| "learning_rate": 0.0001212251574187595, |
| "loss": 0.0128, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.11233139234908233, |
| "grad_norm": 22.311914443969727, |
| "learning_rate": 0.00012122500271002106, |
| "loss": 1.1378, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.11247880887447483, |
| "grad_norm": 24.98206329345703, |
| "learning_rate": 0.00012122484779825135, |
| "loss": 1.4429, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.11262622539986733, |
| "grad_norm": 0.10400061309337616, |
| "learning_rate": 0.00012122469268345093, |
| "loss": 0.8205, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.11277364192525982, |
| "grad_norm": 0.1311234086751938, |
| "learning_rate": 0.00012122453736562024, |
| "loss": 0.0052, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.11292105845065231, |
| "grad_norm": 24.459693908691406, |
| "learning_rate": 0.00012122438184475986, |
| "loss": 0.8169, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.11306847497604482, |
| "grad_norm": 0.6599878072738647, |
| "learning_rate": 0.0001212242261208703, |
| "loss": 0.0172, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.11321589150143731, |
| "grad_norm": 0.7011798024177551, |
| "learning_rate": 0.00012122407019395205, |
| "loss": 0.0101, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.11321589150143731, |
| "eval_1_ratio_diff": -0.011691348402182389, |
| "eval_accuracy": 0.852689010132502, |
| "eval_f1": 0.850828729281768, |
| "eval_loss": 0.616263747215271, |
| "eval_precision": 0.8610223642172524, |
| "eval_recall": 0.8408736349453978, |
| "eval_runtime": 1440.4808, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.11336330802682981, |
| "grad_norm": 0.11136188358068466, |
| "learning_rate": 0.00012122391406400568, |
| "loss": 0.0043, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.1135107245522223, |
| "grad_norm": 0.09410673379898071, |
| "learning_rate": 0.00012122375773103169, |
| "loss": 0.0029, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.1136581410776148, |
| "grad_norm": 0.0886264443397522, |
| "learning_rate": 0.00012122360119503061, |
| "loss": 0.0027, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.1138055576030073, |
| "grad_norm": 0.06019139662384987, |
| "learning_rate": 0.00012122344445600295, |
| "loss": 0.0012, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.11395297412839979, |
| "grad_norm": 24.27945327758789, |
| "learning_rate": 0.00012122328751394924, |
| "loss": 1.2476, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.1141003906537923, |
| "grad_norm": 0.07040827721357346, |
| "learning_rate": 0.00012122313036887001, |
| "loss": 1.05, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.11424780717918478, |
| "grad_norm": 21.743165969848633, |
| "learning_rate": 0.00012122297302076579, |
| "loss": 3.2561, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.11439522370457729, |
| "grad_norm": 0.21815018355846405, |
| "learning_rate": 0.00012122281546963711, |
| "loss": 0.0085, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.11439522370457729, |
| "eval_1_ratio_diff": 0.05845674201091189, |
| "eval_accuracy": 0.8620420888542478, |
| "eval_f1": 0.8695652173913043, |
| "eval_loss": 0.5588727593421936, |
| "eval_precision": 0.8240223463687151, |
| "eval_recall": 0.9204368174726989, |
| "eval_runtime": 1440.4266, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.11454264022996978, |
| "grad_norm": 0.3978158235549927, |
| "learning_rate": 0.0001212226577154845, |
| "loss": 0.0087, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.11469005675536227, |
| "grad_norm": 0.07042258977890015, |
| "learning_rate": 0.00012122249975830848, |
| "loss": 0.0021, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.11483747328075478, |
| "grad_norm": 0.16607695817947388, |
| "learning_rate": 0.00012122234159810957, |
| "loss": 0.0024, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.11498488980614727, |
| "grad_norm": 0.11605281382799149, |
| "learning_rate": 0.00012122218323488832, |
| "loss": 0.0026, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.11513230633153977, |
| "grad_norm": 24.77876091003418, |
| "learning_rate": 0.00012122202466864525, |
| "loss": 1.4127, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.11527972285693226, |
| "grad_norm": 0.17567309737205505, |
| "learning_rate": 0.00012122186589938088, |
| "loss": 0.0037, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.11542713938232475, |
| "grad_norm": 0.19481156766414642, |
| "learning_rate": 0.00012122170692709576, |
| "loss": 0.6267, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.11557455590771726, |
| "grad_norm": 24.115211486816406, |
| "learning_rate": 0.00012122154775179043, |
| "loss": 0.8964, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.11557455590771726, |
| "eval_1_ratio_diff": 0.03975058456742009, |
| "eval_accuracy": 0.8713951675759938, |
| "eval_f1": 0.8762190547636909, |
| "eval_loss": 0.5382638573646545, |
| "eval_precision": 0.8439306358381503, |
| "eval_recall": 0.9110764430577223, |
| "eval_runtime": 1441.1253, |
| "eval_samples_per_second": 0.89, |
| "eval_steps_per_second": 0.445, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.11572197243310975, |
| "grad_norm": 0.140619158744812, |
| "learning_rate": 0.0001212213883734654, |
| "loss": 0.0054, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.11586938895850225, |
| "grad_norm": 0.12547695636749268, |
| "learning_rate": 0.00012122122879212122, |
| "loss": 0.3549, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.11601680548389474, |
| "grad_norm": 0.12592053413391113, |
| "learning_rate": 0.00012122106900775843, |
| "loss": 0.0105, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.11616422200928724, |
| "grad_norm": 0.11613775789737701, |
| "learning_rate": 0.00012122090902037755, |
| "loss": 0.0044, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.11631163853467974, |
| "grad_norm": 0.06327944993972778, |
| "learning_rate": 0.00012122074882997911, |
| "loss": 0.0052, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.11645905506007223, |
| "grad_norm": 0.26552170515060425, |
| "learning_rate": 0.00012122058843656367, |
| "loss": 0.0049, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.11660647158546474, |
| "grad_norm": 0.05181106925010681, |
| "learning_rate": 0.00012122042784013175, |
| "loss": 0.8965, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.11675388811085723, |
| "grad_norm": 0.07022108882665634, |
| "learning_rate": 0.0001212202670406839, |
| "loss": 1.4149, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.11675388811085723, |
| "eval_1_ratio_diff": 0.014029618082618822, |
| "eval_accuracy": 0.8565861262665627, |
| "eval_f1": 0.8584615384615385, |
| "eval_loss": 0.6103407144546509, |
| "eval_precision": 0.8467374810318664, |
| "eval_recall": 0.8705148205928237, |
| "eval_runtime": 1439.8898, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.11690130463624972, |
| "grad_norm": 0.20126762986183167, |
| "learning_rate": 0.00012122010603822065, |
| "loss": 0.0077, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.11704872116164222, |
| "grad_norm": 0.09971367567777634, |
| "learning_rate": 0.00012121994483274255, |
| "loss": 0.0049, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.11719613768703471, |
| "grad_norm": 0.06467089802026749, |
| "learning_rate": 0.00012121978342425012, |
| "loss": 0.005, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.11734355421242722, |
| "grad_norm": 0.06981782615184784, |
| "learning_rate": 0.00012121962181274392, |
| "loss": 0.0028, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.11749097073781971, |
| "grad_norm": 0.12012193351984024, |
| "learning_rate": 0.00012121945999822448, |
| "loss": 0.0022, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.11763838726321221, |
| "grad_norm": 24.71665382385254, |
| "learning_rate": 0.00012121929798069236, |
| "loss": 1.756, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.1177858037886047, |
| "grad_norm": 0.31951653957366943, |
| "learning_rate": 0.0001212191357601481, |
| "loss": 0.004, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.1179332203139972, |
| "grad_norm": 0.03907225281000137, |
| "learning_rate": 0.0001212189733365922, |
| "loss": 0.0018, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.1179332203139972, |
| "eval_1_ratio_diff": 0.003117692907248615, |
| "eval_accuracy": 0.8628215120810601, |
| "eval_f1": 0.8631415241057543, |
| "eval_loss": 0.6979319453239441, |
| "eval_precision": 0.8604651162790697, |
| "eval_recall": 0.8658346333853354, |
| "eval_runtime": 1440.2188, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.1180806368393897, |
| "grad_norm": 0.14489419758319855, |
| "learning_rate": 0.00012121881071002525, |
| "loss": 0.004, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.11822805336478219, |
| "grad_norm": 0.02964833378791809, |
| "learning_rate": 0.00012121864788044781, |
| "loss": 0.0014, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.1183754698901747, |
| "grad_norm": 0.1308467835187912, |
| "learning_rate": 0.00012121848484786039, |
| "loss": 1.2428, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.11852288641556719, |
| "grad_norm": 0.012196216732263565, |
| "learning_rate": 0.00012121832161226353, |
| "loss": 0.0039, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.11867030294095968, |
| "grad_norm": 26.82729721069336, |
| "learning_rate": 0.0001212181581736578, |
| "loss": 0.9557, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.11881771946635218, |
| "grad_norm": 55.06840515136719, |
| "learning_rate": 0.00012121799453204374, |
| "loss": 1.341, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.11896513599174467, |
| "grad_norm": 0.10571928322315216, |
| "learning_rate": 0.0001212178306874219, |
| "loss": 0.0018, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.11911255251713718, |
| "grad_norm": 23.6888427734375, |
| "learning_rate": 0.00012121766663979284, |
| "loss": 2.8349, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.11911255251713718, |
| "eval_1_ratio_diff": 0.001558846453624252, |
| "eval_accuracy": 0.8612626656274357, |
| "eval_f1": 0.8613707165109035, |
| "eval_loss": 0.6912267804145813, |
| "eval_precision": 0.8600311041990669, |
| "eval_recall": 0.8627145085803433, |
| "eval_runtime": 1439.8805, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.11925996904252967, |
| "grad_norm": 0.013893804512917995, |
| "learning_rate": 0.00012121750238915708, |
| "loss": 0.0039, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.11940738556792216, |
| "grad_norm": 0.0326993353664875, |
| "learning_rate": 0.00012121733793551521, |
| "loss": 0.0071, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.11955480209331466, |
| "grad_norm": 0.021896235644817352, |
| "learning_rate": 0.00012121717327886775, |
| "loss": 0.4694, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.11970221861870715, |
| "grad_norm": 2.5759835243225098, |
| "learning_rate": 0.00012121700841921524, |
| "loss": 0.8411, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.11984963514409966, |
| "grad_norm": 6.512516021728516, |
| "learning_rate": 0.00012121684335655828, |
| "loss": 1.2897, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.11999705166949215, |
| "grad_norm": 1.0826752185821533, |
| "learning_rate": 0.00012121667809089738, |
| "loss": 0.067, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.12014446819488464, |
| "grad_norm": 0.5020477771759033, |
| "learning_rate": 0.00012121651262223313, |
| "loss": 0.0061, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.12029188472027715, |
| "grad_norm": 1.0385483503341675, |
| "learning_rate": 0.00012121634695056605, |
| "loss": 0.0162, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.12029188472027715, |
| "eval_1_ratio_diff": 0.024162120031176904, |
| "eval_accuracy": 0.8854247856586126, |
| "eval_f1": 0.8880426504188881, |
| "eval_loss": 0.4667970538139343, |
| "eval_precision": 0.8675595238095238, |
| "eval_recall": 0.9095163806552262, |
| "eval_runtime": 1440.04, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.12043930124566964, |
| "grad_norm": 0.3257231116294861, |
| "learning_rate": 0.00012121618107589671, |
| "loss": 0.0073, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.12058671777106214, |
| "grad_norm": 0.17591340839862823, |
| "learning_rate": 0.00012121601499822568, |
| "loss": 0.7197, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.12073413429645463, |
| "grad_norm": 20.77132797241211, |
| "learning_rate": 0.0001212158487175535, |
| "loss": 1.5072, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.12088155082184712, |
| "grad_norm": 0.013665467500686646, |
| "learning_rate": 0.00012121568223388071, |
| "loss": 0.0014, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.12102896734723963, |
| "grad_norm": 0.368145614862442, |
| "learning_rate": 0.00012121551554720792, |
| "loss": 1.0871, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.12117638387263212, |
| "grad_norm": 0.2764877378940582, |
| "learning_rate": 0.00012121534865753563, |
| "loss": 0.0044, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.12132380039802462, |
| "grad_norm": 0.15803444385528564, |
| "learning_rate": 0.00012121518156486446, |
| "loss": 0.0058, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.12147121692341711, |
| "grad_norm": 21.269418716430664, |
| "learning_rate": 0.0001212150142691949, |
| "loss": 1.5637, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.12147121692341711, |
| "eval_1_ratio_diff": -0.03117692907248637, |
| "eval_accuracy": 0.8877630553390491, |
| "eval_f1": 0.8840579710144928, |
| "eval_loss": 0.5637651681900024, |
| "eval_precision": 0.913477537437604, |
| "eval_recall": 0.8564742589703588, |
| "eval_runtime": 1440.5567, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.12161863344880962, |
| "grad_norm": 21.965253829956055, |
| "learning_rate": 0.00012121484677052757, |
| "loss": 0.9775, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.12176604997420211, |
| "grad_norm": 5.706968307495117, |
| "learning_rate": 0.000121214679068863, |
| "loss": 1.2593, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.1219134664995946, |
| "grad_norm": 78.91386413574219, |
| "learning_rate": 0.00012121451116420174, |
| "loss": 1.8529, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.1220608830249871, |
| "grad_norm": 20.03242301940918, |
| "learning_rate": 0.00012121434305654442, |
| "loss": 3.822, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.1222082995503796, |
| "grad_norm": 18.92554473876953, |
| "learning_rate": 0.00012121417474589151, |
| "loss": 1.7478, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.1223557160757721, |
| "grad_norm": 18.513463973999023, |
| "learning_rate": 0.00012121400623224365, |
| "loss": 0.9207, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.12250313260116459, |
| "grad_norm": 2.1414077281951904, |
| "learning_rate": 0.00012121383751560137, |
| "loss": 0.0559, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.12265054912655708, |
| "grad_norm": 1.9082714319229126, |
| "learning_rate": 0.00012121366859596523, |
| "loss": 0.0867, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.12265054912655708, |
| "eval_1_ratio_diff": -0.11223694466095091, |
| "eval_accuracy": 0.8316445830085737, |
| "eval_f1": 0.8101933216168717, |
| "eval_loss": 0.46373099088668823, |
| "eval_precision": 0.9275653923541247, |
| "eval_recall": 0.719188767550702, |
| "eval_runtime": 1439.8045, |
| "eval_samples_per_second": 0.891, |
| "eval_steps_per_second": 0.446, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.12279796565194959, |
| "grad_norm": 17.66658592224121, |
| "learning_rate": 0.0001212134994733358, |
| "loss": 0.694, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.12294538217734208, |
| "grad_norm": 0.5736209750175476, |
| "learning_rate": 0.00012121333014771369, |
| "loss": 0.5414, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.12309279870273458, |
| "grad_norm": 16.726125717163086, |
| "learning_rate": 0.0001212131606190994, |
| "loss": 2.7414, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.12324021522812707, |
| "grad_norm": 1.1649620532989502, |
| "learning_rate": 0.00012121299088749353, |
| "loss": 0.0285, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.12338763175351956, |
| "grad_norm": 18.4560604095459, |
| "learning_rate": 0.00012121282095289665, |
| "loss": 0.9068, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.12353504827891207, |
| "grad_norm": 0.3899083137512207, |
| "learning_rate": 0.00012121265081530934, |
| "loss": 0.0192, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.12368246480430456, |
| "grad_norm": 0.6309532523155212, |
| "learning_rate": 0.00012121248047473215, |
| "loss": 0.0398, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.12382988132969706, |
| "grad_norm": 25.81404685974121, |
| "learning_rate": 0.00012121230993116564, |
| "loss": 0.9268, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.12382988132969706, |
| "eval_1_ratio_diff": -0.05222135619641466, |
| "eval_accuracy": 0.8620420888542478, |
| "eval_f1": 0.854320987654321, |
| "eval_loss": 0.49660980701446533, |
| "eval_precision": 0.9041811846689896, |
| "eval_recall": 0.8096723868954758, |
| "eval_runtime": 1439.1088, |
| "eval_samples_per_second": 0.892, |
| "eval_steps_per_second": 0.446, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.12397729785508955, |
| "grad_norm": 0.17194198071956635, |
| "learning_rate": 0.00012121213918461043, |
| "loss": 0.0091, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.12412471438048205, |
| "grad_norm": 0.1233774870634079, |
| "learning_rate": 0.00012121196823506704, |
| "loss": 0.033, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.12427213090587455, |
| "grad_norm": 0.12911829352378845, |
| "learning_rate": 0.00012121179708253609, |
| "loss": 0.9894, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.12441954743126704, |
| "grad_norm": 12.796908378601074, |
| "learning_rate": 0.00012121162572701811, |
| "loss": 0.2167, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.12456696395665955, |
| "grad_norm": 19.411853790283203, |
| "learning_rate": 0.0001212114541685137, |
| "loss": 1.1343, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.12471438048205204, |
| "grad_norm": 2.125748872756958, |
| "learning_rate": 0.00012121128240702341, |
| "loss": 0.0167, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.12486179700744453, |
| "grad_norm": 0.23534013330936432, |
| "learning_rate": 0.00012121111044254785, |
| "loss": 0.0099, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.12500921353283703, |
| "grad_norm": 0.2723231911659241, |
| "learning_rate": 0.00012121093827508758, |
| "loss": 0.0222, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.12500921353283703, |
| "eval_1_ratio_diff": -0.05144193296960253, |
| "eval_accuracy": 0.8721745908028059, |
| "eval_f1": 0.8651315789473685, |
| "eval_loss": 0.608511209487915, |
| "eval_precision": 0.9147826086956522, |
| "eval_recall": 0.8205928237129485, |
| "eval_runtime": 1438.9329, |
| "eval_samples_per_second": 0.892, |
| "eval_steps_per_second": 0.446, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.12515663005822952, |
| "grad_norm": 0.1575896292924881, |
| "learning_rate": 0.00012121076590464316, |
| "loss": 0.0045, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.12530404658362201, |
| "grad_norm": 25.341609954833984, |
| "learning_rate": 0.00012121059333121521, |
| "loss": 2.9943, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.12545146310901453, |
| "grad_norm": 0.13375264406204224, |
| "learning_rate": 0.00012121042055480427, |
| "loss": 0.0033, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.12559887963440702, |
| "grad_norm": 0.06750854849815369, |
| "learning_rate": 0.00012121024757541094, |
| "loss": 0.0024, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.12574629615979951, |
| "grad_norm": 0.05674993619322777, |
| "learning_rate": 0.00012121007439303577, |
| "loss": 1.2325, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.125893712685192, |
| "grad_norm": 0.06746107339859009, |
| "learning_rate": 0.00012120990100767938, |
| "loss": 0.0016, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.1260411292105845, |
| "grad_norm": 18.890642166137695, |
| "learning_rate": 0.00012120972741934233, |
| "loss": 1.5509, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.12618854573597701, |
| "grad_norm": 0.0601690337061882, |
| "learning_rate": 0.00012120955362802522, |
| "loss": 0.0042, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.12618854573597701, |
| "eval_1_ratio_diff": -0.21278254091971943, |
| "eval_accuracy": 0.7575993764614185, |
| "eval_f1": 0.6917740336967294, |
| "eval_loss": 1.260048747062683, |
| "eval_precision": 0.9483695652173914, |
| "eval_recall": 0.5444617784711389, |
| "eval_runtime": 1438.8451, |
| "eval_samples_per_second": 0.892, |
| "eval_steps_per_second": 0.446, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.1263359622613695, |
| "grad_norm": 31.62714385986328, |
| "learning_rate": 0.00012120937963372859, |
| "loss": 2.3397, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.126483378786762, |
| "grad_norm": 0.09423007071018219, |
| "learning_rate": 0.00012120920543645306, |
| "loss": 0.0056, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.1266307953121545, |
| "grad_norm": 18.73729133605957, |
| "learning_rate": 0.0001212090310361992, |
| "loss": 1.3417, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.12677821183754698, |
| "grad_norm": 0.16277751326560974, |
| "learning_rate": 0.0001212088564329676, |
| "loss": 0.0088, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.1269256283629395, |
| "grad_norm": 18.30181884765625, |
| "learning_rate": 0.00012120868162675886, |
| "loss": 0.966, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.127073044888332, |
| "grad_norm": 0.3613678812980652, |
| "learning_rate": 0.00012120850661757353, |
| "loss": 1.0053, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.12722046141372448, |
| "grad_norm": 0.7345402836799622, |
| "learning_rate": 0.00012120833140541222, |
| "loss": 1.4195, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.12736787793911697, |
| "grad_norm": 1.3485078811645508, |
| "learning_rate": 0.00012120815599027552, |
| "loss": 0.0247, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.12736787793911697, |
| "eval_1_ratio_diff": -0.04130943102104445, |
| "eval_accuracy": 0.8651597817614964, |
| "eval_f1": 0.8592351505288853, |
| "eval_loss": 0.4965825080871582, |
| "eval_precision": 0.8979591836734694, |
| "eval_recall": 0.8237129485179407, |
| "eval_runtime": 1438.3328, |
| "eval_samples_per_second": 0.892, |
| "eval_steps_per_second": 0.446, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.12751529446450946, |
| "grad_norm": 17.94972801208496, |
| "learning_rate": 0.000121207980372164, |
| "loss": 0.7518, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.12766271098990198, |
| "grad_norm": 1.6150920391082764, |
| "learning_rate": 0.00012120780455107827, |
| "loss": 0.0328, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.12781012751529447, |
| "grad_norm": 0.22876843810081482, |
| "learning_rate": 0.00012120762852701892, |
| "loss": 0.0105, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.12795754404068696, |
| "grad_norm": 0.1126691922545433, |
| "learning_rate": 0.0001212074522999865, |
| "loss": 0.0038, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.12810496056607945, |
| "grad_norm": 0.5277115702629089, |
| "learning_rate": 0.00012120727586998164, |
| "loss": 0.0094, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.12825237709147194, |
| "grad_norm": 0.11928611248731613, |
| "learning_rate": 0.00012120709923700492, |
| "loss": 0.0054, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.12839979361686446, |
| "grad_norm": 22.84393310546875, |
| "learning_rate": 0.00012120692240105693, |
| "loss": 1.7358, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.12854721014225695, |
| "grad_norm": 0.08426441997289658, |
| "learning_rate": 0.0001212067453621383, |
| "loss": 0.0029, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.12854721014225695, |
| "eval_1_ratio_diff": -0.014029618082618878, |
| "eval_accuracy": 0.8784099766173032, |
| "eval_f1": 0.8765822784810127, |
| "eval_loss": 0.6492618322372437, |
| "eval_precision": 0.8892455858747994, |
| "eval_recall": 0.8642745709828393, |
| "eval_runtime": 1438.7827, |
| "eval_samples_per_second": 0.892, |
| "eval_steps_per_second": 0.446, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.12869462666764944, |
| "grad_norm": 22.079143524169922, |
| "learning_rate": 0.00012120656812024955, |
| "loss": 1.2809, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.12884204319304193, |
| "grad_norm": 21.899768829345703, |
| "learning_rate": 0.00012120639067539131, |
| "loss": 3.0657, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.12898945971843442, |
| "grad_norm": 0.1824941784143448, |
| "learning_rate": 0.0001212062130275642, |
| "loss": 0.0032, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.12913687624382694, |
| "grad_norm": 0.1769951432943344, |
| "learning_rate": 0.00012120603517676877, |
| "loss": 1.2614, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.12928429276921943, |
| "grad_norm": 21.305864334106445, |
| "learning_rate": 0.00012120585712300566, |
| "loss": 1.0725, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.12943170929461192, |
| "grad_norm": 0.44233354926109314, |
| "learning_rate": 0.00012120567886627544, |
| "loss": 0.9641, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.12957912582000441, |
| "grad_norm": 0.2779258191585541, |
| "learning_rate": 0.00012120550040657871, |
| "loss": 0.0096, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.1297265423453969, |
| "grad_norm": 22.293994903564453, |
| "learning_rate": 0.00012120532174391606, |
| "loss": 0.9558, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.1297265423453969, |
| "eval_1_ratio_diff": 0.031956352299298496, |
| "eval_accuracy": 0.8901013250194856, |
| "eval_f1": 0.8934240362811792, |
| "eval_loss": 0.45321086049079895, |
| "eval_precision": 0.8665689149560117, |
| "eval_recall": 0.921996879875195, |
| "eval_runtime": 1438.3028, |
| "eval_samples_per_second": 0.892, |
| "eval_steps_per_second": 0.446, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.12987395887078942, |
| "grad_norm": 0.15532580018043518, |
| "learning_rate": 0.00012120514287828811, |
| "loss": 0.0082, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.13002137539618192, |
| "grad_norm": 27.137800216674805, |
| "learning_rate": 0.00012120496380969545, |
| "loss": 0.8253, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.1301687919215744, |
| "grad_norm": 0.12127237766981125, |
| "learning_rate": 0.00012120478453813868, |
| "loss": 0.007, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.1303162084469669, |
| "grad_norm": 0.12471210211515427, |
| "learning_rate": 0.00012120460506361839, |
| "loss": 0.0118, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.1304636249723594, |
| "grad_norm": 45.0229377746582, |
| "learning_rate": 0.0001212044253861352, |
| "loss": 3.5846, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.1306110414977519, |
| "grad_norm": 0.4128153622150421, |
| "learning_rate": 0.0001212042455056897, |
| "loss": 0.0073, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.1307584580231444, |
| "grad_norm": 0.40481987595558167, |
| "learning_rate": 0.0001212040654222825, |
| "loss": 0.0072, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.1309058745485369, |
| "grad_norm": 0.11055589467287064, |
| "learning_rate": 0.00012120388513591419, |
| "loss": 1.0826, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.1309058745485369, |
| "eval_1_ratio_diff": 0.1200311769290725, |
| "eval_accuracy": 0.8332034294621979, |
| "eval_f1": 0.850974930362117, |
| "eval_loss": 0.6285108923912048, |
| "eval_precision": 0.7685534591194969, |
| "eval_recall": 0.953198127925117, |
| "eval_runtime": 1438.3285, |
| "eval_samples_per_second": 0.892, |
| "eval_steps_per_second": 0.446, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.13105329107392938, |
| "grad_norm": 0.12451104074716568, |
| "learning_rate": 0.0001212037046465854, |
| "loss": 1.0074, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.1312007075993219, |
| "grad_norm": 0.27884507179260254, |
| "learning_rate": 0.0001212035239542967, |
| "loss": 0.0129, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.1313481241247144, |
| "grad_norm": 0.992557168006897, |
| "learning_rate": 0.00012120334305904872, |
| "loss": 1.4174, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.13149554065010688, |
| "grad_norm": 0.9067917466163635, |
| "learning_rate": 0.00012120316196084206, |
| "loss": 1.435, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.13164295717549937, |
| "grad_norm": 20.08501625061035, |
| "learning_rate": 0.00012120298065967733, |
| "loss": 1.7277, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.13179037370089186, |
| "grad_norm": 0.20194768905639648, |
| "learning_rate": 0.00012120279915555515, |
| "loss": 0.005, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.13193779022628438, |
| "grad_norm": 0.29110512137413025, |
| "learning_rate": 0.0001212026174484761, |
| "loss": 0.0065, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.13208520675167687, |
| "grad_norm": 0.3067338764667511, |
| "learning_rate": 0.00012120243553844079, |
| "loss": 0.006, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.13208520675167687, |
| "eval_1_ratio_diff": -0.002338269680436489, |
| "eval_accuracy": 0.8978955572876072, |
| "eval_f1": 0.8975762314308053, |
| "eval_loss": 0.42508459091186523, |
| "eval_precision": 0.8996865203761756, |
| "eval_recall": 0.8954758190327613, |
| "eval_runtime": 1439.0957, |
| "eval_samples_per_second": 0.892, |
| "eval_steps_per_second": 0.446, |
| "step": 1792 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 108536, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 8, |
| "save_steps": 64, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 1000, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5936070605815808e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|