{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "Batch Mean": -3.22344970703125,
      "accuracy": 0.5390625,
      "epoch": 0,
      "step": 0
    },
    {
      "epoch": 0.0025,
      "grad_norm": 32.78241729736328,
      "learning_rate": 2.5000000000000004e-07,
      "loss": 0.8624,
      "step": 1
    },
    {
      "Batch Mean": -3.20245361328125,
      "accuracy": 0.5234375,
      "epoch": 0.0025,
      "step": 1
    },
    {
      "epoch": 0.005,
      "grad_norm": 33.311607360839844,
      "learning_rate": 5.000000000000001e-07,
      "loss": 0.8721,
      "step": 2
    },
    {
      "Batch Mean": -3.2308349609375,
      "accuracy": 0.4453125,
      "epoch": 0.005,
      "step": 2
    },
    {
      "epoch": 0.0075,
      "grad_norm": 33.00279998779297,
      "learning_rate": 7.5e-07,
      "loss": 0.8678,
      "step": 3
    },
    {
      "Batch Mean": -3.2086181640625,
      "accuracy": 0.46875,
      "epoch": 0.0075,
      "step": 3
    },
    {
      "epoch": 0.01,
      "grad_norm": 33.870765686035156,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 0.8713,
      "step": 4
    },
    {
      "Batch Mean": -3.101593017578125,
      "accuracy": 0.5234375,
      "epoch": 0.01,
      "step": 4
    },
    {
      "epoch": 0.0125,
      "grad_norm": 31.95158576965332,
      "learning_rate": 1.25e-06,
      "loss": 0.8448,
      "step": 5
    },
    {
      "Batch Mean": -2.778167724609375,
      "accuracy": 0.4921875,
      "epoch": 0.0125,
      "step": 5
    },
    {
      "epoch": 0.015,
      "grad_norm": 29.81480598449707,
      "learning_rate": 1.5e-06,
      "loss": 0.83,
      "step": 6
    },
    {
      "Batch Mean": -2.60491943359375,
      "accuracy": 0.5859375,
      "epoch": 0.015,
      "step": 6
    },
    {
      "epoch": 0.0175,
      "grad_norm": 27.83857536315918,
      "learning_rate": 1.75e-06,
      "loss": 0.7811,
      "step": 7
    },
    {
      "Batch Mean": -1.55364990234375,
      "accuracy": 0.453125,
      "epoch": 0.0175,
      "step": 7
    },
    {
      "epoch": 0.02,
      "grad_norm": 16.97942352294922,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.7517,
      "step": 8
    },
    {
      "Batch Mean": -1.18939208984375,
      "accuracy": 0.5,
      "epoch": 0.02,
      "step": 8
    },
    {
      "epoch": 0.0225,
      "grad_norm": 14.753767967224121,
      "learning_rate": 2.25e-06,
      "loss": 0.7148,
      "step": 9
    },
    {
      "Batch Mean": 2.347869873046875,
      "accuracy": 0.5703125,
      "epoch": 0.0225,
      "step": 9
    },
    {
      "epoch": 0.025,
      "grad_norm": 26.016891479492188,
      "learning_rate": 2.5e-06,
      "loss": 0.7679,
      "step": 10
    },
    {
      "Batch Mean": 2.7044677734375,
      "accuracy": 0.5078125,
      "epoch": 0.025,
      "step": 10
    },
    {
      "epoch": 0.0275,
      "grad_norm": 28.977327346801758,
      "learning_rate": 2.7500000000000004e-06,
      "loss": 0.8112,
      "step": 11
    },
    {
      "Batch Mean": 3.05706787109375,
      "accuracy": 0.578125,
      "epoch": 0.0275,
      "step": 11
    },
    {
      "epoch": 0.03,
      "grad_norm": 31.655277252197266,
      "learning_rate": 3e-06,
      "loss": 0.8257,
      "step": 12
    },
    {
      "Batch Mean": 2.95458984375,
      "accuracy": 0.625,
      "epoch": 0.03,
      "step": 12
    },
    {
      "epoch": 0.0325,
      "grad_norm": 30.472238540649414,
      "learning_rate": 3.2500000000000002e-06,
      "loss": 0.8023,
      "step": 13
    },
    {
      "Batch Mean": 2.543212890625,
      "accuracy": 0.6484375,
      "epoch": 0.0325,
      "step": 13
    },
    {
      "epoch": 0.035,
      "grad_norm": 24.27365493774414,
      "learning_rate": 3.5e-06,
      "loss": 0.7431,
      "step": 14
    },
    {
      "Batch Mean": 2.131500244140625,
      "accuracy": 0.7265625,
      "epoch": 0.035,
      "step": 14
    },
    {
      "epoch": 0.0375,
      "grad_norm": 20.063459396362305,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 0.6869,
      "step": 15
    },
    {
      "Batch Mean": 1.4629058837890625,
      "accuracy": 0.65625,
      "epoch": 0.0375,
      "step": 15
    },
    {
      "epoch": 0.04,
      "grad_norm": 13.922951698303223,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.6427,
      "step": 16
    },
    {
      "Batch Mean": 0.45629656314849854,
      "accuracy": 0.6953125,
      "epoch": 0.04,
      "step": 16
    },
    {
      "epoch": 0.0425,
      "grad_norm": 8.479089736938477,
      "learning_rate": 4.25e-06,
      "loss": 0.6811,
      "step": 17
    },
    {
      "Batch Mean": -0.6575782299041748,
      "accuracy": 0.6875,
      "epoch": 0.0425,
      "step": 17
    },
    {
      "epoch": 0.045,
      "grad_norm": 8.51148509979248,
      "learning_rate": 4.5e-06,
      "loss": 0.6173,
      "step": 18
    },
    {
      "Batch Mean": -1.4419012069702148,
      "accuracy": 0.6796875,
      "epoch": 0.045,
      "step": 18
    },
    {
      "epoch": 0.0475,
      "grad_norm": 13.423223495483398,
      "learning_rate": 4.75e-06,
      "loss": 0.6464,
      "step": 19
    },
    {
      "Batch Mean": -1.6556549072265625,
      "accuracy": 0.7109375,
      "epoch": 0.0475,
      "step": 19
    },
    {
      "epoch": 0.05,
      "grad_norm": 14.950621604919434,
      "learning_rate": 5e-06,
      "loss": 0.6417,
      "step": 20
    },
    {
      "Batch Mean": -1.1625317335128784,
      "accuracy": 0.5859375,
      "epoch": 0.05,
      "step": 20
    },
    {
      "epoch": 0.0525,
      "grad_norm": 12.365989685058594,
      "learning_rate": 4.986842105263158e-06,
      "loss": 0.6729,
      "step": 21
    },
    {
      "Batch Mean": -0.22168993949890137,
      "accuracy": 0.7265625,
      "epoch": 0.0525,
      "step": 21
    },
    {
      "epoch": 0.055,
      "grad_norm": 7.174764633178711,
      "learning_rate": 4.973684210526316e-06,
      "loss": 0.5512,
      "step": 22
    },
    {
      "Batch Mean": 0.6636912226676941,
      "accuracy": 0.65625,
      "epoch": 0.055,
      "step": 22
    },
    {
      "epoch": 0.0575,
      "grad_norm": 10.331341743469238,
      "learning_rate": 4.960526315789474e-06,
      "loss": 0.629,
      "step": 23
    },
    {
      "Batch Mean": 1.0205326080322266,
      "accuracy": 0.5859375,
      "epoch": 0.0575,
      "step": 23
    },
    {
      "epoch": 0.06,
      "grad_norm": 11.934816360473633,
      "learning_rate": 4.947368421052632e-06,
      "loss": 0.6926,
      "step": 24
    },
    {
      "Batch Mean": 0.9423030614852905,
      "accuracy": 0.6875,
      "epoch": 0.06,
      "step": 24
    },
    {
      "epoch": 0.0625,
      "grad_norm": 10.728033065795898,
      "learning_rate": 4.9342105263157895e-06,
      "loss": 0.5973,
      "step": 25
    },
    {
      "Batch Mean": 0.4570488929748535,
      "accuracy": 0.6875,
      "epoch": 0.0625,
      "step": 25
    },
    {
      "epoch": 0.065,
      "grad_norm": 7.620607852935791,
      "learning_rate": 4.921052631578948e-06,
      "loss": 0.562,
      "step": 26
    },
    {
      "Batch Mean": -0.23614200949668884,
      "accuracy": 0.7421875,
      "epoch": 0.065,
      "step": 26
    },
    {
      "epoch": 0.0675,
      "grad_norm": 6.778779029846191,
      "learning_rate": 4.907894736842106e-06,
      "loss": 0.5411,
      "step": 27
    },
    {
      "Batch Mean": -0.8051831722259521,
      "accuracy": 0.609375,
      "epoch": 0.0675,
      "step": 27
    },
    {
      "epoch": 0.07,
      "grad_norm": 8.988683700561523,
      "learning_rate": 4.894736842105264e-06,
      "loss": 0.6348,
      "step": 28
    },
    {
      "Batch Mean": -0.7894483804702759,
      "accuracy": 0.7265625,
      "epoch": 0.07,
      "step": 28
    },
    {
      "epoch": 0.0725,
      "grad_norm": 8.688234329223633,
      "learning_rate": 4.881578947368422e-06,
      "loss": 0.5782,
      "step": 29
    },
    {
      "Batch Mean": -0.18249624967575073,
      "accuracy": 0.71875,
      "epoch": 0.0725,
      "step": 29
    },
    {
      "epoch": 0.075,
      "grad_norm": 5.793314456939697,
      "learning_rate": 4.8684210526315795e-06,
      "loss": 0.5502,
      "step": 30
    },
    {
      "Batch Mean": 0.33211851119995117,
      "accuracy": 0.75,
      "epoch": 0.075,
      "step": 30
    },
    {
      "epoch": 0.0775,
      "grad_norm": 6.008676528930664,
      "learning_rate": 4.855263157894737e-06,
      "loss": 0.5131,
      "step": 31
    },
    {
      "Batch Mean": 0.7284603118896484,
      "accuracy": 0.765625,
      "epoch": 0.0775,
      "step": 31
    },
    {
      "epoch": 0.08,
      "grad_norm": 8.172709465026855,
      "learning_rate": 4.842105263157895e-06,
      "loss": 0.5496,
      "step": 32
    },
    {
      "Batch Mean": 0.7859265804290771,
      "accuracy": 0.703125,
      "epoch": 0.08,
      "step": 32
    },
    {
      "epoch": 0.0825,
      "grad_norm": 9.267521858215332,
      "learning_rate": 4.828947368421053e-06,
      "loss": 0.5903,
      "step": 33
    },
    {
      "Batch Mean": 0.13241028785705566,
      "accuracy": 0.7109375,
      "epoch": 0.0825,
      "step": 33
    },
    {
      "epoch": 0.085,
      "grad_norm": 7.105382442474365,
      "learning_rate": 4.815789473684211e-06,
      "loss": 0.5725,
      "step": 34
    },
    {
      "Batch Mean": -0.3201490640640259,
      "accuracy": 0.7890625,
      "epoch": 0.085,
      "step": 34
    },
    {
      "epoch": 0.0875,
      "grad_norm": 6.344538688659668,
      "learning_rate": 4.802631578947369e-06,
      "loss": 0.4902,
      "step": 35
    },
    {
      "Batch Mean": -0.7660524845123291,
      "accuracy": 0.796875,
      "epoch": 0.0875,
      "step": 35
    },
    {
      "epoch": 0.09,
      "grad_norm": 8.478452682495117,
      "learning_rate": 4.789473684210527e-06,
      "loss": 0.5135,
      "step": 36
    },
    {
      "Batch Mean": -0.5964984893798828,
      "accuracy": 0.65625,
      "epoch": 0.09,
      "step": 36
    },
    {
      "epoch": 0.0925,
      "grad_norm": 8.487342834472656,
      "learning_rate": 4.7763157894736844e-06,
      "loss": 0.5517,
      "step": 37
    },
    {
      "Batch Mean": -0.2701318860054016,
      "accuracy": 0.75,
      "epoch": 0.0925,
      "step": 37
    },
    {
      "epoch": 0.095,
      "grad_norm": 7.8622236251831055,
      "learning_rate": 4.763157894736842e-06,
      "loss": 0.5405,
      "step": 38
    },
    {
      "Batch Mean": 0.3329949378967285,
      "accuracy": 0.6640625,
      "epoch": 0.095,
      "step": 38
    },
    {
      "epoch": 0.0975,
      "grad_norm": 8.729914665222168,
      "learning_rate": 4.75e-06,
      "loss": 0.61,
      "step": 39
    },
    {
      "Batch Mean": 0.8064537048339844,
      "accuracy": 0.765625,
      "epoch": 0.0975,
      "step": 39
    },
    {
      "epoch": 0.1,
      "grad_norm": 9.266338348388672,
      "learning_rate": 4.736842105263158e-06,
      "loss": 0.5148,
      "step": 40
    },
    {
      "Batch Mean": 0.636286735534668,
      "accuracy": 0.78125,
      "epoch": 0.1,
      "step": 40
    },
    {
      "epoch": 0.1025,
      "grad_norm": 7.88658332824707,
      "learning_rate": 4.723684210526316e-06,
      "loss": 0.504,
      "step": 41
    },
    {
      "Batch Mean": -0.03002452850341797,
      "accuracy": 0.75,
      "epoch": 0.1025,
      "step": 41
    },
    {
      "epoch": 0.105,
      "grad_norm": 6.346695899963379,
      "learning_rate": 4.710526315789474e-06,
      "loss": 0.5241,
      "step": 42
    },
    {
      "Batch Mean": -0.518393874168396,
      "accuracy": 0.734375,
      "epoch": 0.105,
      "step": 42
    },
    {
      "epoch": 0.1075,
      "grad_norm": 8.447480201721191,
      "learning_rate": 4.697368421052632e-06,
      "loss": 0.5379,
      "step": 43
    },
    {
      "Batch Mean": -0.4221421480178833,
      "accuracy": 0.78125,
      "epoch": 0.1075,
      "step": 43
    },
    {
      "epoch": 0.11,
      "grad_norm": 7.111172676086426,
      "learning_rate": 4.68421052631579e-06,
      "loss": 0.463,
      "step": 44
    },
    {
      "Batch Mean": -0.09055519104003906,
      "accuracy": 0.7421875,
      "epoch": 0.11,
      "step": 44
    },
    {
      "epoch": 0.1125,
      "grad_norm": 7.127180099487305,
      "learning_rate": 4.671052631578948e-06,
      "loss": 0.5026,
      "step": 45
    },
    {
      "Batch Mean": 0.23522070050239563,
      "accuracy": 0.8046875,
      "epoch": 0.1125,
      "step": 45
    },
    {
      "epoch": 0.115,
      "grad_norm": 7.237793922424316,
      "learning_rate": 4.657894736842106e-06,
      "loss": 0.4267,
      "step": 46
    },
    {
      "Batch Mean": 0.3779289126396179,
      "accuracy": 0.7109375,
      "epoch": 0.115,
      "step": 46
    },
    {
      "epoch": 0.1175,
      "grad_norm": 11.18747329711914,
      "learning_rate": 4.6447368421052635e-06,
      "loss": 0.5298,
      "step": 47
    },
    {
      "Batch Mean": 0.10932159423828125,
      "accuracy": 0.7265625,
      "epoch": 0.1175,
      "step": 47
    },
    {
      "epoch": 0.12,
      "grad_norm": 7.571730136871338,
      "learning_rate": 4.631578947368421e-06,
      "loss": 0.4557,
      "step": 48
    },
    {
      "Batch Mean": 0.2413313388824463,
      "accuracy": 0.765625,
      "epoch": 0.12,
      "step": 48
    },
    {
      "epoch": 0.1225,
      "grad_norm": 7.670024394989014,
      "learning_rate": 4.618421052631579e-06,
      "loss": 0.5069,
      "step": 49
    },
    {
      "Batch Mean": -0.15598678588867188,
      "accuracy": 0.6953125,
      "epoch": 0.1225,
      "step": 49
    },
    {
      "epoch": 0.125,
      "grad_norm": 10.166374206542969,
      "learning_rate": 4.605263157894737e-06,
      "loss": 0.5951,
      "step": 50
    },
    {
      "Batch Mean": -0.4917259216308594,
      "accuracy": 0.6953125,
      "epoch": 0.125,
      "step": 50
    },
    {
      "epoch": 0.1275,
      "grad_norm": 9.27407169342041,
      "learning_rate": 4.592105263157895e-06,
      "loss": 0.5234,
      "step": 51
    },
    {
      "Batch Mean": -0.1558108925819397,
      "accuracy": 0.78125,
      "epoch": 0.1275,
      "step": 51
    },
    {
      "epoch": 0.13,
      "grad_norm": 6.958871364593506,
      "learning_rate": 4.578947368421053e-06,
      "loss": 0.4772,
      "step": 52
    },
    {
      "Batch Mean": 0.010368764400482178,
      "accuracy": 0.71875,
      "epoch": 0.13,
      "step": 52
    },
    {
      "epoch": 0.1325,
      "grad_norm": 9.204265594482422,
      "learning_rate": 4.565789473684211e-06,
      "loss": 0.5679,
      "step": 53
    },
    {
      "Batch Mean": 0.1981235146522522,
      "accuracy": 0.734375,
      "epoch": 0.1325,
      "step": 53
    },
    {
      "epoch": 0.135,
      "grad_norm": 7.802533149719238,
      "learning_rate": 4.552631578947369e-06,
      "loss": 0.5008,
      "step": 54
    },
    {
      "Batch Mean": 0.41782093048095703,
      "accuracy": 0.7421875,
      "epoch": 0.135,
      "step": 54
    },
    {
      "epoch": 0.1375,
      "grad_norm": 7.295779228210449,
      "learning_rate": 4.539473684210527e-06,
      "loss": 0.5169,
      "step": 55
    },
    {
      "Batch Mean": 0.34867769479751587,
      "accuracy": 0.7890625,
      "epoch": 0.1375,
      "step": 55
    },
    {
      "epoch": 0.14,
      "grad_norm": 6.531123161315918,
      "learning_rate": 4.526315789473685e-06,
      "loss": 0.4554,
      "step": 56
    },
    {
      "Batch Mean": -0.32196617126464844,
      "accuracy": 0.703125,
      "epoch": 0.14,
      "step": 56
    },
    {
      "epoch": 0.1425,
      "grad_norm": 8.351095199584961,
      "learning_rate": 4.513157894736843e-06,
      "loss": 0.5658,
      "step": 57
    },
    {
      "Batch Mean": -0.301898717880249,
      "accuracy": 0.7578125,
      "epoch": 0.1425,
      "step": 57
    },
    {
      "epoch": 0.145,
      "grad_norm": 6.481894493103027,
      "learning_rate": 4.5e-06,
      "loss": 0.5244,
      "step": 58
    },
    {
      "Batch Mean": -0.5122654438018799,
      "accuracy": 0.75,
      "epoch": 0.145,
      "step": 58
    },
    {
      "epoch": 0.1475,
      "grad_norm": 7.288686752319336,
      "learning_rate": 4.4868421052631584e-06,
      "loss": 0.49,
      "step": 59
    },
    {
      "Batch Mean": -0.15232443809509277,
      "accuracy": 0.7109375,
      "epoch": 0.1475,
      "step": 59
    },
    {
      "epoch": 0.15,
      "grad_norm": 6.799753665924072,
      "learning_rate": 4.473684210526316e-06,
      "loss": 0.5079,
      "step": 60
    },
    {
      "Batch Mean": 0.15944302082061768,
      "accuracy": 0.8046875,
      "epoch": 0.15,
      "step": 60
    },
    {
      "epoch": 0.1525,
      "grad_norm": 6.836521625518799,
      "learning_rate": 4.460526315789474e-06,
| "loss": 0.4475, | |
| "step": 61 | |
| }, | |
| { | |
| "Batch Mean": 0.6559411287307739, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.1525, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.155, | |
| "grad_norm": 9.224549293518066, | |
| "learning_rate": 4.447368421052632e-06, | |
| "loss": 0.4865, | |
| "step": 62 | |
| }, | |
| { | |
| "Batch Mean": 0.4529900550842285, | |
| "accuracy": 0.6953125, | |
| "epoch": 0.155, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1575, | |
| "grad_norm": 8.381292343139648, | |
| "learning_rate": 4.43421052631579e-06, | |
| "loss": 0.4857, | |
| "step": 63 | |
| }, | |
| { | |
| "Batch Mean": -0.047610118985176086, | |
| "accuracy": 0.7109375, | |
| "epoch": 0.1575, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 8.044920921325684, | |
| "learning_rate": 4.4210526315789476e-06, | |
| "loss": 0.5013, | |
| "step": 64 | |
| }, | |
| { | |
| "Batch Mean": -0.3421214818954468, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.16, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.1625, | |
| "grad_norm": 7.571659088134766, | |
| "learning_rate": 4.407894736842105e-06, | |
| "loss": 0.4956, | |
| "step": 65 | |
| }, | |
| { | |
| "Batch Mean": -0.0925983190536499, | |
| "accuracy": 0.703125, | |
| "epoch": 0.1625, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.165, | |
| "grad_norm": 9.129205703735352, | |
| "learning_rate": 4.394736842105263e-06, | |
| "loss": 0.5488, | |
| "step": 66 | |
| }, | |
| { | |
| "Batch Mean": -0.12005829811096191, | |
| "accuracy": 0.78125, | |
| "epoch": 0.165, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.1675, | |
| "grad_norm": 8.270757675170898, | |
| "learning_rate": 4.381578947368421e-06, | |
| "loss": 0.4158, | |
| "step": 67 | |
| }, | |
| { | |
| "Batch Mean": 0.4386889934539795, | |
| "accuracy": 0.7421875, | |
| "epoch": 0.1675, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 11.129798889160156, | |
| "learning_rate": 4.368421052631579e-06, | |
| "loss": 0.4829, | |
| "step": 68 | |
| }, | |
| { | |
| "Batch Mean": -0.12494802474975586, | |
| "accuracy": 0.71875, | |
| "epoch": 0.17, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.1725, | |
| "grad_norm": 10.14079761505127, | |
| "learning_rate": 4.3552631578947375e-06, | |
| "loss": 0.4988, | |
| "step": 69 | |
| }, | |
| { | |
| "Batch Mean": -0.322005033493042, | |
| "accuracy": 0.7421875, | |
| "epoch": 0.1725, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.175, | |
| "grad_norm": 8.12278938293457, | |
| "learning_rate": 4.342105263157895e-06, | |
| "loss": 0.5207, | |
| "step": 70 | |
| }, | |
| { | |
| "Batch Mean": -0.3203928768634796, | |
| "accuracy": 0.7265625, | |
| "epoch": 0.175, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1775, | |
| "grad_norm": 8.622071266174316, | |
| "learning_rate": 4.328947368421053e-06, | |
| "loss": 0.508, | |
| "step": 71 | |
| }, | |
| { | |
| "Batch Mean": -0.11252805590629578, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.1775, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 7.229330062866211, | |
| "learning_rate": 4.315789473684211e-06, | |
| "loss": 0.443, | |
| "step": 72 | |
| }, | |
| { | |
| "Batch Mean": 0.08833515644073486, | |
| "accuracy": 0.828125, | |
| "epoch": 0.18, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.1825, | |
| "grad_norm": 5.973883152008057, | |
| "learning_rate": 4.302631578947369e-06, | |
| "loss": 0.3979, | |
| "step": 73 | |
| }, | |
| { | |
| "Batch Mean": 0.46913909912109375, | |
| "accuracy": 0.7265625, | |
| "epoch": 0.1825, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.185, | |
| "grad_norm": 7.876657485961914, | |
| "learning_rate": 4.289473684210527e-06, | |
| "loss": 0.4785, | |
| "step": 74 | |
| }, | |
| { | |
| "Batch Mean": 0.18231534957885742, | |
| "accuracy": 0.8125, | |
| "epoch": 0.185, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.1875, | |
| "grad_norm": 6.78118371963501, | |
| "learning_rate": 4.276315789473684e-06, | |
| "loss": 0.4094, | |
| "step": 75 | |
| }, | |
| { | |
| "Batch Mean": 0.42796552181243896, | |
| "accuracy": 0.8125, | |
| "epoch": 0.1875, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 7.038549423217773, | |
| "learning_rate": 4.2631578947368425e-06, | |
| "loss": 0.4018, | |
| "step": 76 | |
| }, | |
| { | |
| "Batch Mean": -0.197587251663208, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.19, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.1925, | |
| "grad_norm": 7.44735050201416, | |
| "learning_rate": 4.25e-06, | |
| "loss": 0.4534, | |
| "step": 77 | |
| }, | |
| { | |
| "Batch Mean": -0.12531863152980804, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.1925, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.195, | |
| "grad_norm": 7.824428081512451, | |
| "learning_rate": 4.236842105263158e-06, | |
| "loss": 0.4709, | |
| "step": 78 | |
| }, | |
| { | |
| "Batch Mean": -0.003929615020751953, | |
| "accuracy": 0.7109375, | |
| "epoch": 0.195, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.1975, | |
| "grad_norm": 8.514533996582031, | |
| "learning_rate": 4.223684210526316e-06, | |
| "loss": 0.5496, | |
| "step": 79 | |
| }, | |
| { | |
| "Batch Mean": 0.02380847930908203, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.1975, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 10.371747970581055, | |
| "learning_rate": 4.210526315789474e-06, | |
| "loss": 0.5056, | |
| "step": 80 | |
| }, | |
| { | |
| "Batch Mean": -0.47749924659729004, | |
| "accuracy": 0.796875, | |
| "epoch": 0.2, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.2025, | |
| "grad_norm": 9.618741989135742, | |
| "learning_rate": 4.197368421052632e-06, | |
| "loss": 0.5261, | |
| "step": 81 | |
| }, | |
| { | |
| "Batch Mean": -0.46868181228637695, | |
| "accuracy": 0.703125, | |
| "epoch": 0.2025, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.205, | |
| "grad_norm": 9.87120246887207, | |
| "learning_rate": 4.18421052631579e-06, | |
| "loss": 0.5131, | |
| "step": 82 | |
| }, | |
| { | |
| "Batch Mean": 0.32471323013305664, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.205, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.2075, | |
| "grad_norm": 7.470169544219971, | |
| "learning_rate": 4.171052631578948e-06, | |
| "loss": 0.3878, | |
| "step": 83 | |
| }, | |
| { | |
| "Batch Mean": 0.13215994834899902, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.2075, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 8.594000816345215, | |
| "learning_rate": 4.157894736842106e-06, | |
| "loss": 0.5226, | |
| "step": 84 | |
| }, | |
| { | |
| "Batch Mean": -0.15986275672912598, | |
| "accuracy": 0.734375, | |
| "epoch": 0.21, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.2125, | |
| "grad_norm": 8.745137214660645, | |
| "learning_rate": 4.144736842105263e-06, | |
| "loss": 0.5466, | |
| "step": 85 | |
| }, | |
| { | |
| "Batch Mean": -0.6324893832206726, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.2125, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.215, | |
| "grad_norm": 9.078672409057617, | |
| "learning_rate": 4.1315789473684216e-06, | |
| "loss": 0.55, | |
| "step": 86 | |
| }, | |
| { | |
| "Batch Mean": 0.31803226470947266, | |
| "accuracy": 0.71875, | |
| "epoch": 0.215, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.2175, | |
| "grad_norm": 6.7177581787109375, | |
| "learning_rate": 4.118421052631579e-06, | |
| "loss": 0.5148, | |
| "step": 87 | |
| }, | |
| { | |
| "Batch Mean": 0.16668707132339478, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.2175, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 6.152680397033691, | |
| "learning_rate": 4.105263157894737e-06, | |
| "loss": 0.5204, | |
| "step": 88 | |
| }, | |
| { | |
| "Batch Mean": -0.18514534831047058, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.22, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.2225, | |
| "grad_norm": 5.66470193862915, | |
| "learning_rate": 4.092105263157895e-06, | |
| "loss": 0.4949, | |
| "step": 89 | |
| }, | |
| { | |
| "Batch Mean": 0.049420952796936035, | |
| "accuracy": 0.7265625, | |
| "epoch": 0.2225, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.225, | |
| "grad_norm": 5.757960796356201, | |
| "learning_rate": 4.078947368421053e-06, | |
| "loss": 0.4673, | |
| "step": 90 | |
| }, | |
| { | |
| "Batch Mean": 0.05896341800689697, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.225, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.2275, | |
| "grad_norm": 5.6377739906311035, | |
| "learning_rate": 4.065789473684211e-06, | |
| "loss": 0.5187, | |
| "step": 91 | |
| }, | |
| { | |
| "Batch Mean": 0.11456716060638428, | |
| "accuracy": 0.75, | |
| "epoch": 0.2275, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 5.526971817016602, | |
| "learning_rate": 4.052631578947368e-06, | |
| "loss": 0.4885, | |
| "step": 92 | |
| }, | |
| { | |
| "Batch Mean": 0.30746984481811523, | |
| "accuracy": 0.796875, | |
| "epoch": 0.23, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.2325, | |
| "grad_norm": 6.554511070251465, | |
| "learning_rate": 4.0394736842105265e-06, | |
| "loss": 0.4602, | |
| "step": 93 | |
| }, | |
| { | |
| "Batch Mean": -0.19295340776443481, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.2325, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.235, | |
| "grad_norm": 6.230556488037109, | |
| "learning_rate": 4.026315789473684e-06, | |
| "loss": 0.484, | |
| "step": 94 | |
| }, | |
| { | |
| "Batch Mean": -0.1081857681274414, | |
| "accuracy": 0.8125, | |
| "epoch": 0.235, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.2375, | |
| "grad_norm": 5.673005104064941, | |
| "learning_rate": 4.013157894736842e-06, | |
| "loss": 0.46, | |
| "step": 95 | |
| }, | |
| { | |
| "Batch Mean": -0.01769113540649414, | |
| "accuracy": 0.7109375, | |
| "epoch": 0.2375, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 6.975053787231445, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.5193, | |
| "step": 96 | |
| }, | |
| { | |
| "Batch Mean": 0.018754109740257263, | |
| "accuracy": 0.796875, | |
| "epoch": 0.24, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.2425, | |
| "grad_norm": 7.40658712387085, | |
| "learning_rate": 3.986842105263158e-06, | |
| "loss": 0.4594, | |
| "step": 97 | |
| }, | |
| { | |
| "Batch Mean": 0.1552438735961914, | |
| "accuracy": 0.78125, | |
| "epoch": 0.2425, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.245, | |
| "grad_norm": 8.391383171081543, | |
| "learning_rate": 3.9736842105263165e-06, | |
| "loss": 0.4326, | |
| "step": 98 | |
| }, | |
| { | |
| "Batch Mean": 0.16905894875526428, | |
| "accuracy": 0.8515625, | |
| "epoch": 0.245, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2475, | |
| "grad_norm": 7.3470611572265625, | |
| "learning_rate": 3.960526315789474e-06, | |
| "loss": 0.3526, | |
| "step": 99 | |
| }, | |
| { | |
| "Batch Mean": 0.19422674179077148, | |
| "accuracy": 0.828125, | |
| "epoch": 0.2475, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 10.861686706542969, | |
| "learning_rate": 3.947368421052632e-06, | |
| "loss": 0.4482, | |
| "step": 100 | |
| }, | |
| { | |
| "Batch Mean": -0.07992172241210938, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.25, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2525, | |
| "grad_norm": 8.720736503601074, | |
| "learning_rate": 3.93421052631579e-06, | |
| "loss": 0.4643, | |
| "step": 101 | |
| }, | |
| { | |
| "Batch Mean": -0.08377265930175781, | |
| "accuracy": 0.796875, | |
| "epoch": 0.2525, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.255, | |
| "grad_norm": 9.04084300994873, | |
| "learning_rate": 3.921052631578947e-06, | |
| "loss": 0.4507, | |
| "step": 102 | |
| }, | |
| { | |
| "Batch Mean": -0.3490009307861328, | |
| "accuracy": 0.7421875, | |
| "epoch": 0.255, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2575, | |
| "grad_norm": 11.579154968261719, | |
| "learning_rate": 3.907894736842106e-06, | |
| "loss": 0.5122, | |
| "step": 103 | |
| }, | |
| { | |
| "Batch Mean": -0.06490540504455566, | |
| "accuracy": 0.7265625, | |
| "epoch": 0.2575, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 10.022109031677246, | |
| "learning_rate": 3.894736842105263e-06, | |
| "loss": 0.4819, | |
| "step": 104 | |
| }, | |
| { | |
| "Batch Mean": -0.022978782653808594, | |
| "accuracy": 0.75, | |
| "epoch": 0.26, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.2625, | |
| "grad_norm": 8.935430526733398, | |
| "learning_rate": 3.8815789473684214e-06, | |
| "loss": 0.4774, | |
| "step": 105 | |
| }, | |
| { | |
| "Batch Mean": 0.007501125335693359, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.2625, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.265, | |
| "grad_norm": 9.447911262512207, | |
| "learning_rate": 3.868421052631579e-06, | |
| "loss": 0.4426, | |
| "step": 106 | |
| }, | |
| { | |
| "Batch Mean": 0.17902326583862305, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.265, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.2675, | |
| "grad_norm": 9.446606636047363, | |
| "learning_rate": 3.855263157894737e-06, | |
| "loss": 0.467, | |
| "step": 107 | |
| }, | |
| { | |
| "Batch Mean": -0.18947899341583252, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.2675, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 8.709321022033691, | |
| "learning_rate": 3.842105263157895e-06, | |
| "loss": 0.4297, | |
| "step": 108 | |
| }, | |
| { | |
| "Batch Mean": -0.4087619185447693, | |
| "accuracy": 0.84375, | |
| "epoch": 0.27, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.2725, | |
| "grad_norm": 9.074426651000977, | |
| "learning_rate": 3.828947368421053e-06, | |
| "loss": 0.3943, | |
| "step": 109 | |
| }, | |
| { | |
| "Batch Mean": 0.566619873046875, | |
| "accuracy": 0.8359375, | |
| "epoch": 0.2725, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.275, | |
| "grad_norm": 11.140929222106934, | |
| "learning_rate": 3.815789473684211e-06, | |
| "loss": 0.4923, | |
| "step": 110 | |
| }, | |
| { | |
| "Batch Mean": 0.041108131408691406, | |
| "accuracy": 0.796875, | |
| "epoch": 0.275, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.2775, | |
| "grad_norm": 8.454427719116211, | |
| "learning_rate": 3.802631578947369e-06, | |
| "loss": 0.4595, | |
| "step": 111 | |
| }, | |
| { | |
| "Batch Mean": 0.012408018112182617, | |
| "accuracy": 0.7421875, | |
| "epoch": 0.2775, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 8.899197578430176, | |
| "learning_rate": 3.789473684210527e-06, | |
| "loss": 0.4782, | |
| "step": 112 | |
| }, | |
| { | |
| "Batch Mean": 0.23875045776367188, | |
| "accuracy": 0.828125, | |
| "epoch": 0.28, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.2825, | |
| "grad_norm": 7.254844665527344, | |
| "learning_rate": 3.7763157894736847e-06, | |
| "loss": 0.4074, | |
| "step": 113 | |
| }, | |
| { | |
| "Batch Mean": -0.07475662231445312, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.2825, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.285, | |
| "grad_norm": 9.057879447937012, | |
| "learning_rate": 3.7631578947368426e-06, | |
| "loss": 0.504, | |
| "step": 114 | |
| }, | |
| { | |
| "Batch Mean": 0.00018590688705444336, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.285, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.2875, | |
| "grad_norm": 7.1575517654418945, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 0.457, | |
| "step": 115 | |
| }, | |
| { | |
| "Batch Mean": -0.312822163105011, | |
| "accuracy": 0.796875, | |
| "epoch": 0.2875, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 7.315279960632324, | |
| "learning_rate": 3.736842105263158e-06, | |
| "loss": 0.4325, | |
| "step": 116 | |
| }, | |
| { | |
| "Batch Mean": -0.2054152488708496, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.29, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.2925, | |
| "grad_norm": 8.820938110351562, | |
| "learning_rate": 3.723684210526316e-06, | |
| "loss": 0.5494, | |
| "step": 117 | |
| }, | |
| { | |
| "Batch Mean": 0.07627412676811218, | |
| "accuracy": 0.734375, | |
| "epoch": 0.2925, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.295, | |
| "grad_norm": 8.194517135620117, | |
| "learning_rate": 3.710526315789474e-06, | |
| "loss": 0.5238, | |
| "step": 118 | |
| }, | |
| { | |
| "Batch Mean": 0.30588626861572266, | |
| "accuracy": 0.71875, | |
| "epoch": 0.295, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.2975, | |
| "grad_norm": 7.1583943367004395, | |
| "learning_rate": 3.6973684210526317e-06, | |
| "loss": 0.4738, | |
| "step": 119 | |
| }, | |
| { | |
| "Batch Mean": 0.5761882066726685, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.2975, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 7.513785362243652, | |
| "learning_rate": 3.6842105263157896e-06, | |
| "loss": 0.4443, | |
| "step": 120 | |
| }, | |
| { | |
| "Batch Mean": 0.08078157901763916, | |
| "accuracy": 0.765625, | |
| "epoch": 0.3, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.3025, | |
| "grad_norm": 7.000987529754639, | |
| "learning_rate": 3.6710526315789476e-06, | |
| "loss": 0.4616, | |
| "step": 121 | |
| }, | |
| { | |
| "Batch Mean": -0.43543756008148193, | |
| "accuracy": 0.78125, | |
| "epoch": 0.3025, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.305, | |
| "grad_norm": 7.152343273162842, | |
| "learning_rate": 3.657894736842106e-06, | |
| "loss": 0.4843, | |
| "step": 122 | |
| }, | |
| { | |
| "Batch Mean": -0.19835996627807617, | |
| "accuracy": 0.859375, | |
| "epoch": 0.305, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.3075, | |
| "grad_norm": 6.46798038482666, | |
| "learning_rate": 3.644736842105264e-06, | |
| "loss": 0.4245, | |
| "step": 123 | |
| }, | |
| { | |
| "Batch Mean": -0.09520387649536133, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.3075, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 6.794553279876709, | |
| "learning_rate": 3.6315789473684217e-06, | |
| "loss": 0.4915, | |
| "step": 124 | |
| }, | |
| { | |
| "Batch Mean": 0.07479727268218994, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.31, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.3125, | |
| "grad_norm": 6.4064459800720215, | |
| "learning_rate": 3.618421052631579e-06, | |
| "loss": 0.4949, | |
| "step": 125 | |
| }, | |
| { | |
| "Batch Mean": 0.30538392066955566, | |
| "accuracy": 0.859375, | |
| "epoch": 0.3125, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.315, | |
| "grad_norm": 6.305266380310059, | |
| "learning_rate": 3.605263157894737e-06, | |
| "loss": 0.412, | |
| "step": 126 | |
| }, | |
| { | |
| "Batch Mean": 0.07359528541564941, | |
| "accuracy": 0.8125, | |
| "epoch": 0.315, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.3175, | |
| "grad_norm": 6.956873416900635, | |
| "learning_rate": 3.592105263157895e-06, | |
| "loss": 0.4185, | |
| "step": 127 | |
| }, | |
| { | |
| "Batch Mean": -0.3063009977340698, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.3175, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 7.109605312347412, | |
| "learning_rate": 3.578947368421053e-06, | |
| "loss": 0.4776, | |
| "step": 128 | |
| }, | |
| { | |
| "Batch Mean": -0.3017888069152832, | |
| "accuracy": 0.7265625, | |
| "epoch": 0.32, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.3225, | |
| "grad_norm": 8.304880142211914, | |
| "learning_rate": 3.565789473684211e-06, | |
| "loss": 0.4996, | |
| "step": 129 | |
| }, | |
| { | |
| "Batch Mean": -0.014749407768249512, | |
| "accuracy": 0.828125, | |
| "epoch": 0.3225, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.325, | |
| "grad_norm": 7.905064105987549, | |
| "learning_rate": 3.5526315789473687e-06, | |
| "loss": 0.4126, | |
| "step": 130 | |
| }, | |
| { | |
| "Batch Mean": 0.015563011169433594, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.325, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.3275, | |
| "grad_norm": 8.412541389465332, | |
| "learning_rate": 3.5394736842105266e-06, | |
| "loss": 0.4277, | |
| "step": 131 | |
| }, | |
| { | |
| "Batch Mean": 0.7716255187988281, | |
| "accuracy": 0.7421875, | |
| "epoch": 0.3275, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 11.674385070800781, | |
| "learning_rate": 3.5263157894736846e-06, | |
| "loss": 0.517, | |
| "step": 132 | |
| }, | |
| { | |
| "Batch Mean": 0.5991384983062744, | |
| "accuracy": 0.7421875, | |
| "epoch": 0.33, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.3325, | |
| "grad_norm": 9.975981712341309, | |
| "learning_rate": 3.513157894736842e-06, | |
| "loss": 0.4847, | |
| "step": 133 | |
| }, | |
| { | |
| "Batch Mean": 0.21936583518981934, | |
| "accuracy": 0.7421875, | |
| "epoch": 0.3325, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.335, | |
| "grad_norm": 10.040966987609863, | |
| "learning_rate": 3.5e-06, | |
| "loss": 0.527, | |
| "step": 134 | |
| }, | |
| { | |
| "Batch Mean": -0.5911221504211426, | |
| "accuracy": 0.828125, | |
| "epoch": 0.335, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.3375, | |
| "grad_norm": 8.332611083984375, | |
| "learning_rate": 3.486842105263158e-06, | |
| "loss": 0.4008, | |
| "step": 135 | |
| }, | |
| { | |
| "Batch Mean": -0.3432912826538086, | |
| "accuracy": 0.765625, | |
| "epoch": 0.3375, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 7.913636207580566, | |
| "learning_rate": 3.473684210526316e-06, | |
| "loss": 0.4786, | |
| "step": 136 | |
| }, | |
| { | |
| "Batch Mean": -0.656095027923584, | |
| "accuracy": 0.7421875, | |
| "epoch": 0.34, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.3425, | |
| "grad_norm": 9.470314979553223, | |
| "learning_rate": 3.460526315789474e-06, | |
| "loss": 0.5443, | |
| "step": 137 | |
| }, | |
| { | |
| "Batch Mean": -0.42609643936157227, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.3425, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.345, | |
| "grad_norm": 8.210694313049316, | |
| "learning_rate": 3.447368421052632e-06, | |
| "loss": 0.4324, | |
| "step": 138 | |
| }, | |
| { | |
| "Batch Mean": 0.05267333984375, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.345, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.3475, | |
| "grad_norm": 8.18636417388916, | |
| "learning_rate": 3.43421052631579e-06, | |
| "loss": 0.4713, | |
| "step": 139 | |
| }, | |
| { | |
| "Batch Mean": 0.38585996627807617, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.3475, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 7.745424747467041, | |
| "learning_rate": 3.421052631578948e-06, | |
| "loss": 0.4434, | |
| "step": 140 | |
| }, | |
| { | |
| "Batch Mean": 0.41099733114242554, | |
| "accuracy": 0.796875, | |
| "epoch": 0.35, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3525, | |
| "grad_norm": 7.906560897827148, | |
| "learning_rate": 3.4078947368421057e-06, | |
| "loss": 0.4393, | |
| "step": 141 | |
| }, | |
| { | |
| "Batch Mean": 0.1317775845527649, | |
| "accuracy": 0.734375, | |
| "epoch": 0.3525, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.355, | |
| "grad_norm": 7.590117931365967, | |
| "learning_rate": 3.3947368421052636e-06, | |
| "loss": 0.5016, | |
| "step": 142 | |
| }, | |
| { | |
| "Batch Mean": 0.015200823545455933, | |
| "accuracy": 0.828125, | |
| "epoch": 0.355, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.3575, | |
| "grad_norm": 6.288278102874756, | |
| "learning_rate": 3.381578947368421e-06, | |
| "loss": 0.4034, | |
| "step": 143 | |
| }, | |
| { | |
| "Batch Mean": -0.5611648559570312, | |
| "accuracy": 0.796875, | |
| "epoch": 0.3575, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 8.634895324707031, | |
| "learning_rate": 3.368421052631579e-06, | |
| "loss": 0.4923, | |
| "step": 144 | |
| }, | |
| { | |
| "Batch Mean": -0.27208590507507324, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.36, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3625, | |
| "grad_norm": 8.025900840759277, | |
| "learning_rate": 3.355263157894737e-06, | |
| "loss": 0.4137, | |
| "step": 145 | |
| }, | |
| { | |
| "Batch Mean": 0.2424778938293457, | |
| "accuracy": 0.8125, | |
| "epoch": 0.3625, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.365, | |
| "grad_norm": 6.871817111968994, | |
| "learning_rate": 3.342105263157895e-06, | |
| "loss": 0.4357, | |
| "step": 146 | |
| }, | |
| { | |
| "Batch Mean": 0.12324929237365723, | |
| "accuracy": 0.796875, | |
| "epoch": 0.365, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.3675, | |
| "grad_norm": 6.921773910522461, | |
| "learning_rate": 3.3289473684210528e-06, | |
| "loss": 0.4154, | |
| "step": 147 | |
| }, | |
| { | |
| "Batch Mean": 0.3229856491088867, | |
| "accuracy": 0.8359375, | |
| "epoch": 0.3675, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 6.962841033935547, | |
| "learning_rate": 3.3157894736842107e-06, | |
| "loss": 0.4052, | |
| "step": 148 | |
| }, | |
| { | |
| "Batch Mean": 0.32460731267929077, | |
| "accuracy": 0.765625, | |
| "epoch": 0.37, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.3725, | |
| "grad_norm": 7.69809627532959, | |
| "learning_rate": 3.302631578947369e-06, | |
| "loss": 0.4672, | |
| "step": 149 | |
| }, | |
| { | |
| "Batch Mean": 0.09575581550598145, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.3725, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.375, | |
| "grad_norm": 6.439456462860107, | |
| "learning_rate": 3.289473684210527e-06, | |
| "loss": 0.4318, | |
| "step": 150 | |
| }, | |
| { | |
| "Batch Mean": -0.04370218515396118, | |
| "accuracy": 0.765625, | |
| "epoch": 0.375, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3775, | |
| "grad_norm": 8.02590274810791, | |
| "learning_rate": 3.276315789473685e-06, | |
| "loss": 0.4976, | |
| "step": 151 | |
| }, | |
| { | |
| "Batch Mean": -0.019139885902404785, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.3775, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 8.893292427062988, | |
| "learning_rate": 3.2631578947368423e-06, | |
| "loss": 0.5053, | |
| "step": 152 | |
| }, | |
| { | |
| "Batch Mean": -0.4432401657104492, | |
| "accuracy": 0.8515625, | |
| "epoch": 0.38, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.3825, | |
| "grad_norm": 7.497973918914795, | |
| "learning_rate": 3.2500000000000002e-06, | |
| "loss": 0.3261, | |
| "step": 153 | |
| }, | |
| { | |
| "Batch Mean": -0.724576473236084, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.3825, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.385, | |
| "grad_norm": 8.990057945251465, | |
| "learning_rate": 3.236842105263158e-06, | |
| "loss": 0.3831, | |
| "step": 154 | |
| }, | |
| { | |
| "Batch Mean": 0.053240299224853516, | |
| "accuracy": 0.7421875, | |
| "epoch": 0.385, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.3875, | |
| "grad_norm": 8.894330978393555, | |
| "learning_rate": 3.223684210526316e-06, | |
| "loss": 0.4804, | |
| "step": 155 | |
| }, | |
| { | |
| "Batch Mean": 0.14265680313110352, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.3875, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 8.702239990234375, | |
| "learning_rate": 3.210526315789474e-06, | |
| "loss": 0.4734, | |
| "step": 156 | |
| }, | |
| { | |
| "Batch Mean": 0.5876922607421875, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.39, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.3925, | |
| "grad_norm": 10.919489860534668, | |
| "learning_rate": 3.197368421052632e-06, | |
| "loss": 0.4808, | |
| "step": 157 | |
| }, | |
| { | |
| "Batch Mean": 0.5518760681152344, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.3925, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.395, | |
| "grad_norm": 11.542296409606934, | |
| "learning_rate": 3.1842105263157898e-06, | |
| "loss": 0.4762, | |
| "step": 158 | |
| }, | |
| { | |
| "Batch Mean": 0.15475153923034668, | |
| "accuracy": 0.796875, | |
| "epoch": 0.395, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.3975, | |
| "grad_norm": 8.576542854309082, | |
| "learning_rate": 3.1710526315789477e-06, | |
| "loss": 0.4403, | |
| "step": 159 | |
| }, | |
| { | |
| "Batch Mean": -0.13883423805236816, | |
| "accuracy": 0.7109375, | |
| "epoch": 0.3975, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 11.454021453857422, | |
| "learning_rate": 3.157894736842105e-06, | |
| "loss": 0.5884, | |
| "step": 160 | |
| }, | |
| { | |
| "Batch Mean": -0.8513237833976746, | |
| "accuracy": 0.796875, | |
| "epoch": 0.4, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4025, | |
| "grad_norm": 10.49366283416748, | |
| "learning_rate": 3.144736842105263e-06, | |
| "loss": 0.4538, | |
| "step": 161 | |
| }, | |
| { | |
| "Batch Mean": -0.8935723304748535, | |
| "accuracy": 0.765625, | |
| "epoch": 0.4025, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.405, | |
| "grad_norm": 10.27230453491211, | |
| "learning_rate": 3.131578947368421e-06, | |
| "loss": 0.5234, | |
| "step": 162 | |
| }, | |
| { | |
| "Batch Mean": -0.6798567771911621, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.405, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.4075, | |
| "grad_norm": 9.556315422058105, | |
| "learning_rate": 3.1184210526315793e-06, | |
| "loss": 0.4783, | |
| "step": 163 | |
| }, | |
| { | |
| "Batch Mean": 0.3221316337585449, | |
| "accuracy": 0.78125, | |
| "epoch": 0.4075, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 6.531960487365723, | |
| "learning_rate": 3.1052631578947372e-06, | |
| "loss": 0.4008, | |
| "step": 164 | |
| }, | |
| { | |
| "Batch Mean": 0.6966171264648438, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.41, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.4125, | |
| "grad_norm": 8.115531921386719, | |
| "learning_rate": 3.092105263157895e-06, | |
| "loss": 0.4372, | |
| "step": 165 | |
| }, | |
| { | |
| "Batch Mean": 0.23778432607650757, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.4125, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.415, | |
| "grad_norm": 7.085214614868164, | |
| "learning_rate": 3.078947368421053e-06, | |
| "loss": 0.4541, | |
| "step": 166 | |
| }, | |
| { | |
| "Batch Mean": 0.04204574227333069, | |
| "accuracy": 0.796875, | |
| "epoch": 0.415, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.4175, | |
| "grad_norm": 6.353330612182617, | |
| "learning_rate": 3.065789473684211e-06, | |
| "loss": 0.4495, | |
| "step": 167 | |
| }, | |
| { | |
| "Batch Mean": 0.166695237159729, | |
| "accuracy": 0.859375, | |
| "epoch": 0.4175, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 6.295947551727295, | |
| "learning_rate": 3.052631578947369e-06, | |
| "loss": 0.3927, | |
| "step": 168 | |
| }, | |
| { | |
| "Batch Mean": -0.11499762535095215, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.42, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.4225, | |
| "grad_norm": 7.001240253448486, | |
| "learning_rate": 3.0394736842105268e-06, | |
| "loss": 0.455, | |
| "step": 169 | |
| }, | |
| { | |
| "Batch Mean": 0.0780038833618164, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.4225, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.425, | |
| "grad_norm": 7.080629348754883, | |
| "learning_rate": 3.0263157894736843e-06, | |
| "loss": 0.4478, | |
| "step": 170 | |
| }, | |
| { | |
| "Batch Mean": -0.05321967601776123, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.425, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.4275, | |
| "grad_norm": 7.381290435791016, | |
| "learning_rate": 3.013157894736842e-06, | |
| "loss": 0.4053, | |
| "step": 171 | |
| }, | |
| { | |
| "Batch Mean": -0.3578749895095825, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.4275, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 8.297618865966797, | |
| "learning_rate": 3e-06, | |
| "loss": 0.4019, | |
| "step": 172 | |
| }, | |
| { | |
| "Batch Mean": 0.11733776330947876, | |
| "accuracy": 0.796875, | |
| "epoch": 0.43, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.4325, | |
| "grad_norm": 7.648263454437256, | |
| "learning_rate": 2.986842105263158e-06, | |
| "loss": 0.4597, | |
| "step": 173 | |
| }, | |
| { | |
| "Batch Mean": 0.357774019241333, | |
| "accuracy": 0.75, | |
| "epoch": 0.4325, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.435, | |
| "grad_norm": 9.065951347351074, | |
| "learning_rate": 2.973684210526316e-06, | |
| "loss": 0.5486, | |
| "step": 174 | |
| }, | |
| { | |
| "Batch Mean": 0.36174535751342773, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.435, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.4375, | |
| "grad_norm": 9.51435375213623, | |
| "learning_rate": 2.960526315789474e-06, | |
| "loss": 0.5019, | |
| "step": 175 | |
| }, | |
| { | |
| "Batch Mean": 0.046081721782684326, | |
| "accuracy": 0.8125, | |
| "epoch": 0.4375, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 7.350387096405029, | |
| "learning_rate": 2.9473684210526317e-06, | |
| "loss": 0.4045, | |
| "step": 176 | |
| }, | |
| { | |
| "Batch Mean": -0.42429447174072266, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.44, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.4425, | |
| "grad_norm": 8.008047103881836, | |
| "learning_rate": 2.93421052631579e-06, | |
| "loss": 0.3562, | |
| "step": 177 | |
| }, | |
| { | |
| "Batch Mean": -0.2927101254463196, | |
| "accuracy": 0.8515625, | |
| "epoch": 0.4425, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.445, | |
| "grad_norm": 7.387500286102295, | |
| "learning_rate": 2.921052631578948e-06, | |
| "loss": 0.3909, | |
| "step": 178 | |
| }, | |
| { | |
| "Batch Mean": 0.04245138168334961, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.445, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.4475, | |
| "grad_norm": 8.440096855163574, | |
| "learning_rate": 2.907894736842106e-06, | |
| "loss": 0.4496, | |
| "step": 179 | |
| }, | |
| { | |
| "Batch Mean": 0.24954700469970703, | |
| "accuracy": 0.78125, | |
| "epoch": 0.4475, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 8.415813446044922, | |
| "learning_rate": 2.8947368421052634e-06, | |
| "loss": 0.4481, | |
| "step": 180 | |
| }, | |
| { | |
| "Batch Mean": 0.27982330322265625, | |
| "accuracy": 0.8125, | |
| "epoch": 0.45, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4525, | |
| "grad_norm": 7.720165729522705, | |
| "learning_rate": 2.8815789473684213e-06, | |
| "loss": 0.3969, | |
| "step": 181 | |
| }, | |
| { | |
| "Batch Mean": 0.16068685054779053, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.4525, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.455, | |
| "grad_norm": 7.793689250946045, | |
| "learning_rate": 2.868421052631579e-06, | |
| "loss": 0.4422, | |
| "step": 182 | |
| }, | |
| { | |
| "Batch Mean": -0.15400493144989014, | |
| "accuracy": 0.796875, | |
| "epoch": 0.455, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.4575, | |
| "grad_norm": 7.5626983642578125, | |
| "learning_rate": 2.855263157894737e-06, | |
| "loss": 0.4099, | |
| "step": 183 | |
| }, | |
| { | |
| "Batch Mean": -0.12253165245056152, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.4575, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 8.620087623596191, | |
| "learning_rate": 2.842105263157895e-06, | |
| "loss": 0.4243, | |
| "step": 184 | |
| }, | |
| { | |
| "Batch Mean": -0.4097585678100586, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.46, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.4625, | |
| "grad_norm": 9.832942008972168, | |
| "learning_rate": 2.828947368421053e-06, | |
| "loss": 0.4886, | |
| "step": 185 | |
| }, | |
| { | |
| "Batch Mean": 0.03511929512023926, | |
| "accuracy": 0.8125, | |
| "epoch": 0.4625, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.465, | |
| "grad_norm": 7.654294967651367, | |
| "learning_rate": 2.815789473684211e-06, | |
| "loss": 0.4481, | |
| "step": 186 | |
| }, | |
| { | |
| "Batch Mean": 0.09729552268981934, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.465, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.4675, | |
| "grad_norm": 7.023317337036133, | |
| "learning_rate": 2.8026315789473683e-06, | |
| "loss": 0.3595, | |
| "step": 187 | |
| }, | |
| { | |
| "Batch Mean": 0.33197975158691406, | |
| "accuracy": 0.796875, | |
| "epoch": 0.4675, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 8.742325782775879, | |
| "learning_rate": 2.789473684210526e-06, | |
| "loss": 0.4665, | |
| "step": 188 | |
| }, | |
| { | |
| "Batch Mean": 0.25560569763183594, | |
| "accuracy": 0.75, | |
| "epoch": 0.47, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.4725, | |
| "grad_norm": 11.000374794006348, | |
| "learning_rate": 2.776315789473684e-06, | |
| "loss": 0.4986, | |
| "step": 189 | |
| }, | |
| { | |
| "Batch Mean": -0.2584996223449707, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.4725, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.475, | |
| "grad_norm": 7.9717936515808105, | |
| "learning_rate": 2.7631578947368424e-06, | |
| "loss": 0.4161, | |
| "step": 190 | |
| }, | |
| { | |
| "Batch Mean": -0.22836577892303467, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.475, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4775, | |
| "grad_norm": 7.443301677703857, | |
| "learning_rate": 2.7500000000000004e-06, | |
| "loss": 0.4265, | |
| "step": 191 | |
| }, | |
| { | |
| "Batch Mean": -0.3825557231903076, | |
| "accuracy": 0.84375, | |
| "epoch": 0.4775, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 7.77215051651001, | |
| "learning_rate": 2.7368421052631583e-06, | |
| "loss": 0.3963, | |
| "step": 192 | |
| }, | |
| { | |
| "Batch Mean": -0.021443605422973633, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.48, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.4825, | |
| "grad_norm": 7.788425922393799, | |
| "learning_rate": 2.723684210526316e-06, | |
| "loss": 0.4281, | |
| "step": 193 | |
| }, | |
| { | |
| "Batch Mean": -0.04027605056762695, | |
| "accuracy": 0.828125, | |
| "epoch": 0.4825, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.485, | |
| "grad_norm": 8.327126502990723, | |
| "learning_rate": 2.710526315789474e-06, | |
| "loss": 0.394, | |
| "step": 194 | |
| }, | |
| { | |
| "Batch Mean": 0.13981229066848755, | |
| "accuracy": 0.796875, | |
| "epoch": 0.485, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.4875, | |
| "grad_norm": 7.412836074829102, | |
| "learning_rate": 2.697368421052632e-06, | |
| "loss": 0.4357, | |
| "step": 195 | |
| }, | |
| { | |
| "Batch Mean": 0.4760279655456543, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.4875, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 9.25843334197998, | |
| "learning_rate": 2.68421052631579e-06, | |
| "loss": 0.3884, | |
| "step": 196 | |
| }, | |
| { | |
| "Batch Mean": 0.3217476010322571, | |
| "accuracy": 0.7109375, | |
| "epoch": 0.49, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.4925, | |
| "grad_norm": 9.074698448181152, | |
| "learning_rate": 2.6710526315789474e-06, | |
| "loss": 0.4952, | |
| "step": 197 | |
| }, | |
| { | |
| "Batch Mean": 0.5415520668029785, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.4925, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.495, | |
| "grad_norm": 9.3367338180542, | |
| "learning_rate": 2.6578947368421053e-06, | |
| "loss": 0.4183, | |
| "step": 198 | |
| }, | |
| { | |
| "Batch Mean": -0.03570103645324707, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.495, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.4975, | |
| "grad_norm": 6.996811389923096, | |
| "learning_rate": 2.644736842105263e-06, | |
| "loss": 0.4322, | |
| "step": 199 | |
| }, | |
| { | |
| "Batch Mean": -0.32998645305633545, | |
| "accuracy": 0.890625, | |
| "epoch": 0.4975, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 8.221077919006348, | |
| "learning_rate": 2.631578947368421e-06, | |
| "loss": 0.3345, | |
| "step": 200 | |
| }, | |
| { | |
| "Batch Mean": -0.454451322555542, | |
| "accuracy": 0.84375, | |
| "epoch": 0.5, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5025, | |
| "grad_norm": 8.876556396484375, | |
| "learning_rate": 2.618421052631579e-06, | |
| "loss": 0.4244, | |
| "step": 201 | |
| }, | |
| { | |
| "Batch Mean": -0.2097405195236206, | |
| "accuracy": 0.8359375, | |
| "epoch": 0.5025, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.505, | |
| "grad_norm": 7.442080497741699, | |
| "learning_rate": 2.605263157894737e-06, | |
| "loss": 0.3356, | |
| "step": 202 | |
| }, | |
| { | |
| "Batch Mean": 0.26877450942993164, | |
| "accuracy": 0.828125, | |
| "epoch": 0.505, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.5075, | |
| "grad_norm": 8.254768371582031, | |
| "learning_rate": 2.592105263157895e-06, | |
| "loss": 0.3804, | |
| "step": 203 | |
| }, | |
| { | |
| "Batch Mean": 0.39564692974090576, | |
| "accuracy": 0.828125, | |
| "epoch": 0.5075, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 9.268996238708496, | |
| "learning_rate": 2.578947368421053e-06, | |
| "loss": 0.3466, | |
| "step": 204 | |
| }, | |
| { | |
| "Batch Mean": 0.49916696548461914, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.51, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.5125, | |
| "grad_norm": 9.833324432373047, | |
| "learning_rate": 2.565789473684211e-06, | |
| "loss": 0.4497, | |
| "step": 205 | |
| }, | |
| { | |
| "Batch Mean": 0.49285316467285156, | |
| "accuracy": 0.7421875, | |
| "epoch": 0.5125, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.515, | |
| "grad_norm": 11.675568580627441, | |
| "learning_rate": 2.552631578947369e-06, | |
| "loss": 0.5267, | |
| "step": 206 | |
| }, | |
| { | |
| "Batch Mean": -0.10904598236083984, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.515, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.5175, | |
| "grad_norm": 8.038030624389648, | |
| "learning_rate": 2.5394736842105265e-06, | |
| "loss": 0.4061, | |
| "step": 207 | |
| }, | |
| { | |
| "Batch Mean": -0.701776921749115, | |
| "accuracy": 0.796875, | |
| "epoch": 0.5175, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 10.525407791137695, | |
| "learning_rate": 2.5263157894736844e-06, | |
| "loss": 0.4593, | |
| "step": 208 | |
| }, | |
| { | |
| "Batch Mean": -0.8417069911956787, | |
| "accuracy": 0.8515625, | |
| "epoch": 0.52, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.5225, | |
| "grad_norm": 10.60543441772461, | |
| "learning_rate": 2.5131578947368423e-06, | |
| "loss": 0.3632, | |
| "step": 209 | |
| }, | |
| { | |
| "Batch Mean": -0.4368765354156494, | |
| "accuracy": 0.78125, | |
| "epoch": 0.5225, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.525, | |
| "grad_norm": 8.699065208435059, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.4433, | |
| "step": 210 | |
| }, | |
| { | |
| "Batch Mean": -0.13661432266235352, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.525, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5275, | |
| "grad_norm": 7.995443344116211, | |
| "learning_rate": 2.486842105263158e-06, | |
| "loss": 0.4005, | |
| "step": 211 | |
| }, | |
| { | |
| "Batch Mean": 0.3625812530517578, | |
| "accuracy": 0.734375, | |
| "epoch": 0.5275, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 10.416319847106934, | |
| "learning_rate": 2.473684210526316e-06, | |
| "loss": 0.4979, | |
| "step": 212 | |
| }, | |
| { | |
| "Batch Mean": 0.37776803970336914, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.53, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.5325, | |
| "grad_norm": 10.456337928771973, | |
| "learning_rate": 2.460526315789474e-06, | |
| "loss": 0.4785, | |
| "step": 213 | |
| }, | |
| { | |
| "Batch Mean": 0.38120055198669434, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.5325, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.535, | |
| "grad_norm": 10.987972259521484, | |
| "learning_rate": 2.447368421052632e-06, | |
| "loss": 0.4453, | |
| "step": 214 | |
| }, | |
| { | |
| "Batch Mean": 0.45799708366394043, | |
| "accuracy": 0.8125, | |
| "epoch": 0.535, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.5375, | |
| "grad_norm": 8.45465087890625, | |
| "learning_rate": 2.4342105263157898e-06, | |
| "loss": 0.3826, | |
| "step": 215 | |
| }, | |
| { | |
| "Batch Mean": 0.27211108803749084, | |
| "accuracy": 0.796875, | |
| "epoch": 0.5375, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 8.310362815856934, | |
| "learning_rate": 2.4210526315789477e-06, | |
| "loss": 0.4275, | |
| "step": 216 | |
| }, | |
| { | |
| "Batch Mean": -0.30036449432373047, | |
| "accuracy": 0.84375, | |
| "epoch": 0.54, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.5425, | |
| "grad_norm": 7.409989833831787, | |
| "learning_rate": 2.4078947368421056e-06, | |
| "loss": 0.3731, | |
| "step": 217 | |
| }, | |
| { | |
| "Batch Mean": -0.8127567768096924, | |
| "accuracy": 0.765625, | |
| "epoch": 0.5425, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.545, | |
| "grad_norm": 10.259916305541992, | |
| "learning_rate": 2.3947368421052635e-06, | |
| "loss": 0.4895, | |
| "step": 218 | |
| }, | |
| { | |
| "Batch Mean": -0.6803557872772217, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.545, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.5475, | |
| "grad_norm": 9.909168243408203, | |
| "learning_rate": 2.381578947368421e-06, | |
| "loss": 0.4225, | |
| "step": 219 | |
| }, | |
| { | |
| "Batch Mean": -0.4241673946380615, | |
| "accuracy": 0.84375, | |
| "epoch": 0.5475, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 8.27176284790039, | |
| "learning_rate": 2.368421052631579e-06, | |
| "loss": 0.3686, | |
| "step": 220 | |
| }, | |
| { | |
| "Batch Mean": -0.0004788041114807129, | |
| "accuracy": 0.828125, | |
| "epoch": 0.55, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5525, | |
| "grad_norm": 7.069624423980713, | |
| "learning_rate": 2.355263157894737e-06, | |
| "loss": 0.3807, | |
| "step": 221 | |
| }, | |
| { | |
| "Batch Mean": 0.6189688444137573, | |
| "accuracy": 0.796875, | |
| "epoch": 0.5525, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.555, | |
| "grad_norm": 9.617613792419434, | |
| "learning_rate": 2.342105263157895e-06, | |
| "loss": 0.4403, | |
| "step": 222 | |
| }, | |
| { | |
| "Batch Mean": 0.8222973346710205, | |
| "accuracy": 0.8125, | |
| "epoch": 0.555, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.5575, | |
| "grad_norm": 11.406815528869629, | |
| "learning_rate": 2.328947368421053e-06, | |
| "loss": 0.4297, | |
| "step": 223 | |
| }, | |
| { | |
| "Batch Mean": 0.6735987663269043, | |
| "accuracy": 0.7265625, | |
| "epoch": 0.5575, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 9.760802268981934, | |
| "learning_rate": 2.3157894736842105e-06, | |
| "loss": 0.4976, | |
| "step": 224 | |
| }, | |
| { | |
| "Batch Mean": 0.4071592092514038, | |
| "accuracy": 0.8125, | |
| "epoch": 0.56, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.5625, | |
| "grad_norm": 8.573234558105469, | |
| "learning_rate": 2.3026315789473684e-06, | |
| "loss": 0.4259, | |
| "step": 225 | |
| }, | |
| { | |
| "Batch Mean": -0.2441411018371582, | |
| "accuracy": 0.8125, | |
| "epoch": 0.5625, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.565, | |
| "grad_norm": 7.475236415863037, | |
| "learning_rate": 2.2894736842105263e-06, | |
| "loss": 0.3659, | |
| "step": 226 | |
| }, | |
| { | |
| "Batch Mean": -0.4010552167892456, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.565, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.5675, | |
| "grad_norm": 9.071084022521973, | |
| "learning_rate": 2.2763157894736847e-06, | |
| "loss": 0.4443, | |
| "step": 227 | |
| }, | |
| { | |
| "Batch Mean": -0.6741750240325928, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.5675, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 9.611616134643555, | |
| "learning_rate": 2.2631578947368426e-06, | |
| "loss": 0.4298, | |
| "step": 228 | |
| }, | |
| { | |
| "Batch Mean": -0.41678524017333984, | |
| "accuracy": 0.7421875, | |
| "epoch": 0.57, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.5725, | |
| "grad_norm": 8.71789836883545, | |
| "learning_rate": 2.25e-06, | |
| "loss": 0.5002, | |
| "step": 229 | |
| }, | |
| { | |
| "Batch Mean": -0.2588639259338379, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.5725, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.575, | |
| "grad_norm": 7.758385181427002, | |
| "learning_rate": 2.236842105263158e-06, | |
| "loss": 0.445, | |
| "step": 230 | |
| }, | |
| { | |
| "Batch Mean": 0.010074615478515625, | |
| "accuracy": 0.84375, | |
| "epoch": 0.575, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5775, | |
| "grad_norm": 6.772731781005859, | |
| "learning_rate": 2.223684210526316e-06, | |
| "loss": 0.3435, | |
| "step": 231 | |
| }, | |
| { | |
| "Batch Mean": 0.18453282117843628, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.5775, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 9.391664505004883, | |
| "learning_rate": 2.2105263157894738e-06, | |
| "loss": 0.5119, | |
| "step": 232 | |
| }, | |
| { | |
| "Batch Mean": 0.1386091113090515, | |
| "accuracy": 0.875, | |
| "epoch": 0.58, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.5825, | |
| "grad_norm": 6.733539581298828, | |
| "learning_rate": 2.1973684210526317e-06, | |
| "loss": 0.3336, | |
| "step": 233 | |
| }, | |
| { | |
| "Batch Mean": 0.481150358915329, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.5825, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.585, | |
| "grad_norm": 7.957646369934082, | |
| "learning_rate": 2.1842105263157896e-06, | |
| "loss": 0.4015, | |
| "step": 234 | |
| }, | |
| { | |
| "Batch Mean": 0.2404952049255371, | |
| "accuracy": 0.828125, | |
| "epoch": 0.585, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.5875, | |
| "grad_norm": 7.541924476623535, | |
| "learning_rate": 2.1710526315789475e-06, | |
| "loss": 0.3652, | |
| "step": 235 | |
| }, | |
| { | |
| "Batch Mean": 0.1457509994506836, | |
| "accuracy": 0.7421875, | |
| "epoch": 0.5875, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 7.633518695831299, | |
| "learning_rate": 2.1578947368421054e-06, | |
| "loss": 0.4396, | |
| "step": 236 | |
| }, | |
| { | |
| "Batch Mean": -0.007049560546875, | |
| "accuracy": 0.8125, | |
| "epoch": 0.59, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.5925, | |
| "grad_norm": 9.89782428741455, | |
| "learning_rate": 2.1447368421052633e-06, | |
| "loss": 0.4291, | |
| "step": 237 | |
| }, | |
| { | |
| "Batch Mean": 0.09562346339225769, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.5925, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.595, | |
| "grad_norm": 7.834791660308838, | |
| "learning_rate": 2.1315789473684212e-06, | |
| "loss": 0.4193, | |
| "step": 238 | |
| }, | |
| { | |
| "Batch Mean": -0.13153940439224243, | |
| "accuracy": 0.7421875, | |
| "epoch": 0.595, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.5975, | |
| "grad_norm": 10.504973411560059, | |
| "learning_rate": 2.118421052631579e-06, | |
| "loss": 0.5003, | |
| "step": 239 | |
| }, | |
| { | |
| "Batch Mean": -0.06204497814178467, | |
| "accuracy": 0.8515625, | |
| "epoch": 0.5975, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 7.466853141784668, | |
| "learning_rate": 2.105263157894737e-06, | |
| "loss": 0.3526, | |
| "step": 240 | |
| }, | |
| { | |
| "Batch Mean": -0.0694296658039093, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.6, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6025, | |
| "grad_norm": 8.141443252563477, | |
| "learning_rate": 2.092105263157895e-06, | |
| "loss": 0.4294, | |
| "step": 241 | |
| }, | |
| { | |
| "Batch Mean": -0.13682937622070312, | |
| "accuracy": 0.796875, | |
| "epoch": 0.6025, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.605, | |
| "grad_norm": 9.132308959960938, | |
| "learning_rate": 2.078947368421053e-06, | |
| "loss": 0.4273, | |
| "step": 242 | |
| }, | |
| { | |
| "Batch Mean": -0.13353228569030762, | |
| "accuracy": 0.8125, | |
| "epoch": 0.605, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.6075, | |
| "grad_norm": 10.082789421081543, | |
| "learning_rate": 2.0657894736842108e-06, | |
| "loss": 0.4547, | |
| "step": 243 | |
| }, | |
| { | |
| "Batch Mean": 0.11115974187850952, | |
| "accuracy": 0.8359375, | |
| "epoch": 0.6075, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 9.633417129516602, | |
| "learning_rate": 2.0526315789473687e-06, | |
| "loss": 0.4577, | |
| "step": 244 | |
| }, | |
| { | |
| "Batch Mean": 0.2002342939376831, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.61, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.6125, | |
| "grad_norm": 8.547195434570312, | |
| "learning_rate": 2.0394736842105266e-06, | |
| "loss": 0.4327, | |
| "step": 245 | |
| }, | |
| { | |
| "Batch Mean": 0.39250755310058594, | |
| "accuracy": 0.828125, | |
| "epoch": 0.6125, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.615, | |
| "grad_norm": 9.968074798583984, | |
| "learning_rate": 2.026315789473684e-06, | |
| "loss": 0.427, | |
| "step": 246 | |
| }, | |
| { | |
| "Batch Mean": -0.38251250982284546, | |
| "accuracy": 0.84375, | |
| "epoch": 0.615, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.6175, | |
| "grad_norm": 8.198342323303223, | |
| "learning_rate": 2.013157894736842e-06, | |
| "loss": 0.3526, | |
| "step": 247 | |
| }, | |
| { | |
| "Batch Mean": -0.25481224060058594, | |
| "accuracy": 0.765625, | |
| "epoch": 0.6175, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 9.115253448486328, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.4641, | |
| "step": 248 | |
| }, | |
| { | |
| "Batch Mean": 0.19320857524871826, | |
| "accuracy": 0.84375, | |
| "epoch": 0.62, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.6225, | |
| "grad_norm": 8.824867248535156, | |
| "learning_rate": 1.9868421052631582e-06, | |
| "loss": 0.4034, | |
| "step": 249 | |
| }, | |
| { | |
| "Batch Mean": -0.10393857955932617, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.6225, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.625, | |
| "grad_norm": 7.717092037200928, | |
| "learning_rate": 1.973684210526316e-06, | |
| "loss": 0.4, | |
| "step": 250 | |
| }, | |
| { | |
| "Batch Mean": 0.06804251670837402, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.625, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6275, | |
| "grad_norm": 8.48812484741211, | |
| "learning_rate": 1.9605263157894736e-06, | |
| "loss": 0.404, | |
| "step": 251 | |
| }, | |
| { | |
| "Batch Mean": -0.3888711929321289, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.6275, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 9.107695579528809, | |
| "learning_rate": 1.9473684210526315e-06, | |
| "loss": 0.4279, | |
| "step": 252 | |
| }, | |
| { | |
| "Batch Mean": 0.06354683637619019, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.63, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.6325, | |
| "grad_norm": 8.300797462463379, | |
| "learning_rate": 1.9342105263157895e-06, | |
| "loss": 0.356, | |
| "step": 253 | |
| }, | |
| { | |
| "Batch Mean": 0.33930039405822754, | |
| "accuracy": 0.796875, | |
| "epoch": 0.6325, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.635, | |
| "grad_norm": 10.072660446166992, | |
| "learning_rate": 1.9210526315789474e-06, | |
| "loss": 0.4488, | |
| "step": 254 | |
| }, | |
| { | |
| "Batch Mean": 0.3767620921134949, | |
| "accuracy": 0.78125, | |
| "epoch": 0.635, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.6375, | |
| "grad_norm": 8.744544982910156, | |
| "learning_rate": 1.9078947368421057e-06, | |
| "loss": 0.4223, | |
| "step": 255 | |
| }, | |
| { | |
| "Batch Mean": 0.11081337928771973, | |
| "accuracy": 0.796875, | |
| "epoch": 0.6375, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 7.397643089294434, | |
| "learning_rate": 1.8947368421052634e-06, | |
| "loss": 0.4484, | |
| "step": 256 | |
| }, | |
| { | |
| "Batch Mean": -0.30564993619918823, | |
| "accuracy": 0.828125, | |
| "epoch": 0.64, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.6425, | |
| "grad_norm": 7.535917282104492, | |
| "learning_rate": 1.8815789473684213e-06, | |
| "loss": 0.3804, | |
| "step": 257 | |
| }, | |
| { | |
| "Batch Mean": -0.25523144006729126, | |
| "accuracy": 0.796875, | |
| "epoch": 0.6425, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.645, | |
| "grad_norm": 8.543533325195312, | |
| "learning_rate": 1.868421052631579e-06, | |
| "loss": 0.4219, | |
| "step": 258 | |
| }, | |
| { | |
| "Batch Mean": -0.17939138412475586, | |
| "accuracy": 0.78125, | |
| "epoch": 0.645, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.6475, | |
| "grad_norm": 8.032143592834473, | |
| "learning_rate": 1.855263157894737e-06, | |
| "loss": 0.3749, | |
| "step": 259 | |
| }, | |
| { | |
| "Batch Mean": -0.18132537603378296, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.6475, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 8.4529390335083, | |
| "learning_rate": 1.8421052631578948e-06, | |
| "loss": 0.4415, | |
| "step": 260 | |
| }, | |
| { | |
| "Batch Mean": 0.049228668212890625, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.65, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.6525, | |
| "grad_norm": 8.111146926879883, | |
| "learning_rate": 1.828947368421053e-06, | |
| "loss": 0.4154, | |
| "step": 261 | |
| }, | |
| { | |
| "Batch Mean": 0.48825645446777344, | |
| "accuracy": 0.828125, | |
| "epoch": 0.6525, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.655, | |
| "grad_norm": 9.155312538146973, | |
| "learning_rate": 1.8157894736842109e-06, | |
| "loss": 0.4255, | |
| "step": 262 | |
| }, | |
| { | |
| "Batch Mean": 0.15371739864349365, | |
| "accuracy": 0.78125, | |
| "epoch": 0.655, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.6575, | |
| "grad_norm": 7.977846145629883, | |
| "learning_rate": 1.8026315789473685e-06, | |
| "loss": 0.4265, | |
| "step": 263 | |
| }, | |
| { | |
| "Batch Mean": 0.014174580574035645, | |
| "accuracy": 0.796875, | |
| "epoch": 0.6575, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 7.430598258972168, | |
| "learning_rate": 1.7894736842105265e-06, | |
| "loss": 0.4238, | |
| "step": 264 | |
| }, | |
| { | |
| "Batch Mean": -0.22507664561271667, | |
| "accuracy": 0.78125, | |
| "epoch": 0.66, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.6625, | |
| "grad_norm": 8.024470329284668, | |
| "learning_rate": 1.7763157894736844e-06, | |
| "loss": 0.4393, | |
| "step": 265 | |
| }, | |
| { | |
| "Batch Mean": -0.24839115142822266, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.6625, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.665, | |
| "grad_norm": 8.834558486938477, | |
| "learning_rate": 1.7631578947368423e-06, | |
| "loss": 0.4494, | |
| "step": 266 | |
| }, | |
| { | |
| "Batch Mean": -0.36356449127197266, | |
| "accuracy": 0.8671875, | |
| "epoch": 0.665, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.6675, | |
| "grad_norm": 8.237932205200195, | |
| "learning_rate": 1.75e-06, | |
| "loss": 0.3271, | |
| "step": 267 | |
| }, | |
| { | |
| "Batch Mean": -0.46784067153930664, | |
| "accuracy": 0.8125, | |
| "epoch": 0.6675, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 8.771154403686523, | |
| "learning_rate": 1.736842105263158e-06, | |
| "loss": 0.3859, | |
| "step": 268 | |
| }, | |
| { | |
| "Batch Mean": 0.11419010162353516, | |
| "accuracy": 0.8359375, | |
| "epoch": 0.67, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.6725, | |
| "grad_norm": 8.358805656433105, | |
| "learning_rate": 1.723684210526316e-06, | |
| "loss": 0.3841, | |
| "step": 269 | |
| }, | |
| { | |
| "Batch Mean": 0.41220295429229736, | |
| "accuracy": 0.734375, | |
| "epoch": 0.6725, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.675, | |
| "grad_norm": 10.508111953735352, | |
| "learning_rate": 1.710526315789474e-06, | |
| "loss": 0.5026, | |
| "step": 270 | |
| }, | |
| { | |
| "Batch Mean": 0.3799011707305908, | |
| "accuracy": 0.75, | |
| "epoch": 0.675, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.6775, | |
| "grad_norm": 9.485908508300781, | |
| "learning_rate": 1.6973684210526318e-06, | |
| "loss": 0.4428, | |
| "step": 271 | |
| }, | |
| { | |
| "Batch Mean": 0.309903621673584, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.6775, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 8.043779373168945, | |
| "learning_rate": 1.6842105263157895e-06, | |
| "loss": 0.397, | |
| "step": 272 | |
| }, | |
| { | |
| "Batch Mean": 0.4066788852214813, | |
| "accuracy": 0.796875, | |
| "epoch": 0.68, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.6825, | |
| "grad_norm": 8.586115837097168, | |
| "learning_rate": 1.6710526315789474e-06, | |
| "loss": 0.4127, | |
| "step": 273 | |
| }, | |
| { | |
| "Batch Mean": -0.16961884498596191, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.6825, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.685, | |
| "grad_norm": 8.598859786987305, | |
| "learning_rate": 1.6578947368421053e-06, | |
| "loss": 0.4208, | |
| "step": 274 | |
| }, | |
| { | |
| "Batch Mean": -0.5758038759231567, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.685, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.6875, | |
| "grad_norm": 9.626463890075684, | |
| "learning_rate": 1.6447368421052635e-06, | |
| "loss": 0.4272, | |
| "step": 275 | |
| }, | |
| { | |
| "Batch Mean": -0.19902324676513672, | |
| "accuracy": 0.828125, | |
| "epoch": 0.6875, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 10.618395805358887, | |
| "learning_rate": 1.6315789473684212e-06, | |
| "loss": 0.4061, | |
| "step": 276 | |
| }, | |
| { | |
| "Batch Mean": 0.022523045539855957, | |
| "accuracy": 0.7421875, | |
| "epoch": 0.69, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.6925, | |
| "grad_norm": 11.426433563232422, | |
| "learning_rate": 1.618421052631579e-06, | |
| "loss": 0.5282, | |
| "step": 277 | |
| }, | |
| { | |
| "Batch Mean": 0.432889461517334, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.6925, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.695, | |
| "grad_norm": 9.978400230407715, | |
| "learning_rate": 1.605263157894737e-06, | |
| "loss": 0.4552, | |
| "step": 278 | |
| }, | |
| { | |
| "Batch Mean": 0.17058849334716797, | |
| "accuracy": 0.84375, | |
| "epoch": 0.695, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.6975, | |
| "grad_norm": 8.175233840942383, | |
| "learning_rate": 1.5921052631578949e-06, | |
| "loss": 0.3381, | |
| "step": 279 | |
| }, | |
| { | |
| "Batch Mean": -0.03832387924194336, | |
| "accuracy": 0.828125, | |
| "epoch": 0.6975, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 8.693676948547363, | |
| "learning_rate": 1.5789473684210526e-06, | |
| "loss": 0.4224, | |
| "step": 280 | |
| }, | |
| { | |
| "Batch Mean": -0.013722419738769531, | |
| "accuracy": 0.8359375, | |
| "epoch": 0.7, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.7025, | |
| "grad_norm": 9.704010009765625, | |
| "learning_rate": 1.5657894736842105e-06, | |
| "loss": 0.4005, | |
| "step": 281 | |
| }, | |
| { | |
| "Batch Mean": -0.050084829330444336, | |
| "accuracy": 0.7109375, | |
| "epoch": 0.7025, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.705, | |
| "grad_norm": 11.404817581176758, | |
| "learning_rate": 1.5526315789473686e-06, | |
| "loss": 0.547, | |
| "step": 282 | |
| }, | |
| { | |
| "Batch Mean": -0.14900946617126465, | |
| "accuracy": 0.8125, | |
| "epoch": 0.705, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.7075, | |
| "grad_norm": 9.11642837524414, | |
| "learning_rate": 1.5394736842105265e-06, | |
| "loss": 0.435, | |
| "step": 283 | |
| }, | |
| { | |
| "Batch Mean": -0.03146529197692871, | |
| "accuracy": 0.78125, | |
| "epoch": 0.7075, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 8.562140464782715, | |
| "learning_rate": 1.5263157894736844e-06, | |
| "loss": 0.3905, | |
| "step": 284 | |
| }, | |
| { | |
| "Batch Mean": -0.028424382209777832, | |
| "accuracy": 0.828125, | |
| "epoch": 0.71, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.7125, | |
| "grad_norm": 9.128016471862793, | |
| "learning_rate": 1.5131578947368421e-06, | |
| "loss": 0.4346, | |
| "step": 285 | |
| }, | |
| { | |
| "Batch Mean": 0.03724491596221924, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.7125, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.715, | |
| "grad_norm": 9.865401268005371, | |
| "learning_rate": 1.5e-06, | |
| "loss": 0.4305, | |
| "step": 286 | |
| }, | |
| { | |
| "Batch Mean": -0.45960211753845215, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.715, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.7175, | |
| "grad_norm": 9.03154182434082, | |
| "learning_rate": 1.486842105263158e-06, | |
| "loss": 0.4051, | |
| "step": 287 | |
| }, | |
| { | |
| "Batch Mean": -0.15180978178977966, | |
| "accuracy": 0.8359375, | |
| "epoch": 0.7175, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 7.9355340003967285, | |
| "learning_rate": 1.4736842105263159e-06, | |
| "loss": 0.345, | |
| "step": 288 | |
| }, | |
| { | |
| "Batch Mean": -0.21271443367004395, | |
| "accuracy": 0.78125, | |
| "epoch": 0.72, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.7225, | |
| "grad_norm": 9.927572250366211, | |
| "learning_rate": 1.460526315789474e-06, | |
| "loss": 0.4839, | |
| "step": 289 | |
| }, | |
| { | |
| "Batch Mean": 0.05137157440185547, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.7225, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.725, | |
| "grad_norm": 8.293874740600586, | |
| "learning_rate": 1.4473684210526317e-06, | |
| "loss": 0.403, | |
| "step": 290 | |
| }, | |
| { | |
| "Batch Mean": 0.052190959453582764, | |
| "accuracy": 0.703125, | |
| "epoch": 0.725, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.7275, | |
| "grad_norm": 9.438029289245605, | |
| "learning_rate": 1.4342105263157896e-06, | |
| "loss": 0.5022, | |
| "step": 291 | |
| }, | |
| { | |
| "Batch Mean": 0.45049190521240234, | |
| "accuracy": 0.8515625, | |
| "epoch": 0.7275, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 7.8294596672058105, | |
| "learning_rate": 1.4210526315789475e-06, | |
| "loss": 0.3471, | |
| "step": 292 | |
| }, | |
| { | |
| "Batch Mean": 0.3938368558883667, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.73, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.7325, | |
| "grad_norm": 8.612286567687988, | |
| "learning_rate": 1.4078947368421054e-06, | |
| "loss": 0.4202, | |
| "step": 293 | |
| }, | |
| { | |
| "Batch Mean": 0.05513477325439453, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.7325, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.735, | |
| "grad_norm": 9.447745323181152, | |
| "learning_rate": 1.394736842105263e-06, | |
| "loss": 0.4307, | |
| "step": 294 | |
| }, | |
| { | |
| "Batch Mean": 0.05935770273208618, | |
| "accuracy": 0.78125, | |
| "epoch": 0.735, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.7375, | |
| "grad_norm": 9.84406566619873, | |
| "learning_rate": 1.3815789473684212e-06, | |
| "loss": 0.4867, | |
| "step": 295 | |
| }, | |
| { | |
| "Batch Mean": -0.09174442291259766, | |
| "accuracy": 0.78125, | |
| "epoch": 0.7375, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 9.831645965576172, | |
| "learning_rate": 1.3684210526315791e-06, | |
| "loss": 0.4862, | |
| "step": 296 | |
| }, | |
| { | |
| "Batch Mean": -0.24367213249206543, | |
| "accuracy": 0.8125, | |
| "epoch": 0.74, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.7425, | |
| "grad_norm": 8.379569053649902, | |
| "learning_rate": 1.355263157894737e-06, | |
| "loss": 0.4052, | |
| "step": 297 | |
| }, | |
| { | |
| "Batch Mean": 0.005710422992706299, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.7425, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.745, | |
| "grad_norm": 8.809996604919434, | |
| "learning_rate": 1.342105263157895e-06, | |
| "loss": 0.4804, | |
| "step": 298 | |
| }, | |
| { | |
| "Batch Mean": 0.01293325424194336, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.745, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.7475, | |
| "grad_norm": 8.814909934997559, | |
| "learning_rate": 1.3289473684210526e-06, | |
| "loss": 0.4347, | |
| "step": 299 | |
| }, | |
| { | |
| "Batch Mean": 0.24314022064208984, | |
| "accuracy": 0.78125, | |
| "epoch": 0.7475, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 9.272860527038574, | |
| "learning_rate": 1.3157894736842106e-06, | |
| "loss": 0.4438, | |
| "step": 300 | |
| }, | |
| { | |
| "Batch Mean": -0.2619137763977051, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.75, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7525, | |
| "grad_norm": 7.985296726226807, | |
| "learning_rate": 1.3026315789473685e-06, | |
| "loss": 0.3883, | |
| "step": 301 | |
| }, | |
| { | |
| "Batch Mean": -0.2615363597869873, | |
| "accuracy": 0.8359375, | |
| "epoch": 0.7525, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.755, | |
| "grad_norm": 8.971776008605957, | |
| "learning_rate": 1.2894736842105266e-06, | |
| "loss": 0.3891, | |
| "step": 302 | |
| }, | |
| { | |
| "Batch Mean": -0.12707805633544922, | |
| "accuracy": 0.84375, | |
| "epoch": 0.755, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.7575, | |
| "grad_norm": 7.982441425323486, | |
| "learning_rate": 1.2763157894736845e-06, | |
| "loss": 0.378, | |
| "step": 303 | |
| }, | |
| { | |
| "Batch Mean": -0.11236917972564697, | |
| "accuracy": 0.796875, | |
| "epoch": 0.7575, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 7.220173358917236, | |
| "learning_rate": 1.2631578947368422e-06, | |
| "loss": 0.3652, | |
| "step": 304 | |
| }, | |
| { | |
| "Batch Mean": 0.16874241828918457, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.76, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.7625, | |
| "grad_norm": 9.30842399597168, | |
| "learning_rate": 1.25e-06, | |
| "loss": 0.429, | |
| "step": 305 | |
| }, | |
| { | |
| "Batch Mean": 0.17092442512512207, | |
| "accuracy": 0.8515625, | |
| "epoch": 0.7625, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.765, | |
| "grad_norm": 7.298190116882324, | |
| "learning_rate": 1.236842105263158e-06, | |
| "loss": 0.3211, | |
| "step": 306 | |
| }, | |
| { | |
| "Batch Mean": 0.16017365455627441, | |
| "accuracy": 0.8359375, | |
| "epoch": 0.765, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.7675, | |
| "grad_norm": 8.668152809143066, | |
| "learning_rate": 1.223684210526316e-06, | |
| "loss": 0.4018, | |
| "step": 307 | |
| }, | |
| { | |
| "Batch Mean": 0.130733460187912, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.7675, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 8.445825576782227, | |
| "learning_rate": 1.2105263157894738e-06, | |
| "loss": 0.4339, | |
| "step": 308 | |
| }, | |
| { | |
| "Batch Mean": 0.05145275592803955, | |
| "accuracy": 0.8515625, | |
| "epoch": 0.77, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.7725, | |
| "grad_norm": 8.41434383392334, | |
| "learning_rate": 1.1973684210526317e-06, | |
| "loss": 0.3792, | |
| "step": 309 | |
| }, | |
| { | |
| "Batch Mean": 0.3817669153213501, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.7725, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.775, | |
| "grad_norm": 9.84103012084961, | |
| "learning_rate": 1.1842105263157894e-06, | |
| "loss": 0.4371, | |
| "step": 310 | |
| }, | |
| { | |
| "Batch Mean": -0.058230042457580566, | |
| "accuracy": 0.796875, | |
| "epoch": 0.775, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.7775, | |
| "grad_norm": 8.7118558883667, | |
| "learning_rate": 1.1710526315789476e-06, | |
| "loss": 0.4407, | |
| "step": 311 | |
| }, | |
| { | |
| "Batch Mean": -0.25270986557006836, | |
| "accuracy": 0.84375, | |
| "epoch": 0.7775, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 8.176555633544922, | |
| "learning_rate": 1.1578947368421053e-06, | |
| "loss": 0.3644, | |
| "step": 312 | |
| }, | |
| { | |
| "Batch Mean": 0.10413014888763428, | |
| "accuracy": 0.796875, | |
| "epoch": 0.78, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.7825, | |
| "grad_norm": 10.638689994812012, | |
| "learning_rate": 1.1447368421052632e-06, | |
| "loss": 0.479, | |
| "step": 313 | |
| }, | |
| { | |
| "Batch Mean": -0.3255608081817627, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.7825, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.785, | |
| "grad_norm": 8.78596019744873, | |
| "learning_rate": 1.1315789473684213e-06, | |
| "loss": 0.4162, | |
| "step": 314 | |
| }, | |
| { | |
| "Batch Mean": -0.3200516700744629, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.785, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.7875, | |
| "grad_norm": 10.12102222442627, | |
| "learning_rate": 1.118421052631579e-06, | |
| "loss": 0.4531, | |
| "step": 315 | |
| }, | |
| { | |
| "Batch Mean": -0.23560285568237305, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.7875, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 10.315141677856445, | |
| "learning_rate": 1.1052631578947369e-06, | |
| "loss": 0.415, | |
| "step": 316 | |
| }, | |
| { | |
| "Batch Mean": -0.13549411296844482, | |
| "accuracy": 0.765625, | |
| "epoch": 0.79, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.7925, | |
| "grad_norm": 8.994793891906738, | |
| "learning_rate": 1.0921052631578948e-06, | |
| "loss": 0.4708, | |
| "step": 317 | |
| }, | |
| { | |
| "Batch Mean": 0.10734868049621582, | |
| "accuracy": 0.828125, | |
| "epoch": 0.7925, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.795, | |
| "grad_norm": 8.644208908081055, | |
| "learning_rate": 1.0789473684210527e-06, | |
| "loss": 0.4297, | |
| "step": 318 | |
| }, | |
| { | |
| "Batch Mean": -0.10696744918823242, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.795, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.7975, | |
| "grad_norm": 8.902405738830566, | |
| "learning_rate": 1.0657894736842106e-06, | |
| "loss": 0.3664, | |
| "step": 319 | |
| }, | |
| { | |
| "Batch Mean": -0.06384849548339844, | |
| "accuracy": 0.75, | |
| "epoch": 0.7975, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 8.689393043518066, | |
| "learning_rate": 1.0526315789473685e-06, | |
| "loss": 0.4301, | |
| "step": 320 | |
| }, | |
| { | |
| "Batch Mean": 0.18018341064453125, | |
| "accuracy": 0.796875, | |
| "epoch": 0.8, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.8025, | |
| "grad_norm": 8.126524925231934, | |
| "learning_rate": 1.0394736842105264e-06, | |
| "loss": 0.3606, | |
| "step": 321 | |
| }, | |
| { | |
| "Batch Mean": 0.5397379398345947, | |
| "accuracy": 0.8671875, | |
| "epoch": 0.8025, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.805, | |
| "grad_norm": 9.469841003417969, | |
| "learning_rate": 1.0263157894736843e-06, | |
| "loss": 0.3225, | |
| "step": 322 | |
| }, | |
| { | |
| "Batch Mean": 0.03603154420852661, | |
| "accuracy": 0.890625, | |
| "epoch": 0.805, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.8075, | |
| "grad_norm": 7.566306114196777, | |
| "learning_rate": 1.013157894736842e-06, | |
| "loss": 0.3548, | |
| "step": 323 | |
| }, | |
| { | |
| "Batch Mean": 0.2926754951477051, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.8075, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 10.339540481567383, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 0.4225, | |
| "step": 324 | |
| }, | |
| { | |
| "Batch Mean": 0.01955336332321167, | |
| "accuracy": 0.796875, | |
| "epoch": 0.81, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.8125, | |
| "grad_norm": 9.412261009216309, | |
| "learning_rate": 9.86842105263158e-07, | |
| "loss": 0.3916, | |
| "step": 325 | |
| }, | |
| { | |
| "Batch Mean": 0.38174504041671753, | |
| "accuracy": 0.75, | |
| "epoch": 0.8125, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.815, | |
| "grad_norm": 9.061002731323242, | |
| "learning_rate": 9.736842105263158e-07, | |
| "loss": 0.4265, | |
| "step": 326 | |
| }, | |
| { | |
| "Batch Mean": 0.11910462379455566, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.815, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.8175, | |
| "grad_norm": 7.671031951904297, | |
| "learning_rate": 9.605263157894737e-07, | |
| "loss": 0.3923, | |
| "step": 327 | |
| }, | |
| { | |
| "Batch Mean": -0.25702619552612305, | |
| "accuracy": 0.8359375, | |
| "epoch": 0.8175, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 8.123684883117676, | |
| "learning_rate": 9.473684210526317e-07, | |
| "loss": 0.3677, | |
| "step": 328 | |
| }, | |
| { | |
| "Batch Mean": -0.21103650331497192, | |
| "accuracy": 0.8828125, | |
| "epoch": 0.82, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.8225, | |
| "grad_norm": 7.562531471252441, | |
| "learning_rate": 9.342105263157895e-07, | |
| "loss": 0.3264, | |
| "step": 329 | |
| }, | |
| { | |
| "Batch Mean": -0.2516193389892578, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.8225, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.825, | |
| "grad_norm": 8.672572135925293, | |
| "learning_rate": 9.210526315789474e-07, | |
| "loss": 0.3817, | |
| "step": 330 | |
| }, | |
| { | |
| "Batch Mean": -0.26069092750549316, | |
| "accuracy": 0.7109375, | |
| "epoch": 0.825, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.8275, | |
| "grad_norm": 10.793391227722168, | |
| "learning_rate": 9.078947368421054e-07, | |
| "loss": 0.5323, | |
| "step": 331 | |
| }, | |
| { | |
| "Batch Mean": 0.05710291862487793, | |
| "accuracy": 0.8671875, | |
| "epoch": 0.8275, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 7.65609073638916, | |
| "learning_rate": 8.947368421052632e-07, | |
| "loss": 0.3615, | |
| "step": 332 | |
| }, | |
| { | |
| "Batch Mean": -0.23122763633728027, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.83, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.8325, | |
| "grad_norm": 9.912604331970215, | |
| "learning_rate": 8.815789473684211e-07, | |
| "loss": 0.4349, | |
| "step": 333 | |
| }, | |
| { | |
| "Batch Mean": -0.3970230519771576, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.8325, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.835, | |
| "grad_norm": 9.435818672180176, | |
| "learning_rate": 8.68421052631579e-07, | |
| "loss": 0.4854, | |
| "step": 334 | |
| }, | |
| { | |
| "Batch Mean": 0.00642848014831543, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.835, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.8375, | |
| "grad_norm": 9.625823020935059, | |
| "learning_rate": 8.55263157894737e-07, | |
| "loss": 0.4557, | |
| "step": 335 | |
| }, | |
| { | |
| "Batch Mean": -0.1357579231262207, | |
| "accuracy": 0.75, | |
| "epoch": 0.8375, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 11.37505054473877, | |
| "learning_rate": 8.421052631578948e-07, | |
| "loss": 0.5473, | |
| "step": 336 | |
| }, | |
| { | |
| "Batch Mean": 0.17408472299575806, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.84, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.8425, | |
| "grad_norm": 8.96561336517334, | |
| "learning_rate": 8.289473684210527e-07, | |
| "loss": 0.4372, | |
| "step": 337 | |
| }, | |
| { | |
| "Batch Mean": -0.00230252742767334, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.8425, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.845, | |
| "grad_norm": 8.864690780639648, | |
| "learning_rate": 8.157894736842106e-07, | |
| "loss": 0.405, | |
| "step": 338 | |
| }, | |
| { | |
| "Batch Mean": 0.4282846450805664, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.845, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.8475, | |
| "grad_norm": 8.917513847351074, | |
| "learning_rate": 8.026315789473685e-07, | |
| "loss": 0.3769, | |
| "step": 339 | |
| }, | |
| { | |
| "Batch Mean": 0.3292938470840454, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.8475, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 7.596722602844238, | |
| "learning_rate": 7.894736842105263e-07, | |
| "loss": 0.3971, | |
| "step": 340 | |
| }, | |
| { | |
| "Batch Mean": 0.17405915260314941, | |
| "accuracy": 0.8125, | |
| "epoch": 0.85, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.8525, | |
| "grad_norm": 8.279147148132324, | |
| "learning_rate": 7.763157894736843e-07, | |
| "loss": 0.3861, | |
| "step": 341 | |
| }, | |
| { | |
| "Batch Mean": -0.1514298915863037, | |
| "accuracy": 0.796875, | |
| "epoch": 0.8525, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.855, | |
| "grad_norm": 8.507568359375, | |
| "learning_rate": 7.631578947368422e-07, | |
| "loss": 0.3943, | |
| "step": 342 | |
| }, | |
| { | |
| "Batch Mean": 8.153915405273438e-05, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.855, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.8575, | |
| "grad_norm": 8.138630867004395, | |
| "learning_rate": 7.5e-07, | |
| "loss": 0.3695, | |
| "step": 343 | |
| }, | |
| { | |
| "Batch Mean": -0.15085434913635254, | |
| "accuracy": 0.8515625, | |
| "epoch": 0.8575, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 7.26024055480957, | |
| "learning_rate": 7.368421052631579e-07, | |
| "loss": 0.3451, | |
| "step": 344 | |
| }, | |
| { | |
| "Batch Mean": -0.21977519989013672, | |
| "accuracy": 0.796875, | |
| "epoch": 0.86, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.8625, | |
| "grad_norm": 7.881062030792236, | |
| "learning_rate": 7.236842105263158e-07, | |
| "loss": 0.423, | |
| "step": 345 | |
| }, | |
| { | |
| "Batch Mean": -0.19157737493515015, | |
| "accuracy": 0.8125, | |
| "epoch": 0.8625, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.865, | |
| "grad_norm": 8.916438102722168, | |
| "learning_rate": 7.105263157894737e-07, | |
| "loss": 0.411, | |
| "step": 346 | |
| }, | |
| { | |
| "Batch Mean": -0.14023709297180176, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.865, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.8675, | |
| "grad_norm": 8.475634574890137, | |
| "learning_rate": 6.973684210526316e-07, | |
| "loss": 0.4299, | |
| "step": 347 | |
| }, | |
| { | |
| "Batch Mean": -0.11687850952148438, | |
| "accuracy": 0.796875, | |
| "epoch": 0.8675, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 8.698983192443848, | |
| "learning_rate": 6.842105263157896e-07, | |
| "loss": 0.3624, | |
| "step": 348 | |
| }, | |
| { | |
| "Batch Mean": -0.029759228229522705, | |
| "accuracy": 0.8515625, | |
| "epoch": 0.87, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.8725, | |
| "grad_norm": 7.845675945281982, | |
| "learning_rate": 6.710526315789475e-07, | |
| "loss": 0.3724, | |
| "step": 349 | |
| }, | |
| { | |
| "Batch Mean": 0.03312182426452637, | |
| "accuracy": 0.84375, | |
| "epoch": 0.8725, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.875, | |
| "grad_norm": 7.433289527893066, | |
| "learning_rate": 6.578947368421053e-07, | |
| "loss": 0.3195, | |
| "step": 350 | |
| }, | |
| { | |
| "Batch Mean": -0.04676985740661621, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.875, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.8775, | |
| "grad_norm": 9.495170593261719, | |
| "learning_rate": 6.447368421052633e-07, | |
| "loss": 0.4451, | |
| "step": 351 | |
| }, | |
| { | |
| "Batch Mean": 0.0372767448425293, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.8775, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 9.247861862182617, | |
| "learning_rate": 6.315789473684211e-07, | |
| "loss": 0.393, | |
| "step": 352 | |
| }, | |
| { | |
| "Batch Mean": 0.2761037349700928, | |
| "accuracy": 0.78125, | |
| "epoch": 0.88, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.8825, | |
| "grad_norm": 9.92392635345459, | |
| "learning_rate": 6.18421052631579e-07, | |
| "loss": 0.4983, | |
| "step": 353 | |
| }, | |
| { | |
| "Batch Mean": 0.4323611259460449, | |
| "accuracy": 0.828125, | |
| "epoch": 0.8825, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.885, | |
| "grad_norm": 9.071545600891113, | |
| "learning_rate": 6.052631578947369e-07, | |
| "loss": 0.4193, | |
| "step": 354 | |
| }, | |
| { | |
| "Batch Mean": 0.4930781126022339, | |
| "accuracy": 0.75, | |
| "epoch": 0.885, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.8875, | |
| "grad_norm": 10.285765647888184, | |
| "learning_rate": 5.921052631578947e-07, | |
| "loss": 0.4975, | |
| "step": 355 | |
| }, | |
| { | |
| "Batch Mean": 0.08574128150939941, | |
| "accuracy": 0.8359375, | |
| "epoch": 0.8875, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 7.1479878425598145, | |
| "learning_rate": 5.789473684210526e-07, | |
| "loss": 0.3539, | |
| "step": 356 | |
| }, | |
| { | |
| "Batch Mean": 0.133941650390625, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.89, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.8925, | |
| "grad_norm": 8.113814353942871, | |
| "learning_rate": 5.657894736842106e-07, | |
| "loss": 0.3968, | |
| "step": 357 | |
| }, | |
| { | |
| "Batch Mean": 0.017683029174804688, | |
| "accuracy": 0.765625, | |
| "epoch": 0.8925, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.895, | |
| "grad_norm": 10.017806053161621, | |
| "learning_rate": 5.526315789473684e-07, | |
| "loss": 0.5036, | |
| "step": 358 | |
| }, | |
| { | |
| "Batch Mean": -0.14901137351989746, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.895, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.8975, | |
| "grad_norm": 7.808798313140869, | |
| "learning_rate": 5.394736842105264e-07, | |
| "loss": 0.3906, | |
| "step": 359 | |
| }, | |
| { | |
| "Batch Mean": -0.0763850212097168, | |
| "accuracy": 0.828125, | |
| "epoch": 0.8975, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 7.832871437072754, | |
| "learning_rate": 5.263157894736843e-07, | |
| "loss": 0.3759, | |
| "step": 360 | |
| }, | |
| { | |
| "Batch Mean": -0.09305143356323242, | |
| "accuracy": 0.828125, | |
| "epoch": 0.9, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.9025, | |
| "grad_norm": 8.163674354553223, | |
| "learning_rate": 5.131578947368422e-07, | |
| "loss": 0.3764, | |
| "step": 361 | |
| }, | |
| { | |
| "Batch Mean": -0.0019698143005371094, | |
| "accuracy": 0.8125, | |
| "epoch": 0.9025, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.905, | |
| "grad_norm": 8.305913925170898, | |
| "learning_rate": 5.000000000000001e-07, | |
| "loss": 0.4264, | |
| "step": 362 | |
| }, | |
| { | |
| "Batch Mean": -0.033800363540649414, | |
| "accuracy": 0.8515625, | |
| "epoch": 0.905, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.9075, | |
| "grad_norm": 8.133559226989746, | |
| "learning_rate": 4.868421052631579e-07, | |
| "loss": 0.3646, | |
| "step": 363 | |
| }, | |
| { | |
| "Batch Mean": -0.45097827911376953, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.9075, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 10.242264747619629, | |
| "learning_rate": 4.7368421052631585e-07, | |
| "loss": 0.4668, | |
| "step": 364 | |
| }, | |
| { | |
| "Batch Mean": -0.038527727127075195, | |
| "accuracy": 0.8671875, | |
| "epoch": 0.91, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.9125, | |
| "grad_norm": 7.343667984008789, | |
| "learning_rate": 4.605263157894737e-07, | |
| "loss": 0.3583, | |
| "step": 365 | |
| }, | |
| { | |
| "Batch Mean": -0.24659931659698486, | |
| "accuracy": 0.78125, | |
| "epoch": 0.9125, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.915, | |
| "grad_norm": 8.302116394042969, | |
| "learning_rate": 4.473684210526316e-07, | |
| "loss": 0.4744, | |
| "step": 366 | |
| }, | |
| { | |
| "Batch Mean": -0.21030521392822266, | |
| "accuracy": 0.8359375, | |
| "epoch": 0.915, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.9175, | |
| "grad_norm": 9.05025863647461, | |
| "learning_rate": 4.342105263157895e-07, | |
| "loss": 0.3881, | |
| "step": 367 | |
| }, | |
| { | |
| "Batch Mean": -0.07368844747543335, | |
| "accuracy": 0.7265625, | |
| "epoch": 0.9175, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 8.670877456665039, | |
| "learning_rate": 4.210526315789474e-07, | |
| "loss": 0.4291, | |
| "step": 368 | |
| }, | |
| { | |
| "Batch Mean": -0.1727890968322754, | |
| "accuracy": 0.828125, | |
| "epoch": 0.92, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.9225, | |
| "grad_norm": 9.470555305480957, | |
| "learning_rate": 4.078947368421053e-07, | |
| "loss": 0.395, | |
| "step": 369 | |
| }, | |
| { | |
| "Batch Mean": -0.3188471794128418, | |
| "accuracy": 0.8125, | |
| "epoch": 0.9225, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.925, | |
| "grad_norm": 9.629358291625977, | |
| "learning_rate": 3.9473684210526315e-07, | |
| "loss": 0.3942, | |
| "step": 370 | |
| }, | |
| { | |
| "Batch Mean": 0.2340230941772461, | |
| "accuracy": 0.875, | |
| "epoch": 0.925, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.9275, | |
| "grad_norm": 7.180118560791016, | |
| "learning_rate": 3.815789473684211e-07, | |
| "loss": 0.3216, | |
| "step": 371 | |
| }, | |
| { | |
| "Batch Mean": 0.29411780834198, | |
| "accuracy": 0.796875, | |
| "epoch": 0.9275, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 9.424283981323242, | |
| "learning_rate": 3.6842105263157896e-07, | |
| "loss": 0.3828, | |
| "step": 372 | |
| }, | |
| { | |
| "Batch Mean": 0.3843269348144531, | |
| "accuracy": 0.859375, | |
| "epoch": 0.93, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.9325, | |
| "grad_norm": 8.02033519744873, | |
| "learning_rate": 3.5526315789473687e-07, | |
| "loss": 0.3439, | |
| "step": 373 | |
| }, | |
| { | |
| "Batch Mean": 0.15648174285888672, | |
| "accuracy": 0.8125, | |
| "epoch": 0.9325, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.935, | |
| "grad_norm": 8.694818496704102, | |
| "learning_rate": 3.421052631578948e-07, | |
| "loss": 0.4216, | |
| "step": 374 | |
| }, | |
| { | |
| "Batch Mean": 0.23671111464500427, | |
| "accuracy": 0.84375, | |
| "epoch": 0.935, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.9375, | |
| "grad_norm": 9.467185020446777, | |
| "learning_rate": 3.2894736842105264e-07, | |
| "loss": 0.3956, | |
| "step": 375 | |
| }, | |
| { | |
| "Batch Mean": 0.33160504698753357, | |
| "accuracy": 0.84375, | |
| "epoch": 0.9375, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 7.545875549316406, | |
| "learning_rate": 3.1578947368421055e-07, | |
| "loss": 0.3067, | |
| "step": 376 | |
| }, | |
| { | |
| "Batch Mean": 0.20784002542495728, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.94, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.9425, | |
| "grad_norm": 7.867528915405273, | |
| "learning_rate": 3.0263157894736846e-07, | |
| "loss": 0.3758, | |
| "step": 377 | |
| }, | |
| { | |
| "Batch Mean": -0.07588949799537659, | |
| "accuracy": 0.828125, | |
| "epoch": 0.9425, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.945, | |
| "grad_norm": 8.548137664794922, | |
| "learning_rate": 2.894736842105263e-07, | |
| "loss": 0.3825, | |
| "step": 378 | |
| }, | |
| { | |
| "Batch Mean": 0.14056706428527832, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.945, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.9475, | |
| "grad_norm": 7.883662223815918, | |
| "learning_rate": 2.763157894736842e-07, | |
| "loss": 0.4189, | |
| "step": 379 | |
| }, | |
| { | |
| "Batch Mean": -0.04343461990356445, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.9475, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 8.69774341583252, | |
| "learning_rate": 2.6315789473684213e-07, | |
| "loss": 0.3612, | |
| "step": 380 | |
| }, | |
| { | |
| "Batch Mean": 0.01430213451385498, | |
| "accuracy": 0.8515625, | |
| "epoch": 0.95, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.9525, | |
| "grad_norm": 7.480797290802002, | |
| "learning_rate": 2.5000000000000004e-07, | |
| "loss": 0.3102, | |
| "step": 381 | |
| }, | |
| { | |
| "Batch Mean": 0.22571241855621338, | |
| "accuracy": 0.8359375, | |
| "epoch": 0.9525, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.955, | |
| "grad_norm": 9.15902042388916, | |
| "learning_rate": 2.3684210526315792e-07, | |
| "loss": 0.4376, | |
| "step": 382 | |
| }, | |
| { | |
| "Batch Mean": -0.053093671798706055, | |
| "accuracy": 0.8515625, | |
| "epoch": 0.955, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.9575, | |
| "grad_norm": 7.246583938598633, | |
| "learning_rate": 2.236842105263158e-07, | |
| "loss": 0.3501, | |
| "step": 383 | |
| }, | |
| { | |
| "Batch Mean": -0.08722090721130371, | |
| "accuracy": 0.78125, | |
| "epoch": 0.9575, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 9.263538360595703, | |
| "learning_rate": 2.105263157894737e-07, | |
| "loss": 0.4351, | |
| "step": 384 | |
| }, | |
| { | |
| "Batch Mean": -0.051065683364868164, | |
| "accuracy": 0.8515625, | |
| "epoch": 0.96, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.9625, | |
| "grad_norm": 7.381646633148193, | |
| "learning_rate": 1.9736842105263157e-07, | |
| "loss": 0.3383, | |
| "step": 385 | |
| }, | |
| { | |
| "Batch Mean": 0.02757355570793152, | |
| "accuracy": 0.828125, | |
| "epoch": 0.9625, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.965, | |
| "grad_norm": 9.14664363861084, | |
| "learning_rate": 1.8421052631578948e-07, | |
| "loss": 0.4229, | |
| "step": 386 | |
| }, | |
| { | |
| "Batch Mean": -0.14691162109375, | |
| "accuracy": 0.8046875, | |
| "epoch": 0.965, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.9675, | |
| "grad_norm": 8.465057373046875, | |
| "learning_rate": 1.710526315789474e-07, | |
| "loss": 0.3797, | |
| "step": 387 | |
| }, | |
| { | |
| "Batch Mean": -0.29552268981933594, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.9675, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 10.373518943786621, | |
| "learning_rate": 1.5789473684210527e-07, | |
| "loss": 0.4511, | |
| "step": 388 | |
| }, | |
| { | |
| "Batch Mean": -0.03087782859802246, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.97, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.9725, | |
| "grad_norm": 9.178572654724121, | |
| "learning_rate": 1.4473684210526316e-07, | |
| "loss": 0.4085, | |
| "step": 389 | |
| }, | |
| { | |
| "Batch Mean": -0.27834033966064453, | |
| "accuracy": 0.765625, | |
| "epoch": 0.9725, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.975, | |
| "grad_norm": 9.096630096435547, | |
| "learning_rate": 1.3157894736842107e-07, | |
| "loss": 0.4357, | |
| "step": 390 | |
| }, | |
| { | |
| "Batch Mean": 0.07524168491363525, | |
| "accuracy": 0.7890625, | |
| "epoch": 0.975, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.9775, | |
| "grad_norm": 10.293438911437988, | |
| "learning_rate": 1.1842105263157896e-07, | |
| "loss": 0.4003, | |
| "step": 391 | |
| }, | |
| { | |
| "Batch Mean": -0.046504974365234375, | |
| "accuracy": 0.84375, | |
| "epoch": 0.9775, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 8.16102409362793, | |
| "learning_rate": 1.0526315789473685e-07, | |
| "loss": 0.3528, | |
| "step": 392 | |
| }, | |
| { | |
| "Batch Mean": 0.11986196041107178, | |
| "accuracy": 0.78125, | |
| "epoch": 0.98, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.9825, | |
| "grad_norm": 9.697178840637207, | |
| "learning_rate": 9.210526315789474e-08, | |
| "loss": 0.4403, | |
| "step": 393 | |
| }, | |
| { | |
| "Batch Mean": -0.2319614291191101, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.9825, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.985, | |
| "grad_norm": 9.398309707641602, | |
| "learning_rate": 7.894736842105264e-08, | |
| "loss": 0.439, | |
| "step": 394 | |
| }, | |
| { | |
| "Batch Mean": 0.1237255334854126, | |
| "accuracy": 0.8125, | |
| "epoch": 0.985, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.9875, | |
| "grad_norm": 10.374015808105469, | |
| "learning_rate": 6.578947368421053e-08, | |
| "loss": 0.4553, | |
| "step": 395 | |
| }, | |
| { | |
| "Batch Mean": 0.1040811538696289, | |
| "accuracy": 0.8359375, | |
| "epoch": 0.9875, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 8.807071685791016, | |
| "learning_rate": 5.263157894736842e-08, | |
| "loss": 0.3497, | |
| "step": 396 | |
| }, | |
| { | |
| "Batch Mean": -0.12660646438598633, | |
| "accuracy": 0.8203125, | |
| "epoch": 0.99, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.9925, | |
| "grad_norm": 9.151079177856445, | |
| "learning_rate": 3.947368421052632e-08, | |
| "loss": 0.4005, | |
| "step": 397 | |
| }, | |
| { | |
| "Batch Mean": 0.21477198600769043, | |
| "accuracy": 0.7578125, | |
| "epoch": 0.9925, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.995, | |
| "grad_norm": 9.226529121398926, | |
| "learning_rate": 2.631578947368421e-08, | |
| "loss": 0.4488, | |
| "step": 398 | |
| }, | |
| { | |
| "Batch Mean": -0.18561577796936035, | |
| "accuracy": 0.828125, | |
| "epoch": 0.995, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.9975, | |
| "grad_norm": 8.799454689025879, | |
| "learning_rate": 1.3157894736842106e-08, | |
| "loss": 0.4152, | |
| "step": 399 | |
| }, | |
| { | |
| "Batch Mean": 0.22522014379501343, | |
| "accuracy": 0.7734375, | |
| "epoch": 0.9975, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 10.105998992919922, | |
| "learning_rate": 0.0, | |
| "loss": 0.4411, | |
| "step": 400 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 400, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
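
The state above appears to be the `trainer_state.json` that `transformers.Trainer` writes alongside checkpoints, with a custom callback interleaving per-batch probe records (`"Batch Mean"`, `"accuracy"`) between the standard optimizer records (`"loss"`, `"grad_norm"`, `"learning_rate"`). Below is a minimal sketch for pulling the two record types back apart, assuming the JSON has been saved locally as `trainer_state.json` (the filename and the 20-batch window are illustrative assumptions, not part of the log):

```python
import json

# Assumed local path; adjust to wherever the checkpoint directory lives.
with open("trainer_state.json") as f:
    state = json.load(f)

# log_history interleaves two record types, both keyed by "step":
# optimizer records carry "loss"; probe records carry "accuracy".
train_records = [r for r in state["log_history"] if "loss" in r]
probe_records = [r for r in state["log_history"] if "accuracy" in r]

final = train_records[-1]
print(f"steps: {state['global_step']}, final loss: {final['loss']:.4f}")

# Per-batch accuracy is noisy; a trailing mean gives a steadier read.
window = 20  # illustrative choice
tail = [r["accuracy"] for r in probe_records][-window:]
print(f"mean accuracy over last {window} batches: {sum(tail) / len(tail):.4f}")
```

For this log, the trailing mean lands around 0.81, versus single-batch readings that swing between roughly 0.70 and 0.89.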