| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.2, | |
| "eval_steps": 500, | |
| "global_step": 80, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "Batch Mean": 2.561767578125, | |
| "accuracy": 0.375, | |
| "epoch": 0, | |
| "step": 0 | |
| }, | |
| { | |
| "Batch Mean": 2.625244140625, | |
| "accuracy": 0.5, | |
| "epoch": 0, | |
| "step": 0 | |
| }, | |
| { | |
| "Batch Mean": 2.579345703125, | |
| "accuracy": 0.6875, | |
| "epoch": 0, | |
| "step": 0 | |
| }, | |
| { | |
| "Batch Mean": 2.6171875, | |
| "accuracy": 0.34375, | |
| "epoch": 0, | |
| "step": 0 | |
| }, | |
| { | |
| "epoch": 0.0025, | |
| "grad_norm": 6.891047954559326, | |
| "learning_rate": 1.5000000000000002e-07, | |
| "loss": 0.6968, | |
| "step": 1 | |
| }, | |
| { | |
| "Batch Mean": 2.593505859375, | |
| "accuracy": 0.5, | |
| "epoch": 0.0025, | |
| "step": 1 | |
| }, | |
| { | |
| "Batch Mean": 2.5416259765625, | |
| "accuracy": 0.5, | |
| "epoch": 0.0025, | |
| "step": 1 | |
| }, | |
| { | |
| "Batch Mean": 2.53125, | |
| "accuracy": 0.5, | |
| "epoch": 0.0025, | |
| "step": 1 | |
| }, | |
| { | |
| "Batch Mean": 2.525146484375, | |
| "accuracy": 0.5625, | |
| "epoch": 0.0025, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.005, | |
| "grad_norm": 7.476072311401367, | |
| "learning_rate": 3.0000000000000004e-07, | |
| "loss": 0.6855, | |
| "step": 2 | |
| }, | |
| { | |
| "Batch Mean": 2.633056640625, | |
| "accuracy": 0.5, | |
| "epoch": 0.005, | |
| "step": 2 | |
| }, | |
| { | |
| "Batch Mean": 2.579345703125, | |
| "accuracy": 0.4375, | |
| "epoch": 0.005, | |
| "step": 2 | |
| }, | |
| { | |
| "Batch Mean": 2.6005859375, | |
| "accuracy": 0.4375, | |
| "epoch": 0.005, | |
| "step": 2 | |
| }, | |
| { | |
| "Batch Mean": 2.56005859375, | |
| "accuracy": 0.59375, | |
| "epoch": 0.005, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0075, | |
| "grad_norm": 9.779342651367188, | |
| "learning_rate": 4.5e-07, | |
| "loss": 0.6984, | |
| "step": 3 | |
| }, | |
| { | |
| "Batch Mean": 2.668701171875, | |
| "accuracy": 0.5625, | |
| "epoch": 0.0075, | |
| "step": 3 | |
| }, | |
| { | |
| "Batch Mean": 2.6484375, | |
| "accuracy": 0.375, | |
| "epoch": 0.0075, | |
| "step": 3 | |
| }, | |
| { | |
| "Batch Mean": 2.505126953125, | |
| "accuracy": 0.53125, | |
| "epoch": 0.0075, | |
| "step": 3 | |
| }, | |
| { | |
| "Batch Mean": 2.651611328125, | |
| "accuracy": 0.53125, | |
| "epoch": 0.0075, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 7.470857620239258, | |
| "learning_rate": 6.000000000000001e-07, | |
| "loss": 0.6943, | |
| "step": 4 | |
| }, | |
| { | |
| "Batch Mean": 2.631591796875, | |
| "accuracy": 0.53125, | |
| "epoch": 0.01, | |
| "step": 4 | |
| }, | |
| { | |
| "Batch Mean": 2.61376953125, | |
| "accuracy": 0.5625, | |
| "epoch": 0.01, | |
| "step": 4 | |
| }, | |
| { | |
| "Batch Mean": 2.5850830078125, | |
| "accuracy": 0.46875, | |
| "epoch": 0.01, | |
| "step": 4 | |
| }, | |
| { | |
| "Batch Mean": 2.572509765625, | |
| "accuracy": 0.5625, | |
| "epoch": 0.01, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0125, | |
| "grad_norm": 4.961440563201904, | |
| "learning_rate": 7.5e-07, | |
| "loss": 0.6986, | |
| "step": 5 | |
| }, | |
| { | |
| "Batch Mean": 2.603271484375, | |
| "accuracy": 0.5, | |
| "epoch": 0.0125, | |
| "step": 5 | |
| }, | |
| { | |
| "Batch Mean": 2.56103515625, | |
| "accuracy": 0.46875, | |
| "epoch": 0.0125, | |
| "step": 5 | |
| }, | |
| { | |
| "Batch Mean": 2.60791015625, | |
| "accuracy": 0.5, | |
| "epoch": 0.0125, | |
| "step": 5 | |
| }, | |
| { | |
| "Batch Mean": 2.577392578125, | |
| "accuracy": 0.5, | |
| "epoch": 0.0125, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.015, | |
| "grad_norm": 8.984463691711426, | |
| "learning_rate": 9e-07, | |
| "loss": 0.6933, | |
| "step": 6 | |
| }, | |
| { | |
| "Batch Mean": 2.61767578125, | |
| "accuracy": 0.46875, | |
| "epoch": 0.015, | |
| "step": 6 | |
| }, | |
| { | |
| "Batch Mean": 2.596435546875, | |
| "accuracy": 0.4375, | |
| "epoch": 0.015, | |
| "step": 6 | |
| }, | |
| { | |
| "Batch Mean": 2.617919921875, | |
| "accuracy": 0.5, | |
| "epoch": 0.015, | |
| "step": 6 | |
| }, | |
| { | |
| "Batch Mean": 2.5743408203125, | |
| "accuracy": 0.4375, | |
| "epoch": 0.015, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0175, | |
| "grad_norm": 5.3602728843688965, | |
| "learning_rate": 1.05e-06, | |
| "loss": 0.7051, | |
| "step": 7 | |
| }, | |
| { | |
| "Batch Mean": 2.658447265625, | |
| "accuracy": 0.5625, | |
| "epoch": 0.0175, | |
| "step": 7 | |
| }, | |
| { | |
| "Batch Mean": 2.598388671875, | |
| "accuracy": 0.5625, | |
| "epoch": 0.0175, | |
| "step": 7 | |
| }, | |
| { | |
| "Batch Mean": 2.5712890625, | |
| "accuracy": 0.40625, | |
| "epoch": 0.0175, | |
| "step": 7 | |
| }, | |
| { | |
| "Batch Mean": 2.60302734375, | |
| "accuracy": 0.5, | |
| "epoch": 0.0175, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 6.917173385620117, | |
| "learning_rate": 1.2000000000000002e-06, | |
| "loss": 0.6994, | |
| "step": 8 | |
| }, | |
| { | |
| "Batch Mean": 2.6435546875, | |
| "accuracy": 0.46875, | |
| "epoch": 0.02, | |
| "step": 8 | |
| }, | |
| { | |
| "Batch Mean": 2.58740234375, | |
| "accuracy": 0.4375, | |
| "epoch": 0.02, | |
| "step": 8 | |
| }, | |
| { | |
| "Batch Mean": 2.589111328125, | |
| "accuracy": 0.46875, | |
| "epoch": 0.02, | |
| "step": 8 | |
| }, | |
| { | |
| "Batch Mean": 2.61767578125, | |
| "accuracy": 0.53125, | |
| "epoch": 0.02, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0225, | |
| "grad_norm": 6.093692302703857, | |
| "learning_rate": 1.35e-06, | |
| "loss": 0.6999, | |
| "step": 9 | |
| }, | |
| { | |
| "Batch Mean": 2.69091796875, | |
| "accuracy": 0.59375, | |
| "epoch": 0.0225, | |
| "step": 9 | |
| }, | |
| { | |
| "Batch Mean": 2.63525390625, | |
| "accuracy": 0.5625, | |
| "epoch": 0.0225, | |
| "step": 9 | |
| }, | |
| { | |
| "Batch Mean": 2.607421875, | |
| "accuracy": 0.46875, | |
| "epoch": 0.0225, | |
| "step": 9 | |
| }, | |
| { | |
| "Batch Mean": 2.65185546875, | |
| "accuracy": 0.40625, | |
| "epoch": 0.0225, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.025, | |
| "grad_norm": 6.671854496002197, | |
| "learning_rate": 1.5e-06, | |
| "loss": 0.6841, | |
| "step": 10 | |
| }, | |
| { | |
| "Batch Mean": 2.681640625, | |
| "accuracy": 0.5625, | |
| "epoch": 0.025, | |
| "step": 10 | |
| }, | |
| { | |
| "Batch Mean": 2.698974609375, | |
| "accuracy": 0.53125, | |
| "epoch": 0.025, | |
| "step": 10 | |
| }, | |
| { | |
| "Batch Mean": 2.645751953125, | |
| "accuracy": 0.625, | |
| "epoch": 0.025, | |
| "step": 10 | |
| }, | |
| { | |
| "Batch Mean": 2.648681640625, | |
| "accuracy": 0.59375, | |
| "epoch": 0.025, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0275, | |
| "grad_norm": 7.0144758224487305, | |
| "learning_rate": 1.65e-06, | |
| "loss": 0.6924, | |
| "step": 11 | |
| }, | |
| { | |
| "Batch Mean": 2.687255859375, | |
| "accuracy": 0.5625, | |
| "epoch": 0.0275, | |
| "step": 11 | |
| }, | |
| { | |
| "Batch Mean": 2.7158203125, | |
| "accuracy": 0.625, | |
| "epoch": 0.0275, | |
| "step": 11 | |
| }, | |
| { | |
| "Batch Mean": 2.649658203125, | |
| "accuracy": 0.4375, | |
| "epoch": 0.0275, | |
| "step": 11 | |
| }, | |
| { | |
| "Batch Mean": 2.67041015625, | |
| "accuracy": 0.53125, | |
| "epoch": 0.0275, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 7.060955047607422, | |
| "learning_rate": 1.8e-06, | |
| "loss": 0.6724, | |
| "step": 12 | |
| }, | |
| { | |
| "Batch Mean": 2.6439208984375, | |
| "accuracy": 0.6875, | |
| "epoch": 0.03, | |
| "step": 12 | |
| }, | |
| { | |
| "Batch Mean": 2.66748046875, | |
| "accuracy": 0.59375, | |
| "epoch": 0.03, | |
| "step": 12 | |
| }, | |
| { | |
| "Batch Mean": 2.59814453125, | |
| "accuracy": 0.59375, | |
| "epoch": 0.03, | |
| "step": 12 | |
| }, | |
| { | |
| "Batch Mean": 2.6533203125, | |
| "accuracy": 0.46875, | |
| "epoch": 0.03, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0325, | |
| "grad_norm": 7.240173816680908, | |
| "learning_rate": 1.95e-06, | |
| "loss": 0.6762, | |
| "step": 13 | |
| }, | |
| { | |
| "Batch Mean": 2.741943359375, | |
| "accuracy": 0.5625, | |
| "epoch": 0.0325, | |
| "step": 13 | |
| }, | |
| { | |
| "Batch Mean": 2.7222900390625, | |
| "accuracy": 0.8125, | |
| "epoch": 0.0325, | |
| "step": 13 | |
| }, | |
| { | |
| "Batch Mean": 2.854736328125, | |
| "accuracy": 0.59375, | |
| "epoch": 0.0325, | |
| "step": 13 | |
| }, | |
| { | |
| "Batch Mean": 2.762451171875, | |
| "accuracy": 0.59375, | |
| "epoch": 0.0325, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.035, | |
| "grad_norm": 6.017094612121582, | |
| "learning_rate": 2.1e-06, | |
| "loss": 0.6564, | |
| "step": 14 | |
| }, | |
| { | |
| "Batch Mean": 2.851318359375, | |
| "accuracy": 0.4375, | |
| "epoch": 0.035, | |
| "step": 14 | |
| }, | |
| { | |
| "Batch Mean": 2.780029296875, | |
| "accuracy": 0.75, | |
| "epoch": 0.035, | |
| "step": 14 | |
| }, | |
| { | |
| "Batch Mean": 2.72509765625, | |
| "accuracy": 0.5625, | |
| "epoch": 0.035, | |
| "step": 14 | |
| }, | |
| { | |
| "Batch Mean": 2.74609375, | |
| "accuracy": 0.5, | |
| "epoch": 0.035, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0375, | |
| "grad_norm": 7.460608959197998, | |
| "learning_rate": 2.25e-06, | |
| "loss": 0.6804, | |
| "step": 15 | |
| }, | |
| { | |
| "Batch Mean": 2.7120361328125, | |
| "accuracy": 0.625, | |
| "epoch": 0.0375, | |
| "step": 15 | |
| }, | |
| { | |
| "Batch Mean": 2.6575927734375, | |
| "accuracy": 0.78125, | |
| "epoch": 0.0375, | |
| "step": 15 | |
| }, | |
| { | |
| "Batch Mean": 2.6646881103515625, | |
| "accuracy": 0.5625, | |
| "epoch": 0.0375, | |
| "step": 15 | |
| }, | |
| { | |
| "Batch Mean": 2.817626953125, | |
| "accuracy": 0.5625, | |
| "epoch": 0.0375, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.134560585021973, | |
| "learning_rate": 2.4000000000000003e-06, | |
| "loss": 0.6439, | |
| "step": 16 | |
| }, | |
| { | |
| "Batch Mean": 2.87158203125, | |
| "accuracy": 0.625, | |
| "epoch": 0.04, | |
| "step": 16 | |
| }, | |
| { | |
| "Batch Mean": 2.80712890625, | |
| "accuracy": 0.875, | |
| "epoch": 0.04, | |
| "step": 16 | |
| }, | |
| { | |
| "Batch Mean": 2.92333984375, | |
| "accuracy": 0.65625, | |
| "epoch": 0.04, | |
| "step": 16 | |
| }, | |
| { | |
| "Batch Mean": 2.859375, | |
| "accuracy": 0.6875, | |
| "epoch": 0.04, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0425, | |
| "grad_norm": 9.069143295288086, | |
| "learning_rate": 2.55e-06, | |
| "loss": 0.6009, | |
| "step": 17 | |
| }, | |
| { | |
| "Batch Mean": 2.988525390625, | |
| "accuracy": 0.46875, | |
| "epoch": 0.0425, | |
| "step": 17 | |
| }, | |
| { | |
| "Batch Mean": 2.9150390625, | |
| "accuracy": 0.46875, | |
| "epoch": 0.0425, | |
| "step": 17 | |
| }, | |
| { | |
| "Batch Mean": 2.9075927734375, | |
| "accuracy": 0.65625, | |
| "epoch": 0.0425, | |
| "step": 17 | |
| }, | |
| { | |
| "Batch Mean": 2.949462890625, | |
| "accuracy": 0.5625, | |
| "epoch": 0.0425, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.045, | |
| "grad_norm": 7.171381950378418, | |
| "learning_rate": 2.7e-06, | |
| "loss": 0.6478, | |
| "step": 18 | |
| }, | |
| { | |
| "Batch Mean": 2.8892822265625, | |
| "accuracy": 0.75, | |
| "epoch": 0.045, | |
| "step": 18 | |
| }, | |
| { | |
| "Batch Mean": 3.0479736328125, | |
| "accuracy": 0.59375, | |
| "epoch": 0.045, | |
| "step": 18 | |
| }, | |
| { | |
| "Batch Mean": 2.951904296875, | |
| "accuracy": 0.78125, | |
| "epoch": 0.045, | |
| "step": 18 | |
| }, | |
| { | |
| "Batch Mean": 2.9471435546875, | |
| "accuracy": 0.8125, | |
| "epoch": 0.045, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0475, | |
| "grad_norm": 9.2951078414917, | |
| "learning_rate": 2.85e-06, | |
| "loss": 0.5669, | |
| "step": 19 | |
| }, | |
| { | |
| "Batch Mean": 2.9896240234375, | |
| "accuracy": 0.5625, | |
| "epoch": 0.0475, | |
| "step": 19 | |
| }, | |
| { | |
| "Batch Mean": 3.28076171875, | |
| "accuracy": 0.59375, | |
| "epoch": 0.0475, | |
| "step": 19 | |
| }, | |
| { | |
| "Batch Mean": 3.0648193359375, | |
| "accuracy": 0.78125, | |
| "epoch": 0.0475, | |
| "step": 19 | |
| }, | |
| { | |
| "Batch Mean": 3.02362060546875, | |
| "accuracy": 0.59375, | |
| "epoch": 0.0475, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.917195796966553, | |
| "learning_rate": 3e-06, | |
| "loss": 0.6227, | |
| "step": 20 | |
| }, | |
| { | |
| "Batch Mean": 3.187744140625, | |
| "accuracy": 0.65625, | |
| "epoch": 0.05, | |
| "step": 20 | |
| }, | |
| { | |
| "Batch Mean": 2.88177490234375, | |
| "accuracy": 0.6875, | |
| "epoch": 0.05, | |
| "step": 20 | |
| }, | |
| { | |
| "Batch Mean": 3.10791015625, | |
| "accuracy": 0.8125, | |
| "epoch": 0.05, | |
| "step": 20 | |
| }, | |
| { | |
| "Batch Mean": 2.94873046875, | |
| "accuracy": 0.625, | |
| "epoch": 0.05, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0525, | |
| "grad_norm": 8.974556922912598, | |
| "learning_rate": 2.992105263157895e-06, | |
| "loss": 0.5974, | |
| "step": 21 | |
| }, | |
| { | |
| "Batch Mean": 3.12005615234375, | |
| "accuracy": 0.5625, | |
| "epoch": 0.0525, | |
| "step": 21 | |
| }, | |
| { | |
| "Batch Mean": 3.238006591796875, | |
| "accuracy": 0.78125, | |
| "epoch": 0.0525, | |
| "step": 21 | |
| }, | |
| { | |
| "Batch Mean": 3.1559066772460938, | |
| "accuracy": 0.625, | |
| "epoch": 0.0525, | |
| "step": 21 | |
| }, | |
| { | |
| "Batch Mean": 2.941864013671875, | |
| "accuracy": 0.8125, | |
| "epoch": 0.0525, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.055, | |
| "grad_norm": 6.266777038574219, | |
| "learning_rate": 2.9842105263157896e-06, | |
| "loss": 0.5654, | |
| "step": 22 | |
| }, | |
| { | |
| "Batch Mean": 3.0380859375, | |
| "accuracy": 0.625, | |
| "epoch": 0.055, | |
| "step": 22 | |
| }, | |
| { | |
| "Batch Mean": 3.34686279296875, | |
| "accuracy": 0.625, | |
| "epoch": 0.055, | |
| "step": 22 | |
| }, | |
| { | |
| "Batch Mean": 3.39178466796875, | |
| "accuracy": 0.75, | |
| "epoch": 0.055, | |
| "step": 22 | |
| }, | |
| { | |
| "Batch Mean": 3.215087890625, | |
| "accuracy": 0.78125, | |
| "epoch": 0.055, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.0575, | |
| "grad_norm": 12.593354225158691, | |
| "learning_rate": 2.9763157894736843e-06, | |
| "loss": 0.6144, | |
| "step": 23 | |
| }, | |
| { | |
| "Batch Mean": 3.6195030212402344, | |
| "accuracy": 0.8125, | |
| "epoch": 0.0575, | |
| "step": 23 | |
| }, | |
| { | |
| "Batch Mean": 3.9134521484375, | |
| "accuracy": 0.4375, | |
| "epoch": 0.0575, | |
| "step": 23 | |
| }, | |
| { | |
| "Batch Mean": 3.4212799072265625, | |
| "accuracy": 0.6875, | |
| "epoch": 0.0575, | |
| "step": 23 | |
| }, | |
| { | |
| "Batch Mean": 3.5451812744140625, | |
| "accuracy": 0.6875, | |
| "epoch": 0.0575, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 11.987595558166504, | |
| "learning_rate": 2.968421052631579e-06, | |
| "loss": 0.643, | |
| "step": 24 | |
| }, | |
| { | |
| "Batch Mean": 4.074462890625, | |
| "accuracy": 0.71875, | |
| "epoch": 0.06, | |
| "step": 24 | |
| }, | |
| { | |
| "Batch Mean": 3.9752197265625, | |
| "accuracy": 0.75, | |
| "epoch": 0.06, | |
| "step": 24 | |
| }, | |
| { | |
| "Batch Mean": 3.98480224609375, | |
| "accuracy": 0.6875, | |
| "epoch": 0.06, | |
| "step": 24 | |
| }, | |
| { | |
| "Batch Mean": 4.22607421875, | |
| "accuracy": 0.8125, | |
| "epoch": 0.06, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0625, | |
| "grad_norm": 9.740213394165039, | |
| "learning_rate": 2.960526315789474e-06, | |
| "loss": 0.5295, | |
| "step": 25 | |
| }, | |
| { | |
| "Batch Mean": 4.26507568359375, | |
| "accuracy": 0.6875, | |
| "epoch": 0.0625, | |
| "step": 25 | |
| }, | |
| { | |
| "Batch Mean": 4.357688903808594, | |
| "accuracy": 0.8125, | |
| "epoch": 0.0625, | |
| "step": 25 | |
| }, | |
| { | |
| "Batch Mean": 4.076980113983154, | |
| "accuracy": 0.59375, | |
| "epoch": 0.0625, | |
| "step": 25 | |
| }, | |
| { | |
| "Batch Mean": 4.0531005859375, | |
| "accuracy": 0.65625, | |
| "epoch": 0.0625, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.065, | |
| "grad_norm": 14.751266479492188, | |
| "learning_rate": 2.9526315789473685e-06, | |
| "loss": 0.6701, | |
| "step": 26 | |
| }, | |
| { | |
| "Batch Mean": 4.2257080078125, | |
| "accuracy": 0.65625, | |
| "epoch": 0.065, | |
| "step": 26 | |
| }, | |
| { | |
| "Batch Mean": 3.6409378051757812, | |
| "accuracy": 0.625, | |
| "epoch": 0.065, | |
| "step": 26 | |
| }, | |
| { | |
| "Batch Mean": 4.32666015625, | |
| "accuracy": 0.75, | |
| "epoch": 0.065, | |
| "step": 26 | |
| }, | |
| { | |
| "Batch Mean": 4.223388671875, | |
| "accuracy": 0.75, | |
| "epoch": 0.065, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.0675, | |
| "grad_norm": 12.241596221923828, | |
| "learning_rate": 2.9447368421052633e-06, | |
| "loss": 0.6132, | |
| "step": 27 | |
| }, | |
| { | |
| "Batch Mean": 4.0788421630859375, | |
| "accuracy": 0.75, | |
| "epoch": 0.0675, | |
| "step": 27 | |
| }, | |
| { | |
| "Batch Mean": 4.0919647216796875, | |
| "accuracy": 0.78125, | |
| "epoch": 0.0675, | |
| "step": 27 | |
| }, | |
| { | |
| "Batch Mean": 3.9208831787109375, | |
| "accuracy": 0.75, | |
| "epoch": 0.0675, | |
| "step": 27 | |
| }, | |
| { | |
| "Batch Mean": 3.6530914306640625, | |
| "accuracy": 0.78125, | |
| "epoch": 0.0675, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 9.23661994934082, | |
| "learning_rate": 2.936842105263158e-06, | |
| "loss": 0.491, | |
| "step": 28 | |
| }, | |
| { | |
| "Batch Mean": 3.82781982421875, | |
| "accuracy": 0.6875, | |
| "epoch": 0.07, | |
| "step": 28 | |
| }, | |
| { | |
| "Batch Mean": 3.8771190643310547, | |
| "accuracy": 0.75, | |
| "epoch": 0.07, | |
| "step": 28 | |
| }, | |
| { | |
| "Batch Mean": 4.152229309082031, | |
| "accuracy": 0.75, | |
| "epoch": 0.07, | |
| "step": 28 | |
| }, | |
| { | |
| "Batch Mean": 3.3394012451171875, | |
| "accuracy": 0.6875, | |
| "epoch": 0.07, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.0725, | |
| "grad_norm": 8.272553443908691, | |
| "learning_rate": 2.9289473684210528e-06, | |
| "loss": 0.4942, | |
| "step": 29 | |
| }, | |
| { | |
| "Batch Mean": 3.72723388671875, | |
| "accuracy": 0.625, | |
| "epoch": 0.0725, | |
| "step": 29 | |
| }, | |
| { | |
| "Batch Mean": 3.4618988037109375, | |
| "accuracy": 0.78125, | |
| "epoch": 0.0725, | |
| "step": 29 | |
| }, | |
| { | |
| "Batch Mean": 4.0829925537109375, | |
| "accuracy": 0.8125, | |
| "epoch": 0.0725, | |
| "step": 29 | |
| }, | |
| { | |
| "Batch Mean": 3.5148162841796875, | |
| "accuracy": 0.6875, | |
| "epoch": 0.0725, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.075, | |
| "grad_norm": 8.980628967285156, | |
| "learning_rate": 2.9210526315789475e-06, | |
| "loss": 0.5918, | |
| "step": 30 | |
| }, | |
| { | |
| "Batch Mean": 3.8092041015625, | |
| "accuracy": 0.625, | |
| "epoch": 0.075, | |
| "step": 30 | |
| }, | |
| { | |
| "Batch Mean": 3.889404296875, | |
| "accuracy": 0.5625, | |
| "epoch": 0.075, | |
| "step": 30 | |
| }, | |
| { | |
| "Batch Mean": 3.994617462158203, | |
| "accuracy": 0.78125, | |
| "epoch": 0.075, | |
| "step": 30 | |
| }, | |
| { | |
| "Batch Mean": 3.82794189453125, | |
| "accuracy": 0.875, | |
| "epoch": 0.075, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0775, | |
| "grad_norm": 8.450087547302246, | |
| "learning_rate": 2.9131578947368423e-06, | |
| "loss": 0.5721, | |
| "step": 31 | |
| }, | |
| { | |
| "Batch Mean": 3.980316162109375, | |
| "accuracy": 0.6875, | |
| "epoch": 0.0775, | |
| "step": 31 | |
| }, | |
| { | |
| "Batch Mean": 4.373779296875, | |
| "accuracy": 0.625, | |
| "epoch": 0.0775, | |
| "step": 31 | |
| }, | |
| { | |
| "Batch Mean": 4.037841796875, | |
| "accuracy": 0.75, | |
| "epoch": 0.0775, | |
| "step": 31 | |
| }, | |
| { | |
| "Batch Mean": 3.831298828125, | |
| "accuracy": 0.75, | |
| "epoch": 0.0775, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 8.705353736877441, | |
| "learning_rate": 2.905263157894737e-06, | |
| "loss": 0.5436, | |
| "step": 32 | |
| }, | |
| { | |
| "Batch Mean": 3.7490234375, | |
| "accuracy": 0.8125, | |
| "epoch": 0.08, | |
| "step": 32 | |
| }, | |
| { | |
| "Batch Mean": 3.83935546875, | |
| "accuracy": 0.84375, | |
| "epoch": 0.08, | |
| "step": 32 | |
| }, | |
| { | |
| "Batch Mean": 3.965576171875, | |
| "accuracy": 0.71875, | |
| "epoch": 0.08, | |
| "step": 32 | |
| }, | |
| { | |
| "Batch Mean": 3.752685546875, | |
| "accuracy": 0.71875, | |
| "epoch": 0.08, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0825, | |
| "grad_norm": 8.69457721710205, | |
| "learning_rate": 2.8973684210526318e-06, | |
| "loss": 0.4884, | |
| "step": 33 | |
| }, | |
| { | |
| "Batch Mean": 3.9962158203125, | |
| "accuracy": 0.84375, | |
| "epoch": 0.0825, | |
| "step": 33 | |
| }, | |
| { | |
| "Batch Mean": 3.93798828125, | |
| "accuracy": 0.90625, | |
| "epoch": 0.0825, | |
| "step": 33 | |
| }, | |
| { | |
| "Batch Mean": 3.80908203125, | |
| "accuracy": 0.90625, | |
| "epoch": 0.0825, | |
| "step": 33 | |
| }, | |
| { | |
| "Batch Mean": 3.912109375, | |
| "accuracy": 0.75, | |
| "epoch": 0.0825, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.085, | |
| "grad_norm": 7.060486316680908, | |
| "learning_rate": 2.8894736842105265e-06, | |
| "loss": 0.4117, | |
| "step": 34 | |
| }, | |
| { | |
| "Batch Mean": 3.6049652099609375, | |
| "accuracy": 0.78125, | |
| "epoch": 0.085, | |
| "step": 34 | |
| }, | |
| { | |
| "Batch Mean": 3.53125, | |
| "accuracy": 0.71875, | |
| "epoch": 0.085, | |
| "step": 34 | |
| }, | |
| { | |
| "Batch Mean": 3.86993408203125, | |
| "accuracy": 0.75, | |
| "epoch": 0.085, | |
| "step": 34 | |
| }, | |
| { | |
| "Batch Mean": 3.9173583984375, | |
| "accuracy": 0.59375, | |
| "epoch": 0.085, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0875, | |
| "grad_norm": 7.442059516906738, | |
| "learning_rate": 2.8815789473684213e-06, | |
| "loss": 0.5188, | |
| "step": 35 | |
| }, | |
| { | |
| "Batch Mean": 3.7060546875, | |
| "accuracy": 0.71875, | |
| "epoch": 0.0875, | |
| "step": 35 | |
| }, | |
| { | |
| "Batch Mean": 3.6419677734375, | |
| "accuracy": 0.84375, | |
| "epoch": 0.0875, | |
| "step": 35 | |
| }, | |
| { | |
| "Batch Mean": 3.79437255859375, | |
| "accuracy": 0.71875, | |
| "epoch": 0.0875, | |
| "step": 35 | |
| }, | |
| { | |
| "Batch Mean": 3.9708251953125, | |
| "accuracy": 0.71875, | |
| "epoch": 0.0875, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 7.1602888107299805, | |
| "learning_rate": 2.873684210526316e-06, | |
| "loss": 0.4938, | |
| "step": 36 | |
| }, | |
| { | |
| "Batch Mean": 3.74139404296875, | |
| "accuracy": 0.625, | |
| "epoch": 0.09, | |
| "step": 36 | |
| }, | |
| { | |
| "Batch Mean": 4.314697265625, | |
| "accuracy": 0.625, | |
| "epoch": 0.09, | |
| "step": 36 | |
| }, | |
| { | |
| "Batch Mean": 3.3461456298828125, | |
| "accuracy": 0.75, | |
| "epoch": 0.09, | |
| "step": 36 | |
| }, | |
| { | |
| "Batch Mean": 3.1932830810546875, | |
| "accuracy": 0.78125, | |
| "epoch": 0.09, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0925, | |
| "grad_norm": 7.272737503051758, | |
| "learning_rate": 2.8657894736842103e-06, | |
| "loss": 0.5001, | |
| "step": 37 | |
| }, | |
| { | |
| "Batch Mean": 4.018646240234375, | |
| "accuracy": 0.75, | |
| "epoch": 0.0925, | |
| "step": 37 | |
| }, | |
| { | |
| "Batch Mean": 3.30047607421875, | |
| "accuracy": 0.71875, | |
| "epoch": 0.0925, | |
| "step": 37 | |
| }, | |
| { | |
| "Batch Mean": 3.917877197265625, | |
| "accuracy": 0.78125, | |
| "epoch": 0.0925, | |
| "step": 37 | |
| }, | |
| { | |
| "Batch Mean": 3.60589599609375, | |
| "accuracy": 0.78125, | |
| "epoch": 0.0925, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.095, | |
| "grad_norm": 7.850794792175293, | |
| "learning_rate": 2.857894736842105e-06, | |
| "loss": 0.5267, | |
| "step": 38 | |
| }, | |
| { | |
| "Batch Mean": 4.016511917114258, | |
| "accuracy": 0.84375, | |
| "epoch": 0.095, | |
| "step": 38 | |
| }, | |
| { | |
| "Batch Mean": 3.8358612060546875, | |
| "accuracy": 0.71875, | |
| "epoch": 0.095, | |
| "step": 38 | |
| }, | |
| { | |
| "Batch Mean": 4.089599609375, | |
| "accuracy": 0.875, | |
| "epoch": 0.095, | |
| "step": 38 | |
| }, | |
| { | |
| "Batch Mean": 3.9793701171875, | |
| "accuracy": 0.65625, | |
| "epoch": 0.095, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.0975, | |
| "grad_norm": 7.21096134185791, | |
| "learning_rate": 2.85e-06, | |
| "loss": 0.4716, | |
| "step": 39 | |
| }, | |
| { | |
| "Batch Mean": 3.28546142578125, | |
| "accuracy": 0.78125, | |
| "epoch": 0.0975, | |
| "step": 39 | |
| }, | |
| { | |
| "Batch Mean": 3.844146728515625, | |
| "accuracy": 0.875, | |
| "epoch": 0.0975, | |
| "step": 39 | |
| }, | |
| { | |
| "Batch Mean": 4.03277587890625, | |
| "accuracy": 0.84375, | |
| "epoch": 0.0975, | |
| "step": 39 | |
| }, | |
| { | |
| "Batch Mean": 3.28216552734375, | |
| "accuracy": 0.78125, | |
| "epoch": 0.0975, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 11.831915855407715, | |
| "learning_rate": 2.8421052631578946e-06, | |
| "loss": 0.424, | |
| "step": 40 | |
| }, | |
| { | |
| "Batch Mean": 4.2200927734375, | |
| "accuracy": 0.6875, | |
| "epoch": 0.1, | |
| "step": 40 | |
| }, | |
| { | |
| "Batch Mean": 3.5522689819335938, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1, | |
| "step": 40 | |
| }, | |
| { | |
| "Batch Mean": 3.92266845703125, | |
| "accuracy": 0.8125, | |
| "epoch": 0.1, | |
| "step": 40 | |
| }, | |
| { | |
| "Batch Mean": 3.9715576171875, | |
| "accuracy": 0.90625, | |
| "epoch": 0.1, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1025, | |
| "grad_norm": 8.1405668258667, | |
| "learning_rate": 2.8342105263157897e-06, | |
| "loss": 0.424, | |
| "step": 41 | |
| }, | |
| { | |
| "Batch Mean": 4.1801605224609375, | |
| "accuracy": 0.75, | |
| "epoch": 0.1025, | |
| "step": 41 | |
| }, | |
| { | |
| "Batch Mean": 4.2984619140625, | |
| "accuracy": 0.75, | |
| "epoch": 0.1025, | |
| "step": 41 | |
| }, | |
| { | |
| "Batch Mean": 4.1153564453125, | |
| "accuracy": 0.75, | |
| "epoch": 0.1025, | |
| "step": 41 | |
| }, | |
| { | |
| "Batch Mean": 4.0693817138671875, | |
| "accuracy": 0.71875, | |
| "epoch": 0.1025, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.105, | |
| "grad_norm": 13.232189178466797, | |
| "learning_rate": 2.8263157894736845e-06, | |
| "loss": 0.5667, | |
| "step": 42 | |
| }, | |
| { | |
| "Batch Mean": 3.77996826171875, | |
| "accuracy": 0.71875, | |
| "epoch": 0.105, | |
| "step": 42 | |
| }, | |
| { | |
| "Batch Mean": 3.576416015625, | |
| "accuracy": 0.75, | |
| "epoch": 0.105, | |
| "step": 42 | |
| }, | |
| { | |
| "Batch Mean": 4.18341064453125, | |
| "accuracy": 0.8125, | |
| "epoch": 0.105, | |
| "step": 42 | |
| }, | |
| { | |
| "Batch Mean": 4.179298400878906, | |
| "accuracy": 0.84375, | |
| "epoch": 0.105, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.1075, | |
| "grad_norm": 10.862428665161133, | |
| "learning_rate": 2.8184210526315792e-06, | |
| "loss": 0.5047, | |
| "step": 43 | |
| }, | |
| { | |
| "Batch Mean": 3.411527633666992, | |
| "accuracy": 0.65625, | |
| "epoch": 0.1075, | |
| "step": 43 | |
| }, | |
| { | |
| "Batch Mean": 3.7935562133789062, | |
| "accuracy": 0.78125, | |
| "epoch": 0.1075, | |
| "step": 43 | |
| }, | |
| { | |
| "Batch Mean": 4.264190673828125, | |
| "accuracy": 0.625, | |
| "epoch": 0.1075, | |
| "step": 43 | |
| }, | |
| { | |
| "Batch Mean": 3.1276397705078125, | |
| "accuracy": 0.8125, | |
| "epoch": 0.1075, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 10.28708267211914, | |
| "learning_rate": 2.810526315789474e-06, | |
| "loss": 0.4958, | |
| "step": 44 | |
| }, | |
| { | |
| "Batch Mean": 3.398651123046875, | |
| "accuracy": 0.8125, | |
| "epoch": 0.11, | |
| "step": 44 | |
| }, | |
| { | |
| "Batch Mean": 3.4655685424804688, | |
| "accuracy": 0.90625, | |
| "epoch": 0.11, | |
| "step": 44 | |
| }, | |
| { | |
| "Batch Mean": 3.491424560546875, | |
| "accuracy": 0.75, | |
| "epoch": 0.11, | |
| "step": 44 | |
| }, | |
| { | |
| "Batch Mean": 4.3106536865234375, | |
| "accuracy": 0.75, | |
| "epoch": 0.11, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.1125, | |
| "grad_norm": 9.006805419921875, | |
| "learning_rate": 2.8026315789473687e-06, | |
| "loss": 0.4885, | |
| "step": 45 | |
| }, | |
| { | |
| "Batch Mean": 3.865081787109375, | |
| "accuracy": 0.8125, | |
| "epoch": 0.1125, | |
| "step": 45 | |
| }, | |
| { | |
| "Batch Mean": 2.941986083984375, | |
| "accuracy": 0.75, | |
| "epoch": 0.1125, | |
| "step": 45 | |
| }, | |
| { | |
| "Batch Mean": 3.431640625, | |
| "accuracy": 0.90625, | |
| "epoch": 0.1125, | |
| "step": 45 | |
| }, | |
| { | |
| "Batch Mean": 3.38702392578125, | |
| "accuracy": 0.78125, | |
| "epoch": 0.1125, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.115, | |
| "grad_norm": 8.483081817626953, | |
| "learning_rate": 2.7947368421052635e-06, | |
| "loss": 0.4436, | |
| "step": 46 | |
| }, | |
| { | |
| "Batch Mean": 3.768218994140625, | |
| "accuracy": 0.71875, | |
| "epoch": 0.115, | |
| "step": 46 | |
| }, | |
| { | |
| "Batch Mean": 2.960357666015625, | |
| "accuracy": 0.78125, | |
| "epoch": 0.115, | |
| "step": 46 | |
| }, | |
| { | |
| "Batch Mean": 3.8204345703125, | |
| "accuracy": 0.90625, | |
| "epoch": 0.115, | |
| "step": 46 | |
| }, | |
| { | |
| "Batch Mean": 3.39542293548584, | |
| "accuracy": 0.78125, | |
| "epoch": 0.115, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.1175, | |
| "grad_norm": 13.869011878967285, | |
| "learning_rate": 2.7868421052631578e-06, | |
| "loss": 0.4939, | |
| "step": 47 | |
| }, | |
| { | |
| "Batch Mean": 3.5629959106445312, | |
| "accuracy": 0.65625, | |
| "epoch": 0.1175, | |
| "step": 47 | |
| }, | |
| { | |
| "Batch Mean": 3.121295928955078, | |
| "accuracy": 0.8125, | |
| "epoch": 0.1175, | |
| "step": 47 | |
| }, | |
| { | |
| "Batch Mean": 3.243316650390625, | |
| "accuracy": 0.8125, | |
| "epoch": 0.1175, | |
| "step": 47 | |
| }, | |
| { | |
| "Batch Mean": 3.4634838104248047, | |
| "accuracy": 0.90625, | |
| "epoch": 0.1175, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 10.390000343322754, | |
| "learning_rate": 2.7789473684210525e-06, | |
| "loss": 0.4101, | |
| "step": 48 | |
| }, | |
| { | |
| "Batch Mean": 2.8801422119140625, | |
| "accuracy": 0.875, | |
| "epoch": 0.12, | |
| "step": 48 | |
| }, | |
| { | |
| "Batch Mean": 2.7355971336364746, | |
| "accuracy": 0.59375, | |
| "epoch": 0.12, | |
| "step": 48 | |
| }, | |
| { | |
| "Batch Mean": 3.0902557373046875, | |
| "accuracy": 0.8125, | |
| "epoch": 0.12, | |
| "step": 48 | |
| }, | |
| { | |
| "Batch Mean": 2.7920761108398438, | |
| "accuracy": 0.8125, | |
| "epoch": 0.12, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.1225, | |
| "grad_norm": 7.682050704956055, | |
| "learning_rate": 2.7710526315789473e-06, | |
| "loss": 0.4488, | |
| "step": 49 | |
| }, | |
| { | |
| "Batch Mean": 2.1150331497192383, | |
| "accuracy": 0.78125, | |
| "epoch": 0.1225, | |
| "step": 49 | |
| }, | |
| { | |
| "Batch Mean": 2.1563758850097656, | |
| "accuracy": 0.625, | |
| "epoch": 0.1225, | |
| "step": 49 | |
| }, | |
| { | |
| "Batch Mean": 2.22674560546875, | |
| "accuracy": 0.78125, | |
| "epoch": 0.1225, | |
| "step": 49 | |
| }, | |
| { | |
| "Batch Mean": 2.350677490234375, | |
| "accuracy": 0.625, | |
| "epoch": 0.1225, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.125, | |
| "grad_norm": 10.955310821533203, | |
| "learning_rate": 2.763157894736842e-06, | |
| "loss": 0.543, | |
| "step": 50 | |
| }, | |
| { | |
| "Batch Mean": 1.7153472900390625, | |
| "accuracy": 0.6875, | |
| "epoch": 0.125, | |
| "step": 50 | |
| }, | |
| { | |
| "Batch Mean": 1.4781265258789062, | |
| "accuracy": 0.71875, | |
| "epoch": 0.125, | |
| "step": 50 | |
| }, | |
| { | |
| "Batch Mean": 1.9790096282958984, | |
| "accuracy": 0.8125, | |
| "epoch": 0.125, | |
| "step": 50 | |
| }, | |
| { | |
| "Batch Mean": 2.209127426147461, | |
| "accuracy": 0.875, | |
| "epoch": 0.125, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.1275, | |
| "grad_norm": 8.823392868041992, | |
| "learning_rate": 2.7552631578947368e-06, | |
| "loss": 0.4216, | |
| "step": 51 | |
| }, | |
| { | |
| "Batch Mean": 1.8213386535644531, | |
| "accuracy": 0.875, | |
| "epoch": 0.1275, | |
| "step": 51 | |
| }, | |
| { | |
| "Batch Mean": 1.600327491760254, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1275, | |
| "step": 51 | |
| }, | |
| { | |
| "Batch Mean": 1.6077766418457031, | |
| "accuracy": 0.71875, | |
| "epoch": 0.1275, | |
| "step": 51 | |
| }, | |
| { | |
| "Batch Mean": 1.3565635681152344, | |
| "accuracy": 0.8125, | |
| "epoch": 0.1275, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 9.650039672851562, | |
| "learning_rate": 2.7473684210526315e-06, | |
| "loss": 0.4226, | |
| "step": 52 | |
| }, | |
| { | |
| "Batch Mean": 1.4758195877075195, | |
| "accuracy": 0.875, | |
| "epoch": 0.13, | |
| "step": 52 | |
| }, | |
| { | |
| "Batch Mean": 1.5553207397460938, | |
| "accuracy": 0.90625, | |
| "epoch": 0.13, | |
| "step": 52 | |
| }, | |
| { | |
| "Batch Mean": 1.637298583984375, | |
| "accuracy": 0.71875, | |
| "epoch": 0.13, | |
| "step": 52 | |
| }, | |
| { | |
| "Batch Mean": 1.4313430786132812, | |
| "accuracy": 0.8125, | |
| "epoch": 0.13, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.1325, | |
| "grad_norm": 8.510551452636719, | |
| "learning_rate": 2.7394736842105263e-06, | |
| "loss": 0.4413, | |
| "step": 53 | |
| }, | |
| { | |
| "Batch Mean": 1.7805156707763672, | |
| "accuracy": 0.8125, | |
| "epoch": 0.1325, | |
| "step": 53 | |
| }, | |
| { | |
| "Batch Mean": 1.9161019325256348, | |
| "accuracy": 0.6875, | |
| "epoch": 0.1325, | |
| "step": 53 | |
| }, | |
| { | |
| "Batch Mean": 1.6803773641586304, | |
| "accuracy": 0.78125, | |
| "epoch": 0.1325, | |
| "step": 53 | |
| }, | |
| { | |
| "Batch Mean": 2.052886962890625, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1325, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.135, | |
| "grad_norm": 7.554328441619873, | |
| "learning_rate": 2.7315789473684214e-06, | |
| "loss": 0.4647, | |
| "step": 54 | |
| }, | |
| { | |
| "Batch Mean": 2.4049549102783203, | |
| "accuracy": 0.6875, | |
| "epoch": 0.135, | |
| "step": 54 | |
| }, | |
| { | |
| "Batch Mean": 1.8316669464111328, | |
| "accuracy": 0.71875, | |
| "epoch": 0.135, | |
| "step": 54 | |
| }, | |
| { | |
| "Batch Mean": 1.7167816162109375, | |
| "accuracy": 0.75, | |
| "epoch": 0.135, | |
| "step": 54 | |
| }, | |
| { | |
| "Batch Mean": 1.8629379272460938, | |
| "accuracy": 0.6875, | |
| "epoch": 0.135, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.1375, | |
| "grad_norm": 12.288453102111816, | |
| "learning_rate": 2.723684210526316e-06, | |
| "loss": 0.5379, | |
| "step": 55 | |
| }, | |
| { | |
| "Batch Mean": 2.3260297775268555, | |
| "accuracy": 0.6875, | |
| "epoch": 0.1375, | |
| "step": 55 | |
| }, | |
| { | |
| "Batch Mean": 2.1811676025390625, | |
| "accuracy": 0.75, | |
| "epoch": 0.1375, | |
| "step": 55 | |
| }, | |
| { | |
| "Batch Mean": 2.3114492893218994, | |
| "accuracy": 0.78125, | |
| "epoch": 0.1375, | |
| "step": 55 | |
| }, | |
| { | |
| "Batch Mean": 2.71484375, | |
| "accuracy": 0.75, | |
| "epoch": 0.1375, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 11.225523948669434, | |
| "learning_rate": 2.715789473684211e-06, | |
| "loss": 0.4975, | |
| "step": 56 | |
| }, | |
| { | |
| "Batch Mean": 2.4541587829589844, | |
| "accuracy": 0.8125, | |
| "epoch": 0.14, | |
| "step": 56 | |
| }, | |
| { | |
| "Batch Mean": 2.656031608581543, | |
| "accuracy": 0.71875, | |
| "epoch": 0.14, | |
| "step": 56 | |
| }, | |
| { | |
| "Batch Mean": 2.3141021728515625, | |
| "accuracy": 0.71875, | |
| "epoch": 0.14, | |
| "step": 56 | |
| }, | |
| { | |
| "Batch Mean": 2.468475341796875, | |
| "accuracy": 0.78125, | |
| "epoch": 0.14, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.1425, | |
| "grad_norm": 9.733325958251953, | |
| "learning_rate": 2.7078947368421052e-06, | |
| "loss": 0.5559, | |
| "step": 57 | |
| }, | |
| { | |
| "Batch Mean": 2.35528564453125, | |
| "accuracy": 0.8125, | |
| "epoch": 0.1425, | |
| "step": 57 | |
| }, | |
| { | |
| "Batch Mean": 2.867542266845703, | |
| "accuracy": 0.8125, | |
| "epoch": 0.1425, | |
| "step": 57 | |
| }, | |
| { | |
| "Batch Mean": 2.365894317626953, | |
| "accuracy": 0.6875, | |
| "epoch": 0.1425, | |
| "step": 57 | |
| }, | |
| { | |
| "Batch Mean": 2.246662139892578, | |
| "accuracy": 0.78125, | |
| "epoch": 0.1425, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.145, | |
| "grad_norm": 8.220906257629395, | |
| "learning_rate": 2.7e-06, | |
| "loss": 0.4247, | |
| "step": 58 | |
| }, | |
| { | |
| "Batch Mean": 2.3054046630859375, | |
| "accuracy": 0.78125, | |
| "epoch": 0.145, | |
| "step": 58 | |
| }, | |
| { | |
| "Batch Mean": 2.5247726440429688, | |
| "accuracy": 0.90625, | |
| "epoch": 0.145, | |
| "step": 58 | |
| }, | |
| { | |
| "Batch Mean": 2.8898544311523438, | |
| "accuracy": 0.90625, | |
| "epoch": 0.145, | |
| "step": 58 | |
| }, | |
| { | |
| "Batch Mean": 2.017698287963867, | |
| "accuracy": 0.8125, | |
| "epoch": 0.145, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.1475, | |
| "grad_norm": 7.708261013031006, | |
| "learning_rate": 2.6921052631578947e-06, | |
| "loss": 0.3713, | |
| "step": 59 | |
| }, | |
| { | |
| "Batch Mean": 2.355548858642578, | |
| "accuracy": 0.8125, | |
| "epoch": 0.1475, | |
| "step": 59 | |
| }, | |
| { | |
| "Batch Mean": 2.2453155517578125, | |
| "accuracy": 0.71875, | |
| "epoch": 0.1475, | |
| "step": 59 | |
| }, | |
| { | |
| "Batch Mean": 2.658827781677246, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1475, | |
| "step": 59 | |
| }, | |
| { | |
| "Batch Mean": 2.4546985626220703, | |
| "accuracy": 1.0, | |
| "epoch": 0.1475, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 7.150002956390381, | |
| "learning_rate": 2.6842105263157895e-06, | |
| "loss": 0.4002, | |
| "step": 60 | |
| }, | |
| { | |
| "Batch Mean": 2.0813217163085938, | |
| "accuracy": 0.65625, | |
| "epoch": 0.15, | |
| "step": 60 | |
| }, | |
| { | |
| "Batch Mean": 1.9841499328613281, | |
| "accuracy": 0.78125, | |
| "epoch": 0.15, | |
| "step": 60 | |
| }, | |
| { | |
| "Batch Mean": 2.2633228302001953, | |
| "accuracy": 0.75, | |
| "epoch": 0.15, | |
| "step": 60 | |
| }, | |
| { | |
| "Batch Mean": 2.2690048217773438, | |
| "accuracy": 0.71875, | |
| "epoch": 0.15, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.1525, | |
| "grad_norm": 7.390719413757324, | |
| "learning_rate": 2.6763157894736842e-06, | |
| "loss": 0.5013, | |
| "step": 61 | |
| }, | |
| { | |
| "Batch Mean": 2.179004669189453, | |
| "accuracy": 0.65625, | |
| "epoch": 0.1525, | |
| "step": 61 | |
| }, | |
| { | |
| "Batch Mean": 2.1857452392578125, | |
| "accuracy": 0.78125, | |
| "epoch": 0.1525, | |
| "step": 61 | |
| }, | |
| { | |
| "Batch Mean": 1.4751567840576172, | |
| "accuracy": 0.8125, | |
| "epoch": 0.1525, | |
| "step": 61 | |
| }, | |
| { | |
| "Batch Mean": 2.2417678833007812, | |
| "accuracy": 0.875, | |
| "epoch": 0.1525, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.155, | |
| "grad_norm": 6.933903694152832, | |
| "learning_rate": 2.668421052631579e-06, | |
| "loss": 0.4227, | |
| "step": 62 | |
| }, | |
| { | |
| "Batch Mean": 1.8520653247833252, | |
| "accuracy": 0.78125, | |
| "epoch": 0.155, | |
| "step": 62 | |
| }, | |
| { | |
| "Batch Mean": 1.680877685546875, | |
| "accuracy": 0.8125, | |
| "epoch": 0.155, | |
| "step": 62 | |
| }, | |
| { | |
| "Batch Mean": 1.7311248779296875, | |
| "accuracy": 0.75, | |
| "epoch": 0.155, | |
| "step": 62 | |
| }, | |
| { | |
| "Batch Mean": 2.07061767578125, | |
| "accuracy": 0.65625, | |
| "epoch": 0.155, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1575, | |
| "grad_norm": 6.547921180725098, | |
| "learning_rate": 2.6605263157894737e-06, | |
| "loss": 0.4736, | |
| "step": 63 | |
| }, | |
| { | |
| "Batch Mean": 2.2572174072265625, | |
| "accuracy": 0.875, | |
| "epoch": 0.1575, | |
| "step": 63 | |
| }, | |
| { | |
| "Batch Mean": 2.0034332275390625, | |
| "accuracy": 0.78125, | |
| "epoch": 0.1575, | |
| "step": 63 | |
| }, | |
| { | |
| "Batch Mean": 1.7276840209960938, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1575, | |
| "step": 63 | |
| }, | |
| { | |
| "Batch Mean": 2.1414947509765625, | |
| "accuracy": 0.8125, | |
| "epoch": 0.1575, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 8.414312362670898, | |
| "learning_rate": 2.6526315789473685e-06, | |
| "loss": 0.4544, | |
| "step": 64 | |
| }, | |
| { | |
| "Batch Mean": 1.8973770141601562, | |
| "accuracy": 0.96875, | |
| "epoch": 0.16, | |
| "step": 64 | |
| }, | |
| { | |
| "Batch Mean": 2.1470298767089844, | |
| "accuracy": 0.78125, | |
| "epoch": 0.16, | |
| "step": 64 | |
| }, | |
| { | |
| "Batch Mean": 1.9893627166748047, | |
| "accuracy": 0.75, | |
| "epoch": 0.16, | |
| "step": 64 | |
| }, | |
| { | |
| "Batch Mean": 1.8890247344970703, | |
| "accuracy": 0.8125, | |
| "epoch": 0.16, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.1625, | |
| "grad_norm": 7.936588287353516, | |
| "learning_rate": 2.644736842105263e-06, | |
| "loss": 0.4333, | |
| "step": 65 | |
| }, | |
| { | |
| "Batch Mean": 1.9641265869140625, | |
| "accuracy": 0.75, | |
| "epoch": 0.1625, | |
| "step": 65 | |
| }, | |
| { | |
| "Batch Mean": 1.9673995971679688, | |
| "accuracy": 0.875, | |
| "epoch": 0.1625, | |
| "step": 65 | |
| }, | |
| { | |
| "Batch Mean": 1.6348600387573242, | |
| "accuracy": 0.6875, | |
| "epoch": 0.1625, | |
| "step": 65 | |
| }, | |
| { | |
| "Batch Mean": 1.740340232849121, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1625, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.165, | |
| "grad_norm": 7.350888729095459, | |
| "learning_rate": 2.636842105263158e-06, | |
| "loss": 0.4376, | |
| "step": 66 | |
| }, | |
| { | |
| "Batch Mean": 1.7691650390625, | |
| "accuracy": 0.8125, | |
| "epoch": 0.165, | |
| "step": 66 | |
| }, | |
| { | |
| "Batch Mean": 2.187957763671875, | |
| "accuracy": 0.875, | |
| "epoch": 0.165, | |
| "step": 66 | |
| }, | |
| { | |
| "Batch Mean": 2.187624454498291, | |
| "accuracy": 0.84375, | |
| "epoch": 0.165, | |
| "step": 66 | |
| }, | |
| { | |
| "Batch Mean": 2.1125755310058594, | |
| "accuracy": 0.71875, | |
| "epoch": 0.165, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.1675, | |
| "grad_norm": 7.868091106414795, | |
| "learning_rate": 2.6289473684210527e-06, | |
| "loss": 0.3905, | |
| "step": 67 | |
| }, | |
| { | |
| "Batch Mean": 3.1567535400390625, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1675, | |
| "step": 67 | |
| }, | |
| { | |
| "Batch Mean": 2.20458984375, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1675, | |
| "step": 67 | |
| }, | |
| { | |
| "Batch Mean": 2.19390869140625, | |
| "accuracy": 0.71875, | |
| "epoch": 0.1675, | |
| "step": 67 | |
| }, | |
| { | |
| "Batch Mean": 2.5717415809631348, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1675, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 7.648073673248291, | |
| "learning_rate": 2.6210526315789474e-06, | |
| "loss": 0.4176, | |
| "step": 68 | |
| }, | |
| { | |
| "Batch Mean": 3.2033939361572266, | |
| "accuracy": 0.90625, | |
| "epoch": 0.17, | |
| "step": 68 | |
| }, | |
| { | |
| "Batch Mean": 2.538959503173828, | |
| "accuracy": 0.6875, | |
| "epoch": 0.17, | |
| "step": 68 | |
| }, | |
| { | |
| "Batch Mean": 2.950897216796875, | |
| "accuracy": 0.71875, | |
| "epoch": 0.17, | |
| "step": 68 | |
| }, | |
| { | |
| "Batch Mean": 2.9720191955566406, | |
| "accuracy": 0.65625, | |
| "epoch": 0.17, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.1725, | |
| "grad_norm": 9.133621215820312, | |
| "learning_rate": 2.613157894736842e-06, | |
| "loss": 0.4827, | |
| "step": 69 | |
| }, | |
| { | |
| "Batch Mean": 2.8690185546875, | |
| "accuracy": 0.78125, | |
| "epoch": 0.1725, | |
| "step": 69 | |
| }, | |
| { | |
| "Batch Mean": 3.2310791015625, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1725, | |
| "step": 69 | |
| }, | |
| { | |
| "Batch Mean": 3.4392356872558594, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1725, | |
| "step": 69 | |
| }, | |
| { | |
| "Batch Mean": 3.79766845703125, | |
| "accuracy": 0.8125, | |
| "epoch": 0.1725, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.175, | |
| "grad_norm": 8.451118469238281, | |
| "learning_rate": 2.605263157894737e-06, | |
| "loss": 0.416, | |
| "step": 70 | |
| }, | |
| { | |
| "Batch Mean": 3.61334228515625, | |
| "accuracy": 0.8125, | |
| "epoch": 0.175, | |
| "step": 70 | |
| }, | |
| { | |
| "Batch Mean": 3.4711837768554688, | |
| "accuracy": 0.78125, | |
| "epoch": 0.175, | |
| "step": 70 | |
| }, | |
| { | |
| "Batch Mean": 3.1711807250976562, | |
| "accuracy": 0.84375, | |
| "epoch": 0.175, | |
| "step": 70 | |
| }, | |
| { | |
| "Batch Mean": 3.73419189453125, | |
| "accuracy": 0.875, | |
| "epoch": 0.175, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1775, | |
| "grad_norm": 8.39415168762207, | |
| "learning_rate": 2.5973684210526317e-06, | |
| "loss": 0.4187, | |
| "step": 71 | |
| }, | |
| { | |
| "Batch Mean": 3.579629898071289, | |
| "accuracy": 0.8125, | |
| "epoch": 0.1775, | |
| "step": 71 | |
| }, | |
| { | |
| "Batch Mean": 3.81597900390625, | |
| "accuracy": 0.90625, | |
| "epoch": 0.1775, | |
| "step": 71 | |
| }, | |
| { | |
| "Batch Mean": 4.013580322265625, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1775, | |
| "step": 71 | |
| }, | |
| { | |
| "Batch Mean": 3.486042022705078, | |
| "accuracy": 0.8125, | |
| "epoch": 0.1775, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 7.406739711761475, | |
| "learning_rate": 2.5894736842105264e-06, | |
| "loss": 0.2955, | |
| "step": 72 | |
| }, | |
| { | |
| "Batch Mean": 3.823301315307617, | |
| "accuracy": 0.78125, | |
| "epoch": 0.18, | |
| "step": 72 | |
| }, | |
| { | |
| "Batch Mean": 3.58245849609375, | |
| "accuracy": 0.8125, | |
| "epoch": 0.18, | |
| "step": 72 | |
| }, | |
| { | |
| "Batch Mean": 3.817901611328125, | |
| "accuracy": 0.75, | |
| "epoch": 0.18, | |
| "step": 72 | |
| }, | |
| { | |
| "Batch Mean": 3.1810264587402344, | |
| "accuracy": 0.84375, | |
| "epoch": 0.18, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.1825, | |
| "grad_norm": 7.441671848297119, | |
| "learning_rate": 2.581578947368421e-06, | |
| "loss": 0.46, | |
| "step": 73 | |
| }, | |
| { | |
| "Batch Mean": 4.177032470703125, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1825, | |
| "step": 73 | |
| }, | |
| { | |
| "Batch Mean": 4.8514404296875, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1825, | |
| "step": 73 | |
| }, | |
| { | |
| "Batch Mean": 3.1745223999023438, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1825, | |
| "step": 73 | |
| }, | |
| { | |
| "Batch Mean": 3.3532562255859375, | |
| "accuracy": 0.75, | |
| "epoch": 0.1825, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.185, | |
| "grad_norm": 7.315526008605957, | |
| "learning_rate": 2.573684210526316e-06, | |
| "loss": 0.4084, | |
| "step": 74 | |
| }, | |
| { | |
| "Batch Mean": 2.9517765045166016, | |
| "accuracy": 0.78125, | |
| "epoch": 0.185, | |
| "step": 74 | |
| }, | |
| { | |
| "Batch Mean": 3.189727783203125, | |
| "accuracy": 0.84375, | |
| "epoch": 0.185, | |
| "step": 74 | |
| }, | |
| { | |
| "Batch Mean": 3.7370223999023438, | |
| "accuracy": 0.84375, | |
| "epoch": 0.185, | |
| "step": 74 | |
| }, | |
| { | |
| "Batch Mean": 3.0487213134765625, | |
| "accuracy": 0.8125, | |
| "epoch": 0.185, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.1875, | |
| "grad_norm": 7.253166198730469, | |
| "learning_rate": 2.5657894736842107e-06, | |
| "loss": 0.4185, | |
| "step": 75 | |
| }, | |
| { | |
| "Batch Mean": 3.319133758544922, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1875, | |
| "step": 75 | |
| }, | |
| { | |
| "Batch Mean": 3.280853271484375, | |
| "accuracy": 0.78125, | |
| "epoch": 0.1875, | |
| "step": 75 | |
| }, | |
| { | |
| "Batch Mean": 3.16741943359375, | |
| "accuracy": 0.71875, | |
| "epoch": 0.1875, | |
| "step": 75 | |
| }, | |
| { | |
| "Batch Mean": 2.6110219955444336, | |
| "accuracy": 0.78125, | |
| "epoch": 0.1875, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 9.37726879119873, | |
| "learning_rate": 2.5578947368421054e-06, | |
| "loss": 0.4841, | |
| "step": 76 | |
| }, | |
| { | |
| "Batch Mean": 2.8535995483398438, | |
| "accuracy": 0.78125, | |
| "epoch": 0.19, | |
| "step": 76 | |
| }, | |
| { | |
| "Batch Mean": 2.5045166015625, | |
| "accuracy": 0.71875, | |
| "epoch": 0.19, | |
| "step": 76 | |
| }, | |
| { | |
| "Batch Mean": 3.445037841796875, | |
| "accuracy": 0.78125, | |
| "epoch": 0.19, | |
| "step": 76 | |
| }, | |
| { | |
| "Batch Mean": 3.05963134765625, | |
| "accuracy": 0.75, | |
| "epoch": 0.19, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.1925, | |
| "grad_norm": 9.00017261505127, | |
| "learning_rate": 2.55e-06, | |
| "loss": 0.4535, | |
| "step": 77 | |
| }, | |
| { | |
| "Batch Mean": 2.868865966796875, | |
| "accuracy": 0.8125, | |
| "epoch": 0.1925, | |
| "step": 77 | |
| }, | |
| { | |
| "Batch Mean": 2.8354110717773438, | |
| "accuracy": 0.78125, | |
| "epoch": 0.1925, | |
| "step": 77 | |
| }, | |
| { | |
| "Batch Mean": 3.1343002319335938, | |
| "accuracy": 0.90625, | |
| "epoch": 0.1925, | |
| "step": 77 | |
| }, | |
| { | |
| "Batch Mean": 3.1275634765625, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1925, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.195, | |
| "grad_norm": 6.656139850616455, | |
| "learning_rate": 2.542105263157895e-06, | |
| "loss": 0.3829, | |
| "step": 78 | |
| }, | |
| { | |
| "Batch Mean": 2.7342681884765625, | |
| "accuracy": 0.8125, | |
| "epoch": 0.195, | |
| "step": 78 | |
| }, | |
| { | |
| "Batch Mean": 2.3845291137695312, | |
| "accuracy": 0.875, | |
| "epoch": 0.195, | |
| "step": 78 | |
| }, | |
| { | |
| "Batch Mean": 2.4807891845703125, | |
| "accuracy": 0.84375, | |
| "epoch": 0.195, | |
| "step": 78 | |
| }, | |
| { | |
| "Batch Mean": 2.4909114837646484, | |
| "accuracy": 0.8125, | |
| "epoch": 0.195, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.1975, | |
| "grad_norm": 6.564652919769287, | |
| "learning_rate": 2.5342105263157892e-06, | |
| "loss": 0.3979, | |
| "step": 79 | |
| }, | |
| { | |
| "Batch Mean": 2.9712295532226562, | |
| "accuracy": 0.84375, | |
| "epoch": 0.1975, | |
| "step": 79 | |
| }, | |
| { | |
| "Batch Mean": 2.5406951904296875, | |
| "accuracy": 0.65625, | |
| "epoch": 0.1975, | |
| "step": 79 | |
| }, | |
| { | |
| "Batch Mean": 2.974529266357422, | |
| "accuracy": 0.9375, | |
| "epoch": 0.1975, | |
| "step": 79 | |
| }, | |
| { | |
| "Batch Mean": 2.565216064453125, | |
| "accuracy": 0.875, | |
| "epoch": 0.1975, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 6.637782096862793, | |
| "learning_rate": 2.526315789473684e-06, | |
| "loss": 0.362, | |
| "step": 80 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 400, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 80, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |