dialect-debiasing-gpt2-medium-translated-pnlogmse-e3-r10000_train-n10.0/checkpoint-3125/trainer_state.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 3125,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0032, "grad_norm": 10.0, "learning_rate": 2.132196162046908e-06, "loss": 357.5563, "step": 10},
    {"epoch": 0.0064, "grad_norm": 10.0, "learning_rate": 4.264392324093816e-06, "loss": 339.1736, "step": 20},
    {"epoch": 0.0096, "grad_norm": 10.000000953674316, "learning_rate": 6.396588486140726e-06, "loss": 349.548, "step": 30},
    {"epoch": 0.0128, "grad_norm": 10.0, "learning_rate": 8.528784648187633e-06, "loss": 355.9309, "step": 40},
    {"epoch": 0.016, "grad_norm": 10.0, "learning_rate": 1.0660980810234541e-05, "loss": 323.813, "step": 50},
    {"epoch": 0.0192, "grad_norm": 10.0, "learning_rate": 1.2793176972281452e-05, "loss": 349.687, "step": 60},
    {"epoch": 0.0224, "grad_norm": 10.000000953674316, "learning_rate": 1.4925373134328357e-05, "loss": 337.978, "step": 70},
    {"epoch": 0.0256, "grad_norm": 10.0, "learning_rate": 1.7057569296375266e-05, "loss": 314.5083, "step": 80},
    {"epoch": 0.0288, "grad_norm": 10.000000953674316, "learning_rate": 1.9189765458422178e-05, "loss": 315.7551, "step": 90},
    {"epoch": 0.032, "grad_norm": 9.999999046325684, "learning_rate": 2.1321961620469083e-05, "loss": 303.5806, "step": 100},
    {"epoch": 0.0352, "grad_norm": 10.0, "learning_rate": 2.345415778251599e-05, "loss": 284.4809, "step": 110},
    {"epoch": 0.0384, "grad_norm": 10.0, "learning_rate": 2.5586353944562904e-05, "loss": 294.0154, "step": 120},
    {"epoch": 0.0416, "grad_norm": 10.0, "learning_rate": 2.771855010660981e-05, "loss": 300.8392, "step": 130},
    {"epoch": 0.0448, "grad_norm": 10.0, "learning_rate": 2.9850746268656714e-05, "loss": 264.0101, "step": 140},
    {"epoch": 0.048, "grad_norm": 10.0, "learning_rate": 3.1982942430703626e-05, "loss": 256.8381, "step": 150},
    {"epoch": 0.0512, "grad_norm": 9.999999046325684, "learning_rate": 3.411513859275053e-05, "loss": 252.8057, "step": 160},
    {"epoch": 0.0544, "grad_norm": 10.0, "learning_rate": 3.624733475479744e-05, "loss": 240.8079, "step": 170},
    {"epoch": 0.0576, "grad_norm": 10.0, "learning_rate": 3.8379530916844355e-05, "loss": 237.8714, "step": 180},
    {"epoch": 0.0608, "grad_norm": 10.0, "learning_rate": 4.051172707889126e-05, "loss": 244.0058, "step": 190},
    {"epoch": 0.064, "grad_norm": 10.0, "learning_rate": 4.2643923240938166e-05, "loss": 250.2869, "step": 200},
    {"epoch": 0.0672, "grad_norm": 10.0, "learning_rate": 4.477611940298508e-05, "loss": 234.9533, "step": 210},
    {"epoch": 0.0704, "grad_norm": 9.999998092651367, "learning_rate": 4.690831556503198e-05, "loss": 229.1305, "step": 220},
    {"epoch": 0.0736, "grad_norm": 10.000000953674316, "learning_rate": 4.904051172707889e-05, "loss": 232.1072, "step": 230},
    {"epoch": 0.0768, "grad_norm": 10.0, "learning_rate": 5.117270788912581e-05, "loss": 236.399, "step": 240},
    {"epoch": 0.08, "grad_norm": 10.0, "learning_rate": 5.330490405117271e-05, "loss": 243.2338, "step": 250},
    {"epoch": 0.0832, "grad_norm": 10.0, "learning_rate": 5.543710021321962e-05, "loss": 249.8973, "step": 260},
    {"epoch": 0.0864, "grad_norm": 10.000000953674316, "learning_rate": 5.756929637526652e-05, "loss": 232.9605, "step": 270},
    {"epoch": 0.0896, "grad_norm": 10.0, "learning_rate": 5.970149253731343e-05, "loss": 223.4648, "step": 280},
    {"epoch": 0.0928, "grad_norm": 10.0, "learning_rate": 6.183368869936035e-05, "loss": 238.9943, "step": 290},
    {"epoch": 0.096, "grad_norm": 9.999999046325684, "learning_rate": 6.396588486140725e-05, "loss": 227.773, "step": 300},
    {"epoch": 0.0992, "grad_norm": 10.000000953674316, "learning_rate": 6.609808102345416e-05, "loss": 215.3868, "step": 310},
    {"epoch": 0.1024, "grad_norm": 10.0, "learning_rate": 6.823027718550106e-05, "loss": 213.8815, "step": 320},
    {"epoch": 0.1056, "grad_norm": 10.0, "learning_rate": 7.036247334754798e-05, "loss": 217.2, "step": 330},
    {"epoch": 0.1088, "grad_norm": 10.0, "learning_rate": 7.249466950959489e-05, "loss": 228.863, "step": 340},
    {"epoch": 0.112, "grad_norm": 10.0, "learning_rate": 7.46268656716418e-05, "loss": 237.3057, "step": 350},
    {"epoch": 0.1152, "grad_norm": 10.0, "learning_rate": 7.675906183368871e-05, "loss": 243.3169, "step": 360},
    {"epoch": 0.1184, "grad_norm": 10.0, "learning_rate": 7.889125799573562e-05, "loss": 227.889, "step": 370},
    {"epoch": 0.1216, "grad_norm": 10.0, "learning_rate": 8.102345415778252e-05, "loss": 231.7757, "step": 380},
    {"epoch": 0.1248, "grad_norm": 10.000000953674316, "learning_rate": 8.315565031982943e-05, "loss": 209.6663, "step": 390},
    {"epoch": 0.128, "grad_norm": 10.0, "learning_rate": 8.528784648187633e-05, "loss": 215.7184, "step": 400},
    {"epoch": 0.1312, "grad_norm": 9.999999046325684, "learning_rate": 8.742004264392325e-05, "loss": 205.8337, "step": 410},
    {"epoch": 0.1344, "grad_norm": 10.0, "learning_rate": 8.955223880597016e-05, "loss": 238.4921, "step": 420},
    {"epoch": 0.1376, "grad_norm": 9.999998092651367, "learning_rate": 9.168443496801706e-05, "loss": 226.8906, "step": 430},
    {"epoch": 0.1408, "grad_norm": 9.999999046325684, "learning_rate": 9.381663113006397e-05, "loss": 220.9074, "step": 440},
    {"epoch": 0.144, "grad_norm": 10.0, "learning_rate": 9.594882729211087e-05, "loss": 217.9529, "step": 450},
    {"epoch": 0.1472, "grad_norm": 10.000000953674316, "learning_rate": 9.808102345415778e-05, "loss": 221.0881, "step": 460},
    {"epoch": 0.1504, "grad_norm": 10.0, "learning_rate": 9.998877161464182e-05, "loss": 215.0034, "step": 470},
    {"epoch": 0.1536, "grad_norm": 10.0, "learning_rate": 9.987648776105997e-05, "loss": 232.4474, "step": 480},
    {"epoch": 0.1568, "grad_norm": 10.0, "learning_rate": 9.97642039074781e-05, "loss": 209.5053, "step": 490},
    {"epoch": 0.16, "grad_norm": 10.000000953674316, "learning_rate": 9.965192005389625e-05, "loss": 220.7133, "step": 500},
    {"epoch": 0.1632, "grad_norm": 9.999999046325684, "learning_rate": 9.95396362003144e-05, "loss": 219.0157, "step": 510},
    {"epoch": 0.1664, "grad_norm": 10.0, "learning_rate": 9.942735234673256e-05, "loss": 221.8852, "step": 520},
    {"epoch": 0.1696, "grad_norm": 10.0, "learning_rate": 9.931506849315069e-05, "loss": 207.6024, "step": 530},
    {"epoch": 0.1728, "grad_norm": 10.0, "learning_rate": 9.920278463956883e-05, "loss": 220.2887, "step": 540},
    {"epoch": 0.176, "grad_norm": 10.0, "learning_rate": 9.909050078598698e-05, "loss": 206.5356, "step": 550},
    {"epoch": 0.1792, "grad_norm": 10.0, "learning_rate": 9.897821693240512e-05, "loss": 218.8879, "step": 560},
    {"epoch": 0.1824, "grad_norm": 10.0, "learning_rate": 9.886593307882327e-05, "loss": 201.5034, "step": 570},
    {"epoch": 0.1856, "grad_norm": 10.0, "learning_rate": 9.875364922524142e-05, "loss": 222.656, "step": 580},
    {"epoch": 0.1888, "grad_norm": 9.999999046325684, "learning_rate": 9.864136537165956e-05, "loss": 219.2458, "step": 590},
    {"epoch": 0.192, "grad_norm": 10.0, "learning_rate": 9.852908151807771e-05, "loss": 235.0156, "step": 600},
    {"epoch": 0.1952, "grad_norm": 10.0, "learning_rate": 9.841679766449586e-05, "loss": 203.967, "step": 610},
    {"epoch": 0.1984, "grad_norm": 10.000001907348633, "learning_rate": 9.8304513810914e-05, "loss": 238.8202, "step": 620},
    {"epoch": 0.2016, "grad_norm": 10.0, "learning_rate": 9.819222995733213e-05, "loss": 210.0008, "step": 630},
    {"epoch": 0.2048, "grad_norm": 10.000000953674316, "learning_rate": 9.807994610375028e-05, "loss": 217.756, "step": 640},
    {"epoch": 0.208, "grad_norm": 9.999999046325684, "learning_rate": 9.796766225016843e-05, "loss": 202.0346, "step": 650},
    {"epoch": 0.2112, "grad_norm": 9.999999046325684, "learning_rate": 9.785537839658657e-05, "loss": 213.0119, "step": 660},
    {"epoch": 0.2144, "grad_norm": 10.000000953674316, "learning_rate": 9.774309454300472e-05, "loss": 216.7441, "step": 670},
    {"epoch": 0.2176, "grad_norm": 10.0, "learning_rate": 9.763081068942287e-05, "loss": 217.3541, "step": 680},
    {"epoch": 0.2208, "grad_norm": 10.0, "learning_rate": 9.751852683584101e-05, "loss": 224.6908, "step": 690},
    {"epoch": 0.224, "grad_norm": 10.0, "learning_rate": 9.740624298225916e-05, "loss": 202.1809, "step": 700},
    {"epoch": 0.2272, "grad_norm": 10.0, "learning_rate": 9.729395912867731e-05, "loss": 206.8913, "step": 710},
    {"epoch": 0.2304, "grad_norm": 10.000000953674316, "learning_rate": 9.718167527509545e-05, "loss": 222.8366, "step": 720},
    {"epoch": 0.2336, "grad_norm": 10.0, "learning_rate": 9.706939142151358e-05, "loss": 214.7459, "step": 730},
    {"epoch": 0.2368, "grad_norm": 10.0, "learning_rate": 9.695710756793174e-05, "loss": 216.2085, "step": 740},
    {"epoch": 0.24, "grad_norm": 10.0, "learning_rate": 9.684482371434989e-05, "loss": 206.1845, "step": 750},
    {"epoch": 0.2432, "grad_norm": 10.000000953674316, "learning_rate": 9.673253986076802e-05, "loss": 212.6178, "step": 760},
    {"epoch": 0.2464, "grad_norm": 10.0, "learning_rate": 9.662025600718617e-05, "loss": 206.3909, "step": 770},
    {"epoch": 0.2496, "grad_norm": 10.0, "learning_rate": 9.650797215360432e-05, "loss": 196.6766, "step": 780},
    {"epoch": 0.2528, "grad_norm": 9.999999046325684, "learning_rate": 9.639568830002246e-05, "loss": 208.7465, "step": 790},
    {"epoch": 0.256, "grad_norm": 10.0, "learning_rate": 9.628340444644061e-05, "loss": 217.3836, "step": 800},
    {"epoch": 0.2592, "grad_norm": 10.0, "learning_rate": 9.617112059285875e-05, "loss": 199.0042, "step": 810},
    {"epoch": 0.2624, "grad_norm": 10.0, "learning_rate": 9.605883673927689e-05, "loss": 207.9872, "step": 820},
    {"epoch": 0.2656, "grad_norm": 10.0, "learning_rate": 9.594655288569504e-05, "loss": 207.2379, "step": 830},
    {"epoch": 0.2688, "grad_norm": 10.0, "learning_rate": 9.583426903211319e-05, "loss": 203.26, "step": 840},
    {"epoch": 0.272, "grad_norm": 9.999999046325684, "learning_rate": 9.572198517853134e-05, "loss": 200.729, "step": 850},
    {"epoch": 0.2752, "grad_norm": 10.0, "learning_rate": 9.560970132494948e-05, "loss": 213.6845, "step": 860},
    {"epoch": 0.2784, "grad_norm": 10.0, "learning_rate": 9.549741747136763e-05, "loss": 203.4559, "step": 870},
    {"epoch": 0.2816, "grad_norm": 10.0, "learning_rate": 9.538513361778578e-05, "loss": 207.1236, "step": 880},
    {"epoch": 0.2848, "grad_norm": 10.0, "learning_rate": 9.527284976420391e-05, "loss": 204.6924, "step": 890},
    {"epoch": 0.288, "grad_norm": 10.0, "learning_rate": 9.516056591062205e-05, "loss": 208.5811, "step": 900},
    {"epoch": 0.2912, "grad_norm": 10.000000953674316, "learning_rate": 9.50482820570402e-05, "loss": 195.729, "step": 910},
    {"epoch": 0.2944, "grad_norm": 10.0, "learning_rate": 9.493599820345834e-05, "loss": 214.6524, "step": 920},
    {"epoch": 0.2976, "grad_norm": 10.0, "learning_rate": 9.482371434987649e-05, "loss": 229.564, "step": 930},
    {"epoch": 0.3008, "grad_norm": 9.999999046325684, "learning_rate": 9.471143049629464e-05, "loss": 221.1298, "step": 940},
    {"epoch": 0.304, "grad_norm": 10.0, "learning_rate": 9.459914664271278e-05, "loss": 206.4352, "step": 950},
    {"epoch": 0.3072, "grad_norm": 10.0, "learning_rate": 9.448686278913093e-05, "loss": 190.5135, "step": 960},
    {"epoch": 0.3104, "grad_norm": 10.000000953674316, "learning_rate": 9.437457893554908e-05, "loss": 228.4936, "step": 970},
    {"epoch": 0.3136, "grad_norm": 10.0, "learning_rate": 9.426229508196722e-05, "loss": 217.441, "step": 980},
    {"epoch": 0.3168, "grad_norm": 9.999999046325684, "learning_rate": 9.415001122838537e-05, "loss": 214.9121, "step": 990},
    {"epoch": 0.32, "grad_norm": 9.999999046325684, "learning_rate": 9.40377273748035e-05, "loss": 212.7035, "step": 1000},
    {"epoch": 0.3232, "grad_norm": 10.0, "learning_rate": 9.392544352122165e-05, "loss": 179.4072, "step": 1010},
    {"epoch": 0.3264, "grad_norm": 10.0, "learning_rate": 9.381315966763979e-05, "loss": 199.3417, "step": 1020},
    {"epoch": 0.3296, "grad_norm": 10.0, "learning_rate": 9.370087581405794e-05, "loss": 196.6066, "step": 1030},
    {"epoch": 0.3328, "grad_norm": 10.0, "learning_rate": 9.358859196047609e-05, "loss": 210.137, "step": 1040},
    {"epoch": 0.336, "grad_norm": 10.0, "learning_rate": 9.347630810689423e-05, "loss": 223.5315, "step": 1050},
    {"epoch": 0.3392, "grad_norm": 10.0, "learning_rate": 9.336402425331238e-05, "loss": 211.4561, "step": 1060},
    {"epoch": 0.3424, "grad_norm": 9.999999046325684, "learning_rate": 9.325174039973053e-05, "loss": 196.8094, "step": 1070},
    {"epoch": 0.3456, "grad_norm": 10.0, "learning_rate": 9.313945654614867e-05, "loss": 195.0026, "step": 1080},
    {"epoch": 0.3488, "grad_norm": 10.0, "learning_rate": 9.30271726925668e-05, "loss": 198.5569, "step": 1090},
    {"epoch": 0.352, "grad_norm": 10.0, "learning_rate": 9.291488883898496e-05, "loss": 222.2918, "step": 1100},
    {"epoch": 0.3552, "grad_norm": 10.0, "learning_rate": 9.280260498540311e-05, "loss": 204.5885, "step": 1110},
    {"epoch": 0.3584, "grad_norm": 10.000000953674316, "learning_rate": 9.269032113182124e-05, "loss": 203.8271, "step": 1120},
    {"epoch": 0.3616, "grad_norm": 10.000000953674316, "learning_rate": 9.25780372782394e-05, "loss": 198.8627, "step": 1130},
    {"epoch": 0.3648, "grad_norm": 10.0, "learning_rate": 9.246575342465755e-05, "loss": 215.3792, "step": 1140},
    {"epoch": 0.368, "grad_norm": 10.000000953674316, "learning_rate": 9.235346957107568e-05, "loss": 210.2805, "step": 1150},
    {"epoch": 0.3712, "grad_norm": 10.0, "learning_rate": 9.224118571749383e-05, "loss": 203.8917, "step": 1160},
    {"epoch": 0.3744, "grad_norm": 10.000000953674316, "learning_rate": 9.212890186391197e-05, "loss": 202.3635, "step": 1170},
    {"epoch": 0.3776, "grad_norm": 10.0, "learning_rate": 9.201661801033011e-05, "loss": 225.9458, "step": 1180},
    {"epoch": 0.3808, "grad_norm": 10.0, "learning_rate": 9.190433415674826e-05, "loss": 213.7563, "step": 1190},
    {"epoch": 0.384, "grad_norm": 10.000000953674316, "learning_rate": 9.179205030316641e-05, "loss": 219.8337, "step": 1200},
    {"epoch": 0.3872, "grad_norm": 10.0, "learning_rate": 9.167976644958456e-05, "loss": 209.6916, "step": 1210},
    {"epoch": 0.3904, "grad_norm": 10.0, "learning_rate": 9.15674825960027e-05, "loss": 216.8063, "step": 1220},
    {"epoch": 0.3936, "grad_norm": 9.999999046325684, "learning_rate": 9.145519874242085e-05, "loss": 200.4562, "step": 1230},
    {"epoch": 0.3968, "grad_norm": 10.0, "learning_rate": 9.1342914888839e-05, "loss": 201.6241, "step": 1240},
    {"epoch": 0.4, "grad_norm": 10.000000953674316, "learning_rate": 9.123063103525713e-05, "loss": 225.5937, "step": 1250},
    {"epoch": 0.4032, "grad_norm": 10.000000953674316, "learning_rate": 9.111834718167527e-05, "loss": 215.3319, "step": 1260},
    {"epoch": 0.4064, "grad_norm": 10.0, "learning_rate": 9.100606332809342e-05, "loss": 195.5182, "step": 1270},
    {"epoch": 0.4096, "grad_norm": 10.0, "learning_rate": 9.089377947451156e-05, "loss": 188.1754, "step": 1280},
    {"epoch": 0.4128, "grad_norm": 10.000000953674316, "learning_rate": 9.078149562092971e-05, "loss": 202.3522, "step": 1290},
    {"epoch": 0.416, "grad_norm": 10.0, "learning_rate": 9.066921176734786e-05, "loss": 214.9085, "step": 1300},
    {"epoch": 0.4192, "grad_norm": 10.0, "learning_rate": 9.055692791376601e-05, "loss": 209.7786, "step": 1310},
    {"epoch": 0.4224, "grad_norm": 9.999999046325684, "learning_rate": 9.044464406018415e-05, "loss": 212.8, "step": 1320},
    {"epoch": 0.4256, "grad_norm": 10.0, "learning_rate": 9.03323602066023e-05, "loss": 202.0178, "step": 1330},
    {"epoch": 0.4288, "grad_norm": 10.0, "learning_rate": 9.022007635302045e-05, "loss": 206.0686, "step": 1340},
    {"epoch": 0.432, "grad_norm": 10.0, "learning_rate": 9.010779249943859e-05, "loss": 196.0894, "step": 1350},
    {"epoch": 0.4352, "grad_norm": 10.0, "learning_rate": 8.999550864585672e-05, "loss": 220.2167, "step": 1360},
    {"epoch": 0.4384, "grad_norm": 10.0, "learning_rate": 8.988322479227488e-05, "loss": 188.7137, "step": 1370},
    {"epoch": 0.4416, "grad_norm": 10.0, "learning_rate": 8.977094093869301e-05, "loss": 199.8496, "step": 1380},
    {"epoch": 0.4448, "grad_norm": 10.0, "learning_rate": 8.965865708511116e-05, "loss": 196.9146, "step": 1390},
    {"epoch": 0.448, "grad_norm": 10.0, "learning_rate": 8.954637323152931e-05, "loss": 199.8343, "step": 1400},
    {"epoch": 0.4512, "grad_norm": 9.999999046325684, "learning_rate": 8.943408937794746e-05, "loss": 202.8167, "step": 1410},
    {"epoch": 0.4544, "grad_norm": 10.000000953674316, "learning_rate": 8.93218055243656e-05, "loss": 192.5867, "step": 1420},
    {"epoch": 0.4576, "grad_norm": 9.999999046325684, "learning_rate": 8.920952167078375e-05, "loss": 200.7535, "step": 1430},
    {"epoch": 0.4608, "grad_norm": 10.0, "learning_rate": 8.909723781720189e-05, "loss": 196.9394, "step": 1440},
    {"epoch": 0.464, "grad_norm": 10.0, "learning_rate": 8.898495396362003e-05, "loss": 224.0633, "step": 1450},
    {"epoch": 0.4672, "grad_norm": 10.0, "learning_rate": 8.887267011003818e-05, "loss": 195.9796, "step": 1460},
    {"epoch": 0.4704, "grad_norm": 10.0, "learning_rate": 8.876038625645633e-05, "loss": 194.1877, "step": 1470},
    {"epoch": 0.4736, "grad_norm": 10.0, "learning_rate": 8.864810240287447e-05, "loss": 205.1659, "step": 1480},
    {"epoch": 0.4768, "grad_norm": 10.0, "learning_rate": 8.853581854929262e-05, "loss": 207.8239, "step": 1490},
    {"epoch": 0.48, "grad_norm": 10.0, "learning_rate": 8.842353469571077e-05, "loss": 201.21, "step": 1500},
    {"epoch": 0.4832, "grad_norm": 9.999999046325684, "learning_rate": 8.83112508421289e-05, "loss": 210.2497, "step": 1510},
    {"epoch": 0.4864, "grad_norm": 10.0, "learning_rate": 8.819896698854705e-05, "loss": 191.8573, "step": 1520},
    {"epoch": 0.4896, "grad_norm": 10.0, "learning_rate": 8.808668313496519e-05, "loss": 200.6932, "step": 1530},
    {"epoch": 0.4928, "grad_norm": 10.0, "learning_rate": 8.797439928138334e-05, "loss": 184.7692, "step": 1540},
    {"epoch": 0.496, "grad_norm": 10.000000953674316, "learning_rate": 8.786211542780148e-05, "loss": 200.675, "step": 1550},
    {"epoch": 0.4992, "grad_norm": 10.0, "learning_rate": 8.774983157421963e-05, "loss": 192.6302, "step": 1560},
    {"epoch": 0.5024, "grad_norm": 10.0, "learning_rate": 8.763754772063778e-05, "loss": 227.3658, "step": 1570},
    {"epoch": 0.5056, "grad_norm": 10.0, "learning_rate": 8.752526386705592e-05, "loss": 209.1216, "step": 1580},
    {"epoch": 0.5088, "grad_norm": 10.000000953674316, "learning_rate": 8.741298001347407e-05, "loss": 181.276, "step": 1590},
    {"epoch": 0.512, "grad_norm": 10.0, "learning_rate": 8.730069615989222e-05, "loss": 193.3418, "step": 1600},
    {"epoch": 0.5152, "grad_norm": 10.0, "learning_rate": 8.718841230631036e-05, "loss": 203.5718, "step": 1610},
    {"epoch": 0.5184, "grad_norm": 10.0, "learning_rate": 8.70761284527285e-05, "loss": 182.6966, "step": 1620},
    {"epoch": 0.5216, "grad_norm": 10.000000953674316, "learning_rate": 8.696384459914664e-05, "loss": 187.1169, "step": 1630},
    {"epoch": 0.5248, "grad_norm": 10.0, "learning_rate": 8.68515607455648e-05, "loss": 193.9065, "step": 1640},
    {"epoch": 0.528, "grad_norm": 10.000000953674316, "learning_rate": 8.673927689198293e-05, "loss": 196.7543, "step": 1650},
    {"epoch": 0.5312, "grad_norm": 10.0, "learning_rate": 8.662699303840108e-05, "loss": 204.3788, "step": 1660},
    {"epoch": 0.5344, "grad_norm": 9.999999046325684, "learning_rate": 8.651470918481923e-05, "loss": 208.3519, "step": 1670},
    {"epoch": 0.5376, "grad_norm": 10.0, "learning_rate": 8.640242533123737e-05, "loss": 196.6139, "step": 1680},
    {"epoch": 0.5408, "grad_norm": 10.0, "learning_rate": 8.629014147765552e-05, "loss": 182.2085, "step": 1690},
    {"epoch": 0.544, "grad_norm": 10.0, "learning_rate": 8.617785762407367e-05, "loss": 211.879, "step": 1700},
    {"epoch": 0.5472, "grad_norm": 10.0, "learning_rate": 8.606557377049181e-05, "loss": 202.8726, "step": 1710},
    {"epoch": 0.5504, "grad_norm": 10.000000953674316, "learning_rate": 8.595328991690995e-05, "loss": 203.7877, "step": 1720},
    {"epoch": 0.5536, "grad_norm": 10.000000953674316, "learning_rate": 8.58410060633281e-05, "loss": 222.2813, "step": 1730},
    {"epoch": 0.5568, "grad_norm": 10.0, "learning_rate": 8.572872220974623e-05, "loss": 200.6707, "step": 1740},
    {"epoch": 0.56, "grad_norm": 9.999998092651367, "learning_rate": 8.561643835616438e-05, "loss": 202.2661, "step": 1750},
    {"epoch": 0.5632, "grad_norm": 9.999999046325684, "learning_rate": 8.550415450258253e-05, "loss": 204.5085, "step": 1760},
    {"epoch": 0.5664, "grad_norm": 10.0, "learning_rate": 8.539187064900069e-05, "loss": 203.9782, "step": 1770},
    {"epoch": 0.5696, "grad_norm": 10.000000953674316, "learning_rate": 8.527958679541882e-05, "loss": 215.2938, "step": 1780},
    {"epoch": 0.5728, "grad_norm": 9.999999046325684, "learning_rate": 8.516730294183697e-05, "loss": 200.5973, "step": 1790},
    {"epoch": 0.576, "grad_norm": 10.0, "learning_rate": 8.505501908825511e-05, "loss": 204.2859, "step": 1800},
    {"epoch": 0.5792, "grad_norm": 10.0, "learning_rate": 8.494273523467325e-05, "loss": 190.3964, "step": 1810},
    {"epoch": 0.5824, "grad_norm": 10.0, "learning_rate": 8.48304513810914e-05, "loss": 203.4531, "step": 1820},
    {"epoch": 0.5856, "grad_norm": 10.0, "learning_rate": 8.471816752750955e-05, "loss": 186.5525, "step": 1830},
    {"epoch": 0.5888, "grad_norm": 10.0, "learning_rate": 8.460588367392769e-05, "loss": 213.5012, "step": 1840},
    {"epoch": 0.592, "grad_norm": 10.0, "learning_rate": 8.449359982034584e-05, "loss": 202.3058, "step": 1850},
    {"epoch": 0.5952, "grad_norm": 10.0, "learning_rate": 8.438131596676399e-05, "loss": 215.9803, "step": 1860},
    {"epoch": 0.5984, "grad_norm": 10.0, "learning_rate": 8.426903211318214e-05, "loss": 199.6429, "step": 1870},
    {"epoch": 0.6016, "grad_norm": 10.0, "learning_rate": 8.415674825960028e-05, "loss": 211.4417, "step": 1880},
    {"epoch": 0.6048, "grad_norm": 10.0, "learning_rate": 8.404446440601843e-05, "loss": 196.1022, "step": 1890},
    {"epoch": 0.608, "grad_norm": 10.0, "learning_rate": 8.393218055243656e-05, "loss": 205.1227, "step": 1900},
    {"epoch": 0.6112, "grad_norm": 10.0, "learning_rate": 8.38198966988547e-05, "loss": 194.6003, "step": 1910},
    {"epoch": 0.6144, "grad_norm": 10.0, "learning_rate": 8.370761284527285e-05, "loss": 210.6531, "step": 1920},
    {"epoch": 0.6176, "grad_norm": 10.0, "learning_rate": 8.3595328991691e-05, "loss": 199.19, "step": 1930},
    {"epoch": 0.6208, "grad_norm": 10.0, "learning_rate": 8.348304513810914e-05, "loss": 191.3823, "step": 1940},
    {"epoch": 0.624, "grad_norm": 10.000000953674316, "learning_rate": 8.337076128452729e-05, "loss": 192.6865, "step": 1950},
    {"epoch": 0.6272, "grad_norm": 9.999998092651367, "learning_rate": 8.325847743094544e-05, "loss": 206.4755, "step": 1960},
    {"epoch": 0.6304, "grad_norm": 10.000001907348633, "learning_rate": 8.314619357736358e-05, "loss": 208.1272, "step": 1970},
    {"epoch": 0.6336, "grad_norm": 10.0, "learning_rate": 8.303390972378173e-05, "loss": 201.2113, "step": 1980},
    {"epoch": 0.6368, "grad_norm": 9.999998092651367, "learning_rate": 8.292162587019986e-05, "loss": 202.681, "step": 1990},
    {"epoch": 0.64, "grad_norm": 10.0, "learning_rate": 8.280934201661802e-05, "loss": 211.5828, "step": 2000},
    {"epoch": 0.6432, "grad_norm": 10.000000953674316, "learning_rate": 8.269705816303615e-05, "loss": 217.7179, "step": 2010},
    {"epoch": 0.6464, "grad_norm": 10.0, "learning_rate": 8.25847743094543e-05, "loss": 213.2659, "step": 2020},
    {"epoch": 0.6496, "grad_norm": 10.000000953674316, "learning_rate": 8.247249045587245e-05, "loss": 199.9664, "step": 2030},
    {"epoch": 0.6528, "grad_norm": 10.0, "learning_rate": 8.236020660229059e-05, "loss": 179.8506, "step": 2040},
    {"epoch": 0.656, "grad_norm": 10.000000953674316, "learning_rate": 8.224792274870874e-05, "loss": 185.001, "step": 2050},
    {"epoch": 0.6592, "grad_norm": 9.999999046325684, "learning_rate": 8.213563889512689e-05, "loss": 176.3468, "step": 2060},
    {"epoch": 0.6624, "grad_norm": 10.0, "learning_rate": 8.202335504154503e-05, "loss": 208.299, "step": 2070},
    {"epoch": 0.6656, "grad_norm": 10.000000953674316, "learning_rate": 8.191107118796317e-05, "loss": 197.0074, "step": 2080},
    {"epoch": 0.6688, "grad_norm": 10.0, "learning_rate": 8.179878733438132e-05, "loss": 202.9926, "step": 2090},
    {"epoch": 0.672, "grad_norm": 10.0, "learning_rate": 8.168650348079947e-05, "loss": 195.0809, "step": 2100},
    {"epoch": 0.6752, "grad_norm": 10.0, "learning_rate": 8.15742196272176e-05, "loss": 207.0655, "step": 2110},
    {"epoch": 0.6784, "grad_norm": 10.0, "learning_rate": 8.146193577363576e-05, "loss": 188.0668, "step": 2120},
    {"epoch": 0.6816, "grad_norm": 9.999999046325684, "learning_rate": 8.13496519200539e-05, "loss": 203.6391, "step": 2130},
    {"epoch": 0.6848, "grad_norm": 10.0, "learning_rate": 8.123736806647204e-05, "loss": 213.6896, "step": 2140},
    {"epoch": 0.688, "grad_norm": 9.999999046325684, "learning_rate": 8.11250842128902e-05, "loss": 181.5027, "step": 2150},
    {"epoch": 0.6912, "grad_norm": 10.000000953674316, "learning_rate": 8.101280035930835e-05, "loss": 220.8065, "step": 2160},
    {"epoch": 0.6944, "grad_norm": 10.0, "learning_rate": 8.090051650572648e-05, "loss": 199.3392, "step": 2170},
    {"epoch": 0.6976, "grad_norm": 10.0, "learning_rate": 8.078823265214462e-05, "loss": 202.0433, "step": 2180},
    {"epoch": 0.7008, "grad_norm": 10.000000953674316, "learning_rate": 8.067594879856277e-05, "loss": 186.2878, "step": 2190},
    {"epoch": 0.704, "grad_norm": 10.000000953674316, "learning_rate": 8.056366494498092e-05, "loss": 211.8264, "step": 2200},
    {"epoch": 0.7072, "grad_norm": 10.0, "learning_rate": 8.045138109139906e-05, "loss": 195.7192, "step": 2210},
    {"epoch": 0.7104, "grad_norm": 10.000000953674316, "learning_rate": 8.033909723781721e-05, "loss": 220.9343, "step": 2220},
    {"epoch": 0.7136, "grad_norm": 10.0, "learning_rate": 8.022681338423536e-05, "loss": 177.814, "step": 2230},
    {"epoch": 0.7168, "grad_norm": 10.0, "learning_rate": 8.01145295306535e-05, "loss": 187.2061, "step": 2240},
    {"epoch": 0.72, "grad_norm": 10.0, "learning_rate": 8.000224567707165e-05, "loss": 194.2424, "step": 2250},
    {"epoch": 0.7232, "grad_norm": 10.0, "learning_rate": 7.988996182348978e-05, "loss": 207.7647, "step": 2260},
    {"epoch": 0.7264, "grad_norm": 10.0, "learning_rate": 7.977767796990792e-05, "loss": 209.1887, "step": 2270},
    {"epoch": 0.7296, "grad_norm": 9.999999046325684, "learning_rate": 7.966539411632607e-05, "loss": 207.5156, "step": 2280},
    {"epoch": 0.7328, "grad_norm": 10.0, "learning_rate": 7.955311026274422e-05, "loss": 208.1341, "step": 2290},
    {"epoch": 0.736, "grad_norm": 10.000000953674316, "learning_rate": 7.944082640916236e-05, "loss": 202.0579, "step": 2300},
    {"epoch": 0.7392, "grad_norm": 10.0, "learning_rate": 7.932854255558051e-05, "loss": 184.0778, "step": 2310},
    {"epoch": 0.7424, "grad_norm": 10.0, "learning_rate": 7.921625870199866e-05, "loss": 195.0439, "step": 2320},
    {"epoch": 0.7456, "grad_norm": 10.0, "learning_rate": 7.910397484841681e-05, "loss": 183.9819, "step": 2330},
    {"epoch": 0.7488, "grad_norm": 10.0, "learning_rate": 7.899169099483495e-05, "loss": 214.0829, "step": 2340},
    {"epoch": 0.752, "grad_norm": 10.0, "learning_rate": 7.887940714125309e-05, "loss": 200.2758, "step": 2350},
    {"epoch": 0.7552, "grad_norm": 10.0, "learning_rate": 7.876712328767124e-05, "loss": 223.9302, "step": 2360},
    {"epoch": 0.7584, "grad_norm": 10.0, "learning_rate": 7.865483943408937e-05, "loss": 191.5453, "step": 2370},
    {"epoch": 0.7616, "grad_norm": 10.0, "learning_rate": 7.854255558050752e-05, "loss": 195.9706, "step": 2380},
    {"epoch": 0.7648, "grad_norm": 9.999999046325684, "learning_rate": 7.843027172692568e-05, "loss": 204.0887, "step": 2390},
    {"epoch": 0.768, "grad_norm": 10.0, "learning_rate": 7.831798787334381e-05, "loss": 193.3045, "step": 2400},
    {"epoch": 0.7712, "grad_norm": 10.0, "learning_rate": 7.820570401976196e-05, "loss": 184.6662, "step": 2410},
    {"epoch": 0.7744, "grad_norm": 9.999999046325684, "learning_rate": 7.809342016618011e-05, "loss": 191.1202, "step": 2420},
    {"epoch": 0.7776, "grad_norm": 10.000000953674316, "learning_rate": 7.798113631259825e-05, "loss": 179.5854, "step": 2430},
    {"epoch": 0.7808, "grad_norm": 10.000000953674316, "learning_rate": 7.78688524590164e-05, "loss": 198.9862, "step": 2440},
    {"epoch": 0.784, "grad_norm": 10.0, "learning_rate": 7.775656860543454e-05, "loss": 197.0104, "step": 2450},
    {"epoch": 0.7872, "grad_norm": 10.0, "learning_rate": 7.764428475185269e-05, "loss": 199.5438, "step": 2460},
    {"epoch": 0.7904, "grad_norm": 10.0, "learning_rate": 7.753200089827083e-05, "loss": 187.4917, "step": 2470},
    {"epoch": 0.7936, "grad_norm": 10.0, "learning_rate": 7.741971704468898e-05, "loss": 209.9345, "step": 2480},
    {"epoch": 0.7968, "grad_norm": 9.999999046325684, "learning_rate": 7.730743319110713e-05, "loss": 202.1623, "step": 2490},
    {"epoch": 0.8, "grad_norm": 10.0, "learning_rate": 7.719514933752526e-05, "loss": 201.1795, "step": 2500},
    {"epoch": 0.8032, "grad_norm": 9.999999046325684, "learning_rate": 7.708286548394342e-05, "loss": 217.0681, "step": 2510},
    {"epoch": 0.8064, "grad_norm": 10.0, "learning_rate": 7.697058163036157e-05, "loss": 209.9177, "step": 2520},
    {"epoch": 0.8096, "grad_norm": 10.0, "learning_rate": 7.68582977767797e-05, "loss": 191.1048, "step": 2530},
    {"epoch": 0.8128, "grad_norm": 10.0, "learning_rate": 7.674601392319784e-05, "loss": 203.5914, "step": 2540},
    {"epoch": 0.816, "grad_norm": 10.000000953674316, "learning_rate": 7.663373006961599e-05, "loss": 193.325, "step": 2550},
    {"epoch": 0.8192, "grad_norm": 10.000000953674316, "learning_rate": 7.652144621603414e-05, "loss": 197.5087, "step": 2560},
    {"epoch": 0.8224, "grad_norm": 9.999999046325684, "learning_rate": 7.640916236245228e-05, "loss": 211.2045, "step": 2570},
    {"epoch": 0.8256, "grad_norm": 10.000001907348633, "learning_rate": 7.629687850887043e-05, "loss": 200.5594, "step": 2580},
    {"epoch": 0.8288, "grad_norm": 10.0, "learning_rate": 7.618459465528858e-05, "loss": 187.2038, "step": 2590},
    {"epoch": 0.832, "grad_norm": 10.0, "learning_rate": 7.607231080170672e-05, "loss": 193.1689, "step": 2600},
    {"epoch": 0.8352, "grad_norm": 9.999999046325684, "learning_rate": 7.596002694812487e-05, "loss": 192.8693, "step": 2610},
    {"epoch": 0.8384, "grad_norm": 10.000000953674316, "learning_rate": 7.5847743094543e-05, "loss": 200.7623, "step": 2620},
    {"epoch": 0.8416, "grad_norm": 10.0, "learning_rate": 7.573545924096114e-05, "loss": 196.8379, "step": 2630},
    {"epoch": 0.8448, "grad_norm": 10.0, "learning_rate": 7.562317538737929e-05, "loss": 191.7378, "step": 2640},
    {"epoch": 0.848, "grad_norm": 10.0, "learning_rate": 7.551089153379744e-05, "loss": 185.8669, "step": 2650},
    {"epoch": 0.8512, "grad_norm": 10.0, "learning_rate": 7.53986076802156e-05, "loss": 200.2838, "step": 2660},
    {"epoch": 0.8544, "grad_norm": 10.0, "learning_rate": 7.528632382663373e-05, "loss": 187.6903, "step": 2670},
    {"epoch": 0.8576, "grad_norm": 10.0, "learning_rate": 7.517403997305188e-05, "loss": 203.7209, "step": 2680},
    {"epoch": 0.8608, "grad_norm": 10.000000953674316, "learning_rate": 7.506175611947003e-05, "loss": 200.4418, "step": 2690},
    {"epoch": 0.864, "grad_norm": 10.000000953674316, "learning_rate": 7.494947226588817e-05, "loss": 204.9388, "step": 2700},
    {"epoch": 0.8672, "grad_norm": 10.0, "learning_rate": 7.483718841230631e-05, "loss": 184.6188, "step": 2710},
    {"epoch": 0.8704, "grad_norm": 10.0, "learning_rate": 7.472490455872446e-05, "loss": 197.405, "step": 2720},
    {"epoch": 0.8736, "grad_norm": 10.0, "learning_rate": 7.46126207051426e-05, "loss": 187.0493, "step": 2730},
    {"epoch": 0.8768, "grad_norm": 10.0, "learning_rate": 7.450033685156075e-05, "loss": 205.7833, "step": 2740},
    {"epoch": 0.88, "grad_norm": 10.0, "learning_rate": 7.43880529979789e-05, "loss": 193.678, "step": 2750},
    {"epoch": 0.8832, "grad_norm": 10.000000953674316, "learning_rate": 7.427576914439703e-05, "loss": 270.0522, "step": 2760},
    {"epoch": 0.8864, "grad_norm": 10.000000953674316, "learning_rate": 7.416348529081518e-05, "loss": 204.2933, "step": 2770},
    {"epoch": 0.8896, "grad_norm": 10.0, "learning_rate": 7.405120143723333e-05, "loss": 185.6908, "step": 2780},
    {"epoch": 0.8928, "grad_norm": 10.0, "learning_rate": 7.393891758365149e-05, "loss": 209.7563, "step": 2790},
    {"epoch": 0.896, "grad_norm": 9.999999046325684, "learning_rate": 7.382663373006962e-05, "loss": 190.002, "step": 2800},
    {"epoch": 0.8992, "grad_norm": 10.0, "learning_rate": 7.371434987648776e-05, "loss": 205.7809, "step": 2810},
    {"epoch": 0.9024, "grad_norm": 10.0, "learning_rate": 7.360206602290591e-05, "loss": 225.4268, "step": 2820},
    {"epoch": 0.9056, "grad_norm": 10.0, "learning_rate": 7.348978216932405e-05, "loss": 194.7974, "step": 2830},
    {"epoch": 0.9088, "grad_norm": 10.0, "learning_rate": 7.33774983157422e-05, "loss": 191.9117, "step": 2840},
    {"epoch": 0.912, "grad_norm": 9.999999046325684, "learning_rate": 7.326521446216035e-05, "loss": 211.9935, "step": 2850},
    {"epoch": 0.9152, "grad_norm": 10.0, "learning_rate": 7.315293060857849e-05, "loss": 179.6801, "step": 2860},
    {"epoch": 0.9184, "grad_norm": 10.000000953674316, "learning_rate": 7.304064675499664e-05, "loss": 200.4354, "step": 2870},
    {"epoch": 0.9216, "grad_norm": 10.0, "learning_rate": 7.292836290141479e-05, "loss": 184.3547, "step": 2880},
    {"epoch": 0.9248, "grad_norm": 9.999999046325684, "learning_rate": 7.281607904783292e-05, "loss": 199.1085, "step": 2890},
    {"epoch": 0.928, "grad_norm": 10.0, "learning_rate": 7.270379519425106e-05, "loss": 204.9658, "step": 2900},
    {"epoch": 0.9312, "grad_norm": 10.0, "learning_rate": 7.259151134066921e-05, "loss": 180.1226, "step": 2910},
    {"epoch": 0.9344, "grad_norm": 10.0, "learning_rate": 7.247922748708736e-05, "loss": 182.366, "step": 2920},
    {"epoch": 0.9376, "grad_norm": 9.999999046325684, "learning_rate": 7.23669436335055e-05, "loss": 189.3699, "step": 2930},
    {"epoch": 0.9408, "grad_norm": 9.999999046325684, "learning_rate": 7.225465977992365e-05, "loss": 201.2773, "step": 2940},
    {"epoch": 0.944, "grad_norm": 10.0, "learning_rate": 7.21423759263418e-05, "loss": 200.2912, "step": 2950},
    {"epoch": 0.9472, "grad_norm": 9.999999046325684, "learning_rate": 7.203009207275994e-05, "loss": 198.6626, "step": 2960},
    {"epoch": 0.9504, "grad_norm": 10.0, "learning_rate": 7.191780821917809e-05, "loss": 180.9213, "step": 2970},
    {"epoch": 0.9536, "grad_norm": 10.0, "learning_rate": 7.180552436559623e-05, "loss": 205.3271, "step": 2980},
    {"epoch": 0.9568, "grad_norm": 10.0, "learning_rate": 7.169324051201438e-05, "loss": 201.4292, "step": 2990},
    {"epoch": 0.96, "grad_norm": 9.999999046325684, "learning_rate": 7.158095665843251e-05, "loss": 180.9974, "step": 3000},
    {"epoch": 0.9632, "grad_norm": 9.999999046325684, "learning_rate": 7.146867280485066e-05, "loss": 190.5575, "step": 3010},
    {"epoch": 0.9664, "grad_norm": 10.0, "learning_rate": 7.135638895126882e-05, "loss": 193.0807, "step": 3020},
    {"epoch": 0.9696, "grad_norm": 10.0, "learning_rate": 7.124410509768695e-05, "loss": 183.3726, "step": 3030},
    {"epoch": 0.9728, "grad_norm": 10.0, "learning_rate": 7.11318212441051e-05, "loss": 202.116, "step": 3040},
    {"epoch": 0.976, "grad_norm": 9.999999046325684, "learning_rate": 7.101953739052325e-05, "loss": 201.1096, "step": 3050},
    {"epoch": 0.9792, "grad_norm": 9.999999046325684, "learning_rate": 7.090725353694139e-05, "loss": 206.1522, "step": 3060},
    {"epoch": 0.9824, "grad_norm": 10.000000953674316, "learning_rate": 7.079496968335954e-05, "loss": 195.1231, "step": 3070},
    {"epoch": 0.9856, "grad_norm": 10.000000953674316, "learning_rate": 7.068268582977768e-05, "loss": 194.8854, "step": 3080},
    {"epoch": 0.9888, "grad_norm": 10.0, "learning_rate": 7.057040197619582e-05, "loss": 197.801, "step": 3090},
    {"epoch": 0.992, "grad_norm": 10.0, "learning_rate": 7.045811812261397e-05, "loss": 194.1883, "step": 3100},
    {"epoch": 0.9952, "grad_norm": 10.0, "learning_rate": 7.034583426903212e-05, "loss": 182.1877, "step": 3110},
    {"epoch": 0.9984, "grad_norm": 9.999999046325684, "learning_rate": 7.023355041545027e-05, "loss": 203.0507, "step": 3120}
  ],
  "logging_steps": 10,
  "max_steps": 9375,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}
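
A minimal sketch (plain Python; matplotlib is assumed to be installed, and the relative path simply mirrors the checkpoint layout above) of how one might load this `trainer_state.json` and plot the logged training loss against the global step:

```python
import json

import matplotlib.pyplot as plt

# Path assumed from the checkpoint layout above; adjust as needed.
with open("checkpoint-3125/trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry carries epoch, grad_norm, learning_rate, loss, step;
# keep only entries that actually logged a training loss.
train_logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]

plt.plot(steps, losses)
plt.xlabel("global step")
plt.ylabel("training loss")
plt.title(f"Training loss, epoch {state['epoch']} of {state['num_train_epochs']}")
plt.show()
```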