{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9998731447418495,
  "eval_steps": 500,
  "global_step": 3941,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002537105163009007,
      "grad_norm": 4.350327938684374,
      "learning_rate": 2.5316455696202533e-07,
      "loss": 1.8196,
      "step": 10
    },
    {
      "epoch": 0.005074210326018014,
      "grad_norm": 3.678530456221003,
      "learning_rate": 5.063291139240507e-07,
      "loss": 1.8108,
      "step": 20
    },
    {
      "epoch": 0.00761131548902702,
      "grad_norm": 2.945036910011768,
      "learning_rate": 7.59493670886076e-07,
      "loss": 1.8147,
      "step": 30
    },
    {
      "epoch": 0.010148420652036028,
      "grad_norm": 2.3190574498378447,
      "learning_rate": 1.0126582278481013e-06,
      "loss": 1.7548,
      "step": 40
    },
    {
      "epoch": 0.012685525815045033,
      "grad_norm": 2.074674879169714,
      "learning_rate": 1.2658227848101267e-06,
      "loss": 1.7204,
      "step": 50
    },
    {
      "epoch": 0.01522263097805404,
      "grad_norm": 1.748653753253889,
      "learning_rate": 1.518987341772152e-06,
      "loss": 1.6762,
      "step": 60
    },
    {
      "epoch": 0.01775973614106305,
      "grad_norm": 1.6751399170091914,
      "learning_rate": 1.7721518987341774e-06,
      "loss": 1.6487,
      "step": 70
    },
    {
      "epoch": 0.020296841304072055,
      "grad_norm": 1.6363195765660283,
      "learning_rate": 2.0253164556962026e-06,
      "loss": 1.6116,
      "step": 80
    },
    {
      "epoch": 0.022833946467081062,
      "grad_norm": 1.5794976909544993,
      "learning_rate": 2.278481012658228e-06,
      "loss": 1.5916,
      "step": 90
    },
    {
      "epoch": 0.025371051630090066,
      "grad_norm": 1.6233632992191482,
      "learning_rate": 2.5316455696202535e-06,
      "loss": 1.598,
      "step": 100
    },
    {
      "epoch": 0.027908156793099072,
      "grad_norm": 1.5800661094425872,
      "learning_rate": 2.7848101265822785e-06,
      "loss": 1.5626,
      "step": 110
    },
    {
      "epoch": 0.03044526195610808,
      "grad_norm": 1.6072050143283245,
      "learning_rate": 3.037974683544304e-06,
      "loss": 1.5457,
      "step": 120
    },
    {
      "epoch": 0.03298236711911709,
      "grad_norm": 1.6572306247078625,
      "learning_rate": 3.2911392405063294e-06,
      "loss": 1.5391,
      "step": 130
    },
    {
      "epoch": 0.0355194722821261,
      "grad_norm": 1.586848380490154,
      "learning_rate": 3.544303797468355e-06,
      "loss": 1.5125,
      "step": 140
    },
    {
      "epoch": 0.038056577445135104,
      "grad_norm": 1.638449311664989,
      "learning_rate": 3.7974683544303802e-06,
      "loss": 1.5243,
      "step": 150
    },
    {
      "epoch": 0.04059368260814411,
      "grad_norm": 1.6031920359772533,
      "learning_rate": 4.050632911392405e-06,
      "loss": 1.4856,
      "step": 160
    },
    {
      "epoch": 0.04313078777115312,
      "grad_norm": 1.5967439995800559,
      "learning_rate": 4.303797468354431e-06,
      "loss": 1.5129,
      "step": 170
    },
    {
      "epoch": 0.045667892934162124,
      "grad_norm": 1.6075212497819606,
      "learning_rate": 4.556962025316456e-06,
      "loss": 1.4961,
      "step": 180
    },
    {
      "epoch": 0.04820499809717113,
      "grad_norm": 1.6791238021539772,
      "learning_rate": 4.8101265822784815e-06,
      "loss": 1.4893,
      "step": 190
    },
    {
      "epoch": 0.05074210326018013,
      "grad_norm": 1.6210921331693446,
      "learning_rate": 5.063291139240507e-06,
      "loss": 1.5013,
      "step": 200
    },
    {
      "epoch": 0.05327920842318914,
      "grad_norm": 1.7652729374280518,
      "learning_rate": 5.3164556962025316e-06,
      "loss": 1.473,
      "step": 210
    },
    {
      "epoch": 0.055816313586198145,
      "grad_norm": 1.6723244529240142,
      "learning_rate": 5.569620253164557e-06,
      "loss": 1.4791,
      "step": 220
    },
    {
      "epoch": 0.05835341874920715,
      "grad_norm": 1.825326138794735,
      "learning_rate": 5.8227848101265824e-06,
      "loss": 1.4761,
      "step": 230
    },
    {
      "epoch": 0.06089052391221616,
      "grad_norm": 1.9131148271572453,
      "learning_rate": 6.075949367088608e-06,
      "loss": 1.4626,
      "step": 240
    },
    {
      "epoch": 0.06342762907522517,
      "grad_norm": 1.6613770739809675,
      "learning_rate": 6.329113924050634e-06,
      "loss": 1.4601,
      "step": 250
    },
    {
      "epoch": 0.06596473423823418,
      "grad_norm": 1.6666458237214428,
      "learning_rate": 6.582278481012659e-06,
      "loss": 1.4686,
      "step": 260
    },
    {
      "epoch": 0.06850183940124319,
      "grad_norm": 1.5745675069520453,
      "learning_rate": 6.835443037974684e-06,
      "loss": 1.461,
      "step": 270
    },
    {
      "epoch": 0.0710389445642522,
      "grad_norm": 1.6507776778175596,
      "learning_rate": 7.08860759493671e-06,
      "loss": 1.47,
      "step": 280
    },
    {
      "epoch": 0.0735760497272612,
      "grad_norm": 1.6009958375778823,
      "learning_rate": 7.341772151898735e-06,
      "loss": 1.4526,
      "step": 290
    },
    {
      "epoch": 0.07611315489027021,
      "grad_norm": 1.6786912574149853,
      "learning_rate": 7.5949367088607605e-06,
      "loss": 1.4501,
      "step": 300
    },
    {
      "epoch": 0.07865026005327921,
      "grad_norm": 1.6698693144659327,
      "learning_rate": 7.848101265822786e-06,
      "loss": 1.4483,
      "step": 310
    },
    {
      "epoch": 0.08118736521628822,
      "grad_norm": 1.7393580296857223,
      "learning_rate": 8.10126582278481e-06,
      "loss": 1.4252,
      "step": 320
    },
    {
      "epoch": 0.08372447037929723,
      "grad_norm": 1.6124831573952214,
      "learning_rate": 8.354430379746837e-06,
      "loss": 1.4274,
      "step": 330
    },
    {
      "epoch": 0.08626157554230623,
      "grad_norm": 1.6899774259466704,
      "learning_rate": 8.607594936708861e-06,
      "loss": 1.437,
      "step": 340
    },
    {
      "epoch": 0.08879868070531524,
      "grad_norm": 1.6821954539953226,
      "learning_rate": 8.860759493670886e-06,
      "loss": 1.4388,
      "step": 350
    },
    {
      "epoch": 0.09133578586832425,
      "grad_norm": 1.8121412852354848,
      "learning_rate": 9.113924050632912e-06,
      "loss": 1.4151,
      "step": 360
    },
    {
      "epoch": 0.09387289103133326,
      "grad_norm": 1.5407350947949157,
      "learning_rate": 9.367088607594937e-06,
      "loss": 1.4274,
      "step": 370
    },
    {
      "epoch": 0.09640999619434226,
      "grad_norm": 1.7381357929095853,
      "learning_rate": 9.620253164556963e-06,
      "loss": 1.4309,
      "step": 380
    },
    {
      "epoch": 0.09894710135735126,
      "grad_norm": 1.6085237968347799,
      "learning_rate": 9.87341772151899e-06,
      "loss": 1.4173,
      "step": 390
    },
    {
      "epoch": 0.10148420652036026,
      "grad_norm": 1.728407830056737,
      "learning_rate": 9.999950942931784e-06,
      "loss": 1.4312,
      "step": 400
    },
    {
      "epoch": 0.10402131168336927,
      "grad_norm": 1.6972786696047149,
      "learning_rate": 9.999558492161865e-06,
      "loss": 1.422,
      "step": 410
    },
    {
      "epoch": 0.10655841684637828,
      "grad_norm": 1.6297612720977512,
      "learning_rate": 9.998773621425852e-06,
      "loss": 1.3892,
      "step": 420
    },
    {
      "epoch": 0.10909552200938728,
      "grad_norm": 1.7368434280409393,
      "learning_rate": 9.997596392328971e-06,
      "loss": 1.4368,
      "step": 430
    },
    {
      "epoch": 0.11163262717239629,
      "grad_norm": 1.8196387241516612,
      "learning_rate": 9.996026897273024e-06,
      "loss": 1.4129,
      "step": 440
    },
    {
      "epoch": 0.1141697323354053,
      "grad_norm": 1.6083388947957715,
      "learning_rate": 9.994065259449128e-06,
      "loss": 1.4181,
      "step": 450
    },
    {
      "epoch": 0.1167068374984143,
      "grad_norm": 1.7315485017229137,
      "learning_rate": 9.991711632828049e-06,
      "loss": 1.4107,
      "step": 460
    },
    {
      "epoch": 0.11924394266142331,
      "grad_norm": 1.6192259621686464,
      "learning_rate": 9.988966202148115e-06,
      "loss": 1.3933,
      "step": 470
    },
    {
      "epoch": 0.12178104782443232,
      "grad_norm": 1.685661277294985,
      "learning_rate": 9.985829182900717e-06,
      "loss": 1.4305,
      "step": 480
    },
    {
      "epoch": 0.12431815298744132,
      "grad_norm": 1.7673237121898477,
      "learning_rate": 9.982300821313394e-06,
      "loss": 1.407,
      "step": 490
    },
    {
      "epoch": 0.12685525815045035,
      "grad_norm": 1.6729684650970384,
      "learning_rate": 9.978381394330509e-06,
      "loss": 1.3941,
      "step": 500
    },
    {
      "epoch": 0.12939236331345935,
      "grad_norm": 1.6737204711157692,
      "learning_rate": 9.974071209591507e-06,
      "loss": 1.4083,
      "step": 510
    },
    {
      "epoch": 0.13192946847646836,
      "grad_norm": 1.5846450496238496,
      "learning_rate": 9.96937060540677e-06,
      "loss": 1.3913,
      "step": 520
    },
    {
      "epoch": 0.13446657363947737,
      "grad_norm": 1.6440175318683266,
      "learning_rate": 9.964279950731066e-06,
      "loss": 1.4141,
      "step": 530
    },
    {
      "epoch": 0.13700367880248637,
      "grad_norm": 1.5435352480418292,
      "learning_rate": 9.958799645134585e-06,
      "loss": 1.3923,
      "step": 540
    },
    {
      "epoch": 0.13954078396549538,
      "grad_norm": 1.6806917695478834,
      "learning_rate": 9.952930118771576e-06,
      "loss": 1.3882,
      "step": 550
    },
    {
      "epoch": 0.1420778891285044,
      "grad_norm": 1.6991483906725386,
      "learning_rate": 9.946671832346588e-06,
      "loss": 1.3806,
      "step": 560
    },
    {
      "epoch": 0.1446149942915134,
      "grad_norm": 1.6444779930069549,
      "learning_rate": 9.940025277078304e-06,
      "loss": 1.3877,
      "step": 570
    },
    {
      "epoch": 0.1471520994545224,
      "grad_norm": 1.584958906864304,
      "learning_rate": 9.932990974660992e-06,
      "loss": 1.3758,
      "step": 580
    },
    {
      "epoch": 0.1496892046175314,
      "grad_norm": 1.6339337045637337,
      "learning_rate": 9.925569477223549e-06,
      "loss": 1.3942,
      "step": 590
    },
    {
      "epoch": 0.15222630978054041,
      "grad_norm": 1.6782688039697937,
      "learning_rate": 9.917761367286164e-06,
      "loss": 1.3997,
      "step": 600
    },
    {
      "epoch": 0.15476341494354942,
      "grad_norm": 1.722985917307532,
      "learning_rate": 9.909567257714605e-06,
      "loss": 1.3902,
      "step": 610
    },
    {
      "epoch": 0.15730052010655843,
      "grad_norm": 1.515629790408513,
      "learning_rate": 9.9009877916721e-06,
      "loss": 1.3906,
      "step": 620
    },
    {
      "epoch": 0.15983762526956743,
      "grad_norm": 1.6832684084973726,
      "learning_rate": 9.892023642568871e-06,
      "loss": 1.3644,
      "step": 630
    },
    {
      "epoch": 0.16237473043257644,
      "grad_norm": 1.7217857604177804,
      "learning_rate": 9.882675514009262e-06,
      "loss": 1.3673,
      "step": 640
    },
    {
      "epoch": 0.16491183559558545,
      "grad_norm": 1.772784930329774,
      "learning_rate": 9.872944139736523e-06,
      "loss": 1.3751,
      "step": 650
    },
    {
      "epoch": 0.16744894075859446,
      "grad_norm": 1.528731449667675,
      "learning_rate": 9.862830283575215e-06,
      "loss": 1.3678,
      "step": 660
    },
    {
      "epoch": 0.16998604592160346,
      "grad_norm": 1.6156407009731812,
      "learning_rate": 9.852334739371252e-06,
      "loss": 1.3825,
      "step": 670
    },
    {
      "epoch": 0.17252315108461247,
      "grad_norm": 1.6415774929326135,
      "learning_rate": 9.841458330929598e-06,
      "loss": 1.3884,
      "step": 680
    },
    {
      "epoch": 0.17506025624762148,
      "grad_norm": 1.6070221223746397,
      "learning_rate": 9.830201911949604e-06,
      "loss": 1.3934,
      "step": 690
    },
    {
      "epoch": 0.17759736141063048,
      "grad_norm": 1.562454926578275,
      "learning_rate": 9.818566365957996e-06,
      "loss": 1.3645,
      "step": 700
    },
    {
      "epoch": 0.1801344665736395,
      "grad_norm": 1.5996614008577792,
      "learning_rate": 9.80655260623953e-06,
      "loss": 1.3708,
      "step": 710
    },
    {
      "epoch": 0.1826715717366485,
      "grad_norm": 1.5048794279696338,
      "learning_rate": 9.794161575765311e-06,
      "loss": 1.3749,
      "step": 720
    },
    {
      "epoch": 0.1852086768996575,
      "grad_norm": 1.5935516523984996,
      "learning_rate": 9.78139424711877e-06,
      "loss": 1.3886,
      "step": 730
    },
    {
      "epoch": 0.1877457820626665,
      "grad_norm": 1.5282445861981415,
      "learning_rate": 9.76825162241933e-06,
      "loss": 1.373,
      "step": 740
    },
    {
      "epoch": 0.19028288722567552,
      "grad_norm": 1.6358363561782086,
      "learning_rate": 9.754734733243749e-06,
      "loss": 1.3742,
      "step": 750
    },
    {
      "epoch": 0.19281999238868452,
      "grad_norm": 1.587601691095452,
      "learning_rate": 9.740844640545151e-06,
      "loss": 1.3603,
      "step": 760
    },
    {
      "epoch": 0.19535709755169353,
      "grad_norm": 1.5280980736395107,
      "learning_rate": 9.726582434569744e-06,
      "loss": 1.3636,
      "step": 770
    },
    {
      "epoch": 0.1978942027147025,
      "grad_norm": 1.5821827900533842,
      "learning_rate": 9.711949234771258e-06,
      "loss": 1.3536,
      "step": 780
    },
    {
      "epoch": 0.20043130787771152,
      "grad_norm": 1.6354511380141648,
      "learning_rate": 9.696946189723067e-06,
      "loss": 1.3777,
      "step": 790
    },
    {
      "epoch": 0.20296841304072052,
      "grad_norm": 1.4961728532518945,
      "learning_rate": 9.681574477028039e-06,
      "loss": 1.3555,
      "step": 800
    },
    {
      "epoch": 0.20550551820372953,
      "grad_norm": 1.6165039622149184,
      "learning_rate": 9.66583530322611e-06,
      "loss": 1.3736,
      "step": 810
    },
    {
      "epoch": 0.20804262336673854,
      "grad_norm": 1.6085071421673924,
      "learning_rate": 9.649729903699575e-06,
      "loss": 1.3685,
      "step": 820
    },
    {
      "epoch": 0.21057972852974755,
      "grad_norm": 1.674024959469941,
      "learning_rate": 9.633259542576127e-06,
      "loss": 1.3516,
      "step": 830
    },
    {
      "epoch": 0.21311683369275655,
      "grad_norm": 1.5457350813422102,
      "learning_rate": 9.61642551262963e-06,
      "loss": 1.3433,
      "step": 840
    },
    {
      "epoch": 0.21565393885576556,
      "grad_norm": 1.5638603110160445,
      "learning_rate": 9.599229135178651e-06,
      "loss": 1.3596,
      "step": 850
    },
    {
      "epoch": 0.21819104401877457,
      "grad_norm": 1.5845605847496684,
      "learning_rate": 9.581671759982747e-06,
      "loss": 1.3821,
      "step": 860
    },
    {
      "epoch": 0.22072814918178357,
      "grad_norm": 1.56321358587459,
      "learning_rate": 9.563754765136522e-06,
      "loss": 1.3568,
      "step": 870
    },
    {
      "epoch": 0.22326525434479258,
      "grad_norm": 1.621089242536098,
      "learning_rate": 9.545479556961457e-06,
      "loss": 1.3614,
      "step": 880
    },
    {
      "epoch": 0.2258023595078016,
      "grad_norm": 1.5746598904044478,
      "learning_rate": 9.526847569895529e-06,
      "loss": 1.3536,
      "step": 890
    },
    {
      "epoch": 0.2283394646708106,
      "grad_norm": 1.632144120218129,
      "learning_rate": 9.507860266380625e-06,
      "loss": 1.3521,
      "step": 900
    },
    {
      "epoch": 0.2308765698338196,
      "grad_norm": 1.6660492103415234,
      "learning_rate": 9.488519136747741e-06,
      "loss": 1.3455,
      "step": 910
    },
    {
      "epoch": 0.2334136749968286,
      "grad_norm": 1.6307872469664786,
      "learning_rate": 9.468825699100013e-06,
      "loss": 1.3388,
      "step": 920
    },
    {
      "epoch": 0.23595078015983761,
      "grad_norm": 1.51760811186189,
      "learning_rate": 9.448781499193563e-06,
      "loss": 1.36,
      "step": 930
    },
    {
      "epoch": 0.23848788532284662,
      "grad_norm": 1.6298958079571104,
      "learning_rate": 9.428388110316165e-06,
      "loss": 1.346,
      "step": 940
    },
    {
      "epoch": 0.24102499048585563,
      "grad_norm": 1.6241168589647443,
      "learning_rate": 9.407647133163754e-06,
      "loss": 1.3565,
      "step": 950
    },
    {
      "epoch": 0.24356209564886463,
      "grad_norm": 1.6330870068463266,
      "learning_rate": 9.386560195714796e-06,
      "loss": 1.3539,
      "step": 960
    },
    {
      "epoch": 0.24609920081187364,
      "grad_norm": 1.5846187793083721,
      "learning_rate": 9.365128953102495e-06,
      "loss": 1.3443,
      "step": 970
    },
    {
      "epoch": 0.24863630597488265,
      "grad_norm": 1.5816319458789425,
      "learning_rate": 9.343355087484893e-06,
      "loss": 1.3449,
      "step": 980
    },
    {
      "epoch": 0.25117341113789166,
      "grad_norm": 1.608667389007063,
      "learning_rate": 9.321240307912818e-06,
      "loss": 1.3503,
      "step": 990
    },
    {
      "epoch": 0.2537105163009007,
      "grad_norm": 1.535736158897923,
      "learning_rate": 9.298786350195758e-06,
      "loss": 1.3504,
      "step": 1000
    },
    {
      "epoch": 0.25624762146390967,
      "grad_norm": 1.6105502703548435,
      "learning_rate": 9.275994976765602e-06,
      "loss": 1.3512,
      "step": 1010
    },
    {
      "epoch": 0.2587847266269187,
      "grad_norm": 1.457234439212148,
      "learning_rate": 9.252867976538312e-06,
      "loss": 1.3447,
      "step": 1020
    },
    {
      "epoch": 0.2613218317899277,
      "grad_norm": 1.632312084639862,
      "learning_rate": 9.22940716477351e-06,
      "loss": 1.3451,
      "step": 1030
    },
    {
      "epoch": 0.2638589369529367,
      "grad_norm": 1.5657163405769847,
      "learning_rate": 9.205614382931986e-06,
      "loss": 1.3678,
      "step": 1040
    },
    {
      "epoch": 0.2663960421159457,
      "grad_norm": 1.523325498659843,
      "learning_rate": 9.181491498531179e-06,
      "loss": 1.355,
      "step": 1050
    },
    {
      "epoch": 0.26893314727895473,
      "grad_norm": 1.5647021825494114,
      "learning_rate": 9.157040404998572e-06,
      "loss": 1.3455,
      "step": 1060
    },
    {
      "epoch": 0.2714702524419637,
      "grad_norm": 1.581907280598391,
      "learning_rate": 9.132263021523096e-06,
      "loss": 1.353,
      "step": 1070
    },
    {
      "epoch": 0.27400735760497275,
      "grad_norm": 1.4861566014453274,
      "learning_rate": 9.107161292904476e-06,
      "loss": 1.3428,
      "step": 1080
    },
    {
      "epoch": 0.2765444627679817,
      "grad_norm": 1.6256210181495103,
      "learning_rate": 9.081737189400583e-06,
      "loss": 1.3421,
      "step": 1090
    },
    {
      "epoch": 0.27908156793099076,
      "grad_norm": 1.4876360574590954,
      "learning_rate": 9.0559927065728e-06,
      "loss": 1.3377,
      "step": 1100
    },
    {
      "epoch": 0.28161867309399974,
      "grad_norm": 1.5204275847901962,
      "learning_rate": 9.029929865129375e-06,
      "loss": 1.349,
      "step": 1110
    },
    {
      "epoch": 0.2841557782570088,
      "grad_norm": 1.5624633516357405,
      "learning_rate": 9.003550710766813e-06,
      "loss": 1.3552,
      "step": 1120
    },
    {
      "epoch": 0.28669288342001775,
      "grad_norm": 1.5591567540085947,
      "learning_rate": 8.97685731400932e-06,
      "loss": 1.3209,
      "step": 1130
    },
    {
      "epoch": 0.2892299885830268,
      "grad_norm": 1.5373317845285133,
      "learning_rate": 8.949851770046272e-06,
      "loss": 1.3267,
      "step": 1140
    },
    {
      "epoch": 0.29176709374603577,
      "grad_norm": 1.5556061129094692,
      "learning_rate": 8.922536198567772e-06,
      "loss": 1.3379,
      "step": 1150
    },
    {
      "epoch": 0.2943041989090448,
      "grad_norm": 1.971486780664198,
      "learning_rate": 8.894912743598269e-06,
      "loss": 1.3272,
      "step": 1160
    },
    {
      "epoch": 0.2968413040720538,
      "grad_norm": 1.5365700226491938,
      "learning_rate": 8.866983573328267e-06,
      "loss": 1.333,
      "step": 1170
    },
    {
      "epoch": 0.2993784092350628,
      "grad_norm": 1.6217713070921793,
      "learning_rate": 8.83875087994415e-06,
      "loss": 1.3497,
      "step": 1180
    },
    {
      "epoch": 0.3019155143980718,
      "grad_norm": 1.4917017043884344,
      "learning_rate": 8.810216879456114e-06,
      "loss": 1.3355,
      "step": 1190
    },
    {
      "epoch": 0.30445261956108083,
      "grad_norm": 1.5427563058731948,
      "learning_rate": 8.781383811524222e-06,
      "loss": 1.3339,
      "step": 1200
    },
    {
      "epoch": 0.3069897247240898,
      "grad_norm": 1.5666645778409243,
      "learning_rate": 8.752253939282622e-06,
      "loss": 1.332,
      "step": 1210
    },
    {
      "epoch": 0.30952682988709884,
      "grad_norm": 1.5940427272465527,
      "learning_rate": 8.722829549161904e-06,
      "loss": 1.3411,
      "step": 1220
    },
    {
      "epoch": 0.3120639350501078,
      "grad_norm": 1.569355522659196,
      "learning_rate": 8.69311295070964e-06,
      "loss": 1.321,
      "step": 1230
    },
    {
      "epoch": 0.31460104021311686,
      "grad_norm": 1.5823744419831982,
      "learning_rate": 8.663106476409107e-06,
      "loss": 1.3511,
      "step": 1240
    },
    {
      "epoch": 0.31713814537612584,
      "grad_norm": 1.5626340370876246,
      "learning_rate": 8.632812481496195e-06,
      "loss": 1.3491,
      "step": 1250
    },
    {
      "epoch": 0.31967525053913487,
      "grad_norm": 1.6216546055767536,
      "learning_rate": 8.602233343774562e-06,
      "loss": 1.3294,
      "step": 1260
    },
    {
      "epoch": 0.32221235570214385,
      "grad_norm": 1.4885399811487754,
      "learning_rate": 8.571371463428986e-06,
      "loss": 1.3419,
      "step": 1270
    },
    {
      "epoch": 0.3247494608651529,
      "grad_norm": 1.597124872589071,
      "learning_rate": 8.540229262836974e-06,
      "loss": 1.3245,
      "step": 1280
    },
    {
      "epoch": 0.32728656602816186,
      "grad_norm": 1.5069638761813242,
      "learning_rate": 8.508809186378631e-06,
      "loss": 1.3357,
      "step": 1290
    },
    {
      "epoch": 0.3298236711911709,
      "grad_norm": 1.5496475251999724,
      "learning_rate": 8.477113700244788e-06,
      "loss": 1.3297,
      "step": 1300
    },
    {
      "epoch": 0.3323607763541799,
      "grad_norm": 1.5177410295586948,
      "learning_rate": 8.445145292243446e-06,
      "loss": 1.3361,
      "step": 1310
    },
    {
      "epoch": 0.3348978815171889,
      "grad_norm": 1.4375424317665,
      "learning_rate": 8.412906471604489e-06,
      "loss": 1.3365,
      "step": 1320
    },
    {
      "epoch": 0.3374349866801979,
      "grad_norm": 1.4733958562961815,
      "learning_rate": 8.380399768782742e-06,
      "loss": 1.3364,
      "step": 1330
    },
    {
      "epoch": 0.3399720918432069,
      "grad_norm": 1.5665888162471464,
      "learning_rate": 8.347627735259344e-06,
      "loss": 1.3572,
      "step": 1340
    },
    {
      "epoch": 0.3425091970062159,
      "grad_norm": 1.5175787042273947,
      "learning_rate": 8.314592943341494e-06,
      "loss": 1.311,
      "step": 1350
    },
    {
      "epoch": 0.34504630216922494,
      "grad_norm": 1.5210307965368668,
      "learning_rate": 8.281297985960538e-06,
      "loss": 1.3261,
      "step": 1360
    },
    {
      "epoch": 0.3475834073322339,
      "grad_norm": 1.5365431443148119,
      "learning_rate": 8.247745476468449e-06,
      "loss": 1.3433,
      "step": 1370
    },
    {
      "epoch": 0.35012051249524295,
      "grad_norm": 1.5548012069585933,
      "learning_rate": 8.213938048432697e-06,
      "loss": 1.3134,
      "step": 1380
    },
    {
      "epoch": 0.35265761765825193,
      "grad_norm": 1.4642811591908687,
      "learning_rate": 8.179878355429556e-06,
      "loss": 1.3159,
      "step": 1390
    },
    {
      "epoch": 0.35519472282126097,
      "grad_norm": 1.6713134353309254,
      "learning_rate": 8.145569070835799e-06,
      "loss": 1.3285,
      "step": 1400
    },
    {
      "epoch": 0.35773182798426995,
      "grad_norm": 1.5444628338197106,
      "learning_rate": 8.111012887618882e-06,
      "loss": 1.344,
      "step": 1410
    },
    {
      "epoch": 0.360268933147279,
      "grad_norm": 1.5042040298049457,
      "learning_rate": 8.076212518125556e-06,
      "loss": 1.3217,
      "step": 1420
    },
    {
      "epoch": 0.36280603831028796,
      "grad_norm": 1.5827643194628298,
      "learning_rate": 8.041170693868985e-06,
      "loss": 1.3284,
      "step": 1430
    },
    {
      "epoch": 0.365343143473297,
      "grad_norm": 1.4314485322723574,
      "learning_rate": 8.005890165314334e-06,
      "loss": 1.3188,
      "step": 1440
    },
    {
      "epoch": 0.367880248636306,
      "grad_norm": 1.5452457890288078,
      "learning_rate": 7.970373701662892e-06,
      "loss": 1.3123,
      "step": 1450
    },
    {
      "epoch": 0.370417353799315,
      "grad_norm": 1.5944938106930338,
      "learning_rate": 7.934624090634713e-06,
      "loss": 1.3131,
      "step": 1460
    },
    {
      "epoch": 0.372954458962324,
      "grad_norm": 1.5553727991379855,
      "learning_rate": 7.8986441382498e-06,
      "loss": 1.3318,
      "step": 1470
    },
    {
      "epoch": 0.375491564125333,
      "grad_norm": 1.5196578480754726,
      "learning_rate": 7.862436668607865e-06,
      "loss": 1.3164,
      "step": 1480
    },
    {
      "epoch": 0.378028669288342,
      "grad_norm": 1.5354385242535227,
      "learning_rate": 7.826004523666661e-06,
      "loss": 1.3292,
      "step": 1490
    },
    {
      "epoch": 0.38056577445135104,
      "grad_norm": 1.5449910825994637,
      "learning_rate": 7.78935056301891e-06,
      "loss": 1.3272,
      "step": 1500
    },
    {
      "epoch": 0.38310287961436,
      "grad_norm": 1.4946907973724173,
      "learning_rate": 7.752477663667854e-06,
      "loss": 1.3391,
      "step": 1510
    },
    {
      "epoch": 0.38563998477736905,
      "grad_norm": 1.5791940161814702,
      "learning_rate": 7.715388719801437e-06,
      "loss": 1.3392,
      "step": 1520
    },
    {
      "epoch": 0.38817708994037803,
      "grad_norm": 1.4567702862839176,
      "learning_rate": 7.67808664256514e-06,
      "loss": 1.2971,
      "step": 1530
    },
    {
      "epoch": 0.39071419510338706,
      "grad_norm": 1.4605769814867744,
      "learning_rate": 7.640574359833472e-06,
      "loss": 1.3148,
      "step": 1540
    },
    {
      "epoch": 0.39325130026639604,
      "grad_norm": 1.5566796816874888,
      "learning_rate": 7.6028548159801685e-06,
      "loss": 1.3315,
      "step": 1550
    },
    {
      "epoch": 0.395788405429405,
      "grad_norm": 1.5768032029757384,
      "learning_rate": 7.564930971647087e-06,
      "loss": 1.3238,
      "step": 1560
    },
    {
      "epoch": 0.39832551059241406,
      "grad_norm": 1.5702550171255043,
      "learning_rate": 7.52680580351181e-06,
      "loss": 1.3175,
      "step": 1570
    },
    {
      "epoch": 0.40086261575542304,
      "grad_norm": 1.5687466837527182,
      "learning_rate": 7.488482304054019e-06,
      "loss": 1.3104,
      "step": 1580
    },
    {
      "epoch": 0.40339972091843207,
      "grad_norm": 1.5897550883645912,
      "learning_rate": 7.449963481320599e-06,
      "loss": 1.316,
      "step": 1590
    },
    {
      "epoch": 0.40593682608144105,
      "grad_norm": 1.5236147067965886,
      "learning_rate": 7.411252358689541e-06,
      "loss": 1.3273,
      "step": 1600
    },
    {
      "epoch": 0.4084739312444501,
      "grad_norm": 1.5469446528938424,
      "learning_rate": 7.372351974632634e-06,
      "loss": 1.3119,
      "step": 1610
    },
    {
      "epoch": 0.41101103640745906,
      "grad_norm": 1.4722026799112722,
      "learning_rate": 7.333265382476971e-06,
      "loss": 1.3151,
      "step": 1620
    },
    {
      "epoch": 0.4135481415704681,
      "grad_norm": 1.5178886141824586,
      "learning_rate": 7.293995650165287e-06,
      "loss": 1.3245,
      "step": 1630
    },
    {
      "epoch": 0.4160852467334771,
      "grad_norm": 1.5308435376939995,
      "learning_rate": 7.2545458600151615e-06,
      "loss": 1.3317,
      "step": 1640
    },
    {
      "epoch": 0.4186223518964861,
      "grad_norm": 1.5091424984828243,
      "learning_rate": 7.214919108477077e-06,
      "loss": 1.3044,
      "step": 1650
    },
    {
      "epoch": 0.4211594570594951,
      "grad_norm": 1.457202507709852,
      "learning_rate": 7.175118505891385e-06,
      "loss": 1.3339,
      "step": 1660
    },
    {
      "epoch": 0.4236965622225041,
      "grad_norm": 1.530896247556501,
      "learning_rate": 7.135147176244158e-06,
      "loss": 1.3044,
      "step": 1670
    },
    {
      "epoch": 0.4262336673855131,
      "grad_norm": 1.5274463812149695,
      "learning_rate": 7.0950082569219955e-06,
      "loss": 1.3048,
      "step": 1680
    },
    {
      "epoch": 0.42877077254852214,
      "grad_norm": 1.507428973101804,
      "learning_rate": 7.054704898465772e-06,
      "loss": 1.3069,
      "step": 1690
    },
    {
      "epoch": 0.4313078777115311,
      "grad_norm": 1.5716469315983397,
      "learning_rate": 7.0142402643233346e-06,
      "loss": 1.3136,
      "step": 1700
    },
    {
      "epoch": 0.43384498287454015,
      "grad_norm": 1.4220881687524514,
      "learning_rate": 6.973617530601209e-06,
      "loss": 1.3165,
      "step": 1710
    },
    {
      "epoch": 0.43638208803754913,
      "grad_norm": 1.5926945403384438,
      "learning_rate": 6.932839885815304e-06,
      "loss": 1.3301,
      "step": 1720
    },
    {
      "epoch": 0.43891919320055817,
      "grad_norm": 1.4527595611730801,
      "learning_rate": 6.891910530640642e-06,
      "loss": 1.3145,
      "step": 1730
    },
    {
      "epoch": 0.44145629836356715,
      "grad_norm": 1.5069254389998272,
      "learning_rate": 6.850832677660134e-06,
      "loss": 1.3139,
      "step": 1740
    },
    {
      "epoch": 0.4439934035265762,
      "grad_norm": 1.4587280578384394,
      "learning_rate": 6.809609551112419e-06,
      "loss": 1.3085,
      "step": 1750
    },
    {
      "epoch": 0.44653050868958516,
      "grad_norm": 1.5122830472595903,
      "learning_rate": 6.768244386638793e-06,
      "loss": 1.3158,
      "step": 1760
    },
    {
      "epoch": 0.4490676138525942,
      "grad_norm": 1.4912245201929943,
      "learning_rate": 6.726740431029243e-06,
      "loss": 1.3167,
      "step": 1770
    },
    {
      "epoch": 0.4516047190156032,
      "grad_norm": 1.5574941259720791,
      "learning_rate": 6.685100941967596e-06,
      "loss": 1.3118,
      "step": 1780
    },
    {
      "epoch": 0.4541418241786122,
      "grad_norm": 1.4994130740882026,
      "learning_rate": 6.643329187775827e-06,
      "loss": 1.307,
      "step": 1790
    },
    {
      "epoch": 0.4566789293416212,
      "grad_norm": 1.5791237950971593,
      "learning_rate": 6.601428447157525e-06,
      "loss": 1.3086,
      "step": 1800
    },
    {
      "epoch": 0.4592160345046302,
      "grad_norm": 1.5319564794342408,
      "learning_rate": 6.559402008940539e-06,
      "loss": 1.3025,
      "step": 1810
    },
    {
      "epoch": 0.4617531396676392,
      "grad_norm": 1.5560620624811086,
      "learning_rate": 6.517253171818844e-06,
      "loss": 1.3146,
      "step": 1820
    },
    {
      "epoch": 0.46429024483064824,
      "grad_norm": 1.5762189341956727,
      "learning_rate": 6.474985244093613e-06,
      "loss": 1.307,
      "step": 1830
    },
    {
      "epoch": 0.4668273499936572,
      "grad_norm": 1.568824162809672,
      "learning_rate": 6.432601543413552e-06,
      "loss": 1.2996,
      "step": 1840
    },
    {
      "epoch": 0.46936445515666625,
      "grad_norm": 1.461712822890638,
      "learning_rate": 6.390105396514497e-06,
      "loss": 1.3013,
      "step": 1850
    },
    {
      "epoch": 0.47190156031967523,
      "grad_norm": 1.4727912142252813,
      "learning_rate": 6.347500138958285e-06,
      "loss": 1.3086,
      "step": 1860
    },
    {
      "epoch": 0.47443866548268426,
      "grad_norm": 1.4842630358439066,
      "learning_rate": 6.304789114870953e-06,
      "loss": 1.3121,
      "step": 1870
    },
    {
      "epoch": 0.47697577064569324,
      "grad_norm": 1.5147058669468259,
      "learning_rate": 6.261975676680252e-06,
      "loss": 1.3109,
      "step": 1880
    },
    {
      "epoch": 0.4795128758087023,
      "grad_norm": 1.5879467208142688,
      "learning_rate": 6.219063184852509e-06,
      "loss": 1.3057,
      "step": 1890
    },
    {
      "epoch": 0.48204998097171126,
      "grad_norm": 1.4622817504218393,
      "learning_rate": 6.176055007628859e-06,
      "loss": 1.2978,
      "step": 1900
    },
    {
      "epoch": 0.4845870861347203,
      "grad_norm": 1.4651555100721898,
      "learning_rate": 6.132954520760882e-06,
      "loss": 1.2936,
      "step": 1910
    },
    {
      "epoch": 0.48712419129772927,
      "grad_norm": 1.4242680820832143,
      "learning_rate": 6.089765107245616e-06,
      "loss": 1.311,
      "step": 1920
    },
    {
      "epoch": 0.4896612964607383,
      "grad_norm": 1.4510357489546541,
      "learning_rate": 6.046490157060041e-06,
      "loss": 1.2917,
      "step": 1930
    },
    {
      "epoch": 0.4921984016237473,
      "grad_norm": 1.5389362630585735,
      "learning_rate": 6.003133066894987e-06,
      "loss": 1.3173,
      "step": 1940
    },
    {
      "epoch": 0.4947355067867563,
      "grad_norm": 1.5597918071325416,
      "learning_rate": 5.959697239888525e-06,
      "loss": 1.2978,
      "step": 1950
    },
    {
      "epoch": 0.4972726119497653,
      "grad_norm": 1.481163850939429,
      "learning_rate": 5.916186085358858e-06,
      "loss": 1.3125,
      "step": 1960
    },
    {
      "epoch": 0.49980971711277433,
      "grad_norm": 1.5172196100773179,
      "learning_rate": 5.872603018536713e-06,
      "loss": 1.3035,
      "step": 1970
    },
    {
      "epoch": 0.5023468222757833,
      "grad_norm": 1.520182324070576,
      "learning_rate": 5.828951460297277e-06,
      "loss": 1.2943,
      "step": 1980
    },
    {
      "epoch": 0.5048839274387923,
      "grad_norm": 1.374020881318329,
      "learning_rate": 5.785234836891697e-06,
      "loss": 1.3019,
      "step": 1990
    },
    {
      "epoch": 0.5074210326018014,
      "grad_norm": 1.609172422257604,
      "learning_rate": 5.741456579678141e-06,
      "loss": 1.2929,
      "step": 2000
    },
    {
      "epoch": 0.5099581377648104,
      "grad_norm": 1.451921659432821,
      "learning_rate": 5.697620124852472e-06,
      "loss": 1.2868,
      "step": 2010
    },
    {
      "epoch": 0.5124952429278193,
      "grad_norm": 1.531522896512812,
      "learning_rate": 5.65372891317854e-06,
      "loss": 1.2875,
      "step": 2020
    },
    {
      "epoch": 0.5150323480908283,
      "grad_norm": 1.443649652350418,
      "learning_rate": 5.6097863897181075e-06,
      "loss": 1.2963,
      "step": 2030
    },
    {
      "epoch": 0.5175694532538374,
      "grad_norm": 1.5591743411035264,
      "learning_rate": 5.565796003560447e-06,
      "loss": 1.3121,
      "step": 2040
    },
    {
      "epoch": 0.5201065584168464,
      "grad_norm": 1.428229068798765,
      "learning_rate": 5.521761207551622e-06,
      "loss": 1.2979,
      "step": 2050
    },
    {
      "epoch": 0.5226436635798554,
      "grad_norm": 1.5164415865949983,
      "learning_rate": 5.47768545802346e-06,
      "loss": 1.3107,
      "step": 2060
    },
    {
      "epoch": 0.5251807687428643,
      "grad_norm": 1.5292361648846982,
      "learning_rate": 5.433572214522275e-06,
      "loss": 1.2952,
      "step": 2070
    },
    {
      "epoch": 0.5277178739058734,
      "grad_norm": 1.4451039662214231,
      "learning_rate": 5.389424939537311e-06,
      "loss": 1.2922,
      "step": 2080
    },
    {
      "epoch": 0.5302549790688824,
      "grad_norm": 1.558654012548035,
      "learning_rate": 5.345247098228977e-06,
      "loss": 1.2942,
      "step": 2090
    },
    {
      "epoch": 0.5327920842318914,
      "grad_norm": 1.5393309134302235,
      "learning_rate": 5.301042158156866e-06,
      "loss": 1.2898,
      "step": 2100
    },
    {
      "epoch": 0.5353291893949004,
      "grad_norm": 1.5206662969722375,
      "learning_rate": 5.256813589007571e-06,
      "loss": 1.2967,
      "step": 2110
    },
    {
      "epoch": 0.5378662945579095,
      "grad_norm": 1.5295277898061372,
      "learning_rate": 5.212564862322355e-06,
      "loss": 1.2987,
      "step": 2120
    },
    {
      "epoch": 0.5404033997209184,
      "grad_norm": 1.5121887795702076,
      "learning_rate": 5.168299451224665e-06,
      "loss": 1.2859,
      "step": 2130
    },
    {
      "epoch": 0.5429405048839274,
      "grad_norm": 1.5405224763949017,
      "learning_rate": 5.124020830147525e-06,
      "loss": 1.2942,
      "step": 2140
    },
    {
      "epoch": 0.5454776100469364,
      "grad_norm": 1.5241647102261355,
      "learning_rate": 5.079732474560821e-06,
      "loss": 1.2967,
      "step": 2150
    },
    {
      "epoch": 0.5480147152099455,
      "grad_norm": 1.5740459163455902,
      "learning_rate": 5.035437860698508e-06,
      "loss": 1.2792,
      "step": 2160
    },
    {
      "epoch": 0.5505518203729545,
      "grad_norm": 1.455514390960437,
      "learning_rate": 4.991140465285762e-06,
      "loss": 1.2722,
      "step": 2170
    },
    {
      "epoch": 0.5530889255359634,
      "grad_norm": 1.4543563727275153,
      "learning_rate": 4.94684376526608e-06,
      "loss": 1.294,
      "step": 2180
    },
    {
      "epoch": 0.5556260306989724,
      "grad_norm": 1.415880887469612,
      "learning_rate": 4.902551237528387e-06,
      "loss": 1.2898,
      "step": 2190
    },
    {
      "epoch": 0.5581631358619815,
      "grad_norm": 1.5027054686198038,
      "learning_rate": 4.858266358634109e-06,
      "loss": 1.2943,
      "step": 2200
    },
    {
      "epoch": 0.5607002410249905,
      "grad_norm": 1.495415983271707,
      "learning_rate": 4.813992604544319e-06,
      "loss": 1.309,
      "step": 2210
    },
    {
      "epoch": 0.5632373461879995,
      "grad_norm": 1.5256997169566149,
      "learning_rate": 4.769733450346885e-06,
      "loss": 1.2941,
      "step": 2220
    },
    {
      "epoch": 0.5657744513510085,
      "grad_norm": 1.4207029137255274,
      "learning_rate": 4.725492369983721e-06,
      "loss": 1.2808,
      "step": 2230
    },
    {
      "epoch": 0.5683115565140175,
      "grad_norm": 1.5127789303300487,
      "learning_rate": 4.6812728359781064e-06,
      "loss": 1.2886,
      "step": 2240
    },
    {
      "epoch": 0.5708486616770265,
      "grad_norm": 1.4480660719145084,
      "learning_rate": 4.637078319162127e-06,
      "loss": 1.2848,
      "step": 2250
    },
    {
      "epoch": 0.5733857668400355,
      "grad_norm": 1.4818074524822986,
      "learning_rate": 4.592912288404251e-06,
      "loss": 1.2747,
      "step": 2260
    },
    {
      "epoch": 0.5759228720030445,
      "grad_norm": 1.496021447098999,
      "learning_rate": 4.5487782103370445e-06,
      "loss": 1.2889,
      "step": 2270
    },
    {
      "epoch": 0.5784599771660536,
      "grad_norm": 1.4726400774082267,
      "learning_rate": 4.504679549085077e-06,
      "loss": 1.2956,
      "step": 2280
    },
    {
      "epoch": 0.5809970823290626,
      "grad_norm": 1.492109044123467,
      "learning_rate": 4.460619765993025e-06,
      "loss": 1.2974,
      "step": 2290
    },
    {
      "epoch": 0.5835341874920715,
      "grad_norm": 1.4567515467141523,
      "learning_rate": 4.416602319353974e-06,
      "loss": 1.29,
      "step": 2300
    },
    {
      "epoch": 0.5860712926550805,
      "grad_norm": 1.460535915347314,
      "learning_rate": 4.3726306641379915e-06,
      "loss": 1.2745,
      "step": 2310
    },
    {
      "epoch": 0.5886083978180896,
      "grad_norm": 1.4651576736560898,
      "learning_rate": 4.328708251720924e-06,
      "loss": 1.2739,
      "step": 2320
    },
    {
      "epoch": 0.5911455029810986,
      "grad_norm": 1.6196158147206026,
      "learning_rate": 4.2848385296135165e-06,
      "loss": 1.3101,
      "step": 2330
    },
    {
      "epoch": 0.5936826081441076,
      "grad_norm": 1.527439804056797,
      "learning_rate": 4.241024941190792e-06,
      "loss": 1.2771,
      "step": 2340
    },
    {
      "epoch": 0.5962197133071165,
      "grad_norm": 1.4872645401772542,
      "learning_rate": 4.197270925421796e-06,
      "loss": 1.2877,
      "step": 2350
    },
    {
      "epoch": 0.5987568184701256,
      "grad_norm": 1.4908027336325684,
      "learning_rate": 4.153579916599659e-06,
      "loss": 1.2969,
      "step": 2360
    },
    {
      "epoch": 0.6012939236331346,
      "grad_norm": 1.370441167203172,
      "learning_rate": 4.109955344072036e-06,
      "loss": 1.2745,
      "step": 2370
    },
    {
      "epoch": 0.6038310287961436,
      "grad_norm": 1.457801692594122,
      "learning_rate": 4.066400631971938e-06,
      "loss": 1.2714,
      "step": 2380
    },
    {
      "epoch": 0.6063681339591526,
      "grad_norm": 1.5047248748403204,
      "learning_rate": 4.022919198948966e-06,
      "loss": 1.2759,
      "step": 2390
    },
    {
      "epoch": 0.6089052391221617,
      "grad_norm": 1.5232259549425642,
      "learning_rate": 3.979514457900982e-06,
      "loss": 1.2845,
      "step": 2400
    },
    {
      "epoch": 0.6114423442851706,
      "grad_norm": 1.4170452963382303,
      "learning_rate": 3.936189815706219e-06,
      "loss": 1.2833,
      "step": 2410
    },
    {
      "epoch": 0.6139794494481796,
      "grad_norm": 1.5010818180720833,
      "learning_rate": 3.8929486729558775e-06,
      "loss": 1.2941,
      "step": 2420
    },
    {
      "epoch": 0.6165165546111886,
      "grad_norm": 1.4420347497785075,
      "learning_rate": 3.849794423687212e-06,
      "loss": 1.2775,
      "step": 2430
    },
    {
      "epoch": 0.6190536597741977,
      "grad_norm": 1.520468191298721,
      "learning_rate": 3.8067304551171247e-06,
      "loss": 1.2627,
      "step": 2440
    },
    {
      "epoch": 0.6215907649372067,
      "grad_norm": 1.4753704862458017,
      "learning_rate": 3.7637601473763035e-06,
      "loss": 1.284,
      "step": 2450
    },
    {
      "epoch": 0.6241278701002156,
      "grad_norm": 1.469877746697786,
      "learning_rate": 3.7208868732439145e-06,
      "loss": 1.2927,
      "step": 2460
    },
    {
      "epoch": 0.6266649752632246,
      "grad_norm": 1.4601548141707599,
      "learning_rate": 3.6781139978828606e-06,
      "loss": 1.2947,
      "step": 2470
    },
    {
      "epoch": 0.6292020804262337,
      "grad_norm": 1.5092438879342172,
      "learning_rate": 3.6354448785756558e-06,
      "loss": 1.2843,
      "step": 2480
    },
    {
      "epoch": 0.6317391855892427,
      "grad_norm": 1.4368007055488876,
      "learning_rate": 3.592882864460905e-06,
      "loss": 1.265,
      "step": 2490
    },
    {
      "epoch": 0.6342762907522517,
      "grad_norm": 1.4672055312297339,
      "learning_rate": 3.5504312962704245e-06,
      "loss": 1.2709,
      "step": 2500
    },
    {
      "epoch": 0.6368133959152606,
      "grad_norm": 1.4995451462382032,
      "learning_rate": 3.5080935060670345e-06,
      "loss": 1.2679,
      "step": 2510
    },
    {
      "epoch": 0.6393505010782697,
      "grad_norm": 1.458116276283539,
      "learning_rate": 3.465872816983008e-06,
      "loss": 1.2821,
      "step": 2520
    },
    {
      "epoch": 0.6418876062412787,
      "grad_norm": 1.4447640379158275,
      "learning_rate": 3.4237725429592507e-06,
      "loss": 1.2865,
      "step": 2530
    },
    {
      "epoch": 0.6444247114042877,
      "grad_norm": 1.3965736731366891,
      "learning_rate": 3.3817959884851735e-06,
      "loss": 1.2698,
      "step": 2540
    },
    {
      "epoch": 0.6469618165672967,
      "grad_norm": 1.4648194884238146,
      "learning_rate": 3.3399464483393272e-06,
      "loss": 1.291,
      "step": 2550
    },
    {
      "epoch": 0.6494989217303058,
      "grad_norm": 1.4271493727093771,
      "learning_rate": 3.298227207330792e-06,
      "loss": 1.2765,
      "step": 2560
    },
    {
      "epoch": 0.6520360268933147,
      "grad_norm": 1.5962462881292958,
      "learning_rate": 3.256641540041346e-06,
      "loss": 1.2905,
      "step": 2570
    },
    {
      "epoch": 0.6545731320563237,
      "grad_norm": 1.4501719681830862,
      "learning_rate": 3.2151927105684423e-06,
      "loss": 1.298,
      "step": 2580
    },
    {
      "epoch": 0.6571102372193327,
      "grad_norm": 1.5186349976521718,
      "learning_rate": 3.1738839722690085e-06,
      "loss": 1.2742,
      "step": 2590
    },
    {
      "epoch": 0.6596473423823418,
      "grad_norm": 1.3901740398219145,
      "learning_rate": 3.1327185675040907e-06,
      "loss": 1.2769,
      "step": 2600
    },
    {
      "epoch": 0.6621844475453508,
      "grad_norm": 1.4618375024699428,
      "learning_rate": 3.0916997273843454e-06,
      "loss": 1.2938,
      "step": 2610
    },
    {
      "epoch": 0.6647215527083598,
      "grad_norm": 1.4675982361039484,
      "learning_rate": 3.0508306715164416e-06,
      "loss": 1.2913,
      "step": 2620
    },
    {
      "epoch": 0.6672586578713687,
      "grad_norm": 1.5086185778550512,
      "learning_rate": 3.0101146077503386e-06,
      "loss": 1.2777,
      "step": 2630
    },
    {
      "epoch": 0.6697957630343778,
      "grad_norm": 1.4573487737483761,
      "learning_rate": 2.9695547319275093e-06,
      "loss": 1.2633,
      "step": 2640
    },
    {
      "epoch": 0.6723328681973868,
      "grad_norm": 1.43323809832072,
      "learning_rate": 2.9291542276300866e-06,
      "loss": 1.289,
      "step": 2650
    },
    {
      "epoch": 0.6748699733603958,
      "grad_norm": 1.420082813628849,
      "learning_rate": 2.8889162659309832e-06,
      "loss": 1.2729,
      "step": 2660
    },
    {
      "epoch": 0.6774070785234048,
      "grad_norm": 1.424990219399345,
      "learning_rate": 2.848844005145004e-06,
      "loss": 1.3024,
      "step": 2670
    },
    {
      "epoch": 0.6799441836864138,
      "grad_norm": 1.4395745448115305,
      "learning_rate": 2.808940590580922e-06,
      "loss": 1.2845,
      "step": 2680
    },
    {
      "epoch": 0.6824812888494228,
      "grad_norm": 1.4802086998925903,
      "learning_rate": 2.769209154294623e-06,
      "loss": 1.2844,
      "step": 2690
    },
    {
      "epoch": 0.6850183940124318,
      "grad_norm": 1.491623196795251,
      "learning_rate": 2.7296528148432565e-06,
      "loss": 1.2683,
      "step": 2700
    },
    {
      "epoch": 0.6875554991754408,
      "grad_norm": 1.416764375906272,
      "learning_rate": 2.690274677040462e-06,
      "loss": 1.2776,
      "step": 2710
    },
    {
      "epoch": 0.6900926043384499,
      "grad_norm": 1.519033593874162,
      "learning_rate": 2.6510778317126597e-06,
      "loss": 1.2807,
      "step": 2720
    },
    {
      "epoch": 0.6926297095014589,
      "grad_norm": 1.3894691132515595,
      "learning_rate": 2.6120653554564624e-06,
      "loss": 1.2777,
      "step": 2730
    },
    {
      "epoch": 0.6951668146644678,
      "grad_norm": 1.4049713206074572,
      "learning_rate": 2.573240310397187e-06,
      "loss": 1.2736,
      "step": 2740
    },
    {
      "epoch": 0.6977039198274768,
      "grad_norm": 1.4357642101900112,
      "learning_rate": 2.5346057439484923e-06,
      "loss": 1.2803,
      "step": 2750
    },
    {
      "epoch": 0.7002410249904859,
      "grad_norm": 1.490167340198777,
      "learning_rate": 2.4961646885732034e-06,
      "loss": 1.2744,
      "step": 2760
    },
    {
      "epoch": 0.7027781301534949,
      "grad_norm": 1.4179312953545702,
      "learning_rate": 2.4579201615452812e-06,
      "loss": 1.2842,
      "step": 2770
    },
    {
      "epoch": 0.7053152353165039,
      "grad_norm": 1.6140649717523825,
      "learning_rate": 2.4198751647129896e-06,
      "loss": 1.2963,
      "step": 2780
    },
    {
      "epoch": 0.7078523404795128,
      "grad_norm": 1.530468810042779,
      "learning_rate": 2.3820326842632894e-06,
      "loss": 1.2637,
      "step": 2790
    },
    {
      "epoch": 0.7103894456425219,
      "grad_norm": 1.412588711043796,
      "learning_rate": 2.344395690487441e-06,
      "loss": 1.2856,
      "step": 2800
    },
    {
      "epoch": 0.7129265508055309,
      "grad_norm": 1.5447254908338892,
      "learning_rate": 2.3069671375478645e-06,
      "loss": 1.2848,
      "step": 2810
    },
    {
      "epoch": 0.7154636559685399,
      "grad_norm": 1.43691808431636,
      "learning_rate": 2.2697499632462695e-06,
      "loss": 1.2536,
      "step": 2820
    },
    {
      "epoch": 0.7180007611315489,
      "grad_norm": 1.5560428170621574,
      "learning_rate": 2.2327470887930595e-06,
      "loss": 1.3015,
      "step": 2830
    },
    {
      "epoch": 0.720537866294558,
      "grad_norm": 1.450374747082515,
      "learning_rate": 2.195961418578041e-06,
      "loss": 1.2744,
      "step": 2840
    },
    {
      "epoch": 0.7230749714575669,
      "grad_norm": 1.484538648746269,
      "learning_rate": 2.159395839942464e-06,
      "loss": 1.2664,
      "step": 2850
    },
    {
      "epoch": 0.7256120766205759,
      "grad_norm": 1.3953379506558543,
      "learning_rate": 2.1230532229523865e-06,
      "loss": 1.2489,
      "step": 2860
    },
    {
      "epoch": 0.7281491817835849,
      "grad_norm": 1.4415654155573785,
      "learning_rate": 2.086936420173399e-06,
      "loss": 1.2719,
      "step": 2870
    },
    {
      "epoch": 0.730686286946594,
      "grad_norm": 1.4271516629005172,
      "learning_rate": 2.051048266446727e-06,
      "loss": 1.2652,
      "step": 2880
    },
    {
      "epoch": 0.733223392109603,
      "grad_norm": 1.4951992832082914,
      "learning_rate": 2.0153915786667203e-06,
      "loss": 1.26,
      "step": 2890
    },
    {
      "epoch": 0.735760497272612,
      "grad_norm": 1.4351479751585414,
      "learning_rate": 1.9799691555597555e-06,
      "loss": 1.2881,
      "step": 2900
    },
    {
      "epoch": 0.7382976024356209,
      "grad_norm": 1.474899241565124,
      "learning_rate": 1.9447837774645513e-06,
      "loss": 1.2702,
      "step": 2910
    },
    {
      "epoch": 0.74083470759863,
      "grad_norm": 1.4426835070499822,
      "learning_rate": 1.9098382061139503e-06,
      "loss": 1.2699,
      "step": 2920
    },
    {
      "epoch": 0.743371812761639,
      "grad_norm": 1.4876818985570295,
      "learning_rate": 1.8751351844181414e-06,
      "loss": 1.2612,
      "step": 2930
    },
    {
      "epoch": 0.745908917924648,
      "grad_norm": 1.4360645410392319,
      "learning_rate": 1.8406774362493662e-06,
      "loss": 1.2754,
      "step": 2940
    },
    {
      "epoch": 0.748446023087657,
      "grad_norm": 1.4473888665732064,
      "learning_rate": 1.8064676662281206e-06,
      "loss": 1.2902,
      "step": 2950
    },
    {
      "epoch": 0.750983128250666,
      "grad_norm": 1.4434612838312966,
      "learning_rate": 1.7725085595108682e-06,
      "loss": 1.273,
      "step": 2960
    },
    {
      "epoch": 0.753520233413675,
      "grad_norm": 1.558136105535075,
      "learning_rate": 1.7388027815792725e-06,
      "loss": 1.2787,
      "step": 2970
    },
    {
      "epoch": 0.756057338576684,
      "grad_norm": 1.4724878594646564,
      "learning_rate": 1.705352978030993e-06,
      "loss": 1.2627,
      "step": 2980
    },
    {
      "epoch": 0.758594443739693,
      "grad_norm": 1.4768497018650097,
      "learning_rate": 1.672161774372022e-06,
      "loss": 1.2911,
      "step": 2990
    },
    {
      "epoch": 0.7611315489027021,
      "grad_norm": 1.4598692131173956,
      "learning_rate": 1.639231775810602e-06,
      "loss": 1.2907,
      "step": 3000
    },
    {
      "epoch": 0.763668654065711,
      "grad_norm": 1.3971487709781405,
      "learning_rate": 1.6065655670527546e-06,
      "loss": 1.2632,
      "step": 3010
    },
    {
      "epoch": 0.76620575922872,
      "grad_norm": 1.4196228285690422,
      "learning_rate": 1.574165712099392e-06,
      "loss": 1.2542,
      "step": 3020
    },
    {
      "epoch": 0.768742864391729,
      "grad_norm": 1.4395590200787511,
      "learning_rate": 1.542034754045067e-06,
      "loss": 1.2693,
      "step": 3030
    },
    {
      "epoch": 0.7712799695547381,
      "grad_norm": 1.4538143237649903,
      "learning_rate": 1.5101752148783705e-06,
      "loss": 1.2728,
      "step": 3040
    },
    {
      "epoch": 0.7738170747177471,
      "grad_norm": 1.4483981816763403,
      "learning_rate": 1.4785895952839735e-06,
      "loss": 1.2671,
      "step": 3050
    },
    {
      "epoch": 0.7763541798807561,
      "grad_norm": 1.5335192207213328,
      "learning_rate": 1.447280374446346e-06,
      "loss": 1.2778,
      "step": 3060
    },
    {
      "epoch": 0.778891285043765,
      "grad_norm": 1.4504666284348766,
      "learning_rate": 1.4162500098551608e-06,
      "loss": 1.276,
      "step": 3070
    },
    {
      "epoch": 0.7814283902067741,
      "grad_norm": 1.454412830474016,
      "learning_rate": 1.385500937112415e-06,
      "loss": 1.2804,
      "step": 3080
    },
    {
      "epoch": 0.7839654953697831,
      "grad_norm": 1.462536001446098,
      "learning_rate": 1.3550355697412386e-06,
      "loss": 1.2586,
      "step": 3090
    },
    {
      "epoch": 0.7865026005327921,
      "grad_norm": 1.4861860594882876,
      "learning_rate": 1.3248562989964719e-06,
      "loss": 1.2843,
      "step": 3100
    },
    {
      "epoch": 0.7890397056958011,
      "grad_norm": 1.391241218546658,
      "learning_rate": 1.2949654936769622e-06,
      "loss": 1.2723,
      "step": 3110
    },
    {
      "epoch": 0.79157681085881,
      "grad_norm": 1.412674356321388,
      "learning_rate": 1.2653654999396436e-06,
      "loss": 1.2621,
      "step": 3120
    },
    {
      "epoch": 0.7941139160218191,
      "grad_norm": 1.406341007739084,
      "learning_rate": 1.2360586411153747e-06,
      "loss": 1.2897,
      "step": 3130
    },
    {
      "epoch": 0.7966510211848281,
      "grad_norm": 1.4125498017483746,
      "learning_rate": 1.2070472175265857e-06,
      "loss": 1.2657,
      "step": 3140
    },
    {
      "epoch": 0.7991881263478371,
      "grad_norm": 1.575395352386111,
      "learning_rate": 1.1783335063067286e-06,
      "loss": 1.2974,
      "step": 3150
    },
    {
      "epoch": 0.8017252315108461,
      "grad_norm": 1.4632409776646316,
      "learning_rate": 1.1499197612215269e-06,
      "loss": 1.2914,
      "step": 3160
    },
    {
      "epoch": 0.8042623366738552,
      "grad_norm": 1.420307782085356,
      "learning_rate": 1.1218082124920903e-06,
      "loss": 1.2583,
      "step": 3170
    },
    {
      "epoch": 0.8067994418368641,
      "grad_norm": 1.4073518626370982,
      "learning_rate": 1.0940010666198575e-06,
      "loss": 1.2588,
      "step": 3180
    },
    {
      "epoch": 0.8093365469998731,
      "grad_norm": 1.427712685491864,
      "learning_rate": 1.0665005062134015e-06,
      "loss": 1.2641,
      "step": 3190
    },
    {
      "epoch": 0.8118736521628821,
      "grad_norm": 1.4042233353051128,
      "learning_rate": 1.0393086898171234e-06,
      "loss": 1.2623,
      "step": 3200
    },
    {
      "epoch": 0.8144107573258912,
      "grad_norm": 1.4353551061219325,
      "learning_rate": 1.0124277517418196e-06,
      "loss": 1.2701,
      "step": 3210
    },
    {
      "epoch": 0.8169478624889002,
      "grad_norm": 1.4714738106408498,
      "learning_rate": 9.858598018971599e-07,
      "loss": 1.2665,
      "step": 3220
    },
    {
      "epoch": 0.8194849676519091,
      "grad_norm": 1.3867710691517015,
      "learning_rate": 9.596069256260792e-07,
      "loss": 1.2811,
      "step": 3230
    },
    {
      "epoch": 0.8220220728149181,
      "grad_norm": 1.5556697110120234,
      "learning_rate": 9.336711835410972e-07,
      "loss": 1.2577,
      "step": 3240
    },
    {
      "epoch": 0.8245591779779272,
      "grad_norm": 1.4677232808017586,
      "learning_rate": 9.080546113625738e-07,
      "loss": 1.2675,
      "step": 3250
    },
    {
      "epoch": 0.8270962831409362,
      "grad_norm": 1.4009076826572764,
      "learning_rate": 8.827592197589341e-07,
      "loss": 1.2573,
      "step": 3260
    },
    {
      "epoch": 0.8296333883039452,
      "grad_norm": 1.3584163410682717,
      "learning_rate": 8.577869941888389e-07,
      "loss": 1.2654,
      "step": 3270
    },
    {
      "epoch": 0.8321704934669542,
      "grad_norm": 1.4763004523041792,
      "learning_rate": 8.331398947453512e-07,
      "loss": 1.271,
      "step": 3280
    },
    {
      "epoch": 0.8347075986299632,
      "grad_norm": 1.432107775038367,
      "learning_rate": 8.08819856002081e-07,
      "loss": 1.2771,
      "step": 3290
    },
    {
      "epoch": 0.8372447037929722,
      "grad_norm": 1.4636493219573536,
      "learning_rate": 7.848287868613441e-07,
      "loss": 1.2511,
      "step": 3300
    },
    {
      "epoch": 0.8397818089559812,
      "grad_norm": 1.501456048501624,
      "learning_rate": 7.611685704043281e-07,
      "loss": 1.2724,
      "step": 3310
    },
    {
      "epoch": 0.8423189141189902,
      "grad_norm": 1.4533194309629769,
      "learning_rate": 7.378410637432848e-07,
      "loss": 1.2761,
      "step": 3320
    },
    {
      "epoch": 0.8448560192819993,
      "grad_norm": 1.4930040288043631,
      "learning_rate": 7.148480978757694e-07,
      "loss": 1.2808,
      "step": 3330
    },
    {
      "epoch": 0.8473931244450083,
      "grad_norm": 1.4327631888495864,
      "learning_rate": 6.921914775409211e-07,
      "loss": 1.2764,
      "step": 3340
    },
    {
      "epoch": 0.8499302296080172,
      "grad_norm": 1.4066505744498654,
      "learning_rate": 6.698729810778065e-07,
      "loss": 1.2724,
      "step": 3350
    },
    {
      "epoch": 0.8524673347710262,
      "grad_norm": 1.4214214237910756,
      "learning_rate": 6.478943602858373e-07,
      "loss": 1.2703,
      "step": 3360
    },
    {
      "epoch": 0.8550044399340353,
      "grad_norm": 1.4609797404161982,
      "learning_rate": 6.262573402872707e-07,
      "loss": 1.2702,
      "step": 3370
    },
    {
      "epoch": 0.8575415450970443,
      "grad_norm": 1.4426076897314533,
      "learning_rate": 6.04963619391799e-07,
      "loss": 1.2652,
      "step": 3380
    },
    {
      "epoch": 0.8600786502600533,
      "grad_norm": 1.4557782632700174,
| "learning_rate": 5.840148689632536e-07, | |
| "loss": 1.2628, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.8626157554230622, | |
| "grad_norm": 1.4465949605683495, | |
| "learning_rate": 5.634127332884143e-07, | |
| "loss": 1.2649, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.8651528605860713, | |
| "grad_norm": 1.4543385042228827, | |
| "learning_rate": 5.431588294479479e-07, | |
| "loss": 1.2863, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.8676899657490803, | |
| "grad_norm": 1.4211990179028964, | |
| "learning_rate": 5.232547471894839e-07, | |
| "loss": 1.2603, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.8702270709120893, | |
| "grad_norm": 1.4970252757505178, | |
| "learning_rate": 5.037020488028322e-07, | |
| "loss": 1.2659, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.8727641760750983, | |
| "grad_norm": 1.476846612856639, | |
| "learning_rate": 4.845022689973567e-07, | |
| "loss": 1.2622, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.8753012812381074, | |
| "grad_norm": 1.3975933791175674, | |
| "learning_rate": 4.656569147815171e-07, | |
| "loss": 1.2675, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.8778383864011163, | |
| "grad_norm": 1.4066115466115592, | |
| "learning_rate": 4.471674653445801e-07, | |
| "loss": 1.2657, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.8803754915641253, | |
| "grad_norm": 1.4093052515025426, | |
| "learning_rate": 4.290353719405199e-07, | |
| "loss": 1.2622, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.8829125967271343, | |
| "grad_norm": 1.517434872609148, | |
| "learning_rate": 4.1126205777410054e-07, | |
| "loss": 1.2658, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.8854497018901434, | |
| "grad_norm": 1.3671982309013966, | |
| "learning_rate": 3.938489178891769e-07, | |
| "loss": 1.26, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.8879868070531524, | |
| "grad_norm": 1.3848746120915914, | |
| "learning_rate": 3.767973190591906e-07, | |
| "loss": 1.252, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.8905239122161613, | |
| "grad_norm": 1.404335518132526, | |
| "learning_rate": 3.6010859967988975e-07, | |
| "loss": 1.2684, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.8930610173791703, | |
| "grad_norm": 1.4696908362694405, | |
| "learning_rate": 3.437840696642797e-07, | |
| "loss": 1.28, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.8955981225421794, | |
| "grad_norm": 1.4394982066957633, | |
| "learning_rate": 3.2782501033980897e-07, | |
| "loss": 1.2596, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.8981352277051884, | |
| "grad_norm": 1.441827806292722, | |
| "learning_rate": 3.1223267434778934e-07, | |
| "loss": 1.2548, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.9006723328681974, | |
| "grad_norm": 1.4029572337771223, | |
| "learning_rate": 2.9700828554508175e-07, | |
| "loss": 1.2714, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.9032094380312063, | |
| "grad_norm": 1.456563644801128, | |
| "learning_rate": 2.82153038908034e-07, | |
| "loss": 1.271, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.9057465431942154, | |
| "grad_norm": 1.4854658320433165, | |
| "learning_rate": 2.6766810043867996e-07, | |
| "loss": 1.2636, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.9082836483572244, | |
| "grad_norm": 1.3976434876269141, | |
| "learning_rate": 2.53554607073227e-07, | |
| "loss": 1.2555, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.9108207535202334, | |
| "grad_norm": 1.4520566750739115, | |
| "learning_rate": 2.3981366659281135e-07, | |
| "loss": 1.2741, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.9133578586832424, | |
| "grad_norm": 1.431635462724296, | |
| "learning_rate": 2.2644635753654832e-07, | |
| "loss": 1.2641, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.9158949638462515, | |
| "grad_norm": 1.4459695881350807, | |
| "learning_rate": 2.1345372911687868e-07, | |
| "loss": 1.2719, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.9184320690092604, | |
| "grad_norm": 1.4523937909973577, | |
| "learning_rate": 2.008368011372136e-07, | |
| "loss": 1.2574, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.9209691741722694, | |
| "grad_norm": 1.448000545062192, | |
| "learning_rate": 1.8859656391188918e-07, | |
| "loss": 1.2678, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.9235062793352784, | |
| "grad_norm": 1.4574912967654818, | |
| "learning_rate": 1.7673397818843696e-07, | |
| "loss": 1.2631, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.9260433844982875, | |
| "grad_norm": 1.469570266612804, | |
| "learning_rate": 1.65249975072172e-07, | |
| "loss": 1.2676, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.9285804896612965, | |
| "grad_norm": 1.4705072480655184, | |
| "learning_rate": 1.5414545595311193e-07, | |
| "loss": 1.2363, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.9311175948243055, | |
| "grad_norm": 1.469943074464241, | |
| "learning_rate": 1.4342129243522241e-07, | |
| "loss": 1.2716, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.9336546999873144, | |
| "grad_norm": 1.4039691988667693, | |
| "learning_rate": 1.3307832626800966e-07, | |
| "loss": 1.2674, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.9361918051503235, | |
| "grad_norm": 1.4357184274036978, | |
| "learning_rate": 1.2311736928044437e-07, | |
| "loss": 1.2662, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.9387289103133325, | |
| "grad_norm": 1.4198942808558601, | |
| "learning_rate": 1.1353920331724666e-07, | |
| "loss": 1.2743, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.9412660154763415, | |
| "grad_norm": 1.472183542066415, | |
| "learning_rate": 1.0434458017751392e-07, | |
| "loss": 1.2505, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.9438031206393505, | |
| "grad_norm": 1.4530560276575668, | |
| "learning_rate": 9.553422155571257e-08, | |
| "loss": 1.2637, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.9463402258023595, | |
| "grad_norm": 1.4342800198901315, | |
| "learning_rate": 8.710881898503276e-08, | |
| "loss": 1.2706, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.9488773309653685, | |
| "grad_norm": 1.4769111711160674, | |
| "learning_rate": 7.906903378310738e-08, | |
| "loss": 1.2717, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.9514144361283775, | |
| "grad_norm": 1.4434198544006103, | |
| "learning_rate": 7.141549700010741e-08, | |
| "loss": 1.2764, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.9539515412913865, | |
| "grad_norm": 1.3967239883734883, | |
| "learning_rate": 6.414880936920665e-08, | |
| "loss": 1.2454, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.9564886464543956, | |
| "grad_norm": 1.4745766429828837, | |
| "learning_rate": 5.726954125943318e-08, | |
| "loss": 1.2747, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.9590257516174046, | |
| "grad_norm": 1.3354472782196753, | |
| "learning_rate": 5.0778232630897536e-08, | |
| "loss": 1.2717, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.9615628567804135, | |
| "grad_norm": 1.5048063707346293, | |
| "learning_rate": 4.4675392992412634e-08, | |
| "loss": 1.2728, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.9640999619434225, | |
| "grad_norm": 1.4399837174477832, | |
| "learning_rate": 3.896150136150134e-08, | |
| "loss": 1.2826, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.9666370671064316, | |
| "grad_norm": 1.4450380885385077, | |
| "learning_rate": 3.3637006226797665e-08, | |
| "loss": 1.2534, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.9691741722694406, | |
| "grad_norm": 1.35673950609508, | |
| "learning_rate": 2.8702325512844908e-08, | |
| "loss": 1.2609, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.9717112774324496, | |
| "grad_norm": 1.43190405721669, | |
| "learning_rate": 2.4157846547292473e-08, | |
| "loss": 1.2787, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.9742483825954585, | |
| "grad_norm": 1.4245067018508633, | |
| "learning_rate": 2.000392603049517e-08, | |
| "loss": 1.2665, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.9767854877584676, | |
| "grad_norm": 1.4112885660620007, | |
| "learning_rate": 1.6240890007510612e-08, | |
| "loss": 1.2785, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.9793225929214766, | |
| "grad_norm": 1.499341122900986, | |
| "learning_rate": 1.286903384251581e-08, | |
| "loss": 1.2539, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.9818596980844856, | |
| "grad_norm": 1.4494246005853764, | |
| "learning_rate": 9.888622195615705e-09, | |
| "loss": 1.2725, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.9843968032474946, | |
| "grad_norm": 1.421431046100693, | |
| "learning_rate": 7.299889002075344e-09, | |
| "loss": 1.2726, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.9869339084105037, | |
| "grad_norm": 1.4209346862475516, | |
| "learning_rate": 5.103037453954573e-09, | |
| "loss": 1.2548, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.9894710135735126, | |
| "grad_norm": 1.3751824191752333, | |
| "learning_rate": 3.2982399841618996e-09, | |
| "loss": 1.2859, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.9920081187365216, | |
| "grad_norm": 1.4627888810880714, | |
| "learning_rate": 1.8856382529192085e-09, | |
| "loss": 1.2842, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.9945452238995306, | |
| "grad_norm": 1.3905016842842302, | |
| "learning_rate": 8.653431366406617e-10, | |
| "loss": 1.2447, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.9970823290625397, | |
| "grad_norm": 1.5470180096733397, | |
| "learning_rate": 2.374347192335424e-10, | |
| "loss": 1.2707, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.9996194342255487, | |
| "grad_norm": 1.4506027900307656, | |
| "learning_rate": 1.9622858088430564e-12, | |
| "loss": 1.2737, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.9998731447418495, | |
| "step": 3941, | |
| "total_flos": 3.7575827488610714e+18, | |
| "train_loss": 1.3289946492206504, | |
| "train_runtime": 13442.1001, | |
| "train_samples_per_second": 37.53, | |
| "train_steps_per_second": 0.293 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3941, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.7575827488610714e+18, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
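
The state above is the `trainer_state.json` that `transformers.Trainer` writes at the end of a run. As a quick way to inspect it, here is a minimal sketch (assuming the file is saved locally as `trainer_state.json` and that `matplotlib` is installed; both are assumptions, not part of the original artifact) that loads `log_history`, separates the per-step records from the closing summary record, and plots training loss alongside the learning-rate schedule. It also sanity-checks the reported `train_steps_per_second` against `max_steps` and `train_runtime`.

```python
import json

import matplotlib.pyplot as plt

# Path is an assumption; point this at wherever the Trainer checkpoint
# directory placed the state file.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step log entries. The final summary record carries
# "train_loss", "train_runtime", etc. instead of a per-step "loss" key,
# so filtering on "loss" excludes it.
logs = [entry for entry in state["log_history"] if "loss" in entry]

steps = [entry["step"] for entry in logs]
loss = [entry["loss"] for entry in logs]
lr = [entry["learning_rate"] for entry in logs]

# Loss on the left axis, learning rate on the right, sharing the step axis.
fig, ax_loss = plt.subplots()
ax_loss.plot(steps, loss, label="train loss")
ax_loss.set_xlabel("step")
ax_loss.set_ylabel("loss")

ax_lr = ax_loss.twinx()
ax_lr.plot(steps, lr, color="tab:orange", label="learning rate")
ax_lr.set_ylabel("learning rate")

fig.legend(loc="upper right")
plt.show()

# Sanity check: steps per second implied by the summary should match the
# value the Trainer reports (3941 steps / 13442.1 s ~= 0.293 here).
summary = state["log_history"][-1]
implied = state["max_steps"] / summary["train_runtime"]
assert abs(implied - summary["train_steps_per_second"]) < 1e-2
```

Filtering on the presence of the `loss` key is the simplest way to split the two record shapes in `log_history`: every logged step (emitted every `logging_steps` = 10 steps) has `loss`, `learning_rate`, and `grad_norm`, while the single end-of-training record instead carries the aggregate fields `train_loss`, `train_runtime`, `train_samples_per_second`, and `train_steps_per_second`.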