{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.922065846752039,
"eval_steps": 400,
"global_step": 11600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01690545623600017,
"grad_norm": 9.244617462158203,
"learning_rate": 1.6666666666666667e-05,
"loss": 5.2046,
"step": 50
},
{
"epoch": 0.03381091247200034,
"grad_norm": 11.779854774475098,
"learning_rate": 3.3333333333333335e-05,
"loss": 4.1731,
"step": 100
},
{
"epoch": 0.05071636870800051,
"grad_norm": 3.761927604675293,
"learning_rate": 5e-05,
"loss": 4.0566,
"step": 150
},
{
"epoch": 0.06762182494400068,
"grad_norm": 9.56804084777832,
"learning_rate": 6.666666666666667e-05,
"loss": 4.0416,
"step": 200
},
{
"epoch": 0.08452728118000084,
"grad_norm": 4.056901454925537,
"learning_rate": 8.333333333333334e-05,
"loss": 3.9922,
"step": 250
},
{
"epoch": 0.10143273741600102,
"grad_norm": 8.173892974853516,
"learning_rate": 0.0001,
"loss": 4.056,
"step": 300
},
{
"epoch": 0.11833819365200118,
"grad_norm": 7.099091053009033,
"learning_rate": 9.999735405375364e-05,
"loss": 3.9618,
"step": 350
},
{
"epoch": 0.13524364988800136,
"grad_norm": 3.7080025672912598,
"learning_rate": 9.998941652617147e-05,
"loss": 3.9753,
"step": 400
},
{
"epoch": 0.13524364988800136,
"eval_loss": 3.9605562686920166,
"eval_runtime": 11.6413,
"eval_samples_per_second": 85.901,
"eval_steps_per_second": 2.749,
"step": 400
},
{
"epoch": 0.1521491061240015,
"grad_norm": 3.6200530529022217,
"learning_rate": 9.997618835068782e-05,
"loss": 3.956,
"step": 450
},
{
"epoch": 0.1690545623600017,
"grad_norm": 3.6575722694396973,
"learning_rate": 9.995767108290448e-05,
"loss": 3.9065,
"step": 500
},
{
"epoch": 0.18596001859600186,
"grad_norm": 7.8048248291015625,
"learning_rate": 9.993386690040792e-05,
"loss": 3.88,
"step": 550
},
{
"epoch": 0.20286547483200204,
"grad_norm": 2.769477605819702,
"learning_rate": 9.990477860251314e-05,
"loss": 3.8931,
"step": 600
},
{
"epoch": 0.2197709310680022,
"grad_norm": 3.249483823776245,
"learning_rate": 9.987040960993446e-05,
"loss": 3.8389,
"step": 650
},
{
"epoch": 0.23667638730400237,
"grad_norm": 1.6407500505447388,
"learning_rate": 9.983076396438333e-05,
"loss": 3.855,
"step": 700
},
{
"epoch": 0.2535818435400025,
"grad_norm": 2.995973825454712,
"learning_rate": 9.978584632809293e-05,
"loss": 3.8473,
"step": 750
},
{
"epoch": 0.2704872997760027,
"grad_norm": 2.316067934036255,
"learning_rate": 9.973566198326996e-05,
"loss": 3.8353,
"step": 800
},
{
"epoch": 0.2704872997760027,
"eval_loss": 3.8578951358795166,
"eval_runtime": 11.681,
"eval_samples_per_second": 85.609,
"eval_steps_per_second": 2.739,
"step": 800
},
{
"epoch": 0.28739275601200287,
"grad_norm": 2.2685530185699463,
"learning_rate": 9.968021683147353e-05,
"loss": 3.8267,
"step": 850
},
{
"epoch": 0.304298212248003,
"grad_norm": 2.8378384113311768,
"learning_rate": 9.961951739292097e-05,
"loss": 3.8046,
"step": 900
},
{
"epoch": 0.3212036684840032,
"grad_norm": 3.528430938720703,
"learning_rate": 9.955357080572128e-05,
"loss": 3.8239,
"step": 950
},
{
"epoch": 0.3381091247200034,
"grad_norm": 5.5983099937438965,
"learning_rate": 9.948238482503555e-05,
"loss": 3.7842,
"step": 1000
},
{
"epoch": 0.3550145809560035,
"grad_norm": 2.4193906784057617,
"learning_rate": 9.940596782216504e-05,
"loss": 3.7816,
"step": 1050
},
{
"epoch": 0.3719200371920037,
"grad_norm": 1.893676996231079,
"learning_rate": 9.932432878356672e-05,
"loss": 3.7895,
"step": 1100
},
{
"epoch": 0.3888254934280039,
"grad_norm": 2.029632806777954,
"learning_rate": 9.92374773097965e-05,
"loss": 3.7668,
"step": 1150
},
{
"epoch": 0.4057309496640041,
"grad_norm": 2.21596622467041,
"learning_rate": 9.91454236143802e-05,
"loss": 3.7979,
"step": 1200
},
{
"epoch": 0.4057309496640041,
"eval_loss": 3.8054637908935547,
"eval_runtime": 11.6706,
"eval_samples_per_second": 85.685,
"eval_steps_per_second": 2.742,
"step": 1200
},
{
"epoch": 0.42263640590000423,
"grad_norm": 3.131930351257324,
"learning_rate": 9.90481785226125e-05,
"loss": 3.7824,
"step": 1250
},
{
"epoch": 0.4395418621360044,
"grad_norm": 1.5176011323928833,
"learning_rate": 9.894575347028381e-05,
"loss": 3.7666,
"step": 1300
},
{
"epoch": 0.4564473183720046,
"grad_norm": 2.2135422229766846,
"learning_rate": 9.883816050233566e-05,
"loss": 3.7684,
"step": 1350
},
{
"epoch": 0.47335277460800473,
"grad_norm": 1.565341830253601,
"learning_rate": 9.872541227144397e-05,
"loss": 3.7421,
"step": 1400
},
{
"epoch": 0.4902582308440049,
"grad_norm": 2.0272250175476074,
"learning_rate": 9.860752203653138e-05,
"loss": 3.7524,
"step": 1450
},
{
"epoch": 0.507163687080005,
"grad_norm": 2.3772072792053223,
"learning_rate": 9.848450366120785e-05,
"loss": 3.7489,
"step": 1500
},
{
"epoch": 0.5240691433160053,
"grad_norm": 2.349799633026123,
"learning_rate": 9.835637161214042e-05,
"loss": 3.7421,
"step": 1550
},
{
"epoch": 0.5409745995520054,
"grad_norm": 2.6349542140960693,
"learning_rate": 9.822314095735195e-05,
"loss": 3.7416,
"step": 1600
},
{
"epoch": 0.5409745995520054,
"eval_loss": 3.775789499282837,
"eval_runtime": 11.6523,
"eval_samples_per_second": 85.82,
"eval_steps_per_second": 2.746,
"step": 1600
},
{
"epoch": 0.5578800557880056,
"grad_norm": 2.5276811122894287,
"learning_rate": 9.808482736444913e-05,
"loss": 3.7517,
"step": 1650
},
{
"epoch": 0.5747855120240057,
"grad_norm": 2.6835217475891113,
"learning_rate": 9.794144709878008e-05,
"loss": 3.7325,
"step": 1700
},
{
"epoch": 0.5916909682600059,
"grad_norm": 2.924553632736206,
"learning_rate": 9.779301702152147e-05,
"loss": 3.7142,
"step": 1750
},
{
"epoch": 0.608596424496006,
"grad_norm": 2.129288673400879,
"learning_rate": 9.763955458769581e-05,
"loss": 3.7347,
"step": 1800
},
{
"epoch": 0.6255018807320063,
"grad_norm": 1.441651463508606,
"learning_rate": 9.748107784411867e-05,
"loss": 3.72,
"step": 1850
},
{
"epoch": 0.6424073369680064,
"grad_norm": 2.1518378257751465,
"learning_rate": 9.731760542727647e-05,
"loss": 3.7042,
"step": 1900
},
{
"epoch": 0.6593127932040066,
"grad_norm": 4.049715518951416,
"learning_rate": 9.714915656113491e-05,
"loss": 3.7284,
"step": 1950
},
{
"epoch": 0.6762182494400067,
"grad_norm": 1.596699595451355,
"learning_rate": 9.697575105487821e-05,
"loss": 3.7116,
"step": 2000
},
{
"epoch": 0.6762182494400067,
"eval_loss": 3.7348101139068604,
"eval_runtime": 11.6849,
"eval_samples_per_second": 85.58,
"eval_steps_per_second": 2.739,
"step": 2000
},
{
"epoch": 0.6931237056760069,
"grad_norm": 2.405496835708618,
"learning_rate": 9.679740930057965e-05,
"loss": 3.6867,
"step": 2050
},
{
"epoch": 0.710029161912007,
"grad_norm": 2.290134906768799,
"learning_rate": 9.661415227080342e-05,
"loss": 3.6967,
"step": 2100
},
{
"epoch": 0.7269346181480073,
"grad_norm": 1.6495215892791748,
"learning_rate": 9.642600151613847e-05,
"loss": 3.6822,
"step": 2150
},
{
"epoch": 0.7438400743840075,
"grad_norm": 1.6393038034439087,
"learning_rate": 9.623297916266399e-05,
"loss": 3.6747,
"step": 2200
},
{
"epoch": 0.7607455306200076,
"grad_norm": 1.7952766418457031,
"learning_rate": 9.603510790934765e-05,
"loss": 3.6778,
"step": 2250
},
{
"epoch": 0.7776509868560078,
"grad_norm": 2.7831900119781494,
"learning_rate": 9.583241102537609e-05,
"loss": 3.6891,
"step": 2300
},
{
"epoch": 0.7945564430920079,
"grad_norm": 1.602946400642395,
"learning_rate": 9.562491234741863e-05,
"loss": 3.6745,
"step": 2350
},
{
"epoch": 0.8114618993280082,
"grad_norm": 2.163341522216797,
"learning_rate": 9.541263627682408e-05,
"loss": 3.6622,
"step": 2400
},
{
"epoch": 0.8114618993280082,
"eval_loss": 3.7114827632904053,
"eval_runtime": 11.6568,
"eval_samples_per_second": 85.787,
"eval_steps_per_second": 2.745,
"step": 2400
},
{
"epoch": 0.8283673555640083,
"grad_norm": 3.2292706966400146,
"learning_rate": 9.519560777675117e-05,
"loss": 3.6907,
"step": 2450
},
{
"epoch": 0.8452728118000085,
"grad_norm": 2.3297388553619385,
"learning_rate": 9.497385236923304e-05,
"loss": 3.659,
"step": 2500
},
{
"epoch": 0.8621782680360086,
"grad_norm": 2.1775872707366943,
"learning_rate": 9.474739613217583e-05,
"loss": 3.6543,
"step": 2550
},
{
"epoch": 0.8790837242720088,
"grad_norm": 2.1940650939941406,
"learning_rate": 9.4516265696292e-05,
"loss": 3.6631,
"step": 2600
},
{
"epoch": 0.8959891805080089,
"grad_norm": 2.0075910091400146,
"learning_rate": 9.428048824196861e-05,
"loss": 3.6699,
"step": 2650
},
{
"epoch": 0.9128946367440092,
"grad_norm": 1.463968276977539,
"learning_rate": 9.404009149607104e-05,
"loss": 3.6458,
"step": 2700
},
{
"epoch": 0.9298000929800093,
"grad_norm": 1.776117205619812,
"learning_rate": 9.379510372868227e-05,
"loss": 3.6442,
"step": 2750
},
{
"epoch": 0.9467055492160095,
"grad_norm": 1.4943894147872925,
"learning_rate": 9.354555374977845e-05,
"loss": 3.6717,
"step": 2800
},
{
"epoch": 0.9467055492160095,
"eval_loss": 3.6869394779205322,
"eval_runtime": 11.6677,
"eval_samples_per_second": 85.706,
"eval_steps_per_second": 2.743,
"step": 2800
},
{
"epoch": 0.9636110054520096,
"grad_norm": 2.1420116424560547,
"learning_rate": 9.32914709058409e-05,
"loss": 3.6602,
"step": 2850
},
{
"epoch": 0.9805164616880098,
"grad_norm": 1.4225534200668335,
"learning_rate": 9.303288507640508e-05,
"loss": 3.6312,
"step": 2900
},
{
"epoch": 0.99742191792401,
"grad_norm": 1.5110421180725098,
"learning_rate": 9.276982667054676e-05,
"loss": 3.6309,
"step": 2950
},
{
"epoch": 1.01432737416001,
"grad_norm": 1.4234793186187744,
"learning_rate": 9.250232662330597e-05,
"loss": 3.5446,
"step": 3000
},
{
"epoch": 1.0312328303960103,
"grad_norm": 1.4176740646362305,
"learning_rate": 9.223041639204923e-05,
"loss": 3.5084,
"step": 3050
},
{
"epoch": 1.0481382866320106,
"grad_norm": 1.5388909578323364,
"learning_rate": 9.195412795277012e-05,
"loss": 3.4993,
"step": 3100
},
{
"epoch": 1.0650437428680106,
"grad_norm": 1.7309865951538086,
"learning_rate": 9.167349379632901e-05,
"loss": 3.5396,
"step": 3150
},
{
"epoch": 1.0819491991040109,
"grad_norm": 1.6854147911071777,
"learning_rate": 9.138854692463229e-05,
"loss": 3.5129,
"step": 3200
},
{
"epoch": 1.0819491991040109,
"eval_loss": 3.657722234725952,
"eval_runtime": 11.6989,
"eval_samples_per_second": 85.478,
"eval_steps_per_second": 2.735,
"step": 3200
},
{
"epoch": 1.098854655340011,
"grad_norm": 2.1751339435577393,
"learning_rate": 9.10993208467513e-05,
"loss": 3.523,
"step": 3250
},
{
"epoch": 1.1157601115760112,
"grad_norm": 1.1804617643356323,
"learning_rate": 9.08058495749818e-05,
"loss": 3.5124,
"step": 3300
},
{
"epoch": 1.1326655678120114,
"grad_norm": 2.0826492309570312,
"learning_rate": 9.050816762084426e-05,
"loss": 3.5145,
"step": 3350
},
{
"epoch": 1.1495710240480115,
"grad_norm": 1.2046183347702026,
"learning_rate": 9.020630999102529e-05,
"loss": 3.4959,
"step": 3400
},
{
"epoch": 1.1664764802840117,
"grad_norm": 1.7077451944351196,
"learning_rate": 8.990031218326104e-05,
"loss": 3.5147,
"step": 3450
},
{
"epoch": 1.1833819365200118,
"grad_norm": 1.5411063432693481,
"learning_rate": 8.959021018216262e-05,
"loss": 3.5041,
"step": 3500
},
{
"epoch": 1.200287392756012,
"grad_norm": 1.4028867483139038,
"learning_rate": 8.927604045498453e-05,
"loss": 3.5017,
"step": 3550
},
{
"epoch": 1.217192848992012,
"grad_norm": 1.7714195251464844,
"learning_rate": 8.895783994733611e-05,
"loss": 3.5056,
"step": 3600
},
{
"epoch": 1.217192848992012,
"eval_loss": 3.643071174621582,
"eval_runtime": 11.6982,
"eval_samples_per_second": 85.484,
"eval_steps_per_second": 2.735,
"step": 3600
},
{
"epoch": 1.2340983052280123,
"grad_norm": 1.4252102375030518,
"learning_rate": 8.863564607883687e-05,
"loss": 3.4944,
"step": 3650
},
{
"epoch": 1.2510037614640126,
"grad_norm": 1.6317962408065796,
"learning_rate": 8.8309496738716e-05,
"loss": 3.4899,
"step": 3700
},
{
"epoch": 1.2679092177000126,
"grad_norm": 1.9923129081726074,
"learning_rate": 8.79794302813567e-05,
"loss": 3.5166,
"step": 3750
},
{
"epoch": 1.284814673936013,
"grad_norm": 1.0414555072784424,
"learning_rate": 8.764548552178584e-05,
"loss": 3.4998,
"step": 3800
},
{
"epoch": 1.301720130172013,
"grad_norm": 1.9689300060272217,
"learning_rate": 8.730770173110932e-05,
"loss": 3.5138,
"step": 3850
},
{
"epoch": 1.3186255864080132,
"grad_norm": 1.6223361492156982,
"learning_rate": 8.696611863189395e-05,
"loss": 3.49,
"step": 3900
},
{
"epoch": 1.3355310426440132,
"grad_norm": 1.6299093961715698,
"learning_rate": 8.662077639349612e-05,
"loss": 3.4629,
"step": 3950
},
{
"epoch": 1.3524364988800135,
"grad_norm": 1.510591983795166,
"learning_rate": 8.627171562733803e-05,
"loss": 3.4711,
"step": 4000
},
{
"epoch": 1.3524364988800135,
"eval_loss": 3.620584726333618,
"eval_runtime": 11.664,
"eval_samples_per_second": 85.734,
"eval_steps_per_second": 2.743,
"step": 4000
},
{
"epoch": 1.3693419551160138,
"grad_norm": 1.280197262763977,
"learning_rate": 8.591897738213187e-05,
"loss": 3.4808,
"step": 4050
},
{
"epoch": 1.3862474113520138,
"grad_norm": 1.5453211069107056,
"learning_rate": 8.556260313905257e-05,
"loss": 3.4747,
"step": 4100
},
{
"epoch": 1.403152867588014,
"grad_norm": 1.439795970916748,
"learning_rate": 8.520263480685968e-05,
"loss": 3.4847,
"step": 4150
},
{
"epoch": 1.4200583238240143,
"grad_norm": 1.2832996845245361,
"learning_rate": 8.483911471696912e-05,
"loss": 3.482,
"step": 4200
},
{
"epoch": 1.4369637800600144,
"grad_norm": 1.684513807296753,
"learning_rate": 8.447208561847501e-05,
"loss": 3.4514,
"step": 4250
},
{
"epoch": 1.4538692362960146,
"grad_norm": 0.9559006094932556,
"learning_rate": 8.410159067312243e-05,
"loss": 3.4771,
"step": 4300
},
{
"epoch": 1.4707746925320146,
"grad_norm": 1.0770719051361084,
"learning_rate": 8.372767345023185e-05,
"loss": 3.483,
"step": 4350
},
{
"epoch": 1.487680148768015,
"grad_norm": 1.1667702198028564,
"learning_rate": 8.33503779215754e-05,
"loss": 3.4781,
"step": 4400
},
{
"epoch": 1.487680148768015,
"eval_loss": 3.585639715194702,
"eval_runtime": 11.6769,
"eval_samples_per_second": 85.639,
"eval_steps_per_second": 2.74,
"step": 4400
},
{
"epoch": 1.504585605004015,
"grad_norm": 1.183647632598877,
"learning_rate": 8.296974845620584e-05,
"loss": 3.4836,
"step": 4450
},
{
"epoch": 1.5214910612400152,
"grad_norm": 1.308076024055481,
"learning_rate": 8.258582981523895e-05,
"loss": 3.4571,
"step": 4500
},
{
"epoch": 1.5383965174760155,
"grad_norm": 1.5776398181915283,
"learning_rate": 8.219866714658971e-05,
"loss": 3.444,
"step": 4550
},
{
"epoch": 1.5553019737120155,
"grad_norm": 1.1617664098739624,
"learning_rate": 8.180830597966303e-05,
"loss": 3.4465,
"step": 4600
},
{
"epoch": 1.5722074299480158,
"grad_norm": 0.9418047070503235,
"learning_rate": 8.141479221999953e-05,
"loss": 3.4568,
"step": 4650
},
{
"epoch": 1.589112886184016,
"grad_norm": 1.3985543251037598,
"learning_rate": 8.101817214387723e-05,
"loss": 3.4502,
"step": 4700
},
{
"epoch": 1.606018342420016,
"grad_norm": 1.0654075145721436,
"learning_rate": 8.06184923928695e-05,
"loss": 3.4424,
"step": 4750
},
{
"epoch": 1.622923798656016,
"grad_norm": 1.5798131227493286,
"learning_rate": 8.021579996836025e-05,
"loss": 3.4336,
"step": 4800
},
{
"epoch": 1.622923798656016,
"eval_loss": 3.5558018684387207,
"eval_runtime": 11.6573,
"eval_samples_per_second": 85.783,
"eval_steps_per_second": 2.745,
"step": 4800
},
{
"epoch": 1.6398292548920164,
"grad_norm": 1.036365270614624,
"learning_rate": 7.981014222601651e-05,
"loss": 3.4397,
"step": 4850
},
{
"epoch": 1.6567347111280166,
"grad_norm": 1.2249760627746582,
"learning_rate": 7.940156687021969e-05,
"loss": 3.4315,
"step": 4900
},
{
"epoch": 1.6736401673640167,
"grad_norm": 1.3212870359420776,
"learning_rate": 7.899012194845549e-05,
"loss": 3.4342,
"step": 4950
},
{
"epoch": 1.690545623600017,
"grad_norm": 1.130004644393921,
"learning_rate": 7.857585584566375e-05,
"loss": 3.428,
"step": 5000
},
{
"epoch": 1.7074510798360172,
"grad_norm": 1.0531315803527832,
"learning_rate": 7.815881727854847e-05,
"loss": 3.4208,
"step": 5050
},
{
"epoch": 1.7243565360720172,
"grad_norm": 1.3188402652740479,
"learning_rate": 7.77390552898488e-05,
"loss": 3.4227,
"step": 5100
},
{
"epoch": 1.7412619923080173,
"grad_norm": 1.0934581756591797,
"learning_rate": 7.73166192425718e-05,
"loss": 3.4245,
"step": 5150
},
{
"epoch": 1.7581674485440177,
"grad_norm": 1.7236692905426025,
"learning_rate": 7.68915588141874e-05,
"loss": 3.4167,
"step": 5200
},
{
"epoch": 1.7581674485440177,
"eval_loss": 3.5321877002716064,
"eval_runtime": 11.6863,
"eval_samples_per_second": 85.57,
"eval_steps_per_second": 2.738,
"step": 5200
},
{
"epoch": 1.7750729047800178,
"grad_norm": 0.9837960600852966,
"learning_rate": 7.646392399078647e-05,
"loss": 3.4196,
"step": 5250
},
{
"epoch": 1.7919783610160178,
"grad_norm": 1.0993341207504272,
"learning_rate": 7.60337650612026e-05,
"loss": 3.4116,
"step": 5300
},
{
"epoch": 1.808883817252018,
"grad_norm": 1.3233968019485474,
"learning_rate": 7.560113261109827e-05,
"loss": 3.4118,
"step": 5350
},
{
"epoch": 1.8257892734880183,
"grad_norm": 1.250022530555725,
"learning_rate": 7.516607751701602e-05,
"loss": 3.3959,
"step": 5400
},
{
"epoch": 1.8426947297240184,
"grad_norm": 1.4518115520477295,
"learning_rate": 7.472865094039555e-05,
"loss": 3.4155,
"step": 5450
},
{
"epoch": 1.8596001859600186,
"grad_norm": 1.307420015335083,
"learning_rate": 7.428890432155719e-05,
"loss": 3.3955,
"step": 5500
},
{
"epoch": 1.876505642196019,
"grad_norm": 1.255487322807312,
"learning_rate": 7.384688937365279e-05,
"loss": 3.3877,
"step": 5550
},
{
"epoch": 1.893411098432019,
"grad_norm": 0.999951183795929,
"learning_rate": 7.340265807658422e-05,
"loss": 3.3942,
"step": 5600
},
{
"epoch": 1.893411098432019,
"eval_loss": 3.4983253479003906,
"eval_runtime": 11.6754,
"eval_samples_per_second": 85.65,
"eval_steps_per_second": 2.741,
"step": 5600
},
{
"epoch": 1.910316554668019,
"grad_norm": 1.4537323713302612,
"learning_rate": 7.29562626708907e-05,
"loss": 3.4025,
"step": 5650
},
{
"epoch": 1.9272220109040192,
"grad_norm": 1.2412198781967163,
"learning_rate": 7.25077556516055e-05,
"loss": 3.3924,
"step": 5700
},
{
"epoch": 1.9441274671400195,
"grad_norm": 1.592606544494629,
"learning_rate": 7.205718976208258e-05,
"loss": 3.3732,
"step": 5750
},
{
"epoch": 1.9610329233760195,
"grad_norm": 0.9837318658828735,
"learning_rate": 7.160461798779413e-05,
"loss": 3.3909,
"step": 5800
},
{
"epoch": 1.9779383796120198,
"grad_norm": 1.1004836559295654,
"learning_rate": 7.115009355009959e-05,
"loss": 3.3837,
"step": 5850
},
{
"epoch": 1.99484383584802,
"grad_norm": 1.5246398448944092,
"learning_rate": 7.069366989998692e-05,
"loss": 3.366,
"step": 5900
},
{
"epoch": 2.01174929208402,
"grad_norm": 1.1871501207351685,
"learning_rate": 7.023540071178697e-05,
"loss": 3.2107,
"step": 5950
},
{
"epoch": 2.02865474832002,
"grad_norm": 1.3174642324447632,
"learning_rate": 6.977533987686147e-05,
"loss": 3.1636,
"step": 6000
},
{
"epoch": 2.02865474832002,
"eval_loss": 3.476820230484009,
"eval_runtime": 11.7209,
"eval_samples_per_second": 85.318,
"eval_steps_per_second": 2.73,
"step": 6000
},
{
"epoch": 2.0455602045560206,
"grad_norm": 1.3271148204803467,
"learning_rate": 6.931354149726548e-05,
"loss": 3.145,
"step": 6050
},
{
"epoch": 2.0624656607920206,
"grad_norm": 1.0174113512039185,
"learning_rate": 6.885005987938516e-05,
"loss": 3.1572,
"step": 6100
},
{
"epoch": 2.0793711170280207,
"grad_norm": 1.086552619934082,
"learning_rate": 6.838494952755154e-05,
"loss": 3.1499,
"step": 6150
},
{
"epoch": 2.096276573264021,
"grad_norm": 0.9986650943756104,
"learning_rate": 6.791826513763076e-05,
"loss": 3.1508,
"step": 6200
},
{
"epoch": 2.113182029500021,
"grad_norm": 1.0550023317337036,
"learning_rate": 6.745006159059222e-05,
"loss": 3.1484,
"step": 6250
},
{
"epoch": 2.1300874857360212,
"grad_norm": 0.9948070049285889,
"learning_rate": 6.69803939460544e-05,
"loss": 3.1394,
"step": 6300
},
{
"epoch": 2.1469929419720213,
"grad_norm": 0.9610317945480347,
"learning_rate": 6.650931743581033e-05,
"loss": 3.1438,
"step": 6350
},
{
"epoch": 2.1638983982080218,
"grad_norm": 1.1480181217193604,
"learning_rate": 6.603688745733211e-05,
"loss": 3.1411,
"step": 6400
},
{
"epoch": 2.1638983982080218,
"eval_loss": 3.4495885372161865,
"eval_runtime": 11.6737,
"eval_samples_per_second": 85.663,
"eval_steps_per_second": 2.741,
"step": 6400
},
{
"epoch": 2.180803854444022,
"grad_norm": 0.8075036406517029,
"learning_rate": 6.556315956725662e-05,
"loss": 3.1346,
"step": 6450
},
{
"epoch": 2.197709310680022,
"grad_norm": 1.198843240737915,
"learning_rate": 6.50881894748519e-05,
"loss": 3.1739,
"step": 6500
},
{
"epoch": 2.2146147669160223,
"grad_norm": 1.3039313554763794,
"learning_rate": 6.461203303546615e-05,
"loss": 3.1518,
"step": 6550
},
{
"epoch": 2.2315202231520224,
"grad_norm": 1.0603476762771606,
"learning_rate": 6.413474624395905e-05,
"loss": 3.1561,
"step": 6600
},
{
"epoch": 2.2484256793880224,
"grad_norm": 1.0246437788009644,
"learning_rate": 6.365638522811704e-05,
"loss": 3.1361,
"step": 6650
},
{
"epoch": 2.265331135624023,
"grad_norm": 1.0935068130493164,
"learning_rate": 6.317700624205273e-05,
"loss": 3.1174,
"step": 6700
},
{
"epoch": 2.282236591860023,
"grad_norm": 0.9507274031639099,
"learning_rate": 6.269666565958963e-05,
"loss": 3.1381,
"step": 6750
},
{
"epoch": 2.299142048096023,
"grad_norm": 0.9847383499145508,
"learning_rate": 6.221541996763269e-05,
"loss": 3.1154,
"step": 6800
},
{
"epoch": 2.299142048096023,
"eval_loss": 3.407775640487671,
"eval_runtime": 11.6847,
"eval_samples_per_second": 85.582,
"eval_steps_per_second": 2.739,
"step": 6800
},
{
"epoch": 2.316047504332023,
"grad_norm": 0.9965755343437195,
"learning_rate": 6.173332575952557e-05,
"loss": 3.1175,
"step": 6850
},
{
"epoch": 2.3329529605680235,
"grad_norm": 0.9966267943382263,
"learning_rate": 6.125043972839536e-05,
"loss": 3.1507,
"step": 6900
},
{
"epoch": 2.3498584168040235,
"grad_norm": 1.1471587419509888,
"learning_rate": 6.0766818660485716e-05,
"loss": 3.1287,
"step": 6950
},
{
"epoch": 2.3667638730400236,
"grad_norm": 1.2493683099746704,
"learning_rate": 6.028251942847882e-05,
"loss": 3.1202,
"step": 7000
},
{
"epoch": 2.3836693292760236,
"grad_norm": 1.0860092639923096,
"learning_rate": 5.9797598984807335e-05,
"loss": 3.121,
"step": 7050
},
{
"epoch": 2.400574785512024,
"grad_norm": 1.1602693796157837,
"learning_rate": 5.931211435495694e-05,
"loss": 3.1282,
"step": 7100
},
{
"epoch": 2.417480241748024,
"grad_norm": 0.9266437292098999,
"learning_rate": 5.882612263076026e-05,
"loss": 3.1033,
"step": 7150
},
{
"epoch": 2.434385697984024,
"grad_norm": 1.0014028549194336,
"learning_rate": 5.833968096368301e-05,
"loss": 3.1007,
"step": 7200
},
{
"epoch": 2.434385697984024,
"eval_loss": 3.378451108932495,
"eval_runtime": 11.6786,
"eval_samples_per_second": 85.627,
"eval_steps_per_second": 2.74,
"step": 7200
},
{
"epoch": 2.4512911542200246,
"grad_norm": 1.0491281747817993,
"learning_rate": 5.785284655810308e-05,
"loss": 3.1117,
"step": 7250
},
{
"epoch": 2.4681966104560247,
"grad_norm": 0.8672958612442017,
"learning_rate": 5.7365676664583514e-05,
"loss": 3.1037,
"step": 7300
},
{
"epoch": 2.4851020666920247,
"grad_norm": 1.0733287334442139,
"learning_rate": 5.687822857313993e-05,
"loss": 3.1175,
"step": 7350
},
{
"epoch": 2.502007522928025,
"grad_norm": 0.8078711032867432,
"learning_rate": 5.63905596065033e-05,
"loss": 3.1099,
"step": 7400
},
{
"epoch": 2.5189129791640252,
"grad_norm": 1.0341311693191528,
"learning_rate": 5.590272711337908e-05,
"loss": 3.1167,
"step": 7450
},
{
"epoch": 2.5358184354000253,
"grad_norm": 1.1616711616516113,
"learning_rate": 5.541478846170298e-05,
"loss": 3.0931,
"step": 7500
},
{
"epoch": 2.5527238916360258,
"grad_norm": 0.8573184609413147,
"learning_rate": 5.4926801031894734e-05,
"loss": 3.1002,
"step": 7550
},
{
"epoch": 2.569629347872026,
"grad_norm": 0.9128609895706177,
"learning_rate": 5.4438822210110275e-05,
"loss": 3.1117,
"step": 7600
},
{
"epoch": 2.569629347872026,
"eval_loss": 3.3413400650024414,
"eval_runtime": 11.683,
"eval_samples_per_second": 85.595,
"eval_steps_per_second": 2.739,
"step": 7600
},
{
"epoch": 2.586534804108026,
"grad_norm": 0.9771824479103088,
"learning_rate": 5.395090938149321e-05,
"loss": 3.0962,
"step": 7650
},
{
"epoch": 2.603440260344026,
"grad_norm": 1.0292117595672607,
"learning_rate": 5.346311992342656e-05,
"loss": 3.0829,
"step": 7700
},
{
"epoch": 2.6203457165800264,
"grad_norm": 0.8531580567359924,
"learning_rate": 5.297551119878522e-05,
"loss": 3.0778,
"step": 7750
},
{
"epoch": 2.6372511728160264,
"grad_norm": 0.9693793058395386,
"learning_rate": 5.248814054919031e-05,
"loss": 3.0678,
"step": 7800
},
{
"epoch": 2.6541566290520264,
"grad_norm": 0.999812662601471,
"learning_rate": 5.200106528826586e-05,
"loss": 3.0851,
"step": 7850
},
{
"epoch": 2.6710620852880265,
"grad_norm": 0.8840042352676392,
"learning_rate": 5.151434269489889e-05,
"loss": 3.0775,
"step": 7900
},
{
"epoch": 2.687967541524027,
"grad_norm": 0.8592630624771118,
"learning_rate": 5.102803000650359e-05,
"loss": 3.0553,
"step": 7950
},
{
"epoch": 2.704872997760027,
"grad_norm": 0.8932810425758362,
"learning_rate": 5.054218441229031e-05,
"loss": 3.0653,
"step": 8000
},
{
"epoch": 2.704872997760027,
"eval_loss": 3.309781551361084,
"eval_runtime": 11.6906,
"eval_samples_per_second": 85.538,
"eval_steps_per_second": 2.737,
"step": 8000
},
{
"epoch": 2.721778453996027,
"grad_norm": 0.9920628070831299,
"learning_rate": 5.005686304654018e-05,
"loss": 3.0565,
"step": 8050
},
{
"epoch": 2.7386839102320275,
"grad_norm": 0.8969925045967102,
"learning_rate": 4.957212298188638e-05,
"loss": 3.0684,
"step": 8100
},
{
"epoch": 2.7555893664680275,
"grad_norm": 0.9055591821670532,
"learning_rate": 4.908802122260243e-05,
"loss": 3.0721,
"step": 8150
},
{
"epoch": 2.7724948227040276,
"grad_norm": 1.047692060470581,
"learning_rate": 4.8604614697898706e-05,
"loss": 3.0404,
"step": 8200
},
{
"epoch": 2.789400278940028,
"grad_norm": 0.9087944626808167,
"learning_rate": 4.8121960255227603e-05,
"loss": 3.0607,
"step": 8250
},
{
"epoch": 2.806305735176028,
"grad_norm": 0.845425546169281,
"learning_rate": 4.764011465359851e-05,
"loss": 3.0396,
"step": 8300
},
{
"epoch": 2.823211191412028,
"grad_norm": 0.9913256764411926,
"learning_rate": 4.715913455690301e-05,
"loss": 3.0444,
"step": 8350
},
{
"epoch": 2.8401166476480286,
"grad_norm": 1.0773906707763672,
"learning_rate": 4.66790765272514e-05,
"loss": 3.0378,
"step": 8400
},
{
"epoch": 2.8401166476480286,
"eval_loss": 3.284963846206665,
"eval_runtime": 11.6698,
"eval_samples_per_second": 85.691,
"eval_steps_per_second": 2.742,
"step": 8400
},
{
"epoch": 2.8570221038840287,
"grad_norm": 0.9364065527915955,
"learning_rate": 4.619999701832108e-05,
"loss": 3.0088,
"step": 8450
},
{
"epoch": 2.8739275601200287,
"grad_norm": 0.9384069442749023,
"learning_rate": 4.572195236871777e-05,
"loss": 3.0226,
"step": 8500
},
{
"epoch": 2.890833016356029,
"grad_norm": 0.8556334972381592,
"learning_rate": 4.524499879535016e-05,
"loss": 3.0257,
"step": 8550
},
{
"epoch": 2.907738472592029,
"grad_norm": 0.9114282131195068,
"learning_rate": 4.476919238681904e-05,
"loss": 3.0302,
"step": 8600
},
{
"epoch": 2.9246439288280293,
"grad_norm": 0.832721471786499,
"learning_rate": 4.4294589096821325e-05,
"loss": 3.0438,
"step": 8650
},
{
"epoch": 2.9415493850640293,
"grad_norm": 0.8480991125106812,
"learning_rate": 4.3821244737570046e-05,
"loss": 3.0276,
"step": 8700
},
{
"epoch": 2.9584548413000293,
"grad_norm": 1.021043300628662,
"learning_rate": 4.3349214973231024e-05,
"loss": 3.0216,
"step": 8750
},
{
"epoch": 2.97536029753603,
"grad_norm": 0.7957491874694824,
"learning_rate": 4.287855531337683e-05,
"loss": 3.0173,
"step": 8800
},
{
"epoch": 2.97536029753603,
"eval_loss": 3.2582342624664307,
"eval_runtime": 11.6907,
"eval_samples_per_second": 85.538,
"eval_steps_per_second": 2.737,
"step": 8800
},
{
"epoch": 2.99226575377203,
"grad_norm": 1.0264720916748047,
"learning_rate": 4.2409321106459077e-05,
"loss": 3.0152,
"step": 8850
},
{
"epoch": 3.00917121000803,
"grad_norm": 0.8381086587905884,
"learning_rate": 4.194156753329942e-05,
"loss": 2.886,
"step": 8900
},
{
"epoch": 3.0260766662440304,
"grad_norm": 0.9872603416442871,
"learning_rate": 4.147534960060059e-05,
"loss": 2.7538,
"step": 8950
},
{
"epoch": 3.0429821224800304,
"grad_norm": 0.8750160336494446,
"learning_rate": 4.1010722134477665e-05,
"loss": 2.791,
"step": 9000
},
{
"epoch": 3.0598875787160305,
"grad_norm": 0.7593681216239929,
"learning_rate": 4.054773977401066e-05,
"loss": 2.7791,
"step": 9050
},
{
"epoch": 3.076793034952031,
"grad_norm": 0.9249379634857178,
"learning_rate": 4.008645696481903e-05,
"loss": 2.7693,
"step": 9100
},
{
"epoch": 3.093698491188031,
"grad_norm": 1.0219660997390747,
"learning_rate": 3.962692795265914e-05,
"loss": 2.7869,
"step": 9150
},
{
"epoch": 3.110603947424031,
"grad_norm": 0.8459084630012512,
"learning_rate": 3.916920677704499e-05,
"loss": 2.778,
"step": 9200
},
{
"epoch": 3.110603947424031,
"eval_loss": 3.247343063354492,
"eval_runtime": 11.6589,
"eval_samples_per_second": 85.771,
"eval_steps_per_second": 2.745,
"step": 9200
},
{
"epoch": 3.1275094036600315,
"grad_norm": 0.7969251871109009,
"learning_rate": 3.8713347264893294e-05,
"loss": 2.7645,
"step": 9250
},
{
"epoch": 3.1444148598960315,
"grad_norm": 0.8492719531059265,
"learning_rate": 3.8259403024193616e-05,
"loss": 2.7729,
"step": 9300
},
{
"epoch": 3.1613203161320316,
"grad_norm": 0.862267255783081,
"learning_rate": 3.780742743770417e-05,
"loss": 2.7825,
"step": 9350
},
{
"epoch": 3.1782257723680316,
"grad_norm": 0.9612255692481995,
"learning_rate": 3.7357473656674126e-05,
"loss": 2.7848,
"step": 9400
},
{
"epoch": 3.195131228604032,
"grad_norm": 0.8228344321250916,
"learning_rate": 3.6909594594593175e-05,
"loss": 2.7684,
"step": 9450
},
{
"epoch": 3.212036684840032,
"grad_norm": 0.9417145252227783,
"learning_rate": 3.6463842920969026e-05,
"loss": 2.7771,
"step": 9500
},
{
"epoch": 3.228942141076032,
"grad_norm": 0.7457485795021057,
"learning_rate": 3.602027105513355e-05,
"loss": 2.8103,
"step": 9550
},
{
"epoch": 3.2458475973120327,
"grad_norm": 0.8420482277870178,
"learning_rate": 3.557893116007848e-05,
"loss": 2.7591,
"step": 9600
},
{
"epoch": 3.2458475973120327,
"eval_loss": 3.20989990234375,
"eval_runtime": 11.7,
"eval_samples_per_second": 85.47,
"eval_steps_per_second": 2.735,
"step": 9600
},
{
"epoch": 3.2627530535480327,
"grad_norm": 0.8763940930366516,
"learning_rate": 3.5139875136321066e-05,
"loss": 2.7569,
"step": 9650
},
{
"epoch": 3.2796585097840327,
"grad_norm": 0.8527898192405701,
"learning_rate": 3.470315461580079e-05,
"loss": 2.7533,
"step": 9700
},
{
"epoch": 3.2965639660200328,
"grad_norm": 0.8603160381317139,
"learning_rate": 3.426882095580751e-05,
"loss": 2.7438,
"step": 9750
},
{
"epoch": 3.3134694222560332,
"grad_norm": 0.8680624961853027,
"learning_rate": 3.3836925232942005e-05,
"loss": 2.7353,
"step": 9800
},
{
"epoch": 3.3303748784920333,
"grad_norm": 0.9345048666000366,
"learning_rate": 3.3407518237109456e-05,
"loss": 2.7667,
"step": 9850
},
{
"epoch": 3.3472803347280333,
"grad_norm": 0.838909387588501,
"learning_rate": 3.29806504655467e-05,
"loss": 2.7438,
"step": 9900
},
{
"epoch": 3.364185790964034,
"grad_norm": 0.8523705005645752,
"learning_rate": 3.2556372116883874e-05,
"loss": 2.771,
"step": 9950
},
{
"epoch": 3.381091247200034,
"grad_norm": 0.8146962523460388,
"learning_rate": 3.213473308524115e-05,
"loss": 2.7634,
"step": 10000
},
{
"epoch": 3.381091247200034,
"eval_loss": 3.1767163276672363,
"eval_runtime": 11.6994,
"eval_samples_per_second": 85.474,
"eval_steps_per_second": 2.735,
"step": 10000
},
{
"epoch": 3.397996703436034,
"grad_norm": 0.789837121963501,
"learning_rate": 3.171578295436133e-05,
"loss": 2.7489,
"step": 10050
},
{
"epoch": 3.4149021596720344,
"grad_norm": 0.8918471932411194,
"learning_rate": 3.129957099177892e-05,
"loss": 2.7424,
"step": 10100
},
{
"epoch": 3.4318076159080344,
"grad_norm": 0.859986424446106,
"learning_rate": 3.0886146143026346e-05,
"loss": 2.7504,
"step": 10150
},
{
"epoch": 3.4487130721440344,
"grad_norm": 0.8879761695861816,
"learning_rate": 3.047555702587816e-05,
"loss": 2.7572,
"step": 10200
},
{
"epoch": 3.4656185283800345,
"grad_norm": 0.8334829211235046,
"learning_rate": 3.0067851924633606e-05,
"loss": 2.7627,
"step": 10250
},
{
"epoch": 3.482523984616035,
"grad_norm": 0.8631997108459473,
"learning_rate": 2.9663078784438558e-05,
"loss": 2.7526,
"step": 10300
},
{
"epoch": 3.499429440852035,
"grad_norm": 0.8065224885940552,
"learning_rate": 2.9261285205647283e-05,
"loss": 2.7353,
"step": 10350
},
{
"epoch": 3.516334897088035,
"grad_norm": 0.8405721783638,
"learning_rate": 2.886251843822475e-05,
"loss": 2.7255,
"step": 10400
},
{
"epoch": 3.516334897088035,
"eval_loss": 3.1594960689544678,
"eval_runtime": 11.722,
"eval_samples_per_second": 85.31,
"eval_steps_per_second": 2.73,
"step": 10400
},
{
"epoch": 3.533240353324035,
"grad_norm": 0.8360841274261475,
"learning_rate": 2.8466825376190122e-05,
"loss": 2.723,
"step": 10450
},
{
"epoch": 3.5501458095600356,
"grad_norm": 0.8372629880905151,
"learning_rate": 2.8074252552102176e-05,
"loss": 2.7196,
"step": 10500
},
{
"epoch": 3.5670512657960356,
"grad_norm": 0.7810975313186646,
"learning_rate": 2.768484613158714e-05,
"loss": 2.7162,
"step": 10550
},
{
"epoch": 3.5839567220320356,
"grad_norm": 0.8663210272789001,
"learning_rate": 2.729865190790975e-05,
"loss": 2.736,
"step": 10600
},
{
"epoch": 3.600862178268036,
"grad_norm": 0.8004422187805176,
"learning_rate": 2.6915715296588083e-05,
"loss": 2.7291,
"step": 10650
},
{
"epoch": 3.617767634504036,
"grad_norm": 0.7938219308853149,
"learning_rate": 2.653608133005278e-05,
"loss": 2.6953,
"step": 10700
},
{
"epoch": 3.634673090740036,
"grad_norm": 0.8083840608596802,
"learning_rate": 2.6159794652351332e-05,
"loss": 2.7371,
"step": 10750
},
{
"epoch": 3.6515785469760367,
"grad_norm": 0.8592216968536377,
"learning_rate": 2.5786899513898066e-05,
"loss": 2.7152,
"step": 10800
},
{
"epoch": 3.6515785469760367,
"eval_loss": 3.1375598907470703,
"eval_runtime": 11.6562,
"eval_samples_per_second": 85.791,
"eval_steps_per_second": 2.745,
"step": 10800
},
{
"epoch": 3.6684840032120367,
"grad_norm": 0.7407676577568054,
"learning_rate": 2.54174397662704e-05,
"loss": 2.713,
"step": 10850
},
{
"epoch": 3.6853894594480368,
"grad_norm": 0.8212365508079529,
"learning_rate": 2.5051458857052006e-05,
"loss": 2.7203,
"step": 10900
},
{
"epoch": 3.7022949156840372,
"grad_norm": 0.8753028512001038,
"learning_rate": 2.468899982472346e-05,
"loss": 2.7398,
"step": 10950
},
{
"epoch": 3.7192003719200373,
"grad_norm": 0.8082839846611023,
"learning_rate": 2.4330105293601023e-05,
"loss": 2.7097,
"step": 11000
},
{
"epoch": 3.7361058281560373,
"grad_norm": 0.8813438415527344,
"learning_rate": 2.397481746882414e-05,
"loss": 2.7213,
"step": 11050
},
{
"epoch": 3.753011284392038,
"grad_norm": 0.8139234781265259,
"learning_rate": 2.36231781313922e-05,
"loss": 2.7002,
"step": 11100
},
{
"epoch": 3.769916740628038,
"grad_norm": 0.800145149230957,
"learning_rate": 2.3275228633251227e-05,
"loss": 2.7182,
"step": 11150
},
{
"epoch": 3.786822196864038,
"grad_norm": 0.7504892945289612,
"learning_rate": 2.29310098924309e-05,
"loss": 2.6992,
"step": 11200
},
{
"epoch": 3.786822196864038,
"eval_loss": 3.1104345321655273,
"eval_runtime": 11.6893,
"eval_samples_per_second": 85.548,
"eval_steps_per_second": 2.738,
"step": 11200
},
{
"epoch": 3.803727653100038,
"grad_norm": 0.8169353604316711,
"learning_rate": 2.2590562388232804e-05,
"loss": 2.7137,
"step": 11250
},
{
"epoch": 3.820633109336038,
"grad_norm": 0.8978986144065857,
"learning_rate": 2.225392615647006e-05,
"loss": 2.7369,
"step": 11300
},
{
"epoch": 3.8375385655720384,
"grad_norm": 0.8523368239402771,
"learning_rate": 2.1921140784759338e-05,
"loss": 2.7309,
"step": 11350
},
{
"epoch": 3.8544440218080385,
"grad_norm": 0.890410840511322,
"learning_rate": 2.1592245407865252e-05,
"loss": 2.6908,
"step": 11400
},
{
"epoch": 3.8713494780440385,
"grad_norm": 0.8792175650596619,
"learning_rate": 2.126727870309841e-05,
"loss": 2.698,
"step": 11450
},
{
"epoch": 3.888254934280039,
"grad_norm": 0.7908258438110352,
"learning_rate": 2.09462788857669e-05,
"loss": 2.6956,
"step": 11500
},
{
"epoch": 3.905160390516039,
"grad_norm": 0.7934091687202454,
"learning_rate": 2.0629283704682392e-05,
"loss": 2.7036,
"step": 11550
},
{
"epoch": 3.922065846752039,
"grad_norm": 0.8208107948303223,
"learning_rate": 2.031633043772086e-05,
"loss": 2.7007,
"step": 11600
},
{
"epoch": 3.922065846752039,
"eval_loss": 3.0899033546447754,
"eval_runtime": 11.6628,
"eval_samples_per_second": 85.743,
"eval_steps_per_second": 2.744,
"step": 11600
}
],
"logging_steps": 50,
"max_steps": 14785,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.925050875573101e+20,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}