Vision-Zero-InternVL3-8B-Clevr / trainer_state.json
Qinsi1's picture
Upload folder using huggingface_hub
7bb727d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.285714285714286,
"eval_steps": 500,
"global_step": 30,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"advantages_interactive_phase": -7.450580596923828e-09,
"clip_ratio": 0.009601324272807688,
"clue_civilian_adjusted_reward_mean": 0.04645636805253976,
"clue_civilian_advantage_adjustment": -0.0340991875030158,
"clue_civilian_baseline": 0.0340991875030158,
"clue_civilian_raw_reward_mean": 0.08055555555555557,
"clue_civilian_votes_avg": 0.5833333333333333,
"clue_invalid_votes": 2.875,
"clue_na_votes": 0.375,
"clue_spy_adjusted_reward": -0.13936911300005017,
"clue_spy_advantage_adjustment": 0.10229755366661651,
"clue_spy_baseline": -0.10229755366661651,
"clue_spy_raw_reward": -0.24166666666666672,
"clue_spy_votes_received": 3.0,
"clue_suspicion_potential_psi": 2.4166666666666665,
"clue_total_valid_votes": 4.75,
"completion_length": 468.671875,
"epoch": 0.14285714285714285,
"grad_norm": 1.5805621147155762,
"kl": 0.0014362335205078125,
"learning_rate": 3.5714285714285716e-07,
"loss": 0.0009,
"loss_interactive_phase": 0.0008710725232958794,
"reward": 0.04593749572205778,
"reward_original_clue_max": 0.13422530971612442,
"reward_original_clue_mean": -2.210607728011897e-09,
"reward_original_clue_min": -0.2361905828296588,
"reward_original_clue_std": 0.15096539176990595,
"reward_original_decision_max": 2.1412500000000003,
"reward_original_decision_mean": 0.18375000000000005,
"reward_original_decision_min": -1.5,
"reward_original_decision_std": 1.5282191941515817,
"reward_original_overall_mean": 0.09187499889469616,
"reward_original_overall_std": 1.1872952864471322,
"reward_std": 1.1281701095850887,
"step": 1
},
{
"advantages_interactive_phase": 1.1699739843606949e-08,
"clip_ratio": 0.01009189459728077,
"clue_civilian_adjusted_reward_mean": 0.032244541979207814,
"clue_civilian_advantage_adjustment": -0.04969990246523662,
"clue_civilian_baseline": 0.04969990246523663,
"clue_civilian_raw_reward_mean": 0.08194444444444444,
"clue_civilian_votes_avg": 0.29166666666666663,
"clue_invalid_votes": 3.375,
"clue_na_votes": 1.0,
"clue_spy_adjusted_reward": -0.09673363023500019,
"clue_spy_advantage_adjustment": 0.14909970309833318,
"clue_spy_baseline": -0.14909970309833318,
"clue_spy_raw_reward": -0.24583333333333335,
"clue_spy_votes_received": 2.75,
"clue_suspicion_potential_psi": 2.458333333333333,
"clue_total_valid_votes": 3.625,
"completion_length": 346.546875,
"epoch": 0.2857142857142857,
"grad_norm": 0.9062501192092896,
"kl": 0.0015811920166015625,
"learning_rate": 7.142857142857143e-07,
"loss": 0.0006,
"loss_interactive_phase": 0.000601769017521292,
"reward": 0.02179688058128391,
"reward_original_clue_max": 0.12385744329827358,
"reward_original_clue_mean": -1.0743441833859987e-09,
"reward_original_clue_min": -0.2033983952724114,
"reward_original_clue_std": 0.13295147035012433,
"reward_original_decision_max": 1.3687500000000001,
"reward_original_decision_mean": 0.08718750000000001,
"reward_original_decision_min": -1.5,
"reward_original_decision_std": 1.2333472361619537,
"reward_original_overall_mean": 0.04359374946282798,
"reward_original_overall_std": 1.0888861639122744,
"reward_std": 0.9135217889060865,
"step": 2
},
{
"advantages_interactive_phase": -3.91155481338501e-08,
"clip_ratio": 0.010360982327256352,
"clue_civilian_adjusted_reward_mean": -0.012963048645367207,
"clue_civilian_advantage_adjustment": -0.06712971531203388,
"clue_civilian_baseline": 0.06712971531203388,
"clue_civilian_raw_reward_mean": 0.05416666666666666,
"clue_civilian_votes_avg": 0.5,
"clue_invalid_votes": 4.125,
"clue_na_votes": 0.25,
"clue_spy_adjusted_reward": 0.03888914454732575,
"clue_spy_advantage_adjustment": 0.20138914454732576,
"clue_spy_baseline": -0.20138914454732576,
"clue_spy_raw_reward": -0.16249999999999998,
"clue_spy_votes_received": 2.125,
"clue_suspicion_potential_psi": 1.6249999999999998,
"clue_total_valid_votes": 3.625,
"completion_length": 469.40625,
"epoch": 0.42857142857142855,
"grad_norm": 1.7866086959838867,
"kl": 0.0020046234130859375,
"learning_rate": 1.0714285714285714e-06,
"loss": 0.001,
"loss_interactive_phase": 0.0009609556873328984,
"reward": -0.07253908214457255,
"reward_original_clue_max": 0.17909297508117578,
"reward_original_clue_mean": -3.4719396715260364e-10,
"reward_original_clue_min": -0.1906668791792172,
"reward_original_clue_std": 0.148119477615202,
"reward_original_decision_max": 0.8700000000000001,
"reward_original_decision_mean": -0.29015625,
"reward_original_decision_min": -1.5,
"reward_original_decision_std": 1.012053904965003,
"reward_original_overall_mean": -0.14507812517359697,
"reward_original_overall_std": 1.0090200646515894,
"reward_std": 1.0390325135884786,
"step": 3
},
{
"advantages_interactive_phase": 1.618172973394394e-08,
"clip_ratio": 0.011920451768673956,
"clue_civilian_adjusted_reward_mean": -0.016897605204000857,
"clue_civilian_advantage_adjustment": -0.05578649409288976,
"clue_civilian_baseline": 0.05578649409288975,
"clue_civilian_raw_reward_mean": 0.038888888888888896,
"clue_civilian_votes_avg": 0.5833333333333333,
"clue_invalid_votes": 3.75,
"clue_na_votes": 0.75,
"clue_spy_adjusted_reward": 0.05069280585926233,
"clue_spy_advantage_adjustment": 0.16735947252592903,
"clue_spy_baseline": -0.16735947252592903,
"clue_spy_raw_reward": -0.11666666666666668,
"clue_spy_votes_received": 1.75,
"clue_suspicion_potential_psi": 1.1666666666666665,
"clue_total_valid_votes": 3.5,
"completion_length": 349.9765625,
"epoch": 0.5714285714285714,
"grad_norm": 0.8659637570381165,
"kl": 0.0020847320556640625,
"learning_rate": 1.4285714285714286e-06,
"loss": 0.0006,
"loss_interactive_phase": 0.0005855783383594826,
"reward": -0.09585936751868138,
"reward_original_clue_max": 0.12609553713896215,
"reward_original_clue_mean": -2.438185060966991e-09,
"reward_original_clue_min": -0.1539659667175763,
"reward_original_clue_std": 0.11102248687112745,
"reward_original_decision_max": 1.7175,
"reward_original_decision_mean": -0.3834375,
"reward_original_decision_min": -1.5,
"reward_original_decision_std": 1.318500391473726,
"reward_original_overall_mean": -0.1917187512190925,
"reward_original_overall_std": 1.0483758882247372,
"reward_std": 0.8920968279522142,
"step": 4
},
{
"advantages_interactive_phase": -9.313225746154785e-09,
"clip_ratio": 0.011668159277178347,
"clue_civilian_adjusted_reward_mean": 0.012703904980288123,
"clue_civilian_advantage_adjustment": -0.06090720613082299,
"clue_civilian_baseline": 0.060907206130823004,
"clue_civilian_raw_reward_mean": 0.07361111111111111,
"clue_civilian_votes_avg": 0.6666666666666666,
"clue_invalid_votes": 3.125,
"clue_na_votes": 0.0,
"clue_spy_adjusted_reward": -0.038111721525748175,
"clue_spy_advantage_adjustment": 0.18272161180758517,
"clue_spy_baseline": -0.18272161180758517,
"clue_spy_raw_reward": -0.22083333333333333,
"clue_spy_votes_received": 2.875,
"clue_suspicion_potential_psi": 2.208333333333334,
"clue_total_valid_votes": 4.875,
"completion_length": 454.71875,
"epoch": 0.7142857142857143,
"grad_norm": 1.5932543277740479,
"kl": 0.0032215118408203125,
"learning_rate": 1.7857142857142859e-06,
"loss": 0.0011,
"loss_interactive_phase": 0.0011206967756152153,
"reward": 0.020976557431831903,
"reward_original_clue_max": 0.14661833481616776,
"reward_original_clue_mean": -1.646220952741459e-09,
"reward_original_clue_min": -0.24318362303306112,
"reward_original_clue_std": 0.15305170150846256,
"reward_original_decision_max": 1.6425,
"reward_original_decision_mean": 0.08390625000000004,
"reward_original_decision_min": -1.5,
"reward_original_decision_std": 1.4370029970655516,
"reward_original_overall_mean": 0.04195312417688955,
"reward_original_overall_std": 1.1718515588438811,
"reward_std": 1.1204482532340438,
"step": 5
},
{
"advantages_interactive_phase": 1.3998942449688911e-08,
"clip_ratio": 0.010447208071127534,
"clue_civilian_adjusted_reward_mean": 0.019708324802807216,
"clue_civilian_advantage_adjustment": -0.06084723075274834,
"clue_civilian_baseline": 0.06084723075274835,
"clue_civilian_raw_reward_mean": 0.08055555555555557,
"clue_civilian_votes_avg": 0.5833333333333333,
"clue_invalid_votes": 1.875,
"clue_na_votes": 1.375,
"clue_spy_adjusted_reward": -0.05912497942333426,
"clue_spy_advantage_adjustment": 0.18254168724333242,
"clue_spy_baseline": -0.18254168724333242,
"clue_spy_raw_reward": -0.2416666666666667,
"clue_spy_votes_received": 3.0,
"clue_suspicion_potential_psi": 2.416666666666667,
"clue_total_valid_votes": 4.75,
"completion_length": 334.2265625,
"epoch": 0.8571428571428571,
"grad_norm": 0.8248350024223328,
"kl": 0.0023393630981445312,
"learning_rate": 2.1428571428571427e-06,
"loss": 0.0006,
"loss_interactive_phase": 0.0005622203752864152,
"reward": 0.0834375066860392,
"reward_original_clue_max": 0.1274395314956929,
"reward_original_clue_mean": -1.253728150484891e-09,
"reward_original_clue_min": -0.188569575974057,
"reward_original_clue_std": 0.12904785603086397,
"reward_original_decision_max": 2.1412500000000003,
"reward_original_decision_mean": 0.33375000000000005,
"reward_original_decision_min": -1.425,
"reward_original_decision_std": 1.405177486682367,
"reward_original_overall_mean": 0.16687499937313596,
"reward_original_overall_std": 1.1144284606169126,
"reward_std": 0.9262521229778956,
"step": 6
},
{
"advantages_interactive_phase": 2.421438694000244e-08,
"clip_ratio": 0.011843746062368155,
"clue_civilian_adjusted_reward_mean": 0.06142050400483314,
"clue_civilian_advantage_adjustment": -0.09830171821738909,
"clue_civilian_baseline": 0.0983017182173891,
"clue_civilian_raw_reward_mean": 0.15972222222222224,
"clue_civilian_votes_avg": 0.5833333333333333,
"clue_invalid_votes": 0.625,
"clue_na_votes": 0.25,
"clue_spy_adjusted_reward": -0.18426152068943952,
"clue_spy_advantage_adjustment": 0.2949051459772272,
"clue_spy_baseline": -0.2949051459772272,
"clue_spy_raw_reward": -0.47916666666666674,
"clue_spy_votes_received": 5.375,
"clue_suspicion_potential_psi": 4.791666666666667,
"clue_total_valid_votes": 7.125,
"completion_length": 403.921875,
"epoch": 1.0,
"grad_norm": 1.83256995677948,
"kl": 0.008373260498046875,
"learning_rate": 2.5e-06,
"loss": 0.0082,
"loss_interactive_phase": 0.008202007971704006,
"reward": 0.33738282406500975,
"reward_original_clue_max": 0.16317140174540612,
"reward_original_clue_mean": -2.1687350280430798e-09,
"reward_original_clue_min": -0.2651790850966792,
"reward_original_clue_std": 0.1737400336164854,
"reward_original_decision_max": 2.1412500000000003,
"reward_original_decision_mean": 1.34953125,
"reward_original_decision_min": -1.05,
"reward_original_decision_std": 1.2417606105114989,
"reward_original_overall_mean": 0.6747656239156326,
"reward_original_overall_std": 1.2304992480944565,
"reward_std": 1.149772112760493,
"step": 7
},
{
"advantages_interactive_phase": 7.101334631443024e-09,
"clip_ratio": 0.010323125752620399,
"clue_civilian_adjusted_reward_mean": 0.014969077351818358,
"clue_civilian_advantage_adjustment": -0.13919758931484832,
"clue_civilian_baseline": 0.13919758931484832,
"clue_civilian_raw_reward_mean": 0.1541666666666667,
"clue_civilian_votes_avg": 0.6249999999999999,
"clue_invalid_votes": 0.5,
"clue_na_votes": 0.375,
"clue_spy_adjusted_reward": -0.0449072601736762,
"clue_spy_advantage_adjustment": 0.41759273982632383,
"clue_spy_baseline": -0.41759273982632383,
"clue_spy_raw_reward": -0.4625,
"clue_spy_votes_received": 5.25,
"clue_suspicion_potential_psi": 4.625,
"clue_total_valid_votes": 7.125,
"completion_length": 296.3125,
"epoch": 1.1428571428571428,
"grad_norm": 0.8603692054748535,
"kl": 0.00751495361328125,
"learning_rate": 2.8571428571428573e-06,
"loss": 0.0006,
"loss_interactive_phase": 0.0005805297987535596,
"reward": 0.32882812679327855,
"reward_original_clue_max": 0.14714505206208714,
"reward_original_clue_mean": -7.029555287255823e-09,
"reward_original_clue_min": -0.25208323488394496,
"reward_original_clue_std": 0.1584445317000752,
"reward_original_decision_max": 2.49,
"reward_original_decision_mean": 1.3153125,
"reward_original_decision_min": -1.05,
"reward_original_decision_std": 1.3998854722184924,
"reward_original_overall_mean": 0.6576562464852225,
"reward_original_overall_std": 1.2712911404891047,
"reward_std": 1.006177807237865,
"step": 8
},
{
"advantages_interactive_phase": 8.754432201385498e-08,
"clip_ratio": 0.00764932727906853,
"clue_civilian_adjusted_reward_mean": -0.04392835440414705,
"clue_civilian_advantage_adjustment": -0.12309502107081373,
"clue_civilian_baseline": 0.12309502107081373,
"clue_civilian_raw_reward_mean": 0.07916666666666668,
"clue_civilian_votes_avg": 1.25,
"clue_invalid_votes": 0.375,
"clue_na_votes": 0.25,
"clue_spy_adjusted_reward": 0.13178503837842287,
"clue_spy_advantage_adjustment": 0.36928503837842286,
"clue_spy_baseline": -0.36928503837842286,
"clue_spy_raw_reward": -0.23750000000000002,
"clue_spy_votes_received": 3.625,
"clue_suspicion_potential_psi": 2.375,
"clue_total_valid_votes": 7.375,
"completion_length": 383.28125,
"epoch": 1.2857142857142856,
"grad_norm": 1.2030161619186401,
"kl": 0.021331787109375,
"learning_rate": 3.2142857142857147e-06,
"loss": 0.0013,
"loss_interactive_phase": 0.0013305692409630865,
"reward": 0.15667972972003488,
"reward_original_clue_max": 0.31233062085321145,
"reward_original_clue_mean": -6.208504575678717e-09,
"reward_original_clue_min": -0.36730352417905254,
"reward_original_clue_std": 0.2591782484917472,
"reward_original_decision_max": 1.3687500000000001,
"reward_original_decision_mean": 0.6267187500000002,
"reward_original_decision_min": -0.7012499999999999,
"reward_original_decision_std": 0.7808322541858488,
"reward_original_overall_mean": 0.31335937189574775,
"reward_original_overall_std": 0.9976451254390301,
"reward_std": 0.8997144106280117,
"step": 9
},
{
"advantages_interactive_phase": -5.820766091346741e-10,
"clip_ratio": 0.009187435440253466,
"clue_civilian_adjusted_reward_mean": 0.03822758988085955,
"clue_civilian_advantage_adjustment": -0.11732796567469601,
"clue_civilian_baseline": 0.11732796567469601,
"clue_civilian_raw_reward_mean": 0.15555555555555556,
"clue_civilian_votes_avg": 0.7083333333333333,
"clue_invalid_votes": 0.125,
"clue_na_votes": 0.375,
"clue_spy_adjusted_reward": -0.11468278722393674,
"clue_spy_advantage_adjustment": 0.35198387944273,
"clue_spy_baseline": -0.35198387944273,
"clue_spy_raw_reward": -0.4666666666666667,
"clue_spy_votes_received": 5.375,
"clue_suspicion_potential_psi": 4.666666666666667,
"clue_total_valid_votes": 7.5,
"completion_length": 267.59375,
"epoch": 1.4285714285714286,
"grad_norm": 1.3427386283874512,
"kl": 0.0283355712890625,
"learning_rate": 3.5714285714285718e-06,
"loss": 0.002,
"loss_interactive_phase": 0.0019903209613403305,
"reward": 0.34910156111012686,
"reward_original_clue_max": 0.21486971111570505,
"reward_original_clue_mean": -4.395339522032804e-09,
"reward_original_clue_min": -0.3329915751254489,
"reward_original_clue_std": 0.2213489408298035,
"reward_original_decision_max": 2.1412500000000003,
"reward_original_decision_mean": 1.39640625,
"reward_original_decision_min": -0.052499999999999936,
"reward_original_decision_std": 0.777539948847112,
"reward_original_overall_mean": 0.6982031228023303,
"reward_original_overall_std": 1.1362766329525933,
"reward_std": 0.8716088880016916,
"step": 10
},
{
"advantages_interactive_phase": 7.450580596923828e-09,
"clip_ratio": 0.007512057200074196,
"clue_civilian_adjusted_reward_mean": 0.029820853200277782,
"clue_civilian_advantage_adjustment": -0.14240136902194445,
"clue_civilian_baseline": 0.14240136902194445,
"clue_civilian_raw_reward_mean": 0.17222222222222225,
"clue_civilian_votes_avg": 0.45833333333333337,
"clue_invalid_votes": 0.375,
"clue_na_votes": 0.625,
"clue_spy_adjusted_reward": -0.08946257515700651,
"clue_spy_advantage_adjustment": 0.42720409150966016,
"clue_spy_baseline": -0.42720409150966016,
"clue_spy_raw_reward": -0.5166666666666667,
"clue_spy_votes_received": 5.625,
"clue_suspicion_potential_psi": 5.166666666666667,
"clue_total_valid_votes": 7.0,
"completion_length": 345.90625,
"epoch": 1.5714285714285714,
"grad_norm": 1.4723293781280518,
"kl": 0.03619384765625,
"learning_rate": 3.928571428571429e-06,
"loss": 0.0017,
"loss_interactive_phase": 0.0016628647717880085,
"reward": 0.37558594025302955,
"reward_original_clue_max": 0.16687582015889904,
"reward_original_clue_mean": -3.8890432892298155e-09,
"reward_original_clue_min": -0.2556842087822944,
"reward_original_clue_std": 0.1723200812438524,
"reward_original_decision_max": 2.49,
"reward_original_decision_mean": 1.50234375,
"reward_original_decision_min": -0.1274999999999999,
"reward_original_decision_std": 0.9362325016572549,
"reward_original_overall_mean": 0.7511718730554785,
"reward_original_overall_std": 1.2049933860634257,
"reward_std": 1.0033885483907903,
"step": 11
},
{
"advantages_interactive_phase": 4.249159246683121e-09,
"clip_ratio": 0.00809995060262736,
"clue_civilian_adjusted_reward_mean": 0.022188266754369242,
"clue_civilian_advantage_adjustment": -0.187533955467853,
"clue_civilian_baseline": 0.187533955467853,
"clue_civilian_raw_reward_mean": 0.20972222222222225,
"clue_civilian_votes_avg": 0.33333333333333337,
"clue_invalid_votes": 0.125,
"clue_na_votes": 0.25,
"clue_spy_adjusted_reward": -0.0665648172282207,
"clue_spy_advantage_adjustment": 0.5626018494384459,
"clue_spy_baseline": -0.5626018494384459,
"clue_spy_raw_reward": -0.6291666666666668,
"clue_spy_votes_received": 6.625,
"clue_suspicion_potential_psi": 6.291666666666667,
"clue_total_valid_votes": 7.625,
"completion_length": 252.8515625,
"epoch": 1.7142857142857144,
"grad_norm": 0.6337663531303406,
"kl": 0.0308074951171875,
"learning_rate": 4.2857142857142855e-06,
"loss": 0.0016,
"loss_interactive_phase": 0.0016085498355096206,
"reward": 0.4791796885642602,
"reward_original_clue_max": 0.13644402464982577,
"reward_original_clue_mean": -4.2412782478781774e-09,
"reward_original_clue_min": -0.2099872417903439,
"reward_original_clue_std": 0.13869593786155765,
"reward_original_decision_max": 2.49,
"reward_original_decision_mean": 1.91671875,
"reward_original_decision_min": 0.2962500000000001,
"reward_original_decision_std": 0.7221503219446256,
"reward_original_overall_mean": 0.9583593728793611,
"reward_original_overall_std": 1.2329093618898852,
"reward_std": 0.8671444384671382,
"step": 12
},
{
"advantages_interactive_phase": 1.862645149230957e-08,
"clip_ratio": 0.0037154321908019483,
"clue_civilian_adjusted_reward_mean": -0.0063334161387741394,
"clue_civilian_advantage_adjustment": -0.17300008280544082,
"clue_civilian_baseline": 0.1730000828054408,
"clue_civilian_raw_reward_mean": 0.16666666666666663,
"clue_civilian_votes_avg": 0.75,
"clue_invalid_votes": 0.0,
"clue_na_votes": 0.0,
"clue_spy_adjusted_reward": 0.019000245031720445,
"clue_spy_advantage_adjustment": 0.5190002450317205,
"clue_spy_baseline": -0.5190002450317205,
"clue_spy_raw_reward": -0.5,
"clue_spy_votes_received": 5.75,
"clue_suspicion_potential_psi": 5.000000000000001,
"clue_total_valid_votes": 8.0,
"completion_length": 337.15625,
"epoch": 1.8571428571428572,
"grad_norm": 0.6522341370582581,
"kl": 0.060089111328125,
"learning_rate": 4.642857142857144e-06,
"loss": 0.0026,
"loss_interactive_phase": 0.002614069191622548,
"reward": 0.3841406341016882,
"reward_original_clue_max": 0.2507983958494952,
"reward_original_clue_mean": -8.461504942894327e-10,
"reward_original_clue_min": -0.3617781823290228,
"reward_original_clue_std": 0.23592987163167928,
"reward_original_decision_max": 1.6425,
"reward_original_decision_mean": 1.5365625,
"reward_original_decision_min": 1.21875,
"reward_original_decision_std": 0.183489132426828,
"reward_original_overall_mean": 0.7682812495769249,
"reward_original_overall_std": 1.0923944210890442,
"reward_std": 0.6130125272216156,
"step": 13
},
{
"advantages_interactive_phase": 1.6996636986732483e-08,
"clip_ratio": 0.00847951346077025,
"clue_civilian_adjusted_reward_mean": -0.008037120541629665,
"clue_civilian_advantage_adjustment": -0.1774815649860741,
"clue_civilian_baseline": 0.1774815649860741,
"clue_civilian_raw_reward_mean": 0.16944444444444443,
"clue_civilian_votes_avg": 0.6666666666666667,
"clue_invalid_votes": 0.0,
"clue_na_votes": 0.25,
"clue_spy_adjusted_reward": 0.024111363006878495,
"clue_spy_advantage_adjustment": 0.5324446963402119,
"clue_spy_baseline": -0.5324446963402119,
"clue_spy_raw_reward": -0.5083333333333333,
"clue_spy_votes_received": 5.75,
"clue_suspicion_potential_psi": 5.083333333333334,
"clue_total_valid_votes": 7.75,
"completion_length": 263.8359375,
"epoch": 2.0,
"grad_norm": 0.6224793195724487,
"kl": 0.05419921875,
"learning_rate": 5e-06,
"loss": 0.0029,
"loss_interactive_phase": 0.0029011927836108953,
"reward": 0.388828133584693,
"reward_original_clue_max": 0.2085423028111688,
"reward_original_clue_mean": 3.4549737831963967e-10,
"reward_original_clue_min": -0.2378369221284988,
"reward_original_clue_std": 0.17641129318052418,
"reward_original_decision_max": 2.1412500000000003,
"reward_original_decision_mean": 1.5553125000000003,
"reward_original_decision_min": 0.3712500000000002,
"reward_original_decision_std": 0.668291458945668,
"reward_original_overall_mean": 0.777656250172749,
"reward_original_overall_std": 1.157252940433482,
"reward_std": 0.8331386712800759,
"step": 14
},
{
"advantages_interactive_phase": 1.6763806343078613e-08,
"clip_ratio": 0.003465894202236086,
"clue_civilian_adjusted_reward_mean": 0.023268842590345305,
"clue_civilian_advantage_adjustment": -0.18367560185409915,
"clue_civilian_baseline": 0.18367560185409915,
"clue_civilian_raw_reward_mean": 0.20694444444444446,
"clue_civilian_votes_avg": 0.41666666666666663,
"clue_invalid_votes": 0.125,
"clue_na_votes": 0.0,
"clue_spy_adjusted_reward": -0.06980651791005336,
"clue_spy_advantage_adjustment": 0.55102681542328,
"clue_spy_baseline": -0.55102681542328,
"clue_spy_raw_reward": -0.6208333333333333,
"clue_spy_votes_received": 6.625,
"clue_suspicion_potential_psi": 6.208333333333333,
"clue_total_valid_votes": 7.875,
"completion_length": 355.0625,
"epoch": 2.142857142857143,
"grad_norm": 0.7302604913711548,
"kl": 0.08294677734375,
"learning_rate": 5.357142857142857e-06,
"loss": 0.0034,
"loss_interactive_phase": 0.0033593956904951483,
"reward": 0.4744921964982146,
"reward_original_clue_max": 0.15940368854312534,
"reward_original_clue_mean": 2.465245637836566e-09,
"reward_original_clue_min": -0.24915193754399267,
"reward_original_clue_std": 0.16536223128526092,
"reward_original_decision_max": 2.06625,
"reward_original_decision_mean": 1.8979687500000002,
"reward_original_decision_min": 1.14375,
"reward_original_decision_std": 0.34843519072601103,
"reward_original_overall_mean": 0.9489843762326229,
"reward_original_overall_std": 1.1879908640849528,
"reward_std": 0.7276260579460828,
"step": 15
},
{
"advantages_interactive_phase": -5.820766091346741e-09,
"clip_ratio": 0.006389049158315174,
"clue_civilian_adjusted_reward_mean": -0.046890086079656224,
"clue_civilian_advantage_adjustment": -0.1635567527463229,
"clue_civilian_baseline": 0.1635567527463229,
"clue_civilian_raw_reward_mean": 0.11666666666666668,
"clue_civilian_votes_avg": 0.625,
"clue_invalid_votes": 0.125,
"clue_na_votes": 1.875,
"clue_spy_adjusted_reward": 0.14067026040217548,
"clue_spy_advantage_adjustment": 0.49067026040217543,
"clue_spy_baseline": -0.49067026040217543,
"clue_spy_raw_reward": -0.35000000000000003,
"clue_spy_votes_received": 4.125,
"clue_suspicion_potential_psi": 3.5,
"clue_total_valid_votes": 6.0,
"completion_length": 298.203125,
"epoch": 2.2857142857142856,
"grad_norm": 18.787372589111328,
"kl": 0.0643310546875,
"learning_rate": 5.7142857142857145e-06,
"loss": 0.0227,
"loss_interactive_phase": 0.02273198706097901,
"reward": 0.2448046847248174,
"reward_original_clue_max": 0.31508776732274035,
"reward_original_clue_mean": 5.408017063682213e-10,
"reward_original_clue_min": -0.296307593000221,
"reward_original_clue_std": 0.24153634073282915,
"reward_original_decision_max": 1.7925,
"reward_original_decision_mean": 0.97921875,
"reward_original_decision_min": 0.37125000000000014,
"reward_original_decision_std": 0.4794102853203757,
"reward_original_overall_mean": 0.4896093752704009,
"reward_original_overall_std": 0.942394237518095,
"reward_std": 0.732489835712951,
"step": 16
},
{
"advantages_interactive_phase": 3.91155481338501e-08,
"clip_ratio": 0.003673899220302701,
"clue_civilian_adjusted_reward_mean": 0.04772389224517483,
"clue_civilian_advantage_adjustment": -0.16199832997704738,
"clue_civilian_baseline": 0.16199832997704738,
"clue_civilian_raw_reward_mean": 0.2097222222222222,
"clue_civilian_votes_avg": 0.20833333333333331,
"clue_invalid_votes": 0.375,
"clue_na_votes": 0.5,
"clue_spy_adjusted_reward": -0.1431716761674981,
"clue_spy_advantage_adjustment": 0.4859949904991686,
"clue_spy_baseline": -0.4859949904991686,
"clue_spy_raw_reward": -0.6291666666666668,
"clue_spy_votes_received": 6.5,
"clue_suspicion_potential_psi": 6.291666666666667,
"clue_total_valid_votes": 7.125,
"completion_length": 363.796875,
"epoch": 2.4285714285714284,
"grad_norm": 1.1137752532958984,
"kl": 0.099365234375,
"learning_rate": 6.071428571428571e-06,
"loss": 0.0042,
"loss_interactive_phase": 0.0041947553982026875,
"reward": 0.4659375195932758,
"reward_original_clue_max": 0.13827404245616234,
"reward_original_clue_mean": 1.420066000554221e-10,
"reward_original_clue_min": -0.25038849304515226,
"reward_original_clue_std": 0.16193395395069143,
"reward_original_decision_max": 2.1412500000000003,
"reward_original_decision_mean": 1.86375,
"reward_original_decision_min": 0.7200000000000001,
"reward_original_decision_std": 0.50161616427093,
"reward_original_overall_mean": 0.9318750000710034,
"reward_original_overall_std": 1.1870721598954623,
"reward_std": 0.8607973168537828,
"step": 17
},
{
"advantages_interactive_phase": 2.837623469531536e-09,
"clip_ratio": 0.00735139346215874,
"clue_civilian_adjusted_reward_mean": -0.0017000992579862158,
"clue_civilian_advantage_adjustment": -0.19475565481354176,
"clue_civilian_baseline": 0.19475565481354176,
"clue_civilian_raw_reward_mean": 0.19305555555555554,
"clue_civilian_votes_avg": 0.20833333333333331,
"clue_invalid_votes": 0.5,
"clue_na_votes": 0.875,
"clue_spy_adjusted_reward": 0.0051002900798029355,
"clue_spy_advantage_adjustment": 0.5842669567464696,
"clue_spy_baseline": -0.5842669567464696,
"clue_spy_raw_reward": -0.5791666666666666,
"clue_spy_votes_received": 6.0,
"clue_suspicion_potential_psi": 5.791666666666667,
"clue_total_valid_votes": 6.625,
"completion_length": 296.6875,
"epoch": 2.571428571428571,
"grad_norm": 0.6697026491165161,
"kl": 0.083984375,
"learning_rate": 6.4285714285714295e-06,
"loss": 0.0038,
"loss_interactive_phase": 0.003771388641325757,
"reward": 0.417656250937927,
"reward_original_clue_max": 0.112484931462635,
"reward_original_clue_mean": -1.923538928399332e-09,
"reward_original_clue_min": -0.12320458815006033,
"reward_original_clue_std": 0.09749921717912478,
"reward_original_decision_max": 2.49,
"reward_original_decision_mean": 1.6706250000000002,
"reward_original_decision_min": -0.27749999999999997,
"reward_original_decision_std": 1.025463516531797,
"reward_original_overall_mean": 0.8353124990382306,
"reward_original_overall_std": 1.2562141688103865,
"reward_std": 0.9142232616213656,
"step": 18
},
{
"advantages_interactive_phase": 2.421438694000244e-08,
"clip_ratio": 0.00690445041982457,
"clue_civilian_adjusted_reward_mean": -0.02088481382776897,
"clue_civilian_advantage_adjustment": -0.19588481382776896,
"clue_civilian_baseline": 0.19588481382776896,
"clue_civilian_raw_reward_mean": 0.17500000000000002,
"clue_civilian_votes_avg": 0.375,
"clue_invalid_votes": 0.125,
"clue_na_votes": 1.125,
"clue_spy_adjusted_reward": 0.06265442930658618,
"clue_spy_advantage_adjustment": 0.5876544293065862,
"clue_spy_baseline": -0.5876544293065862,
"clue_spy_raw_reward": -0.525,
"clue_spy_votes_received": 5.625,
"clue_suspicion_potential_psi": 5.249999999999999,
"clue_total_valid_votes": 6.75,
"completion_length": 345.75,
"epoch": 2.7142857142857144,
"grad_norm": 1.3586255311965942,
"kl": 0.135986328125,
"learning_rate": 6.785714285714287e-06,
"loss": 0.006,
"loss_interactive_phase": 0.00598463078495115,
"reward": 0.38964844884614847,
"reward_original_clue_max": 0.19217094683455577,
"reward_original_clue_mean": -3.044180179635758e-09,
"reward_original_clue_min": -0.18790133135573855,
"reward_original_clue_std": 0.15165388026680496,
"reward_original_decision_max": 2.1412500000000003,
"reward_original_decision_mean": 1.55859375,
"reward_original_decision_min": 0.3712500000000002,
"reward_original_decision_std": 0.6276860023276215,
"reward_original_overall_mean": 0.77929687347791,
"reward_original_overall_std": 1.1151671426286367,
"reward_std": 0.8916601099963023,
"step": 19
},
{
"advantages_interactive_phase": 1.1408701539039612e-08,
"clip_ratio": 0.0066332001879345626,
"clue_civilian_adjusted_reward_mean": -0.014852289498390544,
"clue_civilian_advantage_adjustment": -0.15790784505394606,
"clue_civilian_baseline": 0.1579078450539461,
"clue_civilian_raw_reward_mean": 0.14305555555555555,
"clue_civilian_votes_avg": 0.7083333333333334,
"clue_invalid_votes": 0.0,
"clue_na_votes": 0.875,
"clue_spy_adjusted_reward": 0.04455685833978199,
"clue_spy_advantage_adjustment": 0.4737235250064487,
"clue_spy_baseline": -0.47372352500644876,
"clue_spy_raw_reward": -0.4291666666666667,
"clue_spy_votes_received": 5.0,
"clue_suspicion_potential_psi": 4.291666666666667,
"clue_total_valid_votes": 7.125,
"completion_length": 291.84375,
"epoch": 2.857142857142857,
"grad_norm": 0.7186980247497559,
"kl": 0.120361328125,
"learning_rate": 7.1428571428571436e-06,
"loss": 0.0053,
"loss_interactive_phase": 0.005272059410344809,
"reward": 0.321093755069639,
"reward_original_clue_max": 0.23781143176053088,
"reward_original_clue_mean": -2.538847402276667e-09,
"reward_original_clue_min": -0.3372735295858061,
"reward_original_clue_std": 0.22314844267377926,
"reward_original_decision_max": 2.1412500000000003,
"reward_original_decision_mean": 1.2843750000000003,
"reward_original_decision_min": 0.44625000000000015,
"reward_original_decision_std": 0.682584747416193,
"reward_original_overall_mean": 0.6421874987305765,
"reward_original_overall_std": 1.0984288921667873,
"reward_std": 0.8115738822191327,
"step": 20
},
{
"advantages_interactive_phase": 1.862645149230957e-09,
"clip_ratio": 0.011862117098644376,
"clue_civilian_adjusted_reward_mean": -0.011171407247594729,
"clue_civilian_advantage_adjustment": -0.1500602961364836,
"clue_civilian_baseline": 0.1500602961364836,
"clue_civilian_raw_reward_mean": 0.1388888888888889,
"clue_civilian_votes_avg": 0.4583333333333333,
"clue_invalid_votes": 0.125,
"clue_na_votes": 1.875,
"clue_spy_adjusted_reward": 0.03351421315180729,
"clue_spy_advantage_adjustment": 0.450180879818474,
"clue_spy_baseline": -0.450180879818474,
"clue_spy_raw_reward": -0.41666666666666674,
"clue_spy_votes_received": 4.625,
"clue_suspicion_potential_psi": 4.166666666666667,
"clue_total_valid_votes": 6.0,
"completion_length": 341.484375,
"epoch": 3.0,
"grad_norm": 2.092695713043213,
"kl": 0.1983642578125,
"learning_rate": 7.500000000000001e-06,
"loss": 0.0115,
"loss_interactive_phase": 0.011498338542878628,
"reward": 0.29777343789438654,
"reward_original_clue_max": 0.180558073555015,
"reward_original_clue_mean": -2.147744222486675e-09,
"reward_original_clue_min": -0.16238193431746906,
"reward_original_clue_std": 0.138092222998997,
"reward_original_decision_max": 1.7925,
"reward_original_decision_mean": 1.19109375,
"reward_original_decision_min": -0.8249999999999998,
"reward_original_decision_std": 1.0596252090888885,
"reward_original_overall_mean": 0.5955468739261279,
"reward_original_overall_std": 1.0734861999424956,
"reward_std": 1.0712655663327704,
"step": 21
},
{
"advantages_interactive_phase": 2.3166649043560028e-08,
"clip_ratio": 0.007374117994913831,
"clue_civilian_adjusted_reward_mean": -0.013323967362128385,
"clue_civilian_advantage_adjustment": -0.1397128562510173,
"clue_civilian_baseline": 0.1397128562510173,
"clue_civilian_raw_reward_mean": 0.1263888888888889,
"clue_civilian_votes_avg": 0.8333333333333333,
"clue_invalid_votes": 0.25,
"clue_na_votes": 0.625,
"clue_spy_adjusted_reward": 0.03997188807820764,
"clue_spy_advantage_adjustment": 0.4191385547448744,
"clue_spy_baseline": -0.4191385547448744,
"clue_spy_raw_reward": -0.3791666666666667,
"clue_spy_votes_received": 4.625,
"clue_suspicion_potential_psi": 3.7916666666666665,
"clue_total_valid_votes": 7.125,
"completion_length": 314.3828125,
"epoch": 3.142857142857143,
"grad_norm": 0.7247025966644287,
"kl": 0.14990234375,
"learning_rate": 7.857142857142858e-06,
"loss": 0.0065,
"loss_interactive_phase": 0.006495335896033794,
"reward": 0.27199219820781345,
"reward_original_clue_max": 0.20973680630488367,
"reward_original_clue_mean": -3.5020443879887897e-09,
"reward_original_clue_min": -0.25500620188084894,
"reward_original_clue_std": 0.17895936766287218,
"reward_original_decision_max": 2.06625,
"reward_original_decision_mean": 1.0879687500000002,
"reward_original_decision_min": -0.55125,
"reward_original_decision_std": 1.0594716886601967,
"reward_original_overall_mean": 0.5439843732489779,
"reward_original_overall_std": 1.143192035744574,
"reward_std": 0.8779618775670658,
"step": 22
},
{
"advantages_interactive_phase": 1.210719347000122e-08,
"clip_ratio": 0.009816105710342526,
"clue_civilian_adjusted_reward_mean": 0.012905466579270795,
"clue_civilian_advantage_adjustment": -0.13431675564295142,
"clue_civilian_baseline": 0.13431675564295142,
"clue_civilian_raw_reward_mean": 0.14722222222222223,
"clue_civilian_votes_avg": 0.8333333333333334,
"clue_invalid_votes": 0.25,
"clue_na_votes": 0.0,
"clue_spy_adjusted_reward": -0.03871641069401783,
"clue_spy_advantage_adjustment": 0.4029502559726489,
"clue_spy_baseline": -0.4029502559726489,
"clue_spy_raw_reward": -0.44166666666666665,
"clue_spy_votes_received": 5.25,
"clue_suspicion_potential_psi": 4.416666666666667,
"clue_total_valid_votes": 7.75,
"completion_length": 349.46875,
"epoch": 3.2857142857142856,
"grad_norm": 1.495042085647583,
"kl": 0.20458984375,
"learning_rate": 8.214285714285714e-06,
"loss": 0.0098,
"loss_interactive_phase": 0.009842151892371476,
"reward": 0.3264843803688339,
"reward_original_clue_max": 0.18625860761540888,
"reward_original_clue_mean": -2.7390513624575563e-09,
"reward_original_clue_min": -0.25603524557359103,
"reward_original_clue_std": 0.16967942428353516,
"reward_original_decision_max": 2.49,
"reward_original_decision_mean": 1.3059375000000002,
"reward_original_decision_min": -0.62625,
"reward_original_decision_std": 1.2001143050462124,
"reward_original_overall_mean": 0.6529687486304744,
"reward_original_overall_std": 1.2300136329570874,
"reward_std": 1.0827139959652952,
"step": 23
},
{
"advantages_interactive_phase": 2.0721927285194397e-08,
"clip_ratio": 0.007756809587590396,
"clue_civilian_adjusted_reward_mean": -0.04694887868266013,
"clue_civilian_advantage_adjustment": -0.11500443423821569,
"clue_civilian_baseline": 0.11500443423821569,
"clue_civilian_raw_reward_mean": 0.06805555555555555,
"clue_civilian_votes_avg": 1.3333333333333333,
"clue_invalid_votes": 0.5,
"clue_na_votes": 0.125,
"clue_spy_adjusted_reward": 0.14084662848473525,
"clue_spy_advantage_adjustment": 0.345013295151402,
"clue_spy_baseline": -0.345013295151402,
"clue_spy_raw_reward": -0.20416666666666672,
"clue_spy_votes_received": 3.375,
"clue_suspicion_potential_psi": 2.041666666666667,
"clue_total_valid_votes": 7.375,
"completion_length": 362.4375,
"epoch": 3.4285714285714284,
"grad_norm": 0.721821129322052,
"kl": 0.15087890625,
"learning_rate": 8.571428571428571e-06,
"loss": 0.0064,
"loss_interactive_phase": 0.006402550439815968,
"reward": 0.12550782238826086,
"reward_original_clue_max": 0.2615399846222246,
"reward_original_clue_mean": -1.8908112909589436e-09,
"reward_original_clue_min": -0.29680578730801793,
"reward_original_clue_std": 0.21245343365808805,
"reward_original_decision_max": 2.06625,
"reward_original_decision_mean": 0.50203125,
"reward_original_decision_min": -1.125,
"reward_original_decision_std": 1.2899370730874167,
"reward_original_overall_mean": 0.25101562405459443,
"reward_original_overall_std": 1.1078002049178977,
"reward_std": 0.8967364060953804,
"step": 24
},
{
"advantages_interactive_phase": 1.1175870895385742e-08,
"clip_ratio": 0.0030952056986279786,
"clue_civilian_adjusted_reward_mean": 0.05641245992546336,
"clue_civilian_advantage_adjustment": -0.1352542067412033,
"clue_civilian_baseline": 0.1352542067412033,
"clue_civilian_raw_reward_mean": 0.19166666666666668,
"clue_civilian_votes_avg": 0.5,
"clue_invalid_votes": 0.25,
"clue_na_votes": 0.0,
"clue_spy_adjusted_reward": -0.16923738346885245,
"clue_spy_advantage_adjustment": 0.40576261653114754,
"clue_spy_baseline": -0.40576261653114754,
"clue_spy_raw_reward": -0.575,
"clue_spy_votes_received": 6.25,
"clue_suspicion_potential_psi": 5.75,
"clue_total_valid_votes": 7.75,
"completion_length": 378.28125,
"epoch": 3.571428571428571,
"grad_norm": 1.2147719860076904,
"kl": 0.2017822265625,
"learning_rate": 8.92857142857143e-06,
"loss": 0.0086,
"loss_interactive_phase": 0.008552196552045643,
"reward": 0.4324218803571566,
"reward_original_clue_max": 0.17644798867648273,
"reward_original_clue_mean": -9.231155991516748e-10,
"reward_original_clue_min": -0.3267729122198718,
"reward_original_clue_std": 0.20414503627659822,
"reward_original_decision_max": 2.49,
"reward_original_decision_mean": 1.7296875000000003,
"reward_original_decision_min": 0.645,
"reward_original_decision_std": 0.6397808895425074,
"reward_original_overall_mean": 0.8648437495384423,
"reward_original_overall_std": 1.219408722443206,
"reward_std": 0.876965590677074,
"step": 25
},
{
"advantages_interactive_phase": 2.421438694000244e-08,
"clip_ratio": 0.005824881722219288,
"clue_civilian_adjusted_reward_mean": -0.007093781877094507,
"clue_civilian_advantage_adjustment": -0.13209378187709453,
"clue_civilian_baseline": 0.13209378187709453,
"clue_civilian_raw_reward_mean": 0.125,
"clue_civilian_votes_avg": 1.0,
"clue_invalid_votes": 0.25,
"clue_na_votes": 0.0,
"clue_spy_adjusted_reward": 0.021281343083350023,
"clue_spy_advantage_adjustment": 0.39628134308335006,
"clue_spy_baseline": -0.39628134308335006,
"clue_spy_raw_reward": -0.375,
"clue_spy_votes_received": 4.75,
"clue_suspicion_potential_psi": 3.7499999999999996,
"clue_total_valid_votes": 7.75,
"completion_length": 333.84375,
"epoch": 3.7142857142857144,
"grad_norm": 0.8488478660583496,
"kl": 0.18359375,
"learning_rate": 9.285714285714288e-06,
"loss": 0.0078,
"loss_interactive_phase": 0.007767571427393705,
"reward": 0.27351563694794767,
"reward_original_clue_max": 0.25575890463160234,
"reward_original_clue_mean": -6.369833735996033e-10,
"reward_original_clue_min": -0.3290713434253468,
"reward_original_clue_std": 0.22778135711245912,
"reward_original_decision_max": 2.06625,
"reward_original_decision_mean": 1.0940625000000002,
"reward_original_decision_min": -0.1274999999999999,
"reward_original_decision_std": 0.8632276294383053,
"reward_original_overall_mean": 0.5470312496815084,
"reward_original_overall_std": 1.1264277840598063,
"reward_std": 0.8660217167160729,
"step": 26
},
{
"advantages_interactive_phase": -7.450580596923828e-09,
"clip_ratio": 0.0031035091960802674,
"clue_civilian_adjusted_reward_mean": 0.03245208673288981,
"clue_civilian_advantage_adjustment": -0.1689368021559991,
"clue_civilian_baseline": 0.1689368021559991,
"clue_civilian_raw_reward_mean": 0.2013888888888889,
"clue_civilian_votes_avg": 0.4583333333333333,
"clue_invalid_votes": 0.125,
"clue_na_votes": 0.0,
"clue_spy_adjusted_reward": -0.09735625958223877,
"clue_spy_advantage_adjustment": 0.5068104070844279,
"clue_spy_baseline": -0.5068104070844279,
"clue_spy_raw_reward": -0.6041666666666666,
"clue_spy_votes_received": 6.5,
"clue_suspicion_potential_psi": 6.041666666666667,
"clue_total_valid_votes": 7.875,
"completion_length": 306.40625,
"epoch": 3.857142857142857,
"grad_norm": 1.9208674430847168,
"kl": 0.3026123046875,
"learning_rate": 9.642857142857144e-06,
"loss": 0.0125,
"loss_interactive_phase": 0.01246593298856169,
"reward": 0.46124999631323677,
"reward_original_clue_max": 0.1545202636139022,
"reward_original_clue_mean": 1.5410765849196983e-10,
"reward_original_clue_min": -0.24859110312991783,
"reward_original_clue_std": 0.16558966332718913,
"reward_original_decision_max": 2.49,
"reward_original_decision_mean": 1.845,
"reward_original_decision_min": 0.7200000000000002,
"reward_original_decision_std": 0.7004523304839636,
"reward_original_overall_mean": 0.9225000000770541,
"reward_original_overall_std": 1.2500439582236307,
"reward_std": 0.8922832234684476,
"step": 27
},
{
"advantages_interactive_phase": 9.66247171163559e-09,
"clip_ratio": 0.004918134000035934,
"clue_civilian_adjusted_reward_mean": -0.03922444833702034,
"clue_civilian_advantage_adjustment": -0.168391115003687,
"clue_civilian_baseline": 0.168391115003687,
"clue_civilian_raw_reward_mean": 0.12916666666666665,
"clue_civilian_votes_avg": 1.0,
"clue_invalid_votes": 0.125,
"clue_na_votes": 0.0,
"clue_spy_adjusted_reward": 0.11767334568750121,
"clue_spy_advantage_adjustment": 0.5051733456875013,
"clue_spy_baseline": -0.5051733456875013,
"clue_spy_raw_reward": -0.3875000000000001,
"clue_spy_votes_received": 4.875,
"clue_suspicion_potential_psi": 3.8750000000000004,
"clue_total_valid_votes": 7.875,
"completion_length": 290.8359375,
"epoch": 4.0,
"grad_norm": 0.778830885887146,
"kl": 0.2080078125,
"learning_rate": 1e-05,
"loss": 0.0085,
"loss_interactive_phase": 0.008535244385711849,
"reward": 0.29144531737351337,
"reward_original_clue_max": 0.2624036269472061,
"reward_original_clue_mean": 1.691100571209192e-10,
"reward_original_clue_min": -0.2589547295967252,
"reward_original_clue_std": 0.20231139453953567,
"reward_original_decision_max": 2.49,
"reward_original_decision_mean": 1.16578125,
"reward_original_decision_min": -0.05249999999999988,
"reward_original_decision_std": 0.9492047780780623,
"reward_original_overall_mean": 0.582890625084555,
"reward_original_overall_std": 1.1522968480359421,
"reward_std": 0.8734021281821482,
"step": 28
},
{
"advantages_interactive_phase": 2.60770320892334e-08,
"clip_ratio": 0.004597238206770271,
"clue_civilian_adjusted_reward_mean": -0.017886474267241367,
"clue_civilian_advantage_adjustment": -0.13455314093390802,
"clue_civilian_baseline": 0.13455314093390805,
"clue_civilian_raw_reward_mean": 0.11666666666666667,
"clue_civilian_votes_avg": 1.125,
"clue_invalid_votes": 0.0,
"clue_na_votes": 0.0,
"clue_spy_adjusted_reward": 0.05365942077661086,
"clue_spy_advantage_adjustment": 0.4036594207766109,
"clue_spy_baseline": -0.4036594207766109,
"clue_spy_raw_reward": -0.35,
"clue_spy_votes_received": 4.625,
"clue_suspicion_potential_psi": 3.5,
"clue_total_valid_votes": 8.0,
"completion_length": 304.515625,
"epoch": 4.142857142857143,
"grad_norm": 1.3750234842300415,
"kl": 0.3173828125,
"learning_rate": 9.999611462404874e-06,
"loss": 0.0125,
"loss_interactive_phase": 0.012478827498853207,
"reward": 0.26496095041194656,
"reward_original_clue_max": 0.26774659185022603,
"reward_original_clue_mean": -5.062783100737156e-10,
"reward_original_clue_min": -0.31947364534085654,
"reward_original_clue_std": 0.23106213560541738,
"reward_original_decision_max": 2.06625,
"reward_original_decision_mean": 1.05984375,
"reward_original_decision_min": -0.05249999999999988,
"reward_original_decision_std": 0.8307205925127058,
"reward_original_overall_mean": 0.529921874746861,
"reward_original_overall_std": 1.1095292288061827,
"reward_std": 0.8888411754368172,
"step": 29
},
{
"advantages_interactive_phase": 3.259629011154175e-09,
"clip_ratio": 0.004411640649777837,
"clue_civilian_adjusted_reward_mean": -0.019376439787606455,
"clue_civilian_advantage_adjustment": -0.10826532867649533,
"clue_civilian_baseline": 0.10826532867649533,
"clue_civilian_raw_reward_mean": 0.08888888888888888,
"clue_civilian_votes_avg": 1.3333333333333333,
"clue_invalid_votes": 0.0,
"clue_na_votes": 0.0,
"clue_spy_adjusted_reward": 0.058129320628341516,
"clue_spy_advantage_adjustment": 0.3247959872950082,
"clue_spy_baseline": -0.3247959872950082,
"clue_spy_raw_reward": -0.2666666666666667,
"clue_spy_votes_received": 4.0,
"clue_suspicion_potential_psi": 2.6666666666666665,
"clue_total_valid_votes": 8.0,
"completion_length": 261.4453125,
"epoch": 4.285714285714286,
"grad_norm": 0.8253775238990784,
"kl": 0.2550048828125,
"learning_rate": 9.998445910004082e-06,
"loss": 0.0103,
"loss_interactive_phase": 0.010343844187445939,
"reward": 0.19875000170890966,
"reward_original_clue_max": 0.2470865782057527,
"reward_original_clue_mean": 3.1638053678631484e-10,
"reward_original_clue_min": -0.258379067343329,
"reward_original_clue_std": 0.19545089761013185,
"reward_original_decision_max": 2.06625,
"reward_original_decision_mean": 0.7949999999999999,
"reward_original_decision_min": -0.47625000000000006,
"reward_original_decision_std": 1.1225616644698808,
"reward_original_overall_mean": 0.3975000001581903,
"reward_original_overall_std": 1.1179099397348429,
"reward_std": 0.8648673956089749,
"step": 30
}
],
"logging_steps": 1.0,
"max_steps": 280,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 5,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}