{ "train_losses": [ -0.0008886720752343535, 0.0018749998416751623, -0.0016992188757285476, 0.0014062500558793545, -0.0004687501350417733, -2.929696347564459e-05, -0.0003417970729060471, 0.00013671873603016138, -0.001972656464204192, 0.0019824218470603228, 9.765609866008162e-05, -0.0004492188454605639, -0.006416015792638063, 0.0014746093656867743, -0.0011328125838190317, 0.00041992179467342794, 0.0007226562011055648, -0.0038378906901925802, -0.0029199221171438694, -0.0007714845705777407, 0.00020507810404524207, 0.0009277343633584678, 0.0010449218098074198, -0.0011523438151925802, 0.00015624993829987943, -0.000693359412252903, 0.0012792968191206455, -0.001796875149011612, -0.0028515628073364496, 0.00013671873603016138, -0.0008789062849245965, -0.002451172098517418, -0.00018554693087935448, -0.002089843852445483, -0.0010546875419095159, -0.002011719159781933, -0.0029492187313735485, -0.001757812686264515, -0.0020703126210719347, 0.0016406249487772584, -0.0018164063803851604, -0.0008203125325962901, -0.0011230469681322575, 0.0005957029643468559, -0.0005468751187436283, 4.8828020226210356e-05, -0.0007324219914153218, -0.0020703128539025784, -0.0009960937313735485, 0.00038085927371867, -2.9296934371814132e-05, -0.0025097657926380634, -0.0012402344727888703, -0.0011328125838190317, -0.003867187537252903, -0.0019042969215661287, -0.002158203162252903, -4.8828194849193096e-05, 0.0011621094308793545, -0.002871094038709998, 0.0007421874906867743, -0.001679687644354999, 3.9062462747097015e-05, -0.0018652346916496754, -0.006669921800494194, -0.0012500000884756446, 0.00041015614988282323, -0.0017382815713062882, -1.9531260477378964e-05, -0.0010839845053851604, -0.000820312590803951, -0.0018652344588190317, -0.001064453274011612, -0.0021093753166496754, -0.002910156501457095, 0.0013671874767169356, 6.835930980741978e-05, -0.0012109376257285476, -0.0013281251303851604, 0.0001074218307621777, -0.002001953311264515, -0.0029980470426380634, 3.9062462747097015e-05, 1.9531173165887594e-05, -0.0007617189548909664, -0.0020996094681322575, -0.0006542970659211278, -0.0020703128539025784, 0.0009179686894640326, -0.0018457032274454832, -0.0014550783671438694, 3.9062375435605645e-05, -0.002050781389698386, -0.0021289063151925802, -0.0016503906808793545, -0.0003515625139698386, -0.00020507819135673344, -0.0005859376396983862, -0.001015624962747097, -0.0008007813594304025, 0.0001269530621357262, -0.0007421876071020961, -0.0030859375838190317, 0.0011132812360301614, -0.0017675783019512892, -4.8828194849193096e-05, 0.0003710936871357262, -0.0012500000884756446, -0.003554687835276127, -0.0015332032926380634, -0.0014941407134756446, -0.0010351561941206455, -0.001601562718860805, -0.0005175783298909664, 0.0013281248975545168, 0.001015624962747097, -0.0021289063151925802, -0.0005371093284338713, 0.0015917968703433871, -0.0018164064968004823, -0.0019628906156867743, -0.002080078236758709, -4.882816574536264e-05, -0.0002441406832076609, -0.0024609374813735485, 0.0024218750186264515, -0.0006054689292795956, -0.0012109377421438694, 0.0010253905784338713, -0.0001269530621357262, 0.0017285156063735485, 0.0012890624348074198, 0.001376952975988388, 0.0008789062267169356, 0.0013671874767169356, -0.0024023440200835466, -0.0009570313850417733, 0.0014746092492714524, -0.003847656538709998, -0.0036914064548909664, 0.00017578122788108885, -0.0011132813524454832, -0.00025390629889443517, 7.812489639036357e-05, -0.0019433595007285476, -0.0008007813012227416, -0.00278320349752903, 0.0002636718563735485, 0.002041015774011612, -0.0008300782646983862, -0.0005566407926380634, 0.001943359267897904, -0.0008789063431322575, -0.0001562500256113708, -0.0032324218191206455, 5.859369412064552e-05, 4.882807843387127e-05, -0.0002636719145812094, 9.765612776391208e-05, -0.00042968758498318493, -0.0018945314222946763, -0.002119140699505806, -0.0029882811941206455, -6.835945532657206e-05, 0.0011230468517169356, -0.00020507816225290298, -0.0013183593982830644, -0.0011230469681322575, -0.0008984374580904841, -0.001972656464204192, 0.00025390618247911334, 0.0011035155039280653, -0.0020605470053851604, -0.0017773439176380634, 0.0002441405667923391, -0.00498046912252903, 0.0009570312686264515, -0.0021484375465661287, -0.0019433596171438694, 0.0002441405667923391, 0.0011816405458375812, 0.00019531240104697645, -0.0010058595798909664, -0.0022753907833248377, -0.0019824218470603228, 0.0008203124161809683, -0.0008105469751171768, -0.0014160156715661287, -0.0009082031901925802, 0.0008886718424037099, 0.0009863279992714524, -2.9296992579475045e-05, -0.003017578274011612, -0.0029199218843132257, -0.0020117186941206455 ], "test_losses": [ -0.1667999999999999, -0.014999999999999871, -0.12039999999999987, -0.0959999999999999, -0.1423999999999999, -0.32739999999999986, -0.2145999999999999, -0.3208, -0.24459999999999993, -0.21639999999999993, -0.049999999999999815, -0.07439999999999984, -0.12819999999999993, -0.10779999999999988, -0.08139999999999988, -0.13039999999999993, -0.1643999999999999, -0.2215999999999999, -0.2357999999999999 ], "config": { "training_type": "online_ppo", "environment": "MiniHack-Room-Random-15x15-v0", "total_timesteps": 195, "training_time": 4420.289131402969, "device": "cuda", "ppo_config": { "learning_rate": 0.0003, "n_epochs": 4, "gamma": 0.99, "vf_coef": 0.5, "ent_coef": 0.01, "max_grad_norm": 0.5 }, "exploration_config": { "use_curiosity": true, "curiosity_dyn": true, "curiosity_skill_entropy": true, "curiosity_skill_transition_novelty": true, "curiosity_dyn_coef": 0.03, "curiosity_hdp_coef": 0.002, "curiosity_stn_coef": 0.0005, "use_rnd": false, "rnd_lr": null, "rnd_coef": null }, "model_sources": { "vae_repo_id": "CatkinChen/nethack-vae-hmm", "hmm_repo_id": "CatkinChen/nethack-hmm" } }, "final_train_loss": -0.0020117186941206455, "final_test_loss": -0.2357999999999999, "total_epochs": 195, "best_train_loss": -0.006669921800494194, "best_test_loss": -0.32739999999999986 }