| !!python/object/apply:collections.OrderedDict | |
| - - - batch_size | |
| - 2048 | |
| - - buffer_size | |
| - 1000000 | |
| - - ent_coef | |
| - auto | |
| - - gamma | |
| - 0.95 | |
| - - learning_rate | |
| - 0.001 | |
| - - learning_starts | |
| - 100 | |
| - - n_timesteps | |
| - 5000000.0 | |
| - - normalize | |
| - true | |
| - - policy | |
| - MultiInputPolicy | |
| - - policy_kwargs | |
| - dict(net_arch=[512, 512, 512], n_critics=2) | |
| - - replay_buffer_class | |
| - HerReplayBuffer | |
| - - replay_buffer_kwargs | |
| - dict( goal_selection_strategy='future', n_sampled_goal=4 ) | |
| - - tau | |
| - 0.05 | |