# Experiment configuration for a PPO run on SpaceInvadersNoFrameskip-v4
# (see the 'type', 'env_id' and 'exp_name' entries below).
# NOTE(review): the key names and 'cfg_type' value suggest this is a DI-engine
# PPOF policy config -- confirm field semantics against that library before
# changing any value.
exp_config = {
    # Algorithm selection and runtime flags.
    'type': 'ppo',
    'on_policy': True,
    'cuda': True,
    'action_space': 'discrete',
    # Return / advantage estimation coefficients.
    'discount_factor': 0.99,
    'gae_lambda': 0.95,
    # Update schedule; batch_size is set to the same value as n_sample (320).
    'epoch_per_collect': 1,
    'batch_size': 320,
    # Optimizer settings.
    'learning_rate': 0.001,
    # NOTE(review): presumably [milestone_or_step, decay_factor] -- the exact
    # meaning of the two entries is not visible here; verify with the consumer.
    'lr_scheduler': [2000, 0.1],
    'weight_decay': 0,
    # Loss weights and clipping.
    'value_weight': 0.5,
    'entropy_weight': 0.01,
    'clip_ratio': 0.2,
    # Normalization / initialization switches.
    'adv_norm': True,
    'value_norm': 'baseline',
    'ppo_param_init': True,
    'grad_norm': 0.5,
    # Data collection settings.
    'n_sample': 320,
    'unroll_len': 1,
    'deterministic_eval': True,
    # Network sizes for the encoder and the actor / critic heads.
    'model': {
        'encoder_hidden_size_list': [64, 64, 128],
        'actor_head_hidden_size': 128,
        'critic_head_hidden_size': 128,
    },
    # Identification of the config type, environment and experiment.
    'cfg_type': 'PPOFPolicyDict',
    'env_id': 'SpaceInvadersNoFrameskip-v4',
    'exp_name': 'SpaceInvadersNoFrameskip-v4-PPO',
}