{
  "algorithm": "PPO",
  "policy": "MlpPolicy",
  "observation_space": "Box(-inf, inf, (3008,), float32)",
  "action_space": "Box(0.0, [2. 1.], (2,), float32)",
  "total_parameters": 393669,
  "trainable_parameters": 393669,
  "model_parameters": {
    "learning_rate": 0.0003,
    "gamma": 0.99,
    "verbose": 0,
    "seed": 42,
    "tensorboard_log": "experiments/MyTestExp/logs/tensorboard",
    "batch_size": 64,
    "n_epochs": 10,
    "clip_range": 0.2,
    "ent_coef": 0.0
  },
  "device": "cuda"
}