{ "algorithm": "PPO", "policy": "MlpPolicy", "observation_space": "Box(-inf, inf, (3008,), float32)", "action_space": "Box(0.0, [2. 1.], (2,), float32)", "total_parameters": 393669, "trainable_parameters": 393669, "model_parameters": { "learning_rate": 0.0003, "gamma": 0.99, "verbose": 0, "seed": 42, "tensorboard_log": "experiments/MyTestExp/logs/tensorboard", "batch_size": 64, "n_epochs": 10, "clip_range": 0.2, "ent_coef": 0.0 }, "device": "cuda" }