[Meltingpot] Fine-tuning PPO #78

Draft · wants to merge 8 commits into main
1 change: 1 addition & 0 deletions benchmarl/environments/meltingpot/common.py
@@ -81,6 +81,7 @@ def get_env_fun(
         return lambda: MeltingpotEnv(
             substrate=self.name.lower(),
             categorical_actions=True,
+            device=device,
             **self.config,
         )

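The one-line change above forwards the experiment's device into the TorchRL wrapper, so rollout tensors are allocated there directly instead of being moved after collection. A minimal sketch of the resulting call, assuming TorchRL's MeltingpotEnv and using a substrate name chosen for illustration:

import torch
from torchrl.envs.libs.meltingpot import MeltingpotEnv

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = MeltingpotEnv(
    substrate="commons_harvest__open",
    categorical_actions=True,  # discrete action indices instead of one-hot
    device=device,  # tensors produced by the env live on this device
)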
72 changes: 72 additions & 0 deletions fine_tuned/meltingpot/conf/config.yaml
@@ -0,0 +1,72 @@
defaults:
  - experiment: base_experiment
  - algorithm: ippo
  - task: meltingpot/commons_harvest__open
  - model: layers/cnn
  - model@critic_model: layers/cnn
  - _self_
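  # Listing _self_ last follows standard Hydra composition order: the values
  # in this file override anything pulled in by the composed defaults above.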

hydra:
  searchpath:
    # Tells hydra to add the default benchmarl configuration to its path
    - pkg://benchmarl/conf

seed: 0

task:
  max_steps: 200

model:
  mlp_num_cells: [256, 256]

  cnn_num_cells: [16, 32, 256]
  cnn_kernel_sizes: [8, 4, 11]
  cnn_strides: [4, 2, 1]
  cnn_paddings: [2, 1, 5]
  cnn_activation_class: torch.nn.ReLU
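  # Shape check, assuming 88x88 RGB observations (the actual size is
  # substrate-dependent in Melting Pot): with out = (in + 2*pad - kernel) // stride + 1,
  # 88 -> (88 + 4 - 8)//4 + 1 = 22 -> (22 + 2 - 4)//2 + 1 = 11
  #    -> (11 + 10 - 11)//1 + 1 = 11, i.e. an 11x11x256 feature map.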

critic_model:
  mlp_num_cells: [256, 256]

  cnn_num_cells: [16, 32, 256]
  cnn_kernel_sizes: [8, 4, 11]
  cnn_strides: [4, 2, 1]
  cnn_paddings: [2, 1, 5]
  cnn_activation_class: torch.nn.ReLU

algorithm:
  entropy_coef: 0.001
  use_tanh_normal: True

experiment:
  sampling_device: "cpu"
  train_device: "cuda"

  share_policy_params: True
  gamma: 0.99

  adam_eps: 0.000001
  lr: 0.00025
  clip_grad_norm: True
  clip_grad_val: 5

  max_n_iters: null
  max_n_frames: 10_000_000

  on_policy_collected_frames_per_batch: 1000
  on_policy_n_envs_per_worker: 1
  on_policy_n_minibatch_iters: 45
  on_policy_minibatch_size: 100
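  # Batch arithmetic: 45 minibatch iterations x 100 samples = 4500 samples
  # consumed per 1000-frame batch, so each collected frame is reused about
  # 4.5 times per update; 10_000_000 frames / 1000 per batch = 10_000 iterations.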

  evaluation: True
  render: True
  evaluation_interval: 1000
  evaluation_episodes: 1
  evaluation_deterministic_actions: False

  loggers: [wandb]
  create_json: False

  save_folder: null
  restore_file: null
  checkpoint_interval: 0
31 changes: 31 additions & 0 deletions fine_tuned/meltingpot/meltingpot_run.py
@@ -0,0 +1,31 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#

import hydra

from benchmarl.experiment import Experiment

from benchmarl.hydra_config import load_experiment_from_hydra
from hydra.core.hydra_config import HydraConfig
from omegaconf import DictConfig, OmegaConf


@hydra.main(version_base=None, config_path="conf", config_name="config")
def hydra_experiment(cfg: DictConfig) -> None:
    hydra_choices = HydraConfig.get().runtime.choices
    task_name = hydra_choices.task
    algorithm_name = hydra_choices.algorithm

    print(f"\nAlgorithm: {algorithm_name}, Task: {task_name}")
    print("\nLoaded config:\n")
    print(OmegaConf.to_yaml(cfg))

    experiment: Experiment = load_experiment_from_hydra(cfg, task_name=task_name)
    experiment.run()


if __name__ == "__main__":
    hydra_experiment()
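Since the runner is a standard Hydra entry point, any field in the config above can be overridden at launch time with the usual key=value syntax; for example (path relative to the repository root, override values illustrative):

python fine_tuned/meltingpot/meltingpot_run.py seed=1 experiment.train_device=cpu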