diff --git a/benchmarl/environments/meltingpot/common.py b/benchmarl/environments/meltingpot/common.py index f209d8b1..4e12e79f 100644 --- a/benchmarl/environments/meltingpot/common.py +++ b/benchmarl/environments/meltingpot/common.py @@ -81,6 +81,7 @@ def get_env_fun( return lambda: MeltingpotEnv( substrate=self.name.lower(), categorical_actions=True, + device=device, **self.config, ) diff --git a/fine_tuned/meltingpot/conf/config.yaml b/fine_tuned/meltingpot/conf/config.yaml new file mode 100644 index 00000000..4f57d154 --- /dev/null +++ b/fine_tuned/meltingpot/conf/config.yaml @@ -0,0 +1,72 @@ +defaults: + - experiment: base_experiment + - algorithm: ippo + - task: meltingpot/commons_harvest__open + - model: layers/cnn + - model@critic_model: layers/cnn + - _self_ + +hydra: + searchpath: + # Tells hydra to add the default benchmarl configuration to its path + - pkg://benchmarl/conf + +seed: 0 + +task: + max_steps: 200 + +model: + mlp_num_cells: [ 256, 256 ] + + cnn_num_cells: [ 16, 32, 256 ] + cnn_kernel_sizes: [ 8, 4, 11 ] + cnn_strides: [4, 2, 1] + cnn_paddings: [2, 1, 5] + cnn_activation_class: torch.nn.ReLU + +critic_model: + mlp_num_cells: [ 256, 256 ] + + cnn_num_cells: [ 16, 32, 256 ] + cnn_kernel_sizes: [ 8, 4, 11 ] + cnn_strides: [ 4, 2, 1 ] + cnn_paddings: [ 2, 1, 5 ] + cnn_activation_class: torch.nn.ReLU + +algorithm: + entropy_coef: 0.001 + use_tanh_normal: True + +experiment: + sampling_device: "cpu" + train_device: "cuda" + + share_policy_params: True + gamma: 0.99 + + adam_eps: 0.000001 + lr: 0.00025 + clip_grad_norm: True + clip_grad_val: 5 + + max_n_iters: null + max_n_frames: 10_000_000 + + on_policy_collected_frames_per_batch: 1000 + on_policy_n_envs_per_worker: 1 + on_policy_n_minibatch_iters: 45 + on_policy_minibatch_size: 100 + + evaluation: True + render: True + evaluation_interval: 1000 + evaluation_episodes: 1 + evaluation_deterministic_actions: False + + loggers: [wandb] + create_json: False + + save_folder: null + restore_file: null + 
checkpoint_interval: 0
diff --git a/fine_tuned/meltingpot/meltingpot_run.py b/fine_tuned/meltingpot/meltingpot_run.py
new file mode 100644
index 00000000..703d38e1
--- /dev/null
+++ b/fine_tuned/meltingpot/meltingpot_run.py
@@ -0,0 +1,42 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+#
+
+import hydra
+
+from benchmarl.experiment import Experiment
+
+from benchmarl.hydra_config import load_experiment_from_hydra
+from hydra.core.hydra_config import HydraConfig
+from omegaconf import DictConfig, OmegaConf
+
+
+@hydra.main(version_base=None, config_path="conf", config_name="config")
+def hydra_experiment(cfg: DictConfig) -> None:
+    """Load the experiment described by ``cfg`` and run it.
+
+    The task and algorithm names are taken from the choices hydra recorded
+    for this run. Both names and the full config are printed before the
+    experiment is loaded and started.
+
+    Args:
+        cfg: Config object handed over by the ``hydra.main`` decorator.
+    """
+    choices = HydraConfig.get().runtime.choices
+    task_name, algorithm_name = choices.task, choices.algorithm
+
+    # Echo the selection and the resolved config before loading anything.
+    print(f"\nAlgorithm: {algorithm_name}, Task: {task_name}")
+    print("\nLoaded config:\n")
+    rendered_cfg = OmegaConf.to_yaml(cfg)
+    print(rendered_cfg)
+
+    experiment: Experiment = load_experiment_from_hydra(cfg, task_name=task_name)
+    experiment.run()
+
+
+if __name__ == "__main__":
+    # Hydra's decorator supplies ``cfg``; the call itself takes no arguments.
+    hydra_experiment()