
Commit 398113b

Added suggestions from @sven1977's review.
Signed-off-by: simonsays1980 <[email protected]>
simonsays1980 committed Sep 19, 2024
1 parent e26b53e commit 398113b
Showing 6 changed files with 37 additions and 27 deletions.
38 changes: 19 additions & 19 deletions rllib/algorithms/algorithm_config.py
@@ -2560,31 +2560,31 @@ def offline_data(
`MultiAgentEpisode` not supported, yet). Note,
`rllib.core.columns.Columns.OBS` will also try to decompress
`rllib.core.columns.Columns.NEXT_OBS`.
materialize_data: Whether the raw data should be materialized in memory. This
boosts performance, but requires enough memory to avoid an OOM, so
materialize_data: Whether the raw data should be materialized in memory.
This boosts performance, but requires enough memory to avoid an OOM, so
make sure that your cluster has the resources available. For very large
data you might want to switch to streaming mode by setting this to
`False` (default). If your algorithm does not need the RLModule in the
Learner connector pipeline or all (learner) connectors are stateless
you should consider setting `materialize_mapped_data` to `True`
instead (and set `materialize_data` to `False`).
If your data does not fit into memory and your Learner connector pipeline
requires an RLModule or is stateful, set both
`materialize_data` and `materialize_mapped_data` to `False`.
instead (and set `materialize_data` to `False`). If your data does not
fit into memory and your Learner connector pipeline requires an RLModule
or is stateful, set both `materialize_data` and
`materialize_mapped_data` to `False`.
materialize_mapped_data: Whether the data should be materialized after
running it through the Learner connector pipeline (i.e. after running the
`OfflinePreLearner`). This improves performance, but should only be used
in case the (learner) connector pipeline does not require an RLModule
and the (learner) connector pipeline is stateless. For example, MARWIL's
Learner connector pipeline requires the RLModule for value function
predictions and training batches would become stale after some iterations
causing learning degradation or divergence. Also ensure that your cluster
has enough memory available to avoid an OOM. If set to `True` (True), make
sure that `materialize_data` is set to `False` to avoid materialization of two
datasets.
If your data does not fit into memory and your Learner connector pipeline
requires an RLModule or is stateful, set both
`materialize_data` and `materialize_mapped_data` to `False`.
running it through the Learner connector pipeline (i.e. after running
the `OfflinePreLearner`). This improves performance, but should only be
used in case the (learner) connector pipeline does not require an
RLModule and the (learner) connector pipeline is stateless. For example,
MARWIL's Learner connector pipeline requires the RLModule for value
function predictions and training batches would become stale after some
iterations causing learning degradation or divergence. Also ensure that
your cluster has enough memory available to avoid an OOM. If set to
`True` (True), make sure that `materialize_data` is set to `False` to
avoid materialization of two datasets. If your data does not fit into
memory and your Learner connector pipeline requires an RLModule or is
stateful, set both `materialize_data` and `materialize_mapped_data` to
`False`.
map_batches_kwargs: Keyword args for the `map_batches` method. These will be
passed into the `ray.data.Dataset.map_batches` method when sampling
without checking. If no arguments passed in the default arguments
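A short configuration sketch (not part of this commit; the input path is a placeholder) illustrates how the two materialize flags documented above are meant to be combined when the raw data does not fit into memory but the connector-mapped batches do:

    from ray.rllib.algorithms.bc import BCConfig

    config = (
        BCConfig()
        .environment("CartPole-v1")
        .offline_data(
            # Placeholder path to offline data stored as Parquet files.
            input_="/path/to/offline/data",
            # Stream the raw data instead of materializing it in memory ...
            materialize_data=False,
            # ... but materialize the batches produced by the `OfflinePreLearner`.
            # Per the docstring above, do this only if the Learner connector
            # pipeline is stateless and does not need the RLModule.
            materialize_mapped_data=True,
        )
    )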
1 change: 0 additions & 1 deletion rllib/core/learner/learner.py
@@ -1093,7 +1093,6 @@ def update_from_iterator(
)

self._check_is_built()
minibatch_size = minibatch_size or 1

# Call `before_gradient_based_update` to allow for non-gradient based
# preparations-, logging-, and update logic to happen.
10 changes: 6 additions & 4 deletions rllib/offline/offline_data.py
@@ -23,15 +23,17 @@ class OfflineData:
def __init__(self, config: AlgorithmConfig):

self.config = config
self.is_multi_agent = config.is_multi_agent()
self.is_multi_agent = self.config.is_multi_agent()
self.path = (
config.input_ if isinstance(config.input_, list) else Path(config.input_)
self.config.input_
if isinstance(config.input_, list)
else Path(config.input_)
)
# Use `read_parquet` as default data read method.
self.data_read_method = config.input_read_method
self.data_read_method = self.config.input_read_method
# Override default arguments for the data read method.
self.data_read_method_kwargs = (
self.default_read_method_kwargs | config.input_read_method_kwargs
self.default_read_method_kwargs | self.config.input_read_method_kwargs
)

# If data should be materialized.
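The constructor now reads consistently from `self.config`, while the merge semantics stay the same: user-supplied `input_read_method_kwargs` are layered over `default_read_method_kwargs` via Python's dict-union operator. A standalone illustration (all values below are hypothetical, not RLlib defaults):

    # The right-hand operand of `|` wins on key collisions (Python 3.9+), so
    # user-provided read kwargs override the defaults.
    default_read_method_kwargs = {"override_num_blocks": 2}
    input_read_method_kwargs = {"override_num_blocks": 8, "shuffle": "files"}

    data_read_method_kwargs = default_read_method_kwargs | input_read_method_kwargs
    print(data_read_method_kwargs)
    # {'override_num_blocks': 8, 'shuffle': 'files'}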
5 changes: 4 additions & 1 deletion rllib/tuned_examples/bc/cartpole_bc.py
@@ -62,7 +62,10 @@
"prefetch_batches": 1,
"local_shuffle_buffer_size": None,
},
prelearner_module_synch_period=20,
# The number of iterations to be run per learner when in multi-learner
# mode in a single RLlib training iteration. Leave this to `None` to
# run an entire epoch on the dataset during a single RLlib training
# iteration. For single-learner mode 1 is the only option.
dataset_num_iters_per_learner=1 if args.num_gpus == 0 else None,
)
.training(
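The new comment block documents `dataset_num_iters_per_learner`. A compact sketch of the intended usage outside this example script (the GPU count below is a hypothetical stand-in for `args.num_gpus`):

    from ray.rllib.algorithms.algorithm_config import AlgorithmConfig

    num_gpus = 2  # hypothetical stand-in for `args.num_gpus`

    config = AlgorithmConfig().offline_data(
        # Single-learner mode (no GPUs): 1 is the only valid choice.
        # Multi-learner mode: `None` runs a full epoch over the dataset in
        # each RLlib training iteration.
        dataset_num_iters_per_learner=1 if num_gpus == 0 else None,
    )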
5 changes: 4 additions & 1 deletion rllib/tuned_examples/cql/pendulum_cql.py
@@ -49,7 +49,10 @@
# The `kwargs` for the `iter_batches` method. Due to the small
# dataset we choose only a single batch to prefetch.
iter_batches_kwargs={"prefetch_batches": 1},
prelearner_module_synch_period=20,
# The number of iterations to be run per learner when in multi-learner
# mode in a single RLlib training iteration. Leave this to `None` to
# run an entire epoch on the dataset during a single RLlib training
# iteration. For single-learner mode 1 is the only option.
dataset_num_iters_per_learner=1 if args.num_gpus == 0 else None,
# TODO (sven): Has this any influence in the connectors?
actions_in_input_normalized=True,
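The `prefetch_batches` setting above is passed to Ray Data's `iter_batches` method when batches are pulled from the dataset. A rough standalone illustration of its effect (a toy range dataset, not the Pendulum data; the `id` column name follows current Ray Data conventions):

    import ray

    ds = ray.data.range(256)

    # Keep only a single batch in flight while iterating, mirroring
    # `iter_batches_kwargs={"prefetch_batches": 1}` above; this is enough
    # for small offline datasets.
    for batch in ds.iter_batches(batch_size=64, prefetch_batches=1):
        print(len(batch["id"]))  # 64 rows per batch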
5 changes: 4 additions & 1 deletion rllib/tuned_examples/marwil/cartpole_marwil.py
@@ -60,7 +60,10 @@
# The `kwargs` for the `iter_batches` method. Due to the small
# dataset we choose only a single batch to prefetch.
iter_batches_kwargs={"prefetch_batches": 1},
prelearner_module_synch_period=20,
# The number of iterations to be run per learner when in multi-learner
# mode in a single RLlib training iteration. Leave this to `None` to
# run an entire epoch on the dataset during a single RLlib training
# iteration. For single-learner mode 1 is the only option.
dataset_num_iters_per_learner=1 if args.num_gpus == 0 else None,
)
.training(
