Document Atari wrapper reset behavior (#2170)

Copilot · araffin · web-flow · commit 8cd8c62890a2 · 2025-09-01T16:12:07.000+02:00
* Initial plan * Document Atari wrapper reset behavior and workaround for issue #666 Co-authored-by: araffin <1973948+araffin@users.noreply.github.com> * Move note to examples --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: araffin <1973948+araffin@users.noreply.github.com> Co-authored-by: Antonin RAFFIN <antonin.raffin@ensta.org>
diff --git a/docs/guide/examples.rst b/docs/guide/examples.rst
@@ -368,7 +368,25 @@ and multiprocessing for you. To install the Atari environments, run the command
 
 .. image:: ../_static/img/colab-badge.svg
    :target: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/sb3/atari_games.ipynb
-..
+
+.. note::
+
+    When working with Atari environments, be aware that the default ``terminal_on_life_loss=True`` behavior
+    can cause ``env.reset()`` to perform a no-op step instead of truly resetting the environment when
+    the episode ends due to a life loss rather than a game over (see `issue #666 <https://github.com/DLR-RM/stable-baselines3/issues/666>`_).
+    To ensure ``reset()`` always resets the environment, use:
+
+    .. code-block:: python
+
+        from stable_baselines3.common.env_util import make_atari_env
+
+        import ale_py
+
+        env = make_atari_env(
+            "BreakoutNoFrameskip-v4",
+            n_envs=1,
+            wrapper_kwargs=dict(terminal_on_life_loss=False)
+        )
 
 .. code-block:: python
 
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
@@ -35,6 +35,7 @@ Documentation:
 ^^^^^^^^^^^^^^
 - Added plotting documentation and examples
 - Added documentation clarifying gSDE (Generalized State-Dependent Exploration) inference behavior for PPO, SAC, and A2C algorithms
+- Documented Atari wrapper reset behavior where ``env.reset()`` may perform a no-op step instead of truly resetting when ``terminal_on_life_loss=True`` (default), and how to avoid this behavior by setting ``terminal_on_life_loss=False``
 - Clarified comment in ``_sample_action()`` method to better explain action scaling behavior for off-policy algorithms (@copilot)
 
 
diff --git a/stable_baselines3/common/atari_wrappers.py b/stable_baselines3/common/atari_wrappers.py
@@ -100,6 +100,14 @@ class EpisodicLifeEnv(gym.Wrapper[np.ndarray, int, np.ndarray, int]):
     Make end-of-life == end-of-episode, but only reset on true game over.
     Done by DeepMind for the DQN and co. since it helps value estimation.
 
+    .. note::
+        This wrapper changes the behavior of ``env.reset()``. When the environment
+        terminates due to a loss of life (but not game over), calling ``reset()`` will
+        perform a no-op step instead of truly resetting the environment. This can be
+        confusing when evaluating or testing agents. To avoid this behavior and ensure ``reset()``
+        always resets the env, set ``terminal_on_life_loss=False`` when
+        using ``make_atari_env()``.
+
     :param env: Environment to wrap
     """
 
@@ -273,7 +281,7 @@ class AtariWrapper(gym.Wrapper[np.ndarray, int, np.ndarray, int]):
     :param frame_skip: Frequency at which the agent experiences the game.
         This correspond to repeating the action ``frame_skip`` times.
     :param screen_size: Resize Atari frame
-    :param terminal_on_life_loss: If True, then step() returns done=True whenever a life is lost.
+    :param terminal_on_life_loss: If True, then step() returns terminated=True whenever a life is lost.
     :param clip_reward: If True (default), the reward is clip to {-1, 0, 1} depending on its sign.
     :param action_repeat_probability: Probability of repeating the last action
     """
diff --git a/stable_baselines3/common/env_util.py b/stable_baselines3/common/env_util.py
@@ -144,6 +144,12 @@ def make_atari_env(
     Create a wrapped, monitored VecEnv for Atari.
     It is a wrapper around ``make_vec_env`` that includes common preprocessing for Atari games.
 
+    .. note::
+        By default, the ``AtariWrapper`` uses ``terminal_on_life_loss=True``, which causes
+        ``env.reset()`` to perform a no-op step instead of truly resetting when the environment
+        terminates due to a loss of life (but not game over). To ensure ``reset()`` always
+        resets the env, pass ``wrapper_kwargs=dict(terminal_on_life_loss=False)``.
+
     :param env_id: either the env ID, the env class or a callable returning an env
     :param n_envs: the number of environments you wish to have in parallel
     :param seed: the initial seed for the random number generator