9 changes: 5 additions & 4 deletions .github/workflows/ci.yml
@@ -20,17 +20,17 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
python-version: ["3.10", "3.11", "3.12", "3.13"]
include:
# Default version
- gymnasium-version: "1.0.0"
# Add a new config to test gym<1.0
- python-version: "3.10"
gymnasium-version: "0.29.1"
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v6
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
@@ -40,7 +40,8 @@ jobs:
pip install uv
# cpu version of pytorch
# See https://github.com/astral-sh/uv/issues/1497
uv pip install --system torch==2.3.1+cpu --index https://download.pytorch.org/whl/cpu
# Need Pytorch 2.9+ for Python 3.13
uv pip install --system torch==2.9.1+cpu --index https://download.pytorch.org/whl/cpu

uv pip install --system .[extra,tests,docs]
# Use headless version
48 changes: 38 additions & 10 deletions docs/misc/changelog.rst
@@ -3,6 +3,43 @@
Changelog
==========


Release 2.8.0a0 (WIP)
--------------------------

Breaking Changes:
^^^^^^^^^^^^^^^^^
- Removed support for Python 3.9; please upgrade to Python >= 3.10
- Set ``strict=True`` for every call to ``zip(...)``

New Features:
^^^^^^^^^^^^^
- Added official support for Python 3.13

Bug Fixes:
^^^^^^^^^^

`SB3-Contrib`_
^^^^^^^^^^^^^^

`RL Zoo`_
^^^^^^^^^

`SBX`_ (SB3 + Jax)
^^^^^^^^^^^^^^^^^^

Deprecations:
^^^^^^^^^^^^^
- ``zip_strict()`` is no longer needed since Python 3.10; please use ``zip(..., strict=True)`` instead
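
  For reference, a minimal sketch of the migration (the old helper's import path in the comment is an assumption for illustration, not taken from this diff)::

      # Before (helper needed while Python 3.9 was still supported):
      #   from stable_baselines3.common.utils import zip_strict
      #   for param, target_param in zip_strict(params, target_params):
      #       ...

      # After (built-in behaviour since Python 3.10):
      params = [0.1, 0.2, 0.3]
      target_params = [0.4, 0.5, 0.6]
      for param, target_param in zip(params, target_params, strict=True):
          print(param, target_param)

      # strict=True raises ValueError on a length mismatch instead of
      # silently truncating to the shortest iterable:
      try:
          list(zip([1, 2], [1, 2, 3], strict=True))
      except ValueError as exc:
          print(exc)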

Others:
^^^^^^^
- Updated to Python 3.10+ annotations
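
  A short, hypothetical illustration of the annotation style adopted here (the ``scale`` function below is made up for illustration, not code from the repository); the same ``Optional``/``Union`` to ``X | None`` rewrite appears throughout the diffs below::

      from collections.abc import Iterable

      # Before (Python 3.9 style):
      #   from typing import Optional, Union
      #   def scale(values: Iterable[float], factor: Union[int, float],
      #             clip: Optional[float] = None) -> list[float]: ...

      # After (PEP 604 unions, available since Python 3.10):
      def scale(values: Iterable[float], factor: int | float, clip: float | None = None) -> list[float]:
          """Scale each value and optionally clip the result."""
          scaled = [v * factor for v in values]
          if clip is not None:
              scaled = [min(v, clip) for v in scaled]
          return scaled

      print(scale([1.0, 2.0, 3.0], 2, clip=5.0))  # [2.0, 4.0, 5.0]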

Documentation:
^^^^^^^^^^^^^^


Release 2.7.1 (2025-12-05)
--------------------------

@@ -30,18 +67,9 @@ Bug Fixes:
^^^^^^^^^^^^^^
- Fixed tensorboard log name for ``MaskablePPO``

`RL Zoo`_
^^^^^^^^^

`SBX`_ (SB3 + Jax)
^^^^^^^^^^^^^^^^^^
- Added CnnPolicy to PPO

Deprecations:
^^^^^^^^^^^^^

Others:
^^^^^^^
- Added ``CnnPolicy`` to PPO

Documentation:
^^^^^^^^^^^^^^
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -1,8 +1,8 @@
[tool.ruff]
# Same as Black.
line-length = 127
# Assume Python 3.9
target-version = "py39"
# Assume Python 3.10
target-version = "py310"

[tool.ruff.lint]
# See https://beta.ruff.rs/docs/rules/
4 changes: 2 additions & 2 deletions setup.py
@@ -135,7 +135,7 @@
long_description=long_description,
long_description_content_type="text/markdown",
version=__version__,
python_requires=">=3.9",
python_requires=">=3.10",
# PyPI package information.
project_urls={
"Code": "https://github.com/DLR-RM/stable-baselines3",
@@ -147,10 +147,10 @@
},
classifiers=[
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
],
)

20 changes: 10 additions & 10 deletions stable_baselines3/a2c/a2c.py
@@ -1,4 +1,4 @@
from typing import Any, ClassVar, Optional, TypeVar, Union
from typing import Any, ClassVar, TypeVar

import torch as th
from gymnasium import spaces
@@ -65,9 +65,9 @@ class A2C(OnPolicyAlgorithm):

def __init__(
self,
policy: Union[str, type[ActorCriticPolicy]],
env: Union[GymEnv, str],
learning_rate: Union[float, Schedule] = 7e-4,
policy: str | type[ActorCriticPolicy],
env: GymEnv | str,
learning_rate: float | Schedule = 7e-4,
n_steps: int = 5,
gamma: float = 0.99,
gae_lambda: float = 1.0,
@@ -78,15 +78,15 @@ def __init__(
use_rms_prop: bool = True,
use_sde: bool = False,
sde_sample_freq: int = -1,
rollout_buffer_class: Optional[type[RolloutBuffer]] = None,
rollout_buffer_kwargs: Optional[dict[str, Any]] = None,
rollout_buffer_class: type[RolloutBuffer] | None = None,
rollout_buffer_kwargs: dict[str, Any] | None = None,
normalize_advantage: bool = False,
stats_window_size: int = 100,
tensorboard_log: Optional[str] = None,
policy_kwargs: Optional[dict[str, Any]] = None,
tensorboard_log: str | None = None,
policy_kwargs: dict[str, Any] | None = None,
verbose: int = 0,
seed: Optional[int] = None,
device: Union[th.device, str] = "auto",
seed: int | None = None,
device: th.device | str = "auto",
_init_setup_model: bool = True,
):
super().__init__(
72 changes: 36 additions & 36 deletions stable_baselines3/common/base_class.py
@@ -7,7 +7,7 @@
from abc import ABC, abstractmethod
from collections import deque
from collections.abc import Iterable
from typing import Any, ClassVar, Optional, TypeVar, Union
from typing import Any, ClassVar, TypeVar

import gymnasium as gym
import numpy as np
@@ -45,7 +45,7 @@
SelfBaseAlgorithm = TypeVar("SelfBaseAlgorithm", bound="BaseAlgorithm")


def maybe_make_env(env: Union[GymEnv, str], verbose: int) -> GymEnv:
def maybe_make_env(env: GymEnv | str, verbose: int) -> GymEnv:
"""If env is a string, make the environment; otherwise, return env.

:param env: The environment to learn from.
@@ -105,20 +105,20 @@ class BaseAlgorithm(ABC):

def __init__(
self,
policy: Union[str, type[BasePolicy]],
env: Union[GymEnv, str, None],
learning_rate: Union[float, Schedule],
policy_kwargs: Optional[dict[str, Any]] = None,
policy: str | type[BasePolicy],
env: GymEnv | str | None,
learning_rate: float | Schedule,
policy_kwargs: dict[str, Any] | None = None,
stats_window_size: int = 100,
tensorboard_log: Optional[str] = None,
tensorboard_log: str | None = None,
verbose: int = 0,
device: Union[th.device, str] = "auto",
device: th.device | str = "auto",
support_multi_env: bool = False,
monitor_wrapper: bool = True,
seed: Optional[int] = None,
seed: int | None = None,
use_sde: bool = False,
sde_sample_freq: int = -1,
supported_action_spaces: Optional[tuple[type[spaces.Space], ...]] = None,
supported_action_spaces: tuple[type[spaces.Space], ...] | None = None,
) -> None:
if isinstance(policy, str):
self.policy_class = self._get_policy_from_name(policy)
@@ -138,14 +138,14 @@ def __init__(
# Used for computing fps, it is updated at each call of learn()
self._num_timesteps_at_start = 0
self.seed = seed
self.action_noise: Optional[ActionNoise] = None
self.action_noise: ActionNoise | None = None
self.start_time = 0.0
self.learning_rate = learning_rate
self.tensorboard_log = tensorboard_log
self._last_obs = None # type: Optional[Union[np.ndarray, dict[str, np.ndarray]]]
self._last_episode_starts = None # type: Optional[np.ndarray]
self._last_obs = None # type: np.ndarray | dict[str, np.ndarray] | None
self._last_episode_starts = None # type: np.ndarray | None
# When using VecNormalize:
self._last_original_obs = None # type: Optional[Union[np.ndarray, dict[str, np.ndarray]]]
self._last_original_obs = None # type: np.ndarray | dict[str, np.ndarray] | None
self._episode_num = 0
# Used for gSDE only
self.use_sde = use_sde
@@ -155,14 +155,14 @@ def __init__(
self._current_progress_remaining = 1.0
# Buffers for logging
self._stats_window_size = stats_window_size
self.ep_info_buffer = None # type: Optional[deque]
self.ep_success_buffer = None # type: Optional[deque]
self.ep_info_buffer = None # type: deque | None
self.ep_success_buffer = None # type: deque | None
# For logging (and TD3 delayed updates)
self._n_updates = 0 # type: int
# Whether the user passed a custom logger or not
self._custom_logger = False
self.env: Optional[VecEnv] = None
self._vec_normalize_env: Optional[VecNormalize] = None
self.env: VecEnv | None = None
self._vec_normalize_env: VecNormalize | None = None

# Create and wrap the env if needed
if env is not None:
@@ -284,7 +284,7 @@ def _update_current_progress_remaining(self, num_timesteps: int, total_timesteps
"""
self._current_progress_remaining = 1.0 - float(num_timesteps) / float(total_timesteps)

def _update_learning_rate(self, optimizers: Union[list[th.optim.Optimizer], th.optim.Optimizer]) -> None:
def _update_learning_rate(self, optimizers: list[th.optim.Optimizer] | th.optim.Optimizer) -> None:
"""
Update the optimizers learning rate using the current learning rate schedule
and the current progress remaining (from 1 to 0).
@@ -435,7 +435,7 @@ def _setup_learn(

return total_timesteps, callback

def _update_info_buffer(self, infos: list[dict[str, Any]], dones: Optional[np.ndarray] = None) -> None:
def _update_info_buffer(self, infos: list[dict[str, Any]], dones: np.ndarray | None = None) -> None:
"""
Retrieve reward, episode length, episode success and update the buffer
if using Monitor wrapper or a GoalEnv.
@@ -456,15 +456,15 @@ def _update_info_buffer(self, infos: list[dict[str, Any]], dones: Optional[np.nd
if maybe_is_success is not None and dones[idx]:
self.ep_success_buffer.append(maybe_is_success)

def get_env(self) -> Optional[VecEnv]:
def get_env(self) -> VecEnv | None:
"""
Returns the current environment (can be None if not defined).

:return: The current environment
"""
return self.env

def get_vec_normalize_env(self) -> Optional[VecNormalize]:
def get_vec_normalize_env(self) -> VecNormalize | None:
"""
Return the ``VecNormalize`` wrapper of the training env
if it exists.
@@ -536,11 +536,11 @@ def learn(

def predict(
self,
observation: Union[np.ndarray, dict[str, np.ndarray]],
state: Optional[tuple[np.ndarray, ...]] = None,
episode_start: Optional[np.ndarray] = None,
observation: np.ndarray | dict[str, np.ndarray],
state: tuple[np.ndarray, ...] | None = None,
episode_start: np.ndarray | None = None,
deterministic: bool = False,
) -> tuple[np.ndarray, Optional[tuple[np.ndarray, ...]]]:
) -> tuple[np.ndarray, tuple[np.ndarray, ...] | None]:
"""
Get the policy action from an observation (and optional hidden state).
Includes sugar-coating to handle different observations (e.g. normalizing images).
@@ -556,7 +556,7 @@
"""
return self.policy.predict(observation, state, episode_start, deterministic)

def set_random_seed(self, seed: Optional[int] = None) -> None:
def set_random_seed(self, seed: int | None = None) -> None:
"""
Set the seed of the pseudo-random generators
(python, numpy, pytorch, gym, action_space)
@@ -573,9 +573,9 @@ def set_random_seed(self, seed: Optional[int] = None) -> None:

def set_parameters(
self,
load_path_or_dict: Union[str, TensorDict],
load_path_or_dict: str | TensorDict,
exact_match: bool = True,
device: Union[th.device, str] = "auto",
device: th.device | str = "auto",
) -> None:
"""
Load parameters from a given zip-file or a nested dictionary containing parameters for
@@ -642,10 +642,10 @@ def set_parameters(
@classmethod
def load( # noqa: C901
cls: type[SelfBaseAlgorithm],
path: Union[str, pathlib.Path, io.BufferedIOBase],
env: Optional[GymEnv] = None,
device: Union[th.device, str] = "auto",
custom_objects: Optional[dict[str, Any]] = None,
path: str | pathlib.Path | io.BufferedIOBase,
env: GymEnv | None = None,
device: th.device | str = "auto",
custom_objects: dict[str, Any] | None = None,
print_system_info: bool = False,
force_reset: bool = True,
**kwargs,
@@ -818,9 +818,9 @@ def get_parameters(self) -> dict[str, dict]:

def save(
self,
path: Union[str, pathlib.Path, io.BufferedIOBase],
exclude: Optional[Iterable[str]] = None,
include: Optional[Iterable[str]] = None,
path: str | pathlib.Path | io.BufferedIOBase,
exclude: Iterable[str] | None = None,
include: Iterable[str] | None = None,
) -> None:
"""
Save all the attributes of the object and the model parameters in a zip-file.