9 changes: 5 additions & 4 deletions .github/workflows/ci.yml
@@ -20,17 +20,17 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
python-version: ["3.10", "3.11", "3.12", "3.13"]
include:
# Default version
- gymnasium-version: "1.0.0"
# Add a new config to test gym<1.0
- python-version: "3.10"
gymnasium-version: "0.29.1"
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v6
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
@@ -40,7 +40,8 @@ jobs:
pip install uv
# cpu version of pytorch
# See https://github.com/astral-sh/uv/issues/1497
uv pip install --system torch==2.3.1+cpu --index https://download.pytorch.org/whl/cpu
# Need Pytorch 2.9+ for Python 3.13
uv pip install --system torch==2.9.1+cpu --index https://download.pytorch.org/whl/cpu

uv pip install --system .[extra,tests,docs]
# Use headless version
48 changes: 38 additions & 10 deletions docs/misc/changelog.rst
@@ -3,6 +3,43 @@
Changelog
==========


Release 2.8.0a0 (WIP)
--------------------------

Breaking Changes:
^^^^^^^^^^^^^^^^^
- Removed support for Python 3.9; please upgrade to Python >= 3.10
- Set ``strict=True`` for every call to ``zip(...)``

New Features:
^^^^^^^^^^^^^
- Added official support for Python 3.13

Bug Fixes:
^^^^^^^^^^

`SB3-Contrib`_
^^^^^^^^^^^^^^

`RL Zoo`_
^^^^^^^^^

`SBX`_ (SB3 + Jax)
^^^^^^^^^^^^^^^^^^

Deprecations:
^^^^^^^^^^^^^
- ``zip_strict()`` is no longer needed since Python 3.10; please use ``zip(..., strict=True)`` instead
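
  For reference, a minimal sketch of the migration (the old helper's import path in the comment is an assumption for illustration, not taken from this diff)::

      # Before (helper needed while Python 3.9 was still supported):
      #   from stable_baselines3.common.utils import zip_strict
      #   for param, target_param in zip_strict(params, target_params):
      #       ...

      # After (built-in behaviour since Python 3.10):
      params = [0.1, 0.2, 0.3]
      target_params = [0.4, 0.5, 0.6]
      for param, target_param in zip(params, target_params, strict=True):
          print(param, target_param)

      # strict=True raises ValueError on a length mismatch instead of
      # silently truncating to the shortest iterable:
      try:
          list(zip([1, 2], [1, 2, 3], strict=True))
      except ValueError as exc:
          print(exc)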

Others:
^^^^^^^
- Updated to Python 3.10+ annotations
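
  A short, hypothetical illustration of the annotation style adopted here (the ``scale`` function below is made up for illustration, not code from the repository); the same ``Optional``/``Union`` to ``X | None`` rewrite appears throughout the diffs below::

      from collections.abc import Iterable

      # Before (Python 3.9 style):
      #   from typing import Optional, Union
      #   def scale(values: Iterable[float], factor: Union[int, float],
      #             clip: Optional[float] = None) -> list[float]: ...

      # After (PEP 604 unions, available since Python 3.10):
      def scale(values: Iterable[float], factor: int | float, clip: float | None = None) -> list[float]:
          """Scale each value and optionally clip the result."""
          scaled = [v * factor for v in values]
          if clip is not None:
              scaled = [min(v, clip) for v in scaled]
          return scaled

      print(scale([1.0, 2.0, 3.0], 2, clip=5.0))  # [2.0, 4.0, 5.0]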

Documentation:
^^^^^^^^^^^^^^


Release 2.7.1 (2025-12-05)
--------------------------

@@ -30,18 +67,9 @@ Bug Fixes:
^^^^^^^^^^^^^^
- Fixed tensorboard log name for ``MaskablePPO``

`RL Zoo`_
^^^^^^^^^

`SBX`_ (SB3 + Jax)
^^^^^^^^^^^^^^^^^^
- Added CnnPolicy to PPO

Deprecations:
^^^^^^^^^^^^^

Others:
^^^^^^^
- Added ``CnnPolicy`` to PPO

Documentation:
^^^^^^^^^^^^^^
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -1,8 +1,8 @@
[tool.ruff]
# Same as Black.
line-length = 127
# Assume Python 3.9
target-version = "py39"
# Assume Python 3.10
target-version = "py310"

[tool.ruff.lint]
# See https://beta.ruff.rs/docs/rules/
4 changes: 2 additions & 2 deletions setup.py
@@ -135,7 +135,7 @@
long_description=long_description,
long_description_content_type="text/markdown",
version=__version__,
python_requires=">=3.9",
python_requires=">=3.10",
# PyPI package information.
project_urls={
"Code": "https://github.com/DLR-RM/stable-baselines3",
@@ -147,10 +147,10 @@
},
classifiers=[
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
],
)

20 changes: 10 additions & 10 deletions stable_baselines3/a2c/a2c.py
@@ -1,4 +1,4 @@
from typing import Any, ClassVar, Optional, TypeVar, Union
from typing import Any, ClassVar, TypeVar

import torch as th
from gymnasium import spaces
@@ -65,9 +65,9 @@ class A2C(OnPolicyAlgorithm):

def __init__(
self,
policy: Union[str, type[ActorCriticPolicy]],
env: Union[GymEnv, str],
learning_rate: Union[float, Schedule] = 7e-4,
policy: str | type[ActorCriticPolicy],
env: GymEnv | str,
learning_rate: float | Schedule = 7e-4,
n_steps: int = 5,
gamma: float = 0.99,
gae_lambda: float = 1.0,
@@ -78,15 +78,15 @@ def __init__(
use_rms_prop: bool = True,
use_sde: bool = False,
sde_sample_freq: int = -1,
rollout_buffer_class: Optional[type[RolloutBuffer]] = None,
rollout_buffer_kwargs: Optional[dict[str, Any]] = None,
rollout_buffer_class: type[RolloutBuffer] | None = None,
rollout_buffer_kwargs: dict[str, Any] | None = None,
normalize_advantage: bool = False,
stats_window_size: int = 100,
tensorboard_log: Optional[str] = None,
policy_kwargs: Optional[dict[str, Any]] = None,
tensorboard_log: str | None = None,
policy_kwargs: dict[str, Any] | None = None,
verbose: int = 0,
seed: Optional[int] = None,
device: Union[th.device, str] = "auto",
seed: int | None = None,
device: th.device | str = "auto",
_init_setup_model: bool = True,
):
super().__init__(
72 changes: 36 additions & 36 deletions stable_baselines3/common/base_class.py
@@ -7,7 +7,7 @@
from abc import ABC, abstractmethod
from collections import deque
from collections.abc import Iterable
from typing import Any, ClassVar, Optional, TypeVar, Union
from typing import Any, ClassVar, TypeVar

import gymnasium as gym
import numpy as np
@@ -45,7 +45,7 @@
SelfBaseAlgorithm = TypeVar("SelfBaseAlgorithm", bound="BaseAlgorithm")


def maybe_make_env(env: Union[GymEnv, str], verbose: int) -> GymEnv:
def maybe_make_env(env: GymEnv | str, verbose: int) -> GymEnv:
"""If env is a string, make the environment; otherwise, return env.

:param env: The environment to learn from.
@@ -105,20 +105,20 @@ class BaseAlgorithm(ABC):

def __init__(
self,
policy: Union[str, type[BasePolicy]],
env: Union[GymEnv, str, None],
learning_rate: Union[float, Schedule],
policy_kwargs: Optional[dict[str, Any]] = None,
policy: str | type[BasePolicy],
env: GymEnv | str | None,
learning_rate: float | Schedule,
policy_kwargs: dict[str, Any] | None = None,
stats_window_size: int = 100,
tensorboard_log: Optional[str] = None,
tensorboard_log: str | None = None,
verbose: int = 0,
device: Union[th.device, str] = "auto",
device: th.device | str = "auto",
support_multi_env: bool = False,
monitor_wrapper: bool = True,
seed: Optional[int] = None,
seed: int | None = None,
use_sde: bool = False,
sde_sample_freq: int = -1,
supported_action_spaces: Optional[tuple[type[spaces.Space], ...]] = None,
supported_action_spaces: tuple[type[spaces.Space], ...] | None = None,
) -> None:
if isinstance(policy, str):
self.policy_class = self._get_policy_from_name(policy)
@@ -138,14 +138,14 @@ def __init__(
# Used for computing fps, it is updated at each call of learn()
self._num_timesteps_at_start = 0
self.seed = seed
self.action_noise: Optional[ActionNoise] = None
self.action_noise: ActionNoise | None = None
self.start_time = 0.0
self.learning_rate = learning_rate
self.tensorboard_log = tensorboard_log
self._last_obs = None # type: Optional[Union[np.ndarray, dict[str, np.ndarray]]]
self._last_episode_starts = None # type: Optional[np.ndarray]
self._last_obs = None # type: np.ndarray | dict[str, np.ndarray] | None
self._last_episode_starts = None # type: np.ndarray | None
# When using VecNormalize:
self._last_original_obs = None # type: Optional[Union[np.ndarray, dict[str, np.ndarray]]]
self._last_original_obs = None # type: np.ndarray | dict[str, np.ndarray] | None
self._episode_num = 0
# Used for gSDE only
self.use_sde = use_sde
@@ -155,14 +155,14 @@ def __init__(
self._current_progress_remaining = 1.0
# Buffers for logging
self._stats_window_size = stats_window_size
self.ep_info_buffer = None # type: Optional[deque]
self.ep_success_buffer = None # type: Optional[deque]
self.ep_info_buffer = None # type: deque | None
self.ep_success_buffer = None # type: deque | None
# For logging (and TD3 delayed updates)
self._n_updates = 0 # type: int
# Whether the user passed a custom logger or not
self._custom_logger = False
self.env: Optional[VecEnv] = None
self._vec_normalize_env: Optional[VecNormalize] = None
self.env: VecEnv | None = None
self._vec_normalize_env: VecNormalize | None = None

# Create and wrap the env if needed
if env is not None:
@@ -284,7 +284,7 @@ def _update_current_progress_remaining(self, num_timesteps: int, total_timesteps
"""
self._current_progress_remaining = 1.0 - float(num_timesteps) / float(total_timesteps)

def _update_learning_rate(self, optimizers: Union[list[th.optim.Optimizer], th.optim.Optimizer]) -> None:
def _update_learning_rate(self, optimizers: list[th.optim.Optimizer] | th.optim.Optimizer) -> None:
"""
Update the optimizers learning rate using the current learning rate schedule
and the current progress remaining (from 1 to 0).
@@ -435,7 +435,7 @@ def _setup_learn(

return total_timesteps, callback

def _update_info_buffer(self, infos: list[dict[str, Any]], dones: Optional[np.ndarray] = None) -> None:
def _update_info_buffer(self, infos: list[dict[str, Any]], dones: np.ndarray | None = None) -> None:
"""
Retrieve reward, episode length, episode success and update the buffer
if using Monitor wrapper or a GoalEnv.
@@ -456,15 +456,15 @@ def _update_info_buffer(self, infos: list[dict[str, Any]], dones: Optional[np.nd
if maybe_is_success is not None and dones[idx]:
self.ep_success_buffer.append(maybe_is_success)

def get_env(self) -> Optional[VecEnv]:
def get_env(self) -> VecEnv | None:
"""
Returns the current environment (can be None if not defined).

:return: The current environment
"""
return self.env

def get_vec_normalize_env(self) -> Optional[VecNormalize]:
def get_vec_normalize_env(self) -> VecNormalize | None:
"""
Return the ``VecNormalize`` wrapper of the training env
if it exists.
@@ -536,11 +536,11 @@ def learn(

def predict(
self,
observation: Union[np.ndarray, dict[str, np.ndarray]],
state: Optional[tuple[np.ndarray, ...]] = None,
episode_start: Optional[np.ndarray] = None,
observation: np.ndarray | dict[str, np.ndarray],
state: tuple[np.ndarray, ...] | None = None,
episode_start: np.ndarray | None = None,
deterministic: bool = False,
) -> tuple[np.ndarray, Optional[tuple[np.ndarray, ...]]]:
) -> tuple[np.ndarray, tuple[np.ndarray, ...] | None]:
"""
Get the policy action from an observation (and optional hidden state).
Includes sugar-coating to handle different observations (e.g. normalizing images).
@@ -556,7 +556,7 @@
"""
return self.policy.predict(observation, state, episode_start, deterministic)

def set_random_seed(self, seed: Optional[int] = None) -> None:
def set_random_seed(self, seed: int | None = None) -> None:
"""
Set the seed of the pseudo-random generators
(python, numpy, pytorch, gym, action_space)
@@ -573,9 +573,9 @@ def set_random_seed(self, seed: Optional[int] = None) -> None:

def set_parameters(
self,
load_path_or_dict: Union[str, TensorDict],
load_path_or_dict: str | TensorDict,
exact_match: bool = True,
device: Union[th.device, str] = "auto",
device: th.device | str = "auto",
) -> None:
"""
Load parameters from a given zip-file or a nested dictionary containing parameters for
@@ -642,10 +642,10 @@ def set_parameters(
@classmethod
def load( # noqa: C901
cls: type[SelfBaseAlgorithm],
path: Union[str, pathlib.Path, io.BufferedIOBase],
env: Optional[GymEnv] = None,
device: Union[th.device, str] = "auto",
custom_objects: Optional[dict[str, Any]] = None,
path: str | pathlib.Path | io.BufferedIOBase,
env: GymEnv | None = None,
device: th.device | str = "auto",
custom_objects: dict[str, Any] | None = None,
print_system_info: bool = False,
force_reset: bool = True,
**kwargs,
@@ -818,9 +818,9 @@ def get_parameters(self) -> dict[str, dict]:

def save(
self,
path: Union[str, pathlib.Path, io.BufferedIOBase],
exclude: Optional[Iterable[str]] = None,
include: Optional[Iterable[str]] = None,
path: str | pathlib.Path | io.BufferedIOBase,
exclude: Iterable[str] | None = None,
include: Iterable[str] | None = None,
) -> None:
"""
Save all the attributes of the object and the model parameters in a zip-file.