Release 2.5.0 (#271)

* Release 2.5.0
* Ignore mypy errors
Stable-Baselines-Team · Jan 27, 2025 · c070fc2
1 parent e1ca24a
commit c070fc2
Show file tree

Hide file tree

Showing 5 changed files with 15 additions and 8 deletions.
diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
@@ -3,13 +3,14 @@
 Changelog
 ==========
 
-Release 2.5.0a0 (WIP)
+Release 2.5.0 (2025-01-27)
 --------------------------
 
 Breaking Changes:
 ^^^^^^^^^^^^^^^^^
 - Upgraded to PyTorch 2.3.0
 - Dropped Python 3.8 support
+- Upgraded to Stable-Baselines3 >= 2.5.0
 
 New Features:
 ^^^^^^^^^^^^^

diff --git a/sb3_contrib/common/envs/invalid_actions_env.py b/sb3_contrib/common/envs/invalid_actions_env.py
@@ -22,15 +22,17 @@ def __init__(
 
         space = spaces.Discrete(dim)
         self.n_invalid_actions = n_invalid_actions
-        self.possible_actions = np.arange(space.n)
+        self.possible_actions = np.arange(space.n, dtype=int)
         self.invalid_actions: list[int] = []
         super().__init__(space=space, ep_length=ep_length)
 
     def _choose_next_state(self) -> None:
         self.state = self.action_space.sample()
         # Randomly choose invalid actions that are not the current state
         potential_invalid_actions = [i for i in self.possible_actions if i != self.state]
-        self.invalid_actions = np.random.choice(potential_invalid_actions, self.n_invalid_actions, replace=False).tolist()
+        self.invalid_actions = np.random.choice(  # type: ignore[assignment]
+            potential_invalid_actions, self.n_invalid_actions, replace=False
+        ).tolist()
 
     def action_masks(self) -> list[bool]:
         return [action not in self.invalid_actions for action in self.possible_actions]
@@ -72,7 +74,9 @@ def _choose_next_state(self) -> None:
 
         # Randomly choose invalid actions that are not the current state
         potential_invalid_actions = [i for i in self.possible_actions if i not in converted_state]
-        self.invalid_actions = np.random.choice(potential_invalid_actions, self.n_invalid_actions, replace=False).tolist()
+        self.invalid_actions = np.random.choice(  # type: ignore[assignment]
+            potential_invalid_actions, self.n_invalid_actions, replace=False
+        ).tolist()
 
     def action_masks(self) -> list[bool]:
         return [action not in self.invalid_actions for action in self.possible_actions]
@@ -113,7 +117,9 @@ def _choose_next_state(self) -> None:
 
         # Randomly choose invalid actions that are not the current state
         potential_invalid_actions = [i for i in self.possible_actions if i not in converted_state]
-        self.invalid_actions = np.random.choice(potential_invalid_actions, self.n_invalid_actions, replace=False).tolist()
+        self.invalid_actions = np.random.choice(  # type: ignore[assignment]
+            potential_invalid_actions, self.n_invalid_actions, replace=False
+        ).tolist()
 
     def action_masks(self) -> list[bool]:
         return [action not in self.invalid_actions for action in self.possible_actions]
diff --git a/sb3_contrib/common/maskable/callbacks.py b/sb3_contrib/common/maskable/callbacks.py
@@ -84,7 +84,7 @@ def _on_step(self) -> bool:
                     timesteps=self.evaluations_timesteps,
                     results=self.evaluations_results,
                     ep_lengths=self.evaluations_length,
-                    **kwargs,
+                    **kwargs,  # type: ignore[arg-type]
                 )
 
             mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards)

diff --git a/sb3_contrib/version.txt b/sb3_contrib/version.txt
@@ -1 +1 @@
-2.5.0a0
+2.5.0
diff --git a/setup.py b/setup.py
@@ -67,7 +67,7 @@
     packages=[package for package in find_packages() if package.startswith("sb3_contrib")],
     package_data={"sb3_contrib": ["py.typed", "version.txt"]},
     install_requires=[
-        "stable_baselines3>=2.4.0,<3.0",
+        "stable_baselines3>=2.5.0,<3.0",
     ],
     description="Contrib package of Stable Baselines3, experimental code.",
     author="Antonin Raffin",