Replace Gym with Gymnasium (#1580)

* Replace Gym with Gymnasium

  The maintainers of OpenAI's Gym decided to stop maintaining the library and moved over to a fork called Gymnasium. They advise switching to the new library, which can be used as a drop-in replacement. See: https://github.com/openai/gym/blob/master/README.md

* Upgrade Gymnasium

* Add a float conversion to please MyPy
online-ml · Jul 22, 2024 · 9d05b77 · 9d05b77
1 parent 038ad5d
commit 9d05b77
Show file tree

Hide file tree

Showing 14 changed files with 50 additions and 46 deletions.
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -35,7 +35,7 @@ pandas = "^2.1"
 
 [tool.poetry.group.dev.dependencies]
 graphviz = "^0.20.1"
-gym = "^0.26.2"
+gymnasium = "^0.29.0"
 matplotlib = "^3.0.2"
 mypy = "^1.6.1"
 pre-commit = "^3.5.0"

diff --git a/river/bandit/bayes_ucb.py b/river/bandit/bayes_ucb.py
@@ -28,7 +28,7 @@ class BayesUCB(bandit.base.Policy):
     Examples
     --------
 
-    >>> import gym
+    >>> import gymnasium as gym
     >>> from river import bandit
     >>> from river import proba
     >>> from river import stats

diff --git a/river/bandit/envs/__init__.py b/river/bandit/envs/__init__.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 try:
-    import gym
+    import gymnasium as gym
 
     GYM_INSTALLED = True
 except ImportError:

diff --git a/river/bandit/envs/candy_cane.py b/river/bandit/envs/candy_cane.py
@@ -2,7 +2,7 @@
 
 import dataclasses
 
-import gym
+import gymnasium as gym
 
 
 @dataclasses.dataclass
@@ -25,7 +25,7 @@ class CandyCaneContest(gym.Env):
     Examples
     --------
 
-    >>> import gym
+    >>> import gymnasium as gym
     >>> from river import stats
 
     >>> env = gym.make('river_bandits/CandyCaneContest-v0')

diff --git a/river/bandit/envs/testbed.py b/river/bandit/envs/testbed.py
@@ -2,7 +2,7 @@
 
 import math
 
-import gym
+import gymnasium as gym
 
 
 class KArmedTestbed(gym.Env):

diff --git a/river/bandit/epsilon_greedy.py b/river/bandit/epsilon_greedy.py
@@ -33,7 +33,7 @@ class EpsilonGreedy(bandit.base.Policy):
     Examples
     --------
 
-    >>> import gym
+    >>> import gymnasium as gym
     >>> from river import bandit
     >>> from river import stats
 

diff --git a/river/bandit/evaluate.py b/river/bandit/evaluate.py
@@ -5,7 +5,7 @@
 import typing
 
 try:
-    import gym
+    import gymnasium as gym
 except ImportError:
     ...
 
@@ -52,7 +52,7 @@ def evaluate(
     Examples
     --------
 
-    >>> import gym
+    >>> import gymnasium as gym
     >>> from river import bandit
 
     >>> trace = bandit.evaluate(

diff --git a/river/bandit/exp3.py b/river/bandit/exp3.py
@@ -35,7 +35,7 @@ class Exp3(bandit.base.Policy):
     Examples
     --------
 
-    >>> import gym
+    >>> import gymnasium as gym
     >>> from river import bandit
     >>> from river import proba
     >>> from river import stats

diff --git a/river/bandit/random.py b/river/bandit/random.py
@@ -23,7 +23,7 @@ class RandomPolicy(bandit.base.Policy):
     Examples
     --------
 
-    >>> import gym
+    >>> import gymnasium as gym
     >>> from river import bandit
     >>> from river import proba
     >>> from river import stats

diff --git a/river/bandit/test_envs.py b/river/bandit/test_envs.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
-import gym.utils.env_checker
+import gymnasium as gym
+import gymnasium.utils.env_checker
 import pytest
 
 from river import bandit

diff --git a/river/bandit/test_policies.py b/river/bandit/test_policies.py
@@ -5,7 +5,7 @@
 import inspect
 import random
 
-import gym
+import gymnasium as gym
 import pytest
 
 from river import bandit, metrics
@@ -111,7 +111,7 @@ def test_better_than_random_policy(policy: bandit.base.Policy, env: gym.Env):
             arm_id = policy.pull(arm_ids)  # type: ignore
             observation, reward, terminated, truncated, info = env.step(arm_id)
             policy.update(arm_id, reward)
-            policy_reward += reward
+            policy_reward += float(reward)
 
             random_arm_id = random_policy.pull(arm_ids)  # type: ignore
             (
@@ -122,7 +122,7 @@ def test_better_than_random_policy(policy: bandit.base.Policy, env: gym.Env):
                 info,
             ) = random_env.step(random_arm_id)
             random_policy.update(random_arm_id, reward)
-            random_reward += reward
+            random_reward += float(reward)
 
         n_successes += policy_reward > random_reward
 

diff --git a/river/bandit/thompson.py b/river/bandit/thompson.py
@@ -40,7 +40,7 @@ class ThompsonSampling(bandit.base.Policy):
     Examples
     --------
 
-    >>> import gym
+    >>> import gymnasium as gym
     >>> from river import bandit
     >>> from river import proba
     >>> from river import stats

diff --git a/river/bandit/ucb.py b/river/bandit/ucb.py
@@ -32,7 +32,7 @@ class UCB(bandit.base.Policy):
     Examples
     --------
 
-    >>> import gym
+    >>> import gymnasium as gym
     >>> from river import bandit
     >>> from river import preprocessing
     >>> from river import stats