diff --git a/examples/ppo/seed_rl_atari_preprocessing.py b/examples/ppo/seed_rl_atari_preprocessing.py
index e8519357f..f5186433b 100644
--- a/examples/ppo/seed_rl_atari_preprocessing.py
+++ b/examples/ppo/seed_rl_atari_preprocessing.py
@@ -31,8 +31,8 @@
 """
 
 import cv2
-from gymnasium.spaces.box import Box
 import gymnasium as gym
+from gymnasium.spaces.box import Box
 import numpy as np
 
 
@@ -117,7 +117,7 @@ def action_space(self):
 
   @property
   def reward_range(self):
-    return self.environment.reward_range
+    return self.environment.reward_range  # type: ignore
 
   @property
   def metadata(self):
@@ -189,7 +189,7 @@ def step(self, action):
       # We bypass the Gym observation altogether and directly fetch the
       # grayscale image from the ALE. This is a little faster.
       _, reward, game_over, _, info = self.environment.step(action)
-      accumulated_reward += reward
+      accumulated_reward += float(reward)
 
       if self.terminal_on_life_loss:
         new_lives = self.environment.unwrapped.ale.lives()  # pytype:disable=attribute-error
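
A minimal sketch of the effect of the `float(reward)` cast, outside the preprocessing wrapper. This is illustrative only: the environment id and the loop below are assumptions, not part of the example code; it simply shows that gymnasium's `env.step` may return the reward as a numpy scalar, so an explicit cast keeps the accumulator a plain Python float.

    import gymnasium as gym

    # Hypothetical usage; requires gymnasium[atari] / ale-py to be installed.
    env = gym.make("ALE/Pong-v5")
    env.reset()
    accumulated_reward = 0.0
    for _ in range(4):  # repeat one action, loosely mimicking frame skipping
        _, reward, terminated, truncated, info = env.step(env.action_space.sample())
        accumulated_reward += float(reward)  # cast avoids numpy scalar accumulation
        if terminated or truncated:
            break
    env.close()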