Cast sampled actions of rollout buffers to float32 to avoid breaking changes

Trenza1ore · Trenza1ore · commit 77d6ee15b260 · 2025-08-01T01:01:35.000+01:00
diff --git a/stable_baselines3/common/buffers.py b/stable_baselines3/common/buffers.py
@@ -585,7 +585,7 @@ def _get_samples(
     ) -> RolloutBufferSamples:
         data = (
             self.observations[batch_inds],
-            self.actions[batch_inds],
+            self.actions[batch_inds].astype(np.float32, copy=False),
             self.values[batch_inds].flatten(),
             self.log_probs[batch_inds].flatten(),
             self.advantages[batch_inds].flatten(),
@@ -907,7 +907,7 @@ def _get_samples(  # type: ignore[override]
     ) -> DictRolloutBufferSamples:
         return DictRolloutBufferSamples(
             observations={key: self.to_torch(obs[batch_inds]) for (key, obs) in self.observations.items()},
-            actions=self.to_torch(self.actions[batch_inds]),
+            actions=self.to_torch(self.actions[batch_inds].astype(np.float32, copy=False)),
             old_values=self.to_torch(self.values[batch_inds].flatten()),
             old_log_prob=self.to_torch(self.log_probs[batch_inds].flatten()),
             advantages=self.to_torch(self.advantages[batch_inds].flatten()),