
Commit 80a204b

[Feature] Cached resets, fix a few minor reset mask bugs, remove some default dict args (#1203)
* init
* Create tree.py
* work
* fix bugs
* add missing reset mask and scene id masking
* fix reset mask
* fixes
* fixes
* bug fix
* docs
1 parent acb8ea1 commit 80a204b

File tree

9 files changed: +315 −19 lines

docs/source/user_guide/wrappers/cached_reset.md

Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
# Cached Reset

For environments where resets are slow or expensive, or for workflows like RL with partial resets where frequent small resets occur (instead of resetting all environments in the GPU simulation simultaneously), cached resets can be useful.

Cached resets skip the process of calling the environment's reset function and instead load a previously saved environment state and observation. Loading environment state instead of running the environment's reset code (the `_initialize_episode` function) can be faster and boost environment FPS.

To use cached resets we provide a simple environment wrapper {py:class}`mani_skill.utils.wrappers.CachedResetWrapper` that can be used as follows:

```python
from mani_skill.utils.wrappers import CachedResetWrapper
import gymnasium as gym

env = gym.make("StackCube-v1", num_envs=256)
# upon applying the wrapper below, by default we sample 256 different reset states and cache them together with the corresponding observations
env = CachedResetWrapper(env)
# obs is now fetched from a cache, and the environment is initialized by setting environment state
obs, _ = env.reset()
```

Note that this caches only environment state, not geometry/texture details. Most ManiSkill environments change geometries/textures/scenes only when they are destroyed and recreated, or reconfigured, with a new seed.
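If you do need freshly sampled geometries/textures, you still have to trigger a full reconfigure yourself; a minimal sketch using the standard `reconfigure` reset option:

```python
# a minimal sketch: cached resets reuse the current scene, so sampling new
# geometries/textures still requires a full (slow) reconfigure with a new seed
obs, _ = env.reset(seed=42, options=dict(reconfigure=True))
```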
## Configuration Options

There are a few configuration options and ways to use the `CachedResetWrapper`. One is to modify how the reset states are generated. Below is the configuration dataclass that you can use and/or override when creating the wrapper:

```python
@dataclass
class CachedResetsConfig:
    num_resets: Optional[int] = None
    """The number of reset states to cache. If None, `num_envs` reset states are cached."""
    device: Optional[Device] = None
    """The device to cache the reset states on. If None, the base environment's device is used."""
    seed: Optional[int] = None
    """The seed to use for generating the cached reset states."""

    def dict(self):
        return {k: v for k, v in asdict(self).items()}
```

For example, to change the number of cached resets and the generation seed you can pass a dict like so:

```python
env = CachedResetWrapper(env, config=dict(num_resets=16384, seed=0))
```
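Since the wrapper accepts either a dict or the dataclass itself for `config`, you can equivalently pass a `CachedResetsConfig` directly; a small sketch (the import path follows the new `cached_reset` module added in this commit):

```python
from mani_skill.utils.wrappers.cached_reset import CachedResetsConfig

env = CachedResetWrapper(env, config=CachedResetsConfig(num_resets=16384, seed=0))
```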
You can also manually pass in your own reset states, optionally paired with the observations for each reset state.

```python
# env_states should be the result of env.get_state_dict(). It should be a dictionary where each leaf has the same batch size
# obs can be observations you previously generated. It can also be None
env = CachedResetWrapper(env, reset_to_env_states=dict(env_states=env_states, obs=obs))
```

It may be useful to use the `tree` utility in ManiSkill if you want to e.g. concatenate multiple env_states values from multiple calls to `env.get_state_dict`, like so:

```python
from mani_skill.utils import tree

state_dict_1 = env.get_state_dict()
# do something to the env
state_dict_2 = env.get_state_dict()
env_states = tree.cat([state_dict_1, state_dict_2])
env = CachedResetWrapper(env, reset_to_env_states=dict(env_states=env_states, obs=None))
```
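Since the collected states are plain nested dictionaries of tensors, they can also be persisted between runs; a hedged sketch using `torch.save`/`torch.load` (the file name is illustrative):

```python
import torch

# save collected reset states to disk for reuse in a later run
torch.save(env_states, "cached_reset_states.pt")

# later: reload them and construct the wrapper without regenerating resets
env_states = torch.load("cached_reset_states.pt")
env = CachedResetWrapper(env, reset_to_env_states=dict(env_states=env_states, obs=None))
```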
## Performance

The following code snippet gives a quick check of the speed gains from cached resets. For the example below with 256 environments and the state observation mode, cached resets took about 0.004s on average while normal resets took 0.007s on an RTX 3080. With the rgb observation mode the difference is more stark: cached resets took about 0.005s on average while normal resets took 0.167s.

```python
from mani_skill.utils.wrappers import CachedResetWrapper
import gymnasium as gym
import time

num_envs = 256
obs_mode = "rgb"
env = gym.make("StackCube-v1", obs_mode=obs_mode, num_envs=num_envs)
env = CachedResetWrapper(env)

trials = 100
start_time = time.time()
for i in range(trials):
    env.reset()
end_time = time.time()
print(f"Average time per cached reset: {(end_time - start_time) / trials} seconds")

env = gym.make("StackCube-v1", obs_mode=obs_mode, num_envs=num_envs)
# env = CachedResetWrapper(env)

trials = 100
start_time = time.time()
for i in range(trials):
    env.reset()
end_time = time.time()
print(f"Average time per reset: {(end_time - start_time) / trials} seconds")
```

docs/source/user_guide/wrappers/index.md

Lines changed: 1 addition & 0 deletions
````diff
@@ -7,4 +7,5 @@
 record
 flatten
 action_repeat
+cached_reset
 ```
````

mani_skill/envs/sapien_env.py

Lines changed: 30 additions & 10 deletions
```diff
@@ -37,7 +37,7 @@
     update_camera_configs_from_dict,
 )
 from mani_skill.sensors.depth_camera import StereoDepthCamera, StereoDepthCameraConfig
-from mani_skill.utils import common, gym_utils, sapien_utils
+from mani_skill.utils import common, gym_utils, sapien_utils, tree
 from mani_skill.utils.structs import Actor, Articulation
 from mani_skill.utils.structs.pose import Pose
 from mani_skill.utils.structs.types import Array, SimConfig
@@ -316,6 +316,8 @@ def __init__(
         self._elapsed_steps = (
             torch.zeros(self.num_envs, device=self.device, dtype=torch.int32)
         )
+        self._last_obs = None
+        """the last observation returned by the environment"""
         obs, _ = self.reset(seed=[2022 + i for i in range(self.num_envs)], options=dict(reconfigure=True))

         self._init_raw_obs = common.to_cpu_tensor(obs)
@@ -850,7 +852,11 @@ def reset(self, seed: Union[None, int, list[int]] = None, options: Union[None, d
         options["reconfigure"] is True, will call self._reconfigure() which deletes the entire physx scene and reconstructs everything.
         Users building custom tasks generally do not need to override this function.

-        Returns the first observation and a info dictionary. The info dictionary is of type
+        If options["reset_to_env_states"] is given, we expect there to be options["reset_to_env_states"]["env_states"] and optionally options["reset_to_env_states"]["obs"], both with
+        batch size equal to the number of environments being reset. "env_states" can be a dictionary or flat tensor and we skip calling the environment's _initialize_episode function which
+        generates the initial state on a normal reset. If "obs" is given we skip calling the environment's get_obs function which can save some compute/time.
+
+        Returns the observations and an info dictionary. The info dictionary is of type


         .. highlight:: python
@@ -917,12 +923,22 @@ def reset(self, seed: Union[None, int, list[int]] = None, options: Union[None, d
         if self.agent is not None:
             self.agent.reset()

-        if seed is not None or self._enhanced_determinism:
-            with torch.random.fork_rng():
-                torch.manual_seed(self._episode_seed[0])
-                self._initialize_episode(env_idx, options)
+        # we either reset to given env states or use the environment's defined _initialize_episode function to generate the initial state
+        reset_to_env_states_obs = None
+        if "reset_to_env_states" in options:
+            env_states = options["reset_to_env_states"]["env_states"]
+            reset_to_env_states_obs = options["reset_to_env_states"].get("obs", None)
+            if isinstance(env_states, dict):
+                self.set_state_dict(env_states, env_idx)
+            else:
+                self.set_state(env_states, env_idx)
         else:
-            self._initialize_episode(env_idx, options)
+            if seed is not None or self._enhanced_determinism:
+                with torch.random.fork_rng():
+                    torch.manual_seed(self._episode_seed[0])
+                    self._initialize_episode(env_idx, options)
+            else:
+                self._initialize_episode(env_idx, options)
         # reset the reset mask back to all ones so any internal code in maniskill can continue to manipulate all scenes at once as usual
         self.scene._reset_mask = torch.ones(
             self.num_envs, dtype=bool, device=self.device
@@ -942,9 +958,13 @@ def reset(self, seed: Union[None, int, list[int]] = None, options: Union[None, d
             self.agent.controller.reset()

         info = self.get_info()
-        obs = self.get_obs(info)
-
+        if reset_to_env_states_obs is None:
+            obs = self.get_obs(info)
+        else:
+            obs = self._last_obs
+            tree.replace(obs, env_idx, common.to_tensor(reset_to_env_states_obs, device=self.device))
         info["reconfigure"] = reconfigure
+        self._last_obs = obs
         return obs, info

     def _set_main_rng(self, seed):
@@ -1031,7 +1051,7 @@ def step(self, action: Union[None, np.ndarray, torch.Tensor, Dict]):
             terminated = info["fail"].clone()
         else:
             terminated = torch.zeros(self.num_envs, dtype=bool, device=self.device)
-
+        self._last_obs = obs
         return (
             obs,
             reward,
```
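The `reset_to_env_states` option can also be exercised directly, without the wrapper; a minimal sketch based on the docstring above (assuming `env` is a ManiSkill environment created via `gym.make`):

```python
# a minimal sketch: capture the current environment state, then later reset
# straight back to it, skipping _initialize_episode (and get_obs if "obs" is given)
state_dict = env.get_state_dict()
obs, info = env.reset(options=dict(reset_to_env_states=dict(env_states=state_dict)))
```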

mani_skill/utils/structs/articulation.py

Lines changed: 4 additions & 2 deletions
```diff
@@ -888,7 +888,7 @@ def set_joint_drive_targets(
             else:
                 gx, gy = self.get_joint_target_indices(joint_indices)
             self.px.cuda_articulation_target_qpos.torch()[
-                gx[self.scene._reset_mask], gy[self.scene._reset_mask]
+                gx[self.scene._reset_mask[self._scene_idxs]], gy[self.scene._reset_mask[self._scene_idxs]]
             ] = targets
         else:
             for i, joint in enumerate(joints):
@@ -911,7 +911,9 @@ def set_joint_drive_velocity_targets(
                 gx, gy = self.get_joint_target_indices(joints)
             else:
                 gx, gy = self.get_joint_target_indices(joint_indices)
-            self.px.cuda_articulation_target_qvel.torch()[gx, gy] = targets
+            self.px.cuda_articulation_target_qvel.torch()[
+                gx[self.scene._reset_mask[self._scene_idxs]], gy[self.scene._reset_mask[self._scene_idxs]]
+            ] = targets
         else:
             for i, joint in enumerate(joints):
                 joint.set_drive_velocity_target(targets[0, i])
```
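For context on the fix above: `scene._reset_mask` is a per-environment boolean mask over all `num_envs` scenes, while `gx`/`gy` are batched over only the scenes this articulation exists in (`_scene_idxs`), so the mask must be re-indexed before it can select rows of `gx`/`gy`. A toy sketch of the alignment (names reused from the diff, shapes illustrative):

```python
import torch

reset_mask = torch.tensor([True, False, True, False])  # scene._reset_mask over 4 envs
scene_idxs = torch.tensor([1, 2, 3])  # envs in which this articulation exists
gx = torch.arange(3)  # one row of target indices per scene in scene_idxs

# indexing gx with the full length-4 mask would be misaligned with gx's batch
# dimension; re-indexing the mask by scene_idxs aligns it first
aligned = gx[reset_mask[scene_idxs]]
print(aligned)  # tensor([1]) -> only the row for env 2, the one being reset
```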

mani_skill/utils/tree.py

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@

```python
import torch


# NOTE (stao): when tensordict is used we should replace all of this
def slice(x, i):
    if isinstance(x, dict):
        return {k: slice(v, i) for k, v in x.items()}
    else:
        return x[i]


def cat(x: list):
    if isinstance(x[0], dict):
        return {k: cat([d[k] for d in x]) for k in x[0].keys()}
    else:
        return torch.cat(x, dim=0)


def replace(x, i, y):
    if isinstance(x, dict):
        for k, v in x.items():
            replace(v, i, y[k])
    else:
        x[i] = y
```
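A small usage sketch of these helpers on nested dictionaries of batched tensors (the keys and shapes are illustrative):

```python
import torch
from mani_skill.utils import tree

# two batches of env states with batch sizes 4 and 2
a = {"agent": {"qpos": torch.zeros(4, 7)}}
b = {"agent": {"qpos": torch.ones(2, 7)}}

merged = tree.cat([a, b])                 # every leaf concatenated along dim 0 -> (6, 7)
subset = tree.slice(merged, slice(0, 2))  # first two entries of every leaf
# overwrite entries 4 and 5 of every leaf in-place with the sliced values
tree.replace(merged, torch.tensor([4, 5]), subset)
```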

mani_skill/utils/wrappers/__init__.py

Lines changed: 2 additions & 1 deletion
```diff
@@ -1,3 +1,5 @@
+from .action_repeat import ActionRepeatWrapper
+from .cached_reset import CachedResetWrapper
 from .flatten import (
     FlattenActionSpaceWrapper,
     FlattenObservationWrapper,
@@ -6,4 +8,3 @@
 from .frame_stack import FrameStack
 from .gymnasium import CPUGymWrapper
 from .record import RecordEpisode
-from .action_repeat import ActionRepeatWrapper
```
mani_skill/utils/wrappers/cached_reset.py

Lines changed: 151 additions & 0 deletions
@@ -0,0 +1,151 @@

```python
from dataclasses import asdict, dataclass
from typing import List, Optional, Union

import dacite
import gymnasium as gym
import torch

from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.utils import common, tree
from mani_skill.utils.structs.types import Device


@dataclass
class CachedResetsConfig:
    num_resets: Optional[int] = None
    """The number of reset states to cache. If None, `num_envs` reset states are cached."""
    device: Optional[Device] = None
    """The device to cache the reset states on. If None, the base environment's device is used."""
    seed: Optional[int] = None
    """The seed to use for generating the cached reset states."""

    def dict(self):
        return {k: v for k, v in asdict(self).items()}


class CachedResetWrapper(gym.Wrapper):
    """
    Cached reset wrapper for ManiSkill3 environments. Caching resets allows you to skip slower parts of the reset function call and boost environment FPS as a result.

    Args:
        env: The environment to wrap.
        reset_to_env_states: A dictionary with keys "env_states" and optionally "obs". "env_states" is a dictionary of environment states to reset to.
            "obs" contains the corresponding observations generated at those env states. If reset_to_env_states is not provided, the wrapper will sample reset states
            from the environment using the given seed.
        config: A dictionary or a `CachedResetsConfig` object that contains the configuration for the cached resets.
    """

    def __init__(
        self,
        env: gym.Env,
        reset_to_env_states: Optional[dict] = None,
        config: Union[CachedResetsConfig, dict] = CachedResetsConfig(),
    ):
        super().__init__(env)
        self.num_envs = self.base_env.num_envs
        if isinstance(config, CachedResetsConfig):
            config = config.dict()
        self.cached_resets_config = dacite.from_dict(
            data_class=CachedResetsConfig,
            data=config,
            config=dacite.Config(strict=True),
        )
        cached_data_device = self.cached_resets_config.device
        if cached_data_device is None:
            cached_data_device = self.base_env.device
        self._num_cached_resets = 0
        if reset_to_env_states is not None:
            self._cached_resets_env_states = reset_to_env_states["env_states"]
            self._cached_resets_obs_buffer = reset_to_env_states.get("obs", None)
            self._num_cached_resets = len(self._cached_resets_env_states)
        else:
            if self.cached_resets_config.num_resets is None:
                self.cached_resets_config.num_resets = 16384
            self._cached_resets_env_states = []
            self._cached_resets_obs_buffer = []
            while self._num_cached_resets < self.cached_resets_config.num_resets:
                obs, _ = self.env.reset(
                    seed=self.cached_resets_config.seed,
                    options=dict(
                        env_idx=torch.arange(
                            0,
                            min(
                                self.cached_resets_config.num_resets
                                - self._num_cached_resets,
                                self.num_envs,
                            ),
                            device=self.base_env.device,
                        )
                    ),
                )
                state = self.env.get_wrapper_attr("get_state_dict")()
                if (
                    self.cached_resets_config.num_resets - self._num_cached_resets
                    < self.num_envs
                ):
                    obs = tree.slice(
                        obs,
                        slice(
                            0,
                            self.cached_resets_config.num_resets
                            - self._num_cached_resets,
                        ),
                    )
                    state = tree.slice(
                        state,
                        slice(
                            0,
                            self.cached_resets_config.num_resets
                            - self._num_cached_resets,
                        ),
                    )
                self._cached_resets_obs_buffer.append(
                    common.to_tensor(obs, device=self.cached_resets_config.device)
                )
                self._cached_resets_env_states.append(
                    common.to_tensor(state, device=self.cached_resets_config.device)
                )
                self._num_cached_resets += self.num_envs
            self._cached_resets_env_states = tree.cat(self._cached_resets_env_states)
            self._cached_resets_obs_buffer = tree.cat(self._cached_resets_obs_buffer)

        self._cached_resets_env_states = common.to_tensor(
            self._cached_resets_env_states, device=cached_data_device
        )
        if self._cached_resets_obs_buffer is not None:
            self._cached_resets_obs_buffer = common.to_tensor(
                self._cached_resets_obs_buffer, device=cached_data_device
            )

    @property
    def base_env(self) -> BaseEnv:
        return self.env.unwrapped

    def reset(
        self,
        *args,
        seed: Optional[Union[int, List[int]]] = None,
        options: Optional[dict] = None,
        **kwargs
    ):
        env_idx = None
        if options is None:
            options = dict()
        if "env_idx" in options:
            env_idx = options["env_idx"]
        if self._cached_resets_env_states is not None:
            sampled_ids = torch.randint(
                0,
                self._num_cached_resets,
                size=(len(env_idx) if env_idx is not None else self.num_envs,),
                device=self.base_env.device,
            )
            options["reset_to_env_states"] = dict(
                env_states=tree.slice(self._cached_resets_env_states, sampled_ids),
            )
            if self._cached_resets_obs_buffer is not None:
                options["reset_to_env_states"]["obs"] = tree.slice(
                    self._cached_resets_obs_buffer, sampled_ids
                )
        obs, info = self.env.reset(seed=seed, options=options)
        return obs, info
```
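Since the wrapper samples one cached state per environment index being reset, it composes with partial resets; a hedged usage sketch:

```python
import gymnasium as gym
import torch

from mani_skill.utils.wrappers import CachedResetWrapper

env = gym.make("StackCube-v1", num_envs=256)
env = CachedResetWrapper(env)
env.reset()

# partially reset only the first 8 environments; each receives a randomly
# sampled cached state (plus its cached observation, if one was stored)
obs, info = env.reset(options=dict(env_idx=torch.arange(8)))
```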
