|
| 1 | +from dataclasses import asdict, dataclass |
| 2 | +from typing import List, Optional, Union |
| 3 | + |
| 4 | +import dacite |
| 5 | +import gymnasium as gym |
| 6 | +import torch |
| 7 | + |
| 8 | +from mani_skill.envs.sapien_env import BaseEnv |
| 9 | +from mani_skill.utils import common, tree |
| 10 | +from mani_skill.utils.structs.types import Device |
| 11 | + |
| 12 | + |
@dataclass
class CachedResetsConfig:
    """Configuration options consumed by `CachedResetWrapper`."""

    num_resets: Optional[int] = None
    """The number of reset states to cache. If None, `CachedResetWrapper` falls back to caching 16384 reset states when it samples them itself."""
    device: Optional[Device] = None
    """The device to cache the reset states on. If None it will use the base environment's device."""
    seed: Optional[int] = None
    """The seed to use for generating the cached reset states."""

    def dict(self):
        """Return the configuration fields as a new plain dictionary."""
        # `asdict` already builds a fresh dictionary on every call, so wrapping
        # it in an identity comprehension adds nothing.
        return asdict(self)
| 24 | + |
| 25 | + |
class CachedResetWrapper(gym.Wrapper):
    """
    Cached reset wrapper for ManiSkill3 environments. Caching resets allows you to skip slower parts of the reset function call and boost environment FPS as a result.

    On every `reset` the wrapper draws random entries from its cache and forwards them to the wrapped
    environment through `options["reset_to_env_states"]`.

    Args:
        env: The environment to wrap.
        reset_to_env_states: A dictionary with keys "env_states" and optionally "obs". "env_states" is a dictionary of environment states to reset to.
            "obs" contains the corresponding observations generated at those env states. If reset_to_env_states is not provided, the wrapper will sample reset states
            from the environment using the given seed.
        config: A dictionary or a `CachedResetsConfig` object that contains the configuration for the cached resets. When the wrapper samples the
            states itself and `num_resets` is None, 16384 states are cached.
    """

    def __init__(
        self,
        env: gym.Env,
        reset_to_env_states: Optional[dict] = None,
        config: Union[CachedResetsConfig, dict] = CachedResetsConfig(),
    ):
        super().__init__(env)
        self.num_envs = self.base_env.num_envs
        # Round-trip through a plain dict: the (shared) default instance is never
        # mutated, and dict configs go through the same validation path.
        # NOTE: per the dacite docs, strict=True rejects keys that are not fields.
        if isinstance(config, CachedResetsConfig):
            config = config.dict()
        self.cached_resets_config = dacite.from_dict(
            data_class=CachedResetsConfig,
            data=config,
            config=dacite.Config(strict=True),
        )
        cached_data_device = self.cached_resets_config.device
        if cached_data_device is None:
            cached_data_device = self.base_env.device
        # Remembered so `reset` can sample indices on the device holding the cache.
        self._cached_data_device = cached_data_device
        self._num_cached_resets = 0
        if reset_to_env_states is not None:
            self._cached_resets_env_states = reset_to_env_states["env_states"]
            self._cached_resets_obs_buffer = reset_to_env_states.get("obs", None)
        else:
            (
                self._cached_resets_env_states,
                self._cached_resets_obs_buffer,
            ) = self._sample_reset_states(cached_data_device)

        self._cached_resets_env_states = common.to_tensor(
            self._cached_resets_env_states, device=cached_data_device
        )
        if self._cached_resets_obs_buffer is not None:
            self._cached_resets_obs_buffer = common.to_tensor(
                self._cached_resets_obs_buffer, device=cached_data_device
            )
        # Count the cached states from the data itself. `len()` of the state
        # dictionary would only give its number of top-level keys.
        self._num_cached_resets = self._count_states(self._cached_resets_env_states) or 0

    @staticmethod
    def _count_states(data):
        """Return the length of the first non-dict leaf found in `data`, or None if there is no leaf."""
        if not isinstance(data, dict):
            return len(data)
        for value in data.values():
            count = CachedResetWrapper._count_states(value)
            if count is not None:
                return count
        return None

    def _sample_reset_states(self, cached_data_device):
        """Reset the wrapped environment repeatedly and return `(env_states, obs)` holding `num_resets` entries."""
        if self.cached_resets_config.num_resets is None:
            self.cached_resets_config.num_resets = 16384
        num_resets = self.cached_resets_config.num_resets
        env_states = []
        obs_buffer = []
        # Only the first reset is seeded. Re-passing the same seed on every
        # iteration would re-initialise the RNG and repeat the same batch.
        seed = self.cached_resets_config.seed
        num_collected = 0
        while num_collected < num_resets:
            batch_size = min(num_resets - num_collected, self.num_envs)
            obs, _ = self.env.reset(
                seed=seed,
                options=dict(
                    env_idx=torch.arange(
                        0,
                        batch_size,
                        device=self.base_env.device,
                    )
                ),
            )
            seed = None
            state = self.env.get_wrapper_attr("get_state_dict")()
            if batch_size < self.num_envs:
                # Last, partial batch: keep only the entries that were reset.
                obs = tree.slice(obs, slice(0, batch_size))
                state = tree.slice(state, slice(0, batch_size))
            obs_buffer.append(common.to_tensor(obs, device=cached_data_device))
            env_states.append(common.to_tensor(state, device=cached_data_device))
            # Advance by what was actually stored, not by `num_envs`, so the
            # count never exceeds the amount of cached data.
            num_collected += batch_size
        return tree.cat(env_states), tree.cat(obs_buffer)

    @property
    def base_env(self) -> BaseEnv:
        """The unwrapped base environment."""
        return self.env.unwrapped

    def reset(
        self,
        *args,
        seed: Optional[Union[int, List[int]]] = None,
        options: Optional[dict] = None,
        **kwargs
    ):
        """Reset the wrapped environment to randomly drawn cached states.

        Args:
            seed: Forwarded unchanged to the wrapped environment's `reset`.
            options: Reset options. If it contains "env_idx", one cached state is drawn per
                listed index, otherwise one per environment. The caller's dictionary is not modified.

        Returns:
            The `(obs, info)` pair returned by the wrapped environment's `reset`.

        Extra positional and keyword arguments are accepted but not forwarded.
        """
        # Shallow copy so adding "reset_to_env_states" does not leak to the caller.
        options = dict(options) if options is not None else dict()
        env_idx = options.get("env_idx")
        if self._cached_resets_env_states is not None:
            num_to_reset = len(env_idx) if env_idx is not None else self.num_envs
            # Sample on the cache device: PyTorch cannot index a CPU tensor with
            # indices that live on another device.
            sampled_ids = torch.randint(
                0,
                self._num_cached_resets,
                size=(num_to_reset,),
                device=self._cached_data_device,
            )
            env_device = self.base_env.device
            # Presumably the base env expects states on its own device; the move
            # changes nothing when the cache already lives there.
            reset_to_env_states = dict(
                env_states=common.to_tensor(
                    tree.slice(self._cached_resets_env_states, sampled_ids),
                    device=env_device,
                ),
            )
            if self._cached_resets_obs_buffer is not None:
                reset_to_env_states["obs"] = common.to_tensor(
                    tree.slice(self._cached_resets_obs_buffer, sampled_ids),
                    device=env_device,
                )
            options["reset_to_env_states"] = reset_to_env_states
        obs, info = self.env.reset(seed=seed, options=options)
        return obs, info
0 commit comments