Merge pull request #17 from Andrew-Luo1:main

copybara-github · copybara-github · commit 24c16ee4fed7 · 2025-01-17T11:40:32.000-08:00
PiperOrigin-RevId: 716742418
Change-Id: I2a3c32a5098f1645687af489320b834a1297ad4d
diff --git a/learning/notebooks/training_vision_2.ipynb b/learning/notebooks/training_vision_2.ipynb
diff --git a/mujoco_playground/_src/manipulation/franka_emika_panda/pick_cartesian.py b/mujoco_playground/_src/manipulation/franka_emika_panda/pick_cartesian.py
@@ -25,6 +25,7 @@
 from mujoco import mjx
 import numpy as np
 
+from mujoco_playground._src import collision
 from mujoco_playground._src import mjx_env
 from mujoco_playground._src.manipulation.franka_emika_panda import panda
 from mujoco_playground._src.manipulation.franka_emika_panda import panda_kinematics
@@ -49,7 +50,7 @@ def default_config():
       episode_length=200,
       action_repeat=1,
       # Size of cartesian increment.
-      action_scale=0.01,
+      action_scale=0.005,
       reward_config=config_dict.create(
           reward_scales=config_dict.create(
               # Gripper goes to the box.
@@ -58,6 +59,8 @@ def default_config():
               box_target=8.0,
               # Do not collide the gripper with the floor.
               no_floor_collision=0.25,
+              # Do not collide cube with gripper
+              no_box_collision=0.05,
               # Destabilizes training in cartesian action space.
               robot_target_qpos=0.0,
           ),
@@ -69,6 +72,9 @@ def default_config():
       vision=False,
       vision_config=default_vision_config(),
       obs_noise=config_dict.create(brightness=[1.0, 1.0]),
+      box_init_range=0.05,
+      success_threshold=0.05,
+      action_history_length=1,
   )
   return config
 
@@ -112,6 +118,7 @@ def __init__(
 
     # Set gripper in sight of camera
     self._post_init(obj_name='box', keyframe='low_home')
+    self._box_geom = self._mj_model.geom('box').id
 
     if self._vision:
       try:
@@ -168,9 +175,10 @@ def reset(self, rng: jax.Array) -> mjx_env.State:
 
     # intialize box position
     rng, rng_box = jax.random.split(rng)
+    r_range = self._config.box_init_range
     box_pos = jp.array([
         x_plane,
-        jax.random.uniform(rng_box, (), minval=-0.05, maxval=0.05),
+        jax.random.uniform(rng_box, (), minval=-r_range, maxval=r_range),
         0.0,
     ])
 
@@ -218,6 +226,9 @@ def reset(self, rng: jax.Array) -> mjx_env.State:
         'newly_reset': jp.array(False, dtype=bool),
         'prev_action': jp.zeros(3),
         '_steps': jp.array(0, dtype=int),
+        'action_history': jp.zeros((
+            self._config.action_history_length,
+        )),  # Gripper only
     }
 
     reward, done = jp.zeros(2)
@@ -245,6 +256,17 @@ def reset(self, rng: jax.Array) -> mjx_env.State:
 
   def step(self, state: mjx_env.State, action: jax.Array) -> mjx_env.State:
     """Runs one timestep of the environment's dynamics."""
+    action_history = (
+        jp.roll(state.info['action_history'], 1).at[0].set(action[2])
+    )
+    state.info['action_history'] = action_history
+    # Add action delay
+    state.info['rng'], key = jax.random.split(state.info['rng'])
+    action_idx = jax.random.randint(
+        key, (), minval=0, maxval=self._config.action_history_length
+    )
+    action = action.at[2].set(state.info['action_history'][action_idx])
+
     state.info['newly_reset'] = state.info['_steps'] == 0
 
     newly_reset = state.info['newly_reset']
@@ -275,9 +297,7 @@ def step(self, state: mjx_env.State, action: jax.Array) -> mjx_env.State:
 
     # Cartesian control
     increment = jp.zeros(4)
-    increment = increment.at[1:].set(
-        action[:]
-    )  # set y, z and gripper commands.
+    increment = increment.at[1:].set(action)  # set y, z and gripper commands.
     ctrl, new_tip_position, no_soln = self._move_tip(
         state.info['current_pos'],
         self._start_tip_transform[:3, :3],
@@ -297,6 +317,10 @@ def step(self, state: mjx_env.State, action: jax.Array) -> mjx_env.State:
         for k, v in raw_rewards.items()
     }
 
+    # Penalize collision with box.
+    hand_box = collision.geoms_colliding(data, self._box_geom, self._hand_geom)
+    raw_rewards['no_box_collision'] = jp.where(hand_box, 0.0, 1.0)
+
     total_reward = jp.clip(sum(rewards.values()), -1e4, 1e4)
 
     if not self._vision:
@@ -362,7 +386,11 @@ def step(self, state: mjx_env.State, action: jax.Array) -> mjx_env.State:
   def _get_success(self, data: mjx.Data, info: dict[str, Any]) -> jax.Array:
+    """Checks whether the object is close enough to the target.
+
+    Args:
+      data: Simulation data; the object position is read from
+        `data.xpos[self._obj_body]`.
+      info: Episode info dict; the goal is read from `info['target_pos']`.
+
+    Returns:
+      A boolean array that is True when the distance between object and
+      target is below `self._config.success_threshold`. When `self._vision`
+      is set, only component 2 of both positions is compared.
+    """
     box_pos = data.xpos[self._obj_body]
     target_pos = info['target_pos']
-    return jp.linalg.norm(box_pos - target_pos) < 0.05
+    if (
+        self._vision
+    ):  # Randomized camera positions cannot see location along y line.
+      # Keep only index 2 of each position (presumably the z/height
+      # coordinate -- TODO confirm); the norm below then acts on a scalar.
+      box_pos, target_pos = box_pos[2], target_pos[2]
+    return jp.linalg.norm(box_pos - target_pos) < self._config.success_threshold
 
   def _move_tip(
       self,
diff --git a/mujoco_playground/_src/manipulation/franka_emika_panda/randomize_vision.py b/mujoco_playground/_src/manipulation/franka_emika_panda/randomize_vision.py
@@ -21,102 +21,167 @@
 from mujoco.mjx._src import math
 import numpy as np
 
-FLOOR_GEOM_ID = 0
-BOX_GEOM_ID = 81
+from mujoco_playground._src.manipulation.franka_emika_panda import pick_cartesian
+
+
+def sample_light_position():
+  """Draws one light position from the global NumPy RNG.
+
+  Candidates are sampled uniformly between the corners [1.5, -0.2, 0.8] and
+  [3, 0.2, 1.5]; a candidate is redrawn while its norm is below 1.0.
+
+  Returns:
+    A NumPy array with three entries holding the accepted position.
+  """
+  lower_corner, upper_corner = [1.5, -0.2, 0.8], [3, 0.2, 1.5]
+  while True:
+    candidate = np.random.uniform(lower_corner, upper_corner)
+    # Same acceptance rule as rejecting on `norm < 1.0`.
+    if not np.linalg.norm(candidate) < 1.0:
+      return candidate
+
+
+def perturb_orientation(
+    key: jax.Array, original: jax.Array, deg: float
+) -> jax.Array:
+  """Perturbs a 3D or 4D orientation by up to deg.
+
+  A random rotation axis (with its y component restricted to be
+  non-negative) and a rotation angle drawn uniformly from [0, deg] degrees
+  are combined into a quaternion, which is then applied to `original`.
+
+  Args:
+    key: PRNG key from which all random draws in this function are derived.
+    original: Orientation to perturb. Shape (4,) is treated as a quaternion
+      and left-multiplied by the random rotation; shape (3,) is treated as a
+      vector and rotated by it.
+    deg: Upper bound on the rotation angle, in degrees.
+
+  Returns:
+    The perturbed orientation, as produced by `math.quat_mul` (4D input) or
+    `math.rotate` (3D input).
+
+  Raises:
+    ValueError: If `original` has a shape other than (3,) or (4,).
+  """
+  # Fail fast: shapes are static, so reject bad input before any sampling.
+  if original.shape not in ((3,), (4,)):
+    raise ValueError(f'Invalid input shape: {original.shape}')
+
+  key_axis, key_theta, key_rest = jax.random.split(key, 3)
+  perturb_axis = jax.random.uniform(key_axis, (3,), minval=-1, maxval=1)
+  # Only perturb upwards in the y axis.
+  # The second split output is unused; the split count is kept as-is so the
+  # sampled values do not change.
+  key_y, _ = jax.random.split(key_rest, 2)
+  perturb_axis = perturb_axis.at[1].set(
+      jax.random.uniform(key_y, (), minval=0, maxval=1)
+  )
+  perturb_axis = perturb_axis / jp.linalg.norm(perturb_axis)
+  perturb_theta = jax.random.uniform(
+      key_theta, shape=(1,), minval=0, maxval=np.deg2rad(deg)
+  )
+  rot_offset = math.axis_angle_to_quat(perturb_axis, perturb_theta)
+  if original.shape == (4,):
+    return math.quat_mul(rot_offset, original)
+  return math.rotate(original, rot_offset)
 
 
 def domain_randomize(
     mjx_model: mjx.Model, num_worlds: int
 ) -> Tuple[mjx.Model, mjx.Model]:
-  """Tile the necessary axes for the Madrona BatchRenderer."""
+  """Tile the necessary axes for the Madrona BatchRenderer.
+
+  Produces `num_worlds` randomized variants of the visual model fields:
+  geom colours and material ids, the camera pose, and the light position,
+  direction, shadow flag and directional flag.
+
+  Side effects: re-seeds the global NumPy RNG with 0 and constructs a
+  `PandaPickCubeCartesian` environment to look up geom ids by name.
+
+  Args:
+    mjx_model: Model whose fields are randomized. It must contain exactly
+      one camera and one light.
+    num_worlds: Number of randomized worlds, i.e. the size of the leading
+      axis added to every randomized field.
+
+  Returns:
+    A tuple `(mjx_model, in_axes)`: the model with the randomized fields
+    replaced by batched arrays, and a matching tree that holds 0 for each
+    batched field and None for every other field.
+  """
+  mj_model = pick_cartesian.PandaPickCubeCartesian().mj_model
+  FLOOR_GEOM_ID = mj_model.geom('floor').id
+  BOX_GEOM_ID = mj_model.geom('box').id
+  STRIP_GEOM_ID = mj_model.geom('init_space').id
+
   in_axes = jax.tree_util.tree_map(lambda x: None, mjx_model)
   in_axes = in_axes.tree_replace({
       'geom_rgba': 0,
       'geom_matid': 0,
-      'geom_size': 0,
-      'geom_friction': 0,
       'cam_pos': 0,
       'cam_quat': 0,
       'light_pos': 0,
       'light_dir': 0,
       'light_directional': 0,
       'light_castshadow': 0,
-      'light_cutoff': 0,
   })
   rng = jax.random.key(0)
 
+  # Simpler logic implemented via NumPy.
+  np.random.seed(0)
+  light_positions = [sample_light_position() for _ in range(num_worlds)]
+  light_positions = jp.array(light_positions)
+
   @jax.vmap
-  def rand(rng):
+  def rand(rng: jax.Array, light_position: jax.Array):
+    """Generate randomized model fields."""
     _, key = jax.random.split(rng, 2)
-    # friction
-    friction = jax.random.uniform(key, (1,), minval=0.6, maxval=1.4)
-    friction = mjx_model.geom_friction.at[:, 0].set(friction)
-    key_r, key_g, key_b, key = jax.random.split(key, 4)
-    rgba = jp.array([
-        jax.random.uniform(key_r, (), minval=0.5, maxval=1.0),
-        jax.random.uniform(key_g, (), minval=0.0, maxval=0.5),
-        jax.random.uniform(key_b, (), minval=0.0, maxval=0.5),
-        1.0,
-    ])
+
+    #### Appearance ####
+    # Sample a random color for the box
+    key_box, key_strip, key_floor, key = jax.random.split(key, 4)
+    rgba = jp.array(
+        [jax.random.uniform(key_box, (), minval=0.5, maxval=1.0), 0.0, 0.0, 1.0]
+    )
     geom_rgba = mjx_model.geom_rgba.at[BOX_GEOM_ID].set(rgba)
 
+    strip_white = jax.random.uniform(key_strip, (), minval=0.8, maxval=1.0)
+    # Chain onto geom_rgba (not mjx_model.geom_rgba) so that the box colour
+    # sampled above is not discarded.
+    geom_rgba = geom_rgba.at[STRIP_GEOM_ID].set(
+        jp.array([strip_white, strip_white, strip_white, 1.0])
+    )
+
     # Sample a shade of gray
-    key_gs, key = jax.random.split(key)
-    gray_scale = jax.random.uniform(key_gs, (), minval=0.0, maxval=0.8)
+    gray_scale = jax.random.uniform(key_floor, (), minval=0.0, maxval=0.25)
     geom_rgba = geom_rgba.at[FLOOR_GEOM_ID].set(
         jp.array([gray_scale, gray_scale, gray_scale, 1.0])
     )
 
-    # Set unrandomized and randomized matID's to -1 and -2.
-    geom_matid = jp.ones_like(mjx_model.geom_matid) * -1
-    geom_matid = geom_matid.at[BOX_GEOM_ID].set(-2)
+    mat_offset, num_geoms = 5, geom_rgba.shape[0]
+    key_matid, key = jax.random.split(key)
+    geom_matid = (
+        jax.random.randint(key_matid, shape=(num_geoms,), minval=0, maxval=10)
+        + mat_offset
+    )
+    geom_matid = geom_matid.at[BOX_GEOM_ID].set(
+        -2
+    )  # Use the above randomized colors
     geom_matid = geom_matid.at[FLOOR_GEOM_ID].set(-2)
+    geom_matid = geom_matid.at[STRIP_GEOM_ID].set(-2)
 
-    key_pos, key = jax.random.split(key)
+    #### Cameras ####
+    key_pos, key_ori, key = jax.random.split(key, 3)
     cam_offset = jax.random.uniform(key_pos, (3,), minval=-0.05, maxval=0.05)
+    assert (
+        len(mjx_model.cam_pos) == 1
+    ), f'Expected single camera, got {len(mjx_model.cam_pos)}'
     cam_pos = mjx_model.cam_pos.at[0].set(mjx_model.cam_pos[0] + cam_offset)
+    cam_quat = mjx_model.cam_quat.at[0].set(
+        perturb_orientation(key_ori, mjx_model.cam_quat[0], 10)
+    )
+
+    #### Lighting ####
+    nlight = mjx_model.light_pos.shape[0]
+    assert (
+        nlight == 1
+    ), f'Sim2Real was trained with a single light source, got {nlight}'
+    # Only two keys are used; the split count stays at 4 so the sampled
+    # values do not change.
+    key_lsha, key_ldir, _, _ = jax.random.split(key, 4)
 
-    key_axis, key_theta, key = jax.random.split(key, 3)
-    perturb_axis = jax.random.uniform(key_axis, (3,), minval=-1, maxval=1)
-    perturb_axis = perturb_axis / jp.linalg.norm(perturb_axis)
-    perturb_theta = jax.random.uniform(
-        key_theta, shape=(1,), maxval=np.deg2rad(10)
+    # Direction
+    shine_at = jp.array([0.661, -0.001, 0.179])  # Gripper starting position
+    nom_dir = (shine_at - light_position) / jp.linalg.norm(
+        shine_at - light_position
     )
-    camera_rot_offset = math.axis_angle_to_quat(perturb_axis, perturb_theta)
-    cam_quat = mjx_model.cam_quat.at[0].set(
-        math.quat_mul(camera_rot_offset, mjx_model.cam_quat[0])
+    light_dir = mjx_model.light_dir.at[0].set(
+        perturb_orientation(key_ldir, nom_dir, 20)
     )
 
-    return friction, geom_rgba, geom_matid, cam_pos, cam_quat
+    # Whether to cast shadows
+    light_castshadow = jax.random.bernoulli(
+        key_lsha, 0.75, shape=(nlight,)
+    ).astype(jp.float32)
 
-  friction, geom_rgba, geom_matid, cam_pos, cam_quat = rand(
-      jax.random.split(rng, num_worlds)
-  )
+    # No need to randomize into specular lighting
+    light_directional = jp.ones((nlight,))
+
+    return (
+        geom_rgba,
+        geom_matid,
+        cam_pos,
+        cam_quat,
+        light_dir,
+        light_directional,
+        light_castshadow,
+    )
+
+  (
+      geom_rgba,
+      geom_matid,
+      cam_pos,
+      cam_quat,
+      light_dir,
+      light_directional,
+      light_castshadow,
+  ) = rand(jax.random.split(rng, num_worlds), light_positions)
 
   mjx_model = mjx_model.tree_replace({
       'geom_rgba': geom_rgba,
       'geom_matid': geom_matid,
-      'geom_size': jp.repeat(
-          jp.expand_dims(mjx_model.geom_size, 0), num_worlds, axis=0
-      ),
-      'geom_friction': friction,
       'cam_pos': cam_pos,
       'cam_quat': cam_quat,
-      'light_pos': jp.repeat(
-          jp.expand_dims(mjx_model.light_pos, 0), num_worlds, axis=0
-      ),
-      'light_dir': jp.repeat(
-          jp.expand_dims(mjx_model.light_dir, 0), num_worlds, axis=0
-      ),
-      'light_directional': jp.repeat(
-          jp.expand_dims(mjx_model.light_directional, 0), num_worlds, axis=0
-      ),
-      'light_castshadow': jp.repeat(
-          jp.expand_dims(mjx_model.light_castshadow, 0), num_worlds, axis=0
-      ),
-      'light_cutoff': jp.repeat(
-          jp.expand_dims(mjx_model.light_cutoff, 0), num_worlds, axis=0
-      ),
+      'light_pos': light_positions,
+      'light_dir': light_dir,
+      'light_directional': light_directional,
+      'light_castshadow': light_castshadow,
   })
 
   return mjx_model, in_axes
diff --git a/mujoco_playground/_src/manipulation/franka_emika_panda/xmls/mjx_cabinet.xml b/mujoco_playground/_src/manipulation/franka_emika_panda/xmls/mjx_cabinet.xml
@@ -1,4 +1,4 @@
-<mujoco model="panda scene">
+<mujoco model="panda cabinet">
   <include file="mjx_scene.xml"/>
 
   <worldbody>
diff --git a/mujoco_playground/_src/manipulation/franka_emika_panda/xmls/mjx_single_cube.xml b/mujoco_playground/_src/manipulation/franka_emika_panda/xmls/mjx_single_cube.xml
@@ -1,4 +1,4 @@
-<mujoco model="panda scene">
+<mujoco model="panda single cube">
   <include file="mjx_scene.xml"/>
 
   <worldbody>
diff --git a/mujoco_playground/_src/manipulation/franka_emika_panda/xmls/mjx_single_cube_camera.xml b/mujoco_playground/_src/manipulation/franka_emika_panda/xmls/mjx_single_cube_camera.xml

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-<mujoco model="panda scene">`
	`1`	`+<mujoco model="panda cabinet">`
`2`	`2`	`<include file="mjx_scene.xml"/>`
`3`	`3`
`4`	`4`	`<worldbody>`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-<mujoco model="panda scene">`
	`1`	`+<mujoco model="panda single cube">`
`2`	`2`	`<include file="mjx_scene.xml"/>`
`3`	`3`
`4`	`4`	`<worldbody>`