Commits (20)
- be0abbd [Feature] Add Stack Pyramid task environment (chenyenru, Nov 5, 2024)
- 0cb1abc [Feature] Add panda motionplanning solution for Stack Pyramid for Tra… (chenyenru, Nov 5, 2024)
- b4086b6 [BugFix] Add StackPyramid demo video for StackPyramid task card (chenyenru, Nov 5, 2024)
- 738cd9e [Feature] Add example to run Diffusion Policy on StackPyramid-v1 in e… (chenyenru, Nov 5, 2024)
- e52e2bf [BugFix] Corrected file name for StackPyramid's replay trajectory exa… (chenyenru, Nov 5, 2024)
- 2903e8c [BugFix] Rename Stack Pyramid Solution File (chenyenru, Nov 5, 2024)
- 40679c8 [BugFix] StackPyramid: Removed unused variables, deleted repeated lin… (chenyenru, Nov 7, 2024)
- bb5c2d5 [BugFix] Set different initial pose for the 3 Cubes to avoid collision (chenyenru, Nov 7, 2024)
- ef6d348 [Feature] Added PickAndPlace task and build container with custom siz… (chenyenru, Feb 6, 2025)
- 1838abe remove unnecessary build_ycb_object from pick_and_place.py (chenyenru, Feb 13, 2025)
- d2799e4 removed unnecessary distance variable from pick_and_place (chenyenru, Feb 13, 2025)
- b22bf94 Added task description for PickAndPlace-v1 and StackPyramid-v1 (chenyenru, Feb 13, 2025)
- ab0cf50 Resolve merge conflict and fix get obs extra bug for PickAndPlace (chenyenru, Feb 22, 2025)
- 9d38041 [Feature] Add Stack Pyramid task environment (chenyenru, Nov 5, 2024)
- 2f6c20b [Feature] Add panda motionplanning solution for Stack Pyramid for Tra… (chenyenru, Nov 5, 2024)
- 8617720 [Feature] Add example to run Diffusion Policy on StackPyramid-v1 in e… (chenyenru, Nov 5, 2024)
- dff5617 [Feature] Add Stack Pyramid task environment (chenyenru, Nov 5, 2024)
- fac3614 [Feature] Added PickAndPlace task and build container with custom siz… (chenyenru, Feb 6, 2025)
- 541080c [BugFix] Change container grid body type from static to kinematic (chenyenru, Feb 27, 2025)
- 32aecb4 [BugFix] Add StackPyramid and PickAndPlace to MP Solution Dictionary (chenyenru, Feb 27, 2025)
45 changes: 45 additions & 0 deletions docs/source/tasks/table_top_gripper/index.md
@@ -279,6 +279,33 @@ A simple task where the objective is to grasp a red cube and move it to a target
</video>
</div>

## StackPyramid-v1
![dense-reward][dense-reward-badge]
:::{dropdown} Task Card
:icon: note
:color: primary

**Task Description:**
The goal is to pick up a red cube, place it next to the green cube, and stack the blue cube on top of the red and green cubes without it falling off.

**Supported Robots: Panda**

**Randomizations:**
- all three cubes have their z-axis rotation randomized
- all three cubes have their xy positions on top of the table scene randomized. The positions are sampled such that the cubes do not collide with each other

**Success Conditions:**
- the blue cube is static
- the blue cube is on top of both the red and green cubes (to within half of the cube size)
- none of the red, green, blue cubes are grasped by the robot (robot must let go of the cubes)
:::

<video preload="auto" controls="True" width="100%">
<source src="https://github.com/haosulab/ManiSkill/raw/main/figures/environment_demos/StackPyramid-v1_rt.mp4" type="video/mp4">
</video>
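
Once merged, the task is reachable through the standard ManiSkill Gymnasium registration. A minimal sketch of instantiating and stepping it with random actions (the `state` observation mode and `pd_joint_delta_pos` control mode are assumptions carried over from the replay commands later in this PR):

```python
import gymnasium as gym

import mani_skill.envs  # noqa: F401  (importing registers all tasks, including StackPyramid-v1)

env = gym.make("StackPyramid-v1", obs_mode="state", control_mode="pd_joint_delta_pos")
obs, _ = env.reset(seed=0)
for _ in range(50):
    # random actions, purely to exercise the environment
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
env.close()
```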


## PickSingleYCB-v1

![dense-reward][dense-reward-badge]
@@ -313,7 +340,25 @@ Pick up a random object sampled from the [YCB dataset](https://www.ycbbenchmarks.com/)
</video>
</div>

## PickAndPlace-v1

:::{dropdown} Task Card
:icon: note
:color: primary

**Task Description:**
Pick and place four cubes into their assigned container cells: the red cube in the upper-left cell, the green cube in the upper-right cell, the blue cube in the lower-left cell, and the yellow cube in the lower-right cell.

**Supported Robots: PandaWristCam**

**Randomizations:**
- the four cubes' xy positions are randomized over x in [0.05, 0.09] and y in [-0.15, -0.1], resampled so that every pair of cubes is at least 0.06 apart
- the cubes' z-axis rotations are randomized

**Success Conditions:**
- each cube is placed in its corresponding container cell (within the 0.05 goal threshold)
- none of the cubes are grasped by the robot
:::

<video preload="auto" controls="True" width="100%">
<source src="https://github.com/haosulab/ManiSkill/raw/main/figures/environment_demos/PickAndPlace-v1_rt.mp4" type="video/mp4">
</video>

## PlaceSphere-v1

![dense-reward][dense-reward-badge]
![sparse-reward][sparse-reward-badge]
87 changes: 87 additions & 0 deletions examples/baselines/diffusion_policy/examples.sh
@@ -0,0 +1,87 @@
### Example scripts for training Diffusion Policy that have some results ###

# Learning from motion planning generated demonstrations

# PushCube-v1
python -m mani_skill.trajectory.replay_trajectory \
--traj-path ~/.maniskill/demos/PushCube-v1/motionplanning/trajectory.h5 \
--use-first-env-state -c pd_ee_delta_pos -o state \
--save-traj --num-procs 10 -b cpu

python train.py --env-id PushCube-v1 \
--demo-path ~/.maniskill/demos/PushCube-v1/motionplanning/trajectory.state.pd_ee_delta_pos.cpu.h5 \
--control-mode "pd_ee_delta_pos" --sim-backend "cpu" --num-demos 100 --max_episode_steps 100 \
--total_iters 30000

# PickCube-v1
python -m mani_skill.trajectory.replay_trajectory \
--traj-path ~/.maniskill/demos/PickCube-v1/motionplanning/trajectory.h5 \
--use-first-env-state -c pd_ee_delta_pos -o state \
--save-traj --num-procs 10 -b cpu

python train.py --env-id PickCube-v1 \
--demo-path ~/.maniskill/demos/PickCube-v1/motionplanning/trajectory.state.pd_ee_delta_pos.cpu.h5 \
--control-mode "pd_ee_delta_pos" --sim-backend "cpu" --num-demos 100 --max_episode_steps 100 \
--total_iters 30000

# StackCube-v1
python -m mani_skill.trajectory.replay_trajectory \
--traj-path ~/.maniskill/demos/StackCube-v1/motionplanning/trajectory.h5 \
--use-first-env-state -c pd_ee_delta_pos -o state \
--save-traj --num-procs 10 -b cpu

python train.py --env-id StackCube-v1 \
--demo-path ~/.maniskill/demos/StackCube-v1/motionplanning/trajectory.state.pd_ee_delta_pos.cpu.h5 \
--control-mode "pd_ee_delta_pos" --sim-backend "cpu" --num-demos 100 --max_episode_steps 200 \
--total_iters 30000

# PegInsertionSide-v1
python -m mani_skill.trajectory.replay_trajectory \
--traj-path ~/.maniskill/demos/PegInsertionSide-v1/motionplanning/trajectory.h5 \
--use-first-env-state -c pd_ee_delta_pose -o state \
--save-traj --num-procs 10 -b cpu

python train.py --env-id PegInsertionSide-v1 \
--demo-path ~/.maniskill/demos/PegInsertionSide-v1/motionplanning/trajectory.state.pd_ee_delta_pose.cpu.h5 \
--control-mode "pd_ee_delta_pose" --sim-backend "cpu" --num-demos 100 --max_episode_steps 300 \
--total_iters 300000

# StackPyramid-v1
stack_pyramid_traj_name="stack_pyramid_trajectory"
python -m mani_skill.examples.motionplanning.panda.run -e "StackPyramid-v1" \
--num-procs 20 -n 100 --reward-mode="sparse" \
--sim-backend cpu --only-count-success \
--traj-name $stack_pyramid_traj_name

python -m mani_skill.trajectory.replay_trajectory -b cpu \
--traj-path ./demos/StackPyramid-v1/motionplanning/$stack_pyramid_traj_name.h5 \
--use-first-env-state -c pd_joint_delta_pos -o state \
--save-traj --num-procs 20

python train.py --env-id StackPyramid-v1 \
--demo-path ./demos/StackPyramid-v1/motionplanning/$stack_pyramid_traj_name.state.pd_joint_delta_pos.cpu.h5 \
--control-mode "pd_joint_delta_pos" --sim-backend "cpu" --num-demos 100 \
--max_episode_steps 300 --total_iters 30000 --num-eval-envs 20

# PickAndPlace-v1
pick_and_place_traj_name="pick_and_place_trajectory"
python -m mani_skill.examples.motionplanning.panda.run -e "PickAndPlace-v1" \
--num-procs 20 -n 100 --reward-mode="sparse" \
--sim-backend cpu --only-count-success \
--traj-name $pick_and_place_traj_name

python -m mani_skill.trajectory.replay_trajectory -b cpu \
--traj-path ./demos/PickAndPlace-v1/motionplanning/$pick_and_place_traj_name.h5 \
--use-first-env-state -c pd_joint_delta_pos -o state \
--save-traj --num-procs 20

python train.py --env-id PickAndPlace-v1 \
--demo-path ./demos/PickAndPlace-v1/motionplanning/$pick_and_place_traj_name.state.pd_joint_delta_pos.cpu.h5 \
--control-mode "pd_joint_delta_pos" --sim-backend "cpu" --num-demos 100 \
--max_episode_steps 300 --total_iters 30000 --num-eval-envs 20
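
# To sanity-check the converted demonstrations before training, the replayed .h5
# file can be inspected directly. A minimal Python sketch, assuming the standard
# ManiSkill trajectory layout where each episode lives in a traj_<i> group (the
# path matches the replay command above):
#
#   import h5py
#
#   path = "./demos/PickAndPlace-v1/motionplanning/pick_and_place_trajectory.state.pd_joint_delta_pos.cpu.h5"
#   with h5py.File(path, "r") as f:
#       print(f"{len(f.keys())} trajectories")          # one traj_<i> group per episode
#       traj = f["traj_0"]
#       print("keys:", list(traj.keys()))               # e.g. actions, obs, ...
#       print("actions shape:", traj["actions"].shape)  # (T, action_dim)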


Binary file added figures/environment_demos/PickAndPlace-v1_rt.mp4
Binary file added figures/environment_demos/StackPyramid-v1_rt.mp4
2 changes: 1 addition & 1 deletion mani_skill/agents/robots/panda/panda.py
@@ -16,7 +16,7 @@
@register_agent()
class Panda(BaseAgent):
uid = "panda"
-    urdf_path = f"{PACKAGE_ASSET_DIR}/robots/panda/panda_v2.urdf"
+    urdf_path = f"{PACKAGE_ASSET_DIR}/robots/panda/panda_v3.urdf"
> **Review comment (Member):** why is this changed?

    urdf_config = dict(
        _materials=dict(
            gripper=dict(static_friction=2.0, dynamic_friction=2.0, restitution=0.0)
4 changes: 3 additions & 1 deletion mani_skill/envs/tasks/tabletop/__init__.py
@@ -15,4 +15,6 @@
from .place_sphere import PlaceSphereEnv
from .roll_ball import RollBallEnv
from .push_t import PushTEnv
from .pull_cube_tool import PullCubeToolEnv
from .stack_pyramid import StackPyramidEnv
from .pick_and_place import PickAndPlaceEnv
212 changes: 212 additions & 0 deletions mani_skill/envs/tasks/tabletop/pick_and_place.py
@@ -0,0 +1,212 @@
from typing import Dict

import numpy as np
import sapien
import torch

import mani_skill.envs.utils.randomization as randomization
from mani_skill.agents.robots.panda.panda_wristcam import PandaWristCam
from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import sapien_utils
from mani_skill.utils.building import actors
from mani_skill.utils.building.actors.common import build_container_grid
from mani_skill.utils.registration import register_env
from mani_skill.utils.scene_builder.table import TableSceneBuilder
from mani_skill.utils.structs.pose import Pose


@register_env("PickAndPlace-v1", max_episode_steps=500)
class PickAndPlaceEnv(BaseEnv):
    """
    **Task Description:**
    - Pick and place the four cubes into distinct container cells.
    - The red cube must be placed in the upper-left cell.
    - The green cube must be placed in the upper-right cell.
    - The blue cube must be placed in the lower-left cell.
    - The yellow cube must be placed in the lower-right cell.

    **Randomizations:**
    - The four cubes' positions are randomized over the xy range x in [0.05, 0.09] and y in [-0.15, -0.1].
    - The z-coordinate of the cubes is fixed at 0.04 so that they rest on the table surface.
    - The cubes' z-axis rotations are randomized.
    - Cube positions are resampled until every pair of cubes is at least 0.06 apart, to avoid overlaps.

    **Success Conditions:**
    - Each cube is placed in its corresponding cell.
    - No cube is grasped by the robot (the robot must release all cubes).

    **Goal Specification:**
    - 3D goal position (also visualized in human renders)
    """

    _sample_video_link = "https://github.com/haosulab/ManiSkill/raw/main/figures/environment_demos/PickAndPlace-v1_rt.mp4"

    SUPPORTED_ROBOTS = ["panda_wristcam"]
    SUPPORTED_REWARD_MODES = ["none", "sparse"]
    agent: PandaWristCam
    cube_half_size = 0.02
    goal_thresh = 0.05

    def __init__(
        self, *args, robot_uids="panda_wristcam", robot_init_qpos_noise=0.02, **kwargs
    ):
        self.robot_init_qpos_noise = robot_init_qpos_noise
        super().__init__(*args, robot_uids=robot_uids, **kwargs)

    @property
    def _default_sensor_configs(self):
        pose = sapien_utils.look_at(eye=[0.3, 0, 0.6], target=[-0.1, 0, 0.1])
        return [CameraConfig("base_camera", pose, 128, 128, np.pi / 2, 0.01, 100)]

    @property
    def _default_human_render_camera_configs(self):
        pose = sapien_utils.look_at([0.6, 0.7, 0.6], [0.0, 0.0, 0.35])
        return CameraConfig("render_camera", pose, 512, 512, 1, 0.01, 100)

    def _load_agent(self, options: dict):
        super()._load_agent(options, sapien.Pose(p=[-0.615, 0, 0]))

    def _load_scene(self, options: dict):
        self.table_scene = TableSceneBuilder(
            self, robot_init_qpos_noise=self.robot_init_qpos_noise
        )
        self.table_scene.build()
        self.container_grid, self.goal_sites = build_container_grid(
            self.scene,
            initial_pose=sapien.Pose(p=[0.0, 0.2, 0.04], q=[1, 0, 0, 0]),
            size=0.25,
            height=0.05,
            thickness=0.01,
            color=(0.8, 0.6, 0.4),
            name="container_grid",
            n=2,
            m=2,
            body_type="kinematic",
        )
        # Spawn the cubes at different heights so they do not collide before
        # _initialize_episode places them on the table.
        self.red_cube = actors.build_cube(
            self.scene,
            half_size=self.cube_half_size,
            color=[1, 0, 0, 1],
            name="red_cube",
            initial_pose=sapien.Pose(p=[0, 0, 0.1]),
        )
        self.green_cube = actors.build_cube(
            self.scene,
            half_size=self.cube_half_size,
            color=[0, 1, 0, 1],
            name="green_cube",
            initial_pose=sapien.Pose(p=[0, 0, 0.5]),
        )
        self.blue_cube = actors.build_cube(
            self.scene,
            half_size=self.cube_half_size,
            color=[0, 0, 1, 1],
            name="blue_cube",
            initial_pose=sapien.Pose(p=[0, 0, 1.0]),
        )
        self.yellow_cube = actors.build_cube(
            self.scene,
            half_size=self.cube_half_size,
            color=[1, 1, 0, 1],
            name="yellow_cube",
            initial_pose=sapien.Pose(p=[0, 0, 1.5]),
        )
        self.cubes = [self.red_cube, self.green_cube, self.blue_cube, self.yellow_cube]
        self._hidden_objects.extend(self.goal_sites)

    def _initialize_episode(self, env_idx: torch.Tensor, options: dict):
        with torch.device(self.device):
            env_count = len(env_idx)
            self.table_scene.initialize(env_idx)

            # Initialize the container grid at a fixed pose
            container_pose = sapien.Pose(p=[0.0, 0.2, 0.04], q=[1, 0, 0, 0])
            self.container_grid.set_pose(container_pose)

            region = [[0.05, -0.15], [0.09, -0.1]]
            radius = torch.linalg.norm(torch.tensor([0.02, 0.02])) + 0.001
            min_distance = 0.06

            # Randomize cube positions, resampling until the new cube keeps the
            # minimum distance to every previously placed cube
            for i, cube in enumerate(self.cubes):
                while True:
                    xyz = torch.zeros((env_count, 3))
                    sampler = randomization.UniformPlacementSampler(
                        bounds=region, batch_size=env_count, device=self.device
                    )
                    cube_xy = torch.rand((env_count, 2)) * 0.4 - 0.4 + sampler.sample(
                        radius, 100
                    )
                    xyz[:, :2] = cube_xy
                    xyz[:, 2] = 0.04
                    qs = randomization.random_quaternions(
                        env_count,
                        lock_x=True,
                        lock_y=True,
                        lock_z=False,
                    )
                    cube.set_pose(Pose.create_from_pq(p=xyz, q=qs))

                    overlap = False
                    for j in range(i):
                        other_xyz = self.cubes[j].pose.p
                        distance = torch.linalg.norm(xyz - other_xyz, axis=1)
                        if torch.any(distance < min_distance):
                            overlap = True
                            break

                    if not overlap:
                        break

    def _get_obs_extra(self, info: Dict):
        # in reality some people hack is_grasped into observations by checking if the gripper can close fully or not
        obs = {"tcp_pose": self.agent.tcp.pose.raw_pose}
        for goal_site in self.goal_sites:
            obs[f"{goal_site.name}_pose"] = goal_site.pose.p

        for i, obj in enumerate(self.cubes):
            obs[f"{obj.name}_pose"] = obj.pose.raw_pose
            if "state" in self.obs_mode:
                obs[f"{obj.name}_to_goal_pos"] = self.goal_sites[i].pose.p - obj.pose.p
                obs[f"tcp_to_{obj.name}_pos"] = obj.pose.p - self.agent.tcp.pose.p

        return obs

    def evaluate(self):
        results = dict()
        all_placed = torch.tensor(True, device=self.device)
        any_grasped = torch.tensor(False, device=self.device)
        # success requires every cube placed and no cube still grasped

        for i, goal_site in enumerate(self.goal_sites):
            obj = self.cubes[i]
            obj_name = obj.name

            distance_to_goal = torch.linalg.norm(goal_site.pose.p - obj.pose.p, axis=1)
            is_placed = distance_to_goal <= self.goal_thresh
            is_grasped = self.agent.is_grasping(obj)

            results[f"{obj_name}_distance_to_goal"] = distance_to_goal
            results[f"is_{obj_name}_placed"] = is_placed
            results[f"is_{obj_name}_grasped"] = is_grasped

            all_placed = torch.logical_and(all_placed, is_placed)
            any_grasped = torch.logical_or(any_grasped, is_grasped)

        # Success is defined as all cubes being placed and none being grasped
        results["success"] = torch.logical_and(
            all_placed, torch.logical_not(any_grasped)
        )

        # Flag (not a reward) indicating whether the robot is static,
        # using a velocity threshold of 0.2
        results["is_robot_static"] = self.agent.is_static(0.2)

        return results
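
Since the class registers itself with `@register_env`, the evaluation logic can be smoke-tested end to end. A minimal sketch with random actions (the printed keys follow the `results` dict built in `evaluate` above; a random policy will not succeed, this only checks that the flags are produced):

```python
import gymnasium as gym

import mani_skill.envs  # noqa: F401  (registers PickAndPlace-v1)

env = gym.make("PickAndPlace-v1", obs_mode="state", control_mode="pd_joint_delta_pos")
obs, _ = env.reset(seed=0)
for _ in range(20):
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
# evaluate() results are surfaced through the step info dict
print(info["success"], info["is_red_cube_placed"], info["is_robot_static"])
env.close()
```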