From 0a8a15e6077998b9774712ad4cb44b16359d9724 Mon Sep 17 00:00:00 2001 From: DorsaRoh Date: Mon, 28 Oct 2024 20:24:11 -0400 Subject: [PATCH 1/6] lift & place --- .../manipulation/lift/lift_env_cfg.py | 16 ++++- .../state_machine/lift_cube_sm.py | 61 +++++++++++++++++-- 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_env_cfg.py index 6a3a078cb5..af98db2da9 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_env_cfg.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_env_cfg.py @@ -6,7 +6,7 @@ from dataclasses import MISSING import omni.isaac.lab.sim as sim_utils -from omni.isaac.lab.assets import ArticulationCfg, AssetBaseCfg, DeformableObjectCfg, RigidObjectCfg +from omni.isaac.lab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg from omni.isaac.lab.envs import ManagerBasedRLEnvCfg from omni.isaac.lab.managers import CurriculumTermCfg as CurrTerm from omni.isaac.lab.managers import EventTermCfg as EventTerm @@ -40,7 +40,7 @@ class ObjectTableSceneCfg(InteractiveSceneCfg): # end-effector sensor: will be populated by agent env cfg ee_frame: FrameTransformerCfg = MISSING # target object: will be populated by agent env cfg - object: RigidObjectCfg | DeformableObjectCfg = MISSING + object: RigidObjectCfg = MISSING # Table table = AssetBaseCfg( @@ -82,6 +82,16 @@ class CommandsCfg: ), ) + place_pose = mdp.UniformPoseCommandCfg( + asset_name="robot", + body_name=MISSING, # will be set by agent env cfg + resampling_time_range=(5.0, 5.0), + debug_vis=True, + ranges=mdp.UniformPoseCommandCfg.Ranges( + pos_x=(0.4, 0.6), pos_y=(-0.25, 0.25), pos_z=(0.0, 0.0), roll=(0.0, 0.0), pitch=(0.0, 0.0), yaw=(0.0, 0.0) + ), + ) + @configclass class ActionsCfg: @@ -219,4 +229,4 @@ def __post_init__(self): self.sim.physx.bounce_threshold_velocity = 0.01 self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 1024 * 1024 * 4 self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 - self.sim.physx.friction_correlation_distance = 0.00625 + self.sim.physx.friction_correlation_distance = 0.00625 \ No newline at end of file diff --git a/source/standalone/environments/state_machine/lift_cube_sm.py b/source/standalone/environments/state_machine/lift_cube_sm.py index bd14dcaf0d..5be6ee1bbf 100644 --- a/source/standalone/environments/state_machine/lift_cube_sm.py +++ b/source/standalone/environments/state_machine/lift_cube_sm.py @@ -69,6 +69,9 @@ class PickSmState: APPROACH_OBJECT = wp.constant(2) GRASP_OBJECT = wp.constant(3) LIFT_OBJECT = wp.constant(4) + APPROACH_PLACE_POSITION = wp.constant(5) + APPROACH_DROP = wp.constant(6) + PLACE_OBJECT = wp.constant(7) class PickSmWaitTime: @@ -79,6 +82,10 @@ class PickSmWaitTime: APPROACH_OBJECT = wp.constant(0.6) GRASP_OBJECT = wp.constant(0.3) LIFT_OBJECT = wp.constant(1.0) + APPROACH_PLACE_POSITION = wp.constant(1.0) + APPROACH_DROP = wp.constant(0.6) + PLACE_OBJECT = wp.constant(0.5) + @wp.kernel @@ -89,6 +96,8 @@ def infer_state_machine( ee_pose: wp.array(dtype=wp.transform), object_pose: wp.array(dtype=wp.transform), des_object_pose: wp.array(dtype=wp.transform), + place_pose: wp.array(dtype=wp.transform), + des_place_pose: wp.array(dtype=wp.transform), des_ee_pose: wp.array(dtype=wp.transform), gripper_state: 
wp.array(dtype=float), offset: wp.array(dtype=wp.transform), @@ -139,8 +148,41 @@ def infer_state_machine( # wait for a while if sm_wait_time[tid] >= PickSmWaitTime.LIFT_OBJECT: # move to next state and reset wait time - sm_state[tid] = PickSmState.LIFT_OBJECT + sm_state[tid] = PickSmState.APPROACH_PLACE_POSITION + sm_wait_time[tid] = 0.0 + + elif state == PickSmState.APPROACH_PLACE_POSITION: + des_ee_pose[tid] = wp.transform_multiply(offset[tid], object_pose[tid]) + gripper_state[tid] = GripperState.CLOSE + # TODO: error between current and desired ee pose below threshold + # wait for a while + if sm_wait_time[tid] >= PickSmWaitTime.APPROACH_PLACE_POSITION: + # move to next state and reset wait time + sm_state[tid] = PickSmState.APPROACH_DROP sm_wait_time[tid] = 0.0 + elif state == PickSmState.APPROACH_DROP: + des_ee_pose[tid] = object_pose[tid] + gripper_state[tid] = GripperState.CLOSE + # TODO: error between current and desired ee pose below threshold + # wait for a while + if sm_wait_time[tid] >= PickSmWaitTime.APPROACH_DROP: + # move to next state and reset wait time + sm_state[tid] = PickSmState.PLACE_OBJECT + sm_wait_time[tid] = 0.0 + + # place object + elif state == PickSmState.PLACE_OBJECT: + des_ee_pose[tid] = des_place_pose[tid] + gripper_state[tid] = GripperState.CLOSE + # TODO: error between current and desired ee pose below threshold + # wait for a while + if sm_wait_time[tid] >= PickSmWaitTime.PLACE_OBJECT: + # move to next state and reset wait time + gripper_state[tid] = GripperState.OPEN + sm_state[tid] = PickSmState.PLACE_OBJECT # or PickSmState.REST + sm_wait_time[tid] = 0.0 + + # increment wait time sm_wait_time[tid] = sm_wait_time[tid] + dt[tid] @@ -183,7 +225,7 @@ def __init__(self, dt: float, num_envs: int, device: torch.device | str = "cpu") # approach above object offset self.offset = torch.zeros((self.num_envs, 7), device=self.device) - self.offset[:, 2] = 0.1 + self.offset[:, 2] = 0.05 self.offset[:, -1] = 1.0 # warp expects quaternion as (x, y, z, w) # convert to warp @@ -201,17 +243,21 @@ def reset_idx(self, env_ids: Sequence[int] = None): self.sm_state[env_ids] = 0 self.sm_wait_time[env_ids] = 0.0 - def compute(self, ee_pose: torch.Tensor, object_pose: torch.Tensor, des_object_pose: torch.Tensor): + def compute(self, ee_pose: torch.Tensor, object_pose: torch.Tensor, des_object_pose: torch.Tensor, place_pose: torch.Tensor, des_place_pose: torch.Tensor): """Compute the desired state of the robot's end-effector and the gripper.""" # convert all transformations from (w, x, y, z) to (x, y, z, w) ee_pose = ee_pose[:, [0, 1, 2, 4, 5, 6, 3]] object_pose = object_pose[:, [0, 1, 2, 4, 5, 6, 3]] des_object_pose = des_object_pose[:, [0, 1, 2, 4, 5, 6, 3]] + place_pose = place_pose[:, [0, 1, 2, 4, 5, 6, 3]] + des_place_pose = des_place_pose[:, [0, 1, 2, 4, 5, 6, 3]] # convert to warp ee_pose_wp = wp.from_torch(ee_pose.contiguous(), wp.transform) object_pose_wp = wp.from_torch(object_pose.contiguous(), wp.transform) des_object_pose_wp = wp.from_torch(des_object_pose.contiguous(), wp.transform) + place_pose_wp = wp.from_torch(place_pose.contiguous(), wp.transform) + des_place_pose_wp = wp.from_torch(des_place_pose.contiguous(), wp.transform) # run state machine wp.launch( @@ -224,6 +270,8 @@ def compute(self, ee_pose: torch.Tensor, object_pose: torch.Tensor, des_object_p ee_pose_wp, object_pose_wp, des_object_pose_wp, + place_pose_wp, + des_place_pose_wp, self.des_ee_pose_wp, self.des_gripper_state_wp, self.offset_wp, @@ -252,7 +300,7 @@ def main(): # create action buffers 
(position + quaternion) actions = torch.zeros(env.unwrapped.action_space.shape, device=env.unwrapped.device) - actions[:, 3] = 1.0 + actions[:, 4] = 1.0 # desired object orientation (we only do position control of object) desired_orientation = torch.zeros((env.unwrapped.num_envs, 4), device=env.unwrapped.device) desired_orientation[:, 1] = 1.0 @@ -275,12 +323,15 @@ def main(): object_position = object_data.root_pos_w - env.unwrapped.scene.env_origins # -- target object frame desired_position = env.unwrapped.command_manager.get_command("object_pose")[..., :3] + desired_place_position = env.unwrapped.command_manager.get_command("place_pose")[..., :3] # advance state machine actions = pick_sm.compute( torch.cat([tcp_rest_position, tcp_rest_orientation], dim=-1), torch.cat([object_position, desired_orientation], dim=-1), torch.cat([desired_position, desired_orientation], dim=-1), + torch.cat([tcp_rest_position, desired_orientation], dim=-1), + torch.cat([desired_place_position, desired_orientation], dim=-1), ) # reset state machine @@ -295,4 +346,4 @@ def main(): # run the main function main() # close sim app - simulation_app.close() + simulation_app.close() \ No newline at end of file From c296a68d8ea800dbcf50f17a2a3515c68ecc0a93 Mon Sep 17 00:00:00 2001 From: DorsaRoh Date: Tue, 29 Oct 2024 14:09:49 -0400 Subject: [PATCH 2/6] Working: Placement --- .../manipulation/lift/lift_env_cfg.py | 86 +++++++++++++++---- .../manipulation/lift/mdp/rewards.py | 54 +++++++++++- .../state_machine/lift_cube_sm.py | 43 +++++----- 3 files changed, 141 insertions(+), 42 deletions(-) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_env_cfg.py index af98db2da9..0e2ffc6f1d 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_env_cfg.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_env_cfg.py @@ -5,8 +5,10 @@ from dataclasses import MISSING +import torch import omni.isaac.lab.sim as sim_utils from omni.isaac.lab.assets import ArticulationCfg, AssetBaseCfg, RigidObjectCfg +from omni.isaac.lab.envs import ManagerBasedRLEnv from omni.isaac.lab.envs import ManagerBasedRLEnvCfg from omni.isaac.lab.managers import CurriculumTermCfg as CurrTerm from omni.isaac.lab.managers import EventTermCfg as EventTerm @@ -74,7 +76,7 @@ class CommandsCfg: object_pose = mdp.UniformPoseCommandCfg( asset_name="robot", - body_name=MISSING, # will be set by agent env cfg + body_name="panda_hand", resampling_time_range=(5.0, 5.0), debug_vis=True, ranges=mdp.UniformPoseCommandCfg.Ranges( @@ -84,11 +86,11 @@ class CommandsCfg: place_pose = mdp.UniformPoseCommandCfg( asset_name="robot", - body_name=MISSING, # will be set by agent env cfg + body_name="panda_hand", resampling_time_range=(5.0, 5.0), debug_vis=True, ranges=mdp.UniformPoseCommandCfg.Ranges( - pos_x=(0.4, 0.6), pos_y=(-0.25, 0.25), pos_z=(0.0, 0.0), roll=(0.0, 0.0), pitch=(0.0, 0.0), yaw=(0.0, 0.0) + pos_x=(0.4, 0.6), pos_y=(-0.25, 0.25), pos_z=(0.05, 0.05), roll=(0.0, 0.0), pitch=(0.0, 0.0), yaw=(0.0, 0.0) ), ) @@ -114,6 +116,7 @@ class PolicyCfg(ObsGroup): joint_vel = ObsTerm(func=mdp.joint_vel_rel) object_position = ObsTerm(func=mdp.object_position_in_robot_root_frame) target_object_position = ObsTerm(func=mdp.generated_commands, params={"command_name": "object_pose"}) + target_place_position = 
ObsTerm(func=mdp.generated_commands, params={"command_name": "place_pose"}) actions = ObsTerm(func=mdp.last_action) def __post_init__(self): @@ -140,34 +143,83 @@ class EventCfg: }, ) +def object_is_placed( + env: ManagerBasedRLEnv, + distance_threshold: float, + height_threshold: float, + object_cfg: SceneEntityCfg = SceneEntityCfg("object") +) -> torch.Tensor: + """Reward the agent for placing the object at target position.""" + object: RigidObject = env.scene[object_cfg.name] + place_command = env.command_manager.get_command("place_pose") + + # Get positions + object_pos = object.data.root_pos_w + target_pos = place_command[:, :3] + + # Check xy-distance to target + xy_distance = torch.norm(object_pos[:, :2] - target_pos[:, :2], dim=1) + + # Check height difference + height_diff = torch.abs(object_pos[:, 2] - target_pos[:, 2]) + + # Return 1.0 if within thresholds, 0.0 otherwise + return torch.where( + (xy_distance < distance_threshold) & (height_diff < height_threshold), + 1.0, + 0.0 + ) + @configclass class RewardsCfg: """Reward terms for the MDP.""" - reaching_object = RewTerm(func=mdp.object_ee_distance, params={"std": 0.1}, weight=1.0) + # Reward for reaching object + reaching_object = RewTerm( + func=mdp.object_ee_distance, + params={"std": 0.1}, + weight=1.0 + ) - lifting_object = RewTerm(func=mdp.object_is_lifted, params={"minimal_height": 0.04}, weight=15.0) + # Reward for lifting object + lifting_object = RewTerm( + func=mdp.object_is_lifted, + params={"minimal_height": 0.04, "maximal_height": 0.5}, + weight=15.0 + ) + # Reward for moving object to lifting position object_goal_tracking = RewTerm( func=mdp.object_goal_distance, - params={"std": 0.3, "minimal_height": 0.04, "command_name": "object_pose"}, - weight=16.0, + params={ + "std": 0.3, + "minimal_height": 0.04, + "maximal_height": 0.5, + "command_name": "object_pose" + }, + weight=16.0 ) - object_goal_tracking_fine_grained = RewTerm( + # Reward for moving object to placing position + placing_tracking = RewTerm( func=mdp.object_goal_distance, - params={"std": 0.05, "minimal_height": 0.04, "command_name": "object_pose"}, - weight=5.0, + params={ + "std": 0.1, + "minimal_height": 0.04, + "maximal_height": 0.5, + "command_name": "place_pose" + }, + weight=18.0 ) - # action penalty - action_rate = RewTerm(func=mdp.action_rate_l2, weight=-1e-4) - - joint_vel = RewTerm( - func=mdp.joint_vel_l2, - weight=-1e-4, - params={"asset_cfg": SceneEntityCfg("robot")}, + object_placed = RewTerm( + func=object_is_placed, # Reference the function directly + params={ + "distance_threshold": 0.02, + "height_threshold": 0.01, + }, + weight=20.0 ) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/mdp/rewards.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/mdp/rewards.py index 334df9ea50..cf0b94be66 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/mdp/rewards.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/mdp/rewards.py @@ -18,11 +18,49 @@ def object_is_lifted( - env: ManagerBasedRLEnv, minimal_height: float, object_cfg: SceneEntityCfg = SceneEntityCfg("object") + env: ManagerBasedRLEnv, minimal_height: float, maximal_height: float, object_cfg: SceneEntityCfg = SceneEntityCfg("object") ) -> torch.Tensor: """Reward the agent for lifting the object above the minimal height.""" object: RigidObject = env.scene[object_cfg.name] - return 
torch.where(object.data.root_pos_w[:, 2] > minimal_height, 1.0, 0.0) + return torch.where((object.data.root_pos_w[:, 2] > minimal_height) & (object.data.root_pos_w[:, 2] < maximal_height), 1.0, 0.0) + +def object_is_placed( + env: ManagerBasedRLEnv, + distance_threshold: float, + height_threshold: float, + object_cfg: SceneEntityCfg = SceneEntityCfg("object") +) -> torch.Tensor: + """Reward the agent for placing the object at the target position. + + Args: + env: Environment instance + distance_threshold: Maximum allowed xy-distance from target + height_threshold: Maximum allowed height difference from target + object_cfg: Object configuration + + Returns: + Binary reward tensor (1.0 for successful placement, 0.0 otherwise) + """ + # Get object and target information + object: RigidObject = env.scene[object_cfg.name] + place_command = env.command_manager.get_command("place_pose") + + # Get positions + object_pos = object.data.root_pos_w + target_pos = place_command[:, :3] + + # Check xy-distance to target + xy_distance = torch.norm(object_pos[:, :2] - target_pos[:, :2], dim=1) + + # Check height difference + height_diff = torch.abs(object_pos[:, 2] - target_pos[:, 2]) + + # Return 1.0 if within thresholds, 0.0 otherwise + return torch.where( + (xy_distance < distance_threshold) & (height_diff < height_threshold), + 1.0, + 0.0 + ) def object_ee_distance( @@ -49,6 +87,7 @@ def object_goal_distance( env: ManagerBasedRLEnv, std: float, minimal_height: float, + maximal_height: float, command_name: str, robot_cfg: SceneEntityCfg = SceneEntityCfg("robot"), object_cfg: SceneEntityCfg = SceneEntityCfg("object"), @@ -64,4 +103,13 @@ def object_goal_distance( # distance of the end-effector to the object: (num_envs,) distance = torch.norm(des_pos_w - object.data.root_pos_w[:, :3], dim=1) # rewarded if the object is lifted above the threshold - return (object.data.root_pos_w[:, 2] > minimal_height) * (1 - torch.tanh(distance / std)) + + if command_name == "object_pose": + # for lifting - reward when above minimal height + return ((object.data.root_pos_w[:, 2] > minimal_height) & (object.data.root_pos_w[:, 2] < maximal_height)) * (1 - torch.tanh(distance / std)) + elif command_name == "place_pose": + # for placing - reward getting close to place position + target_height = des_pos_w[:, 2] # Get height from command + height_diff = torch.abs(object.data.root_pos_w[:, 2] - target_height) + return 1 - torch.tanh((distance + height_diff) / std) + diff --git a/source/standalone/environments/state_machine/lift_cube_sm.py b/source/standalone/environments/state_machine/lift_cube_sm.py index 5be6ee1bbf..b4d55e9a21 100644 --- a/source/standalone/environments/state_machine/lift_cube_sm.py +++ b/source/standalone/environments/state_machine/lift_cube_sm.py @@ -70,8 +70,8 @@ class PickSmState: GRASP_OBJECT = wp.constant(3) LIFT_OBJECT = wp.constant(4) APPROACH_PLACE_POSITION = wp.constant(5) - APPROACH_DROP = wp.constant(6) - PLACE_OBJECT = wp.constant(7) + PLACE_OBJECT = wp.constant(6) + DROP_OBJECT = wp.constant(7) class PickSmWaitTime: @@ -82,10 +82,9 @@ class PickSmWaitTime: APPROACH_OBJECT = wp.constant(0.6) GRASP_OBJECT = wp.constant(0.3) LIFT_OBJECT = wp.constant(1.0) - APPROACH_PLACE_POSITION = wp.constant(1.0) - APPROACH_DROP = wp.constant(0.6) - PLACE_OBJECT = wp.constant(0.5) - + APPROACH_PLACE_POSITION = wp.constant(0.5) + PLACE_OBJECT = wp.constant(0.6) + DROP_OBJECT = wp.constant(0.3) @wp.kernel @@ -151,21 +150,12 @@ def infer_state_machine( sm_state[tid] = PickSmState.APPROACH_PLACE_POSITION 
sm_wait_time[tid] = 0.0 + # approach place position elif state == PickSmState.APPROACH_PLACE_POSITION: - des_ee_pose[tid] = wp.transform_multiply(offset[tid], object_pose[tid]) + des_ee_pose[tid] = wp.transform_multiply(offset[tid], place_pose[tid]) gripper_state[tid] = GripperState.CLOSE - # TODO: error between current and desired ee pose below threshold - # wait for a while + if sm_wait_time[tid] >= PickSmWaitTime.APPROACH_PLACE_POSITION: - # move to next state and reset wait time - sm_state[tid] = PickSmState.APPROACH_DROP - sm_wait_time[tid] = 0.0 - elif state == PickSmState.APPROACH_DROP: - des_ee_pose[tid] = object_pose[tid] - gripper_state[tid] = GripperState.CLOSE - # TODO: error between current and desired ee pose below threshold - # wait for a while - if sm_wait_time[tid] >= PickSmWaitTime.APPROACH_DROP: # move to next state and reset wait time sm_state[tid] = PickSmState.PLACE_OBJECT sm_wait_time[tid] = 0.0 @@ -178,8 +168,17 @@ def infer_state_machine( # wait for a while if sm_wait_time[tid] >= PickSmWaitTime.PLACE_OBJECT: # move to next state and reset wait time - gripper_state[tid] = GripperState.OPEN - sm_state[tid] = PickSmState.PLACE_OBJECT # or PickSmState.REST + sm_state[tid] = PickSmState.DROP_OBJECT # or PickSmState.REST + sm_wait_time[tid] = 0.0 + + elif state == PickSmState.DROP_OBJECT: + des_ee_pose[tid] = des_place_pose[tid] + gripper_state[tid] = GripperState.OPEN + # TODO: error between current and desired ee pose below threshold + # wait for a while + if sm_wait_time[tid] >= PickSmWaitTime.DROP_OBJECT: + # move to next state and reset wait time + sm_state[tid] = PickSmState.DROP_OBJECT sm_wait_time[tid] = 0.0 @@ -225,7 +224,7 @@ def __init__(self, dt: float, num_envs: int, device: torch.device | str = "cpu") # approach above object offset self.offset = torch.zeros((self.num_envs, 7), device=self.device) - self.offset[:, 2] = 0.05 + self.offset[:, 2] = 0.1 self.offset[:, -1] = 1.0 # warp expects quaternion as (x, y, z, w) # convert to warp @@ -330,7 +329,7 @@ def main(): torch.cat([tcp_rest_position, tcp_rest_orientation], dim=-1), torch.cat([object_position, desired_orientation], dim=-1), torch.cat([desired_position, desired_orientation], dim=-1), - torch.cat([tcp_rest_position, desired_orientation], dim=-1), + torch.cat([desired_place_position, desired_orientation], dim=-1), torch.cat([desired_place_position, desired_orientation], dim=-1), ) From aea8e6936b3ce7f5e56cddd57d8d574901139b61 Mon Sep 17 00:00:00 2001 From: DorsaRoh Date: Tue, 29 Oct 2024 14:20:05 -0400 Subject: [PATCH 3/6] rename files + move class --- ...{lift_env_cfg.py => lift_place_env_cfg.py} | 29 +---------- .../manipulation/lift/mdp/rewards.py | 19 ++------ ...lift_cube_sm.py => lift_and_place_cube.py} | 48 +++++++++---------- 3 files changed, 27 insertions(+), 69 deletions(-) rename source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/{lift_env_cfg.py => lift_place_env_cfg.py} (89%) rename source/standalone/environments/state_machine/{lift_cube_sm.py => lift_and_place_cube.py} (92%) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_place_env_cfg.py similarity index 89% rename from source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_env_cfg.py rename to 
source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_place_env_cfg.py index 0e2ffc6f1d..15f4acf6bf 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_env_cfg.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_place_env_cfg.py @@ -143,33 +143,6 @@ class EventCfg: }, ) -def object_is_placed( - env: ManagerBasedRLEnv, - distance_threshold: float, - height_threshold: float, - object_cfg: SceneEntityCfg = SceneEntityCfg("object") -) -> torch.Tensor: - """Reward the agent for placing the object at target position.""" - object: RigidObject = env.scene[object_cfg.name] - place_command = env.command_manager.get_command("place_pose") - - # Get positions - object_pos = object.data.root_pos_w - target_pos = place_command[:, :3] - - # Check xy-distance to target - xy_distance = torch.norm(object_pos[:, :2] - target_pos[:, :2], dim=1) - - # Check height difference - height_diff = torch.abs(object_pos[:, 2] - target_pos[:, 2]) - - # Return 1.0 if within thresholds, 0.0 otherwise - return torch.where( - (xy_distance < distance_threshold) & (height_diff < height_threshold), - 1.0, - 0.0 - ) - @configclass class RewardsCfg: @@ -214,7 +187,7 @@ class RewardsCfg: ) object_placed = RewTerm( - func=object_is_placed, # Reference the function directly + func=mdp.object_is_placed, # Reference the function directly params={ "distance_threshold": 0.02, "height_threshold": 0.01, diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/mdp/rewards.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/mdp/rewards.py index cf0b94be66..d5af9225f6 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/mdp/rewards.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/mdp/rewards.py @@ -30,32 +30,19 @@ def object_is_placed( height_threshold: float, object_cfg: SceneEntityCfg = SceneEntityCfg("object") ) -> torch.Tensor: - """Reward the agent for placing the object at the target position. 
- - Args: - env: Environment instance - distance_threshold: Maximum allowed xy-distance from target - height_threshold: Maximum allowed height difference from target - object_cfg: Object configuration - - Returns: - Binary reward tensor (1.0 for successful placement, 0.0 otherwise) - """ - # Get object and target information object: RigidObject = env.scene[object_cfg.name] place_command = env.command_manager.get_command("place_pose") - # Get positions object_pos = object.data.root_pos_w target_pos = place_command[:, :3] - # Check xy-distance to target + # check xy-distance to target xy_distance = torch.norm(object_pos[:, :2] - target_pos[:, :2], dim=1) - # Check height difference + # check height difference height_diff = torch.abs(object_pos[:, 2] - target_pos[:, 2]) - # Return 1.0 if within thresholds, 0.0 otherwise + # return 1.0 if within thresholds, 0.0 otherwise return torch.where( (xy_distance < distance_threshold) & (height_diff < height_threshold), 1.0, diff --git a/source/standalone/environments/state_machine/lift_cube_sm.py b/source/standalone/environments/state_machine/lift_and_place_cube.py similarity index 92% rename from source/standalone/environments/state_machine/lift_cube_sm.py rename to source/standalone/environments/state_machine/lift_and_place_cube.py index b4d55e9a21..0d4006c6b6 100644 --- a/source/standalone/environments/state_machine/lift_cube_sm.py +++ b/source/standalone/environments/state_machine/lift_and_place_cube.py @@ -4,14 +4,14 @@ # SPDX-License-Identifier: BSD-3-Clause """ -Script to run an environment with a pick and lift state machine. +Script to run an environment with a pick, lift and place state machine. The state machine is implemented in the kernel function `infer_state_machine`. It uses the `warp` library to run the state machine in parallel on the GPU. .. code-block:: bash - ./isaaclab.sh -p source/standalone/environments/state_machine/lift_cube_sm.py --num_envs 32 + ./isaaclab.sh -p source/standalone/environments/state_machine/lift_and_place_cube.py --num_envs 32 """ @@ -22,7 +22,7 @@ from omni.isaac.lab.app import AppLauncher # add argparse arguments -parser = argparse.ArgumentParser(description="Pick and lift state machine for lift environments.") +parser = argparse.ArgumentParser(description="Pick, lift and place state machine for lift environments.") parser.add_argument( "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." 
) @@ -69,9 +69,9 @@ class PickSmState: APPROACH_OBJECT = wp.constant(2) GRASP_OBJECT = wp.constant(3) LIFT_OBJECT = wp.constant(4) - APPROACH_PLACE_POSITION = wp.constant(5) - PLACE_OBJECT = wp.constant(6) - DROP_OBJECT = wp.constant(7) + APPROACH_ABOVE_PLACE_POSITION = wp.constant(5) + APPROACH_PLACE_POSITION = wp.constant(6) + PLACE_OBJECT = wp.constant(7) class PickSmWaitTime: @@ -82,9 +82,9 @@ class PickSmWaitTime: APPROACH_OBJECT = wp.constant(0.6) GRASP_OBJECT = wp.constant(0.3) LIFT_OBJECT = wp.constant(1.0) - APPROACH_PLACE_POSITION = wp.constant(0.5) - PLACE_OBJECT = wp.constant(0.6) - DROP_OBJECT = wp.constant(0.3) + APPROACH_ABOVE_PLACE_POSITION = wp.constant(0.5) + APPROACH_PLACE_POSITION = wp.constant(0.6) + PLACE_OBJECT = wp.constant(0.3) @wp.kernel @@ -147,38 +147,33 @@ def infer_state_machine( # wait for a while if sm_wait_time[tid] >= PickSmWaitTime.LIFT_OBJECT: # move to next state and reset wait time - sm_state[tid] = PickSmState.APPROACH_PLACE_POSITION + sm_state[tid] = PickSmState.APPROACH_ABOVE_PLACE_POSITION sm_wait_time[tid] = 0.0 - - # approach place position - elif state == PickSmState.APPROACH_PLACE_POSITION: + elif state == PickSmState.APPROACH_ABOVE_PLACE_POSITION: des_ee_pose[tid] = wp.transform_multiply(offset[tid], place_pose[tid]) gripper_state[tid] = GripperState.CLOSE - if sm_wait_time[tid] >= PickSmWaitTime.APPROACH_PLACE_POSITION: + if sm_wait_time[tid] >= PickSmWaitTime.APPROACH_ABOVE_PLACE_POSITION: # move to next state and reset wait time - sm_state[tid] = PickSmState.PLACE_OBJECT + sm_state[tid] = PickSmState.APPROACH_PLACE_POSITION sm_wait_time[tid] = 0.0 - - # place object - elif state == PickSmState.PLACE_OBJECT: + elif state == PickSmState.APPROACH_PLACE_POSITION: des_ee_pose[tid] = des_place_pose[tid] gripper_state[tid] = GripperState.CLOSE # TODO: error between current and desired ee pose below threshold # wait for a while - if sm_wait_time[tid] >= PickSmWaitTime.PLACE_OBJECT: + if sm_wait_time[tid] >= PickSmWaitTime.APPROACH_PLACE_POSITION: # move to next state and reset wait time - sm_state[tid] = PickSmState.DROP_OBJECT # or PickSmState.REST + sm_state[tid] = PickSmState.PLACE_OBJECT # or PickSmState.REST sm_wait_time[tid] = 0.0 - - elif state == PickSmState.DROP_OBJECT: + elif state == PickSmState.PLACE_OBJECT: des_ee_pose[tid] = des_place_pose[tid] gripper_state[tid] = GripperState.OPEN # TODO: error between current and desired ee pose below threshold # wait for a while - if sm_wait_time[tid] >= PickSmWaitTime.DROP_OBJECT: + if sm_wait_time[tid] >= PickSmWaitTime.PLACE_OBJECT: # move to next state and reset wait time - sm_state[tid] = PickSmState.DROP_OBJECT + sm_state[tid] = PickSmState.PLACE_OBJECT sm_wait_time[tid] = 0.0 @@ -187,7 +182,7 @@ def infer_state_machine( class PickAndLiftSm: - """A simple state machine in a robot's task space to pick and lift an object. + """A simple state machine in a robot's task space to pick, lift and place an object. The state machine is implemented as a warp kernel. It takes in the current state of the robot's end-effector and the object, and outputs the desired state of the robot's @@ -199,6 +194,9 @@ class PickAndLiftSm: 3. APPROACH_OBJECT: The robot moves to the object. 4. GRASP_OBJECT: The robot grasps the object. 5. LIFT_OBJECT: The robot lifts the object to the desired pose. This is the final state. + 6. APPROACH_ABOVE_PLACE_POSITION: The robot moves above the place position. + 7. APPROACH_PLACE_POSITION: The robot moves to the place position. + 8. PLACE_OBJECT: The robot drops the object. 
""" def __init__(self, dt: float, num_envs: int, device: torch.device | str = "cpu"): From 7fe38558577da7381108b4ee1eacb5d48d00d8b0 Mon Sep 17 00:00:00 2001 From: DorsaRoh Date: Tue, 29 Oct 2024 14:47:23 -0400 Subject: [PATCH 4/6] keep original lift sim --- .../manipulation/lift/lift_env_cfg.py | 222 +++++++++++++ ...lace_cube.py => lift_and_place_cube_sm.py} | 2 +- .../state_machine/lift_cube_sm.py | 298 ++++++++++++++++++ 3 files changed, 521 insertions(+), 1 deletion(-) create mode 100644 source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_env_cfg.py rename source/standalone/environments/state_machine/{lift_and_place_cube.py => lift_and_place_cube_sm.py} (99%) create mode 100644 source/standalone/environments/state_machine/lift_cube_sm.py diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_env_cfg.py new file mode 100644 index 0000000000..925c956b15 --- /dev/null +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/lift_env_cfg.py @@ -0,0 +1,222 @@ +# Copyright (c) 2022-2024, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +from dataclasses import MISSING + +import omni.isaac.lab.sim as sim_utils +from omni.isaac.lab.assets import ArticulationCfg, AssetBaseCfg, DeformableObjectCfg, RigidObjectCfg +from omni.isaac.lab.envs import ManagerBasedRLEnvCfg +from omni.isaac.lab.managers import CurriculumTermCfg as CurrTerm +from omni.isaac.lab.managers import EventTermCfg as EventTerm +from omni.isaac.lab.managers import ObservationGroupCfg as ObsGroup +from omni.isaac.lab.managers import ObservationTermCfg as ObsTerm +from omni.isaac.lab.managers import RewardTermCfg as RewTerm +from omni.isaac.lab.managers import SceneEntityCfg +from omni.isaac.lab.managers import TerminationTermCfg as DoneTerm +from omni.isaac.lab.scene import InteractiveSceneCfg +from omni.isaac.lab.sensors.frame_transformer.frame_transformer_cfg import FrameTransformerCfg +from omni.isaac.lab.sim.spawners.from_files.from_files_cfg import GroundPlaneCfg, UsdFileCfg +from omni.isaac.lab.utils import configclass +from omni.isaac.lab.utils.assets import ISAAC_NUCLEUS_DIR + +from . import mdp + +## +# Scene definition +## + + +@configclass +class ObjectTableSceneCfg(InteractiveSceneCfg): + """Configuration for the lift scene with a robot and a object. 
+ This is the abstract base implementation, the exact scene is defined in the derived classes + which need to set the target object, robot and end-effector frames + """ + + # robots: will be populated by agent env cfg + robot: ArticulationCfg = MISSING + # end-effector sensor: will be populated by agent env cfg + ee_frame: FrameTransformerCfg = MISSING + # target object: will be populated by agent env cfg + object: RigidObjectCfg | DeformableObjectCfg = MISSING + + # Table + table = AssetBaseCfg( + prim_path="{ENV_REGEX_NS}/Table", + init_state=AssetBaseCfg.InitialStateCfg(pos=[0.5, 0, 0], rot=[0.707, 0, 0, 0.707]), + spawn=UsdFileCfg(usd_path=f"{ISAAC_NUCLEUS_DIR}/Props/Mounts/SeattleLabTable/table_instanceable.usd"), + ) + + # plane + plane = AssetBaseCfg( + prim_path="/World/GroundPlane", + init_state=AssetBaseCfg.InitialStateCfg(pos=[0, 0, -1.05]), + spawn=GroundPlaneCfg(), + ) + + # lights + light = AssetBaseCfg( + prim_path="/World/light", + spawn=sim_utils.DomeLightCfg(color=(0.75, 0.75, 0.75), intensity=3000.0), + ) + + +## +# MDP settings +## + + +@configclass +class CommandsCfg: + """Command terms for the MDP.""" + + object_pose = mdp.UniformPoseCommandCfg( + asset_name="robot", + body_name=MISSING, # will be set by agent env cfg + resampling_time_range=(5.0, 5.0), + debug_vis=True, + ranges=mdp.UniformPoseCommandCfg.Ranges( + pos_x=(0.4, 0.6), pos_y=(-0.25, 0.25), pos_z=(0.25, 0.5), roll=(0.0, 0.0), pitch=(0.0, 0.0), yaw=(0.0, 0.0) + ), + ) + + +@configclass +class ActionsCfg: + """Action specifications for the MDP.""" + + # will be set by agent env cfg + arm_action: mdp.JointPositionActionCfg | mdp.DifferentialInverseKinematicsActionCfg = MISSING + gripper_action: mdp.BinaryJointPositionActionCfg = MISSING + + +@configclass +class ObservationsCfg: + """Observation specifications for the MDP.""" + + @configclass + class PolicyCfg(ObsGroup): + """Observations for policy group.""" + + joint_pos = ObsTerm(func=mdp.joint_pos_rel) + joint_vel = ObsTerm(func=mdp.joint_vel_rel) + object_position = ObsTerm(func=mdp.object_position_in_robot_root_frame) + target_object_position = ObsTerm(func=mdp.generated_commands, params={"command_name": "object_pose"}) + actions = ObsTerm(func=mdp.last_action) + + def __post_init__(self): + self.enable_corruption = True + self.concatenate_terms = True + + # observation groups + policy: PolicyCfg = PolicyCfg() + + +@configclass +class EventCfg: + """Configuration for events.""" + + reset_all = EventTerm(func=mdp.reset_scene_to_default, mode="reset") + + reset_object_position = EventTerm( + func=mdp.reset_root_state_uniform, + mode="reset", + params={ + "pose_range": {"x": (-0.1, 0.1), "y": (-0.25, 0.25), "z": (0.0, 0.0)}, + "velocity_range": {}, + "asset_cfg": SceneEntityCfg("object", body_names="Object"), + }, + ) + + +@configclass +class RewardsCfg: + """Reward terms for the MDP.""" + + reaching_object = RewTerm(func=mdp.object_ee_distance, params={"std": 0.1}, weight=1.0) + + lifting_object = RewTerm(func=mdp.object_is_lifted, params={"minimal_height": 0.04}, weight=15.0) + + object_goal_tracking = RewTerm( + func=mdp.object_goal_distance, + params={"std": 0.3, "minimal_height": 0.04, "command_name": "object_pose"}, + weight=16.0, + ) + + object_goal_tracking_fine_grained = RewTerm( + func=mdp.object_goal_distance, + params={"std": 0.05, "minimal_height": 0.04, "command_name": "object_pose"}, + weight=5.0, + ) + + # action penalty + action_rate = RewTerm(func=mdp.action_rate_l2, weight=-1e-4) + + joint_vel = RewTerm( + func=mdp.joint_vel_l2, + 
weight=-1e-4, + params={"asset_cfg": SceneEntityCfg("robot")}, + ) + + +@configclass +class TerminationsCfg: + """Termination terms for the MDP.""" + + time_out = DoneTerm(func=mdp.time_out, time_out=True) + + object_dropping = DoneTerm( + func=mdp.root_height_below_minimum, params={"minimum_height": -0.05, "asset_cfg": SceneEntityCfg("object")} + ) + + +@configclass +class CurriculumCfg: + """Curriculum terms for the MDP.""" + + action_rate = CurrTerm( + func=mdp.modify_reward_weight, params={"term_name": "action_rate", "weight": -1e-1, "num_steps": 10000} + ) + + joint_vel = CurrTerm( + func=mdp.modify_reward_weight, params={"term_name": "joint_vel", "weight": -1e-1, "num_steps": 10000} + ) + + +## +# Environment configuration +## + + +@configclass +class LiftEnvCfg(ManagerBasedRLEnvCfg): + """Configuration for the lifting environment.""" + + # Scene settings + scene: ObjectTableSceneCfg = ObjectTableSceneCfg(num_envs=4096, env_spacing=2.5) + # Basic settings + observations: ObservationsCfg = ObservationsCfg() + actions: ActionsCfg = ActionsCfg() + commands: CommandsCfg = CommandsCfg() + # MDP settings + rewards: RewardsCfg = RewardsCfg() + terminations: TerminationsCfg = TerminationsCfg() + events: EventCfg = EventCfg() + curriculum: CurriculumCfg = CurriculumCfg() + + def __post_init__(self): + """Post initialization.""" + # general settings + self.decimation = 2 + self.episode_length_s = 5.0 + # simulation settings + self.sim.dt = 0.01 # 100Hz + self.sim.render_interval = self.decimation + + self.sim.physx.bounce_threshold_velocity = 0.2 + self.sim.physx.bounce_threshold_velocity = 0.01 + self.sim.physx.gpu_found_lost_aggregate_pairs_capacity = 1024 * 1024 * 4 + self.sim.physx.gpu_total_aggregate_pairs_capacity = 16 * 1024 + self.sim.physx.friction_correlation_distance = 0.00625 \ No newline at end of file diff --git a/source/standalone/environments/state_machine/lift_and_place_cube.py b/source/standalone/environments/state_machine/lift_and_place_cube_sm.py similarity index 99% rename from source/standalone/environments/state_machine/lift_and_place_cube.py rename to source/standalone/environments/state_machine/lift_and_place_cube_sm.py index 0d4006c6b6..d244596739 100644 --- a/source/standalone/environments/state_machine/lift_and_place_cube.py +++ b/source/standalone/environments/state_machine/lift_and_place_cube_sm.py @@ -11,7 +11,7 @@ .. code-block:: bash - ./isaaclab.sh -p source/standalone/environments/state_machine/lift_and_place_cube.py --num_envs 32 + ./isaaclab.sh -p source/standalone/environments/state_machine/lift_and_place_cube_sm.py --num_envs 32 """ diff --git a/source/standalone/environments/state_machine/lift_cube_sm.py b/source/standalone/environments/state_machine/lift_cube_sm.py new file mode 100644 index 0000000000..c4185d7927 --- /dev/null +++ b/source/standalone/environments/state_machine/lift_cube_sm.py @@ -0,0 +1,298 @@ +# Copyright (c) 2022-2024, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause + +""" +Script to run an environment with a pick and lift state machine. + +The state machine is implemented in the kernel function `infer_state_machine`. +It uses the `warp` library to run the state machine in parallel on the GPU. + +.. 
code-block:: bash + + ./isaaclab.sh -p source/standalone/environments/state_machine/lift_cube_sm.py --num_envs 32 + +""" + +"""Launch Omniverse Toolkit first.""" + +import argparse + +from omni.isaac.lab.app import AppLauncher + +# add argparse arguments +parser = argparse.ArgumentParser(description="Pick and lift state machine for lift environments.") +parser.add_argument( + "--disable_fabric", action="store_true", default=False, help="Disable fabric and use USD I/O operations." +) +parser.add_argument("--num_envs", type=int, default=None, help="Number of environments to simulate.") +# append AppLauncher cli args +AppLauncher.add_app_launcher_args(parser) +# parse the arguments +args_cli = parser.parse_args() + +# launch omniverse app +app_launcher = AppLauncher(headless=args_cli.headless) +simulation_app = app_launcher.app + +"""Rest everything else.""" + +import gymnasium as gym +import torch +from collections.abc import Sequence + +import warp as wp + +from omni.isaac.lab.assets.rigid_object.rigid_object_data import RigidObjectData + +import omni.isaac.lab_tasks # noqa: F401 +from omni.isaac.lab_tasks.manager_based.manipulation.lift.lift_env_cfg import LiftEnvCfg +from omni.isaac.lab_tasks.utils.parse_cfg import parse_env_cfg + +# initialize warp +wp.init() + + +class GripperState: + """States for the gripper.""" + + OPEN = wp.constant(1.0) + CLOSE = wp.constant(-1.0) + + +class PickSmState: + """States for the pick state machine.""" + + REST = wp.constant(0) + APPROACH_ABOVE_OBJECT = wp.constant(1) + APPROACH_OBJECT = wp.constant(2) + GRASP_OBJECT = wp.constant(3) + LIFT_OBJECT = wp.constant(4) + + +class PickSmWaitTime: + """Additional wait times (in s) for states for before switching.""" + + REST = wp.constant(0.2) + APPROACH_ABOVE_OBJECT = wp.constant(0.5) + APPROACH_OBJECT = wp.constant(0.6) + GRASP_OBJECT = wp.constant(0.3) + LIFT_OBJECT = wp.constant(1.0) + + +@wp.kernel +def infer_state_machine( + dt: wp.array(dtype=float), + sm_state: wp.array(dtype=int), + sm_wait_time: wp.array(dtype=float), + ee_pose: wp.array(dtype=wp.transform), + object_pose: wp.array(dtype=wp.transform), + des_object_pose: wp.array(dtype=wp.transform), + des_ee_pose: wp.array(dtype=wp.transform), + gripper_state: wp.array(dtype=float), + offset: wp.array(dtype=wp.transform), +): + # retrieve thread id + tid = wp.tid() + # retrieve state machine state + state = sm_state[tid] + # decide next state + if state == PickSmState.REST: + des_ee_pose[tid] = ee_pose[tid] + gripper_state[tid] = GripperState.OPEN + # wait for a while + if sm_wait_time[tid] >= PickSmWaitTime.REST: + # move to next state and reset wait time + sm_state[tid] = PickSmState.APPROACH_ABOVE_OBJECT + sm_wait_time[tid] = 0.0 + elif state == PickSmState.APPROACH_ABOVE_OBJECT: + des_ee_pose[tid] = wp.transform_multiply(offset[tid], object_pose[tid]) + gripper_state[tid] = GripperState.OPEN + # TODO: error between current and desired ee pose below threshold + # wait for a while + if sm_wait_time[tid] >= PickSmWaitTime.APPROACH_OBJECT: + # move to next state and reset wait time + sm_state[tid] = PickSmState.APPROACH_OBJECT + sm_wait_time[tid] = 0.0 + elif state == PickSmState.APPROACH_OBJECT: + des_ee_pose[tid] = object_pose[tid] + gripper_state[tid] = GripperState.OPEN + # TODO: error between current and desired ee pose below threshold + # wait for a while + if sm_wait_time[tid] >= PickSmWaitTime.APPROACH_OBJECT: + # move to next state and reset wait time + sm_state[tid] = PickSmState.GRASP_OBJECT + sm_wait_time[tid] = 0.0 + elif state == 
PickSmState.GRASP_OBJECT: + des_ee_pose[tid] = object_pose[tid] + gripper_state[tid] = GripperState.CLOSE + # wait for a while + if sm_wait_time[tid] >= PickSmWaitTime.GRASP_OBJECT: + # move to next state and reset wait time + sm_state[tid] = PickSmState.LIFT_OBJECT + sm_wait_time[tid] = 0.0 + elif state == PickSmState.LIFT_OBJECT: + des_ee_pose[tid] = des_object_pose[tid] + gripper_state[tid] = GripperState.CLOSE + # TODO: error between current and desired ee pose below threshold + # wait for a while + if sm_wait_time[tid] >= PickSmWaitTime.LIFT_OBJECT: + # move to next state and reset wait time + sm_state[tid] = PickSmState.LIFT_OBJECT + sm_wait_time[tid] = 0.0 + # increment wait time + sm_wait_time[tid] = sm_wait_time[tid] + dt[tid] + + +class PickAndLiftSm: + """A simple state machine in a robot's task space to pick and lift an object. + + The state machine is implemented as a warp kernel. It takes in the current state of + the robot's end-effector and the object, and outputs the desired state of the robot's + end-effector and the gripper. The state machine is implemented as a finite state + machine with the following states: + + 1. REST: The robot is at rest. + 2. APPROACH_ABOVE_OBJECT: The robot moves above the object. + 3. APPROACH_OBJECT: The robot moves to the object. + 4. GRASP_OBJECT: The robot grasps the object. + 5. LIFT_OBJECT: The robot lifts the object to the desired pose. This is the final state. + """ + + def __init__(self, dt: float, num_envs: int, device: torch.device | str = "cpu"): + """Initialize the state machine. + + Args: + dt: The environment time step. + num_envs: The number of environments to simulate. + device: The device to run the state machine on. + """ + # save parameters + self.dt = float(dt) + self.num_envs = num_envs + self.device = device + # initialize state machine + self.sm_dt = torch.full((self.num_envs,), self.dt, device=self.device) + self.sm_state = torch.full((self.num_envs,), 0, dtype=torch.int32, device=self.device) + self.sm_wait_time = torch.zeros((self.num_envs,), device=self.device) + + # desired state + self.des_ee_pose = torch.zeros((self.num_envs, 7), device=self.device) + self.des_gripper_state = torch.full((self.num_envs,), 0.0, device=self.device) + + # approach above object offset + self.offset = torch.zeros((self.num_envs, 7), device=self.device) + self.offset[:, 2] = 0.1 + self.offset[:, -1] = 1.0 # warp expects quaternion as (x, y, z, w) + + # convert to warp + self.sm_dt_wp = wp.from_torch(self.sm_dt, wp.float32) + self.sm_state_wp = wp.from_torch(self.sm_state, wp.int32) + self.sm_wait_time_wp = wp.from_torch(self.sm_wait_time, wp.float32) + self.des_ee_pose_wp = wp.from_torch(self.des_ee_pose, wp.transform) + self.des_gripper_state_wp = wp.from_torch(self.des_gripper_state, wp.float32) + self.offset_wp = wp.from_torch(self.offset, wp.transform) + + def reset_idx(self, env_ids: Sequence[int] = None): + """Reset the state machine.""" + if env_ids is None: + env_ids = slice(None) + self.sm_state[env_ids] = 0 + self.sm_wait_time[env_ids] = 0.0 + + def compute(self, ee_pose: torch.Tensor, object_pose: torch.Tensor, des_object_pose: torch.Tensor): + """Compute the desired state of the robot's end-effector and the gripper.""" + # convert all transformations from (w, x, y, z) to (x, y, z, w) + ee_pose = ee_pose[:, [0, 1, 2, 4, 5, 6, 3]] + object_pose = object_pose[:, [0, 1, 2, 4, 5, 6, 3]] + des_object_pose = des_object_pose[:, [0, 1, 2, 4, 5, 6, 3]] + + # convert to warp + ee_pose_wp = wp.from_torch(ee_pose.contiguous(), 
wp.transform) + object_pose_wp = wp.from_torch(object_pose.contiguous(), wp.transform) + des_object_pose_wp = wp.from_torch(des_object_pose.contiguous(), wp.transform) + + # run state machine + wp.launch( + kernel=infer_state_machine, + dim=self.num_envs, + inputs=[ + self.sm_dt_wp, + self.sm_state_wp, + self.sm_wait_time_wp, + ee_pose_wp, + object_pose_wp, + des_object_pose_wp, + self.des_ee_pose_wp, + self.des_gripper_state_wp, + self.offset_wp, + ], + device=self.device, + ) + + # convert transformations back to (w, x, y, z) + des_ee_pose = self.des_ee_pose[:, [0, 1, 2, 6, 3, 4, 5]] + # convert to torch + return torch.cat([des_ee_pose, self.des_gripper_state.unsqueeze(-1)], dim=-1) + + +def main(): + # parse configuration + env_cfg: LiftEnvCfg = parse_env_cfg( + "Isaac-Lift-Cube-Franka-IK-Abs-v0", + device=args_cli.device, + num_envs=args_cli.num_envs, + use_fabric=not args_cli.disable_fabric, + ) + # create environment + env = gym.make("Isaac-Lift-Cube-Franka-IK-Abs-v0", cfg=env_cfg) + # reset environment at start + env.reset() + + # create action buffers (position + quaternion) + actions = torch.zeros(env.unwrapped.action_space.shape, device=env.unwrapped.device) + actions[:, 3] = 1.0 + # desired object orientation (we only do position control of object) + desired_orientation = torch.zeros((env.unwrapped.num_envs, 4), device=env.unwrapped.device) + desired_orientation[:, 1] = 1.0 + # create state machine + pick_sm = PickAndLiftSm(env_cfg.sim.dt * env_cfg.decimation, env.unwrapped.num_envs, env.unwrapped.device) + + while simulation_app.is_running(): + # run everything in inference mode + with torch.inference_mode(): + # step environment + dones = env.step(actions)[-2] + + # observations + # -- end-effector frame + ee_frame_sensor = env.unwrapped.scene["ee_frame"] + tcp_rest_position = ee_frame_sensor.data.target_pos_w[..., 0, :].clone() - env.unwrapped.scene.env_origins + tcp_rest_orientation = ee_frame_sensor.data.target_quat_w[..., 0, :].clone() + # -- object frame + object_data: RigidObjectData = env.unwrapped.scene["object"].data + object_position = object_data.root_pos_w - env.unwrapped.scene.env_origins + # -- target object frame + desired_position = env.unwrapped.command_manager.get_command("object_pose")[..., :3] + + # advance state machine + actions = pick_sm.compute( + torch.cat([tcp_rest_position, tcp_rest_orientation], dim=-1), + torch.cat([object_position, desired_orientation], dim=-1), + torch.cat([desired_position, desired_orientation], dim=-1), + ) + + # reset state machine + if dones.any(): + pick_sm.reset_idx(dones.nonzero(as_tuple=False).squeeze(-1)) + + # close the environment + env.close() + + +if __name__ == "__main__": + # run the main function + main() + # close sim app + simulation_app.close() \ No newline at end of file From c97987f1678a75606af0c6e00b6714f7ae610829 Mon Sep 17 00:00:00 2001 From: DorsaRoh Date: Tue, 29 Oct 2024 14:54:57 -0400 Subject: [PATCH 5/6] correct renamed file import --- .../environments/state_machine/lift_and_place_cube_sm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/standalone/environments/state_machine/lift_and_place_cube_sm.py b/source/standalone/environments/state_machine/lift_and_place_cube_sm.py index d244596739..215fd87462 100644 --- a/source/standalone/environments/state_machine/lift_and_place_cube_sm.py +++ b/source/standalone/environments/state_machine/lift_and_place_cube_sm.py @@ -47,7 +47,7 @@ from omni.isaac.lab.assets.rigid_object.rigid_object_data import RigidObjectData import 
omni.isaac.lab_tasks # noqa: F401 -from omni.isaac.lab_tasks.manager_based.manipulation.lift.lift_env_cfg import LiftEnvCfg +from omni.isaac.lab_tasks.manager_based.manipulation.lift.lift_place_env_cfg import LiftEnvCfg from omni.isaac.lab_tasks.utils.parse_cfg import parse_env_cfg # initialize warp From bc5f5e697e2a5d26c3dc8fa096a38ae893af7248 Mon Sep 17 00:00:00 2001 From: DorsaRoh Date: Tue, 29 Oct 2024 15:20:22 -0400 Subject: [PATCH 6/6] fix import statement + readability --- .../manipulation/lift/mdp/rewards.py | 35 ++++++++++--------- .../state_machine/lift_cube_sm.py | 2 +- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/mdp/rewards.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/mdp/rewards.py index d5af9225f6..2c90967718 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/mdp/rewards.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/manipulation/lift/mdp/rewards.py @@ -18,36 +18,38 @@ def object_is_lifted( - env: ManagerBasedRLEnv, minimal_height: float, maximal_height: float, object_cfg: SceneEntityCfg = SceneEntityCfg("object") + env: ManagerBasedRLEnv, + minimal_height: float, + maximal_height: float, + object_cfg: SceneEntityCfg = SceneEntityCfg("object"), ) -> torch.Tensor: """Reward the agent for lifting the object above the minimal height.""" object: RigidObject = env.scene[object_cfg.name] - return torch.where((object.data.root_pos_w[:, 2] > minimal_height) & (object.data.root_pos_w[:, 2] < maximal_height), 1.0, 0.0) + return torch.where( + (object.data.root_pos_w[:, 2] > minimal_height) & (object.data.root_pos_w[:, 2] < maximal_height), 1.0, 0.0 + ) + def object_is_placed( - env: ManagerBasedRLEnv, + env: ManagerBasedRLEnv, distance_threshold: float, height_threshold: float, - object_cfg: SceneEntityCfg = SceneEntityCfg("object") + object_cfg: SceneEntityCfg = SceneEntityCfg("object"), ) -> torch.Tensor: object: RigidObject = env.scene[object_cfg.name] place_command = env.command_manager.get_command("place_pose") - + object_pos = object.data.root_pos_w target_pos = place_command[:, :3] - + # check xy-distance to target xy_distance = torch.norm(object_pos[:, :2] - target_pos[:, :2], dim=1) - + # check height difference height_diff = torch.abs(object_pos[:, 2] - target_pos[:, 2]) - + # return 1.0 if within thresholds, 0.0 otherwise - return torch.where( - (xy_distance < distance_threshold) & (height_diff < height_threshold), - 1.0, - 0.0 - ) + return torch.where((xy_distance < distance_threshold) & (height_diff < height_threshold), 1.0, 0.0) def object_ee_distance( @@ -90,13 +92,14 @@ def object_goal_distance( # distance of the end-effector to the object: (num_envs,) distance = torch.norm(des_pos_w - object.data.root_pos_w[:, :3], dim=1) # rewarded if the object is lifted above the threshold - + if command_name == "object_pose": # for lifting - reward when above minimal height - return ((object.data.root_pos_w[:, 2] > minimal_height) & (object.data.root_pos_w[:, 2] < maximal_height)) * (1 - torch.tanh(distance / std)) + return ((object.data.root_pos_w[:, 2] > minimal_height) & (object.data.root_pos_w[:, 2] < maximal_height)) * ( + 1 - torch.tanh(distance / std) + ) elif command_name == "place_pose": # for placing - reward getting close to place position target_height = des_pos_w[:, 2] # Get height from command height_diff = 
torch.abs(object.data.root_pos_w[:, 2] - target_height) return 1 - torch.tanh((distance + height_diff) / std) - diff --git a/source/standalone/environments/state_machine/lift_cube_sm.py b/source/standalone/environments/state_machine/lift_cube_sm.py index c4185d7927..f5fdb280c4 100644 --- a/source/standalone/environments/state_machine/lift_cube_sm.py +++ b/source/standalone/environments/state_machine/lift_cube_sm.py @@ -47,7 +47,7 @@ from omni.isaac.lab.assets.rigid_object.rigid_object_data import RigidObjectData import omni.isaac.lab_tasks # noqa: F401 -from omni.isaac.lab_tasks.manager_based.manipulation.lift.lift_env_cfg import LiftEnvCfg +from omni.isaac.lab_tasks.manager_based.manipulation.lift.lift_place_env_cfg import LiftEnvCfg from omni.isaac.lab_tasks.utils.parse_cfg import parse_env_cfg # initialize warp
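
Taken together, these patches grow the original five-state pick-and-lift machine into an eight-state pick, lift and place machine. The sketch below is a plain-Python, single-environment mirror of the transition logic that the final `infer_state_machine` warp kernel implements; the `Sm` class, the `STATES` table and the `step` helper are illustrative stand-ins (desired end-effector poses are omitted), not code from the patches.

.. code-block:: python

    from dataclasses import dataclass

    OPEN, CLOSE = 1.0, -1.0  # GripperState values from the kernel

    # (state name, gripper command, wait time in seconds), in execution order
    STATES = [
        ("REST", OPEN, 0.2),
        ("APPROACH_ABOVE_OBJECT", OPEN, 0.5),
        ("APPROACH_OBJECT", OPEN, 0.6),
        ("GRASP_OBJECT", CLOSE, 0.3),
        ("LIFT_OBJECT", CLOSE, 1.0),
        ("APPROACH_ABOVE_PLACE_POSITION", CLOSE, 0.5),
        ("APPROACH_PLACE_POSITION", CLOSE, 0.6),
        ("PLACE_OBJECT", OPEN, 0.3),  # terminal state: the kernel re-enters it
    ]

    @dataclass
    class Sm:
        state: int = 0
        wait: float = 0.0

        def step(self, dt: float) -> tuple[str, float]:
            # compute command from the current state, then check the transition,
            # mirroring the order of operations in the warp kernel
            name, gripper, hold = STATES[self.state]
            if self.wait >= hold:
                # advance, clamping at the terminal PLACE_OBJECT state
                self.state = min(self.state + 1, len(STATES) - 1)
                self.wait = 0.0
            self.wait += dt
            return name, gripper

    sm = Sm()
    for _ in range(250):  # 5 s at the 50 Hz control step (sim.dt * decimation)
        name, gripper = sm.step(0.02)
    print(name, gripper)  # PLACE_OBJECT 1.0

The cumulative wait times add up to roughly 3.7 s, which fits inside the 5.0 s episode length configured in `LiftEnvCfg`; that is why the machine reaches PLACE_OBJECT before the environment resets.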
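
The `object_is_placed` term introduced in patch 2 (and moved into `mdp.rewards` in patch 3) is a binary success signal rather than a shaped reward. Below is a minimal self-contained check of its thresholding, with plain tensors standing in for the scene object and the `place_pose` command; the `is_placed` wrapper and the sample positions are assumptions for illustration only.

.. code-block:: python

    import torch

    def is_placed(object_pos: torch.Tensor, target_pos: torch.Tensor,
                  distance_threshold: float = 0.02, height_threshold: float = 0.01) -> torch.Tensor:
        # same thresholding as mdp.object_is_placed, with positions passed in directly
        xy_distance = torch.norm(object_pos[:, :2] - target_pos[:, :2], dim=1)
        height_diff = torch.abs(object_pos[:, 2] - target_pos[:, 2])
        return torch.where((xy_distance < distance_threshold) & (height_diff < height_threshold), 1.0, 0.0)

    # env 0: 1 cm off in xy, 5 mm off in height -> placed
    # env 1: 5 cm off in xy                     -> not placed
    obj = torch.tensor([[0.51, 0.00, 0.055], [0.55, 0.00, 0.050]])
    tgt = torch.tensor([[0.50, 0.00, 0.050], [0.50, 0.00, 0.050]])
    print(is_placed(obj, tgt))  # tensor([1., 0.])

The defaults above match the thresholds wired into `RewardsCfg.object_placed` (`distance_threshold=0.02`, `height_threshold=0.01`).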
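
One detail of the final `object_goal_distance` worth keeping in mind: if `command_name` is neither "object_pose" nor "place_pose", the if/elif chain falls through and the function implicitly returns `None`, which would only surface later as an error inside the reward manager. A guarded version of the same branch structure could end with an explicit raise, as in this sketch; the `goal_distance_reward` helper, its argument names and the raise are illustrative, not code from the series.

.. code-block:: python

    import torch

    def goal_distance_reward(distance: torch.Tensor, height_diff: torch.Tensor,
                             in_lift_band: torch.Tensor, command_name: str, std: float) -> torch.Tensor:
        # same branch structure as the patched object_goal_distance, with an explicit fallback
        if command_name == "object_pose":
            # for lifting: shaped reward, gated on the object being inside the height band
            return in_lift_band * (1 - torch.tanh(distance / std))
        if command_name == "place_pose":
            # for placing: shaped reward on combined distance and height error
            return 1 - torch.tanh((distance + height_diff) / std)
        raise ValueError(f"Unsupported command name: {command_name!r}")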