From aad9cc39c101b662d70a377c213679cd4dabaaa1 Mon Sep 17 00:00:00 2001
From: skywhite1024 <129768272+skywhite1024@users.noreply.github.com>
Date: Fri, 12 Jun 2026 16:15:26 +0800
Subject: [PATCH 01/14] Add upright atomic action

---
 .../lab/sim/atomic_actions/__init__.py        |   4 +
 embodichain/lab/sim/atomic_actions/actions.py | 259 ++++++++++
 embodichain/lab/sim/objects/articulation.py   |   2 -
 .../atomic_action/upright_atomic_action.py    | 466 ++++++++++++++++++
 4 files changed, 729 insertions(+), 2 deletions(-)
 create mode 100644 scripts/tutorials/atomic_action/upright_atomic_action.py

diff --git a/embodichain/lab/sim/atomic_actions/__init__.py b/embodichain/lab/sim/atomic_actions/__init__.py
index cf1e60ce..bbbf7df5 100644
--- a/embodichain/lab/sim/atomic_actions/__init__.py
+++ b/embodichain/lab/sim/atomic_actions/__init__.py
@@ -33,9 +33,11 @@
     MoveAction,
     PickUpAction,
     PlaceAction,
+    UprightAction,
     MoveActionCfg,
     PickUpActionCfg,
     PlaceActionCfg,
+    UprightActionCfg,
 )
 from .engine import (
     AtomicActionEngine,
@@ -56,9 +58,11 @@
     "MoveAction",
     "PickUpAction",
     "PlaceAction",
+    "UprightAction",
     "MoveActionCfg",
     "PickUpActionCfg",
     "PlaceActionCfg",
+    "UprightActionCfg",
     # Engine
     "AtomicActionEngine",
     "register_action",
diff --git a/embodichain/lab/sim/atomic_actions/actions.py b/embodichain/lab/sim/atomic_actions/actions.py
index 1aa8901a..97782601 100644
--- a/embodichain/lab/sim/atomic_actions/actions.py
+++ b/embodichain/lab/sim/atomic_actions/actions.py
@@ -517,6 +517,265 @@ def validate(self, target, start_qpos=None, **kwargs):
         return True
 
 
+@configclass
+class UprightActionCfg(PickUpActionCfg):
+    name: str = "upright"
+    """Name of the action, used for identification and logging."""
+
+    place_clearance: float = 0.005
+    """Clearance (m) between the upright object bottom and the support plane."""
+
+    upright_axis_sign: float = 1.0
+    """Direction of the object's local Z axis after upright placement.
+
+    Use ``1.0`` to align local +Z with world +Z. Use ``-1.0`` when the mesh's
+    local +Z points toward the physical bottom and local -Z should face upward.
+    """
+
+
+class UprightAction(PickUpAction):
+    def __init__(
+        self,
+        motion_generator: MotionGenerator,
+        cfg: UprightActionCfg | None = None,
+    ):
+        """
+        Initialize the atomic action.
+        Args:
+            motion_generator: The motion generator instance to use for planning.
+            cfg: Configuration for the action.
+        """
+        super().__init__(
+            motion_generator, cfg=cfg if cfg is not None else UprightActionCfg()
+        )
+
+    @staticmethod
+    def _invert_pose(pose: torch.Tensor) -> torch.Tensor:
+        """Invert a batched homogeneous transform."""
+        inv_pose = pose.clone()
+        rot_t = pose[:, :3, :3].transpose(1, 2)
+        inv_pose[:, :3, :3] = rot_t
+        inv_pose[:, :3, 3] = -torch.bmm(rot_t, pose[:, :3, 3:4]).squeeze(-1)
+        return inv_pose
+
+    def _build_upright_object_pose(
+        self, semantics: ObjectSemantics, obj_poses: torch.Tensor
+    ) -> torch.Tensor:
+        """Build a target object pose whose configured local Z direction is upright."""
+        world_z = torch.tensor([0.0, 0.0, 1.0], device=self.device)
+        axis_sign = 1.0 if self.cfg.upright_axis_sign >= 0.0 else -1.0
+        projected_x = obj_poses[:, :3, 0].clone()
+        projected_x[:, 2] = 0.0
+        projected_x_norm = projected_x.norm(dim=1, keepdim=True)
+
+        fallback_x = obj_poses[:, :3, 1].clone()
+        fallback_x[:, 2] = 0.0
+        fallback_x_norm = fallback_x.norm(dim=1, keepdim=True)
+        fallback_x = fallback_x / fallback_x_norm.clamp(min=1e-6)
+
+        default_x = torch.tensor([1.0, 0.0, 0.0], device=self.device).repeat(
+            self.n_envs, 1
+        )
+        upright_x = torch.where(
+            projected_x_norm > 1e-6,
+            projected_x / projected_x_norm.clamp(min=1e-6),
+            torch.where(fallback_x_norm > 1e-6, fallback_x, default_x),
+        )
+        upright_z = axis_sign * world_z.repeat(self.n_envs, 1)
+        upright_y = torch.cross(upright_z, upright_x, dim=1)
+        upright_y = upright_y / upright_y.norm(dim=1, keepdim=True).clamp(min=1e-6)
+        upright_x = torch.cross(upright_y, upright_z, dim=1)
+        upright_x = upright_x / upright_x.norm(dim=1, keepdim=True).clamp(min=1e-6)
+
+        upright_pose = obj_poses.clone()
+        upright_pose[:, :3, 0] = upright_x
+        upright_pose[:, :3, 1] = upright_y
+        upright_pose[:, :3, 2] = upright_z
+
+        mesh_vertices = semantics.geometry.get("mesh_vertices")
+        if isinstance(mesh_vertices, torch.Tensor) and mesh_vertices.numel() > 0:
+            mesh_vertices = mesh_vertices.to(device=self.device, dtype=torch.float32)
+            vertical_offsets = torch.matmul(
+                mesh_vertices, upright_pose[:, 2, :3].transpose(0, 1)
+            )
+            local_bottom_z = vertical_offsets.min(dim=0).values
+            upright_pose[:, 2, 3] = self.cfg.place_clearance - local_bottom_z
+        return upright_pose
+
+    def execute(
+        self,
+        target: Union[ObjectSemantics, torch.Tensor],
+        start_qpos: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> tuple[bool, torch.Tensor, list[float]]:
+        """Pick up an object, rotate it upright, place it down, and release it."""
+        if not isinstance(target, ObjectSemantics):
+            return super().execute(target=target, start_qpos=start_qpos, **kwargs)
+
+        obj_poses = target.entity.get_local_pose(to_matrix=True)
+        is_success, grasp_xpos = self._resolve_grasp_pose(target)
+        if not is_success:
+            logger.log_warning(
+                "Failed to resolve grasp pose, using default approach pose"
+            )
+            return False, torch.empty(0), self.joint_ids
+
+        pre_grasp_xpos = self._apply_offset(
+            pose=grasp_xpos,
+            offset=-grasp_xpos[:, :3, 2] * self.cfg.pre_grasp_distance,
+        )
+        lift_xpos = self._apply_offset(
+            pose=grasp_xpos,
+            offset=torch.tensor([0, 0, 1], device=self.device) * self.cfg.lift_height,
+        )
+
+        obj_to_grasp = torch.bmm(self._invert_pose(obj_poses), grasp_xpos)
+        upright_obj_xpos = self._build_upright_object_pose(target, obj_poses)
+        upright_lift_obj_xpos = self._apply_offset(
+            pose=upright_obj_xpos,
+            offset=torch.tensor([0, 0, 1], device=self.device) * self.cfg.lift_height,
+        )
+        upright_lift_xpos = torch.bmm(upright_lift_obj_xpos, obj_to_grasp)
+        upright_place_xpos = torch.bmm(upright_obj_xpos, obj_to_grasp)
+        retreat_xpos = self._apply_offset(
+            pose=upright_place_xpos,
+            offset=torch.tensor([0, 0, 1], device=self.device) * self.cfg.lift_height,
+        )
+
+        start_qpos = self._resolve_start_qpos(start_qpos, self.arm_dof)
+        n_close_waypoint = self.cfg.hand_interp_steps
+        n_open_waypoint = self.cfg.hand_interp_steps
+        motion_waypoints = self.cfg.sample_interval - n_close_waypoint - n_open_waypoint
+        if motion_waypoints < 6:
+            logger.log_error(
+                "Not enough waypoints for upright action. Please increase "
+                "sample_interval or decrease hand_interp_steps.",
+                ValueError,
+            )
+        n_approach_waypoint = max(2, int(np.round(motion_waypoints * 0.35)))
+        n_upright_waypoint = max(2, int(np.round(motion_waypoints * 0.50)))
+        n_retreat_waypoint = (
+            self.cfg.sample_interval
+            - n_close_waypoint
+            - n_open_waypoint
+            - n_approach_waypoint
+            - n_upright_waypoint
+        )
+        if n_retreat_waypoint < 2:
+            n_retreat_waypoint = 2
+            n_upright_waypoint = max(2, n_upright_waypoint - 1)
+
+        target_states_list = [
+            [
+                PlanState(xpos=pre_grasp_xpos[i], move_type=MoveType.EEF_MOVE),
+                PlanState(xpos=grasp_xpos[i], move_type=MoveType.EEF_MOVE),
+            ]
+            for i in range(self.n_envs)
+        ]
+        is_success, plan_traj = self._plan_arm_trajectory(
+            target_states_list,
+            start_qpos,
+            n_approach_waypoint,
+            self.arm_dof,
+        )
+        if not is_success:
+            logger.log_warning("Failed to plan approach trajectory.")
+            return False, torch.empty(0), self.joint_ids
+        approach_trajectory = torch.zeros(
+            size=(self.n_envs, n_approach_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        approach_trajectory[:, :, : self.arm_dof] = plan_traj
+        approach_trajectory[:, :, self.arm_dof :] = self.hand_open_qpos
+
+        grasp_qpos = approach_trajectory[:, -1, : self.arm_dof]
+        hand_close_path = self._interpolate_hand_qpos(
+            self.hand_open_qpos,
+            self.hand_close_qpos,
+            n_close_waypoint,
+        )
+        close_trajectory = torch.zeros(
+            size=(self.n_envs, n_close_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        close_trajectory[:, :, : self.arm_dof] = grasp_qpos
+        close_trajectory[:, :, self.arm_dof :] = hand_close_path
+
+        target_states_list = [
+            [
+                PlanState(xpos=lift_xpos[i], move_type=MoveType.EEF_MOVE),
+                PlanState(xpos=upright_lift_xpos[i], move_type=MoveType.EEF_MOVE),
+                PlanState(xpos=upright_place_xpos[i], move_type=MoveType.EEF_MOVE),
+            ]
+            for i in range(self.n_envs)
+        ]
+        is_success, plan_traj = self._plan_arm_trajectory(
+            target_states_list,
+            grasp_qpos,
+            n_upright_waypoint,
+            self.arm_dof,
+        )
+        if not is_success:
+            logger.log_warning("Failed to plan upright trajectory.")
+            return False, torch.empty(0), self.joint_ids
+        upright_trajectory = torch.zeros(
+            size=(self.n_envs, n_upright_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        upright_trajectory[:, :, : self.arm_dof] = plan_traj
+        upright_trajectory[:, :, self.arm_dof :] = self.hand_close_qpos
+
+        place_qpos = upright_trajectory[:, -1, : self.arm_dof]
+        hand_open_path = self._interpolate_hand_qpos(
+            self.hand_close_qpos,
+            self.hand_open_qpos,
+            n_open_waypoint,
+        )
+        open_trajectory = torch.zeros(
+            size=(self.n_envs, n_open_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        open_trajectory[:, :, : self.arm_dof] = place_qpos
+        open_trajectory[:, :, self.arm_dof :] = hand_open_path
+
+        target_states_list = [
+            [PlanState(xpos=retreat_xpos[i], move_type=MoveType.EEF_MOVE)]
+            for i in range(self.n_envs)
+        ]
+        is_success, plan_traj = self._plan_arm_trajectory(
+            target_states_list,
+            place_qpos,
+            n_retreat_waypoint,
+            self.arm_dof,
+        )
+        if not is_success:
+            logger.log_warning("Failed to plan retreat trajectory.")
+            return False, torch.empty(0), self.joint_ids
+        retreat_trajectory = torch.zeros(
+            size=(self.n_envs, n_retreat_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        retreat_trajectory[:, :, : self.arm_dof] = plan_traj
+        retreat_trajectory[:, :, self.arm_dof :] = self.hand_open_qpos
+
+        trajectory = torch.cat(
+            [
+                approach_trajectory,
+                close_trajectory,
+                upright_trajectory,
+                open_trajectory,
+                retreat_trajectory,
+            ],
+            dim=1,
+        )
+        return True, trajectory, self.joint_ids
+
+
 @configclass
 class PlaceActionCfg(GraspActionCfg):
     name: str = "place"
diff --git a/embodichain/lab/sim/objects/articulation.py b/embodichain/lab/sim/objects/articulation.py
index ed5a9e32..3fbe68e1 100644
--- a/embodichain/lab/sim/objects/articulation.py
+++ b/embodichain/lab/sim/objects/articulation.py
@@ -1444,8 +1444,6 @@ def set_joint_drive(
                 drive_args["max_velocity"] = max_velocity[i].cpu().numpy()
             if friction is not None:
                 drive_args["joint_friction"] = friction[i].cpu().numpy()
-            if armature is not None:
-                drive_args["armature"] = armature[i].cpu().numpy()
             self._entities[env_idx].set_drive(**drive_args)
 
     def get_joint_drive(
diff --git a/scripts/tutorials/atomic_action/upright_atomic_action.py b/scripts/tutorials/atomic_action/upright_atomic_action.py
new file mode 100644
index 00000000..92383cc4
--- /dev/null
+++ b/scripts/tutorials/atomic_action/upright_atomic_action.py
@@ -0,0 +1,466 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""
+This script demonstrates the creation and simulation of a robot that grasps a rigid mug
+in a simulated environment using the SimulationManager and grasp planning utilities.
+"""
+
+import argparse
+import sys
+import time
+from pathlib import Path
+
+_REPO_ROOT = Path(__file__).resolve().parents[3]
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))
+
+import numpy as np
+import torch
+
+from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
+from embodichain.lab.sim.objects import Robot, RigidObject
+from embodichain.lab.sim.utility.action_utils import interpolate_with_distance
+from embodichain.lab.sim.shapes import MeshCfg
+from embodichain.lab.sim.solvers import PytorchSolverCfg
+from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg
+from embodichain.data import get_data_path
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
+from embodichain.utils import logger
+from embodichain.lab.sim.cfg import (
+    RenderCfg,
+    JointDrivePropertiesCfg,
+    RobotCfg,
+    LightCfg,
+    RigidBodyAttributesCfg,
+    RigidObjectCfg,
+    URDFCfg,
+)
+from embodichain.lab.sim.atomic_actions import (
+    AntipodalAffordance,
+    ObjectSemantics,
+    UprightAction,
+    UprightActionCfg,
+)
+from embodichain.toolkits.graspkit.pg_grasp.antipodal_generator import (
+    GraspGenerator,
+    GraspGeneratorCfg,
+    AntipodalSamplerCfg,
+)
+from embodichain.toolkits.graspkit.pg_grasp.gripper_collision_checker import (
+    GripperCollisionCfg,
+)
+
+
+def parse_arguments():
+    """
+    Parse command-line arguments to configure the simulation.
+
+    Returns:
+        argparse.Namespace: Parsed arguments including number of environments and rendering options.
+    """
+    parser = argparse.ArgumentParser(
+        description="Create and simulate a robot in SimulationManager"
+    )
+    add_env_launcher_args_to_parser(parser)
+    parser.add_argument(
+        "--demo_mode",
+        type=str,
+        choices=["grasp", "upright"],
+        default="grasp",
+        help="Select the tutorial scenario to run.",
+    )
+    parser.add_argument(
+        "--object_kind",
+        type=str,
+        choices=["cup", "bottle"],
+        default="bottle",
+        help="Object to use in upright mode.",
+    )
+    parser.add_argument(
+        "--n_sample",
+        type=int,
+        default=10000,
+        help="Number of surface samples for antipodal grasp generation.",
+    )
+    parser.add_argument(
+        "--n_top_grasps",
+        type=int,
+        default=30,
+        help="Number of top-ranked grasp poses to keep.",
+    )
+    parser.add_argument(
+        "--force_reannotate",
+        action="store_true",
+        help="Force grasp region re-annotation instead of reusing cached antipodal pairs.",
+    )
+    return parser.parse_args()
+
+
+def initialize_simulation(args) -> SimulationManager:
+    """
+    Initialize the simulation environment based on the provided arguments.
+
+    Args:
+        args (argparse.Namespace): Parsed command-line arguments.
+
+    Returns:
+        SimulationManager: Configured simulation manager instance.
+    """
+    config = SimulationManagerCfg(
+        headless=True,
+        sim_device=args.device,
+        render_cfg=RenderCfg(renderer=args.renderer),
+        physics_dt=1.0 / 100.0,
+        arena_space=2.5,
+    )
+    sim = SimulationManager(config)
+
+    light = sim.add_light(
+        cfg=LightCfg(
+            uid="main_light",
+            color=(0.6, 0.6, 0.6),
+            intensity=30.0,
+            init_pos=(1.0, 0, 3.0),
+        )
+    )
+
+    return sim
+
+
+def create_robot(sim: SimulationManager, position=[0.0, 0.0, 0.0]) -> Robot:
+    """
+    Create and configure a robot with an arm and a dexterous hand in the simulation.
+
+    Args:
+        sim (SimulationManager): The simulation manager instance.
+
+    Returns:
+        Robot: The configured robot instance added to the simulation.
+    """
+    # Retrieve URDF paths for the robot arm and hand
+    ur10_urdf_path = get_data_path("UniversalRobots/UR10/UR10.urdf")
+    gripper_urdf_path = get_data_path("DH_PGC_140_50_M/DH_PGC_140_50_M.urdf")
+    # Configure the robot with its components and control properties
+    cfg = RobotCfg(
+        uid="UR10",
+        urdf_cfg=URDFCfg(
+            components=[
+                {"component_type": "arm", "urdf_path": ur10_urdf_path},
+                {"component_type": "hand", "urdf_path": gripper_urdf_path},
+            ]
+        ),
+        drive_pros=JointDrivePropertiesCfg(
+            stiffness={"JOINT[0-9]": 1e4, "FINGER[1-2]": 1e3},
+            damping={"JOINT[0-9]": 1e3, "FINGER[1-2]": 1e2},
+            max_effort={"JOINT[0-9]": 1e5, "FINGER[1-2]": 1e4},
+            drive_type="force",
+        ),
+        control_parts={
+            "arm": ["JOINT[0-9]"],
+            "hand": ["FINGER[1-2]"],
+        },
+        solver_cfg={
+            "arm": PytorchSolverCfg(
+                end_link_name="ee_link",
+                root_link_name="base_link",
+                tcp=[
+                    [0.0, 1.0, 0.0, 0.0],
+                    [-1.0, 0.0, 0.0, 0.0],
+                    [0.0, 0.0, 1.0, 0.12],
+                    [0.0, 0.0, 0.0, 1.0],
+                ],
+            )
+        },
+        init_qpos=[-3.14, -1.57, -1.57, 1.57, -1.57, 0.0, 0.0, 0.0],
+        init_pos=position,
+    )
+    return sim.add_robot(cfg=cfg)
+
+
+def create_mug(sim: SimulationManager):
+    mug_cfg = RigidObjectCfg(
+        uid="table",
+        shape=MeshCfg(
+            fpath=get_data_path("CoffeeCup/cup.ply"),
+        ),
+        attrs=RigidBodyAttributesCfg(
+            mass=0.01,
+            dynamic_friction=0.97,
+            static_friction=0.99,
+        ),
+        max_convex_hull_num=16,
+        init_pos=[0.55, 0.0, 0.01],
+        init_rot=[0.0, 0.0, -90],
+        body_scale=(4, 4, 4),
+    )
+    mug = sim.add_rigid_object(cfg=mug_cfg)
+    return mug
+
+
+def create_fallen_cup(sim: SimulationManager) -> RigidObject:
+    cup_cfg = RigidObjectCfg(
+        uid="cup",
+        shape=MeshCfg(
+            fpath=get_data_path("CoffeeCup/cup.ply"),
+        ),
+        attrs=RigidBodyAttributesCfg(
+            mass=0.01,
+            dynamic_friction=0.97,
+            static_friction=0.99,
+        ),
+        max_convex_hull_num=16,
+        init_pos=[0.55, 0.0, 0.01],
+        init_rot=[90.0, 0.0, -90.0],
+        body_scale=(4, 4, 4),
+    )
+    return sim.add_rigid_object(cfg=cup_cfg)
+
+
+def create_fallen_bottle(sim: SimulationManager) -> RigidObject:
+    # Use a slightly smaller and closer bottle for the UR10 gripper demo.
+    bottle_scale = 0.0008
+    bottle_cfg = RigidObjectCfg(
+        uid="bottle",
+        shape=MeshCfg(
+            fpath=get_data_path("ScannedBottle/yibao.ply"),
+        ),
+        attrs=RigidBodyAttributesCfg(
+            mass=0.02,
+            dynamic_friction=0.97,
+            static_friction=0.99,
+        ),
+        max_convex_hull_num=16,
+        init_pos=[-0.4294, -0.0825, -0.0997],
+        init_rot=[90.0, 0.0, 0.0],
+        body_scale=(bottle_scale, bottle_scale, bottle_scale),
+    )
+    return sim.add_rigid_object(cfg=bottle_cfg)
+
+
+def build_grasp_generator_cfg(args: argparse.Namespace) -> GraspGeneratorCfg:
+    return GraspGeneratorCfg(
+        viser_port=11801,
+        antipodal_sampler_cfg=AntipodalSamplerCfg(
+            n_sample=args.n_sample,
+            max_length=0.088,
+            min_length=0.003,
+        ),
+        is_partial_annotate=False,
+        is_filter_ground_collision=True,
+        n_top_grasps=args.n_top_grasps,
+    )
+
+
+def build_gripper_collision_cfg() -> GripperCollisionCfg:
+    return GripperCollisionCfg(
+        max_open_length=0.088,
+        finger_length=0.078,
+        point_sample_dense=0.012,
+    )
+
+
+def create_object_semantics(
+    obj: RigidObject, label: str, args: argparse.Namespace
+) -> ObjectSemantics:
+    return ObjectSemantics(
+        label=label,
+        geometry={
+            "mesh_vertices": obj.get_vertices(env_ids=[0], scale=True)[0],
+            "mesh_triangles": obj.get_triangles(env_ids=[0])[0],
+        },
+        affordance=AntipodalAffordance(
+            object_label=label,
+            force_reannotate=args.force_reannotate,
+            custom_config={
+                "gripper_collision_cfg": build_gripper_collision_cfg(),
+                "generator_cfg": build_grasp_generator_cfg(args),
+            },
+        ),
+        entity=obj,
+    )
+
+
+def get_grasp_traj(sim: SimulationManager, robot: Robot, grasp_xpos: torch.Tensor):
+    n_envs = sim.num_envs
+    rest_arm_qpos = robot.get_qpos("arm")
+
+    approach_xpos = grasp_xpos.clone()
+    approach_xpos[:, 2, 3] += 0.1
+
+    _, qpos_approach = robot.compute_ik(
+        pose=approach_xpos, joint_seed=rest_arm_qpos, name="arm"
+    )
+    _, qpos_grasp = robot.compute_ik(
+        pose=grasp_xpos, joint_seed=qpos_approach, name="arm"
+    )
+    hand_open_qpos = torch.tensor([0.00, 0.00], dtype=torch.float32, device=sim.device)
+    hand_close_qpos = torch.tensor(
+        [0.025, 0.025], dtype=torch.float32, device=sim.device
+    )
+
+    arm_trajectory = torch.cat(
+        [
+            rest_arm_qpos[:, None, :],
+            qpos_approach[:, None, :],
+            qpos_grasp[:, None, :],
+            qpos_grasp[:, None, :],
+            qpos_approach[:, None, :],
+            rest_arm_qpos[:, None, :],
+        ],
+        dim=1,
+    )
+    hand_trajectory = torch.cat(
+        [
+            hand_open_qpos[None, None, :].repeat(n_envs, 1, 1),
+            hand_open_qpos[None, None, :].repeat(n_envs, 1, 1),
+            hand_open_qpos[None, None, :].repeat(n_envs, 1, 1),
+            hand_close_qpos[None, None, :].repeat(n_envs, 1, 1),
+            hand_close_qpos[None, None, :].repeat(n_envs, 1, 1),
+            hand_close_qpos[None, None, :].repeat(n_envs, 1, 1),
+        ],
+        dim=1,
+    )
+    all_trajectory = torch.cat([arm_trajectory, hand_trajectory], dim=-1)
+    interp_trajectory = interpolate_with_distance(
+        trajectory=all_trajectory, interp_num=200, device=sim.device
+    )
+    return interp_trajectory
+
+
+def run_grasp_demo(
+    args: argparse.Namespace, sim: SimulationManager, robot: Robot
+) -> None:
+    mug = create_mug(sim)
+    grasp_cfg = build_grasp_generator_cfg(args)
+    sim.open_window()
+
+    # Annotate part of the mug to be grasped by following the instructions in the visualization window:
+    # 1. View grasp object in browser (e.g http://localhost:11801)
+    # 2. press 'Rect Select Region', select grasp region
+    # 3. press 'Confirm Selection' to finish grasp region selection.
+
+    start_time = time.time()
+
+    vertices = mug.get_vertices(env_ids=[0], scale=True)[0]
+    triangles = mug.get_triangles(env_ids=[0])[0]
+    grasp_generator = GraspGenerator(
+        vertices=vertices,
+        triangles=triangles,
+        cfg=grasp_cfg,
+        gripper_collision_cfg=build_gripper_collision_cfg(),
+    )
+
+    grasp_generator.annotate()
+
+    approach_direction = torch.tensor(
+        [0, 0, -1], dtype=torch.float32, device=sim.device
+    )
+    obj_poses = mug.get_local_pose(to_matrix=True)
+    grasp_xpos_list = []
+
+    rest_xpos = robot.compute_fk(
+        qpos=robot.get_qpos("arm"), name="arm", to_matrix=True
+    )[0]
+    for i, obj_pose in enumerate(obj_poses):
+        is_success, grasp_pose, _ = grasp_generator.get_grasp_poses(
+            obj_pose,
+            approach_direction,
+            visualize_collision=False,
+            visualize_pose=True,
+        )
+        if is_success:
+            grasp_xpos_list.append(grasp_pose.unsqueeze(0))
+        else:
+            logger.log_warning(f"No valid grasp pose found for {i}-th object.")
+            grasp_xpos_list.append(rest_xpos.unsqueeze(0))
+
+    grasp_xpos = torch.cat(grasp_xpos_list, dim=0)
+    cost_time = time.time() - start_time
+    logger.log_info(f"Get grasp pose cost time: {cost_time:.2f} seconds")
+
+    grab_traj = get_grasp_traj(sim, robot, grasp_xpos)
+    input("Press Enter to start the grab mug demo...")
+    for i in range(grab_traj.shape[1]):
+        robot.set_qpos(grab_traj[:, i, :])
+        sim.update(step=4)
+        time.sleep(1e-2)
+    input("Press Enter to exit the simulation...")
+
+
+def run_upright_demo(
+    args: argparse.Namespace, sim: SimulationManager, robot: Robot
+) -> None:
+    if args.object_kind == "cup":
+        obj = create_fallen_cup(sim)
+    else:
+        obj = create_fallen_bottle(sim)
+
+    semantics = create_object_semantics(obj, args.object_kind, args)
+    motion_gen = MotionGenerator(
+        cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid))
+    )
+    hand_open = torch.tensor([0.00, 0.00], dtype=torch.float32, device=sim.device)
+    hand_close = torch.tensor([0.025, 0.025], dtype=torch.float32, device=sim.device)
+    upright_action = UprightAction(
+        motion_generator=motion_gen,
+        cfg=UprightActionCfg(
+            control_part="arm",
+            hand_control_part="hand",
+            hand_open_qpos=hand_open,
+            hand_close_qpos=hand_close,
+            approach_direction=torch.tensor(
+                [0.0, 0.0, -1.0], dtype=torch.float32, device=sim.device
+            ),
+            pre_grasp_distance=0.15,
+            lift_height=0.15,
+            upright_axis_sign=-1.0 if args.object_kind == "bottle" else 1.0,
+        ),
+    )
+
+    sim.init_gpu_physics()
+    sim.open_window()
+    input(f"Inspect the fallen {args.object_kind}, then press Enter to plan upright...")
+
+    start_time = time.time()
+    is_success, traj, _ = upright_action.execute(semantics)
+    cost_time = time.time() - start_time
+    logger.log_info(f"Plan upright trajectory cost time: {cost_time:.2f} seconds")
+    if not is_success:
+        logger.log_warning("Failed to plan upright trajectory.")
+        return
+
+    input("Press Enter to start the upright demo...")
+    for i in range(traj.shape[1]):
+        robot.set_qpos(traj[:, i, :])
+        sim.update(step=4)
+        time.sleep(1e-2)
+    input("Press Enter to exit the simulation...")
+
+
+def main() -> None:
+    args = parse_arguments()
+    sim = initialize_simulation(args)
+    robot = create_robot(sim, position=[0.0, 0.0, 0.0])
+
+    if args.demo_mode == "upright":
+        run_upright_demo(args, sim, robot)
+    else:
+        run_grasp_demo(args, sim, robot)
+
+
+if __name__ == "__main__":
+    main()

From d258064aa3a1dc5da06a18d24448b2a2474f05cb Mon Sep 17 00:00:00 2001
From: skywhite1024 <129768272+skywhite1024@users.noreply.github.com>
Date: Sun, 14 Jun 2026 12:50:51 +0800
Subject: [PATCH 02/14] Address upright action review feedback

---
 embodichain/lab/sim/atomic_actions/actions.py | 451 ++++++++++++++++--
 embodichain/lab/sim/objects/articulation.py   |   2 +
 .../atomic_action/upright_atomic_action.py    |  79 ++-
 3 files changed, 482 insertions(+), 50 deletions(-)

diff --git a/embodichain/lab/sim/atomic_actions/actions.py b/embodichain/lab/sim/atomic_actions/actions.py
index 97782601..e8fd4983 100644
--- a/embodichain/lab/sim/atomic_actions/actions.py
+++ b/embodichain/lab/sim/atomic_actions/actions.py
@@ -17,7 +17,7 @@
 from __future__ import annotations
 
 import torch
-from typing import Optional, Union, TYPE_CHECKING, Any
+from typing import Optional, Union, TYPE_CHECKING
 
 from embodichain.lab.sim.planners import PlanResult, PlanState, MoveType
 from embodichain.lab.sim.planners.motion_generator import MotionGenOptions
@@ -231,11 +231,41 @@ def _interpolate_hand_qpos(
         n_waypoints: int,
     ) -> torch.Tensor:
         """Interpolate hand joint positions between two gripper states."""
-        weights = torch.linspace(0, 1, steps=n_waypoints, device=self.device)
-        hand_qpos_list = [
-            torch.lerp(start_hand_qpos, end_hand_qpos, weight) for weight in weights
-        ]
-        return torch.stack(hand_qpos_list, dim=0)
+        start_hand_qpos = start_hand_qpos.to(self.device)
+        end_hand_qpos = end_hand_qpos.to(self.device)
+
+        if start_hand_qpos.dim() == 1 and end_hand_qpos.dim() == 2:
+            start_hand_qpos = start_hand_qpos.unsqueeze(0).repeat(
+                end_hand_qpos.shape[0], 1
+            )
+        if start_hand_qpos.dim() == 2 and end_hand_qpos.dim() == 1:
+            end_hand_qpos = end_hand_qpos.unsqueeze(0).repeat(
+                start_hand_qpos.shape[0], 1
+            )
+
+        weights = torch.linspace(
+            0,
+            1,
+            steps=n_waypoints,
+            device=self.device,
+            dtype=start_hand_qpos.dtype,
+        )
+        if start_hand_qpos.dim() == 1 and end_hand_qpos.dim() == 1:
+            return torch.lerp(
+                start_hand_qpos.unsqueeze(0),
+                end_hand_qpos.unsqueeze(0),
+                weights[:, None],
+            )
+        if start_hand_qpos.dim() == 2 and end_hand_qpos.dim() == 2:
+            return torch.lerp(
+                start_hand_qpos.unsqueeze(1),
+                end_hand_qpos.unsqueeze(1),
+                weights[None, :, None],
+            )
+        logger.log_error(
+            "hand qpos must have shape (hand_dof,) or (n_envs, hand_dof)",
+            ValueError,
+        )
 
     def execute(
         self,
@@ -326,6 +356,26 @@ def __init__(
         self.arm_dof = len(self.arm_joint_ids)
         self.dof = len(self.joint_ids)
 
+    def _expand_hand_qpos(self, hand_qpos: torch.Tensor) -> torch.Tensor:
+        """Resolve hand qpos to batched shape ``(n_envs, hand_dof)``."""
+        hand_dof = len(self.hand_joint_ids)
+        hand_qpos = hand_qpos.to(device=self.device, dtype=torch.float32)
+        if hand_qpos.shape == (hand_dof,):
+            return hand_qpos.unsqueeze(0).repeat(self.n_envs, 1)
+        if hand_qpos.shape == (self.n_envs, hand_dof):
+            return hand_qpos
+        logger.log_error(
+            f"hand_qpos must have shape ({hand_dof},) or "
+            f"({self.n_envs}, {hand_dof}), but got {hand_qpos.shape}",
+            ValueError,
+        )
+
+    def _repeat_hand_qpos(
+        self, hand_qpos: torch.Tensor, n_waypoints: int
+    ) -> torch.Tensor:
+        """Repeat hand qpos across trajectory waypoints."""
+        return self._expand_hand_qpos(hand_qpos).unsqueeze(1).repeat(1, n_waypoints, 1)
+
     def execute(
         self,
         target: Union[ObjectSemantics, torch.Tensor],
@@ -532,6 +582,54 @@ class UprightActionCfg(PickUpActionCfg):
     local +Z points toward the physical bottom and local -Z should face upward.
     """
 
+    place_press_depth: float = 0.002
+    """Additional downward displacement (m) after pre-place to make support contact."""
+
+    place_press_steps: int = 4
+    """Number of closed-hand waypoints used for the downward place press."""
+
+    upright_hold_steps: int = 0
+    """Number of closed-hand waypoints to hold after upright placement."""
+
+    place_hold_steps: int = 8
+    """Number of closed-hand waypoints to hold the object after pressing it down."""
+
+    release_interp_steps: int = 12
+    """Number of waypoints for the slow hand release phase."""
+
+    release_retreat_distance: float = 0.08
+    """Horizontal distance (m) to retreat after releasing the upright object."""
+
+    release_retreat_lift: float = 0.01
+    """Small upward offset (m) added during release retreat."""
+
+    use_grasp_width_qpos: bool = False
+    """Whether to map selected grasp open length into a dynamic hand close qpos."""
+
+    gripper_max_open_width: float = 0.088
+    """Maximum total gripper opening width (m) used for width-to-qpos mapping."""
+
+    grasp_squeeze_width: float = 0.003
+    """Width margin (m) subtracted from the selected grasp width before closing."""
+
+    final_approach_steps: int = 12
+    """Number of waypoints for the slow final approach from pre-grasp to grasp."""
+
+    final_approach_preclose_width_margin: float = 0.010
+    """Extra opening width (m) kept around the selected grasp width during final approach."""
+
+    grasp_hold_steps: int = 4
+    """Number of closed-hand waypoints to hold the grasp before lifting."""
+
+    min_dynamic_hand_close_qpos: torch.Tensor | None = None
+    """Optional minimum hand qpos used when mapping grasp width into close qpos."""
+
+    max_grasp_open_length: float | None = None
+    """Optional maximum selected grasp opening length (m) for upright placement."""
+
+    max_grasp_axis_approach_dot: float | None = None
+    """Optional maximum absolute dot between grasp X axis and approach direction."""
+
 
 class UprightAction(PickUpAction):
     def __init__(
@@ -549,6 +647,95 @@ def __init__(
             motion_generator, cfg=cfg if cfg is not None else UprightActionCfg()
         )
 
+    def _resolve_grasp_pose(
+        self, semantics: ObjectSemantics
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        is_success, grasp_xpos, open_length = super()._resolve_grasp_pose(semantics)
+        max_open_length = self.cfg.max_grasp_open_length
+        max_axis_dot = self.cfg.max_grasp_axis_approach_dot
+        if max_open_length is None and max_axis_dot is None:
+            return is_success, grasp_xpos, open_length
+
+        approach_direction = self.approach_direction.to(
+            device=self.device, dtype=torch.float32
+        )
+        approach_direction = approach_direction / approach_direction.norm().clamp(
+            min=1e-6
+        )
+        grasp_axis_dot = torch.abs(
+            (grasp_xpos[:, :3, 0] * approach_direction).sum(dim=1)
+        )
+        violates_width = (
+            torch.zeros_like(open_length, dtype=torch.bool)
+            if max_open_length is None
+            else open_length > max_open_length
+        )
+        violates_axis = (
+            torch.zeros_like(grasp_axis_dot, dtype=torch.bool)
+            if max_axis_dot is None
+            else grasp_axis_dot > max_axis_dot
+        )
+        needs_filter = violates_width | violates_axis
+
+        if not bool(torch.any(needs_filter).item()):
+            return is_success, grasp_xpos, open_length
+
+        generator = semantics.affordance.generator
+        hit_point_pairs = (
+            None if generator is None else getattr(generator, "_hit_point_pairs", None)
+        )
+        if hit_point_pairs is None:
+            return is_success, grasp_xpos, open_length
+
+        pair_lengths = torch.norm(
+            hit_point_pairs[:, 1, :] - hit_point_pairs[:, 0, :], dim=-1
+        )
+        valid_mask = torch.ones(
+            hit_point_pairs.shape[0], dtype=torch.bool, device=hit_point_pairs.device
+        )
+        if max_open_length is not None:
+            valid_mask &= pair_lengths <= max_open_length
+        obj_poses = semantics.entity.get_local_pose(to_matrix=True)
+        pair_axes = torch.nn.functional.normalize(
+            hit_point_pairs[:, 1, :] - hit_point_pairs[:, 0, :], dim=-1
+        )
+        pair_axes_world = torch.matmul(
+            pair_axes.unsqueeze(0), obj_poses[:, :3, :3].transpose(1, 2)
+        )
+        pair_axis_dot = torch.abs(
+            (pair_axes_world * approach_direction.view(1, 1, 3)).sum(dim=-1)
+        )
+        if max_axis_dot is not None:
+            valid_mask &= torch.all(pair_axis_dot <= max_axis_dot, dim=0)
+        valid_count = int(valid_mask.sum().item())
+        if valid_count == 0:
+            logger.log_warning(
+                "No grasp candidates remain after upright grasp filtering."
+            )
+            return is_success, grasp_xpos, open_length
+
+        original_hit_point_pairs = generator._hit_point_pairs
+        try:
+            generator._hit_point_pairs = hit_point_pairs[valid_mask]
+            filtered_success, filtered_grasp_xpos, filtered_open_length = (
+                semantics.affordance.get_best_grasp_poses(
+                    obj_poses=obj_poses, approach_direction=self.approach_direction
+                )
+            )
+        finally:
+            generator._hit_point_pairs = original_hit_point_pairs
+
+        use_filtered = needs_filter & filtered_success
+        has_filtered = bool(torch.any(use_filtered).item())
+        if has_filtered:
+            grasp_xpos = torch.where(
+                use_filtered[:, None, None], filtered_grasp_xpos, grasp_xpos
+            )
+            open_length = torch.where(use_filtered, filtered_open_length, open_length)
+            is_success = torch.where(use_filtered, filtered_success, is_success)
+
+        return is_success, grasp_xpos, open_length
+
     @staticmethod
     def _invert_pose(pose: torch.Tensor) -> torch.Tensor:
         """Invert a batched homogeneous transform."""
@@ -602,6 +789,61 @@ def _build_upright_object_pose(
             upright_pose[:, 2, 3] = self.cfg.place_clearance - local_bottom_z
         return upright_pose
 
+    def _compute_hand_qpos_for_width(self, target_width: torch.Tensor) -> torch.Tensor:
+        """Map desired total gripper width to batched hand qpos."""
+        target_width = target_width.to(device=self.device, dtype=torch.float32).view(
+            self.n_envs, 1
+        )
+        target_width = target_width.clamp(min=0.0, max=self.cfg.gripper_max_open_width)
+        closing_distance = 0.5 * (self.cfg.gripper_max_open_width - target_width).clamp(
+            min=0.0
+        )
+        hand_qpos_limits = self.robot.get_qpos_limits(
+            name=self.cfg.hand_control_part
+        ).to(self.device)
+        lower_limits = hand_qpos_limits[:, :, 0]
+        upper_limits = hand_qpos_limits[:, :, 1]
+        hand_open_qpos = self._expand_hand_qpos(self.hand_open_qpos)
+        dynamic_qpos = hand_open_qpos + closing_distance.repeat(
+            1, len(self.hand_joint_ids)
+        )
+        return torch.max(torch.min(dynamic_qpos, upper_limits), lower_limits)
+
+    def _compute_dynamic_hand_close_qpos(
+        self, grasp_open_length: torch.Tensor
+    ) -> torch.Tensor:
+        """Map selected grasp width to batched hand close qpos for parallel grippers."""
+        fallback_qpos = self._expand_hand_qpos(self.hand_close_qpos)
+        if not self.cfg.use_grasp_width_qpos:
+            return fallback_qpos
+
+        grasp_open_length = grasp_open_length.to(
+            device=self.device, dtype=torch.float32
+        ).view(self.n_envs, 1)
+        target_width = (grasp_open_length - self.cfg.grasp_squeeze_width).clamp(min=0.0)
+        dynamic_qpos = self._compute_hand_qpos_for_width(target_width)
+        if self.cfg.min_dynamic_hand_close_qpos is not None:
+            min_close_qpos = self._expand_hand_qpos(
+                self.cfg.min_dynamic_hand_close_qpos
+            )
+            dynamic_qpos = torch.max(dynamic_qpos, min_close_qpos)
+        return dynamic_qpos
+
+    def _compute_final_approach_hand_qpos(
+        self, grasp_open_length: torch.Tensor, hand_close_qpos: torch.Tensor
+    ) -> torch.Tensor:
+        """Pre-close the gripper during final approach without reaching squeeze force."""
+        hand_open_qpos = self._expand_hand_qpos(self.hand_open_qpos)
+        if not self.cfg.use_grasp_width_qpos:
+            return hand_open_qpos
+
+        grasp_open_length = grasp_open_length.to(
+            device=self.device, dtype=torch.float32
+        ).view(self.n_envs, 1)
+        target_width = grasp_open_length + self.cfg.final_approach_preclose_width_margin
+        preclose_qpos = self._compute_hand_qpos_for_width(target_width)
+        return torch.max(torch.min(preclose_qpos, hand_close_qpos), hand_open_qpos)
+
     def execute(
         self,
         target: Union[ObjectSemantics, torch.Tensor],
@@ -612,87 +854,161 @@ def execute(
         if not isinstance(target, ObjectSemantics):
             return super().execute(target=target, start_qpos=start_qpos, **kwargs)
 
+        is_success, grasp_xpos, grasp_open_length = self._resolve_grasp_pose(target)
         obj_poses = target.entity.get_local_pose(to_matrix=True)
-        is_success, grasp_xpos = self._resolve_grasp_pose(target)
-        if not is_success:
+        if not torch.all(is_success).item():
             logger.log_warning(
                 "Failed to resolve grasp pose, using default approach pose"
             )
             return False, torch.empty(0), self.joint_ids
 
+        world_z = torch.tensor([0, 0, 1], device=self.device, dtype=torch.float32)
+        hand_close_qpos = self._compute_dynamic_hand_close_qpos(grasp_open_length)
+        final_approach_hand_qpos = self._compute_final_approach_hand_qpos(
+            grasp_open_length, hand_close_qpos
+        )
         pre_grasp_xpos = self._apply_offset(
             pose=grasp_xpos,
             offset=-grasp_xpos[:, :3, 2] * self.cfg.pre_grasp_distance,
         )
         lift_xpos = self._apply_offset(
             pose=grasp_xpos,
-            offset=torch.tensor([0, 0, 1], device=self.device) * self.cfg.lift_height,
+            offset=world_z * self.cfg.lift_height,
         )
 
         obj_to_grasp = torch.bmm(self._invert_pose(obj_poses), grasp_xpos)
         upright_obj_xpos = self._build_upright_object_pose(target, obj_poses)
         upright_lift_obj_xpos = self._apply_offset(
             pose=upright_obj_xpos,
-            offset=torch.tensor([0, 0, 1], device=self.device) * self.cfg.lift_height,
+            offset=world_z * self.cfg.lift_height,
         )
         upright_lift_xpos = torch.bmm(upright_lift_obj_xpos, obj_to_grasp)
         upright_place_xpos = torch.bmm(upright_obj_xpos, obj_to_grasp)
-        retreat_xpos = self._apply_offset(
+        press_down_distance = self.cfg.place_clearance + self.cfg.place_press_depth
+        press_xpos = self._apply_offset(
             pose=upright_place_xpos,
-            offset=torch.tensor([0, 0, 1], device=self.device) * self.cfg.lift_height,
+            offset=-world_z * press_down_distance,
+        )
+        retreat_direction = -press_xpos[:, :3, 2]
+        retreat_direction[:, 2] = 0.0
+        retreat_direction_norm = retreat_direction.norm(dim=1, keepdim=True)
+        retreat_direction = torch.where(
+            retreat_direction_norm > 1e-6,
+            retreat_direction / retreat_direction_norm.clamp(min=1e-6),
+            -press_xpos[:, :3, 0],
+        )
+        retreat_direction[:, 2] = 0.0
+        retreat_direction = retreat_direction / retreat_direction.norm(
+            dim=1, keepdim=True
+        ).clamp(min=1e-6)
+        retreat_offset = (
+            retreat_direction * self.cfg.release_retreat_distance
+            + world_z * self.cfg.release_retreat_lift
+        )
+        retreat_xpos = self._apply_offset(
+            pose=press_xpos,
+            offset=retreat_offset,
         )
 
         start_qpos = self._resolve_start_qpos(start_qpos, self.arm_dof)
         n_close_waypoint = self.cfg.hand_interp_steps
-        n_open_waypoint = self.cfg.hand_interp_steps
-        motion_waypoints = self.cfg.sample_interval - n_close_waypoint - n_open_waypoint
+        n_final_approach_waypoint = max(2, self.cfg.final_approach_steps)
+        n_grasp_hold_waypoint = max(0, self.cfg.grasp_hold_steps)
+        n_press_waypoint = max(2, self.cfg.place_press_steps)
+        n_upright_hold_waypoint = max(0, self.cfg.upright_hold_steps)
+        n_hold_waypoint = max(0, self.cfg.place_hold_steps)
+        n_open_waypoint = max(2, self.cfg.release_interp_steps)
+        motion_waypoints = (
+            self.cfg.sample_interval
+            - n_close_waypoint
+            - n_final_approach_waypoint
+            - n_grasp_hold_waypoint
+            - n_upright_hold_waypoint
+            - n_press_waypoint
+            - n_hold_waypoint
+            - n_open_waypoint
+        )
         if motion_waypoints < 6:
             logger.log_error(
                 "Not enough waypoints for upright action. Please increase "
-                "sample_interval or decrease hand_interp_steps.",
+                "sample_interval or decrease hand/press/upright-hold/hold/release "
+                "steps.",
                 ValueError,
             )
-        n_approach_waypoint = max(2, int(np.round(motion_waypoints * 0.35)))
-        n_upright_waypoint = max(2, int(np.round(motion_waypoints * 0.50)))
+        n_pre_approach_waypoint = max(2, int(np.round(motion_waypoints * 0.25)))
+        n_upright_waypoint = max(2, int(np.round(motion_waypoints * 0.60)))
         n_retreat_waypoint = (
             self.cfg.sample_interval
             - n_close_waypoint
+            - n_final_approach_waypoint
+            - n_grasp_hold_waypoint
+            - n_upright_hold_waypoint
+            - n_press_waypoint
+            - n_hold_waypoint
             - n_open_waypoint
-            - n_approach_waypoint
+            - n_pre_approach_waypoint
             - n_upright_waypoint
         )
         if n_retreat_waypoint < 2:
+            retreat_deficit = 2 - n_retreat_waypoint
             n_retreat_waypoint = 2
-            n_upright_waypoint = max(2, n_upright_waypoint - 1)
-
+            n_upright_waypoint = max(2, n_upright_waypoint - retreat_deficit)
         target_states_list = [
             [
                 PlanState(xpos=pre_grasp_xpos[i], move_type=MoveType.EEF_MOVE),
-                PlanState(xpos=grasp_xpos[i], move_type=MoveType.EEF_MOVE),
             ]
             for i in range(self.n_envs)
         ]
         is_success, plan_traj = self._plan_arm_trajectory(
             target_states_list,
             start_qpos,
-            n_approach_waypoint,
+            n_pre_approach_waypoint,
             self.arm_dof,
         )
         if not is_success:
             logger.log_warning("Failed to plan approach trajectory.")
             return False, torch.empty(0), self.joint_ids
         approach_trajectory = torch.zeros(
-            size=(self.n_envs, n_approach_waypoint, self.dof),
+            size=(self.n_envs, n_pre_approach_waypoint, self.dof),
             dtype=torch.float32,
             device=self.device,
         )
         approach_trajectory[:, :, : self.arm_dof] = plan_traj
-        approach_trajectory[:, :, self.arm_dof :] = self.hand_open_qpos
+        approach_trajectory[:, :, self.arm_dof :] = self._repeat_hand_qpos(
+            self.hand_open_qpos, n_pre_approach_waypoint
+        )
 
-        grasp_qpos = approach_trajectory[:, -1, : self.arm_dof]
-        hand_close_path = self._interpolate_hand_qpos(
+        pre_grasp_qpos = approach_trajectory[:, -1, : self.arm_dof]
+        target_states_list = [
+            [PlanState(xpos=grasp_xpos[i], move_type=MoveType.EEF_MOVE)]
+            for i in range(self.n_envs)
+        ]
+        is_success, plan_traj = self._plan_arm_trajectory(
+            target_states_list,
+            pre_grasp_qpos,
+            n_final_approach_waypoint,
+            self.arm_dof,
+        )
+        if not is_success:
+            logger.log_warning("Failed to plan final approach trajectory.")
+            return False, torch.empty(0), self.joint_ids
+        final_approach_trajectory = torch.zeros(
+            size=(self.n_envs, n_final_approach_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        final_approach_trajectory[:, :, : self.arm_dof] = plan_traj
+        final_approach_hand_path = self._interpolate_hand_qpos(
             self.hand_open_qpos,
-            self.hand_close_qpos,
+            final_approach_hand_qpos,
+            n_final_approach_waypoint,
+        )
+        final_approach_trajectory[:, :, self.arm_dof :] = final_approach_hand_path
+
+        grasp_qpos = final_approach_trajectory[:, -1, : self.arm_dof]
+        hand_close_path = self._interpolate_hand_qpos(
+            final_approach_hand_qpos,
+            hand_close_qpos,
             n_close_waypoint,
         )
         close_trajectory = torch.zeros(
@@ -700,9 +1016,21 @@ def execute(
             dtype=torch.float32,
             device=self.device,
         )
-        close_trajectory[:, :, : self.arm_dof] = grasp_qpos
+        close_trajectory[:, :, : self.arm_dof] = grasp_qpos.unsqueeze(1)
         close_trajectory[:, :, self.arm_dof :] = hand_close_path
 
+        closed_grasp_qpos = close_trajectory[:, -1, : self.arm_dof]
+        grasp_hold_trajectory = torch.zeros(
+            size=(self.n_envs, n_grasp_hold_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        if n_grasp_hold_waypoint > 0:
+            grasp_hold_trajectory[:, :, : self.arm_dof] = closed_grasp_qpos.unsqueeze(1)
+            grasp_hold_trajectory[:, :, self.arm_dof :] = self._repeat_hand_qpos(
+                hand_close_qpos, n_grasp_hold_waypoint
+            )
+
         target_states_list = [
             [
                 PlanState(xpos=lift_xpos[i], move_type=MoveType.EEF_MOVE),
@@ -713,7 +1041,7 @@ def execute(
         ]
         is_success, plan_traj = self._plan_arm_trajectory(
             target_states_list,
-            grasp_qpos,
+            closed_grasp_qpos,
             n_upright_waypoint,
             self.arm_dof,
         )
@@ -726,11 +1054,59 @@ def execute(
             device=self.device,
         )
         upright_trajectory[:, :, : self.arm_dof] = plan_traj
-        upright_trajectory[:, :, self.arm_dof :] = self.hand_close_qpos
+        upright_trajectory[:, :, self.arm_dof :] = self._repeat_hand_qpos(
+            hand_close_qpos, n_upright_waypoint
+        )
 
         place_qpos = upright_trajectory[:, -1, : self.arm_dof]
+        upright_hold_trajectory = torch.zeros(
+            size=(self.n_envs, n_upright_hold_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        if n_upright_hold_waypoint > 0:
+            upright_hold_trajectory[:, :, : self.arm_dof] = place_qpos.unsqueeze(1)
+            upright_hold_trajectory[:, :, self.arm_dof :] = self._repeat_hand_qpos(
+                hand_close_qpos, n_upright_hold_waypoint
+            )
+
+        target_states_list = [
+            [PlanState(xpos=press_xpos[i], move_type=MoveType.EEF_MOVE)]
+            for i in range(self.n_envs)
+        ]
+        is_success, plan_traj = self._plan_arm_trajectory(
+            target_states_list,
+            place_qpos,
+            n_press_waypoint,
+            self.arm_dof,
+        )
+        if not is_success:
+            logger.log_warning("Failed to plan place press trajectory.")
+            return False, torch.empty(0), self.joint_ids
+        press_trajectory = torch.zeros(
+            size=(self.n_envs, n_press_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        press_trajectory[:, :, : self.arm_dof] = plan_traj
+        press_trajectory[:, :, self.arm_dof :] = self._repeat_hand_qpos(
+            hand_close_qpos, n_press_waypoint
+        )
+
+        press_qpos = press_trajectory[:, -1, : self.arm_dof]
+        hold_trajectory = torch.zeros(
+            size=(self.n_envs, n_hold_waypoint, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        if n_hold_waypoint > 0:
+            hold_trajectory[:, :, : self.arm_dof] = press_qpos.unsqueeze(1)
+            hold_trajectory[:, :, self.arm_dof :] = self._repeat_hand_qpos(
+                hand_close_qpos, n_hold_waypoint
+            )
+
         hand_open_path = self._interpolate_hand_qpos(
-            self.hand_close_qpos,
+            hand_close_qpos,
             self.hand_open_qpos,
             n_open_waypoint,
         )
@@ -739,7 +1115,7 @@ def execute(
             dtype=torch.float32,
             device=self.device,
         )
-        open_trajectory[:, :, : self.arm_dof] = place_qpos
+        open_trajectory[:, :, : self.arm_dof] = press_qpos.unsqueeze(1)
         open_trajectory[:, :, self.arm_dof :] = hand_open_path
 
         target_states_list = [
@@ -748,7 +1124,7 @@ def execute(
         ]
         is_success, plan_traj = self._plan_arm_trajectory(
             target_states_list,
-            place_qpos,
+            press_qpos,
             n_retreat_waypoint,
             self.arm_dof,
         )
@@ -761,13 +1137,20 @@ def execute(
             device=self.device,
         )
         retreat_trajectory[:, :, : self.arm_dof] = plan_traj
-        retreat_trajectory[:, :, self.arm_dof :] = self.hand_open_qpos
+        retreat_trajectory[:, :, self.arm_dof :] = self._repeat_hand_qpos(
+            self.hand_open_qpos, n_retreat_waypoint
+        )
 
         trajectory = torch.cat(
             [
                 approach_trajectory,
+                final_approach_trajectory,
                 close_trajectory,
+                grasp_hold_trajectory,
                 upright_trajectory,
+                upright_hold_trajectory,
+                press_trajectory,
+                hold_trajectory,
                 open_trajectory,
                 retreat_trajectory,
             ],
diff --git a/embodichain/lab/sim/objects/articulation.py b/embodichain/lab/sim/objects/articulation.py
index 3fbe68e1..ed5a9e32 100644
--- a/embodichain/lab/sim/objects/articulation.py
+++ b/embodichain/lab/sim/objects/articulation.py
@@ -1444,6 +1444,8 @@ def set_joint_drive(
                 drive_args["max_velocity"] = max_velocity[i].cpu().numpy()
             if friction is not None:
                 drive_args["joint_friction"] = friction[i].cpu().numpy()
+            if armature is not None:
+                drive_args["armature"] = armature[i].cpu().numpy()
             self._entities[env_idx].set_drive(**drive_args)
 
     def get_joint_drive(
diff --git a/scripts/tutorials/atomic_action/upright_atomic_action.py b/scripts/tutorials/atomic_action/upright_atomic_action.py
index 92383cc4..d16f3a9b 100644
--- a/scripts/tutorials/atomic_action/upright_atomic_action.py
+++ b/scripts/tutorials/atomic_action/upright_atomic_action.py
@@ -64,6 +64,9 @@
     GripperCollisionCfg,
 )
 
+GRIPPER_MAX_OPEN_WIDTH = 0.088
+GRIPPER_FINGER_LENGTH = 0.078
+
 
 def parse_arguments():
     """
@@ -96,12 +99,6 @@ def parse_arguments():
         default=10000,
         help="Number of surface samples for antipodal grasp generation.",
     )
-    parser.add_argument(
-        "--n_top_grasps",
-        type=int,
-        default=30,
-        help="Number of top-ranked grasp poses to keep.",
-    )
     parser.add_argument(
         "--force_reannotate",
         action="store_true",
@@ -185,7 +182,7 @@ def create_robot(sim: SimulationManager, position=[0.0, 0.0, 0.0]) -> Robot:
                 ],
             )
         },
-        init_qpos=[-3.14, -1.57, -1.57, 1.57, -1.57, 0.0, 0.0, 0.0],
+        init_qpos=[0, -1.57, 1.57, -1.57, -1.57, 0.0, 0.0, 0.0],
         init_pos=position,
     )
     return sim.add_robot(cfg=cfg)
@@ -261,18 +258,52 @@ def build_grasp_generator_cfg(args: argparse.Namespace) -> GraspGeneratorCfg:
         ),
         is_partial_annotate=False,
         is_filter_ground_collision=True,
-        n_top_grasps=args.n_top_grasps,
     )
 
 
 def build_gripper_collision_cfg() -> GripperCollisionCfg:
     return GripperCollisionCfg(
-        max_open_length=0.088,
-        finger_length=0.078,
+        max_open_length=GRIPPER_MAX_OPEN_WIDTH,
+        finger_length=GRIPPER_FINGER_LENGTH,
         point_sample_dense=0.012,
     )
 
 
+def get_hand_open_close_qpos(
+    robot: Robot, device: torch.device
+) -> tuple[torch.Tensor, torch.Tensor]:
+    hand_limits = robot.get_qpos_limits(name="hand")[0].to(
+        device=device, dtype=torch.float32
+    )
+    return hand_limits[:, 0], hand_limits[:, 1]
+
+
+def get_grasp_squeeze_width(object_kind: str) -> float:
+    if object_kind == "bottle":
+        return 0.004
+    return 0.002
+
+
+def get_max_grasp_open_length(object_kind: str) -> float | None:
+    if object_kind == "bottle":
+        return 0.060
+    return None
+
+
+def get_max_grasp_axis_approach_dot(object_kind: str) -> float | None:
+    if object_kind == "bottle":
+        return 0.080
+    return None
+
+
+def get_min_dynamic_hand_close_qpos(
+    object_kind: str, hand_close_qpos: torch.Tensor
+) -> torch.Tensor | None:
+    if object_kind != "bottle":
+        return None
+    return torch.full_like(hand_close_qpos, 0.025)
+
+
 def create_object_semantics(
     obj: RigidObject, label: str, args: argparse.Namespace
 ) -> ObjectSemantics:
@@ -307,10 +338,7 @@ def get_grasp_traj(sim: SimulationManager, robot: Robot, grasp_xpos: torch.Tenso
     _, qpos_grasp = robot.compute_ik(
         pose=grasp_xpos, joint_seed=qpos_approach, name="arm"
     )
-    hand_open_qpos = torch.tensor([0.00, 0.00], dtype=torch.float32, device=sim.device)
-    hand_close_qpos = torch.tensor(
-        [0.025, 0.025], dtype=torch.float32, device=sim.device
-    )
+    hand_open_qpos, hand_close_qpos = get_hand_open_close_qpos(robot, sim.device)
 
     arm_trajectory = torch.cat(
         [
@@ -413,8 +441,7 @@ def run_upright_demo(
     motion_gen = MotionGenerator(
         cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid))
     )
-    hand_open = torch.tensor([0.00, 0.00], dtype=torch.float32, device=sim.device)
-    hand_close = torch.tensor([0.025, 0.025], dtype=torch.float32, device=sim.device)
+    hand_open, hand_close = get_hand_open_close_qpos(robot, sim.device)
     upright_action = UprightAction(
         motion_generator=motion_gen,
         cfg=UprightActionCfg(
@@ -428,6 +455,26 @@ def run_upright_demo(
             pre_grasp_distance=0.15,
             lift_height=0.15,
             upright_axis_sign=-1.0 if args.object_kind == "bottle" else 1.0,
+            place_press_depth=0.002,
+            place_press_steps=4,
+            upright_hold_steps=3,
+            place_hold_steps=8,
+            release_interp_steps=12,
+            release_retreat_distance=0.08,
+            release_retreat_lift=0.01,
+            final_approach_steps=12,
+            final_approach_preclose_width_margin=0.010,
+            grasp_hold_steps=4,
+            use_grasp_width_qpos=True,
+            gripper_max_open_width=GRIPPER_MAX_OPEN_WIDTH,
+            max_grasp_open_length=get_max_grasp_open_length(args.object_kind),
+            max_grasp_axis_approach_dot=get_max_grasp_axis_approach_dot(
+                args.object_kind
+            ),
+            grasp_squeeze_width=get_grasp_squeeze_width(args.object_kind),
+            min_dynamic_hand_close_qpos=get_min_dynamic_hand_close_qpos(
+                args.object_kind, hand_close
+            ),
         ),
     )
 

From 555259740dc9dc7aa8dc248cee2f77c3a1945343 Mon Sep 17 00:00:00 2001
From: skywhite1024 <129768272+skywhite1024@users.noreply.github.com>
Date: Tue, 16 Jun 2026 18:14:11 +0800
Subject: [PATCH 03/14] Address upright action follow-up fixes

---
 .../lab/sim/atomic_actions/__init__.py        |   1 -
 embodichain/lab/sim/atomic_actions/actions.py | 289 ++++++--
 .../atomic_action/upright_atomic_action.py    | 692 ++++++++++++------
 3 files changed, 678 insertions(+), 304 deletions(-)

diff --git a/embodichain/lab/sim/atomic_actions/__init__.py b/embodichain/lab/sim/atomic_actions/__init__.py
index bbbf7df5..9bc7a741 100644
--- a/embodichain/lab/sim/atomic_actions/__init__.py
+++ b/embodichain/lab/sim/atomic_actions/__init__.py
@@ -49,7 +49,6 @@
 __all__ = [
     # Core classes
     "Affordance",
-    "GraspPose",
     "InteractionPoints",
     "ObjectSemantics",
     "ActionCfg",
diff --git a/embodichain/lab/sim/atomic_actions/actions.py b/embodichain/lab/sim/atomic_actions/actions.py
index e8fd4983..d774cb98 100644
--- a/embodichain/lab/sim/atomic_actions/actions.py
+++ b/embodichain/lab/sim/atomic_actions/actions.py
@@ -292,9 +292,7 @@ def execute(
 
         # TODO: warning and fallback if no valid grasp pose found
         if not is_success:
-            logger.log_warning(
-                "Failed to resolve grasp pose, using default approach pose"
-            )
+            logger.log_warning("Failed to resolve move target pose.")
             return False, torch.empty(0), self.arm_joint_ids
 
         target_states_list = [
@@ -403,11 +401,10 @@ def execute(
                 target, action_name=self.__class__.__name__
             )
 
-        # TODO: warning and fallback if no valid grasp pose found
+        if isinstance(is_success, torch.Tensor):
+            is_success = torch.all(is_success).item()
         if not is_success:
-            logger.log_warning(
-                "Failed to resolve grasp pose, using default approach pose"
-            )
+            logger.log_warning("Failed to resolve grasp pose for all environments.")
             return False, torch.empty(0), self.joint_ids
 
         # Compute pre-grasp pose
@@ -630,6 +627,17 @@ class UprightActionCfg(PickUpActionCfg):
     max_grasp_axis_approach_dot: float | None = None
     """Optional maximum absolute dot between grasp X axis and approach direction."""
 
+    max_grasp_axis_upright_axis_dot: float | None = None
+    """Optional maximum absolute dot between grasp X axis and object upright axis."""
+
+    upright_yaw_offsets: tuple[float, ...] = (
+        0.0,
+        0.5 * np.pi,
+        -0.5 * np.pi,
+        np.pi,
+    )
+    """Yaw offsets (rad) to try after aligning the object upright axis."""
+
 
 class UprightAction(PickUpAction):
     def __init__(
@@ -650,90 +658,193 @@ def __init__(
     def _resolve_grasp_pose(
         self, semantics: ObjectSemantics
     ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-        is_success, grasp_xpos, open_length = super()._resolve_grasp_pose(semantics)
-        max_open_length = self.cfg.max_grasp_open_length
-        max_axis_dot = self.cfg.max_grasp_axis_approach_dot
-        if max_open_length is None and max_axis_dot is None:
-            return is_success, grasp_xpos, open_length
+        if not isinstance(semantics.affordance, AntipodalAffordance):
+            logger.log_error(
+                "Grasp pose affordance must be of type AntipodalAffordance"
+            )
+        if semantics.entity is None:
+            logger.log_error(
+                "ObjectSemantics must be associated with an entity to get object pose"
+            )
+        obj_poses = semantics.entity.get_local_pose(to_matrix=True)
+        if semantics.affordance.generator is None:
+            semantics.affordance._init_generator()
+        generator = semantics.affordance.generator
+        if generator is None:
+            logger.log_error("Failed to initialize antipodal grasp generator")
 
+        n_envs = obj_poses.shape[0]
         approach_direction = self.approach_direction.to(
             device=self.device, dtype=torch.float32
         )
         approach_direction = approach_direction / approach_direction.norm().clamp(
             min=1e-6
         )
-        grasp_axis_dot = torch.abs(
-            (grasp_xpos[:, :3, 0] * approach_direction).sum(dim=1)
-        )
-        violates_width = (
-            torch.zeros_like(open_length, dtype=torch.bool)
-            if max_open_length is None
-            else open_length > max_open_length
-        )
-        violates_axis = (
-            torch.zeros_like(grasp_axis_dot, dtype=torch.bool)
-            if max_axis_dot is None
-            else grasp_axis_dot > max_axis_dot
+        max_open_length = self.cfg.max_grasp_open_length
+        max_approach_axis_dot = self.cfg.max_grasp_axis_approach_dot
+        max_upright_axis_dot = self.cfg.max_grasp_axis_upright_axis_dot
+
+        is_success = torch.zeros(n_envs, dtype=torch.bool, device=self.device)
+        grasp_xpos = torch.eye(4, dtype=torch.float32, device=self.device).repeat(
+            n_envs, 1, 1
         )
-        needs_filter = violates_width | violates_axis
+        open_length = torch.zeros(n_envs, dtype=torch.float32, device=self.device)
+        init_qpos = self.robot.get_qpos(name=self.cfg.control_part)
+        world_z = torch.tensor([0.0, 0.0, 1.0], device=self.device)
+        upright_obj_pose_candidates = self._build_upright_object_pose_candidates(
+            semantics, obj_poses
+        )
+        selected_upright_obj_poses = upright_obj_pose_candidates[:, 0].clone()
+
+        for env_idx in range(n_envs):
+            (
+                has_candidates,
+                candidate_grasp_xpos,
+                candidate_open_length,
+                candidate_cost,
+            ) = generator.get_valid_grasp_poses(obj_poses[env_idx], approach_direction)
+            if not has_candidates:
+                logger.log_warning(
+                    f"No valid grasp candidates found for {env_idx}-th object."
+                )
+                continue
 
-        if not bool(torch.any(needs_filter).item()):
-            return is_success, grasp_xpos, open_length
+            candidate_grasp_xpos = candidate_grasp_xpos.to(
+                device=self.device, dtype=torch.float32
+            )
+            candidate_open_length = candidate_open_length.to(
+                device=self.device, dtype=torch.float32
+            )
+            candidate_cost = candidate_cost.to(device=self.device, dtype=torch.float32)
+            candidate_mask = torch.ones(
+                candidate_grasp_xpos.shape[0], dtype=torch.bool, device=self.device
+            )
+            if max_open_length is not None:
+                candidate_mask &= candidate_open_length <= max_open_length
 
-        generator = semantics.affordance.generator
-        hit_point_pairs = (
-            None if generator is None else getattr(generator, "_hit_point_pairs", None)
-        )
-        if hit_point_pairs is None:
-            return is_success, grasp_xpos, open_length
+            grasp_axis_dot = torch.abs(
+                (candidate_grasp_xpos[:, :3, 0] * approach_direction).sum(dim=1)
+            )
+            if max_approach_axis_dot is not None:
+                candidate_mask &= grasp_axis_dot <= max_approach_axis_dot
 
-        pair_lengths = torch.norm(
-            hit_point_pairs[:, 1, :] - hit_point_pairs[:, 0, :], dim=-1
-        )
-        valid_mask = torch.ones(
-            hit_point_pairs.shape[0], dtype=torch.bool, device=hit_point_pairs.device
-        )
-        if max_open_length is not None:
-            valid_mask &= pair_lengths <= max_open_length
-        obj_poses = semantics.entity.get_local_pose(to_matrix=True)
-        pair_axes = torch.nn.functional.normalize(
-            hit_point_pairs[:, 1, :] - hit_point_pairs[:, 0, :], dim=-1
-        )
-        pair_axes_world = torch.matmul(
-            pair_axes.unsqueeze(0), obj_poses[:, :3, :3].transpose(1, 2)
-        )
-        pair_axis_dot = torch.abs(
-            (pair_axes_world * approach_direction.view(1, 1, 3)).sum(dim=-1)
-        )
-        if max_axis_dot is not None:
-            valid_mask &= torch.all(pair_axis_dot <= max_axis_dot, dim=0)
-        valid_count = int(valid_mask.sum().item())
-        if valid_count == 0:
-            logger.log_warning(
-                "No grasp candidates remain after upright grasp filtering."
+            upright_axis = torch.nn.functional.normalize(
+                obj_poses[env_idx, :3, 2], dim=0
             )
-            return is_success, grasp_xpos, open_length
-
-        original_hit_point_pairs = generator._hit_point_pairs
-        try:
-            generator._hit_point_pairs = hit_point_pairs[valid_mask]
-            filtered_success, filtered_grasp_xpos, filtered_open_length = (
-                semantics.affordance.get_best_grasp_poses(
-                    obj_poses=obj_poses, approach_direction=self.approach_direction
+            grasp_upright_axis_dot = torch.abs(
+                (candidate_grasp_xpos[:, :3, 0] * upright_axis).sum(dim=1)
+            )
+            if max_upright_axis_dot is not None:
+                candidate_mask &= grasp_upright_axis_dot <= max_upright_axis_dot
+
+            if not bool(torch.any(candidate_mask).item()):
+                logger.log_warning(
+                    "No grasp candidates remain after upright grasp filtering "
+                    f"for {env_idx}-th object."
                 )
+                continue
+
+            candidate_grasp_xpos = candidate_grasp_xpos[candidate_mask]
+            candidate_open_length = candidate_open_length[candidate_mask]
+            candidate_cost = candidate_cost[candidate_mask]
+            n_candidate = candidate_grasp_xpos.shape[0]
+
+            pre_grasp_xpos = self._apply_offset(
+                pose=candidate_grasp_xpos,
+                offset=-candidate_grasp_xpos[:, :3, 2] * self.cfg.pre_grasp_distance,
+            )
+            lift_xpos = self._apply_offset(
+                pose=candidate_grasp_xpos,
+                offset=world_z * self.cfg.lift_height,
             )
-        finally:
-            generator._hit_point_pairs = original_hit_point_pairs
-
-        use_filtered = needs_filter & filtered_success
-        has_filtered = bool(torch.any(use_filtered).item())
-        if has_filtered:
-            grasp_xpos = torch.where(
-                use_filtered[:, None, None], filtered_grasp_xpos, grasp_xpos
+            obj_pose_repeat = obj_poses[env_idx].unsqueeze(0).repeat(n_candidate, 1, 1)
+            obj_to_grasp = torch.bmm(
+                self._invert_pose(obj_pose_repeat), candidate_grasp_xpos
             )
-            open_length = torch.where(use_filtered, filtered_open_length, open_length)
-            is_success = torch.where(use_filtered, filtered_success, is_success)
 
+            base_ik_success = torch.ones(
+                n_candidate, dtype=torch.bool, device=self.device
+            )
+            qpos_seed = init_qpos[env_idx : env_idx + 1, None, :].repeat(
+                1, n_candidate, 1
+            )
+            for target_xpos in (
+                pre_grasp_xpos,
+                candidate_grasp_xpos,
+                lift_xpos,
+            ):
+                target_success, target_qpos = self.robot.compute_batch_ik(
+                    pose=target_xpos.unsqueeze(0),
+                    name=self.cfg.control_part,
+                    joint_seed=qpos_seed,
+                    env_ids=[env_idx],
+                )
+                base_ik_success &= target_success[0]
+                qpos_seed = target_qpos
+
+            n_upright_pose = upright_obj_pose_candidates.shape[1]
+            upright_obj_pose_repeat = (
+                upright_obj_pose_candidates[env_idx]
+                .unsqueeze(1)
+                .repeat(1, n_candidate, 1, 1)
+                .reshape(-1, 4, 4)
+            )
+            obj_to_grasp_repeat = (
+                obj_to_grasp.unsqueeze(0)
+                .repeat(n_upright_pose, 1, 1, 1)
+                .reshape(-1, 4, 4)
+            )
+            upright_lift_obj_xpos = self._apply_offset(
+                pose=upright_obj_pose_repeat,
+                offset=world_z * self.cfg.lift_height,
+            )
+            upright_lift_xpos = torch.bmm(upright_lift_obj_xpos, obj_to_grasp_repeat)
+            upright_place_xpos = torch.bmm(upright_obj_pose_repeat, obj_to_grasp_repeat)
+            press_xpos = self._apply_offset(
+                pose=upright_place_xpos,
+                offset=-world_z
+                * (self.cfg.place_clearance + self.cfg.place_press_depth),
+            )
+
+            ik_success = base_ik_success.repeat(n_upright_pose)
+            upright_qpos_seed = qpos_seed.repeat(1, n_upright_pose, 1)
+            for target_xpos in (upright_lift_xpos, upright_place_xpos, press_xpos):
+                target_success, target_qpos = self.robot.compute_batch_ik(
+                    pose=target_xpos.unsqueeze(0),
+                    name=self.cfg.control_part,
+                    joint_seed=upright_qpos_seed,
+                    env_ids=[env_idx],
+                )
+                ik_success &= target_success[0]
+                upright_qpos_seed = target_qpos
+
+            flat_candidate_cost = candidate_cost.repeat(n_upright_pose)
+            masked_cost = torch.where(
+                ik_success,
+                flat_candidate_cost,
+                torch.full_like(flat_candidate_cost, float("inf")),
+            )
+            best_cost, best_flat_idx = masked_cost.min(dim=0)
+            best_upright_idx = torch.div(
+                best_flat_idx, n_candidate, rounding_mode="floor"
+            )
+            best_idx = best_flat_idx % n_candidate
+
+            if not torch.isfinite(best_cost):
+                logger.log_warning(
+                    "No upright grasp candidates remain after IK feasibility "
+                    f"filtering for {env_idx}-th object."
+                )
+                continue
+
+            is_success[env_idx] = True
+            grasp_xpos[env_idx] = candidate_grasp_xpos[best_idx]
+            open_length[env_idx] = candidate_open_length[best_idx]
+            selected_upright_obj_poses[env_idx] = upright_obj_pose_candidates[
+                env_idx, best_upright_idx
+            ]
+
+        self._selected_upright_obj_xpos = selected_upright_obj_poses
         return is_success, grasp_xpos, open_length
 
     @staticmethod
@@ -789,6 +900,24 @@ def _build_upright_object_pose(
             upright_pose[:, 2, 3] = self.cfg.place_clearance - local_bottom_z
         return upright_pose
 
+    def _build_upright_object_pose_candidates(
+        self, semantics: ObjectSemantics, obj_poses: torch.Tensor
+    ) -> torch.Tensor:
+        """Build upright target poses with alternative yaw rotations."""
+        base_pose = self._build_upright_object_pose(semantics, obj_poses)
+        yaw_offsets = torch.as_tensor(
+            self.cfg.upright_yaw_offsets, device=self.device, dtype=torch.float32
+        )
+        cos_yaw = torch.cos(yaw_offsets).view(1, -1, 1)
+        sin_yaw = torch.sin(yaw_offsets).view(1, -1, 1)
+
+        base_x = base_pose[:, None, :3, 0]
+        base_y = base_pose[:, None, :3, 1]
+        candidates = base_pose[:, None, :, :].repeat(1, yaw_offsets.numel(), 1, 1)
+        candidates[:, :, :3, 0] = cos_yaw * base_x + sin_yaw * base_y
+        candidates[:, :, :3, 1] = -sin_yaw * base_x + cos_yaw * base_y
+        return candidates
+
     def _compute_hand_qpos_for_width(self, target_width: torch.Tensor) -> torch.Tensor:
         """Map desired total gripper width to batched hand qpos."""
         target_width = target_width.to(device=self.device, dtype=torch.float32).view(
@@ -858,7 +987,7 @@ def execute(
         obj_poses = target.entity.get_local_pose(to_matrix=True)
         if not torch.all(is_success).item():
             logger.log_warning(
-                "Failed to resolve grasp pose, using default approach pose"
+                "Failed to resolve upright grasp pose for all environments."
             )
             return False, torch.empty(0), self.joint_ids
 
@@ -877,7 +1006,9 @@ def execute(
         )
 
         obj_to_grasp = torch.bmm(self._invert_pose(obj_poses), grasp_xpos)
-        upright_obj_xpos = self._build_upright_object_pose(target, obj_poses)
+        upright_obj_xpos = getattr(self, "_selected_upright_obj_xpos", None)
+        if upright_obj_xpos is None or upright_obj_xpos.shape != obj_poses.shape:
+            upright_obj_xpos = self._build_upright_object_pose(target, obj_poses)
         upright_lift_obj_xpos = self._apply_offset(
             pose=upright_obj_xpos,
             offset=world_z * self.cfg.lift_height,
@@ -1218,9 +1349,7 @@ def execute(
 
         # TODO: warning and fallback if no valid grasp pose found
         if not is_success:
-            logger.log_warning(
-                "Failed to resolve grasp pose, using default approach pose"
-            )
+            logger.log_warning("Failed to resolve place target pose.")
             return False, torch.empty(0), self.joint_ids
 
         # compute waypoint number for each phase
diff --git a/scripts/tutorials/atomic_action/upright_atomic_action.py b/scripts/tutorials/atomic_action/upright_atomic_action.py
index d16f3a9b..ae1f5525 100644
--- a/scripts/tutorials/atomic_action/upright_atomic_action.py
+++ b/scripts/tutorials/atomic_action/upright_atomic_action.py
@@ -15,10 +15,12 @@
 # ----------------------------------------------------------------------------
 
 """
-This script demonstrates the creation and simulation of a robot that grasps a rigid mug
-in a simulated environment using the SimulationManager and grasp planning utilities.
+This script demonstrates the creation and simulation of a robot that uprights a fallen
+bottle in a simulated environment using the SimulationManager and atomic actions.
 """
 
+from __future__ import annotations
+
 import argparse
 import sys
 import time
@@ -28,13 +30,11 @@
 if str(_REPO_ROOT) not in sys.path:
     sys.path.insert(0, str(_REPO_ROOT))
 
-import numpy as np
 import torch
 
 from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
 from embodichain.lab.sim.objects import Robot, RigidObject
-from embodichain.lab.sim.utility.action_utils import interpolate_with_distance
-from embodichain.lab.sim.shapes import MeshCfg
+from embodichain.lab.sim.shapes import CubeCfg, MeshCfg
 from embodichain.lab.sim.solvers import PytorchSolverCfg
 from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg
 from embodichain.data import get_data_path
@@ -64,8 +64,24 @@
     GripperCollisionCfg,
 )
 
-GRIPPER_MAX_OPEN_WIDTH = 0.088
-GRIPPER_FINGER_LENGTH = 0.078
+GRIPPER_URDF_PATH = "DH_PGI_140_80/DH_PGI_140_80.urdf"
+GRIPPER_HAND_JOINT_PATTERN = "GRIPPER_FINGER1_JOINT_1"
+GRIPPER_MAX_OPEN_WIDTH = 0.080
+GRIPPER_FINGER_LENGTH = 0.088
+GRIPPER_ROOT_Z_WIDTH = 0.096
+GRIPPER_Y_THICKNESS = 0.040
+GRIPPER_TCP_Z = 0.121
+PGI_SAMPLE_INTERVAL = 120
+PGI_HAND_CLOSE_STEPS = 12
+PGI_GRASP_HOLD_STEPS = 20
+BOTTLE_LABEL = "bottle"
+BOTTLE_APPROACH_DIRECTION = (0.0, 0.0, -1.0)
+BOTTLE_GRASP_SQUEEZE_WIDTH = 0.020
+BOTTLE_MAX_GRASP_OPEN_LENGTH = 0.060
+BOTTLE_MAX_GRASP_AXIS_APPROACH_DOT = 0.080
+BOTTLE_MAX_GRASP_AXIS_UPRIGHT_AXIS_DOT = 0.35
+BOTTLE_MIN_DYNAMIC_HAND_CLOSE_QPOS = 0.024
+BOTTLE_GRASP_COLLISION_THRESHOLD = -0.004
 
 
 def parse_arguments():
@@ -73,26 +89,13 @@ def parse_arguments():
     Parse command-line arguments to configure the simulation.
 
     Returns:
-        argparse.Namespace: Parsed arguments including number of environments and rendering options.
+        argparse.Namespace: Parsed arguments including simulation and rendering
+        options.
     """
     parser = argparse.ArgumentParser(
         description="Create and simulate a robot in SimulationManager"
     )
     add_env_launcher_args_to_parser(parser)
-    parser.add_argument(
-        "--demo_mode",
-        type=str,
-        choices=["grasp", "upright"],
-        default="grasp",
-        help="Select the tutorial scenario to run.",
-    )
-    parser.add_argument(
-        "--object_kind",
-        type=str,
-        choices=["cup", "bottle"],
-        default="bottle",
-        help="Object to use in upright mode.",
-    )
     parser.add_argument(
         "--n_sample",
         type=int,
@@ -102,7 +105,25 @@ def parse_arguments():
     parser.add_argument(
         "--force_reannotate",
         action="store_true",
-        help="Force grasp region re-annotation instead of reusing cached antipodal pairs.",
+        help=(
+            "Force grasp region re-annotation instead of reusing cached antipodal "
+            "pairs."
+        ),
+    )
+    parser.add_argument(
+        "--debug_hand_state",
+        action="store_true",
+        help="Log planned hand targets and simulated hand qpos during execution.",
+    )
+    parser.add_argument(
+        "--diagnose_grasp",
+        action="store_true",
+        help="Plan once and print grasp/TCP diagnostics without opening the viewer.",
+    )
+    parser.add_argument(
+        "--auto_play",
+        action="store_true",
+        help="Run the viewer demo without waiting for keyboard input.",
     )
     return parser.parse_args()
 
@@ -126,7 +147,7 @@ def initialize_simulation(args) -> SimulationManager:
     )
     sim = SimulationManager(config)
 
-    light = sim.add_light(
+    sim.add_light(
         cfg=LightCfg(
             uid="main_light",
             color=(0.6, 0.6, 0.6),
@@ -150,7 +171,7 @@ def create_robot(sim: SimulationManager, position=[0.0, 0.0, 0.0]) -> Robot:
     """
     # Retrieve URDF paths for the robot arm and hand
     ur10_urdf_path = get_data_path("UniversalRobots/UR10/UR10.urdf")
-    gripper_urdf_path = get_data_path("DH_PGC_140_50_M/DH_PGC_140_50_M.urdf")
+    gripper_urdf_path = get_data_path(GRIPPER_URDF_PATH)
     # Configure the robot with its components and control properties
     cfg = RobotCfg(
         uid="UR10",
@@ -161,23 +182,23 @@ def create_robot(sim: SimulationManager, position=[0.0, 0.0, 0.0]) -> Robot:
             ]
         ),
         drive_pros=JointDrivePropertiesCfg(
-            stiffness={"JOINT[0-9]": 1e4, "FINGER[1-2]": 1e3},
-            damping={"JOINT[0-9]": 1e3, "FINGER[1-2]": 1e2},
-            max_effort={"JOINT[0-9]": 1e5, "FINGER[1-2]": 1e4},
+            stiffness={"JOINT[0-9]": 1e4, GRIPPER_HAND_JOINT_PATTERN: 1e3},
+            damping={"JOINT[0-9]": 1e3, GRIPPER_HAND_JOINT_PATTERN: 1e2},
+            max_effort={"JOINT[0-9]": 1e5, GRIPPER_HAND_JOINT_PATTERN: 1e4},
             drive_type="force",
         ),
         control_parts={
             "arm": ["JOINT[0-9]"],
-            "hand": ["FINGER[1-2]"],
+            "hand": [GRIPPER_HAND_JOINT_PATTERN],
         },
         solver_cfg={
             "arm": PytorchSolverCfg(
                 end_link_name="ee_link",
                 root_link_name="base_link",
                 tcp=[
+                    [1.0, 0.0, 0.0, 0.0],
                     [0.0, 1.0, 0.0, 0.0],
-                    [-1.0, 0.0, 0.0, 0.0],
-                    [0.0, 0.0, 1.0, 0.12],
+                    [0.0, 0.0, 1.0, GRIPPER_TCP_Z],
                     [0.0, 0.0, 0.0, 1.0],
                 ],
             )
@@ -188,45 +209,6 @@ def create_robot(sim: SimulationManager, position=[0.0, 0.0, 0.0]) -> Robot:
     return sim.add_robot(cfg=cfg)
 
 
-def create_mug(sim: SimulationManager):
-    mug_cfg = RigidObjectCfg(
-        uid="table",
-        shape=MeshCfg(
-            fpath=get_data_path("CoffeeCup/cup.ply"),
-        ),
-        attrs=RigidBodyAttributesCfg(
-            mass=0.01,
-            dynamic_friction=0.97,
-            static_friction=0.99,
-        ),
-        max_convex_hull_num=16,
-        init_pos=[0.55, 0.0, 0.01],
-        init_rot=[0.0, 0.0, -90],
-        body_scale=(4, 4, 4),
-    )
-    mug = sim.add_rigid_object(cfg=mug_cfg)
-    return mug
-
-
-def create_fallen_cup(sim: SimulationManager) -> RigidObject:
-    cup_cfg = RigidObjectCfg(
-        uid="cup",
-        shape=MeshCfg(
-            fpath=get_data_path("CoffeeCup/cup.ply"),
-        ),
-        attrs=RigidBodyAttributesCfg(
-            mass=0.01,
-            dynamic_friction=0.97,
-            static_friction=0.99,
-        ),
-        max_convex_hull_num=16,
-        init_pos=[0.55, 0.0, 0.01],
-        init_rot=[90.0, 0.0, -90.0],
-        body_scale=(4, 4, 4),
-    )
-    return sim.add_rigid_object(cfg=cup_cfg)
-
-
 def create_fallen_bottle(sim: SimulationManager) -> RigidObject:
     # Use a slightly smaller and closer bottle for the UR10 gripper demo.
     bottle_scale = 0.0008
@@ -242,22 +224,56 @@ def create_fallen_bottle(sim: SimulationManager) -> RigidObject:
         ),
         max_convex_hull_num=16,
         init_pos=[-0.4294, -0.0825, -0.0997],
-        init_rot=[90.0, 0.0, 0.0],
+        init_rot=[90.0, 90.0, 0.0],
         body_scale=(bottle_scale, bottle_scale, bottle_scale),
     )
     return sim.add_rigid_object(cfg=bottle_cfg)
 
 
+def create_support_plane(sim: SimulationManager) -> RigidObject:
+    support_cfg = RigidObjectCfg(
+        uid="support_plane",
+        shape=CubeCfg(size=[1.4, 1.4, 0.02]),
+        attrs=RigidBodyAttributesCfg(
+            dynamic_friction=0.97,
+            static_friction=0.99,
+        ),
+        body_type="static",
+        init_pos=[-0.2, 0.0, -0.015],
+    )
+    return sim.add_rigid_object(cfg=support_cfg)
+
+
+def settle_object(sim: SimulationManager, obj: RigidObject, step: int = 5) -> None:
+    """Settle an object through the same explicit sequence on CPU and CUDA."""
+    if sim.device.type == "cuda":
+        sim.init_gpu_physics()
+
+    obj.reset()
+    sim.update(step=step)
+    obj.clear_dynamics()
+
+
+def freeze_object_before_grasp(obj: RigidObject) -> None:
+    """Clear residual motion before planning without changing the body type."""
+    obj.clear_dynamics()
+
+
+def release_object_after_grasp(obj: RigidObject) -> None:
+    """Clear residual motion after the gripper has closed on the object."""
+    obj.clear_dynamics()
+
+
 def build_grasp_generator_cfg(args: argparse.Namespace) -> GraspGeneratorCfg:
     return GraspGeneratorCfg(
         viser_port=11801,
         antipodal_sampler_cfg=AntipodalSamplerCfg(
             n_sample=args.n_sample,
-            max_length=0.088,
+            max_length=GRIPPER_MAX_OPEN_WIDTH,
             min_length=0.003,
         ),
         is_partial_annotate=False,
-        is_filter_ground_collision=True,
+        is_filter_ground_collision=False,
     )
 
 
@@ -265,6 +281,9 @@ def build_gripper_collision_cfg() -> GripperCollisionCfg:
     return GripperCollisionCfg(
         max_open_length=GRIPPER_MAX_OPEN_WIDTH,
         finger_length=GRIPPER_FINGER_LENGTH,
+        y_thickness=GRIPPER_Y_THICKNESS,
+        root_z_width=GRIPPER_ROOT_Z_WIDTH,
+        open_check_margin=0.002,
         point_sample_dense=0.012,
     )
 
@@ -278,170 +297,345 @@ def get_hand_open_close_qpos(
     return hand_limits[:, 0], hand_limits[:, 1]
 
 
-def get_grasp_squeeze_width(object_kind: str) -> float:
-    if object_kind == "bottle":
-        return 0.004
-    return 0.002
+def format_tensor(tensor: torch.Tensor) -> str:
+    rounded = (tensor.detach().cpu() * 10000.0).round() / 10000.0
+    return str(rounded.tolist())
 
 
-def get_max_grasp_open_length(object_kind: str) -> float | None:
-    if object_kind == "bottle":
-        return 0.060
-    return None
+def log_hand_setup(robot: Robot, hand_open: torch.Tensor, hand_close: torch.Tensor):
+    hand_joint_ids = robot.get_joint_ids(name="hand")
+    hand_joint_names = [robot.joint_names[joint_id] for joint_id in hand_joint_ids]
+    logger.log_info(f"Hand joint ids: {hand_joint_ids}")
+    logger.log_info(f"Hand joint names: {hand_joint_names}")
+    logger.log_info(f"Robot mimic ids: {robot.mimic_ids}")
+    logger.log_info(f"Robot mimic parents: {robot.mimic_parents}")
+    logger.log_info(f"Robot mimic multipliers: {robot.mimic_multipliers}")
+    logger.log_info(f"Robot mimic offsets: {robot.mimic_offsets}")
+    logger.log_info(
+        f"Hand qpos limits: {format_tensor(robot.get_qpos_limits(name='hand')[0])}"
+    )
+    logger.log_info(f"Hand open qpos: {format_tensor(hand_open)}")
+    logger.log_info(f"Hand close qpos: {format_tensor(hand_close)}")
 
 
-def get_max_grasp_axis_approach_dot(object_kind: str) -> float | None:
-    if object_kind == "bottle":
-        return 0.080
-    return None
+def log_hand_execution_state(
+    robot: Robot,
+    step_idx: int,
+    total_steps: int,
+    action_hand_target: torch.Tensor,
+) -> None:
+    sim_hand_target = robot.get_qpos(name="hand", target=True)
+    actual_hand_qpos = robot.get_qpos(name="hand")
+    tracking_error = actual_hand_qpos - sim_hand_target
+    logger.log_info(
+        "Hand state "
+        f"step={step_idx}/{total_steps - 1}, "
+        f"action_target={format_tensor(action_hand_target[0])}, "
+        f"sim_target={format_tensor(sim_hand_target[0])}, "
+        f"actual={format_tensor(actual_hand_qpos[0])}, "
+        f"actual_minus_target={format_tensor(tracking_error[0])}"
+    )
 
 
-def get_min_dynamic_hand_close_qpos(
-    object_kind: str, hand_close_qpos: torch.Tensor
-) -> torch.Tensor | None:
-    if object_kind != "bottle":
-        return None
-    return torch.full_like(hand_close_qpos, 0.025)
+def log_object_execution_state(
+    obj: RigidObject,
+    step_idx: int,
+    total_steps: int,
+) -> None:
+    obj_pose = obj.get_local_pose(to_matrix=True)
+    logger.log_info(
+        "Object state "
+        f"step={step_idx}/{total_steps - 1}, "
+        f"pos={format_tensor(obj_pose[0, :3, 3])}, "
+        f"z_axis={format_tensor(obj_pose[0, :3, 2])}"
+    )
 
 
-def create_object_semantics(
-    obj: RigidObject, label: str, args: argparse.Namespace
-) -> ObjectSemantics:
-    return ObjectSemantics(
-        label=label,
-        geometry={
-            "mesh_vertices": obj.get_vertices(env_ids=[0], scale=True)[0],
-            "mesh_triangles": obj.get_triangles(env_ids=[0])[0],
-        },
-        affordance=AntipodalAffordance(
-            object_label=label,
-            force_reannotate=args.force_reannotate,
-            custom_config={
-                "gripper_collision_cfg": build_gripper_collision_cfg(),
-                "generator_cfg": build_grasp_generator_cfg(args),
-            },
-        ),
-        entity=obj,
+def get_upright_segment_lengths(action: UprightAction) -> dict[str, int]:
+    n_close = action.cfg.hand_interp_steps
+    n_final = max(2, action.cfg.final_approach_steps)
+    n_hold = max(0, action.cfg.grasp_hold_steps)
+    n_press = max(2, action.cfg.place_press_steps)
+    n_upright_hold = max(0, action.cfg.upright_hold_steps)
+    n_place_hold = max(0, action.cfg.place_hold_steps)
+    n_open = max(2, action.cfg.release_interp_steps)
+    motion_waypoints = (
+        action.cfg.sample_interval
+        - n_close
+        - n_final
+        - n_hold
+        - n_upright_hold
+        - n_press
+        - n_place_hold
+        - n_open
+    )
+    n_pre = max(2, int(round(motion_waypoints * 0.25)))
+    n_upright = max(2, int(round(motion_waypoints * 0.60)))
+    n_retreat = (
+        action.cfg.sample_interval
+        - n_close
+        - n_final
+        - n_hold
+        - n_upright_hold
+        - n_press
+        - n_place_hold
+        - n_open
+        - n_pre
+        - n_upright
+    )
+    if n_retreat < 2:
+        retreat_deficit = 2 - n_retreat
+        n_retreat = 2
+        n_upright = max(2, n_upright - retreat_deficit)
+    return {
+        "pre": n_pre,
+        "final": n_final,
+        "close": n_close,
+        "grasp_hold": n_hold,
+        "upright": n_upright,
+        "upright_hold": n_upright_hold,
+        "press": n_press,
+        "place_hold": n_place_hold,
+        "open": n_open,
+        "retreat": n_retreat,
+    }
+
+
+def log_tcp_alignment(
+    robot: Robot,
+    traj: torch.Tensor,
+    grasp_xpos: torch.Tensor,
+    arm_dof: int,
+    index: int,
+    label: str,
+) -> None:
+    arm_qpos = traj[:, index, :arm_dof]
+    tcp_xpos = robot.compute_fk(qpos=arm_qpos, name="arm", to_matrix=True)
+    pos_error = torch.norm(tcp_xpos[:, :3, 3] - grasp_xpos[:, :3, 3], dim=1)
+    rot_delta = torch.bmm(tcp_xpos[:, :3, :3].transpose(1, 2), grasp_xpos[:, :3, :3])
+    trace = rot_delta[:, 0, 0] + rot_delta[:, 1, 1] + rot_delta[:, 2, 2]
+    rot_error = torch.acos(((trace - 1.0) * 0.5).clamp(-1.0, 1.0))
+    logger.log_info(
+        f"{label}: index={index}, "
+        f"tcp_pos={format_tensor(tcp_xpos[0, :3, 3])}, "
+        f"pos_error={format_tensor(pos_error)}, "
+        f"rot_error_rad={format_tensor(rot_error)}, "
+        f"tcp_rot={format_tensor(tcp_xpos[0, :3, :3])}, "
+        f"target_rot={format_tensor(grasp_xpos[0, :3, :3])}, "
+        f"hand_target={format_tensor(traj[0, index, arm_dof:])}"
     )
 
 
-def get_grasp_traj(sim: SimulationManager, robot: Robot, grasp_xpos: torch.Tensor):
-    n_envs = sim.num_envs
-    rest_arm_qpos = robot.get_qpos("arm")
+def log_selected_gripper_clearance(
+    semantics: ObjectSemantics,
+    obj_pose: torch.Tensor,
+    grasp_xpos: torch.Tensor,
+    grasp_open_length: torch.Tensor,
+) -> None:
+    generator = semantics.affordance.generator
+    if generator is None:
+        return
 
-    approach_xpos = grasp_xpos.clone()
-    approach_xpos[:, 2, 3] += 0.1
+    collision_checker = getattr(generator, "_collision_checker", None)
+    if collision_checker is None:
+        return
 
-    _, qpos_approach = robot.compute_ik(
-        pose=approach_xpos, joint_seed=rest_arm_qpos, name="arm"
-    )
-    _, qpos_grasp = robot.compute_ik(
-        pose=grasp_xpos, joint_seed=qpos_approach, name="arm"
+    gripper_pc_world = collision_checker._get_gripper_pc(grasp_xpos, grasp_open_length)
+    ground_height = collision_checker.get_ground_height(obj_pose[0])
+    min_z = gripper_pc_world[:, :, 2].min(dim=1).values
+    max_z = gripper_pc_world[:, :, 2].max(dim=1).values
+    is_colliding, min_distance = collision_checker.query(
+        obj_pose=obj_pose[0],
+        grasp_poses=grasp_xpos,
+        open_lengths=grasp_open_length,
+        is_filter_ground_collision=True,
+        collision_threshold=BOTTLE_GRASP_COLLISION_THRESHOLD,
     )
-    hand_open_qpos, hand_close_qpos = get_hand_open_close_qpos(robot, sim.device)
-
-    arm_trajectory = torch.cat(
-        [
-            rest_arm_qpos[:, None, :],
-            qpos_approach[:, None, :],
-            qpos_grasp[:, None, :],
-            qpos_grasp[:, None, :],
-            qpos_approach[:, None, :],
-            rest_arm_qpos[:, None, :],
-        ],
-        dim=1,
+    logger.log_info(f"Selected gripper pc min z: {format_tensor(min_z)}")
+    logger.log_info(f"Selected gripper pc max z: {format_tensor(max_z)}")
+    logger.log_info(f"Selected grasp ground height: {ground_height:.4f}")
+    logger.log_info(
+        f"Selected grasp min collision distance: {format_tensor(min_distance)}"
     )
-    hand_trajectory = torch.cat(
-        [
-            hand_open_qpos[None, None, :].repeat(n_envs, 1, 1),
-            hand_open_qpos[None, None, :].repeat(n_envs, 1, 1),
-            hand_open_qpos[None, None, :].repeat(n_envs, 1, 1),
-            hand_close_qpos[None, None, :].repeat(n_envs, 1, 1),
-            hand_close_qpos[None, None, :].repeat(n_envs, 1, 1),
-            hand_close_qpos[None, None, :].repeat(n_envs, 1, 1),
-        ],
-        dim=1,
+    logger.log_info(
+        f"Selected grasp collision threshold: {BOTTLE_GRASP_COLLISION_THRESHOLD:.4f}"
     )
-    all_trajectory = torch.cat([arm_trajectory, hand_trajectory], dim=-1)
-    interp_trajectory = interpolate_with_distance(
-        trajectory=all_trajectory, interp_num=200, device=sim.device
+    logger.log_info(
+        f"Selected grasp collision flag: {is_colliding.detach().cpu().tolist()}"
     )
-    return interp_trajectory
 
 
-def run_grasp_demo(
-    args: argparse.Namespace, sim: SimulationManager, robot: Robot
-) -> None:
-    mug = create_mug(sim)
-    grasp_cfg = build_grasp_generator_cfg(args)
-    sim.open_window()
+def log_grasp_direction_probe(semantics: ObjectSemantics) -> None:
+    generator = semantics.affordance.generator
+    if generator is None:
+        return
 
-    # Annotate part of the mug to be grasped by following the instructions in the visualization window:
-    # 1. View grasp object in browser (e.g http://localhost:11801)
-    # 2. press 'Rect Select Region', select grasp region
-    # 3. press 'Confirm Selection' to finish grasp region selection.
+    obj_pose = semantics.entity.get_local_pose(to_matrix=True)[0]
+    hit_point_pairs = getattr(generator, "_hit_point_pairs", None)
+    if hit_point_pairs is None or hit_point_pairs.numel() == 0:
+        logger.log_info("Probe grasp direction: no antipodal pairs available")
+        return
 
-    start_time = time.time()
+    origin_points = hit_point_pairs[:, 0, :]
+    hit_points = hit_point_pairs[:, 1, :]
+    origin_points_world = generator._apply_transform(origin_points, obj_pose)
+    hit_points_world = generator._apply_transform(hit_points, obj_pose)
+    centers = (origin_points_world + hit_points_world) * 0.5
+    grasp_x = torch.nn.functional.normalize(
+        hit_points_world - origin_points_world, dim=-1
+    )
+    open_lengths = torch.norm(origin_points_world - hit_points_world, dim=-1)
+
+    probe_directions = {
+        "top_down": [0.0, 0.0, -1.0],
+        "from_robot_x": [-1.0, 0.0, 0.0],
+    }
+    for label, direction in probe_directions.items():
+        approach_direction = torch.tensor(
+            direction, dtype=torch.float32, device=generator.device
+        )
+        cos_angle = torch.clamp((grasp_x * approach_direction).sum(dim=-1), -1.0, 1.0)
+        positive_angle = torch.abs(torch.acos(cos_angle))
+        angle_mask = (
+            positive_angle - torch.pi / 2
+        ).abs() <= generator.cfg.max_deviation_angle
+        width_mask = open_lengths <= GRIPPER_MAX_OPEN_WIDTH
+        candidate_mask = angle_mask & width_mask
+        logger.log_info(
+            f"Probe grasp direction {label}: "
+            f"angle_count={int(angle_mask.sum().item())}, "
+            f"width_angle_count={int(candidate_mask.sum().item())}"
+        )
+        if torch.any(candidate_mask):
+            candidate_grasp_poses = GraspGenerator._grasp_pose_from_approach_direction(
+                grasp_x[candidate_mask],
+                approach_direction,
+                centers[candidate_mask],
+            )
+            candidate_open_lengths = open_lengths[candidate_mask]
+            is_colliding, min_distance = generator._collision_checker.query(
+                obj_pose,
+                candidate_grasp_poses,
+                candidate_open_lengths,
+                is_filter_ground_collision=True,
+                collision_threshold=BOTTLE_GRASP_COLLISION_THRESHOLD,
+            )
+            collision_free_count = int((~is_colliding).sum().item())
+            logger.log_info(
+                f"Probe grasp direction {label}: "
+                f"collision_free_count={collision_free_count}, "
+                f"collision_threshold={BOTTLE_GRASP_COLLISION_THRESHOLD:.4f}, "
+                f"min_distance={format_tensor(min_distance.min().unsqueeze(0))}, "
+                f"max_distance={format_tensor(min_distance.max().unsqueeze(0))}"
+            )
+            if collision_free_count > 0:
+                grasp_xpos = candidate_grasp_poses[~is_colliding][0]
+                open_length = candidate_open_lengths[~is_colliding][0]
+                gripper_pc_world = generator._collision_checker._get_gripper_pc(
+                    grasp_xpos.unsqueeze(0),
+                    open_length.unsqueeze(0),
+                )
+                ground_height = generator._collision_checker.get_ground_height(obj_pose)
+                min_z = gripper_pc_world[:, :, 2].min(dim=1).values
+                logger.log_info(
+                    f"Probe grasp direction {label}: "
+                    f"pos={format_tensor(grasp_xpos[:3, 3])}, "
+                    f"open_length={open_length.item():.4f}, "
+                    f"min_z={format_tensor(min_z)}, "
+                    f"ground_height={ground_height:.4f}"
+                )
+
+
+def diagnose_upright_plan(
+    robot: Robot,
+    action: UprightAction,
+    semantics: ObjectSemantics,
+) -> None:
+    is_success, grasp_xpos, grasp_open_length = action._resolve_grasp_pose(semantics)
+    if not torch.all(is_success).item():
+        obj_pose = semantics.entity.get_local_pose(to_matrix=True)
+        logger.log_info(f"Object pos: {format_tensor(obj_pose[0, :3, 3])}")
+        log_grasp_direction_probe(semantics)
+        logger.log_warning("Failed to resolve grasp pose during diagnostics.")
+        return
 
-    vertices = mug.get_vertices(env_ids=[0], scale=True)[0]
-    triangles = mug.get_triangles(env_ids=[0])[0]
-    grasp_generator = GraspGenerator(
-        vertices=vertices,
-        triangles=triangles,
-        cfg=grasp_cfg,
-        gripper_collision_cfg=build_gripper_collision_cfg(),
+    hand_close_qpos = action._compute_dynamic_hand_close_qpos(grasp_open_length)
+    final_approach_qpos = action._compute_final_approach_hand_qpos(
+        grasp_open_length, hand_close_qpos
     )
+    obj_pose = semantics.entity.get_local_pose(to_matrix=True)
+    approach_direction = action.approach_direction / action.approach_direction.norm()
+    grasp_axis_dot = torch.abs((grasp_xpos[:, :3, 0] * approach_direction).sum(dim=1))
+    bottle_axis = torch.nn.functional.normalize(obj_pose[:, :3, 2], dim=1)
+    grasp_axis_bottle_dot = torch.abs((grasp_xpos[:, :3, 0] * bottle_axis).sum(dim=1))
+
+    logger.log_info(f"Object pos: {format_tensor(obj_pose[0, :3, 3])}")
+    logger.log_info(f"Grasp pos: {format_tensor(grasp_xpos[0, :3, 3])}")
+    logger.log_info(f"Grasp rotation columns: {format_tensor(grasp_xpos[0, :3, :3])}")
+    logger.log_info(f"Grasp open length: {format_tensor(grasp_open_length)}")
+    logger.log_info(f"Grasp axis approach dot: {format_tensor(grasp_axis_dot)}")
+    logger.log_info(f"Grasp axis bottle dot: {format_tensor(grasp_axis_bottle_dot)}")
+    log_selected_gripper_clearance(semantics, obj_pose, grasp_xpos, grasp_open_length)
+    logger.log_info(
+        f"Final approach hand qpos: {format_tensor(final_approach_qpos[0])}"
+    )
+    logger.log_info(f"Close hand qpos: {format_tensor(hand_close_qpos[0])}")
 
-    grasp_generator.annotate()
+    is_success, traj, joint_ids = action.execute(semantics)
+    if not is_success:
+        logger.log_warning("Failed to plan upright trajectory during diagnostics.")
+        return
 
-    approach_direction = torch.tensor(
-        [0, 0, -1], dtype=torch.float32, device=sim.device
-    )
-    obj_poses = mug.get_local_pose(to_matrix=True)
-    grasp_xpos_list = []
-
-    rest_xpos = robot.compute_fk(
-        qpos=robot.get_qpos("arm"), name="arm", to_matrix=True
-    )[0]
-    for i, obj_pose in enumerate(obj_poses):
-        is_success, grasp_pose, _ = grasp_generator.get_grasp_poses(
-            obj_pose,
-            approach_direction,
-            visualize_collision=False,
-            visualize_pose=True,
-        )
-        if is_success:
-            grasp_xpos_list.append(grasp_pose.unsqueeze(0))
-        else:
-            logger.log_warning(f"No valid grasp pose found for {i}-th object.")
-            grasp_xpos_list.append(rest_xpos.unsqueeze(0))
+    arm_dof = len(robot.get_joint_ids(name="arm"))
+    segments = get_upright_segment_lengths(action)
+    logger.log_info(f"Action joint ids: {joint_ids}")
+    logger.log_info(f"Upright trajectory segments: {segments}")
+    logger.log_info(f"Trajectory shape: {tuple(traj.shape)}")
 
-    grasp_xpos = torch.cat(grasp_xpos_list, dim=0)
-    cost_time = time.time() - start_time
-    logger.log_info(f"Get grasp pose cost time: {cost_time:.2f} seconds")
+    grasp_idx = segments["pre"] + segments["final"] - 1
+    close_end_idx = grasp_idx + segments["close"]
+    hold_end_idx = close_end_idx + segments["grasp_hold"]
+    log_tcp_alignment(robot, traj, grasp_xpos, arm_dof, grasp_idx, "grasp")
+    log_tcp_alignment(robot, traj, grasp_xpos, arm_dof, close_end_idx, "close_end")
+    log_tcp_alignment(robot, traj, grasp_xpos, arm_dof, hold_end_idx, "hold_end")
 
-    grab_traj = get_grasp_traj(sim, robot, grasp_xpos)
-    input("Press Enter to start the grab mug demo...")
-    for i in range(grab_traj.shape[1]):
-        robot.set_qpos(grab_traj[:, i, :])
-        sim.update(step=4)
-        time.sleep(1e-2)
-    input("Press Enter to exit the simulation...")
+
+def create_object_semantics(
+    obj: RigidObject, args: argparse.Namespace
+) -> ObjectSemantics:
+    return ObjectSemantics(
+        label=BOTTLE_LABEL,
+        geometry={
+            "mesh_vertices": obj.get_vertices(env_ids=[0], scale=True)[0],
+            "mesh_triangles": obj.get_triangles(env_ids=[0])[0],
+        },
+        affordance=AntipodalAffordance(
+            object_label=BOTTLE_LABEL,
+            force_reannotate=args.force_reannotate,
+            custom_config={
+                "gripper_collision_cfg": build_gripper_collision_cfg(),
+                "generator_cfg": build_grasp_generator_cfg(args),
+            },
+        ),
+        entity=obj,
+    )
 
 
 def run_upright_demo(
     args: argparse.Namespace, sim: SimulationManager, robot: Robot
 ) -> None:
-    if args.object_kind == "cup":
-        obj = create_fallen_cup(sim)
-    else:
-        obj = create_fallen_bottle(sim)
-
-    semantics = create_object_semantics(obj, args.object_kind, args)
+    create_support_plane(sim)
+    obj = create_fallen_bottle(sim)
+    settle_object(sim, obj, step=5)
+    freeze_object_before_grasp(obj)
+    semantics = create_object_semantics(obj, args)
     motion_gen = MotionGenerator(
         cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid))
     )
     hand_open, hand_close = get_hand_open_close_qpos(robot, sim.device)
+    if args.debug_hand_state:
+        log_hand_setup(robot, hand_open, hand_close)
+
     upright_action = UprightAction(
         motion_generator=motion_gen,
         cfg=UprightActionCfg(
@@ -450,12 +644,14 @@ def run_upright_demo(
             hand_open_qpos=hand_open,
             hand_close_qpos=hand_close,
             approach_direction=torch.tensor(
-                [0.0, 0.0, -1.0], dtype=torch.float32, device=sim.device
+                BOTTLE_APPROACH_DIRECTION, dtype=torch.float32, device=sim.device
             ),
             pre_grasp_distance=0.15,
             lift_height=0.15,
-            upright_axis_sign=-1.0 if args.object_kind == "bottle" else 1.0,
-            place_press_depth=0.002,
+            sample_interval=PGI_SAMPLE_INTERVAL,
+            hand_interp_steps=PGI_HAND_CLOSE_STEPS,
+            upright_axis_sign=-1.0,
+            place_press_depth=0.0,
             place_press_steps=4,
             upright_hold_steps=3,
             place_hold_steps=8,
@@ -464,49 +660,99 @@ def run_upright_demo(
             release_retreat_lift=0.01,
             final_approach_steps=12,
             final_approach_preclose_width_margin=0.010,
-            grasp_hold_steps=4,
+            grasp_hold_steps=PGI_GRASP_HOLD_STEPS,
             use_grasp_width_qpos=True,
             gripper_max_open_width=GRIPPER_MAX_OPEN_WIDTH,
-            max_grasp_open_length=get_max_grasp_open_length(args.object_kind),
-            max_grasp_axis_approach_dot=get_max_grasp_axis_approach_dot(
-                args.object_kind
-            ),
-            grasp_squeeze_width=get_grasp_squeeze_width(args.object_kind),
-            min_dynamic_hand_close_qpos=get_min_dynamic_hand_close_qpos(
-                args.object_kind, hand_close
+            max_grasp_open_length=BOTTLE_MAX_GRASP_OPEN_LENGTH,
+            max_grasp_axis_approach_dot=BOTTLE_MAX_GRASP_AXIS_APPROACH_DOT,
+            max_grasp_axis_upright_axis_dot=BOTTLE_MAX_GRASP_AXIS_UPRIGHT_AXIS_DOT,
+            grasp_squeeze_width=BOTTLE_GRASP_SQUEEZE_WIDTH,
+            min_dynamic_hand_close_qpos=torch.full_like(
+                hand_close, BOTTLE_MIN_DYNAMIC_HAND_CLOSE_QPOS
             ),
         ),
     )
 
-    sim.init_gpu_physics()
+    if args.diagnose_grasp:
+        diagnose_upright_plan(robot, upright_action, semantics)
+        return
+
     sim.open_window()
-    input(f"Inspect the fallen {args.object_kind}, then press Enter to plan upright...")
+    if not args.auto_play:
+        input("Inspect the fallen bottle, then press Enter to plan upright...")
 
     start_time = time.time()
-    is_success, traj, _ = upright_action.execute(semantics)
+    is_success, traj, joint_ids = upright_action.execute(semantics)
     cost_time = time.time() - start_time
     logger.log_info(f"Plan upright trajectory cost time: {cost_time:.2f} seconds")
     if not is_success:
         logger.log_warning("Failed to plan upright trajectory.")
         return
 
-    input("Press Enter to start the upright demo...")
+    arm_dof = len(robot.get_joint_ids(name="arm"))
+    total_steps = traj.shape[1]
+    segments = get_upright_segment_lengths(upright_action)
+    post_grasp_clear_step = segments["pre"] + segments["final"] + segments["close"]
+    should_clear_object_dynamics = True
+    if args.debug_hand_state:
+        joint_names = [robot.joint_names[joint_id] for joint_id in joint_ids]
+        hand_traj = traj[:, :, arm_dof:]
+        logger.log_info(f"Action joint ids: {joint_ids}")
+        logger.log_info(f"Action joint names: {joint_names}")
+        logger.log_info(
+            f"Post-grasp object dynamics clear step: {post_grasp_clear_step}"
+        )
+        logger.log_info(
+            f"Planned hand qpos min: {format_tensor(hand_traj.min(dim=1).values[0])}"
+        )
+        logger.log_info(
+            f"Planned hand qpos max: {format_tensor(hand_traj.max(dim=1).values[0])}"
+        )
+
+    if not args.auto_play:
+        input("Press Enter to start the upright demo...")
+    last_logged_hand_target: torch.Tensor | None = None
+    log_stride = max(1, total_steps // 10)
     for i in range(traj.shape[1]):
-        robot.set_qpos(traj[:, i, :])
+        robot.set_qpos(traj[:, i, :], joint_ids=joint_ids)
         sim.update(step=4)
+        if should_clear_object_dynamics and i + 1 >= post_grasp_clear_step:
+            release_object_after_grasp(obj)
+            should_clear_object_dynamics = False
+            if args.debug_hand_state:
+                logger.log_info(
+                    f"Object dynamics cleared at step={i}/{total_steps - 1}"
+                )
+        if args.debug_hand_state:
+            action_hand_target = traj[:, i, arm_dof:]
+            target_changed = last_logged_hand_target is None or not torch.allclose(
+                action_hand_target, last_logged_hand_target, atol=1e-4
+            )
+            should_log = target_changed or i % log_stride == 0 or i == total_steps - 1
+            if should_log:
+                log_hand_execution_state(
+                    robot,
+                    step_idx=i,
+                    total_steps=total_steps,
+                    action_hand_target=action_hand_target,
+                )
+                last_logged_hand_target = action_hand_target.detach().clone()
+            if i % log_stride == 0 or i == total_steps - 1:
+                log_object_execution_state(
+                    obj,
+                    step_idx=i,
+                    total_steps=total_steps,
+                )
         time.sleep(1e-2)
-    input("Press Enter to exit the simulation...")
+    if not args.auto_play:
+        input("Press Enter to exit the simulation...")
 
 
 def main() -> None:
     args = parse_arguments()
     sim = initialize_simulation(args)
     robot = create_robot(sim, position=[0.0, 0.0, 0.0])
-
-    if args.demo_mode == "upright":
-        run_upright_demo(args, sim, robot)
-    else:
-        run_grasp_demo(args, sim, robot)
+    run_upright_demo(args, sim, robot)
 
 
 if __name__ == "__main__":

From 0561aaecb67a28c9fffc8da583e17194014f1557 Mon Sep 17 00:00:00 2001
From: yuecideng <dengyueci@qq.com>
Date: Wed, 17 Jun 2026 14:19:10 +0800
Subject: [PATCH 04/14] wip

---
 embodichain/lab/sim/atomic_actions/actions.py | 31 +++++--------------
 1 file changed, 8 insertions(+), 23 deletions(-)

diff --git a/embodichain/lab/sim/atomic_actions/actions.py b/embodichain/lab/sim/atomic_actions/actions.py
index d774cb98..ce1e42a0 100644
--- a/embodichain/lab/sim/atomic_actions/actions.py
+++ b/embodichain/lab/sim/atomic_actions/actions.py
@@ -234,14 +234,10 @@ def _interpolate_hand_qpos(
         start_hand_qpos = start_hand_qpos.to(self.device)
         end_hand_qpos = end_hand_qpos.to(self.device)
 
-        if start_hand_qpos.dim() == 1 and end_hand_qpos.dim() == 2:
-            start_hand_qpos = start_hand_qpos.unsqueeze(0).repeat(
-                end_hand_qpos.shape[0], 1
-            )
-        if start_hand_qpos.dim() == 2 and end_hand_qpos.dim() == 1:
-            end_hand_qpos = end_hand_qpos.unsqueeze(0).repeat(
-                start_hand_qpos.shape[0], 1
-            )
+        if start_hand_qpos.dim() == 1:
+            start_hand_qpos = start_hand_qpos.unsqueeze(0)
+        if end_hand_qpos.dim() == 1:
+            end_hand_qpos = end_hand_qpos.unsqueeze(0)
 
         weights = torch.linspace(
             0,
@@ -250,21 +246,10 @@ def _interpolate_hand_qpos(
             device=self.device,
             dtype=start_hand_qpos.dtype,
         )
-        if start_hand_qpos.dim() == 1 and end_hand_qpos.dim() == 1:
-            return torch.lerp(
-                start_hand_qpos.unsqueeze(0),
-                end_hand_qpos.unsqueeze(0),
-                weights[:, None],
-            )
-        if start_hand_qpos.dim() == 2 and end_hand_qpos.dim() == 2:
-            return torch.lerp(
-                start_hand_qpos.unsqueeze(1),
-                end_hand_qpos.unsqueeze(1),
-                weights[None, :, None],
-            )
-        logger.log_error(
-            "hand qpos must have shape (hand_dof,) or (n_envs, hand_dof)",
-            ValueError,
+        return torch.lerp(
+            start_hand_qpos.unsqueeze(1),
+            end_hand_qpos.unsqueeze(1),
+            weights[None, :, None],
         )
 
     def execute(

From 74fbe439a67fe0f8bd166e75b228dde67f95da8e Mon Sep 17 00:00:00 2001
From: yuecideng <dengyueci@qq.com>
Date: Wed, 17 Jun 2026 14:24:53 +0800
Subject: [PATCH 05/14] wip

---
 docs/source/overview/sim/atomic_actions.md                 | 2 +-
 docs/source/tutorial/atomic_actions.rst                    | 4 ++--
 scripts/tutorials/{sim => atomic_action}/atomic_actions.py | 0
 3 files changed, 3 insertions(+), 3 deletions(-)
 rename scripts/tutorials/{sim => atomic_action}/atomic_actions.py (100%)

diff --git a/docs/source/overview/sim/atomic_actions.md b/docs/source/overview/sim/atomic_actions.md
index 979df571..9d2523d5 100644
--- a/docs/source/overview/sim/atomic_actions.md
+++ b/docs/source/overview/sim/atomic_actions.md
@@ -238,4 +238,4 @@ is_success, traj = engine.execute_static(target_list=[target_pose])
 
 - {doc}`planners/motion_generator` — the trajectory planner used by every action
 - {doc}`sim_robot` — how control parts and IK solvers are configured
-- Tutorial: `scripts/tutorials/sim/atomic_actions.py`
+- Tutorial: `scripts/tutorials/atomic_action/atomic_actions.py`
diff --git a/docs/source/tutorial/atomic_actions.rst b/docs/source/tutorial/atomic_actions.rst
index 10b8e97c..411f374f 100644
--- a/docs/source/tutorial/atomic_actions.rst
+++ b/docs/source/tutorial/atomic_actions.rst
@@ -30,13 +30,13 @@ For the full design overview, architecture diagram, and extension guide see
 The Code
 --------
 
-The tutorial corresponds to the ``atomic_actions.py`` script in the ``scripts/tutorials/sim``
+The tutorial corresponds to the ``atomic_actions.py`` script in the ``scripts/tutorials/atomic_action``
 directory.
 
 .. dropdown:: Code for atomic_actions.py
     :icon: code
 
-    .. literalinclude:: ../../../scripts/tutorials/sim/atomic_actions.py
+    .. literalinclude:: ../../../scripts/tutorials/atomic_action/atomic_actions.py
         :language: python
         :linenos:
 
diff --git a/scripts/tutorials/sim/atomic_actions.py b/scripts/tutorials/atomic_action/atomic_actions.py
similarity index 100%
rename from scripts/tutorials/sim/atomic_actions.py
rename to scripts/tutorials/atomic_action/atomic_actions.py

From 9da06f968a7b6b77f9b1b2da7df94c5c3673a7eb Mon Sep 17 00:00:00 2001
From: yuecideng <dengyueci@qq.com>
Date: Wed, 17 Jun 2026 15:14:56 +0800
Subject: [PATCH 06/14] wip

---
 .../atomic_action/upright_atomic_action.py    | 27 +++----------------
 1 file changed, 4 insertions(+), 23 deletions(-)

diff --git a/scripts/tutorials/atomic_action/upright_atomic_action.py b/scripts/tutorials/atomic_action/upright_atomic_action.py
index ae1f5525..661dd5dc 100644
--- a/scripts/tutorials/atomic_action/upright_atomic_action.py
+++ b/scripts/tutorials/atomic_action/upright_atomic_action.py
@@ -224,26 +224,12 @@ def create_fallen_bottle(sim: SimulationManager) -> RigidObject:
         ),
         max_convex_hull_num=16,
         init_pos=[-0.4294, -0.0825, -0.0997],
-        init_rot=[90.0, 90.0, 0.0],
+        init_rot=[90.0, 135.0, 0.0],
         body_scale=(bottle_scale, bottle_scale, bottle_scale),
     )
     return sim.add_rigid_object(cfg=bottle_cfg)
 
 
-def create_support_plane(sim: SimulationManager) -> RigidObject:
-    support_cfg = RigidObjectCfg(
-        uid="support_plane",
-        shape=CubeCfg(size=[1.4, 1.4, 0.02]),
-        attrs=RigidBodyAttributesCfg(
-            dynamic_friction=0.97,
-            static_friction=0.99,
-        ),
-        body_type="static",
-        init_pos=[-0.2, 0.0, -0.015],
-    )
-    return sim.add_rigid_object(cfg=support_cfg)
-
-
 def settle_object(sim: SimulationManager, obj: RigidObject, step: int = 5) -> None:
     """Settle an object through the same explicit sequence on CPU and CUDA."""
     if sim.device.type == "cuda":
@@ -254,11 +240,6 @@ def settle_object(sim: SimulationManager, obj: RigidObject, step: int = 5) -> No
     obj.clear_dynamics()
 
 
-def freeze_object_before_grasp(obj: RigidObject) -> None:
-    """Clear residual motion before planning without changing the body type."""
-    obj.clear_dynamics()
-
-
 def release_object_after_grasp(obj: RigidObject) -> None:
     """Clear residual motion after the gripper has closed on the object."""
     obj.clear_dynamics()
@@ -624,10 +605,11 @@ def create_object_semantics(
 def run_upright_demo(
     args: argparse.Namespace, sim: SimulationManager, robot: Robot
 ) -> None:
-    create_support_plane(sim)
+
+    sim.open_window()
+
     obj = create_fallen_bottle(sim)
     settle_object(sim, obj, step=5)
-    freeze_object_before_grasp(obj)
     semantics = create_object_semantics(obj, args)
     motion_gen = MotionGenerator(
         cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid))
@@ -677,7 +659,6 @@ def run_upright_demo(
         diagnose_upright_plan(robot, upright_action, semantics)
         return
 
-    sim.open_window()
     if not args.auto_play:
         input("Inspect the fallen bottle, then press Enter to plan upright...")
 

From 2aaac19d80c36e532f75859990a4babf18bf2afe Mon Sep 17 00:00:00 2001
From: yuecideng <dengyueci@qq.com>
Date: Wed, 17 Jun 2026 15:25:14 +0800
Subject: [PATCH 07/14] wip

---
 scripts/tutorials/atomic_action/upright_atomic_action.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/tutorials/atomic_action/upright_atomic_action.py b/scripts/tutorials/atomic_action/upright_atomic_action.py
index 661dd5dc..8a5f5d42 100644
--- a/scripts/tutorials/atomic_action/upright_atomic_action.py
+++ b/scripts/tutorials/atomic_action/upright_atomic_action.py
@@ -70,7 +70,7 @@
 GRIPPER_FINGER_LENGTH = 0.088
 GRIPPER_ROOT_Z_WIDTH = 0.096
 GRIPPER_Y_THICKNESS = 0.040
-GRIPPER_TCP_Z = 0.121
+GRIPPER_TCP_Z = 0.15
 PGI_SAMPLE_INTERVAL = 120
 PGI_HAND_CLOSE_STEPS = 12
 PGI_GRASP_HOLD_STEPS = 20

From ddd1236e4efb5ac4b3d28e232ea3858749dd4db6 Mon Sep 17 00:00:00 2001
From: skywhite1024 <129768272+skywhite1024@users.noreply.github.com>
Date: Thu, 18 Jun 2026 16:54:18 +0800
Subject: [PATCH 08/14] Add move object atomic action

---
 .../lab/sim/atomic_actions/__init__.py        |   8 +
 embodichain/lab/sim/atomic_actions/actions.py | 227 ++++++++-
 embodichain/lab/sim/atomic_actions/core.py    |  22 +
 embodichain/lab/sim/atomic_actions/engine.py  |  57 ++-
 .../move_object_atomic_actions.py             | 467 ++++++++++++++++++
 5 files changed, 765 insertions(+), 16 deletions(-)
 create mode 100644 scripts/tutorials/atomic_action/move_object_atomic_actions.py

diff --git a/embodichain/lab/sim/atomic_actions/__init__.py b/embodichain/lab/sim/atomic_actions/__init__.py
index 9bc7a741..7fbc506f 100644
--- a/embodichain/lab/sim/atomic_actions/__init__.py
+++ b/embodichain/lab/sim/atomic_actions/__init__.py
@@ -26,15 +26,19 @@
     AntipodalAffordance,
     InteractionPoints,
     ObjectSemantics,
+    HeldObjectState,
+    MoveObjectTarget,
     ActionCfg,
     AtomicAction,
 )
 from .actions import (
     MoveAction,
+    MoveObjectAction,
     PickUpAction,
     PlaceAction,
     UprightAction,
     MoveActionCfg,
+    MoveObjectActionCfg,
     PickUpActionCfg,
     PlaceActionCfg,
     UprightActionCfg,
@@ -51,14 +55,18 @@
     "Affordance",
     "InteractionPoints",
     "ObjectSemantics",
+    "HeldObjectState",
+    "MoveObjectTarget",
     "ActionCfg",
     "AtomicAction",
     # Action implementations
     "MoveAction",
+    "MoveObjectAction",
     "PickUpAction",
     "PlaceAction",
     "UprightAction",
     "MoveActionCfg",
+    "MoveObjectActionCfg",
     "PickUpActionCfg",
     "PlaceActionCfg",
     "UprightActionCfg",
diff --git a/embodichain/lab/sim/atomic_actions/actions.py b/embodichain/lab/sim/atomic_actions/actions.py
index ce1e42a0..3a4696c6 100644
--- a/embodichain/lab/sim/atomic_actions/actions.py
+++ b/embodichain/lab/sim/atomic_actions/actions.py
@@ -22,7 +22,14 @@
 from embodichain.lab.sim.planners import PlanResult, PlanState, MoveType
 from embodichain.lab.sim.planners.motion_generator import MotionGenOptions
 from embodichain.lab.sim.planners.toppra_planner import ToppraPlanOptions
-from .core import AtomicAction, ObjectSemantics, AntipodalAffordance, ActionCfg
+from .core import (
+    AtomicAction,
+    ObjectSemantics,
+    AntipodalAffordance,
+    ActionCfg,
+    HeldObjectState,
+    MoveObjectTarget,
+)
 from embodichain.utils import logger
 from embodichain.utils import configclass
 from embodichain.lab.sim.utility.action_utils import interpolate_with_distance
@@ -252,6 +259,15 @@ def _interpolate_hand_qpos(
             weights[None, :, None],
         )
 
+    @staticmethod
+    def _invert_pose(pose: torch.Tensor) -> torch.Tensor:
+        """Invert a batched homogeneous transform."""
+        inv_pose = pose.clone()
+        rot_t = pose[:, :3, :3].transpose(1, 2)
+        inv_pose[:, :3, :3] = rot_t
+        inv_pose[:, :3, 3] = -torch.bmm(rot_t, pose[:, :3, 3:4]).squeeze(-1)
+        return inv_pose
+
     def execute(
         self,
         target: Union[ObjectSemantics, torch.Tensor],
@@ -325,7 +341,8 @@ def __init__(
         super().__init__(
             motion_generator, cfg=cfg if cfg is not None else PickUpActionCfg()
         )
-        self.cfg = cfg
+        self.cfg = cfg if cfg is not None else self.cfg
+        self._held_object_state: HeldObjectState | None = None
         self.approach_direction = self.cfg.approach_direction.to(self.device)
         if self.cfg.hand_open_qpos is None:
             logger.log_error("hand_open_qpos must be specified in PickUpActionCfg")
@@ -379,8 +396,10 @@ def execute(
         """
 
         # Resolve grasp pose
-        if isinstance(target, ObjectSemantics):
-            is_success, grasp_xpos = self._resolve_grasp_pose(target)
+        self._held_object_state = None
+        target_semantics = target if isinstance(target, ObjectSemantics) else None
+        if target_semantics is not None:
+            is_success, grasp_xpos = self._resolve_grasp_pose(target_semantics)
         else:
             is_success, grasp_xpos = self._resolve_pose_target(
                 target, action_name=self.__class__.__name__
@@ -392,6 +411,15 @@ def execute(
             logger.log_warning("Failed to resolve grasp pose for all environments.")
             return False, torch.empty(0), self.joint_ids
 
+        if target_semantics is not None:
+            obj_poses = target_semantics.entity.get_local_pose(to_matrix=True)
+            object_to_eef = torch.bmm(self._invert_pose(obj_poses), grasp_xpos)
+            self._held_object_state = HeldObjectState(
+                semantics=target_semantics,
+                object_to_eef=object_to_eef,
+                grasp_xpos=grasp_xpos,
+            )
+
         # Compute pre-grasp pose
         # TODO: only for parallel gripper, approach in negative grasp z direction
         grasp_z = grasp_xpos[:, :3, 2]
@@ -548,6 +576,10 @@ def validate(self, target, start_qpos=None, **kwargs):
         # TODO: implement proper validation logic for pick up action
         return True
 
+    def get_held_object_state(self) -> HeldObjectState | None:
+        """Return the held-object state produced by the latest successful pickup."""
+        return self._held_object_state
+
 
 @configclass
 class UprightActionCfg(PickUpActionCfg):
@@ -1275,6 +1307,163 @@ def execute(
         return True, trajectory, self.joint_ids
 
 
+@configclass
+class MoveObjectActionCfg(MoveActionCfg):
+    name: str = "move_object"
+    """Name of the action, used for identification and logging."""
+
+    hand_close_qpos: torch.Tensor | None = None
+    """[hand_dof,] or [n_envs, hand_dof] joint positions for keeping the hand closed."""
+
+    hand_control_part: str = "hand"
+    """Name of the robot part that controls the hand joints."""
+
+
+class MoveObjectAction(MoveAction):
+    def __init__(
+        self,
+        motion_generator: MotionGenerator,
+        cfg: MoveObjectActionCfg | None = None,
+    ):
+        """
+        Initialize the atomic action.
+        Args:
+            motion_generator: The motion generator instance to use for planning.
+            cfg: Configuration for the action.
+        """
+        super().__init__(
+            motion_generator, cfg=cfg if cfg is not None else MoveObjectActionCfg()
+        )
+        self.cfg = cfg if cfg is not None else self.cfg
+        self._held_object_state: HeldObjectState | None = None
+        if self.cfg.hand_close_qpos is None:
+            logger.log_error("hand_close_qpos must be specified in MoveObjectActionCfg")
+        self.hand_close_qpos = self.cfg.hand_close_qpos.to(self.device)
+
+        self.hand_joint_ids = self.robot.get_joint_ids(name=self.cfg.hand_control_part)
+        self.joint_ids = self.arm_joint_ids + self.hand_joint_ids
+        self.arm_dof = len(self.arm_joint_ids)
+        self.hand_dof = len(self.hand_joint_ids)
+        self.dof = len(self.joint_ids)
+
+    def _resolve_move_object_target(
+        self,
+        target: MoveObjectTarget,
+        action_context: dict | None = None,
+        held_object_state: HeldObjectState | None = None,
+    ) -> tuple[bool, torch.Tensor, HeldObjectState]:
+        """Resolve an object target pose into an end-effector target pose."""
+        if not isinstance(target, MoveObjectTarget):
+            logger.log_error(
+                "MoveObjectAction target must be a MoveObjectTarget.",
+                TypeError,
+            )
+
+        held_state = held_object_state
+        if held_state is None and action_context is not None:
+            held_state = action_context.get("held_object_state")
+        if held_state is None:
+            logger.log_error(
+                "MoveObjectTarget requires a HeldObjectState from a prior PickUpAction.",
+                ValueError,
+            )
+
+        object_target_pose = target.object_target_pose.to(
+            device=self.device, dtype=torch.float32
+        )
+        if object_target_pose.shape == (4, 4):
+            object_target_pose = object_target_pose.unsqueeze(0).repeat(
+                self.n_envs, 1, 1
+            )
+        if object_target_pose.shape != (self.n_envs, 4, 4):
+            logger.log_error(
+                f"object_target_pose must have shape (4, 4) or "
+                f"({self.n_envs}, 4, 4), but got {object_target_pose.shape}",
+                ValueError,
+            )
+
+        object_to_eef = held_state.object_to_eef.to(
+            device=self.device, dtype=torch.float32
+        )
+        if object_to_eef.shape == (4, 4):
+            object_to_eef = object_to_eef.unsqueeze(0).repeat(self.n_envs, 1, 1)
+        if object_to_eef.shape != (self.n_envs, 4, 4):
+            logger.log_error(
+                f"object_to_eef must have shape (4, 4) or "
+                f"({self.n_envs}, 4, 4), but got {object_to_eef.shape}",
+                ValueError,
+            )
+
+        move_object_xpos = torch.bmm(object_target_pose, object_to_eef)
+        return True, move_object_xpos, held_state
+
+    def _repeat_hand_close_qpos(self, n_waypoints: int) -> torch.Tensor:
+        """Repeat the closed-hand target across trajectory waypoints."""
+        hand_qpos = self.hand_close_qpos.to(device=self.device, dtype=torch.float32)
+        if hand_qpos.shape == (self.hand_dof,):
+            hand_qpos = hand_qpos.unsqueeze(0).repeat(self.n_envs, 1)
+        if hand_qpos.shape != (self.n_envs, self.hand_dof):
+            logger.log_error(
+                f"hand_close_qpos must have shape ({self.hand_dof},) or "
+                f"({self.n_envs}, {self.hand_dof}), but got {hand_qpos.shape}",
+                ValueError,
+            )
+        return hand_qpos.unsqueeze(1).repeat(1, n_waypoints, 1)
+
+    def execute(
+        self,
+        target: MoveObjectTarget,
+        start_qpos: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> tuple[bool, torch.Tensor, list[float]]:
+        """Move the held object to a target object pose and keep grasping it."""
+        is_success, move_object_xpos, held_state = self._resolve_move_object_target(
+            target,
+            action_context=kwargs.get("action_context"),
+            held_object_state=kwargs.get("held_object_state"),
+        )
+        start_qpos = self._resolve_start_qpos(start_qpos, self.arm_dof)
+        self._held_object_state = held_state
+
+        if not is_success:
+            logger.log_warning("Failed to resolve move_object target pose.")
+            return False, torch.empty(0), self.joint_ids
+
+        target_states_list = [
+            [
+                PlanState(xpos=move_object_xpos[i], move_type=MoveType.EEF_MOVE),
+            ]
+            for i in range(self.n_envs)
+        ]
+        trajectory = torch.zeros(
+            size=(self.n_envs, self.cfg.sample_interval, self.dof),
+            dtype=torch.float32,
+            device=self.device,
+        )
+        is_success, plan_traj = self._plan_arm_trajectory(
+            target_states_list,
+            start_qpos,
+            self.cfg.sample_interval,
+            self.arm_dof,
+        )
+        if not is_success:
+            logger.log_warning("Failed to plan move_object trajectory.")
+            return False, trajectory, self.joint_ids
+        trajectory[:, :, : self.arm_dof] = plan_traj
+        trajectory[:, :, self.arm_dof :] = self._repeat_hand_close_qpos(
+            self.cfg.sample_interval
+        )
+        return True, trajectory, self.joint_ids
+
+    def get_held_object_state(self) -> HeldObjectState | None:
+        """Return the held-object state after moving the object."""
+        return self._held_object_state
+
+    def validate(self, target, start_qpos=None, **kwargs):
+        # TODO: implement proper validation logic for move object action
+        return True
+
+
 @configclass
 class PlaceActionCfg(GraspActionCfg):
     name: str = "place"
@@ -1296,7 +1485,8 @@ def __init__(
         super().__init__(
             motion_generator, cfg=cfg if cfg is not None else PlaceActionCfg()
         )
-        self.cfg = cfg
+        self.cfg = cfg if cfg is not None else self.cfg
+        self._held_object_state: HeldObjectState | None = None
         if self.cfg.hand_open_qpos is None:
             logger.log_error("hand_open_qpos must be specified in PlaceActionCfg")
         if self.cfg.hand_close_qpos is None:
@@ -1311,7 +1501,7 @@ def __init__(
 
     def execute(
         self,
-        target: Union[ObjectSemantics, torch.Tensor],
+        target: Union[ObjectSemantics, torch.Tensor, None],
         start_qpos: Optional[torch.Tensor] = None,
         **kwargs,
     ) -> tuple[bool, torch.Tensor, list[float]]:
@@ -1327,10 +1517,29 @@ def execute(
             trajectory of shape (n_envs, n_waypoints, dof),
             joint_ids corresponding to trajectory
         """
+        self._held_object_state = None
+        start_qpos = self._resolve_start_qpos(start_qpos, self.arm_dof)
+
+        if target is None:
+            n_open_waypoint = max(2, self.cfg.hand_interp_steps)
+            hand_open_trajectory = torch.zeros(
+                size=(self.n_envs, n_open_waypoint, self.dof),
+                dtype=torch.float32,
+                device=self.device,
+            )
+            hand_open_trajectory[:, :, : self.arm_dof] = start_qpos.unsqueeze(1).repeat(
+                1, n_open_waypoint, 1
+            )
+            hand_open_trajectory[:, :, self.arm_dof :] = self._interpolate_hand_qpos(
+                self.hand_close_qpos,
+                self.hand_open_qpos,
+                n_open_waypoint,
+            )
+            return True, hand_open_trajectory, self.joint_ids
+
         is_success, place_xpos = self._resolve_pose_target(
             target, action_name=self.__class__.__name__
         )
-        start_qpos = self._resolve_start_qpos(start_qpos, self.arm_dof)
 
         # TODO: warning and fallback if no valid grasp pose found
         if not is_success:
@@ -1425,3 +1634,7 @@ def execute(
     def validate(self, target, start_qpos=None, **kwargs):
         # TODO: implement proper validation logic for pick up action
         return True
+
+    def get_held_object_state(self) -> HeldObjectState | None:
+        """Return None after place releases the held object."""
+        return self._held_object_state
diff --git a/embodichain/lab/sim/atomic_actions/core.py b/embodichain/lab/sim/atomic_actions/core.py
index a30698cb..4c3c1e48 100644
--- a/embodichain/lab/sim/atomic_actions/core.py
+++ b/embodichain/lab/sim/atomic_actions/core.py
@@ -320,6 +320,28 @@ def __post_init__(self) -> None:
         self.affordance.geometry = self.geometry
 
 
+@dataclass
+class HeldObjectState:
+    """State shared by actions while an object is held by the robot."""
+
+    semantics: ObjectSemantics
+    """Semantic object currently held by the gripper."""
+
+    object_to_eef: torch.Tensor
+    """Batched transform from object frame to end-effector frame, shape [B, 4, 4]."""
+
+    grasp_xpos: torch.Tensor
+    """Batched end-effector grasp pose selected during pickup, shape [B, 4, 4]."""
+
+
+@dataclass
+class MoveObjectTarget:
+    """Object-centric target for moving a held object without releasing it."""
+
+    object_target_pose: torch.Tensor
+    """Target object pose, shape [4, 4] or [B, 4, 4]."""
+
+
 # =============================================================================
 # ActionCfg and AtomicAction
 # =============================================================================
diff --git a/embodichain/lab/sim/atomic_actions/engine.py b/embodichain/lab/sim/atomic_actions/engine.py
index 15b868a8..44fe789a 100644
--- a/embodichain/lab/sim/atomic_actions/engine.py
+++ b/embodichain/lab/sim/atomic_actions/engine.py
@@ -21,7 +21,7 @@
 
 from embodichain.lab.sim.planners import PlanResult
 from embodichain.utils import logger
-from .core import AtomicAction, ObjectSemantics, ActionCfg
+from .core import AtomicAction, ObjectSemantics, ActionCfg, MoveObjectTarget
 
 if TYPE_CHECKING:
     from embodichain.lab.sim.planners import MotionGenerator
@@ -178,6 +178,7 @@ def __init__(
 
         # Semantic analyzer for object understanding
         self._semantic_analyzer = SemanticAnalyzer()
+        self._action_context: Dict[str, Any] = {}
 
         # Initialize default actions
         self._actions: Dict[str, AtomicAction] = self._init_actions(actions_cfg_list)
@@ -186,11 +187,12 @@ def _init_actions(
         self, actions_cfg_list: Optional[List[ActionCfg]] = None
     ) -> Dict[str, "AtomicAction"]:
         actions: Dict[str, AtomicAction] = {}
-        from .actions import MoveAction, PickUpAction, PlaceAction
+        from .actions import MoveAction, MoveObjectAction, PickUpAction, PlaceAction
 
         builtin_action_map: Dict[str, Type[AtomicAction]] = {
             "move": MoveAction,
             "pick_up": PickUpAction,
+            "move_object": MoveObjectAction,
             "place": PlaceAction,
         }
         if actions_cfg_list is not None:
@@ -207,7 +209,16 @@ def _init_actions(
 
     def execute_static(
         self,
-        target_list: List[Union[torch.Tensor, str, ObjectSemantics, Dict[str, Any]]],
+        target_list: List[
+            Union[
+                torch.Tensor,
+                str,
+                ObjectSemantics,
+                MoveObjectTarget,
+                Dict[str, Any],
+                None,
+            ]
+        ],
     ) -> tuple[bool, torch.Tensor]:
         """Execute a sequence of actions to target poses.
 
@@ -233,10 +244,22 @@ def execute_static(
             arm_joint_ids = self.motion_generator.robot.get_joint_ids(name=control_part)
             start_qpos_part = start_qpos[:, arm_joint_ids]
             is_success, traj, joint_ids = atom_action.execute(
-                target=target, start_qpos=start_qpos_part
+                target=target,
+                start_qpos=start_qpos_part,
+                action_context=self._action_context,
+                held_object_state=self._action_context.get("held_object_state"),
             )
             if not is_success:
                 return False, all_trajectory
+
+            held_state_getter = getattr(atom_action, "get_held_object_state", None)
+            if callable(held_state_getter):
+                held_state = held_state_getter()
+                if held_state is None:
+                    self._action_context.pop("held_object_state", None)
+                else:
+                    self._action_context["held_object_state"] = held_state
+
             n_waypoints = traj.shape[1]
 
             traj_full = torch.zeros(
@@ -254,7 +277,9 @@ def execute_static(
     def validate(
         self,
         action_name: str,
-        target: Union[torch.Tensor, str, ObjectSemantics, Dict[str, Any]],
+        target: Union[
+            torch.Tensor, str, ObjectSemantics, MoveObjectTarget, Dict[str, Any], None
+        ],
         **kwargs,
     ) -> bool:
         """Validate if a named action is feasible without executing."""
@@ -268,22 +293,35 @@ def validate(
 
     def _resolve_target(
         self,
-        target: Union[torch.Tensor, str, ObjectSemantics, Dict[str, Any]],
-    ) -> Union[torch.Tensor, ObjectSemantics]:
+        target: Union[
+            torch.Tensor, str, ObjectSemantics, MoveObjectTarget, Dict[str, Any], None
+        ],
+    ) -> Union[torch.Tensor, ObjectSemantics, MoveObjectTarget, None]:
         """Resolve user target input into tensor pose or ObjectSemantics.
 
         Supports the convenience dict format in ``execute`` and ``validate``.
         """
+        if target is None:
+            return None
+
         if isinstance(target, torch.Tensor):
             return target
 
-        if isinstance(target, ObjectSemantics):
+        if isinstance(target, (ObjectSemantics, MoveObjectTarget)):
             return target
 
         if isinstance(target, str):
             return self._semantic_analyzer.analyze(target)
 
         if isinstance(target, dict):
+            if "object_target_pose" in target:
+                object_target_pose = target["object_target_pose"]
+                if not isinstance(object_target_pose, torch.Tensor):
+                    raise TypeError(
+                        "target['object_target_pose'] must be a torch.Tensor"
+                    )
+                return MoveObjectTarget(object_target_pose=object_target_pose)
+
             if "pose" in target:
                 pose = target["pose"]
                 if not isinstance(pose, torch.Tensor):
@@ -328,7 +366,8 @@ def _resolve_target(
             return semantics
 
         raise TypeError(
-            "target must be torch.Tensor, str, ObjectSemantics, or Dict[str, Any]"
+            "target must be torch.Tensor, str, ObjectSemantics, MoveObjectTarget, "
+            "Dict[str, Any], or None"
         )
 
     def get_semantic_analyzer(self) -> SemanticAnalyzer:
diff --git a/scripts/tutorials/atomic_action/move_object_atomic_actions.py b/scripts/tutorials/atomic_action/move_object_atomic_actions.py
new file mode 100644
index 00000000..34ae37ca
--- /dev/null
+++ b/scripts/tutorials/atomic_action/move_object_atomic_actions.py
@@ -0,0 +1,467 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Demonstrate moving a held object to an object-centric target pose."""
+
+from __future__ import annotations
+
+import argparse
+import sys
+import time
+from pathlib import Path
+
+_REPO_ROOT = Path(__file__).resolve().parents[3]
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))
+
+import torch
+
+from embodichain.data import get_data_path
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
+from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
+from embodichain.lab.sim.atomic_actions import (
+    AntipodalAffordance,
+    AtomicActionEngine,
+    MoveActionCfg,
+    MoveObjectActionCfg,
+    MoveObjectTarget,
+    ObjectSemantics,
+    PickUpActionCfg,
+    PlaceActionCfg,
+)
+from embodichain.lab.sim.cfg import (
+    JointDrivePropertiesCfg,
+    LightCfg,
+    RenderCfg,
+    RigidBodyAttributesCfg,
+    RigidObjectCfg,
+    RobotCfg,
+    URDFCfg,
+)
+from embodichain.lab.sim.objects import RigidObject, Robot
+from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg
+from embodichain.lab.sim.shapes import CubeCfg, MeshCfg
+from embodichain.lab.sim.solvers import PytorchSolverCfg
+from embodichain.toolkits.graspkit.pg_grasp.antipodal_generator import (
+    AntipodalSamplerCfg,
+    GraspGeneratorCfg,
+)
+from embodichain.toolkits.graspkit.pg_grasp.gripper_collision_checker import (
+    GripperCollisionCfg,
+)
+from embodichain.utils import logger
+
+GRIPPER_URDF_PATH = "DH_PGI_140_80/DH_PGI_140_80.urdf"
+GRIPPER_HAND_JOINT_PATTERN = "GRIPPER_FINGER1_JOINT_1"
+GRIPPER_MAX_OPEN_WIDTH = 0.080
+GRIPPER_FINGER_LENGTH = 0.088
+GRIPPER_ROOT_Z_WIDTH = 0.096
+GRIPPER_Y_THICKNESS = 0.040
+GRIPPER_TCP_Z = 0.15
+
+BOTTLE_LABEL = "bottle"
+BOTTLE_APPROACH_DIRECTION = (0.0, 0.0, -1.0)
+BOTTLE_MIN_HAND_CLOSE_QPOS = 0.024
+
+MOVE_SAMPLE_INTERVAL = 60
+PICK_SAMPLE_INTERVAL = 120
+MOVE_OBJECT_SAMPLE_INTERVAL = 120
+PLACE_SAMPLE_INTERVAL = 36
+HAND_INTERP_STEPS = 12
+POST_TRAJECTORY_STEPS = 240
+TABLE_SIZE = [1.0, 1.4, 0.05]
+TABLE_TOP_Z = -0.045
+
+
+def parse_arguments() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description="Demonstrate MoveObjectAction with optional release."
+    )
+    add_env_launcher_args_to_parser(parser)
+    parser.add_argument(
+        "--mode",
+        choices=("hold", "release"),
+        default="hold",
+        help="hold keeps the bottle in the gripper; release opens the gripper.",
+    )
+    parser.add_argument(
+        "--n_sample",
+        type=int,
+        default=10000,
+        help="Number of samples for antipodal grasp generation.",
+    )
+    parser.add_argument(
+        "--force_reannotate",
+        action="store_true",
+        help="Force grasp region re-annotation instead of using cached data.",
+    )
+    parser.add_argument(
+        "--auto_play",
+        action="store_true",
+        help="Run the viewer demo without waiting for keyboard input.",
+    )
+    parser.add_argument(
+        "--debug_state",
+        action="store_true",
+        help="Log bottle pose during replay.",
+    )
+    return parser.parse_args()
+
+
+def initialize_simulation(args: argparse.Namespace) -> SimulationManager:
+    cfg = SimulationManagerCfg(
+        headless=True,
+        sim_device=args.device,
+        render_cfg=RenderCfg(renderer=args.renderer),
+        physics_dt=1.0 / 100.0,
+        arena_space=2.5,
+    )
+    sim = SimulationManager(cfg)
+    sim.add_light(
+        cfg=LightCfg(
+            uid="main_light",
+            color=(0.6, 0.6, 0.6),
+            intensity=30.0,
+            init_pos=(1.0, 0.0, 3.0),
+        )
+    )
+    return sim
+
+
+def create_robot(sim: SimulationManager, position=(0.0, 0.0, 0.0)) -> Robot:
+    ur5_urdf_path = get_data_path("UniversalRobots/UR5/UR5.urdf")
+    gripper_urdf_path = get_data_path(GRIPPER_URDF_PATH)
+
+    cfg = RobotCfg(
+        uid="UR5",
+        urdf_cfg=URDFCfg(
+            components=[
+                {"component_type": "arm", "urdf_path": ur5_urdf_path},
+                {"component_type": "hand", "urdf_path": gripper_urdf_path},
+            ]
+        ),
+        drive_pros=JointDrivePropertiesCfg(
+            stiffness={"JOINT[0-9]": 1e4, GRIPPER_HAND_JOINT_PATTERN: 1e3},
+            damping={"JOINT[0-9]": 1e3, GRIPPER_HAND_JOINT_PATTERN: 1e2},
+            max_effort={"JOINT[0-9]": 1e5, GRIPPER_HAND_JOINT_PATTERN: 1e4},
+            drive_type="force",
+        ),
+        control_parts={
+            "arm": ["JOINT[0-9]"],
+            "hand": [GRIPPER_HAND_JOINT_PATTERN],
+        },
+        solver_cfg={
+            "arm": PytorchSolverCfg(
+                end_link_name="ee_link",
+                root_link_name="base_link",
+                tcp=[
+                    [1.0, 0.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0, 0.0],
+                    [0.0, 0.0, 1.0, GRIPPER_TCP_Z],
+                    [0.0, 0.0, 0.0, 1.0],
+                ],
+            )
+        },
+        init_qpos=[0.0, -1.57, 1.57, -1.57, -1.57, 0.0, 0.0, 0.0],
+        init_pos=position,
+    )
+    return sim.add_robot(cfg=cfg)
+
+
+def create_table(sim: SimulationManager) -> RigidObject:
+    cfg = RigidObjectCfg(
+        uid="table",
+        shape=CubeCfg(size=TABLE_SIZE),
+        body_type="static",
+        attrs=RigidBodyAttributesCfg(
+            dynamic_friction=0.8,
+            static_friction=0.9,
+        ),
+        init_pos=[-0.30, 0.10, TABLE_TOP_Z - 0.5 * TABLE_SIZE[2]],
+    )
+    return sim.add_rigid_object(cfg=cfg)
+
+
+def create_fallen_bottle(sim: SimulationManager) -> RigidObject:
+    bottle_scale = 0.0008
+    cfg = RigidObjectCfg(
+        uid="bottle",
+        shape=MeshCfg(fpath=get_data_path("ScannedBottle/yibao.ply")),
+        attrs=RigidBodyAttributesCfg(
+            mass=0.02,
+            dynamic_friction=0.97,
+            static_friction=0.99,
+        ),
+        max_convex_hull_num=16,
+        init_pos=[-0.4294, -0.0825, -0.0997],
+        init_rot=[90.0, 45.0, 0.0],
+        body_scale=(bottle_scale, bottle_scale, bottle_scale),
+    )
+    return sim.add_rigid_object(cfg=cfg)
+
+
+def settle_object(sim: SimulationManager, obj: RigidObject, step: int = 5) -> None:
+    if sim.device.type == "cuda":
+        sim.init_gpu_physics()
+    obj.reset()
+    sim.update(step=step)
+    obj.clear_dynamics()
+
+
+def build_grasp_generator_cfg(args: argparse.Namespace) -> GraspGeneratorCfg:
+    return GraspGeneratorCfg(
+        viser_port=11801,
+        antipodal_sampler_cfg=AntipodalSamplerCfg(
+            n_sample=args.n_sample,
+            max_length=GRIPPER_MAX_OPEN_WIDTH,
+            min_length=0.003,
+        ),
+        is_partial_annotate=False,
+        is_filter_ground_collision=False,
+    )
+
+
+def build_gripper_collision_cfg() -> GripperCollisionCfg:
+    return GripperCollisionCfg(
+        max_open_length=GRIPPER_MAX_OPEN_WIDTH,
+        finger_length=GRIPPER_FINGER_LENGTH,
+        y_thickness=GRIPPER_Y_THICKNESS,
+        root_z_width=GRIPPER_ROOT_Z_WIDTH,
+        open_check_margin=0.002,
+        point_sample_dense=0.012,
+    )
+
+
+def create_object_semantics(
+    obj: RigidObject, args: argparse.Namespace
+) -> ObjectSemantics:
+    return ObjectSemantics(
+        label=BOTTLE_LABEL,
+        geometry={
+            "mesh_vertices": obj.get_vertices(env_ids=[0], scale=True)[0],
+            "mesh_triangles": obj.get_triangles(env_ids=[0])[0],
+        },
+        affordance=AntipodalAffordance(
+            object_label=BOTTLE_LABEL,
+            force_reannotate=args.force_reannotate,
+            custom_config={
+                "gripper_collision_cfg": build_gripper_collision_cfg(),
+                "generator_cfg": build_grasp_generator_cfg(args),
+            },
+        ),
+        entity=obj,
+    )
+
+
+def get_hand_open_close_qpos(
+    robot: Robot, device: torch.device
+) -> tuple[torch.Tensor, torch.Tensor]:
+    hand_limits = robot.get_qpos_limits(name="hand")[0].to(
+        device=device, dtype=torch.float32
+    )
+    hand_open = hand_limits[:, 0]
+    hand_close_limit = hand_limits[:, 1]
+    hand_close = torch.minimum(
+        hand_close_limit,
+        torch.full_like(hand_close_limit, BOTTLE_MIN_HAND_CLOSE_QPOS),
+    )
+    return hand_open, hand_close
+
+
+def make_top_down_eef_pose(position: torch.Tensor) -> torch.Tensor:
+    pose = torch.eye(4, dtype=torch.float32, device=position.device)
+    pose[:3, :3] = torch.tensor(
+        [
+            [-0.0539, -0.9985, -0.0022],
+            [-0.9977, 0.0540, -0.0401],
+            [0.0401, 0.0000, -0.9992],
+        ],
+        dtype=torch.float32,
+        device=position.device,
+    )
+    pose[:3, 3] = position
+    return pose
+
+
+def make_upright_object_pose(device: torch.device) -> torch.Tensor:
+    pose = torch.eye(4, dtype=torch.float32, device=device)
+    pose[:3, :3] = torch.tensor(
+        [
+            [1.0, 0.0, 0.0],
+            [0.0, -1.0, 0.0],
+            [0.0, 0.0, -1.0],
+        ],
+        dtype=torch.float32,
+        device=device,
+    )
+    pose[:3, 3] = torch.tensor([0.28, -0.2, 0.22], dtype=torch.float32, device=device)
+    return pose
+
+
+def compute_pick_close_end_step() -> int:
+    motion_waypoints = PICK_SAMPLE_INTERVAL - HAND_INTERP_STEPS
+    n_approach = int(round(motion_waypoints) * 0.6)
+    return MOVE_SAMPLE_INTERVAL + n_approach + HAND_INTERP_STEPS
+
+
+def format_tensor(tensor: torch.Tensor) -> str:
+    rounded = (tensor.detach().cpu() * 10000.0).round() / 10000.0
+    return str(rounded.tolist())
+
+
+def log_object_state(obj: RigidObject, label: str) -> None:
+    obj_pose = obj.get_local_pose(to_matrix=True)
+    logger.log_info(
+        f"{label}: pos={format_tensor(obj_pose[0, :3, 3])}, "
+        f"z_axis={format_tensor(obj_pose[0, :3, 2])}"
+    )
+
+
+def build_action_sequence(
+    args: argparse.Namespace,
+    hand_open: torch.Tensor,
+    hand_close: torch.Tensor,
+    device: torch.device,
+) -> tuple[list, list]:
+    move_cfg = MoveActionCfg(
+        control_part="arm",
+        sample_interval=MOVE_SAMPLE_INTERVAL,
+    )
+    pickup_cfg = PickUpActionCfg(
+        control_part="arm",
+        hand_control_part="hand",
+        hand_open_qpos=hand_open,
+        hand_close_qpos=hand_close,
+        approach_direction=torch.tensor(
+            BOTTLE_APPROACH_DIRECTION, dtype=torch.float32, device=device
+        ),
+        pre_grasp_distance=0.15,
+        lift_height=0.16,
+        sample_interval=PICK_SAMPLE_INTERVAL,
+        hand_interp_steps=HAND_INTERP_STEPS,
+    )
+    move_object_cfg = MoveObjectActionCfg(
+        control_part="arm",
+        hand_control_part="hand",
+        hand_close_qpos=hand_close,
+        sample_interval=MOVE_OBJECT_SAMPLE_INTERVAL,
+    )
+    action_cfgs = [move_cfg, pickup_cfg, move_object_cfg]
+    target_suffix = []
+
+    if args.mode == "release":
+        place_cfg = PlaceActionCfg(
+            control_part="arm",
+            hand_control_part="hand",
+            hand_open_qpos=hand_open,
+            hand_close_qpos=hand_close,
+            lift_height=0.08,
+            sample_interval=PLACE_SAMPLE_INTERVAL,
+            hand_interp_steps=HAND_INTERP_STEPS,
+        )
+        action_cfgs.append(place_cfg)
+        target_suffix.append(None)
+
+    return action_cfgs, target_suffix
+
+
+def run_move_object_demo(args: argparse.Namespace) -> None:
+    sim = initialize_simulation(args)
+    robot = create_robot(sim)
+    create_table(sim)
+    obj = create_fallen_bottle(sim)
+
+    settle_object(sim, obj, step=5)
+    semantics = create_object_semantics(obj, args)
+    motion_gen = MotionGenerator(
+        cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid))
+    )
+    hand_open, hand_close = get_hand_open_close_qpos(robot, sim.device)
+    action_cfgs, target_suffix = build_action_sequence(
+        args, hand_open, hand_close, sim.device
+    )
+    atomic_engine = AtomicActionEngine(
+        motion_generator=motion_gen,
+        actions_cfg_list=action_cfgs,
+    )
+
+    sim.open_window()
+    if not args.auto_play:
+        input("Inspect the fallen bottle, then press Enter to plan...")
+
+    obj_pose = obj.get_local_pose(to_matrix=True)
+    move_position = obj_pose[0, :3, 3].clone()
+    move_position[2] = 0.36
+    move_target = make_top_down_eef_pose(move_position)
+    move_object_target = MoveObjectTarget(
+        object_target_pose=make_upright_object_pose(sim.device)
+    )
+
+    action_chain = "move -> pick_up -> move_object"
+    if args.mode == "release":
+        action_chain += " -> place"
+    logger.log_info(f"Planning {action_chain} (mode={args.mode})")
+    start_time = time.time()
+    is_success, traj = atomic_engine.execute_static(
+        target_list=[move_target, semantics, move_object_target, *target_suffix]
+    )
+    cost_time = time.time() - start_time
+    logger.log_info(f"Plan trajectory cost time: {cost_time:.2f} seconds")
+    if not is_success:
+        logger.log_warning("Failed to plan move_object demo trajectory.")
+        return
+
+    if not args.auto_play:
+        input("Press Enter to replay the move_object demo...")
+
+    post_grasp_clear_step = compute_pick_close_end_step()
+    should_clear_object_dynamics = True
+    log_stride = max(1, traj.shape[1] // 10)
+    for i in range(traj.shape[1]):
+        robot.set_qpos(traj[:, i, :])
+        sim.update(step=4)
+        if should_clear_object_dynamics and i + 1 >= post_grasp_clear_step:
+            obj.clear_dynamics()
+            should_clear_object_dynamics = False
+            logger.log_info(f"Object dynamics cleared after grasp at step={i}")
+        if args.debug_state and (i % log_stride == 0 or i == traj.shape[1] - 1):
+            log_object_state(obj, f"replay step {i}/{traj.shape[1] - 1}")
+        time.sleep(1e-2)
+
+    if args.mode == "release":
+        logger.log_info("Release mode: gripper opened after move_object.")
+    else:
+        logger.log_info("Hold mode: keeping the bottle suspended in the gripper.")
+
+    final_qpos = traj[:, -1, :]
+    for i in range(POST_TRAJECTORY_STEPS):
+        robot.set_qpos(final_qpos)
+        sim.update(step=2)
+        if args.debug_state and i % max(1, POST_TRAJECTORY_STEPS // 5) == 0:
+            log_object_state(obj, f"post step {i}/{POST_TRAJECTORY_STEPS - 1}")
+        time.sleep(1e-2)
+
+    if not args.auto_play:
+        input("Press Enter to exit the simulation...")
+
+
+def main() -> None:
+    args = parse_arguments()
+    run_move_object_demo(args)
+
+
+if __name__ == "__main__":
+    main()

From dfc0800f3e64fe5fa892c39772039b42022ddd51 Mon Sep 17 00:00:00 2001
From: skywhite1024 <129768272+skywhite1024@users.noreply.github.com>
Date: Thu, 18 Jun 2026 17:33:16 +0800
Subject: [PATCH 09/14] Keep move_object demo hold-only

---
 embodichain/lab/sim/atomic_actions/actions.py | 19 +-------
 embodichain/lab/sim/atomic_actions/engine.py  | 12 ++---
 .../move_object_atomic_actions.py             | 47 +++----------------
 3 files changed, 12 insertions(+), 66 deletions(-)

diff --git a/embodichain/lab/sim/atomic_actions/actions.py b/embodichain/lab/sim/atomic_actions/actions.py
index 3a4696c6..14e4bf62 100644
--- a/embodichain/lab/sim/atomic_actions/actions.py
+++ b/embodichain/lab/sim/atomic_actions/actions.py
@@ -1501,7 +1501,7 @@ def __init__(
 
     def execute(
         self,
-        target: Union[ObjectSemantics, torch.Tensor, None],
+        target: Union[ObjectSemantics, torch.Tensor],
         start_qpos: Optional[torch.Tensor] = None,
         **kwargs,
     ) -> tuple[bool, torch.Tensor, list[float]]:
@@ -1520,23 +1520,6 @@ def execute(
         self._held_object_state = None
         start_qpos = self._resolve_start_qpos(start_qpos, self.arm_dof)
 
-        if target is None:
-            n_open_waypoint = max(2, self.cfg.hand_interp_steps)
-            hand_open_trajectory = torch.zeros(
-                size=(self.n_envs, n_open_waypoint, self.dof),
-                dtype=torch.float32,
-                device=self.device,
-            )
-            hand_open_trajectory[:, :, : self.arm_dof] = start_qpos.unsqueeze(1).repeat(
-                1, n_open_waypoint, 1
-            )
-            hand_open_trajectory[:, :, self.arm_dof :] = self._interpolate_hand_qpos(
-                self.hand_close_qpos,
-                self.hand_open_qpos,
-                n_open_waypoint,
-            )
-            return True, hand_open_trajectory, self.joint_ids
-
         is_success, place_xpos = self._resolve_pose_target(
             target, action_name=self.__class__.__name__
         )
diff --git a/embodichain/lab/sim/atomic_actions/engine.py b/embodichain/lab/sim/atomic_actions/engine.py
index 44fe789a..59275f9d 100644
--- a/embodichain/lab/sim/atomic_actions/engine.py
+++ b/embodichain/lab/sim/atomic_actions/engine.py
@@ -216,7 +216,6 @@ def execute_static(
                 ObjectSemantics,
                 MoveObjectTarget,
                 Dict[str, Any],
-                None,
             ]
         ],
     ) -> tuple[bool, torch.Tensor]:
@@ -278,7 +277,7 @@ def validate(
         self,
         action_name: str,
         target: Union[
-            torch.Tensor, str, ObjectSemantics, MoveObjectTarget, Dict[str, Any], None
+            torch.Tensor, str, ObjectSemantics, MoveObjectTarget, Dict[str, Any]
         ],
         **kwargs,
     ) -> bool:
@@ -294,16 +293,13 @@ def validate(
     def _resolve_target(
         self,
         target: Union[
-            torch.Tensor, str, ObjectSemantics, MoveObjectTarget, Dict[str, Any], None
+            torch.Tensor, str, ObjectSemantics, MoveObjectTarget, Dict[str, Any]
         ],
-    ) -> Union[torch.Tensor, ObjectSemantics, MoveObjectTarget, None]:
+    ) -> Union[torch.Tensor, ObjectSemantics, MoveObjectTarget]:
         """Resolve user target input into tensor pose or ObjectSemantics.
 
         Supports the convenience dict format in ``execute`` and ``validate``.
         """
-        if target is None:
-            return None
-
         if isinstance(target, torch.Tensor):
             return target
 
@@ -367,7 +363,7 @@ def _resolve_target(
 
         raise TypeError(
             "target must be torch.Tensor, str, ObjectSemantics, MoveObjectTarget, "
-            "Dict[str, Any], or None"
+            "or Dict[str, Any]"
         )
 
     def get_semantic_analyzer(self) -> SemanticAnalyzer:
diff --git a/scripts/tutorials/atomic_action/move_object_atomic_actions.py b/scripts/tutorials/atomic_action/move_object_atomic_actions.py
index 34ae37ca..abce7271 100644
--- a/scripts/tutorials/atomic_action/move_object_atomic_actions.py
+++ b/scripts/tutorials/atomic_action/move_object_atomic_actions.py
@@ -40,7 +40,6 @@
     MoveObjectTarget,
     ObjectSemantics,
     PickUpActionCfg,
-    PlaceActionCfg,
 )
 from embodichain.lab.sim.cfg import (
     JointDrivePropertiesCfg,
@@ -79,7 +78,6 @@
 MOVE_SAMPLE_INTERVAL = 60
 PICK_SAMPLE_INTERVAL = 120
 MOVE_OBJECT_SAMPLE_INTERVAL = 120
-PLACE_SAMPLE_INTERVAL = 36
 HAND_INTERP_STEPS = 12
 POST_TRAJECTORY_STEPS = 240
 TABLE_SIZE = [1.0, 1.4, 0.05]
@@ -88,15 +86,9 @@
 
 def parse_arguments() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
-        description="Demonstrate MoveObjectAction with optional release."
+        description="Demonstrate MoveObjectAction holding a bottle in the gripper."
     )
     add_env_launcher_args_to_parser(parser)
-    parser.add_argument(
-        "--mode",
-        choices=("hold", "release"),
-        default="hold",
-        help="hold keeps the bottle in the gripper; release opens the gripper.",
-    )
     parser.add_argument(
         "--n_sample",
         type=int,
@@ -331,11 +323,10 @@ def log_object_state(obj: RigidObject, label: str) -> None:
 
 
 def build_action_sequence(
-    args: argparse.Namespace,
     hand_open: torch.Tensor,
     hand_close: torch.Tensor,
     device: torch.device,
-) -> tuple[list, list]:
+) -> list:
     move_cfg = MoveActionCfg(
         control_part="arm",
         sample_interval=MOVE_SAMPLE_INTERVAL,
@@ -359,23 +350,7 @@ def build_action_sequence(
         hand_close_qpos=hand_close,
         sample_interval=MOVE_OBJECT_SAMPLE_INTERVAL,
     )
-    action_cfgs = [move_cfg, pickup_cfg, move_object_cfg]
-    target_suffix = []
-
-    if args.mode == "release":
-        place_cfg = PlaceActionCfg(
-            control_part="arm",
-            hand_control_part="hand",
-            hand_open_qpos=hand_open,
-            hand_close_qpos=hand_close,
-            lift_height=0.08,
-            sample_interval=PLACE_SAMPLE_INTERVAL,
-            hand_interp_steps=HAND_INTERP_STEPS,
-        )
-        action_cfgs.append(place_cfg)
-        target_suffix.append(None)
-
-    return action_cfgs, target_suffix
+    return [move_cfg, pickup_cfg, move_object_cfg]
 
 
 def run_move_object_demo(args: argparse.Namespace) -> None:
@@ -390,9 +365,7 @@ def run_move_object_demo(args: argparse.Namespace) -> None:
         cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid))
     )
     hand_open, hand_close = get_hand_open_close_qpos(robot, sim.device)
-    action_cfgs, target_suffix = build_action_sequence(
-        args, hand_open, hand_close, sim.device
-    )
+    action_cfgs = build_action_sequence(hand_open, hand_close, sim.device)
     atomic_engine = AtomicActionEngine(
         motion_generator=motion_gen,
         actions_cfg_list=action_cfgs,
@@ -410,13 +383,10 @@ def run_move_object_demo(args: argparse.Namespace) -> None:
         object_target_pose=make_upright_object_pose(sim.device)
     )
 
-    action_chain = "move -> pick_up -> move_object"
-    if args.mode == "release":
-        action_chain += " -> place"
-    logger.log_info(f"Planning {action_chain} (mode={args.mode})")
+    logger.log_info("Planning move -> pick_up -> move_object")
     start_time = time.time()
     is_success, traj = atomic_engine.execute_static(
-        target_list=[move_target, semantics, move_object_target, *target_suffix]
+        target_list=[move_target, semantics, move_object_target]
     )
     cost_time = time.time() - start_time
     logger.log_info(f"Plan trajectory cost time: {cost_time:.2f} seconds")
@@ -441,10 +411,7 @@ def run_move_object_demo(args: argparse.Namespace) -> None:
             log_object_state(obj, f"replay step {i}/{traj.shape[1] - 1}")
         time.sleep(1e-2)
 
-    if args.mode == "release":
-        logger.log_info("Release mode: gripper opened after move_object.")
-    else:
-        logger.log_info("Hold mode: keeping the bottle suspended in the gripper.")
+    logger.log_info("MoveObjectAction keeps the bottle suspended in the gripper.")
 
     final_qpos = traj[:, -1, :]
     for i in range(POST_TRAJECTORY_STEPS):

From bb41004613a1a6fc8839ee7cea981dfcd5510127 Mon Sep 17 00:00:00 2001
From: skywhite1024 <129768272+skywhite1024@users.noreply.github.com>
Date: Fri, 19 Jun 2026 12:11:13 +0800
Subject: [PATCH 10/14] add pickup_atomic_actions.py

---
 embodichain/lab/sim/solvers/pytorch_solver.py |   2 +-
 .../atomic_action/pickup_atomic_actions.py    | 499 ++++++++++++++++++
 2 files changed, 500 insertions(+), 1 deletion(-)
 create mode 100644 scripts/tutorials/atomic_action/pickup_atomic_actions.py

diff --git a/embodichain/lab/sim/solvers/pytorch_solver.py b/embodichain/lab/sim/solvers/pytorch_solver.py
index c0fcf465..277eaf87 100644
--- a/embodichain/lab/sim/solvers/pytorch_solver.py
+++ b/embodichain/lab/sim/solvers/pytorch_solver.py
@@ -62,7 +62,7 @@ class PytorchSolverCfg(SolverCfg):
     is_only_position_constraint: bool = False
     """Flag to indicate whether the solver should only consider position constraints."""
 
-    num_samples: int = 5
+    num_samples: int = 30
     """Number of samples to generate different joint seeds for IK iterations.
 
     A higher number of samples increases the chances of finding a valid solution
diff --git a/scripts/tutorials/atomic_action/pickup_atomic_actions.py b/scripts/tutorials/atomic_action/pickup_atomic_actions.py
new file mode 100644
index 00000000..64bcea61
--- /dev/null
+++ b/scripts/tutorials/atomic_action/pickup_atomic_actions.py
@@ -0,0 +1,499 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+
+"""Demonstrate PickUpAction on an upright object with configurable approach."""
+
+from __future__ import annotations
+
+import argparse
+import sys
+import time
+from pathlib import Path
+
+_REPO_ROOT = Path(__file__).resolve().parents[3]
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))
+
+import torch
+
+from embodichain.data import get_data_path
+from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
+from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
+from embodichain.lab.sim.atomic_actions import (
+    AntipodalAffordance,
+    AtomicActionEngine,
+    MoveActionCfg,
+    ObjectSemantics,
+    PickUpActionCfg,
+)
+from embodichain.lab.sim.cfg import (
+    JointDrivePropertiesCfg,
+    LightCfg,
+    RenderCfg,
+    RigidBodyAttributesCfg,
+    RigidObjectCfg,
+    RobotCfg,
+    URDFCfg,
+)
+from embodichain.lab.sim.objects import RigidObject, Robot
+from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg
+from embodichain.lab.sim.shapes import CubeCfg, MeshCfg
+from embodichain.lab.sim.solvers import PytorchSolverCfg
+from embodichain.toolkits.graspkit.pg_grasp.antipodal_generator import (
+    AntipodalSamplerCfg,
+    GraspGeneratorCfg,
+)
+from embodichain.toolkits.graspkit.pg_grasp.gripper_collision_checker import (
+    GripperCollisionCfg,
+)
+from embodichain.utils import logger
+from embodichain.utils.math import matrix_from_euler
+
+GRIPPER_URDF_PATH = "DH_PGI_140_80/DH_PGI_140_80.urdf"
+GRIPPER_HAND_JOINT_PATTERN = "GRIPPER_FINGER1_JOINT_1"
+GRIPPER_MAX_OPEN_WIDTH = 0.080
+GRIPPER_FINGER_LENGTH = 0.088
+GRIPPER_ROOT_Z_WIDTH = 0.096
+GRIPPER_Y_THICKNESS = 0.040
+GRIPPER_TCP_Z = 0.15
+
+OBJECT_MIN_HAND_CLOSE_QPOS = 0.024
+OBJECT_XY = (-0.42, -0.08)
+OBJECT_CLEARANCE = 0.0
+
+OBJECT_PRESETS = {
+    "paper_cup": {
+        "label": "paper_cup",
+        "mesh_path": "PaperCup/paper_cup.ply",
+        "init_rot": (0.0, 0.0, 0.0),
+        "body_scale": (1.0, 1.0, 1.0),
+        "mass": 0.01,
+    },
+    "coffee_cup": {
+        "label": "coffee_cup",
+        "mesh_path": "CoffeeCup/cup.ply",
+        "init_rot": (0.0, 0.0, -90.0),
+        "body_scale": (1.0, 1.0, 1.0),
+        "mass": 0.01,
+    },
+    "bottle": {
+        "label": "bottle",
+        "mesh_path": "ScannedBottle/yibao.ply",
+        "init_rot": (180.0, 0.0, 0.0),
+        "body_scale": (0.0008, 0.0008, 0.0008),
+        "mass": 0.02,
+    },
+}
+
+MOVE_SAMPLE_INTERVAL = 60
+PICK_SAMPLE_INTERVAL = 120
+HAND_INTERP_STEPS = 12
+POST_TRAJECTORY_STEPS = 240
+TABLE_SIZE = [1.0, 1.4, 0.05]
+TABLE_TOP_Z = -0.045
+
+APPROACH_DIRECTIONS = {
+    "top": (0.0, 0.0, -1.0),
+    "side": (0.0, 1.0, 0.0),
+    "side_y": (0.0, -1.0, 0.0),
+}
+
+
+def parse_arguments() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description="Demonstrate PickUpAction on an upright object."
+    )
+    add_env_launcher_args_to_parser(parser)
+    parser.add_argument(
+        "--object",
+        choices=sorted(OBJECT_PRESETS.keys()),
+        default="paper_cup",
+        help="Object preset to pick.",
+    )
+    parser.add_argument(
+        "--n_sample",
+        type=int,
+        default=10000,
+        help="Number of samples for antipodal grasp generation.",
+    )
+    parser.add_argument(
+        "--force_reannotate",
+        action="store_true",
+        help="Force grasp region re-annotation instead of using cached data.",
+    )
+    parser.add_argument(
+        "--auto_play",
+        action="store_true",
+        help="Run the viewer demo without waiting for keyboard input.",
+    )
+    parser.add_argument(
+        "--debug_state",
+        "--debug",
+        action="store_true",
+        help="Log object pose during replay.",
+    )
+    parser.add_argument(
+        "--approach",
+        choices=["top", "side", "side_y", "custom"],
+        default="side",
+        help="Pick approach direction preset.",
+    )
+    parser.add_argument(
+        "--custom_approach_direction",
+        type=float,
+        nargs=3,
+        default=None,
+        metavar=("X", "Y", "Z"),
+        help="World-frame approach direction used when --approach custom.",
+    )
+    return parser.parse_args()
+
+
+def initialize_simulation(args: argparse.Namespace) -> SimulationManager:
+    cfg = SimulationManagerCfg(
+        headless=True,
+        sim_device=args.device,
+        render_cfg=RenderCfg(renderer=args.renderer),
+        physics_dt=1.0 / 100.0,
+        arena_space=2.5,
+    )
+    sim = SimulationManager(cfg)
+    sim.add_light(
+        cfg=LightCfg(
+            uid="main_light",
+            color=(0.6, 0.6, 0.6),
+            intensity=30.0,
+            init_pos=(1.0, 0.0, 3.0),
+        )
+    )
+    return sim
+
+
+def create_robot(sim: SimulationManager, position=(0.0, 0.0, 0.0)) -> Robot:
+    ur5_urdf_path = get_data_path("UniversalRobots/UR5/UR5.urdf")
+    gripper_urdf_path = get_data_path(GRIPPER_URDF_PATH)
+
+    cfg = RobotCfg(
+        uid="UR5",
+        urdf_cfg=URDFCfg(
+            components=[
+                {"component_type": "arm", "urdf_path": ur5_urdf_path},
+                {"component_type": "hand", "urdf_path": gripper_urdf_path},
+            ]
+        ),
+        drive_pros=JointDrivePropertiesCfg(
+            stiffness={"JOINT[0-9]": 1e4, GRIPPER_HAND_JOINT_PATTERN: 1e3},
+            damping={"JOINT[0-9]": 1e3, GRIPPER_HAND_JOINT_PATTERN: 1e2},
+            max_effort={"JOINT[0-9]": 1e5, GRIPPER_HAND_JOINT_PATTERN: 1e4},
+            drive_type="force",
+        ),
+        control_parts={
+            "arm": ["JOINT[0-9]"],
+            "hand": [GRIPPER_HAND_JOINT_PATTERN],
+        },
+        solver_cfg={
+            "arm": PytorchSolverCfg(
+                end_link_name="ee_link",
+                root_link_name="base_link",
+                tcp=[
+                    [1.0, 0.0, 0.0, 0.0],
+                    [0.0, 1.0, 0.0, 0.0],
+                    [0.0, 0.0, 1.0, GRIPPER_TCP_Z],
+                    [0.0, 0.0, 0.0, 1.0],
+                ],
+            )
+        },
+        init_qpos=[0.0, -1.57, 1.57, -1.57, -1.57, 0.0, 0.0, 0.0],
+        init_pos=position,
+    )
+    return sim.add_robot(cfg=cfg)
+
+
+def create_table(sim: SimulationManager) -> RigidObject:
+    cfg = RigidObjectCfg(
+        uid="table",
+        shape=CubeCfg(size=TABLE_SIZE),
+        body_type="static",
+        attrs=RigidBodyAttributesCfg(
+            dynamic_friction=0.8,
+            static_friction=0.9,
+        ),
+        init_pos=[-0.30, 0.10, TABLE_TOP_Z - 0.5 * TABLE_SIZE[2]],
+    )
+    return sim.add_rigid_object(cfg=cfg)
+
+
+def create_pick_object(sim: SimulationManager, object_name: str) -> RigidObject:
+    preset = OBJECT_PRESETS[object_name]
+    cfg = RigidObjectCfg(
+        uid=preset["label"],
+        shape=MeshCfg(fpath=get_data_path(preset["mesh_path"])),
+        attrs=RigidBodyAttributesCfg(
+            mass=preset["mass"],
+            dynamic_friction=0.97,
+            static_friction=0.99,
+        ),
+        max_convex_hull_num=16,
+        init_pos=[OBJECT_XY[0], OBJECT_XY[1], 0.0],
+        init_rot=preset["init_rot"],
+        body_scale=preset["body_scale"],
+    )
+    obj = sim.add_rigid_object(cfg=cfg)
+    obj.cfg.init_pos = _compute_tabletop_init_pos(obj, cfg.init_rot)
+    obj.reset()
+    return obj
+
+
+def _compute_tabletop_init_pos(
+    obj: RigidObject, init_rot: tuple[float, float, float]
+) -> tuple[float, float, float]:
+    vertices = obj.get_vertices(env_ids=[0], scale=True)[0]
+    rot = torch.as_tensor(init_rot, dtype=torch.float32, device=vertices.device)
+    rot = rot.unsqueeze(0) * torch.pi / 180.0
+    upright_rot = matrix_from_euler(rot, "XYZ")[0]
+    rotated_vertices = vertices @ upright_rot.T
+    bottom_z = rotated_vertices[:, 2].min().item()
+    z = TABLE_TOP_Z + OBJECT_CLEARANCE - bottom_z
+    return (OBJECT_XY[0], OBJECT_XY[1], z)
+
+
+def settle_object(sim: SimulationManager, obj: RigidObject, step: int = 5) -> None:
+    if sim.device.type == "cuda":
+        sim.init_gpu_physics()
+    obj.reset()
+    sim.update(step=step)
+    obj.clear_dynamics()
+
+
+def build_grasp_generator_cfg(args: argparse.Namespace) -> GraspGeneratorCfg:
+    return GraspGeneratorCfg(
+        viser_port=11801,
+        antipodal_sampler_cfg=AntipodalSamplerCfg(
+            n_sample=args.n_sample,
+            max_length=GRIPPER_MAX_OPEN_WIDTH,
+            min_length=0.003,
+        ),
+        is_partial_annotate=False,
+        is_filter_ground_collision=False,
+    )
+
+
+def build_gripper_collision_cfg() -> GripperCollisionCfg:
+    return GripperCollisionCfg(
+        max_open_length=GRIPPER_MAX_OPEN_WIDTH,
+        finger_length=GRIPPER_FINGER_LENGTH,
+        y_thickness=GRIPPER_Y_THICKNESS,
+        root_z_width=GRIPPER_ROOT_Z_WIDTH,
+        open_check_margin=0.002,
+        point_sample_dense=0.012,
+    )
+
+
+def create_object_semantics(
+    obj: RigidObject, args: argparse.Namespace
+) -> ObjectSemantics:
+    label = OBJECT_PRESETS[args.object]["label"]
+    return ObjectSemantics(
+        label=label,
+        geometry={
+            "mesh_vertices": obj.get_vertices(env_ids=[0], scale=True)[0],
+            "mesh_triangles": obj.get_triangles(env_ids=[0])[0],
+        },
+        affordance=AntipodalAffordance(
+            object_label=label,
+            force_reannotate=args.force_reannotate,
+            custom_config={
+                "gripper_collision_cfg": build_gripper_collision_cfg(),
+                "generator_cfg": build_grasp_generator_cfg(args),
+            },
+        ),
+        entity=obj,
+    )
+
+
+def get_hand_open_close_qpos(
+    robot: Robot, device: torch.device
+) -> tuple[torch.Tensor, torch.Tensor]:
+    hand_limits = robot.get_qpos_limits(name="hand")[0].to(
+        device=device, dtype=torch.float32
+    )
+    hand_open = hand_limits[:, 0]
+    hand_close_limit = hand_limits[:, 1]
+    hand_close = torch.minimum(
+        hand_close_limit,
+        torch.full_like(hand_close_limit, OBJECT_MIN_HAND_CLOSE_QPOS),
+    )
+    return hand_open, hand_close
+
+
+def resolve_approach_direction(
+    args: argparse.Namespace, device: torch.device
+) -> torch.Tensor:
+    if args.approach == "custom":
+        if args.custom_approach_direction is None:
+            raise ValueError(
+                "--custom_approach_direction is required when --approach custom."
+            )
+        direction = args.custom_approach_direction
+    else:
+        direction = APPROACH_DIRECTIONS[args.approach]
+
+    approach_direction = torch.tensor(direction, dtype=torch.float32, device=device)
+    norm = torch.linalg.norm(approach_direction)
+    if norm < 1e-6:
+        raise ValueError("approach_direction must be non-zero.")
+    return approach_direction / norm
+
+
+def make_top_down_eef_pose(position: torch.Tensor) -> torch.Tensor:
+    pose = torch.eye(4, dtype=torch.float32, device=position.device)
+    pose[:3, :3] = torch.tensor(
+        [
+            [-0.0539, -0.9985, -0.0022],
+            [-0.9977, 0.0540, -0.0401],
+            [0.0401, 0.0000, -0.9992],
+        ],
+        dtype=torch.float32,
+        device=position.device,
+    )
+    pose[:3, 3] = position
+    return pose
+
+
+def compute_pick_close_end_step() -> int:
+    motion_waypoints = PICK_SAMPLE_INTERVAL - HAND_INTERP_STEPS
+    n_approach = int(round(motion_waypoints) * 0.6)
+    return MOVE_SAMPLE_INTERVAL + n_approach + HAND_INTERP_STEPS
+
+
+def format_tensor(tensor: torch.Tensor) -> str:
+    rounded = (tensor.detach().cpu() * 10000.0).round() / 10000.0
+    return str(rounded.tolist())
+
+
+def log_object_state(obj: RigidObject, label: str) -> None:
+    obj_pose = obj.get_local_pose(to_matrix=True)
+    logger.log_info(
+        f"{label}: pos={format_tensor(obj_pose[0, :3, 3])}, "
+        f"z_axis={format_tensor(obj_pose[0, :3, 2])}"
+    )
+
+
+def build_action_sequence(
+    hand_open: torch.Tensor,
+    hand_close: torch.Tensor,
+    approach_direction: torch.Tensor,
+) -> list:
+    move_cfg = MoveActionCfg(
+        control_part="arm",
+        sample_interval=MOVE_SAMPLE_INTERVAL,
+    )
+    pickup_cfg = PickUpActionCfg(
+        control_part="arm",
+        hand_control_part="hand",
+        hand_open_qpos=hand_open,
+        hand_close_qpos=hand_close,
+        approach_direction=approach_direction,
+        pre_grasp_distance=0.15,
+        lift_height=0.16,
+        sample_interval=PICK_SAMPLE_INTERVAL,
+        hand_interp_steps=HAND_INTERP_STEPS,
+    )
+    return [move_cfg, pickup_cfg]
+
+
+def run_pickup_demo(args: argparse.Namespace) -> None:
+    sim = initialize_simulation(args)
+    robot = create_robot(sim)
+    create_table(sim)
+    obj = create_pick_object(sim, args.object)
+
+    settle_object(sim, obj, step=5)
+    semantics = create_object_semantics(obj, args)
+    motion_gen = MotionGenerator(
+        cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid))
+    )
+    hand_open, hand_close = get_hand_open_close_qpos(robot, sim.device)
+    approach_direction = resolve_approach_direction(args, sim.device)
+    action_cfgs = build_action_sequence(hand_open, hand_close, approach_direction)
+    atomic_engine = AtomicActionEngine(
+        motion_generator=motion_gen,
+        actions_cfg_list=action_cfgs,
+    )
+
+    sim.open_window()
+    if not args.auto_play:
+        input(f"Inspect the upright {args.object}, then press Enter to plan...")
+
+    obj_pose = obj.get_local_pose(to_matrix=True)
+    move_position = obj_pose[0, :3, 3].clone()
+    move_position[2] = 0.36
+    move_target = make_top_down_eef_pose(move_position)
+
+    logger.log_info(
+        f"Planning move -> pick_up for {args.object} with "
+        f"approach_direction={format_tensor(approach_direction)}"
+    )
+    start_time = time.time()
+    is_success, traj = atomic_engine.execute_static(
+        target_list=[move_target, semantics]
+    )
+    cost_time = time.time() - start_time
+    logger.log_info(f"Plan trajectory cost time: {cost_time:.2f} seconds")
+    if not is_success:
+        logger.log_warning("Failed to plan pickup demo trajectory.")
+        return
+
+    if not args.auto_play:
+        input("Press Enter to replay the pickup demo...")
+
+    post_grasp_clear_step = compute_pick_close_end_step()
+    should_clear_object_dynamics = True
+    log_stride = max(1, traj.shape[1] // 10)
+    for i in range(traj.shape[1]):
+        robot.set_qpos(traj[:, i, :])
+        sim.update(step=4)
+        if should_clear_object_dynamics and i + 1 >= post_grasp_clear_step:
+            obj.clear_dynamics()
+            should_clear_object_dynamics = False
+            logger.log_info(f"Object dynamics cleared after grasp at step={i}")
+        if args.debug_state and (i % log_stride == 0 or i == traj.shape[1] - 1):
+            log_object_state(obj, f"replay step {i}/{traj.shape[1] - 1}")
+        time.sleep(1e-2)
+
+    logger.log_info(
+        f"PickUpAction keeps the upright {args.object} suspended in the gripper."
+    )
+
+    final_qpos = traj[:, -1, :]
+    for i in range(POST_TRAJECTORY_STEPS):
+        robot.set_qpos(final_qpos)
+        sim.update(step=2)
+        if args.debug_state and i % max(1, POST_TRAJECTORY_STEPS // 5) == 0:
+            log_object_state(obj, f"post step {i}/{POST_TRAJECTORY_STEPS - 1}")
+        time.sleep(1e-2)
+
+    if not args.auto_play:
+        input("Press Enter to exit the simulation...")
+
+
+def main() -> None:
+    args = parse_arguments()
+    run_pickup_demo(args)
+
+
+if __name__ == "__main__":
+    main()

From 9eb8fb4dce29f581053fc42d780038219f203769 Mon Sep 17 00:00:00 2001
From: skywhite1024 <129768272+skywhite1024@users.noreply.github.com>
Date: Fri, 19 Jun 2026 15:26:15 +0800
Subject: [PATCH 11/14] Address atomic action review feedback

---
 .../embodichain.lab.sim.atomic_actions.rst    |  21 +
 docs/source/overview/sim/atomic_actions.md    |  36 +-
 docs/source/tutorial/atomic_actions.rst       |  37 +-
 .../lab/sim/atomic_actions/__init__.py        |   4 -
 embodichain/lab/sim/atomic_actions/actions.py | 883 ++----------------
 embodichain/lab/sim/atomic_actions/core.py    |   9 +-
 embodichain/lab/sim/atomic_actions/engine.py  |   5 +-
 .../atomic_action/upright_atomic_action.py    | 740 ---------------
 .../{atomic_action => sim}/atomic_actions.py  |   0
 tests/sim/atomic_actions/test_actions.py      |  97 ++
 tests/sim/atomic_actions/test_engine.py       |  94 ++
 11 files changed, 360 insertions(+), 1566 deletions(-)
 delete mode 100644 scripts/tutorials/atomic_action/upright_atomic_action.py
 rename scripts/tutorials/{atomic_action => sim}/atomic_actions.py (100%)

diff --git a/docs/source/api_reference/embodichain/embodichain.lab.sim.atomic_actions.rst b/docs/source/api_reference/embodichain/embodichain.lab.sim.atomic_actions.rst
index 181086c3..451d7db1 100644
--- a/docs/source/api_reference/embodichain/embodichain.lab.sim.atomic_actions.rst
+++ b/docs/source/api_reference/embodichain/embodichain.lab.sim.atomic_actions.rst
@@ -10,12 +10,16 @@ embodichain.lab.sim.atomic_actions
       Affordance
       InteractionPoints
       ObjectSemantics
+      HeldObjectState
+      MoveObjectTarget
       ActionCfg
       AtomicAction
       MoveActionCfg
       MoveAction
       PickUpActionCfg
       PickUpAction
+      MoveObjectActionCfg
+      MoveObjectAction
       PlaceActionCfg
       PlaceAction
       AtomicActionEngine
@@ -37,6 +41,14 @@ Core
     :members:
     :show-inheritance:
 
+.. autoclass:: HeldObjectState
+    :members:
+    :show-inheritance:
+
+.. autoclass:: MoveObjectTarget
+    :members:
+    :show-inheritance:
+
 .. autoclass:: ActionCfg
     :members:
     :exclude-members: __init__, copy, replace, to_dict, validate
@@ -66,6 +78,15 @@ Actions
     :members:
     :show-inheritance:
 
+.. autoclass:: MoveObjectActionCfg
+    :members:
+    :exclude-members: __init__, copy, replace, to_dict, validate
+    :show-inheritance:
+
+.. autoclass:: MoveObjectAction
+    :members:
+    :show-inheritance:
+
 .. autoclass:: PlaceActionCfg
     :members:
     :exclude-members: __init__, copy, replace, to_dict, validate
diff --git a/docs/source/overview/sim/atomic_actions.md b/docs/source/overview/sim/atomic_actions.md
index 9d2523d5..24c3fdb4 100644
--- a/docs/source/overview/sim/atomic_actions.md
+++ b/docs/source/overview/sim/atomic_actions.md
@@ -42,6 +42,13 @@ AtomicActionEngine ◄─────────────── PlanResult 
 - `affordance` — *how* to interact with the object (e.g. antipodal grasp poses)
 - `entity` — a live reference to the simulation object, so actions can read its current pose
 
+**`HeldObjectState`** is runtime state produced after a successful semantic pickup. It stores
+the held object's semantics and object-to-end-effector transform so later actions can move the
+object without recomputing the grasp. It is intentionally separate from `ObjectSemantics`,
+which remains a reusable object description rather than per-execution robot state.
+
+**`MoveObjectTarget`** describes an object-centric target pose for an already-held object.
+
 **`Affordance`** is a data class that encodes a specific interaction capability. The built-in affordance types are:
 
 | Class | Use case |
@@ -67,6 +74,7 @@ The following actions are available out of the box:
 |---|---|---|---|
 | `MoveAction` | `MoveActionCfg` | `Tensor (4,4)` — EEF pose | Move arm to pose |
 | `PickUpAction` | `PickUpActionCfg` | `ObjectSemantics` or `Tensor (4,4)` | Approach → close gripper → lift |
+| `MoveObjectAction` | `MoveObjectActionCfg` | `MoveObjectTarget` | Move held object and keep gripper closed |
 | `PlaceAction` | `PlaceActionCfg` | `Tensor (4,4)` — EEF release pose | Lower → open gripper → retract |
 
 ### `MoveAction`
@@ -101,6 +109,26 @@ Three-phase grasp motion: *approach → close gripper → lift*.
 
 ---
 
+### `MoveObjectAction`
+
+Moves a held object to an object-centric target pose while preserving the grasp. It consumes
+the `HeldObjectState` produced by a prior semantic `PickUpAction`.
+
+`HeldObjectState` and `MoveObjectTarget` are intentionally kept separate from
+`ObjectSemantics`: `ObjectSemantics` describes the object and affordances, while these
+types describe runtime held-object state and action-specific targets.
+
+| Config field | Default | Description |
+|---|---|---|
+| `hand_close_qpos` | `None` | **Required.** Gripper closed joint positions |
+| `hand_control_part` | `"hand"` | Robot control part for the gripper |
+| `sample_interval` | `50` | Number of waypoints in the trajectory |
+
+**Target:** `MoveObjectTarget` or `dict` with `"object_target_pose"` containing a `torch.Tensor`
+of shape `(4, 4)` or `(n_envs, 4, 4)`.
+
+---
+
 ### `PlaceAction`
 
 Three-phase release motion: *lower → open gripper → retract*. Mirrors `PickUpAction`.
@@ -119,6 +147,8 @@ from embodichain.lab.sim.atomic_actions import (
     ObjectSemantics,
     AntipodalAffordance,
     PickUpActionCfg,
+    MoveObjectActionCfg,
+    MoveObjectTarget,
     PlaceActionCfg,
     MoveActionCfg,
 )
@@ -131,6 +161,7 @@ pickup_cfg = PickUpActionCfg(
     hand_close_qpos=torch.tensor([0.025, 0.025]),
 )
 place_cfg  = PlaceActionCfg(...)
+move_object_cfg = MoveObjectActionCfg(hand_close_qpos=torch.tensor([0.025, 0.025]))
 move_cfg   = MoveActionCfg(control_part="arm")
 
 # 2. Build the engine — action order matches target_list order
@@ -228,8 +259,10 @@ is_success, traj = engine.execute_static(target_list=[target_pose])
 |---|---|
 | `torch.Tensor (4,4)` or `(n_envs,4,4)` | EEF pose, broadcast across envs |
 | `ObjectSemantics` | Passed directly to the action |
+| `MoveObjectTarget` | Passed directly to `MoveObjectAction` |
 | `str` (object label) | Looked up in `SemanticAnalyzer` cache |
 | `dict` with `"pose"` key | Unwrapped to tensor |
+| `dict` with `"object_target_pose"` key | Wrapped as `MoveObjectTarget` |
 | `dict` with `"label"` key | Analyzed via `SemanticAnalyzer` |
 
 ---
@@ -238,4 +271,5 @@ is_success, traj = engine.execute_static(target_list=[target_pose])
 
 - {doc}`planners/motion_generator` — the trajectory planner used by every action
 - {doc}`sim_robot` — how control parts and IK solvers are configured
-- Tutorial: `scripts/tutorials/atomic_action/atomic_actions.py`
+- Tutorial: `scripts/tutorials/sim/atomic_actions.py`
+- Move object demo: `scripts/tutorials/sim/move_object_atomic_actions.py`
diff --git a/docs/source/tutorial/atomic_actions.rst b/docs/source/tutorial/atomic_actions.rst
index 411f374f..6a7b0b05 100644
--- a/docs/source/tutorial/atomic_actions.rst
+++ b/docs/source/tutorial/atomic_actions.rst
@@ -15,7 +15,8 @@ Key Features
 - **Semantic-aware execution** — actions accept either a raw pose tensor or an
   ``ObjectSemantics`` descriptor that bundles affordance data (grasp poses, interaction
   points) with the simulation entity.
-- **Three built-in primitives** — ``MoveAction``, ``PickUpAction``, and ``PlaceAction``
+- **Built-in primitives** — ``MoveAction``, ``PickUpAction``, ``MoveObjectAction``,
+  and ``PlaceAction``
   cover the most common tabletop manipulation workflows out of the box.
   See the :ref:`supported_atomic_actions` table for configs and target types.
 - **Extensible registry** — custom actions can be registered globally with
@@ -30,13 +31,13 @@ For the full design overview, architecture diagram, and extension guide see
 The Code
 --------
 
-The tutorial corresponds to the ``atomic_actions.py`` script in the ``scripts/tutorials/atomic_action``
+The tutorial corresponds to the ``atomic_actions.py`` script in the ``scripts/tutorials/sim``
 directory.
 
 .. dropdown:: Code for atomic_actions.py
     :icon: code
 
-    .. literalinclude:: ../../../scripts/tutorials/atomic_action/atomic_actions.py
+    .. literalinclude:: ../../../scripts/tutorials/sim/atomic_actions.py
         :language: python
         :linenos:
 
@@ -53,6 +54,7 @@ Setting up the engine
    from embodichain.lab.sim.atomic_actions import (
        AtomicActionEngine,
        PickUpActionCfg,
+       MoveObjectActionCfg,
        PlaceActionCfg,
        MoveActionCfg,
    )
@@ -78,6 +80,11 @@ Setting up the engine
        hand_control_part="hand",
        lift_height=0.15,
    )
+   move_object_cfg = MoveObjectActionCfg(
+       hand_close_qpos=hand_close,
+       control_part="arm",
+       hand_control_part="hand",
+   )
    move_cfg = MoveActionCfg(control_part="arm")
 
    engine = AtomicActionEngine(
@@ -139,6 +146,30 @@ Executing a pick-place-move sequence
        robot.set_qpos(trajectory[:, i])
        sim.update(step=4)
 
+Moving a held object
+~~~~~~~~~~~~~~~~~~~~
+
+``MoveObjectAction`` consumes the runtime ``HeldObjectState`` produced by a previous
+semantic ``PickUpAction``. The target is object-centric, so the caller specifies where the
+object should move, and the action converts that pose into an end-effector target while
+keeping the gripper closed.
+
+.. code-block:: python
+
+   from embodichain.lab.sim.atomic_actions import MoveObjectTarget
+
+   engine = AtomicActionEngine(
+       motion_generator=motion_gen,
+       actions_cfg_list=[pickup_cfg, move_object_cfg],
+   )
+
+   object_target_pose = torch.eye(4, dtype=torch.float32, device=device)
+   object_target_pose[:3, 3] = torch.tensor([0.3, -0.2, 0.25], device=device)
+
+   is_success, trajectory = engine.execute_static(
+       target_list=[semantics, MoveObjectTarget(object_target_pose=object_target_pose)]
+   )
+
 Registering custom actions
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/embodichain/lab/sim/atomic_actions/__init__.py b/embodichain/lab/sim/atomic_actions/__init__.py
index 7fbc506f..466419b3 100644
--- a/embodichain/lab/sim/atomic_actions/__init__.py
+++ b/embodichain/lab/sim/atomic_actions/__init__.py
@@ -36,12 +36,10 @@
     MoveObjectAction,
     PickUpAction,
     PlaceAction,
-    UprightAction,
     MoveActionCfg,
     MoveObjectActionCfg,
     PickUpActionCfg,
     PlaceActionCfg,
-    UprightActionCfg,
 )
 from .engine import (
     AtomicActionEngine,
@@ -64,12 +62,10 @@
     "MoveObjectAction",
     "PickUpAction",
     "PlaceAction",
-    "UprightAction",
     "MoveActionCfg",
     "MoveObjectActionCfg",
     "PickUpActionCfg",
     "PlaceActionCfg",
-    "UprightActionCfg",
     # Engine
     "AtomicActionEngine",
     "register_action",
diff --git a/embodichain/lab/sim/atomic_actions/actions.py b/embodichain/lab/sim/atomic_actions/actions.py
index 14e4bf62..ae9b2f54 100644
--- a/embodichain/lab/sim/atomic_actions/actions.py
+++ b/embodichain/lab/sim/atomic_actions/actions.py
@@ -32,6 +32,7 @@
 )
 from embodichain.utils import logger
 from embodichain.utils import configclass
+from embodichain.utils.math import pose_inv
 from embodichain.lab.sim.utility.action_utils import interpolate_with_distance
 import numpy as np
 
@@ -50,11 +51,8 @@ class MoveActionCfg(ActionCfg):
 
 
 @configclass
-class GraspActionCfg(MoveActionCfg):
-    """Shared configuration for actions that involve gripper open/close motions."""
-
-    hand_open_qpos: torch.Tensor | None = None
-    """[hand_dof,] of float. Joint positions for open hand state."""
+class HandCloseActionCfg(MoveActionCfg):
+    """Shared configuration for actions that keep or move the gripper closed."""
 
     hand_close_qpos: torch.Tensor | None = None
     """[hand_dof,] of float. Joint positions for closed hand state."""
@@ -62,6 +60,14 @@ class GraspActionCfg(MoveActionCfg):
     hand_control_part: str = "hand"
     """Name of the robot part that controls the hand joints."""
 
+
+@configclass
+class GraspActionCfg(HandCloseActionCfg):
+    """Shared configuration for actions that involve gripper open/close motions."""
+
+    hand_open_qpos: torch.Tensor | None = None
+    """[hand_dof,] of float. Joint positions for open hand state."""
+
     lift_height: float = 0.1
     """Height (m) to lift the end-effector after the gripper phase."""
 
@@ -92,6 +98,24 @@ def __init__(
         self.arm_joint_ids = self.robot.get_joint_ids(name=self.cfg.control_part)
         self.dof = len(self.arm_joint_ids)
 
+    def _init_hand_close_state(self, cfg_name: str) -> None:
+        """Initialize shared closed-hand state for gripper actions."""
+        if self.cfg.hand_close_qpos is None:
+            logger.log_error(f"hand_close_qpos must be specified in {cfg_name}")
+        self.hand_close_qpos = self.cfg.hand_close_qpos.to(self.device)
+
+        self.hand_joint_ids = self.robot.get_joint_ids(name=self.cfg.hand_control_part)
+        self.joint_ids = self.arm_joint_ids + self.hand_joint_ids
+        self.arm_dof = len(self.arm_joint_ids)
+        self.dof = len(self.joint_ids)
+
+    def _init_grasp_hand_state(self, cfg_name: str) -> None:
+        """Initialize shared open/closed gripper state for grasp actions."""
+        if self.cfg.hand_open_qpos is None:
+            logger.log_error(f"hand_open_qpos must be specified in {cfg_name}")
+        self.hand_open_qpos = self.cfg.hand_open_qpos.to(self.device)
+        self._init_hand_close_state(cfg_name)
+
     def _resolve_pose_target(
         self,
         target: Union[ObjectSemantics, torch.Tensor],
@@ -238,6 +262,7 @@ def _interpolate_hand_qpos(
         n_waypoints: int,
     ) -> torch.Tensor:
         """Interpolate hand joint positions between two gripper states."""
+        is_unbatched = start_hand_qpos.dim() == 1 and end_hand_qpos.dim() == 1
         start_hand_qpos = start_hand_qpos.to(self.device)
         end_hand_qpos = end_hand_qpos.to(self.device)
 
@@ -253,20 +278,34 @@ def _interpolate_hand_qpos(
             device=self.device,
             dtype=start_hand_qpos.dtype,
         )
-        return torch.lerp(
+        result = torch.lerp(
             start_hand_qpos.unsqueeze(1),
             end_hand_qpos.unsqueeze(1),
             weights[None, :, None],
         )
+        if is_unbatched:
+            return result.squeeze(0)
+        return result
+
+    def _expand_hand_qpos(self, hand_qpos: torch.Tensor) -> torch.Tensor:
+        """Resolve hand qpos to batched shape ``(n_envs, hand_dof)``."""
+        hand_dof = len(self.hand_joint_ids)
+        hand_qpos = hand_qpos.to(device=self.device, dtype=torch.float32)
+        if hand_qpos.shape == (hand_dof,):
+            return hand_qpos.unsqueeze(0).repeat(self.n_envs, 1)
+        if hand_qpos.shape == (self.n_envs, hand_dof):
+            return hand_qpos
+        logger.log_error(
+            f"hand_qpos must have shape ({hand_dof},) or "
+            f"({self.n_envs}, {hand_dof}), but got {hand_qpos.shape}",
+            ValueError,
+        )
 
-    @staticmethod
-    def _invert_pose(pose: torch.Tensor) -> torch.Tensor:
-        """Invert a batched homogeneous transform."""
-        inv_pose = pose.clone()
-        rot_t = pose[:, :3, :3].transpose(1, 2)
-        inv_pose[:, :3, :3] = rot_t
-        inv_pose[:, :3, 3] = -torch.bmm(rot_t, pose[:, :3, 3:4]).squeeze(-1)
-        return inv_pose
+    def _repeat_hand_qpos(
+        self, hand_qpos: torch.Tensor, n_waypoints: int
+    ) -> torch.Tensor:
+        """Repeat hand qpos across trajectory waypoints."""
+        return self._expand_hand_qpos(hand_qpos).unsqueeze(1).repeat(1, n_waypoints, 1)
 
     def execute(
         self,
@@ -327,6 +366,8 @@ class PickUpActionCfg(GraspActionCfg):
 
 
 class PickUpAction(MoveAction):
+    updates_held_object_state = True
+
     def __init__(
         self,
         motion_generator: MotionGenerator,
@@ -344,37 +385,7 @@ def __init__(
         self.cfg = cfg if cfg is not None else self.cfg
         self._held_object_state: HeldObjectState | None = None
         self.approach_direction = self.cfg.approach_direction.to(self.device)
-        if self.cfg.hand_open_qpos is None:
-            logger.log_error("hand_open_qpos must be specified in PickUpActionCfg")
-        if self.cfg.hand_close_qpos is None:
-            logger.log_error("hand_close_qpos must be specified in PickUpActionCfg")
-        self.hand_open_qpos = self.cfg.hand_open_qpos.to(self.device)
-        self.hand_close_qpos = self.cfg.hand_close_qpos.to(self.device)
-
-        self.hand_joint_ids = self.robot.get_joint_ids(name=self.cfg.hand_control_part)
-        self.joint_ids = self.arm_joint_ids + self.hand_joint_ids
-        self.arm_dof = len(self.arm_joint_ids)
-        self.dof = len(self.joint_ids)
-
-    def _expand_hand_qpos(self, hand_qpos: torch.Tensor) -> torch.Tensor:
-        """Resolve hand qpos to batched shape ``(n_envs, hand_dof)``."""
-        hand_dof = len(self.hand_joint_ids)
-        hand_qpos = hand_qpos.to(device=self.device, dtype=torch.float32)
-        if hand_qpos.shape == (hand_dof,):
-            return hand_qpos.unsqueeze(0).repeat(self.n_envs, 1)
-        if hand_qpos.shape == (self.n_envs, hand_dof):
-            return hand_qpos
-        logger.log_error(
-            f"hand_qpos must have shape ({hand_dof},) or "
-            f"({self.n_envs}, {hand_dof}), but got {hand_qpos.shape}",
-            ValueError,
-        )
-
-    def _repeat_hand_qpos(
-        self, hand_qpos: torch.Tensor, n_waypoints: int
-    ) -> torch.Tensor:
-        """Repeat hand qpos across trajectory waypoints."""
-        return self._expand_hand_qpos(hand_qpos).unsqueeze(1).repeat(1, n_waypoints, 1)
+        self._init_grasp_hand_state("PickUpActionCfg")
 
     def execute(
         self,
@@ -406,6 +417,8 @@ def execute(
             )
 
         if isinstance(is_success, torch.Tensor):
+            # Current engine contract is all-or-nothing; per-env masking is outside
+            # this action's return protocol.
             is_success = torch.all(is_success).item()
         if not is_success:
             logger.log_warning("Failed to resolve grasp pose for all environments.")
@@ -413,7 +426,7 @@ def execute(
 
         if target_semantics is not None:
             obj_poses = target_semantics.entity.get_local_pose(to_matrix=True)
-            object_to_eef = torch.bmm(self._invert_pose(obj_poses), grasp_xpos)
+            object_to_eef = torch.bmm(pose_inv(obj_poses), grasp_xpos)
             self._held_object_state = HeldObjectState(
                 semantics=target_semantics,
                 object_to_eef=object_to_eef,
@@ -519,7 +532,7 @@ def execute(
 
     def _resolve_grasp_pose(
         self, semantics: ObjectSemantics
-    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         if not isinstance(semantics.affordance, AntipodalAffordance):
             logger.log_error(
                 "Grasp pose affordance must be of type AntipodalAffordance"
@@ -582,744 +595,14 @@ def get_held_object_state(self) -> HeldObjectState | None:
 
 
 @configclass
-class UprightActionCfg(PickUpActionCfg):
-    name: str = "upright"
-    """Name of the action, used for identification and logging."""
-
-    place_clearance: float = 0.005
-    """Clearance (m) between the upright object bottom and the support plane."""
-
-    upright_axis_sign: float = 1.0
-    """Direction of the object's local Z axis after upright placement.
-
-    Use ``1.0`` to align local +Z with world +Z. Use ``-1.0`` when the mesh's
-    local +Z points toward the physical bottom and local -Z should face upward.
-    """
-
-    place_press_depth: float = 0.002
-    """Additional downward displacement (m) after pre-place to make support contact."""
-
-    place_press_steps: int = 4
-    """Number of closed-hand waypoints used for the downward place press."""
-
-    upright_hold_steps: int = 0
-    """Number of closed-hand waypoints to hold after upright placement."""
-
-    place_hold_steps: int = 8
-    """Number of closed-hand waypoints to hold the object after pressing it down."""
-
-    release_interp_steps: int = 12
-    """Number of waypoints for the slow hand release phase."""
-
-    release_retreat_distance: float = 0.08
-    """Horizontal distance (m) to retreat after releasing the upright object."""
-
-    release_retreat_lift: float = 0.01
-    """Small upward offset (m) added during release retreat."""
-
-    use_grasp_width_qpos: bool = False
-    """Whether to map selected grasp open length into a dynamic hand close qpos."""
-
-    gripper_max_open_width: float = 0.088
-    """Maximum total gripper opening width (m) used for width-to-qpos mapping."""
-
-    grasp_squeeze_width: float = 0.003
-    """Width margin (m) subtracted from the selected grasp width before closing."""
-
-    final_approach_steps: int = 12
-    """Number of waypoints for the slow final approach from pre-grasp to grasp."""
-
-    final_approach_preclose_width_margin: float = 0.010
-    """Extra opening width (m) kept around the selected grasp width during final approach."""
-
-    grasp_hold_steps: int = 4
-    """Number of closed-hand waypoints to hold the grasp before lifting."""
-
-    min_dynamic_hand_close_qpos: torch.Tensor | None = None
-    """Optional minimum hand qpos used when mapping grasp width into close qpos."""
-
-    max_grasp_open_length: float | None = None
-    """Optional maximum selected grasp opening length (m) for upright placement."""
-
-    max_grasp_axis_approach_dot: float | None = None
-    """Optional maximum absolute dot between grasp X axis and approach direction."""
-
-    max_grasp_axis_upright_axis_dot: float | None = None
-    """Optional maximum absolute dot between grasp X axis and object upright axis."""
-
-    upright_yaw_offsets: tuple[float, ...] = (
-        0.0,
-        0.5 * np.pi,
-        -0.5 * np.pi,
-        np.pi,
-    )
-    """Yaw offsets (rad) to try after aligning the object upright axis."""
-
-
-class UprightAction(PickUpAction):
-    def __init__(
-        self,
-        motion_generator: MotionGenerator,
-        cfg: UprightActionCfg | None = None,
-    ):
-        """
-        Initialize the atomic action.
-        Args:
-            motion_generator: The motion generator instance to use for planning.
-            cfg: Configuration for the action.
-        """
-        super().__init__(
-            motion_generator, cfg=cfg if cfg is not None else UprightActionCfg()
-        )
-
-    def _resolve_grasp_pose(
-        self, semantics: ObjectSemantics
-    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
-        if not isinstance(semantics.affordance, AntipodalAffordance):
-            logger.log_error(
-                "Grasp pose affordance must be of type AntipodalAffordance"
-            )
-        if semantics.entity is None:
-            logger.log_error(
-                "ObjectSemantics must be associated with an entity to get object pose"
-            )
-        obj_poses = semantics.entity.get_local_pose(to_matrix=True)
-        if semantics.affordance.generator is None:
-            semantics.affordance._init_generator()
-        generator = semantics.affordance.generator
-        if generator is None:
-            logger.log_error("Failed to initialize antipodal grasp generator")
-
-        n_envs = obj_poses.shape[0]
-        approach_direction = self.approach_direction.to(
-            device=self.device, dtype=torch.float32
-        )
-        approach_direction = approach_direction / approach_direction.norm().clamp(
-            min=1e-6
-        )
-        max_open_length = self.cfg.max_grasp_open_length
-        max_approach_axis_dot = self.cfg.max_grasp_axis_approach_dot
-        max_upright_axis_dot = self.cfg.max_grasp_axis_upright_axis_dot
-
-        is_success = torch.zeros(n_envs, dtype=torch.bool, device=self.device)
-        grasp_xpos = torch.eye(4, dtype=torch.float32, device=self.device).repeat(
-            n_envs, 1, 1
-        )
-        open_length = torch.zeros(n_envs, dtype=torch.float32, device=self.device)
-        init_qpos = self.robot.get_qpos(name=self.cfg.control_part)
-        world_z = torch.tensor([0.0, 0.0, 1.0], device=self.device)
-        upright_obj_pose_candidates = self._build_upright_object_pose_candidates(
-            semantics, obj_poses
-        )
-        selected_upright_obj_poses = upright_obj_pose_candidates[:, 0].clone()
-
-        for env_idx in range(n_envs):
-            (
-                has_candidates,
-                candidate_grasp_xpos,
-                candidate_open_length,
-                candidate_cost,
-            ) = generator.get_valid_grasp_poses(obj_poses[env_idx], approach_direction)
-            if not has_candidates:
-                logger.log_warning(
-                    f"No valid grasp candidates found for {env_idx}-th object."
-                )
-                continue
-
-            candidate_grasp_xpos = candidate_grasp_xpos.to(
-                device=self.device, dtype=torch.float32
-            )
-            candidate_open_length = candidate_open_length.to(
-                device=self.device, dtype=torch.float32
-            )
-            candidate_cost = candidate_cost.to(device=self.device, dtype=torch.float32)
-            candidate_mask = torch.ones(
-                candidate_grasp_xpos.shape[0], dtype=torch.bool, device=self.device
-            )
-            if max_open_length is not None:
-                candidate_mask &= candidate_open_length <= max_open_length
-
-            grasp_axis_dot = torch.abs(
-                (candidate_grasp_xpos[:, :3, 0] * approach_direction).sum(dim=1)
-            )
-            if max_approach_axis_dot is not None:
-                candidate_mask &= grasp_axis_dot <= max_approach_axis_dot
-
-            upright_axis = torch.nn.functional.normalize(
-                obj_poses[env_idx, :3, 2], dim=0
-            )
-            grasp_upright_axis_dot = torch.abs(
-                (candidate_grasp_xpos[:, :3, 0] * upright_axis).sum(dim=1)
-            )
-            if max_upright_axis_dot is not None:
-                candidate_mask &= grasp_upright_axis_dot <= max_upright_axis_dot
-
-            if not bool(torch.any(candidate_mask).item()):
-                logger.log_warning(
-                    "No grasp candidates remain after upright grasp filtering "
-                    f"for {env_idx}-th object."
-                )
-                continue
-
-            candidate_grasp_xpos = candidate_grasp_xpos[candidate_mask]
-            candidate_open_length = candidate_open_length[candidate_mask]
-            candidate_cost = candidate_cost[candidate_mask]
-            n_candidate = candidate_grasp_xpos.shape[0]
-
-            pre_grasp_xpos = self._apply_offset(
-                pose=candidate_grasp_xpos,
-                offset=-candidate_grasp_xpos[:, :3, 2] * self.cfg.pre_grasp_distance,
-            )
-            lift_xpos = self._apply_offset(
-                pose=candidate_grasp_xpos,
-                offset=world_z * self.cfg.lift_height,
-            )
-            obj_pose_repeat = obj_poses[env_idx].unsqueeze(0).repeat(n_candidate, 1, 1)
-            obj_to_grasp = torch.bmm(
-                self._invert_pose(obj_pose_repeat), candidate_grasp_xpos
-            )
-
-            base_ik_success = torch.ones(
-                n_candidate, dtype=torch.bool, device=self.device
-            )
-            qpos_seed = init_qpos[env_idx : env_idx + 1, None, :].repeat(
-                1, n_candidate, 1
-            )
-            for target_xpos in (
-                pre_grasp_xpos,
-                candidate_grasp_xpos,
-                lift_xpos,
-            ):
-                target_success, target_qpos = self.robot.compute_batch_ik(
-                    pose=target_xpos.unsqueeze(0),
-                    name=self.cfg.control_part,
-                    joint_seed=qpos_seed,
-                    env_ids=[env_idx],
-                )
-                base_ik_success &= target_success[0]
-                qpos_seed = target_qpos
-
-            n_upright_pose = upright_obj_pose_candidates.shape[1]
-            upright_obj_pose_repeat = (
-                upright_obj_pose_candidates[env_idx]
-                .unsqueeze(1)
-                .repeat(1, n_candidate, 1, 1)
-                .reshape(-1, 4, 4)
-            )
-            obj_to_grasp_repeat = (
-                obj_to_grasp.unsqueeze(0)
-                .repeat(n_upright_pose, 1, 1, 1)
-                .reshape(-1, 4, 4)
-            )
-            upright_lift_obj_xpos = self._apply_offset(
-                pose=upright_obj_pose_repeat,
-                offset=world_z * self.cfg.lift_height,
-            )
-            upright_lift_xpos = torch.bmm(upright_lift_obj_xpos, obj_to_grasp_repeat)
-            upright_place_xpos = torch.bmm(upright_obj_pose_repeat, obj_to_grasp_repeat)
-            press_xpos = self._apply_offset(
-                pose=upright_place_xpos,
-                offset=-world_z
-                * (self.cfg.place_clearance + self.cfg.place_press_depth),
-            )
-
-            ik_success = base_ik_success.repeat(n_upright_pose)
-            upright_qpos_seed = qpos_seed.repeat(1, n_upright_pose, 1)
-            for target_xpos in (upright_lift_xpos, upright_place_xpos, press_xpos):
-                target_success, target_qpos = self.robot.compute_batch_ik(
-                    pose=target_xpos.unsqueeze(0),
-                    name=self.cfg.control_part,
-                    joint_seed=upright_qpos_seed,
-                    env_ids=[env_idx],
-                )
-                ik_success &= target_success[0]
-                upright_qpos_seed = target_qpos
-
-            flat_candidate_cost = candidate_cost.repeat(n_upright_pose)
-            masked_cost = torch.where(
-                ik_success,
-                flat_candidate_cost,
-                torch.full_like(flat_candidate_cost, float("inf")),
-            )
-            best_cost, best_flat_idx = masked_cost.min(dim=0)
-            best_upright_idx = torch.div(
-                best_flat_idx, n_candidate, rounding_mode="floor"
-            )
-            best_idx = best_flat_idx % n_candidate
-
-            if not torch.isfinite(best_cost):
-                logger.log_warning(
-                    "No upright grasp candidates remain after IK feasibility "
-                    f"filtering for {env_idx}-th object."
-                )
-                continue
-
-            is_success[env_idx] = True
-            grasp_xpos[env_idx] = candidate_grasp_xpos[best_idx]
-            open_length[env_idx] = candidate_open_length[best_idx]
-            selected_upright_obj_poses[env_idx] = upright_obj_pose_candidates[
-                env_idx, best_upright_idx
-            ]
-
-        self._selected_upright_obj_xpos = selected_upright_obj_poses
-        return is_success, grasp_xpos, open_length
-
-    @staticmethod
-    def _invert_pose(pose: torch.Tensor) -> torch.Tensor:
-        """Invert a batched homogeneous transform."""
-        inv_pose = pose.clone()
-        rot_t = pose[:, :3, :3].transpose(1, 2)
-        inv_pose[:, :3, :3] = rot_t
-        inv_pose[:, :3, 3] = -torch.bmm(rot_t, pose[:, :3, 3:4]).squeeze(-1)
-        return inv_pose
-
-    def _build_upright_object_pose(
-        self, semantics: ObjectSemantics, obj_poses: torch.Tensor
-    ) -> torch.Tensor:
-        """Build a target object pose whose configured local Z direction is upright."""
-        world_z = torch.tensor([0.0, 0.0, 1.0], device=self.device)
-        axis_sign = 1.0 if self.cfg.upright_axis_sign >= 0.0 else -1.0
-        projected_x = obj_poses[:, :3, 0].clone()
-        projected_x[:, 2] = 0.0
-        projected_x_norm = projected_x.norm(dim=1, keepdim=True)
-
-        fallback_x = obj_poses[:, :3, 1].clone()
-        fallback_x[:, 2] = 0.0
-        fallback_x_norm = fallback_x.norm(dim=1, keepdim=True)
-        fallback_x = fallback_x / fallback_x_norm.clamp(min=1e-6)
-
-        default_x = torch.tensor([1.0, 0.0, 0.0], device=self.device).repeat(
-            self.n_envs, 1
-        )
-        upright_x = torch.where(
-            projected_x_norm > 1e-6,
-            projected_x / projected_x_norm.clamp(min=1e-6),
-            torch.where(fallback_x_norm > 1e-6, fallback_x, default_x),
-        )
-        upright_z = axis_sign * world_z.repeat(self.n_envs, 1)
-        upright_y = torch.cross(upright_z, upright_x, dim=1)
-        upright_y = upright_y / upright_y.norm(dim=1, keepdim=True).clamp(min=1e-6)
-        upright_x = torch.cross(upright_y, upright_z, dim=1)
-        upright_x = upright_x / upright_x.norm(dim=1, keepdim=True).clamp(min=1e-6)
-
-        upright_pose = obj_poses.clone()
-        upright_pose[:, :3, 0] = upright_x
-        upright_pose[:, :3, 1] = upright_y
-        upright_pose[:, :3, 2] = upright_z
-
-        mesh_vertices = semantics.geometry.get("mesh_vertices")
-        if isinstance(mesh_vertices, torch.Tensor) and mesh_vertices.numel() > 0:
-            mesh_vertices = mesh_vertices.to(device=self.device, dtype=torch.float32)
-            vertical_offsets = torch.matmul(
-                mesh_vertices, upright_pose[:, 2, :3].transpose(0, 1)
-            )
-            local_bottom_z = vertical_offsets.min(dim=0).values
-            upright_pose[:, 2, 3] = self.cfg.place_clearance - local_bottom_z
-        return upright_pose
-
-    def _build_upright_object_pose_candidates(
-        self, semantics: ObjectSemantics, obj_poses: torch.Tensor
-    ) -> torch.Tensor:
-        """Build upright target poses with alternative yaw rotations."""
-        base_pose = self._build_upright_object_pose(semantics, obj_poses)
-        yaw_offsets = torch.as_tensor(
-            self.cfg.upright_yaw_offsets, device=self.device, dtype=torch.float32
-        )
-        cos_yaw = torch.cos(yaw_offsets).view(1, -1, 1)
-        sin_yaw = torch.sin(yaw_offsets).view(1, -1, 1)
-
-        base_x = base_pose[:, None, :3, 0]
-        base_y = base_pose[:, None, :3, 1]
-        candidates = base_pose[:, None, :, :].repeat(1, yaw_offsets.numel(), 1, 1)
-        candidates[:, :, :3, 0] = cos_yaw * base_x + sin_yaw * base_y
-        candidates[:, :, :3, 1] = -sin_yaw * base_x + cos_yaw * base_y
-        return candidates
-
-    def _compute_hand_qpos_for_width(self, target_width: torch.Tensor) -> torch.Tensor:
-        """Map desired total gripper width to batched hand qpos."""
-        target_width = target_width.to(device=self.device, dtype=torch.float32).view(
-            self.n_envs, 1
-        )
-        target_width = target_width.clamp(min=0.0, max=self.cfg.gripper_max_open_width)
-        closing_distance = 0.5 * (self.cfg.gripper_max_open_width - target_width).clamp(
-            min=0.0
-        )
-        hand_qpos_limits = self.robot.get_qpos_limits(
-            name=self.cfg.hand_control_part
-        ).to(self.device)
-        lower_limits = hand_qpos_limits[:, :, 0]
-        upper_limits = hand_qpos_limits[:, :, 1]
-        hand_open_qpos = self._expand_hand_qpos(self.hand_open_qpos)
-        dynamic_qpos = hand_open_qpos + closing_distance.repeat(
-            1, len(self.hand_joint_ids)
-        )
-        return torch.max(torch.min(dynamic_qpos, upper_limits), lower_limits)
-
-    def _compute_dynamic_hand_close_qpos(
-        self, grasp_open_length: torch.Tensor
-    ) -> torch.Tensor:
-        """Map selected grasp width to batched hand close qpos for parallel grippers."""
-        fallback_qpos = self._expand_hand_qpos(self.hand_close_qpos)
-        if not self.cfg.use_grasp_width_qpos:
-            return fallback_qpos
-
-        grasp_open_length = grasp_open_length.to(
-            device=self.device, dtype=torch.float32
-        ).view(self.n_envs, 1)
-        target_width = (grasp_open_length - self.cfg.grasp_squeeze_width).clamp(min=0.0)
-        dynamic_qpos = self._compute_hand_qpos_for_width(target_width)
-        if self.cfg.min_dynamic_hand_close_qpos is not None:
-            min_close_qpos = self._expand_hand_qpos(
-                self.cfg.min_dynamic_hand_close_qpos
-            )
-            dynamic_qpos = torch.max(dynamic_qpos, min_close_qpos)
-        return dynamic_qpos
-
-    def _compute_final_approach_hand_qpos(
-        self, grasp_open_length: torch.Tensor, hand_close_qpos: torch.Tensor
-    ) -> torch.Tensor:
-        """Pre-close the gripper during final approach without reaching squeeze force."""
-        hand_open_qpos = self._expand_hand_qpos(self.hand_open_qpos)
-        if not self.cfg.use_grasp_width_qpos:
-            return hand_open_qpos
-
-        grasp_open_length = grasp_open_length.to(
-            device=self.device, dtype=torch.float32
-        ).view(self.n_envs, 1)
-        target_width = grasp_open_length + self.cfg.final_approach_preclose_width_margin
-        preclose_qpos = self._compute_hand_qpos_for_width(target_width)
-        return torch.max(torch.min(preclose_qpos, hand_close_qpos), hand_open_qpos)
-
-    def execute(
-        self,
-        target: Union[ObjectSemantics, torch.Tensor],
-        start_qpos: Optional[torch.Tensor] = None,
-        **kwargs,
-    ) -> tuple[bool, torch.Tensor, list[float]]:
-        """Pick up an object, rotate it upright, place it down, and release it."""
-        if not isinstance(target, ObjectSemantics):
-            return super().execute(target=target, start_qpos=start_qpos, **kwargs)
-
-        is_success, grasp_xpos, grasp_open_length = self._resolve_grasp_pose(target)
-        obj_poses = target.entity.get_local_pose(to_matrix=True)
-        if not torch.all(is_success).item():
-            logger.log_warning(
-                "Failed to resolve upright grasp pose for all environments."
-            )
-            return False, torch.empty(0), self.joint_ids
-
-        world_z = torch.tensor([0, 0, 1], device=self.device, dtype=torch.float32)
-        hand_close_qpos = self._compute_dynamic_hand_close_qpos(grasp_open_length)
-        final_approach_hand_qpos = self._compute_final_approach_hand_qpos(
-            grasp_open_length, hand_close_qpos
-        )
-        pre_grasp_xpos = self._apply_offset(
-            pose=grasp_xpos,
-            offset=-grasp_xpos[:, :3, 2] * self.cfg.pre_grasp_distance,
-        )
-        lift_xpos = self._apply_offset(
-            pose=grasp_xpos,
-            offset=world_z * self.cfg.lift_height,
-        )
-
-        obj_to_grasp = torch.bmm(self._invert_pose(obj_poses), grasp_xpos)
-        upright_obj_xpos = getattr(self, "_selected_upright_obj_xpos", None)
-        if upright_obj_xpos is None or upright_obj_xpos.shape != obj_poses.shape:
-            upright_obj_xpos = self._build_upright_object_pose(target, obj_poses)
-        upright_lift_obj_xpos = self._apply_offset(
-            pose=upright_obj_xpos,
-            offset=world_z * self.cfg.lift_height,
-        )
-        upright_lift_xpos = torch.bmm(upright_lift_obj_xpos, obj_to_grasp)
-        upright_place_xpos = torch.bmm(upright_obj_xpos, obj_to_grasp)
-        press_down_distance = self.cfg.place_clearance + self.cfg.place_press_depth
-        press_xpos = self._apply_offset(
-            pose=upright_place_xpos,
-            offset=-world_z * press_down_distance,
-        )
-        retreat_direction = -press_xpos[:, :3, 2]
-        retreat_direction[:, 2] = 0.0
-        retreat_direction_norm = retreat_direction.norm(dim=1, keepdim=True)
-        retreat_direction = torch.where(
-            retreat_direction_norm > 1e-6,
-            retreat_direction / retreat_direction_norm.clamp(min=1e-6),
-            -press_xpos[:, :3, 0],
-        )
-        retreat_direction[:, 2] = 0.0
-        retreat_direction = retreat_direction / retreat_direction.norm(
-            dim=1, keepdim=True
-        ).clamp(min=1e-6)
-        retreat_offset = (
-            retreat_direction * self.cfg.release_retreat_distance
-            + world_z * self.cfg.release_retreat_lift
-        )
-        retreat_xpos = self._apply_offset(
-            pose=press_xpos,
-            offset=retreat_offset,
-        )
-
-        start_qpos = self._resolve_start_qpos(start_qpos, self.arm_dof)
-        n_close_waypoint = self.cfg.hand_interp_steps
-        n_final_approach_waypoint = max(2, self.cfg.final_approach_steps)
-        n_grasp_hold_waypoint = max(0, self.cfg.grasp_hold_steps)
-        n_press_waypoint = max(2, self.cfg.place_press_steps)
-        n_upright_hold_waypoint = max(0, self.cfg.upright_hold_steps)
-        n_hold_waypoint = max(0, self.cfg.place_hold_steps)
-        n_open_waypoint = max(2, self.cfg.release_interp_steps)
-        motion_waypoints = (
-            self.cfg.sample_interval
-            - n_close_waypoint
-            - n_final_approach_waypoint
-            - n_grasp_hold_waypoint
-            - n_upright_hold_waypoint
-            - n_press_waypoint
-            - n_hold_waypoint
-            - n_open_waypoint
-        )
-        if motion_waypoints < 6:
-            logger.log_error(
-                "Not enough waypoints for upright action. Please increase "
-                "sample_interval or decrease hand/press/upright-hold/hold/release "
-                "steps.",
-                ValueError,
-            )
-        n_pre_approach_waypoint = max(2, int(np.round(motion_waypoints * 0.25)))
-        n_upright_waypoint = max(2, int(np.round(motion_waypoints * 0.60)))
-        n_retreat_waypoint = (
-            self.cfg.sample_interval
-            - n_close_waypoint
-            - n_final_approach_waypoint
-            - n_grasp_hold_waypoint
-            - n_upright_hold_waypoint
-            - n_press_waypoint
-            - n_hold_waypoint
-            - n_open_waypoint
-            - n_pre_approach_waypoint
-            - n_upright_waypoint
-        )
-        if n_retreat_waypoint < 2:
-            retreat_deficit = 2 - n_retreat_waypoint
-            n_retreat_waypoint = 2
-            n_upright_waypoint = max(2, n_upright_waypoint - retreat_deficit)
-        target_states_list = [
-            [
-                PlanState(xpos=pre_grasp_xpos[i], move_type=MoveType.EEF_MOVE),
-            ]
-            for i in range(self.n_envs)
-        ]
-        is_success, plan_traj = self._plan_arm_trajectory(
-            target_states_list,
-            start_qpos,
-            n_pre_approach_waypoint,
-            self.arm_dof,
-        )
-        if not is_success:
-            logger.log_warning("Failed to plan approach trajectory.")
-            return False, torch.empty(0), self.joint_ids
-        approach_trajectory = torch.zeros(
-            size=(self.n_envs, n_pre_approach_waypoint, self.dof),
-            dtype=torch.float32,
-            device=self.device,
-        )
-        approach_trajectory[:, :, : self.arm_dof] = plan_traj
-        approach_trajectory[:, :, self.arm_dof :] = self._repeat_hand_qpos(
-            self.hand_open_qpos, n_pre_approach_waypoint
-        )
-
-        pre_grasp_qpos = approach_trajectory[:, -1, : self.arm_dof]
-        target_states_list = [
-            [PlanState(xpos=grasp_xpos[i], move_type=MoveType.EEF_MOVE)]
-            for i in range(self.n_envs)
-        ]
-        is_success, plan_traj = self._plan_arm_trajectory(
-            target_states_list,
-            pre_grasp_qpos,
-            n_final_approach_waypoint,
-            self.arm_dof,
-        )
-        if not is_success:
-            logger.log_warning("Failed to plan final approach trajectory.")
-            return False, torch.empty(0), self.joint_ids
-        final_approach_trajectory = torch.zeros(
-            size=(self.n_envs, n_final_approach_waypoint, self.dof),
-            dtype=torch.float32,
-            device=self.device,
-        )
-        final_approach_trajectory[:, :, : self.arm_dof] = plan_traj
-        final_approach_hand_path = self._interpolate_hand_qpos(
-            self.hand_open_qpos,
-            final_approach_hand_qpos,
-            n_final_approach_waypoint,
-        )
-        final_approach_trajectory[:, :, self.arm_dof :] = final_approach_hand_path
-
-        grasp_qpos = final_approach_trajectory[:, -1, : self.arm_dof]
-        hand_close_path = self._interpolate_hand_qpos(
-            final_approach_hand_qpos,
-            hand_close_qpos,
-            n_close_waypoint,
-        )
-        close_trajectory = torch.zeros(
-            size=(self.n_envs, n_close_waypoint, self.dof),
-            dtype=torch.float32,
-            device=self.device,
-        )
-        close_trajectory[:, :, : self.arm_dof] = grasp_qpos.unsqueeze(1)
-        close_trajectory[:, :, self.arm_dof :] = hand_close_path
-
-        closed_grasp_qpos = close_trajectory[:, -1, : self.arm_dof]
-        grasp_hold_trajectory = torch.zeros(
-            size=(self.n_envs, n_grasp_hold_waypoint, self.dof),
-            dtype=torch.float32,
-            device=self.device,
-        )
-        if n_grasp_hold_waypoint > 0:
-            grasp_hold_trajectory[:, :, : self.arm_dof] = closed_grasp_qpos.unsqueeze(1)
-            grasp_hold_trajectory[:, :, self.arm_dof :] = self._repeat_hand_qpos(
-                hand_close_qpos, n_grasp_hold_waypoint
-            )
-
-        target_states_list = [
-            [
-                PlanState(xpos=lift_xpos[i], move_type=MoveType.EEF_MOVE),
-                PlanState(xpos=upright_lift_xpos[i], move_type=MoveType.EEF_MOVE),
-                PlanState(xpos=upright_place_xpos[i], move_type=MoveType.EEF_MOVE),
-            ]
-            for i in range(self.n_envs)
-        ]
-        is_success, plan_traj = self._plan_arm_trajectory(
-            target_states_list,
-            closed_grasp_qpos,
-            n_upright_waypoint,
-            self.arm_dof,
-        )
-        if not is_success:
-            logger.log_warning("Failed to plan upright trajectory.")
-            return False, torch.empty(0), self.joint_ids
-        upright_trajectory = torch.zeros(
-            size=(self.n_envs, n_upright_waypoint, self.dof),
-            dtype=torch.float32,
-            device=self.device,
-        )
-        upright_trajectory[:, :, : self.arm_dof] = plan_traj
-        upright_trajectory[:, :, self.arm_dof :] = self._repeat_hand_qpos(
-            hand_close_qpos, n_upright_waypoint
-        )
-
-        place_qpos = upright_trajectory[:, -1, : self.arm_dof]
-        upright_hold_trajectory = torch.zeros(
-            size=(self.n_envs, n_upright_hold_waypoint, self.dof),
-            dtype=torch.float32,
-            device=self.device,
-        )
-        if n_upright_hold_waypoint > 0:
-            upright_hold_trajectory[:, :, : self.arm_dof] = place_qpos.unsqueeze(1)
-            upright_hold_trajectory[:, :, self.arm_dof :] = self._repeat_hand_qpos(
-                hand_close_qpos, n_upright_hold_waypoint
-            )
-
-        target_states_list = [
-            [PlanState(xpos=press_xpos[i], move_type=MoveType.EEF_MOVE)]
-            for i in range(self.n_envs)
-        ]
-        is_success, plan_traj = self._plan_arm_trajectory(
-            target_states_list,
-            place_qpos,
-            n_press_waypoint,
-            self.arm_dof,
-        )
-        if not is_success:
-            logger.log_warning("Failed to plan place press trajectory.")
-            return False, torch.empty(0), self.joint_ids
-        press_trajectory = torch.zeros(
-            size=(self.n_envs, n_press_waypoint, self.dof),
-            dtype=torch.float32,
-            device=self.device,
-        )
-        press_trajectory[:, :, : self.arm_dof] = plan_traj
-        press_trajectory[:, :, self.arm_dof :] = self._repeat_hand_qpos(
-            hand_close_qpos, n_press_waypoint
-        )
-
-        press_qpos = press_trajectory[:, -1, : self.arm_dof]
-        hold_trajectory = torch.zeros(
-            size=(self.n_envs, n_hold_waypoint, self.dof),
-            dtype=torch.float32,
-            device=self.device,
-        )
-        if n_hold_waypoint > 0:
-            hold_trajectory[:, :, : self.arm_dof] = press_qpos.unsqueeze(1)
-            hold_trajectory[:, :, self.arm_dof :] = self._repeat_hand_qpos(
-                hand_close_qpos, n_hold_waypoint
-            )
-
-        hand_open_path = self._interpolate_hand_qpos(
-            hand_close_qpos,
-            self.hand_open_qpos,
-            n_open_waypoint,
-        )
-        open_trajectory = torch.zeros(
-            size=(self.n_envs, n_open_waypoint, self.dof),
-            dtype=torch.float32,
-            device=self.device,
-        )
-        open_trajectory[:, :, : self.arm_dof] = press_qpos.unsqueeze(1)
-        open_trajectory[:, :, self.arm_dof :] = hand_open_path
-
-        target_states_list = [
-            [PlanState(xpos=retreat_xpos[i], move_type=MoveType.EEF_MOVE)]
-            for i in range(self.n_envs)
-        ]
-        is_success, plan_traj = self._plan_arm_trajectory(
-            target_states_list,
-            press_qpos,
-            n_retreat_waypoint,
-            self.arm_dof,
-        )
-        if not is_success:
-            logger.log_warning("Failed to plan retreat trajectory.")
-            return False, torch.empty(0), self.joint_ids
-        retreat_trajectory = torch.zeros(
-            size=(self.n_envs, n_retreat_waypoint, self.dof),
-            dtype=torch.float32,
-            device=self.device,
-        )
-        retreat_trajectory[:, :, : self.arm_dof] = plan_traj
-        retreat_trajectory[:, :, self.arm_dof :] = self._repeat_hand_qpos(
-            self.hand_open_qpos, n_retreat_waypoint
-        )
-
-        trajectory = torch.cat(
-            [
-                approach_trajectory,
-                final_approach_trajectory,
-                close_trajectory,
-                grasp_hold_trajectory,
-                upright_trajectory,
-                upright_hold_trajectory,
-                press_trajectory,
-                hold_trajectory,
-                open_trajectory,
-                retreat_trajectory,
-            ],
-            dim=1,
-        )
-        return True, trajectory, self.joint_ids
-
-
-@configclass
-class MoveObjectActionCfg(MoveActionCfg):
+class MoveObjectActionCfg(HandCloseActionCfg):
     name: str = "move_object"
     """Name of the action, used for identification and logging."""
 
-    hand_close_qpos: torch.Tensor | None = None
-    """[hand_dof,] or [n_envs, hand_dof] joint positions for keeping the hand closed."""
-
-    hand_control_part: str = "hand"
-    """Name of the robot part that controls the hand joints."""
-
 
 class MoveObjectAction(MoveAction):
+    updates_held_object_state = True
+
     def __init__(
         self,
         motion_generator: MotionGenerator,
@@ -1336,15 +619,7 @@ def __init__(
         )
         self.cfg = cfg if cfg is not None else self.cfg
         self._held_object_state: HeldObjectState | None = None
-        if self.cfg.hand_close_qpos is None:
-            logger.log_error("hand_close_qpos must be specified in MoveObjectActionCfg")
-        self.hand_close_qpos = self.cfg.hand_close_qpos.to(self.device)
-
-        self.hand_joint_ids = self.robot.get_joint_ids(name=self.cfg.hand_control_part)
-        self.joint_ids = self.arm_joint_ids + self.hand_joint_ids
-        self.arm_dof = len(self.arm_joint_ids)
-        self.hand_dof = len(self.hand_joint_ids)
-        self.dof = len(self.joint_ids)
+        self._init_hand_close_state("MoveObjectActionCfg")
 
     def _resolve_move_object_target(
         self,
@@ -1397,19 +672,6 @@ def _resolve_move_object_target(
         move_object_xpos = torch.bmm(object_target_pose, object_to_eef)
         return True, move_object_xpos, held_state
 
-    def _repeat_hand_close_qpos(self, n_waypoints: int) -> torch.Tensor:
-        """Repeat the closed-hand target across trajectory waypoints."""
-        hand_qpos = self.hand_close_qpos.to(device=self.device, dtype=torch.float32)
-        if hand_qpos.shape == (self.hand_dof,):
-            hand_qpos = hand_qpos.unsqueeze(0).repeat(self.n_envs, 1)
-        if hand_qpos.shape != (self.n_envs, self.hand_dof):
-            logger.log_error(
-                f"hand_close_qpos must have shape ({self.hand_dof},) or "
-                f"({self.n_envs}, {self.hand_dof}), but got {hand_qpos.shape}",
-                ValueError,
-            )
-        return hand_qpos.unsqueeze(1).repeat(1, n_waypoints, 1)
-
     def execute(
         self,
         target: MoveObjectTarget,
@@ -1450,7 +712,8 @@ def execute(
             logger.log_warning("Failed to plan move_object trajectory.")
             return False, trajectory, self.joint_ids
         trajectory[:, :, : self.arm_dof] = plan_traj
-        trajectory[:, :, self.arm_dof :] = self._repeat_hand_close_qpos(
+        trajectory[:, :, self.arm_dof :] = self._repeat_hand_qpos(
+            self.hand_close_qpos,
             self.cfg.sample_interval
         )
         return True, trajectory, self.joint_ids
@@ -1471,6 +734,8 @@ class PlaceActionCfg(GraspActionCfg):
 
 
 class PlaceAction(MoveAction):
+    updates_held_object_state = True
+
     def __init__(
         self,
         motion_generator: MotionGenerator,
@@ -1487,17 +752,7 @@ def __init__(
         )
         self.cfg = cfg if cfg is not None else self.cfg
         self._held_object_state: HeldObjectState | None = None
-        if self.cfg.hand_open_qpos is None:
-            logger.log_error("hand_open_qpos must be specified in PlaceActionCfg")
-        if self.cfg.hand_close_qpos is None:
-            logger.log_error("hand_close_qpos must be specified in PlaceActionCfg")
-        self.hand_open_qpos = self.cfg.hand_open_qpos.to(self.device)
-        self.hand_close_qpos = self.cfg.hand_close_qpos.to(self.device)
-
-        self.hand_joint_ids = self.robot.get_joint_ids(name=self.cfg.hand_control_part)
-        self.joint_ids = self.arm_joint_ids + self.hand_joint_ids
-        self.arm_dof = len(self.arm_joint_ids)
-        self.dof = len(self.joint_ids)
+        self._init_grasp_hand_state("PlaceActionCfg")
 
     def execute(
         self,
diff --git a/embodichain/lab/sim/atomic_actions/core.py b/embodichain/lab/sim/atomic_actions/core.py
index 4c3c1e48..cac89de8 100644
--- a/embodichain/lab/sim/atomic_actions/core.py
+++ b/embodichain/lab/sim/atomic_actions/core.py
@@ -19,7 +19,7 @@
 import torch
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Union, TYPE_CHECKING
+from typing import Any, ClassVar, Dict, List, Optional, Union, TYPE_CHECKING
 
 from embodichain.lab.sim.planners import PlanResult, PlanState, MoveType
 from embodichain.utils import configclass
@@ -375,6 +375,9 @@ class AtomicAction(ABC):
     the existing motion planning infrastructure.
     """
 
+    updates_held_object_state: ClassVar[bool] = False
+    """Whether the engine should read held-object state after this action."""
+
     def __init__(
         self,
         motion_generator: MotionGenerator,
@@ -392,6 +395,10 @@ def __init__(
         self.control_part = cfg.control_part
         self.device = self.robot.device
 
+    def get_held_object_state(self) -> HeldObjectState | None:
+        """Return held-object state after execution if this action updates it."""
+        return None
+
     @abstractmethod
     def execute(
         self,
diff --git a/embodichain/lab/sim/atomic_actions/engine.py b/embodichain/lab/sim/atomic_actions/engine.py
index 59275f9d..efe03f7a 100644
--- a/embodichain/lab/sim/atomic_actions/engine.py
+++ b/embodichain/lab/sim/atomic_actions/engine.py
@@ -251,9 +251,8 @@ def execute_static(
             if not is_success:
                 return False, all_trajectory
 
-            held_state_getter = getattr(atom_action, "get_held_object_state", None)
-            if callable(held_state_getter):
-                held_state = held_state_getter()
+            if atom_action.updates_held_object_state:
+                held_state = atom_action.get_held_object_state()
                 if held_state is None:
                     self._action_context.pop("held_object_state", None)
                 else:
diff --git a/scripts/tutorials/atomic_action/upright_atomic_action.py b/scripts/tutorials/atomic_action/upright_atomic_action.py
deleted file mode 100644
index 8a5f5d42..00000000
--- a/scripts/tutorials/atomic_action/upright_atomic_action.py
+++ /dev/null
@@ -1,740 +0,0 @@
-# ----------------------------------------------------------------------------
-# Copyright (c) 2021-2026 DexForce Technology Co., Ltd.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ----------------------------------------------------------------------------
-
-"""
-This script demonstrates the creation and simulation of a robot that uprights a fallen
-bottle in a simulated environment using the SimulationManager and atomic actions.
-"""
-
-from __future__ import annotations
-
-import argparse
-import sys
-import time
-from pathlib import Path
-
-_REPO_ROOT = Path(__file__).resolve().parents[3]
-if str(_REPO_ROOT) not in sys.path:
-    sys.path.insert(0, str(_REPO_ROOT))
-
-import torch
-
-from embodichain.lab.sim import SimulationManager, SimulationManagerCfg
-from embodichain.lab.sim.objects import Robot, RigidObject
-from embodichain.lab.sim.shapes import CubeCfg, MeshCfg
-from embodichain.lab.sim.solvers import PytorchSolverCfg
-from embodichain.lab.sim.planners import MotionGenerator, MotionGenCfg, ToppraPlannerCfg
-from embodichain.data import get_data_path
-from embodichain.lab.gym.utils.gym_utils import add_env_launcher_args_to_parser
-from embodichain.utils import logger
-from embodichain.lab.sim.cfg import (
-    RenderCfg,
-    JointDrivePropertiesCfg,
-    RobotCfg,
-    LightCfg,
-    RigidBodyAttributesCfg,
-    RigidObjectCfg,
-    URDFCfg,
-)
-from embodichain.lab.sim.atomic_actions import (
-    AntipodalAffordance,
-    ObjectSemantics,
-    UprightAction,
-    UprightActionCfg,
-)
-from embodichain.toolkits.graspkit.pg_grasp.antipodal_generator import (
-    GraspGenerator,
-    GraspGeneratorCfg,
-    AntipodalSamplerCfg,
-)
-from embodichain.toolkits.graspkit.pg_grasp.gripper_collision_checker import (
-    GripperCollisionCfg,
-)
-
-GRIPPER_URDF_PATH = "DH_PGI_140_80/DH_PGI_140_80.urdf"
-GRIPPER_HAND_JOINT_PATTERN = "GRIPPER_FINGER1_JOINT_1"
-GRIPPER_MAX_OPEN_WIDTH = 0.080
-GRIPPER_FINGER_LENGTH = 0.088
-GRIPPER_ROOT_Z_WIDTH = 0.096
-GRIPPER_Y_THICKNESS = 0.040
-GRIPPER_TCP_Z = 0.15
-PGI_SAMPLE_INTERVAL = 120
-PGI_HAND_CLOSE_STEPS = 12
-PGI_GRASP_HOLD_STEPS = 20
-BOTTLE_LABEL = "bottle"
-BOTTLE_APPROACH_DIRECTION = (0.0, 0.0, -1.0)
-BOTTLE_GRASP_SQUEEZE_WIDTH = 0.020
-BOTTLE_MAX_GRASP_OPEN_LENGTH = 0.060
-BOTTLE_MAX_GRASP_AXIS_APPROACH_DOT = 0.080
-BOTTLE_MAX_GRASP_AXIS_UPRIGHT_AXIS_DOT = 0.35
-BOTTLE_MIN_DYNAMIC_HAND_CLOSE_QPOS = 0.024
-BOTTLE_GRASP_COLLISION_THRESHOLD = -0.004
-
-
-def parse_arguments():
-    """
-    Parse command-line arguments to configure the simulation.
-
-    Returns:
-        argparse.Namespace: Parsed arguments including simulation and rendering
-        options.
-    """
-    parser = argparse.ArgumentParser(
-        description="Create and simulate a robot in SimulationManager"
-    )
-    add_env_launcher_args_to_parser(parser)
-    parser.add_argument(
-        "--n_sample",
-        type=int,
-        default=10000,
-        help="Number of surface samples for antipodal grasp generation.",
-    )
-    parser.add_argument(
-        "--force_reannotate",
-        action="store_true",
-        help=(
-            "Force grasp region re-annotation instead of reusing cached antipodal "
-            "pairs."
-        ),
-    )
-    parser.add_argument(
-        "--debug_hand_state",
-        action="store_true",
-        help="Log planned hand targets and simulated hand qpos during execution.",
-    )
-    parser.add_argument(
-        "--diagnose_grasp",
-        action="store_true",
-        help="Plan once and print grasp/TCP diagnostics without opening the viewer.",
-    )
-    parser.add_argument(
-        "--auto_play",
-        action="store_true",
-        help="Run the viewer demo without waiting for keyboard input.",
-    )
-    return parser.parse_args()
-
-
-def initialize_simulation(args) -> SimulationManager:
-    """
-    Initialize the simulation environment based on the provided arguments.
-
-    Args:
-        args (argparse.Namespace): Parsed command-line arguments.
-
-    Returns:
-        SimulationManager: Configured simulation manager instance.
-    """
-    config = SimulationManagerCfg(
-        headless=True,
-        sim_device=args.device,
-        render_cfg=RenderCfg(renderer=args.renderer),
-        physics_dt=1.0 / 100.0,
-        arena_space=2.5,
-    )
-    sim = SimulationManager(config)
-
-    sim.add_light(
-        cfg=LightCfg(
-            uid="main_light",
-            color=(0.6, 0.6, 0.6),
-            intensity=30.0,
-            init_pos=(1.0, 0, 3.0),
-        )
-    )
-
-    return sim
-
-
-def create_robot(sim: SimulationManager, position=[0.0, 0.0, 0.0]) -> Robot:
-    """
-    Create and configure a robot with an arm and a dexterous hand in the simulation.
-
-    Args:
-        sim (SimulationManager): The simulation manager instance.
-
-    Returns:
-        Robot: The configured robot instance added to the simulation.
-    """
-    # Retrieve URDF paths for the robot arm and hand
-    ur10_urdf_path = get_data_path("UniversalRobots/UR10/UR10.urdf")
-    gripper_urdf_path = get_data_path(GRIPPER_URDF_PATH)
-    # Configure the robot with its components and control properties
-    cfg = RobotCfg(
-        uid="UR10",
-        urdf_cfg=URDFCfg(
-            components=[
-                {"component_type": "arm", "urdf_path": ur10_urdf_path},
-                {"component_type": "hand", "urdf_path": gripper_urdf_path},
-            ]
-        ),
-        drive_pros=JointDrivePropertiesCfg(
-            stiffness={"JOINT[0-9]": 1e4, GRIPPER_HAND_JOINT_PATTERN: 1e3},
-            damping={"JOINT[0-9]": 1e3, GRIPPER_HAND_JOINT_PATTERN: 1e2},
-            max_effort={"JOINT[0-9]": 1e5, GRIPPER_HAND_JOINT_PATTERN: 1e4},
-            drive_type="force",
-        ),
-        control_parts={
-            "arm": ["JOINT[0-9]"],
-            "hand": [GRIPPER_HAND_JOINT_PATTERN],
-        },
-        solver_cfg={
-            "arm": PytorchSolverCfg(
-                end_link_name="ee_link",
-                root_link_name="base_link",
-                tcp=[
-                    [1.0, 0.0, 0.0, 0.0],
-                    [0.0, 1.0, 0.0, 0.0],
-                    [0.0, 0.0, 1.0, GRIPPER_TCP_Z],
-                    [0.0, 0.0, 0.0, 1.0],
-                ],
-            )
-        },
-        init_qpos=[0, -1.57, 1.57, -1.57, -1.57, 0.0, 0.0, 0.0],
-        init_pos=position,
-    )
-    return sim.add_robot(cfg=cfg)
-
-
-def create_fallen_bottle(sim: SimulationManager) -> RigidObject:
-    # Use a slightly smaller and closer bottle for the UR10 gripper demo.
-    bottle_scale = 0.0008
-    bottle_cfg = RigidObjectCfg(
-        uid="bottle",
-        shape=MeshCfg(
-            fpath=get_data_path("ScannedBottle/yibao.ply"),
-        ),
-        attrs=RigidBodyAttributesCfg(
-            mass=0.02,
-            dynamic_friction=0.97,
-            static_friction=0.99,
-        ),
-        max_convex_hull_num=16,
-        init_pos=[-0.4294, -0.0825, -0.0997],
-        init_rot=[90.0, 135.0, 0.0],
-        body_scale=(bottle_scale, bottle_scale, bottle_scale),
-    )
-    return sim.add_rigid_object(cfg=bottle_cfg)
-
-
-def settle_object(sim: SimulationManager, obj: RigidObject, step: int = 5) -> None:
-    """Settle an object through the same explicit sequence on CPU and CUDA."""
-    if sim.device.type == "cuda":
-        sim.init_gpu_physics()
-
-    obj.reset()
-    sim.update(step=step)
-    obj.clear_dynamics()
-
-
-def release_object_after_grasp(obj: RigidObject) -> None:
-    """Clear residual motion after the gripper has closed on the object."""
-    obj.clear_dynamics()
-
-
-def build_grasp_generator_cfg(args: argparse.Namespace) -> GraspGeneratorCfg:
-    return GraspGeneratorCfg(
-        viser_port=11801,
-        antipodal_sampler_cfg=AntipodalSamplerCfg(
-            n_sample=args.n_sample,
-            max_length=GRIPPER_MAX_OPEN_WIDTH,
-            min_length=0.003,
-        ),
-        is_partial_annotate=False,
-        is_filter_ground_collision=False,
-    )
-
-
-def build_gripper_collision_cfg() -> GripperCollisionCfg:
-    return GripperCollisionCfg(
-        max_open_length=GRIPPER_MAX_OPEN_WIDTH,
-        finger_length=GRIPPER_FINGER_LENGTH,
-        y_thickness=GRIPPER_Y_THICKNESS,
-        root_z_width=GRIPPER_ROOT_Z_WIDTH,
-        open_check_margin=0.002,
-        point_sample_dense=0.012,
-    )
-
-
-def get_hand_open_close_qpos(
-    robot: Robot, device: torch.device
-) -> tuple[torch.Tensor, torch.Tensor]:
-    hand_limits = robot.get_qpos_limits(name="hand")[0].to(
-        device=device, dtype=torch.float32
-    )
-    return hand_limits[:, 0], hand_limits[:, 1]
-
-
-def format_tensor(tensor: torch.Tensor) -> str:
-    rounded = (tensor.detach().cpu() * 10000.0).round() / 10000.0
-    return str(rounded.tolist())
-
-
-def log_hand_setup(robot: Robot, hand_open: torch.Tensor, hand_close: torch.Tensor):
-    hand_joint_ids = robot.get_joint_ids(name="hand")
-    hand_joint_names = [robot.joint_names[joint_id] for joint_id in hand_joint_ids]
-    logger.log_info(f"Hand joint ids: {hand_joint_ids}")
-    logger.log_info(f"Hand joint names: {hand_joint_names}")
-    logger.log_info(f"Robot mimic ids: {robot.mimic_ids}")
-    logger.log_info(f"Robot mimic parents: {robot.mimic_parents}")
-    logger.log_info(f"Robot mimic multipliers: {robot.mimic_multipliers}")
-    logger.log_info(f"Robot mimic offsets: {robot.mimic_offsets}")
-    logger.log_info(
-        f"Hand qpos limits: {format_tensor(robot.get_qpos_limits(name='hand')[0])}"
-    )
-    logger.log_info(f"Hand open qpos: {format_tensor(hand_open)}")
-    logger.log_info(f"Hand close qpos: {format_tensor(hand_close)}")
-
-
-def log_hand_execution_state(
-    robot: Robot,
-    step_idx: int,
-    total_steps: int,
-    action_hand_target: torch.Tensor,
-) -> None:
-    sim_hand_target = robot.get_qpos(name="hand", target=True)
-    actual_hand_qpos = robot.get_qpos(name="hand")
-    tracking_error = actual_hand_qpos - sim_hand_target
-    logger.log_info(
-        "Hand state "
-        f"step={step_idx}/{total_steps - 1}, "
-        f"action_target={format_tensor(action_hand_target[0])}, "
-        f"sim_target={format_tensor(sim_hand_target[0])}, "
-        f"actual={format_tensor(actual_hand_qpos[0])}, "
-        f"actual_minus_target={format_tensor(tracking_error[0])}"
-    )
-
-
-def log_object_execution_state(
-    obj: RigidObject,
-    step_idx: int,
-    total_steps: int,
-) -> None:
-    obj_pose = obj.get_local_pose(to_matrix=True)
-    logger.log_info(
-        "Object state "
-        f"step={step_idx}/{total_steps - 1}, "
-        f"pos={format_tensor(obj_pose[0, :3, 3])}, "
-        f"z_axis={format_tensor(obj_pose[0, :3, 2])}"
-    )
-
-
-def get_upright_segment_lengths(action: UprightAction) -> dict[str, int]:
-    n_close = action.cfg.hand_interp_steps
-    n_final = max(2, action.cfg.final_approach_steps)
-    n_hold = max(0, action.cfg.grasp_hold_steps)
-    n_press = max(2, action.cfg.place_press_steps)
-    n_upright_hold = max(0, action.cfg.upright_hold_steps)
-    n_place_hold = max(0, action.cfg.place_hold_steps)
-    n_open = max(2, action.cfg.release_interp_steps)
-    motion_waypoints = (
-        action.cfg.sample_interval
-        - n_close
-        - n_final
-        - n_hold
-        - n_upright_hold
-        - n_press
-        - n_place_hold
-        - n_open
-    )
-    n_pre = max(2, int(round(motion_waypoints * 0.25)))
-    n_upright = max(2, int(round(motion_waypoints * 0.60)))
-    n_retreat = (
-        action.cfg.sample_interval
-        - n_close
-        - n_final
-        - n_hold
-        - n_upright_hold
-        - n_press
-        - n_place_hold
-        - n_open
-        - n_pre
-        - n_upright
-    )
-    if n_retreat < 2:
-        retreat_deficit = 2 - n_retreat
-        n_retreat = 2
-        n_upright = max(2, n_upright - retreat_deficit)
-    return {
-        "pre": n_pre,
-        "final": n_final,
-        "close": n_close,
-        "grasp_hold": n_hold,
-        "upright": n_upright,
-        "upright_hold": n_upright_hold,
-        "press": n_press,
-        "place_hold": n_place_hold,
-        "open": n_open,
-        "retreat": n_retreat,
-    }
-
-
-def log_tcp_alignment(
-    robot: Robot,
-    traj: torch.Tensor,
-    grasp_xpos: torch.Tensor,
-    arm_dof: int,
-    index: int,
-    label: str,
-) -> None:
-    arm_qpos = traj[:, index, :arm_dof]
-    tcp_xpos = robot.compute_fk(qpos=arm_qpos, name="arm", to_matrix=True)
-    pos_error = torch.norm(tcp_xpos[:, :3, 3] - grasp_xpos[:, :3, 3], dim=1)
-    rot_delta = torch.bmm(tcp_xpos[:, :3, :3].transpose(1, 2), grasp_xpos[:, :3, :3])
-    trace = rot_delta[:, 0, 0] + rot_delta[:, 1, 1] + rot_delta[:, 2, 2]
-    rot_error = torch.acos(((trace - 1.0) * 0.5).clamp(-1.0, 1.0))
-    logger.log_info(
-        f"{label}: index={index}, "
-        f"tcp_pos={format_tensor(tcp_xpos[0, :3, 3])}, "
-        f"pos_error={format_tensor(pos_error)}, "
-        f"rot_error_rad={format_tensor(rot_error)}, "
-        f"tcp_rot={format_tensor(tcp_xpos[0, :3, :3])}, "
-        f"target_rot={format_tensor(grasp_xpos[0, :3, :3])}, "
-        f"hand_target={format_tensor(traj[0, index, arm_dof:])}"
-    )
-
-
-def log_selected_gripper_clearance(
-    semantics: ObjectSemantics,
-    obj_pose: torch.Tensor,
-    grasp_xpos: torch.Tensor,
-    grasp_open_length: torch.Tensor,
-) -> None:
-    generator = semantics.affordance.generator
-    if generator is None:
-        return
-
-    collision_checker = getattr(generator, "_collision_checker", None)
-    if collision_checker is None:
-        return
-
-    gripper_pc_world = collision_checker._get_gripper_pc(grasp_xpos, grasp_open_length)
-    ground_height = collision_checker.get_ground_height(obj_pose[0])
-    min_z = gripper_pc_world[:, :, 2].min(dim=1).values
-    max_z = gripper_pc_world[:, :, 2].max(dim=1).values
-    is_colliding, min_distance = collision_checker.query(
-        obj_pose=obj_pose[0],
-        grasp_poses=grasp_xpos,
-        open_lengths=grasp_open_length,
-        is_filter_ground_collision=True,
-        collision_threshold=BOTTLE_GRASP_COLLISION_THRESHOLD,
-    )
-    logger.log_info(f"Selected gripper pc min z: {format_tensor(min_z)}")
-    logger.log_info(f"Selected gripper pc max z: {format_tensor(max_z)}")
-    logger.log_info(f"Selected grasp ground height: {ground_height:.4f}")
-    logger.log_info(
-        f"Selected grasp min collision distance: {format_tensor(min_distance)}"
-    )
-    logger.log_info(
-        f"Selected grasp collision threshold: {BOTTLE_GRASP_COLLISION_THRESHOLD:.4f}"
-    )
-    logger.log_info(
-        f"Selected grasp collision flag: {is_colliding.detach().cpu().tolist()}"
-    )
-
-
-def log_grasp_direction_probe(semantics: ObjectSemantics) -> None:
-    generator = semantics.affordance.generator
-    if generator is None:
-        return
-
-    obj_pose = semantics.entity.get_local_pose(to_matrix=True)[0]
-    hit_point_pairs = getattr(generator, "_hit_point_pairs", None)
-    if hit_point_pairs is None or hit_point_pairs.numel() == 0:
-        logger.log_info("Probe grasp direction: no antipodal pairs available")
-        return
-
-    origin_points = hit_point_pairs[:, 0, :]
-    hit_points = hit_point_pairs[:, 1, :]
-    origin_points_world = generator._apply_transform(origin_points, obj_pose)
-    hit_points_world = generator._apply_transform(hit_points, obj_pose)
-    centers = (origin_points_world + hit_points_world) * 0.5
-    grasp_x = torch.nn.functional.normalize(
-        hit_points_world - origin_points_world, dim=-1
-    )
-    open_lengths = torch.norm(origin_points_world - hit_points_world, dim=-1)
-
-    probe_directions = {
-        "top_down": [0.0, 0.0, -1.0],
-        "from_robot_x": [-1.0, 0.0, 0.0],
-    }
-    for label, direction in probe_directions.items():
-        approach_direction = torch.tensor(
-            direction, dtype=torch.float32, device=generator.device
-        )
-        cos_angle = torch.clamp((grasp_x * approach_direction).sum(dim=-1), -1.0, 1.0)
-        positive_angle = torch.abs(torch.acos(cos_angle))
-        angle_mask = (
-            positive_angle - torch.pi / 2
-        ).abs() <= generator.cfg.max_deviation_angle
-        width_mask = open_lengths <= GRIPPER_MAX_OPEN_WIDTH
-        candidate_mask = angle_mask & width_mask
-        logger.log_info(
-            f"Probe grasp direction {label}: "
-            f"angle_count={int(angle_mask.sum().item())}, "
-            f"width_angle_count={int(candidate_mask.sum().item())}"
-        )
-        if torch.any(candidate_mask):
-            candidate_grasp_poses = GraspGenerator._grasp_pose_from_approach_direction(
-                grasp_x[candidate_mask],
-                approach_direction,
-                centers[candidate_mask],
-            )
-            candidate_open_lengths = open_lengths[candidate_mask]
-            is_colliding, min_distance = generator._collision_checker.query(
-                obj_pose,
-                candidate_grasp_poses,
-                candidate_open_lengths,
-                is_filter_ground_collision=True,
-                collision_threshold=BOTTLE_GRASP_COLLISION_THRESHOLD,
-            )
-            collision_free_count = int((~is_colliding).sum().item())
-            logger.log_info(
-                f"Probe grasp direction {label}: "
-                f"collision_free_count={collision_free_count}, "
-                f"collision_threshold={BOTTLE_GRASP_COLLISION_THRESHOLD:.4f}, "
-                f"min_distance={format_tensor(min_distance.min().unsqueeze(0))}, "
-                f"max_distance={format_tensor(min_distance.max().unsqueeze(0))}"
-            )
-            if collision_free_count > 0:
-                grasp_xpos = candidate_grasp_poses[~is_colliding][0]
-                open_length = candidate_open_lengths[~is_colliding][0]
-                gripper_pc_world = generator._collision_checker._get_gripper_pc(
-                    grasp_xpos.unsqueeze(0),
-                    open_length.unsqueeze(0),
-                )
-                ground_height = generator._collision_checker.get_ground_height(obj_pose)
-                min_z = gripper_pc_world[:, :, 2].min(dim=1).values
-                logger.log_info(
-                    f"Probe grasp direction {label}: "
-                    f"pos={format_tensor(grasp_xpos[:3, 3])}, "
-                    f"open_length={open_length.item():.4f}, "
-                    f"min_z={format_tensor(min_z)}, "
-                    f"ground_height={ground_height:.4f}"
-                )
-
-
-def diagnose_upright_plan(
-    robot: Robot,
-    action: UprightAction,
-    semantics: ObjectSemantics,
-) -> None:
-    is_success, grasp_xpos, grasp_open_length = action._resolve_grasp_pose(semantics)
-    if not torch.all(is_success).item():
-        obj_pose = semantics.entity.get_local_pose(to_matrix=True)
-        logger.log_info(f"Object pos: {format_tensor(obj_pose[0, :3, 3])}")
-        log_grasp_direction_probe(semantics)
-        logger.log_warning("Failed to resolve grasp pose during diagnostics.")
-        return
-
-    hand_close_qpos = action._compute_dynamic_hand_close_qpos(grasp_open_length)
-    final_approach_qpos = action._compute_final_approach_hand_qpos(
-        grasp_open_length, hand_close_qpos
-    )
-    obj_pose = semantics.entity.get_local_pose(to_matrix=True)
-    approach_direction = action.approach_direction / action.approach_direction.norm()
-    grasp_axis_dot = torch.abs((grasp_xpos[:, :3, 0] * approach_direction).sum(dim=1))
-    bottle_axis = torch.nn.functional.normalize(obj_pose[:, :3, 2], dim=1)
-    grasp_axis_bottle_dot = torch.abs((grasp_xpos[:, :3, 0] * bottle_axis).sum(dim=1))
-
-    logger.log_info(f"Object pos: {format_tensor(obj_pose[0, :3, 3])}")
-    logger.log_info(f"Grasp pos: {format_tensor(grasp_xpos[0, :3, 3])}")
-    logger.log_info(f"Grasp rotation columns: {format_tensor(grasp_xpos[0, :3, :3])}")
-    logger.log_info(f"Grasp open length: {format_tensor(grasp_open_length)}")
-    logger.log_info(f"Grasp axis approach dot: {format_tensor(grasp_axis_dot)}")
-    logger.log_info(f"Grasp axis bottle dot: {format_tensor(grasp_axis_bottle_dot)}")
-    log_selected_gripper_clearance(semantics, obj_pose, grasp_xpos, grasp_open_length)
-    logger.log_info(
-        f"Final approach hand qpos: {format_tensor(final_approach_qpos[0])}"
-    )
-    logger.log_info(f"Close hand qpos: {format_tensor(hand_close_qpos[0])}")
-
-    is_success, traj, joint_ids = action.execute(semantics)
-    if not is_success:
-        logger.log_warning("Failed to plan upright trajectory during diagnostics.")
-        return
-
-    arm_dof = len(robot.get_joint_ids(name="arm"))
-    segments = get_upright_segment_lengths(action)
-    logger.log_info(f"Action joint ids: {joint_ids}")
-    logger.log_info(f"Upright trajectory segments: {segments}")
-    logger.log_info(f"Trajectory shape: {tuple(traj.shape)}")
-
-    grasp_idx = segments["pre"] + segments["final"] - 1
-    close_end_idx = grasp_idx + segments["close"]
-    hold_end_idx = close_end_idx + segments["grasp_hold"]
-    log_tcp_alignment(robot, traj, grasp_xpos, arm_dof, grasp_idx, "grasp")
-    log_tcp_alignment(robot, traj, grasp_xpos, arm_dof, close_end_idx, "close_end")
-    log_tcp_alignment(robot, traj, grasp_xpos, arm_dof, hold_end_idx, "hold_end")
-
-
-def create_object_semantics(
-    obj: RigidObject, args: argparse.Namespace
-) -> ObjectSemantics:
-    return ObjectSemantics(
-        label=BOTTLE_LABEL,
-        geometry={
-            "mesh_vertices": obj.get_vertices(env_ids=[0], scale=True)[0],
-            "mesh_triangles": obj.get_triangles(env_ids=[0])[0],
-        },
-        affordance=AntipodalAffordance(
-            object_label=BOTTLE_LABEL,
-            force_reannotate=args.force_reannotate,
-            custom_config={
-                "gripper_collision_cfg": build_gripper_collision_cfg(),
-                "generator_cfg": build_grasp_generator_cfg(args),
-            },
-        ),
-        entity=obj,
-    )
-
-
-def run_upright_demo(
-    args: argparse.Namespace, sim: SimulationManager, robot: Robot
-) -> None:
-
-    sim.open_window()
-
-    obj = create_fallen_bottle(sim)
-    settle_object(sim, obj, step=5)
-    semantics = create_object_semantics(obj, args)
-    motion_gen = MotionGenerator(
-        cfg=MotionGenCfg(planner_cfg=ToppraPlannerCfg(robot_uid=robot.uid))
-    )
-    hand_open, hand_close = get_hand_open_close_qpos(robot, sim.device)
-    if args.debug_hand_state:
-        log_hand_setup(robot, hand_open, hand_close)
-
-    upright_action = UprightAction(
-        motion_generator=motion_gen,
-        cfg=UprightActionCfg(
-            control_part="arm",
-            hand_control_part="hand",
-            hand_open_qpos=hand_open,
-            hand_close_qpos=hand_close,
-            approach_direction=torch.tensor(
-                BOTTLE_APPROACH_DIRECTION, dtype=torch.float32, device=sim.device
-            ),
-            pre_grasp_distance=0.15,
-            lift_height=0.15,
-            sample_interval=PGI_SAMPLE_INTERVAL,
-            hand_interp_steps=PGI_HAND_CLOSE_STEPS,
-            upright_axis_sign=-1.0,
-            place_press_depth=0.0,
-            place_press_steps=4,
-            upright_hold_steps=3,
-            place_hold_steps=8,
-            release_interp_steps=12,
-            release_retreat_distance=0.08,
-            release_retreat_lift=0.01,
-            final_approach_steps=12,
-            final_approach_preclose_width_margin=0.010,
-            grasp_hold_steps=PGI_GRASP_HOLD_STEPS,
-            use_grasp_width_qpos=True,
-            gripper_max_open_width=GRIPPER_MAX_OPEN_WIDTH,
-            max_grasp_open_length=BOTTLE_MAX_GRASP_OPEN_LENGTH,
-            max_grasp_axis_approach_dot=BOTTLE_MAX_GRASP_AXIS_APPROACH_DOT,
-            max_grasp_axis_upright_axis_dot=BOTTLE_MAX_GRASP_AXIS_UPRIGHT_AXIS_DOT,
-            grasp_squeeze_width=BOTTLE_GRASP_SQUEEZE_WIDTH,
-            min_dynamic_hand_close_qpos=torch.full_like(
-                hand_close, BOTTLE_MIN_DYNAMIC_HAND_CLOSE_QPOS
-            ),
-        ),
-    )
-
-    if args.diagnose_grasp:
-        diagnose_upright_plan(robot, upright_action, semantics)
-        return
-
-    if not args.auto_play:
-        input("Inspect the fallen bottle, then press Enter to plan upright...")
-
-    start_time = time.time()
-    is_success, traj, joint_ids = upright_action.execute(semantics)
-    cost_time = time.time() - start_time
-    logger.log_info(f"Plan upright trajectory cost time: {cost_time:.2f} seconds")
-    if not is_success:
-        logger.log_warning("Failed to plan upright trajectory.")
-        return
-
-    arm_dof = len(robot.get_joint_ids(name="arm"))
-    total_steps = traj.shape[1]
-    segments = get_upright_segment_lengths(upright_action)
-    post_grasp_clear_step = segments["pre"] + segments["final"] + segments["close"]
-    should_clear_object_dynamics = True
-    if args.debug_hand_state:
-        joint_names = [robot.joint_names[joint_id] for joint_id in joint_ids]
-        hand_traj = traj[:, :, arm_dof:]
-        logger.log_info(f"Action joint ids: {joint_ids}")
-        logger.log_info(f"Action joint names: {joint_names}")
-        logger.log_info(
-            f"Post-grasp object dynamics clear step: {post_grasp_clear_step}"
-        )
-        logger.log_info(
-            f"Planned hand qpos min: {format_tensor(hand_traj.min(dim=1).values[0])}"
-        )
-        logger.log_info(
-            f"Planned hand qpos max: {format_tensor(hand_traj.max(dim=1).values[0])}"
-        )
-
-    if not args.auto_play:
-        input("Press Enter to start the upright demo...")
-    last_logged_hand_target: torch.Tensor | None = None
-    log_stride = max(1, total_steps // 10)
-    for i in range(traj.shape[1]):
-        robot.set_qpos(traj[:, i, :], joint_ids=joint_ids)
-        sim.update(step=4)
-        if should_clear_object_dynamics and i + 1 >= post_grasp_clear_step:
-            release_object_after_grasp(obj)
-            should_clear_object_dynamics = False
-            if args.debug_hand_state:
-                logger.log_info(
-                    f"Object dynamics cleared at step={i}/{total_steps - 1}"
-                )
-        if args.debug_hand_state:
-            action_hand_target = traj[:, i, arm_dof:]
-            target_changed = last_logged_hand_target is None or not torch.allclose(
-                action_hand_target, last_logged_hand_target, atol=1e-4
-            )
-            should_log = target_changed or i % log_stride == 0 or i == total_steps - 1
-            if should_log:
-                log_hand_execution_state(
-                    robot,
-                    step_idx=i,
-                    total_steps=total_steps,
-                    action_hand_target=action_hand_target,
-                )
-                last_logged_hand_target = action_hand_target.detach().clone()
-            if i % log_stride == 0 or i == total_steps - 1:
-                log_object_execution_state(
-                    obj,
-                    step_idx=i,
-                    total_steps=total_steps,
-                )
-        time.sleep(1e-2)
-    if not args.auto_play:
-        input("Press Enter to exit the simulation...")
-
-
-def main() -> None:
-    args = parse_arguments()
-    sim = initialize_simulation(args)
-    robot = create_robot(sim, position=[0.0, 0.0, 0.0])
-    run_upright_demo(args, sim, robot)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/scripts/tutorials/atomic_action/atomic_actions.py b/scripts/tutorials/sim/atomic_actions.py
similarity index 100%
rename from scripts/tutorials/atomic_action/atomic_actions.py
rename to scripts/tutorials/sim/atomic_actions.py
diff --git a/tests/sim/atomic_actions/test_actions.py b/tests/sim/atomic_actions/test_actions.py
index ba7324cc..af27bb96 100644
--- a/tests/sim/atomic_actions/test_actions.py
+++ b/tests/sim/atomic_actions/test_actions.py
@@ -25,11 +25,15 @@
 from embodichain.lab.sim.atomic_actions.core import (
     ActionCfg,
     Affordance,
+    HeldObjectState,
+    MoveObjectTarget,
     ObjectSemantics,
 )
 from embodichain.lab.sim.atomic_actions.actions import (
     MoveAction,
     MoveActionCfg,
+    MoveObjectAction,
+    MoveObjectActionCfg,
     PickUpAction,
     PickUpActionCfg,
     PlaceAction,
@@ -186,6 +190,15 @@ def test_interpolate_hand_qpos_linear(self):
         expected_mid = torch.tensor([0.5, 0.5])
         assert torch.allclose(result[1], expected_mid, atol=1e-6)
 
+    def test_interpolate_hand_qpos_batched_shape(self):
+        n_waypoints = 4
+        start = torch.zeros(NUM_ENVS, HAND_DOF)
+        end = torch.ones(NUM_ENVS, HAND_DOF)
+        result = self.action._interpolate_hand_qpos(start, end, n_waypoints)
+        assert result.shape == (NUM_ENVS, n_waypoints, HAND_DOF)
+        assert torch.allclose(result[:, 0], start)
+        assert torch.allclose(result[:, -1], end)
+
 
 # ---------------------------------------------------------------------------
 # PickUpAction
@@ -222,6 +235,90 @@ def test_init_sets_hand_joint_ids(self):
         assert action.dof == TOTAL_DOF
 
 
+# ---------------------------------------------------------------------------
+# MoveObjectAction
+# ---------------------------------------------------------------------------
+
+
+class TestMoveObjectAction:
+    """Tests for MoveObjectAction without requiring simulation."""
+
+    def setup_method(self):
+        self.robot = _make_mock_robot()
+        self.mg = _make_mock_motion_generator(self.robot)
+
+    def _make_cfg(self, **overrides):
+        defaults = dict(
+            hand_close_qpos=torch.tensor([0.025, 0.025]),
+            control_part="arm",
+            hand_control_part="hand",
+            sample_interval=5,
+        )
+        defaults.update(overrides)
+        return MoveObjectActionCfg(**defaults)
+
+    def _make_held_state(self) -> HeldObjectState:
+        semantics = ObjectSemantics(affordance=Affordance(), geometry={}, label="box")
+        object_to_eef = torch.eye(4).unsqueeze(0).repeat(NUM_ENVS, 1, 1)
+        object_to_eef[:, 2, 3] = 0.2
+        grasp_xpos = torch.eye(4).unsqueeze(0).repeat(NUM_ENVS, 1, 1)
+        return HeldObjectState(
+            semantics=semantics,
+            object_to_eef=object_to_eef,
+            grasp_xpos=grasp_xpos,
+        )
+
+    def test_init_sets_hand_joint_ids(self):
+        action = MoveObjectAction(self.mg, cfg=self._make_cfg())
+        assert action.hand_joint_ids == list(range(ARM_DOF, ARM_DOF + HAND_DOF))
+        assert action.joint_ids == list(range(ARM_DOF)) + list(
+            range(ARM_DOF, ARM_DOF + HAND_DOF)
+        )
+        assert action.dof == TOTAL_DOF
+
+    def test_resolve_move_object_target_uses_held_transform(self):
+        action = MoveObjectAction(self.mg, cfg=self._make_cfg())
+        target_pose = torch.eye(4)
+        target_pose[0, 3] = 0.3
+        target = MoveObjectTarget(object_target_pose=target_pose)
+        is_success, move_xpos, held_state = action._resolve_move_object_target(
+            target,
+            held_object_state=self._make_held_state(),
+        )
+        assert is_success is True
+        assert held_state is not None
+        assert move_xpos.shape == (NUM_ENVS, 4, 4)
+        assert torch.allclose(move_xpos[:, 0, 3], torch.full((NUM_ENVS,), 0.3))
+        assert torch.allclose(move_xpos[:, 2, 3], torch.full((NUM_ENVS,), 0.2))
+
+    def test_resolve_move_object_target_requires_held_state(self):
+        action = MoveObjectAction(self.mg, cfg=self._make_cfg())
+        with pytest.raises(ValueError, match="requires a HeldObjectState"):
+            action._resolve_move_object_target(MoveObjectTarget(torch.eye(4)))
+
+    def test_execute_pads_closed_hand_and_preserves_held_state(self):
+        cfg = self._make_cfg(sample_interval=4)
+        action = MoveObjectAction(self.mg, cfg=cfg)
+        plan_traj = torch.zeros(NUM_ENVS, cfg.sample_interval, ARM_DOF)
+        action._plan_arm_trajectory = Mock(return_value=(True, plan_traj))
+        held_state = self._make_held_state()
+
+        is_success, trajectory, joint_ids = action.execute(
+            target=MoveObjectTarget(torch.eye(4)),
+            start_qpos=torch.zeros(NUM_ENVS, ARM_DOF),
+            held_object_state=held_state,
+        )
+
+        assert is_success is True
+        assert joint_ids == list(range(TOTAL_DOF))
+        assert trajectory.shape == (NUM_ENVS, cfg.sample_interval, TOTAL_DOF)
+        expected_hand = cfg.hand_close_qpos.expand(
+            NUM_ENVS, cfg.sample_interval, HAND_DOF
+        )
+        assert torch.allclose(trajectory[:, :, ARM_DOF:], expected_hand)
+        assert action.get_held_object_state() is held_state
+
+
 # ---------------------------------------------------------------------------
 # PlaceAction
 # ---------------------------------------------------------------------------
diff --git a/tests/sim/atomic_actions/test_engine.py b/tests/sim/atomic_actions/test_engine.py
index 52dc034d..3e477d48 100644
--- a/tests/sim/atomic_actions/test_engine.py
+++ b/tests/sim/atomic_actions/test_engine.py
@@ -25,6 +25,7 @@
 from embodichain.lab.sim.atomic_actions.core import (
     ActionCfg,
     Affordance,
+    MoveObjectTarget,
     ObjectSemantics,
 )
 from embodichain.lab.sim.atomic_actions.engine import (
@@ -144,6 +145,11 @@ def test_object_semantics_passthrough(self):
         result = self.engine._resolve_target(sem)
         assert result is sem
 
+    def test_move_object_target_passthrough(self):
+        target = MoveObjectTarget(object_target_pose=torch.eye(4))
+        result = self.engine._resolve_target(target)
+        assert result is target
+
     def test_string_resolved_via_semantic_analyzer(self):
         result = self.engine._resolve_target("mug")
         assert isinstance(result, ObjectSemantics)
@@ -158,6 +164,16 @@ def test_dict_with_pose_raises_on_non_tensor(self):
         with pytest.raises(TypeError, match="must be a torch.Tensor"):
             self.engine._resolve_target({"pose": "not_a_tensor"})
 
+    def test_dict_with_object_target_pose(self):
+        pose = torch.eye(4)
+        result = self.engine._resolve_target({"object_target_pose": pose})
+        assert isinstance(result, MoveObjectTarget)
+        assert result.object_target_pose is pose
+
+    def test_dict_with_object_target_pose_raises_on_non_tensor(self):
+        with pytest.raises(TypeError, match="must be a torch.Tensor"):
+            self.engine._resolve_target({"object_target_pose": "not_a_tensor"})
+
     def test_dict_with_semantics_key(self):
         sem = ObjectSemantics(affordance=Affordance(), geometry={}, label="bottle")
         result = self.engine._resolve_target({"semantics": sem})
@@ -185,6 +201,84 @@ def test_unsupported_type_raises(self):
             self.engine._resolve_target(42)
 
 
+# ---------------------------------------------------------------------------
+# AtomicActionEngine held-object state contract
+# ---------------------------------------------------------------------------
+
+
+class TestHeldObjectStateContract:
+    """Tests for explicit held-object state updates in execute_static."""
+
+    def setup_method(self):
+        self.robot = Mock()
+        self.robot.device = torch.device("cpu")
+        self.robot.dof = 6
+        self.robot.get_qpos.return_value = torch.zeros(1, 6)
+        self.robot.get_joint_ids.return_value = list(range(6))
+
+        self.mg = Mock()
+        self.mg.robot = self.robot
+        self.mg.device = torch.device("cpu")
+
+        self.engine = AtomicActionEngine(self.mg, actions_cfg_list=[])
+
+    def _make_action(self, *, updates_held_object_state: bool, held_state):
+        action = Mock()
+        action.control_part = "arm"
+        action.updates_held_object_state = updates_held_object_state
+        action.execute.return_value = (
+            True,
+            torch.zeros(1, 2, 6),
+            list(range(6)),
+        )
+        action.get_held_object_state.return_value = held_state
+        return action
+
+    def test_execute_static_ignores_getter_when_action_does_not_update_state(self):
+        held_state = object()
+        action = self._make_action(
+            updates_held_object_state=False,
+            held_state=None,
+        )
+        self.engine._actions = {"noop": action}
+        self.engine._action_context["held_object_state"] = held_state
+
+        is_success, _ = self.engine.execute_static([torch.eye(4)])
+
+        assert is_success is True
+        action.get_held_object_state.assert_not_called()
+        assert self.engine._action_context["held_object_state"] is held_state
+
+    def test_execute_static_updates_state_when_action_declares_contract(self):
+        held_state = object()
+        action = self._make_action(
+            updates_held_object_state=True,
+            held_state=held_state,
+        )
+        self.engine._actions = {"producer": action}
+
+        is_success, _ = self.engine.execute_static([torch.eye(4)])
+
+        assert is_success is True
+        action.get_held_object_state.assert_called_once_with()
+        assert self.engine._action_context["held_object_state"] is held_state
+
+    def test_execute_static_clears_state_when_action_returns_none(self):
+        held_state = object()
+        action = self._make_action(
+            updates_held_object_state=True,
+            held_state=None,
+        )
+        self.engine._actions = {"release": action}
+        self.engine._action_context["held_object_state"] = held_state
+
+        is_success, _ = self.engine.execute_static([torch.eye(4)])
+
+        assert is_success is True
+        action.get_held_object_state.assert_called_once_with()
+        assert "held_object_state" not in self.engine._action_context
+
+
 if __name__ == "__main__":
     test = TestSemanticAnalyzer()
     test.setup_method()

From 7f808b6bb166df353a5aef7f1672613eb00bd4a6 Mon Sep 17 00:00:00 2001
From: skywhite1024 <129768272+skywhite1024@users.noreply.github.com>
Date: Fri, 19 Jun 2026 17:11:50 +0800
Subject: [PATCH 12/14] Address move object atomic action review feedback

---
 docs/source/overview/sim/atomic_actions.md    |   2 +-
 embodichain/lab/sim/atomic_actions/actions.py | 123 ++++++++++--------
 embodichain/lab/sim/atomic_actions/engine.py  |   1 +
 tests/sim/atomic_actions/test_engine.py       |  22 +++-
 4 files changed, 92 insertions(+), 56 deletions(-)

diff --git a/docs/source/overview/sim/atomic_actions.md b/docs/source/overview/sim/atomic_actions.md
index 24c3fdb4..878f549c 100644
--- a/docs/source/overview/sim/atomic_actions.md
+++ b/docs/source/overview/sim/atomic_actions.md
@@ -272,4 +272,4 @@ is_success, traj = engine.execute_static(target_list=[target_pose])
 - {doc}`planners/motion_generator` — the trajectory planner used by every action
 - {doc}`sim_robot` — how control parts and IK solvers are configured
 - Tutorial: `scripts/tutorials/sim/atomic_actions.py`
-- Move object demo: `scripts/tutorials/sim/move_object_atomic_actions.py`
+- Move object demo: `scripts/tutorials/atomic_action/move_object_atomic_actions.py`
diff --git a/embodichain/lab/sim/atomic_actions/actions.py b/embodichain/lab/sim/atomic_actions/actions.py
index ae9b2f54..d152a3ca 100644
--- a/embodichain/lab/sim/atomic_actions/actions.py
+++ b/embodichain/lab/sim/atomic_actions/actions.py
@@ -98,24 +98,6 @@ def __init__(
         self.arm_joint_ids = self.robot.get_joint_ids(name=self.cfg.control_part)
         self.dof = len(self.arm_joint_ids)
 
-    def _init_hand_close_state(self, cfg_name: str) -> None:
-        """Initialize shared closed-hand state for gripper actions."""
-        if self.cfg.hand_close_qpos is None:
-            logger.log_error(f"hand_close_qpos must be specified in {cfg_name}")
-        self.hand_close_qpos = self.cfg.hand_close_qpos.to(self.device)
-
-        self.hand_joint_ids = self.robot.get_joint_ids(name=self.cfg.hand_control_part)
-        self.joint_ids = self.arm_joint_ids + self.hand_joint_ids
-        self.arm_dof = len(self.arm_joint_ids)
-        self.dof = len(self.joint_ids)
-
-    def _init_grasp_hand_state(self, cfg_name: str) -> None:
-        """Initialize shared open/closed gripper state for grasp actions."""
-        if self.cfg.hand_open_qpos is None:
-            logger.log_error(f"hand_open_qpos must be specified in {cfg_name}")
-        self.hand_open_qpos = self.cfg.hand_open_qpos.to(self.device)
-        self._init_hand_close_state(cfg_name)
-
     def _resolve_pose_target(
         self,
         target: Union[ObjectSemantics, torch.Tensor],
@@ -287,26 +269,6 @@ def _interpolate_hand_qpos(
             return result.squeeze(0)
         return result
 
-    def _expand_hand_qpos(self, hand_qpos: torch.Tensor) -> torch.Tensor:
-        """Resolve hand qpos to batched shape ``(n_envs, hand_dof)``."""
-        hand_dof = len(self.hand_joint_ids)
-        hand_qpos = hand_qpos.to(device=self.device, dtype=torch.float32)
-        if hand_qpos.shape == (hand_dof,):
-            return hand_qpos.unsqueeze(0).repeat(self.n_envs, 1)
-        if hand_qpos.shape == (self.n_envs, hand_dof):
-            return hand_qpos
-        logger.log_error(
-            f"hand_qpos must have shape ({hand_dof},) or "
-            f"({self.n_envs}, {hand_dof}), but got {hand_qpos.shape}",
-            ValueError,
-        )
-
-    def _repeat_hand_qpos(
-        self, hand_qpos: torch.Tensor, n_waypoints: int
-    ) -> torch.Tensor:
-        """Repeat hand qpos across trajectory waypoints."""
-        return self._expand_hand_qpos(hand_qpos).unsqueeze(1).repeat(1, n_waypoints, 1)
-
     def execute(
         self,
         target: Union[ObjectSemantics, torch.Tensor],
@@ -351,6 +313,64 @@ def validate(self, target, start_qpos=None, **kwargs):
         return True
 
 
+class _HandCloseAction(MoveAction):
+    """Internal base for actions that keep the gripper closed."""
+
+    def __init__(
+        self,
+        motion_generator: MotionGenerator,
+        cfg: HandCloseActionCfg,
+        *,
+        cfg_name: str,
+    ):
+        super().__init__(motion_generator, cfg=cfg)
+        self._held_object_state: HeldObjectState | None = None
+        if self.cfg.hand_close_qpos is None:
+            logger.log_error(f"hand_close_qpos must be specified in {cfg_name}")
+        self.hand_close_qpos = self.cfg.hand_close_qpos.to(self.device)
+
+        self.hand_joint_ids = self.robot.get_joint_ids(name=self.cfg.hand_control_part)
+        self.joint_ids = self.arm_joint_ids + self.hand_joint_ids
+        self.arm_dof = len(self.arm_joint_ids)
+        self.dof = len(self.joint_ids)
+
+    def _expand_hand_qpos(self, hand_qpos: torch.Tensor) -> torch.Tensor:
+        """Resolve hand qpos to batched shape ``(n_envs, hand_dof)``."""
+        hand_dof = len(self.hand_joint_ids)
+        hand_qpos = hand_qpos.to(device=self.device, dtype=torch.float32)
+        if hand_qpos.shape == (hand_dof,):
+            return hand_qpos.unsqueeze(0).repeat(self.n_envs, 1)
+        if hand_qpos.shape == (self.n_envs, hand_dof):
+            return hand_qpos
+        logger.log_error(
+            f"hand_qpos must have shape ({hand_dof},) or "
+            f"({self.n_envs}, {hand_dof}), but got {hand_qpos.shape}",
+            ValueError,
+        )
+
+    def _repeat_hand_qpos(
+        self, hand_qpos: torch.Tensor, n_waypoints: int
+    ) -> torch.Tensor:
+        """Repeat hand qpos across trajectory waypoints."""
+        return self._expand_hand_qpos(hand_qpos).unsqueeze(1).repeat(1, n_waypoints, 1)
+
+
+class _GraspAction(_HandCloseAction):
+    """Internal base for actions that open and close the gripper."""
+
+    def __init__(
+        self,
+        motion_generator: MotionGenerator,
+        cfg: GraspActionCfg,
+        *,
+        cfg_name: str,
+    ):
+        super().__init__(motion_generator, cfg=cfg, cfg_name=cfg_name)
+        if self.cfg.hand_open_qpos is None:
+            logger.log_error(f"hand_open_qpos must be specified in {cfg_name}")
+        self.hand_open_qpos = self.cfg.hand_open_qpos.to(self.device)
+
+
 @configclass
 class PickUpActionCfg(GraspActionCfg):
     name: str = "pick_up"
@@ -365,7 +385,7 @@ class PickUpActionCfg(GraspActionCfg):
     in the object local frame. Default [0, 0, -1] means approaching from above."""
 
 
-class PickUpAction(MoveAction):
+class PickUpAction(_GraspAction):
     updates_held_object_state = True
 
     def __init__(
@@ -380,12 +400,11 @@ def __init__(
             cfg: Configuration for the action.
         """
         super().__init__(
-            motion_generator, cfg=cfg if cfg is not None else PickUpActionCfg()
+            motion_generator,
+            cfg=cfg if cfg is not None else PickUpActionCfg(),
+            cfg_name="PickUpActionCfg",
         )
-        self.cfg = cfg if cfg is not None else self.cfg
-        self._held_object_state: HeldObjectState | None = None
         self.approach_direction = self.cfg.approach_direction.to(self.device)
-        self._init_grasp_hand_state("PickUpActionCfg")
 
     def execute(
         self,
@@ -600,7 +619,7 @@ class MoveObjectActionCfg(HandCloseActionCfg):
     """Name of the action, used for identification and logging."""
 
 
-class MoveObjectAction(MoveAction):
+class MoveObjectAction(_HandCloseAction):
     updates_held_object_state = True
 
     def __init__(
@@ -615,11 +634,10 @@ def __init__(
             cfg: Configuration for the action.
         """
         super().__init__(
-            motion_generator, cfg=cfg if cfg is not None else MoveObjectActionCfg()
+            motion_generator,
+            cfg=cfg if cfg is not None else MoveObjectActionCfg(),
+            cfg_name="MoveObjectActionCfg",
         )
-        self.cfg = cfg if cfg is not None else self.cfg
-        self._held_object_state: HeldObjectState | None = None
-        self._init_hand_close_state("MoveObjectActionCfg")
 
     def _resolve_move_object_target(
         self,
@@ -733,7 +751,7 @@ class PlaceActionCfg(GraspActionCfg):
     """Name of the action, used for identification and logging."""
 
 
-class PlaceAction(MoveAction):
+class PlaceAction(_GraspAction):
     updates_held_object_state = True
 
     def __init__(
@@ -748,11 +766,10 @@ def __init__(
             cfg: Configuration for the action.
         """
         super().__init__(
-            motion_generator, cfg=cfg if cfg is not None else PlaceActionCfg()
+            motion_generator,
+            cfg=cfg if cfg is not None else PlaceActionCfg(),
+            cfg_name="PlaceActionCfg",
         )
-        self.cfg = cfg if cfg is not None else self.cfg
-        self._held_object_state: HeldObjectState | None = None
-        self._init_grasp_hand_state("PlaceActionCfg")
 
     def execute(
         self,
diff --git a/embodichain/lab/sim/atomic_actions/engine.py b/embodichain/lab/sim/atomic_actions/engine.py
index efe03f7a..591abe41 100644
--- a/embodichain/lab/sim/atomic_actions/engine.py
+++ b/embodichain/lab/sim/atomic_actions/engine.py
@@ -229,6 +229,7 @@ def execute_static(
             logger.log_error(
                 f"Length of target_list ({len(target_list)}) must match number of actions ({len(action_names)})."
             )
+        self._action_context.clear()
         start_qpos = self.motion_generator.robot.get_qpos()
         n_envs = start_qpos.shape[0]
         all_dof = self.motion_generator.robot.dof
diff --git a/tests/sim/atomic_actions/test_engine.py b/tests/sim/atomic_actions/test_engine.py
index 3e477d48..e390177a 100644
--- a/tests/sim/atomic_actions/test_engine.py
+++ b/tests/sim/atomic_actions/test_engine.py
@@ -234,7 +234,7 @@ def _make_action(self, *, updates_held_object_state: bool, held_state):
         action.get_held_object_state.return_value = held_state
         return action
 
-    def test_execute_static_ignores_getter_when_action_does_not_update_state(self):
+    def test_execute_static_clears_stale_context_before_running_actions(self):
         held_state = object()
         action = self._make_action(
             updates_held_object_state=False,
@@ -247,7 +247,7 @@ def test_execute_static_ignores_getter_when_action_does_not_update_state(self):
 
         assert is_success is True
         action.get_held_object_state.assert_not_called()
-        assert self.engine._action_context["held_object_state"] is held_state
+        assert "held_object_state" not in self.engine._action_context
 
     def test_execute_static_updates_state_when_action_declares_contract(self):
         held_state = object()
@@ -278,6 +278,24 @@ def test_execute_static_clears_state_when_action_returns_none(self):
         action.get_held_object_state.assert_called_once_with()
         assert "held_object_state" not in self.engine._action_context
 
+    def test_execute_static_keeps_state_within_single_action_sequence(self):
+        held_state = object()
+        producer = self._make_action(
+            updates_held_object_state=True,
+            held_state=held_state,
+        )
+        release = self._make_action(
+            updates_held_object_state=True,
+            held_state=None,
+        )
+        self.engine._actions = {"producer": producer, "release": release}
+
+        is_success, _ = self.engine.execute_static([torch.eye(4), torch.eye(4)])
+
+        assert is_success is True
+        assert release.execute.call_args.kwargs["held_object_state"] is held_state
+        assert "held_object_state" not in self.engine._action_context
+
 
 if __name__ == "__main__":
     test = TestSemanticAnalyzer()

From 6cc298a34463fceff6ee0f7cfd28463ca9ac606f Mon Sep 17 00:00:00 2001
From: skywhite1024 <129768272+skywhite1024@users.noreply.github.com>
Date: Fri, 19 Jun 2026 19:44:17 +0800
Subject: [PATCH 13/14] Minimize pickup changes for move object action

---
 embodichain/lab/sim/atomic_actions/actions.py | 72 ++++++++++---------
 1 file changed, 38 insertions(+), 34 deletions(-)

diff --git a/embodichain/lab/sim/atomic_actions/actions.py b/embodichain/lab/sim/atomic_actions/actions.py
index d152a3ca..43571e88 100644
--- a/embodichain/lab/sim/atomic_actions/actions.py
+++ b/embodichain/lab/sim/atomic_actions/actions.py
@@ -355,22 +355,6 @@ def _repeat_hand_qpos(
         return self._expand_hand_qpos(hand_qpos).unsqueeze(1).repeat(1, n_waypoints, 1)
 
 
-class _GraspAction(_HandCloseAction):
-    """Internal base for actions that open and close the gripper."""
-
-    def __init__(
-        self,
-        motion_generator: MotionGenerator,
-        cfg: GraspActionCfg,
-        *,
-        cfg_name: str,
-    ):
-        super().__init__(motion_generator, cfg=cfg, cfg_name=cfg_name)
-        if self.cfg.hand_open_qpos is None:
-            logger.log_error(f"hand_open_qpos must be specified in {cfg_name}")
-        self.hand_open_qpos = self.cfg.hand_open_qpos.to(self.device)
-
-
 @configclass
 class PickUpActionCfg(GraspActionCfg):
     name: str = "pick_up"
@@ -385,7 +369,7 @@ class PickUpActionCfg(GraspActionCfg):
     in the object local frame. Default [0, 0, -1] means approaching from above."""
 
 
-class PickUpAction(_GraspAction):
+class PickUpAction(MoveAction):
     updates_held_object_state = True
 
     def __init__(
@@ -400,11 +384,22 @@ def __init__(
             cfg: Configuration for the action.
         """
         super().__init__(
-            motion_generator,
-            cfg=cfg if cfg is not None else PickUpActionCfg(),
-            cfg_name="PickUpActionCfg",
+            motion_generator, cfg=cfg if cfg is not None else PickUpActionCfg()
         )
+        self.cfg = cfg if cfg is not None else self.cfg
         self.approach_direction = self.cfg.approach_direction.to(self.device)
+        if self.cfg.hand_open_qpos is None:
+            logger.log_error("hand_open_qpos must be specified in PickUpActionCfg")
+        if self.cfg.hand_close_qpos is None:
+            logger.log_error("hand_close_qpos must be specified in PickUpActionCfg")
+        self.hand_open_qpos = self.cfg.hand_open_qpos.to(self.device)
+        self.hand_close_qpos = self.cfg.hand_close_qpos.to(self.device)
+
+        self.hand_joint_ids = self.robot.get_joint_ids(name=self.cfg.hand_control_part)
+        self.joint_ids = self.arm_joint_ids + self.hand_joint_ids
+        self.arm_dof = len(self.arm_joint_ids)
+        self.dof = len(self.joint_ids)
+        self._held_object_state: HeldObjectState | None = None
 
     def execute(
         self,
@@ -435,12 +430,11 @@ def execute(
                 target, action_name=self.__class__.__name__
             )
 
-        if isinstance(is_success, torch.Tensor):
-            # Current engine contract is all-or-nothing; per-env masking is outside
-            # this action's return protocol.
-            is_success = torch.all(is_success).item()
+        # TODO: warning and fallback if no valid grasp pose found
         if not is_success:
-            logger.log_warning("Failed to resolve grasp pose for all environments.")
+            logger.log_warning(
+                "Failed to resolve grasp pose, using default approach pose"
+            )
             return False, torch.empty(0), self.joint_ids
 
         if target_semantics is not None:
@@ -751,7 +745,7 @@ class PlaceActionCfg(GraspActionCfg):
     """Name of the action, used for identification and logging."""
 
 
-class PlaceAction(_GraspAction):
+class PlaceAction(MoveAction):
     updates_held_object_state = True
 
     def __init__(
@@ -766,10 +760,20 @@ def __init__(
             cfg: Configuration for the action.
         """
         super().__init__(
-            motion_generator,
-            cfg=cfg if cfg is not None else PlaceActionCfg(),
-            cfg_name="PlaceActionCfg",
+            motion_generator, cfg=cfg if cfg is not None else PlaceActionCfg()
         )
+        self.cfg = cfg if cfg is not None else self.cfg
+        if self.cfg.hand_open_qpos is None:
+            logger.log_error("hand_open_qpos must be specified in PlaceActionCfg")
+        if self.cfg.hand_close_qpos is None:
+            logger.log_error("hand_close_qpos must be specified in PlaceActionCfg")
+        self.hand_open_qpos = self.cfg.hand_open_qpos.to(self.device)
+        self.hand_close_qpos = self.cfg.hand_close_qpos.to(self.device)
+
+        self.hand_joint_ids = self.robot.get_joint_ids(name=self.cfg.hand_control_part)
+        self.joint_ids = self.arm_joint_ids + self.hand_joint_ids
+        self.arm_dof = len(self.arm_joint_ids)
+        self.dof = len(self.joint_ids)
 
     def execute(
         self,
@@ -789,16 +793,16 @@ def execute(
             trajectory of shape (n_envs, n_waypoints, dof),
             joint_ids corresponding to trajectory
         """
-        self._held_object_state = None
-        start_qpos = self._resolve_start_qpos(start_qpos, self.arm_dof)
-
         is_success, place_xpos = self._resolve_pose_target(
             target, action_name=self.__class__.__name__
         )
+        start_qpos = self._resolve_start_qpos(start_qpos, self.arm_dof)
 
         # TODO: warning and fallback if no valid grasp pose found
         if not is_success:
-            logger.log_warning("Failed to resolve place target pose.")
+            logger.log_warning(
+                "Failed to resolve grasp pose, using default approach pose"
+            )
             return False, torch.empty(0), self.joint_ids
 
         # compute waypoint number for each phase
@@ -892,4 +896,4 @@ def validate(self, target, start_qpos=None, **kwargs):
 
     def get_held_object_state(self) -> HeldObjectState | None:
         """Return None after place releases the held object."""
-        return self._held_object_state
+        return None

From ef98a817d278bde7d0e11975e0b12a531826c415 Mon Sep 17 00:00:00 2001
From: skywhite1024 <129768272+skywhite1024@users.noreply.github.com>
Date: Fri, 19 Jun 2026 20:12:33 +0800
Subject: [PATCH 14/14] Fix batched pickup success handling

---
 embodichain/lab/sim/atomic_actions/actions.py |  10 +-
 tests/sim/atomic_actions/test_actions.py      | 127 +++++++++++++++++-
 2 files changed, 134 insertions(+), 3 deletions(-)

diff --git a/embodichain/lab/sim/atomic_actions/actions.py b/embodichain/lab/sim/atomic_actions/actions.py
index 43571e88..f5d03d78 100644
--- a/embodichain/lab/sim/atomic_actions/actions.py
+++ b/embodichain/lab/sim/atomic_actions/actions.py
@@ -98,6 +98,12 @@ def __init__(
         self.arm_joint_ids = self.robot.get_joint_ids(name=self.cfg.control_part)
         self.dof = len(self.arm_joint_ids)
 
+    def _all_envs_success(self, is_success: bool | torch.Tensor) -> bool:
+        """Return true only when all environments report success."""
+        if isinstance(is_success, torch.Tensor):
+            return bool(torch.all(is_success).item())
+        return bool(is_success)
+
     def _resolve_pose_target(
         self,
         target: Union[ObjectSemantics, torch.Tensor],
@@ -222,7 +228,7 @@ def _plan_arm_trajectory(
             is_success, qpos = self.robot.compute_ik(
                 pose=xpos_traj[:, j], name=self.cfg.control_part, joint_seed=qpos_seed
             )
-            if not is_success:
+            if not self._all_envs_success(is_success):
                 logger.log_warning(
                     f"Failed to compute IK for target state {j} in some environments. "
                     "The resulting trajectory may be invalid."
@@ -431,7 +437,7 @@ def execute(
             )
 
         # TODO: warning and fallback if no valid grasp pose found
-        if not is_success:
+        if not self._all_envs_success(is_success):
             logger.log_warning(
                 "Failed to resolve grasp pose, using default approach pose"
             )
diff --git a/tests/sim/atomic_actions/test_actions.py b/tests/sim/atomic_actions/test_actions.py
index af27bb96..7cc881ab 100644
--- a/tests/sim/atomic_actions/test_actions.py
+++ b/tests/sim/atomic_actions/test_actions.py
@@ -20,7 +20,7 @@
 
 import pytest
 import torch
-from unittest.mock import MagicMock, Mock
+from unittest.mock import Mock, patch
 
 from embodichain.lab.sim.atomic_actions.core import (
     ActionCfg,
@@ -199,6 +199,48 @@ def test_interpolate_hand_qpos_batched_shape(self):
         assert torch.allclose(result[:, 0], start)
         assert torch.allclose(result[:, -1], end)
 
+    def test_plan_arm_trajectory_accepts_all_env_success_tensor(self):
+        n_waypoints = 5
+        target_state = Mock()
+        target_state.xpos = torch.eye(4)
+        target_states_list = [[target_state] for _ in range(NUM_ENVS)]
+        start_qpos = torch.zeros(NUM_ENVS, ARM_DOF)
+        interp_traj = torch.zeros(NUM_ENVS, n_waypoints, ARM_DOF)
+
+        with patch(
+            "embodichain.lab.sim.atomic_actions.actions.interpolate_with_distance",
+            return_value=interp_traj,
+        ):
+            is_success, trajectory = self.action._plan_arm_trajectory(
+                target_states_list,
+                start_qpos,
+                n_waypoints,
+            )
+
+        assert is_success is True
+        assert trajectory is interp_traj
+
+    def test_plan_arm_trajectory_fails_on_partial_env_success_tensor(self):
+        target_state = Mock()
+        target_state.xpos = torch.eye(4)
+        target_states_list = [[target_state] for _ in range(NUM_ENVS)]
+        start_qpos = torch.zeros(NUM_ENVS, ARM_DOF)
+        self.robot.compute_ik = Mock(
+            return_value=(
+                torch.tensor([True, False], dtype=torch.bool),
+                torch.zeros(NUM_ENVS, ARM_DOF),
+            )
+        )
+
+        is_success, trajectory = self.action._plan_arm_trajectory(
+            target_states_list,
+            start_qpos,
+            n_waypoints=5,
+        )
+
+        assert is_success is False
+        assert trajectory.shape == (NUM_ENVS, 1, ARM_DOF)
+
 
 # ---------------------------------------------------------------------------
 # PickUpAction
@@ -235,6 +277,89 @@ def test_init_sets_hand_joint_ids(self):
         assert action.dof == TOTAL_DOF
 
 
+class TestPickUpActionExecute:
+    """Tests for PickUpAction execution with batched success flags."""
+
+    def setup_method(self):
+        self.robot = _make_mock_robot()
+        self.mg = _make_mock_motion_generator(self.robot)
+
+    def _make_cfg(self, **overrides):
+        defaults = dict(
+            hand_open_qpos=torch.tensor([0.0, 0.0]),
+            hand_close_qpos=torch.tensor([0.025, 0.025]),
+            control_part="arm",
+            hand_control_part="hand",
+            pre_grasp_distance=0.15,
+            lift_height=0.15,
+            approach_direction=torch.tensor([0.0, 0.0, -1.0]),
+            sample_interval=8,
+            hand_interp_steps=2,
+        )
+        defaults.update(overrides)
+        return PickUpActionCfg(**defaults)
+
+    def _make_semantics(self) -> ObjectSemantics:
+        entity = Mock()
+        entity.get_local_pose.return_value = torch.eye(4).unsqueeze(0).repeat(
+            NUM_ENVS, 1, 1
+        )
+        return ObjectSemantics(
+            affordance=Affordance(),
+            geometry={},
+            label="box",
+            entity=entity,
+        )
+
+    def test_execute_accepts_all_env_grasp_success_tensor(self):
+        cfg = self._make_cfg()
+        action = PickUpAction(self.mg, cfg=cfg)
+        semantics = self._make_semantics()
+        grasp_xpos = torch.eye(4).unsqueeze(0).repeat(NUM_ENVS, 1, 1)
+        action._resolve_grasp_pose = Mock(
+            return_value=(torch.tensor([True, True], dtype=torch.bool), grasp_xpos)
+        )
+
+        def plan_success(target_states_list, start_qpos, n_waypoints, arm_dof):
+            return True, torch.zeros(NUM_ENVS, n_waypoints, arm_dof)
+
+        action._plan_arm_trajectory = Mock(side_effect=plan_success)
+
+        is_success, trajectory, joint_ids = action.execute(
+            target=semantics,
+            start_qpos=torch.zeros(NUM_ENVS, ARM_DOF),
+        )
+
+        assert is_success is True
+        assert joint_ids == list(range(TOTAL_DOF))
+        assert trajectory.shape == (NUM_ENVS, cfg.sample_interval, TOTAL_DOF)
+        assert action._plan_arm_trajectory.call_count == 2
+        held_state = action.get_held_object_state()
+        assert held_state is not None
+        assert held_state.semantics is semantics
+
+    def test_execute_fails_on_partial_env_grasp_success_tensor(self):
+        action = PickUpAction(self.mg, cfg=self._make_cfg())
+        semantics = self._make_semantics()
+        grasp_xpos = torch.eye(4).unsqueeze(0).repeat(NUM_ENVS, 1, 1)
+        action._resolve_grasp_pose = Mock(
+            return_value=(torch.tensor([True, False], dtype=torch.bool), grasp_xpos)
+        )
+        action._plan_arm_trajectory = Mock()
+
+        is_success, trajectory, joint_ids = action.execute(
+            target=semantics,
+            start_qpos=torch.zeros(NUM_ENVS, ARM_DOF),
+        )
+
+        assert is_success is False
+        assert trajectory.numel() == 0
+        assert joint_ids == list(range(TOTAL_DOF))
+        action._plan_arm_trajectory.assert_not_called()
+        semantics.entity.get_local_pose.assert_not_called()
+        assert action.get_held_object_state() is None
+
+
 # ---------------------------------------------------------------------------
 # MoveObjectAction
 # ---------------------------------------------------------------------------