Source code for mani_skill.envs.tasks.tabletop.push_t

from typing import Any

import numpy as np
import sapien
import torch
import torch.random
from transforms3d.euler import euler2quat

from mani_skill.agents.robots import PandaStick
from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils import common, sapien_utils
from mani_skill.utils.building import actors
from mani_skill.utils.registration import register_env
from mani_skill.utils.scene_builder.table import TableSceneBuilder
from mani_skill.utils.structs import Pose
from mani_skill.utils.structs.types import Array, GPUMemoryConfig, SimConfig


# extending TableSceneBuilder and only making 2 changes:
# 1.Making table smooth and white, 2. adding support for keyframes of new robots - panda stick

[docs]
class WhiteTableSceneBuilder(TableSceneBuilder):

[docs]
    def initialize(self, env_idx: torch.Tensor):
        """Run the parent initialization, then apply the panda_stick rest keyframe.

        For environments whose ``robot_uids`` is ``"panda_stick"`` the robot is
        reset to a fixed 7-joint configuration perturbed by Gaussian noise and
        its base is placed at x = -0.615. Any other robot is left exactly as
        the parent class initialized it.

        Args:
            env_idx: Indices of the environments being initialized; only its
                length is used here (one noisy configuration per entry).
        """
        super().initialize(env_idx)
        num_envs = len(env_idx)
        if self.env.robot_uids != "panda_stick":
            return

        # rest keyframe of the arm, one value per joint
        rest_qpos = np.array([0.662, 0.212, 0.086, -2.685, -0.115, 2.898, 1.673])
        # zero-mean noise drawn from the env's episode RNG, one row per environment
        joint_noise = self.env._episode_rng.normal(
            0, self.robot_init_qpos_noise, (num_envs, len(rest_qpos))
        )
        self.env.agent.reset(joint_noise + rest_qpos)
        self.env.agent.robot.set_pose(sapien.Pose([-0.615, 0, 0]))



[docs]
    def build(self):
        """Build the parent table scene, then repaint the table plain white.

        Every part of the first render shape of each table object gets an
        opaque white base color and has all of its texture maps cleared.
        """
        super().build()
        # cheap way to un-texture the table: flat white, no texture maps
        white_rgba = np.array([255, 255, 255, 255]) / 255
        for table_obj in self.table._objs:
            render_body = table_obj.find_component_by_type(
                sapien.render.RenderBodyComponent
            )
            for shape_part in render_body.render_shapes[0].parts:
                material = shape_part.material
                material.set_base_color(white_rgba)
                material.set_base_color_texture(None)
                material.set_normal_texture(None)
                material.set_emission_texture(None)
                material.set_transmission_texture(None)
                material.set_metallic_texture(None)
                material.set_roughness_texture(None)




@register_env("PushT-v1", max_episode_steps=100)

[docs]
class PushTEnv(BaseEnv):
    """
    **Task Description:**
    A simulated version of the real-world push-T task from Diffusion Policy: https://diffusion-policy.cs.columbia.edu/

    In this task, the robot needs to:
    1. Precisely push the T-shaped block into the target region, and
    2. Move the end-effector to the end-zone which terminates the episode. [2 Not required for PushT-easy-v1]

    **Randomizations:**
    - 3D T block initial position on table: sampled uniformly in [-0.1, 0.1] x [-0.1, 0.2] (x, y) relative to the T Goal initial position
    - 3D T block initial z rotation         [0,2pi]

    **Success Conditions:**
    - The T block covers 90% of the 2D goal T's area
    """


[docs]
    # URL of a sample video for this environment (the file name refers to PushT-v1).
    _sample_video_link = "https://github.com/mani-skill/ManiSkill/raw/main/figures/environment_demos/PushT-v1_rt.mp4"


[docs]
    # Robot uids this task is written for; "panda_stick" is also the default
    # `robot_uids` argument of `__init__`.
    SUPPORTED_ROBOTS = ["panda_stick"]


[docs]
    # Annotation only (no value assigned here): the agent is expected to be a PandaStick.
    agent: PandaStick


    # # # # # # # # All Unspecified real-life Parameters Here # # # # # # # #
    # Randomizations
    # 3D T center of mass spawnbox dimensions

[docs]
    # Size of the T spawn box along x: the T's x is the goal x plus
    # rand * tee_spawnbox_xlength + tee_spawnbox_xoffset (see _initialize_episode).
    tee_spawnbox_xlength = 0.2


[docs]
    # Size of the T spawn box along y: the T's y is the goal y plus
    # rand * tee_spawnbox_ylength + tee_spawnbox_yoffset (see _initialize_episode).
    tee_spawnbox_ylength = 0.3


    # translation of the spawnbox from goal tee as upper left of spawnbox

[docs]
    # Constant x shift of the spawn box relative to the goal T position.
    tee_spawnbox_xoffset = -0.1


[docs]
    # Constant y shift of the spawn box relative to the goal T position.
    tee_spawnbox_yoffset = -0.1

    #  end randomizations - rotation around z is simply uniform

    # Hand crafted params to match visual of real life setup
    # T Goal initial position on table

[docs]
    # (x, y) position of the goal T; used both for the goal actor's pose and
    # for the translation part of the goal-to-world transform.
    goal_offset = torch.tensor([-0.156, -0.1])


[docs]
    # Rotation of the goal T about the z axis, in radians.
    goal_z_rot = (5 / 3) * np.pi


    # end effector goal - NOTE that changing this will not change the actual
    # ee starting position of the robot - need to change joint position resting
    # keyframe in table setup to change ee starting location, then copy that location here

[docs]
    # Position given to the flat end-effector goal marker in _initialize_episode;
    # despite the "2D" name it has three components, with z = 1e-3.
    ee_starting_pos2D = torch.tensor([-0.321, 0.284, 1e-3])

    # this will be used in the state observations

[docs]
    # Same x, y as ee_starting_pos2D but with z = 0.024; moved to the env device
    # in _load_scene. NOTE(review): no other code visible in this file reads it.
    ee_starting_pos3D = torch.tensor([-0.321, 0.284, 0.024])


    # intersection threshold for success in T position

[docs]
    # Minimum overlap ratio (intersection area / goal T area) for success;
    # compared with >= in evaluate().
    intersection_thresh = 0.90


    # T block design choices

[docs]
    # Mass assigned to the T block's actor builder in _load_scene
    # (units are not stated in this file - presumably kg; TODO confirm).
    T_mass = 0.8


[docs]
    # Friction coefficient used when creating the T block's physics material in _load_scene.
    T_dynamic_friction = 3


[docs]
    # Friction coefficient used when creating the T block's physics material in _load_scene.
    T_static_friction = 3


    def __init__(
        self, *args, robot_uids="panda_stick", robot_init_qpos_noise=0.02, **kwargs
    ):
        """Create the PushT environment.

        Args:
            *args: Forwarded unchanged to the parent constructor.
            robot_uids: Robot identifier forwarded to the parent constructor.
            robot_init_qpos_noise: Stored on the instance and later handed to
                the table scene builder in ``_load_scene``.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        # Stored before calling the parent constructor - presumably because the
        # parent triggers scene loading, which reads this value (TODO confirm).
        self.robot_init_qpos_noise = robot_init_qpos_noise
        super().__init__(*args, robot_uids=robot_uids, **kwargs)

    @property

[docs]
    def _default_sim_config(self):
        """Return the simulation config with explicit GPU memory capacities."""
        gpu_memory = GPUMemoryConfig(
            found_lost_pairs_capacity=2**25,
            max_rigid_patch_count=2**18,
        )
        return SimConfig(gpu_memory_config=gpu_memory)


    @property

[docs]
    def _default_sensor_configs(self):
        """Return a one-element list holding the 128x128 ``base_camera`` config.

        The camera sits at (0.3, 0, 0.6) and looks at (-0.1, 0, 0.1).
        """
        camera_pose = sapien_utils.look_at(eye=[0.3, 0, 0.6], target=[-0.1, 0, 0.1])
        base_camera = CameraConfig(
            "base_camera",
            pose=camera_pose,
            width=128,
            height=128,
            fov=np.pi / 2,
            near=0.01,
            far=100,
        )
        return [base_camera]


    @property

[docs]
    def _default_human_render_camera_configs(self):
        """Return the 512x512 ``render_camera`` config.

        It uses the same eye and target points as the sensor camera, with a
        field of view of 1.
        """
        camera_pose = sapien_utils.look_at(eye=[0.3, 0, 0.6], target=[-0.1, 0, 0.1])
        render_camera = CameraConfig(
            "render_camera",
            pose=camera_pose,
            width=512,
            height=512,
            fov=1,
            near=0.01,
            far=100,
        )
        return render_camera



[docs]
    def _load_agent(self, options: dict):
        """Load the agent through the parent class with a base pose at x = -0.615."""
        robot_base_pose = sapien.Pose(p=[-0.615, 0, 0])
        super()._load_agent(options, robot_base_pose)



[docs]
    def _load_scene(self, options: dict):
        """Build the table, the T block, the goal markers and the 2D overlap buffers.

        Creates, in order:
          * the white table scene (``self.table_scene``),
          * the pushable T block (``self.tee``) and a thin kinematic, visual-only
            goal T (``self.goal_tee``),
          * a thin kinematic cylinder marking the end-effector goal
            (``self.ee_goal_pos``),
          * the tensors used by ``pseudo_render_intersection``: ``uv_grid``,
            ``homo_uv``, ``tee_render`` and ``world_to_goal_trans``.

        Args:
            options: Not used by this method.
        """
        # these parameters are class-level tensors defined before the device was
        # known; load scene is a convenient place for this one-time move
        self.ee_starting_pos2D = self.ee_starting_pos2D.to(self.device)
        self.ee_starting_pos3D = self.ee_starting_pos3D.to(self.device)

        # we use a prebuilt scene builder class that automatically loads in a floor and table.
        self.table_scene = WhiteTableSceneBuilder(
            env=self, robot_init_qpos_noise=self.robot_init_qpos_noise
        )
        self.table_scene.build()

        # returns 3d cad of create_tee - center of mass at (0,0,0)
        # cad Tee is upside down (both 3D tee and target)
        TARGET_RED = (
            np.array([194, 19, 22, 255]) / 255
        )  # same as mani_skill.utils.building.actors.common - goal target

        def create_tee(name="tee", target=False, base_color=TARGET_RED):
            """Build a T from two boxes.

            With ``target=False`` the T gets collision shapes and is built as a
            regular actor; with ``target=True`` it is visual-only, almost flat
            (half thickness 1e-4) and built as a kinematic actor.
            """
            # dimensions of boxes that make tee
            # box2 is same as box1, except (3/4) the length, and rotated 90 degrees
            # these dimensions are an exact replica of the 3D tee model given by diffusion policy: https://cad.onshape.com/documents/f1140134e38f6ed6902648d5/w/a78cf81827600e4ff4058d03/e/f35f57fb7589f72e05c76caf
            box1_half_w = 0.2 / 2
            box1_half_h = 0.05 / 2
            half_thickness = 0.04 / 2 if not target else 1e-4

            # we have to center tee at its com so rotations are applied to com
            # vertical block is (3/4) size of horizontal block, so
            # center of mass is (1*com_horiz + (3/4)*com_vert) / (1+(3/4))
            # center of mass is (1*(0,0)) + (3/4)*(0,(.025+.15)/2)) / (1+(3/4)) = (0,0.0375)
            # NOTE(review): the vertical box below is centered at y = 4 * box1_half_h = 0.1,
            # which would give (3/4) * 0.1 / (7/4) ~= 0.0429 rather than 0.0375. The value
            # is left unchanged because it also defines the frame used by the overlap
            # buffers and rewards - confirm before touching.
            com_y = 0.0375

            builder = self.scene.create_actor_builder()
            first_block_pose = sapien.Pose([0.0, 0.0 - com_y, 0.0])
            first_block_size = [box1_half_w, box1_half_h, half_thickness]
            if not target:
                builder._mass = self.T_mass
                # each friction coefficient goes to its matching keyword
                tee_material = sapien.pysapien.physx.PhysxMaterial(
                    static_friction=self.T_static_friction,
                    dynamic_friction=self.T_dynamic_friction,
                    restitution=0,
                )
                builder.add_box_collision(
                    pose=first_block_pose,
                    half_size=first_block_size,
                    material=tee_material,
                )
            builder.add_box_visual(
                pose=first_block_pose,
                half_size=first_block_size,
                material=sapien.render.RenderMaterial(
                    base_color=base_color,
                ),
            )

            # for the second block (vertical part), we translate y by 4*(box1_half_h)-com_y to align flush with horizontal block
            # note that the cad model tee made here is upside down
            second_block_pose = sapien.Pose([0.0, 4 * (box1_half_h) - com_y, 0.0])
            second_block_size = [box1_half_h, (3 / 4) * (box1_half_w), half_thickness]
            if not target:
                builder.add_box_collision(
                    pose=second_block_pose,
                    half_size=second_block_size,
                    material=tee_material,
                )
            builder.add_box_visual(
                pose=second_block_pose,
                half_size=second_block_size,
                material=sapien.render.RenderMaterial(
                    base_color=base_color,
                ),
            )
            builder.initial_pose = sapien.Pose(p=[0, 0, 0.1])
            if not target:
                return builder.build(name=name)
            else:
                return builder.build_kinematic(name=name)

        self.tee = create_tee(name="Tee", target=False)
        self.goal_tee = create_tee(
            name="goal_Tee",
            target=True,
            base_color=np.array([128, 128, 128, 255]) / 255,
        )

        # adding end-effector end-episode goal position
        builder = self.scene.create_actor_builder()
        builder.add_cylinder_visual(
            radius=0.02,
            half_length=1e-4,
            material=sapien.render.RenderMaterial(
                base_color=np.array([128, 128, 128, 255]) / 255
            ),
        )
        builder.initial_pose = sapien.Pose(p=[0, 0, 0.1])
        self.ee_goal_pos = builder.build_kinematic(name="goal_ee")

        # Rest of function is setting up for Custom 2D "Pseudo-Rendering" function below
        res = 64
        uv_half_width = 0.15
        self.uv_half_width = uv_half_width
        self.res = res
        # (res, res) grid whose entries are the column index shifted by -res/2
        oned_grid = torch.arange(res, dtype=torch.float32).view(1, res).repeat(
            res, 1
        ) - (res / 2)
        # (2, res, res): channel 0 grows along columns, channel 1 decreases along
        # rows; +0.5 moves to pixel centers, the division converts indices to
        # coordinates in [-uv_half_width, uv_half_width]
        self.uv_grid = (
            torch.cat([oned_grid.unsqueeze(0), (-1 * oned_grid.T).unsqueeze(0)], dim=0)
            + 0.5
        ) / ((res / 2) / uv_half_width)
        self.uv_grid = self.uv_grid.to(self.device)
        # (3, res, res): uv coordinates with a row of ones appended (homogeneous form)
        self.homo_uv = torch.cat(
            [self.uv_grid, torch.ones_like(self.uv_grid[0]).unsqueeze(0)], dim=0
        )

        # tee render
        # tee is made of two different boxes, and then translated by center of mass
        self.center_of_mass = (
            0,
            0.0375,
        )  # in frame of upside tee with center of horizontal box (add center of mass to get to real tee frame)
        # corner order of each box: top-left, top-right, bottom-left, bottom-right
        box1 = torch.tensor(
            [[-0.1, 0.025], [0.1, 0.025], [-0.1, -0.025], [0.1, -0.025]]
        )
        box2 = torch.tensor(
            [[-0.025, 0.175], [0.025, 0.175], [-0.025, 0.025], [0.025, 0.025]]
        )
        box1[:, 1] -= self.center_of_mass[1]
        box2[:, 1] -= self.center_of_mass[1]

        # convert tee boxes to indices
        box1 *= (res / 2) / uv_half_width
        box1 += res / 2

        box2 *= (res / 2) / uv_half_width
        box2 += res / 2

        box1 = box1.long()
        box2 = box2.long()

        self.tee_render = torch.zeros(res, res)
        # image map has flipped x and y, set values in transpose to undo
        self.tee_render.T[box1[0, 0] : box1[1, 0], box1[2, 1] : box1[0, 1]] = 1
        self.tee_render.T[box2[0, 0] : box2[1, 0], box2[2, 1] : box2[0, 1]] = 1
        # image map y is flipped of xy plane, flip to unflip
        self.tee_render = self.tee_render.flip(0).to(self.device)

        # only qw is filled in; quat_to_zrot derives the z angle from qw (and the
        # sign of the last component, which is left at 0 here)
        goal_fake_quat = torch.tensor(
            [(torch.tensor([self.goal_z_rot]) / 2).cos(), 0, 0, 0.0]
        ).unsqueeze(0)
        zrot = self.quat_to_zrot(goal_fake_quat).squeeze(
            0
        )  # 3x3 rot matrix for goal to world transform
        goal_trans = torch.eye(3)
        goal_trans[:2, :2] = zrot[:2, :2]
        goal_trans[0:2, 2] = self.goal_offset
        self.world_to_goal_trans = torch.linalg.inv(goal_trans).to(
            self.device
        )  # this is just a 3x3 matrix (2d homogeneous transform)



[docs]
    def quat_to_z_euler(self, quats):
        """Return the z-axis rotation angle encoded by each quaternion.

        The quaternions are read as (w, x, y, z) and are assumed to describe a
        rotation about the z axis only, so the angle follows from
        ``qw = cos(alpha / 2)``.

        Args:
            quats: Tensor of shape (b, 4).

        Returns:
            Tensor of shape (b,) with angles in [0, 2*pi].
        """
        assert len(quats.shape) == 2 and quats.shape[-1] == 4
        # q and -q are the same rotation (double cover): flip qw wherever the
        # last component is negative. A zero last component keeps qw as is,
        # which is why torch.sign (sign(0) == 0) is not used here.
        qw = torch.where(quats[:, -1] < 0, -quats[:, 0], quats[:, 0])
        # float error can push |qw| slightly above 1, where acos returns NaN
        qw = torch.clamp(qw, -1.0, 1.0)
        return 2 * qw.acos()



[docs]
    def quat_to_zrot(self, quats):
        """Convert a batch of quaternions into z-axis rotation matrices.

        Args:
            quats: Tensor of shape (b, 4); the angle is taken from
                ``quat_to_z_euler``.

        Returns:
            Tensor of shape (b, 3, 3) on ``quats.device`` holding a rotation
            about z by each angle.
        """
        assert len(quats.shape) == 2 and quats.shape[-1] == 4
        yaw = self.quat_to_z_euler(quats)
        cos_yaw = yaw.cos()
        sin_yaw = yaw.sin()

        rot_mats = torch.zeros(quats.shape[0], 3, 3).to(quats.device)
        # upper-left 2x2 block is the planar rotation
        rot_mats[:, 0, 0] = cos_yaw
        rot_mats[:, 0, 1] = -sin_yaw
        rot_mats[:, 1, 0] = sin_yaw
        rot_mats[:, 1, 1] = cos_yaw
        # z axis is left untouched
        rot_mats[:, 2, 2] = 1
        return rot_mats



[docs]
    def pseudo_render_intersection(self):
        """Compute how much of the goal T each environment's T block covers.

        A custom 'pseudo renderer': the T block's pixels are transformed into the
        goal T's 2D frame, rasterized onto a (res, res) grid, and compared with
        the precomputed egocentric T mask ``self.tee_render``. Everything is
        batched tensor math with no explicit Python loops.

        Returns:
            Tensor of shape (b,): intersection pixel count divided by the pixel
            count of ``self.tee_render``.
        """
        # we are given T_{a->w} where a == actor frame and w == world frame
        # we are given T_{g->w} where g == goal frame and w == world frame
        # applying T_{a->w} and then T_{w->g}, we get the actor's orientation in the goal tee's frame
        # T_{w->g} is T_{g->w}^{-1}, we already have the goal's orientation, and it doesn't change
        tee_to_world_trans = self.quat_to_zrot(
            self.tee.pose.q
        )  # should be (b,3,3) rot matrices
        tee_to_world_trans[:, 0:2, 2] = self.tee.pose.p[
            :, :2
        ]  # should be (b,3,3) rigid trans matrices

        # these matrices convert egocentric 3d tee to 2d goal tee frame
        tee_to_goal_trans = (
            self.world_to_goal_trans @ tee_to_world_trans
        )  # should be (b,3,3) rigid trans matrices

        # making homogeneous coords of uv map to apply transformations to view tee in goal tee frame
        b = tee_to_world_trans.shape[0]
        res = self.uv_grid.shape[1]
        homo_uv = self.homo_uv

        # finally, get uv coordinates of tee in goal tee frame
        tees_in_goal_frame = (tee_to_goal_trans @ homo_uv.view(3, -1)).view(
            b, 3, res, res
        )
        # convert from homogeneous coords to normal coords
        tees_in_goal_frame = tees_in_goal_frame[:, 0:2, :, :] / tees_in_goal_frame[
            :, -1, :, :
        ].unsqueeze(
            1
        )  #  now (b,2,res,res)

        # we now have a collection of coordinates xy that are the coordinates of the tees in the goal frame
        # we just extract the indices in the uv map where the egocentric T is, to get the transformed T coords
        # this works because while we transformed the coordinates of the uv map -
        # the indices where the egocentric T is are still the indices of the T in the uv map (indices of uv map never changed, just values)
        tee_coords = tees_in_goal_frame[:, :, self.tee_render == 1].view(
            b, 2, -1
        )  #  (b,2,num_points_in_tee)

        # convert tee_coords to indices - this is basically a batch of indices - same shape as tee_coords
        # this is the inverse function of creating the uv map from image indices used in load_scene
        tee_indices = (
            (tee_coords * ((res / 2) / self.uv_half_width) + (res / 2))
            .long()
            .view(b, 2, -1)
        )  #  (b,2,num_points_in_tee)

        # setting all of our work in image format to compare with egocentric image of goal T
        final_renders = torch.zeros(b, res, res).to(self.device)
        # for batch indexing
        num_tee_pixels = tee_indices.shape[-1]
        batch_indices = (
            torch.arange(b).view(-1, 1).repeat(1, num_tee_pixels).to(self.device)
        )

        # ensure no out of bounds indexing - it's fine to not fully 'render' tee, just need to fully see goal tee which is ensured
        # because we are in the goal tee frame, and 'cad' tee render setup of egocentric view includes full tee
        # also, the reward isn't miou, it's intersection area / goal area - don't need union -> don't need full T 'render'
        # ugly solution for now to keep parallelism with no loop - set out of bound image t indices to [0,0]
        # anywhere where x or y is out of bounds, make indices (0,0)
        invalid_xs = (tee_indices[:, 0, :] < 0) | (tee_indices[:, 0, :] >= self.res)
        invalid_ys = (tee_indices[:, 1, :] < 0) | (tee_indices[:, 1, :] >= self.res)
        tee_indices[:, 0, :][invalid_xs] = 0
        tee_indices[:, 1, :][invalid_xs] = 0
        tee_indices[:, 0, :][invalid_ys] = 0
        tee_indices[:, 1, :][invalid_ys] = 0

        # mark every (in-bounds or redirected) tee pixel in its environment's image
        final_renders[batch_indices, tee_indices[:, 0, :], tee_indices[:, 1, :]] = 1
        # coord to image fix - need to transpose each image in the batch, then reverse y coords to correctly visualize
        final_renders = final_renders.permute(0, 2, 1).flip(1)

        # finally, we can calculate intersection/goal_area for reward
        intersection = (
            (final_renders.bool() & self.tee_render.bool()).sum(dim=[-1, -2]).float()
        )
        goal_area = self.tee_render.bool().sum().float()

        reward = intersection / goal_area

        # del tee_to_world_trans; del tee_to_goal_trans; del tees_in_goal_frame; del tee_coords; del tee_indices
        # del final_renders; del invalid_xs; del invalid_ys; batch_indices; del intersection; del goal_area
        # torch.cuda.empty_cache()
        return reward



[docs]
    def _initialize_episode(self, env_idx: torch.Tensor, options: dict):
        """Place the goal T, the T block and the end-effector marker for a reset.

        The goal T always goes to ``goal_offset`` with rotation ``goal_z_rot``.
        The T block is placed at the goal position plus a uniform offset drawn
        from the spawn box, with a uniformly random rotation about z.

        Args:
            env_idx: Indices of the environments being reset; one pose is
                generated per entry.
            options: Not used by this method.
        """
        with torch.device(self.device):
            num_envs = len(env_idx)
            self.table_scene.initialize(env_idx)

            # goal tee: fixed position, z slightly above 0 so it sits on the table
            xyz = torch.zeros((num_envs, 3))
            xyz[:, 0] += self.goal_offset[0]
            xyz[:, 1] += self.goal_offset[1]
            xyz[..., 2] = 1e-3
            goal_pose = Pose.create_from_pq(
                p=xyz,
                q=euler2quat(0, 0, self.goal_z_rot),
            )
            self.goal_tee.set_pose(goal_pose)

            # tee block: start from the goal position and add a uniform offset
            # inside the spawn box (x is sampled first, then y)
            x_shift = (
                torch.rand(num_envs) * (self.tee_spawnbox_xlength)
                + self.tee_spawnbox_xoffset
            )
            xyz[..., 0] += x_shift
            y_shift = (
                torch.rand(num_envs) * (self.tee_spawnbox_ylength)
                + self.tee_spawnbox_yoffset
            )
            xyz[..., 1] += y_shift
            # half thickness of the tee plus a little
            xyz[..., 2] = 0.04 / 2 + 1e-3

            # random rotation about z: quaternion is [cos(theta/2), 0, 0, sin(theta/2)]
            half_yaw = (torch.rand(num_envs) * (2 * torch.pi)) / 2
            tee_quat = torch.zeros((num_envs, 4))
            tee_quat[:, 0] = half_yaw.cos()
            tee_quat[:, -1] = half_yaw.sin()
            self.tee.set_pose(Pose.create_from_pq(p=xyz, q=tee_quat))

            # ee starting/ending position marked on table like the real-life task
            marker_xyz = torch.zeros((num_envs, 3))
            marker_xyz[:] = self.ee_starting_pos2D
            marker_pose = Pose.create_from_pq(
                p=marker_xyz,
                q=euler2quat(0, np.pi / 2, 0),
            )
            self.ee_goal_pos.set_pose(marker_pose)



[docs]
    def evaluate(self):
        """Report per-environment success.

        Success means the overlap ratio from ``pseudo_render_intersection`` is
        at least ``intersection_thresh``; no end-effector condition is checked.

        Returns:
            Dict with the single key ``"success"``.
        """
        overlap_ratio = self.pseudo_render_intersection()
        return {"success": overlap_ratio >= self.intersection_thresh}



[docs]
    def _get_obs_extra(self, info: dict):
        # ee position is super useful for pandastick robot
        obs = dict(
            tcp_pose=self.agent.tcp.pose.raw_pose,
        )
        if self.obs_mode_struct.use_state:
            # state based gets info on goal position and t full pose - necessary to learn task
            obs.update(
                goal_pos=self.goal_tee.pose.p,
                obj_pose=self.tee.pose.raw_pose,
            )
        return obs



[docs]
    def compute_dense_reward(self, obs: Any, action: Array, info: dict):
        """Pose-based dense reward.

        Sum of three terms:
          * rotation: ``((cos(yaw - goal_z_rot) + 1) / 2) ** 2 / 2``, in [0, 0.5],
          * translation: ``(1 - tanh(5 * xy distance to goal)) ** 2 / 2``, in [0, 0.5],
          * reach: ``sqrt(1 - tanh(5 * tcp-to-T distance)) / 20``, in [0, 0.05].
        Environments flagged in ``info["success"]`` are set to 3.

        The pose-based form replaced an older overlap-based reward that got
        stuck at 50-75% intersection with PPO.

        Args:
            obs: Not used.
            action: Not used.
            info: Must contain ``"success"``, used as a mask over the batch.
        """
        tee_pos = self.tee.pose.p

        # relative z rotation between the T and the goal, via its cosine
        yaw_cos = (self.quat_to_z_euler(self.tee.pose.q) - self.goal_z_rot).cos()
        rotation_term = (((yaw_cos + 1) / 2) ** 2) / 2

        # planar distance between the T and the goal T
        planar_dist = torch.linalg.norm(
            tee_pos[:, 0:2] - self.goal_tee.pose.p[:, 0:2], axis=1
        )
        translation_term = ((1 - torch.tanh(5 * planar_dist)) ** 2) / 2

        # small bonus for keeping the end-effector close to the T
        tcp_dist = torch.linalg.norm(tee_pos - self.agent.tcp.pose.p, axis=1)
        reach_term = ((1 - torch.tanh(5 * tcp_dist)).sqrt()) / 20

        reward = rotation_term + translation_term + reach_term
        # successful environments get the maximum reward of 3
        reward[info["success"]] = 3
        return reward



[docs]
    def compute_normalized_dense_reward(self, obs: Any, action: Array, info: dict):
        """Return the dense reward divided by its maximum value of 3."""
        dense_reward = self.compute_dense_reward(obs=obs, action=action, info=info)
        return dense_reward / 3.0