Source code for mani_skill.envs.tasks.tabletop.pull_cube

from typing import Any, Union

import numpy as np
import sapien
import torch
import torch.random
from transforms3d.euler import euler2quat

from mani_skill.agents.robots import Fetch, Panda
from mani_skill.envs.sapien_env import BaseEnv
from mani_skill.sensors.camera import CameraConfig
from mani_skill.utils.building import actors
from mani_skill.utils.registration import register_env
from mani_skill.utils.sapien_utils import look_at
from mani_skill.utils.scene_builder.table import TableSceneBuilder
from mani_skill.utils.structs.pose import Pose
from mani_skill.utils.structs.types import Array


@register_env("PullCube-v1", max_episode_steps=50)
[docs]class PullCubeEnv(BaseEnv): """ **Task Description:** A simple task where the objective is to pull a cube onto a target. **Randomizations:** - the cube's xy position is randomized on top of a table in the region [0.1, 0.1] x [-0.1, -0.1]. - the target goal region is marked by a red and white target. The position of the target is fixed to be the cube's xy position - [0.1 + goal_radius, 0] **Success Conditions:** - the cube's xy position is within goal_radius (default 0.1) of the target's xy position by euclidean distance. """
[docs] SUPPORTED_ROBOTS = ["panda", "fetch"]
[docs] agent: Union[Panda, Fetch]
[docs] goal_radius = 0.1
[docs] cube_half_size = 0.02
def __init__(self, *args, robot_uids="panda", robot_init_qpos_noise=0.02, **kwargs):
[docs] self.robot_init_qpos_noise = robot_init_qpos_noise
super().__init__(*args, robot_uids=robot_uids, **kwargs) @property
[docs] def _default_sensor_configs(self): pose = look_at(eye=[-0.5, 0.0, 0.25], target=[0.2, 0.0, -0.5]) return [CameraConfig("base_camera", pose, 128, 128, np.pi / 2, 0.01, 100)]
@property
[docs] def _default_human_render_camera_configs(self): pose = look_at([0.6, 0.7, 0.6], [0.0, 0.0, 0.35]) return CameraConfig("render_camera", pose, 512, 512, 1, 0.01, 100)
[docs] def _load_agent(self, options: dict): super()._load_agent(options, sapien.Pose(p=[-0.615, 0, 0]))
[docs] def _load_scene(self, options: dict): self.table_scene = TableSceneBuilder( env=self, robot_init_qpos_noise=self.robot_init_qpos_noise ) self.table_scene.build() # create cube self.obj = actors.build_cube( self.scene, half_size=self.cube_half_size, color=np.array([12, 42, 160, 255]) / 255, name="cube", body_type="dynamic", initial_pose=sapien.Pose(p=[0, 0, self.cube_half_size]), ) # create target self.goal_region = actors.build_red_white_target( self.scene, radius=self.goal_radius, thickness=1e-5, name="goal_region", add_collision=False, body_type="kinematic", )
[docs] def _initialize_episode(self, env_idx: torch.Tensor, options: dict): with torch.device(self.device): b = len(env_idx) self.table_scene.initialize(env_idx) xyz = torch.zeros((b, 3)) xyz[..., :2] = torch.rand((b, 2)) * 0.2 - 0.1 xyz[..., 2] = self.cube_half_size q = [1, 0, 0, 0] obj_pose = Pose.create_from_pq(p=xyz, q=q) self.obj.set_pose(obj_pose) target_region_xyz = xyz - torch.tensor([0.1 + self.goal_radius, 0, 0]) target_region_xyz[..., 2] = 1e-3 self.goal_region.set_pose( Pose.create_from_pq( p=target_region_xyz, q=euler2quat(0, np.pi / 2, 0), ) )
[docs] def evaluate(self): is_obj_placed = ( torch.linalg.norm( self.obj.pose.p[..., :2] - self.goal_region.pose.p[..., :2], axis=1 ) < self.goal_radius ) return { "success": is_obj_placed, }
[docs] def _get_obs_extra(self, info: dict): obs = dict( tcp_pose=self.agent.tcp.pose.raw_pose, goal_pos=self.goal_region.pose.p, ) if self.obs_mode_struct.use_state: obs.update( obj_pose=self.obj.pose.raw_pose, ) return obs
[docs] def compute_dense_reward(self, obs: Any, action: Array, info: dict): # grippers should close and pull from behind the cube, not grip it # distance to backside of cube (+ 2*0.005) sufficiently encourages this tcp_pull_pos = self.obj.pose.p + torch.tensor( [self.cube_half_size + 2 * 0.005, 0, 0], device=self.device ) tcp_to_pull_pose = tcp_pull_pos - self.agent.tcp.pose.p tcp_to_pull_pose_dist = torch.linalg.norm(tcp_to_pull_pose, axis=1) reaching_reward = 1 - torch.tanh(5 * tcp_to_pull_pose_dist) reward = reaching_reward reached = tcp_to_pull_pose_dist < 0.01 obj_to_goal_dist = torch.linalg.norm( self.obj.pose.p[..., :2] - self.goal_region.pose.p[..., :2], axis=1 ) place_reward = 1 - torch.tanh(5 * obj_to_goal_dist) reward += place_reward * reached reward[info["success"]] = 3 return reward
[docs] def compute_normalized_dense_reward(self, obs: Any, action: Array, info: dict): max_reward = 3.0 return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward